├── matrix.h
├── inits.h
├── typedef_tree.h
├── col2im.h
├── typedefs.h
├── im2col.h
├── yolo_detector.h
├── maxpool_layer.h
├── tree.h
├── yolo_detector_test.cpp
├── list.h
├── softmax_layer.h
├── connected_layer.h
├── batchnorm_layer.h
├── col2im.cpp
├── README.md
├── data.cpp
├── option_list.h
├── region_layer.h
├── blas.h
├── box.h
├── utilities.h
├── convolutional_layer.h
├── list.cpp
├── gemm.h
├── parser.h
├── softmax_layer.cpp
├── im2col.cpp
├── data.h
├── network.h
├── network.cpp
├── batchnorm_layer.cpp
├── timer.h
├── maxpool_layer.cpp
├── image.h
├── sysarr.cpp
├── activations.cpp
├── box.cpp
├── inits.cpp
├── activations.h
├── tree.cpp
├── blas.cpp
├── option_list.cpp
├── layer.h
├── utilities.cpp
├── yolo_detector.cpp
├── connected_layer.cpp
├── convolutional_layer.cpp
├── gemm2.cpp
├── region_layer.cpp
├── image.cpp
├── parser.cpp
└── stb_image_write.h


/matrix.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Matrix header file
 3 | //========================================================================
 4 | // @brief: struct type definition
 5 | 
 6 | #ifndef SRC_MATRIX_H_
 7 | #define SRC_MATRIX_H_
 8 | 
 9 | // row, column, and data
10 | typedef struct matrix
11 | {
12 |     int row;
13 |     int cols;
14 |     float **vals;
15 | } matrix;
16 | 
17 | #endif /* SRC_MATRIX_H_ */
18 | 


--------------------------------------------------------------------------------
/inits.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Inits header file
 3 | //========================================================================
 4 | // @brief: function prototype definition
 5 | 
 6 | #ifndef SRC_INITS_H_
 7 | #define SRC_INITS_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | 
12 | #include "layer.h"
13 | #include "network.h"
14 | 
15 | // layer init
16 | void init_layer(layer &l);
17 | 
18 | #endif /* SRC_INITS_H_ */
19 | 


--------------------------------------------------------------------------------
/typedef_tree.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Typedef_tree header file
 3 | //========================================================================
 4 | // @brief: struct type definition
 5 | 
 6 | #ifndef SRC_TYPEDEF_TREE_H_
 7 | #define SRC_TYPEDEF_TREE_H_
 8 | 
 9 | // tree structure
10 | typedef struct tree
11 | {
12 | 	int *leaf;
13 | 	int n;
14 | 	int *parent;
15 | 	int *child;
16 | 	int *group;
17 | 	char **name;
18 | 
19 | 	int groups;
20 | 	int *group_size;
21 | 	int *group_offset;
22 | } tree;
23 | 
24 | #endif /* SRC_TYPEDEF_TREE_H_ */
25 | 


--------------------------------------------------------------------------------
/col2im.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Col2im header file
 3 | //========================================================================
 4 | // @brief: function prototype definition
 5 | 
 6 | #ifndef SRC_COL2IM_H_
 7 | #define SRC_COL2IM_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | 
12 | // column to image: filters%batch == 0
13 | void col2img(float *c_col,float *c, int m, int n, int count, int batch);
14 | // column to image: filters%batch != 0
15 | void col2img_extra(float *c_col,float *c, int m, int n, int count, int batch);
16 | 
17 | #endif /* SRC_COL2IM_H_ */
18 | 


--------------------------------------------------------------------------------
/typedefs.h:
--------------------------------------------------------------------------------
 1 | //===========================================================================
 2 | // typedefs.h
 3 | //===========================================================================
 4 | // @brief: define bitwise variables & macros
 5 | 
 6 | #ifndef LOADATA_H
 7 | #define LOADATA_H
 8 | 
 9 | #include <ap_int.h>
10 | #include <ap_fixed.h>
11 | 
12 | #define TOT_WIDTH_IN 32
13 | #define INT_WIDTH_IN 8
14 | #define TOT_WIDTH_OUT 64
15 | #define INT_WIDTH_OUT 16
16 | 
17 | typedef ap_fixed<TOT_WIDTH_IN, INT_WIDTH_IN> INPUT_32;
18 | typedef ap_fixed<TOT_WIDTH_OUT, INT_WIDTH_OUT> OUTPUT_64;
19 | 
20 | typedef ap_int<16> bit16;
21 | typedef ap_int<32> bit32;
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/im2col.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Im2col header file
 3 | //========================================================================
 4 | // @brief: function prototype & activate type definition
 5 | 
 6 | #ifndef SRC_IM2COL_H_
 7 | #define SRC_IM2COL_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | 
12 | // image to column : filters%batch == 0
13 | void im2col(float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col);
14 | // image to column : filters%batch != 0
15 | void im2col_extra(float *data_im,int channels, int height, int width, int ksize,  int stride, int pad, float* data_col);
16 | 
17 | #endif /* SRC_IM2COL_H_ */
18 | 


--------------------------------------------------------------------------------
/yolo_detector.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // yolo_detector header file
 3 | //========================================================================
 4 | // @brief: function prototype definition
 5 | 
 6 | #ifndef SRC_YOLO_DETECTOR_H_
 7 | #define SRC_YOLO_DETECTOR_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | 
12 | #include "timer.h"
13 | #include "network.h"
14 | #include "region_layer.h"
15 | #include "utilities.h"
16 | #include "parser.h"
17 | #include "box.h"
18 | #include "option_list.h"
19 | #include "data.h"
20 | #include "timer.h"
21 | 
22 | void detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh);
23 | 
24 | #endif /* SRC_YOLO_DETECTOR_H_ */
25 | 


--------------------------------------------------------------------------------
/maxpool_layer.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Maxpooling header file
 3 | //========================================================================
 4 | // @brief: function prototype definition
 5 | 
 6 | #ifndef SRC_MAXPOOL_LAYER_H_
 7 | #define SRC_MAXPOOL_LAYER_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | 
12 | #include "image.h"
13 | #include "layer.h"
14 | #include "network.h"
15 | #include "inits.h"
16 | 
17 | // redefine layer
18 | typedef layer maxpool_layer;
19 | 
20 | // make maxpooling layer
21 | maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding);
22 | // maxpooling top function
23 | void forward_maxpool_layer(const maxpool_layer l, network_state state);
24 | 
25 | #endif /* SRC_MAXPOOL_LAYER_H_ */
26 | 


--------------------------------------------------------------------------------
/tree.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Tree header file
 3 | //========================================================================
 4 | // @brief: function prototype definition
 5 | 
 6 | #ifndef SRC_TREE_H_
 7 | #define SRC_TREE_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | 
12 | #include "typedef_tree.h"
13 | #include "utilities.h"
14 | #include "data.h"
15 | 
16 | // update prediction tree
17 | int hierarchy_top_prediction(float *predictions, tree *hier, float thresh);
18 | // build tree hierarchy
19 | void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves);
20 | // get probabilities
21 | float get_hierarchy_probability(float *x, tree *hier, int c);
22 | // read values
23 | tree *read_tree(char *filaname);
24 | 
25 | #endif /* SRC_TREE_H_ */
26 | 


--------------------------------------------------------------------------------
/yolo_detector_test.cpp:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // testbench.cpp
 3 | //========================================================================
 4 | // @brief: testbench for yolo detector
 5 | 
 6 | #include <stdio.h>
 7 | #include <stdlib.h>
 8 | 
 9 | #include "yolo_detector.h"
10 | 
11 | int main (int argc, char **argv)
12 | {
13 | /*
14 | 	// transfer value
15 | 	// argv[0]: yolo_detector_test
16 | 	// argv[1]: detect
17 | 	// argv[2]: cfg/yolo.cfg
18 | 	// argv[3]: yolo.weights
19 | 	// argv[4]: data/dog.jpg
20 | */
21 | 	if (strcmp(argv[1],"detect") == 0)
22 | 	{
23 | 		float thresh = 0.24;
24 | 		char  *filename = (argc > 4) ? argv[4] : 0;
25 | 		detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5);
26 | 	}
27 | 	else
28 | 	{
29 | 		printf("Invalid input, program stop...");
30 | 	}
31 | 	return 0;
32 | }
33 | 


--------------------------------------------------------------------------------
/list.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // List header file
 3 | //========================================================================
 4 | // @brief: function prototype & activate type definition
 5 | 
 6 | #ifndef SRC_LIST_H_
 7 | #define SRC_LIST_H_
 8 | 
 9 | #include <stdlib.h>
10 | #include <stdio.h>
11 | 
// Doubly linked list element. The payload pointer is stored as-is; the list
// code never looks inside it.
typedef struct node
{
    void *val;          // payload
    struct node *next;  // following element, 0 at the tail
    struct node *prev;  // preceding element, 0 at the head
} node;

// List handle: element count plus both ends of the chain.
typedef struct list
{
    int size;     // number of nodes currently linked
    node *front;  // first node, 0 when the list is empty
    node *back;   // last node, 0 when the list is empty
} list;
26 | 
27 | // make a new list
28 | list *make_list();
29 | // insert a node to l->back
30 | void list_insert(list *l, void *val);
31 | // convert the list to a 2D array (array of pointer)
32 | void **list_to_array(list *l);
33 | // free the space allocated for the list
34 | void free_list(list *l);
35 | // free the node
36 | void free_node(node *n);
37 | 
38 | #endif /* SRC_LIST_H_ */
39 | 


--------------------------------------------------------------------------------
/softmax_layer.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Softmax layer header file
 3 | //========================================================================
 4 | // @brief: function prototype definition
 5 | 
 6 | #ifndef SRC_SOFTMAX_LAYER_H_
 7 | #define SRC_SOFTMAX_LAYER_H_
 8 | 
 9 | #include <stdlib.h>
10 | #include <stdio.h>
11 | #include <math.h>
12 | #include <assert.h>
13 | #include <float.h>
14 | 
15 | #include "blas.h"
16 | #include "layer.h"
17 | #include "network.h"
18 | #include "inits.h"
19 | 
20 | // redefine layer
21 | typedef layer softmax_layer;
22 | 
23 | // update softmax tree
24 | void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output);
25 | // make softmax layer
26 | softmax_layer make_softmax_layer(int batch, int inputs, int groups);
27 | // softmax layer top function
28 | void forward_softmax_layer(const softmax_layer l, network_state state);
29 | // backward softmax function
30 | void backward_softmax_layer(const softmax_layer l, network_state state);
31 | 
32 | 
33 | #endif /* SRC_SOFTMAX_LAYER_H_ */
34 | 


--------------------------------------------------------------------------------
/connected_layer.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Connected layer header file
 3 | //========================================================================
 4 | // @brief: function prototype definition
 5 | 
 6 | #ifndef SRC_CONNECTED_LAYER_H_
 7 | #define SRC_CONNECTED_LAYER_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | #include <string.h>
12 | #include <math.h>
13 | 
14 | #include "batchnorm_layer.h"
15 | #include "utilities.h"
16 | #include "blas.h"
17 | #include "gemm.h"
18 | #include "activations.h"
19 | #include "layer.h"
20 | #include "network.h"
21 | #include "inits.h"
22 | 
23 | // redefine layer
24 | typedef layer connected_layer;
25 | 
26 | connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize);
27 | void forward_connected_layer(connected_layer layer, network_state state);
28 | void backward_connected_layer(connected_layer layer, network_state state);
29 | void update_connected_layer(connected_layer l, int batch, float learning_rate, float momentum, float decay);
30 | void denormalize_connected_layer(layer l);
31 | void statistics_connected_layer(layer l);
32 | 
33 | #endif /* SRC_CONNECTED_LAYER_H_ */
34 | 


--------------------------------------------------------------------------------
/batchnorm_layer.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Batchnorm header file
 3 | //========================================================================
 4 | // @brief: function prototype definition
 5 | 
 6 | #ifndef SRC_BATCHNORM_LAYER_H_
 7 | #define SRC_BATCHNORM_LAYER_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | 
12 | #include "blas.h"
13 | #include "layer.h"
14 | #include "image.h"
15 | #include "network.h"
16 | #include "convolutional_layer.h"
17 | 
18 | // batchnorm layer top function
19 | void forward_batchnorm_layer(layer l, network_state state);
20 | // scale calculation for backward propagation
21 | void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates);
22 | // mean calculation
23 | void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta);
24 | // variance calculation
25 | void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta);
26 | // batchnorm with delta
27 | void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta);
28 | 
29 | #endif /* SRC_BATCHNORM_LAYER_H_ */
30 | 


--------------------------------------------------------------------------------
/col2im.cpp:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Col2im
 3 | //========================================================================
 4 | // @brief: post-processing image data 
 5 | 
 6 | // column to image: filters%batch == 0
 7 | void col2img(float *c_col,float *c, int m, int n, int count, int batch)
 8 | {
 9 |     for(int k = 0; k < count; k++)
10 |     {
11 |         for(int i = 0; i < batch; i++)
12 |         {
13 |             for(int j = 0; j < n; j++)
14 |             {
15 |                 c[j+k*n*batch+i*n] = c_col[k*n*batch+i+j*batch];
16 |             }
17 |         }
18 |     }
19 | }
20 | 
// column to image: filters%batch != 0
//
// Same per-tile reordering as col2img, but only the first count-1 tiles are
// complete. The tile at index m/batch is partial: only its first m%batch rows
// are copied.
void col2img_extra(float *c_col,float *c, int m, int n, int count, int batch)
{
    const int tile = n * batch;

    // complete tiles
    for(int t = 0; t < count - 1; ++t)
    {
        const int base = t * tile;

        for(int r = 0; r < batch; ++r)
        {
            for(int col = 0; col < n; ++col)
            {
                c[base + r * n + col] = c_col[base + col * batch + r];
            }
        }
    }

    // trailing partial tile
    const int rest = m % batch;
    const int tail = (m / batch) * tile;

    for(int r = 0; r < rest; ++r)
    {
        for(int col = 0; col < n; ++col)
        {
            c[tail + r * n + col] = c_col[tail + col * batch + r];
        }
    }
}
44 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Yolo Detector Tutorial
 2 | 
 3 | This tutorial goes over how to write, build, and run the software (C/C++) and hardware (SDSoC) YOLO detector application.
 4 | This design is a revision of the previous YOLO design that adds a systolic array structure. Its performance is not yet satisfactory, but it can be improved by rearranging the on-chip buffers. Before committing any change, it is strongly suggested to go over the previous design that this one is based on.
 5 | 
 6 | ## Designing an application
 7 | 
 8 | This section is a general overview of how to write an application.
 9 | 
10 | ### Main
11 | [main](yolo_detector_test.cpp) interfaces with the top-level [gemm2] to be instantiated on the FPGA.
12 | 
13 | ## Software
14 | 
15 | The software emulation runs the hardware [gemm2] on the host CPU. This is useful 
16 | for functional verification. The design is compiled using gcc/g++ and uses a
17 | pure software flow.
18 | 
19 | 
20 | ## Hardware
21 | 
22 | The hardware design can be built by SDSoC. First of all, SDSoC will call Vivado HLS to synthesize the hardware [gemm2] into RTL.
23 | Then SDSoC will create the data movers and wrap up the whole design. This design currently runs well on SDSoC (Vivado) 2017.1.
24 | For more details of using SDSoC, please refer to UG1028: SDSoC Environment User Guide https://forums.xilinx.com/xlnx/attachments/xlnx/sdsoc/23/2/ug1028-sdsoc-getting-started.pdf
25 | 
26 | 


--------------------------------------------------------------------------------
/data.cpp:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Data 
 3 | //========================================================================
 4 | // @brief: loading data function
 5 | 
 6 | #include "data.h"
 7 | 
 8 | pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
 9 | 
10 | // get 80 labels(classes) and store them into 2D array
11 | char **get_labels(char *filename)
12 | {
13 |     // get label list
14 |     list *plist = get_paths(filename);
15 | 
16 |     /*
17 |     // verify plist
18 |     node * pnode = plist->front;
19 |     int counter = 0;
20 |     printf("name_list size: %d;\n",plist->size);
21 |     while(pnode->next)
22 |     {
23 |         pnode = pnode->next;
24 |         printf("name_list NO. %d: %s; \n",counter, (char *)pnode->val);
25 |         counter++;
26 |     }
27 |     */
28 | 
29 | 
30 |     char **labels = (char **)list_to_array(plist); //???
31 |     free_list(plist);
32 |     return labels;
33 | }
34 | 
35 | // read each line from a file, return a list
36 | list *get_paths(char *filename)
37 | {
38 |     char *line;
39 |     FILE *file = fopen(filename, "r");
40 |     if(!file)
41 |     {
42 |         file_error(filename);
43 |     }
44 |     // make a new list
45 |     list *lines = make_list();
46 |     // store every line (classes) into the list
47 |     while((line=fgetl(file)))
48 |     {
49 |         list_insert(lines, line);
50 |     }
51 |     fclose(file);
52 |     return lines;
53 | }
54 | 
55 | 


--------------------------------------------------------------------------------
/option_list.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Option list header file
 3 | //========================================================================
 4 | // @brief: function prototype & type definition
 5 | 
 6 | #ifndef SRC_OPTION_LIST_H_
 7 | #define SRC_OPTION_LIST_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | #include <string.h>
12 | 
13 | #include "utilities.h"
14 | #include "list.h"
15 | 
// One option entry: a key string, its value string, and a `used` marker
// (presumably set once the option has been looked up -- see option_unused).
typedef struct
{
    char *key;  // option name
    char *val;  // option value, kept as text
    int used;   // usage marker
} kvp;
23 | 
24 | // function prototype
25 | // read cfg data, and build a list
26 | list *read_data_cfg(char *filename);
27 | // change "=" to "\n", and insert it into a list option
28 | // val stores the address of string of value
29 | int read_option(char *s, list *options);
30 | // insert value(*val) into list option
31 | void option_insert(list *l, char *key, char *val);
32 | // find specific key in list l
33 | char *option_find(list *l, char *key);
34 | // find specific strings
35 | char *option_find_str(list *l, char *key, char *def);
36 | // find specific ints
37 | int option_find_int_quiet(list *l, char *key, int def);
38 | int option_find_int(list *l, char *key, int def);
39 | // find specific floats
40 | float option_find_float(list *l, char *key, float def);
41 | float option_find_float_quiet(list *l, char *key, float def);
42 | // find unused items
43 | void option_unused(list *l);
44 | 
45 | #endif /* SRC_OPTION_LIST_H_ */
46 | 


--------------------------------------------------------------------------------
/region_layer.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Region layer header file
 3 | //========================================================================
 4 | // @brief: function prototype & activate type definition
 5 | 
 6 | #ifndef SRC_REGION_LAYER_H_
 7 | #define SRC_REGION_LAYER_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | #include <string.h>
12 | #include <assert.h>
13 | 
14 | #include "layer.h"
15 | #include "network.h"
16 | #include "box.h"
17 | #include "utilities.h"
18 | #include "blas.h"
19 | #include "activations.h"
20 | #include "region_layer.h"
21 | #include "inits.h"
22 | 
23 | // make region layer
24 | layer make_region_layer(int batch, int h, int w, int n, int classes, int coords);
25 | // get bounding boxes
26 | void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh);
27 | // get bounding box (single)
28 | box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h);
29 | // region layer top function
30 | void forward_region_layer(const layer l, network_state state);
31 | // extra region classes
32 | void delta_region_class(float *output, float *delta, int index, int class_s, int classes, tree *hier, float scale, float *avg_cat);
33 | // extra region boxes
34 | float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale);
35 | 
36 | #endif /* SRC_REGION_LAYER_H_ */
37 | 


--------------------------------------------------------------------------------
/blas.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Blas header file
 3 | //========================================================================
 4 | // @brief: function prototype definition
 5 | 
 6 | #ifndef SRC_BLAS_H_
 7 | #define SRC_BLAS_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | #include <string.h>
12 | #include <assert.h>
13 | #include <float.h>
14 | #include <math.h>
15 | 
16 | // multiply some values in *X with ALPHA
17 | void scal_cpu(int N, float ALPHA, float *X, int INCX);
18 | // assign some values in *X with ALPHA
19 | void fill_cpu(int N, float ALPHA, float *X, int INCX);
20 | // mean calculation
21 | void mean_cpu(float *x, int batch, int filters, int spatical, float *mean);
22 | // variance calculation
23 | void variance_cpu(float *x, float *mean, int batch, int filters, int spatical,float *variance);
24 | // multiply some values in *X with ALPHA
25 | void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
26 | // array copy
27 | void copy_cpu(int N, float *X, int INCX, float *Y, int INCY);
28 | // normalization with mean and variance
29 | void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial);
30 | // scale an array
31 | void scale_cpu(int N, float ALPHA, float *X, int INCX);
32 | // flatten layer
33 | void flatten(float *x, int size, int layers, int batch, int forward);
34 | // softmax layer
35 | void softmax(float *input, int n, float temp, float *output);
36 | 
37 | #endif /* SRC_BLAS_H_ */
38 | 


--------------------------------------------------------------------------------
/box.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Box header file
 3 | //========================================================================
 4 | // @brief: function prototype & special type definition
 5 | 
 6 | #ifndef SRC_BOX_H_
 7 | #define SRC_BOX_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | #include <math.h>
12 | 
// Bounding box: a position (x, y) and an extent (w, h).
// NOTE(review): x/y look like the box midpoint, judging by the comment on
// overlap() in this header -- confirm against box.cpp.
typedef struct box
{
    float x, y;  // position
    float w, h;  // width and height
} box;
21 | 
// Per-component deltas for a bounding box, one for each of x, y, w and h.
typedef struct dbox
{
    float dx, dy;  // position deltas
    float dw, dh;  // size deltas
} dbox;
30 | 
// Sort record used when ordering boxes.
// NOTE(review): presumably consumed by nms_comparator through qsort -- confirm
// in box.cpp.
typedef struct sortable_box
{
    int index;      // box index
    int classes;    // class field
    float **probs;  // probability table
} sortable_box;
38 | 
39 | // sort boxes
40 | void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh);
41 | // compare function for qsort
42 | int nms_comparator(const void *pa, const void *pb);
43 | // intersection/union
44 | float box_iou(box a, box b);
45 | // overlap area
46 | float box_intersection(box a, box b);
47 | // overlap length (width, height, etc.)
48 | // x1, x2 midpoint of the boxes
49 | float overlap(float x1, float w1, float x2, float w2);
50 | // union area = total - intersection
51 | float box_union(box a, box b);
52 | // select boxes containing a confidence larger than the threshold
53 | void do_nms_obj(box *boxes, float **probs, int total, int classes, float thresh);
54 | // transfer float type to struct box
55 | box float_to_box(float *f);
56 | 
57 | #endif /* SRC_BOX_H_ */
58 | 


--------------------------------------------------------------------------------
/utilities.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Utilities header file
 3 | //========================================================================
 4 | // @brief: function prototype definition
 5 | 
 6 | #ifndef SRC_UTILITIES_H_
 7 | #define SRC_UTILITIES_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | #include <string.h>
12 | #include <math.h>
13 | #include <assert.h>
14 | #include <unistd.h>
15 | #include <float.h>
16 | #include <limits.h>
17 | 
18 | #include "list.h"
19 | 
20 | #define SECRET_NUM -1234
21 | #define TWO_PI 6.2831853071795864769252866
22 | 
23 | // read files
24 | int *read_map(char *filename);
25 | // report file open error
26 | void file_error(char *s);
27 | // remove space, tab, and enter in a string
28 | void strip(char *s);
29 | // get one line from file
30 | char *fgetl(FILE *fp);
31 | // report malloc error
32 | void malloc_error();
33 | // report specific error
34 | void error(const char *s);
35 | // free 2D array (array of pointer)
36 | void free_ptrs(void **ptrs, int n);
37 | // find the maximum value in an array, return its index
38 | int max_index(float *a, int n);
39 | // return a random number in the given range(min, max)
40 | float rand_uniform(float min, float max);
41 | // print function
42 | void print_statistics(float *a, int n);
43 | // mean value of array a
44 | float mean_array(float *a, int n);
45 | // sum of array a
46 | float sum_array(float *a, int n);
47 | // variance of array a
48 | float variance_array(float *a, int n);
49 | // mean squared error of array a
50 | float mse_array(float *a, int n);
51 | // difference of two squares
52 | float mag_array(float *a, int n);
53 | 
54 | #endif /* SRC_UTILITIES_H_ */
55 | 


--------------------------------------------------------------------------------
/convolutional_layer.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Convolutional layer header file
 3 | //========================================================================
 4 | // @brief: function prototype definition
 5 | 
 6 | #ifndef SRC_CONVOLUTIONAL_LAYER_H_
 7 | #define SRC_CONVOLUTIONAL_LAYER_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | #include "sds_lib.h"
12 | 
13 | #include "image.h"
14 | #include "activations.h"
15 | #include "layer.h"
16 | #include "network.h"
17 | #include "utilities.h"
18 | #include "batchnorm_layer.h"
19 | #include "im2col.h"
20 | #include "col2im.h"
21 | #include "blas.h"
22 | #include "gemm.h"
23 | #include "inits.h"
24 | #include "timer.h"
25 | 
26 | // redefine struct layer
27 | typedef layer convolutional_layer;
28 | 
29 | // build and configure convolutional layer
30 | convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride,\
31 | 		int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam);
32 | // calculate workspace size for memory allocation
33 | size_t get_workspace_size(layer l);
34 | // convolutional layer top function
35 | void forward_convolutional_layer(const convolutional_layer layer, network_state state);
36 | // calculate output height
37 | int convolutional_out_height(convolutional_layer l);
38 | // calculate output width
39 | int convolutional_out_width(convolutional_layer l);
40 | // add bias to output values
41 | void add_bias(float *output, float *biases, int batch, int n, int size);
42 | // scale bias
43 | void scale_bias(float *output, float *scales, int batch, int n, int size);
44 | // swap values
45 | void swap_binary(convolutional_layer *l);
46 | 
47 | #endif /* SRC_CONVOLUTIONAL_LAYER_H_ */
48 | 


--------------------------------------------------------------------------------
/list.cpp:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // List
 3 | //========================================================================
 4 | // @brief: linked list for reading parameters
 5 | 
 6 | #include "list.h"
 7 | 
 8 | // allocate a new list with no nodes (size 0, null front and back) and return a pointer to it
 9 | list *make_list()
10 | {
11 |     list *fresh = (list *)malloc(sizeof(list));
12 | 
13 |     fresh->back  = 0;
14 |     fresh->front = 0;
15 |     fresh->size  = 0;
16 |     return fresh;
17 | }
18 | 
19 | // append a node holding val at the back of l; val is stored as given (not copied)
20 | void list_insert(list *l, void *val)
21 | {
22 |     node *tail  = l->back;
23 |     node *added = (node *)malloc(sizeof(node));
24 |     added->val  = val;
25 |     added->next = 0;
26 |     added->prev = tail; // null when the list was empty
27 |     if(tail)
28 |     {   // non-empty list: link behind the old last node
29 |         tail->next = added;
30 |     }
31 |     else
32 |     {   // empty list: the new node is also the front
33 |         l->front = added;
34 |     }
35 | 
36 |     l->back = added;
37 |     l->size += 1;
38 | }
39 | 
40 | // copy every node's val pointer, front to back, into a newly calloc'ed array of l->size entries and return that array
41 | void **list_to_array(list *l)
42 | {
43 |     void **out = (void **)calloc(l->size, sizeof(void *));
44 |     int slot = 0;
45 | 
46 |     // walk the chain from the first node until next is null
47 |     for(node *cur = l->front; cur; cur = cur->next)
48 |     {
49 |         out[slot] = cur->val;
50 |         slot++;
51 |     }
52 |     return out;
53 | }
54 | 
55 | // release all nodes of l through free_node, then the list header itself; the val pointers held by the nodes are not freed
56 | void free_list(list *l)
57 | {
58 |     node *head = l->front;
59 |     free_node(head);
60 |     free(l);
61 | }
62 | // free n and every node reachable from it through next; the stored val pointers are left untouched
63 | void free_node(node *n)
64 | {
65 |     node *cur = n;
66 |     // fetch the successor before freeing, because cur is invalid afterwards
67 |     while(cur != 0)
68 |     {
69 |         node *after = cur->next;
70 |         free(cur);
71 |         cur = after;
72 |     }
73 | }
74 | 


--------------------------------------------------------------------------------
/gemm.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Gemm header file
 3 | //========================================================================
 4 | // @brief: function prototype & macro definition
 5 | 
 6 | #ifndef SRC_GEMM_H_
 7 | #define SRC_GEMM_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | 
12 | #include "typedefs.h"
13 | 
14 | // data size limits; every product macro is parenthesized so it expands safely inside larger expressions (e.g. x % SIZE_FILTER)
15 | #define SIZE_BATCH 16
16 | #define MAX_A (3*3*1024*SIZE_BATCH)
17 | //#define MAX_A 1024*425
18 | #define MAX_B (210*210*16)
19 | #define MAX_C (416*416*16)
20 | // filter size
21 | #define SIZE_FILTER (3*3)
22 | #define SIZE_FILTER_EXTRA (1*1)
23 | #define MAX_FILTER_DEPTH 1024
24 | // line buffer size
25 | #define NUM_LINE_BUFFER 3
26 | #define SIZE_LINE_BUFFER (15*1024)
27 | #define NUM_LINE_BUFFER_EXTRA 1
28 | // window buffer size
29 | #define NUM_WINDOW_BUFFER (3*3)
30 | #define SIZE_WINDOW_BUFFER 1024
31 | #define NUM_WINDOW_BUFFER_EXTRA 1
32 | // systolic kernel size
33 | #define SystolicKernelSize 13 //greatest number the zc706 FPGA can hold: 13
34 | // data access pattern
35 | #pragma SDS data mem_attribute(A:PHYSICAL_CONTIGUOUS)
36 | #pragma SDS data mem_attribute(B:PHYSICAL_CONTIGUOUS)
37 | #pragma SDS data mem_attribute(C:PHYSICAL_CONTIGUOUS)
38 | #pragma SDS data access_pattern(A:SEQUENTIAL, B:SEQUENTIAL, C:SEQUENTIAL)
39 | #pragma SDS data copy(A[0:size_filter*SIZE_BATCH], B[0:(width+2*pad)*(height+2*pad)*channels], C[0:size_channel*SIZE_BATCH])
40 | // gemm with filter size 3x3
41 | void gemm2(float A[MAX_A], float B[MAX_B], float C[MAX_C],int num_filter, int size_channel,int size_filter,\
42 | 		  int channels, int height, int width, int ksize, int pad);
43 | // extra gemm with filter size 1x1
44 | void gemm_extra2(float A[MAX_A], float B[MAX_B], float C[MAX_C],int size_channel,int size_filter,int ksize,
45 | 		        INPUT_32 weights[SIZE_BATCH][SIZE_FILTER][MAX_FILTER_DEPTH],OUTPUT_64 output[SIZE_BATCH]);
46 | 
47 | #endif /* SRC_GEMM_H_ */
48 | 


--------------------------------------------------------------------------------
/parser.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Parser header file
 3 | //========================================================================
 4 | // @brief: function prototype & type definition
 5 | 
 6 | #ifndef SRC_PARSER_H_
 7 | #define SRC_PARSER_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | #include <string.h>
12 | #include <assert.h>
13 | #include "sds_lib.h"
14 | 
15 | #include "network.h"
16 | #include "activations.h"
17 | #include "batchnorm_layer.h"
18 | #include "blas.h"
19 | #include "connected_layer.h"
20 | #include "convolutional_layer.h"
21 | #include "list.h"
22 | #include "maxpool_layer.h"
23 | #include "option_list.h"
24 | #include "parser.h"
25 | #include "region_layer.h"
26 | #include "utilities.h"
27 | #include "inits.h"
28 | 
29 | // size parameters handed by value to the parse_* layer builders declared below
30 | typedef struct size_params{
31 |     int batch;
32 |     int inputs;
33 |     int h;
34 |     int w;
35 |     int c;
36 |     int index;
37 |     int time_steps;
38 |     network net; // a full network struct, stored by value
39 | } size_params;
40 | 
41 | typedef struct section{ // a type string plus a list of options (presumably one cfg-file section; confirm in parser.cpp)
42 |     char *type;
43 |     list *options;
44 | } section;
45 | 
46 | // parse different layer types
47 | maxpool_layer parse_maxpool(list *options, size_params params);
48 | void transpose_matrix(float *a, int rows, int cols);
49 | layer parse_region(list *options, size_params params);
50 | convolutional_layer parse_convolutional(list *options, size_params params);
51 | LAYER_TYPE string_to_layer_type(char * type);
52 | learning_rate_policy get_policy(char *s);
53 | void parse_net_options(list *options, network *net);
54 | void free_section(section *s);
55 | network parse_network_cfg(char *filename);
56 | // read data from file
57 | list *read_cfg(char *filename);
58 | // load weights for different layers
59 | void load_convolutional_weights(layer l, FILE *fp);
60 | void load_batchnorm_weights(layer l, FILE *fp);
61 | void load_connected_weights(layer l, FILE *fp, int transpose);
62 | void load_weights_upto(network *net, char *filename, int cutoff);
63 | // load weights top function
64 | void load_weights(network *net, char *filename);
65 | 
66 | #endif /* SRC_PARSER_H_ */
67 | 


--------------------------------------------------------------------------------
/softmax_layer.cpp:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Softmax layer
 3 | //========================================================================
 4 | // @brief: softmax layer
 5 | 
 6 | #include "softmax_layer.h"
 7 | 
 8 | // apply softmax separately to each group of the hierarchy, for every batch item; the groups are laid out back to back
 9 | void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output)
10 | {
11 |     // offset = start of the current group, counted from the beginning of input/output
12 |     for(int item = 0; item < batch; ++item)
13 |     {
14 |         int offset = item*inputs;
15 |         for(int g = 0; g < hierarchy->groups; g++)
16 |         {
17 |             int len = hierarchy->group_size[g];
18 |             softmax(input + offset, len, temp, output + offset);
19 |             offset += len;
20 |         }
21 |     }
22 | }
23 | 
24 | // build a SOFTMAX layer with outputs == inputs and zeroed output/delta buffers of inputs*batch floats; asserts that inputs is divisible by groups
25 | softmax_layer make_softmax_layer(int batch, int inputs, int groups)
26 | {
27 |     assert(inputs%groups == 0);
28 |     fprintf(stderr, "softmax                                        %4d\n",  inputs);
29 | 
30 |     softmax_layer l;
31 |     init_layer(l);
32 |     l.type = SOFTMAX;
33 | 
34 |     // dimensions: the layer emits exactly as many values as it receives
35 |     l.groups  = groups;
36 |     l.batch   = batch;
37 |     l.outputs = inputs;
38 |     l.inputs  = inputs;
39 |     // zero-initialised buffers, inputs*batch floats each
40 |     l.output = (float *)calloc(inputs*batch, sizeof(float));
41 |     l.delta  = (float *)calloc(inputs*batch, sizeof(float));
42 |     l.backward = backward_softmax_layer;
43 |     l.forward  = forward_softmax_layer;
44 |     return l;
45 | }
46 | 
47 | // softmax forward pass: every group is handled as its own row of l.inputs/l.groups values, giving l.batch*l.groups rows
48 | void forward_softmax_layer(const softmax_layer l, network_state state)
49 | {
50 |     const int per_group = l.inputs / l.groups;
51 |     const int rows = l.batch * l.groups;
52 |     if(l.softmax_tree)
53 |     {   // hierarchical case: softmax inside each tree group
54 |         softmax_tree(state.input, rows, per_group, l.temperature, l.softmax_tree, l.output);
55 |         return;
56 |     }
57 |     // flat case: one softmax per row
58 |     for(int r = 0; r < rows; r++)
59 |     {
60 |         const int at = r*per_group;
61 |         softmax(state.input + at, per_group, l.temperature, l.output + at);
62 |     }
63 | }
64 | 
65 | // backward pass: add this layer's delta to state.delta element by element (l.inputs*l.batch values)
66 | void backward_softmax_layer(const softmax_layer l, network_state state)
67 | {
68 |     const int total = l.inputs*l.batch;
69 |     const float *src = l.delta;
70 |     float *dst = state.delta;
71 |     for(int k = 0; k < total; ++k) { dst[k] += src[k]; }
72 | }
73 | 


--------------------------------------------------------------------------------
/im2col.cpp:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Im2col 
 3 | //========================================================================
 4 | // @brief: pre-processing image data
 5 | 
 6 | #include "gemm.h"
 7 | #include "im2col.h"
 8 | 
 9 | // image to column (filters%batch == 0): copies planar data_im into data_col with a zero border, interleaving channels in column groups of SystolicKernelSize; ksize and stride are not used in this function
10 | void im2col(float *data_im,int channels, int height, int width, int ksize,  int stride, int pad, float* data_col)
11 | {
12 |     int c,h,w;
13 |     int height_col = height + 2*pad; // padded height
14 |     int width_col = width + 2*pad; // padded width
15 |     float temp;
16 |     int step;
17 |     for(c = 0; c < channels; c++)
18 |     {
19 |     	for(h = 0; h < height_col; h++)
20 |     	{
21 |     		for(w = 0; w < width_col; w++)
22 |     		{
23 | 
24 | 		        //step = width of the current column group: SystolicKernelSize, or (width_col - 2) % SystolicKernelSize when a full group starting at this group's first column would run past width_col
25 |     			if( ( w - ( w - 2 ) % SystolicKernelSize + SystolicKernelSize ) > width_col )
26 |     				step = ( width_col - 2 ) % SystolicKernelSize;
27 |     			else
28 |     				step = SystolicKernelSize;
29 |                 //pad: the outermost row/column is written as 0; NOTE(review): this one-element border and the (h-1)/(w-1) read below look correct only for pad == 1 - confirm
30 |     			if((w == 0) || (h == 0) || (w == width_col-1) || (h == height_col-1))
31 |     				temp = 0;
32 |     			//read data from the planar (channel, row, column) input at the un-padded position
33 | 				else
34 |     				temp = data_im[c*width*height+(h-1)*width+(w-1)];
35 |                 
36 | 				//columns 0 and 1 are stored channel-interleaved at their natural position
37 |     			if( w == 0 || w == 1 )
38 |     			{
39 |     				data_col[h * width_col * channels + w * channels + c] = temp;
40 |     			}
41 | 				//remaining columns: inside a group, the step values of one channel are contiguous, channel after channel (pls refer to report)
42 |     			else
43 |     			    data_col[h * width_col * channels + ( w - ( w - 2 ) % SystolicKernelSize ) * channels + step * c + ( w - 2 ) % SystolicKernelSize ] = temp;
44 |     		}
45 |     	}
46 |     }
47 | }
48 | 
49 | // image to column (filters%batch != 0): reorder a planar image (channel, row, column) into interleaved layout (row, column, channel); ksize, stride and pad are unused
50 | void im2col_extra(float *data_im,int channels, int height, int width, int ksize,  int stride, int pad, float* data_col)
51 | {
52 |     const int plane = width*height; // elements per input channel
53 |     // visiting order: column, then row, then channel
54 |     for(int col = 0; col < width; ++col)
55 |     {
56 |         for(int row = 0; row < height; ++row)
57 |         {
58 |             const int pixel = row*width + col;
59 |             float *dst = data_col + pixel*channels;
60 |             for(int ch = 0; ch < channels; ++ch)
61 |             {
62 |                 dst[ch] = data_im[ch*plane + pixel];
63 |             }
64 |         }
65 |     }
66 | }
67 | 
68 | 


--------------------------------------------------------------------------------
/data.h:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Data header file
  3 | //========================================================================
  4 | // @brief: function prototype & struct type definition
  5 | 
  6 | #ifndef SRC_DATA_H_
  7 | #define SRC_DATA_H_
  8 | 
  9 | #include <stdio.h>
 10 | #include <stdlib.h>
 11 | #include <string.h>
 12 | #include <pthread.h>
 13 | 
 14 | #include "typedef_tree.h"
 15 | #include "tree.h"
 16 | #include "matrix.h"
 17 | #include "list.h"
 18 | #include "image.h"
 19 | #include "utilities.h"
 20 | 
 21 | // data struct: dimensions, two matrices and per-entry box lists; presumably X = inputs and Y = targets (TODO confirm in data.cpp)
 22 | typedef struct data
 23 | {
 24 | 	int w;
 25 | 	int h;
 26 | 	matrix X; // matrix = { row, cols, vals } as declared in matrix.h
 27 | 	matrix Y;
 28 | 	int shallow;
 29 | 	int *num_boxes;
 30 | 	box **boxes;
 31 | } data;
 32 | 
 33 | // data type: the kind of data, stored in load_args.type
 34 | typedef enum
 35 | {
 36 | 	CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA,\
 37 | 	IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA,\
 38 | 	OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA
 39 | } data_type;
 40 | 
 41 | // loading struct: arguments describing one load request; NOTE(review): field roles below are inferred from names only - confirm against data.cpp
 42 | typedef struct load_args
 43 | {
 44 | 	int threads;
 45 | 	char **paths;
 46 | 	char *path;
 47 | 	int n;
 48 | 	int m;
 49 | 	char **labels;
 50 | 	int h;
 51 | 	int w;
 52 | 	int out_w;
 53 | 	int out_h;
 54 | 	int nh;
 55 | 	int nw;
 56 | 	int num_boxes;
 57 | 	int min;
 58 | 	int max;
 59 | 	int size;
 60 | 	int classes;
 61 | 	int background;
 62 | 	int scale;
 63 | 	float jitter;
 64 | 	float angle;
 65 | 	float aspect;
 66 | 	float saturation;
 67 | 	float exposure;
 68 | 	float hue;
 69 | 	data *d; // pointer fields: presumably destinations filled by the loader (TODO confirm)
 70 | 	image *im;
 71 | 	image *resized;
 72 | 	data_type type; // one of the data_type enumerators
 73 | 	tree *hierarchy;
 74 | } load_args;
 75 | 
 76 | // one box label: an id, a box (x, y, w, h) and four extents; presumably left/right/top/bottom are the box edges derived from x, y, w, h (TODO confirm)
 77 | typedef struct box_label
 78 | {
 79 | 	int id;
 80 | 	float x;
 81 | 	float y;
 82 | 	float w;
 83 | 	float h;
 84 | 	float left;
 85 | 	float right;
 86 | 	float top;
 87 | 	float bottom;
 88 | } box_label;
 89 | 
 90 | // get 80 labels(classes) and store them into 2D array
 91 | char **get_labels(char *filename);
 92 | // read each line from a file, return a list
 93 | list *get_paths(char *filename);
 94 | 
 95 | 
 96 | // returns 2*(max/2 + 1 - |max/2 - x|) / max, capped at 1; all arithmetic is integer until the final float division
 97 | // (static inline so that each file including this header gets its own copy)
 98 | static inline float distance_from_edge (int x, int max)
 99 | {
100 | 	const int half = max/2;
101 | 	// absolute offset of x from the midpoint
102 | 	int offset = half - x;
103 | 	offset = (offset < 0) ? -offset : offset;
104 | 
105 | 	// numerator of the ratio, still an integer
106 | 	const int scaled = 2*(half + 1 - offset);
107 | 	const float ratio = (float)scaled/(float)max;
108 | 	if(ratio > 1)
109 | 	{
110 | 		return 1;
111 | 	}
112 | 	return ratio;
113 | }
114 | 
115 | #endif /* SRC_DATA_H_ */
116 | 


--------------------------------------------------------------------------------
/network.h:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Network header file
  3 | //========================================================================
  4 | // @brief: function prototype & type definition
  5 | 
  6 | #ifndef SRC_NETWORK_H_
  7 | #define SRC_NETWORK_H_
  8 | 
  9 | #include <stdio.h>
 10 | #include <stdlib.h>
 11 | #include <assert.h>
 12 | 
 13 | #include "image.h"
 14 | #include "data.h"
 15 | #include "utilities.h"
 16 | #include "blas.h"
 17 | #include "tree.h"
 18 | 
 19 | #include "layer.h"
 20 | #include "connected_layer.h"
 21 | #include "convolutional_layer.h"
 22 | #include "region_layer.h"
 23 | #include "batchnorm_layer.h"
 24 | #include "maxpool_layer.h"
 25 | #include "softmax_layer.h"
 26 | 
 27 | // learning rate policy: the set of policy identifiers stored in network.policy
 28 | typedef enum
 29 | {
 30 | 	CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM
 31 | } learning_rate_policy;
 32 | 
 33 | // struct network: the layer array plus network-wide settings; only the fields used by network.cpp are annotated here
 34 | typedef struct network
 35 | {
 36 | 	float *workspace;       // copied into network_state.workspace by forward_network
 37 | 	int n;                  // number of entries in layers (set by make_network)
 38 | 	int batch;              // written by set_batch_network, together with every layer's batch
 39 | 	int *seen;              // make_network allocates a single zeroed int here
 40 | 	float epoch;
 41 | 	int subdivisions;
 42 | 	float momentum;
 43 | 	float decay;
 44 | 	layer *layers;	        // layers 
 45 | 	int outputs;			// output sizes
 46 | 	float *output; 			// output values 
 47 | 	learning_rate_policy policy;
 48 | 	// learning-rate schedule settings (presumably; not used in the code shown here)
 49 | 	float learning_rate;
 50 | 	float gamma;
 51 | 	float scale;
 52 | 	float power;
 53 | 	int time_steps;
 54 | 	int step;
 55 | 	int max_batches;
 56 | 	float *scales;
 57 | 	int *steps;
 58 | 	int num_steps;
 59 | 	int burn_in;
 60 | 	// optimizer settings (presumably Adam parameters - confirm)
 61 | 	int adam;
 62 | 	float B1;
 63 | 	float B2;
 64 | 	float eps;
 65 | 	// input geometry and augmentation ranges (presumably - confirm against the parser)
 66 | 	int inputs;
 67 | 	int h;
 68 | 	int w;
 69 | 	int c;
 70 | 	int max_crop;
 71 | 	int min_crop;
 72 | 	float angle;
 73 | 	float aspect;
 74 | 	float exposure;
 75 | 	float saturation;
 76 | 	float hue;
 77 | 	//
 78 | 	int gpu_index;
 79 | 	tree *hierarchy;
 80 | } network;
 81 | 
 82 | // network state: per-call state passed by value to each layer's forward function (see forward_network)
 83 | typedef struct network_state
 84 | {
 85 | 	float *truth; // network_predict sets this to 0
 86 | 	float *input; // current layer input; forward_network replaces it with each layer's output
 87 | 	float *delta; // network_predict sets this to 0
 88 | 	float *workspace; // set from net.workspace by forward_network
 89 | 	int train; // network_predict sets this to 0
 90 | 	int index; // index of the layer currently being run
 91 | 	network net; // by-value copy of the network struct
 92 | } network_state;
 93 | 
 94 | // make a new && empty network with n layers
 95 | network make_network(int n);
 96 | // calculate the size of network
 97 | int get_network_output_size(network net);
 98 | // get network output
 99 | float *get_network_output(network net);
100 | // set batch mode
101 | void set_batch_network(network *net, int b);
102 | // top prediction function
103 | float *network_predict(network net, float *input);
104 | // go through all network layers ************************
105 | void forward_network(network net, network_state state);
106 | 
107 | #endif /* SRC_NETWORK_H_ */
108 | 


--------------------------------------------------------------------------------
/network.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Network
  3 | //========================================================================
  4 | // @brief: network layer
  5 | 
  6 | #include "network.h"
  7 | 
  8 | // create a zero-initialised network with room for n zeroed layers and a single zeroed int for net.seen
  9 | network make_network(int n)
 10 | {
 11 |     network fresh = {0};
 12 | 
 13 |     fresh.n      = n;
 14 |     fresh.layers = (layer *)calloc(n, sizeof(layer));
 15 |     fresh.seen   = (int *)calloc(1, sizeof(int)); // one int, starts at 0
 16 |     return fresh;
 17 | }
 18 | 
 19 | // return the output count of the last layer whose type is not COST (layer 0 if every later layer is COST); returns 0 for a network without layers
 20 | int get_network_output_size(network net)
 21 | {
 22 |     // an empty network has no layer to report; an unguarded scan would read layers[-1]
 23 |     if (net.n <= 0) return 0;
 24 |     // skip trailing COST layers, never going below index 0
 25 |     int i = net.n - 1;
 26 |     while (i > 0 && net.layers[i].type == COST)
 27 |     {
 28 |         i--;
 29 |     }
 30 |     return net.layers[i].outputs;
 31 | }
 32 | 
 33 | // return the output buffer of the last layer whose type is not COST (layer 0 if every later layer is COST); returns a null pointer for a network without layers
 34 | float *get_network_output(network net)
 35 | {
 36 |     // an empty network has no layer to report; an unguarded scan would read layers[-1]
 37 |     if (net.n <= 0) return 0;
 38 |     // skip trailing COST layers, never going below index 0
 39 |     int i = net.n - 1;
 40 |     while (i > 0 && net.layers[i].type == COST)
 41 |     {
 42 |         i--;
 43 |     }
 44 |     return net.layers[i].output;
 45 | }
 46 | 
 47 | // store batch size b in the network itself and in each of its net->n layers
 48 | void set_batch_network(network *net, int b)
 49 | {
 50 |     const int count = net->n;
 51 |     net->batch = b;
 52 |     for (int k = 0; k < count; ++k)
 53 |     {   (net->layers + k)->batch = b;
 54 |     }
 55 | }
 56 | 
 57 | // run one forward pass over input and return the buffer chosen by get_network_output (a layer's own output buffer, not a copy)
 58 | float *network_predict(network net, float *input)
 59 | {
 60 |     printf("network_predict.\n");
 61 | 
 62 |     // prediction state: truth, delta and the train flag are all zero
 63 |     network_state state;
 64 |     state.input = input;
 65 |     state.net   = net;
 66 |     state.index = 0;
 67 |     state.train = 0;
 68 |     state.truth = 0;
 69 |     state.delta = 0;
 70 | 
 71 |     forward_network(net, state);
 72 |     return get_network_output(net);
 73 | }
 74 | 
 75 | // run all layers in order: scale each layer's delta buffer by 0 (when present), call its forward function, then pass its output on as the next input
 76 | void forward_network(network net, network_state state)
 77 | {
 78 |     state.workspace = net.workspace;
 79 |     for (int idx = 0; idx < net.n; ++idx)
 80 |     {
 81 |         layer current = net.layers[idx]; // by-value copy of the layer struct
 82 |         state.index = idx;
 83 |         if (current.delta)
 84 |         {   // multiply the outputs*batch delta values by 0
 85 |             scal_cpu(current.outputs * current.batch, 0, current.delta, 1);
 86 |         }
 87 |         current.forward(current, state);
 88 |         // this layer's output buffer is the next layer's input
 89 |         state.input = current.output;
 90 |     }
 91 | }
 92 | 


--------------------------------------------------------------------------------
/batchnorm_layer.cpp:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Batchnorm
 3 | //========================================================================
 4 | // @brief: batchnorm layer
 5 | 
 6 | #include "batchnorm_layer.h"
 7 | 
 8 | // batchnorm forward pass, in place on l.output: normalize_cpu with the rolling mean/variance, then scale_bias with l.scales (state is not used)
 9 | void forward_batchnorm_layer(layer l, network_state state)
10 | {
11 |     const int spatial = l.out_h*l.out_w; // values per output channel
12 |     normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, spatial);
13 |     scale_bias(l.output, l.scales, l.batch, l.out_c, spatial);
14 | }
15 | 
16 | // for each of the n filters, add the sum over batch and positions of delta*x_norm to scale_updates[filter]; data is indexed as [batch][n][size]
17 | void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
18 | {
19 |     for(int ch = 0; ch < n; ++ch)
20 |     {
21 |         float acc = 0;
22 |         for(int item = 0; item < batch; ++item)
23 |         {
24 |             const int base = size*(ch + n*item); // first element of this filter in this batch item
25 |             for(int pos = 0; pos < size; ++pos)
26 |             {
27 |                 acc += delta[base + pos] * x_norm[base + pos];
28 |             }
29 |         }
30 |         scale_updates[ch] += acc;
31 |     }
32 | }
33 | 
34 | // per filter: mean_delta[f] = -(sum of delta over batch and spatial positions) / sqrt(variance[f] + 1e-5)
35 | void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
36 | {
37 |     // delta is indexed as [batch][filters][spatial]
38 |     for(int f = 0; f < filters; ++f)
39 |     {
40 |         mean_delta[f] = 0;
41 |         for (int item = 0; item < batch; ++item)
42 |         {
43 |             const float *row = delta + (item*filters + f)*spatial;
44 |             for (int k = 0; k < spatial; ++k)
45 |             {
46 |                 mean_delta[f] += row[k];
47 |             }
48 |         }
49 |         mean_delta[f] *= (-1./sqrt(variance[f] + .00001f));
50 |     }
51 | }
52 | 
53 | // per filter: variance_delta[f] = -0.5 * (variance[f] + 1e-5)^(-3/2) * sum over batch and spatial positions of delta*(x - mean[f])
54 | void  variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
55 | {
56 |     for(int f = 0; f < filters; ++f)
57 |     {
58 |         variance_delta[f] = 0;
59 |         for(int item = 0; item < batch; ++item)
60 |         {
61 |             const int base = (item*filters + f)*spatial; // x and delta are indexed as [batch][filters][spatial]
62 |             for(int k = 0; k < spatial; ++k)
63 |             {
64 |                 variance_delta[f] += delta[base + k]*(x[base + k] - mean[f]);
65 |             }
66 |         }
67 |         variance_delta[f] *= -.5 * pow(variance[f] + .00001f, (float)(-3./2.));
68 |     }
69 | }
70 | 
71 | // batchnorm input gradient, in place: delta = delta/sqrt(variance + 1e-5) + variance_delta*2*(x - mean)/(spatial*batch) + mean_delta/(spatial*batch)
72 | void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
73 | {
74 |     for(int j = 0; j < batch; ++j)
75 |     {
76 |         for(int f = 0; f < filters; ++f)
77 |         {
78 |             for(int k = 0; k < spatial; ++k)
79 |             {
80 |                 int index = j*filters*spatial + f*spatial + k; // x and delta are indexed as [batch][filters][spatial]
81 |                 delta[index] = delta[index] * 1./sqrt(variance[f] + .00001f) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); // epsilon inside the root, matching mean_delta_cpu and variance_delta_cpu
82 |             }
83 |         }
84 |     }
85 | }
86 | 


--------------------------------------------------------------------------------
/timer.h:
--------------------------------------------------------------------------------
 1 | //---------------------------------------------------------
 2 | // Timer.h
 3 | //---------------------------------------------------------
 4 | #ifndef __TIMER_H__
 5 | #define __TIMER_H__
 6 | #include <time.h>
 7 | #include <sys/time.h>
 8 | #include <string.h>
 9 | #include <stdio.h>
10 | 
11 | #define TIMER_ON
12 | 
13 | //---------------------------------------------------------
14 | // Timer is an object which helps profile programs using
15 | // wall-clock time from gettimeofday().
16 | // - By default, a timer is stopped when you instantiate it
17 | //   and must be started manually
18 | // - Passing true as the second constructor argument starts
19 | //   the timer when it is constructed
20 | // - When the timer is destructed it prints stats to stdout
21 | //---------------------------------------------------------
22 | class Timer {
23 | 
24 |   #ifdef TIMER_ON
25 | 
26 |     char binName[50];
27 |     unsigned nCalls;
28 |     timeval ts_start;
29 |     float totalTime;
30 |     
31 |     public:
32 |       //------------------------------------------------------------------
33 |       // constructor: Name is the label printed by the destructor (truncated to fit binName)
34 |       //------------------------------------------------------------------
35 |       Timer (const char* Name="", bool On=false) {
36 |         if (On) {
37 |           // record the start time
38 |           gettimeofday(&ts_start, NULL);
39 |           nCalls = 1;
40 |         }
41 |         else {
42 |           nCalls = 0;
43 |         }
44 |         totalTime = 0;	
45 |         snprintf(binName, sizeof(binName), "%s", Name ? Name : ""); // bounded copy: a long or null Name can no longer overrun binName
46 |       }
47 | 
48 |       //------------------------------------------------------------------
49 |       // destructor
50 |       //------------------------------------------------------------------
51 |       ~Timer () {
52 |         // on being destroyed, print the call count and total time (only if the timer was ever started)
53 |         if (nCalls > 0) {
54 |           printf ("%-20s: ", binName);
55 |           printf ("%6u calls; ", nCalls);
56 |           printf ("%7.3f msecs total time\n", 1000*totalTime);
57 |           //printf ("%7.4f msecs average time;\n", 1000*totalTime/nCalls);
58 |         }
59 |       }
60 |       
61 |       //------------------------------------------------------------------
62 |       // start timer
63 |       //------------------------------------------------------------------
64 |       void start() {
65 |         // record start time
66 |         gettimeofday(&ts_start, NULL);
67 |         nCalls++;
68 |       }
69 |       
70 |       //------------------------------------------------------------------
71 |       // stop timer (meaningful only after start(), or after construction with On=true)
72 |       //------------------------------------------------------------------
73 |       void stop() {
74 |         // get current time, add elapsed time to totalTime
75 |         timeval ts_curr;
76 |         gettimeofday(&ts_curr, NULL);
77 |         totalTime += float(ts_curr.tv_sec - ts_start.tv_sec) +
78 |                      float(ts_curr.tv_usec)*1e-6 - float(ts_start.tv_usec)*1e-6;
79 |       }
80 | 
81 |   #else
82 | 
83 |     //--------------------------------------------------------------------
84 |     // all methods do nothing if TIMER_ON is not set; the constructor defaults match the TIMER_ON version
85 |     //--------------------------------------------------------------------
86 |     public:
87 |       Timer (const char* Name="", bool On=false) {}
88 |       void start() {}
89 |       void stop() {}
90 | 
91 |   #endif
92 | };
93 | 
94 | #endif
95 | 


--------------------------------------------------------------------------------
/maxpool_layer.cpp:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Maxpooling
 3 | //========================================================================
 4 | // @brief: maxpooling layer
 5 | 
 6 | #include "maxpool_layer.h"
 7 | 
 8 | // build a MAXPOOL layer for an h x w x c input: each output dimension is (dim + 2*padding)/stride, with zeroed indexes/output/delta buffers
 9 | maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
10 | {
11 |     maxpool_layer l;
12 |     init_layer(l);
13 |     l.type = MAXPOOL;
14 |     // pooling window configuration
15 |     l.size   = size;
16 |     l.stride = stride;
17 |     l.pad    = padding;
18 |     l.batch  = batch;
19 |     // input geometry
20 |     l.c = c;
21 |     l.w = w;
22 |     l.h = h;
23 |     l.inputs = h*w*c;
24 |     // output geometry (integer division; the channel count is unchanged)
25 |     l.out_c = c;
26 |     l.out_h = (h + 2*padding)/stride;
27 |     l.out_w = (w + 2*padding)/stride;
28 |     l.outputs = l.out_h * l.out_w * l.out_c;
29 |     // one slot per output element across the whole batch
30 |     const int output_size = l.out_h * l.out_w * l.out_c * batch;
31 |     l.indexes = (int *)calloc(output_size, sizeof(int));
32 |     l.output  = (float *)calloc(output_size, sizeof(float));
33 |     l.delta   = (float *)calloc(output_size, sizeof(float));
34 |     l.forward = forward_maxpool_layer; // no backward function is installed here
35 |     fprintf(stderr, "max          %d x %d / %d  %4d x%4d x%4d   ->  %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
36 |     return l;
37 | }
38 | 
39 | // max pooling forward pass: each output element is the maximum over an l.size x l.size input window (stride l.stride, shifted by -l.pad); window positions outside the input are ignored
40 | void forward_maxpool_layer(const maxpool_layer l, network_state state)
41 | {
42 |     const int out_h = l.out_h;
43 |     const int out_w = l.out_w;
44 |     const int chans = l.c;
45 | 
46 |     for (int b = 0; b < l.batch; b++)
47 |     {
48 |         for (int k = 0; k < chans; k++)
49 |         {
50 |             const int plane = k + b*l.c; // index of the input plane (batch item, channel)
51 |             for (int i = 0; i < out_h; i++)
52 |             {
53 |                 for (int j = 0; j < out_w; j++)
54 |                 {
55 |                     // largest value seen so far in this window and its flat input index;
56 |                     // they stay at -FLT_MAX / -1 when no window position lies inside the input
57 |                     float best = -FLT_MAX;
58 |                     int best_at = -1;
59 | 
60 |                     for (int n = 0; n < l.size; n++)
61 |                     {
62 |                         const int cur_h = i*l.stride + n - l.pad;
63 |                         for (int m = 0; m < l.size; m++)
64 |                         {
65 |                             const int cur_w = j*l.stride + m - l.pad;
66 |                             // skip window positions that fall outside the input
67 |                             if (cur_h < 0 || cur_h >= l.h || cur_w < 0 || cur_w >= l.w)
68 |                             {
69 |                                 continue;
70 |                             }
71 |                             const int index = cur_w + l.w*(cur_h + l.h*plane);
72 |                             const float val = state.input[index];
73 |                             if (val > best)
74 |                             {
75 |                                 best = val;
76 |                                 best_at = index;
77 |                             }
78 |                         }
79 |                     }
80 |                     const int out_index = j + out_w*(i + out_h*(k + chans*b));
81 |                     l.output[out_index] = best;
82 |                     l.indexes[out_index] = best_at;
83 |                 }
84 |             }
85 |         }
86 |     }
87 | }
88 | 
89 | 


--------------------------------------------------------------------------------
/image.h:
--------------------------------------------------------------------------------
 1 | //========================================================================
 2 | // Image header file
 3 | //========================================================================
 4 | // @brief: function prototype & struct type definition
 5 | 
 6 | #ifndef SRC_IMAGE_H_
 7 | #define SRC_IMAGE_H_
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | #include <float.h>
12 | #include <string.h>
13 | #include <math.h>
14 | 
15 | #include "box.h"
16 | #include "utilities.h"
17 | #include "blas.h"
18 | 
19 | #define LABEL_SIZE 8
20 | #define LABEL_TYPE 128
21 | 
22 | // image: height, width, channel count and pixel data
23 | typedef struct image
24 | {
25 | 	int h; // height
26 | 	int w; // width
27 | 	int c; // channels
28 | 	float *data; // pixel values; NOTE(review): layout presumably channel-major (c, h, w) - confirm in image.cpp
29 | } image;
30 | 
31 | //======================================================================================
32 | // Read && resize images
33 | //======================================================================================
34 | // return 8*128*image
35 | // load labels 8(different size), 32~126 (different type)
36 | // store information&value of labels: w,h,c,*data
37 | image **load_alphabet();
38 | // pass value
39 | image load_image_color(char *filename, int w, int h);
40 | // load image top function
41 | image load_image(char *filename, int w, int h, int c);
42 | // return im.data: w(width); h(height); z(depth,channel)
43 | image load_image_stb(char *filename, int channels);
44 | // make image top function
45 | image make_image(int w, int h, int c);
46 | // make an empty image
47 | image make_empty_image(int w, int h, int c);
48 | // resize the given image (w*h)
49 | image resize_image(image im, int w, int h);
50 | // pick up pixel in m.data: x - width, y - height, c - channel
51 | float get_pixel(image m, int x, int y, int c);
52 | // fetch extra pixels
53 | float get_pixel_extend(image m, int x, int y, int c);
54 | // check the validity of data && store data into image
55 | void set_pixel(image m, int x, int y, int c, float val);
56 | // add value to pixels
57 | void add_pixel(image m, int x, int y, int c, float val);
58 | 
59 | //======================================================================================
60 | // Draw detections & save etc.
61 | //======================================================================================
62 | // draw detecting results
63 | void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **labels, int classes);
64 | // get label
65 | image get_label(image **characters, char *string, int size);
66 | // tile images a and b (see image.cpp for how dx is applied)
67 | image tile_images(image a, image b, int dx);
68 | // border/wrap up image
69 | image border_image(image a, int border);
70 | // copy image
71 | image copy_image(image p);
72 | // embed image (image data transmission)
73 | void embed_image(image source, image dest, int dx, int dy);
74 | // merge images
75 | void composite_image(image source, image dest, int dx, int dy);
76 | // get width of boxes
77 | void draw_box_width(image a, int x1, int y1, int x2, int y2,int w, float r, float g, float b);
78 | // draw one box
79 | void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b);
80 | // draw labels
81 | void draw_label(image a, int r, int c, image label, const float *rgb);
82 | // get image color
83 | float get_color(int c, int x, int max);
84 | // display image
85 | void show_image(image p, const char *name);
86 | // save image top function
87 | void save_image(image p, const char *name);
88 | // rearrange the output image
89 | void save_image_png(image im, const char *name);
90 | // free allocated memory
91 | void free_image(image p);
92 | 
93 | #endif /* SRC_IMAGE_H_ */
94 | 


--------------------------------------------------------------------------------
/sysarr.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <iomanip>
  3 | using namespace std;
  4 | 
  5 | const int N = 20;
  6 | const int M = 5;
  7 | 
  8 | // Print the N x N matrix A: one row per line, each value in a width-3 column, then an empty line.
  9 | void printArr(int A[N][N]) {
 10 |   for (int row = 0; row < N; ++row) {
 11 |     for (const int *cell = A[row]; cell != A[row] + N; ++cell) cout << setw(3) << *cell << " ";
 12 |     cout << endl;
 13 |   }
 14 |   cout << endl;
 15 | }
 16 | /*
 17 | void top2( int A[N][N], int B[N][N], int C[N][N] ){
 18 | 
 19 | #pragma HLS array_partition variable=A dim=0
 20 | #pragma HLS array_partition variable=B dim=0
 21 | #pragma HLS array_partition variable=C dim=0
 22 | 
 23 |     for( int i = 0; i < N; i++ ){
 24 |         for( int j = 0; j < N; j++ ){
 25 |             for( int k = 0; k < N; k++ ){
 26 |                 C[i][j] += A[i][k] * B[k][j];
 27 |             }
 28 |         }
 29 |     }
 30 | }
 31 | */
 32 | // Tiled matrix multiply organised as an M x M systolic array: adds A*B into C (C is only updated with +=).
 33 | void top(int A[N][N], int B[N][N], int C[N][N]) {
 34 | 
 35 |   int inA[M][M];  // A operand currently held by each PE
 36 |   int inB[M][M];  // B operand currently held by each PE
 37 | 
 38 | #pragma HLS array_partition variable=inA dim=0
 39 | #pragma HLS array_partition variable=inB dim=0
 40 | #pragma HLS array_partition variable=A dim=1
 41 | #pragma HLS array_partition variable=B dim=2
 42 | #pragma HLS array_partition variable=C dim=0
 43 | 
 44 |   // initialization: every PE starts with zero operands
 45 |   for (int i = 0; i < M; i++) {
 46 | #pragma HLS pipeline
 47 |     for (int j = 0; j < M; j++) {
 48 |       inA[i][j] = 0;
 49 |       inB[i][j] = 0;
 50 |     }
 51 |   }
 52 |   // one pass per M x M output tile; N/M is an integer division, so only full tiles are visited
 53 |     for( int ii = 0; ii < N/M; ii++ ){
 54 |         for( int jj = 0; jj < N/M; jj++ ){
 55 |             // N + 2*M - 2 steps: the operand pair for k = N-1 reaches PE (M-1, M-1) at r = N + 2*M - 3
 56 |             for (int r = 0; r < N + 2 * M - 2; r++) {
 57 |                 #pragma HLS pipeline
 58 |             // update data (i.e., reads data from previous PE): inA moves one column right, inB one row down
 59 |             for (int i = 0; i < M; i++)
 60 |               for (int j = M - 1; j >= 1; j--)
 61 |                 inA[i][j] = inA[i][j-1];
 62 | 
 63 |             for (int i = M - 1; i >= 1; i--)
 64 |                 for (int j = 0; j < M; j++)
 65 |                     inB[i][j] = inB[i-1][j];
 66 | 
 67 |             // read new data from inputs: row i of the A tile is delayed by i steps, column j of the B tile by j steps;
 68 |             // zeros are fed outside the valid window (original author's note: "not ok here!")
 69 |             for (int i = 0; i < M; i++) {
 70 |                 if (r >= i && r < i+N)
 71 |                     inA[i][0] = A[i + ii * M][r-i];
 72 |                 else
 73 |                     inA[i][0] = 0;
 74 |             }
 75 | 
 76 |             for (int j = 0; j < M; j++) {
 77 |                 if (r >= j && r < j+N)
 78 |                     inB[0][j] = B[r-j][j + jj * M];
 79 |                 else
 80 |                     inB[0][j] = 0;
 81 |             }
 82 | 
 83 | 
 84 |             // PE: each processing element multiplies its current operands and accumulates into its C entry
 85 |             for (int i = 0; i < M; i++)
 86 |                 for (int j = 0; j < M; j++)
 87 |                     C[i + ii * M][j + jj * M] += inA[i][j] * inB[i][j];
 88 |             }
 89 | 
 90 |         }
 91 |     }
 92 | 
 93 | }
 94 | 
 95 | 
 96 | 
 97 | // Self-check: run top() on a fixed input and compare it with a plain triple-loop product.
 98 | int main(void) {
 99 |   int A[N][N];
100 |   int B[N][N];
101 |   int C[N][N];  // result accumulated by top()
102 |   int O[N][N];  // reference result
103 | 
104 |   // A[row][col] = row + col, B[row][col] = row - col; both result buffers start at zero.
105 |   for (int row = 0; row < N; ++row) {
106 |     for (int col = 0; col < N; ++col) {
107 |       A[row][col] = row + col;
108 |       B[row][col] = row - col;
109 |       C[row][col] = O[row][col] = 0;
110 |     }
111 |   }
112 | 
113 |   top(A, B, C);
114 | 
115 |   // Reference product.
116 |   for (int row = 0; row < N; ++row)
117 |     for (int col = 0; col < N; ++col)
118 |       for (int k = 0; k < N; ++k) O[row][col] += A[row][k] * B[k][col];
119 | 
120 |   // Compare in row-major order; the first mismatch is printed as (column, row) and ends the run with status 1.
121 |   for (int row = 0; row < N; ++row) {
122 |     for (int col = 0; col < N; ++col) {
123 |       if (O[row][col] == C[row][col]) continue;
124 |       cout << "Wrong value at (" << col << ", " << row << "): " << O[row][col] << " != " << C[row][col] << endl;
125 |       return 1;
126 |     }
127 |   }
128 |   cout << "Success!!" << endl;
129 | 
130 |   return 0;
131 | }
132 | 


--------------------------------------------------------------------------------
/activations.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Activation
  3 | //========================================================================
  4 | // @brief: activation layer
  5 | 
  6 | #include "activations.h"
  7 | 
  8 | // Map an activation name to its ACTIVATION value (exact, case-sensitive strcmp match).
  9 | // Unknown names are reported on stderr and mapped to RELU.
 10 | ACTIVATION get_activation(char *s)
 11 | {
 12 |     // the names are unique, so the order of the table does not affect the result
 13 |     static const struct { const char *name; ACTIVATION type; } known[] = {
 14 |         {"logistic", LOGISTIC}, {"loggy", LOGGY}, {"relu", RELU}, {"elu", ELU},
 15 |         {"relie", RELIE}, {"plse", PLSE}, {"hardtan", HARDTAN}, {"lhtan", LHTAN},
 16 |         {"linear", LINEAR}, {"ramp", RAMP}, {"leaky", LEAKY}, {"tanh", TANH},
 17 |         {"stair", STAIR},
 18 |     };
 19 |     const int count = (int)(sizeof(known)/sizeof(known[0]));
 20 |     for (int k = 0; k < count; k++)
 21 |     {
 22 |         if (strcmp(s, known[k].name) == 0) return known[k].type;
 23 |     }
 24 |     fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s);
 25 | 
 26 |     return RELU;
 27 | }
 28 | 
 29 | // Apply activation `a` to a single value by forwarding to the matching *_activate helper.
 30 | //   x: input value
 31 | //   a: activation type
 32 | //   returns: the helper's result, or 0 when `a` matches none of the cases
 33 | float activate(float x, ACTIVATION a)
 34 | {
 35 |     float y = 0;  // result when no case matches
 36 |     switch(a)
 37 |     {
 38 |         case LINEAR:   y = linear_activate(x);   break;
 39 |         case LOGISTIC: y = logistic_activate(x); break;
 40 |         case LOGGY:    y = loggy_activate(x);    break;
 41 |         case RELU:     y = relu_activate(x);     break;
 42 |         case ELU:      y = elu_activate(x);      break;
 43 |         case RELIE:    y = relie_activate(x);    break;
 44 |         case RAMP:     y = ramp_activate(x);     break;
 45 |         case LEAKY:    y = leaky_activate(x);    break;
 46 |         case TANH:     y = tanh_activate(x);     break;
 47 |         case PLSE:     y = plse_activate(x);     break;
 48 |         case STAIR:    y = stair_activate(x);    break;
 49 |         case HARDTAN:  y = hardtan_activate(x);  break;
 50 |         case LHTAN:    y = lhtan_activate(x);    break;
 51 |     }
 52 |     return y;
 53 | }
 63 | 
 64 | // Apply activation `a` in place to x[0 .. n-1], in ascending index order (no-op when n <= 0).
 65 | void activate_array(float *x, const int n, const ACTIVATION a)
 66 | {
 67 |     for (int k = 0; k < n; ++k) x[k] = activate(x[k], a);
 68 | }
 72 | 
 73 | // Gradient factor of activation `a` for the value x, forwarded to the matching *_gradient helper.
 74 | //   x: value handed to the helper
 75 | //   a: activation type
 76 | //   returns: the helper's result, or 0 when `a` matches none of the cases
 77 | float gradient(float x, ACTIVATION a)
 78 | {
 79 |     float g = 0;  // result when no case matches
 80 |     switch(a)
 81 |     {
 82 |         case LINEAR:   g = linear_gradient(x);   break;
 83 |         case LOGISTIC: g = logistic_gradient(x); break;
 84 |         case LOGGY:    g = loggy_gradient(x);    break;
 85 |         case RELU:     g = relu_gradient(x);     break;
 86 |         case ELU:      g = elu_gradient(x);      break;
 87 |         case RELIE:    g = relie_gradient(x);    break;
 88 |         case RAMP:     g = ramp_gradient(x);     break;
 89 |         case LEAKY:    g = leaky_gradient(x);    break;
 90 |         case TANH:     g = tanh_gradient(x);     break;
 91 |         case PLSE:     g = plse_gradient(x);     break;
 92 |         case STAIR:    g = stair_gradient(x);    break;
 93 |         case HARDTAN:  g = hardtan_gradient(x);  break;
 94 |         case LHTAN:    g = lhtan_gradient(x);    break;
 95 |     }
 96 |     return g;
 97 | }
107 | 
108 | // Multiply each delta[k] by gradient(x[k], a) for k = 0 .. n-1; x is only read.
109 | void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta)
110 | {
111 |     for (int k = 0; k < n; ++k) delta[k] *= gradient(x[k], a);
112 | }
116 | 


--------------------------------------------------------------------------------
/box.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Box
  3 | //========================================================================
  4 | // @brief: sort boxes according to the confidence 
  5 | 
  6 | #ifndef SRC_BOX_CPP_
  7 | #define SRC_BOX_CPP_
  8 | 
  9 | #include "box.h"
 10 | 
 11 | // Per-class non-maximum suppression (probs is modified in place, boxes is only read).
 12 | // For every class k the entries are ordered by probs[.][k], descending (nms_comparator); then, for
 13 | // each entry whose class-k value is non-zero, every later entry whose IoU with it exceeds thresh
 14 | // gets its class-k value set to 0.
 15 | // NOTE(review): the calloc result is used without a NULL check.
 16 | void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh)
 17 | {
 18 |     sortable_box *order = (sortable_box *)calloc(total, sizeof(sortable_box));
 19 |     for (int n = 0; n < total; n++) {
 20 |         order[n].index = n;
 21 |         order[n].probs = probs;
 22 |     }
 23 |     for (int k = 0; k < classes; k++) {
 24 |         // the comparator reads the class to sort by from the entries themselves
 25 |         for (int n = 0; n < total; n++) order[n].classes = k;
 26 |         qsort(order, total, sizeof(sortable_box), nms_comparator);
 27 |         for (int hi = 0; hi < total; hi++) {
 28 |             const int keep = order[hi].index;
 29 |             if (probs[keep][k] == 0) continue;
 30 |             for (int lo = hi + 1; lo < total; lo++) {
 31 |                 const int other = order[lo].index;
 32 |                 if (box_iou(boxes[keep], boxes[other]) > thresh) probs[other][k] = 0;
 33 |             }
 34 |         }
 35 |     }
 36 |     free(order);
 37 | }
 47 | 
 48 | // qsort comparator for sortable_box: larger score first (returns 1 when a's score is lower, -1 when higher).
 49 | // Both scores are read at column b->classes; the callers in this file set the same class on every entry.
 50 | int nms_comparator(const void *pa, const void *pb)
 51 | {
 52 |     const sortable_box *a = (const sortable_box *)pa;
 53 |     const sortable_box *b = (const sortable_box *)pb;
 54 |     const float diff = a->probs[a->index][b->classes] - b->probs[b->index][b->classes];
 55 |     return (diff < 0) - (diff > 0);
 56 | }
 58 | // Intersection-over-union of a and b; returns 0 (instead of 0/0 = NaN) when the union area is not positive.
 59 | float box_iou(box a, box b)
 60 | {
 61 |     const float u = box_union(a, b);
 62 |     return (u > 0) ? box_intersection(a, b)/u : 0;
 63 | }
 64 | 
 65 | // Overlap area of a and b: product of the overlap lengths along x and y,
 66 | // or 0 when either overlap length is negative.
 67 | float box_intersection(box a, box b)
 68 | {
 69 |     const float w = overlap(a.x, a.w, b.x, b.w);
 70 |     const float h = overlap(a.y, a.h, b.y, b.h);
 71 |     return (w < 0 || h < 0) ? 0 : w*h;
 72 | }
 77 | 
 78 | // Signed overlap length of two 1-D intervals given by midpoint (x1, x2) and extent (w1, w2).
 79 | // A negative result means the intervals do not touch; its magnitude is the gap between them.
 80 | float overlap(float x1, float w1, float x2, float w2)
 81 | {
 82 |     const float lo_a = x1 - w1/2, hi_a = x1 + w1/2;
 83 |     const float lo_b = x2 - w2/2, hi_b = x2 + w2/2;
 84 |     float lo = lo_b;              // larger of the two lower ends
 85 |     if (lo_a > lo_b) lo = lo_a;
 86 |     float hi = hi_b;              // smaller of the two upper ends
 87 |     if (hi_a < hi_b) hi = hi_a;
 88 |     return hi - lo;
 89 | }
 91 | 
 92 | // Union area of a and b: the two box areas (w*h) minus their intersection, so the shared part counts once.
 93 | float box_union(box a, box b)
 94 | {
 95 |     const float shared = box_intersection(a, b);
 96 |     return a.w*a.h + b.w*b.h - shared;
 97 | }
100 | 
101 | // Non-maximum suppression keyed on column `classes` of probs (probs is modified in place, boxes is only read).
102 | // Entries are ordered by probs[.][classes], descending. For each entry whose value in that column is
103 | // non-zero, every later entry with IoU > thresh has probs[.][0 .. classes] set to 0.
104 | // NOTE(review): the calloc result is used without a NULL check.
105 | void do_nms_obj(box *boxes, float **probs, int total, int classes, float thresh)
106 | {
107 |     sortable_box *order = (sortable_box *)calloc(total, sizeof(sortable_box));
108 |     for (int n = 0; n < total; n++) {
109 |         order[n].index   = n;
110 |         order[n].classes = classes;
111 |         order[n].probs   = probs;
112 |     }
113 |     qsort(order, total, sizeof(sortable_box), nms_comparator);
114 |     for (int hi = 0; hi < total; hi++) {
115 |         const int keep = order[hi].index;
116 |         if (probs[keep][classes] == 0) continue;
117 |         for (int lo = hi + 1; lo < total; lo++) {
118 |             const int other = order[lo].index;
119 |             if (!(box_iou(boxes[keep], boxes[other]) > thresh)) continue;
120 |             for (int k = 0; k <= classes; k++) probs[other][k] = 0;
121 |         }
122 |     }
123 |     free(order);
124 | }
133 | 
134 | // Build a box from four consecutive floats: f[0], f[1], f[2], f[3] become x, y, w, h.
135 | box float_to_box (float *f)
136 | {
137 |     const float *v = f;
138 |     box out;
139 |     out.x = *v++;
140 |     out.y = *v++;
141 |     out.w = *v++;
142 |     out.h = *v;
143 |     return out;
144 | }
145 | 
146 | #endif /* SRC_BOX_CPP_ */
147 | 


--------------------------------------------------------------------------------
/inits.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Inits
  3 | //========================================================================
  4 | // @brief: initialization of struct
  5 | 
  6 | #include "inits.h"
  7 | 
  8 | // Set every member listed below to 0; l is taken by reference, so the caller's layer is modified.
  9 | void init_layer(layer &l)
 10 | {   // note: type, activation and cost_type are not reset -- their assignments below are commented out
 11 |     /*
 12 |     l.type = 0;
 13 |     l.activation = 0;
 14 |     l.cost_type = 0;
 15 |     */
 16 |     // forward/backward/update members and their *_gpu counterparts
 17 |     l.forward  = 0;
 18 |     l.backward = 0;
 19 |     l.update   = 0;
 20 |     l.forward_gpu  = 0;
 21 |     l.backward_gpu = 0;
 22 |     l.update_gpu   = 0;
 23 | 
 24 |     // flag, count and dimension members
 25 |     l.batch_normalize = 0;
 26 |     l.shorcut = 0;
 27 |     l.batch = 0;
 28 |     l.forced = 0;
 29 |     l.flipped = 0;
 30 |     l.inputs = 0;
 31 |     l.outputs = 0;
 32 |     l.truths = 0;
 33 |     l.h = 0;
 34 |     l.w = 0;
 35 |     l.c = 0;
 36 |     l.out_h = 0;
 37 |     l.out_w = 0;
 38 |     l. out_c = 0;
 39 |     l. n = 0;
 40 |     l. max_boxes = 0;
 41 |     l. groups = 0;
 42 |     l. size = 0;
 43 |     l. side = 0;
 44 |     l. stride = 0;
 45 |     l. reverse = 0;
 46 |     l. pad = 0;
 47 |     l. sqrt = 0;
 48 |     l. flip = 0;
 49 |     l. index = 0;
 50 |     l. binary = 0;
 51 |     l. xnor = 0;
 52 |     l. steps = 0;
 53 |     l. hidden = 0;
 54 |     l. dot = 0;
 55 |     l. angle = 0;
 56 |     l. jitter = 0;
 57 |     l. saturation = 0;
 58 |     l. exposure = 0;
 59 |     l. shift = 0;
 60 |     l. ratio = 0;
 61 |     l. softmax = 0;
 62 |     l. classes = 0;
 63 |     l. coords = 0;
 64 |     l. background = 0;
 65 |     l. rescore = 0;
 66 |     l. objectness = 0;
 67 |     l. does_cost = 0;
 68 |     l. joint = 0;
 69 |     l. noadjust = 0;
 70 |     l. reorg = 0;
 71 |     l. log = 0;
 72 |     // repeat in network ???
 73 |     l. adam = 0;
 74 |     l. B1 = 0;
 75 |     l. B2 = 0;
 76 |     l. eps = 0;
 77 |     l. t = 0;
 78 |     // alpha, belta, kappa
 79 |     l. alpha = 0;
 80 |     l. belta = 0;
 81 |     l. kappa = 0;
 82 |     // *_scale members, thresh and related flags
 83 |     l. coord_scale = 0;
 84 |     l. object_scale = 0;
 85 |     l. noobject_scale = 0;
 86 |     l. class_scale = 0;
 87 |     l. bias_match = 0;
 88 |     l. random = 0;
 89 |     l. thresh = 0;
 90 |     l. classfix = 0;
 91 |     l. absolute = 0;
 92 |     // dontload, dontloadscales
 93 |     l. dontload = 0;
 94 |     l. dontloadscales = 0;
 95 |     // temperature, probability, scale
 96 |     l. temperature = 0;
 97 |     l. probability = 0;
 98 |     l. scale = 0;
 99 |     // remaining members (their types are declared in layer.h, not visible here) are likewise set to 0
100 |     l.cweights = 0;
101 |     l.indexes = 0;
102 |     l.input_layers = 0;
103 |     l.input_sizes = 0;
104 |     l. map = 0;
105 |     l.rand = 0;
106 |     l.cost = 0;
107 |     l.state = 0;
108 |     l.prev_state = 0;
109 |     l.forgot_state = 0;
110 |     l.forgot_delta = 0;
111 |     l.state_delta = 0;
112 |     // concat, concat_delta
113 |     l.concat = 0;
114 |     l.concat_delta = 0;
115 |     // binary_weights
116 |     l.binary_weights = 0;
117 |     // biases and their updates
118 |     l.biases = 0;
119 |     l.bias_updates = 0;
120 |     // scales and their updates
121 |     l.scales = 0;
122 |     l.scale_updates = 0;
123 |     // weights and their updates
124 |     l.weights = 0;
125 |     l.weight_updates = 0;
126 |     // col_image, delta, output, squared, norms
127 |     l.col_image = 0;
128 |     l.delta = 0;
129 |     l.output = 0;
130 |     l.squared = 0;
131 |     l.norms = 0;
132 |     // spatial_mean, mean, variance
133 |     l.spatial_mean = 0;
134 |     l.mean = 0;
135 |     l.variance = 0;
136 |     // mean_delta, variance_delta
137 |     l.mean_delta = 0;
138 |     l.variance_delta = 0;
139 |         // rolling_mean, rolling_variance
140 |     l.rolling_mean = 0;
141 |     l.rolling_variance = 0;
142 |         // x, x_norm, m, v
143 |     l.x = 0;
144 |     l. x_norm = 0;
145 |     l.m = 0;
146 |     l.v = 0;
147 |     // z_cpu, r_cpu, h_cpu
148 |     l.z_cpu = 0;
149 |     l.r_cpu = 0;
150 |     l.h_cpu = 0;
151 |     // binary_input
152 |     l.binary_input = 0;
153 |     // input / self / output layer members
154 |     l.input_layer = 0;
155 |     l.self_layer = 0;
156 |     l.output_layer = 0;
157 |     // input_* / state_* gate, save and state layer members
158 |     l.input_gate_layer = 0;
159 |     l.state_gate_layer = 0;
160 |     l.input_save_layer = 0;
161 |     l.state_save_layer = 0;
162 |     l.input_state_layer = 0;
163 |     l.state_state_layer = 0;
164 |     // *_z_layer members
165 |     l.input_z_layer = 0;
166 |     l.state_z_layer = 0;
167 |     // *_r_layer members
168 |     l.input_r_layer = 0;
169 |     l.state_r_layer = 0;
170 |     // *_h_layer members
171 |     l.input_h_layer = 0;
172 |     l.state_h_layer = 0;
173 |     // softmax_tree
174 |     l.softmax_tree = 0;
175 |     // workspace_size
176 |     l.workspace_size = 0;
177 | }
178 | 
179 | 
180 | 


--------------------------------------------------------------------------------
/activations.h:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Activation header file
  3 | //========================================================================
  4 | // @brief: function prototype & activate type definition
  5 | 
  6 | #ifndef SRC_ACTIVATIONS_H_
  7 | #define SRC_ACTIVATIONS_H_
  8 | 
  9 | #include <stdio.h>
 10 | #include <stdlib.h>
 11 | #include <string.h>
 12 | #include <math.h>
 13 | 
 14 | // activation types; get_activation() maps the lowercase name of each enumerator to its value
 15 | typedef enum
 16 | {
 17 |     LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN
 18 | } ACTIVATION;
 19 | 
 20 | // get activation type
 21 | ACTIVATION get_activation(char *s);
 22 | // activation
 23 | float activate(float x, ACTIVATION a);
 24 | // activation in batch mode
 25 | void activate_array(float *x, const int n, const ACTIVATION a);
 26 | // gradient factor for activation a at x (dispatches to the *_gradient helpers)
 27 | float gradient(float x, ACTIVATION a);
 28 | // multiply each delta[i] by gradient(x[i], a), for i in [0, n)
 29 | void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta);
 30 | 
 31 | // activation helper functions: each maps a single float to a single float
 32 | // logistic: 1/(1 + e^(-x))
 33 | static inline float logistic_activate(float x)
 34 | {
 35 |     return 1.0/(1.0 + exp(-x));
 36 | }
 37 | // (1 - x)*x
 38 | // NOTE(review): presumably meant to be called with the logistic output, not its input -- confirm.
 39 | static inline float logistic_gradient(float x)
 40 | {
 41 |     return (1-x)*x;
 42 | }
 43 | // With n = floor(x) converted to int: floor(x/2) when n is even,
 44 | // otherwise floor(x/2) plus the remainder (x - n).
 45 | static inline float stair_activate(float x)
 46 | {
 47 |     const int n = floor(x);
 48 |     const double half_floor = floor(x/2.0);
 49 |     return (n%2 == 0) ? half_floor : (x - n) + half_floor;
 50 | }
 51 | // Clamp x to [-1, 1].
 52 | static inline float hardtan_activate(float x)
 53 | {
 54 |     return (x < -1) ? -1 : ((x > 1) ? 1 : x);
 55 | }
 56 | // Identity.
 57 | static inline float linear_activate(float x)
 58 | {
 59 |     return x;
 60 | }
 61 | // 2/(1 + e^(-x)) - 1
 62 | static inline float loggy_activate(float x)
 63 | {
 64 |     return 2.0/(1.0 + exp(-x)) - 1;
 65 | }
 66 | // x multiplied by the 0/1 result of (x > 0).
 67 | static inline float relu_activate(float x)
 68 | {
 69 |     return x*(x>0);
 70 | }
 71 | // x for x >= 0, e^x - 1 for x < 0 (selected by multiplying with the 0/1 comparison results).
 72 | static inline float elu_activate(float x)
 73 | {
 74 |     return (x >= 0)*x + (x < 0)*(exp(x)-1);
 75 | }
 76 | // x for x > 0, otherwise 0.01*x.
 77 | static inline float relie_activate(float x)
 78 | {
 79 |     if (x > 0) return x;
 80 |     return 0.01*x;
 81 | }
 82 | // x*(x > 0) + 0.1*x
 83 | static inline float ramp_activate(float x)
 84 | {
 85 |     return x*(x>0)+0.1*x;
 86 | }
 87 | // x for x > 0, otherwise 0.1*x.
 88 | static inline float leaky_activate(float x)
 89 | {
 90 |     return (x>0) ? x : 0.1*x;
 91 | }
 92 | static inline float tanh_activate(float x)  // hyperbolic tangent of x
 93 | {
 94 |     return tanh(x);  // library tanh: the (e^2x - 1)/(e^2x + 1) quotient became inf/inf = NaN once e^2x overflowed
 95 | }
 96 | // Piecewise linear: 0.125*x + .5 on [-4, 4], slope 0.01 outside that range.
 97 | static inline float plse_activate(float x)
 98 | {
 99 |     if (x < -4) return 0.01 * (x + 4);
100 |     if (x > 4)  return 0.01 * (x - 4) + 1;
101 |     return 0.125*x + .5;
102 | }
103 | // Identity on [0, 1], slope 0.001 outside that range.
104 | static inline float lhtan_activate(float x)
105 | {
106 |     if (x < 0) return 0.001*x;
107 |     if (x > 1) return 0.001*(x-1) + 1;
108 |     return x;
109 | }
110 | // 1 strictly inside (0, 1), 0.001 elsewhere.
111 | static inline float lhtan_gradient(float x)
112 | {
113 |     return (x > 0 && x < 1) ? 1 : 0.001;
114 | }
115 | // 1 strictly inside (-1, 1), 0 elsewhere.
116 | static inline float hardtan_gradient(float x)
117 | {
118 |     return (x > -1 && x < 1) ? 1 : 0;
119 | }
120 | // Constant 1; x is unused.
121 | static inline float linear_gradient(float x)
122 | {
123 |     return 1;
124 | }
125 | // 2*(1 - y)*y with y = (x + 1)/2
126 | // NOTE(review): presumably evaluated on the loggy output rather than its input -- confirm.
127 | static inline float loggy_gradient(float x)
128 | {
129 |     const float y = (x+1.0)/2.0;
130 |     return 2*(1-y)*y;
131 | }
132 | // 0 when floor(x) == x, 1 otherwise.
133 | static inline float stair_gradient(float x)
134 | {
135 |     return (floor(x) == x) ? 0 : 1;
136 | }
137 | // The 0/1 result of (x > 0).
138 | static inline float relu_gradient(float x)
139 | {
140 |     return (x>0);
141 | }
142 | // 1 for x >= 0, x + 1 for x < 0.
143 | static inline float elu_gradient(float x)
144 | {
145 |     return (x >= 0) + (x < 0)*(x + 1);
146 | }
147 | // 1 for x > 0, otherwise 0.01.
148 | static inline float relie_gradient(float x)
149 | {
150 |     if (x > 0) return 1;
151 |     return 0.01;
152 | }
153 | // 1.1 for x > 0, otherwise 0.1.
154 | static inline float ramp_gradient(float x)
155 | {
156 |     return (x>0)+0.1;
157 | }
158 | // 1 for x > 0, otherwise 0.1.
159 | static inline float leaky_gradient(float x)
160 | {
161 |     if (x > 0) return 1;
162 |     return 0.1;
163 | }
164 | // 1 - x*x
165 | static inline float tanh_gradient(float x)
166 | {
167 |     return 1-x*x;
168 | }
169 | // 0.01 when x < 0 or x > 1, otherwise 0.125.
170 | static inline float plse_gradient(float x)
171 | {
172 |     return (x < 0 || x > 1) ? 0.01 : 0.125;
173 | }
181 | 
182 | #endif /* SRC_ACTIVATIONS_H_ */
183 | 


--------------------------------------------------------------------------------
/tree.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Tree
  3 | //========================================================================
  4 | // @brief: update tree of probabilities
  5 | 
  6 | #include "tree.h"
  7 | 
  8 | // Walk the hierarchy starting at group 0. In each group, pick the entry with the largest
  9 | // prediction (entries must exceed 0 to be picked) and multiply it into a running product.
 10 | // While the product stays above thresh, descend into the picked entry's child group and
 11 | // return the picked index once it has no child group (child < 0). As soon as the product
 12 | // is not above thresh, return the parent of the current group's first entry.
 13 | int hierarchy_top_prediction(float *predictions, tree *hier, float thresh)
 14 | {
 15 |     float path_prob = 1;
 16 |     int group = 0;
 17 |     for (;;)
 18 |     {
 19 |         const int first = hier->group_offset[group];
 20 |         const int count = hier->group_size[group];
 21 |         float best = 0;
 22 |         int best_i = 0;
 23 |         for (int off = 0; off < count; off++)
 24 |         {
 25 |             const float val = predictions[first + off];
 26 |             if (val > best)
 27 |             {
 28 |                 best_i = first + off;
 29 |                 best   = val;
 30 |             }
 31 |         }
 32 |         if (!(path_prob*best > thresh))
 33 |         {
 34 |             return hier->parent[first];
 35 |         }
 36 |         path_prob = path_prob*best;
 37 |         group = hier->child[best_i];
 38 |         if (group < 0)
 39 |         {
 40 |             return best_i;
 41 |         }
 42 |     }
 43 | }
 44 | 
 45 | // For j = 0 .. n-1 in ascending order: multiply predictions[j] by predictions[parent[j]]
 46 | // whenever hier->parent[j] >= 0. If only_leaves is non-zero, every entry whose
 47 | // hier->leaf flag is 0 is then set to 0.
 48 | void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves)
 49 | {
 50 |     for (int node = 0; node < n; node++)
 51 |     {
 52 |         const int up = hier->parent[node];
 53 |         if (up < 0) continue;
 54 |         predictions[node] *= predictions[up];
 55 |     }
 56 |     if (!only_leaves) return;
 57 |     for (int node = 0; node < n; node++)
 58 |     {
 59 |         if (hier->leaf[node]) continue;
 60 |         predictions[node] = 0;
 61 |     }
 62 | }
 67 | 
 68 | // Product of x[c] over c and each of its ancestors, following hier->parent until it turns negative.
 69 | // Returns 1 when c is already negative.
 70 | float get_hierarchy_probability(float *x, tree *hier, int c)
 71 | {
 72 |     float product = 1;
 73 |     for (int node = c; node >= 0; node = hier->parent[node])
 74 |     {
 75 |         product *= x[node];
 76 |     }
 77 |     return product;
 78 | }
 79 | 
 80 | // Read a tree description: one "<name> <parent index>" record per line of `filename`.
 81 | // A new group starts whenever the parent differs from the previous record's parent.
 82 | // Returns a heap-allocated tree, or 0 (with a message on stderr) if the file cannot be opened.
 83 | // NOTE(review): realloc/calloc results are still used unchecked, and lines returned by fgetl()
 84 | // are not released here -- confirm fgetl()'s ownership contract before adding a free().
 85 | tree *read_tree(char *filename)
 86 | {
 87 |     tree t = {0};
 88 |     FILE *file = fopen(filename, "r");
 89 |     if (!file)
 90 |     {
 91 |         // without this check a missing file went straight into fgetl()/fclose() with a null stream
 92 |         fprintf(stderr, "Couldn't open tree file: %s\n", filename);
 93 |         return 0;
 94 |     }
 95 |     char *line;
 96 |     int last_parent = -1;
 97 |     int group_size = 0;
 98 |     int groups = 0;
 99 |     int n = 0;
100 |     while ((line = fgetl(file)) != 0)
101 |     {
102 |         char *id = (char *)calloc(256, sizeof(char));
103 |         int parent = -1;
104 |         // the field width keeps an over-long name from overflowing the 256-byte id buffer
105 |         sscanf(line, "%255s %d", id, &parent);
106 |         t.parent = (int *)realloc(t.parent, (n+1)*sizeof(int));
107 |         t.parent[n] = parent;
108 | 
109 |         t.child = (int *)realloc(t.child, (n+1)*sizeof(int));
110 |         t.child[n] = -1;
111 | 
112 |         t.name = (char **)realloc(t.name, (n+1)*sizeof(char *));
113 |         t.name[n] = id;
114 | 
115 |         if(parent != last_parent)
116 |         {
117 |             // close the previous run of records: store where it started and how many it held
118 |             groups++;
119 |             t.group_offset = (int *)realloc(t.group_offset, groups * sizeof(int));
120 |             t.group_offset[groups - 1] = n - group_size;
121 |             t.group_size = (int *)realloc(t.group_size, groups * sizeof(int));
122 |             t.group_size[groups - 1] = group_size;
123 |             group_size = 0;
124 |             last_parent = parent;
125 |         }
126 |         t.group = (int *)realloc(t.group, (n+1)*sizeof(int));
127 |         t.group[n] = groups;
128 |         if (parent >= 0) t.child[parent] = groups;
129 |         n++;
130 |         group_size++;
131 |     }
132 |     // close the final run of records
133 |     groups++;
134 |     t.group_offset = (int *)realloc (t.group_offset, groups * sizeof(int));
135 |     t.group_offset[groups - 1] = n - group_size;
136 |     t.group_size = (int *)realloc(t.group_size, groups * sizeof(int));
137 |     t.group_size[groups - 1] = group_size;
138 |     t.n = n;
139 |     t.groups = groups;
140 |     // an entry is marked as a leaf unless some record names it as its parent
141 |     t.leaf = (int *)calloc(n, sizeof(int));
142 |     for (int i = 0; i < n; i++) t.leaf[i] = 1;
143 |     for (int i = 0; i < n; i++)
144 |     {
145 |         if (t.parent[i] >= 0) t.leaf[t.parent[i]] = 0;
146 |     }
147 |     fclose(file);
148 |     tree *tree_ptr = (tree *)calloc(1, sizeof(tree));
149 |     *tree_ptr = t;
150 |     return tree_ptr;
151 | }
152 | 


--------------------------------------------------------------------------------
/blas.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Blas
  3 | //========================================================================
  4 | // @brief: helper functions for batchnorm layer
  5 | 
  6 | #include "blas.h"
  7 | 
  8 | // Scale in place: X[i*INCX] *= ALPHA for i = 0 .. N-1 (N elements, stride INCX).
  9 | void scal_cpu(int N, float ALPHA, float *X, int INCX)
 10 | {
 11 |     for (int i = 0; i < N; i++)
 12 |     {
 13 |         X[i*INCX] *= ALPHA;  // the body used to be `X[i] = 0`, which ignored both ALPHA and INCX
 14 |     }
 15 | }
 16 | 
 17 | // Fill: X[i*INCX] = ALPHA for i = 0 .. N-1 (N elements, stride INCX).
 18 | void fill_cpu(int N, float ALPHA, float *X, int INCX)
 19 | {
 20 |     for (int i = 0; i < N; i++)
 21 |     {
 22 |         X[i*INCX] = ALPHA;  // the body used to be `X[i] = 0`, which ignored both ALPHA and INCX
 23 |     }
 24 | }
 25 | 
 26 | // Per-filter mean: mean[f] = (sum of x[b*filters*spatial + f*spatial + s] over all b, s) / (batch*spatial).
 27 | // x is only read; mean receives `filters` values.
 28 | void mean_cpu(float *x, int batch, int filters, int spatial, float *mean)
 29 | {
 30 |     const float scale = 1.0/(batch * spatial);
 31 |     for (int f = 0; f < filters; f++)
 32 |     {
 33 |         float sum = 0;
 34 |         for (int b = 0; b < batch; b++)
 35 |         {
 36 |             const float *row = x + b*filters*spatial + f*spatial;
 37 |             for (int s = 0; s < spatial; s++)
 38 |             {
 39 |                 sum += row[s];
 40 |             }
 41 |         }
 42 |         mean[f] = sum * scale;
 43 |     }
 44 | }
 45 | 
 46 | // Per-filter variance around mean[f]: the squared differences of x[b*filters*spatial + f*spatial + s]
 47 | // from mean[f] are summed over all b, s and divided by (batch*spatial - 1).
 48 | void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
 49 | {
 50 |     const float scale = 1.0/(batch * spatial - 1);
 51 |     for (int f = 0; f < filters; f++)
 52 |     {
 53 |         float sum_sq = 0;
 54 |         for (int b = 0; b < batch; b++)
 55 |         {
 56 |             const float *row = x + b*filters*spatial + f*spatial;
 57 |             for (int s = 0; s < spatial; s++)
 58 |             {
 59 |                 sum_sq += pow((row[s] - mean[f]), 2);
 60 |             }
 61 |         }
 62 |         variance[f] = sum_sq * scale;
 63 |     }
 64 | }
 65 | 
 66 | // multiply some values in *X with ALPHA
 67 | void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
 68 | {
 69 |     for (int i = 0; i < N; i++)
 70 |     {
 71 |         Y[i*INCY] += ALPHA*X[i*INCX];
 72 |     }
 73 | }
 74 | 
 75 | // array copy
 76 | void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
 77 | {
 78 |     for (int i = 0; i < N; i++)
 79 |     {
 80 |         Y[i*INCY] = X[i*INCX];
 81 |     }
 82 | }
 83 | 
 84 | // normalization with mean and variance
 85 | void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
 86 | {
 87 |     for (int j = 0; j < batch; j++)
 88 |     {
 89 |         for (int k = 0; k < filters; k++)
 90 |         {
 91 |             float p = sqrt(variance[k])+0.000001f;
 92 |             for (int i = 0; i < spatial; i++)
 93 |             {
 94 |                 int index = j*filters*spatial + k*spatial + i;
 95 |                 x[index] = (x[index] - mean[k])/p;
 96 |                 //x[index] *= scales[k];
 97 |                 //x[index] += bias[k];
 98 |             }
 99 |         }
100 |     }
101 | }
102 | 
// Multiply N elements of X by ALPHA in place, stepping through X with
// stride INCX.
void scale_cpu(int N, float ALPHA, float *X, int INCX)
{
    for (int n = 0; n < N; n++)
    {
        float *elem = X + n*INCX;
        *elem *= ALPHA;
    }
}
111 | 
// Reorder x in place between two layouts, independently per batch item:
//   planar       index = b*layers*size + c*size + i
//   interleaved  index = b*layers*size + i*layers + c
// forward != 0 converts planar -> interleaved, forward == 0 converts back.
// A temporary buffer of size*layers*batch floats is allocated for the
// shuffle and released before returning.
void flatten(float *x, int size, int layers, int batch, int forward)
{
    const int total = size*layers*batch;
    float *scratch = (float *)calloc(total, sizeof(float));

    for (int b = 0; b < batch; ++b)
    {
        const int base = b*layers*size;
        for (int c = 0; c < layers; ++c)
        {
            for (int i = 0; i < size; ++i)
            {
                const int planar      = base + c*size + i;
                const int interleaved = base + i*layers + c;
                if (forward)
                {
                    scratch[interleaved] = x[planar];
                }
                else
                {
                    scratch[planar] = x[interleaved];
                }
            }
        }
    }

    memcpy(x, scratch, total*sizeof(float));
    free(scratch);
}
139 | 
140 | // softmax layer
141 | void softmax(float *input, int n, float temp, float *output)
142 | {
143 |     float sum = 0;
144 |     float largest = -FLT_MAX;
145 |     for(int i = 0; i < n; i++)
146 |     {
147 |         if(input[i] > largest)
148 |         {
149 |             largest = input[i];
150 |         }
151 |     }
152 |     for(int i = 0; i < n; i++)
153 |     {
154 |         float e = exp(input[i]/temp - largest/temp);
155 |         sum += e;
156 |         output[i] = e;
157 |     }
158 |     for(int i = 0; i < n; i++)
159 |     {
160 |         output[i] /= sum;
161 |     }
162 | }
163 | 


--------------------------------------------------------------------------------
/option_list.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Option list
  3 | //========================================================================
  4 | // @brief: read and compare parameters
  5 | 
  6 | #include "option_list.h"
  7 | 
  8 | // read cfg data
  9 | list *read_data_cfg(char *filename)
 10 | {
 11 |     FILE *file = fopen(filename, "r");
 12 |     if (file == 0)
 13 |     {
 14 |         file_error(filename);
 15 |     }
 16 |     char *line;
 17 |     list *options = make_list();
 18 |     int nu = 0;
 19 |     // get each line
 20 |     while((line=fgetl(file)) != 0)
 21 |     {
 22 |         nu++;
 23 |         //printf("nu:%d; line: %s;\n",nu,line);
 24 |         strip(line);
 25 |         switch(line[0])
 26 |         {
 27 |             case '\0':
 28 |             case '#':
 29 |             case ';':
 30 |             {
 31 |                 free(line);
 32 |                 break;
 33 |             }
 34 |             default:
 35 |             {
 36 |                 if (!read_option(line, options))
 37 |                 {
 38 |                     fprintf(stderr,"Config file error line %d, could parse: %s\n",nu,line);
 39 |                     free(line);
 40 |                 }
 41 |                 break;
 42 |             }
 43 |         }
 44 |     }
 45 |     fclose(file);
 46 |     return options;
 47 | }
 48 | 
 49 | // change "=" to "\n", and insert it into a list option
 50 | // val stores the address of string of value
 51 | int read_option(char *s, list *options)
 52 | {
 53 |     size_t i;
 54 |     size_t len = strlen(s);
 55 |     char *val=0;
 56 |     // split the string s within "="
 57 |     for (i = 0; i < len; i++)
 58 |     {
 59 |         if (s[i] == '=')
 60 |         {
 61 |             s[i] = '\0';
 62 |             val  = s+i+1;
 63 |             break;
 64 |         }
 65 |     }
 66 |     //
 67 |     if(i == len-1)
 68 |     {   // no value for this key, insert failed: return 0
 69 |         return 0;
 70 |     }
 71 |     char *key = s;
 72 |     option_insert(options, key, val);
 73 |     // successfully insert key&value into option: return 1
 74 |     return 1;
 75 | }
 76 | 
 77 | // insert value(*val) into list option
 78 | void option_insert(list *l, char *key, char *val)
 79 | {
 80 |     kvp *p = (kvp *)malloc(sizeof(kvp));
 81 |     p->key  = key;
 82 |     p->val  = val;
 83 |     p->used = 0;
 84 |     list_insert(l, p);
 85 | }
 86 | 
 87 | // check specific strings (keys)
 88 | char *option_find_str(list *l, char *key, char *def)
 89 | {
 90 |     char *v = option_find(l, key);
 91 |     if (v)
 92 |     {   //
 93 |         return v;
 94 |     }
 95 |     if (def)
 96 |     {   // use default cfg
 97 |         fprintf(stderr, "%s: Using default '%s' \n", key, def);
 98 |     }
 99 |     return def;
100 | }
101 | 
102 | //traverse the list l
103 | char *option_find (list *l, char *key)
104 | {
105 |     node *n = l->front;
106 |     // traverse the list from the first node l->front
107 |     while(n)
108 |     {
109 |         kvp *p = (kvp *)n->val;
110 |         if (strcmp(p->key, key) == 0)
111 |         {
112 |             p->used = 1;
113 |             return p->val;
114 |         }
115 |         n = n->next;
116 |     }
117 |     // no match key found, return 0
118 |     return 0;
119 | }
120 | 
121 | // ???
122 | void option_unused (list *l)
123 | {
124 |     node *n = l->front;
125 |     // traverse the list from the first node l->front
126 |     while(n)
127 |     {
128 |         kvp *p = (kvp *)n->val;
129 |         if(!p->used)
130 |         {
131 |             fprintf(stderr, "Unused field: '%s' = '%s'\n", p->key, p->val);
132 |         }
133 |         n = n->next;
134 |     }
135 | }
136 | 
137 | // find specific ints
138 | int option_find_int(list *l, char *key, int def)
139 | {
140 |     char *v = option_find(l, key);
141 |     if (v)
142 |     {
143 |         return atoi(v);
144 |     }
145 |     if (def)
146 |     {
147 |         fprintf(stderr, "%s: Using default '%d'\n", key, def);
148 |     }
149 |     return def;
150 | }
151 | 
152 | // find specific ints
153 | int option_find_int_quiet(list *l, char *key, int def)
154 | {
155 |     char *v = option_find(l, key);
156 |     if (v)
157 |     {
158 |         return atoi(v);
159 |     }
160 |     return def;
161 | }
162 | 
163 | // find specific floats
164 | float option_find_float(list *l, char *key, float def)
165 | {
166 |     char *v = option_find(l, key);
167 |     if (v)
168 |     {
169 |         return atof(v);
170 |     }
171 |     fprintf(stderr, "%s: Using default: '%lf'\n", key, def);
172 |     return def;
173 | }
174 | 
175 | // find specific floats
176 | float option_find_float_quiet(list *l, char *key, float def)
177 | {
178 |     char *v = option_find(l, key);
179 |     if (v)
180 |     {
181 |         return atof(v);
182 |     }
183 |     return def;
184 | }
185 | 


--------------------------------------------------------------------------------
/layer.h:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Layer header file
  3 | //========================================================================
  4 | // @brief: function prototype & type definition
  5 | 
  6 | #ifndef SRC_LAYER_H_
  7 | #define SRC_LAYER_H_
  8 | 
  9 | #include "activations.h"
 10 | #include "stddef.h"
 11 | #include "tree.h"
 12 | 
 13 | // layer type
 14 | typedef enum {
 15 |     CONVOLUTIONAL,
 16 |     DECONVOLUTIONAL,
 17 |     CONNECTED,
 18 |     MAXPOOL,
 19 |     SOFTMAX,
 20 |     DETECTION,
 21 |     DROPOUT,
 22 |     CROP,
 23 |     ROUTE,
 24 |     COST,
 25 |     NORMALIZATION,
 26 |     AVGPOOL,
 27 |     LOCAL,
 28 |     SHORTCUT,
 29 |     ACTIVE,
 30 |     RNN,
 31 |     GRU,
 32 |     CRNN,
 33 |     BATCHNORM,
 34 |     NETWORK,
 35 |     XNOR,
 36 |     REGION,
 37 |     REORG,
 38 |     BLANK
 39 | } LAYER_TYPE;
 40 | 
 41 | // ???
 42 | typedef enum{
 43 |     SSE, MASKED, SMOOTH
 44 | } COST_TYPE;
 45 | 
 46 | typedef struct layer
 47 | {
 48 |     LAYER_TYPE type;
 49 |     ACTIVATION activation;
 50 |     COST_TYPE cost_type;
 51 |     //
 52 |     void (*forward)      (struct layer, struct network_state);
 53 |     void (*backward)     (struct layer, struct network_state);
 54 |     void (*update)       (struct layer, int, float, float, float);
 55 |     void (*forward_gpu)  (struct layer, struct network_state);
 56 |     void (*backward_gpu) (struct layer, struct network_state);
 57 |     void (*update_gpu)   (struct layer, int, float, float, float);
 58 |     //
 59 |     int batch_normalize;
 60 |     int shorcut;
 61 |     int batch;
 62 |     int forced;
 63 |     int flipped;
 64 |     int inputs;
 65 |     int outputs;  // size of output
 66 |     int truths;
 67 |     int h;
 68 |     int w;
 69 |     int c;
 70 |     int out_h;
 71 |     int out_w;
 72 |     int out_c;
 73 |     int n;
 74 |     int max_boxes;
 75 |     int groups;
 76 |     int size;
 77 |     int side;
 78 |     int stride;
 79 |     int reverse;
 80 |     int pad;
 81 |     int sqrt;
 82 |     int flip;
 83 |     int index;
 84 |     int binary;
 85 |     int xnor;
 86 |     int steps;
 87 |     int hidden;
 88 |     float dot;
 89 |     float angle;
 90 |     float jitter;
 91 |     float saturation;
 92 |     float exposure;
 93 |     float shift;
 94 |     float ratio;
 95 |     int softmax;
 96 |     int classes;
 97 |     int coords;
 98 |     int background;
 99 |     int rescore;
100 |     int objectness;
101 |     int does_cost;
102 |     int joint;
103 |     int noadjust;
104 |     int reorg;
105 |     int log;
106 |     // repeat in network ???
107 |     int adam;
108 |     float B1;
109 |     float B2;
110 |     float eps;
111 |     int t;
112 |     //
113 |     float alpha;
114 |     float belta;
115 |     float kappa;
116 |     //
117 |     float coord_scale;
118 |     float object_scale;
119 |     float noobject_scale;
120 |     float class_scale;
121 |     int bias_match;
122 |     int random;
123 |     float thresh;
124 |     int classfix;
125 |     int absolute;
126 |     //
127 |     int dontload;
128 |     int dontloadscales;
129 |     //
130 |     float temperature;
131 |     float probability;
132 |     float scale;
133 |     //
134 |     char  * cweights;
135 |     int   * indexes;
136 |     int   * input_layers;
137 |     int   * input_sizes;
138 |     int   * map;
139 |     float * rand;
140 |     float * cost;
141 |     float * state;
142 |     float * prev_state;
143 |     float * forgot_state;
144 |     float * forgot_delta;
145 |     float * state_delta;
146 |     //
147 |     float * concat;
148 |     float * concat_delta;
149 |     //
150 |     float * binary_weights;
151 |     //
152 |     float * biases;
153 |     float * bias_updates;
154 |     //
155 |     float * scales;
156 |     float * scale_updates;
157 |     //
158 |     float * weights;
159 |     float * weight_updates;
160 |     //
161 |     float * col_image;
162 |     float * delta;
163 |     float * output;     //output values
164 |     float * squared;
165 |     float * norms;
166 |     //
167 |     float * spatial_mean;
168 |     float * mean;
169 |     float * variance;
170 |     //
171 |     float * mean_delta;
172 |     float * variance_delta;
173 |     //
174 |     float * rolling_mean;
175 |     float * rolling_variance;
176 |     //
177 |     float * x;
178 |     float * x_norm;
179 |     float * m;
180 |     float * v;
181 |     //
182 |     float * z_cpu;
183 |     float * r_cpu;
184 |     float * h_cpu;
185 |     //
186 |     float *binary_input;
187 |     //
188 |     struct layer * input_layer;
189 |     struct layer * self_layer;
190 |     struct layer * output_layer;
191 |     //
192 |     struct layer * input_gate_layer;
193 |     struct layer * state_gate_layer;
194 |     struct layer * input_save_layer;
195 |     struct layer * state_save_layer;
196 |     struct layer * input_state_layer;
197 |     struct layer * state_state_layer;
198 |     //
199 |     struct layer * input_z_layer;
200 |     struct layer * state_z_layer;
201 |     //
202 |     struct layer * input_r_layer;
203 |     struct layer * state_r_layer;
204 |     //
205 |     struct layer * input_h_layer;
206 |     struct layer * state_h_layer;
207 |     //
208 |     tree * softmax_tree;
209 |     //
210 |     size_t workspace_size;
211 | } layer;
212 | 
213 | // free struct layer 
214 | void free_layer(layer l);
215 | 
216 | #endif /* SRC_LAYER_H_ */
217 | 


--------------------------------------------------------------------------------
/utilities.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Utilities
  3 | //========================================================================
  4 | // @brief: helper functions
  5 | 
  6 | #include "utilities.h"
  7 | 
  8 | // read file
  9 | int *read_map(char *filename)
 10 | {
 11 |     int n = 0;
 12 |     int *map = 0;
 13 |     char *str;
 14 |     FILE *file = fopen("filename", "r");
 15 |     if(!file)
 16 |     {   // open error
 17 |         file_error(filename);
 18 |     }
 19 |     while ((str=fgetl(file)))
 20 |     {
 21 |         n++;
 22 |         map = (int *)realloc(map, n*sizeof(int));
 23 |         map[n-1] = atoi(str);
 24 |     }
 25 |     return map;
 26 | }
 27 | 
 28 | // report open file error
 29 | void file_error(char *s)
 30 | {
 31 |     fprintf(stderr, "Couldn't open file: %s\n", s);
 32 |     exit(0); // exit(0): normal exit
 33 | }
 34 | 
 35 | // remove space, tab, and enter from string
 36 | void strip(char *s)
 37 | {
 38 |     size_t len = strlen(s);
 39 |     size_t offset = 0;
 40 | 
 41 |     for (size_t i = 0; i < len; i++)
 42 |     {
 43 |         char c = s[i];
 44 |         if (c == ' ' || c == '\t' || c == '\n')
 45 |         {
 46 |             offset++;
 47 |         }
 48 |         else
 49 |         {
 50 |             s[i-offset] = c;
 51 |         }
 52 |     }
 53 |     s[len-offset] = '\0';
 54 | }
 55 | 
 56 | // read one line from file
 57 | char *fgetl(FILE *file)
 58 | {
 59 |     if (feof(file))
 60 |     {   // check end of file indicator
 61 |         return 0;
 62 |     }
 63 |     size_t size = 512;
 64 |     char *line = (char *)malloc(size * sizeof(char));
 65 |     // read one line from file
 66 |     if (!fgets(line, size, file))
 67 |     {   // blank line
 68 |         free(line);
 69 |         return 0;
 70 |     }
 71 | 
 72 |     size_t curr = strlen(line);
 73 |     // verify the size of input line
 74 |     while (line[curr-1] != '\n' && !feof(file))
 75 |     {   // last char in line is '\0', size of line is 511
 76 |         if (curr == size - 1)
 77 |         {
 78 |             size *= 2;
 79 |             line = (char *)realloc(line, size * sizeof(char));
 80 |             if(!line)
 81 |             {
 82 |                 printf("Required size:%ld\n", size);
 83 |                 malloc_error();
 84 |             }
 85 |         }
 86 |         // read extra chars
 87 |         size_t readsize = size - curr;
 88 |         // check whether it overflow the maximum size
 89 |         // INT_MAX: 32767 (2^15-1) or greater*
 90 |         if (readsize > INT_MAX)
 91 |         {
 92 |             readsize = INT_MAX - 1;
 93 |         }
 94 |         // continue to read this line
 95 |         fgets(&line[curr], readsize, file);
 96 |         // update the current length read from file
 97 |         curr = strlen(line);
 98 |     }
 99 |     if (line[curr - 1] == '\n')
100 |     {   // final line in char - '\0'
101 |         line[curr - 1] = '\0';
102 |     }
103 | 
104 |     return line;
105 | }
106 | 
// Report an allocation failure on stderr and terminate the process with
// exit status -1.
void malloc_error()
{
    fprintf(stderr,
            "Malloc error\n");
    exit(-1);
}
113 | 
// Print message s followed by a newline on stdout, trip assert(0), and
// exit with status -1 when the assertion is compiled out (NDEBUG).
void error(const char *s)
{
    printf("%s\n",
           s);
    assert(0);
    exit(-1);
}
121 | 
// Free the n blocks referenced by ptrs[0..n-1], then the ptrs array itself.
void free_ptrs(void **ptrs, int n)
{
    int i = 0;
    while (i < n)
    {
        free(ptrs[i]);
        i++;
    }
    free(ptrs);
}
131 | 
// Index of the largest value in a[0..n-1].
// The first occurrence wins on ties. Returns -1 when n <= 0.
int max_index(float *a, int n)
{
    if (n <= 0)
    {
        return -1;
    }
    int best = 0;
    for (int i = 1; i < n; i++)
    {
        // strict comparison keeps the earliest maximum
        if (a[i] > a[best])
        {
            best = i;
        }
    }
    return best;
}
152 | 
// Pseudo-random value drawn with rand() and mapped linearly onto the range
// spanned by min and max. The bounds are swapped when given in reverse
// order.
float rand_uniform(float min, float max)
{
    const float lo = (max < min) ? max : min;
    const float hi = (max < min) ? min : max;
    return ((float)rand()/RAND_MAX * (hi-lo)) + lo;
}
164 | 
// Print the values of mse_array(), mean_array() and variance_array() for
// a[0..n-1] on a single stdout line.
void print_statistics(float *a, int n)
{
    const float avg = mean_array(a, n);
    const float var = variance_array(a, n);
    printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n", mse_array(a, n), avg, var);
}
172 | 
// Arithmetic mean of a[0..n-1]: sum_array(a, n) divided by n.
float mean_array(float *a, int n)
{
    const float total = sum_array(a, n);
    return total/n;
}
178 | 
// Sum of a[0..n-1], accumulated in single precision from the first element
// to the last. Returns 0 when n <= 0.
float sum_array(float *a, int n)
{
    float total = 0;
    int i = 0;
    while (i < n)
    {
        total += a[i];
        i++;
    }
    return total;
}
189 | 
// Population variance of a[0..n-1]: the squared deviations from
// mean_array(a, n) are summed and divided by n.
float variance_array(float *a, int n)
{
    const float mean = mean_array(a, n);
    float acc = 0;
    for (int i = 0; i < n; i++)
    {
        const float dev = a[i] - mean;
        acc += dev*dev;
    }
    return acc/n;
}
201 | 
202 | // mean squared error of array a
203 | float mse_array(float *a, int n)
204 | {
205 |     float sum = 0;
206 |     for (int i = 0; i < n; i++)
207 |     {
208 |         sum += a[i] * a[i];
209 |     }
210 |     return sqrt(sum/n);
211 | }
212 | 
213 | // difference of two squares
214 | float mag_array(float *a, int n)
215 | {
216 |     float sum = 0;
217 |     for (int i = 0; i < n; i++)
218 |     {
219 |         sum += a[i] * a[i];
220 |     }
221 |     return sqrt(sum);
222 | }
223 | 


--------------------------------------------------------------------------------
/yolo_detector.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // yolo_detector
  3 | //========================================================================
  4 | // @brief: Application for detecting objects
  5 | 
  6 | #include "yolo_detector.h"
  7 | 
  8 | /*
  9 |     // transfer value
 10 |     // argv[0] - datacfg     : cfg/coco.data
 11 |     // argv[1] - cfgfile     : cfg/tiny-yolo.cfg
 12 |     // argv[2] - weightfile  : tiny-yolo.weights
 13 |     // argv[3] - filename    : data/dog.jpg
 14 |     // argv[4] - thresh      : 0.24
 15 |     // argv[5] - hier_thresh : 0.5
 16 | */
 17 | 
 18 | void detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh)
 19 | {
 20 |     // load datacfg
 21 |     printf("datacfg:%s\n",datacfg);
 22 |     list *options   = read_data_cfg(datacfg);
 23 | 
 24 | /*
 25 |     // print options
 26 |         node *pnode = options->front;
 27 |         kvp  *dis;
 28 |         int counter = 0;
 29 |         printf("Option size:%d\n",options->size);
 30 |         while (pnode)
 31 |         {
 32 |             dis = (kvp *)pnode->val;
 33 |             printf("NO. %d: (1)key: %s; (2)value: %s;\n",counter,(char*)dis->key,(char*)dis->val);
 34 |             counter++;
 35 |             pnode = pnode->next;
 36 |         }
 37 | */
 38 | 
 39 |     char *name_list = option_find_str(options, "names", "data/names.list");
 40 |     //printf("name_list:%s\n",name_list);
 41 | 
 42 |     // name_list: data/coco.names
 43 |     char **names    = get_labels(name_list);
 44 | 
 45 | /*
 46 |     int size_names = sizeof(names);
 47 |     for (int i = 0; i < 80; i++)
 48 |     {
 49 |         printf("names NO. %d: %s;\n",i,names[i]);
 50 |     }
 51 | */
 52 |     // read labels
 53 |     image **alphabet = load_alphabet();
 54 | 
 55 |     // load cfgfile
 56 |     network net = parse_network_cfg(cfgfile);
 57 | 
 58 |     // load weitht file
 59 |     if (weightfile)
 60 |     {
 61 |         load_weights(&net,weightfile);
 62 |     }
 63 | 
 64 |     // setup net.batch = 1
 65 |     set_batch_network(&net, 1);
 66 |     char buffer[255];
 67 |     char *input = buffer;
 68 |     float nms = 0.4;
 69 | 
 70 |     // start timer
 71 |     Timer timer("yolo_detector");
 72 | 
 73 |     while (1)
 74 |     {
 75 |         // copy image name
 76 |         if (filename)
 77 |         {
 78 |             strncpy (input,filename,256);
 79 |         }
 80 |         else
 81 |         {
 82 |             printf("Please enter image path: ");
 83 |             fflush(stdout);
 84 |             input = fgets(input, 256, stdin);
 85 |             if (!input)
 86 |             {
 87 |                 return;
 88 |             }
 89 |             strtok(input,"\n");
 90 |         }
 91 |         //
 92 |         image im      = load_image_color(input,0,0);
 93 |         image sized   = resize_image(im, net.w, net.h);
 94 |         //printf("sized.h:%d; sized.w:%d; sized.c:%d;\n",sized.h,sized.w,sized.c);
 95 | /*
 96 |         // print input image
 97 |         for (int m = 0; m < 100; m++)
 98 |         {
 99 |             printf("im.data[%d]:%.12f;\n",m,im.data[m]);
100 |         }
101 | */
102 |         // region layer
103 |         layer l       = net.layers[net.n-1];
104 |         //
105 |         box *boxes    = (box *)calloc(l.w * l.h * l.n, sizeof(box));
106 |         float **probs = (float **)calloc(l.w * l.h * l.n, sizeof(float *));
107 |         for (int i = 0; i < l.w*l.h*l.n; i++)
108 |         {
109 |             probs[i] = (float *)calloc(l.classes+1, sizeof(float)); // ???
110 |         }
111 | 
112 |         //
113 |         float *X = sized.data;
114 | 
115 |         Timer timer("Total time");
116 |         // start prediction
117 |         printf("Start prediction...\n");
118 |         timer.start();
119 |         network_predict(net,X);
120 |         timer.stop();
121 |         printf("Prediction finishes!\n");
122 | 
123 |         // draw region boxes
124 |         printf("Getting region boxes...\n");
125 |         get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0, hier_thresh);
126 | /*
127 |         // verify **probs
128 |         float sum = 0.0;
129 |         printf("max_i:%d; max_j:%d;\n",l.w * l.h * l.n,l.classes+1);
130 |         for (int i = 0; i < l.w * l.h * l.n; i++)
131 |         {
132 |             for (int j = 0; j < l.classes+1; j++)
133 |             {
134 |                 if (probs[i][j] != 0)
135 |                 {
136 |                     printf("probs[%d][%d]:%.12f;\n",i,j,probs[i][j]);
137 |                 }
138 |                 sum += probs[i][j];
139 |             }
140 |         }
141 |         printf("sum:%.12f;\n",sum);
142 | */
143 |         //
144 |         if (l.softmax_tree && nms)
145 |         {
146 |             //printf("Enter 111111\n");
147 |             do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
148 |         }
149 |         else if (nms)
150 |         {
151 |             //printf("Enter 222222\n");
152 |             do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
153 |         }
154 | /*
155 |         // verify **probs
156 |                 float sum = 0.0;
157 |                 printf("max_i:%d; max_j:%d;\n",l.w * l.h * l.n,l.classes+1);
158 |                 for (int i = 0; i < l.w * l.h * l.n; i++)
159 |                 {
160 |                     for (int j = 0; j < l.classes+1; j++)
161 |                     {
162 |                         if (probs[i][j] != 0)
163 |                         {
164 |                             printf("probs[%d][%d]:%.12f;\n",i,j,probs[i][j]);
165 |                         }
166 |                         sum += probs[i][j];
167 |                     }
168 |                 }
169 |                 printf("sum:%.12f;\n",sum);
170 | */
171 | 
172 | 
173 |         printf("Start draw predictions...\n");
174 |         draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
175 |         printf("Save & print images...\n");
176 |         save_image(im, "predictions");
177 |         show_image(im, "predictions");
178 |         //printf("ch0\n");
179 | 
180 |         // free memory
181 |         free_image(im);
182 |         free_image(sized);
183 |         //printf("ch1\n");
184 |         free_ptrs((void **)probs, l.w*l.h*l.n);
185 |         //printf("ch2\n");
186 |         free(boxes);
187 |         //printf("ch3\n");
188 |         // where did we modify the value filename to jump out of the while loop?????
189 |         if (filename)
190 |         {
191 |             break;
192 |         }
193 |     }
194 |     printf("Exit program.\n");
195 | }
196 | 
197 | 
198 | 


--------------------------------------------------------------------------------
/connected_layer.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Connected layer
  3 | //========================================================================
  4 | // @brief: connected layer
  5 | 
  6 | #include "connected_layer.h"
  7 | 
  8 | // make connected layer
  9 | connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize)
 10 | {
 11 |     connected_layer l;
 12 |     init_layer(l);
 13 |     //
 14 |     l.type            = CONNECTED;
 15 |     l.inputs          = inputs;
 16 |     l.outputs         = outputs;
 17 |     l.batch           = batch;
 18 |     l.batch_normalize = batch_normalize;
 19 |     //
 20 |     l.h     = 1;
 21 |     l.w     = 1;
 22 |     l.c     = inputs;
 23 |     l.out_h = 1;
 24 |     l.out_w = 1;
 25 |     l.out_c = outputs;
 26 |     //
 27 |     l.output = (float *)calloc(batch*outputs, sizeof(float));
 28 |     l.delta  = (float *)calloc(batch*outputs, sizeof(float));
 29 |     //
 30 |     l.weight_updates = (float *)calloc(inputs*outputs, sizeof(float));
 31 |     l.bias_updates   = (float *)calloc(inputs*outputs, sizeof(float));
 32 |     //
 33 |     l.weights = (float *)calloc(outputs*inputs, sizeof(float));
 34 |     l.biases  = (float *)calloc(outputs, sizeof(float));
 35 |     // function pointers
 36 |     l.forward  = forward_connected_layer;
 37 |     l.backward = backward_connected_layer;
 38 |     l.update   = update_connected_layer;
 39 |     //
 40 |     float scale = sqrt(2.0/inputs);
 41 |     for (int i = 0; i < outputs*inputs; i++)
 42 |     {
 43 |         l.weights[i] = scale * rand_uniform(-1,1);
 44 |     }
 45 |     //
 46 |     for (int i = 0; i < outputs; i++)
 47 |     {
 48 |         l.biases[i] = 0;
 49 |     }
 50 |     //
 51 |     if (batch_normalize)
 52 |     {
 53 |         l.scales = (float *)calloc(outputs, sizeof(float));
 54 |         l.scale_updates = (float *)calloc(outputs, sizeof(float));
 55 |         //
 56 |         for (int i = 0; i < outputs; i++)
 57 |         {
 58 |             l.scales[i] = 1;
 59 |         }
 60 |         //
 61 |         l.mean           = (float *)calloc(outputs, sizeof(float));
 62 |         l.mean_delta     = (float *)calloc(outputs, sizeof(float));
 63 |         l.variance       = (float *)calloc(outputs, sizeof(float));
 64 |         l.variance_delta = (float *)calloc(outputs, sizeof(float));
 65 |         //
 66 |         l.rolling_mean     = (float *)calloc(outputs, sizeof(float));
 67 |         l.rolling_variance = (float *)calloc(outputs, sizeof(float));
 68 |         //
 69 |         l.x      = (float *)calloc(outputs, sizeof(float));
 70 |         l.x_norm = (float *)calloc(outputs, sizeof(float));
 71 |     }
 72 |     //
 73 |     l.activation = activation;
 74 |     fprintf(stderr, "connected                            %4d  ->  %4d\n", inputs, outputs);
 75 |     return l;
 76 | }
 77 | 
 78 | // Forward pass of a connected (dense) layer on the CPU.
 79 | void forward_connected_layer(connected_layer l, network_state state)
 80 | {
 81 |     // Reset every output activation of the batch before anything accumulates.
 82 |     fill_cpu(l.outputs*l.batch, 0, l.output, 1);
 83 |     // Operands of the input-times-weights product. The gemm call itself is
 84 |     // commented out below, so these are set up but never consumed.
 85 |     int rows = l.batch;
 86 |     int inner = l.inputs;
 87 |     int cols = l.outputs;
 88 |     float *lhs = state.input;
 89 |     float *rhs = l.weights;
 90 |     float *out = l.output;
 91 |     //gemm(0,1,rows,cols,inner,1,lhs,inner,rhs,inner,1,out,cols);
 92 |     if (!l.batch_normalize) { return; }
 93 |     if (state.train)
 94 |     {
 95 |         mean_cpu(l.output, l.batch, l.outputs, 1, l.mean);
 96 |         variance_cpu(l.output, l.mean, l.batch, l.outputs, 1, l.variance);
 97 |         // Blend the batch statistics into the rolling ones with factors .95 / .05.
 98 |         scal_cpu(l.outputs, .95, l.rolling_mean, 1);
 99 |         axpy_cpu(l.outputs, .05, l.mean, 1, l.rolling_mean, 1);
100 |         scal_cpu(l.outputs, .95, l.rolling_variance, 1);
101 |         axpy_cpu(l.outputs, .05, l.variance, 1, l.rolling_variance, 1);
102 |         // Copy l.output to l.x, normalise, copy to l.x_norm. NOTE(review): each copy spans l.outputs*l.batch floats - confirm buffer sizes.
103 |         copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
104 |         normalize_cpu(l.output, l.mean, l.variance, l.batch, l.outputs, 1);
105 |         copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1);
106 |     }
107 |     else
108 |     {
109 |         // Outside training the rolling statistics are used instead.
110 |         normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.outputs, 1);
111 |     }
112 |     // Multiply each output unit by its scale (both branches).
113 |     scale_bias(l.output, l.scales, l.batch, l.outputs, 1);
114 | }
115 | 
116 | // Backward pass of a connected (dense) layer on the CPU.
117 | void backward_connected_layer(connected_layer l, network_state state)
118 | {
119 |     // Apply the activation gradient to l.delta over the whole batch.
120 |     gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
121 |     // Add each sample's l.outputs-long slice of l.delta into l.bias_updates.
122 |     for (int sample = 0; sample < l.batch; sample++)
123 |     {
124 |         float *sample_delta = l.delta + sample*l.outputs;
125 |         axpy_cpu(l.outputs, 1, sample_delta, 1, l.bias_updates, 1);
126 |     }
127 |     // Batch-norm part of the backward pass. l.delta is passed both as an input and
128 |     // (for scale_bias, and presumably normalize_delta_cpu) as the destination, so the order is kept.
129 |     if (l.batch_normalize)
130 |     {
131 |         backward_scale_cpu(l.x_norm, l.delta, l.batch, l.outputs, 1, l.scale_updates);
132 |         scale_bias(l.delta, l.scales, l.batch, l.outputs, 1);
133 |         mean_delta_cpu(l.delta, l.variance, l.batch, l.outputs, 1, l.mean_delta);
134 |         variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.outputs, 1, l.variance_delta);
135 |         normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.outputs, 1, l.delta);
136 |     }
137 |     // Operand set-up for the two products of the backward pass. No gemm call is
138 |     // active here, so neither l.weight_updates nor state.delta is written below.
139 |     int rows = l.outputs;             // first set: delta / input / weight_updates
140 |     int inner = l.batch;
141 |     int cols = l.inputs;
142 |     float *lhs = l.delta;
143 |     float *rhs = state.input;
144 |     float *out = l.weight_updates;
145 |     // Second set, reusing the same locals: delta / weights / state.delta.
146 |     rows = l.batch;
147 |     inner = l.outputs;
148 |     cols = l.inputs;
149 |     lhs = l.delta;
150 |     rhs = l.weights;
151 |     out = state.delta;
152 |     // state.delta may be null; the guarded product is disabled as well.
153 |     if (out)
154 |     {
155 |         //gemm(0,0,rows,cols,inner,1,lhs,inner,rhs,cols,1,out,cols);
156 |     }
157 | }
158 | 
159 | // Apply the accumulated bias, scale and weight updates of a connected layer.
160 | void update_connected_layer(connected_layer l, int batch, float learning_rate, float momentum, float decay)
161 | {
162 |     const float step = learning_rate/batch;       // factor shared by every parameter axpy below
163 |     const int weight_count = l.inputs*l.outputs;  // length used for l.weights / l.weight_updates
164 |     // Biases: axpy the updates in, then scal the update buffer by momentum.
165 |     axpy_cpu(l.outputs, step, l.bias_updates, 1, l.biases, 1);
166 |     scal_cpu(l.outputs, momentum, l.bias_updates, 1);
167 |     if (l.batch_normalize) {                      // same pair of calls for the batch-norm scales
168 |         axpy_cpu(l.outputs, step, l.scale_updates, 1, l.scales, 1);
169 |         scal_cpu(l.outputs, momentum, l.scale_updates, 1);
170 |     }
171 |     axpy_cpu(weight_count, -decay*batch, l.weights, 1, l.weight_updates, 1);  // decay term goes into the updates first
172 |     axpy_cpu(weight_count, step, l.weight_updates, 1, l.weights, 1);
173 |     scal_cpu(weight_count, momentum, l.weight_updates, 1);
174 | }
175 | 
176 | // Fold the batch-norm scale, rolling mean and rolling variance into the weights and biases.
177 | void denormalize_connected_layer(layer l)
178 | {
179 |     // Output unit i owns the l.inputs weights starting at l.weights[i*l.inputs].
180 |     for (int i = 0; i < l.outputs; i++)
181 |     {
182 |         float scale = l.scales[i]/sqrt(l.rolling_variance[i] + 0.000001);  // small constant keeps the divisor away from zero
183 |         for (int j = 0; j < l.inputs; j++)  // must step j: stepping i here leaves j at 0 and the loop never ends
184 |         {
185 |             l.weights[i*l.inputs + j] *= scale;
186 |         }
187 |         l.biases[i] -= l.rolling_mean[i] * scale;
188 |         l.scales[i] = 1;            // leave identity batch-norm parameters behind
189 |         l.rolling_mean[i] = 0;
190 |         l.rolling_variance[i] = 1;
191 |     }
192 | }
193 | 
194 | // Print statistics of the layer's scales (batch-norm only), biases and weights.
195 | void statistics_connected_layer(layer l)
196 | {
197 |     // Scales exist only for batch-normalised layers.
198 |     if(l.batch_normalize)
199 |     {
200 |         printf("Scales ");
201 |         print_statistics(l.scales, l.outputs);
202 |         // Rolling mean / variance reporting is disabled:
203 |         //   printf("Rolling Mean ");     print_statistics(l.rolling_mean, l.outputs);
204 |         //   printf("Rolling Variance "); print_statistics(l.rolling_variance, l.outputs);
205 |     }
206 |     printf("Biases ");
207 |     print_statistics(l.biases, l.outputs);
208 |     // The weight buffer is addressed as l.weights[i*l.inputs + j] elsewhere in this
209 |     // file, i.e. it holds l.inputs*l.outputs values; report over all of them.
210 |     printf("Weights ");
211 |     print_statistics(l.weights, l.inputs*l.outputs);
212 | }
213 | 


--------------------------------------------------------------------------------
/convolutional_layer.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Convolutional layer
  3 | //========================================================================
  4 | // @brief: convolutional layer
  5 | 
  6 | #include "convolutional_layer.h"
  7 | #include <time.h>
  8 | 
  9 | convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride,
 10 |         int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam)
 11 | {
 12 |     // Builds a convolutional layer: records the geometry, allocates the parameter,
 13 |     // output and optional batch-norm / adam buffers, and draws the initial weights.
 14 |     const int weight_count = c*n*size*size;   // n filters, each size x size x c
 15 | 
 16 |     convolutional_layer l;
 17 |     init_layer(l);
 18 |     l.type = CONVOLUTIONAL;
 19 | 
 20 |     // Geometry and flags, taken directly from the arguments.
 21 |     l.batch = batch;
 22 |     l.h = h;
 23 |     l.w = w;
 24 |     l.c = c;
 25 |     l.n = n;
 26 |     l.size = size;
 27 |     l.stride = stride;
 28 |     l.pad = padding;
 29 |     l.binary = binary;
 30 |     l.xnor = xnor;
 31 |     l.batch_normalize = batch_normalize;
 32 | 
 33 |     // Parameters and their update buffers; calloc hands them back zero-filled.
 34 |     l.weights        = (float *)calloc(weight_count, sizeof(float));
 35 |     l.weight_updates = (float *)calloc(weight_count, sizeof(float));
 36 |     l.biases         = (float *)calloc(n, sizeof(float));
 37 |     l.bias_updates   = (float *)calloc(n, sizeof(float));
 38 | 
 39 |     // Initial weights: rand_uniform(-1, 1) draws, scaled by sqrt(2/(size*size*c)).
 40 |     float scale = sqrt(2./(size*size*c));
 41 |     for (int idx = 0; idx < weight_count; idx++)
 42 |     {
 43 |         l.weights[idx] = scale*rand_uniform(-1, 1);
 44 |     }
 45 | 
 46 |     // Output geometry. The helpers take l by value and read h/w, pad, size and
 47 |     // stride from it, so they have to run after the assignments above.
 48 |     l.out_h = convolutional_out_height(l);
 49 |     l.out_w = convolutional_out_width(l);
 50 |     l.out_c = n;
 51 |     l.outputs = l.out_h * l.out_w * l.out_c;
 52 |     l.inputs = l.w * l.h * l.c;
 53 | 
 54 |     // Output and delta buffers sized for the whole batch.
 55 |     l.output = (float *)calloc(l.batch*l.outputs, sizeof(float));
 56 |     l.delta  = (float *)calloc(l.batch*l.outputs, sizeof(float));
 57 | 
 58 |     // Only the forward function is assigned in this function; backward/update are
 59 |     // not set here, and no binary/xnor weight buffers are allocated.
 60 |     l.forward = forward_convolutional_layer;
 61 | 
 62 |     if (batch_normalize)
 63 |     {
 64 |         // Per-filter scale (starting at 1) and its update buffer.
 65 |         l.scales        = (float *)calloc(n, sizeof(float));
 66 |         l.scale_updates = (float *)calloc(n, sizeof(float));
 67 |         for (int f = 0; f < n; f++)
 68 |         {
 69 |             l.scales[f] = 1;
 70 |         }
 71 | 
 72 |         // Per-filter statistics and their deltas.
 73 |         l.mean             = (float *)calloc(n, sizeof(float));
 74 |         l.variance         = (float *)calloc(n, sizeof(float));
 75 |         l.mean_delta       = (float *)calloc(n, sizeof(float));
 76 |         l.variance_delta   = (float *)calloc(n, sizeof(float));
 77 |         l.rolling_mean     = (float *)calloc(n, sizeof(float));
 78 |         l.rolling_variance = (float *)calloc(n, sizeof(float));
 79 | 
 80 |         // Per-activation buffers for the whole batch.
 81 |         l.x      = (float *)calloc(l.batch*l.outputs, sizeof(float));
 82 |         l.x_norm = (float *)calloc(l.batch*l.outputs, sizeof(float));
 83 |     }
 84 | 
 85 |     if (adam)
 86 |     {
 87 |         // Extra per-weight buffers requested through the adam flag.
 88 |         l.adam = 1;
 89 |         l.m = (float *)calloc(weight_count, sizeof(float));
 90 |         l.v = (float *)calloc(weight_count, sizeof(float));
 91 |     }
 92 | 
 93 |     // get_workspace_size reads out_h/out_w, size and c from l, all set above.
 94 |     l.workspace_size = get_workspace_size(l);
 95 |     l.activation = activation;
 96 | 
 97 |     // Log the layer shape: filters, kernel, stride, input w x h x c -> output w x h x c.
 98 |     fprintf(stderr, "conv  %5d %2d x%2d /%2d  %4d x%4d x%4d   ->  %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
 99 | 
100 |     return l;
101 | }
102 | 
103 | // Workspace size in bytes: out_h*out_w*size*size*c floats.
104 | size_t get_workspace_size(layer l)
105 | {
106 |     return sizeof(float) * (size_t)l.c * l.size * l.size * l.out_w * l.out_h;
107 | }
108 | 
109 | // Forward pass of a convolutional layer, computed by gemm2 on groups of SIZE_BATCH filters.
110 | void forward_convolutional_layer(convolutional_layer l, network_state state)
111 | {
112 |     // Steps: clear l.output, re-layout the input (im2col / im2col_extra), run gemm2
113 |     // group by group, convert the results back (col2img / col2img_extra), then apply
114 |     // the optional batch-norm, the biases and the activation.
115 |     // NOTE(review): nothing here iterates over l.batch and state.input is passed on
116 |     // as one image - confirm callers only use a batch of 1.
117 |     const int out_h = convolutional_out_height(l);
118 |     const int out_w = convolutional_out_width(l);
119 |     // Clear the output buffer.
120 |     fill_cpu(l.outputs * l.batch, 0, l.output, 1);
121 | 
122 |     const int filters = l.n;                               // "m" in the log line below
123 |     const int filter_len = l.size * l.size * l.c;          // values per filter
124 |     const int out_pixels = out_h * out_w;                  // "n" in the log line below
125 |     const int padded_pixels = (out_h + 2) * (out_w + 2);   // output plane grown by one pixel per side
126 |     const int leftover = filters % SIZE_BATCH;             // filters in a final, partial group
127 | 
128 |     // Re-laid-out input image for gemm2.
129 |     float *image = (float *) sds_alloc ( l.c * padded_pixels * sizeof(float) );
130 |     // gemm2 results for every group; large enough for count*SIZE_BATCH filters.
131 |     float *col_base = (float *) sds_alloc ((filters*out_pixels+out_pixels*(SIZE_BATCH-leftover)) * sizeof(float));
132 |     // NOTE(review): neither sds_alloc result is checked before use.
133 |     // Weights of the group being processed. NOTE(review): fixed 3*3*1024*16 floats on
134 |     // the stack - confirm SIZE_BATCH*filter_len never exceeds that.
135 |     float a_buf[3*3*1024*16];
136 | 
137 |     // Group count and the matching input re-layout.
138 |     const int batch = SIZE_BATCH;
139 |     int count = filters/SIZE_BATCH;
140 |     if (leftover == 0)
141 |     {
142 |         im2col(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, image);
143 |     }
144 |     else
145 |     {
146 |         count += 1;   // one extra, partially filled group
147 |         im2col_extra(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, image);
148 |     }
149 | 
150 |     // Timing of the gemm2 calls; Duration accumulates microseconds across calls.
151 |     struct timeval t0, t1;
152 |     static double Duration = 0;
153 |     const double DurationTemp = Duration;   // value on entry, for the per-call report
154 | 
155 |     // col_cursor walks through col_base one group at a time.
156 |     float *col_cursor = col_base;
157 |     for (int x = 0; x < count; x++)
158 |     {
159 |         if (x == 0)
160 |         {
161 |             // Log the layer shape once per call.
162 |             printf("Layer info: channel = %d, x = %d, m = %d, n = %d\n", l.c, x, filters, out_pixels);
163 |             fflush(stdout);
164 |         }
165 |         // Stage this group's weights; a partial last group copies only its own filters.
166 |         const int last_partial = (leftover != 0 && x == count - 1);
167 |         const int copy_count = last_partial ? leftover * filter_len : batch * filter_len;
168 |         const float *group_weights = l.weights + x*batch*filter_len;
169 |         for (int ai = 0; ai < copy_count; ai++)
170 |         {
171 |             a_buf[ai] = group_weights[ai];
172 |         }
173 | 
174 |         // Time and run the kernel for this group.
175 |         gettimeofday(&t0, 0);
176 |         gemm2( a_buf,image,col_cursor,filters,out_pixels,filter_len,l.c,l.h,l.w,l.size, l.pad);
177 |         gettimeofday(&t1, 0);
178 |         Duration += (t1.tv_sec-t0.tv_sec)*1000000 + t1.tv_usec-t0.tv_usec;
179 | 
180 |         col_cursor += batch * out_pixels;
181 |     }
182 | 
183 |     // Report the gemm2 time of this call, plus the running total when the layer has 425 filters.
184 |     std::cout << "Duration for channel = " << l.c << " : " << ( Duration - DurationTemp ) / 1000 << " msec" << std::endl;
185 |     if (filters == 425)
186 |     {
187 |         std::cout << "Duration in all: " << Duration / 1000 << " msec" << std::endl;
188 |     }
189 | 
190 |     // Convert the grouped results back into l.output, then release both buffers.
191 |     if (leftover == 0)
192 |     {
193 |         col2img(col_base,l.output,filters,out_pixels,count,SIZE_BATCH);
194 |     }
195 |     else
196 |     {
197 |         col2img_extra(col_base,l.output,filters,out_pixels,count,SIZE_BATCH);
198 |     }
199 |     sds_free(image);
200 |     sds_free(col_base);
201 | 
202 |     // Post-processing on l.output.
203 |     if (l.batch_normalize)
204 |     {
205 |         forward_batchnorm_layer(l, state);
206 |     }
207 |     add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
208 |     activate_array(l.output, filters*out_pixels*l.batch, l.activation);
209 | }
210 | 
211 | // Output height: (h + 2*pad - size)/stride + 1, with integer division.
212 | int convolutional_out_height(convolutional_layer l)
213 | {
214 |     return 1 + (l.h + 2*l.pad - l.size)/l.stride;
215 | }
216 | 
217 | // Output width: (w + 2*pad - size)/stride + 1, with integer division.
218 | int convolutional_out_width(convolutional_layer l)
219 | {
220 |     return 1 + (l.w + 2*l.pad - l.size)/l.stride;
221 | }
222 | 
223 | // Add biases[i] to each of the `size` values of channel i, for every item of the batch.
224 | void add_bias(float *output, float *biases, int batch, int n, int size)
225 | {
226 |     int pos = 0;  // flat index; equals (b*n + i)*size + j wherever it is used
227 |     for (int b = 0; b < batch; b++)
228 |     {
229 |         for (int i = 0; i < n; i++)
230 |         {
231 |             for (int j = 0; j < size; j++, pos++)
232 |             {
233 |                 output[pos] += biases[i];
234 |             }
235 |         }
236 |     }
237 | }
238 | 
239 | // Multiply each of the `size` values of channel i by scales[i], for every item of the batch.
240 | void scale_bias(float *output, float *scales, int batch, int n, int size)
241 | {
242 |     int pos = 0;  // flat index; equals (b*n + i)*size + j wherever it is used
243 |     for (int b = 0; b < batch; b++)
244 |     {
245 |         for (int i = 0; i < n; i++)
246 |         {
247 |             for (int j = 0; j < size; j++, pos++)
248 |             {
249 |                 output[pos] *= scales[i];
250 |             }
251 |         }
252 |     }
253 | }
254 | 
255 | // Exchange the layer's weights and binary_weights pointers.
256 | void swap_binary(convolutional_layer *l)
257 | {
258 |     float *previous = l->weights;     // keep the current pointer while it is overwritten
259 |     l->weights = l->binary_weights;
260 |     l->binary_weights = previous;
261 | }
262 | 


--------------------------------------------------------------------------------
/gemm2.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Gemm source file
  3 | //========================================================================
  4 | // @brief: top hardware function - convolutional computation
  5 | 
  6 | #include "gemm.h"
  7 | 
  8 | /*
  9 |     gemm2: convolution of one group of SIZE_BATCH filters over the whole input image.
 10 |     A            : filter weights, read as A[i*ksize*ksize*channels + k*ksize*ksize + j]
 11 |     B            : input image, read line by line with (width+2*pad)*channels values per line
 12 |     C            : results, written as C[pixel*SIZE_BATCH + filter]
 13 |     num_filter   : l.n - number of filters / output channels (e.g. 16); not read in this function
 14 |     size_channel : out_h * out_w - pixels per output channel (e.g. 416x416); only forwarded to gemm_extra2
 15 |     size_filter  : l.size * l.size * l.c - values per filter (e.g. 3x3x3); only forwarded to gemm_extra2
 16 |     channels, height, width : input dimensions;  ksize : filter side length;  pad : padding per side
 17 |     When ksize == NUM_WINDOW_BUFFER_EXTRA the work is handed to gemm_extra2; otherwise the
 18 |     window-buffer code below runs, which addresses a fixed set of nine window entries (0..8).
 19 |  */
 20 | void gemm2(float A[MAX_A], float B[MAX_B], float C[MAX_C],int num_filter, int size_channel,int size_filter,\
 21 |         int channels, int height, int width, int ksize, int pad)
 22 | {
 23 |     // local copy of the filter weights, indexed [filter][kernel position][channel]
 24 |     INPUT_32 weights[SIZE_BATCH][SIZE_FILTER][MAX_FILTER_DEPTH];
 25 |     // line buffers: local copies of NUM_LINE_BUFFER image lines
 26 | 	INPUT_32 line_buffer[NUM_LINE_BUFFER][SIZE_LINE_BUFFER];
 27 |     // window buffer: the image values currently under the filter, per channel
 28 | 	INPUT_32 window_buffer[NUM_WINDOW_BUFFER][SIZE_WINDOW_BUFFER];
 29 |     // per-step copy of the window values, read by the systolic core
 30 |     INPUT_32 ParallelWindow[SystolicKernelSize][NUM_WINDOW_BUFFER];
 31 |     // accumulators of the window-buffer path, indexed [filter][step]
 32 |     OUTPUT_64 output[SIZE_BATCH][SystolicKernelSize];
 33 |     // accumulators passed to gemm_extra2, one per filter
 34 |     OUTPUT_64 output2[SIZE_BATCH];
 35 |     // line-buffer index, recomputed for every fetch
 36 |     int index_lb;
 37 | 
 38 | 
 39 | #pragma HLS array_partition variable=weights complete dim=1
 40 | #pragma HLS array_partition variable=weights complete dim=2
 41 | #pragma HLS array_partition variable=line_buffer complete dim=1
 42 | #pragma HLS array_partition variable=window_buffer complete dim=1
 43 | #pragma HLS array_partition variable=output complete
 44 | #pragma HLS array_partition variable=output2 complete
 45 | #pragma HLS array_partition variable=ParallelWindow complete dim=1
 46 | 
 47 |     // operand registers of the systolic core: inA carries weights, inB carries window values
 48 |     INPUT_32 inA[SIZE_BATCH][SystolicKernelSize];
 49 |     INPUT_32 inB[SIZE_BATCH][SystolicKernelSize];
 50 | #pragma HLS array_partition variable=inA complete dim=0
 51 | #pragma HLS array_partition variable=inB complete dim=0
 52 | 
 53 | 
 54 |     
 55 | 
 56 |     if (ksize == NUM_WINDOW_BUFFER_EXTRA)
 57 |     {
 58 |         // separate compute core for this kernel size (the last layer, per the original note)
 59 |         gemm_extra2(A,B,C,size_channel,size_filter,ksize,weights,output2);
 60 |     }
 61 |     else
 62 |     {
 63 |         // copy loop: store filters/weights in local BRAM
 64 |         // index_j swaps the row and column of kernel position j
 65 |         Copy_weights:
 66 |         for (int i = 0; i < SIZE_BATCH; i++)
 67 |         {
 68 |             for (int k = 0; k < channels; k++)
 69 |             {
 70 |                 for (int j = 0; j < ksize*ksize; j++)
 71 |                 {
 72 | #pragma HLS PIPELINE II=1
 73 |                     int index_j = (j % ksize) * ksize + j / ksize;
 74 |                     weights[i][index_j][k] = A[i*ksize*ksize*channels+k*ksize*ksize+j];
 75 |                 }
 76 |             }
 77 |         }
 78 | 
 79 |         Copy_image1:// first line of B (all positions and channels) goes into line_buffer[2]
 80 |         for (int w = 0; w < (width+2*pad); w++)
 81 |         {
 82 |             for (int c = 0; c < channels; c++)
 83 |             {
 84 | #pragma HLS PIPELINE II=1
 85 |                 int index = w * channels + c;
 86 |                 line_buffer[2][index] = B[index];
 87 |             }
 88 |         }
 89 | 
 90 |         Copy_image2:// move the first position's channels to line_buffer[1], then load the first position of the next line
 91 |         for (int c = 0; c < channels; c++)
 92 |         {
 93 | #pragma HLS PIPELINE II=1
 94 |             line_buffer[1][c] = line_buffer[2][c];
 95 |             line_buffer[2][c] =  B[(width+2*pad)*channels+c];
 96 |         }
 97 | 
 98 |         // CountStep: positions handled per pass of the step loop; step: that loop's counter, read again after it
 99 |         int CountStep = 1;
100 |         int step = 0;
101 | 
102 |         // start calculation
103 |         Cal_h:
104 |         for (int h = 0; h < (height+2*pad-1); h++)
105 |         {
106 |             Cal_w:
107 |             for ( int w = 0; w < (width+2*pad); )  // w is advanced by "w += step" at the bottom of the body
108 |             {
109 |                 // last iteration - after last pixel of map - do nothing
110 |                 if ((h == height+2*pad-2) && (w == width+2*pad-1))
111 |                 {
112 |                     break;
113 |                 }
114 | 
115 |                 // flag is 0 for h == 0, w == 0 and w == width+2*pad-1: those iterations only fetch image data
116 |                 int flag = (h != 0) && (w != 0) && (w != width+2*pad-1);
117 | 
118 |                 // init output array
119 |                 Init_output:
120 |                 if (flag)
121 |                 {
122 |                     for (int i = 0; i < SIZE_BATCH; i++)
123 |                     {
124 | #pragma HLS unroll
125 |                         for (int j = 0; j < SystolicKernelSize; j++)
126 |                         {
127 | #pragma HLS unroll
128 |                             output[i][j] = 0;
129 |                         }
130 |                     }
131 |                 }
132 |                 // data_fetch & computation
133 |                 Cal_c:
134 |                 // any w other than the first and last of a line: handle SystolicKernelSize positions per pass
135 |                 if( flag == 1 || ( h == 0 && w != 0 && w != width + 2 * pad - 1 ) )
136 |                     CountStep = SystolicKernelSize;
137 | 
138 |                 for (int c = 0; c < channels; c++)
139 |                 {
140 | #pragma HLS DEPENDENCE variable=line_buffer inter false
141 | #pragma HLS DEPENDENCE variable=index_lb inter false
142 | 
143 |                     for( step = 0; step < CountStep; step++ ){
144 |                         // reaching the last position of the line (checked on channel 0 only)
145 |                         if( w + 1 + step == width + 2 * pad && c == 0 )
146 |                         {
147 |                             // already at the first step: handle just this one position
148 |                             if( step == 0 )
149 |                                 CountStep = 1;
150 |                             // otherwise shorten the pass to the steps done so far and stop
151 | 							else
152 |                             {
153 |                                 CountStep = step;
154 |                                 break;
155 |                             }
156 |                         }
157 | 
158 |                         // update window buffer: entries 3..8 move to 0..5 and are copied to ParallelWindow
159 |                         ParallelWindow[step][0] = ( window_buffer[0][c] = window_buffer[3][c] );
160 |                         ParallelWindow[step][1] = ( window_buffer[1][c] = window_buffer[4][c] );
161 |                         ParallelWindow[step][2] = ( window_buffer[2][c] = window_buffer[5][c] );
162 |                         ParallelWindow[step][3] = ( window_buffer[3][c] = window_buffer[6][c] );
163 |                         ParallelWindow[step][4] = ( window_buffer[4][c] = window_buffer[7][c] );
164 |                         ParallelWindow[step][5] = ( window_buffer[5][c] = window_buffer[8][c] );
165 |                         // update line buffer: work out which position (fetch_w) and line (fetch_h) to read next
166 |                         int fetch_w;
167 |                         if( w == 0 || w + 1 + step == width + 2 * pad )
168 |                             fetch_w = ( w + 1 ) % ( width + 2 * pad );
169 |                         else
170 |                             fetch_w = ( w + 1 - ( w - 1 ) % SystolicKernelSize ) % ( width + 2 * pad );
171 |                         int fetch_h = h + 1 + ( w + 1 ) / ( width + 2 * pad );
172 |                         // index_lb addresses the line buffers, index_input the same element inside B
173 |                         index_lb = fetch_w * channels + c * CountStep + step;  //column
174 |                         int index_input = fetch_h * (width+2*pad) * channels + fetch_w * channels + c * CountStep + step;
175 |                         // rotate the three line buffers at index_lb, read one new value from B, fill window entries 6..8
176 |                         ParallelWindow[step][6] = ( window_buffer[6][c] = (line_buffer[0][index_lb] = line_buffer[1][index_lb]) );
177 |                         ParallelWindow[step][7] = ( window_buffer[7][c] = (line_buffer[1][index_lb] = line_buffer[2][index_lb]) );
178 |                         ParallelWindow[step][8] = ( window_buffer[8][c] = (line_buffer[2][index_lb] = B[index_input]) );
179 | 
180 | 
181 |                     }
182 | 
183 | 
184 | 
185 |                     // SIZE_BATCH x SystolicKernelSize multiply-accumulate on the systolic core
186 |                     if (flag)
187 |                     {
188 |                         // clear the operand registers of the systolic core
189 |                         for( int j = 0; j < SIZE_BATCH; j++ ){
190 | #pragma HLS pipeline
191 |                             for( int i = 0; i < SystolicKernelSize; i++ ){
192 |                                 inA[j][i]= 0;
193 |                                 inB[j][i] = 0;
194 |                             }
195 |                         }
196 | 
197 |                         // cycle count depends on both array sizes and on step (the positions fetched above)
198 |                         for( int r = 0; r < SIZE_BATCH + SIZE_FILTER + step - 2; r++ ){
199 | #pragma HLS pipeline
200 |                             // shift inA one place along j and inB one place along i
201 |                             for (int i = 0; i < SIZE_BATCH; i++)
202 |                                 for (int j = SystolicKernelSize - 1; j >= 1; j--)
203 |                                     inA[i][j] = inA[i][j-1];
204 | 
205 |                             for (int i = SIZE_BATCH - 1; i >= 1; i--)
206 |                                 for (int j = 0; j < SystolicKernelSize; j++)
207 |                                     inB[i][j] = inB[i-1][j];
208 | 
209 |                             // feed inA[i][0] with weights and inB[0][j] with window values, delayed by i / j cycles
210 |                             for( int i = 0; i < SIZE_BATCH; i++ )
211 |                                 if( r >= i && r < i + SIZE_FILTER )
212 |                                     inA[i][0] = weights[i][r-i][c];
213 |                                 else
214 |                                     inA[i][0] = 0;
215 | 
216 |                             for (int j = 0; j < SystolicKernelSize; j++)
217 |                                 if( r >= j && r < j + SIZE_FILTER )
218 |                                     inB[0][j] = ParallelWindow[j][r-j];
219 |                                 else
220 |                                     inB[0][j] = 0;
221 | 
222 |                             // processing elements: multiply-accumulate every (i, j) pair
223 |                             for( int i = 0; i < SIZE_BATCH; i++ )
224 |                                 for( int j = 0; j < SystolicKernelSize; j++ )
225 |                                     output[i][j] += inA[i][j] * inB[i][j];
226 | 
227 |                         }
228 | 
229 |                     }
230 |                 }
231 |                 // output results: one pixel per step handled above
232 |                 if (flag)
233 |                 {
234 |                     for( int OutChannel = 0; OutChannel < step; OutChannel++ ){
235 |                         // output pixel index; uses `width` as the output line length
236 |                         int index_c = ( h - 1 ) * width + w - 1 + OutChannel;
237 |                         Output:
238 |                         for (int i = 0; i < SIZE_BATCH; i++)
239 |                         {
240 | #pragma HLS DEPENDENCE variable=output inter false
241 | #pragma HLS PIPELINE II=1
242 |                             // output final result: SIZE_BATCH consecutive values per pixel
243 | 
244 |                             C[index_c*SIZE_BATCH+i] = output[i][OutChannel];
245 | 
246 |                         }
247 |                     }
248 |                 }
249 |                 // fall back to single steps once the end of the line was reached, then advance w
250 |                 if( w + 1 + step == width + 2 * pad )
251 |                     CountStep = 1;
252 |                 w += step;
253 |             }
254 |         }
255 |     }
256 | 
257 | }
258 | 
259 | // gemm_extra2: multiply-accumulate path that gemm2 calls when ksize == NUM_WINDOW_BUFFER_EXTRA (filter size 1x1 per the original note)
260 | void gemm_extra2(float A[MAX_A], float B[MAX_B], float C[MAX_C],int size_channel,int size_filter,int ksize,
261 |         INPUT_32 weights[SIZE_BATCH][SIZE_FILTER][MAX_FILTER_DEPTH],OUTPUT_64 output[SIZE_BATCH])
262 | {
263 |     // copy loop: store weights/filters in local BRAM as weights[filter][kernel position][depth]
264 |     Copy_weights_E:
265 |     for (int i = 0; i < SIZE_BATCH; i++)
266 |     {
267 |         for (int k = 0; k < size_filter; k++) // position along the filter depth (size_filter values per filter)
268 |         {
269 |             for (int j = 0; j < ksize*ksize; j++) // kernel positions (a single one when ksize is 1)
270 |             {
271 | #pragma HLS PIPELINE II=1
272 |                 weights[i][j][k] = A[i*ksize*ksize*size_filter+k*ksize*ksize+j];
273 |             }
274 |         }
275 |     }
276 |     // start calculation: one output pixel per iteration of i
277 |     Cal_t_E:
278 |     for (int i = 0; i < size_channel; i++)
279 |     {
280 |         // init output: clear the SIZE_BATCH accumulators
281 |         Init_E:
282 |         for (int k = 0; k < SIZE_BATCH; k++)
283 |         {
284 | #pragma HLS unroll
285 |             output[k] = 0;
286 |         }
287 |         // accumulate size_filter products per filter (1024 mul+add per the original note)
288 |         Cal_L1_E:
289 |         for (int j = 0; j < size_filter; j++)
290 |         {
291 | #pragma HLS PIPELINE II=1
292 |             INPUT_32 input = B[i*size_filter+j];
293 |             for (int k = 0; k < SIZE_BATCH; k++)
294 |             {
295 |                 Cal_L2_E:
296 |                 output[k] += input * weights[k][0][j];  // only kernel position 0 is read here
297 |             }
298 |         }
299 |         // output results: SIZE_BATCH consecutive values per pixel
300 |         Output_E:
301 |         for (int j = 0; j < SIZE_BATCH; j++)
302 |         {
303 | #pragma HLS PIPELINE II=1
304 |             C[i*SIZE_BATCH+j] = output[j];
305 |         }
306 |     }
307 | }
308 | 
309 | 
310 | 


--------------------------------------------------------------------------------
/region_layer.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Region layer
  3 | //========================================================================
  4 | // @brief: get predictions (final bounding boxes)
  5 | 
  6 | #include "region_layer.h"
  7 | 
  8 | // Build the region layer for a w x h grid with n boxes per cell.
  9 | layer make_region_layer(int batch, int w, int h, int n, int classes, int coords)
 10 | {
 11 |     const int bias_count = n*2;   // two bias values per box
 12 | 
 13 |     layer l;
 14 |     init_layer(l);
 15 |     l.type = REGION;
 16 | 
 17 |     l.batch = batch;
 18 |     l.n = n;
 19 |     l.w = w;
 20 |     l.h = h;
 21 |     l.classes = classes;
 22 |     l.coords = coords;
 23 |     l.truths = 30*(5);
 24 |     // Each of the h*w*n boxes carries classes + coords + 1 values; input and output sizes match.
 25 |     l.outputs = h*w*n*(classes + coords + 1);
 26 |     l.inputs = l.outputs;
 27 | 
 28 |     // Zero-filled buffers; the biases are then set to .5.
 29 |     l.cost = (float *)calloc(1, sizeof(float));
 30 |     l.biases = (float *)calloc(bias_count, sizeof(float));
 31 |     l.bias_updates = (float *)calloc(bias_count, sizeof(float));
 32 |     l.delta = (float *)calloc(batch*l.outputs, sizeof(float));
 33 |     l.output = (float *)calloc(batch*l.outputs, sizeof(float));
 34 |     for (int k = 0; k < bias_count; k++) {
 35 |         l.biases[k] = .5;
 36 |     }
 37 |     l.forward = forward_region_layer;   // only the forward function is assigned here
 38 |     fprintf(stderr, "detection\n");
 39 |     srand(0);                           // re-seeds the C library generator with 0
 40 |     return l;
 41 | }
 42 | 
 43 | // Decode l.output into one box and one row of class probabilities per predicted box.
 44 | // Values noted by the original author for a typical call: w = 1, h = 1, thresh = 0.24, only_objectness = 0, map = 0, tree_thresh = 0.5.
 45 | void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh)
 46 | {
 47 |     float *predictions = l.output;
 48 |     // Values per box: four box values, the value at offset 4 (used as the
 49 |     // objectness scale) and l.classes class scores.
 50 |     // NOTE(review): the 5 is hard-coded rather than derived from l.coords.
 51 |     const int stride = l.classes + 5;
 52 | 
 53 |     // Walk the l.w * l.h grid cells, then the l.n boxes of each cell.
 54 |     for (int cell = 0; cell < l.w*l.h; cell++)
 55 |     {
 56 |         const int row = cell / l.w;
 57 |         const int col = cell % l.w;
 58 |         for (int n = 0; n < l.n; n++)
 59 |         {
 60 |             const int index = cell*l.n + n;
 61 |             const int box_index = index * stride;
 62 |             const int class_index = box_index + 5;
 63 |             // Read before the class scores can be rewritten by hierarchy_predictions.
 64 |             const float scale = predictions[box_index + 4];
 65 | 
 66 |             // Box in grid-relative units, then stretched by the caller's w and h.
 67 |             boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h);
 68 |             boxes[index].x *= w;
 69 |             boxes[index].y *= h;
 70 |             boxes[index].w *= w;
 71 |             boxes[index].h *= h;
 72 | 
 73 |             if (!l.softmax_tree)
 74 |             {
 75 |                 // Flat classes: scale each score and zero anything not above thresh.
 76 |                 for (int j = 0; j < l.classes; ++j)
 77 |                 {
 78 |                     const float prob = scale*predictions[class_index+j];
 79 |                     probs[index][j] = (prob > thresh) ? prob : 0;
 80 |                 }
 81 |             }
 82 |             else
 83 |             {
 84 |                 hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0);
 85 |                 if (!map)
 86 |                 {
 87 |                     // No map: keep only the tree's top prediction, plus the raw scale at slot l.classes.
 88 |                     const int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh);
 89 |                     probs[index][j] = (scale > thresh) ? scale : 0;
 90 |                     probs[index][l.classes] = scale;
 91 |                 }
 92 |                 else
 93 |                 {
 94 |                     // NOTE(review): fixed count of 200 mapped entries - confirm against the map's length.
 95 |                     for (int j = 0; j < 200; j++)
 96 |                     {
 97 |                         const float prob = scale*predictions[class_index+map[j]];
 98 |                         probs[index][j] = (prob > thresh) ? prob : 0;
 99 |                     }
100 |                 }
101 |             }
102 |             if (only_objectness) { probs[index][0] = scale; }
103 |         }
104 |     }
105 | }
106 | 
107 | // Decode one box from x[index .. index+3]; i / j are the cell column / row, w / h the grid size.
108 | box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h)
109 | {
110 |     const float *raw = x + index;   // raw[0..3]: the four box values
111 |     box b;
112 |     b.x = (i + logistic_activate(raw[0])) / w;
113 |     b.y = (j + logistic_activate(raw[1])) / h;
114 |     b.w = exp(raw[2]) * biases[2*n]   / w;
115 |     b.h = exp(raw[3]) * biases[2*n+1] / h;
116 |     return b;
117 | }
118 | 
// Forward pass of the region layer.
//
// Copies state.input into l.output, applies the logistic function to the
// value at offset 4 of every prediction and, outside GPU builds, a softmax
// (tree-structured or flat) to the class scores starting at offset 5.
// When state.train is zero the function returns right after that.
// Otherwise it clears l.delta, fills it with gradients computed against
// state.truth, stores the squared magnitude of l.delta in *l.cost and prints
// a line of running statistics.
//
// l     : layer descriptor; l.output, l.delta and *l.cost are written through
// state : provides input, truth, the train flag and net.seen
//
// NOTE(review): the truth loops are hard-coded to 30 entries of 5 floats
// instead of using l.max_boxes -- confirm l.truths is sized to match.
// NOTE(review): the fixed offsets +4 / +5 only agree with `size` when
// l.coords == 4.
// NOTE(review): the final printf divides by count and class_count, which are
// still zero when no truth entry was processed.
void forward_region_layer(const layer l, network_state state)
{
    int i,j,b,t,n;
    // Number of floats per prediction.
    int size = l.coords + l.classes + 1;
    memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));

#ifndef GPU
    // NOTE(review): flatten() is defined elsewhere; presumably it regroups the
    // buffer so that the `size` values of one prediction are contiguous -- confirm.
    flatten(l.output, l.w*l.h, size*l.n, l.batch, 1);
#endif
    // Logistic activation of the value at offset 4 of every prediction.
    for (b = 0; b < l.batch; ++b){
        for(i = 0; i < l.h*l.w*l.n; ++i){
            int index = size*i + b*l.outputs;
            l.output[index + 4] = logistic_activate(l.output[index + 4]);
        }
    }


#ifndef GPU
    // Normalise the class scores in place: tree softmax when a hierarchy is
    // attached, otherwise a flat softmax if l.softmax is set.
    if (l.softmax_tree){
        for (b = 0; b < l.batch; ++b){
            for(i = 0; i < l.h*l.w*l.n; ++i){
                int index = size*i + b*l.outputs;
                softmax_tree(l.output + index + 5, 1, 0, 1, l.softmax_tree, l.output + index + 5);
            }
        }
    } else if (l.softmax){
        for (b = 0; b < l.batch; ++b){
            for(i = 0; i < l.h*l.w*l.n; ++i){
                int index = size*i + b*l.outputs;
                softmax(l.output + index + 5, l.classes, 1, l.output + index + 5);
            }
        }
    }

#endif
    // Inference stops here; everything below only runs when training.
    if(!state.train)
    {
        return;
    }
    memset(l.delta, 0, l.outputs * l.batch * sizeof(float));
    // Accumulators for the statistics line printed at the end.
    float avg_iou = 0;
    float recall = 0;
    float avg_cat = 0;
    float avg_obj = 0;
    float avg_anyobj = 0;
    int count = 0;
    int class_count = 0;
    *(l.cost) = 0;
    for (b = 0; b < l.batch; ++b) {
        if(l.softmax_tree){
            // Hierarchy case: look for a truth entry whose x and y both exceed
            // 100000. NOTE(review): this looks like a sentinel for samples that
            // carry only a class label and no box -- confirm with the data loader.
            int onlyclass = 0;
            for(t = 0; t < 30; ++t){
                box truth = float_to_box(state.truth + t*5 + b*l.truths);
                // A zero x marks the end of the truth list.
                if(!truth.x) break;
                int class_s = state.truth[t*5 + b*l.truths + 4];
                float maxp = 0;
                int maxi = 0;
                if(truth.x > 100000 && truth.y > 100000){
                    // Push every offset-4 value toward 0 and remember the
                    // prediction with the highest scaled class probability.
                    for(n = 0; n < l.n*l.w*l.h; ++n){
                        int index = size*n + b*l.outputs + 5;
                        float scale =  l.output[index-1];
                        l.delta[index - 1] = l.noobject_scale * ((0 - l.output[index - 1]) * logistic_gradient(l.output[index - 1]));
                        float p = scale*get_hierarchy_probability(l.output + index, l.softmax_tree, class_s);
                        if(p > maxp){
                            maxp = p;
                            maxi = n;
                        }
                    }
                    // Only that best prediction receives a class gradient; its
                    // offset-4 value is pulled up toward .3 if it is below that.
                    int index = size*maxi + b*l.outputs + 5;
                    delta_region_class(l.output, l.delta, index, class_s, l.classes, l.softmax_tree, l.class_scale, &avg_cat);
                    if(l.output[index - 1] < .3) l.delta[index - 1] = l.object_scale * ((.3 - l.output[index - 1]) * logistic_gradient(l.output[index - 1]));
                    else  l.delta[index - 1] = 0;
                    ++class_count;
                    onlyclass = 1;
                    break;
                }
            }
            // Such a batch item skips the box-based loss below.
            if(onlyclass) continue;
        }
        // Pass 1: every prediction (row j, column i, anchor n).
        for (j = 0; j < l.h; ++j) {
            for (i = 0; i < l.w; ++i) {
                for (n = 0; n < l.n; ++n) {
                    int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs;
                    box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h);
                    // Highest IoU of this prediction against any truth box.
                    float best_iou = 0;
                    for(t = 0; t < 30; ++t){
                        box truth = float_to_box(state.truth + t*5 + b*l.truths);
                        if(!truth.x) break;
                        float iou = box_iou(pred, truth);
                        if (iou > best_iou) {
                            best_iou = iou;
                        }
                    }
                    avg_anyobj += l.output[index + 4];
                    // Default target for the offset-4 value is 0, unless the
                    // prediction already overlaps a truth box by more than l.thresh.
                    l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
                    if (best_iou > l.thresh) {
                        l.delta[index + 4] = 0;
                    }

                    // While fewer than 12800 samples have been seen, pull every
                    // prediction toward a box centred in its cell with the
                    // anchor's size, using a small scale of .01.
                    if(*(state.net.seen) < 12800){
                        box truth = {0};
                        truth.x = (i + .5)/l.w;
                        truth.y = (j + .5)/l.h;
                        truth.w = l.biases[2*n]/l.w;
                        truth.h = l.biases[2*n+1]/l.h;
                        delta_region_box(truth, l.output, l.biases, n, index, i, j, l.w, l.h, l.delta, .01);
                    }
                }
            }
        }
        // Pass 2: every truth box picks one prediction in its own cell.
        for(t = 0; t < 30; ++t){
            box truth = float_to_box(state.truth + t*5 + b*l.truths);

            if(!truth.x) break;
            float best_iou = 0;
            int best_index = 0;
            int best_n = 0;
            // Cell containing the truth centre.
            // NOTE(review): truth.x == 1 or truth.y == 1 would give i == l.w or
            // j == l.h, one past the grid -- confirm the loader excludes this.
            i = (truth.x * l.w);
            j = (truth.y * l.h);
            // Shapes are compared with both boxes moved to the origin.
            box truth_shift = truth;
            truth_shift.x = 0;
            truth_shift.y = 0;
            for(n = 0; n < l.n; ++n){
                int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs;
                box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h);
                // With bias_match the raw anchor size is compared instead of
                // the predicted size.
                if(l.bias_match){
                    pred.w = l.biases[2*n]/l.w;
                    pred.h = l.biases[2*n+1]/l.h;
                }
                pred.x = 0;
                pred.y = 0;
                float iou = box_iou(pred, truth_shift);
                if (iou > best_iou){
                    best_index = index;
                    best_iou = iou;
                    best_n = n;
                }
            }

            // Coordinate gradient for the chosen prediction; the returned IoU
            // feeds the recall / average-IoU statistics.
            float iou = delta_region_box(truth, l.output, l.biases, best_n, best_index, i, j, l.w, l.h, l.delta, l.coord_scale);
            if(iou > .5) recall += 1;
            avg_iou += iou;

            // Offset-4 target is 1, or the measured IoU when l.rescore is set.
            avg_obj += l.output[best_index + 4];
            l.delta[best_index + 4] = l.object_scale * (1 - l.output[best_index + 4]) * logistic_gradient(l.output[best_index + 4]);
            if (l.rescore) {
                l.delta[best_index + 4] = l.object_scale * (iou - l.output[best_index + 4]) * logistic_gradient(l.output[best_index + 4]);
            }


            // Class gradient, after remapping the label through l.map if present.
            int class_s = state.truth[t*5 + b*l.truths + 4];
            if (l.map) class_s = l.map[class_s];
            delta_region_class(l.output, l.delta, best_index + 5, class_s, l.classes, l.softmax_tree, l.class_scale, &avg_cat);
            ++count;
            ++class_count;
        }
    }
#ifndef GPU
    // Same flatten() call as on l.output above, with the last argument 0 instead of 1.
    flatten(l.delta, l.w*l.h, size*l.n, l.batch, 0);
#endif
    // Cost is the squared magnitude of the whole delta buffer.
    *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
    printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f,  count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count);
}
305 | 
306 | // extra region classes
307 | void delta_region_class(float *output, float *delta, int index, int class_s, int classes, tree *hier, float scale, float *avg_cat)
308 | {
309 |     if(hier)
310 |     {
311 |         float pred = 1;
312 |         while(class_s >= 0)
313 |         {
314 |             pred *= output[index + class_s];
315 |             int g = hier->group[class_s];
316 |             int offset = hier->group_offset[g];
317 |             for(int i = 0; i < hier->group_size[g]; i++)
318 |             {
319 |                 delta[index + offset + i] = scale * (0 - output[index + offset + i]);
320 |             }
321 |             delta[index + class_s] = scale * (1 - output[index + class_s]);
322 | 
323 |             class_s = hier->parent[class_s];
324 |         }
325 |         *avg_cat += pred;
326 |     }
327 |     else
328 |     {
329 |         for(int n = 0; n < classes; n++)
330 |         {
331 |             delta[index + n] = scale * (((n == class_s)?1 : 0) - output[index + n]);
332 |             if(n == class_s)
333 |             {
334 |                 *avg_cat += output[index + n];
335 |             }
336 |         }
337 |     }
338 | }
339 | 
340 | // extra region boxes
341 | float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale)
342 | {
343 |     box pred = get_region_box(x, biases, n, index, i, j, w, h);
344 |     float iou = box_iou(pred, truth);
345 | 
346 |     float tx = (truth.x*w - i);
347 |     float ty = (truth.y*h - j);
348 |     float tw = log(truth.w*w / biases[2*n]);
349 |     float th = log(truth.h*h / biases[2*n + 1]);
350 | 
351 |     delta[index + 0] = scale * (tx - logistic_activate(x[index + 0])) * logistic_gradient(logistic_activate(x[index + 0]));
352 |     delta[index + 1] = scale * (ty - logistic_activate(x[index + 1])) * logistic_gradient(logistic_activate(x[index + 1]));
353 |     delta[index + 2] = scale * (tw - x[index + 2]);
354 |     delta[index + 3] = scale * (th - x[index + 3]);
355 | 
356 |     return iou;
357 | }
358 | 


--------------------------------------------------------------------------------
/image.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Image
  3 | //========================================================================
  4 | // @brief: loading image data
  5 | 
  6 | #include "image.h"
  7 | 
  8 | #define STB_IMAGE_IMPLEMENTATION
  9 | #include "stb_image_read.h"
 10 | #define STB_IMAGE_WRITE_IMPLEMENTATION
 11 | #include "stb_image_write.h"
 12 | 
// NOTE(review): not referenced anywhere in the visible part of this file.
int windows = 0;
// Six colour entries that get_color() interpolates between; draw_detections()
// reads column 2 as red, column 1 as green and column 0 as blue.
float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} };
 15 | 
 16 | // return 8*128*image
 17 | // load labels 8(different size), 32~126 (different type)
 18 | // store information&value of labels: w,h,c,*data
 19 | image **load_alphabet()
 20 | {
 21 |     const int nsize = LABEL_SIZE;
 22 |     image **alphabets = (image **)calloc(nsize, sizeof(image *));
 23 |     //
 24 |     for (int j = 0; j < nsize; j++)
 25 |     {
 26 |         alphabets[j] = (image *)calloc(LABEL_TYPE, sizeof(image));
 27 |         for (int i = 32; i < LABEL_TYPE - 1; i++)
 28 |         {
 29 |             char buffer[256];
 30 |             sprintf(buffer, "data/labels/%d_%d.png", i, j);
 31 |             // buffer: filename of labels
 32 |             alphabets[j][i] = load_image_color(buffer, 0, 0);
 33 |         }
 34 |     }
 35 |     return alphabets;
 36 | }
 37 | 
 38 | // pass value?
 39 | image load_image_color(char *filename, int w, int h)
 40 | {
 41 |     return load_image(filename, w, h, 3);
 42 | }
 43 | 
 44 | // load image top function
 45 | image load_image(char *filename, int w, int h, int c) //003
 46 | {
 47 |     // load image value BUG
 48 |     image out = load_image_stb(filename, c);
 49 | 
 50 |     //printf("out.h: %d; out.w: %d; out.c: %d;\n",out.h,out.w,out.c);
 51 |     // when resize???
 52 |     if((h && w) && (h != out.h || w != out.w))
 53 |     {
 54 |         image resized = resize_image(out, w, h);
 55 |         free_image(out);
 56 |         out = resized;
 57 |     }
 58 |     return out;
 59 | }
 60 | 
 61 | // return im.data: w(width); h(height); z(depth,channel)
 62 | image load_image_stb(char *filename, int channels) // filename, 3
 63 | {
 64 |     int w, h, c;
 65 |     // standard image load function
 66 |     // stbi_load output: z(depth, channel); w(width); h(height)
 67 |     //printf("filename: %s; channels: %d;\n",filename,channels);
 68 |     unsigned char *data = stbi_load(filename, &w, &h, &c, channels);
 69 |     //printf("out.h: %d; out.w: %d; out.c: %d;\n",h,w,c);
 70 | 
 71 |     if(channels)
 72 |     {
 73 |         c = channels;
 74 |     }
 75 |     // make new image
 76 |     image im = make_image(w, h, c);
 77 |     for (int k = 0; k < c; k++)
 78 |     {
 79 |         for (int j = 0; j < h; j++)
 80 |         {
 81 |             for(int i = 0; i < w; i++)
 82 |             {
 83 |                 int index_dst = i + w*j + w*h*k;
 84 |                 int index_src = k + c*i + c*w*j;
 85 |                 im.data[index_dst] = (float)data[index_src]/255.0;
 86 |             }
 87 |         }
 88 |     }
 89 |     free(data);
 90 |     return im;
 91 | }
 92 | 
 93 | // make_image top function
 94 | image make_image(int w, int h, int c)
 95 | {
 96 |     image out = make_empty_image(w, h, c);
 97 |     out.data  = (float *)calloc(h*w*c, sizeof(float));
 98 |     return out;
 99 | }
100 | 
101 | // make empty image (data pointer: 0)
102 | image make_empty_image(int w, int h, int c)
103 | {
104 |     image out;
105 |     out.data = 0;
106 |     out.h    = h;
107 |     out.w    = w;
108 |     out.c    = c;
109 | 
110 |     return out;
111 | }
112 | 
113 | // resize the given image (w*h)
114 | image resize_image(image im, int w, int h)
115 | {
116 |     image resized = make_image(w, h, im.c);
117 |     image part    = make_image(w, im.h, im.c);
118 | 
119 |     float w_scale = (float)(im.w - 1)/(w - 1);
120 |     float h_scale = (float)(im.h - 1)/(h - 1);
121 |     // stage 1: resize image within given width (column)
122 |     for (int k = 0; k < im.c; k++)
123 |     {
124 |         for (int r = 0; r < im.h; r++) // row
125 |         {
126 |             for(int c = 0; c < w; c++) // column
127 |             {
128 |                 float val = 0;
129 |                 // last column || only one column
130 |                 if (c == w-1 || im.w == 1)
131 |                 {   // simply fetch the original final column
132 |                     val = get_pixel(im, im.w - 1, r, k);
133 |                 }
134 |                 else
135 |                 {
136 |                     float sx = c*w_scale;
137 |                     int ix = (int)sx;
138 |                     float dx = sx - ix;
139 |                     // weighted sum for other columns
140 |                     val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k);
141 |                 }
142 |                 // store val into image part
143 |                 set_pixel(part, c, r, k, val);
144 |             }
145 |         }
146 |     }
147 |     // stage 2: resize image within given height (row)
148 |     for (int k = 0; k < im.c; k++)
149 |     {
150 |         for(int r = 0; r < h; r++)
151 |         {
152 |             float sy = r*h_scale;
153 |             int iy = (int)sy;
154 |             float dy = sy - iy;
155 |             //
156 |             for (int c = 0; c < w; c++)
157 |             {
158 |                 float val = (1 - dy) * get_pixel(part, c, iy, k);
159 |                 // store val into image resized
160 |                 set_pixel(resized, c, r, k, val);
161 |             }
162 |             // the last row || only one row
163 |             if (r == h-1 || im.h == 1)
164 |             {
165 |                 continue;
166 |             }
167 |             //
168 |             for (int c = 0; c < w; c++)
169 |             {
170 |                 float val = dy * get_pixel(part, c, iy+1, k);
171 |                 add_pixel(resized, c, r, k, val);
172 |             }
173 |         }
174 |     }
175 |     free_image(part);
176 |     return resized;
177 | }
178 | 
179 | // pick up pixel in m.data: x - width, y - height, c - channel
180 | float get_pixel(image m, int x, int y, int c)
181 | {
182 |     // x < m.w && y < m.h && c < m.c == 0: assert
183 |     assert(x < m.w && y < m.h && c < m.c);
184 |     return m.data[c*m.h*m.w + y*m.w + x];
185 | }
186 | 
187 | // fetch extra pixels
188 | float get_pixel_extend(image m, int x, int y, int c)
189 | {
190 |     if(x < 0)
191 |     {
192 |         x = 0;
193 |     }
194 |     if(x >= m.w)
195 |     {
196 |         x = m.w-1;
197 |     }
198 |     if(y < 0)
199 |     {
200 |         y = 0;
201 |     }
202 |     if(y >= m.h)
203 |     {
204 |         y = m.h-1;
205 |     }
206 |     if(c < 0 || c >= m.c)
207 |     {
208 |         return 0;
209 |     }
210 |     return get_pixel(m, x, y, c);
211 | }
212 | 
213 | // check the validity of data && store data into image
214 | void set_pixel(image m, int x, int y, int c, float val)
215 | {
216 |     if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c)
217 |     {
218 |         return;
219 |     }
220 |     // x < m.w && y < m.h && c < m.c == 0: assert
221 |     assert(x < m.w && y < m.h && c < m.c);
222 |     m.data[c*m.h*m.w + y*m.w + x] = val;
223 | }
224 | 
225 | // add value to pixels
226 | void add_pixel(image m, int x, int y, int c, float val)
227 | {
228 |     // x < m.w && y < m.h && c < m.c == 0: assert
229 |     assert(x < m.w && y < m.h && c < m.c);
230 |     m.data[c*m.h*m.w + y*m.w + x] += val;
231 | }
232 | 
233 | // draw detecting results
234 | void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes)
235 | {
236 |     //printf("probs[176][2]:%.12f; probs[176][7]:%.12f;\n",probs[176][2],probs[176][7]);
237 |     for (int i = 0; i < num; i++)
238 |     {
239 |         //printf("ch0;\n");
240 |         int class_max = max_index(probs[i], classes); // max_index???
241 |         float prob = probs[i][class_max];
242 | /*
243 |         if(probs[i][class_max] != 0)
244 |         {
245 |             printf("i:%d; class_max:%d; prob:%.12f; \n",i,class_max,prob);
246 |         }
247 | */
248 |         if (prob > thresh)
249 |         {
250 |             //printf("ch1;\n");
251 |             int width = im.h * 0.012;
252 |             /* ??????????????????????????????????????
253 |             if(0)
254 |             {
255 |                 width = pow(prob, 1.0/2.0)*10 + 1;
256 |                 alphabet = 0;
257 |             }
258 |              */
259 |             printf("%s: %.0f%%\n", names[class_max], prob*100);
260 |             int offset  = class_max * 123457 % classes;
261 |             float red   = get_color(2, offset, classes);
262 |             float green = get_color(1, offset, classes);
263 |             float blue  = get_color(0, offset, classes);
264 |             float rgb[3];
265 |             //
266 |             rgb[0] = red;
267 |             rgb[1] = green;
268 |             rgb[2] = blue;
269 |             box b  = boxes[i];
270 |             //
271 |             int left  = (b.x-b.w/2.0)*im.w;
272 |             int right = (b.x+b.w/2.0)*im.w;
273 |             int top   = (b.y-b.h/2.0)*im.h;
274 |             int bot   = (b.y+b.h/2.0)*im.h;
275 |             //
276 |             if(left < 0)       left  = 0;
277 |             if(right > im.w-1) right = im.w-1;
278 |             if(top < 0)        top   = 0;
279 |             if(bot > im.h-1)   bot   = im.h-1;
280 |             //printf("ch2;\n");
281 |             //
282 |             draw_box_width(im, left, top, right, bot, width, red, green, blue);
283 |             //printf("ch3;\n");
284 |             if(alphabet)
285 |             {
286 |                 image label = get_label(alphabet, names[class_max], (im.h*0.03)/10);
287 |                 draw_label(im, top + width, left, label, rgb);
288 |             }
289 |         }
290 |         //printf("ch4;\n");
291 |     }
292 | }
293 | 
294 | // get label
295 | image get_label(image **characters, char *string, int size)
296 | {
297 |     if (size > 7)
298 |     {
299 |         size = 7;
300 |     }
301 |     image label = make_empty_image(0, 0, 0);
302 |     //
303 |     while(*string)
304 |     {
305 |         image l = characters[size][(int)*string];
306 |         image n = tile_images(label, l, -size - 1 + (size+1)/2);
307 |         free_image(label);
308 |         label = n;
309 |         string++;
310 |     }
311 |     image b = border_image(label, label.h*.25);
312 |     free_image(label);
313 | 
314 |     return b;
315 | }
316 | 
317 | // splite image
318 | image tile_images(image a, image b, int dx)
319 | {
320 |     if(a.w == 0)
321 |     {
322 |         return copy_image(b);
323 |     }
324 |     image c = make_image(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, (a.c > b.c) ? a.c : b.c);
325 |     fill_cpu(c.w*c.h*c.c, 1, c.data, 1);
326 |     embed_image(a, c, 0, 0);
327 |     composite_image(b, c, a.w + dx, 0);
328 | 
329 |     return c;
330 | }
331 | 
332 | // border/wrap up image
333 | image border_image(image a, int border)
334 | {
335 |     image b = make_image(a.w + 2*border, a.h + 2*border, a.c);
336 |     //
337 | 
338 |     for(int k = 0; k < b.c; ++k)
339 |     {
340 |        for(int y = 0; y < b.h; ++y)
341 |        {
342 |            for(int x = 0; x < b.w; ++x)
343 |            {
344 |                float val = get_pixel_extend(a, x - border, y - border, k);
345 |                if(x - border < 0 || x - border >= a.w || y - border < 0 || y - border >= a.h)
346 |                {
347 |                    val = 1;
348 |                }
349 |                set_pixel(b, x, y, k, val);
350 |             }
351 |        }
352 |     }
353 | 
354 |     return b;
355 | }
356 | 
357 | // copy image
358 | image copy_image(image p)
359 | {
360 |     image copy = p;
361 |     copy.data = (float *)calloc(p.h*p.w*p.c, sizeof(float));
362 |     memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float));
363 | 
364 |     return copy;
365 | }
366 | 
367 | // embed image (image data transmission)
368 | void embed_image(image source, image dest, int dx, int dy)
369 | {
370 |     for(int k = 0; k < source.c; k++)
371 |     {
372 | 
373 |         for(int y = 0; y < source.h; y++)
374 |         {
375 |             for(int x = 0; x < source.w; x++)
376 |             {
377 |                 float val = get_pixel(source, x,y,k);
378 |                 set_pixel(dest, dx+x, dy+y, k, val);
379 |             }
380 |         }
381 |     }
382 | }
383 | 
384 | // merge images
385 | void composite_image(image source, image dest, int dx, int dy)
386 | {
387 |     for (int k = 0; k < source.c; k++)
388 |     {
389 |         for (int y = 0; y < source.h; y++)
390 |         {
391 |             for (int x = 0; x < source.w; x++)
392 |             {
393 |                 float val = get_pixel(source, x, y, k);
394 |                 float val2 = get_pixel_extend(dest, dx+x, dy+y, k);
395 |                 set_pixel(dest, dx+x, dy+y, k, val * val2);
396 |             }
397 |         }
398 |     }
399 | }
400 | 
401 | // get width of boxes
402 | void draw_box_width(image a, int x1, int y1, int x2, int y2,int w, float r, float g, float b)
403 | {
404 |     for (int i = 0; i < w; i++)
405 |     {
406 |         draw_box(a, x1+i, y1+i, x2-i, y2-i, r, g, b);
407 |     }
408 | }
409 | 
410 | // draw one box
411 | void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b)
412 | {
413 |     // normalize_image(a)
414 |     // ensure the boxed in the picture
415 |     if(x1 < 0)    x1 = 0;
416 |     if(x1 >= a.w) x1 = a.w-1;
417 |     if(x2 < 0)    x2 = 0;
418 |     if(x2 >= a.w) x2 = a.w-1;
419 | 
420 |     if(y1 < 0)    y1 = 0;
421 |     if(y1 >= a.h) y1 = a.h-1;
422 |     if(y2 < 0)    y2 = 0;
423 |     if(y2 >= a.h) y2 = a.h-1;
424 |     // draw boxes: rgb
425 |     for (int i = x1; i <= x2; i++)
426 |     {   // two horizontal lines
427 |         a.data[i + y1*a.w + 0*a.w*a.h] = r;
428 |         a.data[i + y2*a.w + 0*a.w*a.h] = r;
429 |         a.data[i + y1*a.w + 1*a.w*a.h] = g;
430 |         a.data[i + y2*a.w + 1*a.w*a.h] = g;
431 |         a.data[i + y1*a.w + 2*a.w*a.h] = b;
432 |         a.data[i + y2*a.w + 2*a.w*a.h] = b;
433 |     }
434 |     for (int i = y1; i <= y2; i++)
435 |     {   // two vertical lines
436 |         a.data[x1 + i*a.w + 0*a.w*a.h] = r;
437 |         a.data[x2 + i*a.w + 0*a.w*a.h] = r;
438 |         a.data[x1 + i*a.w + 1*a.w*a.h] = g;
439 |         a.data[x2 + i*a.w + 1*a.w*a.h] = g;
440 |         a.data[x1 + i*a.w + 2*a.w*a.h] = b;
441 |         a.data[x2 + i*a.w + 2*a.w*a.h] = b;
442 |     }
443 | }
444 | 
445 | // draw labels
446 | void draw_label(image a, int r, int c, image label, const float *rgb)
447 | {
448 |     int w = label.w;
449 |     int h = label.h;
450 |     if(r - h >= 0)
451 |     {
452 |         r = r - h;
453 |     }
454 |     // replace corresponding pixels for labels
455 |     for (int j = 0; j < h && j + r < a.h; j++)
456 |     {
457 |         for (int i = 0; i < w && i + c < a.w; i++)
458 |         {
459 |             for (int k = 0; k < label.c; k++)
460 |             {
461 |                 float val = get_pixel(label, i, j, k);
462 |                 set_pixel(a, i+c, j+r, k, rgb[k] * val);
463 |             }
464 |         }
465 |     }
466 | }
467 | 
468 | // get image color
469 | float get_color(int c, int x, int max)
470 | {
471 |     float ratio = ((float)x/max)*5;
472 |     int i = floor(ratio); 
473 |     int j = ceil(ratio);  
474 |     ratio -= i;
475 |     float r = (1-ratio) * colors[i][c] + ratio*colors[j][c];
476 | 
477 |     return r;
478 | }
479 | 
480 | // display image
481 | void show_image(image p, const char *name)
482 | {
483 |     fprintf(stderr,"Not compiled with OpenCV, saving to %s.png instead.\n", name);
484 |     save_image(p, name);
485 | }
486 | 
487 | // save image top function
488 | void save_image(image im, const char *name)
489 | {
490 |     save_image_png(im, name);
491 | }
492 | 
493 | // data: one pixel(three channels); im.data: all pixels for one channel, next channel, etc.
494 | void save_image_png(image im, const char *name)
495 | {
496 |     char buffer[256];
497 |     // save picture name into buffer
498 |     sprintf(buffer, "%s.png", name);
499 |     unsigned char *data = (unsigned char *)calloc(im.w*im.h*im.c, sizeof(char));
500 |     //
501 |     //printf("ch0;\n");
502 |     for(int k = 0; k < im.c; k++)
503 |     {
504 |         for(int i = 0; i < im.w*im.h; i++)
505 |         {
506 |             data[i*im.c + k] = (unsigned char) (255 * im.data[i + k*im.w*im.h]);
507 |         }
508 |     }
509 |     //printf("ch1;\n");
510 |     int success = stbi_write_png(buffer, im.w, im.h, im.c, data, im.w*im.c);
511 |     //printf("ch2;\n");
512 |     free(data);
513 |     if(!success) fprintf(stderr, "Failed to write image %s\n", buffer);
514 | }
515 | 
516 | // free allocated memory
517 | void free_image(image m)
518 | {
519 |     if(m.data)
520 |     {
521 |         free(m.data);
522 |     }
523 | }
524 | 


--------------------------------------------------------------------------------
/parser.cpp:
--------------------------------------------------------------------------------
  1 | //========================================================================
  2 | // Parser
  3 | //========================================================================
  4 | // @brief: parse and store configs
  5 | 
  6 | #ifndef SRC_PARSER_CPP_
  7 | #define SRC_PARSER_CPP_
  8 | 
  9 | #include "parser.h"
 10 | 
 11 | // parse amxpooling layer
 12 | maxpool_layer parse_maxpool(list *options, size_params params)
 13 | {
 14 |     int stride = option_find_int(options, "stride",1);
 15 |     int size = option_find_int(options, "size",stride);
 16 |     int padding = option_find_int_quiet(options, "padding", (size-1)/2);
 17 |     //printf("\nstride: %d; size: %d; padding: %d;\n",stride,size,padding);
 18 | 
 19 |     int batch,h,w,c;
 20 |     h = params.h;
 21 |     w = params.w;
 22 |     c = params.c;
 23 |     batch=params.batch;
 24 |     //printf("\nh: %d; w: %d; c: %d; batch: %d;\n",h,w,c,batch);
 25 |     if(!(h && w && c))
 26 |     {
 27 |         error("Layer before maxpool layer must output image.");
 28 |     }
 29 | 
 30 |     maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding);
 31 |     return layer;
 32 | }
 33 | 
 34 | // copy matrix
 35 | void transpose_matrix(float *a, int rows, int cols)
 36 | {
 37 |     float *transpose = (float *)calloc(rows*cols, sizeof(float));
 38 |     //
 39 |     for(int x = 0; x < rows; x++)
 40 |     {
 41 |         for(int y = 0; y < cols; y++)
 42 |         {
 43 |             transpose[y*rows + x] = a[x*cols + y];
 44 |         }
 45 |     }
 46 |     memcpy(a, transpose, rows*cols*sizeof(float));
 47 |     free(transpose);
 48 | }
 49 | 
 50 | // parse region layers
 51 | layer parse_region(list *options, size_params params)
 52 | {
 53 |     int coords = option_find_int(options, "coords", 4);
 54 |     int classes = option_find_int(options, "classes", 20);
 55 |     int num = option_find_int(options, "num", 1);
 56 | 
 57 |     layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords);
 58 |     assert(l.outputs == params.inputs);
 59 | 
 60 |     l.log = option_find_int_quiet(options, "log", 0);
 61 |     l.sqrt = option_find_int_quiet(options, "sqrt", 0);
 62 | 
 63 |     l.softmax = option_find_int(options, "softmax", 0);
 64 |     l.max_boxes = option_find_int_quiet(options, "max",30);
 65 |     l.jitter = option_find_float(options, "jitter", .2);
 66 |     l.rescore = option_find_int_quiet(options, "rescore",0);
 67 | 
 68 |     l.thresh = option_find_float(options, "thresh", .5);
 69 |     l.classfix = option_find_int_quiet(options, "classfix", 0);
 70 |     l.absolute = option_find_int_quiet(options, "absolute", 0);
 71 |     l.random = option_find_int_quiet(options, "random", 0);
 72 | 
 73 |     l.coord_scale = option_find_float(options, "coord_scale", 1);
 74 |     l.object_scale = option_find_float(options, "object_scale", 1);
 75 |     l.noobject_scale = option_find_float(options, "noobject_scale", 1);
 76 |     l.class_scale = option_find_float(options, "class_scale", 1);
 77 |     l.bias_match = option_find_int_quiet(options, "bias_match",0);
 78 | 
 79 |     char *tree_file = option_find_str(options, "tree", 0);
 80 |     if (tree_file)
 81 |     {
 82 |         l.softmax_tree = read_tree(tree_file);
 83 |     }
 84 |     char *map_file = option_find_str(options, "map", 0);
 85 |     if (map_file)
 86 |     {
 87 |         l.map = read_map(map_file);
 88 |     }
 89 | 
 90 |     char *a = option_find_str(options, "anchors", 0);
 91 |     if(a)
 92 |     {
 93 |         int len = strlen(a);
 94 |         int n = 1;
 95 |         //
 96 |         for(int i = 0; i < len; i++){
 97 |             if (a[i] == ',')
 98 |             {
 99 |                 n++;
100 |             }
101 |         }
102 |         for(int i = 0; i < n; i++){
103 |             float bias = atof(a);
104 |             l.biases[i] = bias;
105 |             a = strchr(a, ',')+1;
106 |         }
107 |     }
108 |     return l;
109 | }
110 | 
111 | // parse convolutional layer
112 | convolutional_layer parse_convolutional(list *options, size_params params)
113 | {
114 |     int n = option_find_int(options, "filters",1);
115 |     int size = option_find_int(options, "size",1);
116 |     int stride = option_find_int(options, "stride",1);
117 |     int pad = option_find_int_quiet(options, "pad",0);
118 |     int padding = option_find_int_quiet(options, "padding",0);
119 |     if(pad) padding = size/2;
120 | 
121 |     char *activation_s = option_find_str(options, "activation", "logistic");
122 |     ACTIVATION activation = get_activation(activation_s);
123 | 
124 |     int batch,h,w,c;
125 |     h = params.h;
126 |     w = params.w;
127 |     c = params.c;
128 |     batch=params.batch;
129 |     if(!(h && w && c))
130 |     {
131 |         error("Layer before convolutional layer must output image.");
132 |     }
133 |     int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
134 |     int binary = option_find_int_quiet(options, "binary", 0);
135 |     int xnor = option_find_int_quiet(options, "xnor", 0);
136 | 
137 |     convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,padding,activation, batch_normalize, binary, xnor, params.net.adam);
138 |     layer.flipped = option_find_int_quiet(options, "flipped", 0);
139 |     layer.dot = option_find_float_quiet(options, "dot", 0);
140 |     if(params.net.adam)
141 |     {
142 |         layer.B1 = params.net.B1;
143 |         layer.B2 = params.net.B2;
144 |         layer.eps = params.net.eps;
145 |     }
146 | 
147 |     return layer;
148 | }
149 | 
150 | // get string type
151 | LAYER_TYPE string_to_layer_type(char * type)
152 | {
153 | 
154 |     if (strcmp(type, "[shortcut]")==0) return SHORTCUT;
155 |     if (strcmp(type, "[crop]")==0) return CROP;
156 |     if (strcmp(type, "[cost]")==0) return COST;
157 |     if (strcmp(type, "[detection]")==0) return DETECTION;
158 |     if (strcmp(type, "[region]")==0) return REGION;
159 |     if (strcmp(type, "[local]")==0) return LOCAL;
160 |     if (strcmp(type, "[conv]")==0
161 |             || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
162 |     if (strcmp(type, "[activation]")==0) return ACTIVE;
163 |     if (strcmp(type, "[net]")==0
164 |             || strcmp(type, "[network]")==0) return NETWORK;
165 |     if (strcmp(type, "[crnn]")==0) return CRNN;
166 |     if (strcmp(type, "[gru]")==0) return GRU;
167 |     if (strcmp(type, "[rnn]")==0) return RNN;
168 |     if (strcmp(type, "[conn]")==0
169 |             || strcmp(type, "[connected]")==0) return CONNECTED;
170 |     if (strcmp(type, "[max]")==0
171 |             || strcmp(type, "[maxpool]")==0) return MAXPOOL;
172 |     if (strcmp(type, "[reorg]")==0) return REORG;
173 |     if (strcmp(type, "[avg]")==0
174 |             || strcmp(type, "[avgpool]")==0) return AVGPOOL;
175 |     if (strcmp(type, "[dropout]")==0) return DROPOUT;
176 |     if (strcmp(type, "[lrn]")==0
177 |             || strcmp(type, "[normalization]")==0) return NORMALIZATION;
178 |     if (strcmp(type, "[batchnorm]")==0) return BATCHNORM;
179 |     if (strcmp(type, "[soft]")==0
180 |             || strcmp(type, "[softmax]")==0) return SOFTMAX;
181 |     if (strcmp(type, "[route]")==0) return ROUTE;
182 |     return BLANK;
183 | }
184 | 
185 | // get larning rate policy
186 | learning_rate_policy get_policy(char *s)
187 | {
188 |     if (strcmp(s, "random")==0) return RANDOM;
189 |     if (strcmp(s, "poly")==0) return POLY;
190 |     if (strcmp(s, "constant")==0) return CONSTANT;
191 |     if (strcmp(s, "step")==0) return STEP;
192 |     if (strcmp(s, "exp")==0) return EXP;
193 |     if (strcmp(s, "sigmoid")==0) return SIG;
194 |     if (strcmp(s, "steps")==0) return STEPS;
195 |     fprintf(stderr, "Couldn't find policy %s, going with constant\n", s);
196 | 
197 |     return CONSTANT;
198 | }
199 | 
200 | // config parser
201 | void parse_net_options(list *options, network *net)
202 | {
203 |     net->batch = option_find_int(options, "batch",1);
204 |     net->learning_rate = option_find_float(options, "learning_rate", .001);
205 |     net->momentum = option_find_float(options, "momentum", .9);
206 |     net->decay = option_find_float(options, "decay", .0001);
207 |     int subdivs = option_find_int(options, "subdivisions",1);
208 |     net->time_steps = option_find_int_quiet(options, "time_steps",1);
209 |     net->batch /= subdivs;
210 |     net->batch *= net->time_steps;
211 |     net->subdivisions = subdivs;
212 | 
213 |     net->adam = option_find_int_quiet(options, "adam", 0);
214 |     if(net->adam)
215 |     {
216 |         net->B1 = option_find_float(options, "B1", .9);
217 |         net->B2 = option_find_float(options, "B2", .999);
218 |         net->eps = option_find_float(options, "eps", .000001);
219 |     }
220 | 
221 |     net->h = option_find_int_quiet(options, "height",0);
222 |     net->w = option_find_int_quiet(options, "width",0);
223 |     net->c = option_find_int_quiet(options, "channels",0);
224 |     net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
225 |     net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
226 |     net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
227 | 
228 |     net->angle = option_find_float_quiet(options, "angle", 0);
229 |     net->aspect = option_find_float_quiet(options, "aspect", 1);
230 |     net->saturation = option_find_float_quiet(options, "saturation", 1);
231 |     net->exposure = option_find_float_quiet(options, "exposure", 1);
232 |     net->hue = option_find_float_quiet(options, "hue", 0);
233 | 
234 |     if(!net->inputs && !(net->h && net->w && net->c))
235 |     {
236 |         error("No input parameters supplied");
237 |     }
238 | 
239 |     char *policy_s = option_find_str(options, "policy", "constant");
240 |     net->policy = get_policy(policy_s);
241 |     net->burn_in = option_find_int_quiet(options, "burn_in", 0);
242 |     if(net->policy == STEP)
243 |     {
244 |         net->step = option_find_int(options, "step", 1);
245 |         net->scale = option_find_float(options, "scale", 1);
246 |     }
247 |     else if (net->policy == STEPS)
248 |     {
249 |         char *l = option_find(options, "steps");
250 |         char *p = option_find(options, "scales");
251 |         if(!l || !p)
252 |         {
253 |             error("STEPS policy must have steps and scales in cfg file");
254 |         }
255 | 
256 |         int len = strlen(l);
257 |         int n = 1;
258 |         //
259 |         for(int i = 0; i < len; i++)
260 |         {
261 |             if (l[i] == ',')
262 |             {
263 |                 n++;
264 |             }
265 |         }
266 |         int *steps = (int *) calloc(n, sizeof(int));
267 |         float *scales = (float *)calloc(n, sizeof(float));
268 |         for(int i = 0; i < n; i++)
269 |         {
270 |             int step    = atoi(l);
271 |             float scale = atof(p);
272 |             l = strchr(l, ',')+1;
273 |             p = strchr(p, ',')+1;
274 |             steps[i] = step;
275 |             scales[i] = scale;
276 |         }
277 |         net->scales = scales;
278 |         net->steps = steps;
279 |         net->num_steps = n;
280 |     }
281 |     else if (net->policy == EXP)
282 |     {
283 |         net->gamma = option_find_float(options, "gamma", 1);
284 |     }
285 |     else if (net->policy == SIG)
286 |     {
287 |         net->gamma = option_find_float(options, "gamma", 1);
288 |         net->step = option_find_int(options, "step", 1);
289 |     }
290 |     else if (net->policy == POLY || net->policy == RANDOM)
291 |     {
292 |         net->power = option_find_float(options, "power", 1);
293 |     }
294 |     net->max_batches = option_find_int(options, "max_batches", 0);
295 | }
296 | 
297 | // free section
298 | void free_section(section *s)
299 | {
300 |     free(s->type);
301 |     node *n = s->options->front;
302 |     while(n){
303 |         kvp *pair = (kvp *)n->val;
304 |         free(pair->key);
305 |         free(pair);
306 |         node *next = n->next;
307 |         free(n);
308 |         n = next;
309 |     }
310 |     free(s->options);
311 |     free(s);
312 | }
313 | 
314 | // tiny-yolo.cfg
315 | network parse_network_cfg(char *filename)
316 | {
317 |     // read cfg lines into a list
318 |     // list:    size, *front(start 'node'), *back(end 'node')
319 |     // node:    val(*'section'), *next, *prev
320 |     // section: *type, *option('list')
321 |     // list:    size, *front(start 'node'), *back(end 'node')
322 |     // node:    val(*'kvp'), *next, *prev
323 |     // kvp:     *key, *val, used?(init 0 - unused)
324 |     list *sections = read_cfg(filename);
325 |     node *n = sections->front;
326 |     if(!n)
327 |     {
328 |         error("Config file has no sections");
329 |     }
330 |     // network within net(general setup) + 16 layers(9 conv + 6 maxpool + 1 region) in tiny-yolo
331 |     network net = make_network(sections->size - 1);
332 |     size_params params; // why define this?
333 | 
334 |     // traverse the sections in the top list
335 |     section *s = (section *)n->val;
336 |     list *options = s->options;
337 |     //if(!is_network(s)) error("First section must be [net] or [network]");
338 |     parse_net_options(options, &net);
339 | 
340 |     params.h = net.h;
341 |     params.w = net.w;
342 |     params.c = net.c;
343 |     params.inputs = net.inputs;
344 |     params.batch = net.batch;
345 |     params.time_steps = net.time_steps;
346 |     params.net = net;
347 | 
348 |     size_t workspace_size = 0;
349 |     n = n->next;
350 |     int count = 0;
351 |     free_section(s);
352 |     fprintf(stderr, "layer     filters    size              input                output\n");
353 |     while(n)
354 |     {
355 |         params.index = count;
356 |         fprintf(stderr, "%5d ", count);
357 |         s = (section *)n->val;
358 |         options = s->options;
359 | 
360 |         layer l;
361 |         init_layer(l);
362 | 
363 |         //printf("\n(1).workspace_size:%d\n;",l.workspace_size);
364 | 
365 |         LAYER_TYPE lt = string_to_layer_type(s->type);
366 |         if(lt == CONVOLUTIONAL)
367 |         {
368 |             l = parse_convolutional(options, params);
369 |         }
370 |         else if(lt == MAXPOOL)
371 |         {
372 |             l = parse_maxpool(options, params);
373 |         }
374 |         else if(lt == REGION)
375 |         {
376 |            l = parse_region(options, params);
377 |         }
378 | 
379 |         //printf("\n(2).workspace_size:%d\n;",l.workspace_size);
380 |         /*}else if(lt == LOCAL){
381 |             l = parse_local(options, params);
382 |         }else if(lt == ACTIVE){
383 |             l = parse_activation(options, params);
384 |         }else if(lt == RNN){
385 |             l = parse_rnn(options, params);
386 |         }else if(lt == GRU){
387 |             l = parse_gru(options, params);
388 |         }else if(lt == CRNN){
389 |             l = parse_crnn(options, params);
390 |         }else if(lt == CONNECTED){
391 |             l = parse_connected(options, params);
392 |         }else if(lt == CROP){
393 |             l = parse_crop(options, params);
394 |         }else if(lt == COST){
395 |             l = parse_cost(options, params);
396 |         }else if(lt == REGION){
397 |             l = parse_region(options, params);
398 |         }else if(lt == DETECTION){
399 |             l = parse_detection(options, params);
400 |         }else if(lt == SOFTMAX){
401 |             l = parse_softmax(options, params);
402 |             net.hierarchy = l.softmax_tree;
403 |         }else if(lt == NORMALIZATION){
404 |             l = parse_normalization(options, params);
405 |         }else if(lt == BATCHNORM){
406 |             l = parse_batchnorm(options, params);
407 |         }else if(lt == MAXPOOL){
408 |             l = parse_maxpool(options, params);
409 |         }else if(lt == REORG){
410 |             l = parse_reorg(options, params);
411 |         }else if(lt == AVGPOOL){
412 |             l = parse_avgpool(options, params);
413 |         }else if(lt == ROUTE){
414 |             l = parse_route(options, params, net);
415 |         }else if(lt == SHORTCUT){
416 |             l = parse_shortcut(options, params, net);
417 |         }else if(lt == DROPOUT){
418 |             l = parse_dropout(options, params);
419 |             l.output = net.layers[count-1].output;
420 |             l.delta = net.layers[count-1].delta;
421 |         }else{
422 |             fprintf(stderr, "Type not recognized: %s\n", s->type);
423 |         }*/
424 |         l.dontload = option_find_int_quiet(options, "dontload", 0);
425 |         l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
426 |         // check unused kvps
427 |         option_unused(options);
428 |         net.layers[count] = l;
429 |         //printf("n:%d; l.workspace_size: %d;\n",count,l.workspace_size);
430 |         if (l.workspace_size > workspace_size)
431 |         {
432 |             workspace_size = l.workspace_size;
433 |         }
434 |         free_section(s);
435 |         n = n->next;
436 |         count++;
437 |         if(n)
438 |         {
439 |             params.h = l.out_h;
440 |             params.w = l.out_w;
441 |             params.c = l.out_c;
442 |             params.inputs = l.outputs;
443 |         }
444 |     }
445 |     free_list(sections);
446 |     net.outputs = get_network_output_size(net);  // output size
447 |     //printf("net.outputs:%d;\n",net.outputs);
448 |     net.output = get_network_output(net);        // output value???
449 |     if(workspace_size)
450 |     {
451 |         //printf("workspace_size:%ld;\n", workspace_size);
452 |         //??????????
453 |         //net.workspace = (float *)calloc(1, workspace_size);
454 |         net.workspace = (float *)calloc(workspace_size,sizeof(float));
455 |         //net.workspace = (float *)sds_alloc(workspace_size * sizeof(float));
456 |     }
457 |     return net;
458 | }
459 | 
460 | // read configs
461 | list *read_cfg(char *filename)
462 | {
463 |     FILE *file = fopen(filename, "r");
464 |     //if(file == 0) file_error(filename);
465 |     char *line;
466 |     int nu = 0;
467 |     list *sections = make_list();
468 |     section *current = 0;
469 |     while((line=fgetl(file)) != 0)
470 |     {
471 |         nu++;
472 |         strip(line);
473 |         switch(line[0])
474 |         {
475 |             case '[':
476 |                 current = (section *)malloc(sizeof(section));
477 |                 list_insert(sections, current);
478 |                 current->options = make_list();
479 |                 current->type = line;
480 |                 break;
481 |             case '\0':
482 |             case '#':
483 |             case ';':
484 |                 free(line);
485 |                 break;
486 |             default:
487 |                 if(!read_option(line, current->options)){
488 |                     fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line);
489 |                     free(line);
490 |                 }
491 |                 break;
492 |         }
493 |     }
494 |     fclose(file);
495 |     return sections;
496 | }
497 | 
498 | // laod weights
499 | void load_convolutional_weights(layer l, FILE *fp)
500 | {
501 |     if(l.binary)
502 |     {
503 |         //load_convolutional_weights_binary(l, fp);
504 |         //return;
505 |     }
506 |     int num = l.n*l.c*l.size*l.size;
507 |     fread(l.biases, sizeof(float), l.n, fp);
508 |     //printf("num:%d; l.n:%d;\n",num,l.n);
509 |     if (l.batch_normalize && (!l.dontloadscales))
510 |     {
511 |         fread(l.scales, sizeof(float), l.n, fp);
512 |         fread(l.rolling_mean, sizeof(float), l.n, fp);
513 |         fread(l.rolling_variance, sizeof(float), l.n, fp);
514 |         //printf("enter here1\n");
515 |         if(0)
516 |         {
517 |             //
518 |             for(int i = 0; i < l.n; i++)
519 |             {
520 |                 printf("%g, ", l.rolling_mean[i]);
521 |             }
522 |             printf("\n");
523 |             for(int i = 0; i < l.n; i++)
524 |             {
525 |                 printf("%g, ", l.rolling_variance[i]);
526 |             }
527 |             printf("\n");
528 |         }
529 |         if(0)
530 |         {
531 |             fill_cpu(l.n, 0, l.rolling_mean, 1);
532 |             fill_cpu(l.n, 0, l.rolling_variance, 1);
533 |         }
534 |     }
535 |     fread(l.weights, sizeof(float), num, fp);
536 |     //l.adam = 0;
537 |     if(l.adam)
538 |     {
539 |         fread(l.m, sizeof(float), num, fp);
540 |         fread(l.v, sizeof(float), num, fp);
541 |         //printf("enter here2\n");
542 |     }
543 |     //if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1);
544 |     if (l.flipped)
545 |     {
546 |         transpose_matrix(l.weights, l.c*l.size*l.size, l.n);
547 |         //printf("enter here3\n");
548 |     }
549 | /*
550 |     // print weights
551 |     for(int j = 300; j < 400; j++)
552 |     {
553 |         printf("l.weights[%d]:%.12f;\n",j,l.weights[j]);
554 |     }
555 | */
556 |     //if (l.binary) binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.weights);
557 | }
558 | 
559 | // batchnorm weights
560 | void load_batchnorm_weights(layer l, FILE *fp)
561 | {
562 |     fread(l.scales, sizeof(float), l.c, fp);
563 |     fread(l.rolling_mean, sizeof(float), l.c, fp);
564 |     fread(l.rolling_variance, sizeof(float), l.c, fp);
565 | 
566 | }
567 | 
568 | // connected weights
569 | void load_connected_weights(layer l, FILE *fp, int transpose)
570 | {
571 |     fread(l.biases, sizeof(float), l.outputs, fp);
572 |     fread(l.weights, sizeof(float), l.outputs*l.inputs, fp);
573 |     if(transpose)
574 |     {
575 |         transpose_matrix(l.weights, l.inputs, l.outputs);
576 |     }
577 |     //printf("Biases: %f mean %f variance\n", mean_array(l.biases, l.outputs), variance_array(l.biases, l.outputs));
578 |     //printf("Weights: %f mean %f variance\n", mean_array(l.weights, l.outputs*l.inputs), variance_array(l.weights, l.outputs*l.inputs));
579 |     if (l.batch_normalize && (!l.dontloadscales))
580 |     {
581 |         fread(l.scales, sizeof(float), l.outputs, fp);
582 |         fread(l.rolling_mean, sizeof(float), l.outputs, fp);
583 |         fread(l.rolling_variance, sizeof(float), l.outputs, fp);
584 |         //printf("Scales: %f mean %f variance\n", mean_array(l.scales, l.outputs), variance_array(l.scales, l.outputs));
585 |         //printf("rolling_mean: %f mean %f variance\n", mean_array(l.rolling_mean, l.outputs), variance_array(l.rolling_mean, l.outputs));
586 |         //printf("rolling_variance: %f mean %f variance\n", mean_array(l.rolling_variance, l.outputs), variance_array(l.rolling_variance, l.outputs));
587 |     }
588 | 
589 | }
590 | 
591 | // load weights top function
592 | void load_weights_upto(network *net, char *filename, int cutoff)
593 | {
594 |     fprintf(stderr, "Loading weights from %s...", filename);
595 |     fflush(stdout);
596 |     FILE *fp = fopen(filename, "rb");
597 |     //if(!fp) file_error(filename);
598 | 
599 |     int major;
600 |     int minor;
601 |     int revision;
602 |     fread(&major, sizeof(int), 1, fp);
603 |     fread(&minor, sizeof(int), 1, fp);
604 |     fread(&revision, sizeof(int), 1, fp);
605 |     fread(net->seen, sizeof(int), 1, fp);
606 |     int transpose = (major > 1000) || (minor > 1000);
607 |     //
608 |     for(int i = 0; i < net->n && i < cutoff; i++){
609 |         layer l = net->layers[i];
610 |         if (l.dontload) continue;
611 |         if(l.type == CONVOLUTIONAL)
612 |         {
613 |             //printf("layer %d: CONVOLUTIONAl;\n",i);
614 |             load_convolutional_weights(l, fp);
615 |         }
616 |         if(l.type == CONNECTED)
617 |         {
618 |             load_connected_weights(l, fp, transpose);
619 |         }
620 |         if(l.type == BATCHNORM)
621 |         {
622 |             load_batchnorm_weights(l, fp);
623 |         }
624 |         if(l.type == CRNN)
625 |         {
626 |             load_convolutional_weights(*(l.input_layer), fp);
627 |             load_convolutional_weights(*(l.self_layer), fp);
628 |             load_convolutional_weights(*(l.output_layer), fp);
629 |         }
630 |         if(l.type == RNN)
631 |         {
632 |             load_connected_weights(*(l.input_layer), fp, transpose);
633 |             load_connected_weights(*(l.self_layer), fp, transpose);
634 |             load_connected_weights(*(l.output_layer), fp, transpose);
635 |         }
636 |         if(l.type == GRU)
637 |         {
638 |             load_connected_weights(*(l.input_z_layer), fp, transpose);
639 |             load_connected_weights(*(l.input_r_layer), fp, transpose);
640 |             load_connected_weights(*(l.input_h_layer), fp, transpose);
641 |             load_connected_weights(*(l.state_z_layer), fp, transpose);
642 |             load_connected_weights(*(l.state_r_layer), fp, transpose);
643 |             load_connected_weights(*(l.state_h_layer), fp, transpose);
644 |         }
645 |         if(l.type == LOCAL)
646 |         {
647 |             int locations = l.out_w*l.out_h;
648 |             int size = l.size*l.size*l.c*l.n*locations;
649 |             fread(l.biases, sizeof(float), l.outputs, fp);
650 |             fread(l.weights, sizeof(float), size, fp);
651 |         }
652 |     }
653 |     fprintf(stderr, "Done!\n");
654 |     fclose(fp);
655 | }
656 | 
657 | // load weights top function
658 | void load_weights(network *net, char *filename)
659 | {
660 |     load_weights_upto(net, filename, net->n);
661 | }
662 | 
663 | #endif /* SRC_PARSER_CPP_ */
664 | 


--------------------------------------------------------------------------------
/stb_image_write.h:
--------------------------------------------------------------------------------
  1 | /* stb_image_write - v0.98 - public domain - http://nothings.org/stb/stb_image_write.h
  2 |    writes out PNG/BMP/TGA images to C stdio - Sean Barrett 2010
  3 |                             no warranty implied; use at your own risk
  4 | 
  5 | 
  6 |    Before #including,
  7 | 
  8 |        #define STB_IMAGE_WRITE_IMPLEMENTATION
  9 | 
 10 |    in the file that you want to have the implementation.
 11 | 
 12 |    Will probably not work correctly with strict-aliasing optimizations.
 13 | 
 14 | ABOUT:
 15 | 
 16 |    This header file is a library for writing images to C stdio. It could be
 17 |    adapted to write to memory or a general streaming interface; let me know.
 18 | 
 19 |    The PNG output is not optimal; it is 20-50% larger than the file
 20 |    written by a decent optimizing implementation. This library is designed
 21 |    for source code compactness and simplicity, not optimal image file size
 22 |    or run-time performance.
 23 | 
 24 | BUILDING:
 25 | 
 26 |    You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h.
 27 |    You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace
 28 |    malloc,realloc,free.
 29 |    You can define STBIW_MEMMOVE() to replace memmove()
 30 | 
 31 | USAGE:
 32 | 
 33 |    There are four functions, one for each image file format:
 34 | 
 35 |      int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes);
 36 |      int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data);
 37 |      int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data);
 38 |      int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
 39 | 
 40 |    Each function returns 0 on failure and non-0 on success.
 41 | 
 42 |    The functions create an image file defined by the parameters. The image
 43 |    is a rectangle of pixels stored from left-to-right, top-to-bottom.
 44 |    Each pixel contains 'comp' channels of data stored interleaved with 8-bits
 45 |    per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is
 46 |    monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall.
 47 |    The *data pointer points to the first byte of the top-left-most pixel.
 48 |    For PNG, "stride_in_bytes" is the distance in bytes from the first byte of
 49 |    a row of pixels to the first byte of the next row of pixels.
 50 | 
 51 |    PNG creates output files with the same number of components as the input.
 52 |    The BMP format expands Y to RGB in the file format and does not
 53 |    output alpha.
 54 | 
 55 |    PNG supports writing rectangles of data even when the bytes storing rows of
 56 |    data are not consecutive in memory (e.g. sub-rectangles of a larger image),
 57 |    by supplying the stride between the beginning of adjacent rows. The other
 58 |    formats do not. (Thus you cannot write a native-format BMP through the BMP
 59 |    writer, both because it is in BGR order and because it may have padding
 60 |    at the end of the line.)
 61 | 
 62 |    HDR expects linear float data. Since the format is always 32-bit rgb(e)
 63 |    data, alpha (if provided) is discarded, and for monochrome data it is
 64 |    replicated across all three channels.
 65 | 
 66 | CREDITS:
 67 | 
 68 |    PNG/BMP/TGA
 69 |       Sean Barrett
 70 |    HDR
 71 |       Baldur Karlsson
 72 |    TGA monochrome:
 73 |       Jean-Sebastien Guay
 74 |    misc enhancements:
 75 |       Tim Kelsey
 76 |    bugfixes:
 77 |       github:Chribba
 78 | */
 79 | 
 80 | #ifndef INCLUDE_STB_IMAGE_WRITE_H
 81 | #define INCLUDE_STB_IMAGE_WRITE_H
 82 | 
 83 | #ifdef __cplusplus
 84 | extern "C" {
 85 | #endif
 86 | 
// Image writers, one per file format. Per the usage notes at the top of this
// header, each returns 0 on failure and non-0 on success. `w` and `h` are the
// image size in pixels and `comp` is the number of interleaved channels per
// pixel (1=Y, 2=YA, 3=RGB, 4=RGBA).

// PNG: `stride_in_bytes` is the distance in bytes between the starts of
// consecutive pixel rows.
extern int stbi_write_png(char const *filename, int w, int h, int comp, const void  *data, int stride_in_bytes);
// BMP: Y input is expanded to RGB and alpha is not written.
extern int stbi_write_bmp(char const *filename, int w, int h, int comp, const void  *data);
// TGA
extern int stbi_write_tga(char const *filename, int w, int h, int comp, const void  *data);
// HDR: expects linear float data; alpha, if provided, is discarded.
extern int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
 91 | 
 92 | #ifdef __cplusplus
 93 | }
 94 | #endif
 95 | 
 96 | #endif//INCLUDE_STB_IMAGE_WRITE_H
 97 | 
 98 | #ifdef STB_IMAGE_WRITE_IMPLEMENTATION
 99 | 
100 | #include <stdarg.h>
101 | #include <stdlib.h>
102 | #include <stdio.h>
103 | #include <string.h>
104 | #include <math.h>
105 | 
// The three allocator hooks must be overridden together or not at all.
#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && defined(STBIW_REALLOC)
// ok
#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC)
// ok
#else
#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC."
#endif

// Default allocator hooks: the C library functions.
#ifndef STBIW_MALLOC
#define STBIW_MALLOC(sz)    malloc(sz)
#define STBIW_REALLOC(p,sz) realloc(p,sz)
#define STBIW_FREE(p)       free(p)
#endif
// Default memmove hook.
#ifndef STBIW_MEMMOVE
#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz)
#endif


// Default assertion hook: assert() from <assert.h>.
#ifndef STBIW_ASSERT
#include <assert.h>
#define STBIW_ASSERT(x) assert(x)
#endif

typedef unsigned int stbiw_uint32;
// Compile-time check: the array size is -1 (a compile error) unless
// stbiw_uint32 is exactly 4 bytes wide.
typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1];
131 | 
132 | static void writefv(FILE *f, const char *fmt, va_list v)
133 | {
134 |    while (*fmt) {
135 |       switch (*fmt++) {
136 |          case ' ': break;
137 |          case '1': { unsigned char x = (unsigned char) va_arg(v, int); fputc(x,f); break; }
138 |          case '2': { int x = va_arg(v,int); unsigned char b[2];
139 |                      b[0] = (unsigned char) x; b[1] = (unsigned char) (x>>8);
140 |                      fwrite(b,2,1,f); break; }
141 |          case '4': { stbiw_uint32 x = va_arg(v,int); unsigned char b[4];
142 |                      b[0]=(unsigned char)x; b[1]=(unsigned char)(x>>8);
143 |                      b[2]=(unsigned char)(x>>16); b[3]=(unsigned char)(x>>24);
144 |                      fwrite(b,4,1,f); break; }
145 |          default:
146 |             STBIW_ASSERT(0);
147 |             return;
148 |       }
149 |    }
150 | }
151 | 
// Write the three bytes a, b, c to `f`, in that order, with a single fwrite.
static void write3(FILE *f, unsigned char a, unsigned char b, unsigned char c)
{
   unsigned char triple[3];

   triple[0] = a;
   triple[1] = b;
   triple[2] = c;
   fwrite(triple, sizeof triple, 1, f);
}
158 | 
159 | static void write_pixels(FILE *f, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono)
160 | {
161 |    unsigned char bg[3] = { 255, 0, 255}, px[3];
162 |    stbiw_uint32 zero = 0;
163 |    int i,j,k, j_end;
164 | 
165 |    if (y <= 0)
166 |       return;
167 | 
168 |    if (vdir < 0)
169 |       j_end = -1, j = y-1;
170 |    else
171 |       j_end =  y, j = 0;
172 | 
173 |    for (; j != j_end; j += vdir) {
174 |       for (i=0; i < x; ++i) {
175 |          unsigned char *d = (unsigned char *) data + (j*x+i)*comp;
176 |          if (write_alpha < 0)
177 |             fwrite(&d[comp-1], 1, 1, f);
178 |          switch (comp) {
179 |             case 1: fwrite(d, 1, 1, f);
180 |                     break;
181 |             case 2: if (expand_mono)
182 |                        write3(f, d[0],d[0],d[0]); // monochrome bmp
183 |                     else
184 |                        fwrite(d, 1, 1, f);  // monochrome TGA
185 |                     break;
186 |             case 4:
187 |                if (!write_alpha) {
188 |                   // composite against pink background
189 |                   for (k=0; k < 3; ++k)
190 |                      px[k] = bg[k] + ((d[k] - bg[k]) * d[3])/255;
191 |                   write3(f, px[1-rgb_dir],px[1],px[1+rgb_dir]);
192 |                   break;
193 |                }
194 |                /* FALLTHROUGH */
195 |             case 3:
196 |                write3(f, d[1-rgb_dir],d[1],d[1+rgb_dir]);
197 |                break;
198 |          }
199 |          if (write_alpha > 0)
200 |             fwrite(&d[comp-1], 1, 1, f);
201 |       }
202 |       fwrite(&zero,scanline_pad,1,f);
203 |    }
204 | }
205 | 
// Create `filename`, write a header described by `fmt` and the variadic
// arguments (see writefv), then the pixel data (see write_pixels).
//
// Returns 0 when x or y is negative or the file cannot be opened, and 1 once
// the file has been written and closed.
static int outfile(char const *filename, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...)
{
   FILE *out;
   va_list header_args;

   if (x < 0 || y < 0)
      return 0;

   out = fopen(filename, "wb");
   if (out == NULL)
      return 0;

   va_start(header_args, fmt);
   writefv(out, fmt, header_args);
   va_end(header_args);

   write_pixels(out, rgb_dir, vdir, x, y, comp, data, alpha, pad, expand_mono);
   fclose(out);
   return 1;
}
221 | // Write an x-by-y image with comp components per pixel to `filename` as a 24-bit BMP (three bytes per pixel, rows padded to a multiple of four bytes). Returns the result of outfile: 1 if the file could be opened, 0 otherwise.
222 | int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data)
223 | {
224 |    int row_pad = (-x*3) & 3;                       // bytes that round a 3*x-byte row up to a multiple of 4
225 |    int file_size = 14+40 + (x*3 + row_pad)*y;      // 14-byte file header + 40-byte bitmap header + padded rows
226 |    return outfile(filename, -1, -1, x, y, comp, 1, (void *) data, 0, row_pad, "11 4 22 4" "4 44 22 444444",
227 |                   'B', 'M', file_size, 0, 0, 14+40,          // file header
228 |                   40, x, y, 1, 24, 0, 0, 0, 0, 0, 0);        // bitmap header
229 | }
230 | // Write an x-by-y image with comp components per pixel to `filename` as a TGA file; for comp 2 and 4 the last component is written as alpha. Returns the result of outfile: 1 if the file could be opened, 0 otherwise.
231 | int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data)
232 | {
233 |    int alpha_bytes = (comp == 2 || comp == 4) ? 1 : 0;   // Y+A and RGB+A carry one alpha byte per pixel
234 |    int color_bytes = comp - alpha_bytes;
235 |    int image_type = (color_bytes >= 2) ? 2 : 3;          // header type field: 2 for RGB/RGBA, 3 for Y/YA
236 |    return outfile(filename, -1, -1, x, y, comp, 0, (void *) data, alpha_bytes, 0, "111 221 2222 11",
237 |                   0, 0, image_type, 0, 0, 0, 0, 0, x, y, (color_bytes + alpha_bytes)*8, alpha_bytes*8);
238 | }
239 | 
240 | // *************************************************************************************************
241 | // Radiance RGBE HDR writer
242 | // by Baldur Karlsson
243 | #define stbiw__max(a, b)  ((a) > (b) ? (a) : (b))  // larger of a and b; an argument may be evaluated twice, so pass expressions without side effects
244 | // Convert one linear RGB triple to four RGBE bytes: three 8-bit mantissas that share the exponent of the largest component.
245 | void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear)
246 | {
247 |    int exponent, c;
248 |    float scale;
249 |    float peak = stbiw__max(linear[0], stbiw__max(linear[1], linear[2]));
250 | 
251 |    if (peak < 1e-32) {   // too small to represent: all four bytes are zero
252 |       rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0;
253 |       return;
254 |    }
255 | 
256 |    scale = (float) frexp(peak, &exponent) * 256.0f/peak;   // scales peak itself to 256 times its frexp mantissa
257 |    for (c = 0; c < 3; ++c)
258 |       rgbe[c] = (unsigned char)(linear[c] * scale);
259 |    rgbe[3] = (unsigned char)(exponent + 128);               // the exponent is stored with a bias of 128
260 | }
261 | // Write one RLE run packet: a count byte of length+128 followed by the single byte that the run repeats.
262 | void stbiw__write_run_data(FILE *f, int length, unsigned char databyte)
263 | {
264 |    int count_byte = length + 128;   // the +128 offset keeps run counts apart from dump counts, which are at most 128
265 |    STBIW_ASSERT(count_byte <= 255);
266 |    fputc((unsigned char) count_byte, f);
267 |    fputc(databyte, f);
268 | }
269 | // Write one RLE dump packet: a count byte followed by `length` literal bytes copied from data.
270 | void stbiw__write_dump_data(FILE *f, int length, unsigned char *data)
271 | {
272 |    int count_byte = length & 0xff;
273 |    STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code
274 |    fputc(count_byte, f);
275 |    fwrite(data, length, 1, f);
276 | }
277 | // Convert one scanline of `width` pixels (comp floats each) to RGBE and write it to f. Widths from 8 to 32767 are written run-length encoded through scratch, which must hold 4*width bytes; any other width is written as plain 4-byte pixels.
278 | void stbiw__write_hdr_scanline(FILE *f, int width, int comp, unsigned char *scratch, const float *scanline)
279 | {
280 |    unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; // bytes 2 and 3 receive the width below; written only on the RLE path
281 |    unsigned char rgbe[4];
282 |    float linear[3] = {0}; // stays as it is whenever comp is not 1..4
283 |    int x;
284 | 
285 |    scanlineheader[2] = (width&0xff00)>>8;
286 |    scanlineheader[3] = (width&0x00ff);
287 | 
288 |    /* skip RLE for images too small or large */
289 |    if (width < 8 || width >= 32768) {
290 |       for (x=0; x < width; x++) {
291 |          switch (comp) {
292 |             case 4: /* fallthrough */
293 |             case 3: linear[2] = scanline[x*comp + 2];
294 |                     linear[1] = scanline[x*comp + 1];
295 |                     linear[0] = scanline[x*comp + 0];
296 |                     break;
297 |             case 2: /* fallthrough */
298 |             case 1: linear[0] = linear[1] = linear[2] = scanline[x*comp + 0]; // grey input: replicate the first component; a second component is ignored
299 |                     break;
300 |          }
301 |          stbiw__linear_to_rgbe(rgbe, linear);
302 |          fwrite(rgbe, 4, 1, f);
303 |       }
304 |    } else {
305 |       int c,r;
306 |       /* encode into scratch buffer */
307 |       for (x=0; x < width; x++) {
308 |          switch(comp) {
309 |             case 4: /* fallthrough */
310 |             case 3: linear[2] = scanline[x*comp + 2];
311 |                     linear[1] = scanline[x*comp + 1];
312 |                     linear[0] = scanline[x*comp + 0];
313 |                     break;
314 |             case 2: /* fallthrough */
315 |             case 1: linear[0] = linear[1] = linear[2] = scanline[x*comp + 0];
316 |                     break;
317 |          }
318 |          stbiw__linear_to_rgbe(rgbe, linear);
319 |          scratch[x + width*0] = rgbe[0]; // scratch is laid out as four planes of `width` bytes, one per RGBE byte
320 |          scratch[x + width*1] = rgbe[1];
321 |          scratch[x + width*2] = rgbe[2];
322 |          scratch[x + width*3] = rgbe[3];
323 |       }
324 | 
325 |       fwrite(scanlineheader, 4, 1, f);
326 | 
327 |       /* RLE each component separately */
328 |       for (c=0; c < 4; c++) {
329 |          unsigned char *comp = &scratch[width*c]; // shadows the int parameter comp: inside this loop comp is plane c of scratch
330 | 
331 |          x = 0;
332 |          while (x < width) {
333 |             // find first run
334 |             r = x;
335 |             while (r+2 < width) { // a run starts where three consecutive bytes are equal
336 |                if (comp[r] == comp[r+1] && comp[r] == comp[r+2])
337 |                   break;
338 |                ++r;
339 |             }
340 |             if (r+2 >= width) // no run found: the rest of the plane is dumped as literals
341 |                r = width;
342 |             // dump up to first run
343 |             while (x < r) {
344 |                int len = r-x;
345 |                if (len > 128) len = 128; // a dump packet carries at most 128 bytes
346 |                stbiw__write_dump_data(f, len, &comp[x]);
347 |                x += len;
348 |             }
349 |             // if there's a run, output it
350 |             if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd
351 |                // find next byte after run
352 |                while (r < width && comp[r] == comp[x])
353 |                   ++r;
354 |                // output run up to r
355 |                while (x < r) {
356 |                   int len = r-x;
357 |                   if (len > 127) len = 127; // a run packet covers at most 127 bytes, so length+128 fits in one byte
358 |                   stbiw__write_run_data(f, len, comp[x]);
359 |                   x += len;
360 |                }
361 |             }
362 |          }
363 |       }
364 |    }
365 | }
366 | // Write an x-by-y float image (comp floats per pixel) to `filename` as a Radiance RGBE .hdr file. Returns 1 on success; returns 0 for non-positive dimensions, NULL data, a failed scratch allocation, or a file that cannot be opened.
367 | int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data)
368 | {
369 |    int i;
370 |    FILE *f;
371 |    unsigned char *scratch;
372 |    if (y <= 0 || x <= 0 || data == NULL) return 0;
373 |    scratch = (unsigned char *) STBIW_MALLOC(x*4);   /* Each component is stored separately: scratch space for one full output scanline. */
374 |    f = scratch ? fopen(filename, "wb") : NULL;      // on a failed allocation, neither create the file nor hand a NULL buffer to the scanline writer
375 |    if (f) {
376 |       fprintf(f, "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"      );
377 |       fprintf(f, "EXPOSURE=          1.0000000000000\n\n-Y %d +X %d\n"                 , y, x);
378 |       for(i=0; i < y; i++)
379 |          stbiw__write_hdr_scanline(f, x, comp, scratch, data + comp*i*x);
380 |       fclose(f);
381 |    }
382 |    if (scratch) STBIW_FREE(scratch);                // guarded because STBIW_FREE may be a user-supplied macro
383 |    return f != NULL;
384 | }
385 | 
386 | /////////////////////////////////////////////////////////
387 | // PNG
388 | 
389 | // stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size()
390 | #define stbiw__sbraw(a) ((int *) (a) - 2)   // start of the allocation: two ints sit directly in front of the elements
391 | #define stbiw__sbm(a)   stbiw__sbraw(a)[0]  // capacity, in elements
392 | #define stbiw__sbn(a)   stbiw__sbraw(a)[1]  // elements in use
393 | 
394 | #define stbiw__sbneedgrow(a,n)  ((a)==0 || stbiw__sbn(a)+(n) >= stbiw__sbm(a))   // n is parenthesized so an expression argument cannot regroup the sum
395 | #define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0)
396 | #define stbiw__sbgrow(a,n)  stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a)))    // may move the buffer, so a must be an lvalue
397 | 
398 | #define stbiw__sbpush(a, v)      (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v))
399 | #define stbiw__sbcount(a)        ((a) ? stbiw__sbn(a) : 0)       // a null buffer counts as empty
400 | #define stbiw__sbfree(a)         ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0)
401 | // Grow (or first allocate) the stretchy buffer *arr, whose elements are itemsize bytes each: an existing capacity is doubled and raised by `increment`, a new buffer starts with increment+1 slots. Returns the possibly moved *arr, which is left untouched if the reallocation fails.
402 | static void *stbiw__sbgrowf(void **arr, int increment, int itemsize)
403 | {
404 |    int *old_block = *arr ? stbiw__sbraw(*arr) : 0;
405 |    int capacity = old_block ? 2*old_block[0]+increment : increment+1;
406 |    int *block = (int *) STBIW_REALLOC(old_block, itemsize * capacity + sizeof(int)*2);   // the two extra ints hold capacity and count
407 |    STBIW_ASSERT(block);
408 |    if (!block) return *arr;
409 |    block[0] = capacity;
410 |    if (!old_block) block[1] = 0;   // a brand-new buffer starts empty; a grown one keeps its count
411 |    *arr = (void *) (block + 2);    // callers see the element area, just past the two-int header
412 |    return *arr;
413 | }
414 | // Move every complete byte pending in *bitbuffer, lowest bits first, onto the end of the stretchy buffer `data`, leaving fewer than 8 bits behind. Returns `data`, which may have been reallocated.
415 | static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount)
416 | {
417 |    for (; *bitcount >= 8; *bitcount -= 8) {
418 |       unsigned char low_byte = (unsigned char) *bitbuffer;
419 |       stbiw__sbpush(data, low_byte);
420 |       *bitbuffer >>= 8;
421 |    }
422 |    return data;
423 | }
424 | // Return the lowest codebits bits of code in reversed order (bit 0 of code becomes the top bit of the codebits-bit result).
425 | static int stbiw__zlib_bitrev(int code, int codebits)
426 | {
427 |    int reversed = 0;
428 |    for (; codebits != 0; --codebits) {
429 |       reversed = (reversed << 1) | (code & 1);   // shift the result up and append the next-lowest input bit
430 |       code >>= 1;
431 |    }
432 |    return reversed;
433 | }
434 | // Count how many leading bytes of a and b are equal, looking at no more than `limit` bytes and never more than 258.
435 | static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit)
436 | {
437 |    int matched = 0;
438 |    while (matched < limit && matched < 258 && a[matched] == b[matched])
439 |       ++matched;
440 |    return matched;
441 | }
442 | // Mix the three bytes data[0..2] into a stbiw_uint32 hash value; the caller masks it down to a hash-table index.
443 | static unsigned int stbiw__zhash(unsigned char *data)
444 | {
445 |    stbiw_uint32 h = data[0] + (data[1] << 8) + (data[2] << 16);   // pack the three bytes, data[0] lowest
446 |    h = h ^ (h << 3);
447 |    h = h + (h >> 5);
448 |    h = h ^ (h << 4);
449 |    h = h + (h >> 17);
450 |    h = h ^ (h << 25);
451 |    h = h + (h >> 6);
452 |    return h;
453 | }
454 | // Bit-level output helpers. They expand in place and rely on variables named out, bitbuf and bitcount being in scope, as they are in stbi_zlib_compress.
455 | #define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount))
456 | #define stbiw__zlib_add(code,codebits) \
457 |       (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) // place code above the bits already pending, then emit every complete byte
458 | #define stbiw__zlib_huffa(b,c)  stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) // Huffman codes are bit-reversed before being appended
459 | // default huffman tables
460 | #define stbiw__zlib_huff1(n)  stbiw__zlib_huffa(0x30 + (n), 8) // symbols 0..143: 8-bit codes starting at 0x30
461 | #define stbiw__zlib_huff2(n)  stbiw__zlib_huffa(0x190 + (n)-144, 9) // symbols 144..255: 9-bit codes starting at 0x190
462 | #define stbiw__zlib_huff3(n)  stbiw__zlib_huffa(0 + (n)-256,7) // symbols 256..279: 7-bit codes starting at 0
463 | #define stbiw__zlib_huff4(n)  stbiw__zlib_huffa(0xc0 + (n)-280,8) // symbols 280 and up: 8-bit codes starting at 0xc0
464 | #define stbiw__zlib_huff(n)  ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n))
465 | #define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) // shortcut for symbols known to be at most 255, i.e. literal bytes
466 | 
467 | #define stbiw__ZHASH   16384 // number of hash-table buckets; hashes are masked with stbiw__ZHASH-1, so this must stay a power of two
468 | // Compress data_len bytes at `data` into a zlib stream made of a single fixed-Huffman deflate block followed by an Adler-32 checksum. Returns a buffer to release with STBIW_FREE and stores its length in *out_len. quality (raised to at least 5) controls bucket size: each hash bucket is cut back to `quality` entries whenever it reaches 2*quality.
469 | unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality)
470 | {
471 |    static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; // smallest match length of each length code; the trailing 259 only stops the search loop
472 |    static unsigned char  lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,  4,  5,  5,  5,  5,  0 }; // number of extra bits that follow each length code
473 |    static unsigned short distc[]   = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; // smallest distance of each distance code; the trailing 32768 only stops the search loop
474 |    static unsigned char  disteb[]  = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; // number of extra bits that follow each distance code
475 |    unsigned int bitbuf=0;
476 |    int i,j, bitcount=0;
477 |    unsigned char *out = NULL;
478 |    unsigned char **hash_table[stbiw__ZHASH]; // 64KB on the stack! (that figure is for 4-byte pointers; in general stbiw__ZHASH * sizeof(pointer))
479 |    if (quality < 5) quality = 5;
480 | 
481 |    stbiw__sbpush(out, 0x78);   // DEFLATE 32K window
482 |    stbiw__sbpush(out, 0x5e);   // FLEVEL = 1
483 |    stbiw__zlib_add(1,1);  // BFINAL = 1
484 |    stbiw__zlib_add(1,2);  // BTYPE = 1 -- fixed huffman
485 | 
486 |    for (i=0; i < stbiw__ZHASH; ++i)
487 |       hash_table[i] = NULL;
488 | 
489 |    i=0;
490 |    while (i < data_len-3) { // stop while data[i+3] is still inside the input: the lazy-match hash below reads data[i+1..i+3]
491 |       // hash next 3 bytes of data to be compressed
492 |       int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3;
493 |       unsigned char *bestloc = 0;
494 |       unsigned char **hlist = hash_table[h];
495 |       int n = stbiw__sbcount(hlist);
496 |       for (j=0; j < n; ++j) {
497 |          if (hlist[j]-data > i-32768) { // if entry lies within window
498 |             int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i);
499 |             if (d >= best) best=d,bestloc=hlist[j]; // only matches of 3 or more bytes qualify; on a tie the later bucket entry wins
500 |          }
501 |       }
502 |       // when hash table entry is too long, delete half the entries
503 |       if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) {
504 |          STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality);
505 |          stbiw__sbn(hash_table[h]) = quality;
506 |       }
507 |       stbiw__sbpush(hash_table[h],data+i); // remember this position for later matches
508 | 
509 |       if (bestloc) {
510 |          // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal
511 |          h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1);
512 |          hlist = hash_table[h];
513 |          n = stbiw__sbcount(hlist);
514 |          for (j=0; j < n; ++j) {
515 |             if (hlist[j]-data > i-32767) {
516 |                int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1);
517 |                if (e > best) { // if next match is better, bail on current match
518 |                   bestloc = NULL;
519 |                   break;
520 |                }
521 |             }
522 |          }
523 |       }
524 | 
525 |       if (bestloc) {
526 |          int d = (int) (data+i - bestloc); // distance back
527 |          STBIW_ASSERT(d <= 32767 && best <= 258);
528 |          for (j=0; best > lengthc[j+1]-1; ++j); // find the length code whose range contains best
529 |          stbiw__zlib_huff(j+257); // length codes are symbols 257 and up
530 |          if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]);
531 |          for (j=0; d > distc[j+1]-1; ++j); // find the distance code whose range contains d
532 |          stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); // distance codes are written as 5 bits
533 |          if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]);
534 |          i += best;
535 |       } else {
536 |          stbiw__zlib_huffb(data[i]); // no usable match: emit the byte as a literal
537 |          ++i;
538 |       }
539 |    }
540 |    // write out final bytes
541 |    for (;i < data_len; ++i)
542 |       stbiw__zlib_huffb(data[i]);
543 |    stbiw__zlib_huff(256); // end of block
544 |    // pad with 0 bits to byte boundary
545 |    while (bitcount)
546 |       stbiw__zlib_add(0,1);
547 | 
548 |    for (i=0; i < stbiw__ZHASH; ++i)
549 |       (void) stbiw__sbfree(hash_table[i]);
550 | 
551 |    {
552 |       // compute adler32 on input
553 |       unsigned int i=0, s1=1, s2=0, blocklen = data_len % 5552; // the first chunk takes the remainder, so every later chunk is exactly 5552 bytes
554 |       int j=0;
555 |       while (j < data_len) {
556 |          for (i=0; i < blocklen; ++i) s1 += data[j+i], s2 += s1;
557 |          s1 %= 65521, s2 %= 65521; // the sums are reduced once per chunk rather than once per byte
558 |          j += blocklen;
559 |          blocklen = 5552;
560 |       }
561 |       stbiw__sbpush(out, (unsigned char) (s2 >> 8)); // checksum bytes go out as s2 high, s2 low, s1 high, s1 low
562 |       stbiw__sbpush(out, (unsigned char) s2);
563 |       stbiw__sbpush(out, (unsigned char) (s1 >> 8));
564 |       stbiw__sbpush(out, (unsigned char) s1);
565 |    }
566 |    *out_len = stbiw__sbn(out); // NOTE(review): out is never checked for NULL; this relies on every stbiw__sbpush above having succeeded
567 |    // make returned pointer freeable
568 |    STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len);
569 |    return (unsigned char *) stbiw__sbraw(out);
570 | }
571 | // Compute a CRC over len bytes at buffer with the reflected polynomial 0xedb88320, an all-ones initial value and a final bitwise complement. The 256-entry lookup table is built on the first call.
572 | unsigned int stbiw__crc32(unsigned char *buffer, int len)
573 | {
574 |    static unsigned int crc_table[256];
575 |    unsigned int crc = ~0u;
576 |    int n, bit;
577 |    if (crc_table[1] == 0)   // entry 1 is nonzero once the table has been built
578 |       for (n = 0; n < 256; n++)
579 |          for (crc_table[n] = n, bit = 0; bit < 8; ++bit)
580 |             crc_table[n] = (crc_table[n] & 1) ? (crc_table[n] >> 1) ^ 0xedb88320 : crc_table[n] >> 1;
581 |    while (len-- > 0)
582 |       crc = crc_table[(crc ^ *buffer++) & 0xff] ^ (crc >> 8);
583 |    return ~crc;
584 | }
585 | // PNG output helpers: each stores four bytes at its first argument and advances it by 4, so that argument must be a modifiable pointer lvalue.
586 | #define stbiw__wpng4(o,a,b,c,d) ((o)[0]=(unsigned char)(a),(o)[1]=(unsigned char)(b),(o)[2]=(unsigned char)(c),(o)[3]=(unsigned char)(d),(o)+=4)
587 | #define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v))   // 32-bit value, most significant byte first; v is evaluated four times. No trailing ';' so it works as an expression
588 | #define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3])         // four-character chunk tag
589 | // Append the CRC of the len+4 bytes that end at *data (the chunk tag plus its len payload bytes, as the callers in this file use it) and advance *data past the four CRC bytes.
590 | static void stbiw__wpcrc(unsigned char **data, int len)
591 | {
592 |    unsigned int checksum = stbiw__crc32(*data - (len+4), len+4);
593 |    stbiw__wpng4(*data, checksum>>24, checksum>>16, checksum>>8, checksum);   // most significant byte first
594 | }
595 | // Paeth predictor: of a, b and c, return the one closest to a + b - c, preferring a and then b on ties. The result is truncated to unsigned char.
596 | static unsigned char stbiw__paeth(int a, int b, int c)
597 | {
598 |    int estimate = a + b - c;
599 |    int da = abs(estimate - a), db = abs(estimate - b), dc = abs(estimate - c);
600 |    int nearest = (da <= db && da <= dc) ? a : (db <= dc) ? b : c;
601 |    return (unsigned char) nearest;
602 | }
603 | // Encode an x-by-y image with n 8-bit components per pixel (rows stride_bytes apart; 0 means tightly packed) as a complete PNG file in memory. Returns a buffer to release with STBIW_FREE and stores its size in *out_len; returns 0 if an allocation fails, and no longer leaks the compressed stream in that case.
604 | unsigned char *stbi_write_png_to_mem(unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len)
605 | {
606 |    int ctype[5] = { -1, 0, 4, 2, 6 }; // IHDR colour type indexed by n: 1 grey, 2 grey+alpha, 3 RGB, 4 RGBA; n is expected to be 1..4
607 |    unsigned char sig[8] = { 137,80,78,71,13,10,26,10 }; // PNG file signature
608 |    unsigned char *out,*o, *filt, *zlib;
609 |    signed char *line_buffer;
610 |    int i,j,k,p,zlen;
611 | 
612 |    if (stride_bytes == 0)
613 |       stride_bytes = x * n;
614 | 
615 |    filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; // per row: one filter-type byte plus x*n filtered bytes
616 |    line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; }
617 |    for (j=0; j < y; ++j) {
618 |       static int mapping[] = { 0,1,2,3,4 };
619 |       static int firstmap[] = { 0,1,0,5,6 }; // row 0 has no row above: cases 5 and 6 are cases 3 and 4 with that row taken as zero, and slot 2 uses case 0
620 |       int *mymap = j ? mapping : firstmap;
621 |       int best = 0, bestval = 0x7fffffff;
622 |       for (p=0; p < 2; ++p) { // p=0: try every filter and remember the cheapest; p=1: redo only the winner so line_buffer holds its output
623 |          for (k= p?best:0; k < 5; ++k) {
624 |             int type = mymap[k],est=0;
625 |             unsigned char *z = pixels + stride_bytes*j;
626 |             for (i=0; i < n; ++i) // first pixel of the row: there is no pixel to its left
627 |                switch (type) {
628 |                   case 0: line_buffer[i] = z[i]; break;
629 |                   case 1: line_buffer[i] = z[i]; break;
630 |                   case 2: line_buffer[i] = z[i] - z[i-stride_bytes]; break;
631 |                   case 3: line_buffer[i] = z[i] - (z[i-stride_bytes]>>1); break;
632 |                   case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-stride_bytes],0)); break;
633 |                   case 5: line_buffer[i] = z[i]; break;
634 |                   case 6: line_buffer[i] = z[i]; break;
635 |                }
636 |             for (i=n; i < x*n; ++i) { // remaining pixels: z[i-n] is the same component of the pixel to the left
637 |                switch (type) {
638 |                   case 0: line_buffer[i] = z[i]; break;
639 |                   case 1: line_buffer[i] = z[i] - z[i-n]; break;
640 |                   case 2: line_buffer[i] = z[i] - z[i-stride_bytes]; break;
641 |                   case 3: line_buffer[i] = z[i] - ((z[i-n] + z[i-stride_bytes])>>1); break;
642 |                   case 4: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-stride_bytes], z[i-stride_bytes-n]); break;
643 |                   case 5: line_buffer[i] = z[i] - (z[i-n]>>1); break;
644 |                   case 6: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break;
645 |                }
646 |             }
647 |             if (p) break; // second pass: the winning filter has been recomputed, nothing left to compare
648 |             for (i=0; i < x*n; ++i) // cost estimate: sum of the absolute values of the filtered bytes read as signed
649 |                est += abs((signed char) line_buffer[i]);
650 |             if (est < bestval) { bestval = est; best = k; }
651 |          }
652 |       }
653 |       // when we get here, best contains the filter type, and line_buffer contains the data
654 |       filt[j*(x*n+1)] = (unsigned char) best;
655 |       STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n);
656 |    }
657 |    STBIW_FREE(line_buffer);
658 |    zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, 8); // increase 8 to get smaller but use more memory
659 |    STBIW_FREE(filt);
660 |    if (!zlib) return 0;
661 | 
662 |    // each tag requires 12 bytes of overhead
663 |    out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12);
664 |    if (!out) { STBIW_FREE(zlib); return 0; } // the compressed stream is still owned here, so release it before giving up
665 |    *out_len = 8 + 12+13 + 12+zlen + 12;
666 | 
667 |    o=out;
668 |    STBIW_MEMMOVE(o,sig,8); o+= 8;
669 |    stbiw__wp32(o, 13); // header length
670 |    stbiw__wptag(o, "IHDR");
671 |    stbiw__wp32(o, x);
672 |    stbiw__wp32(o, y);
673 |    *o++ = 8; // bit depth
674 |    *o++ = (unsigned char) ctype[n]; // colour type
675 |    *o++ = 0; // compression method
676 |    *o++ = 0; // filter method
677 |    *o++ = 0; // interlace method
678 |    stbiw__wpcrc(&o,13);
679 | 
680 |    stbiw__wp32(o, zlen);
681 |    stbiw__wptag(o, "IDAT");
682 |    STBIW_MEMMOVE(o, zlib, zlen);
683 |    o += zlen;
684 |    STBIW_FREE(zlib);
685 |    stbiw__wpcrc(&o, zlen);
686 | 
687 |    stbiw__wp32(o,0);
688 |    stbiw__wptag(o, "IEND");
689 |    stbiw__wpcrc(&o,0);
690 | 
691 |    STBIW_ASSERT(o == out + *out_len);
692 | 
693 |    return out;
694 | }
695 | 
696 | int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes)
697 | {
698 |    FILE *f;
699 |    int len;
700 |    unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len);
701 |    if (!png) return 0;
702 |    f = fopen(filename, "wb");
703 |    if (!f) { STBIW_FREE(png); return 0; }
704 |    fwrite(png, 1, len, f);
705 |    fclose(f);
706 |    STBIW_FREE(png);
707 |    return 1;
708 | }
709 | #endif // STB_IMAGE_WRITE_IMPLEMENTATION
710 | 
711 | /* Revision history
712 |       0.98 (2015-04-08)
713 |              added STBIW_MALLOC, STBIW_ASSERT etc
714 |       0.97 (2015-01-18)
715 |              fixed HDR asserts, rewrote HDR rle logic
716 |       0.96 (2015-01-17)
717 |              add HDR output
718 |              fix monochrome BMP
719 |       0.95 (2014-08-17)
720 |              add monochrome TGA output
721 |       0.94 (2014-05-31)
722 |              rename private functions to avoid conflicts with stb_image.h
723 |       0.93 (2014-05-27)
724 |              warning fixes
725 |       0.92 (2010-08-01)
726 |              casts to unsigned char to fix warnings
727 |       0.91 (2010-07-17)
728 |              first public release
729 |       0.90   first internal release
730 | */
731 | 


--------------------------------------------------------------------------------