├── Makefile ├── README.md ├── cnn.h ├── mnist.c ├── bnn.c ├── rnn.c └── cnn.c /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | RM=rm -f 4 | CC=cc -O -Wall -Werror 5 | CURL=curl 6 | GZIP=gzip 7 | 8 | LIBS=-lm 9 | 10 | DATADIR=./data 11 | MNIST_FILES= \ 12 | $(DATADIR)/train-images-idx3-ubyte \ 13 | $(DATADIR)/train-labels-idx1-ubyte \ 14 | $(DATADIR)/t10k-images-idx3-ubyte \ 15 | $(DATADIR)/t10k-labels-idx1-ubyte 16 | 17 | all: test_rnn 18 | 19 | clean: 20 | -$(RM) ./bnn ./mnist ./rnn *.o 21 | 22 | get_mnist: 23 | -mkdir ./data 24 | -$(CURL) http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz | \ 25 | $(GZIP) -dc > ./data/train-images-idx3-ubyte 26 | -$(CURL) http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz | \ 27 | $(GZIP) -dc > ./data/train-labels-idx1-ubyte 28 | -$(CURL) http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz | \ 29 | $(GZIP) -dc > ./data/t10k-images-idx3-ubyte 30 | -$(CURL) http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz | \ 31 | $(GZIP) -dc > ./data/t10k-labels-idx1-ubyte 32 | 33 | test_bnn: ./bnn 34 | ./bnn 35 | 36 | test_mnist: ./mnist $(MNIST_FILES) 37 | ./mnist $(MNIST_FILES) 38 | 39 | test_rnn: ./rnn 40 | ./rnn 41 | 42 | ./bnn: bnn.c 43 | $(CC) -o $@ $^ $(LIBS) 44 | 45 | ./mnist: mnist.c cnn.c 46 | $(CC) -o $@ $^ $(LIBS) 47 | 48 | ./rnn: rnn.c 49 | $(CC) -o $@ $^ $(LIBS) 50 | 51 | mnist.c: cnn.h 52 | cnn.c: cnn.h 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Convolutional Neural Network in C 2 | 3 | * Plain C99. 4 | * Simple. 5 | * Straightforward. 6 | * For educational purposes. 7 | 8 | ## `bnn.c` 9 | 10 | * Primitive SGD. 11 | * Only one type of layer (sigmoid). 12 | * No minibatch, no dropout, etc. 13 | 14 | ### Exercises 15 | 16 | * `$ cc -o bnn bnn.c -lm` 17 | * Increase the number of epochs and see how the error rate drops. 18 | * Try different learning rates. 19 | * Try different functions to learn. 20 | 21 | ## `cnn.c` 22 | 23 | * Simple SGD + Minibatch. 24 | * Three types of layers (input, convolutional, fully-connected). 25 | * ReLU for conv layers. 26 | * Tanh for non-last fc layers. 27 | * Softmax for the output (last) layer. 28 | 29 | ### Exercises 30 | 31 | * Obtain the MNIST database from http://yann.lecun.com/exdb/mnist/ 32 | * Compile and run `mnist.c`. 33 | * Set the batch size to 1 (no minibatch) and see the results. 34 | * Try changing the last layer from softmax to tanh. 35 | * Change the network configurations and see how the accuracy changes. 36 | 37 | ## `rnn.c` 38 | 39 | * Stateful + Simple SGD + Minibatch. 40 | * Only one type of layer (Truncated BPTT). 41 | * No gate. 42 | 43 | ## What I (re)discovered through this (re)implementation. 44 | 45 | * Use a proper learning rate. 46 | * Use minibatch training. 47 | * Use Softmax for the output layer. 48 | * Use Tanh/ReLU. 49 | * Choose the initial weight distribution wisely. 50 | * Feed the same data multiple times in a random order. 51 | * Memorize past outputs/errors for RNN. 52 | 53 | ### What I further learned... 54 | 55 | In addition to the usual ML tips (more data is better, 56 | balanced data is better, data prep. matters, etc.), 57 | I learned the following when you design your own NN model: 58 | 59 | * Encoding/decoding matters. 60 | * Loss function is super important. It can make or break the whole project. 
61 | * Learning rate / scheduler is also important. 62 | (But today it's getting easier with Adam, etc.) 63 | * Do not mix different activation functions at the output layer. 64 | -------------------------------------------------------------------------------- /cnn.h: -------------------------------------------------------------------------------- 1 | /* 2 | cnn.h 3 | Convolutional Neural Network in C. 4 | */ 5 | 6 | 7 | /* LayerType 8 | */ 9 | typedef enum _LayerType { 10 | LAYER_INPUT = 0, 11 | LAYER_FULL, 12 | LAYER_CONV 13 | } LayerType; 14 | 15 | 16 | /* Layer 17 | */ 18 | typedef struct _Layer { 19 | 20 | int lid; /* Layer ID */ 21 | struct _Layer* lprev; /* Previous Layer */ 22 | struct _Layer* lnext; /* Next Layer */ 23 | 24 | int depth, width, height; /* Shape */ 25 | 26 | int nnodes; /* Num. of Nodes */ 27 | double* outputs; /* Node Outputs */ 28 | double* gradients; /* Node Gradients */ 29 | double* errors; /* Node Errors */ 30 | 31 | int nbiases; /* Num. of Biases */ 32 | double* biases; /* Biases (trained) */ 33 | double* u_biases; /* Bias updates */ 34 | 35 | int nweights; /* Num. of Weights */ 36 | double* weights; /* Weights (trained) */ 37 | double* u_weights; /* Weight updates */ 38 | 39 | LayerType ltype; /* Layer type */ 40 | union { 41 | /* Full */ 42 | struct { 43 | } full; 44 | 45 | /* Conv */ 46 | struct { 47 | int kernsize; /* kernel size (>0) */ 48 | int padding; /* padding size */ 49 | int stride; /* stride (>0) */ 50 | } conv; 51 | }; 52 | 53 | } Layer; 54 | 55 | /* Layer_create_input(depth, width, height) 56 | Creates an input Layer with size (depth x weight x height). 57 | */ 58 | Layer* Layer_create_input( 59 | int depth, int width, int height); 60 | 61 | /* Layer_create_full(lprev, nnodes, std) 62 | Creates a fully-connected Layer. 63 | */ 64 | Layer* Layer_create_full( 65 | Layer* lprev, int nnodes, double std); 66 | 67 | /* Layer_create_conv(lprev, depth, width, height, kernsize, padding, stride, std) 68 | Creates a convolutional Layer. 69 | */ 70 | Layer* Layer_create_conv( 71 | Layer* lprev, int depth, int width, int height, 72 | int kernsize, int padding, int stride, double std); 73 | 74 | /* Layer_destroy(self) 75 | Releases the memory. 76 | */ 77 | void Layer_destroy(Layer* self); 78 | 79 | /* Layer_dump(self, fp) 80 | Shows the debug output. 81 | */ 82 | void Layer_dump(const Layer* self, FILE* fp); 83 | 84 | /* Layer_setInputs(self, values) 85 | Sets the input values. 86 | */ 87 | void Layer_setInputs(Layer* self, const double* values); 88 | 89 | /* Layer_getOutputs(self, outputs) 90 | Gets the output values. 91 | */ 92 | void Layer_getOutputs(const Layer* self, double* outputs); 93 | 94 | /* Layer_getErrorTotal(self) 95 | Gets the error total. 96 | */ 97 | double Layer_getErrorTotal(const Layer* self); 98 | 99 | /* Layer_learnOutputs(self, values) 100 | Learns the output values. 101 | */ 102 | void Layer_learnOutputs(Layer* self, const double* values); 103 | 104 | /* Layer_update(self, rate) 105 | Updates the weights. 
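
     The accumulated u_biases/u_weights of this layer and of all previous
     layers are applied and then cleared, so one call on the last layer
     updates the whole network. The minibatch pattern used in mnist.c scales
     the rate by the batch size:

         Layer_update(loutput, rate/batch_size);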
106 | */
107 | void Layer_update(Layer* self, double rate);
108 | 
--------------------------------------------------------------------------------
/mnist.c:
--------------------------------------------------------------------------------
1 | /*
2 |   mnist.c
3 | 
4 |   Usage:
5 |     $ ./mnist train-images train-labels test-images test-labels
6 | */
7 | 
8 | #include <stdio.h>
9 | #include <stdlib.h>
10 | #include <string.h>
11 | #include <stdint.h>
12 | #include <assert.h>
13 | #include <endian.h>
14 | #include "cnn.h"
15 | 
16 | 
17 | /*  IdxFile
18 |  */
19 | typedef struct _IdxFile
20 | {
21 |     int ndims;
22 |     uint32_t* dims;
23 |     uint8_t* data;
24 | } IdxFile;
25 | 
26 | #define DEBUG_IDXFILE 0
27 | 
28 | /* IdxFile_read(fp)
29 |    Reads all the data from given fp.
30 | */
31 | IdxFile* IdxFile_read(FILE* fp)
32 | {
33 |     /* Read the file header. */
34 |     struct {
35 |         uint16_t magic;
36 |         uint8_t type;
37 |         uint8_t ndims;
38 |         /* big endian */
39 |     } header;
40 |     if (fread(&header, sizeof(header), 1, fp) != 1) return NULL;
41 | #if DEBUG_IDXFILE
42 |     fprintf(stderr, "IdxFile_read: magic=%x, type=%x, ndims=%u\n",
43 |             header.magic, header.type, header.ndims);
44 | #endif
45 |     if (header.magic != 0) return NULL;
46 |     if (header.type != 0x08) return NULL;
47 |     if (header.ndims < 1) return NULL;
48 | 
49 |     /* Read the dimensions. */
50 |     IdxFile* self = (IdxFile*)calloc(1, sizeof(IdxFile));
51 |     if (self == NULL) return NULL;
52 |     self->ndims = header.ndims;
53 |     self->dims = (uint32_t*)calloc(self->ndims, sizeof(uint32_t));
54 |     if (self->dims == NULL) return NULL;
55 | 
56 |     if (fread(self->dims, sizeof(uint32_t), self->ndims, fp) == self->ndims) {
57 |         uint32_t nbytes = sizeof(uint8_t);
58 |         for (int i = 0; i < self->ndims; i++) {
59 |             /* Fix the byte order. */
60 |             uint32_t size = be32toh(self->dims[i]);
61 | #if DEBUG_IDXFILE
62 |             fprintf(stderr, "IdxFile_read: size[%d]=%u\n", i, size);
63 | #endif
64 |             nbytes *= size;
65 |             self->dims[i] = size;
66 |         }
67 |         /* Read the data. */
68 |         self->data = (uint8_t*) malloc(nbytes);
69 |         if (self->data != NULL) {
70 |             fread(self->data, sizeof(uint8_t), nbytes, fp);
71 | #if DEBUG_IDXFILE
72 |             fprintf(stderr, "IdxFile_read: read: %u bytes\n", nbytes);
73 | #endif
74 |         }
75 |     }
76 | 
77 |     return self;
78 | }
79 | 
80 | /* IdxFile_destroy(self)
81 |    Releases the memory.
82 | */
83 | void IdxFile_destroy(IdxFile* self)
84 | {
85 |     assert (self != NULL);
86 |     if (self->dims != NULL) {
87 |         free(self->dims);
88 |         self->dims = NULL;
89 |     }
90 |     if (self->data != NULL) {
91 |         free(self->data);
92 |         self->data = NULL;
93 |     }
94 |     free(self);
95 | }
96 | 
97 | /* IdxFile_get1(self, i)
98 |    Get the i-th record of the Idx1 file. (uint8_t)
99 |  */
100 | uint8_t IdxFile_get1(IdxFile* self, int i)
101 | {
102 |     assert (self != NULL);
103 |     assert (self->ndims == 1);
104 |     assert (i < self->dims[0]);
105 |     return self->data[i];
106 | }
107 | 
108 | /* IdxFile_get3(self, i, out)
109 |    Get the i-th record of the Idx3 file. (matrix of uint8_t)
110 |  */
111 | void IdxFile_get3(IdxFile* self, int i, uint8_t* out)
112 | {
113 |     assert (self != NULL);
114 |     assert (self->ndims == 3);
115 |     assert (i < self->dims[0]);
116 |     size_t n = self->dims[1] * self->dims[2];
117 |     memcpy(out, &self->data[i*n], n);
118 | }
119 | 
120 | 
121 | /* main */
122 | int main(int argc, char* argv[])
123 | {
124 |     /* argv[1] = train images */
125 |     /* argv[2] = train labels */
126 |     /* argv[3] = test images */
127 |     /* argv[4] = test labels */
128 |     if (argc < 5) return 100;
129 | 
130 |     /* Use a fixed random seed for debugging. */
131 |     srand(0);
132 |     /* Initialize layers.
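
       The network is: input 1x28x28 -> conv 16x14x14 -> conv 32x7x7
       -> fc 200 -> fc 200 -> fc 10 (softmax). The inequality comments
       below restate the shape check asserted in Layer_create_conv:
       (width-1)*stride + kernsize <= prev_width + 2*padding.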
*/ 133 | /* Input layer - 1x28x28. */ 134 | Layer* linput = Layer_create_input(1, 28, 28); 135 | /* Conv1 layer - 16x14x14, 3x3 conv, padding=1, stride=2. */ 136 | /* (14-1)*2+3 < 28+1*2 */ 137 | Layer* lconv1 = Layer_create_conv(linput, 16, 14, 14, 3, 1, 2, 0.1); 138 | /* Conv2 layer - 32x7x7, 3x3 conv, padding=1, stride=2. */ 139 | /* (7-1)*2+3 < 14+1*2 */ 140 | Layer* lconv2 = Layer_create_conv(lconv1, 32, 7, 7, 3, 1, 2, 0.1); 141 | /* FC1 layer - 200 nodes. */ 142 | Layer* lfull1 = Layer_create_full(lconv2, 200, 0.1); 143 | /* FC2 layer - 200 nodes. */ 144 | Layer* lfull2 = Layer_create_full(lfull1, 200, 0.1); 145 | /* Output layer - 10 nodes. */ 146 | Layer* loutput = Layer_create_full(lfull2, 10, 0.1); 147 | 148 | /* Read the training images & labels. */ 149 | IdxFile* images_train = NULL; 150 | { 151 | FILE* fp = fopen(argv[1], "rb"); 152 | if (fp == NULL) return 111; 153 | images_train = IdxFile_read(fp); 154 | if (images_train == NULL) return 111; 155 | fclose(fp); 156 | } 157 | IdxFile* labels_train = NULL; 158 | { 159 | FILE* fp = fopen(argv[2], "rb"); 160 | if (fp == NULL) return 111; 161 | labels_train = IdxFile_read(fp); 162 | if (labels_train == NULL) return 111; 163 | fclose(fp); 164 | } 165 | 166 | fprintf(stderr, "training...\n"); 167 | double rate = 0.1; 168 | double etotal = 0; 169 | int nepoch = 10; 170 | int batch_size = 32; 171 | int train_size = images_train->dims[0]; 172 | for (int i = 0; i < nepoch * train_size; i++) { 173 | /* Pick a random sample from the training data */ 174 | uint8_t img[28*28]; 175 | double x[28*28]; 176 | double y[10]; 177 | int index = rand() % train_size; 178 | IdxFile_get3(images_train, index, img); 179 | for (int j = 0; j < 28*28; j++) { 180 | x[j] = img[j]/255.0; 181 | } 182 | Layer_setInputs(linput, x); 183 | Layer_getOutputs(loutput, y); 184 | int label = IdxFile_get1(labels_train, index); 185 | #if 0 186 | fprintf(stderr, "label=%u, y=[", label); 187 | for (int j = 0; j < 10; j++) { 188 | fprintf(stderr, " %.3f", y[j]); 189 | } 190 | fprintf(stderr, "]\n"); 191 | #endif 192 | for (int j = 0; j < 10; j++) { 193 | y[j] = (j == label)? 1 : 0; 194 | } 195 | Layer_learnOutputs(loutput, y); 196 | etotal += Layer_getErrorTotal(loutput); 197 | if ((i % batch_size) == 0) { 198 | /* Minibatch: update the network for every n samples. */ 199 | Layer_update(loutput, rate/batch_size); 200 | } 201 | if ((i % 1000) == 0) { 202 | fprintf(stderr, "i=%d, error=%.4f\n", i, etotal/1000); 203 | etotal = 0; 204 | } 205 | } 206 | 207 | IdxFile_destroy(images_train); 208 | IdxFile_destroy(labels_train); 209 | 210 | /* Training finished. */ 211 | 212 | //Layer_dump(linput, stdout); 213 | //Layer_dump(lconv1, stdout); 214 | //Layer_dump(lconv2, stdout); 215 | //Layer_dump(lfull1, stdout); 216 | //Layer_dump(lfull2, stdout); 217 | //Layer_dump(loutput, stdout); 218 | 219 | /* Read the test images & labels. 
*/ 220 | 221 | IdxFile* images_test = NULL; 222 | { 223 | FILE* fp = fopen(argv[3], "rb"); 224 | if (fp == NULL) return 111; 225 | images_test = IdxFile_read(fp); 226 | if (images_test == NULL) return 111; 227 | fclose(fp); 228 | } 229 | IdxFile* labels_test = NULL; 230 | { 231 | FILE* fp = fopen(argv[4], "rb"); 232 | if (fp == NULL) return 111; 233 | labels_test = IdxFile_read(fp); 234 | if (labels_test == NULL) return 111; 235 | fclose(fp); 236 | } 237 | 238 | fprintf(stderr, "testing...\n"); 239 | int ntests = images_test->dims[0]; 240 | int ncorrect = 0; 241 | for (int i = 0; i < ntests; i++) { 242 | uint8_t img[28*28]; 243 | double x[28*28]; 244 | double y[10]; 245 | IdxFile_get3(images_test, i, img); 246 | for (int j = 0; j < 28*28; j++) { 247 | x[j] = img[j]/255.0; 248 | } 249 | Layer_setInputs(linput, x); 250 | Layer_getOutputs(loutput, y); 251 | int label = IdxFile_get1(labels_test, i); 252 | /* Pick the most probable label. */ 253 | int mj = -1; 254 | for (int j = 0; j < 10; j++) { 255 | if (mj < 0 || y[mj] < y[j]) { 256 | mj = j; 257 | } 258 | } 259 | if (mj == label) { 260 | ncorrect++; 261 | } 262 | if ((i % 1000) == 0) { 263 | fprintf(stderr, "i=%d\n", i); 264 | } 265 | } 266 | fprintf(stderr, "ntests=%d, ncorrect=%d\n", ntests, ncorrect); 267 | 268 | IdxFile_destroy(images_test); 269 | IdxFile_destroy(labels_test); 270 | 271 | Layer_destroy(linput); 272 | Layer_destroy(lconv1); 273 | Layer_destroy(lconv2); 274 | Layer_destroy(lfull1); 275 | Layer_destroy(lfull2); 276 | Layer_destroy(loutput); 277 | 278 | return 0; 279 | } 280 | -------------------------------------------------------------------------------- /bnn.c: -------------------------------------------------------------------------------- 1 | /* 2 | bnn.c 3 | Basic Neural Network in C. 4 | 5 | $ cc -o bnn bnn.c -lm 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #define DEBUG_LAYER 0 14 | 15 | /* f: function to learn */ 16 | static double f(double a, double b) 17 | { 18 | /* return a*b; */ 19 | return fabs(a-b); 20 | } 21 | 22 | 23 | /* Misc. functions 24 | */ 25 | 26 | /* rnd(): uniform random [0.0, 1.0] */ 27 | static inline double rnd() 28 | { 29 | return ((double)rand() / RAND_MAX); 30 | } 31 | 32 | /* nrnd(): normal random (std=1.0) */ 33 | static inline double nrnd() 34 | { 35 | return (rnd()+rnd()+rnd()+rnd()-2.0) * 1.724; /* std=1.0 */ 36 | } 37 | 38 | /* sigmoid(x): sigmoid function */ 39 | static inline double sigmoid(double x) 40 | { 41 | return 1.0 / (1.0 + exp(-x)); 42 | } 43 | /* sigmoid_d(y): sigmoid gradient */ 44 | static inline double sigmoid_g(double y) 45 | { 46 | return y * (1.0 - y); 47 | } 48 | 49 | 50 | /* Layer 51 | */ 52 | 53 | typedef struct _Layer { 54 | 55 | int lid; /* Layer ID */ 56 | struct _Layer* lprev; /* Previous Layer */ 57 | struct _Layer* lnext; /* Next Layer */ 58 | 59 | int nnodes; /* Num. of Nodes */ 60 | double* outputs; /* Node Outputs */ 61 | double* gradients; /* Node Gradients */ 62 | double* errors; /* Node Errors */ 63 | 64 | int nbiases; /* Num. of Biases */ 65 | double* biases; /* Biases (trained) */ 66 | double* u_biases; /* Bias Updates */ 67 | 68 | int nweights; /* Num. of Weights */ 69 | double* weights; /* Weights (trained) */ 70 | double* u_weights; /* Weight Updates */ 71 | 72 | } Layer; 73 | 74 | /* Layer_create(lprev, nnodes) 75 | Creates a Layer object. 
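
     Pass lprev=NULL for the input layer; each later layer links itself to
     lprev and initializes its weights with small normal-random values.
     The three-layer network in main() is built as:

         Layer* linput  = Layer_create(NULL, 2);
         Layer* lhidden = Layer_create(linput, 3);
         Layer* loutput = Layer_create(lhidden, 1);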
76 | */ 77 | Layer* Layer_create(Layer* lprev, int nnodes) 78 | { 79 | Layer* self = (Layer*)calloc(1, sizeof(Layer)); 80 | if (self == NULL) return NULL; 81 | 82 | self->lprev = lprev; 83 | self->lnext = NULL; 84 | self->lid = 0; 85 | if (lprev != NULL) { 86 | assert (lprev->lnext == NULL); 87 | lprev->lnext = self; 88 | self->lid = lprev->lid+1; 89 | } 90 | 91 | self->nnodes = nnodes; 92 | self->outputs = (double*)calloc(self->nnodes, sizeof(double)); 93 | self->gradients = (double*)calloc(self->nnodes, sizeof(double)); 94 | self->errors = (double*)calloc(self->nnodes, sizeof(double)); 95 | 96 | if (lprev != NULL) { 97 | /* Fully connected */ 98 | self->nbiases = self->nnodes; 99 | self->biases = (double*)calloc(self->nbiases, sizeof(double)); 100 | self->u_biases = (double*)calloc(self->nbiases, sizeof(double)); 101 | for (int i = 0; i < self->nbiases; i++) { 102 | self->biases[i] = 0; 103 | } 104 | 105 | self->nweights = lprev->nnodes * self->nnodes; 106 | self->weights = (double*)calloc(self->nweights, sizeof(double)); 107 | self->u_weights = (double*)calloc(self->nweights, sizeof(double)); 108 | for (int i = 0; i < self->nweights; i++) { 109 | self->weights[i] = 0.1 * nrnd(); 110 | } 111 | } 112 | 113 | return self; 114 | } 115 | 116 | /* Layer_destroy(self) 117 | Releases the memory. 118 | */ 119 | void Layer_destroy(Layer* self) 120 | { 121 | assert (self != NULL); 122 | 123 | free(self->outputs); 124 | free(self->gradients); 125 | free(self->errors); 126 | 127 | if (self->biases != NULL) { 128 | free(self->biases); 129 | } 130 | if (self->u_biases != NULL) { 131 | free(self->u_biases); 132 | } 133 | if (self->weights != NULL) { 134 | free(self->weights); 135 | } 136 | if (self->u_weights != NULL) { 137 | free(self->u_weights); 138 | } 139 | 140 | free(self); 141 | } 142 | 143 | /* Layer_dump(self, fp) 144 | Shows the debug output. 145 | */ 146 | void Layer_dump(const Layer* self, FILE* fp) 147 | { 148 | assert (self != NULL); 149 | Layer* lprev = self->lprev; 150 | fprintf(fp, "Layer%d", self->lid); 151 | if (lprev != NULL) { 152 | fprintf(fp, " (<- Layer%d)", lprev->lid); 153 | } 154 | fprintf(fp, ": nodes=%d\n", self->nnodes); 155 | fprintf(fp, " outputs = ["); 156 | for (int i = 0; i < self->nnodes; i++) { 157 | fprintf(fp, " %.4f", self->outputs[i]); 158 | } 159 | fprintf(fp, "]\n"); 160 | 161 | if (self->biases != NULL) { 162 | fprintf(fp, " biases = ["); 163 | for (int i = 0; i < self->nbiases; i++) { 164 | fprintf(fp, " %.4f", self->biases[i]); 165 | } 166 | fprintf(fp, "]\n"); 167 | } 168 | if (self->weights != NULL) { 169 | fprintf(fp, " weights = ["); 170 | for (int i = 0; i < self->nweights; i++) { 171 | fprintf(fp, " %.4f", self->weights[i]); 172 | } 173 | fprintf(fp, "]\n"); 174 | } 175 | } 176 | 177 | /* Layer_feedForw(self) 178 | Performs feed forward updates. 179 | */ 180 | static void Layer_feedForw(Layer* self) 181 | { 182 | assert (self->lprev != NULL); 183 | Layer* lprev = self->lprev; 184 | 185 | int k = 0; 186 | for (int i = 0; i < self->nnodes; i++) { 187 | /* Y = f(W * X + B) */ 188 | double x = self->biases[i]; 189 | for (int j = 0; j < lprev->nnodes; j++) { 190 | x += (lprev->outputs[j] * self->weights[k++]); 191 | } 192 | double y = sigmoid(x); 193 | self->outputs[i] = y; 194 | /* Store the gradient at this point. 
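           For the sigmoid the derivative can be computed from the output
           alone: d/dx sigmoid(x) = y*(1-y) where y = sigmoid(x), which is
           what sigmoid_g(y) returns, so x does not need to be kept around.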
*/ 195 | self->gradients[i] = sigmoid_g(y); 196 | } 197 | 198 | #if DEBUG_LAYER 199 | fprintf(stderr, "Layer_feedForw(Layer%d):\n", self->lid); 200 | fprintf(stderr, " outputs = ["); 201 | for (int i = 0; i < self->nnodes; i++) { 202 | fprintf(stderr, " %.4f", self->outputs[i]); 203 | } 204 | fprintf(stderr, "]\n gradients = ["); 205 | for (int i = 0; i < self->nnodes; i++) { 206 | fprintf(stderr, " %.4f", self->gradients[i]); 207 | } 208 | fprintf(stderr, "]\n"); 209 | #endif 210 | } 211 | 212 | /* Layer_feedBack(self) 213 | Performs backpropagation. 214 | */ 215 | static void Layer_feedBack(Layer* self) 216 | { 217 | if (self->lprev == NULL) return; 218 | 219 | assert (self->lprev != NULL); 220 | Layer* lprev = self->lprev; 221 | 222 | /* Clear errors. */ 223 | for (int j = 0; j < lprev->nnodes; j++) { 224 | lprev->errors[j] = 0; 225 | } 226 | 227 | int k = 0; 228 | for (int i = 0; i < self->nnodes; i++) { 229 | /* Computer the weight/bias updates. */ 230 | double dnet = self->errors[i] * self->gradients[i]; 231 | for (int j = 0; j < lprev->nnodes; j++) { 232 | /* Propagate the errors to the previous layer. */ 233 | lprev->errors[j] += self->weights[k] * dnet; 234 | self->u_weights[k] += dnet * lprev->outputs[j]; 235 | k++; 236 | } 237 | self->u_biases[i] += dnet; 238 | } 239 | 240 | #if DEBUG_LAYER 241 | fprintf(stderr, "Layer_feedBack(Layer%d):\n", self->lid); 242 | for (int i = 0; i < self->nnodes; i++) { 243 | double dnet = self->errors[i] * self->gradients[i]; 244 | fprintf(stderr, " dnet = %.4f, dw = [", dnet); 245 | for (int j = 0; j < lprev->nnodes; j++) { 246 | double dw = dnet * lprev->outputs[j]; 247 | fprintf(stderr, " %.4f", dw); 248 | } 249 | fprintf(stderr, "]\n"); 250 | } 251 | #endif 252 | } 253 | 254 | /* Layer_setInputs(self, values) 255 | Sets the input values. 256 | */ 257 | void Layer_setInputs(Layer* self, const double* values) 258 | { 259 | assert (self != NULL); 260 | assert (self->lprev == NULL); 261 | 262 | #if DEBUG_LAYER 263 | fprintf(stderr, "Layer_setInputs(Layer%d): values = [", self->lid); 264 | for (int i = 0; i < self->nnodes; i++) { 265 | fprintf(stderr, " %.4f", values[i]); 266 | } 267 | fprintf(stderr, "]\n"); 268 | #endif 269 | 270 | /* Set the values as the outputs. */ 271 | for (int i = 0; i < self->nnodes; i++) { 272 | self->outputs[i] = values[i]; 273 | } 274 | 275 | /* Start feed forwarding. */ 276 | Layer* layer = self->lnext; 277 | while (layer != NULL) { 278 | Layer_feedForw(layer); 279 | layer = layer->lnext; 280 | } 281 | } 282 | 283 | /* Layer_getOutputs(self, outputs) 284 | Gets the output values. 285 | */ 286 | void Layer_getOutputs(const Layer* self, double* outputs) 287 | { 288 | assert (self != NULL); 289 | for (int i = 0; i < self->nnodes; i++) { 290 | outputs[i] = self->outputs[i]; 291 | } 292 | } 293 | 294 | /* Layer_getErrorTotal(self) 295 | Gets the error total. 296 | */ 297 | double Layer_getErrorTotal(const Layer* self) 298 | { 299 | assert (self != NULL); 300 | double total = 0; 301 | for (int i = 0; i < self->nnodes; i++) { 302 | double e = self->errors[i]; 303 | total += e*e; 304 | } 305 | return (total / self->nnodes); 306 | } 307 | 308 | /* Layer_learnOutputs(self, values) 309 | Learns the output values. 
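
     "values" are the desired (target) outputs. The stored error is
     (output - target), the gradient of the squared-error loss up to a
     constant factor, and backpropagation is then run toward the input
     layer. One training step in main() is:

         Layer_setInputs(linput, x);
         Layer_getOutputs(loutput, y);
         Layer_learnOutputs(loutput, t);
         Layer_update(loutput, rate);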
310 | */ 311 | void Layer_learnOutputs(Layer* self, const double* values) 312 | { 313 | assert (self != NULL); 314 | assert (self->lprev != NULL); 315 | for (int i = 0; i < self->nnodes; i++) { 316 | self->errors[i] = (self->outputs[i] - values[i]); 317 | } 318 | 319 | #if DEBUG_LAYER 320 | fprintf(stderr, "Layer_learnOutputs(Layer%d): errors = [", self->lid); 321 | for (int i = 0; i < self->nnodes; i++) { 322 | fprintf(stderr, " %.4f", self->errors[i]); 323 | } 324 | fprintf(stderr, "]\n"); 325 | #endif 326 | 327 | /* Start backpropagation. */ 328 | Layer* layer = self->lprev; 329 | while (layer != NULL) { 330 | Layer_feedBack(layer); 331 | layer = layer->lprev; 332 | } 333 | } 334 | 335 | /* Layer_update(self, rate) 336 | Updates the weights. 337 | */ 338 | void Layer_update(Layer* self, double rate) 339 | { 340 | #if DEBUG_LAYER 341 | fprintf(stderr, "Layer_update(Layer%d): rate = %.4f\n", self->lid, rate); 342 | #endif 343 | 344 | /* Update the bias and weights. */ 345 | if (self->biases != NULL) { 346 | for (int i = 0; i < self->nbiases; i++) { 347 | self->biases[i] -= rate * self->u_biases[i]; 348 | self->u_biases[i] = 0; 349 | } 350 | } 351 | if (self->weights != NULL) { 352 | for (int i = 0; i < self->nweights; i++) { 353 | self->weights[i] -= rate * self->u_weights[i]; 354 | self->u_weights[i] = 0; 355 | } 356 | } 357 | 358 | /* Update the previous layer. */ 359 | if (self->lprev != NULL) { 360 | Layer_update(self->lprev, rate); 361 | } 362 | } 363 | 364 | 365 | /* main */ 366 | int main(int argc, char* argv[]) 367 | { 368 | /* Use a fixed random seed for debugging. */ 369 | srand(0); 370 | /* Initialize layers. */ 371 | Layer* linput = Layer_create(NULL, 2); 372 | Layer* lhidden = Layer_create(linput, 3); 373 | Layer* loutput = Layer_create(lhidden, 1); 374 | Layer_dump(linput, stderr); 375 | Layer_dump(lhidden, stderr); 376 | Layer_dump(loutput, stderr); 377 | 378 | /* Run the network. */ 379 | double rate = 1.0; 380 | int nepochs = 10000; 381 | for (int i = 0; i < nepochs; i++) { 382 | double x[2]; 383 | double y[1]; 384 | double t[1]; 385 | x[0] = rnd(); 386 | x[1] = rnd(); 387 | t[0] = f(x[0], x[1]); 388 | Layer_setInputs(linput, x); 389 | Layer_getOutputs(loutput, y); 390 | Layer_learnOutputs(loutput, t); 391 | double etotal = Layer_getErrorTotal(loutput); 392 | fprintf(stderr, "i=%d, x=[%.4f, %.4f], y=[%.4f], t=[%.4f], etotal=%.4f\n", 393 | i, x[0], x[1], y[0], t[0], etotal); 394 | Layer_update(loutput, rate); 395 | } 396 | 397 | /* Dump the finished network. */ 398 | Layer_dump(linput, stdout); 399 | Layer_dump(lhidden, stdout); 400 | Layer_dump(loutput, stdout); 401 | 402 | Layer_destroy(linput); 403 | Layer_destroy(lhidden); 404 | Layer_destroy(loutput); 405 | return 0; 406 | } 407 | -------------------------------------------------------------------------------- /rnn.c: -------------------------------------------------------------------------------- 1 | /* 2 | rnn.c 3 | Recurrent Neural Network in C. 4 | 5 | $ cc -o rnn rnn.c -lm 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #define DEBUG_LAYER 0 14 | 15 | 16 | /* f: input generator */ 17 | static int f(int i) 18 | { 19 | static int a[] = { 5, 9, 4, 0, 5, 9, 6, 3 }; 20 | return a[i % 8]; 21 | } 22 | /* g: function to learn */ 23 | static double g(int i) 24 | { 25 | return ((i % 8) == 4)? 1 : 0; 26 | } 27 | 28 | 29 | /* Misc. 
functions 30 | */ 31 | 32 | /* rnd(): uniform random [0.0, 1.0] */ 33 | static inline double rnd() 34 | { 35 | return ((double)rand() / RAND_MAX); 36 | } 37 | 38 | /* nrnd(): normal random (std=1.0) */ 39 | static inline double nrnd() 40 | { 41 | return (rnd()+rnd()+rnd()+rnd()-2.0) * 1.724; /* std=1.0 */ 42 | } 43 | 44 | #if 0 45 | /* tanh(x): hyperbolic tangent */ 46 | static inline double tanh(double x) 47 | { 48 | return 2.0 / (1.0 + exp(-2*x)) - 1.0; 49 | } 50 | #endif 51 | /* tanh_g(y): hyperbolic tangent gradient */ 52 | static inline double tanh_g(double y) 53 | { 54 | return 1.0 - y*y; 55 | } 56 | 57 | 58 | /* RNNLayer 59 | */ 60 | 61 | typedef struct _RNNLayer { 62 | 63 | int lid; /* Layer ID */ 64 | struct _RNNLayer* lprev; /* Previous Layer */ 65 | struct _RNNLayer* lnext; /* Next Layer */ 66 | 67 | int nnodes; /* Num. of Nodes */ 68 | int ntimes; /* Num. of Times */ 69 | 70 | /* array layout: [ v[t=0], v[t=-1], ..., v[t=-(ntimes-1)] ] */ 71 | double* outputs; /* Node Outputs */ 72 | double* errors; /* Node Errors */ 73 | double* temp; /* Node Hidden (temporary) */ 74 | 75 | int nxweights; /* Num. of XWeights */ 76 | double* xweights; /* XWeights (trained) */ 77 | double* u_xweights; /* XWeight Updates */ 78 | int nhweights; /* Num. of HWeights */ 79 | double* hweights; /* HWeights (trained) */ 80 | double* u_hweights; /* HWeight Updates */ 81 | 82 | int nbiases; /* Num. of Biases */ 83 | double* biases; /* Biases (trained) */ 84 | double* u_biases; /* Bias Updates */ 85 | 86 | } RNNLayer; 87 | 88 | /* RNNLayer_create(lprev, nnodes) 89 | Creates a RNNLayer object. 90 | */ 91 | RNNLayer* RNNLayer_create(RNNLayer* lprev, int nnodes, int ntimes) 92 | { 93 | RNNLayer* self = (RNNLayer*)calloc(1, sizeof(RNNLayer)); 94 | if (self == NULL) return NULL; 95 | 96 | self->lprev = lprev; 97 | self->lnext = NULL; 98 | self->lid = 0; 99 | if (lprev != NULL) { 100 | assert (lprev->lnext == NULL); 101 | lprev->lnext = self; 102 | self->lid = lprev->lid+1; 103 | } 104 | 105 | self->nnodes = nnodes; 106 | self->ntimes = ntimes; 107 | int n = self->nnodes * self->ntimes; 108 | self->outputs = (double*)calloc(n, sizeof(double)); 109 | self->errors = (double*)calloc(n, sizeof(double)); 110 | self->temp = (double*)calloc(self->nnodes, sizeof(double)); 111 | 112 | if (lprev != NULL) { 113 | /* Fully connected */ 114 | self->nxweights = lprev->nnodes * self->nnodes; 115 | self->xweights = (double*)calloc(self->nxweights, sizeof(double)); 116 | self->u_xweights = (double*)calloc(self->nxweights, sizeof(double)); 117 | for (int i = 0; i < self->nxweights; i++) { 118 | self->xweights[i] = 0.1 * nrnd(); 119 | } 120 | self->nhweights = self->nnodes * self->nnodes; 121 | self->hweights = (double*)calloc(self->nhweights, sizeof(double)); 122 | self->u_hweights = (double*)calloc(self->nhweights, sizeof(double)); 123 | for (int i = 0; i < self->nhweights; i++) { 124 | self->hweights[i] = 0.1 * nrnd(); 125 | } 126 | 127 | self->nbiases = self->nnodes; 128 | self->biases = (double*)calloc(self->nbiases, sizeof(double)); 129 | self->u_biases = (double*)calloc(self->nbiases, sizeof(double)); 130 | for (int i = 0; i < self->nbiases; i++) { 131 | self->biases[i] = 0; 132 | } 133 | } 134 | 135 | return self; 136 | } 137 | 138 | /* RNNLayer_destroy(self) 139 | Releases the memory. 
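
     Each layer only frees its own buffers, so every layer created with
     RNNLayer_create() must be destroyed separately, as main() does:

         RNNLayer_destroy(linput);
         RNNLayer_destroy(lhidden);
         RNNLayer_destroy(loutput);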
140 | */ 141 | void RNNLayer_destroy(RNNLayer* self) 142 | { 143 | assert (self != NULL); 144 | 145 | free(self->temp); 146 | free(self->outputs); 147 | free(self->errors); 148 | 149 | if (self->xweights != NULL) { 150 | free(self->xweights); 151 | } 152 | if (self->u_xweights != NULL) { 153 | free(self->u_xweights); 154 | } 155 | if (self->hweights != NULL) { 156 | free(self->hweights); 157 | } 158 | if (self->u_hweights != NULL) { 159 | free(self->u_hweights); 160 | } 161 | 162 | if (self->biases != NULL) { 163 | free(self->biases); 164 | } 165 | if (self->u_biases != NULL) { 166 | free(self->u_biases); 167 | } 168 | 169 | free(self); 170 | } 171 | 172 | /* RNNLayer_dump(self, fp) 173 | Shows the debug output. 174 | */ 175 | void RNNLayer_dump(const RNNLayer* self, FILE* fp) 176 | { 177 | assert (self != NULL); 178 | RNNLayer* lprev = self->lprev; 179 | fprintf(fp, "RNNLayer%d", self->lid); 180 | if (lprev != NULL) { 181 | fprintf(fp, " (<- Layer%d)", lprev->lid); 182 | } 183 | fprintf(fp, ": nodes=%d\n", self->nnodes); 184 | 185 | if (self->xweights != NULL) { 186 | int k = 0; 187 | for (int i = 0; i < self->nnodes; i++) { 188 | fprintf(fp, " xweights(%d) = [", i); 189 | for (int j = 0; j < lprev->nnodes; j++) { 190 | fprintf(fp, " %.4f", self->xweights[k++]); 191 | } 192 | fprintf(fp, "]\n"); 193 | } 194 | assert (k == self->nxweights); 195 | } 196 | if (self->hweights != NULL) { 197 | int k = 0; 198 | for (int i = 0; i < self->nnodes; i++) { 199 | fprintf(fp, " hweights(%d) = [", i); 200 | for (int j = 0; j < self->nnodes; j++) { 201 | fprintf(fp, " %.4f", self->hweights[k++]); 202 | } 203 | fprintf(fp, "]\n"); 204 | } 205 | assert (k == self->nhweights); 206 | } 207 | 208 | if (self->biases != NULL) { 209 | fprintf(fp, " biases = ["); 210 | for (int i = 0; i < self->nbiases; i++) { 211 | fprintf(fp, " %.4f", self->biases[i]); 212 | } 213 | fprintf(fp, "]\n"); 214 | } 215 | 216 | { 217 | int k = 0; 218 | for (int t = 0; t < self->ntimes; t++) { 219 | fprintf(fp, " outputs(t=%d) = [", -t); 220 | for (int i = 0; i < self->nnodes; i++) { 221 | fprintf(fp, " %.4f", self->outputs[k++]); 222 | } 223 | fprintf(fp, "]\n"); 224 | } 225 | } 226 | fprintf(fp, "\n"); 227 | } 228 | 229 | /* RNNLayer_reset(self) 230 | Resets the hidden states. 231 | */ 232 | void RNNLayer_reset(RNNLayer* self) 233 | { 234 | assert (self != NULL); 235 | 236 | for (int i = 0; i < self->nnodes; i++) { 237 | self->outputs[i] = 0; 238 | } 239 | } 240 | 241 | 242 | /* RNNLayer_feedForw(self) 243 | Performs feed forward updates. 244 | */ 245 | static void RNNLayer_feedForw(RNNLayer* self) 246 | { 247 | assert (self->lprev != NULL); 248 | RNNLayer* lprev = self->lprev; 249 | 250 | /* Save the previous values. */ 251 | for (int t = self->ntimes-1; 0 < t; t--) { 252 | int idst = self->nnodes * t; 253 | int isrc = self->nnodes * (t-1); 254 | for (int i = 0; i < self->nnodes; i++) { 255 | self->outputs[idst + i] = self->outputs[isrc + i]; 256 | } 257 | } 258 | /* outputs[0..] will be replaced by the new values. 
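        Slot 0 always holds the newest state and slot ntimes-1 the oldest;
        keeping this window of past outputs is what lets RNNLayer_feedBack
        unroll the recurrence ntimes steps back (truncated BPTT).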
*/ 259 | 260 | int kx = 0, kh = 0; 261 | for (int i = 0; i < self->nnodes; i++) { 262 | /* H = f(Bh + Wx * X + Wh * H) */ 263 | double h = self->biases[i]; 264 | for (int j = 0; j < lprev->nnodes; j++) { 265 | h += (lprev->outputs[j] * self->xweights[kx++]); 266 | } 267 | for (int j = 0; j < self->nnodes; j++) { 268 | h += (self->outputs[j] * self->hweights[kh++]); 269 | } 270 | self->temp[i] = h; 271 | } 272 | assert (kx == self->nxweights); 273 | assert (kh == self->nhweights); 274 | for (int i = 0; i < self->nnodes; i++) { 275 | self->outputs[i] = tanh(self->temp[i]); 276 | } 277 | 278 | #if DEBUG_LAYER 279 | fprintf(stderr, "RNNLayer_feedForw(Layer%d):\n", self->lid); 280 | fprintf(stderr, " outputs = ["); 281 | for (int i = 0; i < self->nnodes; i++) { 282 | fprintf(stderr, " %.4f (%.4f)", self->outputs[i], self->temp[i]); 283 | } 284 | fprintf(stderr, "]\n"); 285 | #endif 286 | } 287 | 288 | /* RNNLayer_feedBack(self) 289 | Performs backpropagation. 290 | */ 291 | static void RNNLayer_feedBack(RNNLayer* self) 292 | { 293 | if (self->lprev == NULL) return; 294 | 295 | assert (self->lprev != NULL); 296 | RNNLayer* lprev = self->lprev; 297 | 298 | /* Clear errors. */ 299 | for (int j = 0; j < lprev->nnodes; j++) { 300 | lprev->errors[j] = 0; 301 | } 302 | 303 | for (int t = 0; t < self->ntimes; t++) { 304 | int kx = 0, kh = 0; 305 | int i0 = t * self->nnodes; 306 | int i1 = (t+1) * self->nnodes; 307 | int j0 = t * lprev->nnodes; 308 | for (int i = 0; i < self->nnodes; i++) { 309 | /* Computer the weight/bias updates. */ 310 | double y = self->outputs[i0+i]; 311 | double g = tanh_g(y); 312 | double dnet = self->errors[i0+i] * g; 313 | if ((t+1) < lprev->ntimes) { 314 | for (int j = 0; j < lprev->nnodes; j++) { 315 | /* Propagate the errors to the previous layer. */ 316 | lprev->errors[j0+j] += self->xweights[kx] * dnet; 317 | self->u_xweights[kx] += dnet * lprev->outputs[j0+j]; 318 | kx++; 319 | } 320 | } 321 | if ((t+1) < self->ntimes) { 322 | for (int j = 0; j < self->nnodes; j++) { 323 | self->errors[i1+j] += self->hweights[kh] * dnet; 324 | self->u_hweights[kh] += dnet * self->outputs[i1+j]; 325 | kh++; 326 | } 327 | } 328 | self->u_biases[i] += dnet; 329 | } 330 | if ((t+1) < lprev->ntimes) { 331 | assert (kx == self->nxweights); 332 | } 333 | if ((t+1) < self->ntimes) { 334 | assert (kh == self->nhweights); 335 | } 336 | } 337 | 338 | /* Save the previous values. */ 339 | for (int t = self->ntimes-1; 0 < t; t--) { 340 | int idst = self->nnodes * t; 341 | int isrc = self->nnodes * (t-1); 342 | for (int i = 0; i < self->nnodes; i++) { 343 | self->errors[idst + i] = self->errors[isrc + i]; 344 | } 345 | } 346 | /* errors[0..] will be replaced by the new values. */ 347 | 348 | #if DEBUG_LAYER 349 | fprintf(stderr, "RNNLayer_feedBack(Layer%d):\n", self->lid); 350 | for (int i = 0; i < self->nnodes; i++) { 351 | double y = self->outputs[i]; 352 | double g = tanh_g(y); 353 | double dnet = self->errors[i] * g; 354 | fprintf(stderr, " dnet = %.4f, dw = [", dnet); 355 | for (int j = 0; j < lprev->nnodes; j++) { 356 | double dw = dnet * lprev->outputs[j]; 357 | fprintf(stderr, " %.4f", dw); 358 | } 359 | fprintf(stderr, "]\n"); 360 | } 361 | #endif 362 | } 363 | 364 | 365 | /* RNNLayer_setInputs(self, values) 366 | Sets the input values. 
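
     Each call advances the network by one time step: the stored history is
     shifted, the new values become the t=0 outputs of the input layer, and
     a forward pass runs through the chain. The per-step pattern in main() is:

         RNNLayer_setInputs(linput, x);
         RNNLayer_getOutputs(loutput, y);
         RNNLayer_learnOutputs(loutput, r);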
367 | */ 368 | void RNNLayer_setInputs(RNNLayer* self, const double* values) 369 | { 370 | assert (self != NULL); 371 | assert (self->lprev == NULL); 372 | 373 | #if DEBUG_LAYER 374 | fprintf(stderr, "RNNLayer_setInputs(Layer%d):\n", self->lid); 375 | fprintf(stderr, " values = ["); 376 | for (int i = 0; i < self->nnodes; i++) { 377 | fprintf(stderr, " %.4f", values[i]); 378 | } 379 | fprintf(stderr, "]\n"); 380 | #endif 381 | 382 | /* Save the previous values. */ 383 | for (int t = self->ntimes-1; 0 < t; t--) { 384 | int idst = self->nnodes * t; 385 | int isrc = self->nnodes * (t-1); 386 | for (int i = 0; i < self->nnodes; i++) { 387 | self->outputs[idst + i] = self->outputs[isrc + i]; 388 | } 389 | } 390 | /* outputs[0..] will be replaced by the new values. */ 391 | 392 | /* Set the input values as the outputs. */ 393 | for (int i = 0; i < self->nnodes; i++) { 394 | self->outputs[i] = values[i]; 395 | } 396 | 397 | /* Start feed forwarding. */ 398 | RNNLayer* layer = self->lnext; 399 | while (layer != NULL) { 400 | RNNLayer_feedForw(layer); 401 | layer = layer->lnext; 402 | } 403 | } 404 | 405 | /* RNNLayer_getOutputs(self, outputs) 406 | Gets the output values. 407 | */ 408 | void RNNLayer_getOutputs(const RNNLayer* self, double* outputs) 409 | { 410 | assert (self != NULL); 411 | for (int i = 0; i < self->nnodes; i++) { 412 | outputs[i] = self->outputs[i]; 413 | } 414 | } 415 | 416 | /* RNNLayer_getErrorTotal(self) 417 | Gets the error total. 418 | */ 419 | double RNNLayer_getErrorTotal(const RNNLayer* self) 420 | { 421 | assert (self != NULL); 422 | double total = 0; 423 | for (int i = 0; i < self->nnodes; i++) { 424 | double e = self->errors[i]; 425 | total += e*e; 426 | } 427 | return (total / self->nnodes); 428 | } 429 | 430 | /* RNNLayer_learnOutputs(self, values) 431 | Learns the output values. 432 | */ 433 | void RNNLayer_learnOutputs(RNNLayer* self, const double* values) 434 | { 435 | assert (self != NULL); 436 | assert (self->lprev != NULL); 437 | for (int i = 0; i < self->nnodes; i++) { 438 | self->errors[i] = (self->outputs[i] - values[i]); 439 | } 440 | 441 | #if DEBUG_LAYER 442 | fprintf(stderr, "RNNLayer_learnOutputs(Layer%d):\n", self->lid); 443 | fprintf(stderr, " values = ["); 444 | for (int i = 0; i < self->nnodes; i++) { 445 | fprintf(stderr, " %.4f", values[i]); 446 | } 447 | fprintf(stderr, "]\n errors = ["); 448 | for (int i = 0; i < self->nnodes; i++) { 449 | fprintf(stderr, " %.4f", self->errors[i]); 450 | } 451 | fprintf(stderr, "]\n"); 452 | #endif 453 | 454 | /* Start backpropagation. */ 455 | RNNLayer* layer = self; 456 | while (layer != NULL) { 457 | RNNLayer_feedBack(layer); 458 | layer = layer->lprev; 459 | } 460 | } 461 | 462 | /* RNNLayer_update(self, rate) 463 | Updates the weights. 464 | */ 465 | void RNNLayer_update(RNNLayer* self, double rate) 466 | { 467 | #if DEBUG_LAYER 468 | fprintf(stderr, "RNNLayer_update(Layer%d): rate = %.4f\n", self->lid, rate); 469 | #endif 470 | 471 | /* Update the bias and weights. 
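        The u_* buffers have been accumulating gradients since the previous
        update (one whole 100-step sequence in main() before each call), so
        the applied step is rate times that sum; the buffers are cleared
        once applied.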
*/
472 |     if (self->biases != NULL) {
473 |         for (int i = 0; i < self->nbiases; i++) {
474 |             self->biases[i] -= rate * self->u_biases[i];
475 |             self->u_biases[i] = 0;
476 |         }
477 |     }
478 |     if (self->xweights != NULL) {
479 |         for (int i = 0; i < self->nxweights; i++) {
480 |             self->xweights[i] -= rate * self->u_xweights[i];
481 |             self->u_xweights[i] = 0;
482 |         }
483 |     }
484 |     if (self->hweights != NULL) {
485 |         for (int i = 0; i < self->nhweights; i++) {
486 |             self->hweights[i] -= rate * self->u_hweights[i];
487 |             self->u_hweights[i] = 0;
488 |         }
489 |     }
490 | 
491 |     /* Update the previous layer. */
492 |     if (self->lprev != NULL) {
493 |         RNNLayer_update(self->lprev, rate);
494 |     }
495 | }
496 | 
497 | 
498 | /* main */
499 | int main(int argc, char* argv[])
500 | {
501 |     int ntimes = 5;
502 | 
503 |     /* Use a fixed random seed for debugging. */
504 |     srand(0);
505 |     /* Initialize layers. */
506 |     RNNLayer* linput = RNNLayer_create(NULL, 10, ntimes);
507 |     RNNLayer* lhidden = RNNLayer_create(linput, 3, ntimes);
508 |     RNNLayer* loutput = RNNLayer_create(lhidden, 1, ntimes);
509 |     RNNLayer_dump(linput, stderr);
510 |     RNNLayer_dump(lhidden, stderr);
511 |     RNNLayer_dump(loutput, stderr);
512 | 
513 |     /* Run the network. */
514 |     double rate = 0.005;
515 |     int nepochs = 100;
516 |     for (int n = 0; n < nepochs; n++) {
517 |         int i = rand() % 10000;
518 |         double x[10];
519 |         double y[1];
520 |         double r[1];
521 |         RNNLayer_reset(linput);
522 |         RNNLayer_reset(lhidden);
523 |         RNNLayer_reset(loutput);
524 |         fprintf(stderr, "reset: i=%d\n", i);
525 |         for (int j = 0; j < 100; j++) {
526 |             int p = f(i);
527 |             for (int k = 0; k < 10; k++) {
528 |                 x[k] = (k == p)? 1 : 0;
529 |             }
530 |             r[0] = g(i);        /* answer */
531 |             RNNLayer_setInputs(linput, x);
532 |             RNNLayer_getOutputs(loutput, y);
533 |             RNNLayer_learnOutputs(loutput, r);
534 |             double etotal = RNNLayer_getErrorTotal(loutput);
535 |             fprintf(stderr, "x[%d]=%d, y=%.4f, r=%.4f, etotal=%.4f\n",
536 |                     i, p, y[0], r[0], etotal);
537 |             i++;
538 |         }
539 |         RNNLayer_update(loutput, rate);
540 |     }
541 | 
542 |     /* Dump the finished network. */
543 |     RNNLayer_dump(linput, stdout);
544 |     RNNLayer_dump(lhidden, stdout);
545 |     RNNLayer_dump(loutput, stdout);
546 | 
547 |     RNNLayer_reset(linput);
548 |     RNNLayer_reset(lhidden);
549 |     RNNLayer_reset(loutput);
550 |     for (int i = 0; i < 20; i++) {
551 |         double x[10];
552 |         double y[1];
553 |         int p = f(i);
554 |         for (int k = 0; k < 10; k++) {
555 |             x[k] = (k == p)? 1 : 0;
556 |         }
557 |         RNNLayer_setInputs(linput, x);
558 |         RNNLayer_getOutputs(loutput, y);
559 |         fprintf(stderr, "x[%d]=%d, y=%.4f, %.4f\n", i, p, y[0], g(i));
560 |     }
561 | 
562 |     RNNLayer_destroy(linput);
563 |     RNNLayer_destroy(lhidden);
564 |     RNNLayer_destroy(loutput);
565 |     return 0;
566 | }
567 | 
--------------------------------------------------------------------------------
/cnn.c:
--------------------------------------------------------------------------------
1 | /*
2 |   cnn.c
3 |   Convolutional Neural Network in C.
4 | */
5 | 
6 | #include <stdio.h>
7 | #include <stdlib.h>
8 | #include <math.h>
9 | #include <assert.h>
10 | #include "cnn.h"
11 | 
12 | #define DEBUG_LAYER 0
13 | 
14 | 
15 | /* Misc.
functions 16 | */ 17 | 18 | /* rnd(): uniform random [0.0, 1.0] */ 19 | static inline double rnd() 20 | { 21 | return ((double)rand() / RAND_MAX); 22 | } 23 | 24 | /* nrnd(): normal random (std=1.0) */ 25 | static inline double nrnd() 26 | { 27 | return (rnd()+rnd()+rnd()+rnd()-2.0) * 1.724; /* std=1.0 */ 28 | } 29 | 30 | #if 0 31 | /* sigmoid(x): sigmoid function */ 32 | static inline double sigmoid(double x) 33 | { 34 | return 1.0 / (1.0 + exp(-x)); 35 | } 36 | /* sigmoid_d(y): sigmoid gradient */ 37 | static inline double sigmoid_g(double y) 38 | { 39 | return y * (1.0 - y); 40 | } 41 | #endif 42 | 43 | #if 0 44 | /* tanh(x): hyperbolic tangent */ 45 | static inline double tanh(double x) 46 | { 47 | return 2.0 / (1.0 + exp(-2*x)) - 1.0; 48 | } 49 | #endif 50 | /* tanh_g(y): hyperbolic tangent gradient */ 51 | static inline double tanh_g(double y) 52 | { 53 | return 1.0 - y*y; 54 | } 55 | 56 | /* relu(x): ReLU */ 57 | static inline double relu(double x) 58 | { 59 | return (0 < x)? x : 0; 60 | } 61 | /* relu_g(y): ReLU gradient */ 62 | static inline double relu_g(double y) 63 | { 64 | return (0 < y)? 1 : 0; 65 | } 66 | 67 | 68 | /* Layer 69 | */ 70 | 71 | /* Layer_create(lprev, ltype, depth, width, height, nbiases, nweights) 72 | Creates a Layer object for internal use. 73 | */ 74 | static Layer* Layer_create( 75 | Layer* lprev, LayerType ltype, 76 | int depth, int width, int height, 77 | int nbiases, int nweights) 78 | { 79 | Layer* self = (Layer*)calloc(1, sizeof(Layer)); 80 | if (self == NULL) return NULL; 81 | 82 | self->lprev = lprev; 83 | self->lnext = NULL; 84 | self->ltype = ltype; 85 | self->lid = 0; 86 | if (lprev != NULL) { 87 | assert (lprev->lnext == NULL); 88 | lprev->lnext = self; 89 | self->lid = lprev->lid+1; 90 | } 91 | self->depth = depth; 92 | self->width = width; 93 | self->height = height; 94 | 95 | /* Nnodes: number of outputs. */ 96 | self->nnodes = depth * width * height; 97 | self->outputs = (double*)calloc(self->nnodes, sizeof(double)); 98 | self->gradients = (double*)calloc(self->nnodes, sizeof(double)); 99 | self->errors = (double*)calloc(self->nnodes, sizeof(double)); 100 | 101 | self->nbiases = nbiases; 102 | self->biases = (double*)calloc(self->nbiases, sizeof(double)); 103 | self->u_biases = (double*)calloc(self->nbiases, sizeof(double)); 104 | 105 | self->nweights = nweights; 106 | self->weights = (double*)calloc(self->nweights, sizeof(double)); 107 | self->u_weights = (double*)calloc(self->nweights, sizeof(double)); 108 | 109 | return self; 110 | } 111 | 112 | /* Layer_destroy(self) 113 | Releases the memory. 114 | */ 115 | void Layer_destroy(Layer* self) 116 | { 117 | assert (self != NULL); 118 | 119 | free(self->outputs); 120 | free(self->gradients); 121 | free(self->errors); 122 | 123 | free(self->biases); 124 | free(self->u_biases); 125 | free(self->weights); 126 | free(self->u_weights); 127 | 128 | free(self); 129 | } 130 | 131 | /* Layer_dump(self, fp) 132 | Shows the debug output. 
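
     Writes the layer shape, outputs, biases and weights to fp in plain
     text, e.g. Layer_dump(loutput, stderr); mnist.c keeps a few
     commented-out calls for inspecting the trained network.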
133 | */ 134 | void Layer_dump(const Layer* self, FILE* fp) 135 | { 136 | assert (self != NULL); 137 | Layer* lprev = self->lprev; 138 | fprintf(fp, "Layer%d ", self->lid); 139 | if (lprev != NULL) { 140 | fprintf(fp, "(lprev=Layer%d) ", lprev->lid); 141 | } 142 | fprintf(fp, "shape=(%d,%d,%d), nodes=%d\n", 143 | self->depth, self->width, self->height, self->nnodes); 144 | { 145 | int i = 0; 146 | for (int z = 0; z < self->depth; z++) { 147 | fprintf(fp, " %d:\n", z); 148 | for (int y = 0; y < self->height; y++) { 149 | fprintf(fp, " ["); 150 | for (int x = 0; x < self->width; x++) { 151 | fprintf(fp, " %.4f", self->outputs[i++]); 152 | } 153 | fprintf(fp, "]\n"); 154 | } 155 | } 156 | } 157 | 158 | switch (self->ltype) { 159 | case LAYER_FULL: 160 | /* Fully connected layer. */ 161 | assert (lprev != NULL); 162 | fprintf(fp, " biases = ["); 163 | for (int i = 0; i < self->nnodes; i++) { 164 | fprintf(fp, " %.4f", self->biases[i]); 165 | } 166 | fprintf(fp, "]\n"); 167 | fprintf(fp, " weights = [\n"); 168 | { 169 | int k = 0; 170 | for (int i = 0; i < self->nnodes; i++) { 171 | fprintf(fp, " ["); 172 | for (int j = 0; j < lprev->nnodes; j++) { 173 | fprintf(fp, " %.4f", self->weights[k++]); 174 | } 175 | fprintf(fp, "]\n"); 176 | } 177 | } 178 | fprintf(fp, " ]\n"); 179 | break; 180 | 181 | case LAYER_CONV: 182 | /* Convolutional layer. */ 183 | assert (lprev != NULL); 184 | fprintf(fp, " stride=%d, kernsize=%d\n", 185 | self->conv.stride, self->conv.kernsize); 186 | { 187 | int k = 0; 188 | for (int z = 0; z < self->depth; z++) { 189 | fprintf(fp, " %d: bias=%.4f, weights = [", z, self->biases[z]); 190 | for (int j = 0; j < lprev->depth * self->conv.kernsize * self->conv.kernsize; j++) { 191 | fprintf(fp, " %.4f", self->weights[k++]); 192 | } 193 | fprintf(fp, "]\n"); 194 | } 195 | } 196 | break; 197 | 198 | default: 199 | break; 200 | } 201 | } 202 | 203 | /* Layer_feedForw_full(self) 204 | Performs feed forward updates. 205 | */ 206 | static void Layer_feedForw_full(Layer* self) 207 | { 208 | assert (self->ltype == LAYER_FULL); 209 | assert (self->lprev != NULL); 210 | Layer* lprev = self->lprev; 211 | 212 | int k = 0; 213 | for (int i = 0; i < self->nnodes; i++) { 214 | /* Compute Y = (W * X + B) without activation function. */ 215 | double x = self->biases[i]; 216 | for (int j = 0; j < lprev->nnodes; j++) { 217 | x += (lprev->outputs[j] * self->weights[k++]); 218 | } 219 | self->outputs[i] = x; 220 | } 221 | 222 | if (self->lnext == NULL) { 223 | /* Last layer - use Softmax. */ 224 | double m = -1; 225 | for (int i = 0; i < self->nnodes; i++) { 226 | double x = self->outputs[i]; 227 | if (m < x) { m = x; } 228 | } 229 | double t = 0; 230 | for (int i = 0; i < self->nnodes; i++) { 231 | double x = self->outputs[i]; 232 | double y = exp(x-m); 233 | self->outputs[i] = y; 234 | t += y; 235 | } 236 | for (int i = 0; i < self->nnodes; i++) { 237 | self->outputs[i] /= t; 238 | /* This isn't right, but set the same value to all the gradients. */ 239 | self->gradients[i] = 1; 240 | } 241 | } else { 242 | /* Otherwise, use Tanh. 
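           (On the softmax branch above, gradients[] is deliberately set
           to 1: combined with errors[] = output - target from
           Layer_learnOutputs, dnet = errors * gradients is exactly the
           softmax + cross-entropy gradient, so no separate softmax
           derivative is needed there. Hidden fully-connected layers use
           tanh below.)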
*/ 243 | for (int i = 0; i < self->nnodes; i++) { 244 | double x = self->outputs[i]; 245 | double y = tanh(x); 246 | self->outputs[i] = y; 247 | self->gradients[i] = tanh_g(y); 248 | } 249 | } 250 | 251 | #if DEBUG_LAYER 252 | fprintf(stderr, "Layer_feedForw_full(Layer%d):\n", self->lid); 253 | fprintf(stderr, " outputs = ["); 254 | for (int i = 0; i < self->nnodes; i++) { 255 | fprintf(stderr, " %.4f", self->outputs[i]); 256 | } 257 | fprintf(stderr, "]\n gradients = ["); 258 | for (int i = 0; i < self->nnodes; i++) { 259 | fprintf(stderr, " %.4f", self->gradients[i]); 260 | } 261 | fprintf(stderr, "]\n"); 262 | #endif 263 | } 264 | 265 | static void Layer_feedBack_full(Layer* self) 266 | { 267 | assert (self->ltype == LAYER_FULL); 268 | assert (self->lprev != NULL); 269 | Layer* lprev = self->lprev; 270 | 271 | /* Clear errors. */ 272 | for (int j = 0; j < lprev->nnodes; j++) { 273 | lprev->errors[j] = 0; 274 | } 275 | 276 | int k = 0; 277 | for (int i = 0; i < self->nnodes; i++) { 278 | /* Computer the weight/bias updates. */ 279 | double dnet = self->errors[i] * self->gradients[i]; 280 | for (int j = 0; j < lprev->nnodes; j++) { 281 | /* Propagate the errors to the previous layer. */ 282 | lprev->errors[j] += self->weights[k] * dnet; 283 | self->u_weights[k] += dnet * lprev->outputs[j]; 284 | k++; 285 | } 286 | self->u_biases[i] += dnet; 287 | } 288 | 289 | #if DEBUG_LAYER 290 | fprintf(stderr, "Layer_feedBack_full(Layer%d):\n", self->lid); 291 | for (int i = 0; i < self->nnodes; i++) { 292 | double dnet = self->errors[i] * self->gradients[i]; 293 | fprintf(stderr, " dnet = %.4f, dw = [", dnet); 294 | for (int j = 0; j < lprev->nnodes; j++) { 295 | double dw = dnet * lprev->outputs[j]; 296 | fprintf(stderr, " %.4f", dw); 297 | } 298 | fprintf(stderr, "]\n"); 299 | } 300 | #endif 301 | } 302 | 303 | /* Layer_feedForw_conv(self) 304 | Performs feed forward updates. 305 | */ 306 | static void Layer_feedForw_conv(Layer* self) 307 | { 308 | assert (self->ltype == LAYER_CONV); 309 | assert (self->lprev != NULL); 310 | Layer* lprev = self->lprev; 311 | 312 | int kernsize = self->conv.kernsize; 313 | int i = 0; 314 | for (int z1 = 0; z1 < self->depth; z1++) { 315 | /* z1: dst matrix */ 316 | /* qbase: kernel matrix base index */ 317 | int qbase = z1 * lprev->depth * kernsize * kernsize; 318 | for (int y1 = 0; y1 < self->height; y1++) { 319 | int y0 = self->conv.stride * y1 - self->conv.padding; 320 | for (int x1 = 0; x1 < self->width; x1++) { 321 | int x0 = self->conv.stride * x1 - self->conv.padding; 322 | /* Compute the kernel at (x1,y1) */ 323 | /* (x0,y0): src pixel */ 324 | double v = self->biases[z1]; 325 | for (int z0 = 0; z0 < lprev->depth; z0++) { 326 | /* z0: src matrix */ 327 | /* pbase: src matrix base index */ 328 | int pbase = z0 * lprev->width * lprev->height; 329 | for (int dy = 0; dy < kernsize; dy++) { 330 | int y = y0+dy; 331 | if (0 <= y && y < lprev->height) { 332 | int p = pbase + y*lprev->width; 333 | int q = qbase + dy*kernsize; 334 | for (int dx = 0; dx < kernsize; dx++) { 335 | int x = x0+dx; 336 | if (0 <= x && x < lprev->width) { 337 | v += lprev->outputs[p+x] * self->weights[q+dx]; 338 | } 339 | } 340 | } 341 | } 342 | } 343 | /* Apply the activation function. 
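                   ReLU is used for conv layers; relu_g() is evaluated on
                   the activated output v rather than the pre-activation,
                   which gives the same 0/1 derivative because relu(x) > 0
                   exactly when x > 0.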
*/ 344 | v = relu(v); 345 | self->outputs[i] = v; 346 | self->gradients[i] = relu_g(v); 347 | i++; 348 | } 349 | } 350 | } 351 | assert (i == self->nnodes); 352 | 353 | #if DEBUG_LAYER 354 | fprintf(stderr, "Layer_feedForw_conv(Layer%d):\n", self->lid); 355 | fprintf(stderr, " outputs = ["); 356 | for (int i = 0; i < self->nnodes; i++) { 357 | fprintf(stderr, " %.4f", self->outputs[i]); 358 | } 359 | fprintf(stderr, "]\n gradients = ["); 360 | for (int i = 0; i < self->nnodes; i++) { 361 | fprintf(stderr, " %.4f", self->gradients[i]); 362 | } 363 | fprintf(stderr, "]\n"); 364 | #endif 365 | } 366 | 367 | static void Layer_feedBack_conv(Layer* self) 368 | { 369 | assert (self->ltype == LAYER_CONV); 370 | assert (self->lprev != NULL); 371 | Layer* lprev = self->lprev; 372 | 373 | /* Clear errors. */ 374 | for (int j = 0; j < lprev->nnodes; j++) { 375 | lprev->errors[j] = 0; 376 | } 377 | 378 | int kernsize = self->conv.kernsize; 379 | int i = 0; 380 | for (int z1 = 0; z1 < self->depth; z1++) { 381 | /* z1: dst matrix */ 382 | /* qbase: kernel matrix base index */ 383 | int qbase = z1 * lprev->depth * kernsize * kernsize; 384 | for (int y1 = 0; y1 < self->height; y1++) { 385 | int y0 = self->conv.stride * y1 - self->conv.padding; 386 | for (int x1 = 0; x1 < self->width; x1++) { 387 | int x0 = self->conv.stride * x1 - self->conv.padding; 388 | /* Compute the kernel at (x1,y1) */ 389 | /* (x0,y0): src pixel */ 390 | double dnet = self->errors[i] * self->gradients[i]; 391 | for (int z0 = 0; z0 < lprev->depth; z0++) { 392 | /* z0: src matrix */ 393 | /* pbase: src matrix base index */ 394 | int pbase = z0 * lprev->width * lprev->height; 395 | for (int dy = 0; dy < kernsize; dy++) { 396 | int y = y0+dy; 397 | if (0 <= y && y < lprev->height) { 398 | int p = pbase + y*lprev->width; 399 | int q = qbase + dy*kernsize; 400 | for (int dx = 0; dx < kernsize; dx++) { 401 | int x = x0+dx; 402 | if (0 <= x && x < lprev->width) { 403 | lprev->errors[p+x] += self->weights[q+dx] * dnet; 404 | self->u_weights[q+dx] += dnet * lprev->outputs[p+x]; 405 | } 406 | } 407 | } 408 | } 409 | } 410 | self->u_biases[z1] += dnet; 411 | i++; 412 | } 413 | } 414 | } 415 | assert (i == self->nnodes); 416 | 417 | #if DEBUG_LAYER 418 | fprintf(stderr, "Layer_feedBack_conv(Layer%d):\n", self->lid); 419 | for (int i = 0; i < self->nnodes; i++) { 420 | double dnet = self->errors[i] * self->gradients[i]; 421 | fprintf(stderr, " dnet=%.4f, dw=[", dnet); 422 | for (int j = 0; j < lprev->nnodes; j++) { 423 | double dw = dnet * lprev->outputs[j]; 424 | fprintf(stderr, " %.4f", dw); 425 | } 426 | fprintf(stderr, "]\n"); 427 | } 428 | #endif 429 | } 430 | 431 | /* Layer_setInputs(self, values) 432 | Sets the input values. 433 | */ 434 | void Layer_setInputs(Layer* self, const double* values) 435 | { 436 | assert (self != NULL); 437 | assert (self->ltype == LAYER_INPUT); 438 | assert (self->lprev == NULL); 439 | 440 | #if DEBUG_LAYER 441 | fprintf(stderr, "Layer_setInputs(Layer%d): values = [", self->lid); 442 | for (int i = 0; i < self->nnodes; i++) { 443 | fprintf(stderr, " %.4f", values[i]); 444 | } 445 | fprintf(stderr, "]\n"); 446 | #endif 447 | 448 | /* Set the values as the outputs. */ 449 | for (int i = 0; i < self->nnodes; i++) { 450 | self->outputs[i] = values[i]; 451 | } 452 | 453 | /* Start feed forwarding. 
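        The loop below walks the lnext chain in order and dispatches on the
        layer type, so one call on the input layer drives the full forward
        pass: conv layers via Layer_feedForw_conv (ReLU), fully-connected
        layers via Layer_feedForw_full (tanh, softmax at the end).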
*/ 454 | Layer* layer = self->lnext; 455 | while (layer != NULL) { 456 | switch (layer->ltype) { 457 | case LAYER_FULL: 458 | Layer_feedForw_full(layer); 459 | break; 460 | case LAYER_CONV: 461 | Layer_feedForw_conv(layer); 462 | break; 463 | default: 464 | break; 465 | } 466 | layer = layer->lnext; 467 | } 468 | } 469 | 470 | /* Layer_getOutputs(self, outputs) 471 | Gets the output values. 472 | */ 473 | void Layer_getOutputs(const Layer* self, double* outputs) 474 | { 475 | assert (self != NULL); 476 | for (int i = 0; i < self->nnodes; i++) { 477 | outputs[i] = self->outputs[i]; 478 | } 479 | } 480 | 481 | /* Layer_getErrorTotal(self) 482 | Gets the error total. 483 | */ 484 | double Layer_getErrorTotal(const Layer* self) 485 | { 486 | assert (self != NULL); 487 | double total = 0; 488 | for (int i = 0; i < self->nnodes; i++) { 489 | double e = self->errors[i]; 490 | total += e*e; 491 | } 492 | return (total / self->nnodes); 493 | } 494 | 495 | /* Layer_learnOutputs(self, values) 496 | Learns the output values. 497 | */ 498 | void Layer_learnOutputs(Layer* self, const double* values) 499 | { 500 | assert (self != NULL); 501 | assert (self->ltype != LAYER_INPUT); 502 | assert (self->lprev != NULL); 503 | for (int i = 0; i < self->nnodes; i++) { 504 | self->errors[i] = (self->outputs[i] - values[i]); 505 | } 506 | 507 | #if DEBUG_LAYER 508 | fprintf(stderr, "Layer_learnOutputs(Layer%d): errors = [", self->lid); 509 | for (int i = 0; i < self->nnodes; i++) { 510 | fprintf(stderr, " %.4f", self->errors[i]); 511 | } 512 | fprintf(stderr, "]\n"); 513 | #endif 514 | 515 | /* Start backpropagation. */ 516 | Layer* layer = self; 517 | while (layer != NULL) { 518 | switch (layer->ltype) { 519 | case LAYER_FULL: 520 | Layer_feedBack_full(layer); 521 | break; 522 | case LAYER_CONV: 523 | Layer_feedBack_conv(layer); 524 | break; 525 | default: 526 | break; 527 | } 528 | layer = layer->lprev; 529 | } 530 | } 531 | 532 | /* Layer_update(self, rate) 533 | Updates the weights. 534 | */ 535 | void Layer_update(Layer* self, double rate) 536 | { 537 | for (int i = 0; i < self->nbiases; i++) { 538 | self->biases[i] -= rate * self->u_biases[i]; 539 | self->u_biases[i] = 0; 540 | } 541 | for (int i = 0; i < self->nweights; i++) { 542 | self->weights[i] -= rate * self->u_weights[i]; 543 | self->u_weights[i] = 0; 544 | } 545 | if (self->lprev != NULL) { 546 | Layer_update(self->lprev, rate); 547 | } 548 | } 549 | 550 | /* Layer_create_input(depth, width, height) 551 | Creates an input Layer with size (depth x weight x height). 552 | */ 553 | Layer* Layer_create_input(int depth, int width, int height) 554 | { 555 | return Layer_create( 556 | NULL, LAYER_INPUT, depth, width, height, 0, 0); 557 | } 558 | 559 | /* Layer_create_full(lprev, nnodes, std) 560 | Creates a fully-connected Layer. 561 | */ 562 | Layer* Layer_create_full(Layer* lprev, int nnodes, double std) 563 | { 564 | assert (lprev != NULL); 565 | Layer* self = Layer_create( 566 | lprev, LAYER_FULL, nnodes, 1, 1, 567 | nnodes, nnodes * lprev->nnodes); 568 | assert (self != NULL); 569 | 570 | for (int i = 0; i < self->nweights; i++) { 571 | self->weights[i] = std * nrnd(); 572 | } 573 | 574 | #if DEBUG_LAYER 575 | Layer_dump(self, stderr); 576 | #endif 577 | return self; 578 | } 579 | 580 | /* Layer_create_conv(lprev, depth, width, height, kernsize, padding, stride, std) 581 | Creates a convolutional Layer. 
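
     The caller specifies the output shape explicitly; the asserts below
     require an odd kernsize and output dimensions consistent with
     kernsize/padding/stride. "std" scales the normal-random weight
     initialization. Example from mnist.c (28x28 input, 3x3 kernel,
     padding 1, stride 2 halving the map to 14x14):

         Layer* lconv1 = Layer_create_conv(linput, 16, 14, 14, 3, 1, 2, 0.1);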
582 | */ 583 | Layer* Layer_create_conv( 584 | Layer* lprev, int depth, int width, int height, 585 | int kernsize, int padding, int stride, double std) 586 | { 587 | assert (lprev != NULL); 588 | assert ((kernsize % 2) == 1); 589 | assert ((width-1) * stride + kernsize <= lprev->width + padding*2); 590 | assert ((height-1) * stride + kernsize <= lprev->height + padding*2); 591 | 592 | Layer* self = Layer_create( 593 | lprev, LAYER_CONV, depth, width, height, 594 | depth, depth * lprev->depth * kernsize * kernsize); 595 | assert (self != NULL); 596 | 597 | self->conv.kernsize = kernsize; 598 | self->conv.padding = padding; 599 | self->conv.stride = stride; 600 | 601 | for (int i = 0; i < self->nweights; i++) { 602 | self->weights[i] = std * nrnd(); 603 | } 604 | 605 | #if DEBUG_LAYER 606 | Layer_dump(self, stderr); 607 | #endif 608 | return self; 609 | } 610 | --------------------------------------------------------------------------------