├── .gitignore
├── build.sh
├── gates.c
├── papers
│   ├── grad.pdf
│   └── grad.tex
├── twice.c
└── xor.c

/.gitignore:
--------------------------------------------------------------------------------
build/
*.aux
*.log
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
#!/bin/sh

set -xe

mkdir -p build/
clang -Wall -Wextra -o build/twice twice.c -lm
clang -Wall -Wextra -o build/gates gates.c -lm
clang -Wall -Wextra -o build/xor xor.c -lm
--------------------------------------------------------------------------------
/gates.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

float sigmoidf(float x)
{
    return 1.f / (1.f + expf(-x));
}

// A sample is two inputs followed by the expected output.
typedef float sample[3];

// OR-gate
sample or_train[] = {
    {0, 0, 0},
    {1, 0, 1},
    {0, 1, 1},
    {1, 1, 1},
};

// AND-gate
sample and_train[] = {
    {0, 0, 0},
    {1, 0, 0},
    {0, 1, 0},
    {1, 1, 1},
};

// NAND-gate
sample nand_train[] = {
    {0, 0, 1},
    {1, 0, 1},
    {0, 1, 1},
    {1, 1, 0},
};

// XOR-gate (not linearly separable; a single neuron cannot learn it)
sample xor_train[] = {
    {0, 0, 0},
    {1, 0, 1},
    {0, 1, 1},
    {1, 1, 0},
};

sample *train = and_train;
size_t train_count = 4;

// Mean squared error of a single neuron with weights w1, w2 and bias b.
float cost(float w1, float w2, float b)
{
    float result = 0.0f;
    for (size_t i = 0; i < train_count; ++i) {
        float x1 = train[i][0];
        float x2 = train[i][1];
        float y = sigmoidf(x1*w1 + x2*w2 + b);
        float d = y - train[i][2];
        result += d*d;
    }
    result /= train_count;
    return result;
}

// Approximate gradient of cost() via one-sided finite differences.
void dcost(float eps,
           float w1, float w2, float b,
           float *dw1, float *dw2, float *db)
{
    float c = cost(w1, w2, b);
    *dw1 = (cost(w1 + eps, w2, b) - c)/eps;
    *dw2 = (cost(w1, w2 + eps, b) - c)/eps;
    *db = (cost(w1, w2, b + eps) - c)/eps;
}

// Analytic gradient of cost(); see the derivation in papers/grad.tex.
void gcost(float w1, float w2, float b,
           float *dw1, float *dw2, float *db)
{
    *dw1 = 0;
    *dw2 = 0;
    *db = 0;
    size_t n = train_count;
    for (size_t i = 0; i < n; ++i) {
        float xi = train[i][0];
        float yi = train[i][1];
        float zi = train[i][2];
        float ai = sigmoidf(xi*w1 + yi*w2 + b);
        float di = 2*(ai - zi)*ai*(1 - ai);
        *dw1 += di*xi;
        *dw2 += di*yi;
        *db += di;
    }
    *dw1 /= n;
    *dw2 /= n;
    *db /= n;
}
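// Sketch (not used by main() below): a central/symmetric finite difference,
// which approximates the derivative with O(eps^2) error instead of the
// O(eps) error of the one-sided dcost() above. Same interface as dcost().
void dcost_central(float eps,
                   float w1, float w2, float b,
                   float *dw1, float *dw2, float *db)
{
    *dw1 = (cost(w1 + eps, w2, b) - cost(w1 - eps, w2, b))/(2*eps);
    *dw2 = (cost(w1, w2 + eps, b) - cost(w1, w2 - eps, b))/(2*eps);
    *db  = (cost(w1, w2, b + eps) - cost(w1, w2, b - eps))/(2*eps);
}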
float rand_float(void)
{
    return (float) rand() / (float) RAND_MAX;
}

// Scratch: XOR expressed through basic gates.
int main2(void)
{
    // (x|y) & ~(x&y)
    for (size_t x = 0; x < 2; ++x) {
        for (size_t y = 0; y < 2; ++y) {
            printf("%zu ^ %zu = %zu\n", x, y, (x|y) & (~(x&y)));
        }
    }
    return 0;
}

int main(void)
{
    srand(time(0));
    float w1 = rand_float();
    float w2 = rand_float();
    float b = rand_float();

    float rate = 1e-1;

    for (size_t i = 0; i < 10*1000; ++i) {
        float c = cost(w1, w2, b);
        printf("c = %f, w1 = %f, w2 = %f, b = %f\n", c, w1, w2, b);

        float dw1, dw2, db;
#if 1
        float eps = 1e-1;
        dcost(eps, w1, w2, b, &dw1, &dw2, &db);
#else
        gcost(w1, w2, b, &dw1, &dw2, &db);
#endif
        w1 -= rate*dw1;
        w2 -= rate*dw2;
        b -= rate*db;
    }
    printf("c = %f, w1 = %f, w2 = %f, b = %f\n", cost(w1, w2, b), w1, w2, b);

    // Truth table of the trained neuron (train points at and_train above).
    for (size_t i = 0; i < 2; ++i) {
        for (size_t j = 0; j < 2; ++j) {
            printf("%zu & %zu = %f\n", i, j, sigmoidf(i*w1 + j*w2 + b));
        }
    }

    return 0;
}
--------------------------------------------------------------------------------
/papers/grad.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsoding/ml-notes/fd64cea05648e1480b6f9556a887237def0c6300/papers/grad.pdf
--------------------------------------------------------------------------------
/papers/grad.tex:
--------------------------------------------------------------------------------
\documentclass{article}

\usepackage{amsmath}
\usepackage{tikz}

\begin{document}
\section{Gradient Descent}

If we keep decreasing the $\epsilon$ in our Finite Difference approach, we effectively get the Derivative of the Cost Function:

\begin{align}
C'(w) = \lim_{\epsilon \to 0}\frac{C(w + \epsilon) - C(w)}{\epsilon}
\end{align}

Let's compute the derivatives of all our models. Throughout the entire paper $n$ denotes the number of samples in the training set.

\subsection{Linear Model}

\def\d{2.0}

\begin{center}
\begin{tikzpicture}
\node (X) at ({-\d*0.75}, 0) {$x$};
\node[shape=circle,draw=black] (N) at (0, 0) {$w$};
\node (Y) at ({\d*0.75}, 0) {$y$};
\path[->] (X) edge (N);
\path[->] (N) edge (Y);
\end{tikzpicture}
\end{center}

\begin{align}
y &= x \cdot w
\end{align}

\subsubsection{Cost}

\begin{align}
C(w) &= \frac{1}{n}\sum_{i=1}^{n}(x_iw - y_i)^2 \\
C'(w)
&= \left(\frac{1}{n}\sum_{i=1}^{n}(x_iw - y_i)^2\right)' \\
&= \frac{1}{n}\left(\sum_{i=1}^{n}(x_iw - y_i)^2\right)' \\
&= \frac{1}{n}\sum_{i=1}^{n}\left((x_iw - y_i)^2\right)' \\
&= \frac{1}{n}\sum_{i=1}^{n}2(x_iw - y_i)x_i
\end{align}

\subsection{One Neuron Model with 2 inputs}

\begin{center}
\begin{tikzpicture}
\node (X) at (-\d, 1) {$x$};
\node (Y) at (-\d, -1) {$y$};
\node[shape=circle,draw=black] (N) at (0, 0) {$\sigma, b$};
\node (Z) at (\d, 0) {$z$};
\path[->] (X) edge node[above] {$w_1$} (N);
\path[->] (Y) edge node[above] {$w_2$} (N);
\path[->] (N) edge (Z);
\end{tikzpicture}
\end{center}
\begin{align}
z &= \sigma(xw_1 + yw_2 + b) \\
\sigma(x) &= \frac{1}{1 + e^{-x}} \\
\sigma'(x) &= \sigma(x)(1 - \sigma(x))
\end{align}
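For completeness, the formula for $\sigma'$ can be verified directly from the definition:

\begin{align}
\sigma'(x)
&= \left(\frac{1}{1 + e^{-x}}\right)' \\
&= \frac{e^{-x}}{(1 + e^{-x})^2} \\
&= \frac{1}{1 + e^{-x}} \cdot \left(1 - \frac{1}{1 + e^{-x}}\right) \\
&= \sigma(x)(1 - \sigma(x))
\end{align}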
\subsubsection{Cost}

\def\pd[#1]{\partial_{#1}}
\def\avgsum[#1,#2]{\frac{1}{#2}\sum_{#1=1}^{#2}}
\begin{align}
a_i &= \sigma(x_iw_1 + y_iw_2 + b) \\
\pd[w_1]a_i
&= \pd[w_1](\sigma(x_iw_1 + y_iw_2 + b)) \\
&= a_i(1 - a_i)\pd[w_1](x_iw_1 + y_iw_2 + b) \\
&= a_i(1 - a_i)x_i \\
\pd[w_2]a_i &= a_i(1 - a_i)y_i \\
\pd[b]a_i &= a_i(1 - a_i) \\
C &= \avgsum[i, n](a_i - z_i)^2 \\
\pd[w_1] C
&= \avgsum[i, n]\pd[w_1]\left((a_i - z_i)^2\right) \\
&= \avgsum[i, n]2(a_i - z_i)\pd[w_1]a_i \\
&= \avgsum[i, n]2(a_i - z_i)a_i(1 - a_i)x_i \\
\pd[w_2] C &= \avgsum[i, n]2(a_i - z_i)a_i(1 - a_i)y_i \\
\pd[b] C &= \avgsum[i, n]2(a_i - z_i)a_i(1 - a_i)
\end{align}

\subsection{Two Neurons Model with 1 input}

\begin{center}
\begin{tikzpicture}
\node (X) at (-\d, 0) {$x$};
\node[shape=circle,draw=black] (N1) at (0, 0) {$\sigma, b^{(1)}$};
\node[shape=circle,draw=black] (N2) at (\d, 0) {$\sigma, b^{(2)}$};
\node (Y) at ({2*\d}, 0) {$y$};
\path[->] (X) edge node[above] {$w^{(1)}$} (N1);
\path[->] (N1) edge node[above] {$w^{(2)}$} (N2);
\path[->] (N2) edge (Y);
\end{tikzpicture}
\end{center}

\begin{align}
a^{(1)} &= \sigma(xw^{(1)} + b^{(1)}) \\
y &= \sigma(a^{(1)}w^{(2)} + b^{(2)})
\end{align}

The superscript in parentheses denotes the current layer. For example $a_i^{(l)}$ denotes the activation of the $l$-th layer on the $i$-th sample.

\subsubsection{Feed-Forward}

\begin{align}
a_i^{(1)} &= \sigma(x_iw^{(1)} + b^{(1)}) \\
\pd[w^{(1)}]a_i^{(1)} &= a_i^{(1)}(1 - a_i^{(1)})x_i \\
\pd[b^{(1)}]a_i^{(1)} &= a_i^{(1)}(1 - a_i^{(1)}) \\
a_i^{(2)} &= \sigma(a_i^{(1)}w^{(2)} + b^{(2)}) \\
\pd[w^{(2)}]a_i^{(2)} &= a_i^{(2)}(1 - a_i^{(2)})a_i^{(1)} \\
\pd[b^{(2)}]a_i^{(2)} &= a_i^{(2)}(1 - a_i^{(2)}) \\
\pd[a_i^{(1)}]a_i^{(2)} &= a_i^{(2)}(1 - a_i^{(2)})w^{(2)}
\end{align}

\subsubsection{Back-Propagation}

\begin{align}
C^{(2)} &= \avgsum[i, n] (a_i^{(2)} - y_i)^2 \\
\pd[w^{(2)}] C^{(2)}
&= \avgsum[i, n] \pd[w^{(2)}]((a_i^{(2)} - y_i)^2) \\
&= \avgsum[i, n] 2(a_i^{(2)} - y_i)\pd[w^{(2)}]a_i^{(2)} \\
&= \avgsum[i, n] 2(a_i^{(2)} - y_i)a_i^{(2)}(1 - a_i^{(2)})a_i^{(1)} \\
\pd[b^{(2)}] C^{(2)} &= \avgsum[i, n] 2(a_i^{(2)} - y_i)a_i^{(2)}(1 - a_i^{(2)}) \\
\pd[a_i^{(1)}]C^{(2)} &= \avgsum[i, n] 2(a_i^{(2)} - y_i)a_i^{(2)}(1 - a_i^{(2)})w^{(2)} \\
e_i &= a_i^{(1)} - \pd[a_i^{(1)}]C^{(2)} \\
C^{(1)} &= \avgsum[i, n] (a_i^{(1)} - e_i)^2 \\
\pd[w^{(1)}]C^{(1)}
&= \pd[w^{(1)}]\left(\avgsum[i, n] (a_i^{(1)} - e_i)^2\right) \\
&= \avgsum[i, n] \pd[w^{(1)}]\left((a_i^{(1)} - e_i)^2\right) \\
&= \avgsum[i, n] 2(a_i^{(1)} - e_i)\pd[w^{(1)}]a_i^{(1)} \\
&= \avgsum[i, n] 2(\pd[a_i^{(1)}]C^{(2)})a_i^{(1)}(1 - a_i^{(1)})x_i \\
\pd[b^{(1)}]C^{(1)} &= \avgsum[i, n] 2(\pd[a_i^{(1)}]C^{(2)})a_i^{(1)}(1 - a_i^{(1)})
\end{align}
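As a quick consistency check, substituting the definition of $e_i$ back into $C^{(1)}$ gives

\begin{align}
C^{(1)} &= \avgsum[i, n] \left(\pd[a_i^{(1)}]C^{(2)}\right)^2
\end{align}

so $e_i$ acts as a corrected target for the activation $a_i^{(1)}$, and minimizing $C^{(1)}$ pushes the gradient signal coming from the second layer towards zero.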
\subsection{Arbitrary Neurons Model with 1 input}

Let's assume that we have $m$ layers.

\subsubsection{Feed-Forward}

Let's assume that $a_i^{(0)}$ is $x_i$.

\begin{align}
a_i^{(l)} &= \sigma(a_i^{(l-1)}w^{(l)} + b^{(l)}) \\
\pd[w^{(l)}]a_i^{(l)} &= a_i^{(l)}(1 - a_i^{(l)})a_i^{(l-1)} \\
\pd[b^{(l)}]a_i^{(l)} &= a_i^{(l)}(1 - a_i^{(l)}) \\
\pd[a_i^{(l-1)}]a_i^{(l)} &= a_i^{(l)}(1 - a_i^{(l)})w^{(l)}
\end{align}

\subsubsection{Back-Propagation}

Let's denote $a_i^{(m)} - y_i$ as $\pd[a_i^{(m)}]C^{(m+1)}$, so that the recursive formulas below also cover the last layer.

\begin{align}
C^{(l)} &= \avgsum[i, n] (\pd[a_i^{(l)}]C^{(l+1)})^2 \\
\pd[w^{(l)}]C^{(l)} &= \avgsum[i, n] 2(\pd[a_i^{(l)}]C^{(l+1)})a_i^{(l)}(1 - a_i^{(l)})a_i^{(l-1)} \\
\pd[b^{(l)}]C^{(l)} &= \avgsum[i, n] 2(\pd[a_i^{(l)}]C^{(l+1)})a_i^{(l)}(1 - a_i^{(l)}) \\
\pd[a_i^{(l-1)}]C^{(l)} &= \avgsum[i, n] 2(\pd[a_i^{(l)}]C^{(l+1)})a_i^{(l)}(1 - a_i^{(l)})w^{(l)}
\end{align}

\end{document}
--------------------------------------------------------------------------------
/twice.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

// Training set for y = 2*x.
float train[][2] = {
    {0, 0},
    {1, 2},
    {2, 4},
    {3, 6},
    {4, 8},
};
#define train_count (sizeof(train)/sizeof(train[0]))

float rand_float(void)
{
    return (float) rand() / (float) RAND_MAX;
}

// The general model this file specializes (single weight, no bias):
// x1, x2, x3, ..., b
// w1, w2, w3, ...
// y = x1*w1 + x2*w2 + x3*w3 + ... + b

// Mean squared error of the linear model y = x*w.
float cost(float w)
{
    float result = 0.0f;
    size_t n = train_count;
    for (size_t i = 0; i < n; ++i) {
        float x = train[i][0];
        float y = x*w;
        float d = y - train[i][1];
        result += d*d;
    }
    result /= n;
    return result;
}

// Analytic derivative of cost(); see papers/grad.tex, "Linear Model".
float dcost(float w)
{
    float result = 0.0f;
    size_t n = train_count;
    for (size_t i = 0; i < n; ++i) {
        float x = train[i][0];
        float y = train[i][1];
        result += 2*(x*w - y)*x;
    }
    result /= n;
    return result;
}
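// Sketch (not called from main below): sanity-check the analytic dcost()
// against a finite difference at a few points. The two columns should agree
// to a few decimal places for a small eps.
void grad_check(void)
{
    float eps = 1e-3;
    for (float w = -2.0f; w <= 2.0f; w += 1.0f) {
        float fd = (cost(w + eps) - cost(w))/eps;
        printf("w = %+f  dcost = %+f  finite diff = %+f\n", w, dcost(w), fd);
    }
}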
int main(void)
{
    // srand(time(0));
    srand(69);
    float w = rand_float()*10.0f;

    float rate = 1e-1;

    printf("cost = %f, w = %f\n", cost(w), w);
    for (size_t i = 0; i < 50; ++i) {
#if 0
        float eps = 1e-3;
        float c = cost(w);
        float dw = (cost(w + eps) - c)/eps;
#else
        float dw = dcost(w);
#endif
        w -= rate*dw;
        printf("cost = %f, w = %f\n", cost(w), w);
    }

    printf("------------------------------\n");
    printf("w = %f\n", w);

    return 0;
}
--------------------------------------------------------------------------------
/xor.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

// A 2-layer network hardwired for XOR: an "OR" neuron and a "NAND" neuron
// in the first layer feeding an "AND" neuron in the second.
typedef struct {
    float or_w1;
    float or_w2;
    float or_b;
    float nand_w1;
    float nand_w2;
    float nand_b;
    float and_w1;
    float and_w2;
    float and_b;
} Xor;

float sigmoidf(float x)
{
    return 1.f / (1.f + expf(-x));
}

float forward(Xor m, float x1, float x2)
{
    float a = sigmoidf(m.or_w1*x1 + m.or_w2*x2 + m.or_b);
    float b = sigmoidf(m.nand_w1*x1 + m.nand_w2*x2 + m.nand_b);
    return sigmoidf(a*m.and_w1 + b*m.and_w2 + m.and_b);
}

// A sample is two inputs followed by the expected output.
typedef float sample[3];

// XOR-gate
sample xor_train[] = {
    {0, 0, 0},
    {1, 0, 1},
    {0, 1, 1},
    {1, 1, 0},
};

// OR-gate
sample or_train[] = {
    {0, 0, 0},
    {1, 0, 1},
    {0, 1, 1},
    {1, 1, 1},
};

// AND-gate
sample and_train[] = {
    {0, 0, 0},
    {1, 0, 0},
    {0, 1, 0},
    {1, 1, 1},
};

// NAND-gate
sample nand_train[] = {
    {0, 0, 1},
    {1, 0, 1},
    {0, 1, 1},
    {1, 1, 0},
};

// NOR-gate
sample nor_train[] = {
    {0, 0, 1},
    {1, 0, 0},
    {0, 1, 0},
    {1, 1, 0},
};

sample *train = xor_train;
size_t train_count = 4;

// Mean squared error of the whole network over the training set.
float cost(Xor m)
{
    float result = 0.0f;
    for (size_t i = 0; i < train_count; ++i) {
        float x1 = train[i][0];
        float x2 = train[i][1];
        float y = forward(m, x1, x2);
        float d = y - train[i][2];
        result += d*d;
    }
    result /= train_count;
    return result;
}

float rand_float(void)
{
    return (float) rand() / (float) RAND_MAX;
}

Xor rand_xor(void)
{
    Xor m;
    m.or_w1 = rand_float();
    m.or_w2 = rand_float();
    m.or_b = rand_float();
    m.nand_w1 = rand_float();
    m.nand_w2 = rand_float();
    m.nand_b = rand_float();
    m.and_w1 = rand_float();
    m.and_w2 = rand_float();
    m.and_b = rand_float();
    return m;
}

void print_xor(Xor m)
{
    printf("or_w1 = %f\n", m.or_w1);
    printf("or_w2 = %f\n", m.or_w2);
    printf("or_b = %f\n", m.or_b);
    printf("nand_w1 = %f\n", m.nand_w1);
    printf("nand_w2 = %f\n", m.nand_w2);
    printf("nand_b = %f\n", m.nand_b);
    printf("and_w1 = %f\n", m.and_w1);
    printf("and_w2 = %f\n", m.and_w2);
    printf("and_b = %f\n", m.and_b);
}

// Gradient descent step: move every parameter against its gradient.
Xor learn(Xor m, Xor g, float rate)
{
    m.or_w1 -= rate*g.or_w1;
    m.or_w2 -= rate*g.or_w2;
    m.or_b -= rate*g.or_b;
    m.nand_w1 -= rate*g.nand_w1;
    m.nand_w2 -= rate*g.nand_w2;
    m.nand_b -= rate*g.nand_b;
    m.and_w1 -= rate*g.and_w1;
    m.and_w2 -= rate*g.and_w2;
    m.and_b -= rate*g.and_b;
    return m;
}

// Approximate the gradient of cost() with one-sided finite differences,
// nudging one parameter at a time.
Xor finite_diff(Xor m, float eps)
{
    Xor g;
    float c = cost(m);
    float saved;

    saved = m.or_w1;
    m.or_w1 += eps;
    g.or_w1 = (cost(m) - c)/eps;
    m.or_w1 = saved;

    saved = m.or_w2;
    m.or_w2 += eps;
    g.or_w2 = (cost(m) - c)/eps;
    m.or_w2 = saved;

    saved = m.or_b;
    m.or_b += eps;
    g.or_b = (cost(m) - c)/eps;
    m.or_b = saved;

    saved = m.nand_w1;
    m.nand_w1 += eps;
    g.nand_w1 = (cost(m) - c)/eps;
    m.nand_w1 = saved;

    saved = m.nand_w2;
    m.nand_w2 += eps;
    g.nand_w2 = (cost(m) - c)/eps;
    m.nand_w2 = saved;

    saved = m.nand_b;
    m.nand_b += eps;
    g.nand_b = (cost(m) - c)/eps;
    m.nand_b = saved;

    saved = m.and_w1;
    m.and_w1 += eps;
    g.and_w1 = (cost(m) - c)/eps;
    m.and_w1 = saved;

    saved = m.and_w2;
    m.and_w2 += eps;
    g.and_w2 = (cost(m) - c)/eps;
    m.and_w2 = saved;

    saved = m.and_b;
    m.and_b += eps;
    g.and_b = (cost(m) - c)/eps;
    m.and_b = saved;

    return g;
}
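// Sketch (not used by main below) of one way to shrink finite_diff(): since
// Xor is nine floats in a row, treat the model as a flat parameter array and
// loop. Strictly speaking C permits padding between struct members, so this
// relies on the (typical) assumption that there is none.
Xor finite_diff_flat(Xor m, float eps)
{
    Xor g;
    float *p = (float*)&m;
    float *gp = (float*)&g;
    float c = cost(m);
    for (size_t i = 0; i < sizeof(Xor)/sizeof(float); ++i) {
        float saved = p[i];
        p[i] += eps;
        gp[i] = (cost(m) - c)/eps;
        p[i] = saved;
    }
    return g;
}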
int main(void)
{
    srand(time(0));
    Xor m = rand_xor();

    float eps = 1e-1;
    float rate = 1e-1;

    for (size_t i = 0; i < 100*1000; ++i) {
        Xor g = finite_diff(m, eps);
        m = learn(m, g, rate);
        // printf("cost = %f\n", cost(m));
    }
    printf("cost = %f\n", cost(m));

    printf("------------------------------\n");
    for (size_t i = 0; i < 2; ++i) {
        for (size_t j = 0; j < 2; ++j) {
            printf("%zu ^ %zu = %f\n", i, j, forward(m, i, j));
        }
    }
    printf("------------------------------\n");
    printf("\"OR\" neuron:\n");
    for (size_t i = 0; i < 2; ++i) {
        for (size_t j = 0; j < 2; ++j) {
            printf("%zu | %zu = %f\n", i, j, sigmoidf(m.or_w1*i + m.or_w2*j + m.or_b));
        }
    }
    printf("------------------------------\n");
    printf("\"NAND\" neuron:\n");
    for (size_t i = 0; i < 2; ++i) {
        for (size_t j = 0; j < 2; ++j) {
            printf("~(%zu & %zu) = %f\n", i, j, sigmoidf(m.nand_w1*i + m.nand_w2*j + m.nand_b));
        }
    }
    printf("------------------------------\n");
    printf("\"AND\" neuron:\n");
    for (size_t i = 0; i < 2; ++i) {
        for (size_t j = 0; j < 2; ++j) {
            printf("%zu & %zu = %f\n", i, j, sigmoidf(m.and_w1*i + m.and_w2*j + m.and_b));
        }
    }
    return 0;
}
--------------------------------------------------------------------------------