├── .gitignore
├── build.sh
├── gates.c
├── papers
│   ├── grad.pdf
│   └── grad.tex
├── twice.c
└── xor.c

/.gitignore:
--------------------------------------------------------------------------------
build/
*.aux
*.log
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
#!/bin/sh

set -xe

mkdir -p build/
clang -Wall -Wextra -o build/twice twice.c -lm
clang -Wall -Wextra -o build/gates gates.c -lm
clang -Wall -Wextra -o build/xor xor.c -lm
--------------------------------------------------------------------------------
/gates.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

float sigmoidf(float x)
{
    return 1.f / (1.f + expf(-x));
}

// A sample is two inputs followed by the expected output.
typedef float sample[3];

// OR-gate
sample or_train[] = {
    {0, 0, 0},
    {1, 0, 1},
    {0, 1, 1},
    {1, 1, 1},
};

// AND-gate
sample and_train[] = {
    {0, 0, 0},
    {1, 0, 0},
    {0, 1, 0},
    {1, 1, 1},
};

// NAND-gate
sample nand_train[] = {
    {0, 0, 1},
    {1, 0, 1},
    {0, 1, 1},
    {1, 1, 0},
};

// XOR-gate (not linearly separable; a single neuron cannot learn it)
sample xor_train[] = {
    {0, 0, 0},
    {1, 0, 1},
    {0, 1, 1},
    {1, 1, 0},
};

sample *train = and_train;
size_t train_count = 4;

// Mean squared error of a single neuron with weights w1, w2 and bias b.
float cost(float w1, float w2, float b)
{
    float result = 0.0f;
    for (size_t i = 0; i < train_count; ++i) {
        float x1 = train[i][0];
        float x2 = train[i][1];
        float y = sigmoidf(x1*w1 + x2*w2 + b);
        float d = y - train[i][2];
        result += d*d;
    }
    result /= train_count;
    return result;
}

// Approximate gradient of cost() via one-sided finite differences.
void dcost(float eps,
           float w1, float w2, float b,
           float *dw1, float *dw2, float *db)
{
    float c = cost(w1, w2, b);
    *dw1 = (cost(w1 + eps, w2, b) - c)/eps;
    *dw2 = (cost(w1, w2 + eps, b) - c)/eps;
    *db = (cost(w1, w2, b + eps) - c)/eps;
}

// Analytic gradient of cost(); see the derivation in papers/grad.tex.
void gcost(float w1, float w2, float b,
           float *dw1, float *dw2, float *db)
{
    *dw1 = 0;
    *dw2 = 0;
    *db = 0;
    size_t n = train_count;
    for (size_t i = 0; i < n; ++i) {
        float xi = train[i][0];
        float yi = train[i][1];
        float zi = train[i][2];
        float ai = sigmoidf(xi*w1 + yi*w2 + b);
        float di = 2*(ai - zi)*ai*(1 - ai);
        *dw1 += di*xi;
        *dw2 += di*yi;
        *db += di;
    }
    *dw1 /= n;
    *dw2 /= n;
    *db /= n;
}
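// Sketch (not used by main() below): a central/symmetric finite difference,
// which approximates the derivative with O(eps^2) error instead of the
// O(eps) error of the one-sided dcost() above. Same interface as dcost().
void dcost_central(float eps,
                   float w1, float w2, float b,
                   float *dw1, float *dw2, float *db)
{
    *dw1 = (cost(w1 + eps, w2, b) - cost(w1 - eps, w2, b))/(2*eps);
    *dw2 = (cost(w1, w2 + eps, b) - cost(w1, w2 - eps, b))/(2*eps);
    *db  = (cost(w1, w2, b + eps) - cost(w1, w2, b - eps))/(2*eps);
}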
float rand_float(void)
{
    return (float) rand() / (float) RAND_MAX;
}

// Scratch: XOR expressed through basic gates.
int main2(void)
{
    // (x|y) & ~(x&y)
    for (size_t x = 0; x < 2; ++x) {
        for (size_t y = 0; y < 2; ++y) {
            printf("%zu ^ %zu = %zu\n", x, y, (x|y) & (~(x&y)));
        }
    }
    return 0;
}

int main(void)
{
    srand(time(0));
    float w1 = rand_float();
    float w2 = rand_float();
    float b = rand_float();

    float rate = 1e-1;

    for (size_t i = 0; i < 10*1000; ++i) {
        float c = cost(w1, w2, b);
        printf("c = %f, w1 = %f, w2 = %f, b = %f\n", c, w1, w2, b);

        float dw1, dw2, db;
#if 1
        float eps = 1e-1;
        dcost(eps, w1, w2, b, &dw1, &dw2, &db);
#else
        gcost(w1, w2, b, &dw1, &dw2, &db);
#endif
        w1 -= rate*dw1;
        w2 -= rate*dw2;
        b -= rate*db;
    }
    printf("c = %f, w1 = %f, w2 = %f, b = %f\n", cost(w1, w2, b), w1, w2, b);

    // Truth table of the trained neuron (train points at and_train above).
    for (size_t i = 0; i < 2; ++i) {
        for (size_t j = 0; j < 2; ++j) {
            printf("%zu & %zu = %f\n", i, j, sigmoidf(i*w1 + j*w2 + b));
        }
    }

    return 0;
}
--------------------------------------------------------------------------------
/papers/grad.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsoding/ml-notes/fd64cea05648e1480b6f9556a887237def0c6300/papers/grad.pdf
--------------------------------------------------------------------------------
/papers/grad.tex:
--------------------------------------------------------------------------------
\documentclass{article}

\usepackage{amsmath}
\usepackage{tikz}

\begin{document}
\section{Gradient Descent}

If we keep decreasing the $\epsilon$ in our Finite Difference approach, we effectively get the Derivative of the Cost Function:

\begin{align}
C'(w) = \lim_{\epsilon \to 0}\frac{C(w + \epsilon) - C(w)}{\epsilon}
\end{align}

Let's compute the derivatives of all our models. Throughout the entire paper $n$ denotes the number of samples in the training set.

\subsection{Linear Model}

\def\d{2.0}

\begin{center}
\begin{tikzpicture}
\node (X) at ({-\d*0.75}, 0) {$x$};
\node[shape=circle,draw=black] (N) at (0, 0) {$w$};
\node (Y) at ({\d*0.75}, 0) {$y$};
\path[->] (X) edge (N);
\path[->] (N) edge (Y);
\end{tikzpicture}
\end{center}

\begin{align}
y &= x \cdot w
\end{align}

\subsubsection{Cost}

\begin{align}
C(w) &= \frac{1}{n}\sum_{i=1}^{n}(x_iw - y_i)^2 \\
C'(w)
&= \left(\frac{1}{n}\sum_{i=1}^{n}(x_iw - y_i)^2\right)' \\
&= \frac{1}{n}\left(\sum_{i=1}^{n}(x_iw - y_i)^2\right)' \\
&= \frac{1}{n}\sum_{i=1}^{n}\left((x_iw - y_i)^2\right)' \\
&= \frac{1}{n}\sum_{i=1}^{n}2(x_iw - y_i)x_i
\end{align}

\subsection{One Neuron Model with 2 inputs}

\begin{center}
\begin{tikzpicture}
\node (X) at (-\d, 1) {$x$};
\node (Y) at (-\d, -1) {$y$};
\node[shape=circle,draw=black] (N) at (0, 0) {$\sigma, b$};
\node (Z) at (\d, 0) {$z$};
\path[->] (X) edge node[above] {$w_1$} (N);
\path[->] (Y) edge node[above] {$w_2$} (N);
\path[->] (N) edge (Z);
\end{tikzpicture}
\end{center}
\begin{align}
z &= \sigma(xw_1 + yw_2 + b) \\
\sigma(x) &= \frac{1}{1 + e^{-x}} \\
\sigma'(x) &= \sigma(x)(1 - \sigma(x))
\end{align}
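For completeness, the formula for $\sigma'$ can be verified directly from the definition:

\begin{align}
\sigma'(x)
&= \left(\frac{1}{1 + e^{-x}}\right)' \\
&= \frac{e^{-x}}{(1 + e^{-x})^2} \\
&= \frac{1}{1 + e^{-x}} \cdot \left(1 - \frac{1}{1 + e^{-x}}\right) \\
&= \sigma(x)(1 - \sigma(x))
\end{align}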
\subsubsection{Cost}

\def\pd[#1]{\partial_{#1}}
\def\avgsum[#1,#2]{\frac{1}{#2}\sum_{#1=1}^{#2}}
\begin{align}
a_i &= \sigma(x_iw_1 + y_iw_2 + b) \\
\pd[w_1]a_i
&= \pd[w_1](\sigma(x_iw_1 + y_iw_2 + b)) \\
&= a_i(1 - a_i)\pd[w_1](x_iw_1 + y_iw_2 + b) \\
&= a_i(1 - a_i)x_i \\
\pd[w_2]a_i &= a_i(1 - a_i)y_i \\
\pd[b]a_i &= a_i(1 - a_i) \\
C &= \avgsum[i, n](a_i - z_i)^2 \\
\pd[w_1] C
&= \avgsum[i, n]\pd[w_1]\left((a_i - z_i)^2\right) \\
&= \avgsum[i, n]2(a_i - z_i)\pd[w_1]a_i \\
&= \avgsum[i, n]2(a_i - z_i)a_i(1 - a_i)x_i \\
\pd[w_2] C &= \avgsum[i, n]2(a_i - z_i)a_i(1 - a_i)y_i \\
\pd[b] C &= \avgsum[i, n]2(a_i - z_i)a_i(1 - a_i)
\end{align}

\subsection{Two Neurons Model with 1 input}

\begin{center}
\begin{tikzpicture}
\node (X) at (-\d, 0) {$x$};
\node[shape=circle,draw=black] (N1) at (0, 0) {$\sigma, b^{(1)}$};
\node[shape=circle,draw=black] (N2) at (\d, 0) {$\sigma, b^{(2)}$};
\node (Y) at ({2*\d}, 0) {$y$};
\path[->] (X) edge node[above] {$w^{(1)}$} (N1);
\path[->] (N1) edge node[above] {$w^{(2)}$} (N2);
\path[->] (N2) edge (Y);
\end{tikzpicture}
\end{center}

\begin{align}
a^{(1)} &= \sigma(xw^{(1)} + b^{(1)}) \\
y &= \sigma(a^{(1)}w^{(2)} + b^{(2)})
\end{align}

The superscript in parentheses denotes the current layer. For example $a_i^{(l)}$ denotes the activation of the $l$-th layer on the $i$-th sample.

\subsubsection{Feed-Forward}

\begin{align}
a_i^{(1)} &= \sigma(x_iw^{(1)} + b^{(1)}) \\
\pd[w^{(1)}]a_i^{(1)} &= a_i^{(1)}(1 - a_i^{(1)})x_i \\
\pd[b^{(1)}]a_i^{(1)} &= a_i^{(1)}(1 - a_i^{(1)}) \\
a_i^{(2)} &= \sigma(a_i^{(1)}w^{(2)} + b^{(2)}) \\
\pd[w^{(2)}]a_i^{(2)} &= a_i^{(2)}(1 - a_i^{(2)})a_i^{(1)} \\
\pd[b^{(2)}]a_i^{(2)} &= a_i^{(2)}(1 - a_i^{(2)}) \\
\pd[a_i^{(1)}]a_i^{(2)} &= a_i^{(2)}(1 - a_i^{(2)})w^{(2)}
\end{align}

\subsubsection{Back-Propagation}

\begin{align}
C^{(2)} &= \avgsum[i, n] (a_i^{(2)} - y_i)^2 \\
\pd[w^{(2)}] C^{(2)}
&= \avgsum[i, n] \pd[w^{(2)}]((a_i^{(2)} - y_i)^2) \\
&= \avgsum[i, n] 2(a_i^{(2)} - y_i)\pd[w^{(2)}]a_i^{(2)} \\
&= \avgsum[i, n] 2(a_i^{(2)} - y_i)a_i^{(2)}(1 - a_i^{(2)})a_i^{(1)} \\
\pd[b^{(2)}] C^{(2)} &= \avgsum[i, n] 2(a_i^{(2)} - y_i)a_i^{(2)}(1 - a_i^{(2)}) \\
\pd[a_i^{(1)}]C^{(2)} &= \avgsum[i, n] 2(a_i^{(2)} - y_i)a_i^{(2)}(1 - a_i^{(2)})w^{(2)} \\
e_i &= a_i^{(1)} - \pd[a_i^{(1)}]C^{(2)} \\
C^{(1)} &= \avgsum[i, n] (a_i^{(1)} - e_i)^2 \\
\pd[w^{(1)}]C^{(1)}
&= \pd[w^{(1)}]\left(\avgsum[i, n] (a_i^{(1)} - e_i)^2\right) \\
&= \avgsum[i, n] \pd[w^{(1)}]\left((a_i^{(1)} - e_i)^2\right) \\
&= \avgsum[i, n] 2(a_i^{(1)} - e_i)\pd[w^{(1)}]a_i^{(1)} \\
&= \avgsum[i, n] 2(\pd[a_i^{(1)}]C^{(2)})a_i^{(1)}(1 - a_i^{(1)})x_i \\
\pd[b^{(1)}]C^{(1)} &= \avgsum[i, n] 2(\pd[a_i^{(1)}]C^{(2)})a_i^{(1)}(1 - a_i^{(1)})
\end{align}
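As a quick consistency check, substituting the definition of $e_i$ back into $C^{(1)}$ gives

\begin{align}
C^{(1)} &= \avgsum[i, n] \left(\pd[a_i^{(1)}]C^{(2)}\right)^2
\end{align}

so $e_i$ acts as a corrected target for the activation $a_i^{(1)}$, and minimizing $C^{(1)}$ pushes the gradient signal coming from the second layer towards zero.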
\subsection{Arbitrary Neurons Model with 1 input}

Let's assume that we have $m$ layers.

\subsubsection{Feed-Forward}

Let's assume that $a_i^{(0)}$ is $x_i$.

\begin{align}
a_i^{(l)} &= \sigma(a_i^{(l-1)}w^{(l)} + b^{(l)}) \\
\pd[w^{(l)}]a_i^{(l)} &= a_i^{(l)}(1 - a_i^{(l)})a_i^{(l-1)} \\
\pd[b^{(l)}]a_i^{(l)} &= a_i^{(l)}(1 - a_i^{(l)}) \\
\pd[a_i^{(l-1)}]a_i^{(l)} &= a_i^{(l)}(1 - a_i^{(l)})w^{(l)}
\end{align}

\subsubsection{Back-Propagation}

Let's denote $a_i^{(m)} - y_i$ as $\pd[a_i^{(m)}]C^{(m+1)}$, so that the recursive formulas below also cover the last layer.

\begin{align}
C^{(l)} &= \avgsum[i, n] (\pd[a_i^{(l)}]C^{(l+1)})^2 \\
\pd[w^{(l)}]C^{(l)} &= \avgsum[i, n] 2(\pd[a_i^{(l)}]C^{(l+1)})a_i^{(l)}(1 - a_i^{(l)})a_i^{(l-1)} \\
\pd[b^{(l)}]C^{(l)} &= \avgsum[i, n] 2(\pd[a_i^{(l)}]C^{(l+1)})a_i^{(l)}(1 - a_i^{(l)}) \\
\pd[a_i^{(l-1)}]C^{(l)} &= \avgsum[i, n] 2(\pd[a_i^{(l)}]C^{(l+1)})a_i^{(l)}(1 - a_i^{(l)})w^{(l)}
\end{align}

\end{document}
--------------------------------------------------------------------------------
/twice.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

// Training set for y = 2*x.
float train[][2] = {
    {0, 0},
    {1, 2},
    {2, 4},
    {3, 6},
    {4, 8},
};
#define train_count (sizeof(train)/sizeof(train[0]))

float rand_float(void)
{
    return (float) rand() / (float) RAND_MAX;
}

// The general model this file specializes (single weight, no bias):
// x1, x2, x3, ..., b
// w1, w2, w3, ...
// y = x1*w1 + x2*w2 + x3*w3 + ... + b

// Mean squared error of the linear model y = x*w.
float cost(float w)
{
    float result = 0.0f;
    size_t n = train_count;
    for (size_t i = 0; i < n; ++i) {
        float x = train[i][0];
        float y = x*w;
        float d = y - train[i][1];
        result += d*d;
    }
    result /= n;
    return result;
}

// Analytic derivative of cost(); see papers/grad.tex, "Linear Model".
float dcost(float w)
{
    float result = 0.0f;
    size_t n = train_count;
    for (size_t i = 0; i < n; ++i) {
        float x = train[i][0];
        float y = train[i][1];
        result += 2*(x*w - y)*x;
    }
    result /= n;
    return result;
}
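// Sketch (not called from main below): sanity-check the analytic dcost()
// against a finite difference at a few points. The two columns should agree
// to a few decimal places for a small eps.
void grad_check(void)
{
    float eps = 1e-3;
    for (float w = -2.0f; w <= 2.0f; w += 1.0f) {
        float fd = (cost(w + eps) - cost(w))/eps;
        printf("w = %+f  dcost = %+f  finite diff = %+f\n", w, dcost(w), fd);
    }
}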
int main(void)
{
    // srand(time(0));
    srand(69);
    float w = rand_float()*10.0f;

    float rate = 1e-1;

    printf("cost = %f, w = %f\n", cost(w), w);
    for (size_t i = 0; i < 50; ++i) {
#if 0
        float eps = 1e-3;
        float c = cost(w);
        float dw = (cost(w + eps) - c)/eps;
#else
        float dw = dcost(w);
#endif
        w -= rate*dw;
        printf("cost = %f, w = %f\n", cost(w), w);
    }

    printf("------------------------------\n");
    printf("w = %f\n", w);

    return 0;
}
--------------------------------------------------------------------------------
/xor.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

// A 2-layer network hardwired for XOR: an "OR" neuron and a "NAND" neuron
// in the first layer feeding an "AND" neuron in the second.
typedef struct {
    float or_w1;
    float or_w2;
    float or_b;
    float nand_w1;
    float nand_w2;
    float nand_b;
    float and_w1;
    float and_w2;
    float and_b;
} Xor;

float sigmoidf(float x)
{
    return 1.f / (1.f + expf(-x));
}

float forward(Xor m, float x1, float x2)
{
    float a = sigmoidf(m.or_w1*x1 + m.or_w2*x2 + m.or_b);
    float b = sigmoidf(m.nand_w1*x1 + m.nand_w2*x2 + m.nand_b);
    return sigmoidf(a*m.and_w1 + b*m.and_w2 + m.and_b);
}

// A sample is two inputs followed by the expected output.
typedef float sample[3];

// XOR-gate
sample xor_train[] = {
    {0, 0, 0},
    {1, 0, 1},
    {0, 1, 1},
    {1, 1, 0},
};

// OR-gate
sample or_train[] = {
    {0, 0, 0},
    {1, 0, 1},
    {0, 1, 1},
    {1, 1, 1},
};

// AND-gate
sample and_train[] = {
    {0, 0, 0},
    {1, 0, 0},
    {0, 1, 0},
    {1, 1, 1},
};

// NAND-gate
sample nand_train[] = {
    {0, 0, 1},
    {1, 0, 1},
    {0, 1, 1},
    {1, 1, 0},
};

// NOR-gate
sample nor_train[] = {
    {0, 0, 1},
    {1, 0, 0},
    {0, 1, 0},
    {1, 1, 0},
};

sample *train = xor_train;
size_t train_count = 4;

// Mean squared error of the whole network over the training set.
float cost(Xor m)
{
    float result = 0.0f;
    for (size_t i = 0; i < train_count; ++i) {
        float x1 = train[i][0];
        float x2 = train[i][1];
        float y = forward(m, x1, x2);
        float d = y - train[i][2];
        result += d*d;
    }
    result /= train_count;
    return result;
}

float rand_float(void)
{
    return (float) rand() / (float) RAND_MAX;
}

Xor rand_xor(void)
{
    Xor m;
    m.or_w1 = rand_float();
    m.or_w2 = rand_float();
    m.or_b = rand_float();
    m.nand_w1 = rand_float();
    m.nand_w2 = rand_float();
    m.nand_b = rand_float();
    m.and_w1 = rand_float();
    m.and_w2 = rand_float();
    m.and_b = rand_float();
    return m;
}

void print_xor(Xor m)
{
    printf("or_w1 = %f\n", m.or_w1);
    printf("or_w2 = %f\n", m.or_w2);
    printf("or_b = %f\n", m.or_b);
    printf("nand_w1 = %f\n", m.nand_w1);
    printf("nand_w2 = %f\n", m.nand_w2);
    printf("nand_b = %f\n", m.nand_b);
    printf("and_w1 = %f\n", m.and_w1);
    printf("and_w2 = %f\n", m.and_w2);
    printf("and_b = %f\n", m.and_b);
}

// Gradient descent step: move every parameter against its gradient.
Xor learn(Xor m, Xor g, float rate)
{
    m.or_w1 -= rate*g.or_w1;
    m.or_w2 -= rate*g.or_w2;
    m.or_b -= rate*g.or_b;
    m.nand_w1 -= rate*g.nand_w1;
    m.nand_w2 -= rate*g.nand_w2;
    m.nand_b -= rate*g.nand_b;
    m.and_w1 -= rate*g.and_w1;
    m.and_w2 -= rate*g.and_w2;
    m.and_b -= rate*g.and_b;
    return m;
}

// Approximate the gradient of cost() with one-sided finite differences,
// nudging one parameter at a time.
Xor finite_diff(Xor m, float eps)
{
    Xor g;
    float c = cost(m);
    float saved;

    saved = m.or_w1;
    m.or_w1 += eps;
    g.or_w1 = (cost(m) - c)/eps;
    m.or_w1 = saved;

    saved = m.or_w2;
    m.or_w2 += eps;
    g.or_w2 = (cost(m) - c)/eps;
    m.or_w2 = saved;

    saved = m.or_b;
    m.or_b += eps;
    g.or_b = (cost(m) - c)/eps;
    m.or_b = saved;

    saved = m.nand_w1;
    m.nand_w1 += eps;
    g.nand_w1 = (cost(m) - c)/eps;
    m.nand_w1 = saved;

    saved = m.nand_w2;
    m.nand_w2 += eps;
    g.nand_w2 = (cost(m) - c)/eps;
    m.nand_w2 = saved;

    saved = m.nand_b;
    m.nand_b += eps;
    g.nand_b = (cost(m) - c)/eps;
    m.nand_b = saved;

    saved = m.and_w1;
    m.and_w1 += eps;
    g.and_w1 = (cost(m) - c)/eps;
    m.and_w1 = saved;

    saved = m.and_w2;
    m.and_w2 += eps;
    g.and_w2 = (cost(m) - c)/eps;
    m.and_w2 = saved;

    saved = m.and_b;
    m.and_b += eps;
    g.and_b = (cost(m) - c)/eps;
    m.and_b = saved;

    return g;
}
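// Sketch (not used by main below) of one way to shrink finite_diff(): since
// Xor is nine floats in a row, treat the model as a flat parameter array and
// loop. Strictly speaking C permits padding between struct members, so this
// relies on the (typical) assumption that there is none.
Xor finite_diff_flat(Xor m, float eps)
{
    Xor g;
    float *p = (float*)&m;
    float *gp = (float*)&g;
    float c = cost(m);
    for (size_t i = 0; i < sizeof(Xor)/sizeof(float); ++i) {
        float saved = p[i];
        p[i] += eps;
        gp[i] = (cost(m) - c)/eps;
        p[i] = saved;
    }
    return g;
}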
int main(void)
{
    srand(time(0));
    Xor m = rand_xor();

    float eps = 1e-1;
    float rate = 1e-1;

    for (size_t i = 0; i < 100*1000; ++i) {
        Xor g = finite_diff(m, eps);
        m = learn(m, g, rate);
        // printf("cost = %f\n", cost(m));
    }
    printf("cost = %f\n", cost(m));

    printf("------------------------------\n");
    for (size_t i = 0; i < 2; ++i) {
        for (size_t j = 0; j < 2; ++j) {
            printf("%zu ^ %zu = %f\n", i, j, forward(m, i, j));
        }
    }
    printf("------------------------------\n");
    printf("\"OR\" neuron:\n");
    for (size_t i = 0; i < 2; ++i) {
        for (size_t j = 0; j < 2; ++j) {
            printf("%zu | %zu = %f\n", i, j, sigmoidf(m.or_w1*i + m.or_w2*j + m.or_b));
        }
    }
    printf("------------------------------\n");
    printf("\"NAND\" neuron:\n");
    for (size_t i = 0; i < 2; ++i) {
        for (size_t j = 0; j < 2; ++j) {
            printf("~(%zu & %zu) = %f\n", i, j, sigmoidf(m.nand_w1*i + m.nand_w2*j + m.nand_b));
        }
    }
    printf("------------------------------\n");
    printf("\"AND\" neuron:\n");
    for (size_t i = 0; i < 2; ++i) {
        for (size_t j = 0; j < 2; ++j) {
            printf("%zu & %zu = %f\n", i, j, sigmoidf(m.and_w1*i + m.and_w2*j + m.and_b));
        }
    }
    return 0;
}
--------------------------------------------------------------------------------