├── .github
    └── FUNDING.yml
├── .gitignore
├── LICENSE
├── NanoNeuron.js
├── README.md
├── README.pt-BR.md
├── README.ru-RU.md
└── assets
    ├── 00-nano-neuron.png
    ├── 01_celsius_to_fahrenheit.png
    ├── 02_cost_function.png
    ├── 03_average_cost_function.png
    ├── 04_db.png
    ├── 04_dw.png
    ├── 05_b.png
    ├── 05_w.png
    ├── 06-training-process.png
    └── 07-converter.png


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # @see: https://docs.github.com/en/github/administering-a-repository/displaying-a-sponsor-button-in-your-repository
2 | github: trekhleb
3 | patreon: trekhleb
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Oleksii Trekhleb
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NanoNeuron.js:
--------------------------------------------------------------------------------
  1 | // NanoNeuron model.
  2 | // It implements basic linear dependency between 'x' and 'y': y = w * x + b.
  3 | // Simply saying our NanoNeuron is a "kid" that can draw the straight line in XY coordinates.
  4 | // w, b - parameters of the model.
  5 | function NanoNeuron(w, b) {
  6 |   // NanoNeuron knows only about these two parameters of linear function.
  7 |   // These parameters are something that NanoNeuron is going to "learn" during the training process.
  8 |   this.w = w;
  9 |   this.b = b;
 10 |   // This is the only thing that NanoNeuron can do - imitate linear dependency.
 11 |   // It accepts some input 'x' and predicts the output 'y'. No magic here.
 12 |   this.predict = (x) => {
 13 |     return x * this.w + this.b;
 14 |   }
 15 | }
 16 | 
 17 | // Convert Celsius values to Fahrenheit using formula: f = 1.8 * c + 32.
 18 | // Ultimately we want to teach our NanoNeuron to imitate this function (to learn
 19 | // that w = 1.8 and b = 32) without knowing these parameters in advance.
 20 | // c - temperature in Celsius
 21 | // f - calculated temperature in Fahrenheit
 22 | function celsiusToFahrenheit(c) {
 23 |   const w = 1.8;
 24 |   const b = 32;
 25 |   const f = c * w + b;
 26 |   return f;
 27 | };
 28 | 
 29 | // Generate training and test data-sets based on celsiusToFahrenheit function.
 30 | // Data-sets consist of pairs of input values and correctly labeled output values.
 31 | // In real life in most of the cases this data would be rather collected than generated.
 32 | // For example we might have a set of images of hand-drawn numbers and corresponding set
 33 | // of numbers that explain what number is written on each picture.  
 34 | function generateDataSets() {
 35 |   // Generate TRAINING examples.
 36 |   // We will use this data to train our NanoNeuron.
 37 |   // Before our NanoNeuron will grow and will be able to make decisions by its own
 38 |   // we need to teach it what is right and what is wrong using training examples.
 39 |   // xTrain -> [0, 1, 2, ...],
 40 |   // yTrain -> [32, 33.8, 35.6, ...]
 41 |   const xTrain = [];
 42 |   const yTrain = [];
 43 |   for (let x = 0; x < 100; x += 1) {
 44 |     const y = celsiusToFahrenheit(x);
 45 |     xTrain.push(x);
 46 |     yTrain.push(y);
 47 |   }
 48 | 
 49 |   // Generate TEST examples.
 50 |   // This data will be used to evaluate how well our NanoNeuron performs on the data
 51 |   // that it didn't see during the training. This is the point where we could
 52 |   // see that our "kid" has grown and can make decisions on its own.
 53 |   // xTest -> [0.5, 1.5, 2.5, ...]
 54 |   // yTest -> [32.9, 34.7, 36.5, ...]
 55 |   const xTest = [];
 56 |   const yTest = [];
 57 |   // By starting from 0.5 and using the same step of 1 as we have used for training set
 58 |   // we make sure that test set has different data comparing to training set.
 59 |   for (let x = 0.5; x < 100; x += 1) {
 60 |     const y = celsiusToFahrenheit(x);
 61 |     xTest.push(x);
 62 |     yTest.push(y);
 63 |   }
 64 | 
 65 |   return [xTrain, yTrain, xTest, yTest];
 66 | }
 67 | 
 68 | // Calculate the cost (the mistake) between the correct output value of 'y' and 'prediction' that NanoNeuron made.
 69 | function predictionCost(y, prediction) {
 70 |   // This is a simple difference between two values.
 71 |   // The closer the values to each other - the smaller the difference.
 72 |   // We're using power of 2 here just to get rid of negative numbers
 73 |   // so that (1 - 2) ^ 2 would be the same as (2 - 1) ^ 2.
 74 |   // Division by 2 is happening just to simplify further backward propagation formula (see below).
 75 |   return (y - prediction) ** 2 / 2; // i.e. -> 235.6
 76 | }
 77 | 
 78 | // Forward propagation.
 79 | // This function takes all examples from training sets xTrain and yTrain and calculates
 80 | // model predictions for each example from xTrain.
 81 | // Along the way it also calculates the prediction cost (average error our NanoNeuron made while predicting).
 82 | function forwardPropagation(model, xTrain, yTrain) {
 83 |   const m = xTrain.length;
 84 |   const predictions = [];
 85 |   let cost = 0;
 86 |   for (let i = 0; i < m; i += 1) {
 87 |     const prediction = nanoNeuron.predict(xTrain[i]);
 88 |     cost += predictionCost(yTrain[i], prediction);
 89 |     predictions.push(prediction);
 90 |   }
 91 |   // We are interested in average cost. 
 92 |   cost /= m;
 93 |   return [predictions, cost];
 94 | }
 95 | 
 96 | // Backward propagation.
 97 | // This is the place where machine learning looks like a magic.
 98 | // The key concept here is derivative which shows what step to take to get closer
 99 | // to the function minimum. Remember, finding the minimum of a cost function is the
100 | // ultimate goal of training process. The cost function looks like this:
101 | // (y - prediction) ^ 2 * 1/2, where prediction = x * w + b.
102 | function backwardPropagation(predictions, xTrain, yTrain) {
103 |   const m = xTrain.length;
104 |   // At the beginning we don't know in which way our parameters 'w' and 'b' need to be changed.
105 |   // Therefore we're setting up the changing steps for each parameters to 0.
106 |   let dW = 0;
107 |   let dB = 0;
108 |   for (let i = 0; i < m; i += 1) {
109 |     // This is derivative of the cost function by 'w' param.
110 |     // It will show in which direction (positive/negative sign of 'dW') and
111 |     // how fast (the absolute value of 'dW') the 'w' param needs to be changed.
112 |     dW += (yTrain[i] - predictions[i]) * xTrain[i];
113 |     // This is derivative of the cost function by 'b' param.
114 |     // It will show in which direction (positive/negative sign of 'dB') and
115 |     // how fast (the absolute value of 'dB') the 'b' param needs to be changed.
116 |     dB += yTrain[i] - predictions[i];
117 |   }
118 |   // We're interested in average deltas for each params.
119 |   dW /= m;
120 |   dB /= m;
121 |   return [dW, dB];
122 | }
123 | 
124 | // Train the model.
125 | // This is like a "teacher" for our NanoNeuron model:
126 | // - it will spend some time (epochs) with our yet stupid NanoNeuron model and try to train/teach it, 
127 | // - it will use specific "books" (xTrain and yTrain data-sets) for training,
128 | // - it will push our kid to learn harder (faster) by using a learning rate parameter 'alpha'
129 | //   (the harder the push the faster our "nano-kid" will learn but if the teacher will push too hard 
130 | //    the "kid" will have a nervous breakdown and won't be able to learn anything).
131 | function trainModel({model, epochs, alpha, xTrain, yTrain}) {
132 |   // The is the history array of how NanoNeuron learns.
133 |   // It might have a good or bad "marks" (costs) during the learning process.
134 |   const costHistory = [];
135 | 
136 |   // Let's start counting epochs.
137 |   for (let epoch = 0; epoch < epochs; epoch += 1) {
138 |     // Forward propagation for all training examples.
139 |     // Let's save the cost for current iteration.
140 |     // This will help us to analyse how our model learns.
141 |     const [predictions, cost] = forwardPropagation(model, xTrain, yTrain);
142 |     costHistory.push(cost);
143 |   
144 |     // Backward propagation. Let's learn some lessons from the mistakes.
145 |     // This function returns smalls steps we need to take for params 'w' and 'b'
146 |     // to make predictions more accurate.
147 |     const [dW, dB] = backwardPropagation(predictions, xTrain, yTrain);
148 |   
149 |     // Adjust our NanoNeuron parameters to increase accuracy of our model predictions.
150 |     nanoNeuron.w += alpha * dW;
151 |     nanoNeuron.b += alpha * dB;
152 |   }
153 | 
154 |   // Let's return cost history from the function to be able to log or to plot it after training.
155 |   return costHistory;
156 | }
157 | 
158 | // ===========================================================================================
159 | // Now let's use the functions we have created above.
160 | 
161 | // Let's create our NanoNeuron model instance.
162 | // At this moment NanoNeuron doesn't know what values should be set for parameters 'w' and 'b'.
163 | // So let's set up 'w' and 'b' randomly.
164 | const w = Math.random(); // i.e. -> 0.9492
165 | const b = Math.random(); // i.e. -> 0.4570
166 | const nanoNeuron = new NanoNeuron(w, b);
167 | 
168 | // Generate training and test data-sets.
169 | const [xTrain, yTrain, xTest, yTest] = generateDataSets();
170 | 
171 | // Let's train the model with small (0.0005) steps during the 70000 epochs.
172 | // You can play with these parameters, they are being defined empirically.
173 | const epochs = 70000;
174 | const alpha = 0.0005;
175 | const trainingCostHistory = trainModel({model: nanoNeuron, epochs, alpha, xTrain, yTrain});
176 | 
177 | // Let's check how the cost function was changing during the training.
178 | // We're expecting that the cost after the training should be much lower than before.
179 | // This would mean that NanoNeuron got smarter. The opposite is also possible. 
180 | console.log('Cost before the training:', trainingCostHistory[0]); // i.e. -> 4694.3335043
181 | console.log('Cost after the training:', trainingCostHistory[epochs - 1]); // i.e. -> 0.0000024
182 | 
183 | // Let's take a look at NanoNeuron parameters to see what it has learned.
184 | // We expect that NanoNeuron parameters 'w' and 'b' to be similar to ones we have in
185 | // celsiusToFahrenheit() function (w = 1.8 and b = 32) since our NanoNeuron tried to imitate it.
186 | console.log('NanoNeuron parameters:', {w: nanoNeuron.w, b: nanoNeuron.b}); // i.e. -> {w: 1.8, b: 31.99}
187 | 
188 | // Evaluate our model accuracy for test data-set to see how well our NanoNeuron deals with new unknown data predictions.
189 | // The cost of predictions on test sets is expected to be be close to the training cost.
190 | // This would mean that NanoNeuron performs well on known and unknown data.
191 | [testPredictions, testCost] = forwardPropagation(nanoNeuron, xTest, yTest);
192 | console.log('Cost on new testing data:', testCost); // i.e. -> 0.0000023
193 | 
194 | // Now, since we see that our NanoNeuron "kid" has performed well in the "school" during the training
195 | // and that he can convert Celsius to Fahrenheit temperatures correctly even for the data it hasn't seen
196 | // we can call it "smart" and ask him some questions. This was the ultimate goal of whole training process.
197 | const tempInCelsius = 70;
198 | const customPrediction = nanoNeuron.predict(tempInCelsius);
199 | console.log(`NanoNeuron "thinks" that ${tempInCelsius}°C in Fahrenheit is:`, customPrediction); // -> 158.0002
200 | console.log('Correct answer is:', celsiusToFahrenheit(tempInCelsius)); // -> 158
201 | 
202 | // So close! As all the humans our NanoNeuron is good but not ideal :)
203 | // Happy learning to you!
204 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # NanoNeuron
  2 | 
  3 | > 7 simple JavaScript functions that will give you a feeling of how machines can actually "learn".
  4 | 
  5 | _In other languages: [Русский](README.ru-RU.md), [Português](README.pt-BR.md)_
  6 | 
  7 | > _You might also be interested in 🤖 [Interactive Machine Learning Experiments](https://github.com/trekhleb/machine-learning-experiments)_
  8 | 
  9 | ## TL;DR
 10 | 
 11 | [NanoNeuron](https://github.com/trekhleb/nano-neuron) is an _over-simplified_ version of the Neuron concept from Neural Networks. NanoNeuron is trained to convert temperature values from Celsius to Fahrenheit.
 12 | 
 13 | The [NanoNeuron.js](https://github.com/trekhleb/nano-neuron/blob/master/NanoNeuron.js) code example contains 7 simple JavaScript functions (which touch on model prediction, cost calculation, forward/backward propagation, and training) that will give you a feeling of how machines can actually "learn". No 3rd-party libraries, no external data-sets or dependencies, only pure and simple JavaScript functions.
 14 | 
 15 | ☝🏻These functions are **NOT**, by any means, a complete guide to machine learning. A lot of machine learning concepts are skipped and over-simplified! This simplification is done on purpose to give the reader a really **basic** understanding and feeling of how machines can learn and ultimately to make it possible for the reader to recognize that it's not "machine learning MAGIC" but rather "machine learning MATH" 🤓.
 16 | 
 17 | ## What our NanoNeuron will learn
 18 | 
 19 | You've probably heard about Neurons in the context of [Neural Networks](https://en.wikipedia.org/wiki/Neural_network). NanoNeuron is just that but simpler and we're going to implement it from scratch. For simplicity reasons we're not even going to build a network on NanoNeurons. We will have it all working on its own, doing some magical predictions for us. Namely, we will teach this singular NanoNeuron to convert (predict) the temperature from Celsius to Fahrenheit.
 20 | 
 21 | By the way, the formula for converting Celsius to Fahrenheit is this:
 22 | 
 23 | ![Celsius to Fahrenheit](https://github.com/trekhleb/nano-neuron/blob/master/assets/01_celsius_to_fahrenheit.png?raw=true)
 24 | 
 25 | But for now our NanoNeuron doesn't know about it...
 26 | 
 27 | ### The NanoNeuron model
 28 | 
 29 | Let's implement our NanoNeuron model function. It implements a basic linear dependency between `x` and `y` which looks like `y = w * x + b`. Simply put, our NanoNeuron is a "kid" in a "school" that is being taught to draw a straight line in `XY` coordinates.
 30 | 
 31 | Variables `w`, `b` are parameters of the model. NanoNeuron knows only about these two parameters of the linear function.
 32 | These parameters are something that NanoNeuron is going to "learn" during the training process.
 33 | 
 34 | The only thing that NanoNeuron can do is to imitate linear dependency. In its `predict()` method it accepts some input `x` and predicts the output `y`. No magic here.
 35 | 
 36 | ```javascript
 37 | function NanoNeuron(w, b) {
 38 |   this.w = w;
 39 |   this.b = b;
 40 |   this.predict = (x) => {
 41 |     return x * this.w + this.b;
 42 |   }
 43 | }
 44 | ```
 45 | 
 46 | _(...wait... [linear regression](https://en.wikipedia.org/wiki/Linear_regression) is it you?)_  🧐
 47 | 
 48 | ### Celsius to Fahrenheit conversion
 49 | 
 50 | The temperature value in Celsius can be converted to Fahrenheit using the following formula: `f = 1.8 * c + 32`, where `c` is a temperature in Celsius and `f` is the calculated temperature in Fahrenheit.
 51 | 
 52 | ```javascript
 53 | function celsiusToFahrenheit(c) {
 54 |   const w = 1.8;
 55 |   const b = 32;
 56 |   const f = c * w + b;
 57 |   return f;
 58 | };
 59 | ```
 60 | 
 61 | Ultimately we want to teach our NanoNeuron to imitate this function (to learn that `w = 1.8` and `b = 32`) without knowing these parameters in advance.
 62 | 
 63 | This is what the Celsius to Fahrenheit conversion function looks like:
 64 | 
 65 | ![Celsius to Fahrenheit conversion](https://github.com/trekhleb/nano-neuron/blob/master/assets/07-converter.png?raw=true)
 66 | 
 67 | ### Generating data-sets
 68 | 
 69 | Before the training we need to generate **training** and **test data-sets** based on the `celsiusToFahrenheit()` function. Data-sets consist of pairs of input values and correctly labeled output values.
 70 | 
 71 | > In real life, in most cases, this data would be collected rather than generated. For example, we might have a set of images of hand-drawn numbers and the corresponding set of numbers that explain what number is written on each picture.
 72 | 
 73 | We will use TRAINING example data to train our NanoNeuron. Before our NanoNeuron will grow and be able to make decisions on its own, we need to teach it what is right and what is wrong using training examples.
 74 | 
 75 | We will use TEST examples to evaluate how well our NanoNeuron performs on the data that it didn't see during the training. This is the point where we could see that our "kid" has grown and can make decisions on its own.
 76 | 
 77 | ```javascript
 78 | function generateDataSets() {
 79 |   // xTrain -> [0, 1, 2, ...],
 80 |   // yTrain -> [32, 33.8, 35.6, ...]
 81 |   const xTrain = [];
 82 |   const yTrain = [];
 83 |   for (let x = 0; x < 100; x += 1) {
 84 |     const y = celsiusToFahrenheit(x);
 85 |     xTrain.push(x);
 86 |     yTrain.push(y);
 87 |   }
 88 | 
 89 |   // xTest -> [0.5, 1.5, 2.5, ...]
 90 |   // yTest -> [32.9, 34.7, 36.5, ...]
 91 |   const xTest = [];
 92 |   const yTest = [];
 93 |   // By starting from 0.5 and using the same step of 1 as we have used for training set
 94 |   // we make sure that test set has different data comparing to training set.
 95 |   for (let x = 0.5; x < 100; x += 1) {
 96 |     const y = celsiusToFahrenheit(x);
 97 |     xTest.push(x);
 98 |     yTest.push(y);
 99 |   }
100 | 
101 |   return [xTrain, yTrain, xTest, yTest];
102 | }
103 | ```
104 | 
105 | ### The cost (the error) of prediction
106 | 
107 | We need to have some metric that will show us how close our model's prediction is to correct values. The calculation of the cost (the mistake) between the correct output value of `y` and `prediction`, that our NanoNeuron created, will be made using the following formula:
108 | 
109 | ![Prediction Cost](https://github.com/trekhleb/nano-neuron/blob/master/assets/02_cost_function.png?raw=true)
110 | 
111 | This is a simple difference between two values. The closer the values are to each other, the smaller the difference. We're using a power of `2` here just to get rid of negative numbers so that `(1 - 2) ^ 2` would be the same as `(2 - 1) ^ 2`. Division by `2` is happening just to simplify further the backward propagation formula (see below).
112 | 
113 | The cost function in this case will be as simple as:
114 | 
115 | ```javascript
116 | function predictionCost(y, prediction) {
117 |   return (y - prediction) ** 2 / 2; // i.e. -> 235.6
118 | }
119 | ```
120 | 
121 | ### Forward propagation
122 | 
123 | To do forward propagation means to do a prediction for all training examples from `xTrain` and `yTrain` data-sets and to calculate the average cost of those predictions along the way.
124 | 
125 | We just let our NanoNeuron say its opinion, at this point, by just allowing it to guess how to convert the temperature. It might be stupidly wrong here. The average cost will show us how wrong our model is right now. This cost value is really important since changing the NanoNeuron parameters `w` and `b`, and by doing the forward propagation again; we will be able to evaluate if our NanoNeuron became smarter or not after these parameters change.
126 | 
127 | The average cost will be calculated using the following formula:
128 | 
129 | ![Average Cost](https://github.com/trekhleb/nano-neuron/blob/master/assets/03_average_cost_function.png?raw=true)
130 | 
131 | Where `m` is a number of training examples (in our case: `100`).
132 | 
133 | Here is how we may implement it in code:
134 | 
135 | ```javascript
136 | function forwardPropagation(model, xTrain, yTrain) {
137 |   const m = xTrain.length;
138 |   const predictions = [];
139 |   let cost = 0;
140 |   for (let i = 0; i < m; i += 1) {
141 |     const prediction = nanoNeuron.predict(xTrain[i]);
142 |     cost += predictionCost(yTrain[i], prediction);
143 |     predictions.push(prediction);
144 |   }
145 |   // We are interested in average cost.
146 |   cost /= m;
147 |   return [predictions, cost];
148 | }
149 | ```
150 | 
151 | ### Backward propagation
152 | 
153 | When we know how right or wrong our NanoNeuron's predictions are (based on average cost at this point) what should we do to make the predictions more precise?
154 | 
155 | The backward propagation gives us the answer to this question. Backward propagation is the process of evaluating the cost of prediction and adjusting the NanoNeuron's parameters `w` and `b` so that next and future predictions would be more precise.
156 | 
157 | This is the place where machine learning looks like magic 🧞‍♂️. The key concept here is the **derivative** which shows what step to take to get closer to the cost function minimum.
158 | 
159 | Remember, finding the minimum of a cost function is the ultimate goal of the training process. If we find such values for `w` and `b` such that our average cost function will be small, it would mean that the NanoNeuron model does really good and precise predictions.
160 | 
161 | Derivatives are a big and separate topic that we will not cover in this article. [MathIsFun](https://www.mathsisfun.com/calculus/derivatives-introduction.html) is a good resource to get a basic understanding of it.
162 | 
163 | One thing about derivatives that will help you to understand how backward propagation works is that the derivative at a point is the slope of the tangent line to the function curve at that point, and the sign of that slope tells us in which direction we need to move to get closer to the function minimum.
164 | 
165 | ![Derivative slope](https://www.mathsisfun.com/calculus/images/slope-x2-2.svg)
166 | 
167 | _Image source: [MathIsFun](https://www.mathsisfun.com/calculus/derivatives-introduction.html)_
168 | 
169 | For example, on the plot above, you can see that if we're at the point of `(x=2, y=4)` then the slope tells us to go `left` and `down` to get to the function minimum. Also notice that the bigger the slope, the faster we should move to the minimum.
170 | 
171 | The derivatives of our `averageCost` function for parameters `w` and `b` look like this:
172 | 
173 | ![dW](https://github.com/trekhleb/nano-neuron/blob/master/assets/04_dw.png?raw=true)
174 | 
175 | ![dB](https://github.com/trekhleb/nano-neuron/blob/master/assets/04_db.png?raw=true)
176 | 
177 | Where `m` is a number of training examples (in our case: `100`).
178 | 
179 | _You may read more about derivative rules and how to get a derivative of complex functions [here](https://www.mathsisfun.com/calculus/derivatives-rules.html)._
180 | 
181 | ```javascript
182 | function backwardPropagation(predictions, xTrain, yTrain) {
183 |   const m = xTrain.length;
184 |   // At the beginning we don't know in which way our parameters 'w' and 'b' need to be changed.
185 |   // Therefore we're setting up the changing steps for each parameters to 0.
186 |   let dW = 0;
187 |   let dB = 0;
188 |   for (let i = 0; i < m; i += 1) {
189 |     dW += (yTrain[i] - predictions[i]) * xTrain[i];
190 |     dB += yTrain[i] - predictions[i];
191 |   }
192 |   // We're interested in average deltas for each params.
193 |   dW /= m;
194 |   dB /= m;
195 |   return [dW, dB];
196 | }
197 | ```
198 | 
199 | ### Training the model
200 | 
201 | Now we know how to evaluate the correctness of our model for all training set examples (_forward propagation_). We also know how to do small adjustments to parameters `w` and `b` of our NanoNeuron model (_backward propagation_). But the issue is that if we run forward propagation and then backward propagation only once, it won't be enough for our model to learn any laws/trends from the training data. You may compare it with a kid attending elementary school for just one day. He/she should go to school not once but day after day and year after year to learn something.
202 | 
203 | So we need to repeat forward and backward propagation for our model many times. That is exactly what the `trainModel()` function does. It is like a "teacher" for our NanoNeuron model:
204 | 
205 | - it will spend some time (`epochs`) with our slightly stupid NanoNeuron model and try to train/teach it,
206 | - it will use specific "books" (`xTrain` and `yTrain` data-sets) for training,
207 | - it will push our kid to learn harder (faster) by using a learning rate parameter `alpha`
208 | 
209 | A few words about the learning rate `alpha`. This is just a multiplier for the `dW` and `dB` values we have calculated during the backward propagation. So, the derivative pointed us toward the direction we need to take to find a minimum of the cost function (`dW` and `dB` sign) and it also showed us how fast we need to go in that direction (absolute values of `dW` and `dB`). Now we need to multiply those step sizes by `alpha` just to adjust our movement to the minimum faster or slower. Sometimes if we use big values for `alpha`, we might simply jump over the minimum and never find it.
210 | 
211 | The analogy with the teacher would be that the harder s/he pushes our "nano-kid" the faster our "nano-kid" will learn but if the teacher pushes too hard, the "kid" will have a nervous breakdown and won't be able to learn anything 🤯.
212 | 
213 | Here is how we're going to update our model's `w` and `b` params:
214 | 
215 | ![w](https://github.com/trekhleb/nano-neuron/blob/master/assets/05_w.png?raw=true)
216 | 
217 | ![b](https://github.com/trekhleb/nano-neuron/blob/master/assets/05_b.png?raw=true)
218 | 
219 | And here is our trainer function:
220 | 
221 | ```javascript
222 | function trainModel({model, epochs, alpha, xTrain, yTrain}) {
223 |   // The is the history array of how NanoNeuron learns.
224 |   const costHistory = [];
225 | 
226 |   // Let's start counting epochs.
227 |   for (let epoch = 0; epoch < epochs; epoch += 1) {
228 |     // Forward propagation.
229 |     const [predictions, cost] = forwardPropagation(model, xTrain, yTrain);
230 |     costHistory.push(cost);
231 |   
232 |     // Backward propagation.
233 |     const [dW, dB] = backwardPropagation(predictions, xTrain, yTrain);
234 |   
235 |     // Adjust our NanoNeuron parameters to increase accuracy of our model predictions.
236 |     nanoNeuron.w += alpha * dW;
237 |     nanoNeuron.b += alpha * dB;
238 |   }
239 | 
240 |   return costHistory;
241 | }
242 | ```
243 | 
244 | ### Putting all the pieces together
245 | 
246 | Now let's use the functions we have created above.
247 | 
248 | Let's create our NanoNeuron model instance. At this moment the NanoNeuron doesn't know what values should be set for parameters `w` and `b`. So let's set up `w` and `b` randomly.
249 | 
250 | ```javascript
251 | const w = Math.random(); // i.e. -> 0.9492
252 | const b = Math.random(); // i.e. -> 0.4570
253 | const nanoNeuron = new NanoNeuron(w, b);
254 | ```
255 | 
256 | Generate training and test data-sets.
257 | 
258 | ```javascript
259 | const [xTrain, yTrain, xTest, yTest] = generateDataSets();
260 | ```
261 | 
262 | Let's train the model with small incremental (`0.0005`) steps for `70000` epochs. You can play with these parameters, they are being defined empirically.
263 | 
264 | ```javascript
265 | const epochs = 70000;
266 | const alpha = 0.0005;
267 | const trainingCostHistory = trainModel({model: nanoNeuron, epochs, alpha, xTrain, yTrain});
268 | ```
269 | 
270 | Let's check how the cost function was changing during the training. We're expecting that the cost after the training should be much lower than before. This would mean that NanoNeuron got smarter. The opposite is also possible. 
271 | 
272 | ```javascript
273 | console.log('Cost before the training:', trainingCostHistory[0]); // i.e. -> 4694.3335043
274 | console.log('Cost after the training:', trainingCostHistory[epochs - 1]); // i.e. -> 0.0000024
275 | ```
276 | 
277 | This is how the training cost changes over the epochs. On the `x` axis is the epoch number x1000.
278 | 
279 | ![Training process](https://github.com/trekhleb/nano-neuron/blob/master/assets/06-training-process.png?raw=true)
280 | 
281 | Let's take a look at NanoNeuron parameters to see what it has learned. We expect the NanoNeuron parameters `w` and `b` to be similar to the ones we have in the `celsiusToFahrenheit()` function (`w = 1.8` and `b = 32`) since our NanoNeuron tried to imitate it.
282 | 
283 | ```javascript
284 | console.log('NanoNeuron parameters:', {w: nanoNeuron.w, b: nanoNeuron.b}); // i.e. -> {w: 1.8, b: 31.99}
285 | ```
286 | 
287 | Evaluate the model accuracy for the test data-set to see how well our NanoNeuron deals with new unknown data predictions. The cost of predictions on test sets is expected to be close to the training cost. This would mean that our NanoNeuron performs well on known and unknown data.
288 | 
289 | ```javascript
290 | [testPredictions, testCost] = forwardPropagation(nanoNeuron, xTest, yTest);
291 | console.log('Cost on new testing data:', testCost); // i.e. -> 0.0000023
292 | ```
293 | 
294 | Now, since we see that our NanoNeuron "kid" has performed well in the "school" during the training and that he can convert Celsius to Fahrenheit temperatures correctly, even for the data it hasn't seen, we can call it "smart" and ask him some questions. This was the ultimate goal of the entire training process.
295 | 
296 | ```javascript
297 | const tempInCelsius = 70;
298 | const customPrediction = nanoNeuron.predict(tempInCelsius);
299 | console.log(`NanoNeuron "thinks" that ${tempInCelsius}°C in Fahrenheit is:`, customPrediction); // -> 158.0002
300 | console.log('Correct answer is:', celsiusToFahrenheit(tempInCelsius)); // -> 158
301 | ```
302 | 
303 | So close! Like all of us humans, our NanoNeuron is good but not ideal :)
304 | 
305 | Happy learning to you!
306 | 
307 | ## How to launch NanoNeuron
308 | 
309 | You may clone the repository and run it locally:
310 | 
311 | ```bash
312 | git clone https://github.com/trekhleb/nano-neuron.git
313 | cd nano-neuron
314 | ```
315 | 
316 | ```bash
317 | node ./NanoNeuron.js
318 | ```
319 | 
320 | ## Skipped machine learning concepts
321 | 
322 | The following machine learning concepts were skipped and simplified for simplicity of explanation.
323 | 
324 | **Training/testing data-set splitting**
325 | 
326 | Normally you have one big set of data. Depending on the number of examples in that set, you may want to split it in proportion of 70/30 for train/test sets. The data in the set should be randomly shuffled before the split. If the number of examples is big (i.e. millions) then the split might happen in proportions that are closer to 90/10 or 95/5 for train/test data-sets.
327 | 
328 | **The network brings the power**
329 | 
330 | Normally you won't notice the usage of just one standalone neuron. The power is in the [network](https://en.wikipedia.org/wiki/Neural_network) of such neurons. The network might learn much more complex features. NanoNeuron alone looks more like a simple [linear regression](https://en.wikipedia.org/wiki/Linear_regression) than a neural network.
331 | 
332 | **Input normalization**
333 | 
334 | Before the training, it would be better to [normalize input values](https://www.jeremyjordan.me/batch-normalization/).
335 | 
336 | **Vectorized implementation**
337 | 
338 | For networks, the vectorized (matrix) calculations work much faster than `for` loops. Normally forward/backward propagation works much faster if it is implemented in vectorized form and calculated using, for example, the [NumPy](https://numpy.org/) Python library.
339 | 
340 | **Minimum of the cost function**
341 | 
342 | The cost function that we were using in this example is over-simplified. It should have [logarithmic components](https://stackoverflow.com/questions/32986123/why-the-cost-function-of-logistic-regression-has-a-logarithmic-expression/32998675). Changing the cost function will also change its derivatives so the back propagation step would also use different formulas.
343 | 
344 | **Activation function**
345 | 
346 | Normally the output of a neuron should be passed through an activation function like [Sigmoid](https://en.wikipedia.org/wiki/Sigmoid_function) or [ReLU](https://en.wikipedia.org/wiki/Rectifier_(neural_networks)) or others.
347 | 
348 | ## Author
349 | 
350 | - [@trekhleb](https://trekhleb.dev)
351 | 


--------------------------------------------------------------------------------
/README.pt-BR.md:
--------------------------------------------------------------------------------
  1 | # NanoNeuron
  2 | 
  3 | > 7 funções simples do JavaScript que farão você ter uma ideia de como as máquinas podem "aprender"  literalmente.
  4 | 
  5 | _Em outros idiomas: [Русский](README.ru-RU.md), [English](README.md)_
  6 | 
  7 | > _Você também pode se interessar por 🤖 [Experimentos interativos de Machine Learning (em inglês)](https://github.com/trekhleb/machine-learning-experiments)_
  8 | 
  9 | ## Resumo
 10 | 
 11 | [NanoNeuron](https://github.com/trekhleb/nano-neuron) é uma versão _bem simples_ do conceito de Neurônio em uma Rede Neural. NanoNeuron é treinado para converter valores de graus Celsius em Fahrenheit.
 12 | 
 13 | O código de exemplo [NanoNeuron.js](https://github.com/trekhleb/nano-neuron/blob/master/NanoNeuron.js) contém 7 simples funções JavaScript (sobre predição de modelo, cálculo de custo, propagação e retropropagação, e treinamento) que irão te dar a visão de como as máquinas podem literalmente "aprender". Sem bibliotecas de terceiros, sem conjuntos de dados externos ou dependências, apenas simples e puramente funções JavaScript.
 14 | 
 15 | ☝🏻Essas funções **NÃO** são, de nenhuma forma, um guia completo para aprendizado de máquina (_"machine learning" em inglês_). Um monte de conceitos de machine learning foram desconsiderados e muito simplificados! Essa simplificação foi feita com o propósito de dar ao leitor apenas um entendimento **básico** da visão de como as máquinas podem aprender e por fim para tornar possível para o leitor reconhecer que isso não é um "aprendizado MÁGICO de máquina" mas sim um "aprendizado MATEMÁTICO de máquina" 🤓.
 16 | 
 17 | ## O que o nosso NanoNeuron irá aprender
 18 | 
 19 | Provavelmente você já ouviu falar sobre Neurônios no contexto de [Redes Neurais](https://pt.wikipedia.org/wiki/Rede_neural_artificial). NanoNeuron é isso mas de forma simples e vamos implementar desde o início. Para efeitos de simplicidade nós não iremos construir uma rede de NanoNeuron. Teremos tudo funcionando no mesmo lugar, fazendo algumas predições mágicas para nós. Só pra você saber, vamos ensinar esse NanoNeuron a converter (predizer) a temperatura em graus Celsius para Fahrenheit.
 20 | 
 21 | A propósito, a fórmula para converter graus Celsius em Fahrenheit é essa:
 22 | 
 23 | ![Celsius para Fahrenheit](https://github.com/trekhleb/nano-neuron/blob/master/assets/01_celsius_to_fahrenheit.png?raw=true)
 24 | 
 25 | Mas por enquanto nosso NanoNeuron não sabe disso...
 26 | 
 27 | ### O modelo NanoNeuron
 28 | 
 29 | Vamos implementar nossa função de modelo do NanoNeuron. Ela implementa uma dependência linear básica entre `x` e `y` que se parece com `y = w * x + b`. Basicamente, nosso NanoNeuron é uma "criança" na "escola" aprendendo a desenhar uma linha reta nas coordenadas `XY`.
 30 | 
 31 | Variáveis `w`, `b` são parâmetros do modelo. NanoNeuron só conhece esses dois parâmetros da função linear. Eles são algo que NanoNeuron irá "aprender" durante o processo de treinamento.
 32 | 
 33 | A única coisa que o NanoNeuron pode fazer é imitar a dependência linear. No método `predict()` é aceito um dado de entrada `x` e prediz a saída `y`. Nenhuma mágica aqui.
 34 | 
 35 | ```javascript
 36 | function NanoNeuron(w, b) {
 37 |   this.w = w;
 38 |   this.b = b;
 39 |   this.predict = (x) => {
 40 |     return x * this.w + this.b;
 41 |   }
 42 | }
 43 | ```
 44 | 
 45 | _(...espera... [regressão linear](https://pt.wikipedia.org/wiki/Regress%C3%A3o_linear) é você?)_  🧐
 46 | 
 47 | ### Conversão de graus Celsius para Fahrenheit
 48 | 
 49 | A temperatura em graus Celsius pode ser convertida para Fahrenheit usando a seguinte fórmula: `f = 1.8 * c + 32`, onde `c` é a temperatura em graus Celsius e `f` a temperatura calculada em Fahrenheit.
 50 | 
 51 | ```javascript
 52 | function celsiusToFahrenheit(c) {
 53 |   const w = 1.8;
 54 |   const b = 32;
 55 |   const f = c * w + b;
 56 |   return f;
 57 | };
 58 | ```
 59 | 
 60 | Queremos que o nosso NanoNeuron imite essa função (para aprender que `w = 1.8` e `b = 32`) sem conhecer esses parâmetros antecipadamente.
 61 | 
 62 | Assim é como a função de conversão de graus Celsius para Fahrenheit irá parecer:
 63 | 
 64 | ![conversão de graus Celsius para Fahrenheit](https://github.com/trekhleb/nano-neuron/blob/master/assets/07-converter.png?raw=true)
 65 | 
 66 | ### Gerando os conjuntos de dados
 67 | 
 68 | Antes do treinamento nós precisamos gerar os conjuntos de dados de **treinamento** e de **teste** baseando-se na função `celsiusToFahrenheit()`. Os conjuntos de dados consistem em pares de valores de entrada e valores de saída corretamente calculados.
 69 | 
 70 | > Na vida real, na maioria dos casos, esses dados são coletados ao invés de gerados. Por exemplo, podemos ter um conjunto de imagens de números desenhados à mão e o conjunto com os números que explicam qual é o número escrito em cada imagem.
 71 | 
 72 | Usaremos os dados de exemplo de TREINAMENTO para treinar nosso NanoNeuron. Antes dele crescer e ser capaz de fazer decisões sozinho, precisamos ensiná-lo o que é certo e o que é errado usando os exemplos de treinamento.
 73 | 
 74 | Usaremos os exemplos de TESTE para avaliar o quanto nosso NanoNeuron performa bem nos dados que ele nunca viu durante o treinamento. Esse é o ponto onde podemos ver que a nossa "criança" cresceu e pode tomar decisões sozinha.
 75 | 
 76 | ```javascript
 77 | function generateDataSets() {
 78 |   // xTrain -> [0, 1, 2, ...],
 79 |   // yTrain -> [32, 33.8, 35.6, ...]
 80 |   const xTrain = [];
 81 |   const yTrain = [];
 82 |   for (let x = 0; x < 100; x += 1) {
 83 |     const y = celsiusToFahrenheit(x);
 84 |     xTrain.push(x);
 85 |     yTrain.push(y);
 86 |   }
 87 | 
 88 |   // xTest -> [0.5, 1.5, 2.5, ...]
 89 |   // yTest -> [32.9, 34.7, 36.5, ...]
 90 |   const xTest = [];
 91 |   const yTest = [];
 92 |   // Ao começar com 0,5 e usar o mesmo incremento de 1 como usamos para o conjunto
 93 |   // de treinamento, temos certeza que teremos dados diferentes para comparar.
 94 |   for (let x = 0.5; x < 100; x += 1) {
 95 |     const y = celsiusToFahrenheit(x);
 96 |     xTest.push(x);
 97 |     yTest.push(y);
 98 |   }
 99 | 
100 |   return [xTrain, yTrain, xTest, yTest];
101 | }
102 | ```
103 | 
104 | ### O custo (do erro) da predição
105 | 
106 | Precisamos ter alguma métrica que nos mostre o quão perto nosso modelo de predição está dos valores corretos. O cálculo do custo (o engano) entre o valor correto calculado de `y` e a `prediction`, que o nosso NanoNeuron criou, será feito usando a seguinte fórmula:
107 | 
108 | ![Custo de predição](https://github.com/trekhleb/nano-neuron/blob/master/assets/02_cost_function.png?raw=true)
109 | 
110 | Essa é uma simples diferença entre dois valores. Quanto mais perto os valores estão um do outro, menor a diferença. Estamos usando uma potência de `2` aqui apenas para se livrar dos números negativos de forma que `(1 - 2) ^ 2` será o mesmo que `(2 - 1) ^ 2`. A divisão por `2` acontece apenas para simplificar depois a fórmula de retropropagação (veja abaixo).
111 | 
112 | A função de custo nesse caso, será tão simples quanto:
113 | 
114 | ```javascript
115 | function predictionCost(y, prediction) {
116 |   return (y - prediction) ** 2 / 2; // ex.: -> 235.6
117 | }
118 | ```
119 | 
120 | ### Propagação (para frente)
121 | 
122 | Propagação _("forward propagation" em inglês)_ significa fazer uma predição de todos os exemplos de treinamento para os conjuntos de dados `xTrain` e `yTrain` e para calcular o custo médio dessas predições no meio do caminho.
123 | 
124 | Vamos apenas deixar nosso NanoNeuron dizer sua opinião nesse momento, permitindo-o adivinhar como converter a temperatura. Ele deve estar estupidamente errado nessa fase. O custo médio nos mostrará o quão errado nosso modelo está agora. Esse valor de custo é realmente importante visto que alterando os parâmetros NanoNeuron `w` e `b` e fazendo a propagação novamente, estaremos aptos a avaliar depois se nosso NanoNeuron se tornou esperto ou não conforme os parâmetros mudam.
125 | 
126 | O custo médio será calculado usando a seguinte fórmula:
127 | 
128 | ![Custo médio](https://github.com/trekhleb/nano-neuron/blob/master/assets/03_average_cost_function.png?raw=true)
129 | 
130 | Onde `m` é o número de exemplos de treinamento (no nosso caso: `100`).
131 | 
132 | Esta é a forma como devemos implementar no código:
133 | 
134 | ```javascript
135 | function forwardPropagation(model, xTrain, yTrain) {
136 |   const m = xTrain.length;
137 |   const predictions = [];
138 |   let cost = 0;
139 |   for (let i = 0; i < m; i += 1) {
140 |     const prediction = nanoNeuron.predict(xTrain[i]);
141 |     cost += predictionCost(yTrain[i], prediction);
142 |     predictions.push(prediction);
143 |   }
144 |   // Estamos interessados no custo médio
145 |   cost /= m;
146 |   return [predictions, cost];
147 | }
148 | ```
149 | 
150 | ### Retropropagação (para trás)
151 | 
152 | Quando conhecemos o quão certo ou errado nossas predições do NanoNeuron estão (baseado no custo médio a este ponto) o que devemos fazer para tornar essas predições mais precisas?
153 | 
154 | A retropropagação nos dá a resposta para essa questão. Retropropagação _(Backward propagation em inglês)_ é o processo de avaliar o custo da predição e ajustar os parâmetros do NanoNeuron `w` e `b` para que as próximas e futuras predições sejam mais precisas.
155 | 
156 | Isso é onde o aprendizado de máquina se parece com mágica 🧞‍♂️. O conceito chave aqui é a **derivada** que nos mostra qual passo dar para chegar perto do custo mínimo da função.
157 | 
158 | Lembre-se, encontrar o custo mínimo da função é o objetivo final do processo de treinamento. Se encontrarmos ambos valores de `w` e `b` de forma que o custo médio da nossa função seja pequeno, isso significa que o modelo NanoNeuron fez predições ótimas e precisas.
159 | 
160 | Derivada é um grande e separado tópico que não iremos cobrir neste artigo. [Wikipedia](https://pt.wikipedia.org/wiki/Derivada) pode te ajudar a entender melhor sobre isso.
161 | 
162 | Uma coisa sobre as derivadas que irá te ajudar a entender como a retropropagação funciona é que a derivada representa a inclinação da reta tangente ao gráfico da função em um determinado ponto.
163 | 
164 | ![Inclinação da derivada](https://www.mathsisfun.com/calculus/images/slope-x2-2.svg)
165 | 
166 | _Origem da imagem: [MathIsFun](https://www.mathsisfun.com/calculus/derivatives-introduction.html)_
167 | 
168 | Por exemplo, no gráfico acima, você pode ver que se estivermos no ponto `(x=2, y=4)` então a inclinação nos diz para ir para a `esquerda` e para `baixo` para chegar ao mínimo da função. Note também que quanto maior a inclinação, mais rápido nos movemos para o mínimo.
169 | 
170 | As derivadas da nossa função `averageCost` _(custo médio em inglês)_ para os parâmetros `w` e `b` se parecem com:
171 | 
172 | ![dW](https://github.com/trekhleb/nano-neuron/blob/master/assets/04_dw.png?raw=true)
173 | 
174 | ![dB](https://github.com/trekhleb/nano-neuron/blob/master/assets/04_db.png?raw=true)
175 | 
176 | Onde `m` é o número de exemplos de treinamento (no nosso caso: `100`).
177 | 
178 | _Você pode aprender mais sobre as regras das derivadas e como obter uma derivada de funções complexas [aqui](https://brasilescola.uol.com.br/matematica/introducao-ao-estudo-das-derivadas.htm) ou na [indicação do autor original (em inglês)](https://www.mathsisfun.com/calculus/derivatives-rules.html)._
179 | 
180 | ```javascript
181 | function backwardPropagation(predictions, xTrain, yTrain) {
182 |   const m = xTrain.length;
183 |   // No começo não conhecemos de que forma nossos parâmetros 'w' e 'b' precisam ser alterados.
184 |   // Portanto vamos configurar cada parâmetro para 0.
185 |   let dW = 0;
186 |   let dB = 0;
187 |   for (let i = 0; i < m; i += 1) {
188 |     dW += (yTrain[i] - predictions[i]) * xTrain[i];
189 |     dB += yTrain[i] - predictions[i];
190 |   }
191 |   // Estamos interessados em deltas médios de cada parâmetro.
192 |   dW /= m;
193 |   dB /= m;
194 |   return [dW, dB];
195 | }
196 | ```
197 | 
198 | ### Treinando o modelo
199 | 
200 | Agora que sabemos como avaliar a exatidão do nosso modelo para todo o conjunto de exemplos (_propagação_), nós precisamos também saber como fazer pequenos ajustes nos parâmetros `w` e `b` do nosso modelo (_retropropagação_). Mas o problema é que se rodarmos apenas uma vez a propagação e a retropropagação, não será o suficiente para o nosso modelo aprender qualquer lei/tendência dos dados de treinamento. Você deve comparar isso com um dia da escola primária para a criança. Ela deve ir para a escola não apenas uma vez, mas dia após dia e ano após ano para aprender algo.
201 | 
202 | Então precisamos repetir as propagações do nosso modelo várias vezes. Isto é exatamente o que a função `trainModel()` faz. É como um "professor" para nosso modelo do NanoNeuron:
203 | 
204 | - ela irá passar um tempo (`epochs`) com o nosso ligeiro modelo do NanoNeuron e tentará treiná-lo/ensiná-lo,
205 | - usará "livros" específicos (os conjuntos de dados `xTrain` e `yTrain`) para treinar,
206 | - irá forçar nossa criança a aprender pesado (rápido) usando um parâmetro de ajuste `alpha`.
207 | 
208 | Uma nota sobre a taxa de aprendizado `alpha`. Ela é simplesmente um multiplicador dos valores de `dW` e `dB` que calculamos durante a retropropagação. Assim, as derivadas nos apontam a direção que precisamos seguir para encontrar o mínimo da função de custo (sinais de `dW` e `dB`) e também nos mostram o quão rápido precisamos ir naquela direção (valores absolutos de `dW` e `dB`). Então precisamos multiplicar esses tamanhos de passo por `alpha` para ajustar nosso movimento em direção ao mínimo, mais rápido ou mais devagar. Algumas vezes, se usarmos um valor alto demais para `alpha`, vamos simplesmente passar do mínimo e nunca vamos encontrá-lo.
209 | 
210 | A analogia com o professor pode ser que quanto mais ele força nossa "criança nano" a ser mais rápida, ela irá aprender, mas se forçarmos demais, a "criança" terá um ataque de nervos e não será capaz de aprender nada 🤯.
211 | 
212 | Aqui é como vamos fazer para atualizar nossos parâmetros `w` e `b` do modelo:
213 | 
214 | ![w](https://github.com/trekhleb/nano-neuron/blob/master/assets/05_w.png?raw=true)
215 | 
216 | ![b](https://github.com/trekhleb/nano-neuron/blob/master/assets/05_b.png?raw=true)
217 | 
218 | E aqui está nossa função de treinamento:
219 | 
220 | ```javascript
221 | function trainModel({model, epochs, alpha, xTrain, yTrain}) {
222 |   // Esse é o histórico de aprendizado do NanoNeuron.
223 |   const costHistory = [];
224 | 
225 |   // Vamos começar enumerando as épocas
226 |   for (let epoch = 0; epoch < epochs; epoch += 1) {
227 |     // Propagação
228 |     const [predictions, cost] = forwardPropagation(model, xTrain, yTrain);
229 |     costHistory.push(cost);
230 |   
231 |     // retropropagação
232 |     const [dW, dB] = backwardPropagation(predictions, xTrain, yTrain);
233 |   
234 |     // Ajustar os parâmetros do nosso NanoNeuron para aumentar a acurácia do nosso modelo de predições.
235 |     nanoNeuron.w += alpha * dW;
236 |     nanoNeuron.b += alpha * dB;
237 |   }
238 | 
239 |   return costHistory;
240 | }
241 | ```
242 | 
243 | ### Juntando as peças
244 | 
245 | Agora vamos usar as funções que criamos acima.
246 | 
247 | Vamos criar nossa instância do modelo do NanoNeuron. Nesse momento o NanoNeuron não sabe que valores deve usar nos parâmetros `w` e `b`. Então vamos colocar um valor qualquer em `w` e `b`.
248 | 
249 | ```javascript
250 | const w = Math.random(); // ex: -> 0.9492
251 | const b = Math.random(); // ex: -> 0.4570
252 | const nanoNeuron = new NanoNeuron(w, b);
253 | ```
254 | 
255 | Gerar os conjuntos de dados do treinamento e o de testes.
256 | 
257 | ```javascript
258 | const [xTrain, yTrain, xTest, yTest] = generateDataSets();
259 | ```
260 | 
261 | Vamos treinar nosso modelo com um pequeno incremento (`0,0005`) por passo para `70.000` épocas. Você pode brincar com esses parâmetros, eles foram definidos empiricamente.
262 | 
263 | ```javascript
264 | const epochs = 70000;
265 | const alpha = 0.0005;
266 | const trainingCostHistory = trainModel({model: nanoNeuron, epochs, alpha, xTrain, yTrain});
267 | ```
268 | 
269 | Vamos checar o quanto a função de custo mudou durante o treinamento. Esperamos que o custo após o treinamento seja menor que antes. Isso significa que o NanoNeuron se tornou esperto. O oposto também é possível.
270 | 
271 | ```javascript
272 | console.log('Custo antes do treinamento:', trainingCostHistory[0]); // ex: -> 4694.3335043
273 | console.log('Custo depois do treinamento:', trainingCostHistory[epochs - 1]); // ex: -> 0.0000024
274 | ```
275 | 
276 | Isso é como o custo do treinamento muda através das épocas. No eixo `x` é a época multiplicada por 1000.
277 | 
278 | ![Processo de treinamento](https://github.com/trekhleb/nano-neuron/blob/master/assets/06-training-process.png?raw=true)
279 | 
280 | Vamos dar uma olhada nos parâmetros do NanoNeuron para ver o que ele aprendeu. Esperamos que os parâmetros `w` e `b` do NanoNeuron sejam similares com os que temos na função `celsiusToFahrenheit()` (`w = 1.8` e `b = 32`) visto que treinamos o NanoNeuron para imitar isso.
281 | 
282 | ```javascript
283 | console.log('Parâmetros NanoNeuron:', {w: nanoNeuron.w, b: nanoNeuron.b}); // ex: -> {w: 1.8, b: 31.99}
284 | ```
285 | 
286 | Avalie a acurácia do modelo usando os dados de teste para ver o quanto o NanoNeuron se dá bem com predições de dados desconhecidos. É esperado que o custo das predições no conjunto de testes seja próximo do custo de treinamento. Isso significaria que nosso NanoNeuron performa bem tanto em dados que ele conhece quanto nos que ele não conhece.
287 | 
288 | ```javascript
289 | [testPredictions, testCost] = forwardPropagation(nanoNeuron, xTest, yTest);
290 | console.log('Custo com novos dados de teste:', testCost); // ex: -> 0.0000023
291 | ```
292 | 
293 | Agora, visto que nossa "criança" NanoNeuron performou bem na "escola" durante o treinamento e ele pode converter graus Celsius em Fahrenheit corretamente, mesmo para dados que nunca viu, podemos chamá-lo de "esperto" e perguntá-lo algumas coisas. Esse era o objetivo final de todo nosso processo de treinamento.
294 | 
295 | ```javascript
296 | const tempInCelsius = 70;
297 | const customPrediction = nanoNeuron.predict(tempInCelsius);
298 | console.log(`NanoNeuron "acha" que ${tempInCelsius}°C em Fahrenheit é:`, customPrediction); // -> 158.0002
299 | console.log('Resposta correta é:', celsiusToFahrenheit(tempInCelsius)); // -> 158
300 | ```
301 | 
302 | Muito próximo! Assim como todos nós, humanos, nosso NanoNeuron é bom, mas não ideal :)
303 | 
304 | Bom aprendizado para você!
305 | 
306 | ## Como executar o NanoNeuron
307 | 
308 | Você pode clonar esse repositório e executá-lo localmente:
309 | 
310 | ```bash
311 | git clone https://github.com/trekhleb/nano-neuron.git
312 | cd nano-neuron
313 | ```
314 | 
315 | ```bash
316 | node ./NanoNeuron.js
317 | ```
318 | 
319 | ## Conceitos desconsiderados do aprendizado de máquina
320 | 
321 | Os seguintes conceitos de _machine learning_ foram pulados e simplificados para uma explicação mais simples.
322 | 
323 | ### Divisão do conjunto de dados de treinamento/teste
324 | 
325 | Normalmente você tem um grande conjunto de dados. Dependendo do número de exemplos no conjunto, você pode querer dividi-lo em 70/30 para treino/teste. Os dados no conjunto devem ser embaralhados aleatoriamente antes da divisão. Se o número de exemplos é grande (ex: milhões) então a divisão acontece em proporções próximas a 90/10 ou 95/5 para treino/teste.
326 | 
327 | ### A rede traz o poder
328 | 
329 | Normalmente você não observa o uso de apenas um neurônio independente. O poder está na [rede neural](https://pt.wikipedia.org/wiki/Rede_neural_artificial) desses neurônios. A rede pode aprender coisas muito mais complexas. NanoNeuron sozinho se parece mais com uma simples [regressão linear](https://pt.wikipedia.org/wiki/Regress%C3%A3o_linear) do que uma rede neural.
330 | 
331 | ### Normalização dos dados de entrada
332 | 
333 | Antes do treinamento, seria melhor [normalizar os dados de entrada (em inglês)](https://www.jeremyjordan.me/batch-normalization/).
334 | 
335 | ### Implementação vetorizada
336 | 
337 | Para redes neurais, cálculos vetorizados (matriz) trabalham muito mais rápido do que laços `for`. Normalmente as propagações (frente e trás) trabalham muito mais rápido se implementadas de forma vetorizada e calculadas usando, por exemplo, a biblioteca Python [NumPy](https://numpy.org/).
338 | 
339 | ### Função de custo mínimo
340 | 
341 | A função de custo que estamos usando nesse exemplo é muito simplificada. Deveria ter [componentes logarítmicos (em inglês)](https://stackoverflow.com/questions/32986123/why-the-cost-function-of-logistic-regression-has-a-logarithmic-expression/32998675). Alterar a função de custo também irá alterar suas derivadas, então o passo de retropropagação também usaria fórmulas diferentes.
342 | 
343 | ### Função de ativação
344 | 
345 | Normalmente a saída do neurônio deveria passar por uma função de ativação como a [Sigmoid](https://pt.wikipedia.org/wiki/Fun%C3%A7%C3%A3o_sigmoide) ou a [ReLU (em inglês)](https://en.wikipedia.org/wiki/Rectifier_(neural_networks)) ou outras.
346 | 


--------------------------------------------------------------------------------
/README.ru-RU.md:
--------------------------------------------------------------------------------
  1 | # Нано-нейрон
  2 | 
  3 | > 7 простых JavaScript функций, показывающих, как машина может «учиться»
  4 | 
  5 | _На других языках: [English](README.md), [Português](README.pt-BR.md)_
  6 | 
  7 | ## TL;DR
  8 | 
  9 | [**Нано-нейрон**](https://github.com/trekhleb/nano-neuron) — это _упрощенная_ версия нейрона из концепции нейронной сети. Нано-нейрон выполняет простейшую задачу и натренирован на конвертацию температуры из градусов Цельсия в градусы Фаренгейта.
 10 | 
 11 | Код [**NanoNeuron.js**](https://github.com/trekhleb/nano-neuron/blob/master/NanoNeuron.js) состоит из 7 простых JavaScript функций, затрагивающих обучение, тренировку, предсказание, прямое и обратное распространение сигнала модели. Целью написания этих функций было дать читателю минимальное, базовое объяснение (интуицию) того, как же все-таки машина может «обучаться». В коде не используются сторонние библиотеки. Как говорится, только простые «vanilla» JavaScript функции.
 12 | 
 13 | Эти функции **ни в коей мере** не являются исчерпывающим руководством по машинному обучению. Множество концепций машинного обучения в них пропущено или же упрощено! Это упрощение допущено с единственной целью — дать читателю самое **базовое** понимание и интуицию о том, как машина в принципе может «учиться», чтобы в итоге «МАГИЯ машинного обучения» звучала для читателя все более как «МАТЕМАТИКА машинного обучения».
 14 | 
 15 | ![NanoNeuron](https://raw.githubusercontent.com/trekhleb/nano-neuron/master/assets/00-nano-neuron.png)
 16 | 
 17 | ## Что «выучит» наш нано-нейрон
 18 | 
 19 | Вы, возможно, слышали о нейронах в контексте [нейронных сетей](https://ru.wikipedia.org/wiki/%D0%98%D1%81%D0%BA%D1%83%D1%81%D1%81%D1%82%D0%B2%D0%B5%D0%BD%D0%BD%D0%B0%D1%8F_%D0%BD%D0%B5%D0%B9%D1%80%D0%BE%D0%BD%D0%BD%D0%B0%D1%8F_%D1%81%D0%B5%D1%82%D1%8C). Нано-нейрон является упрощенной версией того самого нейрона. В этом примере мы напишем его реализацию с нуля. Для простоты примера мы не будем строить сеть из нано-нейронов. Мы остановимся на создании одного единственного нано-нейрона и попробуем научить его конвертировать температуру из градусов Цельсия в градусы Фаренгейта. Другими словами мы научим его **предсказывать** температуру в градусах Фаренгейта на основании температуры в градусах Цельсия.
 20 | 
 21 | Кстати, формула для конвертации градусов Цельсия в градусы Фаренгейта выглядит следующим образом:
 22 | 
 23 | ![Celsius to Fahrenheit](https://github.com/trekhleb/nano-neuron/blob/master/assets/01_celsius_to_fahrenheit.png?raw=true)
 24 | 
 25 | Но на данный момент наш нано-нейрон ничего об этой формуле не знает...
 26 | 
 27 | ### Модель нано-нейрона
 28 | 
 29 | Начнем с создания функции, описывающей модель нашего нано-нейрона. Эта модель представляет собой простую линейную зависимость между `x` и `y`, которая выглядит следующим образом: `y = w * x + b`. Проще говоря, наш нано-нейрон — это ребенок, который умеет рисовать прямую линию в системе координат `XY`.
 30 | 
 31 | Переменные `w` и `b` являются **параметрами** модели. Нано-нейрон знает только эти два параметра линейной функции. Эти параметры как раз и есть то, что наш нано-нейрон будет учить во время процесса тренировки (обучения).
 32 | 
 33 | Единственная вещь, которую нано-нейрон на данном этапе умеет делать — это имитировать линейные зависимости. Делает он это в методе `predict()`, который принимает переменную `x` на входе и предсказывает переменную `y` на выходе. Никакой магии.
 34 | 
 35 | ```javascript
 36 | function NanoNeuron(w, b) {
 37 |   this.w = w;
 38 |   this.b = b;
 39 |   this.predict = (x) => {
 40 |     return x * this.w + this.b;
 41 |   }
 42 | }
 43 | ```
 44 | 
 45 | _(...постой... [линейная регрессия](https://en.wikipedia.org/wiki/Linear_regression) это ты, что ли?)_
 46 | 
 47 | ### Конвертация градусов Цельсия в градусы Фаренгейта
 48 | 
 49 | Температура в градусах Цельсия может быть преобразована в градусы Фаренгейта по формуле: `f = 1.8 * c + 32`, где `c` — температура в градусах Цельсия и `f` — температура в градусах Фаренгейта.
 50 | 
 51 | ```javascript
 52 | function celsiusToFahrenheit(c) {
 53 |   const w = 1.8;
 54 |   const b = 32;
 55 |   const f = c * w + b;
 56 |   return f;
 57 | };
 58 | ```
 59 | 
 60 | В итоге мы хотим, чтобы наш нано-нейрон смог имитировать именно эту функцию. Он должен будет сам догадаться (научиться), что параметр `w = 1.8` и `b = 32` не зная об этом заранее.
 61 | 
 62 | Вот так функция конвертации выглядит на графике. Именно ее должен научиться «рисовать» наш нано-нейронный «малыш»:
 63 | 
 64 | ![Celsius to Fahrenheit conversion](https://github.com/trekhleb/nano-neuron/blob/master/assets/07-converter.png?raw=true)
 65 | 
 66 | ### Генерирование данных
 67 | 
 68 | В классическом программировании нам известны данные на входе (`x`) и алгоритм преобразования этих данных (параметры `w` и `b`), но неизвестны выходные данные (`y`). Выходные данные вычисляются на основании входных с помощью известного нам алгоритма. В машинном обучении же напротив, известны лишь входные и выходные данные (`x` и `y`), а вот алгоритм перехода от `x` к `y` неизвестен (параметры `w` и `b`).
 69 | 
 70 | Именно генерацией входных и выходных данных мы сейчас и займемся. Нам необходимо сгенерировать данные для **тренировки** нашей модели и данные для **тестирования** модели. В этом нам поможет функция-помощник `celsiusToFahrenheit()`. Каждый из тренировочных и тестовых наборов данных представляет собой множество пар `x` и `y`. Например, если  `x = 2`, то `y = 35,6` и так далее.
 71 | 
 72 | > В реальном мире в большинстве случаев данные будут *собраны*, а не *сгенерированы*. Например, такими собранными данными может быть набор пар «фото лица» --> «имя человека».
 73 | 
 74 | Мы будем использовать ТРЕНИРОВОЧНЫЙ набор данных для обучения нашего нано-нейрона. Перед тем как тот вырастет и будет способен принимать решения самостоятельно, мы должны научить его, что является «правдой», а что «ложью», используя «правильные» данные из тренировочного набора.
 75 | 
 76 | > Кстати, тут явно прослеживается жизненный принцип «мусор на входе — мусор на выходе». Если нано-нейрончику в тренировочный набор подкинуть «ложь», что 5°C конвертируются в 1000°F, то через много итераций обучения он станет этому верить и будет корректно конвертировать все значения температуры **кроме** 5°C. Нам надо быть очень осторожными с теми тренировочными данными, которые мы каждый день загружаем нашей мозговой нейронной сеточке.
 77 | 
 78 | Отвлекся. Продолжим.
 79 | 
 80 | Мы будем использовать ТЕСТОВЫЙ набор данных для оценки того, насколько наш нано-нейрон хорошо обучился и может делать корректные предсказания на новых данных, которых он не видел во время своего обучения.
 81 | 
 82 | ```javascript
 83 | function generateDataSets() {
 84 |   // xTrain -> [0, 1, 2, ...],
 85 |   // yTrain -> [32, 33.8, 35.6, ...]
 86 |   const xTrain = [];
 87 |   const yTrain = [];
 88 |   for (let x = 0; x < 100; x += 1) {
 89 |     const y = celsiusToFahrenheit(x);
 90 |     xTrain.push(x);
 91 |     yTrain.push(y);
 92 |   }
 93 | 
 94 |   // xTest -> [0.5, 1.5, 2.5, ...]
 95 |   // yTest -> [32.9, 34.7, 36.5, ...]
 96 |   const xTest = [];
 97 |   const yTest = [];
 98 |   // Начав с 0.5 и используя шаг 1, который мы использовали для тренировочного набора
 99 |   // мы можем утверждать, что тестовый и тренировочный наборы не пересекаются.
100 |   for (let x = 0.5; x < 100; x += 1) {
101 |     const y = celsiusToFahrenheit(x);
102 |     xTest.push(x);
103 |     yTest.push(y);
104 |   }
105 | 
106 |   return [xTrain, yTrain, xTest, yTest];
107 | }
108 | ```
109 | 
110 | ### Оценка погрешности предсказаний
111 | 
112 | Нам необходима определенная метрика (измерение, число, оценка), которая покажет насколько близко предсказание нано-нейрона к истинному. Другими словами это число/метрика/функция должна показать, насколько нано-нейрон прав или неправ. Это как в школе, ученик может за свою контрольную получить оценку `5` или `2`.
113 | 
114 | В случае с нано-нейроном его ошибку (погрешность) между истинным значением `y` и предсказанным значением `prediction` будем вычислять по формуле:
115 | 
116 | ![Prediction Cost](https://github.com/trekhleb/nano-neuron/blob/master/assets/02_cost_function.png?raw=true)
117 | 
118 | Как видно из формулы, мы будем считать ошибку как простую разницу между двумя значениями. Чем ближе значения друг к другу, тем меньше эта разница. Мы используем здесь возведение в квадрат для того, чтобы избавиться от знака, чтобы в итоге `(1 - 2) ^ 2` было равнозначно `(2 - 1) ^ 2`. Деление на `2` происходит исключительно для того, чтобы упростить значение производной этой функции в формуле обратного распространения сигнала (об этом ниже).
119 | 
120 | Функция ошибки в данном случае будет выглядеть следующим образом:
121 | 
122 | ```javascript
123 | function predictionCost(y, prediction) {
124 |   return (y - prediction) ** 2 / 2; // i.e. -> 235.6
125 | }
126 | ```
127 | 
128 | ### Прямое распространение сигнала
129 | 
130 | Произвести прямое распространение сигнала через нашу модель означает осуществить предсказания для всех пар из тренировочного набора данных `xTrain` и `yTrain` и вычислить среднюю ошибку (погрешность) этих предсказаний.
131 | 
132 | Мы всего лишь даем нашему нано-нейрону «высказаться», позволяя ему сделать предсказания (конвертировать температуру). При этом нано-нейрон на данном этапе может очень сильно ошибаться. Среднее значение ошибки предсказания покажет нам, насколько наша модель далека/близка к истине на данный момент. Значение ошибки здесь очень важно, поскольку, изменив параметры `w` и `b` и произведя прямое распространение сигнала снова, мы сможем оценить, стал ли наш нано-нейрон «умнее» с новыми параметрами или нет.
133 | 
134 | Средняя ошибка предсказаний нано-нейрона будет вычисляться по следующей формуле:
135 | 
136 | ![Average Cost](https://github.com/trekhleb/nano-neuron/blob/master/assets/03_average_cost_function.png?raw=true)
137 | 
138 | Где `m` — количество тренировочных экземпляров (в нашем случае у нас `100` пар данных).
139 | 
140 | Вот как мы можем реализовать это в коде:
141 | 
142 | ```javascript
143 | function forwardPropagation(model, xTrain, yTrain) {
144 |   const m = xTrain.length;
145 |   const predictions = [];
146 |   let cost = 0;
147 |   for (let i = 0; i < m; i += 1) {
148 |     const prediction = nanoNeuron.predict(xTrain[i]);
149 |     cost += predictionCost(yTrain[i], prediction);
150 |     predictions.push(prediction);
151 |   }
152 |   // Нас интересует среднее значение ошибки.
153 |   cost /= m;
154 |   return [predictions, cost];
155 | }
156 | ```
157 | 
158 | ### Обратное распространение сигнала
159 | 
160 | Теперь, когда мы знаем насколько наш нано-нейрон прав или неправ в своих предсказаниях (основываясь на среднем значении ошибки), как мы можем сделать предсказания более точными?
161 | 
162 | Обратное распространение сигнала поможет нам в этом. Обратное распространение сигнала — это процесс оценки ошибки нано-нейрона и последующей корректировки его параметров `w` и `b` так, чтобы очередные предсказания нано-нейрона для всего набора тренировочных данных стали чуть-чуть точнее.
163 | 
164 | Вот здесь машинное обучение становится похожим на магию. Ключевая концепция здесь — это **производная функции**, которая показывает какого размера шаг и в какую сторону нам надо сделать, чтобы приблизиться к минимуму функции (в нашем случае к минимуму функции ошибки).
165 | 
166 | Конечной целью обучения нано-нейрона является нахождение минимума функции ошибки (см. функцию выше). Если нам удастся найти такие значения `w` и `b` при которых среднее значение функции ошибки будет маленьким, то это будет означать, что наш нано-нейрон неплохо справляется с предсказаниями температуры в градусах Фаренгейта.
167 | 
168 | Производные — это большая и отдельная тема, которую мы не покроем в этой статье. [MathIsFun](https://www.mathsisfun.com/calculus/derivatives-introduction.html) — отличный ресурс, который может дать базовое понимание производных.
169 | 
170 | Одна вещь, которую мы должны вынести из сути производной и которая поможет нам понять, как работает обратное распространение сигнала, — это то, что производная функции в определенной точке `x` и `y`, по своему определению, задает наклон касательной линии к кривой этой функции в точке `x` и `y` и *указывает нам на направление к минимуму функции*.
171 | 
172 | ![Derivative slope](https://www.mathsisfun.com/calculus/images/slope-x2-2.svg)
173 | 
174 | _Изображение взято из [MathIsFun](https://www.mathsisfun.com/calculus/derivatives-introduction.html)_
175 | 
176 | Например, на графике выше, вы видите, что в точке `(x=2, y=4)` наклон касательной показывает нам, что нужно двигаться `влево` и `вниз`, чтобы добраться до минимума функции. Также обратите внимание, что чем больше наклон касательной, тем быстрее мы должны двигаться к точке минимума.
177 | 
178 | Производные нашей средней функции ошибки `averageCost` по параметрам `w` и `b` будут выглядеть следующим образом:
179 | 
180 | ![dW](https://github.com/trekhleb/nano-neuron/blob/master/assets/04_dw.png?raw=true)
181 | 
182 | ![dB](https://github.com/trekhleb/nano-neuron/blob/master/assets/04_db.png?raw=true)
183 | 
184 | Где `m` — количество тренировочных экземпляров (в нашем случае у нас `100` пар данных).
185 | 
186 | _Вы можете прочесть более детально о том, как брать производную сложных функций [здесь](https://www.mathsisfun.com/calculus/derivatives-rules.html)._
187 | 
188 | ```javascript
189 | function backwardPropagation(predictions, xTrain, yTrain) {
190 |   const m = xTrain.length;
191 |   // В начале мы не знаем насколько мы должны изменить параметры 'w' и 'b'.
192 |   // Поэтому размер шага пока равен 0.
193 |   let dW = 0;
194 |   let dB = 0;
195 |   for (let i = 0; i < m; i += 1) {
196 |     dW += (yTrain[i] - predictions[i]) * xTrain[i];
197 |     dB += yTrain[i] - predictions[i];
198 |   }
199 |   // Нас интересуют средние значения.
200 |   dW /= m;
201 |   dB /= m;
202 |   return [dW, dB];
203 | }
204 | ```
205 | 
206 | ### Тренировка модели
207 | 
208 | Теперь мы знаем, как оценить погрешность/ошибку предсказаний модели нашего нано-нейрона для всех тренировочных данных (прямое распространение сигнала). Мы также знаем, как делать корректировку параметров `w` и `b` модели нано-нейрона (обратное распространение сигнала), чтобы улучшить точность предсказаний. Проблема в том, что если мы произведем прямое и обратное распространение сигнала всего один раз, то этого будет недостаточно для нашей модели, чтобы выявить и выучить зависимости и законы в тренировочных данных. Вы можете сравнить это с однодневным посещением школы учеником. Он/она должны ходить в школу регулярно, день за днем, год за годом, чтобы выучить весь материал.
209 | 
210 | Итак, мы должны *повторить* прямое и обратное распространение сигнала много раз. Этим как раз и занимается функция `trainModel()`. Она будто «учительница» для модели нашего нано-нейрона:
211 | 
212 | - она проведет определенное время (`epochs`) с нашим пока еще глупеньким нано-нейроном, пытаясь обучить его,
213 | - она будет использовать специальные книги (`xTrain` и `yTrain` наборы данных) для обучения,
214 | - она будет стимулировать нашего «ученика» учиться прилежнее (быстрее) используя параметр `alpha`, который по сути регулирует скорость обучения.
215 | 
216 | Пару слов о параметре `alpha`. Это всего лишь коэффициент (умножитель) для значений переменных `dW` и `dB`, которые мы вычисляем во время обратного распространения сигнала. Итак, производная показала нам направление к минимуму функции ошибки (об этом нам говорят знаки значений `dW` и `dB`). Также производная показала нам, как быстро нам надо двигаться в сторону минимума функции (об этом нам говорят абсолютные значения `dW` и `dB`). Теперь нам надо умножить размер шага на `alpha`, чтобы отрегулировать скорость нашего приближения к минимуму (итоговый размер шага). Иногда, если мы будем использовать большие значения для `alpha`, мы можем идти настолько большими шагами, что можем попросту *переступить* минимум функции, тем самым пропустив его.
217 | 
218 | По аналогии с «учительницей», чем сильнее она заставляла бы нашего «нано-ученика» учиться, тем быстрее бы он выучился, НО, если заставлять и давить на него очень сильно, то у нашего «нано-ученика» может случиться нервный срыв и полная апатия и он совсем ничего не выучит.
219 | 
220 | Мы будем обновлять параметры нашей модели `w` и `b` следующим образом:
221 | 
222 | ![w](https://github.com/trekhleb/nano-neuron/blob/master/assets/05_w.png?raw=true)
223 | 
224 | ![b](https://github.com/trekhleb/nano-neuron/blob/master/assets/05_b.png?raw=true)
225 | 
226 | И вот так выглядит сама тренировка:
227 | 
228 | ```javascript
229 | function trainModel({model, epochs, alpha, xTrain, yTrain}) {
230 |   // Это история обучения нашего нано-нейрона. Дневник успеваемости.
231 |   const costHistory = [];
232 | 
233 |   // Начнем считать дни (эпохи) обучения
234 |   for (let epoch = 0; epoch < epochs; epoch += 1) {
235 |     // Прямое распространение сигнала.
236 |     const [predictions, cost] = forwardPropagation(model, xTrain, yTrain);
237 |     costHistory.push(cost);
238 |   
239 |     // Обратное распространение сигнала.
240 |     const [dW, dB] = backwardPropagation(predictions, xTrain, yTrain);
241 |   
242 |     // Корректируем параметры модели нано-нейрона, чтобы улучшить точность предсказаний.
243 |     nanoNeuron.w += alpha * dW;
244 |     nanoNeuron.b += alpha * dB;
245 |   }
246 | 
247 |   return costHistory;
248 | }
249 | ```
250 | 
251 | ### Соберем все функции вместе
252 | 
253 | Время использовать все ранее созданные функции вместе.
254 | 
255 | Создадим экземпляр модели нано-нейрона. На данный момент нано-нейрон не знает ничего о том, какими должны быть параметры `w` и `b`. Так что давайте установим `w` и `b` случайным образом.
256 | 
257 | ```javascript
258 | const w = Math.random(); // i.e. -> 0.9492
259 | const b = Math.random(); // i.e. -> 0.4570
260 | const nanoNeuron = new NanoNeuron(w, b);
261 | ```
262 | 
263 | Генерируем тренировочный и тестовый наборы данных.
264 | 
265 | ```javascript
266 | const [xTrain, yTrain, xTest, yTest] = generateDataSets();
267 | ```
268 | 
269 | Теперь давайте попробуем натренировать нашу модель используя небольшие шаги (`0.0005`) в течение `70000` эпох. Вы можете поэкспериментировать с этими параметрами, они определяются эмпирическим путем.
270 | 
271 | ```javascript
272 | const epochs = 70000;
273 | const alpha = 0.0005;
274 | const trainingCostHistory = trainModel({model: nanoNeuron, epochs, alpha, xTrain, yTrain});
275 | ```
276 | 
277 | Проверим, как изменялось значение ошибки нашей модели во время тренировки. Мы ожидаем, что значение ошибки после тренировки должно быть значительно меньше, чем до тренировки. Это означало бы, что наш нано-нейрон поумнел. Возможен также и противоположный вариант, когда после тренировки погрешность предсказаний только выросла (например, при больших значениях шага обучения `alpha`).
278 | 
279 | ```javascript
280 | console.log('Ошибка до тренировки:', trainingCostHistory[0]); // i.e. -> 4694.3335043
281 | console.log('Ошибка после тренировки:', trainingCostHistory[epochs - 1]); // i.e. -> 0.0000024
282 | ```
283 | 
284 | А вот как значение ошибки модели изменялось во время тренировки. По оси `x` находятся эпохи (в тысячах). Ожидаем, что график будет убывающим.
285 | 
286 | ![Training process](https://github.com/trekhleb/nano-neuron/blob/master/assets/06-training-process.png?raw=true)
287 | 
288 | Давайте посмотрим на то, какие параметры «выучил» наш нано-нейрон. Ожидаем, что параметры `w` и `b` будут похожими на одноименные параметры из функции `celsiusToFahrenheit()` (`w = 1.8` и `b = 32`), ведь именно ее наш нано-нейрон пытался имитировать.
289 | 
290 | ```javascript
291 | console.log('Параметры нано-нейрона:', {w: nanoNeuron.w, b: nanoNeuron.b}); // i.e. -> {w: 1.8, b: 31.99}
292 | ```
293 | Как видим, нано-нейрон очень близок к функции `celsiusToFahrenheit()`.
294 | 
295 | А теперь давайте посмотрим насколько точны предсказания нашего нано-нейрона для тестовых данных, которых он не видел во время обучения. Ошибка предсказаний для тестовых данных должна быть близкой к ошибке предсказаний для тренировочных данных. Это будет означать, что нано-нейрон выучил правильные зависимости и может корректно абстрагировать свой опыт и на ранее неизвестные ему данные (в этом ведь вся ценность модели).
296 | 
297 | ```javascript
298 | [testPredictions, testCost] = forwardPropagation(nanoNeuron, xTest, yTest);
299 | console.log('Ошибка на новых данных:', testCost); // i.e. -> 0.0000023
300 | ```
301 | 
302 | Теперь, поскольку наш «нано-малыш» хорошо обучился в «школе» и теперь умеет достаточно точно конвертировать градусы Цельсия в градусы Фаренгейта даже для данных, которых он не видел, мы можем назвать его достаточно «умненьким». Теперь мы даже можем спрашивать у него совета касательно конвертации температуры, а ведь это и была цель всего обучения.
303 | 
304 | ```javascript
305 | const tempInCelsius = 70;
306 | const customPrediction = nanoNeuron.predict(tempInCelsius);
307 | console.log(`Нано-нейрон "думает", что ${tempInCelsius}°C в Фаренгейтах будет:`, customPrediction); // -> 158.0002
308 | console.log('А правильный ответ:', celsiusToFahrenheit(tempInCelsius)); // -> 158
309 | ```
310 | 
311 | Очень близко! Как и люди, наш нано-нейрон — хорош, но не идеален :)
312 | 
313 | Успешного кодинга!
314 | 
315 | ## Как запустить и протестировать нано-нейрон
316 | 
317 | Вы можете клонировать репозиторий и запустить нано-нейрон локально:
318 | 
319 | ```bash
320 | git clone https://github.com/trekhleb/nano-neuron.git
321 | cd nano-neuron
322 | ```
323 | 
324 | ```bash
325 | node ./NanoNeuron.js
326 | ```
327 | 
328 | ## Упущенные концепции
329 | 
330 | Следующие концепции машинного обучения были упущены или упрощены для простоты объяснения.
331 | 
332 | **Разделение тренировочного и тестового наборов данных**
333 | 
334 | Обычно у вас есть один большой набор данных. В зависимости от количества экземпляров в этом наборе его разделение на тренировочный и тестовый наборы может осуществляться в пропорции 70/30. Данные в наборе должны быть случайным образом перемешаны перед разделением. Если же количество данных большое (например, миллионы), то разделение на тестовый и тренировочный наборы может производиться в пропорциях близких к 90/10 или 95/5.  
335 | 
336 | **Сила в сети**
337 | 
338 | Обычно вы не найдете случаев, когда используется всего лишь один нейрон. Сила именно в [сети](https://en.wikipedia.org/wiki/Neural_network) таких нейронов. Нейронная сеть может выучить гораздо более сложные зависимости.
339 | 
340 | Также в примере выше наш нано-нейрон может выглядеть скорее как простая [линейная регрессия](https://en.wikipedia.org/wiki/Linear_regression), чем как нейронная сеть.
341 | 
342 | **Нормализация входных данных**
343 | 
344 | Перед тренировкой принято производить [нормализацию входных данных](https://www.jeremyjordan.me/batch-normalization/).
345 | 
346 | **Векторная имплементация**
347 | 
348 | Для нейронных сетей векторные (матричные) вычисления производятся значительно быстрее, чем вычисления в циклах `for`. Обычно прямое и обратное распространение сигнала производится именно с помощью матричных операций с использованием, например, Python-библиотеки [Numpy](https://numpy.org/).
349 | 
350 | **Минимум функции ошибки**
351 | 
352 | Функция ошибки, которую мы использовали для нано-нейрона, очень упрощена. Она должна содержать [логарифмические компоненты](https://stackoverflow.com/questions/32986123/why-the-cost-function-of-logistic-regression-has-a-logarithmic-expression/32998675). Изменение формулы функции ошибки также повлечет за собой изменение формул для прямого и обратного распространения сигнала.
353 | 
354 | **Функция активации**
355 | 
356 | Обычно выходное значение нейрона проходит еще через функцию активации. Для активации могут использоваться такие функции, как [Sigmoid](https://en.wikipedia.org/wiki/Sigmoid_function),  [ReLU](https://en.wikipedia.org/wiki/Rectifier_(neural_networks)) и прочие.
357 | 


--------------------------------------------------------------------------------
/assets/00-nano-neuron.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trekhleb/nano-neuron/378dbb07af84e8daf6e87711e5cb0fdc3c3c23ce/assets/00-nano-neuron.png


--------------------------------------------------------------------------------
/assets/01_celsius_to_fahrenheit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trekhleb/nano-neuron/378dbb07af84e8daf6e87711e5cb0fdc3c3c23ce/assets/01_celsius_to_fahrenheit.png


--------------------------------------------------------------------------------
/assets/02_cost_function.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trekhleb/nano-neuron/378dbb07af84e8daf6e87711e5cb0fdc3c3c23ce/assets/02_cost_function.png


--------------------------------------------------------------------------------
/assets/03_average_cost_function.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trekhleb/nano-neuron/378dbb07af84e8daf6e87711e5cb0fdc3c3c23ce/assets/03_average_cost_function.png


--------------------------------------------------------------------------------
/assets/04_db.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trekhleb/nano-neuron/378dbb07af84e8daf6e87711e5cb0fdc3c3c23ce/assets/04_db.png


--------------------------------------------------------------------------------
/assets/04_dw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trekhleb/nano-neuron/378dbb07af84e8daf6e87711e5cb0fdc3c3c23ce/assets/04_dw.png


--------------------------------------------------------------------------------
/assets/05_b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trekhleb/nano-neuron/378dbb07af84e8daf6e87711e5cb0fdc3c3c23ce/assets/05_b.png


--------------------------------------------------------------------------------
/assets/05_w.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trekhleb/nano-neuron/378dbb07af84e8daf6e87711e5cb0fdc3c3c23ce/assets/05_w.png


--------------------------------------------------------------------------------
/assets/06-training-process.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trekhleb/nano-neuron/378dbb07af84e8daf6e87711e5cb0fdc3c3c23ce/assets/06-training-process.png


--------------------------------------------------------------------------------
/assets/07-converter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trekhleb/nano-neuron/378dbb07af84e8daf6e87711e5cb0fdc3c3c23ce/assets/07-converter.png


--------------------------------------------------------------------------------