├── .editorconfig
├── README.md
└── neural-network.py

/.editorconfig:
--------------------------------------------------------------------------------
root = true

[*]
indent_style = space
indent_size = 4
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true

[*.md]
trim_trailing_whitespace = false

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Neural Network with Backpropagation

A simple Python script showing how the backpropagation algorithm works.

Check out this blog post for background: [A Step by Step Backpropagation Example](http://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/).

# Learn More

Check out [Emergent Mind](https://www.emergentmind.com/?utm_source=backprop), an AI Research Assistant I've been working on that helps you discover and learn about important new AI/ML research.

# Contact

If you have any suggestions, find a bug, or just want to say hey, drop me a note at [@mhmazur](https://twitter.com/mhmazur) on Twitter or by email at matthew.h.mazur@gmail.com.

# License

MIT © [Matt Mazur](http://mattmazur.com)

--------------------------------------------------------------------------------
/neural-network.py:
--------------------------------------------------------------------------------
import random
import math

#
# Shorthand:
#   "pd_" as a variable prefix means "partial derivative"
#   "d_" as a variable prefix means "derivative"
#   "_wrt_" is shorthand for "with respect to"
#   "w_ho" and "w_ih" are the indices of weights from hidden-to-output and input-to-hidden layer neurons, respectively
#
# Comment references:
#
# [1] Wikipedia article on Backpropagation
#     http://en.wikipedia.org/wiki/Backpropagation#Finding_the_derivative_of_the_error
# [2] Neural Networks for Machine Learning course on Coursera by Geoffrey Hinton
#     https://class.coursera.org/neuralnets-2012-001/lecture/39
# [3] The Back Propagation Algorithm
#     https://www4.rgu.ac.uk/files/chapter3%20-%20bp.pdf

class NeuralNetwork:
    LEARNING_RATE = 0.5

    def __init__(self, num_inputs, num_hidden, num_outputs, hidden_layer_weights=None, hidden_layer_bias=None, output_layer_weights=None, output_layer_bias=None):
        self.num_inputs = num_inputs

        self.hidden_layer = NeuronLayer(num_hidden, hidden_layer_bias)
        self.output_layer = NeuronLayer(num_outputs, output_layer_bias)

        self.init_weights_from_inputs_to_hidden_layer_neurons(hidden_layer_weights)
        self.init_weights_from_hidden_layer_neurons_to_output_layer_neurons(output_layer_weights)

    def init_weights_from_inputs_to_hidden_layer_neurons(self, hidden_layer_weights):
        weight_num = 0
        for h in range(len(self.hidden_layer.neurons)):
            for i in range(self.num_inputs):
                if not hidden_layer_weights:
                    self.hidden_layer.neurons[h].weights.append(random.random())
                else:
                    self.hidden_layer.neurons[h].weights.append(hidden_layer_weights[weight_num])
                weight_num += 1

    def init_weights_from_hidden_layer_neurons_to_output_layer_neurons(self, output_layer_weights):
        weight_num = 0
        for o in range(len(self.output_layer.neurons)):
            for h in range(len(self.hidden_layer.neurons)):
                if not output_layer_weights:
                    self.output_layer.neurons[o].weights.append(random.random())
                else:
                    self.output_layer.neurons[o].weights.append(output_layer_weights[weight_num])
                weight_num += 1

    def inspect(self):
        print('------')
        print('* Inputs: {}'.format(self.num_inputs))
        print('------')
        print('Hidden Layer')
        self.hidden_layer.inspect()
        print('------')
        print('* Output Layer')
        self.output_layer.inspect()
        print('------')

    def feed_forward(self, inputs):
        hidden_layer_outputs = self.hidden_layer.feed_forward(inputs)
        return self.output_layer.feed_forward(hidden_layer_outputs)
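
    # Worked example (added for illustration, not part of the original script):
    # with the blog-post weights defined at the bottom of this file, feeding
    # forward the inputs [0.05, 0.1] gives, for the first hidden neuron,
    #   net_h1 = 0.15 * 0.05 + 0.2 * 0.1 + 0.35 = 0.3775
    #   out_h1 = 1 / (1 + e^-0.3775) ≈ 0.59327
    # and the same pattern repeats for every neuron in each layer.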

    # Uses online learning, i.e. updating the weights after each training case
    def train(self, training_inputs, training_outputs):
        self.feed_forward(training_inputs)

        # 1. Output neuron deltas
        pd_errors_wrt_output_neuron_total_net_input = [0] * len(self.output_layer.neurons)
        for o in range(len(self.output_layer.neurons)):

            # ∂E/∂zⱼ
            pd_errors_wrt_output_neuron_total_net_input[o] = self.output_layer.neurons[o].calculate_pd_error_wrt_total_net_input(training_outputs[o])

        # 2. Hidden neuron deltas
        pd_errors_wrt_hidden_neuron_total_net_input = [0] * len(self.hidden_layer.neurons)
        for h in range(len(self.hidden_layer.neurons)):

            # We need to calculate the derivative of the error with respect to the output of each hidden layer neuron
            # dE/dyⱼ = Σ ∂E/∂zⱼ * ∂z/∂yⱼ = Σ ∂E/∂zⱼ * wᵢⱼ
            d_error_wrt_hidden_neuron_output = 0
            for o in range(len(self.output_layer.neurons)):
                d_error_wrt_hidden_neuron_output += pd_errors_wrt_output_neuron_total_net_input[o] * self.output_layer.neurons[o].weights[h]

            # ∂E/∂zⱼ = dE/dyⱼ * dyⱼ/dzⱼ
            pd_errors_wrt_hidden_neuron_total_net_input[h] = d_error_wrt_hidden_neuron_output * self.hidden_layer.neurons[h].calculate_pd_total_net_input_wrt_input()

        # 3. Update output neuron weights
        for o in range(len(self.output_layer.neurons)):
            for w_ho in range(len(self.output_layer.neurons[o].weights)):

                # ∂Eⱼ/∂wᵢⱼ = ∂E/∂zⱼ * ∂zⱼ/∂wᵢⱼ
                pd_error_wrt_weight = pd_errors_wrt_output_neuron_total_net_input[o] * self.output_layer.neurons[o].calculate_pd_total_net_input_wrt_weight(w_ho)

                # Δw = -α * ∂Eⱼ/∂wᵢⱼ
                self.output_layer.neurons[o].weights[w_ho] -= self.LEARNING_RATE * pd_error_wrt_weight

        # 4. Update hidden neuron weights
        for h in range(len(self.hidden_layer.neurons)):
            for w_ih in range(len(self.hidden_layer.neurons[h].weights)):

                # ∂Eⱼ/∂wᵢ = ∂E/∂zⱼ * ∂zⱼ/∂wᵢ
                pd_error_wrt_weight = pd_errors_wrt_hidden_neuron_total_net_input[h] * self.hidden_layer.neurons[h].calculate_pd_total_net_input_wrt_weight(w_ih)

                # Δw = -α * ∂Eⱼ/∂wᵢ
                self.hidden_layer.neurons[h].weights[w_ih] -= self.LEARNING_RATE * pd_error_wrt_weight
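
    # Worked example (added for illustration, not part of the original script):
    # for the blog-post network at the bottom of this file, the first training
    # pass gives, for the first output neuron and its first weight (w5 = 0.4),
    #   δ_o1 = ∂E/∂z_o1 = -(0.01 - 0.751365) * 0.751365 * (1 - 0.751365) ≈ 0.138499
    #   ∂E/∂w5 = δ_o1 * out_h1 ≈ 0.138499 * 0.593270 ≈ 0.082167
    #   w5 ← 0.4 - 0.5 * 0.082167 ≈ 0.358917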

    def calculate_total_error(self, training_sets):
        total_error = 0
        for t in range(len(training_sets)):
            training_inputs, training_outputs = training_sets[t]
            self.feed_forward(training_inputs)
            for o in range(len(training_outputs)):
                total_error += self.output_layer.neurons[o].calculate_error(training_outputs[o])
        return total_error

class NeuronLayer:
    def __init__(self, num_neurons, bias):

        # Every neuron in a layer shares the same bias
        self.bias = bias if bias is not None else random.random()

        self.neurons = []
        for i in range(num_neurons):
            self.neurons.append(Neuron(self.bias))

    def inspect(self):
        print('Neurons:', len(self.neurons))
        for n in range(len(self.neurons)):
            print(' Neuron', n)
            for w in range(len(self.neurons[n].weights)):
                print('  Weight:', self.neurons[n].weights[w])
            print('  Bias:', self.bias)

    def feed_forward(self, inputs):
        outputs = []
        for neuron in self.neurons:
            outputs.append(neuron.calculate_output(inputs))
        return outputs

    def get_outputs(self):
        outputs = []
        for neuron in self.neurons:
            outputs.append(neuron.output)
        return outputs

class Neuron:
    def __init__(self, bias):
        self.bias = bias
        self.weights = []

    def calculate_output(self, inputs):
        self.inputs = inputs
        self.output = self.squash(self.calculate_total_net_input())
        return self.output

    def calculate_total_net_input(self):
        total = 0
        for i in range(len(self.inputs)):
            total += self.inputs[i] * self.weights[i]
        return total + self.bias

    # Apply the logistic function to squash the total net input of the neuron
    # The result is the neuron's output; the total net input itself is sometimes referred to as 'net' [1] [2]
    def squash(self, total_net_input):
        return 1 / (1 + math.exp(-total_net_input))

    # Determine how much the neuron's total input has to change to move closer to the expected output
    #
    # Now that we have the partial derivative of the error with respect to the output (∂E/∂yⱼ) and
    # the derivative of the output with respect to the total net input (dyⱼ/dzⱼ) we can calculate
    # the partial derivative of the error with respect to the total net input.
    # This value is also known as the delta (δ) [1]
    # δ = ∂E/∂zⱼ = ∂E/∂yⱼ * dyⱼ/dzⱼ
    #
    def calculate_pd_error_wrt_total_net_input(self, target_output):
        return self.calculate_pd_error_wrt_output(target_output) * self.calculate_pd_total_net_input_wrt_input()

    # The error for each neuron is calculated by the squared error method:
    def calculate_error(self, target_output):
        return 0.5 * (target_output - self.output) ** 2
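
    # Worked example (added for illustration, not part of the original script):
    # on the first forward pass of the blog-post network below, the two output
    # neurons produce roughly 0.751365 and 0.772928 against targets 0.01 and 0.99, so
    #   E_o1 = 0.5 * (0.01 - 0.751365)^2 ≈ 0.274811
    #   E_o2 = 0.5 * (0.99 - 0.772928)^2 ≈ 0.023560
    # and calculate_total_error sums these to ≈ 0.298371.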

    # The partial derivative of the error with respect to the actual output is then calculated by:
    # = 2 * 0.5 * (target output - actual output) ^ (2 - 1) * -1
    # = -(target output - actual output)
    #
    # The Wikipedia article on backpropagation [1] simplifies this to the following, but most other learning material does not [2]:
    # = actual output - target output
    #
    # Alternatively, you can use (target - output), but then you need to add it during backpropagation [3]
    #
    # Note that the actual output of the output neuron is often written as yⱼ and the target output as tⱼ so:
    # = ∂E/∂yⱼ = -(tⱼ - yⱼ)
    def calculate_pd_error_wrt_output(self, target_output):
        return -(target_output - self.output)

    # The total net input into the neuron is squashed using the logistic function to calculate the neuron's output:
    # yⱼ = φ = 1 / (1 + e^(-zⱼ))
    # Note that ⱼ refers to neurons in the layer we're looking at and ᵢ refers to the layer below it
    #
    # The derivative (not partial derivative since there is only one variable) of the output then is:
    # dyⱼ/dzⱼ = yⱼ * (1 - yⱼ)
    def calculate_pd_total_net_input_wrt_input(self):
        return self.output * (1 - self.output)

    # The total net input is the weighted sum of all the inputs to the neuron plus the bias:
    # zⱼ = netⱼ = x₁w₁ + x₂w₂ ... + b
    #
    # The partial derivative of the total net input with respect to a given weight (with everything else held constant) then is:
    # ∂zⱼ/∂wᵢ = 0 + 0 + ... + xᵢ * wᵢ^(1 - 1) + ... + 0 = xᵢ
    def calculate_pd_total_net_input_wrt_weight(self, index):
        return self.inputs[index]

###

# Blog post example:

nn = NeuralNetwork(2, 2, 2, hidden_layer_weights=[0.15, 0.2, 0.25, 0.3], hidden_layer_bias=0.35, output_layer_weights=[0.4, 0.45, 0.5, 0.55], output_layer_bias=0.6)
for i in range(10000):
    nn.train([0.05, 0.1], [0.01, 0.99])
    print(i, round(nn.calculate_total_error([[[0.05, 0.1], [0.01, 0.99]]]), 9))

# XOR example:

# training_sets = [
#     [[0, 0], [0]],
#     [[0, 1], [1]],
#     [[1, 0], [1]],
#     [[1, 1], [0]]
# ]

# nn = NeuralNetwork(len(training_sets[0][0]), 5, len(training_sets[0][1]))
# for i in range(10000):
#     training_inputs, training_outputs = random.choice(training_sets)
#     nn.train(training_inputs, training_outputs)
#     print(i, nn.calculate_total_error(training_sets))
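
# Optional sanity check (a sketch added for illustration, not part of the original
# script): numerically approximate ∂E/∂w for one output-layer weight with a central
# difference and compare it to the analytic gradient produced by the delta rule.
# The helper name `numerical_gradient` is made up for this example. Uncomment to
# run; it reuses the `nn` instance and training case from the blog post example above.

# def numerical_gradient(network, inputs, targets, neuron, weight_index, epsilon=1e-6):
#     original_weight = neuron.weights[weight_index]
#     neuron.weights[weight_index] = original_weight + epsilon
#     error_plus = network.calculate_total_error([[inputs, targets]])
#     neuron.weights[weight_index] = original_weight - epsilon
#     error_minus = network.calculate_total_error([[inputs, targets]])
#     neuron.weights[weight_index] = original_weight
#     return (error_plus - error_minus) / (2 * epsilon)

# output_neuron = nn.output_layer.neurons[0]
# numeric = numerical_gradient(nn, [0.05, 0.1], [0.01, 0.99], output_neuron, 0)
# nn.feed_forward([0.05, 0.1])
# analytic = output_neuron.calculate_pd_error_wrt_total_net_input(0.01) * output_neuron.calculate_pd_total_net_input_wrt_weight(0)
# print('numeric:', numeric, 'analytic:', analytic)  # the two values should agree to several decimal places
--------------------------------------------------------------------------------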