├── .editorconfig
├── README.md
└── neural-network.py

/.editorconfig:
--------------------------------------------------------------------------------
root = true

[*]
indent_style = space
indent_size = 4
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true

[*.md]
trim_trailing_whitespace = false

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Neural Network with Backpropagation

A simple Python script showing how the backpropagation algorithm works.

Check out this blog post for background: [A Step by Step Backpropagation Example](http://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/).

# Learn More

Check out [Emergent Mind](https://www.emergentmind.com/?utm_source=backprop), an AI Research Assistant I've been working on that helps you discover and learn about important new AI/ML research.

# Contact

If you have any suggestions, find a bug, or just want to say hey, drop me a note at [@mhmazur](https://twitter.com/mhmazur) on Twitter or by email at matthew.h.mazur@gmail.com.

# License

MIT © [Matt Mazur](http://mattmazur.com)

--------------------------------------------------------------------------------
/neural-network.py:
--------------------------------------------------------------------------------
import random
import math

#
# Shorthand:
#   "pd_" as a variable prefix means "partial derivative"
#   "d_" as a variable prefix means "derivative"
#   "_wrt_" is shorthand for "with respect to"
#   "w_ho" and "w_ih" are the indices of weights from hidden-to-output and input-to-hidden layer neurons, respectively
#
# Comment references:
#
# [1] Wikipedia article on Backpropagation
#     http://en.wikipedia.org/wiki/Backpropagation#Finding_the_derivative_of_the_error
# [2] Neural Networks for Machine Learning course on Coursera by Geoffrey Hinton
#     https://class.coursera.org/neuralnets-2012-001/lecture/39
# [3] The Back Propagation Algorithm
#     https://www4.rgu.ac.uk/files/chapter3%20-%20bp.pdf

class NeuralNetwork:
    LEARNING_RATE = 0.5

    def __init__(self, num_inputs, num_hidden, num_outputs, hidden_layer_weights=None, hidden_layer_bias=None, output_layer_weights=None, output_layer_bias=None):
        self.num_inputs = num_inputs

        self.hidden_layer = NeuronLayer(num_hidden, hidden_layer_bias)
        self.output_layer = NeuronLayer(num_outputs, output_layer_bias)

        self.init_weights_from_inputs_to_hidden_layer_neurons(hidden_layer_weights)
        self.init_weights_from_hidden_layer_neurons_to_output_layer_neurons(output_layer_weights)

    def init_weights_from_inputs_to_hidden_layer_neurons(self, hidden_layer_weights):
        weight_num = 0
        for h in range(len(self.hidden_layer.neurons)):
            for i in range(self.num_inputs):
                if not hidden_layer_weights:
                    self.hidden_layer.neurons[h].weights.append(random.random())
                else:
                    self.hidden_layer.neurons[h].weights.append(hidden_layer_weights[weight_num])
                weight_num += 1

    def init_weights_from_hidden_layer_neurons_to_output_layer_neurons(self, output_layer_weights):
        weight_num = 0
        for o in range(len(self.output_layer.neurons)):
            for h in range(len(self.hidden_layer.neurons)):
                if not output_layer_weights:
                    self.output_layer.neurons[o].weights.append(random.random())
                else:
                    self.output_layer.neurons[o].weights.append(output_layer_weights[weight_num])
                weight_num += 1

    def inspect(self):
        print('------')
        print('* Inputs: {}'.format(self.num_inputs))
        print('------')
        print('Hidden Layer')
        self.hidden_layer.inspect()
        print('------')
        print('* Output Layer')
        self.output_layer.inspect()
        print('------')

    def feed_forward(self, inputs):
        hidden_layer_outputs = self.hidden_layer.feed_forward(inputs)
        return self.output_layer.feed_forward(hidden_layer_outputs)
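
    # Worked example (added for illustration, not part of the original script):
    # with the blog-post weights defined at the bottom of this file, feeding
    # forward the inputs [0.05, 0.1] gives, for the first hidden neuron,
    #   net_h1 = 0.15 * 0.05 + 0.2 * 0.1 + 0.35 = 0.3775
    #   out_h1 = 1 / (1 + e^-0.3775) ≈ 0.59327
    # and the same pattern repeats for every neuron in each layer.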

    # Uses online learning, i.e. updating the weights after each training case
    def train(self, training_inputs, training_outputs):
        self.feed_forward(training_inputs)

        # 1. Output neuron deltas
        pd_errors_wrt_output_neuron_total_net_input = [0] * len(self.output_layer.neurons)
        for o in range(len(self.output_layer.neurons)):

            # ∂E/∂zⱼ
            pd_errors_wrt_output_neuron_total_net_input[o] = self.output_layer.neurons[o].calculate_pd_error_wrt_total_net_input(training_outputs[o])

        # 2. Hidden neuron deltas
        pd_errors_wrt_hidden_neuron_total_net_input = [0] * len(self.hidden_layer.neurons)
        for h in range(len(self.hidden_layer.neurons)):

            # We need to calculate the derivative of the error with respect to the output of each hidden layer neuron
            # dE/dyⱼ = Σ ∂E/∂zⱼ * ∂z/∂yⱼ = Σ ∂E/∂zⱼ * wᵢⱼ
            d_error_wrt_hidden_neuron_output = 0
            for o in range(len(self.output_layer.neurons)):
                d_error_wrt_hidden_neuron_output += pd_errors_wrt_output_neuron_total_net_input[o] * self.output_layer.neurons[o].weights[h]

            # ∂E/∂zⱼ = dE/dyⱼ * dyⱼ/dzⱼ
            pd_errors_wrt_hidden_neuron_total_net_input[h] = d_error_wrt_hidden_neuron_output * self.hidden_layer.neurons[h].calculate_pd_total_net_input_wrt_input()

        # 3. Update output neuron weights
        for o in range(len(self.output_layer.neurons)):
            for w_ho in range(len(self.output_layer.neurons[o].weights)):

                # ∂Eⱼ/∂wᵢⱼ = ∂E/∂zⱼ * ∂zⱼ/∂wᵢⱼ
                pd_error_wrt_weight = pd_errors_wrt_output_neuron_total_net_input[o] * self.output_layer.neurons[o].calculate_pd_total_net_input_wrt_weight(w_ho)

                # Δw = -α * ∂Eⱼ/∂wᵢⱼ
                self.output_layer.neurons[o].weights[w_ho] -= self.LEARNING_RATE * pd_error_wrt_weight

        # 4. Update hidden neuron weights
        for h in range(len(self.hidden_layer.neurons)):
            for w_ih in range(len(self.hidden_layer.neurons[h].weights)):

                # ∂Eⱼ/∂wᵢ = ∂E/∂zⱼ * ∂zⱼ/∂wᵢ
                pd_error_wrt_weight = pd_errors_wrt_hidden_neuron_total_net_input[h] * self.hidden_layer.neurons[h].calculate_pd_total_net_input_wrt_weight(w_ih)

                # Δw = -α * ∂Eⱼ/∂wᵢ
                self.hidden_layer.neurons[h].weights[w_ih] -= self.LEARNING_RATE * pd_error_wrt_weight
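
    # Worked example (added for illustration, not part of the original script):
    # for the blog-post network at the bottom of this file, the first training
    # pass gives, for the first output neuron and its first weight (w5 = 0.4),
    #   δ_o1 = ∂E/∂z_o1 = -(0.01 - 0.751365) * 0.751365 * (1 - 0.751365) ≈ 0.138499
    #   ∂E/∂w5 = δ_o1 * out_h1 ≈ 0.138499 * 0.593270 ≈ 0.082167
    #   w5 ← 0.4 - 0.5 * 0.082167 ≈ 0.358917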

    def calculate_total_error(self, training_sets):
        total_error = 0
        for t in range(len(training_sets)):
            training_inputs, training_outputs = training_sets[t]
            self.feed_forward(training_inputs)
            for o in range(len(training_outputs)):
                total_error += self.output_layer.neurons[o].calculate_error(training_outputs[o])
        return total_error

class NeuronLayer:
    def __init__(self, num_neurons, bias):

        # Every neuron in a layer shares the same bias
        self.bias = bias if bias is not None else random.random()

        self.neurons = []
        for i in range(num_neurons):
            self.neurons.append(Neuron(self.bias))

    def inspect(self):
        print('Neurons:', len(self.neurons))
        for n in range(len(self.neurons)):
            print(' Neuron', n)
            for w in range(len(self.neurons[n].weights)):
                print('  Weight:', self.neurons[n].weights[w])
            print('  Bias:', self.bias)

    def feed_forward(self, inputs):
        outputs = []
        for neuron in self.neurons:
            outputs.append(neuron.calculate_output(inputs))
        return outputs

    def get_outputs(self):
        outputs = []
        for neuron in self.neurons:
            outputs.append(neuron.output)
        return outputs

class Neuron:
    def __init__(self, bias):
        self.bias = bias
        self.weights = []

    def calculate_output(self, inputs):
        self.inputs = inputs
        self.output = self.squash(self.calculate_total_net_input())
        return self.output

    def calculate_total_net_input(self):
        total = 0
        for i in range(len(self.inputs)):
            total += self.inputs[i] * self.weights[i]
        return total + self.bias

    # Apply the logistic function to squash the total net input of the neuron
    # The result is the neuron's output; the total net input itself is sometimes referred to as 'net' [1] [2]
    def squash(self, total_net_input):
        return 1 / (1 + math.exp(-total_net_input))

    # Determine how much the neuron's total input has to change to move closer to the expected output
    #
    # Now that we have the partial derivative of the error with respect to the output (∂E/∂yⱼ) and
    # the derivative of the output with respect to the total net input (dyⱼ/dzⱼ) we can calculate
    # the partial derivative of the error with respect to the total net input.
    # This value is also known as the delta (δ) [1]
    # δ = ∂E/∂zⱼ = ∂E/∂yⱼ * dyⱼ/dzⱼ
    #
    def calculate_pd_error_wrt_total_net_input(self, target_output):
        return self.calculate_pd_error_wrt_output(target_output) * self.calculate_pd_total_net_input_wrt_input()

    # The error for each neuron is calculated by the squared error method:
    def calculate_error(self, target_output):
        return 0.5 * (target_output - self.output) ** 2
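
    # Worked example (added for illustration, not part of the original script):
    # on the first forward pass of the blog-post network below, the two output
    # neurons produce roughly 0.751365 and 0.772928 against targets 0.01 and 0.99, so
    #   E_o1 = 0.5 * (0.01 - 0.751365)^2 ≈ 0.274811
    #   E_o2 = 0.5 * (0.99 - 0.772928)^2 ≈ 0.023560
    # and calculate_total_error sums these to ≈ 0.298371.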

    # The partial derivative of the error with respect to the actual output is then calculated by:
    # = 2 * 0.5 * (target output - actual output) ^ (2 - 1) * -1
    # = -(target output - actual output)
    #
    # The Wikipedia article on backpropagation [1] simplifies this to the following, but most other learning material does not [2]:
    # = actual output - target output
    #
    # Alternatively, you can use (target - output), but then you need to add it during backpropagation [3]
    #
    # Note that the actual output of the output neuron is often written as yⱼ and the target output as tⱼ so:
    # = ∂E/∂yⱼ = -(tⱼ - yⱼ)
    def calculate_pd_error_wrt_output(self, target_output):
        return -(target_output - self.output)

    # The total net input into the neuron is squashed using the logistic function to calculate the neuron's output:
    # yⱼ = φ = 1 / (1 + e^(-zⱼ))
    # Note that ⱼ refers to neurons in the layer we're looking at and ᵢ refers to the layer below it
    #
    # The derivative (not partial derivative since there is only one variable) of the output then is:
    # dyⱼ/dzⱼ = yⱼ * (1 - yⱼ)
    def calculate_pd_total_net_input_wrt_input(self):
        return self.output * (1 - self.output)

    # The total net input is the weighted sum of all the inputs to the neuron plus the bias:
    # zⱼ = netⱼ = x₁w₁ + x₂w₂ ... + b
    #
    # The partial derivative of the total net input with respect to a given weight (with everything else held constant) then is:
    # ∂zⱼ/∂wᵢ = 0 + 0 + ... + xᵢ * wᵢ^(1 - 1) + ... + 0 = xᵢ
    def calculate_pd_total_net_input_wrt_weight(self, index):
        return self.inputs[index]

###

# Blog post example:

nn = NeuralNetwork(2, 2, 2, hidden_layer_weights=[0.15, 0.2, 0.25, 0.3], hidden_layer_bias=0.35, output_layer_weights=[0.4, 0.45, 0.5, 0.55], output_layer_bias=0.6)
for i in range(10000):
    nn.train([0.05, 0.1], [0.01, 0.99])
    print(i, round(nn.calculate_total_error([[[0.05, 0.1], [0.01, 0.99]]]), 9))

# XOR example:

# training_sets = [
#     [[0, 0], [0]],
#     [[0, 1], [1]],
#     [[1, 0], [1]],
#     [[1, 1], [0]]
# ]

# nn = NeuralNetwork(len(training_sets[0][0]), 5, len(training_sets[0][1]))
# for i in range(10000):
#     training_inputs, training_outputs = random.choice(training_sets)
#     nn.train(training_inputs, training_outputs)
#     print(i, nn.calculate_total_error(training_sets))
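
# Optional sanity check (a sketch added for illustration, not part of the original
# script): numerically approximate ∂E/∂w for one output-layer weight with a central
# difference and compare it to the analytic gradient produced by the delta rule.
# The helper name `numerical_gradient` is made up for this example. Uncomment to
# run; it reuses the `nn` instance and training case from the blog post example above.

# def numerical_gradient(network, inputs, targets, neuron, weight_index, epsilon=1e-6):
#     original_weight = neuron.weights[weight_index]
#     neuron.weights[weight_index] = original_weight + epsilon
#     error_plus = network.calculate_total_error([[inputs, targets]])
#     neuron.weights[weight_index] = original_weight - epsilon
#     error_minus = network.calculate_total_error([[inputs, targets]])
#     neuron.weights[weight_index] = original_weight
#     return (error_plus - error_minus) / (2 * epsilon)

# output_neuron = nn.output_layer.neurons[0]
# numeric = numerical_gradient(nn, [0.05, 0.1], [0.01, 0.99], output_neuron, 0)
# nn.feed_forward([0.05, 0.1])
# analytic = output_neuron.calculate_pd_error_wrt_total_net_input(0.01) * output_neuron.calculate_pd_total_net_input_wrt_weight(0)
# print('numeric:', numeric, 'analytic:', analytic)  # the two values should agree to several decimal places
--------------------------------------------------------------------------------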