├── .gitignore
├── README.md
├── activation_layer.py
├── activations.py
├── conv_layer.py
├── example_conv.py
├── example_mnist_conv.py
├── example_mnist_fc.py
├── example_xor.py
├── fc_layer.py
├── flatten_layer.py
├── layer.py
├── losses.py
└── network.py

/.gitignore:
--------------------------------------------------------------------------------
*.pyc
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Mathematics of Neural Network

This code is part of my post on **[medium](https://medium.com/@omaraflak/math-neural-network-from-scratch-in-python-d6da9f29ce65)**.

It shows how to create a neural network **from scratch** in **Python**, going all the way from the **mathematics** to the code.

# Run it

```shell
python example_xor.py
python example_conv.py
python example_mnist_fc.py
python example_mnist_conv.py
```
--------------------------------------------------------------------------------
/activation_layer.py:
--------------------------------------------------------------------------------
from layer import Layer

# inherit from base class Layer
class ActivationLayer(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime

    # returns the activated input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    # Returns input_error=dE/dX for a given output_error=dE/dY.
    # learning_rate is not used because there are no learnable parameters.
    def backward_propagation(self, output_error, learning_rate):
        return self.activation_prime(self.input) * output_error
--------------------------------------------------------------------------------
/activations.py:
--------------------------------------------------------------------------------
import numpy as np

# activation function and its derivative
def tanh(x):
    return np.tanh(x)

def tanh_prime(x):
    return 1 - np.tanh(x)**2
--------------------------------------------------------------------------------
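activations.py only defines tanh and its derivative, but any differentiable function can be plugged into ActivationLayer the same way. For instance, a sigmoid pair could look like this (a sketch, not part of the repository):

```python
import numpy as np

# hypothetical extra activation pair, following the tanh/tanh_prime convention
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(x):
    s = 1 / (1 + np.exp(-x))
    return s * (1 - s)
```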
/conv_layer.py:
--------------------------------------------------------------------------------
from layer import Layer
from scipy import signal
import numpy as np

## The math behind this layer can be found at:
## https://medium.com/@2017csm1006/forward-and-backpropagation-in-convolutional-neural-network-4dfa96d7b37e

# inherit from base class Layer
# This convolutional layer always uses stride 1
class ConvLayer(Layer):
    # input_shape = (i,j,d)
    # kernel_shape = (m,n)
    # layer_depth = output_depth
    def __init__(self, input_shape, kernel_shape, layer_depth):
        self.input_shape = input_shape
        self.input_depth = input_shape[2]
        self.kernel_shape = kernel_shape
        self.layer_depth = layer_depth
        self.output_shape = (input_shape[0]-kernel_shape[0]+1, input_shape[1]-kernel_shape[1]+1, layer_depth)
        self.weights = np.random.rand(kernel_shape[0], kernel_shape[1], self.input_depth, layer_depth) - 0.5
        self.bias = np.random.rand(layer_depth) - 0.5

    # returns output for a given input
    def forward_propagation(self, input):
        self.input = input
        self.output = np.zeros(self.output_shape)

        for k in range(self.layer_depth):
            for d in range(self.input_depth):
                self.output[:,:,k] += signal.correlate2d(self.input[:,:,d], self.weights[:,:,d,k], 'valid') + self.bias[k]

        return self.output

    # computes dE/dW, dE/dB for a given output_error=dE/dY. Returns input_error=dE/dX.
    def backward_propagation(self, output_error, learning_rate):
        in_error = np.zeros(self.input_shape)
        dWeights = np.zeros((self.kernel_shape[0], self.kernel_shape[1], self.input_depth, self.layer_depth))
        dBias = np.zeros(self.layer_depth)

        for k in range(self.layer_depth):
            for d in range(self.input_depth):
                in_error[:,:,d] += signal.convolve2d(output_error[:,:,k], self.weights[:,:,d,k], 'full')
                dWeights[:,:,d,k] = signal.correlate2d(self.input[:,:,d], output_error[:,:,k], 'valid')
            # the forward pass adds bias[k] once per input channel, hence the input_depth factor
            dBias[k] = self.input_depth * np.sum(output_error[:,:,k])

        self.weights -= learning_rate*dWeights
        self.bias -= learning_rate*dBias
        return in_error
--------------------------------------------------------------------------------
/example_conv.py:
--------------------------------------------------------------------------------
import numpy as np

from network import Network
from conv_layer import ConvLayer
from activation_layer import ActivationLayer
from activations import tanh, tanh_prime
from losses import mse, mse_prime

# training data
x_train = [np.random.rand(10,10,1)]
y_train = [np.random.rand(4,4,2)]

# network
net = Network()
net.add(ConvLayer((10,10,1), (3,3), 1))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(ConvLayer((8,8,1), (3,3), 1))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(ConvLayer((6,6,1), (3,3), 2))
net.add(ActivationLayer(tanh, tanh_prime))

# train
net.use(mse, mse_prime)
net.fit(x_train, y_train, epochs=1000, learning_rate=0.3)

# test
out = net.predict(x_train)
print("predicted = ", out)
print("expected = ", y_train)
--------------------------------------------------------------------------------
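A quick way to see how the 'valid' correlation shrinks the spatial dimensions is to run a single ConvLayer forward pass on a random input (hypothetical snippet, assuming the modules above are importable):

```python
import numpy as np
from conv_layer import ConvLayer

# a (3,3) kernel with stride 1 removes 2 pixels in each spatial dimension
layer = ConvLayer((10, 10, 1), (3, 3), 2)
out = layer.forward_propagation(np.random.rand(10, 10, 1))
print(out.shape)  # (8, 8, 2)
```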
/example_mnist_conv.py:
--------------------------------------------------------------------------------
import numpy as np

from network import Network
from fc_layer import FCLayer
from conv_layer import ConvLayer
from flatten_layer import FlattenLayer
from activation_layer import ActivationLayer
from activations import tanh, tanh_prime
from losses import mse, mse_prime

from keras.datasets import mnist
from keras.utils import to_categorical

# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# training data : 60000 samples
# reshape and normalize input data
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_train = x_train.astype('float32')
x_train /= 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = to_categorical(y_train)

# same for test data : 10000 samples
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
x_test = x_test.astype('float32')
x_test /= 255
y_test = to_categorical(y_test)

# Network
net = Network()
net.add(ConvLayer((28, 28, 1), (3, 3), 1))  # input_shape=(28, 28, 1)  ; output_shape=(26, 26, 1)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FlattenLayer())                     # input_shape=(26, 26, 1)  ; output_shape=(1, 26*26*1)
net.add(FCLayer(26*26*1, 100))              # input_shape=(1, 26*26*1) ; output_shape=(1, 100)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(100, 10))                   # input_shape=(1, 100)     ; output_shape=(1, 10)
net.add(ActivationLayer(tanh, tanh_prime))

# train on 1000 samples
# since we didn't implement mini-batch GD, training would be quite slow if we updated on all 60000 samples at each epoch...
net.use(mse, mse_prime)
net.fit(x_train[0:1000], y_train[0:1000], epochs=100, learning_rate=0.1)

# test on 3 samples
out = net.predict(x_test[0:3])
print("\n")
print("predicted values : ")
print(out, end="\n")
print("true values : ")
print(y_test[0:3])
--------------------------------------------------------------------------------
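The script prints raw 10-dimensional outputs; to read them as digits, one can take the argmax of each prediction and of each one-hot label (a sketch, reusing `out` and `y_test` from example_mnist_conv.py):

```python
import numpy as np

# decode raw network outputs and one-hot labels into digit classes
predicted_digits = [int(np.argmax(o)) for o in out]
true_digits = [int(np.argmax(t)) for t in y_test[0:3]]
print("predicted digits :", predicted_digits)
print("true digits      :", true_digits)
```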
/example_mnist_fc.py:
--------------------------------------------------------------------------------
import numpy as np

from network import Network
from fc_layer import FCLayer
from activation_layer import ActivationLayer
from activations import tanh, tanh_prime
from losses import mse, mse_prime

from keras.datasets import mnist
from keras.utils import to_categorical

# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# training data : 60000 samples
# reshape and normalize input data
x_train = x_train.reshape(x_train.shape[0], 1, 28*28)
x_train = x_train.astype('float32')
x_train /= 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = to_categorical(y_train)

# same for test data : 10000 samples
x_test = x_test.reshape(x_test.shape[0], 1, 28*28)
x_test = x_test.astype('float32')
x_test /= 255
y_test = to_categorical(y_test)

# Network
net = Network()
net.add(FCLayer(28*28, 100))                # input_shape=(1, 28*28) ; output_shape=(1, 100)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(100, 50))                   # input_shape=(1, 100)   ; output_shape=(1, 50)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(50, 10))                    # input_shape=(1, 50)    ; output_shape=(1, 10)
net.add(ActivationLayer(tanh, tanh_prime))

# train on 1000 samples
# since we didn't implement mini-batch GD, training would be quite slow if we updated on all 60000 samples at each epoch...
net.use(mse, mse_prime)
net.fit(x_train[0:1000], y_train[0:1000], epochs=35, learning_rate=0.1)

# test on 3 samples
out = net.predict(x_test[0:3])
print("\n")
print("predicted values : ")
print(out, end="\n")
print("true values : ")
print(y_test[0:3])
--------------------------------------------------------------------------------
/example_xor.py:
--------------------------------------------------------------------------------
import numpy as np

from network import Network
from fc_layer import FCLayer
from activation_layer import ActivationLayer
from activations import tanh, tanh_prime
from losses import mse, mse_prime

# training data
x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

# network
net = Network()
net.add(FCLayer(2, 3))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(3, 1))
net.add(ActivationLayer(tanh, tanh_prime))

# train
net.use(mse, mse_prime)
net.fit(x_train, y_train, epochs=1000, learning_rate=0.1)

# test
out = net.predict(x_train)
print(out)
--------------------------------------------------------------------------------
/fc_layer.py:
--------------------------------------------------------------------------------
from layer import Layer
import numpy as np

# inherit from base class Layer
class FCLayer(Layer):
    # input_size = number of input neurons
    # output_size = number of output neurons
    def __init__(self, input_size, output_size):
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    # returns output for a given input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    # computes dE/dW, dE/dB for a given output_error=dE/dY. Returns input_error=dE/dX.
    def backward_propagation(self, output_error, learning_rate):
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # dBias = output_error

        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * output_error
        return input_error
--------------------------------------------------------------------------------
/flatten_layer.py:
--------------------------------------------------------------------------------
from layer import Layer

# inherit from base class Layer
class FlattenLayer(Layer):
    # returns the flattened input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = input_data.flatten().reshape((1,-1))
        return self.output

    # Returns input_error=dE/dX for a given output_error=dE/dY.
    # learning_rate is not used because there are no learnable parameters.
    def backward_propagation(self, output_error, learning_rate):
        return output_error.reshape(self.input.shape)
--------------------------------------------------------------------------------
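FlattenLayer carries no parameters; its backward pass only reshapes the gradient back to the input shape, which can be checked directly (hypothetical snippet, assuming flatten_layer.py is importable):

```python
import numpy as np
from flatten_layer import FlattenLayer

# a (26, 26, 1) feature map is flattened to a (1, 676) row vector and restored on the way back
layer = FlattenLayer()
x = np.random.rand(26, 26, 1)
y = layer.forward_propagation(x)
grad = layer.backward_propagation(y, learning_rate=0.1)
print(y.shape, grad.shape)  # (1, 676) (26, 26, 1)
```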
/layer.py:
--------------------------------------------------------------------------------
# Base class
class Layer:
    def __init__(self):
        self.input = None
        self.output = None

    # computes the output Y of a layer for a given input X
    def forward_propagation(self, input):
        raise NotImplementedError

    # computes dE/dX for a given dE/dY (and update parameters if any)
    def backward_propagation(self, output_error, learning_rate):
        raise NotImplementedError
--------------------------------------------------------------------------------
/losses.py:
--------------------------------------------------------------------------------
import numpy as np

# loss function and its derivative
def mse(y_true, y_pred):
    return np.mean(np.power(y_true-y_pred, 2))

def mse_prime(y_true, y_pred):
    return 2*(y_pred-y_true)/y_true.size
--------------------------------------------------------------------------------
/network.py:
--------------------------------------------------------------------------------
class Network:
    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    # add layer to network
    def add(self, layer):
        self.layers.append(layer)

    # set loss to use
    def use(self, loss, loss_prime):
        self.loss = loss
        self.loss_prime = loss_prime

    # predict output for given input
    def predict(self, input_data):
        # sample dimension first
        samples = len(input_data)
        result = []

        # run network over all samples
        for i in range(samples):
            # forward propagation
            output = input_data[i]
            for layer in self.layers:
                output = layer.forward_propagation(output)
            result.append(output)

        return result

    # train the network
    def fit(self, x_train, y_train, epochs, learning_rate):
        # sample dimension first
        samples = len(x_train)

        # training loop
        for i in range(epochs):
            err = 0
            for j in range(samples):
                # forward propagation
                output = x_train[j]
                for layer in self.layers:
                    output = layer.forward_propagation(output)

                # compute loss (for display purpose only)
                err += self.loss(y_train[j], output)

                # backward propagation
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            print('epoch %d/%d error=%f' % (i+1, epochs, err))
--------------------------------------------------------------------------------
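Network.predict returns a list of raw output arrays, one per sample, so a simple accuracy measure for the MNIST scripts can be computed by comparing argmaxes (a sketch, assuming `net`, `x_test` and `y_test` from example_mnist_fc.py):

```python
import numpy as np

# fraction of correctly classified digits over a slice of the test set
outputs = net.predict(x_test[0:100])
correct = sum(int(np.argmax(o) == np.argmax(t)) for o, t in zip(outputs, y_test[0:100]))
print("accuracy on 100 test samples:", correct / 100)
```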