├── .gitignore
├── README.md
├── activation_layer.py
├── activations.py
├── conv_layer.py
├── example_conv.py
├── example_mnist_conv.py
├── example_mnist_fc.py
├── example_xor.py
├── fc_layer.py
├── flatten_layer.py
├── layer.py
├── losses.py
└── network.py

/.gitignore:
--------------------------------------------------------------------------------
*.pyc
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Mathematics of Neural Network

This code is part of my post on **[medium](https://medium.com/@omaraflak/math-neural-network-from-scratch-in-python-d6da9f29ce65)**.

It shows how to create a neural network **from scratch** in **Python**, going all the way from the **mathematics** to the code.

# Run it

```shell
python example_xor.py
python example_conv.py
python example_mnist_fc.py
python example_mnist_conv.py
```
--------------------------------------------------------------------------------
/activation_layer.py:
--------------------------------------------------------------------------------
from layer import Layer

# inherit from base class Layer
class ActivationLayer(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime

    # returns the activated input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    # Returns input_error=dE/dX for a given output_error=dE/dY.
    # learning_rate is not used because there are no learnable parameters.
    def backward_propagation(self, output_error, learning_rate):
        return self.activation_prime(self.input) * output_error
--------------------------------------------------------------------------------
/activations.py:
--------------------------------------------------------------------------------
import numpy as np

# activation function and its derivative
def tanh(x):
    return np.tanh(x)

def tanh_prime(x):
    return 1 - np.tanh(x)**2
--------------------------------------------------------------------------------
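activations.py only defines tanh and its derivative, but any differentiable function can be plugged into ActivationLayer the same way. For instance, a sigmoid pair could look like this (a sketch, not part of the repository):

```python
import numpy as np

# hypothetical extra activation pair, following the tanh/tanh_prime convention
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(x):
    s = 1 / (1 + np.exp(-x))
    return s * (1 - s)
```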
/conv_layer.py:
--------------------------------------------------------------------------------
from layer import Layer
from scipy import signal
import numpy as np

## The math behind this layer can be found at:
## https://medium.com/@2017csm1006/forward-and-backpropagation-in-convolutional-neural-network-4dfa96d7b37e

# inherit from base class Layer
# This convolutional layer always uses stride 1
class ConvLayer(Layer):
    # input_shape = (i,j,d)
    # kernel_shape = (m,n)
    # layer_depth = output_depth
    def __init__(self, input_shape, kernel_shape, layer_depth):
        self.input_shape = input_shape
        self.input_depth = input_shape[2]
        self.kernel_shape = kernel_shape
        self.layer_depth = layer_depth
        self.output_shape = (input_shape[0]-kernel_shape[0]+1, input_shape[1]-kernel_shape[1]+1, layer_depth)
        self.weights = np.random.rand(kernel_shape[0], kernel_shape[1], self.input_depth, layer_depth) - 0.5
        self.bias = np.random.rand(layer_depth) - 0.5

    # returns output for a given input
    def forward_propagation(self, input):
        self.input = input
        self.output = np.zeros(self.output_shape)

        for k in range(self.layer_depth):
            for d in range(self.input_depth):
                self.output[:,:,k] += signal.correlate2d(self.input[:,:,d], self.weights[:,:,d,k], 'valid') + self.bias[k]

        return self.output

    # computes dE/dW, dE/dB for a given output_error=dE/dY. Returns input_error=dE/dX.
    def backward_propagation(self, output_error, learning_rate):
        in_error = np.zeros(self.input_shape)
        dWeights = np.zeros((self.kernel_shape[0], self.kernel_shape[1], self.input_depth, self.layer_depth))
        dBias = np.zeros(self.layer_depth)

        for k in range(self.layer_depth):
            for d in range(self.input_depth):
                in_error[:,:,d] += signal.convolve2d(output_error[:,:,k], self.weights[:,:,d,k], 'full')
                dWeights[:,:,d,k] = signal.correlate2d(self.input[:,:,d], output_error[:,:,k], 'valid')
            # the forward pass adds bias[k] once per input channel, hence the input_depth factor
            dBias[k] = self.input_depth * np.sum(output_error[:,:,k])

        self.weights -= learning_rate*dWeights
        self.bias -= learning_rate*dBias
        return in_error
--------------------------------------------------------------------------------
/example_conv.py:
--------------------------------------------------------------------------------
import numpy as np

from network import Network
from conv_layer import ConvLayer
from activation_layer import ActivationLayer
from activations import tanh, tanh_prime
from losses import mse, mse_prime

# training data
x_train = [np.random.rand(10,10,1)]
y_train = [np.random.rand(4,4,2)]

# network
net = Network()
net.add(ConvLayer((10,10,1), (3,3), 1))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(ConvLayer((8,8,1), (3,3), 1))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(ConvLayer((6,6,1), (3,3), 2))
net.add(ActivationLayer(tanh, tanh_prime))

# train
net.use(mse, mse_prime)
net.fit(x_train, y_train, epochs=1000, learning_rate=0.3)

# test
out = net.predict(x_train)
print("predicted = ", out)
print("expected = ", y_train)
--------------------------------------------------------------------------------
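A quick way to see how the 'valid' correlation shrinks the spatial dimensions is to run a single ConvLayer forward pass on a random input (hypothetical snippet, assuming the modules above are importable):

```python
import numpy as np
from conv_layer import ConvLayer

# a (3,3) kernel with stride 1 removes 2 pixels in each spatial dimension
layer = ConvLayer((10, 10, 1), (3, 3), 2)
out = layer.forward_propagation(np.random.rand(10, 10, 1))
print(out.shape)  # (8, 8, 2)
```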
/example_mnist_conv.py:
--------------------------------------------------------------------------------
import numpy as np

from network import Network
from fc_layer import FCLayer
from conv_layer import ConvLayer
from flatten_layer import FlattenLayer
from activation_layer import ActivationLayer
from activations import tanh, tanh_prime
from losses import mse, mse_prime

from keras.datasets import mnist
from keras.utils import to_categorical

# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# training data : 60000 samples
# reshape and normalize input data
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_train = x_train.astype('float32')
x_train /= 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = to_categorical(y_train)

# same for test data : 10000 samples
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
x_test = x_test.astype('float32')
x_test /= 255
y_test = to_categorical(y_test)

# Network
net = Network()
net.add(ConvLayer((28, 28, 1), (3, 3), 1))  # input_shape=(28, 28, 1)  ; output_shape=(26, 26, 1)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FlattenLayer())                     # input_shape=(26, 26, 1)  ; output_shape=(1, 26*26*1)
net.add(FCLayer(26*26*1, 100))              # input_shape=(1, 26*26*1) ; output_shape=(1, 100)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(100, 10))                   # input_shape=(1, 100)     ; output_shape=(1, 10)
net.add(ActivationLayer(tanh, tanh_prime))

# train on 1000 samples
# since we didn't implement mini-batch GD, training would be quite slow if we updated on all 60000 samples at each epoch...
net.use(mse, mse_prime)
net.fit(x_train[0:1000], y_train[0:1000], epochs=100, learning_rate=0.1)

# test on 3 samples
out = net.predict(x_test[0:3])
print("\n")
print("predicted values : ")
print(out, end="\n")
print("true values : ")
print(y_test[0:3])
--------------------------------------------------------------------------------
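The script prints raw 10-dimensional outputs; to read them as digits, one can take the argmax of each prediction and of each one-hot label (a sketch, reusing `out` and `y_test` from example_mnist_conv.py):

```python
import numpy as np

# decode raw network outputs and one-hot labels into digit classes
predicted_digits = [int(np.argmax(o)) for o in out]
true_digits = [int(np.argmax(t)) for t in y_test[0:3]]
print("predicted digits :", predicted_digits)
print("true digits      :", true_digits)
```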
/example_mnist_fc.py:
--------------------------------------------------------------------------------
import numpy as np

from network import Network
from fc_layer import FCLayer
from activation_layer import ActivationLayer
from activations import tanh, tanh_prime
from losses import mse, mse_prime

from keras.datasets import mnist
from keras.utils import to_categorical

# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# training data : 60000 samples
# reshape and normalize input data
x_train = x_train.reshape(x_train.shape[0], 1, 28*28)
x_train = x_train.astype('float32')
x_train /= 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = to_categorical(y_train)

# same for test data : 10000 samples
x_test = x_test.reshape(x_test.shape[0], 1, 28*28)
x_test = x_test.astype('float32')
x_test /= 255
y_test = to_categorical(y_test)

# Network
net = Network()
net.add(FCLayer(28*28, 100))                # input_shape=(1, 28*28) ; output_shape=(1, 100)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(100, 50))                   # input_shape=(1, 100)   ; output_shape=(1, 50)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(50, 10))                    # input_shape=(1, 50)    ; output_shape=(1, 10)
net.add(ActivationLayer(tanh, tanh_prime))

# train on 1000 samples
# since we didn't implement mini-batch GD, training would be quite slow if we updated on all 60000 samples at each epoch...
net.use(mse, mse_prime)
net.fit(x_train[0:1000], y_train[0:1000], epochs=35, learning_rate=0.1)

# test on 3 samples
out = net.predict(x_test[0:3])
print("\n")
print("predicted values : ")
print(out, end="\n")
print("true values : ")
print(y_test[0:3])
--------------------------------------------------------------------------------
/example_xor.py:
--------------------------------------------------------------------------------
import numpy as np

from network import Network
from fc_layer import FCLayer
from activation_layer import ActivationLayer
from activations import tanh, tanh_prime
from losses import mse, mse_prime

# training data
x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

# network
net = Network()
net.add(FCLayer(2, 3))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(3, 1))
net.add(ActivationLayer(tanh, tanh_prime))

# train
net.use(mse, mse_prime)
net.fit(x_train, y_train, epochs=1000, learning_rate=0.1)

# test
out = net.predict(x_train)
print(out)
--------------------------------------------------------------------------------
/fc_layer.py:
--------------------------------------------------------------------------------
from layer import Layer
import numpy as np

# inherit from base class Layer
class FCLayer(Layer):
    # input_size = number of input neurons
    # output_size = number of output neurons
    def __init__(self, input_size, output_size):
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    # returns output for a given input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    # computes dE/dW, dE/dB for a given output_error=dE/dY. Returns input_error=dE/dX.
    def backward_propagation(self, output_error, learning_rate):
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # dBias = output_error

        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * output_error
        return input_error
--------------------------------------------------------------------------------
/flatten_layer.py:
--------------------------------------------------------------------------------
from layer import Layer

# inherit from base class Layer
class FlattenLayer(Layer):
    # returns the flattened input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = input_data.flatten().reshape((1,-1))
        return self.output

    # Returns input_error=dE/dX for a given output_error=dE/dY.
    # learning_rate is not used because there are no learnable parameters.
    def backward_propagation(self, output_error, learning_rate):
        return output_error.reshape(self.input.shape)
--------------------------------------------------------------------------------
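FlattenLayer carries no parameters; its backward pass only reshapes the gradient back to the input shape, which can be checked directly (hypothetical snippet, assuming flatten_layer.py is importable):

```python
import numpy as np
from flatten_layer import FlattenLayer

# a (26, 26, 1) feature map is flattened to a (1, 676) row vector and restored on the way back
layer = FlattenLayer()
x = np.random.rand(26, 26, 1)
y = layer.forward_propagation(x)
grad = layer.backward_propagation(y, learning_rate=0.1)
print(y.shape, grad.shape)  # (1, 676) (26, 26, 1)
```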
/layer.py:
--------------------------------------------------------------------------------
# Base class
class Layer:
    def __init__(self):
        self.input = None
        self.output = None

    # computes the output Y of a layer for a given input X
    def forward_propagation(self, input):
        raise NotImplementedError

    # computes dE/dX for a given dE/dY (and update parameters if any)
    def backward_propagation(self, output_error, learning_rate):
        raise NotImplementedError
--------------------------------------------------------------------------------
/losses.py:
--------------------------------------------------------------------------------
import numpy as np

# loss function and its derivative
def mse(y_true, y_pred):
    return np.mean(np.power(y_true-y_pred, 2))

def mse_prime(y_true, y_pred):
    return 2*(y_pred-y_true)/y_true.size
--------------------------------------------------------------------------------
/network.py:
--------------------------------------------------------------------------------
class Network:
    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    # add layer to network
    def add(self, layer):
        self.layers.append(layer)

    # set loss to use
    def use(self, loss, loss_prime):
        self.loss = loss
        self.loss_prime = loss_prime

    # predict output for given input
    def predict(self, input_data):
        # sample dimension first
        samples = len(input_data)
        result = []

        # run network over all samples
        for i in range(samples):
            # forward propagation
            output = input_data[i]
            for layer in self.layers:
                output = layer.forward_propagation(output)
            result.append(output)

        return result

    # train the network
    def fit(self, x_train, y_train, epochs, learning_rate):
        # sample dimension first
        samples = len(x_train)

        # training loop
        for i in range(epochs):
            err = 0
            for j in range(samples):
                # forward propagation
                output = x_train[j]
                for layer in self.layers:
                    output = layer.forward_propagation(output)

                # compute loss (for display purpose only)
                err += self.loss(y_train[j], output)

                # backward propagation
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            print('epoch %d/%d error=%f' % (i+1, epochs, err))
--------------------------------------------------------------------------------
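Network.predict returns a list of raw output arrays, one per sample, so a simple accuracy measure for the MNIST scripts can be computed by comparing argmaxes (a sketch, assuming `net`, `x_test` and `y_test` from example_mnist_fc.py):

```python
import numpy as np

# fraction of correctly classified digits over a slice of the test set
outputs = net.predict(x_test[0:100])
correct = sum(int(np.argmax(o) == np.argmax(t)) for o, t in zip(outputs, y_test[0:100]))
print("accuracy on 100 test samples:", correct / 100)
```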