├── .gitignore ├── LICENSE ├── README.md ├── mnist.py ├── mnist_cnn.py ├── src ├── __init__.py ├── activation.py ├── cost.py ├── layers │ ├── __init__.py │ ├── conv.py │ ├── dropout.py │ ├── fc.py │ ├── flatten.py │ ├── layer.py │ └── pool.py ├── nn.py └── optimizer.py └── test ├── __init__.py ├── test_cost.py ├── test_layers ├── __init__.py ├── test_conv.py ├── test_fc.py ├── test_flatten.py └── test_pool.py ├── test_nn.py └── utils ├── __init__.py └── grad_check.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | __pycache__ 4 | *.pyc 5 | *.ipynb_checkpoints 6 | *.pkl 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Lorenzo Pratissoli 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NumpyCNN 2 | A simple vectorized implementation of a Convolutional Neural Network in plain NumPy, written while learning about neural networks, and more. 3 | 4 | ##### Example 5 | 6 | ```python 7 | # ... some imports here ... 8 | mnist.init() 9 | x_train, y_train, x_test, y_test = preprocess(*mnist.load()) 10 | 11 | cnn = NeuralNetwork( 12 | input_dim=(28, 28, 1), 13 | layers=[ 14 | Conv(5, 1, 32, activation=relu), 15 | Pool(2, 2, 'max'), 16 | Dropout(0.75), 17 | Flatten(), 18 | FullyConnected(128, relu), 19 | Dropout(0.9), 20 | FullyConnected(10, softmax), 21 | ], 22 | cost_function=softmax_cross_entropy, 23 | optimizer=adam 24 | ) 25 | 26 | cnn.train(x_train, y_train, 27 | mini_batch_size=256, 28 | learning_rate=0.001, 29 | num_epochs=30, 30 | validation_data=(x_test, y_test)) 31 | ``` 32 | 33 | 34 | In mnist_cnn.py there is a complete example with a simple model I used to get 99.06% accuracy on the MNIST test dataset. 35 | 36 | ## You can find an implementation of: 37 | #### Gradient Checking 38 | To check the correctness of derivatives during backpropagation, as explained [here](http://ufldl.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization). 39 | There are examples of its usage in the tests.
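For instance, a minimal sketch (modelled on `test/test_nn.py`; the `2e-7` threshold is the one the tests use) that gradient-checks a small fully connected network:

```python
import numpy as np

from src.activation import relu, softmax
from src.cost import softmax_cross_entropy
from src.layers.fc import FullyConnected
from src.nn import NeuralNetwork
from test.utils.grad_check import grad_check

# Small network: analytical gradients from backprop are compared against
# centered-difference numerical gradients of the cost.
nn = NeuralNetwork(
    input_dim=32,
    layers=[
        FullyConnected(16, relu),
        FullyConnected(4, softmax),
    ],
    cost_function=softmax_cross_entropy,
)

x = np.random.randn(2, 32)
y = np.array([[1, 0, 0, 0], [0, 0, 0, 1]])  # one-hot targets

# Relative error between backpropagated and numerical gradients;
# should be tiny (the tests assert < 2e-7).
print(grad_check(nn, x, y))
```

`grad_check` returns the relative error between the gradients produced by backpropagation and centered-difference numerical gradients of the cost.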
40 | 41 | #### Layers 42 | - FullyConnected (Dense) 43 | - Conv (Conv2D) 44 | - Pool (MaxPool2D, AveragePool2D) 45 | - Dropout 46 | - Flatten 47 | 48 | #### Optimizers 49 | - Gradient Descent 50 | - RMSProp 51 | - Adam -------------------------------------------------------------------------------- /mnist.py: -------------------------------------------------------------------------------- 1 | """ 2 | Slightly modified version of https://github.com/hsjeong5/MNIST-for-Numpy 3 | """ 4 | 5 | import gzip 6 | import os 7 | import pickle 8 | from urllib import request 9 | 10 | import numpy as np 11 | 12 | filename = [ 13 | ["training_images", "train-images-idx3-ubyte.gz"], 14 | ["test_images", "t10k-images-idx3-ubyte.gz"], 15 | ["training_labels", "train-labels-idx1-ubyte.gz"], 16 | ["test_labels", "t10k-labels-idx1-ubyte.gz"] 17 | ] 18 | 19 | 20 | def download_mnist(): 21 | base_url = "http://yann.lecun.com/exdb/mnist/" 22 | for name in filename: 23 | print("Downloading " + name[1] + "...") 24 | request.urlretrieve(base_url + name[1], name[1]) 25 | print("Download complete.") 26 | 27 | 28 | def save_mnist(): 29 | mnist = {} 30 | for name in filename[:2]: 31 | with gzip.open(name[1], 'rb') as f: 32 | mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1, 28 * 28) 33 | for name in filename[-2:]: 34 | with gzip.open(name[1], 'rb') as f: 35 | mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=8) 36 | 37 | for _, gz_file in filename: 38 | os.remove(gz_file) 39 | 40 | with open("mnist.pkl", 'wb') as f: 41 | pickle.dump(mnist, f) 42 | print("Save complete.") 43 | 44 | 45 | def init(): 46 | if not os.path.isfile("mnist.pkl"): 47 | download_mnist() 48 | save_mnist() 49 | else: 50 | print("Dataset already downloaded, delete mnist.pkl if you want to re-download.") 51 | 52 | 53 | def load(): 54 | with open("mnist.pkl", 'rb') as f: 55 | mnist = pickle.load(f) 56 | return mnist["training_images"], mnist["training_labels"], mnist["test_images"], mnist["test_labels"] 57 | -------------------------------------------------------------------------------- /mnist_cnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import mnist 4 | from src.activation import relu, softmax 5 | from src.cost import softmax_cross_entropy 6 | from src.layers.conv import Conv 7 | from src.layers.dropout import Dropout 8 | from src.layers.fc import FullyConnected 9 | from src.layers.flatten import Flatten 10 | from src.layers.pool import Pool 11 | from src.nn import NeuralNetwork 12 | from src.optimizer import adam 13 | 14 | 15 | def one_hot(x, num_classes=10): 16 | out = np.zeros((x.shape[0], num_classes)) 17 | out[np.arange(x.shape[0]), x[:, 0]] = 1 18 | return out 19 | 20 | 21 | def preprocess(x_train, y_train, x_test, y_test): 22 | x_train = x_train.reshape(x_train.shape[0], 28, 28, 1).astype(np.float32) 23 | x_test = x_test.reshape(x_test.shape[0], 28, 28, 1).astype(np.float32) 24 | y_train = one_hot(y_train.reshape(y_train.shape[0], 1)) 25 | x_train /= 255 26 | x_test /= 255 27 | return x_train, y_train, x_test, y_test 28 | 29 | 30 | if __name__ == "__main__": 31 | mnist.init() 32 | x_train, y_train, x_test, y_test = preprocess(*mnist.load()) 33 | 34 | cnn = NeuralNetwork( 35 | input_dim=(28, 28, 1), 36 | layers=[ 37 | Conv(5, 1, 32, activation=relu), 38 | Pool(2, 2, 'max'), 39 | Dropout(0.75), 40 | Flatten(), 41 | FullyConnected(128, relu), 42 | Dropout(0.9), 43 | FullyConnected(10, softmax), 44 | ], 45 | 
cost_function=softmax_cross_entropy, 46 | optimizer=adam 47 | ) 48 | 49 | cnn.train(x_train, y_train, 50 | mini_batch_size=256, 51 | learning_rate=0.001, 52 | num_epochs=30, 53 | validation_data=(x_test, y_test)) 54 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lpraat/numpyCNN/368d5f2f11ecbbad638813b8adfa1527e0412461/src/__init__.py -------------------------------------------------------------------------------- /src/activation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class ActivationFunction: 5 | def f(self, x): 6 | raise NotImplementedError 7 | 8 | def df(self, x, cached_y=None): 9 | raise NotImplementedError 10 | 11 | 12 | class Identity(ActivationFunction): 13 | def f(self, x): 14 | return x 15 | 16 | def df(self, x, cached_y=None): 17 | return np.full(x.shape, 1) 18 | 19 | 20 | class Sigmoid(ActivationFunction): 21 | def f(self, x): 22 | return np.where(x >= 0, 1 / (1 + np.exp(-x)), np.exp(x) / (1 + np.exp(x))) 23 | 24 | def df(self, x, cached_y=None): 25 | y = cached_y if cached_y is not None else self.f(x) 26 | return y * (1 - y) 27 | 28 | 29 | class ReLU(ActivationFunction): 30 | def f(self, x): 31 | return np.maximum(0, x) 32 | 33 | def df(self, x, cached_y=None): 34 | return np.where(x <= 0, 0, 1) 35 | 36 | 37 | class SoftMax(ActivationFunction): 38 | def f(self, x): 39 | y = np.exp(x - np.max(x, axis=1, keepdims=True)) 40 | return y / np.sum(y, axis=1, keepdims=True) 41 | 42 | def df(self, x, cached_y=None): 43 | raise NotImplementedError 44 | 45 | 46 | identity = Identity() 47 | sigmoid = Sigmoid() 48 | relu = ReLU() 49 | softmax = SoftMax() 50 | -------------------------------------------------------------------------------- /src/cost.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | epsilon = 1e-20 4 | 5 | 6 | class CostFunction: 7 | def f(self, a_last, y): 8 | raise NotImplementedError 9 | 10 | def grad(self, a_last, y): 11 | raise NotImplementedError 12 | 13 | 14 | class SigmoidCrossEntropy(CostFunction): 15 | def f(self, a_last, y): 16 | batch_size = y.shape[0] 17 | # It would be better to have the logits and use this instead 18 | # max(logits, 0) - logits * y + log(1 + exp(-abs(logits))) 19 | a_last = np.clip(a_last, epsilon, 1.0 - epsilon) 20 | cost = -1 / batch_size * (y * np.log(a_last) + (1 - y) * np.log(1 - a_last)).sum() 21 | return cost 22 | 23 | def grad(self, a_last, y): 24 | a_last = np.clip(a_last, epsilon, 1.0 - epsilon) 25 | return - (np.divide(y, a_last) - np.divide(1 - y, 1 - a_last)) 26 | 27 | 28 | class SoftmaxCrossEntropy(CostFunction): 29 | def f(self, a_last, y): 30 | batch_size = y.shape[0] 31 | cost = -1 / batch_size * (y * np.log(np.clip(a_last, epsilon, 1.0))).sum() 32 | return cost 33 | 34 | def grad(self, a_last, y): 35 | return - np.divide(y, np.clip(a_last, epsilon, 1.0)) 36 | 37 | 38 | softmax_cross_entropy = SoftmaxCrossEntropy() 39 | sigmoid_cross_entropy = SigmoidCrossEntropy() 40 | -------------------------------------------------------------------------------- /src/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lpraat/numpyCNN/368d5f2f11ecbbad638813b8adfa1527e0412461/src/layers/__init__.py 
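A quick usage sketch of the activation and cost singletons defined above (the input values are made up for illustration). Note that `SoftMax.df` is left unimplemented: `SoftmaxCrossEntropy.grad` returns -y / a, and `FullyConnected.backward` recovers y from it to apply the simplified gradient dz = a - y, so the softmax Jacobian is never needed explicitly.

```python
import numpy as np

from src.activation import softmax
from src.cost import softmax_cross_entropy

# One sample, three classes (illustrative values).
logits = np.array([[2.0, 1.0, 0.1]])
y = np.array([[1.0, 0.0, 0.0]])        # one-hot target

a = softmax.f(logits)                  # approx. [0.659, 0.242, 0.099]
cost = softmax_cross_entropy.f(a, y)   # -log(a[0, 0]) / batch_size, approx. 0.417
da = softmax_cross_entropy.grad(a, y)  # -y / a, fed back into the last layer's backward pass
```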
-------------------------------------------------------------------------------- /src/layers/conv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from src.activation import identity 4 | from src.layers.layer import Layer 5 | 6 | 7 | class Conv(Layer): 8 | """2D convolutional layer. 9 | 10 | Attributes 11 | ---------- 12 | kernel_size : int 13 | Height and Width of the 2D convolution window. 14 | stride : int 15 | Stride along height and width of the input volume on which the convolution is applied. 16 | padding: str 17 | Padding mode, 'valid' or 'same'. 18 | pad : int 19 | Padding size. 20 | n_h : int 21 | Height of the output volume. 22 | n_w : int 23 | Width of the output volume. 24 | n_c : int 25 | Number of channels of the output volume. Corresponds to the number of filters. 26 | n_h_prev : int 27 | Height of the input volume. 28 | n_w_prev : int 29 | Width of the input volume. 30 | n_c_prev : int 31 | Number of channels of the input volume. 32 | w : numpy.ndarray 33 | Weights. 34 | b : numpy.ndarray 35 | Biases. 36 | activation : Activation 37 | Activation function applied to the output volume after performing the convolution operation. 38 | cache : dict 39 | Cache. 40 | """ 41 | def __init__(self, kernel_size, stride, n_c, padding='valid', activation=identity): 42 | super().__init__() 43 | self.kernel_size = kernel_size 44 | self.stride = stride 45 | self.padding = padding 46 | self.pad = None 47 | self.n_h, self.n_w, self.n_c = None, None, n_c 48 | self.n_h_prev, self.n_w_prev, self.n_c_prev = None, None, None 49 | self.w = None 50 | self.b = None 51 | self.activation = activation 52 | self.cache = {} 53 | 54 | def init(self, in_dim): 55 | self.pad = 0 if self.padding == 'valid' else int((self.kernel_size - 1) / 2) 56 | 57 | self.n_h_prev, self.n_w_prev, self.n_c_prev = in_dim 58 | self.n_h = int((self.n_h_prev - self.kernel_size + 2 * self.pad) / self.stride + 1) 59 | self.n_w = int((self.n_w_prev - self.kernel_size + 2 * self.pad) / self.stride + 1) 60 | 61 | self.w = np.random.randn(self.kernel_size, self.kernel_size, self.n_c_prev, self.n_c) 62 | self.b = np.zeros((1, 1, 1, self.n_c)) 63 | 64 | def forward(self, a_prev, training): 65 | batch_size = a_prev.shape[0] 66 | a_prev_padded = Conv.zero_pad(a_prev, self.pad) 67 | out = np.zeros((batch_size, self.n_h, self.n_w, self.n_c)) 68 | 69 | # Convolve 70 | for i in range(self.n_h): 71 | v_start = i * self.stride 72 | v_end = v_start + self.kernel_size 73 | 74 | for j in range(self.n_w): 75 | h_start = j * self.stride 76 | h_end = h_start + self.kernel_size 77 | 78 | out[:, i, j, :] = np.sum(a_prev_padded[:, v_start:v_end, h_start:h_end, :, np.newaxis] * 79 | self.w[np.newaxis, :, :, :], axis=(1, 2, 3)) 80 | 81 | z = out + self.b 82 | a = self.activation.f(z) 83 | 84 | if training: 85 | # Cache for backward pass 86 | self.cache.update({'a_prev': a_prev, 'z': z, 'a': a}) 87 | 88 | return a 89 | 90 | def backward(self, da): 91 | batch_size = da.shape[0] 92 | a_prev, z, a = (self.cache[key] for key in ('a_prev', 'z', 'a')) 93 | a_prev_pad = Conv.zero_pad(a_prev, self.pad) if self.pad != 0 else a_prev 94 | 95 | da_prev = np.zeros((batch_size, self.n_h_prev, self.n_w_prev, self.n_c_prev)) 96 | da_prev_pad = Conv.zero_pad(da_prev, self.pad) if self.pad != 0 else da_prev 97 | 98 | dz = da * self.activation.df(z, cached_y=a) 99 | db = 1 / batch_size * dz.sum(axis=(0, 1, 2)) 100 | dw = np.zeros((self.kernel_size, self.kernel_size, self.n_c_prev, self.n_c)) 101 | 102 | # 
'Convolve' back 103 | for i in range(self.n_h): 104 | v_start = self.stride * i 105 | v_end = v_start + self.kernel_size 106 | 107 | for j in range(self.n_w): 108 | h_start = self.stride * j 109 | h_end = h_start + self.kernel_size 110 | 111 | da_prev_pad[:, v_start:v_end, h_start:h_end, :] += \ 112 | np.sum(self.w[np.newaxis, :, :, :, :] * dz[:, i:i+1, j:j+1, np.newaxis, :], axis=4) 113 | 114 | dw += np.sum(a_prev_pad[:, v_start:v_end, h_start:h_end, :, np.newaxis] * 115 | dz[:, i:i+1, j:j+1, np.newaxis, :], axis=0) 116 | 117 | dw /= batch_size 118 | 119 | if self.pad != 0: 120 | da_prev = da_prev_pad[:, self.pad:-self.pad, self.pad:-self.pad, :] 121 | 122 | return da_prev, dw, db 123 | 124 | def get_output_dim(self): 125 | return self.n_h, self.n_w, self.n_c 126 | 127 | def update_params(self, dw, db): 128 | self.w -= dw 129 | self.b -= db 130 | 131 | def get_params(self): 132 | return self.w, self.b 133 | 134 | @staticmethod 135 | def zero_pad(x, pad): 136 | return np.pad(x, ((0, 0), (pad, pad), (pad, pad), (0, 0)), mode='constant') 137 | -------------------------------------------------------------------------------- /src/layers/dropout.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from src.layers.layer import Layer 4 | 5 | 6 | class Dropout(Layer): 7 | """Dropout layer. 8 | 9 | Attributes 10 | ---------- 11 | keep_prob : float 12 | Probability that a neuron is kept. 13 | mask_dim : tuple 14 | Shape of the input ndarray. 15 | cached_mask : numpy.ndarray 16 | Mask representing kept/dropped neurons. 17 | """ 18 | def __init__(self, keep_prob): 19 | super().__init__() 20 | assert 0 < keep_prob < 1, "Keep probability must be between 0 and 1" 21 | self.keep_prob = keep_prob 22 | self.mask_dim = None 23 | self.cached_mask = None 24 | 25 | def init(self, in_dim): 26 | self.mask_dim = in_dim 27 | 28 | def forward(self, a_prev, training): 29 | if training: 30 | mask = (np.random.rand(*a_prev.shape) < self.keep_prob) 31 | a = self.inverted_dropout(a_prev, mask) 32 | 33 | # Cache for backward pass 34 | self.cached_mask = mask 35 | 36 | return a 37 | 38 | return a_prev 39 | 40 | def backward(self, da): 41 | return self.inverted_dropout(da, self.cached_mask), None, None 42 | 43 | def update_params(self, dw, db): 44 | pass 45 | 46 | def get_params(self): 47 | pass 48 | 49 | def get_output_dim(self): 50 | return self.mask_dim 51 | 52 | def inverted_dropout(self, a, mask): 53 | a *= mask 54 | a /= self.keep_prob 55 | return a 56 | 57 | -------------------------------------------------------------------------------- /src/layers/fc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from src.activation import SoftMax 4 | from src.layers.layer import Layer 5 | 6 | 7 | class FullyConnected(Layer): 8 | """Densely connected layer. 9 | 10 | Attributes 11 | ---------- 12 | size : int 13 | Number of neurons. 14 | activation : Activation 15 | Neurons' activation's function. 16 | is_softmax : bool 17 | Whether or not the activation is softmax. 18 | cache : dict 19 | Cache. 20 | w : numpy.ndarray 21 | Weights. 22 | b : numpy.ndarray 23 | Biases. 
24 | """ 25 | def __init__(self, size, activation): 26 | super().__init__() 27 | self.size = size 28 | self.activation = activation 29 | self.is_softmax = isinstance(self.activation, SoftMax) 30 | self.cache = {} 31 | self.w = None 32 | self.b = None 33 | 34 | def init(self, in_dim): 35 | # He initialization 36 | self.w = np.random.randn(self.size, in_dim) * np.sqrt(2 / in_dim) 37 | 38 | self.b = np.zeros((1, self.size)) 39 | 40 | def forward(self, a_prev, training): 41 | z = np.dot(a_prev, self.w.T) + self.b 42 | a = self.activation.f(z) 43 | 44 | if training: 45 | # Cache for backward pass 46 | self.cache.update({'a_prev': a_prev, 'z': z, 'a': a}) 47 | 48 | return a 49 | 50 | def backward(self, da): 51 | a_prev, z, a = (self.cache[key] for key in ('a_prev', 'z', 'a')) 52 | batch_size = a_prev.shape[0] 53 | 54 | if self.is_softmax: 55 | # Get back y from the gradient wrt the cost of this layer's activations 56 | # That is get back y from - y/a = da 57 | y = da * (-a) 58 | 59 | dz = a - y 60 | else: 61 | dz = da * self.activation.df(z, cached_y=a) 62 | 63 | dw = 1 / batch_size * np.dot(dz.T, a_prev) 64 | db = 1 / batch_size * dz.sum(axis=0, keepdims=True) 65 | da_prev = np.dot(dz, self.w) 66 | 67 | return da_prev, dw, db 68 | 69 | def update_params(self, dw, db): 70 | self.w -= dw 71 | self.b -= db 72 | 73 | def get_params(self): 74 | return self.w, self.b 75 | 76 | def get_output_dim(self): 77 | return self.size 78 | -------------------------------------------------------------------------------- /src/layers/flatten.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | 3 | from src.layers.layer import Layer 4 | 5 | 6 | class Flatten(Layer): 7 | """Flatten layer. 8 | 9 | Attributes 10 | ---------- 11 | original_dim : tuple 12 | Shape of the input ndarray. 13 | output_dim : tuple 14 | Shape of the output ndarray. 15 | """ 16 | def __init__(self): 17 | super().__init__() 18 | self.original_dim = None 19 | self.output_dim = None 20 | 21 | def init(self, in_dim): 22 | self.original_dim = in_dim 23 | self.output_dim = reduce(lambda x, y: x * y, self.original_dim) 24 | 25 | def forward(self, a_prev, training): 26 | return a_prev.reshape(a_prev.shape[0], -1) 27 | 28 | def backward(self, da): 29 | return da.reshape(da.shape[0], *self.original_dim), None, None 30 | 31 | def get_params(self): 32 | pass 33 | 34 | def update_params(self, dw, db): 35 | pass 36 | 37 | def get_output_dim(self): 38 | return self.output_dim 39 | 40 | -------------------------------------------------------------------------------- /src/layers/layer.py: -------------------------------------------------------------------------------- 1 | class Layer: 2 | 3 | def init(self, in_dim): 4 | """ 5 | Initializes the layer. 6 | 7 | Parameters 8 | ---------- 9 | in_dim : int or tuple 10 | Shape of the input data. 11 | """ 12 | raise NotImplementedError 13 | 14 | def forward(self, a_prev, training): 15 | """ 16 | Propagates forward the activations. 17 | 18 | Parameters 19 | ---------- 20 | a_prev : numpy.ndarray 21 | The input to this layer which corresponds to the previous layer's activations. 22 | training : bool 23 | Whether the model in which this layer is in is training. 24 | 25 | Returns 26 | ------- 27 | numpy.ndarray 28 | The activations(output) of this layer. 29 | """ 30 | raise NotImplementedError 31 | 32 | def backward(self, da): 33 | """ 34 | Propagates back the gradients. 
35 | 36 | Parameters 37 | ---------- 38 | da : numpy.ndarray 39 | The gradients wrt the cost of this layer activations. 40 | 41 | Returns 42 | ------- 43 | tuple 44 | Triplet with gradients wrt the cost of: previous layer's activations, weights and biases of this layer. 45 | """ 46 | raise NotImplementedError 47 | 48 | def update_params(self, dw, db): 49 | """ 50 | Updates parameters given their gradients. 51 | 52 | Parameters 53 | ---------- 54 | dw : numpy.ndarray 55 | The gradients wrt the cost of this layer's weights. 56 | db : numpy.ndarray 57 | The gradients wrt the cost of this layer's biases. 58 | """ 59 | raise NotImplementedError 60 | 61 | def get_params(self): 62 | """ 63 | Returns 64 | ------- 65 | tuple 66 | Trainable parameters(weights and biases) of this layer. 67 | """ 68 | raise NotImplementedError 69 | 70 | def get_output_dim(self): 71 | """ 72 | Returns 73 | ------- 74 | tuple 75 | Shape of the ndarray layer's output. 76 | """ 77 | raise NotImplementedError 78 | -------------------------------------------------------------------------------- /src/layers/pool.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from src.layers.layer import Layer 4 | 5 | 6 | class Pool(Layer): 7 | """2D pooling layer. 8 | 9 | Supports both max and average pooling. 10 | 11 | Attributes 12 | ---------- 13 | pool_size : int 14 | Height and Width of the 2D pooling window. 15 | stride : int 16 | Stride along height and width of the input volume on which the pooling operation is applied. 17 | n_h : int 18 | Height of the output volume. 19 | n_w : int 20 | Width of the output volume. 21 | n_c : int 22 | Number of channels of the output volume. 23 | n_h_prev : int 24 | Height of the input volume. 25 | n_w_prev : int 26 | Width of the input volume. 27 | n_c_prev : int 28 | Number of channels of the input volume. 29 | w : numpy.ndarray 30 | Weights. 31 | b : numpy.ndarray 32 | Biases. 33 | mode : str 34 | Pooling mode, either max or average. 35 | cache : dict 36 | Cache. 
37 | """ 38 | def __init__(self, pool_size, stride, mode): 39 | super().__init__() 40 | self.pool_size = pool_size 41 | self.stride = stride 42 | self.n_h, self.n_w, self.n_c = None, None, None 43 | self.n_h_prev, self.n_w_prev, self.n_c_prev = None, None, None 44 | self.w = None 45 | self.b = None 46 | self.mode = mode 47 | self.cache = {} 48 | 49 | def init(self, in_dim): 50 | self.n_h_prev, self.n_w_prev, self.n_c_prev = in_dim 51 | self.n_h = int((self.n_h_prev - self.pool_size) / self.stride + 1) 52 | self.n_w = int((self.n_w_prev - self.pool_size) / self.stride + 1) 53 | self.n_c = self.n_c_prev 54 | 55 | def forward(self, a_prev, training): 56 | batch_size = a_prev.shape[0] 57 | a = np.zeros((batch_size, self.n_h, self.n_w, self.n_c)) 58 | 59 | # Pool 60 | for i in range(self.n_h): 61 | v_start = i * self.stride 62 | v_end = v_start + self.pool_size 63 | 64 | for j in range(self.n_w): 65 | h_start = j * self.stride 66 | h_end = h_start + self.pool_size 67 | 68 | if self.mode == 'max': 69 | a_prev_slice = a_prev[:, v_start:v_end, h_start:h_end, :] 70 | 71 | if training: 72 | # Cache for backward pass 73 | self.cache_max_mask(a_prev_slice, (i, j)) 74 | 75 | a[:, i, j, :] = np.max(a_prev_slice, axis=(1, 2)) 76 | 77 | elif self.mode == 'average': 78 | a[:, i, j, :] = np.mean(a_prev[:, v_start:v_end, h_start:h_end, :], axis=(1, 2)) 79 | 80 | else: 81 | raise NotImplementedError("Invalid type of pooling") 82 | 83 | if training: 84 | self.cache['a_prev'] = a_prev 85 | 86 | return a 87 | 88 | def backward(self, da): 89 | a_prev = self.cache['a_prev'] 90 | batch_size = a_prev.shape[0] 91 | da_prev = np.zeros((batch_size, self.n_h_prev, self.n_w_prev, self.n_c_prev)) 92 | 93 | # 'Pool' back 94 | for i in range(self.n_h): 95 | v_start = i * self.stride 96 | v_end = v_start + self.pool_size 97 | 98 | for j in range(self.n_w): 99 | h_start = j * self.stride 100 | h_end = h_start + self.pool_size 101 | 102 | if self.mode == 'max': 103 | da_prev[:, v_start:v_end, h_start:h_end, :] += da[:, i:i+1, j:j+1, :] * self.cache[(i, j)] 104 | 105 | elif self.mode == 'average': 106 | # Distribute the average value back 107 | mean_value = np.copy(da[:, i:i+1, j:j+1, :]) 108 | mean_value[:, :, :, np.arange(mean_value.shape[-1])] /= (self.pool_size * self.pool_size) 109 | da_prev[:, v_start:v_end, h_start:h_end, :] += mean_value 110 | 111 | else: 112 | raise NotImplementedError("Invalid type of pooling") 113 | 114 | return da_prev, None, None 115 | 116 | def cache_max_mask(self, x, ij): 117 | mask = np.zeros_like(x) 118 | 119 | # This would be like doing idx = np.argmax(x, axis=(1,2)) if that was possible 120 | reshaped_x = x.reshape(x.shape[0], x.shape[1] * x.shape[2], x.shape[3]) 121 | idx = np.argmax(reshaped_x, axis=1) 122 | 123 | ax1, ax2 = np.indices((x.shape[0], x.shape[3])) 124 | mask.reshape(mask.shape[0], mask.shape[1] * mask.shape[2], mask.shape[3])[ax1, idx, ax2] = 1 125 | self.cache[ij] = mask 126 | 127 | def update_params(self, dw, db): 128 | pass 129 | 130 | def get_params(self): 131 | pass 132 | 133 | def get_output_dim(self): 134 | return self.n_h, self.n_w, self.n_c 135 | 136 | 137 | -------------------------------------------------------------------------------- /src/nn.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | 3 | import numpy as np 4 | 5 | from src.optimizer import gradient_descent 6 | 7 | 8 | class NeuralNetwork: 9 | """Neural network model. 
10 | 11 | Attributes 12 | ---------- 13 | layers : list 14 | Layers used in the model. 15 | w_grads : dict 16 | Weights' gradients during backpropagation. 17 | b_grads : dict 18 | Biases' gradients during backpropagation. 19 | cost_function : CostFunction 20 | Cost function to be minimized. 21 | optimizer : Optimizer 22 | Optimizer used to update trainable parameters (weights and biases). 23 | l2_lambda : float 24 | L2 regularization parameter. 25 | trainable_layers: list 26 | Trainable layers(those that have trainable parameters) used in the model. 27 | """ 28 | 29 | def __init__(self, input_dim, layers, cost_function, optimizer=gradient_descent, l2_lambda=0): 30 | self.layers = layers 31 | self.w_grads = {} 32 | self.b_grads = {} 33 | self.cost_function = cost_function 34 | self.optimizer = optimizer 35 | self.l2_lambda = l2_lambda 36 | 37 | # Initialize the layers in the model providing the input dimension they should expect 38 | self.layers[0].init(input_dim) 39 | for prev_layer, curr_layer in zip(self.layers, self.layers[1:]): 40 | curr_layer.init(prev_layer.get_output_dim()) 41 | 42 | self.trainable_layers = set(layer for layer in self.layers if layer.get_params() is not None) 43 | self.optimizer = optimizer(self.trainable_layers) 44 | self.optimizer.initialize() 45 | 46 | def forward_prop(self, x, training=True): 47 | """ 48 | Performs a forward propagation pass. 49 | 50 | Parameters 51 | ---------- 52 | x : numpy.ndarray 53 | Input that is fed to the first layer. 54 | training : bool 55 | Whether the model is training. 56 | 57 | Returns 58 | ------- 59 | numpy.ndarray 60 | Model's output, corresponding to the last layer's activations. 61 | """ 62 | a = x 63 | for layer in self.layers: 64 | a = layer.forward(a, training) 65 | 66 | return a 67 | 68 | def backward_prop(self, a_last, y): 69 | """ 70 | Performs a backward propagation pass. 71 | 72 | Parameters 73 | ---------- 74 | a_last : numpy.ndarray 75 | Last layer's activations. 76 | y : numpy.ndarray 77 | Target labels. 78 | """ 79 | da = self.cost_function.grad(a_last, y) 80 | batch_size = da.shape[0] 81 | 82 | for layer in reversed(self.layers): 83 | da_prev, dw, db = layer.backward(da) 84 | 85 | if layer in self.trainable_layers: 86 | if self.l2_lambda != 0: 87 | # Update the weights' gradients also wrt the l2 regularization cost 88 | self.w_grads[layer] = dw + (self.l2_lambda / batch_size) * layer.get_params()[0] 89 | else: 90 | self.w_grads[layer] = dw 91 | 92 | self.b_grads[layer] = db 93 | 94 | da = da_prev 95 | 96 | def predict(self, x): 97 | """ 98 | Calculates the output of the model for the input. 99 | 100 | Parameters 101 | ---------- 102 | x : numpy.ndarray 103 | Input. 104 | 105 | Returns 106 | ------- 107 | numpy.ndarray 108 | Prediction of the model, corresponding to the last layer's activations. 109 | """ 110 | a_last = self.forward_prop(x, training=False) 111 | return a_last 112 | 113 | def update_param(self, learning_rate, step): 114 | """ 115 | Updates the trainable parameters of the layers in the model. 116 | 117 | Parameters 118 | ---------- 119 | learning_rate : float 120 | Update's learning rate. 121 | step : int 122 | How many updates have been performed from the start of the training. 123 | """ 124 | self.optimizer.update(learning_rate, self.w_grads, self.b_grads, step) 125 | 126 | def compute_cost(self, a_last, y): 127 | """ 128 | Computes the cost, given the output and the target labels. 129 | 130 | Parameters 131 | ---------- 132 | a_last : numpy.ndarray 133 | Output. 
134 | y : numpy.ndarray 135 | Target labels. 136 | 137 | Returns 138 | ------- 139 | float 140 | The cost. 141 | """ 142 | cost = self.cost_function.f(a_last, y) 143 | if self.l2_lambda != 0: 144 | batch_size = y.shape[0] 145 | weights = [layer.get_params()[0] for layer in self.trainable_layers] 146 | l2_cost = (self.l2_lambda / (2 * batch_size)) * reduce(lambda ws, w: ws + np.sum(np.square(w)), weights, 0) 147 | return cost + l2_cost 148 | else: 149 | return cost 150 | 151 | def train(self, x_train, y_train, mini_batch_size, learning_rate, num_epochs, validation_data): 152 | """ 153 | Trains the model for a given number of epochs. 154 | 155 | Parameters 156 | ---------- 157 | x_train : numpy.ndarray 158 | Training input data. 159 | y_train : numpy.ndarray 160 | Training target labels. 161 | mini_batch_size : int 162 | Size of a mini batch. Number of samples per parameters update step. 163 | learning_rate : float 164 | Parameters' update learning rate. 165 | num_epochs : int 166 | The number of epochs. 167 | validation_data : tuple 168 | A pair of input data and target labels to evaluate the model on. 169 | """ 170 | x_val, y_val = validation_data 171 | print(f"Started training [batch_size={mini_batch_size}, learning_rate={learning_rate}]") 172 | step = 0 173 | for e in range(num_epochs): 174 | print("Epoch " + str(e + 1)) 175 | epoch_cost = 0 176 | 177 | if mini_batch_size == x_train.shape[0]: 178 | mini_batches = (x_train, y_train) 179 | else: 180 | mini_batches = NeuralNetwork.create_mini_batches(x_train, y_train, mini_batch_size) 181 | 182 | num_mini_batches = len(mini_batches) 183 | for i, mini_batch in enumerate(mini_batches, 1): 184 | mini_batch_x, mini_batch_y = mini_batch 185 | step += 1 186 | epoch_cost += self.train_step(mini_batch_x, mini_batch_y, learning_rate, step) / mini_batch_size 187 | print("\rProgress {:1.1%}".format(i / num_mini_batches), end="") 188 | 189 | print(f"\nCost after epoch {e+1}: {epoch_cost}") 190 | 191 | print("Computing accuracy on validation set...") 192 | accuracy = np.sum(np.argmax(self.predict(x_val), axis=1) == y_val) / x_val.shape[0] 193 | print(f"Accuracy on validation set: {accuracy}") 194 | 195 | print("Finished training") 196 | 197 | def train_step(self, x_train, y_train, learning_rate, step): 198 | """ 199 | Performs one model training step. 200 | 201 | Parameters 202 | ---------- 203 | x_train : numpy.ndarray 204 | Training input data. 205 | y_train : numpy.ndarray 206 | Training target labels. 207 | learning_rate : float 208 | Parameters' update learning rate. 209 | step : int 210 | How many parameters updates have been performed from the start of the training. 211 | 212 | Returns 213 | ------- 214 | float 215 | The cost during this training step. 216 | """ 217 | a_last = self.forward_prop(x_train, training=True) 218 | self.backward_prop(a_last, y_train) 219 | cost = self.compute_cost(a_last, y_train) 220 | self.update_param(learning_rate, step) 221 | return cost 222 | 223 | @staticmethod 224 | def create_mini_batches(x, y, mini_batch_size): 225 | """ 226 | Creates sample mini batches from input and target labels batches. 227 | x : numpy.ndarray 228 | Input batch. 229 | y : numpy.ndarray 230 | Target labels batch. 231 | 232 | Returns 233 | ------- 234 | list 235 | Mini batches pairs of input and target labels. 
236 | """ 237 | batch_size = x.shape[0] 238 | mini_batches = [] 239 | 240 | p = np.random.permutation(x.shape[0]) 241 | x, y = x[p, :], y[p, :] 242 | num_complete_minibatches = batch_size // mini_batch_size 243 | 244 | for k in range(0, num_complete_minibatches): 245 | mini_batches.append(( 246 | x[k * mini_batch_size:(k + 1) * mini_batch_size, :], 247 | y[k * mini_batch_size:(k + 1) * mini_batch_size, :] 248 | )) 249 | 250 | # Fill with remaining data, if needed 251 | if batch_size % mini_batch_size != 0: 252 | mini_batches.append(( 253 | x[num_complete_minibatches * mini_batch_size:, :], 254 | y[num_complete_minibatches * mini_batch_size:, :] 255 | )) 256 | 257 | return mini_batches 258 | -------------------------------------------------------------------------------- /src/optimizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Optimizer: 5 | """Optimizer. 6 | 7 | Attributes 8 | ---------- 9 | trainable_layers : list 10 | Trainable layers(those that have weights and biases). 11 | """ 12 | def __init__(self, trainable_layers): 13 | self.trainable_layers = trainable_layers 14 | 15 | def initialize(self): 16 | """ 17 | Initializes the optimizer. 18 | """ 19 | raise NotImplementedError 20 | 21 | def update(self, learning_rate, w_grads, b_grads, step): 22 | """ 23 | Updates the parameters of trainable layers. 24 | 25 | Parameters 26 | ---------- 27 | learning_rate : float 28 | Parameters' update learning rate. 29 | w_grads : numpy.ndarray 30 | Weights' gradients. 31 | b_grads : numpy.ndarray 32 | Biases' gradients. 33 | step : int 34 | How many updates have been performed by this optimizer. 35 | """ 36 | raise NotImplementedError 37 | 38 | 39 | class GradientDescent(Optimizer): 40 | def __init__(self, trainable_layers): 41 | Optimizer.__init__(self, trainable_layers) 42 | 43 | def initialize(self): 44 | pass 45 | 46 | def update(self, learning_rate, w_grads, b_grads, step): 47 | for layer in self.trainable_layers: 48 | layer.update_params(dw=learning_rate * w_grads[layer], 49 | db=learning_rate * b_grads[layer]) 50 | 51 | 52 | class RMSProp(Optimizer): 53 | def __init__(self, trainable_layers, beta=0.9, epsilon=1e-8): 54 | Optimizer.__init__(self, trainable_layers) 55 | self.s = {} 56 | self.beta = beta 57 | self.epsilon = epsilon 58 | 59 | def initialize(self): 60 | for layer in self.trainable_layers: 61 | w, b = layer.get_params() 62 | w_shape = w.shape 63 | b_shape = b.shape 64 | self.s[('dw', layer)] = np.zeros(w_shape) 65 | self.s[('db', layer)] = np.zeros(b_shape) 66 | 67 | def update(self, learning_rate, w_grads, b_grads, step): 68 | s_corrected = {} 69 | s_correction_term = 1 - np.power(self.beta, step) 70 | 71 | for layer in self.trainable_layers: 72 | layer_dw = ('dw', layer) 73 | layer_db = ('db', layer) 74 | 75 | self.s[layer_dw] = (self.beta * self.s[layer_dw] + (1 - self.beta) * np.square(w_grads[layer])) 76 | self.s[layer_db] = (self.beta * self.s[layer_db] + (1 - self.beta) * np.square(b_grads[layer])) 77 | 78 | s_corrected[layer_dw] = self.s[layer_dw] / s_correction_term 79 | s_corrected[layer_db] = self.s[layer_db] / s_correction_term 80 | 81 | dw = (learning_rate * (w_grads[layer] / (np.sqrt(s_corrected[layer_dw]) + self.epsilon))) 82 | db = (learning_rate * (b_grads[layer] / (np.sqrt(s_corrected[layer_db]) + self.epsilon))) 83 | 84 | layer.update_params(dw, db) 85 | 86 | 87 | class Adam(Optimizer): 88 | def __init__(self, trainable_layers, beta1=0.9, beta2=0.999, epsilon=1e-8): 89 | 
Optimizer.__init__(self, trainable_layers) 90 | self.v = {} 91 | self.s = {} 92 | self.beta1 = beta1 93 | self.beta2 = beta2 94 | self.epsilon = epsilon 95 | 96 | def initialize(self): 97 | for layer in self.trainable_layers: 98 | w, b = layer.get_params() 99 | w_shape = w.shape 100 | b_shape = b.shape 101 | self.v[('dw', layer)] = np.zeros(w_shape) 102 | self.v[('db', layer)] = np.zeros(b_shape) 103 | self.s[('dw', layer)] = np.zeros(w_shape) 104 | self.s[('db', layer)] = np.zeros(b_shape) 105 | 106 | def update(self, learning_rate, w_grads, b_grads, step): 107 | v_correction_term = 1 - np.power(self.beta1, step) 108 | s_correction_term = 1 - np.power(self.beta2, step) 109 | s_corrected = {} 110 | v_corrected = {} 111 | 112 | for layer in self.trainable_layers: 113 | layer_dw = ('dw', layer) 114 | layer_db = ('db', layer) 115 | 116 | self.v[layer_dw] = (self.beta1 * self.v[layer_dw] + (1 - self.beta1) * w_grads[layer]) 117 | self.v[layer_db] = (self.beta1 * self.v[layer_db] + (1 - self.beta1) * b_grads[layer]) 118 | 119 | v_corrected[layer_dw] = self.v[layer_dw] / v_correction_term 120 | v_corrected[layer_db] = self.v[layer_db] / v_correction_term 121 | 122 | self.s[layer_dw] = (self.beta2 * self.s[layer_dw] + (1 - self.beta2) * np.square(w_grads[layer])) 123 | self.s[layer_db] = (self.beta2 * self.s[layer_db] + (1 - self.beta2) * np.square(b_grads[layer])) 124 | 125 | s_corrected[layer_dw] = self.s[layer_dw] / s_correction_term 126 | s_corrected[layer_db] = self.s[layer_db] / s_correction_term 127 | 128 | dw = (learning_rate * v_corrected[layer_dw] / (np.sqrt(s_corrected[layer_dw]) + self.epsilon)) 129 | db = (learning_rate * v_corrected[layer_db] / (np.sqrt(s_corrected[layer_db]) + self.epsilon)) 130 | 131 | layer.update_params(dw, db) 132 | 133 | 134 | adam = Adam 135 | rmsprop = RMSProp 136 | gradient_descent = GradientDescent 137 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lpraat/numpyCNN/368d5f2f11ecbbad638813b8adfa1527e0412461/test/__init__.py -------------------------------------------------------------------------------- /test/test_cost.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from src.cost import sigmoid_cross_entropy 6 | 7 | 8 | class TestCost(unittest.TestCase): 9 | def test_cross_entropy(self): 10 | y = np.asarray([[1, 1, 1]]) 11 | a_last = np.array([[.8, .9, 0.4]]) 12 | self.assertAlmostEqual(sigmoid_cross_entropy.f(a_last.T, y.T), 0.41493159961539694) 13 | -------------------------------------------------------------------------------- /test/test_layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lpraat/numpyCNN/368d5f2f11ecbbad638813b8adfa1527e0412461/test/test_layers/__init__.py -------------------------------------------------------------------------------- /test/test_layers/test_conv.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from src.layers.conv import Conv 6 | 7 | 8 | class TestConv(unittest.TestCase): 9 | def test_conv_forward(self): 10 | np.random.seed(1) 11 | a_prev = np.random.randn(10, 4, 4, 3) 12 | w = np.random.randn(2, 2, 3, 8) 13 | b = np.random.randn(1, 1, 1, 8) 14 | c = Conv(2, 2, 8) 15 | c.init(a_prev.shape[1:]) 
16 | c.w = w 17 | c.b = b 18 | c.n_w = 4 19 | c.n_h = 4 20 | c.pad = 2 21 | z = c.forward(a_prev, False) 22 | np.testing.assert_almost_equal(np.mean(z), np.array([0.0489952035289])) 23 | np.testing.assert_almost_equal(z[3, 2, 1], np.array([-0.61490741, -6.7439236, -2.55153897, 24 | 1.75698377, 3.56208902, 0.53036437, 25 | 5.18531798, 8.75898442])) 26 | 27 | def test_conv_backward(self): 28 | np.random.seed(1) 29 | a_prev = np.random.randn(10, 4, 4, 3) 30 | w = np.random.randn(2, 2, 3, 8) 31 | b = np.random.randn(1, 1, 1, 8) 32 | c = Conv(2, 2, 8) 33 | c.init(a_prev.shape[1:]) 34 | c.w = w 35 | c.b = b 36 | c.n_w = 4 37 | c.n_h = 4 38 | c.pad = 2 39 | z = c.forward(a_prev, True) 40 | 41 | da, dw, db = c.backward(z) 42 | np.testing.assert_almost_equal(np.mean(da), np.array([1.45243777754])) 43 | np.testing.assert_almost_equal(np.mean(dw), np.array([0.172699145831])) 44 | np.testing.assert_almost_equal(np.mean(db), np.array([0.783923256462])) 45 | -------------------------------------------------------------------------------- /test/test_layers/test_fc.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from src.activation import relu, sigmoid 6 | from src.layers.fc import FullyConnected 7 | 8 | 9 | class TestLayer(unittest.TestCase): 10 | 11 | def test_fully_connected_forward(self): 12 | np.random.seed(2) 13 | layer_size = 1 14 | previous_layer_size = 3 15 | a_prev = np.random.randn(previous_layer_size, 2) 16 | w = np.random.randn(layer_size, previous_layer_size) 17 | b = np.random.randn(layer_size, 1).reshape(1, layer_size) 18 | 19 | fc_sigmoid = FullyConnected(3, sigmoid) 20 | fc_relu = FullyConnected(3, relu) 21 | 22 | fc_sigmoid.w = w 23 | fc_sigmoid.b = b 24 | fc_relu.w = w 25 | fc_relu.b = b 26 | 27 | np.testing.assert_array_almost_equal(fc_sigmoid.forward(a_prev.T, False), np.array([[0.96890023, 0.11013289]]).T) 28 | np.testing.assert_array_almost_equal(fc_relu.forward(a_prev.T, False), np.array([[3.43896131, 0.]]).T) 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /test/test_layers/test_flatten.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from src.layers.flatten import Flatten 6 | 7 | 8 | class TestFlatten(unittest.TestCase): 9 | 10 | def test_flatten(self): 11 | batch_size = 10 12 | n_h, n_w, n_c = 32, 32, 3 13 | a_prev = np.random.randn(batch_size, n_h, n_w, n_c) 14 | f = Flatten() 15 | f.init((n_h, n_w, n_c)) 16 | self.assertEqual(f.get_output_dim(), n_h * n_w * n_c) 17 | self.assertTupleEqual(f.forward(a_prev, False).shape, (batch_size, n_h * n_w * n_c)) 18 | da, _, _ = f.backward(a_prev) 19 | self.assertTupleEqual(da.shape, (batch_size, n_h, n_w, n_c)) 20 | np.testing.assert_array_almost_equal(a_prev, da) 21 | -------------------------------------------------------------------------------- /test/test_layers/test_pool.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from src.layers.pool import Pool 6 | 7 | 8 | class TestPool(unittest.TestCase): 9 | 10 | def test_pool_forward(self): 11 | np.random.seed(1) 12 | a_prev = np.random.randn(2, 4, 4, 3) 13 | 14 | p = Pool(3, 2, 'max') 15 | p.init(a_prev.shape[1:]) 16 | a = p.forward(a_prev, False) 17 | np.testing.assert_almost_equal(a, np.array([[[[1.74481176, 0.86540763, 1.13376944]]], 18 | 
[[[1.13162939, 1.51981682, 2.18557541]]]])) 19 | 20 | p = Pool(3, 2, 'average') 21 | p.init(a_prev.shape[1:]) 22 | a = p.forward(a_prev, False) 23 | np.testing.assert_almost_equal(a, np.array([[[[0.02105773, -0.20328806, -0.40389855]]], 24 | [[[-0.22154621, 0.51716526, 0.48155844]]]])) 25 | 26 | def test_pool_backward(self): 27 | np.random.seed(1) 28 | a_prev = np.random.randn(5, 5, 3, 2) 29 | p = Pool(2, 1, 'max') 30 | p.init(a_prev.shape[1:]) 31 | p.forward(a_prev, True) 32 | da = np.random.randn(5, 4, 2, 2) 33 | da_prev, _, _ = p.backward(da) 34 | np.testing.assert_almost_equal(np.mean(da), np.array([0.145713902729])) 35 | np.testing.assert_almost_equal(da_prev[1, 1], np.array([[0., 0.], 36 | [5.05844394, -1.68282702], 37 | [0., 0.]])) 38 | 39 | np.random.seed(1) 40 | a_prev = np.random.randn(5, 5, 3, 2) 41 | p = Pool(2, 1, 'average') 42 | p.init(a_prev.shape[1:]) 43 | p.forward(a_prev, True) 44 | da = np.random.randn(5, 4, 2, 2) 45 | da_prev, _, _ = p.backward(da) 46 | np.testing.assert_almost_equal(np.mean(da), np.array([0.145713902729])) 47 | np.testing.assert_array_almost_equal(da_prev[1, 1], np.array([[0.08485462, 0.2787552], 48 | [1.26461098, -0.25749373], 49 | [1.17975636, -0.53624893]])) 50 | -------------------------------------------------------------------------------- /test/test_nn.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from src.activation import relu, softmax, sigmoid 6 | from src.cost import sigmoid_cross_entropy, softmax_cross_entropy 7 | from src.layers.conv import Conv 8 | from src.layers.dropout import Dropout 9 | from src.layers.fc import FullyConnected 10 | from src.layers.flatten import Flatten 11 | from src.layers.pool import Pool 12 | from src.nn import NeuralNetwork 13 | from test.utils.grad_check import grad_check 14 | 15 | 16 | class TestNn(unittest.TestCase): 17 | 18 | def test_cnn(self): 19 | x = np.random.randn(2, 8, 8, 3) 20 | 21 | nn = NeuralNetwork( 22 | input_dim=(8, 8, 3), 23 | layers=[ 24 | Conv(2, 2, 6, activation=relu), 25 | Pool(2, 2, 'max'), 26 | Flatten(), 27 | FullyConnected(12, relu), 28 | FullyConnected(6, relu), 29 | FullyConnected(3, softmax) 30 | ], 31 | cost_function=softmax_cross_entropy 32 | ) 33 | 34 | y = np.array([[0, 1, 0], [1, 0, 0]]) 35 | self.assertTrue(grad_check(nn, x, y) < 2e-7) 36 | 37 | def test_regularized_cnn(self): 38 | x = np.random.randn(2, 8, 8, 3) 39 | 40 | nn = NeuralNetwork( 41 | input_dim=(8, 8, 3), 42 | layers=[ 43 | Conv(2, 2, 6, activation=relu), 44 | Pool(2, 2, 'max'), 45 | Flatten(), 46 | FullyConnected(12, relu), 47 | FullyConnected(6, relu), 48 | FullyConnected(2, softmax) 49 | ], 50 | cost_function=softmax_cross_entropy, 51 | l2_lambda=0.015 52 | ) 53 | 54 | y = np.array([[0, 1], [1, 0]]) 55 | self.assertTrue(grad_check(nn, x, y) < 2e-7) 56 | 57 | def test_softmax_backprop(self): 58 | x = np.random.randn(2, 32) 59 | 60 | nn = NeuralNetwork( 61 | input_dim=32, 62 | layers=[ 63 | FullyConnected(16, relu), 64 | FullyConnected(8, relu), 65 | FullyConnected(4, softmax) 66 | ], 67 | cost_function=softmax_cross_entropy, 68 | ) 69 | 70 | y = np.array([[1, 0, 0, 0], [0, 0, 0, 1]]) 71 | self.assertTrue(grad_check(nn, x, y) < 2e-7) 72 | 73 | def test_backprop(self): 74 | x = np.random.randn(32, 3) 75 | y = np.array([[1, 1, 0]]) 76 | 77 | nn = NeuralNetwork( 78 | input_dim=32, 79 | layers=[ 80 | FullyConnected(16, relu), 81 | FullyConnected(8, sigmoid), 82 | FullyConnected(1, sigmoid) 83 | ], 84 | 
cost_function=sigmoid_cross_entropy, 85 | ) 86 | 87 | self.assertTrue(grad_check(nn, x.T, y.T) < 2e-7) 88 | 89 | def test_regularized_backprop(self): 90 | x = np.random.randn(10, 3) 91 | y = np.array([[1, 1, 0]]) 92 | 93 | nn = NeuralNetwork( 94 | input_dim=10, 95 | layers=[ 96 | FullyConnected(5, relu), 97 | FullyConnected(3, sigmoid), 98 | FullyConnected(1, sigmoid) 99 | ], 100 | cost_function=sigmoid_cross_entropy, 101 | l2_lambda=0.5 102 | ) 103 | self.assertTrue(grad_check(nn, x.T, y.T) < 2e-7) 104 | 105 | def test_params_shape(self): 106 | x_num = 3 107 | h_num = 2 108 | y_num = 1 109 | 110 | np.random.seed(1) 111 | 112 | nn = NeuralNetwork( 113 | input_dim=x_num, 114 | layers=[ 115 | FullyConnected(h_num, relu), 116 | FullyConnected(y_num, sigmoid) 117 | ], 118 | cost_function=sigmoid_cross_entropy 119 | ) 120 | 121 | self.assertEqual(nn.layers[0].w.shape, (h_num, x_num)) 122 | self.assertEqual(nn.layers[0].b.shape, (1, h_num)) 123 | self.assertEqual(nn.layers[1].w.shape, (y_num, h_num)) 124 | self.assertEqual(nn.layers[1].b.shape, (1, y_num)) 125 | 126 | def test_forward(self): 127 | np.random.seed(6) 128 | x = np.random.randn(5, 4) 129 | x_num = 5 130 | h1_num = 4 131 | h2_num = 3 132 | y_num = 1 133 | 134 | w1 = np.random.randn(h1_num, x_num) 135 | b1 = np.random.randn(1, h1_num) 136 | w2 = np.random.randn(h2_num, h1_num) 137 | b2 = np.random.randn(1, h2_num) 138 | w3 = np.random.randn(y_num, h2_num) 139 | b3 = np.random.randn(1, y_num) 140 | 141 | nn = NeuralNetwork( 142 | input_dim=x_num, 143 | layers=[ 144 | FullyConnected(h1_num, relu), 145 | FullyConnected(h2_num, relu), 146 | FullyConnected(y_num, sigmoid) 147 | ], 148 | cost_function=sigmoid_cross_entropy 149 | ) 150 | 151 | nn.layers[0].w = w1 152 | nn.layers[0].b = b1 153 | nn.layers[1].w = w2 154 | nn.layers[1].b = b2 155 | nn.layers[2].w = w3 156 | nn.layers[2].b = b3 157 | 158 | a_last = nn.forward_prop(x.T) 159 | np.testing.assert_array_almost_equal(a_last, np.array([[0.03921668, 0.70498921, 0.19734387, 0.04728177]]).T) 160 | 161 | def test_regularized_cost(self): 162 | np.random.seed(1) 163 | y = np.array([[1, 1, 0, 1, 0]]) 164 | w1 = np.random.randn(2, 3) 165 | b1 = np.random.randn(2, 1).reshape(1, 2) 166 | w2 = np.random.randn(3, 2) 167 | b2 = np.random.randn(3, 1).reshape(1, 3) 168 | w3 = np.random.randn(1, 3) 169 | b3 = np.random.randn(1, 1) 170 | a3 = np.array([[0.40682402, 0.01629284, 0.16722898, 0.10118111, 0.40682402]]).T 171 | 172 | nn = NeuralNetwork( 173 | input_dim=3, 174 | layers=[ 175 | FullyConnected(2, relu), 176 | FullyConnected(3, sigmoid), 177 | FullyConnected(1, sigmoid) 178 | ], 179 | cost_function=sigmoid_cross_entropy, 180 | l2_lambda=0.1 181 | ) 182 | nn.layers[0].w = w1 183 | nn.layers[0].b = b1 184 | nn.layers[1].w = w2 185 | nn.layers[1].b = b2 186 | nn.layers[2].w = w3 187 | nn.layers[2].b = b3 188 | self.assertAlmostEqual(nn.compute_cost(a3, y.T), 1.78648594516) 189 | 190 | def test_dropout(self): 191 | np.random.seed(1) 192 | x = np.random.randn(3, 5) 193 | w1 = np.random.randn(2, 3) 194 | b1 = np.random.randn(2, 1).reshape(1, 2) 195 | w2 = np.random.randn(3, 2) 196 | b2 = np.random.randn(3, 1).reshape(1, 3) 197 | w3 = np.random.randn(1, 3) 198 | b3 = np.random.randn(1, 1) 199 | 200 | nn = NeuralNetwork( 201 | input_dim=3, 202 | layers=[ 203 | FullyConnected(2, relu), 204 | Dropout(keep_prob=0.7), 205 | FullyConnected(3, relu), 206 | Dropout(keep_prob=0.7), 207 | FullyConnected(1, sigmoid) 208 | ], 209 | cost_function=sigmoid_cross_entropy, 210 | ) 211 | nn.layers[0].w = w1 212 | 
nn.layers[0].b = b1 213 | nn.layers[2].w = w2 214 | nn.layers[2].b = b2 215 | nn.layers[4].w = w3 216 | nn.layers[4].b = b3 217 | np.random.seed(1) 218 | a_last = nn.forward_prop(x.T) 219 | np.testing.assert_array_almost_equal(a_last, 220 | np.array([[0.369747, 0.496834, 0.045651, 0.014469, 0.369747]]).T) 221 | -------------------------------------------------------------------------------- /test/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lpraat/numpyCNN/368d5f2f11ecbbad638813b8adfa1527e0412461/test/utils/__init__.py -------------------------------------------------------------------------------- /test/utils/grad_check.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities to perform Gradient Checking 3 | """ 4 | from functools import reduce 5 | 6 | import numpy as np 7 | 8 | 9 | def to_vector(layers, w_grads, b_grads): 10 | v_params = np.array([]) 11 | v_grads = np.array([]) 12 | params_shapes = {} 13 | 14 | for layer in layers: 15 | w, b = layer.get_params() 16 | params_shapes[("w", layer)] = w.shape 17 | v_params = np.append(v_params, w.reshape(-1, reduce(lambda x, y: x * y, w.shape))) 18 | 19 | params_shapes[("b", layer)] = b.shape 20 | v_params = np.append(v_params, b.reshape(-1, reduce(lambda x, y: x * y, b.shape))) 21 | 22 | dw = w_grads[layer] 23 | v_grads = np.append(v_grads, dw.reshape(-1, reduce(lambda x, y: x * y, dw.shape))) 24 | 25 | db = b_grads[layer] 26 | v_grads = np.append(v_grads, db.reshape(-1, reduce(lambda x, y: x * y, db.shape))) 27 | 28 | v_params = v_params.reshape(v_params.shape[0], 1) 29 | v_grads = v_grads.reshape(v_grads.shape[0], 1) 30 | 31 | return v_params, v_grads, params_shapes 32 | 33 | 34 | def to_dict(layers, v_params, params_shapes): 35 | curr = 0 36 | params = {} 37 | 38 | for layer in layers: 39 | sh = params_shapes[("w", layer)] 40 | to_take = reduce(lambda x, y: x * y, sh) 41 | w = v_params[curr:curr+to_take].reshape(*sh) 42 | layer.w = w 43 | curr += to_take 44 | 45 | sh = params_shapes[("b", layer)] 46 | to_take = reduce(lambda x, y: x * y, sh) 47 | b = v_params[curr:curr+to_take].reshape(*sh) 48 | layer.b = b 49 | curr += to_take 50 | 51 | return params 52 | 53 | 54 | def grad_check(nn, x, y, epsilon=1e-7): 55 | a_last = nn.forward_prop(x) 56 | nn.backward_prop(a_last, y) 57 | v_params, v_grads, params_shapes = to_vector(nn.trainable_layers, nn.w_grads, nn.b_grads) 58 | n_param = v_params.shape[0] 59 | J_plus = np.zeros((n_param, 1)) 60 | J_minus = np.zeros((n_param, 1)) 61 | grad_approx = np.zeros((n_param, 1)) 62 | 63 | for i in range(n_param): 64 | v_params_plus = np.copy(v_params) 65 | v_params_plus[i][0] += epsilon 66 | nn.params = to_dict(nn.trainable_layers, v_params_plus, params_shapes) 67 | a_last = nn.forward_prop(x) 68 | J_plus[i] = nn.compute_cost(a_last, y) 69 | 70 | v_params_minus = np.copy(v_params) 71 | v_params_minus[i][0] -= epsilon 72 | nn.params = to_dict(nn.trainable_layers, v_params_minus, params_shapes) 73 | a_last = nn.forward_prop(x) 74 | J_minus[i] = nn.compute_cost(a_last, y) 75 | 76 | grad_approx[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon) 77 | 78 | return np.linalg.norm(grad_approx - v_grads) / (np.linalg.norm(v_grads) + np.linalg.norm(grad_approx)) 79 | --------------------------------------------------------------------------------
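As a closing usage note, a small sketch of evaluating a trained model outside of `train()`; it mirrors the validation-accuracy computation inside `NeuralNetwork.train` and assumes `cnn`, `x_test` and `y_test` were built as in `mnist_cnn.py` (where `y_test` keeps its raw integer labels):

```python
import numpy as np

# Assumes `cnn`, `x_test`, `y_test` exist as in mnist_cnn.py.
# predict() returns the softmax activations of the last layer, shape (N, 10).
predictions = np.argmax(cnn.predict(x_test), axis=1)
accuracy = np.sum(predictions == y_test) / x_test.shape[0]
print(f"Test accuracy: {accuracy}")
```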