├── .gitignore ├── README.md ├── conv_tests.py ├── data └── mnist_one_hot.pkl.gz ├── mnist_example.py └── neural_network ├── __init__.py ├── network.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Numpy Atrous/Dilated and Transposed Convolutions 2 | 3 | A pure numpy-based implementation of transposed convolutions, which are used for upscaling tensors, and of the dilated convolutions proposed in [Multi-Scale Context Aggregation by Dilated Convolutions](https://arxiv.org/abs/1511.07122) by Yu et al. The convolutions are performed by matrix multiplications by transforming the image and the filters into matrices. The loops in the transformation routines are numba-compatible and therefore can be compiled using the numba JIT. 4 | 5 | ## Requirements 6 | - Python 3 7 | - Numpy 8 | - Numba (recommended) 9 | - tqdm 10 | 11 | ## Creating a neural network 12 | 13 | The `Network` class in `network.py` represents a single neural network. The various available layers are also present in `network.py`. These include convolutional layers (with dilation), transposed convolutional layers, pooling layers, fully connected layers, various activations, cross-entropy and squared error losses and various padding/reshaping layers. These layers can be added to the network using the `network.add_layer` function, with the last layer being a loss function. To train a model, you must run the `model.forward` function, which gives you the loss, the `model.backward` function which calculates the derivatives, and the `model.adam_trainstep` function which updates the parameters using the [ADAM](https://arxiv.org/abs/1412.6980) optimizer. For inference, `model.run` can be used. 14 | 15 | ## Running the tests 16 | 17 | The tests for the convolution gradient implementation can be run with 18 | 19 | python conv_tests.py 20 | 21 | ## Running the networks on MNIST 22 | 23 | python mnist_example.py 24 | 25 | The `mnist_example.py` runs two networks on the MNIST dataset present in the repository. 
def findiff_grad(inp, function, eps=1e-6):
    """
    Approximate the gradient of a scalar function using central finite differences
    :param inp: The numpy array at which to evaluate the function. It is perturbed in place, one entry at a
                time, while the function is probed, and holds exactly its original values on return
    :param function: The function; it is called with inp itself and must return a scalar
    :param eps: The epsilon value to use
    :return: The approximate gradient of the function at the input (same shape as inp)
    """
    grad = np.zeros_like(inp)
    # The flat views let the same loop serve arrays of any rank.
    flat_inp, flat_grad = inp.flat, grad.flat
    for i in range(inp.size):
        # Remember the exact value: adding and subtracting eps does not round-trip in floating point,
        # so the entry is restored by assignment instead of by arithmetic.
        original = flat_inp[i]
        flat_inp[i] = original + eps
        fn_2 = function(inp)
        flat_inp[i] = original - eps
        fn_1 = function(inp)
        flat_grad[i] = (fn_2 - fn_1) / (2 * eps)
        flat_inp[i] = original
    return grad
def check_transposed_conv2d():
    """
    Compare the analytic transposed convolution gradients against finite-difference estimates.

    Each case pairs an input shape, a filter shape, a stride and a dilation; the scalar being
    differentiated is the tensor sum of the transposed convolution output.
    """
    cases = list(zip(
        [(1, 1, 7, 7), (2, 5, 3, 3), (3, 15, 1, 1)],      # input shapes
        [(1, 13, 4, 4), (5, 8, 1, 1), (15, 20, 5, 5)],    # filter shapes
        [(1, 1), (2, 2), (3, 3)],                         # strides
        [2, 1, 3],                                        # dilations
    ))
    tqdm.write('Running transposed_conv2d tests...')
    for case in tqdm(cases, total=len(cases), file=sys.stdout):
        image_shape, kernel_shape, step, rate = case
        # Draw the image before the filter so the random stream is consumed in a fixed order.
        image = np.random.random(image_shape) * 2 - 1
        kernel = np.random.random(kernel_shape) * 2 - 1

        def summed_output(img, ker):
            # The scalar objective whose gradient is being checked.
            return tensor_sum(transposed_conv2d(img, ker, rate, step))

        upstream = backward_tensor_sum(transposed_conv2d(image, kernel, rate, step))
        image_grad, kernel_grad = backward_transposed_conv2d(upstream, image, kernel, rate, step)

        numeric_image_grad = findiff_grad(
            image.flatten(), lambda flat: summed_output(flat.reshape(image.shape), kernel))
        numeric_kernel_grad = findiff_grad(
            kernel.flatten(), lambda flat: summed_output(image, flat.reshape(kernel.shape)))

        assert np.allclose(kernel_grad.flatten(), numeric_kernel_grad, atol=1e-6), f'Filter grad wrong for {case}'
        assert np.allclose(image_grad.flatten(), numeric_image_grad, atol=1e-6), f'Input grad wrong for {case}'
    tqdm.write('All tests passed\n------------------------------------------------------')
def load_data():
    """Load the MNIST data and normalize it.

    Reads the gzipped pickle at ``data/mnist_one_hot.pkl.gz`` (relative to the working directory), which
    must hold three ``(images, one_hot_labels)`` pairs: train, validation and test.

    :return: ``((trainx, trainy), (valx, valy), (testx, testy))`` where every image array has been
             rescaled with ``x * 2 - 1`` and reshaped to ``(-1, 1, 28, 28)``, and every label array holds
             the argmax of the one-hot rows.
    """
    # NOTE: unpickling executes arbitrary code; this is acceptable only because the archive ships with
    # the repository. Do not point this at untrusted files.
    # The with-block guarantees the gzip handle is closed (it used to be leaked).
    with gzip.open("data/mnist_one_hot.pkl.gz") as archive:
        splits = pickle.load(archive, encoding="latin1")

    def prepare(images, one_hot):
        # Rescale the pixels (presumably from [0, 1] to [-1, 1]) and turn one-hot rows into class indices.
        return (images * 2 - 1).reshape(-1, 1, 28, 28), np.argmax(one_hot, axis=1)

    train, val, test = (prepare(images, one_hot) for images, one_hot in splits)
    return train, val, test
+ 1): 54 | train_order = np.random.permutation(train_size) 55 | bar = trange(train_batches, file=sys.stdout) 56 | for j in bar: 57 | cost = model.forward(trainx[train_order[j * batch_size: (j + 1) * batch_size]], 58 | trainy[train_order[j * batch_size: (j + 1) * batch_size]]) 59 | bar.set_description(f'Curr loss: {cost}') 60 | model.backward() 61 | model.adam_trainstep() 62 | correct = [] 63 | for j in range(val_batches): 64 | res = model.run(valx[j * batch_size:(j + 1) * batch_size]) 65 | correct.append(np.argmax(res, axis=1) == valy[j * batch_size:(j + 1) * batch_size]) 66 | tqdm.write(f'Validation accuracy: {np.mean(correct)}') 67 | tqdm.write('-------------------------------------------------------') 68 | 69 | correct = [] 70 | for i in range(test_batches): 71 | res = model.run(testx[i * batch_size:(i + 1) * batch_size]) 72 | correct.append(np.argmax(res, axis=1) == testy[i * batch_size:(i + 1) * batch_size]) 73 | tqdm.write(f'Test accuracy: {np.mean(correct)}') 74 | tqdm.write('-------------------------------------------------------') 75 | 76 | 77 | def train_autoencoder(data, n_iters=10, batch_size=100): 78 | """ 79 | Train a convolution-transposed convolution based autoencoder 80 | :param data: The loaded MNIST data 81 | :param n_iters: The number of iterations 82 | :param batch_size: The batch size to use 83 | """ 84 | tqdm.write(f'Training a fully-convolutional autoencoder for {n_iters} iterations.') 85 | (trainx, trainy), (valx, valy), (testx, testy) = data 86 | train_size, val_size, test_size = trainx.shape[0], valx.shape[0], testx.shape[0] 87 | train_batches = (train_size - 1) // batch_size + 1 88 | val_batches = (val_size - 1) // batch_size + 1 89 | test_batches = (test_size - 1) // batch_size + 1 90 | 91 | model = Network() 92 | model.add_layer(ConvLayer(10, (2, 2), (2, 2), 1)) \ 93 | .add_layer(ConvLayer(10, (2, 2), (2, 2), 1)) \ 94 | .add_layer(ConvLayer(15, (1, 1), (2, 2), 1)) \ 95 | .add_layer(TransposedConvLayer(10, (1, 1), (2, 2), 1)) \ 96 | 
class Layer:
    """
    Common base class from which every layer derives
    """
    def __init__(self):
        # Parameter arrays and their gradient buffers; subclasses fill these in build().
        self.params = []
        self.grads = []
        # Shapes are unknown until build() runs.
        self.input_shape = None
        self.output_shape = None
        self.built = False

    def build(self, input_shape):
        """Create the actual parameters for the given input shape. Meant to run on the first forward pass."""
        raise NotImplementedError

    def forward(self, *args):
        """Run the layer forward, building it first if this is the first call, and return the output."""
        raise NotImplementedError

    def backward(self, top_grad):
        """Run the layer backward to compute its gradients and return the gradient wrt the input."""
        raise NotImplementedError
class ConvLayer(Layer):
    """A layer applying a valid (unpadded) 2D convolution, with optional stride and dilation."""

    def __init__(self, n_filters, filter_shape, stride=(1, 1), dilation=1):
        """
        :param n_filters: The number of convolution filters
        :param filter_shape: The (height, width) of each filter
        :param stride: The (vertical, horizontal) stride for convolving
        :param dilation: The dilation factor for the filters
        """
        Layer.__init__(self)
        self.n_filters = n_filters
        self.filter_shape = filter_shape
        self.stride = stride
        self.dilation = dilation

    def build(self, input_shape):
        self.input_shape = input_shape
        in_channels = input_shape[0]
        window = self.filter_shape[0] * self.filter_shape[1]
        # Normal initialisation with variance 2 / (fan_in + fan_out).
        stddev = np.sqrt(2.0 / (in_channels * window + self.n_filters * window))
        self.filters = np.random.normal(
            0.0, stddev, size=(self.n_filters, in_channels, self.filter_shape[0], self.filter_shape[1]))
        self.bias = np.full((self.n_filters,), 0.01)
        self.params = [self.filters, self.bias]
        self.grads = [np.empty_like(param) for param in self.params]
        # A dilated filter covers (size - 1) * dilation + 1 pixels along each axis.
        spatial = tuple(
            (input_shape[axis + 1] - ((self.filter_shape[axis] - 1) * self.dilation + 1)) // self.stride[axis] + 1
            for axis in (0, 1))
        self.output_shape = (self.n_filters,) + spatial
        self.built = True

    def forward(self, input_):
        if not self.built:
            # Everything after the batch axis defines the per-sample shape.
            self.build(input_.shape[1:])
        self.input = input_
        convolved = conv2d(input_, self.filters, self.dilation, self.stride)
        # Broadcast one bias per output channel over batch, height and width.
        self.output = convolved + self.bias.reshape(1, -1, 1, 1)
        return self.output

    def backward(self, top_grad):
        input_grad, filter_grad = backward_conv2d(top_grad, self.input, self.filters, self.dilation, self.stride)
        # Write into the existing buffers so references held elsewhere stay valid.
        self.bottom_grad = input_grad
        self.grads[0][...] = filter_grad
        self.grads[1][...] = top_grad.sum(axis=(0, 2, 3))
        return self.bottom_grad
class Pool2DLayer(Layer):
    """A pooling layer that reduces every window to its maximum (default) or its mean."""

    def __init__(self, pool_shape, stride=None, dilation=1, pool_type='max'):
        """
        :param pool_shape: The shape for pooling.
        :param stride: The stride for the filter. If None, taken to be the same as the pool_shape.
        :param dilation: The dilation factor for the filter.
        :param pool_type: 'max' selects max pooling; any other value selects mean pooling.
        """
        Layer.__init__(self)
        self.pool_shape = pool_shape
        self.stride = stride if stride is not None else pool_shape
        self.dilation = dilation
        self.pool_type = pool_type
        self.forward_pool_fn = maxpool2d if pool_type == 'max' else meanpool2d
        self.backward_pool_fn = backward_maxpool2d if pool_type == 'max' else backward_meanpool2d

    def build(self, input_shape):
        """
        Record the shapes for this layer; pooling has no parameters to create.
        :param input_shape: The per-sample shape (channels, height, width)
        """
        self.input_shape = input_shape
        # A dilated window covers (size - 1) * dilation + 1 pixels along each axis.
        dilated_shape = ((self.pool_shape[0] - 1) * self.dilation + 1, (self.pool_shape[1] - 1) * self.dilation + 1)
        self.output_shape = (input_shape[0],
                             (input_shape[1] - dilated_shape[0]) // self.stride[0] + 1,
                             (input_shape[2] - dilated_shape[1]) // self.stride[1] + 1)
        self.built = True

    def forward(self, input_):
        """
        :param input_: The batch to pool; its first axis is the batch axis
        :return: The pooled output
        """
        if not self.built:
            # Use the shape of the batch, not a slice of its data: the per-sample shape is everything
            # after the batch axis (this used to pass input_[1:], i.e. actual samples, to build).
            input_shape = input_.shape[1:]
            self.build(input_shape)
        self.input = input_
        # The pooling function also returns a cache that the backward pass needs.
        self.output, self.cache = self.forward_pool_fn(input_, self.pool_shape, self.dilation, self.stride)
        return self.output

    def backward(self, top_grad):
        """
        :param top_grad: The gradient wrt this layer's output
        :return: The gradient wrt this layer's input
        """
        self.bottom_grad = self.backward_pool_fn(top_grad, self.cache, self.input, self.pool_shape,
                                                 self.dilation, self.stride)
        return self.bottom_grad
class Pad2DLayer(Layer):
    """Pads a 2D image with zeros."""

    def __init__(self, pad_shape):
        """
        :param pad_shape: A tuple representing the height and the width padding
        """
        Layer.__init__(self)
        self.pad_shape = pad_shape

    def build(self, input_shape):
        """
        Record the shapes for this layer; padding has no parameters to create.
        :param input_shape: The per-sample shape (channels, height, width)
        """
        self.input_shape = input_shape
        # Each spatial axis grows by twice its padding. The width must be derived from input_shape[2];
        # it used to be derived from the height, which was wrong for non-square inputs.
        self.output_shape = (self.input_shape[0], self.input_shape[1] + 2 * self.pad_shape[0],
                             self.input_shape[2] + 2 * self.pad_shape[1])
        self.built = True

    def forward(self, input_):
        """
        :param input_: The batch to pad; its first axis is the batch axis
        :return: The padded batch
        """
        if not self.built:
            input_shape = input_.shape[1:]
            self.build(input_shape)
        self.input = input_
        self.output = pad2D(input_, self.pad_shape)
        return self.output

    def backward(self, top_grad):
        """
        :param top_grad: The gradient wrt the padded output
        :return: The gradient wrt the unpadded input
        """
        self.bottom_grad = backward_pad2D(top_grad, self.pad_shape)
        return self.bottom_grad
class Network:
    """A sequential neural network whose last layer is expected to be a loss layer."""

    def __init__(self):
        self.layers = []
        # Flat lists of every layer's parameters and gradient buffers; filled lazily by adam_trainstep.
        self.params = []
        self.grads = []
        self.optimizer_built = False

    def add_layer(self, layer):
        """
        Add a layer to this network. The last layer should be a loss layer.
        :param layer: The Layer object
        :return: self, so that calls can be chained
        """
        self.layers.append(layer)
        return self

    def forward(self, input_, truth):
        """
        Run the entire network, and return the loss.
        :param input_: The input to the network
        :param truth: The ground truth labels to be passed to the last layer
        :return: The calculated loss.
        """
        input_ = self.run(input_)
        return self.layers[-1].forward(input_, truth)

    def run(self, input_, k=-1):
        """
        Run the network for k layers. The last (loss) layer is never run here.
        :param k: If positive, run the first k layers (capped so that the last layer is excluded); if
                  negative, ignore the last -k layers; 0 runs every layer except the last one.
        :param input_: The input to the network
        :return: The output of the last layer that was run (by default the second last layer)
        """
        k = len(self.layers) if not k else k
        for layer in self.layers[:min(len(self.layers) - 1, k)]:
            input_ = layer.forward(input_)
        return input_

    def backward(self):
        """
        Run the backward pass through every layer, last to first, so that each layer stores its gradients.
        """
        top_grad = 1.0
        for layer in self.layers[::-1]:
            top_grad = layer.backward(top_grad)

    def _build_optimizer(self):
        """Collect every layer's parameters and gradient buffers and create the ADAM moment estimates."""
        # Layers create their parameters on their first forward pass. Collecting before that would latch
        # an empty (or partial) parameter list and silently skip those parameters forever.
        unbuilt = [type(layer).__name__ for layer in self.layers if not getattr(layer, 'built', True)]
        if unbuilt:
            raise RuntimeError(f'Run a forward pass before the first train step; unbuilt layers: {unbuilt}')
        self.params.extend(itertools.chain.from_iterable(layer.params for layer in self.layers))
        self.grads.extend(itertools.chain.from_iterable(layer.grads for layer in self.layers))
        self.first_moments = [np.zeros_like(param) for param in self.params]
        self.second_moments = [np.zeros_like(param) for param in self.params]
        self.time_step = 1
        self.optimizer_built = True

    def adam_trainstep(self, alpha=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, l2=0.):
        """
        Run the update step after calculating the gradients
        :param alpha: The learning rate
        :param beta_1: The exponential average weight for the first moment
        :param beta_2: The exponential average weight for the second moment
        :param epsilon: The smoothing constant
        :param l2: The l2 decay constant (l2 * param is subtracted directly; it is not scaled by alpha)
        :raises RuntimeError: If this is the first train step and some layer has not been built yet
        """
        if not self.optimizer_built:
            self._build_optimizer()
        for param, grad, first_moment, second_moment in zip(self.params, self.grads,
                                                            self.first_moments, self.second_moments):
            # Every update is in place: the layers hold references to these very arrays.
            first_moment *= beta_1
            first_moment += (1 - beta_1) * grad
            second_moment *= beta_2
            second_moment += (1 - beta_2) * (grad ** 2)
            # Bias-corrected moment estimates.
            m_hat = first_moment / (1 - beta_1 ** self.time_step)
            v_hat = second_moment / (1 - beta_2 ** self.time_step)
            param -= alpha * m_hat / (np.sqrt(v_hat) + epsilon) + l2 * param
        self.time_step += 1
def _backward_im_to_rows(top_grad, inp_shape, filter_shape, dilation, stride, dilated_shape, res_shape):
    """
    Gradient transformation for the im2rows operation
    :param top_grad: The grad from the next layer, of shape (batch, num_rows * num_cols, depth * height * width)
    :param inp_shape: The shape of the input image
    :param filter_shape: The shape of the filter (num_filters, depth, height, width)
    :param dilation: The dilation for the filter
    :param stride: The stride for the filter
    :param dilated_shape: The dilated shape of the filter
    :param res_shape: The shape of the expected result (output rows, output columns)
    :return: The reformed gradient of the shape of the image
    """
    dilated_rows, dilated_cols = dilated_shape
    num_rows, num_cols = res_shape
    res = np.zeros(inp_shape, dtype=top_grad.dtype)
    top_grad = top_grad.reshape(
        (top_grad.shape[0], top_grad.shape[1], filter_shape[1], filter_shape[2], filter_shape[3]))
    for it in range(num_rows * num_cols):
        # The forward transform stores window (i, j) at flat position i * num_cols + j, so both the row
        # and the column must be decoded with num_cols. Decoding with num_rows only happens to work
        # when the output grid is square.
        i = it // num_cols
        j = it % num_cols
        # Windows can overlap, hence the accumulation into the input-shaped gradient.
        res[:, :, i * stride[0]:i * stride[0] + dilated_rows:dilation,
            j * stride[1]:j * stride[1] + dilated_cols:dilation] += top_grad[:, it, :, :, :]
    return res
1) 93 | imrow = _im_to_rows(image, filters.shape, dilation, stride, dilated_shape, res_shape) 94 | filtmat = _filter_to_mat(filters) 95 | res = imrow.dot(filtmat) 96 | return _convolved_to_im(res, res_shape) 97 | 98 | 99 | def backward_conv2d(top_grad, image, filters, dilation, stride): 100 | """ 101 | Given the grads from the next op, performs the backward convolution pass 102 | :param top_grad: The grad from the next op 103 | :param image: The input image to this operation 104 | :param filters: The filters for this operation 105 | :param dilation: The dilation factor for the filter 106 | :param stride: The stride for the convolution 107 | :return: A tuple representing the grads wrt the input image and the filters 108 | """ 109 | filter_shape = filters.shape 110 | im_shape = image.shape 111 | dilated_shape = ((filter_shape[2] - 1) * dilation + 1, (filter_shape[3] - 1) * dilation + 1) 112 | res_shape = ((im_shape[2] - dilated_shape[0]) // stride[0] + 1, (im_shape[3] - dilated_shape[1]) // stride[1] + 1) 113 | imrow = _im_to_rows(image, filters.shape, dilation, stride, dilated_shape, res_shape) 114 | filtmat = _filter_to_mat(filters) 115 | gradmat = top_grad.reshape((top_grad.shape[0], top_grad.shape[1], -1)).transpose((0, 2, 1)) 116 | filt_grad = np.matmul(imrow.transpose((0, 2, 1)), gradmat).sum(axis=0).T.reshape(filter_shape) 117 | inp_grad_mat = gradmat.dot(filtmat.T) 118 | inp_grad = _backward_im_to_rows(inp_grad_mat, image.shape, filters.shape, dilation, 119 | stride, dilated_shape, res_shape) 120 | return inp_grad, filt_grad 121 | 122 | 123 | def transposed_conv2d(image, filters, dilation, stride): 124 | """ 125 | Perform a transposed convolution, which can upscale the image. 
126 | :param image: The input image to upscale 127 | :param filters: The filters for this operation 128 | :param dilation: The dilation factor for the filters 129 | :param stride: The stride for the *forward* convolution 130 | :return: The return upscaled image 131 | """ 132 | filter_shape = filters.shape 133 | im_shape = image.shape 134 | dilated_shape = ((filter_shape[2] - 1) * dilation + 1, (filter_shape[3] - 1) * dilation + 1) 135 | res_shape = (im_shape[2] - 1) * stride[0] + dilated_shape[0], (im_shape[3] - 1) * stride[1] + dilated_shape[1] 136 | image_mat = image.reshape((image.shape[0], image.shape[1], -1)).transpose((0, 2, 1)) 137 | filtmat = _filter_to_mat(filters) 138 | res_mat = image_mat.dot(filtmat.T) 139 | return _backward_im_to_rows(res_mat, (image.shape[0], filters.shape[1], *res_shape), filters.shape, dilation, 140 | stride, dilated_shape, im_shape[2:]) 141 | 142 | 143 | def backward_transposed_conv2d(top_grad, image, filters, dilation, stride): 144 | """ 145 | Given the grads from the next operation, performs the backward transposed convolution pass 146 | :param top_grad: The gradients with respect to the outputs of this operation 147 | :param image: The input to this operation 148 | :param filters: The filters used in this operation 149 | :param dilation: The dilation factor for the filters 150 | :param stride: The strides for the convolution 151 | :return: A tuple representing the grads wrt the input image and the filters 152 | """ 153 | filter_shape = filters.shape 154 | im_shape = image.shape 155 | filtmat = _filter_to_mat(filters) 156 | dilated_shape = ((filter_shape[2] - 1) * dilation + 1, (filter_shape[3] - 1) * dilation + 1) 157 | gradmat = _im_to_rows(top_grad, filter_shape, dilation, stride, dilated_shape, im_shape[2:]) 158 | image_mat = image.reshape((image.shape[0], image.shape[1], -1)).transpose((0, 2, 1)) 159 | filt_grad = np.matmul(gradmat.transpose((0, 2, 1)), image_mat).sum(axis=0).T.reshape(filter_shape) 160 | image_grad = 
gradmat.dot(filtmat) 161 | return image_grad.transpose((0, 2, 1)).reshape(image.shape), filt_grad 162 | 163 | 164 | def maxpool2d(image, pool_shape, dilation=1, stride=None): 165 | """ 166 | Performs the max-pooling operation on the image 167 | :param image: The image to be maxpooled 168 | :param pool_shape: The shape of the pool filter 169 | :param dilation: The dilation of the filter 170 | :param stride: The stride for the filter (defaults to the shape of the pool 171 | :return: The pooled image and the argmax cache used for backprop as a tuple 172 | """ 173 | if stride is None: 174 | stride = pool_shape 175 | im_shape = image.shape 176 | dilated_shape = ((pool_shape[0] - 1) * dilation + 1, (pool_shape[1] - 1) * dilation + 1) 177 | res_shape = ((im_shape[2] - dilated_shape[0]) // stride[0] + 1, (im_shape[3] - dilated_shape[1]) // stride[1] + 1) 178 | imrow = _im_to_rows(image, (1, im_shape[1]) + pool_shape, dilation, stride, dilated_shape, res_shape) 179 | imrow = imrow.reshape((imrow.shape[0], imrow.shape[1], im_shape[1], -1)) 180 | maxpooled = np.max(imrow, axis=3).transpose((0, 2, 1)) 181 | maxpooled = maxpooled.reshape((maxpooled.shape[0], maxpooled.shape[1], res_shape[0], res_shape[1])) 182 | max_indices = np.argmax(imrow, axis=3) 183 | return maxpooled, max_indices 184 | 185 | 186 | def backward_maxpool2d(top_grad, max_indices, image, pool_shape, dilation=1, stride=None): 187 | """ 188 | Performs the backward pass on the max-pool operation 189 | :param top_grad: The grad from the next op 190 | :param max_indices: The cache generated in the forward pass 191 | :param image: The original input image to this op 192 | :param pool_shape: The shape of the max-pool 193 | :param dilation: The dilation factor 194 | :param stride: The stride for the pool (defaults to the shape of the pool) 195 | :return: The gradient wrt the input image 196 | """ 197 | if stride is None: 198 | stride = pool_shape 199 | im_shape = image.shape 200 | dilated_shape = ((pool_shape[0] - 1) 
* dilation + 1, (pool_shape[1] - 1) * dilation + 1) 201 | res_shape = ((im_shape[2] - dilated_shape[0]) // stride[0] + 1, (im_shape[3] - dilated_shape[1]) // stride[1] + 1) 202 | gradrow = np.zeros((im_shape[0], res_shape[0] * res_shape[1], im_shape[1], pool_shape[0] * pool_shape[1]), 203 | dtype=top_grad.dtype) 204 | gradmat = top_grad.reshape((top_grad.shape[0], top_grad.shape[1], -1)).transpose((0, 2, 1)) 205 | i1, i2, i3 = np.ogrid[:image.shape[0], :res_shape[0] * res_shape[1], :im_shape[1]] 206 | gradrow[i1, i2, i3, max_indices] = gradmat 207 | inp_grad = _backward_im_to_rows(gradrow, image.shape, (1, im_shape[1]) + pool_shape, dilation, stride, 208 | dilated_shape, res_shape) 209 | return inp_grad 210 | 211 | 212 | def meanpool2d(image, pool_shape, dilation=1, stride=None): 213 | """ 214 | Performs the mean-pooling operation on the image 215 | :param image: The image to be mean pooled 216 | :param pool_shape: The shape of the pool filter 217 | :param dilation: The dilation of the filter 218 | :param stride: The stride for the filter (defaults to the shape of the pool 219 | :return: The pooled image and an empty cache to make it consistent with the max-pool API 220 | """ 221 | if stride is None: 222 | stride = pool_shape 223 | im_shape = image.shape 224 | dilated_shape = ((pool_shape[0] - 1) * dilation + 1, (pool_shape[1] - 1) * dilation + 1) 225 | res_shape = ((im_shape[2] - dilated_shape[0]) // stride[0] + 1, (im_shape[3] - dilated_shape[1]) // stride[1] + 1) 226 | imrow = _im_to_rows(image, (1, im_shape[1]) + pool_shape, dilation, stride, dilated_shape, res_shape) 227 | imrow = imrow.reshape((imrow.shape[0], imrow.shape[1], im_shape[1], -1)) 228 | meanpooled = np.mean(imrow, axis=3).transpose((0, 2, 1)) 229 | meanpooled = meanpooled.reshape((meanpooled.shape[0], meanpooled.shape[1], res_shape[0], res_shape[1])) 230 | return meanpooled, None 231 | 232 | 233 | def backward_meanpool2d(top_grad, cache, image, pool_shape, dilation=1, stride=None): 234 | """ 235 
| Performs the backward pass on the mean-pool operation 236 | :param top_grad: The grad from the next op 237 | :param cache: Not used 238 | :param image: The original input image to this op 239 | :param pool_shape: The shape of the mean-pool 240 | :param dilation: The dilation factor 241 | :param stride: The stride for the pool (defaults to the shape of the pool) 242 | :return: The gradient wrt the input image 243 | """ 244 | if stride is None: 245 | stride = pool_shape 246 | im_shape = image.shape 247 | dilated_shape = ((pool_shape[0] - 1) * dilation + 1, (pool_shape[1] - 1) * dilation + 1) 248 | res_shape = ((im_shape[2] - dilated_shape[0]) // stride[0] + 1, (im_shape[3] - dilated_shape[1]) // stride[1] + 1) 249 | gradrow = np.zeros((im_shape[0], res_shape[0] * res_shape[1], im_shape[1], pool_shape[0] * pool_shape[1]), 250 | dtype=top_grad.dtype) 251 | gradmat = top_grad.reshape((top_grad.shape[0], 252 | top_grad.shape[1], -1)).transpose((0, 2, 1)) / (pool_shape[0] * pool_shape[1]) 253 | gradrow[:, :, :, :] = gradmat[:, :, :, np.newaxis] 254 | inp_grad = _backward_im_to_rows(gradrow, image.shape, (1, im_shape[1]) + pool_shape, dilation, stride, 255 | dilated_shape, res_shape) 256 | return inp_grad 257 | 258 | 259 | def affine_transform(input_, weight, bias): 260 | """ 261 | Apply an affine transformation to the input 262 | :param input_: The input 263 | :param weight: The weight to be used 264 | :param bias: The bias to be used 265 | :return: The transformed input 266 | """ 267 | return input_.dot(weight) + bias 268 | 269 | 270 | def backward_affine_transform(top_grad, input_, weight): 271 | """ 272 | Perform a backward pass on the affine transformation 273 | :param top_grad: The gradient from the next op 274 | :param input_: The input used in the forward pass 275 | :param weight: The weight used in the forward pass 276 | :return: The gradients for the input, the weight and the bias 277 | """ 278 | bias_grad = top_grad.sum(axis=0) 279 | weight_grad = 
input_.T.dot(top_grad) 280 | input_grad = top_grad.dot(weight.T) 281 | return input_grad, weight_grad, bias_grad 282 | 283 | 284 | def pad2D(image, pad_shape): 285 | """ 286 | Pads an image with 2D padding of zeros 287 | :param image: The image to be padded (batch, channel, height, width) 288 | :param pad_shape: The shape of the symmetric pad (height_pad, width_pad) 289 | :return: The padded tensor 290 | """ 291 | return np.pad(image, ((0, 0), (0, 0), (pad_shape[0], pad_shape[0]), (pad_shape[1], pad_shape[1])), mode='constant') 292 | 293 | 294 | def backward_pad2D(top_grad, pad_shape): 295 | """ 296 | Performs the backward pass on the pad operation 297 | :param top_grad: Gradient from the next operation 298 | :param pad_shape: The pad shape for this op 299 | :return: The transformed gradient 300 | """ 301 | return top_grad[:, :, pad_shape[0]:-pad_shape[0], pad_shape[1]:-pad_shape[1]] 302 | 303 | 304 | def relu(x): 305 | """ 306 | Performs the ReLU operation on the input tensor 307 | :param x: The input tensor 308 | :return: The ReLU'd tensor and a cache used for backprop 309 | """ 310 | cache = x > 0 311 | return x * cache, cache 312 | 313 | 314 | def backward_relu(top_grad, cache): 315 | """ 316 | Performs the backward pass on the relu operator 317 | :param top_grad: The gradient from the next operator 318 | :param cache: The cache from the forward pass 319 | :return: The gradient wrt the input 320 | """ 321 | return top_grad * cache 322 | 323 | 324 | def sigmoid(x): 325 | """ 326 | Performs the element-wise sigmoid function 327 | :param x: The input tensor 328 | :return: The sigmoided tensor 329 | """ 330 | return 1.0 / (1 + np.exp(-x)) 331 | 332 | 333 | def backward_sigmoid(top_grad, inp_sigmoid): 334 | """ 335 | Performs the backward pass on the sigmoid operation 336 | :param top_grad: The grad from the next operation 337 | :param inp_sigmoid: The output of the forward pass 338 | :return: The gradient wrt the input of this op 339 | """ 340 | return top_grad * 
inp_sigmoid * (1 - inp_sigmoid) 341 | 342 | 343 | def swish(x): 344 | """ 345 | Performs the element-wise swish operation 346 | :param x: The input tensor 347 | :return: The swished tensor and the sigmoid values 348 | """ 349 | sigmoid_ = sigmoid(x) 350 | return x * sigmoid_, sigmoid_ 351 | 352 | 353 | def backward_swish(top_grad, output, sigmoid_): 354 | """ 355 | Performs the backward pass on the swish operation 356 | :param top_grad: The gradient from the next operation 357 | :param output: The output of this operation 358 | :param sigmoid_: The cache from the forward pass 359 | :return: The gradient wrt the inputs of this operation 360 | """ 361 | return top_grad * (sigmoid_ + output * (1 - sigmoid_)) 362 | 363 | 364 | def softmax(x): 365 | """ 366 | Performs the softmax operation on a 2D tensor 367 | :param x: The 2D tensor (batch, features) 368 | :return: The softmaxed tensor 369 | """ 370 | temp = np.exp(x - x.max(axis=1, keepdims=True)) 371 | res = temp / temp.sum(axis=1, keepdims=True) 372 | return res 373 | 374 | 375 | def backward_softmax(top_grad, inp_softmax): 376 | """ 377 | Performs the backward pass on the softmax operation 378 | :param top_grad: The gradient from the next operation 379 | :param inp_softmax: The output of this op 380 | :return: The gradient wrt the input 381 | """ 382 | left = inp_softmax[:, :, np.newaxis] 383 | right = inp_softmax[:, np.newaxis, :] 384 | sub = left * np.eye(inp_softmax.shape[1]) 385 | mul = np.matmul(left, right) 386 | res = np.matmul((sub - mul), top_grad[:, :, np.newaxis]).squeeze() 387 | return res 388 | 389 | 390 | def crossentropy(x, y): 391 | """ 392 | Generates the cross-entropy cost 393 | :param x: The input variable (batch, features) 394 | :param y: The ground truth (batch, ) (not one-hot) 395 | :return: The cost 396 | """ 397 | return np.mean(-np.log(x[np.arange(x.shape[0]), y])) 398 | 399 | 400 | def backward_crossentropy(top_grad, x, y): 401 | """ 402 | Performs the backward pass through the 
crossentropy function 403 | :param top_grad: The gradient from the next layer 404 | :param x: The input to this op 405 | :param y: The ground truth 406 | :return: The gradient wrt the input 407 | """ 408 | res = np.zeros(x.shape, dtype=x.dtype) 409 | res[np.arange(x.shape[0]), y] = - np.reciprocal(x[np.arange(x.shape[0]), y]) / x.shape[0] 410 | return res * top_grad 411 | 412 | 413 | def softmax_crossentropy(x, y): 414 | """ 415 | Calculates the softmax cross-entropy cost 416 | :param x: The input variable 417 | :param y: The ground truth (not one-hot) 418 | :return: The cost and a the softmaxed values 419 | """ 420 | s = softmax(x) 421 | return crossentropy(s, y), s 422 | 423 | 424 | def backward_softmax_crossentropy(top_grad, inp_softmax, y): 425 | """ 426 | Backward pass through the softmax crossentropy op 427 | :param top_grad: The gradient from the next layer. 428 | :param inp_softmax: The softmax generated in the forward pass 429 | :param y: The ground truth (not one-hot) 430 | :return: THe gradient wrt the input 431 | """ 432 | res = inp_softmax 433 | res[np.arange(res.shape[0]), y] -= 1 434 | return top_grad * res / inp_softmax.shape[0] 435 | 436 | 437 | def flatten(x): 438 | """ 439 | Flattens the tensor into a 2D one 440 | :param x: The tensor to be flattened 441 | :return: The flattened tensor and the original shape 442 | """ 443 | return x.reshape((x.shape[0], -1)), x.shape 444 | 445 | 446 | def backward_flatten(top_grad, original_shape): 447 | """ 448 | Performs a backward pass on the flatten operation 449 | :param top_grad: The gradient from the next op 450 | :param original_shape: The shape generated during the forward pass 451 | :return: The gradient wrt the inputs to this op 452 | """ 453 | return top_grad.reshape(original_shape) 454 | 455 | 456 | def reshape(x, new_shape): 457 | """ 458 | Reshape the input to the new shape (preserves the batch) 459 | :param x: The input 460 | :param new_shape: The new shape 461 | :return: The reshaped tensor 462 | 
""" 463 | old_shape = x.shape[1:] 464 | return x.reshape((x.shape[0], *new_shape)), old_shape 465 | 466 | 467 | def backward_reshape(top_grad, old_shape): 468 | """ 469 | Perform the backward pass on the reshape operation 470 | :param top_grad: The gradient from the next layer 471 | :param old_shape: The old shape 472 | :return: The gradient for the input 473 | """ 474 | return top_grad.reshape((top_grad.shape[0], *old_shape)) 475 | 476 | 477 | def tensor_sum(input_): 478 | """ 479 | Sum the values of the tensor 480 | :param input_: The tensor 481 | :return: The sum 482 | """ 483 | return input_.sum() 484 | 485 | 486 | def backward_tensor_sum(input_): 487 | """ 488 | The backward pass for the sum 489 | :param input_: The input used in the forward pass 490 | :return: The gradient for the input 491 | """ 492 | return np.ones_like(input_) 493 | 494 | 495 | def sse(x, y): 496 | """ 497 | Sum of squared error between two tensors. Average across the batch. 498 | :param x: The input tensor 499 | :param y: The target tensor 500 | :return: The squared error 501 | """ 502 | return ((x - y) ** 2).sum() / x.shape[0] 503 | 504 | 505 | def backward_sse(top_grad, x, y): 506 | """ 507 | Get the gradient with respect to x. 508 | :param top_grad: The gradient from the next layer 509 | :param x: The input 510 | :param y: The target 511 | :return: The grad wrt x 512 | """ 513 | return top_grad * 2 * (x - y) / x.shape[0] 514 | --------------------------------------------------------------------------------