├── .gitignore ├── LICENSE ├── README.md ├── demo_cifar.py ├── demo_mnist.py ├── setup.py └── simple_convnet ├── __init__.py ├── convnet.py ├── helpers.py └── tests └── simple_convnet_tests.py /.gitignore: -------------------------------------------------------------------------------- 1 | # simple_convnet specific stuff 2 | data/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | bin/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # Installer logs 29 | pip-log.txt 30 | pip-delete-this-directory.txt 31 | 32 | # Unit test / coverage reports 33 | htmlcov/ 34 | .tox/ 35 | .coverage 36 | .cache 37 | nosetests.xml 38 | coverage.xml 39 | 40 | # Translations 41 | *.mo 42 | 43 | # Mr Developer 44 | .mr.developer.cfg 45 | .project 46 | .pydevproject 47 | 48 | # Rope 49 | .ropeproject 50 | 51 | # Django stuff: 52 | *.log 53 | *.pot 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Boris Babenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | SimpleConvnet 2 | ============== 3 | 4 | This is a basic implementation of a convolutional neural net. It is meant primarily for pedagogical 5 | purposes -- if you are looking for a fully featured, efficient implementation, there are a few other 6 | options I'd recommend: 7 | 8 | * [cuda-convnet](https://code.google.com/p/cuda-convnet/) 9 | * [caffe](http://caffe.berkeleyvision.org/) 10 | 11 | ### Installing 12 | To install, run: 13 | 14 | ```bash 15 | python setup.py install 16 | ``` 17 | 18 | ### Dependencies 19 | * matplotlib 1.1 20 | * numpy 1.6 21 | * scipy 0.10 22 | * scikit-image 0.9 23 | * scikit-learn 0.14 24 | * opencv 2.4 25 | 26 | ### Running unit tests 27 | To run unit tests you will need nosetests installed. 
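If it is not already available, nose can typically be installed with pip:

```bash
pip install nose
```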
You can run all unit tests with this: 28 | 29 | ``` 30 | nosetests -v 31 | ``` 32 | -------------------------------------------------------------------------------- /demo_cifar.py: -------------------------------------------------------------------------------- 1 | import cPickle as pickle 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | from os.path import join 5 | from sklearn.metrics import confusion_matrix 6 | 7 | from simple_convnet import convnet as cn 8 | 9 | #################################################################################################### 10 | # LOAD DATA 11 | 12 | def load_batch(fname): 13 | with open('data/cifar-10-batches-py/%s'%fname, 'rb') as f: 14 | data = pickle.load(f) 15 | x = data['data'].reshape((-1,3,32,32)).astype('float32')/255 16 | x = np.rollaxis(x, 1, 4) 17 | y = np.array(data['labels']) 18 | return x, y 19 | 20 | num_train_batch = 3 # can go up to 4 if memory allows 21 | train_x = np.zeros((num_train_batch*10000,32,32,3), dtype='float32') 22 | train_y = np.zeros(num_train_batch*10000, dtype='float32') 23 | for b in xrange(num_train_batch): 24 | train_x[b*10000:(b+1)*10000,...], train_y[b*10000:(b+1)*10000] = \ 25 | load_batch('data_batch_%d' % (b+1)) 26 | mean_x = train_x[::10,...].mean(0)[np.newaxis,...] 27 | 28 | val_x, val_y = load_batch('data_batch_5') 29 | test_x, test_y = load_batch('test_batch') 30 | 31 | #################################################################################################### 32 | # SET UP PARAMETERS 33 | 34 | layer_args = [ 35 | (cn.ConvLayer, dict(num_filters=32, filter_shape=(5,5), init_from=None)), 36 | (cn.BiasLayer, dict(init_val=1)), 37 | (cn.ReluLayer, dict()), 38 | (cn.MeanPoolingLayer, dict(pool_size=2)), 39 | (cn.ConvLayer, dict(num_filters=32, filter_shape=(5,5))), 40 | (cn.BiasLayer, dict(init_val=1)), 41 | (cn.ReluLayer, dict()), 42 | (cn.MeanPoolingLayer, dict(pool_size=2)), 43 | (cn.DenseLayer, dict(num_nodes=64)), 44 | (cn.BiasLayer, dict(init_val=1)), 45 | (cn.ReluLayer, dict()), 46 | (cn.DenseLayer, dict(num_nodes=10)), 47 | (cn.BiasLayer, dict()) 48 | ] 49 | 50 | fit_args = dict( 51 | val_freq=20, 52 | batch_size=32, 53 | num_epoch=30, 54 | weight_decay=0.0005, 55 | learn_rate_decay=.00005, 56 | chill_out_iters=100, 57 | momentum=0.9, 58 | learn_rate=.01) 59 | 60 | #################################################################################################### 61 | # TRAIN AND TEST 62 | 63 | net = cn.SoftmaxNet(layer_args=layer_args, 64 | input_shape=train_x.shape[1:], 65 | rand_state=np.random.RandomState(0)) 66 | 67 | net.fit(train_x, 68 | train_y, 69 | val_x=val_x[::30,:], 70 | val_y=val_y[::30], 71 | verbose=True, 72 | **fit_args) 73 | 74 | with open(join('data', 'cifar_model.pkl'), 'wb') as f: 75 | pickle.dump((net, layer_args, fit_args), f) 76 | 77 | yp = net.predict(test_x, batch_size=128) 78 | print 'test accuracy: %f' % np.mean(yp == test_y) 79 | 80 | conf = confusion_matrix(test_y, yp) 81 | plt.matshow(conf) 82 | plt.xticks(np.arange(10)) 83 | plt.yticks(np.arange(10)) 84 | -------------------------------------------------------------------------------- /demo_mnist.py: -------------------------------------------------------------------------------- 1 | import cPickle as pickle 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | from sklearn.metrics import confusion_matrix 5 | from os.path import join 6 | 7 | from simple_convnet import convnet as cn 8 | 9 | 
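# Note: the code below reads an uncompressed pickle from data/mnist.pkl, while the file
# linked below downloads as mnist.pkl.gz.  One way to produce data/mnist.pkl (assuming the
# .gz file has already been saved under data/) is, for example:
#
#   import gzip, shutil
#   with gzip.open('data/mnist.pkl.gz', 'rb') as fin, open('data/mnist.pkl', 'wb') as fout:
#       shutil.copyfileobj(fin, fout)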
#################################################################################################### 10 | # LOAD DATA 11 | 12 | # Download at http://deeplearning.net/data/mnist/mnist.pkl.gz 13 | with open('data/mnist.pkl', 'rb') as f: 14 | (train_x, train_y), (val_x, val_y), (test_x, test_y) = pickle.load(f) 15 | 16 | train_x = train_x.reshape((-1,28,28,1)).astype('float32') 17 | test_x = test_x.reshape((-1,28,28,1)).astype('float32') 18 | val_x = val_x.reshape((-1,28,28,1)).astype('float32') 19 | 20 | #################################################################################################### 21 | # SET UP PARAMETERS 22 | 23 | ### simpler net with much fewer params 24 | # layer_args = [ 25 | # (cn.ConvLayer, dict(num_filters=20, filter_shape=(9,9))), 26 | # (cn.BiasLayer, dict(init_val=0.1)), 27 | # (cn.ReluLayer, dict()), 28 | # (cn.MeanPoolingLayer, dict(pool_size=2)), 29 | # (cn.DenseLayer, dict(num_nodes=10)), 30 | # (cn.BiasLayer, dict()) 31 | # ] 32 | 33 | ### closer to LeNet5 34 | layer_args = [ 35 | (cn.ConvLayer, dict(num_filters=8, filter_shape=(5,5))), 36 | (cn.BiasLayer, dict(init_val=0.1)), 37 | (cn.ReluLayer, dict()), 38 | (cn.MeanPoolingLayer, dict(pool_size=2)), 39 | (cn.ConvLayer, dict(num_filters=16, filter_shape=(5,5))), 40 | (cn.BiasLayer, dict(init_val=0.1)), 41 | (cn.ReluLayer, dict()), 42 | (cn.MeanPoolingLayer, dict(pool_size=2)), 43 | (cn.DenseLayer, dict(num_nodes=128)), 44 | (cn.BiasLayer, dict(init_val=0.1)), 45 | (cn.ReluLayer, dict()), 46 | (cn.DenseLayer, dict(num_nodes=10)), 47 | (cn.BiasLayer, dict()) 48 | ] 49 | 50 | fit_args = dict( 51 | val_freq=10, 52 | batch_size=64, 53 | num_epoch=3, 54 | weight_decay=0.0005, 55 | momentum=0.9, 56 | learn_rate=1e-1) 57 | 58 | #################################################################################################### 59 | # TRAIN AND TEST 60 | 61 | net = cn.SoftmaxNet(layer_args=layer_args, 62 | input_shape=train_x.shape[1:], 63 | rand_state=np.random.RandomState(0)) 64 | net.fit(train_x, 65 | train_y, 66 | val_x=val_x[::10,:], 67 | val_y=val_y[::10], 68 | verbose=True, 69 | **fit_args) 70 | 71 | with open(join('data', 'mnist_model_deep.pkl'), 'wb') as f: 72 | pickle.dump((net, layer_args, fit_args), f) 73 | 74 | yp = net.predict(test_x, batch_size=128) 75 | print 'test accuracy: %f' % np.mean(yp == test_y) 76 | 77 | conf = confusion_matrix(test_y, yp) 78 | plt.matshow(conf) 79 | plt.xticks(np.arange(10)) 80 | plt.yticks(np.arange(10)) 81 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | setup(name='SimpleConvnet', 6 | version='1.0', 7 | description='A basic implementation of convolutional neural nets', 8 | author='Boris', 9 | author_email='bbabenko@gmail.com', 10 | packages=['simple_convnet'], 11 | install_requires=[ 12 | 'matplotlib', 13 | 'numpy', 14 | 'scipy', 15 | 'scikit-image', 16 | 'scikit-learn', 17 | ], 18 | ) -------------------------------------------------------------------------------- /simple_convnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bbabenko/simple_convnet/015dd33be80365f704049790a547a2b3e0251f2a/simple_convnet/__init__.py -------------------------------------------------------------------------------- /simple_convnet/convnet.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from simple_convnet.helpers import ( 4 | filter2D, batch_filter3D, padarray, atleast, safe_exp, safe_log, choice, imshow 5 | ) 6 | from matplotlib import pyplot as plt 7 | from time import time 8 | from skimage.transform import downscale_local_mean 9 | 10 | class Layer(object): 11 | def __init__(self, input_shape, rand_state=np.random): 12 | """ 13 | Layer constructor (abstract). 14 | 15 | Parameters 16 | ---------- 17 | input_shape : tuple of ints specifying shape of a single input 18 | rand_state : a RandomState object 19 | 20 | """ 21 | self.input_shape = np.array(input_shape) 22 | self.output_shape = self.input_shape 23 | 24 | def forward(self, input_act): 25 | """ 26 | Forward propagation. This class is mostly wraps around _forward and does some extra 27 | asserts. Child classes should overwrite _forward rather than this method. 28 | 29 | Parameters 30 | ---------- 31 | input_act : numpy array, activations from the layer below; shape must either be the same as 32 | self.input_shape, or (NUMBER_OF_EXAMPLES,) + self.input_shape 33 | 34 | Returns 35 | ------- 36 | output_act : numpy array, output activations from this layer; shape will be 37 | self.output_shape or (NUMBER_OF_EXAMPLES,) + self.output_shape, depending on the input 38 | 39 | """ 40 | input_ndim = len(self.input_shape) 41 | assert input_act.shape[-input_ndim:] == tuple(self.input_shape), 'wrong input shape' 42 | many = (input_act.ndim > input_ndim) 43 | input_act = atleast(input_act, input_ndim+1) 44 | 45 | act = self._forward(input_act) 46 | 47 | assert act.shape[1:] == tuple(self.output_shape), 'wrong output shape' 48 | return act if many else act[0,...] 49 | 50 | def backward(self, grad_act, input_act): 51 | """ 52 | Backward propagation. This class is mostly wraps around _backward and does some extra 53 | asserts. Child classes should overwrite _backward rather than this method. 
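        For instance, for a DenseLayer with input_shape (6,) and num_nodes=4, a batch of 8
        examples gives an input_act of shape (8, 6) and a grad_act of shape (8, 4); backward()
        then returns a grad_input_act of shape (8, 6) together with a flat grad_params of
        length num_params() (6*4 = 24 in that case).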
54 | 55 | Parameters 56 | ---------- 57 | grad_act : nump array, gradient of cost function with respect to the activations from this 58 | layer (usually calculated in the layer above and passed down during backward 59 | propagation), shape is self.output_shape or (NUMBER_OF_EXAMPLES,) + self.output_shape 60 | input_act : numpy array, activations from the layer below; shape must either be the same as 61 | self.input_shape, or (NUMBER_OF_EXAMPLES,) + self.input_shape 62 | 63 | Returns 64 | ------- 65 | grad_input_act : numpy array, gradient of cost function with respect to the input 66 | activations this layer received, which is to be passed down to the layer below; shape 67 | will be self.input_shape or (NUMBER_OF_EXAMPLES,) + self.input_shape, depending on the 68 | input 69 | grad_params : 1D numpy array of length self.num_params() (or None if self.num_params()==0), 70 | gradient of cost function with respect to the params of this layer 71 | 72 | """ 73 | input_ndim = len(self.input_shape) 74 | output_ndim = len(self.output_shape) 75 | 76 | assert grad_act.shape[-output_ndim:] == tuple(self.output_shape), 'wrong grad input shape' 77 | assert input_act.shape[-input_ndim:] == tuple(self.input_shape), 'wrong input shape' 78 | assert ((grad_act.ndim==output_ndim and input_act.ndim==input_ndim) 79 | or grad_act.shape[0] == input_act.shape[0]), 'wrong number of samples' 80 | many = (input_act.ndim > input_ndim) 81 | input_act = atleast(input_act, input_ndim+1) 82 | grad_act = atleast(grad_act, output_ndim+1) 83 | 84 | grad_input_act, grad_params = self._backward(grad_act, input_act) 85 | 86 | assert grad_input_act.shape[1:] == tuple(self.input_shape), \ 87 | 'wrong input act grad shape' 88 | if self.num_params() > 0: 89 | grad_params = grad_params.ravel() 90 | assert grad_params.size == self.num_params(), 'wrong param grad shape' 91 | 92 | return (grad_input_act if many else grad_input_act[0,...], grad_params) 93 | 94 | ################################################################################################ 95 | ### METHODS TO OVERWRITE IN CHILD CLASSES 96 | def num_params(self): 97 | """ 98 | Returns the number of parameters in this layer 99 | """ 100 | return 0 101 | 102 | def get_params(self): 103 | """ 104 | Returns a 1D numpy array, length self.num_params(), with the parameters of this layer. 105 | """ 106 | return None 107 | 108 | def set_params(self, params): 109 | """ 110 | Sets the parameters of this layer 111 | 112 | Parameters 113 | ---------- 114 | params : 1D numpy array, length self.num_params(), with the parameters of this layer 115 | 116 | """ 117 | pass 118 | 119 | def _forward(self, input_act): 120 | """ 121 | Forward propagation. 122 | 123 | Parameters 124 | ---------- 125 | input_act : numpy array, activations from the layer below; shape is 126 | (NUMBER_OF_EXAMPLES,) + self.input_shape 127 | 128 | Returns 129 | ------- 130 | output_act : numpy array, output activations from this layer; shape will be 131 | (NUMBER_OF_EXAMPLES,) + self.output_shape 132 | 133 | """ 134 | raise NotImplemented 135 | 136 | def _backward(self, grad_act, input_act): 137 | """ 138 | Backward propagation. 
139 | 140 | Parameters 141 | ---------- 142 | grad_act : nump array, gradient of cost function with respect to the activations from this 143 | layer (usually calculated in the layer above and passed down during backward 144 | propagation), shape is (NUMBER_OF_EXAMPLES,) + self.output_shape 145 | input_act : numpy array, activations from the layer below; shape must either be the same as 146 | (NUMBER_OF_EXAMPLES,) + self.input_shape 147 | 148 | Returns 149 | ------- 150 | grad_input_act : numpy array, gradient of cost function with respect to the input 151 | activations this layer received, which is to be passed down to the layer below; shape 152 | will be (NUMBER_OF_EXAMPLES,) + self.input_shape 153 | grad_params : 1D numpy array of length self.num_params() (or None if self.num_params()==0), 154 | gradient of cost function with respect to the params of this layer 155 | 156 | """ 157 | # returns next grad_act (layer below), and grad_params for this layer 158 | raise NotImplemented 159 | 160 | class ConvLayer(Layer): 161 | def __init__(self, 162 | input_shape, 163 | num_filters=1, 164 | filter_shape=(3,3), 165 | init_from=None, 166 | rand_state=np.random): 167 | """ 168 | Convolutional layer. 169 | 170 | Parameters 171 | ---------- 172 | input_shape : tuple of ints specifying shape of a single input; this particular layer 173 | expects the input shape to be 3D (height x width x channels) 174 | num_filters : int, number of filters in this layer 175 | filter_shape : tuple specifying height and width of the filters (current implementation 176 | only square filters) 177 | init_from : (experimental feature) a dataset to use in initializing filters 178 | rand_state : a RandomState object 179 | 180 | """ 181 | super(ConvLayer, self).__init__(input_shape) 182 | assert filter_shape[0]%2 == 1 and filter_shape[1]%2 ==1 183 | assert filter_shape[0] == filter_shape[1], 'Only square filters currently supported' 184 | if init_from is not None: 185 | # a bit of a hack to try out... 
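            # roughly: each filter is seeded with a random filter-sized crop of the average
            # of 15 randomly drawn examples from init_from, scaled down by a factor of 10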
186 | assert init_from.shape[3] == input_shape[2] 187 | assert init_from.shape[0] > 5 188 | self.filters_ = np.zeros(filter_shape + (input_shape[2], num_filters), dtype='float32') 189 | for i in xrange(num_filters): 190 | sample = init_from[choice(15, init_from.shape[0]),...].mean(0) 191 | r_start = rand_state.randint(init_from.shape[1] - filter_shape[0]) 192 | c_start = rand_state.randint(init_from.shape[2] - filter_shape[1]) 193 | self.filters_[...,i] = sample[r_start:r_start+filter_shape[0], 194 | c_start:c_start+filter_shape[1], 195 | ...]/10 196 | else: 197 | self.filters_ = rand_state.randn(*(filter_shape + (input_shape[2], num_filters))) 198 | self.filters_ /= np.sqrt(np.prod(self.filters_.shape[:-1])) 199 | self.filters_ = self.filters_.astype('float32') 200 | self.filter_shape = filter_shape 201 | self.filter_pad = (filter_shape[0]/2, filter_shape[1]/2) 202 | self.output_shape = np.array([self.input_shape[0] - filter_shape[0] + 1, 203 | self.input_shape[1] - filter_shape[1] + 1, 204 | num_filters]) 205 | 206 | def viz(self, num_row=1): 207 | """ 208 | Displays the filters in this layer (only makes sense for the first layer of a network) 209 | """ 210 | num_filters = self.filters_.shape[-1] 211 | fig = plt.figure() 212 | num_col = int(np.ceil(float(num_filters)/num_row)) 213 | 214 | for i in xrange(num_filters): 215 | ax = fig.add_subplot(num_row, num_col, i) 216 | imshow(self.filters_[...,i], ax=ax) 217 | 218 | def num_params(self): 219 | return np.prod(self.filters_.shape) 220 | 221 | def get_params(self): 222 | return self.filters_.ravel() 223 | 224 | def set_params(self, params): 225 | self.filters_ = params.reshape(self.filters_.shape) 226 | 227 | def _forward(self, input_act): 228 | fp = self.filter_pad 229 | act = batch_filter3D(input_act, self.filters_) 230 | act = act[:,fp[0]:-fp[0],fp[1]:-fp[1],:] 231 | return act 232 | 233 | def _backward(self, grad_act, input_act): 234 | # this is probably the trickiest method in this entire module... 235 | 236 | # input activation gradient -- notice that we have to flip the filters horizontally and 237 | # vertically 238 | rev_filters = np.fliplr(np.flipud(self.filters_)) 239 | 240 | # note: opencv doesn't like arbitrary slices of numpy arrays, so we need to shuffle the 241 | # dimensions around a little bit 242 | 243 | # rev_filters will now be NUM_FILTERS x NUM_CHANNELS x ... 244 | rev_filters = np.rollaxis(np.rollaxis(rev_filters, 2, 0), 3, 0).copy() 245 | padded_grad_act = padarray(grad_act, self.filter_pad) 246 | # padded_grad_act will now be NUM_FILTERS x NUM_EXAMPLES x ... 247 | padded_grad_act = np.rollaxis(padded_grad_act, 3, 0).copy() 248 | grad_input_act = np.zeros(input_act.shape, dtype='float32') 249 | for z in xrange(input_act.shape[0]): 250 | for c in xrange(input_act.shape[-1]): 251 | for f in xrange(self.filters_.shape[-1]): 252 | grad_input_act[z,:,:,c] += filter2D(padded_grad_act[f,z], rev_filters[f,c]) 253 | 254 | # grad_input_act = grad_input_act.sum(-1) 255 | 256 | # params gradient 257 | grad_params = np.zeros((input_act.shape[1:4] + (grad_act.shape[-1],)), dtype='float32') 258 | # grad_act_ will now be NUM_FILTERS x NUM_EXAMPLES x ... 259 | grad_act_ = np.rollaxis(grad_act, 3, 0).copy() 260 | # padded_grad_act will now be NUM_CHANNELS x NUM_EXAMPLES x ... 
261 | input_act = np.rollaxis(input_act, 3, 0).copy() 262 | for n in xrange(input_act.shape[1]): 263 | for c in xrange(input_act.shape[0]): 264 | for f in xrange(grad_act.shape[-1]): 265 | grad_params[:,:,c,f] += filter2D(input_act[c,n], grad_act_[f,n]) 266 | grad_params /= input_act.shape[1] 267 | 268 | r_border, c_border = grad_act.shape[1]/2, grad_act.shape[2]/2 269 | if grad_act.shape[1] %2 == 0: 270 | grad_params = grad_params[r_border:-r_border+1, c_border:-c_border+1,...] 271 | else: 272 | grad_params = grad_params[r_border:-r_border, c_border:-c_border,...] 273 | assert grad_params.shape == self.filters_.shape, 'wrong param grad shape' 274 | 275 | return grad_input_act, grad_params.ravel() 276 | 277 | class MeanPoolingLayer(Layer): 278 | def __init__(self, input_shape, pool_size=2, rand_state=np.random): 279 | """ 280 | Mean pooling layer. There are no learnable parameters in this layer type. 281 | 282 | Parameters 283 | ---------- 284 | input_shape : tuple of ints specifying shape of a single input 285 | pool_size : int, size of the pooling window (stride will be the same as this size, in other 286 | words no overlap in the pooling) 287 | rand_state : a RandomState object 288 | 289 | """ 290 | super(MeanPoolingLayer, self).__init__(input_shape) 291 | self.output_shape = self.input_shape / np.array([pool_size, pool_size, 1]) 292 | self.pool_size = pool_size 293 | 294 | def _forward(self, input_act): 295 | act = downscale_local_mean(np.rollaxis(input_act, 0, 4), 296 | (self.pool_size, self.pool_size, 1, 1)) 297 | return np.rollaxis(act, 3, 0) 298 | 299 | def _backward(self, grad_act, input_act): 300 | kron_kernel = np.ones((self.pool_size,self.pool_size))[np.newaxis,...,np.newaxis] 301 | grad_input_act = np.kron(grad_act, kron_kernel)/self.pool_size/self.pool_size 302 | return grad_input_act, None 303 | 304 | class ReluLayer(Layer): 305 | """ 306 | Rectified linear unit layer. There are no learnable parameters in this layer type. 307 | """ 308 | def _forward(self, input_act): 309 | return input_act * (input_act>0) 310 | 311 | def _backward(self, grad_act, input_act): 312 | return (input_act>0).astype('float')*grad_act, None 313 | 314 | class SigmoidLayer(Layer): 315 | """ 316 | Sigmoid unit layer. There are no learnable parameters in this layer type. 317 | """ 318 | @staticmethod 319 | def _sigmoid(x): 320 | return 1.0/(1.0+np.exp(-x)) 321 | 322 | def _forward(self, input_act): 323 | return SigmoidLayer._sigmoid(input_act) 324 | 325 | def _backward(self, grad_act, input_act): 326 | out = SigmoidLayer._sigmoid(input_act) 327 | return out*(1.0-out)*grad_act, None 328 | 329 | class DenseLayer(Layer): 330 | def __init__(self, input_shape, num_nodes=1, rand_state=np.random): 331 | """ 332 | Dense/fully connected layer. 
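        The input is flattened, so the layer holds prod(input_shape) x num_nodes weights and
        no bias terms (see BiasLayer). For instance, with input_shape (4, 4, 8) and
        num_nodes=10, the weight matrix has shape (128, 10).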
333 | 334 | Parameters 335 | ---------- 336 | input_shape : tuple of ints specifying shape of a single input 337 | num_nodes : int, number of nodes in the layer 338 | rand_state : a RandomState object 339 | 340 | """ 341 | super(DenseLayer, self).__init__(input_shape) 342 | self.output_shape = np.array([num_nodes]) 343 | self.weights_ = rand_state.randn(np.prod(self.input_shape), num_nodes).astype('float32') 344 | self.weights_ /= np.sqrt(np.prod(self.weights_.shape)) 345 | 346 | def num_params(self): 347 | return self.weights_.size 348 | 349 | def get_params(self): 350 | return self.weights_.ravel() 351 | 352 | def set_params(self, params): 353 | self.weights_ = params.reshape(self.weights_.shape) 354 | 355 | def _forward(self, input_act): 356 | input_act = input_act.reshape((-1,self.weights_.shape[0])) 357 | return np.dot(input_act, self.weights_) 358 | 359 | def _backward(self, grad_act, input_act): 360 | input_act = input_act.reshape((-1,self.weights_.shape[0])) 361 | 362 | grad_input_act = np.dot(grad_act, self.weights_.T) 363 | grad_input_act = grad_input_act.reshape((-1,) + tuple(self.input_shape)) 364 | 365 | grad_params = np.array([np.outer(act, grad) for act, grad in zip(input_act, grad_act)]) 366 | grad_params = grad_params.mean(0) 367 | 368 | return grad_input_act, grad_params 369 | 370 | class BiasLayer(Layer): 371 | def __init__(self, input_shape, init_val=0, rand_state=np.random): 372 | """ 373 | Bias layer. For an input shape of [...] x N, this layer adds N bias terms. E.g., for a 374 | convolutional layer with an output of shape WxHxC where C is the number of channels/filters, 375 | this layer will contain C bias terms, one for each filter. 376 | 377 | Parameters 378 | ---------- 379 | input_shape : tuple of ints specifying shape of a single input 380 | init_val : float, value to initialize all weights with 381 | rand_state : a RandomState object 382 | 383 | """ 384 | super(BiasLayer, self).__init__(input_shape) 385 | # assert len(input_shape) == 3 386 | self.output_shape = np.array(input_shape) 387 | self.weights_ = np.ones(input_shape[-1]) * init_val 388 | 389 | def num_params(self): 390 | return self.weights_.size 391 | 392 | def get_params(self): 393 | return self.weights_.ravel() 394 | 395 | def set_params(self, params): 396 | self.weights_ = params.reshape(self.weights_.shape) 397 | 398 | def _forward(self, input_act): 399 | return input_act + self.weights_ 400 | 401 | def _backward(self, grad_act, input_act): 402 | grad_input_act = grad_act 403 | # sum over the width and height dimensions (if any), average over all input examples 404 | grad_params = grad_act.mean(0) 405 | while grad_params.ndim > 1: 406 | grad_params = grad_params.sum(0) 407 | 408 | return grad_input_act, grad_params 409 | 410 | class NNet(object): 411 | def __init__(self, layer_args, input_shape, rand_state=np.random): 412 | """ 413 | Abstract neural net class. 414 | 415 | Parameters 416 | ---------- 417 | layer_args : list of (LayerClass, kwargs) tuples where LayerClass is a class that inherits 418 | from the Layer class, and kwargs are to be passed into the constructor of that class. 419 | layer_args[0] is the first layer, closest to the input, and layer_args[-1] is the 420 | top-most layer. The kwargs need not include the input_shape argument -- this will be 421 | determined automatically starting with the input_shape for the network (see below). 
422 | input_shape : tuple of ints specifying shape of a single input to the network 423 | rand_state : a RandomState object 424 | 425 | """ 426 | # layer_args is a list of (layer_class, layer_init_args) for first through last layer 427 | self.layers_ = [] 428 | self.input_shape = input_shape 429 | for args in layer_args: 430 | layer_class, args = args 431 | args['rand_state'] = rand_state 432 | layer = layer_class(input_shape, **args) 433 | self.layers_.append(layer) 434 | # get input shape for the next layer 435 | input_shape = layer.output_shape 436 | 437 | self._rand_state = rand_state 438 | self._cache_acts = None 439 | 440 | # this will keep track of how many batches and epochs have been trained 441 | self.num_batch = 0 442 | self.num_epoch = 0 443 | 444 | def set_params(self, params): 445 | """ 446 | Set parameters to the network (i.e. all the layer parameters). 447 | 448 | Parameters 449 | ---------- 450 | params : numpy array of length self.num_params() 451 | 452 | """ 453 | ind = 0 454 | for layer in self.layers_: 455 | num_params = layer.num_params() 456 | if num_params: 457 | layer.set_params(params[ind:ind+num_params]) 458 | ind += num_params 459 | 460 | def get_params(self): 461 | """ 462 | Returns a single numpy array of length self.num_params() with all the parameters (i.e. all 463 | the layer parameters concatenated into one vector). 464 | """ 465 | return np.concatenate([layer.get_params() 466 | for layer in self.layers_ if layer.get_params() is not None]) 467 | 468 | def num_params(self): 469 | """ 470 | Returns the number of (learnable) parameters in the entire network. 471 | """ 472 | return np.sum([layer.num_params() for layer in self.layers_]) 473 | 474 | def num_nodes(self): 475 | """ 476 | Returns the number of nodes/neurons in the network. 477 | """ 478 | return (np.sum(np.prod(layer.output_shape) for layer in self.layers_) + 479 | np.prod(self.input_shape)) 480 | 481 | def cost_for_params(self, params, x, y=None): 482 | """ 483 | Calculates the cost of the network for the specified inputs and the specified network 484 | params. 485 | 486 | Parameters 487 | ---------- 488 | params : numpy array of length self.num_params() specified network parameters 489 | x : input examples 490 | y : labels of the examples 491 | 492 | Returns 493 | ------- 494 | cost : float 495 | 496 | """ 497 | curr_params = self.get_params() 498 | self.set_params(params) 499 | cost = self.cost(x, y=y) 500 | # revert params 501 | self.set_params(curr_params) 502 | return cost 503 | 504 | def cost(self, x, y=None, final_acts=None): 505 | """ 506 | Calculates the cost of the network for the specified inputs. Child classes should 507 | implement _cost rather than this method. 508 | 509 | Parameters 510 | ---------- 511 | x : numpy array, training examples; shape should be (NUMBER_OF_EXAMPLES,) + self.input_shape 512 | y : numpy array, training labels, shape should be (NUMBER_OF_EXAMPLES, shape of labels) 513 | final_acts : (optional) output of top-most layer in the network for the set of examples 514 | 515 | Returns 516 | ------- 517 | cost : float 518 | 519 | """ 520 | if final_acts is None: 521 | final_acts = self.forward(x)[-1] 522 | return self._cost(final_acts, y) 523 | 524 | def forward(self, x, batch_size=None): 525 | """ 526 | Forward propagation through the whole network. 
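        All intermediate activations are kept and returned, since param_grad needs them for
        backpropagation; when only the final output is required, forward_final below processes
        the input in batches and keeps only the last layer's activations.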
527 | 528 | Parameters 529 | ---------- 530 | x : numpy array, training examples; shape should be (NUMBER_OF_EXAMPLES,) + self.input_shape 531 | 532 | Returns 533 | ------- 534 | acts : list that contains a numpy array for each layer in the network; the first element in 535 | the list is the array x itself, and each following array is the output of that layer for 536 | the given examples x 537 | 538 | """ 539 | acts = [x] 540 | for layer in self.layers_: 541 | act = layer.forward(acts[-1]) 542 | acts.append(act) 543 | return acts 544 | 545 | def forward_final(self, x, batch_size=None): 546 | """ 547 | Forward propagation through the whole network; returns only output of final layer. 548 | 549 | Parameters 550 | ---------- 551 | x : numpy array, training examples; shape should be (NUMBER_OF_EXAMPLES,) + self.input_shape 552 | batch_size : number of samples to process at a time (conserves memory) 553 | 554 | Returns 555 | ------- 556 | acts : activations of the final layer 557 | 558 | """ 559 | if batch_size is None or batch_size > x.shape[0]: 560 | batch_size = x.shape[0] 561 | 562 | ind = 0 563 | res = [] 564 | while ind < x.shape[0]: 565 | acts = x[ind:ind+batch_size,...] 566 | for layer in self.layers_: 567 | acts = layer.forward(acts) 568 | res.append(acts) 569 | ind += batch_size 570 | return np.concatenate(res) if len(res)>1 else res[0] 571 | 572 | def param_grad(self, x, y=None, acts=None): 573 | """ 574 | Calculate the gradient of the cost function with respect to all learnable parameters of this 575 | network. 576 | 577 | Parameters 578 | ---------- 579 | x : numpy array, training examples; shape should be (NUMBER_OF_EXAMPLES,) + self.input_shape 580 | y : numpy array, training labels, shape should be (NUMBER_OF_EXAMPLES, shape of labels) 581 | acts : (optional) list that contains a numpy array for each layer in the network; the first 582 | element in the list is the array x itself, and each following array is the output of 583 | that layer for the given examples x 584 | 585 | Returns 586 | ------- 587 | param_grad : numpy array of length self.num_params() 588 | 589 | """ 590 | if acts is None: 591 | acts = self.forward(x) 592 | 593 | curr_act_grad = self.cost_grad(final_acts=acts[-1], y=y) 594 | param_grad = [] 595 | 596 | for ind_from_end, layer in enumerate(reversed(self.layers_)): 597 | curr_act_grad, curr_param_grad = layer.backward(curr_act_grad, acts[-2-ind_from_end]) 598 | if curr_param_grad is not None: 599 | param_grad.append(curr_param_grad) 600 | 601 | param_grad.reverse() 602 | return np.concatenate(param_grad) 603 | 604 | @staticmethod 605 | def get_batch(x, y=None, batch_size=128, batch_ind=0, inds=None): 606 | """ 607 | Calculate the gradient of the cost function with respect to all learnable parameters of this 608 | network. 609 | 610 | Parameters 611 | ---------- 612 | x : numpy array, training examples; shape should be (NUMBER_OF_EXAMPLES,) + self.input_shape 613 | y : numpy array, training labels, shape should be (NUMBER_OF_EXAMPLES, shape of labels) 614 | batch_size : number of examples to use in each batch 615 | batch_ind : which batch to return 616 | inds : a permuation of indexes for this dataset (numpy array of length x.shape[0]) 617 | 618 | Returns 619 | ------- 620 | batch_x : subset of at most batch_size examples in x 621 | batch_y : corresponding labels for this batch 622 | 623 | """ 624 | if inds is None: 625 | inds = np.arange(x.shape[0]) 626 | batch_x = x[inds[batch_ind*batch_size:(batch_ind+1)*batch_size],...] 
627 | batch_y = None 628 | if y is not None: 629 | batch_y = y[inds[batch_ind*batch_size:(batch_ind+1)*batch_size],...] 630 | 631 | return batch_x, batch_y 632 | 633 | @staticmethod 634 | def get_num_batch(num_examples, batch_size): 635 | """ 636 | Returns the number of batches for a given number of examples and given batch size. 637 | """ 638 | return int(np.ceil(num_examples/float(batch_size))) 639 | 640 | def split_per_layer(self, vec): 641 | """ Given a vector with entries for each learnable parameter in the net, this method sums 642 | up the entries for each layer and returns a vector of such sums. E.g., can be used to 643 | calculate absolute mean of weights in each layer.""" 644 | split = [] 645 | ind = 0 646 | for layer in self.layers_: 647 | split.append(vec[ind:layer.num_params()]) 648 | ind += layer.num_params() 649 | 650 | return split 651 | 652 | def fit(self, 653 | x, 654 | y=None, 655 | val_x=None, 656 | val_y=None, 657 | val_freq=10, 658 | batch_size=128, 659 | num_epoch=10, 660 | momentum=0.9, 661 | learn_rate=0.01, 662 | learn_rate_decay=0.05, 663 | chill_out_iters=10, 664 | weight_decay=.0005, 665 | verbose=False): 666 | """ 667 | Train the neural network via mini-batch gradient descent. 668 | 669 | Parameters 670 | ---------- 671 | x : numpy array, training examples; shape should be (NUMBER_OF_EXAMPLES,) + self.input_shape 672 | y : numpy array, training labels, shape should be (NUMBER_OF_EXAMPLES, shape of labels) 673 | val_x : validation examples, similar shape as x 674 | val_y : validation labels, similar shape as y 675 | val_freq : validation will be performed every val_freq iterations 676 | batch_size : number of examples to use in each batch 677 | num_epoch : number of epochs to train for (maximum, may terminate earlier) 678 | learn_rate : initial learning rate, will decay as learning proceeds 679 | learn_rate_decay : at each iteration i the learning rate will be 680 | learn_rate/(i*learn_rate_decay+1) 681 | chill_out_iters : if there is no improvement in validation error after this many iterations 682 | of validation, the learning rate will be cut in half and the network will go back to 683 | the set of parameters that achieved the lower cost so far 684 | weight_decay : amount of weight decay to apply 685 | verbose : whether to print debug messages during training or not 686 | 687 | """ 688 | 689 | if verbose: 690 | print '='*80 691 | print 'training net on %d samples' % x.shape[0] 692 | if val_x is not None: 693 | print 'using %d validation samples' % val_x.shape[0] 694 | print '='*80 695 | 696 | min_cost = 1e8 697 | velocity = np.zeros(self.num_params(), dtype='float32') 698 | best_params = self.get_params() 699 | stop = False 700 | no_improvement_iters = 0 701 | num_train = x.shape[0] 702 | num_batch = NNet.get_num_batch(num_train, batch_size) 703 | init_learn_rate = learn_rate 704 | 705 | start_time = time() 706 | for epoch in xrange(self.num_epoch, self.num_epoch + num_epoch): 707 | inds = self._rand_state.permutation(x.shape[0]) 708 | if stop: 709 | break 710 | for batch in xrange(num_batch): 711 | batch_x, batch_y = self.get_batch( 712 | x, y=y, batch_size=batch_size, inds=inds, batch_ind=batch) 713 | assert batch_x.shape[0] > 0 714 | param_grad = self.param_grad(batch_x, y=batch_y) 715 | params = self.get_params() 716 | learn_rate = init_learn_rate/((epoch*num_batch + batch)*learn_rate_decay+1) 717 | velocity = ( 718 | momentum*velocity - 719 | learn_rate*param_grad - 720 | learn_rate*weight_decay*params) 721 | self.set_params(params + velocity) 722 | 723 
| # check validation error every once in a while 724 | if (batch%val_freq == 0 and batch>0) or batch == num_batch-1: 725 | if val_x is None: 726 | val_x, val_y = batch_x, batch_y 727 | val_acts = self.forward_final(val_x, batch_size=batch_size) 728 | cost = self.cost(val_x, val_y, final_acts=val_acts) 729 | # child classes don't necessarily have a concept of "accuracy" and might not 730 | # implement the accuracy method 731 | try: 732 | acc = self.accuracy(val_acts, val_y) 733 | except NotImplemented: 734 | acc = np.nan 735 | 736 | cost_diff = cost - min_cost 737 | 738 | # if there has been significant regression in cost, chill out 739 | if ((cost_diff > 0 and cost_diff/min_cost > 1) or 740 | no_improvement_iters>chill_out_iters): 741 | self.set_params(best_params) 742 | no_improvement_iters = 0 743 | init_learn_rate /= 2 744 | velocity = np.zeros_like(velocity, dtype='float32') 745 | print 'cost was %.3e, chilling out...' % cost 746 | cost = min_cost 747 | elif cost < min_cost: 748 | best_params = self.get_params() 749 | min_cost = cost 750 | no_improvement_iters = 0 751 | else: 752 | no_improvement_iters += 1 753 | 754 | if verbose: 755 | print 'epoch %03d, batch=%04d/%04d' % (epoch, batch+1, num_batch) 756 | print 'cost=%.3e, min_cost=%.3e, acc=%.2f' % (cost, min_cost, acc) 757 | print 'learn_rate=%.3e, velocity L1 norm %f' % (learn_rate, 758 | np.abs(velocity).sum(0)) 759 | print '-'*80 760 | 761 | self.num_epoch = epoch 762 | end_time = time() 763 | print 'training complete [%.2f min]' % ((end_time-start_time)/60) 764 | 765 | def predict(self, x, batch_size=None): 766 | """ 767 | Retruns the output of the final layer of this network. 768 | """ 769 | return self.forward_final(x, batch_size) 770 | 771 | ################################################################################################ 772 | ### METHODS TO OVERWRITE IN CHILD CLASSES 773 | def _cost(self, final_acts, y): 774 | """ 775 | Calculates the cost of the network for the specified inputs. 776 | 777 | Parameters 778 | ---------- 779 | final_acts : output of top-most layer in the network for a set of examples 780 | y : labels of the examples 781 | 782 | Returns 783 | ------- 784 | cost : float 785 | 786 | """ 787 | raise NotImplemented 788 | 789 | def accuracy(self, final_acts, y): 790 | """ 791 | Child class can optionally implement this in case there is a notion of accuracy that is 792 | separate from cost (e.g. cross entropy cost versus classifcation accuracy). 793 | 794 | Parameters 795 | ---------- 796 | final_acts : output of top-most layer in the network for a set of examples 797 | y : labels of the examples 798 | 799 | Returns 800 | ------- 801 | accuracy : float 802 | 803 | """ 804 | raise NotImplemented 805 | 806 | def cost_grad(self, final_acts, y): 807 | """ 808 | Calculates the gradient of the cost function with respect to the top-most layer activations. 809 | 810 | Parameters 811 | ---------- 812 | final_acts : output of top-most layer in the network for a set of examples 813 | y : labels of the examples 814 | 815 | Returns 816 | ------- 817 | cost_grad : numpy array, same shape as the output_shape of the top-most layer. 818 | 819 | """ 820 | raise NotImplemented 821 | 822 | class SoftmaxNet(NNet): 823 | def __init__(self, layer_args, input_shape, rand_state=np.random): 824 | """ 825 | Softmax (cross entropy) cost neural net. 
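
        A minimal usage sketch (illustrative only: x stands for an (N, 784) float array and y
        for an integer label vector covering classes 0-9; see demo_mnist.py and the unit tests
        for complete examples):

            layer_args = [(DenseLayer, dict(num_nodes=16)),
                          (ReluLayer, dict()),
                          (DenseLayer, dict(num_nodes=10)),
                          (BiasLayer, dict())]
            net = SoftmaxNet(layer_args=layer_args,
                             input_shape=(784,),
                             rand_state=np.random.RandomState(0))
            net.fit(x, y, batch_size=64, num_epoch=3, learn_rate=0.1)
            yp = net.predict(x)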
826 | """ 827 | super(SoftmaxNet, self).__init__(layer_args, input_shape, rand_state=rand_state) 828 | self.num_classes = self.layers_[-1].output_shape[0] 829 | 830 | def fit(self, x, y=None, **kwargs): 831 | assert y is not None, 'Labels must be passed in' 832 | assert tuple(np.unique(y)) == tuple(range(self.num_classes)), \ 833 | 'Labels should range from 0 to C-1 where C is the number of nodes in the last layer' 834 | binary_y = self.binarize_labels(y) 835 | if 'val_y' in kwargs: 836 | kwargs['val_y'] = self.binarize_labels(kwargs['val_y']) 837 | super(SoftmaxNet, self).fit(x, binary_y, **kwargs) 838 | 839 | def binarize_labels(self, y): 840 | """ 841 | Turns discrete labels into binary vector labels. 842 | 843 | Parameters 844 | ---------- 845 | y : numpy array of N integers from 0 to C-1 846 | 847 | Returns 848 | ------- 849 | b : numpy array of shape Nx(C-1) s.t. b[i,j]=1 if y[i]==j, and b[i,k] for all k!=j 850 | """ 851 | binary_y = np.zeros((len(y), self.num_classes)) 852 | for c in xrange(self.num_classes): 853 | binary_y[y==c,c] = 1 854 | 855 | return binary_y 856 | 857 | def predict(self, x, batch_size=None): 858 | acts = self.forward_final(x, batch_size) 859 | return np.argmax(acts, axis=1) 860 | 861 | def _cost(self, final_acts, y): 862 | exp_act = safe_exp(final_acts) 863 | lse_act = safe_log(np.sum(exp_act, axis=1)) 864 | return -np.mean(np.sum((y * (final_acts - lse_act[:,np.newaxis])), axis=1)) 865 | 866 | def accuracy(self, final_acts, y): 867 | yp = np.argmax(final_acts, axis=1) 868 | if y.ndim == 2: 869 | y = np.argmax(y, axis=1) 870 | return np.mean(yp == y)*100 871 | 872 | def cost_grad(self, final_acts, y): 873 | exp_act = safe_exp(final_acts) 874 | sum_exp = np.sum(exp_act, axis=1) 875 | return exp_act/sum_exp[:,np.newaxis] - y 876 | 877 | -------------------------------------------------------------------------------- /simple_convnet/helpers.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from matplotlib import pyplot as plt 5 | from warnings import warn 6 | 7 | def filter2D(input_arr, filter): 8 | """ 9 | 2D filtering (i.e. convolution but without mirroring the filter). Mostly a convenience wrapper 10 | around OpenCV. 11 | 12 | Parameters 13 | ---------- 14 | input_arr : numpy array, HxW size 15 | filter : numpy array, H1xW1 size 16 | 17 | Returns 18 | ------- 19 | result : numpy array, HxW size 20 | 21 | """ 22 | return cv2.filter2D(input_arr, 23 | -1, 24 | filter, 25 | borderType=cv2.BORDER_CONSTANT) 26 | 27 | def batch_filter3D(input_arr, filters): 28 | """ 29 | 3D filtering (i.e. convolution but without mirroring the filter). 30 | 31 | Parameters 32 | ---------- 33 | input_arr : numpy array, NxHxWxC size where N is the number of images to be filtered 34 | filter : numpy array, H1xW1xC size 35 | 36 | Returns 37 | ------- 38 | result : numpy array, NxHxW size 39 | 40 | """ 41 | assert input_arr.shape[3] == filters.shape[2] 42 | num_input = input_arr.shape[0] 43 | output = np.zeros(input_arr.shape[:3] + (filters.shape[-1],)) 44 | for n in xrange(num_input): 45 | input1 = input_arr[n] 46 | for f in xrange(filters.shape[-1]): 47 | for c in xrange(filters.shape[-2]): 48 | output[n,:,:,f] += filter2D(input1[...,c].copy(), filters[...,c,f].copy()) 49 | return output 50 | 51 | def padarray(arr, amount): 52 | """ 53 | Pad array by some amount in height and width dimensions. 
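    For example, applied to a batch of shape (N, H, W, C) with amount=(2, 3), the result has
    shape (N, H+4, W+6, C), with the original values centered and zeros around the height and
    width borders.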
54 | 55 | Parameters 56 | ---------- 57 | arr : numpy array, HxW size 58 | amount : (int, int) tuple specifying padding amounts in height and width dimensions. Padding 59 | be added on all 4 sides. 60 | 61 | Returns 62 | ------- 63 | result : numpy array, (H+2*H_pad)x(W+2*W_pad) 64 | 65 | """ 66 | padded = np.zeros(arr.shape[0:1] + 67 | (arr.shape[1]+2*amount[0], arr.shape[2]+2*amount[1]) + 68 | arr.shape[3:]) 69 | padded[:, amount[0]:-amount[0], amount[1]:-amount[1], ...] = arr 70 | return padded 71 | 72 | def atleast(arr, ndim=4): 73 | """ 74 | Increase number of dimensions by adding singleton dimensions. 75 | 76 | Parameters 77 | ---------- 78 | arr : numpy array 79 | ndim : desired number of dimensions 80 | 81 | Returns 82 | ------- 83 | result : numpy array where result.ndim == ndim, size 1 x 1 x 1 ... arr.shape (number of leading 84 | ones will depend on ndim). 85 | 86 | """ 87 | while arr.ndim < ndim: 88 | arr = arr[np.newaxis,...] 89 | return arr 90 | 91 | def safe_exp(v): 92 | """ 93 | Safely apply np.exp. If the input is beyond a "safe" range, it will be clipped and a warning 94 | will be issued. 95 | 96 | """ 97 | v = np.array(v) 98 | if np.any(v > 500): 99 | warn('Warning: exp overflowing!', RuntimeWarning) 100 | return np.exp(np.minimum(v, 500)) 101 | 102 | def safe_log(v): 103 | """ 104 | Safely apply np.log. If the input is beyond a "safe" range, it will be clipped and a warning 105 | will be issued. 106 | 107 | """ 108 | v = np.array(v) 109 | if np.any(v < -300): 110 | warn('Warning: exp overflowing!', RuntimeWarning) 111 | return np.log(np.maximum(v, -300)) 112 | 113 | def choice(num, total): 114 | """ 115 | Returns indecies to (total) randomly selected items out of (num) without replacement. 116 | 117 | """ 118 | return np.random.permutation(total)[:num] 119 | 120 | def imshow(img, ax=None): 121 | """ 122 | Displays an image, taking care of rescaling it and taking care of gray versus color. 123 | 124 | Parameters 125 | ---------- 126 | img : numpy array, HxW or or HxWx1 or HxWx3 size 127 | ax : axes object 128 | 129 | """ 130 | if ax is None: 131 | fig = plt.figure() 132 | ax = fig.add_subplot(111) 133 | prm = dict(interpolation='none') 134 | if img.ndim==2 or img.shape[2]==1: 135 | prm['cmap']=plt.cm.gray 136 | if img.ndim > 2: 137 | img = img[:,:,0] 138 | img -= img.min() 139 | img /= img.max() 140 | ax.imshow(img, **prm) 141 | ax.get_xaxis().set_visible(False) 142 | ax.get_yaxis().set_visible(False) 143 | 144 | def imshows(imgs, num_row=1): 145 | """ 146 | Display a set of images. 
147 | 148 | Parameters 149 | ---------- 150 | imgs : HxWxN or HxWx3xN image stack 151 | num_row : number of rows in the grid of images 152 | 153 | """ 154 | num_imgs = imgs.shape[-1] 155 | fig = plt.figure() 156 | num_col = int(np.ceil(float(num_imgs)/num_row)) 157 | 158 | for i in xrange(num_imgs): 159 | ax = fig.add_subplot(num_row, num_col, i) 160 | imshow(imgs[...,i], ax=ax) -------------------------------------------------------------------------------- /simple_convnet/tests/simple_convnet_tests.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from simple_convnet import convnet as cn 3 | 4 | from scipy.optimize import approx_fprime 5 | 6 | def _check_gradients(layer_args, input_shape): 7 | rand = np.random.RandomState(0) 8 | net = cn.SoftmaxNet(layer_args=layer_args, input_shape=input_shape, rand_state=rand) 9 | x = rand.randn(*(10,)+net.input_shape)/100 10 | y = rand.randn(10) > 0 11 | by = net.binarize_labels(y) 12 | 13 | g1 = approx_fprime(net.get_params(), net.cost_for_params, 1e-5, x, by) 14 | g2 = net.param_grad(x, by) 15 | err = np.max(np.abs(g1-g2))/np.abs(g1).max() 16 | print err 17 | assert err < 1e-3, 'incorrect gradient!' 18 | 19 | def test_dense_layer(): 20 | layer_args = [(cn.DenseLayer, dict(num_nodes=20)), 21 | (cn.DenseLayer, dict(num_nodes=2))] 22 | _check_gradients(layer_args, (10,)) 23 | 24 | def test_relu_layer(): 25 | layer_args = [(cn.ReluLayer, dict()), 26 | (cn.DenseLayer, dict(num_nodes=2))] 27 | _check_gradients(layer_args, (10,)) 28 | 29 | def test_sigmoid_layer(): 30 | layer_args = [(cn.SigmoidLayer, dict()), 31 | (cn.DenseLayer, dict(num_nodes=2))] 32 | _check_gradients(layer_args, (10,)) 33 | 34 | def test_conv_layer(): 35 | layer_args = [(cn.ConvLayer, dict(num_filters=5, filter_shape=(3,3))), 36 | (cn.DenseLayer, dict(num_nodes=2))] 37 | _check_gradients(layer_args, (8,8,3)) 38 | 39 | def test_convbias_layer(): 40 | layer_args = [(cn.ConvLayer, dict(num_filters=5, filter_shape=(3,3))), 41 | (cn.BiasLayer, dict()), 42 | (cn.DenseLayer, dict(num_nodes=2))] 43 | _check_gradients(layer_args, (8,8,3)) 44 | 45 | def test_pool_layer(): 46 | layer_args = [(cn.ConvLayer, dict(num_filters=5, filter_shape=(3,3))), 47 | (cn.MeanPoolingLayer, dict(pool_size=2)), 48 | (cn.DenseLayer, dict(num_nodes=2))] 49 | _check_gradients(layer_args, (8,8,3)) 50 | 51 | def test_deep(): 52 | layer_args = [(cn.ConvLayer, dict(num_filters=5, filter_shape=(3,3))), 53 | (cn.BiasLayer, dict()), 54 | (cn.ReluLayer, dict()), 55 | (cn.MeanPoolingLayer, dict(pool_size=2)), 56 | (cn.ConvLayer, dict(num_filters=5, filter_shape=(3,3))), 57 | (cn.BiasLayer, dict()), 58 | (cn.SigmoidLayer, dict()), 59 | (cn.MeanPoolingLayer, dict(pool_size=2)), 60 | (cn.DenseLayer, dict(num_nodes=10)), 61 | (cn.BiasLayer, dict()), 62 | (cn.DenseLayer, dict(num_nodes=2))] 63 | _check_gradients(layer_args, (18,18,3)) 64 | 65 | def test_fit(): 66 | layer_args = [(cn.DenseLayer, dict(num_nodes=4)), 67 | (cn.DenseLayer, dict(num_nodes=2))] 68 | net = cn.SoftmaxNet(layer_args=layer_args, input_shape=(2,)) 69 | 70 | num = 1000 71 | rand = np.random.RandomState(0) 72 | x = rand.rand(num,2) 73 | y = np.zeros(num) 74 | y[x[:,0]>0.5] = 1 75 | 76 | net.fit(x, y, batch_size=16, learn_rate=1, num_epoch=100, verbose=True) 77 | yp = net.predict(x) 78 | acc = np.mean(y==yp) 79 | assert acc > 0.7 80 | --------------------------------------------------------------------------------