├── .gitignore ├── LICENSE ├── README.md ├── demo_cifar.py ├── demo_mnist.py ├── setup.py └── simple_convnet ├── __init__.py ├── convnet.py ├── helpers.py └── tests └── simple_convnet_tests.py /.gitignore: -------------------------------------------------------------------------------- 1 | # simple_convnet specific stuff 2 | data/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | bin/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # Installer logs 29 | pip-log.txt 30 | pip-delete-this-directory.txt 31 | 32 | # Unit test / coverage reports 33 | htmlcov/ 34 | .tox/ 35 | .coverage 36 | .cache 37 | nosetests.xml 38 | coverage.xml 39 | 40 | # Translations 41 | *.mo 42 | 43 | # Mr Developer 44 | .mr.developer.cfg 45 | .project 46 | .pydevproject 47 | 48 | # Rope 49 | .ropeproject 50 | 51 | # Django stuff: 52 | *.log 53 | *.pot 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Boris Babenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | SimpleConvnet 2 | ============== 3 | 4 | This is a basic implementation of a convolutional neural net. It is meant primarily for pedagogical 5 | purposes -- if you are looking for a fully featured, efficient implementation, there are a few other 6 | options I'd recommend: 7 | 8 | * [cuda-convnet](https://code.google.com/p/cuda-convnet/) 9 | * [caffe](http://caffe.berkeleyvision.org/) 10 | 11 | ### Installing 12 | To install, run: 13 | 14 | ```bash 15 | python setup.py install 16 | ``` 17 | 18 | ### Dependencies 19 | * matplotlib 1.1 20 | * numpy 1.6 21 | * scipy 0.10 22 | * scikit-image 0.9 23 | * scikit-learn 0.14 24 | * opencv 2.4 25 | 26 | ### Running unit tests 27 | To run unit tests you will need nosetests installed. 
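If it is not already available, nose can typically be installed with pip:

```bash
pip install nose
```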
You can run all unit tests with this: 28 | 29 | ``` 30 | nosetests -v 31 | ``` 32 | -------------------------------------------------------------------------------- /demo_cifar.py: -------------------------------------------------------------------------------- 1 | import cPickle as pickle 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | from os.path import join 5 | from sklearn.metrics import confusion_matrix 6 | 7 | from simple_convnet import convnet as cn 8 | 9 | #################################################################################################### 10 | # LOAD DATA 11 | 12 | def load_batch(fname): 13 | with open('data/cifar-10-batches-py/%s'%fname, 'rb') as f: 14 | data = pickle.load(f) 15 | x = data['data'].reshape((-1,3,32,32)).astype('float32')/255 16 | x = np.rollaxis(x, 1, 4) 17 | y = np.array(data['labels']) 18 | return x, y 19 | 20 | num_train_batch = 3 # can go up to 4 if memory allows 21 | train_x = np.zeros((num_train_batch*10000,32,32,3), dtype='float32') 22 | train_y = np.zeros(num_train_batch*10000, dtype='float32') 23 | for b in xrange(num_train_batch): 24 | train_x[b*10000:(b+1)*10000,...], train_y[b*10000:(b+1)*10000] = \ 25 | load_batch('data_batch_%d' % (b+1)) 26 | mean_x = train_x[::10,...].mean(0)[np.newaxis,...] 27 | 28 | val_x, val_y = load_batch('data_batch_5') 29 | test_x, test_y = load_batch('test_batch') 30 | 31 | #################################################################################################### 32 | # SET UP PARAMETERS 33 | 34 | layer_args = [ 35 | (cn.ConvLayer, dict(num_filters=32, filter_shape=(5,5), init_from=None)), 36 | (cn.BiasLayer, dict(init_val=1)), 37 | (cn.ReluLayer, dict()), 38 | (cn.MeanPoolingLayer, dict(pool_size=2)), 39 | (cn.ConvLayer, dict(num_filters=32, filter_shape=(5,5))), 40 | (cn.BiasLayer, dict(init_val=1)), 41 | (cn.ReluLayer, dict()), 42 | (cn.MeanPoolingLayer, dict(pool_size=2)), 43 | (cn.DenseLayer, dict(num_nodes=64)), 44 | (cn.BiasLayer, dict(init_val=1)), 45 | (cn.ReluLayer, dict()), 46 | (cn.DenseLayer, dict(num_nodes=10)), 47 | (cn.BiasLayer, dict()) 48 | ] 49 | 50 | fit_args = dict( 51 | val_freq=20, 52 | batch_size=32, 53 | num_epoch=30, 54 | weight_decay=0.0005, 55 | learn_rate_decay=.00005, 56 | chill_out_iters=100, 57 | momentum=0.9, 58 | learn_rate=.01) 59 | 60 | #################################################################################################### 61 | # TRAIN AND TEST 62 | 63 | net = cn.SoftmaxNet(layer_args=layer_args, 64 | input_shape=train_x.shape[1:], 65 | rand_state=np.random.RandomState(0)) 66 | 67 | net.fit(train_x, 68 | train_y, 69 | val_x=val_x[::30,:], 70 | val_y=val_y[::30], 71 | verbose=True, 72 | **fit_args) 73 | 74 | with open(join('data', 'cifar_model.pkl'), 'wb') as f: 75 | pickle.dump((net, layer_args, fit_args), f) 76 | 77 | yp = net.predict(test_x, batch_size=128) 78 | print 'test accuracy: %f' % np.mean(yp == test_y) 79 | 80 | conf = confusion_matrix(test_y, yp) 81 | plt.matshow(conf) 82 | plt.xticks(np.arange(10)) 83 | plt.yticks(np.arange(10)) 84 | -------------------------------------------------------------------------------- /demo_mnist.py: -------------------------------------------------------------------------------- 1 | import cPickle as pickle 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | from sklearn.metrics import confusion_matrix 5 | from os.path import join 6 | 7 | from simple_convnet import convnet as cn 8 | 9 | 
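# Note: the code below reads an uncompressed pickle from data/mnist.pkl, while the file
# linked below downloads as mnist.pkl.gz.  One way to produce data/mnist.pkl (assuming the
# .gz file has already been saved under data/) is, for example:
#
#   import gzip, shutil
#   with gzip.open('data/mnist.pkl.gz', 'rb') as fin, open('data/mnist.pkl', 'wb') as fout:
#       shutil.copyfileobj(fin, fout)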
#################################################################################################### 10 | # LOAD DATA 11 | 12 | # Download at http://deeplearning.net/data/mnist/mnist.pkl.gz 13 | with open('data/mnist.pkl', 'rb') as f: 14 | (train_x, train_y), (val_x, val_y), (test_x, test_y) = pickle.load(f) 15 | 16 | train_x = train_x.reshape((-1,28,28,1)).astype('float32') 17 | test_x = test_x.reshape((-1,28,28,1)).astype('float32') 18 | val_x = val_x.reshape((-1,28,28,1)).astype('float32') 19 | 20 | #################################################################################################### 21 | # SET UP PARAMETERS 22 | 23 | ### simpler net with much fewer params 24 | # layer_args = [ 25 | # (cn.ConvLayer, dict(num_filters=20, filter_shape=(9,9))), 26 | # (cn.BiasLayer, dict(init_val=0.1)), 27 | # (cn.ReluLayer, dict()), 28 | # (cn.MeanPoolingLayer, dict(pool_size=2)), 29 | # (cn.DenseLayer, dict(num_nodes=10)), 30 | # (cn.BiasLayer, dict()) 31 | # ] 32 | 33 | ### closer to LeNet5 34 | layer_args = [ 35 | (cn.ConvLayer, dict(num_filters=8, filter_shape=(5,5))), 36 | (cn.BiasLayer, dict(init_val=0.1)), 37 | (cn.ReluLayer, dict()), 38 | (cn.MeanPoolingLayer, dict(pool_size=2)), 39 | (cn.ConvLayer, dict(num_filters=16, filter_shape=(5,5))), 40 | (cn.BiasLayer, dict(init_val=0.1)), 41 | (cn.ReluLayer, dict()), 42 | (cn.MeanPoolingLayer, dict(pool_size=2)), 43 | (cn.DenseLayer, dict(num_nodes=128)), 44 | (cn.BiasLayer, dict(init_val=0.1)), 45 | (cn.ReluLayer, dict()), 46 | (cn.DenseLayer, dict(num_nodes=10)), 47 | (cn.BiasLayer, dict()) 48 | ] 49 | 50 | fit_args = dict( 51 | val_freq=10, 52 | batch_size=64, 53 | num_epoch=3, 54 | weight_decay=0.0005, 55 | momentum=0.9, 56 | learn_rate=1e-1) 57 | 58 | #################################################################################################### 59 | # TRAIN AND TEST 60 | 61 | net = cn.SoftmaxNet(layer_args=layer_args, 62 | input_shape=train_x.shape[1:], 63 | rand_state=np.random.RandomState(0)) 64 | net.fit(train_x, 65 | train_y, 66 | val_x=val_x[::10,:], 67 | val_y=val_y[::10], 68 | verbose=True, 69 | **fit_args) 70 | 71 | with open(join('data', 'mnist_model_deep.pkl'), 'wb') as f: 72 | pickle.dump((net, layer_args, fit_args), f) 73 | 74 | yp = net.predict(test_x, batch_size=128) 75 | print 'test accuracy: %f' % np.mean(yp == test_y) 76 | 77 | conf = confusion_matrix(test_y, yp) 78 | plt.matshow(conf) 79 | plt.xticks(np.arange(10)) 80 | plt.yticks(np.arange(10)) 81 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | setup(name='SimpleConvnet', 6 | version='1.0', 7 | description='A basic implementation of convolutional neural nets', 8 | author='Boris', 9 | author_email='bbabenko@gmail.com', 10 | packages=['simple_convnet'], 11 | install_requires=[ 12 | 'matplotlib', 13 | 'numpy', 14 | 'scipy', 15 | 'scikit-image', 16 | 'scikit-learn', 17 | ], 18 | ) -------------------------------------------------------------------------------- /simple_convnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bbabenko/simple_convnet/015dd33be80365f704049790a547a2b3e0251f2a/simple_convnet/__init__.py -------------------------------------------------------------------------------- /simple_convnet/convnet.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from simple_convnet.helpers import ( 4 | filter2D, batch_filter3D, padarray, atleast, safe_exp, safe_log, choice, imshow 5 | ) 6 | from matplotlib import pyplot as plt 7 | from time import time 8 | from skimage.transform import downscale_local_mean 9 | 10 | class Layer(object): 11 | def __init__(self, input_shape, rand_state=np.random): 12 | """ 13 | Layer constructor (abstract). 14 | 15 | Parameters 16 | ---------- 17 | input_shape : tuple of ints specifying shape of a single input 18 | rand_state : a RandomState object 19 | 20 | """ 21 | self.input_shape = np.array(input_shape) 22 | self.output_shape = self.input_shape 23 | 24 | def forward(self, input_act): 25 | """ 26 | Forward propagation. This class is mostly wraps around _forward and does some extra 27 | asserts. Child classes should overwrite _forward rather than this method. 28 | 29 | Parameters 30 | ---------- 31 | input_act : numpy array, activations from the layer below; shape must either be the same as 32 | self.input_shape, or (NUMBER_OF_EXAMPLES,) + self.input_shape 33 | 34 | Returns 35 | ------- 36 | output_act : numpy array, output activations from this layer; shape will be 37 | self.output_shape or (NUMBER_OF_EXAMPLES,) + self.output_shape, depending on the input 38 | 39 | """ 40 | input_ndim = len(self.input_shape) 41 | assert input_act.shape[-input_ndim:] == tuple(self.input_shape), 'wrong input shape' 42 | many = (input_act.ndim > input_ndim) 43 | input_act = atleast(input_act, input_ndim+1) 44 | 45 | act = self._forward(input_act) 46 | 47 | assert act.shape[1:] == tuple(self.output_shape), 'wrong output shape' 48 | return act if many else act[0,...] 49 | 50 | def backward(self, grad_act, input_act): 51 | """ 52 | Backward propagation. This class is mostly wraps around _backward and does some extra 53 | asserts. Child classes should overwrite _backward rather than this method. 
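        For instance, for a DenseLayer with input_shape (6,) and num_nodes=4, a batch of 8
        examples gives an input_act of shape (8, 6) and a grad_act of shape (8, 4); backward()
        then returns a grad_input_act of shape (8, 6) together with a flat grad_params of
        length num_params() (6*4 = 24 in that case).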
54 | 55 | Parameters 56 | ---------- 57 | grad_act : nump array, gradient of cost function with respect to the activations from this 58 | layer (usually calculated in the layer above and passed down during backward 59 | propagation), shape is self.output_shape or (NUMBER_OF_EXAMPLES,) + self.output_shape 60 | input_act : numpy array, activations from the layer below; shape must either be the same as 61 | self.input_shape, or (NUMBER_OF_EXAMPLES,) + self.input_shape 62 | 63 | Returns 64 | ------- 65 | grad_input_act : numpy array, gradient of cost function with respect to the input 66 | activations this layer received, which is to be passed down to the layer below; shape 67 | will be self.input_shape or (NUMBER_OF_EXAMPLES,) + self.input_shape, depending on the 68 | input 69 | grad_params : 1D numpy array of length self.num_params() (or None if self.num_params()==0), 70 | gradient of cost function with respect to the params of this layer 71 | 72 | """ 73 | input_ndim = len(self.input_shape) 74 | output_ndim = len(self.output_shape) 75 | 76 | assert grad_act.shape[-output_ndim:] == tuple(self.output_shape), 'wrong grad input shape' 77 | assert input_act.shape[-input_ndim:] == tuple(self.input_shape), 'wrong input shape' 78 | assert ((grad_act.ndim==output_ndim and input_act.ndim==input_ndim) 79 | or grad_act.shape[0] == input_act.shape[0]), 'wrong number of samples' 80 | many = (input_act.ndim > input_ndim) 81 | input_act = atleast(input_act, input_ndim+1) 82 | grad_act = atleast(grad_act, output_ndim+1) 83 | 84 | grad_input_act, grad_params = self._backward(grad_act, input_act) 85 | 86 | assert grad_input_act.shape[1:] == tuple(self.input_shape), \ 87 | 'wrong input act grad shape' 88 | if self.num_params() > 0: 89 | grad_params = grad_params.ravel() 90 | assert grad_params.size == self.num_params(), 'wrong param grad shape' 91 | 92 | return (grad_input_act if many else grad_input_act[0,...], grad_params) 93 | 94 | ################################################################################################ 95 | ### METHODS TO OVERWRITE IN CHILD CLASSES 96 | def num_params(self): 97 | """ 98 | Returns the number of parameters in this layer 99 | """ 100 | return 0 101 | 102 | def get_params(self): 103 | """ 104 | Returns a 1D numpy array, length self.num_params(), with the parameters of this layer. 105 | """ 106 | return None 107 | 108 | def set_params(self, params): 109 | """ 110 | Sets the parameters of this layer 111 | 112 | Parameters 113 | ---------- 114 | params : 1D numpy array, length self.num_params(), with the parameters of this layer 115 | 116 | """ 117 | pass 118 | 119 | def _forward(self, input_act): 120 | """ 121 | Forward propagation. 122 | 123 | Parameters 124 | ---------- 125 | input_act : numpy array, activations from the layer below; shape is 126 | (NUMBER_OF_EXAMPLES,) + self.input_shape 127 | 128 | Returns 129 | ------- 130 | output_act : numpy array, output activations from this layer; shape will be 131 | (NUMBER_OF_EXAMPLES,) + self.output_shape 132 | 133 | """ 134 | raise NotImplemented 135 | 136 | def _backward(self, grad_act, input_act): 137 | """ 138 | Backward propagation. 
139 | 140 | Parameters 141 | ---------- 142 | grad_act : nump array, gradient of cost function with respect to the activations from this 143 | layer (usually calculated in the layer above and passed down during backward 144 | propagation), shape is (NUMBER_OF_EXAMPLES,) + self.output_shape 145 | input_act : numpy array, activations from the layer below; shape must either be the same as 146 | (NUMBER_OF_EXAMPLES,) + self.input_shape 147 | 148 | Returns 149 | ------- 150 | grad_input_act : numpy array, gradient of cost function with respect to the input 151 | activations this layer received, which is to be passed down to the layer below; shape 152 | will be (NUMBER_OF_EXAMPLES,) + self.input_shape 153 | grad_params : 1D numpy array of length self.num_params() (or None if self.num_params()==0), 154 | gradient of cost function with respect to the params of this layer 155 | 156 | """ 157 | # returns next grad_act (layer below), and grad_params for this layer 158 | raise NotImplemented 159 | 160 | class ConvLayer(Layer): 161 | def __init__(self, 162 | input_shape, 163 | num_filters=1, 164 | filter_shape=(3,3), 165 | init_from=None, 166 | rand_state=np.random): 167 | """ 168 | Convolutional layer. 169 | 170 | Parameters 171 | ---------- 172 | input_shape : tuple of ints specifying shape of a single input; this particular layer 173 | expects the input shape to be 3D (height x width x channels) 174 | num_filters : int, number of filters in this layer 175 | filter_shape : tuple specifying height and width of the filters (current implementation 176 | only square filters) 177 | init_from : (experimental feature) a dataset to use in initializing filters 178 | rand_state : a RandomState object 179 | 180 | """ 181 | super(ConvLayer, self).__init__(input_shape) 182 | assert filter_shape[0]%2 == 1 and filter_shape[1]%2 ==1 183 | assert filter_shape[0] == filter_shape[1], 'Only square filters currently supported' 184 | if init_from is not None: 185 | # a bit of a hack to try out... 
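            # roughly: each filter is seeded with a random filter-sized crop of the average
            # of 15 randomly drawn examples from init_from, scaled down by a factor of 10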
186 | assert init_from.shape[3] == input_shape[2] 187 | assert init_from.shape[0] > 5 188 | self.filters_ = np.zeros(filter_shape + (input_shape[2], num_filters), dtype='float32') 189 | for i in xrange(num_filters): 190 | sample = init_from[choice(15, init_from.shape[0]),...].mean(0) 191 | r_start = rand_state.randint(init_from.shape[1] - filter_shape[0]) 192 | c_start = rand_state.randint(init_from.shape[2] - filter_shape[1]) 193 | self.filters_[...,i] = sample[r_start:r_start+filter_shape[0], 194 | c_start:c_start+filter_shape[1], 195 | ...]/10 196 | else: 197 | self.filters_ = rand_state.randn(*(filter_shape + (input_shape[2], num_filters))) 198 | self.filters_ /= np.sqrt(np.prod(self.filters_.shape[:-1])) 199 | self.filters_ = self.filters_.astype('float32') 200 | self.filter_shape = filter_shape 201 | self.filter_pad = (filter_shape[0]/2, filter_shape[1]/2) 202 | self.output_shape = np.array([self.input_shape[0] - filter_shape[0] + 1, 203 | self.input_shape[1] - filter_shape[1] + 1, 204 | num_filters]) 205 | 206 | def viz(self, num_row=1): 207 | """ 208 | Displays the filters in this layer (only makes sense for the first layer of a network) 209 | """ 210 | num_filters = self.filters_.shape[-1] 211 | fig = plt.figure() 212 | num_col = int(np.ceil(float(num_filters)/num_row)) 213 | 214 | for i in xrange(num_filters): 215 | ax = fig.add_subplot(num_row, num_col, i) 216 | imshow(self.filters_[...,i], ax=ax) 217 | 218 | def num_params(self): 219 | return np.prod(self.filters_.shape) 220 | 221 | def get_params(self): 222 | return self.filters_.ravel() 223 | 224 | def set_params(self, params): 225 | self.filters_ = params.reshape(self.filters_.shape) 226 | 227 | def _forward(self, input_act): 228 | fp = self.filter_pad 229 | act = batch_filter3D(input_act, self.filters_) 230 | act = act[:,fp[0]:-fp[0],fp[1]:-fp[1],:] 231 | return act 232 | 233 | def _backward(self, grad_act, input_act): 234 | # this is probably the trickiest method in this entire module... 235 | 236 | # input activation gradient -- notice that we have to flip the filters horizontally and 237 | # vertically 238 | rev_filters = np.fliplr(np.flipud(self.filters_)) 239 | 240 | # note: opencv doesn't like arbitrary slices of numpy arrays, so we need to shuffle the 241 | # dimensions around a little bit 242 | 243 | # rev_filters will now be NUM_FILTERS x NUM_CHANNELS x ... 244 | rev_filters = np.rollaxis(np.rollaxis(rev_filters, 2, 0), 3, 0).copy() 245 | padded_grad_act = padarray(grad_act, self.filter_pad) 246 | # padded_grad_act will now be NUM_FILTERS x NUM_EXAMPLES x ... 247 | padded_grad_act = np.rollaxis(padded_grad_act, 3, 0).copy() 248 | grad_input_act = np.zeros(input_act.shape, dtype='float32') 249 | for z in xrange(input_act.shape[0]): 250 | for c in xrange(input_act.shape[-1]): 251 | for f in xrange(self.filters_.shape[-1]): 252 | grad_input_act[z,:,:,c] += filter2D(padded_grad_act[f,z], rev_filters[f,c]) 253 | 254 | # grad_input_act = grad_input_act.sum(-1) 255 | 256 | # params gradient 257 | grad_params = np.zeros((input_act.shape[1:4] + (grad_act.shape[-1],)), dtype='float32') 258 | # grad_act_ will now be NUM_FILTERS x NUM_EXAMPLES x ... 259 | grad_act_ = np.rollaxis(grad_act, 3, 0).copy() 260 | # padded_grad_act will now be NUM_CHANNELS x NUM_EXAMPLES x ... 
261 | input_act = np.rollaxis(input_act, 3, 0).copy() 262 | for n in xrange(input_act.shape[1]): 263 | for c in xrange(input_act.shape[0]): 264 | for f in xrange(grad_act.shape[-1]): 265 | grad_params[:,:,c,f] += filter2D(input_act[c,n], grad_act_[f,n]) 266 | grad_params /= input_act.shape[1] 267 | 268 | r_border, c_border = grad_act.shape[1]/2, grad_act.shape[2]/2 269 | if grad_act.shape[1] %2 == 0: 270 | grad_params = grad_params[r_border:-r_border+1, c_border:-c_border+1,...] 271 | else: 272 | grad_params = grad_params[r_border:-r_border, c_border:-c_border,...] 273 | assert grad_params.shape == self.filters_.shape, 'wrong param grad shape' 274 | 275 | return grad_input_act, grad_params.ravel() 276 | 277 | class MeanPoolingLayer(Layer): 278 | def __init__(self, input_shape, pool_size=2, rand_state=np.random): 279 | """ 280 | Mean pooling layer. There are no learnable parameters in this layer type. 281 | 282 | Parameters 283 | ---------- 284 | input_shape : tuple of ints specifying shape of a single input 285 | pool_size : int, size of the pooling window (stride will be the same as this size, in other 286 | words no overlap in the pooling) 287 | rand_state : a RandomState object 288 | 289 | """ 290 | super(MeanPoolingLayer, self).__init__(input_shape) 291 | self.output_shape = self.input_shape / np.array([pool_size, pool_size, 1]) 292 | self.pool_size = pool_size 293 | 294 | def _forward(self, input_act): 295 | act = downscale_local_mean(np.rollaxis(input_act, 0, 4), 296 | (self.pool_size, self.pool_size, 1, 1)) 297 | return np.rollaxis(act, 3, 0) 298 | 299 | def _backward(self, grad_act, input_act): 300 | kron_kernel = np.ones((self.pool_size,self.pool_size))[np.newaxis,...,np.newaxis] 301 | grad_input_act = np.kron(grad_act, kron_kernel)/self.pool_size/self.pool_size 302 | return grad_input_act, None 303 | 304 | class ReluLayer(Layer): 305 | """ 306 | Rectified linear unit layer. There are no learnable parameters in this layer type. 307 | """ 308 | def _forward(self, input_act): 309 | return input_act * (input_act>0) 310 | 311 | def _backward(self, grad_act, input_act): 312 | return (input_act>0).astype('float')*grad_act, None 313 | 314 | class SigmoidLayer(Layer): 315 | """ 316 | Sigmoid unit layer. There are no learnable parameters in this layer type. 317 | """ 318 | @staticmethod 319 | def _sigmoid(x): 320 | return 1.0/(1.0+np.exp(-x)) 321 | 322 | def _forward(self, input_act): 323 | return SigmoidLayer._sigmoid(input_act) 324 | 325 | def _backward(self, grad_act, input_act): 326 | out = SigmoidLayer._sigmoid(input_act) 327 | return out*(1.0-out)*grad_act, None 328 | 329 | class DenseLayer(Layer): 330 | def __init__(self, input_shape, num_nodes=1, rand_state=np.random): 331 | """ 332 | Dense/fully connected layer. 
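        The input is flattened, so the layer holds prod(input_shape) x num_nodes weights and
        no bias terms (see BiasLayer). For instance, with input_shape (4, 4, 8) and
        num_nodes=10, the weight matrix has shape (128, 10).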
333 | 334 | Parameters 335 | ---------- 336 | input_shape : tuple of ints specifying shape of a single input 337 | num_nodes : int, number of nodes in the layer 338 | rand_state : a RandomState object 339 | 340 | """ 341 | super(DenseLayer, self).__init__(input_shape) 342 | self.output_shape = np.array([num_nodes]) 343 | self.weights_ = rand_state.randn(np.prod(self.input_shape), num_nodes).astype('float32') 344 | self.weights_ /= np.sqrt(np.prod(self.weights_.shape)) 345 | 346 | def num_params(self): 347 | return self.weights_.size 348 | 349 | def get_params(self): 350 | return self.weights_.ravel() 351 | 352 | def set_params(self, params): 353 | self.weights_ = params.reshape(self.weights_.shape) 354 | 355 | def _forward(self, input_act): 356 | input_act = input_act.reshape((-1,self.weights_.shape[0])) 357 | return np.dot(input_act, self.weights_) 358 | 359 | def _backward(self, grad_act, input_act): 360 | input_act = input_act.reshape((-1,self.weights_.shape[0])) 361 | 362 | grad_input_act = np.dot(grad_act, self.weights_.T) 363 | grad_input_act = grad_input_act.reshape((-1,) + tuple(self.input_shape)) 364 | 365 | grad_params = np.array([np.outer(act, grad) for act, grad in zip(input_act, grad_act)]) 366 | grad_params = grad_params.mean(0) 367 | 368 | return grad_input_act, grad_params 369 | 370 | class BiasLayer(Layer): 371 | def __init__(self, input_shape, init_val=0, rand_state=np.random): 372 | """ 373 | Bias layer. For an input shape of [...] x N, this layer adds N bias terms. E.g., for a 374 | convolutional layer with an output of shape WxHxC where C is the number of channels/filters, 375 | this layer will contain C bias terms, one for each filter. 376 | 377 | Parameters 378 | ---------- 379 | input_shape : tuple of ints specifying shape of a single input 380 | init_val : float, value to initialize all weights with 381 | rand_state : a RandomState object 382 | 383 | """ 384 | super(BiasLayer, self).__init__(input_shape) 385 | # assert len(input_shape) == 3 386 | self.output_shape = np.array(input_shape) 387 | self.weights_ = np.ones(input_shape[-1]) * init_val 388 | 389 | def num_params(self): 390 | return self.weights_.size 391 | 392 | def get_params(self): 393 | return self.weights_.ravel() 394 | 395 | def set_params(self, params): 396 | self.weights_ = params.reshape(self.weights_.shape) 397 | 398 | def _forward(self, input_act): 399 | return input_act + self.weights_ 400 | 401 | def _backward(self, grad_act, input_act): 402 | grad_input_act = grad_act 403 | # sum over the width and height dimensions (if any), average over all input examples 404 | grad_params = grad_act.mean(0) 405 | while grad_params.ndim > 1: 406 | grad_params = grad_params.sum(0) 407 | 408 | return grad_input_act, grad_params 409 | 410 | class NNet(object): 411 | def __init__(self, layer_args, input_shape, rand_state=np.random): 412 | """ 413 | Abstract neural net class. 414 | 415 | Parameters 416 | ---------- 417 | layer_args : list of (LayerClass, kwargs) tuples where LayerClass is a class that inherits 418 | from the Layer class, and kwargs are to be passed into the constructor of that class. 419 | layer_args[0] is the first layer, closest to the input, and layer_args[-1] is the 420 | top-most layer. The kwargs need not include the input_shape argument -- this will be 421 | determined automatically starting with the input_shape for the network (see below). 
422 | input_shape : tuple of ints specifying shape of a single input to the network 423 | rand_state : a RandomState object 424 | 425 | """ 426 | # layer_args is a list of (layer_class, layer_init_args) for first through last layer 427 | self.layers_ = [] 428 | self.input_shape = input_shape 429 | for args in layer_args: 430 | layer_class, args = args 431 | args['rand_state'] = rand_state 432 | layer = layer_class(input_shape, **args) 433 | self.layers_.append(layer) 434 | # get input shape for the next layer 435 | input_shape = layer.output_shape 436 | 437 | self._rand_state = rand_state 438 | self._cache_acts = None 439 | 440 | # this will keep track of how many batches and epochs have been trained 441 | self.num_batch = 0 442 | self.num_epoch = 0 443 | 444 | def set_params(self, params): 445 | """ 446 | Set parameters to the network (i.e. all the layer parameters). 447 | 448 | Parameters 449 | ---------- 450 | params : numpy array of length self.num_params() 451 | 452 | """ 453 | ind = 0 454 | for layer in self.layers_: 455 | num_params = layer.num_params() 456 | if num_params: 457 | layer.set_params(params[ind:ind+num_params]) 458 | ind += num_params 459 | 460 | def get_params(self): 461 | """ 462 | Returns a single numpy array of length self.num_params() with all the parameters (i.e. all 463 | the layer parameters concatenated into one vector). 464 | """ 465 | return np.concatenate([layer.get_params() 466 | for layer in self.layers_ if layer.get_params() is not None]) 467 | 468 | def num_params(self): 469 | """ 470 | Returns the number of (learnable) parameters in the entire network. 471 | """ 472 | return np.sum([layer.num_params() for layer in self.layers_]) 473 | 474 | def num_nodes(self): 475 | """ 476 | Returns the number of nodes/neurons in the network. 477 | """ 478 | return (np.sum(np.prod(layer.output_shape) for layer in self.layers_) + 479 | np.prod(self.input_shape)) 480 | 481 | def cost_for_params(self, params, x, y=None): 482 | """ 483 | Calculates the cost of the network for the specified inputs and the specified network 484 | params. 485 | 486 | Parameters 487 | ---------- 488 | params : numpy array of length self.num_params() specified network parameters 489 | x : input examples 490 | y : labels of the examples 491 | 492 | Returns 493 | ------- 494 | cost : float 495 | 496 | """ 497 | curr_params = self.get_params() 498 | self.set_params(params) 499 | cost = self.cost(x, y=y) 500 | # revert params 501 | self.set_params(curr_params) 502 | return cost 503 | 504 | def cost(self, x, y=None, final_acts=None): 505 | """ 506 | Calculates the cost of the network for the specified inputs. Child classes should 507 | implement _cost rather than this method. 508 | 509 | Parameters 510 | ---------- 511 | x : numpy array, training examples; shape should be (NUMBER_OF_EXAMPLES,) + self.input_shape 512 | y : numpy array, training labels, shape should be (NUMBER_OF_EXAMPLES, shape of labels) 513 | final_acts : (optional) output of top-most layer in the network for the set of examples 514 | 515 | Returns 516 | ------- 517 | cost : float 518 | 519 | """ 520 | if final_acts is None: 521 | final_acts = self.forward(x)[-1] 522 | return self._cost(final_acts, y) 523 | 524 | def forward(self, x, batch_size=None): 525 | """ 526 | Forward propagation through the whole network. 
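        All intermediate activations are kept and returned, since param_grad needs them for
        backpropagation; when only the final output is required, forward_final below processes
        the input in batches and keeps only the last layer's activations.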
527 | 528 | Parameters 529 | ---------- 530 | x : numpy array, training examples; shape should be (NUMBER_OF_EXAMPLES,) + self.input_shape 531 | 532 | Returns 533 | ------- 534 | acts : list that contains a numpy array for each layer in the network; the first element in 535 | the list is the array x itself, and each following array is the output of that layer for 536 | the given examples x 537 | 538 | """ 539 | acts = [x] 540 | for layer in self.layers_: 541 | act = layer.forward(acts[-1]) 542 | acts.append(act) 543 | return acts 544 | 545 | def forward_final(self, x, batch_size=None): 546 | """ 547 | Forward propagation through the whole network; returns only output of final layer. 548 | 549 | Parameters 550 | ---------- 551 | x : numpy array, training examples; shape should be (NUMBER_OF_EXAMPLES,) + self.input_shape 552 | batch_size : number of samples to process at a time (conserves memory) 553 | 554 | Returns 555 | ------- 556 | acts : activations of the final layer 557 | 558 | """ 559 | if batch_size is None or batch_size > x.shape[0]: 560 | batch_size = x.shape[0] 561 | 562 | ind = 0 563 | res = [] 564 | while ind < x.shape[0]: 565 | acts = x[ind:ind+batch_size,...] 566 | for layer in self.layers_: 567 | acts = layer.forward(acts) 568 | res.append(acts) 569 | ind += batch_size 570 | return np.concatenate(res) if len(res)>1 else res[0] 571 | 572 | def param_grad(self, x, y=None, acts=None): 573 | """ 574 | Calculate the gradient of the cost function with respect to all learnable parameters of this 575 | network. 576 | 577 | Parameters 578 | ---------- 579 | x : numpy array, training examples; shape should be (NUMBER_OF_EXAMPLES,) + self.input_shape 580 | y : numpy array, training labels, shape should be (NUMBER_OF_EXAMPLES, shape of labels) 581 | acts : (optional) list that contains a numpy array for each layer in the network; the first 582 | element in the list is the array x itself, and each following array is the output of 583 | that layer for the given examples x 584 | 585 | Returns 586 | ------- 587 | param_grad : numpy array of length self.num_params() 588 | 589 | """ 590 | if acts is None: 591 | acts = self.forward(x) 592 | 593 | curr_act_grad = self.cost_grad(final_acts=acts[-1], y=y) 594 | param_grad = [] 595 | 596 | for ind_from_end, layer in enumerate(reversed(self.layers_)): 597 | curr_act_grad, curr_param_grad = layer.backward(curr_act_grad, acts[-2-ind_from_end]) 598 | if curr_param_grad is not None: 599 | param_grad.append(curr_param_grad) 600 | 601 | param_grad.reverse() 602 | return np.concatenate(param_grad) 603 | 604 | @staticmethod 605 | def get_batch(x, y=None, batch_size=128, batch_ind=0, inds=None): 606 | """ 607 | Calculate the gradient of the cost function with respect to all learnable parameters of this 608 | network. 609 | 610 | Parameters 611 | ---------- 612 | x : numpy array, training examples; shape should be (NUMBER_OF_EXAMPLES,) + self.input_shape 613 | y : numpy array, training labels, shape should be (NUMBER_OF_EXAMPLES, shape of labels) 614 | batch_size : number of examples to use in each batch 615 | batch_ind : which batch to return 616 | inds : a permuation of indexes for this dataset (numpy array of length x.shape[0]) 617 | 618 | Returns 619 | ------- 620 | batch_x : subset of at most batch_size examples in x 621 | batch_y : corresponding labels for this batch 622 | 623 | """ 624 | if inds is None: 625 | inds = np.arange(x.shape[0]) 626 | batch_x = x[inds[batch_ind*batch_size:(batch_ind+1)*batch_size],...] 
627 | batch_y = None 628 | if y is not None: 629 | batch_y = y[inds[batch_ind*batch_size:(batch_ind+1)*batch_size],...] 630 | 631 | return batch_x, batch_y 632 | 633 | @staticmethod 634 | def get_num_batch(num_examples, batch_size): 635 | """ 636 | Returns the number of batches for a given number of examples and given batch size. 637 | """ 638 | return int(np.ceil(num_examples/float(batch_size))) 639 | 640 | def split_per_layer(self, vec): 641 | """ Given a vector with entries for each learnable parameter in the net, this method sums 642 | up the entries for each layer and returns a vector of such sums. E.g., can be used to 643 | calculate absolute mean of weights in each layer.""" 644 | split = [] 645 | ind = 0 646 | for layer in self.layers_: 647 | split.append(vec[ind:layer.num_params()]) 648 | ind += layer.num_params() 649 | 650 | return split 651 | 652 | def fit(self, 653 | x, 654 | y=None, 655 | val_x=None, 656 | val_y=None, 657 | val_freq=10, 658 | batch_size=128, 659 | num_epoch=10, 660 | momentum=0.9, 661 | learn_rate=0.01, 662 | learn_rate_decay=0.05, 663 | chill_out_iters=10, 664 | weight_decay=.0005, 665 | verbose=False): 666 | """ 667 | Train the neural network via mini-batch gradient descent. 668 | 669 | Parameters 670 | ---------- 671 | x : numpy array, training examples; shape should be (NUMBER_OF_EXAMPLES,) + self.input_shape 672 | y : numpy array, training labels, shape should be (NUMBER_OF_EXAMPLES, shape of labels) 673 | val_x : validation examples, similar shape as x 674 | val_y : validation labels, similar shape as y 675 | val_freq : validation will be performed every val_freq iterations 676 | batch_size : number of examples to use in each batch 677 | num_epoch : number of epochs to train for (maximum, may terminate earlier) 678 | learn_rate : initial learning rate, will decay as learning proceeds 679 | learn_rate_decay : at each iteration i the learning rate will be 680 | learn_rate/(i*learn_rate_decay+1) 681 | chill_out_iters : if there is no improvement in validation error after this many iterations 682 | of validation, the learning rate will be cut in half and the network will go back to 683 | the set of parameters that achieved the lower cost so far 684 | weight_decay : amount of weight decay to apply 685 | verbose : whether to print debug messages during training or not 686 | 687 | """ 688 | 689 | if verbose: 690 | print '='*80 691 | print 'training net on %d samples' % x.shape[0] 692 | if val_x is not None: 693 | print 'using %d validation samples' % val_x.shape[0] 694 | print '='*80 695 | 696 | min_cost = 1e8 697 | velocity = np.zeros(self.num_params(), dtype='float32') 698 | best_params = self.get_params() 699 | stop = False 700 | no_improvement_iters = 0 701 | num_train = x.shape[0] 702 | num_batch = NNet.get_num_batch(num_train, batch_size) 703 | init_learn_rate = learn_rate 704 | 705 | start_time = time() 706 | for epoch in xrange(self.num_epoch, self.num_epoch + num_epoch): 707 | inds = self._rand_state.permutation(x.shape[0]) 708 | if stop: 709 | break 710 | for batch in xrange(num_batch): 711 | batch_x, batch_y = self.get_batch( 712 | x, y=y, batch_size=batch_size, inds=inds, batch_ind=batch) 713 | assert batch_x.shape[0] > 0 714 | param_grad = self.param_grad(batch_x, y=batch_y) 715 | params = self.get_params() 716 | learn_rate = init_learn_rate/((epoch*num_batch + batch)*learn_rate_decay+1) 717 | velocity = ( 718 | momentum*velocity - 719 | learn_rate*param_grad - 720 | learn_rate*weight_decay*params) 721 | self.set_params(params + velocity) 722 | 723 
| # check validation error every once in a while 724 | if (batch%val_freq == 0 and batch>0) or batch == num_batch-1: 725 | if val_x is None: 726 | val_x, val_y = batch_x, batch_y 727 | val_acts = self.forward_final(val_x, batch_size=batch_size) 728 | cost = self.cost(val_x, val_y, final_acts=val_acts) 729 | # child classes don't necessarily have a concept of "accuracy" and might not 730 | # implement the accuracy method 731 | try: 732 | acc = self.accuracy(val_acts, val_y) 733 | except NotImplemented: 734 | acc = np.nan 735 | 736 | cost_diff = cost - min_cost 737 | 738 | # if there has been significant regression in cost, chill out 739 | if ((cost_diff > 0 and cost_diff/min_cost > 1) or 740 | no_improvement_iters>chill_out_iters): 741 | self.set_params(best_params) 742 | no_improvement_iters = 0 743 | init_learn_rate /= 2 744 | velocity = np.zeros_like(velocity, dtype='float32') 745 | print 'cost was %.3e, chilling out...' % cost 746 | cost = min_cost 747 | elif cost < min_cost: 748 | best_params = self.get_params() 749 | min_cost = cost 750 | no_improvement_iters = 0 751 | else: 752 | no_improvement_iters += 1 753 | 754 | if verbose: 755 | print 'epoch %03d, batch=%04d/%04d' % (epoch, batch+1, num_batch) 756 | print 'cost=%.3e, min_cost=%.3e, acc=%.2f' % (cost, min_cost, acc) 757 | print 'learn_rate=%.3e, velocity L1 norm %f' % (learn_rate, 758 | np.abs(velocity).sum(0)) 759 | print '-'*80 760 | 761 | self.num_epoch = epoch 762 | end_time = time() 763 | print 'training complete [%.2f min]' % ((end_time-start_time)/60) 764 | 765 | def predict(self, x, batch_size=None): 766 | """ 767 | Retruns the output of the final layer of this network. 768 | """ 769 | return self.forward_final(x, batch_size) 770 | 771 | ################################################################################################ 772 | ### METHODS TO OVERWRITE IN CHILD CLASSES 773 | def _cost(self, final_acts, y): 774 | """ 775 | Calculates the cost of the network for the specified inputs. 776 | 777 | Parameters 778 | ---------- 779 | final_acts : output of top-most layer in the network for a set of examples 780 | y : labels of the examples 781 | 782 | Returns 783 | ------- 784 | cost : float 785 | 786 | """ 787 | raise NotImplemented 788 | 789 | def accuracy(self, final_acts, y): 790 | """ 791 | Child class can optionally implement this in case there is a notion of accuracy that is 792 | separate from cost (e.g. cross entropy cost versus classifcation accuracy). 793 | 794 | Parameters 795 | ---------- 796 | final_acts : output of top-most layer in the network for a set of examples 797 | y : labels of the examples 798 | 799 | Returns 800 | ------- 801 | accuracy : float 802 | 803 | """ 804 | raise NotImplemented 805 | 806 | def cost_grad(self, final_acts, y): 807 | """ 808 | Calculates the gradient of the cost function with respect to the top-most layer activations. 809 | 810 | Parameters 811 | ---------- 812 | final_acts : output of top-most layer in the network for a set of examples 813 | y : labels of the examples 814 | 815 | Returns 816 | ------- 817 | cost_grad : numpy array, same shape as the output_shape of the top-most layer. 818 | 819 | """ 820 | raise NotImplemented 821 | 822 | class SoftmaxNet(NNet): 823 | def __init__(self, layer_args, input_shape, rand_state=np.random): 824 | """ 825 | Softmax (cross entropy) cost neural net. 
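
        A minimal usage sketch (illustrative only: x stands for an (N, 784) float array and y
        for an integer label vector covering classes 0-9; see demo_mnist.py and the unit tests
        for complete examples):

            layer_args = [(DenseLayer, dict(num_nodes=16)),
                          (ReluLayer, dict()),
                          (DenseLayer, dict(num_nodes=10)),
                          (BiasLayer, dict())]
            net = SoftmaxNet(layer_args=layer_args,
                             input_shape=(784,),
                             rand_state=np.random.RandomState(0))
            net.fit(x, y, batch_size=64, num_epoch=3, learn_rate=0.1)
            yp = net.predict(x)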
826 | """ 827 | super(SoftmaxNet, self).__init__(layer_args, input_shape, rand_state=rand_state) 828 | self.num_classes = self.layers_[-1].output_shape[0] 829 | 830 | def fit(self, x, y=None, **kwargs): 831 | assert y is not None, 'Labels must be passed in' 832 | assert tuple(np.unique(y)) == tuple(range(self.num_classes)), \ 833 | 'Labels should range from 0 to C-1 where C is the number of nodes in the last layer' 834 | binary_y = self.binarize_labels(y) 835 | if 'val_y' in kwargs: 836 | kwargs['val_y'] = self.binarize_labels(kwargs['val_y']) 837 | super(SoftmaxNet, self).fit(x, binary_y, **kwargs) 838 | 839 | def binarize_labels(self, y): 840 | """ 841 | Turns discrete labels into binary vector labels. 842 | 843 | Parameters 844 | ---------- 845 | y : numpy array of N integers from 0 to C-1 846 | 847 | Returns 848 | ------- 849 | b : numpy array of shape Nx(C-1) s.t. b[i,j]=1 if y[i]==j, and b[i,k] for all k!=j 850 | """ 851 | binary_y = np.zeros((len(y), self.num_classes)) 852 | for c in xrange(self.num_classes): 853 | binary_y[y==c,c] = 1 854 | 855 | return binary_y 856 | 857 | def predict(self, x, batch_size=None): 858 | acts = self.forward_final(x, batch_size) 859 | return np.argmax(acts, axis=1) 860 | 861 | def _cost(self, final_acts, y): 862 | exp_act = safe_exp(final_acts) 863 | lse_act = safe_log(np.sum(exp_act, axis=1)) 864 | return -np.mean(np.sum((y * (final_acts - lse_act[:,np.newaxis])), axis=1)) 865 | 866 | def accuracy(self, final_acts, y): 867 | yp = np.argmax(final_acts, axis=1) 868 | if y.ndim == 2: 869 | y = np.argmax(y, axis=1) 870 | return np.mean(yp == y)*100 871 | 872 | def cost_grad(self, final_acts, y): 873 | exp_act = safe_exp(final_acts) 874 | sum_exp = np.sum(exp_act, axis=1) 875 | return exp_act/sum_exp[:,np.newaxis] - y 876 | 877 | -------------------------------------------------------------------------------- /simple_convnet/helpers.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from matplotlib import pyplot as plt 5 | from warnings import warn 6 | 7 | def filter2D(input_arr, filter): 8 | """ 9 | 2D filtering (i.e. convolution but without mirroring the filter). Mostly a convenience wrapper 10 | around OpenCV. 11 | 12 | Parameters 13 | ---------- 14 | input_arr : numpy array, HxW size 15 | filter : numpy array, H1xW1 size 16 | 17 | Returns 18 | ------- 19 | result : numpy array, HxW size 20 | 21 | """ 22 | return cv2.filter2D(input_arr, 23 | -1, 24 | filter, 25 | borderType=cv2.BORDER_CONSTANT) 26 | 27 | def batch_filter3D(input_arr, filters): 28 | """ 29 | 3D filtering (i.e. convolution but without mirroring the filter). 30 | 31 | Parameters 32 | ---------- 33 | input_arr : numpy array, NxHxWxC size where N is the number of images to be filtered 34 | filter : numpy array, H1xW1xC size 35 | 36 | Returns 37 | ------- 38 | result : numpy array, NxHxW size 39 | 40 | """ 41 | assert input_arr.shape[3] == filters.shape[2] 42 | num_input = input_arr.shape[0] 43 | output = np.zeros(input_arr.shape[:3] + (filters.shape[-1],)) 44 | for n in xrange(num_input): 45 | input1 = input_arr[n] 46 | for f in xrange(filters.shape[-1]): 47 | for c in xrange(filters.shape[-2]): 48 | output[n,:,:,f] += filter2D(input1[...,c].copy(), filters[...,c,f].copy()) 49 | return output 50 | 51 | def padarray(arr, amount): 52 | """ 53 | Pad array by some amount in height and width dimensions. 
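    For example, applied to a batch of shape (N, H, W, C) with amount=(2, 3), the result has
    shape (N, H+4, W+6, C), with the original values centered and zeros around the height and
    width borders.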
54 | 55 | Parameters 56 | ---------- 57 | arr : numpy array, HxW size 58 | amount : (int, int) tuple specifying padding amounts in height and width dimensions. Padding 59 | be added on all 4 sides. 60 | 61 | Returns 62 | ------- 63 | result : numpy array, (H+2*H_pad)x(W+2*W_pad) 64 | 65 | """ 66 | padded = np.zeros(arr.shape[0:1] + 67 | (arr.shape[1]+2*amount[0], arr.shape[2]+2*amount[1]) + 68 | arr.shape[3:]) 69 | padded[:, amount[0]:-amount[0], amount[1]:-amount[1], ...] = arr 70 | return padded 71 | 72 | def atleast(arr, ndim=4): 73 | """ 74 | Increase number of dimensions by adding singleton dimensions. 75 | 76 | Parameters 77 | ---------- 78 | arr : numpy array 79 | ndim : desired number of dimensions 80 | 81 | Returns 82 | ------- 83 | result : numpy array where result.ndim == ndim, size 1 x 1 x 1 ... arr.shape (number of leading 84 | ones will depend on ndim). 85 | 86 | """ 87 | while arr.ndim < ndim: 88 | arr = arr[np.newaxis,...] 89 | return arr 90 | 91 | def safe_exp(v): 92 | """ 93 | Safely apply np.exp. If the input is beyond a "safe" range, it will be clipped and a warning 94 | will be issued. 95 | 96 | """ 97 | v = np.array(v) 98 | if np.any(v > 500): 99 | warn('Warning: exp overflowing!', RuntimeWarning) 100 | return np.exp(np.minimum(v, 500)) 101 | 102 | def safe_log(v): 103 | """ 104 | Safely apply np.log. If the input is beyond a "safe" range, it will be clipped and a warning 105 | will be issued. 106 | 107 | """ 108 | v = np.array(v) 109 | if np.any(v < -300): 110 | warn('Warning: exp overflowing!', RuntimeWarning) 111 | return np.log(np.maximum(v, -300)) 112 | 113 | def choice(num, total): 114 | """ 115 | Returns indecies to (total) randomly selected items out of (num) without replacement. 116 | 117 | """ 118 | return np.random.permutation(total)[:num] 119 | 120 | def imshow(img, ax=None): 121 | """ 122 | Displays an image, taking care of rescaling it and taking care of gray versus color. 123 | 124 | Parameters 125 | ---------- 126 | img : numpy array, HxW or or HxWx1 or HxWx3 size 127 | ax : axes object 128 | 129 | """ 130 | if ax is None: 131 | fig = plt.figure() 132 | ax = fig.add_subplot(111) 133 | prm = dict(interpolation='none') 134 | if img.ndim==2 or img.shape[2]==1: 135 | prm['cmap']=plt.cm.gray 136 | if img.ndim > 2: 137 | img = img[:,:,0] 138 | img -= img.min() 139 | img /= img.max() 140 | ax.imshow(img, **prm) 141 | ax.get_xaxis().set_visible(False) 142 | ax.get_yaxis().set_visible(False) 143 | 144 | def imshows(imgs, num_row=1): 145 | """ 146 | Display a set of images. 
147 | 148 | Parameters 149 | ---------- 150 | imgs : HxWxN or HxWx3xN image stack 151 | num_row : number of rows in the grid of images 152 | 153 | """ 154 | num_imgs = imgs.shape[-1] 155 | fig = plt.figure() 156 | num_col = int(np.ceil(float(num_imgs)/num_row)) 157 | 158 | for i in xrange(num_imgs): 159 | ax = fig.add_subplot(num_row, num_col, i) 160 | imshow(imgs[...,i], ax=ax) -------------------------------------------------------------------------------- /simple_convnet/tests/simple_convnet_tests.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from simple_convnet import convnet as cn 3 | 4 | from scipy.optimize import approx_fprime 5 | 6 | def _check_gradients(layer_args, input_shape): 7 | rand = np.random.RandomState(0) 8 | net = cn.SoftmaxNet(layer_args=layer_args, input_shape=input_shape, rand_state=rand) 9 | x = rand.randn(*(10,)+net.input_shape)/100 10 | y = rand.randn(10) > 0 11 | by = net.binarize_labels(y) 12 | 13 | g1 = approx_fprime(net.get_params(), net.cost_for_params, 1e-5, x, by) 14 | g2 = net.param_grad(x, by) 15 | err = np.max(np.abs(g1-g2))/np.abs(g1).max() 16 | print err 17 | assert err < 1e-3, 'incorrect gradient!' 18 | 19 | def test_dense_layer(): 20 | layer_args = [(cn.DenseLayer, dict(num_nodes=20)), 21 | (cn.DenseLayer, dict(num_nodes=2))] 22 | _check_gradients(layer_args, (10,)) 23 | 24 | def test_relu_layer(): 25 | layer_args = [(cn.ReluLayer, dict()), 26 | (cn.DenseLayer, dict(num_nodes=2))] 27 | _check_gradients(layer_args, (10,)) 28 | 29 | def test_sigmoid_layer(): 30 | layer_args = [(cn.SigmoidLayer, dict()), 31 | (cn.DenseLayer, dict(num_nodes=2))] 32 | _check_gradients(layer_args, (10,)) 33 | 34 | def test_conv_layer(): 35 | layer_args = [(cn.ConvLayer, dict(num_filters=5, filter_shape=(3,3))), 36 | (cn.DenseLayer, dict(num_nodes=2))] 37 | _check_gradients(layer_args, (8,8,3)) 38 | 39 | def test_convbias_layer(): 40 | layer_args = [(cn.ConvLayer, dict(num_filters=5, filter_shape=(3,3))), 41 | (cn.BiasLayer, dict()), 42 | (cn.DenseLayer, dict(num_nodes=2))] 43 | _check_gradients(layer_args, (8,8,3)) 44 | 45 | def test_pool_layer(): 46 | layer_args = [(cn.ConvLayer, dict(num_filters=5, filter_shape=(3,3))), 47 | (cn.MeanPoolingLayer, dict(pool_size=2)), 48 | (cn.DenseLayer, dict(num_nodes=2))] 49 | _check_gradients(layer_args, (8,8,3)) 50 | 51 | def test_deep(): 52 | layer_args = [(cn.ConvLayer, dict(num_filters=5, filter_shape=(3,3))), 53 | (cn.BiasLayer, dict()), 54 | (cn.ReluLayer, dict()), 55 | (cn.MeanPoolingLayer, dict(pool_size=2)), 56 | (cn.ConvLayer, dict(num_filters=5, filter_shape=(3,3))), 57 | (cn.BiasLayer, dict()), 58 | (cn.SigmoidLayer, dict()), 59 | (cn.MeanPoolingLayer, dict(pool_size=2)), 60 | (cn.DenseLayer, dict(num_nodes=10)), 61 | (cn.BiasLayer, dict()), 62 | (cn.DenseLayer, dict(num_nodes=2))] 63 | _check_gradients(layer_args, (18,18,3)) 64 | 65 | def test_fit(): 66 | layer_args = [(cn.DenseLayer, dict(num_nodes=4)), 67 | (cn.DenseLayer, dict(num_nodes=2))] 68 | net = cn.SoftmaxNet(layer_args=layer_args, input_shape=(2,)) 69 | 70 | num = 1000 71 | rand = np.random.RandomState(0) 72 | x = rand.rand(num,2) 73 | y = np.zeros(num) 74 | y[x[:,0]>0.5] = 1 75 | 76 | net.fit(x, y, batch_size=16, learn_rate=1, num_epoch=100, verbose=True) 77 | yp = net.predict(x) 78 | acc = np.mean(y==yp) 79 | assert acc > 0.7 80 | --------------------------------------------------------------------------------