├── __init__.py ├── mozi ├── __init__.py ├── layers │ ├── __init__.py │ ├── embedding.py │ ├── preprocessor.py │ ├── template.py │ ├── misc.py │ ├── vae.py │ ├── linear.py │ ├── noise.py │ ├── alexnet.py │ ├── activation.py │ ├── normalization.py │ ├── convolution.py │ └── recurrent.py ├── utils │ ├── __init__.py │ ├── cnn_utils.py │ ├── theano_utils.py │ ├── check_memory.py │ ├── train_object_utils.py │ ├── progbar.py │ ├── mnist_utils.py │ ├── image.py │ └── utils.py ├── datasets │ ├── __init__.py │ ├── cifar100.py │ ├── cifar10.py │ ├── mnist.py │ ├── dataset_noise.py │ ├── imdb.py │ ├── voc.py │ ├── iterator.py │ ├── dataset.py │ └── preprocessor.py ├── env.py ├── cost.py ├── weight_init.py ├── model.py ├── learning_method.py ├── log.py └── train_object.py ├── .gitignore ├── setup.cfg ├── MANIFEST.in ├── setup.py ├── LICENSE ├── example ├── voc_alexnet.py ├── mnist_vae.py ├── mnist_dae.py ├── mnist_mlp.py ├── cifar10_cnn.py ├── datablocks_example.py └── imdb_bilstm.py ├── doc ├── vae.md ├── dae.md └── cnn.md └── README.md /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mozi/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *pyc 2 | -------------------------------------------------------------------------------- /mozi/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mozi/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mozi/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md LICENCE 2 | recursive-include mozi *.py 3 | recursive-include doc *.md 4 | -------------------------------------------------------------------------------- /mozi/utils/cnn_utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def valid(x, y, kernel, stride): 4 | return ((x-kernel)/stride + 1, (y-kernel)/stride + 1) 5 | 6 | 7 | def full(x, y, kernel, stride): 8 | return ((x+kernel)/stride - 1, (y+kernel)/stride - 1) 9 | 10 | 11 | def spp_outdim(input_channels, levels): 12 | outdim = 0 13 | for lvl in levels: 14 | outdim += lvl**2 * input_channels 15 | return outdim 16 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from setuptools import find_packages 3 | 4 | 5 | setup( 6 | name='mozi', 7 | version='2.0.3', 8 | author=u'Wu Zhen Zhou', 9 | author_email='hyciswu@gmail.com', 10 | install_requires=['numpy>=1.7.1', 
'scipy>=0.11', 11 | 'six>=1.9.0', 'scikit-learn>=0.17', 'pandas>=0.17', 12 | 'matplotlib>=1.5', 'Theano>=0.8'], 13 | url='https://github.com/hycis/Mozi', 14 | license='The MIT License (MIT), see LICENSE', 15 | description='Deep learning package based on Theano for building all kinds of models', 16 | long_description=open('README.md').read(), 17 | packages=find_packages(), 18 | zip_safe=False, 19 | include_package_data=True 20 | ) 21 | -------------------------------------------------------------------------------- /mozi/env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | def setenv(): 4 | NNdir = os.path.dirname(os.path.realpath(__file__)) 5 | NNdir = os.path.dirname(NNdir) 6 | 7 | # directory to save all the datasets 8 | if not os.getenv('MOZI_DATA_PATH'): 9 | os.environ['MOZI_DATA_PATH'] = NNdir + '/data' 10 | 11 | # directory for saving the database that is used for logging the results 12 | if not os.getenv('MOZI_DATABASE_PATH'): 13 | os.environ['MOZI_DATABASE_PATH'] = NNdir + '/database' 14 | 15 | # directory to save all the trained models and outputs 16 | if not os.getenv('MOZI_SAVE_PATH'): 17 | os.environ['MOZI_SAVE_PATH'] = NNdir + '/save' 18 | 19 | print('MOZI_DATA_PATH = ' + os.environ['MOZI_DATA_PATH']) 20 | print('MOZI_SAVE_PATH = ' + os.environ['MOZI_SAVE_PATH']) 21 | print('MOZI_DATABASE_PATH = ' + os.environ['MOZI_DATABASE_PATH']) 22 | -------------------------------------------------------------------------------- /mozi/utils/theano_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | floatX = theano.config.floatX 7 | ''' 8 | from keras 9 | ''' 10 | 11 | def asfloatX(X): 12 | return np.asarray(X, dtype=floatX) 13 | 14 | def sharedX(value, dtype=floatX, name=None, borrow=False, **kwargs): 15 | return theano.shared(np.asarray(value, dtype=dtype), name=name, borrow=borrow, **kwargs) 16 | 17 | def shared_zeros(shape, dtype=floatX, name=None, **kwargs): 18 | return sharedX(np.zeros(shape), dtype=dtype, name=name, **kwargs) 19 | 20 | def shared_scalar(val=0., dtype=floatX, name=None, **kwargs): 21 | return theano.shared(np.cast[dtype](val), name=name, **kwargs) 22 | 23 | def shared_ones(shape, dtype=floatX, name=None, **kwargs): 24 | return sharedX(np.ones(shape), dtype=dtype, name=name, **kwargs) 25 | 26 | def alloc_zeros_matrix(*dims): 27 | return T.alloc(np.cast[floatX](0.), *dims) 28 | -------------------------------------------------------------------------------- /mozi/layers/embedding.py: -------------------------------------------------------------------------------- 1 | 2 | from mozi.layers.template import Template 3 | from mozi.utils.theano_utils import sharedX 4 | from mozi.weight_init import UniformWeight 5 | 6 | class Embedding(Template): 7 | ''' 8 | Turn positive integers (indexes) into dense vectors of fixed size. 9 | e.g.
[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]] 10 | 11 | @input_dim: size of vocabulary (highest input integer + 1) 12 | @output_dim: size of dense representation 13 | ''' 14 | def __init__(self, input_dim, output_dim, init=UniformWeight(scale=0.1), weights=None): 15 | 16 | self.input_dim = input_dim 17 | self.output_dim = output_dim 18 | if weights is None: 19 | self.W = init((input_dim, output_dim)) 20 | else: 21 | self.W = sharedX(weights) 22 | self.params = [self.W] 23 | 24 | def _train_fprop(self, state_below): 25 | state_below = state_below.astype('int32') 26 | return self.W[state_below] 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Zhenzhou Wu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /mozi/layers/preprocessor.py: -------------------------------------------------------------------------------- 1 | 2 | import theano.tensor as T 3 | from mozi.layers.template import Template 4 | 5 | class Scale(Template): 6 | 7 | """ 8 | Scale the input into a range 9 | 10 | Parameters 11 | ---------- 12 | X : ndarray, 2-dimensional 13 | numpy matrix with examples indexed on the first axis and 14 | features indexed on the second. 15 | 16 | global_max : real 17 | the maximum value of the whole dataset. If not provided, global_max is set to X.max() 18 | 19 | global_min : real 20 | the minimum value of the whole dataset. 
If not provided, global_min is set to X.min() 21 | 22 | scale_range : size 2 list 23 | set the upper bound and lower bound after scaling 24 | 25 | buffer : float 26 | the buffer on the upper lower bound such that [L+buffer, U-buffer] 27 | """ 28 | 29 | 30 | def __init__(self, global_max, global_min, scale_range=[-1,1], buffer=0.1): 31 | 32 | self.scale_range = scale_range 33 | self.buffer = buffer 34 | self.max = global_max 35 | self.min = global_min 36 | assert scale_range[0] + buffer < scale_range[1] - buffer, \ 37 | 'the lower bound is larger than the upper bound' 38 | self.params = [] 39 | 40 | 41 | def _train_fprop(self, state_below): 42 | width = self.max - self.min 43 | scale = (self.scale_range[1] - self.scale_range[0] - 2 * self.buffer) / width 44 | state_below = scale * (state_below - self.min) 45 | state_below = state_below + self.scale_range[0] + self.buffer 46 | return state_below 47 | -------------------------------------------------------------------------------- /example/voc_alexnet.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | import theano.tensor as T 5 | 6 | from mozi.datasets.voc import VOC 7 | from mozi.model import Sequential 8 | from mozi.layers.alexnet import Alexnet 9 | from mozi.log import Log 10 | from mozi.train_object import TrainObject 11 | from mozi.cost import error, entropy 12 | from mozi.learning_method import SGD 13 | from mozi.env import setenv 14 | 15 | import os 16 | import theano 17 | 18 | 19 | def train(): 20 | 21 | data = VOC(batch_size=32, train_valid_test_ratio=[5,1,1]) 22 | model = Sequential(input_var=T.tensor4(), output_var=T.matrix()) 23 | model.add(Alexnet(input_shape=(3,222,222), output_dim=11)) 24 | # build learning method 25 | learning_method = SGD(learning_rate=0.01, momentum=0.9, 26 | lr_decay_factor=0.9, decay_batch=5000) 27 | # put everything into the train object 28 | train_object = TrainObject(model = model, 29 | log = None, 30 | dataset = data, 31 | train_cost = error, 32 | valid_cost = error, 33 | learning_method = learning_method, 34 | stop_criteria = {'max_epoch' : 10, 35 | 'epoch_look_back' : 5, 36 | 'percent_decrease' : 0.01} 37 | ) 38 | # finally run the code 39 | train_object.setup() 40 | train_object.run() 41 | 42 | if __name__ == '__main__': 43 | setenv() 44 | train() 45 | -------------------------------------------------------------------------------- /doc/vae.md: -------------------------------------------------------------------------------- 1 | Variational Autoencoder 2 | ===== 3 | You can try the Variational Autoencoder [Example](../example/mnist_vae.py) running on Mnist. 
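The `SGVB_bin` cost used below is the per-example variational lower bound for binary (Bernoulli) outputs, as implemented in `mozi/cost.py`:

```
L = sum_j[ y_j*log(y'_j) + (1-y_j)*log(1-y'_j) ]
    + 0.5 * sum_k[ 1 + 2*logsig_k - miu_k^2 - exp(2*logsig_k) ]
```

where the reconstruction `y'` and the encoder outputs `miu` and `logsig` are the three values returned by the `VariationalAutoencoder` layer, and the result is averaged over the batch.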
4 | ```python 5 | import os 6 | 7 | import theano 8 | import theano.tensor as T 9 | import numpy as np 10 | 11 | from mozi.datasets.mnist import Mnist 12 | from mozi.model import Sequential 13 | from mozi.layers.vae import VariationalAutoencoder 14 | from mozi.log import Log 15 | from mozi.train_object import TrainObject 16 | from mozi.cost import SGVB_bin 17 | from mozi.learning_method import SGD 18 | 19 | # build dataset 20 | data = Mnist(batch_size=100, binary=False, train_valid_test_ratio=[5,1,1]) 21 | # for autoencoder, the output will be equal to input 22 | data.set_train(X=data.get_train().X, y=data.get_train().X) 23 | data.set_valid(X=data.get_valid().X, y=data.get_valid().X) 24 | 25 | # build model 26 | model = Sequential(input_var=T.matrix(), output_var=T.matrix()) 27 | model.add(VariationalAutoencoder(input_dim=28*28, bottlenet_dim=200, z_dim=20)) 28 | 29 | # build learning method 30 | learning_method = SGD(learning_rate=0.0001, momentum=0.9, 31 | lr_decay_factor=0.9, decay_batch=10000) 32 | 33 | # put everything into the train object 34 | train_object = TrainObject(model = model, 35 | log = None, 36 | dataset = data, 37 | train_cost = SGVB_bin, 38 | valid_cost = SGVB_bin, 39 | learning_method = learning_method, 40 | stop_criteria = {'max_epoch' : 10, 41 | 'epoch_look_back' : 5, 42 | 'percent_decrease' : 0.01} 43 | ) 44 | # finally run the code 45 | train_object.setup() 46 | train_object.run() 47 | ``` 48 | -------------------------------------------------------------------------------- /mozi/datasets/cifar100.py: -------------------------------------------------------------------------------- 1 | import logging 2 | logger = logging.getLogger(__name__) 3 | import os 4 | import cPickle 5 | import numpy as np 6 | import theano 7 | floatX = theano.config.floatX 8 | 9 | from mozi.utils.utils import get_file, make_one_hot 10 | from mozi.datasets.dataset import SingleBlock 11 | 12 | class Cifar100(SingleBlock): 13 | 14 | def __init__(self, flatten=False, fine_label=True, **kwargs): 15 | ''' 16 | PARAM: 17 | fine_label: True (100 classes) False (20 classes) 18 | ''' 19 | 20 | im_dir = os.environ['MOZI_DATA_PATH'] + '/cifar100/' 21 | path = 'http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz' 22 | im_dir = get_file(fpath="{}/cifar-100-python.tar.gz".format(im_dir), origin=path, untar=True) 23 | 24 | self.img_shape = (3,32,32) 25 | self.img_size = np.prod(self.img_shape) 26 | 27 | fnames = ['train', 'test'] 28 | 29 | X = [] 30 | y = [] 31 | for fname in fnames: 32 | data_path = "{}/{}".format(im_dir, fname) 33 | with open(data_path) as fin: 34 | data_batch = cPickle.load(fin) 35 | if flatten: 36 | X.extend(data_batch['data'].reshape((len(data_batch['data']), self.img_size))) 37 | else: 38 | X.extend(data_batch['data'].reshape((len(data_batch['data']),)+self.img_shape)) 39 | if fine_label: 40 | y.extend(data_batch['fine_labels']) 41 | self.n_classes = 100 42 | else: 43 | y.extend(data_batch['coarse_labels']) 44 | self.n_classes = 20 45 | 46 | X_npy = np.array(X, dtype=floatX) 47 | X_npy /= 255.0 48 | y_npy = make_one_hot(y, onehot_size=self.n_classes) 49 | 50 | super(Cifar100, self).__init__(X=X_npy, y=y_npy, **kwargs) 51 | -------------------------------------------------------------------------------- /mozi/datasets/cifar10.py: -------------------------------------------------------------------------------- 1 | import logging 2 | logger = logging.getLogger(__name__) 3 | import os 4 | import cPickle 5 | import numpy as np 6 | import theano 7 | floatX = theano.config.floatX 8 | 9 | 
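# Cifar10 below downloads the pickled CIFAR-10 batches, rescales the pixels
# to [0, 1], one-hot encodes the labels and shuffles the examples before
# handing everything to SingleBlock.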
from mozi.utils.utils import get_file, make_one_hot 10 | from mozi.datasets.dataset import SingleBlock 11 | 12 | class Cifar10(SingleBlock): 13 | 14 | def __init__(self, flatten=False, **kwargs): 15 | 16 | im_dir = os.environ['MOZI_DATA_PATH'] + '/cifar10/' 17 | path = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' 18 | im_dir = get_file(fpath="{}/cifar-10-python.tar.gz".format(im_dir), origin=path, untar=True) 19 | self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 20 | 'dog', 'frog','horse','ship','truck'] 21 | 22 | self.img_shape = (3,32,32) 23 | self.img_size = np.prod(self.img_shape) 24 | self.n_classes = 10 25 | fnames = ['data_batch_%i' % i for i in range(1,6)] + ['test_batch'] 26 | 27 | X = [] 28 | y = [] 29 | for fname in fnames: 30 | data_path = "{}/{}".format(im_dir, fname) 31 | with open(data_path) as fin: 32 | data_batch = cPickle.load(fin) 33 | if flatten: 34 | X.extend(data_batch['data'].reshape((len(data_batch['data']), self.img_size))) 35 | else: 36 | X.extend(data_batch['data'].reshape((len(data_batch['data']),)+self.img_shape)) 37 | y.extend(data_batch['labels']) 38 | 39 | 40 | X_npy = np.array(X, dtype=floatX) 41 | X_npy /= 255.0 42 | y_npy = make_one_hot(y, onehot_size=self.n_classes) 43 | ridx = np.arange(len(X_npy)) 44 | np.random.shuffle(ridx) 45 | X_npy = X_npy[ridx] 46 | y_npy = y_npy[ridx] 47 | 48 | super(Cifar10, self).__init__(X=X_npy, y=y_npy, **kwargs) 49 | -------------------------------------------------------------------------------- /mozi/datasets/mnist.py: -------------------------------------------------------------------------------- 1 | import logging 2 | logger = logging.getLogger(__name__) 3 | import os 4 | import numpy as np 5 | import theano 6 | 7 | from mozi.utils.mnist_utils import read_mnist_images, read_mnist_labels, get_mnist_file 8 | from mozi.datasets.dataset import SingleBlock, DataBlocks 9 | 10 | 11 | class Mnist(SingleBlock): 12 | 13 | def __init__(self, binary=True, **kwargs): 14 | 15 | im_dir = os.environ['MOZI_DATA_PATH'] + '/mnist/' 16 | 17 | url = 'http://yann.lecun.com/exdb/mnist' 18 | 19 | paths = [] 20 | for fname in ['train-images-idx3-ubyte', 'train-labels-idx1-ubyte', 21 | 't10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte']: 22 | path = get_mnist_file('{}/{}'.format(im_dir,fname), origin='{}/{}.gz'.format(url,fname)) 23 | paths.append(path) 24 | 25 | train_X = read_mnist_images(paths[0], dtype='float32') 26 | train_y = read_mnist_labels(paths[1]) 27 | 28 | test_X = read_mnist_images(paths[2], dtype='float32') 29 | test_y = read_mnist_labels(paths[3]) 30 | 31 | train_X = train_X.reshape(train_X.shape[0], train_X.shape[1] * train_X.shape[2]) 32 | test_X = test_X.reshape(test_X.shape[0], test_X.shape[1] * test_X.shape[2]) 33 | 34 | X = np.concatenate((train_X, test_X), axis=0) 35 | 36 | train_y_tmp = np.zeros((train_X.shape[0], 10), dtype=theano.config.floatX) 37 | test_y_tmp = np.zeros((test_X.shape[0], 10), dtype=theano.config.floatX) 38 | 39 | for i in xrange(train_X.shape[0]): 40 | train_y_tmp[i, train_y[i]] = 1 41 | 42 | for i in xrange(test_X.shape[0]): 43 | test_y_tmp[i, test_y[i]] = 1 44 | 45 | train_y = train_y_tmp 46 | test_y = test_y_tmp 47 | 48 | if binary: 49 | X = (X >= 0.5).astype(int) 50 | y = np.concatenate((train_y, test_y), axis=0) 51 | 52 | super(Mnist, self).__init__(X=X, y=y, **kwargs) 53 | -------------------------------------------------------------------------------- /mozi/utils/check_memory.py: -------------------------------------------------------------------------------- 1 
| import os 2 | _proc_status = '/proc/%d/status' % os.getpid() 3 | 4 | _scale = {'kB': 1024.0, 'mB': 1024.0*1024.0, 5 | 'KB': 1024.0, 'MB': 1024.0*1024.0} 6 | 7 | def _VmB(VmKey): 8 | '''Private. 9 | ''' 10 | global _proc_status, _scale 11 | # read the pseudo file /proc/<pid>/status 12 | try: 13 | t = open(_proc_status) 14 | v = t.read() 15 | t.close() 16 | except IOError: 17 | return 0.0 # non-Linux? 18 | # get VmKey line e.g. 'VmRSS: 9999 kB\n ...' 19 | i = v.index(VmKey) 20 | v = v[i:].split(None, 3) # whitespace 21 | if len(v) < 3: 22 | return 0.0 # invalid format? 23 | # convert Vm value to bytes 24 | return float(v[1]) * _scale[v[2]] 25 | 26 | 27 | def memory(since=0.0): 28 | '''Return virtual memory usage in bytes. 29 | ''' 30 | return _VmB('VmSize:') - since 31 | 32 | def peak_Vm(since=0.0): 33 | '''Return the peak virtual memory usage in bytes. 34 | ''' 35 | return _VmB('VmPeak:') - since 36 | 37 | def resident(since=0.0): 38 | '''Return resident memory usage in bytes. 39 | ''' 40 | return _VmB('VmRSS:') - since 41 | 42 | def peak_resident(since=0.0): 43 | '''Return the peak resident memory usage in bytes. 44 | ''' 45 | return _VmB('VmHWM:') - since 46 | 47 | def stacksize(since=0.0): 48 | '''Return stack size in bytes. 49 | ''' 50 | return _VmB('VmStk:') - since 51 | 52 | 53 | def get_mem_usage(): 54 | denom = 1024 * 1024 * 1024 55 | unit = 'GB' 56 | rstr = 'virtual memory: ' + str(memory()/denom) + ' {}\n'.format(unit) 57 | rstr += 'peak virtual memory: ' + str(peak_Vm()/denom) + ' {}\n'.format(unit) 58 | rstr += 'resident memory: ' + str(resident()/denom) + ' {}\n'.format(unit) 59 | rstr += 'peak resident memory: ' + str(peak_resident()/denom) + ' {}\n'.format(unit) 60 | rstr += 'stacksize: ' + str(stacksize()/denom) + ' {}\n'.format(unit) 61 | 62 | return rstr 63 | -------------------------------------------------------------------------------- /mozi/cost.py: -------------------------------------------------------------------------------- 1 | 2 | import theano.tensor as T 3 | import theano 4 | from mozi.utils.utils import theano_unique 5 | from mozi.utils.theano_utils import asfloatX 6 | 7 | floatX = theano.config.floatX 8 | 9 | if floatX == 'float64': 10 | epsilon = 1.0e-8 11 | else: 12 | epsilon = 1.0e-6 13 | 14 | def accuracy(y, y_pred): 15 | L = T.eq(y_pred.argmax(axis=1), y.argmax(axis=1)) 16 | return T.mean(L) 17 | # L = T.eq(y_pred.argmax(axis=1), y.argmax(axis=1)) 18 | # return T.sum(L) / y.shape[0].astype(floatX) 19 | 20 | def mse(y, y_pred): 21 | return T.mean(T.sqr(y-y_pred)) 22 | 23 | def entropy(y, y_pred): 24 | y_pred = T.clip(y_pred, epsilon, 1.0 - epsilon) 25 | L = -(y * T.log(y_pred) + (1-y) * T.log(1-y_pred)) 26 | return T.mean(L) 27 | # L = - T.sum(y * T.log(y_pred) + (1-y) * T.log(1-y_pred), axis=1) 28 | # return T.mean(L) 29 | 30 | def error(y, y_pred): 31 | L = T.neq(y_pred.argmax(axis=1), y.argmax(axis=1)) 32 | return T.mean(L) 33 | 34 | def recall(y, y_pred): 35 | L = T.eq(y_pred.argmax(axis=1), y.argmax(axis=1)) 36 | return T.sum(L) / y.shape[0].astype(floatX) 37 | 38 | def precision(y, y_pred): 39 | L = T.eq(y_pred.argmax(axis=1), y.argmax(axis=1)) 40 | return T.sum(L) / y_pred.shape[0].astype(floatX) 41 | 42 | def f1(y, y_pred): 43 | r = recall(y, y_pred) 44 | p = precision(y, y_pred) 45 | return 2 * p * r / (p + r) 46 | 47 | def hingeloss(y, y_pred): 48 | y_pred = T.clip(y_pred, 0., 1.0) 49 | L = T.maximum(0., 1 - y * y_pred) 50 | return T.mean(L) 51 | 52 | def abs(y, y_pred): 53 | return T.mean(T.abs_(y-y_pred)) 54 | 55 | def SGVB_bin(y, y_pred): 56 | ''' 57 | This cost
function is for variational autoencoder with binary inputs 58 | ''' 59 | ypred, miu_e, logsig_e = y_pred 60 | ypred = T.clip(ypred, epsilon, 1.0 - epsilon) 61 | logpxz = -T.nnet.binary_crossentropy(ypred, y).sum(axis=1) 62 | L = logpxz + 0.5 * (1 + 2*logsig_e - miu_e**2 - T.exp(2*logsig_e)).sum(axis=1) 63 | return L.mean() 64 | -------------------------------------------------------------------------------- /mozi/layers/template.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Template(object): 4 | """ 5 | DESCRIPTION: 6 | The interface to be implemented by any layer. 7 | """ 8 | def __init__(self): 9 | ''' 10 | FIELDS: 11 | self.params: any params from the layer that needs to be updated 12 | by backpropagation can be put inside self.params 13 | self.updates: use for updating any shared variables 14 | ''' 15 | self.params = [] 16 | self.updates = [] 17 | 18 | def _test_fprop(self, state_below): 19 | ''' 20 | DESCRIPTION: 21 | This is called during validating/testing of the model. 22 | PARAM: 23 | state_below: the input to layer 24 | ''' 25 | return self._train_fprop(state_below) 26 | 27 | def _train_fprop(self, state_below): 28 | ''' 29 | DESCRIPTION: 30 | This is called during every training batch whereby the output from the 31 | model will be used to update the parameters during error backpropagation. 32 | PARAM: 33 | state_below: the input to layer 34 | ''' 35 | raise NotImplementedError() 36 | 37 | 38 | def _layer_stats(self, state_below, layer_output): 39 | """ 40 | DESCRIPTION: 41 | Layer stats is used for debugging the layer by allowing user to peek 42 | at the weight values or the layer output or any parameters of interest 43 | during training. By computing the values of parameter of interest, 44 | for example T.max(self.W) and put in the return list, the training will 45 | print the maximum of the weight in the layer after every epoch. 
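For example, a subclass that returns [('max_W', T.max(self.W))] will have max_W printed after every epoch.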
46 | PARAM: 47 | state_below: the input to layer 48 | layer_output: the output from the layer 49 | RETURN: 50 | A list of tuples of [('name_a', var_a), ('name_b', var_b)] whereby var is scalar 51 | """ 52 | return [] 53 | -------------------------------------------------------------------------------- /example/mnist_vae.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | 4 | import theano 5 | import theano.tensor as T 6 | import numpy as np 7 | 8 | from mozi.datasets.mnist import Mnist 9 | from mozi.model import Sequential 10 | from mozi.layers.vae import VariationalAutoencoder 11 | from mozi.log import Log 12 | from mozi.train_object import TrainObject 13 | from mozi.cost import SGVB_bin 14 | from mozi.learning_method import * 15 | from mozi.weight_init import * 16 | from mozi.env import setenv 17 | 18 | 19 | def train(): 20 | """ 21 | This examples implements the variational autoencoder from the paper 22 | Auto-Encoding Variational Bayes by Diederik P Kingma, Max Welling, arXiv:1312.6114 23 | """ 24 | 25 | # build dataset 26 | data = Mnist(batch_size=100, binary=False, train_valid_test_ratio=[5,1,1]) 27 | # for autoencoder, the output will be equal to input 28 | data.set_train(X=data.get_train().X, y=data.get_train().X) 29 | data.set_valid(X=data.get_valid().X, y=data.get_valid().X) 30 | 31 | # build model 32 | model = Sequential(input_var=T.matrix(), output_var=T.matrix()) 33 | model.add(VariationalAutoencoder(input_dim=28*28, bottlenet_dim=200, z_dim=20)) 34 | 35 | # build learning method 36 | learning_method = SGD(learning_rate=0.0001, momentum=0.9, 37 | lr_decay_factor=0.9, decay_batch=10000) 38 | 39 | # put everything into the train object 40 | train_object = TrainObject(model = model, 41 | log = None, 42 | dataset = data, 43 | train_cost = SGVB_bin, 44 | valid_cost = SGVB_bin, 45 | learning_method = learning_method, 46 | stop_criteria = {'max_epoch' : 10, 47 | 'epoch_look_back' : 5, 48 | 'percent_decrease' : 0.01} 49 | ) 50 | # finally run the code 51 | train_object.setup() 52 | train_object.run() 53 | 54 | 55 | if __name__ == '__main__': 56 | setenv() 57 | train() 58 | -------------------------------------------------------------------------------- /mozi/layers/misc.py: -------------------------------------------------------------------------------- 1 | 2 | import theano 3 | import theano.tensor as T 4 | from mozi.layers.template import Template 5 | import numpy as np 6 | 7 | class Flatten(Template): 8 | 9 | def _train_fprop(self, state_below): 10 | size = T.prod(state_below.shape) / state_below.shape[0] 11 | nshape = (state_below.shape[0], size) 12 | return T.reshape(state_below, nshape) 13 | 14 | 15 | class Reshape(Template): 16 | 17 | def __init__(self, dims): 18 | self.params = [] 19 | self.dims = dims 20 | 21 | def _train_fprop(self, state_below): 22 | nshape = (state_below.shape[0],) + self.dims 23 | return T.reshape(state_below, nshape) 24 | 25 | 26 | class Transform(Template): 27 | 28 | def __init__(self, dims): 29 | ''' 30 | Reshaping the data such that the first dim alters when the rest of the 31 | dim is altered. If X of shape (a, b, c, d) and input dims of shape (d, e), 32 | then return shape will be (a*b*c*d/(d*e), d, e). Useful for tranforming 33 | data in RNN/LSTM with mlp layers before recurrent layers. 
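For example, X of shape (64, 10, 28) with dims=(28,) is reshaped to (640, 28).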
34 | ''' 35 | self.params = [] 36 | self.dims = dims 37 | 38 | def _train_fprop(self, state_below): 39 | first_dim = T.prod(state_below.shape) / np.prod(self.dims) 40 | return T.reshape(state_below, (first_dim,)+self.dims) 41 | 42 | 43 | class Crop(Template): 44 | def __init__(self, border): 45 | self.border = border 46 | self.params = [] 47 | assert len(self.border) == 2 48 | 49 | def _train_fprop(self, state_below): 50 | w, h = self.border 51 | return state_below[:,:,h:-h,w:-w] 52 | 53 | 54 | class Parallel(Template): 55 | 56 | def __init__(self, *models): 57 | self.models = models 58 | self.params = [] 59 | for model in self.models: 60 | for layer in model.layers: 61 | self.params += layer.params 62 | 63 | def _train_fprop(self, state_below): 64 | rstate = [] 65 | for model, state in zip(self.models, state_below): 66 | out, _ = model.train_fprop(state) 67 | rstate.append(out) 68 | return rstate 69 | -------------------------------------------------------------------------------- /mozi/layers/vae.py: -------------------------------------------------------------------------------- 1 | 2 | import theano 3 | import theano.tensor as T 4 | from theano.sandbox.rng_mrg import MRG_RandomStreams 5 | 6 | from mozi.layers.template import Template 7 | from mozi.weight_init import GaussianWeight 8 | from mozi.utils.theano_utils import shared_zeros 9 | 10 | floatX = theano.config.floatX 11 | theano_rand = MRG_RandomStreams() 12 | 13 | class VariationalAutoencoder(Template): 14 | 15 | def __init__(self, input_dim, bottlenet_dim, z_dim, weight_init=GaussianWeight(mean=0, std=0.01)): 16 | 17 | self.input_dim = input_dim 18 | self.bottlenet_dim = bottlenet_dim 19 | 20 | # encoder 21 | self.W_e = weight_init((input_dim, bottlenet_dim), name='W_e') 22 | self.b_e = shared_zeros(shape=bottlenet_dim, name='b_e') 23 | self.W_miu = weight_init((bottlenet_dim, z_dim), name='W_miu') 24 | self.b_miu = shared_zeros(shape=z_dim, name='b_miu') 25 | self.W_sig = weight_init((bottlenet_dim, z_dim), name='W_sig') 26 | self.b_sig = shared_zeros(shape=z_dim, name='b_sig') 27 | # decoder 28 | self.W1_d = weight_init((z_dim, bottlenet_dim), name='W1_d') 29 | self.b1_d = shared_zeros(shape=bottlenet_dim, name='b1_d') 30 | self.W2_d = weight_init((bottlenet_dim, input_dim), name='W2_d') 31 | self.b2_d = shared_zeros(shape=input_dim, name='b2_d') 32 | 33 | self.params = [self.W_e, self.b_e, self.W_miu, self.b_miu, self.W_sig, self.b_sig, 34 | self.W1_d, self.b1_d, self.W2_d, self.b2_d] 35 | 36 | 37 | def _train_fprop(self, state_below): 38 | h_e = T.tanh(T.dot(state_below, self.W_e) + self.b_e) 39 | miu_e = T.dot(h_e, self.W_miu) + self.b_miu 40 | logsig_e = 0.5 * (T.dot(h_e, self.W_sig) + self.b_sig) 41 | eps = theano_rand.normal(avg=0, std=1, size=logsig_e.shape, dtype=floatX) 42 | z = miu_e + T.exp(logsig_e) * eps 43 | h_d = T.tanh(T.dot(z, self.W1_d) + self.b1_d) 44 | y = T.nnet.sigmoid(T.dot(h_d, self.W2_d) + self.b2_d) 45 | return y, miu_e, logsig_e 46 | 47 | 48 | def _layer_stats(self, state_below, layer_output): 49 | y, miu, logsig = layer_output 50 | return [('W_miu', self.W_miu.mean()), 51 | ('W_e', self.W_e.mean()), 52 | ('logsig', logsig.mean()), 53 | ('ymean,', y.mean()), 54 | ('miu', miu.mean())] 55 | -------------------------------------------------------------------------------- /mozi/datasets/dataset_noise.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | """ 4 | Functionality : Define the noise that is to be added to the dataset 5 | """ 6 | 7 | import numpy as np 
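# The Noise subclasses below corrupt input arrays in numpy space before
# training, e.g. to produce the corrupted inputs of a denoising autoencoder.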
8 | 9 | class Noise(object): 10 | """ 11 | This is an abstract class for applying noise to dataset 12 | """ 13 | 14 | def apply(self, X): 15 | """ 16 | DESCRIPTION: 17 | This method applies noise to X and return a noisy X 18 | PARAM: 19 | X : 2d numpy array of dimension number of examples by number of dimensions 20 | """ 21 | raise NotImplementedError(str(type(self))+" does not implement an apply method.") 22 | 23 | def invert(self, X): 24 | """ 25 | DESCRIPTION: 26 | Remove the noise from X 27 | PARAM: 28 | X : 2d numpy array of dimension number of examples by number of dimensions 29 | """ 30 | raise NotImplementedError(str(type(self))+" does not implement an invert method.") 31 | 32 | 33 | 34 | 35 | class MaskOut(Noise): 36 | 37 | """ 38 | This noise masked out a portion of the dimension from each example 39 | """ 40 | 41 | def __init__(self, ratio=0.5): 42 | """ 43 | PARAM: 44 | ratio : float 45 | The portion of the inputs that is masked out 46 | """ 47 | self.ratio = ratio 48 | 49 | def apply(self, X): 50 | self.noise = np.random.binomial(size=X.shape, n=1, p=(1-self.ratio)) 51 | return X * self.noise 52 | 53 | def invert(self, X): 54 | return X / self.noise 55 | 56 | 57 | class Gaussian(Noise): 58 | """ 59 | Applies gaussian noise to each value of X 60 | """ 61 | 62 | def __init__(self, std=0.01, mean=0): 63 | self.std = std 64 | self.mean = mean 65 | 66 | def apply(self, X): 67 | return X + np.random.normal(loc=self.mean, scale=self.std, size=X.shape) 68 | 69 | 70 | 71 | class BlackOut(Noise): 72 | """ 73 | This noise masked out a random example in a dataset, 74 | adding noise in the time dimension 75 | """ 76 | 77 | def __init__(self, ratio=0.5): 78 | """ 79 | PARAM: 80 | ratio : float 81 | The portion of the examples that is masked out 82 | """ 83 | self.ratio = ratio 84 | 85 | def apply(self, X): 86 | return X * np.random.binomial(size=(X.shape[0],1), n=1, p=(1-self.ratio)) 87 | -------------------------------------------------------------------------------- /mozi/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | 2 | import logging 3 | logger = logging.getLogger(__name__) 4 | import os 5 | import cPickle 6 | import numpy as np 7 | import theano 8 | floatX = theano.config.floatX 9 | 10 | from mozi.utils.utils import get_file, make_one_hot, pad_sequences 11 | from mozi.datasets.dataset import SingleBlock 12 | 13 | class IMDB(SingleBlock): 14 | 15 | def __init__(self, nb_words=None, skip_top=0, maxlen=None, seed=113, 16 | pad_zero=False, start_char=1, oov_char=2, index_from=3, **kwargs): 17 | ''' 18 | adapted from keras 19 | ''' 20 | im_dir = os.environ['MOZI_DATA_PATH'] + '/imdb/' 21 | path = "https://s3.amazonaws.com/text-datasets/imdb.pkl" 22 | im_dir = get_file(fpath="{}/imdb.pkl".format(im_dir), origin=path, untar=False) 23 | with open('{}/imdb.pkl'.format(im_dir)) as fin: 24 | X, labels = np.load(fin) 25 | np.random.seed(seed) 26 | np.random.shuffle(X) 27 | np.random.seed(seed) 28 | np.random.shuffle(labels) 29 | 30 | if start_char is not None: 31 | X = [[start_char] + [w + index_from for w in x] for x in X] 32 | elif index_from: 33 | X = [[w + index_from for w in x] for x in X] 34 | 35 | if maxlen: 36 | new_X = [] 37 | new_labels = [] 38 | for x, y in zip(X, labels): 39 | if len(x) < maxlen: 40 | new_X.append(x) 41 | new_labels.append(y) 42 | X = new_X 43 | labels = new_labels 44 | 45 | if not nb_words: 46 | nb_words = max([max(x) for x in X]) 47 | 48 | # by convention, use 2 as OOV word 49 | # reserve 'index_from' (=3 by 
default) characters: 0 (padding), 1 (start), 2 (OOV) 50 | if oov_char is not None: 51 | X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X] 52 | else: 53 | nX = [] 54 | for x in X: 55 | nx = [] 56 | for w in x: 57 | if (w >= nb_words or w < skip_top): 58 | nx.append(w) 59 | nX.append(nx) 60 | X = nX 61 | 62 | if pad_zero and maxlen: 63 | X = pad_sequences(X, maxlen=maxlen) 64 | super(IMDB, self).__init__(X=np.asarray(X), y=np.asarray(labels).reshape((len(labels),1)), **kwargs) 65 | -------------------------------------------------------------------------------- /doc/dae.md: -------------------------------------------------------------------------------- 1 | 2 | Denoising Autoencoder 3 | ===== 4 | You can try the Denoising Autoencoder [Example](../example/mnist_dae.py) running on Mnist. Here we build two hidden layer encoding and decoding layers 5 | ```python 6 | from mozi.model import Sequential 7 | from mozi.layers.linear import Linear 8 | from mozi.layers.activation import * 9 | from mozi.layers.noise import Gaussian 10 | import theano.tensor as T 11 | 12 | # build model 13 | model = Sequential(input_var=T.matrix(), output_var=T.matrix()) 14 | # build encoder 15 | model.add(Gaussian()) 16 | encode_layer1 = Linear(prev_dim=28*28, this_dim=200) 17 | model.add(encode_layer1) 18 | model.add(RELU()) 19 | encode_layer2 = Linear(prev_dim=200, this_dim=50) 20 | model.add(encode_layer2) 21 | model.add(Tanh()) 22 | 23 | # build decoder 24 | decode_layer1 = Linear(prev_dim=50, this_dim=200, W=encode_layer2.W.T) 25 | model.add(decode_layer1) 26 | model.add(RELU()) 27 | decode_layer2 = Linear(prev_dim=200, this_dim=28*28, W=encode_layer1.W.T) 28 | model.add(decode_layer2) 29 | model.add(Sigmoid()) 30 | ``` 31 | Next we prepare the mnist dataset such that the input X and output y is the same 32 | ```python 33 | from mozi.datasets.mnist import Mnist 34 | 35 | # build dataset 36 | data = Mnist(batch_size=64, train_valid_test_ratio=[5,1,1]) 37 | # for autoencoder, the output will be equal to input 38 | data.set_train(X=data.get_train().X, y=data.get_train().X) 39 | data.set_valid(X=data.get_valid().X, y=data.get_valid().X) 40 | 41 | ``` 42 | 43 | 44 | Finally build learning method and put everything in train object and run 45 | ```python 46 | from mozi.train_object import TrainObject 47 | from mozi.cost import entropy 48 | from mozi.learning_method import AdaGrad 49 | 50 | learning_method = AdaGrad(learning_rate=0.01, momentum=0.9, 51 | lr_decay_factor=0.9, decay_batch=10000) 52 | 53 | train_object = TrainObject(model = model, 54 | log = None, 55 | dataset = data, 56 | train_cost = entropy, 57 | valid_cost = entropy, 58 | learning_method = learning_method, 59 | stop_criteria = {'max_epoch' : 10, 60 | 'epoch_look_back' : 5, 61 | 'percent_decrease' : 0.01} 62 | ) 63 | # finally run the code 64 | train_object.setup() 65 | train_object.run() 66 | ``` 67 | -------------------------------------------------------------------------------- /doc/cnn.md: -------------------------------------------------------------------------------- 1 | 2 | Convolution Neural Network 3 | ===== 4 | You can try the Convolution Neural Network [Example](../example/cifar10_cnn.py) running on Cifar10. 
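To see where the `64*8*8` input of the first fully-connected layer below comes from, track the feature-map sizes with the formulas in `mozi/utils/cnn_utils.py` (`full`: x+k-1, `valid`: (x-k)/stride+1): the 32x32 images grow to 34, shrink back to 32 and pool to 16 in the first block, then 18 -> 16 -> 8 in the second, leaving 64 maps of 8x8.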
Here we build four convolution layers with two fully-connected layers 5 | ```python 6 | from mozi.model import Sequential 7 | from mozi.layers.linear import Linear 8 | from mozi.layers.noise import Dropout 9 | from mozi.layers.activation import * 10 | from mozi.layers.convolution import * 11 | from mozi.layers.misc import Flatten 12 | import theano.tensor as T 13 | 14 | model = Sequential(input_var=T.tensor4(), output_var=T.matrix()) 15 | model.add(Convolution2D(input_channels=3, filters=32, kernel_size=(3,3), stride=(1,1), border_mode='full')) 16 | model.add(RELU()) 17 | model.add(Convolution2D(input_channels=32, filters=32, kernel_size=(3,3), stride=(1,1))) 18 | model.add(RELU()) 19 | model.add(Pooling2D(poolsize=(2, 2), mode='max')) 20 | model.add(Dropout(0.25)) 21 | 22 | model.add(Convolution2D(input_channels=32, filters=64, kernel_size=(3,3), stride=(1,1), border_mode='full')) 23 | model.add(RELU()) 24 | model.add(Convolution2D(input_channels=64, filters=64, kernel_size=(3,3), stride=(1,1),)) 25 | model.add(RELU()) 26 | model.add(Pooling2D(poolsize=(2, 2), mode='max')) 27 | model.add(Dropout(0.25)) 28 | 29 | model.add(Flatten()) 30 | model.add(Linear(64*8*8, 512)) 31 | model.add(RELU()) 32 | model.add(Dropout(0.5)) 33 | 34 | model.add(Linear(512, 10)) 35 | model.add(Softmax()) 36 | ``` 37 | Next build the `Cifar10` dataset, `LearningMethod` and put everything in `TrainObject` and run 38 | ```python 39 | from mozi.datasets.cifar10 import Cifar10 40 | from mozi.train_object import TrainObject 41 | from mozi.cost import error, entropy 42 | from mozi.learning_method import SGD 43 | 44 | data = Cifar10(batch_size=64, train_valid_test_ratio=[5,1,1]) 45 | learning_method = SGD(learning_rate=0.01, momentum=0.9, 46 | lr_decay_factor=0.9, decay_batch=5000) 47 | train_object = TrainObject(model = model, 48 | log = None, 49 | dataset = data, 50 | train_cost = entropy, 51 | valid_cost = error, 52 | learning_method = learning_method, 53 | stop_criteria = {'max_epoch' : 10, 54 | 'epoch_look_back' : 5, 55 | 'percent_decrease' : 0.01} 56 | ) 57 | train_object.setup() 58 | train_object.run() 59 | ``` 60 | -------------------------------------------------------------------------------- /example/mnist_dae.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import theano 4 | import theano.tensor as T 5 | import numpy as np 6 | 7 | from mozi.datasets.mnist import Mnist 8 | from mozi.model import Sequential 9 | from mozi.layers.linear import Linear 10 | from mozi.layers.activation import * 11 | from mozi.layers.noise import Dropout, Gaussian 12 | from mozi.log import Log 13 | from mozi.train_object import TrainObject 14 | from mozi.cost import mse, error, entropy 15 | from mozi.learning_method import * 16 | from mozi.weight_init import * 17 | from mozi.env import setenv 18 | 19 | from sklearn.metrics import accuracy_score 20 | 21 | 22 | def train(): 23 | 24 | # build dataset 25 | data = Mnist(batch_size=64, train_valid_test_ratio=[5,1,1]) 26 | # for autoencoder, the output will be equal to input 27 | data.set_train(X=data.get_train().X, y=data.get_train().X) 28 | data.set_valid(X=data.get_valid().X, y=data.get_valid().X) 29 | 30 | # build model 31 | model = Sequential(input_var=T.matrix(), output_var=T.matrix()) 32 | # build encoder 33 | model.add(Gaussian()) 34 | encode_layer1 = Linear(prev_dim=28*28, this_dim=200) 35 | model.add(encode_layer1) 36 | model.add(RELU()) 37 | encode_layer2 = Linear(prev_dim=200, this_dim=50) 38 | model.add(encode_layer2) 
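# note: the decoder below ties its weights to the encoder by reusing the
# transposed encoder weight matrices (W=encode_layer2.W.T and W=encode_layer1.W.T)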
39 | model.add(Tanh()) 40 | 41 | # build decoder 42 | decode_layer1 = Linear(prev_dim=50, this_dim=200, W=encode_layer2.W.T) 43 | model.add(decode_layer1) 44 | model.add(RELU()) 45 | decode_layer2 = Linear(prev_dim=200, this_dim=28*28, W=encode_layer1.W.T) 46 | model.add(decode_layer2) 47 | model.add(Sigmoid()) 48 | 49 | # build learning method 50 | learning_method = AdaGrad(learning_rate=0.01, momentum=0.9, 51 | lr_decay_factor=0.9, decay_batch=10000) 52 | 53 | # put everything into the train object 54 | train_object = TrainObject(model = model, 55 | log = None, 56 | dataset = data, 57 | train_cost = entropy, 58 | valid_cost = entropy, 59 | learning_method = learning_method, 60 | stop_criteria = {'max_epoch' : 10, 61 | 'epoch_look_back' : 5, 62 | 'percent_decrease' : 0.01} 63 | ) 64 | # finally run the code 65 | train_object.setup() 66 | train_object.run() 67 | 68 | 69 | 70 | if __name__ == '__main__': 71 | setenv() 72 | train() 73 | -------------------------------------------------------------------------------- /mozi/weight_init.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import theano 4 | from mozi.utils.theano_utils import sharedX 5 | 6 | 7 | def get_fans(shape): 8 | '''From keras''' 9 | fan_in = shape[0] if len(shape) == 2 else np.prod(shape[1:]) 10 | fan_out = shape[1] if len(shape) == 2 else shape[0] 11 | return fan_in, fan_out 12 | 13 | 14 | class WeightInitialization(object): 15 | 16 | def __call__(self, dim, name='W'): 17 | raise NotImplementedError(str(type(self))+" does not implement __call__.") 18 | 19 | 20 | class GaussianWeight(WeightInitialization): 21 | def __init__(self, mean=0, std=0.1): 22 | self.mean = mean 23 | self.std = std 24 | 25 | def __call__(self, dim, name='W', **kwargs): 26 | W_values = np.random.normal(loc=self.mean, scale=self.std, size=dim) 27 | return sharedX(name=name, value=W_values, borrow=True, **kwargs) 28 | 29 | 30 | class XavierUniformWeight(WeightInitialization): 31 | def __call__(self, dim, name='W', **kwargs): 32 | fan_in, fan_out = get_fans(dim) 33 | W_values = np.random.uniform(low = -4 * np.sqrt(6. / (fan_in + fan_out)), 34 | high = 4 * np.sqrt(6. 
/ (fan_in + fan_out)), 35 | size = dim) 36 | return sharedX(name=name, value=W_values, borrow=True, **kwargs) 37 | 38 | 39 | class UniformWeight(WeightInitialization): 40 | def __init__(self, scale=0.05): 41 | self.scale = scale 42 | 43 | def __call__(self, dim, name='W', **kwargs): 44 | W_values = np.random.uniform(low=-self.scale, high=self.scale, size=dim) 45 | return sharedX(name=name, value=W_values, borrow=True, **kwargs) 46 | 47 | 48 | class OrthogonalWeight(WeightInitialization): 49 | def __init__(self, scale=1.1): 50 | self.scale = scale 51 | 52 | def __call__(self, dim, name='W', **kwargs): 53 | ''' From Lasagne 54 | ''' 55 | flat_shape = (dim[0], np.prod(dim[1:])) 56 | a = np.random.normal(0.0, 1.0, flat_shape) 57 | u, _, v = np.linalg.svd(a, full_matrices=False) 58 | # pick the one with the correct shape 59 | q = u if u.shape == flat_shape else v 60 | q = q.reshape(dim) 61 | return sharedX(name=name, value=self.scale * q[:dim[0],:dim[1]], borrow=True, **kwargs) 62 | 63 | 64 | class Identity(WeightInitialization): 65 | def __init__(self, scale=1): 66 | self.scale = scale 67 | 68 | def __call__(self, dim, name='W', **kwargs): 69 | if len(dim) != 2 or dim[0] != dim[1]: 70 | raise Exception("Identity matrix initialization can only be used for 2D square matrices") 71 | else: 72 | return sharedX(self.scale * np.identity(dim[0]), **kwargs) 73 | -------------------------------------------------------------------------------- /mozi/datasets/voc.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from mozi.utils.utils import get_file, make_one_hot 4 | from mozi.datasets.dataset import SingleBlock 5 | import xml.etree.ElementTree as ET 6 | import os 7 | import glob 8 | from skimage.transform import resize 9 | from skimage.io import imread 10 | import cPickle 11 | import marshal 12 | import numpy as np 13 | 14 | class VOC(SingleBlock): 15 | 16 | def __init__(self, resized_shape=(222,222,3), **kwargs): 17 | ''' 18 | using only voc 2012 for actions classification, total 2154 images 19 | resized_shape is of (height, width, channel) 20 | ''' 21 | im_dir = os.environ['MOZI_DATA_PATH'] + '/voc' 22 | path = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar' 23 | im_dir = get_file(fpath="{}/VOCtrainval_11-May-2012.tar".format(im_dir), origin=path, untar=True) 24 | actls = ['jumping', 'phoning', 'playinginstrument', 'reading', 'ridingbike', 25 | 'ridinghorse', 'running', 'takingphoto', 'usingcomputer', 'walking', 26 | 'other'] 27 | X_path = os.environ['MOZI_DATA_PATH'] + '/voc/X.npy' 28 | y_path = os.environ['MOZI_DATA_PATH'] + '/voc/y.npy' 29 | if not os.path.exists(X_path) or not os.path.exists(y_path): 30 | print X_path + ' does not exists, generating..' 
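# walk the 2012 annotation XMLs: resize each JPEG to resized_shape and keep
# the first action flagged '1' as the single label for that image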
31 | annote = im_dir + '/VOC2012/Annotations' 32 | images = im_dir + '/VOC2012/JPEGImages' 33 | files = glob.glob(annote + '/2012*xml') 34 | labels = [] 35 | rimage = [] 36 | for f in files: 37 | bname = os.path.basename(f).rstrip('.xml') 38 | image = imread('{}/{}.jpg'.format(images, bname)) 39 | rimage.append(resize(image, resized_shape)) 40 | tree = ET.parse(f) 41 | root = tree.getroot() 42 | actions = root.find('object').find('actions') 43 | 44 | for act in actions: 45 | if act.text == '1': 46 | labels.append(actls.index(act.tag)) 47 | # only restrict to one action per photo 48 | break 49 | 50 | print 'saving data' 51 | with open(X_path, 'wb') as Xout, open(y_path, 'wb') as yout: 52 | X = np.asarray(rimage) 53 | y = np.asarray(labels) 54 | np.save(Xout, X) 55 | np.save(yout, y) 56 | 57 | else: 58 | print X_path + ' exists, loading..' 59 | with open(X_path, 'rb') as Xin, open(y_path, 'rb') as yin: 60 | X = np.load(Xin) 61 | y = np.load(yin) 62 | 63 | super(VOC, self).__init__(X=np.rollaxis(X,3,1), y=make_one_hot(y,len(actls)), **kwargs) 64 | 65 | # x = VOC() 66 | -------------------------------------------------------------------------------- /example/mnist_mlp.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import theano 4 | import theano.tensor as T 5 | import numpy as np 6 | 7 | from mozi.datasets.mnist import Mnist 8 | from mozi.datasets.preprocessor import * 9 | from mozi.model import Sequential 10 | from mozi.layers.linear import * 11 | from mozi.layers.activation import * 12 | from mozi.layers.noise import Dropout 13 | from mozi.log import Log 14 | from mozi.train_object import TrainObject 15 | from mozi.cost import mse, error 16 | from mozi.learning_method import * 17 | from mozi.weight_init import * 18 | from mozi.env import setenv 19 | 20 | 21 | def train(): 22 | 23 | # build dataset 24 | batch_size = 64 25 | data = Mnist(batch_size=batch_size, train_valid_test_ratio=[5,1,1]) 26 | 27 | # build model 28 | model = Sequential(input_var=T.matrix(), output_var=T.matrix()) 29 | model.add(Linear(prev_dim=28*28, this_dim=200)) 30 | model.add(RELU()) 31 | model.add(Linear(prev_dim=200, this_dim=100)) 32 | model.add(RELU()) 33 | model.add(Dropout(0.5)) 34 | model.add(Linear(prev_dim=100, this_dim=10)) 35 | model.add(Softmax()) 36 | 37 | # build learning method 38 | decay_batch = int(data.train.X.shape[0] * 2 / batch_size) 39 | learning_method = SGD(learning_rate=0.1, momentum=0.9, 40 | lr_decay_factor=0.9, decay_batch=decay_batch) 41 | 42 | # Build Logger 43 | log = Log(experiment_name = 'MLP', 44 | description = 'This is a tutorial', 45 | save_outputs = True, # log all the outputs from the screen 46 | save_model = True, # save the best model 47 | save_epoch_error = True, # log error at every epoch 48 | save_to_database = {'name': 'Example.sqlite3', 49 | 'records': {'Batch_Size': batch_size, 50 | 'Learning_Rate': learning_method.learning_rate, 51 | 'Momentum': learning_method.momentum}} 52 | ) # end log 53 | 54 | # put everything into the train object 55 | train_object = TrainObject(model = model, 56 | log = log, 57 | dataset = data, 58 | train_cost = mse, 59 | valid_cost = error, 60 | learning_method = learning_method, 61 | stop_criteria = {'max_epoch' : 100, 62 | 'epoch_look_back' : 5, 63 | 'percent_decrease' : 0.01} 64 | ) 65 | # finally run the code 66 | train_object.setup() 67 | train_object.run() 68 | 69 | ypred = model.fprop(data.get_test().X) 70 | ypred = np.argmax(ypred, axis=1) 71 | y = np.argmax(data.get_test().y, 
axis=1) 72 | accuracy = np.equal(ypred, y).astype('f4').sum() / len(y) 73 | print 'test accuracy:', accuracy 74 | 75 | 76 | if __name__ == '__main__': 77 | setenv() 78 | train() 79 | -------------------------------------------------------------------------------- /mozi/utils/train_object_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import matplotlib 5 | import theano 6 | import theano.tensor as T 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | from theano.compile.ops import as_op 10 | from mozi.utils.progbar import Progbar 11 | 12 | import tarfile, inspect, os 13 | from six.moves.urllib.request import urlretrieve 14 | 15 | floatX = theano.config.floatX 16 | 17 | def split_list(tuple_list): 18 | """ 19 | DESCRIPTION: 20 | split a list of tuples into two lists whereby one list contains the first elements 21 | of the tuples and the other list contains the second elements. 22 | PARAM: 23 | tuple_list: a list of tuples, example tuple_list = [('a', 1), ('b', 2)] 24 | RETURN: 25 | two lists, example from above tuple_list will be split into ['a', 'b'] and [1, 2] 26 | """ 27 | ls_A = [] 28 | ls_B = [] 29 | 30 | for tuple in tuple_list: 31 | ls_A.append(tuple[0]) 32 | ls_B.append(tuple[1]) 33 | 34 | return ls_A, ls_B 35 | 36 | 37 | def generate_shared_list(ls): 38 | """ 39 | DESCRIPTION: 40 | generate a list of shared variables that matched the length of ls 41 | PARAM: 42 | ls: the list used for generating the shared variables 43 | RETURN: 44 | a list of shared variables initialized to 0 of len(ls) 45 | """ 46 | rlist = [] 47 | 48 | for i in xrange(len(ls)): 49 | rlist.append(theano.shared(np.array(0., dtype=theano.config.floatX))) 50 | 51 | return rlist 52 | 53 | 54 | def merge_lists(ls_A, ls_B): 55 | """ 56 | DESCRIPTION: 57 | merge two lists of equal length into into a list of tuples 58 | PARAM: 59 | ls_A: first list 60 | ls_B: second list 61 | RETURN: 62 | a list of tuples 63 | """ 64 | 65 | assert len(ls_A) == len(ls_B), 'two lists of different length' 66 | 67 | rlist = [] 68 | for a, b in zip(ls_A, ls_B): 69 | rlist.append((a,b)) 70 | 71 | return rlist 72 | 73 | 74 | def get_shared_values(shared_ls): 75 | """ 76 | DESCRIPTION: 77 | get a list of values from a list of shared variables 78 | PARAM: 79 | shared_ls: list of shared variables 80 | RETURN: 81 | numpy array of the list of values 82 | """ 83 | 84 | val_ls = [] 85 | for var in shared_ls: 86 | val_ls.append(var.get_value()) 87 | 88 | return np.asarray(val_ls, dtype=theano.config.floatX) 89 | 90 | 91 | def is_shared_var(var): 92 | return var.__class__.__name__ == 'TensorSharedVariable' or \ 93 | var.__class__.__name__ == 'CudaNdarraySharedVariable' 94 | 95 | 96 | def merge_var(*vars): 97 | def absortvar(v): 98 | rvar = [] 99 | if isinstance(v, (list, tuple)): 100 | rvar += v 101 | else: 102 | rvar.append(v) 103 | return rvar 104 | 105 | rvars = [] 106 | for var in vars: 107 | rvars += absortvar(var) 108 | return rvars 109 | -------------------------------------------------------------------------------- /mozi/model.py: -------------------------------------------------------------------------------- 1 | 2 | import theano 3 | 4 | 5 | class Model(object): 6 | 7 | def test_fprop(self, input_state): 8 | pass 9 | 10 | def train_fprop(self, input_state): 11 | pass 12 | 13 | class Sequential(Model): 14 | 15 | def __init__(self, input_var, output_var, verbose=True): 16 | """ 17 | PARAM: 18 | 
input_var (T.vector() | T.matrix() | T.tensor3() | T.tensor4()): 19 | The tensor variable input to the model that corresponds to 20 | the number of dimensions of the input X of dataset 21 | input_var (T.vector() | T.matrix() | T.tensor3() | T.tensor4()): 22 | The tensor variable output from the model that corresponds to 23 | the number of dimensions of the output y of dataset 24 | verbose (bool): 25 | print out the layer stats from each layer if True 26 | 27 | """ 28 | self.input_var = input_var 29 | self.output_var = output_var 30 | self.layers = [] 31 | self.verbose = verbose 32 | 33 | def add(self, layer): 34 | self.layers.append(layer) 35 | 36 | def pop(self, index): 37 | return self.layers.pop(index) 38 | 39 | def test_fprop(self, input_state, layers=None): 40 | test_layers_stats = [] 41 | if layers is None: 42 | layers = xrange(len(self.layers)) 43 | for i in layers: 44 | layer_output = self.layers[i]._test_fprop(input_state) 45 | stats = [] 46 | if self.verbose: 47 | stats = self.layers[i]._layer_stats(input_state, layer_output) 48 | input_state = layer_output 49 | class_name = self.layers[i].__class__.__name__ 50 | stats = [(str(i)+'_'+class_name+'_'+a, b) for (a,b) in stats] 51 | test_layers_stats += stats 52 | 53 | return input_state, test_layers_stats 54 | 55 | 56 | def train_fprop(self, input_state, layers=None): 57 | train_layers_stats = [] 58 | if layers is None: 59 | layers = xrange(len(self.layers)) 60 | for i in layers: 61 | layer_output = self.layers[i]._train_fprop(input_state) 62 | stats = [] 63 | if self.verbose: 64 | stats = self.layers[i]._layer_stats(input_state, layer_output) 65 | input_state = layer_output 66 | class_name = self.layers[i].__class__.__name__ 67 | stats = [(str(i)+'_'+class_name+'_'+a, b) for (a,b) in stats] 68 | train_layers_stats += stats 69 | 70 | return input_state, train_layers_stats 71 | 72 | 73 | def fprop(self, input_values): 74 | return self.fprop_layers(input_values) 75 | 76 | 77 | def fprop_layers(self, input_values, layers=None): 78 | output, stats = self.test_fprop(self.input_var, layers) 79 | if isinstance(self.input_var, (list, tuple)): 80 | f = theano.function(self.input_var, output, on_unused_input='warn', allow_input_downcast=True) 81 | else: 82 | f = theano.function([self.input_var], output, on_unused_input='warn', allow_input_downcast=True) 83 | 84 | if isinstance(input_values, tuple): 85 | return f(*input_values) 86 | else: 87 | return f(input_values) 88 | 89 | 90 | def get_layers(self): 91 | return self.layers 92 | -------------------------------------------------------------------------------- /mozi/utils/progbar.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import time 4 | import sys 5 | 6 | ''' 7 | from keras 8 | ''' 9 | 10 | class Progbar(object): 11 | def __init__(self, target, width=30, verbose=1): 12 | ''' 13 | @param target: total number of steps expected 14 | ''' 15 | self.width = width 16 | self.target = target 17 | self.sum_values = {} 18 | self.unique_values = [] 19 | self.start = time.time() 20 | self.total_width = 0 21 | self.seen_so_far = 0 22 | self.verbose = verbose 23 | 24 | def update(self, current, values=[]): 25 | ''' 26 | @param current: index of current step 27 | @param values: list of tuples (name, value_for_last_step). 28 | The progress bar will display averages for these values. 
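Example: bar = Progbar(target=100); bar.update(10, values=[('loss', 0.25)])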
29 | ''' 30 | for k, v in values: 31 | if k not in self.sum_values: 32 | self.sum_values[k] = [v * (current-self.seen_so_far), current-self.seen_so_far] 33 | self.unique_values.append(k) 34 | else: 35 | self.sum_values[k][0] += v * (current-self.seen_so_far) 36 | self.sum_values[k][1] += (current-self.seen_so_far) 37 | self.seen_so_far = current 38 | 39 | now = time.time() 40 | if self.verbose == 1: 41 | prev_total_width = self.total_width 42 | sys.stdout.write("\b" * prev_total_width) 43 | sys.stdout.write("\r") 44 | 45 | numdigits = int(np.floor(np.log10(self.target))) + 1 46 | barstr = '%%%dd/%%%dd [' % (numdigits, numdigits) 47 | bar = barstr % (current, self.target) 48 | prog = float(current)/self.target 49 | if current > self.target: 50 | prog = 1 51 | prog_width = int(self.width*prog) 52 | if prog_width > 0: 53 | bar += ('='*(prog_width-1)) 54 | if current < self.target: 55 | bar += '>' 56 | bar += ('.'*(self.width-prog_width)) 57 | bar += ']' 58 | sys.stdout.write(bar) 59 | self.total_width = len(bar) 60 | 61 | if current: 62 | time_per_unit = (now - self.start) / current 63 | else: 64 | time_per_unit = 0 65 | eta = time_per_unit*(self.target - current) 66 | info = '' 67 | if current < self.target: 68 | info += ' - ETA: %ds' % eta 69 | else: 70 | info += ' - %ds' % (now - self.start) 71 | for k in self.unique_values: 72 | info += ' - %s: %.4f' % (k, self.sum_values[k][0]/ max(1, self.sum_values[k][1])) 73 | 74 | self.total_width += len(info) 75 | if prev_total_width > self.total_width: 76 | info += ((prev_total_width-self.total_width) * " ") 77 | 78 | sys.stdout.write(info) 79 | sys.stdout.flush() 80 | 81 | if self.verbose == 2: 82 | if current >= self.target: 83 | info = '%ds' % (now - self.start) 84 | for k in self.unique_values: 85 | info += ' - %s: %.4f' % (k, self.sum_values[k][0]/ max(1, self.sum_values[k][1])) 86 | sys.stdout.write(info + "\n") 87 | 88 | 89 | def add(self, n, values=[]): 90 | self.update(self.seen_so_far+n, values) 91 | -------------------------------------------------------------------------------- /mozi/layers/linear.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | from theano.sandbox.rng_mrg import MRG_RandomStreams 6 | 7 | from mozi.utils.theano_utils import shared_zeros 8 | from mozi.weight_init import GaussianWeight 9 | from mozi.layers.template import Template 10 | 11 | floatX = theano.config.floatX 12 | theano_rand = MRG_RandomStreams() 13 | 14 | 15 | class Linear(Template): 16 | 17 | def __init__(self, prev_dim=None, this_dim=None, W=None, b=None, 18 | weight_init=GaussianWeight(mean=0, std=0.1)): 19 | """ 20 | DESCRIPTION: 21 | This is a fully connected layer 22 | PARAM: 23 | prev_dim(int): dimension of previous layer 24 | this_dim(int): dimension of this layer 25 | name(string): name of the layer 26 | W(tensor variable): Weight of 2D tensor matrix 27 | b(tensor variable): bias of 2D tensor matrix 28 | params(list): a list of params in layer that can be updated 29 | """ 30 | 31 | self.prev_dim = prev_dim 32 | self.this_dim = this_dim 33 | 34 | self.W = W 35 | if self.W is None: 36 | self.W = weight_init((prev_dim, this_dim), name='W') 37 | 38 | self.b = b 39 | if self.b is None: 40 | self.b = shared_zeros(shape=this_dim, name='b') 41 | 42 | self.params = [self.W, self.b] 43 | 44 | def _train_fprop(self, state_below): 45 | """ 46 | DESCRIPTION: 47 | performs linear transform y = dot(W, state_below) + b 48 | PARAM: 49 | state_below: 1d 
array of inputs from layer below 50 | """ 51 | return T.dot(state_below, self.W) + self.b 52 | 53 | 54 | def _layer_stats(self, state_below, layer_output): 55 | """ 56 | DESCRIPTION: 57 | This method is called every batch whereby the examples from test or valid set 58 | is pass through, the final result will be the mean of all the results from all 59 | the batches in an epoch from the test set or valid set. 60 | PARAM: 61 | layer_output: the output from the layer 62 | RETURN: 63 | A list of tuples of [('name_a', var_a), ('name_b', var_b)] whereby var is scalar 64 | """ 65 | w_len = T.sqrt((self.W ** 2).sum(axis=0)) 66 | max_length = T.max(w_len) 67 | mean_length = T.mean(w_len) 68 | min_length = T.min(w_len) 69 | max_output = T.max(layer_output) 70 | mean_output = T.mean(T.abs_(layer_output)) 71 | min_output = T.min(layer_output) 72 | max_state = T.max(state_below) 73 | mean_state = T.mean(T.abs_(state_below)) 74 | min_state = T.min(state_below) 75 | 76 | return [('max_W', T.max(self.W)), 77 | ('mean_W', T.mean(self.W)), 78 | ('min_W', T.min(self.W)), 79 | ('max_b', T.max(self.b)), 80 | ('mean_b', T.mean(self.b)), 81 | ('min_b', T.min(self.b)), 82 | ('max_layer_output', max_output), 83 | ('mean_layer_output', mean_output), 84 | ('min_layer_output', min_output), 85 | ('max_col_length', max_length), 86 | ('mean_col_length', mean_length), 87 | ('min_col_length', min_length), 88 | ('max_state_below', max_state), 89 | ('mean_state_below', mean_state), 90 | ('min_state_below', min_state)] 91 | -------------------------------------------------------------------------------- /mozi/layers/noise.py: -------------------------------------------------------------------------------- 1 | 2 | import theano 3 | import theano.tensor as T 4 | from theano.sandbox.rng_mrg import MRG_RandomStreams 5 | 6 | from mozi.layers.template import Template 7 | 8 | floatX = theano.config.floatX 9 | theano_rand = MRG_RandomStreams() 10 | 11 | class Dropout(Template): 12 | 13 | def __init__(self, dropout_below=0.5): 14 | ''' 15 | PARAMS: 16 | dropout_below(float): probability of the inputs from the layer below been masked out 17 | ''' 18 | self.dropout_below = dropout_below 19 | self.params = [] 20 | 21 | 22 | def _test_fprop(self, state_below): 23 | """ 24 | DESCRIPTION: 25 | resize the weight during testing for models trained with dropout. 
26 |             The weight will be resized to W' = (1 - self.dropout_below) * W
27 |         """
28 |         return state_below * (1 - self.dropout_below)
29 | 
30 | 
31 |     def _train_fprop(self, state_below):
32 |         """
33 |         DESCRIPTION:
34 |             Applies dropout to the layer during training
35 |         """
36 |         return theano_rand.binomial(size=state_below.shape, n=1,
37 |                                     p=(1-self.dropout_below),
38 |                                     dtype=floatX) * state_below
39 | 
40 | 
41 | class MaskOut(Template):
42 | 
43 |     """
44 |     This noise masks out a portion of the dimensions of each example
45 |     """
46 | 
47 |     def __init__(self, ratio=0.5):
48 |         """
49 |         PARAM:
50 |             ratio : float
51 |                 The portion of the inputs that is masked out
52 |         """
53 |         self.ratio = ratio
54 |         self.params = []
55 | 
56 |     def _train_fprop(self, state_below):
57 |         return state_below * theano_rand.binomial(size=state_below.shape, n=1, p=(1-self.ratio), dtype=floatX)
58 | 
59 | 
60 | class Gaussian(Template):
61 |     """
62 |     Applies gaussian noise to each value of X
63 |     """
64 | 
65 |     def __init__(self, std=0.1, mean=0):
66 |         self.std = std
67 |         self.mean = mean
68 |         self.params = []
69 | 
70 |     def _train_fprop(self, state_below):
71 |         return state_below + theano_rand.normal(avg=self.mean, std=self.std, size=state_below.shape, dtype=floatX)
72 | 
73 | 
74 | class BlackOut(Template):
75 |     """
76 |     This noise masks out a random example in a dataset,
77 |     adding noise in the time dimension
78 |     """
79 | 
80 |     def __init__(self, ratio=0.5):
81 |         """
82 |         PARAM:
83 |             ratio : float
84 |                 The portion of the examples that is masked out
85 |         """
86 |         self.ratio = ratio
87 |         self.params = []
88 | 
89 |     def _train_fprop(self, state_below):
90 |         rd = theano_rand.binomial(size=(state_below.shape[0],), n=1, p=(1-self.ratio), dtype=floatX)
91 |         return state_below * T.shape_padright(rd)
92 | 
93 | 
94 | class BatchOut(Template):
95 |     """
96 |     This noise masks out a random batch in an epoch,
97 |     adding noise in the time dimension
98 |     """
99 | 
100 |     def __init__(self, ratio=0.5):
101 |         """
102 |         PARAM:
103 |             ratio : float
104 |                 The portion of the batch that is masked out
105 |         """
106 |         self.ratio = ratio
107 |         self.params = []
108 | 
109 |     def _train_fprop(self, state_below):
110 |         rd = theano_rand.binomial(size=(1,1), n=1, p=(1-self.ratio), dtype=floatX)
111 |         return state_below * T.patternbroadcast(rd, broadcastable=(True, True))
112 | 
--------------------------------------------------------------------------------
/example/cifar10_cnn.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | 
4 | import theano.tensor as T
5 | 
6 | from mozi.datasets.cifar10 import Cifar10
7 | from mozi.model import Sequential
8 | from mozi.layers.linear import *
9 | from mozi.layers.noise import Dropout
10 | from mozi.layers.activation import *
11 | from mozi.layers.convolution import *
12 | from mozi.layers.misc import Flatten
13 | from mozi.layers.normalization import *
14 | from mozi.log import Log
15 | from mozi.train_object import TrainObject
16 | from mozi.cost import error, entropy
17 | from mozi.learning_method import *
18 | from mozi.env import setenv
19 | from mozi.utils.cnn_utils import valid, full
20 | 
21 | def train():
22 |     batch_size = 256
23 |     short_memory = 0.9
24 |     learning_rate = 0.005
25 |     data = Cifar10(batch_size=batch_size, train_valid_test_ratio=[4,1,1])
26 |     _, c, h, w = data.train.X.shape
27 | 
28 |     model = Sequential(input_var=T.tensor4(), output_var=T.matrix())
29 |     model.add(Convolution2D(input_channels=c, filters=8, kernel_size=(3,3), stride=(1,1), border_mode='full'))
30 |     h, w = full(h, w, kernel=3, stride=1)
31 |     model.add(BatchNormalization(dim=8, layer_type='conv', short_memory=short_memory))
32 |     model.add(RELU())
33 |     model.add(Convolution2D(input_channels=8, filters=16, kernel_size=(3,3), stride=(1,1), border_mode='valid'))
34 |     h, w = valid(h, w, kernel=3, stride=1)
35 |     model.add(BatchNormalization(dim=16, layer_type='conv', short_memory=short_memory))
36 |     model.add(RELU())
37 |     model.add(Pooling2D(poolsize=(4, 4), stride=(4,4), mode='max'))
38 |     h, w = valid(h, w, kernel=4, stride=4)
39 |     model.add(Flatten())
40 |     model.add(Linear(16*h*w, 512))
41 |     model.add(BatchNormalization(dim=512, layer_type='fc', short_memory=short_memory))
42 |     model.add(RELU())
43 | 
44 |     model.add(Linear(512, 10))
45 |     model.add(Softmax())
46 | 
47 |     # learning_method = RMSprop(learning_rate=learning_rate)
48 |     learning_method = Adam(learning_rate=learning_rate)
49 |     # learning_method = SGD(learning_rate=0.001)
50 | 
51 |     # Build Logger
52 |     log = Log(experiment_name = 'cifar10_cnn_tutorial',
53 |               description = 'This is a tutorial',
54 |               save_outputs = True, # log all the outputs from the screen
55 |               save_model = True, # save the best model
56 |               save_epoch_error = True, # log error at every epoch
57 |               save_to_database = {'name': 'hyperparam.sqlite3',
58 |                                   'records': {'Batch_Size': batch_size,
59 |                                               'Learning_Rate': learning_method.learning_rate}}
60 |              ) # end log
61 | 
62 |     # put everything into the train object
63 |     train_object = TrainObject(model = model,
64 |                                log = log,
65 |                                dataset = data,
66 |                                train_cost = entropy,
67 |                                valid_cost = error,
68 |                                learning_method = learning_method,
69 |                                stop_criteria = {'max_epoch' : 100,
70 |                                                 'epoch_look_back' : 10,
71 |                                                 'percent_decrease' : 0.01}
72 |                                )
73 |     # finally run the code
74 |     train_object.setup()
75 |     train_object.run()
76 | 
77 |     # test the model on test set
78 |     ypred = model.fprop(data.get_test().X)
79 |     ypred = np.argmax(ypred, axis=1)
80 |     y = np.argmax(data.get_test().y, axis=1)
81 |     accuracy = np.equal(ypred, y).astype('f4').sum() / len(y)
82 |     print 'test accuracy:', accuracy
83 | 
84 | 
85 | if __name__ == '__main__':
86 |     setenv()
87 |     train()
88 | 
--------------------------------------------------------------------------------
/mozi/layers/alexnet.py:
--------------------------------------------------------------------------------
1 | 
2 | from mozi.layers.activation import RELU, Softmax
3 | from mozi.layers.normalization import LRN
4 | from mozi.layers.convolution import Convolution2D, Pooling2D
5 | from mozi.layers.linear import Linear
6 | from mozi.layers.noise import Dropout
7 | from mozi.layers.misc import Flatten
8 | from mozi.layers.template import Template
9 | 
10 | 
11 | class Alexnet(Template):
12 | 
13 |     def __init__(self, input_shape, output_dim):
14 |         '''
15 |         FIELDS:
16 |             self.params: any params from the layer that needs to be updated
17 |                          by backpropagation can be put inside self.params
18 |         PARAMS:
19 |             input_shape: tuple
20 |                 shape of the input image with format (channel, height, width)
21 |             output_dim: int
22 |                 the output dimension of the model
23 |         '''
24 |         assert len(input_shape) == 3, 'input_shape must be a tuple or list of dim (channel, height, width)'
25 |         c, h, w = input_shape
26 | 
27 |         valid = lambda x, y, kernel, stride : ((x-kernel)/stride + 1, (y-kernel)/stride + 1)
28 |         full = lambda x, y, kernel, stride : ((x+kernel)/stride - 1, (y+kernel)/stride - 1)
29 | 
30 |         self.layers = []
31 |         self.layers.append(Convolution2D(input_channels=c, filters=96, kernel_size=(11,11),
32
| stride=(4,4), border_mode='valid')) 33 | nh, nw = valid(h, w, 11, 4) 34 | self.layers.append(RELU()) 35 | self.layers.append(LRN()) 36 | self.layers.append(Pooling2D(poolsize=(3,3), stride=(2,2), mode='max')) 37 | nh, nw = valid(nh, nw, 3, 2) 38 | self.layers.append(Convolution2D(input_channels=96, filters=256, kernel_size=(5,5), 39 | stride=(1,1), border_mode='full')) 40 | nh, nw = full(nh, nw, 5, 1) 41 | self.layers.append(RELU()) 42 | self.layers.append(LRN()) 43 | self.layers.append(Pooling2D(poolsize=(3,3), stride=(2,2), mode='max')) 44 | nh, nw = valid(nh, nw, 3, 2) 45 | self.layers.append(Convolution2D(input_channels=256, filters=384, kernel_size=(3,3), 46 | stride=(1,1), border_mode='full')) 47 | nh, nw = full(nh, nw, 3, 1) 48 | self.layers.append(RELU()) 49 | self.layers.append(Convolution2D(input_channels=384, filters=384, kernel_size=(3,3), 50 | stride=(1,1), border_mode='full')) 51 | nh, nw = full(nh, nw, 3, 1) 52 | self.layers.append(RELU()) 53 | self.layers.append(Convolution2D(input_channels=384, filters=256, kernel_size=(3,3), 54 | stride=(1,1), border_mode='full')) 55 | nh, nw = full(nh, nw, 3, 1) 56 | self.layers.append(RELU()) 57 | self.layers.append(Pooling2D(poolsize=(3,3), stride=(2,2), mode='max')) 58 | nh, nw = valid(nh, nw, 3, 2) 59 | 60 | self.layers.append(Flatten()) 61 | self.layers.append(Linear(256*nh*nw,4096)) 62 | self.layers.append(RELU()) 63 | self.layers.append(Dropout(0.5)) 64 | self.layers.append(Linear(4096,4096)) 65 | self.layers.append(RELU()) 66 | self.layers.append(Dropout(0.5)) 67 | self.layers.append(Linear(4096,output_dim)) 68 | self.layers.append(Softmax()) 69 | 70 | self.params = [] 71 | for layer in self.layers: 72 | self.params += layer.params 73 | 74 | def _test_fprop(self, state_below): 75 | for layer in self.layers: 76 | state_below = layer._test_fprop(state_below) 77 | return state_below 78 | 79 | def _train_fprop(self, state_below): 80 | for layer in self.layers: 81 | state_below = layer._train_fprop(state_below) 82 | return state_below 83 | -------------------------------------------------------------------------------- /example/datablocks_example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import theano.tensor as T 4 | 5 | from mozi.datasets.cifar10 import Cifar10 6 | from mozi.model import Sequential 7 | from mozi.layers.linear import * 8 | from mozi.layers.noise import Dropout 9 | from mozi.layers.activation import * 10 | from mozi.layers.convolution import * 11 | from mozi.layers.misc import Flatten 12 | from mozi.log import Log 13 | from mozi.train_object import TrainObject 14 | from mozi.cost import error, entropy 15 | from mozi.learning_method import SGD 16 | from mozi.datasets.dataset import DataBlocks 17 | 18 | def setenv(): 19 | NNdir = os.path.dirname(os.path.realpath(__file__)) 20 | NNdir = os.path.dirname(NNdir) 21 | 22 | # directory to save all the dataset 23 | if not os.getenv('MOZI_DATA_PATH'): 24 | os.environ['MOZI_DATA_PATH'] = NNdir + '/data' 25 | 26 | # directory for saving the database that is used for logging the results 27 | if not os.getenv('MOZI_DATABASE_PATH'): 28 | os.environ['MOZI_DATABASE_PATH'] = NNdir + '/database' 29 | 30 | # directory to save all the trained models and outputs 31 | if not os.getenv('MOZI_SAVE_PATH'): 32 | os.environ['MOZI_SAVE_PATH'] = NNdir + '/save' 33 | 34 | print('MOZI_DATA_PATH = ' + os.environ['MOZI_DATA_PATH']) 35 | print('MOZI_SAVE_PATH = ' + os.environ['MOZI_SAVE_PATH']) 36 | 
print('MOZI_DATABASE_PATH = ' + os.environ['MOZI_DATABASE_PATH']) 37 | 38 | 39 | def train(): 40 | # create a fake dataset 41 | X1 = np.random.rand(100000, 1000) 42 | y1 = np.random.rand(100000, 10) 43 | with open('X1.npy', 'wb') as xin, open('y1.npy', 'wb') as yin: 44 | np.save(xin, X1) 45 | np.save(yin, y1) 46 | 47 | X2 = np.random.rand(100000, 1000) 48 | y2 = np.random.rand(100000, 10) 49 | with open('X2.npy', 'wb') as xin, open('y2.npy', 'wb') as yin: 50 | np.save(xin, X2) 51 | np.save(yin, y2) 52 | 53 | X3 = np.random.rand(100000, 1000) 54 | y3 = np.random.rand(100000, 10) 55 | with open('X3.npy', 'wb') as xin, open('y3.npy', 'wb') as yin: 56 | np.save(xin, X3) 57 | np.save(yin, y3) 58 | 59 | # now we can create the data by putting the paths 60 | # ('X1.npy', 'y1.npy') and ('X2.npy', 'y2.npy') into DataBlocks 61 | data = DataBlocks(data_paths=[('X1.npy', 'y1.npy'), ('X2.npy', 'y2.npy'), ('X3.npy', 'y3.npy')], 62 | batch_size=100, train_valid_test_ratio=[3,2,0], allow_preload=False) 63 | 64 | 65 | model = Sequential(input_var=T.matrix(), output_var=T.matrix()) 66 | model.add(Linear(prev_dim=1000, this_dim=200)) 67 | model.add(RELU()) 68 | model.add(Linear(prev_dim=200, this_dim=100)) 69 | model.add(RELU()) 70 | model.add(Dropout(0.5)) 71 | model.add(Linear(prev_dim=100, this_dim=10)) 72 | model.add(Softmax()) 73 | 74 | # build learning method 75 | learning_method = SGD(learning_rate=0.01, momentum=0.9, 76 | lr_decay_factor=0.9, decay_batch=5000) 77 | 78 | # put everything into the train object 79 | train_object = TrainObject(model = model, 80 | log = None, 81 | dataset = data, 82 | train_cost = entropy, 83 | valid_cost = error, 84 | learning_method = learning_method, 85 | stop_criteria = {'max_epoch' : 10, 86 | 'epoch_look_back' : 5, 87 | 'percent_decrease' : 0.01} 88 | ) 89 | # finally run the code 90 | train_object.setup() 91 | train_object.run() 92 | 93 | for X_path, y_path in [('X1.npy', 'y1.npy'), ('X2.npy', 'y2.npy')]: 94 | with open(X_path) as Xin, open(y_path) as yin: 95 | # test the model on test set 96 | ypred = model.fprop(np.load(Xin)) 97 | ypred = np.argmax(ypred, axis=1) 98 | y = np.argmax(np.load(yin), axis=1) 99 | accuracy = np.equal(ypred, y).astype('f4').sum() / len(y) 100 | print 'combined accuracy for blk %s:'%X_path, accuracy 101 | 102 | 103 | if __name__ == '__main__': 104 | setenv() 105 | train() 106 | -------------------------------------------------------------------------------- /mozi/layers/activation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | from theano.sandbox.rng_mrg import MRG_RandomStreams 5 | from mozi.layers.template import Template 6 | from mozi.utils.theano_utils import sharedX 7 | 8 | floatX = theano.config.floatX 9 | theano_rand = MRG_RandomStreams() 10 | 11 | 12 | class Sigmoid(Template): 13 | def _train_fprop(self, state_below): 14 | return T.nnet.sigmoid(state_below) 15 | 16 | 17 | class RELU(Template): 18 | def _train_fprop(self, state_below): 19 | return state_below * (state_below > 0.) 
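
# --- Editor's sketch (not part of the original library). A quick check of
# the RELU forward pass defined just above, assuming only the module-level
# imports already present here (numpy as np, theano, T, floatX) and that
# Template routes _test_fprop to _train_fprop; `_demo_relu` is a
# hypothetical helper name.
def _demo_relu():
    x = T.matrix('x')
    f = theano.function([x], RELU()._train_fprop(x))
    return f(np.asarray([[-1., 2.]], dtype=floatX))  # -> [[ 0.,  2.]]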
20 | 21 | 22 | class PRELU(Template): 23 | def __init__(self, dim, alpha=0.2): 24 | ''' 25 | y = wx + b 26 | if y > 0 then z = y else z = alpha * y 27 | return z 28 | alpha: the gradient of the slope which is updated by backpropagation 29 | ''' 30 | self.alpha = sharedX(np.ones(dim) * alpha, name='PRELU_gradient') 31 | self.params = [self.alpha] 32 | 33 | def _test_fprop(self, state_below): 34 | return self._train_fprop(state_below) 35 | 36 | def _train_fprop(self, state_below): 37 | return state_below * (state_below >= 0) \ 38 | + self.alpha * state_below * (state_below < 0) 39 | 40 | 41 | class LeakyRELU(Template): 42 | def __init__(self, alpha=0.01): 43 | self.alpha = sharedX(alpha) 44 | self.params = [] 45 | 46 | def _train_fprop(self, state_below): 47 | return state_below * (state_below >= 0) \ 48 | + self.alpha * state_below * (state_below < 0) 49 | 50 | 51 | class Noisy_RELU(Template): 52 | def __init__(self, sparsity_factor=0.1, threshold_lr=0.01, alpha=0.01, std=0.1, num_batch=10000, **kwargs): 53 | ''' 54 | sparsityFactor: the micro sparsity of signals through each neuron 55 | threshold_lr: the learning rate of learning the optimum threshold for each neuron 56 | so that the activeness of the neuron approaches sparsityFactor 57 | alpha_range: {start_weight, num_batches, final_weight} for setting the weight on the 58 | contemporary sparsity when calculating the mean sparsity over many batches. 59 | For the start, it will place more weight on the contemporary, but as more 60 | epoch goes through, the weight on contemporary batch should decrease, so 61 | that mean_sparsity will be more stable. 62 | std: the standard deviation of the noise 63 | ''' 64 | super(Noisy_RELU, self).__init__(**kwargs) 65 | self.sparsity_factor = sparsity_factor 66 | self.threshold_lr = threshold_lr 67 | self.alpha = alpha 68 | self.std = std 69 | self.num_batch = num_batch 70 | self.threshold = 0. 71 | self.activity = 0. 
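        # (editor's note, added) threshold and activity start out as Python
        # floats and are re-assigned symbolically in _train_fprop below: the
        # threshold is nudged up whenever a unit fires more often than
        # sparsity_factor, and nudged down when it fires less often.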
72 |         self.batch_count = 0
73 | 
74 | 
75 |     def _test_fprop(self, state_below):
76 |         return state_below * (state_below > self.threshold)
77 | 
78 |     def _train_fprop(self, state_below):
79 |         if self.batch_count > self.num_batch:
80 |             return state_below * (state_below > self.threshold)
81 | 
82 |         else:
83 |             self.batch_count += 1
84 |             state_below = state_below + theano_rand.normal(size=state_below.shape, std=self.std, dtype=floatX)
85 |             state_below = state_below * (state_below > self.threshold)
86 |             activity = T.mean(state_below > 0, axis=0)
87 |             self.activity = self.alpha * activity + (1-self.alpha) * self.activity
88 |             self.threshold += self.threshold_lr * (self.activity - self.sparsity_factor)
89 |             return state_below * (state_below > self.threshold)
90 | 
91 | 
92 | class Softmax(Template):
93 |     def _train_fprop(self, state_below):
94 |         return T.nnet.softmax(state_below)
95 | 
96 | 
97 | class Tanh(Template):
98 |     def _train_fprop(self, state_below):
99 |         return T.tanh(state_below)
100 | 
101 | 
102 | class Softplus(Template):
103 |     def _train_fprop(self, state_below):
104 |         return T.nnet.softplus(state_below)
105 | 
106 | 
107 | class ELU(Template):
108 |     def __init__(self, alpha=1.0):
109 |         self.alpha = alpha
110 |         self.params = []
111 |     def _train_fprop(self, state_below):
112 |         return self.alpha*(T.exp(state_below)-1)*(state_below<0) + state_below*(state_below>=0)
113 | 
--------------------------------------------------------------------------------
/mozi/layers/normalization.py:
--------------------------------------------------------------------------------
1 | 
2 | from mozi.layers.template import Template
3 | from mozi.utils.theano_utils import shared_zeros, sharedX, shared_ones
4 | from mozi.weight_init import UniformWeight
5 | import theano.tensor as T
6 | import theano
7 | 
8 | class BatchNormalization(Template):
9 | 
10 |     def __init__(self, dim, layer_type, gamma_init=UniformWeight(), short_memory=0.01):
11 |         '''
12 |         REFERENCE:
13 |             Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
14 |         PARAMS:
15 |             short_memory: short term memory
16 |                 y_t is the latest value, the moving average x_tp1 is calculated as
17 |                 x_tp1 = memory * y_t + (1-memory) * x_t, the larger the short term
18 |                 memory, the more weight is put on contemporary.
19 |             layer_type: fc or conv
20 |             epsilon:
21 |                 denominator min value for preventing division by zero in computing std
22 |             dim: for fc layers, shape is the layer dimension, for conv layers,
23 |                 shape is the number of feature maps
24 |         '''
25 | 
26 |         assert layer_type in ['fc', 'conv']
27 |         self.layer_type = layer_type
28 |         self.epsilon = 1e-6
29 |         self.dim = dim
30 |         self.mem = short_memory
31 | 
32 |         if self.layer_type == 'fc':
33 |             input_shape = (1, dim)
34 |             self.broadcastable = (True, False)
35 |         elif self.layer_type == 'conv':
36 |             input_shape = (1, dim, 1, 1)
37 |             self.broadcastable = (True, False, True, True)
38 | 
39 |         self.gamma = gamma_init(input_shape, name='gamma')
40 |         self.beta = shared_zeros(input_shape, name='beta')
41 |         self.params = [self.gamma, self.beta]
42 |         self.moving_mean = 0
43 |         self.moving_var = 1
44 | 
45 |     def _train_fprop(self, state_below):
46 |         if self.layer_type == 'fc':
47 |             miu = state_below.mean(axis=0)
48 |             var = T.mean((state_below - miu)**2, axis=0)
49 |         elif self.layer_type == 'conv':
50 |             miu = state_below.mean(axis=(0,2,3), keepdims=True)
51 |             var = T.mean((state_below - miu)**2, axis=(0,2,3), keepdims=True)
52 |         self.moving_mean = self.mem * miu + (1-self.mem) * self.moving_mean
53 |         self.moving_var = self.mem * var + (1-self.mem) * self.moving_var
54 | 
55 |         Z = (state_below - self.moving_mean) / T.sqrt(self.moving_var + self.epsilon)
56 |         gamma = T.patternbroadcast(self.gamma, self.broadcastable)
57 |         beta = T.patternbroadcast(self.beta, self.broadcastable)
58 |         return gamma * Z + beta
59 | 
60 | 
61 |     def _test_fprop(self, state_below):
62 |         Z = (state_below - self.moving_mean) / T.sqrt(self.moving_var + self.epsilon)
63 |         gamma = T.patternbroadcast(self.gamma, self.broadcastable)
64 |         beta = T.patternbroadcast(self.beta, self.broadcastable)
65 |         return gamma * Z + beta
66 | 
67 | 
68 |     def _layer_stats(self, state_below, layer_output):
69 |         return [('moving_mean', T.mean(self.moving_mean)),
70 |                 ('moving_std', T.mean(self.moving_var)),
71 |                 ('gamma_mean', T.mean(self.gamma)),
72 |                 ('beta_mean', T.mean(self.beta)),
73 |                 ('gamma_max', T.max(self.gamma))]
74 | 
75 | 
76 | class LRN(Template):
77 |     """
78 |     Adapted from pylearn2
79 |     Local Response Normalization
80 |     """
81 | 
82 |     def __init__(self, n=5, alpha=0.0001, beta=0.75, k=2):
83 |         super(LRN, self).__init__()
84 |         self.n = n
85 |         self.alpha = alpha
86 |         self.beta = beta
87 |         self.k = k
88 |         assert self.n % 2 == 1, 'only odd n is supported'
89 | 
90 |     def _train_fprop(self, state_below):
91 |         half = self.n / 2
92 |         sq = T.sqr(state_below)
93 |         b, ch, r, c = state_below.shape
94 |         extra_channels = T.alloc(0., b, ch + 2*half, r, c)
95 |         sq = T.set_subtensor(extra_channels[:,half:half+ch,:,:], sq)
96 |         scale = self.k
97 | 
98 |         for i in xrange(self.n):
99 |             scale += self.alpha * sq[:,i:i+ch,:,:]
100 | 
101 |         scale = scale ** self.beta
102 |         return state_below / scale
103 | 
104 |     def _test_fprop(self, state_below):
105 |         return self._train_fprop(state_below)
106 | 
--------------------------------------------------------------------------------
/example/imdb_bilstm.py:
--------------------------------------------------------------------------------
1 | 
2 | import numpy as np
3 | 
4 | from mozi.datasets.imdb import IMDB
5 | from mozi.model import Sequential
6 | from mozi.layers.linear import Linear
7 | from mozi.layers.noise import Dropout
8 | from mozi.layers.activation import RELU, Sigmoid
9 | from mozi.layers.normalization import BatchNormalization
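# (Editor's sketch, hypothetical usage, not part of the original script.)
# The BatchNormalization layer imported above keeps its own moving
# mean/variance; a minimal fully-connected use on a (batch, dim) input:
#   bn = BatchNormalization(dim=100, layer_type='fc')
#   y_train = bn._train_fprop(T.matrix())  # folds batch stats into the moving stats
#   y_test = bn._test_fprop(T.matrix())    # normalizes with the moving stats only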
10 | from mozi.layers.embedding import Embedding 11 | from mozi.env import setenv 12 | from mozi.layers.recurrent import BiLSTM, LSTM 13 | from mozi.layers.misc import Transform, Flatten, Reshape 14 | from mozi.learning_method import SGD 15 | from mozi.log import Log 16 | from mozi.train_object import TrainObject 17 | from mozi.cost import mse, error 18 | import theano.tensor as T 19 | 20 | import cPickle 21 | import sys 22 | 23 | ''' 24 | Train a BiDirectionLSTM LSTM on the IMDB sentiment classification task. 25 | The dataset is actually too small for LSTM to be of any advantage 26 | compared to simpler, much faster methods such as TF-IDF+LogReg. 27 | Notes: 28 | - RNNs are tricky. Choice of batch size is important, 29 | choice of loss and optimizer is critical, etc. 30 | Most configurations won't converge. 31 | - LSTM loss decrease during training can be quite different 32 | from what you see with CNNs/MLPs/etc. It's more or less a sigmoid 33 | instead of an inverse exponential. 34 | GPU command: 35 | THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python imdb_lstm.py 36 | 250s/epoch on GPU (GT 650M), vs. 400s/epoch on CPU (2.4Ghz Core i7). 37 | ''' 38 | 39 | def train(): 40 | max_features=20000 41 | maxseqlen = 100 # cut texts after this number of words (among top max_features most common words) 42 | batch_size = 16 43 | word_vec_len = 256 44 | iter_class = 'SequentialRecurrentIterator' 45 | seq_len = 10 46 | 47 | data = IMDB(pad_zero=True, maxlen=100, nb_words=max_features, batch_size=batch_size, 48 | train_valid_test_ratio=[8,2,0], iter_class=iter_class, seq_len=seq_len) 49 | 50 | print('Build model...') 51 | model = Sequential(input_var=T.matrix(), output_var=T.matrix()) 52 | model.add(Embedding(max_features, word_vec_len)) 53 | 54 | # MLP layers 55 | model.add(Transform((word_vec_len,))) # transform from 3d dimensional input to 2d input for mlp 56 | model.add(Linear(word_vec_len, 100)) 57 | model.add(RELU()) 58 | model.add(BatchNormalization(dim=100, layer_type='fc')) 59 | model.add(Linear(100,100)) 60 | model.add(RELU()) 61 | model.add(BatchNormalization(dim=100, layer_type='fc')) 62 | model.add(Linear(100, word_vec_len)) 63 | model.add(RELU()) 64 | model.add(Transform((maxseqlen, word_vec_len))) # transform back from 2d to 3d for recurrent input 65 | 66 | # Stacked up BiLSTM layers 67 | model.add(BiLSTM(word_vec_len, 50, output_mode='concat', return_sequences=True)) 68 | model.add(BiLSTM(100, 24, output_mode='sum', return_sequences=True)) 69 | model.add(LSTM(24, 24, return_sequences=True)) 70 | 71 | # MLP layers 72 | model.add(Reshape((24 * maxseqlen,))) 73 | model.add(BatchNormalization(dim=24 * maxseqlen, layer_type='fc')) 74 | model.add(Linear(24 * maxseqlen, 50)) 75 | model.add(RELU()) 76 | model.add(Dropout(0.2)) 77 | model.add(Linear(50, 1)) 78 | model.add(Sigmoid()) 79 | 80 | # build learning method 81 | decay_batch = int(data.train.X.shape[0] * 5 / batch_size) 82 | learning_method = SGD(learning_rate=0.1, momentum=0.9, 83 | lr_decay_factor=1.0, decay_batch=decay_batch) 84 | 85 | # Build Logger 86 | log = Log(experiment_name = 'MLP', 87 | description = 'This is a tutorial', 88 | save_outputs = True, # log all the outputs from the screen 89 | save_model = True, # save the best model 90 | save_epoch_error = True, # log error at every epoch 91 | save_to_database = {'name': 'Example.sqlite3', 92 | 'records': {'Batch_Size': batch_size, 93 | 'Learning_Rate': learning_method.learning_rate, 94 | 'Momentum': learning_method.momentum}} 95 | ) # end log 96 | 97 | # put everything 
into the train object 98 | train_object = TrainObject(model = model, 99 | log = log, 100 | dataset = data, 101 | train_cost = mse, 102 | valid_cost = error, 103 | learning_method = learning_method, 104 | stop_criteria = {'max_epoch' : 100, 105 | 'epoch_look_back' : 5, 106 | 'percent_decrease' : 0.01} 107 | ) 108 | # finally run the code 109 | train_object.setup() 110 | train_object.run() 111 | 112 | 113 | if __name__ == '__main__': 114 | setenv() 115 | train() 116 | -------------------------------------------------------------------------------- /mozi/utils/mnist_utils.py: -------------------------------------------------------------------------------- 1 | """Low-level utilities for reading in raw MNIST files.""" 2 | 3 | __author__ = "David Warde-Farley" 4 | __copyright__ = "Copyright 2012, Universite de Montreal" 5 | __credits__ = ["David Warde-Farley"] 6 | __license__ = "3-clause BSD" 7 | __email__ = "wardefar@iro" 8 | __maintainer__ = "David Warde-Farley" 9 | 10 | 11 | import struct 12 | import numpy 13 | import gzip 14 | 15 | import tarfile, inspect, os 16 | from six.moves.urllib.request import urlretrieve 17 | from mozi.utils.progbar import Progbar 18 | 19 | MNIST_IMAGE_MAGIC = 2051 20 | MNIST_LABEL_MAGIC = 2049 21 | 22 | def get_mnist_file(fpath, origin): 23 | datadir = os.path.dirname(fpath) 24 | if not os.path.exists(datadir): 25 | os.makedirs(datadir) 26 | 27 | try: 28 | f = open(fpath) 29 | except: 30 | print('Downloading data from', origin) 31 | 32 | global progbar 33 | progbar = None 34 | def dl_progress(count, block_size, total_size): 35 | global progbar 36 | if progbar is None: 37 | progbar = Progbar(total_size) 38 | else: 39 | progbar.update(count*block_size) 40 | 41 | urlretrieve(origin, fpath + '.gz', dl_progress) 42 | progbar = None 43 | 44 | fin = gzip.open(fpath + '.gz', 'rb') 45 | fout = open(fpath, 'wb') 46 | fout.write(fin.read()) 47 | fin.close() 48 | fout.close() 49 | 50 | return fpath 51 | 52 | 53 | class open_if_filename(object): 54 | def __init__(self, f, mode='r', buffering=-1): 55 | self._f = f 56 | self._mode = mode 57 | self._buffering = buffering 58 | self._handle = None 59 | 60 | def __enter__(self): 61 | if isinstance(self._f, basestring): 62 | self._handle = open(self._f, self._mode, self._buffering) 63 | else: 64 | self._handle = self._f 65 | return self._handle 66 | 67 | def __exit__(self, exc_type, exc_value, traceback): 68 | if self._handle is not self._f: 69 | self._handle.close() 70 | 71 | 72 | def read_mnist_images(fn, dtype=None): 73 | """ 74 | Read MNIST images from the original ubyte file format. 75 | 76 | Parameters 77 | ---------- 78 | fn : str or object 79 | Filename/path from which to read labels, or an open file 80 | object for the same (will not be closed for you). 81 | 82 | dtype : str or object, optional 83 | A NumPy dtype or string that can be converted to one. 84 | If unspecified, images will be returned in their original 85 | unsigned byte format. 86 | 87 | Returns 88 | ------- 89 | images : ndarray, shape (n_images, n_rows, n_cols) 90 | An image array, with individual examples indexed along the 91 | first axis and the image dimensions along the second and 92 | third axis. 93 | 94 | Notes 95 | ----- 96 | If the dtype provided was boolean, the resulting array will 97 | be boolean with `True` if the corresponding pixel had a value 98 | greater than or equal to 128, `False otherwise. 
99 | 
100 |     If the dtype provided was a float or complex dtype, the values
101 |     will be mapped to the unit interval [0, 1], with pixel values
102 |     that were 255 in the original unsigned byte representation
103 |     equal to 1.0.
104 |     """
105 |     with open_if_filename(fn, 'rb') as f:
106 |         magic, number, rows, cols = struct.unpack('>iiii', f.read(16))
107 |         if magic != MNIST_IMAGE_MAGIC:
108 |             raise ValueError('wrong magic number reading MNIST image file: ' +
109 |                              fn)
110 |         array = numpy.fromfile(f, dtype='uint8').reshape((number, rows, cols))
111 |         if dtype:
112 |             dtype = numpy.dtype(dtype)
113 |             # If the user wants booleans, threshold at half the range.
114 |             if dtype.kind == 'b':
115 |                 array = array >= 128
116 |             else:
117 |                 # Otherwise, just convert.
118 |                 array = array.astype(dtype)
119 |                 # I don't know why you'd ever turn MNIST into complex,
120 |                 # but just in case, check for float *or* complex dtypes.
121 |                 # Either way, map to the unit interval.
122 |                 if dtype.kind in ('f', 'c'):
123 |                     array /= 255.
124 |         return array
125 | 
126 | 
127 | def read_mnist_labels(fn):
128 |     """
129 |     Read MNIST labels from the original ubyte file format.
130 | 
131 |     Parameters
132 |     ----------
133 |     fn : str or object
134 |         Filename/path from which to read labels, or an open file
135 |         object for the same (will not be closed for you).
136 | 
137 |     Returns
138 |     -------
139 |     labels : ndarray, shape (nlabels,)
140 |         A one-dimensional unsigned byte array containing the
141 |         labels as integers.
142 |     """
143 |     with open_if_filename(fn, 'rb') as f:
144 |         magic, number = struct.unpack('>ii', f.read(8))
145 |         if magic != MNIST_LABEL_MAGIC:
146 |             raise ValueError('wrong magic number reading MNIST label file: ' +
147 |                              fn)
148 |         array = numpy.fromfile(f, dtype='uint8')
149 |         return array
150 | 
--------------------------------------------------------------------------------
/mozi/utils/image.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | 
3 | '''
4 | Adapted from http://deeplearning.net/tutorial/code/utils.py
5 | '''
6 | 
7 | 
8 | def scale_to_unit_interval(ndar, eps=1e-8):
9 |     """ Scales all values in the ndarray ndar to be between 0 and 1 """
10 |     ndar = ndar.copy()
11 |     ndar -= ndar.min()
12 |     ndar *= 1.0 / (ndar.max() + eps)
13 |     return ndar
14 | 
15 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
16 |                        scale_rows_to_unit_interval=True,
17 |                        output_pixel_vals=True):
18 |     """
19 |     Transform an array with one flattened image per row, into an array in
20 |     which images are reshaped and laid out like tiles on a floor.
21 | 
22 |     This function is useful for visualizing datasets whose rows are images,
23 |     and also columns of matrices for transforming those rows
24 |     (such as the first layer of a neural net).
25 | 
26 |     :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can
27 |     be 2-D ndarrays or None;
28 |     :param X: a 2-D array in which every row is a flattened image.
29 | 
30 |     :type img_shape: tuple; (height, width)
31 |     :param img_shape: the original shape of each image
32 | 
33 |     :type tile_shape: tuple; (rows, cols)
34 |     :param tile_shape: the number of images to tile (rows, cols)
35 | 
36 |     :param output_pixel_vals: if output should be pixel values (i.e. int8
37 |     values) or floats
38 | 
39 |     :param scale_rows_to_unit_interval: if the values need to be scaled before
40 |     being plotted to [0,1] or not
41 | 
42 | 
43 |     :returns: array suitable for viewing as an image.
44 |     (See:`PIL.Image.fromarray`.)
45 | :rtype: a 2-d array with same dtype as X. 46 | 47 | """ 48 | 49 | assert len(img_shape) == 2 50 | assert len(tile_shape) == 2 51 | assert len(tile_spacing) == 2 52 | 53 | # The expression below can be re-written in a more C style as 54 | # follows : 55 | # 56 | # out_shape = [0,0] 57 | # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] - 58 | # tile_spacing[0] 59 | # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] - 60 | # tile_spacing[1] 61 | out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp 62 | in zip(img_shape, tile_shape, tile_spacing)] 63 | 64 | if isinstance(X, (list, tuple)): 65 | assert len(X) == 4 66 | # Create an output numpy ndarray to store the image 67 | if output_pixel_vals: 68 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), 69 | dtype='uint8') 70 | else: 71 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), 72 | dtype=X.dtype) 73 | 74 | #colors default to 0, alpha defaults to 1 (opaque) 75 | if output_pixel_vals: 76 | channel_defaults = [0, 0, 0, 255] 77 | else: 78 | channel_defaults = [0., 0., 0., 1.] 79 | 80 | for i in xrange(4): 81 | if X[i] is None: 82 | # if channel is None, fill it with zeros of the correct 83 | # dtype 84 | dt = out_array.dtype 85 | if output_pixel_vals: 86 | dt = 'uint8' 87 | out_array[:, :, i] = numpy.zeros(out_shape, 88 | dtype=dt) + channel_defaults[i] 89 | else: 90 | # use a recurrent call to compute the channel and store it 91 | # in the output 92 | out_array[:, :, i] = tile_raster_images( 93 | X[i], img_shape, tile_shape, tile_spacing, 94 | scale_rows_to_unit_interval, output_pixel_vals) 95 | return out_array 96 | 97 | else: 98 | # if we are dealing with only one channel 99 | H, W = img_shape 100 | Hs, Ws = tile_spacing 101 | 102 | # generate a matrix to store the output 103 | dt = X.dtype 104 | if output_pixel_vals: 105 | dt = 'uint8' 106 | out_array = numpy.zeros(out_shape, dtype=dt) 107 | 108 | for tile_row in xrange(tile_shape[0]): 109 | for tile_col in xrange(tile_shape[1]): 110 | if tile_row * tile_shape[1] + tile_col < X.shape[0]: 111 | this_x = X[tile_row * tile_shape[1] + tile_col] 112 | if scale_rows_to_unit_interval: 113 | # if we should scale values to be between 0 and 1 114 | # do this by calling the `scale_to_unit_interval` 115 | # function 116 | this_img = scale_to_unit_interval( 117 | this_x.reshape(img_shape)) 118 | else: 119 | this_img = this_x.reshape(img_shape) 120 | # add the slice to the corresponding position in the 121 | # output array 122 | c = 1 123 | if output_pixel_vals: 124 | c = 255 125 | out_array[ 126 | tile_row * (H + Hs): tile_row * (H + Hs) + H, 127 | tile_col * (W + Ws): tile_col * (W + Ws) + W 128 | ] = this_img * c 129 | return out_array 130 | -------------------------------------------------------------------------------- /mozi/learning_method.py: -------------------------------------------------------------------------------- 1 | 2 | import theano 3 | import theano.tensor as T 4 | from theano.ifelse import ifelse 5 | floatX = theano.config.floatX 6 | 7 | import numpy as np 8 | from mozi.utils.theano_utils import sharedX, asfloatX 9 | 10 | class LearningMethod(object): 11 | 12 | def update(self, deltas, params, gparams): 13 | """ 14 | Return a list of tuples 15 | """ 16 | raise NotImplementedError(str(type(self))+" does not implement delta.") 17 | 18 | @property 19 | def learning_rate(self): 20 | return float(self.lr.get_value()) 21 | 22 | @property 23 | def momentum(self): 24 | return float(self.mom.get_value()) 25 | 26 | 27 | class 
DecayLearning(LearningMethod): 28 | 29 | def __init__(self, lr_decay_factor=1.0, decay_batch=10000): 30 | self.batch = sharedX(0) 31 | self.decay_batch = sharedX(decay_batch) 32 | self.lr_decay_factor = asfloatX(lr_decay_factor) 33 | 34 | def decay(self): 35 | updates = [] 36 | new_batch = ifelse(T.gt(self.batch, self.decay_batch), sharedX(0), self.batch+1) 37 | new_lr = ifelse(T.gt(self.batch, self.decay_batch), self.lr*self.lr_decay_factor, self.lr) 38 | updates.append((self.batch, new_batch)) 39 | updates.append((self.lr, new_lr)) 40 | return updates 41 | 42 | 43 | class SGD(DecayLearning): 44 | 45 | def __init__(self, learning_rate=0.01, momentum=0.9, **kwargs): 46 | super(SGD, self).__init__(**kwargs) 47 | self.lr = sharedX(learning_rate) 48 | self.mom = sharedX(momentum) 49 | 50 | def update(self, deltas, params, gparams): 51 | updates = [] 52 | for delta, param, gparam in zip(deltas, params, gparams): 53 | updates.append((delta, self.mom * delta - self.lr * gparam)) 54 | updates.append((param, param+delta)) 55 | 56 | updates += self.decay() 57 | return updates 58 | 59 | 60 | class AdaGrad(DecayLearning): 61 | 62 | def __init__(self, learning_rate=0.9, momentum=0., k=1.0, **kwargs): 63 | """ 64 | dx = -learning_rate / sqrt(k + sum(gparam^2)) * gparam 65 | ref : Chris Dyer : Notes on AdaGrad 66 | """ 67 | super(AdaGrad, self).__init__(**kwargs) 68 | self.lr = sharedX(learning_rate) 69 | self.mom = sharedX(momentum) 70 | self.k = sharedX(k) 71 | 72 | def update(self, deltas, params, gparams): 73 | updates = [] 74 | for delta, param, gparam in zip(deltas, params, gparams): 75 | eps = theano.shared(self.k.get_value() * np.ones_like(delta.get_value(borrow=True, return_internal_type=True))) 76 | updates.append((eps, eps + gparam ** 2)) 77 | updates.append((delta, self.mom * delta - self.lr * gparam / T.sqrt(eps))) 78 | updates.append((param, param+delta)) 79 | updates += self.decay() 80 | return updates 81 | 82 | 83 | class AdaDelta(LearningMethod): 84 | 85 | def __init__(self, eps=1e-6, rho=0.95): 86 | """ 87 | dx_t = -rms(dx_{t-1}) / rms(gparam_t) * gparam_t 88 | rms(dx) = sqrt(E_t(dx^2) + eps) 89 | E_t(dx^s) = rho E_{t-1}(dx^2) + (1-rho) dx^2 90 | ref : Matthew D. 
Zeiler: ADADELTA: AN ADAPTIVE LEARNING RATE METHOD 91 | """ 92 | self.eps = sharedX(eps) 93 | self.rho = sharedX(rho) 94 | 95 | def update(self, deltas, params, gparams): 96 | updates = [] 97 | for delta, param, gparam in zip(deltas, params, gparams): 98 | gparam_mean = theano.shared(np.zeros_like(delta.get_value(borrow=True, return_internal_type=True))) 99 | updates.append((gparam_mean, self.rho * gparam_mean + (1-self.rho) * gparam**2)) 100 | delta_mean = theano.shared(np.zeros_like(delta.get_value(borrow=True, return_internal_type=True))) 101 | updates.append((delta_mean, self.rho * delta_mean + (1-self.rho) * delta**2)) 102 | updates.append((delta, -T.sqrt(delta_mean+self.eps) / T.sqrt(gparam_mean+self.eps) * gparam)) 103 | updates.append((param, param+delta)) 104 | return updates 105 | 106 | 107 | class RMSprop(DecayLearning): 108 | 109 | def __init__(self, learning_rate=0.01, eps=1e-6, rho=0.9, **kwargs): 110 | super(RMSprop, self).__init__(**kwargs) 111 | self.lr = sharedX(learning_rate) 112 | self.eps = sharedX(eps) 113 | self.rho = sharedX(rho) 114 | 115 | def update(self, deltas, params, gparams): 116 | updates = [] 117 | for delta, param, gparam in zip(deltas, params, gparams): 118 | new_delta = self.rho * delta + (1-self.rho) * gparam**2 119 | new_param = param - self.lr * gparam / T.sqrt(new_delta + self.eps) 120 | updates.append((delta, new_delta)) 121 | updates.append((param, new_param)) 122 | updates += self.decay() 123 | return updates 124 | 125 | 126 | class Adam(DecayLearning): 127 | 128 | def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, eps=1e-8, **kwargs): 129 | super(Adam, self).__init__(**kwargs) 130 | self.lr = sharedX(learning_rate) 131 | self.iter = sharedX(0) 132 | self.beta_1 = sharedX(beta_1) 133 | self.beta_2 = sharedX(beta_2) 134 | self.eps = sharedX(eps) 135 | 136 | def update(self, deltas, params, gparams): 137 | t = self.iter + 1 138 | lr_t = self.lr * T.sqrt(1-self.beta_2**t)/(1-self.beta_1**t) 139 | updates = [] 140 | for delta, param, gparam in zip(deltas, params, gparams): 141 | m = sharedX(param.get_value() * 0.) 142 | v = sharedX(param.get_value() * 0.) 
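            # (editor's note, added) m and v hold the running first- and
            # second-moment estimates from Kingma & Ba's Adam, and lr_t
            # above is the bias-corrected step size. As written, self.iter
            # is never advanced in the returned updates, so t stays at 1
            # unless the caller updates it separately.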
143 | m_t = (self.beta_1 * m) + (1 - self.beta_1) * gparam 144 | v_t = (self.beta_2 * v) + (1 - self.beta_2) * gparam**2 145 | param_t = param - lr_t * m_t / (T.sqrt(v_t) + self.eps) 146 | updates.append((m, m_t)) 147 | updates.append((v, v_t)) 148 | updates.append((param, param_t)) 149 | updates += self.decay() 150 | return updates 151 | -------------------------------------------------------------------------------- /mozi/log.py: -------------------------------------------------------------------------------- 1 | 2 | from datetime import datetime 3 | import os 4 | import sys 5 | import logging 6 | import cPickle 7 | import sqlite3 8 | import operator 9 | import copy 10 | import numpy as np 11 | 12 | import theano 13 | from theano.sandbox.cuda.var import CudaNdarraySharedVariable 14 | 15 | floatX = theano.config.floatX 16 | 17 | class Log: 18 | 19 | def __init__(self, experiment_name="experiment", description=None, 20 | save_outputs=False, save_model=False, 21 | save_epoch_error=False, save_to_database=None, logger=None): 22 | 23 | self.experiment_name = experiment_name 24 | self.description = description 25 | self.save_outputs = save_outputs 26 | self.save_model = save_model 27 | self.save_epoch_error = save_epoch_error 28 | self.save_to_database = save_to_database 29 | 30 | dt = datetime.now() 31 | dt = dt.strftime('%Y%m%d_%H%M_%S%f') 32 | 33 | self.exp_id = experiment_name + '_' + dt 34 | 35 | if save_outputs or save_model: 36 | save_dir = os.environ['MOZI_SAVE_PATH'] 37 | if not os.path.exists(save_dir): 38 | os.mkdir(save_dir) 39 | 40 | self.exp_dir = save_dir + '/' + self.exp_id 41 | if not os.path.exists(self.exp_dir): 42 | os.mkdir(self.exp_dir) 43 | 44 | self.logger = logger 45 | if self.logger is None: 46 | self.logger = logging.getLogger(__name__) 47 | self.logger.setLevel(logging.DEBUG) 48 | 49 | self.logger.info('exp_id: ' + experiment_name) 50 | 51 | if save_outputs: 52 | ch = logging.FileHandler(filename=self.exp_dir+'/outputs.log') 53 | ch.setLevel(logging.INFO) 54 | formatter = logging.Formatter('%(message)s') 55 | ch.setFormatter(formatter) 56 | self.logger.addHandler(ch) 57 | 58 | if save_epoch_error: 59 | self.epoch_error_path = self.exp_dir+'/epoch_error.csv' 60 | with open(self.epoch_error_path, 'wb') as epoch_file: 61 | epoch_file.write('Epoch,Train_Cost,Valid_Cost,Valid_Error\n') 62 | 63 | if description is not None: 64 | self.logger.info('Description: ' + self.description) 65 | 66 | if save_to_database: 67 | self.first_time_record = True 68 | if not os.path.exists(os.environ['MOZI_DATABASE_PATH']): 69 | os.mkdir(os.environ['MOZI_DATABASE_PATH']) 70 | 71 | def info(self, msg): 72 | self.logger.info(msg) 73 | 74 | def print_records(self): 75 | sorted_ls = sorted(self.save_to_database['records'].iteritems(), 76 | key=operator.itemgetter(0)) 77 | for key, value in sorted_ls: 78 | self.info(key + ': ' + str(value)) 79 | 80 | def _log_outputs(self, outputs): 81 | dt = datetime.now() 82 | dt = dt.strftime('%Y-%m-%d %H:%M') 83 | self.logger.info('Time: ' + dt) 84 | 85 | for (name, val) in outputs: 86 | self.logger.info(name + ': ' + str(val)) 87 | 88 | if self.save_outputs: 89 | self.logger.info('[ outputs saved to: %s ]\n' %self.exp_id) 90 | 91 | def _save_model(self, model): 92 | with open(self.exp_dir+'/model.pkl', 'wb') as pkl_file: 93 | cPickle.dump(model, pkl_file) 94 | 95 | def _save_epoch_error(self, epoch, train_cost, valid_cost, valid_error): 96 | with open(self.epoch_error_path, 'ab') as epoch_file: 97 | epoch_file.write('{},{},{},{}\n'.format(epoch, 
train_cost, valid_cost, valid_error)) 98 | 99 | def _save_to_database(self, epoch, train_cost, valid_error, best_valid_error): 100 | conn = sqlite3.connect(os.environ['MOZI_DATABASE_PATH'] + '/' + self.save_to_database['name']) 101 | cur = conn.cursor() 102 | 103 | if self.first_time_record: 104 | query = 'CREATE TABLE IF NOT EXISTS ' + self.experiment_name + \ 105 | '(exp_id TEXT PRIMARY KEY NOT NULL,' 106 | 107 | for k,v in self.save_to_database['records'].items(): 108 | if type(v) is str: 109 | query += k + ' TEXT,' 110 | elif type(v) is int: 111 | query += k + ' INT,' 112 | elif type(v) is float: 113 | query += k + ' REAL,' 114 | else: 115 | try: 116 | self.save_to_database['records'][k] = str(v) 117 | query += str(k) + ' TEXT,' 118 | except: 119 | raise Exception("Error: The input types for records '{}' of {}".format(k, type(v)) 120 | + " is not primitive types (str, int, float) and not castable as str.") 121 | 122 | query += 'epoch INT, train_cost REAL, valid_error REAL, best_valid_error REAL);' 123 | 124 | cur.execute(query) 125 | 126 | try: 127 | query = 'INSERT INTO ' + self.experiment_name + ' VALUES(' 128 | ls = [self.exp_id] 129 | for k, v in self.save_to_database['records'].items(): 130 | query += '?,' 131 | ls.append(v) 132 | query += '?,?,?,?,?);' 133 | ls.extend([epoch, train_cost, valid_error, best_valid_error]) 134 | cur.execute(query, ls) 135 | self.first_time_record = False 136 | 137 | except sqlite3.OperationalError as err: 138 | self.logger.error('sqlite3.OperationalError: ' + err.message) 139 | self.logger.error('Solution: Change the experiment_name in Log() to a new name, ' 140 | + 'or drop the same table name from the database. ' 141 | + 'experiment_name is used as the table name.') 142 | raise 143 | 144 | else: 145 | cur.execute('UPDATE ' + self.experiment_name + ' SET ' + 146 | 'epoch = ?, ' + 147 | 'train_cost = ?,' + 148 | 'valid_error = ?,' + 149 | 'best_valid_error = ?' 
+
150 |                     " WHERE exp_id='%s'"%self.exp_id,
151 |                     [epoch,
152 |                      train_cost,
153 |                      valid_error,
154 |                      best_valid_error])
155 |         conn.commit()
156 |         conn.close()
157 | 
--------------------------------------------------------------------------------
/mozi/utils/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | 
4 | import matplotlib
5 | import theano
6 | import theano.tensor as T
7 | import numpy as np
8 | import matplotlib.pyplot as plt
9 | from theano.compile.ops import as_op
10 | from mozi.utils.progbar import Progbar
11 | from mozi.utils.train_object_utils import is_shared_var
12 | 
13 | import tarfile, inspect, os
14 | from six.moves.urllib.request import urlretrieve
15 | 
16 | floatX = theano.config.floatX
17 | 
18 | def duplicate_param(name, tensor_list):
19 | 
20 |     for param in tensor_list:
21 |         if param.name == name:
22 |             return True
23 | 
24 |     return False
25 | 
26 | 
27 | def tile_raster_graphs(dct_reconstruct, orig, ae_reconstruct, tile_shape, tile_spacing=(0.1,0.1),
28 |                        slice=(0,-1), axis=None, legend=True):
29 |     """
30 |     DESCRIPTION:
31 |         compare the original and the reconstructed examples by plotting them on the same graph
32 |     PARAM:
33 |         orig / ae_reconstruct / dct_reconstruct : 2d numpy array of axis label [example, feature]
34 |         tile_shape : tuple
35 |         tile_spacing : tuple
36 |         slice : index [start:end]
37 |             gives the range of values in the example to plot
38 |         axis : list [x_min, x_max, y_min, y_max]
39 |             sets the bounds of the x and y axis
40 |     RETURN:
41 |         matplotlib.plot object
42 |     """
43 | 
44 |     assert orig.shape == ae_reconstruct.shape, 'orig ' + str(orig.shape) + ' and reconstruct ' + \
45 |            str(ae_reconstruct.shape) + ' shapes are different'
46 | 
47 |     # make a little extra space between the subplots
48 |     plt.subplots_adjust(wspace=tile_spacing[0], hspace=tile_spacing[1])
49 | 
50 |     num_examples = orig.shape[0]
51 |     if num_examples > tile_shape[0] * tile_shape[1]:
52 |         num_examples = tile_shape[0] * tile_shape[1]
53 | 
54 |     for i in xrange(0, num_examples):
55 |         plt.subplot(tile_shape[0], tile_shape[1], i+1)
56 |         plt.plot(orig[i][slice[0]:slice[1]], 'b-', label='orig')
57 |         plt.plot(ae_reconstruct[i][slice[0]:slice[1]], 'g-', label='AE reconstruct')
58 |         plt.plot(dct_reconstruct[i][slice[0]:slice[1]], 'r-', label='DCT reconstruct')
59 |         if legend:
60 |             plt.legend(loc='best')
61 |         if axis is None:
62 |             plt.axis('tight')
63 |         else:
64 |             plt.axis(axis)
65 |     return plt
66 | 
67 | def make_one_hot(X, onehot_size):
68 |     """
69 |     DESCRIPTION:
70 |         Make a one-hot version of X
71 |     PARAM:
72 |         X: 1d numpy with each value in X representing the class of X
73 |         onehot_size: length of the one hot vector
74 |     RETURN:
75 |         2d numpy tensor, with each row being the onehot vector
76 |     """
77 | 
78 |     rX = np.zeros((len(X), onehot_size), dtype=theano.config.floatX)
79 |     for i in xrange(len(X)):
80 |         rX[i, X[i]] = 1
81 | 
82 |     return rX
83 | 
84 | def get_file(fpath, origin, untar=False):
85 |     datadir = os.path.dirname(fpath)
86 |     if not os.path.exists(datadir):
87 |         os.makedirs(datadir)
88 | 
89 |     if not os.path.exists(fpath):
90 |         print('Downloading data from', origin)
91 | 
92 |         global progbar
93 |         progbar = None
94 |         def dl_progress(count, block_size, total_size):
95 |             global progbar
96 |             if progbar is None:
97 |                 progbar = Progbar(total_size)
98 |             else:
99 |                 progbar.update(count*block_size)
100 | 
101 |         urlretrieve(origin, fpath, dl_progress)
102 |         progbar = None
103 | 
104 |
dirname = ""
105 |     if untar:
106 |         tfile = tarfile.open(fpath, 'r:*')
107 |         names = tfile.getnames()
108 |         dirname = names[0]
109 |         not_exists = [int(not os.path.exists("{}/{}".format(datadir, fname))) for fname in names]
110 |         if sum(not_exists) > 0:
111 |             print('Untarring file...')
112 |             tfile.extractall(path=datadir)
113 |         else:
114 |             print('Files already downloaded and untarred')
115 |         tfile.close()
116 | 
117 |     return "{}/{}".format(datadir, dirname)
118 | 
119 | 
120 | @as_op(itypes=[theano.tensor.fmatrix],
121 |        otypes=[theano.tensor.fmatrix])
122 | def theano_unique(a):
123 |     return np.unique(a)
124 | 
125 | 
126 | def get_from_module(identifier, module_params, module_name, instantiate=False):
127 |     if type(identifier) is str:
128 |         res = module_params.get(identifier)
129 |         if not res:
130 |             raise Exception('Invalid ' + str(module_name) + ': ' + str(identifier))
131 |         if instantiate:
132 |             return res()
133 |         else:
134 |             return res
135 |     return identifier
136 | 
137 | 
138 | def make_tuple(*args):
139 |     return args
140 | 
141 | 
142 | def gpu_to_cpu_model(model):
143 |     for layer in model.layers:
144 |         for member, value in layer.__dict__.items():
145 |             if is_shared_var(value):
146 |                 layer.__dict__[member] = T._shared(np.array(value.get_value(), floatX),
147 |                                                    name=value.name, borrow=False)
148 |         for i in xrange(len(layer.params)):
149 |             if is_shared_var(layer.params[i]):
150 |                 layer.params[i] = T._shared(np.array(layer.params[i].get_value(), floatX),
151 |                                             name=layer.params[i].name, borrow=False)
152 |     return model
153 | 
154 | 
155 | def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.):
156 |     """
157 |     Pad each sequence to the same length:
158 |     the length of the longest sequence.
159 | 
160 |     If maxlen is provided, any sequence longer
161 |     than maxlen is truncated to maxlen. Truncation happens off either the beginning (default) or
162 |     the end of the sequence.
163 | 
164 |     Supports post-padding and pre-padding (default).
165 | 
166 |     """
167 |     lengths = [len(s) for s in sequences]
168 | 
169 |     nb_samples = len(sequences)
170 |     if maxlen is None:
171 |         maxlen = np.max(lengths)
172 | 
173 |     x = (np.ones((nb_samples, maxlen)) * value).astype(dtype)
174 |     for idx, s in enumerate(sequences):
175 |         if truncating == 'pre':
176 |             trunc = s[-maxlen:]
177 |         elif truncating == 'post':
178 |             trunc = s[:maxlen]
179 |         else:
180 |             raise ValueError("Truncating type '%s' not understood" % truncating)
181 | 
182 |         if padding == 'post':
183 |             x[idx, :len(trunc)] = trunc
184 |         elif padding == 'pre':
185 |             x[idx, -len(trunc):] = trunc
186 |         else:
187 |             raise ValueError("Padding type '%s' not understood" % padding)
188 |     return x
189 | 
--------------------------------------------------------------------------------
/mozi/layers/convolution.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | 
4 | import numpy as np
5 | 
6 | import theano
7 | from theano.sandbox.cuda.fftconv import conv2d_fft
8 | import theano.tensor as T
9 | # from theano.tensor.signal import downsample
10 | from theano.tensor.signal.pool import pool_2d
11 | from mozi.weight_init import XavierUniformWeight, GaussianWeight
12 | from mozi.layers.template import Template
13 | from mozi.utils.theano_utils import shared_zeros
14 | # from theano.tensor.nnet.conv import conv2d
15 | from theano.tensor.nnet import conv2d
16 | # from theano.tensor.signal.conv import conv2d
17 | 
18 | floatX = theano.config.floatX
19 | 
20 | class Convolution2D(Template):
21 |     def __init__(self, input_channels, filters, kernel_size=(3,3), stride=(1,1),
22 |                  W=None, b=None, weight_init=GaussianWeight(mean=0, std=0.1), border_mode='valid'):
23 |         '''
24 |         PARAM:
25 |             border_mode: (from theano)
26 |                 valid: only apply filter to complete patches of the image.
Generates 27 | output of shape: image_shape - filter_shape + 1 28 | full: zero-pads image to multiple of filter shape to generate output 29 | of shape: image_shape + filter_shape - 1 30 | ''' 31 | self.input_channels = input_channels 32 | self.filters = filters 33 | self.kernel_size = kernel_size 34 | self.stride = stride 35 | self.border_mode = border_mode 36 | 37 | self.W_shape = (self.filters, self.input_channels) + self.kernel_size 38 | self.W = W 39 | if self.W is None: 40 | self.W = weight_init(self.W_shape, name='W') 41 | 42 | self.b = b 43 | if self.b is None: 44 | self.b = shared_zeros(shape=(self.filters,), name='b') 45 | 46 | self.params = [self.W, self.b] 47 | 48 | 49 | def _train_fprop(self, state_below): 50 | conv_out = conv2d(state_below, self.W, 51 | border_mode=self.border_mode, 52 | subsample=self.stride) 53 | return conv_out + self.b.dimshuffle('x', 0, 'x', 'x') 54 | 55 | 56 | def _layer_stats(self, state_below, layer_output): 57 | w_max = self.W.max() 58 | w_min = self.W.min() 59 | w_mean = self.W.mean() 60 | w_std = self.W.std() 61 | return[('filter_max', w_max), 62 | ('filter_min', w_min), 63 | ('filter_mean', w_mean), 64 | ('filter_std', w_std)] 65 | 66 | 67 | 68 | class Pooling2D(Template): 69 | def __init__(self, poolsize=(2, 2), stride=None, padding=(0,0), 70 | ignore_border=True, mode='max'): 71 | ''' 72 | DESCRIPTION: 73 | pooling layer 74 | PARAM: 75 | stride: two-dimensional tuple (a, b), the separation horizontally a 76 | or vertically b between two pools 77 | padding: pad zeros to the border of the feature map 78 | mode: max | sum | average_inc_pad | average_exc_pad 79 | ignore_border: 80 | ''' 81 | 82 | self.poolsize = poolsize 83 | self.stride = stride 84 | self.padding = padding 85 | self.ignore_border = ignore_border 86 | self.mode = mode 87 | 88 | self.params = [] 89 | 90 | 91 | def _train_fprop(self, state_below): 92 | return pool_2d(state_below, ds=self.poolsize, st=self.stride, 93 | padding=self.padding, ignore_border=self.ignore_border, 94 | mode=self.mode) 95 | 96 | 97 | class ConvFFT2D(Template): 98 | def __init__(self, input_channels, filters, stride, kernel_size=(3,3), 99 | W=None, b=None, weight_init=GaussianWeight(mean=0, std=0.1), 100 | image_shape=None, border_mode='valid', pad_last_dim=False): 101 | ''' 102 | PARAM: 103 | border_mode: (from theano) 104 | valid: only apply filter to complete patches of the image. 
Generates
104 |                 output of shape: image_shape - filter_shape + 1
105 |                 full: zero-pads image to multiple of filter shape to generate output
106 |                 of shape: image_shape + filter_shape - 1
107 |         '''
108 |         self.input_channels = input_channels
109 |         self.filters = filters
110 |         self.kernel_size = kernel_size
111 |         self.border_mode = border_mode
112 |         self.image_shape = image_shape
113 |         self.pad_last_dim = pad_last_dim
114 | 
115 |         self.W_shape = (self.filters, self.input_channels) + self.kernel_size
116 |         self.W = W
117 |         if self.W is None:
118 |             self.W = weight_init(self.W_shape, name='W')
119 | 
120 |         self.b = b
121 |         if self.b is None:
122 |             self.b = shared_zeros(shape=(self.filters,), name='b')
123 | 
124 |         self.params = [self.W, self.b]
125 | 
126 | 
127 |     def _train_fprop(self, state_below):
128 |         conv_out = conv2d_fft(state_below, self.W,
129 |                               border_mode=self.border_mode,
130 |                               image_shape=self.image_shape,
131 |                               pad_last_dim=self.pad_last_dim)
132 |         return conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
133 | 
134 | 
135 | class SpatialPyramidPooling(Template):
136 |     def __init__(self, levels=[1,2,3], padding=None, ignore_border=False, mode='max'):
137 |         """
138 |         DESCRIPTION:
139 |             This pooling layer describes the method in the SPPnet paper
140 |             Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition
141 |             by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun
142 |         PARAM:
143 |             levels (list):
144 |                 levels in SPP, example [1,2,3]
145 |             ignore_border (bool):
146 |                 if x = 5, stride = 2
147 |                 True: return 3 = ceil(x / stride)
148 |                 False: return 2 = floor(x / stride)
149 |         """
150 | 
151 |         self.levels = levels
152 |         self.padding = padding
153 |         self.ignore_border = ignore_border
154 |         self.mode = mode
155 | 
156 |         self.params = []
157 | 
158 | 
159 |     def _train_fprop(self, state_below):
160 |         b, c, h, w = state_below.shape
161 |         layer_out = []
162 |         for i in self.levels:
163 |             out = pool_2d(state_below, ds=(h/i, w/i),
164 |                           st=(h/i, w/i),
165 |                           padding=self.padding,
166 |                           ignore_border=self.ignore_border,
167 |                           mode=self.mode)
168 | 
169 |             # theano.scan(pool_2d, sequences=[self.levels])
170 |             layer_out.append(out.reshape((b, T.prod(out.shape)/b)))
171 | 
172 |         return T.concatenate(layer_out, axis=1)
173 | 
--------------------------------------------------------------------------------
/mozi/datasets/iterator.py:
--------------------------------------------------------------------------------
1 | 
2 | # from __future__ import division
3 | import warnings
4 | import numpy
5 | np = numpy
6 | from theano import config
7 | 
8 | 
9 | class SubsetIterator(object):
10 |     def __init__(self, dataset_size, batch_size=64, num_batches=None, rng=None):
11 |         """
12 |         rng: either a seed value for a numpy RandomState or
13 |              numpy RandomState workalike
14 |         """
15 |         self.dataset_size = dataset_size
16 |         self.batch_size = batch_size
17 |         self.num_batches = num_batches
18 |         self.rng = rng
19 |         self.idx = 0
20 | 
21 |     def next(self):
22 |         raise NotImplementedError()
23 | 
24 |     def __iter__(self):
25 |         self.idx = 0
26 |         return self
27 | 
28 |     # Class-level attributes that might hint the behaviour of
29 |     # FiniteDatasetIterator.
30 | 
31 |     # Does this return subsets that need fancy indexing? (i.e. lists
32 |     # of indices)
33 |     fancy = False
34 | 
35 |     # Does this class make use of random number generators?
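    # (stochastic subclasses, e.g. ShuffledSequentialSubsetIterator below, override this to True)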
36 |     stochastic = False
37 | 
38 |     @property
39 |     def num_examples(self):
40 |         return self.batch_size * self.num_batches
41 | 
42 |     @property
43 |     def uneven(self):
44 |         return False
45 | 
46 | 
47 | class SequentialSubsetIterator(SubsetIterator):
48 |     def __init__(self, dataset_size, batch_size, num_batches=None, rng=None):
49 |         if rng is not None:
50 |             raise ValueError("non-None rng argument not supported for "
51 |                              "sequential batch iteration")
52 |         assert num_batches is None or num_batches >= 0
53 |         if batch_size is None:
54 |             if num_batches is not None:
55 |                 batch_size = int(numpy.ceil(dataset_size / float(num_batches)))
56 |             else:
57 |                 raise ValueError("need one of batch_size, num_batches "
58 |                                  "for sequential batch iteration")
59 |         elif batch_size is not None:
60 |             if num_batches is not None:
61 |                 max_num_batches = numpy.ceil(dataset_size / float(batch_size))
62 |                 if num_batches > max_num_batches:
63 |                     raise ValueError("dataset of %d examples can only provide "
64 |                                      "%d batches with batch_size %d, but %d "
65 |                                      "batches were requested" %
66 |                                      (dataset_size, max_num_batches,
67 |                                       batch_size, num_batches))
68 |             else:
69 |                 num_batches = numpy.ceil(dataset_size / float(batch_size))
70 |         self.next_batch_no = 0
71 |         self.batch = 0
72 |         super(SequentialSubsetIterator, self).__init__(dataset_size, batch_size, num_batches)
73 |         self.idx = 0
74 |         self.indices = np.arange(self.dataset_size)
75 | 
76 | 
77 |     def next(self):
78 |         if self.batch >= self.num_batches or self.idx >= self.dataset_size:
79 |             raise StopIteration()
80 | 
81 |         # this fixes the problem where dataset_size % batch_size != 0
82 |         elif (self.idx + self.batch_size) > self.dataset_size:
83 |             self.last = self.indices[self.idx : self.dataset_size]
84 |             self.idx = self.dataset_size
85 |             return self.last
86 | 
87 |         else:
88 |             self.last = self.indices[self.idx : self.idx + self.batch_size]
89 |             self.idx += self.batch_size
90 |             self.batch += 1
91 |             return self.last
92 | 
93 |     fancy = False
94 |     stochastic = False
95 | 
96 |     @property
97 |     def num_examples(self):
98 |         product = self.batch_size * self.num_batches
99 |         return min(product, self.dataset_size)
100 | 
101 |     @property
102 |     def uneven(self):
103 |         return self.batch_size * self.num_batches > self.dataset_size
104 | 
105 | 
106 | class ShuffledSequentialSubsetIterator(SequentialSubsetIterator):
107 | 
108 |     stochastic = True
109 |     fancy = True
110 | 
111 |     def __init__(self, dataset_size, batch_size, num_batches=None, rng=None):
112 |         super(ShuffledSequentialSubsetIterator, self).__init__(
113 |             dataset_size,
114 |             batch_size,
115 |             num_batches,
116 |             None
117 |         )
118 |         self.idx = 0
119 |         self.indices = np.arange(self.dataset_size)
120 |         if rng is not None and hasattr(rng, 'random_integers'):
121 |             self.rng = rng
122 |         else:
123 |             self.rng = numpy.random.RandomState(rng)
124 |         self.shuffled = numpy.arange(self.dataset_size)
125 |         self.rng.shuffle(self.shuffled)
126 | 
127 |     def next(self):
128 |         if self.batch >= self.num_batches or self.idx >= self.dataset_size:
129 |             raise StopIteration()
130 | 
131 |         # this fixes the problem where dataset_size % batch_size != 0
132 |         elif (self.idx + self.batch_size) > self.dataset_size:
133 |             rval = self.shuffled[self.idx: self.dataset_size]
134 |             self.idx = self.dataset_size
135 |             return rval
136 |         else:
137 |             rval = self.shuffled[self.idx: self.idx + self.batch_size]
138 |             self.idx += self.batch_size
139 |             self.batch += 1
140 |             return rval
141 | 
142 | 
143 | class SequentialContinuousIterator(SubsetIterator):
144 | 
145 |     def __init__(self, dataset_size, batch_size,
step_size=1):
146 |         '''
147 |         This is for a continuous sequence with a fixed step size at a time.
148 |         '''
149 |         super(SequentialContinuousIterator, self).__init__(dataset_size, batch_size)
150 |         self.idx = 0
151 |         self.indices = np.arange(self.dataset_size)
152 |         self.step_size = step_size
153 |         assert self.step_size > 0
154 | 
155 |     def next(self):
156 |         if self.idx + self.batch_size > self.dataset_size:
157 |             raise StopIteration()
158 | 
159 |         rval = self.indices[self.idx:self.idx+self.batch_size]
160 |         self.idx += self.step_size
161 |         return rval
162 | 
163 | class SequentialRecurrentIterator(SubsetIterator):
164 | 
165 |     def __init__(self, dataset_size, batch_size, seq_len):
166 |         '''
167 |         This is for generating sequences of equal length (seq_len), with (batch_size)
168 |         sequences per batch. For example, seq_len 3 and batch_size 4 will generate
169 |         [0, 1, 2; 1, 2, 3; 2, 3, 4; 3, 4, 5]
170 |         '''
171 |         super(SequentialRecurrentIterator, self).__init__(dataset_size, batch_size)
172 |         assert dataset_size >= seq_len, 'size of dataset has to be at least as large as the sequence length'
173 |         self.seq_len = seq_len
174 |         self.ridx = np.concatenate([np.arange(self.seq_len) + i for i in range(batch_size)])
175 | 
176 |     def __iter__(self):
177 |         self.ridx = np.concatenate([np.arange(self.seq_len) + i for i in range(self.batch_size)])
178 |         return self
179 |     def next(self):
180 |         if self.ridx[-1] >= self.dataset_size:
181 |             last = self.ridx[-1] - self.dataset_size + 1
182 |             if len(self.ridx[:-last*self.seq_len]) == 0:
183 |                 raise StopIteration()
184 |             ridx = np.copy(self.ridx)
185 |             self.ridx += self.batch_size
186 |             return ridx[:-last*self.seq_len]
187 |         else:
188 |             ridx = np.copy(self.ridx)
189 |             self.ridx += self.batch_size
190 |             return ridx
191 | 
--------------------------------------------------------------------------------
/mozi/layers/recurrent.py:
--------------------------------------------------------------------------------
1 | 
2 | from mozi.utils.theano_utils import shared_zeros, alloc_zeros_matrix, shared_ones
3 | from mozi.layers.template import Template
4 | from mozi.weight_init import OrthogonalWeight, GaussianWeight, Identity
5 | import theano.tensor as T
6 | import theano
7 | 
8 | 
9 | class LSTM(Template):
10 | 
11 |     def __init__(self, input_dim, output_dim, truncate_gradient=-1, return_sequences=True,
12 |                  weight_init=OrthogonalWeight(), inner_init=GaussianWeight(mean=0, std=0.1)):
13 | 
14 |         self.input_dim = input_dim
15 |         self.output_dim = output_dim
16 |         self.truncate_gradient = truncate_gradient
17 |         self.return_sequences = return_sequences
18 | 
19 |         self.W_i = weight_init((self.input_dim, self.output_dim))
20 |         self.U_i = inner_init((self.output_dim, self.output_dim))
21 |         self.b_i = shared_zeros((self.output_dim), name='b_i')
22 | 
23 |         self.W_f = weight_init((self.input_dim, self.output_dim))
24 |         self.U_f = inner_init((self.output_dim, self.output_dim))
25 |         self.b_f = shared_ones((self.output_dim), name='b_f')
26 | 
27 |         self.W_c = weight_init((self.input_dim, self.output_dim))
28 |         self.U_c = inner_init((self.output_dim, self.output_dim))
29 |         self.b_c = shared_zeros((self.output_dim), name='b_c')
30 | 
31 |         self.W_o = weight_init((self.input_dim, self.output_dim))
32 |         self.U_o = inner_init((self.output_dim, self.output_dim))
33 |         self.b_o = shared_zeros((self.output_dim), name='b_o')
34 | 
35 |         self.params = [
36 |             self.W_i, self.U_i, self.b_i,
37 |             self.W_c, self.U_c, self.b_c,
38 |             self.W_f, self.U_f, self.b_f,
39 |             self.W_o, self.U_o, self.b_o,
40 |         ]
41 | 
42 | 
43 |     def _step(self, xi_t, xf_t, xo_t, xc_t,
44
| h_tm1, c_tm1, u_i, u_f, u_o, u_c): 45 | i_t = T.nnet.sigmoid(xi_t + T.dot(h_tm1, u_i)) 46 | f_t = T.nnet.sigmoid(xf_t + T.dot(h_tm1, u_f)) 47 | o_t = T.nnet.sigmoid(xo_t + T.dot(h_tm1, u_o)) 48 | g_t = T.tanh(xc_t + T.dot(h_tm1, u_c)) 49 | c_t = f_t * c_tm1 + i_t * g_t 50 | 51 | h_t = o_t * T.tanh(c_t) 52 | return h_t, c_t 53 | 54 | 55 | def _train_fprop(self, state_below): 56 | X = state_below.dimshuffle((1, 0, 2)) 57 | 58 | xi = T.dot(X, self.W_i) + self.b_i 59 | xf = T.dot(X, self.W_f) + self.b_f 60 | xc = T.dot(X, self.W_c) + self.b_c 61 | xo = T.dot(X, self.W_o) + self.b_o 62 | 63 | [outputs, memories], updates = theano.scan( 64 | self._step, 65 | sequences=[xi, xf, xo, xc], 66 | outputs_info=[ 67 | T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1), 68 | T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1) 69 | ], 70 | non_sequences=[self.U_i, self.U_f, self.U_o, self.U_c], 71 | truncate_gradient=self.truncate_gradient) 72 | 73 | if self.return_sequences: 74 | return outputs.dimshuffle((1, 0, 2)) 75 | return outputs[-1] 76 | 77 | 78 | class BiLSTM(Template): 79 | ''' 80 | Bidirection LSTM 81 | ''' 82 | 83 | def __init__(self, input_dim, output_dim, weight_init=OrthogonalWeight(), 84 | inner_init=GaussianWeight(mean=0, std=0.1), truncate_gradient=-1, 85 | output_mode='concat', return_sequences=False, return_idx=-1): 86 | 87 | self.input_dim = input_dim 88 | self.output_dim = output_dim 89 | self.truncate_gradient = truncate_gradient 90 | self.output_mode = output_mode # output_mode is either sum or concatenate 91 | self.return_sequences = return_sequences 92 | self.return_idx = return_idx 93 | # forward weights 94 | self.W_i = weight_init((self.input_dim, self.output_dim)) 95 | self.U_i = inner_init((self.output_dim, self.output_dim)) 96 | self.b_i = shared_zeros((self.output_dim), name='b_i') 97 | 98 | self.W_f = weight_init((self.input_dim, self.output_dim)) 99 | self.U_f = inner_init((self.output_dim, self.output_dim)) 100 | self.b_f = shared_ones((self.output_dim), name='b_f') 101 | 102 | self.W_c = weight_init((self.input_dim, self.output_dim)) 103 | self.U_c = inner_init((self.output_dim, self.output_dim)) 104 | self.b_c = shared_zeros((self.output_dim), name='b_c') 105 | 106 | self.W_o = weight_init((self.input_dim, self.output_dim)) 107 | self.U_o = inner_init((self.output_dim, self.output_dim)) 108 | self.b_o = shared_zeros((self.output_dim), name='b_o') 109 | 110 | # backward weights 111 | self.Wb_i = weight_init((self.input_dim, self.output_dim)) 112 | self.Ub_i = inner_init((self.output_dim, self.output_dim)) 113 | self.bb_i = shared_zeros((self.output_dim), name='bb_i') 114 | 115 | self.Wb_f = weight_init((self.input_dim, self.output_dim)) 116 | self.Ub_f = inner_init((self.output_dim, self.output_dim)) 117 | self.bb_f = shared_ones((self.output_dim), name='bb_f') 118 | 119 | self.Wb_c = weight_init((self.input_dim, self.output_dim)) 120 | self.Ub_c = inner_init((self.output_dim, self.output_dim)) 121 | self.bb_c = shared_zeros((self.output_dim), name='bb_c') 122 | 123 | self.Wb_o = weight_init((self.input_dim, self.output_dim)) 124 | self.Ub_o = inner_init((self.output_dim, self.output_dim)) 125 | self.bb_o = shared_zeros((self.output_dim), name='bb_o') 126 | 127 | self.params = [ 128 | self.W_i, self.U_i, self.b_i, 129 | self.W_c, self.U_c, self.b_c, 130 | self.W_f, self.U_f, self.b_f, 131 | self.W_o, self.U_o, self.b_o, 132 | 133 | self.Wb_i, self.Ub_i, self.bb_i, 134 | self.Wb_c, self.Ub_c, self.bb_c, 135 | self.Wb_f, self.Ub_f, 
self.bb_f, 136 | self.Wb_o, self.Ub_o, self.bb_o, 137 | ] 138 | 139 | 140 | def _forward_step(self, 141 | xi_t, xf_t, xo_t, xc_t, 142 | h_tm1, c_tm1, 143 | u_i, u_f, u_o, u_c): 144 | i_t = T.nnet.sigmoid(xi_t + T.dot(h_tm1, u_i)) 145 | f_t = T.nnet.sigmoid(xf_t + T.dot(h_tm1, u_f)) 146 | o_t = T.nnet.sigmoid(xo_t + T.dot(h_tm1, u_o)) 147 | g_t = T.tanh(xc_t + T.dot(h_tm1, u_c)) 148 | c_t = f_t * c_tm1 + i_t * g_t 149 | h_t = o_t * T.tanh(c_t) 150 | return h_t, c_t 151 | 152 | 153 | def get_forward_output(self, state_below): 154 | X = state_below.dimshuffle((1,0,2)) 155 | 156 | xi = T.dot(X, self.W_i) + self.b_i 157 | xf = T.dot(X, self.W_f) + self.b_f 158 | xc = T.dot(X, self.W_c) + self.b_c 159 | xo = T.dot(X, self.W_o) + self.b_o 160 | 161 | [outputs, memories], updates = theano.scan( 162 | self._forward_step, 163 | sequences=[xi, xf, xo, xc], 164 | outputs_info=[ 165 | T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1), 166 | T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1) 167 | ], 168 | non_sequences=[self.U_i, self.U_f, self.U_o, self.U_c], 169 | truncate_gradient=self.truncate_gradient 170 | ) 171 | return outputs.dimshuffle((1,0,2)) 172 | 173 | 174 | def get_backward_output(self, state_below): 175 | X = state_below.dimshuffle((1,0,2)) 176 | 177 | xi = T.dot(X, self.Wb_i) + self.bb_i 178 | xf = T.dot(X, self.Wb_f) + self.bb_f 179 | xc = T.dot(X, self.Wb_c) + self.bb_c 180 | xo = T.dot(X, self.Wb_o) + self.bb_o 181 | 182 | [outputs, memories], updates = theano.scan( 183 | self._forward_step, 184 | sequences=[xi, xf, xo, xc], 185 | outputs_info=[ 186 | T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1), 187 | T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1) 188 | ], 189 | non_sequences=[self.Ub_i, self.Ub_f, self.Ub_o, self.Ub_c], 190 | go_backwards = True, 191 | truncate_gradient=self.truncate_gradient 192 | ) 193 | return outputs.dimshuffle((1,0,2)) 194 | 195 | 196 | def _train_fprop(self, state_below): 197 | forward = self.get_forward_output(state_below) 198 | backward = self.get_backward_output(state_below) 199 | if self.output_mode == 'sum': 200 | output = forward + backward 201 | elif self.output_mode == 'concat': 202 | output = T.concatenate([forward, backward], axis=2) 203 | else: 204 | raise Exception('output mode is not sum or concat') 205 | if self.return_sequences==False: 206 | return output[:,self.return_idx,:] 207 | elif self.return_sequences==True: 208 | return output 209 | else: 210 | raise Exception('Unexpected output shape for return_sequences') 211 | -------------------------------------------------------------------------------- /mozi/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import mozi.datasets.iterator as iterators 4 | import numpy as np 5 | import theano 6 | from multiprocessing import Process, Queue 7 | import time 8 | floatX = theano.config.floatX 9 | 10 | import logging 11 | internal_logger = logging.getLogger(__name__) 12 | logging.basicConfig(level=logging.DEBUG) 13 | 14 | from mozi.log import Log 15 | 16 | class IterMatrix(object): 17 | 18 | def __init__(self, X, y, iter_class='SequentialSubsetIterator', 19 | batch_size=100, **kwargs): 20 | self.X = X 21 | self.y = y 22 | self.batch_size = batch_size 23 | self.iter_class = iter_class 24 | self.kwargs = kwargs 25 | self.iterator = getattr(iterators, self.iter_class) 26 | 27 | def __iter__(self): 28 | return self.iterator(dataset_size=self.dataset_size, 29 | 
batch_size=self.batch_size, **self.kwargs) 30 | 31 | def set_iterator(self, iterator): 32 | self.iterator = iterator 33 | 34 | def __getitem__(self, key): 35 | return self.X[key], self.y[key] 36 | 37 | @property 38 | def dataset_size(self): 39 | return self.X.shape[0] if self.X is not None else -1 40 | 41 | 42 | class IterDatasets(IterMatrix): 43 | 44 | def __init__(self, X, y, iter_class='SequentialSubsetIterator', 45 | batch_size=100, **kwargs): 46 | self.X = X 47 | self.y = y 48 | self.batch_size = batch_size 49 | self.iter_class = iter_class 50 | self.kwargs = kwargs 51 | self.iterator = getattr(iterators, self.iter_class) 52 | 53 | def __getitem__(self, key): 54 | Xslice = [] 55 | yslice = [] 56 | for dataset in self.X: 57 | Xslice.append(dataset[key]) 58 | for label in self.y: 59 | yslice.append(label[key]) 60 | return Xslice + yslice 61 | 62 | def __len__(self): 63 | return self.dataset_size 64 | 65 | @property 66 | def dataset_size(self): 67 | if isinstance(self.X, tuple): 68 | if len(self.X) == 0: 69 | dsize = -1 70 | else: 71 | dsize = len(self.y[0]) 72 | elif self.X is None: 73 | dsize = -1 74 | else: 75 | dsize = len(self.X) 76 | return dsize 77 | 78 | 79 | class Dataset(object): 80 | 81 | def __init__(self, log): 82 | self.log = log 83 | if self.log is None: 84 | # use default Log setting, using the internal logger 85 | self.log = Log(logger=internal_logger) 86 | 87 | 88 | def __iter__(self): 89 | raise NotImplementedError(str(type(self))+" does not implement the __iter__ method.") 90 | 91 | def next(self): 92 | raise NotImplementedError(str(type(self))+" does not implement the next method.") 93 | 94 | @property 95 | def nblocks(self): 96 | raise NotImplementedError(str(type(self))+" does not implement the nblocks method.") 97 | 98 | 99 | class SingleBlock(Dataset): 100 | 101 | def __init__(self, X=None, y=None, train_valid_test_ratio=[8,1,1], log=None, **kwargs): 102 | ''' 103 | All the data is loaded into memory for one go training 104 | ''' 105 | super(SingleBlock, self).__init__(log=log) 106 | self.ratio = train_valid_test_ratio 107 | self.train = IterMatrix(X=None, y=None, **kwargs) 108 | self.valid = IterMatrix(X=None, y=None, **kwargs) 109 | self.test = IterMatrix(X=None, y=None, **kwargs) 110 | 111 | assert len(self.ratio) == 3, 'the size of list is not 3' 112 | 113 | if X is not None and y is not None: 114 | self.set_Xy(X, y) 115 | 116 | def __iter__(self): 117 | self.iter = True 118 | return self 119 | 120 | def next(self): 121 | if self.iter: 122 | # only one iteration since there is only one data block 123 | self.iter = False 124 | return self 125 | else: 126 | raise StopIteration 127 | 128 | @property 129 | def nblocks(self): 130 | return 1 131 | 132 | def set_Xy(self, X, y): 133 | num_examples = len(X) 134 | total_ratio = sum(self.ratio) 135 | num_train = int(self.ratio[0] * 1.0 * num_examples / total_ratio) 136 | num_valid = int(self.ratio[1] * 1.0 * num_examples / total_ratio) 137 | 138 | train_X = X[:num_train] 139 | train_y = y[:num_train] 140 | 141 | valid_X = X[num_train:num_train+num_valid] 142 | valid_y = y[num_train:num_train+num_valid] 143 | 144 | test_X = X[num_train+num_valid:] 145 | test_y = y[num_train+num_valid:] 146 | 147 | self.train.X = train_X 148 | self.train.y = train_y 149 | 150 | if self.ratio[1] == 0: 151 | self.log.info('Valid set is empty! It is needed for early stopping and saving best model') 152 | 153 | self.valid.X = valid_X 154 | self.valid.y = valid_y 155 | 156 | if self.ratio[2] == 0: 157 | self.log.info('Test set is empty! 
It is needed for testing the best model')
158 | 
159 |         self.test.X = test_X
160 |         self.test.y = test_y
161 | 
162 | 
163 |     def get_train(self):
164 |         return self.train
165 | 
166 |     def get_valid(self):
167 |         return self.valid
168 | 
169 |     def get_test(self):
170 |         return self.test
171 | 
172 |     def set_train(self, X, y):
173 |         self.train.X = X
174 |         self.train.y = y
175 | 
176 |     def set_valid(self, X, y):
177 |         self.valid.X = X
178 |         self.valid.y = y
179 | 
180 |     def set_test(self, X, y):
181 |         self.test.X = X
182 |         self.test.y = y
183 | 
184 | 
185 | class DataBlocks(Dataset):
186 | 
187 |     def __init__(self, data_paths, train_valid_test_ratio=[8,1,1], log=None, allow_preload=False, **kwargs):
188 | 
189 |         """
190 |         DESCRIPTION:
191 |             This is the class for processing blocks of data, whereby the dataset is
192 |             loaded and unloaded into memory one block at a time.
193 |         PARAM:
194 |             data_paths(list): contains the paths to the numpy data files. It's a
195 |                               list of tuples whereby the first element of the tuple
196 |                               is the X path, and the second is the y path.
197 |                               example [(X_path1, y_path1),(X_path2, y_path2)]
198 |             allow_preload(bool): if True, the next data block is preloaded while
199 |                                  training on the current block; this reduces loading
200 |                                  time at the cost of more memory.
201 | 
202 |         """
203 |         super(DataBlocks, self).__init__(log=log)
204 |         assert isinstance(data_paths, (list,tuple)), "data_paths is not a list"
205 |         self.data_paths = data_paths
206 |         self.single_block = SingleBlock(None, None, train_valid_test_ratio, log, **kwargs)
207 |         self.allow_preload = allow_preload
208 |         self.q = Queue()
209 | 
210 |     def __iter__(self):
211 |         self.files = iter(self.data_paths)
212 |         if self.allow_preload:
213 |             self.lastblock = False
214 |             bufile = next(self.files)
215 |             self.load_Xy(bufile, self.q)
216 |         return self
217 | 
218 |     def next(self):
219 |         if self.allow_preload:
220 |             if self.lastblock:
221 |                 raise StopIteration
222 | 
223 |             try:
224 |                 X, y = self.q.get(block=True, timeout=None)
225 |                 self.single_block.set_Xy(X,y)
226 |                 bufile = next(self.files)
227 |                 p = Process(target=self.load_Xy, args=(bufile, self.q))
228 |                 p.start()
229 |             except StopIteration:
230 |                 self.lastblock = True
231 |         else:
232 |             fpaths = next(self.files)
233 |             X,y = self.openfile(fpaths)
234 |             self.single_block.set_Xy(X=X, y=y)
235 | 
236 |         return self.single_block
237 | 
238 |     @staticmethod
239 |     def openfile(paths):
240 |         assert isinstance(paths, (list,tuple)), str(type(paths)) + " is not a tuple or list"
241 |         with open(paths[0], 'rb') as X_fin, open(paths[1], 'rb') as y_fin:
242 |             X = np.load(X_fin)
243 |             y = np.load(y_fin)
244 |         return X,y
245 | 
246 |     def load_Xy(self, paths, q):
247 |         self.log.info('..loading: ' + str(paths))
248 |         X,y = self.openfile(paths)
249 |         q.put((X,y))
250 | 
251 |     @property
252 |     def nblocks(self):
253 |         return len(self.data_paths)
254 | 
255 | 
256 | class MultiInputsData(SingleBlock):
257 | 
258 |     def __init__(self, X=None, y=None, train_valid_test_ratio=[8,1,1], log=None, **kwargs):
259 | 
260 |         """
261 |         DESCRIPTION:
262 |             This class is used for multitask learning where we have multiple data
263 |             inputs and multiple data outputs.
264 |         PARAM:
265 |             X (tuple of arrays or just one array of X): If our input is X1 and X2, both
266 |                 with the same number of rows, then X = (X1, X2)
267 |             y (tuple of arrays or just one array of y): labels with the same number of rows
268 |                 as the input data X
269 |         """
270 |         super(MultiInputsData, self).__init__(train_valid_test_ratio=train_valid_test_ratio,
271 |                                               log=log, **kwargs)
272 | 
273 |         self.train = IterDatasets(None, None, **kwargs)
274 |         self.valid = IterDatasets(None, None, **kwargs)
275 |         self.test = IterDatasets(None, None, **kwargs)
276 |         self.set(X, y)
277 | 
278 | 
279 |     def set(self, X, y):
280 |         if isinstance(X, tuple):
281 |             self.num_examples = len(X[0])
282 |             for dataset in X:
283 |                 assert len(dataset) == self.num_examples, 'number of rows for different datasets is not the same'
284 |         elif X is None:
285 |             self.num_examples = 0
286 |             X = ()
287 |         else:
288 |             self.num_examples = len(X)
289 |             X = (X,)
290 | 
291 |         if isinstance(y, tuple):
292 |             for label in y:
293 |                 assert len(label) == self.num_examples, 'number of rows for different y is not the same'
294 |         elif y is None:
295 |             y = ()
296 |             assert self.num_examples == 0
297 |         else:
298 |             assert len(y) == self.num_examples, 'number of rows for y is not the same as input features'
299 |             y = (y,)
300 | 
301 |         total_ratio = sum(self.ratio)
302 |         num_train = int(float(self.ratio[0]) * self.num_examples / total_ratio)
303 |         num_valid = int(float(self.ratio[1]) * self.num_examples / total_ratio)
304 | 
305 |         trainset = ()
306 |         validset = ()
307 |         testset = ()
308 |         for dataset in X:
309 |             trainset += (dataset[:num_train],)
310 |             validset += (dataset[num_train:num_train+num_valid],)
311 |             testset += (dataset[num_train+num_valid:],)
312 | 
313 |         trainlbl = ()
314 |         validlbl = ()
315 |         testlbl = ()
316 |         for label in y:
317 |             trainlbl += (label[:num_train],)
318 |             validlbl += (label[num_train:num_train+num_valid],)
319 |             testlbl += (label[num_train+num_valid:],)
320 | 
321 |         self.train.X = trainset
322 |         self.train.y = trainlbl
323 | 
324 |         if self.ratio[1] == 0:
325 |             self.log.info('Valid set is empty! It is needed for early stopping and saving best model')
326 |         self.valid.X = validset
327 |         self.valid.y = validlbl
328 | 
329 |         if self.ratio[2] == 0:
330 |             self.log.info('Test set is empty! It is needed for testing the best model')
331 |         self.test.X = testset
332 |         self.test.y = testlbl
333 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Mozi
2 | =====
3 | 
4 | Mozi is based on Theano with a clean and sharp design. The **design philosophy** of Mozi:
5 | 
6 | 1. **Fast and Simple**: The main engine of the package is only 200 lines of code. There is only one fully compiled graph for training, which ensures all the data manipulation happens in one pass through the pipeline and, as a result, makes it a super fast package.
7 | 2. **Highly Modular**: Building a model in Mozi is like building a house with Lego: you can design whatever layers you can imagine and stack them together easily.
8 | 3. **Model Abstracted from Training**: In order to facilitate deployment of a trained model for real use, the model is abstracted away from the training module and kept as minimalist as possible. The objective is to allow realtime deployment and easy model exchange.
9 | 4. **Logging System**: Mozi provides a full logging feature that allows users to log the training results and the hyperparameters to a database for a panoramic overview.
It also allows automatic saving of the best model and logging of all training outputs for easy post-mortem analysis.
10 | 
11 | ---
12 | ### Install
13 | 
14 | First, you need to install [Theano](https://github.com/Theano/Theano).
15 | 
16 | You can simply install Mozi via pip for the latest stable version
17 | ```
18 | sudo pip install mozi
19 | ```
20 | or via pip for the bleeding edge version
21 | ```bash
22 | sudo pip install git+https://github.com/hycis/Mozi.git@master
23 | ```
24 | or simply clone and add to `PYTHONPATH`.
25 | ```bash
26 | git clone https://github.com/hycis/Mozi.git
27 | export PYTHONPATH=/path/to/Mozi:$PYTHONPATH
28 | ```
29 | To make the `PYTHONPATH` export persist, add `export PYTHONPATH=/path/to/Mozi:$PYTHONPATH` to your `.bashrc` for Linux or
30 | `.bash_profile` for Mac. While this method works, you will have to ensure that
31 | all the dependencies in [setup.py](setup.py) are installed.
32 | 
33 | ---
34 | ### Set Environment
35 | In Mozi, we need to set three environment paths
36 | * *MOZI_DATA_PATH*
37 | * *MOZI_SAVE_PATH*
38 | * *MOZI_DATABASE_PATH*
39 | 
40 | `MOZI_DATA_PATH` is the directory for saving and loading the datasets.
41 | `MOZI_SAVE_PATH` is the directory for saving all the models, the output log and the epoch errors.
42 | `MOZI_DATABASE_PATH` is the directory for saving the database that contains tables recording the hyperparameters and test errors for each training job.
43 | 
44 | ---
45 | ### Let's Have Fun!
46 | Building a model in Mozi is as simple as
47 | 
48 | ```python
49 | import theano.tensor as T
50 | from mozi.model import Sequential
51 | model = Sequential(input_var=T.matrix(), output_var=T.matrix())
52 | ```
53 | The `input_var` is the input tensor variable whose number of dimensions corresponds to that of the input dataset. The `output_var` is the tensor variable that corresponds to the target of the dataset: use `T.matrix` for `2d data`, `T.tensor3` for `3d data` and `T.tensor4` for `4d data`.
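For instance, if the input were 4d image data, the same pattern would use a `T.tensor4()` input instead (an illustrative sketch; `cnn_model` is a hypothetical name):
```python
import theano.tensor as T
from mozi.model import Sequential

# 4d image input (batch, channel, height, width), 2d one-hot target
cnn_model = Sequential(input_var=T.tensor4(), output_var=T.matrix())
```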
Next, add the layers
54 | 
55 | ```python
56 | from mozi.layers.linear import Linear
57 | from mozi.layers.activation import RELU, Softmax
58 | from mozi.layers.noise import Dropout
59 | 
60 | model.add(Linear(prev_dim=28*28, this_dim=200))
61 | model.add(RELU())
62 | model.add(Linear(prev_dim=200, this_dim=100))
63 | model.add(RELU())
64 | model.add(Dropout(0.5))
65 | model.add(Linear(prev_dim=100, this_dim=10))
66 | model.add(Softmax())
67 | ```
68 | To train the model, first build a dataset and a learning method; here we use the Mnist dataset and SGD
69 | ```python
70 | from mozi.datasets.mnist import Mnist
71 | from mozi.learning_method import SGD
72 | 
73 | data = Mnist(batch_size=64, train_valid_test_ratio=[5,1,1])
74 | learning_method = SGD(learning_rate=0.1, momentum=0.9, lr_decay_factor=0.9, decay_batch=10000)
75 | ```
76 | Finally, build a training object and put everything together to train the model
77 | ```python
78 | from mozi.train_object import TrainObject
79 | from mozi.cost import mse, error
80 | 
81 | train_object = TrainObject(model = model,
82 |                            log = None,
83 |                            dataset = data,
84 |                            train_cost = mse,
85 |                            valid_cost = error,
86 |                            learning_method = learning_method,
87 |                            stop_criteria = {'max_epoch' : 10,
88 |                                             'epoch_look_back' : 5,
89 |                                             'percent_decrease' : 0.01}
90 |                            )
91 | train_object.setup()
92 | train_object.run()
93 | ```
94 | #### Stopping Criteria
95 | ```python
96 | stop_criteria = {'max_epoch' : 10,
97 |                  'epoch_look_back' : 5,
98 |                  'percent_decrease' : 0.01}
99 | ```
100 | The stopping criteria here mean that training will stop once it has reached the `max_epoch` of 10, or once the validation error has not decreased by at least 1% over the past 5 epochs.
101 | #### Test Model
102 | And that's it! Once the training is done, to test the model it's as simple as calling the forward propagation `fprop(X)` on the model
103 | ```python
104 | import numpy as np
105 | 
106 | ypred = model.fprop(data.get_test().X)
107 | ypred = np.argmax(ypred, axis=1)
108 | y = np.argmax(data.get_test().y, axis=1)
109 | accuracy = np.equal(ypred, y).astype('f4').sum() / len(y)
110 | print 'test accuracy:', accuracy
111 | ```
112 | ---
113 | ### More Examples
114 | Mozi can be used to build effectively any kind of architecture. Below are a few more examples
115 | * [**Convolutional Neural Network**](doc/cnn.md)
116 | * [**Denoising Autoencoder**](doc/dae.md)
117 | * [**Variational Autoencoder**](doc/vae.md)
118 | * [**Alexnet**](example/voc_alexnet.py)
119 | 
120 | ---
121 | ### Layer Template
122 | To build a layer for Mozi, the layer has to implement the template
123 | ```python
124 | class Template(object):
125 |     """
126 |     DESCRIPTION:
127 |         The interface to be implemented by any layer.
128 |     """
129 |     def __init__(self):
130 |         self.params = [] # all params that need to be updated by training go into this list
131 | 
132 |     def _test_fprop(self, state_below):
133 |         # the testing track, where no params update is performed when data flows through this track
134 |         raise NotImplementedError()
135 | 
136 |     def _train_fprop(self, state_below):
137 |         # the training track, where params are updated every time data flows through this track
138 |         raise NotImplementedError()
139 | 
140 |     def _layer_stats(self, state_below, layer_output):
141 |         # calculate everything you want to know about the layer, its input, its output and
142 |         # its weights, and put it in the return list in the format [('W max', T.max(W)), ('W min', T.min(W))].
143 |         # This method provides a peek into the layer and is useful for debugging.
144 |         return []
145 | ```
146 | Each layer provides two tracks: a training track and a testing track. During training, the model will call `_train_fprop` in every layer, and the output from the model will be used to update the params in `self.params` in each layer. During testing, `_test_fprop` is called in every layer, and the output is used to evaluate the model and to judge if the model should stop training based on the stopping criteria set in the `TrainObject`. We can also peek into each layer by putting whatever we want to know about the layer into `_layer_stats`. For example, if we want to know the maximum weight in a layer, we can compute `T.max(W)` and return `[('W max', T.max(W))]` from `_layer_stats`, so that after every epoch, `'W max'` will be calculated for that layer and output to screen.
147 | 
148 | ---
149 | ### Data Interface
150 | Mozi provides two data interfaces: one for datasets small enough to fit entirely into memory [(SingleBlock)](mozi/datasets/dataset.py#L99), and another for large datasets which cannot fit into memory in one go and have to be broken up into blocks and loaded into training one block at a time [(DataBlocks)](mozi/datasets/dataset.py#L185).
151 | Check out the [Mnist](mozi/datasets/mnist.py) or [Cifar10](mozi/datasets/cifar10.py) examples on how to build a dataset.
152 | 
153 | #### Using SingleBlock Directly
154 | Besides subclassing `SingleBlock` or `DataBlocks` to create a dataset like the Mnist and Cifar10 examples, we can use `SingleBlock` or `DataBlocks` directly to build a dataset, as simply as
155 | ```python
156 | from mozi.datasets.dataset import SingleBlock
157 | import numpy as np
158 | X = np.random.rand(1000, 5)
159 | y = np.random.rand(1000, 3)
160 | data = SingleBlock(X=X, y=y, batch_size=100, train_valid_test_ratio=[3,1,1])
161 | train_object = TrainObject(dataset = data)
162 | ```
163 | 
164 | #### For Large Dataset Trained in Blocks
165 | For a large dataset that cannot fit into memory, you can use `DataBlocks` to train block by block. Below is an example for demonstration. You can also run the [example](example/datablocks_example.py)
166 | ```python
167 | from mozi.datasets.dataset import DataBlocks
168 | import numpy as np
169 | 
170 | # we have two blocks of 1000 images each saved
171 | # as ('X1.npy', 'y1.npy') and ('X2.npy', 'y2.npy')
172 | X1 = np.random.rand(1000, 3, 32, 32)
173 | y1 = np.random.rand(1000, 10)
174 | with open('X1.npy', 'wb') as xout, open('y1.npy', 'wb') as yout:
175 |     np.save(xout, X1)
176 |     np.save(yout, y1)
177 | 
178 | X2 = np.random.rand(1000, 3, 32, 32)
179 | y2 = np.random.rand(1000, 10)
180 | with open('X2.npy', 'wb') as xout, open('y2.npy', 'wb') as yout:
181 |     np.save(xout, X2)
182 |     np.save(yout, y2)
183 | 
184 | # now we can create the data by putting the paths
185 | # ('X1.npy', 'y1.npy') and ('X2.npy', 'y2.npy') into DataBlocks
186 | # during training, the TrainObject will load and unload one block at a time
187 | data = DataBlocks(data_paths=[('X1.npy', 'y1.npy'), ('X2.npy', 'y2.npy')],
188 |                   batch_size=100, train_valid_test_ratio=[3,1,1])
189 | train_object = TrainObject(dataset = data)
190 | ```
191 | 
192 | ---
193 | ### Logging
194 | Mozi provides a logging module for automatic saving of the best model and logging of the errors at each epoch.
195 | 
196 | 
197 | ```python
198 | from mozi.log import Log
199 | 
200 | log = Log(experiment_name = 'MLP',
201 |           description = 'This is a tutorial',
202 |           save_outputs = True, # log all the outputs from the screen
203 |           save_model = True, # save the best model
204 |           save_epoch_error = True, # log error at every epoch
205 |           save_to_database = {'name': 'Example.db',
206 |                               'records': {'Batch_Size': data.batch_size,
207 |                                           'Learning_Rate': learning_method.learning_rate,
208 |                                           'Momentum': learning_method.momentum}}
209 |           ) # end log
210 | ```
211 | The log module allows logging of screen outputs, saving of the best model and of the epoch errors. It also allows recording of hyperparameters to the database via the `save_to_database` argument, which takes in a dictionary with two fields, `'name'` and `'records'`. `'name'` indicates the name of the database in which to save the recording table; the name of the recording table will follow the experiment name under the argument `experiment_name`. The `'records'` field takes in a dictionary with any number of hyperparameters that we want to record, and only accepts primitive data types (str, int, float).
212 | Once the log object is built, it can be passed into `TrainObject` as
213 | ```python
214 | TrainObject(log = log)
215 | ```
216 | 
217 | 
218 | 
219 | 
220 | ---
221 | ### Load Saved Model
222 | When we set `save_model` in `Log` to be true, the best model is automatically saved to `MOZI_SAVE_PATH`. The model is serialized in pickle format, so to load the saved model we can use cPickle.
223 | ```python
224 | import cPickle
225 | with open('model.pkl', 'rb') as fin:
226 |     model = cPickle.load(fin)
227 | y = model.fprop(X)
228 | ```
229 | ---
230 | 
231 | ### Why Mozi?
232 | [Mozi](https://en.wikiquote.org/wiki/Mozi) (墨子) (470 B.C - 391 B.C) was a Chinese philosopher during the Warring States period (春秋戰國); his philosophy advocates peace, simplicity, universal love and pragmatism.
233 | 234 | --- 235 | ### Licence 236 | MIT Licence 237 | -------------------------------------------------------------------------------- /mozi/train_object.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import theano 4 | import theano.tensor as T 5 | floatX = theano.config.floatX 6 | 7 | import numpy as np 8 | 9 | import time, datetime 10 | import sys 11 | 12 | import logging 13 | internal_logger = logging.getLogger(__name__) 14 | logging.basicConfig(level=logging.DEBUG) 15 | 16 | from mozi.log import Log 17 | from mozi.utils.theano_utils import shared_zeros 18 | from mozi.utils.train_object_utils import split_list, generate_shared_list, merge_lists, \ 19 | get_shared_values, is_shared_var, merge_var 20 | 21 | from mozi.utils.check_memory import get_mem_usage 22 | from mozi.utils.progbar import Progbar 23 | 24 | 25 | class TrainObject(): 26 | 27 | def __init__(self, model, dataset, train_cost, valid_cost, learning_method, stop_criteria, log=None, verbose=True): 28 | self.model = model 29 | self.dataset = dataset 30 | self.train_cost = train_cost 31 | self.valid_cost = valid_cost 32 | self.learning_method = learning_method 33 | self.stop_criteria = stop_criteria 34 | self.log = log 35 | self.verbose = verbose 36 | 37 | 38 | if self.log is None: 39 | # use default Log setting 40 | self.log = Log(logger=internal_logger) 41 | 42 | elif self.log.save_to_database: 43 | self.log.print_records() 44 | self.log.info('\n') 45 | 46 | 47 | def setup(self): 48 | 49 | self.log.info( '..begin setting up train object') 50 | 51 | #===================[ build params and deltas list ]==================# 52 | 53 | params = [] 54 | deltas = [] 55 | 56 | for i, layer in enumerate(self.model.layers): 57 | layer_name = "{}_{}".format(layer.__class__.__name__, i) 58 | if hasattr(layer, 'params'): 59 | for param in layer.params: 60 | # checked that the param to be updated is shared variable 61 | if is_shared_var(param): 62 | param.name = str(i) + '_' + str(param.name) 63 | param.name += '_' + layer.__class__.__name__ 64 | params += [param] 65 | deltas += [shared_zeros(shape=param.shape.eval())] 66 | 67 | #=====================[ training params updates ]=====================# 68 | 69 | self.log.info("..update params: " + str(params)) 70 | train_y_pred, train_layers_stats = self.model.train_fprop(self.model.input_var) 71 | train_cost = self.train_cost(self.model.output_var, train_y_pred).astype(floatX) 72 | gparams = T.grad(train_cost, params) 73 | train_updates = self.learning_method.update(deltas, params, gparams) 74 | 75 | #=================[ append updates from each layer ]==================# 76 | 77 | for i, layer in enumerate(self.model.layers): 78 | layer_name = "{}_{}".format(layer.__class__.__name__, i) 79 | if hasattr(layer, 'updates') and len(layer.updates) > 0: 80 | self.log.info("..{}: has shared variable updates".format(layer_name)) 81 | train_updates += layer.updates 82 | 83 | #----[ append updates of stats from each layer to train updates ]-----# 84 | 85 | self.train_stats_names, train_stats_vars = split_list(train_layers_stats) 86 | train_stats_vars = [var.astype(floatX) for var in train_stats_vars] 87 | self.train_stats_shared = generate_shared_list(train_stats_vars) 88 | train_stats_updates = merge_lists(self.train_stats_shared, train_stats_vars) 89 | if self.verbose: 90 | train_updates += train_stats_updates 91 | 92 | #-------------------------[ train functions ]-------------------------# 93 | 94 | self.log.info('..begin compiling functions') 95 | 
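# note: the forward pass, the cost and all the updates (params and per-layer stats) are
# compiled into one training function below, matching the one-compiled-graph design in the README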
self.training = theano.function(inputs=merge_var(self.model.input_var, self.model.output_var), 96 | outputs=train_cost, 97 | updates=train_updates, 98 | on_unused_input='warn', 99 | allow_input_downcast=True) 100 | 101 | self.log.info('..training function compiled') 102 | 103 | #=============================[ testing ]=============================# 104 | 105 | test_y_pred, test_layers_stats = self.model.test_fprop(self.model.input_var) 106 | 107 | #-----[ append updates of stats from each layer to test updates ]-----# 108 | 109 | self.test_stats_names, test_stats_vars = split_list(test_layers_stats) 110 | test_stats_vars = [var.astype(floatX) for var in test_stats_vars] 111 | self.test_stats_shared = generate_shared_list(test_stats_vars) 112 | test_stats_updates = [] 113 | if self.verbose: 114 | test_stats_updates = merge_lists(self.test_stats_shared, test_stats_vars) 115 | 116 | #-------------------------[ test functions ]--------------------------# 117 | 118 | test_stopping_error = self.valid_cost(self.model.output_var, test_y_pred).astype(floatX) 119 | test_cost = self.train_cost(self.model.output_var, test_y_pred).astype(floatX) 120 | 121 | self.testing = theano.function(inputs=merge_var(self.model.input_var, self.model.output_var), 122 | outputs=(test_stopping_error, test_cost), 123 | updates=test_stats_updates, 124 | on_unused_input='warn', 125 | allow_input_downcast=True) 126 | 127 | self.log.info('..testing function compiled') 128 | 129 | 130 | def run(self): 131 | 132 | best_valid_error = float(sys.maxint) 133 | valid_error = float(sys.maxint) 134 | 135 | train_cost = float(sys.maxint) 136 | valid_cost = float(sys.maxint) 137 | 138 | train_stats_values = [] 139 | valid_stats_values = [] 140 | 141 | epoch = 0 142 | error_dcr = 0 143 | self.best_epoch_last_update = 0 144 | self.best_valid_last_update = float(sys.maxint) 145 | 146 | train_stats_names = ['train_' + name for name in self.train_stats_names] 147 | valid_stats_names = ['valid_' + name for name in self.test_stats_names] 148 | 149 | job_start = time.time() 150 | 151 | while (self.continue_learning(epoch, error_dcr, best_valid_error)): 152 | 153 | if epoch > 0: 154 | self.log.info("best_epoch_last_update: %d"%self.best_epoch_last_update) 155 | self.log.info("valid_error_decrease: %f"%error_dcr) 156 | self.log.info("best_valid_last_update: %f"%self.best_valid_last_update) 157 | self.log.info("========[ End of Epoch ]========\n\n") 158 | 159 | epoch += 1 160 | 161 | start_time = time.time() 162 | 163 | num_train_examples = 0 164 | total_train_cost = 0. 165 | train_stats_values = np.zeros(len(train_stats_names), dtype=floatX) 166 | 167 | num_valid_examples = 0 168 | total_valid_cost = 0. 169 | total_valid_stopping_cost = 0. 
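# per-epoch accumulators: costs and layer stats are summed weighted by the number of
# examples in each batch and normalized at the end of the epoch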
170 | valid_stats_values = np.zeros(len(valid_stats_names), dtype=floatX) 171 | 172 | blk = 0 173 | 174 | for block in self.dataset: 175 | block_time = time.time() 176 | blk += 1 177 | 178 | train_set = block.get_train() 179 | valid_set = block.get_valid() 180 | 181 | #====================[ Training Progress ]====================# 182 | if train_set.dataset_size > 0: 183 | self.log.info('..training '+ self.dataset.__class__.__name__ 184 | + ' block %s/%s'%(blk, self.dataset.nblocks)) 185 | 186 | progbar = Progbar(target=train_set.dataset_size) 187 | blk_sz = 0 188 | for idx in train_set: 189 | cost = self.training(*train_set[idx]) 190 | total_train_cost += cost * len(idx) 191 | num_train_examples += len(idx) 192 | train_stats_values += len(idx) * get_shared_values(self.train_stats_shared) 193 | blk_sz += len(idx) 194 | progbar.update(blk_sz) 195 | print 196 | 197 | #===================[ Validating Progress ]===================# 198 | if valid_set.dataset_size > 0: 199 | 200 | self.log.info('..validating ' + self.dataset.__class__.__name__ 201 | + ' block %s/%s'%(blk, self.dataset.nblocks)) 202 | 203 | progbar = Progbar(target=valid_set.dataset_size) 204 | blk_sz = 0 205 | for idx in valid_set: 206 | stopping_cost, cost = self.testing(*valid_set[idx]) 207 | total_valid_cost += cost * len(idx) 208 | total_valid_stopping_cost += stopping_cost * len(idx) 209 | num_valid_examples += len(idx) 210 | valid_stats_values += len(idx) * get_shared_values(self.test_stats_shared) 211 | blk_sz += len(idx) 212 | progbar.update(blk_sz) 213 | print 214 | 215 | self.log.info('block time: %0.2fs'%(time.time()-block_time)) 216 | self.log.info(get_mem_usage()) 217 | 218 | #-------[ Update train best cost and error values ]-------# 219 | if num_train_examples > 0: 220 | train_cost = total_train_cost / num_train_examples 221 | train_stats_values /= num_train_examples 222 | 223 | #-------[ Update valid best cost and error values ]-------# 224 | if num_valid_examples > 0: 225 | valid_error = total_valid_stopping_cost / num_valid_examples 226 | valid_cost = total_valid_cost / num_valid_examples 227 | valid_stats_values /= num_valid_examples 228 | 229 | if valid_error < best_valid_error: 230 | best_valid_error = valid_error 231 | self.log.info('..best validation error so far') 232 | if self.log.save_model: 233 | self.log._save_model(self.model) 234 | self.log.info('..model saved') 235 | 236 | if valid_error < self.best_valid_last_update: 237 | error_dcr = self.best_valid_last_update - valid_error 238 | else: 239 | error_dcr = 0 240 | 241 | #==============[ save to database, save epoch error]==============# 242 | if self.log.save_to_database: 243 | self.log._save_to_database(epoch, train_cost, valid_error, best_valid_error) 244 | self.log.info('..sent to database: %s:%s' % (self.log.save_to_database['name'], 245 | self.log.experiment_name)) 246 | 247 | if self.log.save_epoch_error: 248 | self.log._save_epoch_error(epoch, train_cost, valid_cost, valid_error) 249 | self.log.info('..epoch error saved') 250 | 251 | end_time = time.time() 252 | 253 | #=====================[ log outputs to file ]=====================# 254 | merged_train = merge_lists(train_stats_names, train_stats_values) 255 | merged_valid = merge_lists(valid_stats_names, valid_stats_values) 256 | 257 | outputs = [('epoch', epoch), 258 | ('runtime(s)', int(end_time-start_time)), 259 | ('train_' + self.train_cost.func_name, train_cost), 260 | ('valid_' + self.train_cost.func_name, valid_cost), 261 | ('valid_' + self.valid_cost.func_name, valid_error), 262 | 
('best_valid_' + self.valid_cost.func_name, best_valid_error)] 263 | 264 | outputs += merged_train + merged_valid 265 | self.log._log_outputs(outputs) 266 | 267 | 268 | job_end = time.time() 269 | self.log.info('Job Completed on %s'%time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime(job_end))) 270 | ttl_time = int(job_end - job_start) 271 | dt = datetime.timedelta(seconds=ttl_time) 272 | self.log.info('Total Time Taken: %s'%str(dt)) 273 | self.log.info("========[ End of Job ]========\n\n") 274 | 275 | 276 | def continue_learning(self, epoch, error_dcr, best_valid_error): 277 | 278 | if epoch > self.stop_criteria['max_epoch']: 279 | return False 280 | 281 | elif self.stop_criteria['percent_decrease'] is None or \ 282 | self.stop_criteria['epoch_look_back'] is None: 283 | return True 284 | 285 | elif np.abs(float(error_dcr) / self.best_valid_last_update) \ 286 | >= self.stop_criteria['percent_decrease']: 287 | self.best_valid_last_update = best_valid_error 288 | self.best_epoch_last_update = epoch 289 | return True 290 | 291 | elif epoch - self.best_epoch_last_update > \ 292 | self.stop_criteria['epoch_look_back']: 293 | return False 294 | 295 | else: 296 | return True 297 | -------------------------------------------------------------------------------- /mozi/datasets/preprocessor.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Functionality for preprocessing Datasets. With Preprocessor, GCN, Standardize adapted from pylearn2 4 | """ 5 | 6 | import sys 7 | import copy 8 | import logging 9 | import time 10 | import warnings 11 | import numpy as np 12 | import sklearn.preprocessing as preproc 13 | try: 14 | from scipy import linalg 15 | except ImportError: 16 | warnings.warn("Could not import scipy.linalg") 17 | from theano import function 18 | import theano.tensor as T 19 | import theano 20 | import scipy 21 | 22 | log = logging.getLogger(__name__) 23 | 24 | class Preprocessor(object): 25 | """ 26 | Adapted from pylearn2 27 | 28 | Abstract class. 29 | 30 | An object that can preprocess a dataset. 31 | 32 | Preprocessing a dataset implies changing the data that 33 | a dataset actually stores. This can be useful to save 34 | memory--if you know you are always going to access only 35 | the same processed version of the dataset, it is better 36 | to process it once and discard the original. 37 | 38 | Preprocessors are capable of modifying many aspects of 39 | a dataset. For example, they can change the way that it 40 | converts between different formats of data. They can 41 | change the number of examples that a dataset stores. 42 | In other words, preprocessors can do a lot more than 43 | just example-wise transformations of the examples stored 44 | in the dataset. 45 | """ 46 | 47 | def apply(self, X): 48 | """ 49 | dataset: The dataset to act on. 50 | can_fit: If True, the Preprocessor can adapt internal parameters 51 | based on the contents of dataset. Otherwise it must not 52 | fit any parameters, or must re-use old ones. 53 | 54 | Typical usage: 55 | # Learn PCA preprocessing and apply it to the training set 56 | my_pca_preprocessor.apply(training_set, can_fit = True) 57 | # Now apply the same transformation to the test set 58 | my_pca_preprocessor.apply(test_set, can_fit = False) 59 | 60 | Note: this method must take a dataset, rather than a numpy ndarray, 61 | for a variety of reasons: 62 | 1) Preprocessors should work on any dataset, and not all 63 | datasets will store their data as ndarrays. 
64 | 2) Preprocessors often need to change a dataset's metadata. 65 | For example, suppose you have a DenseDesignMatrix dataset 66 | of images. If you implement a fovea Preprocessor that 67 | reduces the dimensionality of images by sampling them finely 68 | near the center and coarsely with blurring at the edges, 69 | then your preprocessor will need to change the way that the 70 | dataset converts example vectors to images for visualization. 71 | """ 72 | 73 | raise NotImplementedError(str(type(self))+" does not implement an apply method.") 74 | 75 | def invert(self, X): 76 | """ 77 | Do any necessary prep work to be able to support the "inverse" method 78 | later. Default implementation is no-op. 79 | """ 80 | raise NotImplementedError(str(type(self))+" does not implement an invert method.") 81 | 82 | class ExamplewisePreprocessor(Preprocessor): 83 | """ 84 | Abstract class. 85 | 86 | A Preprocessor that restricts the actions it can do in its 87 | apply method so that it could be implemented as a Block's 88 | perform method. 89 | 90 | In other words, this Preprocessor can't modify the Dataset's 91 | metadata, etc. 92 | 93 | TODO: can these things fit themselves in their apply method? 94 | That seems like a difference from Block. 95 | """ 96 | 97 | def as_block(self): 98 | raise NotImplementedError(str(type(self))+" does not implement as_block.") 99 | 100 | class Standardize(ExamplewisePreprocessor): 101 | """ 102 | Adapted from pylearn2 103 | Subtracts the mean and divides by the standard deviation. 104 | """ 105 | def __init__(self, global_mean=None, global_std=None, std_eps=1e-4): 106 | """ 107 | Initialize a Standardize preprocessor. 108 | 109 | Parameters 110 | ---------- 111 | global_mean : bool 112 | If `True`, subtract the (scalar) mean over every element 113 | in the design matrix. If `False`, subtract the mean from 114 | each column (feature) separately. Default is `False`. 115 | global_std : bool 116 | If `True`, after centering, divide by the (scalar) standard 117 | deviation of every element in the design matrix. If `False`, 118 | divide by the column-wise (per-feature) standard deviation. 119 | Default is `False`. 120 | std_eps : float 121 | Stabilization factor added to the standard deviations before 122 | dividing, to prevent standard deviations very close to zero 123 | from causing the feature values to blow up too much. 124 | Default is `1e-4`. 125 | """ 126 | self._std_eps = std_eps 127 | self._mean = global_mean 128 | self._std = global_std 129 | 130 | def apply(self, X): 131 | if self._mean is None: 132 | self._mean = X.mean(axis=0) 133 | if self._std is None: 134 | self._std = X.std(axis=0) 135 | X = (X - self._mean) / (self._std_eps + self._std) 136 | return X 137 | 138 | def invert(self, X): 139 | return X * (self._std_eps + self._std) + self._mean 140 | 141 | 142 | 143 | 144 | 145 | 146 | class GCN(Preprocessor): 147 | 148 | """ 149 | Adapted from pylearn2 150 | Global contrast normalizes by (optionally) subtracting the mean 151 | across features and then normalizes by either the vector norm 152 | or the standard deviation (across features, for each example). 153 | 154 | Parameters 155 | ---------- 156 | X : ndarray, 2-dimensional 157 | Design matrix with examples indexed on the first axis and 158 | features indexed on the second. 159 | 160 | scale : float, optional 161 | Multiply features by this const. 162 | 163 | subtract_mean : bool, optional 164 | Remove the mean across features/pixels before normalizing. 165 | Defaults to `False`. 
class GCN_IMG(GCN):

    def apply(self, X):
        assert X.ndim == 4, 'img dimension should be 4 of (b, c, h, w)'
        b, c, h, w = X.shape
        # apply GCN to each channel of each image independently
        newX = super(GCN_IMG, self).apply(X.reshape((b*c, h*w)))
        return newX.reshape((b, c, h, w))

    def invert(self, X):
        assert X.ndim == 4, 'img dimension should be 4 of (b, c, h, w)'
        b, c, h, w = X.shape
        newX = super(GCN_IMG, self).invert(X.reshape((b*c, h*w)))
        return newX.reshape((b, c, h, w))


class LogGCN(GCN):

    def __init__(self, positive_values=True, **kwarg):
        '''
        positive_values: bool
            if True, the input is shifted by one before taking the log, so
            that zero-valued inputs stay finite; invert() undoes the shift
        '''
        self.positive_values = positive_values
        super(LogGCN, self).__init__(**kwarg)

    def apply(self, X):
        if self.positive_values:
            X = X + 1
        return super(LogGCN, self).apply(np.log(X))

    def invert(self, X):
        X = super(LogGCN, self).invert(X)
        if self.positive_values:
            return np.exp(X) - 1
        else:
            return np.exp(X)

class Log(Preprocessor):

    def __init__(self, positive_values=False, **kwarg):
        '''
        positive_values: bool
            if True, the input is shifted by one before taking the log, so
            that zero-valued inputs stay finite; invert() undoes the shift
        '''
        self.positive_values = positive_values

    def apply(self, X):
        if self.positive_values:
            X = X + 1
        return np.log(X)

    def invert(self, X):
        if self.positive_values:
            return np.exp(X) - 1
        else:
            return np.exp(X)

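
# Usage sketch (illustrative; not part of the original file). With
# positive_values=True the +1 shift keeps log() finite for zero-valued
# inputs, and invert() undoes the shift; `counts` is a hypothetical
# non-negative float array.
#
#     log_proc = Log(positive_values=True)
#     transformed = log_proc.apply(counts)       # log(counts + 1)
#     recovered = log_proc.invert(transformed)   # exp(.) - 1
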
class Scale(Preprocessor):

    """
    Scale the input into a range

    Parameters
    ----------
    X : ndarray, 2-dimensional
        numpy matrix with examples indexed on the first axis and
        features indexed on the second.

    global_max : real
        the maximum value of the whole dataset. If not provided, global_max
        is set to X.max()

    global_min : real
        the minimum value of the whole dataset. If not provided, global_min
        is set to X.min()

    scale_range : size 2 list
        the lower and upper bound of the scaled output

    buffer : float
        margin kept inside the scale range, so that values are mapped into
        [lower + buffer, upper - buffer]
    """


    def __init__(self, global_max=None, global_min=None, scale_range=[-1, 1], buffer=0.1):

        self.scale_range = scale_range
        self.buffer = buffer
        self.max = global_max
        self.min = global_min
        assert scale_range[0] + buffer < scale_range[1] - buffer, \
            'the effective lower bound is not below the effective upper bound'

    def apply(self, X):
        # use `is None` so that a legitimate global max/min of 0 is not
        # mistaken for "not provided"
        self.max = self.max if self.max is not None else X.max()
        self.min = self.min if self.min is not None else X.min()
        width = self.max - self.min
        assert width > 0, 'the max is not bigger than the min'
        scale = (self.scale_range[1] - self.scale_range[0] - 2 * self.buffer) / width
        X = scale * (X - self.min)
        X = X + self.scale_range[0] + self.buffer

        return X

    def invert(self, X):
        if self.max is None or self.min is None:
            raise ValueError('to use invert, either global_max and global_min are provided or '
                             'apply(X) is used before')
        width = self.max - self.min
        assert width > 0, 'the max is not bigger than the min'
        scale = width / (self.scale_range[1] - self.scale_range[0] - 2 * self.buffer)
        X = scale * (X - self.scale_range[0] - self.buffer)
        X = X + self.min

        return X

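
# Usage sketch (illustrative; not part of the original file). Maps pixel
# values in [0, 255] into [-0.9, 0.9] (scale_range [-1, 1] with a 0.1
# buffer); `images` is a hypothetical float array.
#
#     scaler = Scale(global_max=255., global_min=0., scale_range=[-1, 1], buffer=0.1)
#     scaled = scaler.apply(images)
#     recovered = scaler.invert(scaled)
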
class Pipeline(Preprocessor):

    def __init__(self, preprocessors, inplace=False):
        self.preprocessors = preprocessors
        self.inplace = inplace

    def apply(self, X):
        newX = X
        if not self.inplace:
            newX = X.copy()
        for proc in self.preprocessors:
            newX = proc.apply(newX)
        return newX

    def invert(self, X):
        newX = X.copy()
        # the inverse of a composition undoes the preprocessors in
        # reverse order
        for proc in reversed(self.preprocessors):
            newX = proc.invert(newX)
        return newX

class Normalize(Preprocessor):

    def __init__(self, norm='l2', axis=1, channelwise=False):
        """
        normalize each data vector to unit length

        Parameters
        ----------
        norm : 'l1', 'l2' or 'max'
            the norm to normalize by
        axis : int
            the axis along which to normalize
        channelwise : bool
            for 4-dimensional image input, apply the preprocessing to each
            channel of each example separately
        """
        self.norm = norm
        self.axis = axis
        self.channelwise = channelwise


    def apply(self, X):
        if X.ndim == 4 and self.channelwise:
            shape = X.shape
            flat_X = np.reshape(X, (shape[0]*shape[1], shape[2]*shape[3]))
            flat_X = preproc.normalize(flat_X, norm=self.norm, axis=1, copy=True)
            return flat_X.reshape(shape)

        if X.ndim > 2:
            shape = X.shape
            flat_X = np.reshape(X, (shape[0], np.prod(shape[1:])))
            flat_X = preproc.normalize(flat_X, norm=self.norm, axis=self.axis, copy=True)
            return flat_X.reshape(shape)

        return preproc.normalize(X, norm=self.norm, axis=self.axis, copy=True)


class Sigmoid(Preprocessor):

    def apply(self, X):
        return 1 / (1 + np.exp(-X))

    def invert(self, X):
        # logit, with a small epsilon for numerical stability at X == 1
        return np.log(X / (1 - X + 1e-9))

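
# Usage sketch (illustrative; not part of the original file): chaining
# preprocessors with Pipeline. apply() runs them left to right and invert()
# undoes them in reverse; `X` is a hypothetical non-negative float array.
#
#     pipeline = Pipeline([Log(positive_values=True), Standardize()])
#     Xp = pipeline.apply(X)    # log-transform, then standardize
#     Xr = pipeline.invert(Xp)  # de-standardize, then exp back
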
class ZCA(Preprocessor):

    """
    from pylearn2
    Performs ZCA whitening.
    .. TODO::
        WRITEME properly
        add reference
    Parameters
    ----------
    n_components : integer, optional
        Keeps the n_components biggest eigenvalues and corresponding
        eigenvectors of the covariance matrix.
    n_drop_components : integer, optional
        Drops the n_drop_components smallest eigenvalues and corresponding
        eigenvectors of the covariance matrix. Will only drop components
        when n_components is not set, i.e. n_components has preference over
        n_drop_components.
    filter_bias : float, optional
        TODO: verify that default of 0.1 is what was used in the
        Coates and Ng paper, add reference
    store_inverse : bool, optional
        When self.apply(dataset, can_fit=True) is called, store not just
        the preprocessing matrix, but also its inverse. This is necessary
        when using this preprocessor to instantiate a ZCA_Dataset.
    """

    def __init__(self, n_components=None, n_drop_components=None,
                 filter_bias=0.1, store_inverse=True):
        warnings.warn("This ZCA preprocessor class is known to yield very "
                      "different results on different platforms. If you plan "
                      "to conduct experiments with this preprocessing on "
                      "multiple machines, it is probably a good idea to do "
                      "the preprocessing on a single machine and copy the "
                      "preprocessed datasets to the others, rather than "
                      "preprocessing the data independently in each "
                      "location.")
        # TODO: test to see if differences across platforms
        # e.g., preprocessing STL-10 patches in LISA lab versus on
        # Ian's Ubuntu 11.04 machine
        # are due to the problem having a bad condition number or due to
        # different version numbers of scipy or something
        self.n_components = n_components
        self.n_drop_components = n_drop_components
        self.copy = True
        self.filter_bias = np.cast[theano.config.floatX](filter_bias)
        self.has_fit_ = False
        self.store_inverse = store_inverse
        self.P_ = None  # set by fit()
        self.inv_P_ = None  # set by fit(), if self.store_inverse is True

        # Analogous to DenseDesignMatrix.design_loc. If not None, the
        # matrices P_ and inv_P_ will be saved together in
        # matrices_save_path (or matrices_save_path + '.npz', if the
        # suffix is omitted).
        self.matrices_save_path = None

    @staticmethod
    def _gpu_matrix_dot(matrix_a, matrix_b, matrix_c=None):
        """
        Performs matrix multiplication.
        Attempts to use the GPU if it's available. If the matrix multiplication
        is too big to fit on the GPU, this falls back to the CPU after throwing
        a warning.
        Parameters
        ----------
        matrix_a : WRITEME
        matrix_b : WRITEME
        matrix_c : WRITEME
        """
        # compile the theano dot product once and cache it on the function
        if not hasattr(ZCA._gpu_matrix_dot, 'theano_func'):
            ma, mb = T.matrices('A', 'B')
            mc = T.dot(ma, mb)
            ZCA._gpu_matrix_dot.theano_func = \
                theano.function([ma, mb], mc, allow_input_downcast=True)

        theano_func = ZCA._gpu_matrix_dot.theano_func

        try:
            if matrix_c is None:
                return theano_func(matrix_a, matrix_b)
            else:
                matrix_c[...] = theano_func(matrix_a, matrix_b)
                return matrix_c
        except MemoryError:
            warnings.warn('Matrix multiplication too big to fit on GPU. '
                          'Re-doing with CPU. Consider using '
                          'THEANO_FLAGS="device=cpu" for your next '
                          'preprocessor run')
            return np.dot(matrix_a, matrix_b, matrix_c)

    @staticmethod
    def _gpu_mdmt(mat, diags):
        """
        Performs the matrix multiplication M * D * M^T.
        First tries to do this on the GPU. If this throws a MemoryError, it
        falls back to the CPU, with a warning message.
        Parameters
        ----------
        mat : WRITEME
        diags : WRITEME
        """

        floatX = theano.config.floatX

        # compile theano function
        if not hasattr(ZCA._gpu_mdmt, 'theano_func'):
            t_mat = T.matrix('M')
            t_diags = T.vector('D')
            result = T.dot(t_mat * t_diags, t_mat.T)
            ZCA._gpu_mdmt.theano_func = theano.function(
                [t_mat, t_diags],
                result,
                allow_input_downcast=True)

        try:
            # the theano function call below will downcast the data; emit
            # warnings so the caller knows.
            if str(mat.dtype) != floatX:
                warnings.warn('Implicitly converting mat from dtype=%s to '
                              '%s for gpu' % (mat.dtype, floatX))
            if str(diags.dtype) != floatX:
                warnings.warn('Implicitly converting diag from dtype=%s to '
                              '%s for gpu' % (diags.dtype, floatX))

            return ZCA._gpu_mdmt.theano_func(mat, diags)

        except MemoryError:
            # fall back to cpu
            warnings.warn('M * D * M^T was too big to fit on GPU. '
                          'Re-doing with CPU. Consider using '
                          'THEANO_FLAGS="device=cpu" for your next '
                          'preprocessor run')
            return np.dot(mat * diags, mat.T)

    def set_matrices_save_path(self, matrices_save_path):
        """
        Analogous to DenseDesignMatrix.use_design_loc().
        If a matrices_save_path is set, when this ZCA is pickled, the internal
        parameter matrices will be saved separately to `matrices_save_path`, as
        a numpy .npz archive. This uses half the memory that a normal pickling
        does.
        Parameters
        ----------
        matrices_save_path : WRITEME
        """
        if matrices_save_path is not None:
            assert isinstance(matrices_save_path, str)
            matrices_save_path = os.path.abspath(matrices_save_path)

            if os.path.isdir(matrices_save_path):
                raise IOError('Matrix save path "%s" must not be an existing '
                              'directory.' % matrices_save_path)

            assert matrices_save_path[-1] not in ('/', '\\')
            if not os.path.isdir(os.path.split(matrices_save_path)[0]):
                raise IOError('Couldn\'t find parent directory:\n'
                              '\t"%s"\n'
                              '\tof matrix path\n'
                              '\t"%s"' % (os.path.split(matrices_save_path)[0],
                                          matrices_save_path))

        self.matrices_save_path = matrices_save_path

    def __getstate__(self):
        """
        Used by pickle. Returns a dictionary to pickle in place of
        self.__dict__.
        If self.matrices_save_path is set, this saves the matrices P_ and
        inv_P_ separately in matrices_save_path as a .npz archive, which uses
        much less space & memory than letting pickle handle them.
        """
        result = copy.copy(self.__dict__)  # shallow copy
        if self.matrices_save_path is not None:
            matrices = {'P_': self.P_}
            if self.inv_P_ is not None:
                matrices['inv_P_'] = self.inv_P_

            np.savez(self.matrices_save_path, **matrices)

            # Removes the matrices from the dictionary to be pickled.
            for key, matrix in matrices.items():
                del result[key]

        return result

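    # Usage sketch for out-of-pickle matrix storage (illustrative; not part
    # of the original file). Assumes `import pickle`; the file paths are
    # hypothetical.
    #
    #     zca = ZCA()
    #     zca.set_matrices_save_path('/tmp/my_zca_matrices.npz')
    #     with open('/tmp/my_zca.pkl', 'wb') as f:
    #         pickle.dump(zca, f)   # P_/inv_P_ go to the .npz, not the pickle
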
    def __setstate__(self, state):
        """
        Used to unpickle.
        Parameters
        ----------
        state : dict
            The dictionary created by __getstate__(), presumably unpickled
            from disk.
        """

        # Patch old pickle files
        if 'matrices_save_path' not in state:
            state['matrices_save_path'] = None

        if state['matrices_save_path'] is not None:
            matrices = np.load(state['matrices_save_path'])

            # puts matrices' items into state, overriding any colliding keys
            # in state.
            state = dict(state)
            state.update(matrices)
            del matrices

        self.__dict__.update(state)

        if not hasattr(self, "inv_P_"):
            self.inv_P_ = None

    def fit(self, X):
        """
        Fits this `ZCA` instance to a design matrix `X`.
        Parameters
        ----------
        X : ndarray
            A matrix where each row is a datum.
        Notes
        -----
        Implementation details:
        Stores result as `self.P_`.
        If self.store_inverse is true, this also computes `self.inv_P_`.
        """

        assert X.dtype in ['float32', 'float64']
        assert not np.any(np.isnan(X))
        assert len(X.shape) == 2
        if self.copy:
            X = X.copy()
        # Center data
        self.mean_ = np.mean(X, axis=0)
        X -= self.mean_

        log.info('computing zca of a {0} matrix'.format(X.shape))
        t1 = time.time()

        bias = self.filter_bias * scipy.sparse.identity(X.shape[1],
                                                        theano.config.floatX)

        covariance = ZCA._gpu_matrix_dot(X.T, X) / X.shape[0] + bias
        t2 = time.time()
        log.info("cov estimate took {0} seconds".format(t2 - t1))

        t1 = time.time()
        eigs, eigv = linalg.eigh(covariance)
        t2 = time.time()

        log.info("eigh() took {0} seconds".format(t2 - t1))
        assert not np.any(np.isnan(eigs))
        assert not np.any(np.isnan(eigv))
        assert eigs.min() > 0

        if self.n_components and self.n_drop_components:
            raise ValueError('Only one of n_components and n_drop_components '
                             'should be specified')

        if self.n_components:
            eigs = eigs[-self.n_components:]
            eigv = eigv[:, -self.n_components:]

        if self.n_drop_components:
            eigs = eigs[self.n_drop_components:]
            eigv = eigv[:, self.n_drop_components:]

        t1 = time.time()

        # whitening matrix P = V * diag(1 / sqrt(eigs)) * V^T
        sqrt_eigs = np.sqrt(eigs)
        try:
            self.P_ = ZCA._gpu_mdmt(eigv, 1.0 / sqrt_eigs)
        except MemoryError:
            warnings.warn('M * D * M^T was too big to fit on GPU. '
                          'Re-doing with CPU.')
            self.P_ = np.dot(eigv * (1.0 / sqrt_eigs), eigv.T)

        t2 = time.time()
        assert not np.any(np.isnan(self.P_))
        self.has_fit_ = True

        if self.store_inverse:
            self.inv_P_ = ZCA._gpu_mdmt(eigv, sqrt_eigs)
        else:
            self.inv_P_ = None

    def apply(self, X, can_fit=True):
        """
        .. todo::
            WRITEME
        """
        assert X.dtype in ['float32', 'float64']
        if not self.has_fit_:
            assert can_fit
            self.fit(X)

        # evaluates dot(X - mean, P), using the GPU when available
        new_X = ZCA._gpu_matrix_dot(X - self.mean_, self.P_)
        return new_X

    def invert(self, X):
        """
        .. todo::
            WRITEME
        """
        assert X.ndim == 2

        if self.inv_P_ is None:
            warnings.warn("inv_P_ was None. Computing "
                          "inverse of P_ now. This will take "
                          "some time. For efficiency, it is recommended that "
                          "in the future you compute the inverse in ZCA.fit() "
                          "instead, by passing it store_inverse=True.")
            log.info('inverting...')
            self.inv_P_ = np.linalg.inv(self.P_)
            log.info('...done inverting')

        return self._gpu_matrix_dot(X, self.inv_P_) + self.mean_
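
# Usage sketch (illustrative; not part of the original file). ZCA whitening
# of a design matrix; `X` is a hypothetical float32 array of shape
# (n_examples, n_features).
#
#     zca = ZCA(filter_bias=0.1, store_inverse=True)
#     white = zca.apply(X)           # fits on first call (can_fit=True)
#     recovered = zca.invert(white)  # uses the stored inverse transform
--------------------------------------------------------------------------------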