├── .DS_Store
├── .gitignore
├── almost_log_gamma.m
├── autodisc
    ├── DexNet.py
    ├── LICENSE
    ├── MnistTests.py
    ├── NetTrainers.py
    ├── load_data.py
    ├── output_losses.py
    └── utils.py
├── basic_sear
    ├── FrankeNet.py
    ├── LICENSE
    ├── LICENSE.md
    ├── MnistTests.py
    ├── NetTrainers.py
    ├── load_data.py
    ├── output_losses.py
    └── utils.py
├── generalized_ear
    ├── ConvDemo.py
    ├── EarNet.py
    ├── LICENSE
    ├── MnistTests.py
    ├── NetLayers.py
    ├── NetTrainers.py
    ├── load_data.py
    ├── output_losses.py
    └── utils.py
├── generative_models
    ├── ADPair.py
    ├── AEDPair.py
    ├── BlocksAttention.py
    ├── BlocksModels.py
    ├── ClassModel.py
    ├── DKCode.py
    ├── GCPair.py
    ├── GIPair.py
    ├── GIPair2.py
    ├── GIStack.py
    ├── GITonGIP.py
    ├── GITrip.py
    ├── GPSImputer.py
    ├── HelperFuncs.py
    ├── HydraNet.py
    ├── InfNet.py
    ├── LICENSE
    ├── LogPDFs.py
    ├── MCSampler.py
    ├── MSDUtils.py
    ├── MnistTests.py
    ├── MnistWalkReg.py
    ├── MnistWalkoutTest.py
    ├── MultiStageModel.py
    ├── MultiStageModelSS.py
    ├── MultiStageModelSS2.py
    ├── NetLayers.py
    ├── OneStageModel.py
    ├── PeaNet.py
    ├── PeaNetSeq.py
    ├── SVHNWalkReg.py
    ├── SVHNWalkoutTest.py
    ├── TFDWalkoutTest.py
    ├── TempTests.py
    ├── TestBlocksCLModels.py
    ├── TestBlocksDDModels.py
    ├── TestBlocksESModels.py
    ├── TestBlocksImpModels.py
    ├── TestBlocksOLModels.py
    ├── TestClassModel.py
    ├── TestImpGPSI_MNIST.py
    ├── TestImpGPSI_SVHN.py
    ├── TestImpGPSI_TFD.py
    ├── TestImpTM.py
    ├── TestImpVAE.py
    ├── TestMSM.py
    ├── TestMSMSS.py
    ├── TestMSMSS2.py
    ├── TestTSM.py
    ├── TwoStageModel.py
    ├── VCGLoop.py
    ├── VideoUtils.py
    ├── WalkoutResults.py
    ├── blocks_models
    │   ├── attention.py
    │   ├── binarized_mnist_converter.py
    │   ├── lib
    │   │   ├── __init__.py
    │   │   ├── myutils.py
    │   │   └── prob_layers.py
    │   ├── models.py
    │   ├── plot-log.py
    │   ├── run-att-rw.py
    │   ├── sample.py
    │   ├── simple_script.sh
    │   ├── train-dotmatrix.py
    │   ├── train-draw.py
    │   └── train-imodraw.py
    ├── load_data.py
    ├── output_losses.py
    ├── result_parsing_script.py
    └── utils.py
└── nlp
    ├── CorpusUtils.py
    ├── CythonFuncs.py
    ├── CythonFuncsPyx.pyx
    ├── DataLoaders.py
    ├── GPULayers.py
    ├── HelperFuncs.py
    ├── LICENSE.md
    ├── NLMLayers.py
    ├── NLModels.py
    ├── NumbaFuncs.py
    ├── TestCuBlas.py
    ├── gensim_code
        ├── GensimUtils.py
        ├── TestGensim.py
        ├── W2VInner.pyx
        └── W2VSimple.py
    ├── gnumpy.py
    ├── nlp_convnet
        ├── LNFuncs.py
        ├── LNLayers.py
        ├── LayerNets.py
        ├── STBTests.py
        └── StanfordTrees.py
    ├── npmat.py
    └── voidptr.h


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Philip-Bachman/NN-Python/e9a7619806c5ccbe2bd648b2a2e0af7967dc6996/.DS_Store


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py~
 2 | *.swp
 3 | *.npy
 4 | *.csv
 5 | *.dat
 6 | *.data
 7 | *.npz
 8 | *.pkl
 9 | *.pkl.gz
10 | *.png
11 | *.pyc
12 | *.DS_STORE
13 | .DS_STORE
14 | *.DS_Store
15 | .DS_Store
16 | *.lprof
17 | /basic_sear/data/*
18 | /basic_sear/test_results/*
19 | /generalized_ear/data/*
20 | /generalized_ear/test_results/*
21 | /autodisc/data/*
22 | /nlp/trees/*
23 | /nlp/training_text/*
24 | 


--------------------------------------------------------------------------------
/almost_log_gamma.m:
--------------------------------------------------------------------------------
1 | %
2 | % APPROXIMATION FOR GAMMALN (I.E. THE LOG GAMMA FUNCTION)
3 | %
4 | small_approx = @( x, c ) log(1 ./ x) - (0.57721566490153 * x) + (c * x.^2);
5 | large_approx = @( x, c ) (((x-0.5) .* log(x)) - x) + 0.5*log(2*pi) + c*(1./x);
6 | 
7 | X = linspace(0.01, 4.0, 500);
8 | Y = almost_gammaln(X,0.25,0.025);


--------------------------------------------------------------------------------
/autodisc/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2014 Philip Bachman
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 4 | this software and associated documentation files (the "Software"), to deal in
 5 | the Software without restriction, including without limitation the rights to
 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 7 | of the Software, and to permit persons to whom the Software is furnished to do
 8 | so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 
21 | 
22 | ********************************************************************************
23 | * The copyright notice below comes from code which has been _heavily_ modified *
24 | * in the production of the code in this directory.                             *
25 | ********************************************************************************
26 | 
27 | 
28 | Copyright (C) 2012 Misha Denil
29 | 
30 | Permission is hereby granted, free of charge, to any person obtaining a copy of
31 | this software and associated documentation files (the "Software"), to deal in
32 | the Software without restriction, including without limitation the rights to
33 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
34 | of the Software, and to permit persons to whom the Software is furnished to do
35 | so, subject to the following conditions:
36 | 
37 | The above copyright notice and this permission notice shall be included in all
38 | copies or substantial portions of the Software.
39 | 
40 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
41 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
42 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
43 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
44 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
45 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
46 | SOFTWARE.
47 | 
48 | 


--------------------------------------------------------------------------------
/autodisc/load_data.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import cPickle
  3 | import gzip
  4 | import os
  5 | import sys
  6 | 
  7 | import theano
  8 | import theano.tensor as T
  9 | 
 10 | def _shared_dataset(data_xy):
 11 |     """ Function that loads the dataset into shared variables
 12 | 
 13 |     The reason we store our dataset in shared variables is to allow
 14 |     Theano to copy it into the GPU memory (when code is run on GPU).
 15 |     Since copying data into the GPU is slow, copying a minibatch everytime
 16 |     is needed (the default behaviour if the data is not in a shared
 17 |     variable) would lead to a large decrease in performance.
 18 |     """
 19 |     data_x, data_y = data_xy
 20 |     shared_x = theano.shared(np.asarray(data_x,
 21 |                                            dtype=theano.config.floatX))
 22 |     shared_y = theano.shared(np.asarray(data_y,
 23 |                                            dtype=theano.config.floatX))
 24 |     # When storing data on the GPU it has to be stored as floats
 25 |     # therefore we will store the labels as ``floatX`` as well
 26 |     # (``shared_y`` does exactly that).
 27 |     return shared_x, shared_y
 28 | 
 29 | def load_mnist(path, zero_mean=True):
 30 |     mnist = np.load(path)
 31 |     train_set_x = mnist['train_data']
 32 |     train_set_y = mnist['train_labels'] + 1
 33 |     test_set_x = mnist['test_data']
 34 |     test_set_y = mnist['test_labels'] + 1
 35 | 
 36 |     if zero_mean:
 37 |         obs_mean = np.mean(train_set_x, axis=0, keepdims=True)
 38 |         train_set_x = train_set_x - obs_mean
 39 |         test_set_x = test_set_x - obs_mean
 40 | 
 41 |     train_set_x, train_set_y = _shared_dataset((train_set_x, train_set_y))
 42 |     test_set_x, test_set_y = _shared_dataset((test_set_x, test_set_y))
 43 |     valid_set_x, valid_set_y = test_set_x, test_set_y
 44 | 
 45 |     rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
 46 |             (test_set_x, test_set_y)]
 47 |     return rval
 48 | 
 49 | def load_udm_ss(dataset, sup_count, rng, zero_mean=True):
 50 |     """Load semi-supervised version of the standard UdM MNIST data.
 51 | 
 52 |     For this, the training data is split into labeled and unlabeled portions.
 53 |     The number of labeled examples is 'sup_count', and an equal number of
 54 |     labeled examples will be selected for each class. The remaining (50000 -
 55 |     sup_count) examples are provided as unlabeled training data. The validate
 56 |     and test sets are left unchanged.
 57 | 
 58 |     Note: labels for the normal digit classes will range from 1-10, i.e. +1
 59 |     compared to their standard value, as 'un-classed' examples take label 0.
 60 |     """
 61 | 
 62 |     udm_data = load_udm(dataset, as_shared=False, zero_mean=zero_mean)
 63 |     Xtr = udm_data[0][0]
 64 |     Ytr = udm_data[0][1][:,np.newaxis]
 65 | 
 66 |     all_count = Xtr.shape[0]
 67 |     pc_count = int(np.ceil(sup_count / 10.0))
 68 |     sup_count = int(10 * pc_count)
 69 |     unsup_count = all_count - sup_count
 70 | 
 71 |     Xtr_su = []
 72 |     Ytr_su = []
 73 |     Xtr_un = []
 74 |     Ytr_un = []
 75 | 
 76 |     # Sample supervised and unsupervised subsets of each class' observations
 77 |     for c_label in np.unique(Ytr):
 78 |         c_idx = [i for i in range(all_count) if (Ytr[i] == c_label)]
 79 |         rng.shuffle(c_idx)
 80 |         Xtr_su.append(Xtr[c_idx[0:pc_count],:])
 81 |         Ytr_su.append(Ytr[c_idx[0:pc_count],:])
 82 |         Xtr_un.append(Xtr[c_idx[pc_count:],:])
 83 |         Ytr_un.append(Ytr[c_idx[pc_count:],:])
 84 | 
 85 |     # Stack per-class supervised/unsupervised splits into matrices
 86 |     Xtr_su = np.vstack(Xtr_su)
 87 |     Ytr_su = np.vstack(Ytr_su)
 88 |     Xtr_un = np.vstack(Xtr_un)
 89 |     Ytr_un = np.vstack(Ytr_un)
 90 |     # Also keep "unsupervised" copies of the "supervised" data
 91 |     Xtr_un = Xtr_un #np.vstack([Xtr_un, Xtr_su])
 92 |     Ytr_un = 0 * Ytr_un #np.vstack([Ytr_un, Ytr_su])
 93 | 
 94 |     # Shuffle the rows so that observations are not grouped by class
 95 |     shuf_idx = rng.permutation(Xtr_su.shape[0])
 96 |     Xtr_su = Xtr_su[shuf_idx,:]
 97 |     Ytr_su = Ytr_su[shuf_idx].ravel() + 1
 98 |     shuf_idx = rng.permutation(Xtr_un.shape[0])
 99 |     Xtr_un = Xtr_un[shuf_idx,:]
100 |     Ytr_un = Ytr_un[shuf_idx].ravel()
101 | 
102 |     # Put matrices into GPU shared variables, for great justice
103 |     Xtr_su, Ytr_su = _shared_dataset((Xtr_su, Ytr_su))
104 |     Xtr_un, Ytr_un = _shared_dataset((Xtr_un, Ytr_un))
105 |     Xva, Yva = _shared_dataset((udm_data[1][0], (udm_data[1][1] + 1)))
106 |     Xte, Yte = _shared_dataset((udm_data[2][0], (udm_data[2][1] + 1)))
107 | 
108 |     rval = [(Xtr_su, Ytr_su), (Xtr_un, Ytr_un), (Xva, Yva), (Xte, Yte)]
109 | 
110 |     return rval
111 | 
112 | def load_udm(dataset, as_shared=True, zero_mean=True):
113 |     """Loads the UdM train/validate/test split of MNIST."""
114 | 
115 |     #############
116 |     # LOAD DATA #
117 |     #############
118 | 
119 |     # Download the MNIST dataset if it is not present
120 |     data_dir, data_file = os.path.split(dataset)
121 |     if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
122 |         import urllib
123 |         origin = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
124 |         print 'Downloading data from %s' % origin
125 |         urllib.urlretrieve(origin, dataset)
126 | 
127 |     print '... loading data'
128 | 
129 |     # Load the dataset
130 |     f = gzip.open(dataset, 'rb')
131 |     train_set, valid_set, test_set = cPickle.load(f)
132 |     f.close()
133 |     #train_set, valid_set, test_set format: tuple(input, target)
134 |     #input is an np.ndarray of 2 dimensions (a matrix)
135 |     #witch row's correspond to an example. target is a
136 |     #np.ndarray of 1 dimensions (vector)) that have the same length as
137 |     #the number of rows in the input. It should give the target
138 |     #target to the example with the same index in the input.
139 |     train_set = [v for v in train_set]
140 |     valid_set = [v for v in valid_set]
141 |     test_set = [v for v in test_set]
142 |     train_set[0] = np.asarray(train_set[0]).astype(np.float32)
143 |     valid_set[0] = np.asarray(valid_set[0]).astype(np.float32)
144 |     test_set[0] = np.asarray(test_set[0]).astype(np.float32)
145 |     if zero_mean:
146 |         obs_mean = np.mean(train_set[0], axis=0, keepdims=True)
147 |         train_set[0] = train_set[0] - obs_mean
148 |         valid_set[0] = valid_set[0] - obs_mean
149 |         test_set[0] = test_set[0] - obs_mean
150 |     if as_shared:
151 |         test_set_x, test_set_y = _shared_dataset((test_set[0],test_set[1]+1))
152 |         valid_set_x, valid_set_y = _shared_dataset((valid_set[0],valid_set[1]+1))
153 |         train_set_x, train_set_y = _shared_dataset((train_set[0],train_set[1]+1))
154 |     else:
155 |         test_set_x, test_set_y = test_set
156 |         valid_set_x, valid_set_y = valid_set
157 |         train_set_x, train_set_y = train_set
158 | 
159 |     rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
160 |             (test_set_x, test_set_y)]
161 |     return rval
162 | 
163 | 


--------------------------------------------------------------------------------
/autodisc/output_losses.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import theano
  3 | import theano.tensor as T
  4 | 
  5 | class LogisticRegression(object):
  6 |     """Multi-class Logistic Regression loss dangler."""
  7 | 
  8 |     def __init__(self, linear_layer):
  9 |         """Dangle a logistic regression from the given linear layer.
 10 | 
 11 |         The given linear layer should be a HiddenLayer (or subclass) object,
 12 |         for HiddenLayer as defined in LayerNet.py."""
 13 |         self.input_layer = linear_layer
 14 | 
 15 |     def loss_func(self, y):
 16 |         """Return the multiclass logistic regression loss for y.
 17 | 
 18 |         The class labels in y are assumed to be in correspondence with the
 19 |         set of column indices for self.input_layer.linear_output.
 20 |         """
 21 |         p_y_given_x = T.nnet.softmax(self.input_layer.linear_output)
 22 |         loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]),y])
 23 |         return loss
 24 | 
 25 |     def errors(self, y):
 26 |         """Compute the number of wrong predictions by self.input_layer.
 27 | 
 28 |         Predicted class labels are computed as the indices of the columns of
 29 |         self.input_layer.linear_output which are maximal. Wrong predictions are
 30 |         those for which max indices do not match their corresponding y values.
 31 |         """
 32 |         # Compute class memberships predicted by self.input_layer
 33 |         y_pred = T.argmax(self.input_layer.linear_output, axis=1)
 34 |         errs = 0
 35 |         # check if y has same dimension of y_pred
 36 |         if y.ndim != y_pred.ndim:
 37 |             raise TypeError('y should have the same shape as self.y_pred',
 38 |                 ('y', y.type, 'y_pred', y_pred.type))
 39 |         # check if y is of the correct datatype
 40 |         if y.dtype.startswith('int'):
 41 |             # the T.neq operator returns a vector of 0s and 1s, where 1
 42 |             # represents a mistake in prediction
 43 |             errs = T.sum(T.neq(y_pred, y))
 44 |         else:
 45 |             raise NotImplementedError()
 46 |         return errs
 47 | 
 48 | class LogRegSS(object):
 49 |     """Multi-class semi-supervised Logistic Regression loss dangler."""
 50 | 
 51 |     def __init__(self, linear_layer):
 52 |         """Dangle a logistic regression from the given linear layer.
 53 | 
 54 |         The given linear layer should be a HiddenLayer (or subclass) object,
 55 |         for HiddenLayer as defined in LayerNet.py."""
 56 |         self.input_layer = linear_layer
 57 | 
 58 |     def safe_softmax_ss(self, x):
 59 |         """Softmax that shouldn't overflow."""
 60 |         e_x = T.exp(x - T.max(x, axis=1, keepdims=True))
 61 |         x_sm = e_x / T.sum(e_x, axis=1, keepdims=True)
 62 |         return x_sm
 63 | 
 64 |     def loss_func(self, y):
 65 |         """Return the multiclass logistic regression loss for y.
 66 | 
 67 |         The class labels in y are assumed to be in correspondence with the
 68 |         set of column indices for self.input_layer.linear_output.
 69 |         """
 70 |         row_idx = T.arange(y.shape[0])
 71 |         row_mask = T.neq(y, 0).reshape((y.shape[0], 1))
 72 |         p_y_given_x = self.safe_softmax_ss(self.input_layer.linear_output)
 73 |         wacky_mat = (p_y_given_x * row_mask) + (1. - row_mask)
 74 |         loss = -T.sum(T.log(wacky_mat[row_idx,y])) / T.sum(row_mask)
 75 |         return loss
 76 | 
 77 |     def errors(self, y):
 78 |         """Compute the number of wrong predictions by self.input_layer.
 79 | 
 80 |         Predicted class labels are computed as the indices of the columns of
 81 |         self.input_layer.linear_output which are maximal. Wrong predictions are
 82 |         those for which max indices do not match their corresponding y values.
 83 |         """
 84 |         # Compute class memberships predicted by self.input_layer
 85 |         y_pred = T.argmax(self.input_layer.linear_output[:,1:], axis=1)
 86 |         y_pred = y_pred + 1
 87 |         errs = 0
 88 |         # check if y has same dimension of y_pred
 89 |         if y.ndim != y_pred.ndim:
 90 |             raise TypeError('y should have the same shape as self.y_pred',
 91 |                 ('y', y.type, 'y_pred', y_pred.type))
 92 |         # check if y is of the correct datatype
 93 |         if y.dtype.startswith('int'):
 94 |             # the T.neq operator returns a vector of 0s and 1s, where 1
 95 |             # represents a mistake in prediction
 96 |             errs = T.sum(T.neq(y_pred, y) * T.neq(y, 0))
 97 |         else:
 98 |             raise NotImplementedError()
 99 |         return errs
100 | 
101 | class MCL2Hinge(object):
102 |     """Multi-class one-vs-all L2 hinge loss dangler."""
103 | 
104 |     def __init__(self, linear_layer):
105 |         """Dangle a squred hinge loss from the given linear layer.
106 | 
107 |         The given linear layer should be a HiddenLayer (or subclass) object,
108 |         for HiddenLayer as defined in LayerNet.py."""
109 |         self.input_layer = linear_layer
110 | 
111 |     def loss_func(self, y):
112 |         """Return the multiclass squared hinge loss for y.
113 | 
114 |         The class labels in y are assumed to be in correspondence with the
115 |         set of column indices for self.input_layer.linear_output.
116 |         """
117 |         y_hat = self.input_layer.linear_output
118 |         margin_pos = T.maximum(0.0, (1.0 - y_hat))
119 |         margin_neg = T.maximum(0.0, (1.0 + y_hat))
120 |         obs_idx = T.arange(y.shape[0])
121 |         loss_pos = T.sum(margin_pos[obs_idx,y]**2.0)
122 |         loss_neg = T.sum(margin_neg**2.0) - T.sum(margin_neg[obs_idx,y]**2.0)
123 |         loss = (loss_pos + loss_neg) / y.shape[0]
124 |         return loss
125 | 
126 |     def errors(self, y):
127 |         """Compute the number of wrong predictions by self.input_layer.
128 | 
129 |         Predicted class labels are computed as the indices of the columns of
130 |         self.input_layer.linear_output which are maximal. Wrong predictions are
131 |         those for which max indices do not match their corresponding y values.
132 |         """
133 |         # Compute class memberships predicted by self.input_layer
134 |         y_pred = T.argmax(self.input_layer.linear_output, axis=1)
135 |         errs = 0
136 |         # check if y has same dimension of y_pred
137 |         if y.ndim != y_pred.ndim:
138 |             raise TypeError('y should have the same shape as self.y_pred',
139 |                 ('y', y.type, 'y_pred', y_pred.type))
140 |         # check if y is of the correct datatype
141 |         if y.dtype.startswith('int'):
142 |             # the T.neq operator returns a vector of 0s and 1s, where 1
143 |             # represents a mistake in prediction
144 |             errs = T.sum(T.neq(y_pred, y))
145 |         else:
146 |             raise NotImplementedError()
147 |         return errs
148 | 
149 | class MCL2HingeSS(object):
150 |     """Multi-class one-vs-all L2 hinge loss dangler.
151 | 
152 |     For this loss, class index 0 is never penalized, and errors for inputs
153 |     with class index 0 are similarly ignored. This is for semi-supervised
154 |     training, constrained by Theano's programming model."""
155 | 
156 |     def __init__(self, linear_layer):
157 |         """Dangle a squred hinge loss from the given linear layer.
158 | 
159 |         The given linear layer should be a HiddenLayer (or subclass) object,
160 |         for HiddenLayer as defined in LayerNet.py."""
161 |         self.input_layer = linear_layer
162 | 
163 |     def loss_func(self, y):
164 |         """Return the multiclass squared hinge loss for y.
165 | 
166 |         The class labels in y are assumed to be in correspondence with the
167 |         set of column indices for self.input_layer.linear_output.
168 |         """
169 |         y_hat = self.input_layer.linear_output
170 |         row_idx = T.arange(y.shape[0])
171 |         row_mask = T.neq(y, 0).reshape((y_hat.shape[0], 1))
172 |         margin_pos = T.maximum(0.0, (1.0 - y_hat)) * row_mask
173 |         margin_neg = T.maximum(0.0, (1.0 + y_hat)) * row_mask
174 |         loss_pos = T.sum(margin_pos[row_idx,y]**2.0)
175 |         loss_neg = T.sum(margin_neg**2.0) - T.sum(margin_neg[row_idx,y]**2.0)
176 |         loss = (loss_pos + loss_neg) / T.sum(row_mask)
177 |         return loss
178 | 
179 |     def errors(self, y):
180 |         """Compute the number of wrong predictions by self.input_layer.
181 | 
182 |         Predicted class labels are computed as the indices of the columns of
183 |         self.input_layer.linear_output which are maximal. Wrong predictions are
184 |         those for which max indices do not match their corresponding y values.
185 |         """
186 |         # Compute class memberships predicted by self.input_layer
187 |         y_pred = T.argmax(self.input_layer.linear_output[:,1:], axis=1)
188 |         y_pred = y_pred + 1
189 |         errs = 0
190 |         # check if y has same dimension of y_pred
191 |         if y.ndim != y_pred.ndim:
192 |             raise TypeError('y should have the same shape as self.y_pred',
193 |                 ('y', y.type, 'y_pred', y_pred.type))
194 |         # check if y is of the correct datatype
195 |         if y.dtype.startswith('int'):
196 |             # the T.neq operator returns a vector of 0s and 1s, where 1
197 |             # represents a mistake in prediction
198 |             errs = T.sum(T.neq(y_pred, y) * T.neq(y, 0))
199 |         else:
200 |             raise NotImplementedError()
201 |         return errs
202 | 


--------------------------------------------------------------------------------
/autodisc/utils.py:
--------------------------------------------------------------------------------
  1 | """ This file contains different utility functions that are not connected
  2 | in anyway to the networks presented in the tutorials, but rather help in
  3 | processing the outputs into a more understandable way.
  4 | 
  5 | For example ``tile_raster_images`` helps in generating a easy to grasp
  6 | image from a set of samples or weights.
  7 | """
  8 | 
  9 | import numpy as np
 10 | import pylab as plt
 11 | import PIL as PIL
 12 | 
 13 | class batch(object):
 14 |     def __init__(self,batch_size):
 15 |         self.batch_size = batch_size
 16 | 
 17 |     def __call__(self,f):
 18 |         def wrapper(t,X):
 19 |             X = np.array(X)
 20 |             p = 0
 21 |             rem = 0
 22 |             results = []
 23 |             while p < len(X):
 24 |                 Z = X[p:p+self.batch_size]
 25 |                 if Z.shape[0] != self.batch_size:
 26 |                     zeros = np.zeros((self.batch_size-len(Z),X.shape[1]))
 27 |                     rem = len(Z)
 28 |                     Z = np.array(np.vstack((Z,zeros)),dtype=X.dtype)
 29 | 
 30 |                 temp_results = f(t,Z)
 31 |                 if rem != 0:
 32 |                     temp_results = temp_results[:rem]
 33 | 
 34 |                 results.extend(temp_results)
 35 |                 p += self.batch_size
 36 |             return np.array(results,dtype='float32')
 37 |         return wrapper
 38 | 
 39 | def scale_to_unit_interval(ndar, eps=1e-8):
 40 |     """ Scales all values in the ndarray ndar to be between 0 and 1 """
 41 |     ndar = ndar.copy()
 42 |     ndar -= ndar.min()
 43 |     ndar *= 1.0 / (ndar.max() + eps)
 44 |     return ndar
 45 | 
 46 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
 47 |                        scale_rows_to_unit_interval=True,
 48 |                        output_pixel_vals=True):
 49 |     """
 50 |     Transform an array with one flattened image per row, into an array in
 51 |     which images are reshaped and layed out like tiles on a floor.
 52 | 
 53 |     This function is useful for visualizing datasets whose rows are images,
 54 |     and also columns of matrices for transforming those rows
 55 |     (such as the first layer of a neural net).
 56 | 
 57 |     :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can
 58 |     be 2-D ndarrays or None;
 59 |     :param X: a 2-D array in which every row is a flattened image.
 60 | 
 61 |     :type img_shape: tuple; (height, width)
 62 |     :param img_shape: the original shape of each image
 63 | 
 64 |     :type tile_shape: tuple; (rows, cols)
 65 |     :param tile_shape: the number of images to tile (rows, cols)
 66 | 
 67 |     :param output_pixel_vals: if output should be pixel values (i.e. int8
 68 |     values) or floats
 69 | 
 70 |     :param scale_rows_to_unit_interval: if the values need to be scaled before
 71 |     being plotted to [0,1] or not
 72 | 
 73 | 
 74 |     :returns: array suitable for viewing as an image.
 75 |     (See:`PIL.Image.fromarray`.)
 76 |     :rtype: a 2-d array with same dtype as X.
 77 | 
 78 |     """
 79 | 
 80 |     assert len(img_shape) == 2
 81 |     assert len(tile_shape) == 2
 82 |     assert len(tile_spacing) == 2
 83 | 
 84 |     # The expression below can be re-written in a more C style as
 85 |     # follows :
 86 |     #
 87 |     # out_shape    = [0,0]
 88 |     # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] -
 89 |     #                tile_spacing[0]
 90 |     # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] -
 91 |     #                tile_spacing[1]
 92 |     out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
 93 |                         in zip(img_shape, tile_shape, tile_spacing)]
 94 | 
 95 |     if isinstance(X, tuple):
 96 |         assert len(X) == 4
 97 |         # Create an output numpy ndarray to store the image
 98 |         if output_pixel_vals:
 99 |             out_array = np.zeros((out_shape[0], out_shape[1], 4),
100 |                                     dtype='uint8')
101 |         else:
102 |             out_array = np.zeros((out_shape[0], out_shape[1], 4),
103 |                                     dtype=X.dtype)
104 | 
105 |         #colors default to 0, alpha defaults to 1 (opaque)
106 |         if output_pixel_vals:
107 |             channel_defaults = [0, 0, 0, 255]
108 |         else:
109 |             channel_defaults = [0., 0., 0., 1.]
110 | 
111 |         for i in xrange(4):
112 |             if X[i] is None:
113 |                 # if channel is None, fill it with zeros of the correct
114 |                 # dtype
115 |                 dt = out_array.dtype
116 |                 if output_pixel_vals:
117 |                     dt = 'uint8'
118 |                 out_array[:, :, i] = np.zeros(out_shape,
119 |                         dtype=dt) + channel_defaults[i]
120 |             else:
121 |                 # use a recurrent call to compute the channel and store it
122 |                 # in the output
123 |                 out_array[:, :, i] = tile_raster_images(
124 |                     X[i], img_shape, tile_shape, tile_spacing,
125 |                     scale_rows_to_unit_interval, output_pixel_vals)
126 |         return out_array
127 |     else:
128 |         # if we are dealing with only one channel
129 |         H, W = img_shape
130 |         Hs, Ws = tile_spacing
131 |         # generate a matrix to store the output
132 |         dt = X.dtype
133 |         if output_pixel_vals:
134 |             dt = 'uint8'
135 |         out_array = np.zeros(out_shape, dtype=dt)
136 |         for tile_row in xrange(tile_shape[0]):
137 |             for tile_col in xrange(tile_shape[1]):
138 |                 if tile_row * tile_shape[1] + tile_col < X.shape[0]:
139 |                     this_x = X[tile_row * tile_shape[1] + tile_col]
140 |                     if scale_rows_to_unit_interval:
141 |                         # if we should scale values to be between 0 and 1
142 |                         # do this by calling the `scale_to_unit_interval`
143 |                         # function
144 |                         this_img = scale_to_unit_interval(
145 |                             this_x.reshape(img_shape))
146 |                     else:
147 |                         this_img = this_x.reshape(img_shape)
148 |                     # add the slice to the corresponding position in the
149 |                     # output array
150 |                     c = 1
151 |                     if output_pixel_vals:
152 |                         c = 255
153 |                     out_array[
154 |                         tile_row * (H + Hs): tile_row * (H + Hs) + H,
155 |                         tile_col * (W + Ws): tile_col * (W + Ws) + W
156 |                         ] = this_img * c
157 |         return out_array
158 | 
159 | 
160 | def plot_histograms(firings):
161 |     N = int(np.ceil(np.sqrt(firings.shape[1])))
162 |     plt.figure(figsize=(N,N))
163 |     axisNum = 0
164 |     for row in range(N):
165 |         for col in range(N):
166 |             axisNum += 1
167 |             ax = plt.subplot(N, N, axisNum)
168 |             ax.set_xticklabels([])
169 |             ax.set_yticklabels([])
170 |             plt.hist(firings[:,row*N+col],bins=50)
171 |     plt.show()
172 |     return
173 | 
174 | def visualize(EN, proto_key, layer_num, file_name):
175 |     W = EN.proto_nets[proto_key][layer_num].W.get_value(borrow=True).T
176 |     size = int(np.sqrt(W.shape[1]))
177 |     # hist(W.flatten(),bins=50)
178 |     image = PIL.Image.fromarray(tile_raster_images(X=W, \
179 |             img_shape=(size, size), tile_shape=(10,W.shape[0]/10),tile_spacing=(1, 1)))
180 |     image.save(file_name)
181 |     return
182 | 
183 | 
184 | 
185 | 
186 | 


--------------------------------------------------------------------------------
/basic_sear/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2014 Philip Bachman
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 4 | this software and associated documentation files (the "Software"), to deal in
 5 | the Software without restriction, including without limitation the rights to
 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 7 | of the Software, and to permit persons to whom the Software is furnished to do
 8 | so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 
21 | 
22 | ********************************************************************************
23 | * The copyright notice below comes from code which has been _heavily_ modified *
24 | * in the production of the code in this directory.                             *
25 | ********************************************************************************
26 | 
27 | 
28 | Copyright (C) 2012 Misha Denil
29 | 
30 | Permission is hereby granted, free of charge, to any person obtaining a copy of
31 | this software and associated documentation files (the "Software"), to deal in
32 | the Software without restriction, including without limitation the rights to
33 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
34 | of the Software, and to permit persons to whom the Software is furnished to do
35 | so, subject to the following conditions:
36 | 
37 | The above copyright notice and this permission notice shall be included in all
38 | copies or substantial portions of the Software.
39 | 
40 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
41 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
42 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
43 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
44 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
45 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
46 | SOFTWARE.
47 | 
48 | 


--------------------------------------------------------------------------------
/basic_sear/LICENSE.md:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) {{{2014}}} {{{Philip Bachman}}}
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/basic_sear/load_data.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import cPickle
  3 | import gzip
  4 | import os
  5 | import sys
  6 | 
  7 | import theano
  8 | import theano.tensor as T
  9 | 
 10 | def _shared_dataset(data_xy):
 11 |     """ Function that loads the dataset into shared variables
 12 | 
 13 |     The reason we store our dataset in shared variables is to allow
 14 |     Theano to copy it into the GPU memory (when code is run on GPU).
 15 |     Since copying data into the GPU is slow, copying a minibatch everytime
 16 |     is needed (the default behaviour if the data is not in a shared
 17 |     variable) would lead to a large decrease in performance.
 18 |     """
 19 |     data_x, data_y = data_xy
 20 |     shared_x = theano.shared(np.asarray(data_x,
 21 |                                            dtype=theano.config.floatX))
 22 |     shared_y = theano.shared(np.asarray(data_y,
 23 |                                            dtype=theano.config.floatX))
 24 |     # When storing data on the GPU it has to be stored as floats
 25 |     # therefore we will store the labels as ``floatX`` as well
 26 |     # (``shared_y`` does exactly that).
 27 |     return shared_x, shared_y
 28 | 
 29 | def load_mnist(path):
 30 |     mnist = np.load(path)
 31 |     train_set_x = mnist['train_data']
 32 |     train_set_y = mnist['train_labels'] + 1
 33 |     test_set_x = mnist['test_data']
 34 |     test_set_y = mnist['test_labels'] + 1
 35 | 
 36 |     train_set_x, train_set_y = _shared_dataset((train_set_x, train_set_y))
 37 |     test_set_x, test_set_y = _shared_dataset((test_set_x, test_set_y))
 38 |     valid_set_x, valid_set_y = test_set_x, test_set_y
 39 | 
 40 |     rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
 41 |             (test_set_x, test_set_y)]
 42 |     return rval
 43 | 
 44 | def load_udm_ss(dataset, sup_count, rng):
 45 |     """Load semi-supervised version of the standard UdM MNIST data.
 46 | 
 47 |     For this, the training data is split into labeled and unlabeled portions.
 48 |     The number of labeled examples is 'sup_count', and an equal number of
 49 |     labeled examples will be selected for each class. The remaining (50000 -
 50 |     sup_count) examples are provided as unlabeled training data. The validate
 51 |     and test sets are left unchanged.
 52 | 
 53 |     Note: labels for the normal digit classes will range from 1-10, i.e. +1
 54 |     compared to their standard value, as 'un-classed' examples take label 0.
 55 |     """
 56 | 
 57 |     udm_data = load_udm(dataset,as_shared=False)
 58 |     Xtr = udm_data[0][0]
 59 |     Ytr = udm_data[0][1][:,np.newaxis]
 60 | 
 61 |     all_count = Xtr.shape[0]
 62 |     pc_count = int(np.ceil(sup_count / 10.0))
 63 |     sup_count = int(10 * pc_count)
 64 |     unsup_count = all_count - sup_count
 65 | 
 66 |     Xtr_su = []
 67 |     Ytr_su = []
 68 |     Xtr_un = []
 69 |     Ytr_un = []
 70 | 
 71 |     # Sample supervised and unsupervised subsets of each class' observations
 72 |     for c_label in np.unique(Ytr):
 73 |         c_idx = [i for i in range(all_count) if (Ytr[i] == c_label)]
 74 |         rng.shuffle(c_idx)
 75 |         Xtr_su.append(Xtr[c_idx[0:pc_count],:])
 76 |         Ytr_su.append(Ytr[c_idx[0:pc_count],:])
 77 |         Xtr_un.append(Xtr[c_idx[pc_count:],:])
 78 |         Ytr_un.append(Ytr[c_idx[pc_count:],:])
 79 | 
 80 |     # Stack per-class supervised/unsupervised splits into matrices
 81 |     Xtr_su = np.vstack(Xtr_su)
 82 |     Ytr_su = np.vstack(Ytr_su)
 83 |     Xtr_un = np.vstack(Xtr_un)
 84 |     Ytr_un = np.vstack(Ytr_un)
 85 |     # Also keep "unsupervised" copies of the "supervised" data
 86 |     Xtr_un = Xtr_un #np.vstack([Xtr_un, Xtr_su])
 87 |     Ytr_un = 0 * Ytr_un #np.vstack([Ytr_un, Ytr_su])
 88 | 
 89 |     # Shuffle the rows so that observations are not grouped by class
 90 |     shuf_idx = rng.permutation(Xtr_su.shape[0])
 91 |     Xtr_su = Xtr_su[shuf_idx,:]
 92 |     Ytr_su = Ytr_su[shuf_idx].ravel() + 1
 93 |     shuf_idx = rng.permutation(Xtr_un.shape[0])
 94 |     Xtr_un = Xtr_un[shuf_idx,:]
 95 |     Ytr_un = Ytr_un[shuf_idx].ravel()
 96 | 
 97 |     # Put matrices into GPU shared variables, for great justice
 98 |     Xtr_su, Ytr_su = _shared_dataset((Xtr_su, Ytr_su))
 99 |     Xtr_un, Ytr_un = _shared_dataset((Xtr_un, Ytr_un))
100 |     Xva, Yva = _shared_dataset((udm_data[1][0], (udm_data[1][1] + 1)))
101 |     Xte, Yte = _shared_dataset((udm_data[2][0], (udm_data[2][1] + 1)))
102 | 
103 |     rval = [(Xtr_su, Ytr_su), (Xtr_un, Ytr_un), (Xva, Yva), (Xte, Yte)]
104 | 
105 |     return rval
106 | 
107 | def load_udm(dataset, as_shared=True):
108 |     """Loads the UdM train/validate/test split of MNIST."""
109 | 
110 |     #############
111 |     # LOAD DATA #
112 |     #############
113 | 
114 |     # Download the MNIST dataset if it is not present
115 |     data_dir, data_file = os.path.split(dataset)
116 |     if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
117 |         import urllib
118 |         origin = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
119 |         print 'Downloading data from %s' % origin
120 |         urllib.urlretrieve(origin, dataset)
121 | 
122 |     print '... loading data'
123 | 
124 |     # Load the dataset
125 |     f = gzip.open(dataset, 'rb')
126 |     train_set, valid_set, test_set = cPickle.load(f)
127 |     f.close()
128 |     #train_set, valid_set, test_set format: tuple(input, target)
129 |     #input is an np.ndarray of 2 dimensions (a matrix)
130 |     #witch row's correspond to an example. target is a
131 |     #np.ndarray of 1 dimensions (vector)) that have the same length as
132 |     #the number of rows in the input. It should give the target
133 |     #target to the example with the same index in the input.
134 |     train_set = [v for v in train_set]
135 |     valid_set = [v for v in valid_set]
136 |     test_set = [v for v in test_set]
137 |     train_set[0] = np.asarray(train_set[0]).astype(np.float32)
138 |     valid_set[0] = np.asarray(valid_set[0]).astype(np.float32)
139 |     test_set[0] = np.asarray(test_set[0]).astype(np.float32)
140 |     obs_mean = 1.0 * np.mean(train_set[0], axis=0, keepdims=True)
141 |     train_set[0] = train_set[0] - obs_mean
142 |     valid_set[0] = valid_set[0] - obs_mean
143 |     test_set[0] = test_set[0] - obs_mean
144 |     if as_shared:
145 |         test_set_x, test_set_y = _shared_dataset((test_set[0],test_set[1]+1))
146 |         valid_set_x, valid_set_y = _shared_dataset((valid_set[0],valid_set[1]+1))
147 |         train_set_x, train_set_y = _shared_dataset((train_set[0],train_set[1]+1))
148 |     else:
149 |         test_set_x, test_set_y = test_set
150 |         valid_set_x, valid_set_y = valid_set
151 |         train_set_x, train_set_y = train_set
152 | 
153 |     rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
154 |             (test_set_x, test_set_y)]
155 |     return rval
156 | 
157 | 


--------------------------------------------------------------------------------
/basic_sear/output_losses.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import theano
  3 | import theano.tensor as T
  4 | 
  5 | class LogisticRegression(object):
  6 |     """Multi-class Logistic Regression loss dangler."""
  7 | 
  8 |     def __init__(self, linear_layer):
  9 |         """Dangle a logistic regression from the given linear layer.
 10 | 
 11 |         The given linear layer should be a HiddenLayer (or subclass) object,
 12 |         for HiddenLayer as defined in LayerNet.py."""
 13 |         self.input_layer = linear_layer
 14 | 
 15 |     def loss_func(self, y):
 16 |         """Return the multiclass logistic regression loss for y.
 17 | 
 18 |         The class labels in y are assumed to be in correspondence with the
 19 |         set of column indices for self.input_layer.linear_output.
 20 |         """
 21 |         p_y_given_x = T.nnet.softmax(self.input_layer.linear_output)
 22 |         loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]),y])
 23 |         return loss
 24 | 
 25 |     def errors(self, y):
 26 |         """Compute the number of wrong predictions by self.input_layer.
 27 | 
 28 |         Predicted class labels are computed as the indices of the columns of
 29 |         self.input_layer.linear_output which are maximal. Wrong predictions are
 30 |         those for which max indices do not match their corresponding y values.
 31 |         """
 32 |         # Compute class memberships predicted by self.input_layer
 33 |         y_pred = T.argmax(self.input_layer.linear_output, axis=1)
 34 |         errs = 0
 35 |         # check if y has same dimension of y_pred
 36 |         if y.ndim != y_pred.ndim:
 37 |             raise TypeError('y should have the same shape as self.y_pred',
 38 |                 ('y', y.type, 'y_pred', y_pred.type))
 39 |         # check if y is of the correct datatype
 40 |         if y.dtype.startswith('int'):
 41 |             # the T.neq operator returns a vector of 0s and 1s, where 1
 42 |             # represents a mistake in prediction
 43 |             errs = T.sum(T.neq(y_pred, y))
 44 |         else:
 45 |             raise NotImplementedError()
 46 |         return errs
 47 | 
 48 | class LogRegSS(object):
 49 |     """Multi-class semi-supervised Logistic Regression loss dangler."""
 50 | 
 51 |     def __init__(self, linear_layer):
 52 |         """Dangle a logistic regression from the given linear layer.
 53 | 
 54 |         The given linear layer should be a HiddenLayer (or subclass) object,
 55 |         for HiddenLayer as defined in LayerNet.py."""
 56 |         self.input_layer = linear_layer
 57 | 
 58 |     def safe_softmax_ss(self, x):
 59 |         """Softmax that shouldn't overflow."""
 60 |         e_x = T.exp(x - T.max(x, axis=1, keepdims=True))
 61 |         x_sm = e_x / T.sum(e_x, axis=1, keepdims=True)
 62 |         return x_sm
 63 | 
 64 |     def loss_func(self, y):
 65 |         """Return the multiclass logistic regression loss for y.
 66 | 
 67 |         The class labels in y are assumed to be in correspondence with the
 68 |         set of column indices for self.input_layer.linear_output.
 69 |         """
 70 |         row_idx = T.arange(y.shape[0])
 71 |         row_mask = T.neq(y, 0).reshape((y.shape[0], 1))
 72 |         p_y_given_x = self.safe_softmax_ss(self.input_layer.linear_output)
 73 |         wacky_mat = (p_y_given_x * row_mask) + (1. - row_mask)
 74 |         loss = -T.sum(T.log(wacky_mat[row_idx,y])) / T.sum(row_mask)
 75 |         return loss
 76 | 
 77 |     def errors(self, y):
 78 |         """Compute the number of wrong predictions by self.input_layer.
 79 | 
 80 |         Predicted class labels are computed as the indices of the columns of
 81 |         self.input_layer.linear_output which are maximal. Wrong predictions are
 82 |         those for which max indices do not match their corresponding y values.
 83 |         """
 84 |         # Compute class memberships predicted by self.input_layer
 85 |         y_pred = T.argmax(self.input_layer.linear_output[:,1:], axis=1)
 86 |         y_pred = y_pred + 1
 87 |         errs = 0
 88 |         # check if y has same dimension of y_pred
 89 |         if y.ndim != y_pred.ndim:
 90 |             raise TypeError('y should have the same shape as self.y_pred',
 91 |                 ('y', y.type, 'y_pred', y_pred.type))
 92 |         # check if y is of the correct datatype
 93 |         if y.dtype.startswith('int'):
 94 |             # the T.neq operator returns a vector of 0s and 1s, where 1
 95 |             # represents a mistake in prediction
 96 |             errs = T.sum(T.neq(y_pred, y) * T.neq(y, 0))
 97 |         else:
 98 |             raise NotImplementedError()
 99 |         return errs
100 | 
101 | class MCL2Hinge(object):
102 |     """Multi-class one-vs-all L2 hinge loss dangler."""
103 | 
104 |     def __init__(self, linear_layer):
105 |         """Dangle a squred hinge loss from the given linear layer.
106 | 
107 |         The given linear layer should be a HiddenLayer (or subclass) object,
108 |         for HiddenLayer as defined in LayerNet.py."""
109 |         self.input_layer = linear_layer
110 | 
111 |     def loss_func(self, y):
112 |         """Return the multiclass squared hinge loss for y.
113 | 
114 |         The class labels in y are assumed to be in correspondence with the
115 |         set of column indices for self.input_layer.linear_output.
116 |         """
117 |         y_hat = self.input_layer.linear_output
118 |         margin_pos = T.maximum(0.0, (1.0 - y_hat))
119 |         margin_neg = T.maximum(0.0, (1.0 + y_hat))
120 |         obs_idx = T.arange(y.shape[0])
121 |         loss_pos = T.sum(margin_pos[obs_idx,y]**2.0)
122 |         loss_neg = T.sum(margin_neg**2.0) - T.sum(margin_neg[obs_idx,y]**2.0)
123 |         loss = (loss_pos + loss_neg) / y.shape[0]
124 |         return loss
125 | 
126 |     def errors(self, y):
127 |         """Compute the number of wrong predictions by self.input_layer.
128 | 
129 |         Predicted class labels are computed as the indices of the columns of
130 |         self.input_layer.linear_output which are maximal. Wrong predictions are
131 |         those for which max indices do not match their corresponding y values.
132 |         """
133 |         # Compute class memberships predicted by self.input_layer
134 |         y_pred = T.argmax(self.input_layer.linear_output, axis=1)
135 |         errs = 0
136 |         # check if y has same dimension of y_pred
137 |         if y.ndim != y_pred.ndim:
138 |             raise TypeError('y should have the same shape as self.y_pred',
139 |                 ('y', y.type, 'y_pred', y_pred.type))
140 |         # check if y is of the correct datatype
141 |         if y.dtype.startswith('int'):
142 |             # the T.neq operator returns a vector of 0s and 1s, where 1
143 |             # represents a mistake in prediction
144 |             errs = T.sum(T.neq(y_pred, y))
145 |         else:
146 |             raise NotImplementedError()
147 |         return errs
148 | 
149 | class MCL2HingeSS(object):
150 |     """Multi-class one-vs-all L2 hinge loss dangler.
151 | 
152 |     For this loss, class index 0 is never penalized, and errors for inputs
153 |     with class index 0 are similarly ignored. This is for semi-supervised
154 |     training, constrained by Theano's programming model."""
155 | 
156 |     def __init__(self, linear_layer):
157 |         """Dangle a squred hinge loss from the given linear layer.
158 | 
159 |         The given linear layer should be a HiddenLayer (or subclass) object,
160 |         for HiddenLayer as defined in LayerNet.py."""
161 |         self.input_layer = linear_layer
162 | 
163 |     def loss_func(self, y):
164 |         """Return the multiclass squared hinge loss for y.
165 | 
166 |         The class labels in y are assumed to be in correspondence with the
167 |         set of column indices for self.input_layer.linear_output.
168 |         """
169 |         y_hat = self.input_layer.linear_output
170 |         row_idx = T.arange(y.shape[0])
171 |         row_mask = T.neq(y, 0).reshape((y_hat.shape[0], 1))
172 |         margin_pos = T.maximum(0.0, (1.0 - y_hat)) * row_mask
173 |         margin_neg = T.maximum(0.0, (1.0 + y_hat)) * row_mask
174 |         loss_pos = T.sum(margin_pos[row_idx,y]**2.0)
175 |         loss_neg = T.sum(margin_neg**2.0) - T.sum(margin_neg[row_idx,y]**2.0)
176 |         loss = (loss_pos + loss_neg) / T.sum(row_mask)
177 |         return loss
178 | 
179 |     def errors(self, y):
180 |         """Compute the number of wrong predictions by self.input_layer.
181 | 
182 |         Predicted class labels are computed as the indices of the columns of
183 |         self.input_layer.linear_output which are maximal. Wrong predictions are
184 |         those for which max indices do not match their corresponding y values.
185 |         """
186 |         # Compute class memberships predicted by self.input_layer
187 |         y_pred = T.argmax(self.input_layer.linear_output[:,1:], axis=1)
188 |         y_pred = y_pred + 1
189 |         errs = 0
190 |         # check if y has same dimension of y_pred
191 |         if y.ndim != y_pred.ndim:
192 |             raise TypeError('y should have the same shape as self.y_pred',
193 |                 ('y', y.type, 'y_pred', y_pred.type))
194 |         # check if y is of the correct datatype
195 |         if y.dtype.startswith('int'):
196 |             # the T.neq operator returns a vector of 0s and 1s, where 1
197 |             # represents a mistake in prediction
198 |             errs = T.sum(T.neq(y_pred, y) * T.neq(y, 0))
199 |         else:
200 |             raise NotImplementedError()
201 |         return errs
202 | 


--------------------------------------------------------------------------------
/basic_sear/utils.py:
--------------------------------------------------------------------------------
  1 | """ This file contains different utility functions that are not connected
  2 | in anyway to the networks presented in the tutorials, but rather help in
  3 | processing the outputs into a more understandable way.
  4 | 
  5 | For example ``tile_raster_images`` helps in generating a easy to grasp
  6 | image from a set of samples or weights.
  7 | """
  8 | 
  9 | import numpy as np
 10 | import pylab as plt
 11 | import PIL as PIL
 12 | 
 13 | class batch(object):
 14 |     def __init__(self,batch_size):
 15 |         self.batch_size = batch_size
 16 | 
 17 |     def __call__(self,f):
 18 |         def wrapper(t,X):
 19 |             X = np.array(X)
 20 |             p = 0
 21 |             rem = 0
 22 |             results = []
 23 |             while p < len(X):
 24 |                 Z = X[p:p+self.batch_size]
 25 |                 if Z.shape[0] != self.batch_size:
 26 |                     zeros = np.zeros((self.batch_size-len(Z),X.shape[1]))
 27 |                     rem = len(Z)
 28 |                     Z = np.array(np.vstack((Z,zeros)),dtype=X.dtype)
 29 | 
 30 |                 temp_results = f(t,Z)
 31 |                 if rem != 0:
 32 |                     temp_results = temp_results[:rem]
 33 | 
 34 |                 results.extend(temp_results)
 35 |                 p += self.batch_size
 36 |             return np.array(results,dtype='float32')
 37 |         return wrapper
 38 | 
 39 | def scale_to_unit_interval(ndar, eps=1e-8):
 40 |     """ Scales all values in the ndarray ndar to be between 0 and 1 """
 41 |     ndar = ndar.copy()
 42 |     ndar -= ndar.min()
 43 |     ndar *= 1.0 / (ndar.max() + eps)
 44 |     return ndar
 45 | 
 46 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
 47 |                        scale_rows_to_unit_interval=True,
 48 |                        output_pixel_vals=True):
 49 |     """
 50 |     Transform an array with one flattened image per row, into an array in
 51 |     which images are reshaped and layed out like tiles on a floor.
 52 | 
 53 |     This function is useful for visualizing datasets whose rows are images,
 54 |     and also columns of matrices for transforming those rows
 55 |     (such as the first layer of a neural net).
 56 | 
 57 |     :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can
 58 |     be 2-D ndarrays or None;
 59 |     :param X: a 2-D array in which every row is a flattened image.
 60 | 
 61 |     :type img_shape: tuple; (height, width)
 62 |     :param img_shape: the original shape of each image
 63 | 
 64 |     :type tile_shape: tuple; (rows, cols)
 65 |     :param tile_shape: the number of images to tile (rows, cols)
 66 | 
 67 |     :param output_pixel_vals: if output should be pixel values (i.e. int8
 68 |     values) or floats
 69 | 
 70 |     :param scale_rows_to_unit_interval: if the values need to be scaled before
 71 |     being plotted to [0,1] or not
 72 | 
 73 | 
 74 |     :returns: array suitable for viewing as an image.
 75 |     (See:`PIL.Image.fromarray`.)
 76 |     :rtype: a 2-d array with same dtype as X.
 77 | 
 78 |     """
 79 | 
 80 |     assert len(img_shape) == 2
 81 |     assert len(tile_shape) == 2
 82 |     assert len(tile_spacing) == 2
 83 | 
 84 |     # The expression below can be re-written in a more C style as
 85 |     # follows :
 86 |     #
 87 |     # out_shape    = [0,0]
 88 |     # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] -
 89 |     #                tile_spacing[0]
 90 |     # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] -
 91 |     #                tile_spacing[1]
 92 |     out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
 93 |                         in zip(img_shape, tile_shape, tile_spacing)]
 94 | 
 95 |     if isinstance(X, tuple):
 96 |         assert len(X) == 4
 97 |         # Create an output numpy ndarray to store the image
 98 |         if output_pixel_vals:
 99 |             out_array = np.zeros((out_shape[0], out_shape[1], 4),
100 |                                     dtype='uint8')
101 |         else:
102 |             out_array = np.zeros((out_shape[0], out_shape[1], 4),
103 |                                     dtype=X.dtype)
104 | 
105 |         #colors default to 0, alpha defaults to 1 (opaque)
106 |         if output_pixel_vals:
107 |             channel_defaults = [0, 0, 0, 255]
108 |         else:
109 |             channel_defaults = [0., 0., 0., 1.]
110 | 
111 |         for i in xrange(4):
112 |             if X[i] is None:
113 |                 # if channel is None, fill it with zeros of the correct
114 |                 # dtype
115 |                 dt = out_array.dtype
116 |                 if output_pixel_vals:
117 |                     dt = 'uint8'
118 |                 out_array[:, :, i] = np.zeros(out_shape,
119 |                         dtype=dt) + channel_defaults[i]
120 |             else:
121 |                 # use a recurrent call to compute the channel and store it
122 |                 # in the output
123 |                 out_array[:, :, i] = tile_raster_images(
124 |                     X[i], img_shape, tile_shape, tile_spacing,
125 |                     scale_rows_to_unit_interval, output_pixel_vals)
126 |         return out_array
127 |     else:
128 |         # if we are dealing with only one channel
129 |         H, W = img_shape
130 |         Hs, Ws = tile_spacing
131 |         # generate a matrix to store the output
132 |         dt = X.dtype
133 |         if output_pixel_vals:
134 |             dt = 'uint8'
135 |         out_array = np.zeros(out_shape, dtype=dt)
136 |         for tile_row in xrange(tile_shape[0]):
137 |             for tile_col in xrange(tile_shape[1]):
138 |                 if tile_row * tile_shape[1] + tile_col < X.shape[0]:
139 |                     this_x = X[tile_row * tile_shape[1] + tile_col]
140 |                     if scale_rows_to_unit_interval:
141 |                         # if we should scale values to be between 0 and 1
142 |                         # do this by calling the `scale_to_unit_interval`
143 |                         # function
144 |                         this_img = scale_to_unit_interval(
145 |                             this_x.reshape(img_shape))
146 |                     else:
147 |                         this_img = this_x.reshape(img_shape)
148 |                     # add the slice to the corresponding position in the
149 |                     # output array
150 |                     c = 1
151 |                     if output_pixel_vals:
152 |                         c = 255
153 |                     out_array[
154 |                         tile_row * (H + Hs): tile_row * (H + Hs) + H,
155 |                         tile_col * (W + Ws): tile_col * (W + Ws) + W
156 |                         ] = this_img * c
157 |         return out_array
158 | 
159 | 
160 | def plot_histograms(firings):
161 |     N = int(np.ceil(np.sqrt(firings.shape[1])))
162 |     plt.figure(figsize=(N,N))
163 |     axisNum = 0
164 |     for row in range(N):
165 |         for col in range(N):
166 |             axisNum += 1
167 |             ax = plt.subplot(N, N, axisNum)
168 |             ax.set_xticklabels([])
169 |             ax.set_yticklabels([])
170 |             plt.hist(firings[:,row*N+col],bins=50)
171 |     plt.show()
172 | 
173 | def visualize(MLP,layer_idx,file_name):
174 | 
175 | 	W = MLP.layers[layer_idx].W.get_value(borrow=True).T
176 | 
177 | 	size = int(np.sqrt(W.shape[1]))
178 | 
179 | 	# hist(W.flatten(),bins=50)
180 | 	image = PIL.Image.fromarray(tile_raster_images(X=W,
181 | 		img_shape=(size, size), tile_shape=(10,W.shape[0]/10),tile_spacing=(1, 1)))
182 | 	image.save(file_name)
183 | 
184 | 
185 | 
186 | 


--------------------------------------------------------------------------------
/generalized_ear/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2014 Philip Bachman
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 4 | this software and associated documentation files (the "Software"), to deal in
 5 | the Software without restriction, including without limitation the rights to
 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 7 | of the Software, and to permit persons to whom the Software is furnished to do
 8 | so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 
21 | 
22 | ********************************************************************************
23 | * The copyright notice below comes from code which has been _heavily_ modified *
24 | * in the production of the code in this directory.                             *
25 | ********************************************************************************
26 | 
27 | 
28 | Copyright (C) 2012 Misha Denil
29 | 
30 | Permission is hereby granted, free of charge, to any person obtaining a copy of
31 | this software and associated documentation files (the "Software"), to deal in
32 | the Software without restriction, including without limitation the rights to
33 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
34 | of the Software, and to permit persons to whom the Software is furnished to do
35 | so, subject to the following conditions:
36 | 
37 | The above copyright notice and this permission notice shall be included in all
38 | copies or substantial portions of the Software.
39 | 
40 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
41 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
42 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
43 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
44 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
45 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
46 | SOFTWARE.
47 | 
48 | 


--------------------------------------------------------------------------------
/generalized_ear/load_data.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import cPickle
  3 | import gzip
  4 | import os
  5 | import sys
  6 | 
  7 | import theano
  8 | import theano.tensor as T
  9 | 
 10 | def _shared_dataset(data_xy):
 11 |     """ Function that loads the dataset into shared variables
 12 | 
 13 |     The reason we store our dataset in shared variables is to allow
 14 |     Theano to copy it into the GPU memory (when code is run on GPU).
 15 |     Since copying data into the GPU is slow, copying a minibatch everytime
 16 |     is needed (the default behaviour if the data is not in a shared
 17 |     variable) would lead to a large decrease in performance.
 18 |     """
 19 |     data_x, data_y = data_xy
 20 |     shared_x = theano.shared(np.asarray(data_x,
 21 |                                            dtype=theano.config.floatX))
 22 |     shared_y = theano.shared(np.asarray(data_y,
 23 |                                            dtype=theano.config.floatX))
 24 |     # When storing data on the GPU it has to be stored as floats
 25 |     # therefore we will store the labels as ``floatX`` as well
 26 |     # (``shared_y`` does exactly that).
 27 |     return shared_x, shared_y
 28 | 
 29 | def load_mnist(path, zero_mean=True):
 30 |     mnist = np.load(path)
 31 |     train_set_x = mnist['train_data']
 32 |     train_set_y = mnist['train_labels'] + 1
 33 |     test_set_x = mnist['test_data']
 34 |     test_set_y = mnist['test_labels'] + 1
 35 | 
 36 |     if zero_mean:
 37 |         obs_mean = np.mean(train_set_x, axis=0, keepdims=True)
 38 |         train_set_x = train_set_x - obs_mean
 39 |         test_set_x = test_set_x - obs_mean
 40 | 
 41 |     train_set_x, train_set_y = _shared_dataset((train_set_x, train_set_y))
 42 |     test_set_x, test_set_y = _shared_dataset((test_set_x, test_set_y))
 43 |     valid_set_x, valid_set_y = test_set_x, test_set_y
 44 | 
 45 |     rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
 46 |             (test_set_x, test_set_y)]
 47 |     return rval
 48 | 
 49 | def load_udm_ss(dataset, sup_count, rng, zero_mean=True):
 50 |     """Load semi-supervised version of the standard UdM MNIST data.
 51 | 
 52 |     For this, the training data is split into labeled and unlabeled portions.
 53 |     The number of labeled examples is 'sup_count', and an equal number of
 54 |     labeled examples will be selected for each class. The remaining (50000 -
 55 |     sup_count) examples are provided as unlabeled training data. The validate
 56 |     and test sets are left unchanged.
 57 | 
 58 |     Note: labels for the normal digit classes will range from 1-10, i.e. +1
 59 |     compared to their standard value, as 'un-classed' examples take label 0.
 60 |     """
 61 | 
 62 |     udm_data = load_udm(dataset, as_shared=False, zero_mean=zero_mean)
 63 |     Xtr = udm_data[0][0]
 64 |     Ytr = udm_data[0][1][:,np.newaxis]
 65 | 
 66 |     all_count = Xtr.shape[0]
 67 |     pc_count = int(np.ceil(sup_count / 10.0))
 68 |     sup_count = int(10 * pc_count)
 69 |     unsup_count = all_count - sup_count
 70 | 
 71 |     Xtr_su = []
 72 |     Ytr_su = []
 73 |     Xtr_un = []
 74 |     Ytr_un = []
 75 | 
 76 |     # Sample supervised and unsupervised subsets of each class' observations
 77 |     for c_label in np.unique(Ytr):
 78 |         c_idx = [i for i in range(all_count) if (Ytr[i] == c_label)]
 79 |         rng.shuffle(c_idx)
 80 |         Xtr_su.append(Xtr[c_idx[0:pc_count],:])
 81 |         Ytr_su.append(Ytr[c_idx[0:pc_count],:])
 82 |         Xtr_un.append(Xtr[c_idx[pc_count:],:])
 83 |         Ytr_un.append(Ytr[c_idx[pc_count:],:])
 84 | 
 85 |     # Stack per-class supervised/unsupervised splits into matrices
 86 |     Xtr_su = np.vstack(Xtr_su)
 87 |     Ytr_su = np.vstack(Ytr_su)
 88 |     Xtr_un = np.vstack(Xtr_un)
 89 |     Ytr_un = np.vstack(Ytr_un)
 90 |     # Also keep "unsupervised" copies of the "supervised" data
 91 |     Xtr_un = Xtr_un #np.vstack([Xtr_un, Xtr_su])
 92 |     Ytr_un = 0 * Ytr_un #np.vstack([Ytr_un, Ytr_su])
 93 | 
 94 |     # Shuffle the rows so that observations are not grouped by class
 95 |     shuf_idx = rng.permutation(Xtr_su.shape[0])
 96 |     Xtr_su = Xtr_su[shuf_idx,:]
 97 |     Ytr_su = Ytr_su[shuf_idx].ravel() + 1
 98 |     shuf_idx = rng.permutation(Xtr_un.shape[0])
 99 |     Xtr_un = Xtr_un[shuf_idx,:]
100 |     Ytr_un = Ytr_un[shuf_idx].ravel()
101 | 
102 |     # Put matrices into GPU shared variables, for great justice
103 |     Xtr_su, Ytr_su = _shared_dataset((Xtr_su, Ytr_su))
104 |     Xtr_un, Ytr_un = _shared_dataset((Xtr_un, Ytr_un))
105 |     Xva, Yva = _shared_dataset((udm_data[1][0], (udm_data[1][1] + 1)))
106 |     Xte, Yte = _shared_dataset((udm_data[2][0], (udm_data[2][1] + 1)))
107 | 
108 |     rval = [(Xtr_su, Ytr_su), (Xtr_un, Ytr_un), (Xva, Yva), (Xte, Yte)]
109 | 
110 |     return rval
111 | 
112 | def load_udm(dataset, as_shared=True, zero_mean=True):
113 |     """Loads the UdM train/validate/test split of MNIST."""
114 | 
115 |     #############
116 |     # LOAD DATA #
117 |     #############
118 | 
119 |     # Download the MNIST dataset if it is not present
120 |     data_dir, data_file = os.path.split(dataset)
121 |     if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
122 |         import urllib
123 |         origin = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
124 |         print 'Downloading data from %s' % origin
125 |         urllib.urlretrieve(origin, dataset)
126 | 
127 |     print '... loading data'
128 | 
129 |     # Load the dataset
130 |     f = gzip.open(dataset, 'rb')
131 |     train_set, valid_set, test_set = cPickle.load(f)
132 |     f.close()
133 |     #train_set, valid_set, test_set format: tuple(input, target)
134 |     #input is an np.ndarray of 2 dimensions (a matrix)
135 |     #witch row's correspond to an example. target is a
136 |     #np.ndarray of 1 dimensions (vector)) that have the same length as
137 |     #the number of rows in the input. It should give the target
138 |     #target to the example with the same index in the input.
139 |     train_set = [v for v in train_set]
140 |     valid_set = [v for v in valid_set]
141 |     test_set = [v for v in test_set]
142 |     train_set[0] = np.asarray(train_set[0]).astype(np.float32)
143 |     valid_set[0] = np.asarray(valid_set[0]).astype(np.float32)
144 |     test_set[0] = np.asarray(test_set[0]).astype(np.float32)
145 |     if zero_mean:
146 |         obs_mean = np.mean(train_set[0], axis=0, keepdims=True)
147 |         train_set[0] = train_set[0] - obs_mean
148 |         valid_set[0] = valid_set[0] - obs_mean
149 |         test_set[0] = test_set[0] - obs_mean
150 |     if as_shared:
151 |         test_set_x, test_set_y = _shared_dataset((test_set[0],test_set[1]+1))
152 |         valid_set_x, valid_set_y = _shared_dataset((valid_set[0],valid_set[1]+1))
153 |         train_set_x, train_set_y = _shared_dataset((train_set[0],train_set[1]+1))
154 |     else:
155 |         test_set_x, test_set_y = test_set
156 |         valid_set_x, valid_set_y = valid_set
157 |         train_set_x, train_set_y = train_set
158 | 
159 |     rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
160 |             (test_set_x, test_set_y)]
161 |     return rval
162 | 
163 | 


--------------------------------------------------------------------------------
/generalized_ear/output_losses.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import theano
  3 | import theano.tensor as T
  4 | 
  5 | class LogisticRegression(object):
  6 |     """Multi-class Logistic Regression loss dangler."""
  7 | 
  8 |     def __init__(self, linear_layer):
  9 |         """Dangle a logistic regression from the given linear layer.
 10 | 
 11 |         The given linear layer should be a HiddenLayer (or subclass) object,
 12 |         for HiddenLayer as defined in LayerNet.py."""
 13 |         self.input_layer = linear_layer
 14 | 
 15 |     def loss_func(self, y):
 16 |         """Return the multiclass logistic regression loss for y.
 17 | 
 18 |         The class labels in y are assumed to be in correspondence with the
 19 |         set of column indices for self.input_layer.linear_output.
 20 |         """
 21 |         p_y_given_x = T.nnet.softmax(self.input_layer.linear_output)
 22 |         loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]),y])
 23 |         return loss
 24 | 
 25 |     def errors(self, y):
 26 |         """Compute the number of wrong predictions by self.input_layer.
 27 | 
 28 |         Predicted class labels are computed as the indices of the columns of
 29 |         self.input_layer.linear_output which are maximal. Wrong predictions are
 30 |         those for which max indices do not match their corresponding y values.
 31 |         """
 32 |         # Compute class memberships predicted by self.input_layer
 33 |         y_pred = T.argmax(self.input_layer.linear_output, axis=1)
 34 |         errs = 0
 35 |         # check if y has same dimension of y_pred
 36 |         if y.ndim != y_pred.ndim:
 37 |             raise TypeError('y should have the same shape as self.y_pred',
 38 |                 ('y', y.type, 'y_pred', y_pred.type))
 39 |         # check if y is of the correct datatype
 40 |         if y.dtype.startswith('int'):
 41 |             # the T.neq operator returns a vector of 0s and 1s, where 1
 42 |             # represents a mistake in prediction
 43 |             errs = T.sum(T.neq(y_pred, y))
 44 |         else:
 45 |             raise NotImplementedError()
 46 |         return errs
 47 | 
 48 | class LogRegSS(object):
 49 |     """Multi-class semi-supervised Logistic Regression loss dangler."""
 50 | 
 51 |     def __init__(self, linear_layer):
 52 |         """Dangle a logistic regression from the given linear layer.
 53 | 
 54 |         The given linear layer should be a HiddenLayer (or subclass) object,
 55 |         for HiddenLayer as defined in LayerNet.py."""
 56 |         self.input_layer = linear_layer
 57 | 
 58 |     def safe_softmax_ss(self, x):
 59 |         """Softmax that shouldn't overflow."""
 60 |         e_x = T.exp(x - T.max(x, axis=1, keepdims=True))
 61 |         x_sm = e_x / T.sum(e_x, axis=1, keepdims=True)
 62 |         return x_sm
 63 | 
 64 |     def loss_func(self, y):
 65 |         """Return the multiclass logistic regression loss for y.
 66 | 
 67 |         The class labels in y are assumed to be in correspondence with the
 68 |         set of column indices for self.input_layer.linear_output.
 69 |         """
 70 |         row_idx = T.arange(y.shape[0])
 71 |         row_mask = T.neq(y, 0).reshape((y.shape[0], 1))
 72 |         p_y_given_x = self.safe_softmax_ss(self.input_layer.linear_output)
 73 |         wacky_mat = (p_y_given_x * row_mask) + (1. - row_mask)
 74 |         loss = -T.sum(T.log(wacky_mat[row_idx,y])) / T.sum(row_mask)
 75 |         return loss
 76 | 
 77 |     def errors(self, y):
 78 |         """Compute the number of wrong predictions by self.input_layer.
 79 | 
 80 |         Predicted class labels are computed as the indices of the columns of
 81 |         self.input_layer.linear_output which are maximal. Wrong predictions are
 82 |         those for which max indices do not match their corresponding y values.
 83 |         """
 84 |         # Compute class memberships predicted by self.input_layer
 85 |         y_pred = T.argmax(self.input_layer.linear_output[:,1:], axis=1)
 86 |         y_pred = y_pred + 1
 87 |         errs = 0
 88 |         # check if y has same dimension of y_pred
 89 |         if y.ndim != y_pred.ndim:
 90 |             raise TypeError('y should have the same shape as self.y_pred',
 91 |                 ('y', y.type, 'y_pred', y_pred.type))
 92 |         # check if y is of the correct datatype
 93 |         if y.dtype.startswith('int'):
 94 |             # the T.neq operator returns a vector of 0s and 1s, where 1
 95 |             # represents a mistake in prediction
 96 |             errs = T.sum(T.neq(y_pred, y) * T.neq(y, 0))
 97 |         else:
 98 |             raise NotImplementedError()
 99 |         return errs
100 | 
101 | class MCL2Hinge(object):
102 |     """Multi-class one-vs-all L2 hinge loss dangler."""
103 | 
104 |     def __init__(self, linear_layer):
105 |         """Dangle a squred hinge loss from the given linear layer.
106 | 
107 |         The given linear layer should be a HiddenLayer (or subclass) object,
108 |         for HiddenLayer as defined in LayerNet.py."""
109 |         self.input_layer = linear_layer
110 | 
111 |     def loss_func(self, y):
112 |         """Return the multiclass squared hinge loss for y.
113 | 
114 |         The class labels in y are assumed to be in correspondence with the
115 |         set of column indices for self.input_layer.linear_output.
116 |         """
117 |         y_hat = self.input_layer.linear_output
118 |         margin_pos = T.maximum(0.0, (1.0 - y_hat))
119 |         margin_neg = T.maximum(0.0, (1.0 + y_hat))
120 |         obs_idx = T.arange(y.shape[0])
121 |         loss_pos = T.sum(margin_pos[obs_idx,y]**2.0)
122 |         loss_neg = T.sum(margin_neg**2.0) - T.sum(margin_neg[obs_idx,y]**2.0)
123 |         loss = (loss_pos + loss_neg) / y.shape[0]
124 |         return loss
125 | 
126 |     def errors(self, y):
127 |         """Compute the number of wrong predictions by self.input_layer.
128 | 
129 |         Predicted class labels are computed as the indices of the columns of
130 |         self.input_layer.linear_output which are maximal. Wrong predictions are
131 |         those for which max indices do not match their corresponding y values.
132 |         """
133 |         # Compute class memberships predicted by self.input_layer
134 |         y_pred = T.argmax(self.input_layer.linear_output, axis=1)
135 |         errs = 0
136 |         # check if y has same dimension of y_pred
137 |         if y.ndim != y_pred.ndim:
138 |             raise TypeError('y should have the same shape as self.y_pred',
139 |                 ('y', y.type, 'y_pred', y_pred.type))
140 |         # check if y is of the correct datatype
141 |         if y.dtype.startswith('int'):
142 |             # the T.neq operator returns a vector of 0s and 1s, where 1
143 |             # represents a mistake in prediction
144 |             errs = T.sum(T.neq(y_pred, y))
145 |         else:
146 |             raise NotImplementedError()
147 |         return errs
148 | 
149 | class MCL2HingeSS(object):
150 |     """Multi-class one-vs-all L2 hinge loss dangler.
151 | 
152 |     For this loss, class index 0 is never penalized, and errors for inputs
153 |     with class index 0 are similarly ignored. This is for semi-supervised
154 |     training, constrained by Theano's programming model."""
155 | 
156 |     def __init__(self, linear_layer):
157 |         """Dangle a squred hinge loss from the given linear layer.
158 | 
159 |         The given linear layer should be a HiddenLayer (or subclass) object,
160 |         for HiddenLayer as defined in LayerNet.py."""
161 |         self.input_layer = linear_layer
162 | 
163 |     def loss_func(self, y):
164 |         """Return the multiclass squared hinge loss for y.
165 | 
166 |         The class labels in y are assumed to be in correspondence with the
167 |         set of column indices for self.input_layer.linear_output.
168 |         """
169 |         y_hat = self.input_layer.linear_output
170 |         row_idx = T.arange(y.shape[0])
171 |         row_mask = T.neq(y, 0).reshape((y_hat.shape[0], 1))
172 |         margin_pos = T.maximum(0.0, (1.0 - y_hat)) * row_mask
173 |         margin_neg = T.maximum(0.0, (1.0 + y_hat)) * row_mask
174 |         loss_pos = T.sum(margin_pos[row_idx,y]**2.0)
175 |         loss_neg = T.sum(margin_neg**2.0) - T.sum(margin_neg[row_idx,y]**2.0)
176 |         loss = (loss_pos + loss_neg) / T.sum(row_mask)
177 |         return loss
178 | 
179 |     def errors(self, y):
180 |         """Compute the number of wrong predictions by self.input_layer.
181 | 
182 |         Predicted class labels are computed as the indices of the columns of
183 |         self.input_layer.linear_output which are maximal. Wrong predictions are
184 |         those for which max indices do not match their corresponding y values.
185 |         """
186 |         # Compute class memberships predicted by self.input_layer
187 |         y_pred = T.argmax(self.input_layer.linear_output[:,1:], axis=1)
188 |         y_pred = y_pred + 1
189 |         errs = 0
190 |         # check if y has same dimension of y_pred
191 |         if y.ndim != y_pred.ndim:
192 |             raise TypeError('y should have the same shape as self.y_pred',
193 |                 ('y', y.type, 'y_pred', y_pred.type))
194 |         # check if y is of the correct datatype
195 |         if y.dtype.startswith('int'):
196 |             # the T.neq operator returns a vector of 0s and 1s, where 1
197 |             # represents a mistake in prediction
198 |             errs = T.sum(T.neq(y_pred, y) * T.neq(y, 0))
199 |         else:
200 |             raise NotImplementedError()
201 |         return errs
202 | 


--------------------------------------------------------------------------------
/generalized_ear/utils.py:
--------------------------------------------------------------------------------
  1 | """ This file contains different utility functions that are not connected
  2 | in anyway to the networks presented in the tutorials, but rather help in
  3 | processing the outputs into a more understandable way.
  4 | 
  5 | For example ``tile_raster_images`` helps in generating a easy to grasp
  6 | image from a set of samples or weights.
  7 | """
  8 | 
  9 | import numpy as np
 10 | import pylab as plt
 11 | import PIL as PIL
 12 | 
 13 | class batch(object):
 14 |     def __init__(self,batch_size):
 15 |         self.batch_size = batch_size
 16 | 
 17 |     def __call__(self,f):
 18 |         def wrapper(t,X):
 19 |             X = np.array(X)
 20 |             p = 0
 21 |             rem = 0
 22 |             results = []
 23 |             while p < len(X):
 24 |                 Z = X[p:p+self.batch_size]
 25 |                 if Z.shape[0] != self.batch_size:
 26 |                     zeros = np.zeros((self.batch_size-len(Z),X.shape[1]))
 27 |                     rem = len(Z)
 28 |                     Z = np.array(np.vstack((Z,zeros)),dtype=X.dtype)
 29 | 
 30 |                 temp_results = f(t,Z)
 31 |                 if rem != 0:
 32 |                     temp_results = temp_results[:rem]
 33 | 
 34 |                 results.extend(temp_results)
 35 |                 p += self.batch_size
 36 |             return np.array(results,dtype='float32')
 37 |         return wrapper
 38 | 
 39 | def scale_to_unit_interval(ndar, eps=1e-8):
 40 |     """ Scales all values in the ndarray ndar to be between 0 and 1 """
 41 |     ndar = ndar.copy()
 42 |     ndar -= ndar.min()
 43 |     ndar *= 1.0 / (ndar.max() + eps)
 44 |     return ndar
 45 | 
 46 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
 47 |                        scale_rows_to_unit_interval=True,
 48 |                        output_pixel_vals=True):
 49 |     """
 50 |     Transform an array with one flattened image per row, into an array in
 51 |     which images are reshaped and layed out like tiles on a floor.
 52 | 
 53 |     This function is useful for visualizing datasets whose rows are images,
 54 |     and also columns of matrices for transforming those rows
 55 |     (such as the first layer of a neural net).
 56 | 
 57 |     :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can
 58 |     be 2-D ndarrays or None;
 59 |     :param X: a 2-D array in which every row is a flattened image.
 60 | 
 61 |     :type img_shape: tuple; (height, width)
 62 |     :param img_shape: the original shape of each image
 63 | 
 64 |     :type tile_shape: tuple; (rows, cols)
 65 |     :param tile_shape: the number of images to tile (rows, cols)
 66 | 
 67 |     :param output_pixel_vals: if output should be pixel values (i.e. int8
 68 |     values) or floats
 69 | 
 70 |     :param scale_rows_to_unit_interval: if the values need to be scaled before
 71 |     being plotted to [0,1] or not
 72 | 
 73 | 
 74 |     :returns: array suitable for viewing as an image.
 75 |     (See:`PIL.Image.fromarray`.)
 76 |     :rtype: a 2-d array with same dtype as X.
 77 | 
 78 |     """
 79 | 
 80 |     assert len(img_shape) == 2
 81 |     assert len(tile_shape) == 2
 82 |     assert len(tile_spacing) == 2
 83 | 
 84 |     # The expression below can be re-written in a more C style as
 85 |     # follows :
 86 |     #
 87 |     # out_shape    = [0,0]
 88 |     # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] -
 89 |     #                tile_spacing[0]
 90 |     # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] -
 91 |     #                tile_spacing[1]
 92 |     out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
 93 |                         in zip(img_shape, tile_shape, tile_spacing)]
 94 | 
 95 |     if isinstance(X, tuple):
 96 |         assert len(X) == 4
 97 |         # Create an output numpy ndarray to store the image
 98 |         if output_pixel_vals:
 99 |             out_array = np.zeros((out_shape[0], out_shape[1], 4),
100 |                                     dtype='uint8')
101 |         else:
102 |             out_array = np.zeros((out_shape[0], out_shape[1], 4),
103 |                                     dtype=X.dtype)
104 | 
105 |         #colors default to 0, alpha defaults to 1 (opaque)
106 |         if output_pixel_vals:
107 |             channel_defaults = [0, 0, 0, 255]
108 |         else:
109 |             channel_defaults = [0., 0., 0., 1.]
110 | 
111 |         for i in xrange(4):
112 |             if X[i] is None:
113 |                 # if channel is None, fill it with zeros of the correct
114 |                 # dtype
115 |                 dt = out_array.dtype
116 |                 if output_pixel_vals:
117 |                     dt = 'uint8'
118 |                 out_array[:, :, i] = np.zeros(out_shape,
119 |                         dtype=dt) + channel_defaults[i]
120 |             else:
121 |                 # use a recurrent call to compute the channel and store it
122 |                 # in the output
123 |                 out_array[:, :, i] = tile_raster_images(
124 |                     X[i], img_shape, tile_shape, tile_spacing,
125 |                     scale_rows_to_unit_interval, output_pixel_vals)
126 |         return out_array
127 |     else:
128 |         # if we are dealing with only one channel
129 |         H, W = img_shape
130 |         Hs, Ws = tile_spacing
131 |         # generate a matrix to store the output
132 |         dt = X.dtype
133 |         if output_pixel_vals:
134 |             dt = 'uint8'
135 |         out_array = np.zeros(out_shape, dtype=dt)
136 |         for tile_row in xrange(tile_shape[0]):
137 |             for tile_col in xrange(tile_shape[1]):
138 |                 if tile_row * tile_shape[1] + tile_col < X.shape[0]:
139 |                     this_x = X[tile_row * tile_shape[1] + tile_col]
140 |                     if scale_rows_to_unit_interval:
141 |                         # if we should scale values to be between 0 and 1
142 |                         # do this by calling the `scale_to_unit_interval`
143 |                         # function
144 |                         this_img = scale_to_unit_interval(
145 |                             this_x.reshape(img_shape))
146 |                     else:
147 |                         this_img = this_x.reshape(img_shape)
148 |                     # add the slice to the corresponding position in the
149 |                     # output array
150 |                     c = 1
151 |                     if output_pixel_vals:
152 |                         c = 255
153 |                     out_array[
154 |                         tile_row * (H + Hs): tile_row * (H + Hs) + H,
155 |                         tile_col * (W + Ws): tile_col * (W + Ws) + W
156 |                         ] = this_img * c
157 |         return out_array
158 | 
159 | 
160 | def plot_histograms(firings):
161 |     N = int(np.ceil(np.sqrt(firings.shape[1])))
162 |     plt.figure(figsize=(N,N))
163 |     axisNum = 0
164 |     for row in range(N):
165 |         for col in range(N):
166 |             axisNum += 1
167 |             ax = plt.subplot(N, N, axisNum)
168 |             ax.set_xticklabels([])
169 |             ax.set_yticklabels([])
170 |             plt.hist(firings[:,row*N+col],bins=50)
171 |     plt.show()
172 |     return
173 | 
174 | def visualize(EN, proto_key, layer_num, file_name):
175 |     W = EN.proto_nets[proto_key][layer_num].W.get_value(borrow=True).T
176 |     size = int(np.sqrt(W.shape[1]))
177 |     # hist(W.flatten(),bins=50)
178 |     image = PIL.Image.fromarray(tile_raster_images(X=W, \
179 |             img_shape=(size, size), tile_shape=(10,W.shape[0]/10),tile_spacing=(1, 1)))
180 |     image.save(file_name)
181 |     return
182 | 
183 | def visualize_samples(X_samp, file_name):
184 |     d = int(np.sqrt(X_samp.shape[1]))
185 |     # hist(W.flatten(),bins=50)
186 |     image = PIL.Image.fromarray(tile_raster_images(X=X_samp, img_shape=(d, d), \
187 |             tile_shape=(10,X_samp.shape[0]/10),tile_spacing=(1, 1)))
188 |     image.save(file_name)
189 |     return
190 | 
191 | 
192 | 
193 | 


--------------------------------------------------------------------------------
/generative_models/DKCode.py:
--------------------------------------------------------------------------------
  1 | ###################################################################
  2 | # Code adapted from Durk Kingma's Github repository: "nips14-ssl" #
  3 | ###################################################################
  4 | 
  5 | from collections import OrderedDict
  6 | import numpy as np
  7 | import theano as theano
  8 | import theano.tensor as T
  9 | from theano.ifelse import ifelse
 10 | 
 11 | # Pre-processing routines
 12 | 
 13 | def PCA_theano(x_in, cutoff=0.99, global_sd=True):
 14 |     """
 15 |     Given input matrix x_in in numpy form, compute transform functions for
 16 |     reducing the dimensionality of inputs. Make the transform functions and
 17 |     all their parameters based around theano shared variables, for GPU use.
 18 |     """
 19 |     x_center = x_in.mean(axis=0)
 20 |     x = x_in - x_center
 21 |     if not global_sd:
 22 |         x_sd = x.std(axis=0) + 1e-5
 23 |     else:
 24 |         x_sd = x.std() + 1e-5
 25 |     # normalize to either unit standard deviation "globally" or
 26 |     # per-feature
 27 |     x = x / x_sd
 28 |     # compute covariance matrix and its eigen-decomposition
 29 |     print "Performing eigen-decomposition for PCA..."
 30 |     x_cov = np.dot(x.T, x) / x.shape[0]
 31 |     eigval, eigvec = np.linalg.eig(x_cov)
 32 |     #
 33 |     #eigval = np.ones(eigval.shape)
 34 |     #
 35 |     print "Done."
 36 |     if cutoff <= 1:
 37 |         # pick the number of dimensions to keep based on recovered variance
 38 |         n_used = ((eigval.cumsum() / eigval.sum()) < cutoff).sum()
 39 |         print 'PCA cutoff:', cutoff, 'n_used:', n_used
 40 |     else:
 41 |         # pick the number of dimensions to keep by user-provided value
 42 |         n_used = int(cutoff)
 43 |     eigval = eigval[:n_used].reshape((n_used,))
 44 |     eigvec = eigvec[:,:n_used]
 45 |     # construct functions for applying PCA
 46 |     f_enc, f_dec, pca_shared_params = \
 47 |             PCA_encdec_theano(eigvec, eigval, x_center, x_sd)
 48 |     pca_shared_params['pca_dim'] = n_used
 49 |     return f_enc, f_dec, pca_shared_params
 50 |         
 51 | def PCA_encdec_theano(eigvec, eigval, x_mean, x_sd):
 52 |     """
 53 |     Construct PCA encoder/decoder functions based around Theano shared
 54 |     variables. Return the function handles and a dict containing the relevant
 55 |     shared variables (well, symbolic references to them, at least).
 56 |     """
 57 |     # construct the shared variables to use in the encoder/decoder functions
 58 |     fx = theano.config.floatX
 59 |     eigval_shared = theano.shared(value=eigval.astype(fx), name='eigval')
 60 |     eigvec_shared = theano.shared(value=eigvec.astype(fx), name='eigvec')
 61 |     x_mean_shared = theano.shared(value=x_mean.astype(fx), name='x_mean')
 62 |     x_sd_shared = theano.shared(value=x_sd.astype(fx), name='x_sd')
 63 |     pca_shared_params = {'eigval': eigval_shared, 'eigvec': eigvec_shared, \
 64 |             'x_mean':x_mean_shared, 'x_sd':x_sd_shared}
 65 |     # construct the encoder/decoder functions using the shared variables
 66 |     def f_enc( x ):
 67 |         x_sands = (x - x_mean_shared) / x_sd_shared
 68 |         result = T.dot(x_sands, eigvec_shared) / T.sqrt(eigval_shared)
 69 |         return result
 70 |     def f_dec( x ):
 71 |         result = (T.dot((x * T.sqrt(eigval_shared)), eigvec_shared.T) * \
 72 |                 x_sd_shared) + x_mean_shared
 73 |         return result
 74 |     return f_enc, f_dec, pca_shared_params
 75 | 
 76 | 
 77 | def norm_clip(dW, max_l2_norm=10.0):
 78 |     """
 79 |     Clip theano symbolic var dW to have some max l2 norm.
 80 |     """
 81 |     dW_l2_norm = T.sqrt(T.sum(dW**2.0))
 82 |     norm_ratio = (max_l2_norm / dW_l2_norm)
 83 |     clip_factor = ifelse(T.lt(norm_ratio, 1.0), norm_ratio, 1.0)
 84 |     dW_clipped = dW * clip_factor
 85 |     return dW_clipped
 86 | 
 87 | def get_adam_updates(params=None, grads=None, \
 88 |         alpha=None, beta1=None, beta2=None, \
 89 |         mom2_init=1e-3, smoothing=1e-6, max_grad_norm=10000.0):
 90 |     """
 91 |     Get the Theano updates to perform ADAM optimization of the shared-var
 92 |     parameters in params, given the shaared-var gradients in grads.
 93 | 
 94 |     params should be an iterable containing "keyable" values, grads should be
 95 |     a dict containing the grads for all values in params, and the remaining
 96 |     arguments should be theano shared variable arrays.
 97 |     """
 98 | 
 99 |     # make an OrderedDict to hold the updates
100 |     updates = OrderedDict()
101 |     
102 |     for p in params:
103 |         # initialize update the iteration counter
104 |         zero_ary = np.zeros((1,)).astype(theano.config.floatX)
105 |         it_count = theano.shared(value=zero_ary)
106 |         it_count_new = it_count + 1.
107 | 
108 |         # apply a bias correction factor to the learning rate
109 |         fix1 = 1. - beta1[0]**(it_count[0] + 1.)
110 |         fix2 = 1. - beta2[0]**(it_count[0] + 1.)
111 |         lr_t = alpha[0] * (T.sqrt(fix2) / fix1)
112 | 
113 |         # get gradient for parameter p
114 |         grad_p = norm_clip(grads[p], max_grad_norm)
115 | 
116 |         # mean_squared_grad := E[g^2]_{t-1}
117 |         mom1_ary = 0.0 * p.get_value(borrow=False)
118 |         mom2_ary = (0.0 * p.get_value(borrow=False)) + mom2_init
119 |         mom1 = theano.shared(mom1_ary)
120 |         mom2 = theano.shared(mom2_ary)
121 |         
122 |         # update moments
123 |         mom1_new = (beta1[0] * mom1) + ((1. - beta1[0]) * grad_p)
124 |         mom2_new = (beta2[0] * mom2) + ((1. - beta2[0]) * T.sqr(grad_p))
125 |         
126 |         # compute the effective gradient
127 |         effgrad = mom1_new / (T.sqrt(mom2_new) + smoothing)
128 |         
129 |         # do update
130 |         p_new = p - (lr_t * effgrad)
131 |             
132 |         # apply updates
133 |         updates[p] = p_new
134 |         updates[mom1] = mom1_new
135 |         updates[mom2] = mom2_new
136 |         updates[it_count] = it_count_new
137 |         
138 |     return updates
139 | 
140 | def get_adadelta_updates(params=None, grads=None, \
141 |         alpha=None, beta1=None, max_grad_norm=10000.0):
142 |     """
143 |     Get the Theano updates to perform AdaDelta optimization of the shared-var
144 |     parameters in params, given the shaared-var gradients in grads.
145 | 
146 |     params should be an iterable containing "keyable" values, grads should be
147 |     a dict containing the grads for all values in params, and the remaining
148 |     arguments should be theano shared variable arrays.
149 |     """
150 | 
151 |     # make an OrderedDict to hold the updates
152 |     updates = OrderedDict()
153 |     lr_t = alpha[0]
154 |     
155 |     for p in params:
156 |         # get gradient for parameter p
157 |         grad_p = norm_clip(grads[p], max_grad_norm)
158 | 
159 |         # initialize squared gradient accumulator
160 |         mom_ary = (0.0 * p.get_value(borrow=False)) + 1.0
161 |         mom1 = theano.shared(mom_ary)
162 |         
163 |         # update moments
164 |         mom1_new = (beta1[0] * mom1) + ((1. - beta1[0]) * T.sqr(grad_p))
165 |         
166 |         # compute the effective gradient
167 |         effgrad = grad_p / (T.sqrt(mom1_new) + 1e-6)
168 |         
169 |         # do update
170 |         p_new = p - (lr_t * clipped_grad)
171 |             
172 |         # apply update
173 |         updates[p] = p_new
174 |         updates[mom1] = mom1_new
175 |         
176 |     return updates


--------------------------------------------------------------------------------
/generative_models/HelperFuncs.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import utils as utils
  3 | import numpy as np
  4 | import numpy.random as npr
  5 | import theano
  6 | import theano.tensor as T
  7 | 
  8 | ##################################
  9 | # MISCELLANEOUS HELPER FUNCTIONS #
 10 | ##################################
 11 | 
 12 | def DCG(x):
 13 |     x_dcg = theano.gradient.disconnected_grad(x)
 14 |     return x_dcg
 15 | 
 16 | def constFX(x):
 17 |     """Cast x as constant TensorVariable with dtype floatX."""
 18 |     x_CFX = T.constant(x, dtype=theano.config.floatX)
 19 |     return x_CFX
 20 | 
 21 | def to_fX(np_ary):
 22 |     np_ary_fX = np_ary.astype(theano.config.floatX)
 23 |     return np_ary_fX
 24 | 
 25 | def posterior_klds(IN, Xtr, batch_size, batch_count):
 26 |     """
 27 |     Get posterior KLd cost for some inputs from Xtr.
 28 |     """
 29 |     post_klds = []
 30 |     for i in range(batch_count):
 31 |         batch_idx = npr.randint(low=0, high=Xtr.shape[0], size=(batch_size,))
 32 |         X = Xtr.take(batch_idx, axis=0)
 33 |         post_klds.extend([k for k in IN.kld_func(X)])
 34 |     return post_klds
 35 | 
 36 | def row_shuffle(X, Y=None):
 37 |     """
 38 |     Return a copy of X with shuffled rows.
 39 |     """
 40 |     shuf_idx = np.arange(X.shape[0])
 41 |     npr.shuffle(shuf_idx)
 42 |     X_shuf = X[shuf_idx]
 43 |     if Y is None:
 44 |         result = X_shuf
 45 |     else:
 46 |         Y_shuf = Y[shuf_idx]
 47 |         result = [X_shuf, Y_shuf]
 48 |     return result
 49 | 
 50 | #####################################
 51 | # HELPER FUNCTIONS FOR DATA MASKING #
 52 | #####################################
 53 | 
 54 | def apply_mask(Xd=None, Xc=None, Xm=None):
 55 |     """
 56 |     Apply a mask, like in the old days.
 57 |     """
 58 |     X_masked = ((1.0 - Xm) * Xd) + (Xm * Xc)
 59 |     return X_masked
 60 | 
 61 | def binarize_data(X):
 62 |     """
 63 |     Make a sample of bernoulli variables with probabilities given by X.
 64 |     """
 65 |     X_shape = X.shape
 66 |     probs = npr.rand(*X_shape)
 67 |     X_binary = 1.0 * (probs < X)
 68 |     return X_binary.astype(theano.config.floatX)
 69 | 
 70 | def sample_masks(X, drop_prob=0.3):
 71 |     """
 72 |     Sample a binary mask to apply to the matrix X, with rate mask_prob.
 73 |     """
 74 |     probs = npr.rand(*X.shape)
 75 |     mask = 1.0 * (probs > drop_prob)
 76 |     return mask.astype(theano.config.floatX)
 77 | 
 78 | def sample_patch_masks(X, im_shape, patch_shape):
 79 |     """
 80 |     Sample a random patch mask for each image in X.
 81 |     """
 82 |     obs_count = X.shape[0]
 83 |     rs = patch_shape[0]
 84 |     cs = patch_shape[1]
 85 |     off_row = npr.randint(1,high=(im_shape[0]-rs-1), size=(obs_count,))
 86 |     off_col = npr.randint(1,high=(im_shape[1]-cs-1), size=(obs_count,))
 87 |     dummy = np.zeros(im_shape)
 88 |     mask = np.zeros(X.shape)
 89 |     for i in range(obs_count):
 90 |         dummy = (0.0 * dummy) + 1.0
 91 |         dummy[off_row[i]:(off_row[i]+rs), off_col[i]:(off_col[i]+cs)] = 0.0
 92 |         mask[i,:] = dummy.ravel()
 93 |     return mask.astype(theano.config.floatX)
 94 | 
 95 | def collect_obs_costs(batch_costs, batch_reps):
 96 |     """
 97 |     Collect per-observation costs from a cost vector containing the cost for
 98 |     multiple repetitions of each observation.
 99 |     """
100 |     obs_count = int(batch_costs.shape[0] / batch_reps)
101 |     obs_costs = np.zeros((obs_count,))
102 |     obs_idx = -1
103 |     for i in range(batch_costs.shape[0]):
104 |         if ((i % batch_reps) == 0):
105 |             obs_idx = obs_idx + 1
106 |         obs_costs[obs_idx] = obs_costs[obs_idx] + batch_costs[i]
107 |     obs_costs = obs_costs / batch_reps
108 |     return obs_costs
109 | 
110 | def construct_masked_data(xi, \
111 |                           drop_prob=0.0, \
112 |                           occ_dim=None, \
113 |                           data_mean=None):
114 |     """
115 |     Construct randomly masked data from xi.
116 |     """
117 |     if data_mean is None:
118 |         data_mean = np.zeros((xi.shape[1],))
119 |     im_dim = int(xi.shape[1]**0.5) # images should be square
120 |     xo = xi.copy()
121 |     if drop_prob > 0.0:
122 |         # apply fully-random occlusion
123 |         xm_rand = sample_masks(xi, drop_prob=drop_prob)
124 |     else:
125 |         # don't apply fully-random occlusion
126 |         xm_rand = np.ones(xi.shape)
127 |     if occ_dim is None:
128 |         # don't apply rectangular occlusion
129 |         xm_patch = np.ones(xi.shape)
130 |     else:
131 |         # apply rectangular occlusion
132 |         xm_patch = sample_patch_masks(xi, (im_dim,im_dim), (occ_dim,occ_dim))
133 |     xm = xm_rand * xm_patch
134 |     xi = (xm * xi) + ((1.0 - xm) * data_mean)
135 |     xi = to_fX(xi)
136 |     xo = to_fX(xo)
137 |     xm = to_fX(xm)
138 |     return xi, xo, xm
139 | 
140 | def shift_and_scale_into_01(X):
141 |     X = X - np.min(X, axis=1, keepdims=True)
142 |     X = X / np.max(X, axis=1, keepdims=True)
143 |     return X


--------------------------------------------------------------------------------
/generative_models/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2014 Philip Bachman
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 4 | this software and associated documentation files (the "Software"), to deal in
 5 | the Software without restriction, including without limitation the rights to
 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 7 | of the Software, and to permit persons to whom the Software is furnished to do
 8 | so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 
21 | 
22 | ********************************************************************************
23 | * The copyright notice below comes from code which has been _heavily_ modified *
24 | * in the production of the code in this directory.                             *
25 | ********************************************************************************
26 | 
27 | 
28 | Copyright (C) 2012 Misha Denil
29 | 
30 | Permission is hereby granted, free of charge, to any person obtaining a copy of
31 | this software and associated documentation files (the "Software"), to deal in
32 | the Software without restriction, including without limitation the rights to
33 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
34 | of the Software, and to permit persons to whom the Software is furnished to do
35 | so, subject to the following conditions:
36 | 
37 | The above copyright notice and this permission notice shall be included in all
38 | copies or substantial portions of the Software.
39 | 
40 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
41 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
42 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
43 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
44 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
45 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
46 | SOFTWARE.
47 | 
48 | 


--------------------------------------------------------------------------------
/generative_models/LogPDFs.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import theano
  3 | import theano.tensor as T
  4 | 
  5 | # library with theano PDF functions
  6 | PI = np.pi
  7 | C = -0.5 * np.log(2*PI)
  8 | 
  9 | def normal(x, mean, sd):
 10 | 	return C - T.log(T.abs_(sd)) - ((x - mean)**2 / (2 * sd**2))
 11 | 
 12 | def normal2(x, mean, logvar):
 13 | 	return C - logvar/2 - (x - mean)**2 / (2 * T.exp(logvar))
 14 | 
 15 | def laplace(x, mean, logvar):
 16 |     sd = T.exp(0.5 * logvar)
 17 |     return -(abs(x - mean) / sd) - (0.5 * logvar) - np.log(2)
 18 |     
 19 | def standard_normal(x):
 20 | 	return C - (x**2 / 2)
 21 | 
 22 | # Centered laplace with unit scale (b=1)
 23 | def standard_laplace(x):
 24 | 	return np.log(0.5) - T.abs_(x)
 25 | 
 26 | # Centered student-t distribution
 27 | # v>0 is degrees of freedom
 28 | # See: http://en.wikipedia.org/wiki/Student's_t-distribution
 29 | def studentt(x, v):
 30 | 	gamma1 = log_gamma_lanczos((v+1)/2.)
 31 | 	gamma2 = log_gamma_lanczos(0.5*v)
 32 | 	return gamma1 - 0.5 * T.log(v * PI) - gamma2 - (v+1)/2. * T.log(1 + (x*x)/v)
 33 | 
 34 | ################################################################
 35 | # Funcs for temporary backwards compatibilit while refactoring #
 36 | ################################################################
 37 | 
 38 | def log_prob_bernoulli(p_true, p_approx, mask=None):
 39 |     """
 40 |     Compute log probability of some binary variables with probabilities
 41 |     given by p_true, for probability estimates given by p_approx. We'll
 42 |     compute joint log probabilities over row-wise groups. (Theano version).
 43 |     """
 44 |     if mask is None:
 45 |         mask = T.ones((1, p_approx.shape[1]))
 46 |     log_prob_1 = p_true * T.log(p_approx+1e-6)
 47 |     log_prob_0 = (1.0 - p_true) * T.log((1.0 - p_approx)+1e-6)
 48 |     log_prob_01 = log_prob_1 + log_prob_0
 49 |     row_log_probs = T.sum((log_prob_01 * mask), axis=1, keepdims=True)
 50 |     return row_log_probs
 51 | 
 52 | def log_prob_bernoulli_np(p_true, p_approx, mask=None):
 53 |     """
 54 |     Compute log probability of some binary variables with probabilities
 55 |     given by p_true, for probability estimates given by p_approx. We'll
 56 |     compute joint log probabilities over row-wise groups. (Numpy version).
 57 |     """
 58 |     if mask is None:
 59 |         mask = np.ones((1, p_approx.shape[1]))
 60 |     log_prob_1 = p_true * np.log(p_approx+1e-6)
 61 |     log_prob_0 = (1.0 - p_true) * np.log((1.0 - p_approx)+1e-6)
 62 |     log_prob_01 = log_prob_1 + log_prob_0
 63 |     row_log_probs = np.sum((log_prob_01 * mask), axis=1)
 64 |     return row_log_probs
 65 | 
 66 | def log_prob_gaussian(mu_true, mu_approx, les_sigmas=1.0, mask=None):
 67 |     """
 68 |     Compute log probability of some continuous variables with values given
 69 |     by mu_true, w.r.t. gaussian distributions with means given by mu_approx
 70 |     and standard deviations given by les_sigmas.
 71 |     """
 72 |     if mask is None:
 73 |         mask = T.ones((1, mu_approx.shape[1]))
 74 |     ind_log_probs = C - T.log(T.abs_(les_sigmas)) - \
 75 |             ((mu_true - mu_approx)**2.0 / (2.0 * les_sigmas**2.0))
 76 |     row_log_probs = T.sum((ind_log_probs * mask), axis=1, keepdims=True)
 77 |     return row_log_probs
 78 | 
 79 | def log_prob_gaussian2(mu_true, mu_approx, log_vars=1.0, mask=None):
 80 |     """
 81 |     Compute log probability of some continuous variables with values given
 82 |     by mu_true, w.r.t. gaussian distributions with means given by mu_approx
 83 |     and log variances given by les_logvars.
 84 |     """
 85 |     if mask is None:
 86 |         mask = T.ones((1, mu_approx.shape[1]))
 87 |     ind_log_probs = C - (0.5 * log_vars)  - \
 88 |             ((mu_true - mu_approx)**2.0 / (2.0 * T.exp(log_vars)))
 89 |     row_log_probs = T.sum((ind_log_probs * mask), axis=1, keepdims=True)
 90 |     return T.cast(row_log_probs, 'floatX')
 91 | 
 92 | def gaussian_kld(mu_left, logvar_left, mu_right, logvar_right):
 93 |     """
 94 |     Compute KL divergence between a bunch of univariate Gaussian distributions
 95 |     with the given means and log-variances.
 96 |     We do KL(N(mu_left, logvar_left) || N(mu_right, logvar_right)).
 97 |     """
 98 |     gauss_klds = 0.5 * (logvar_right - logvar_left + \
 99 |             (T.exp(logvar_left) / T.exp(logvar_right)) + \
100 |             ((mu_left - mu_right)**2.0 / T.exp(logvar_right)) - 1.0)
101 |     return gauss_klds
102 | 
103 | def gaussian_kld_BN(logvar_left, logvar_right):
104 |     """
105 |     Compute KL divergence between a bunch of univariate Gaussian distributions
106 |     with the given means and log-variances.
107 |     We do KL(N(mu_left, logvar_left) || N(mu_right, logvar_right)).
108 |     """
109 |     gauss_klds = 0.5 * (logvar_right - logvar_left + \
110 |             (T.exp(logvar_left) / T.exp(logvar_right)) + \
111 |             (1.0 / T.exp(logvar_right)) - 1.0)
112 |     return gauss_klds
113 | 
114 | #################################
115 | # Log-gamma function for theano #
116 | #################################
117 | LOG_PI = np.log(PI)
118 | LOG_SQRT_2PI = np.log(np.sqrt(2*PI))
119 | def log_gamma_lanczos(z):
120 |     # reflection formula. Normally only used for negative arguments,
121 |     # but here it's also used for 0 < z < 0.5 to improve accuracy in this region.
122 |     flip_z = 1 - z
123 |     # because both paths are always executed (reflected and non-reflected),
124 |     # the reflection formula causes trouble when the input argument is larger than one.
125 |     # Note that for any z > 1, flip_z < 0.
126 |     # To prevent these problems, we simply set all flip_z < 0 to a 'dummy' value.
127 |     # This is not a problem, since these computations are useless anyway and
128 |     # are discarded by the T.switch at the end of the function.
129 |     flip_z = T.switch(flip_z < 0, 1, flip_z)
130 |     small = LOG_PI - T.log(T.sin(PI * z)) - log_gamma_lanczos_sub(flip_z)
131 |     big = log_gamma_lanczos_sub(z)
132 |     return T.switch(z < 0.5, small, big)
133 |    
134 | ## version that isn't vectorised, since g is small anyway
135 | def log_gamma_lanczos_sub(z): #expanded version
136 |     # Coefficients used by the GNU Scientific Library
137 |     g = 7
138 |     p = np.array([0.99999999999980993, 676.5203681218851, -1259.1392167224028,
139 |                   771.32342877765313, -176.61502916214059, 12.507343278686905,
140 |                   -0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7])                    
141 |     z = z - 1
142 |     x = p[0]
143 |     for i in range(1, g+2):
144 |         x += p[i]/(z+i)
145 |     t = z + g + 0.5
146 |     return LOG_SQRT_2PI + (z + 0.5) * T.log(t) - t + T.log(x)
147 | 
148 | ############################
149 | # PARZEN DENSITY ESTIMATOR #
150 | ############################
151 | import time
152 | import gc
153 | 
154 | def get_nll(x, parzen, batch_size=100):
155 |     """
156 |     Credit: Yann N. Dauphin
157 |     """
158 | 
159 |     inds = range(x.shape[0])
160 |     n_batches = int(np.ceil(float(len(inds)) / batch_size))
161 | 
162 |     times = []
163 |     nlls = []
164 |     for i in range(n_batches):
165 |         begin = time.time()
166 |         nll = parzen(x[inds[i::n_batches]])
167 |         end = time.time()
168 |         times.append(end-begin)
169 |         nlls.extend(nll)
170 |         if i % 10 == 0:
171 |             print i, np.mean(times), np.mean(nlls)
172 |     return np.array(nlls)
173 | 
174 | 
175 | def log_mean_exp(a):
176 |     """
177 |     Credit: Yann N. Dauphin
178 |     """
179 |     max_ = a.max(1)
180 |     result = max_ + T.log(T.exp(a - max_.dimshuffle(0, 'x')).mean(1))
181 |     return result 
182 | 
183 | def theano_parzen(mu, sigma):
184 |     """
185 |     Credit: Yann N. Dauphin
186 |     """
187 |     x = T.matrix()
188 |     mu = theano.shared(mu)
189 |     a = ( x.dimshuffle(0, 'x', 1) - mu.dimshuffle('x', 0, 1) ) / sigma
190 |     E = log_mean_exp(-0.5*(a**2).sum(2))
191 |     Z = mu.shape[1] * T.log(sigma * np.sqrt(np.pi * 2))
192 |     parzen_func = theano.function([x], E - Z)
193 |     return parzen_func
194 | 
195 | def cross_validate_sigma(samples, data, sigmas, batch_size):
196 |     """
197 |     Find which sigma is best for the Parzen estimator bound.
198 |     """
199 |     lls = []
200 |     best_ll = -1e6
201 |     best_lls = None
202 |     best_sigma = None
203 |     for sigma in sigmas:
204 |         print sigma
205 |         parzen = theano_parzen(samples, sigma)
206 |         tmp = get_nll(data, parzen, batch_size=batch_size)
207 |         sigma_lls = np.asarray(tmp)
208 |         mean_ll = sigma_lls.mean()
209 |         lls.append(mean_ll)
210 |         if (mean_ll > best_ll):
211 |             best_ll = mean_ll
212 |             best_lls = sigma_lls
213 |             best_sigma = sigma
214 |         del parzen
215 |         gc.collect()
216 |     return [best_sigma, best_ll, best_lls]


--------------------------------------------------------------------------------
/generative_models/MCSampler.py:
--------------------------------------------------------------------------------
  1 | ##################################################################
  2 | # CODE FOR EFFICIENTLY SAMPLING A (SMALL) FIXED-LENGTH VAE CHAIN #
  3 | ##################################################################
  4 | 
  5 | # basic python
  6 | import numpy as np
  7 | import numpy.random as npr
  8 | from collections import OrderedDict
  9 | 
 10 | # theano business
 11 | import theano
 12 | import theano.tensor as T
 13 | #from theano.tensor.shared_randomstreams import RandomStreams as RandStream
 14 | from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams as RandStream
 15 | 
 16 | # phil's sweetness
 17 | from GIPair import GIPair
 18 | from NetLayers import apply_mask
 19 | 
 20 | 
 21 | class MCSampler(object):
 22 |     """
 23 |     Class for quickly sampling some small fixed number of steps from the
 24 |     Markov chain constructed by self-looping a variational auto-encoder.
 25 | 
 26 |     Parameters:
 27 |         rng: numpy.random.RandomState (for reproducibility)
 28 |         Xd: symbolic var for providing points for starting the Markov Chain
 29 |         i_net: The InfNet instance that will serve as the inferencer
 30 |         g_net: The GenNet instance that will serve as the generator
 31 |         d_net: The PeaNet instance that will serve as the discriminator
 32 |         chain_len: number of steps to unroll the VAE Markov Chain
 33 |         data_dim: dimension of the generated data
 34 |         prior_dim: dimension of the model prior
 35 |     """
 36 |     def __init__(self, rng=None, Xd=None, \
 37 |                  i_net=None, g_net=None, chain_len=None, \
 38 |                  data_dim=None, prior_dim=None):
 39 |         # Do some stuff!
 40 |         self.rng = RandStream(rng.randint(100000))
 41 |         self.data_dim = data_dim
 42 |         self.prior_dim = prior_dim
 43 | 
 44 |         # symbolic var for inputting samples for initializing the VAE chain
 45 |         self.Xd = Xd
 46 |         # symbolic var for masking subsets of the state variables
 47 |         self.Xm = T.zeros_like(self.Xd)
 48 |         # symbolic var for controlling subsets of the state variables
 49 |         self.Xc = T.zeros_like(self.Xd)
 50 |         # integer number of times to cycle the VAE loop
 51 |         self.chain_len = chain_len
 52 | 
 53 |         # get a clone of the desired VAE, for easy access
 54 |         self.GIP = GIPair(rng=rng, Xd=self.Xd, Xc=self.Xc, Xm=self.Xm, \
 55 |                 g_net=g_net, i_net=i_net, data_dim=self.data_dim, \
 56 |                 prior_dim=self.prior_dim, params=None, shared_param_dicts=None)
 57 |         self.IN = self.GIP.IN
 58 |         self.GN = self.GIP.GN
 59 |         self.use_encoder = self.IN.use_encoder
 60 |         assert(self.use_encoder == self.GN.use_decoder)
 61 |         # self-loop some clones of the main VAE into a chain.
 62 |         # ** All VAEs in the chain share the same Xc and Xm, which are the
 63 |         #    symbolic inputs for providing the observed portion of the input
 64 |         #    and a mask indicating which part of the input is "observed".
 65 |         #    These inputs are used for training "reconstruction" policies.
 66 |         self.IN_chain = []
 67 |         self.GN_chain = []
 68 |         self.Xg_chain = []
 69 |         _Xd = self.Xd
 70 |         for i in range(self.chain_len):
 71 |             if (i == 0):
 72 |                 # start the chain with data provided by used
 73 |                 _IN = self.IN.shared_param_clone(rng=rng, \
 74 |                         Xd=apply_mask(Xd=_Xd, Xc=self.Xc, Xm=self.Xm))
 75 |                 _GN = self.GN.shared_param_clone(rng=rng, Xp=_IN.output)
 76 |             else:
 77 |                 # continue the chain with samples from previous VAE
 78 |                 _IN = self.IN.shared_param_clone(rng=rng, \
 79 |                         Xd=apply_mask(Xd=_Xd, Xc=self.Xc, Xm=self.Xm))
 80 |                 _GN = self.GN.shared_param_clone(rng=rng, Xp=_IN.output)
 81 |             if self.use_encoder:
 82 |                 # use the "decoded" output of the previous generator as input
 83 |                 # to the next inferencer, which will re-encode it prior to
 84 |                 # inference
 85 |                 _Xd = _GN.output_decoded
 86 |             else:
 87 |                 # use the "encoded" output of the previous generator as input
 88 |                 # to the next inferencer, as the inferencer won't try to 
 89 |                 # re-encode it prior to inference
 90 |                 _Xd = _GN.output
 91 |             self.IN_chain.append(_IN)
 92 |             self.GN_chain.append(_GN)
 93 |             self.Xg_chain.append(_Xd)
 94 | 
 95 |         # construct the function for training on training data
 96 |         self.sample_from_chain = self._construct_sample_from_chain()
 97 |         return
 98 | 
 99 |     def _construct_sample_from_chain(self):
100 |         """
101 |         Sample for several steps of a self-looped VAE.
102 |         """
103 |         outputs = [Xg for Xg in self.Xg_chain]
104 |         sample_func = theano.function([self.Xd], outputs=outputs)
105 |         return sample_func
106 | 
107 | def resample_chain_steps(MCS, Xtr_chains):
108 |     # get and set some basic dataset information
109 |     assert(len(Xtr_chains) == (MCS.chain_len + 1))
110 |     Xtr = Xtr_chains[0]
111 |     for Xc in Xtr_chains:
112 |         assert(Xc.shape[0] == Xtr.shape[0])
113 |         assert(Xc.shape[1] == Xtr.shape[1])
114 |     tr_samples = Xtr.shape[0]
115 |     data_dim = Xtr.shape[1]
116 |     batch_size = 5000
117 |     batch_count = int(np.ceil(tr_samples / float(batch_size)))
118 |     # print("Resampling {0:d} batches of {1:d} chains with {2:d} steps...".format(batch_count, batch_size, MCS.chain_len))
119 |     for i in range(batch_count):
120 |         batch_start = i * batch_size
121 |         batch_end = min(tr_samples, (batch_start + batch_size))
122 |         batch_Xd = Xtr[batch_start:batch_end]
123 |         batch_chains = MCS.sample_from_chain(batch_Xd)
124 |         for j in range(len(batch_chains)):
125 |             Xtr_chains[j+1][batch_start:batch_end] = batch_chains[j]
126 |     return Xtr_chains
127 | 
128 | 
129 | if __name__=="__main__":
130 |     import utils
131 |     import time
132 |     from load_data import load_udm
133 |     import InfNet as INet
134 |     import GenNet as GNet
135 |     # Initialize a source of randomness
136 |     rng = npr.RandomState(12345)
137 |     # Load some data to train/validate/test with
138 |     dataset = 'data/mnist.pkl.gz'
139 |     datasets = load_udm(dataset, zero_mean=False)
140 |     Xtr = datasets[0][0]
141 |     Xtr = Xtr.get_value(borrow=False)
142 |     Xva = datasets[1][0]
143 |     Xva = Xva.get_value(borrow=False)
144 |     print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape),str(Xva.shape)))
145 | 
146 |     # get and set some basic dataset information
147 |     tr_samples = Xtr.shape[0]
148 |     data_dim = Xtr.shape[1]
149 |     batch_size = 2000
150 |     batch_count = int(np.ceil(tr_samples / float(batch_size)))
151 | 
152 |     # Symbolic inputs
153 |     Xd = T.matrix(name='Xd')
154 |     Xc = T.matrix(name='Xc')
155 |     Xm = T.matrix(name='Xm')
156 |     Xt = T.matrix(name='Xt')
157 |     Xp = T.matrix(name='Xp')
158 | 
159 |     # Load inferencer and generator from saved parameters
160 |     gn_fname = "MNIST_WALKOUT_TEST_BIN/pt_walk_params_b150000_GN.pkl"
161 |     in_fname = "MNIST_WALKOUT_TEST_BIN/pt_walk_params_b150000_IN.pkl"
162 |     IN = INet.load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
163 |     GN = GNet.load_gennet_from_file(f_name=gn_fname, rng=rng, Xp=Xp)
164 |     IN.set_sigma_scale(1.25)
165 |     prior_dim = GN.latent_dim
166 | 
167 |     MCS = MCSampler(rng=rng, Xd=Xd, i_net=IN, g_net=GN, chain_len=9, \
168 |                     data_dim=data_dim, prior_dim=prior_dim)
169 | 
170 |     Xtr_chains = [Xtr]
171 |     for i in range(MCS.chain_len):
172 |         Xtr_chains.append(0.0*Xtr)
173 | 
174 |     print("Testing chain sampler....")
175 |     loop_times = []
176 |     # TESTING SAMPLING SPEED!
177 |     for i in range(batch_count):
178 |         start_time = time.clock()
179 |         batch_start = i * batch_size
180 |         batch_end = min(tr_samples, (batch_start + batch_size))
181 |         Xd_batch = Xtr[batch_start:batch_end]
182 |         Xd_chain = MCS.sample_from_chain(Xd_batch)
183 |         Xs = [Xd_batch[0:50]]
184 |         Xs.extend([xd[0:50] for xd in Xd_chain])
185 |         file_name = "MCS_TEST_{0:d}.png".format(i)
186 |         utils.visualize_samples(np.vstack(Xs), file_name, num_rows=10)
187 |         loop_times.append((time.clock() - start_time))
188 |     total_time = sum(loop_times)
189 |     mean_time = total_time / batch_count
190 |     time_std = sum([(t - mean_time)**2.0 for t in loop_times]) / batch_count
191 |     print("total_time: {0:.4f}".format(total_time))
192 |     print("mean_time: {0:.4f}, time_std: {1:.4f}".format(mean_time, time_std))
193 |     start_time = time.clock()
194 |     Xtr_chains = resample_chain_steps(MCS, Xtr_chains)
195 |     total_time = time.clock() - start_time
196 |     print("total_time: {0:.4f}".format(total_time))
197 | 
198 | 
199 | 
200 | 
201 | 
202 | ##############
203 | # EYE BUFFER #
204 | ##############
205 | 


--------------------------------------------------------------------------------
/generative_models/MnistWalkReg.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import utils as utils
  3 | import numpy as np
  4 | import numpy.random as npr
  5 | import theano
  6 | import theano.tensor as T
  7 | 
  8 | from load_data import load_udm, load_udm_ss, load_mnist
  9 | from PeaNet import PeaNet, load_peanet_from_file
 10 | from InfNet import InfNet, load_infnet_from_file
 11 | from GenNet import GenNet, load_gennet_from_file
 12 | from PeaNetSeq import PeaNetSeq
 13 | from GIPair import GIPair
 14 | from NetLayers import relu_actfun, softplus_actfun, \
 15 |                       safe_softmax, safe_log
 16 | import GenNet as GNet
 17 | import InfNet as INet
 18 | import PeaNet as PNet
 19 | from DKCode import PCA_theano
 20 | from MCSampler import MCSampler, resample_chain_steps
 21 | 
 22 | def downsample_chains(X_chain, stride=1):
 23 | 	Xs = [X_chain[i] for i in range(len(X_chain)) if ((i % stride) == 0)]
 24 | 	return Xs
 25 | 
 26 | 
 27 | def manifold_walk_regularization():
 28 | 
 29 |     for t_num in range(10):
 30 | 	    out_file = open("MWR_TEST_RESULTS_{0:d}.txt".format(t_num), 'wb')
 31 | 
 32 | 	    # Initialize a source of randomness
 33 | 	    rng = np.random.RandomState(t_num)
 34 | 
 35 | 	    # Load some data to train/validate/test with
 36 | 	    sup_count = 600
 37 | 	    dataset = 'data/mnist.pkl.gz'
 38 | 	    datasets = load_udm_ss(dataset, sup_count, rng, zero_mean=False)
 39 | 	    Xtr_su = datasets[0][0].get_value(borrow=False)
 40 | 	    Ytr_su = datasets[0][1].get_value(borrow=False).astype(np.int32)
 41 | 	    Xtr_un = datasets[1][0].get_value(borrow=False)
 42 | 	    Ytr_un = datasets[1][1].get_value(borrow=False).astype(np.int32)
 43 | 
 44 | 	    # get the joint labeled and unlabeled data
 45 | 	    Xtr_un = np.vstack([Xtr_su, Xtr_un]).astype(theano.config.floatX)
 46 | 	    Ytr_un = np.vstack([Ytr_su[:,np.newaxis], Ytr_un[:,np.newaxis]])
 47 | 	    Ytr_un = 0 * Ytr_un # KEEP CATS FIXED OR FREE? YES/NO?
 48 | 	    Xtr_mean = np.mean(Xtr_un, axis=0, keepdims=True)
 49 | 	    # get the labeled data
 50 | 	    Xtr_su = Xtr_su.astype(theano.config.floatX)
 51 | 	    Ytr_su = Ytr_su[:,np.newaxis]
 52 | 	    # get observations and labels for the validation set
 53 | 	    Xva = datasets[2][0].get_value(borrow=False).astype(theano.config.floatX)
 54 | 	    Yva = datasets[2][1].get_value(borrow=False).astype(np.int32)
 55 | 	    Yva = Yva[:,np.newaxis] # numpy is dumb
 56 | 	    # get observations and labels for the test set
 57 | 	    Xte = datasets[3][0].get_value(borrow=False).astype(theano.config.floatX)
 58 | 	    Yte = datasets[3][1].get_value(borrow=False).astype(np.int32)
 59 | 	    Yte = Yte[:,np.newaxis] # numpy is dumb
 60 | 	    # get size information for the data and training batches
 61 | 	    un_samples = Xtr_un.shape[0]
 62 | 	    su_samples = Xtr_su.shape[0]
 63 | 	    va_samples = Xva.shape[0]
 64 | 	    data_dim = Xtr_su.shape[1]
 65 | 	    label_dim = 10
 66 | 	    batch_size = 100
 67 | 
 68 | 	    # Symbolic inputs
 69 | 	    Xd = T.matrix(name='Xd')
 70 | 	    Xc = T.matrix(name='Xc')
 71 | 	    Xm = T.matrix(name='Xm')
 72 | 	    Xt = T.matrix(name='Xt')
 73 | 	    Xp = T.matrix(name='Xp')
 74 | 	    Yd = T.icol('Yd')
 75 | 
 76 | 	    # Load inferencer and generator from saved parameters
 77 | 	    gn_fname = "MNIST_WALKOUT_TEST_BIN/pt_walk_params_b150000_GN.pkl"
 78 | 	    in_fname = "MNIST_WALKOUT_TEST_BIN/pt_walk_params_b150000_IN.pkl"
 79 | 	    IN = INet.load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
 80 | 	    GN = GNet.load_gennet_from_file(f_name=gn_fname, rng=rng, Xp=Xp)
 81 | 	    IN.set_sigma_scale(1.3)
 82 | 	    prior_dim = GN.latent_dim
 83 | 
 84 | 	    MCS = MCSampler(rng=rng, Xd=Xd, i_net=IN, g_net=GN, chain_len=2, \
 85 | 	                    data_dim=data_dim, prior_dim=prior_dim)
 86 | 	    full_chain_len = MCS.chain_len + 1
 87 | 
 88 | 	    # setup "chain" versions of the labeled/unlabeled/validate sets
 89 | 	    Xtr_su_chains = [Xtr_su.copy() for i in range(full_chain_len)]
 90 | 	    Xtr_un_chains = [Xtr_un.copy() for i in range(full_chain_len)]
 91 | 	    Ytr_su_chains = [Ytr_su for i in range(full_chain_len)]
 92 | 	    Ytr_un_chains = [Ytr_un for i in range(full_chain_len)]
 93 | 	    Xva_chains = [Xva for i in range(full_chain_len)]
 94 | 	    Yva_chains = [Yva for i in range(full_chain_len)]
 95 | 
 96 | 	    # downsample, to feed less into the PNS
 97 | 	    Xtr_su_short = downsample_chains(Xtr_su_chains, stride=1)
 98 | 	    Xtr_un_short = downsample_chains(Xtr_un_chains, stride=1)
 99 | 	    Ytr_su_short = downsample_chains(Ytr_su_chains, stride=1)
100 | 	    Ytr_un_short = downsample_chains(Ytr_un_chains, stride=1)
101 | 	    Xva_short = downsample_chains(Xva_chains, stride=1)
102 | 	    Yva_short = downsample_chains(Yva_chains, stride=1)
103 | 	    short_chain_len = len(Xtr_su_short)
104 | 	    print("REGULARIZATION CHAIN STEPS: {0:d}".format(short_chain_len))
105 | 
106 | 	    # choose some parameters for the categorical inferencer
107 | 	    pn_params = {}
108 | 	    pc0 = [data_dim, 800, 800, label_dim]
109 | 	    pn_params['proto_configs'] = [pc0]
110 | 	    # Set up some spawn networks
111 | 	    sc0 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
112 | 	    pn_params['spawn_configs'] = [ sc0 ]
113 | 	    pn_params['spawn_weights'] = [ 1.0 ]
114 | 	    # Set remaining params
115 | 	    pn_params['activation'] = relu_actfun
116 | 	    pn_params['init_scale'] = 0.5
117 | 	    pn_params['lam_l2a'] = 1e-3
118 | 	    pn_params['vis_drop'] = 0.2
119 | 	    pn_params['hid_drop'] = 0.5
120 | 
121 | 	    # Initialize the base network for this PNSeq
122 | 	    PN = PeaNet(rng=rng, Xd=Xd, params=pn_params)
123 | 	    PN.init_biases(0.1)
124 | 
125 | 	    print("Initializing PNS...")
126 | 	    # Initialize the PeaNetSeq
127 | 	    PNS = PeaNetSeq(rng=rng, pea_net=PN, seq_len=short_chain_len, \
128 | 	    		seq_Xd=None, params=None)
129 | 
130 | 	    # set weighting parameters for the various costs...
131 | 	    PNS.set_lam_class(1.0)
132 | 	    PNS.set_lam_pea_su(0.0)
133 | 	    PNS.set_lam_pea_un(2.0)
134 | 	    PNS.set_lam_ent(0.0)
135 | 	    PNS.set_lam_l2w(1e-5)
136 | 
137 | 	    learn_rate = 0.05
138 | 	    PNS.set_pn_sgd_params(lr_pn=learn_rate, mom_1=0.9, mom_2=0.999)
139 | 	    for i in range(300000):
140 | 	        if i < 5000:
141 | 	            scale = float(i + 1) / 5000.0
142 | 	        if ((i+1 % 100000) == 0):
143 | 	            learn_rate = learn_rate * 0.5
144 | 	        if ((i % 250) == 0):
145 | 	        	Xtr_su_chains = resample_chain_steps(MCS, Xtr_su_chains)
146 | 	        	Xtr_un_chains = resample_chain_steps(MCS, Xtr_un_chains)
147 | 	        	Xtr_su_short = downsample_chains(Xtr_su_chains, stride=1)
148 | 	        	Xtr_un_short = downsample_chains(Xtr_un_chains, stride=1)
149 | 	        # get some data to train with
150 | 	        su_idx = npr.randint(low=0,high=su_samples,size=(batch_size,))
151 | 	        xsuc = [(x.take(su_idx, axis=0) - Xtr_mean) for x in Xtr_su_short]
152 | 	        ysuc = [y.take(su_idx, axis=0) for y in Ytr_su_short]
153 | 	        un_idx = npr.randint(low=0,high=un_samples,size=(batch_size,))
154 | 	        xunc = [(x.take(un_idx, axis=0) - Xtr_mean) for x in Xtr_un_short]
155 | 	        yunc = [y.take(un_idx, axis=0) for y in Ytr_un_short]
156 | 	        Xb_chains = [np.vstack((xsu, xun)) for (xsu, xun) in zip(xsuc, xunc)]
157 | 	        Yb_chains = [np.vstack((ysu, yun)) for (ysu, yun) in zip(ysuc, yunc)]
158 | 	        # set learning parameters for this update
159 | 	        PNS.set_pn_sgd_params(lr_pn=learn_rate, mom_1=0.9, mom_2=0.999)
160 | 	        # do a minibatch update of all PeaNet parameters
161 | 	        outputs = PNS.train_joint(*(Xb_chains + Yb_chains))
162 | 	        joint_cost = 1.0 * outputs[0]
163 | 	        class_cost = 1.0 * outputs[1]
164 | 	        pea_cost = 1.0 * outputs[2]
165 | 	        ent_cost = 1.0 * outputs[3]
166 | 	        other_reg_cost = 1.0 * outputs[4]
167 | 	        assert(not (np.isnan(joint_cost)))
168 | 	        if ((i % 500) == 0):
169 | 	            o_str = "batch: {0:d}, joint: {1:.4f}, class: {2:.4f}, pea: {3:.4f}, ent: {4:.4f}, other_reg: {5:.4f}".format( \
170 | 	                    i, joint_cost, class_cost, pea_cost, ent_cost, other_reg_cost)
171 | 	            print(o_str)
172 | 	            out_file.write(o_str+"\n")
173 | 	            out_file.flush()
174 | 	            # check classification error on training and validation set
175 | 	            train_err = PNS.classification_error(Xtr_su-Xtr_mean, Ytr_su)
176 | 	            va_err = PNS.classification_error(Xva-Xtr_mean, Yva)
177 | 	            o_str = "    tr_err: {0:.4f}, va_err: {1:.4f}".format(train_err, va_err)
178 | 	            print(o_str)
179 | 	            out_file.write(o_str+"\n")
180 | 	            out_file.flush()
181 | 	        if ((i % 1000) == 0):
182 | 	            # draw the main PeaNet's first-layer filters/weights
183 | 	            file_name = "MWR_PN_WEIGHTS.png".format(i)
184 | 	            utils.visualize_net_layer(PNS.PN.proto_nets[0][0], file_name)
185 | 	    print("TESTING COMPLETE!")
186 | 
187 | if __name__ == "__main__":
188 | 	manifold_walk_regularization()


--------------------------------------------------------------------------------
/generative_models/SVHNWalkReg.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import utils as utils
  3 | import numpy as np
  4 | import numpy.random as npr
  5 | import theano
  6 | import theano.tensor as T
  7 | 
  8 | from load_data import load_svhn, load_svhn_gray, load_svhn_all_gray_zca
  9 | from PeaNet import PeaNet, load_peanet_from_file
 10 | from InfNet import InfNet, load_infnet_from_file
 11 | from GenNet import GenNet, load_gennet_from_file
 12 | from PeaNetSeq import PeaNetSeq
 13 | from VCGLoop import VCGLoop
 14 | from GIPair import GIPair
 15 | from NetLayers import relu_actfun, softplus_actfun, \
 16 |                       safe_softmax, safe_log
 17 | import GenNet as GNet
 18 | import InfNet as INet
 19 | import PeaNet as PNet
 20 | from DKCode import PCA_theano
 21 | 
 22 | import sys, resource
 23 | resource.setrlimit(resource.RLIMIT_STACK, (2**29,-1))
 24 | sys.setrecursionlimit(10**6)
 25 | 
 26 | # DERP
 27 | RESULT_PATH = "SVHN_WALKOUT_TEST_KLD/"
 28 | 
 29 | ####################
 30 | # HELPER FUNCTIONS #
 31 | ####################
 32 | 
 33 | def shift_and_scale_into_01(X):
 34 |     X = X - np.min(X, axis=1, keepdims=True)
 35 |     X = X / np.max(X, axis=1, keepdims=True)
 36 |     return X
 37 | 
 38 | def train_valid_split(X, valid_count=1000):
 39 |     """
 40 |     Split the observations in the rows of X into train/validate sets.
 41 |     """
 42 |     obs_count = X.shape[0]
 43 |     idx = np.arange(obs_count)
 44 |     npr.shuffle(idx)
 45 |     va_idx = idx[:valid_count]
 46 |     tr_idx = idx[valid_count:]
 47 |     Xtr = X.take(tr_idx, axis=0)
 48 |     Xva = X.take(va_idx, axis=0)
 49 |     return Xtr, Xva
 50 | 
 51 | ##########################################
 52 | ##########################################
 53 | ## TEST SEMISUPERVISED LEARNING ON SVHN ##
 54 | ##########################################
 55 | ##########################################
 56 | 
 57 | def test_semisupervised():
 58 |     import utils as utils
 59 |     from load_data import load_udm, load_udm_ss, load_mnist
 60 |     from NetLayers import relu_actfun
 61 | 
 62 |     # Initialize a source of randomness
 63 |     rng = np.random.RandomState(123)
 64 | 
 65 |     sup_count = 1000
 66 |     va_count = 10000
 67 |     # Load some data to train/validate/test with
 68 |     tr_file = 'data/svhn_train_gray.pkl'
 69 |     te_file = 'data/svhn_test_gray.pkl'
 70 |     ex_file = 'data/svhn_extra_gray.pkl'
 71 |     data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
 72 |     X_mean = np.mean(data['Xtr'], axis=0, keepdims=True)
 73 |     X_std = np.std(data['Xtr'], axis=0, keepdims=True)
 74 |     data['Xtr'] = (data['Xtr'] - X_mean) / X_std
 75 |     data['Xte'] = (data['Xte'] - X_mean) / X_std
 76 |     data['Xex'] = (data['Xex'] - X_mean) / X_std
 77 |     idx = np.arange(data['Xtr'].shape[0])
 78 |     npr.shuffle(idx)
 79 |     Xva = data['Xte'][:,:] #[idx[0:va_count],:]
 80 |     Yva = data['Yte'][:,:].astype(np.int32) # [idx[0:va_count],:].astype(np.int32)
 81 |     Xtr_su = data['Xtr'][idx[va_count:(va_count+sup_count)], :]
 82 |     Ytr_su = data['Ytr'][idx[va_count:(va_count+sup_count)], :].astype(np.int32)
 83 |     Xtr_un = np.vstack([data['Xtr'][idx[va_count:], :], data['Xex']])
 84 |     Ytr_un = np.zeros((Xtr_un.shape[0],1)).astype(np.int32)
 85 |     print("unique(Ytr_su): {0:s}".format(str(np.unique(Ytr_su))))
 86 |     print("unique(Ytr_un): {0:s}".format(str(np.unique(Ytr_un))))
 87 |     print("Xtr_su.shape: {0:s}, Ytr_su.shape: {1:s}".format(str(Xtr_su.shape), str(Ytr_su.shape)))
 88 |     print("Xva.shape: {0:s}, Yva.shape: {1:s}".format(str(Xva.shape), str(Yva.shape)))
 89 | 
 90 |     un_samples = Xtr_un.shape[0]
 91 |     su_samples = Xtr_su.shape[0]
 92 |     va_samples = Xva.shape[0]
 93 | 
 94 |     # set up some symbolic variables for input to the GITrip
 95 |     Xd = T.matrix('Xd_base')
 96 |     Yd = T.icol('Yd_base')
 97 |     # set some "shape" parameters for the networks
 98 |     data_dim = Xtr_un.shape[1]
 99 |     label_dim = 10
100 |     batch_size = 200 # we'll take 2x this per batch, for sup and unsup
101 | 
102 |     # choose some parameters for the categorical inferencer
103 |     pn_params = {}
104 |     pc0 = [data_dim, 800, 800, label_dim]
105 |     pn_params['proto_configs'] = [pc0]
106 |     # Set up some spawn networks
107 |     sc0 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.2, 'do_dropout': True}
108 |     pn_params['spawn_configs'] = [ sc0 ]
109 |     pn_params['spawn_weights'] = [ 1.0 ]
110 |     # Set remaining params
111 |     pn_params['activation'] = relu_actfun
112 |     pn_params['init_scale'] = 0.5
113 |     pn_params['lam_l2a'] = 1e-3
114 |     pn_params['vis_drop'] = 0.2
115 |     pn_params['hid_drop'] = 0.5
116 | 
117 |     # Initialize the base network for this PNSeq
118 |     PN = PeaNet(rng=rng, Xd=Xd, params=pn_params)
119 |     PN.init_biases(0.1)
120 | 
121 |     # Initialize the PeaNetSeq
122 |     PNS = PeaNetSeq(rng=rng, pea_net=PN, seq_len=2, seq_Xd=None, params=None)
123 | 
124 |     # set weighting parameters for the various costs...
125 |     PNS.set_lam_class(1.0)
126 |     PNS.set_lam_pea_su(0.0)
127 |     PNS.set_lam_pea_un(1.0)
128 |     PNS.set_lam_ent(0.0)
129 |     PNS.set_lam_l2w(1e-5)
130 | 
131 |     out_file = open("SVHN_SS_TEST.txt", 'wb')
132 |     cost_1 = [0. for i in range(10)]
133 |     learn_rate = 0.02
134 |     PNS.set_pn_sgd_params(lr_pn=learn_rate, mom_1=0.9, mom_2=0.999)
135 |     for i in range(300000):
136 |         # get some data to train with
137 |         su_idx = npr.randint(low=0,high=su_samples,size=(batch_size,))
138 |         Xd_su = Xtr_su.take(su_idx, axis=0)
139 |         Yd_su = Ytr_su.take(su_idx, axis=0)
140 |         un_idx = npr.randint(low=0,high=un_samples,size=(batch_size,))
141 |         Xd_un = Xtr_un.take(un_idx, axis=0)
142 |         Yd_un = Ytr_un.take(un_idx, axis=0)
143 |         Xd_batch = np.vstack((Xd_su, Xd_un))
144 |         Yd_batch = np.vstack((Yd_su, Yd_un))
145 |         # set learning parameters for this update
146 |         PNS.set_pn_sgd_params(lr_pn=learn_rate, mom_1=0.9, mom_2=0.999)
147 |         # do a minibatch update of all PeaNet parameters
148 |         outputs = PNS.train_joint(Xd_batch, Xd_batch, Yd_batch, Yd_batch)
149 |         cost_1 = [(cost_1[k] + 1.*outputs[k]) for k in range(len(outputs))]
150 |         if ((i % 1000) == 0):
151 |             cost_1 = [(v / 1000.) for v in cost_1]
152 |             o_str = "batch: {0:d}, joint: {1:.4f}, class: {2:.4f}, pea: {3:.4f}, ent: {4:.4f}, other_reg: {5:.4f}".format( \
153 |                     i, cost_1[0], cost_1[1], cost_1[2], cost_1[3], cost_1[4])
154 |             print(o_str)
155 |             out_file.write(o_str+"\n")
156 |             out_file.flush()
157 |             cost_1 = [0. for v in cost_1]
158 |             # check classification error on training and validation set
159 |             train_err = PNS.classification_error(Xtr_su, Ytr_su)
160 |             va_err = PNS.classification_error(Xva, Yva)
161 |             o_str = "    tr_err: {0:.4f}, va_err: {1:.4f}".format(train_err, va_err)
162 |             print(o_str)
163 |             out_file.write(o_str+"\n")
164 |             out_file.flush()
165 |         if ((i % 1000) == 0):
166 |             # draw the main PeaNet's first-layer filters/weights
167 |             file_name = "SVHN_SS_PN_WEIGHTS.png".format(i)
168 |             utils.visualize_net_layer(PNS.PN.proto_nets[0][0], file_name)
169 |     print("TESTING COMPLETE!")
170 | 
171 | 
172 | 
173 | if __name__=="__main__":
174 |     test_semisupervised()


--------------------------------------------------------------------------------
/generative_models/TFDWalkoutTest.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import utils as utils
  3 | import numpy as np
  4 | import numpy.random as npr
  5 | import theano
  6 | import theano.tensor as T
  7 | 
  8 | from load_data import load_tfd
  9 | from PeaNet import PeaNet, load_peanet_from_file
 10 | from InfNet import InfNet, load_infnet_from_file
 11 | from HydraNet import HydraNet, load_hydranet_from_file
 12 | from VCGLoop import VCGLoop
 13 | from OneStageModel import OneStageModel
 14 | from NetLayers import relu_actfun, softplus_actfun, \
 15 |                       safe_softmax, row_shuffle
 16 | from HelperFuncs import sample_masks, sample_patch_masks, posterior_klds, \
 17 |                         collect_obs_costs
 18 | 
 19 | import sys, resource
 20 | resource.setrlimit(resource.RLIMIT_STACK, (2**29,-1))
 21 | sys.setrecursionlimit(10**6)
 22 | 
 23 | # DERP
 24 | RESULT_PATH = "TFD_WALKOUT_TEST_KLD/"
 25 | #RESULT_PATH = "TFD_WALKOUT_TEST_VAE/"
 26 | #RESULT_PATH = "TFD_WALKOUT_TEST_MAX_KLD/"
 27 | PRIOR_DIM = 100
 28 | LOGVAR_BOUND = 6.0
 29 | 
 30 | ###########################################
 31 | ###########################################
 32 | ## VAE PRETRAINING FOR THE OneStageModel ##
 33 | ###########################################
 34 | ###########################################
 35 | 
 36 | def pretrain_osm(lam_kld=0.0):
 37 |     # Initialize a source of randomness
 38 |     rng = np.random.RandomState(1234)
 39 | 
 40 |     # Load some data to train/validate/test with
 41 |     data_file = 'data/tfd_data_48x48.pkl'
 42 |     dataset = load_tfd(tfd_pkl_name=data_file, which_set='unlabeled', fold='all')
 43 |     Xtr_unlabeled = dataset[0]
 44 |     dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
 45 |     Xtr_train = dataset[0]
 46 |     Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
 47 |     dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all')
 48 |     Xva = dataset[0]
 49 |     tr_samples = Xtr.shape[0]
 50 |     va_samples = Xva.shape[0]
 51 |     batch_size = 200
 52 |     batch_reps = 1
 53 | 
 54 |     # setup some symbolic variables and stuff
 55 |     Xd = T.matrix('Xd_base')
 56 |     data_dim = Xtr.shape[1]
 57 |     Xtr_mean = np.mean(Xtr, axis=0)
 58 | 
 59 |     ##########################
 60 |     # NETWORK CONFIGURATIONS #
 61 |     ##########################
 62 |     gn_params = {}
 63 |     shared_config = [PRIOR_DIM, 1500, 1500]
 64 |     output_config = [data_dim, data_dim]
 65 |     gn_params['shared_config'] = shared_config
 66 |     gn_params['output_config'] = output_config
 67 |     gn_params['activation'] = relu_actfun
 68 |     gn_params['init_scale'] = 1.2
 69 |     gn_params['lam_l2a'] = 0.0
 70 |     gn_params['vis_drop'] = 0.0
 71 |     gn_params['hid_drop'] = 0.0
 72 |     gn_params['bias_noise'] = 0.0
 73 |     gn_params['input_noise'] = 0.0
 74 |     # choose some parameters for the continuous inferencer
 75 |     in_params = {}
 76 |     shared_config = [data_dim, 1500, 1500]
 77 |     top_config = [shared_config[-1], PRIOR_DIM]
 78 |     in_params['shared_config'] = shared_config
 79 |     in_params['mu_config'] = top_config
 80 |     in_params['sigma_config'] = top_config
 81 |     in_params['activation'] = relu_actfun
 82 |     in_params['init_scale'] = 1.2
 83 |     in_params['lam_l2a'] = 0.0
 84 |     in_params['vis_drop'] = 0.0
 85 |     in_params['hid_drop'] = 0.0
 86 |     in_params['bias_noise'] = 0.0
 87 |     in_params['input_noise'] = 0.0
 88 |     # Initialize the base networks for this OneStageModel
 89 |     IN = InfNet(rng=rng, Xd=Xd, \
 90 |             params=in_params, shared_param_dicts=None)
 91 |     GN = HydraNet(rng=rng, Xd=Xd, \
 92 |             params=gn_params, shared_param_dicts=None)
 93 |     # Initialize biases in IN and GN
 94 |     IN.init_biases(0.2)
 95 |     GN.init_biases(0.2)
 96 | 
 97 |     ######################################
 98 |     # LOAD AND RESTART FROM SAVED PARAMS #
 99 |     ######################################
100 |     # gn_fname = RESULT_PATH+"pt_osm_params_b110000_GN.pkl"
101 |     # in_fname = RESULT_PATH+"pt_osm_params_b110000_IN.pkl"
102 |     # IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd, \
103 |     #         new_params=None)
104 |     # GN = load_hydranet_from_file(f_name=gn_fname, rng=rng, Xd=Xd, \
105 |     #         new_params=None)
106 |     # in_params = IN.params
107 |     # gn_params = GN.params
108 | 
109 |     #########################
110 |     # INITIALIZE THE GIPAIR #
111 |     #########################
112 |     osm_params = {}
113 |     osm_params['x_type'] = 'gaussian'
114 |     osm_params['xt_transform'] = 'sigmoid'
115 |     osm_params['logvar_bound'] = LOGVAR_BOUND
116 |     OSM = OneStageModel(rng=rng, x_in=Xd, \
117 |             p_x_given_z=GN, q_z_given_x=IN, \
118 |             x_dim=data_dim, z_dim=PRIOR_DIM, params=osm_params)
119 |     OSM.set_lam_l2w(1e-4)
120 | 
121 |     ######################
122 |     # BASIC VAE TRAINING #
123 |     ######################
124 |     out_file = open(RESULT_PATH+"pt_osm_results.txt", 'wb')
125 |     # Set initial learning rate and basic SGD hyper parameters
126 |     obs_costs = np.zeros((batch_size,))
127 |     costs = [0. for i in range(10)]
128 |     learn_rate = 0.0002
129 |     momentum = 0.8
130 |     for i in range(200000):
131 |         kld_scale = min(1.0, float(i) / 20000.0)
132 |         if ((i > 1) and ((i % 10000) == 0)):
133 |             learn_rate = learn_rate * 0.9
134 |         # do a minibatch update of the model, and compute some costs
135 |         tr_idx = npr.randint(low=0,high=tr_samples,size=(batch_size,))
136 |         Xb = Xtr.take(tr_idx, axis=0)
137 |         # do a minibatch update of the model, and compute some costs
138 |         OSM.set_sgd_params(lr=learn_rate, mom_1=momentum, mom_2=0.98)
139 |         OSM.set_lam_nll(1.0)
140 |         OSM.set_lam_kld(lam_kld_1=(1.0 + (kld_scale * (lam_kld - 1.0))), \
141 |                         lam_kld_2=0.0)
142 |         result = OSM.train_joint(Xb, batch_reps)
143 |         costs = [(costs[j] + result[j]) for j in range(len(result))]
144 |         if ((i % 1000) == 0):
145 |             # record and then reset the cost trackers
146 |             costs = [(v / 1000.0) for v in costs]
147 |             str_1 = "-- batch {0:d} --".format(i)
148 |             str_2 = "    joint_cost: {0:.4f}".format(costs[0])
149 |             str_3 = "    nll_cost  : {0:.4f}".format(costs[1])
150 |             str_4 = "    kld_cost  : {0:.4f}".format(costs[2])
151 |             str_5 = "    reg_cost  : {0:.4f}".format(costs[3])
152 |             costs = [0.0 for v in costs]
153 |             # print out some diagnostic information
154 |             joint_str = "\n".join([str_1, str_2, str_3, str_4, str_5])
155 |             print(joint_str)
156 |             out_file.write(joint_str+"\n")
157 |             out_file.flush()
158 |         if ((i % 2000) == 0):
159 |             Xva = row_shuffle(Xva)
160 |             model_samps = OSM.sample_from_prior(500)
161 |             file_name = RESULT_PATH+"pt_osm_samples_b{0:d}_XG.png".format(i)
162 |             utils.visualize_samples(model_samps, file_name, num_rows=20)
163 |             file_name = RESULT_PATH+"pt_osm_inf_weights_b{0:d}.png".format(i)
164 |             utils.visualize_samples(OSM.inf_weights.get_value(borrow=False).T, \
165 |                     file_name, num_rows=30)
166 |             file_name = RESULT_PATH+"pt_osm_gen_weights_b{0:d}.png".format(i)
167 |             utils.visualize_samples(OSM.gen_weights.get_value(borrow=False), \
168 |                     file_name, num_rows=30)
169 |             # compute information about free-energy on validation set
170 |             file_name = RESULT_PATH+"pt_osm_free_energy_b{0:d}.png".format(i)
171 |             fe_terms = OSM.compute_fe_terms(Xva[0:2500], 20)
172 |             fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
173 |             fe_str = "    nll_bound : {0:.4f}".format(fe_mean)
174 |             print(fe_str)
175 |             out_file.write(fe_str+"\n")
176 |             utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, \
177 |                     x_label='Posterior KLd', y_label='Negative Log-likelihood')
178 |             # compute information about posterior KLds on validation set
179 |             file_name = RESULT_PATH+"pt_osm_post_klds_b{0:d}.png".format(i)
180 |             post_klds = OSM.compute_post_klds(Xva[0:2500])
181 |             post_dim_klds = np.mean(post_klds, axis=0)
182 |             utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, \
183 |                     file_name)
184 |         if ((i % 5000) == 0):
185 |             IN.save_to_file(f_name=RESULT_PATH+"pt_osm_params_b{0:d}_IN.pkl".format(i))
186 |             GN.save_to_file(f_name=RESULT_PATH+"pt_osm_params_b{0:d}_GN.pkl".format(i))
187 |     IN.save_to_file(f_name=RESULT_PATH+"pt_osm_params_IN.pkl")
188 |     GN.save_to_file(f_name=RESULT_PATH+"pt_osm_params_GN.pkl")
189 |     return
190 | 
191 | if __name__=="__main__":
192 |     # FOR EXTREME KLD REGULARIZATION
193 | 	#pretrain_osm(lam_kld=50.0)
194 | 	#train_walk_from_pretrained_osm(lam_kld=60.0)
195 | 
196 |     # FOR KLD MODEL
197 |     pretrain_osm(lam_kld=15.0)
198 |     # train_walk_from_pretrained_osm(lam_kld=15.0)
199 | 
200 |     # FOR VAE MODEL
201 |     #pretrain_osm(lam_kld=1.0)
202 |     #train_walk_from_pretrained_osm(lam_kld=1.0)


--------------------------------------------------------------------------------
/generative_models/TestBlocksDDModels.py:
--------------------------------------------------------------------------------
  1 | ##################################################################
  2 | # Code for testing the variational Multi-Stage Generative Model. #
  3 | ##################################################################
  4 | 
  5 | from __future__ import print_function, division
  6 | 
  7 | # basic python
  8 | import cPickle as pickle
  9 | from PIL import Image
 10 | import numpy as np
 11 | import numpy.random as npr
 12 | from collections import OrderedDict
 13 | 
 14 | # theano business
 15 | import theano
 16 | import theano.tensor as T
 17 | 
 18 | # blocks stuff
 19 | from blocks.initialization import Constant, IsotropicGaussian, Orthogonal
 20 | from blocks.filter import VariableFilter
 21 | from blocks.graph import ComputationGraph
 22 | from blocks.roles import PARAMETER
 23 | from blocks.model import Model
 24 | from blocks.bricks import Tanh, Identity, Rectifier
 25 | from blocks.bricks.cost import BinaryCrossEntropy
 26 | from blocks.bricks.recurrent import SimpleRecurrent, LSTM
 27 | 
 28 | # phil's sweetness
 29 | import utils
 30 | from BlocksModels import *
 31 | from load_data import load_udm, load_udm_ss, load_mnist, load_binarized_mnist
 32 | from HelperFuncs import row_shuffle, to_fX
 33 | 
 34 | ###################################
 35 | ###################################
 36 | ## HELPER FUNCTIONS FOR SAMPLING ##
 37 | ###################################
 38 | ###################################
 39 | 
 40 | def scale_norm(arr):
 41 |     arr = arr - arr.min()
 42 |     scale = (arr.max() - arr.min())
 43 |     return scale * arr
 44 | 
 45 | def img_grid(arr, global_scale=True):
 46 |     N, height, width = arr.shape
 47 | 
 48 |     rows = int(np.sqrt(N))
 49 |     cols = int(np.sqrt(N))
 50 | 
 51 |     if rows*cols < N:
 52 |         cols = cols + 1
 53 | 
 54 |     if rows*cols < N:
 55 |         rows = rows + 1
 56 | 
 57 |     total_height = rows * height
 58 |     total_width  = cols * width
 59 | 
 60 |     if global_scale:
 61 |         arr = scale_norm(arr)
 62 | 
 63 |     I = np.zeros((total_height, total_width))
 64 | 
 65 |     for i in xrange(N):
 66 |         r = i // cols
 67 |         c = i % cols
 68 | 
 69 |         if global_scale:
 70 |             this = arr[i]
 71 |         else:
 72 |             this = scale_norm(arr[i])
 73 | 
 74 |         offset_y, offset_x = r*height, c*width
 75 |         I[offset_y:(offset_y+height), offset_x:(offset_x+width)] = this
 76 |     
 77 |     I = (255*I).astype(np.uint8)
 78 |     return Image.fromarray(I)
 79 | 
 80 | 
 81 | ########################################
 82 | ########################################
 83 | ## TEST WITH MODEL-BASED INITIAL STEP ##
 84 | ########################################
 85 | ########################################
 86 | 
 87 | def test_ddm_generation():
 88 |     ##########################
 89 |     # Get some training data #
 90 |     ##########################
 91 |     rng = np.random.RandomState(1234)
 92 |     Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
 93 |     Xtr = np.vstack((Xtr, Xva))
 94 |     Xva = Xte
 95 |     #del Xte
 96 |     tr_samples = Xtr.shape[0]
 97 |     va_samples = Xva.shape[0]
 98 |     batch_size = 250
 99 | 
100 |     ############################################################
101 |     # Setup some parameters for the Iterative Refinement Model #
102 |     ############################################################
103 |     x_dim = Xtr.shape[1]
104 |     enc_dim = 250
105 |     dec_dim = 250
106 |     mix_dim = 20
107 |     z_dim = 100
108 |     n_iter = 8
109 |     
110 |     rnninits = {
111 |         'weights_init': IsotropicGaussian(0.01),
112 |         'biases_init': Constant(0.),
113 |     }
114 |     inits = {
115 |         'weights_init': IsotropicGaussian(0.01),
116 |         'biases_init': Constant(0.),
117 |     }
118 | 
119 |     # setup the infinite mixture initialization model
120 |     mix_enc_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \
121 |                           name="mix_enc_mlp", **inits)
122 |     mix_dec_mlp = MLP([Tanh(), Tanh()], \
123 |                       [mix_dim, 250, (2*enc_dim + 2*dec_dim)], \
124 |                       name="mix_dec_mlp", **inits)
125 |     # setup the components of the sequential generative model
126 |     enc_mlp_in = MLP([Identity()], [(x_dim + dec_dim + dec_dim), 4*enc_dim], \
127 |                      name="enc_mlp_in", **inits)
128 |     dec_mlp_in = MLP([Identity()], [z_dim, 4*dec_dim], \
129 |                      name="dec_mlp_in", **inits)
130 |     enc_mlp_out = CondNet([], [enc_dim, z_dim], name="enc_mlp_out", **inits)
131 |     dec_mlp_out = CondNet([], [dec_dim, z_dim], name="dec_mlp_out", **inits)
132 |     enc_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \
133 |                          name="enc_rnn", **rnninits)
134 |     dec_rnn = BiasedLSTM(dim=dec_dim, ig_bias=2.0, fg_bias=2.0, \
135 |                          name="dec_rnn", **rnninits)
136 |     # set up the transform from latent space to observation space
137 |     s2x_mlp = TanhMLPwFFBP(dec_dim, [500], x_dim, name="s2x_mlp", **inits)
138 | 
139 |     draw = DriftDiffModel(
140 |                 n_iter,
141 |                 mix_enc_mlp=mix_enc_mlp,
142 |                 mix_dec_mlp=mix_dec_mlp,
143 |                 enc_mlp_in=enc_mlp_in,
144 |                 enc_mlp_out=enc_mlp_out,
145 |                 enc_rnn=enc_rnn,
146 |                 dec_mlp_in=dec_mlp_in,
147 |                 dec_mlp_out=dec_mlp_out,
148 |                 dec_rnn=dec_rnn,
149 |                 s2x_mlp=s2x_mlp)
150 |     draw.initialize()
151 | 
152 |     # build the cost gradients, training function, samplers, etc.
153 |     draw.build_model_funcs()
154 | 
155 |     #draw.load_model_params(f_name="TBDDM_GEN_PARAMS.pkl")
156 | 
157 |     ################################################################
158 |     # Apply some updates, to check that they aren't totally broken #
159 |     ################################################################
160 |     print("Beginning to train the model...")
161 |     out_file = open("TBDDM_GEN_RESULTS.txt", 'wb')
162 |     costs = [0. for i in range(10)]
163 |     learn_rate = 0.0002
164 |     momentum = 0.5
165 |     batch_idx = np.arange(batch_size) + tr_samples
166 |     for i in range(250000):
167 |         scale = min(1.0, ((i+1) / 1000.0))
168 |         if (((i + 1) % 10000) == 0):
169 |             learn_rate = learn_rate * 0.95
170 |         if (i > 10000):
171 |             momentum = 0.90
172 |         else:
173 |             momentum = 0.50
174 |         # get the indices of training samples for this batch update
175 |         batch_idx += batch_size
176 |         if (np.max(batch_idx) >= tr_samples):
177 |             # we finished an "epoch", so we rejumble the training set
178 |             Xtr = row_shuffle(Xtr)
179 |             batch_idx = np.arange(batch_size)
180 | 
181 |         # set sgd and objective function hyperparams for this update
182 |         zero_ary = np.zeros((1,))
183 |         draw.lr.set_value(to_fX(zero_ary + learn_rate))
184 |         draw.mom_1.set_value(to_fX(zero_ary + momentum))
185 |         draw.mom_2.set_value(to_fX(zero_ary + 0.99))
186 | 
187 |         # perform a minibatch update and record the cost for this batch
188 |         Xb = to_fX(Xtr.take(batch_idx, axis=0))
189 |         result = draw.train_joint(Xb, Xb)
190 |         costs = [(costs[j] + result[j]) for j in range(len(result))]
191 | 
192 |         # diagnostics
193 |         if ((i % 250) == 0):
194 |             costs = [(v / 250.0) for v in costs]
195 |             str1 = "-- batch {0:d} --".format(i)
196 |             str2 = "    total_cost: {0:.4f}".format(costs[0])
197 |             str3 = "    nll_bound : {0:.4f}".format(costs[1])
198 |             str4 = "    nll_term  : {0:.4f}".format(costs[2])
199 |             str5 = "    kld_q2p   : {0:.4f}".format(costs[3])
200 |             str6 = "    kld_p2q   : {0:.4f}".format(costs[4])
201 |             str7 = "    reg_term  : {0:.4f}".format(costs[5])
202 |             joint_str = "\n".join([str1, str2, str3, str4, str5, str6, str7])
203 |             print(joint_str)
204 |             out_file.write(joint_str+"\n")
205 |             out_file.flush()
206 |             costs = [0.0 for v in costs]
207 |         if ((i % 500) == 0):
208 |             draw.save_model_params("TBDDM_GEN_PARAMS.pkl")
209 |             # compute a small-sample estimate of NLL bound on validation set
210 |             Xva = row_shuffle(Xva)
211 |             Xb = to_fX(Xva[:5000])
212 |             va_costs = draw.compute_nll_bound(Xb, Xb)
213 |             str1 = "    va_nll_bound : {}".format(va_costs[1])
214 |             str2 = "    va_nll_term  : {}".format(va_costs[2])
215 |             str3 = "    va_kld_q2p   : {}".format(va_costs[3])
216 |             joint_str = "\n".join([str1, str2, str3])
217 |             print(joint_str)
218 |             out_file.write(joint_str+"\n")
219 |             out_file.flush()
220 |             # draw some independent samples from the model
221 |             samples = draw.do_sample(16*16)
222 |             n_iter, N, D = samples.shape
223 |             samples = samples.reshape( (n_iter, N, 28, 28) )
224 |             for j in xrange(n_iter):
225 |                 img = img_grid(samples[j,:,:,:])
226 |                 img.save("TBDDM-gen-samples-%03d.png" % (j,))
227 | 
228 | if __name__=="__main__":
229 |     test_ddm_generation()


--------------------------------------------------------------------------------
/generative_models/TestClassModel.py:
--------------------------------------------------------------------------------
  1 | ##################################################################
  2 | # Code for testing the variational Multi-Stage Generative Model. #
  3 | ##################################################################
  4 | 
  5 | # basic python
  6 | import numpy as np
  7 | import numpy.random as npr
  8 | 
  9 | # theano business
 10 | import theano
 11 | import theano.tensor as T
 12 | 
 13 | # phil's sweetness
 14 | from LogPDFs import log_prob_bernoulli, log_prob_gaussian2, gaussian_kld
 15 | from NetLayers import relu_actfun, softplus_actfun, tanh_actfun, \
 16 |                       apply_mask, binarize_data, row_shuffle, to_fX
 17 | from InfNet import InfNet
 18 | from ClassModel import ClassModel
 19 | from load_data import load_udm, load_udm_ss, load_mnist, load_binarized_mnist
 20 | from HelperFuncs import collect_obs_costs
 21 | import utils
 22 | 
 23 | ########################################
 24 | ########################################
 25 | ## TEST WITH MODEL-BASED INITIAL STEP ##
 26 | ########################################
 27 | ########################################
 28 | 
 29 | def test_with_model_init():
 30 |     ##########################
 31 |     # Get some training data #
 32 |     ##########################
 33 |     rng = np.random.RandomState(1234)
 34 |     dataset = 'data/mnist.pkl.gz'
 35 |     datasets = load_udm(dataset, as_shared=False, zero_mean=False)
 36 |     Xtr = to_fX(datasets[0][0])
 37 |     Xva = to_fX(datasets[1][0])
 38 |     Ytr = datasets[0][1]
 39 |     Yva = datasets[1][1]
 40 | 
 41 |     tr_samples = Xtr.shape[0]
 42 |     va_samples = Xva.shape[0]
 43 |     batch_size = 200
 44 | 
 45 |     BD = lambda ary: binarize_data(ary)
 46 | 
 47 |     #######################################
 48 |     # Setup some parameters for the model #
 49 |     #######################################
 50 |     obs_dim = Xtr.shape[1]
 51 |     z_dim = 64
 52 |     init_scale = 0.2
 53 | 
 54 |     # some InfNet instances to build the TwoStageModel from
 55 |     x_in = T.matrix('x_in')
 56 |     y_in = T.lvector('y_in')
 57 | 
 58 |     ###############
 59 |     # q_z_given_x #
 60 |     ###############
 61 |     print("Building q_z_given_x...")
 62 |     params = {}
 63 |     shared_config = [obs_dim, 1000, 1000]
 64 |     top_config = [shared_config[-1], z_dim]
 65 |     params['shared_config'] = shared_config
 66 |     params['mu_config'] = top_config
 67 |     params['sigma_config'] = top_config
 68 |     params['activation'] = relu_actfun
 69 |     params['init_scale'] = init_scale
 70 |     params['lam_l2a'] = 0.0
 71 |     params['vis_drop'] = 0.2
 72 |     params['hid_drop'] = 0.5
 73 |     params['bias_noise'] = 0.0
 74 |     params['input_noise'] = 0.0
 75 |     params['build_theano_funcs'] = False
 76 |     q_z_given_x = InfNet(rng=rng, Xd=x_in, \
 77 |             params=params, shared_param_dicts=None)
 78 |     q_z_given_x.init_biases(0.2)
 79 | 
 80 | 
 81 |     ###########################################################
 82 |     # Define parameters for the ClassModel, and initialize it #
 83 |     ###########################################################
 84 |     print("Building the ClassModel...")
 85 |     CM = ClassModel(rng=rng, \
 86 |             x_in=x_in, y_in=y_in, \
 87 |             q_z_given_x=q_z_given_x, \
 88 |             class_count=10, \
 89 |             z_dim=z_dim, \
 90 |             use_samples=False)
 91 |     CM.set_drop_rate(0.5)
 92 |     CM.set_lam_nll(lam_nll=1.0)
 93 |     CM.set_lam_kld(lam_kld_q2p=1.0, lam_kld_p2q=0.0)
 94 |     CM.set_lam_l2w(lam_l2w=1e-5)
 95 | 
 96 |     ################################################################
 97 |     # Apply some updates, to check that they aren't totally broken #
 98 |     ################################################################
 99 |     out_file = open("CM_RESULTS.txt", 'wb')
100 |     costs = [0. for i in range(10)]
101 |     learn_rate = 0.0002
102 |     momentum = 0.9
103 |     batch_idx = np.arange(batch_size) + tr_samples
104 |     for i in range(250000):
105 |         scale = min(1.0, ((i+1) / 1000.0))
106 |         if (((i + 1) % 10000) == 0):
107 |             learn_rate = learn_rate * 0.95
108 |         # get the indices of training samples for this batch update
109 |         batch_idx += batch_size
110 |         if (np.max(batch_idx) >= tr_samples):
111 |             # we finished an "epoch", so we rejumble the training set
112 |             Xtr, Ytr = row_shuffle(Xtr, Ytr)
113 |             batch_idx = np.arange(batch_size)
114 |         # set sgd and objective function hyperparams for this update
115 |         CM.set_sgd_params(lr_1=scale*learn_rate, lr_2=scale*learn_rate, \
116 |                           mom_1=scale*momentum, mom_2=0.99)
117 |         # perform a minibatch update and record the cost for this batch
118 |         Xi_tr = Xtr.take(batch_idx, axis=0)
119 |         Yi_tr = Ytr.take(batch_idx, axis=0)
120 |         result = CM.train_joint(Xi_tr, Yi_tr)
121 |         costs = [(costs[j] + result[j]) for j in range(len(result)-1)]
122 |         # output useful information about training progress
123 |         if ((i % 500) == 0):
124 |             costs = [(v / 500.0) for v in costs]
125 |             str1 = "-- batch {0:d} --".format(i)
126 |             str2 = "    joint_cost  : {0:.4f}".format(costs[0])
127 |             str3 = "    nll_cost    : {0:.4f}".format(costs[1])
128 |             str4 = "    kld_cost    : {0:.4f}".format(costs[2])
129 |             str5 = "    reg_cost    : {0:.4f}".format(costs[3])
130 |             joint_str = "\n".join([str1, str2, str3, str4, str5])
131 |             print(joint_str)
132 |             out_file.write(joint_str+"\n")
133 |             out_file.flush()
134 |             costs = [0.0 for v in costs]
135 |         if (((i % 2000) == 0) or ((i < 10000) and ((i % 1000) == 0))):
136 |             #####################################################
137 |             # compute multi-sample estimates of the free-energy #
138 |             #####################################################
139 |             # training set...
140 |             fe_terms = CM.compute_fe_terms(Xtr[0:2500],Ytr[0:2500], 30)
141 |             fe_nll = np.mean(fe_terms[0])
142 |             fe_kld = np.mean(fe_terms[1])
143 |             fe_joint = fe_nll + fe_kld
144 |             joint_str = "    vfe-tr: {0:.4f}, nll: ({1:.4f}, {2:.4f}, {3:.4f}), kld: ({4:.4f}, {5:.4f}, {6:.4f})".format( \
145 |                     fe_joint, fe_nll, np.min(fe_terms[0]), np.max(fe_terms[0]), fe_kld, np.min(fe_terms[1]), np.max(fe_terms[1]))
146 |             print(joint_str)
147 |             out_file.write(joint_str+"\n")
148 |             out_file.flush()
149 |             # validation set...
150 |             Xva, Yva = row_shuffle(Xva, Yva)
151 |             fe_terms = CM.compute_fe_terms(Xva[0:2500], Yva[0:2500], 30)
152 |             fe_nll = np.mean(fe_terms[0])
153 |             fe_kld = np.mean(fe_terms[1])
154 |             fe_joint = fe_nll + fe_kld
155 |             joint_str = "    vfe-va: {0:.4f}, nll: ({1:.4f}, {2:.4f}, {3:.4f}), kld: ({4:.4f}, {5:.4f}, {6:.4f})".format( \
156 |                     fe_joint, fe_nll, np.min(fe_terms[0]), np.max(fe_terms[0]), fe_kld, np.min(fe_terms[1]), np.max(fe_terms[1]))
157 |             print(joint_str)
158 |             out_file.write(joint_str+"\n")
159 |             out_file.flush()
160 |             ##########################################################
161 |             # compute multi-sample estimates of classification error #
162 |             ##########################################################
163 |             # training set...
164 |             va_error, va_preds = CM.class_error(Xtr[:2500], Ytr[:2500], samples=30)
165 |             joint_str = "    tr-class-error: {0:.4f}".format(va_error)
166 |             print(joint_str)
167 |             out_file.write(joint_str+"\n")
168 |             out_file.flush()
169 |             # validation set...
170 |             va_error, va_preds = CM.class_error(Xva[:2500], Yva[:2500], samples=30)
171 |             joint_str = "    va-class-error: {0:.4f}".format(va_error)
172 |             print(joint_str)
173 |             out_file.write(joint_str+"\n")
174 |             out_file.flush()
175 | 
176 | if __name__=="__main__":
177 |     test_with_model_init()


--------------------------------------------------------------------------------
/generative_models/TestImpTM.py:
--------------------------------------------------------------------------------
  1 | ##################################################################
  2 | # Code for testing the variational Multi-Stage Generative Model. #
  3 | ##################################################################
  4 | 
  5 | # basic python
  6 | import numpy as np
  7 | import numpy.random as npr
  8 | import cPickle
  9 | 
 10 | # theano business
 11 | import theano
 12 | import theano.tensor as T
 13 | 
 14 | # phil's sweetness
 15 | import utils
 16 | from GPSImputer import TemplateMatchImputer
 17 | from load_data import load_udm, load_mnist, load_tfd, load_svhn_gray
 18 | from HelperFuncs import construct_masked_data, shift_and_scale_into_01, \
 19 |                         row_shuffle, to_fX
 20 | 
 21 | RESULT_PATH = "IMP_MNIST_TM/"
 22 | 
 23 | ###############################
 24 | ###############################
 25 | ## TEST GPS IMPUTER ON MNIST ##
 26 | ###############################
 27 | ###############################
 28 | 
 29 | def test_mnist_nll(occ_dim=15, drop_prob=0.0):
 30 |     #########################################
 31 |     # Format the result tag more thoroughly #
 32 |     #########################################
 33 |     dp_int = int(100.0 * drop_prob)
 34 |     result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int)
 35 | 
 36 |     ##########################
 37 |     # Get some training data #
 38 |     ##########################
 39 |     rng = np.random.RandomState(1234)
 40 |     dataset = 'data/mnist.pkl.gz'
 41 |     datasets = load_udm(dataset, as_shared=False, zero_mean=False)
 42 |     Xtr = datasets[0][0]
 43 |     Xva = datasets[1][0]
 44 |     Xtr = to_fX(shift_and_scale_into_01(Xtr))
 45 |     Xva = to_fX(shift_and_scale_into_01(Xva))
 46 |     tr_samples = Xtr.shape[0]
 47 |     va_samples = Xva.shape[0]
 48 |     batch_size = 200
 49 |     batch_reps = 1
 50 |     all_pix_mean = np.mean(np.mean(Xtr, axis=1))
 51 |     data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1],)))
 52 | 
 53 |     TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli')
 54 | 
 55 |     log_name = "{}_RESULTS.txt".format(result_tag)
 56 |     out_file = open(log_name, 'wb')
 57 | 
 58 |     Xva = row_shuffle(Xva)
 59 |     # record an estimate of performance on the test set
 60 |     xi, xo, xm = construct_masked_data(Xva, drop_prob=drop_prob, \
 61 |                                        occ_dim=occ_dim, data_mean=data_mean)
 62 |     result = TM.best_match_nll(xo, xm)
 63 |     match_on_known = np.mean(result[0])
 64 |     match_on_unknown = np.mean(result[1])
 65 |     str0 = "Test 1:"
 66 |     str1 = "    match on known   : {}".format(match_on_known)
 67 |     str2 = "    match on unknown : {}".format(match_on_unknown)
 68 |     joint_str = "\n".join([str0, str1, str2])
 69 |     print(joint_str)
 70 |     out_file.write(joint_str+"\n")
 71 |     out_file.flush()
 72 |     out_file.close()
 73 |     return
 74 | 
 75 | def test_mnist_img(occ_dim=15, drop_prob=0.0):
 76 |     #########################################
 77 |     # Format the result tag more thoroughly #
 78 |     #########################################
 79 |     dp_int = int(100.0 * drop_prob)
 80 |     result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int)
 81 | 
 82 |     ##########################
 83 |     # Get some training data #
 84 |     ##########################
 85 |     rng = np.random.RandomState(1234)
 86 |     dataset = 'data/mnist.pkl.gz'
 87 |     datasets = load_udm(dataset, as_shared=False, zero_mean=False)
 88 |     Xtr = datasets[0][0]
 89 |     Xva = datasets[1][0]
 90 |     Xtr = to_fX(shift_and_scale_into_01(Xtr))
 91 |     Xva = to_fX(shift_and_scale_into_01(Xva))
 92 |     tr_samples = Xtr.shape[0]
 93 |     va_samples = Xva.shape[0]
 94 |     batch_size = 200
 95 |     batch_reps = 1
 96 |     all_pix_mean = np.mean(np.mean(Xtr, axis=1))
 97 |     data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1],)))
 98 | 
 99 |     TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli')
100 | 
101 |     Xva = row_shuffle(Xva)
102 |     # record an estimate of performance on the test set
103 |     xi, xo, xm = construct_masked_data(Xva[:500], drop_prob=drop_prob, \
104 |                                        occ_dim=occ_dim, data_mean=data_mean)
105 |     img_match_on_known, img_match_on_unknown = TM.best_match_img(xo, xm)
106 | 
107 |     display_count = 100
108 |     # visualize matches on known elements
109 |     Xs = np.zeros((2*display_count, Xva.shape[1]))
110 |     for idx in range(display_count):
111 |         Xs[2*idx] = xi[idx]
112 |         Xs[(2*idx)+1] = img_match_on_known[idx]
113 |     file_name = "{0:s}_SAMPLES_MOK.png".format(result_tag)
114 |     utils.visualize_samples(Xs, file_name, num_rows=20)
115 |     # visualize matches on unknown elements
116 |     Xs = np.zeros((2*display_count, Xva.shape[1]))
117 |     for idx in range(display_count):
118 |         Xs[2*idx] = xi[idx]
119 |         Xs[(2*idx)+1] = img_match_on_unknown[idx]
120 |     file_name = "{0:s}_SAMPLES_MOU.png".format(result_tag)
121 |     utils.visualize_samples(Xs, file_name, num_rows=20)
122 |     return
123 | 
124 | 
125 | if __name__=="__main__":
126 |     #########
127 |     # MNIST #
128 |     #########
129 |     # test_mnist_nll(occ_dim=0, drop_prob=0.6)
130 |     # test_mnist_nll(occ_dim=0, drop_prob=0.7)
131 |     # test_mnist_nll(occ_dim=0, drop_prob=0.8)
132 |     # test_mnist_nll(occ_dim=0, drop_prob=0.9)
133 |     # test_mnist_nll(occ_dim=14, drop_prob=0.0)
134 |     # test_mnist_nll(occ_dim=16, drop_prob=0.0)
135 |     test_mnist_img(occ_dim=0, drop_prob=0.6)
136 |     test_mnist_img(occ_dim=0, drop_prob=0.7)
137 |     test_mnist_img(occ_dim=0, drop_prob=0.8)
138 |     test_mnist_img(occ_dim=0, drop_prob=0.9)
139 |     test_mnist_img(occ_dim=14, drop_prob=0.0)
140 |     test_mnist_img(occ_dim=16, drop_prob=0.0)
141 | 


--------------------------------------------------------------------------------
/generative_models/blocks_models/binarized_mnist_converter.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import fuel
 4 | import h5py
 5 | import numpy
 6 | 
 7 | default_directory = os.path.join(fuel.config.data_path, 'binarized_mnist')
 8 | default_save_path = os.path.join(default_directory, 'binarized_mnist.hdf5')
 9 | 
10 | 
11 | def binarized_mnist(directory=None, save_path=None):
12 |     """Converts the binarized MNIST dataset to HDF5.
13 |     Converts the binarized MNIST dataset used in R. Salakhutdinov's DBN
14 |     paper [DBN] to an HDF5 dataset compatible with
15 |     :class:`fuel.datasets.BinarizedMNIST`.
16 |     This method assumes the existence of the files
17 |     `binarized_mnist_{train,valid,test}.amat`, which are accessible
18 |     through Hugo Larochelle's website [HUGO].
19 |     .. [DBN] Ruslan Salakhutdinov and Iain Murray, *On the Quantitative
20 |        Analysis of Deep Belief Networks*, Proceedings of the 25th
21 |        international conference on Machine learning, 2008, pp. 872-879.
22 |     .. [HUGO] http://www.cs.toronto.edu/~larocheh/public/datasets/
23 |        binarized_mnist/binarized_mnist_{train,valid,test}.amat
24 |     Parameters
25 |     ----------
26 |     directory : str, optional
27 |         Base directory in which the required input files reside. Defaults
28 |         to `None`, in which case `'$FUEL_DATA_PATH/binarized_mnist'` is
29 |         used.
30 |     save_path : str, optional
31 |         Where to save the converted dataset. Defaults to `None`, in which
32 |         case `'$FUEL_DATA_PATH/binarized_mnist/binarized_mnist.hdf5'` is
33 |         used.
34 |     """
35 |     if directory is None:
36 |         directory = default_directory
37 |     if save_path is None:
38 |         save_path = default_save_path
39 | 
40 |     train_set = numpy.loadtxt(
41 |         os.path.join(directory, 'binarized_mnist_train.amat'))
42 |     valid_set = numpy.loadtxt(
43 |         os.path.join(directory, 'binarized_mnist_valid.amat'))
44 |     test_set = numpy.loadtxt(
45 |         os.path.join(directory, 'binarized_mnist_test.amat'))
46 | 
47 |     f = h5py.File(save_path, mode="w")
48 | 
49 |     features = f.create_dataset('features', (70000, 1, 28, 28), dtype='uint8')
50 |     features[...] = numpy.vstack([train_set.reshape((-1, 1, 28, 28)),
51 |                                   valid_set.reshape((-1, 1, 28, 28)),
52 |                                   test_set.reshape((-1, 1, 28, 28))])
53 |     f.attrs['train'] = [0, 50000]
54 |     f.attrs['valid'] = [50000, 60000]
55 |     f.attrs['test'] = [60000, 70000]
56 | 
57 |     f.flush()
58 |     f.close()


--------------------------------------------------------------------------------
/generative_models/blocks_models/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Philip-Bachman/NN-Python/e9a7619806c5ccbe2bd648b2a2e0af7967dc6996/generative_models/blocks_models/lib/__init__.py


--------------------------------------------------------------------------------
/generative_models/blocks_models/lib/myutils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from __future__ import division
 3 | 
 4 | from abc import ABCMeta, abstractmethod
 5 | 
 6 | import ipdb
 7 | import numpy
 8 | import six
 9 | import theano
10 | 
11 | from collections import OrderedDict
12 | 
13 | from theano import tensor
14 | from blocks.initialization import NdarrayInitialization, Uniform
15 | 
16 | 
17 | def merge_gradients(*gradient_list):
18 |     """Take and merge multiple ordered dicts 
19 |     """
20 |     merged = OrderedDict()
21 |     for gradients in gradient_list:
22 |         assert isinstance(gradients, (dict, OrderedDict))
23 |         for key, val in gradients.items():
24 |             if merged.has_key(key):
25 |                 merged[key] = merged[key] + val
26 |             else:       
27 |                 merged[key] = val
28 |     return merged
29 | 
30 | #-----------------------------------------------------------------------------
31 | 
32 | 
33 | class ShapeDependentInitialization(NdarrayInitialization):
34 |     """Initialize 
35 | 
36 |     Parameters
37 |     ----------
38 |     weights_init : :class:`NdarrayInitialization` instance
39 |         The unscaled initialization scheme to initialize the weights with.
40 |     """
41 |     def __init__(self, weights_init):
42 |         super(ShapeDependentInitialization, self).__init__()
43 |         self.weights_init = weights_init
44 | 
45 |     def generate(self, rng, shape):
46 |         weights = self.weights_init.generate(rng, shape)
47 |         scale = self.scale_func(*shape)
48 |         return scale*weights
49 | 
50 |     # TODO: Abstract
51 |     def scale_func(self, *shape):
52 |         pass
53 | 
54 | 
55 | class TanhInitialization(ShapeDependentInitialization):
56 |     """Normalized initialization for tanh MLPs. 
57 | 
58 |     This class initializes parameters by drawing from the uniform 
59 |     distribution   with the interval 
60 | 
61 |         [- sqrt(6)/sqrt(dim_in+dim_out)  .. sqrt(6)/sqrt(dim_in+dim_out)]
62 |     """
63 |     def __init__(self):
64 |         super(TanhInitialization, self).__init__(Uniform(mean=0., width=2.))
65 | 
66 |     def scale_func(self, dim_in, dim_out):
67 |         return numpy.sqrt(6)/numpy.sqrt(dim_in+dim_out)
68 | 


--------------------------------------------------------------------------------
/generative_models/blocks_models/plot-log.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python 
 2 | 
 3 | from __future__ import division, print_function
 4 | 
 5 | import logging
 6 | import argparse
 7 | import numpy as np
 8 | import pylab
 9 | import matplotlib as mpl
10 | import matplotlib.pyplot as plt
11 | import cPickle as pickle
12 | 
13 | from mpl_toolkits.mplot3d import Axes3D
14 | 
15 | from blocks.main_loop import MainLoop
16 | from blocks.log import TrainingLog
17 | 
18 | FORMAT = '[%(asctime)s] %(name)-15s %(message)s'
19 | DATEFMT = "%H:%M:%S"
20 | logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.INFO)
21 | 
22 | 
23 | if __name__ == "__main__":
24 |     from argparse import ArgumentParser
25 | 
26 |     parser = ArgumentParser()
27 |     parser.add_argument("model_file", help="filename the log to plot from")
28 |     args = parser.parse_args()
29 | 
30 |     logging.info("Loading file %s..." % args.model_file)
31 |     with open(args.model_file, "rb") as f:
32 |         p = pickle.load(f)
33 | 
34 |     if isinstance(p, MainLoop):
35 |         print("GOOD LUCK, BUT PLEASE USE A LOG!")
36 |         assert(False)
37 |     elif isinstance(p, TrainingLog):
38 |         log = p
39 | 
40 |     plot_tag = args.model_file[0:-8]
41 |     df = log.to_dataframe()
42 |     df_keys = df.keys()
43 | 
44 |     ################################
45 |     # PLOT VARIATIONAL FREE-ENERGY #
46 |     ################################
47 |     nll_bound_types = [k for k in df_keys if (k.find('nll_bound') > -1)]
48 |     nll_bound_idx = df[nll_bound_types[0]].keys()[1:-5]
49 |     #nll_bound_idx = [i for i in nll_bound_idx if i < 40000]
50 |     nll_bound_map = {}
51 |     for k in nll_bound_types:
52 |         idx = np.asarray(nll_bound_idx)
53 |         vals = np.asarray(df[k][nll_bound_idx])
54 |         nll_bound_map[k] = [idx, vals]
55 | 
56 |     nll_plot_name = "NLL_BOUNDS_{}.png".format(plot_tag)
57 |     fig = plt.figure()
58 |     ax = fig.add_subplot(111)
59 |     ax.hold(True)
60 |     min_map = {}
61 |     for k, v in nll_bound_map.items():
62 |         x, y = v
63 |         y_min = np.min(y)
64 |         ax.plot(x, y, label=k)
65 |         ax.plot(x, ((0.0*y) + y_min), label="min({0:s})={1:.4f}".format(k,y_min))
66 |     ax.legend()
67 |     fig.savefig(nll_plot_name, dpi=None, facecolor='w', edgecolor='w', \
68 |         orientation='portrait', papertype=None, format=None, \
69 |         transparent=False, bbox_inches=None, pad_inches=0.1, \
70 |         frameon=None)
71 |     plt.close(fig)
72 |     #####################
73 |     # PLOT PER-STEP KLD #
74 |     #####################
75 |     valid_kl_keys = [k for k in df_keys if (k.find('valid_kl_') > -1)]
76 |     valid_kl_idx = df[valid_kl_keys[0]].keys()[1:-5]
77 |     valid_kl_map = {}
78 |     for k in valid_kl_keys:
79 |         idx = np.asarray(valid_kl_idx)
80 |         vals = np.asarray(df[k][valid_kl_idx])
81 |         valid_kl_map[k] = [idx, vals]
82 | 
83 |     kl_plot_name = "KL_TERMS_{}.png".format(plot_tag)
84 |     fig = plt.figure()
85 |     ax = fig.add_subplot(111)
86 |     ax.hold(True)
87 |     for k, v in valid_kl_map.items():
88 |         x, y = v
89 |         ax.plot(x, y, label=k)
90 |     ax.legend()
91 |     fig.savefig(kl_plot_name, dpi=None, facecolor='w', edgecolor='w', \
92 |         orientation='portrait', papertype=None, format=None, \
93 |         transparent=False, bbox_inches=None, pad_inches=0.1, \
94 |         frameon=None)
95 |     plt.close(fig)
96 | 


--------------------------------------------------------------------------------
/generative_models/blocks_models/run-att-rw.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | from __future__ import division, print_function
  4 | 
  5 | import logging
  6 | 
  7 | FORMAT = '[%(asctime)s] %(name)-15s %(message)s'
  8 | DATEFMT = "%H:%M:%S"
  9 | logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.INFO)
 10 | 
 11 | import theano
 12 | import theano.tensor as T
 13 | import ipdb
 14 | import fuel
 15 | 
 16 | from argparse import ArgumentParser
 17 | from collections import OrderedDict
 18 | from theano import tensor
 19 | 
 20 | from fuel.streams import DataStream, ForceFloatX
 21 | from fuel.schemes import SequentialScheme
 22 | from fuel.datasets.binarized_mnist import BinarizedMNIST
 23 | 
 24 | from blocks.algorithms import GradientDescent, CompositeRule, StepClipping, RMSProp, Adam, RemoveNotFinite
 25 | from blocks.initialization import Constant, IsotropicGaussian, Orthogonal 
 26 | from blocks.filter import VariableFilter
 27 | from blocks.graph import ComputationGraph
 28 | from blocks.roles import WEIGHTS, BIASES, PARAMETER
 29 | from blocks.model import Model
 30 | from blocks.monitoring import aggregation
 31 | from blocks.extensions import FinishAfter, Timing, Printing, ProgressBar
 32 | from blocks.extensions.plot import Plot
 33 | from blocks.extensions.saveload import SerializeMainLoop
 34 | from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
 35 | from blocks.main_loop import MainLoop
 36 | 
 37 | from blocks.bricks import Tanh, MLP
 38 | from blocks.bricks.cost import BinaryCrossEntropy
 39 | from blocks.bricks.recurrent import SimpleRecurrent, LSTM
 40 | 
 41 | from models import *
 42 | from attention import ZoomableAttentionWindow
 43 | 
 44 | fuel.config.floatX = theano.config.floatX
 45 | 
 46 | 
 47 | #----------------------------------------------------------------------------
 48 | def main(name, epochs, batch_size, learning_rate):
 49 |     if name is None:
 50 |         name = "att-rw" 
 51 | 
 52 |     print("\nRunning experiment %s" % name)
 53 |     print("         learning rate: %5.3f" % learning_rate) 
 54 |     print()
 55 | 
 56 | 
 57 |     #------------------------------------------------------------------------
 58 | 
 59 |     img_height, img_width = 28, 28
 60 |     
 61 |     read_N = 12
 62 |     write_N = 14
 63 | 
 64 |     inits = {
 65 |         #'weights_init': Orthogonal(),
 66 |         'weights_init': IsotropicGaussian(0.001),
 67 |         'biases_init': Constant(0.),
 68 |     }
 69 |     
 70 |     x_dim = img_height * img_width
 71 | 
 72 |     reader = ZoomableAttentionWindow(img_height, img_width,  read_N)
 73 |     writer = ZoomableAttentionWindow(img_height, img_width, write_N)
 74 | 
 75 |     # Parameterize the attention reader and writer
 76 |     mlpr = MLP(activations=[Tanh(), Identity()], 
 77 |                 dims=[x_dim, 50, 5], 
 78 |                 name="RMLP",
 79 |                 **inits)
 80 |     mlpw = MLP(activations=[Tanh(), Identity()],
 81 |                 dims=[x_dim, 50, 5],
 82 |                 name="WMLP",
 83 |                 **inits)
 84 | 
 85 |     # MLP between the reader and writer
 86 |     mlp = MLP(activations=[Tanh(), Identity()],
 87 |                 dims=[read_N**2, 300, write_N**2],
 88 |                 name="MLP",
 89 |                 **inits)
 90 | 
 91 |     for brick in [mlpr, mlpw, mlp]:
 92 |         brick.allocate()
 93 |         brick.initialize()
 94 | 
 95 |     #------------------------------------------------------------------------
 96 |     x = tensor.matrix('features')
 97 | 
 98 |     hr = mlpr.apply(x)
 99 |     hw = mlpw.apply(x)
100 | 
101 |     center_y, center_x, delta, sigma, gamma = reader.nn2att(hr)
102 |     r = reader.read(x, center_y, center_x, delta, sigma)
103 | 
104 |     h = mlp.apply(r)
105 | 
106 |     center_y, center_x, delta, sigma, gamma = writer.nn2att(hw)
107 |     c = writer.write(h, center_y, center_x, delta, sigma) / gamma
108 |     x_recons = T.nnet.sigmoid(c)
109 | 
110 |     cost = BinaryCrossEntropy().apply(x, x_recons)
111 |     cost.name = "cost"
112 | 
113 |     #------------------------------------------------------------
114 |     cg = ComputationGraph([cost])
115 |     params = VariableFilter(roles=[PARAMETER])(cg.variables)
116 | 
117 |     algorithm = GradientDescent(
118 |         cost=cost, 
119 |         params=params,
120 |         step_rule=CompositeRule([
121 |             RemoveNotFinite(),
122 |             Adam(learning_rate),
123 |             StepClipping(3.), 
124 |         ])
125 |         #step_rule=RMSProp(learning_rate),
126 |         #step_rule=Momentum(learning_rate=learning_rate, momentum=0.95)
127 |     )
128 | 
129 | 
130 |     #------------------------------------------------------------------------
131 |     # Setup monitors
132 |     monitors = [cost]
133 |     #for v in [center_y, center_x, log_delta, log_sigma, log_gamma]:
134 |     #    v_mean = v.mean()
135 |     #    v_mean.name = v.name
136 |     #    monitors += [v_mean]
137 |     #    monitors += [aggregation.mean(v)]
138 | 
139 |     train_monitors = monitors[:]
140 |     train_monitors += [aggregation.mean(algorithm.total_gradient_norm)]
141 |     train_monitors += [aggregation.mean(algorithm.total_step_norm)]
142 | 
143 |     # Live plotting...
144 |     plot_channels = [
145 |         ["cost"],
146 |     ]
147 | 
148 |     #------------------------------------------------------------
149 | 
150 |     mnist_train = BinarizedMNIST("train", sources=['features'])
151 |     mnist_test = BinarizedMNIST("test", sources=['features'])
152 |     #mnist_train = MNIST("train", binary=True, sources=['features'])
153 |     #mnist_test = MNIST("test", binary=True, sources=['features'])
154 | 
155 |     main_loop = MainLoop(
156 |         model=Model(cost),
157 |         data_stream=ForceFloatX(DataStream(mnist_train,
158 |                         iteration_scheme=SequentialScheme(
159 |                         mnist_train.num_examples, batch_size))),
160 |         algorithm=algorithm,
161 |         extensions=[
162 |             Timing(),
163 |             FinishAfter(after_n_epochs=epochs),
164 |             DataStreamMonitoring(
165 |                 monitors,
166 |                 ForceFloatX(DataStream(mnist_test,
167 |                     iteration_scheme=SequentialScheme(
168 |                     mnist_test.num_examples, batch_size))),
169 |                 prefix="test"),
170 |             TrainingDataMonitoring(
171 |                 train_monitors, 
172 |                 prefix="train",
173 |                 after_every_epoch=True),
174 |             SerializeMainLoop(name+".pkl"),
175 |             #Plot(name, channels=plot_channels),
176 |             ProgressBar(),
177 |             Printing()])
178 |     main_loop.run()
179 | 
180 | #-----------------------------------------------------------------------------
181 | 
182 | if __name__ == "__main__":
183 |     parser = ArgumentParser()
184 |     parser.add_argument("--name", type=str, dest="name",
185 |                 default=None, help="Name for this experiment")
186 |     parser.add_argument("--epochs", type=int, dest="epochs",
187 |                 default=25, help="Number of training epochs to do")
188 |     parser.add_argument("--bs", "--batch-size", type=int, dest="batch_size",
189 |                 default=100, help="Size of each mini-batch")
190 |     parser.add_argument("--lr", "--learning-rate", type=float, dest="learning_rate",
191 |                 default=1e-3, help="Learning rate")
192 |     args = parser.parse_args()
193 | 
194 |     main(**vars(args))
195 | 
196 | 


--------------------------------------------------------------------------------
/generative_models/blocks_models/sample.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python 
  2 | 
  3 | from __future__ import print_function, division
  4 | 
  5 | import logging
  6 | import theano
  7 | import theano.tensor as T
  8 | import cPickle as pickle
  9 | 
 10 | import numpy as np
 11 | 
 12 | 
 13 | from PIL import Image
 14 | from blocks.main_loop import MainLoop
 15 | from blocks.model import AbstractModel
 16 | from blocks import config
 17 | 
 18 | FORMAT = '[%(asctime)s] %(name)-15s %(message)s'
 19 | DATEFMT = "%H:%M:%S"
 20 | logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.INFO)
 21 | 
 22 | def scale_norm(arr):
 23 |     arr = arr - arr.min()
 24 |     scale = (arr.max() - arr.min())
 25 |     return scale * arr
 26 | 
 27 | def img_grid(arr, global_scale=True):
 28 |     N, height, width = arr.shape
 29 | 
 30 |     rows = int(np.sqrt(N))
 31 |     cols = int(np.sqrt(N))
 32 | 
 33 |     if rows*cols < N:
 34 |         cols = cols + 1
 35 | 
 36 |     if rows*cols < N:
 37 |         rows = rows + 1
 38 | 
 39 |     total_height = rows * height
 40 |     total_width  = cols * width
 41 | 
 42 |     if global_scale:
 43 |         arr = scale_norm(arr)
 44 | 
 45 |     I = np.zeros((total_height, total_width))
 46 | 
 47 |     for i in xrange(N):
 48 |         r = i // cols
 49 |         c = i % cols
 50 | 
 51 |         if global_scale:
 52 |             this = arr[i]
 53 |         else:
 54 |             this = scale_norm(arr[i])
 55 | 
 56 |         offset_y, offset_x = r*height, c*width
 57 |         I[offset_y:(offset_y+height), offset_x:(offset_x+width)] = this
 58 |     
 59 |     I = (255*I).astype(np.uint8)
 60 |     return Image.fromarray(I)
 61 | 
 62 | 
 63 | if __name__ == "__main__":
 64 |     from argparse import ArgumentParser
 65 | 
 66 |     parser = ArgumentParser()
 67 |     parser.add_argument("model_file", help="filename of a pickled DRAW model")
 68 |     parser.add_argument("--size", type=int,
 69 |                 default=28, help="Output image size (width and height)")
 70 |     args = parser.parse_args()
 71 | 
 72 |     logging.info("Loading file %s..." % args.model_file)
 73 |     with open(args.model_file, "rb") as f:
 74 |         p = pickle.load(f)
 75 | 
 76 |     if isinstance(p, MainLoop):
 77 |         model = p.model
 78 |     elif isinstance(p, AbstractModel):
 79 |         model = p
 80 |     else: 
 81 |         print("Don't know how to handle unpickled %s" % type(p))
 82 |         exit(1)
 83 | 
 84 |     draw = model.get_top_bricks()[0]
 85 |     # reset the random generator
 86 |     del draw._theano_rng
 87 |     del draw._theano_seed
 88 |     draw.seed_rng = np.random.RandomState(config.default_seed)
 89 | 
 90 |     #------------------------------------------------------------
 91 |     logging.info("Compiling sample function...")
 92 | 
 93 |     n_samples = T.iscalar("n_samples")
 94 |     samples = draw.sample(n_samples)
 95 | 
 96 |     do_sample = theano.function([n_samples], outputs=samples, allow_input_downcast=True)
 97 | 
 98 |     #------------------------------------------------------------
 99 |     logging.info("Sampling and saving images...")
100 |     
101 |     samples = do_sample(16*16)
102 |     #samples = np.random.normal(size=(16, 100, 28*28))
103 | 
104 |     if (len(samples.shape) == 2):
105 |         # there was only one iter of sampling, so fake more iters
106 |         temp = np.zeros((3, samples.shape[0], samples.shape[1]))
107 |         temp[0,:,:] = samples.copy()
108 |         temp[1,:,:] = samples.copy()
109 |         samples = temp
110 | 
111 |     n_iter, N, D = samples.shape
112 | 
113 |     samples = samples.reshape( (n_iter, N, args.size, args.size) )
114 |     
115 |     for i in xrange(n_iter):
116 |         img = img_grid(samples[i,:,:,:])
117 |         img.save("samples-%03d.png" % i)
118 |         
119 |     #with open("centers.pkl", "wb") as f:
120 |     #    pikle.dump(f, (center_y, center_x, delta))
121 | 
122 | 
123 | 


--------------------------------------------------------------------------------
/generative_models/blocks_models/simple_script.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | echo "RUNNING SCRIPT"
3 | tar -czf imod_bikld_results.tar *.pkl
4 | aws s3 cp imod_bikld_results.tar s3://nipsmodels/imod_bikld_results.tar
5 | echo "FINISHED SCRIPT"
6 | date '+%A %W %Y %X'
7 | 


--------------------------------------------------------------------------------
/generative_models/output_losses.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import theano
  3 | import theano.tensor as T
  4 | 
  5 | class LogisticRegression(object):
  6 |     """Multi-class Logistic Regression loss dangler."""
  7 | 
  8 |     def __init__(self, linear_layer):
  9 |         """Dangle a logistic regression from the given linear layer.
 10 | 
 11 |         The given linear layer should be a HiddenLayer (or subclass) object,
 12 |         for HiddenLayer as defined in LayerNet.py."""
 13 |         self.input_layer = linear_layer
 14 | 
 15 |     def loss_func(self, y):
 16 |         """Return the multiclass logistic regression loss for y.
 17 | 
 18 |         The class labels in y are assumed to be in correspondence with the
 19 |         set of column indices for self.input_layer.linear_output.
 20 |         """
 21 |         p_y_given_x = T.nnet.softmax(self.input_layer.linear_output)
 22 |         loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]),y])
 23 |         return loss
 24 | 
 25 |     def errors(self, y):
 26 |         """Compute the number of wrong predictions by self.input_layer.
 27 | 
 28 |         Predicted class labels are computed as the indices of the columns of
 29 |         self.input_layer.linear_output which are maximal. Wrong predictions are
 30 |         those for which max indices do not match their corresponding y values.
 31 |         """
 32 |         # Compute class memberships predicted by self.input_layer
 33 |         y_pred = T.argmax(self.input_layer.linear_output, axis=1)
 34 |         errs = 0
 35 |         # check if y has same dimension of y_pred
 36 |         if y.ndim != y_pred.ndim:
 37 |             raise TypeError('y should have the same shape as self.y_pred',
 38 |                 ('y', y.type, 'y_pred', y_pred.type))
 39 |         # check if y is of the correct datatype
 40 |         if y.dtype.startswith('int'):
 41 |             # the T.neq operator returns a vector of 0s and 1s, where 1
 42 |             # represents a mistake in prediction
 43 |             errs = T.sum(T.neq(y_pred, y))
 44 |         else:
 45 |             raise NotImplementedError()
 46 |         return errs
 47 | 
 48 | class LogRegSS(object):
 49 |     """Multi-class semi-supervised Logistic Regression loss dangler."""
 50 | 
 51 |     def __init__(self, linear_layer):
 52 |         """Dangle a logistic regression from the given linear layer.
 53 | 
 54 |         The given linear layer should be a HiddenLayer (or subclass) object,
 55 |         for HiddenLayer as defined in LayerNet.py."""
 56 |         self.input_layer = linear_layer
 57 | 
 58 |     def safe_softmax_ss(self, x):
 59 |         """Softmax that shouldn't overflow."""
 60 |         e_x = T.exp(x - T.max(x, axis=1, keepdims=True))
 61 |         x_sm = e_x / T.sum(e_x, axis=1, keepdims=True)
 62 |         return x_sm
 63 | 
 64 |     def loss_func(self, y):
 65 |         """Return the multiclass logistic regression loss for y.
 66 | 
 67 |         The class labels in y are assumed to be in correspondence with the
 68 |         set of column indices for self.input_layer.linear_output.
 69 |         """
 70 |         row_idx = T.arange(y.shape[0])
 71 |         row_mask = T.neq(y, 0).reshape((y.shape[0], 1))
 72 |         p_y_given_x = self.safe_softmax_ss(self.input_layer.linear_output)
 73 |         wacky_mat = (p_y_given_x * row_mask) + (1. - row_mask)
 74 |         loss = -T.sum(T.log(wacky_mat[row_idx,y])) / T.sum(row_mask)
 75 |         return loss
 76 | 
 77 |     def errors(self, y):
 78 |         """Compute the number of wrong predictions by self.input_layer.
 79 | 
 80 |         Predicted class labels are computed as the indices of the columns of
 81 |         self.input_layer.linear_output which are maximal. Wrong predictions are
 82 |         those for which max indices do not match their corresponding y values.
 83 |         """
 84 |         # Compute class memberships predicted by self.input_layer
 85 |         y_pred = T.argmax(self.input_layer.linear_output[:,1:], axis=1)
 86 |         y_pred = y_pred + 1
 87 |         errs = 0
 88 |         # check if y has same dimension of y_pred
 89 |         if y.ndim != y_pred.ndim:
 90 |             raise TypeError('y should have the same shape as self.y_pred',
 91 |                 ('y', y.type, 'y_pred', y_pred.type))
 92 |         # check if y is of the correct datatype
 93 |         if y.dtype.startswith('int'):
 94 |             # the T.neq operator returns a vector of 0s and 1s, where 1
 95 |             # represents a mistake in prediction
 96 |             errs = T.sum(T.neq(y_pred, y) * T.neq(y, 0))
 97 |         else:
 98 |             raise NotImplementedError()
 99 |         return errs
100 | 
101 | class MCL2Hinge(object):
102 |     """Multi-class one-vs-all L2 hinge loss dangler."""
103 | 
104 |     def __init__(self, linear_layer):
105 |         """Dangle a squred hinge loss from the given linear layer.
106 | 
107 |         The given linear layer should be a HiddenLayer (or subclass) object,
108 |         for HiddenLayer as defined in LayerNet.py."""
109 |         self.input_layer = linear_layer
110 | 
111 |     def loss_func(self, y):
112 |         """Return the multiclass squared hinge loss for y.
113 | 
114 |         The class labels in y are assumed to be in correspondence with the
115 |         set of column indices for self.input_layer.linear_output.
116 |         """
117 |         y_hat = self.input_layer.linear_output
118 |         margin_pos = T.maximum(0.0, (1.0 - y_hat))
119 |         margin_neg = T.maximum(0.0, (1.0 + y_hat))
120 |         obs_idx = T.arange(y.shape[0])
121 |         loss_pos = T.sum(margin_pos[obs_idx,y]**2.0)
122 |         loss_neg = T.sum(margin_neg**2.0) - T.sum(margin_neg[obs_idx,y]**2.0)
123 |         loss = (loss_pos + loss_neg) / y.shape[0]
124 |         return loss
125 | 
126 |     def errors(self, y):
127 |         """Compute the number of wrong predictions by self.input_layer.
128 | 
129 |         Predicted class labels are computed as the indices of the columns of
130 |         self.input_layer.linear_output which are maximal. Wrong predictions are
131 |         those for which max indices do not match their corresponding y values.
132 |         """
133 |         # Compute class memberships predicted by self.input_layer
134 |         y_pred = T.argmax(self.input_layer.linear_output, axis=1)
135 |         errs = 0
136 |         # check if y has same dimension of y_pred
137 |         if y.ndim != y_pred.ndim:
138 |             raise TypeError('y should have the same shape as self.y_pred',
139 |                 ('y', y.type, 'y_pred', y_pred.type))
140 |         # check if y is of the correct datatype
141 |         if y.dtype.startswith('int'):
142 |             # the T.neq operator returns a vector of 0s and 1s, where 1
143 |             # represents a mistake in prediction
144 |             errs = T.sum(T.neq(y_pred, y))
145 |         else:
146 |             raise NotImplementedError()
147 |         return errs
148 | 
149 | class MCL2HingeSS(object):
150 |     """Multi-class one-vs-all L2 hinge loss dangler.
151 | 
152 |     For this loss, class index 0 is never penalized, and errors for inputs
153 |     with class index 0 are similarly ignored. This is for semi-supervised
154 |     training, constrained by Theano's programming model."""
155 | 
156 |     def __init__(self, linear_layer):
157 |         """Dangle a squred hinge loss from the given linear layer.
158 | 
159 |         The given linear layer should be a HiddenLayer (or subclass) object,
160 |         for HiddenLayer as defined in LayerNet.py."""
161 |         self.input_layer = linear_layer
162 | 
163 |     def loss_func(self, y):
164 |         """Return the multiclass squared hinge loss for y.
165 | 
166 |         The class labels in y are assumed to be in correspondence with the
167 |         set of column indices for self.input_layer.linear_output.
168 |         """
169 |         y_hat = self.input_layer.linear_output
170 |         row_idx = T.arange(y.shape[0])
171 |         row_mask = T.neq(y, 0).reshape((y_hat.shape[0], 1))
172 |         margin_pos = T.maximum(0.0, (1.0 - y_hat)) * row_mask
173 |         margin_neg = T.maximum(0.0, (1.0 + y_hat)) * row_mask
174 |         loss_pos = T.sum(margin_pos[row_idx,y]**2.0)
175 |         loss_neg = T.sum(margin_neg**2.0) - T.sum(margin_neg[row_idx,y]**2.0)
176 |         loss = (loss_pos + loss_neg) / T.sum(row_mask)
177 |         return loss
178 | 
179 |     def errors(self, y):
180 |         """Compute the number of wrong predictions by self.input_layer.
181 | 
182 |         Predicted class labels are computed as the indices of the columns of
183 |         self.input_layer.linear_output which are maximal. Wrong predictions are
184 |         those for which max indices do not match their corresponding y values.
185 |         """
186 |         # Compute class memberships predicted by self.input_layer
187 |         y_pred = T.argmax(self.input_layer.linear_output[:,1:], axis=1)
188 |         y_pred = y_pred + 1
189 |         errs = 0
190 |         # check if y has same dimension of y_pred
191 |         if y.ndim != y_pred.ndim:
192 |             raise TypeError('y should have the same shape as self.y_pred',
193 |                 ('y', y.type, 'y_pred', y_pred.type))
194 |         # check if y is of the correct datatype
195 |         if y.dtype.startswith('int'):
196 |             # the T.neq operator returns a vector of 0s and 1s, where 1
197 |             # represents a mistake in prediction
198 |             errs = T.sum(T.neq(y_pred, y) * T.neq(y, 0))
199 |         else:
200 |             raise NotImplementedError()
201 |         return errs
202 | 


--------------------------------------------------------------------------------
/generative_models/result_parsing_script.py:
--------------------------------------------------------------------------------
 1 | import os as os
 2 | import sys as sys
 3 | import numpy as np
 4 | import numpy.random as npr
 5 | 
 6 | def print_res(res):
 7 |     print("err: {0:.4f}".format(res['err']))
 8 |     for h_param in ['learn_rate', 'lam_cat', 'lam_pea', 'lam_ent', 'lam_l2w']:
 9 |         print("    {0:s}: {1:.4f}".format(h_param, res[h_param]))
10 |     return 1
11 | 
12 | def parse_file(f_name):
13 |     f_lines = [l for l in open(f_name).readlines()]
14 |     f_dict = {}
15 |     for i in [1, 2, 3, 4, 5]:
16 |         f_dict[f_lines[i].split()[0].strip(':')] = float(f_lines[i].split()[1])
17 |     e_lines = [l for l in f_lines if ('va_err:' in l)]
18 |     e_vals = [float(l.split()[-1]) for l in e_lines]
19 |     mean_err = sum(e_vals[-10:]) / len(e_vals[-10:])
20 |     f_dict['err'] = mean_err
21 |     return f_dict
22 | 
23 | comp_func = lambda x, y: 1 if (x['err'] > y['err']) else -1
24 | 
25 | if __name__=="__main__":
26 | 	if (len(sys.argv) < 2):
27 | 		print("FILE TAG REQUIRED!")
28 | 		assert(False)
29 | 	res_dicts = [parse_file(f) for f in os.listdir(os.getcwd()) if (sys.argv[1] in f)]
30 | 	res_dicts.sort(cmp=comp_func)
31 | 	print("**RESULTS**")
32 | 	for rd in res_dicts:
33 | 		print("========================================")
34 | 		print_res(rd)
35 | 


--------------------------------------------------------------------------------
/nlp/CythonFuncs.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | 
 3 | # try to compile and use the faster cython version
 4 | import os
 5 | from numpy import get_include
 6 | import pyximport
 7 | models_dir = os.path.dirname(__file__) or os.getcwd()
 8 | pyximport.install(setup_args={"include_dirs": [models_dir, get_include()]})
 9 | from CythonFuncsPyx import w2v_ff_bp_pyx, ag_update_2d_pyx, ag_update_1d_pyx, \
10 |                            lut_bp_pyx, nsl_ff_bp_pyx, acl_ff_bp_pyx, DO_INIT
11 | 
12 | import numpy as np
13 | import numpy.random as npr
14 | import threading
15 | from ctypes import pythonapi, c_void_p
16 | 
17 | ########################################
18 | # MULTITHREADING HELPER-FUNC AND DEFNS #
19 | ########################################
20 | 
21 | THREAD_NUM = 4
22 | 
23 | def make_multithread(inner_func, numthreads):
24 |     def func_mt(*args):
25 |         length = len(args[0])
26 |         sp_idx = np.arange(0,length).astype(np.uint32)
27 |         chunklen = (length + (numthreads-1)) // numthreads
28 |         chunkargs = [(sp_idx[i*chunklen:(i+1)*chunklen],)+args for i in range(numthreads)]
29 |         # Start a thread for all but the last chunk of work
30 |         threads = [threading.Thread(target=inner_func, args=cargs)
31 |                    for cargs in chunkargs[:-1]]
32 |         for thread in threads:
33 |             thread.start()
34 |         # Give the last chunk of work to the main thread
35 |         inner_func(*chunkargs[-1])
36 |         for thread in threads:
37 |             thread.join()
38 |         return 1
39 |     def func_st(*args):
40 |         length = len(args[0])
41 |         sp_idx = np.arange(0,length).astype(np.uint32)
42 |         sp_args = (sp_idx,) + args
43 |         inner_fun(*sp_args)
44 |     func = None
45 |     if numthreads == 1:
46 |         func = func_st
47 |     else:
48 |         func = func_mt
49 |     return func_mt
50 | 
51 | ##############################
52 | # NUMBA FUNCTION DEFINITIONS #
53 | ##############################
54 | 
55 | w2v_ff_bp = make_multithread(w2v_ff_bp_pyx, THREAD_NUM)
56 | hsm_ff_bp = make_multithread(nsl_ff_bp_pyx, THREAD_NUM)
57 | nsl_ff_bp = make_multithread(nsl_ff_bp_pyx, THREAD_NUM)
58 | lut_bp = make_multithread(lut_bp_pyx, THREAD_NUM)
59 | 
60 | ag_update_2d = make_multithread(ag_update_2d_pyx, THREAD_NUM)
61 | ag_update_1d = make_multithread(ag_update_1d_pyx, 1)
62 | 
63 | 
64 | ##############
65 | # EYE BUFFER #
66 | ##############
67 | 


--------------------------------------------------------------------------------
/nlp/GPULayers.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | 
  3 | # Imports of public stuff
  4 | import numpy as np
  5 | import numpy.random as npr
  6 | import gnumpy as gp
  7 | import numexpr as ne
  8 | 
  9 | # Imports of my stuff
 10 | from HelperFuncs import randn, ones, zeros
 11 | 
 12 | # UH OH, GLOBAL PARAMS (TODO: GET RID OF THESE!)
 13 | ADA_EPS = 1e-3
 14 | MAX_HSM_KEY = 12345678
 15 | 
 16 | #################################
 17 | # FULLY-CONNECTED SOFTMAX LAYER #
 18 | #################################
 19 | 
 20 | class FullLayer:
 21 |     def __init__(self, in_dim=0, max_out_key=0):
 22 |         # Set dimension of incoming vectors and the number of outcomes for
 23 |         # which to perform prediction. Increment the requested prediction size
 24 |         # by 1, to accommodate 0 indexing.
 25 |         out_dim = max_out_key + 1
 26 |         self.dim_input = in_dim
 27 |         self.dim_output = out_dim
 28 |         # Initialize parameters, gradients, and adagrad "momentums"
 29 |         self.params = {}
 30 |         self.params['W'] = 0.01 * gp.randn((in_dim, out_dim))
 31 |         self.params['b'] = gp.zeros((1, out_dim))
 32 |         self.grads = {}
 33 |         self.grads['W'] = gp.zeros((in_dim, out_dim))
 34 |         self.grads['b'] = gp.zeros((1, out_dim))
 35 |         self.moms = {}
 36 |         self.moms['W'] = gp.zeros((in_dim, out_dim))
 37 |         self.moms['b'] = gp.zeros((1, out_dim))
 38 |         # Initialize temp vars to use during feedforward/backpropagation
 39 |         self.X = []
 40 |         self.Y = []
 41 |         self.Y_cat = []
 42 |         return
 43 | 
 44 |     def init_params(self, w_scale=0.01, b_scale=0.0):
 45 |         """Randomly initialize the weights in this layer."""
 46 |         self.params['W'] = w_scale * gp.randn((self.dim_input, self.dim_output))
 47 |         self.grads['W'] = gp.zeros((self.dim_input, self.dim_output))
 48 |         self.params['b'] = gp.zeros((1, self.dim_output))
 49 |         self.grads['b'] = gp.zeros((1, self.dim_output))
 50 |         return
 51 | 
 52 |     def clip_params(self, max_norm=10.0):
 53 |         """Bound L2 (row-wise) norm of W by max_norm."""
 54 |         M = self.params['W']
 55 |         m_scales = max_norm / gp.sqrt(gp.sum(M**2.0,axis=1) + 1e-5)
 56 |         mask = (m_scales < 1.0) # with gnumpy, this already comes as float32
 57 |         m_scales = (m_scales * mask) + (1.0 - mask)
 58 |         self.params['W'] = M * m_scales[:,gp.newaxis]
 59 |         return
 60 | 
 61 |     def feedforward(self, X):
 62 |         """Run feedforward for this layer."""
 63 |         # Cleanup debris from any previous feedforward
 64 |         self._cleanup()
 65 |         # Do new feedforward...
 66 |         self.X = gp.garray(X)
 67 |         self.Y = gp.dot(self.X, self.params['W']) + self.params['b']
 68 |         return self.Y
 69 | 
 70 |     def backprop(self, Y_cat, L_ary=None, return_on_gpu=False):
 71 |         """Backprop through softmax using the given target predictions."""
 72 |         # Compute gradient of cross-entropy objective, based on the given
 73 |         # target predictions and the most recent feedforward information.
 74 |         L, dLdY = self.xent_loss_and_grad(self.Y, Y_cat.astype(np.uint32))
 75 |         # Backprop cross-ent grads to get grads w.r.t. layer parameters
 76 |         dLdW = gp.dot(self.X.T, dLdY)
 77 |         dLdb = gp.sum(dLdY, axis=0)
 78 |         dLdb = dLdb[gp.newaxis,:]
 79 |         self.grads['W'] += dLdW
 80 |         self.grads['b'] += dLdb
 81 |         # Backprop cross-ent grads to get grads w.r.t. layer input
 82 |         dLdX = gp.dot(dLdY, self.params['W'].T)
 83 |         # Return gradients w.r.t. to input, either on or off the GPU
 84 |         if not return_on_gpu:
 85 |             dLdX = gp.as_numpy_array(dLdX).astype(np.float32)
 86 |         # Write loss into L_ary if it was given
 87 |         L_ary[0] = L
 88 |         return dLdX
 89 | 
 90 |     def safe_softmax(self, Y):
 91 |         """Compute a reasonably (numerically) safe softmax."""
 92 |         Y_max = gp.max(Y, axis=1)
 93 |         Y_max = Y_max[:,gp.newaxis]
 94 |         Y_exp = gp.exp(Y - Y_max)
 95 |         Y_sum = gp.sum(Y_exp, axis=1)
 96 |         Y_sum = Y_sum[:,gp.newaxis]
 97 |         Y_sm = Y_exp / Y_sum
 98 |         return Y_sm
 99 | 
100 |     def xent_loss_and_grad(self, Yh, Y_cat):
101 |         """Cross-entropy loss for predictions Yh given targets Y_cat."""
102 |         # Convert from categorical classes to "one-hot" target vectors
103 |         Y_ind = zeros(Yh.shape)
104 |         Y_ind[np.arange(Y_ind.shape[0]), Y_cat] = 1.0
105 |         # Push one-hot targets vectors to the GPU
106 |         Y_ind = gp.garray(Y_ind)
107 |         # Compute softmax and then cross-entropy loss
108 |         Yh_sm = self.safe_softmax(Yh)
109 |         L = -gp.sum((Y_ind * gp.log(Yh_sm)))
110 |         dLdYh = Yh_sm - Y_ind
111 |         return [L, dLdYh]
112 | 
113 |     def l2_regularize(self, lam_l2=1e-5):
114 |         """Apply some amount of l2 "shrinkage" to weights and biases."""
115 |         self.params['W'] -= lam_l2 * self.params['W']
116 |         self.params['b'] -= lam_l2 * self.params['b']
117 |         return
118 | 
119 |     def apply_grad(self, learn_rate=1e-2,):
120 |         """Apply the current accumulated gradients, with adagrad."""
121 |         # Update the adagrad "momentums"
122 |         self.moms['W'] = (0.95 * self.moms['W']) + (0.05 * self.grads['W']**2.0)
123 |         self.moms['b'] = (0.95 * self.moms['b']) + (0.05 * self.grads['b']**2.0)
124 |         # Apply adagrad-style updates using current grads and moms
125 |         self.params['W'] -= learn_rate * (self.grads['W'] / \
126 |                 (gp.sqrt(self.moms['W']) + ADA_EPS))
127 |         self.params['b'] -= learn_rate * (self.grads['b'] / \
128 |                 (gp.sqrt(self.moms['b']) + ADA_EPS))
129 |         # Reset gradient accumulators
130 |         self.reset_grads()
131 |         return
132 | 
133 |     def reset_grads(self):
134 |         """Reset the gradient accumulators for this layer."""
135 |         self.grads['W'] = 0.0 * self.grads['W']
136 |         self.grads['b'] = 0.0 * self.grads['b']
137 |         return
138 | 
139 |     def reset_moms(self, ada_init=1e-3):
140 |         """Reset the adagrad "momentums" for this layer."""
141 |         self.moms['W'] = (0.0 * self.moms['W']) + ada_init
142 |         self.moms['b'] = (0.0 * self.moms['b']) + ada_init
143 |         return
144 | 
145 |     def _cleanup(self):
146 |         """Cleanup temp vars used during feedforward/backprop."""
147 |         self.X = []
148 |         self.Y = []
149 |         self.Y_cat = []
150 |         return
151 | 
152 | ##########################
153 | # NOISE INJECTION LAYERS #
154 | ##########################
155 | 
156 | class NoiseLayer:
157 |     def __init__(self, drop_rate=0.0, fuzz_scale=0.0):
158 |         # Set stuff required for managing this type of layer
159 |         self.dYdX = []
160 |         self.drop_rate = drop_rate
161 |         self.drop_scale = 1.0 / (1.0 - drop_rate)
162 |         self.fuzz_scale = fuzz_scale
163 |         # Set stuff common to all layer types
164 |         self.X = []
165 |         self.Y = []
166 |         self.dLdY = []
167 |         return
168 | 
169 |     def set_noise_params(self, drop_rate=0.0, fuzz_scale=0.0):
170 |         """Set the drop rate for this drop layer."""
171 |         self.drop_rate = drop_rate
172 |         self.drop_scale = 1.0 / (1.0 - drop_rate)
173 |         self.fuzz_scale = fuzz_scale
174 |         return
175 | 
176 |     def feedforward(self, X, return_on_gpu=False):
177 |         """Perform feedforward through this layer.
178 |         """
179 |         # Cleanup debris from any previous feedforward
180 |         self._cleanup()
181 |         # Record (a pointer to) the passed input
182 |         self.X = gp.garray(X)
183 |         # Generate and apply a dropout mask to the input
184 |         if (self.drop_rate > 1e-4):
185 |             drop_mask = self.drop_scale * \
186 |                     (gp.rand((self.X.shape[0], self.X.shape[1])) > self.drop_rate)
187 |         else:
188 |             drop_mask = gp.ones((self.X.shape[0], self.X.shape[1]))
189 |         self.dYdX = drop_mask
190 |         if (self.fuzz_scale > 1e-4):
191 |             fuzz_bump = (self.fuzz_scale / self.drop_scale) * \
192 |                     gp.randn((self.X.shape[0], self.X.shape[1]))
193 |             self.Y = drop_mask * (self.X + fuzz_bump)
194 |         else:
195 |             self.Y = drop_mask * self.X
196 |         if not return_on_gpu:
197 |             self.Y = gp.as_numpy_array(self.Y)
198 |         return self.Y
199 | 
200 |     def backprop(self, dLdY, return_on_gpu=False):
201 |         """Perform backprop through this layer.
202 |         """
203 |         # Backprop is just multiplication by the mask from feedforward
204 |         dLdX = gp.garray(dLdY) * self.dYdX
205 |         if not return_on_gpu:
206 |             dLdX = gp.as_numpy_array(dLdX).astype(np.float32)
207 |         return dLdX
208 | 
209 |     def _cleanup(self):
210 |         """Clear all temp variables for this layer."""
211 |         self.X = []
212 |         self.Y = []
213 |         self.dYdX = []
214 |         return
215 | 
216 | ###################################
217 | # TEST BASIC MODULE FUNCTIONALITY #
218 | ###################################
219 | 
220 | def run_test():
221 |     #####################
222 |     # TODO: write tests # 
223 |     ##################### 
224 |     print("TODO: WRITE TEST FOR GPULayers.py")
225 | 
226 | 
227 | if __name__ == '__main__':
228 |     run_test()
229 | 
230 | 
231 | 
232 | 
233 | 
234 | 
235 | 
236 | 
237 | 
238 | 
239 | ##############
240 | # EYE BUFFER #
241 | ##############
242 | 


--------------------------------------------------------------------------------
/nlp/NumbaFuncs.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | 
  3 | import numpy as np
  4 | import numpy.random as npr
  5 | import threading
  6 | import numba
  7 | from math import exp, log, sqrt
  8 | from numba import jit, void, i4, f4, u4
  9 | from ctypes import pythonapi, c_void_p
 10 | 
 11 | ADA_EPS = 0.001
 12 | 
 13 | ########################################
 14 | # MULTITHREADING HELPER-FUNC AND DEFNS #
 15 | ########################################
 16 | 
 17 | THREAD_NUM = 4
 18 | 
 19 | savethread = pythonapi.PyEval_SaveThread
 20 | savethread.argtypes = []
 21 | savethread.restype = c_void_p
 22 | 
 23 | restorethread = pythonapi.PyEval_RestoreThread
 24 | restorethread.argtypes = [c_void_p]
 25 | restorethread.restype = None
 26 | 
 27 | def make_multithread(inner_func, numthreads):
 28 |     def func_mt(*args):
 29 |         length = len(args[0])
 30 |         sp_idx = np.arange(0,length).astype(np.int32)
 31 |         chunklen = (length + (numthreads-1)) // numthreads
 32 |         chunkargs = [(sp_idx[i*chunklen:(i+1)*chunklen],)+args for i in range(numthreads)]
 33 |         # Start a thread for all but the last chunk of work
 34 |         threads = [threading.Thread(target=inner_func, args=cargs)
 35 |                    for cargs in chunkargs[:-1]]
 36 |         for thread in threads:
 37 |             thread.start()
 38 |         # Give the last chunk of work to the main thread
 39 |         inner_func(*chunkargs[-1])
 40 |         for thread in threads:
 41 |             thread.join()
 42 |         return 1
 43 |     def func_st(*args):
 44 |         length = len(args[0])
 45 |         sp_idx = np.arange(0,length).astype(np.int32)
 46 |         sp_args = (sp_idx,) + args
 47 |         inner_fun(*sp_args)
 48 |     func = None
 49 |     if numthreads == 1:
 50 |         func = func_st
 51 |     else:
 52 |         func = func_mt
 53 |     return func_mt
 54 | 
 55 | ##############################
 56 | # NUMBA FUNCTION DEFINITIONS #
 57 | ##############################
 58 | 
 59 | def w2v_ff_bp_sp(sp_idx, anc_idx, pn_idx, pn_sign, Wa, Wc, b, dWa, dWc, db, L, do_grad):
 60 |     """Feedforward and backprop for unified (neg-sample) word-2-vec layer."""
 61 |     threadstate = savethread()
 62 |     sp_size = sp_idx.shape[0]
 63 |     cols = pn_idx.shape[1]
 64 |     vec_dim = Wa.shape[1]
 65 |     for sp_i in range(sp_size):
 66 |         i = sp_idx[sp_i]
 67 |         ai = anc_idx[i]
 68 |         for j in range(cols):
 69 |             ci = pn_idx[i,j]
 70 |             y = b[ci]
 71 |             for k in range(vec_dim):
 72 |                 y += (Wa[ai,k] * Wc[ci,k])
 73 |             exp_pns_y = exp(pn_sign[i,j] * y)
 74 |             L[0] += log(1.0 + exp_pns_y)
 75 |             if (do_grad == 1):
 76 |                 dLdy = pn_sign[i,j] * (exp_pns_y / (1.0 + exp_pns_y))
 77 |                 db[ci] = db[ci] + dLdy
 78 |                 for k in range(vec_dim):
 79 |                     dWa[ai,k] += (dLdy * Wc[ci,k])
 80 |                     dWc[ci,k] += (dLdy * Wa[ai,k])
 81 |     restorethread(threadstate)
 82 |     return
 83 | fn_sig_1 = void(i4[:], i4[:], i4[:,:], f4[:,:], f4[:,:], f4[:,:], f4[:], f4[:,:], f4[:,:], f4[:], f4[:], i4)
 84 | w2v_ff_bp_st = jit(fn_sig_1, nopython=True)(w2v_ff_bp_sp)
 85 | w2v_ff_bp = make_multithread(w2v_ff_bp_st, THREAD_NUM)
 86 | 
 87 | def nsl_bp_sp(sp_idx, table_idx, X, W, dLdY, dLdX, dW, db):
 88 |     """Backprop for NSLayer: main loop in Numba-friendly form."""
 89 |     threadstate = savethread()
 90 |     rows = sp_idx.shape[0]
 91 |     cols = dLdY.shape[1]
 92 |     vec_dim = X.shape[1]
 93 |     for spi in range(rows):
 94 |         i = sp_idx[spi]
 95 |         for j in range(cols):
 96 |             dldy = dLdY[i,j]
 97 |             idx = table_idx[i,j]
 98 |             db[idx] += dldy
 99 |             for k in range(vec_dim):
100 |                 dW[idx,k] += dldy * X[i,k]
101 |                 dLdX[i,k] += dldy * W[idx,k]
102 |     restorethread(threadstate)
103 |     return
104 | fn_sig_2 = void(i4[:], i4[:,:], f4[:,:], f4[:,:], f4[:,:], f4[:,:], f4[:,:], f4[:])
105 | nsl_bp_st = jit(fn_sig_2, nopython=True)(nsl_bp_sp)
106 | nsl_bp = make_multithread(nsl_bp_st, THREAD_NUM)
107 | 
108 | def nsl_ff_sp(sp_idx, table_idx, X, W, b, Y):
109 |     """Feedforward for NSLayer: main loop in Numba-friendly form."""
110 |     threadstate = savethread()
111 |     rows = sp_idx.shape[0]
112 |     cols = table_idx.shape[1]
113 |     vec_dim = X.shape[1]
114 |     for spi in range(rows):
115 |         i = sp_idx[spi]
116 |         for j in range(cols):
117 |             idx = table_idx[i,j]
118 |             Y[i,j] = b[idx]
119 |             for k in range(vec_dim):
120 |                 Y[i,j] += X[i,k] * W[idx,k]
121 |     restorethread(threadstate)
122 |     return
123 | fn_sig_3 = void(i4[:], i4[:,:], f4[:,:], f4[:,:], f4[:], f4[:,:])
124 | nsl_ff_st = jit(fn_sig_3, nopython=True)(nsl_ff_sp)
125 | nsl_ff = make_multithread(nsl_ff_st, THREAD_NUM)
126 | 
127 | def ag_update_2d_sp(sp_idx, row_idx, W, dW, mW, learn_rate):
128 |     """Element-wise partial update ala adagrad.
129 | 
130 |     For the entries indicated by row_idx, this first updates the adagrad sums
131 |     of squares in mW, then updates the params in W, and finally sets the
132 |     grads in dW back to 0.
133 |     """
134 |     threadstate = savethread()
135 |     row_count = sp_idx.shape[0]
136 |     vec_dim = W.shape[1]
137 |     for spi in range(row_count):
138 |         idx = row_idx[sp_idx[spi]]
139 |         for j in range(vec_dim):
140 |             mW[idx,j] = (0.95 * mW[idx,j]) + (0.05 * dW[idx,j] * dW[idx,j])
141 |             W[idx,j] -= (learn_rate * (dW[idx,j] / (sqrt(mW[idx,j]) + ADA_EPS)))
142 |             dW[idx,j] = 0.0
143 |     restorethread(threadstate)
144 |     return
145 | fn_sig_4 = void(i4[:], i4[:], f4[:,:], f4[:,:], f4[:,:], f4)
146 | ag_update_2d_st = jit(fn_sig_4, nopython=True)(ag_update_2d_sp)
147 | ag_update_2d = make_multithread(ag_update_2d_st, THREAD_NUM)
148 | 
149 | @numba.jit("void(i4[:], f4[:], f4[:], f4[:], f4)")
150 | def ag_update_1d(row_idx, W, dW, mW, learn_rate):
151 |     """Element-wise partial update ala adagrad.
152 | 
153 |     For the entries indicated by row_idx, this first updates the adagrad sums
154 |     of squares in mW, then updates the params in W, and finally sets the
155 |     grads in dW back to 0.
156 |     """
157 |     row_count = row_idx.shape[0]
158 |     for i in range(row_count):
159 |         idx = row_idx[i]
160 |         mW[idx] = (0.95 * mW[idx]) + (0.05 * dW[idx] * dW[idx])
161 |         W[idx] -= learn_rate * (dW[idx] / (sqrt(mW[idx]) + ADA_EPS))
162 |         dW[idx] = 0.0
163 |     return
164 | 
165 | def lut_sp(sp_idx, row_idx, dLdY, dW):
166 |     """Simple row-wise updates for adjusting dW with dLdY.
167 | 
168 |     This adds each row of dLdY to some row of dW. The row of dW to adjust
169 |     is given by the corresponding item in row_idx."""
170 |     threadstate = savethread()
171 |     row_count = sp_idx.shape[0]
172 |     vec_dim = dW.shape[1]
173 |     for i in range(row_count):
174 |         idx = row_idx[sp_idx[i]]
175 |         for j in range(vec_dim):
176 |             dW[idx,j] += dLdY[i,j]
177 |     restorethread(threadstate)
178 |     return
179 | fn_sig_5 = void(i4[:], i4[:], f4[:,:], f4[:,:])
180 | lut_st = jit(fn_sig_5, nopython=True)(lut_sp)
181 | lut_bp = make_multithread(lut_st, THREAD_NUM)
182 | 
183 | 
184 | def hsm_ff_bp_sp(sp_idx, X, code_keys, code_signs, W, b, dLdX, dLdW, dLdb, L):
185 |     threadstate = savethread()
186 |     obs_count = sp_idx.shape[0]
187 |     code_len = code_keys.shape[1]
188 |     vec_dim = X.shape[1]
189 |     for spi in range(obs_count):
190 |         i = sp_idx[spi]
191 |         for j in range(code_len):
192 |             code_key = code_keys[i,j]
193 |             if code_key < 1234567:
194 |                 y = b[code_key]
195 |                 # for speed, this needs to change to sdot via BLAS
196 |                 for k in range(vec_dim):
197 |                     y += X[i,k] * W[code_key,k]
198 |                 neg_label = -1.0 * code_signs[i,j]
199 |                 exp_y = exp(neg_label * y)
200 |                 L[i,j] = log(1.0 + exp_y)
201 |                 g = neg_label * (exp_y / (1.0 + exp_y))
202 |                 dLdb[code_key] += g
203 |                 # for speed, this needs to change to saxpy via BLAS
204 |                 for k in range(vec_dim):
205 |                     dLdX[i,k] += g * W[code_key,k]
206 |                     dLdW[code_key,k] += g * X[i,k]
207 |     restorethread(threadstate)
208 |     return
209 | fn_sig_6 = void(i4[:], f4[:,:], u4[:,:], f4[:,:], f4[:,:], f4[:], f4[:,:], f4[:,:], f4[:], f4[:,:])
210 | hsm_ff_bp_st = jit(fn_sig_6, nopython=True)(hsm_ff_bp_sp)
211 | hsm_ff_bp = make_multithread(hsm_ff_bp_st, THREAD_NUM)
212 | 
213 | ##############
214 | # EYE BUFFER #
215 | ##############
216 | 


--------------------------------------------------------------------------------
/nlp/TestCuBlas.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | This example uses cuBLAS gemm routine to perform matrix-matrix multiplication.
  3 | Please refer to the documentation for details of how to use the gemm routine
  4 |   http://docs.continuum.io/numbapro/cudalib.html#blas-level-2
  5 |   
  6 | Note: cuBLAS uses Fortran layout
  7 | '''
  8 | 
  9 | import numbapro.cudalib.cublas as cublas
 10 | from numbapro import cuda
 11 | import numpy as np
 12 | import numpy.random as npr
 13 | from timeit import default_timer as timer
 14 | import gnumpy as gp
 15 | 
 16 | N = 5000     # no. of rows/cols
 17 | 
 18 | def gemm_v1():
 19 |     '''
 20 |     Note that all arrays are in Fortran order.
 21 |     '''
 22 |     print("Version 1".center(80, '='))
 23 |     # Prepare arrays for input
 24 |     A = np.array(np.arange(N ** 2, dtype=np.float32).reshape(N, N), order='F')
 25 |     B = np.array(np.arange(N) + 10, dtype=A.dtype, order='F')
 26 |     D = np.zeros_like(A, order='F')
 27 | 
 28 |     # NumPy
 29 |     start = timer()
 30 |     E = np.dot(A, np.diag(B))
 31 |     numpy_time = timer() - start
 32 |     print("Numpy took %f seconds" % numpy_time)
 33 | 
 34 |     # cuBLAS
 35 |     blas = cublas.Blas()
 36 | 
 37 |     stream = cuda.stream()
 38 |     cuda.to_device(A, stream=stream)
 39 |     stream.synchronize()
 40 |     
 41 |     start = timer()
 42 |     blas.gemm('N', 'N', N, N, N, 1.0, A, np.diag(B), 1.0, D)
 43 |     cuda_time = timer() - start
 44 | 
 45 |     print("CUBLAS took %f seconds" % cuda_time)
 46 |     diff = np.abs(D - E)
 47 |     print("Maximum error %f" % np.max(diff))
 48 | 
 49 | 
 50 | def gemm_v2():
 51 |     """
 52 |     Let GEMM transpose the input matrices so that they can be in C order,
 53 |     originally.  Note that the output matrix is still in Fortran array.
 54 |     The string arguments in gemm tells it to apply transformation on the input
 55 |     matrices.
 56 |     
 57 |     See argument description in:
 58 |         http://docs.continuum.io/numbapro/cudalib.html#blas-level-2
 59 |     """
 60 |     print("Version 2".center(80, '='))
 61 |     # Prepare arrays for input
 62 |     A = np.array(np.arange(N ** 2, dtype=np.float32).reshape(N, N))
 63 |     B = np.array(np.arange(N) + 10, dtype=A.dtype)
 64 |     D = np.zeros_like(A, order='F')
 65 | 
 66 |     # NumPy
 67 |     start = timer()
 68 |     E = np.dot(A, np.diag(B))
 69 |     numpy_time = timer() - start
 70 |     print("Numpy took %f seconds" % numpy_time)
 71 | 
 72 |     # cuBLAS
 73 |     blas = cublas.Blas()
 74 |     
 75 |     stream = cuda.stream()
 76 |     cuda.to_device(A, stream=stream)
 77 |     stream.synchronize()
 78 | 
 79 |     start = timer()
 80 |     blas.gemm('T', 'T', N, N, N, 1.0, A, np.diag(B), 1.0, D)
 81 |     cuda_time = timer() - start
 82 | 
 83 |     print("CUBLAS took %f seconds" % cuda_time)
 84 |     diff = np.abs(D - E)
 85 |     print("Maximum error %f" % np.max(diff))
 86 | 
 87 | 
 88 | def main():
 89 |     gemm_v1()
 90 |     gemm_v2()
 91 | 
 92 | if __name__ == '__main__':
 93 |     main()
 94 |     start = timer()
 95 |     A = npr.randn(256, 1500)
 96 |     for i in range(1000):
 97 |         B = gp.garray(A)
 98 |         B = B + B
 99 |         A = gp.randn((256, 1500)).as_numpy_array()
100 |     berk_time = timer() - start
101 |     print("Berk time: {0:.4f}".format(berk_time))
102 |     print("  @ {0:.4f} transfers/second".format(1000.0 / berk_time))


--------------------------------------------------------------------------------
/nlp/gensim_code/GensimUtils.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright (C) 2010 Radim Rehurek <radimrehurek@seznam.cz>
  5 | # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
  6 | 
  7 | """
  8 | This module contains various general utility functions.
  9 | """
 10 | 
 11 | try:
 12 |     import cPickle as _pickle
 13 | except ImportError:
 14 |     import pickle as _pickle
 15 | 
 16 | import re
 17 | import os
 18 | import sys
 19 | import itertools
 20 | import traceback
 21 | import unicodedata
 22 | 
 23 | if sys.version_info[0] >= 3:
 24 |     unicode = str
 25 | 
 26 | from six import iteritems, u
 27 | 
 28 | 
 29 | PAT_ALPHABETIC = re.compile('(((?![\d])\w)+)', re.UNICODE)
 30 | RE_HTML_ENTITY = re.compile(r'&(#?)(x?)(\w+);', re.UNICODE)
 31 | 
 32 | 
 33 | def tokenize(text, errors="strict", to_lower=False):
 34 |     """
 35 |     Iteratively yield tokens as unicode strings, optionally also lowercasing them.
 36 | 
 37 |     Input text may be either unicode or utf8-encoded byte string.
 38 | 
 39 |     The tokens on output are maximal contiguous sequences of alphabetic
 40 |     characters (no digits!).
 41 | 
 42 |     """
 43 |     text = to_unicode(text, errors=errors)
 44 |     if to_lower:
 45 |         text = text.lower()
 46 |     for match in PAT_ALPHABETIC.finditer(text):
 47 |         yield match.group()
 48 | 
 49 | 
 50 | def simple_preprocess(doc, min_len=2, max_len=15):
 51 |     """
 52 |     Convert a document into a list of tokens.
 53 | 
 54 |     This lowercases, tokenizes, stems, normalizes etc. -- the output are final
 55 |     tokens = unicode strings, that won't be processed any further.
 56 | 
 57 |     """
 58 |     tokens = [token for token in tokenize(doc, to_lower=True, errors='ignore')
 59 |             if min_len <= len(token) <= max_len and not token.startswith('_')]
 60 |     return tokens
 61 | 
 62 | 
 63 | def to_unicode(text, encoding='utf8', errors='strict'):
 64 |     """Convert a string (bytestring in `encoding` or unicode), to unicode."""
 65 |     if isinstance(text, unicode):
 66 |         return text
 67 |     return unicode(text, encoding, errors=errors)
 68 | 
 69 | 
 70 | def make_closing(base, **attrs):
 71 |     """
 72 |     Add support for `with Base(attrs) as fout:` to the base class if it's missing.
 73 |     The base class' `close()` method will be called on context exit, to always close the file properly.
 74 | 
 75 |     This is needed for gzip.GzipFile, bz2.BZ2File etc in older Pythons (<=2.6), which otherwise
 76 |     raise "AttributeError: GzipFile instance has no attribute '__exit__'".
 77 | 
 78 |     """
 79 |     if not hasattr(base, '__enter__'):
 80 |         attrs['__enter__'] = lambda self: self
 81 |     if not hasattr(base, '__exit__'):
 82 |         attrs['__exit__'] = lambda self, type, value, traceback: self.close()
 83 |     return type('Closing' + base.__name__, (base, object), attrs)
 84 | 
 85 | 
 86 | def smart_open(fname, mode='rb'):
 87 |     _, ext = os.path.splitext(fname)
 88 |     if ext == '.bz2':
 89 |         from bz2 import BZ2File
 90 |         return make_closing(BZ2File)(fname, mode)
 91 |     if ext == '.gz':
 92 |         from gzip import GzipFile
 93 |         return make_closing(GzipFile)(fname, mode)
 94 |     return open(fname, mode)
 95 | 
 96 | 
 97 | def pickle(obj, fname, protocol=-1):
 98 |     """Pickle object `obj` to file `fname`."""
 99 |     with smart_open(fname, 'wb') as fout: # 'b' for binary, needed on Windows
100 |         _pickle.dump(obj, fout, protocol=protocol)
101 | 
102 | 
103 | def unpickle(fname):
104 |     """Load pickled object from `fname`"""
105 |     with smart_open(fname) as f:
106 |         return _pickle.load(f)
107 | 
108 | 
109 | def revdict(d):
110 |     """
111 |     Reverse a dictionary mapping.
112 | 
113 |     When two keys map to the same value, only one of them will be kept in the
114 |     result (which one is kept is arbitrary).
115 | 
116 |     """
117 |     return dict((v, k) for (k, v) in iteritems(d))
118 | 
119 | 
120 | 


--------------------------------------------------------------------------------
/nlp/gensim_code/TestGensim.py:
--------------------------------------------------------------------------------
 1 | import logging, os
 2 | import numpy as np
 3 | import numpy.random as npr
 4 | import W2VSimple as w2vs
 5 | 
 6 | logging.basicConfig(format='%(message)s', level=logging.INFO)
 7 | 
 8 | class MySentences(object):
 9 |     def __init__(self, dirname):
10 |         self.dirname = dirname
11 |     def __iter__(self):
12 |         for fname in os.listdir(self.dirname):
13 |             for line in open(os.path.join(self.dirname, fname)):
14 |                 yield line.split()
15 | 
16 | def some_nearest_words(keys_to_words, sample_count, W):
17 |     norms = np.sqrt(np.sum(W**2.0,axis=1,keepdims=1))
18 |     W = W / (norms + 1e-5)
19 |     source_keys = np.zeros((sample_count,)).astype(np.int32)
20 |     neighbor_keys = np.zeros((sample_count, 10)).astype(np.int32)
21 |     all_keys = np.asarray(keys_to_words.keys()).astype(np.int32)
22 |     for s in range(sample_count):
23 |         i = npr.randint(0,all_keys.size)
24 |         source_k = all_keys[i]
25 |         neg_cos_sims = -1.0 * np.sum(W * W[source_k], axis=1)
26 |         sorted_k = np.argsort(neg_cos_sims)
27 |         source_keys[s] = source_k
28 |         neighbor_keys[s,:] = sorted_k[1:11]
29 |     source_words = []
30 |     neighbor_words = []
31 |     for s in range(sample_count):
32 |         source_words.append(keys_to_words[source_keys[s]])
33 |         neighbor_words.append([keys_to_words[k] for k in neighbor_keys[s]])
34 |     return [source_keys, neighbor_keys, source_words, neighbor_words]
35 | 
36 | sentences = MySentences('./training_text')
37 | 
38 | model = w2vs.W2VSimple(sentences, alpha=0.002, size=152, window=6, \
39 |                                min_count=1, workers=4, hs=1)
40 | k2w = {}
41 | w2k = {}
42 | for w in model.vocab:
43 |     k = model.vocab[w].index
44 |     k2w[k] = w
45 |     w2k[w] = k
46 | 
47 | for i in range(1001):
48 |     print("ROUND {0:d}".format(i))
49 |     sentences = MySentences('./training_text')
50 |     model.train(sentences, chunksize=200)
51 |     if ((i > 1) and ((i % 50) == 0)):
52 |         print("============================================================")
53 |         [s_keys, n_keys, s_words, n_words] = some_nearest_words(k2w, 10, model.syn0)
54 |         for w in range(10):
55 |             print("{0:s}: {1:s}".format(s_words[w],", ".join(n_words[w])))
56 | 
57 | 


--------------------------------------------------------------------------------
/nlp/nlp_convnet/STBTests.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import numpy.random as npr
 3 | import StanfordTrees as st
 4 | import LNLayers as lnl
 5 | import LayerNets as ln
 6 | import random as random
 7 | from time import clock
 8 | from sys import stdout as stdout
 9 | 
10 | def simple_stb_test(tree_dir='./trees'):
11 |     stb_data = st.SimpleLoad(tree_dir)
12 |     return
13 | 
14 | if __name__ == '__main__':
15 |     tree_dir = './trees'
16 |     stb_data = st.SimpleLoad(tree_dir)
17 |     max_lut_idx = max(stb_data['lut_keys'].values())
18 |     basic_opts = {}
19 |     basic_opts['class_count'] = 5
20 |     lut_opts = {}
21 |     lut_opts['max_key'] = max_lut_idx
22 |     lut_opts['embed_dim'] = 30
23 |     lut_opts['max_norm'] = 2.0
24 |     basic_opts['lut_layer'] = lut_opts
25 | 
26 |     # Initialize a network
27 |     KMN = ln.KMaxNet(basic_opts)
28 |     KMN.init_weights(w_scale=0.05, b_shift=0.1)
29 | 
30 |     # Get a "flattened" list of training phrases and classes
31 |     train_phrases = []
32 |     train_labels = []
33 |     for (phrases, labels) in zip(stb_data['train_phrases'], stb_data['train_labels']):
34 |         train_phrases.extend(phrases)
35 |         train_labels.extend(labels)
36 | 
37 |     batch_size = 50
38 |     epoch_batches = 2500
39 |     learn_rate = 0.01
40 |     train_pairs = [(phrase, label) for (phrase, label) in zip(train_phrases, train_labels)]
41 |     train_phrases = []
42 |     train_labels = []
43 |     for e in range(500):
44 |         print("Starting epoch {0:d}, {1:d} batches".format(e, len(train_pairs)/batch_size))
45 |         stdout.flush()
46 |         # Reset batch extraction indices and completed batch counter
47 |         batch_start = 0
48 |         batch_end = batch_start + batch_size
49 |         completed_batches = 0
50 |         # Perform batch updates for the current epoch
51 |         L = 0.0
52 |         acc = 0.0
53 |         t1 = clock()
54 |         random.shuffle(train_pairs)
55 |         if ((e % 5) == 0):
56 |             KMN.reset_moms(ada_init=0.0, clear_moms=False)
57 |         while ((batch_end < len(train_pairs)) and (completed_batches < epoch_batches)):
58 |             # Extract the current training phrase/label batch
59 |             batch_pairs = train_pairs[batch_start:batch_end]
60 |             # Train on this batch, and count its completion
61 |             Xb = [pair[0] for pair in batch_pairs]
62 |             Yb = [pair[1] for pair in batch_pairs]
63 |             res = KMN.process_training_batch(Xb, Yb, learn_rate, use_dropout=True)
64 |             L += res[0]
65 |             acc += res[1]
66 |             completed_batches += 1
67 |             # Advance batch extraction indices
68 |             batch_start = batch_start + batch_size
69 |             batch_end = batch_start + batch_size
70 |             # Print diagnostic info from time-to-time
71 |             if ((completed_batches % 50) == 0):
72 |                 print("completed {0:d} updates, with loss {1:.4f} and acc {2:.4f}".format( \
73 |                         completed_batches, (L / 50.0), (acc / 50.0)))
74 |                 L = 0.0
75 |                 acc = 0.0
76 |                 t2 = clock()
77 |                 print("-- time: {0:.2f}".format(t2-t1))
78 |                 t1 = clock()
79 |                 stdout.flush()
80 | 
81 | 
82 | 
83 | ##############
84 | # EYE BUFFER #
85 | ##############
86 | 


--------------------------------------------------------------------------------
/nlp/voidptr.h:
--------------------------------------------------------------------------------
 1 | #include <Python.h>
 2 | 
 3 | #if PY_VERSION_HEX >= 0x03020000
 4 | 
 5 | /*
 6 | ** compatibility with python >= 3.2, which doesn't have CObject anymore
 7 | */
 8 | static void * PyCObject_AsVoidPtr(PyObject *obj)
 9 | {
10 |     void *ret = PyCapsule_GetPointer(obj, NULL);
11 |     if (ret == NULL) {
12 |         PyErr_Clear();
13 |     }
14 |     return ret;
15 | }
16 | 
17 | #endif


--------------------------------------------------------------------------------