├── LICENSE
├── README.md
├── exp
│   ├── cmuarctic.py
│   ├── fruitspeech.py
│   ├── librispeech.py
│   ├── mnist.py
│   ├── net.py
│   └── test_fruitspeech_model.py
├── net.py
├── tests
│   ├── bi_multilayer_softmax.py
│   ├── bi_softmax.py
│   ├── multilayer_softmax.py
│   ├── net.py
│   ├── predict.py
│   └── softmax.py
└── util
    ├── continue_model_mnist.py
    └── load_model.py
/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Kyle Kastner 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of gomorrah nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | net 2 | === 3 | 4 | "A library for testing implementations of neural networks and training 5 | algorithms" 6 | -------------------------------------------------------------------------------- /exp/cmuarctic.py: -------------------------------------------------------------------------------- 1 | from net import RecurrentNetwork, load_cmuarctic, labels_to_chars 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = load_cmuarctic() 6 | clf = RecurrentNetwork(learning_alg="sfg", 7 | hidden_layer_sizes=[500], 8 | max_col_norm=1.9635, 9 | max_iter=1000, cost="ctc", bidirectional=True, 10 | learning_rate=0.0001, momentum=0.9, 11 | recurrent_activation="lstm", 12 | random_seed=1999) 13 | 14 | tx = train_x[2] 15 | tx = (tx - tx.mean()) / tx.std() 16 | clf.fit(train_x[2], train_y[2]) 17 | y = labels_to_chars(train_y[2]) 18 | print(y) 19 | -------------------------------------------------------------------------------- /exp/fruitspeech.py: -------------------------------------------------------------------------------- 1 | from net import RecurrentNetwork, load_fruitspeech, labels_to_chars 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = load_fruitspeech() 6 | clf = RecurrentNetwork(learning_alg="rmsprop", 7 | hidden_layer_sizes=[500], 8 | max_iter=100, cost="encdec", bidirectional=True, 9 | learning_rate=0.00002, momentum=0.9, 10 | recurrent_activation="lstm", 11 | random_seed=1999) 12 | 13 | all_frames = np.vstack(train_x) 14 | means = np.mean(all_frames, axis=0) 15 | std = np.std(all_frames, axis=0) 16 | for n, t in enumerate(train_x): 17 | train_x[n] = (t - means) / std 18 | 19 | for n, v in enumerate(valid_x): 20 | valid_x[n] = (v - means) / std 21 | 22 | from IPython import embed; embed() 23 | 24 | 25 | clf.fit(train_x, train_y, valid_x, valid_y) 26 | y_hat = labels_to_chars(clf.predict(valid_x[0])[0]) 27 | y = labels_to_chars(valid_y[0]) 28 | print(y_hat) 29 | print(y) 30 | -------------------------------------------------------------------------------- /exp/librispeech.py: -------------------------------------------------------------------------------- 1 | from net import RecurrentNetwork, load_librispeech, labels_to_chars 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = load_librispeech() 6 | clf = RecurrentNetwork(learning_alg="rmsprop", hidden_layer_sizes=[500,], 7 | max_iter=100, cost="ctc", bidirectional=True, 8 | learning_rate=0.0001, momentum=0.9, 9 | recurrent_activation="lstm", 10 | random_seed=1999) 11 | print(labels_to_chars(train_y[2])) 12 | means = np.mean(train_x[2], axis=0) 13 | std = np.std(train_x[2], axis=0) 14 | tx = (train_x[2] - means) / std 15 | clf.fit(tx, train_y[2]) 16 | from IPython import embed; embed() 17 | -------------------------------------------------------------------------------- /exp/mnist.py: -------------------------------------------------------------------------------- 1 | from net import load_mnist, FeedforwardClassifier 2 | 3 | datasets = load_mnist() 4 | 5 | train_set_x, train_set_y = datasets[0] 6 | valid_set_x, valid_set_y = datasets[1] 7 | test_set_x, test_set_y = datasets[2] 8 | 9 | print('... 
building the model') 10 | # construct the MLP class 11 | classifier = FeedforwardClassifier(hidden_layer_sizes=[500], 12 | random_seed=1999) 13 | 14 | print('... training') 15 | classifier.fit(train_set_x, train_set_y, valid_set_x, valid_set_y) 16 | -------------------------------------------------------------------------------- /exp/net.py: -------------------------------------------------------------------------------- 1 | ../net.py -------------------------------------------------------------------------------- /exp/test_fruitspeech_model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | try: 3 | import cPickle 4 | except ImportError: 5 | import pickle as cPickle 6 | import matplotlib.pyplot as plt 7 | from net import load_fruitspeech, labels_to_chars 8 | import numpy as np 9 | (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = load_fruitspeech() 10 | 11 | f = open(sys.argv[1]) 12 | clf = cPickle.load(f) 13 | 14 | all_frames = np.vstack(train_x) 15 | means = np.mean(all_frames, axis=0) 16 | std = np.std(all_frames, axis=0) 17 | for n, t in enumerate(train_x): 18 | train_x[n] = (t - means) / std 19 | 20 | for n, v in enumerate(valid_x): 21 | valid_x[n] = (v - means) / std 22 | 23 | for n, v in enumerate(valid_y): 24 | y = labels_to_chars(v) 25 | y_hat = labels_to_chars(clf.predict(valid_x[n])[0]) 26 | print("Expected: %s, predicted: %s" % (y, y_hat)) 27 | -------------------------------------------------------------------------------- /net.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf 8 -*- 2 | from __future__ import division 3 | try: 4 | import cPickle 5 | except ImportError: 6 | import pickle as cPickle 7 | import gzip 8 | import tarfile 9 | import tempfile 10 | import os 11 | import numpy as np 12 | from scipy import linalg 13 | from scipy.io import wavfile 14 | import tables 15 | import numbers 16 | import glob 17 | import random 18 | import theano 19 | import string 20 | import theano.tensor as T 21 | from theano.compat.python2x import OrderedDict 22 | import matplotlib 23 | matplotlib.use('Agg') 24 | import matplotlib.pyplot as plt 25 | # Sandbox? 26 | import fnmatch 27 | from theano.tensor.shared_randomstreams import RandomStreams 28 | 29 | 30 | def concatenate(tensor_list, axis=0): 31 | """ 32 | Alternative implementation of `theano.tensor.concatenate`. 33 | This function does exactly the same thing, but contrary to Theano's own 34 | implementation, the gradient is implemented on the GPU. 
35 | Stolen from Lasagne 36 | """ 37 | if axis < 0: 38 | axis += tensor_list[0].ndim 39 | 40 | concat_size = sum(tensor.shape[axis] for tensor in tensor_list) 41 | 42 | output_shape = () 43 | for k in range(axis): 44 | output_shape += (tensor_list[0].shape[k],) 45 | output_shape += (concat_size,) 46 | for k in range(axis + 1, tensor_list[0].ndim): 47 | output_shape += (tensor_list[0].shape[k],) 48 | 49 | out = T.zeros(output_shape) 50 | offset = 0 51 | for tensor in tensor_list: 52 | indices = () 53 | for k in range(axis): 54 | indices += (slice(None),) 55 | indices += (slice(offset, offset + tensor.shape[axis]),) 56 | for k in range(axis + 1, tensor_list[0].ndim): 57 | indices += (slice(None),) 58 | 59 | out = T.set_subtensor(out[indices], tensor) 60 | offset += tensor.shape[axis] 61 | 62 | return out 63 | 64 | 65 | def minibatch_indices(X, minibatch_size): 66 | minibatch_indices = np.arange(0, len(X), minibatch_size) 67 | minibatch_indices = np.asarray(list(minibatch_indices) + [len(X)]) 68 | start_indices = minibatch_indices[:-1] 69 | end_indices = minibatch_indices[1:] 70 | return zip(start_indices, end_indices) 71 | 72 | 73 | def make_minibatch(X, y, one_hot_size): 74 | minibatch_size = len(X) 75 | is_one_hot = True 76 | X_max_sizes = np.max([xi.shape for xi in X], axis=0) 77 | X_max_sizes = np.asarray([minibatch_size] + list(X_max_sizes)) 78 | # Order into time, samples, feature 79 | X_max_sizes = np.array([X_max_sizes[1], X_max_sizes[0], 80 | X_max_sizes[2]]) 81 | y_max_sizes = np.max([yi.shape for yi in y], axis=0) 82 | y_max_sizes = np.array([minibatch_size] + list(y_max_sizes)) 83 | # Order into time, samples, label 84 | # dim is 1 for output label? This may need adjustment for regression 85 | if len(y_max_sizes) == 3: 86 | y_max_sizes = np.array([y_max_sizes[1], y_max_sizes[0], y_max_sizes[2]]) 87 | elif len(y_max_sizes) < 3: 88 | y_max_sizes = np.array([y_max_sizes[1], y_max_sizes[0], one_hot_size]) 89 | is_one_hot = False 90 | else: 91 | raise ValueError("y must be 2 or 3 dimensional!") 92 | 93 | for y_t in y: 94 | if not np.all(np.in1d([0, 1], np.unique(y_t.ravel()))): 95 | is_one_hot = False 96 | X_n = np.zeros(X_max_sizes, dtype=X[0].dtype) 97 | y_n = np.zeros(y_max_sizes).astype(theano.config.floatX) 98 | X_mask = np.zeros((X_max_sizes[0], X_max_sizes[1])).astype( 99 | theano.config.floatX) 100 | y_mask = np.zeros((y_max_sizes[0], y_max_sizes[1])).astype( 101 | theano.config.floatX) 102 | for n, t in enumerate(X): 103 | xshp = X[n].shape 104 | X_n[:xshp[0], n, :xshp[1]] = X[n] 105 | X_mask[:xshp[0], n] = 1. 106 | 107 | for n, t in enumerate(y): 108 | yshp = y[n].shape 109 | if not is_one_hot: 110 | for i, v in enumerate(y[n]): 111 | y_n[i, n, v] = 1. 112 | else: 113 | y_n[:yshp[0], n, :yshp[1]] = y[n] 114 | y_mask[:yshp[0], n] = 1. 115 | return X_n, y_n, X_mask, y_mask 116 | 117 | 118 | def labels_to_chars(labels): 119 | return "".join([chr(l + 97) for l in labels]) 120 | 121 | 122 | def _make_ctc_labels(y): 123 | # Assume that class values are sequential! and start from 0 124 | highest_class = np.max([np.max(d) for d in y]) 125 | # Need to insert blanks at start, end, and between each label 126 | # See A. Graves "Supervised Sequence Labelling with Recurrent Neural 127 | # Networks" figure 7.2 (pg. 
58) 128 | # (http://www.cs.toronto.edu/~graves/preprint.pdf) 129 | blank = highest_class + 1 130 | y_fixed = [blank * np.ones(2 * yi.shape[0] + 1).astype('int32') 131 | for yi in y] 132 | for i, yi in enumerate(y): 133 | y_fixed[i][1:-1:2] = yi 134 | return y_fixed 135 | 136 | 137 | def relu(x): 138 | return x * (x > 1e-6) 139 | 140 | 141 | def clip_relu(x, clip_lim=20): 142 | return x * (T.lt(x, 1e-6) and T.gt(x, clip_lim)) 143 | 144 | 145 | def dropout(random_state, X, keep_prob=0.5): 146 | if keep_prob > 0. and keep_prob < 1.: 147 | seed = random_state.randint(2 ** 30) 148 | srng = RandomStreams(seed) 149 | mask = srng.binomial(n=1, p=keep_prob, size=X.shape, 150 | dtype=theano.config.floatX) 151 | return X * mask 152 | return X 153 | 154 | 155 | def fast_dropout(random_state, X): 156 | seed = random_state.randint(2 ** 30) 157 | srng = RandomStreams(seed) 158 | mask = srng.normal(size=X.shape, avg=1., dtype=theano.config.floatX) 159 | return X * mask 160 | 161 | 162 | def shared_zeros(shape): 163 | """ Builds a theano shared variable filled with a zeros numpy array """ 164 | return theano.shared(value=np.zeros(*shape).astype(theano.config.floatX), 165 | borrow=True) 166 | 167 | 168 | def shared_rand(shape, rng): 169 | """ Builds a theano shared variable filled with random values """ 170 | return theano.shared(value=(0.01 * (rng.rand(*shape) - 0.5)).astype( 171 | theano.config.floatX), borrow=True) 172 | 173 | 174 | def np_rand(shape, rng): 175 | return (0.01 * (rng.rand(*shape) - 0.5)).astype(theano.config.floatX) 176 | 177 | 178 | def np_randn(shape, rng, name=None): 179 | """ Builds a numpy variable filled with random normal values """ 180 | return (0.01 * rng.randn(*shape)).astype(theano.config.floatX) 181 | 182 | 183 | def np_ortho(shape, rng, name=None): 184 | """ Builds a theano variable filled with orthonormal random values """ 185 | g = rng.randn(*shape) 186 | o_g = linalg.svd(g)[0] 187 | return o_g.astype(theano.config.floatX) 188 | 189 | 190 | def shared_ortho(shape, rng, name=None): 191 | """ Builds a theano shared variable filled with random values """ 192 | g = rng.randn(*shape) 193 | o_g = linalg.svd(g)[0] 194 | return theano.shared(value=o_g.astype(theano.config.floatX), borrow=True) 195 | 196 | 197 | def load_mnist(): 198 | # Check if dataset is in the data directory. 199 | data_path = os.path.join(os.path.split(__file__)[0], "data") 200 | if not os.path.exists(data_path): 201 | os.makedirs(data_path) 202 | 203 | dataset = 'mnist.pkl.gz' 204 | data_file = os.path.join(data_path, dataset) 205 | if os.path.isfile(data_file): 206 | dataset = data_file 207 | 208 | if (not os.path.isfile(data_file)): 209 | try: 210 | import urllib 211 | urllib.urlretrieve('http://google.com') 212 | except AttributeError: 213 | import urllib.request as urllib 214 | url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz' 215 | print('Downloading data from %s' % url) 216 | urllib.urlretrieve(url, data_file) 217 | 218 | print('... 
loading data') 219 | # Load the dataset 220 | f = gzip.open(data_file, 'rb') 221 | try: 222 | train_set, valid_set, test_set = cPickle.load(f, encoding="latin1") 223 | except TypeError: 224 | train_set, valid_set, test_set = cPickle.load(f) 225 | f.close() 226 | 227 | test_x, test_y = test_set 228 | test_x = test_x.astype('float32') 229 | test_y = test_y.astype('int32') 230 | valid_x, valid_y = valid_set 231 | valid_x = valid_x.astype('float32') 232 | valid_y = valid_y.astype('int32') 233 | train_x, train_y = train_set 234 | train_x = train_x.astype('float32') 235 | train_y = train_y.astype('int32') 236 | 237 | rval = [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)] 238 | return rval 239 | 240 | 241 | def load_cifar10(): 242 | # Check if dataset is in the data directory. 243 | data_path = os.path.join(os.path.split(__file__)[0], "data") 244 | if not os.path.exists(data_path): 245 | os.makedirs(data_path) 246 | 247 | dataset = 'cifar-10-python.tar.gz' 248 | data_file = os.path.join(data_path, dataset) 249 | if os.path.isfile(data_file): 250 | dataset = data_file 251 | 252 | if (not os.path.isfile(data_file)): 253 | try: 254 | import urllib 255 | urllib.urlretrieve('http://google.com') 256 | except AttributeError: 257 | import urllib.request as urllib 258 | url = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' 259 | print('Downloading data from %s' % url) 260 | urllib.urlretrieve(url, data_file) 261 | 262 | print('... loading data') 263 | tar = tarfile.open(data_file) 264 | os.chdir(data_path) 265 | tar.extractall() 266 | tar.close() 267 | 268 | data_path = os.path.join(data_path, "cifar-10-batches-py") 269 | batch_files = glob.glob(os.path.join(data_path, "*batch*")) 270 | train_data = [] 271 | train_labels = [] 272 | test_data = [] 273 | test_labels = [] 274 | for f in batch_files: 275 | batch_file = open(f, 'rb') 276 | d = cPickle.load(batch_file) 277 | batch_file.close() 278 | fname = f.split(os.path.sep)[-1] 279 | if "data" in fname: 280 | data = d['data'] 281 | labels = d['labels'] 282 | train_data.append(data) 283 | train_labels.append(labels) 284 | elif "test" in fname: 285 | data = d['data'] 286 | labels = d['labels'] 287 | test_data.append(data) 288 | test_labels.append(labels) 289 | 290 | # Split into 40000 train 10000 valid 10000 test 291 | train_x = np.asarray(train_data) 292 | train_y = np.asarray(train_labels) 293 | test_x = np.asarray(test_data) 294 | test_y = np.asarray(test_labels) 295 | valid_x = train_x[-10000:] 296 | valid_y = train_y[-10000:] 297 | train_x = train_x[:-10000] 298 | train_y = train_y[:-10000] 299 | 300 | test_x = test_x.astype('float32') 301 | test_y = test_y.astype('int32') 302 | valid_x = valid_x.astype('float32') 303 | valid_y = valid_y.astype('int32') 304 | train_x = train_x.astype('float32') 305 | train_y = train_y.astype('int32') 306 | 307 | rval = [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)] 308 | return rval 309 | 310 | 311 | def load_scribe(): 312 | # Check if dataset is in the data directory. 
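# (The pickle is cached under the "data" directory next to net.py; a different
# URL is chosen under Python 2 vs. Python 3, presumably because scribe2.pkl and
# scribe3.pkl were written with different pickle protocols.)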
313 | data_path = os.path.join(os.path.split(__file__)[0], "data") 314 | if not os.path.exists(data_path): 315 | os.makedirs(data_path) 316 | 317 | dataset = 'scribe.pkl' 318 | data_file = os.path.join(data_path, dataset) 319 | if os.path.isfile(data_file): 320 | dataset = data_file 321 | 322 | if (not os.path.isfile(data_file)): 323 | try: 324 | import urllib 325 | urllib.urlretrieve('http://google.com') 326 | url = 'https://dl.dropboxusercontent.com/u/15378192/scribe2.pkl' 327 | except AttributeError: 328 | import urllib.request as urllib 329 | url = 'https://dl.dropboxusercontent.com/u/15378192/scribe3.pkl' 330 | print('Downloading data from %s' % url) 331 | urllib.urlretrieve(url, data_file) 332 | 333 | print('... loading data') 334 | with open(data_file, 'rb') as pkl_file: 335 | data = cPickle.load(pkl_file) 336 | 337 | data_x, data_y = [], [] 338 | for x, y in zip(data['x'], data['y']): 339 | data_y.append(np.asarray(y, dtype=np.int32)) 340 | data_x.append(np.asarray(x, dtype=theano.config.floatX).T) 341 | 342 | train_x = data_x[:750] 343 | train_y = data_y[:750] 344 | valid_x = data_x[750:900] 345 | valid_y = data_y[750:900] 346 | test_x = data_x[900:] 347 | test_y = data_y[900:] 348 | rval = [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)] 349 | return rval 350 | 351 | 352 | # A tricky trick for monkeypatching an instancemethod that is 353 | # CPython :( there must be a better way 354 | class _cVLArray(tables.VLArray): 355 | pass 356 | 357 | 358 | def load_fruitspeech(): 359 | # Check if dataset is in the data directory. 360 | data_path = os.path.join(os.path.split(__file__)[0], "data") 361 | if not os.path.exists(data_path): 362 | os.makedirs(data_path) 363 | 364 | dataset = 'audio.tar.gz' 365 | data_file = os.path.join(data_path, dataset) 366 | if os.path.isfile(data_file): 367 | dataset = data_file 368 | 369 | if not os.path.isfile(data_file): 370 | try: 371 | import urllib 372 | urllib.urlretrieve('http://google.com') 373 | url = 'https://dl.dropboxusercontent.com/u/15378192/audio.tar.gz' 374 | except AttributeError: 375 | import urllib.request as urllib 376 | url = 'https://dl.dropboxusercontent.com/u/15378192/audio.tar.gz' 377 | print('Downloading data from %s' % url) 378 | urllib.urlretrieve(url, data_file) 379 | 380 | print('... 
loading data') 381 | if not os.path.exists(os.path.join(data_path, "audio")): 382 | tar = tarfile.open(data_file) 383 | os.chdir(data_path) 384 | tar.extractall() 385 | tar.close() 386 | 387 | h5_file_path = os.path.join(data_path, "saved_fruit.h5") 388 | if not os.path.exists(h5_file_path): 389 | data_path = os.path.join(data_path, "audio") 390 | 391 | audio_matches = [] 392 | for root, dirnames, filenames in os.walk(data_path): 393 | for filename in fnmatch.filter(filenames, '*.wav'): 394 | audio_matches.append(os.path.join(root, filename)) 395 | 396 | random.seed(1999) 397 | random.shuffle(audio_matches) 398 | 399 | # http://mail.scipy.org/pipermail/numpy-discussion/2011-March/055219.html 400 | h5_file = tables.openFile(h5_file_path, mode='w') 401 | data_x = h5_file.createVLArray(h5_file.root, 'data_x', 402 | tables.Float32Atom(shape=()), 403 | filters=tables.Filters(1)) 404 | data_x_shapes = h5_file.createVLArray(h5_file.root, 'data_x_shapes', 405 | tables.Int32Atom(shape=()), 406 | filters=tables.Filters(1)) 407 | data_y = h5_file.createVLArray(h5_file.root, 'data_y', 408 | tables.Int32Atom(shape=()), 409 | filters=tables.Filters(1)) 410 | for wav_path in audio_matches: 411 | # Convert chars to int classes 412 | word = wav_path.split(os.sep)[-1][:-6] 413 | chars = [ord(c) - 97 for c in word] 414 | data_y.append(np.array(chars, dtype='int32')) 415 | fs, d = wavfile.read(wav_path) 416 | # Preprocessing from A. Graves "Towards End-to-End Speech 417 | # Recognition" 418 | Pxx, _, _, _ = plt.specgram(d, NFFT=256, noverlap=128) 419 | data_x_shapes.append(np.array(Pxx.T.shape, dtype='int32')) 420 | data_x.append(Pxx.T.astype('float32').flatten()) 421 | h5_file.close() 422 | 423 | h5_file = tables.openFile(h5_file_path, mode='r') 424 | data_x = h5_file.root.data_x 425 | data_x_shapes = h5_file.root.data_x_shapes 426 | data_y = h5_file.root.data_y 427 | # A dirty hack to only monkeypatch data_x 428 | data_x.__class__ = _cVLArray 429 | 430 | # override getter so that it gets reshaped to 2D when fetched 431 | old_getter = data_x.__getitem__ 432 | 433 | def getter(self, key): 434 | if isinstance(key, numbers.Integral) or isinstance(key, np.integer): 435 | return old_getter(key).reshape(data_x_shapes[key]).astype( 436 | theano.config.floatX) 437 | elif isinstance(key, slice): 438 | start, stop, step = self._processRange(key.start, key.stop, 439 | key.step) 440 | return [o.reshape(s) for o, s in zip( 441 | self.read(start, stop, step), data_x_shapes[slice( 442 | start, stop, step)])] 443 | 444 | # Patch __getitem__ in custom subclass, applying to all instances of it 445 | _cVLArray.__getitem__ = getter 446 | 447 | train_x = data_x[:80] 448 | train_y = data_y[:80] 449 | valid_x = data_x[80:90] 450 | valid_y = data_y[80:90] 451 | test_x = data_x[90:] 452 | test_y = data_y[90:] 453 | rval = [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)] 454 | return rval 455 | 456 | 457 | def load_cmuarctic(): 458 | # Check if dataset is in the data directory. 
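# (All seven CMU ARCTIC speaker archives are downloaded on first use and their
# spectrograms are cached into a single saved_cmu.h5 file, so the first run is
# slow but later runs only read the HDF5 file.)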
459 | data_path = os.path.join(os.path.split(__file__)[0], "data") 460 | if not os.path.exists(data_path): 461 | os.makedirs(data_path) 462 | 463 | urls = ['http://www.speech.cs.cmu.edu/cmu_arctic/packed/cmu_us_awb_arctic-0.95-release.tar.bz2', 464 | 'http://www.speech.cs.cmu.edu/cmu_arctic/packed/cmu_us_bdl_arctic-0.95-release.tar.bz2', 465 | 'http://www.speech.cs.cmu.edu/cmu_arctic/packed/cmu_us_clb_arctic-0.95-release.tar.bz2', 466 | 'http://www.speech.cs.cmu.edu/cmu_arctic/packed/cmu_us_jmk_arctic-0.95-release.tar.bz2', 467 | 'http://www.speech.cs.cmu.edu/cmu_arctic/packed/cmu_us_ksp_arctic-0.95-release.tar.bz2', 468 | 'http://www.speech.cs.cmu.edu/cmu_arctic/packed/cmu_us_rms_arctic-0.95-release.tar.bz2', 469 | 'http://www.speech.cs.cmu.edu/cmu_arctic/packed/cmu_us_slt_arctic-0.95-release.tar.bz2', 470 | ] 471 | 472 | data_files = [] 473 | 474 | for url in urls: 475 | dataset = url.split('/')[-1] 476 | data_file = os.path.join(data_path, dataset) 477 | data_files.append(data_file) 478 | if os.path.isfile(data_file): 479 | dataset = data_file 480 | if not os.path.isfile(data_file): 481 | try: 482 | import urllib 483 | urllib.urlretrieve('http://google.com') 484 | except AttributeError: 485 | import urllib.request as urllib 486 | print('Downloading data from %s' % url) 487 | urllib.urlretrieve(url, data_file) 488 | 489 | print('... loading data') 490 | 491 | folder_paths = [] 492 | for data_file in data_files: 493 | folder_name = data_file.split(os.sep)[-1].split("-")[0] 494 | folder_path = os.path.join(data_path, folder_name) 495 | folder_paths.append(folder_path) 496 | if not os.path.exists(folder_path): 497 | tar = tarfile.open(data_file) 498 | os.chdir(data_path) 499 | tar.extractall() 500 | tar.close() 501 | 502 | h5_file_path = os.path.join(data_path, "saved_cmu.h5") 503 | if not os.path.exists(h5_file_path): 504 | # http://mail.scipy.org/pipermail/numpy-discussion/2011-March/055219.html 505 | h5_file = tables.openFile(h5_file_path, mode='w') 506 | data_x = h5_file.createVLArray(h5_file.root, 'data_x', 507 | tables.Float32Atom(shape=()), 508 | filters=tables.Filters(1)) 509 | data_x_shapes = h5_file.createVLArray(h5_file.root, 'data_x_shapes', 510 | tables.Int32Atom(shape=()), 511 | filters=tables.Filters(1)) 512 | data_y = h5_file.createVLArray(h5_file.root, 'data_y', 513 | tables.Int32Atom(shape=()), 514 | filters=tables.Filters(1)) 515 | data_meta = h5_file.createVLArray(h5_file.root, 'data_meta', 516 | tables.StringAtom(200), 517 | filters=tables.Filters(1)) 518 | for folder_path in folder_paths: 519 | audio_matches = [] 520 | for root, dirnames, filenames in os.walk(folder_path): 521 | for filename in fnmatch.filter(filenames, '*.wav'): 522 | audio_matches.append(os.path.join(root, filename)) 523 | 524 | f = open(os.path.join(folder_path, "etc", "txt.done.data")) 525 | read_raw_text = f.readlines() 526 | f.close() 527 | # Remove all punctuations 528 | list_text = [t.strip().lower().translate( 529 | string.maketrans("", ""), string.punctuation).split(" ")[1:-1] 530 | for t in read_raw_text] 531 | # Get rid of numbers, even though it will probably hurt 532 | # recognition on certain examples 533 | cleaned_lookup = {lt[0]: " ".join(lt[1:]).translate( 534 | None, string.digits).strip() for lt in list_text} 535 | data_meta.append(folder_path.split(os.sep)[-1]) 536 | 537 | for wav_path in audio_matches: 538 | lookup_key = wav_path.split(os.sep)[-1][:-4] 539 | # Some files aren't consistent! 
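# (Prompt keys in txt.done.data sometimes contain an underscore and sometimes
# do not, so the wav filename is adjusted below - inserting an underscore after
# the first six characters, or stripping underscores - until it matches this
# speaker's key format.)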
540 | if "_" in cleaned_lookup.keys()[0] and "_" not in lookup_key: 541 | # Needs an _ to match text format... sometimes! 542 | lookup_key = lookup_key[:6] + "_" + lookup_key[6:] 543 | elif "_" not in cleaned_lookup.keys()[0]: 544 | lookup_key = lookup_key.translate(None, "_") 545 | try: 546 | words = cleaned_lookup[lookup_key] 547 | # Convert chars to int classes 548 | chars = [ord(c) - 97 for c in words] 549 | # Make spaces last class 550 | chars = [c if c >= 0 else 26 for c in chars] 551 | data_y.append(np.array(chars, dtype='int32')) 552 | # Convert chars to int classes 553 | fs, d = wavfile.read(wav_path) 554 | # Preprocessing from A. Graves "Towards End-to-End Speech 555 | # Recognition" 556 | Pxx, _, _, _ = plt.specgram(d, NFFT=256, noverlap=128) 557 | data_x_shapes.append(np.array(Pxx.T.shape, dtype='int32')) 558 | data_x.append(Pxx.T.astype('float32').flatten()) 559 | except KeyError: 560 | # Necessary because some labels are missing in some folders 561 | print("Skipping %s due to missing key" % wav_path) 562 | 563 | h5_file.close() 564 | 565 | h5_file = tables.openFile(h5_file_path, mode='r') 566 | data_x = h5_file.root.data_x 567 | data_x_shapes = h5_file.root.data_x_shapes 568 | data_y = h5_file.root.data_y 569 | # A dirty hack to only monkeypatch data_x 570 | data_x.__class__ = _cVLArray 571 | 572 | # override getter so that it gets reshaped to 2D when fetched 573 | old_getter = data_x.__getitem__ 574 | 575 | def getter(self, key): 576 | if isinstance(key, numbers.Integral) or isinstance(key, np.integer): 577 | return old_getter(key).reshape(data_x_shapes[key]).astype( 578 | theano.config.floatX) 579 | elif isinstance(key, slice): 580 | start, stop, step = self._processRange(key.start, key.stop, 581 | key.step) 582 | return [o.reshape(s) for o, s in zip( 583 | self.read(start, stop, step), data_x_shapes[slice( 584 | start, stop, step)])] 585 | 586 | # Patch __getitem__ in custom subclass, applying to all instances of it 587 | _cVLArray.__getitem__ = getter 588 | 589 | train_x = data_x[:6000] 590 | train_y = data_y[:6000] 591 | valid_x = data_x[6000:7500] 592 | valid_y = data_y[6000:7500] 593 | test_x = data_x[7500:] 594 | test_y = data_y[7500:] 595 | rval = [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)] 596 | return rval 597 | 598 | 599 | def load_librispeech(): 600 | # Check if dataset is in the data directory. 601 | data_path = os.path.join(os.path.split(__file__)[0], "data") 602 | if not os.path.exists(data_path): 603 | os.makedirs(data_path) 604 | 605 | dataset = 'dev-clean.tar.gz' 606 | data_file = os.path.join(data_path, dataset) 607 | if os.path.isfile(data_file): 608 | dataset = data_file 609 | 610 | if not os.path.isfile(data_file): 611 | try: 612 | import urllib 613 | urllib.urlretrieve('http://google.com') 614 | url = 'http://www.openslr.org/resources/12/dev-clean.tar.gz' 615 | except AttributeError: 616 | import urllib.request as urllib 617 | url = 'http://www.openslr.org/resources/12/dev-clean.tar.gz' 618 | print('Downloading data from %s' % url) 619 | urllib.urlretrieve(url, data_file) 620 | 621 | print('... 
loading data') 622 | if not os.path.exists(os.path.join(data_path, "LibriSpeech", "dev-clean")): 623 | tar = tarfile.open(data_file) 624 | os.chdir(data_path) 625 | tar.extractall() 626 | tar.close() 627 | 628 | h5_file_path = os.path.join(data_path, "saved_libri.h5") 629 | if not os.path.exists(h5_file_path): 630 | data_path = os.path.join(data_path, "LibriSpeech", "dev-clean") 631 | 632 | audio_matches = [] 633 | for root, dirnames, filenames in os.walk(data_path): 634 | for filename in fnmatch.filter(filenames, '*.flac'): 635 | audio_matches.append(os.path.join(root, filename)) 636 | 637 | text_matches = [] 638 | for root, dirnames, filenames in os.walk(data_path): 639 | for filename in fnmatch.filter(filenames, '*.txt'): 640 | text_matches.append(os.path.join(root, filename)) 641 | 642 | # http://mail.scipy.org/pipermail/numpy-discussion/2011-March/055219.html 643 | h5_file = tables.openFile(h5_file_path, mode='w') 644 | data_x = h5_file.createVLArray(h5_file.root, 'data_x', 645 | tables.Float32Atom(shape=()), 646 | filters=tables.Filters(1)) 647 | data_x_shapes = h5_file.createVLArray(h5_file.root, 'data_x_shapes', 648 | tables.Int32Atom(shape=()), 649 | filters=tables.Filters(1)) 650 | data_y = h5_file.createVLArray(h5_file.root, 'data_y', 651 | tables.Int32Atom(shape=()), 652 | filters=tables.Filters(1)) 653 | for full_t in text_matches: 654 | f = open(full_t, 'r') 655 | for line in f.readlines(): 656 | word_splits = line.strip().split(" ") 657 | file_tag = word_splits[0] 658 | words = word_splits[1:] 659 | # Convert chars to int classes 660 | chars = [ord(c) - 97 for c in (" ").join(words).lower()] 661 | # Make spaces last class 662 | chars = [c if c >= 0 else 26 for c in chars] 663 | data_y.append(np.array(chars, dtype='int32')) 664 | audio_path = [a for a in audio_matches if file_tag in a] 665 | if len(audio_path) != 1: 666 | raise ValueError("More than one match for" 667 | "tag %s!" % file_tag) 668 | if not os.path.exists(audio_path[0][:-5] + ".wav"): 669 | r = os.system("ffmpeg -i %s %s.wav" % (audio_path[0], 670 | audio_path[0][:-5])) 671 | if r: 672 | raise ValueError("A problem occured converting flac to" 673 | "wav, make sure ffmpeg is installed") 674 | wav_path = audio_path[0][:-5] + '.wav' 675 | fs, d = wavfile.read(wav_path) 676 | # Preprocessing from A. 
Graves "Towards End-to-End Speech 677 | # Recognition" 678 | Pxx, _, _, _ = plt.specgram(d, NFFT=256, noverlap=128) 679 | data_x_shapes.append(np.array(Pxx.T.shape, dtype='int32')) 680 | data_x.append(Pxx.T.astype('float32').flatten()) 681 | f.close() 682 | h5_file.close() 683 | 684 | h5_file_path = os.path.join(data_path, "saved_libri.h5") 685 | h5_file = tables.openFile(h5_file_path, mode='r') 686 | data_x = h5_file.root.data_x 687 | data_x_shapes = h5_file.root.data_x_shapes 688 | data_y = h5_file.root.data_y 689 | # A dirty hack to only monkeypatch data_x 690 | data_x.__class__ = _cVLArray 691 | 692 | # override getter so that it gets reshaped to 2D when fetched 693 | old_getter = data_x.__getitem__ 694 | 695 | def getter(self, key): 696 | if isinstance(key, numbers.Integral) or isinstance(key, np.integer): 697 | return old_getter(key).reshape(data_x_shapes[key]).astype( 698 | theano.config.floatX) 699 | elif isinstance(key, slice): 700 | start, stop, step = self._processRange(key.start, key.stop, 701 | key.step) 702 | return [o.reshape(s) for o, s in zip( 703 | self.read(start, stop, step), data_x_shapes[slice( 704 | start, stop, step)])] 705 | 706 | # Patch __getitem__ in custom subclass, applying to all instances of it 707 | _cVLArray.__getitem__ = getter 708 | 709 | train_x = data_x[:2000] 710 | train_y = data_y[:2000] 711 | valid_x = data_x[2000:2500] 712 | valid_y = data_y[2000:2500] 713 | test_x = data_x[2500:] 714 | test_y = data_y[2500:] 715 | rval = [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)] 716 | return rval 717 | 718 | 719 | class BaseNet(object): 720 | def __getstate__(self): 721 | if not hasattr(self, '_pickle_skip_list'): 722 | self._pickle_skip_list = [] 723 | for k, v in self.__dict__.items(): 724 | try: 725 | f = tempfile.TemporaryFile() 726 | cPickle.dump(v, f) 727 | except: 728 | self._pickle_skip_list.append(k) 729 | state = OrderedDict() 730 | for k, v in self.__dict__.items(): 731 | if k not in self._pickle_skip_list: 732 | state[k] = v 733 | return state 734 | 735 | def __setstate__(self, state): 736 | self.__dict__ = state 737 | 738 | 739 | class TrainingMixin(object): 740 | def get_sgd_updates(self, X_sym, y_sym, params, cost, learning_rate, 741 | momentum): 742 | gparams = T.grad(cost, params) 743 | updates = OrderedDict() 744 | 745 | if not hasattr(self, "momentum_velocity_"): 746 | self.momentum_velocity_ = [0.] * len(gparams) 747 | 748 | for n, (param, gparam) in enumerate(zip(params, gparams)): 749 | velocity = self.momentum_velocity_[n] 750 | update_step = momentum * velocity - learning_rate * gparam 751 | self.momentum_velocity_[n] = update_step 752 | updates[param] = param + update_step 753 | 754 | return updates 755 | 756 | def _norm_constraint(self, param, update_step, max_col_norm): 757 | stepped_param = param + update_step 758 | if param.get_value(borrow=True).ndim == 2: 759 | col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0)) 760 | desired_norms = T.clip(col_norms, 0, max_col_norm) 761 | scale = desired_norms / (1e-7 + col_norms) 762 | new_param = param * scale 763 | new_update_step = update_step * scale 764 | else: 765 | new_param = param 766 | new_update_step = update_step 767 | return new_param, new_update_step 768 | 769 | def get_clip_sgd_updates(self, X_sym, y_sym, params, cost, learning_rate, 770 | momentum, rescale=5.): 771 | gparams = T.grad(cost, params) 772 | updates = OrderedDict() 773 | 774 | if not hasattr(self, "momentum_velocity_"): 775 | self.momentum_velocity_ = [0.] 
* len(gparams) 776 | 777 | # Gradient clipping 778 | grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), gparams))) 779 | not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm)) 780 | grad_norm = T.sqrt(grad_norm) 781 | scaling_num = rescale 782 | scaling_den = T.maximum(rescale, grad_norm) 783 | for n, (param, gparam) in enumerate(zip(params, gparams)): 784 | # clip gradient directly, not momentum etc. 785 | gparam = T.switch(not_finite, 0.1 * param, 786 | gparam * (scaling_num / scaling_den)) 787 | velocity = self.momentum_velocity_[n] 788 | update_step = momentum * velocity - learning_rate * gparam 789 | self.momentum_velocity_[n] = update_step 790 | updates[param] = param + update_step 791 | return updates 792 | 793 | def get_clip_rmsprop_updates(self, X_sym, y_sym, params, cost, 794 | learning_rate, momentum, rescale=5.): 795 | gparams = T.grad(cost, params) 796 | updates = OrderedDict() 797 | 798 | if not hasattr(self, "running_average_"): 799 | self.running_square_ = [0.] * len(gparams) 800 | self.running_avg_ = [0.] * len(gparams) 801 | self.updates_storage_ = [0.] * len(gparams) 802 | 803 | if not hasattr(self, "momentum_velocity_"): 804 | self.momentum_velocity_ = [0.] * len(gparams) 805 | 806 | # Gradient clipping 807 | grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), gparams))) 808 | not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm)) 809 | grad_norm = T.sqrt(grad_norm) 810 | scaling_num = rescale 811 | scaling_den = T.maximum(rescale, grad_norm) 812 | for n, (param, gparam) in enumerate(zip(params, gparams)): 813 | gparam = T.switch(not_finite, 0.1 * param, 814 | gparam * (scaling_num / scaling_den)) 815 | combination_coeff = 0.9 816 | minimum_grad = 1e-4 817 | old_square = self.running_square_[n] 818 | new_square = combination_coeff * old_square + ( 819 | 1. - combination_coeff) * T.sqr(gparam) 820 | old_avg = self.running_avg_[n] 821 | new_avg = combination_coeff * old_avg + ( 822 | 1. - combination_coeff) * gparam 823 | rms_grad = T.sqrt(new_square - new_avg ** 2) 824 | rms_grad = T.maximum(rms_grad, minimum_grad) 825 | velocity = self.momentum_velocity_[n] 826 | update_step = momentum * velocity - learning_rate * ( 827 | gparam / rms_grad) 828 | self.running_square_[n] = new_square 829 | self.running_avg_[n] = new_avg 830 | self.updates_storage_[n] = update_step 831 | self.momentum_velocity_[n] = update_step 832 | updates[param] = param + update_step 833 | 834 | return updates 835 | 836 | def get_sfg_updates(self, X_sym, y_sym, params, cost, 837 | learning_rate, momentum): 838 | gparams = T.grad(cost, params) 839 | updates = OrderedDict() 840 | from sfg import SFG 841 | if not hasattr(self, "sfg_"): 842 | self.count_ = theano.shared(0) 843 | self.slow_freq_ = 20 844 | self.sfg_ = SFG(params, gparams) 845 | 846 | slow_updates, fast_updates = self.sfg_.updates(self.learning_rate, 847 | self.momentum, 848 | epsilon=0.0001, 849 | momentum_clipping=None) 850 | for param in slow_updates.keys(): 851 | updates[param] = theano.ifelse.ifelse(T.eq(self.count_, 852 | self.slow_freq_ - 1), 853 | slow_updates[param], 854 | fast_updates[param]) 855 | updates[self.count_] = T.mod(self.count_ + 1, self.slow_freq_) 856 | return updates 857 | 858 | 859 | def init_linear_layer(input_size, output_size, random_state): 860 | W_values = np.asarray(random_state.uniform( 861 | low=-np.sqrt(6. / (input_size + output_size)), 862 | high=np.sqrt(6. 
/ (input_size + output_size)), 863 | size=(input_size, output_size)), dtype=theano.config.floatX) 864 | W = theano.shared(value=W_values, name='W', borrow=True) 865 | b_values = np.zeros((output_size,), dtype=theano.config.floatX) 866 | b = theano.shared(value=b_values, name='b', borrow=True) 867 | params = [W, b] 868 | return params 869 | 870 | 871 | def build_linear_layer_from_params(params, input_variable): 872 | W, b = params 873 | output_variable = T.dot(input_variable, W) + b 874 | return output_variable, params 875 | 876 | 877 | def build_linear_layer(input_size, output_size, input_variable, random_state): 878 | params = init_linear_layer(input_size, output_size, random_state) 879 | return build_linear_layer_from_params(params, input_variable) 880 | 881 | 882 | def init_tanh_layer(input_size, output_size, random_state): 883 | W_values = np.asarray(random_state.uniform( 884 | low=-np.sqrt(6. / (input_size + output_size)), 885 | high=np.sqrt(6. / (input_size + output_size)), 886 | size=(input_size, output_size)), dtype=theano.config.floatX) 887 | W = theano.shared(value=W_values, name='W', borrow=True) 888 | b_values = np.zeros((output_size,), dtype=theano.config.floatX) 889 | b = theano.shared(value=b_values, name='b', borrow=True) 890 | params = [W, b] 891 | return params 892 | 893 | 894 | def build_tanh_layer_from_params(params, input_variable): 895 | W, b = params 896 | output_variable = T.tanh(T.dot(input_variable, W) + b) 897 | return output_variable, params 898 | 899 | 900 | def build_tanh_layer(input_size, output_size, input_variable, random_state): 901 | params = init_tanh_layer(input_size, output_size, random_state) 902 | return build_tanh_layer_from_params(params, input_variable) 903 | 904 | 905 | def build_relu_layer(input_size, output_size, input_variable, random_state): 906 | W_values = np.asarray(random_state.uniform( 907 | low=-np.sqrt(6. / (input_size + output_size)), 908 | high=np.sqrt(6. / (input_size + output_size)), 909 | size=(input_size, output_size)), dtype=theano.config.floatX) 910 | W = theano.shared(value=W_values, name='W', borrow=True) 911 | b_values = np.zeros((output_size,), dtype=theano.config.floatX) 912 | b = theano.shared(value=b_values, name='b', borrow=True) 913 | output_variable = relu(T.dot(input_variable, W) + b) 914 | params = [W, b] 915 | return output_variable, params 916 | 917 | 918 | def build_sigmoid_layer(input_size, output_size, input_variable, random_state): 919 | W_values = np.asarray(random_state.uniform( 920 | low=-np.sqrt(6. / (input_size + output_size)), 921 | high=np.sqrt(6. 
/ (input_size + output_size)), 922 | size=(input_size, output_size)), dtype=theano.config.floatX) 923 | W = theano.shared(value=4 * W_values, name='W', borrow=True) 924 | b_values = np.zeros((output_size,), dtype=theano.config.floatX) 925 | b = theano.shared(value=b_values, name='b', borrow=True) 926 | output_variable = T.nnet.sigmoid(T.dot(input_variable, W) + b) 927 | params = [W, b] 928 | return output_variable, params 929 | 930 | 931 | def softmax_cost(y_hat_sym, y_sym): 932 | return -T.mean(T.log(y_hat_sym)[T.arange(y_sym.shape[0]), y_sym]) 933 | 934 | """ 935 | class FeedforwardNetwork(BaseNet, TrainingMixin): 936 | def __init__(self, hidden_layer_sizes=[500], batch_size=100, max_iter=1E3, 937 | learning_rate=0.01, momentum=0., learning_alg="sgd", 938 | activation="tanh", model_save_name="saved_model", 939 | save_frequency=100, random_seed=None): 940 | 941 | if random_seed is None or type(random_seed) is int: 942 | self.random_state = np.random.RandomState(random_seed) 943 | self.max_iter = int(max_iter) 944 | self.hidden_layer_sizes = hidden_layer_sizes 945 | self.batch_size = batch_size 946 | self.save_frequency = save_frequency 947 | self.model_save_name = model_save_name 948 | 949 | self.learning_rate = learning_rate 950 | self.momentum = momentum 951 | self.learning_alg = learning_alg 952 | if activation == "relu": 953 | self.feedforward_function = build_relu_layer 954 | elif activation == "tanh": 955 | self.feedforward_function = build_tanh_layer 956 | elif activation == "sigmoid": 957 | self.feedforward_function = build_sigmoid_layer 958 | else: 959 | raise ValueError("Value %s not understood for activation" 960 | % activation) 961 | 962 | def _setup_functions(self, X_sym, y_sym, layer_sizes): 963 | input_variable = X_sym 964 | params = [] 965 | for i, (input_size, output_size) in enumerate(zip(layer_sizes[:-1], 966 | layer_sizes[1:-1])): 967 | output_variable, layer_params = self.feedforward_function( 968 | input_size, output_size, input_variable, self.random_state) 969 | params.extend(layer_params) 970 | input_variable = output_variable 971 | 972 | output_variable, layer_params = build_linear_layer( 973 | layer_sizes[-2], layer_sizes[-1], input_variable, self.random_state) 974 | params.extend(layer_params) 975 | y_hat_sym = T.nnet.softmax(output_variable) 976 | cost = softmax_cost(y_hat_sym, y_sym) 977 | 978 | self.params_ = params 979 | 980 | if self.learning_alg == "sgd": 981 | updates = self.get_sgd_updats(X_sym, y_sym, params, cost, 982 | self.learning_rate, 983 | self.momentum) 984 | else: 985 | raise ValueError("Algorithm %s is not " 986 | "a valid argument for learning_alg!" 
987 | % self.learning_alg) 988 | self.fit_function = theano.function( 989 | inputs=[X_sym, y_sym], outputs=cost, updates=updates) 990 | self.loss_function = theano.function( 991 | inputs=[X_sym, y_sym], outputs=cost) 992 | 993 | self.predict_function = theano.function( 994 | inputs=[X_sym], 995 | outputs=[y_hat_sym],) 996 | 997 | def partial_fit(self, X, y): 998 | return self.fit_function(X, y.astype('int32')) 999 | 1000 | def fit(self, X, y, valid_X=None, valid_y=None): 1001 | input_size = X.shape[1] 1002 | output_size = len(np.unique(y)) 1003 | X_sym = T.matrix('x') 1004 | y_sym = T.ivector('y') 1005 | self.layers_ = [] 1006 | self.layer_sizes_ = [input_size] 1007 | self.layer_sizes_.extend(self.hidden_layer_sizes) 1008 | self.layer_sizes_.append(output_size) 1009 | self.training_loss_ = [] 1010 | self.validation_loss_ = [] 1011 | 1012 | if not hasattr(self, 'fit_function'): 1013 | self._setup_functions(X_sym, y_sym, 1014 | self.layer_sizes_) 1015 | 1016 | batch_indices = list(range(0, X.shape[0], self.batch_size)) 1017 | if X.shape[0] != batch_indices[-1]: 1018 | batch_indices.append(X.shape[0]) 1019 | 1020 | best_valid_loss = np.inf 1021 | for itr in range(self.max_iter): 1022 | print("Starting pass %d through the dataset" % itr) 1023 | batch_bounds = list(zip(batch_indices[:-1], batch_indices[1:])) 1024 | # Random minibatches 1025 | self.random_state.shuffle(batch_bounds) 1026 | for start, end in batch_bounds: 1027 | self.partial_fit(X[start:end], y[start:end]) 1028 | current_train_loss = self.loss_function(X, y) 1029 | self.training_loss_.append(current_train_loss) 1030 | 1031 | if (itr % self.save_frequency) == 0 or (itr == self.max_iter): 1032 | f = open(self.model_save_name + "_snapshot.pkl", 'wb') 1033 | cPickle.dump(self, f, protocol=2) 1034 | f.close() 1035 | 1036 | if valid_X is not None: 1037 | current_valid_loss = self.loss_function(valid_X, valid_y) 1038 | self.validation_loss_.append(current_valid_loss) 1039 | print("Validation loss %f" % current_valid_loss) 1040 | # if we got the best validation score until now, save 1041 | if current_valid_loss < best_valid_loss: 1042 | best_valid_loss = current_valid_loss 1043 | f = open(self.model_save_name + "_best.pkl", 'wb') 1044 | cPickle.dump(self, f, protocol=2) 1045 | f.close() 1046 | return self 1047 | 1048 | def predict(self, X): 1049 | return np.argmax(self.predict_function(X), axis=1) 1050 | """ 1051 | 1052 | 1053 | def init_recurrent_conditional_lstm_layer(input_size, hidden_size, output_size, 1054 | random_state): 1055 | # input to LSTM 1056 | W_ = np.concatenate( 1057 | [np_rand((input_size, hidden_size), random_state), 1058 | np_rand((input_size, hidden_size), random_state), 1059 | np_rand((input_size, hidden_size), random_state), 1060 | np_rand((input_size, hidden_size), random_state)], 1061 | axis=1) 1062 | 1063 | W = theano.shared(W_, borrow=True) 1064 | 1065 | # LSTM to LSTM 1066 | U_ = np.concatenate( 1067 | [np_ortho((hidden_size, hidden_size), random_state), 1068 | np_ortho((hidden_size, hidden_size), random_state), 1069 | np_ortho((hidden_size, hidden_size), random_state), 1070 | np_ortho((hidden_size, hidden_size), random_state)], 1071 | axis=1) 1072 | 1073 | U = theano.shared(U_, borrow=True) 1074 | 1075 | # bias to LSTM 1076 | # TODO: Ilya init for biases... 
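# ("Ilya init" presumably refers to Ilya Sutskever's recommendation of starting
# the LSTM forget-gate bias at a positive value such as 1 so the cell retains
# its memory early in training; for now every gate bias starts at zero.)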
1077 | b = shared_zeros((4 * hidden_size,)) 1078 | 1079 | # Context to LSTM 1080 | Wc = shared_rand((output_size, 4 * hidden_size), random_state) 1081 | 1082 | # attention: context to hidden 1083 | Wc_att = shared_ortho((output_size, output_size), random_state) 1084 | 1085 | # attention: LSTM to hidden 1086 | Wd_att = shared_rand((hidden_size, output_size), random_state) 1087 | 1088 | # attention: hidden bias 1089 | b_att = shared_zeros((output_size,)) 1090 | 1091 | # attention 1092 | U_att = shared_rand((output_size, 1), random_state) 1093 | c_att = shared_zeros((1,)) 1094 | 1095 | params = [W, U, b, Wc, Wc_att, Wd_att, b_att, U_att, c_att] 1096 | 1097 | return params 1098 | 1099 | 1100 | def build_recurrent_conditional_lstm_layer(input_size, hidden_size, output_size, 1101 | input_variable, mask, context, 1102 | context_mask, init_state, 1103 | init_memory, random_state, 1104 | one_step=False): 1105 | params = init_recurrent_conditional_lstm_layer(input_size, hidden_size, 1106 | output_size, random_state) 1107 | 1108 | return build_recurrent_conditional_lstm_layer_from_params(params, 1109 | input_variable, 1110 | mask, context, 1111 | context_mask, 1112 | init_state, 1113 | init_memory, 1114 | random_state, 1115 | one_step=one_step) 1116 | 1117 | 1118 | def build_recurrent_conditional_lstm_layer_from_params(params, input_variable, 1119 | mask, context, 1120 | context_mask, init_state, 1121 | init_memory, 1122 | random_state, 1123 | one_step=False): 1124 | [W, U, b, Wc, Wc_att, Wd_att, b_att, U_att, c_att] = params 1125 | 1126 | n_steps = input_variable.shape[0] 1127 | n_samples = input_variable.shape[1] 1128 | n_features = input_variable.shape[2] 1129 | 1130 | hidden_size = U.shape[0] 1131 | 1132 | # projected context 1133 | projected_context = T.dot(context, Wc_att) + b_att 1134 | 1135 | # projected input 1136 | x = T.dot(input_variable, W) + b 1137 | 1138 | def _slice(X, n, hidden_size): 1139 | # Function is needed because tensor size changes across calls to step? 1140 | if X.ndim == 3: 1141 | return X[:, :, n * hidden_size:(n + 1) * hidden_size] 1142 | return X[:, n * hidden_size:(n + 1) * hidden_size] 1143 | 1144 | def step(x_t, m, h_tm1, c_tm1, ctx_t, att, pctx_): 1145 | projected_state = T.dot(h_tm1, Wd_att) 1146 | pctx_ = T.tanh(pctx_ + projected_state[None, :, :]) 1147 | new_att = T.dot(pctx_, U_att) + c_att 1148 | new_att = new_att.reshape([new_att.shape[0], new_att.shape[1]]) 1149 | new_att = T.exp(new_att) * context_mask 1150 | new_att = new_att / new_att.sum(axis=0, keepdims=True) 1151 | # Current context 1152 | ctx_t = (context * new_att[:, :, None]).sum(axis=0) 1153 | 1154 | preactivation = T.dot(h_tm1, U) 1155 | preactivation += x_t 1156 | preactivation += T.dot(ctx_t, Wc) 1157 | 1158 | i_t = T.nnet.sigmoid(_slice(preactivation, 0, hidden_size)) 1159 | f_t = T.nnet.sigmoid(_slice(preactivation, 1, hidden_size)) 1160 | o_t = T.nnet.sigmoid(_slice(preactivation, 2, hidden_size)) 1161 | c_t = T.tanh(_slice(preactivation, 3, hidden_size)) 1162 | 1163 | c_t = f_t * c_tm1 + i_t * c_t 1164 | c_t = m[:, None] * c_t + (1. - m)[:, None] * c_tm1 1165 | h_t = o_t * T.tanh(c_t) 1166 | h_t = m[:, None] * h_t + (1. - m)[:, None] * h_tm1 1167 | return (h_t, c_t, ctx_t, new_att.T, projected_state, 1168 | i_t, f_t, o_t, preactivation) 1169 | 1170 | init_context = T.zeros((n_samples, context.shape[2]), 1171 | dtype=theano.config.floatX) 1172 | init_att = T.zeros((n_samples, context.shape[0]), 1173 | dtype=theano.config.floatX) 1174 | # Scan cannot handle batch sizes of 1? 
1175 | # Unbroadcast can fix it... but still weird 1176 | #https://github.com/Theano/Theano/issues/1772 1177 | #init_context = T.unbroadcast(init_context, 0) 1178 | #init_att = T.unbroadcast(init_att, 0) 1179 | 1180 | if one_step: 1181 | rval = step(x, mask, init_state, init_memory, None, None, 1182 | projected_context) 1183 | else: 1184 | rval, _ = theano.scan(step, 1185 | sequences=[x, mask], 1186 | outputs_info=[init_state, init_memory, 1187 | init_context, init_att, 1188 | None, None, None, None, None], 1189 | non_sequences=[projected_context,], 1190 | n_steps=n_steps) 1191 | 1192 | #hidden = rval[0] 1193 | #state = rval[1] 1194 | #final_context = rval[2] 1195 | #final_att = rval[3] 1196 | return rval[:4], params 1197 | 1198 | 1199 | def init_recurrent_lstm_layer(input_size, hidden_size, output_size, 1200 | random_state): 1201 | # input to LSTM 1202 | W_ = np.concatenate( 1203 | [np_rand((input_size, hidden_size), random_state), 1204 | np_rand((input_size, hidden_size), random_state), 1205 | np_rand((input_size, hidden_size), random_state), 1206 | np_rand((input_size, hidden_size), random_state)], 1207 | axis=1) 1208 | 1209 | W = theano.shared(W_, borrow=True) 1210 | 1211 | # LSTM to LSTM 1212 | U_ = np.concatenate( 1213 | [np_ortho((hidden_size, hidden_size), random_state), 1214 | np_ortho((hidden_size, hidden_size), random_state), 1215 | np_ortho((hidden_size, hidden_size), random_state), 1216 | np_ortho((hidden_size, hidden_size), random_state)], 1217 | axis=1) 1218 | 1219 | U = theano.shared(U_, borrow=True) 1220 | 1221 | # bias to LSTM 1222 | b = shared_zeros((4 * hidden_size,)) 1223 | 1224 | params = [W, U, b] 1225 | return params 1226 | 1227 | 1228 | def build_recurrent_lstm_layer(input_size, hidden_size, output_size, 1229 | input_variable, mask, 1230 | random_state, one_step=False): 1231 | params = init_recurrent_lstm_layer(input_size, hidden_size, output_size, 1232 | random_state) 1233 | return build_recurrent_lstm_layer_from_params(params, input_variable, mask, 1234 | random_state, 1235 | one_step=one_step) 1236 | 1237 | 1238 | def build_recurrent_lstm_layer_from_params(params, input_variable, mask, 1239 | random_state, one_step=False): 1240 | [W, U, b] = params 1241 | 1242 | hidden_size = U.shape[0] 1243 | 1244 | n_steps = input_variable.shape[0] 1245 | n_samples = input_variable.shape[1] 1246 | n_features = input_variable.shape[2] 1247 | 1248 | def _slice(X, n, hidden_size): 1249 | # Function is needed because tensor size changes across calls to step? 1250 | if X.ndim == 3: 1251 | return X[:, :, n * hidden_size:(n + 1) * hidden_size] 1252 | return X[:, n * hidden_size:(n + 1) * hidden_size] 1253 | 1254 | def step(x_t, m, h_tm1, c_tm1): 1255 | preactivation = T.dot(h_tm1, U) 1256 | preactivation += x_t 1257 | preactivation += b 1258 | 1259 | i_t = T.nnet.sigmoid(_slice(preactivation, 0, hidden_size)) 1260 | f_t = T.nnet.sigmoid(_slice(preactivation, 1, hidden_size)) 1261 | o_t = T.nnet.sigmoid(_slice(preactivation, 2, hidden_size)) 1262 | c_t = T.tanh(_slice(preactivation, 3, hidden_size)) 1263 | 1264 | c_t = f_t * c_tm1 + i_t * c_t 1265 | c_t = m[:, None] * c_t + (1. - m)[:, None] * c_tm1 1266 | h_t = o_t * T.tanh(c_t) 1267 | h_t = m[:, None] * h_t + (1. - m)[:, None] * h_tm1 1268 | return h_t, c_t, i_t, f_t, o_t, preactivation 1269 | 1270 | # Scan cannot handle batch sizes of 1? 1271 | # Unbroadcast can fix it... 
but still weird 1272 | #https://github.com/Theano/Theano/issues/1772 1273 | init_hidden = T.zeros((n_samples, hidden_size)) 1274 | init_cell = T.zeros((n_samples, hidden_size)) 1275 | init_hidden = T.unbroadcast(init_hidden, 0) 1276 | init_cell = T.unbroadcast(init_cell, 0) 1277 | 1278 | x = T.dot(input_variable, W) + b 1279 | if one_step: 1280 | rval = step(x, mask, init_hidden, init_cell) 1281 | else: 1282 | rval, _ = theano.scan(step, 1283 | sequences=[x, mask], 1284 | outputs_info=[init_hidden, init_cell, 1285 | None, None, None, None], 1286 | n_steps=n_steps) 1287 | 1288 | hidden = rval[0] 1289 | return hidden, params 1290 | 1291 | 1292 | def recurrence_relation(size): 1293 | """ 1294 | Based on code from Shawn Tan 1295 | """ 1296 | 1297 | eye2 = T.eye(size + 2) 1298 | return T.eye(size) + eye2[2:, 1:-1] + eye2[2:, :-2] * (T.arange(size) % 2) 1299 | 1300 | 1301 | def path_probs(predict, y_sym): 1302 | """ 1303 | Based on code from Rakesh - blank is assumed to be highest class in y_sym 1304 | """ 1305 | pred_y = predict[:, y_sym] 1306 | rr = recurrence_relation(y_sym.shape[0]) 1307 | 1308 | def step(p_curr, p_prev): 1309 | return p_curr * T.dot(p_prev, rr) 1310 | 1311 | probabilities, _ = theano.scan( 1312 | step, 1313 | sequences=[pred_y], 1314 | outputs_info=[T.eye(y_sym.shape[0])[0]] 1315 | ) 1316 | return probabilities 1317 | 1318 | 1319 | def _epslog(X): 1320 | return T.cast(T.log(T.clip(X, 1E-12, 1E12)), theano.config.floatX) 1321 | 1322 | 1323 | def log_path_probs(y_hat_sym, y_sym): 1324 | """ 1325 | Based on code from Shawn Tan with calculations in log space 1326 | """ 1327 | pred_y = y_hat_sym[:, y_sym] 1328 | rr = recurrence_relation(y_sym.shape[0]) 1329 | 1330 | def step(logp_curr, logp_prev): 1331 | return logp_curr + _epslog(T.dot(T.exp(logp_prev), rr)) 1332 | 1333 | log_probs, _ = theano.scan( 1334 | step, 1335 | sequences=[_epslog(pred_y)], 1336 | outputs_info=[_epslog(T.eye(y_sym.shape[0])[0])] 1337 | ) 1338 | return log_probs 1339 | 1340 | 1341 | def ctc_cost(y_hat_sym, y_sym): 1342 | """ 1343 | Based on code from Shawn Tan 1344 | """ 1345 | forward_probs = path_probs(y_hat_sym, y_sym) 1346 | backward_probs = path_probs(y_hat_sym[::-1], y_sym[::-1])[::-1, ::-1] 1347 | probs = forward_probs * backward_probs / y_hat_sym[:, y_sym] 1348 | total_probs = T.sum(probs) 1349 | return -T.log(total_probs) 1350 | 1351 | 1352 | def log_ctc_cost(y_hat_sym, y_sym): 1353 | """ 1354 | Based on code from Shawn Tan with sum calculations in log space 1355 | """ 1356 | log_forward_probs = log_path_probs(y_hat_sym, y_sym) 1357 | log_backward_probs = log_path_probs( 1358 | y_hat_sym[::-1], y_sym[::-1])[::-1, ::-1] 1359 | log_probs = log_forward_probs + log_backward_probs - _epslog( 1360 | y_hat_sym[:, y_sym]) 1361 | log_probs = log_probs.flatten() 1362 | max_log = T.max(log_probs) 1363 | # Stable logsumexp 1364 | loss = max_log + T.log(T.sum(T.exp(log_probs - max_log))) 1365 | return -loss 1366 | 1367 | 1368 | def rnn_check_array(X, y=None): 1369 | if type(X) == np.ndarray and len(X.shape) == 2: 1370 | X = [X.astype(theano.config.floatX)] 1371 | elif type(X) == np.ndarray and len(X.shape) == 3: 1372 | X = X.astype(theano.config.floatX) 1373 | elif type(X) == list: 1374 | if type(X[0]) == np.ndarray and len(X[0].shape) == 2: 1375 | X = [x.astype(theano.config.floatX) for x in X] 1376 | else: 1377 | raise ValueError("X must be a 2D numpy array or an" 1378 | "iterable of 2D numpy arrays") 1379 | try: 1380 | X[0].shape[1] 1381 | except AttributeError: 1382 | raise ValueError("X must be a 2D 
numpy array or an" 1383 | "iterable of 2D numpy arrays") 1384 | 1385 | if y is not None: 1386 | if type(y) == np.ndarray and len(y.shape) == 1: 1387 | y = [y.astype('int32')] 1388 | elif type(y) == np.ndarray and len(y.shape) == 2: 1389 | y = y.astype('int32') 1390 | elif type(y) == list: 1391 | if type(y[0]) == np.ndarray and len(y[0].shape) == 1: 1392 | y = [yi.astype('int32') for yi in y] 1393 | elif type(y[0]) != np.ndarray: 1394 | y = [np.asarray(y).astype('int32')] 1395 | try: 1396 | y[0].shape[0] 1397 | except AttributeError: 1398 | raise ValueError("y must be an iterable of 1D numpy arrays") 1399 | return X, y 1400 | else: 1401 | # If y is not passed don't return it 1402 | return X 1403 | 1404 | 1405 | class RecurrentNetwork(BaseNet, TrainingMixin): 1406 | def __init__(self, hidden_layer_sizes=[100], max_iter=1E2, 1407 | learning_rate=0.01, momentum=0., learning_alg="sgd", 1408 | recurrent_activation="lstm", minibatch_size=1, 1409 | bidirectional=False, cost="softmax", save_frequency=10, 1410 | model_save_name="saved_model", random_seed=None, 1411 | input_checking=True): 1412 | if random_seed is None or type(random_seed) is int: 1413 | self.random_state = np.random.RandomState(random_seed) 1414 | self.learning_rate = learning_rate 1415 | self.learning_alg = learning_alg 1416 | self.momentum = momentum 1417 | self.bidirectional = bidirectional 1418 | self.cost = cost 1419 | self.hidden_layer_sizes = hidden_layer_sizes 1420 | self.max_iter = int(max_iter) 1421 | self.minibatch_size = minibatch_size 1422 | self.save_frequency = save_frequency 1423 | self.model_save_name = model_save_name 1424 | self.recurrent_activation = recurrent_activation 1425 | self.input_checking = input_checking 1426 | if recurrent_activation == "lstm": 1427 | self.recurrent_function = build_recurrent_lstm_layer 1428 | else: 1429 | raise ValueError("Value %s not understood for recurrent_activation" 1430 | % recurrent_activation) 1431 | 1432 | def _setup_functions(self, X_sym, y_sym, X_mask, y_mask, layer_sizes): 1433 | input_variable = X_sym 1434 | 1435 | # layer_sizes consists of input size, all hidden sizes, and output size 1436 | hidden_sizes = layer_sizes[1:-1] 1437 | # set these to stop pep8 vim plugin from complaining 1438 | input_size = None 1439 | output_size = None 1440 | for n in range(len(hidden_sizes)): 1441 | if (n - 1) < 0: 1442 | input_size = layer_sizes[0] 1443 | else: 1444 | if self.bidirectional: 1445 | # Accomodate for concatenated hiddens 1446 | input_size = 2 * output_size 1447 | else: 1448 | input_size = output_size 1449 | hidden_size = hidden_sizes[n] 1450 | if (n + 1) != len(hidden_sizes): 1451 | output_size = hidden_sizes[n + 1] 1452 | else: 1453 | output_size = layer_sizes[-1] 1454 | 1455 | forward_hidden, forward_params = self.recurrent_function( 1456 | input_size, hidden_size, output_size, input_variable, X_mask, 1457 | self.random_state) 1458 | 1459 | if self.bidirectional: 1460 | backward_hidden, backward_params = self.recurrent_function( 1461 | input_size, hidden_size, output_size, input_variable[::-1], 1462 | X_mask[::-1], self.random_state) 1463 | params = forward_params + backward_params 1464 | input_variable = concatenate( 1465 | [forward_hidden, backward_hidden[::-1]], 1466 | axis=forward_hidden.ndim - 1) 1467 | else: 1468 | params = forward_params 1469 | input_variable = forward_hidden 1470 | 1471 | if self.bidirectional: 1472 | # Accomodate for concatenated hiddens 1473 | sz = 2 * hidden_sizes[-1] 1474 | else: 1475 | sz = hidden_sizes[-1] 1476 | 1477 | if self.cost == 
"softmax": 1478 | # easy mode 1479 | output, output_params = build_linear_layer(sz, output_size, 1480 | input_variable, 1481 | self.random_state) 1482 | params = params + output_params 1483 | shp = output.shape 1484 | output = output.reshape([shp[0] * shp[1], shp[2]]) 1485 | y_hat_sym = T.nnet.softmax(output) 1486 | y_sym_reshaped = y_sym.reshape([shp[0] * shp[1], shp[2]]) 1487 | cost = -T.mean((y_sym_reshaped * T.log(y_hat_sym)).sum(axis=1)) 1488 | 1489 | elif self.cost == "encdec": 1490 | # hardmode 1491 | context = input_variable 1492 | context_mean = context[0] 1493 | 1494 | init_state, state_params = build_tanh_layer(sz, hidden_sizes[-1], 1495 | context_mean, 1496 | self.random_state) 1497 | init_memory, memory_params = build_tanh_layer(sz, hidden_sizes[-1], 1498 | context_mean, 1499 | self.random_state) 1500 | # partial sampler setup 1501 | self._encode = theano.function([X_sym, X_mask], 1502 | [init_state, init_memory, context]) 1503 | init_state_sampler = T.matrix() 1504 | init_memory_sampler = T.matrix() 1505 | y_sw_sampler = T.tensor3() 1506 | y_sw_mask = T.alloc(1., y_sw_sampler.shape[0], 1) 1507 | 1508 | # need this style of init to reuse params for sampler and actual 1509 | # training. This makes this part quite nasty - dictionary 1510 | # for initialization and params is making more and more sense. 1511 | # conditional params will be reused below 1512 | conditional_params = init_recurrent_conditional_lstm_layer( 1513 | output_size, hidden_sizes[-1], sz, self.random_state) 1514 | 1515 | rval, _p = build_recurrent_conditional_lstm_layer_from_params( 1516 | conditional_params, y_sw_sampler, y_sw_mask, context, X_mask, 1517 | init_state_sampler, init_memory_sampler, 1518 | self.random_state, one_step=True) 1519 | next_state, next_memory, sampler_contexts, _ = rval 1520 | #end sampler parts... 
for now 1521 | 1522 | params = params + state_params + memory_params 1523 | shifted_labels = T.zeros_like(y_sym) 1524 | shifted_labels = T.set_subtensor(shifted_labels[1:], y_sym[:-1]) 1525 | y_sym = shifted_labels 1526 | 1527 | rval, _p = build_recurrent_conditional_lstm_layer_from_params( 1528 | conditional_params, shifted_labels, y_mask, context, X_mask, 1529 | init_state, init_memory, self.random_state) 1530 | projected_hidden, _, contexts, attention = rval 1531 | 1532 | params = params + conditional_params 1533 | 1534 | # once again, need to use same params for sample gen 1535 | lh_params = init_linear_layer(hidden_sizes[-1], output_size, 1536 | self.random_state) 1537 | logit_hidden, _ = build_linear_layer_from_params(lh_params, 1538 | projected_hidden) 1539 | params = params + lh_params 1540 | 1541 | lo_params = init_linear_layer(output_size, output_size, 1542 | self.random_state) 1543 | logit_out, _ = build_linear_layer_from_params(lo_params, 1544 | y_sym) 1545 | params = params + lo_params 1546 | 1547 | 1548 | lc_params = init_linear_layer(sz, output_size, 1549 | self.random_state) 1550 | logit_contexts, _ = build_linear_layer_from_params(lc_params, 1551 | contexts) 1552 | params = params + lc_params 1553 | 1554 | logit = T.tanh(logit_hidden + logit_out + logit_contexts) 1555 | output_params = init_linear_layer(output_size, output_size, 1556 | self.random_state) 1557 | output, _ = build_linear_layer_from_params(output_params, 1558 | logit) 1559 | params = params + output_params 1560 | 1561 | shp = output.shape 1562 | output = output.reshape([shp[0] * shp[1], shp[2]]) 1563 | y_hat_sym = T.nnet.softmax(output) 1564 | 1565 | # Apply the mask so that padded steps do not contribute to the cost 1566 | y_sym_reshaped = (y_sym * y_mask.dimshuffle(0, 1, 'x')).reshape( 1567 | [shp[0] * shp[1], shp[2]]) 1568 | 1569 | cost = -T.mean((y_sym_reshaped * T.log(y_hat_sym)).sum(axis=1)) 1570 | 1571 | # Finish sampler 1572 | logit_sampler_hidden, _ = build_linear_layer_from_params(lh_params, 1573 | next_state) 1574 | logit_sampler_out, _ = build_linear_layer_from_params(lo_params, 1575 | y_sw_sampler) 1576 | logit_sampler_contexts, _ = build_linear_layer_from_params( 1577 | lc_params, sampler_contexts) 1578 | logit_sampler = T.tanh(logit_sampler_hidden + logit_sampler_out 1579 | + logit_sampler_contexts) 1580 | output_sampler, _ = build_linear_layer_from_params(output_params, 1581 | logit_sampler) 1582 | shp = output_sampler.shape 1583 | output_sampler = output_sampler.reshape([shp[0] * shp[1], shp[2]]) 1584 | y_hat_sampler = T.nnet.softmax(output_sampler) 1585 | self._sampler_step = theano.function( 1586 | [y_sw_sampler, context, X_mask, init_state_sampler, 1587 | init_memory_sampler], 1588 | [y_hat_sampler, next_state, next_memory]) 1589 | 1590 | else: 1591 | raise ValueError("Value of %s not a valid cost!" 1592 | % self.cost) 1593 | 1594 | self.params_ = params 1595 | 1596 | if self.learning_alg == "sgd": 1597 | updates = self.get_clip_sgd_updates( 1598 | X_sym, y_sym, params, cost, self.learning_rate, self.momentum) 1599 | elif self.learning_alg == "rmsprop": 1600 | updates = self.get_clip_rmsprop_updates( 1601 | X_sym, y_sym, params, cost, self.learning_rate, self.momentum) 1602 | elif self.learning_alg == "sfg": 1603 | updates = self.get_sfg_updates( 1604 | X_sym, y_sym, params, cost, self.learning_rate, self.momentum) 1605 | else: 1606 | raise ValueError("Value of %s not a valid learning_alg!" 
1607 | % self.learning_alg) 1608 | 1609 | if self.cost == "softmax": 1610 | self.fit_function = theano.function(inputs=[X_sym, y_sym, X_mask, 1611 | y_mask], 1612 | outputs=cost, 1613 | updates=updates, 1614 | on_unused_input="ignore") 1615 | 1616 | self.loss_function = theano.function(inputs=[X_sym, y_sym, X_mask, 1617 | y_mask], 1618 | outputs=cost, 1619 | on_unused_input="ignore") 1620 | 1621 | self.predict_function = theano.function( 1622 | inputs=[X_sym, X_mask], 1623 | outputs=y_hat_sym, 1624 | on_unused_input="ignore") 1625 | 1626 | else: 1627 | self.fit_function = theano.function(inputs=[X_sym, y_sym, X_mask, 1628 | y_mask], 1629 | outputs=cost, 1630 | updates=updates, 1631 | on_unused_input="warn") 1632 | 1633 | self.loss_function = theano.function(inputs=[X_sym, y_sym, X_mask, 1634 | y_mask], 1635 | outputs=cost, 1636 | on_unused_input="warn") 1637 | 1638 | self.predict_function = theano.function( 1639 | inputs=[X_sym, X_mask, y_sym, y_mask], 1640 | outputs=y_hat_sym) 1641 | 1642 | 1643 | def fit(self, X, y, valid_X=None, valid_y=None): 1644 | if self.input_checking: 1645 | X, y = rnn_check_array(X, y) 1646 | input_size = X[0].shape[1] 1647 | # Assume that class values are sequential! and start from 0 1648 | highest_class = np.max([np.max(d) for d in y]) 1649 | lowest_class = np.min([np.min(d) for d in y]) 1650 | if lowest_class != 0: 1651 | raise ValueError("Labels must start from 0!") 1652 | # Create a list of all classes, then get uniques 1653 | # sum(lists, []) is list concatenation 1654 | all_classes = np.unique(sum([list(np.unique(d)) for d in y], [])) 1655 | # +1 to include endpoint 1656 | output_size = len(np.arange(lowest_class, highest_class + 1)) 1657 | X_sym = T.tensor3('x') 1658 | y_sym = T.tensor3('y') 1659 | X_mask = T.matrix('x_mask') 1660 | y_mask = T.matrix('y_mask') 1661 | 1662 | self.layers_ = [] 1663 | self.layer_sizes_ = [input_size] 1664 | self.layer_sizes_.extend(self.hidden_layer_sizes) 1665 | self.layer_sizes_.append(output_size) 1666 | if not hasattr(self, 'fit_function'): 1667 | print("Building model!") 1668 | self._setup_functions(X_sym, y_sym, X_mask, y_mask, 1669 | self.layer_sizes_) 1670 | self.training_loss_ = [] 1671 | if valid_X is not None: 1672 | self.validation_loss_ = [] 1673 | if self.input_checking: 1674 | valid_X, valid_y = rnn_check_array(valid_X, valid_y) 1675 | for vy in valid_y: 1676 | if not np.in1d(np.unique(vy), all_classes).all(): 1677 | raise ValueError( 1678 | "Validation set contains classes not in training" 1679 | "set! 
Training set classes: %s\n, Validation set \ 1680 | classes: %s" % (all_classes, np.unique(vy))) 1681 | 1682 | best_valid_loss = np.inf 1683 | for itr in range(self.max_iter): 1684 | print("Starting pass %d through the dataset" % itr) 1685 | total_train_loss = 0 1686 | for i, j in minibatch_indices(X, self.minibatch_size): 1687 | X_n, y_n, X_mask, y_mask = make_minibatch(X[i:j], y[i:j], 1688 | output_size) 1689 | train_loss = self.fit_function(X_n, y_n, X_mask, y_mask) 1690 | total_train_loss += train_loss 1691 | current_train_loss = total_train_loss / len(X) 1692 | print("Training loss %f" % current_train_loss) 1693 | self.training_loss_.append(current_train_loss) 1694 | 1695 | if (itr % self.save_frequency) == 0 or (itr == self.max_iter): 1696 | f = open(self.model_save_name + "_snapshot.pkl", 'wb') 1697 | cPickle.dump(self, f, protocol=2) 1698 | f.close() 1699 | 1700 | if valid_X is not None: 1701 | total_valid_loss = 0 1702 | for i, j in minibatch_indices(valid_X, self.minibatch_size): 1703 | valid_X_n, valid_y_n, X_mask, y_mask = make_minibatch( 1704 | valid_X[i:j], valid_y[i:j], output_size) 1705 | valid_loss = self.loss_function(valid_X_n, valid_y_n, 1706 | X_mask, y_mask) 1707 | total_valid_loss += valid_loss 1708 | current_valid_loss = total_valid_loss / len(valid_X) 1709 | print("Validation loss %f" % current_valid_loss) 1710 | self.validation_loss_.append(current_valid_loss) 1711 | if current_valid_loss < best_valid_loss: 1712 | best_valid_loss = current_valid_loss 1713 | f = open(self.model_save_name + "_best.pkl", 'wb') 1714 | cPickle.dump(self, f, protocol=2) 1715 | f.close() 1716 | 1717 | 1718 | def predict(self, X): 1719 | raise ValueError("Not yet implemented!") 1720 | X = rnn_check_array(X) 1721 | predictions = [] 1722 | for n in range(len(X)): 1723 | X_n = X[n][None].transpose(1, 0, 2) 1724 | X_mask = np.ones((len(X_n), 1)).astype(theano.config.floatX) 1725 | pred = np.argmax(self.predict_function(X_n, X_mask)[0], axis=1) 1726 | predictions.append(pred) 1727 | return predictions 1728 | 1729 | def predict_proba(self, X): 1730 | raise ValueError("Not yet implemented!") 1731 | X = rnn_check_array(X) 1732 | predictions = [] 1733 | for n in range(len(X)): 1734 | X_n = X[n][None].transpose(1, 0, 2) 1735 | X_mask = np.ones((len(X_n), 1)).astype(theano.config.floatX) 1736 | pred = self.predict_function(X_n, X_mask)[0] 1737 | predictions.append(pred) 1738 | return predictions 1739 | -------------------------------------------------------------------------------- /tests/bi_multilayer_softmax.py: -------------------------------------------------------------------------------- 1 | from net import RecurrentNetwork 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Test adapted from Mohammad P 6 | # https://github.com/mohammadpz/Recurrent-Neural-Networks 7 | 8 | n_u = 2 9 | n_h = 6 10 | n_y = 3 11 | time_steps = 10 12 | n_seq = 100 13 | # n_y is equal to the number of calsses 14 | random_state = np.random.RandomState(1999) 15 | 16 | seq = random_state.randn(n_seq, time_steps, n_u) 17 | targets = np.zeros((n_seq, time_steps), dtype=np.int32) 18 | 19 | thresh = 0.5 20 | targets[:, 2:][seq[:, 1:-1, 1] > seq[:, :-2, 0] + thresh] = 1 21 | targets[:, 2:][seq[:, 1:-1, 1] < seq[:, :-2, 0] - thresh] = 2 22 | 23 | clf = RecurrentNetwork(learning_alg="sgd", hidden_layer_sizes=[n_h, n_h], 24 | max_iter=1E3, cost="softmax", learning_rate=0.1, 25 | momentum=0.99, recurrent_activation="lstm", 26 | bidirectional=True, random_seed=1999) 27 | 28 | clf.fit(seq, targets) 29 | 30 | 
plt.close('all') 31 | fig = plt.figure() 32 | plt.grid() 33 | ax1 = plt.subplot(211) 34 | 35 | plt.scatter(np.arange(time_steps), targets[1], marker='o', c='b') 36 | plt.grid() 37 | 38 | guess = clf.predict_proba(seq[1]) 39 | guessed_probs = plt.imshow(guess[0].T, interpolation='nearest', cmap='gray') 40 | ax1.set_title('blue points: true class, grayscale: model output (white mean class)') 41 | 42 | ax2 = plt.subplot(212) 43 | plt.plot(clf.training_loss_) 44 | plt.grid() 45 | ax2.set_title('Training loss') 46 | plt.show() 47 | -------------------------------------------------------------------------------- /tests/bi_softmax.py: -------------------------------------------------------------------------------- 1 | from net import RecurrentNetwork 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Test adapted from Mohammad P 6 | # https://github.com/mohammadpz/Recurrent-Neural-Networks 7 | 8 | n_u = 2 9 | n_h = 6 10 | n_y = 3 11 | time_steps = 10 12 | n_seq = 100 13 | # n_y is equal to the number of calsses 14 | random_state = np.random.RandomState(1999) 15 | 16 | seq = random_state.randn(n_seq, time_steps, n_u) 17 | targets = np.zeros((n_seq, time_steps), dtype=np.int32) 18 | 19 | thresh = 0.5 20 | targets[:, 2:][seq[:, 1:-1, 1] > seq[:, :-2, 0] + thresh] = 1 21 | targets[:, 2:][seq[:, 1:-1, 1] < seq[:, :-2, 0] - thresh] = 2 22 | 23 | clf = RecurrentNetwork(learning_alg="sgd", hidden_layer_sizes=[n_h], 24 | max_iter=1E3, cost="softmax", learning_rate=0.1, 25 | momentum=0.9, bidirectional=True, 26 | recurrent_activation="lstm", random_seed=1999) 27 | 28 | clf.fit(seq, targets) 29 | 30 | plt.close('all') 31 | fig = plt.figure() 32 | plt.grid() 33 | ax1 = plt.subplot(211) 34 | 35 | plt.scatter(np.arange(time_steps), targets[1], marker='o', c='b') 36 | plt.grid() 37 | 38 | guess = clf.predict_proba(seq[1]) 39 | guessed_probs = plt.imshow(guess[0].T, interpolation='nearest', cmap='gray') 40 | ax1.set_title('blue points: true class, grayscale: model output (white mean class)') 41 | 42 | ax2 = plt.subplot(212) 43 | plt.plot(clf.training_loss_) 44 | plt.grid() 45 | ax2.set_title('Training loss') 46 | plt.show() 47 | -------------------------------------------------------------------------------- /tests/multilayer_softmax.py: -------------------------------------------------------------------------------- 1 | from net import RecurrentNetwork 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Test adapted from Mohammad P 6 | # https://github.com/mohammadpz/Recurrent-Neural-Networks 7 | 8 | n_u = 2 9 | n_h = 6 10 | n_y = 3 11 | time_steps = 10 12 | n_seq = 100 13 | # n_y is equal to the number of calsses 14 | random_state = np.random.RandomState(1999) 15 | 16 | seq = random_state.randn(n_seq, time_steps, n_u) 17 | targets = np.zeros((n_seq, time_steps), dtype=np.int32) 18 | 19 | thresh = 0.5 20 | targets[:, 2:][seq[:, 1:-1, 1] > seq[:, :-2, 0] + thresh] = 1 21 | targets[:, 2:][seq[:, 1:-1, 1] < seq[:, :-2, 0] - thresh] = 2 22 | 23 | clf = RecurrentNetwork(learning_alg="sgd", hidden_layer_sizes=[n_h, n_h], 24 | max_iter=1E3, cost="softmax", learning_rate=0.1, 25 | momentum=0.99, recurrent_activation="lstm", 26 | random_seed=1999) 27 | 28 | clf.fit(seq, targets) 29 | 30 | plt.close('all') 31 | fig = plt.figure() 32 | plt.grid() 33 | ax1 = plt.subplot(211) 34 | 35 | plt.scatter(np.arange(time_steps), targets[1], marker='o', c='b') 36 | plt.grid() 37 | 38 | guess = clf.predict_proba(seq[1]) 39 | guessed_probs = plt.imshow(guess[0].T, interpolation='nearest', cmap='gray') 
40 | ax1.set_title('blue points: true class, grayscale: model output (white mean class)') 41 | 42 | ax2 = plt.subplot(212) 43 | plt.plot(clf.training_loss_) 44 | plt.grid() 45 | ax2.set_title('Training loss') 46 | plt.show() 47 | -------------------------------------------------------------------------------- /tests/net.py: -------------------------------------------------------------------------------- 1 | ../net.py -------------------------------------------------------------------------------- /tests/predict.py: -------------------------------------------------------------------------------- 1 | from net import RecurrentNetwork 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Test adapted from Mohammad P 6 | # https://github.com/mohammadpz/Recurrent-Neural-Networks 7 | 8 | n_u = 2 9 | n_h = 6 10 | n_y = 3 11 | time_steps = 10 12 | n_seq = 100 13 | # n_y is equal to the number of calsses 14 | random_state = np.random.RandomState(1999) 15 | 16 | seq = random_state.randn(n_seq, time_steps, n_u) 17 | targets = np.zeros((n_seq, time_steps), dtype=np.int32) 18 | 19 | thresh = 0.5 20 | targets[:, 2:][seq[:, 1:-1, 1] > seq[:, :-2, 0] + thresh] = 1 21 | targets[:, 2:][seq[:, 1:-1, 1] < seq[:, :-2, 0] - thresh] = 2 22 | 23 | clf = RecurrentNetwork(learning_alg="sgd", hidden_layer_sizes=[n_h], 24 | max_iter=10, cost="softmax", learning_rate=0.1, 25 | momentum=0.99, recurrent_activation="lstm", 26 | random_seed=1999) 27 | 28 | clf.fit(seq, targets) 29 | clf.predict(seq) 30 | 31 | plt.close('all') 32 | fig = plt.figure() 33 | plt.grid() 34 | ax1 = plt.subplot(211) 35 | 36 | plt.scatter(np.arange(time_steps), targets[1], marker='o', c='b') 37 | plt.grid() 38 | 39 | guess = clf.predict_proba(seq[1]) 40 | guessed_probs = plt.imshow(guess[0].T, interpolation='nearest', cmap='gray') 41 | ax1.set_title('blue points: true class, grayscale: model output (white mean class)') 42 | 43 | ax2 = plt.subplot(212) 44 | plt.plot(clf.training_loss_) 45 | plt.grid() 46 | ax2.set_title('Training loss') 47 | plt.show() 48 | -------------------------------------------------------------------------------- /tests/softmax.py: -------------------------------------------------------------------------------- 1 | from net import RecurrentNetwork 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Test adapted from Mohammad P 6 | # https://github.com/mohammadpz/Recurrent-Neural-Networks 7 | 8 | n_u = 2 9 | n_h = 6 10 | n_y = 3 11 | time_steps = 10 12 | n_seq = 100 13 | # n_y is equal to the number of classes 14 | random_state = np.random.RandomState(1999) 15 | 16 | seq = random_state.randn(n_seq, time_steps, n_u) 17 | targets = np.zeros((n_seq, time_steps), dtype=np.int32) 18 | 19 | thresh = 0.5 20 | targets[:, 2:][seq[:, 1:-1, 1] > seq[:, :-2, 0] + thresh] = 1 21 | targets[:, 2:][seq[:, 1:-1, 1] < seq[:, :-2, 0] - thresh] = 2 22 | 23 | clf = RecurrentNetwork(learning_alg="sgd", hidden_layer_sizes=[n_h], 24 | max_iter=1E3, cost="softmax", learning_rate=0.1, 25 | momentum=0.99, recurrent_activation="lstm", 26 | random_seed=1999) 27 | 28 | clf.fit(seq, targets) 29 | 30 | plt.close('all') 31 | fig = plt.figure() 32 | plt.grid() 33 | ax1 = plt.subplot(211) 34 | 35 | plt.scatter(np.arange(time_steps), targets[1], marker='o', c='b') 36 | plt.grid() 37 | 38 | guess = clf.predict_proba(seq[1]) 39 | guessed_probs = plt.imshow(guess[0].T, interpolation='nearest', cmap='gray') 40 | ax1.set_title('blue points: true class, grayscale: model output (white mean class)') 41 | 42 | ax2 = plt.subplot(212) 43 | 
plt.plot(clf.training_loss_) 44 | plt.grid() 45 | ax2.set_title('Training loss') 46 | plt.show() 47 | -------------------------------------------------------------------------------- /util/continue_model_mnist.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | import time 3 | import os 4 | from net import load_data 5 | 6 | datasets = load_data('mnist.pkl.gz') 7 | train_set_x, train_set_y = datasets[0] 8 | valid_set_x, valid_set_y = datasets[1] 9 | test_set_x, test_set_y = datasets[2] 10 | 11 | f = open('model.save', 'rb') 12 | classifier = cPickle.load(f) 13 | print('... training') 14 | start_time = time.clock() 15 | classifier.fit(train_set_x, train_set_y, valid_set_x, valid_set_y) 16 | end_time = time.clock() 17 | print('The code for file ' + os.path.split(__file__)[1] + 18 | ' ran for %.2fm' % ((end_time - start_time) / 60.)) 19 | -------------------------------------------------------------------------------- /util/load_model.py: -------------------------------------------------------------------------------- 1 | try: 2 | import cPickle 3 | except ImportError: 4 | import pickle as cPickle 5 | import sys 6 | 7 | f = open(sys.argv[1], 'rb') 8 | clf = cPickle.load(f) 9 | 10 | from IPython import embed; embed() 11 | --------------------------------------------------------------------------------
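The CTC helpers in net.py (recurrence_relation, path_probs, log_path_probs) are the densest part of the listing above. The sketch below is a plain NumPy transcription of the same forward recursion, not part of the library: the label values, the blank index, the blank-interleaved padded target, and the fake per-frame softmax outputs are all illustrative assumptions, chosen only to show what recurrence_relation encodes and how path_probs accumulates path mass (the actual blank padding of y_sym happens elsewhere in the repository and is not shown here).

import numpy as np

def recurrence_relation(size):
    # Same construction as the Theano version: rr[i, j] == 1 when a CTC path
    # may move from padded-label position i to position j.
    eye2 = np.eye(size + 2)
    return (np.eye(size)                              # stay on position i
            + eye2[2:, 1:-1]                          # advance to i + 1
            + eye2[2:, :-2] * (np.arange(size) % 2))  # skip to i + 2, allowed only
                                                      # into odd (non-blank) positions
                                                      # of a blank-first interleaving

# Assumed toy setup: labels 'a' = 0 and 'b' = 1, blank = 2 (path_probs notes
# that blank is the highest class). The padded target interleaves blanks.
y_padded = np.array([2, 0, 2, 1, 2])                  # blank a blank b blank
rr = recurrence_relation(len(y_padded))

# Fake network output: 4 frames x 3 classes, each row a softmax distribution.
predict = np.array([[0.7, 0.1, 0.2],
                    [0.6, 0.2, 0.2],
                    [0.1, 0.7, 0.2],
                    [0.1, 0.2, 0.7]])
pred_y = predict[:, y_padded]     # per-frame probability of each padded symbol

# Forward recursion used in path_probs: alpha_t = p_t * (alpha_{t-1} @ rr),
# starting with all path mass on the first padded symbol.
alpha = np.eye(len(y_padded))[0]
for p_t in pred_y:
    alpha = p_t * alpha.dot(rr)

# Mass that finishes on the last label or the trailing blank, i.e. the
# probability of paths that spell out the toy labelling.
print(alpha[-2:].sum())

log_path_probs runs the same recursion in log space, with _epslog clipping the probabilities and the max-shifted logsumexp in log_ctc_cost keeping the final sum numerically stable for long sequences.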