├── .ipynb_checkpoints ├── Atomic Experiments in Deep Learning-checkpoint.ipynb ├── Deep Networks on UCI Datasets-checkpoint.ipynb ├── What Can an RNN Predict-checkpoint.ipynb ├── experiments-checkpoint.py ├── ise-checkpoint.csv ├── uci_utils-checkpoint.py └── utils-checkpoint.py ├── Atomic Experiments in Deep Learning.html ├── Atomic Experiments in Deep Learning.ipynb ├── Deep Networks on UCI Datasets.html ├── Deep Networks on UCI Datasets.ipynb ├── What Can an RNN Predict.ipynb ├── __pycache__ ├── experiments.cpython-35.pyc ├── uci_utils.cpython-35.pyc └── utils.cpython-35.pyc ├── experiments.py ├── ise.csv ├── uci_utils.py └── utils.py /.ipynb_checkpoints/experiments-checkpoint.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import time 4 | import warnings 5 | from utils import * 6 | from sklearn.neural_network import MLPClassifier 7 | from sklearn.decomposition import PCA 8 | 9 | ''' 10 | All of the Experiments, starting from a base class 11 | ''' 12 | class Experiment(): 13 | def __init__(self): 14 | pass 15 | 16 | #ideally, this would make experiments completely reproducible, but because jobs are distributed over multiple cores, small differences may persist in practice 17 | def initialize(self, seed=0, fix_seed=True): 18 | if fix_seed: 19 | np.random.seed(seed) 20 | tf.set_random_seed(seed) 21 | self.timer = Timer() 22 | self.timer.start() 23 | 24 | def conclude(self): 25 | self.timer.end_and_print() 26 | ''' 27 | Experiment 1: Why do we use neural networks? 28 | Description: Performs regression using a neural network with 1 hidden layer and different number of units. Returns the original x-values, true y-values, and predicted y-values, along with the MSE loss. 29 | ''' 30 | class Experiment1(Experiment): 31 | def __init__(self): 32 | pass 33 | 34 | def run(self, 35 | n_hidden = 2, 36 | learning_rate = 0.003, 37 | num_steps = 10000, 38 | num_values = 100, 39 | function = sin(omega=6), 40 | verbose=True): 41 | 42 | 43 | x_values = np.linspace(-1,1, num_values).reshape(-1,1) 44 | y_values = function(x_values).reshape(-1,1) 45 | 46 | tf.reset_default_graph() 47 | x = tf.placeholder(dtype="float", shape=[None,1]) 48 | y = tf.placeholder(dtype="float", shape=[None,1]) 49 | y_ = multilayer_perceptron(x, num_nodes=[n_hidden]) 50 | 51 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 52 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 53 | train_op = optimizer.minimize(loss_op) 54 | init_op = tf.global_variables_initializer() 55 | 56 | with tf.Session() as sess: 57 | sess.run(init_op) 58 | y_preds = list() 59 | for step in range(num_steps): 60 | _, loss, y_pred = sess.run([train_op, loss_op, y_], feed_dict={x:x_values,y:y_values}) 61 | if (step%(num_steps/10)==0 and verbose): 62 | print(loss) 63 | y_preds.append(y_pred.squeeze()) 64 | 65 | return x_values.squeeze(), y_values.squeeze(), y_pred.squeeze(), loss 66 | 67 | ''' 68 | Experiment 2: Why are Deeper Networks Better? 
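Description: Classifies n evenly spaced points in [0, 1) whose labels alternate between the two classes, using either a standard MLP or a "recurrent" MLP that shares one weight matrix across its hidden layers. Returns the inputs, labels, predictions, final loss, accuracy, and the number of trainable parameters.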
69 | ''' 70 | class Experiment2(Experiment): 71 | def __init__(self): 72 | pass 73 | 74 | def run(self, 75 | n=16, 76 | n_hidden=[10], 77 | num_steps=15000, 78 | learning_rate = 0.003, 79 | verbose=False, 80 | recurrent=True): 81 | 82 | 83 | x_values = np.linspace(0,1-1/n,n).reshape(-1,1) 84 | y_values = np.resize([[0,1],[1,0]], (n,2)) 85 | 86 | tf.reset_default_graph() 87 | x = tf.placeholder(dtype="float", shape=[None,1]) 88 | y = tf.placeholder(dtype="float", shape=[None,2]) 89 | 90 | if recurrent: 91 | y_ = recurrent_multilayer_perceptron(x, num_input=1, num_output=2, num_nodes=n_hidden,activation=tf.nn.relu) 92 | else: 93 | y_ = multilayer_perceptron(x, num_input=1, num_output=2, num_nodes=n_hidden,bias=bias,activation=tf.nn.relu) 94 | 95 | 96 | n_params = np.sum([np.product([xi.value for xi in x.get_shape()]) for x in tf.global_variables()]) 97 | #show_graph(tf.get_default_graph().as_graph_def()) 98 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 99 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 100 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 101 | 102 | #loss_op = tf.reduce_mean(tf.square(y_ - y)) 103 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 104 | train_op = optimizer.minimize(loss_op) 105 | init_op = tf.global_variables_initializer() 106 | 107 | with tf.Session() as sess: 108 | sess.run(init_op) 109 | for step in range(num_steps): 110 | x_batch, y_batch = random_batch(x_values, y_values) 111 | _, loss, y_pred = sess.run([train_op, loss_op, y_], feed_dict={x:x_batch,y:y_batch}) 112 | if (step%(num_steps/10)==0 and verbose): 113 | print(loss) 114 | 115 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:x_values,y:y_values}) 116 | 117 | return x_values.squeeze(), y_values.squeeze(), y_pred.squeeze(), loss, accuracy, n_params 118 | 119 | 120 | ''' 121 | Experiment 3: Does More Data Favor Deeper Neural Networks? 122 | ''' 123 | class Experiment3(Experiment): 124 | def __init__(self): 125 | pass 126 | 127 | def run(self, 128 | classifiers, 129 | d = 12, 130 | class_seps = [1], 131 | ns = np.logspace(2,4,10), 132 | iters = 3, 133 | covariance_scale = 1, 134 | test_size = 0.2, 135 | accuracy_on = 'test', 136 | recurrent=True): 137 | 138 | acc = np.zeros((len(ns),len(classifiers),iters)) 139 | n_max = int(np.max(ns)) 140 | 141 | for k in range(iters): 142 | 143 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n_max, 144 | d=d, 145 | class_seps=class_seps, 146 | covariance_scale=covariance_scale, 147 | test_size=test_size) 148 | for i, n in enumerate(ns): 149 | for j, clf in enumerate(classifiers): 150 | with warnings.catch_warnings(): 151 | warnings.simplefilter('ignore') #MLP throws annoying errors whenever it doesn't fully converge 152 | n_train = int(n*(1-test_size)) 153 | clf.fit(X_train[:n_train],y_train[:n_train]) #choose a subset of the training data 154 | if accuracy_on=='train': 155 | acc[i,j,k] = clf.score(X_train[:int(n*(1-test_size))],y_train[:int(n*(1-test_size))]) 156 | elif accuracy_on=='test': 157 | acc[i,j,k] = clf.score(X_test,y_test) 158 | else: 159 | raise ValueError("accuracy_on must be 'test' or 'train'") 160 | 161 | return acc 162 | 163 | 164 | ''' 165 | Experiment 4: Does Unbalanced Data Hurt Neural Networks? 
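Description: Trains an MLP on mixture-of-Gaussians data generated with varying class ratios (optionally resampling the minority class) and evaluates it on a balanced test set drawn with the same covariance. Returns a (ratio x iteration) accuracy matrix and the covariance matrices used; with classify_grid=True it also returns the train/test data and predictions over a 2-D grid for plotting the decision boundary.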
166 | ''' 167 | class Experiment4(Experiment): 168 | 169 | def __init__(self): 170 | pass 171 | 172 | def run(self, 173 | d = 12, 174 | iters = 3, 175 | covariance_scale = 1, 176 | test_size = 0.2, 177 | resample=False, 178 | n = 1200, 179 | num_steps=500, 180 | learning_rate = 0.003, 181 | verbose=False, 182 | load_covs = None, 183 | classify_grid = False, 184 | hidden_layer_sizes=(100,100), 185 | ratios = [1]): 186 | 187 | acc_matrix = np.zeros((len(ratios),iters)) 188 | class_seps = [1/(i+1) for i in range(d)] 189 | clf = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes) 190 | saved_covs = [] 191 | 192 | counter = 0 193 | for k in range(iters): 194 | for r, ratio in enumerate(ratios): 195 | # load covariance matrices for reproducibility 196 | if load_covs is None: 197 | cov = None 198 | else: 199 | cov = load_covs[counter] 200 | counter += 1 201 | 202 | 203 | X_train, _, y_train, _, cov = Dataset.generate_mixture_of_gaussians(n=n, 204 | d=d, 205 | normalize_x=True, 206 | one_hot=True, 207 | class_seps=class_seps, 208 | covariance_scale=covariance_scale, 209 | test_size=0, 210 | cov = cov, 211 | class_ratio=ratio, 212 | resample=resample, 213 | return_covariance=True) 214 | saved_covs.append(cov) 215 | X_test, _, y_test, _ = Dataset.generate_mixture_of_gaussians(n=int(n/4), 216 | d=d, 217 | normalize_x = True, 218 | one_hot=True, 219 | class_seps=class_seps, 220 | covariance_scale=covariance_scale, 221 | test_size=0, 222 | cov=cov) 223 | 224 | 225 | 226 | 227 | tf.reset_default_graph() 228 | x = tf.placeholder(dtype="float", shape=[None,d]) 229 | y = tf.placeholder(dtype="float", shape=[None,2]) 230 | y_ = multilayer_perceptron(x, num_input=d, num_output=2, num_nodes=hidden_layer_sizes) 231 | 232 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_)) 233 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 234 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 235 | 236 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 237 | train_op = optimizer.minimize(cross_entropy) 238 | init_op = tf.global_variables_initializer() 239 | 240 | with tf.Session() as sess: 241 | sess.run(init_op) 242 | y_preds = list() 243 | for step in range(num_steps): 244 | _, loss, acc, cp = sess.run([train_op, cross_entropy, accuracy_op, correct_prediction], feed_dict={x:X_train,y:y_train}) 245 | if (step%(num_steps/10)==0 and verbose): 246 | print(loss, acc, cp) 247 | 248 | accuracy, y_pred = sess.run([accuracy_op, y_], feed_dict={x:X_test,y:y_test}) 249 | acc_matrix[r,k] = accuracy 250 | 251 | if classify_grid: 252 | xx, yy = np.meshgrid(np.arange(0, 1.02, 0.02), np.arange(0, 1.02, 0.02)) 253 | grid_points = np.c_[xx.ravel(), yy.ravel()] 254 | grid_preds = sess.run(y_, feed_dict={x:grid_points}) 255 | 256 | if classify_grid: 257 | return acc_matrix, saved_covs, X_train, y_train, X_test, y_test, y_pred, grid_preds 258 | return acc_matrix, saved_covs 259 | 260 | 261 | ''' 262 | 5. Are Neural Networks Memorizing Or Generalizing During Training? 
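Description: Trains an MLP on mixture-of-Gaussians training sets of varying size, optionally with randomly permuted labels, and records the accuracy on the training data, either once at the end of training or at ten checkpoints along the way.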
263 | ''' 264 | class Experiment5(Experiment): 265 | 266 | def __init__(self): 267 | pass 268 | 269 | def run(self, 270 | d = 6, 271 | iters = 3, 272 | covariance_scale = 1, 273 | test_size = 0.2, 274 | class_seps = [1 for i in range(6)], 275 | ns = [500], 276 | return_accuracy_per_epoch=False, 277 | randomize=False, 278 | verbose=False, 279 | learning_rate = 0.003, 280 | num_steps=2500, 281 | hidden_layer_sizes=(100,100)): 282 | 283 | if return_accuracy_per_epoch: 284 | acc = np.zeros((10, len(ns),iters)) 285 | else: 286 | acc = np.zeros((len(ns),iters)) 287 | n_max = np.max(ns) 288 | 289 | for k in range(iters): 290 | X_train_, _, y_train_, _ = Dataset.generate_mixture_of_gaussians(n=n_max, 291 | d=d, 292 | class_seps=class_seps, 293 | covariance_scale=covariance_scale, 294 | one_hot=True, 295 | test_size=0) 296 | if randomize: 297 | y_train_ = np.random.permutation(y_train_) 298 | 299 | for n_i, n in enumerate(ns): 300 | step_multiple = 0 301 | tf.reset_default_graph() 302 | X_train = X_train_[:n]; y_train = y_train_[:n] 303 | x = tf.placeholder(dtype="float", shape=[None,d]) 304 | y = tf.placeholder(dtype="float", shape=[None,2]) 305 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2) 306 | 307 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 308 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 309 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 310 | 311 | #loss_op = tf.reduce_mean(tf.square(y_ - y)) 312 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 313 | train_op = optimizer.minimize(loss_op) 314 | init_op = tf.global_variables_initializer() 315 | 316 | with tf.Session() as sess: 317 | sess.run(init_op) 318 | for step in range(num_steps): 319 | x_batch, y_batch = random_batch(X_train, y_train) 320 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 321 | if (step%(num_steps/10)==0 and verbose): 322 | print(accuracy) 323 | if (step%(num_steps/10)==0 and return_accuracy_per_epoch): 324 | accuracy = sess.run(accuracy_op, feed_dict={x:X_train,y:y_train}) 325 | acc[step_multiple, n_i, k] = accuracy 326 | step_multiple += 1 327 | 328 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_train,y:y_train}) 329 | 330 | if not(return_accuracy_per_epoch): 331 | acc[n_i,k] = accuracy 332 | 333 | return acc 334 | 335 | 336 | ''' 337 | ## 6. Does Unsupervised Feature Reduction Help or Hurt? 
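Description: Generates mixture-of-Gaussians data, appends noisy dummy dimensions, mixes all features with a random matrix, and optionally applies PCA before training an MLP. Returns test accuracy indexed by (iteration, number of dummy dimensions, PCA dimension).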
338 | ''' 339 | class Experiment6(Experiment): 340 | 341 | def __init__(self): 342 | pass 343 | 344 | def run(self, 345 | d = 10, 346 | iters = 3, 347 | covariance_scale = 0.2, 348 | test_size = 0.2, 349 | n = 100, 350 | dummy_dims = [0], 351 | pca_dims = [None], 352 | verbose=False, 353 | noise_level = 0, 354 | learning_rate = 0.003, 355 | num_steps=500, 356 | hidden_layer_sizes=(100,100)): 357 | 358 | from scipy.stats import special_ortho_group 359 | 360 | class_seps = [1 for i in range(d)] 361 | acc = np.zeros((iters, len(dummy_dims),len(pca_dims))) 362 | 363 | for k in range(iters): 364 | X_train_, X_test_, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 365 | d=d, 366 | class_seps=class_seps, 367 | covariance_scale=covariance_scale, 368 | one_hot=True, 369 | test_size=test_size) 370 | 371 | for d_i, dummy_dim in enumerate(dummy_dims): 372 | X_train = np.concatenate((X_train_, noise_level*np.random.random(size=(X_train_.shape[0], dummy_dim))),axis=1); 373 | X_test = np.concatenate((X_test_, noise_level*np.random.random(size=(X_test_.shape[0], dummy_dim))),axis=1); 374 | 375 | rotation_matrix = np.random.random(size=(d+dummy_dim,d+dummy_dim)) 376 | X_train = X_train.dot(rotation_matrix) 377 | X_test = X_test.dot(rotation_matrix) 378 | 379 | 380 | for p_i, pca_dim in enumerate(pca_dims): 381 | pca = PCA(n_components = pca_dim) 382 | if not(pca_dim is None): 383 | X_train = pca.fit_transform(X_train) 384 | X_test = pca.transform(X_test) 385 | if pca_dim is None: 386 | pca_dim = d+dummy_dim 387 | 388 | tf.reset_default_graph() 389 | x = tf.placeholder(dtype="float", shape=[None,pca_dim]) 390 | y = tf.placeholder(dtype="float", shape=[None,2]) 391 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=pca_dim, num_output=2) 392 | 393 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 394 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 395 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 396 | 397 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 398 | train_op = optimizer.minimize(loss_op) 399 | init_op = tf.global_variables_initializer() 400 | 401 | with tf.Session() as sess: 402 | sess.run(init_op) 403 | for step in range(num_steps): 404 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:X_train,y:y_train}) 405 | if (step%(num_steps/10)==0 and verbose): 406 | print(accuracy) 407 | 408 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 409 | acc[k,d_i,p_i] = accuracy 410 | return acc 411 | 412 | 413 | ''' 414 | 7. Can Any Non-linearity Be Used As the Activation Function? 
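Description: Trains MLPs on the two-moons dataset with different activation functions (sigmoid and squaring by default) and records test accuracy at ten checkpoints during training.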
415 | ''' 416 | class Experiment7(Experiment): 417 | 418 | def __init__(self): 419 | pass 420 | 421 | def run(self, 422 | iters = 1, 423 | d = 2, 424 | test_size = 0.2, 425 | n = 500, 426 | noise = 0.1, 427 | verbose=False, 428 | activations = [tf.nn.sigmoid, tf.square], 429 | learning_rate = 0.003, 430 | num_steps=800, 431 | hidden_layer_sizes=(30,30)): 432 | 433 | 434 | acc = np.zeros((iters, 10, len(activations))) 435 | n_max = n 436 | 437 | for k in range(iters): 438 | X_train, X_test, y_train, y_test = Dataset.generate_moons(n=n_max, 439 | test_size=0.2, 440 | one_hot=True, 441 | noise=noise) 442 | 443 | for a_i, a in enumerate(activations): 444 | step_counter = 0 445 | tf.reset_default_graph() 446 | x = tf.placeholder(dtype="float", shape=[None,d]) 447 | y = tf.placeholder(dtype="float", shape=[None,2]) 448 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, activation=a) 449 | 450 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 451 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 452 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 453 | 454 | #loss_op = tf.reduce_mean(tf.square(y_ - y)) 455 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 456 | train_op = optimizer.minimize(loss_op) 457 | init_op = tf.global_variables_initializer() 458 | 459 | with tf.Session() as sess: 460 | sess.run(init_op) 461 | for step in range(num_steps): 462 | x_batch, y_batch = random_batch(X_train, y_train) 463 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 464 | if (step%(num_steps/10)==0): 465 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 466 | acc[k, step_counter, a_i] = accuracy 467 | step_counter += 1 468 | if verbose: 469 | print(accuracy) 470 | 471 | 472 | 473 | return acc 474 | 475 | ''' 476 | 8. How Does Batch Size Affect the Results? 
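Description: Trains MLPs with different mini-batch sizes for a fixed number of epochs on mixture-of-Gaussians data. Returns the wall-clock runtime for each batch size along with either the final test accuracy or the training accuracy recorded every few epochs.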
477 | ''' 478 | class Experiment8(Experiment): 479 | 480 | def __init__(self): 481 | pass 482 | 483 | def run(self, 484 | d = 12, 485 | iters = 3, 486 | covariance_scale = 1, 487 | test_size = 0.2, 488 | n = 500, 489 | batch_sizes = [32], 490 | return_accuracy_per_epoch=False, 491 | verbose=False, 492 | learning_rate = 0.003, 493 | num_epochs=150, 494 | store_every=10, 495 | hidden_layer_sizes=(100,100)): 496 | 497 | class_seps = [1 for i in range(12)] 498 | timer = Timer() 499 | if return_accuracy_per_epoch: 500 | acc = np.zeros((int(num_epochs/store_every)-1, len(batch_sizes),iters)) 501 | else: 502 | acc = np.zeros((len(batch_sizes),iters)) 503 | 504 | runtimes = np.zeros((len(batch_sizes))) 505 | for k in range(iters): 506 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 507 | d=d, 508 | class_seps=class_seps, 509 | covariance_scale=covariance_scale, 510 | one_hot=True, 511 | test_size=test_size) 512 | for b_i, batch_size in enumerate(batch_sizes): 513 | timer.start() 514 | step_multiple = 0 515 | tf.reset_default_graph() 516 | x = tf.placeholder(dtype="float", shape=[None,d]) 517 | y = tf.placeholder(dtype="float", shape=[None,2]) 518 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2) 519 | 520 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 521 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 522 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 523 | 524 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 525 | train_op = optimizer.minimize(loss_op) 526 | init_op = tf.global_variables_initializer() 527 | 528 | with tf.Session() as sess: 529 | sess.run(init_op) 530 | num_steps = int(num_epochs*n/batch_size) 531 | store_acc_threshold = num_steps/num_epochs*store_every 532 | for step in range(num_steps): 533 | x_batch, y_batch = random_batch(X_train, y_train, size=batch_size) 534 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 535 | if (step%(num_steps/num_epochs)==0 and verbose): 536 | print(accuracy) 537 | if (step>store_acc_threshold and return_accuracy_per_epoch): 538 | accuracy = sess.run(accuracy_op, feed_dict={x:X_train,y:y_train}) 539 | acc[step_multiple, b_i, k] = accuracy 540 | step_multiple += 1 541 | store_acc_threshold += num_steps/num_epochs*store_every 542 | 543 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 544 | 545 | if not(return_accuracy_per_epoch): 546 | acc[b_i,k] = accuracy # otherwise, this is stored earlier 547 | runtimes[b_i] = timer.end() 548 | 549 | return runtimes, acc 550 | 551 | 552 | 553 | ''' 554 | 9. How Does the Loss Function Matter? 
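Description: Trains MLPs under different loss functions (cross-entropy, mean absolute error, mean squared error, mean fourth-power error, hinge loss, or a constant loss) on mixture-of-Gaussians data and records test accuracy at ten checkpoints during training.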
555 | ''' 556 | class Experiment9(Experiment): 557 | 558 | def __init__(self): 559 | pass 560 | 561 | def run(self, 562 | d = 12, 563 | iters = 1, 564 | covariance_scale = 1, 565 | test_size = 0.2, 566 | n = 500, 567 | randomize=False, 568 | verbose=False, 569 | loss_functions = ['cross_entropy', 'mean_squared_error'], 570 | learning_rate = 0.003, 571 | num_steps=500, 572 | hidden_layer_sizes=(100,100)): 573 | 574 | class_seps = [1/(i+1) for i in range(d)] 575 | acc = np.zeros((iters, 10, len(loss_functions))) 576 | n_max = n 577 | LOSS_FUNCTIONS = ['cross_entropy', 578 | 'mean_abs_error', 579 | 'mean_squared_error', 580 | 'mean_fourth_pow_error', 581 | 'hinge_loss', 582 | 'constant'] 583 | 584 | for k in range(iters): 585 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n_max, 586 | d=d, 587 | class_seps=class_seps, 588 | covariance_scale=covariance_scale, 589 | one_hot=True) 590 | if randomize: 591 | y_train_ = np.random.permutation(y_train_) 592 | 593 | 594 | for l_i, l in enumerate(loss_functions): 595 | step_counter = 0 596 | if not(l in LOSS_FUNCTIONS): 597 | raise ValueError("Valid loss functions are " + str(LOSS_FUNCTIONS)) 598 | 599 | tf.reset_default_graph() 600 | x = tf.placeholder(dtype="float", shape=[None,d]) 601 | y = tf.placeholder(dtype="float", shape=[None,2]) 602 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2) 603 | 604 | if l=='cross_entropy': 605 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 606 | elif l=='mean_squared_error': 607 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 608 | elif l=='mean_abs_error': 609 | loss_op = tf.reduce_mean(tf.abs(y_ - y)) 610 | elif l=='hinge_loss': 611 | loss_op = tf.losses.hinge_loss(labels=y, logits=y_) 612 | elif l=='mean_fourth_pow_error': 613 | loss_op = tf.reduce_mean(tf.pow(y_ - y, 4)) 614 | elif l=='constant': 615 | loss_op = 0*tf.reduce_mean(tf.square(y_ - y)) 616 | 617 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 618 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 619 | 620 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 621 | train_op = optimizer.minimize(loss_op) 622 | init_op = tf.global_variables_initializer() 623 | 624 | with tf.Session() as sess: 625 | sess.run(init_op) 626 | for step in range(num_steps): 627 | x_batch, y_batch = random_batch(X_train, y_train) 628 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 629 | if (step%(num_steps/10)==0): 630 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 631 | acc[k, step_counter, l_i] = accuracy 632 | step_counter += 1 633 | if verbose: 634 | print(accuracy) 635 | 636 | return acc 637 | 638 | ''' 639 | 10. How Does the Initialization Affect Performance? 
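Description: Trains MLPs whose weights are created with different initializers (Xavier by default) on mixture-of-Gaussians data and records test accuracy at ten checkpoints during training.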
640 | ''' 641 | class Experiment10(Experiment): 642 | 643 | def __init__(self): 644 | pass 645 | 646 | def run(self, 647 | d = 12, 648 | iters = 1, 649 | covariance_scale = 1, 650 | test_size = 0.2, 651 | n = 500, 652 | randomize=False, 653 | verbose=False, 654 | initializers = [tf.contrib.layers.xavier_initializer()], 655 | learning_rate = 0.003, 656 | num_steps=500, 657 | hidden_layer_sizes=(100,100)): 658 | 659 | class_seps = [1/(i+1) for i in range(d)] 660 | acc = np.zeros((iters, 10, len(initializers))) 661 | 662 | for k in range(iters): 663 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 664 | d=d, 665 | class_seps=class_seps, 666 | covariance_scale=covariance_scale, 667 | one_hot=True) 668 | 669 | for i_i, initializer in enumerate(initializers): 670 | step_counter = 0 671 | tf.reset_default_graph() 672 | x = tf.placeholder(dtype="float", shape=[None,d]) 673 | y = tf.placeholder(dtype="float", shape=[None,2]) 674 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, initializer=initializer) 675 | 676 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 677 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 678 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 679 | 680 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 681 | train_op = optimizer.minimize(loss_op) 682 | init_op = tf.global_variables_initializer() 683 | 684 | with tf.Session() as sess: 685 | sess.run(init_op) 686 | for step in range(num_steps): 687 | x_batch, y_batch = random_batch(X_train, y_train) 688 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 689 | if (step%(num_steps/10)==0): 690 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 691 | acc[k, step_counter, i_i] = accuracy 692 | step_counter += 1 693 | if verbose: 694 | print(accuracy) 695 | 696 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 697 | 698 | 699 | return acc 700 | 701 | ''' 702 | 11. Do Weights in Different Layers Evolve At Different Speeds? 
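Description: Trains a fixed MLP with three hidden layers of 50 units and, every other step, snapshots all four weight matrices together with the test accuracy, so the rate of change of each layer's weights can be compared.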
703 | ''' 704 | class Experiment11(Experiment): 705 | 706 | def __init__(self): 707 | pass 708 | 709 | def run(self, 710 | d = 12, 711 | covariance_scale = 1, 712 | test_size = 0.2, 713 | n = 500, 714 | store_every=2, 715 | randomize=False, 716 | verbose=False, 717 | learning_rate = 0.003, 718 | num_steps=500,): 719 | 720 | class_seps = [1/(i+1) for i in range(d)] 721 | hidden_layer_sizes=(50,50,50) 722 | weights = [] 723 | accs = [] 724 | 725 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 726 | d=d, 727 | class_seps=class_seps, 728 | covariance_scale=covariance_scale, 729 | one_hot=True) 730 | 731 | step_counter = 0 732 | 733 | tf.reset_default_graph() 734 | x = tf.placeholder(dtype="float", shape=[None,d]) 735 | y = tf.placeholder(dtype="float", shape=[None,2]) 736 | y_, wts = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, return_weight_tensors=True) 737 | 738 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 739 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 740 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 741 | 742 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 743 | train_op = optimizer.minimize(loss_op) 744 | init_op = tf.global_variables_initializer() 745 | 746 | with tf.Session() as sess: 747 | sess.run(init_op) 748 | for step in range(num_steps): 749 | x_batch, y_batch = random_batch(X_train, y_train) 750 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 751 | if (step%2==0): 752 | accuracy, w0, w1, w2, w3 = sess.run([accuracy_op, wts[0], wts[1], wts[2], wts[3]], feed_dict={x:X_test,y:y_test}) 753 | weights.append([w0, w1, w2, w3]) 754 | accs.append(accuracy) 755 | 756 | if verbose: 757 | print(accuracy) 758 | 759 | return weights, accs 760 | 761 | ''' 762 | 12. How Does Regularization Affect Weight Evolution? 
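Description: Repeats the weight-tracking setup of Experiment 11 with two hidden layers and an L1 or L2 penalty of adjustable strength added to the loss, snapshotting the weight matrices and test accuracy every other step.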
763 | ''' 764 | class Experiment12(Experiment): 765 | 766 | def __init__(self): 767 | pass 768 | 769 | def run(self, 770 | d = 12, 771 | covariance_scale = 1, 772 | test_size = 0.2, 773 | n = 500, 774 | regularization_type = 'L2', 775 | regularization_strength = 0, 776 | store_every=2, 777 | randomize=False, 778 | verbose=False, 779 | learning_rate = 0.003, 780 | num_steps=500,): 781 | 782 | class_seps = [1/(i+1) for i in range(d)] 783 | hidden_layer_sizes=(50,50) 784 | weights = [] 785 | accs = [] 786 | 787 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 788 | d=d, 789 | class_seps=class_seps, 790 | covariance_scale=covariance_scale, 791 | one_hot=True) 792 | 793 | step_counter = 0 794 | 795 | tf.reset_default_graph() 796 | x = tf.placeholder(dtype="float", shape=[None,d]) 797 | y = tf.placeholder(dtype="float", shape=[None,2]) 798 | y_, wts = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, return_weight_tensors=True) 799 | 800 | if regularization_type=='L2': 801 | loss_op = tf.reduce_mean(tf.square(y_ - y)) + regularization_strength*(tf.reduce_mean(tf.square(wts[0])) + tf.reduce_mean(tf.square(wts[1])) + tf.reduce_mean(tf.square(wts[2]))) 802 | elif regularization_type=='L1': 803 | loss_op = tf.reduce_mean(tf.square(y_ - y)) + regularization_strength*(tf.reduce_mean(tf.abs(wts[0])) + tf.reduce_mean(tf.abs(wts[1])) + tf.reduce_mean(tf.abs(wts[2]))) 804 | else: 805 | raise ValueError("regularization_type must be 'L1' or 'L2'") 806 | 807 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 808 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 809 | 810 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 811 | train_op = optimizer.minimize(loss_op) 812 | init_op = tf.global_variables_initializer() 813 | 814 | with tf.Session() as sess: 815 | sess.run(init_op) 816 | for step in range(num_steps): 817 | x_batch, y_batch = random_batch(X_train, y_train) 818 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 819 | if (step%2==0): 820 | accuracy, w0, w1, w2 = sess.run([accuracy_op, wts[0], wts[1], wts[2]], feed_dict={x:X_test,y:y_test}) 821 | weights.append([w0, w1, w2]) 822 | accs.append(accuracy) 823 | 824 | if verbose: 825 | print(accuracy) 826 | 827 | return weights, accs 828 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/ise-checkpoint.csv: -------------------------------------------------------------------------------- 1 | 0.035753708 2 | 0.025425873 3 | -0.02886173 4 | -0.062208079 5 | 0.009859905 6 | -0.029191028 7 | 0.015445348 8 | -0.041167612 9 | 0.000661905 10 | 0.022037345 11 | -0.022692465 12 | -0.013708704 13 | 0.000864697 14 | -0.00381506 15 | 0.00566126 16 | 0.046831302 17 | -0.006634978 18 | 0.034566982 19 | -0.020528213 20 | -0.008776701 21 | -0.025919141 22 | 0.015279487 23 | 0.018577796 24 | -0.014132879 25 | 0.036607044 26 | 0.011353209 27 | -0.040542021 28 | -0.022105644 29 | -0.014888368 30 | 0.007026745 31 | -0.011494996 32 | -0.041136038 33 | -0.002631499 34 | 0.024654643 35 | -0.03584061 36 | 0.017303168 37 | 0.001725406 38 | 0.004975853 39 | 0.000671759 40 | -0.005891895 41 | -0.013689039 42 | 0.002192959 43 | 0.007913215 44 | -0.03852223 45 | 0.007958798 46 | -0.007133473 47 | 0.011234009 48 | -0.001410361 49 | 0.010974424 50 | 0.003213253 51 | 0.000214245 52 | -0.00711875 53 | 0.001891803 54 | 0.019874248 55 | 0.002918699 56 | 0.035968063 57 | 0.003298424 58 
| 0.021165071 59 | -0.004968387 60 | 0.011247875 61 | -0.021780194 62 | 0.024406595 63 | 0.006913411 64 | 0.031401768 65 | -0.005247358 66 | 0.000735712 67 | -0.010297613 68 | 0.047238893 69 | 0.03177774 70 | 0.010170736 71 | -0.013661316 72 | -0.010111415 73 | 0.0057888 74 | 0.041454961 75 | -0.002617982 76 | -0.030954776 77 | 0.004548211 78 | 0.009655946 79 | 0.024517037 80 | -0.004162916 81 | -0.012367953 82 | 0.05198032 83 | 0.029022746 84 | 0.016261076 85 | 0.025882269 86 | 0.021061108 87 | -0.026273031 88 | -0.001123867 89 | -0.018541411 90 | 0.025756738 91 | -0.006508992 92 | 0.008070571 93 | 0.011703446 94 | 0.00540756 95 | 0.042840743 96 | -0.011999815 97 | 0.008430531 98 | 0.011166484 99 | -0.015572642 100 | 0.008801231 101 | -0.004209168 102 | -0.000514681 103 | 0.028131284 104 | -0.007792426 105 | -0.022362446 106 | -0.019171953 107 | 0.013947094 108 | -0.032005875 109 | 0.004478365 110 | 0.017089104 111 | 0.00967985 112 | 0.002670961 113 | -0.003623787 114 | -0.004441286 115 | 0.001688065 116 | 0.031866012 117 | 0.002917994 118 | -0.011589253 119 | -0.014168795 120 | 0.024046363 121 | 0.015786402 122 | 0.011719598 123 | 0.002315584 124 | 0.002929655 125 | 0.007996801 126 | -0.005590715 127 | -0.006518816 128 | -0.00105307 129 | 0.012265455 130 | -0.001519732 131 | -0.004539036 132 | -0.005617343 133 | 0.006074147 134 | -0.008232237 135 | 0.032511512 136 | 0.006370441 137 | -0.011323604 138 | 0.013780465 139 | -0.006357127 140 | 0.004758472 141 | 0.018495281 142 | 0.012603659 143 | 0.011235934 144 | 0.004036218 145 | 0.009330697 146 | 0.043744798 147 | 0.010826132 148 | 0.045219554 149 | 0.001853539 150 | -0.017993859 151 | -0.001445482 152 | 0.021028135 153 | -0.001832467 154 | -0.02278477 155 | 0.004813828 156 | 0.013453937 157 | -0.004084155 158 | -0.000271351 159 | -0.001299983 160 | 0.031414816 161 | 0.019757306 162 | 0.013108203 163 | 0.012676973 164 | 0.006291416 165 | -0.023435627 166 | 0.008987423 167 | 0.003705789 168 | -0.021432986 169 | 0.008224308 170 | -0.02534402 171 | 0.008378078 172 | -0.021587901 173 | 0.002509442 174 | -0.009497407 175 | -0.005834448 176 | 0.007574225 177 | 0.036271557 178 | -0.001994969 179 | -0.012536673 180 | 0.016615187 181 | 0.006084621 182 | 0.004157378 183 | 0.008195123 184 | 0.009023651 185 | -0.007433515 186 | 0.004001644 187 | 0.011414049 188 | -0.009442384 189 | -0.002213036 190 | -0.020018346 191 | 0.023026511 192 | 0.031160934 193 | -0.008730222 194 | 0.01706096 195 | 0.006746257 196 | 0.015520491 197 | -0.01843108 198 | 0.018883892 199 | -0.000160911 200 | -0.024346941 201 | 0.009915384 202 | 0.01507818 203 | 0.004774399 204 | -0.005441211 205 | 0.007105868 206 | -0.002934588 207 | -0.01632457 208 | -0.030082971 209 | -0.035849614 210 | 0.005735384 211 | -0.02390659 212 | 0.020219135 213 | 0.000351028 214 | -0.006962099 215 | 0.016985685 216 | 0.00751899 217 | 0.015029496 218 | -0.002417171 219 | -0.006258617 220 | 0.00685869 221 | -0.031914469 222 | -0.017581763 223 | -0.006598214 224 | -0.019347799 225 | 0.012533508 226 | -0.00630611 227 | 0.00055265 228 | 0.011903098 229 | 0.038612983 230 | 0.036468359 231 | 0.008514453 232 | -0.003723744 233 | -0.006519958 234 | -0.008229144 235 | 0.008292258 236 | -0.004204453 237 | 0.016307467 238 | 0.004990278 239 | 0.007262631 240 | -0.016948929 241 | 0.003501244 242 | 0.022530184 243 | 0.004894702 244 | -0.007211305 245 | 0.00581665 246 | 0.003891123 247 | -0.000811768 248 | 0.00322285 249 | -0.002274045 250 | 0.022138372 251 | 0.010229371 252 | 0.013898022 253 | 0.007956979 254 | 
0.007771749 255 | -0.003189192 256 | -0.016130747 257 | -0.00454863 258 | 0.017559249 259 | 0.00207392 260 | -0.013516994 261 | 0.010044257 262 | 0.011097874 263 | 0.00559711 264 | -0.003033665 265 | -0.023856682 266 | 0.005236694 267 | 0.000671833 268 | -0.004547723 269 | 0.012852447 270 | -0.002190987 271 | 0.015891732 272 | -0.006837607 273 | -0.001432456 274 | -0.029575211 275 | -0.038300784 276 | -0.030015785 277 | 0.017725401 278 | -0.00510604 279 | 0.015960155 280 | -0.00619301 281 | 0.005285259 282 | 0.008033964 283 | 0.029039821 284 | -0.015578937 285 | 0.016435847 286 | -0.013711428 287 | -0.02262045 288 | -0.03476938 289 | -0.018702566 290 | 0.019627036 291 | 0.029306318 292 | 0.005696108 293 | 0.00432159 294 | -0.005009524 295 | 0.022773381 296 | 0.01266815 297 | -0.006335257 298 | -0.003915514 299 | -0.007161671 300 | -0.002744684 301 | -0.005021298 302 | 0.023990399 303 | 0.019908239 304 | -0.014267377 305 | -0.00182161 306 | 0.008486639 307 | 0.008486611 308 | 0.014337851 309 | 0.034020727 310 | -0.007637163 311 | 0.0010159 312 | 0.001520156 313 | -0.00381384 314 | 0.020479928 315 | 0.006075096 316 | 0.010027432 317 | -0.004077191 318 | 0.004087933 319 | -0.003990517 320 | 0.013150996 321 | -0.006163386 322 | -0.01276782 323 | 0.021378817 324 | -0.004650074 325 | -0.014181169 326 | -0.001211584 327 | 0.012526916 328 | -0.00741867 329 | -0.001926377 330 | 0.012008849 331 | -0.014084423 332 | -0.004895555 333 | 0.013199043 334 | 0.004356217 335 | -0.009524497 336 | -0.018303062 337 | -0.015818452 338 | -0.012077182 339 | -0.056752612 340 | 0.068951684 341 | 0.000243026 342 | 0.02647418 343 | -0.000807723 344 | -0.038389961 345 | 0.005780967 346 | 0.016049266 347 | -0.045388971 348 | -0.006232065 349 | 0.008218708 350 | -0.043092096 351 | 0.034723686 352 | 0.007264208 353 | 0.013409826 354 | -0.015485635 355 | 0.00138582 356 | 0.013716279 357 | 0.003520353 358 | -0.014439062 359 | -0.005624349 360 | -0.011897225 361 | 0.011865737 362 | 0.010203012 363 | 0.004905756 364 | 0.01005563 365 | 0.013067872 366 | -0.000326348 367 | 0.008616579 368 | 0.00251849 369 | 0.004246468 370 | -0.004728047 371 | -0.019495737 372 | 0.000166157 373 | -0.000920201 374 | 0.010131759 375 | -0.021815769 376 | -0.006830443 377 | -0.005580513 378 | 0.020009067 379 | 0.001419823 380 | 0.019062249 381 | 0.000190164 382 | 0.009585147 383 | -0.003400806 384 | 0.007303681 385 | 0.012237341 386 | 0.000388746 387 | -0.010542271 388 | 0.000683248 389 | 0.006648115 390 | 0.001949078 391 | 0.021523653 392 | 0.007114932 393 | -0.010400281 394 | 0.003504 395 | 0.013706542 396 | 0.001248869 397 | 0.005496886 398 | -0.014436418 399 | 0.013711065 400 | -0.010180284 401 | -0.003298125 402 | -0.004645658 403 | 0.002511476 404 | 0.002217177 405 | -0.012028845 406 | -0.017302052 407 | 0.003930899 408 | 0.001645509 409 | 0.005847453 410 | 0.01082305 411 | -0.000327821 412 | -0.010043636 413 | -0.001691507 414 | 0.002389147 415 | -0.004548585 416 | -0.0057746 417 | 0.013165613 418 | 0.004862007 419 | 0.010504082 420 | 0.011817165 421 | 0.002965501 422 | 0.002184557 423 | 0.00167273 424 | -0.005813995 425 | -0.002285365 426 | 0.026892714 427 | 0.003509248 428 | 0.014597561 429 | 0.00527176 430 | 0.002025474 431 | 0.006672435 432 | -0.002567317 433 | 0.005511668 434 | -0.004453821 435 | 0.000761169 436 | 0.005987659 437 | 0.014784496 438 | -0.000587001 439 | 0.003395986 440 | -0.013648916 441 | 0.008169493 442 | 0.013186098 443 | 0.008959157 444 | -0.001807411 445 | 0.006841092 446 | 0.026176163 447 | 0.009736797 448 | 
0.007033826 449 | -0.013504077 450 | 0.012567728 451 | 0.005076448 452 | -0.008827632 453 | -0.006813227 454 | 0.015531853 455 | 0.00786829 456 | -0.001169304 457 | -0.002500309 458 | -0.030981288 459 | 0.007058143 460 | -0.008025245 461 | 0.001202099 462 | 0.032986428 463 | -0.001781987 464 | 0.002284926 465 | 0.00845291 466 | -0.013824293 467 | -0.006769931 468 | -0.007617876 469 | 0.006377468 470 | -0.030041874 471 | -0.023701804 472 | 0.013406293 473 | -0.001218191 474 | -0.01502137 475 | -0.031880777 476 | 0.019760499 477 | 0.012248015 478 | 0.011764491 479 | -0.001179229 480 | 0.012661171 481 | 0.002380343 482 | -0.002490964 483 | -0.026805079 484 | -0.017676197 485 | 0.024729439 486 | 0.001945517 487 | -0.015310023 488 | -0.016466636 489 | -0.014150672 490 | -0.000355833 491 | 0.020543856 492 | 0.00952841 493 | 0.011830075 494 | 0.000730038 495 | 0.00134576 496 | 0.001285406 497 | 0.004171479 498 | 0.001534651 499 | -0.012311844 500 | 0.024002222 501 | 0.009040201 502 | 0.008859545 503 | 0.006982356 504 | -0.007835122 505 | -0.013108757 506 | -0.003035482 507 | 0.003760527 508 | 0.009546783 509 | -0.010199048 510 | -0.015563411 511 | -0.006049411 512 | 0.000520727 513 | -0.017835035 514 | 0.00974406 515 | -0.011067076 516 | -0.00074888 517 | 0.012326085 518 | -0.014082114 519 | -0.028498178 520 | 0.001056216 521 | 0.024115793 522 | 0.007446755 523 | -0.02447813 524 | 0.02450661 525 | -0.0061961 526 | 0.005355587 527 | 0.004822987 528 | -0.017664432 529 | 0.004782286 530 | -0.002497928 531 | 0.003606376 532 | 0.008599056 533 | 0.009310309 534 | 0.000190969 535 | -0.013069043 536 | -0.007246324 537 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/uci_utils-checkpoint.py: -------------------------------------------------------------------------------- 1 | from sklearn.neural_network import MLPClassifier 2 | from sklearn.neighbors import KNeighborsClassifier 3 | from sklearn.svm import SVC 4 | from sklearn.gaussian_process import GaussianProcessClassifier 5 | from sklearn.tree import DecisionTreeClassifier 6 | from sklearn.ensemble import RandomForestClassifier 7 | from sklearn.naive_bayes import GaussianNB 8 | from sklearn.model_selection import train_test_split 9 | import pandas as pd, numpy as np 10 | import warnings 11 | from IPython.display import Markdown, display 12 | 13 | 14 | class UCI_Dataset_Loader(): 15 | @classmethod 16 | def adult(cls): 17 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data" 18 | data=pd.read_csv(url, header=None, ) 19 | features = data.iloc[:,:-1] 20 | features = pd.get_dummies(features) 21 | labels = data.iloc[:,-1] 22 | labels = labels.astype('category').cat.codes 23 | return features, labels 24 | 25 | @classmethod 26 | def car(cls): 27 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data" 28 | data=pd.read_csv(url, header=None, ) 29 | features = data.iloc[:,:-1] 30 | features = pd.get_dummies(features) 31 | labels = data.iloc[:,-1] 32 | labels = labels.astype('category').cat.codes 33 | return features, labels 34 | 35 | @classmethod 36 | def credit_default(cls): 37 | try: 38 | import xlrd 39 | except: 40 | raise ImportError("To load this dataset, you need the library 'xlrd'. 
Try installing: pip install xlrd") 41 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls" 42 | data=pd.read_excel(url, header=1) 43 | features = data.iloc[:,:-1] 44 | features = pd.get_dummies(features) 45 | labels = data.iloc[:,-1] 46 | labels = labels.astype('category').cat.codes 47 | return features, labels 48 | 49 | @classmethod 50 | def dermatology(cls): 51 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data" 52 | data=pd.read_csv(url, header=None, ) 53 | features = data.iloc[:,1:] 54 | features = pd.get_dummies(features) 55 | labels = data.iloc[:,0] 56 | labels = labels.astype('category').cat.codes 57 | return features, labels 58 | 59 | @classmethod 60 | def diabetic_retinopathy(cls): 61 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00329/messidor_features.arff" 62 | data=pd.read_csv(url, skiprows=24, header=None) 63 | features = data.iloc[:,:-1] 64 | features = pd.get_dummies(features) 65 | labels = data.iloc[:,-1] 66 | labels = labels.astype('category').cat.codes 67 | return features, labels 68 | 69 | @classmethod 70 | def ecoli(cls): 71 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/ecoli/ecoli.data" 72 | data=pd.read_csv(url, header=None, sep='\s+') 73 | features = data.iloc[:,1:-1] 74 | features = pd.get_dummies(features) 75 | labels = data.iloc[:,-1] 76 | labels = labels.astype('category').cat.codes 77 | return features, labels 78 | 79 | @classmethod 80 | def eeg_eyes(cls): 81 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00264/EEG%20Eye%20State.arff" 82 | data=pd.read_csv(url, skiprows=19, header=None, sep=',') 83 | features = data.iloc[:,:-1] 84 | features = pd.get_dummies(features) 85 | labels = data.iloc[:,-1] 86 | labels = labels.astype('category').cat.codes 87 | return features, labels 88 | 89 | @classmethod 90 | def haberman(cls): 91 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/haberman/haberman.data" 92 | data=pd.read_csv(url, skiprows=0, header=None, sep=',') 93 | features = data.iloc[:,:-1] 94 | features = pd.get_dummies(features) 95 | labels = data.iloc[:,-1] 96 | labels = labels.astype('category').cat.codes 97 | return features, labels 98 | 99 | @classmethod 100 | def ionosphere(cls): 101 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data" 102 | data=pd.read_csv(url, skiprows=0, header=None, sep=',') 103 | features = data.iloc[:,:-1] 104 | features = pd.get_dummies(features) 105 | labels = data.iloc[:,-1] 106 | labels = labels.astype('category').cat.codes 107 | return features, labels 108 | 109 | @classmethod 110 | def ionosphere(cls): 111 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data" 112 | data=pd.read_csv(url, skiprows=0, header=None, sep=',') 113 | features = data.iloc[:,:-1] 114 | features = pd.get_dummies(features) 115 | labels = data.iloc[:,-1] 116 | labels = labels.astype('category').cat.codes 117 | return features, labels 118 | 119 | @classmethod 120 | def mice_protein(cls): 121 | try: 122 | import xlrd 123 | except: 124 | raise ImportError("To load this dataset, you need the library 'xlrd'. 
Try installing: pip install xlrd") 125 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00342/Data_Cortex_Nuclear.xls" 126 | data=pd.read_excel(url, header=0, na_values=['', ' ']) 127 | features = data.iloc[:,1:-4] 128 | features = features.fillna(value=0) 129 | features = pd.get_dummies(features) 130 | labels = data.iloc[:,-1] 131 | labels = labels.astype('category').cat.codes 132 | return features, labels 133 | 134 | @classmethod 135 | def nursery(cls): 136 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.data" 137 | data=pd.read_csv(url, header=0) 138 | features = data.iloc[:,:-1] 139 | features = pd.get_dummies(features) 140 | labels = data.iloc[:,-1] 141 | labels = labels.astype('category').cat.codes 142 | return features, labels 143 | 144 | @classmethod 145 | def seeds(cls): 146 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00236/seeds_dataset.txt" 147 | data=pd.read_csv(url, header=0, sep='\s+') 148 | features = data.iloc[:,:-1] 149 | features = pd.get_dummies(features) 150 | labels = data.iloc[:,-1] 151 | labels = labels.astype('category').cat.codes 152 | return features, labels 153 | 154 | @classmethod 155 | def seismic(cls): 156 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00266/seismic-bumps.arff" 157 | data=pd.read_csv(url, skiprows=154, header=0, sep=',') 158 | features = data.iloc[:,:-1] 159 | features = pd.get_dummies(features) 160 | labels = data.iloc[:,-1] 161 | labels = labels.astype('category').cat.codes 162 | return features, labels 163 | 164 | @classmethod 165 | def soybean(cls): 166 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/soybean/soybean-small.data" 167 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 168 | features = data.iloc[:,:-1] 169 | features = pd.get_dummies(features) 170 | labels = data.iloc[:,-1] 171 | labels = labels.astype('category').cat.codes 172 | return features, labels 173 | 174 | @classmethod 175 | def teaching_assistant(cls): 176 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/tae/tae.data" 177 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 178 | features = data.iloc[:,:-1] 179 | features = pd.get_dummies(features) 180 | labels = data.iloc[:,-1] 181 | labels = labels.astype('category').cat.codes 182 | return features, labels 183 | 184 | @classmethod 185 | def tic_tac_toe(cls): 186 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/tic-tac-toe/tic-tac-toe.data" 187 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 188 | features = data.iloc[:,:-1] 189 | features = pd.get_dummies(features) 190 | labels = data.iloc[:,-1] 191 | labels = labels.astype('category').cat.codes 192 | return features, labels 193 | 194 | @classmethod 195 | def website_phishing(cls): 196 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00379/PhishingData.arff" 197 | data=pd.read_csv(url, skiprows=14, header=None, sep=',') 198 | features = data.iloc[:,:-1] 199 | features = pd.get_dummies(features) 200 | labels = data.iloc[:,-1] 201 | labels = labels.astype('category').cat.codes 202 | return features, labels 203 | 204 | @classmethod 205 | def wholesale_customers(cls): 206 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00292/Wholesale%20customers%20data.csv" 207 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 208 | features = data.iloc[:,2:] 209 | features = pd.get_dummies(features) 210 | labels = data.iloc[:,1] 211 | labels = labels.astype('category').cat.codes 212 | return 
features, labels 213 | 214 | 215 | 216 | classifiers = [ 217 | SVC(), 218 | GaussianNB(), 219 | DecisionTreeClassifier(), 220 | RandomForestClassifier(), 221 | MLPClassifier(hidden_layer_sizes=(100)), 222 | MLPClassifier(hidden_layer_sizes=(100,100)), 223 | MLPClassifier(hidden_layer_sizes=(100,100,100)),] 224 | 225 | names = [ 226 | 'Support Vector', 227 | 'Naive Bayes', 228 | 'Decision Tree', 229 | 'Random Forests', 230 | '1-layer NN', 231 | '2-layer NN', 232 | '3-layer NN', 233 | ] 234 | 235 | def print_stats(X_train, X_test, y_train, y_test): 236 | string = "Training set size: " + str(X_train.shape) + ", Test set size: " + str(X_test.shape) + ", \# of classes: " + str(len(np.unique(y_train))) 237 | display(Markdown(string)) 238 | 239 | def print_best(scores): 240 | eps = 1e-3 241 | best = np.max(scores) 242 | indices = np.where(scores > best - eps)[1] 243 | string = 'Best classifier: **' 244 | for i, idx in enumerate(indices): 245 | if i > 0: 246 | string += ', ' 247 | string += names[idx] 248 | string += '**' 249 | display(Markdown(string)) 250 | 251 | all_data = list() 252 | 253 | def compute_test_accuracies(X, y, train_size=0.8, verbose=1, append=True, iters=3): 254 | scores = np.zeros((iters,len(classifiers))) 255 | for i in range(iters): 256 | with warnings.catch_warnings(): 257 | warnings.simplefilter('ignore') #MLP throws annoying errors whenever it doesn't fully converge 258 | X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=train_size) 259 | if verbose>=1 and i==0: 260 | print_stats(X_train, X_test, y_train, y_test) 261 | for c, clf in enumerate(classifiers): 262 | if verbose>=2: 263 | print(names[c]) 264 | with warnings.catch_warnings(): 265 | warnings.simplefilter('ignore') #MLP throws annoying errors whenever it doesn't fully converge 266 | clf.fit(X_train, y_train) 267 | score = clf.score(X_test, y_test) 268 | scores[i,c] = score 269 | scores = np.mean(scores,axis=0).reshape(1,-1) 270 | if append: 271 | n, d = X.shape 272 | c = len(np.unique(y)) 273 | all_data.append(np.concatenate([[[n, d, c]], scores], axis=1)) 274 | return scores 275 | 276 | def highlight_max(s): 277 | ''' 278 | highlight the maximum in a Series yellow. 279 | ''' 280 | eps = 1e-3 281 | best = s.max() 282 | return ['background-color: #5fba7d' if v>best-eps else '' for v in s] 283 | 284 | def highlight_max_excluding_first_three(s): 285 | ''' 286 | highlight the maximum in a Series yellow. 
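Unlike highlight_max, the first entries of the Series (the dataset statistics n, d, and class count that compute_test_accuracies prepends to each row) are skipped when searching for the maximum.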
287 | ''' 288 | eps = 1e-3 289 | best = s[3:].max() 290 | return ['background-color: #5fba7d' if (v>best-eps and i>3) else '' for i, v in enumerate(s)] 291 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/utils-checkpoint.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from IPython.display import clear_output, Image, display, HTML 4 | import warnings 5 | import time 6 | 7 | def strip_consts(graph_def, max_const_size=32): 8 | """Strip large constant values from graph_def.""" 9 | strip_def = tf.GraphDef() 10 | for n0 in graph_def.node: 11 | n = strip_def.node.add() 12 | n.MergeFrom(n0) 13 | if n.op == 'Const': 14 | tensor = n.attr['value'].tensor 15 | size = len(tensor.tensor_content) 16 | if size > max_const_size: 17 | tensor.tensor_content = ""%size 18 | return strip_def 19 | 20 | def show_graph(graph_def, max_const_size=32): 21 | """Visualize TensorFlow graph.""" 22 | if hasattr(graph_def, 'as_graph_def'): 23 | graph_def = graph_def.as_graph_def() 24 | strip_def = strip_consts(graph_def, max_const_size=max_const_size) 25 | code = """ 26 | 31 | 32 |
33 | 34 |
35 | """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand())) 36 | 37 | iframe = """ 38 | 39 | """.format(code.replace('"', '"')) 40 | display(HTML(iframe)) 41 | 42 | 43 | class Timer(): 44 | def __init__(self): 45 | pass 46 | def start(self): 47 | self.time = time.time() 48 | def end(self): 49 | return time.time() - self.time 50 | def end_and_print(self): 51 | print("Time needed to run experiment:",np.round(time.time()-self.time,3),"s") 52 | def end_and_md_print(self): 53 | from IPython.display import Markdown, display 54 | string = "Time needed to run experiment: " + str(np.round(time.time()-self.time,3)) + " s" 55 | display(Markdown(string)) 56 | 57 | 58 | ## 59 | 60 | import matplotlib.pyplot as plt 61 | 62 | def draw_neural_net(ax, left, right, bottom, top, layer_sizes): 63 | ''' 64 | Credit: https://gist.github.com/craffel/2d727968c3aaebd10359 65 | Draw a neural network cartoon using matplotilb. 66 | 67 | :usage: 68 | >>> fig = plt.figure(figsize=(12, 12)) 69 | >>> draw_neural_net(fig.gca(), .1, .9, .1, .9, [4, 7, 2]) 70 | 71 | :parameters: 72 | - ax : matplotlib.axes.AxesSubplot 73 | The axes on which to plot the cartoon (get e.g. by plt.gca()) 74 | - left : float 75 | The center of the leftmost node(s) will be placed here 76 | - right : float 77 | The center of the rightmost node(s) will be placed here 78 | - bottom : float 79 | The center of the bottommost node(s) will be placed here 80 | - top : float 81 | The center of the topmost node(s) will be placed here 82 | - layer_sizes : list of int 83 | List of layer sizes, including input and output dimensionality 84 | ''' 85 | 86 | n_layers = len(layer_sizes) 87 | v_spacing = (top - bottom)/float(max(layer_sizes)) 88 | h_spacing = (right - left)/float(len(layer_sizes) - 1) 89 | # Nodes 90 | for n, layer_size in enumerate(layer_sizes): 91 | layer_top = v_spacing*(layer_size - 1)/2. + (top + bottom)/2. 92 | for m in range(layer_size): 93 | circle = plt.Circle((n*h_spacing + left, layer_top - m*v_spacing), v_spacing/4., 94 | color='w', ec='k', zorder=4) 95 | ax.add_artist(circle) 96 | # Edges 97 | for n, (layer_size_a, layer_size_b) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])): 98 | layer_top_a = v_spacing*(layer_size_a - 1)/2. + (top + bottom)/2. 99 | layer_top_b = v_spacing*(layer_size_b - 1)/2. + (top + bottom)/2. 
100 | for m in range(layer_size_a): 101 | for o in range(layer_size_b): 102 | line = plt.Line2D([n*h_spacing + left, (n + 1)*h_spacing + left], 103 | [layer_top_a - m*v_spacing, layer_top_b - o*v_spacing], c='k') 104 | ax.add_artist(line) 105 | 106 | 107 | ## 108 | 109 | ### COMMON ANALYTICAL FUNCTIONS ### 110 | 111 | def random_batch(x_values, y_values,size=64): 112 | assert x_values.shape[0]==y_values.shape[0] 113 | n = x_values.shape[0] 114 | indices = np.random.permutation(n)[:size] 115 | return x_values[indices], y_values[indices] 116 | 117 | def random_values(): 118 | def random_functions(x_values): 119 | n, d = x_values.shape 120 | return np.random.normal(0,1,n) 121 | return random_functions 122 | 123 | 124 | def sigmoid(x): 125 | return 1 / (1 + np.exp(-x)) 126 | 127 | def sigmoid_of_sigmoid(): 128 | def sigmoid_of_sigmoid_function(x_values): 129 | y_values = sigmoid(sigmoid(x_values[:,0]+x_values[:,1])+sigmoid(x_values[:,2]+x_values[:,3])) 130 | return y_values 131 | return sigmoid_of_sigmoid_function 132 | 133 | def polynomial_composition(power=2): 134 | def polynomial_composition_function(x_values): 135 | n, d = x_values.shape 136 | x_values = np.add.reduceat(x_values, axis=1, indices=range(0,d,2)) #adds adjacent columns together 137 | x_values = x_values**power 138 | n, d = x_values.shape 139 | x_values = np.add.reduceat(x_values, axis=1, indices=range(0,d,2)) #adds adjacent columns together 140 | x_values = x_values**power 141 | return np.sum(x_values,axis=1) 142 | return polynomial_composition_function 143 | 144 | def polynomial_to_power(power=2): 145 | from scipy.misc import factorial 146 | def polynomial_to_power_function(x_values): 147 | return 1/factorial(power)*np.power(np.sum(x_values, axis=1),power) 148 | return polynomial_to_power_function 149 | 150 | def sin(omega=6): 151 | def sin_function(x_values): 152 | return np.sin(omega*x_values) 153 | return sin_function 154 | 155 | def polynomial(coefs=[1,1,1]): 156 | def polynomial_function(x_values): 157 | return np.polynomial.polynomial.polyval(x_values,coefs) 158 | return polynomial_function 159 | 160 | def sparse_trig(): 161 | def sparse_trig_function(x_values): 162 | return 2*(2*np.cos(x_values)**2-1)**2-1 163 | return sparse_trig_function 164 | 165 | ### END COMMON FUNCTIONS ### 166 | 167 | ''' 168 | Takes the dataset and maps each column to be between 0 and 1 169 | ''' 170 | def normalize(array): 171 | if array.ndim>1: 172 | return (array - array.min(axis=0)) / array.ptp(axis=0) 173 | else: 174 | return (array - array.min()) / array.ptp() 175 | 176 | ''' 177 | Helper function to define a multi-layer perceptron. 
178 | x: input tensorflow node 179 | num_nodes: array that contains the number of nodes in each hidden layer 180 | num_input: number of nodes in input layer 181 | num_output: number of nodes in output layer 182 | activation: the tensorflow activation function to user 183 | ''' 184 | def multilayer_perceptron(x, num_nodes, num_input=1, num_output=1, activation=tf.nn.sigmoid, bias=True, initializer=tf.contrib.layers.xavier_initializer(), return_weight_tensors=False): 185 | n_prev = num_input 186 | out = x 187 | num_layer = 0 188 | weights = list() 189 | 190 | for n in num_nodes: 191 | w = tf.get_variable("w"+str(num_layer),[n_prev, n], initializer=initializer) 192 | weights.append(w) 193 | if bias: 194 | b = tf.get_variable("b"+str(num_layer),[n], initializer =initializer) 195 | out = activation(tf.add(tf.matmul(out,w),b),name="out"+str(num_layer)) 196 | else: 197 | out = activation(tf.matmul(out,w),name="out"+str(num_layer)) 198 | 199 | n_prev = n 200 | num_layer += 1 201 | 202 | w_out = tf.get_variable("w"+str(num_layer),[n, num_output], initializer =initializer) 203 | weights.append(w_out) 204 | 205 | if bias: 206 | b_out = tf.get_variable("b"+str(num_layer),[num_output], initializer =initializer) 207 | out = tf.add(tf.matmul(out,w_out),b_out,name="out"+str(num_layer)) 208 | else: 209 | out = tf.matmul(out,w_out,name="out"+str(num_layer)) 210 | 211 | if return_weight_tensors: 212 | return out, weights 213 | return out 214 | 215 | 216 | # Modified MLP for use with experiment 2 217 | def recurrent_multilayer_perceptron(x, num_nodes, num_input=1, num_output=1, activation=tf.nn.sigmoid): 218 | n_prev = num_input 219 | 220 | assert all(x == num_nodes[0] for x in num_nodes) #for a recurrent multilayer perceptron, the number of neurons in each hidden layer should be the same 221 | 222 | w_in = tf.get_variable("w_in",[n_prev, num_nodes[0]]) 223 | b_in = tf.get_variable("b_in",[num_nodes[0]]) 224 | 225 | w = tf.get_variable("w_shared",[num_nodes[0], num_nodes[0]]) 226 | b = tf.get_variable("b_shared",[num_nodes[0]]) 227 | 228 | for i in range(len(num_nodes)+1): 229 | if i==0: 230 | out = activation(tf.add(tf.matmul(x,w_in),b_in),name="out"+str(i)) 231 | else: 232 | out = activation(tf.add(tf.matmul(out,w),b),name="out"+str(i)) 233 | 234 | w_out = tf.get_variable("w_out",[num_nodes[0], num_output]) 235 | b_out = tf.get_variable("b_out",[num_output]) 236 | out = tf.add(tf.matmul(out,w_out),b_out,name="out_final") 237 | 238 | return out 239 | 240 | ''' 241 | A class to organize methods that generate datasets for some of the experiments 242 | ''' 243 | class Dataset(): 244 | from sklearn.preprocessing import OneHotEncoder 245 | from sklearn.model_selection import train_test_split 246 | 247 | @classmethod 248 | def generate_moons(cls, n, d=2, test_size=0.2, one_hot=False, normalize_x=False, noise=0): 249 | from sklearn.datasets import make_moons 250 | assert (d%2==0),"d should be even" 251 | 252 | X, y = make_moons(n, noise=noise) 253 | 254 | if normalize_x: 255 | X = normalize(X) 256 | 257 | if (one_hot): 258 | y = y.reshape(-1,1) 259 | enc = cls.OneHotEncoder(n_values=2,sparse=False) 260 | y = enc.fit_transform(y) 261 | 262 | X_train, X_test, y_train, y_test = cls.train_test_split(X, y, test_size=test_size) 263 | 264 | return X_train, X_test, y_train, y_test 265 | 266 | 267 | @classmethod 268 | def generate_mixture_of_gaussians(cls, n, d, class_seps=[1], covariance_scale=1, test_size=0.2, one_hot=False, randomly_labeled=False, class_ratio=1, return_covariance=False, cov=None, resample=False, 
normalize_x=False): 269 | 270 | if len(class_seps)==d: 271 | pass 272 | elif len(class_seps)==1: 273 | class_seps = np.repeat(class_seps,d) 274 | else: 275 | raise ValueError("class_seps must be an array of length 1 or length d") 276 | 277 | if cov is None: 278 | c = covariance_scale*np.random.random((d,d)) 279 | cov = c.T.dot(c) 280 | 281 | assert class_ratio>=1, "parameter: class_ratio must be greater than or equal to 1" 282 | n_pos = int(n/(class_ratio+1)) 283 | n_neg = int(n-n_pos) 284 | X1 = np.random.multivariate_normal([0]*d, cov, size=n_pos) 285 | X2 = np.random.multivariate_normal(class_seps, cov, size=n_neg) 286 | if resample==True: #resamples the minority class 287 | X1 = np.tile(X1, (class_ratio, 1)) 288 | n_pos = n_pos*class_ratio 289 | X = np.concatenate([X1,X2]) 290 | 291 | if normalize_x: 292 | X = normalize(X) 293 | 294 | if randomly_labeled==True: 295 | y = np.random.randint(0,2,(n_pos+n_neg)) 296 | else: 297 | y = np.array([0]*n_pos + [1]*n_neg) 298 | 299 | if (one_hot): 300 | y = y.reshape(-1,1) 301 | enc = cls.OneHotEncoder(n_values=2,sparse=False) 302 | y = enc.fit_transform(y) 303 | 304 | X_train, X_test, y_train, y_test = cls.train_test_split(X, y, test_size=test_size) 305 | 306 | if return_covariance: 307 | return X_train, X_test, y_train, y_test, cov 308 | return X_train, X_test, y_train, y_test 309 | 310 | def generate_MNIST(n_train, n_test, subset=range(10)): 311 | from tensorflow.examples.tutorials.mnist import input_data 312 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 313 | y_train = mnist.train.labels 314 | 315 | def pretty_plotting_styles(): 316 | plt.rc("font",family="sans-serif",size=20) 317 | plt.rcParams["font.sans-serif"] = "Arial" 318 | 319 | 320 | 321 | 322 | ''' 323 | Returns an RNN with the following parameters: 324 | window_size: the number of previous time_steps to use to make the prediction 325 | dim: dimensionality of the input data 326 | units: the number of hidden units in the LSTM 327 | ''' 328 | def RNN(window_size=5, dim=1, units=32): 329 | import keras 330 | from keras.models import Model 331 | from keras.layers import Dense, Input, LSTM 332 | 333 | x = Input(shape=(window_size, dim)) 334 | z, sh, sc = LSTM(units=units, return_state=True)(x) 335 | z = Dense(1, activation='tanh')(z) 336 | model = Model(inputs=[x],outputs=[z]) 337 | model.compile(loss='mse', optimizer='adam') 338 | return model 339 | 340 | 341 | 342 | ''' 343 | Converts a time-series into a form that can be used to train and validate an RNN 344 | ''' 345 | def create_windowed_dataset(time_series, window_size=5, frac_train=0.8): 346 | time_series = normalize(time_series) 347 | X_train, y_train, X_test, y_test = [], [], [], [] 348 | n = len(time_series)-window_size-1 349 | n_train = int(n*frac_train) 350 | for i in range(n): 351 | a = time_series[i:(i+window_size)] 352 | if a.ndim==1: 353 | a = a.reshape(-1, 1) 354 | if i < n_train: 355 | X_train.append(a) 356 | y_train.append(time_series[i+window_size]) 357 | else: 358 | X_test.append(a) 359 | y_test.append(time_series[i+window_size]) 360 | return np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test) 361 | 362 | def mse(y, y_): 363 | y = y.flatten() 364 | y_ = y_.flatten() 365 | assert len(y)==len(y_), "arrays must be of the same length" 366 | return np.round(np.sqrt(np.mean(np.square(y-y_))),2) 367 | 368 | ''' 369 | Helper method to train and graph the results of RNN prediction 370 | ''' 371 | def train_and_plot(time_series, window_sizes=None, hidden_units=None,epochs=20, 
figsize=None): 372 | plt.rc("font",family="sans-serif",size=14) 373 | 374 | if not(figsize is None): 375 | plt.figure(figsize=figsize) 376 | if hidden_units is None: 377 | if figsize is None: 378 | plt.figure(figsize=[4*len(window_sizes),4]) 379 | for w, window_size in enumerate(window_sizes): 380 | plt.subplot(1, len(window_sizes), w+1) 381 | X_train, y_train, X_test, y_test = create_windowed_dataset(time_series, window_size=window_size) 382 | rnn = RNN(window_size=window_size) 383 | rnn.fit(X_train, y_train, epochs=epochs, verbose=0) 384 | y_ = rnn.predict(X_test) 385 | plt.plot(y_test) 386 | plt.plot(y_,marker='.') 387 | plt.title('Window size: '+str(window_size)+', RMSE: ' + str(mse(y_, y_test))) 388 | elif window_sizes is None: 389 | if figsize is None: 390 | plt.figure(figsize=[4*len(hidden_units),4]) 391 | for h, hidden_unit in enumerate(hidden_units): 392 | plt.subplot(1, len(hidden_units), h+1) 393 | X_train, y_train, X_test, y_test = create_windowed_dataset(time_series) 394 | rnn = RNN(units=hidden_unit) 395 | rnn.fit(X_train, y_train, epochs=epochs, verbose=0) 396 | y_ = rnn.predict(X_test) 397 | plt.plot(y_test) 398 | plt.plot(y_,marker='.') 399 | plt.title('# Hidden Units: '+str(hidden_unit)+', RMSE: ' + str(mse(y_, y_test))) 400 | else: 401 | if figsize is None: 402 | plt.figure(figsize=[4*len(window_sizes), 4*len(hidden_units)]) 403 | count = 0 404 | for w, window_size in enumerate(window_sizes): 405 | for h, hidden_unit in enumerate(hidden_units): 406 | count += 1 407 | plt.subplot(len(window_sizes), len(hidden_units), count) 408 | X_train, y_train, X_test, y_test = create_windowed_dataset(time_series, window_size=window_size) 409 | rnn = RNN(units=hidden_unit, window_size=window_size) 410 | rnn.fit(X_train, y_train, epochs=epochs, verbose=0) 411 | y_ = rnn.predict(X_test) 412 | plt.plot(y_test) 413 | plt.plot(y_,marker='.') 414 | plt.title('Window: '+str(window_size)+', Hidden: '+str(hidden_unit)+', RMSE: ' + str(mse(y_, y_test))) 415 | plt.legend(['Real','Predicted']) 416 | 417 | def plot_decision_boundary(X, y, grid_pred): 418 | xx, yy = np.meshgrid(np.arange(0, 1.02, 0.02), np.arange(0, 1.02, 0.02)) 419 | grid_points = np.c_[xx.ravel(), yy.ravel()] 420 | plt.scatter(*X.T, marker='.', c=np.argmax(y, axis=1), alpha=1, cmap='RdBu') 421 | zz = grid_pred[:,1].reshape(xx.shape) 422 | plt.contourf(xx, yy, zz, cmap='RdBu', alpha=.2) 423 | plt.xlim([0, 1]); plt.ylim([0,1]) 424 | plt.xlabel('Feature 1') 425 | plt.ylabel('Feature 2') 426 | 427 | 428 | -------------------------------------------------------------------------------- /__pycache__/experiments.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abidlabs/AtomsOfDeepLearning/bef9b47d26592e51753eece6ae485785d92566e4/__pycache__/experiments.cpython-35.pyc -------------------------------------------------------------------------------- /__pycache__/uci_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abidlabs/AtomsOfDeepLearning/bef9b47d26592e51753eece6ae485785d92566e4/__pycache__/uci_utils.cpython-35.pyc -------------------------------------------------------------------------------- /__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abidlabs/AtomsOfDeepLearning/bef9b47d26592e51753eece6ae485785d92566e4/__pycache__/utils.cpython-35.pyc 
-------------------------------------------------------------------------------- /experiments.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import time 4 | import warnings 5 | from utils import * 6 | from sklearn.neural_network import MLPClassifier 7 | from sklearn.decomposition import PCA 8 | 9 | ''' 10 | All of the Experiments, starting from a base class 11 | ''' 12 | class Experiment(): 13 | def __init__(self): 14 | pass 15 | 16 | #ideally, this would make experiments completely reproducible, but because jobs are distributed over multiple cores, small differences may persist in practice 17 | def initialize(self, seed=0, fix_seed=True): 18 | if fix_seed: 19 | np.random.seed(seed) 20 | tf.set_random_seed(seed) 21 | self.timer = Timer() 22 | self.timer.start() 23 | 24 | def conclude(self): 25 | self.timer.end_and_print() 26 | ''' 27 | Experiment 1: Why do we use neural networks? 28 | Description: Performs regression using a neural network with 1 hidden layer and different number of units. Returns the original x-values, true y-values, and predicted y-values, along with the MSE loss. 29 | ''' 30 | class Experiment1(Experiment): 31 | def __init__(self): 32 | pass 33 | 34 | def run(self, 35 | n_hidden = 2, 36 | learning_rate = 0.003, 37 | num_steps = 10000, 38 | num_values = 100, 39 | function = sin(omega=6), 40 | verbose=True): 41 | 42 | 43 | x_values = np.linspace(-1,1, num_values).reshape(-1,1) 44 | y_values = function(x_values).reshape(-1,1) 45 | 46 | tf.reset_default_graph() 47 | x = tf.placeholder(dtype="float", shape=[None,1]) 48 | y = tf.placeholder(dtype="float", shape=[None,1]) 49 | y_ = multilayer_perceptron(x, num_nodes=[n_hidden]) 50 | 51 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 52 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 53 | train_op = optimizer.minimize(loss_op) 54 | init_op = tf.global_variables_initializer() 55 | 56 | with tf.Session() as sess: 57 | sess.run(init_op) 58 | y_preds = list() 59 | for step in range(num_steps): 60 | _, loss, y_pred = sess.run([train_op, loss_op, y_], feed_dict={x:x_values,y:y_values}) 61 | if (step%(num_steps/10)==0 and verbose): 62 | print(loss) 63 | y_preds.append(y_pred.squeeze()) 64 | 65 | return x_values.squeeze(), y_values.squeeze(), y_pred.squeeze(), loss 66 | 67 | ''' 68 | Experiment 2: Why are Deeper Networks Better? 
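Example usage (an illustrative sketch; the parameter values below are arbitrary, not the settings used in the notebooks):
    exp = Experiment2()
    exp.initialize(seed=0)
    x, y, y_pred, loss, acc, n_params = exp.run(n=16, n_hidden=[10, 10], recurrent=True)
    exp.conclude()
Returns the inputs, targets, predictions, final loss, accuracy, and the total number of trainable parameters.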
69 | ''' 70 | class Experiment2(Experiment): 71 | def __init__(self): 72 | pass 73 | 74 | def run(self, 75 | n=16, 76 | n_hidden=[10], 77 | num_steps=15000, 78 | learning_rate = 0.003, 79 | verbose=False, 80 | recurrent=True): 81 | 82 | 83 | x_values = np.linspace(0,1-1/n,n).reshape(-1,1) 84 | y_values = np.resize([[0,1],[1,0]], (n,2)) 85 | 86 | tf.reset_default_graph() 87 | x = tf.placeholder(dtype="float", shape=[None,1]) 88 | y = tf.placeholder(dtype="float", shape=[None,2]) 89 | 90 | if recurrent: 91 | y_ = recurrent_multilayer_perceptron(x, num_input=1, num_output=2, num_nodes=n_hidden,activation=tf.nn.relu) 92 | else: 93 | y_ = multilayer_perceptron(x, num_input=1, num_output=2, num_nodes=n_hidden,bias=bias,activation=tf.nn.relu) 94 | 95 | 96 | n_params = np.sum([np.product([xi.value for xi in x.get_shape()]) for x in tf.global_variables()]) 97 | #show_graph(tf.get_default_graph().as_graph_def()) 98 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 99 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 100 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 101 | 102 | #loss_op = tf.reduce_mean(tf.square(y_ - y)) 103 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 104 | train_op = optimizer.minimize(loss_op) 105 | init_op = tf.global_variables_initializer() 106 | 107 | with tf.Session() as sess: 108 | sess.run(init_op) 109 | for step in range(num_steps): 110 | x_batch, y_batch = random_batch(x_values, y_values) 111 | _, loss, y_pred = sess.run([train_op, loss_op, y_], feed_dict={x:x_batch,y:y_batch}) 112 | if (step%(num_steps/10)==0 and verbose): 113 | print(loss) 114 | 115 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:x_values,y:y_values}) 116 | 117 | return x_values.squeeze(), y_values.squeeze(), y_pred.squeeze(), loss, accuracy, n_params 118 | 119 | 120 | ''' 121 | Experiment 3: Does More Data Favor Deeper Neural Networks? 122 | ''' 123 | class Experiment3(Experiment): 124 | def __init__(self): 125 | pass 126 | 127 | def run(self, 128 | classifiers, 129 | d = 12, 130 | class_seps = [1], 131 | ns = np.logspace(2,4,10), 132 | iters = 3, 133 | covariance_scale = 1, 134 | test_size = 0.2, 135 | accuracy_on = 'test', 136 | recurrent=True): 137 | 138 | acc = np.zeros((len(ns),len(classifiers),iters)) 139 | n_max = int(np.max(ns)) 140 | 141 | for k in range(iters): 142 | 143 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n_max, 144 | d=d, 145 | class_seps=class_seps, 146 | covariance_scale=covariance_scale, 147 | test_size=test_size) 148 | for i, n in enumerate(ns): 149 | for j, clf in enumerate(classifiers): 150 | with warnings.catch_warnings(): 151 | warnings.simplefilter('ignore') #MLP throws annoying errors whenever it doesn't fully converge 152 | n_train = int(n*(1-test_size)) 153 | clf.fit(X_train[:n_train],y_train[:n_train]) #choose a subset of the training data 154 | if accuracy_on=='train': 155 | acc[i,j,k] = clf.score(X_train[:int(n*(1-test_size))],y_train[:int(n*(1-test_size))]) 156 | elif accuracy_on=='test': 157 | acc[i,j,k] = clf.score(X_test,y_test) 158 | else: 159 | raise ValueError("accuracy_on must be 'test' or 'train'") 160 | 161 | return acc 162 | 163 | 164 | ''' 165 | Experiment 4: Does Unbalanced Data Hurt Neural Networks? 
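Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment4()
    exp.initialize(seed=0)
    acc_matrix, covs = exp.run(ratios=[1, 3, 10], iters=3, resample=False)
    exp.conclude()
acc_matrix has shape (len(ratios), iters); the returned covariance matrices can be fed back in through load_covs to repeat a run on the same class distributions.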
166 | ''' 167 | class Experiment4(Experiment): 168 | 169 | def __init__(self): 170 | pass 171 | 172 | def run(self, 173 | d = 12, 174 | iters = 3, 175 | covariance_scale = 1, 176 | test_size = 0.2, 177 | resample=False, 178 | n = 1200, 179 | num_steps=500, 180 | learning_rate = 0.003, 181 | verbose=False, 182 | load_covs = None, 183 | classify_grid = False, 184 | hidden_layer_sizes=(100,100), 185 | ratios = [1]): 186 | 187 | acc_matrix = np.zeros((len(ratios),iters)) 188 | class_seps = [1/(i+1) for i in range(d)] 189 | clf = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes) 190 | saved_covs = [] 191 | 192 | counter = 0 193 | for k in range(iters): 194 | for r, ratio in enumerate(ratios): 195 | # load covariance matrices for reproducibility 196 | if load_covs is None: 197 | cov = None 198 | else: 199 | cov = load_covs[counter] 200 | counter += 1 201 | 202 | 203 | X_train, _, y_train, _, cov = Dataset.generate_mixture_of_gaussians(n=n, 204 | d=d, 205 | normalize_x=True, 206 | one_hot=True, 207 | class_seps=class_seps, 208 | covariance_scale=covariance_scale, 209 | test_size=0, 210 | cov = cov, 211 | class_ratio=ratio, 212 | resample=resample, 213 | return_covariance=True) 214 | saved_covs.append(cov) 215 | X_test, _, y_test, _ = Dataset.generate_mixture_of_gaussians(n=int(n/4), 216 | d=d, 217 | normalize_x = True, 218 | one_hot=True, 219 | class_seps=class_seps, 220 | covariance_scale=covariance_scale, 221 | test_size=0, 222 | cov=cov) 223 | 224 | 225 | 226 | 227 | tf.reset_default_graph() 228 | x = tf.placeholder(dtype="float", shape=[None,d]) 229 | y = tf.placeholder(dtype="float", shape=[None,2]) 230 | y_ = multilayer_perceptron(x, num_input=d, num_output=2, num_nodes=hidden_layer_sizes) 231 | 232 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_)) 233 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 234 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 235 | 236 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 237 | train_op = optimizer.minimize(cross_entropy) 238 | init_op = tf.global_variables_initializer() 239 | 240 | with tf.Session() as sess: 241 | sess.run(init_op) 242 | y_preds = list() 243 | for step in range(num_steps): 244 | _, loss, acc, cp = sess.run([train_op, cross_entropy, accuracy_op, correct_prediction], feed_dict={x:X_train,y:y_train}) 245 | if (step%(num_steps/10)==0 and verbose): 246 | print(loss, acc, cp) 247 | 248 | accuracy, y_pred = sess.run([accuracy_op, y_], feed_dict={x:X_test,y:y_test}) 249 | acc_matrix[r,k] = accuracy 250 | 251 | if classify_grid: 252 | xx, yy = np.meshgrid(np.arange(0, 1.02, 0.02), np.arange(0, 1.02, 0.02)) 253 | grid_points = np.c_[xx.ravel(), yy.ravel()] 254 | grid_preds = sess.run(y_, feed_dict={x:grid_points}) 255 | 256 | if classify_grid: 257 | return acc_matrix, saved_covs, X_train, y_train, X_test, y_test, y_pred, grid_preds 258 | return acc_matrix, saved_covs 259 | 260 | 261 | ''' 262 | 5. Are Neural Networks Memorizing Or Generalizing During Training? 
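Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment5()
    exp.initialize(seed=0)
    acc = exp.run(ns=[100, 500, 2000], iters=3, randomize=True)
    exp.conclude()
With randomize=True the training labels are shuffled, so training accuracy above chance indicates memorization. acc has shape (len(ns), iters); set return_accuracy_per_epoch=True to instead record accuracy at 10 checkpoints during training.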
263 | ''' 264 | class Experiment5(Experiment): 265 | 266 | def __init__(self): 267 | pass 268 | 269 | def run(self, 270 | d = 6, 271 | iters = 3, 272 | covariance_scale = 1, 273 | test_size = 0.2, 274 | class_seps = [1 for i in range(6)], 275 | ns = [500], 276 | return_accuracy_per_epoch=False, 277 | randomize=False, 278 | verbose=False, 279 | learning_rate = 0.003, 280 | num_steps=2500, 281 | hidden_layer_sizes=(100,100)): 282 | 283 | if return_accuracy_per_epoch: 284 | acc = np.zeros((10, len(ns),iters)) 285 | else: 286 | acc = np.zeros((len(ns),iters)) 287 | n_max = np.max(ns) 288 | 289 | for k in range(iters): 290 | X_train_, _, y_train_, _ = Dataset.generate_mixture_of_gaussians(n=n_max, 291 | d=d, 292 | class_seps=class_seps, 293 | covariance_scale=covariance_scale, 294 | one_hot=True, 295 | test_size=0) 296 | if randomize: 297 | y_train_ = np.random.permutation(y_train_) 298 | 299 | for n_i, n in enumerate(ns): 300 | step_multiple = 0 301 | tf.reset_default_graph() 302 | X_train = X_train_[:n]; y_train = y_train_[:n] 303 | x = tf.placeholder(dtype="float", shape=[None,d]) 304 | y = tf.placeholder(dtype="float", shape=[None,2]) 305 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2) 306 | 307 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 308 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 309 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 310 | 311 | #loss_op = tf.reduce_mean(tf.square(y_ - y)) 312 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 313 | train_op = optimizer.minimize(loss_op) 314 | init_op = tf.global_variables_initializer() 315 | 316 | with tf.Session() as sess: 317 | sess.run(init_op) 318 | for step in range(num_steps): 319 | x_batch, y_batch = random_batch(X_train, y_train) 320 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 321 | if (step%(num_steps/10)==0 and verbose): 322 | print(accuracy) 323 | if (step%(num_steps/10)==0 and return_accuracy_per_epoch): 324 | accuracy = sess.run(accuracy_op, feed_dict={x:X_train,y:y_train}) 325 | acc[step_multiple, n_i, k] = accuracy 326 | step_multiple += 1 327 | 328 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_train,y:y_train}) 329 | 330 | if not(return_accuracy_per_epoch): 331 | acc[n_i,k] = accuracy 332 | 333 | return acc 334 | 335 | 336 | ''' 337 | ## 6. Does Unsupervised Feature Reduction Help or Hurt? 
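Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment6()
    exp.initialize(seed=0)
    acc = exp.run(d=10, dummy_dims=[0, 10, 50], pca_dims=[None, 10], noise_level=1)
    exp.conclude()
acc has shape (iters, len(dummy_dims), len(pca_dims)); a pca_dim of None means the randomly rotated features are used without any PCA step.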
338 | ''' 339 | class Experiment6(Experiment): 340 | 341 | def __init__(self): 342 | pass 343 | 344 | def run(self, 345 | d = 10, 346 | iters = 3, 347 | covariance_scale = 0.2, 348 | test_size = 0.2, 349 | n = 100, 350 | dummy_dims = [0], 351 | pca_dims = [None], 352 | verbose=False, 353 | noise_level = 0, 354 | learning_rate = 0.003, 355 | num_steps=500, 356 | hidden_layer_sizes=(100,100)): 357 | 358 | from scipy.stats import special_ortho_group 359 | 360 | class_seps = [1 for i in range(d)] 361 | acc = np.zeros((iters, len(dummy_dims),len(pca_dims))) 362 | 363 | for k in range(iters): 364 | X_train_, X_test_, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 365 | d=d, 366 | class_seps=class_seps, 367 | covariance_scale=covariance_scale, 368 | one_hot=True, 369 | test_size=test_size) 370 | 371 | for d_i, dummy_dim in enumerate(dummy_dims): 372 | X_train = np.concatenate((X_train_, noise_level*np.random.random(size=(X_train_.shape[0], dummy_dim))),axis=1); 373 | X_test = np.concatenate((X_test_, noise_level*np.random.random(size=(X_test_.shape[0], dummy_dim))),axis=1); 374 | 375 | rotation_matrix = np.random.random(size=(d+dummy_dim,d+dummy_dim)) 376 | X_train = X_train.dot(rotation_matrix) 377 | X_test = X_test.dot(rotation_matrix) 378 | 379 | 380 | for p_i, pca_dim in enumerate(pca_dims): 381 | pca = PCA(n_components = pca_dim) 382 | if not(pca_dim is None): 383 | X_train = pca.fit_transform(X_train) 384 | X_test = pca.transform(X_test) 385 | if pca_dim is None: 386 | pca_dim = d+dummy_dim 387 | 388 | tf.reset_default_graph() 389 | x = tf.placeholder(dtype="float", shape=[None,pca_dim]) 390 | y = tf.placeholder(dtype="float", shape=[None,2]) 391 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=pca_dim, num_output=2) 392 | 393 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 394 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 395 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 396 | 397 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 398 | train_op = optimizer.minimize(loss_op) 399 | init_op = tf.global_variables_initializer() 400 | 401 | with tf.Session() as sess: 402 | sess.run(init_op) 403 | for step in range(num_steps): 404 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:X_train,y:y_train}) 405 | if (step%(num_steps/10)==0 and verbose): 406 | print(accuracy) 407 | 408 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 409 | acc[k,d_i,p_i] = accuracy 410 | return acc 411 | 412 | 413 | ''' 414 | 7. Can Any Non-linearity Be Used As the Activation Function? 
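Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment7()
    exp.initialize(seed=0)
    acc = exp.run(activations=[tf.nn.sigmoid, tf.nn.relu, tf.square], noise=0.1)
    exp.conclude()
acc has shape (iters, 10, len(activations)): test accuracy is recorded at 10 evenly spaced points during training for each activation function.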
415 | ''' 416 | class Experiment7(Experiment): 417 | 418 | def __init__(self): 419 | pass 420 | 421 | def run(self, 422 | iters = 1, 423 | d = 2, 424 | test_size = 0.2, 425 | n = 500, 426 | noise = 0.1, 427 | verbose=False, 428 | activations = [tf.nn.sigmoid, tf.square], 429 | learning_rate = 0.003, 430 | num_steps=800, 431 | hidden_layer_sizes=(30,30)): 432 | 433 | 434 | acc = np.zeros((iters, 10, len(activations))) 435 | n_max = n 436 | 437 | for k in range(iters): 438 | X_train, X_test, y_train, y_test = Dataset.generate_moons(n=n_max, 439 | test_size=0.2, 440 | one_hot=True, 441 | noise=noise) 442 | 443 | for a_i, a in enumerate(activations): 444 | step_counter = 0 445 | tf.reset_default_graph() 446 | x = tf.placeholder(dtype="float", shape=[None,d]) 447 | y = tf.placeholder(dtype="float", shape=[None,2]) 448 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, activation=a) 449 | 450 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 451 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 452 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 453 | 454 | #loss_op = tf.reduce_mean(tf.square(y_ - y)) 455 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 456 | train_op = optimizer.minimize(loss_op) 457 | init_op = tf.global_variables_initializer() 458 | 459 | with tf.Session() as sess: 460 | sess.run(init_op) 461 | for step in range(num_steps): 462 | x_batch, y_batch = random_batch(X_train, y_train) 463 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 464 | if (step%(num_steps/10)==0): 465 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 466 | acc[k, step_counter, a_i] = accuracy 467 | step_counter += 1 468 | if verbose: 469 | print(accuracy) 470 | 471 | 472 | 473 | return acc 474 | 475 | ''' 476 | 8. How Does Batch Size Affect the Results? 
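Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment8()
    exp.initialize(seed=0)
    runtimes, acc = exp.run(batch_sizes=[8, 32, 128, 512], num_epochs=150)
    exp.conclude()
runtimes[i] is the wall-clock training time for batch_sizes[i]; acc has shape (len(batch_sizes), iters) unless return_accuracy_per_epoch=True.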
477 | ''' 478 | class Experiment8(Experiment): 479 | 480 | def __init__(self): 481 | pass 482 | 483 | def run(self, 484 | d = 12, 485 | iters = 3, 486 | covariance_scale = 1, 487 | test_size = 0.2, 488 | n = 500, 489 | batch_sizes = [32], 490 | return_accuracy_per_epoch=False, 491 | verbose=False, 492 | learning_rate = 0.003, 493 | num_epochs=150, 494 | store_every=10, 495 | hidden_layer_sizes=(100,100)): 496 | 497 | class_seps = [1 for i in range(12)] 498 | timer = Timer() 499 | if return_accuracy_per_epoch: 500 | acc = np.zeros((int(num_epochs/store_every)-1, len(batch_sizes),iters)) 501 | else: 502 | acc = np.zeros((len(batch_sizes),iters)) 503 | 504 | runtimes = np.zeros((len(batch_sizes))) 505 | for k in range(iters): 506 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 507 | d=d, 508 | class_seps=class_seps, 509 | covariance_scale=covariance_scale, 510 | one_hot=True, 511 | test_size=test_size) 512 | for b_i, batch_size in enumerate(batch_sizes): 513 | timer.start() 514 | step_multiple = 0 515 | tf.reset_default_graph() 516 | x = tf.placeholder(dtype="float", shape=[None,d]) 517 | y = tf.placeholder(dtype="float", shape=[None,2]) 518 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2) 519 | 520 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 521 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 522 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 523 | 524 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 525 | train_op = optimizer.minimize(loss_op) 526 | init_op = tf.global_variables_initializer() 527 | 528 | with tf.Session() as sess: 529 | sess.run(init_op) 530 | num_steps = int(num_epochs*n/batch_size) 531 | store_acc_threshold = num_steps/num_epochs*store_every 532 | for step in range(num_steps): 533 | x_batch, y_batch = random_batch(X_train, y_train, size=batch_size) 534 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 535 | if (step%(num_steps/num_epochs)==0 and verbose): 536 | print(accuracy) 537 | if (step>store_acc_threshold and return_accuracy_per_epoch): 538 | accuracy = sess.run(accuracy_op, feed_dict={x:X_train,y:y_train}) 539 | acc[step_multiple, b_i, k] = accuracy 540 | step_multiple += 1 541 | store_acc_threshold += num_steps/num_epochs*store_every 542 | 543 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 544 | 545 | if not(return_accuracy_per_epoch): 546 | acc[b_i,k] = accuracy # otherwise, this is stored earlier 547 | runtimes[b_i] = timer.end() 548 | 549 | return runtimes, acc 550 | 551 | 552 | 553 | ''' 554 | 9. How Does the Loss Function Matter? 
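Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment9()
    exp.initialize(seed=0)
    acc = exp.run(loss_functions=['cross_entropy', 'mean_squared_error', 'hinge_loss', 'constant'])
    exp.conclude()
Valid names are listed in LOSS_FUNCTIONS inside run(); acc has shape (iters, 10, len(loss_functions)), with test accuracy recorded at 10 points during training.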
555 | ''' 556 | class Experiment9(Experiment): 557 | 558 | def __init__(self): 559 | pass 560 | 561 | def run(self, 562 | d = 12, 563 | iters = 1, 564 | covariance_scale = 1, 565 | test_size = 0.2, 566 | n = 500, 567 | randomize=False, 568 | verbose=False, 569 | loss_functions = ['cross_entropy', 'mean_squared_error'], 570 | learning_rate = 0.003, 571 | num_steps=500, 572 | hidden_layer_sizes=(100,100)): 573 | 574 | class_seps = [1/(i+1) for i in range(d)] 575 | acc = np.zeros((iters, 10, len(loss_functions))) 576 | n_max = n 577 | LOSS_FUNCTIONS = ['cross_entropy', 578 | 'mean_abs_error', 579 | 'mean_squared_error', 580 | 'mean_fourth_pow_error', 581 | 'hinge_loss', 582 | 'constant'] 583 | 584 | for k in range(iters): 585 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n_max, 586 | d=d, 587 | class_seps=class_seps, 588 | covariance_scale=covariance_scale, 589 | one_hot=True) 590 | if randomize: 591 | y_train_ = np.random.permutation(y_train_) 592 | 593 | 594 | for l_i, l in enumerate(loss_functions): 595 | step_counter = 0 596 | if not(l in LOSS_FUNCTIONS): 597 | raise ValueError("Valid loss functions are " + str(LOSS_FUNCTIONS)) 598 | 599 | tf.reset_default_graph() 600 | x = tf.placeholder(dtype="float", shape=[None,d]) 601 | y = tf.placeholder(dtype="float", shape=[None,2]) 602 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2) 603 | 604 | if l=='cross_entropy': 605 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 606 | elif l=='mean_squared_error': 607 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 608 | elif l=='mean_abs_error': 609 | loss_op = tf.reduce_mean(tf.abs(y_ - y)) 610 | elif l=='hinge_loss': 611 | loss_op = tf.losses.hinge_loss(labels=y, logits=y_) 612 | elif l=='mean_fourth_pow_error': 613 | loss_op = tf.reduce_mean(tf.pow(y_ - y, 4)) 614 | elif l=='constant': 615 | loss_op = 0*tf.reduce_mean(tf.square(y_ - y)) 616 | 617 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 618 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 619 | 620 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 621 | train_op = optimizer.minimize(loss_op) 622 | init_op = tf.global_variables_initializer() 623 | 624 | with tf.Session() as sess: 625 | sess.run(init_op) 626 | for step in range(num_steps): 627 | x_batch, y_batch = random_batch(X_train, y_train) 628 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 629 | if (step%(num_steps/10)==0): 630 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 631 | acc[k, step_counter, l_i] = accuracy 632 | step_counter += 1 633 | if verbose: 634 | print(accuracy) 635 | 636 | return acc 637 | 638 | ''' 639 | 10. How Does the Initialization Affect Performance? 
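Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment10()
    exp.initialize(seed=0)
    acc = exp.run(initializers=[tf.contrib.layers.xavier_initializer(),
                                tf.random_normal_initializer(stddev=0.1)])
    exp.conclude()
acc has shape (iters, 10, len(initializers)), with test accuracy recorded at 10 points during training.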
640 | ''' 641 | class Experiment10(Experiment): 642 | 643 | def __init__(self): 644 | pass 645 | 646 | def run(self, 647 | d = 12, 648 | iters = 1, 649 | covariance_scale = 1, 650 | test_size = 0.2, 651 | n = 500, 652 | randomize=False, 653 | verbose=False, 654 | initializers = [tf.contrib.layers.xavier_initializer()], 655 | learning_rate = 0.003, 656 | num_steps=500, 657 | hidden_layer_sizes=(100,100)): 658 | 659 | class_seps = [1/(i+1) for i in range(d)] 660 | acc = np.zeros((iters, 10, len(initializers))) 661 | 662 | for k in range(iters): 663 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 664 | d=d, 665 | class_seps=class_seps, 666 | covariance_scale=covariance_scale, 667 | one_hot=True) 668 | 669 | for i_i, initializer in enumerate(initializers): 670 | step_counter = 0 671 | tf.reset_default_graph() 672 | x = tf.placeholder(dtype="float", shape=[None,d]) 673 | y = tf.placeholder(dtype="float", shape=[None,2]) 674 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, initializer=initializer) 675 | 676 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 677 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 678 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 679 | 680 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 681 | train_op = optimizer.minimize(loss_op) 682 | init_op = tf.global_variables_initializer() 683 | 684 | with tf.Session() as sess: 685 | sess.run(init_op) 686 | for step in range(num_steps): 687 | x_batch, y_batch = random_batch(X_train, y_train) 688 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 689 | if (step%(num_steps/10)==0): 690 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 691 | acc[k, step_counter, i_i] = accuracy 692 | step_counter += 1 693 | if verbose: 694 | print(accuracy) 695 | 696 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 697 | 698 | 699 | return acc 700 | 701 | ''' 702 | 11. Do Weights in Different Layers Evolve At Different Speeds? 
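Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment11()
    exp.initialize(seed=0)
    weights, accs = exp.run(num_steps=500)
    exp.conclude()
weights[t] holds the four weight matrices [w0, w1, w2, w3] captured every other training step, and accs[t] is the matching test accuracy, so the per-layer rate of change can be plotted over training.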
703 | ''' 704 | class Experiment11(Experiment): 705 | 706 | def __init__(self): 707 | pass 708 | 709 | def run(self, 710 | d = 12, 711 | covariance_scale = 1, 712 | test_size = 0.2, 713 | n = 500, 714 | store_every=2, 715 | randomize=False, 716 | verbose=False, 717 | learning_rate = 0.003, 718 | num_steps=500,): 719 | 720 | class_seps = [1/(i+1) for i in range(d)] 721 | hidden_layer_sizes=(50,50,50) 722 | weights = [] 723 | accs = [] 724 | 725 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 726 | d=d, 727 | class_seps=class_seps, 728 | covariance_scale=covariance_scale, 729 | one_hot=True) 730 | 731 | step_counter = 0 732 | 733 | tf.reset_default_graph() 734 | x = tf.placeholder(dtype="float", shape=[None,d]) 735 | y = tf.placeholder(dtype="float", shape=[None,2]) 736 | y_, wts = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, return_weight_tensors=True) 737 | 738 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 739 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 740 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 741 | 742 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 743 | train_op = optimizer.minimize(loss_op) 744 | init_op = tf.global_variables_initializer() 745 | 746 | with tf.Session() as sess: 747 | sess.run(init_op) 748 | for step in range(num_steps): 749 | x_batch, y_batch = random_batch(X_train, y_train) 750 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 751 | if (step%2==0): 752 | accuracy, w0, w1, w2, w3 = sess.run([accuracy_op, wts[0], wts[1], wts[2], wts[3]], feed_dict={x:X_test,y:y_test}) 753 | weights.append([w0, w1, w2, w3]) 754 | accs.append(accuracy) 755 | 756 | if verbose: 757 | print(accuracy) 758 | 759 | return weights, accs 760 | 761 | ''' 762 | 12. How Does Regularization Affect Weight Evolution? 
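Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment12()
    exp.initialize(seed=0)
    weights, accs = exp.run(regularization_type='L2', regularization_strength=0.1)
    exp.conclude()
regularization_type may be 'L1' or 'L2'; weights[t] holds the three weight matrices [w0, w1, w2] captured every other training step, with accs[t] the matching test accuracy.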
763 | ''' 764 | class Experiment12(Experiment): 765 | 766 | def __init__(self): 767 | pass 768 | 769 | def run(self, 770 | d = 12, 771 | covariance_scale = 1, 772 | test_size = 0.2, 773 | n = 500, 774 | regularization_type = 'L2', 775 | regularization_strength = 0, 776 | store_every=2, 777 | randomize=False, 778 | verbose=False, 779 | learning_rate = 0.003, 780 | num_steps=500,): 781 | 782 | class_seps = [1/(i+1) for i in range(d)] 783 | hidden_layer_sizes=(50,50) 784 | weights = [] 785 | accs = [] 786 | 787 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 788 | d=d, 789 | class_seps=class_seps, 790 | covariance_scale=covariance_scale, 791 | one_hot=True) 792 | 793 | step_counter = 0 794 | 795 | tf.reset_default_graph() 796 | x = tf.placeholder(dtype="float", shape=[None,d]) 797 | y = tf.placeholder(dtype="float", shape=[None,2]) 798 | y_, wts = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, return_weight_tensors=True) 799 | 800 | if regularization_type=='L2': 801 | loss_op = tf.reduce_mean(tf.square(y_ - y)) + regularization_strength*(tf.reduce_mean(tf.square(wts[0])) + tf.reduce_mean(tf.square(wts[1])) + tf.reduce_mean(tf.square(wts[2]))) 802 | elif regularization_type=='L1': 803 | loss_op = tf.reduce_mean(tf.square(y_ - y)) + regularization_strength*(tf.reduce_mean(tf.abs(wts[0])) + tf.reduce_mean(tf.abs(wts[1])) + tf.reduce_mean(tf.abs(wts[2]))) 804 | else: 805 | raise ValueError("regularization_type must be 'L1' or 'L2'") 806 | 807 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 808 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 809 | 810 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 811 | train_op = optimizer.minimize(loss_op) 812 | init_op = tf.global_variables_initializer() 813 | 814 | with tf.Session() as sess: 815 | sess.run(init_op) 816 | for step in range(num_steps): 817 | x_batch, y_batch = random_batch(X_train, y_train) 818 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 819 | if (step%2==0): 820 | accuracy, w0, w1, w2 = sess.run([accuracy_op, wts[0], wts[1], wts[2]], feed_dict={x:X_test,y:y_test}) 821 | weights.append([w0, w1, w2]) 822 | accs.append(accuracy) 823 | 824 | if verbose: 825 | print(accuracy) 826 | 827 | return weights, accs 828 | -------------------------------------------------------------------------------- /ise.csv: -------------------------------------------------------------------------------- 1 | 0.035753708 2 | 0.025425873 3 | -0.02886173 4 | -0.062208079 5 | 0.009859905 6 | -0.029191028 7 | 0.015445348 8 | -0.041167612 9 | 0.000661905 10 | 0.022037345 11 | -0.022692465 12 | -0.013708704 13 | 0.000864697 14 | -0.00381506 15 | 0.00566126 16 | 0.046831302 17 | -0.006634978 18 | 0.034566982 19 | -0.020528213 20 | -0.008776701 21 | -0.025919141 22 | 0.015279487 23 | 0.018577796 24 | -0.014132879 25 | 0.036607044 26 | 0.011353209 27 | -0.040542021 28 | -0.022105644 29 | -0.014888368 30 | 0.007026745 31 | -0.011494996 32 | -0.041136038 33 | -0.002631499 34 | 0.024654643 35 | -0.03584061 36 | 0.017303168 37 | 0.001725406 38 | 0.004975853 39 | 0.000671759 40 | -0.005891895 41 | -0.013689039 42 | 0.002192959 43 | 0.007913215 44 | -0.03852223 45 | 0.007958798 46 | -0.007133473 47 | 0.011234009 48 | -0.001410361 49 | 0.010974424 50 | 0.003213253 51 | 0.000214245 52 | -0.00711875 53 | 0.001891803 54 | 0.019874248 55 | 0.002918699 56 | 0.035968063 57 | 0.003298424 58 | 0.021165071 59 | 
-0.004968387 60 | 0.011247875 61 | -0.021780194 62 | 0.024406595 63 | 0.006913411 64 | 0.031401768 65 | -0.005247358 66 | 0.000735712 67 | -0.010297613 68 | 0.047238893 69 | 0.03177774 70 | 0.010170736 71 | -0.013661316 72 | -0.010111415 73 | 0.0057888 74 | 0.041454961 75 | -0.002617982 76 | -0.030954776 77 | 0.004548211 78 | 0.009655946 79 | 0.024517037 80 | -0.004162916 81 | -0.012367953 82 | 0.05198032 83 | 0.029022746 84 | 0.016261076 85 | 0.025882269 86 | 0.021061108 87 | -0.026273031 88 | -0.001123867 89 | -0.018541411 90 | 0.025756738 91 | -0.006508992 92 | 0.008070571 93 | 0.011703446 94 | 0.00540756 95 | 0.042840743 96 | -0.011999815 97 | 0.008430531 98 | 0.011166484 99 | -0.015572642 100 | 0.008801231 101 | -0.004209168 102 | -0.000514681 103 | 0.028131284 104 | -0.007792426 105 | -0.022362446 106 | -0.019171953 107 | 0.013947094 108 | -0.032005875 109 | 0.004478365 110 | 0.017089104 111 | 0.00967985 112 | 0.002670961 113 | -0.003623787 114 | -0.004441286 115 | 0.001688065 116 | 0.031866012 117 | 0.002917994 118 | -0.011589253 119 | -0.014168795 120 | 0.024046363 121 | 0.015786402 122 | 0.011719598 123 | 0.002315584 124 | 0.002929655 125 | 0.007996801 126 | -0.005590715 127 | -0.006518816 128 | -0.00105307 129 | 0.012265455 130 | -0.001519732 131 | -0.004539036 132 | -0.005617343 133 | 0.006074147 134 | -0.008232237 135 | 0.032511512 136 | 0.006370441 137 | -0.011323604 138 | 0.013780465 139 | -0.006357127 140 | 0.004758472 141 | 0.018495281 142 | 0.012603659 143 | 0.011235934 144 | 0.004036218 145 | 0.009330697 146 | 0.043744798 147 | 0.010826132 148 | 0.045219554 149 | 0.001853539 150 | -0.017993859 151 | -0.001445482 152 | 0.021028135 153 | -0.001832467 154 | -0.02278477 155 | 0.004813828 156 | 0.013453937 157 | -0.004084155 158 | -0.000271351 159 | -0.001299983 160 | 0.031414816 161 | 0.019757306 162 | 0.013108203 163 | 0.012676973 164 | 0.006291416 165 | -0.023435627 166 | 0.008987423 167 | 0.003705789 168 | -0.021432986 169 | 0.008224308 170 | -0.02534402 171 | 0.008378078 172 | -0.021587901 173 | 0.002509442 174 | -0.009497407 175 | -0.005834448 176 | 0.007574225 177 | 0.036271557 178 | -0.001994969 179 | -0.012536673 180 | 0.016615187 181 | 0.006084621 182 | 0.004157378 183 | 0.008195123 184 | 0.009023651 185 | -0.007433515 186 | 0.004001644 187 | 0.011414049 188 | -0.009442384 189 | -0.002213036 190 | -0.020018346 191 | 0.023026511 192 | 0.031160934 193 | -0.008730222 194 | 0.01706096 195 | 0.006746257 196 | 0.015520491 197 | -0.01843108 198 | 0.018883892 199 | -0.000160911 200 | -0.024346941 201 | 0.009915384 202 | 0.01507818 203 | 0.004774399 204 | -0.005441211 205 | 0.007105868 206 | -0.002934588 207 | -0.01632457 208 | -0.030082971 209 | -0.035849614 210 | 0.005735384 211 | -0.02390659 212 | 0.020219135 213 | 0.000351028 214 | -0.006962099 215 | 0.016985685 216 | 0.00751899 217 | 0.015029496 218 | -0.002417171 219 | -0.006258617 220 | 0.00685869 221 | -0.031914469 222 | -0.017581763 223 | -0.006598214 224 | -0.019347799 225 | 0.012533508 226 | -0.00630611 227 | 0.00055265 228 | 0.011903098 229 | 0.038612983 230 | 0.036468359 231 | 0.008514453 232 | -0.003723744 233 | -0.006519958 234 | -0.008229144 235 | 0.008292258 236 | -0.004204453 237 | 0.016307467 238 | 0.004990278 239 | 0.007262631 240 | -0.016948929 241 | 0.003501244 242 | 0.022530184 243 | 0.004894702 244 | -0.007211305 245 | 0.00581665 246 | 0.003891123 247 | -0.000811768 248 | 0.00322285 249 | -0.002274045 250 | 0.022138372 251 | 0.010229371 252 | 0.013898022 253 | 0.007956979 254 | 0.007771749 255 | 
-0.003189192 256 | -0.016130747 257 | -0.00454863 258 | 0.017559249 259 | 0.00207392 260 | -0.013516994 261 | 0.010044257 262 | 0.011097874 263 | 0.00559711 264 | -0.003033665 265 | -0.023856682 266 | 0.005236694 267 | 0.000671833 268 | -0.004547723 269 | 0.012852447 270 | -0.002190987 271 | 0.015891732 272 | -0.006837607 273 | -0.001432456 274 | -0.029575211 275 | -0.038300784 276 | -0.030015785 277 | 0.017725401 278 | -0.00510604 279 | 0.015960155 280 | -0.00619301 281 | 0.005285259 282 | 0.008033964 283 | 0.029039821 284 | -0.015578937 285 | 0.016435847 286 | -0.013711428 287 | -0.02262045 288 | -0.03476938 289 | -0.018702566 290 | 0.019627036 291 | 0.029306318 292 | 0.005696108 293 | 0.00432159 294 | -0.005009524 295 | 0.022773381 296 | 0.01266815 297 | -0.006335257 298 | -0.003915514 299 | -0.007161671 300 | -0.002744684 301 | -0.005021298 302 | 0.023990399 303 | 0.019908239 304 | -0.014267377 305 | -0.00182161 306 | 0.008486639 307 | 0.008486611 308 | 0.014337851 309 | 0.034020727 310 | -0.007637163 311 | 0.0010159 312 | 0.001520156 313 | -0.00381384 314 | 0.020479928 315 | 0.006075096 316 | 0.010027432 317 | -0.004077191 318 | 0.004087933 319 | -0.003990517 320 | 0.013150996 321 | -0.006163386 322 | -0.01276782 323 | 0.021378817 324 | -0.004650074 325 | -0.014181169 326 | -0.001211584 327 | 0.012526916 328 | -0.00741867 329 | -0.001926377 330 | 0.012008849 331 | -0.014084423 332 | -0.004895555 333 | 0.013199043 334 | 0.004356217 335 | -0.009524497 336 | -0.018303062 337 | -0.015818452 338 | -0.012077182 339 | -0.056752612 340 | 0.068951684 341 | 0.000243026 342 | 0.02647418 343 | -0.000807723 344 | -0.038389961 345 | 0.005780967 346 | 0.016049266 347 | -0.045388971 348 | -0.006232065 349 | 0.008218708 350 | -0.043092096 351 | 0.034723686 352 | 0.007264208 353 | 0.013409826 354 | -0.015485635 355 | 0.00138582 356 | 0.013716279 357 | 0.003520353 358 | -0.014439062 359 | -0.005624349 360 | -0.011897225 361 | 0.011865737 362 | 0.010203012 363 | 0.004905756 364 | 0.01005563 365 | 0.013067872 366 | -0.000326348 367 | 0.008616579 368 | 0.00251849 369 | 0.004246468 370 | -0.004728047 371 | -0.019495737 372 | 0.000166157 373 | -0.000920201 374 | 0.010131759 375 | -0.021815769 376 | -0.006830443 377 | -0.005580513 378 | 0.020009067 379 | 0.001419823 380 | 0.019062249 381 | 0.000190164 382 | 0.009585147 383 | -0.003400806 384 | 0.007303681 385 | 0.012237341 386 | 0.000388746 387 | -0.010542271 388 | 0.000683248 389 | 0.006648115 390 | 0.001949078 391 | 0.021523653 392 | 0.007114932 393 | -0.010400281 394 | 0.003504 395 | 0.013706542 396 | 0.001248869 397 | 0.005496886 398 | -0.014436418 399 | 0.013711065 400 | -0.010180284 401 | -0.003298125 402 | -0.004645658 403 | 0.002511476 404 | 0.002217177 405 | -0.012028845 406 | -0.017302052 407 | 0.003930899 408 | 0.001645509 409 | 0.005847453 410 | 0.01082305 411 | -0.000327821 412 | -0.010043636 413 | -0.001691507 414 | 0.002389147 415 | -0.004548585 416 | -0.0057746 417 | 0.013165613 418 | 0.004862007 419 | 0.010504082 420 | 0.011817165 421 | 0.002965501 422 | 0.002184557 423 | 0.00167273 424 | -0.005813995 425 | -0.002285365 426 | 0.026892714 427 | 0.003509248 428 | 0.014597561 429 | 0.00527176 430 | 0.002025474 431 | 0.006672435 432 | -0.002567317 433 | 0.005511668 434 | -0.004453821 435 | 0.000761169 436 | 0.005987659 437 | 0.014784496 438 | -0.000587001 439 | 0.003395986 440 | -0.013648916 441 | 0.008169493 442 | 0.013186098 443 | 0.008959157 444 | -0.001807411 445 | 0.006841092 446 | 0.026176163 447 | 0.009736797 448 | 0.007033826 449 | 
-0.013504077 450 | 0.012567728 451 | 0.005076448 452 | -0.008827632 453 | -0.006813227 454 | 0.015531853 455 | 0.00786829 456 | -0.001169304 457 | -0.002500309 458 | -0.030981288 459 | 0.007058143 460 | -0.008025245 461 | 0.001202099 462 | 0.032986428 463 | -0.001781987 464 | 0.002284926 465 | 0.00845291 466 | -0.013824293 467 | -0.006769931 468 | -0.007617876 469 | 0.006377468 470 | -0.030041874 471 | -0.023701804 472 | 0.013406293 473 | -0.001218191 474 | -0.01502137 475 | -0.031880777 476 | 0.019760499 477 | 0.012248015 478 | 0.011764491 479 | -0.001179229 480 | 0.012661171 481 | 0.002380343 482 | -0.002490964 483 | -0.026805079 484 | -0.017676197 485 | 0.024729439 486 | 0.001945517 487 | -0.015310023 488 | -0.016466636 489 | -0.014150672 490 | -0.000355833 491 | 0.020543856 492 | 0.00952841 493 | 0.011830075 494 | 0.000730038 495 | 0.00134576 496 | 0.001285406 497 | 0.004171479 498 | 0.001534651 499 | -0.012311844 500 | 0.024002222 501 | 0.009040201 502 | 0.008859545 503 | 0.006982356 504 | -0.007835122 505 | -0.013108757 506 | -0.003035482 507 | 0.003760527 508 | 0.009546783 509 | -0.010199048 510 | -0.015563411 511 | -0.006049411 512 | 0.000520727 513 | -0.017835035 514 | 0.00974406 515 | -0.011067076 516 | -0.00074888 517 | 0.012326085 518 | -0.014082114 519 | -0.028498178 520 | 0.001056216 521 | 0.024115793 522 | 0.007446755 523 | -0.02447813 524 | 0.02450661 525 | -0.0061961 526 | 0.005355587 527 | 0.004822987 528 | -0.017664432 529 | 0.004782286 530 | -0.002497928 531 | 0.003606376 532 | 0.008599056 533 | 0.009310309 534 | 0.000190969 535 | -0.013069043 536 | -0.007246324 537 | -------------------------------------------------------------------------------- /uci_utils.py: -------------------------------------------------------------------------------- 1 | from sklearn.neural_network import MLPClassifier 2 | from sklearn.neighbors import KNeighborsClassifier 3 | from sklearn.svm import SVC 4 | from sklearn.gaussian_process import GaussianProcessClassifier 5 | from sklearn.tree import DecisionTreeClassifier 6 | from sklearn.ensemble import RandomForestClassifier 7 | from sklearn.naive_bayes import GaussianNB 8 | from sklearn.model_selection import train_test_split 9 | import pandas as pd, numpy as np 10 | import warnings 11 | from IPython.display import Markdown, display 12 | 13 | 14 | class UCI_Dataset_Loader(): 15 | @classmethod 16 | def adult(cls): 17 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data" 18 | data=pd.read_csv(url, header=None, ) 19 | features = data.iloc[:,:-1] 20 | features = pd.get_dummies(features) 21 | labels = data.iloc[:,-1] 22 | labels = labels.astype('category').cat.codes 23 | return features, labels 24 | 25 | @classmethod 26 | def car(cls): 27 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data" 28 | data=pd.read_csv(url, header=None, ) 29 | features = data.iloc[:,:-1] 30 | features = pd.get_dummies(features) 31 | labels = data.iloc[:,-1] 32 | labels = labels.astype('category').cat.codes 33 | return features, labels 34 | 35 | @classmethod 36 | def credit_default(cls): 37 | try: 38 | import xlrd 39 | except: 40 | raise ImportError("To load this dataset, you need the library 'xlrd'. 
Try installing: pip install xlrd") 41 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls" 42 | data=pd.read_excel(url, header=1) 43 | features = data.iloc[:,:-1] 44 | features = pd.get_dummies(features) 45 | labels = data.iloc[:,-1] 46 | labels = labels.astype('category').cat.codes 47 | return features, labels 48 | 49 | @classmethod 50 | def dermatology(cls): 51 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data" 52 | data=pd.read_csv(url, header=None, ) 53 | features = data.iloc[:,1:] 54 | features = pd.get_dummies(features) 55 | labels = data.iloc[:,0] 56 | labels = labels.astype('category').cat.codes 57 | return features, labels 58 | 59 | @classmethod 60 | def diabetic_retinopathy(cls): 61 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00329/messidor_features.arff" 62 | data=pd.read_csv(url, skiprows=24, header=None) 63 | features = data.iloc[:,:-1] 64 | features = pd.get_dummies(features) 65 | labels = data.iloc[:,-1] 66 | labels = labels.astype('category').cat.codes 67 | return features, labels 68 | 69 | @classmethod 70 | def ecoli(cls): 71 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/ecoli/ecoli.data" 72 | data=pd.read_csv(url, header=None, sep='\s+') 73 | features = data.iloc[:,1:-1] 74 | features = pd.get_dummies(features) 75 | labels = data.iloc[:,-1] 76 | labels = labels.astype('category').cat.codes 77 | return features, labels 78 | 79 | @classmethod 80 | def eeg_eyes(cls): 81 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00264/EEG%20Eye%20State.arff" 82 | data=pd.read_csv(url, skiprows=19, header=None, sep=',') 83 | features = data.iloc[:,:-1] 84 | features = pd.get_dummies(features) 85 | labels = data.iloc[:,-1] 86 | labels = labels.astype('category').cat.codes 87 | return features, labels 88 | 89 | @classmethod 90 | def haberman(cls): 91 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/haberman/haberman.data" 92 | data=pd.read_csv(url, skiprows=0, header=None, sep=',') 93 | features = data.iloc[:,:-1] 94 | features = pd.get_dummies(features) 95 | labels = data.iloc[:,-1] 96 | labels = labels.astype('category').cat.codes 97 | return features, labels 98 | 99 | @classmethod 100 | def ionosphere(cls): 101 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data" 102 | data=pd.read_csv(url, skiprows=0, header=None, sep=',') 103 | features = data.iloc[:,:-1] 104 | features = pd.get_dummies(features) 105 | labels = data.iloc[:,-1] 106 | labels = labels.astype('category').cat.codes 107 | return features, labels 108 | 109 | @classmethod 110 | def ionosphere(cls): 111 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data" 112 | data=pd.read_csv(url, skiprows=0, header=None, sep=',') 113 | features = data.iloc[:,:-1] 114 | features = pd.get_dummies(features) 115 | labels = data.iloc[:,-1] 116 | labels = labels.astype('category').cat.codes 117 | return features, labels 118 | 119 | @classmethod 120 | def mice_protein(cls): 121 | try: 122 | import xlrd 123 | except: 124 | raise ImportError("To load this dataset, you need the library 'xlrd'. 
Try installing: pip install xlrd") 125 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00342/Data_Cortex_Nuclear.xls" 126 | data=pd.read_excel(url, header=0, na_values=['', ' ']) 127 | features = data.iloc[:,1:-4] 128 | features = features.fillna(value=0) 129 | features = pd.get_dummies(features) 130 | labels = data.iloc[:,-1] 131 | labels = labels.astype('category').cat.codes 132 | return features, labels 133 | 134 | @classmethod 135 | def nursery(cls): 136 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.data" 137 | data=pd.read_csv(url, header=0) 138 | features = data.iloc[:,:-1] 139 | features = pd.get_dummies(features) 140 | labels = data.iloc[:,-1] 141 | labels = labels.astype('category').cat.codes 142 | return features, labels 143 | 144 | @classmethod 145 | def seeds(cls): 146 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00236/seeds_dataset.txt" 147 | data=pd.read_csv(url, header=0, sep='\s+') 148 | features = data.iloc[:,:-1] 149 | features = pd.get_dummies(features) 150 | labels = data.iloc[:,-1] 151 | labels = labels.astype('category').cat.codes 152 | return features, labels 153 | 154 | @classmethod 155 | def seismic(cls): 156 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00266/seismic-bumps.arff" 157 | data=pd.read_csv(url, skiprows=154, header=0, sep=',') 158 | features = data.iloc[:,:-1] 159 | features = pd.get_dummies(features) 160 | labels = data.iloc[:,-1] 161 | labels = labels.astype('category').cat.codes 162 | return features, labels 163 | 164 | @classmethod 165 | def soybean(cls): 166 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/soybean/soybean-small.data" 167 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 168 | features = data.iloc[:,:-1] 169 | features = pd.get_dummies(features) 170 | labels = data.iloc[:,-1] 171 | labels = labels.astype('category').cat.codes 172 | return features, labels 173 | 174 | @classmethod 175 | def teaching_assistant(cls): 176 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/tae/tae.data" 177 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 178 | features = data.iloc[:,:-1] 179 | features = pd.get_dummies(features) 180 | labels = data.iloc[:,-1] 181 | labels = labels.astype('category').cat.codes 182 | return features, labels 183 | 184 | @classmethod 185 | def tic_tac_toe(cls): 186 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/tic-tac-toe/tic-tac-toe.data" 187 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 188 | features = data.iloc[:,:-1] 189 | features = pd.get_dummies(features) 190 | labels = data.iloc[:,-1] 191 | labels = labels.astype('category').cat.codes 192 | return features, labels 193 | 194 | @classmethod 195 | def website_phishing(cls): 196 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00379/PhishingData.arff" 197 | data=pd.read_csv(url, skiprows=14, header=None, sep=',') 198 | features = data.iloc[:,:-1] 199 | features = pd.get_dummies(features) 200 | labels = data.iloc[:,-1] 201 | labels = labels.astype('category').cat.codes 202 | return features, labels 203 | 204 | @classmethod 205 | def wholesale_customers(cls): 206 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00292/Wholesale%20customers%20data.csv" 207 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 208 | features = data.iloc[:,2:] 209 | features = pd.get_dummies(features) 210 | labels = data.iloc[:,1] 211 | labels = labels.astype('category').cat.codes 212 | return 
features, labels 213 | 214 | 215 | 216 | classifiers = [ 217 | SVC(), 218 | GaussianNB(), 219 | DecisionTreeClassifier(), 220 | RandomForestClassifier(), 221 | MLPClassifier(hidden_layer_sizes=(100)), 222 | MLPClassifier(hidden_layer_sizes=(100,100)), 223 | MLPClassifier(hidden_layer_sizes=(100,100,100)),] 224 | 225 | names = [ 226 | 'Support Vector', 227 | 'Naive Bayes', 228 | 'Decision Tree', 229 | 'Random Forests', 230 | '1-layer NN', 231 | '2-layer NN', 232 | '3-layer NN', 233 | ] 234 | 235 | def print_stats(X_train, X_test, y_train, y_test): 236 | string = "Training set size: " + str(X_train.shape) + ", Test set size: " + str(X_test.shape) + ", \# of classes: " + str(len(np.unique(y_train))) 237 | display(Markdown(string)) 238 | 239 | def print_best(scores): 240 | eps = 1e-3 241 | best = np.max(scores) 242 | indices = np.where(scores > best - eps)[1] 243 | string = 'Best classifier: **' 244 | for i, idx in enumerate(indices): 245 | if i > 0: 246 | string += ', ' 247 | string += names[idx] 248 | string += '**' 249 | display(Markdown(string)) 250 | 251 | all_data = list() 252 | 253 | def compute_test_accuracies(X, y, train_size=0.8, verbose=1, append=True, iters=3): 254 | scores = np.zeros((iters,len(classifiers))) 255 | for i in range(iters): 256 | with warnings.catch_warnings(): 257 | warnings.simplefilter('ignore') #MLP throws annoying errors whenever it doesn't fully converge 258 | X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=train_size) 259 | if verbose>=1 and i==0: 260 | print_stats(X_train, X_test, y_train, y_test) 261 | for c, clf in enumerate(classifiers): 262 | if verbose>=2: 263 | print(names[c]) 264 | with warnings.catch_warnings(): 265 | warnings.simplefilter('ignore') #MLP throws annoying errors whenever it doesn't fully converge 266 | clf.fit(X_train, y_train) 267 | score = clf.score(X_test, y_test) 268 | scores[i,c] = score 269 | scores = np.mean(scores,axis=0).reshape(1,-1) 270 | if append: 271 | n, d = X.shape 272 | c = len(np.unique(y)) 273 | all_data.append(np.concatenate([[[n, d, c]], scores], axis=1)) 274 | return scores 275 | 276 | def highlight_max(s): 277 | ''' 278 | highlight the maximum in a Series yellow. 279 | ''' 280 | eps = 1e-3 281 | best = s.max() 282 | return ['background-color: #5fba7d' if v>best-eps else '' for v in s] 283 | 284 | def highlight_max_excluding_first_three(s): 285 | ''' 286 | highlight the maximum in a Series yellow. 
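Intended use is row-wise with a pandas Styler (a sketch; df here is a hypothetical DataFrame built from all_data, whose first three columns are n, d, and the number of classes, followed by one score per classifier):
    df.style.apply(highlight_max_excluding_first_three, axis=1)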
287 | ''' 288 | eps = 1e-3 289 | best = s[3:].max() 290 | return ['background-color: #5fba7d' if (v>best-eps and i>3) else '' for i, v in enumerate(s)] 291 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from IPython.display import clear_output, Image, display, HTML 4 | import warnings 5 | import time 6 | 7 | def strip_consts(graph_def, max_const_size=32): 8 | """Strip large constant values from graph_def.""" 9 | strip_def = tf.GraphDef() 10 | for n0 in graph_def.node: 11 | n = strip_def.node.add() 12 | n.MergeFrom(n0) 13 | if n.op == 'Const': 14 | tensor = n.attr['value'].tensor 15 | size = len(tensor.tensor_content) 16 | if size > max_const_size: 17 | tensor.tensor_content = ""%size 18 | return strip_def 19 | 20 | def show_graph(graph_def, max_const_size=32): 21 | """Visualize TensorFlow graph.""" 22 | if hasattr(graph_def, 'as_graph_def'): 23 | graph_def = graph_def.as_graph_def() 24 | strip_def = strip_consts(graph_def, max_const_size=max_const_size) 25 | code = """ 26 | 31 | 32 |
33 | <tf-graph-basic id="{id}"></tf-graph-basic> 34 | </div>
35 | """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand())) 36 | 37 | iframe = """ 38 | <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe> 39 | """.format(code.replace('"', '&quot;')) 40 | display(HTML(iframe)) 41 | 42 | 43 | class Timer(): 44 | def __init__(self): 45 | pass 46 | def start(self): 47 | self.time = time.time() 48 | def end(self): 49 | return time.time() - self.time 50 | def end_and_print(self): 51 | print("Time needed to run experiment:",np.round(time.time()-self.time,3),"s") 52 | def end_and_md_print(self): 53 | from IPython.display import Markdown, display 54 | string = "Time needed to run experiment: " + str(np.round(time.time()-self.time,3)) + " s" 55 | display(Markdown(string)) 56 | 57 | 58 | ## 59 | 60 | import matplotlib.pyplot as plt 61 | 62 | def draw_neural_net(ax, left, right, bottom, top, layer_sizes): 63 | ''' 64 | Credit: https://gist.github.com/craffel/2d727968c3aaebd10359 65 | Draw a neural network cartoon using matplotlib. 66 | 67 | :usage: 68 | >>> fig = plt.figure(figsize=(12, 12)) 69 | >>> draw_neural_net(fig.gca(), .1, .9, .1, .9, [4, 7, 2]) 70 | 71 | :parameters: 72 | - ax : matplotlib.axes.AxesSubplot 73 | The axes on which to plot the cartoon (get e.g. by plt.gca()) 74 | - left : float 75 | The center of the leftmost node(s) will be placed here 76 | - right : float 77 | The center of the rightmost node(s) will be placed here 78 | - bottom : float 79 | The center of the bottommost node(s) will be placed here 80 | - top : float 81 | The center of the topmost node(s) will be placed here 82 | - layer_sizes : list of int 83 | List of layer sizes, including input and output dimensionality 84 | ''' 85 | 86 | n_layers = len(layer_sizes) 87 | v_spacing = (top - bottom)/float(max(layer_sizes)) 88 | h_spacing = (right - left)/float(len(layer_sizes) - 1) 89 | # Nodes 90 | for n, layer_size in enumerate(layer_sizes): 91 | layer_top = v_spacing*(layer_size - 1)/2. + (top + bottom)/2. 92 | for m in range(layer_size): 93 | circle = plt.Circle((n*h_spacing + left, layer_top - m*v_spacing), v_spacing/4., 94 | color='w', ec='k', zorder=4) 95 | ax.add_artist(circle) 96 | # Edges 97 | for n, (layer_size_a, layer_size_b) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])): 98 | layer_top_a = v_spacing*(layer_size_a - 1)/2. + (top + bottom)/2. 99 | layer_top_b = v_spacing*(layer_size_b - 1)/2. + (top + bottom)/2.
100 | for m in range(layer_size_a): 101 | for o in range(layer_size_b): 102 | line = plt.Line2D([n*h_spacing + left, (n + 1)*h_spacing + left], 103 | [layer_top_a - m*v_spacing, layer_top_b - o*v_spacing], c='k') 104 | ax.add_artist(line) 105 | 106 | 107 | ## 108 | 109 | ### COMMON ANALYTICAL FUNCTIONS ### 110 | 111 | def random_batch(x_values, y_values,size=64): 112 | assert x_values.shape[0]==y_values.shape[0] 113 | n = x_values.shape[0] 114 | indices = np.random.permutation(n)[:size] 115 | return x_values[indices], y_values[indices] 116 | 117 | def random_values(): 118 | def random_functions(x_values): 119 | n, d = x_values.shape 120 | return np.random.normal(0,1,n) 121 | return random_functions 122 | 123 | 124 | def sigmoid(x): 125 | return 1 / (1 + np.exp(-x)) 126 | 127 | def sigmoid_of_sigmoid(): 128 | def sigmoid_of_sigmoid_function(x_values): 129 | y_values = sigmoid(sigmoid(x_values[:,0]+x_values[:,1])+sigmoid(x_values[:,2]+x_values[:,3])) 130 | return y_values 131 | return sigmoid_of_sigmoid_function 132 | 133 | def polynomial_composition(power=2): 134 | def polynomial_composition_function(x_values): 135 | n, d = x_values.shape 136 | x_values = np.add.reduceat(x_values, axis=1, indices=range(0,d,2)) #adds adjacent columns together 137 | x_values = x_values**power 138 | n, d = x_values.shape 139 | x_values = np.add.reduceat(x_values, axis=1, indices=range(0,d,2)) #adds adjacent columns together 140 | x_values = x_values**power 141 | return np.sum(x_values,axis=1) 142 | return polynomial_composition_function 143 | 144 | def polynomial_to_power(power=2): 145 | from scipy.misc import factorial 146 | def polynomial_to_power_function(x_values): 147 | return 1/factorial(power)*np.power(np.sum(x_values, axis=1),power) 148 | return polynomial_to_power_function 149 | 150 | def sin(omega=6): 151 | def sin_function(x_values): 152 | return np.sin(omega*x_values) 153 | return sin_function 154 | 155 | def polynomial(coefs=[1,1,1]): 156 | def polynomial_function(x_values): 157 | return np.polynomial.polynomial.polyval(x_values,coefs) 158 | return polynomial_function 159 | 160 | def sparse_trig(): 161 | def sparse_trig_function(x_values): 162 | return 2*(2*np.cos(x_values)**2-1)**2-1 163 | return sparse_trig_function 164 | 165 | ### END COMMON FUNCTIONS ### 166 | 167 | ''' 168 | Takes the dataset and maps each column to be between 0 and 1 169 | ''' 170 | def normalize(array): 171 | if array.ndim>1: 172 | return (array - array.min(axis=0)) / array.ptp(axis=0) 173 | else: 174 | return (array - array.min()) / array.ptp() 175 | 176 | ''' 177 | Helper function to define a multi-layer perceptron. 
178 | x: input tensorflow node 179 | num_nodes: array that contains the number of nodes in each hidden layer 180 | num_input: number of nodes in input layer 181 | num_output: number of nodes in output layer 182 | activation: the tensorflow activation function to use 183 | ''' 184 | def multilayer_perceptron(x, num_nodes, num_input=1, num_output=1, activation=tf.nn.sigmoid, bias=True, initializer=tf.contrib.layers.xavier_initializer(), return_weight_tensors=False): 185 | n_prev = num_input 186 | out = x 187 | num_layer = 0 188 | weights = list() 189 | 190 | for n in num_nodes: 191 | w = tf.get_variable("w"+str(num_layer),[n_prev, n], initializer=initializer) 192 | weights.append(w) 193 | if bias: 194 | b = tf.get_variable("b"+str(num_layer),[n], initializer=initializer) 195 | out = activation(tf.add(tf.matmul(out,w),b),name="out"+str(num_layer)) 196 | else: 197 | out = activation(tf.matmul(out,w),name="out"+str(num_layer)) 198 | 199 | n_prev = n 200 | num_layer += 1 201 | 202 | w_out = tf.get_variable("w"+str(num_layer),[n, num_output], initializer=initializer) 203 | weights.append(w_out) 204 | 205 | if bias: 206 | b_out = tf.get_variable("b"+str(num_layer),[num_output], initializer=initializer) 207 | out = tf.add(tf.matmul(out,w_out),b_out,name="out"+str(num_layer)) 208 | else: 209 | out = tf.matmul(out,w_out,name="out"+str(num_layer)) 210 | 211 | if return_weight_tensors: 212 | return out, weights 213 | return out 214 | 215 | 216 | # Modified MLP for use with experiment 2 217 | def recurrent_multilayer_perceptron(x, num_nodes, num_input=1, num_output=1, activation=tf.nn.sigmoid): 218 | n_prev = num_input 219 | 220 | assert all(x == num_nodes[0] for x in num_nodes) #for a recurrent multilayer perceptron, the number of neurons in each hidden layer should be the same 221 | 222 | w_in = tf.get_variable("w_in",[n_prev, num_nodes[0]]) 223 | b_in = tf.get_variable("b_in",[num_nodes[0]]) 224 | 225 | w = tf.get_variable("w_shared",[num_nodes[0], num_nodes[0]]) 226 | b = tf.get_variable("b_shared",[num_nodes[0]]) 227 | 228 | for i in range(len(num_nodes)+1): 229 | if i==0: 230 | out = activation(tf.add(tf.matmul(x,w_in),b_in),name="out"+str(i)) 231 | else: 232 | out = activation(tf.add(tf.matmul(out,w),b),name="out"+str(i)) 233 | 234 | w_out = tf.get_variable("w_out",[num_nodes[0], num_output]) 235 | b_out = tf.get_variable("b_out",[num_output]) 236 | out = tf.add(tf.matmul(out,w_out),b_out,name="out_final") 237 | 238 | return out 239 | 240 | ''' 241 | A class to organize methods that generate datasets for some of the experiments 242 | ''' 243 | class Dataset(): 244 | from sklearn.preprocessing import OneHotEncoder 245 | from sklearn.model_selection import train_test_split 246 | 247 | @classmethod 248 | def generate_moons(cls, n, d=2, test_size=0.2, one_hot=False, normalize_x=False, noise=0): 249 | from sklearn.datasets import make_moons 250 | assert (d%2==0),"d should be even" 251 | 252 | X, y = make_moons(n, noise=noise) 253 | 254 | if normalize_x: 255 | X = normalize(X) 256 | 257 | if (one_hot): 258 | y = y.reshape(-1,1) 259 | enc = cls.OneHotEncoder(n_values=2,sparse=False) 260 | y = enc.fit_transform(y) 261 | 262 | X_train, X_test, y_train, y_test = cls.train_test_split(X, y, test_size=test_size) 263 | 264 | return X_train, X_test, y_train, y_test 265 | 266 | 267 | @classmethod 268 | def generate_mixture_of_gaussians(cls, n, d, class_seps=[1], covariance_scale=1, test_size=0.2, one_hot=False, randomly_labeled=False, class_ratio=1, return_covariance=False, cov=None, resample=False,
normalize_x=False): 269 | 270 | if len(class_seps)==d: 271 | pass 272 | elif len(class_seps)==1: 273 | class_seps = np.repeat(class_seps,d) 274 | else: 275 | raise ValueError("class_seps must be an array of length 1 or length d") 276 | 277 | if cov is None: 278 | c = covariance_scale*np.random.random((d,d)) 279 | cov = c.T.dot(c) 280 | 281 | assert class_ratio>=1, "parameter: class_ratio must be greater than or equal to 1" 282 | n_pos = int(n/(class_ratio+1)) 283 | n_neg = int(n-n_pos) 284 | X1 = np.random.multivariate_normal([0]*d, cov, size=n_pos) 285 | X2 = np.random.multivariate_normal(class_seps, cov, size=n_neg) 286 | if resample==True: #resamples the minority class 287 | X1 = np.tile(X1, (int(class_ratio), 1)) 288 | n_pos = n_pos*int(class_ratio) 289 | X = np.concatenate([X1,X2]) 290 | 291 | if normalize_x: 292 | X = normalize(X) 293 | 294 | if randomly_labeled==True: 295 | y = np.random.randint(0,2,(n_pos+n_neg)) 296 | else: 297 | y = np.array([0]*n_pos + [1]*n_neg) 298 | 299 | if (one_hot): 300 | y = y.reshape(-1,1) 301 | enc = cls.OneHotEncoder(n_values=2,sparse=False) 302 | y = enc.fit_transform(y) 303 | 304 | X_train, X_test, y_train, y_test = cls.train_test_split(X, y, test_size=test_size) 305 | 306 | if return_covariance: 307 | return X_train, X_test, y_train, y_test, cov 308 | return X_train, X_test, y_train, y_test 309 | 310 | def generate_MNIST(n_train, n_test, subset=range(10)): 311 | from tensorflow.examples.tutorials.mnist import input_data 312 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 313 | y_train = mnist.train.labels 314 | 315 | def pretty_plotting_styles(): 316 | plt.rc("font",family="sans-serif",size=20) 317 | plt.rcParams["font.sans-serif"] = "Arial" 318 | 319 | 320 | 321 | 322 | ''' 323 | Returns an RNN with the following parameters: 324 | window_size: the number of previous time steps to use to make the prediction 325 | dim: dimensionality of the input data 326 | units: the number of hidden units in the LSTM 327 | ''' 328 | def RNN(window_size=5, dim=1, units=32): 329 | import keras 330 | from keras.models import Model 331 | from keras.layers import Dense, Input, LSTM 332 | 333 | x = Input(shape=(window_size, dim)) 334 | z, sh, sc = LSTM(units=units, return_state=True)(x) 335 | z = Dense(1, activation='tanh')(z) 336 | model = Model(inputs=[x],outputs=[z]) 337 | model.compile(loss='mse', optimizer='adam') 338 | return model 339 | 340 | 341 | 342 | ''' 343 | Converts a time-series into a form that can be used to train and validate an RNN 344 | ''' 345 | def create_windowed_dataset(time_series, window_size=5, frac_train=0.8): 346 | time_series = normalize(time_series) 347 | X_train, y_train, X_test, y_test = [], [], [], [] 348 | n = len(time_series)-window_size-1 349 | n_train = int(n*frac_train) 350 | for i in range(n): 351 | a = time_series[i:(i+window_size)] 352 | if a.ndim==1: 353 | a = a.reshape(-1, 1) 354 | if i < n_train: 355 | X_train.append(a) 356 | y_train.append(time_series[i+window_size]) 357 | else: 358 | X_test.append(a) 359 | y_test.append(time_series[i+window_size]) 360 | return np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test) 361 | 362 | def mse(y, y_): #note: despite the name, this returns the root-mean-squared error (rounded to 2 decimals), which the plot titles report as RMSE 363 | y = y.flatten() 364 | y_ = y_.flatten() 365 | assert len(y)==len(y_), "arrays must be of the same length" 366 | return np.round(np.sqrt(np.mean(np.square(y-y_))),2) 367 | 368 | ''' 369 | Helper method to train and graph the results of RNN prediction 370 | ''' 371 | def train_and_plot(time_series, window_sizes=None, hidden_units=None,epochs=20,
figsize=None): 372 | plt.rc("font",family="sans-serif",size=14) 373 | 374 | if not(figsize is None): 375 | plt.figure(figsize=figsize) 376 | if hidden_units is None: 377 | if figsize is None: 378 | plt.figure(figsize=[4*len(window_sizes),4]) 379 | for w, window_size in enumerate(window_sizes): 380 | plt.subplot(1, len(window_sizes), w+1) 381 | X_train, y_train, X_test, y_test = create_windowed_dataset(time_series, window_size=window_size) 382 | rnn = RNN(window_size=window_size) 383 | rnn.fit(X_train, y_train, epochs=epochs, verbose=0) 384 | y_ = rnn.predict(X_test) 385 | plt.plot(y_test) 386 | plt.plot(y_,marker='.') 387 | plt.title('Window size: '+str(window_size)+', RMSE: ' + str(mse(y_, y_test))) 388 | elif window_sizes is None: 389 | if figsize is None: 390 | plt.figure(figsize=[4*len(hidden_units),4]) 391 | for h, hidden_unit in enumerate(hidden_units): 392 | plt.subplot(1, len(hidden_units), h+1) 393 | X_train, y_train, X_test, y_test = create_windowed_dataset(time_series) 394 | rnn = RNN(units=hidden_unit) 395 | rnn.fit(X_train, y_train, epochs=epochs, verbose=0) 396 | y_ = rnn.predict(X_test) 397 | plt.plot(y_test) 398 | plt.plot(y_,marker='.') 399 | plt.title('# Hidden Units: '+str(hidden_unit)+', RMSE: ' + str(mse(y_, y_test))) 400 | else: 401 | if figsize is None: 402 | plt.figure(figsize=[4*len(window_sizes), 4*len(hidden_units)]) 403 | count = 0 404 | for w, window_size in enumerate(window_sizes): 405 | for h, hidden_unit in enumerate(hidden_units): 406 | count += 1 407 | plt.subplot(len(window_sizes), len(hidden_units), count) 408 | X_train, y_train, X_test, y_test = create_windowed_dataset(time_series, window_size=window_size) 409 | rnn = RNN(units=hidden_unit, window_size=window_size) 410 | rnn.fit(X_train, y_train, epochs=epochs, verbose=0) 411 | y_ = rnn.predict(X_test) 412 | plt.plot(y_test) 413 | plt.plot(y_,marker='.') 414 | plt.title('Window: '+str(window_size)+', Hidden: '+str(hidden_unit)+', RMSE: ' + str(mse(y_, y_test))) 415 | plt.legend(['Real','Predicted']) 416 | 417 | def plot_decision_boundary(X, y, grid_pred): 418 | xx, yy = np.meshgrid(np.arange(0, 1.02, 0.02), np.arange(0, 1.02, 0.02)) 419 | grid_points = np.c_[xx.ravel(), yy.ravel()] 420 | plt.scatter(*X.T, marker='.', c=np.argmax(y, axis=1), alpha=1, cmap='RdBu') 421 | zz = grid_pred[:,1].reshape(xx.shape) 422 | plt.contourf(xx, yy, zz, cmap='RdBu', alpha=.2) 423 | plt.xlim([0, 1]); plt.ylim([0,1]) 424 | plt.xlabel('Feature 1') 425 | plt.ylabel('Feature 2') 426 | 427 | 428 | --------------------------------------------------------------------------------
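
The time-series helpers in utils.py are easiest to see end to end. The snippet below is a minimal usage sketch, not part of the repository: it assumes utils.py is importable from the working directory and that Keras with a TensorFlow 1.x backend is installed, and the series length, window size, and epoch count are arbitrary illustrative choices.

import numpy as np
from utils import sin, create_windowed_dataset, RNN, mse

# build a toy time series from the sin() helper (omega controls the frequency)
series = sin(omega=6)(np.linspace(-1, 1, 500))

# window the normalized series: X_* has shape (samples, window_size, 1), y_* holds the next value
X_train, y_train, X_test, y_test = create_windowed_dataset(series, window_size=5, frac_train=0.8)

# single-LSTM regressor from RNN(); train quietly and report the rounded RMSE computed by mse()
model = RNN(window_size=5, dim=1, units=32)
model.fit(X_train, y_train, epochs=20, verbose=0)
print("RMSE:", mse(model.predict(X_test), y_test))

train_and_plot(series, window_sizes=[3, 5, 10]) wraps this same loop and plots predicted versus true values for each window size, which is how the "What Can an RNN Predict" notebook uses these helpers.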