├── .ipynb_checkpoints ├── Atomic Experiments in Deep Learning-checkpoint.ipynb ├── Deep Networks on UCI Datasets-checkpoint.ipynb ├── What Can an RNN Predict-checkpoint.ipynb ├── experiments-checkpoint.py ├── ise-checkpoint.csv ├── uci_utils-checkpoint.py └── utils-checkpoint.py ├── Atomic Experiments in Deep Learning.html ├── Atomic Experiments in Deep Learning.ipynb ├── Deep Networks on UCI Datasets.html ├── Deep Networks on UCI Datasets.ipynb ├── What Can an RNN Predict.ipynb ├── __pycache__ ├── experiments.cpython-35.pyc ├── uci_utils.cpython-35.pyc └── utils.cpython-35.pyc ├── experiments.py ├── ise.csv ├── uci_utils.py └── utils.py /.ipynb_checkpoints/experiments-checkpoint.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import time 4 | import warnings 5 | from utils import * 6 | from sklearn.neural_network import MLPClassifier 7 | from sklearn.decomposition import PCA 8 | 9 | ''' 10 | All of the Experiments, starting from a base class 11 | ''' 12 | class Experiment(): 13 | def __init__(self): 14 | pass 15 | 16 | #ideally, this would make experiments completely reproducible, but because jobs are distributed over multiple cores, small differences may persist in practice 17 | def initialize(self, seed=0, fix_seed=True): 18 | if fix_seed: 19 | np.random.seed(seed) 20 | tf.set_random_seed(seed) 21 | self.timer = Timer() 22 | self.timer.start() 23 | 24 | def conclude(self): 25 | self.timer.end_and_print() 26 | ''' 27 | Experiment 1: Why do we use neural networks? 28 | Description: Performs regression using a neural network with 1 hidden layer and different number of units. Returns the original x-values, true y-values, and predicted y-values, along with the MSE loss. 29 | ''' 30 | class Experiment1(Experiment): 31 | def __init__(self): 32 | pass 33 | 34 | def run(self, 35 | n_hidden = 2, 36 | learning_rate = 0.003, 37 | num_steps = 10000, 38 | num_values = 100, 39 | function = sin(omega=6), 40 | verbose=True): 41 | 42 | 43 | x_values = np.linspace(-1,1, num_values).reshape(-1,1) 44 | y_values = function(x_values).reshape(-1,1) 45 | 46 | tf.reset_default_graph() 47 | x = tf.placeholder(dtype="float", shape=[None,1]) 48 | y = tf.placeholder(dtype="float", shape=[None,1]) 49 | y_ = multilayer_perceptron(x, num_nodes=[n_hidden]) 50 | 51 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 52 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 53 | train_op = optimizer.minimize(loss_op) 54 | init_op = tf.global_variables_initializer() 55 | 56 | with tf.Session() as sess: 57 | sess.run(init_op) 58 | y_preds = list() 59 | for step in range(num_steps): 60 | _, loss, y_pred = sess.run([train_op, loss_op, y_], feed_dict={x:x_values,y:y_values}) 61 | if (step%(num_steps/10)==0 and verbose): 62 | print(loss) 63 | y_preds.append(y_pred.squeeze()) 64 | 65 | return x_values.squeeze(), y_values.squeeze(), y_pred.squeeze(), loss 66 | 67 | ''' 68 | Experiment 2: Why are Deeper Networks Better? 
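Description: Classifies n evenly spaced points in [0, 1) whose labels alternate between the two classes, using either a standard MLP or a "recurrent" MLP that shares one weight matrix across its hidden layers. Returns the inputs, labels, predictions, final loss, accuracy, and the number of trainable parameters.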
69 | ''' 70 | class Experiment2(Experiment): 71 | def __init__(self): 72 | pass 73 | 74 | def run(self, 75 | n=16, 76 | n_hidden=[10], 77 | num_steps=15000, 78 | learning_rate = 0.003, 79 | verbose=False, 80 | recurrent=True): 81 | 82 | 83 | x_values = np.linspace(0,1-1/n,n).reshape(-1,1) 84 | y_values = np.resize([[0,1],[1,0]], (n,2)) 85 | 86 | tf.reset_default_graph() 87 | x = tf.placeholder(dtype="float", shape=[None,1]) 88 | y = tf.placeholder(dtype="float", shape=[None,2]) 89 | 90 | if recurrent: 91 | y_ = recurrent_multilayer_perceptron(x, num_input=1, num_output=2, num_nodes=n_hidden,activation=tf.nn.relu) 92 | else: 93 | y_ = multilayer_perceptron(x, num_input=1, num_output=2, num_nodes=n_hidden,bias=bias,activation=tf.nn.relu) 94 | 95 | 96 | n_params = np.sum([np.product([xi.value for xi in x.get_shape()]) for x in tf.global_variables()]) 97 | #show_graph(tf.get_default_graph().as_graph_def()) 98 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 99 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 100 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 101 | 102 | #loss_op = tf.reduce_mean(tf.square(y_ - y)) 103 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 104 | train_op = optimizer.minimize(loss_op) 105 | init_op = tf.global_variables_initializer() 106 | 107 | with tf.Session() as sess: 108 | sess.run(init_op) 109 | for step in range(num_steps): 110 | x_batch, y_batch = random_batch(x_values, y_values) 111 | _, loss, y_pred = sess.run([train_op, loss_op, y_], feed_dict={x:x_batch,y:y_batch}) 112 | if (step%(num_steps/10)==0 and verbose): 113 | print(loss) 114 | 115 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:x_values,y:y_values}) 116 | 117 | return x_values.squeeze(), y_values.squeeze(), y_pred.squeeze(), loss, accuracy, n_params 118 | 119 | 120 | ''' 121 | Experiment 3: Does More Data Favor Deeper Neural Networks? 122 | ''' 123 | class Experiment3(Experiment): 124 | def __init__(self): 125 | pass 126 | 127 | def run(self, 128 | classifiers, 129 | d = 12, 130 | class_seps = [1], 131 | ns = np.logspace(2,4,10), 132 | iters = 3, 133 | covariance_scale = 1, 134 | test_size = 0.2, 135 | accuracy_on = 'test', 136 | recurrent=True): 137 | 138 | acc = np.zeros((len(ns),len(classifiers),iters)) 139 | n_max = int(np.max(ns)) 140 | 141 | for k in range(iters): 142 | 143 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n_max, 144 | d=d, 145 | class_seps=class_seps, 146 | covariance_scale=covariance_scale, 147 | test_size=test_size) 148 | for i, n in enumerate(ns): 149 | for j, clf in enumerate(classifiers): 150 | with warnings.catch_warnings(): 151 | warnings.simplefilter('ignore') #MLP throws annoying errors whenever it doesn't fully converge 152 | n_train = int(n*(1-test_size)) 153 | clf.fit(X_train[:n_train],y_train[:n_train]) #choose a subset of the training data 154 | if accuracy_on=='train': 155 | acc[i,j,k] = clf.score(X_train[:int(n*(1-test_size))],y_train[:int(n*(1-test_size))]) 156 | elif accuracy_on=='test': 157 | acc[i,j,k] = clf.score(X_test,y_test) 158 | else: 159 | raise ValueError("accuracy_on must be 'test' or 'train'") 160 | 161 | return acc 162 | 163 | 164 | ''' 165 | Experiment 4: Does Unbalanced Data Hurt Neural Networks? 
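Description: Trains an MLP on mixture-of-Gaussians data generated with varying class ratios (optionally resampling the minority class) and evaluates it on a balanced test set drawn with the same covariance. Returns a (ratio x iteration) accuracy matrix and the covariance matrices used; with classify_grid=True it also returns the train/test data and predictions over a 2-D grid for plotting the decision boundary.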
166 | ''' 167 | class Experiment4(Experiment): 168 | 169 | def __init__(self): 170 | pass 171 | 172 | def run(self, 173 | d = 12, 174 | iters = 3, 175 | covariance_scale = 1, 176 | test_size = 0.2, 177 | resample=False, 178 | n = 1200, 179 | num_steps=500, 180 | learning_rate = 0.003, 181 | verbose=False, 182 | load_covs = None, 183 | classify_grid = False, 184 | hidden_layer_sizes=(100,100), 185 | ratios = [1]): 186 | 187 | acc_matrix = np.zeros((len(ratios),iters)) 188 | class_seps = [1/(i+1) for i in range(d)] 189 | clf = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes) 190 | saved_covs = [] 191 | 192 | counter = 0 193 | for k in range(iters): 194 | for r, ratio in enumerate(ratios): 195 | # load covariance matrices for reproducibility 196 | if load_covs is None: 197 | cov = None 198 | else: 199 | cov = load_covs[counter] 200 | counter += 1 201 | 202 | 203 | X_train, _, y_train, _, cov = Dataset.generate_mixture_of_gaussians(n=n, 204 | d=d, 205 | normalize_x=True, 206 | one_hot=True, 207 | class_seps=class_seps, 208 | covariance_scale=covariance_scale, 209 | test_size=0, 210 | cov = cov, 211 | class_ratio=ratio, 212 | resample=resample, 213 | return_covariance=True) 214 | saved_covs.append(cov) 215 | X_test, _, y_test, _ = Dataset.generate_mixture_of_gaussians(n=int(n/4), 216 | d=d, 217 | normalize_x = True, 218 | one_hot=True, 219 | class_seps=class_seps, 220 | covariance_scale=covariance_scale, 221 | test_size=0, 222 | cov=cov) 223 | 224 | 225 | 226 | 227 | tf.reset_default_graph() 228 | x = tf.placeholder(dtype="float", shape=[None,d]) 229 | y = tf.placeholder(dtype="float", shape=[None,2]) 230 | y_ = multilayer_perceptron(x, num_input=d, num_output=2, num_nodes=hidden_layer_sizes) 231 | 232 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_)) 233 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 234 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 235 | 236 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 237 | train_op = optimizer.minimize(cross_entropy) 238 | init_op = tf.global_variables_initializer() 239 | 240 | with tf.Session() as sess: 241 | sess.run(init_op) 242 | y_preds = list() 243 | for step in range(num_steps): 244 | _, loss, acc, cp = sess.run([train_op, cross_entropy, accuracy_op, correct_prediction], feed_dict={x:X_train,y:y_train}) 245 | if (step%(num_steps/10)==0 and verbose): 246 | print(loss, acc, cp) 247 | 248 | accuracy, y_pred = sess.run([accuracy_op, y_], feed_dict={x:X_test,y:y_test}) 249 | acc_matrix[r,k] = accuracy 250 | 251 | if classify_grid: 252 | xx, yy = np.meshgrid(np.arange(0, 1.02, 0.02), np.arange(0, 1.02, 0.02)) 253 | grid_points = np.c_[xx.ravel(), yy.ravel()] 254 | grid_preds = sess.run(y_, feed_dict={x:grid_points}) 255 | 256 | if classify_grid: 257 | return acc_matrix, saved_covs, X_train, y_train, X_test, y_test, y_pred, grid_preds 258 | return acc_matrix, saved_covs 259 | 260 | 261 | ''' 262 | 5. Are Neural Networks Memorizing Or Generalizing During Training? 
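Description: Trains an MLP on mixture-of-Gaussians training sets of varying size, optionally with randomly permuted labels, and records the accuracy on the training data, either once at the end of training or at ten checkpoints along the way.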
263 | ''' 264 | class Experiment5(Experiment): 265 | 266 | def __init__(self): 267 | pass 268 | 269 | def run(self, 270 | d = 6, 271 | iters = 3, 272 | covariance_scale = 1, 273 | test_size = 0.2, 274 | class_seps = [1 for i in range(6)], 275 | ns = [500], 276 | return_accuracy_per_epoch=False, 277 | randomize=False, 278 | verbose=False, 279 | learning_rate = 0.003, 280 | num_steps=2500, 281 | hidden_layer_sizes=(100,100)): 282 | 283 | if return_accuracy_per_epoch: 284 | acc = np.zeros((10, len(ns),iters)) 285 | else: 286 | acc = np.zeros((len(ns),iters)) 287 | n_max = np.max(ns) 288 | 289 | for k in range(iters): 290 | X_train_, _, y_train_, _ = Dataset.generate_mixture_of_gaussians(n=n_max, 291 | d=d, 292 | class_seps=class_seps, 293 | covariance_scale=covariance_scale, 294 | one_hot=True, 295 | test_size=0) 296 | if randomize: 297 | y_train_ = np.random.permutation(y_train_) 298 | 299 | for n_i, n in enumerate(ns): 300 | step_multiple = 0 301 | tf.reset_default_graph() 302 | X_train = X_train_[:n]; y_train = y_train_[:n] 303 | x = tf.placeholder(dtype="float", shape=[None,d]) 304 | y = tf.placeholder(dtype="float", shape=[None,2]) 305 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2) 306 | 307 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 308 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 309 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 310 | 311 | #loss_op = tf.reduce_mean(tf.square(y_ - y)) 312 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 313 | train_op = optimizer.minimize(loss_op) 314 | init_op = tf.global_variables_initializer() 315 | 316 | with tf.Session() as sess: 317 | sess.run(init_op) 318 | for step in range(num_steps): 319 | x_batch, y_batch = random_batch(X_train, y_train) 320 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 321 | if (step%(num_steps/10)==0 and verbose): 322 | print(accuracy) 323 | if (step%(num_steps/10)==0 and return_accuracy_per_epoch): 324 | accuracy = sess.run(accuracy_op, feed_dict={x:X_train,y:y_train}) 325 | acc[step_multiple, n_i, k] = accuracy 326 | step_multiple += 1 327 | 328 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_train,y:y_train}) 329 | 330 | if not(return_accuracy_per_epoch): 331 | acc[n_i,k] = accuracy 332 | 333 | return acc 334 | 335 | 336 | ''' 337 | ## 6. Does Unsupervised Feature Reduction Help or Hurt? 
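Description: Generates mixture-of-Gaussians data, appends noisy dummy dimensions, mixes all features with a random matrix, and optionally applies PCA before training an MLP. Returns test accuracy indexed by (iteration, number of dummy dimensions, PCA dimension).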
338 | ''' 339 | class Experiment6(Experiment): 340 | 341 | def __init__(self): 342 | pass 343 | 344 | def run(self, 345 | d = 10, 346 | iters = 3, 347 | covariance_scale = 0.2, 348 | test_size = 0.2, 349 | n = 100, 350 | dummy_dims = [0], 351 | pca_dims = [None], 352 | verbose=False, 353 | noise_level = 0, 354 | learning_rate = 0.003, 355 | num_steps=500, 356 | hidden_layer_sizes=(100,100)): 357 | 358 | from scipy.stats import special_ortho_group 359 | 360 | class_seps = [1 for i in range(d)] 361 | acc = np.zeros((iters, len(dummy_dims),len(pca_dims))) 362 | 363 | for k in range(iters): 364 | X_train_, X_test_, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 365 | d=d, 366 | class_seps=class_seps, 367 | covariance_scale=covariance_scale, 368 | one_hot=True, 369 | test_size=test_size) 370 | 371 | for d_i, dummy_dim in enumerate(dummy_dims): 372 | X_train = np.concatenate((X_train_, noise_level*np.random.random(size=(X_train_.shape[0], dummy_dim))),axis=1); 373 | X_test = np.concatenate((X_test_, noise_level*np.random.random(size=(X_test_.shape[0], dummy_dim))),axis=1); 374 | 375 | rotation_matrix = np.random.random(size=(d+dummy_dim,d+dummy_dim)) 376 | X_train = X_train.dot(rotation_matrix) 377 | X_test = X_test.dot(rotation_matrix) 378 | 379 | 380 | for p_i, pca_dim in enumerate(pca_dims): 381 | pca = PCA(n_components = pca_dim) 382 | if not(pca_dim is None): 383 | X_train = pca.fit_transform(X_train) 384 | X_test = pca.transform(X_test) 385 | if pca_dim is None: 386 | pca_dim = d+dummy_dim 387 | 388 | tf.reset_default_graph() 389 | x = tf.placeholder(dtype="float", shape=[None,pca_dim]) 390 | y = tf.placeholder(dtype="float", shape=[None,2]) 391 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=pca_dim, num_output=2) 392 | 393 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 394 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 395 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 396 | 397 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 398 | train_op = optimizer.minimize(loss_op) 399 | init_op = tf.global_variables_initializer() 400 | 401 | with tf.Session() as sess: 402 | sess.run(init_op) 403 | for step in range(num_steps): 404 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:X_train,y:y_train}) 405 | if (step%(num_steps/10)==0 and verbose): 406 | print(accuracy) 407 | 408 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 409 | acc[k,d_i,p_i] = accuracy 410 | return acc 411 | 412 | 413 | ''' 414 | 7. Can Any Non-linearity Be Used As the Activation Function? 
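Description: Trains MLPs on the two-moons dataset with different activation functions (sigmoid and squaring by default) and records test accuracy at ten checkpoints during training.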
415 | ''' 416 | class Experiment7(Experiment): 417 | 418 | def __init__(self): 419 | pass 420 | 421 | def run(self, 422 | iters = 1, 423 | d = 2, 424 | test_size = 0.2, 425 | n = 500, 426 | noise = 0.1, 427 | verbose=False, 428 | activations = [tf.nn.sigmoid, tf.square], 429 | learning_rate = 0.003, 430 | num_steps=800, 431 | hidden_layer_sizes=(30,30)): 432 | 433 | 434 | acc = np.zeros((iters, 10, len(activations))) 435 | n_max = n 436 | 437 | for k in range(iters): 438 | X_train, X_test, y_train, y_test = Dataset.generate_moons(n=n_max, 439 | test_size=0.2, 440 | one_hot=True, 441 | noise=noise) 442 | 443 | for a_i, a in enumerate(activations): 444 | step_counter = 0 445 | tf.reset_default_graph() 446 | x = tf.placeholder(dtype="float", shape=[None,d]) 447 | y = tf.placeholder(dtype="float", shape=[None,2]) 448 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, activation=a) 449 | 450 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 451 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 452 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 453 | 454 | #loss_op = tf.reduce_mean(tf.square(y_ - y)) 455 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 456 | train_op = optimizer.minimize(loss_op) 457 | init_op = tf.global_variables_initializer() 458 | 459 | with tf.Session() as sess: 460 | sess.run(init_op) 461 | for step in range(num_steps): 462 | x_batch, y_batch = random_batch(X_train, y_train) 463 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 464 | if (step%(num_steps/10)==0): 465 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 466 | acc[k, step_counter, a_i] = accuracy 467 | step_counter += 1 468 | if verbose: 469 | print(accuracy) 470 | 471 | 472 | 473 | return acc 474 | 475 | ''' 476 | 8. How Does Batch Size Affect the Results? 
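Description: Trains MLPs with different mini-batch sizes for a fixed number of epochs on mixture-of-Gaussians data. Returns the wall-clock runtime for each batch size along with either the final test accuracy or the training accuracy recorded every few epochs.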
477 | ''' 478 | class Experiment8(Experiment): 479 | 480 | def __init__(self): 481 | pass 482 | 483 | def run(self, 484 | d = 12, 485 | iters = 3, 486 | covariance_scale = 1, 487 | test_size = 0.2, 488 | n = 500, 489 | batch_sizes = [32], 490 | return_accuracy_per_epoch=False, 491 | verbose=False, 492 | learning_rate = 0.003, 493 | num_epochs=150, 494 | store_every=10, 495 | hidden_layer_sizes=(100,100)): 496 | 497 | class_seps = [1 for i in range(12)] 498 | timer = Timer() 499 | if return_accuracy_per_epoch: 500 | acc = np.zeros((int(num_epochs/store_every)-1, len(batch_sizes),iters)) 501 | else: 502 | acc = np.zeros((len(batch_sizes),iters)) 503 | 504 | runtimes = np.zeros((len(batch_sizes))) 505 | for k in range(iters): 506 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 507 | d=d, 508 | class_seps=class_seps, 509 | covariance_scale=covariance_scale, 510 | one_hot=True, 511 | test_size=test_size) 512 | for b_i, batch_size in enumerate(batch_sizes): 513 | timer.start() 514 | step_multiple = 0 515 | tf.reset_default_graph() 516 | x = tf.placeholder(dtype="float", shape=[None,d]) 517 | y = tf.placeholder(dtype="float", shape=[None,2]) 518 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2) 519 | 520 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 521 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 522 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 523 | 524 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 525 | train_op = optimizer.minimize(loss_op) 526 | init_op = tf.global_variables_initializer() 527 | 528 | with tf.Session() as sess: 529 | sess.run(init_op) 530 | num_steps = int(num_epochs*n/batch_size) 531 | store_acc_threshold = num_steps/num_epochs*store_every 532 | for step in range(num_steps): 533 | x_batch, y_batch = random_batch(X_train, y_train, size=batch_size) 534 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 535 | if (step%(num_steps/num_epochs)==0 and verbose): 536 | print(accuracy) 537 | if (step>store_acc_threshold and return_accuracy_per_epoch): 538 | accuracy = sess.run(accuracy_op, feed_dict={x:X_train,y:y_train}) 539 | acc[step_multiple, b_i, k] = accuracy 540 | step_multiple += 1 541 | store_acc_threshold += num_steps/num_epochs*store_every 542 | 543 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 544 | 545 | if not(return_accuracy_per_epoch): 546 | acc[b_i,k] = accuracy # otherwise, this is stored earlier 547 | runtimes[b_i] = timer.end() 548 | 549 | return runtimes, acc 550 | 551 | 552 | 553 | ''' 554 | 9. How Does the Loss Function Matter? 
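Description: Trains MLPs under different loss functions (cross-entropy, mean absolute error, mean squared error, mean fourth-power error, hinge loss, or a constant loss) on mixture-of-Gaussians data and records test accuracy at ten checkpoints during training.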
555 | ''' 556 | class Experiment9(Experiment): 557 | 558 | def __init__(self): 559 | pass 560 | 561 | def run(self, 562 | d = 12, 563 | iters = 1, 564 | covariance_scale = 1, 565 | test_size = 0.2, 566 | n = 500, 567 | randomize=False, 568 | verbose=False, 569 | loss_functions = ['cross_entropy', 'mean_squared_error'], 570 | learning_rate = 0.003, 571 | num_steps=500, 572 | hidden_layer_sizes=(100,100)): 573 | 574 | class_seps = [1/(i+1) for i in range(d)] 575 | acc = np.zeros((iters, 10, len(loss_functions))) 576 | n_max = n 577 | LOSS_FUNCTIONS = ['cross_entropy', 578 | 'mean_abs_error', 579 | 'mean_squared_error', 580 | 'mean_fourth_pow_error', 581 | 'hinge_loss', 582 | 'constant'] 583 | 584 | for k in range(iters): 585 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n_max, 586 | d=d, 587 | class_seps=class_seps, 588 | covariance_scale=covariance_scale, 589 | one_hot=True) 590 | if randomize: 591 | y_train_ = np.random.permutation(y_train_) 592 | 593 | 594 | for l_i, l in enumerate(loss_functions): 595 | step_counter = 0 596 | if not(l in LOSS_FUNCTIONS): 597 | raise ValueError("Valid loss functions are " + str(LOSS_FUNCTIONS)) 598 | 599 | tf.reset_default_graph() 600 | x = tf.placeholder(dtype="float", shape=[None,d]) 601 | y = tf.placeholder(dtype="float", shape=[None,2]) 602 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2) 603 | 604 | if l=='cross_entropy': 605 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 606 | elif l=='mean_squared_error': 607 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 608 | elif l=='mean_abs_error': 609 | loss_op = tf.reduce_mean(tf.abs(y_ - y)) 610 | elif l=='hinge_loss': 611 | loss_op = tf.losses.hinge_loss(labels=y, logits=y_) 612 | elif l=='mean_fourth_pow_error': 613 | loss_op = tf.reduce_mean(tf.pow(y_ - y, 4)) 614 | elif l=='constant': 615 | loss_op = 0*tf.reduce_mean(tf.square(y_ - y)) 616 | 617 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 618 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 619 | 620 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 621 | train_op = optimizer.minimize(loss_op) 622 | init_op = tf.global_variables_initializer() 623 | 624 | with tf.Session() as sess: 625 | sess.run(init_op) 626 | for step in range(num_steps): 627 | x_batch, y_batch = random_batch(X_train, y_train) 628 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 629 | if (step%(num_steps/10)==0): 630 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 631 | acc[k, step_counter, l_i] = accuracy 632 | step_counter += 1 633 | if verbose: 634 | print(accuracy) 635 | 636 | return acc 637 | 638 | ''' 639 | 10. How Does the Initialization Affect Performance? 
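Description: Trains MLPs whose weights are created with different initializers (Xavier by default) on mixture-of-Gaussians data and records test accuracy at ten checkpoints during training.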
640 | ''' 641 | class Experiment10(Experiment): 642 | 643 | def __init__(self): 644 | pass 645 | 646 | def run(self, 647 | d = 12, 648 | iters = 1, 649 | covariance_scale = 1, 650 | test_size = 0.2, 651 | n = 500, 652 | randomize=False, 653 | verbose=False, 654 | initializers = [tf.contrib.layers.xavier_initializer()], 655 | learning_rate = 0.003, 656 | num_steps=500, 657 | hidden_layer_sizes=(100,100)): 658 | 659 | class_seps = [1/(i+1) for i in range(d)] 660 | acc = np.zeros((iters, 10, len(initializers))) 661 | 662 | for k in range(iters): 663 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 664 | d=d, 665 | class_seps=class_seps, 666 | covariance_scale=covariance_scale, 667 | one_hot=True) 668 | 669 | for i_i, initializer in enumerate(initializers): 670 | step_counter = 0 671 | tf.reset_default_graph() 672 | x = tf.placeholder(dtype="float", shape=[None,d]) 673 | y = tf.placeholder(dtype="float", shape=[None,2]) 674 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, initializer=initializer) 675 | 676 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 677 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 678 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 679 | 680 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 681 | train_op = optimizer.minimize(loss_op) 682 | init_op = tf.global_variables_initializer() 683 | 684 | with tf.Session() as sess: 685 | sess.run(init_op) 686 | for step in range(num_steps): 687 | x_batch, y_batch = random_batch(X_train, y_train) 688 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 689 | if (step%(num_steps/10)==0): 690 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 691 | acc[k, step_counter, i_i] = accuracy 692 | step_counter += 1 693 | if verbose: 694 | print(accuracy) 695 | 696 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 697 | 698 | 699 | return acc 700 | 701 | ''' 702 | 11. Do Weights in Different Layers Evolve At Different Speeds? 
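Description: Trains a fixed MLP with three hidden layers of 50 units and, every other step, snapshots all four weight matrices together with the test accuracy, so the rate of change of each layer's weights can be compared.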
703 | ''' 704 | class Experiment11(Experiment): 705 | 706 | def __init__(self): 707 | pass 708 | 709 | def run(self, 710 | d = 12, 711 | covariance_scale = 1, 712 | test_size = 0.2, 713 | n = 500, 714 | store_every=2, 715 | randomize=False, 716 | verbose=False, 717 | learning_rate = 0.003, 718 | num_steps=500,): 719 | 720 | class_seps = [1/(i+1) for i in range(d)] 721 | hidden_layer_sizes=(50,50,50) 722 | weights = [] 723 | accs = [] 724 | 725 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 726 | d=d, 727 | class_seps=class_seps, 728 | covariance_scale=covariance_scale, 729 | one_hot=True) 730 | 731 | step_counter = 0 732 | 733 | tf.reset_default_graph() 734 | x = tf.placeholder(dtype="float", shape=[None,d]) 735 | y = tf.placeholder(dtype="float", shape=[None,2]) 736 | y_, wts = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, return_weight_tensors=True) 737 | 738 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 739 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 740 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 741 | 742 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 743 | train_op = optimizer.minimize(loss_op) 744 | init_op = tf.global_variables_initializer() 745 | 746 | with tf.Session() as sess: 747 | sess.run(init_op) 748 | for step in range(num_steps): 749 | x_batch, y_batch = random_batch(X_train, y_train) 750 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 751 | if (step%2==0): 752 | accuracy, w0, w1, w2, w3 = sess.run([accuracy_op, wts[0], wts[1], wts[2], wts[3]], feed_dict={x:X_test,y:y_test}) 753 | weights.append([w0, w1, w2, w3]) 754 | accs.append(accuracy) 755 | 756 | if verbose: 757 | print(accuracy) 758 | 759 | return weights, accs 760 | 761 | ''' 762 | 12. How Does Regularization Affect Weight Evolution? 
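Description: Repeats the weight-tracking setup of Experiment 11 with two hidden layers and an L1 or L2 penalty of adjustable strength added to the loss, snapshotting the weight matrices and test accuracy every other step.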
763 | ''' 764 | class Experiment12(Experiment): 765 | 766 | def __init__(self): 767 | pass 768 | 769 | def run(self, 770 | d = 12, 771 | covariance_scale = 1, 772 | test_size = 0.2, 773 | n = 500, 774 | regularization_type = 'L2', 775 | regularization_strength = 0, 776 | store_every=2, 777 | randomize=False, 778 | verbose=False, 779 | learning_rate = 0.003, 780 | num_steps=500,): 781 | 782 | class_seps = [1/(i+1) for i in range(d)] 783 | hidden_layer_sizes=(50,50) 784 | weights = [] 785 | accs = [] 786 | 787 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 788 | d=d, 789 | class_seps=class_seps, 790 | covariance_scale=covariance_scale, 791 | one_hot=True) 792 | 793 | step_counter = 0 794 | 795 | tf.reset_default_graph() 796 | x = tf.placeholder(dtype="float", shape=[None,d]) 797 | y = tf.placeholder(dtype="float", shape=[None,2]) 798 | y_, wts = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, return_weight_tensors=True) 799 | 800 | if regularization_type=='L2': 801 | loss_op = tf.reduce_mean(tf.square(y_ - y)) + regularization_strength*(tf.reduce_mean(tf.square(wts[0])) + tf.reduce_mean(tf.square(wts[1])) + tf.reduce_mean(tf.square(wts[2]))) 802 | elif regularization_type=='L1': 803 | loss_op = tf.reduce_mean(tf.square(y_ - y)) + regularization_strength*(tf.reduce_mean(tf.abs(wts[0])) + tf.reduce_mean(tf.abs(wts[1])) + tf.reduce_mean(tf.abs(wts[2]))) 804 | else: 805 | raise ValueError("regularization_type must be 'L1' or 'L2'") 806 | 807 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 808 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 809 | 810 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 811 | train_op = optimizer.minimize(loss_op) 812 | init_op = tf.global_variables_initializer() 813 | 814 | with tf.Session() as sess: 815 | sess.run(init_op) 816 | for step in range(num_steps): 817 | x_batch, y_batch = random_batch(X_train, y_train) 818 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 819 | if (step%2==0): 820 | accuracy, w0, w1, w2 = sess.run([accuracy_op, wts[0], wts[1], wts[2]], feed_dict={x:X_test,y:y_test}) 821 | weights.append([w0, w1, w2]) 822 | accs.append(accuracy) 823 | 824 | if verbose: 825 | print(accuracy) 826 | 827 | return weights, accs 828 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/ise-checkpoint.csv: -------------------------------------------------------------------------------- 1 | 0.035753708 2 | 0.025425873 3 | -0.02886173 4 | -0.062208079 5 | 0.009859905 6 | -0.029191028 7 | 0.015445348 8 | -0.041167612 9 | 0.000661905 10 | 0.022037345 11 | -0.022692465 12 | -0.013708704 13 | 0.000864697 14 | -0.00381506 15 | 0.00566126 16 | 0.046831302 17 | -0.006634978 18 | 0.034566982 19 | -0.020528213 20 | -0.008776701 21 | -0.025919141 22 | 0.015279487 23 | 0.018577796 24 | -0.014132879 25 | 0.036607044 26 | 0.011353209 27 | -0.040542021 28 | -0.022105644 29 | -0.014888368 30 | 0.007026745 31 | -0.011494996 32 | -0.041136038 33 | -0.002631499 34 | 0.024654643 35 | -0.03584061 36 | 0.017303168 37 | 0.001725406 38 | 0.004975853 39 | 0.000671759 40 | -0.005891895 41 | -0.013689039 42 | 0.002192959 43 | 0.007913215 44 | -0.03852223 45 | 0.007958798 46 | -0.007133473 47 | 0.011234009 48 | -0.001410361 49 | 0.010974424 50 | 0.003213253 51 | 0.000214245 52 | -0.00711875 53 | 0.001891803 54 | 0.019874248 55 | 0.002918699 56 | 0.035968063 57 | 0.003298424 58 
| 0.021165071 59 | -0.004968387 60 | 0.011247875 61 | -0.021780194 62 | 0.024406595 63 | 0.006913411 64 | 0.031401768 65 | -0.005247358 66 | 0.000735712 67 | -0.010297613 68 | 0.047238893 69 | 0.03177774 70 | 0.010170736 71 | -0.013661316 72 | -0.010111415 73 | 0.0057888 74 | 0.041454961 75 | -0.002617982 76 | -0.030954776 77 | 0.004548211 78 | 0.009655946 79 | 0.024517037 80 | -0.004162916 81 | -0.012367953 82 | 0.05198032 83 | 0.029022746 84 | 0.016261076 85 | 0.025882269 86 | 0.021061108 87 | -0.026273031 88 | -0.001123867 89 | -0.018541411 90 | 0.025756738 91 | -0.006508992 92 | 0.008070571 93 | 0.011703446 94 | 0.00540756 95 | 0.042840743 96 | -0.011999815 97 | 0.008430531 98 | 0.011166484 99 | -0.015572642 100 | 0.008801231 101 | -0.004209168 102 | -0.000514681 103 | 0.028131284 104 | -0.007792426 105 | -0.022362446 106 | -0.019171953 107 | 0.013947094 108 | -0.032005875 109 | 0.004478365 110 | 0.017089104 111 | 0.00967985 112 | 0.002670961 113 | -0.003623787 114 | -0.004441286 115 | 0.001688065 116 | 0.031866012 117 | 0.002917994 118 | -0.011589253 119 | -0.014168795 120 | 0.024046363 121 | 0.015786402 122 | 0.011719598 123 | 0.002315584 124 | 0.002929655 125 | 0.007996801 126 | -0.005590715 127 | -0.006518816 128 | -0.00105307 129 | 0.012265455 130 | -0.001519732 131 | -0.004539036 132 | -0.005617343 133 | 0.006074147 134 | -0.008232237 135 | 0.032511512 136 | 0.006370441 137 | -0.011323604 138 | 0.013780465 139 | -0.006357127 140 | 0.004758472 141 | 0.018495281 142 | 0.012603659 143 | 0.011235934 144 | 0.004036218 145 | 0.009330697 146 | 0.043744798 147 | 0.010826132 148 | 0.045219554 149 | 0.001853539 150 | -0.017993859 151 | -0.001445482 152 | 0.021028135 153 | -0.001832467 154 | -0.02278477 155 | 0.004813828 156 | 0.013453937 157 | -0.004084155 158 | -0.000271351 159 | -0.001299983 160 | 0.031414816 161 | 0.019757306 162 | 0.013108203 163 | 0.012676973 164 | 0.006291416 165 | -0.023435627 166 | 0.008987423 167 | 0.003705789 168 | -0.021432986 169 | 0.008224308 170 | -0.02534402 171 | 0.008378078 172 | -0.021587901 173 | 0.002509442 174 | -0.009497407 175 | -0.005834448 176 | 0.007574225 177 | 0.036271557 178 | -0.001994969 179 | -0.012536673 180 | 0.016615187 181 | 0.006084621 182 | 0.004157378 183 | 0.008195123 184 | 0.009023651 185 | -0.007433515 186 | 0.004001644 187 | 0.011414049 188 | -0.009442384 189 | -0.002213036 190 | -0.020018346 191 | 0.023026511 192 | 0.031160934 193 | -0.008730222 194 | 0.01706096 195 | 0.006746257 196 | 0.015520491 197 | -0.01843108 198 | 0.018883892 199 | -0.000160911 200 | -0.024346941 201 | 0.009915384 202 | 0.01507818 203 | 0.004774399 204 | -0.005441211 205 | 0.007105868 206 | -0.002934588 207 | -0.01632457 208 | -0.030082971 209 | -0.035849614 210 | 0.005735384 211 | -0.02390659 212 | 0.020219135 213 | 0.000351028 214 | -0.006962099 215 | 0.016985685 216 | 0.00751899 217 | 0.015029496 218 | -0.002417171 219 | -0.006258617 220 | 0.00685869 221 | -0.031914469 222 | -0.017581763 223 | -0.006598214 224 | -0.019347799 225 | 0.012533508 226 | -0.00630611 227 | 0.00055265 228 | 0.011903098 229 | 0.038612983 230 | 0.036468359 231 | 0.008514453 232 | -0.003723744 233 | -0.006519958 234 | -0.008229144 235 | 0.008292258 236 | -0.004204453 237 | 0.016307467 238 | 0.004990278 239 | 0.007262631 240 | -0.016948929 241 | 0.003501244 242 | 0.022530184 243 | 0.004894702 244 | -0.007211305 245 | 0.00581665 246 | 0.003891123 247 | -0.000811768 248 | 0.00322285 249 | -0.002274045 250 | 0.022138372 251 | 0.010229371 252 | 0.013898022 253 | 0.007956979 254 | 
0.007771749 255 | -0.003189192 256 | -0.016130747 257 | -0.00454863 258 | 0.017559249 259 | 0.00207392 260 | -0.013516994 261 | 0.010044257 262 | 0.011097874 263 | 0.00559711 264 | -0.003033665 265 | -0.023856682 266 | 0.005236694 267 | 0.000671833 268 | -0.004547723 269 | 0.012852447 270 | -0.002190987 271 | 0.015891732 272 | -0.006837607 273 | -0.001432456 274 | -0.029575211 275 | -0.038300784 276 | -0.030015785 277 | 0.017725401 278 | -0.00510604 279 | 0.015960155 280 | -0.00619301 281 | 0.005285259 282 | 0.008033964 283 | 0.029039821 284 | -0.015578937 285 | 0.016435847 286 | -0.013711428 287 | -0.02262045 288 | -0.03476938 289 | -0.018702566 290 | 0.019627036 291 | 0.029306318 292 | 0.005696108 293 | 0.00432159 294 | -0.005009524 295 | 0.022773381 296 | 0.01266815 297 | -0.006335257 298 | -0.003915514 299 | -0.007161671 300 | -0.002744684 301 | -0.005021298 302 | 0.023990399 303 | 0.019908239 304 | -0.014267377 305 | -0.00182161 306 | 0.008486639 307 | 0.008486611 308 | 0.014337851 309 | 0.034020727 310 | -0.007637163 311 | 0.0010159 312 | 0.001520156 313 | -0.00381384 314 | 0.020479928 315 | 0.006075096 316 | 0.010027432 317 | -0.004077191 318 | 0.004087933 319 | -0.003990517 320 | 0.013150996 321 | -0.006163386 322 | -0.01276782 323 | 0.021378817 324 | -0.004650074 325 | -0.014181169 326 | -0.001211584 327 | 0.012526916 328 | -0.00741867 329 | -0.001926377 330 | 0.012008849 331 | -0.014084423 332 | -0.004895555 333 | 0.013199043 334 | 0.004356217 335 | -0.009524497 336 | -0.018303062 337 | -0.015818452 338 | -0.012077182 339 | -0.056752612 340 | 0.068951684 341 | 0.000243026 342 | 0.02647418 343 | -0.000807723 344 | -0.038389961 345 | 0.005780967 346 | 0.016049266 347 | -0.045388971 348 | -0.006232065 349 | 0.008218708 350 | -0.043092096 351 | 0.034723686 352 | 0.007264208 353 | 0.013409826 354 | -0.015485635 355 | 0.00138582 356 | 0.013716279 357 | 0.003520353 358 | -0.014439062 359 | -0.005624349 360 | -0.011897225 361 | 0.011865737 362 | 0.010203012 363 | 0.004905756 364 | 0.01005563 365 | 0.013067872 366 | -0.000326348 367 | 0.008616579 368 | 0.00251849 369 | 0.004246468 370 | -0.004728047 371 | -0.019495737 372 | 0.000166157 373 | -0.000920201 374 | 0.010131759 375 | -0.021815769 376 | -0.006830443 377 | -0.005580513 378 | 0.020009067 379 | 0.001419823 380 | 0.019062249 381 | 0.000190164 382 | 0.009585147 383 | -0.003400806 384 | 0.007303681 385 | 0.012237341 386 | 0.000388746 387 | -0.010542271 388 | 0.000683248 389 | 0.006648115 390 | 0.001949078 391 | 0.021523653 392 | 0.007114932 393 | -0.010400281 394 | 0.003504 395 | 0.013706542 396 | 0.001248869 397 | 0.005496886 398 | -0.014436418 399 | 0.013711065 400 | -0.010180284 401 | -0.003298125 402 | -0.004645658 403 | 0.002511476 404 | 0.002217177 405 | -0.012028845 406 | -0.017302052 407 | 0.003930899 408 | 0.001645509 409 | 0.005847453 410 | 0.01082305 411 | -0.000327821 412 | -0.010043636 413 | -0.001691507 414 | 0.002389147 415 | -0.004548585 416 | -0.0057746 417 | 0.013165613 418 | 0.004862007 419 | 0.010504082 420 | 0.011817165 421 | 0.002965501 422 | 0.002184557 423 | 0.00167273 424 | -0.005813995 425 | -0.002285365 426 | 0.026892714 427 | 0.003509248 428 | 0.014597561 429 | 0.00527176 430 | 0.002025474 431 | 0.006672435 432 | -0.002567317 433 | 0.005511668 434 | -0.004453821 435 | 0.000761169 436 | 0.005987659 437 | 0.014784496 438 | -0.000587001 439 | 0.003395986 440 | -0.013648916 441 | 0.008169493 442 | 0.013186098 443 | 0.008959157 444 | -0.001807411 445 | 0.006841092 446 | 0.026176163 447 | 0.009736797 448 | 
0.007033826 449 | -0.013504077 450 | 0.012567728 451 | 0.005076448 452 | -0.008827632 453 | -0.006813227 454 | 0.015531853 455 | 0.00786829 456 | -0.001169304 457 | -0.002500309 458 | -0.030981288 459 | 0.007058143 460 | -0.008025245 461 | 0.001202099 462 | 0.032986428 463 | -0.001781987 464 | 0.002284926 465 | 0.00845291 466 | -0.013824293 467 | -0.006769931 468 | -0.007617876 469 | 0.006377468 470 | -0.030041874 471 | -0.023701804 472 | 0.013406293 473 | -0.001218191 474 | -0.01502137 475 | -0.031880777 476 | 0.019760499 477 | 0.012248015 478 | 0.011764491 479 | -0.001179229 480 | 0.012661171 481 | 0.002380343 482 | -0.002490964 483 | -0.026805079 484 | -0.017676197 485 | 0.024729439 486 | 0.001945517 487 | -0.015310023 488 | -0.016466636 489 | -0.014150672 490 | -0.000355833 491 | 0.020543856 492 | 0.00952841 493 | 0.011830075 494 | 0.000730038 495 | 0.00134576 496 | 0.001285406 497 | 0.004171479 498 | 0.001534651 499 | -0.012311844 500 | 0.024002222 501 | 0.009040201 502 | 0.008859545 503 | 0.006982356 504 | -0.007835122 505 | -0.013108757 506 | -0.003035482 507 | 0.003760527 508 | 0.009546783 509 | -0.010199048 510 | -0.015563411 511 | -0.006049411 512 | 0.000520727 513 | -0.017835035 514 | 0.00974406 515 | -0.011067076 516 | -0.00074888 517 | 0.012326085 518 | -0.014082114 519 | -0.028498178 520 | 0.001056216 521 | 0.024115793 522 | 0.007446755 523 | -0.02447813 524 | 0.02450661 525 | -0.0061961 526 | 0.005355587 527 | 0.004822987 528 | -0.017664432 529 | 0.004782286 530 | -0.002497928 531 | 0.003606376 532 | 0.008599056 533 | 0.009310309 534 | 0.000190969 535 | -0.013069043 536 | -0.007246324 537 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/uci_utils-checkpoint.py: -------------------------------------------------------------------------------- 1 | from sklearn.neural_network import MLPClassifier 2 | from sklearn.neighbors import KNeighborsClassifier 3 | from sklearn.svm import SVC 4 | from sklearn.gaussian_process import GaussianProcessClassifier 5 | from sklearn.tree import DecisionTreeClassifier 6 | from sklearn.ensemble import RandomForestClassifier 7 | from sklearn.naive_bayes import GaussianNB 8 | from sklearn.model_selection import train_test_split 9 | import pandas as pd, numpy as np 10 | import warnings 11 | from IPython.display import Markdown, display 12 | 13 | 14 | class UCI_Dataset_Loader(): 15 | @classmethod 16 | def adult(cls): 17 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data" 18 | data=pd.read_csv(url, header=None, ) 19 | features = data.iloc[:,:-1] 20 | features = pd.get_dummies(features) 21 | labels = data.iloc[:,-1] 22 | labels = labels.astype('category').cat.codes 23 | return features, labels 24 | 25 | @classmethod 26 | def car(cls): 27 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data" 28 | data=pd.read_csv(url, header=None, ) 29 | features = data.iloc[:,:-1] 30 | features = pd.get_dummies(features) 31 | labels = data.iloc[:,-1] 32 | labels = labels.astype('category').cat.codes 33 | return features, labels 34 | 35 | @classmethod 36 | def credit_default(cls): 37 | try: 38 | import xlrd 39 | except: 40 | raise ImportError("To load this dataset, you need the library 'xlrd'. 
Try installing: pip install xlrd") 41 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls" 42 | data=pd.read_excel(url, header=1) 43 | features = data.iloc[:,:-1] 44 | features = pd.get_dummies(features) 45 | labels = data.iloc[:,-1] 46 | labels = labels.astype('category').cat.codes 47 | return features, labels 48 | 49 | @classmethod 50 | def dermatology(cls): 51 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data" 52 | data=pd.read_csv(url, header=None, ) 53 | features = data.iloc[:,1:] 54 | features = pd.get_dummies(features) 55 | labels = data.iloc[:,0] 56 | labels = labels.astype('category').cat.codes 57 | return features, labels 58 | 59 | @classmethod 60 | def diabetic_retinopathy(cls): 61 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00329/messidor_features.arff" 62 | data=pd.read_csv(url, skiprows=24, header=None) 63 | features = data.iloc[:,:-1] 64 | features = pd.get_dummies(features) 65 | labels = data.iloc[:,-1] 66 | labels = labels.astype('category').cat.codes 67 | return features, labels 68 | 69 | @classmethod 70 | def ecoli(cls): 71 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/ecoli/ecoli.data" 72 | data=pd.read_csv(url, header=None, sep='\s+') 73 | features = data.iloc[:,1:-1] 74 | features = pd.get_dummies(features) 75 | labels = data.iloc[:,-1] 76 | labels = labels.astype('category').cat.codes 77 | return features, labels 78 | 79 | @classmethod 80 | def eeg_eyes(cls): 81 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00264/EEG%20Eye%20State.arff" 82 | data=pd.read_csv(url, skiprows=19, header=None, sep=',') 83 | features = data.iloc[:,:-1] 84 | features = pd.get_dummies(features) 85 | labels = data.iloc[:,-1] 86 | labels = labels.astype('category').cat.codes 87 | return features, labels 88 | 89 | @classmethod 90 | def haberman(cls): 91 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/haberman/haberman.data" 92 | data=pd.read_csv(url, skiprows=0, header=None, sep=',') 93 | features = data.iloc[:,:-1] 94 | features = pd.get_dummies(features) 95 | labels = data.iloc[:,-1] 96 | labels = labels.astype('category').cat.codes 97 | return features, labels 98 | 99 | @classmethod 100 | def ionosphere(cls): 101 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data" 102 | data=pd.read_csv(url, skiprows=0, header=None, sep=',') 103 | features = data.iloc[:,:-1] 104 | features = pd.get_dummies(features) 105 | labels = data.iloc[:,-1] 106 | labels = labels.astype('category').cat.codes 107 | return features, labels 108 | 109 | @classmethod 110 | def ionosphere(cls): 111 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data" 112 | data=pd.read_csv(url, skiprows=0, header=None, sep=',') 113 | features = data.iloc[:,:-1] 114 | features = pd.get_dummies(features) 115 | labels = data.iloc[:,-1] 116 | labels = labels.astype('category').cat.codes 117 | return features, labels 118 | 119 | @classmethod 120 | def mice_protein(cls): 121 | try: 122 | import xlrd 123 | except: 124 | raise ImportError("To load this dataset, you need the library 'xlrd'. 
Try installing: pip install xlrd") 125 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00342/Data_Cortex_Nuclear.xls" 126 | data=pd.read_excel(url, header=0, na_values=['', ' ']) 127 | features = data.iloc[:,1:-4] 128 | features = features.fillna(value=0) 129 | features = pd.get_dummies(features) 130 | labels = data.iloc[:,-1] 131 | labels = labels.astype('category').cat.codes 132 | return features, labels 133 | 134 | @classmethod 135 | def nursery(cls): 136 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.data" 137 | data=pd.read_csv(url, header=0) 138 | features = data.iloc[:,:-1] 139 | features = pd.get_dummies(features) 140 | labels = data.iloc[:,-1] 141 | labels = labels.astype('category').cat.codes 142 | return features, labels 143 | 144 | @classmethod 145 | def seeds(cls): 146 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00236/seeds_dataset.txt" 147 | data=pd.read_csv(url, header=0, sep='\s+') 148 | features = data.iloc[:,:-1] 149 | features = pd.get_dummies(features) 150 | labels = data.iloc[:,-1] 151 | labels = labels.astype('category').cat.codes 152 | return features, labels 153 | 154 | @classmethod 155 | def seismic(cls): 156 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00266/seismic-bumps.arff" 157 | data=pd.read_csv(url, skiprows=154, header=0, sep=',') 158 | features = data.iloc[:,:-1] 159 | features = pd.get_dummies(features) 160 | labels = data.iloc[:,-1] 161 | labels = labels.astype('category').cat.codes 162 | return features, labels 163 | 164 | @classmethod 165 | def soybean(cls): 166 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/soybean/soybean-small.data" 167 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 168 | features = data.iloc[:,:-1] 169 | features = pd.get_dummies(features) 170 | labels = data.iloc[:,-1] 171 | labels = labels.astype('category').cat.codes 172 | return features, labels 173 | 174 | @classmethod 175 | def teaching_assistant(cls): 176 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/tae/tae.data" 177 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 178 | features = data.iloc[:,:-1] 179 | features = pd.get_dummies(features) 180 | labels = data.iloc[:,-1] 181 | labels = labels.astype('category').cat.codes 182 | return features, labels 183 | 184 | @classmethod 185 | def tic_tac_toe(cls): 186 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/tic-tac-toe/tic-tac-toe.data" 187 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 188 | features = data.iloc[:,:-1] 189 | features = pd.get_dummies(features) 190 | labels = data.iloc[:,-1] 191 | labels = labels.astype('category').cat.codes 192 | return features, labels 193 | 194 | @classmethod 195 | def website_phishing(cls): 196 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00379/PhishingData.arff" 197 | data=pd.read_csv(url, skiprows=14, header=None, sep=',') 198 | features = data.iloc[:,:-1] 199 | features = pd.get_dummies(features) 200 | labels = data.iloc[:,-1] 201 | labels = labels.astype('category').cat.codes 202 | return features, labels 203 | 204 | @classmethod 205 | def wholesale_customers(cls): 206 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00292/Wholesale%20customers%20data.csv" 207 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 208 | features = data.iloc[:,2:] 209 | features = pd.get_dummies(features) 210 | labels = data.iloc[:,1] 211 | labels = labels.astype('category').cat.codes 212 | return 
features, labels 213 | 214 | 215 | 216 | classifiers = [ 217 | SVC(), 218 | GaussianNB(), 219 | DecisionTreeClassifier(), 220 | RandomForestClassifier(), 221 | MLPClassifier(hidden_layer_sizes=(100)), 222 | MLPClassifier(hidden_layer_sizes=(100,100)), 223 | MLPClassifier(hidden_layer_sizes=(100,100,100)),] 224 | 225 | names = [ 226 | 'Support Vector', 227 | 'Naive Bayes', 228 | 'Decision Tree', 229 | 'Random Forests', 230 | '1-layer NN', 231 | '2-layer NN', 232 | '3-layer NN', 233 | ] 234 | 235 | def print_stats(X_train, X_test, y_train, y_test): 236 | string = "Training set size: " + str(X_train.shape) + ", Test set size: " + str(X_test.shape) + ", \# of classes: " + str(len(np.unique(y_train))) 237 | display(Markdown(string)) 238 | 239 | def print_best(scores): 240 | eps = 1e-3 241 | best = np.max(scores) 242 | indices = np.where(scores > best - eps)[1] 243 | string = 'Best classifier: **' 244 | for i, idx in enumerate(indices): 245 | if i > 0: 246 | string += ', ' 247 | string += names[idx] 248 | string += '**' 249 | display(Markdown(string)) 250 | 251 | all_data = list() 252 | 253 | def compute_test_accuracies(X, y, train_size=0.8, verbose=1, append=True, iters=3): 254 | scores = np.zeros((iters,len(classifiers))) 255 | for i in range(iters): 256 | with warnings.catch_warnings(): 257 | warnings.simplefilter('ignore') #MLP throws annoying errors whenever it doesn't fully converge 258 | X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=train_size) 259 | if verbose>=1 and i==0: 260 | print_stats(X_train, X_test, y_train, y_test) 261 | for c, clf in enumerate(classifiers): 262 | if verbose>=2: 263 | print(names[c]) 264 | with warnings.catch_warnings(): 265 | warnings.simplefilter('ignore') #MLP throws annoying errors whenever it doesn't fully converge 266 | clf.fit(X_train, y_train) 267 | score = clf.score(X_test, y_test) 268 | scores[i,c] = score 269 | scores = np.mean(scores,axis=0).reshape(1,-1) 270 | if append: 271 | n, d = X.shape 272 | c = len(np.unique(y)) 273 | all_data.append(np.concatenate([[[n, d, c]], scores], axis=1)) 274 | return scores 275 | 276 | def highlight_max(s): 277 | ''' 278 | highlight the maximum in a Series yellow. 279 | ''' 280 | eps = 1e-3 281 | best = s.max() 282 | return ['background-color: #5fba7d' if v>best-eps else '' for v in s] 283 | 284 | def highlight_max_excluding_first_three(s): 285 | ''' 286 | highlight the maximum in a Series yellow. 
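Unlike highlight_max, the first entries of the Series (the dataset statistics n, d, and class count that compute_test_accuracies prepends to each row) are skipped when searching for the maximum.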
287 | ''' 288 | eps = 1e-3 289 | best = s[3:].max() 290 | return ['background-color: #5fba7d' if (v>best-eps and i>3) else '' for i, v in enumerate(s)] 291 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/utils-checkpoint.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from IPython.display import clear_output, Image, display, HTML 4 | import warnings 5 | import time 6 | 7 | def strip_consts(graph_def, max_const_size=32): 8 | """Strip large constant values from graph_def.""" 9 | strip_def = tf.GraphDef() 10 | for n0 in graph_def.node: 11 | n = strip_def.node.add() 12 | n.MergeFrom(n0) 13 | if n.op == 'Const': 14 | tensor = n.attr['value'].tensor 15 | size = len(tensor.tensor_content) 16 | if size > max_const_size: 17 | tensor.tensor_content = ""%size 18 | return strip_def 19 | 20 | def show_graph(graph_def, max_const_size=32): 21 | """Visualize TensorFlow graph.""" 22 | if hasattr(graph_def, 'as_graph_def'): 23 | graph_def = graph_def.as_graph_def() 24 | strip_def = strip_consts(graph_def, max_const_size=max_const_size) 25 | code = """ 26 | 31 | 32 |
33 | 34 |
35 | """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand())) 36 | 37 | iframe = """ 38 | 39 | """.format(code.replace('"', '"')) 40 | display(HTML(iframe)) 41 | 42 | 43 | class Timer(): 44 | def __init__(self): 45 | pass 46 | def start(self): 47 | self.time = time.time() 48 | def end(self): 49 | return time.time() - self.time 50 | def end_and_print(self): 51 | print("Time needed to run experiment:",np.round(time.time()-self.time,3),"s") 52 | def end_and_md_print(self): 53 | from IPython.display import Markdown, display 54 | string = "Time needed to run experiment: " + str(np.round(time.time()-self.time,3)) + " s" 55 | display(Markdown(string)) 56 | 57 | 58 | ## 59 | 60 | import matplotlib.pyplot as plt 61 | 62 | def draw_neural_net(ax, left, right, bottom, top, layer_sizes): 63 | ''' 64 | Credit: https://gist.github.com/craffel/2d727968c3aaebd10359 65 | Draw a neural network cartoon using matplotilb. 66 | 67 | :usage: 68 | >>> fig = plt.figure(figsize=(12, 12)) 69 | >>> draw_neural_net(fig.gca(), .1, .9, .1, .9, [4, 7, 2]) 70 | 71 | :parameters: 72 | - ax : matplotlib.axes.AxesSubplot 73 | The axes on which to plot the cartoon (get e.g. by plt.gca()) 74 | - left : float 75 | The center of the leftmost node(s) will be placed here 76 | - right : float 77 | The center of the rightmost node(s) will be placed here 78 | - bottom : float 79 | The center of the bottommost node(s) will be placed here 80 | - top : float 81 | The center of the topmost node(s) will be placed here 82 | - layer_sizes : list of int 83 | List of layer sizes, including input and output dimensionality 84 | ''' 85 | 86 | n_layers = len(layer_sizes) 87 | v_spacing = (top - bottom)/float(max(layer_sizes)) 88 | h_spacing = (right - left)/float(len(layer_sizes) - 1) 89 | # Nodes 90 | for n, layer_size in enumerate(layer_sizes): 91 | layer_top = v_spacing*(layer_size - 1)/2. + (top + bottom)/2. 92 | for m in range(layer_size): 93 | circle = plt.Circle((n*h_spacing + left, layer_top - m*v_spacing), v_spacing/4., 94 | color='w', ec='k', zorder=4) 95 | ax.add_artist(circle) 96 | # Edges 97 | for n, (layer_size_a, layer_size_b) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])): 98 | layer_top_a = v_spacing*(layer_size_a - 1)/2. + (top + bottom)/2. 99 | layer_top_b = v_spacing*(layer_size_b - 1)/2. + (top + bottom)/2. 
100 | for m in range(layer_size_a): 101 | for o in range(layer_size_b): 102 | line = plt.Line2D([n*h_spacing + left, (n + 1)*h_spacing + left], 103 | [layer_top_a - m*v_spacing, layer_top_b - o*v_spacing], c='k') 104 | ax.add_artist(line) 105 | 106 | 107 | ## 108 | 109 | ### COMMON ANALYTICAL FUNCTIONS ### 110 | 111 | def random_batch(x_values, y_values,size=64): 112 | assert x_values.shape[0]==y_values.shape[0] 113 | n = x_values.shape[0] 114 | indices = np.random.permutation(n)[:size] 115 | return x_values[indices], y_values[indices] 116 | 117 | def random_values(): 118 | def random_functions(x_values): 119 | n, d = x_values.shape 120 | return np.random.normal(0,1,n) 121 | return random_functions 122 | 123 | 124 | def sigmoid(x): 125 | return 1 / (1 + np.exp(-x)) 126 | 127 | def sigmoid_of_sigmoid(): 128 | def sigmoid_of_sigmoid_function(x_values): 129 | y_values = sigmoid(sigmoid(x_values[:,0]+x_values[:,1])+sigmoid(x_values[:,2]+x_values[:,3])) 130 | return y_values 131 | return sigmoid_of_sigmoid_function 132 | 133 | def polynomial_composition(power=2): 134 | def polynomial_composition_function(x_values): 135 | n, d = x_values.shape 136 | x_values = np.add.reduceat(x_values, axis=1, indices=range(0,d,2)) #adds adjacent columns together 137 | x_values = x_values**power 138 | n, d = x_values.shape 139 | x_values = np.add.reduceat(x_values, axis=1, indices=range(0,d,2)) #adds adjacent columns together 140 | x_values = x_values**power 141 | return np.sum(x_values,axis=1) 142 | return polynomial_composition_function 143 | 144 | def polynomial_to_power(power=2): 145 | from scipy.misc import factorial 146 | def polynomial_to_power_function(x_values): 147 | return 1/factorial(power)*np.power(np.sum(x_values, axis=1),power) 148 | return polynomial_to_power_function 149 | 150 | def sin(omega=6): 151 | def sin_function(x_values): 152 | return np.sin(omega*x_values) 153 | return sin_function 154 | 155 | def polynomial(coefs=[1,1,1]): 156 | def polynomial_function(x_values): 157 | return np.polynomial.polynomial.polyval(x_values,coefs) 158 | return polynomial_function 159 | 160 | def sparse_trig(): 161 | def sparse_trig_function(x_values): 162 | return 2*(2*np.cos(x_values)**2-1)**2-1 163 | return sparse_trig_function 164 | 165 | ### END COMMON FUNCTIONS ### 166 | 167 | ''' 168 | Takes the dataset and maps each column to be between 0 and 1 169 | ''' 170 | def normalize(array): 171 | if array.ndim>1: 172 | return (array - array.min(axis=0)) / array.ptp(axis=0) 173 | else: 174 | return (array - array.min()) / array.ptp() 175 | 176 | ''' 177 | Helper function to define a multi-layer perceptron. 
178 | x: input tensorflow node 179 | num_nodes: array that contains the number of nodes in each hidden layer 180 | num_input: number of nodes in input layer 181 | num_output: number of nodes in output layer 182 | activation: the tensorflow activation function to user 183 | ''' 184 | def multilayer_perceptron(x, num_nodes, num_input=1, num_output=1, activation=tf.nn.sigmoid, bias=True, initializer=tf.contrib.layers.xavier_initializer(), return_weight_tensors=False): 185 | n_prev = num_input 186 | out = x 187 | num_layer = 0 188 | weights = list() 189 | 190 | for n in num_nodes: 191 | w = tf.get_variable("w"+str(num_layer),[n_prev, n], initializer=initializer) 192 | weights.append(w) 193 | if bias: 194 | b = tf.get_variable("b"+str(num_layer),[n], initializer =initializer) 195 | out = activation(tf.add(tf.matmul(out,w),b),name="out"+str(num_layer)) 196 | else: 197 | out = activation(tf.matmul(out,w),name="out"+str(num_layer)) 198 | 199 | n_prev = n 200 | num_layer += 1 201 | 202 | w_out = tf.get_variable("w"+str(num_layer),[n, num_output], initializer =initializer) 203 | weights.append(w_out) 204 | 205 | if bias: 206 | b_out = tf.get_variable("b"+str(num_layer),[num_output], initializer =initializer) 207 | out = tf.add(tf.matmul(out,w_out),b_out,name="out"+str(num_layer)) 208 | else: 209 | out = tf.matmul(out,w_out,name="out"+str(num_layer)) 210 | 211 | if return_weight_tensors: 212 | return out, weights 213 | return out 214 | 215 | 216 | # Modified MLP for use with experiment 2 217 | def recurrent_multilayer_perceptron(x, num_nodes, num_input=1, num_output=1, activation=tf.nn.sigmoid): 218 | n_prev = num_input 219 | 220 | assert all(x == num_nodes[0] for x in num_nodes) #for a recurrent multilayer perceptron, the number of neurons in each hidden layer should be the same 221 | 222 | w_in = tf.get_variable("w_in",[n_prev, num_nodes[0]]) 223 | b_in = tf.get_variable("b_in",[num_nodes[0]]) 224 | 225 | w = tf.get_variable("w_shared",[num_nodes[0], num_nodes[0]]) 226 | b = tf.get_variable("b_shared",[num_nodes[0]]) 227 | 228 | for i in range(len(num_nodes)+1): 229 | if i==0: 230 | out = activation(tf.add(tf.matmul(x,w_in),b_in),name="out"+str(i)) 231 | else: 232 | out = activation(tf.add(tf.matmul(out,w),b),name="out"+str(i)) 233 | 234 | w_out = tf.get_variable("w_out",[num_nodes[0], num_output]) 235 | b_out = tf.get_variable("b_out",[num_output]) 236 | out = tf.add(tf.matmul(out,w_out),b_out,name="out_final") 237 | 238 | return out 239 | 240 | ''' 241 | A class to organize methods that generate datasets for some of the experiments 242 | ''' 243 | class Dataset(): 244 | from sklearn.preprocessing import OneHotEncoder 245 | from sklearn.model_selection import train_test_split 246 | 247 | @classmethod 248 | def generate_moons(cls, n, d=2, test_size=0.2, one_hot=False, normalize_x=False, noise=0): 249 | from sklearn.datasets import make_moons 250 | assert (d%2==0),"d should be even" 251 | 252 | X, y = make_moons(n, noise=noise) 253 | 254 | if normalize_x: 255 | X = normalize(X) 256 | 257 | if (one_hot): 258 | y = y.reshape(-1,1) 259 | enc = cls.OneHotEncoder(n_values=2,sparse=False) 260 | y = enc.fit_transform(y) 261 | 262 | X_train, X_test, y_train, y_test = cls.train_test_split(X, y, test_size=test_size) 263 | 264 | return X_train, X_test, y_train, y_test 265 | 266 | 267 | @classmethod 268 | def generate_mixture_of_gaussians(cls, n, d, class_seps=[1], covariance_scale=1, test_size=0.2, one_hot=False, randomly_labeled=False, class_ratio=1, return_covariance=False, cov=None, resample=False, 
normalize_x=False): 269 | 270 | if len(class_seps)==d: 271 | pass 272 | elif len(class_seps)==1: 273 | class_seps = np.repeat(class_seps,d) 274 | else: 275 | raise ValueError("class_seps must be an array of length 1 or length d") 276 | 277 | if cov is None: 278 | c = covariance_scale*np.random.random((d,d)) 279 | cov = c.T.dot(c) 280 | 281 | assert class_ratio>=1, "parameter: class_ratio must be greater than or equal to 1" 282 | n_pos = int(n/(class_ratio+1)) 283 | n_neg = int(n-n_pos) 284 | X1 = np.random.multivariate_normal([0]*d, cov, size=n_pos) 285 | X2 = np.random.multivariate_normal(class_seps, cov, size=n_neg) 286 | if resample==True: #resamples the minority class 287 | X1 = np.tile(X1, (class_ratio, 1)) 288 | n_pos = n_pos*class_ratio 289 | X = np.concatenate([X1,X2]) 290 | 291 | if normalize_x: 292 | X = normalize(X) 293 | 294 | if randomly_labeled==True: 295 | y = np.random.randint(0,2,(n_pos+n_neg)) 296 | else: 297 | y = np.array([0]*n_pos + [1]*n_neg) 298 | 299 | if (one_hot): 300 | y = y.reshape(-1,1) 301 | enc = cls.OneHotEncoder(n_values=2,sparse=False) 302 | y = enc.fit_transform(y) 303 | 304 | X_train, X_test, y_train, y_test = cls.train_test_split(X, y, test_size=test_size) 305 | 306 | if return_covariance: 307 | return X_train, X_test, y_train, y_test, cov 308 | return X_train, X_test, y_train, y_test 309 | 310 | def generate_MNIST(n_train, n_test, subset=range(10)): 311 | from tensorflow.examples.tutorials.mnist import input_data 312 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 313 | y_train = mnist.train.labels 314 | 315 | def pretty_plotting_styles(): 316 | plt.rc("font",family="sans-serif",size=20) 317 | plt.rcParams["font.sans-serif"] = "Arial" 318 | 319 | 320 | 321 | 322 | ''' 323 | Returns an RNN with the following parameters: 324 | window_size: the number of previous time_steps to use to make the prediction 325 | dim: dimensionality of the input data 326 | units: the number of hidden units in the LSTM 327 | ''' 328 | def RNN(window_size=5, dim=1, units=32): 329 | import keras 330 | from keras.models import Model 331 | from keras.layers import Dense, Input, LSTM 332 | 333 | x = Input(shape=(window_size, dim)) 334 | z, sh, sc = LSTM(units=units, return_state=True)(x) 335 | z = Dense(1, activation='tanh')(z) 336 | model = Model(inputs=[x],outputs=[z]) 337 | model.compile(loss='mse', optimizer='adam') 338 | return model 339 | 340 | 341 | 342 | ''' 343 | Converts a time-series into a form that can be used to train and validate an RNN 344 | ''' 345 | def create_windowed_dataset(time_series, window_size=5, frac_train=0.8): 346 | time_series = normalize(time_series) 347 | X_train, y_train, X_test, y_test = [], [], [], [] 348 | n = len(time_series)-window_size-1 349 | n_train = int(n*frac_train) 350 | for i in range(n): 351 | a = time_series[i:(i+window_size)] 352 | if a.ndim==1: 353 | a = a.reshape(-1, 1) 354 | if i < n_train: 355 | X_train.append(a) 356 | y_train.append(time_series[i+window_size]) 357 | else: 358 | X_test.append(a) 359 | y_test.append(time_series[i+window_size]) 360 | return np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test) 361 | 362 | def mse(y, y_): 363 | y = y.flatten() 364 | y_ = y_.flatten() 365 | assert len(y)==len(y_), "arrays must be of the same length" 366 | return np.round(np.sqrt(np.mean(np.square(y-y_))),2) 367 | 368 | ''' 369 | Helper method to train and graph the results of RNN prediction 370 | ''' 371 | def train_and_plot(time_series, window_sizes=None, hidden_units=None,epochs=20, 
figsize=None): 372 | plt.rc("font",family="sans-serif",size=14) 373 | 374 | if not(figsize is None): 375 | plt.figure(figsize=figsize) 376 | if hidden_units is None: 377 | if figsize is None: 378 | plt.figure(figsize=[4*len(window_sizes),4]) 379 | for w, window_size in enumerate(window_sizes): 380 | plt.subplot(1, len(window_sizes), w+1) 381 | X_train, y_train, X_test, y_test = create_windowed_dataset(time_series, window_size=window_size) 382 | rnn = RNN(window_size=window_size) 383 | rnn.fit(X_train, y_train, epochs=epochs, verbose=0) 384 | y_ = rnn.predict(X_test) 385 | plt.plot(y_test) 386 | plt.plot(y_,marker='.') 387 | plt.title('Window size: '+str(window_size)+', RMSE: ' + str(mse(y_, y_test))) 388 | elif window_sizes is None: 389 | if figsize is None: 390 | plt.figure(figsize=[4*len(hidden_units),4]) 391 | for h, hidden_unit in enumerate(hidden_units): 392 | plt.subplot(1, len(hidden_units), h+1) 393 | X_train, y_train, X_test, y_test = create_windowed_dataset(time_series) 394 | rnn = RNN(units=hidden_unit) 395 | rnn.fit(X_train, y_train, epochs=epochs, verbose=0) 396 | y_ = rnn.predict(X_test) 397 | plt.plot(y_test) 398 | plt.plot(y_,marker='.') 399 | plt.title('# Hidden Units: '+str(hidden_unit)+', RMSE: ' + str(mse(y_, y_test))) 400 | else: 401 | if figsize is None: 402 | plt.figure(figsize=[4*len(window_sizes), 4*len(hidden_units)]) 403 | count = 0 404 | for w, window_size in enumerate(window_sizes): 405 | for h, hidden_unit in enumerate(hidden_units): 406 | count += 1 407 | plt.subplot(len(window_sizes), len(hidden_units), count) 408 | X_train, y_train, X_test, y_test = create_windowed_dataset(time_series, window_size=window_size) 409 | rnn = RNN(units=hidden_unit, window_size=window_size) 410 | rnn.fit(X_train, y_train, epochs=epochs, verbose=0) 411 | y_ = rnn.predict(X_test) 412 | plt.plot(y_test) 413 | plt.plot(y_,marker='.') 414 | plt.title('Window: '+str(window_size)+', Hidden: '+str(hidden_unit)+', RMSE: ' + str(mse(y_, y_test))) 415 | plt.legend(['Real','Predicted']) 416 | 417 | def plot_decision_boundary(X, y, grid_pred): 418 | xx, yy = np.meshgrid(np.arange(0, 1.02, 0.02), np.arange(0, 1.02, 0.02)) 419 | grid_points = np.c_[xx.ravel(), yy.ravel()] 420 | plt.scatter(*X.T, marker='.', c=np.argmax(y, axis=1), alpha=1, cmap='RdBu') 421 | zz = grid_pred[:,1].reshape(xx.shape) 422 | plt.contourf(xx, yy, zz, cmap='RdBu', alpha=.2) 423 | plt.xlim([0, 1]); plt.ylim([0,1]) 424 | plt.xlabel('Feature 1') 425 | plt.ylabel('Feature 2') 426 | 427 | 428 | -------------------------------------------------------------------------------- /__pycache__/experiments.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abidlabs/AtomsOfDeepLearning/bef9b47d26592e51753eece6ae485785d92566e4/__pycache__/experiments.cpython-35.pyc -------------------------------------------------------------------------------- /__pycache__/uci_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abidlabs/AtomsOfDeepLearning/bef9b47d26592e51753eece6ae485785d92566e4/__pycache__/uci_utils.cpython-35.pyc -------------------------------------------------------------------------------- /__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abidlabs/AtomsOfDeepLearning/bef9b47d26592e51753eece6ae485785d92566e4/__pycache__/utils.cpython-35.pyc 
-------------------------------------------------------------------------------- /experiments.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import time 4 | import warnings 5 | from utils import * 6 | from sklearn.neural_network import MLPClassifier 7 | from sklearn.decomposition import PCA 8 | 9 | ''' 10 | All of the Experiments, starting from a base class 11 | ''' 12 | class Experiment(): 13 | def __init__(self): 14 | pass 15 | 16 | #ideally, this would make experiments completely reproducible, but because jobs are distributed over multiple cores, small differences may persist in practice 17 | def initialize(self, seed=0, fix_seed=True): 18 | if fix_seed: 19 | np.random.seed(seed) 20 | tf.set_random_seed(seed) 21 | self.timer = Timer() 22 | self.timer.start() 23 | 24 | def conclude(self): 25 | self.timer.end_and_print() 26 | ''' 27 | Experiment 1: Why do we use neural networks? 28 | Description: Performs regression using a neural network with 1 hidden layer and different number of units. Returns the original x-values, true y-values, and predicted y-values, along with the MSE loss. 29 | ''' 30 | class Experiment1(Experiment): 31 | def __init__(self): 32 | pass 33 | 34 | def run(self, 35 | n_hidden = 2, 36 | learning_rate = 0.003, 37 | num_steps = 10000, 38 | num_values = 100, 39 | function = sin(omega=6), 40 | verbose=True): 41 | 42 | 43 | x_values = np.linspace(-1,1, num_values).reshape(-1,1) 44 | y_values = function(x_values).reshape(-1,1) 45 | 46 | tf.reset_default_graph() 47 | x = tf.placeholder(dtype="float", shape=[None,1]) 48 | y = tf.placeholder(dtype="float", shape=[None,1]) 49 | y_ = multilayer_perceptron(x, num_nodes=[n_hidden]) 50 | 51 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 52 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 53 | train_op = optimizer.minimize(loss_op) 54 | init_op = tf.global_variables_initializer() 55 | 56 | with tf.Session() as sess: 57 | sess.run(init_op) 58 | y_preds = list() 59 | for step in range(num_steps): 60 | _, loss, y_pred = sess.run([train_op, loss_op, y_], feed_dict={x:x_values,y:y_values}) 61 | if (step%(num_steps/10)==0 and verbose): 62 | print(loss) 63 | y_preds.append(y_pred.squeeze()) 64 | 65 | return x_values.squeeze(), y_values.squeeze(), y_pred.squeeze(), loss 66 | 67 | ''' 68 | Experiment 2: Why are Deeper Networks Better? 
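Example usage (an illustrative sketch; the parameter values below are arbitrary, not the settings used in the notebooks):
    exp = Experiment2()
    exp.initialize(seed=0)
    x, y, y_pred, loss, acc, n_params = exp.run(n=16, n_hidden=[10, 10], recurrent=True)
    exp.conclude()
Returns the inputs, targets, predictions, final loss, accuracy, and the total number of trainable parameters.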
69 | ''' 70 | class Experiment2(Experiment): 71 | def __init__(self): 72 | pass 73 | 74 | def run(self, 75 | n=16, 76 | n_hidden=[10], 77 | num_steps=15000, 78 | learning_rate = 0.003, 79 | verbose=False, 80 | recurrent=True): 81 | 82 | 83 | x_values = np.linspace(0,1-1/n,n).reshape(-1,1) 84 | y_values = np.resize([[0,1],[1,0]], (n,2)) 85 | 86 | tf.reset_default_graph() 87 | x = tf.placeholder(dtype="float", shape=[None,1]) 88 | y = tf.placeholder(dtype="float", shape=[None,2]) 89 | 90 | if recurrent: 91 | y_ = recurrent_multilayer_perceptron(x, num_input=1, num_output=2, num_nodes=n_hidden,activation=tf.nn.relu) 92 | else: 93 | y_ = multilayer_perceptron(x, num_input=1, num_output=2, num_nodes=n_hidden,bias=bias,activation=tf.nn.relu) 94 | 95 | 96 | n_params = np.sum([np.product([xi.value for xi in x.get_shape()]) for x in tf.global_variables()]) 97 | #show_graph(tf.get_default_graph().as_graph_def()) 98 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 99 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 100 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 101 | 102 | #loss_op = tf.reduce_mean(tf.square(y_ - y)) 103 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 104 | train_op = optimizer.minimize(loss_op) 105 | init_op = tf.global_variables_initializer() 106 | 107 | with tf.Session() as sess: 108 | sess.run(init_op) 109 | for step in range(num_steps): 110 | x_batch, y_batch = random_batch(x_values, y_values) 111 | _, loss, y_pred = sess.run([train_op, loss_op, y_], feed_dict={x:x_batch,y:y_batch}) 112 | if (step%(num_steps/10)==0 and verbose): 113 | print(loss) 114 | 115 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:x_values,y:y_values}) 116 | 117 | return x_values.squeeze(), y_values.squeeze(), y_pred.squeeze(), loss, accuracy, n_params 118 | 119 | 120 | ''' 121 | Experiment 3: Does More Data Favor Deeper Neural Networks? 122 | ''' 123 | class Experiment3(Experiment): 124 | def __init__(self): 125 | pass 126 | 127 | def run(self, 128 | classifiers, 129 | d = 12, 130 | class_seps = [1], 131 | ns = np.logspace(2,4,10), 132 | iters = 3, 133 | covariance_scale = 1, 134 | test_size = 0.2, 135 | accuracy_on = 'test', 136 | recurrent=True): 137 | 138 | acc = np.zeros((len(ns),len(classifiers),iters)) 139 | n_max = int(np.max(ns)) 140 | 141 | for k in range(iters): 142 | 143 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n_max, 144 | d=d, 145 | class_seps=class_seps, 146 | covariance_scale=covariance_scale, 147 | test_size=test_size) 148 | for i, n in enumerate(ns): 149 | for j, clf in enumerate(classifiers): 150 | with warnings.catch_warnings(): 151 | warnings.simplefilter('ignore') #MLP throws annoying errors whenever it doesn't fully converge 152 | n_train = int(n*(1-test_size)) 153 | clf.fit(X_train[:n_train],y_train[:n_train]) #choose a subset of the training data 154 | if accuracy_on=='train': 155 | acc[i,j,k] = clf.score(X_train[:int(n*(1-test_size))],y_train[:int(n*(1-test_size))]) 156 | elif accuracy_on=='test': 157 | acc[i,j,k] = clf.score(X_test,y_test) 158 | else: 159 | raise ValueError("accuracy_on must be 'test' or 'train'") 160 | 161 | return acc 162 | 163 | 164 | ''' 165 | Experiment 4: Does Unbalanced Data Hurt Neural Networks? 
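Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment4()
    exp.initialize(seed=0)
    acc_matrix, covs = exp.run(ratios=[1, 3, 10], iters=3, resample=False)
    exp.conclude()
acc_matrix has shape (len(ratios), iters); the returned covariance matrices can be fed back in through load_covs to repeat a run on the same class distributions.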
166 | ''' 167 | class Experiment4(Experiment): 168 | 169 | def __init__(self): 170 | pass 171 | 172 | def run(self, 173 | d = 12, 174 | iters = 3, 175 | covariance_scale = 1, 176 | test_size = 0.2, 177 | resample=False, 178 | n = 1200, 179 | num_steps=500, 180 | learning_rate = 0.003, 181 | verbose=False, 182 | load_covs = None, 183 | classify_grid = False, 184 | hidden_layer_sizes=(100,100), 185 | ratios = [1]): 186 | 187 | acc_matrix = np.zeros((len(ratios),iters)) 188 | class_seps = [1/(i+1) for i in range(d)] 189 | clf = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes) 190 | saved_covs = [] 191 | 192 | counter = 0 193 | for k in range(iters): 194 | for r, ratio in enumerate(ratios): 195 | # load covariance matrices for reproducibility 196 | if load_covs is None: 197 | cov = None 198 | else: 199 | cov = load_covs[counter] 200 | counter += 1 201 | 202 | 203 | X_train, _, y_train, _, cov = Dataset.generate_mixture_of_gaussians(n=n, 204 | d=d, 205 | normalize_x=True, 206 | one_hot=True, 207 | class_seps=class_seps, 208 | covariance_scale=covariance_scale, 209 | test_size=0, 210 | cov = cov, 211 | class_ratio=ratio, 212 | resample=resample, 213 | return_covariance=True) 214 | saved_covs.append(cov) 215 | X_test, _, y_test, _ = Dataset.generate_mixture_of_gaussians(n=int(n/4), 216 | d=d, 217 | normalize_x = True, 218 | one_hot=True, 219 | class_seps=class_seps, 220 | covariance_scale=covariance_scale, 221 | test_size=0, 222 | cov=cov) 223 | 224 | 225 | 226 | 227 | tf.reset_default_graph() 228 | x = tf.placeholder(dtype="float", shape=[None,d]) 229 | y = tf.placeholder(dtype="float", shape=[None,2]) 230 | y_ = multilayer_perceptron(x, num_input=d, num_output=2, num_nodes=hidden_layer_sizes) 231 | 232 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_)) 233 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 234 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 235 | 236 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 237 | train_op = optimizer.minimize(cross_entropy) 238 | init_op = tf.global_variables_initializer() 239 | 240 | with tf.Session() as sess: 241 | sess.run(init_op) 242 | y_preds = list() 243 | for step in range(num_steps): 244 | _, loss, acc, cp = sess.run([train_op, cross_entropy, accuracy_op, correct_prediction], feed_dict={x:X_train,y:y_train}) 245 | if (step%(num_steps/10)==0 and verbose): 246 | print(loss, acc, cp) 247 | 248 | accuracy, y_pred = sess.run([accuracy_op, y_], feed_dict={x:X_test,y:y_test}) 249 | acc_matrix[r,k] = accuracy 250 | 251 | if classify_grid: 252 | xx, yy = np.meshgrid(np.arange(0, 1.02, 0.02), np.arange(0, 1.02, 0.02)) 253 | grid_points = np.c_[xx.ravel(), yy.ravel()] 254 | grid_preds = sess.run(y_, feed_dict={x:grid_points}) 255 | 256 | if classify_grid: 257 | return acc_matrix, saved_covs, X_train, y_train, X_test, y_test, y_pred, grid_preds 258 | return acc_matrix, saved_covs 259 | 260 | 261 | ''' 262 | 5. Are Neural Networks Memorizing Or Generalizing During Training? 
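Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment5()
    exp.initialize(seed=0)
    acc = exp.run(ns=[100, 500, 2000], iters=3, randomize=True)
    exp.conclude()
With randomize=True the training labels are shuffled, so training accuracy above chance indicates memorization. acc has shape (len(ns), iters); set return_accuracy_per_epoch=True to instead record accuracy at 10 checkpoints during training.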
263 | ''' 264 | class Experiment5(Experiment): 265 | 266 | def __init__(self): 267 | pass 268 | 269 | def run(self, 270 | d = 6, 271 | iters = 3, 272 | covariance_scale = 1, 273 | test_size = 0.2, 274 | class_seps = [1 for i in range(6)], 275 | ns = [500], 276 | return_accuracy_per_epoch=False, 277 | randomize=False, 278 | verbose=False, 279 | learning_rate = 0.003, 280 | num_steps=2500, 281 | hidden_layer_sizes=(100,100)): 282 | 283 | if return_accuracy_per_epoch: 284 | acc = np.zeros((10, len(ns),iters)) 285 | else: 286 | acc = np.zeros((len(ns),iters)) 287 | n_max = np.max(ns) 288 | 289 | for k in range(iters): 290 | X_train_, _, y_train_, _ = Dataset.generate_mixture_of_gaussians(n=n_max, 291 | d=d, 292 | class_seps=class_seps, 293 | covariance_scale=covariance_scale, 294 | one_hot=True, 295 | test_size=0) 296 | if randomize: 297 | y_train_ = np.random.permutation(y_train_) 298 | 299 | for n_i, n in enumerate(ns): 300 | step_multiple = 0 301 | tf.reset_default_graph() 302 | X_train = X_train_[:n]; y_train = y_train_[:n] 303 | x = tf.placeholder(dtype="float", shape=[None,d]) 304 | y = tf.placeholder(dtype="float", shape=[None,2]) 305 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2) 306 | 307 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 308 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 309 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 310 | 311 | #loss_op = tf.reduce_mean(tf.square(y_ - y)) 312 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 313 | train_op = optimizer.minimize(loss_op) 314 | init_op = tf.global_variables_initializer() 315 | 316 | with tf.Session() as sess: 317 | sess.run(init_op) 318 | for step in range(num_steps): 319 | x_batch, y_batch = random_batch(X_train, y_train) 320 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 321 | if (step%(num_steps/10)==0 and verbose): 322 | print(accuracy) 323 | if (step%(num_steps/10)==0 and return_accuracy_per_epoch): 324 | accuracy = sess.run(accuracy_op, feed_dict={x:X_train,y:y_train}) 325 | acc[step_multiple, n_i, k] = accuracy 326 | step_multiple += 1 327 | 328 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_train,y:y_train}) 329 | 330 | if not(return_accuracy_per_epoch): 331 | acc[n_i,k] = accuracy 332 | 333 | return acc 334 | 335 | 336 | ''' 337 | ## 6. Does Unsupervised Feature Reduction Help or Hurt? 
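Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment6()
    exp.initialize(seed=0)
    acc = exp.run(d=10, dummy_dims=[0, 10, 50], pca_dims=[None, 10], noise_level=1)
    exp.conclude()
acc has shape (iters, len(dummy_dims), len(pca_dims)); a pca_dim of None means the randomly rotated features are used without any PCA step.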
338 | ''' 339 | class Experiment6(Experiment): 340 | 341 | def __init__(self): 342 | pass 343 | 344 | def run(self, 345 | d = 10, 346 | iters = 3, 347 | covariance_scale = 0.2, 348 | test_size = 0.2, 349 | n = 100, 350 | dummy_dims = [0], 351 | pca_dims = [None], 352 | verbose=False, 353 | noise_level = 0, 354 | learning_rate = 0.003, 355 | num_steps=500, 356 | hidden_layer_sizes=(100,100)): 357 | 358 | from scipy.stats import special_ortho_group 359 | 360 | class_seps = [1 for i in range(d)] 361 | acc = np.zeros((iters, len(dummy_dims),len(pca_dims))) 362 | 363 | for k in range(iters): 364 | X_train_, X_test_, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 365 | d=d, 366 | class_seps=class_seps, 367 | covariance_scale=covariance_scale, 368 | one_hot=True, 369 | test_size=test_size) 370 | 371 | for d_i, dummy_dim in enumerate(dummy_dims): 372 | X_train = np.concatenate((X_train_, noise_level*np.random.random(size=(X_train_.shape[0], dummy_dim))),axis=1); 373 | X_test = np.concatenate((X_test_, noise_level*np.random.random(size=(X_test_.shape[0], dummy_dim))),axis=1); 374 | 375 | rotation_matrix = np.random.random(size=(d+dummy_dim,d+dummy_dim)) 376 | X_train = X_train.dot(rotation_matrix) 377 | X_test = X_test.dot(rotation_matrix) 378 | 379 | 380 | for p_i, pca_dim in enumerate(pca_dims): 381 | pca = PCA(n_components = pca_dim) 382 | if not(pca_dim is None): 383 | X_train = pca.fit_transform(X_train) 384 | X_test = pca.transform(X_test) 385 | if pca_dim is None: 386 | pca_dim = d+dummy_dim 387 | 388 | tf.reset_default_graph() 389 | x = tf.placeholder(dtype="float", shape=[None,pca_dim]) 390 | y = tf.placeholder(dtype="float", shape=[None,2]) 391 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=pca_dim, num_output=2) 392 | 393 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 394 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 395 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 396 | 397 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 398 | train_op = optimizer.minimize(loss_op) 399 | init_op = tf.global_variables_initializer() 400 | 401 | with tf.Session() as sess: 402 | sess.run(init_op) 403 | for step in range(num_steps): 404 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:X_train,y:y_train}) 405 | if (step%(num_steps/10)==0 and verbose): 406 | print(accuracy) 407 | 408 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 409 | acc[k,d_i,p_i] = accuracy 410 | return acc 411 | 412 | 413 | ''' 414 | 7. Can Any Non-linearity Be Used As the Activation Function? 
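Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment7()
    exp.initialize(seed=0)
    acc = exp.run(activations=[tf.nn.sigmoid, tf.nn.relu, tf.square], noise=0.1)
    exp.conclude()
acc has shape (iters, 10, len(activations)): test accuracy is recorded at 10 evenly spaced points during training for each activation function.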
415 | ''' 416 | class Experiment7(Experiment): 417 | 418 | def __init__(self): 419 | pass 420 | 421 | def run(self, 422 | iters = 1, 423 | d = 2, 424 | test_size = 0.2, 425 | n = 500, 426 | noise = 0.1, 427 | verbose=False, 428 | activations = [tf.nn.sigmoid, tf.square], 429 | learning_rate = 0.003, 430 | num_steps=800, 431 | hidden_layer_sizes=(30,30)): 432 | 433 | 434 | acc = np.zeros((iters, 10, len(activations))) 435 | n_max = n 436 | 437 | for k in range(iters): 438 | X_train, X_test, y_train, y_test = Dataset.generate_moons(n=n_max, 439 | test_size=0.2, 440 | one_hot=True, 441 | noise=noise) 442 | 443 | for a_i, a in enumerate(activations): 444 | step_counter = 0 445 | tf.reset_default_graph() 446 | x = tf.placeholder(dtype="float", shape=[None,d]) 447 | y = tf.placeholder(dtype="float", shape=[None,2]) 448 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, activation=a) 449 | 450 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 451 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 452 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 453 | 454 | #loss_op = tf.reduce_mean(tf.square(y_ - y)) 455 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 456 | train_op = optimizer.minimize(loss_op) 457 | init_op = tf.global_variables_initializer() 458 | 459 | with tf.Session() as sess: 460 | sess.run(init_op) 461 | for step in range(num_steps): 462 | x_batch, y_batch = random_batch(X_train, y_train) 463 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 464 | if (step%(num_steps/10)==0): 465 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 466 | acc[k, step_counter, a_i] = accuracy 467 | step_counter += 1 468 | if verbose: 469 | print(accuracy) 470 | 471 | 472 | 473 | return acc 474 | 475 | ''' 476 | 8. How Does Batch Size Affect the Results? 
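Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment8()
    exp.initialize(seed=0)
    runtimes, acc = exp.run(batch_sizes=[8, 32, 128, 512], num_epochs=150)
    exp.conclude()
runtimes[i] is the wall-clock training time for batch_sizes[i]; acc has shape (len(batch_sizes), iters) unless return_accuracy_per_epoch=True.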
477 | ''' 478 | class Experiment8(Experiment): 479 | 480 | def __init__(self): 481 | pass 482 | 483 | def run(self, 484 | d = 12, 485 | iters = 3, 486 | covariance_scale = 1, 487 | test_size = 0.2, 488 | n = 500, 489 | batch_sizes = [32], 490 | return_accuracy_per_epoch=False, 491 | verbose=False, 492 | learning_rate = 0.003, 493 | num_epochs=150, 494 | store_every=10, 495 | hidden_layer_sizes=(100,100)): 496 | 497 | class_seps = [1 for i in range(12)] 498 | timer = Timer() 499 | if return_accuracy_per_epoch: 500 | acc = np.zeros((int(num_epochs/store_every)-1, len(batch_sizes),iters)) 501 | else: 502 | acc = np.zeros((len(batch_sizes),iters)) 503 | 504 | runtimes = np.zeros((len(batch_sizes))) 505 | for k in range(iters): 506 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 507 | d=d, 508 | class_seps=class_seps, 509 | covariance_scale=covariance_scale, 510 | one_hot=True, 511 | test_size=test_size) 512 | for b_i, batch_size in enumerate(batch_sizes): 513 | timer.start() 514 | step_multiple = 0 515 | tf.reset_default_graph() 516 | x = tf.placeholder(dtype="float", shape=[None,d]) 517 | y = tf.placeholder(dtype="float", shape=[None,2]) 518 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2) 519 | 520 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 521 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 522 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 523 | 524 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 525 | train_op = optimizer.minimize(loss_op) 526 | init_op = tf.global_variables_initializer() 527 | 528 | with tf.Session() as sess: 529 | sess.run(init_op) 530 | num_steps = int(num_epochs*n/batch_size) 531 | store_acc_threshold = num_steps/num_epochs*store_every 532 | for step in range(num_steps): 533 | x_batch, y_batch = random_batch(X_train, y_train, size=batch_size) 534 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 535 | if (step%(num_steps/num_epochs)==0 and verbose): 536 | print(accuracy) 537 | if (step>store_acc_threshold and return_accuracy_per_epoch): 538 | accuracy = sess.run(accuracy_op, feed_dict={x:X_train,y:y_train}) 539 | acc[step_multiple, b_i, k] = accuracy 540 | step_multiple += 1 541 | store_acc_threshold += num_steps/num_epochs*store_every 542 | 543 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 544 | 545 | if not(return_accuracy_per_epoch): 546 | acc[b_i,k] = accuracy # otherwise, this is stored earlier 547 | runtimes[b_i] = timer.end() 548 | 549 | return runtimes, acc 550 | 551 | 552 | 553 | ''' 554 | 9. How Does the Loss Function Matter? 
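Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment9()
    exp.initialize(seed=0)
    acc = exp.run(loss_functions=['cross_entropy', 'mean_squared_error', 'hinge_loss', 'constant'])
    exp.conclude()
Valid names are listed in LOSS_FUNCTIONS inside run(); acc has shape (iters, 10, len(loss_functions)), with test accuracy recorded at 10 points during training.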
555 | ''' 556 | class Experiment9(Experiment): 557 | 558 | def __init__(self): 559 | pass 560 | 561 | def run(self, 562 | d = 12, 563 | iters = 1, 564 | covariance_scale = 1, 565 | test_size = 0.2, 566 | n = 500, 567 | randomize=False, 568 | verbose=False, 569 | loss_functions = ['cross_entropy', 'mean_squared_error'], 570 | learning_rate = 0.003, 571 | num_steps=500, 572 | hidden_layer_sizes=(100,100)): 573 | 574 | class_seps = [1/(i+1) for i in range(d)] 575 | acc = np.zeros((iters, 10, len(loss_functions))) 576 | n_max = n 577 | LOSS_FUNCTIONS = ['cross_entropy', 578 | 'mean_abs_error', 579 | 'mean_squared_error', 580 | 'mean_fourth_pow_error', 581 | 'hinge_loss', 582 | 'constant'] 583 | 584 | for k in range(iters): 585 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n_max, 586 | d=d, 587 | class_seps=class_seps, 588 | covariance_scale=covariance_scale, 589 | one_hot=True) 590 | if randomize: 591 | y_train_ = np.random.permutation(y_train_) 592 | 593 | 594 | for l_i, l in enumerate(loss_functions): 595 | step_counter = 0 596 | if not(l in LOSS_FUNCTIONS): 597 | raise ValueError("Valid loss functions are " + str(LOSS_FUNCTIONS)) 598 | 599 | tf.reset_default_graph() 600 | x = tf.placeholder(dtype="float", shape=[None,d]) 601 | y = tf.placeholder(dtype="float", shape=[None,2]) 602 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2) 603 | 604 | if l=='cross_entropy': 605 | loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)) 606 | elif l=='mean_squared_error': 607 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 608 | elif l=='mean_abs_error': 609 | loss_op = tf.reduce_mean(tf.abs(y_ - y)) 610 | elif l=='hinge_loss': 611 | loss_op = tf.losses.hinge_loss(labels=y, logits=y_) 612 | elif l=='mean_fourth_pow_error': 613 | loss_op = tf.reduce_mean(tf.pow(y_ - y, 4)) 614 | elif l=='constant': 615 | loss_op = 0*tf.reduce_mean(tf.square(y_ - y)) 616 | 617 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 618 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 619 | 620 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 621 | train_op = optimizer.minimize(loss_op) 622 | init_op = tf.global_variables_initializer() 623 | 624 | with tf.Session() as sess: 625 | sess.run(init_op) 626 | for step in range(num_steps): 627 | x_batch, y_batch = random_batch(X_train, y_train) 628 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 629 | if (step%(num_steps/10)==0): 630 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 631 | acc[k, step_counter, l_i] = accuracy 632 | step_counter += 1 633 | if verbose: 634 | print(accuracy) 635 | 636 | return acc 637 | 638 | ''' 639 | 10. How Does the Initialization Affect Performance? 
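Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment10()
    exp.initialize(seed=0)
    acc = exp.run(initializers=[tf.contrib.layers.xavier_initializer(),
                                tf.random_normal_initializer(stddev=0.1)])
    exp.conclude()
acc has shape (iters, 10, len(initializers)), with test accuracy recorded at 10 points during training.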
640 | ''' 641 | class Experiment10(Experiment): 642 | 643 | def __init__(self): 644 | pass 645 | 646 | def run(self, 647 | d = 12, 648 | iters = 1, 649 | covariance_scale = 1, 650 | test_size = 0.2, 651 | n = 500, 652 | randomize=False, 653 | verbose=False, 654 | initializers = [tf.contrib.layers.xavier_initializer()], 655 | learning_rate = 0.003, 656 | num_steps=500, 657 | hidden_layer_sizes=(100,100)): 658 | 659 | class_seps = [1/(i+1) for i in range(d)] 660 | acc = np.zeros((iters, 10, len(initializers))) 661 | 662 | for k in range(iters): 663 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 664 | d=d, 665 | class_seps=class_seps, 666 | covariance_scale=covariance_scale, 667 | one_hot=True) 668 | 669 | for i_i, initializer in enumerate(initializers): 670 | step_counter = 0 671 | tf.reset_default_graph() 672 | x = tf.placeholder(dtype="float", shape=[None,d]) 673 | y = tf.placeholder(dtype="float", shape=[None,2]) 674 | y_ = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, initializer=initializer) 675 | 676 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 677 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 678 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 679 | 680 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 681 | train_op = optimizer.minimize(loss_op) 682 | init_op = tf.global_variables_initializer() 683 | 684 | with tf.Session() as sess: 685 | sess.run(init_op) 686 | for step in range(num_steps): 687 | x_batch, y_batch = random_batch(X_train, y_train) 688 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 689 | if (step%(num_steps/10)==0): 690 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 691 | acc[k, step_counter, i_i] = accuracy 692 | step_counter += 1 693 | if verbose: 694 | print(accuracy) 695 | 696 | accuracy, loss, y_pred = sess.run([accuracy_op, loss_op, y_], feed_dict={x:X_test,y:y_test}) 697 | 698 | 699 | return acc 700 | 701 | ''' 702 | 11. Do Weights in Different Layers Evolve At Different Speeds? 
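Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment11()
    exp.initialize(seed=0)
    weights, accs = exp.run(num_steps=500)
    exp.conclude()
weights[t] holds the four weight matrices [w0, w1, w2, w3] captured every other training step, and accs[t] is the matching test accuracy, so the per-layer rate of change can be plotted over training.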
703 | ''' 704 | class Experiment11(Experiment): 705 | 706 | def __init__(self): 707 | pass 708 | 709 | def run(self, 710 | d = 12, 711 | covariance_scale = 1, 712 | test_size = 0.2, 713 | n = 500, 714 | store_every=2, 715 | randomize=False, 716 | verbose=False, 717 | learning_rate = 0.003, 718 | num_steps=500,): 719 | 720 | class_seps = [1/(i+1) for i in range(d)] 721 | hidden_layer_sizes=(50,50,50) 722 | weights = [] 723 | accs = [] 724 | 725 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 726 | d=d, 727 | class_seps=class_seps, 728 | covariance_scale=covariance_scale, 729 | one_hot=True) 730 | 731 | step_counter = 0 732 | 733 | tf.reset_default_graph() 734 | x = tf.placeholder(dtype="float", shape=[None,d]) 735 | y = tf.placeholder(dtype="float", shape=[None,2]) 736 | y_, wts = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, return_weight_tensors=True) 737 | 738 | loss_op = tf.reduce_mean(tf.square(y_ - y)) 739 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 740 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 741 | 742 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 743 | train_op = optimizer.minimize(loss_op) 744 | init_op = tf.global_variables_initializer() 745 | 746 | with tf.Session() as sess: 747 | sess.run(init_op) 748 | for step in range(num_steps): 749 | x_batch, y_batch = random_batch(X_train, y_train) 750 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 751 | if (step%2==0): 752 | accuracy, w0, w1, w2, w3 = sess.run([accuracy_op, wts[0], wts[1], wts[2], wts[3]], feed_dict={x:X_test,y:y_test}) 753 | weights.append([w0, w1, w2, w3]) 754 | accs.append(accuracy) 755 | 756 | if verbose: 757 | print(accuracy) 758 | 759 | return weights, accs 760 | 761 | ''' 762 | 12. How Does Regularization Affect Weight Evolution? 
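Example usage (an illustrative sketch; the values below are arbitrary):
    exp = Experiment12()
    exp.initialize(seed=0)
    weights, accs = exp.run(regularization_type='L2', regularization_strength=0.1)
    exp.conclude()
regularization_type may be 'L1' or 'L2'; weights[t] holds the three weight matrices [w0, w1, w2] captured every other training step, with accs[t] the matching test accuracy.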
763 | ''' 764 | class Experiment12(Experiment): 765 | 766 | def __init__(self): 767 | pass 768 | 769 | def run(self, 770 | d = 12, 771 | covariance_scale = 1, 772 | test_size = 0.2, 773 | n = 500, 774 | regularization_type = 'L2', 775 | regularization_strength = 0, 776 | store_every=2, 777 | randomize=False, 778 | verbose=False, 779 | learning_rate = 0.003, 780 | num_steps=500,): 781 | 782 | class_seps = [1/(i+1) for i in range(d)] 783 | hidden_layer_sizes=(50,50) 784 | weights = [] 785 | accs = [] 786 | 787 | X_train, X_test, y_train, y_test = Dataset.generate_mixture_of_gaussians(n=n, 788 | d=d, 789 | class_seps=class_seps, 790 | covariance_scale=covariance_scale, 791 | one_hot=True) 792 | 793 | step_counter = 0 794 | 795 | tf.reset_default_graph() 796 | x = tf.placeholder(dtype="float", shape=[None,d]) 797 | y = tf.placeholder(dtype="float", shape=[None,2]) 798 | y_, wts = multilayer_perceptron(x, num_nodes=hidden_layer_sizes, num_input=d, num_output=2, return_weight_tensors=True) 799 | 800 | if regularization_type=='L2': 801 | loss_op = tf.reduce_mean(tf.square(y_ - y)) + regularization_strength*(tf.reduce_mean(tf.square(wts[0])) + tf.reduce_mean(tf.square(wts[1])) + tf.reduce_mean(tf.square(wts[2]))) 802 | elif regularization_type=='L1': 803 | loss_op = tf.reduce_mean(tf.square(y_ - y)) + regularization_strength*(tf.reduce_mean(tf.abs(wts[0])) + tf.reduce_mean(tf.abs(wts[1])) + tf.reduce_mean(tf.abs(wts[2]))) 804 | else: 805 | raise ValueError("regularization_type must be 'L1' or 'L2'") 806 | 807 | correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)) 808 | accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float")) 809 | 810 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 811 | train_op = optimizer.minimize(loss_op) 812 | init_op = tf.global_variables_initializer() 813 | 814 | with tf.Session() as sess: 815 | sess.run(init_op) 816 | for step in range(num_steps): 817 | x_batch, y_batch = random_batch(X_train, y_train) 818 | _, accuracy, y_pred = sess.run([train_op, accuracy_op, y_], feed_dict={x:x_batch,y:y_batch}) 819 | if (step%2==0): 820 | accuracy, w0, w1, w2 = sess.run([accuracy_op, wts[0], wts[1], wts[2]], feed_dict={x:X_test,y:y_test}) 821 | weights.append([w0, w1, w2]) 822 | accs.append(accuracy) 823 | 824 | if verbose: 825 | print(accuracy) 826 | 827 | return weights, accs 828 | -------------------------------------------------------------------------------- /ise.csv: -------------------------------------------------------------------------------- 1 | 0.035753708 2 | 0.025425873 3 | -0.02886173 4 | -0.062208079 5 | 0.009859905 6 | -0.029191028 7 | 0.015445348 8 | -0.041167612 9 | 0.000661905 10 | 0.022037345 11 | -0.022692465 12 | -0.013708704 13 | 0.000864697 14 | -0.00381506 15 | 0.00566126 16 | 0.046831302 17 | -0.006634978 18 | 0.034566982 19 | -0.020528213 20 | -0.008776701 21 | -0.025919141 22 | 0.015279487 23 | 0.018577796 24 | -0.014132879 25 | 0.036607044 26 | 0.011353209 27 | -0.040542021 28 | -0.022105644 29 | -0.014888368 30 | 0.007026745 31 | -0.011494996 32 | -0.041136038 33 | -0.002631499 34 | 0.024654643 35 | -0.03584061 36 | 0.017303168 37 | 0.001725406 38 | 0.004975853 39 | 0.000671759 40 | -0.005891895 41 | -0.013689039 42 | 0.002192959 43 | 0.007913215 44 | -0.03852223 45 | 0.007958798 46 | -0.007133473 47 | 0.011234009 48 | -0.001410361 49 | 0.010974424 50 | 0.003213253 51 | 0.000214245 52 | -0.00711875 53 | 0.001891803 54 | 0.019874248 55 | 0.002918699 56 | 0.035968063 57 | 0.003298424 58 | 0.021165071 59 | 
-0.004968387 60 | 0.011247875 61 | -0.021780194 62 | 0.024406595 63 | 0.006913411 64 | 0.031401768 65 | -0.005247358 66 | 0.000735712 67 | -0.010297613 68 | 0.047238893 69 | 0.03177774 70 | 0.010170736 71 | -0.013661316 72 | -0.010111415 73 | 0.0057888 74 | 0.041454961 75 | -0.002617982 76 | -0.030954776 77 | 0.004548211 78 | 0.009655946 79 | 0.024517037 80 | -0.004162916 81 | -0.012367953 82 | 0.05198032 83 | 0.029022746 84 | 0.016261076 85 | 0.025882269 86 | 0.021061108 87 | -0.026273031 88 | -0.001123867 89 | -0.018541411 90 | 0.025756738 91 | -0.006508992 92 | 0.008070571 93 | 0.011703446 94 | 0.00540756 95 | 0.042840743 96 | -0.011999815 97 | 0.008430531 98 | 0.011166484 99 | -0.015572642 100 | 0.008801231 101 | -0.004209168 102 | -0.000514681 103 | 0.028131284 104 | -0.007792426 105 | -0.022362446 106 | -0.019171953 107 | 0.013947094 108 | -0.032005875 109 | 0.004478365 110 | 0.017089104 111 | 0.00967985 112 | 0.002670961 113 | -0.003623787 114 | -0.004441286 115 | 0.001688065 116 | 0.031866012 117 | 0.002917994 118 | -0.011589253 119 | -0.014168795 120 | 0.024046363 121 | 0.015786402 122 | 0.011719598 123 | 0.002315584 124 | 0.002929655 125 | 0.007996801 126 | -0.005590715 127 | -0.006518816 128 | -0.00105307 129 | 0.012265455 130 | -0.001519732 131 | -0.004539036 132 | -0.005617343 133 | 0.006074147 134 | -0.008232237 135 | 0.032511512 136 | 0.006370441 137 | -0.011323604 138 | 0.013780465 139 | -0.006357127 140 | 0.004758472 141 | 0.018495281 142 | 0.012603659 143 | 0.011235934 144 | 0.004036218 145 | 0.009330697 146 | 0.043744798 147 | 0.010826132 148 | 0.045219554 149 | 0.001853539 150 | -0.017993859 151 | -0.001445482 152 | 0.021028135 153 | -0.001832467 154 | -0.02278477 155 | 0.004813828 156 | 0.013453937 157 | -0.004084155 158 | -0.000271351 159 | -0.001299983 160 | 0.031414816 161 | 0.019757306 162 | 0.013108203 163 | 0.012676973 164 | 0.006291416 165 | -0.023435627 166 | 0.008987423 167 | 0.003705789 168 | -0.021432986 169 | 0.008224308 170 | -0.02534402 171 | 0.008378078 172 | -0.021587901 173 | 0.002509442 174 | -0.009497407 175 | -0.005834448 176 | 0.007574225 177 | 0.036271557 178 | -0.001994969 179 | -0.012536673 180 | 0.016615187 181 | 0.006084621 182 | 0.004157378 183 | 0.008195123 184 | 0.009023651 185 | -0.007433515 186 | 0.004001644 187 | 0.011414049 188 | -0.009442384 189 | -0.002213036 190 | -0.020018346 191 | 0.023026511 192 | 0.031160934 193 | -0.008730222 194 | 0.01706096 195 | 0.006746257 196 | 0.015520491 197 | -0.01843108 198 | 0.018883892 199 | -0.000160911 200 | -0.024346941 201 | 0.009915384 202 | 0.01507818 203 | 0.004774399 204 | -0.005441211 205 | 0.007105868 206 | -0.002934588 207 | -0.01632457 208 | -0.030082971 209 | -0.035849614 210 | 0.005735384 211 | -0.02390659 212 | 0.020219135 213 | 0.000351028 214 | -0.006962099 215 | 0.016985685 216 | 0.00751899 217 | 0.015029496 218 | -0.002417171 219 | -0.006258617 220 | 0.00685869 221 | -0.031914469 222 | -0.017581763 223 | -0.006598214 224 | -0.019347799 225 | 0.012533508 226 | -0.00630611 227 | 0.00055265 228 | 0.011903098 229 | 0.038612983 230 | 0.036468359 231 | 0.008514453 232 | -0.003723744 233 | -0.006519958 234 | -0.008229144 235 | 0.008292258 236 | -0.004204453 237 | 0.016307467 238 | 0.004990278 239 | 0.007262631 240 | -0.016948929 241 | 0.003501244 242 | 0.022530184 243 | 0.004894702 244 | -0.007211305 245 | 0.00581665 246 | 0.003891123 247 | -0.000811768 248 | 0.00322285 249 | -0.002274045 250 | 0.022138372 251 | 0.010229371 252 | 0.013898022 253 | 0.007956979 254 | 0.007771749 255 | 
-0.003189192 256 | -0.016130747 257 | -0.00454863 258 | 0.017559249 259 | 0.00207392 260 | -0.013516994 261 | 0.010044257 262 | 0.011097874 263 | 0.00559711 264 | -0.003033665 265 | -0.023856682 266 | 0.005236694 267 | 0.000671833 268 | -0.004547723 269 | 0.012852447 270 | -0.002190987 271 | 0.015891732 272 | -0.006837607 273 | -0.001432456 274 | -0.029575211 275 | -0.038300784 276 | -0.030015785 277 | 0.017725401 278 | -0.00510604 279 | 0.015960155 280 | -0.00619301 281 | 0.005285259 282 | 0.008033964 283 | 0.029039821 284 | -0.015578937 285 | 0.016435847 286 | -0.013711428 287 | -0.02262045 288 | -0.03476938 289 | -0.018702566 290 | 0.019627036 291 | 0.029306318 292 | 0.005696108 293 | 0.00432159 294 | -0.005009524 295 | 0.022773381 296 | 0.01266815 297 | -0.006335257 298 | -0.003915514 299 | -0.007161671 300 | -0.002744684 301 | -0.005021298 302 | 0.023990399 303 | 0.019908239 304 | -0.014267377 305 | -0.00182161 306 | 0.008486639 307 | 0.008486611 308 | 0.014337851 309 | 0.034020727 310 | -0.007637163 311 | 0.0010159 312 | 0.001520156 313 | -0.00381384 314 | 0.020479928 315 | 0.006075096 316 | 0.010027432 317 | -0.004077191 318 | 0.004087933 319 | -0.003990517 320 | 0.013150996 321 | -0.006163386 322 | -0.01276782 323 | 0.021378817 324 | -0.004650074 325 | -0.014181169 326 | -0.001211584 327 | 0.012526916 328 | -0.00741867 329 | -0.001926377 330 | 0.012008849 331 | -0.014084423 332 | -0.004895555 333 | 0.013199043 334 | 0.004356217 335 | -0.009524497 336 | -0.018303062 337 | -0.015818452 338 | -0.012077182 339 | -0.056752612 340 | 0.068951684 341 | 0.000243026 342 | 0.02647418 343 | -0.000807723 344 | -0.038389961 345 | 0.005780967 346 | 0.016049266 347 | -0.045388971 348 | -0.006232065 349 | 0.008218708 350 | -0.043092096 351 | 0.034723686 352 | 0.007264208 353 | 0.013409826 354 | -0.015485635 355 | 0.00138582 356 | 0.013716279 357 | 0.003520353 358 | -0.014439062 359 | -0.005624349 360 | -0.011897225 361 | 0.011865737 362 | 0.010203012 363 | 0.004905756 364 | 0.01005563 365 | 0.013067872 366 | -0.000326348 367 | 0.008616579 368 | 0.00251849 369 | 0.004246468 370 | -0.004728047 371 | -0.019495737 372 | 0.000166157 373 | -0.000920201 374 | 0.010131759 375 | -0.021815769 376 | -0.006830443 377 | -0.005580513 378 | 0.020009067 379 | 0.001419823 380 | 0.019062249 381 | 0.000190164 382 | 0.009585147 383 | -0.003400806 384 | 0.007303681 385 | 0.012237341 386 | 0.000388746 387 | -0.010542271 388 | 0.000683248 389 | 0.006648115 390 | 0.001949078 391 | 0.021523653 392 | 0.007114932 393 | -0.010400281 394 | 0.003504 395 | 0.013706542 396 | 0.001248869 397 | 0.005496886 398 | -0.014436418 399 | 0.013711065 400 | -0.010180284 401 | -0.003298125 402 | -0.004645658 403 | 0.002511476 404 | 0.002217177 405 | -0.012028845 406 | -0.017302052 407 | 0.003930899 408 | 0.001645509 409 | 0.005847453 410 | 0.01082305 411 | -0.000327821 412 | -0.010043636 413 | -0.001691507 414 | 0.002389147 415 | -0.004548585 416 | -0.0057746 417 | 0.013165613 418 | 0.004862007 419 | 0.010504082 420 | 0.011817165 421 | 0.002965501 422 | 0.002184557 423 | 0.00167273 424 | -0.005813995 425 | -0.002285365 426 | 0.026892714 427 | 0.003509248 428 | 0.014597561 429 | 0.00527176 430 | 0.002025474 431 | 0.006672435 432 | -0.002567317 433 | 0.005511668 434 | -0.004453821 435 | 0.000761169 436 | 0.005987659 437 | 0.014784496 438 | -0.000587001 439 | 0.003395986 440 | -0.013648916 441 | 0.008169493 442 | 0.013186098 443 | 0.008959157 444 | -0.001807411 445 | 0.006841092 446 | 0.026176163 447 | 0.009736797 448 | 0.007033826 449 | 
-0.013504077 450 | 0.012567728 451 | 0.005076448 452 | -0.008827632 453 | -0.006813227 454 | 0.015531853 455 | 0.00786829 456 | -0.001169304 457 | -0.002500309 458 | -0.030981288 459 | 0.007058143 460 | -0.008025245 461 | 0.001202099 462 | 0.032986428 463 | -0.001781987 464 | 0.002284926 465 | 0.00845291 466 | -0.013824293 467 | -0.006769931 468 | -0.007617876 469 | 0.006377468 470 | -0.030041874 471 | -0.023701804 472 | 0.013406293 473 | -0.001218191 474 | -0.01502137 475 | -0.031880777 476 | 0.019760499 477 | 0.012248015 478 | 0.011764491 479 | -0.001179229 480 | 0.012661171 481 | 0.002380343 482 | -0.002490964 483 | -0.026805079 484 | -0.017676197 485 | 0.024729439 486 | 0.001945517 487 | -0.015310023 488 | -0.016466636 489 | -0.014150672 490 | -0.000355833 491 | 0.020543856 492 | 0.00952841 493 | 0.011830075 494 | 0.000730038 495 | 0.00134576 496 | 0.001285406 497 | 0.004171479 498 | 0.001534651 499 | -0.012311844 500 | 0.024002222 501 | 0.009040201 502 | 0.008859545 503 | 0.006982356 504 | -0.007835122 505 | -0.013108757 506 | -0.003035482 507 | 0.003760527 508 | 0.009546783 509 | -0.010199048 510 | -0.015563411 511 | -0.006049411 512 | 0.000520727 513 | -0.017835035 514 | 0.00974406 515 | -0.011067076 516 | -0.00074888 517 | 0.012326085 518 | -0.014082114 519 | -0.028498178 520 | 0.001056216 521 | 0.024115793 522 | 0.007446755 523 | -0.02447813 524 | 0.02450661 525 | -0.0061961 526 | 0.005355587 527 | 0.004822987 528 | -0.017664432 529 | 0.004782286 530 | -0.002497928 531 | 0.003606376 532 | 0.008599056 533 | 0.009310309 534 | 0.000190969 535 | -0.013069043 536 | -0.007246324 537 | -------------------------------------------------------------------------------- /uci_utils.py: -------------------------------------------------------------------------------- 1 | from sklearn.neural_network import MLPClassifier 2 | from sklearn.neighbors import KNeighborsClassifier 3 | from sklearn.svm import SVC 4 | from sklearn.gaussian_process import GaussianProcessClassifier 5 | from sklearn.tree import DecisionTreeClassifier 6 | from sklearn.ensemble import RandomForestClassifier 7 | from sklearn.naive_bayes import GaussianNB 8 | from sklearn.model_selection import train_test_split 9 | import pandas as pd, numpy as np 10 | import warnings 11 | from IPython.display import Markdown, display 12 | 13 | 14 | class UCI_Dataset_Loader(): 15 | @classmethod 16 | def adult(cls): 17 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data" 18 | data=pd.read_csv(url, header=None, ) 19 | features = data.iloc[:,:-1] 20 | features = pd.get_dummies(features) 21 | labels = data.iloc[:,-1] 22 | labels = labels.astype('category').cat.codes 23 | return features, labels 24 | 25 | @classmethod 26 | def car(cls): 27 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data" 28 | data=pd.read_csv(url, header=None, ) 29 | features = data.iloc[:,:-1] 30 | features = pd.get_dummies(features) 31 | labels = data.iloc[:,-1] 32 | labels = labels.astype('category').cat.codes 33 | return features, labels 34 | 35 | @classmethod 36 | def credit_default(cls): 37 | try: 38 | import xlrd 39 | except: 40 | raise ImportError("To load this dataset, you need the library 'xlrd'. 
Try installing: pip install xlrd") 41 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls" 42 | data=pd.read_excel(url, header=1) 43 | features = data.iloc[:,:-1] 44 | features = pd.get_dummies(features) 45 | labels = data.iloc[:,-1] 46 | labels = labels.astype('category').cat.codes 47 | return features, labels 48 | 49 | @classmethod 50 | def dermatology(cls): 51 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data" 52 | data=pd.read_csv(url, header=None, ) 53 | features = data.iloc[:,1:] 54 | features = pd.get_dummies(features) 55 | labels = data.iloc[:,0] 56 | labels = labels.astype('category').cat.codes 57 | return features, labels 58 | 59 | @classmethod 60 | def diabetic_retinopathy(cls): 61 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00329/messidor_features.arff" 62 | data=pd.read_csv(url, skiprows=24, header=None) 63 | features = data.iloc[:,:-1] 64 | features = pd.get_dummies(features) 65 | labels = data.iloc[:,-1] 66 | labels = labels.astype('category').cat.codes 67 | return features, labels 68 | 69 | @classmethod 70 | def ecoli(cls): 71 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/ecoli/ecoli.data" 72 | data=pd.read_csv(url, header=None, sep='\s+') 73 | features = data.iloc[:,1:-1] 74 | features = pd.get_dummies(features) 75 | labels = data.iloc[:,-1] 76 | labels = labels.astype('category').cat.codes 77 | return features, labels 78 | 79 | @classmethod 80 | def eeg_eyes(cls): 81 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00264/EEG%20Eye%20State.arff" 82 | data=pd.read_csv(url, skiprows=19, header=None, sep=',') 83 | features = data.iloc[:,:-1] 84 | features = pd.get_dummies(features) 85 | labels = data.iloc[:,-1] 86 | labels = labels.astype('category').cat.codes 87 | return features, labels 88 | 89 | @classmethod 90 | def haberman(cls): 91 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/haberman/haberman.data" 92 | data=pd.read_csv(url, skiprows=0, header=None, sep=',') 93 | features = data.iloc[:,:-1] 94 | features = pd.get_dummies(features) 95 | labels = data.iloc[:,-1] 96 | labels = labels.astype('category').cat.codes 97 | return features, labels 98 | 99 | @classmethod 100 | def ionosphere(cls): 101 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data" 102 | data=pd.read_csv(url, skiprows=0, header=None, sep=',') 103 | features = data.iloc[:,:-1] 104 | features = pd.get_dummies(features) 105 | labels = data.iloc[:,-1] 106 | labels = labels.astype('category').cat.codes 107 | return features, labels 108 | 109 | @classmethod 110 | def ionosphere(cls): 111 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data" 112 | data=pd.read_csv(url, skiprows=0, header=None, sep=',') 113 | features = data.iloc[:,:-1] 114 | features = pd.get_dummies(features) 115 | labels = data.iloc[:,-1] 116 | labels = labels.astype('category').cat.codes 117 | return features, labels 118 | 119 | @classmethod 120 | def mice_protein(cls): 121 | try: 122 | import xlrd 123 | except: 124 | raise ImportError("To load this dataset, you need the library 'xlrd'. 
Try installing: pip install xlrd") 125 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00342/Data_Cortex_Nuclear.xls" 126 | data=pd.read_excel(url, header=0, na_values=['', ' ']) 127 | features = data.iloc[:,1:-4] 128 | features = features.fillna(value=0) 129 | features = pd.get_dummies(features) 130 | labels = data.iloc[:,-1] 131 | labels = labels.astype('category').cat.codes 132 | return features, labels 133 | 134 | @classmethod 135 | def nursery(cls): 136 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.data" 137 | data=pd.read_csv(url, header=0) 138 | features = data.iloc[:,:-1] 139 | features = pd.get_dummies(features) 140 | labels = data.iloc[:,-1] 141 | labels = labels.astype('category').cat.codes 142 | return features, labels 143 | 144 | @classmethod 145 | def seeds(cls): 146 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00236/seeds_dataset.txt" 147 | data=pd.read_csv(url, header=0, sep='\s+') 148 | features = data.iloc[:,:-1] 149 | features = pd.get_dummies(features) 150 | labels = data.iloc[:,-1] 151 | labels = labels.astype('category').cat.codes 152 | return features, labels 153 | 154 | @classmethod 155 | def seismic(cls): 156 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00266/seismic-bumps.arff" 157 | data=pd.read_csv(url, skiprows=154, header=0, sep=',') 158 | features = data.iloc[:,:-1] 159 | features = pd.get_dummies(features) 160 | labels = data.iloc[:,-1] 161 | labels = labels.astype('category').cat.codes 162 | return features, labels 163 | 164 | @classmethod 165 | def soybean(cls): 166 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/soybean/soybean-small.data" 167 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 168 | features = data.iloc[:,:-1] 169 | features = pd.get_dummies(features) 170 | labels = data.iloc[:,-1] 171 | labels = labels.astype('category').cat.codes 172 | return features, labels 173 | 174 | @classmethod 175 | def teaching_assistant(cls): 176 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/tae/tae.data" 177 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 178 | features = data.iloc[:,:-1] 179 | features = pd.get_dummies(features) 180 | labels = data.iloc[:,-1] 181 | labels = labels.astype('category').cat.codes 182 | return features, labels 183 | 184 | @classmethod 185 | def tic_tac_toe(cls): 186 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/tic-tac-toe/tic-tac-toe.data" 187 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 188 | features = data.iloc[:,:-1] 189 | features = pd.get_dummies(features) 190 | labels = data.iloc[:,-1] 191 | labels = labels.astype('category').cat.codes 192 | return features, labels 193 | 194 | @classmethod 195 | def website_phishing(cls): 196 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00379/PhishingData.arff" 197 | data=pd.read_csv(url, skiprows=14, header=None, sep=',') 198 | features = data.iloc[:,:-1] 199 | features = pd.get_dummies(features) 200 | labels = data.iloc[:,-1] 201 | labels = labels.astype('category').cat.codes 202 | return features, labels 203 | 204 | @classmethod 205 | def wholesale_customers(cls): 206 | url="https://archive.ics.uci.edu/ml/machine-learning-databases/00292/Wholesale%20customers%20data.csv" 207 | data=pd.read_csv(url, skiprows=0, header=0, sep=',') 208 | features = data.iloc[:,2:] 209 | features = pd.get_dummies(features) 210 | labels = data.iloc[:,1] 211 | labels = labels.astype('category').cat.codes 212 | return 
features, labels 213 | 214 | 215 | 216 | classifiers = [ 217 | SVC(), 218 | GaussianNB(), 219 | DecisionTreeClassifier(), 220 | RandomForestClassifier(), 221 | MLPClassifier(hidden_layer_sizes=(100)), 222 | MLPClassifier(hidden_layer_sizes=(100,100)), 223 | MLPClassifier(hidden_layer_sizes=(100,100,100)),] 224 | 225 | names = [ 226 | 'Support Vector', 227 | 'Naive Bayes', 228 | 'Decision Tree', 229 | 'Random Forests', 230 | '1-layer NN', 231 | '2-layer NN', 232 | '3-layer NN', 233 | ] 234 | 235 | def print_stats(X_train, X_test, y_train, y_test): 236 | string = "Training set size: " + str(X_train.shape) + ", Test set size: " + str(X_test.shape) + ", \# of classes: " + str(len(np.unique(y_train))) 237 | display(Markdown(string)) 238 | 239 | def print_best(scores): 240 | eps = 1e-3 241 | best = np.max(scores) 242 | indices = np.where(scores > best - eps)[1] 243 | string = 'Best classifier: **' 244 | for i, idx in enumerate(indices): 245 | if i > 0: 246 | string += ', ' 247 | string += names[idx] 248 | string += '**' 249 | display(Markdown(string)) 250 | 251 | all_data = list() 252 | 253 | def compute_test_accuracies(X, y, train_size=0.8, verbose=1, append=True, iters=3): 254 | scores = np.zeros((iters,len(classifiers))) 255 | for i in range(iters): 256 | with warnings.catch_warnings(): 257 | warnings.simplefilter('ignore') #MLP throws annoying errors whenever it doesn't fully converge 258 | X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=train_size) 259 | if verbose>=1 and i==0: 260 | print_stats(X_train, X_test, y_train, y_test) 261 | for c, clf in enumerate(classifiers): 262 | if verbose>=2: 263 | print(names[c]) 264 | with warnings.catch_warnings(): 265 | warnings.simplefilter('ignore') #MLP throws annoying errors whenever it doesn't fully converge 266 | clf.fit(X_train, y_train) 267 | score = clf.score(X_test, y_test) 268 | scores[i,c] = score 269 | scores = np.mean(scores,axis=0).reshape(1,-1) 270 | if append: 271 | n, d = X.shape 272 | c = len(np.unique(y)) 273 | all_data.append(np.concatenate([[[n, d, c]], scores], axis=1)) 274 | return scores 275 | 276 | def highlight_max(s): 277 | ''' 278 | highlight the maximum in a Series yellow. 279 | ''' 280 | eps = 1e-3 281 | best = s.max() 282 | return ['background-color: #5fba7d' if v>best-eps else '' for v in s] 283 | 284 | def highlight_max_excluding_first_three(s): 285 | ''' 286 | highlight the maximum in a Series yellow. 
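Intended use is row-wise with a pandas Styler (a sketch; df here is a hypothetical DataFrame built from all_data, whose first three columns are n, d, and the number of classes, followed by one score per classifier):
    df.style.apply(highlight_max_excluding_first_three, axis=1)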
287 | ''' 288 | eps = 1e-3 289 | best = s[3:].max() 290 | return ['background-color: #5fba7d' if (v>best-eps and i>3) else '' for i, v in enumerate(s)] 291 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from IPython.display import clear_output, Image, display, HTML 4 | import warnings 5 | import time 6 | 7 | def strip_consts(graph_def, max_const_size=32): 8 | """Strip large constant values from graph_def.""" 9 | strip_def = tf.GraphDef() 10 | for n0 in graph_def.node: 11 | n = strip_def.node.add() 12 | n.MergeFrom(n0) 13 | if n.op == 'Const': 14 | tensor = n.attr['value'].tensor 15 | size = len(tensor.tensor_content) 16 | if size > max_const_size: 17 | tensor.tensor_content = ""%size 18 | return strip_def 19 | 20 | def show_graph(graph_def, max_const_size=32): 21 | """Visualize TensorFlow graph.""" 22 | if hasattr(graph_def, 'as_graph_def'): 23 | graph_def = graph_def.as_graph_def() 24 | strip_def = strip_consts(graph_def, max_const_size=max_const_size) 25 | code = """ 26 | 31 | 32 |
33 | <tf-graph-basic id="{id}"></tf-graph-basic> 34 | </div>
35 | """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand())) 36 | 37 | iframe = """ 38 | <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe> 39 | """.format(code.replace('"', '&quot;')) 40 | display(HTML(iframe)) 41 | 42 | 43 | class Timer(): 44 | def __init__(self): 45 | pass 46 | def start(self): 47 | self.time = time.time() 48 | def end(self): 49 | return time.time() - self.time 50 | def end_and_print(self): 51 | print("Time needed to run experiment:",np.round(time.time()-self.time,3),"s") 52 | def end_and_md_print(self): 53 | from IPython.display import Markdown, display 54 | string = "Time needed to run experiment: " + str(np.round(time.time()-self.time,3)) + " s" 55 | display(Markdown(string)) 56 | 57 | 58 | ## 59 | 60 | import matplotlib.pyplot as plt 61 | 62 | def draw_neural_net(ax, left, right, bottom, top, layer_sizes): 63 | ''' 64 | Credit: https://gist.github.com/craffel/2d727968c3aaebd10359 65 | Draw a neural network cartoon using matplotlib. 66 | 67 | :usage: 68 | >>> fig = plt.figure(figsize=(12, 12)) 69 | >>> draw_neural_net(fig.gca(), .1, .9, .1, .9, [4, 7, 2]) 70 | 71 | :parameters: 72 | - ax : matplotlib.axes.AxesSubplot 73 | The axes on which to plot the cartoon (get e.g. by plt.gca()) 74 | - left : float 75 | The center of the leftmost node(s) will be placed here 76 | - right : float 77 | The center of the rightmost node(s) will be placed here 78 | - bottom : float 79 | The center of the bottommost node(s) will be placed here 80 | - top : float 81 | The center of the topmost node(s) will be placed here 82 | - layer_sizes : list of int 83 | List of layer sizes, including input and output dimensionality 84 | ''' 85 | 86 | n_layers = len(layer_sizes) 87 | v_spacing = (top - bottom)/float(max(layer_sizes)) 88 | h_spacing = (right - left)/float(len(layer_sizes) - 1) 89 | # Nodes 90 | for n, layer_size in enumerate(layer_sizes): 91 | layer_top = v_spacing*(layer_size - 1)/2. + (top + bottom)/2. 92 | for m in range(layer_size): 93 | circle = plt.Circle((n*h_spacing + left, layer_top - m*v_spacing), v_spacing/4., 94 | color='w', ec='k', zorder=4) 95 | ax.add_artist(circle) 96 | # Edges 97 | for n, (layer_size_a, layer_size_b) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])): 98 | layer_top_a = v_spacing*(layer_size_a - 1)/2. + (top + bottom)/2. 99 | layer_top_b = v_spacing*(layer_size_b - 1)/2. + (top + bottom)/2.
100 | for m in range(layer_size_a): 101 | for o in range(layer_size_b): 102 | line = plt.Line2D([n*h_spacing + left, (n + 1)*h_spacing + left], 103 | [layer_top_a - m*v_spacing, layer_top_b - o*v_spacing], c='k') 104 | ax.add_artist(line) 105 | 106 | 107 | ## 108 | 109 | ### COMMON ANALYTICAL FUNCTIONS ### 110 | 111 | def random_batch(x_values, y_values,size=64): 112 | assert x_values.shape[0]==y_values.shape[0] 113 | n = x_values.shape[0] 114 | indices = np.random.permutation(n)[:size] 115 | return x_values[indices], y_values[indices] 116 | 117 | def random_values(): 118 | def random_functions(x_values): 119 | n, d = x_values.shape 120 | return np.random.normal(0,1,n) 121 | return random_functions 122 | 123 | 124 | def sigmoid(x): 125 | return 1 / (1 + np.exp(-x)) 126 | 127 | def sigmoid_of_sigmoid(): 128 | def sigmoid_of_sigmoid_function(x_values): 129 | y_values = sigmoid(sigmoid(x_values[:,0]+x_values[:,1])+sigmoid(x_values[:,2]+x_values[:,3])) 130 | return y_values 131 | return sigmoid_of_sigmoid_function 132 | 133 | def polynomial_composition(power=2): 134 | def polynomial_composition_function(x_values): 135 | n, d = x_values.shape 136 | x_values = np.add.reduceat(x_values, axis=1, indices=range(0,d,2)) #adds adjacent columns together 137 | x_values = x_values**power 138 | n, d = x_values.shape 139 | x_values = np.add.reduceat(x_values, axis=1, indices=range(0,d,2)) #adds adjacent columns together 140 | x_values = x_values**power 141 | return np.sum(x_values,axis=1) 142 | return polynomial_composition_function 143 | 144 | def polynomial_to_power(power=2): 145 | from scipy.misc import factorial 146 | def polynomial_to_power_function(x_values): 147 | return 1/factorial(power)*np.power(np.sum(x_values, axis=1),power) 148 | return polynomial_to_power_function 149 | 150 | def sin(omega=6): 151 | def sin_function(x_values): 152 | return np.sin(omega*x_values) 153 | return sin_function 154 | 155 | def polynomial(coefs=[1,1,1]): 156 | def polynomial_function(x_values): 157 | return np.polynomial.polynomial.polyval(x_values,coefs) 158 | return polynomial_function 159 | 160 | def sparse_trig(): 161 | def sparse_trig_function(x_values): 162 | return 2*(2*np.cos(x_values)**2-1)**2-1 163 | return sparse_trig_function 164 | 165 | ### END COMMON FUNCTIONS ### 166 | 167 | ''' 168 | Takes the dataset and maps each column to be between 0 and 1 169 | ''' 170 | def normalize(array): 171 | if array.ndim>1: 172 | return (array - array.min(axis=0)) / array.ptp(axis=0) 173 | else: 174 | return (array - array.min()) / array.ptp() 175 | 176 | ''' 177 | Helper function to define a multi-layer perceptron. 
178 | x: input tensorflow node 179 | num_nodes: array that contains the number of nodes in each hidden layer 180 | num_input: number of nodes in input layer 181 | num_output: number of nodes in output layer 182 | activation: the tensorflow activation function to use 183 | ''' 184 | def multilayer_perceptron(x, num_nodes, num_input=1, num_output=1, activation=tf.nn.sigmoid, bias=True, initializer=tf.contrib.layers.xavier_initializer(), return_weight_tensors=False): 185 | n_prev = num_input 186 | out = x 187 | num_layer = 0 188 | weights = list() 189 | 190 | for n in num_nodes: 191 | w = tf.get_variable("w"+str(num_layer),[n_prev, n], initializer=initializer) 192 | weights.append(w) 193 | if bias: 194 | b = tf.get_variable("b"+str(num_layer),[n], initializer=initializer) 195 | out = activation(tf.add(tf.matmul(out,w),b),name="out"+str(num_layer)) 196 | else: 197 | out = activation(tf.matmul(out,w),name="out"+str(num_layer)) 198 | 199 | n_prev = n 200 | num_layer += 1 201 | 202 | w_out = tf.get_variable("w"+str(num_layer),[n, num_output], initializer=initializer) 203 | weights.append(w_out) 204 | 205 | if bias: 206 | b_out = tf.get_variable("b"+str(num_layer),[num_output], initializer=initializer) 207 | out = tf.add(tf.matmul(out,w_out),b_out,name="out"+str(num_layer)) 208 | else: 209 | out = tf.matmul(out,w_out,name="out"+str(num_layer)) 210 | 211 | if return_weight_tensors: 212 | return out, weights 213 | return out 214 | 215 | 216 | # Modified MLP for use with experiment 2 217 | def recurrent_multilayer_perceptron(x, num_nodes, num_input=1, num_output=1, activation=tf.nn.sigmoid): 218 | n_prev = num_input 219 | 220 | assert all(x == num_nodes[0] for x in num_nodes) #for a recurrent multilayer perceptron, the number of neurons in each hidden layer should be the same 221 | 222 | w_in = tf.get_variable("w_in",[n_prev, num_nodes[0]]) 223 | b_in = tf.get_variable("b_in",[num_nodes[0]]) 224 | 225 | w = tf.get_variable("w_shared",[num_nodes[0], num_nodes[0]]) 226 | b = tf.get_variable("b_shared",[num_nodes[0]]) 227 | 228 | for i in range(len(num_nodes)+1): 229 | if i==0: 230 | out = activation(tf.add(tf.matmul(x,w_in),b_in),name="out"+str(i)) 231 | else: 232 | out = activation(tf.add(tf.matmul(out,w),b),name="out"+str(i)) 233 | 234 | w_out = tf.get_variable("w_out",[num_nodes[0], num_output]) 235 | b_out = tf.get_variable("b_out",[num_output]) 236 | out = tf.add(tf.matmul(out,w_out),b_out,name="out_final") 237 | 238 | return out 239 | 240 | ''' 241 | A class to organize methods that generate datasets for some of the experiments 242 | ''' 243 | class Dataset(): 244 | from sklearn.preprocessing import OneHotEncoder 245 | from sklearn.model_selection import train_test_split 246 | 247 | @classmethod 248 | def generate_moons(cls, n, d=2, test_size=0.2, one_hot=False, normalize_x=False, noise=0): 249 | from sklearn.datasets import make_moons 250 | assert (d%2==0),"d should be even" 251 | 252 | X, y = make_moons(n, noise=noise) 253 | 254 | if normalize_x: 255 | X = normalize(X) 256 | 257 | if (one_hot): 258 | y = y.reshape(-1,1) 259 | enc = cls.OneHotEncoder(n_values=2,sparse=False) 260 | y = enc.fit_transform(y) 261 | 262 | X_train, X_test, y_train, y_test = cls.train_test_split(X, y, test_size=test_size) 263 | 264 | return X_train, X_test, y_train, y_test 265 | 266 | 267 | @classmethod 268 | def generate_mixture_of_gaussians(cls, n, d, class_seps=[1], covariance_scale=1, test_size=0.2, one_hot=False, randomly_labeled=False, class_ratio=1, return_covariance=False, cov=None, resample=False,
normalize_x=False): 269 | 270 | if len(class_seps)==d: 271 | pass 272 | elif len(class_seps)==1: 273 | class_seps = np.repeat(class_seps,d) 274 | else: 275 | raise ValueError("class_seps must be an array of length 1 or length d") 276 | 277 | if cov is None: 278 | c = covariance_scale*np.random.random((d,d)) 279 | cov = c.T.dot(c) 280 | 281 | assert class_ratio>=1, "parameter: class_ratio must be greater than or equal to 1" 282 | n_pos = int(n/(class_ratio+1)) 283 | n_neg = int(n-n_pos) 284 | X1 = np.random.multivariate_normal([0]*d, cov, size=n_pos) 285 | X2 = np.random.multivariate_normal(class_seps, cov, size=n_neg) 286 | if resample==True: #resamples the minority class 287 | X1 = np.tile(X1, (int(class_ratio), 1)) 288 | n_pos = n_pos*int(class_ratio) 289 | X = np.concatenate([X1,X2]) 290 | 291 | if normalize_x: 292 | X = normalize(X) 293 | 294 | if randomly_labeled==True: 295 | y = np.random.randint(0,2,(n_pos+n_neg)) 296 | else: 297 | y = np.array([0]*n_pos + [1]*n_neg) 298 | 299 | if (one_hot): 300 | y = y.reshape(-1,1) 301 | enc = cls.OneHotEncoder(n_values=2,sparse=False) 302 | y = enc.fit_transform(y) 303 | 304 | X_train, X_test, y_train, y_test = cls.train_test_split(X, y, test_size=test_size) 305 | 306 | if return_covariance: 307 | return X_train, X_test, y_train, y_test, cov 308 | return X_train, X_test, y_train, y_test 309 | 310 | def generate_MNIST(n_train, n_test, subset=range(10)): 311 | from tensorflow.examples.tutorials.mnist import input_data 312 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 313 | y_train = mnist.train.labels 314 | 315 | def pretty_plotting_styles(): 316 | plt.rc("font",family="sans-serif",size=20) 317 | plt.rcParams["font.sans-serif"] = "Arial" 318 | 319 | 320 | 321 | 322 | ''' 323 | Returns an RNN with the following parameters: 324 | window_size: the number of previous time steps to use to make the prediction 325 | dim: dimensionality of the input data 326 | units: the number of hidden units in the LSTM 327 | ''' 328 | def RNN(window_size=5, dim=1, units=32): 329 | import keras 330 | from keras.models import Model 331 | from keras.layers import Dense, Input, LSTM 332 | 333 | x = Input(shape=(window_size, dim)) 334 | z, sh, sc = LSTM(units=units, return_state=True)(x) 335 | z = Dense(1, activation='tanh')(z) 336 | model = Model(inputs=[x],outputs=[z]) 337 | model.compile(loss='mse', optimizer='adam') 338 | return model 339 | 340 | 341 | 342 | ''' 343 | Converts a time-series into a form that can be used to train and validate an RNN 344 | ''' 345 | def create_windowed_dataset(time_series, window_size=5, frac_train=0.8): 346 | time_series = normalize(time_series) 347 | X_train, y_train, X_test, y_test = [], [], [], [] 348 | n = len(time_series)-window_size-1 349 | n_train = int(n*frac_train) 350 | for i in range(n): 351 | a = time_series[i:(i+window_size)] 352 | if a.ndim==1: 353 | a = a.reshape(-1, 1) 354 | if i < n_train: 355 | X_train.append(a) 356 | y_train.append(time_series[i+window_size]) 357 | else: 358 | X_test.append(a) 359 | y_test.append(time_series[i+window_size]) 360 | return np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test) 361 | 362 | def mse(y, y_): #note: despite the name, this returns the root-mean-squared error (rounded to 2 decimals), which the plot titles report as RMSE 363 | y = y.flatten() 364 | y_ = y_.flatten() 365 | assert len(y)==len(y_), "arrays must be of the same length" 366 | return np.round(np.sqrt(np.mean(np.square(y-y_))),2) 367 | 368 | ''' 369 | Helper method to train and graph the results of RNN prediction 370 | ''' 371 | def train_and_plot(time_series, window_sizes=None, hidden_units=None,epochs=20,
figsize=None): 372 | plt.rc("font",family="sans-serif",size=14) 373 | 374 | if not(figsize is None): 375 | plt.figure(figsize=figsize) 376 | if hidden_units is None: 377 | if figsize is None: 378 | plt.figure(figsize=[4*len(window_sizes),4]) 379 | for w, window_size in enumerate(window_sizes): 380 | plt.subplot(1, len(window_sizes), w+1) 381 | X_train, y_train, X_test, y_test = create_windowed_dataset(time_series, window_size=window_size) 382 | rnn = RNN(window_size=window_size) 383 | rnn.fit(X_train, y_train, epochs=epochs, verbose=0) 384 | y_ = rnn.predict(X_test) 385 | plt.plot(y_test) 386 | plt.plot(y_,marker='.') 387 | plt.title('Window size: '+str(window_size)+', RMSE: ' + str(mse(y_, y_test))) 388 | elif window_sizes is None: 389 | if figsize is None: 390 | plt.figure(figsize=[4*len(hidden_units),4]) 391 | for h, hidden_unit in enumerate(hidden_units): 392 | plt.subplot(1, len(hidden_units), h+1) 393 | X_train, y_train, X_test, y_test = create_windowed_dataset(time_series) 394 | rnn = RNN(units=hidden_unit) 395 | rnn.fit(X_train, y_train, epochs=epochs, verbose=0) 396 | y_ = rnn.predict(X_test) 397 | plt.plot(y_test) 398 | plt.plot(y_,marker='.') 399 | plt.title('# Hidden Units: '+str(hidden_unit)+', RMSE: ' + str(mse(y_, y_test))) 400 | else: 401 | if figsize is None: 402 | plt.figure(figsize=[4*len(window_sizes), 4*len(hidden_units)]) 403 | count = 0 404 | for w, window_size in enumerate(window_sizes): 405 | for h, hidden_unit in enumerate(hidden_units): 406 | count += 1 407 | plt.subplot(len(window_sizes), len(hidden_units), count) 408 | X_train, y_train, X_test, y_test = create_windowed_dataset(time_series, window_size=window_size) 409 | rnn = RNN(units=hidden_unit, window_size=window_size) 410 | rnn.fit(X_train, y_train, epochs=epochs, verbose=0) 411 | y_ = rnn.predict(X_test) 412 | plt.plot(y_test) 413 | plt.plot(y_,marker='.') 414 | plt.title('Window: '+str(window_size)+', Hidden: '+str(hidden_unit)+', RMSE: ' + str(mse(y_, y_test))) 415 | plt.legend(['Real','Predicted']) 416 | 417 | def plot_decision_boundary(X, y, grid_pred): 418 | xx, yy = np.meshgrid(np.arange(0, 1.02, 0.02), np.arange(0, 1.02, 0.02)) 419 | grid_points = np.c_[xx.ravel(), yy.ravel()] 420 | plt.scatter(*X.T, marker='.', c=np.argmax(y, axis=1), alpha=1, cmap='RdBu') 421 | zz = grid_pred[:,1].reshape(xx.shape) 422 | plt.contourf(xx, yy, zz, cmap='RdBu', alpha=.2) 423 | plt.xlim([0, 1]); plt.ylim([0,1]) 424 | plt.xlabel('Feature 1') 425 | plt.ylabel('Feature 2') 426 | 427 | 428 | --------------------------------------------------------------------------------
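
The time-series helpers in utils.py are easiest to see end to end. The snippet below is a minimal usage sketch, not part of the repository: it assumes utils.py is importable from the working directory and that Keras with a TensorFlow 1.x backend is installed, and the series length, window size, and epoch count are arbitrary illustrative choices.

import numpy as np
from utils import sin, create_windowed_dataset, RNN, mse

# build a toy time series from the sin() helper (omega controls the frequency)
series = sin(omega=6)(np.linspace(-1, 1, 500))

# window the normalized series: X_* has shape (samples, window_size, 1), y_* holds the next value
X_train, y_train, X_test, y_test = create_windowed_dataset(series, window_size=5, frac_train=0.8)

# single-LSTM regressor from RNN(); train quietly and report the rounded RMSE computed by mse()
model = RNN(window_size=5, dim=1, units=32)
model.fit(X_train, y_train, epochs=20, verbose=0)
print("RMSE:", mse(model.predict(X_test), y_test))

train_and_plot(series, window_sizes=[3, 5, 10]) wraps this same loop and plots predicted versus true values for each window size, which is how the "What Can an RNN Predict" notebook uses these helpers.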