├── .gitignore
├── mnist_cnn
│   ├── run_mnist_cnn.sh
│   ├── plot_results.py
│   ├── mnist_backprop.py
│   └── forward_thinking_cnn_mnist.py
├── FTForest
│   ├── FTDRF.py
│   ├── FTDRF.sh
│   ├── FTDRF_with_MGS.sh
│   ├── concat_mgsout.py
│   ├── mgs.py
│   ├── FTDRF_from_mgs.py
│   ├── FTDRF_test.py
│   ├── structure.py
│   └── functions.py
└── README.md
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.npz
--------------------------------------------------------------------------------
/mnist_cnn/run_mnist_cnn.sh:
--------------------------------------------------------------------------------
#!/bin/bash

python forward_thinking_cnn_mnist.py
python mnist_backprop.py
python plot_results.py
--------------------------------------------------------------------------------
/FTForest/FTDRF.sh:
--------------------------------------------------------------------------------
#!/bin/bash

echo "User note: This test will use substantial computer memory (>50 GB)."
echo "If you do not have these memory resources on your machine, we advise you to cancel this task now."

python FTDRF_test.py
--------------------------------------------------------------------------------
/FTForest/FTDRF_with_MGS.sh:
--------------------------------------------------------------------------------
#!/bin/bash

echo "User note: This test will use substantial computer memory (>50 GB)."
echo "If you do not have these memory resources on your machine, we advise you to cancel this task now."

echo ""
echo "Building 7x7 Window Features"
python mgs.py 7

echo "Building 9x9 Window Features"
python mgs.py 9

echo "Building 14x14 Window Features"
python mgs.py 14

echo "Concatenating Multi-Grained Scanning Output"
python concat_mgsout.py

echo "Building Forward Thinking Deep Random Forest (FTDRF)"
python FTDRF_from_mgs.py
--------------------------------------------------------------------------------
/FTForest/concat_mgsout.py:
--------------------------------------------------------------------------------
import numpy as np
from functions import combine_MGS_output

# read in the training data that was pushed through the MGS forests for all window sizes
filenames = ['./7_wind/X_tr_thru_7_wind.npy', './9_wind/X_tr_thru_9_wind.npy', './14_wind/X_tr_thru_14_wind.npy']

X_out = combine_MGS_output(filenames)
np.save('X_mgsout.npy', X_out)
X_out = 0  # clear up memory

# do the same for the testing data
filenames = ['./7_wind/X_t_thru_7_wind.npy', './9_wind/X_t_thru_9_wind.npy', './14_wind/X_t_thru_14_wind.npy']
X_t_out = combine_MGS_output(filenames)
np.save('X_t_mgsout.npy', X_t_out)
X_t_out = 0  # clear up memory
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# ForwardThinking
Companion code for _Forward Thinking: Building and Training Neural Networks One Layer at a Time_ and _Forward Thinking: Building Deep Random Forests_, submitted to NIPS 2017.

Authors:
- Chris Hettinger
- Tanner Christensen
- Ben Ehlert
- Jeffrey Humpherys
- Tyler Jarvis
- David Kartchner
- Kevin Miller
- Sean Wade

## Publications
- _Forward Thinking: Building and Training Neural Networks One Layer at a Time_: (URL coming soon)
- _Forward Thinking: Building Deep Random Forests_: https://arxiv.org/abs/1705.07366

## Dependencies
- numpy==1.11.3
- tensorflow-gpu==1.0.0
- keras==2.0.4
- matplotlib==2.0.0
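
These pinned versions can be installed in one step; a minimal sketch, assuming a Python 2 environment with `pip` (swap `tensorflow-gpu` for `tensorflow` if no CUDA-capable GPU is available):

```bash
pip install numpy==1.11.3 tensorflow-gpu==1.0.0 keras==2.0.4 matplotlib==2.0.0
```

Note that the `FTForest` scripts additionally import `scikit-learn`, and `plot_results.py` imports `seaborn`.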

## Hardware
We used a single desktop computer with:
- Intel i5-7400 processor
- Nvidia GeForce GTX 1060 3GB GPU
- 8GB DDR4 RAM

With this configuration, it took approximately 2 hours to run the `run_mnist_cnn.sh` script.

## Installation
TODO: Make this pip installable.

## Execution
Once the dependencies above are installed, you may run the included `run_mnist_cnn.sh` script.

This will run the forward thinking neural network (99.72% test accuracy in our tests), then the backpropagation equivalent of our model (99.63% test accuracy in our tests), and finally save and display a plot comparing the test and train accuracies.
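
For example, a minimal sketch (the scripts reference one another by bare filename, so run from inside the directory):

```bash
cd mnist_cnn
bash run_mnist_cnn.sh
```

The `FTForest` experiments run analogously: `bash FTDRF.sh` for the plain FTDRF test, or `bash FTDRF_with_MGS.sh` for the full multi-grained scanning pipeline.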
--------------------------------------------------------------------------------
/mnist_cnn/plot_results.py:
--------------------------------------------------------------------------------
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

fwd_acc = []
fwd_val_acc = []
fwd_times = []

# load the per-layer forward thinking results saved by forward_thinking_cnn_mnist.py
for i in xrange(1, 4):
    fwd = np.load('layer{}_mnist_results.npz'.format(i))
    fwd_acc.append(fwd['acc'])
    fwd_val_acc.append(fwd['val_acc'])
    fwd_times.append(fwd['times'])

fwd_acc = np.concatenate(fwd_acc)
fwd_val_acc = np.concatenate(fwd_val_acc)
fwd_times = np.concatenate(fwd_times)

# load the backpropagation results saved by mnist_backprop.py
back = np.load('mnist_backprop_results.npz')
back_acc = back['acc']
back_val_acc = back['val_acc']
back_times = back['times']


sns.set_context("paper")
sns.set(font='serif')
sns.set_style("white", {
    "font.family": "serif",
    "font.serif": ["Times", "Palatino", "serif"]})

fig = plt.figure()
fig.set_size_inches(3.5, 4)
ax = plt.subplot(211)
fig.dpi = 100

c1 = '#008000'
c2 = '#000073'

# create training accuracy plot
ax.plot(fwd_times / 60., 100 * fwd_acc, label='Forward Thinking', c=c1, linewidth=2)
ax.plot(back_times / 60., 100 * np.array(back_acc), label='Backpropagation', c=c2, alpha=.5)
plt.yticks(np.arange(91, 101))
plt.title('Training Accuracy')

box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.xlabel('Minutes')
plt.ylabel('Accuracy (%)')

# create testing accuracy plot
ax = plt.subplot(212)
ax.plot(fwd_times / 60., 100 * fwd_val_acc, label='Forward Thinking', c=c1, linewidth=2)
ax.plot(back_times / 60., 100 * np.array(back_val_acc), label='Backpropagation', c=c2, alpha=.5)
plt.yticks(np.arange(98, 100, .25))
plt.title('Test Accuracy')

box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.xlabel('Minutes')
plt.ylabel('Accuracy (%)')

plt.savefig('accuracy.png')
plt.show()
--------------------------------------------------------------------------------
/FTForest/mgs.py:
--------------------------------------------------------------------------------
# Script to build the windowed (multi-grained scanning, MGS) features for the whole
# MNIST dataset for one window size: it trains the MGS random forests on the windowed
# training data, then pushes all the training and testing data through them and saves
# the results.

import numpy as np
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
import sys
import os
from keras.datasets import mnist
from functions import windowing_tr, push_thru_MGS_windowing_sep


# get the MNIST data
(X_tr, y_tr), (X_t, y_t) = mnist.load_data()
X_tr = X_tr.reshape(60000, 784)
X_t, y_t = 0, 0  # memory efficiency


# train the Multi-Grained Scanning random forests on windowed data
n_trees = 30
MGS_Forests = []
w = int(sys.argv[1])  # window size, passed on the command line (e.g. python mgs.py 7)
test_name = str(w) + '_wind'
foldername = './' + test_name + '/'
os.mkdir(foldername)

print w
# do multi-grained scanning (windowing) on the training data
Xw, yw = windowing_tr(X_tr, y_tr, w_size=w)
y_tr = 0  # memory

# build the random forests for MGS for this window size
forest1 = RandomForestClassifier(n_trees, max_depth=None, min_samples_split=20, n_jobs=-1)
forest2 = ExtraTreesClassifier(n_trees, max_depth=None, min_samples_split=20, n_jobs=-1)

# fit the forests to the windowed data
forest1.fit(Xw, yw)
forest2.fit(Xw, yw)
Xw, yw = 0, 0  # free up memory

# collect the fitted forests
MGS_Forests.append(forest1)
MGS_Forests.append(forest2)
forest1, forest2 = 0, 0  # memory

# push training data through the MGS random forests and save the result
X_tr_thru = push_thru_MGS_windowing_sep(X_tr, MGS_Forests, w)
np.save(foldername + 'X_tr_thru_' + test_name, X_tr_thru)
X_tr_thru = 0  # memory

# reload MNIST to get the testing data
(X_tr, y_tr), (X_t, y_t) = mnist.load_data()
X_tr, y_tr = 0, 0  # for memory
X_t = X_t.reshape(10000, 784)
y_t = 0  # for memory

# push the testing data through the MGS random forests and save the result
X_t_thru = push_thru_MGS_windowing_sep(X_t, MGS_Forests, w)

np.save(foldername + 'X_t_thru_' + test_name, X_t_thru)
--------------------------------------------------------------------------------
/FTForest/FTDRF_from_mgs.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.metrics import accuracy_score
from keras.datasets import mnist
from structure import RFLayer_RAND


print 'Loading in training data:'
# X_curr is the concatenated MGS output written by concat_mgsout.py;
# MNIST is loaded only to recover the training labels
(X_tr, y_tr), (Xtest, ytest) = mnist.load_data()
X_tr, Xtest, ytest = 0, 0, 0
X_curr = np.load('./X_mgsout.npy')


print
print 'RF Layer training:'

# parameters for building the next layers
n = 2000          # num trees in each layer
min_gain = 0.01   # minimum relative validation gain required to add another layer
verbose = True
max_layers = 5
md = None         # max_depth for the trees
mss = 10          # min_samples_split for the trees
n_jobs = -1

# dictionary where layers of decision trees will be stored
Layers = {}

# train the FTDRF layers on the MGS data loaded in
prev_score = -1.0  # instantiate prev_score
for i in xrange(max_layers):
    print X_curr.shape
    RFL = RFLayer_RAND(n, md=md, mss=mss)
    RFL.fit(X_curr, y_tr, 5, 1, n_jobs)
    Layers[i] = RFL

    # if verbose, print out the estimation accuracy for this layer
    if verbose:
        print 'Layer ' + str(i+1)
        print 'acc: ' + str(RFL.val_score)

    # check whether one more layer improved the score enough to keep going
    rel_gain = (RFL.val_score - prev_score) / float(abs(prev_score))
    if rel_gain < min_gain or RFL.val_score == 1.0:
        print 'Converged! Stopping building layers'
        print
        break
    prev_score = RFL.val_score

    # if moving on to another layer, push the data through this one
    X_curr = RFL.push_thru_data(X_curr)
    print 'Going to another layer'
    print

X_curr, y_tr = 0, 0  # memory

# load in testing data, free up memory of the training data
print 'Loading in testing data'
(X_tr, y_tr), (X_t, y_t) = mnist.load_data()
X_tr, y_tr, X_t = 0, 0, 0
y_t = y_t.astype('uint8')
X_t_curr = np.load('./X_t_mgsout.npy')


# push test data through all but the last FTDRF layer, then predict with the last
for i in xrange(len(Layers) - 1):
    X_t_curr = Layers[i].push_thru_data(X_t_curr)
last = len(Layers) - 1
y_pred = Layers[last].predict(X_t_curr)


print
print 'Statistics:'
print 'The accuracy was:'
print accuracy_score(y_t, y_pred)
print 'Params:'
print 'num_trees in each layer = ' + str(n)
print 'md = ' + str(md)
print 'mss = ' + str(mss)
print
--------------------------------------------------------------------------------
/FTForest/FTDRF_test.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.metrics import accuracy_score
from keras.datasets import mnist
from functions import single_pixeldiag_tr
from structure import RFLayer_RAND

print 'FTDRF Test, no MGS'

# load in the MNIST dataset from keras
print 'Loading in MNIST data'
(X_tr, y_tr), (X_t_curr, y_t) = mnist.load_data()
X_tr = X_tr.reshape(60000, 784)
X_t_curr, y_t = 0, 0  # memory

# augment the training data with single-pixel diagonal wiggling
X_curr, y_tr = single_pixeldiag_tr(X_tr, y_tr)
X_curr = X_curr.astype('uint8')
y_tr = y_tr.astype('uint8')
X_tr = 0


print
# train the layers directly on the augmented raw pixels (no multi-grained scanning)
print 'RF Layer training:'


# parameters for building the next layers
n = 2000          # num trees in each layer
min_gain = 0.01   # minimum relative validation gain required to add another layer
verbose = True
max_layers = 5
md = None         # max_depth for the trees
mss = 10          # min_samples_split for the trees
n_jobs = -1

# dictionary where layers of decision trees will be stored
Layers = {}

prev_score = -1.0  # instantiate prev_score
# build the layers
for i in xrange(max_layers):
    print X_curr.shape
    RFL = RFLayer_RAND(n, md=md, mss=mss)
    RFL.fit(X_curr, y_tr, 5, 1, n_jobs)
    Layers[i] = RFL

    # if verbose, print out the estimation accuracy for this layer
    if verbose:
        print 'Layer ' + str(i+1)
        print 'acc: ' + str(RFL.val_score)

    # check whether one more layer improved the score enough to keep going
    rel_gain = (RFL.val_score - prev_score) / float(abs(prev_score))
    if rel_gain < min_gain or RFL.val_score == 1.0:
        print 'Converged! Stopping building layers'
        print
        break
    prev_score = RFL.val_score

    # if moving on to another layer, push the data through this one
    X_curr = RFL.push_thru_data(X_curr)
    print 'Going to another layer'
    print

X_curr, y_tr = 0, 0  # memory

# load in testing data, free up memory of the training data
print 'Loading in testing data'
(X_tr, y_tr), (X_t_curr, y_t) = mnist.load_data()
X_tr, y_tr = 0, 0
X_t_curr = X_t_curr.reshape(10000, 784)
X_t_curr = X_t_curr.astype('uint8')
y_t = y_t.astype('uint8')


# push test data through all but the last FTDRF layer, then predict with the last
for i in xrange(len(Layers) - 1):
    X_t_curr = Layers[i].push_thru_data(X_t_curr)
last = len(Layers) - 1
y_pred = Layers[last].predict(X_t_curr)


print
print 'Statistics:'
print 'The accuracy was:'
print accuracy_score(y_t, y_pred)
print 'Params:'
print 'num_trees in each layer = ' + str(n)
print 'md = ' + str(md)
print 'mss = ' + str(mss)
print
--------------------------------------------------------------------------------
/FTForest/structure.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score


class RFLayer_RAND(object):
    """One layer of a Forward Thinking Deep Random Forest: a random forest in which
    roughly half of the trees are made completely random."""

    def __init__(self, n_estimators, classifier=True, md=None, mss=10):
        self.n_estimators = n_estimators
        self.max_depth = md
        self.min_samples_split = mss
        self.classifier = classifier

    def fit(self, X_train, y_train, kfold=5, k=2, n_jobs=-1):
        # kfold=5 yields an 80/20 split; k is the number of times we run validation
        if kfold > 1:
            kf = KFold(kfold, shuffle=True)
        else:
            raise ValueError('kfold must be greater than 1 so we can cross validate')

        models = []
        best_score = 0
        best_ind = 0
        count = 0

        # split training data into training and estimating sets via a quasi k-fold validation routine
        for tr_ind, est_ind in kf.split(X_train, y_train):
            # instantiate the layer of decision trees
            models.append(RandomForestClassifier(self.n_estimators, criterion='entropy',
                                                 max_depth=self.max_depth,
                                                 min_samples_split=self.min_samples_split,
                                                 n_jobs=n_jobs))

            # get the split of the training data and train the layer on it
            X_tr, y_tr = X_train[tr_ind, :], y_train[tr_ind]
            models[count].fit(X_tr, y_tr)

            # make roughly half of the trees completely random decision trees by
            # refitting them as ExtraTrees on the same split (a forest's estimators_
            # only exists after fit, so the swap has to happen here)
            for j in xrange(len(models[count].estimators_)):
                if np.random.rand() <= .5:
                    rand_tree = ExtraTreeClassifier(criterion='entropy',
                                                    max_depth=self.max_depth,
                                                    min_samples_split=self.min_samples_split)
                    models[count].estimators_[j] = rand_tree.fit(X_tr, y_tr)
            X_tr, y_tr = 0, 0

            # check accuracy on the estimation set
            X_est, y_est = X_train[est_ind, :], y_train[est_ind]
            y_pred = models[count].predict(X_est)
            acc_score = accuracy_score(y_est, y_pred)
            X_est, y_est = 0, 0  # memory
            y_pred = 0  # memory

            if acc_score > best_score:  # with k > 1 we compare to see which trained layer is best
                best_score = acc_score
                best_ind = count
            count += 1
            if count >= k:
                break

        # save the best layer
        self.L = models[best_ind]
        self.n_classes = self.L.n_classes_
        self.val_score = best_score

    def predict(self, X_test):
        return self.L.predict(X_test)

    def push_thru_data(self, X):
        # push the data X through this layer: each tree contributes its class probabilities
        n_samples, dim_data = X.shape
        X_push = np.empty((n_samples, self.n_estimators*self.n_classes))
        for i, tree in enumerate(self.L.estimators_):
            if self.classifier:
                X_push[:, i*self.n_classes:(i+1)*self.n_classes] = tree.predict_proba(X).astype('float32')
        return X_push
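
# Usage sketch with placeholder data (FTDRF_test.py and FTDRF_from_mgs.py drive the
# real pipelines); guarded so that importing this module stays side-effect free
if __name__ == '__main__':
    X = np.random.rand(100, 20)        # placeholder features
    y = np.random.randint(0, 2, 100)   # placeholder binary labels
    layer = RFLayer_RAND(10, md=None, mss=10)
    layer.fit(X, y, kfold=5, k=1, n_jobs=1)
    print layer.val_score              # held-out accuracy of the kept forest
    X_next = layer.push_thru_data(X)
    print X_next.shape                 # (100, n_estimators * n_classes), features for the next layer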
--------------------------------------------------------------------------------
/mnist_cnn/mnist_backprop.py:
--------------------------------------------------------------------------------
import numpy as np
import keras
from keras.datasets import mnist
from keras.layers import Input, Dense, Dropout, Conv2D, Flatten, MaxPooling2D
from keras.models import Model
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator

import time

batch_size = 128
num_classes = 10


train_begin_time = time.time()
best_score = 0

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

def save_weights(model, filename, layer):
    conv = model.get_layer('conv{0}'.format(layer)).get_weights()
    fc1 = model.get_layer('fc1').get_weights()
    fc2 = model.get_layer('fc2').get_weights()

    np.savez(filename, W_conv=conv[0], b_conv=conv[1], W_fc1=fc1[0], b_fc1=fc1[1],
             W_fc2=fc2[0], b_fc2=fc2[1])

class TimeHistory(keras.callbacks.Callback):
    """Records wall-clock time per epoch and snapshots the weights whenever
    validation accuracy improves."""

    def on_train_begin(self, logs={}):
        self.times = []
        self.epoch_times = []

    def on_epoch_begin(self, epoch, logs={}):
        self.t0 = time.time()

    def on_epoch_end(self, epoch, logs={}):
        global best_score
        self.times.append(time.time() - train_begin_time)
        self.epoch_times.append(time.time() - self.t0)

        if logs.get('val_acc') > best_score:
            try:
                best_score = logs.get('val_acc')
                save_weights(self.model, 'weights_layer3.npz', 3)
            except Exception:
                pass

def run_backprop():
    # the full three-conv-layer network, trained end to end as a baseline
    main_input = Input(shape=input_shape, name='main_input')
    conv1 = Conv2D(256, (3,3), activation='relu', padding='same', name='conv1')(main_input)
    conv2 = Conv2D(256, (3,3), activation='relu', padding='same', name='conv2')(conv1)

    conv2 = MaxPooling2D(pool_size=(2,2))(conv2)

    conv3 = Conv2D(128, (3,3), activation='relu', padding='same', name='conv3')(conv2)
    conv3_drop = Dropout(.3)(conv3)
    conv3_flat = Flatten()(conv3_drop)

    fc1 = Dense(150, activation='relu', name='fc1')(conv3_flat)
    fc1_drop = Dropout(.5)(fc1)
    main_output = Dense(10, activation='softmax', name='fc2')(fc1_drop)

    model = Model(inputs=[main_input], outputs=[main_output])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # decaying learning rate schedule for the long training run
    def schedule(epoch):
        if epoch < 2:
            return 0.005
        elif epoch < 10:
            return 0.002
        elif epoch < 40:
            return 0.001
        elif epoch < 60:
            return 0.0005
        elif epoch < 80:
            return 0.0001
        else:
            return 0.00005

    rate_schedule = keras.callbacks.LearningRateScheduler(schedule)

    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        rotation_range=7,         # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.05,   # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.05,  # randomly shift images vertically (fraction of total height)
        zoom_range=.1)
    # Compute quantities required for feature-wise normalization
    datagen.fit(x_train)

    time_history = TimeHistory()
    history = keras.callbacks.History()

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        steps_per_epoch=x_train.shape[0] // batch_size,
                        epochs=100, callbacks=[history, time_history, rate_schedule],
                        validation_data=(x_test, y_test))

    np.savez('mnist_backprop_results.npz', acc=history.history['acc'], loss=history.history['loss'],
             val_acc=history.history['val_acc'], val_loss=history.history['val_loss'],
             times=time_history.times, epoch_times=time_history.epoch_times)

if __name__ == "__main__":
    run_backprop()
--------------------------------------------------------------------------------
/FTForest/functions.py:
--------------------------------------------------------------------------------
import numpy as np
from itertools import product


# windowing for training data: extract every w_size x w_size window from each image,
# giving each window its parent image's label
# X shape = (n_examples, dimension of data)
def windowing_tr(X, y, img_size=(28,28), w_size=10):
    n_samples, dim_data = X.shape
    if img_size[0]*img_size[1] != dim_data:  # the data must actually contain img_size pixels per example
        raise ValueError('img_size not compatible with size of data in X')
    n_x = img_size[1] - w_size + 1
    n_y = img_size[0] - w_size + 1
    num_wind = n_y*n_x
    X_out = np.empty((num_wind*n_samples, w_size**2))
    y_out = np.zeros(num_wind*n_samples)
    for k in xrange(n_samples):
        k_image = X[k,:].reshape(img_size)
        for i in xrange(n_y):
            for j in xrange(n_x):
                i_row = i*n_x
                X_out[k*num_wind + i_row+j,:] = k_image[i:i+w_size, j:j+w_size].flatten()
                y_out[k*num_wind + i_row+j] = y[k]
    return X_out.astype('float32'), y_out.astype('float32')


# same windowing, but without labels (used when pushing data through fitted forests)
def windowing_push(X, img_size=(28,28), w_size=10):
    n_samples, dim_data = X.shape
    if img_size[0]*img_size[1] != dim_data:
        raise ValueError('img_size not compatible with size of data in X')
    n_x = img_size[1] - w_size + 1
    n_y = img_size[0] - w_size + 1
    num_wind = n_y*n_x
    X_out = np.empty((num_wind*n_samples, w_size**2))
    for k in xrange(n_samples):
        k_image = X[k,:].reshape(img_size)
        for i in xrange(n_y):
            for j in xrange(n_x):
                i_row = i*n_x
                X_out[k*num_wind + i_row+j,:] = k_image[i:i+w_size, j:j+w_size].flatten()
    return X_out.astype('float32')

# push data through all six MGS forests (two per window size) at once
def push_thru_MGS_windowing(X_data, MGS_forests, img_size=(28,28), windows=[7,9,14]):
    n_classes = MGS_forests[0].n_classes_
    n_samples = X_data.shape[0]
    n_x = img_size[1] + 1 - np.array(windows)
    n_y = img_size[0] + 1 - np.array(windows)
    N = n_x * n_y  # number of windows per image, for each window size
    X7 = windowing_push(X_data, w_size=7)
    pred_all1 = MGS_forests[0].predict_proba(X7)
    pred_all1 = pred_all1.reshape(n_samples, N[0]*n_classes)
    pred_all2 = MGS_forests[1].predict_proba(X7)
    X7 = 0
    pred_all2 = pred_all2.reshape(n_samples, N[0]*n_classes)
    pred_all1 = np.hstack((pred_all1, pred_all2))
    X9 = windowing_push(X_data, w_size=9)
    pred_all2 = MGS_forests[2].predict_proba(X9)
    pred_all2 = pred_all2.reshape(n_samples, N[1]*n_classes)
    pred_all1 = np.hstack((pred_all1, pred_all2))
    pred_all2 = MGS_forests[3].predict_proba(X9)
    X9 = 0
    pred_all2 = pred_all2.reshape(n_samples, N[1]*n_classes)
    pred_all1 = np.hstack((pred_all1, pred_all2))
    X14 = windowing_push(X_data, w_size=14)
    pred_all2 = MGS_forests[4].predict_proba(X14)
    pred_all2 = pred_all2.reshape(n_samples, N[2]*n_classes)
    pred_all1 = np.hstack((pred_all1, pred_all2))
    pred_all2 = MGS_forests[5].predict_proba(X14)
    X14 = 0
    pred_all2 = pred_all2.reshape(n_samples, N[2]*n_classes)
    return np.hstack((pred_all1, pred_all2)).astype('float32')

# push data through the two MGS forests for a single window size
def push_thru_MGS_windowing_sep(X_data, MGS_forests, w_size, img_size=(28,28)):
    n_classes = MGS_forests[0].n_classes_
    n_samples = X_data.shape[0]
    n_x = img_size[1] + 1 - w_size
    n_y = img_size[0] + 1 - w_size
    N = n_x * n_y
    Xw = windowing_push(X_data, w_size=w_size)
    pred_all1 = MGS_forests[0].predict_proba(Xw)
    pred_all1 = pred_all1.reshape(n_samples, N*n_classes)
    pred_all2 = MGS_forests[1].predict_proba(Xw)
    Xw = 0
    pred_all2 = pred_all2.reshape(n_samples, N*n_classes)
    return np.hstack((pred_all1, pred_all2)).astype('float32')


# same as above, but reading pre-windowed data from a .npy file
def push_thru_MGS_windowing_sep_npy(filename, MGS_forests, w_size, write_to_file=False, out_file=None):
    X = np.load(filename)
    n_c = MGS_forests[0].n_classes_
    n_samples = X.shape[0]
    N = (28 - w_size + 1)**2
    if write_to_file and out_file is None:
        raise ValueError('to write to file, a filename must be provided')

    # push data through the MGS forests
    pred1 = MGS_forests[0].predict_proba(X)
    pred1 = pred1.reshape(n_samples, N*n_c)
    pred2 = MGS_forests[1].predict_proba(X)
    pred2 = pred2.reshape(n_samples, N*n_c)
    X_thru = np.hstack((pred1, pred2))
    if write_to_file:
        np.save(out_file + '/X_thru_' + str(w_size), X_thru)
    return X_thru

# filenames is a list of files holding the data pushed through the MGS forests;
# the files must be in the same (increasing) order as w_sizes, i.e. 7, 9, 14
def combine_MGS_output(filenames, w_sizes=[7,9,14]):
    if len(filenames) != len(w_sizes):
        raise ValueError('Need to have the same number of files as number of windows')
    count = 0
    for fname in filenames:
        if count == 0:
            X_out = np.load(fname)
            print X_out.shape
        else:
            X = np.load(fname)
            print X.shape
            X_out = np.hstack((X_out, X))
            X = 0  # free up memory
        count += 1
    return X_out.astype('float32')


# single pixel wiggle: augment each image with four one-pixel diagonal shifts
def single_pixeldiag_tr(X, y, img_size=(28,28)):
    n_samples, dim_data = X.shape
    if img_size[0]*img_size[1] != dim_data:
        raise ValueError('img_size not compatible with size of data in X')
    X_out = np.empty((n_samples*5, dim_data))
    y_out = np.zeros(n_samples*5)
    for k in xrange(n_samples):
        k_image = X[k,:].reshape(img_size)
        count = 0
        # the four diagonal shifts plus the original image
        for i, j in list(product([-1,1], [-1,1])) + [(0,0)]:
            image = np.zeros((img_size[0]+2, img_size[1]+2))
            image[1+i:1+i+img_size[0], 1+j:1+j+img_size[1]] = k_image
            X_out[5*k+count] = image[1:1+img_size[0], 1:1+img_size[1]].flatten()
            y_out[5*k+count] = y[k]
            count += 1
    return X_out, y_out
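

# Quick shape check with placeholder arrays: for a 28x28 image and window size w there
# are (28-w+1)^2 windows, e.g. 22*22 = 484 windows of 49 pixels each for w = 7; guarded
# so that importing this module has no side effects
if __name__ == '__main__':
    X_demo = np.random.rand(2, 784)   # two placeholder "images", not real MNIST
    y_demo = np.array([3, 7])
    Xw, yw = windowing_tr(X_demo, y_demo, w_size=7)
    print Xw.shape                    # (2 * 484, 49)
    print yw[:3]                      # each window inherits its image's label
    Xa, ya = single_pixeldiag_tr(X_demo, y_demo)
    print Xa.shape                    # (2 * 5, 784): original plus four diagonal shifts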
--------------------------------------------------------------------------------
/mnist_cnn/forward_thinking_cnn_mnist.py:
--------------------------------------------------------------------------------
import numpy as np
import keras
from keras.datasets import mnist
from keras.layers import Input, Dense, Dropout, Conv2D, Flatten, MaxPooling2D
from keras.models import Model
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator

import time

batch_size = 128
num_classes = 10
epochs1 = 1    # epochs for training layer 1
epochs2 = 1    # epochs for training layer 2
epochs3 = 98   # epochs for training layer 3

train_begin_time = time.time()
best_score = 0

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

def save_weights(model, filename, layer):
    conv = model.get_layer('conv{0}'.format(layer)).get_weights()
    fc1 = model.get_layer('fc1').get_weights()
    fc2 = model.get_layer('fc2').get_weights()

    np.savez(filename, W_conv=conv[0], b_conv=conv[1], W_fc1=fc1[0], b_fc1=fc1[1],
             W_fc2=fc2[0], b_fc2=fc2[1])

class TimeHistory(keras.callbacks.Callback):
    """Records wall-clock time per epoch and snapshots the weights whenever
    validation accuracy improves."""

    def on_train_begin(self, logs={}):
        self.times = []
        self.epoch_times = []

    def on_epoch_begin(self, epoch, logs={}):
        self.t0 = time.time()

    def on_epoch_end(self, epoch, logs={}):
        global best_score
        self.times.append(time.time() - train_begin_time)
        self.epoch_times.append(time.time() - self.t0)

        if logs.get('val_acc') > best_score:
            try:
                best_score = logs.get('val_acc')
                save_weights(self.model, 'weights_layer3.npz', 3)
            except Exception:
                pass

def layer1():
    # train conv1 (plus a temporary classifier head) from scratch
    main_input = Input(shape=input_shape, name='main_input')
    conv1 = Conv2D(256, (3,3), activation='relu', padding='same', name='conv1')(main_input)
    conv1 = MaxPooling2D(pool_size=(2,2))(conv1)
    conv1_drop = Dropout(.3)(conv1)
    conv1_flat = Flatten()(conv1_drop)
    fc1 = Dense(150, activation='relu', name='fc1')(conv1_flat)
    fc1_drop = Dropout(.5)(fc1)
    main_output = Dense(10, activation='softmax', name='fc2')(fc1_drop)

    model = Model(inputs=[main_input], outputs=[main_output])
    model.compile(optimizer=keras.optimizers.Adam(lr=0.005), loss='categorical_crossentropy', metrics=['accuracy'])

    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        rotation_range=7,         # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.05,   # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.05,  # randomly shift images vertically (fraction of total height)
        zoom_range=.1)

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    time_history = TimeHistory()
    history = keras.callbacks.History()
    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        steps_per_epoch=x_train.shape[0] // batch_size,
                        epochs=epochs1, callbacks=[history, time_history],
                        validation_data=(x_test, y_test))

    np.savez('layer1_mnist_results.npz', acc=history.history['acc'], loss=history.history['loss'],
             val_acc=history.history['val_acc'], val_loss=history.history['val_loss'],
             times=time_history.times, epoch_times=time_history.epoch_times)
    conv1_weights = model.get_layer('conv1').get_weights()

    save_weights(model, "weights_layer1.npz", 1)

    return conv1_weights
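
# Example of inspecting a saved archive (the keys match the np.savez call in
# save_weights; the conv1 kernel shape assumes the default channels_last layout):
#     weights = np.load('weights_layer1.npz')
#     weights['W_conv'].shape   # (3, 3, 1, 256): 3x3 kernels, 1 input channel, 256 filters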
def layer2(conv1_weights):
    # freeze conv1 at its layer-1 weights and train conv2 (plus a fresh classifier head)
    main_input = Input(shape=input_shape, name='main_input')

    conv1 = Conv2D(256, (3,3), activation='relu', padding='same', trainable=False, name='conv1')(main_input)
    conv2 = Conv2D(256, (3,3), activation='relu', padding='same', name='conv2')(conv1)

    conv2 = MaxPooling2D(pool_size=(2,2))(conv2)
    conv2_drop = Dropout(.3)(conv2)
    conv2_flat = Flatten()(conv2_drop)

    fc1 = Dense(150, activation='relu', name='fc1')(conv2_flat)
    fc1_drop = Dropout(.5)(fc1)
    main_output = Dense(10, activation='softmax', name='fc2')(fc1_drop)

    model = Model(inputs=[main_input], outputs=[main_output])
    model.compile(optimizer=keras.optimizers.Adam(lr=0.005), loss='categorical_crossentropy', metrics=['accuracy'])
    model.get_layer('conv1').set_weights(conv1_weights)

    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        rotation_range=7,         # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.05,   # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.05,  # randomly shift images vertically (fraction of total height)
        zoom_range=.1)

    # Compute quantities required for feature-wise normalization
    datagen.fit(x_train)

    time_history = TimeHistory()
    history = keras.callbacks.History()
    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        steps_per_epoch=x_train.shape[0] // batch_size,
                        epochs=epochs2, callbacks=[history, time_history],
                        validation_data=(x_test, y_test))

    np.savez('layer2_mnist_results.npz', acc=history.history['acc'], loss=history.history['loss'],
             val_acc=history.history['val_acc'], val_loss=history.history['val_loss'],
             times=time_history.times, epoch_times=time_history.epoch_times)
    conv2_weights = model.get_layer('conv2').get_weights()

    save_weights(model, "weights_layer2.npz", 2)

    return conv2_weights

def layer3(conv1_weights, conv2_weights):
    # freeze conv1 and conv2 at their trained weights and train conv3 plus the final head
    main_input = Input(shape=input_shape, name='main_input')
    conv1 = Conv2D(256, (3,3), activation='relu', padding='same', trainable=False, name='conv1')(main_input)
    conv2 = Conv2D(256, (3,3), activation='relu', padding='same', trainable=False, name='conv2')(conv1)

    conv2 = MaxPooling2D(pool_size=(2,2))(conv2)

    conv3 = Conv2D(128, (3,3), activation='relu', padding='same', name='conv3')(conv2)
    conv3_drop = Dropout(.3)(conv3)
    conv3_flat = Flatten()(conv3_drop)

    fc1 = Dense(150, activation='relu', name='fc1')(conv3_flat)
    fc1_drop = Dropout(.5)(fc1)
    main_output = Dense(10, activation='softmax', name='fc2')(fc1_drop)

    model = Model(inputs=[main_input], outputs=[main_output])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.get_layer('conv1').set_weights(conv1_weights)
    model.get_layer('conv2').set_weights(conv2_weights)

    # decaying learning rate schedule for the long final training stage
    def schedule(epoch):
        if epoch < 2:
            return 0.005
        elif epoch < 10:
            return 0.002
        elif epoch < 40:
            return 0.001
        elif epoch < 60:
            return 0.0005
        elif epoch < 80:
            return 0.0001
        else:
            return 0.00005

    rate_schedule = keras.callbacks.LearningRateScheduler(schedule)

    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        rotation_range=7,         # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.05,   # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.05,  # randomly shift images vertically (fraction of total height)
        zoom_range=.1)

    # Compute quantities required for feature-wise normalization
    datagen.fit(x_train)

    time_history = TimeHistory()
    history = keras.callbacks.History()
    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        steps_per_epoch=x_train.shape[0] // batch_size,
                        epochs=epochs3, callbacks=[history, time_history, rate_schedule],
                        validation_data=(x_test, y_test))

    np.savez('layer3_mnist_results.npz', acc=history.history['acc'], loss=history.history['loss'],
             val_acc=history.history['val_acc'], val_loss=history.history['val_loss'],
             times=time_history.times, epoch_times=time_history.epoch_times)

    conv3_weights = model.get_layer('conv3').get_weights()

    return conv3_weights

if __name__ == "__main__":
    # forward thinking: train one layer at a time, freezing the earlier layers as we go
    conv1_weights = layer1()
    conv2_weights = layer2(conv1_weights)
    conv3_weights = layer3(conv1_weights, conv2_weights)
--------------------------------------------------------------------------------