├── .gitignore
├── mnist_cnn
│   ├── run_mnist_cnn.sh
│   ├── plot_results.py
│   ├── mnist_backprop.py
│   └── forward_thinking_cnn_mnist.py
├── FTForest
│   ├── FTDRF.py
│   ├── FTDRF.sh
│   ├── FTDRF_with_MGS.sh
│   ├── concat_mgsout.py
│   ├── mgs.py
│   ├── FTDRF_from_mgs.py
│   ├── FTDRF_test.py
│   ├── structure.py
│   └── functions.py
└── README.md
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.npz
--------------------------------------------------------------------------------
/mnist_cnn/run_mnist_cnn.sh:
--------------------------------------------------------------------------------
#!/bin/bash

python forward_thinking_cnn_mnist.py
python mnist_backprop.py
python plot_results.py
--------------------------------------------------------------------------------
/FTForest/FTDRF.sh:
--------------------------------------------------------------------------------
#!/bin/bash

echo "User note: This test will use substantial computer memory (>50 GB)."
echo "If you do not have these memory resources on your machine, we advise you to cancel this task now."

python FTDRF_test.py
--------------------------------------------------------------------------------
/FTForest/FTDRF_with_MGS.sh:
--------------------------------------------------------------------------------
#!/bin/bash

echo "User note: This test will use substantial computer memory (>50 GB)."
echo "If you do not have these memory resources on your machine, we advise you to cancel this task now."

echo ""
echo "Building 7x7 Window Features"
python mgs.py 7

echo "Building 9x9 Window Features"
python mgs.py 9

echo "Building 14x14 Window Features"
python mgs.py 14

echo "Concatenating Multi-Grained Scanning Output"
python concat_mgsout.py

echo "Building Forward Thinking Deep Random Forest (FTDRF)"
python FTDRF_from_mgs.py
--------------------------------------------------------------------------------
/FTForest/concat_mgsout.py:
--------------------------------------------------------------------------------
import numpy as np
from functions import combine_MGS_output

# read in the training data that was pushed through the MGS forests for all window sizes
filenames = ['./7_wind/X_tr_thru_7_wind.npy', './9_wind/X_tr_thru_9_wind.npy', './14_wind/X_tr_thru_14_wind.npy']

X_out = combine_MGS_output(filenames)
np.save('X_mgsout.npy', X_out)
X_out = 0  # clear up memory

# do the same for the testing data
filenames = ['./7_wind/X_t_thru_7_wind.npy', './9_wind/X_t_thru_9_wind.npy', './14_wind/X_t_thru_14_wind.npy']
X_t_out = combine_MGS_output(filenames)
np.save('X_t_mgsout.npy', X_t_out)
X_t_out = 0  # clear up memory
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# ForwardThinking
Companion code for _Forward Thinking: Building and Training Neural Networks One Layer at a Time_ and _Forward Thinking: Building Deep Random Forests_, submitted to NIPS 2017.

Authors:
- Chris Hettinger
- Tanner Christensen
- Ben Ehlert
- Jeffrey Humpherys
- Tyler Jarvis
- David Kartchner
- Kevin Miller
- Sean Wade

## Publications
- _Forward Thinking: Building and Training Neural Networks One Layer at a Time_: (URL coming soon)
- _Forward Thinking: Building Deep Random Forests_: https://arxiv.org/abs/1705.07366

## Dependencies
- numpy==1.11.3
- tensorflow-gpu==1.0.0
- keras==2.0.4
- matplotlib==2.0.0
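
These pinned versions can be installed in one step; a minimal sketch, assuming a Python 2 environment with `pip` (swap `tensorflow-gpu` for `tensorflow` if no CUDA-capable GPU is available):

```bash
pip install numpy==1.11.3 tensorflow-gpu==1.0.0 keras==2.0.4 matplotlib==2.0.0
```

Note that the `FTForest` scripts additionally import `scikit-learn`, and `plot_results.py` imports `seaborn`.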

## Hardware
We used a single desktop computer with:
- Intel i5-7400 processor
- Nvidia GeForce GTX 1060 3GB GPU
- 8GB DDR4 RAM

With this configuration, it took approximately 2 hours to run the `run_mnist_cnn.sh` script.

## Installation
TODO: Make this pip installable.

## Execution
Once the dependencies above are installed, you may run the included `run_mnist_cnn.sh` script.

This will run the forward thinking neural network (99.72% test accuracy in our tests), then the backpropagation equivalent of our model (99.63% test accuracy in our tests), and finally save and display a plot comparing the test and train accuracies.
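
For example, a minimal sketch (the scripts reference one another by bare filename, so run from inside the directory):

```bash
cd mnist_cnn
bash run_mnist_cnn.sh
```

The `FTForest` experiments run analogously: `bash FTDRF.sh` for the plain FTDRF test, or `bash FTDRF_with_MGS.sh` for the full multi-grained scanning pipeline.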
--------------------------------------------------------------------------------
/mnist_cnn/plot_results.py:
--------------------------------------------------------------------------------
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

fwd_acc = []
fwd_val_acc = []
fwd_times = []

# load the per-layer forward thinking results saved by forward_thinking_cnn_mnist.py
for i in xrange(1, 4):
    fwd = np.load('layer{}_mnist_results.npz'.format(i))
    fwd_acc.append(fwd['acc'])
    fwd_val_acc.append(fwd['val_acc'])
    fwd_times.append(fwd['times'])

fwd_acc = np.concatenate(fwd_acc)
fwd_val_acc = np.concatenate(fwd_val_acc)
fwd_times = np.concatenate(fwd_times)

# load the backpropagation results saved by mnist_backprop.py
back = np.load('mnist_backprop_results.npz')
back_acc = back['acc']
back_val_acc = back['val_acc']
back_times = back['times']


sns.set_context("paper")
sns.set(font='serif')
sns.set_style("white", {
    "font.family": "serif",
    "font.serif": ["Times", "Palatino", "serif"]})

fig = plt.figure()
fig.set_size_inches(3.5, 4)
ax = plt.subplot(211)
fig.dpi = 100

c1 = '#008000'
c2 = '#000073'

# create training accuracy plot
ax.plot(fwd_times / 60., 100 * fwd_acc, label='Forward Thinking', c=c1, linewidth=2)
ax.plot(back_times / 60., 100 * np.array(back_acc), label='Backpropagation', c=c2, alpha=.5)
plt.yticks(np.arange(91, 101))
plt.title('Training Accuracy')

box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.xlabel('Minutes')
plt.ylabel('Accuracy (%)')

# create testing accuracy plot
ax = plt.subplot(212)
ax.plot(fwd_times / 60., 100 * fwd_val_acc, label='Forward Thinking', c=c1, linewidth=2)
ax.plot(back_times / 60., 100 * np.array(back_val_acc), label='Backpropagation', c=c2, alpha=.5)
plt.yticks(np.arange(98, 100, .25))
plt.title('Test Accuracy')

box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.xlabel('Minutes')
plt.ylabel('Accuracy (%)')

plt.savefig('accuracy.png')
plt.show()
--------------------------------------------------------------------------------
/FTForest/mgs.py:
--------------------------------------------------------------------------------
# Script to build the windowed (multi-grained scanning, MGS) features for the whole
# MNIST dataset for one window size: it trains the MGS random forests on the windowed
# training data, then pushes all the training and testing data through them and saves
# the results.

import numpy as np
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
import sys
import os
from keras.datasets import mnist
from functions import windowing_tr, push_thru_MGS_windowing_sep


# get the MNIST data
(X_tr, y_tr), (X_t, y_t) = mnist.load_data()
X_tr = X_tr.reshape(60000, 784)
X_t, y_t = 0, 0  # memory efficiency


# train the Multi-Grained Scanning random forests on windowed data
n_trees = 30
MGS_Forests = []
w = int(sys.argv[1])  # window size, passed on the command line (e.g. python mgs.py 7)
test_name = str(w) + '_wind'
foldername = './' + test_name + '/'
os.mkdir(foldername)

print w
# do multi-grained scanning (windowing) on the training data
Xw, yw = windowing_tr(X_tr, y_tr, w_size=w)
y_tr = 0  # memory

# build the random forests for MGS for this window size
forest1 = RandomForestClassifier(n_trees, max_depth=None, min_samples_split=20, n_jobs=-1)
forest2 = ExtraTreesClassifier(n_trees, max_depth=None, min_samples_split=20, n_jobs=-1)

# fit the forests to the windowed data
forest1.fit(Xw, yw)
forest2.fit(Xw, yw)
Xw, yw = 0, 0  # free up memory

# collect the fitted forests
MGS_Forests.append(forest1)
MGS_Forests.append(forest2)
forest1, forest2 = 0, 0  # memory

# push training data through the MGS random forests and save the result
X_tr_thru = push_thru_MGS_windowing_sep(X_tr, MGS_Forests, w)
np.save(foldername + 'X_tr_thru_' + test_name, X_tr_thru)
X_tr_thru = 0  # memory

# reload MNIST to get the testing data
(X_tr, y_tr), (X_t, y_t) = mnist.load_data()
X_tr, y_tr = 0, 0  # for memory
X_t = X_t.reshape(10000, 784)
y_t = 0  # for memory

# push the testing data through the MGS random forests and save the result
X_t_thru = push_thru_MGS_windowing_sep(X_t, MGS_Forests, w)

np.save(foldername + 'X_t_thru_' + test_name, X_t_thru)
--------------------------------------------------------------------------------
/FTForest/FTDRF_from_mgs.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.metrics import accuracy_score
from keras.datasets import mnist
from structure import RFLayer_RAND


print 'Loading in training data:'
# X_curr is the concatenated MGS output written by concat_mgsout.py;
# MNIST is loaded only to recover the training labels
(X_tr, y_tr), (Xtest, ytest) = mnist.load_data()
X_tr, Xtest, ytest = 0, 0, 0
X_curr = np.load('./X_mgsout.npy')


print
print 'RF Layer training:'

# parameters for building the next layers
n = 2000          # num trees in each layer
min_gain = 0.01   # minimum relative validation gain required to add another layer
verbose = True
max_layers = 5
md = None         # max_depth for the trees
mss = 10          # min_samples_split for the trees
n_jobs = -1

# dictionary where layers of decision trees will be stored
Layers = {}

# train the FTDRF layers on the MGS data loaded in
prev_score = -1.0  # instantiate prev_score
for i in xrange(max_layers):
    print X_curr.shape
    RFL = RFLayer_RAND(n, md=md, mss=mss)
    RFL.fit(X_curr, y_tr, 5, 1, n_jobs)
    Layers[i] = RFL

    # if verbose, print out the estimation accuracy for this layer
    if verbose:
        print 'Layer ' + str(i+1)
        print 'acc: ' + str(RFL.val_score)

    # check whether one more layer improved the score enough to keep going
    rel_gain = (RFL.val_score - prev_score) / float(abs(prev_score))
    if rel_gain < min_gain or RFL.val_score == 1.0:
        print 'Converged! Stopping building layers'
        print
        break
    prev_score = RFL.val_score

    # if moving on to another layer, push the data through this one
    X_curr = RFL.push_thru_data(X_curr)
    print 'Going to another layer'
    print

X_curr, y_tr = 0, 0  # memory

# load in testing data, free up memory of the training data
print 'Loading in testing data'
(X_tr, y_tr), (X_t, y_t) = mnist.load_data()
X_tr, y_tr, X_t = 0, 0, 0
y_t = y_t.astype('uint8')
X_t_curr = np.load('./X_t_mgsout.npy')


# push test data through all but the last FTDRF layer, then predict with the last
for i in xrange(len(Layers) - 1):
    X_t_curr = Layers[i].push_thru_data(X_t_curr)
last = len(Layers) - 1
y_pred = Layers[last].predict(X_t_curr)


print
print 'Statistics:'
print 'The accuracy was:'
print accuracy_score(y_t, y_pred)
print 'Params:'
print 'num_trees in each layer = ' + str(n)
print 'md = ' + str(md)
print 'mss = ' + str(mss)
print
--------------------------------------------------------------------------------
/FTForest/FTDRF_test.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.metrics import accuracy_score
from keras.datasets import mnist
from functions import single_pixeldiag_tr
from structure import RFLayer_RAND

print 'FTDRF Test, no MGS'

# load in the MNIST dataset from keras
print 'Loading in MNIST data'
(X_tr, y_tr), (X_t_curr, y_t) = mnist.load_data()
X_tr = X_tr.reshape(60000, 784)
X_t_curr, y_t = 0, 0  # memory

# augment the training data with single-pixel diagonal wiggling
X_curr, y_tr = single_pixeldiag_tr(X_tr, y_tr)
X_curr = X_curr.astype('uint8')
y_tr = y_tr.astype('uint8')
X_tr = 0


print
# train the layers directly on the augmented raw pixels (no multi-grained scanning)
print 'RF Layer training:'


# parameters for building the next layers
n = 2000          # num trees in each layer
min_gain = 0.01   # minimum relative validation gain required to add another layer
verbose = True
max_layers = 5
md = None         # max_depth for the trees
mss = 10          # min_samples_split for the trees
n_jobs = -1

# dictionary where layers of decision trees will be stored
Layers = {}

prev_score = -1.0  # instantiate prev_score
# build the layers
for i in xrange(max_layers):
    print X_curr.shape
    RFL = RFLayer_RAND(n, md=md, mss=mss)
    RFL.fit(X_curr, y_tr, 5, 1, n_jobs)
    Layers[i] = RFL

    # if verbose, print out the estimation accuracy for this layer
    if verbose:
        print 'Layer ' + str(i+1)
        print 'acc: ' + str(RFL.val_score)

    # check whether one more layer improved the score enough to keep going
    rel_gain = (RFL.val_score - prev_score) / float(abs(prev_score))
    if rel_gain < min_gain or RFL.val_score == 1.0:
        print 'Converged! Stopping building layers'
        print
        break
    prev_score = RFL.val_score

    # if moving on to another layer, push the data through this one
    X_curr = RFL.push_thru_data(X_curr)
    print 'Going to another layer'
    print

X_curr, y_tr = 0, 0  # memory

# load in testing data, free up memory of the training data
print 'Loading in testing data'
(X_tr, y_tr), (X_t_curr, y_t) = mnist.load_data()
X_tr, y_tr = 0, 0
X_t_curr = X_t_curr.reshape(10000, 784)
X_t_curr = X_t_curr.astype('uint8')
y_t = y_t.astype('uint8')


# push test data through all but the last FTDRF layer, then predict with the last
for i in xrange(len(Layers) - 1):
    X_t_curr = Layers[i].push_thru_data(X_t_curr)
last = len(Layers) - 1
y_pred = Layers[last].predict(X_t_curr)


print
print 'Statistics:'
print 'The accuracy was:'
print accuracy_score(y_t, y_pred)
print 'Params:'
print 'num_trees in each layer = ' + str(n)
print 'md = ' + str(md)
print 'mss = ' + str(mss)
print
--------------------------------------------------------------------------------
/FTForest/structure.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score


class RFLayer_RAND(object):
    """One layer of a Forward Thinking Deep Random Forest: a random forest in which
    roughly half of the trees are made completely random."""

    def __init__(self, n_estimators, classifier=True, md=None, mss=10):
        self.n_estimators = n_estimators
        self.max_depth = md
        self.min_samples_split = mss
        self.classifier = classifier

    def fit(self, X_train, y_train, kfold=5, k=2, n_jobs=-1):
        # kfold=5 yields an 80/20 split; k is the number of times we run validation
        if kfold > 1:
            kf = KFold(kfold, shuffle=True)
        else:
            raise ValueError('kfold must be greater than 1 so we can cross validate')

        models = []
        best_score = 0
        best_ind = 0
        count = 0

        # split training data into training and estimating sets via a quasi k-fold validation routine
        for tr_ind, est_ind in kf.split(X_train, y_train):
            # instantiate the layer of decision trees
            models.append(RandomForestClassifier(self.n_estimators, criterion='entropy',
                                                 max_depth=self.max_depth,
                                                 min_samples_split=self.min_samples_split,
                                                 n_jobs=n_jobs))

            # get the split of the training data and train the layer on it
            X_tr, y_tr = X_train[tr_ind, :], y_train[tr_ind]
            models[count].fit(X_tr, y_tr)

            # make roughly half of the trees completely random decision trees by
            # refitting them as ExtraTrees on the same split (a forest's estimators_
            # only exists after fit, so the swap has to happen here)
            for j in xrange(len(models[count].estimators_)):
                if np.random.rand() <= .5:
                    rand_tree = ExtraTreeClassifier(criterion='entropy',
                                                    max_depth=self.max_depth,
                                                    min_samples_split=self.min_samples_split)
                    models[count].estimators_[j] = rand_tree.fit(X_tr, y_tr)
            X_tr, y_tr = 0, 0

            # check accuracy on the estimation set
            X_est, y_est = X_train[est_ind, :], y_train[est_ind]
            y_pred = models[count].predict(X_est)
            acc_score = accuracy_score(y_est, y_pred)
            X_est, y_est = 0, 0  # memory
            y_pred = 0  # memory

            if acc_score > best_score:  # with k > 1 we compare to see which trained layer is best
                best_score = acc_score
                best_ind = count
            count += 1
            if count >= k:
                break

        # save the best layer
        self.L = models[best_ind]
        self.n_classes = self.L.n_classes_
        self.val_score = best_score

    def predict(self, X_test):
        return self.L.predict(X_test)

    def push_thru_data(self, X):
        # push the data X through this layer: each tree contributes its class probabilities
        n_samples, dim_data = X.shape
        X_push = np.empty((n_samples, self.n_estimators*self.n_classes))
        for i, tree in enumerate(self.L.estimators_):
            if self.classifier:
                X_push[:, i*self.n_classes:(i+1)*self.n_classes] = tree.predict_proba(X).astype('float32')
        return X_push
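
# Usage sketch with placeholder data (FTDRF_test.py and FTDRF_from_mgs.py drive the
# real pipelines); guarded so that importing this module stays side-effect free
if __name__ == '__main__':
    X = np.random.rand(100, 20)        # placeholder features
    y = np.random.randint(0, 2, 100)   # placeholder binary labels
    layer = RFLayer_RAND(10, md=None, mss=10)
    layer.fit(X, y, kfold=5, k=1, n_jobs=1)
    print layer.val_score              # held-out accuracy of the kept forest
    X_next = layer.push_thru_data(X)
    print X_next.shape                 # (100, n_estimators * n_classes), features for the next layer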
--------------------------------------------------------------------------------
/mnist_cnn/mnist_backprop.py:
--------------------------------------------------------------------------------
import numpy as np
import keras
from keras.datasets import mnist
from keras.layers import Input, Dense, Dropout, Conv2D, Flatten, MaxPooling2D
from keras.models import Model
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator

import time

batch_size = 128
num_classes = 10


train_begin_time = time.time()
best_score = 0

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

def save_weights(model, filename, layer):
    conv = model.get_layer('conv{0}'.format(layer)).get_weights()
    fc1 = model.get_layer('fc1').get_weights()
    fc2 = model.get_layer('fc2').get_weights()

    np.savez(filename, W_conv=conv[0], b_conv=conv[1], W_fc1=fc1[0], b_fc1=fc1[1],
             W_fc2=fc2[0], b_fc2=fc2[1])

class TimeHistory(keras.callbacks.Callback):
    """Records wall-clock time per epoch and snapshots the weights whenever
    validation accuracy improves."""

    def on_train_begin(self, logs={}):
        self.times = []
        self.epoch_times = []

    def on_epoch_begin(self, epoch, logs={}):
        self.t0 = time.time()

    def on_epoch_end(self, epoch, logs={}):
        global best_score
        self.times.append(time.time() - train_begin_time)
        self.epoch_times.append(time.time() - self.t0)

        if logs.get('val_acc') > best_score:
            try:
                best_score = logs.get('val_acc')
                save_weights(self.model, 'weights_layer3.npz', 3)
            except Exception:
                pass

def run_backprop():
    # the full three-conv-layer network, trained end to end as a baseline
    main_input = Input(shape=input_shape, name='main_input')
    conv1 = Conv2D(256, (3,3), activation='relu', padding='same', name='conv1')(main_input)
    conv2 = Conv2D(256, (3,3), activation='relu', padding='same', name='conv2')(conv1)

    conv2 = MaxPooling2D(pool_size=(2,2))(conv2)

    conv3 = Conv2D(128, (3,3), activation='relu', padding='same', name='conv3')(conv2)
    conv3_drop = Dropout(.3)(conv3)
    conv3_flat = Flatten()(conv3_drop)

    fc1 = Dense(150, activation='relu', name='fc1')(conv3_flat)
    fc1_drop = Dropout(.5)(fc1)
    main_output = Dense(10, activation='softmax', name='fc2')(fc1_drop)

    model = Model(inputs=[main_input], outputs=[main_output])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # decaying learning rate schedule for the long training run
    def schedule(epoch):
        if epoch < 2:
            return 0.005
        elif epoch < 10:
            return 0.002
        elif epoch < 40:
            return 0.001
        elif epoch < 60:
            return 0.0005
        elif epoch < 80:
            return 0.0001
        else:
            return 0.00005

    rate_schedule = keras.callbacks.LearningRateScheduler(schedule)

    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        rotation_range=7,         # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.05,   # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.05,  # randomly shift images vertically (fraction of total height)
        zoom_range=.1)
    # Compute quantities required for feature-wise normalization
    datagen.fit(x_train)

    time_history = TimeHistory()
    history = keras.callbacks.History()

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        steps_per_epoch=x_train.shape[0] // batch_size,
                        epochs=100, callbacks=[history, time_history, rate_schedule],
                        validation_data=(x_test, y_test))

    np.savez('mnist_backprop_results.npz', acc=history.history['acc'], loss=history.history['loss'],
             val_acc=history.history['val_acc'], val_loss=history.history['val_loss'],
             times=time_history.times, epoch_times=time_history.epoch_times)

if __name__ == "__main__":
    run_backprop()
--------------------------------------------------------------------------------
/FTForest/functions.py:
--------------------------------------------------------------------------------
import numpy as np
from itertools import product


# windowing for training data: extract every w_size x w_size window from each image,
# giving each window its parent image's label
# X shape = (n_examples, dimension of data)
def windowing_tr(X, y, img_size=(28,28), w_size=10):
    n_samples, dim_data = X.shape
    if img_size[0]*img_size[1] != dim_data:  # the data must actually contain img_size pixels per example
        raise ValueError('img_size not compatible with size of data in X')
    n_x = img_size[1] - w_size + 1
    n_y = img_size[0] - w_size + 1
    num_wind = n_y*n_x
    X_out = np.empty((num_wind*n_samples, w_size**2))
    y_out = np.zeros(num_wind*n_samples)
    for k in xrange(n_samples):
        k_image = X[k,:].reshape(img_size)
        for i in xrange(n_y):
            for j in xrange(n_x):
                i_row = i*n_x
                X_out[k*num_wind + i_row+j,:] = k_image[i:i+w_size, j:j+w_size].flatten()
                y_out[k*num_wind + i_row+j] = y[k]
    return X_out.astype('float32'), y_out.astype('float32')


# same windowing, but without labels (used when pushing data through fitted forests)
def windowing_push(X, img_size=(28,28), w_size=10):
    n_samples, dim_data = X.shape
    if img_size[0]*img_size[1] != dim_data:
        raise ValueError('img_size not compatible with size of data in X')
    n_x = img_size[1] - w_size + 1
    n_y = img_size[0] - w_size + 1
    num_wind = n_y*n_x
    X_out = np.empty((num_wind*n_samples, w_size**2))
    for k in xrange(n_samples):
        k_image = X[k,:].reshape(img_size)
        for i in xrange(n_y):
            for j in xrange(n_x):
                i_row = i*n_x
                X_out[k*num_wind + i_row+j,:] = k_image[i:i+w_size, j:j+w_size].flatten()
    return X_out.astype('float32')

# push data through all six MGS forests (two per window size) at once
def push_thru_MGS_windowing(X_data, MGS_forests, img_size=(28,28), windows=[7,9,14]):
    n_classes = MGS_forests[0].n_classes_
    n_samples = X_data.shape[0]
    n_x = img_size[1] + 1 - np.array(windows)
    n_y = img_size[0] + 1 - np.array(windows)
    N = n_x * n_y  # number of windows per image, for each window size
    X7 = windowing_push(X_data, w_size=7)
    pred_all1 = MGS_forests[0].predict_proba(X7)
    pred_all1 = pred_all1.reshape(n_samples, N[0]*n_classes)
    pred_all2 = MGS_forests[1].predict_proba(X7)
    X7 = 0
    pred_all2 = pred_all2.reshape(n_samples, N[0]*n_classes)
    pred_all1 = np.hstack((pred_all1, pred_all2))
    X9 = windowing_push(X_data, w_size=9)
    pred_all2 = MGS_forests[2].predict_proba(X9)
    pred_all2 = pred_all2.reshape(n_samples, N[1]*n_classes)
    pred_all1 = np.hstack((pred_all1, pred_all2))
    pred_all2 = MGS_forests[3].predict_proba(X9)
    X9 = 0
    pred_all2 = pred_all2.reshape(n_samples, N[1]*n_classes)
    pred_all1 = np.hstack((pred_all1, pred_all2))
    X14 = windowing_push(X_data, w_size=14)
    pred_all2 = MGS_forests[4].predict_proba(X14)
    pred_all2 = pred_all2.reshape(n_samples, N[2]*n_classes)
    pred_all1 = np.hstack((pred_all1, pred_all2))
    pred_all2 = MGS_forests[5].predict_proba(X14)
    X14 = 0
    pred_all2 = pred_all2.reshape(n_samples, N[2]*n_classes)
    return np.hstack((pred_all1, pred_all2)).astype('float32')

# push data through the two MGS forests for a single window size
def push_thru_MGS_windowing_sep(X_data, MGS_forests, w_size, img_size=(28,28)):
    n_classes = MGS_forests[0].n_classes_
    n_samples = X_data.shape[0]
    n_x = img_size[1] + 1 - w_size
    n_y = img_size[0] + 1 - w_size
    N = n_x * n_y
    Xw = windowing_push(X_data, w_size=w_size)
    pred_all1 = MGS_forests[0].predict_proba(Xw)
    pred_all1 = pred_all1.reshape(n_samples, N*n_classes)
    pred_all2 = MGS_forests[1].predict_proba(Xw)
    Xw = 0
    pred_all2 = pred_all2.reshape(n_samples, N*n_classes)
    return np.hstack((pred_all1, pred_all2)).astype('float32')


# same as above, but reading pre-windowed data from a .npy file
def push_thru_MGS_windowing_sep_npy(filename, MGS_forests, w_size, write_to_file=False, out_file=None):
    X = np.load(filename)
    n_c = MGS_forests[0].n_classes_
    n_samples = X.shape[0]
    N = (28 - w_size + 1)**2
    if write_to_file and out_file is None:
        raise ValueError('to write to file, a filename must be provided')

    # push data through the MGS forests
    pred1 = MGS_forests[0].predict_proba(X)
    pred1 = pred1.reshape(n_samples, N*n_c)
    pred2 = MGS_forests[1].predict_proba(X)
    pred2 = pred2.reshape(n_samples, N*n_c)
    X_thru = np.hstack((pred1, pred2))
    if write_to_file:
        np.save(out_file + '/X_thru_' + str(w_size), X_thru)
    return X_thru

# filenames is a list of files holding the data pushed through the MGS forests;
# the files must be in the same (increasing) order as w_sizes, i.e. 7, 9, 14
def combine_MGS_output(filenames, w_sizes=[7,9,14]):
    if len(filenames) != len(w_sizes):
        raise ValueError('Need to have the same number of files as number of windows')
    count = 0
    for fname in filenames:
        if count == 0:
            X_out = np.load(fname)
            print X_out.shape
        else:
            X = np.load(fname)
            print X.shape
            X_out = np.hstack((X_out, X))
            X = 0  # free up memory
        count += 1
    return X_out.astype('float32')


# single pixel wiggle: augment each image with four one-pixel diagonal shifts
def single_pixeldiag_tr(X, y, img_size=(28,28)):
    n_samples, dim_data = X.shape
    if img_size[0]*img_size[1] != dim_data:
        raise ValueError('img_size not compatible with size of data in X')
    X_out = np.empty((n_samples*5, dim_data))
    y_out = np.zeros(n_samples*5)
    for k in xrange(n_samples):
        k_image = X[k,:].reshape(img_size)
        count = 0
        # the four diagonal shifts plus the original image
        for i, j in list(product([-1,1], [-1,1])) + [(0,0)]:
            image = np.zeros((img_size[0]+2, img_size[1]+2))
            image[1+i:1+i+img_size[0], 1+j:1+j+img_size[1]] = k_image
            X_out[5*k+count] = image[1:1+img_size[0], 1:1+img_size[1]].flatten()
            y_out[5*k+count] = y[k]
            count += 1
    return X_out, y_out
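

# Quick shape check with placeholder arrays: for a 28x28 image and window size w there
# are (28-w+1)^2 windows, e.g. 22*22 = 484 windows of 49 pixels each for w = 7; guarded
# so that importing this module has no side effects
if __name__ == '__main__':
    X_demo = np.random.rand(2, 784)   # two placeholder "images", not real MNIST
    y_demo = np.array([3, 7])
    Xw, yw = windowing_tr(X_demo, y_demo, w_size=7)
    print Xw.shape                    # (2 * 484, 49)
    print yw[:3]                      # each window inherits its image's label
    Xa, ya = single_pixeldiag_tr(X_demo, y_demo)
    print Xa.shape                    # (2 * 5, 784): original plus four diagonal shifts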
--------------------------------------------------------------------------------
/mnist_cnn/forward_thinking_cnn_mnist.py:
--------------------------------------------------------------------------------
import numpy as np
import keras
from keras.datasets import mnist
from keras.layers import Input, Dense, Dropout, Conv2D, Flatten, MaxPooling2D
from keras.models import Model
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator

import time

batch_size = 128
num_classes = 10
epochs1 = 1    # epochs for training layer 1
epochs2 = 1    # epochs for training layer 2
epochs3 = 98   # epochs for training layer 3

train_begin_time = time.time()
best_score = 0

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

def save_weights(model, filename, layer):
    conv = model.get_layer('conv{0}'.format(layer)).get_weights()
    fc1 = model.get_layer('fc1').get_weights()
    fc2 = model.get_layer('fc2').get_weights()

    np.savez(filename, W_conv=conv[0], b_conv=conv[1], W_fc1=fc1[0], b_fc1=fc1[1],
             W_fc2=fc2[0], b_fc2=fc2[1])

class TimeHistory(keras.callbacks.Callback):
    """Records wall-clock time per epoch and snapshots the weights whenever
    validation accuracy improves."""

    def on_train_begin(self, logs={}):
        self.times = []
        self.epoch_times = []

    def on_epoch_begin(self, epoch, logs={}):
        self.t0 = time.time()

    def on_epoch_end(self, epoch, logs={}):
        global best_score
        self.times.append(time.time() - train_begin_time)
        self.epoch_times.append(time.time() - self.t0)

        if logs.get('val_acc') > best_score:
            try:
                best_score = logs.get('val_acc')
                save_weights(self.model, 'weights_layer3.npz', 3)
            except Exception:
                pass

def layer1():
    # train conv1 (plus a temporary classifier head) from scratch
    main_input = Input(shape=input_shape, name='main_input')
    conv1 = Conv2D(256, (3,3), activation='relu', padding='same', name='conv1')(main_input)
    conv1 = MaxPooling2D(pool_size=(2,2))(conv1)
    conv1_drop = Dropout(.3)(conv1)
    conv1_flat = Flatten()(conv1_drop)
    fc1 = Dense(150, activation='relu', name='fc1')(conv1_flat)
    fc1_drop = Dropout(.5)(fc1)
    main_output = Dense(10, activation='softmax', name='fc2')(fc1_drop)

    model = Model(inputs=[main_input], outputs=[main_output])
    model.compile(optimizer=keras.optimizers.Adam(lr=0.005), loss='categorical_crossentropy', metrics=['accuracy'])

    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        rotation_range=7,         # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.05,   # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.05,  # randomly shift images vertically (fraction of total height)
        zoom_range=.1)

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    time_history = TimeHistory()
    history = keras.callbacks.History()
    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        steps_per_epoch=x_train.shape[0] // batch_size,
                        epochs=epochs1, callbacks=[history, time_history],
                        validation_data=(x_test, y_test))

    np.savez('layer1_mnist_results.npz', acc=history.history['acc'], loss=history.history['loss'],
             val_acc=history.history['val_acc'], val_loss=history.history['val_loss'],
             times=time_history.times, epoch_times=time_history.epoch_times)
    conv1_weights = model.get_layer('conv1').get_weights()

    save_weights(model, "weights_layer1.npz", 1)

    return conv1_weights
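
# Example of inspecting a saved archive (the keys match the np.savez call in
# save_weights; the conv1 kernel shape assumes the default channels_last layout):
#     weights = np.load('weights_layer1.npz')
#     weights['W_conv'].shape   # (3, 3, 1, 256): 3x3 kernels, 1 input channel, 256 filters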
def layer2(conv1_weights):
    # freeze conv1 at its layer-1 weights and train conv2 (plus a fresh classifier head)
    main_input = Input(shape=input_shape, name='main_input')

    conv1 = Conv2D(256, (3,3), activation='relu', padding='same', trainable=False, name='conv1')(main_input)
    conv2 = Conv2D(256, (3,3), activation='relu', padding='same', name='conv2')(conv1)

    conv2 = MaxPooling2D(pool_size=(2,2))(conv2)
    conv2_drop = Dropout(.3)(conv2)
    conv2_flat = Flatten()(conv2_drop)

    fc1 = Dense(150, activation='relu', name='fc1')(conv2_flat)
    fc1_drop = Dropout(.5)(fc1)
    main_output = Dense(10, activation='softmax', name='fc2')(fc1_drop)

    model = Model(inputs=[main_input], outputs=[main_output])
    model.compile(optimizer=keras.optimizers.Adam(lr=0.005), loss='categorical_crossentropy', metrics=['accuracy'])
    model.get_layer('conv1').set_weights(conv1_weights)

    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        rotation_range=7,         # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.05,   # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.05,  # randomly shift images vertically (fraction of total height)
        zoom_range=.1)

    # Compute quantities required for feature-wise normalization
    datagen.fit(x_train)

    time_history = TimeHistory()
    history = keras.callbacks.History()
    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        steps_per_epoch=x_train.shape[0] // batch_size,
                        epochs=epochs2, callbacks=[history, time_history],
                        validation_data=(x_test, y_test))

    np.savez('layer2_mnist_results.npz', acc=history.history['acc'], loss=history.history['loss'],
             val_acc=history.history['val_acc'], val_loss=history.history['val_loss'],
             times=time_history.times, epoch_times=time_history.epoch_times)
    conv2_weights = model.get_layer('conv2').get_weights()

    save_weights(model, "weights_layer2.npz", 2)

    return conv2_weights

def layer3(conv1_weights, conv2_weights):
    # freeze conv1 and conv2 at their trained weights and train conv3 plus the final head
    main_input = Input(shape=input_shape, name='main_input')
    conv1 = Conv2D(256, (3,3), activation='relu', padding='same', trainable=False, name='conv1')(main_input)
    conv2 = Conv2D(256, (3,3), activation='relu', padding='same', trainable=False, name='conv2')(conv1)

    conv2 = MaxPooling2D(pool_size=(2,2))(conv2)

    conv3 = Conv2D(128, (3,3), activation='relu', padding='same', name='conv3')(conv2)
    conv3_drop = Dropout(.3)(conv3)
    conv3_flat = Flatten()(conv3_drop)

    fc1 = Dense(150, activation='relu', name='fc1')(conv3_flat)
    fc1_drop = Dropout(.5)(fc1)
    main_output = Dense(10, activation='softmax', name='fc2')(fc1_drop)

    model = Model(inputs=[main_input], outputs=[main_output])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.get_layer('conv1').set_weights(conv1_weights)
    model.get_layer('conv2').set_weights(conv2_weights)

    # decaying learning rate schedule for the long final training stage
    def schedule(epoch):
        if epoch < 2:
            return 0.005
        elif epoch < 10:
            return 0.002
        elif epoch < 40:
            return 0.001
        elif epoch < 60:
            return 0.0005
        elif epoch < 80:
            return 0.0001
        else:
            return 0.00005

    rate_schedule = keras.callbacks.LearningRateScheduler(schedule)

    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        rotation_range=7,         # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.05,   # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.05,  # randomly shift images vertically (fraction of total height)
        zoom_range=.1)

    # Compute quantities required for feature-wise normalization
    datagen.fit(x_train)

    time_history = TimeHistory()
    history = keras.callbacks.History()
    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        steps_per_epoch=x_train.shape[0] // batch_size,
                        epochs=epochs3, callbacks=[history, time_history, rate_schedule],
                        validation_data=(x_test, y_test))

    np.savez('layer3_mnist_results.npz', acc=history.history['acc'], loss=history.history['loss'],
             val_acc=history.history['val_acc'], val_loss=history.history['val_loss'],
             times=time_history.times, epoch_times=time_history.epoch_times)

    conv3_weights = model.get_layer('conv3').get_weights()

    return conv3_weights

if __name__ == "__main__":
    # forward thinking: train one layer at a time, freezing the earlier layers as we go
    conv1_weights = layer1()
    conv2_weights = layer2(conv1_weights)
    conv3_weights = layer3(conv1_weights, conv2_weights)
--------------------------------------------------------------------------------