├── .gitignore ├── README.md ├── ann.py ├── ann_sigmoid.py ├── ann_tf.py ├── ann_theano.py ├── cnn_tf.py ├── cnn_theano.py ├── logistic.py ├── logistic_sigmoid.py ├── show_images.py └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | fer2013* 2 | .DS_Store 3 | *.pyc 4 | TEST.py 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Relevant Course URLs: 2 | 3 | * https://deeplearningcourses.com/c/data-science-logistic-regression-in-python/ 4 | * https://deeplearningcourses.com/c/data-science-deep-learning-in-python/ 5 | * https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow/ 6 | * https://deeplearningcourses.com/c/deep-learning-convolutional-neural-networks-theano-tensorflow/ 7 | 8 | Data: 9 | 10 | https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge 11 | 12 | If you get "An error occurred: Data not found": 13 | 14 | https://archive.org/download/fer2013_202311/fer2013.csv 15 | -------------------------------------------------------------------------------- /ann.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | from util import getData, softmax, cost2, y2indicator, error_rate, relu 10 | from sklearn.utils import shuffle 11 | 12 | 13 | class ANN(object): 14 | def __init__(self, M): 15 | self.M = M 16 | 17 | def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-6, reg=1e-6, epochs=10000, show_fig=False): 18 | 19 | N, D = X.shape 20 | K = len(set(Y)) 21 | T = y2indicator(Y) 22 | self.W1 = np.random.randn(D, self.M) / np.sqrt(D) 23 | self.b1 = np.zeros(self.M) 24 | self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M) 25 | self.b2 = np.zeros(K) 26 | 27 | costs = [] 28 | best_validation_error = 1 29 | for i in range(epochs): 30 | # forward propagation and cost calculation 31 | pY, Z = self.forward(X) 32 | 33 | # gradient descent step 34 | pY_T = pY - T 35 | self.W2 -= learning_rate*(Z.T.dot(pY_T) + reg*self.W2) 36 | self.b2 -= learning_rate*(pY_T.sum(axis=0) + reg*self.b2) 37 | # dZ = pY_T.dot(self.W2.T) * (Z > 0) # relu 38 | dZ = pY_T.dot(self.W2.T) * (1 - Z*Z) # tanh 39 | self.W1 -= learning_rate*(X.T.dot(dZ) + reg*self.W1) 40 | self.b1 -= learning_rate*(dZ.sum(axis=0) + reg*self.b1) 41 | 42 | if i % 10 == 0: 43 | pYvalid, _ = self.forward(Xvalid) 44 | c = cost2(Yvalid, pYvalid) 45 | costs.append(c) 46 | e = error_rate(Yvalid, np.argmax(pYvalid, axis=1)) 47 | print("i:", i, "cost:", c, "error:", e) 48 | if e < best_validation_error: 49 | best_validation_error = e 50 | print("best_validation_error:", best_validation_error) 51 | 52 | if show_fig: 53 | plt.plot(costs) 54 | plt.show() 55 | 56 | 57 | def forward(self, X): 58 | # Z = relu(X.dot(self.W1) + self.b1) 59 | Z = np.tanh(X.dot(self.W1) + self.b1) 60 | return softmax(Z.dot(self.W2) + self.b2), Z 61 | 62 | def predict(self, X): 63 | pY, _ = self.forward(X) 64 | return np.argmax(pY, axis=1) 65 | 66 | def score(self, X, Y): 67 | prediction = self.predict(X) 68 | return 1 - error_rate(Y, prediction) 69 | 70 | 71 | def main(): 72 | Xtrain, Ytrain, Xvalid, Yvalid = getData() 73 | 74 | model = ANN(200) 75 | 
model.fit(Xtrain, Ytrain, Xvalid, Yvalid, reg=0, show_fig=True) 76 | print(model.score(Xvalid, Yvalid)) 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /ann_sigmoid.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.utils import shuffle 9 | from util import getBinaryData, sigmoid, sigmoid_cost, error_rate, relu 10 | 11 | 12 | class ANN(object): 13 | def __init__(self, M): 14 | self.M = M 15 | 16 | def fit(self, X, Y, learning_rate=5e-7, reg=1.0, epochs=10000, show_fig=False): 17 | X, Y = shuffle(X, Y) 18 | Xvalid, Yvalid = X[-1000:], Y[-1000:] 19 | X, Y = X[:-1000], Y[:-1000] 20 | 21 | N, D = X.shape 22 | self.W1 = np.random.randn(D, self.M) / np.sqrt(D) 23 | self.b1 = np.zeros(self.M) 24 | self.W2 = np.random.randn(self.M) / np.sqrt(self.M) 25 | self.b2 = 0 26 | 27 | costs = [] 28 | best_validation_error = 1 29 | for i in range(epochs): 30 | # forward propagation and cost calculation 31 | pY, Z = self.forward(X) 32 | 33 | # gradient descent step 34 | pY_Y = pY - Y 35 | self.W2 -= learning_rate*(Z.T.dot(pY_Y) + reg*self.W2) 36 | self.b2 -= learning_rate*((pY_Y).sum() + reg*self.b2) 37 | 38 | # print "(pY_Y).dot(self.W2.T) shape:", (pY_Y).dot(self.W2.T).shape 39 | # print "Z shape:", Z.shape 40 | dZ = np.outer(pY_Y, self.W2) * (Z > 0) 41 | # dZ = np.outer(pY_Y, self.W2) * (1 - Z*Z) 42 | self.W1 -= learning_rate*(X.T.dot(dZ) + reg*self.W1) 43 | self.b1 -= learning_rate*(np.sum(dZ, axis=0) + reg*self.b1) 44 | 45 | if i % 20 == 0: 46 | pYvalid, _ = self.forward(Xvalid) 47 | c = sigmoid_cost(Yvalid, pYvalid) 48 | costs.append(c) 49 | e = error_rate(Yvalid, np.round(pYvalid)) 50 | print("i:", i, "cost:", c, "error:", e) 51 | if e < best_validation_error: 52 | best_validation_error = e 53 | print("best_validation_error:", best_validation_error) 54 | 55 | if show_fig: 56 | plt.plot(costs) 57 | plt.show() 58 | 59 | 60 | def forward(self, X): 61 | Z = relu(X.dot(self.W1) + self.b1) 62 | # Z = np.tanh(X.dot(self.W1) + self.b1) 63 | return sigmoid(Z.dot(self.W2) + self.b2), Z 64 | 65 | 66 | def predict(self, X): 67 | pY, _ = self.forward(X) 68 | return np.round(pY) 69 | 70 | 71 | def score(self, X, Y): 72 | prediction = self.predict(X) 73 | return 1 - error_rate(Y, prediction) 74 | 75 | 76 | def main(): 77 | X, Y = getBinaryData() 78 | 79 | X0 = X[Y==0, :] 80 | X1 = X[Y==1, :] 81 | X1 = np.repeat(X1, 9, axis=0) 82 | X = np.vstack([X0, X1]) 83 | Y = np.array([0]*len(X0) + [1]*len(X1)) 84 | 85 | model = ANN(100) 86 | model.fit(X, Y, show_fig=True) 87 | 88 | if __name__ == '__main__': 89 | main() 90 | -------------------------------------------------------------------------------- /ann_tf.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | import matplotlib.pyplot as plt 9 | from util import getData, getBinaryData, y2indicator, error_rate, init_weight_and_bias 10 | from sklearn.utils import shuffle 11 | 12 | 13 | class HiddenLayer(object): 14 | def __init__(self, M1, M2, an_id):
15 | self.id = an_id 16 | self.M1 = M1 17 | self.M2 = M2 18 | W, b = init_weight_and_bias(M1, M2) 19 | self.W = tf.Variable(W.astype(np.float32)) 20 | self.b = tf.Variable(b.astype(np.float32)) 21 | self.params = [self.W, self.b] 22 | 23 | def forward(self, X): 24 | return tf.nn.relu(tf.matmul(X, self.W) + self.b) 25 | 26 | 27 | class ANN(object): 28 | def __init__(self, hidden_layer_sizes): 29 | self.hidden_layer_sizes = hidden_layer_sizes 30 | 31 | def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, epochs=10, batch_sz=100, show_fig=False): 32 | K = len(set(Y)) # won't work later b/c we turn it into indicator 33 | 34 | # make a validation set 35 | X, Y = shuffle(X, Y) 36 | X = X.astype(np.float32) 37 | Y = y2indicator(Y).astype(np.float32) 38 | 39 | # for calculating error rate 40 | Yvalid_flat = Yvalid 41 | Yvalid = y2indicator(Yvalid).astype(np.float32) 42 | 43 | # initialize hidden layers 44 | N, D = X.shape 45 | 46 | self.hidden_layers = [] 47 | M1 = D 48 | count = 0 49 | for M2 in self.hidden_layer_sizes: 50 | h = HiddenLayer(M1, M2, count) 51 | self.hidden_layers.append(h) 52 | M1 = M2 53 | count += 1 54 | W, b = init_weight_and_bias(M1, K) 55 | self.W = tf.Variable(W.astype(np.float32)) 56 | self.b = tf.Variable(b.astype(np.float32)) 57 | 58 | # collect params for later use 59 | self.params = [self.W, self.b] 60 | for h in self.hidden_layers: 61 | self.params += h.params 62 | 63 | # set up theano functions and variables 64 | tfX = tf.placeholder(tf.float32, shape=(None, D), name='X') 65 | tfT = tf.placeholder(tf.float32, shape=(None, K), name='T') 66 | act = self.forward(tfX) 67 | 68 | rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params]) 69 | cost = tf.reduce_mean( 70 | tf.nn.softmax_cross_entropy_with_logits( 71 | logits=act, 72 | labels=tfT 73 | ) 74 | ) + rcost 75 | prediction = self.predict(tfX) 76 | train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost) 77 | 78 | n_batches = N // batch_sz 79 | costs = [] 80 | init = tf.global_variables_initializer() 81 | with tf.Session() as session: 82 | session.run(init) 83 | for i in range(epochs): 84 | X, Y = shuffle(X, Y) 85 | for j in range(n_batches): 86 | Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] 87 | Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] 88 | 89 | session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch}) 90 | 91 | if j % 20 == 0: 92 | c = session.run(cost, feed_dict={tfX: Xvalid, tfT: Yvalid}) 93 | costs.append(c) 94 | 95 | p = session.run(prediction, feed_dict={tfX: Xvalid, tfT: Yvalid}) 96 | e = error_rate(Yvalid_flat, p) 97 | print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) 98 | 99 | if show_fig: 100 | plt.plot(costs) 101 | plt.show() 102 | 103 | def forward(self, X): 104 | Z = X 105 | for h in self.hidden_layers: 106 | Z = h.forward(Z) 107 | return tf.matmul(Z, self.W) + self.b 108 | 109 | def predict(self, X): 110 | act = self.forward(X) 111 | return tf.argmax(act, 1) 112 | 113 | 114 | def main(): 115 | Xtrain, Ytrain, Xvalid, Yvalid = getData() 116 | model = ANN([2000, 1000, 500]) 117 | model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True) 118 | 119 | if __name__ == '__main__': 120 | main() 121 | -------------------------------------------------------------------------------- /ann_theano.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | 
# sudo pip install -U future 5 | 6 | import numpy as np 7 | import theano 8 | import theano.tensor as T 9 | import matplotlib.pyplot as plt 10 | 11 | from util import getData, getBinaryData, error_rate, relu, init_weight_and_bias 12 | from sklearn.utils import shuffle 13 | 14 | 15 | 16 | def rmsprop(cost, params, lr, mu, decay, eps): 17 | grads = T.grad(cost, params) 18 | updates = [] 19 | for p, g in zip(params, grads): 20 | # cache 21 | ones = np.ones_like(p.get_value(), dtype=np.float32) 22 | c = theano.shared(ones) 23 | new_c = decay*c + (np.float32(1.0) - decay)*g*g 24 | 25 | # momentum 26 | zeros = np.zeros_like(p.get_value(), dtype=np.float32) 27 | m = theano.shared(zeros) 28 | new_m = mu*m - lr*g / T.sqrt(new_c + eps) 29 | 30 | # param update 31 | new_p = p + new_m 32 | 33 | # append the updates 34 | updates.append((c, new_c)) 35 | updates.append((m, new_m)) 36 | updates.append((p, new_p)) 37 | return updates 38 | 39 | 40 | class HiddenLayer(object): 41 | def __init__(self, M1, M2, an_id): 42 | self.id = an_id 43 | self.M1 = M1 44 | self.M2 = M2 45 | W, b = init_weight_and_bias(M1, M2) 46 | self.W = theano.shared(W, 'W_%s' % self.id) 47 | self.b = theano.shared(b, 'b_%s' % self.id) 48 | self.params = [self.W, self.b] 49 | 50 | def forward(self, X): 51 | return relu(X.dot(self.W) + self.b) 52 | 53 | 54 | class ANN(object): 55 | def __init__(self, hidden_layer_sizes): 56 | self.hidden_layer_sizes = hidden_layer_sizes 57 | 58 | def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, eps=1e-8, epochs=10, batch_sz=100, show_fig=False): 59 | # downcast 60 | learning_rate = np.float32(learning_rate) 61 | mu = np.float32(mu) 62 | decay = np.float32(decay) 63 | reg = np.float32(reg) 64 | eps = np.float32(eps) 65 | 66 | X = X.astype(np.float32) 67 | Xvalid = Xvalid.astype(np.float32) 68 | Y = Y.astype(np.int32) 69 | Yvalid = Yvalid.astype(np.int32) 70 | 71 | # initialize hidden layers 72 | N, D = X.shape 73 | K = len(set(Y)) 74 | self.hidden_layers = [] 75 | M1 = D 76 | count = 0 77 | for M2 in self.hidden_layer_sizes: 78 | h = HiddenLayer(M1, M2, count) 79 | self.hidden_layers.append(h) 80 | M1 = M2 81 | count += 1 82 | W, b = init_weight_and_bias(M1, K) 83 | self.W = theano.shared(W, 'W_logreg') 84 | self.b = theano.shared(b, 'b_logreg') 85 | 86 | # collect params for later use 87 | self.params = [self.W, self.b] 88 | for h in self.hidden_layers: 89 | self.params += h.params 90 | 91 | # set up theano functions and variables 92 | thX = T.fmatrix('X') 93 | thY = T.ivector('Y') 94 | pY = self.th_forward(thX) 95 | 96 | rcost = reg*T.sum([(p*p).sum() for p in self.params]) 97 | cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost 98 | prediction = self.th_predict(thX) 99 | 100 | # actual prediction function 101 | self.predict_op = theano.function(inputs=[thX], outputs=prediction) 102 | cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction]) 103 | 104 | updates = rmsprop(cost, self.params, learning_rate, mu, decay, eps) 105 | train_op = theano.function( 106 | inputs=[thX, thY], 107 | updates=updates 108 | ) 109 | 110 | n_batches = N // batch_sz 111 | costs = [] 112 | for i in range(epochs): 113 | X, Y = shuffle(X, Y) 114 | for j in range(n_batches): 115 | Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] 116 | Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] 117 | 118 | train_op(Xbatch, Ybatch) 119 | 120 | if j % 20 == 0: 121 | c, p = cost_predict_op(Xvalid, Yvalid) 122 | costs.append(c) 123 | e = error_rate(Yvalid, p) 124 | print("i:", 
i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) 125 | 126 | if show_fig: 127 | plt.plot(costs) 128 | plt.show() 129 | 130 | def th_forward(self, X): 131 | Z = X 132 | for h in self.hidden_layers: 133 | Z = h.forward(Z) 134 | return T.nnet.softmax(Z.dot(self.W) + self.b) 135 | 136 | def th_predict(self, X): 137 | pY = self.th_forward(X) 138 | return T.argmax(pY, axis=1) 139 | 140 | def predict(self, X): 141 | return self.predict_op(X) 142 | 143 | 144 | def main(): 145 | Xtrain, Ytrain, Xvalid, Yvalid = getData() 146 | model = ANN([2000, 1000, 500]) 147 | model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True) 148 | 149 | if __name__ == '__main__': 150 | main() 151 | -------------------------------------------------------------------------------- /cnn_tf.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | import matplotlib.pyplot as plt 9 | 10 | from sklearn.utils import shuffle 11 | 12 | from util import getImageData, error_rate, init_weight_and_bias, y2indicator 13 | from ann_tf import HiddenLayer 14 | 15 | # differences from Theano: 16 | # image dimensions are expected to be: N x width x height x color 17 | # filter shapes are expected to be: filter width x filter height x input feature maps x output feature maps 18 | 19 | 20 | def init_filter(shape, poolsz): 21 | w = np.random.randn(*shape) * np.sqrt(2) / np.sqrt(np.prod(shape[:-1]) + shape[-1]*np.prod(shape[:-2] / np.prod(poolsz))) 22 | return w.astype(np.float32) 23 | 24 | 25 | class ConvPoolLayer(object): 26 | def __init__(self, mi, mo, fw=5, fh=5, poolsz=(2, 2)): 27 | # mi = input feature map size 28 | # mo = output feature map size 29 | sz = (fw, fh, mi, mo) 30 | W0 = init_filter(sz, poolsz) 31 | self.W = tf.Variable(W0) 32 | b0 = np.zeros(mo, dtype=np.float32) 33 | self.b = tf.Variable(b0) 34 | self.poolsz = poolsz 35 | self.params = [self.W, self.b] 36 | 37 | def forward(self, X): 38 | conv_out = tf.nn.conv2d(X, self.W, strides=[1, 1, 1, 1], padding='SAME') 39 | conv_out = tf.nn.bias_add(conv_out, self.b) 40 | p1, p2 = self.poolsz 41 | pool_out = tf.nn.max_pool( 42 | conv_out, 43 | ksize=[1, p1, p2, 1], 44 | strides=[1, p1, p2, 1], 45 | padding='SAME' 46 | ) 47 | return tf.nn.relu(pool_out) 48 | 49 | 50 | class CNN(object): 51 | def __init__(self, convpool_layer_sizes, hidden_layer_sizes): 52 | self.convpool_layer_sizes = convpool_layer_sizes 53 | self.hidden_layer_sizes = hidden_layer_sizes 54 | 55 | def fit(self, X, Y, Xvalid, Yvalid, lr=1e-2, mu=0.9, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=5, show_fig=True): 56 | lr = np.float32(lr) 57 | mu = np.float32(mu) 58 | reg = np.float32(reg) 59 | decay = np.float32(decay) 60 | eps = np.float32(eps) 61 | K = len(set(Y)) 62 | 63 | # make a validation set 64 | X, Y = shuffle(X, Y) 65 | X = X.astype(np.float32) 66 | Y = y2indicator(Y).astype(np.float32) 67 | 68 | Yvalid = y2indicator(Yvalid).astype(np.float32) 69 | Yvalid_flat = np.argmax(Yvalid, axis=1) # for calculating error rate 70 | 71 | # initialize convpool layers 72 | N, width, height, c = X.shape 73 | mi = c 74 | outw = width 75 | outh = height 76 | self.convpool_layers = [] 77 | for mo, fw, fh in self.convpool_layer_sizes: 78 | layer = ConvPoolLayer(mi, mo, fw, fh) 79 | self.convpool_layers.append(layer) 80 | outw = outw // 2 81 | outh = 
outh // 2 82 | mi = mo 83 | 84 | # initialize mlp layers 85 | self.hidden_layers = [] 86 | M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer 87 | count = 0 88 | for M2 in self.hidden_layer_sizes: 89 | h = HiddenLayer(M1, M2, count) 90 | self.hidden_layers.append(h) 91 | M1 = M2 92 | count += 1 93 | 94 | # logistic regression layer 95 | W, b = init_weight_and_bias(M1, K) 96 | self.W = tf.Variable(W, 'W_logreg') 97 | self.b = tf.Variable(b, 'b_logreg') 98 | 99 | # collect params for later use 100 | self.params = [self.W, self.b] 101 | for h in self.convpool_layers: 102 | self.params += h.params 103 | for h in self.hidden_layers: 104 | self.params += h.params 105 | 106 | # set up tensorflow functions and variables 107 | tfX = tf.placeholder(tf.float32, shape=(None, width, height, c), name='X') 108 | tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y') 109 | act = self.forward(tfX) 110 | 111 | rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params]) 112 | cost = tf.reduce_mean( 113 | tf.nn.softmax_cross_entropy_with_logits( 114 | logits=act, 115 | labels=tfY 116 | ) 117 | ) + rcost 118 | prediction = self.predict(tfX) 119 | 120 | train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost) 121 | 122 | n_batches = N // batch_sz 123 | costs = [] 124 | init = tf.global_variables_initializer() 125 | with tf.Session() as session: 126 | session.run(init) 127 | for i in range(epochs): 128 | X, Y = shuffle(X, Y) 129 | for j in range(n_batches): 130 | Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] 131 | Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] 132 | 133 | session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch}) 134 | 135 | if j % 20 == 0: 136 | c = session.run(cost, feed_dict={tfX: Xvalid, tfY: Yvalid}) 137 | costs.append(c) 138 | 139 | p = session.run(prediction, feed_dict={tfX: Xvalid, tfY: Yvalid}) 140 | e = error_rate(Yvalid_flat, p) 141 | print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) 142 | 143 | if show_fig: 144 | plt.plot(costs) 145 | plt.show() 146 | 147 | def forward(self, X): 148 | Z = X 149 | for c in self.convpool_layers: 150 | Z = c.forward(Z) 151 | Z_shape = Z.get_shape().as_list() 152 | Z = tf.reshape(Z, [-1, np.prod(Z_shape[1:])]) 153 | for h in self.hidden_layers: 154 | Z = h.forward(Z) 155 | return tf.matmul(Z, self.W) + self.b 156 | 157 | def predict(self, X): 158 | pY = self.forward(X) 159 | return tf.argmax(pY, 1) 160 | 161 | 162 | def main(): 163 | Xtrain, Ytrain, Xvalid, Yvalid = getImageData() 164 | 165 | # reshape X for tf: N x H x W x C 166 | Xtrain = Xtrain.transpose((0, 2, 3, 1)) 167 | Xvalid = Xvalid.transpose((0, 2, 3, 1)) 168 | 169 | model = CNN( 170 | convpool_layer_sizes=[(20, 5, 5), (20, 5, 5)], 171 | hidden_layer_sizes=[500, 300], 172 | ) 173 | model.fit(Xtrain, Ytrain, Xvalid, Yvalid) 174 | 175 | if __name__ == '__main__': 176 | main() 177 | -------------------------------------------------------------------------------- /cnn_theano.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import theano 8 | import theano.tensor as T 9 | import matplotlib.pyplot as plt 10 | 11 | from sklearn.utils import shuffle 12 | from theano.tensor.nnet import conv2d 13 | from theano.tensor.signal.pool import pool_2d 14 | 15 | from util import 
getImageData, error_rate, init_weight_and_bias, init_filter 16 | from ann_theano import HiddenLayer, rmsprop 17 | 18 | 19 | class ConvPoolLayer(object): 20 | def __init__(self, mi, mo, fw=5, fh=5, poolsz=(2, 2)): 21 | # mi = input feature map size 22 | # mo = output feature map size 23 | sz = (mo, mi, fw, fh) 24 | W0 = init_filter(sz, poolsz) 25 | self.W = theano.shared(W0) 26 | b0 = np.zeros(mo, dtype=np.float32) 27 | self.b = theano.shared(b0) 28 | self.poolsz = poolsz 29 | self.params = [self.W, self.b] 30 | 31 | def forward(self, X): 32 | conv_out = conv2d(input=X, filters=self.W) 33 | pooled_out = pool_2d( 34 | input=conv_out, 35 | ws=self.poolsz, 36 | ignore_border=True, 37 | mode='max', 38 | ) 39 | return T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')) 40 | 41 | 42 | class CNN(object): 43 | def __init__(self, convpool_layer_sizes, hidden_layer_sizes): 44 | self.convpool_layer_sizes = convpool_layer_sizes 45 | self.hidden_layer_sizes = hidden_layer_sizes 46 | 47 | def fit(self, X, Y, Xvalid, Yvalid, lr=1e-3, mu=0.99, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=3, show_fig=True): 48 | # downcast 49 | lr = np.float32(lr) 50 | mu = np.float32(mu) 51 | reg = np.float32(reg) 52 | decay = np.float32(decay) 53 | eps = np.float32(eps) 54 | 55 | X = X.astype(np.float32) 56 | Xvalid = Xvalid.astype(np.float32) 57 | Y = Y.astype(np.int32) 58 | Yvalid = Yvalid.astype(np.int32) 59 | 60 | # initialize convpool layers 61 | N, c, width, height = X.shape 62 | mi = c 63 | outw = width 64 | outh = height 65 | self.convpool_layers = [] 66 | for mo, fw, fh in self.convpool_layer_sizes: 67 | layer = ConvPoolLayer(mi, mo, fw, fh) 68 | self.convpool_layers.append(layer) 69 | outw = (outw - fw + 1) // 2 70 | outh = (outh - fh + 1) // 2 71 | mi = mo 72 | 73 | # initialize mlp layers 74 | K = len(set(Y)) 75 | self.hidden_layers = [] 76 | M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer 77 | count = 0 78 | for M2 in self.hidden_layer_sizes: 79 | h = HiddenLayer(M1, M2, count) 80 | self.hidden_layers.append(h) 81 | M1 = M2 82 | count += 1 83 | 84 | # logistic regression layer 85 | W, b = init_weight_and_bias(M1, K) 86 | self.W = theano.shared(W, 'W_logreg') 87 | self.b = theano.shared(b, 'b_logreg') 88 | 89 | # collect params for later use 90 | self.params = [self.W, self.b] 91 | for c in self.convpool_layers: 92 | self.params += c.params 93 | for h in self.hidden_layers: 94 | self.params += h.params 95 | 96 | # set up theano functions and variables 97 | thX = T.tensor4('X', dtype='float32') 98 | thY = T.ivector('Y') 99 | pY = self.forward(thX) 100 | 101 | rcost = reg*T.sum([(p*p).sum() for p in self.params]) 102 | cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost 103 | prediction = self.th_predict(thX) 104 | 105 | cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction]) 106 | 107 | updates = rmsprop(cost, self.params, lr, mu, decay, eps) 108 | train_op = theano.function( 109 | inputs=[thX, thY], 110 | outputs=cost, 111 | updates=updates 112 | ) 113 | 114 | n_batches = N // batch_sz 115 | costs = [] 116 | for i in range(epochs): 117 | X, Y = shuffle(X, Y) 118 | for j in range(n_batches): 119 | Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] 120 | Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] 121 | 122 | train_c = train_op(Xbatch, Ybatch) 123 | 124 | if j % 20 == 0: 125 | c, p = cost_predict_op(Xvalid, Yvalid) 126 | costs.append(c) 127 | e = error_rate(Yvalid, p) 128 | print( 129 | "i:", i, 130 | "j:", j, 131 
| "nb:", n_batches, 132 | "train cost:", train_c, 133 | "cost:", c, 134 | "error rate:", e 135 | ) 136 | 137 | if show_fig: 138 | plt.plot(costs) 139 | plt.show() 140 | 141 | def forward(self, X): 142 | Z = X 143 | for c in self.convpool_layers: 144 | Z = c.forward(Z) 145 | Z = Z.flatten(ndim=2) 146 | for h in self.hidden_layers: 147 | Z = h.forward(Z) 148 | return T.nnet.softmax(Z.dot(self.W) + self.b) 149 | 150 | def th_predict(self, X): 151 | pY = self.forward(X) 152 | return T.argmax(pY, axis=1) 153 | 154 | 155 | def main(): 156 | Xtrain, Ytrain, Xvalid, Yvalid = getImageData() 157 | model = CNN( 158 | convpool_layer_sizes=[(20, 5, 5), (20, 5, 5)], 159 | hidden_layer_sizes=[500, 300], 160 | ) 161 | model.fit(Xtrain, Ytrain, Xvalid, Yvalid) 162 | 163 | if __name__ == '__main__': 164 | main() 165 | -------------------------------------------------------------------------------- /logistic.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | from util import getData, softmax, cost, y2indicator, error_rate 10 | from sklearn.utils import shuffle 11 | 12 | 13 | class LogisticModel(object): 14 | def __init__(self): 15 | pass 16 | 17 | def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-7, reg=0., epochs=10000, show_fig=False): 18 | Tvalid = y2indicator(Yvalid) 19 | 20 | N, D = X.shape 21 | K = len(set(Y)) 22 | T = y2indicator(Y) 23 | self.W = np.random.randn(D, K) / np.sqrt(D) 24 | self.b = np.zeros(K) 25 | 26 | costs = [] 27 | best_validation_error = 1 28 | for i in range(epochs): 29 | # forward propagation and cost calculation 30 | pY = self.forward(X) 31 | 32 | # gradient descent step 33 | self.W -= learning_rate*(X.T.dot(pY - T) + reg*self.W) 34 | self.b -= learning_rate*((pY - T).sum(axis=0) + reg*self.b) 35 | 36 | if i % 10 == 0: 37 | pYvalid = self.forward(Xvalid) 38 | c = cost(Tvalid, pYvalid) 39 | costs.append(c) 40 | e = error_rate(Yvalid, np.argmax(pYvalid, axis=1)) 41 | print("i:", i, "cost:", c, "error:", e) 42 | if e < best_validation_error: 43 | best_validation_error = e 44 | print("best_validation_error:", best_validation_error) 45 | 46 | if show_fig: 47 | plt.plot(costs) 48 | plt.show() 49 | 50 | 51 | def forward(self, X): 52 | return softmax(X.dot(self.W) + self.b) 53 | 54 | def predict(self, X): 55 | pY = self.forward(X) 56 | return np.argmax(pY, axis=1) 57 | 58 | def score(self, X, Y): 59 | prediction = self.predict(X) 60 | return 1 - error_rate(Y, prediction) 61 | 62 | 63 | def main(): 64 | Xtrain, Ytrain, Xvalid, Yvalid = getData() 65 | 66 | model = LogisticModel() 67 | model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True) 68 | print(model.score(Xvalid, Yvalid)) 69 | 70 | if __name__ == '__main__': 71 | main() 72 | -------------------------------------------------------------------------------- /logistic_sigmoid.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.utils import shuffle 9 | from util import getBinaryData, sigmoid, sigmoid_cost, error_rate 10 | 11 | 12 | class LogisticModel(object): 13 | def 
__init__(self): 14 | pass 15 | 16 | def fit(self, X, Y, learning_rate=1e-6, reg=0., epochs=120000, show_fig=False): 17 | X, Y = shuffle(X, Y) 18 | Xvalid, Yvalid = X[-1000:], Y[-1000:] 19 | X, Y = X[:-1000], Y[:-1000] 20 | 21 | N, D = X.shape 22 | self.W = np.random.randn(D) / np.sqrt(D) 23 | self.b = 0 24 | 25 | costs = [] 26 | best_validation_error = 1 27 | for i in range(epochs): 28 | # forward propagation and cost calculation 29 | pY = self.forward(X) 30 | 31 | # gradient descent step 32 | self.W -= learning_rate*(X.T.dot(pY - Y) + reg*self.W) 33 | self.b -= learning_rate*((pY - Y).sum() + reg*self.b) 34 | 35 | 36 | if i % 20 == 0: 37 | pYvalid = self.forward(Xvalid) 38 | c = sigmoid_cost(Yvalid, pYvalid) 39 | costs.append(c) 40 | e = error_rate(Yvalid, np.round(pYvalid)) 41 | print("i:", i, "cost:", c, "error:", e) 42 | if e < best_validation_error: 43 | best_validation_error = e 44 | print("best_validation_error:", best_validation_error) 45 | 46 | if show_fig: 47 | plt.plot(costs) 48 | plt.show() 49 | 50 | 51 | def forward(self, X): 52 | return sigmoid(X.dot(self.W) + self.b) 53 | 54 | def predict(self, X): 55 | pY = self.forward(X) 56 | return np.round(pY) 57 | 58 | 59 | def score(self, X, Y): 60 | prediction = self.predict(X) 61 | return 1 - error_rate(Y, prediction) 62 | 63 | 64 | def main(): 65 | X, Y = getBinaryData() 66 | 67 | X0 = X[Y==0, :] 68 | X1 = X[Y==1, :] 69 | X1 = np.repeat(X1, 9, axis=0) 70 | X = np.vstack([X0, X1]) 71 | Y = np.array([0]*len(X0) + [1]*len(X1)) 72 | 73 | model = LogisticModel() 74 | model.fit(X, Y, show_fig=True) 75 | model.score(X, Y) 76 | # scores = cross_val_score(model, X, Y, cv=5) 77 | # print "score mean:", np.mean(scores), "stdev:", np.std(scores) 78 | 79 | if __name__ == '__main__': 80 | main() 81 | -------------------------------------------------------------------------------- /show_images.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range, input 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | from util import getData 10 | 11 | label_map = ['Anger', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'] 12 | 13 | def main(): 14 | X, Y, _, _ = getData(balance_ones=False) 15 | 16 | while True: 17 | for i in range(7): 18 | x, y = X[Y==i], Y[Y==i] 19 | N = len(y) 20 | j = np.random.choice(N) 21 | plt.imshow(x[j].reshape(48, 48), cmap='gray') 22 | plt.title(label_map[y[j]]) 23 | plt.show() 24 | prompt = input('Quit? 
Enter Y:\n') 25 | if prompt.lower().startswith('y'): 26 | break 27 | 28 | 29 | if __name__ == '__main__': 30 | main() 31 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from sklearn.utils import shuffle 9 | 10 | 11 | def init_weight_and_bias(M1, M2): 12 | W = np.random.randn(M1, M2) / np.sqrt(M1) 13 | b = np.zeros(M2) 14 | return W.astype(np.float32), b.astype(np.float32) 15 | 16 | 17 | def init_filter(shape, poolsz): 18 | w = np.random.randn(*shape) * np.sqrt(2) / np.sqrt(np.prod(shape[1:]) + shape[0]*np.prod(shape[2:] / np.prod(poolsz))) 19 | return w.astype(np.float32) 20 | 21 | 22 | def relu(x): 23 | return x * (x > 0) 24 | 25 | 26 | def sigmoid(A): 27 | return 1 / (1 + np.exp(-A)) 28 | 29 | 30 | def softmax(A): 31 | expA = np.exp(A) 32 | return expA / expA.sum(axis=1, keepdims=True) 33 | 34 | 35 | def sigmoid_cost(T, Y): 36 | return -(T*np.log(Y) + (1-T)*np.log(1-Y)).sum() 37 | 38 | 39 | def cost(T, Y): 40 | return -(T*np.log(Y)).sum() 41 | 42 | 43 | def cost2(T, Y): 44 | # same as cost(), just uses the targets to index Y 45 | # instead of multiplying by a large indicator matrix with mostly 0s 46 | N = len(T) 47 | return -np.log(Y[np.arange(N), T]).mean() 48 | 49 | 50 | def error_rate(targets, predictions): 51 | return np.mean(targets != predictions) 52 | 53 | 54 | def y2indicator(y): 55 | N = len(y) 56 | K = len(set(y)) 57 | ind = np.zeros((N, K)) 58 | for i in range(N): 59 | ind[i, y[i]] = 1 60 | return ind 61 | 62 | 63 | def getData(balance_ones=True, Ntest=1000): 64 | # images are 48x48 = 2304 size vectors 65 | Y = [] 66 | X = [] 67 | first = True 68 | for line in open('fer2013.csv'): 69 | if first: 70 | first = False 71 | else: 72 | row = line.split(',') 73 | Y.append(int(row[0])) 74 | X.append([int(p) for p in row[1].split()]) 75 | 76 | X, Y = np.array(X) / 255.0, np.array(Y) 77 | 78 | # shuffle and split 79 | X, Y = shuffle(X, Y) 80 | Xtrain, Ytrain = X[:-Ntest], Y[:-Ntest] 81 | Xvalid, Yvalid = X[-Ntest:], Y[-Ntest:] 82 | 83 | if balance_ones: 84 | # balance the 1 class 85 | X0, Y0 = Xtrain[Ytrain!=1, :], Ytrain[Ytrain!=1] 86 | X1 = Xtrain[Ytrain==1, :] 87 | X1 = np.repeat(X1, 9, axis=0) 88 | Xtrain = np.vstack([X0, X1]) 89 | Ytrain = np.concatenate((Y0, [1]*len(X1))) 90 | 91 | return Xtrain, Ytrain, Xvalid, Yvalid 92 | 93 | 94 | def getImageData(): 95 | Xtrain, Ytrain, Xvalid, Yvalid = getData() 96 | N, D = Xtrain.shape 97 | d = int(np.sqrt(D)) 98 | Xtrain = Xtrain.reshape(-1, 1, d, d) 99 | Xvalid = Xvalid.reshape(-1, 1, d, d) 100 | return Xtrain, Ytrain, Xvalid, Yvalid 101 | 102 | 103 | def getBinaryData(): 104 | Y = [] 105 | X = [] 106 | first = True 107 | for line in open('fer2013.csv'): 108 | if first: 109 | first = False 110 | else: 111 | row = line.split(',') 112 | y = int(row[0]) 113 | if y == 0 or y == 1: 114 | Y.append(y) 115 | X.append([int(p) for p in row[1].split()]) 116 | return np.array(X) / 255.0, np.array(Y) 117 | 118 | 119 | def crossValidation(model, X, Y, K=5): 120 | # split data into K parts 121 | X, Y = shuffle(X, Y) 122 | sz = len(Y) // K 123 | errors = [] 124 | for k in range(K): 125 | xtr = np.concatenate([ X[:k*sz, :], X[(k*sz + sz):, :] ]) 126 | ytr = np.concatenate([ Y[:k*sz], Y[(k*sz + sz):] ]) 127 | 
xte = X[k*sz:(k*sz + sz), :] 128 | yte = Y[k*sz:(k*sz + sz)] 129 | 130 | model.fit(xtr, ytr) 131 | err = model.score(xte, yte) # score() returns accuracy (1 - error rate), so "errors" actually holds fold scores 132 | errors.append(err) 133 | print("errors:", errors) 134 | return np.mean(errors) 135 | --------------------------------------------------------------------------------
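Note: crossValidation() in util.py is not called by any of the scripts above; logistic_sigmoid.py only hints at cross-validation in its commented-out cross_val_score lines. Below is a minimal sketch, not a file in this repo, of one way it could be wired up with the binary LogisticModel from logistic_sigmoid.py, assuming fer2013.csv is present locally; the script name and run_cross_validation() are hypothetical.

# illustrative sketch (not part of this repo): k-fold CV with util.crossValidation
import numpy as np

from util import getBinaryData, crossValidation
from logistic_sigmoid import LogisticModel


def run_cross_validation():
    # same class-balancing trick as logistic_sigmoid.main(): oversample class 1
    X, Y = getBinaryData()
    X0 = X[Y == 0, :]
    X1 = np.repeat(X[Y == 1, :], 9, axis=0)
    X = np.vstack([X0, X1])
    Y = np.array([0]*len(X0) + [1]*len(X1))

    model = LogisticModel()
    # crossValidation() shuffles, splits into K folds, fits on K-1 folds,
    # and averages model.score() (accuracy) over the held-out folds;
    # each fold runs the full default fit, so expect this to be slow
    mean_score = crossValidation(model, X, Y, K=5)
    print("mean cross-validation score:", mean_score)


if __name__ == '__main__':
    run_cross_validation()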