├── .gitignore ├── README.md ├── ann.py ├── ann_sigmoid.py ├── ann_tf.py ├── ann_theano.py ├── cnn_tf.py ├── cnn_theano.py ├── logistic.py ├── logistic_sigmoid.py ├── show_images.py └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | fer2013* 2 | .DS_Store 3 | *.pyc 4 | TEST.py 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Relevant Course URLs: 2 | 3 | * https://deeplearningcourses.com/c/data-science-logistic-regression-in-python/ 4 | * https://deeplearningcourses.com/c/data-science-deep-learning-in-python/ 5 | * https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow/ 6 | * https://deeplearningcourses.com/c/deep-learning-convolutional-neural-networks-theano-tensorflow/ 7 | 8 | Data: 9 | 10 | https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge 11 | 12 | If you get "An error occurred: Data not found": 13 | 14 | https://archive.org/download/fer2013_202311/fer2013.csv 15 | -------------------------------------------------------------------------------- /ann.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | from util import getData, softmax, cost2, y2indicator, error_rate, relu 10 | from sklearn.utils import shuffle 11 | 12 | 13 | class ANN(object): 14 | def __init__(self, M): 15 | self.M = M 16 | 17 | def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-6, reg=1e-6, epochs=10000, show_fig=False): 18 | 19 | N, D = X.shape 20 | K = len(set(Y)) 21 | T = y2indicator(Y) 22 | self.W1 = np.random.randn(D, self.M) / np.sqrt(D) 23 | self.b1 = np.zeros(self.M) 24 | self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M) 25 | self.b2 = np.zeros(K) 26 | 27 | costs = [] 28 | best_validation_error = 1 29 | for i in range(epochs): 30 | # forward propagation and cost calculation 31 | pY, Z = self.forward(X) 32 | 33 | # gradient descent step 34 | pY_T = pY - T 35 | self.W2 -= learning_rate*(Z.T.dot(pY_T) + reg*self.W2) 36 | self.b2 -= learning_rate*(pY_T.sum(axis=0) + reg*self.b2) 37 | # dZ = pY_T.dot(self.W2.T) * (Z > 0) # relu 38 | dZ = pY_T.dot(self.W2.T) * (1 - Z*Z) # tanh 39 | self.W1 -= learning_rate*(X.T.dot(dZ) + reg*self.W1) 40 | self.b1 -= learning_rate*(dZ.sum(axis=0) + reg*self.b1) 41 | 42 | if i % 10 == 0: 43 | pYvalid, _ = self.forward(Xvalid) 44 | c = cost2(Yvalid, pYvalid) 45 | costs.append(c) 46 | e = error_rate(Yvalid, np.argmax(pYvalid, axis=1)) 47 | print("i:", i, "cost:", c, "error:", e) 48 | if e < best_validation_error: 49 | best_validation_error = e 50 | print("best_validation_error:", best_validation_error) 51 | 52 | if show_fig: 53 | plt.plot(costs) 54 | plt.show() 55 | 56 | 57 | def forward(self, X): 58 | # Z = relu(X.dot(self.W1) + self.b1) 59 | Z = np.tanh(X.dot(self.W1) + self.b1) 60 | return softmax(Z.dot(self.W2) + self.b2), Z 61 | 62 | def predict(self, X): 63 | pY, _ = self.forward(X) 64 | return np.argmax(pY, axis=1) 65 | 66 | def score(self, X, Y): 67 | prediction = self.predict(X) 68 | return 1 - error_rate(Y, prediction) 69 | 70 | 71 | def main(): 72 | Xtrain, Ytrain, Xvalid, Yvalid = getData() 73 | 74 | model = ANN(200) 75 | 
model.fit(Xtrain, Ytrain, Xvalid, Yvalid, reg=0, show_fig=True) 76 | print(model.score(Xvalid, Yvalid)) 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /ann_sigmoid.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.utils import shuffle 9 | from util import getBinaryData, sigmoid, sigmoid_cost, error_rate, relu 10 | 11 | 12 | class ANN(object): 13 | def __init__(self, M): 14 | self.M = M 15 | 16 | def fit(self, X, Y, learning_rate=5e-7, reg=1.0, epochs=10000, show_fig=False): 17 | X, Y = shuffle(X, Y) 18 | Xvalid, Yvalid = X[-1000:], Y[-1000:] 19 | X, Y = X[:-1000], Y[:-1000] 20 | 21 | N, D = X.shape 22 | self.W1 = np.random.randn(D, self.M) / np.sqrt(D) 23 | self.b1 = np.zeros(self.M) 24 | self.W2 = np.random.randn(self.M) / np.sqrt(self.M) 25 | self.b2 = 0 26 | 27 | costs = [] 28 | best_validation_error = 1 29 | for i in range(epochs): 30 | # forward propagation and cost calculation 31 | pY, Z = self.forward(X) 32 | 33 | # gradient descent step 34 | pY_Y = pY - Y 35 | self.W2 -= learning_rate*(Z.T.dot(pY_Y) + reg*self.W2) 36 | self.b2 -= learning_rate*((pY_Y).sum() + reg*self.b2) 37 | 38 | # print "(pY_Y).dot(self.W2.T) shape:", (pY_Y).dot(self.W2.T).shape 39 | # print "Z shape:", Z.shape 40 | dZ = np.outer(pY_Y, self.W2) * (Z > 0) 41 | # dZ = np.outer(pY_Y, self.W2) * (1 - Z*Z) 42 | self.W1 -= learning_rate*(X.T.dot(dZ) + reg*self.W1) 43 | self.b1 -= learning_rate*(np.sum(dZ, axis=0) + reg*self.b1) 44 | 45 | if i % 20 == 0: 46 | pYvalid, _ = self.forward(Xvalid) 47 | c = sigmoid_cost(Yvalid, pYvalid) 48 | costs.append(c) 49 | e = error_rate(Yvalid, np.round(pYvalid)) 50 | print("i:", i, "cost:", c, "error:", e) 51 | if e < best_validation_error: 52 | best_validation_error = e 53 | print("best_validation_error:", best_validation_error) 54 | 55 | if show_fig: 56 | plt.plot(costs) 57 | plt.show() 58 | 59 | 60 | def forward(self, X): 61 | Z = relu(X.dot(self.W1) + self.b1) 62 | # Z = np.tanh(X.dot(self.W1) + self.b1) 63 | return sigmoid(Z.dot(self.W2) + self.b2), Z 64 | 65 | 66 | def predict(self, X): 67 | pY, _ = self.forward(X) 68 | return np.round(pY) 69 | 70 | 71 | def score(self, X, Y): 72 | prediction = self.predict(X) 73 | return 1 - error_rate(Y, prediction) 74 | 75 | 76 | def main(): 77 | X, Y = getBinaryData() 78 | 79 | X0 = X[Y==0, :] 80 | X1 = X[Y==1, :] 81 | X1 = np.repeat(X1, 9, axis=0) 82 | X = np.vstack([X0, X1]) 83 | Y = np.array([0]*len(X0) + [1]*len(X1)) 84 | 85 | model = ANN(100) 86 | model.fit(X, Y, show_fig=True) 87 | 88 | if __name__ == '__main__': 89 | main() 90 | -------------------------------------------------------------------------------- /ann_tf.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | import matplotlib.pyplot as plt 9 | from util import getData, getBinaryData, y2indicator, error_rate, init_weight_and_bias 10 | from sklearn.utils import shuffle 11 | 12 | 13 | class HiddenLayer(object): 14 | def __init__(self, M1, M2, an_id):
15 | self.id = an_id 16 | self.M1 = M1 17 | self.M2 = M2 18 | W, b = init_weight_and_bias(M1, M2) 19 | self.W = tf.Variable(W.astype(np.float32)) 20 | self.b = tf.Variable(b.astype(np.float32)) 21 | self.params = [self.W, self.b] 22 | 23 | def forward(self, X): 24 | return tf.nn.relu(tf.matmul(X, self.W) + self.b) 25 | 26 | 27 | class ANN(object): 28 | def __init__(self, hidden_layer_sizes): 29 | self.hidden_layer_sizes = hidden_layer_sizes 30 | 31 | def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, epochs=10, batch_sz=100, show_fig=False): 32 | K = len(set(Y)) # won't work later b/c we turn it into indicator 33 | 34 | # make a validation set 35 | X, Y = shuffle(X, Y) 36 | X = X.astype(np.float32) 37 | Y = y2indicator(Y).astype(np.float32) 38 | 39 | # for calculating error rate 40 | Yvalid_flat = Yvalid 41 | Yvalid = y2indicator(Yvalid).astype(np.float32) 42 | 43 | # initialize hidden layers 44 | N, D = X.shape 45 | 46 | self.hidden_layers = [] 47 | M1 = D 48 | count = 0 49 | for M2 in self.hidden_layer_sizes: 50 | h = HiddenLayer(M1, M2, count) 51 | self.hidden_layers.append(h) 52 | M1 = M2 53 | count += 1 54 | W, b = init_weight_and_bias(M1, K) 55 | self.W = tf.Variable(W.astype(np.float32)) 56 | self.b = tf.Variable(b.astype(np.float32)) 57 | 58 | # collect params for later use 59 | self.params = [self.W, self.b] 60 | for h in self.hidden_layers: 61 | self.params += h.params 62 | 63 | # set up theano functions and variables 64 | tfX = tf.placeholder(tf.float32, shape=(None, D), name='X') 65 | tfT = tf.placeholder(tf.float32, shape=(None, K), name='T') 66 | act = self.forward(tfX) 67 | 68 | rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params]) 69 | cost = tf.reduce_mean( 70 | tf.nn.softmax_cross_entropy_with_logits( 71 | logits=act, 72 | labels=tfT 73 | ) 74 | ) + rcost 75 | prediction = self.predict(tfX) 76 | train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost) 77 | 78 | n_batches = N // batch_sz 79 | costs = [] 80 | init = tf.global_variables_initializer() 81 | with tf.Session() as session: 82 | session.run(init) 83 | for i in range(epochs): 84 | X, Y = shuffle(X, Y) 85 | for j in range(n_batches): 86 | Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] 87 | Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] 88 | 89 | session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch}) 90 | 91 | if j % 20 == 0: 92 | c = session.run(cost, feed_dict={tfX: Xvalid, tfT: Yvalid}) 93 | costs.append(c) 94 | 95 | p = session.run(prediction, feed_dict={tfX: Xvalid, tfT: Yvalid}) 96 | e = error_rate(Yvalid_flat, p) 97 | print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) 98 | 99 | if show_fig: 100 | plt.plot(costs) 101 | plt.show() 102 | 103 | def forward(self, X): 104 | Z = X 105 | for h in self.hidden_layers: 106 | Z = h.forward(Z) 107 | return tf.matmul(Z, self.W) + self.b 108 | 109 | def predict(self, X): 110 | act = self.forward(X) 111 | return tf.argmax(act, 1) 112 | 113 | 114 | def main(): 115 | Xtrain, Ytrain, Xvalid, Yvalid = getData() 116 | model = ANN([2000, 1000, 500]) 117 | model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True) 118 | 119 | if __name__ == '__main__': 120 | main() 121 | -------------------------------------------------------------------------------- /ann_theano.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | 
# sudo pip install -U future 5 | 6 | import numpy as np 7 | import theano 8 | import theano.tensor as T 9 | import matplotlib.pyplot as plt 10 | 11 | from util import getData, getBinaryData, error_rate, relu, init_weight_and_bias 12 | from sklearn.utils import shuffle 13 | 14 | 15 | 16 | def rmsprop(cost, params, lr, mu, decay, eps): 17 | grads = T.grad(cost, params) 18 | updates = [] 19 | for p, g in zip(params, grads): 20 | # cache 21 | ones = np.ones_like(p.get_value(), dtype=np.float32) 22 | c = theano.shared(ones) 23 | new_c = decay*c + (np.float32(1.0) - decay)*g*g 24 | 25 | # momentum 26 | zeros = np.zeros_like(p.get_value(), dtype=np.float32) 27 | m = theano.shared(zeros) 28 | new_m = mu*m - lr*g / T.sqrt(new_c + eps) 29 | 30 | # param update 31 | new_p = p + new_m 32 | 33 | # append the updates 34 | updates.append((c, new_c)) 35 | updates.append((m, new_m)) 36 | updates.append((p, new_p)) 37 | return updates 38 | 39 | 40 | class HiddenLayer(object): 41 | def __init__(self, M1, M2, an_id): 42 | self.id = an_id 43 | self.M1 = M1 44 | self.M2 = M2 45 | W, b = init_weight_and_bias(M1, M2) 46 | self.W = theano.shared(W, 'W_%s' % self.id) 47 | self.b = theano.shared(b, 'b_%s' % self.id) 48 | self.params = [self.W, self.b] 49 | 50 | def forward(self, X): 51 | return relu(X.dot(self.W) + self.b) 52 | 53 | 54 | class ANN(object): 55 | def __init__(self, hidden_layer_sizes): 56 | self.hidden_layer_sizes = hidden_layer_sizes 57 | 58 | def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, eps=1e-8, epochs=10, batch_sz=100, show_fig=False): 59 | # downcast 60 | learning_rate = np.float32(learning_rate) 61 | mu = np.float32(mu) 62 | decay = np.float32(decay) 63 | reg = np.float32(reg) 64 | eps = np.float32(eps) 65 | 66 | X = X.astype(np.float32) 67 | Xvalid = Xvalid.astype(np.float32) 68 | Y = Y.astype(np.int32) 69 | Yvalid = Yvalid.astype(np.int32) 70 | 71 | # initialize hidden layers 72 | N, D = X.shape 73 | K = len(set(Y)) 74 | self.hidden_layers = [] 75 | M1 = D 76 | count = 0 77 | for M2 in self.hidden_layer_sizes: 78 | h = HiddenLayer(M1, M2, count) 79 | self.hidden_layers.append(h) 80 | M1 = M2 81 | count += 1 82 | W, b = init_weight_and_bias(M1, K) 83 | self.W = theano.shared(W, 'W_logreg') 84 | self.b = theano.shared(b, 'b_logreg') 85 | 86 | # collect params for later use 87 | self.params = [self.W, self.b] 88 | for h in self.hidden_layers: 89 | self.params += h.params 90 | 91 | # set up theano functions and variables 92 | thX = T.fmatrix('X') 93 | thY = T.ivector('Y') 94 | pY = self.th_forward(thX) 95 | 96 | rcost = reg*T.sum([(p*p).sum() for p in self.params]) 97 | cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost 98 | prediction = self.th_predict(thX) 99 | 100 | # actual prediction function 101 | self.predict_op = theano.function(inputs=[thX], outputs=prediction) 102 | cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction]) 103 | 104 | updates = rmsprop(cost, self.params, learning_rate, mu, decay, eps) 105 | train_op = theano.function( 106 | inputs=[thX, thY], 107 | updates=updates 108 | ) 109 | 110 | n_batches = N // batch_sz 111 | costs = [] 112 | for i in range(epochs): 113 | X, Y = shuffle(X, Y) 114 | for j in range(n_batches): 115 | Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] 116 | Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] 117 | 118 | train_op(Xbatch, Ybatch) 119 | 120 | if j % 20 == 0: 121 | c, p = cost_predict_op(Xvalid, Yvalid) 122 | costs.append(c) 123 | e = error_rate(Yvalid, p) 124 | print("i:", 
i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) 125 | 126 | if show_fig: 127 | plt.plot(costs) 128 | plt.show() 129 | 130 | def th_forward(self, X): 131 | Z = X 132 | for h in self.hidden_layers: 133 | Z = h.forward(Z) 134 | return T.nnet.softmax(Z.dot(self.W) + self.b) 135 | 136 | def th_predict(self, X): 137 | pY = self.th_forward(X) 138 | return T.argmax(pY, axis=1) 139 | 140 | def predict(self, X): 141 | return self.predict_op(X) 142 | 143 | 144 | def main(): 145 | Xtrain, Ytrain, Xvalid, Yvalid = getData() 146 | model = ANN([2000, 1000, 500]) 147 | model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True) 148 | 149 | if __name__ == '__main__': 150 | main() 151 | -------------------------------------------------------------------------------- /cnn_tf.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | import matplotlib.pyplot as plt 9 | 10 | from sklearn.utils import shuffle 11 | 12 | from util import getImageData, error_rate, init_weight_and_bias, y2indicator 13 | from ann_tf import HiddenLayer 14 | 15 | # differences from Theano: 16 | # image dimensions are expected to be: N x width x height x color 17 | # filter shapes are expected to be: filter width x filter height x input feature maps x output feature maps 18 | 19 | 20 | def init_filter(shape, poolsz): 21 | w = np.random.randn(*shape) * np.sqrt(2) / np.sqrt(np.prod(shape[:-1]) + shape[-1]*np.prod(shape[:-2] / np.prod(poolsz))) 22 | return w.astype(np.float32) 23 | 24 | 25 | class ConvPoolLayer(object): 26 | def __init__(self, mi, mo, fw=5, fh=5, poolsz=(2, 2)): 27 | # mi = input feature map size 28 | # mo = output feature map size 29 | sz = (fw, fh, mi, mo) 30 | W0 = init_filter(sz, poolsz) 31 | self.W = tf.Variable(W0) 32 | b0 = np.zeros(mo, dtype=np.float32) 33 | self.b = tf.Variable(b0) 34 | self.poolsz = poolsz 35 | self.params = [self.W, self.b] 36 | 37 | def forward(self, X): 38 | conv_out = tf.nn.conv2d(X, self.W, strides=[1, 1, 1, 1], padding='SAME') 39 | conv_out = tf.nn.bias_add(conv_out, self.b) 40 | p1, p2 = self.poolsz 41 | pool_out = tf.nn.max_pool( 42 | conv_out, 43 | ksize=[1, p1, p2, 1], 44 | strides=[1, p1, p2, 1], 45 | padding='SAME' 46 | ) 47 | return tf.nn.relu(pool_out) 48 | 49 | 50 | class CNN(object): 51 | def __init__(self, convpool_layer_sizes, hidden_layer_sizes): 52 | self.convpool_layer_sizes = convpool_layer_sizes 53 | self.hidden_layer_sizes = hidden_layer_sizes 54 | 55 | def fit(self, X, Y, Xvalid, Yvalid, lr=1e-2, mu=0.9, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=5, show_fig=True): 56 | lr = np.float32(lr) 57 | mu = np.float32(mu) 58 | reg = np.float32(reg) 59 | decay = np.float32(decay) 60 | eps = np.float32(eps) 61 | K = len(set(Y)) 62 | 63 | # make a validation set 64 | X, Y = shuffle(X, Y) 65 | X = X.astype(np.float32) 66 | Y = y2indicator(Y).astype(np.float32) 67 | 68 | Yvalid = y2indicator(Yvalid).astype(np.float32) 69 | Yvalid_flat = np.argmax(Yvalid, axis=1) # for calculating error rate 70 | 71 | # initialize convpool layers 72 | N, width, height, c = X.shape 73 | mi = c 74 | outw = width 75 | outh = height 76 | self.convpool_layers = [] 77 | for mo, fw, fh in self.convpool_layer_sizes: 78 | layer = ConvPoolLayer(mi, mo, fw, fh) 79 | self.convpool_layers.append(layer) 80 | outw = outw // 2 81 | outh = 
outh // 2 82 | mi = mo 83 | 84 | # initialize mlp layers 85 | self.hidden_layers = [] 86 | M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer 87 | count = 0 88 | for M2 in self.hidden_layer_sizes: 89 | h = HiddenLayer(M1, M2, count) 90 | self.hidden_layers.append(h) 91 | M1 = M2 92 | count += 1 93 | 94 | # logistic regression layer 95 | W, b = init_weight_and_bias(M1, K) 96 | self.W = tf.Variable(W, 'W_logreg') 97 | self.b = tf.Variable(b, 'b_logreg') 98 | 99 | # collect params for later use 100 | self.params = [self.W, self.b] 101 | for h in self.convpool_layers: 102 | self.params += h.params 103 | for h in self.hidden_layers: 104 | self.params += h.params 105 | 106 | # set up tensorflow functions and variables 107 | tfX = tf.placeholder(tf.float32, shape=(None, width, height, c), name='X') 108 | tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y') 109 | act = self.forward(tfX) 110 | 111 | rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params]) 112 | cost = tf.reduce_mean( 113 | tf.nn.softmax_cross_entropy_with_logits( 114 | logits=act, 115 | labels=tfY 116 | ) 117 | ) + rcost 118 | prediction = self.predict(tfX) 119 | 120 | train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost) 121 | 122 | n_batches = N // batch_sz 123 | costs = [] 124 | init = tf.global_variables_initializer() 125 | with tf.Session() as session: 126 | session.run(init) 127 | for i in range(epochs): 128 | X, Y = shuffle(X, Y) 129 | for j in range(n_batches): 130 | Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] 131 | Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] 132 | 133 | session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch}) 134 | 135 | if j % 20 == 0: 136 | c = session.run(cost, feed_dict={tfX: Xvalid, tfY: Yvalid}) 137 | costs.append(c) 138 | 139 | p = session.run(prediction, feed_dict={tfX: Xvalid, tfY: Yvalid}) 140 | e = error_rate(Yvalid_flat, p) 141 | print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) 142 | 143 | if show_fig: 144 | plt.plot(costs) 145 | plt.show() 146 | 147 | def forward(self, X): 148 | Z = X 149 | for c in self.convpool_layers: 150 | Z = c.forward(Z) 151 | Z_shape = Z.get_shape().as_list() 152 | Z = tf.reshape(Z, [-1, np.prod(Z_shape[1:])]) 153 | for h in self.hidden_layers: 154 | Z = h.forward(Z) 155 | return tf.matmul(Z, self.W) + self.b 156 | 157 | def predict(self, X): 158 | pY = self.forward(X) 159 | return tf.argmax(pY, 1) 160 | 161 | 162 | def main(): 163 | Xtrain, Ytrain, Xvalid, Yvalid = getImageData() 164 | 165 | # reshape X for tf: N x H x W x C 166 | Xtrain = Xtrain.transpose((0, 2, 3, 1)) 167 | Xvalid = Xvalid.transpose((0, 2, 3, 1)) 168 | 169 | model = CNN( 170 | convpool_layer_sizes=[(20, 5, 5), (20, 5, 5)], 171 | hidden_layer_sizes=[500, 300], 172 | ) 173 | model.fit(Xtrain, Ytrain, Xvalid, Yvalid) 174 | 175 | if __name__ == '__main__': 176 | main() 177 | -------------------------------------------------------------------------------- /cnn_theano.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import theano 8 | import theano.tensor as T 9 | import matplotlib.pyplot as plt 10 | 11 | from sklearn.utils import shuffle 12 | from theano.tensor.nnet import conv2d 13 | from theano.tensor.signal.pool import pool_2d 14 | 15 | from util import 
getImageData, error_rate, init_weight_and_bias, init_filter 16 | from ann_theano import HiddenLayer, rmsprop 17 | 18 | 19 | class ConvPoolLayer(object): 20 | def __init__(self, mi, mo, fw=5, fh=5, poolsz=(2, 2)): 21 | # mi = input feature map size 22 | # mo = output feature map size 23 | sz = (mo, mi, fw, fh) 24 | W0 = init_filter(sz, poolsz) 25 | self.W = theano.shared(W0) 26 | b0 = np.zeros(mo, dtype=np.float32) 27 | self.b = theano.shared(b0) 28 | self.poolsz = poolsz 29 | self.params = [self.W, self.b] 30 | 31 | def forward(self, X): 32 | conv_out = conv2d(input=X, filters=self.W) 33 | pooled_out = pool_2d( 34 | input=conv_out, 35 | ws=self.poolsz, 36 | ignore_border=True, 37 | mode='max', 38 | ) 39 | return T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')) 40 | 41 | 42 | class CNN(object): 43 | def __init__(self, convpool_layer_sizes, hidden_layer_sizes): 44 | self.convpool_layer_sizes = convpool_layer_sizes 45 | self.hidden_layer_sizes = hidden_layer_sizes 46 | 47 | def fit(self, X, Y, Xvalid, Yvalid, lr=1e-3, mu=0.99, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=3, show_fig=True): 48 | # downcast 49 | lr = np.float32(lr) 50 | mu = np.float32(mu) 51 | reg = np.float32(reg) 52 | decay = np.float32(decay) 53 | eps = np.float32(eps) 54 | 55 | X = X.astype(np.float32) 56 | Xvalid = Xvalid.astype(np.float32) 57 | Y = Y.astype(np.int32) 58 | Yvalid = Yvalid.astype(np.int32) 59 | 60 | # initialize convpool layers 61 | N, c, width, height = X.shape 62 | mi = c 63 | outw = width 64 | outh = height 65 | self.convpool_layers = [] 66 | for mo, fw, fh in self.convpool_layer_sizes: 67 | layer = ConvPoolLayer(mi, mo, fw, fh) 68 | self.convpool_layers.append(layer) 69 | outw = (outw - fw + 1) // 2 70 | outh = (outh - fh + 1) // 2 71 | mi = mo 72 | 73 | # initialize mlp layers 74 | K = len(set(Y)) 75 | self.hidden_layers = [] 76 | M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer 77 | count = 0 78 | for M2 in self.hidden_layer_sizes: 79 | h = HiddenLayer(M1, M2, count) 80 | self.hidden_layers.append(h) 81 | M1 = M2 82 | count += 1 83 | 84 | # logistic regression layer 85 | W, b = init_weight_and_bias(M1, K) 86 | self.W = theano.shared(W, 'W_logreg') 87 | self.b = theano.shared(b, 'b_logreg') 88 | 89 | # collect params for later use 90 | self.params = [self.W, self.b] 91 | for c in self.convpool_layers: 92 | self.params += c.params 93 | for h in self.hidden_layers: 94 | self.params += h.params 95 | 96 | # set up theano functions and variables 97 | thX = T.tensor4('X', dtype='float32') 98 | thY = T.ivector('Y') 99 | pY = self.forward(thX) 100 | 101 | rcost = reg*T.sum([(p*p).sum() for p in self.params]) 102 | cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost 103 | prediction = self.th_predict(thX) 104 | 105 | cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction]) 106 | 107 | updates = rmsprop(cost, self.params, lr, mu, decay, eps) 108 | train_op = theano.function( 109 | inputs=[thX, thY], 110 | outputs=cost, 111 | updates=updates 112 | ) 113 | 114 | n_batches = N // batch_sz 115 | costs = [] 116 | for i in range(epochs): 117 | X, Y = shuffle(X, Y) 118 | for j in range(n_batches): 119 | Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] 120 | Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] 121 | 122 | train_c = train_op(Xbatch, Ybatch) 123 | 124 | if j % 20 == 0: 125 | c, p = cost_predict_op(Xvalid, Yvalid) 126 | costs.append(c) 127 | e = error_rate(Yvalid, p) 128 | print( 129 | "i:", i, 130 | "j:", j, 131 
| "nb:", n_batches, 132 | "train cost:", train_c, 133 | "cost:", c, 134 | "error rate:", e 135 | ) 136 | 137 | if show_fig: 138 | plt.plot(costs) 139 | plt.show() 140 | 141 | def forward(self, X): 142 | Z = X 143 | for c in self.convpool_layers: 144 | Z = c.forward(Z) 145 | Z = Z.flatten(ndim=2) 146 | for h in self.hidden_layers: 147 | Z = h.forward(Z) 148 | return T.nnet.softmax(Z.dot(self.W) + self.b) 149 | 150 | def th_predict(self, X): 151 | pY = self.forward(X) 152 | return T.argmax(pY, axis=1) 153 | 154 | 155 | def main(): 156 | Xtrain, Ytrain, Xvalid, Yvalid = getImageData() 157 | model = CNN( 158 | convpool_layer_sizes=[(20, 5, 5), (20, 5, 5)], 159 | hidden_layer_sizes=[500, 300], 160 | ) 161 | model.fit(Xtrain, Ytrain, Xvalid, Yvalid) 162 | 163 | if __name__ == '__main__': 164 | main() 165 | -------------------------------------------------------------------------------- /logistic.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | from util import getData, softmax, cost, y2indicator, error_rate 10 | from sklearn.utils import shuffle 11 | 12 | 13 | class LogisticModel(object): 14 | def __init__(self): 15 | pass 16 | 17 | def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-7, reg=0., epochs=10000, show_fig=False): 18 | Tvalid = y2indicator(Yvalid) 19 | 20 | N, D = X.shape 21 | K = len(set(Y)) 22 | T = y2indicator(Y) 23 | self.W = np.random.randn(D, K) / np.sqrt(D) 24 | self.b = np.zeros(K) 25 | 26 | costs = [] 27 | best_validation_error = 1 28 | for i in range(epochs): 29 | # forward propagation and cost calculation 30 | pY = self.forward(X) 31 | 32 | # gradient descent step 33 | self.W -= learning_rate*(X.T.dot(pY - T) + reg*self.W) 34 | self.b -= learning_rate*((pY - T).sum(axis=0) + reg*self.b) 35 | 36 | if i % 10 == 0: 37 | pYvalid = self.forward(Xvalid) 38 | c = cost(Tvalid, pYvalid) 39 | costs.append(c) 40 | e = error_rate(Yvalid, np.argmax(pYvalid, axis=1)) 41 | print("i:", i, "cost:", c, "error:", e) 42 | if e < best_validation_error: 43 | best_validation_error = e 44 | print("best_validation_error:", best_validation_error) 45 | 46 | if show_fig: 47 | plt.plot(costs) 48 | plt.show() 49 | 50 | 51 | def forward(self, X): 52 | return softmax(X.dot(self.W) + self.b) 53 | 54 | def predict(self, X): 55 | pY = self.forward(X) 56 | return np.argmax(pY, axis=1) 57 | 58 | def score(self, X, Y): 59 | prediction = self.predict(X) 60 | return 1 - error_rate(Y, prediction) 61 | 62 | 63 | def main(): 64 | Xtrain, Ytrain, Xvalid, Yvalid = getData() 65 | 66 | model = LogisticModel() 67 | model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True) 68 | print(model.score(Xvalid, Yvalid)) 69 | 70 | if __name__ == '__main__': 71 | main() 72 | -------------------------------------------------------------------------------- /logistic_sigmoid.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.utils import shuffle 9 | from util import getBinaryData, sigmoid, sigmoid_cost, error_rate 10 | 11 | 12 | class LogisticModel(object): 13 | def 
__init__(self): 14 | pass 15 | 16 | def fit(self, X, Y, learning_rate=1e-6, reg=0., epochs=120000, show_fig=False): 17 | X, Y = shuffle(X, Y) 18 | Xvalid, Yvalid = X[-1000:], Y[-1000:] 19 | X, Y = X[:-1000], Y[:-1000] 20 | 21 | N, D = X.shape 22 | self.W = np.random.randn(D) / np.sqrt(D) 23 | self.b = 0 24 | 25 | costs = [] 26 | best_validation_error = 1 27 | for i in range(epochs): 28 | # forward propagation and cost calculation 29 | pY = self.forward(X) 30 | 31 | # gradient descent step 32 | self.W -= learning_rate*(X.T.dot(pY - Y) + reg*self.W) 33 | self.b -= learning_rate*((pY - Y).sum() + reg*self.b) 34 | 35 | 36 | if i % 20 == 0: 37 | pYvalid = self.forward(Xvalid) 38 | c = sigmoid_cost(Yvalid, pYvalid) 39 | costs.append(c) 40 | e = error_rate(Yvalid, np.round(pYvalid)) 41 | print("i:", i, "cost:", c, "error:", e) 42 | if e < best_validation_error: 43 | best_validation_error = e 44 | print("best_validation_error:", best_validation_error) 45 | 46 | if show_fig: 47 | plt.plot(costs) 48 | plt.show() 49 | 50 | 51 | def forward(self, X): 52 | return sigmoid(X.dot(self.W) + self.b) 53 | 54 | def predict(self, X): 55 | pY = self.forward(X) 56 | return np.round(pY) 57 | 58 | 59 | def score(self, X, Y): 60 | prediction = self.predict(X) 61 | return 1 - error_rate(Y, prediction) 62 | 63 | 64 | def main(): 65 | X, Y = getBinaryData() 66 | 67 | X0 = X[Y==0, :] 68 | X1 = X[Y==1, :] 69 | X1 = np.repeat(X1, 9, axis=0) 70 | X = np.vstack([X0, X1]) 71 | Y = np.array([0]*len(X0) + [1]*len(X1)) 72 | 73 | model = LogisticModel() 74 | model.fit(X, Y, show_fig=True) 75 | model.score(X, Y) 76 | # scores = cross_val_score(model, X, Y, cv=5) 77 | # print "score mean:", np.mean(scores), "stdev:", np.std(scores) 78 | 79 | if __name__ == '__main__': 80 | main() 81 | -------------------------------------------------------------------------------- /show_images.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range, input 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | from util import getData 10 | 11 | label_map = ['Anger', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'] 12 | 13 | def main(): 14 | X, Y, _, _ = getData(balance_ones=False) 15 | 16 | while True: 17 | for i in range(7): 18 | x, y = X[Y==i], Y[Y==i] 19 | N = len(y) 20 | j = np.random.choice(N) 21 | plt.imshow(x[j].reshape(48, 48), cmap='gray') 22 | plt.title(label_map[y[j]]) 23 | plt.show() 24 | prompt = input('Quit? 
Enter Y:\n') 25 | if prompt.lower().startswith('y'): 26 | break 27 | 28 | 29 | if __name__ == '__main__': 30 | main() 31 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | # Note: you may need to update your version of future 4 | # sudo pip install -U future 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from sklearn.utils import shuffle 9 | 10 | 11 | def init_weight_and_bias(M1, M2): 12 | W = np.random.randn(M1, M2) / np.sqrt(M1) 13 | b = np.zeros(M2) 14 | return W.astype(np.float32), b.astype(np.float32) 15 | 16 | 17 | def init_filter(shape, poolsz): 18 | w = np.random.randn(*shape) * np.sqrt(2) / np.sqrt(np.prod(shape[1:]) + shape[0]*np.prod(shape[2:] / np.prod(poolsz))) 19 | return w.astype(np.float32) 20 | 21 | 22 | def relu(x): 23 | return x * (x > 0) 24 | 25 | 26 | def sigmoid(A): 27 | return 1 / (1 + np.exp(-A)) 28 | 29 | 30 | def softmax(A): 31 | expA = np.exp(A) 32 | return expA / expA.sum(axis=1, keepdims=True) 33 | 34 | 35 | def sigmoid_cost(T, Y): 36 | return -(T*np.log(Y) + (1-T)*np.log(1-Y)).sum() 37 | 38 | 39 | def cost(T, Y): 40 | return -(T*np.log(Y)).sum() 41 | 42 | 43 | def cost2(T, Y): 44 | # same as cost(), just uses the targets to index Y 45 | # instead of multiplying by a large indicator matrix with mostly 0s 46 | N = len(T) 47 | return -np.log(Y[np.arange(N), T]).mean() 48 | 49 | 50 | def error_rate(targets, predictions): 51 | return np.mean(targets != predictions) 52 | 53 | 54 | def y2indicator(y): 55 | N = len(y) 56 | K = len(set(y)) 57 | ind = np.zeros((N, K)) 58 | for i in range(N): 59 | ind[i, y[i]] = 1 60 | return ind 61 | 62 | 63 | def getData(balance_ones=True, Ntest=1000): 64 | # images are 48x48 = 2304 size vectors 65 | Y = [] 66 | X = [] 67 | first = True 68 | for line in open('fer2013.csv'): 69 | if first: 70 | first = False 71 | else: 72 | row = line.split(',') 73 | Y.append(int(row[0])) 74 | X.append([int(p) for p in row[1].split()]) 75 | 76 | X, Y = np.array(X) / 255.0, np.array(Y) 77 | 78 | # shuffle and split 79 | X, Y = shuffle(X, Y) 80 | Xtrain, Ytrain = X[:-Ntest], Y[:-Ntest] 81 | Xvalid, Yvalid = X[-Ntest:], Y[-Ntest:] 82 | 83 | if balance_ones: 84 | # balance the 1 class 85 | X0, Y0 = Xtrain[Ytrain!=1, :], Ytrain[Ytrain!=1] 86 | X1 = Xtrain[Ytrain==1, :] 87 | X1 = np.repeat(X1, 9, axis=0) 88 | Xtrain = np.vstack([X0, X1]) 89 | Ytrain = np.concatenate((Y0, [1]*len(X1))) 90 | 91 | return Xtrain, Ytrain, Xvalid, Yvalid 92 | 93 | 94 | def getImageData(): 95 | Xtrain, Ytrain, Xvalid, Yvalid = getData() 96 | N, D = Xtrain.shape 97 | d = int(np.sqrt(D)) 98 | Xtrain = Xtrain.reshape(-1, 1, d, d) 99 | Xvalid = Xvalid.reshape(-1, 1, d, d) 100 | return Xtrain, Ytrain, Xvalid, Yvalid 101 | 102 | 103 | def getBinaryData(): 104 | Y = [] 105 | X = [] 106 | first = True 107 | for line in open('fer2013.csv'): 108 | if first: 109 | first = False 110 | else: 111 | row = line.split(',') 112 | y = int(row[0]) 113 | if y == 0 or y == 1: 114 | Y.append(y) 115 | X.append([int(p) for p in row[1].split()]) 116 | return np.array(X) / 255.0, np.array(Y) 117 | 118 | 119 | def crossValidation(model, X, Y, K=5): 120 | # split data into K parts 121 | X, Y = shuffle(X, Y) 122 | sz = len(Y) // K 123 | errors = [] 124 | for k in range(K): 125 | xtr = np.concatenate([ X[:k*sz, :], X[(k*sz + sz):, :] ]) 126 | ytr = np.concatenate([ Y[:k*sz], Y[(k*sz + sz):] ]) 127 | 
xte = X[k*sz:(k*sz + sz), :] 128 | yte = Y[k*sz:(k*sz + sz)] 129 | 130 | model.fit(xtr, ytr) 131 | err = model.score(xte, yte) # score() returns accuracy (1 - error rate), so "errors" actually holds fold scores 132 | errors.append(err) 133 | print("errors:", errors) 134 | return np.mean(errors) 135 | --------------------------------------------------------------------------------
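Note: crossValidation() in util.py is not called by any of the scripts above; logistic_sigmoid.py only hints at cross-validation in its commented-out cross_val_score lines. Below is a minimal sketch, not a file in this repo, of one way it could be wired up with the binary LogisticModel from logistic_sigmoid.py, assuming fer2013.csv is present locally; the script name and run_cross_validation() are hypothetical.

# illustrative sketch (not part of this repo): k-fold CV with util.crossValidation
import numpy as np

from util import getBinaryData, crossValidation
from logistic_sigmoid import LogisticModel


def run_cross_validation():
    # same class-balancing trick as logistic_sigmoid.main(): oversample class 1
    X, Y = getBinaryData()
    X0 = X[Y == 0, :]
    X1 = np.repeat(X[Y == 1, :], 9, axis=0)
    X = np.vstack([X0, X1])
    Y = np.array([0]*len(X0) + [1]*len(X1))

    model = LogisticModel()
    # crossValidation() shuffles, splits into K folds, fits on K-1 folds,
    # and averages model.score() (accuracy) over the held-out folds;
    # each fold runs the full default fit, so expect this to be slow
    mean_score = crossValidation(model, X, Y, K=5)
    print("mean cross-validation score:", mean_score)


if __name__ == '__main__':
    run_cross_validation()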