├── README.md
├── backprop.py
├── data.mat
├── definitions.py
├── helper.py
├── iterate.dat
├── labels.mat
└── main.py

/README.md:
--------------------------------------------------------------------------------
1 | Semi-supervised recursive autoencoders are used to learn sentence representations and predict the polarity of movie reviews. With random word-vector initialization, the model achieves 72% accuracy on the Rotten Tomatoes movie-review dataset.
2 |
3 | This code is based on Richard Socher's work as described in the paper "Semi-Supervised Recursive Autoencoders for Predicting Sentiment Distributions" and on the accompanying MATLAB code.
4 |
5 | The main file is main.py.
6 |
--------------------------------------------------------------------------------
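The core operation implemented in definitions.py below merges two child vectors c1, c2 into a parent p = tanh(W1*c1 + W2*c2 + b1) (normalised to unit length), decodes the parent back into reconstructions y1, y2 of the children, and scores the merge by the alpha-weighted reconstruction error; tree.forward() greedily merges whichever neighbouring pair currently has the smallest error. A minimal standalone sketch of that single step, not part of the repository, assuming a hypothetical d = 4 and random values:

# Sketch of one composition/reconstruction step of the recursive autoencoder.
import numpy as np

d = 4                                        # hypothetical hidden size
rng = np.random.RandomState(0)
W1, W2, W3, W4 = [0.01 * rng.randn(d, d) for _ in range(4)]
b1 = b2 = b3 = np.zeros((d, 1))
c1, c2 = rng.randn(d, 1), rng.randn(d, 1)    # two child representations
alpha = 0.2

p = np.tanh(W1.dot(c1) + W2.dot(c2) + b1)
p = p / np.linalg.norm(p)                    # parent vector, unit length
y1 = np.tanh(W3.dot(p) + b2)                 # reconstruction of child 1
y2 = np.tanh(W4.dot(p) + b3)                 # reconstruction of child 2
y1, y2 = y1 / np.linalg.norm(y1), y2 / np.linalg.norm(y2)
recons_error = 0.5 * alpha * (np.sum((y1 - c1) ** 2) + np.sum((y2 - c2) ** 2))
print(recons_error)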
/backprop.py:
--------------------------------------------------------------------------------
1 | from definitions import *
2 | from functools import partial
3 |
4 | def predict(W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat, alpha, beta, freq, test_sentence, d, num_cat):
5 |     sl = len(test_sentence)
6 |     L = We[:,test_sentence]
7 |     tr = tree(sl, d, num_cat, L)
8 |     tr.forward(freq, W1, W2, W3, W4, Wcat, b1, b2, b3, bcat, alpha, beta, 0)
9 |     pred = fcat(np.dot(Wcat,tr.nodeFeatures[:,2*sl-2])+bcat)
10 |     return 1*(pred>0.5)
11 |
12 | def getW(t, d, num_cat, dict_length):
13 |     if t.shape[0] != 1:
14 |         theta = t[np.newaxis,:]
15 |     else:
16 |         theta = t
17 |     sW = (d, d)
18 |     sb = (d, 1)
19 |     s = d*d
20 |     s2 = num_cat*d
21 |     s3 = dict_length*d
22 |     W1, W2, W3, W4, Wcat, We = theta[0,0:s].reshape(sW), theta[0,s:2*s].reshape(sW), theta[0,2*s:3*s].reshape(sW), theta[0,3*s:4*s].reshape(sW), theta[0,4*s:4*s+s2].reshape((num_cat,d)), theta[0,4*s+s2:4*s+s2+s3].reshape((d,dict_length))
23 |     s4 = 4*s+s2+s3
24 |     b1, b2, b3, bcat = theta[0,s4:s4+d].reshape(sb), theta[0,s4+d:s4+2*d].reshape(sb), theta[0,s4+2*d:s4+3*d].reshape(sb), theta[0,s4+3*d:s4+3*d+num_cat].reshape((num_cat,1))
25 |     return (W1,W2,W3,W4,Wcat,We,b1,b2,b3,bcat)
26 |
27 |
28 | '''Backpropagation for derivative and cost computation'''
29 | def backprop(x, training_data, training_labels, freq_original, d, num_cat, dict_length, alpha, beta, theta):
30 |     sW = (d, d)
31 |     sb = (d, 1)
32 |     (W1,W2,W3,W4,Wcat,We,b1,b2,b3,bcat) = getW(theta, d, num_cat, dict_length)
33 |     gW1, gW2, gW3, gW4, gWcat, gb1, gb2, gb3, gbcat, gWe = np.zeros(sW), np.zeros(sW), np.zeros(sW), np.zeros(sW), np.zeros((num_cat, d)), np.zeros(sb), np.zeros(sb), np.zeros(sb), np.zeros((num_cat,1)), np.zeros((d, dict_length))
34 |     cost_J = 0.0
35 |     for i in range(len(training_data)):
36 |         word_indices = training_data[i]
37 |         true_label = training_labels[i]
38 |         sl = len(word_indices)
39 |         L = We[:,word_indices]
40 |         gL = np.zeros((L.shape[0],L.shape[1]))
41 |         freq = [freq_original[k] for k in word_indices]
42 |         tr = tree(sl, d, num_cat, L)
43 |         if sl>1:
44 |             tr.forward(freq, W1, W2, W3, W4, Wcat, b1, b2, b3, bcat, alpha, beta, true_label)
45 |             for current in range(2*sl-2,sl-1,-1):
46 |                 kid1, kid2 = tr.kids[current,0], tr.kids[current,1]
47 |                 a1, a1_unnorm = tr.nodeFeatures[:,current:current+1], tr.nodeFeatures_unnorm[:,current:current+1]
48 |                 d1, d2 = tr.delta1[:,current:current+1], tr.delta2[:,current:current+1]
49 |                 pd = tr.parentdelta[:,current:current+1]
50 |                 pp = tr.pp[current]
51 |                 if(current==(2*sl-2)):
52 |                     W = np.zeros((d,d))
53 |                     delt = np.zeros((d, 1))
54 |                 else:
55 |                     W, delt = W2.copy(), tr.y2c2[:,pp:pp+1]
56 |                     if(tr.kids[pp,0]==current):#left_child
57 |                         W, delt = W1.copy(), tr.y1c1[:,pp:pp+1]
58 |                 smd = tr.catdelta[:, current:current+1]
59 |                 gbcat += smd
60 |                 h = np.dot(W3.T, d1) + np.dot(W4.T, d2) + np.dot(W.T, pd) + np.dot(Wcat.T, smd) - delt
61 |                 parent_d = np.dot(fnorm_prime(a1_unnorm), h)
62 |                 gWcat += np.dot(smd,a1.T)
63 |                 tr.parentdelta[:,kid1:kid1+1], tr.parentdelta[:,kid2:kid2+1] = parent_d, parent_d
64 |                 gb1, gb2, gb3 = gb1+parent_d, gb2+d1, gb3+d2
65 |                 gW1, gW2, gW3, gW4 = gW1 + np.dot(parent_d, tr.nodeFeatures[:,kid1:kid1+1].T), gW2 + np.dot(parent_d, tr.nodeFeatures[:,kid2:kid2+1].T), gW3 + np.dot(d1,a1.T), gW4 + np.dot(d2,a1.T)
66 |             for j in range(sl-1,-1,-1):
67 |                 pp = tr.pp[j]
68 |                 W, delt = W2.copy(), tr.y2c2[:,pp:pp+1]
69 |                 if(tr.kids[pp,0]==j):#left_child
70 |                     W, delt = W1.copy(), tr.y1c1[:,pp:pp+1]
71 |                 gWcat += np.dot(tr.catdelta[:,j:j+1],tr.nodeFeatures[:,j:j+1].T)
72 |                 gbcat += tr.catdelta[:,j]
73 |                 gL[:,j:j+1] += np.dot(W.T,tr.parentdelta[:,j:j+1]) + np.dot(Wcat.T,tr.catdelta[:,j:j+1]) - delt
74 |                 gWe[:,word_indices[j]] += gL[:,j]
75 |             cost_J += sum(tr.nodeScores) + sum(tr.nodeScoresR)
76 |             actual = gW1[0,2]
77 |             tr.checkgradient(actual, word_indices, freq, 0.0000000000001, W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat, alpha, beta, true_label) # numerical gradient check (debugging aid)
78 |     F = np.ndarray.flatten
79 |     D = np.dot
80 |     #final grad computation
81 |     grad_J = np.concatenate([F(gW1),F(gW2),F(gW3),F(gW4),F(gWcat),F(gWe),F(gb1),F(gb2),F(gb3),F(gbcat)],axis=1)
82 |     grad_reg = np.concatenate([F(W1),F(W2),F(W3),F(W4),F(Wcat),F(We),np.zeros(d),np.zeros(d),np.zeros(d),np.zeros(num_cat)],axis=1)
83 |     grad = grad_J/len(training_data) + .0004*grad_reg
84 |     #final cost computation
85 |     cost_reg = .0002*(D(F(W1),F(W1).T)+D(F(W2),F(W2).T)+D(F(W3),F(W3).T)+D(F(W4),F(W4).T)+D(F(Wcat),F(Wcat).T)+D(F(We),F(We).T))
86 |     cost = cost_J/len(training_data) + cost_reg
87 |     if(x==1):
88 |         return grad
89 |     else:
90 |         return cost[0]
91 |
92 |
93 |
94 |
95 |
--------------------------------------------------------------------------------
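getW() above unpacks one flat parameter vector with the layout [W1 | W2 | W3 | W4 | Wcat | We | b1 | b2 | b3 | bcat]; init_theta() in helper.py below packs the parameters the same way, and fmin_l_bfgs_b in main.py optimises that single vector. A small shape check of the layout, not part of the repository, using hypothetical sizes (the actual run uses d=20, num_cat=1, dict_length=14043):

# Sketch: verify the flat-theta layout that getW() expects.
import numpy as np
from backprop import getW

d, num_cat, dict_length = 5, 1, 30           # hypothetical sizes
n_params = 4*d*d + num_cat*d + d*dict_length + 3*d + num_cat
theta = np.random.rand(1, n_params)

W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat = getW(theta, d, num_cat, dict_length)
assert W1.shape == W2.shape == W3.shape == W4.shape == (d, d)
assert Wcat.shape == (num_cat, d) and We.shape == (d, dict_length)
assert b1.shape == b2.shape == b3.shape == (d, 1) and bcat.shape == (num_cat, 1)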
/data.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skritika/Sentiment-Analysis-using-Recursive-Autoencoders/831fa5d785ecf979eb433d9e942da2bd169dc033/data.mat
--------------------------------------------------------------------------------
/definitions.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def f(x):
4 |     f = np.tanh(x)
5 |     return f
6 |
7 | def fnorm(x):
8 |     (n1, n2) = x.shape
9 |     f = np.tanh(x)
10 |     norm = np.linalg.norm(f,axis=0)*np.ones((n1,n2))
11 |     fnorm = f/norm
12 |     return fnorm
13 |
14 | def fcat(x):
15 |     return 1/(1+np.exp(-x))
16 |
17 | def f_prime(f):
18 |     f_p = 1 - np.square(f)
19 |     return f_p
20 |
21 | def fnorm_prime(f_unnorm):
22 |     f = f_unnorm
23 |     f_p = 1 - np.square(f)
24 |     diag = np.diagflat(f_p)
25 |     norm = np.linalg.norm(f)
26 |     fnorm_p = diag/norm - np.dot(diag, np.dot(f, f.T))/np.power(norm,3)
27 |     return fnorm_p
28 |
29 | def fcat_prime(x):
30 |     #return fcat(x)*(1-fcat(x))
31 |     return x*(1-x) # x is already the sigmoid output, not the pre-activation
32 |
33 | class tree:
34 |     def __init__(self, sl, hiddenSize, cat_size, words):
35 |         self.sl = sl
36 |         self.hiddenSize = hiddenSize
37 |         self.words = words
38 |         self.collapsed = range(0,sl)
39 |         self.pp = np.zeros((2*sl-1,1),dtype=int)
40 |         self.nodeScoresR = np.zeros((2*sl-1,1))
41 |         self.nodeScores = np.zeros((2*sl-1,1))
42 |         self.kids = np.zeros((2*sl-1,2))
43 |         self.numkids = np.ones((2*sl-1,1))
44 |         self.y1c1 = np.zeros((hiddenSize,2*sl-1))
45 |         self.y2c2 = np.zeros((hiddenSize,2*sl-1))
46 |         self.freq = np.zeros((2*sl-1,1))
47 |         self.nodeFeatures = np.concatenate((words, np.zeros((hiddenSize,sl-1))), axis=1)
48 |         self.nodeFeatures_unnorm = np.concatenate((words, np.zeros((hiddenSize,sl-1))), axis=1)
49 |         self.delta1 = np.zeros((hiddenSize,2*sl-1))
50 |         self.delta2 = np.zeros((hiddenSize,2*sl-1))
51 |         self.parentdelta = np.zeros((hiddenSize,2*sl-1))
52 |         self.catdelta = np.zeros((cat_size,2*sl-1))
53 |         self.catdelta_out = np.zeros((self.hiddenSize,2*sl-1))
54 |
55 |     def forward(self, freq, W1, W2, W3, W4, Wcat, b1, b2, b3, bcat, alpha, beta, sentence_label):
56 |         sl = self.sl
57 |         D = np.dot
58 |         '''Builds tree and computes reconstruction error for each node'''
59 |         words = self.words
60 |         for j in range(0,sl-1):
61 |             lens = words.shape[1]
62 |             c1, f1, c2, f2 = words[:,0:lens-1], freq[0:lens-1], words[:,1:lens], freq[1:lens]
63 |             p = f(D(W1,c1)+D(W2,c2)+np.tile(b1,lens-1))
64 |             p_norm = p/(np.linalg.norm(p,axis=0)*np.ones(p.shape))
65 |             y1, y2 = f(D(W3,p_norm)+np.tile(b2,lens-1)), f(D(W4,p_norm)+np.tile(b3,lens-1))
66 |             y1_norm, y2_norm = y1/(np.linalg.norm(y1,axis=0)*np.ones(y1.shape)), y2/(np.linalg.norm(y2,axis=0)*np.ones(y2.shape))
67 |             y1c1, y2c2 = alpha*(y1_norm-c1), alpha*(y2_norm-c2)
68 |             recons_error = sum(y1c1*(y1_norm-c1)+y2c2*(y2_norm-c2))*0.5
69 |             m, mp = np.min(recons_error), np.argmin(recons_error)
70 |             self.y1c1[:,sl+j], self.y2c2[:,sl+j] = y1c1[:,mp], y2c2[:,mp]
71 |             self.delta1[:,sl+j:sl+j+1], self.delta2[:,sl+j:sl+j+1] = D(fnorm_prime(y1[:,mp:mp+1]), y1c1[:,mp:mp+1]), D(fnorm_prime(y2[:,mp:mp+1]), y2c2[:,mp:mp+1])
72 |             index_child1, index_child2 = self.collapsed[mp], self.collapsed[mp+1]
73 |             words = np.delete(words,mp+1,1)
74 |             words[:,mp] = p_norm[:,mp]
75 |             self.nodeFeatures[:,sl+j], self.nodeFeatures_unnorm[:,sl+j] = p_norm[:,mp], p[:,mp]
76 |             self.nodeScoresR[sl+j] = m
77 |             self.pp[index_child1], self.pp[index_child2] = sl+j, sl+j
78 |             self.kids[sl+j,0], self.kids[sl+j,1] = index_child1, index_child2
79 |             self.numkids[sl+j] = self.numkids[self.kids[sl+j,0]] + self.numkids[self.kids[sl+j,1]]
80 |             self.freq = np.delete(self.freq,mp+1,0)
81 |             self.freq[mp] = (D(self.numkids[self.kids[sl+j,0]], f1[mp]) + D(self.numkids[self.kids[sl+j,1]], f2[mp]))/self.numkids[sl+j]
82 |             del self.collapsed[mp]
83 |             self.collapsed[mp]=sl+j
84 |         '''Classification error computation for each node'''
85 |         out = fcat(D(Wcat,self.words)+np.tile(bcat,sl))
86 |         diff = np.tile(sentence_label,sl)-out
87 |         lbl_sm = (1-alpha)*diff
88 |         score = 0.5*lbl_sm*diff
89 |         self.nodeScores[0:sl], self.catdelta[:,0:sl] = score.T, -(lbl_sm)*fcat_prime(out)
90 |         for i in range(sl,2*sl-1):
91 |             sm = fcat(D(Wcat,self.nodeFeatures[:,i]) + bcat)
92 |             lbl_sm = beta*(1-alpha)*(sentence_label-sm)
93 |             self.catdelta[:,i] = -(lbl_sm)*fcat_prime(sm)
94 |             J = 0.5*(D(lbl_sm.T,(sentence_label-sm)))
95 |             self.nodeScores[i] = J
96 |
97 |     def cost(self, words, W1, W2, W3, W4, Wcat, b1, b2, b3, bcat, alpha, beta, sentence_label):
98 |         D = np.dot
99 |         sl = self.sl
100 |         nodeScoresR = np.zeros((2*sl-1,1))
101 |         nodeScores = np.zeros((2*sl-1,1))
102 |         nF = self.nodeFeatures.copy()
103 |         nF[:,0:sl] = words
104 |         for j in range(0,sl-1):
105 |             k1, k2 = self.kids[sl+j,0], self.kids[sl+j,1]
106 |             c1, c2 = nF[:,k1:k1+1], nF[:,k2:k2+1]
107 |             nF[:,sl+j:sl+j+1] = fnorm(D(W1,c1)+D(W2,c2)+b1)
108 |             y1, y2 = f(D(W3,nF[:,sl+j:sl+j+1])+b2), f(D(W4,nF[:,sl+j:sl+j+1])+b3)
109 |             y1_norm, y2_norm = y1/(np.linalg.norm(y1,axis=0)*np.ones(y1.shape)), y2/(np.linalg.norm(y2,axis=0)*np.ones(y2.shape))
110 |             y1c1, y2c2 = alpha*(y1_norm-c1), alpha*(y2_norm-c2)
111 |             nodeScoresR[sl+j] = sum(y1c1*(y1_norm-c1)+y2c2*(y2_norm-c2))*0.5
112 |         out = fcat(D(Wcat,words)+np.tile(bcat,sl))
113 |         diff = np.tile(sentence_label,sl)-out
114 |         lbl_sm = (1-alpha)*diff
115 |         score = 0.5*lbl_sm*diff
116 |         nodeScores[0:sl] = score.T
117 |         for i in range(sl,2*sl-1):
118 |             sm = fcat(D(Wcat,nF[:,i]) + bcat)
119 |             lbl_sm = beta*(1-alpha)*(sentence_label-sm)
120 |             nodeScores[i] = 0.5*(D(lbl_sm.T,(sentence_label-sm)))
121 |         error = (sum(nodeScoresR) + sum(nodeScores))
122 |         return error
123 |
124 |     def checkgradient(self,actual, sentence, freq, eps, W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat, alpha, beta, sentence_label): # numerical gradient check used for debugging
125 |         w = We[:,sentence]
126 |         wa, wb = w.copy(), w.copy()
127 |         W1a, W1b = W1.copy(), W1.copy()
128 |         W2a, W2b = W2.copy(), W2.copy()
129 |         W3a, W3b = W3.copy(), W3.copy()
130 |         W4a, W4b = W4.copy(), W4.copy()
131 |         Wcata, Wcatb = Wcat.copy(), Wcat.copy()
132 |         eps_min = 1e-16
133 |         e = eps_min
134 |         e_range = []
135 |         while(e<1):
136 |             e_range.append(e)
137 |             e = e*10
138 |         J_range = []
139 |         #W2a[3,3], W2b[3,3] = W2[3,3] + eps, W2[3,3] - eps
140 |         #W1a[0,2], W1b[0,2] = W1[0,2] + eps, W1[0,2] - eps
141 |         #W3a[3,4], W3b[3,4] = W3[3,4] + eps, W3[3,4] - eps
142 |         #W4a[2,4], W4b[2,4] = W4[2,4] + eps, W4[2,4] - eps
143 |         #Wcata[0,0], Wcatb[0,0] = Wcat[0,0] + eps, Wcat[0,0] - eps
144 |         #wa[0,1], wb[0,1] = w[0,1] + eps, w[0,1] - eps
145 |         for eps in e_range:
146 |             W1a, W1b = W1.copy(), W1.copy()
147 |             W1a[0,2], W1b[0,2] = W1[0,2] + eps, W1[0,2] - eps
148 |             j1 = self.cost(wa,W1a,W2a,W3a,W4a,Wcata,b1,b2,b3,bcat,alpha,beta,sentence_label)
149 |             j2 = self.cost(wb,W1b,W2b,W3b,W4b,Wcatb,b1,b2,b3,bcat,alpha,beta,sentence_label)
150 |             grad = (j1-j2)/(2*eps)
151 |             grad = abs(grad - actual)
152 |             J_range.append(grad[0])
153 |
154 |
--------------------------------------------------------------------------------
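A detail worth noting about definitions.py: fnorm_prime() takes the unnormalised tanh activation a = tanh(z) and returns the transposed Jacobian of the map z -> tanh(z)/||tanh(z)||, which is how backprop.py consumes it (the incoming gradient is left-multiplied by it). A small finite-difference check of that convention, not part of the repository, on a random 5-dimensional input:

# Sketch: check fnorm_prime() against a numerical Jacobian of fnorm().
import numpy as np
from definitions import fnorm, fnorm_prime

n, eps = 5, 1e-6
rng = np.random.RandomState(0)
z = rng.randn(n, 1)

J_num = np.zeros((n, n))
for j in range(n):
    e = np.zeros((n, 1))
    e[j] = eps
    J_num[:, j] = ((fnorm(z + e) - fnorm(z - e)) / (2 * eps)).ravel()

J_code = fnorm_prime(np.tanh(z)).T    # fnorm_prime expects the tanh activation
print(np.max(np.abs(J_num - J_code)))   # should be tiny, around 1e-8 or smaller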
/helper.py:
--------------------------------------------------------------------------------
1 | from backprop import *
2 | import scipy.io
3 | import math
4 |
5 | def init_theta(d, num_cat, dict_length):
6 |     r = math.sqrt(6)/math.sqrt(d+d+1)
7 |     #We = 1e-3*(np.random.rand(hiddenSize, dictionary_length)*2*r-r)
8 |     W = np.random.rand(1,4*d*d+d*num_cat+d*dict_length)*2*r-r #W1+W2+W3+W4+Wcat+We
9 |     b = np.zeros((1,3*d+num_cat))
10 |     return np.concatenate([W,b],axis=1)
11 |
12 | def accuracy(W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat, alpha, beta, freq, test_sentences, labels, d, num_cat):
13 |     n = len(test_sentences)
14 |     correct = 0
15 |     for i in range(len(test_sentences)):
16 |         p = predict(W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat, alpha, beta, freq, test_sentences[i], d, num_cat)
17 |         correct += 1*(p[0]==labels[i])
18 |     correct = correct*100
19 |     return correct/float(n)
20 |
21 | def load():
22 |     data = scipy.io.loadmat('data.mat')
23 |     data = data['snum']
24 |     train = []
25 |     for i in range(data.shape[0]):
26 |         x = data[i]
27 |         y = x[0]-1
28 |         z = y.tolist()
29 |         u = z[0]
30 |         train.append(u)
31 |
32 |     lbl = scipy.io.loadmat('labels.mat')
33 |     lbl = lbl['lbl']
34 |     lbl = lbl[0]
35 |     labels = []
36 |     for i in range(lbl.shape[0]):
37 |         labels.append(lbl[i])
38 |     return (train, labels)
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
/iterate.dat:
--------------------------------------------------------------------------------
1 | RUNNING THE L-BFGS-B CODE
2 |
3 | it = iteration number
4 | nf = number of function evaluations
5 | nseg = number of segments explored during the Cauchy search
6 | nact = number of active bounds at the generalized Cauchy
point 7 | sub = manner in which the subspace minimization terminated: 8 | con = converged, bnd = a bound was reached 9 | itls = number of iterations performed in the line search 10 | stepl = step length used 11 | tstep = norm of the displacement (total step) 12 | projg = norm of the projected gradient 13 | f = function value 14 | 15 | * * * 16 | 17 | Machine precision = 2.220E-16 18 | N = 282541 M = 10 19 | 20 | it nf nseg nact sub itls stepl tstep projg f 21 | 0 1 - - - - - - 1.955E+00 1.329E+01 22 | 1 2 1 0 --- 0 3.4E-01 1.0E+00 7.802E-01 1.129E+01 23 | 2 3 0 0 con 0 1.0E+00 2.7E+00 4.357E-01 9.085E+00 24 | 3 4 0 0 con 0 1.0E+00 2.1E+00 4.067E-01 8.550E+00 25 | 4 5 0 0 con 0 1.0E+00 6.0E-01 1.520E-01 8.053E+00 26 | 5 6 0 0 con 0 1.0E+00 8.0E-01 1.698E-01 7.684E+00 27 | 6 7 0 0 con 0 1.0E+00 7.7E-01 1.660E-01 7.348E+00 28 | 7 9 0 0 con 1 4.9E-01 1.7E+00 3.072E-01 6.700E+00 29 | 8 13 0 0 con 3 1.7E-01 9.3E-01 3.334E-01 6.321E+00 30 | 9 15 0 0 con 1 7.0E-02 4.0E-01 2.789E-01 6.182E+00 31 | 10 17 0 0 con 1 1.7E-01 2.6E-01 2.108E-01 6.071E+00 32 | 11 19 0 0 con 1 4.8E-01 3.7E-01 4.349E-01 5.999E+00 33 | 12 20 0 0 con 0 1.0E+00 3.6E-01 6.408E-01 5.950E+00 34 | 13 21 0 0 con 0 1.0E+00 2.0E-01 2.122E-01 5.908E+00 35 | 14 22 0 0 con 0 1.0E+00 3.4E-01 2.482E-01 5.847E+00 36 | 15 23 0 0 con 0 1.0E+00 3.9E-01 3.310E-01 5.756E+00 37 | 16 24 0 0 con 0 1.0E+00 1.2E+00 9.688E-01 5.657E+00 38 | 17 25 0 0 con 0 1.0E+00 5.3E-01 4.108E-01 5.511E+00 39 | 18 26 0 0 con 0 1.0E+00 2.1E-01 1.183E-01 5.452E+00 40 | 19 27 0 0 con 0 1.0E+00 1.4E-01 1.373E-01 5.425E+00 41 | 20 28 0 0 con 0 1.0E+00 3.3E-01 3.180E-01 5.373E+00 42 | 21 29 0 0 con 0 1.0E+00 5.5E-01 4.152E-01 5.313E+00 43 | 22 30 0 0 con 0 1.0E+00 4.3E-01 3.582E-01 5.252E+00 44 | 23 31 0 0 con 0 1.0E+00 9.6E-02 1.369E-01 5.215E+00 45 | 24 32 0 0 con 0 1.0E+00 1.3E-01 3.107E-01 5.197E+00 46 | 25 33 0 0 con 0 1.0E+00 1.0E-01 3.143E-01 5.175E+00 47 | 26 34 0 0 con 0 1.0E+00 7.1E-01 5.063E-01 5.111E+00 48 | 27 35 0 0 con 0 1.0E+00 5.3E-01 2.888E-01 5.055E+00 49 | 28 36 0 0 con 0 1.0E+00 5.9E-02 1.250E-01 5.038E+00 50 | 29 37 0 0 con 0 1.0E+00 1.8E-01 1.453E-01 5.015E+00 51 | 30 38 0 0 con 0 1.0E+00 3.2E-01 3.000E-01 4.977E+00 52 | 31 39 0 0 con 0 1.0E+00 1.0E+00 5.911E-01 4.929E+00 53 | 32 40 0 0 con 0 1.0E+00 7.0E-01 2.602E-01 4.865E+00 54 | 33 41 0 0 con 0 1.0E+00 2.1E-01 6.606E-02 4.845E+00 55 | 34 42 0 0 con 0 1.0E+00 2.7E-01 1.569E-01 4.827E+00 56 | 35 43 0 0 con 0 1.0E+00 2.3E-01 1.595E-01 4.813E+00 57 | 36 44 0 0 con 0 1.0E+00 1.7E-01 1.139E-01 4.793E+00 58 | 37 45 0 0 con 0 1.0E+00 6.0E-01 2.586E-01 4.735E+00 59 | 38 46 0 0 con 0 1.0E+00 5.5E-01 3.540E-01 4.691E+00 60 | 39 47 0 0 con 0 1.0E+00 5.7E-01 2.684E-01 4.647E+00 61 | 40 48 0 0 con 0 1.0E+00 6.8E-01 4.442E-01 4.630E+00 62 | 41 49 0 0 con 0 1.0E+00 1.6E-01 1.908E-01 4.612E+00 63 | 42 50 0 0 con 0 1.0E+00 7.8E-02 1.233E-01 4.600E+00 64 | 43 51 0 0 con 0 1.0E+00 3.8E-01 2.113E-01 4.548E+00 65 | 44 52 0 0 con 0 1.0E+00 1.6E+00 9.864E-01 4.523E+00 66 | 45 53 0 0 con 0 1.0E+00 2.6E-01 3.745E-01 4.457E+00 67 | 46 54 0 0 con 0 1.0E+00 1.1E-01 2.738E-01 4.440E+00 68 | 47 55 0 0 con 0 1.0E+00 2.8E-01 2.877E-01 4.413E+00 69 | 48 56 0 0 con 0 1.0E+00 3.1E-01 3.714E-01 4.381E+00 70 | 49 57 0 0 con 0 1.0E+00 7.5E-01 4.278E-01 4.334E+00 71 | 50 58 0 0 con 0 1.0E+00 4.9E-01 5.898E-01 4.302E+00 72 | 51 59 0 0 con 0 1.0E+00 1.2E-01 2.911E-01 4.277E+00 73 | 52 60 0 0 con 0 1.0E+00 1.7E-01 2.290E-01 4.255E+00 74 | 53 61 0 0 con 0 1.0E+00 2.4E-01 3.117E-01 4.221E+00 75 | 54 62 0 0 con 0 1.0E+00 4.6E-01 5.012E-01 
4.165E+00 76 | 55 63 0 0 con 0 1.0E+00 9.7E-01 1.051E+00 4.140E+00 77 | 56 64 0 0 con 0 1.0E+00 9.2E-02 2.289E-01 4.073E+00 78 | 57 65 0 0 con 0 1.0E+00 1.4E-01 2.290E-01 4.052E+00 79 | 58 66 0 0 con 0 1.0E+00 1.1E-01 2.735E-01 4.035E+00 80 | 59 67 0 0 con 0 1.0E+00 1.2E-01 3.828E-01 4.021E+00 81 | 60 68 0 0 con 0 1.0E+00 1.8E-01 3.207E-01 3.999E+00 82 | 61 69 0 0 con 0 1.0E+00 2.2E-01 2.181E-01 3.978E+00 83 | 62 70 0 0 con 0 1.0E+00 8.8E-02 1.526E-01 3.966E+00 84 | 63 71 0 0 con 0 1.0E+00 4.8E-02 1.515E-01 3.959E+00 85 | 64 72 0 0 con 0 1.0E+00 1.3E-01 2.333E-01 3.947E+00 86 | 65 73 0 0 con 0 1.0E+00 2.9E-01 4.551E-01 3.932E+00 87 | 66 74 0 0 con 0 1.0E+00 2.0E-01 2.385E-01 3.909E+00 88 | 67 75 0 0 con 0 1.0E+00 7.6E-02 1.651E-01 3.892E+00 89 | 68 76 0 0 con 0 1.0E+00 8.7E-02 1.439E-01 3.881E+00 90 | 69 77 0 0 con 0 1.0E+00 2.1E-02 1.164E-01 3.877E+00 91 | 70 78 0 0 con 0 1.0E+00 8.0E-02 3.307E-01 3.865E+00 92 | 71 79 0 0 con 0 1.0E+00 2.5E-01 6.036E-01 3.842E+00 93 | 72 80 0 0 con 0 1.0E+00 4.3E-01 6.074E-01 3.808E+00 94 | 73 82 0 0 con 1 1.9E-01 1.6E-01 5.785E-01 3.799E+00 95 | 74 83 0 0 con 0 1.0E+00 3.9E-01 3.145E-01 3.772E+00 96 | 75 84 0 0 con 0 1.0E+00 3.0E-01 1.032E-01 3.754E+00 97 | 76 85 0 0 con 0 1.0E+00 1.4E-01 2.753E-01 3.742E+00 98 | 77 86 0 0 con 0 1.0E+00 1.8E-01 4.092E-01 3.719E+00 99 | 78 87 0 0 con 0 1.0E+00 6.0E-01 4.372E-01 3.693E+00 100 | 79 89 0 0 con 1 1.3E-01 7.2E-02 4.364E-01 3.692E+00 101 | 80 90 0 0 con 0 1.0E+00 9.8E-02 2.482E-01 3.677E+00 102 | 81 91 0 0 con 0 1.0E+00 1.1E-01 1.539E-01 3.663E+00 103 | 82 92 0 0 con 0 1.0E+00 9.0E-02 2.546E-01 3.656E+00 104 | 83 93 0 0 con 0 1.0E+00 1.3E-01 3.940E-01 3.636E+00 105 | 84 94 0 0 con 0 1.0E+00 2.4E-01 4.467E-01 3.609E+00 106 | 85 96 0 0 con 1 4.3E-01 2.6E-01 3.107E-01 3.595E+00 107 | 86 97 0 0 con 0 1.0E+00 2.5E-01 1.593E-01 3.573E+00 108 | 87 98 0 0 con 0 1.0E+00 1.7E-01 7.254E-02 3.558E+00 109 | 88 99 0 0 con 0 1.0E+00 1.2E-01 1.685E-01 3.545E+00 110 | 89 100 0 0 con 0 1.0E+00 2.1E-01 2.323E-01 3.530E+00 111 | 90 101 0 0 con 0 1.0E+00 4.1E-01 2.612E-01 3.507E+00 112 | 91 102 0 0 con 0 1.0E+00 3.6E-01 3.503E-01 3.502E+00 113 | 92 103 0 0 con 0 1.0E+00 2.5E-01 8.487E-02 3.480E+00 114 | 93 104 0 0 con 0 1.0E+00 1.4E-01 5.536E-02 3.471E+00 115 | 94 105 0 0 con 0 1.0E+00 1.6E-01 1.217E-01 3.462E+00 116 | 95 106 0 0 con 0 1.0E+00 3.1E-01 2.016E-01 3.446E+00 117 | 96 107 0 0 con 0 1.0E+00 4.0E-01 1.968E-01 3.429E+00 118 | 97 108 0 0 con 0 1.0E+00 1.6E-01 8.453E-02 3.412E+00 119 | 98 109 0 0 con 0 1.0E+00 1.7E-01 1.233E-01 3.401E+00 120 | 99 110 0 0 con 0 1.0E+00 2.2E-01 2.034E-01 3.389E+00 121 | 100 111 0 0 con 0 1.0E+00 2.3E-01 3.737E-01 3.381E+00 122 | 101 112 0 0 con 0 1.0E+00 5.9E-02 9.743E-02 3.369E+00 123 | 102 113 0 0 con 0 1.0E+00 1.1E-01 8.841E-02 3.359E+00 124 | 125 | STOP: TOTAL NO. of ITERATIONS EXCEEDS LIMIT 126 | 127 | Total User time 4.477E+02 seconds. 
128 |
129 |
--------------------------------------------------------------------------------
/labels.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skritika/Sentiment-Analysis-using-Recursive-Autoencoders/831fa5d785ecf979eb433d9e942da2bd169dc033/labels.mat
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from helper import *
2 | import scipy.io
3 | from scipy.optimize import fmin_l_bfgs_b
4 |
5 | d = 20
6 | (t, l) = load()
7 | s = range(len(t))
8 | np.random.shuffle(s)
9 | data = [t[i] for i in s]
10 | labels = [l[i] for i in s]
11 |
12 | train_data = data[0:8]
13 | train_labels = labels[0:8]
14 | test_data = data[8:10]
15 | test_labels = labels[8:10]
16 |
17 |
18 | num_cat = 1
19 | dict_length = 14043
20 | alpha = 0.2
21 | beta = 0.5
22 | initv = init_theta(d,num_cat,dict_length)
23 | freq = [1/float(14043)]*14043
24 | fgrad = partial(backprop, 1, train_data, train_labels, freq, d, num_cat, dict_length, alpha, beta)
25 | fcost = partial(backprop, 0, train_data, train_labels, freq, d, num_cat, dict_length, alpha, beta)
26 | backprop(1, train_data, train_labels, freq, d, num_cat, dict_length, alpha, beta, initv)
27 | theta_min = fmin_l_bfgs_b(fcost, initv, fprime = fgrad, args=(), maxiter=100, disp=1)[0]
28 | (W1,W2,W3,W4,Wcat,We,b1,b2,b3,bcat) = getW(theta_min, d, num_cat, dict_length)
29 | print "Accuracy on the test set is", accuracy(W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat, alpha, beta, freq, test_data, test_labels, d, num_cat)
30 |
31 |
--------------------------------------------------------------------------------
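Training with L-BFGS-B takes a while (see iterate.dat above), so it can be convenient to persist the theta_min that main.py produces and reload it later for prediction instead of retraining. One possible way to do that, not part of the repository (the word indices below are hypothetical):

# Sketch: save the learned parameter vector after main.py has run ...
#   np.save('theta_min.npy', theta_min)
# ... and reuse it later without retraining:
import numpy as np
from backprop import getW, predict

theta_min = np.load('theta_min.npy')
(W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat) = getW(theta_min, 20, 1, 14043)
freq = [1/float(14043)]*14043
sentence = [12, 345, 6789]          # hypothetical word indices into We
print(predict(W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat, 0.2, 0.5, freq, sentence, 20, 1))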