├── README.md
├── backprop.py
├── data.mat
├── definitions.py
├── helper.py
├── iterate.dat
├── labels.mat
└── main.py

/README.md:
--------------------------------------------------------------------------------
1 | Semi-supervised recursive autoencoders are used to learn sentence representations and predict the polarity of movie reviews. With random word-vector initialization, the model achieves 72% accuracy on the Rotten Tomatoes movie-review dataset.
2 |
3 | This code is based on Richard Socher's work as described in the paper "Semi-Supervised Recursive Autoencoders for Predicting Sentiment Distributions" and on the accompanying MATLAB code.
4 |
5 | The main file is main.py.
6 |
--------------------------------------------------------------------------------
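The core operation implemented in definitions.py below merges two child vectors c1, c2 into a parent p = tanh(W1*c1 + W2*c2 + b1) (normalised to unit length), decodes the parent back into reconstructions y1, y2 of the children, and scores the merge by the alpha-weighted reconstruction error; tree.forward() greedily merges whichever neighbouring pair currently has the smallest error. A minimal standalone sketch of that single step, not part of the repository, assuming a hypothetical d = 4 and random values:

# Sketch of one composition/reconstruction step of the recursive autoencoder.
import numpy as np

d = 4                                        # hypothetical hidden size
rng = np.random.RandomState(0)
W1, W2, W3, W4 = [0.01 * rng.randn(d, d) for _ in range(4)]
b1 = b2 = b3 = np.zeros((d, 1))
c1, c2 = rng.randn(d, 1), rng.randn(d, 1)    # two child representations
alpha = 0.2

p = np.tanh(W1.dot(c1) + W2.dot(c2) + b1)
p = p / np.linalg.norm(p)                    # parent vector, unit length
y1 = np.tanh(W3.dot(p) + b2)                 # reconstruction of child 1
y2 = np.tanh(W4.dot(p) + b3)                 # reconstruction of child 2
y1, y2 = y1 / np.linalg.norm(y1), y2 / np.linalg.norm(y2)
recons_error = 0.5 * alpha * (np.sum((y1 - c1) ** 2) + np.sum((y2 - c2) ** 2))
print(recons_error)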
/backprop.py:
--------------------------------------------------------------------------------
1 | from definitions import *
2 | from functools import partial
3 |
4 | def predict(W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat, alpha, beta, freq, test_sentence, d, num_cat):
5 |     sl = len(test_sentence)
6 |     L = We[:,test_sentence]
7 |     tr = tree(sl, d, num_cat, L)
8 |     tr.forward(freq, W1, W2, W3, W4, Wcat, b1, b2, b3, bcat, alpha, beta, 0)
9 |     pred = fcat(np.dot(Wcat,tr.nodeFeatures[:,2*sl-2])+bcat)
10 |     return 1*(pred>0.5)
11 |
12 | def getW(t, d, num_cat, dict_length):
13 |     if t.shape[0] != 1:
14 |         theta = t[np.newaxis,:]
15 |     else:
16 |         theta = t
17 |     sW = (d, d)
18 |     sb = (d, 1)
19 |     s = d*d
20 |     s2 = num_cat*d
21 |     s3 = dict_length*d
22 |     W1, W2, W3, W4, Wcat, We = theta[0,0:s].reshape(sW), theta[0,s:2*s].reshape(sW), theta[0,2*s:3*s].reshape(sW), theta[0,3*s:4*s].reshape(sW), theta[0,4*s:4*s+s2].reshape((num_cat,d)), theta[0,4*s+s2:4*s+s2+s3].reshape((d,dict_length))
23 |     s4 = 4*s+s2+s3
24 |     b1, b2, b3, bcat = theta[0,s4:s4+d].reshape(sb), theta[0,s4+d:s4+2*d].reshape(sb), theta[0,s4+2*d:s4+3*d].reshape(sb), theta[0,s4+3*d:s4+3*d+num_cat].reshape((num_cat,1))
25 |     return (W1,W2,W3,W4,Wcat,We,b1,b2,b3,bcat)
26 |
27 |
28 | '''Backpropagation for derivative and cost computation'''
29 | def backprop(x, training_data, training_labels, freq_original, d, num_cat, dict_length, alpha, beta, theta):
30 |     sW = (d, d)
31 |     sb = (d, 1)
32 |     (W1,W2,W3,W4,Wcat,We,b1,b2,b3,bcat) = getW(theta, d, num_cat, dict_length)
33 |     gW1, gW2, gW3, gW4, gWcat, gb1, gb2, gb3, gbcat, gWe = np.zeros(sW), np.zeros(sW), np.zeros(sW), np.zeros(sW), np.zeros((num_cat, d)), np.zeros(sb), np.zeros(sb), np.zeros(sb), np.zeros((num_cat,1)), np.zeros((d, dict_length))
34 |     cost_J = 0.0
35 |     for i in range(len(training_data)):
36 |         word_indices = training_data[i]
37 |         true_label = training_labels[i]
38 |         sl = len(word_indices)
39 |         L = We[:,word_indices]
40 |         gL = np.zeros((L.shape[0],L.shape[1]))
41 |         freq = [freq_original[k] for k in word_indices]
42 |         tr = tree(sl, d, num_cat, L)
43 |         if sl>1:
44 |             tr.forward(freq, W1, W2, W3, W4, Wcat, b1, b2, b3, bcat, alpha, beta, true_label)
45 |             for current in range(2*sl-2,sl-1,-1):
46 |                 kid1, kid2 = tr.kids[current,0], tr.kids[current,1]
47 |                 a1, a1_unnorm = tr.nodeFeatures[:,current:current+1], tr.nodeFeatures_unnorm[:,current:current+1]
48 |                 d1, d2 = tr.delta1[:,current:current+1], tr.delta2[:,current:current+1]
49 |                 pd = tr.parentdelta[:,current:current+1]
50 |                 pp = tr.pp[current]
51 |                 if(current==(2*sl-2)):
52 |                     W = np.zeros((d,d))
53 |                     delt = np.zeros((d, 1))
54 |                 else:
55 |                     W, delt = W2.copy(), tr.y2c2[:,pp:pp+1]
56 |                     if(tr.kids[pp,0]==current):#left_child
57 |                         W, delt = W1.copy(), tr.y1c1[:,pp:pp+1]
58 |                 smd = tr.catdelta[:, current:current+1]
59 |                 gbcat += smd
60 |                 h = np.dot(W3.T, d1) + np.dot(W4.T, d2) + np.dot(W.T, pd) + np.dot(Wcat.T, smd) - delt
61 |                 parent_d = np.dot(fnorm_prime(a1_unnorm), h)
62 |                 gWcat += np.dot(smd,a1.T)
63 |                 tr.parentdelta[:,kid1:kid1+1], tr.parentdelta[:,kid2:kid2+1] = parent_d, parent_d
64 |                 gb1, gb2, gb3 = gb1+parent_d, gb2+d1, gb3+d2
65 |                 gW1, gW2, gW3, gW4 = gW1 + np.dot(parent_d, tr.nodeFeatures[:,kid1:kid1+1].T), gW2 + np.dot(parent_d, tr.nodeFeatures[:,kid2:kid2+1].T), gW3 + np.dot(d1,a1.T), gW4 + np.dot(d2,a1.T)
66 |             for j in range(sl-1,-1,-1):
67 |                 pp = tr.pp[j]
68 |                 W, delt = W2.copy(), tr.y2c2[:,pp:pp+1]
69 |                 if(tr.kids[pp,0]==j):#left_child
70 |                     W, delt = W1.copy(), tr.y1c1[:,pp:pp+1]
71 |                 gWcat += np.dot(tr.catdelta[:,j:j+1],tr.nodeFeatures[:,j:j+1].T)
72 |                 gbcat += tr.catdelta[:,j]
73 |                 gL[:,j:j+1] += np.dot(W.T,tr.parentdelta[:,j:j+1]) + np.dot(Wcat.T,tr.catdelta[:,j:j+1]) - delt
74 |                 gWe[:,word_indices[j]] += gL[:,j]
75 |             cost_J += sum(tr.nodeScores) + sum(tr.nodeScoresR)
76 |             actual = gW1[0,2]
77 |             tr.checkgradient(actual, word_indices, freq, 0.0000000000001, W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat, alpha, beta, true_label) # numerical gradient check (debugging aid)
78 |     F = np.ndarray.flatten
79 |     D = np.dot
80 |     #final grad computation
81 |     grad_J = np.concatenate([F(gW1),F(gW2),F(gW3),F(gW4),F(gWcat),F(gWe),F(gb1),F(gb2),F(gb3),F(gbcat)],axis=1)
82 |     grad_reg = np.concatenate([F(W1),F(W2),F(W3),F(W4),F(Wcat),F(We),np.zeros(d),np.zeros(d),np.zeros(d),np.zeros(num_cat)],axis=1)
83 |     grad = grad_J/len(training_data) + .0004*grad_reg
84 |     #final cost computation
85 |     cost_reg = .0002*(D(F(W1),F(W1).T)+D(F(W2),F(W2).T)+D(F(W3),F(W3).T)+D(F(W4),F(W4).T)+D(F(Wcat),F(Wcat).T)+D(F(We),F(We).T))
86 |     cost = cost_J/len(training_data) + cost_reg
87 |     if(x==1):
88 |         return grad
89 |     else:
90 |         return cost[0]
91 |
92 |
93 |
94 |
95 |
--------------------------------------------------------------------------------
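getW() above unpacks one flat parameter vector with the layout [W1 | W2 | W3 | W4 | Wcat | We | b1 | b2 | b3 | bcat]; init_theta() in helper.py below packs the parameters the same way, and fmin_l_bfgs_b in main.py optimises that single vector. A small shape check of the layout, not part of the repository, using hypothetical sizes (the actual run uses d=20, num_cat=1, dict_length=14043):

# Sketch: verify the flat-theta layout that getW() expects.
import numpy as np
from backprop import getW

d, num_cat, dict_length = 5, 1, 30           # hypothetical sizes
n_params = 4*d*d + num_cat*d + d*dict_length + 3*d + num_cat
theta = np.random.rand(1, n_params)

W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat = getW(theta, d, num_cat, dict_length)
assert W1.shape == W2.shape == W3.shape == W4.shape == (d, d)
assert Wcat.shape == (num_cat, d) and We.shape == (d, dict_length)
assert b1.shape == b2.shape == b3.shape == (d, 1) and bcat.shape == (num_cat, 1)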
/data.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skritika/Sentiment-Analysis-using-Recursive-Autoencoders/831fa5d785ecf979eb433d9e942da2bd169dc033/data.mat
--------------------------------------------------------------------------------
/definitions.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def f(x):
4 |     f = np.tanh(x)
5 |     return f
6 |
7 | def fnorm(x):
8 |     (n1, n2) = x.shape
9 |     f = np.tanh(x)
10 |     norm = np.linalg.norm(f,axis=0)*np.ones((n1,n2))
11 |     fnorm = f/norm
12 |     return fnorm
13 |
14 | def fcat(x):
15 |     return 1/(1+np.exp(-x))
16 |
17 | def f_prime(f):
18 |     f_p = 1 - np.square(f)
19 |     return f_p
20 |
21 | def fnorm_prime(f_unnorm):
22 |     f = f_unnorm
23 |     f_p = 1 - np.square(f)
24 |     diag = np.diagflat(f_p)
25 |     norm = np.linalg.norm(f)
26 |     fnorm_p = diag/norm - np.dot(diag, np.dot(f, f.T))/np.power(norm,3)
27 |     return fnorm_p
28 |
29 | def fcat_prime(x):
30 |     #return fcat(x)*(1-fcat(x))
31 |     return x*(1-x) # x is already the sigmoid output, not the pre-activation
32 |
33 | class tree:
34 |     def __init__(self, sl, hiddenSize, cat_size, words):
35 |         self.sl = sl
36 |         self.hiddenSize = hiddenSize
37 |         self.words = words
38 |         self.collapsed = range(0,sl)
39 |         self.pp = np.zeros((2*sl-1,1),dtype=int)
40 |         self.nodeScoresR = np.zeros((2*sl-1,1))
41 |         self.nodeScores = np.zeros((2*sl-1,1))
42 |         self.kids = np.zeros((2*sl-1,2))
43 |         self.numkids = np.ones((2*sl-1,1))
44 |         self.y1c1 = np.zeros((hiddenSize,2*sl-1))
45 |         self.y2c2 = np.zeros((hiddenSize,2*sl-1))
46 |         self.freq = np.zeros((2*sl-1,1))
47 |         self.nodeFeatures = np.concatenate((words, np.zeros((hiddenSize,sl-1))), axis=1)
48 |         self.nodeFeatures_unnorm = np.concatenate((words, np.zeros((hiddenSize,sl-1))), axis=1)
49 |         self.delta1 = np.zeros((hiddenSize,2*sl-1))
50 |         self.delta2 = np.zeros((hiddenSize,2*sl-1))
51 |         self.parentdelta = np.zeros((hiddenSize,2*sl-1))
52 |         self.catdelta = np.zeros((cat_size,2*sl-1))
53 |         self.catdelta_out = np.zeros((self.hiddenSize,2*sl-1))
54 |
55 |     def forward(self, freq, W1, W2, W3, W4, Wcat, b1, b2, b3, bcat, alpha, beta, sentence_label):
56 |         sl = self.sl
57 |         D = np.dot
58 |         '''Builds tree and computes reconstruction error for each node'''
59 |         words = self.words
60 |         for j in range(0,sl-1):
61 |             lens = words.shape[1]
62 |             c1, f1, c2, f2 = words[:,0:lens-1], freq[0:lens-1], words[:,1:lens], freq[1:lens]
63 |             p = f(D(W1,c1)+D(W2,c2)+np.tile(b1,lens-1))
64 |             p_norm = p/(np.linalg.norm(p,axis=0)*np.ones(p.shape))
65 |             y1, y2 = f(D(W3,p_norm)+np.tile(b2,lens-1)), f(D(W4,p_norm)+np.tile(b3,lens-1))
66 |             y1_norm, y2_norm = y1/(np.linalg.norm(y1,axis=0)*np.ones(y1.shape)), y2/(np.linalg.norm(y2,axis=0)*np.ones(y2.shape))
67 |             y1c1, y2c2 = alpha*(y1_norm-c1), alpha*(y2_norm-c2)
68 |             recons_error = sum(y1c1*(y1_norm-c1)+y2c2*(y2_norm-c2))*0.5
69 |             m, mp = np.min(recons_error), np.argmin(recons_error)
70 |             self.y1c1[:,sl+j], self.y2c2[:,sl+j] = y1c1[:,mp], y2c2[:,mp]
71 |             self.delta1[:,sl+j:sl+j+1], self.delta2[:,sl+j:sl+j+1] = D(fnorm_prime(y1[:,mp:mp+1]), y1c1[:,mp:mp+1]), D(fnorm_prime(y2[:,mp:mp+1]), y2c2[:,mp:mp+1])
72 |             index_child1, index_child2 = self.collapsed[mp], self.collapsed[mp+1]
73 |             words = np.delete(words,mp+1,1)
74 |             words[:,mp] = p_norm[:,mp]
75 |             self.nodeFeatures[:,sl+j], self.nodeFeatures_unnorm[:,sl+j] = p_norm[:,mp], p[:,mp]
76 |             self.nodeScoresR[sl+j] = m
77 |             self.pp[index_child1], self.pp[index_child2] = sl+j, sl+j
78 |             self.kids[sl+j,0], self.kids[sl+j,1] = index_child1, index_child2
79 |             self.numkids[sl+j] = self.numkids[self.kids[sl+j,0]] + self.numkids[self.kids[sl+j,1]]
80 |             self.freq = np.delete(self.freq,mp+1,0)
81 |             self.freq[mp] = (D(self.numkids[self.kids[sl+j,0]], f1[mp]) + D(self.numkids[self.kids[sl+j,1]], f2[mp]))/self.numkids[sl+j]
82 |             del self.collapsed[mp]
83 |             self.collapsed[mp]=sl+j
84 |         '''Classification error computation for each node'''
85 |         out = fcat(D(Wcat,self.words)+np.tile(bcat,sl))
86 |         diff = np.tile(sentence_label,sl)-out
87 |         lbl_sm = (1-alpha)*diff
88 |         score = 0.5*lbl_sm*diff
89 |         self.nodeScores[0:sl], self.catdelta[:,0:sl] = score.T, -(lbl_sm)*fcat_prime(out)
90 |         for i in range(sl,2*sl-1):
91 |             sm = fcat(D(Wcat,self.nodeFeatures[:,i]) + bcat)
92 |             lbl_sm = beta*(1-alpha)*(sentence_label-sm)
93 |             self.catdelta[:,i] = -(lbl_sm)*fcat_prime(sm)
94 |             J = 0.5*(D(lbl_sm.T,(sentence_label-sm)))
95 |             self.nodeScores[i] = J
96 |
97 |     def cost(self, words, W1, W2, W3, W4, Wcat, b1, b2, b3, bcat, alpha, beta, sentence_label):
98 |         D = np.dot
99 |         sl = self.sl
100 |         nodeScoresR = np.zeros((2*sl-1,1))
101 |         nodeScores = np.zeros((2*sl-1,1))
102 |         nF = self.nodeFeatures.copy()
103 |         nF[:,0:sl] = words
104 |         for j in range(0,sl-1):
105 |             k1, k2 = self.kids[sl+j,0], self.kids[sl+j,1]
106 |             c1, c2 = nF[:,k1:k1+1], nF[:,k2:k2+1]
107 |             nF[:,sl+j:sl+j+1] = fnorm(D(W1,c1)+D(W2,c2)+b1)
108 |             y1, y2 = f(D(W3,nF[:,sl+j:sl+j+1])+b2), f(D(W4,nF[:,sl+j:sl+j+1])+b3)
109 |             y1_norm, y2_norm = y1/(np.linalg.norm(y1,axis=0)*np.ones(y1.shape)), y2/(np.linalg.norm(y2,axis=0)*np.ones(y2.shape))
110 |             y1c1, y2c2 = alpha*(y1_norm-c1), alpha*(y2_norm-c2)
111 |             nodeScoresR[sl+j] = sum(y1c1*(y1_norm-c1)+y2c2*(y2_norm-c2))*0.5
112 |         out = fcat(D(Wcat,words)+np.tile(bcat,sl))
113 |         diff = np.tile(sentence_label,sl)-out
114 |         lbl_sm = (1-alpha)*diff
115 |         score = 0.5*lbl_sm*diff
116 |         nodeScores[0:sl] = score.T
117 |         for i in range(sl,2*sl-1):
118 |             sm = fcat(D(Wcat,nF[:,i]) + bcat)
119 |             lbl_sm = beta*(1-alpha)*(sentence_label-sm)
120 |             nodeScores[i] = 0.5*(D(lbl_sm.T,(sentence_label-sm)))
121 |         error = (sum(nodeScoresR) + sum(nodeScores))
122 |         return error
123 |
124 |     def checkgradient(self,actual, sentence, freq, eps, W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat, alpha, beta, sentence_label): # numerical gradient check used for debugging
125 |         w = We[:,sentence]
126 |         wa, wb = w.copy(), w.copy()
127 |         W1a, W1b = W1.copy(), W1.copy()
128 |         W2a, W2b = W2.copy(), W2.copy()
129 |         W3a, W3b = W3.copy(), W3.copy()
130 |         W4a, W4b = W4.copy(), W4.copy()
131 |         Wcata, Wcatb = Wcat.copy(), Wcat.copy()
132 |         eps_min = 1e-16
133 |         e = eps_min
134 |         e_range = []
135 |         while(e<1):
136 |             e_range.append(e)
137 |             e = e*10
138 |         J_range = []
139 |         #W2a[3,3], W2b[3,3] = W2[3,3] + eps, W2[3,3] - eps
140 |         #W1a[0,2], W1b[0,2] = W1[0,2] + eps, W1[0,2] - eps
141 |         #W3a[3,4], W3b[3,4] = W3[3,4] + eps, W3[3,4] - eps
142 |         #W4a[2,4], W4b[2,4] = W4[2,4] + eps, W4[2,4] - eps
143 |         #Wcata[0,0], Wcatb[0,0] = Wcat[0,0] + eps, Wcat[0,0] - eps
144 |         #wa[0,1], wb[0,1] = w[0,1] + eps, w[0,1] - eps
145 |         for eps in e_range:
146 |             W1a, W1b = W1.copy(), W1.copy()
147 |             W1a[0,2], W1b[0,2] = W1[0,2] + eps, W1[0,2] - eps
148 |             j1 = self.cost(wa,W1a,W2a,W3a,W4a,Wcata,b1,b2,b3,bcat,alpha,beta,sentence_label)
149 |             j2 = self.cost(wb,W1b,W2b,W3b,W4b,Wcatb,b1,b2,b3,bcat,alpha,beta,sentence_label)
150 |             grad = (j1-j2)/(2*eps)
151 |             grad = abs(grad - actual)
152 |             J_range.append(grad[0])
153 |
154 |
--------------------------------------------------------------------------------
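A detail worth noting about definitions.py: fnorm_prime() takes the unnormalised tanh activation a = tanh(z) and returns the transposed Jacobian of the map z -> tanh(z)/||tanh(z)||, which is how backprop.py consumes it (the incoming gradient is left-multiplied by it). A small finite-difference check of that convention, not part of the repository, on a random 5-dimensional input:

# Sketch: check fnorm_prime() against a numerical Jacobian of fnorm().
import numpy as np
from definitions import fnorm, fnorm_prime

n, eps = 5, 1e-6
rng = np.random.RandomState(0)
z = rng.randn(n, 1)

J_num = np.zeros((n, n))
for j in range(n):
    e = np.zeros((n, 1))
    e[j] = eps
    J_num[:, j] = ((fnorm(z + e) - fnorm(z - e)) / (2 * eps)).ravel()

J_code = fnorm_prime(np.tanh(z)).T    # fnorm_prime expects the tanh activation
print(np.max(np.abs(J_num - J_code)))   # should be tiny, around 1e-8 or smaller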
/helper.py:
--------------------------------------------------------------------------------
1 | from backprop import *
2 | import scipy.io
3 | import math
4 |
5 | def init_theta(d, num_cat, dict_length):
6 |     r = math.sqrt(6)/math.sqrt(d+d+1)
7 |     #We = 1e-3*(np.random.rand(hiddenSize, dictionary_length)*2*r-r)
8 |     W = np.random.rand(1,4*d*d+d*num_cat+d*dict_length)*2*r-r #W1+W2+W3+W4+Wcat+We
9 |     b = np.zeros((1,3*d+num_cat))
10 |     return np.concatenate([W,b],axis=1)
11 |
12 | def accuracy(W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat, alpha, beta, freq, test_sentences, labels, d, num_cat):
13 |     n = len(test_sentences)
14 |     correct = 0
15 |     for i in range(len(test_sentences)):
16 |         p = predict(W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat, alpha, beta, freq, test_sentences[i], d, num_cat)
17 |         correct += 1*(p[0]==labels[i])
18 |     correct = correct*100
19 |     return correct/float(n)
20 |
21 | def load():
22 |     data = scipy.io.loadmat('data.mat')
23 |     data = data['snum']
24 |     train = []
25 |     for i in range(data.shape[0]):
26 |         x = data[i]
27 |         y = x[0]-1
28 |         z = y.tolist()
29 |         u = z[0]
30 |         train.append(u)
31 |
32 |     lbl = scipy.io.loadmat('labels.mat')
33 |     lbl = lbl['lbl']
34 |     lbl = lbl[0]
35 |     labels = []
36 |     for i in range(lbl.shape[0]):
37 |         labels.append(lbl[i])
38 |     return (train, labels)
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
/iterate.dat:
--------------------------------------------------------------------------------
1 | RUNNING THE L-BFGS-B CODE
2 |
3 | it = iteration number
4 | nf = number of function evaluations
5 | nseg = number of segments explored during the Cauchy search
6 | nact = number of active bounds at the generalized Cauchy
point 7 | sub = manner in which the subspace minimization terminated: 8 | con = converged, bnd = a bound was reached 9 | itls = number of iterations performed in the line search 10 | stepl = step length used 11 | tstep = norm of the displacement (total step) 12 | projg = norm of the projected gradient 13 | f = function value 14 | 15 | * * * 16 | 17 | Machine precision = 2.220E-16 18 | N = 282541 M = 10 19 | 20 | it nf nseg nact sub itls stepl tstep projg f 21 | 0 1 - - - - - - 1.955E+00 1.329E+01 22 | 1 2 1 0 --- 0 3.4E-01 1.0E+00 7.802E-01 1.129E+01 23 | 2 3 0 0 con 0 1.0E+00 2.7E+00 4.357E-01 9.085E+00 24 | 3 4 0 0 con 0 1.0E+00 2.1E+00 4.067E-01 8.550E+00 25 | 4 5 0 0 con 0 1.0E+00 6.0E-01 1.520E-01 8.053E+00 26 | 5 6 0 0 con 0 1.0E+00 8.0E-01 1.698E-01 7.684E+00 27 | 6 7 0 0 con 0 1.0E+00 7.7E-01 1.660E-01 7.348E+00 28 | 7 9 0 0 con 1 4.9E-01 1.7E+00 3.072E-01 6.700E+00 29 | 8 13 0 0 con 3 1.7E-01 9.3E-01 3.334E-01 6.321E+00 30 | 9 15 0 0 con 1 7.0E-02 4.0E-01 2.789E-01 6.182E+00 31 | 10 17 0 0 con 1 1.7E-01 2.6E-01 2.108E-01 6.071E+00 32 | 11 19 0 0 con 1 4.8E-01 3.7E-01 4.349E-01 5.999E+00 33 | 12 20 0 0 con 0 1.0E+00 3.6E-01 6.408E-01 5.950E+00 34 | 13 21 0 0 con 0 1.0E+00 2.0E-01 2.122E-01 5.908E+00 35 | 14 22 0 0 con 0 1.0E+00 3.4E-01 2.482E-01 5.847E+00 36 | 15 23 0 0 con 0 1.0E+00 3.9E-01 3.310E-01 5.756E+00 37 | 16 24 0 0 con 0 1.0E+00 1.2E+00 9.688E-01 5.657E+00 38 | 17 25 0 0 con 0 1.0E+00 5.3E-01 4.108E-01 5.511E+00 39 | 18 26 0 0 con 0 1.0E+00 2.1E-01 1.183E-01 5.452E+00 40 | 19 27 0 0 con 0 1.0E+00 1.4E-01 1.373E-01 5.425E+00 41 | 20 28 0 0 con 0 1.0E+00 3.3E-01 3.180E-01 5.373E+00 42 | 21 29 0 0 con 0 1.0E+00 5.5E-01 4.152E-01 5.313E+00 43 | 22 30 0 0 con 0 1.0E+00 4.3E-01 3.582E-01 5.252E+00 44 | 23 31 0 0 con 0 1.0E+00 9.6E-02 1.369E-01 5.215E+00 45 | 24 32 0 0 con 0 1.0E+00 1.3E-01 3.107E-01 5.197E+00 46 | 25 33 0 0 con 0 1.0E+00 1.0E-01 3.143E-01 5.175E+00 47 | 26 34 0 0 con 0 1.0E+00 7.1E-01 5.063E-01 5.111E+00 48 | 27 35 0 0 con 0 1.0E+00 5.3E-01 2.888E-01 5.055E+00 49 | 28 36 0 0 con 0 1.0E+00 5.9E-02 1.250E-01 5.038E+00 50 | 29 37 0 0 con 0 1.0E+00 1.8E-01 1.453E-01 5.015E+00 51 | 30 38 0 0 con 0 1.0E+00 3.2E-01 3.000E-01 4.977E+00 52 | 31 39 0 0 con 0 1.0E+00 1.0E+00 5.911E-01 4.929E+00 53 | 32 40 0 0 con 0 1.0E+00 7.0E-01 2.602E-01 4.865E+00 54 | 33 41 0 0 con 0 1.0E+00 2.1E-01 6.606E-02 4.845E+00 55 | 34 42 0 0 con 0 1.0E+00 2.7E-01 1.569E-01 4.827E+00 56 | 35 43 0 0 con 0 1.0E+00 2.3E-01 1.595E-01 4.813E+00 57 | 36 44 0 0 con 0 1.0E+00 1.7E-01 1.139E-01 4.793E+00 58 | 37 45 0 0 con 0 1.0E+00 6.0E-01 2.586E-01 4.735E+00 59 | 38 46 0 0 con 0 1.0E+00 5.5E-01 3.540E-01 4.691E+00 60 | 39 47 0 0 con 0 1.0E+00 5.7E-01 2.684E-01 4.647E+00 61 | 40 48 0 0 con 0 1.0E+00 6.8E-01 4.442E-01 4.630E+00 62 | 41 49 0 0 con 0 1.0E+00 1.6E-01 1.908E-01 4.612E+00 63 | 42 50 0 0 con 0 1.0E+00 7.8E-02 1.233E-01 4.600E+00 64 | 43 51 0 0 con 0 1.0E+00 3.8E-01 2.113E-01 4.548E+00 65 | 44 52 0 0 con 0 1.0E+00 1.6E+00 9.864E-01 4.523E+00 66 | 45 53 0 0 con 0 1.0E+00 2.6E-01 3.745E-01 4.457E+00 67 | 46 54 0 0 con 0 1.0E+00 1.1E-01 2.738E-01 4.440E+00 68 | 47 55 0 0 con 0 1.0E+00 2.8E-01 2.877E-01 4.413E+00 69 | 48 56 0 0 con 0 1.0E+00 3.1E-01 3.714E-01 4.381E+00 70 | 49 57 0 0 con 0 1.0E+00 7.5E-01 4.278E-01 4.334E+00 71 | 50 58 0 0 con 0 1.0E+00 4.9E-01 5.898E-01 4.302E+00 72 | 51 59 0 0 con 0 1.0E+00 1.2E-01 2.911E-01 4.277E+00 73 | 52 60 0 0 con 0 1.0E+00 1.7E-01 2.290E-01 4.255E+00 74 | 53 61 0 0 con 0 1.0E+00 2.4E-01 3.117E-01 4.221E+00 75 | 54 62 0 0 con 0 1.0E+00 4.6E-01 5.012E-01 
4.165E+00 76 | 55 63 0 0 con 0 1.0E+00 9.7E-01 1.051E+00 4.140E+00 77 | 56 64 0 0 con 0 1.0E+00 9.2E-02 2.289E-01 4.073E+00 78 | 57 65 0 0 con 0 1.0E+00 1.4E-01 2.290E-01 4.052E+00 79 | 58 66 0 0 con 0 1.0E+00 1.1E-01 2.735E-01 4.035E+00 80 | 59 67 0 0 con 0 1.0E+00 1.2E-01 3.828E-01 4.021E+00 81 | 60 68 0 0 con 0 1.0E+00 1.8E-01 3.207E-01 3.999E+00 82 | 61 69 0 0 con 0 1.0E+00 2.2E-01 2.181E-01 3.978E+00 83 | 62 70 0 0 con 0 1.0E+00 8.8E-02 1.526E-01 3.966E+00 84 | 63 71 0 0 con 0 1.0E+00 4.8E-02 1.515E-01 3.959E+00 85 | 64 72 0 0 con 0 1.0E+00 1.3E-01 2.333E-01 3.947E+00 86 | 65 73 0 0 con 0 1.0E+00 2.9E-01 4.551E-01 3.932E+00 87 | 66 74 0 0 con 0 1.0E+00 2.0E-01 2.385E-01 3.909E+00 88 | 67 75 0 0 con 0 1.0E+00 7.6E-02 1.651E-01 3.892E+00 89 | 68 76 0 0 con 0 1.0E+00 8.7E-02 1.439E-01 3.881E+00 90 | 69 77 0 0 con 0 1.0E+00 2.1E-02 1.164E-01 3.877E+00 91 | 70 78 0 0 con 0 1.0E+00 8.0E-02 3.307E-01 3.865E+00 92 | 71 79 0 0 con 0 1.0E+00 2.5E-01 6.036E-01 3.842E+00 93 | 72 80 0 0 con 0 1.0E+00 4.3E-01 6.074E-01 3.808E+00 94 | 73 82 0 0 con 1 1.9E-01 1.6E-01 5.785E-01 3.799E+00 95 | 74 83 0 0 con 0 1.0E+00 3.9E-01 3.145E-01 3.772E+00 96 | 75 84 0 0 con 0 1.0E+00 3.0E-01 1.032E-01 3.754E+00 97 | 76 85 0 0 con 0 1.0E+00 1.4E-01 2.753E-01 3.742E+00 98 | 77 86 0 0 con 0 1.0E+00 1.8E-01 4.092E-01 3.719E+00 99 | 78 87 0 0 con 0 1.0E+00 6.0E-01 4.372E-01 3.693E+00 100 | 79 89 0 0 con 1 1.3E-01 7.2E-02 4.364E-01 3.692E+00 101 | 80 90 0 0 con 0 1.0E+00 9.8E-02 2.482E-01 3.677E+00 102 | 81 91 0 0 con 0 1.0E+00 1.1E-01 1.539E-01 3.663E+00 103 | 82 92 0 0 con 0 1.0E+00 9.0E-02 2.546E-01 3.656E+00 104 | 83 93 0 0 con 0 1.0E+00 1.3E-01 3.940E-01 3.636E+00 105 | 84 94 0 0 con 0 1.0E+00 2.4E-01 4.467E-01 3.609E+00 106 | 85 96 0 0 con 1 4.3E-01 2.6E-01 3.107E-01 3.595E+00 107 | 86 97 0 0 con 0 1.0E+00 2.5E-01 1.593E-01 3.573E+00 108 | 87 98 0 0 con 0 1.0E+00 1.7E-01 7.254E-02 3.558E+00 109 | 88 99 0 0 con 0 1.0E+00 1.2E-01 1.685E-01 3.545E+00 110 | 89 100 0 0 con 0 1.0E+00 2.1E-01 2.323E-01 3.530E+00 111 | 90 101 0 0 con 0 1.0E+00 4.1E-01 2.612E-01 3.507E+00 112 | 91 102 0 0 con 0 1.0E+00 3.6E-01 3.503E-01 3.502E+00 113 | 92 103 0 0 con 0 1.0E+00 2.5E-01 8.487E-02 3.480E+00 114 | 93 104 0 0 con 0 1.0E+00 1.4E-01 5.536E-02 3.471E+00 115 | 94 105 0 0 con 0 1.0E+00 1.6E-01 1.217E-01 3.462E+00 116 | 95 106 0 0 con 0 1.0E+00 3.1E-01 2.016E-01 3.446E+00 117 | 96 107 0 0 con 0 1.0E+00 4.0E-01 1.968E-01 3.429E+00 118 | 97 108 0 0 con 0 1.0E+00 1.6E-01 8.453E-02 3.412E+00 119 | 98 109 0 0 con 0 1.0E+00 1.7E-01 1.233E-01 3.401E+00 120 | 99 110 0 0 con 0 1.0E+00 2.2E-01 2.034E-01 3.389E+00 121 | 100 111 0 0 con 0 1.0E+00 2.3E-01 3.737E-01 3.381E+00 122 | 101 112 0 0 con 0 1.0E+00 5.9E-02 9.743E-02 3.369E+00 123 | 102 113 0 0 con 0 1.0E+00 1.1E-01 8.841E-02 3.359E+00 124 | 125 | STOP: TOTAL NO. of ITERATIONS EXCEEDS LIMIT 126 | 127 | Total User time 4.477E+02 seconds. 
128 |
129 |
--------------------------------------------------------------------------------
/labels.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skritika/Sentiment-Analysis-using-Recursive-Autoencoders/831fa5d785ecf979eb433d9e942da2bd169dc033/labels.mat
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from helper import *
2 | import scipy.io
3 | from scipy.optimize import fmin_l_bfgs_b
4 |
5 | d = 20
6 | (t, l) = load()
7 | s = range(len(t))
8 | np.random.shuffle(s)
9 | data = [t[i] for i in s]
10 | labels = [l[i] for i in s]
11 |
12 | train_data = data[0:8]
13 | train_labels = labels[0:8]
14 | test_data = data[8:10]
15 | test_labels = labels[8:10]
16 |
17 |
18 | num_cat = 1
19 | dict_length = 14043
20 | alpha = 0.2
21 | beta = 0.5
22 | initv = init_theta(d,num_cat,dict_length)
23 | freq = [1/float(14043)]*14043
24 | fgrad = partial(backprop, 1, train_data, train_labels, freq, d, num_cat, dict_length, alpha, beta)
25 | fcost = partial(backprop, 0, train_data, train_labels, freq, d, num_cat, dict_length, alpha, beta)
26 | backprop(1, train_data, train_labels, freq, d, num_cat, dict_length, alpha, beta, initv)
27 | theta_min = fmin_l_bfgs_b(fcost, initv, fprime = fgrad, args=(), maxiter=100, disp=1)[0]
28 | (W1,W2,W3,W4,Wcat,We,b1,b2,b3,bcat) = getW(theta_min, d, num_cat, dict_length)
29 | print "Accuracy on the test set is", accuracy(W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat, alpha, beta, freq, test_data, test_labels, d, num_cat)
30 |
31 |
--------------------------------------------------------------------------------
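Training with L-BFGS-B takes a while (see iterate.dat above), so it can be convenient to persist the theta_min that main.py produces and reload it later for prediction instead of retraining. One possible way to do that, not part of the repository (the word indices below are hypothetical):

# Sketch: save the learned parameter vector after main.py has run ...
#   np.save('theta_min.npy', theta_min)
# ... and reuse it later without retraining:
import numpy as np
from backprop import getW, predict

theta_min = np.load('theta_min.npy')
(W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat) = getW(theta_min, 20, 1, 14043)
freq = [1/float(14043)]*14043
sentence = [12, 345, 6789]          # hypothetical word indices into We
print(predict(W1, W2, W3, W4, Wcat, We, b1, b2, b3, bcat, 0.2, 0.5, freq, sentence, 20, 1))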