├── .idea
│   ├── Recurrent_Neural_Network(RNN).iml
│   ├── misc.xml
│   ├── modules.xml
│   └── workspace.xml
├── README.md
└── RNN.py
/README.md:
--------------------------------------------------------------------------------
# Recurrent Neural Networks
***This repository contains the code for a Recurrent Neural Network built from scratch using Python 3 and NumPy.***

# Requirements
**NumPy**

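To install the dependency and run the script (assuming a standard Python 3 environment with `pip` available):

```
pip install numpy
python RNN.py
```
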
# Sample Output
```
Error:[ 3.94375112]
Pred:[1 1 1 1 1 1 1 1]
True:[0 0 1 1 0 1 1 1]
28 + 27 = 255
------------
Error:[ 3.89378112]
Pred:[0 0 1 0 1 0 1 0]
True:[1 0 0 1 0 0 1 0]
21 + 125 = 42
------------
Error:[ 3.80079469]
Pred:[1 1 1 0 1 0 0 1]
True:[1 1 0 1 1 0 0 1]
100 + 117 = 233
------------
Error:[ 3.75256184]
Pred:[0 0 0 1 0 0 0 0]
True:[0 0 1 0 0 1 1 1]
12 + 27 = 16
------------
Error:[ 3.47163732]
Pred:[0 0 0 1 0 1 0 1]
True:[0 1 0 1 1 1 0 1]
67 + 26 = 21
------------
Error:[ 3.46614289]
Pred:[1 1 1 0 0 1 1 0]
True:[1 0 0 0 0 1 1 0]
92 + 42 = 230
------------
Error:[ 0.57723326]
Pred:[0 1 1 1 0 1 1 0]
True:[0 1 1 1 0 1 1 0]
86 + 32 = 118
------------
Error:[ 0.83430643]
Pred:[1 1 1 0 1 0 1 0]
True:[1 1 1 0 1 0 1 0]
107 + 127 = 234
------------
Error:[ 0.50010502]
Pred:[0 0 1 0 1 0 0 0]
True:[0 0 1 0 1 0 0 0]
15 + 25 = 40
------------
Error:[ 0.42438922]
Pred:[0 1 1 1 0 1 1 1]
True:[0 1 1 1 0 1 1 1]
28 + 91 = 119
------------
```

**Note that the RNN keeps training: at each timestep it predicts one output bit and stores the corresponding output-layer error delta. Once it reaches the last bit of an addition, it backpropagates all the stored errors from the last timestep back to the first and updates the weights. Hence, after the first few thousand training iterations (the first 3-4 samples printed above), it starts predicting the sums accurately.**

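The heart of `RNN.py` is the per-timestep recurrence: the current hidden state is computed from the current input bits and the previous hidden state, and the predicted output bit is read off the hidden state. Below is a minimal, self-contained sketch of one forward timestep; the dimensions match `RNN.py`, but the weights here are just random placeholders rather than trained values.

```
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Dimensions as in RNN.py: 2 input bits, 16 hidden units, 1 output bit
W1  = 2 * np.random.random((2, 16)) - 1   # input  -> hidden
W_h = 2 * np.random.random((16, 16)) - 1  # hidden -> hidden (recurrent)
W2  = 2 * np.random.random((16, 1)) - 1   # hidden -> output

h = np.zeros((1, 16))    # no previous hidden state at the first timestep
X = np.array([[1, 0]])   # current bits of "a" and "b"

h = sigmoid(np.dot(X, W1) + np.dot(h, W_h))        # new hidden state
bit = int(np.round(sigmoid(np.dot(h, W2))[0][0]))  # predicted output bit
print(bit)
```
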
# Resources

| S.No. | Papers / Blogs / Authors | Links |
| ------ | --------------------------------------------------------- | ---------------------------------------------------- |
| 1. | "A Critical Review of Recurrent Neural Networks for Sequence Learning" by Zachary C. Lipton | https://arxiv.org/pdf/1506.00019.pdf |
| 2. | "i am trask" blog | https://iamtrask.github.io/2015/11/15/anyone-can-code-lstm/ |
| 3. | Nikhil Buduma's blog | http://nikhilbuduma.com/2015/01/11/a-deep-dive-into-recurrent-neural-networks/ |
--------------------------------------------------------------------------------
/RNN.py:
--------------------------------------------------------------------------------
# Recurrent Neural Network from Scratch in Python 3

import copy
import numpy as np

# np.random.seed(0)

# Sigmoid Activation Function
# To be applied at Hidden Layers and Output Layer
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Derivative of Sigmoid Function
# Used in calculation of the Back Propagation Loss
# Note: z is expected to already be a sigmoid output, so sigmoidPrime(sigmoid(x))
# gives the derivative of the sigmoid at x
def sigmoidPrime(z):
    return z * (1 - z)
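
# For example: sigmoid(0.0) = 0.5, and the slope of the sigmoid there is
# sigmoidPrime(sigmoid(0.0)) = sigmoidPrime(0.5) = 0.5 * (1 - 0.5) = 0.25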

# Generate Input Dataset
int_to_binary = {}
binary_dim = 8

# Number of values representable in 8 bits
# 2^8 = 256, i.e. the integers 0 to 255
max_val = (2**binary_dim)

# Calculate the binary representation of each int from 0 to 255 (one 8-bit row per integer)
binary_val = np.unpackbits(np.arange(max_val, dtype=np.uint8).reshape(-1, 1), axis=1)

# Map each integer value to its binary representation
for i in range(max_val):
    int_to_binary[i] = binary_val[i]
    # print('\nInteger value: ', i)
    # print('binary value: ', binary_val[i])
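
# Quick sanity check of the mapping (most significant bit first), e.g.:
#   int_to_binary[5] -> array([0, 0, 0, 0, 0, 1, 0, 1], dtype=uint8)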

# NN variables
learning_rate = 0.1

# Inputs: one bit of each of the two numbers being added, per timestep
inputLayerSize = 2

# Hidden Layer with 16 neurons
hiddenLayerSize = 16

# Output at one timestep is 1 bit
outputLayerSize = 1

# Initialize Weights uniformly in [-1, 1)
# Weights of the first synapse (Synapse_0), from Input to Hidden Layer at the current timestep
W1 = 2 * np.random.random((inputLayerSize, hiddenLayerSize)) - 1

# Weights of the second synapse (Synapse_1), from Hidden Layer to Output Layer
W2 = 2 * np.random.random((hiddenLayerSize, outputLayerSize)) - 1

# Weights of the recurrent synapse (Synapse_h), from the previous timestep's Hidden Layer to the current one
W_h = 2 * np.random.random((hiddenLayerSize, hiddenLayerSize)) - 1


# Initialize the weight-update accumulators
W1_update = np.zeros_like(W1)
W2_update = np.zeros_like(W2)
W_h_update = np.zeros_like(W_h)


# Iterate over 10,000 samples for Training
for j in range(10000):
    # ----------------------------- Compute True Values for the Sum (a+b) [binary encoded] --------------------------
    # Generate a random sample value for the 1st input
    # (capped at max_val/2 so that a + b still fits in 8 bits)
    a_int = np.random.randint(max_val // 2)
    # Convert this int value to binary
    a = int_to_binary[a_int]

    # Generate a random sample value for the 2nd input
    b_int = np.random.randint(max_val // 2)
    # Map int to binary
    b = int_to_binary[b_int]

    # True answer: a + b = c
    c_int = a_int + b_int
    c = int_to_binary[c_int]

    # Array to save the predicted outputs (binary encoded)
    d = np.zeros_like(c)

    # Initialize the overall error to 0
    overallError = 0

    # Save the error deltas computed at the Output Layer at each timestep into a list
    output_layer_deltas = list()

    # Save the values obtained at the Hidden Layer of each timestep in a list to keep track
    hidden_layer_values = list()

    # Initially, there is no previous hidden state, so append zeros for it
    hidden_layer_values.append(np.zeros(hiddenLayerSize))

    # ----------------------------- Compute the Values for (a+b) using RNN [Forward Propagation] ----------------------
    # position: location of the bit amongst the 8 bits; runs from 0 to 7
    for position in range(binary_dim):
        # Generate input data for the RNN:
        # take one bit each from the binary values of "a" and "b" generated for this iteration of "j".
        # As position increases, the bit index into "a" and "b" decreases from 7 -> 0, so the sum is
        # computed from the least significant bit upwards, just like written addition.
        # e.g. for position = 0, X = [a[7], b[7]], the least significant bits of a and b.
        X = np.array([[a[binary_dim - position - 1], b[binary_dim - position - 1]]])

        # True value of the corresponding bit of (a+b) = c, shaped to compare with the network output
        y = np.array([[c[binary_dim - position - 1]]]).T

        # Values computed at the current hidden layer:
        # [dot product of input (X) and weights (W1)] + [dot product of previous hidden layer values and weights (W_h)]
        # W_h: weights from the previous timestep's hidden layer to the current one
        # W1: weights from the current timestep's input to the current hidden layer
        layer_1 = sigmoid(np.dot(X, W1) + np.dot(hidden_layer_values[-1], W_h))

        # The new output using the new hidden layer values
        layer_2 = sigmoid(np.dot(layer_1, W2))

        # Calculate the error
        output_error = y - layer_2

        # Save the error delta at each timestep, as it will be propagated back
        output_layer_deltas.append(output_error * sigmoidPrime(layer_2))

        # Accumulate the absolute error at each binary position
        overallError += np.abs(output_error[0])

        # Round the predicted value off to the nearest "0" or "1" and save it
        d[binary_dim - position - 1] = np.round(layer_2[0][0])

        # Save the hidden layer values to be used during backpropagation
        hidden_layer_values.append(copy.deepcopy(layer_1))

    # Hidden-layer delta arriving from the timestep after the last one (none, so zeros)
    future_layer_1_delta = np.zeros(hiddenLayerSize)

    # ----------------------------------- Back Propagating the Error Values to All Previous Timesteps ---------------------
    for position in range(binary_dim):
        # Walk backwards through the timesteps: position 0 gives [a[0], b[0]],
        # the most significant bits, which were processed in the *last* timestep
        X = np.array([[a[position], b[position]]])
        # Hidden layer at the timestep currently being processed
        layer_1 = hidden_layer_values[-position - 1]
        # Hidden layer at the timestep before it
        prev_hidden_layer = hidden_layer_values[-position - 2]
        # Output-layer error delta at this timestep
        output_layer_delta = output_layer_deltas[-position - 1]
        # Hidden-layer delta: error flowing back from the future hidden layer plus error from the current output
        layer_1_delta = (future_layer_1_delta.dot(W_h.T) + output_layer_delta.dot(W2.T)) * sigmoidPrime(layer_1)

        # Accumulate the weight updates across all timesteps
        W2_update += np.atleast_2d(layer_1).T.dot(output_layer_delta)
        W_h_update += np.atleast_2d(prev_hidden_layer).T.dot(layer_1_delta)
        W1_update += X.T.dot(layer_1_delta)

        future_layer_1_delta = layer_1_delta

    # Apply the accumulated updates to the weights
    W1 += W1_update * learning_rate
    W2 += W2_update * learning_rate
    W_h += W_h_update * learning_rate

    # Clear the accumulated updates for the next sample
    W1_update *= 0
    W2_update *= 0
    W_h_update *= 0
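
    # Note: the updates are accumulated across all 8 timesteps and only then applied,
    # so each weight step uses the gradient of the whole sequence (backpropagation
    # through time over the full 8-bit addition)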


    # Print out the progress of the RNN every 1000 samples
    if (j % 1000 == 0):
        print("Error:" + str(overallError))
        print("Pred:" + str(d))
        print("True:" + str(c))
        # Convert the predicted bits (most significant first) back to an integer
        out = 0
        for index, x in enumerate(reversed(d)):
            out += x * pow(2, index)
        print(str(a_int) + " + " + str(b_int) + " = " + str(out))
        print("------------")

# ------------------------------------- EOC -----------------------------
--------------------------------------------------------------------------------