├── .idea
│   ├── Recurrent_Neural_Network(RNN).iml
│   ├── misc.xml
│   ├── modules.xml
│   └── workspace.xml
├── README.md
└── RNN.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Recurrent Neural Networks
***This repository contains the code for a Recurrent Neural Network built from scratch using Python 3 and NumPy.***

# Requirements
**NumPy**

# Sample Output
```
Error:[ 3.94375112]
Pred:[1 1 1 1 1 1 1 1]
True:[0 0 1 1 0 1 1 1]
28 + 27 = 255
------------
Error:[ 3.89378112]
Pred:[0 0 1 0 1 0 1 0]
True:[1 0 0 1 0 0 1 0]
21 + 125 = 42
------------
Error:[ 3.80079469]
Pred:[1 1 1 0 1 0 0 1]
True:[1 1 0 1 1 0 0 1]
100 + 117 = 233
------------
Error:[ 3.75256184]
Pred:[0 0 0 1 0 0 0 0]
True:[0 0 1 0 0 1 1 1]
12 + 27 = 16
------------
Error:[ 3.47163732]
Pred:[0 0 0 1 0 1 0 1]
True:[0 1 0 1 1 1 0 1]
67 + 26 = 21
------------
Error:[ 3.46614289]
Pred:[1 1 1 0 0 1 1 0]
True:[1 0 0 0 0 1 1 0]
92 + 42 = 230
------------
Error:[ 0.57723326]
Pred:[0 1 1 1 0 1 1 0]
True:[0 1 1 1 0 1 1 0]
86 + 32 = 118
------------
Error:[ 0.83430643]
Pred:[1 1 1 0 1 0 1 0]
True:[1 1 1 0 1 0 1 0]
107 + 127 = 234
------------
Error:[ 0.50010502]
Pred:[0 0 1 0 1 0 0 0]
True:[0 0 1 0 1 0 0 0]
15 + 25 = 40
------------
Error:[ 0.42438922]
Pred:[0 1 1 1 0 1 1 1]
True:[0 1 1 1 0 1 1 1]
28 + 91 = 119
------------
```

**Note that the RNN keeps training: it predicts an output bit and collects the dJdW2 and dJdW1 values at each timestep. Once it reaches the last timestep of an addition, it backpropagates all the errors back to the first timestep. Hence, after the first few thousand training samples it starts predicting the output accurately.**
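For reference, the integer on the last line of each block is decoded from the predicted bit array (most significant bit first), exactly as the script does at the end of each progress report. A minimal standalone sketch:

```
import numpy as np

# Predicted bits, most significant bit first (taken from the sample output above)
pred = np.array([0, 1, 1, 1, 0, 1, 1, 0])

out = 0
for index, x in enumerate(reversed(pred)):
    out += x * pow(2, index)

print(out)  # 118, matching "86 + 32 = 118"
```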
# Resources

| S.No. | Papers / Blogs / Authors | Links |
| ------ | --------------------------------------------------------- | ---------------------------------------------------- |
|1. |"A Critical Review of Recurrent Neural Networks for Sequence Learning" by Zachary C. Lipton| https://arxiv.org/pdf/1506.00019.pdf |
|2. | "i am trask" Blog |https://iamtrask.github.io/2015/11/15/anyone-can-code-lstm/|
|3. | Nikhil Buduma's Blog |http://nikhilbuduma.com/2015/01/11/a-deep-dive-into-recurrent-neural-networks/|

--------------------------------------------------------------------------------
/RNN.py:
--------------------------------------------------------------------------------
# Recurrent Neural Network from Scratch in Python 3

import copy
import numpy as np

# np.random.seed(0)

# Sigmoid Activation Function
# Applied at the Hidden Layer and the Output Layer
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Derivative of the Sigmoid Function
# Used when backpropagating the loss. Note that it expects the sigmoid
# *output* as its argument, since sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).
def sigmoidPrime(z):
    return z * (1 - z)


# Generate Input Dataset
int_to_binary = {}
binary_dim = 8

# Number of distinct values representable with 8 bits:
# 2^8 = 256, so the largest value is 255
max_val = (2**binary_dim)

# Compute the binary representation of every int from 0 to 255
binary_val = np.unpackbits(np.array([range(max_val)], dtype=np.uint8).T, axis=1)

# Map each integer value to its binary representation
for i in range(max_val):
    int_to_binary[i] = binary_val[i]
    # print('\nInteger value: ', i)
    # print('binary value: ', binary_val[i])
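# Sanity check (illustrative): np.unpackbits stores the most significant bit
# first, e.g. 5 -> [0, 0, 0, 0, 0, 1, 0, 1]
assert list(int_to_binary[5]) == [0, 0, 0, 0, 0, 1, 0, 1]
assert list(int_to_binary[255]) == [1, 1, 1, 1, 1, 1, 1, 1]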
# NN variables
learning_rate = 0.1

# Inputs: the two numbers to be added, fed in one bit at a time
inputLayerSize = 2

# Hidden Layer with 16 neurons
hiddenLayerSize = 16

# Output at each timestep is a single bit of the sum
outputLayerSize = 1

# Initialize Weights
# Weights of the first synapse (Synapse_0), from the Input to the Hidden Layer at the current timestep
W1 = 2 * np.random.random((inputLayerSize, hiddenLayerSize)) - 1

# Weights of the second synapse (Synapse_1), from the Hidden Layer to the Output Layer
W2 = 2 * np.random.random((hiddenLayerSize, outputLayerSize)) - 1

# Weights of the recurrent synapse (Synapse_h), from the Hidden Layer at one timestep to the Hidden Layer at the next
W_h = 2 * np.random.random((hiddenLayerSize, hiddenLayerSize)) - 1


# Accumulators for the weight updates computed during backpropagation
W1_update = np.zeros_like(W1)
W2_update = np.zeros_like(W2)
W_h_update = np.zeros_like(W_h)


# Iterate over 10,000 samples for training
for j in range(10000):
    # ----------------- Compute True Values for the Sum (a+b) [binary encoded] -----------------
    # Generate a random value for the 1st input,
    # kept below max_val/2 so that the sum still fits in 8 bits
    a_int = np.random.randint(max_val // 2)
    # Convert this int value to binary
    a = int_to_binary[a_int]

    # Generate a random value for the 2nd input
    b_int = np.random.randint(max_val // 2)
    # Map the int to binary
    b = int_to_binary[b_int]

    # True answer: a + b = c
    c_int = a_int + b_int
    c = int_to_binary[c_int]

    # Array to save the predicted outputs (binary encoded)
    d = np.zeros_like(c)

    # Initialize the overall error to 0
    overallError = 0

    # Save the output-layer deltas computed at each timestep into a list;
    # they are needed later when backpropagating the error through time
    output_layer_deltas = list()

    # Save the hidden-layer values of each timestep in a list to keep track of the states
    hidden_layer_values = list()

    # Initially there is no previous hidden state, so append zeros for it
    hidden_layer_values.append(np.zeros(hiddenLayerSize))

    # ----------------- Compute the Values for (a+b) using the RNN [Forward Propagation] -----------------
    # position: location of the bit amongst the 8 bits, from 0 to 7
    for position in range(binary_dim):
        # Generate the input data for the RNN from the binary values of "a" and "b"
        # generated for this iteration of "j".
        # With increasing position, the bit index into "a" and "b" decreases from 7 to 0,
        # so each iteration adds the corresponding bits of "a" and "b",
        # starting from the least significant bit.
        # e.g. for position = 0, X = [a[7], b[7]], the least significant bits of a and b.
        X = np.array([[a[binary_dim - position - 1], b[binary_dim - position - 1]]])

        # Target bit: the corresponding bit of the true sum c at this timestep
        y = np.array([[c[binary_dim - position - 1]]]).T

        # Values computed at the current hidden layer:
        # [dot product of the input (X) and weights (W1)] + [dot product of the previous hidden-layer values and weights (W_h)]
        # W_h: weights from the previous timestep's hidden layer to the current one
        # W1: weights from the current timestep's input to the current hidden layer
        layer_1 = sigmoid(np.dot(X, W1) + np.dot(hidden_layer_values[-1], W_h))

        # The new output using the new hidden-layer values
        layer_2 = sigmoid(np.dot(layer_1, W2))

        # Calculate the error
        output_error = y - layer_2

        # Save the error delta at this timestep, as it will be propagated back
        output_layer_deltas.append((output_error) * sigmoidPrime(layer_2))

        # Accumulate the absolute error over the binary positions
        overallError += np.abs(output_error[0])

        # Round the prediction to the nearest "0" or "1" and save it
        d[binary_dim - position - 1] = np.round(layer_2[0][0])

        # Save the hidden layer, to be used in the next timestep and during backpropagation
        hidden_layer_values.append(copy.deepcopy(layer_1))

    # No delta flows in from beyond the last timestep
    future_layer_1_delta = np.zeros(hiddenLayerSize)

    # ----------------- Back Propagating the Error Values to All Previous Timesteps -----------------
    for position in range(binary_dim):
        # Walk backwards through time: position 0 here corresponds to the LAST
        # forward timestep, whose input bits were a[0] and b[0] (the most significant bits)
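        # Hidden-layer delta recurrence computed below: the delta at timestep t
        # combines the delta flowing back from timestep t+1 (through W_h) with
        # the output delta at timestep t (through W2), gated by the sigmoid
        # derivative:
        #     delta_h(t) = (delta_h(t+1) . W_h^T + delta_o(t) . W2^T) * sigmoidPrime(h(t))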
        X = np.array([[a[position], b[position]]])
        # Hidden state of the current timestep (counting backwards from the last)
        layer_1 = hidden_layer_values[-position - 1]
        # Hidden state of the timestep before it
        prev_hidden_layer = hidden_layer_values[-position - 2]
        # Output-layer delta saved at this timestep during forward propagation
        output_layer_delta = output_layer_deltas[-position - 1]
        layer_1_delta = (future_layer_1_delta.dot(W_h.T) + output_layer_delta.dot(W2.T)) * sigmoidPrime(layer_1)

        # Accumulate the weight updates contributed by this timestep
        W2_update += np.atleast_2d(layer_1).T.dot(output_layer_delta)
        W_h_update += np.atleast_2d(prev_hidden_layer).T.dot(layer_1_delta)
        W1_update += X.T.dot(layer_1_delta)

        # Pass the delta back to the previous timestep
        future_layer_1_delta = layer_1_delta

    # Apply the accumulated updates. Since output_error = y - layer_2, the
    # accumulated terms already point opposite the loss gradient, so they are added.
    W1 += W1_update * learning_rate
    W2 += W2_update * learning_rate
    W_h += W_h_update * learning_rate

    # Reset the accumulators for the next training sample
    W1_update *= 0
    W2_update *= 0
    W_h_update *= 0


    # Print out the progress of the RNN every 1000 samples
    if (j % 1000 == 0):
        print("Error:" + str(overallError))
        print("Pred:" + str(d))
        print("True:" + str(c))
        # Decode the predicted bit array (most significant bit first) back into an integer
        out = 0
        for index, x in enumerate(reversed(d)):
            out += x * pow(2, index)
        print(str(a_int) + " + " + str(b_int) + " = " + str(out))
        print("------------")

# ------------------------------- EOC -------------------------------
--------------------------------------------------------------------------------
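With NumPy installed, `RNN.py` is self-contained and can be run directly; it prints one progress block like those in the sample output every 1000 training samples:

```
python RNN.py
```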