├── README.md
└── rnn.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# RNN Demo

Overview
============
This project trains a recurrent neural network to predict the sum of two 8-bit binary numbers, working through the addition one bit at a time.

Dependencies
============
* numpy
* copy (Python standard library, no install needed)

Use [pip](https://pypi.python.org/pypi/pip) to install numpy if it's missing.

Basic Usage
===========

Run rnn.py with Python 3, that's it! Every 1000 training iterations it prints the current error, the predicted and true bit patterns, and the decoded sum.

Credits
===========
Credit for the vast majority of the code here goes to @iamtrask! I've merely created a wrapper around all of the important functions to get people started.

--------------------------------------------------------------------------------
/rnn.py:
--------------------------------------------------------------------------------
import copy
import numpy as np

np.random.seed(0)


# compute sigmoid nonlinearity
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


# convert output of sigmoid function to its derivative
def sigmoid_output_to_derivative(output):
    return output * (1 - output)


# training dataset generation: lookup table from integer to 8-bit binary
int2binary = {}
binary_dim = 8

largest_number = pow(2, binary_dim)
binary = np.unpackbits(
    np.array([list(range(largest_number))], dtype=np.uint8).T, axis=1)
for i in range(largest_number):
    int2binary[i] = binary[i]

# input variables
alpha = 0.1      # learning rate
input_dim = 2    # one bit from each addend per timestep
hidden_dim = 16
output_dim = 1   # one predicted sum bit per timestep


# initialize neural network weights uniformly in [-1, 1)
synapse_0 = 2 * np.random.random((input_dim, hidden_dim)) - 1
synapse_1 = 2 * np.random.random((hidden_dim, output_dim)) - 1
synapse_h = 2 * np.random.random((hidden_dim, hidden_dim)) - 1

synapse_0_update = np.zeros_like(synapse_0)
synapse_1_update = np.zeros_like(synapse_1)
synapse_h_update = np.zeros_like(synapse_h)

# training logic
for j in range(10000):

    # generate a simple addition problem (a + b = c); keep both operands
    # below largest_number / 2 so the sum still fits in binary_dim bits
    a_int = np.random.randint(largest_number // 2)  # int version
    a = int2binary[a_int]                           # binary encoding

    b_int = np.random.randint(largest_number // 2)  # int version
    b = int2binary[b_int]                           # binary encoding

    # true answer
    c_int = a_int + b_int
    c = int2binary[c_int]

    # where we'll store our best guess (binary encoded)
    d = np.zeros_like(c)

    overallError = 0

    layer_2_deltas = list()
    layer_1_values = list()
    layer_1_values.append(np.zeros(hidden_dim))

    # moving along the positions in the binary encoding,
    # least significant bit first
    for position in range(binary_dim):

        # generate input and output
        X = np.array([[a[binary_dim - position - 1], b[binary_dim - position - 1]]])
        y = np.array([[c[binary_dim - position - 1]]]).T

        # hidden layer (input + prev_hidden)
        layer_1 = sigmoid(np.dot(X, synapse_0) + np.dot(layer_1_values[-1], synapse_h))

        # output layer (new binary representation)
        layer_2 = sigmoid(np.dot(layer_1, synapse_1))

        # did we miss?... if so, by how much?
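        # layer_2_error measures how far the predicted sum bit is from the
        # target; scaling it by the sigmoid derivative turns it into the
        # output delta that backpropagation through time consumes below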
        layer_2_error = y - layer_2
        layer_2_deltas.append(layer_2_error * sigmoid_output_to_derivative(layer_2))
        overallError += np.abs(layer_2_error[0])

        # decode estimate so we can print it out
        d[binary_dim - position - 1] = np.round(layer_2[0][0])

        # store hidden layer so we can use it in the next timestep
        layer_1_values.append(copy.deepcopy(layer_1))

    future_layer_1_delta = np.zeros(hidden_dim)

    # backpropagation through time: walk the positions in reverse
    for position in range(binary_dim):

        X = np.array([[a[position], b[position]]])
        layer_1 = layer_1_values[-position - 1]
        prev_layer_1 = layer_1_values[-position - 2]

        # error at output layer
        layer_2_delta = layer_2_deltas[-position - 1]
        # error at hidden layer: combine the delta arriving from the next
        # timestep with the delta from the current output
        layer_1_delta = (future_layer_1_delta.dot(synapse_h.T) +
                         layer_2_delta.dot(synapse_1.T)) * sigmoid_output_to_derivative(layer_1)

        # accumulate weight updates so we can apply them after the full pass
        synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
        synapse_h_update += np.atleast_2d(prev_layer_1).T.dot(layer_1_delta)
        synapse_0_update += X.T.dot(layer_1_delta)

        future_layer_1_delta = layer_1_delta

    # apply the accumulated updates, then reset the accumulators
    synapse_0 += synapse_0_update * alpha
    synapse_1 += synapse_1_update * alpha
    synapse_h += synapse_h_update * alpha

    synapse_0_update *= 0
    synapse_1_update *= 0
    synapse_h_update *= 0

    # print out progress
    if j % 1000 == 0:
        print("Error:" + str(overallError))
        print("Pred:" + str(d))
        print("True:" + str(c))
        out = 0
        for index, x in enumerate(reversed(d)):
            out += x * pow(2, index)
        print(str(a_int) + " + " + str(b_int) + " = " + str(out))
        print("------------")
--------------------------------------------------------------------------------
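Once training finishes, a quick way to sanity-check the learned weights is to reuse the forward pass on a fresh pair of numbers. The sketch below is not part of the original repo: predict_sum is a hypothetical helper that assumes rnn.py was just executed, so that sigmoid, int2binary, binary_dim, hidden_dim, and the trained synapse_0 / synapse_1 / synapse_h are still in scope.

# hypothetical helper, not part of rnn.py: one forward pass with the
# trained weights (assumes the variables defined by rnn.py are in scope)
def predict_sum(x_int, y_int):
    a, b = int2binary[x_int], int2binary[y_int]
    d = np.zeros(binary_dim)
    hidden = np.zeros(hidden_dim)
    for position in range(binary_dim):
        # feed the bits least significant first, exactly as in training
        X = np.array([[a[binary_dim - position - 1], b[binary_dim - position - 1]]])
        hidden = sigmoid(np.dot(X, synapse_0) + np.dot(hidden, synapse_h))
        layer_2 = sigmoid(np.dot(hidden, synapse_1))
        d[binary_dim - position - 1] = np.round(layer_2[0][0])
    # decode the predicted bit pattern back into an integer
    return int(sum(bit * pow(2, idx) for idx, bit in enumerate(reversed(d))))

print(predict_sum(12, 34))  # should print 46 once the network has converged

Both operands must stay below largest_number / 2, matching the range the network was trained on.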