├── README.md ├── recurrent network.ipynb ├── self_organizing_map_demo.ipynb └── simple_af_network.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # neural_networks 2 | This is the code for "Neural Networks - The Math of Intelligence #4" by Siraj Raval on YouTube 3 | 4 | 5 | ## Coding Challenge - Due Date, Thursday July 13 at 12 PM PST 6 | 7 | Create your own self-organizing map implementation using numpy. Bonus points if you put your code in a Jupyter notebook and document your steps well. You can use any dataset you like; [here](https://archive.ics.uci.edu/ml/datasets.html?area=&att=&format=&numAtt=&numIns=&sort=nameUp&task=clu&type=&view=table) is a good place to find some datasets. Post your GitHub link in the comments section of the video. Good luck! 8 | 9 | ## Overview 10 | 11 | This is the code for [this](https://youtu.be/ov_RkIJptwE) video on YouTube by Siraj Raval as part of The Math of Intelligence Series. I go over 4 different neural networks in the video, and you can find them in this repository. 12 | 13 | ## Dependencies 14 | 15 | * numpy 16 | * copy (part of the Python standard library; nothing to install) 17 | * pil (install the `Pillow` package, which provides the `PIL` module) 18 | 19 | Install dependencies using [pip](https://pip.pypa.io/en/stable/) 20 | 21 | ## Usage 22 | 23 | The simple AF notebook contains the code for the network with the added hidden layer. For the simplest version, see [this](http://iamtrask.github.io/2015/07/12/basic-python-network/). The Recurrent notebook contains the recurrent code and the self-organizing map notebook contains the code for the unsupervised model. 24 | 25 | Type `jupyter notebook` in the terminal in the main directory and the code will appear in your browser. Install jupyter from [here](http://jupyter.readthedocs.io/en/latest/install.html) if you haven't yet. 26 | 27 | ## Credits 28 | 29 | Credits go to [Trask](http://iamtrask.github.io/2015/07/12/basic-python-network/) and [Litery](https://github.com/Litery). I've merely created a wrapper to get people started. 
30 | 31 | ## Python 2/3 Troubleshooting 32 | 33 | ## Conda 34 | Install Conda https://conda.io/docs/installation.html 35 | 36 | //OSX / Linux Windows 37 | conda create -n maths python=3.5 38 | source activate maths 39 | conda install pandas matplotlib jupyter notebook scipy scikit-learn nb_conda 40 | 41 | 42 | -------------------------------------------------------------------------------- /recurrent network.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 7, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#to save hidden layer\n", 12 | "import copy\n", 13 | "#matrix math\n", 14 | "import numpy as np" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 10, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "# Input data - binary numbers for each integer from 0 to 255\n", 26 | "int_to_binary = {}\n", 27 | "binary_dim = 8\n", 28 | "max_val = (2**binary_dim) #2^8 = 256\n", 29 | "binary_val = np.unpackbits(np.array([range(max_val)], dtype=np.uint8).T, axis=1) # Calc Binary values for ints 0-255\n", 30 | "for i in range(max_val): # map Integer values to Binary values\n", 31 | " int_to_binary[i] = binary_val[i]" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 11, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# sigmoid function\n", 43 | "def activate(x,deriv=False):\n", 44 | " if(deriv==True):\n", 45 | " return x*(1-x)\n", 46 | " return 1/(1+np.exp(-x))" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 17, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "Error:[ 4.07553155]\n", 61 | "Pred:[0 0 1 1 1 1 0 0]\n", 62 | "True:[0 1 1 0 0 0 1 1]\n", 63 | "62 + 37 = 60\n", 
64 | "------------\n", 65 | "Error:[ 1.7905746]\n", 66 | "Pred:[1 0 1 0 1 0 0 1]\n", 67 | "True:[1 0 1 0 1 0 0 1]\n", 68 | "67 + 102 = 169\n", 69 | "------------\n", 70 | "Error:[ 0.67219956]\n", 71 | "Pred:[1 0 1 0 0 1 0 0]\n", 72 | "True:[1 0 1 0 0 1 0 0]\n", 73 | "39 + 125 = 164\n", 74 | "------------\n", 75 | "Error:[ 0.27309937]\n", 76 | "Pred:[1 0 1 0 1 0 1 0]\n", 77 | "True:[1 0 1 0 1 0 1 0]\n", 78 | "83 + 87 = 170\n", 79 | "------------\n", 80 | "Error:[ 0.09328066]\n", 81 | "Pred:[0 0 0 1 1 1 1 1]\n", 82 | "True:[0 0 0 1 1 1 1 1]\n", 83 | "22 + 9 = 31\n", 84 | "------------\n", 85 | "Error:[ 0.14276206]\n", 86 | "Pred:[0 1 0 0 0 0 1 1]\n", 87 | "True:[0 1 0 0 0 0 1 1]\n", 88 | "59 + 8 = 67\n", 89 | "------------\n", 90 | "Error:[ 0.14549186]\n", 91 | "Pred:[1 1 0 1 1 0 1 1]\n", 92 | "True:[1 1 0 1 1 0 1 1]\n", 93 | "109 + 110 = 219\n", 94 | "------------\n", 95 | "Error:[ 0.09007097]\n", 96 | "Pred:[0 1 0 1 1 0 1 0]\n", 97 | "True:[0 1 0 1 1 0 1 0]\n", 98 | "54 + 36 = 90\n", 99 | "------------\n", 100 | "Error:[ 0.1074296]\n", 101 | "Pred:[1 1 0 1 0 0 1 0]\n", 102 | "True:[1 1 0 1 0 0 1 0]\n", 103 | "89 + 121 = 210\n", 104 | "------------\n", 105 | "Error:[ 0.09466737]\n", 106 | "Pred:[1 0 0 0 1 1 1 0]\n", 107 | "True:[1 0 0 0 1 1 1 0]\n", 108 | "85 + 57 = 142\n", 109 | "------------\n" 110 | ] 111 | } 112 | ], 113 | "source": [ 114 | "#hyperparameters\n", 115 | "inputLayerSize = 2\n", 116 | "hiddenLayerSize = 16\n", 117 | "outputLayerSize = 1\n", 118 | "\n", 119 | "# 3 weight values\n", 120 | "W1 = 2 * np.random.random((inputLayerSize, hiddenLayerSize)) - 1\n", 121 | "W2 = 2 * np.random.random((hiddenLayerSize, outputLayerSize)) - 1\n", 122 | "W_h = 2 * np.random.random((hiddenLayerSize, hiddenLayerSize)) - 1 #Current h to h in next Timestep, recurrence!\n", 123 | "\n", 124 | "# Initialize Updated Weights Values\n", 125 | "W1_update = np.zeros_like(W1)\n", 126 | "W2_update = np.zeros_like(W2)\n", 127 | "W_h_update = np.zeros_like(W_h)\n", 128 | "\n", 129 
| "#Compute the Sum of two integers \n", 130 | "for j in range(10000):\n", 131 | " \n", 132 | " #a + b = c (random values)\n", 133 | " a_int = np.random.randint(max_val//2)\n", 134 | " b_int = np.random.randint(max_val//2)\n", 135 | " c_int = a_int + b_int\n", 136 | " \n", 137 | " # get binary values for a,b, and c\n", 138 | " a = int_to_binary[a_int]\n", 139 | " b = int_to_binary[b_int]\n", 140 | " c = int_to_binary[c_int]\n", 141 | "\n", 142 | " # Save predicted binary outputs \n", 143 | " d = np.zeros_like(c)\n", 144 | "\n", 145 | " #Initialize Error\n", 146 | " overallError = 0\n", 147 | "\n", 148 | " # Store output gradients & hidden layer values\n", 149 | " output_layer_gradients = list()\n", 150 | " hidden_layer_values = list()\n", 151 | " hidden_layer_values.append(np.zeros(hiddenLayerSize))#init as 0\n", 152 | "\n", 153 | " # Forward propagation to compute the sum of two 8 digit long binary integers\n", 154 | " for position in range(binary_dim):\n", 155 | " \n", 156 | " #input - binary values of a & b\n", 157 | " X = np.array([[a[binary_dim - position - 1], b[binary_dim - position - 1]]])\n", 158 | " #output - the sum c\n", 159 | " y = np.array([[c[binary_dim - position - 1]]]).T\n", 160 | "\n", 161 | " # Calculate the error\n", 162 | " layer_1 = activate(np.dot(X,W1) + np.dot(hidden_layer_values[-1],W_h))\n", 163 | " layer_2 = activate(np.dot(layer_1, W2))\n", 164 | " output_error = y - layer_2\n", 165 | "\n", 166 | " # Save the error gradients at each step as it will be propagated back\n", 167 | " output_layer_gradients.append((output_error)*activate(layer_2, deriv=True))\n", 168 | "\n", 169 | " # Save the sum of error at each binary position\n", 170 | " overallError += np.abs(output_error[0])\n", 171 | "\n", 172 | " # Round off the values to nearest \"0\" or \"1\" and save it to a list\n", 173 | " d[binary_dim - position - 1] = np.round(layer_2[0][0])\n", 174 | "\n", 175 | " # Save the hidden layer to be used later\n", 176 | " 
hidden_layer_values.append(copy.deepcopy(layer_1))\n", 177 | "\n", 178 | " future_layer_1_gradient = np.zeros(hiddenLayerSize)\n", 179 | "\n", 180 | " #backpropagate the error to the previous timesteps!\n", 181 | " for position in range(binary_dim):\n", 182 | " # a[0], b[0] -> a[1]b[1] ....\n", 183 | " X = np.array([[a[position], b[position]]])\n", 184 | " # The last step Hidden Layer where we are currently a[0],b[0]\n", 185 | " layer_1 = hidden_layer_values[-position - 1]\n", 186 | " # The hidden layer before the current layer, a[1],b[1]\n", 187 | " prev_hidden_layer = hidden_layer_values[-position-2]\n", 188 | " # Errors at Output Layer, a[1],b[1]\n", 189 | " output_layer_gradient = output_layer_gradients[-position-1]\n", 190 | " layer_1_gradients = (future_layer_1_gradient.dot(W_h.T) + output_layer_gradient.dot(W2.T)) * activate(layer_1, deriv=True)\n", 191 | "\n", 192 | " # Update all the weights and try again\n", 193 | " W2_update += np.atleast_2d(layer_1).T.dot(output_layer_gradient)\n", 194 | " W_h_update += np.atleast_2d(prev_hidden_layer).T.dot(layer_1_gradients)\n", 195 | " W1_update += X.T.dot(layer_1_gradients)\n", 196 | "\n", 197 | " future_layer_1_gradient = layer_1_gradients\n", 198 | "\n", 199 | " # Update the weights with the values\n", 200 | " W1 += W1_update \n", 201 | " W2 += W2_update\n", 202 | " W_h += W_h_update \n", 203 | "\n", 204 | " # Clear the updated weights values\n", 205 | " W1_update *= 0\n", 206 | " W2_update *= 0\n", 207 | " W_h_update *= 0\n", 208 | " \n", 209 | " # Print out the Progress of the RNN\n", 210 | " if (j % 1000 == 0):\n", 211 | " print(\"Error:\" + str(overallError))\n", 212 | " print(\"Pred:\" + str(d))\n", 213 | " print(\"True:\" + str(c))\n", 214 | " out = 0\n", 215 | " for index, x in enumerate(reversed(d)):\n", 216 | " out += x * pow(2, index)\n", 217 | " print(str(a_int) + \" + \" + str(b_int) + \" = \" + str(out))\n", 218 | " print(\"------------\")" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | 
"execution_count": null, 224 | "metadata": { 225 | "collapsed": true 226 | }, 227 | "outputs": [], 228 | "source": [] 229 | } 230 | ], 231 | "metadata": { 232 | "kernelspec": { 233 | "display_name": "Python 2", 234 | "language": "python", 235 | "name": "python2" 236 | }, 237 | "language_info": { 238 | "codemirror_mode": { 239 | "name": "ipython", 240 | "version": 2 241 | }, 242 | "file_extension": ".py", 243 | "mimetype": "text/x-python", 244 | "name": "python", 245 | "nbconvert_exporter": "python", 246 | "pygments_lexer": "ipython2", 247 | "version": "2.7.12" 248 | } 249 | }, 250 | "nbformat": 4, 251 | "nbformat_minor": 2 252 | } 253 | -------------------------------------------------------------------------------- /self_organizing_map_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import math\n", 13 | "from PIL import Image\n", 14 | "\n", 15 | "\n", 16 | "class SOM:\n", 17 | " def __init__(self, x_size, y_size, trait_num, t_iter, t_step):\n", 18 | " self.weights = np.random.randint(256, size=(x_size, y_size, trait_num)).astype('float64')\n", 19 | " self.t_iter = t_iter\n", 20 | " self.map_radius = max(self.weights.shape)/2\n", 21 | " self.t_const = self.t_iter/math.log(self.map_radius)\n", 22 | " self.t_step = t_step\n", 23 | "\n", 24 | " def show(self):\n", 25 | " im = Image.fromarray(self.weights.astype('uint8'), mode='RGB')\n", 26 | " im.format = 'JPG'\n", 27 | " im.show()\n", 28 | "\n", 29 | " def distance_matrix(self, vector):\n", 30 | " return np.sum((self.weights - vector) ** 2, 2)\n", 31 | "\n", 32 | " def bmu(self, vector):\n", 33 | " distance = self.distance_matrix(vector)\n", 34 | " return np.unravel_index(distance.argmin(), distance.shape)\n", 35 | "\n", 36 | " def bmu_distance(self, vector):\n", 37 | " x, y, 
rgb = self.weights.shape\n", 38 | " xi = np.arange(x).reshape(x, 1).repeat(y, 1)\n", 39 | " yi = np.arange(y).reshape(1, y).repeat(x, 0)\n", 40 | " return np.sum((np.dstack((xi, yi)) - np.array(self.bmu(vector))) ** 2, 2)\n", 41 | "\n", 42 | " def hood_radius(self, iteration):\n", 43 | " return self.map_radius * math.exp(-iteration/self.t_const)\n", 44 | "\n", 45 | " def teach_row(self, vector, i, dis_cut, dist):\n", 46 | " hood_radius_2 = self.hood_radius(i) ** 2\n", 47 | " bmu_distance = self.bmu_distance(vector).astype('float64')\n", 48 | " if dist is None:\n", 49 | " temp = hood_radius_2 - bmu_distance\n", 50 | " else:\n", 51 | " temp = dist ** 2 - bmu_distance\n", 52 | " influence = np.exp(-bmu_distance / (2 * hood_radius_2))\n", 53 | " if dis_cut:\n", 54 | " influence *= ((np.sign(temp) + 1) / 2)\n", 55 | " return np.expand_dims(influence, 2) * (vector - self.weights)\n", 56 | "\n", 57 | " def teach(self, t_set, distance_cutoff=False, distance=None):\n", 58 | " for i in range(self.t_iter):\n", 59 | " for x in t_set:\n", 60 | " self.weights += self.teach_row(x, i, distance_cutoff, distance)\n", 61 | " self.show()\n", 62 | "\n", 63 | "s = SOM(200, 200, 3, 100, 0.1)\n", 64 | "# t_set = np.array([[200, 0, 0], [0, 200, 0], [0, 0, 200], [120, 0, 100]])\n", 65 | "t_set = np.random.randint(256, size=(15, 3))\n", 66 | "s.teach(t_set)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": true 74 | }, 75 | "outputs": [], 76 | "source": [] 77 | } 78 | ], 79 | "metadata": { 80 | "kernelspec": { 81 | "display_name": "Python 2", 82 | "language": "python", 83 | "name": "python2" 84 | }, 85 | "language_info": { 86 | "codemirror_mode": { 87 | "name": "ipython", 88 | "version": 2 89 | }, 90 | "file_extension": ".py", 91 | "mimetype": "text/x-python", 92 | "name": "python", 93 | "nbconvert_exporter": "python", 94 | "pygments_lexer": "ipython2", 95 | "version": "2.7.12" 96 | } 97 | }, 98 | "nbformat": 4, 99 | 
"nbformat_minor": 2 100 | } 101 | -------------------------------------------------------------------------------- /simple_af_network.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 131, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#dependencies (matrix math) \n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 132, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "[[0 0 1]\n", 27 | " [0 1 1]\n", 28 | " [1 0 1]\n", 29 | " [1 1 1]]\n", 30 | "[[0]\n", 31 | " [1]\n", 32 | " [1]\n", 33 | " [0]]\n" 34 | ] 35 | } 36 | ], 37 | "source": [ 38 | "#input data\n", 39 | "input_data = np.array([[0,0,1],\n", 40 | " [0,1,1],\n", 41 | " [1,0,1],\n", 42 | " [1,1,1]])\n", 43 | " \n", 44 | "output_labels = np.array([[0],\n", 45 | " [1],\n", 46 | " [1],\n", 47 | " [0]])\n", 48 | "\n", 49 | "print(input_data)\n", 50 | "print(output_labels)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "collapsed": true 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "# sigmoid function\n", 62 | "def activate(x,deriv=False):\n", 63 | " if(deriv==True):\n", 64 | " return x*(1-x)\n", 65 | " return 1/(1+np.exp(-x))" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 142, 71 | "metadata": { 72 | "collapsed": false 73 | }, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "[[ 0.04934062 0.84961594 -0.47340646 -0.86807782]\n", 80 | " [ 0.47013193 0.54435606 0.81563171 0.86394414]\n", 81 | " [-0.97209685 -0.53127583 0.23355671 0.89803264]]\n", 82 | "[[ 0.90035224]\n", 83 | " [ 0.11330638]\n", 84 | " [ 0.8312127 ]\n", 85 | " [ 0.28313242]]\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 
90 | "# 2 weight values\n", 91 | "synaptic_weight_0 = 2*np.random.random((3,4)) - 1\n", 92 | "synaptic_weight_1 = 2*np.random.random((4,1)) - 1\n", 93 | "\n", 94 | "print(synaptic_weight_0)\n", 95 | "print(synaptic_weight_1)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 141, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "Error:3.17606213494e-05\n", 110 | "Error:2.93162545087e-05\n", 111 | "Error:2.72212348725e-05\n", 112 | "Error:2.5405665823e-05\n", 113 | "Error:2.38171310776e-05\n", 114 | "Error:2.24155517423e-05\n" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | "for j in range(60000):\n", 120 | "\n", 121 | "\t# Forward propagate through layers 0, 1, and 2\n", 122 | " layer0 = input_data\n", 123 | " layer1 = activate(np.dot(layer0,synaptic_weight_0))\n", 124 | " layer2 = activate(np.dot(layer1,synaptic_weight_1))\n", 125 | "\n", 126 | " #calculate error for layer 2\n", 127 | " layer2_error = output_labels - layer2\n", 128 | " \n", 129 | " if (j% 10000) == 0:\n", 130 | " print(\"Error:\" + str(np.mean(np.abs(layer2_error))))\n", 131 | " \n", 132 | " #Use it to compute the gradient\n", 133 | " layer2_gradient = layer2_error*activate(layer2,deriv=True)\n", 134 | "\n", 135 | " #calculate error for layer 1\n", 136 | " layer1_error = layer2_gradient.dot(synaptic_weight_1.T)\n", 137 | " \n", 138 | " #Use it to compute its gradient\n", 139 | " layer1_gradient = layer1_error * activate(layer1,deriv=True)\n", 140 | " \n", 141 | " #update the weights using the gradients\n", 142 | " synaptic_weight_1 += layer1.T.dot(layer2_gradient)\n", 143 | " synaptic_weight_0 += layer0.T.dot(layer1_gradient)\n" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 139, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "[ 
0.99973427 0.98488354 0.01181281 0.96003643]\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "#testing\n", 163 | "print(activate(np.dot(np.array([0, 1, 1]), synaptic_weight_0)))" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "collapsed": true 171 | }, 172 | "outputs": [], 173 | "source": [] 174 | } 175 | ], 176 | "metadata": { 177 | "kernelspec": { 178 | "display_name": "Python 2", 179 | "language": "python", 180 | "name": "python2" 181 | }, 182 | "language_info": { 183 | "codemirror_mode": { 184 | "name": "ipython", 185 | "version": 2 186 | }, 187 | "file_extension": ".py", 188 | "mimetype": "text/x-python", 189 | "name": "python", 190 | "nbconvert_exporter": "python", 191 | "pygments_lexer": "ipython2", 192 | "version": "2.7.12" 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 2 197 | } 198 | --------------------------------------------------------------------------------