├── .gitattributes └── RNN Case Study └── RNNs_in_Python.ipynb /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /RNN Case Study/RNNs_in_Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "RNN in Python.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "thmejNEWhkk3" 23 | }, 24 | "source": [ 25 | "# RNN in Python\n", 26 | "In this Colab notebook, you will get to know how various types of RNNs are implemented. Feel free to experiment, and keep an eye on the number of parameters shown in each model summary; try calculating it on your own." 
27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "metadata": { 32 | "id": "558BehfxXD_3" 33 | }, 34 | "source": [ 35 | "# import libraries\n", 36 | "import tensorflow as tf\n", 37 | "from tensorflow.keras.models import Sequential\n", 38 | "from tensorflow.keras.layers import Dense\n", 39 | "from tensorflow.keras.layers import SimpleRNN, LSTM, GRU\n", 40 | "from tensorflow.keras.layers import TimeDistributed, RepeatVector, Bidirectional" 41 | ], 42 | "execution_count": 13, 43 | "outputs": [] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "metadata": { 48 | "id": "Bcd21SkpYhkl" 49 | }, 50 | "source": [ 51 | "# define parameters\n", 52 | "n_output = 4 # number of classes in case of classification, 1 in case of regression\n", 53 | "output_activation = \"sigmoid\" # “softmax” or “sigmoid” in case of classification, “linear” in case of regression" 54 | ], 55 | "execution_count": 14, 56 | "outputs": [] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": { 61 | "id": "fW9YA4sEY4UE" 62 | }, 63 | "source": [ 64 | "#### First, let's look at how to build a vanilla RNN in Keras. We will be using Simple RNN to build sample models for different types but you may replace it by GRU or LSTM without changing anything else." 
65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "metadata": { 70 | "id": "PZtocp_3YxzB" 71 | }, 72 | "source": [ 73 | "vanilla_model = Sequential()\n", 74 | "\n", 75 | "# add the first hidden layer\n", 76 | "n_cells = 5 # number of cells to add in the hidden layer\n", 77 | "time_steps = 10 # length of sequences\n", 78 | "features = 4 # number of features of each entity in the sequence\n", 79 | "\n", 80 | "vanilla_model.add(SimpleRNN(n_cells, input_shape=(time_steps, features)))\n", 81 | "\n", 82 | "# add output layer\n", 83 | "vanilla_model.add(Dense(n_output, activation=output_activation))" 84 | ], 85 | "execution_count": 15, 86 | "outputs": [] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "metadata": { 91 | "colab": { 92 | "base_uri": "https://localhost:8080/" 93 | }, 94 | "id": "s3N57TGtZBwZ", 95 | "outputId": "0e2ff7fc-ac36-409f-d85d-a2bb802abf41" 96 | }, 97 | "source": [ 98 | "vanilla_model.summary()" 99 | ], 100 | "execution_count": 16, 101 | "outputs": [ 102 | { 103 | "output_type": "stream", 104 | "text": [ 105 | "Model: \"sequential_4\"\n", 106 | "_________________________________________________________________\n", 107 | "Layer (type) Output Shape Param # \n", 108 | "=================================================================\n", 109 | "simple_rnn_4 (SimpleRNN) (None, 5) 50 \n", 110 | "_________________________________________________________________\n", 111 | "dense_3 (Dense) (None, 4) 24 \n", 112 | "=================================================================\n", 113 | "Total params: 74\n", 114 | "Trainable params: 74\n", 115 | "Non-trainable params: 0\n", 116 | "_________________________________________________________________\n" 117 | ], 118 | "name": "stdout" 119 | } 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": { 125 | "id": "g1X2eZlph6IJ" 126 | }, 127 | "source": [ 128 | "Let's also learn to build multi-layer RNN model with two hidden RNN layers." 
129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "metadata": { 134 | "id": "zPewur-8iH0y" 135 | }, 136 | "source": [ 137 | "multilayer_model = Sequential()\n", 138 | "\n", 139 | "# add the first hidden layer\n", 140 | "n_cells = 5 # number of cells to add in the hidden layer\n", 141 | "time_steps = 10 # length of sequences\n", 142 | "features = 4 # number of features of each entity in the sequence\n", 143 | "\n", 144 | "multilayer_model.add(SimpleRNN(n_cells, input_shape=(time_steps, features), return_sequences=True))\n", 145 | "\n", 146 | "multilayer_model.add(SimpleRNN(n_cells, input_shape=(time_steps, features)))\n", 147 | "\n", 148 | "# add output layer\n", 149 | "multilayer_model.add(Dense(n_output, activation=output_activation))" 150 | ], 151 | "execution_count": 17, 152 | "outputs": [] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "metadata": { 157 | "colab": { 158 | "base_uri": "https://localhost:8080/" 159 | }, 160 | "id": "XTv5Fn27iXBf", 161 | "outputId": "e197c9db-46f6-49d7-ac32-67fa6cc14837" 162 | }, 163 | "source": [ 164 | "multilayer_model.summary()" 165 | ], 166 | "execution_count": 18, 167 | "outputs": [ 168 | { 169 | "output_type": "stream", 170 | "text": [ 171 | "Model: \"sequential_5\"\n", 172 | "_________________________________________________________________\n", 173 | "Layer (type) Output Shape Param # \n", 174 | "=================================================================\n", 175 | "simple_rnn_5 (SimpleRNN) (None, 10, 5) 50 \n", 176 | "_________________________________________________________________\n", 177 | "simple_rnn_6 (SimpleRNN) (None, 5) 55 \n", 178 | "_________________________________________________________________\n", 179 | "dense_4 (Dense) (None, 4) 24 \n", 180 | "=================================================================\n", 181 | "Total params: 129\n", 182 | "Trainable params: 129\n", 183 | "Non-trainable params: 0\n", 184 | "_________________________________________________________________\n" 185 | ], 
186 | "name": "stdout" 187 | } 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": { 193 | "id": "BsUh3EA9ZfCn" 194 | }, 195 | "source": [ 196 | "#### Let's look at how to build a many-to-one architecture in Keras." 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": { 202 | "id": "Ix5DMSGhIKbU" 203 | }, 204 | "source": [ 205 | "
\n", 206 | "\n", 207 | "![m20.png]()" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "metadata": { 213 | "id": "kSoGSVHpZMZ8" 214 | }, 215 | "source": [ 216 | "# instantiate model\n", 217 | "m2o_model = Sequential()\n", 218 | "\n", 219 | "# time_steps: multiple inputs, that is, one input at each time step\n", 220 | "# return_sequences is False by default, so the RNN layer returns only its output at the last time step.\n", 221 | "# (To get an output at each time step instead, you would need to set return_sequences=True.)\n", 222 | "# We keep the default here, hence this is a many-to-one RNN model.\n", 223 | "m2o_model.add(SimpleRNN(n_cells, input_shape=(time_steps, features)))\n", 224 | "\n", 225 | "# single output at output layer\n", 226 | "m2o_model.add(Dense(n_output, activation=output_activation))" 227 | ], 228 | "execution_count": 19, 229 | "outputs": [] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "metadata": { 234 | "colab": { 235 | "base_uri": "https://localhost:8080/" 236 | }, 237 | "id": "8ICY-hf1aAjD", 238 | "outputId": "879230ec-f168-4403-f688-54ccddebe6d9" 239 | }, 240 | "source": [ 241 | "m2o_model.summary()" 242 | ], 243 | "execution_count": 20, 244 | "outputs": [ 245 | { 246 | "output_type": "stream", 247 | "text": [ 248 | "Model: \"sequential_6\"\n", 249 | "_________________________________________________________________\n", 250 | "Layer (type) Output Shape Param # \n", 251 | "=================================================================\n", 252 | "simple_rnn_7 (SimpleRNN) (None, 5) 50 \n", 253 | "_________________________________________________________________\n", 254 | "dense_5 (Dense) (None, 4) 24 \n", 255 | "=================================================================\n", 256 | "Total params: 74\n", 257 | "Trainable params: 74\n", 258 | "Non-trainable params: 0\n", 259 | "_________________________________________________________________\n" 260 | ], 261 | "name": "stdout" 262 | } 263 | ] 264 | }, 265 | { 266 | 
"cell_type": "markdown", 267 | "metadata": { 268 | "id": "0-CD0CRgaHyc" 269 | }, 270 | "source": [ 271 | "#### Let's look at how to build a many-to-many (with equal length of input and output sequences) architecture in Keras. The TimeDistributed() wrapper allows the dense layer to access the RNN output at each time step, one by one.\n", 272 | "\n", 273 | "#### Compare this with the many-to-one model above: there the RNN layer returns only its output at the last time step (return_sequences=False), so the plain dense layer is applied just once, to that final output, which summarises all the time steps. An example is sentiment analysis, where you just need to apply the dense layer once, at the last time step.\n", 274 | "\n", 275 | "#### Whereas in a many-to-many RNN, TimeDistributed() makes sure that the dense layer gets applied to the RNN output at each time step. Hence the dense layer is active here at each time step. An example is a POS tagger, where you need to have a dense layer at each time step." 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": { 281 | "id": "KudDswp1SxS8" 282 | }, 283 | "source": [ 284 | "
\n", 285 | "\n", 286 | "![m2m.png]()" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "metadata": { 292 | "id": "jhYSMcHpaD0u" 293 | }, 294 | "source": [ 295 | "# instantiate model\n", 296 | "m2m_model = Sequential()\n", 297 | "\n", 298 | "# time_steps: multiple inputs, that is, one input at each time step\n", 299 | "# return_sequences=True makes the RNN layer return its output at every time step, not just the last one\n", 300 | "m2m_model.add(SimpleRNN(n_cells, input_shape=(time_steps, features), return_sequences=True))\n", 301 | "\n", 302 | "# TimeDistributed(): This wrapper is used when you want your neural network \n", 303 | "# to provide a separate output at each time step, which is exactly what we want in the many-to-many RNN model.\n", 304 | "m2m_model.add(TimeDistributed(Dense(n_output, activation=output_activation)))" 305 | ], 306 | "execution_count": 21, 307 | "outputs": [] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "metadata": { 312 | "colab": { 313 | "base_uri": "https://localhost:8080/" 314 | }, 315 | "id": "ghlWLq1Za3uL", 316 | "outputId": "ca836538-b5a2-419a-ea01-bc94ebb1ac45" 317 | }, 318 | "source": [ 319 | "m2m_model.summary()" 320 | ], 321 | "execution_count": 22, 322 | "outputs": [ 323 | { 324 | "output_type": "stream", 325 | "text": [ 326 | "Model: \"sequential_7\"\n", 327 | "_________________________________________________________________\n", 328 | "Layer (type) Output Shape Param # \n", 329 | "=================================================================\n", 330 | "simple_rnn_8 (SimpleRNN) (None, 10, 5) 50 \n", 331 | "_________________________________________________________________\n", 332 | "time_distributed (TimeDistri (None, 10, 4) 24 \n", 333 | "=================================================================\n", 334 | "Total params: 74\n", 335 | "Trainable params: 74\n", 336 | "Non-trainable params: 0\n", 337 | "_________________________________________________________________\n" 338 | ], 339 | "name": "stdout" 340 | } 341 | ] 342 | }, 343 | { 344 | 
"cell_type": "markdown", 345 | "metadata": { 346 | "id": "ecuLOmh2d598" 347 | }, 348 | "source": [ 349 | "#### Let's look at how to build a one-to-many RNN in Keras." 350 | ] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": { 355 | "id": "S6Ja0TQYS8ck" 356 | }, 357 | "source": [ 358 | "
\n", 359 | "\n", 360 | "![o2m.png]()" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "metadata": { 366 | "id": "kgAfWggrdvwa" 367 | }, 368 | "source": [ 369 | "# instantiate model\n", 370 | "o2m_model = Sequential()\n", 371 | "\n", 372 | "# time_steps is one in this case because the input consists of only one entity\n", 373 | "o2m_model.add(SimpleRNN(n_cells, input_shape=(1, features), return_sequences=True))\n", 374 | "\n", 375 | "# TimeDistributed(): one output per time step returned by the RNN layer. NOTE: with input_shape=(1, features) that is a single step -- see the (None, 1, 4) output shape in the summary below; to emit a longer output sequence the single input would first have to be repeated across time steps (e.g. with the imported RepeatVector layer)\n", 376 | "o2m_model.add(TimeDistributed(Dense(n_output, activation=output_activation)))" 377 | ], 378 | "execution_count": 26, 379 | "outputs": [] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "metadata": { 384 | "colab": { 385 | "base_uri": "https://localhost:8080/" 386 | }, 387 | "id": "qhA3xVaVd9av", 388 | "outputId": "04a3ed36-50a2-42e3-eda8-59e09f61ad42" 389 | }, 390 | "source": [ 391 | "o2m_model.summary()" 392 | ], 393 | "execution_count": 27, 394 | "outputs": [ 395 | { 396 | "output_type": "stream", 397 | "text": [ 398 | "Model: \"sequential_9\"\n", 399 | "_________________________________________________________________\n", 400 | "Layer (type) Output Shape Param # \n", 401 | "=================================================================\n", 402 | "simple_rnn_11 (SimpleRNN) (None, 1, 5) 50 \n", 403 | "_________________________________________________________________\n", 404 | "time_distributed_2 (TimeDist (None, 1, 4) 24 \n", 405 | "=================================================================\n", 406 | "Total params: 74\n", 407 | "Trainable params: 74\n", 408 | "Non-trainable params: 0\n", 409 | "_________________________________________________________________\n" 410 | ], 411 | "name": "stdout" 412 | } 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": { 418 | "id": "X4RG5w1-e8ng" 419 | }, 420 | "source": [ 421 | "#### Let's see how to build LSTM networks in Keras." 
422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "metadata": { 427 | "id": "4tk7mYa6emx_" 428 | }, 429 | "source": [ 430 | "# instantiate model\n", 431 | "lstm_model = Sequential()\n", 432 | "\n", 433 | "# replace the SimpleRNN() layer with LSTM() layer\n", 434 | "lstm_model.add(LSTM(n_cells, input_shape=(time_steps, features)))\n", 435 | "\n", 436 | "# output layer\n", 437 | "lstm_model.add(Dense(n_output, activation=output_activation))" 438 | ], 439 | "execution_count": 28, 440 | "outputs": [] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "metadata": { 445 | "colab": { 446 | "base_uri": "https://localhost:8080/" 447 | }, 448 | "id": "3v_c2GNOfG3S", 449 | "outputId": "342c48ce-a6f4-43d6-d3b2-9c570d637a5d" 450 | }, 451 | "source": [ 452 | "lstm_model.summary()" 453 | ], 454 | "execution_count": 29, 455 | "outputs": [ 456 | { 457 | "output_type": "stream", 458 | "text": [ 459 | "Model: \"sequential_10\"\n", 460 | "_________________________________________________________________\n", 461 | "Layer (type) Output Shape Param # \n", 462 | "=================================================================\n", 463 | "lstm (LSTM) (None, 5) 200 \n", 464 | "_________________________________________________________________\n", 465 | "dense_9 (Dense) (None, 4) 24 \n", 466 | "=================================================================\n", 467 | "Total params: 224\n", 468 | "Trainable params: 224\n", 469 | "Non-trainable params: 0\n", 470 | "_________________________________________________________________\n" 471 | ], 472 | "name": "stdout" 473 | } 474 | ] 475 | }, 476 | { 477 | "cell_type": "markdown", 478 | "metadata": { 479 | "id": "I4CgVFjWfQAB" 480 | }, 481 | "source": [ 482 | "#### Let's see how to build GRU networks in Keras" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "metadata": { 488 | "id": "uZX5eVEvfOvr" 489 | }, 490 | "source": [ 491 | "# instantiate model\n", 492 | "gru_model = Sequential()\n", 493 | "\n", 494 | "# replace the LSTM() 
layer with GRU() layer\n", 495 | "gru_model.add(GRU(n_cells, input_shape=(time_steps, features)))\n", 496 | "\n", 497 | "# output layer\n", 498 | "gru_model.add(Dense(n_output, activation=output_activation))" 499 | ], 500 | "execution_count": 30, 501 | "outputs": [] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "metadata": { 506 | "colab": { 507 | "base_uri": "https://localhost:8080/" 508 | }, 509 | "id": "R8zqspSZffSm", 510 | "outputId": "2e6cab05-25c0-4166-e70e-67b8feece3c3" 511 | }, 512 | "source": [ 513 | "gru_model.summary()" 514 | ], 515 | "execution_count": 31, 516 | "outputs": [ 517 | { 518 | "output_type": "stream", 519 | "text": [ 520 | "Model: \"sequential_11\"\n", 521 | "_________________________________________________________________\n", 522 | "Layer (type) Output Shape Param # \n", 523 | "=================================================================\n", 524 | "gru (GRU) (None, 5) 165 \n", 525 | "_________________________________________________________________\n", 526 | "dense_10 (Dense) (None, 4) 24 \n", 527 | "=================================================================\n", 528 | "Total params: 189\n", 529 | "Trainable params: 189\n", 530 | "Non-trainable params: 0\n", 531 | "_________________________________________________________________\n" 532 | ], 533 | "name": "stdout" 534 | } 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "metadata": { 540 | "id": "IWCkmuW9fhfZ" 541 | }, 542 | "source": [ 543 | "" 544 | ], 545 | "execution_count": 31, 546 | "outputs": [] 547 | } 548 | ] 549 | } --------------------------------------------------------------------------------