├── .gitattributes ├── README.md ├── Week 1 ├── Exercise_question.ipynb ├── Quiz 1.pdf └── exercise_question.py ├── Week 2 ├── Course_3_Week_2_Exercise_Question.ipynb ├── Course_3_Week_2_Lesson_1.ipynb ├── Quiz 2.pdf ├── course_3_week_2_exercise_question.py └── course_3_week_2_lesson_1.py ├── Week 3 ├── Course_3_Week_3_Lesson_1a.ipynb ├── Course_3_Week_3_Lesson_1b.ipynb ├── Course_3_Week_3_Lesson_1c.ipynb ├── Course_3_Week_3_Lesson_2.ipynb ├── Course_3_Week_3_Lesson_2c.ipynb ├── Course_3_Week_3_Lesson_2d.ipynb ├── NLP_Course_Week_3_Exercise_Question.ipynb ├── Quiz 3.pdf ├── course_3_week_3_lesson_1a.py ├── course_3_week_3_lesson_1b.py ├── course_3_week_3_lesson_1c.py ├── course_3_week_3_lesson_2.py ├── course_3_week_3_lesson_2c.py ├── course_3_week_3_lesson_2d.py └── nlp_course_week_3_exercise_question.py └── Week 4 ├── NLP_Week4_Exercise_Shakespeare_Question.ipynb ├── Quiz 4.pdf └── nlp_week4_exercise_shakespeare_question.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NLP_In_Tensorflow-Course 2 | 3 | This repository contains the exercise notebooks for Course 3 (Natural Language Processing in TensorFlow) of the **TensorFlow in Practice Specialization**. 4 | 5 | #### Download Dataset for Week 1, 2 Exercise Notebook: 6 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv 7 | 8 | #### Download Dataset for Week 3 Exercise Notebook: 9 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/training_cleaned.csv 10 | 11 | #### Download Dataset for Week 4 Exercise Notebook: 12 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt 13 | -------------------------------------------------------------------------------- /Week 1/Exercise_question.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Exercise-question.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "code", 18 | "metadata": { 19 | "id": "zrZevCPJ92HG", 20 | "colab_type": "code", 21 | "colab": { 22 | "base_uri": "https://localhost:8080/", 23 | "height": 204 24 | }, 25 | "outputId": "be1e58bc-3329-404a-e167-c08b7f6aba73" 26 | }, 27 | "source": [ 28 | "!wget --no-check-certificate \\\n", 29 | " https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv \\\n", 30 | " -O /tmp/bbc-text.csv\n", 31 | "\n", 32 | " \n", 33 | "import csv\n", 34 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 35 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 36 | "\n", 37 | "\n", 38 | "#Stopwords list from https://github.com/Yoast/YoastSEO.js/blob/develop/src/config/stopwords.js\n", 39 | "# Convert it to a Python list and paste it here\n", 40 | "stopwords = [ \"a\", \"about\", \"above\", \"after\", \"again\", \"against\", \"all\", \"am\", \"an\", \"and\", \"any\", \"are\", \"as\", \"at\", \"be\", \"because\", \"been\", \"before\", \"being\", \"below\", \"between\", \"both\", \"but\", \"by\", \"could\", \"did\", \"do\", \"does\", \"doing\", \"down\", \"during\", \"each\", \"few\",
\"for\", \"from\", \"further\", \"had\", \"has\", \"have\", \"having\", \"he\", \"he'd\", \"he'll\", \"he's\", \"her\", \"here\", \"here's\", \"hers\", \"herself\", \"him\", \"himself\", \"his\", \"how\", \"how's\", \"i\", \"i'd\", \"i'll\", \"i'm\", \"i've\", \"if\", \"in\", \"into\", \"is\", \"it\", \"it's\", \"its\", \"itself\", \"let's\", \"me\", \"more\", \"most\", \"my\", \"myself\", \"nor\", \"of\", \"on\", \"once\", \"only\", \"or\", \"other\", \"ought\", \"our\", \"ours\", \"ourselves\", \"out\", \"over\", \"own\", \"same\", \"she\", \"she'd\", \"she'll\", \"she's\", \"should\", \"so\", \"some\", \"such\", \"than\", \"that\", \"that's\", \"the\", \"their\", \"theirs\", \"them\", \"themselves\", \"then\", \"there\", \"there's\", \"these\", \"they\", \"they'd\", \"they'll\", \"they're\", \"they've\", \"this\", \"those\", \"through\", \"to\", \"too\", \"under\", \"until\", \"up\", \"very\", \"was\", \"we\", \"we'd\", \"we'll\", \"we're\", \"we've\", \"were\", \"what\", \"what's\", \"when\", \"when's\", \"where\", \"where's\", \"which\", \"while\", \"who\", \"who's\", \"whom\", \"why\", \"why's\", \"with\", \"would\", \"you\", \"you'd\", \"you'll\", \"you're\", \"you've\", \"your\", \"yours\", \"yourself\", \"yourselves\" ] \n", 41 | "#YOUR CODE HERE\n" 42 | ], 43 | "execution_count": 1, 44 | "outputs": [ 45 | { 46 | "output_type": "stream", 47 | "text": [ 48 | "--2019-06-22 00:09:58-- https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv\n", 49 | "Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.141.128, 2607:f8b0:400c:c06::80\n", 50 | "Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.141.128|:443... connected.\n", 51 | "HTTP request sent, awaiting response... 200 OK\n", 52 | "Length: 5057493 (4.8M) [application/octet-stream]\n", 53 | "Saving to: ‘/tmp/bbc-text.csv’\n", 54 | "\n", 55 | "\r/tmp/bbc-text.csv 0%[ ] 0 --.-KB/s \r/tmp/bbc-text.csv 100%[===================>] 4.82M --.-KB/s in 0.02s \n", 56 | "\n", 57 | "2019-06-22 00:09:59 (206 MB/s) - ‘/tmp/bbc-text.csv’ saved [5057493/5057493]\n", 58 | "\n" 59 | ], 60 | "name": "stdout" 61 | } 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "metadata": { 67 | "id": "1rmYBjsyCv3K", 68 | "colab_type": "code", 69 | "colab": { 70 | "base_uri": "https://localhost:8080/", 71 | "height": 71 72 | }, 73 | "outputId": "d0305e29-7b76-43be-87c1-805794fe99b5" 74 | }, 75 | "source": [ 76 | "sentences = []\n", 77 | "labels = []\n", 78 | "fields = []\n", 79 | "with open(\"/tmp/bbc-text.csv\", 'r') as csvfile:\n", 80 | " # Your Code here\n", 81 | " reader = csv.reader(csvfile, delimiter=',')\n", 82 | " next(reader)\n", 83 | " for row in reader:\n", 84 | " labels.append(row[0])\n", 85 | " sentence = row[1]\n", 86 | " for word in stopwords:\n", 87 | " token = \" \" + word + \" \"\n", 88 | " sentence = sentence.replace(token, \" \")\n", 89 | " sentence = sentence.replace(\" \", \" \")\n", 90 | " sentences.append(sentence)\n", 91 | " \n", 92 | "print(len(sentences))\n", 93 | "print(sentences[0])\n", 94 | "\n", 95 | "#Expected output\n", 96 | "# 2225\n", 97 | "# tv future hands viewers home theatre systems plasma high-definition tvs digital video recorders moving living room way people watch tv will radically different five years time. according expert panel gathered annual consumer electronics show las vegas discuss new technologies will impact one favourite pastimes. 
us leading trend programmes content will delivered viewers via home networks cable satellite telecoms companies broadband service providers front rooms portable devices. one talked-about technologies ces digital personal video recorders (dvr pvr). set-top boxes like us s tivo uk s sky+ system allow people record store play pause forward wind tv programmes want. essentially technology allows much personalised tv. also built-in high-definition tv sets big business japan us slower take off europe lack high-definition programming. not can people forward wind adverts can also forget abiding network channel schedules putting together a-la-carte entertainment. us networks cable satellite companies worried means terms advertising revenues well brand identity viewer loyalty channels. although us leads technology moment also concern raised europe particularly growing uptake services like sky+. happens today will see nine months years time uk adam hume bbc broadcast s futurologist told bbc news website. likes bbc no issues lost advertising revenue yet. pressing issue moment commercial uk broadcasters brand loyalty important everyone. will talking content brands rather network brands said tim hanlon brand communications firm starcom mediavest. reality broadband connections anybody can producer content. added: challenge now hard promote programme much choice. means said stacey jolna senior vice president tv guide tv group way people find content want watch simplified tv viewers. means networks us terms channels take leaf google s book search engine future instead scheduler help people find want watch. kind channel model might work younger ipod generation used taking control gadgets play them. might not suit everyone panel recognised. older generations comfortable familiar schedules channel brands know getting. perhaps not want much choice put hands mr hanlon suggested. end kids just diapers pushing buttons already - everything possible available said mr hanlon. ultimately consumer will tell market want. 50 000 new gadgets technologies showcased ces many enhancing tv-watching experience. high-definition tv sets everywhere many new models lcd (liquid crystal display) tvs launched dvr capability built instead external boxes. one example launched show humax s 26-inch lcd tv 80-hour tivo dvr dvd recorder. one us s biggest satellite tv companies directtv even launched branded dvr show 100-hours recording capability instant replay search function. set can pause rewind tv 90 hours. microsoft chief bill gates announced pre-show keynote speech partnership tivo called tivotogo means people can play recorded programmes windows pcs mobile devices. reflect increasing trend freeing multimedia people can watch want want." 98 | ], 99 | "execution_count": 7, 100 | "outputs": [ 101 | { 102 | "output_type": "stream", 103 | "text": [ 104 | "2225\n", 105 | "tv future hands viewers home theatre systems plasma high-definition tvs digital video recorders moving living room way people watch tv will radically different five years time. according expert panel gathered annual consumer electronics show las vegas discuss new technologies will impact one favourite pastimes. us leading trend programmes content will delivered viewers via home networks cable satellite telecoms companies broadband service providers front rooms portable devices. one talked-about technologies ces digital personal video recorders (dvr pvr). set-top boxes like us s tivo uk s sky+ system allow people record store play pause forward wind tv programmes want. 
essentially technology allows much personalised tv. also built-in high-definition tv sets big business japan us slower take off europe lack high-definition programming. not can people forward wind adverts can also forget abiding network channel schedules putting together a-la-carte entertainment. us networks cable satellite companies worried means terms advertising revenues well brand identity viewer loyalty channels. although us leads technology moment also concern raised europe particularly growing uptake services like sky+. happens today will see nine months years time uk adam hume bbc broadcast s futurologist told bbc news website. likes bbc no issues lost advertising revenue yet. pressing issue moment commercial uk broadcasters brand loyalty important everyone. will talking content brands rather network brands said tim hanlon brand communications firm starcom mediavest. reality broadband connections anybody can producer content. added: challenge now hard promote programme much choice. means said stacey jolna senior vice president tv guide tv group way people find content want watch simplified tv viewers. means networks us terms channels take leaf google s book search engine future instead scheduler help people find want watch. kind channel model might work younger ipod generation used taking control gadgets play them. might not suit everyone panel recognised. older generations comfortable familiar schedules channel brands know getting. perhaps not want much choice put hands mr hanlon suggested. end kids just diapers pushing buttons already - everything possible available said mr hanlon. ultimately consumer will tell market want. 50 000 new gadgets technologies showcased ces many enhancing tv-watching experience. high-definition tv sets everywhere many new models lcd (liquid crystal display) tvs launched dvr capability built instead external boxes. one example launched show humax s 26-inch lcd tv 80-hour tivo dvr dvd recorder. one us s biggest satellite tv companies directtv even launched branded dvr show 100-hours recording capability instant replay search function. set can pause rewind tv 90 hours. microsoft chief bill gates announced pre-show keynote speech partnership tivo called tivotogo means people can play recorded programmes windows pcs mobile devices. 
reflect increasing trend freeing multimedia people can watch want want.\n" 106 | ], 107 | "name": "stdout" 108 | } 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "metadata": { 114 | "id": "9LhzBBgSC3S5", 115 | "colab_type": "code", 116 | "colab": { 117 | "base_uri": "https://localhost:8080/", 118 | "height": 34 119 | }, 120 | "outputId": "0795b3f3-c7bf-4b47-c7a4-73a610ed5136" 121 | }, 122 | "source": [ 123 | "tokenizer = Tokenizer(oov_token = \"\") # Your Code Here\n", 124 | "tokenizer.fit_on_texts(sentences) #(# Your Code Here)\n", 125 | "word_index = tokenizer.word_index # Your Code here\n", 126 | "print(len(word_index)) #(# Your Code Here)\n", 127 | "# Expected output\n", 128 | "# 29714" 129 | ], 130 | "execution_count": 6, 131 | "outputs": [ 132 | { 133 | "output_type": "stream", 134 | "text": [ 135 | "29714\n" 136 | ], 137 | "name": "stdout" 138 | } 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "metadata": { 144 | "id": "1Gr3dbQfC5VR", 145 | "colab_type": "code", 146 | "colab": { 147 | "base_uri": "https://localhost:8080/", 148 | "height": 51 149 | }, 150 | "outputId": "2779240f-12b6-4664-dc32-7afb92c463de" 151 | }, 152 | "source": [ 153 | "sequences = tokenizer.texts_to_sequences(sentences) # Your Code Here\n", 154 | "padded = pad_sequences(sequences, padding = 'post') # Your Code here\n", 155 | "print(padded[0])\n", 156 | "print(padded.shape)\n", 157 | "\n", 158 | "# Expected output\n", 159 | "# [ 96 176 1158 ... 0 0 0]\n", 160 | "# (2225, 2442)" 161 | ], 162 | "execution_count": 10, 163 | "outputs": [ 164 | { 165 | "output_type": "stream", 166 | "text": [ 167 | "[ 96 176 1158 ... 0 0 0]\n", 168 | "(2225, 2442)\n" 169 | ], 170 | "name": "stdout" 171 | } 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "metadata": { 177 | "id": "fZufOahzC6yx", 178 | "colab_type": "code", 179 | "colab": { 180 | "base_uri": "https://localhost:8080/", 181 | "height": 71 182 | }, 183 | "outputId": "554a55df-4adc-4195-e4f8-c784db3bf120" 184 | }, 185 | "source": [ 186 | "# Your Code Here\n", 187 | "label_tokenizer = Tokenizer()\n", 188 | "label_tokenizer.fit_on_texts(labels)\n", 189 | "label_word_index = label_tokenizer.word_index\n", 190 | "label_seq = label_tokenizer.texts_to_sequences(labels)\n", 191 | "print(label_seq)\n", 192 | "print(label_word_index)\n", 193 | "\n", 194 | "# Expected Output\n", 195 | "# [[4], [2], [1], [1], [5], [3], [3], [1], [1], [5], [5], [2], [2], [3], [1], [2], [3], [1], [2], [4], [4], [4], [1], [1], [4], [1], [5], [4], [3], [5], [3], [4], [5], [5], [2], [3], [4], [5], [3], [2], [3], [1], [2], [1], [4], [5], [3], [3], [3], [2], [1], [3], [2], [2], [1], [3], [2], [1], [1], [2], [2], [1], [2], [1], [2], [4], [2], [5], [4], [2], [3], [2], [3], [1], [2], [4], [2], [1], [1], [2], [2], [1], [3], [2], [5], [3], [3], [2], [5], [2], [1], [1], [3], [1], [3], [1], [2], [1], [2], [5], [5], [1], [2], [3], [3], [4], [1], [5], [1], [4], [2], [5], [1], [5], [1], [5], [5], [3], [1], [1], [5], [3], [2], [4], [2], [2], [4], [1], [3], [1], [4], [5], [1], [2], [2], [4], [5], [4], [1], [2], [2], [2], [4], [1], [4], [2], [1], [5], [1], [4], [1], [4], [3], [2], [4], [5], [1], [2], [3], [2], [5], [3], [3], [5], [3], [2], [5], [3], [3], [5], [3], [1], [2], [3], [3], [2], [5], [1], [2], [2], [1], [4], [1], [4], [4], [1], [2], [1], [3], [5], [3], [2], [3], [2], [4], [3], [5], [3], [4], [2], [1], [2], [1], [4], [5], [2], [3], [3], [5], [1], [5], [3], [1], [5], [1], [1], [5], [1], [3], [3], [5], [4], [1], [3], [2], [5], [4], [1], [4], [1], [5], [3], [1], [5], [4], [2], [4], 
[2], [2], [4], [2], [1], [2], [1], [2], [1], [5], [2], [2], [5], [1], [1], [3], [4], [3], [3], [3], [4], [1], [4], [3], [2], [4], [5], [4], [1], [1], [2], [2], [3], [2], [4], [1], [5], [1], [3], [4], [5], [2], [1], [5], [1], [4], [3], [4], [2], [2], [3], [3], [1], [2], [4], [5], [3], [4], [2], [5], [1], [5], [1], [5], [3], [2], [1], [2], [1], [1], [5], [1], [3], [3], [2], [5], [4], [2], [1], [2], [5], [2], [2], [2], [3], [2], [3], [5], [5], [2], [1], [2], [3], [2], [4], [5], [2], [1], [1], [5], [2], [2], [3], [4], [5], [4], [3], [2], [1], [3], [2], [5], [4], [5], [4], [3], [1], [5], [2], [3], [2], [2], [3], [1], [4], [2], [2], [5], [5], [4], [1], [2], [5], [4], [4], [5], [5], [5], [3], [1], [3], [4], [2], [5], [3], [2], [5], [3], [3], [1], [1], [2], [3], [5], [2], [1], [2], [2], [1], [2], [3], [3], [3], [1], [4], [4], [2], [4], [1], [5], [2], [3], [2], [5], [2], [3], [5], [3], [2], [4], [2], [1], [1], [2], [1], [1], [5], [1], [1], [1], [4], [2], [2], [2], [3], [1], [1], [2], [4], [2], [3], [1], [3], [4], [2], [1], [5], [2], [3], [4], [2], [1], [2], [3], [2], [2], [1], [5], [4], [3], [4], [2], [1], [2], [5], [4], [4], [2], [1], [1], [5], [3], [3], [3], [1], [3], [4], [4], [5], [3], [4], [5], [2], [1], [1], [4], [2], [1], [1], [3], [1], [1], [2], [1], [5], [4], [3], [1], [3], [4], [2], [2], [2], [4], [2], [2], [1], [1], [1], [1], [2], [4], [5], [1], [1], [4], [2], [4], [5], [3], [1], [2], [3], [2], [4], [4], [3], [4], [2], [1], [2], [5], [1], [3], [5], [1], [1], [3], [4], [5], [4], [1], [3], [2], [5], [3], [2], [5], [1], [1], [4], [3], [5], [3], [5], [3], [4], [3], [5], [1], [2], [1], [5], [1], [5], [4], [2], [1], [3], [5], [3], [5], [5], [5], [3], [5], [4], [3], [4], [4], [1], [1], [4], [4], [1], [5], [5], [1], [4], [5], [1], [1], [4], [2], [3], [4], [2], [1], [5], [1], [5], [3], [4], [5], [5], [2], [5], [5], [1], [4], [4], [3], [1], [4], [1], [3], [3], [5], [4], [2], [4], [4], [4], [2], [3], [3], [1], [4], [2], [2], [5], [5], [1], [4], [2], [4], [5], [1], [4], [3], [4], [3], [2], [3], [3], [2], [1], [4], [1], [4], [3], [5], [4], [1], [5], [4], [1], [3], [5], [1], [4], [1], [1], [3], [5], [2], [3], [5], [2], [2], [4], [2], [5], [4], [1], [4], [3], [4], [3], [2], [3], [5], [1], [2], [2], [2], [5], [1], [2], [5], [5], [1], [5], [3], [3], [3], [1], [1], [1], [4], [3], [1], [3], [3], [4], [3], [1], [2], [5], [1], [2], [2], [4], [2], [5], [5], [5], [2], [5], [5], [3], [4], [2], [1], [4], [1], [1], [3], [2], [1], [4], [2], [1], [4], [1], [1], [5], [1], [2], [1], [2], [4], [3], [4], [2], [1], [1], [2], [2], [2], [2], [3], [1], [2], [4], [2], [1], [3], [2], [4], [2], [1], [2], [3], [5], [1], [2], [3], [2], [5], [2], [2], [2], [1], [3], [5], [1], [3], [1], [3], [3], [2], [2], [1], [4], [5], [1], [5], [2], [2], [2], [4], [1], [4], [3], [4], [4], [4], [1], [4], [4], [5], [5], [4], [1], [5], [4], [1], [1], [2], [5], [4], [2], [1], [2], [3], [2], [5], [4], [2], [3], [2], [4], [1], [2], [5], [2], [3], [1], [5], [3], [1], [2], [1], [3], [3], [1], [5], [5], [2], [2], [1], [4], [4], [1], [5], [4], [4], [2], [1], [5], [4], [1], [1], [2], [5], [2], [2], [2], [5], [1], [5], [4], [4], [4], [3], [4], [4], [5], [5], [1], [1], [3], [2], [5], [1], [3], [5], [4], [3], [4], [4], [2], [5], [3], [4], [3], [3], [1], [3], [3], [5], [4], [1], [3], [1], [5], [3], [2], [2], [3], [1], [1], [1], [5], [4], [4], [2], [5], [1], [3], [4], [3], [5], [4], [4], [2], [2], [1], [2], [2], [4], [3], [5], [2], [2], [2], [2], [2], [4], [1], [3], [4], [4], [2], [2], [5], [3], [5], [1], [4], [1], [5], [1], [4], [1], [2], [1], [3], [3], [5], 
[2], [1], [3], [3], [1], [5], [3], [2], [4], [1], [2], [2], [2], [5], [5], [4], [4], [2], [2], [5], [1], [2], [5], [4], [4], [2], [2], [1], [1], [1], [3], [3], [1], [3], [1], [2], [5], [1], [4], [5], [1], [1], [2], [2], [4], [4], [1], [5], [1], [5], [1], [5], [3], [5], [5], [4], [5], [2], [2], [3], [1], [3], [4], [2], [3], [1], [3], [1], [5], [1], [3], [1], [1], [4], [5], [1], [3], [1], [1], [2], [4], [5], [3], [4], [5], [3], [5], [3], [5], [5], [4], [5], [3], [5], [5], [4], [4], [1], [1], [5], [5], [4], [5], [3], [4], [5], [2], [4], [1], [2], [5], [5], [4], [5], [4], [2], [5], [1], [5], [2], [1], [2], [1], [3], [4], [5], [3], [2], [5], [5], [3], [2], [5], [1], [3], [1], [2], [2], [2], [2], [2], [5], [4], [1], [5], [5], [2], [1], [4], [4], [5], [1], [2], [3], [2], [3], [2], [2], [5], [3], [2], [2], [4], [3], [1], [4], [5], [3], [2], [2], [1], [5], [3], [4], [2], [2], [3], [2], [1], [5], [1], [5], [4], [3], [2], [2], [4], [2], [2], [1], [2], [4], [5], [3], [2], [3], [2], [1], [4], [2], [3], [5], [4], [2], [5], [1], [3], [3], [1], [3], [2], [4], [5], [1], [1], [4], [2], [1], [5], [4], [1], [3], [1], [2], [2], [2], [3], [5], [1], [3], [4], [2], [2], [4], [5], [5], [4], [4], [1], [1], [5], [4], [5], [1], [3], [4], [2], [1], [5], [2], [2], [5], [1], [2], [1], [4], [3], [3], [4], [5], [3], [5], [2], [2], [3], [1], [4], [1], [1], [1], [3], [2], [1], [2], [4], [1], [2], [2], [1], [3], [4], [1], [2], [4], [1], [1], [2], [2], [2], [2], [3], [5], [4], [2], [2], [1], [2], [5], [2], [5], [1], [3], [2], [2], [4], [5], [2], [2], [2], [3], [2], [3], [4], [5], [3], [5], [1], [4], [3], [2], [4], [1], [2], [2], [5], [4], [2], [2], [1], [1], [5], [1], [3], [1], [2], [1], [2], [3], [3], [2], [3], [4], [5], [1], [2], [5], [1], [3], [3], [4], [5], [2], [3], [3], [1], [4], [2], [1], [5], [1], [5], [1], [2], [1], [3], [5], [4], [2], [1], [3], [4], [1], [5], [2], [1], [5], [1], [4], [1], [4], [3], [1], [2], [5], [4], [4], [3], [4], [5], [4], [1], [2], [4], [2], [5], [1], [4], [3], [3], [3], [3], [5], [5], [5], [2], [3], [3], [1], [1], [4], [1], [3], [2], [2], [4], [1], [4], [2], [4], [3], [3], [1], [2], [3], [1], [2], [4], [2], [2], [5], [5], [1], [2], [4], [4], [3], [2], [3], [1], [5], [5], [3], [3], [2], [2], [4], [4], [1], [1], [3], [4], [1], [4], [2], [1], [2], [3], [1], [5], [2], [4], [3], [5], [4], [2], [1], [5], [4], [4], [5], [3], [4], [5], [1], [5], [1], [1], [1], [3], [4], [1], [2], [1], [1], [2], [4], [1], [2], [5], [3], [4], [1], [3], [4], [5], [3], [1], [3], [4], [2], [5], [1], [3], [2], [4], [4], [4], [3], [2], [1], [3], [5], [4], [5], [1], [4], [2], [3], [5], [4], [3], [1], [1], [2], [5], [2], [2], [3], [2], [2], [3], [4], [5], [3], [5], [5], [2], [3], [1], [3], [5], [1], [5], [3], [5], [5], [5], [2], [1], [3], [1], [5], [4], [4], [2], [3], [5], [2], [1], [2], [3], [3], [2], [1], [4], [4], [4], [2], [3], [3], [2], [1], [1], [5], [2], [1], [1], [3], [3], [3], [5], [3], [2], [4], [2], [3], [5], [5], [2], [1], [3], [5], [1], [5], [3], [3], [2], [3], [1], [5], [5], [4], [4], [4], [4], [3], [4], [2], [4], [1], [1], [5], [2], [4], [5], [2], [4], [1], [4], [5], [5], [3], [3], [1], [2], [2], [4], [5], [1], [3], [2], [4], [5], [3], [1], [5], [3], [3], [4], [1], [3], [2], [3], [5], [4], [1], [3], [5], [5], [2], [1], [4], [4], [1], [5], [4], [3], [4], [1], [3], [3], [1], [5], [1], [3], [1], [4], [5], [1], [5], [2], [2], [5], [5], [5], [4], [1], [2], [2], [3], [3], [2], [3], [5], [1], [1], [4], [3], [1], [2], [1], [2], [4], [1], [1], [2], [5], [1], [1], [4], [1], [2], [3], [2], [5], [4], [5], [3], [2], [5], [3], 
[5], [3], [3], [2], [1], [1], [1], [4], [4], [1], [3], [5], [4], [1], [5], [2], [5], [3], [2], [1], [4], [2], [1], [3], [2], [5], [5], [5], [3], [5], [3], [5], [1], [5], [1], [3], [3], [2], [3], [4], [1], [4], [1], [2], [3], [4], [5], [5], [3], [5], [3], [1], [1], [3], [2], [4], [1], [3], [3], [5], [1], [3], [3], [2], [4], [4], [2], [4], [1], [1], [2], [3], [2], [4], [1], [4], [3], [5], [1], [2], [1], [5], [4], [4], [1], [3], [1], [2], [1], [2], [1], [1], [5], [5], [2], [4], [4], [2], [4], [2], [2], [1], [1], [3], [1], [4], [1], [4], [1], [1], [2], [2], [4], [1], [2], [4], [4], [3], [1], [2], [5], [5], [4], [3], [1], [1], [4], [2], [4], [5], [5], [3], [3], [2], [5], [1], [5], [5], [2], [1], [3], [4], [2], [1], [5], [4], [3], [3], [1], [1], [2], [2], [2], [2], [2], [5], [2], [3], [3], [4], [4], [5], [3], [5], [2], [3], [1], [1], [2], [4], [2], [4], [1], [2], [2], [3], [1], [1], [3], [3], [5], [5], [3], [2], [3], [3], [2], [4], [3], [3], [3], [3], [3], [5], [5], [4], [3], [1], [3], [1], [4], [1], [1], [1], [5], [4], [5], [4], [1], [4], [1], [1], [5], [5], [2], [5], [5], [3], [2], [1], [4], [4], [3], [2], [1], [2], [5], [1], [3], [5], [1], [1], [2], [3], [4], [4], [2], [2], [1], [3], [5], [1], [1], [3], [5], [4], [1], [5], [2], [3], [1], [3], [4], [5], [1], [3], [2], [5], [3], [5], [3], [1], [3], [2], [2], [3], [2], [4], [1], [2], [5], [2], [1], [1], [5], [4], [3], [4], [3], [3], [1], [1], [1], [2], [4], [5], [2], [1], [2], [1], [2], [4], [2], [2], [2], [2], [1], [1], [1], [2], [2], [5], [2], [2], [2], [1], [1], [1], [4], [2], [1], [1], [1], [2], [5], [4], [4], [4], [3], [2], [2], [4], [2], [4], [1], [1], [3], [3], [3], [1], [1], [3], [3], [4], [2], [1], [1], [1], [1], [2], [1], [2], [2], [2], [2], [1], [3], [1], [4], [4], [1], [4], [2], [5], [2], [1], [2], [4], [4], [3], [5], [2], [5], [2], [4], [3], [5], [3], [5], [5], [4], [2], [4], [4], [2], [3], [1], [5], [2], [3], [5], [2], [4], [1], [4], [3], [1], [3], [2], [3], [3], [2], [2], [2], [4], [3], [2], [3], [2], [5], [3], [1], [3], [3], [1], [5], [4], [4], [2], [4], [1], [2], [2], [3], [1], [4], [4], [4], [1], [5], [1], [3], [2], [3], [3], [5], [4], [2], [4], [1], [5], [5], [1], [2], [5], [4], [4], [1], [5], [2], [3], [3], [3], [4], [4], [2], [3], [2], [3], [3], [5], [1], [4], [2], [4], [5], [4], [4], [1], [3], [1], [1], [3], [5], [5], [2], [3], [3], [1], [2], [2], [4], [2], [4], [4], [1], [2], [3], [1], [2], [2], [1], [4], [1], [4], [5], [1], [1], [5], [2], [4], [1], [1], [3], [4], [2], [3], [1], [1], [3], [5], [4], [4], [4], [2], [1], [5], [5], [4], [2], [3], [4], [1], [1], [4], [4], [3], [2], [1], [5], [5], [1], [5], [4], [4], [2], [2], [2], [1], [1], [4], [1], [2], [4], [2], [2], [1], [2], [3], [2], [2], [4], [2], [4], [3], [4], [5], [3], [4], [5], [1], [3], [5], [2], [4], [2], [4], [5], [4], [1], [2], [2], [3], [5], [3], [1]]\n", 196 | "# {'sport': 1, 'business': 2, 'politics': 3, 'tech': 4, 'entertainment': 5}" 197 | ], 198 | "execution_count": 12, 199 | "outputs": [ 200 | { 201 | "output_type": "stream", 202 | "text": [ 203 | "[[4], [2], [1], [1], [5], [3], [3], [1], [1], [5], [5], [2], [2], [3], [1], [2], [3], [1], [2], [4], [4], [4], [1], [1], [4], [1], [5], [4], [3], [5], [3], [4], [5], [5], [2], [3], [4], [5], [3], [2], [3], [1], [2], [1], [4], [5], [3], [3], [3], [2], [1], [3], [2], [2], [1], [3], [2], [1], [1], [2], [2], [1], [2], [1], [2], [4], [2], [5], [4], [2], [3], [2], [3], [1], [2], [4], [2], [1], [1], [2], [2], [1], [3], [2], [5], [3], [3], [2], [5], [2], [1], [1], [3], [1], [3], [1], [2], [1], [2], [5], [5], [1], [2], 
[3], [3], [4], [1], [5], [1], [4], [2], [5], [1], [5], [1], [5], [5], [3], [1], [1], [5], [3], [2], [4], [2], [2], [4], [1], [3], [1], [4], [5], [1], [2], [2], [4], [5], [4], [1], [2], [2], [2], [4], [1], [4], [2], [1], [5], [1], [4], [1], [4], [3], [2], [4], [5], [1], [2], [3], [2], [5], [3], [3], [5], [3], [2], [5], [3], [3], [5], [3], [1], [2], [3], [3], [2], [5], [1], [2], [2], [1], [4], [1], [4], [4], [1], [2], [1], [3], [5], [3], [2], [3], [2], [4], [3], [5], [3], [4], [2], [1], [2], [1], [4], [5], [2], [3], [3], [5], [1], [5], [3], [1], [5], [1], [1], [5], [1], [3], [3], [5], [4], [1], [3], [2], [5], [4], [1], [4], [1], [5], [3], [1], [5], [4], [2], [4], [2], [2], [4], [2], [1], [2], [1], [2], [1], [5], [2], [2], [5], [1], [1], [3], [4], [3], [3], [3], [4], [1], [4], [3], [2], [4], [5], [4], [1], [1], [2], [2], [3], [2], [4], [1], [5], [1], [3], [4], [5], [2], [1], [5], [1], [4], [3], [4], [2], [2], [3], [3], [1], [2], [4], [5], [3], [4], [2], [5], [1], [5], [1], [5], [3], [2], [1], [2], [1], [1], [5], [1], [3], [3], [2], [5], [4], [2], [1], [2], [5], [2], [2], [2], [3], [2], [3], [5], [5], [2], [1], [2], [3], [2], [4], [5], [2], [1], [1], [5], [2], [2], [3], [4], [5], [4], [3], [2], [1], [3], [2], [5], [4], [5], [4], [3], [1], [5], [2], [3], [2], [2], [3], [1], [4], [2], [2], [5], [5], [4], [1], [2], [5], [4], [4], [5], [5], [5], [3], [1], [3], [4], [2], [5], [3], [2], [5], [3], [3], [1], [1], [2], [3], [5], [2], [1], [2], [2], [1], [2], [3], [3], [3], [1], [4], [4], [2], [4], [1], [5], [2], [3], [2], [5], [2], [3], [5], [3], [2], [4], [2], [1], [1], [2], [1], [1], [5], [1], [1], [1], [4], [2], [2], [2], [3], [1], [1], [2], [4], [2], [3], [1], [3], [4], [2], [1], [5], [2], [3], [4], [2], [1], [2], [3], [2], [2], [1], [5], [4], [3], [4], [2], [1], [2], [5], [4], [4], [2], [1], [1], [5], [3], [3], [3], [1], [3], [4], [4], [5], [3], [4], [5], [2], [1], [1], [4], [2], [1], [1], [3], [1], [1], [2], [1], [5], [4], [3], [1], [3], [4], [2], [2], [2], [4], [2], [2], [1], [1], [1], [1], [2], [4], [5], [1], [1], [4], [2], [4], [5], [3], [1], [2], [3], [2], [4], [4], [3], [4], [2], [1], [2], [5], [1], [3], [5], [1], [1], [3], [4], [5], [4], [1], [3], [2], [5], [3], [2], [5], [1], [1], [4], [3], [5], [3], [5], [3], [4], [3], [5], [1], [2], [1], [5], [1], [5], [4], [2], [1], [3], [5], [3], [5], [5], [5], [3], [5], [4], [3], [4], [4], [1], [1], [4], [4], [1], [5], [5], [1], [4], [5], [1], [1], [4], [2], [3], [4], [2], [1], [5], [1], [5], [3], [4], [5], [5], [2], [5], [5], [1], [4], [4], [3], [1], [4], [1], [3], [3], [5], [4], [2], [4], [4], [4], [2], [3], [3], [1], [4], [2], [2], [5], [5], [1], [4], [2], [4], [5], [1], [4], [3], [4], [3], [2], [3], [3], [2], [1], [4], [1], [4], [3], [5], [4], [1], [5], [4], [1], [3], [5], [1], [4], [1], [1], [3], [5], [2], [3], [5], [2], [2], [4], [2], [5], [4], [1], [4], [3], [4], [3], [2], [3], [5], [1], [2], [2], [2], [5], [1], [2], [5], [5], [1], [5], [3], [3], [3], [1], [1], [1], [4], [3], [1], [3], [3], [4], [3], [1], [2], [5], [1], [2], [2], [4], [2], [5], [5], [5], [2], [5], [5], [3], [4], [2], [1], [4], [1], [1], [3], [2], [1], [4], [2], [1], [4], [1], [1], [5], [1], [2], [1], [2], [4], [3], [4], [2], [1], [1], [2], [2], [2], [2], [3], [1], [2], [4], [2], [1], [3], [2], [4], [2], [1], [2], [3], [5], [1], [2], [3], [2], [5], [2], [2], [2], [1], [3], [5], [1], [3], [1], [3], [3], [2], [2], [1], [4], [5], [1], [5], [2], [2], [2], [4], [1], [4], [3], [4], [4], [4], [1], [4], [4], [5], [5], [4], [1], [5], [4], [1], [1], [2], [5], [4], [2], [1], [2], [3], [2], 
[5], [4], [2], [3], [2], [4], [1], [2], [5], [2], [3], [1], [5], [3], [1], [2], [1], [3], [3], [1], [5], [5], [2], [2], [1], [4], [4], [1], [5], [4], [4], [2], [1], [5], [4], [1], [1], [2], [5], [2], [2], [2], [5], [1], [5], [4], [4], [4], [3], [4], [4], [5], [5], [1], [1], [3], [2], [5], [1], [3], [5], [4], [3], [4], [4], [2], [5], [3], [4], [3], [3], [1], [3], [3], [5], [4], [1], [3], [1], [5], [3], [2], [2], [3], [1], [1], [1], [5], [4], [4], [2], [5], [1], [3], [4], [3], [5], [4], [4], [2], [2], [1], [2], [2], [4], [3], [5], [2], [2], [2], [2], [2], [4], [1], [3], [4], [4], [2], [2], [5], [3], [5], [1], [4], [1], [5], [1], [4], [1], [2], [1], [3], [3], [5], [2], [1], [3], [3], [1], [5], [3], [2], [4], [1], [2], [2], [2], [5], [5], [4], [4], [2], [2], [5], [1], [2], [5], [4], [4], [2], [2], [1], [1], [1], [3], [3], [1], [3], [1], [2], [5], [1], [4], [5], [1], [1], [2], [2], [4], [4], [1], [5], [1], [5], [1], [5], [3], [5], [5], [4], [5], [2], [2], [3], [1], [3], [4], [2], [3], [1], [3], [1], [5], [1], [3], [1], [1], [4], [5], [1], [3], [1], [1], [2], [4], [5], [3], [4], [5], [3], [5], [3], [5], [5], [4], [5], [3], [5], [5], [4], [4], [1], [1], [5], [5], [4], [5], [3], [4], [5], [2], [4], [1], [2], [5], [5], [4], [5], [4], [2], [5], [1], [5], [2], [1], [2], [1], [3], [4], [5], [3], [2], [5], [5], [3], [2], [5], [1], [3], [1], [2], [2], [2], [2], [2], [5], [4], [1], [5], [5], [2], [1], [4], [4], [5], [1], [2], [3], [2], [3], [2], [2], [5], [3], [2], [2], [4], [3], [1], [4], [5], [3], [2], [2], [1], [5], [3], [4], [2], [2], [3], [2], [1], [5], [1], [5], [4], [3], [2], [2], [4], [2], [2], [1], [2], [4], [5], [3], [2], [3], [2], [1], [4], [2], [3], [5], [4], [2], [5], [1], [3], [3], [1], [3], [2], [4], [5], [1], [1], [4], [2], [1], [5], [4], [1], [3], [1], [2], [2], [2], [3], [5], [1], [3], [4], [2], [2], [4], [5], [5], [4], [4], [1], [1], [5], [4], [5], [1], [3], [4], [2], [1], [5], [2], [2], [5], [1], [2], [1], [4], [3], [3], [4], [5], [3], [5], [2], [2], [3], [1], [4], [1], [1], [1], [3], [2], [1], [2], [4], [1], [2], [2], [1], [3], [4], [1], [2], [4], [1], [1], [2], [2], [2], [2], [3], [5], [4], [2], [2], [1], [2], [5], [2], [5], [1], [3], [2], [2], [4], [5], [2], [2], [2], [3], [2], [3], [4], [5], [3], [5], [1], [4], [3], [2], [4], [1], [2], [2], [5], [4], [2], [2], [1], [1], [5], [1], [3], [1], [2], [1], [2], [3], [3], [2], [3], [4], [5], [1], [2], [5], [1], [3], [3], [4], [5], [2], [3], [3], [1], [4], [2], [1], [5], [1], [5], [1], [2], [1], [3], [5], [4], [2], [1], [3], [4], [1], [5], [2], [1], [5], [1], [4], [1], [4], [3], [1], [2], [5], [4], [4], [3], [4], [5], [4], [1], [2], [4], [2], [5], [1], [4], [3], [3], [3], [3], [5], [5], [5], [2], [3], [3], [1], [1], [4], [1], [3], [2], [2], [4], [1], [4], [2], [4], [3], [3], [1], [2], [3], [1], [2], [4], [2], [2], [5], [5], [1], [2], [4], [4], [3], [2], [3], [1], [5], [5], [3], [3], [2], [2], [4], [4], [1], [1], [3], [4], [1], [4], [2], [1], [2], [3], [1], [5], [2], [4], [3], [5], [4], [2], [1], [5], [4], [4], [5], [3], [4], [5], [1], [5], [1], [1], [1], [3], [4], [1], [2], [1], [1], [2], [4], [1], [2], [5], [3], [4], [1], [3], [4], [5], [3], [1], [3], [4], [2], [5], [1], [3], [2], [4], [4], [4], [3], [2], [1], [3], [5], [4], [5], [1], [4], [2], [3], [5], [4], [3], [1], [1], [2], [5], [2], [2], [3], [2], [2], [3], [4], [5], [3], [5], [5], [2], [3], [1], [3], [5], [1], [5], [3], [5], [5], [5], [2], [1], [3], [1], [5], [4], [4], [2], [3], [5], [2], [1], [2], [3], [3], [2], [1], [4], [4], [4], [2], [3], [3], [2], [1], [1], [5], [2], [1], [1], 
[3], [3], [3], [5], [3], [2], [4], [2], [3], [5], [5], [2], [1], [3], [5], [1], [5], [3], [3], [2], [3], [1], [5], [5], [4], [4], [4], [4], [3], [4], [2], [4], [1], [1], [5], [2], [4], [5], [2], [4], [1], [4], [5], [5], [3], [3], [1], [2], [2], [4], [5], [1], [3], [2], [4], [5], [3], [1], [5], [3], [3], [4], [1], [3], [2], [3], [5], [4], [1], [3], [5], [5], [2], [1], [4], [4], [1], [5], [4], [3], [4], [1], [3], [3], [1], [5], [1], [3], [1], [4], [5], [1], [5], [2], [2], [5], [5], [5], [4], [1], [2], [2], [3], [3], [2], [3], [5], [1], [1], [4], [3], [1], [2], [1], [2], [4], [1], [1], [2], [5], [1], [1], [4], [1], [2], [3], [2], [5], [4], [5], [3], [2], [5], [3], [5], [3], [3], [2], [1], [1], [1], [4], [4], [1], [3], [5], [4], [1], [5], [2], [5], [3], [2], [1], [4], [2], [1], [3], [2], [5], [5], [5], [3], [5], [3], [5], [1], [5], [1], [3], [3], [2], [3], [4], [1], [4], [1], [2], [3], [4], [5], [5], [3], [5], [3], [1], [1], [3], [2], [4], [1], [3], [3], [5], [1], [3], [3], [2], [4], [4], [2], [4], [1], [1], [2], [3], [2], [4], [1], [4], [3], [5], [1], [2], [1], [5], [4], [4], [1], [3], [1], [2], [1], [2], [1], [1], [5], [5], [2], [4], [4], [2], [4], [2], [2], [1], [1], [3], [1], [4], [1], [4], [1], [1], [2], [2], [4], [1], [2], [4], [4], [3], [1], [2], [5], [5], [4], [3], [1], [1], [4], [2], [4], [5], [5], [3], [3], [2], [5], [1], [5], [5], [2], [1], [3], [4], [2], [1], [5], [4], [3], [3], [1], [1], [2], [2], [2], [2], [2], [5], [2], [3], [3], [4], [4], [5], [3], [5], [2], [3], [1], [1], [2], [4], [2], [4], [1], [2], [2], [3], [1], [1], [3], [3], [5], [5], [3], [2], [3], [3], [2], [4], [3], [3], [3], [3], [3], [5], [5], [4], [3], [1], [3], [1], [4], [1], [1], [1], [5], [4], [5], [4], [1], [4], [1], [1], [5], [5], [2], [5], [5], [3], [2], [1], [4], [4], [3], [2], [1], [2], [5], [1], [3], [5], [1], [1], [2], [3], [4], [4], [2], [2], [1], [3], [5], [1], [1], [3], [5], [4], [1], [5], [2], [3], [1], [3], [4], [5], [1], [3], [2], [5], [3], [5], [3], [1], [3], [2], [2], [3], [2], [4], [1], [2], [5], [2], [1], [1], [5], [4], [3], [4], [3], [3], [1], [1], [1], [2], [4], [5], [2], [1], [2], [1], [2], [4], [2], [2], [2], [2], [1], [1], [1], [2], [2], [5], [2], [2], [2], [1], [1], [1], [4], [2], [1], [1], [1], [2], [5], [4], [4], [4], [3], [2], [2], [4], [2], [4], [1], [1], [3], [3], [3], [1], [1], [3], [3], [4], [2], [1], [1], [1], [1], [2], [1], [2], [2], [2], [2], [1], [3], [1], [4], [4], [1], [4], [2], [5], [2], [1], [2], [4], [4], [3], [5], [2], [5], [2], [4], [3], [5], [3], [5], [5], [4], [2], [4], [4], [2], [3], [1], [5], [2], [3], [5], [2], [4], [1], [4], [3], [1], [3], [2], [3], [3], [2], [2], [2], [4], [3], [2], [3], [2], [5], [3], [1], [3], [3], [1], [5], [4], [4], [2], [4], [1], [2], [2], [3], [1], [4], [4], [4], [1], [5], [1], [3], [2], [3], [3], [5], [4], [2], [4], [1], [5], [5], [1], [2], [5], [4], [4], [1], [5], [2], [3], [3], [3], [4], [4], [2], [3], [2], [3], [3], [5], [1], [4], [2], [4], [5], [4], [4], [1], [3], [1], [1], [3], [5], [5], [2], [3], [3], [1], [2], [2], [4], [2], [4], [4], [1], [2], [3], [1], [2], [2], [1], [4], [1], [4], [5], [1], [1], [5], [2], [4], [1], [1], [3], [4], [2], [3], [1], [1], [3], [5], [4], [4], [4], [2], [1], [5], [5], [4], [2], [3], [4], [1], [1], [4], [4], [3], [2], [1], [5], [5], [1], [5], [4], [4], [2], [2], [2], [1], [1], [4], [1], [2], [4], [2], [2], [1], [2], [3], [2], [2], [4], [2], [4], [3], [4], [5], [3], [4], [5], [1], [3], [5], [2], [4], [2], [4], [5], [4], [1], [2], [2], [3], [5], [3], [1]]\n", 204 | "{'sport': 1, 'business': 2, 'politics': 3, 
'tech': 4, 'entertainment': 5}\n" 205 | ], 206 | "name": "stdout" 207 | } 208 | ] 209 | } 210 | ] 211 | } -------------------------------------------------------------------------------- /Week 1/Quiz 1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/07Agarg/Natural-Language-Processing-In-Tensorflow-Course/46eb21e25f73fd8644a95e64696d64dd4843e1e8/Week 1/Quiz 1.pdf -------------------------------------------------------------------------------- /Week 1/exercise_question.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Exercise-question.ipynb 3 | 4 | Automatically generated by Colaboratory. 5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%201%20-%20Exercise-question.ipynb 8 | """ 9 | """ 10 | !wget --no-check-certificate \ 11 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv \ 12 | -O /tmp/bbc-text.csv 13 | 14 | """ 15 | import csv 16 | from tensorflow.keras.preprocessing.text import Tokenizer 17 | from tensorflow.keras.preprocessing.sequence import pad_sequences 18 | 19 | 20 | #Stopwords list from https://github.com/Yoast/YoastSEO.js/blob/develop/src/config/stopwords.js 21 | # Convert it to a Python list and paste it here 22 | stopwords = [ "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "could", "did", "do", "does", "doing", "down", "during", "each", "few", "for", "from", "further", "had", "has", "have", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "it", "it's", "its", "itself", "let's", "me", "more", "most", "my", "myself", "nor", "of", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own", "same", "she", "she'd", "she'll", "she's", "should", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "we", "we'd", "we'll", "we're", "we've", "were", "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "would", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves" ] 23 | #YOUR CODE HERE 24 | 25 | sentences = [] 26 | labels = [] 27 | fields = [] 28 | with open("bbc-text.csv", 'r') as csvfile: 29 | # Your Code here 30 | reader = csv.reader(csvfile, delimiter=',') 31 | next(reader) 32 | for row in reader: 33 | labels.append(row[0]) 34 | sentence = row[1] 35 | for word in stopwords: 36 | token = " " + word + " " 37 | sentence = sentence.replace(token, " ") 38 | sentence = sentence.replace(" ", " ") 39 | sentences.append(sentence) 40 | 41 | print(len(sentences)) 42 | print(sentences[0]) 43 | 44 | #Expected output 45 | # 2225 46 | # tv future hands viewers home theatre systems plasma high-definition tvs digital video recorders moving living room way people watch tv will radically different five years time. 
according expert panel gathered annual consumer electronics show las vegas discuss new technologies will impact one favourite pastimes. us leading trend programmes content will delivered viewers via home networks cable satellite telecoms companies broadband service providers front rooms portable devices. one talked-about technologies ces digital personal video recorders (dvr pvr). set-top boxes like us s tivo uk s sky+ system allow people record store play pause forward wind tv programmes want. essentially technology allows much personalised tv. also built-in high-definition tv sets big business japan us slower take off europe lack high-definition programming. not can people forward wind adverts can also forget abiding network channel schedules putting together a-la-carte entertainment. us networks cable satellite companies worried means terms advertising revenues well brand identity viewer loyalty channels. although us leads technology moment also concern raised europe particularly growing uptake services like sky+. happens today will see nine months years time uk adam hume bbc broadcast s futurologist told bbc news website. likes bbc no issues lost advertising revenue yet. pressing issue moment commercial uk broadcasters brand loyalty important everyone. will talking content brands rather network brands said tim hanlon brand communications firm starcom mediavest. reality broadband connections anybody can producer content. added: challenge now hard promote programme much choice. means said stacey jolna senior vice president tv guide tv group way people find content want watch simplified tv viewers. means networks us terms channels take leaf google s book search engine future instead scheduler help people find want watch. kind channel model might work younger ipod generation used taking control gadgets play them. might not suit everyone panel recognised. older generations comfortable familiar schedules channel brands know getting. perhaps not want much choice put hands mr hanlon suggested. end kids just diapers pushing buttons already - everything possible available said mr hanlon. ultimately consumer will tell market want. 50 000 new gadgets technologies showcased ces many enhancing tv-watching experience. high-definition tv sets everywhere many new models lcd (liquid crystal display) tvs launched dvr capability built instead external boxes. one example launched show humax s 26-inch lcd tv 80-hour tivo dvr dvd recorder. one us s biggest satellite tv companies directtv even launched branded dvr show 100-hours recording capability instant replay search function. set can pause rewind tv 90 hours. microsoft chief bill gates announced pre-show keynote speech partnership tivo called tivotogo means people can play recorded programmes windows pcs mobile devices. reflect increasing trend freeing multimedia people can watch want want. 47 | 48 | tokenizer = Tokenizer(oov_token = "") # Your Code Here 49 | tokenizer.fit_on_texts(sentences) #(# Your Code Here) 50 | word_index = tokenizer.word_index # Your Code here 51 | print(len(word_index)) #(# Your Code Here) 52 | # Expected output 53 | # 29714 54 | 55 | sequences = tokenizer.texts_to_sequences(sentences) # Your Code Here 56 | padded = pad_sequences(sequences, padding = 'post') # Your Code here 57 | print(padded[0]) 58 | print(padded.shape) 59 | 60 | # Expected output 61 | # [ 96 176 1158 ... 
0 0 0] 62 | # (2225, 2442) 63 | 64 | # Your Code Here 65 | label_tokenizer = Tokenizer() 66 | label_tokenizer.fit_on_texts(labels) 67 | label_word_index = label_tokenizer.word_index 68 | label_seq = label_tokenizer.texts_to_sequences(labels) 69 | print(label_seq) 70 | print(label_word_index) 71 | 72 | # Expected Output 73 | # [[4], [2], [1], [1], [5], [3], [3], [1], [1], [5], [5], [2], [2], [3], [1], [2], [3], [1], [2], [4], [4], [4], [1], [1], [4], [1], [5], [4], [3], [5], [3], [4], [5], [5], [2], [3], [4], [5], [3], [2], [3], [1], [2], [1], [4], [5], [3], [3], [3], [2], [1], [3], [2], [2], [1], [3], [2], [1], [1], [2], [2], [1], [2], [1], [2], [4], [2], [5], [4], [2], [3], [2], [3], [1], [2], [4], [2], [1], [1], [2], [2], [1], [3], [2], [5], [3], [3], [2], [5], [2], [1], [1], [3], [1], [3], [1], [2], [1], [2], [5], [5], [1], [2], [3], [3], [4], [1], [5], [1], [4], [2], [5], [1], [5], [1], [5], [5], [3], [1], [1], [5], [3], [2], [4], [2], [2], [4], [1], [3], [1], [4], [5], [1], [2], [2], [4], [5], [4], [1], [2], [2], [2], [4], [1], [4], [2], [1], [5], [1], [4], [1], [4], [3], [2], [4], [5], [1], [2], [3], [2], [5], [3], [3], [5], [3], [2], [5], [3], [3], [5], [3], [1], [2], [3], [3], [2], [5], [1], [2], [2], [1], [4], [1], [4], [4], [1], [2], [1], [3], [5], [3], [2], [3], [2], [4], [3], [5], [3], [4], [2], [1], [2], [1], [4], [5], [2], [3], [3], [5], [1], [5], [3], [1], [5], [1], [1], [5], [1], [3], [3], [5], [4], [1], [3], [2], [5], [4], [1], [4], [1], [5], [3], [1], [5], [4], [2], [4], [2], [2], [4], [2], [1], [2], [1], [2], [1], [5], [2], [2], [5], [1], [1], [3], [4], [3], [3], [3], [4], [1], [4], [3], [2], [4], [5], [4], [1], [1], [2], [2], [3], [2], [4], [1], [5], [1], [3], [4], [5], [2], [1], [5], [1], [4], [3], [4], [2], [2], [3], [3], [1], [2], [4], [5], [3], [4], [2], [5], [1], [5], [1], [5], [3], [2], [1], [2], [1], [1], [5], [1], [3], [3], [2], [5], [4], [2], [1], [2], [5], [2], [2], [2], [3], [2], [3], [5], [5], [2], [1], [2], [3], [2], [4], [5], [2], [1], [1], [5], [2], [2], [3], [4], [5], [4], [3], [2], [1], [3], [2], [5], [4], [5], [4], [3], [1], [5], [2], [3], [2], [2], [3], [1], [4], [2], [2], [5], [5], [4], [1], [2], [5], [4], [4], [5], [5], [5], [3], [1], [3], [4], [2], [5], [3], [2], [5], [3], [3], [1], [1], [2], [3], [5], [2], [1], [2], [2], [1], [2], [3], [3], [3], [1], [4], [4], [2], [4], [1], [5], [2], [3], [2], [5], [2], [3], [5], [3], [2], [4], [2], [1], [1], [2], [1], [1], [5], [1], [1], [1], [4], [2], [2], [2], [3], [1], [1], [2], [4], [2], [3], [1], [3], [4], [2], [1], [5], [2], [3], [4], [2], [1], [2], [3], [2], [2], [1], [5], [4], [3], [4], [2], [1], [2], [5], [4], [4], [2], [1], [1], [5], [3], [3], [3], [1], [3], [4], [4], [5], [3], [4], [5], [2], [1], [1], [4], [2], [1], [1], [3], [1], [1], [2], [1], [5], [4], [3], [1], [3], [4], [2], [2], [2], [4], [2], [2], [1], [1], [1], [1], [2], [4], [5], [1], [1], [4], [2], [4], [5], [3], [1], [2], [3], [2], [4], [4], [3], [4], [2], [1], [2], [5], [1], [3], [5], [1], [1], [3], [4], [5], [4], [1], [3], [2], [5], [3], [2], [5], [1], [1], [4], [3], [5], [3], [5], [3], [4], [3], [5], [1], [2], [1], [5], [1], [5], [4], [2], [1], [3], [5], [3], [5], [5], [5], [3], [5], [4], [3], [4], [4], [1], [1], [4], [4], [1], [5], [5], [1], [4], [5], [1], [1], [4], [2], [3], [4], [2], [1], [5], [1], [5], [3], [4], [5], [5], [2], [5], [5], [1], [4], [4], [3], [1], [4], [1], [3], [3], [5], [4], [2], [4], [4], [4], [2], [3], [3], [1], [4], [2], [2], [5], [5], [1], [4], [2], [4], [5], [1], [4], [3], [4], [3], [2], [3], [3], [2], [1], [4], 
[1], [4], [3], [5], [4], [1], [5], [4], [1], [3], [5], [1], [4], [1], [1], [3], [5], [2], [3], [5], [2], [2], [4], [2], [5], [4], [1], [4], [3], [4], [3], [2], [3], [5], [1], [2], [2], [2], [5], [1], [2], [5], [5], [1], [5], [3], [3], [3], [1], [1], [1], [4], [3], [1], [3], [3], [4], [3], [1], [2], [5], [1], [2], [2], [4], [2], [5], [5], [5], [2], [5], [5], [3], [4], [2], [1], [4], [1], [1], [3], [2], [1], [4], [2], [1], [4], [1], [1], [5], [1], [2], [1], [2], [4], [3], [4], [2], [1], [1], [2], [2], [2], [2], [3], [1], [2], [4], [2], [1], [3], [2], [4], [2], [1], [2], [3], [5], [1], [2], [3], [2], [5], [2], [2], [2], [1], [3], [5], [1], [3], [1], [3], [3], [2], [2], [1], [4], [5], [1], [5], [2], [2], [2], [4], [1], [4], [3], [4], [4], [4], [1], [4], [4], [5], [5], [4], [1], [5], [4], [1], [1], [2], [5], [4], [2], [1], [2], [3], [2], [5], [4], [2], [3], [2], [4], [1], [2], [5], [2], [3], [1], [5], [3], [1], [2], [1], [3], [3], [1], [5], [5], [2], [2], [1], [4], [4], [1], [5], [4], [4], [2], [1], [5], [4], [1], [1], [2], [5], [2], [2], [2], [5], [1], [5], [4], [4], [4], [3], [4], [4], [5], [5], [1], [1], [3], [2], [5], [1], [3], [5], [4], [3], [4], [4], [2], [5], [3], [4], [3], [3], [1], [3], [3], [5], [4], [1], [3], [1], [5], [3], [2], [2], [3], [1], [1], [1], [5], [4], [4], [2], [5], [1], [3], [4], [3], [5], [4], [4], [2], [2], [1], [2], [2], [4], [3], [5], [2], [2], [2], [2], [2], [4], [1], [3], [4], [4], [2], [2], [5], [3], [5], [1], [4], [1], [5], [1], [4], [1], [2], [1], [3], [3], [5], [2], [1], [3], [3], [1], [5], [3], [2], [4], [1], [2], [2], [2], [5], [5], [4], [4], [2], [2], [5], [1], [2], [5], [4], [4], [2], [2], [1], [1], [1], [3], [3], [1], [3], [1], [2], [5], [1], [4], [5], [1], [1], [2], [2], [4], [4], [1], [5], [1], [5], [1], [5], [3], [5], [5], [4], [5], [2], [2], [3], [1], [3], [4], [2], [3], [1], [3], [1], [5], [1], [3], [1], [1], [4], [5], [1], [3], [1], [1], [2], [4], [5], [3], [4], [5], [3], [5], [3], [5], [5], [4], [5], [3], [5], [5], [4], [4], [1], [1], [5], [5], [4], [5], [3], [4], [5], [2], [4], [1], [2], [5], [5], [4], [5], [4], [2], [5], [1], [5], [2], [1], [2], [1], [3], [4], [5], [3], [2], [5], [5], [3], [2], [5], [1], [3], [1], [2], [2], [2], [2], [2], [5], [4], [1], [5], [5], [2], [1], [4], [4], [5], [1], [2], [3], [2], [3], [2], [2], [5], [3], [2], [2], [4], [3], [1], [4], [5], [3], [2], [2], [1], [5], [3], [4], [2], [2], [3], [2], [1], [5], [1], [5], [4], [3], [2], [2], [4], [2], [2], [1], [2], [4], [5], [3], [2], [3], [2], [1], [4], [2], [3], [5], [4], [2], [5], [1], [3], [3], [1], [3], [2], [4], [5], [1], [1], [4], [2], [1], [5], [4], [1], [3], [1], [2], [2], [2], [3], [5], [1], [3], [4], [2], [2], [4], [5], [5], [4], [4], [1], [1], [5], [4], [5], [1], [3], [4], [2], [1], [5], [2], [2], [5], [1], [2], [1], [4], [3], [3], [4], [5], [3], [5], [2], [2], [3], [1], [4], [1], [1], [1], [3], [2], [1], [2], [4], [1], [2], [2], [1], [3], [4], [1], [2], [4], [1], [1], [2], [2], [2], [2], [3], [5], [4], [2], [2], [1], [2], [5], [2], [5], [1], [3], [2], [2], [4], [5], [2], [2], [2], [3], [2], [3], [4], [5], [3], [5], [1], [4], [3], [2], [4], [1], [2], [2], [5], [4], [2], [2], [1], [1], [5], [1], [3], [1], [2], [1], [2], [3], [3], [2], [3], [4], [5], [1], [2], [5], [1], [3], [3], [4], [5], [2], [3], [3], [1], [4], [2], [1], [5], [1], [5], [1], [2], [1], [3], [5], [4], [2], [1], [3], [4], [1], [5], [2], [1], [5], [1], [4], [1], [4], [3], [1], [2], [5], [4], [4], [3], [4], [5], [4], [1], [2], [4], [2], [5], [1], [4], [3], [3], [3], [3], [5], [5], [5], [2], [3], [3], [1], 
[1], [4], [1], [3], [2], [2], [4], [1], [4], [2], [4], [3], [3], [1], [2], [3], [1], [2], [4], [2], [2], [5], [5], [1], [2], [4], [4], [3], [2], [3], [1], [5], [5], [3], [3], [2], [2], [4], [4], [1], [1], [3], [4], [1], [4], [2], [1], [2], [3], [1], [5], [2], [4], [3], [5], [4], [2], [1], [5], [4], [4], [5], [3], [4], [5], [1], [5], [1], [1], [1], [3], [4], [1], [2], [1], [1], [2], [4], [1], [2], [5], [3], [4], [1], [3], [4], [5], [3], [1], [3], [4], [2], [5], [1], [3], [2], [4], [4], [4], [3], [2], [1], [3], [5], [4], [5], [1], [4], [2], [3], [5], [4], [3], [1], [1], [2], [5], [2], [2], [3], [2], [2], [3], [4], [5], [3], [5], [5], [2], [3], [1], [3], [5], [1], [5], [3], [5], [5], [5], [2], [1], [3], [1], [5], [4], [4], [2], [3], [5], [2], [1], [2], [3], [3], [2], [1], [4], [4], [4], [2], [3], [3], [2], [1], [1], [5], [2], [1], [1], [3], [3], [3], [5], [3], [2], [4], [2], [3], [5], [5], [2], [1], [3], [5], [1], [5], [3], [3], [2], [3], [1], [5], [5], [4], [4], [4], [4], [3], [4], [2], [4], [1], [1], [5], [2], [4], [5], [2], [4], [1], [4], [5], [5], [3], [3], [1], [2], [2], [4], [5], [1], [3], [2], [4], [5], [3], [1], [5], [3], [3], [4], [1], [3], [2], [3], [5], [4], [1], [3], [5], [5], [2], [1], [4], [4], [1], [5], [4], [3], [4], [1], [3], [3], [1], [5], [1], [3], [1], [4], [5], [1], [5], [2], [2], [5], [5], [5], [4], [1], [2], [2], [3], [3], [2], [3], [5], [1], [1], [4], [3], [1], [2], [1], [2], [4], [1], [1], [2], [5], [1], [1], [4], [1], [2], [3], [2], [5], [4], [5], [3], [2], [5], [3], [5], [3], [3], [2], [1], [1], [1], [4], [4], [1], [3], [5], [4], [1], [5], [2], [5], [3], [2], [1], [4], [2], [1], [3], [2], [5], [5], [5], [3], [5], [3], [5], [1], [5], [1], [3], [3], [2], [3], [4], [1], [4], [1], [2], [3], [4], [5], [5], [3], [5], [3], [1], [1], [3], [2], [4], [1], [3], [3], [5], [1], [3], [3], [2], [4], [4], [2], [4], [1], [1], [2], [3], [2], [4], [1], [4], [3], [5], [1], [2], [1], [5], [4], [4], [1], [3], [1], [2], [1], [2], [1], [1], [5], [5], [2], [4], [4], [2], [4], [2], [2], [1], [1], [3], [1], [4], [1], [4], [1], [1], [2], [2], [4], [1], [2], [4], [4], [3], [1], [2], [5], [5], [4], [3], [1], [1], [4], [2], [4], [5], [5], [3], [3], [2], [5], [1], [5], [5], [2], [1], [3], [4], [2], [1], [5], [4], [3], [3], [1], [1], [2], [2], [2], [2], [2], [5], [2], [3], [3], [4], [4], [5], [3], [5], [2], [3], [1], [1], [2], [4], [2], [4], [1], [2], [2], [3], [1], [1], [3], [3], [5], [5], [3], [2], [3], [3], [2], [4], [3], [3], [3], [3], [3], [5], [5], [4], [3], [1], [3], [1], [4], [1], [1], [1], [5], [4], [5], [4], [1], [4], [1], [1], [5], [5], [2], [5], [5], [3], [2], [1], [4], [4], [3], [2], [1], [2], [5], [1], [3], [5], [1], [1], [2], [3], [4], [4], [2], [2], [1], [3], [5], [1], [1], [3], [5], [4], [1], [5], [2], [3], [1], [3], [4], [5], [1], [3], [2], [5], [3], [5], [3], [1], [3], [2], [2], [3], [2], [4], [1], [2], [5], [2], [1], [1], [5], [4], [3], [4], [3], [3], [1], [1], [1], [2], [4], [5], [2], [1], [2], [1], [2], [4], [2], [2], [2], [2], [1], [1], [1], [2], [2], [5], [2], [2], [2], [1], [1], [1], [4], [2], [1], [1], [1], [2], [5], [4], [4], [4], [3], [2], [2], [4], [2], [4], [1], [1], [3], [3], [3], [1], [1], [3], [3], [4], [2], [1], [1], [1], [1], [2], [1], [2], [2], [2], [2], [1], [3], [1], [4], [4], [1], [4], [2], [5], [2], [1], [2], [4], [4], [3], [5], [2], [5], [2], [4], [3], [5], [3], [5], [5], [4], [2], [4], [4], [2], [3], [1], [5], [2], [3], [5], [2], [4], [1], [4], [3], [1], [3], [2], [3], [3], [2], [2], [2], [4], [3], [2], [3], [2], [5], [3], [1], [3], [3], [1], [5], [4], [4], 
[2], [4], [1], [2], [2], [3], [1], [4], [4], [4], [1], [5], [1], [3], [2], [3], [3], [5], [4], [2], [4], [1], [5], [5], [1], [2], [5], [4], [4], [1], [5], [2], [3], [3], [3], [4], [4], [2], [3], [2], [3], [3], [5], [1], [4], [2], [4], [5], [4], [4], [1], [3], [1], [1], [3], [5], [5], [2], [3], [3], [1], [2], [2], [4], [2], [4], [4], [1], [2], [3], [1], [2], [2], [1], [4], [1], [4], [5], [1], [1], [5], [2], [4], [1], [1], [3], [4], [2], [3], [1], [1], [3], [5], [4], [4], [4], [2], [1], [5], [5], [4], [2], [3], [4], [1], [1], [4], [4], [3], [2], [1], [5], [5], [1], [5], [4], [4], [2], [2], [2], [1], [1], [4], [1], [2], [4], [2], [2], [1], [2], [3], [2], [2], [4], [2], [4], [3], [4], [5], [3], [4], [5], [1], [3], [5], [2], [4], [2], [4], [5], [4], [1], [2], [2], [3], [5], [3], [1]] 74 | # {'sport': 1, 'business': 2, 'politics': 3, 'tech': 4, 'entertainment': 5} -------------------------------------------------------------------------------- /Week 2/Course_3_Week_2_Exercise_Question.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Course 4 - Week 2 - Exercise - Question.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "accelerator": "GPU" 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "code", 19 | "metadata": { 20 | "id": "gnwiOnGyW5JK", 21 | "colab_type": "code", 22 | "colab": { 23 | "base_uri": "https://localhost:8080/", 24 | "height": 204 25 | }, 26 | "outputId": "fb4de731-64ae-4492-da83-ff37930ec2e4" 27 | }, 28 | "source": [ 29 | "import csv\n", 30 | "import tensorflow as tf\n", 31 | "import numpy as np\n", 32 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 33 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 34 | "\n", 35 | "!wget --no-check-certificate \\\n", 36 | " https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv \\\n", 37 | " -O /tmp/bbc-text.csv" 38 | ], 39 | "execution_count": 1, 40 | "outputs": [ 41 | { 42 | "output_type": "stream", 43 | "text": [ 44 | "--2019-06-22 11:51:22-- https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv\n", 45 | "Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.141.128, 2607:f8b0:400c:c06::80\n", 46 | "Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.141.128|:443... connected.\n", 47 | "HTTP request sent, awaiting response... 
200 OK\n", 48 | "Length: 5057493 (4.8M) [application/octet-stream]\n", 49 | "Saving to: ‘/tmp/bbc-text.csv’\n", 50 | "\n", 51 | "\r/tmp/bbc-text.csv 0%[ ] 0 --.-KB/s \r/tmp/bbc-text.csv 100%[===================>] 4.82M --.-KB/s in 0.06s \n", 52 | "\n", 53 | "2019-06-22 11:51:23 (82.1 MB/s) - ‘/tmp/bbc-text.csv’ saved [5057493/5057493]\n", 54 | "\n" 55 | ], 56 | "name": "stdout" 57 | } 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "metadata": { 63 | "id": "EYo6A4v5ZABQ", 64 | "colab_type": "code", 65 | "colab": {} 66 | }, 67 | "source": [ 68 | "vocab_size = 1000 # YOUR CODE HERE\n", 69 | "embedding_dim = 16 # YOUR CODE HERE\n", 70 | "max_length = 120 # YOUR CODE HERE\n", 71 | "trunc_type = 'post' # YOUR CODE HERE\n", 72 | "padding_type = 'post' # YOUR CODE HERE\n", 73 | "oov_tok = \"\" # YOUR CODE HERE\n", 74 | "training_portion = .8" 75 | ], 76 | "execution_count": 0, 77 | "outputs": [] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "metadata": { 82 | "id": "iU1qq3_SZBx_", 83 | "colab_type": "code", 84 | "colab": { 85 | "base_uri": "https://localhost:8080/", 86 | "height": 34 87 | }, 88 | "outputId": "e791d9ce-619d-4bf4-f2fd-2a1c2e54b8de" 89 | }, 90 | "source": [ 91 | "sentences = []\n", 92 | "labels = []\n", 93 | "stopwords = [ \"a\", \"about\", \"above\", \"after\", \"again\", \"against\", \"all\", \"am\", \"an\", \"and\", \"any\", \"are\", \"as\", \"at\", \"be\", \"because\", \"been\", \"before\", \"being\", \"below\", \"between\", \"both\", \"but\", \"by\", \"could\", \"did\", \"do\", \"does\", \"doing\", \"down\", \"during\", \"each\", \"few\", \"for\", \"from\", \"further\", \"had\", \"has\", \"have\", \"having\", \"he\", \"he'd\", \"he'll\", \"he's\", \"her\", \"here\", \"here's\", \"hers\", \"herself\", \"him\", \"himself\", \"his\", \"how\", \"how's\", \"i\", \"i'd\", \"i'll\", \"i'm\", \"i've\", \"if\", \"in\", \"into\", \"is\", \"it\", \"it's\", \"its\", \"itself\", \"let's\", \"me\", \"more\", \"most\", \"my\", \"myself\", \"nor\", \"of\", \"on\", \"once\", \"only\", \"or\", \"other\", \"ought\", \"our\", \"ours\", \"ourselves\", \"out\", \"over\", \"own\", \"same\", \"she\", \"she'd\", \"she'll\", \"she's\", \"should\", \"so\", \"some\", \"such\", \"than\", \"that\", \"that's\", \"the\", \"their\", \"theirs\", \"them\", \"themselves\", \"then\", \"there\", \"there's\", \"these\", \"they\", \"they'd\", \"they'll\", \"they're\", \"they've\", \"this\", \"those\", \"through\", \"to\", \"too\", \"under\", \"until\", \"up\", \"very\", \"was\", \"we\", \"we'd\", \"we'll\", \"we're\", \"we've\", \"were\", \"what\", \"what's\", \"when\", \"when's\", \"where\", \"where's\", \"which\", \"while\", \"who\", \"who's\", \"whom\", \"why\", \"why's\", \"with\", \"would\", \"you\", \"you'd\", \"you'll\", \"you're\", \"you've\", \"your\", \"yours\", \"yourself\", \"yourselves\" ]\n", 94 | "print(len(stopwords))\n", 95 | "# Expected Output\n", 96 | "# 153" 97 | ], 98 | "execution_count": 16, 99 | "outputs": [ 100 | { 101 | "output_type": "stream", 102 | "text": [ 103 | "153\n" 104 | ], 105 | "name": "stdout" 106 | } 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "metadata": { 112 | "id": "eutB2xMiZD0e", 113 | "colab_type": "code", 114 | "colab": { 115 | "base_uri": "https://localhost:8080/", 116 | "height": 88 117 | }, 118 | "outputId": "6cf64e1c-035e-40e6-9570-3bdb78f0da5c" 119 | }, 120 | "source": [ 121 | "with open(\"/tmp/bbc-text.csv\", 'r') as csvfile:\n", 122 | " # YOUR CODE HERE\n", 123 | " reader = csv.reader(csvfile, delimiter=',')\n", 124 | " next(reader)\n", 125 | " 
for row in reader:\n", 126 | " labels.append(row[0])\n", 127 | " sentence = row[1]\n", 128 | " for word in stopwords:\n", 129 | " token = \" \" + word + \" \"\n", 130 | " sentence = sentence.replace(token, \" \")\n", 131 | " sentence = sentence.replace(\" \", \" \")\n", 132 | " sentences.append(sentence)\n", 133 | " \n", 134 | " \n", 135 | "print(len(labels))\n", 136 | "print(len(sentences))\n", 137 | "print(sentences[0])\n", 138 | "# Expected Output\n", 139 | "# 2225\n", 140 | "# 2225\n", 141 | "# tv future hands viewers home theatre systems plasma high-definition tvs digital video recorders moving living room way people watch tv will radically different five years time. according expert panel gathered annual consumer electronics show las vegas discuss new technologies will impact one favourite pastimes. us leading trend programmes content will delivered viewers via home networks cable satellite telecoms companies broadband service providers front rooms portable devices. one talked-about technologies ces digital personal video recorders (dvr pvr). set-top boxes like us s tivo uk s sky+ system allow people record store play pause forward wind tv programmes want. essentially technology allows much personalised tv. also built-in high-definition tv sets big business japan us slower take off europe lack high-definition programming. not can people forward wind adverts can also forget abiding network channel schedules putting together a-la-carte entertainment. us networks cable satellite companies worried means terms advertising revenues well brand identity viewer loyalty channels. although us leads technology moment also concern raised europe particularly growing uptake services like sky+. happens today will see nine months years time uk adam hume bbc broadcast s futurologist told bbc news website. likes bbc no issues lost advertising revenue yet. pressing issue moment commercial uk broadcasters brand loyalty important everyone. will talking content brands rather network brands said tim hanlon brand communications firm starcom mediavest. reality broadband connections anybody can producer content. added: challenge now hard promote programme much choice. means said stacey jolna senior vice president tv guide tv group way people find content want watch simplified tv viewers. means networks us terms channels take leaf google s book search engine future instead scheduler help people find want watch. kind channel model might work younger ipod generation used taking control gadgets play them. might not suit everyone panel recognised. older generations comfortable familiar schedules channel brands know getting. perhaps not want much choice put hands mr hanlon suggested. end kids just diapers pushing buttons already - everything possible available said mr hanlon. ultimately consumer will tell market want. 50 000 new gadgets technologies showcased ces many enhancing tv-watching experience. high-definition tv sets everywhere many new models lcd (liquid crystal display) tvs launched dvr capability built instead external boxes. one example launched show humax s 26-inch lcd tv 80-hour tivo dvr dvd recorder. one us s biggest satellite tv companies directtv even launched branded dvr show 100-hours recording capability instant replay search function. set can pause rewind tv 90 hours. microsoft chief bill gates announced pre-show keynote speech partnership tivo called tivotogo means people can play recorded programmes windows pcs mobile devices. 
reflect increasing trend freeing multimedia people can watch want want." 142 | ], 143 | "execution_count": 17, 144 | "outputs": [ 145 | { 146 | "output_type": "stream", 147 | "text": [ 148 | "2225\n", 149 | "2225\n", 150 | "tv future hands viewers home theatre systems plasma high-definition tvs digital video recorders moving living room way people watch tv will radically different five years time. according expert panel gathered annual consumer electronics show las vegas discuss new technologies will impact one favourite pastimes. us leading trend programmes content will delivered viewers via home networks cable satellite telecoms companies broadband service providers front rooms portable devices. one talked-about technologies ces digital personal video recorders (dvr pvr). set-top boxes like us s tivo uk s sky+ system allow people record store play pause forward wind tv programmes want. essentially technology allows much personalised tv. also built-in high-definition tv sets big business japan us slower take off europe lack high-definition programming. not can people forward wind adverts can also forget abiding network channel schedules putting together a-la-carte entertainment. us networks cable satellite companies worried means terms advertising revenues well brand identity viewer loyalty channels. although us leads technology moment also concern raised europe particularly growing uptake services like sky+. happens today will see nine months years time uk adam hume bbc broadcast s futurologist told bbc news website. likes bbc no issues lost advertising revenue yet. pressing issue moment commercial uk broadcasters brand loyalty important everyone. will talking content brands rather network brands said tim hanlon brand communications firm starcom mediavest. reality broadband connections anybody can producer content. added: challenge now hard promote programme much choice. means said stacey jolna senior vice president tv guide tv group way people find content want watch simplified tv viewers. means networks us terms channels take leaf google s book search engine future instead scheduler help people find want watch. kind channel model might work younger ipod generation used taking control gadgets play them. might not suit everyone panel recognised. older generations comfortable familiar schedules channel brands know getting. perhaps not want much choice put hands mr hanlon suggested. end kids just diapers pushing buttons already - everything possible available said mr hanlon. ultimately consumer will tell market want. 50 000 new gadgets technologies showcased ces many enhancing tv-watching experience. high-definition tv sets everywhere many new models lcd (liquid crystal display) tvs launched dvr capability built instead external boxes. one example launched show humax s 26-inch lcd tv 80-hour tivo dvr dvd recorder. one us s biggest satellite tv companies directtv even launched branded dvr show 100-hours recording capability instant replay search function. set can pause rewind tv 90 hours. microsoft chief bill gates announced pre-show keynote speech partnership tivo called tivotogo means people can play recorded programmes windows pcs mobile devices. 
reflect increasing trend freeing multimedia people can watch want want.\n" 151 | ], 152 | "name": "stdout" 153 | } 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "metadata": { 159 | "id": "XfdaWh06ZGe3", 160 | "colab_type": "code", 161 | "colab": { 162 | "base_uri": "https://localhost:8080/", 163 | "height": 102 164 | }, 165 | "outputId": "b2f27c3e-2cb9-46c7-a1e1-ed91d890578e" 166 | }, 167 | "source": [ 168 | "train_size = int(len(sentences) * training_portion) # YOUR CODE HERE\n", 169 | "\n", 170 | "train_sentences = sentences[:train_size] # YOUR CODE HERE\n", 171 | "train_labels = labels[:train_size] # YOUR CODE HERE\n", 172 | "\n", 173 | "validation_sentences = sentences[train_size:] # YOUR CODE HERE\n", 174 | "validation_labels = labels[train_size:] # YOUR CODE HERE\n", 175 | "\n", 176 | "print(train_size)\n", 177 | "print(len(train_sentences))\n", 178 | "print(len(train_labels))\n", 179 | "print(len(validation_sentences))\n", 180 | "print(len(validation_labels))\n", 181 | "\n", 182 | "# Expected output (if training_portion=.8)\n", 183 | "# 1780\n", 184 | "# 1780\n", 185 | "# 1780\n", 186 | "# 445\n", 187 | "# 445" 188 | ], 189 | "execution_count": 18, 190 | "outputs": [ 191 | { 192 | "output_type": "stream", 193 | "text": [ 194 | "1780\n", 195 | "1780\n", 196 | "1780\n", 197 | "445\n", 198 | "445\n" 199 | ], 200 | "name": "stdout" 201 | } 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "metadata": { 207 | "id": "ULzA8xhwZI22", 208 | "colab_type": "code", 209 | "colab": { 210 | "base_uri": "https://localhost:8080/", 211 | "height": 119 212 | }, 213 | "outputId": "7fd933dd-0fee-45ca-fc10-532f51e6a30d" 214 | }, 215 | "source": [ 216 | "tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok) # YOUR CODE HERE\n", 217 | "tokenizer.fit_on_texts(train_sentences) #(# YOUR CODE HERE)\n", 218 | "word_index = tokenizer.word_index # YOUR CODE HERE\n", 219 | "\n", 220 | "train_sequences = tokenizer.texts_to_sequences(train_sentences) # YOUR CODE HERE\n", 221 | "train_padded = pad_sequences(train_sequences, padding = padding_type, maxlen=max_length) # YOUR CODE HERE\n", 222 | "\n", 223 | "print(len(train_sequences[0]))\n", 224 | "print(len(train_padded[0]))\n", 225 | "\n", 226 | "print(len(train_sequences[1]))\n", 227 | "print(len(train_padded[1]))\n", 228 | "\n", 229 | "print(len(train_sequences[10]))\n", 230 | "print(len(train_padded[10]))\n", 231 | "\n", 232 | "# Expected Ouput\n", 233 | "# 449\n", 234 | "# 120\n", 235 | "# 200\n", 236 | "# 120\n", 237 | "# 192\n", 238 | "# 120" 239 | ], 240 | "execution_count": 19, 241 | "outputs": [ 242 | { 243 | "output_type": "stream", 244 | "text": [ 245 | "449\n", 246 | "120\n", 247 | "200\n", 248 | "120\n", 249 | "192\n", 250 | "120\n" 251 | ], 252 | "name": "stdout" 253 | } 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "metadata": { 259 | "id": "c8PeFWzPZLW_", 260 | "colab_type": "code", 261 | "colab": { 262 | "base_uri": "https://localhost:8080/", 263 | "height": 51 264 | }, 265 | "outputId": "320703ad-222b-4e28-cb94-34e438e76a79" 266 | }, 267 | "source": [ 268 | "validation_sequences = tokenizer.texts_to_sequences(validation_sentences) # YOUR CODE HERE\n", 269 | "validation_padded = pad_sequences(validation_sequences, padding = padding_type, maxlen=max_length) # YOUR CODE HERE\n", 270 | "\n", 271 | "print(len(validation_sequences))\n", 272 | "print(validation_padded.shape)\n", 273 | "\n", 274 | "# Expected output\n", 275 | "# 445\n", 276 | "# (445, 120)" 277 | ], 278 | "execution_count": 20, 279 | "outputs": [ 280 
| { 281 | "output_type": "stream", 282 | "text": [ 283 | "445\n", 284 | "(445, 120)\n" 285 | ], 286 | "name": "stdout" 287 | } 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "metadata": { 293 | "id": "XkWiQ_FKZNp2", 294 | "colab_type": "code", 295 | "colab": {} 296 | }, 297 | "source": [ 298 | "label_tokenizer = tokenizer() # YOUR CODE HERE\n", 299 | "label_tokenizer.fit_on_texts(labels) #(# YOUR CODE HERE)\n", 300 | "\n", 301 | "training_label_seq = np.array(label_tokenizer.texts_to_sequences(train_labels)) # YOUR CODE HERE\n", 302 | "validation_label_seq = np.array(label_tokenizer.texts_to_sequences(validation_labels)) # YOUR CODE HERE\n", 303 | "\n", 304 | "print(training_label_seq[0])\n", 305 | "print(training_label_seq[1])\n", 306 | "print(training_label_seq[2])\n", 307 | "print(training_label_seq.shape)\n", 308 | "\n", 309 | "print(validation_label_seq[0])\n", 310 | "print(validation_label_seq[1])\n", 311 | "print(validation_label_seq[2])\n", 312 | "print(validation_label_seq.shape)\n", 313 | "\n", 314 | "# Expected output\n", 315 | "# [4]\n", 316 | "# [2]\n", 317 | "# [1]\n", 318 | "# (1780, 1)\n", 319 | "# [5]\n", 320 | "# [4]\n", 321 | "# [3]\n", 322 | "# (445, 1)" 323 | ], 324 | "execution_count": 0, 325 | "outputs": [] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "metadata": { 330 | "id": "HZ5um4MWZP-W", 331 | "colab_type": "code", 332 | "colab": {} 333 | }, 334 | "source": [ 335 | "model = tf.keras.Sequential([\n", 336 | "# YOUR CODE HERE\n", 337 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), \n", 338 | " tf.keras.layers.GlobalAveragePooling(), \n", 339 | " tf.keras.layers.Dense(24, activation = 'relu'), \n", 340 | " tf.keras.layers.Dense(6, activation = 'sigmoid')\n", 341 | " \n", 342 | "])\n", 343 | "model.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 344 | "model.summary()\n", 345 | "\n", 346 | "# Expected Output\n", 347 | "# Layer (type) Output Shape Param # \n", 348 | "# =================================================================\n", 349 | "# embedding (Embedding) (None, 120, 16) 16000 \n", 350 | "# _________________________________________________________________\n", 351 | "# global_average_pooling1d (Gl (None, 16) 0 \n", 352 | "# _________________________________________________________________\n", 353 | "# dense (Dense) (None, 24) 408 \n", 354 | "# _________________________________________________________________\n", 355 | "# dense_1 (Dense) (None, 6) 150 \n", 356 | "# =================================================================\n", 357 | "# Total params: 16,558\n", 358 | "# Trainable params: 16,558\n", 359 | "# Non-trainable params: 0" 360 | ], 361 | "execution_count": 0, 362 | "outputs": [] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "metadata": { 367 | "id": "XsfdxySKZSXu", 368 | "colab_type": "code", 369 | "colab": {} 370 | }, 371 | "source": [ 372 | "num_epochs = 30\n", 373 | "history = model.fit(train_padded, train_sequences, epochs=num_epochs, validation_data=(validation_padded, validation_label_seq), verbose=2) #(# YOUR CODE HERE)" 374 | ], 375 | "execution_count": 0, 376 | "outputs": [] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "metadata": { 381 | "id": "dQ0BX2apXS9u", 382 | "colab_type": "code", 383 | "colab": {} 384 | }, 385 | "source": [ 386 | "import matplotlib.pyplot as plt\n", 387 | "\n", 388 | "\n", 389 | "def plot_graphs(history, string):\n", 390 | " plt.plot(history.history[string])\n", 391 | " 
plt.plot(history.history['val_'+string])\n", 392 | " plt.xlabel(\"Epochs\")\n", 393 | " plt.ylabel(string)\n", 394 | " plt.legend([string, 'val_'+string])\n", 395 | " plt.show()\n", 396 | " \n", 397 | "plot_graphs(history, \"acc\")\n", 398 | "plot_graphs(history, \"loss\")" 399 | ], 400 | "execution_count": 0, 401 | "outputs": [] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "metadata": { 406 | "id": "w7Xc-uWxXhML", 407 | "colab_type": "code", 408 | "colab": {} 409 | }, 410 | "source": [ 411 | "reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])\n", 412 | "\n", 413 | "def decode_sentence(text):\n", 414 | " return ' '.join([reverse_word_index.get(i, '?') for i in text])\n" 415 | ], 416 | "execution_count": 0, 417 | "outputs": [] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "metadata": { 422 | "id": "OhnFA_TDXrih", 423 | "colab_type": "code", 424 | "colab": {} 425 | }, 426 | "source": [ 427 | "e = model.layers[0]\n", 428 | "weights = e.get_weights()[0]\n", 429 | "print(weights.shape) # shape: (vocab_size, embedding_dim)\n", 430 | "\n", 431 | "# Expected output\n", 432 | "# (1000, 16)" 433 | ], 434 | "execution_count": 0, 435 | "outputs": [] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "metadata": { 440 | "id": "_POzcWWAXudL", 441 | "colab_type": "code", 442 | "colab": {} 443 | }, 444 | "source": [ 445 | "import io\n", 446 | "\n", 447 | "out_v = io.open('vecs.tsv', 'w', encoding='utf-8')\n", 448 | "out_m = io.open('meta.tsv', 'w', encoding='utf-8')\n", 449 | "for word_num in range(1, vocab_size):\n", 450 | " word = reverse_word_index[word_num]\n", 451 | " embeddings = weights[word_num]\n", 452 | " out_m.write(word + \"\\n\")\n", 453 | " out_v.write('\\t'.join([str(x) for x in embeddings]) + \"\\n\")\n", 454 | "out_v.close()\n", 455 | "out_m.close()" 456 | ], 457 | "execution_count": 0, 458 | "outputs": [] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "metadata": { 463 | "id": "VmqpQMZ_XyOa", 464 | "colab_type": "code", 465 | "colab": {} 466 | }, 467 | "source": [ 468 | "try:\n", 469 | " from google.colab import files\n", 470 | "except ImportError:\n", 471 | " pass\n", 472 | "else:\n", 473 | " files.download('vecs.tsv')\n", 474 | " files.download('meta.tsv')" 475 | ], 476 | "execution_count": 0, 477 | "outputs": [] 478 | } 479 | ] 480 | } -------------------------------------------------------------------------------- /Week 2/Quiz 2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/07Agarg/Natural-Language-Processing-In-Tensorflow-Course/46eb21e25f73fd8644a95e64696d64dd4843e1e8/Week 2/Quiz 2.pdf -------------------------------------------------------------------------------- /Week 2/course_3_week_2_exercise_question.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 4 - Week 2 - Exercise - Question.ipynb 3 | 4 | Automatically generated by Colaboratory. 
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%202%20-%20Exercise%20-%20Question.ipynb 8 | """ 9 | 10 | import csv 11 | import tensorflow as tf 12 | import numpy as np 13 | from tensorflow.keras.preprocessing.text import Tokenizer 14 | from tensorflow.keras.preprocessing.sequence import pad_sequences 15 | ''' 16 | !wget --no-check-certificate \ 17 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv \ 18 | -O /tmp/bbc-text.csv 19 | ''' 20 | vocab_size = 1000 # YOUR CODE HERE 21 | embedding_dim = 16 # YOUR CODE HERE 22 | max_length = 120 # YOUR CODE HERE 23 | trunc_type = 'post' # YOUR CODE HERE 24 | padding_type = 'post' # YOUR CODE HERE 25 | oov_tok = "" # YOUR CODE HERE 26 | training_portion = .8 27 | 28 | sentences = [] 29 | labels = [] 30 | stopwords = [ "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "could", "did", "do", "does", "doing", "down", "during", "each", "few", "for", "from", "further", "had", "has", "have", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "it", "it's", "its", "itself", "let's", "me", "more", "most", "my", "myself", "nor", "of", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own", "same", "she", "she'd", "she'll", "she's", "should", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "we", "we'd", "we'll", "we're", "we've", "were", "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "would", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves" ] 31 | print(len(stopwords)) 32 | # Expected Output 33 | # 153 34 | 35 | with open("bbc-text.csv", 'r') as csvfile: 36 | # YOUR CODE HERE 37 | reader = csv.reader(csvfile, delimiter=',') 38 | next(reader) 39 | for row in reader: 40 | labels.append(row[0]) 41 | sentence = row[1] 42 | for word in stopwords: 43 | token = " " + word + " " 44 | sentence = sentence.replace(token, " ") 45 | sentence = sentence.replace(" ", " ") 46 | sentences.append(sentence) 47 | 48 | 49 | print(len(labels)) 50 | print(len(sentences)) 51 | print(sentences[0]) 52 | # Expected Output 53 | # 2225 54 | # 2225 55 | # tv future hands viewers home theatre systems plasma high-definition tvs digital video recorders moving living room way people watch tv will radically different five years time. according expert panel gathered annual consumer electronics show las vegas discuss new technologies will impact one favourite pastimes. us leading trend programmes content will delivered viewers via home networks cable satellite telecoms companies broadband service providers front rooms portable devices. one talked-about technologies ces digital personal video recorders (dvr pvr). set-top boxes like us s tivo uk s sky+ system allow people record store play pause forward wind tv programmes want. essentially technology allows much personalised tv. 
also built-in high-definition tv sets big business japan us slower take off europe lack high-definition programming. not can people forward wind adverts can also forget abiding network channel schedules putting together a-la-carte entertainment. us networks cable satellite companies worried means terms advertising revenues well brand identity viewer loyalty channels. although us leads technology moment also concern raised europe particularly growing uptake services like sky+. happens today will see nine months years time uk adam hume bbc broadcast s futurologist told bbc news website. likes bbc no issues lost advertising revenue yet. pressing issue moment commercial uk broadcasters brand loyalty important everyone. will talking content brands rather network brands said tim hanlon brand communications firm starcom mediavest. reality broadband connections anybody can producer content. added: challenge now hard promote programme much choice. means said stacey jolna senior vice president tv guide tv group way people find content want watch simplified tv viewers. means networks us terms channels take leaf google s book search engine future instead scheduler help people find want watch. kind channel model might work younger ipod generation used taking control gadgets play them. might not suit everyone panel recognised. older generations comfortable familiar schedules channel brands know getting. perhaps not want much choice put hands mr hanlon suggested. end kids just diapers pushing buttons already - everything possible available said mr hanlon. ultimately consumer will tell market want. 50 000 new gadgets technologies showcased ces many enhancing tv-watching experience. high-definition tv sets everywhere many new models lcd (liquid crystal display) tvs launched dvr capability built instead external boxes. one example launched show humax s 26-inch lcd tv 80-hour tivo dvr dvd recorder. one us s biggest satellite tv companies directtv even launched branded dvr show 100-hours recording capability instant replay search function. set can pause rewind tv 90 hours. microsoft chief bill gates announced pre-show keynote speech partnership tivo called tivotogo means people can play recorded programmes windows pcs mobile devices. reflect increasing trend freeing multimedia people can watch want want. 
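# Illustrative sanity check: a minimal sketch of what the stopword-removal loop
# above does to a single sentence. The sentence below is invented for
# illustration only (it is not a row from bbc-text.csv); it simply reuses the
# `stopwords` list defined earlier in this script.
_demo_sentence = "the match was one of the best games in years"
for _word in stopwords:
    # Only space-delimited " word " occurrences are replaced, mirroring the loop above.
    _demo_sentence = _demo_sentence.replace(" " + _word + " ", " ")
print(_demo_sentence)
# Prints "the match one best games years": "was", "of", "the" and "in" are
# stripped, but the leading "the" survives because it is not surrounded by
# spaces on both sides.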
56 | 57 | train_size = int(len(sentences) * training_portion) # YOUR CODE HERE 58 | 59 | train_sentences = sentences[:train_size] # YOUR CODE HERE 60 | train_labels = labels[:train_size] # YOUR CODE HERE 61 | 62 | validation_sentences = sentences[train_size:] # YOUR CODE HERE 63 | validation_labels = labels[train_size:] # YOUR CODE HERE 64 | 65 | print(train_size) 66 | print(len(train_sentences)) 67 | print(len(train_labels)) 68 | print(len(validation_sentences)) 69 | print(len(validation_labels)) 70 | 71 | # Expected output (if training_portion=.8) 72 | # 1780 73 | # 1780 74 | # 1780 75 | # 445 76 | # 445 77 | 78 | tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok) # YOUR CODE HERE 79 | tokenizer.fit_on_texts(train_sentences) #(# YOUR CODE HERE) 80 | word_index = tokenizer.word_index # YOUR CODE HERE 81 | 82 | train_sequences = tokenizer.texts_to_sequences(train_sentences) # YOUR CODE HERE 83 | train_padded = pad_sequences(train_sequences, padding = padding_type, maxlen=max_length) # YOUR CODE HERE 84 | 85 | print(len(train_sequences[0])) 86 | print(len(train_padded[0])) 87 | 88 | print(len(train_sequences[1])) 89 | print(len(train_padded[1])) 90 | 91 | print(len(train_sequences[10])) 92 | print(len(train_padded[10])) 93 | 94 | # Expected Ouput 95 | # 449 96 | # 120 97 | # 200 98 | # 120 99 | # 192 100 | # 120 101 | 102 | validation_sequences = tokenizer.texts_to_sequences(validation_sentences) # YOUR CODE HERE 103 | validation_padded = pad_sequences(validation_sequences, padding = padding_type, maxlen=max_length) # YOUR CODE HERE 104 | 105 | print(len(validation_sequences)) 106 | print(validation_padded.shape) 107 | 108 | # Expected output 109 | # 445 110 | # (445, 120) 111 | 112 | label_tokenizer = Tokenizer() # YOUR CODE HERE 113 | label_tokenizer.fit_on_texts(labels) #(# YOUR CODE HERE) 114 | 115 | training_label_seq = np.array(label_tokenizer.texts_to_sequences(train_labels)) # YOUR CODE HERE 116 | validation_label_seq = np.array(label_tokenizer.texts_to_sequences(validation_labels)) # YOUR CODE HERE 117 | 118 | print(training_label_seq[0]) 119 | print(training_label_seq[1]) 120 | print(training_label_seq[2]) 121 | print(training_label_seq.shape) 122 | 123 | print(validation_label_seq[0]) 124 | print(validation_label_seq[1]) 125 | print(validation_label_seq[2]) 126 | print(validation_label_seq.shape) 127 | 128 | # Expected output 129 | # [4] 130 | # [2] 131 | # [1] 132 | # (1780, 1) 133 | # [5] 134 | # [4] 135 | # [3] 136 | # (445, 1) 137 | 138 | model = tf.keras.Sequential([ 139 | # YOUR CODE HERE 140 | tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), 141 | tf.keras.layers.GlobalAveragePooling1D(), 142 | tf.keras.layers.Dense(24, activation = 'relu'), 143 | tf.keras.layers.Dense(6, activation = 'sigmoid') 144 | 145 | ]) 146 | model.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy']) 147 | model.summary() 148 | 149 | # Expected Output 150 | # Layer (type) Output Shape Param # 151 | # ================================================================= 152 | # embedding (Embedding) (None, 120, 16) 16000 153 | # _________________________________________________________________ 154 | # global_average_pooling1d (Gl (None, 16) 0 155 | # _________________________________________________________________ 156 | # dense (Dense) (None, 24) 408 157 | # _________________________________________________________________ 158 | # dense_1 (Dense) (None, 6) 150 159 | # 
================================================================= 160 | # Total params: 16,558 161 | # Trainable params: 16,558 162 | # Non-trainable params: 0 163 | 164 | num_epochs = 30 165 | history = model.fit(train_padded, training_label_seq, epochs=num_epochs, validation_data=(validation_padded, validation_label_seq), verbose=2) #(# YOUR CODE HERE) 166 | 167 | import matplotlib.pyplot as plt 168 | 169 | 170 | def plot_graphs(history, string): 171 | plt.plot(history.history[string]) 172 | plt.plot(history.history['val_'+string]) 173 | plt.xlabel("Epochs") 174 | plt.ylabel(string) 175 | plt.legend([string, 'val_'+string]) 176 | plt.show() 177 | 178 | plot_graphs(history, "acc") 179 | plot_graphs(history, "loss") 180 | 181 | reverse_word_index = dict([(value, key) for (key, value) in word_index.items()]) 182 | 183 | def decode_sentence(text): 184 | return ' '.join([reverse_word_index.get(i, '?') for i in text]) 185 | 186 | e = model.layers[0] 187 | weights = e.get_weights()[0] 188 | print(weights.shape) # shape: (vocab_size, embedding_dim) 189 | 190 | # Expected output 191 | # (1000, 16) 192 | 193 | import io 194 | 195 | out_v = io.open('vecs.tsv', 'w', encoding='utf-8') 196 | out_m = io.open('meta.tsv', 'w', encoding='utf-8') 197 | for word_num in range(1, vocab_size): 198 | word = reverse_word_index[word_num] 199 | embeddings = weights[word_num] 200 | out_m.write(word + "\n") 201 | out_v.write('\t'.join([str(x) for x in embeddings]) + "\n") 202 | out_v.close() 203 | out_m.close() 204 | 205 | try: 206 | from google.colab import files 207 | except ImportError: 208 | pass 209 | else: 210 | files.download('vecs.tsv') 211 | files.download('meta.tsv') -------------------------------------------------------------------------------- /Week 2/course_3_week_2_lesson_1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 3 - Week 2 - Lesson 1.ipynb 3 | 4 | Automatically generated by Colaboratory.
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%202%20-%20Lesson%201.ipynb 8 | """ 9 | 10 | # NOTE: PLEASE MAKE SURE YOU ARE RUNNING THIS IN A PYTHON3 ENVIRONMENT 11 | 12 | import tensorflow as tf 13 | print(tf.__version__) 14 | 15 | # This is needed for the iterator over the data 16 | # But not necessary if you have TF 2.0 installed 17 | #!pip install tensorflow==2.0.0-beta0 18 | 19 | 20 | tf.enable_eager_execution() 21 | 22 | # !pip install -q tensorflow-datasets 23 | 24 | import tensorflow_datasets as tfds 25 | imdb, info = tfds.load("imdb_reviews", with_info=True, as_supervised=True) 26 | 27 | import numpy as np 28 | 29 | train_data, test_data = imdb['train'], imdb['test'] 30 | 31 | training_sentences = [] 32 | training_labels = [] 33 | 34 | testing_sentences = [] 35 | testing_labels = [] 36 | 37 | # str(s.tonumpy()) is needed in Python3 instead of just s.numpy() 38 | for s,l in train_data: 39 | training_sentences.append(str(s.numpy())) 40 | training_labels.append(l.numpy()) 41 | 42 | for s,l in test_data: 43 | testing_sentences.append(str(s.numpy())) 44 | testing_labels.append(l.numpy()) 45 | 46 | training_labels_final = np.array(training_labels) 47 | testing_labels_final = np.array(testing_labels) 48 | 49 | vocab_size = 10000 50 | embedding_dim = 16 51 | max_length = 120 52 | trunc_type='post' 53 | oov_tok = "" 54 | 55 | 56 | from tensorflow.keras.preprocessing.text import Tokenizer 57 | from tensorflow.keras.preprocessing.sequence import pad_sequences 58 | 59 | tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok) 60 | tokenizer.fit_on_texts(training_sentences) 61 | word_index = tokenizer.word_index 62 | sequences = tokenizer.texts_to_sequences(training_sentences) 63 | padded = pad_sequences(sequences,maxlen=max_length, truncating=trunc_type) 64 | 65 | testing_sequences = tokenizer.texts_to_sequences(testing_sentences) 66 | testing_padded = pad_sequences(testing_sequences,maxlen=max_length) 67 | 68 | reverse_word_index = dict([(value, key) for (key, value) in word_index.items()]) 69 | 70 | def decode_review(text): 71 | return ' '.join([reverse_word_index.get(i, '?') for i in text]) 72 | 73 | print(decode_review(padded[1])) 74 | print(training_sentences[1]) 75 | 76 | model = tf.keras.Sequential([ 77 | tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), 78 | tf.keras.layers.Flatten(), 79 | tf.keras.layers.Dense(6, activation='relu'), 80 | tf.keras.layers.Dense(1, activation='sigmoid') 81 | ]) 82 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 83 | model.summary() 84 | 85 | num_epochs = 10 86 | model.fit(padded, training_labels_final, epochs=num_epochs, validation_data=(testing_padded, testing_labels_final)) 87 | 88 | e = model.layers[0] 89 | weights = e.get_weights()[0] 90 | print(weights.shape) # shape: (vocab_size, embedding_dim) 91 | 92 | import io 93 | 94 | out_v = io.open('vecs.tsv', 'w', encoding='utf-8') 95 | out_m = io.open('meta.tsv', 'w', encoding='utf-8') 96 | for word_num in range(1, vocab_size): 97 | word = reverse_word_index[word_num] 98 | embeddings = weights[word_num] 99 | out_m.write(word + "\n") 100 | out_v.write('\t'.join([str(x) for x in embeddings]) + "\n") 101 | out_v.close() 102 | out_m.close() 103 | 104 | try: 105 | from google.colab import files 106 | except ImportError: 107 | pass 108 | else: 109 | files.download('vecs.tsv') 110 | 
files.download('meta.tsv') 111 | 112 | sentence = "I really think this is amazing. honest." 113 | sequence = tokenizer.texts_to_sequences(sentence) 114 | print(sequence) -------------------------------------------------------------------------------- /Week 3/Course_3_Week_3_Lesson_1a.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Course 4 - Week 3 - Lesson 1a.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "rFiCyWQ-NC5D", 21 | "colab_type": "text" 22 | }, 23 | "source": [ 24 | "# Single Layer LSTM" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "metadata": { 30 | "id": "Y20Lud2ZMBhW", 31 | "colab_type": "code", 32 | "colab": {} 33 | }, 34 | "source": [ 35 | "from __future__ import absolute_import, division, print_function, unicode_literals\n", 36 | "\n", 37 | "\n", 38 | "import tensorflow_datasets as tfds\n", 39 | "import tensorflow as tf\n", 40 | "print(tf.__version__)" 41 | ], 42 | "execution_count": 0, 43 | "outputs": [] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "metadata": { 48 | "id": "Aclov8NVMJml", 49 | "colab_type": "code", 50 | "colab": {} 51 | }, 52 | "source": [ 53 | "# If the tf.__version__ is 1.x, please run this cell\n", 54 | "#!pip install tensorflow==2.0.0-beta0" 55 | ], 56 | "execution_count": 0, 57 | "outputs": [] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "metadata": { 62 | "id": "uAU8g7C0MPZE", 63 | "colab_type": "code", 64 | "colab": {} 65 | }, 66 | "source": [ 67 | "import tensorflow_datasets as tfds\n", 68 | "import tensorflow as tf\n", 69 | "print(tf.__version__)" 70 | ], 71 | "execution_count": 0, 72 | "outputs": [] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "metadata": { 77 | "id": "AW-4Vo4TMUHb", 78 | "colab_type": "code", 79 | "colab": {} 80 | }, 81 | "source": [ 82 | "# Get the data\n", 83 | "dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True)\n", 84 | "train_dataset, test_dataset = dataset['train'], dataset['test']" 85 | ], 86 | "execution_count": 0, 87 | "outputs": [] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "metadata": { 92 | "id": "DVfhKpHsPOxq", 93 | "colab_type": "code", 94 | "colab": {} 95 | }, 96 | "source": [ 97 | "tokenizer = info.features['text'].encoder" 98 | ], 99 | "execution_count": 0, 100 | "outputs": [] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "metadata": { 105 | "id": "ffvRUI0_McDS", 106 | "colab_type": "code", 107 | "colab": {} 108 | }, 109 | "source": [ 110 | "BUFFER_SIZE = 10000\n", 111 | "BATCH_SIZE = 64\n", 112 | "\n", 113 | "train_dataset = train_dataset.shuffle(BUFFER_SIZE)\n", 114 | "train_dataset = train_dataset.padded_batch(BATCH_SIZE, train_dataset.output_shapes)\n", 115 | "test_dataset = test_dataset.padded_batch(BATCH_SIZE, test_dataset.output_shapes)" 116 | ], 117 | "execution_count": 0, 118 | "outputs": [] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "metadata": { 123 | "id": "FxQooMEkMgur", 124 | "colab_type": "code", 125 | "colab": {} 126 | }, 127 | "source": [ 128 | "model = tf.keras.Sequential([\n", 129 | " tf.keras.layers.Embedding(tokenizer.vocab_size, 64),\n", 130 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),\n", 131 | " tf.keras.layers.Dense(64, activation='relu'),\n", 132 | " 
tf.keras.layers.Dense(1, activation='sigmoid')\n", 133 | "])" 134 | ], 135 | "execution_count": 0, 136 | "outputs": [] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "metadata": { 141 | "id": "QKI5dfPgMioL", 142 | "colab_type": "code", 143 | "colab": {} 144 | }, 145 | "source": [ 146 | "model.summary()" 147 | ], 148 | "execution_count": 0, 149 | "outputs": [] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "metadata": { 154 | "id": "Uip7QOVzMoMq", 155 | "colab_type": "code", 156 | "colab": {} 157 | }, 158 | "source": [ 159 | "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])" 160 | ], 161 | "execution_count": 0, 162 | "outputs": [] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "metadata": { 167 | "id": "7mlgzaRDMtF6", 168 | "colab_type": "code", 169 | "colab": {} 170 | }, 171 | "source": [ 172 | "NUM_EPOCHS = 10\n", 173 | "history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset)" 174 | ], 175 | "execution_count": 0, 176 | "outputs": [] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "metadata": { 181 | "colab_type": "code", 182 | "id": "Mp1Z7P9pYRSK", 183 | "colab": {} 184 | }, 185 | "source": [ 186 | "import matplotlib.pyplot as plt\n", 187 | "\n", 188 | "\n", 189 | "def plot_graphs(history, string):\n", 190 | " plt.plot(history.history[string])\n", 191 | " plt.plot(history.history['val_'+string])\n", 192 | " plt.xlabel(\"Epochs\")\n", 193 | " plt.ylabel(string)\n", 194 | " plt.legend([string, 'val_'+string])\n", 195 | " plt.show()" 196 | ], 197 | "execution_count": 0, 198 | "outputs": [] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "metadata": { 203 | "id": "R_sX6ilIM515", 204 | "colab_type": "code", 205 | "colab": {} 206 | }, 207 | "source": [ 208 | "plot_graphs(history, 'accuracy')" 209 | ], 210 | "execution_count": 0, 211 | "outputs": [] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "metadata": { 216 | "id": "RFEXtKtqNARB", 217 | "colab_type": "code", 218 | "colab": {} 219 | }, 220 | "source": [ 221 | "plot_graphs(history, 'loss')" 222 | ], 223 | "execution_count": 0, 224 | "outputs": [] 225 | } 226 | ] 227 | } -------------------------------------------------------------------------------- /Week 3/Course_3_Week_3_Lesson_1b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Course 4 - Week 3 - Lesson 1b.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "rFiCyWQ-NC5D", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "# Multiple Layer LSTM" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "metadata": { 31 | "id": "Y20Lud2ZMBhW", 32 | "colab_type": "code", 33 | "colab": {} 34 | }, 35 | "source": [ 36 | "from __future__ import absolute_import, division, print_function, unicode_literals\n", 37 | "\n", 38 | "\n", 39 | "import tensorflow_datasets as tfds\n", 40 | "import tensorflow as tf\n", 41 | "print(tf.__version__)" 42 | ], 43 | "execution_count": 0, 44 | "outputs": [] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "metadata": { 49 | "id": "Aclov8NVMJml", 50 | "colab_type": "code", 51 | "colab": {} 52 | }, 53 | "source": [ 54 | "# If the tf.__version__ is 1.x, please run this cell\n", 55 | "!pip install tensorflow==2.0.0-beta0" 56 
| ], 57 | "execution_count": 0, 58 | "outputs": [] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "metadata": { 63 | "id": "uAU8g7C0MPZE", 64 | "colab_type": "code", 65 | "colab": {} 66 | }, 67 | "source": [ 68 | "import tensorflow_datasets as tfds\n", 69 | "import tensorflow as tf\n", 70 | "print(tf.__version__)" 71 | ], 72 | "execution_count": 0, 73 | "outputs": [] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "metadata": { 78 | "id": "AW-4Vo4TMUHb", 79 | "colab_type": "code", 80 | "colab": {} 81 | }, 82 | "source": [ 83 | "# Get the data\n", 84 | "dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True)\n", 85 | "train_dataset, test_dataset = dataset['train'], dataset['test']\n" 86 | ], 87 | "execution_count": 0, 88 | "outputs": [] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "metadata": { 93 | "id": "L11bIR6-PKvs", 94 | "colab_type": "code", 95 | "colab": {} 96 | }, 97 | "source": [ 98 | "tokenizer = info.features['text'].encoder" 99 | ], 100 | "execution_count": 0, 101 | "outputs": [] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "metadata": { 106 | "id": "ffvRUI0_McDS", 107 | "colab_type": "code", 108 | "colab": {} 109 | }, 110 | "source": [ 111 | "BUFFER_SIZE = 10000\n", 112 | "BATCH_SIZE = 64\n", 113 | "\n", 114 | "train_dataset = train_dataset.shuffle(BUFFER_SIZE)\n", 115 | "train_dataset = train_dataset.padded_batch(BATCH_SIZE, train_dataset.output_shapes)\n", 116 | "test_dataset = test_dataset.padded_batch(BATCH_SIZE, test_dataset.output_shapes)" 117 | ], 118 | "execution_count": 0, 119 | "outputs": [] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "metadata": { 124 | "colab_type": "code", 125 | "id": "jo1jjO3vn0jo", 126 | "colab": {} 127 | }, 128 | "source": [ 129 | "model = tf.keras.Sequential([\n", 130 | " tf.keras.layers.Embedding(tokenizer.vocab_size, 64),\n", 131 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),\n", 132 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),\n", 133 | " tf.keras.layers.Dense(64, activation='relu'),\n", 134 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 135 | "])" 136 | ], 137 | "execution_count": 0, 138 | "outputs": [] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "metadata": { 143 | "id": "QKI5dfPgMioL", 144 | "colab_type": "code", 145 | "colab": {} 146 | }, 147 | "source": [ 148 | "model.summary()" 149 | ], 150 | "execution_count": 0, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "metadata": { 156 | "id": "Uip7QOVzMoMq", 157 | "colab_type": "code", 158 | "colab": {} 159 | }, 160 | "source": [ 161 | "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])" 162 | ], 163 | "execution_count": 0, 164 | "outputs": [] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "metadata": { 169 | "id": "7mlgzaRDMtF6", 170 | "colab_type": "code", 171 | "colab": {} 172 | }, 173 | "source": [ 174 | "NUM_EPOCHS = 10\n", 175 | "history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset)" 176 | ], 177 | "execution_count": 0, 178 | "outputs": [] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "metadata": { 183 | "colab_type": "code", 184 | "id": "Mp1Z7P9pYRSK", 185 | "colab": {} 186 | }, 187 | "source": [ 188 | "import matplotlib.pyplot as plt\n", 189 | "\n", 190 | "\n", 191 | "def plot_graphs(history, string):\n", 192 | " plt.plot(history.history[string])\n", 193 | " plt.plot(history.history['val_'+string])\n", 194 | " plt.xlabel(\"Epochs\")\n", 195 | " plt.ylabel(string)\n", 196 | " 
plt.legend([string, 'val_'+string])\n", 197 | " plt.show()" 198 | ], 199 | "execution_count": 0, 200 | "outputs": [] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "metadata": { 205 | "id": "R_sX6ilIM515", 206 | "colab_type": "code", 207 | "colab": {} 208 | }, 209 | "source": [ 210 | "plot_graphs(history, 'accuracy')" 211 | ], 212 | "execution_count": 0, 213 | "outputs": [] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "metadata": { 218 | "id": "RFEXtKtqNARB", 219 | "colab_type": "code", 220 | "colab": {} 221 | }, 222 | "source": [ 223 | "plot_graphs(history, 'loss')" 224 | ], 225 | "execution_count": 0, 226 | "outputs": [] 227 | } 228 | ] 229 | } -------------------------------------------------------------------------------- /Week 3/Course_3_Week_3_Lesson_1c.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Course 4 - Week 3 - Lesson 1c.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "rFiCyWQ-NC5D", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "# Multiple Layer GRU" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "metadata": { 31 | "id": "Y20Lud2ZMBhW", 32 | "colab_type": "code", 33 | "colab": {} 34 | }, 35 | "source": [ 36 | "from __future__ import absolute_import, division, print_function, unicode_literals\n", 37 | "\n", 38 | "\n", 39 | "import tensorflow_datasets as tfds\n", 40 | "import tensorflow as tf\n", 41 | "print(tf.__version__)" 42 | ], 43 | "execution_count": 0, 44 | "outputs": [] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "metadata": { 49 | "id": "Aclov8NVMJml", 50 | "colab_type": "code", 51 | "colab": {} 52 | }, 53 | "source": [ 54 | "# If the tf.__version__ is 1.x, please run this cell\n", 55 | "!pip install tensorflow==2.0.0-beta0" 56 | ], 57 | "execution_count": 0, 58 | "outputs": [] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "metadata": { 63 | "id": "uAU8g7C0MPZE", 64 | "colab_type": "code", 65 | "colab": {} 66 | }, 67 | "source": [ 68 | "import tensorflow_datasets as tfds\n", 69 | "import tensorflow as tf\n", 70 | "print(tf.__version__)" 71 | ], 72 | "execution_count": 0, 73 | "outputs": [] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "metadata": { 78 | "id": "-svP7Gy_jOiI", 79 | "colab_type": "code", 80 | "colab": {} 81 | }, 82 | "source": [ 83 | "" 84 | ], 85 | "execution_count": 0, 86 | "outputs": [] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "metadata": { 91 | "id": "AW-4Vo4TMUHb", 92 | "colab_type": "code", 93 | "colab": {} 94 | }, 95 | "source": [ 96 | "# Get the data\n", 97 | "dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True)\n", 98 | "train_dataset, test_dataset = dataset['train'], dataset['test']\n" 99 | ], 100 | "execution_count": 0, 101 | "outputs": [] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "metadata": { 106 | "id": "L11bIR6-PKvs", 107 | "colab_type": "code", 108 | "colab": {} 109 | }, 110 | "source": [ 111 | "tokenizer = info.features['text'].encoder" 112 | ], 113 | "execution_count": 0, 114 | "outputs": [] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "metadata": { 119 | "id": "ffvRUI0_McDS", 120 | "colab_type": "code", 121 | "colab": {} 122 | }, 123 | "source": [ 124 | "BUFFER_SIZE = 10000\n", 125 
| "BATCH_SIZE = 64\n", 126 | "\n", 127 | "train_dataset = train_dataset.shuffle(BUFFER_SIZE)\n", 128 | "train_dataset = train_dataset.padded_batch(BATCH_SIZE, train_dataset.output_shapes)\n", 129 | "test_dataset = test_dataset.padded_batch(BATCH_SIZE, test_dataset.output_shapes)" 130 | ], 131 | "execution_count": 0, 132 | "outputs": [] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "metadata": { 137 | "colab_type": "code", 138 | "id": "jo1jjO3vn0jo", 139 | "colab": {} 140 | }, 141 | "source": [ 142 | "model = tf.keras.Sequential([\n", 143 | " tf.keras.layers.Embedding(tokenizer.vocab_size, 64),\n", 144 | " tf.keras.layers.Conv1D(128, 5, activation='relu'),\n", 145 | " tf.keras.layers.GlobalAveragePooling1D(),\n", 146 | " tf.keras.layers.Dense(64, activation='relu'),\n", 147 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 148 | "])" 149 | ], 150 | "execution_count": 0, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "metadata": { 156 | "id": "QKI5dfPgMioL", 157 | "colab_type": "code", 158 | "colab": {} 159 | }, 160 | "source": [ 161 | "model.summary()" 162 | ], 163 | "execution_count": 0, 164 | "outputs": [] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "metadata": { 169 | "id": "Uip7QOVzMoMq", 170 | "colab_type": "code", 171 | "colab": {} 172 | }, 173 | "source": [ 174 | "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])" 175 | ], 176 | "execution_count": 0, 177 | "outputs": [] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "metadata": { 182 | "id": "7mlgzaRDMtF6", 183 | "colab_type": "code", 184 | "colab": {} 185 | }, 186 | "source": [ 187 | "NUM_EPOCHS = 10\n", 188 | "history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset)" 189 | ], 190 | "execution_count": 0, 191 | "outputs": [] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "metadata": { 196 | "colab_type": "code", 197 | "id": "Mp1Z7P9pYRSK", 198 | "colab": {} 199 | }, 200 | "source": [ 201 | "import matplotlib.pyplot as plt\n", 202 | "\n", 203 | "\n", 204 | "def plot_graphs(history, string):\n", 205 | " plt.plot(history.history[string])\n", 206 | " plt.plot(history.history['val_'+string])\n", 207 | " plt.xlabel(\"Epochs\")\n", 208 | " plt.ylabel(string)\n", 209 | " plt.legend([string, 'val_'+string])\n", 210 | " plt.show()" 211 | ], 212 | "execution_count": 0, 213 | "outputs": [] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "metadata": { 218 | "id": "R_sX6ilIM515", 219 | "colab_type": "code", 220 | "colab": {} 221 | }, 222 | "source": [ 223 | "plot_graphs(history, 'accuracy')" 224 | ], 225 | "execution_count": 0, 226 | "outputs": [] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "metadata": { 231 | "id": "RFEXtKtqNARB", 232 | "colab_type": "code", 233 | "colab": {} 234 | }, 235 | "source": [ 236 | "plot_graphs(history, 'loss')" 237 | ], 238 | "execution_count": 0, 239 | "outputs": [] 240 | } 241 | ] 242 | } -------------------------------------------------------------------------------- /Week 3/Course_3_Week_3_Lesson_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Course 3 - Week 3 - Lesson 2.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "code", 20 | "metadata": { 21 | "id": "jGwXGIXvFhXW", 
22 | "colab_type": "code", 23 | "colab": {} 24 | }, 25 | "source": [ 26 | "import json\n", 27 | "import tensorflow as tf\n", 28 | "\n", 29 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 30 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 31 | "\n", 32 | "!wget --no-check-certificate \\\n", 33 | " https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \\\n", 34 | " -O /tmp/sarcasm.json\n", 35 | "\n", 36 | "vocab_size = 1000\n", 37 | "embedding_dim = 16\n", 38 | "max_length = 120\n", 39 | "trunc_type='post'\n", 40 | "padding_type='post'\n", 41 | "oov_tok = \"\"\n", 42 | "training_size = 20000\n", 43 | "\n", 44 | "\n", 45 | "with open(\"/tmp/sarcasm.json\", 'r') as f:\n", 46 | " datastore = json.load(f)\n", 47 | "\n", 48 | "\n", 49 | "sentences = []\n", 50 | "labels = []\n", 51 | "urls = []\n", 52 | "for item in datastore:\n", 53 | " sentences.append(item['headline'])\n", 54 | " labels.append(item['is_sarcastic'])\n", 55 | "\n", 56 | "training_sentences = sentences[0:training_size]\n", 57 | "testing_sentences = sentences[training_size:]\n", 58 | "training_labels = labels[0:training_size]\n", 59 | "testing_labels = labels[training_size:]\n", 60 | "\n", 61 | "tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)\n", 62 | "tokenizer.fit_on_texts(training_sentences)\n", 63 | "\n", 64 | "word_index = tokenizer.word_index\n", 65 | "\n", 66 | "training_sequences = tokenizer.texts_to_sequences(training_sentences)\n", 67 | "training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)\n", 68 | "\n", 69 | "testing_sequences = tokenizer.texts_to_sequences(testing_sentences)\n", 70 | "testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)\n", 71 | "\n", 72 | "model = tf.keras.Sequential([\n", 73 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 74 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),\n", 75 | " tf.keras.layers.Dense(24, activation='relu'),\n", 76 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 77 | "])\n", 78 | "model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 79 | "model.summary()\n", 80 | "\n", 81 | "num_epochs = 50\n", 82 | "history = model.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=1)\n", 83 | "\n" 84 | ], 85 | "execution_count": 0, 86 | "outputs": [] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "metadata": { 91 | "id": "g9DC6dmLF8DC", 92 | "colab_type": "code", 93 | "colab": {} 94 | }, 95 | "source": [ 96 | "import matplotlib.pyplot as plt\n", 97 | "\n", 98 | "\n", 99 | "def plot_graphs(history, string):\n", 100 | " plt.plot(history.history[string])\n", 101 | " plt.plot(history.history['val_'+string])\n", 102 | " plt.xlabel(\"Epochs\")\n", 103 | " plt.ylabel(string)\n", 104 | " plt.legend([string, 'val_'+string])\n", 105 | " plt.show()\n", 106 | "\n", 107 | "plot_graphs(history, 'acc')\n", 108 | "plot_graphs(history, 'loss')" 109 | ], 110 | "execution_count": 0, 111 | "outputs": [] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "metadata": { 116 | "id": "7ZEZIUppGhdi", 117 | "colab_type": "code", 118 | "colab": {} 119 | }, 120 | "source": [ 121 | "model.save(\"test.h5\")" 122 | ], 123 | "execution_count": 0, 124 | "outputs": [] 125 | } 126 | ] 127 | } -------------------------------------------------------------------------------- /Week 
3/Course_3_Week_3_Lesson_2c.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Course 3 - Week 3 - Lesson 2c.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "code", 20 | "metadata": { 21 | "id": "jGwXGIXvFhXW", 22 | "colab_type": "code", 23 | "colab": {} 24 | }, 25 | "source": [ 26 | "import json\n", 27 | "import tensorflow as tf\n", 28 | "\n", 29 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 30 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 31 | "\n", 32 | "!wget --no-check-certificate \\\n", 33 | " https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \\\n", 34 | " -O /tmp/sarcasm.json\n", 35 | "\n", 36 | "vocab_size = 1000\n", 37 | "embedding_dim = 16\n", 38 | "max_length = 120\n", 39 | "trunc_type='post'\n", 40 | "padding_type='post'\n", 41 | "oov_tok = \"\"\n", 42 | "training_size = 20000\n", 43 | "\n", 44 | "\n", 45 | "with open(\"/tmp/sarcasm.json\", 'r') as f:\n", 46 | " datastore = json.load(f)\n", 47 | "\n", 48 | "\n", 49 | "sentences = []\n", 50 | "labels = []\n", 51 | "urls = []\n", 52 | "for item in datastore:\n", 53 | " sentences.append(item['headline'])\n", 54 | " labels.append(item['is_sarcastic'])\n", 55 | "\n", 56 | "training_sentences = sentences[0:training_size]\n", 57 | "testing_sentences = sentences[training_size:]\n", 58 | "training_labels = labels[0:training_size]\n", 59 | "testing_labels = labels[training_size:]\n", 60 | "\n", 61 | "tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)\n", 62 | "tokenizer.fit_on_texts(training_sentences)\n", 63 | "\n", 64 | "word_index = tokenizer.word_index\n", 65 | "\n", 66 | "training_sequences = tokenizer.texts_to_sequences(training_sentences)\n", 67 | "training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)\n", 68 | "\n", 69 | "testing_sequences = tokenizer.texts_to_sequences(testing_sentences)\n", 70 | "testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)\n", 71 | "\n", 72 | "model = tf.keras.Sequential([\n", 73 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 74 | " tf.keras.layers.Conv1D(128, 5, activation='relu'),\n", 75 | " tf.keras.layers.GlobalMaxPooling1D(),\n", 76 | " tf.keras.layers.Dense(24, activation='relu'),\n", 77 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 78 | "])\n", 79 | "model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 80 | "model.summary()\n", 81 | "\n", 82 | "num_epochs = 50\n", 83 | "history = model.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=1)\n", 84 | "\n" 85 | ], 86 | "execution_count": 0, 87 | "outputs": [] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "metadata": { 92 | "id": "g9DC6dmLF8DC", 93 | "colab_type": "code", 94 | "colab": {} 95 | }, 96 | "source": [ 97 | "import matplotlib.pyplot as plt\n", 98 | "\n", 99 | "\n", 100 | "def plot_graphs(history, string):\n", 101 | " plt.plot(history.history[string])\n", 102 | " plt.plot(history.history['val_'+string])\n", 103 | " plt.xlabel(\"Epochs\")\n", 104 | " 
plt.ylabel(string)\n", 105 | " plt.legend([string, 'val_'+string])\n", 106 | " plt.show()\n", 107 | "\n", 108 | "plot_graphs(history, 'acc')\n", 109 | "plot_graphs(history, 'loss')" 110 | ], 111 | "execution_count": 0, 112 | "outputs": [] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "metadata": { 117 | "id": "7ZEZIUppGhdi", 118 | "colab_type": "code", 119 | "colab": {} 120 | }, 121 | "source": [ 122 | "model.save(\"test.h5\")" 123 | ], 124 | "execution_count": 0, 125 | "outputs": [] 126 | } 127 | ] 128 | } -------------------------------------------------------------------------------- /Week 3/Course_3_Week_3_Lesson_2d.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Course 3 - Week 3 - Lesson 2d.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "code", 20 | "metadata": { 21 | "id": "P-AhVYeBWgQ3", 22 | "colab_type": "code", 23 | "colab": {} 24 | }, 25 | "source": [ 26 | "# NOTE: PLEASE MAKE SURE YOU ARE RUNNING THIS IN A PYTHON3 ENVIRONMENT\n", 27 | "\n", 28 | "import tensorflow as tf\n", 29 | "print(tf.__version__)\n", 30 | "\n", 31 | "# This is needed for the iterator over the data\n", 32 | "# But not necessary if you have TF 2.0 installed\n", 33 | "#!pip install tensorflow==2.0.0-beta0\n", 34 | "\n", 35 | "\n", 36 | "tf.enable_eager_execution()\n", 37 | "\n", 38 | "# !pip install -q tensorflow-datasets" 39 | ], 40 | "execution_count": 0, 41 | "outputs": [] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "metadata": { 46 | "id": "_IoM4VFxWpMR", 47 | "colab_type": "code", 48 | "colab": {} 49 | }, 50 | "source": [ 51 | "import tensorflow_datasets as tfds\n", 52 | "imdb, info = tfds.load(\"imdb_reviews\", with_info=True, as_supervised=True)\n" 53 | ], 54 | "execution_count": 0, 55 | "outputs": [] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "metadata": { 60 | "id": "wHQ2Ko0zl7M4", 61 | "colab_type": "code", 62 | "colab": {} 63 | }, 64 | "source": [ 65 | "import numpy as np\n", 66 | "\n", 67 | "train_data, test_data = imdb['train'], imdb['test']\n", 68 | "\n", 69 | "training_sentences = []\n", 70 | "training_labels = []\n", 71 | "\n", 72 | "testing_sentences = []\n", 73 | "testing_labels = []\n", 74 | "\n", 75 | "# str(s.tonumpy()) is needed in Python3 instead of just s.numpy()\n", 76 | "for s,l in train_data:\n", 77 | " training_sentences.append(str(s.numpy()))\n", 78 | " training_labels.append(l.numpy())\n", 79 | " \n", 80 | "for s,l in test_data:\n", 81 | " testing_sentences.append(str(s.numpy()))\n", 82 | " testing_labels.append(l.numpy())\n", 83 | " \n", 84 | "training_labels_final = np.array(training_labels)\n", 85 | "testing_labels_final = np.array(testing_labels)\n" 86 | ], 87 | "execution_count": 0, 88 | "outputs": [] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "metadata": { 93 | "id": "7n15yyMdmoH1", 94 | "colab_type": "code", 95 | "colab": {} 96 | }, 97 | "source": [ 98 | "vocab_size = 10000\n", 99 | "embedding_dim = 16\n", 100 | "max_length = 120\n", 101 | "trunc_type='post'\n", 102 | "oov_tok = \"\"\n", 103 | "\n", 104 | "\n", 105 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 106 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 107 | "\n", 108 | "tokenizer = Tokenizer(num_words = vocab_size, 
oov_token=oov_tok)\n", 109 | "tokenizer.fit_on_texts(training_sentences)\n", 110 | "word_index = tokenizer.word_index\n", 111 | "sequences = tokenizer.texts_to_sequences(training_sentences)\n", 112 | "padded = pad_sequences(sequences,maxlen=max_length, truncating=trunc_type)\n", 113 | "\n", 114 | "testing_sequences = tokenizer.texts_to_sequences(testing_sentences)\n", 115 | "testing_padded = pad_sequences(testing_sequences,maxlen=max_length)\n", 116 | "\n" 117 | ], 118 | "execution_count": 0, 119 | "outputs": [] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "metadata": { 124 | "colab_type": "code", 125 | "id": "9axf0uIXVMhO", 126 | "colab": {} 127 | }, 128 | "source": [ 129 | "reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])\n", 130 | "\n", 131 | "def decode_review(text):\n", 132 | " return ' '.join([reverse_word_index.get(i, '?') for i in text])\n", 133 | "\n", 134 | "print(decode_review(padded[1]))\n", 135 | "print(training_sentences[1])" 136 | ], 137 | "execution_count": 0, 138 | "outputs": [] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "metadata": { 143 | "id": "5NEpdhb8AxID", 144 | "colab_type": "code", 145 | "colab": {} 146 | }, 147 | "source": [ 148 | "model = tf.keras.Sequential([\n", 149 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 150 | " tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32)),\n", 151 | " tf.keras.layers.Dense(6, activation='relu'),\n", 152 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 153 | "])\n", 154 | "model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 155 | "model.summary()\n" 156 | ], 157 | "execution_count": 0, 158 | "outputs": [] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "metadata": { 163 | "id": "V5LLrXC-uNX6", 164 | "colab_type": "code", 165 | "colab": {} 166 | }, 167 | "source": [ 168 | "num_epochs = 50\n", 169 | "history = model.fit(padded, training_labels_final, epochs=num_epochs, validation_data=(testing_padded, testing_labels_final))" 170 | ], 171 | "execution_count": 0, 172 | "outputs": [] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "metadata": { 177 | "id": "nHGYuU4jPYaj", 178 | "colab_type": "code", 179 | "colab": {} 180 | }, 181 | "source": [ 182 | "import matplotlib.pyplot as plt\n", 183 | "\n", 184 | "\n", 185 | "def plot_graphs(history, string):\n", 186 | " plt.plot(history.history[string])\n", 187 | " plt.plot(history.history['val_'+string])\n", 188 | " plt.xlabel(\"Epochs\")\n", 189 | " plt.ylabel(string)\n", 190 | " plt.legend([string, 'val_'+string])\n", 191 | " plt.show()\n", 192 | "\n", 193 | "plot_graphs(history, 'accuracy')\n", 194 | "plot_graphs(history, 'loss')" 195 | ], 196 | "execution_count": 0, 197 | "outputs": [] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "metadata": { 202 | "id": "wSualgGPPK0S", 203 | "colab_type": "code", 204 | "colab": {} 205 | }, 206 | "source": [ 207 | "# Model Definition with LSTM\n", 208 | "model = tf.keras.Sequential([\n", 209 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 210 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),\n", 211 | " tf.keras.layers.Dense(6, activation='relu'),\n", 212 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 213 | "])\n", 214 | "model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 215 | "model.summary()\n" 216 | ], 217 | "execution_count": 0, 218 | "outputs": [] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "metadata": { 223 | "id": 
"K_Jc7cY3Qxke", 224 | "colab_type": "code", 225 | "colab": {} 226 | }, 227 | "source": [ 228 | "# Model Definition with Conv1D\n", 229 | "model = tf.keras.Sequential([\n", 230 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 231 | " tf.keras.layers.Conv1D(128, 5, activation='relu'),\n", 232 | " tf.keras.layers.GlobalAveragePooling1D(),\n", 233 | " tf.keras.layers.Dense(6, activation='relu'),\n", 234 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 235 | "])\n", 236 | "model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 237 | "model.summary()\n" 238 | ], 239 | "execution_count": 0, 240 | "outputs": [] 241 | } 242 | ] 243 | } -------------------------------------------------------------------------------- /Week 3/NLP_Course_Week_3_Exercise_Question.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "NLP Course - Week 3 Exercise Question.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "accelerator": "GPU" 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "code", 19 | "metadata": { 20 | "id": "hmA6EzkQJ5jt", 21 | "colab_type": "code", 22 | "colab": {} 23 | }, 24 | "source": [ 25 | "import json\n", 26 | "import tensorflow as tf\n", 27 | "import csv\n", 28 | "import random\n", 29 | "import numpy as np\n", 30 | "\n", 31 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 32 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 33 | "from tensorflow.keras.utils import to_categorical\n", 34 | "from tensorflow.keras import regularizers\n", 35 | "\n", 36 | "\n", 37 | "embedding_dim = 100\n", 38 | "max_length = 16\n", 39 | "trunc_type='post'\n", 40 | "padding_type='post'\n", 41 | "oov_tok = \"\"\n", 42 | "training_size=160000 #Your dataset size here. Experiment using smaller values (i.e. 16000), but don't forget to train on at least 160000 to see the best effects\n", 43 | "test_portion=.1\n", 44 | "\n", 45 | "corpus = []\n" 46 | ], 47 | "execution_count": 0, 48 | "outputs": [] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "metadata": { 53 | "id": "bM0l_dORKqE0", 54 | "colab_type": "code", 55 | "outputId": "ffa1b4a9-6462-4a5e-e166-9d863b732aaf", 56 | "colab": { 57 | "base_uri": "https://localhost:8080/", 58 | "height": 204 59 | } 60 | }, 61 | "source": [ 62 | "\n", 63 | "# Note that I cleaned the Stanford dataset to remove LATIN1 encoding to make it easier for Python CSV reader\n", 64 | "# You can do that yourself with:\n", 65 | "# iconv -f LATIN1 -t UTF8 training.1600000.processed.noemoticon.csv -o training_cleaned.csv\n", 66 | "# I then hosted it on my site to make it easier to use in this notebook\n", 67 | "\n", 68 | "!wget --no-check-certificate \\\n", 69 | " https://storage.googleapis.com/laurencemoroney-blog.appspot.com/training_cleaned.csv \\\n", 70 | " -O /tmp/training_cleaned.csv\n", 71 | "\n", 72 | "num_sentences = 0\n", 73 | "\n", 74 | "with open(\"/tmp/training_cleaned.csv\") as csvfile:\n", 75 | " reader = csv.reader(csvfile, delimiter=',')\n", 76 | " for row in reader:\n", 77 | " # Your Code here. Create list items where the first item is the text, found in row[5], and the second is the label. Note that the label is a '0' or a '4' in the text. When it's the former, make\n", 78 | " # your label to be 0, otherwise 1. 
Keep a count of the number of sentences in num_sentences\n", 79 | " list_item=[]\n", 80 | " list_item.append(row[5])\n", 81 | " if row[0] == '0':\n", 82 | " list_item.append(0)\n", 83 | " else:\n", 84 | " list_item.append(1)\n", 85 | " # YOUR CODE HERE\n", 86 | " num_sentences = num_sentences + 1\n", 87 | " corpus.append(list_item)\n", 88 | "\n", 89 | "\n" 90 | ], 91 | "execution_count": 2, 92 | "outputs": [ 93 | { 94 | "output_type": "stream", 95 | "text": [ 96 | "--2019-06-23 12:23:27-- https://storage.googleapis.com/laurencemoroney-blog.appspot.com/training_cleaned.csv\n", 97 | "Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.141.128, 2607:f8b0:400c:c06::80\n", 98 | "Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.141.128|:443... connected.\n", 99 | "HTTP request sent, awaiting response... 200 OK\n", 100 | "Length: 238942690 (228M) [application/octet-stream]\n", 101 | "Saving to: ‘/tmp/training_cleaned.csv’\n", 102 | "\n", 103 | "/tmp/training_clean 100%[===================>] 227.87M 181MB/s in 1.3s \n", 104 | "\n", 105 | "2019-06-23 12:23:29 (181 MB/s) - ‘/tmp/training_cleaned.csv’ saved [238942690/238942690]\n", 106 | "\n" 107 | ], 108 | "name": "stdout" 109 | } 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "metadata": { 115 | "id": "3kxblBUjEUX-", 116 | "colab_type": "code", 117 | "outputId": "b25aa6f3-d787-4d19-bbe3-f3715fb7ced2", 118 | "colab": { 119 | "base_uri": "https://localhost:8080/", 120 | "height": 68 121 | } 122 | }, 123 | "source": [ 124 | "print(num_sentences)\n", 125 | "print(len(corpus))\n", 126 | "print(corpus[1])\n", 127 | "\n", 128 | "# Expected Output:\n", 129 | "# 1600000\n", 130 | "# 1600000\n", 131 | "# [\"is upset that he can't update his Facebook by texting it... and might cry as a result School today also. Blah!\", 0]" 132 | ], 133 | "execution_count": 3, 134 | "outputs": [ 135 | { 136 | "output_type": "stream", 137 | "text": [ 138 | "1600000\n", 139 | "1600000\n", 140 | "[\"is upset that he can't update his Facebook by texting it... and might cry as a result School today also. 
Blah!\", 0]\n" 141 | ], 142 | "name": "stdout" 143 | } 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "metadata": { 149 | "id": "ohOGz24lsNAD", 150 | "colab_type": "code", 151 | "colab": {} 152 | }, 153 | "source": [ 154 | "sentences=[]\n", 155 | "labels=[]\n", 156 | "random.shuffle(corpus)\n", 157 | "for x in range(training_size):\n", 158 | " sentences.append(corpus[x][0]) #(# YOUR CODE HERE)\n", 159 | " labels.append(corpus[x][1]) #(# YOUR CODE HERE)\n", 160 | "\n", 161 | "\n", 162 | "tokenizer = Tokenizer()\n", 163 | "tokenizer.fit_on_texts(sentences) #(# YOUR CODE HERE)\n", 164 | "\n", 165 | "word_index = tokenizer.word_index\n", 166 | "vocab_size=len(word_index) #(# YOUR CODE HERE)\n", 167 | "\n", 168 | "sequences = tokenizer.texts_to_sequences(sentences) #(# YOUR CODE HERE)\n", 169 | "padded = pad_sequences(sequences, maxlen=max_length, padding = padding_type, truncating=trunc_type) #(# YOUR CODE HERE)\n", 170 | "\n", 171 | "split = int(test_portion * training_size)\n", 172 | "\n", 173 | "test_sequences = padded[0:split] #[# YOUR CODE HERE]\n", 174 | "training_sequences = padded[split:training_size]#[# YOUR CODE HERE]\n", 175 | "test_labels = labels[0:split] #[# YOUR CODE HERE]\n", 176 | "training_labels = labels[split:training_size] #[# YOUR CODE HERE]" 177 | ], 178 | "execution_count": 0, 179 | "outputs": [] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "metadata": { 184 | "id": "gIrtRem1En3N", 185 | "colab_type": "code", 186 | "outputId": "95155c4c-948a-48c0-9cee-e6531c836d98", 187 | "colab": { 188 | "base_uri": "https://localhost:8080/", 189 | "height": 51 190 | } 191 | }, 192 | "source": [ 193 | "print(vocab_size)\n", 194 | "print(word_index['i'])\n", 195 | "# Expected Output\n", 196 | "# 138858\n", 197 | "# 1" 198 | ], 199 | "execution_count": 8, 200 | "outputs": [ 201 | { 202 | "output_type": "stream", 203 | "text": [ 204 | "138876\n", 205 | "1\n" 206 | ], 207 | "name": "stdout" 208 | } 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "metadata": { 214 | "id": "C1zdgJkusRh0", 215 | "colab_type": "code", 216 | "outputId": "b6edd322-8191-45e7-cb12-08921685a72f", 217 | "colab": { 218 | "base_uri": "https://localhost:8080/", 219 | "height": 204 220 | } 221 | }, 222 | "source": [ 223 | "# Note this is the 100 dimension version of GloVe from Stanford\n", 224 | "# I unzipped and hosted it on my site to make this notebook easier\n", 225 | "!wget --no-check-certificate \\\n", 226 | " https://storage.googleapis.com/laurencemoroney-blog.appspot.com/glove.6B.100d.txt \\\n", 227 | " -O /tmp/glove.6B.100d.txt\n", 228 | "embeddings_index = {};\n", 229 | "with open('/tmp/glove.6B.100d.txt') as f:\n", 230 | " for line in f:\n", 231 | " values = line.split();\n", 232 | " word = values[0];\n", 233 | " coefs = np.asarray(values[1:], dtype='float32');\n", 234 | " embeddings_index[word] = coefs;\n", 235 | "\n", 236 | "embeddings_matrix = np.zeros((vocab_size+1, embedding_dim));\n", 237 | "for word, i in word_index.items():\n", 238 | " embedding_vector = embeddings_index.get(word);\n", 239 | " if embedding_vector is not None:\n", 240 | " embeddings_matrix[i] = embedding_vector;" 241 | ], 242 | "execution_count": 0, 243 | "outputs": [ 244 | { 245 | "output_type": "stream", 246 | "text": [ 247 | "--2019-06-07 17:55:30-- https://storage.googleapis.com/laurencemoroney-blog.appspot.com/glove.6B.100d.txt\n", 248 | "Resolving storage.googleapis.com (storage.googleapis.com)... 
64.233.183.128, 2607:f8b0:4001:c12::80\n", 249 | "Connecting to storage.googleapis.com (storage.googleapis.com)|64.233.183.128|:443... connected.\n", 250 | "HTTP request sent, awaiting response... 200 OK\n", 251 | "Length: 347116733 (331M) [text/plain]\n", 252 | "Saving to: ‘/tmp/glove.6B.100d.txt’\n", 253 | "\n", 254 | "/tmp/glove.6B.100d. 100%[===================>] 331.04M 160MB/s in 2.1s \n", 255 | "\n", 256 | "2019-06-07 17:55:33 (160 MB/s) - ‘/tmp/glove.6B.100d.txt’ saved [347116733/347116733]\n", 257 | "\n" 258 | ], 259 | "name": "stdout" 260 | } 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "metadata": { 266 | "id": "71NLk_lpFLNt", 267 | "colab_type": "code", 268 | "outputId": "97cb88db-754f-4375-fdc3-876cd6b4fdce", 269 | "colab": { 270 | "base_uri": "https://localhost:8080/", 271 | "height": 34 272 | } 273 | }, 274 | "source": [ 275 | "print(len(embeddings_matrix))\n", 276 | "# Expected Output\n", 277 | "# 138859" 278 | ], 279 | "execution_count": 0, 280 | "outputs": [ 281 | { 282 | "output_type": "stream", 283 | "text": [ 284 | "138859\n" 285 | ], 286 | "name": "stdout" 287 | } 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "metadata": { 293 | "colab_type": "code", 294 | "id": "iKKvbuEBOGFz", 295 | "colab": {} 296 | }, 297 | "source": [ 298 | "model = tf.keras.Sequential([\n", 299 | " tf.keras.layers.Embedding(vocab_size+1, embedding_dim, input_length=max_length, weights=[embeddings_matrix], trainable=False),\n", 300 | " # YOUR CODE HERE - experiment with combining different types, such as convolutions and LSTMs\n", 301 | "])\n", 302 | "model.compile(loss='binary_crossentropy', optimizer='adam', metrics='accuracy') #(# YOUR CODE HERE)\n", 303 | "model.summary()\n", 304 | "\n", 305 | "num_epochs = 50\n", 306 | "history = model.fit(training_sequences, training_labels, epochs=num_epochs, validation_data=(test_sequences, test_labels), verbose=2)\n", 307 | "\n", 308 | "print(\"Training Complete\")\n" 309 | ], 310 | "execution_count": 0, 311 | "outputs": [] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "metadata": { 316 | "id": "qxju4ItJKO8F", 317 | "colab_type": "code", 318 | "colab": {} 319 | }, 320 | "source": [ 321 | "import matplotlib.image as mpimg\n", 322 | "import matplotlib.pyplot as plt\n", 323 | "\n", 324 | "#-----------------------------------------------------------\n", 325 | "# Retrieve a list of list results on training and test data\n", 326 | "# sets for each training epoch\n", 327 | "#-----------------------------------------------------------\n", 328 | "acc=history.history['acc']\n", 329 | "val_acc=history.history['val_acc']\n", 330 | "loss=history.history['loss']\n", 331 | "val_loss=history.history['val_loss']\n", 332 | "\n", 333 | "epochs=range(len(acc)) # Get number of epochs\n", 334 | "\n", 335 | "#------------------------------------------------\n", 336 | "# Plot training and validation accuracy per epoch\n", 337 | "#------------------------------------------------\n", 338 | "plt.plot(epochs, acc, 'r')\n", 339 | "plt.plot(epochs, val_acc, 'b')\n", 340 | "plt.title('Training and validation accuracy')\n", 341 | "plt.xlabel(\"Epochs\")\n", 342 | "plt.ylabel(\"Accuracy\")\n", 343 | "plt.legend([\"Accuracy\", \"Validation Accuracy\"])\n", 344 | "\n", 345 | "plt.figure()\n", 346 | "\n", 347 | "#------------------------------------------------\n", 348 | "# Plot training and validation loss per epoch\n", 349 | "#------------------------------------------------\n", 350 | "plt.plot(epochs, loss, 'r')\n", 351 | "plt.plot(epochs, val_loss, 'b')\n", 
352 | "plt.title('Training and validation loss')\n", 353 | "plt.xlabel(\"Epochs\")\n", 354 | "plt.ylabel(\"Loss\")\n", 355 | "plt.legend([\"Loss\", \"Validation Loss\"])\n", 356 | "\n", 357 | "plt.figure()\n", 358 | "\n", 359 | "\n", 360 | "# Expected Output\n", 361 | "# A chart where the validation loss does not increase sharply!" 362 | ], 363 | "execution_count": 0, 364 | "outputs": [] 365 | } 366 | ] 367 | } -------------------------------------------------------------------------------- /Week 3/Quiz 3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/07Agarg/Natural-Language-Processing-In-Tensorflow-Course/46eb21e25f73fd8644a95e64696d64dd4843e1e8/Week 3/Quiz 3.pdf -------------------------------------------------------------------------------- /Week 3/course_3_week_3_lesson_1a.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 4 - Week 3 - Lesson 1a.ipynb 3 | 4 | Automatically generated by Colaboratory. 5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%203%20-%20Lesson%201a.ipynb 8 | 9 | # Single Layer LSTM 10 | """ 11 | 12 | from __future__ import absolute_import, division, print_function, unicode_literals 13 | 14 | 15 | import tensorflow_datasets as tfds 16 | import tensorflow as tf 17 | print(tf.__version__) 18 | 19 | # If the tf.__version__ is 1.x, please run this cell 20 | #!pip install tensorflow==2.0.0-beta0 21 | 22 | import tensorflow_datasets as tfds 23 | import tensorflow as tf 24 | print(tf.__version__) 25 | 26 | # Get the data 27 | dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True) 28 | train_dataset, test_dataset = dataset['train'], dataset['test'] 29 | 30 | tokenizer = info.features['text'].encoder 31 | 32 | BUFFER_SIZE = 10000 33 | BATCH_SIZE = 64 34 | 35 | train_dataset = train_dataset.shuffle(BUFFER_SIZE) 36 | train_dataset = train_dataset.padded_batch(BATCH_SIZE, train_dataset.output_shapes) 37 | test_dataset = test_dataset.padded_batch(BATCH_SIZE, test_dataset.output_shapes) 38 | 39 | model = tf.keras.Sequential([ 40 | tf.keras.layers.Embedding(tokenizer.vocab_size, 64), 41 | tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)), 42 | tf.keras.layers.Dense(64, activation='relu'), 43 | tf.keras.layers.Dense(1, activation='sigmoid') 44 | ]) 45 | 46 | model.summary() 47 | 48 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) 49 | 50 | NUM_EPOCHS = 10 51 | history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset) 52 | 53 | import matplotlib.pyplot as plt 54 | 55 | 56 | def plot_graphs(history, string): 57 | plt.plot(history.history[string]) 58 | plt.plot(history.history['val_'+string]) 59 | plt.xlabel("Epochs") 60 | plt.ylabel(string) 61 | plt.legend([string, 'val_'+string]) 62 | plt.show() 63 | 64 | plot_graphs(history, 'accuracy') 65 | 66 | plot_graphs(history, 'loss') -------------------------------------------------------------------------------- /Week 3/course_3_week_3_lesson_1b.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 4 - Week 3 - Lesson 1b.ipynb 3 | 4 | Automatically generated by Colaboratory. 
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%203%20-%20Lesson%201b.ipynb 8 | 9 | # Multiple Layer LSTM 10 | """ 11 | 12 | from __future__ import absolute_import, division, print_function, unicode_literals 13 | 14 | 15 | import tensorflow_datasets as tfds 16 | import tensorflow as tf 17 | print(tf.__version__) 18 | 19 | # If the tf.__version__ is 1.x, please run this cell 20 | !pip install tensorflow==2.0.0-beta0 21 | 22 | import tensorflow_datasets as tfds 23 | import tensorflow as tf 24 | print(tf.__version__) 25 | 26 | # Get the data 27 | dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True) 28 | train_dataset, test_dataset = dataset['train'], dataset['test'] 29 | 30 | tokenizer = info.features['text'].encoder 31 | 32 | BUFFER_SIZE = 10000 33 | BATCH_SIZE = 64 34 | 35 | train_dataset = train_dataset.shuffle(BUFFER_SIZE) 36 | train_dataset = train_dataset.padded_batch(BATCH_SIZE, train_dataset.output_shapes) 37 | test_dataset = test_dataset.padded_batch(BATCH_SIZE, test_dataset.output_shapes) 38 | 39 | model = tf.keras.Sequential([ 40 | tf.keras.layers.Embedding(tokenizer.vocab_size, 64), 41 | tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)), 42 | tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)), 43 | tf.keras.layers.Dense(64, activation='relu'), 44 | tf.keras.layers.Dense(1, activation='sigmoid') 45 | ]) 46 | 47 | model.summary() 48 | 49 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) 50 | 51 | NUM_EPOCHS = 10 52 | history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset) 53 | 54 | import matplotlib.pyplot as plt 55 | 56 | 57 | def plot_graphs(history, string): 58 | plt.plot(history.history[string]) 59 | plt.plot(history.history['val_'+string]) 60 | plt.xlabel("Epochs") 61 | plt.ylabel(string) 62 | plt.legend([string, 'val_'+string]) 63 | plt.show() 64 | 65 | plot_graphs(history, 'accuracy') 66 | 67 | plot_graphs(history, 'loss') -------------------------------------------------------------------------------- /Week 3/course_3_week_3_lesson_1c.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 4 - Week 3 - Lesson 1c.ipynb 3 | 4 | Automatically generated by Colaboratory. 
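
# [Editor's note -- a brief aside, sketched under the same setup as Lesson 1b above]
# When recurrent layers are stacked, every layer except the last needs
# return_sequences=True so the next layer receives the full sequence of hidden states
# rather than only the final one. Layer sizes below are illustrative.

import tensorflow as tf

stacked_lstm_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(8000, 64),
    # returns a (batch, timesteps, 128) sequence for the next Bidirectional LSTM
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    # last recurrent layer: returns a single (batch, 64) vector
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
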
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%203%20-%20Lesson%201c.ipynb 8 | 9 | # Multiple Layer GRU 10 | """ 11 | 12 | from __future__ import absolute_import, division, print_function, unicode_literals 13 | 14 | 15 | import tensorflow_datasets as tfds 16 | import tensorflow as tf 17 | print(tf.__version__) 18 | 19 | # If the tf.__version__ is 1.x, please run this cell 20 | !pip install tensorflow==2.0.0-beta0 21 | 22 | import tensorflow_datasets as tfds 23 | import tensorflow as tf 24 | print(tf.__version__) 25 | 26 | 27 | 28 | # Get the data 29 | dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True) 30 | train_dataset, test_dataset = dataset['train'], dataset['test'] 31 | 32 | tokenizer = info.features['text'].encoder 33 | 34 | BUFFER_SIZE = 10000 35 | BATCH_SIZE = 64 36 | 37 | train_dataset = train_dataset.shuffle(BUFFER_SIZE) 38 | train_dataset = train_dataset.padded_batch(BATCH_SIZE, train_dataset.output_shapes) 39 | test_dataset = test_dataset.padded_batch(BATCH_SIZE, test_dataset.output_shapes) 40 | 41 | model = tf.keras.Sequential([ 42 | tf.keras.layers.Embedding(tokenizer.vocab_size, 64), 43 | tf.keras.layers.Conv1D(128, 5, activation='relu'), 44 | tf.keras.layers.GlobalAveragePooling1D(), 45 | tf.keras.layers.Dense(64, activation='relu'), 46 | tf.keras.layers.Dense(1, activation='sigmoid') 47 | ]) 48 | 49 | model.summary() 50 | 51 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) 52 | 53 | NUM_EPOCHS = 10 54 | history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset) 55 | 56 | import matplotlib.pyplot as plt 57 | 58 | 59 | def plot_graphs(history, string): 60 | plt.plot(history.history[string]) 61 | plt.plot(history.history['val_'+string]) 62 | plt.xlabel("Epochs") 63 | plt.ylabel(string) 64 | plt.legend([string, 'val_'+string]) 65 | plt.show() 66 | 67 | plot_graphs(history, 'accuracy') 68 | 69 | plot_graphs(history, 'loss') -------------------------------------------------------------------------------- /Week 3/course_3_week_3_lesson_2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 3 - Week 3 - Lesson 2.ipynb 3 | 4 | Automatically generated by Colaboratory. 
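
# [Editor's note] In this script and the other sarcasm/IMDB files, oov_tok is shown as an
# empty string; the "<OOV>" literal appears to have been stripped during export because it
# looks like an HTML tag. The lessons intend an explicit out-of-vocabulary marker, e.g.:

from tensorflow.keras.preprocessing.text import Tokenizer

vocab_size = 1000
oov_tok = "<OOV>"
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
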
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%203%20-%20Lesson%202.ipynb 8 | """ 9 | 10 | import json 11 | import tensorflow as tf 12 | 13 | from tensorflow.keras.preprocessing.text import Tokenizer 14 | from tensorflow.keras.preprocessing.sequence import pad_sequences 15 | 16 | !wget --no-check-certificate \ 17 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \ 18 | -O /tmp/sarcasm.json 19 | 20 | vocab_size = 1000 21 | embedding_dim = 16 22 | max_length = 120 23 | trunc_type='post' 24 | padding_type='post' 25 | oov_tok = "" 26 | training_size = 20000 27 | 28 | 29 | with open("/tmp/sarcasm.json", 'r') as f: 30 | datastore = json.load(f) 31 | 32 | 33 | sentences = [] 34 | labels = [] 35 | urls = [] 36 | for item in datastore: 37 | sentences.append(item['headline']) 38 | labels.append(item['is_sarcastic']) 39 | 40 | training_sentences = sentences[0:training_size] 41 | testing_sentences = sentences[training_size:] 42 | training_labels = labels[0:training_size] 43 | testing_labels = labels[training_size:] 44 | 45 | tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok) 46 | tokenizer.fit_on_texts(training_sentences) 47 | 48 | word_index = tokenizer.word_index 49 | 50 | training_sequences = tokenizer.texts_to_sequences(training_sentences) 51 | training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type) 52 | 53 | testing_sequences = tokenizer.texts_to_sequences(testing_sentences) 54 | testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type) 55 | 56 | model = tf.keras.Sequential([ 57 | tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), 58 | tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)), 59 | tf.keras.layers.Dense(24, activation='relu'), 60 | tf.keras.layers.Dense(1, activation='sigmoid') 61 | ]) 62 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 63 | model.summary() 64 | 65 | num_epochs = 50 66 | history = model.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=1) 67 | 68 | import matplotlib.pyplot as plt 69 | 70 | 71 | def plot_graphs(history, string): 72 | plt.plot(history.history[string]) 73 | plt.plot(history.history['val_'+string]) 74 | plt.xlabel("Epochs") 75 | plt.ylabel(string) 76 | plt.legend([string, 'val_'+string]) 77 | plt.show() 78 | 79 | plot_graphs(history, 'acc') 80 | plot_graphs(history, 'loss') 81 | 82 | model.save("test.h5") -------------------------------------------------------------------------------- /Week 3/course_3_week_3_lesson_2c.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 3 - Week 3 - Lesson 2c.ipynb 3 | 4 | Automatically generated by Colaboratory. 
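
# [Editor's note -- a hedged sketch assuming a TF 2.x runtime]
# Newer tf.keras releases can reject plain Python lists passed to model.fit(), so the
# padded sequences and labels built in Lesson 2 above (and Lesson 2c below) are usually
# wrapped as NumPy arrays first; variable names follow the lesson code.

import numpy as np

training_padded = np.array(training_padded)
training_labels = np.array(training_labels)
testing_padded = np.array(testing_padded)
testing_labels = np.array(testing_labels)

history = model.fit(training_padded, training_labels,
                    epochs=num_epochs,
                    validation_data=(testing_padded, testing_labels),
                    verbose=1)
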
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%203%20-%20Lesson%202c.ipynb 8 | """ 9 | 10 | import json 11 | import tensorflow as tf 12 | 13 | from tensorflow.keras.preprocessing.text import Tokenizer 14 | from tensorflow.keras.preprocessing.sequence import pad_sequences 15 | 16 | !wget --no-check-certificate \ 17 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \ 18 | -O /tmp/sarcasm.json 19 | 20 | vocab_size = 1000 21 | embedding_dim = 16 22 | max_length = 120 23 | trunc_type='post' 24 | padding_type='post' 25 | oov_tok = "" 26 | training_size = 20000 27 | 28 | 29 | with open("/tmp/sarcasm.json", 'r') as f: 30 | datastore = json.load(f) 31 | 32 | 33 | sentences = [] 34 | labels = [] 35 | urls = [] 36 | for item in datastore: 37 | sentences.append(item['headline']) 38 | labels.append(item['is_sarcastic']) 39 | 40 | training_sentences = sentences[0:training_size] 41 | testing_sentences = sentences[training_size:] 42 | training_labels = labels[0:training_size] 43 | testing_labels = labels[training_size:] 44 | 45 | tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok) 46 | tokenizer.fit_on_texts(training_sentences) 47 | 48 | word_index = tokenizer.word_index 49 | 50 | training_sequences = tokenizer.texts_to_sequences(training_sentences) 51 | training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type) 52 | 53 | testing_sequences = tokenizer.texts_to_sequences(testing_sentences) 54 | testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type) 55 | 56 | model = tf.keras.Sequential([ 57 | tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), 58 | tf.keras.layers.Conv1D(128, 5, activation='relu'), 59 | tf.keras.layers.GlobalMaxPooling1D(), 60 | tf.keras.layers.Dense(24, activation='relu'), 61 | tf.keras.layers.Dense(1, activation='sigmoid') 62 | ]) 63 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 64 | model.summary() 65 | 66 | num_epochs = 50 67 | history = model.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=1) 68 | 69 | import matplotlib.pyplot as plt 70 | 71 | 72 | def plot_graphs(history, string): 73 | plt.plot(history.history[string]) 74 | plt.plot(history.history['val_'+string]) 75 | plt.xlabel("Epochs") 76 | plt.ylabel(string) 77 | plt.legend([string, 'val_'+string]) 78 | plt.show() 79 | 80 | plot_graphs(history, 'acc') 81 | plot_graphs(history, 'loss') 82 | 83 | model.save("test.h5") -------------------------------------------------------------------------------- /Week 3/course_3_week_3_lesson_2d.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 3 - Week 3 - Lesson 2d.ipynb 3 | 4 | Automatically generated by Colaboratory. 
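
# [Editor's note -- two hedged asides on the Lesson 2d code below, assuming TF 2.x]
# 1) tf.enable_eager_execution() exists only in TF 1.x; under TF 2.x eager execution is
#    already the default, so the call can be dropped or guarded:

import tensorflow as tf

if not tf.executing_eagerly():                 # always True on a TF 2.x runtime
    tf.compat.v1.enable_eager_execution()      # only relevant on a TF 1.x runtime

# 2) str(s.numpy()) keeps the bytes repr (e.g. "b'great movie'") in Python 3; decoding the
#    tensor's bytes gives cleaner text for the Tokenizer:
#        training_sentences.append(s.numpy().decode('utf8'))
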
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%203%20-%20Lesson%202d.ipynb 8 | """ 9 | 10 | # NOTE: PLEASE MAKE SURE YOU ARE RUNNING THIS IN A PYTHON3 ENVIRONMENT 11 | 12 | import tensorflow as tf 13 | print(tf.__version__) 14 | 15 | # This is needed for the iterator over the data 16 | # But not necessary if you have TF 2.0 installed 17 | #!pip install tensorflow==2.0.0-beta0 18 | 19 | 20 | tf.enable_eager_execution() 21 | 22 | # !pip install -q tensorflow-datasets 23 | 24 | import tensorflow_datasets as tfds 25 | imdb, info = tfds.load("imdb_reviews", with_info=True, as_supervised=True) 26 | 27 | import numpy as np 28 | 29 | train_data, test_data = imdb['train'], imdb['test'] 30 | 31 | training_sentences = [] 32 | training_labels = [] 33 | 34 | testing_sentences = [] 35 | testing_labels = [] 36 | 37 | # str(s.tonumpy()) is needed in Python3 instead of just s.numpy() 38 | for s,l in train_data: 39 | training_sentences.append(str(s.numpy())) 40 | training_labels.append(l.numpy()) 41 | 42 | for s,l in test_data: 43 | testing_sentences.append(str(s.numpy())) 44 | testing_labels.append(l.numpy()) 45 | 46 | training_labels_final = np.array(training_labels) 47 | testing_labels_final = np.array(testing_labels) 48 | 49 | vocab_size = 10000 50 | embedding_dim = 16 51 | max_length = 120 52 | trunc_type='post' 53 | oov_tok = "" 54 | 55 | 56 | from tensorflow.keras.preprocessing.text import Tokenizer 57 | from tensorflow.keras.preprocessing.sequence import pad_sequences 58 | 59 | tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok) 60 | tokenizer.fit_on_texts(training_sentences) 61 | word_index = tokenizer.word_index 62 | sequences = tokenizer.texts_to_sequences(training_sentences) 63 | padded = pad_sequences(sequences,maxlen=max_length, truncating=trunc_type) 64 | 65 | testing_sequences = tokenizer.texts_to_sequences(testing_sentences) 66 | testing_padded = pad_sequences(testing_sequences,maxlen=max_length) 67 | 68 | reverse_word_index = dict([(value, key) for (key, value) in word_index.items()]) 69 | 70 | def decode_review(text): 71 | return ' '.join([reverse_word_index.get(i, '?') for i in text]) 72 | 73 | print(decode_review(padded[1])) 74 | print(training_sentences[1]) 75 | 76 | model = tf.keras.Sequential([ 77 | tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), 78 | tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32)), 79 | tf.keras.layers.Dense(6, activation='relu'), 80 | tf.keras.layers.Dense(1, activation='sigmoid') 81 | ]) 82 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 83 | model.summary() 84 | 85 | num_epochs = 50 86 | history = model.fit(padded, training_labels_final, epochs=num_epochs, validation_data=(testing_padded, testing_labels_final)) 87 | 88 | import matplotlib.pyplot as plt 89 | 90 | 91 | def plot_graphs(history, string): 92 | plt.plot(history.history[string]) 93 | plt.plot(history.history['val_'+string]) 94 | plt.xlabel("Epochs") 95 | plt.ylabel(string) 96 | plt.legend([string, 'val_'+string]) 97 | plt.show() 98 | 99 | plot_graphs(history, 'accuracy') 100 | plot_graphs(history, 'loss') 101 | 102 | # Model Definition with LSTM 103 | model = tf.keras.Sequential([ 104 | tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), 105 | tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)), 106 | tf.keras.layers.Dense(6, activation='relu'), 107 | 
tf.keras.layers.Dense(1, activation='sigmoid') 108 | ]) 109 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 110 | model.summary() 111 | 112 | # Model Definition with Conv1D 113 | model = tf.keras.Sequential([ 114 | tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), 115 | tf.keras.layers.Conv1D(128, 5, activation='relu'), 116 | tf.keras.layers.GlobalAveragePooling1D(), 117 | tf.keras.layers.Dense(6, activation='relu'), 118 | tf.keras.layers.Dense(1, activation='sigmoid') 119 | ]) 120 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 121 | model.summary() -------------------------------------------------------------------------------- /Week 3/nlp_course_week_3_exercise_question.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """NLP Course - Week 3 Exercise Question.ipynb 3 | 4 | Automatically generated by Colaboratory. 5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/NLP%20Course%20-%20Week%203%20Exercise%20Question.ipynb 8 | """ 9 | 10 | import json 11 | import tensorflow as tf 12 | import csv 13 | import random 14 | import numpy as np 15 | 16 | from tensorflow.keras.preprocessing.text import Tokenizer 17 | from tensorflow.keras.preprocessing.sequence import pad_sequences 18 | from tensorflow.keras.utils import to_categorical 19 | from tensorflow.keras import regularizers 20 | 21 | 22 | embedding_dim = 100 23 | max_length = 16 24 | trunc_type='post' 25 | padding_type='post' 26 | oov_tok = "" 27 | training_size=160000 #Your dataset size here. Experiment using smaller values (i.e. 16000), but don't forget to train on at least 160000 to see the best effects 28 | test_portion=.1 29 | 30 | corpus = [] 31 | 32 | # Note that I cleaned the Stanford dataset to remove LATIN1 encoding to make it easier for Python CSV reader 33 | # You can do that yourself with: 34 | # iconv -f LATIN1 -t UTF8 training.1600000.processed.noemoticon.csv -o training_cleaned.csv 35 | # I then hosted it on my site to make it easier to use in this notebook 36 | 37 | !wget --no-check-certificate \ 38 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/training_cleaned.csv \ 39 | -O /tmp/training_cleaned.csv 40 | 41 | num_sentences = 0 42 | 43 | with open("/tmp/training_cleaned.csv") as csvfile: 44 | reader = csv.reader(csvfile, delimiter=',') 45 | for row in reader: 46 | # Your Code here. Create list items where the first item is the text, found in row[5], and the second is the label. Note that the label is a '0' or a '4' in the text. When it's the former, make 47 | # your label to be 0, otherwise 1. Keep a count of the number of sentences in num_sentences 48 | list_item=[] 49 | list_item.append(row[5]) 50 | if row[0] == '0': 51 | list_item.append(0) 52 | else: 53 | list_item.append(1) 54 | # YOUR CODE HERE 55 | num_sentences = num_sentences + 1 56 | corpus.append(list_item) 57 | 58 | print(num_sentences) 59 | print(len(corpus)) 60 | print(corpus[1]) 61 | 62 | # Expected Output: 63 | # 1600000 64 | # 1600000 65 | # ["is upset that he can't update his Facebook by texting it... and might cry as a result School today also. 
Blah!", 0] 66 | 67 | sentences=[] 68 | labels=[] 69 | random.shuffle(corpus) 70 | for x in range(training_size): 71 | sentences.append(corpus[x][0]) #(# YOUR CODE HERE) 72 | labels.append(corpus[x][1]) #(# YOUR CODE HERE) 73 | 74 | 75 | tokenizer = Tokenizer() 76 | tokenizer.fit_on_texts(sentences) #(# YOUR CODE HERE) 77 | 78 | word_index = tokenizer.word_index 79 | vocab_size=len(word_index) #(# YOUR CODE HERE) 80 | 81 | sequences = tokenizer.texts_to_sequences(sentences) #(# YOUR CODE HERE) 82 | padded = pad_sequences(sequences, maxlen=max_length, padding = padding_type, truncating=trunc_type) #(# YOUR CODE HERE) 83 | 84 | split = int(test_portion * training_size) 85 | 86 | test_sequences = padded[0:split] #[# YOUR CODE HERE] 87 | training_sequences = padded[split:training_size]#[# YOUR CODE HERE] 88 | test_labels = labels[0:split] #[# YOUR CODE HERE] 89 | training_labels = labels[split:training_size] #[# YOUR CODE HERE] 90 | 91 | print(vocab_size) 92 | print(word_index['i']) 93 | # Expected Output 94 | # 138858 95 | # 1 96 | 97 | # Note this is the 100 dimension version of GloVe from Stanford 98 | # I unzipped and hosted it on my site to make this notebook easier 99 | !wget --no-check-certificate \ 100 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/glove.6B.100d.txt \ 101 | -O /tmp/glove.6B.100d.txt 102 | embeddings_index = {}; 103 | with open('/tmp/glove.6B.100d.txt') as f: 104 | for line in f: 105 | values = line.split(); 106 | word = values[0]; 107 | coefs = np.asarray(values[1:], dtype='float32'); 108 | embeddings_index[word] = coefs; 109 | 110 | embeddings_matrix = np.zeros((vocab_size+1, embedding_dim)); 111 | for word, i in word_index.items(): 112 | embedding_vector = embeddings_index.get(word); 113 | if embedding_vector is not None: 114 | embeddings_matrix[i] = embedding_vector; 115 | 116 | print(len(embeddings_matrix)) 117 | # Expected Output 118 | # 138859 119 | 120 | model = tf.keras.Sequential([ 121 | tf.keras.layers.Embedding(vocab_size+1, embedding_dim, input_length=max_length, weights=[embeddings_matrix], trainable=False), 122 | # YOUR CODE HERE - experiment with combining different types, such as convolutions and LSTMs 123 | ]) 124 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics='accuracy') #(# YOUR CODE HERE) 125 | model.summary() 126 | 127 | num_epochs = 50 128 | history = model.fit(training_sequences, training_labels, epochs=num_epochs, validation_data=(test_sequences, test_labels), verbose=2) 129 | 130 | print("Training Complete") 131 | 132 | import matplotlib.image as mpimg 133 | import matplotlib.pyplot as plt 134 | 135 | #----------------------------------------------------------- 136 | # Retrieve a list of list results on training and test data 137 | # sets for each training epoch 138 | #----------------------------------------------------------- 139 | acc=history.history['acc'] 140 | val_acc=history.history['val_acc'] 141 | loss=history.history['loss'] 142 | val_loss=history.history['val_loss'] 143 | 144 | epochs=range(len(acc)) # Get number of epochs 145 | 146 | #------------------------------------------------ 147 | # Plot training and validation accuracy per epoch 148 | #------------------------------------------------ 149 | plt.plot(epochs, acc, 'r') 150 | plt.plot(epochs, val_acc, 'b') 151 | plt.title('Training and validation accuracy') 152 | plt.xlabel("Epochs") 153 | plt.ylabel("Accuracy") 154 | plt.legend(["Accuracy", "Validation Accuracy"]) 155 | 156 | plt.figure() 157 | 158 | 
#------------------------------------------------ 159 | # Plot training and validation loss per epoch 160 | #------------------------------------------------ 161 | plt.plot(epochs, loss, 'r') 162 | plt.plot(epochs, val_loss, 'b') 163 | plt.title('Training and validation loss') 164 | plt.xlabel("Epochs") 165 | plt.ylabel("Loss") 166 | plt.legend(["Loss", "Validation Loss"]) 167 | 168 | plt.figure() 169 | 170 | 171 | # Expected Output 172 | # A chart where the validation loss does not increase sharply! -------------------------------------------------------------------------------- /Week 4/NLP_Week4_Exercise_Shakespeare_Question.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "NLP-Week4-Exercise-Shakespeare-Question.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python2", 12 | "display_name": "Python 2" 13 | }, 14 | "accelerator": "GPU" 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "code", 19 | "metadata": { 20 | "id": "BOwsuGQQY9OL", 21 | "colab_type": "code", 22 | "colab": {} 23 | }, 24 | "source": [ 25 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 26 | "from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional\n", 27 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 28 | "from tensorflow.keras.models import Sequential\n", 29 | "from tensorflow.keras.optimizers import Adam\n", 30 | "### YOUR CODE HERE\n", 31 | "from tensorflow.keras.regularizers import Regularizer\n", 32 | "# Figure out how to import regularizers\n", 33 | "###\n", 34 | "import tensorflow.keras.utils as ku \n", 35 | "import numpy as np " 36 | ], 37 | "execution_count": 0, 38 | "outputs": [] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "metadata": { 43 | "colab_type": "code", 44 | "id": "PRnDnCW-Z7qv", 45 | "colab": {} 46 | }, 47 | "source": [ 48 | "tokenizer = Tokenizer()\n", 49 | "!wget --no-check-certificate \\\n", 50 | " https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt \\\n", 51 | " -O /tmp/sonnets.txt\n", 52 | "data = open('/tmp/sonnets.txt').read()\n", 53 | "\n", 54 | "corpus = data.lower().split(\"\\n\")\n", 55 | "\n", 56 | "\n", 57 | "tokenizer.fit_on_texts(corpus)\n", 58 | "total_words = len(tokenizer.word_index) + 1\n", 59 | "\n", 60 | "# create input sequences using list of tokens\n", 61 | "input_sequences = []\n", 62 | "for line in corpus:\n", 63 | "\ttoken_list = tokenizer.texts_to_sequences([line])[0]\n", 64 | "\tfor i in range(1, len(token_list)):\n", 65 | "\t\tn_gram_sequence = token_list[:i+1]\n", 66 | "\t\tinput_sequences.append(n_gram_sequence)\n", 67 | "\n", 68 | "\n", 69 | "# pad sequences \n", 70 | "max_sequence_len = max([len(x) for x in input_sequences])\n", 71 | "input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))\n", 72 | "\n", 73 | "# create predictors and label\n", 74 | "predictors, label = input_sequences[:,:-1],input_sequences[:,-1]\n", 75 | "\n", 76 | "label = ku.to_categorical(label, num_classes=total_words)" 77 | ], 78 | "execution_count": 0, 79 | "outputs": [] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "metadata": { 84 | "id": "w9vH8Y59ajYL", 85 | "colab_type": "code", 86 | "colab": {} 87 | }, 88 | "source": [ 89 | "model = Sequential()\n", 90 | "model.add(Embedding(total_words, 100, input_length=max_sequence_len-1)) #(# Your Embedding Layer)\n", 91 | 
"model.add(Bidirectional(LSTM(150, return_sequences=True))) #(# An LSTM Layer)\n", 92 | "model.add(Dropout(0.2)) #(# A dropout layer)\n", 93 | "model.add(LSTM(100)) #(# Another LSTM Layer)\n", 94 | "model.add(Dense(total_words/2, activation='relu')) #(# A Dense Layer including regularizers)\n", 95 | "model.add(Dense(total_words, activation='softmax')) #(# A Dense Layer)\n", 96 | "# Pick an optimizer\n", 97 | "model.compile(loss='categorical_crossentropy', optimizer='adam', metrics='accuracy') #(# Pick a loss function and an optimizer)\n", 98 | "print(model.summary())\n" 99 | ], 100 | "execution_count": 0, 101 | "outputs": [] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "metadata": { 106 | "id": "AIg2f1HBxqof", 107 | "colab_type": "code", 108 | "colab": {} 109 | }, 110 | "source": [ 111 | " history = model.fit(predictors, label, epochs=100, verbose=1)" 112 | ], 113 | "execution_count": 0, 114 | "outputs": [] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "metadata": { 119 | "id": "1fXTEO3GJ282", 120 | "colab_type": "code", 121 | "colab": {} 122 | }, 123 | "source": [ 124 | "import matplotlib.pyplot as plt\n", 125 | "acc = history.history['acc']\n", 126 | "loss = history.history['loss']\n", 127 | "\n", 128 | "epochs = range(len(acc))\n", 129 | "\n", 130 | "plt.plot(epochs, acc, 'b', label='Training accuracy')\n", 131 | "plt.title('Training accuracy')\n", 132 | "\n", 133 | "plt.figure()\n", 134 | "\n", 135 | "plt.plot(epochs, loss, 'b', label='Training Loss')\n", 136 | "plt.title('Training loss')\n", 137 | "plt.legend()\n", 138 | "\n", 139 | "plt.show()" 140 | ], 141 | "execution_count": 0, 142 | "outputs": [] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "metadata": { 147 | "id": "6Vc6PHgxa6Hm", 148 | "colab_type": "code", 149 | "colab": {} 150 | }, 151 | "source": [ 152 | "seed_text = \"Help me Obi Wan Kenobi, you're my only hope\"\n", 153 | "next_words = 100\n", 154 | " \n", 155 | "for _ in range(next_words):\n", 156 | "\ttoken_list = tokenizer.texts_to_sequences([seed_text])[0]\n", 157 | "\ttoken_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')\n", 158 | "\tpredicted = model.predict_classes(token_list, verbose=0)\n", 159 | "\toutput_word = \"\"\n", 160 | "\tfor word, index in tokenizer.word_index.items():\n", 161 | "\t\tif index == predicted:\n", 162 | "\t\t\toutput_word = word\n", 163 | "\t\t\tbreak\n", 164 | "\tseed_text += \" \" + output_word\n", 165 | "print(seed_text)" 166 | ], 167 | "execution_count": 0, 168 | "outputs": [] 169 | } 170 | ] 171 | } -------------------------------------------------------------------------------- /Week 4/Quiz 4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/07Agarg/Natural-Language-Processing-In-Tensorflow-Course/46eb21e25f73fd8644a95e64696d64dd4843e1e8/Week 4/Quiz 4.pdf -------------------------------------------------------------------------------- /Week 4/nlp_week4_exercise_shakespeare_question.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """NLP-Week4-Exercise-Shakespeare-Question.ipynb 3 | 4 | Automatically generated by Colaboratory. 
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/NLP_Week4_Exercise_Shakespeare_Question.ipynb 8 | """ 9 | 10 | from tensorflow.keras.preprocessing.sequence import pad_sequences 11 | from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional 12 | from tensorflow.keras.preprocessing.text import Tokenizer 13 | from tensorflow.keras.models import Sequential 14 | from tensorflow.keras.optimizers import Adam 15 | ### YOUR CODE HERE 16 | from tensorflow.keras.regularizers import Regularizer 17 | # Figure out how to import regularizers 18 | ### 19 | import tensorflow.keras.utils as ku 20 | import numpy as np 21 | 22 | tokenizer = Tokenizer() 23 | !wget --no-check-certificate \ 24 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt \ 25 | -O /tmp/sonnets.txt 26 | data = open('/tmp/sonnets.txt').read() 27 | 28 | corpus = data.lower().split("\n") 29 | 30 | 31 | tokenizer.fit_on_texts(corpus) 32 | total_words = len(tokenizer.word_index) + 1 33 | 34 | # create input sequences using list of tokens 35 | input_sequences = [] 36 | for line in corpus: 37 | token_list = tokenizer.texts_to_sequences([line])[0] 38 | for i in range(1, len(token_list)): 39 | n_gram_sequence = token_list[:i+1] 40 | input_sequences.append(n_gram_sequence) 41 | 42 | 43 | # pad sequences 44 | max_sequence_len = max([len(x) for x in input_sequences]) 45 | input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')) 46 | 47 | # create predictors and label 48 | predictors, label = input_sequences[:,:-1],input_sequences[:,-1] 49 | 50 | label = ku.to_categorical(label, num_classes=total_words) 51 | 52 | model = Sequential() 53 | model.add(Embedding(total_words, 100, input_length=max_sequence_len-1)) #(# Your Embedding Layer) 54 | model.add(Bidirectional(LSTM(150, return_sequences=True))) #(# An LSTM Layer) 55 | model.add(Dropout(0.2)) #(# A dropout layer) 56 | model.add(LSTM(100)) #(# Another LSTM Layer) 57 | model.add(Dense(total_words/2, activation='relu')) #(# A Dense Layer including regularizers) 58 | model.add(Dense(total_words, activation='softmax')) #(# A Dense Layer) 59 | # Pick an optimizer 60 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics='accuracy') #(# Pick a loss function and an optimizer) 61 | print(model.summary()) 62 | 63 | history = model.fit(predictors, label, epochs=100, verbose=1) 64 | 65 | import matplotlib.pyplot as plt 66 | acc = history.history['acc'] 67 | loss = history.history['loss'] 68 | 69 | epochs = range(len(acc)) 70 | 71 | plt.plot(epochs, acc, 'b', label='Training accuracy') 72 | plt.title('Training accuracy') 73 | 74 | plt.figure() 75 | 76 | plt.plot(epochs, loss, 'b', label='Training Loss') 77 | plt.title('Training loss') 78 | plt.legend() 79 | 80 | plt.show() 81 | 82 | seed_text = "Help me Obi Wan Kenobi, you're my only hope" 83 | next_words = 100 84 | 85 | for _ in range(next_words): 86 | token_list = tokenizer.texts_to_sequences([seed_text])[0] 87 | token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre') 88 | predicted = model.predict_classes(token_list, verbose=0) 89 | output_word = "" 90 | for word, index in tokenizer.word_index.items(): 91 | if index == predicted: 92 | output_word = word 93 | break 94 | seed_text += " " + output_word 95 | print(seed_text) --------------------------------------------------------------------------------
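
# [Editor's sketch] One way to complete the Week 4 placeholders above, assuming a TF 2.x
# runtime: import the regularizers module (rather than the Regularizer base class), use
# integer division for the hidden Dense width, compile with a metrics list, and replace
# the removed predict_classes helper with an argmax over model.predict(). The helper names
# and the regularizer strength are illustrative, not the official solution.

import numpy as np
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.models import Sequential


def build_sonnet_model(total_words, max_sequence_len):
    model = Sequential([
        Embedding(total_words, 100, input_length=max_sequence_len - 1),
        Bidirectional(LSTM(150, return_sequences=True)),
        Dropout(0.2),
        LSTM(100),
        # "A Dense Layer including regularizers", as the exercise comment asks for:
        Dense(total_words // 2, activation='relu',
              kernel_regularizer=regularizers.l2(0.01)),
        Dense(total_words, activation='softmax'),
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model


# predict_classes() was removed from tf.keras models; argmax over predict() is equivalent
# for this single-sample, softmax-output case.
def next_word_id(model, token_list):
    return int(np.argmax(model.predict(token_list, verbose=0), axis=-1)[0])
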