├── .gitattributes ├── README.md ├── Week 1 ├── Exercise_question.ipynb ├── Quiz 1.pdf └── exercise_question.py ├── Week 2 ├── Course_3_Week_2_Exercise_Question.ipynb ├── Course_3_Week_2_Lesson_1.ipynb ├── Quiz 2.pdf ├── course_3_week_2_exercise_question.py └── course_3_week_2_lesson_1.py ├── Week 3 ├── Course_3_Week_3_Lesson_1a.ipynb ├── Course_3_Week_3_Lesson_1b.ipynb ├── Course_3_Week_3_Lesson_1c.ipynb ├── Course_3_Week_3_Lesson_2.ipynb ├── Course_3_Week_3_Lesson_2c.ipynb ├── Course_3_Week_3_Lesson_2d.ipynb ├── NLP_Course_Week_3_Exercise_Question.ipynb ├── Quiz 3.pdf ├── course_3_week_3_lesson_1a.py ├── course_3_week_3_lesson_1b.py ├── course_3_week_3_lesson_1c.py ├── course_3_week_3_lesson_2.py ├── course_3_week_3_lesson_2c.py ├── course_3_week_3_lesson_2d.py └── nlp_course_week_3_exercise_question.py └── Week 4 ├── NLP_Week4_Exercise_Shakespeare_Question.ipynb ├── Quiz 4.pdf └── nlp_week4_exercise_shakespeare_question.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NLP_In_Tensorflow-Course 2 | 3 | This repository contains the exercise notebooks for Course 3 (Natural Language Processing in TensorFlow) of the **TensorFlow in Practice Specialization**. 4 | 5 | #### Download Dataset for Week 1, 2 Exercise Notebook: 6 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv 7 | 8 | #### Download Dataset for Week 3 Exercise Notebook: 9 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/training_cleaned.csv 10 | 11 | #### Download Dataset for Week 4 Exercise Notebook: 12 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt 13 | -------------------------------------------------------------------------------- /Week 1/Exercise_question.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Exercise-question.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "code", 18 | "metadata": { 19 | "id": "zrZevCPJ92HG", 20 | "colab_type": "code", 21 | "colab": { 22 | "base_uri": "https://localhost:8080/", 23 | "height": 204 24 | }, 25 | "outputId": "be1e58bc-3329-404a-e167-c08b7f6aba73" 26 | }, 27 | "source": [ 28 | "!wget --no-check-certificate \\\n", 29 | " https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv \\\n", 30 | " -O /tmp/bbc-text.csv\n", 31 | "\n", 32 | " \n", 33 | "import csv\n", 34 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 35 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 36 | "\n", 37 | "\n", 38 | "#Stopwords list from https://github.com/Yoast/YoastSEO.js/blob/develop/src/config/stopwords.js\n", 39 | "# Convert it to a Python list and paste it here\n", 40 | "stopwords = [ \"a\", \"about\", \"above\", \"after\", \"again\", \"against\", \"all\", \"am\", \"an\", \"and\", \"any\", \"are\", \"as\", \"at\", \"be\", \"because\", \"been\", \"before\", \"being\", \"below\", \"between\", \"both\", \"but\", \"by\", \"could\", \"did\", \"do\", \"does\", \"doing\", \"down\", \"during\", \"each\", \"few\",
\"for\", \"from\", \"further\", \"had\", \"has\", \"have\", \"having\", \"he\", \"he'd\", \"he'll\", \"he's\", \"her\", \"here\", \"here's\", \"hers\", \"herself\", \"him\", \"himself\", \"his\", \"how\", \"how's\", \"i\", \"i'd\", \"i'll\", \"i'm\", \"i've\", \"if\", \"in\", \"into\", \"is\", \"it\", \"it's\", \"its\", \"itself\", \"let's\", \"me\", \"more\", \"most\", \"my\", \"myself\", \"nor\", \"of\", \"on\", \"once\", \"only\", \"or\", \"other\", \"ought\", \"our\", \"ours\", \"ourselves\", \"out\", \"over\", \"own\", \"same\", \"she\", \"she'd\", \"she'll\", \"she's\", \"should\", \"so\", \"some\", \"such\", \"than\", \"that\", \"that's\", \"the\", \"their\", \"theirs\", \"them\", \"themselves\", \"then\", \"there\", \"there's\", \"these\", \"they\", \"they'd\", \"they'll\", \"they're\", \"they've\", \"this\", \"those\", \"through\", \"to\", \"too\", \"under\", \"until\", \"up\", \"very\", \"was\", \"we\", \"we'd\", \"we'll\", \"we're\", \"we've\", \"were\", \"what\", \"what's\", \"when\", \"when's\", \"where\", \"where's\", \"which\", \"while\", \"who\", \"who's\", \"whom\", \"why\", \"why's\", \"with\", \"would\", \"you\", \"you'd\", \"you'll\", \"you're\", \"you've\", \"your\", \"yours\", \"yourself\", \"yourselves\" ] \n", 41 | "#YOUR CODE HERE\n" 42 | ], 43 | "execution_count": 1, 44 | "outputs": [ 45 | { 46 | "output_type": "stream", 47 | "text": [ 48 | "--2019-06-22 00:09:58-- https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv\n", 49 | "Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.141.128, 2607:f8b0:400c:c06::80\n", 50 | "Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.141.128|:443... connected.\n", 51 | "HTTP request sent, awaiting response... 200 OK\n", 52 | "Length: 5057493 (4.8M) [application/octet-stream]\n", 53 | "Saving to: ‘/tmp/bbc-text.csv’\n", 54 | "\n", 55 | "\r/tmp/bbc-text.csv 0%[ ] 0 --.-KB/s \r/tmp/bbc-text.csv 100%[===================>] 4.82M --.-KB/s in 0.02s \n", 56 | "\n", 57 | "2019-06-22 00:09:59 (206 MB/s) - ‘/tmp/bbc-text.csv’ saved [5057493/5057493]\n", 58 | "\n" 59 | ], 60 | "name": "stdout" 61 | } 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "metadata": { 67 | "id": "1rmYBjsyCv3K", 68 | "colab_type": "code", 69 | "colab": { 70 | "base_uri": "https://localhost:8080/", 71 | "height": 71 72 | }, 73 | "outputId": "d0305e29-7b76-43be-87c1-805794fe99b5" 74 | }, 75 | "source": [ 76 | "sentences = []\n", 77 | "labels = []\n", 78 | "fields = []\n", 79 | "with open(\"/tmp/bbc-text.csv\", 'r') as csvfile:\n", 80 | " # Your Code here\n", 81 | " reader = csv.reader(csvfile, delimiter=',')\n", 82 | " next(reader)\n", 83 | " for row in reader:\n", 84 | " labels.append(row[0])\n", 85 | " sentence = row[1]\n", 86 | " for word in stopwords:\n", 87 | " token = \" \" + word + \" \"\n", 88 | " sentence = sentence.replace(token, \" \")\n", 89 | " sentence = sentence.replace(\" \", \" \")\n", 90 | " sentences.append(sentence)\n", 91 | " \n", 92 | "print(len(sentences))\n", 93 | "print(sentences[0])\n", 94 | "\n", 95 | "#Expected output\n", 96 | "# 2225\n", 97 | "# tv future hands viewers home theatre systems plasma high-definition tvs digital video recorders moving living room way people watch tv will radically different five years time. according expert panel gathered annual consumer electronics show las vegas discuss new technologies will impact one favourite pastimes. 
us leading trend programmes content will delivered viewers via home networks cable satellite telecoms companies broadband service providers front rooms portable devices. one talked-about technologies ces digital personal video recorders (dvr pvr). set-top boxes like us s tivo uk s sky+ system allow people record store play pause forward wind tv programmes want. essentially technology allows much personalised tv. also built-in high-definition tv sets big business japan us slower take off europe lack high-definition programming. not can people forward wind adverts can also forget abiding network channel schedules putting together a-la-carte entertainment. us networks cable satellite companies worried means terms advertising revenues well brand identity viewer loyalty channels. although us leads technology moment also concern raised europe particularly growing uptake services like sky+. happens today will see nine months years time uk adam hume bbc broadcast s futurologist told bbc news website. likes bbc no issues lost advertising revenue yet. pressing issue moment commercial uk broadcasters brand loyalty important everyone. will talking content brands rather network brands said tim hanlon brand communications firm starcom mediavest. reality broadband connections anybody can producer content. added: challenge now hard promote programme much choice. means said stacey jolna senior vice president tv guide tv group way people find content want watch simplified tv viewers. means networks us terms channels take leaf google s book search engine future instead scheduler help people find want watch. kind channel model might work younger ipod generation used taking control gadgets play them. might not suit everyone panel recognised. older generations comfortable familiar schedules channel brands know getting. perhaps not want much choice put hands mr hanlon suggested. end kids just diapers pushing buttons already - everything possible available said mr hanlon. ultimately consumer will tell market want. 50 000 new gadgets technologies showcased ces many enhancing tv-watching experience. high-definition tv sets everywhere many new models lcd (liquid crystal display) tvs launched dvr capability built instead external boxes. one example launched show humax s 26-inch lcd tv 80-hour tivo dvr dvd recorder. one us s biggest satellite tv companies directtv even launched branded dvr show 100-hours recording capability instant replay search function. set can pause rewind tv 90 hours. microsoft chief bill gates announced pre-show keynote speech partnership tivo called tivotogo means people can play recorded programmes windows pcs mobile devices. reflect increasing trend freeing multimedia people can watch want want." 98 | ], 99 | "execution_count": 7, 100 | "outputs": [ 101 | { 102 | "output_type": "stream", 103 | "text": [ 104 | "2225\n", 105 | "tv future hands viewers home theatre systems plasma high-definition tvs digital video recorders moving living room way people watch tv will radically different five years time. according expert panel gathered annual consumer electronics show las vegas discuss new technologies will impact one favourite pastimes. us leading trend programmes content will delivered viewers via home networks cable satellite telecoms companies broadband service providers front rooms portable devices. one talked-about technologies ces digital personal video recorders (dvr pvr). set-top boxes like us s tivo uk s sky+ system allow people record store play pause forward wind tv programmes want. 
essentially technology allows much personalised tv. also built-in high-definition tv sets big business japan us slower take off europe lack high-definition programming. not can people forward wind adverts can also forget abiding network channel schedules putting together a-la-carte entertainment. us networks cable satellite companies worried means terms advertising revenues well brand identity viewer loyalty channels. although us leads technology moment also concern raised europe particularly growing uptake services like sky+. happens today will see nine months years time uk adam hume bbc broadcast s futurologist told bbc news website. likes bbc no issues lost advertising revenue yet. pressing issue moment commercial uk broadcasters brand loyalty important everyone. will talking content brands rather network brands said tim hanlon brand communications firm starcom mediavest. reality broadband connections anybody can producer content. added: challenge now hard promote programme much choice. means said stacey jolna senior vice president tv guide tv group way people find content want watch simplified tv viewers. means networks us terms channels take leaf google s book search engine future instead scheduler help people find want watch. kind channel model might work younger ipod generation used taking control gadgets play them. might not suit everyone panel recognised. older generations comfortable familiar schedules channel brands know getting. perhaps not want much choice put hands mr hanlon suggested. end kids just diapers pushing buttons already - everything possible available said mr hanlon. ultimately consumer will tell market want. 50 000 new gadgets technologies showcased ces many enhancing tv-watching experience. high-definition tv sets everywhere many new models lcd (liquid crystal display) tvs launched dvr capability built instead external boxes. one example launched show humax s 26-inch lcd tv 80-hour tivo dvr dvd recorder. one us s biggest satellite tv companies directtv even launched branded dvr show 100-hours recording capability instant replay search function. set can pause rewind tv 90 hours. microsoft chief bill gates announced pre-show keynote speech partnership tivo called tivotogo means people can play recorded programmes windows pcs mobile devices. 
reflect increasing trend freeing multimedia people can watch want want.\n" 106 | ], 107 | "name": "stdout" 108 | } 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "metadata": { 114 | "id": "9LhzBBgSC3S5", 115 | "colab_type": "code", 116 | "colab": { 117 | "base_uri": "https://localhost:8080/", 118 | "height": 34 119 | }, 120 | "outputId": "0795b3f3-c7bf-4b47-c7a4-73a610ed5136" 121 | }, 122 | "source": [ 123 | "tokenizer = Tokenizer(oov_token = \"\") # Your Code Here\n", 124 | "tokenizer.fit_on_texts(sentences) #(# Your Code Here)\n", 125 | "word_index = tokenizer.word_index # Your Code here\n", 126 | "print(len(word_index)) #(# Your Code Here)\n", 127 | "# Expected output\n", 128 | "# 29714" 129 | ], 130 | "execution_count": 6, 131 | "outputs": [ 132 | { 133 | "output_type": "stream", 134 | "text": [ 135 | "29714\n" 136 | ], 137 | "name": "stdout" 138 | } 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "metadata": { 144 | "id": "1Gr3dbQfC5VR", 145 | "colab_type": "code", 146 | "colab": { 147 | "base_uri": "https://localhost:8080/", 148 | "height": 51 149 | }, 150 | "outputId": "2779240f-12b6-4664-dc32-7afb92c463de" 151 | }, 152 | "source": [ 153 | "sequences = tokenizer.texts_to_sequences(sentences) # Your Code Here\n", 154 | "padded = pad_sequences(sequences, padding = 'post') # Your Code here\n", 155 | "print(padded[0])\n", 156 | "print(padded.shape)\n", 157 | "\n", 158 | "# Expected output\n", 159 | "# [ 96 176 1158 ... 0 0 0]\n", 160 | "# (2225, 2442)" 161 | ], 162 | "execution_count": 10, 163 | "outputs": [ 164 | { 165 | "output_type": "stream", 166 | "text": [ 167 | "[ 96 176 1158 ... 0 0 0]\n", 168 | "(2225, 2442)\n" 169 | ], 170 | "name": "stdout" 171 | } 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "metadata": { 177 | "id": "fZufOahzC6yx", 178 | "colab_type": "code", 179 | "colab": { 180 | "base_uri": "https://localhost:8080/", 181 | "height": 71 182 | }, 183 | "outputId": "554a55df-4adc-4195-e4f8-c784db3bf120" 184 | }, 185 | "source": [ 186 | "# Your Code Here\n", 187 | "label_tokenizer = Tokenizer()\n", 188 | "label_tokenizer.fit_on_texts(labels)\n", 189 | "label_word_index = label_tokenizer.word_index\n", 190 | "label_seq = label_tokenizer.texts_to_sequences(labels)\n", 191 | "print(label_seq)\n", 192 | "print(label_word_index)\n", 193 | "\n", 194 | "# Expected Output\n", 195 | "# [[4], [2], [1], [1], [5], [3], [3], [1], [1], [5], [5], [2], [2], [3], [1], [2], [3], [1], [2], [4], [4], [4], [1], [1], [4], [1], [5], [4], [3], [5], [3], [4], [5], [5], [2], [3], [4], [5], [3], [2], [3], [1], [2], [1], [4], [5], [3], [3], [3], [2], [1], [3], [2], [2], [1], [3], [2], [1], [1], [2], [2], [1], [2], [1], [2], [4], [2], [5], [4], [2], [3], [2], [3], [1], [2], [4], [2], [1], [1], [2], [2], [1], [3], [2], [5], [3], [3], [2], [5], [2], [1], [1], [3], [1], [3], [1], [2], [1], [2], [5], [5], [1], [2], [3], [3], [4], [1], [5], [1], [4], [2], [5], [1], [5], [1], [5], [5], [3], [1], [1], [5], [3], [2], [4], [2], [2], [4], [1], [3], [1], [4], [5], [1], [2], [2], [4], [5], [4], [1], [2], [2], [2], [4], [1], [4], [2], [1], [5], [1], [4], [1], [4], [3], [2], [4], [5], [1], [2], [3], [2], [5], [3], [3], [5], [3], [2], [5], [3], [3], [5], [3], [1], [2], [3], [3], [2], [5], [1], [2], [2], [1], [4], [1], [4], [4], [1], [2], [1], [3], [5], [3], [2], [3], [2], [4], [3], [5], [3], [4], [2], [1], [2], [1], [4], [5], [2], [3], [3], [5], [1], [5], [3], [1], [5], [1], [1], [5], [1], [3], [3], [5], [4], [1], [3], [2], [5], [4], [1], [4], [1], [5], [3], [1], [5], [4], [2], [4], 
[2], [2], [4], [2], [1], [2], [1], [2], [1], [5], [2], [2], [5], [1], [1], [3], [4], [3], [3], [3], [4], [1], [4], [3], [2], [4], [5], [4], [1], [1], [2], [2], [3], [2], [4], [1], [5], [1], [3], [4], [5], [2], [1], [5], [1], [4], [3], [4], [2], [2], [3], [3], [1], [2], [4], [5], [3], [4], [2], [5], [1], [5], [1], [5], [3], [2], [1], [2], [1], [1], [5], [1], [3], [3], [2], [5], [4], [2], [1], [2], [5], [2], [2], [2], [3], [2], [3], [5], [5], [2], [1], [2], [3], [2], [4], [5], [2], [1], [1], [5], [2], [2], [3], [4], [5], [4], [3], [2], [1], [3], [2], [5], [4], [5], [4], [3], [1], [5], [2], [3], [2], [2], [3], [1], [4], [2], [2], [5], [5], [4], [1], [2], [5], [4], [4], [5], [5], [5], [3], [1], [3], [4], [2], [5], [3], [2], [5], [3], [3], [1], [1], [2], [3], [5], [2], [1], [2], [2], [1], [2], [3], [3], [3], [1], [4], [4], [2], [4], [1], [5], [2], [3], [2], [5], [2], [3], [5], [3], [2], [4], [2], [1], [1], [2], [1], [1], [5], [1], [1], [1], [4], [2], [2], [2], [3], [1], [1], [2], [4], [2], [3], [1], [3], [4], [2], [1], [5], [2], [3], [4], [2], [1], [2], [3], [2], [2], [1], [5], [4], [3], [4], [2], [1], [2], [5], [4], [4], [2], [1], [1], [5], [3], [3], [3], [1], [3], [4], [4], [5], [3], [4], [5], [2], [1], [1], [4], [2], [1], [1], [3], [1], [1], [2], [1], [5], [4], [3], [1], [3], [4], [2], [2], [2], [4], [2], [2], [1], [1], [1], [1], [2], [4], [5], [1], [1], [4], [2], [4], [5], [3], [1], [2], [3], [2], [4], [4], [3], [4], [2], [1], [2], [5], [1], [3], [5], [1], [1], [3], [4], [5], [4], [1], [3], [2], [5], [3], [2], [5], [1], [1], [4], [3], [5], [3], [5], [3], [4], [3], [5], [1], [2], [1], [5], [1], [5], [4], [2], [1], [3], [5], [3], [5], [5], [5], [3], [5], [4], [3], [4], [4], [1], [1], [4], [4], [1], [5], [5], [1], [4], [5], [1], [1], [4], [2], [3], [4], [2], [1], [5], [1], [5], [3], [4], [5], [5], [2], [5], [5], [1], [4], [4], [3], [1], [4], [1], [3], [3], [5], [4], [2], [4], [4], [4], [2], [3], [3], [1], [4], [2], [2], [5], [5], [1], [4], [2], [4], [5], [1], [4], [3], [4], [3], [2], [3], [3], [2], [1], [4], [1], [4], [3], [5], [4], [1], [5], [4], [1], [3], [5], [1], [4], [1], [1], [3], [5], [2], [3], [5], [2], [2], [4], [2], [5], [4], [1], [4], [3], [4], [3], [2], [3], [5], [1], [2], [2], [2], [5], [1], [2], [5], [5], [1], [5], [3], [3], [3], [1], [1], [1], [4], [3], [1], [3], [3], [4], [3], [1], [2], [5], [1], [2], [2], [4], [2], [5], [5], [5], [2], [5], [5], [3], [4], [2], [1], [4], [1], [1], [3], [2], [1], [4], [2], [1], [4], [1], [1], [5], [1], [2], [1], [2], [4], [3], [4], [2], [1], [1], [2], [2], [2], [2], [3], [1], [2], [4], [2], [1], [3], [2], [4], [2], [1], [2], [3], [5], [1], [2], [3], [2], [5], [2], [2], [2], [1], [3], [5], [1], [3], [1], [3], [3], [2], [2], [1], [4], [5], [1], [5], [2], [2], [2], [4], [1], [4], [3], [4], [4], [4], [1], [4], [4], [5], [5], [4], [1], [5], [4], [1], [1], [2], [5], [4], [2], [1], [2], [3], [2], [5], [4], [2], [3], [2], [4], [1], [2], [5], [2], [3], [1], [5], [3], [1], [2], [1], [3], [3], [1], [5], [5], [2], [2], [1], [4], [4], [1], [5], [4], [4], [2], [1], [5], [4], [1], [1], [2], [5], [2], [2], [2], [5], [1], [5], [4], [4], [4], [3], [4], [4], [5], [5], [1], [1], [3], [2], [5], [1], [3], [5], [4], [3], [4], [4], [2], [5], [3], [4], [3], [3], [1], [3], [3], [5], [4], [1], [3], [1], [5], [3], [2], [2], [3], [1], [1], [1], [5], [4], [4], [2], [5], [1], [3], [4], [3], [5], [4], [4], [2], [2], [1], [2], [2], [4], [3], [5], [2], [2], [2], [2], [2], [4], [1], [3], [4], [4], [2], [2], [5], [3], [5], [1], [4], [1], [5], [1], [4], [1], [2], [1], [3], [3], [5], 
[2], [1], [3], [3], [1], [5], [3], [2], [4], [1], [2], [2], [2], [5], [5], [4], [4], [2], [2], [5], [1], [2], [5], [4], [4], [2], [2], [1], [1], [1], [3], [3], [1], [3], [1], [2], [5], [1], [4], [5], [1], [1], [2], [2], [4], [4], [1], [5], [1], [5], [1], [5], [3], [5], [5], [4], [5], [2], [2], [3], [1], [3], [4], [2], [3], [1], [3], [1], [5], [1], [3], [1], [1], [4], [5], [1], [3], [1], [1], [2], [4], [5], [3], [4], [5], [3], [5], [3], [5], [5], [4], [5], [3], [5], [5], [4], [4], [1], [1], [5], [5], [4], [5], [3], [4], [5], [2], [4], [1], [2], [5], [5], [4], [5], [4], [2], [5], [1], [5], [2], [1], [2], [1], [3], [4], [5], [3], [2], [5], [5], [3], [2], [5], [1], [3], [1], [2], [2], [2], [2], [2], [5], [4], [1], [5], [5], [2], [1], [4], [4], [5], [1], [2], [3], [2], [3], [2], [2], [5], [3], [2], [2], [4], [3], [1], [4], [5], [3], [2], [2], [1], [5], [3], [4], [2], [2], [3], [2], [1], [5], [1], [5], [4], [3], [2], [2], [4], [2], [2], [1], [2], [4], [5], [3], [2], [3], [2], [1], [4], [2], [3], [5], [4], [2], [5], [1], [3], [3], [1], [3], [2], [4], [5], [1], [1], [4], [2], [1], [5], [4], [1], [3], [1], [2], [2], [2], [3], [5], [1], [3], [4], [2], [2], [4], [5], [5], [4], [4], [1], [1], [5], [4], [5], [1], [3], [4], [2], [1], [5], [2], [2], [5], [1], [2], [1], [4], [3], [3], [4], [5], [3], [5], [2], [2], [3], [1], [4], [1], [1], [1], [3], [2], [1], [2], [4], [1], [2], [2], [1], [3], [4], [1], [2], [4], [1], [1], [2], [2], [2], [2], [3], [5], [4], [2], [2], [1], [2], [5], [2], [5], [1], [3], [2], [2], [4], [5], [2], [2], [2], [3], [2], [3], [4], [5], [3], [5], [1], [4], [3], [2], [4], [1], [2], [2], [5], [4], [2], [2], [1], [1], [5], [1], [3], [1], [2], [1], [2], [3], [3], [2], [3], [4], [5], [1], [2], [5], [1], [3], [3], [4], [5], [2], [3], [3], [1], [4], [2], [1], [5], [1], [5], [1], [2], [1], [3], [5], [4], [2], [1], [3], [4], [1], [5], [2], [1], [5], [1], [4], [1], [4], [3], [1], [2], [5], [4], [4], [3], [4], [5], [4], [1], [2], [4], [2], [5], [1], [4], [3], [3], [3], [3], [5], [5], [5], [2], [3], [3], [1], [1], [4], [1], [3], [2], [2], [4], [1], [4], [2], [4], [3], [3], [1], [2], [3], [1], [2], [4], [2], [2], [5], [5], [1], [2], [4], [4], [3], [2], [3], [1], [5], [5], [3], [3], [2], [2], [4], [4], [1], [1], [3], [4], [1], [4], [2], [1], [2], [3], [1], [5], [2], [4], [3], [5], [4], [2], [1], [5], [4], [4], [5], [3], [4], [5], [1], [5], [1], [1], [1], [3], [4], [1], [2], [1], [1], [2], [4], [1], [2], [5], [3], [4], [1], [3], [4], [5], [3], [1], [3], [4], [2], [5], [1], [3], [2], [4], [4], [4], [3], [2], [1], [3], [5], [4], [5], [1], [4], [2], [3], [5], [4], [3], [1], [1], [2], [5], [2], [2], [3], [2], [2], [3], [4], [5], [3], [5], [5], [2], [3], [1], [3], [5], [1], [5], [3], [5], [5], [5], [2], [1], [3], [1], [5], [4], [4], [2], [3], [5], [2], [1], [2], [3], [3], [2], [1], [4], [4], [4], [2], [3], [3], [2], [1], [1], [5], [2], [1], [1], [3], [3], [3], [5], [3], [2], [4], [2], [3], [5], [5], [2], [1], [3], [5], [1], [5], [3], [3], [2], [3], [1], [5], [5], [4], [4], [4], [4], [3], [4], [2], [4], [1], [1], [5], [2], [4], [5], [2], [4], [1], [4], [5], [5], [3], [3], [1], [2], [2], [4], [5], [1], [3], [2], [4], [5], [3], [1], [5], [3], [3], [4], [1], [3], [2], [3], [5], [4], [1], [3], [5], [5], [2], [1], [4], [4], [1], [5], [4], [3], [4], [1], [3], [3], [1], [5], [1], [3], [1], [4], [5], [1], [5], [2], [2], [5], [5], [5], [4], [1], [2], [2], [3], [3], [2], [3], [5], [1], [1], [4], [3], [1], [2], [1], [2], [4], [1], [1], [2], [5], [1], [1], [4], [1], [2], [3], [2], [5], [4], [5], [3], [2], [5], [3], 
[5], [3], [3], [2], [1], [1], [1], [4], [4], [1], [3], [5], [4], [1], [5], [2], [5], [3], [2], [1], [4], [2], [1], [3], [2], [5], [5], [5], [3], [5], [3], [5], [1], [5], [1], [3], [3], [2], [3], [4], [1], [4], [1], [2], [3], [4], [5], [5], [3], [5], [3], [1], [1], [3], [2], [4], [1], [3], [3], [5], [1], [3], [3], [2], [4], [4], [2], [4], [1], [1], [2], [3], [2], [4], [1], [4], [3], [5], [1], [2], [1], [5], [4], [4], [1], [3], [1], [2], [1], [2], [1], [1], [5], [5], [2], [4], [4], [2], [4], [2], [2], [1], [1], [3], [1], [4], [1], [4], [1], [1], [2], [2], [4], [1], [2], [4], [4], [3], [1], [2], [5], [5], [4], [3], [1], [1], [4], [2], [4], [5], [5], [3], [3], [2], [5], [1], [5], [5], [2], [1], [3], [4], [2], [1], [5], [4], [3], [3], [1], [1], [2], [2], [2], [2], [2], [5], [2], [3], [3], [4], [4], [5], [3], [5], [2], [3], [1], [1], [2], [4], [2], [4], [1], [2], [2], [3], [1], [1], [3], [3], [5], [5], [3], [2], [3], [3], [2], [4], [3], [3], [3], [3], [3], [5], [5], [4], [3], [1], [3], [1], [4], [1], [1], [1], [5], [4], [5], [4], [1], [4], [1], [1], [5], [5], [2], [5], [5], [3], [2], [1], [4], [4], [3], [2], [1], [2], [5], [1], [3], [5], [1], [1], [2], [3], [4], [4], [2], [2], [1], [3], [5], [1], [1], [3], [5], [4], [1], [5], [2], [3], [1], [3], [4], [5], [1], [3], [2], [5], [3], [5], [3], [1], [3], [2], [2], [3], [2], [4], [1], [2], [5], [2], [1], [1], [5], [4], [3], [4], [3], [3], [1], [1], [1], [2], [4], [5], [2], [1], [2], [1], [2], [4], [2], [2], [2], [2], [1], [1], [1], [2], [2], [5], [2], [2], [2], [1], [1], [1], [4], [2], [1], [1], [1], [2], [5], [4], [4], [4], [3], [2], [2], [4], [2], [4], [1], [1], [3], [3], [3], [1], [1], [3], [3], [4], [2], [1], [1], [1], [1], [2], [1], [2], [2], [2], [2], [1], [3], [1], [4], [4], [1], [4], [2], [5], [2], [1], [2], [4], [4], [3], [5], [2], [5], [2], [4], [3], [5], [3], [5], [5], [4], [2], [4], [4], [2], [3], [1], [5], [2], [3], [5], [2], [4], [1], [4], [3], [1], [3], [2], [3], [3], [2], [2], [2], [4], [3], [2], [3], [2], [5], [3], [1], [3], [3], [1], [5], [4], [4], [2], [4], [1], [2], [2], [3], [1], [4], [4], [4], [1], [5], [1], [3], [2], [3], [3], [5], [4], [2], [4], [1], [5], [5], [1], [2], [5], [4], [4], [1], [5], [2], [3], [3], [3], [4], [4], [2], [3], [2], [3], [3], [5], [1], [4], [2], [4], [5], [4], [4], [1], [3], [1], [1], [3], [5], [5], [2], [3], [3], [1], [2], [2], [4], [2], [4], [4], [1], [2], [3], [1], [2], [2], [1], [4], [1], [4], [5], [1], [1], [5], [2], [4], [1], [1], [3], [4], [2], [3], [1], [1], [3], [5], [4], [4], [4], [2], [1], [5], [5], [4], [2], [3], [4], [1], [1], [4], [4], [3], [2], [1], [5], [5], [1], [5], [4], [4], [2], [2], [2], [1], [1], [4], [1], [2], [4], [2], [2], [1], [2], [3], [2], [2], [4], [2], [4], [3], [4], [5], [3], [4], [5], [1], [3], [5], [2], [4], [2], [4], [5], [4], [1], [2], [2], [3], [5], [3], [1]]\n", 196 | "# {'sport': 1, 'business': 2, 'politics': 3, 'tech': 4, 'entertainment': 5}" 197 | ], 198 | "execution_count": 12, 199 | "outputs": [ 200 | { 201 | "output_type": "stream", 202 | "text": [ 203 | "[[4], [2], [1], [1], [5], [3], [3], [1], [1], [5], [5], [2], [2], [3], [1], [2], [3], [1], [2], [4], [4], [4], [1], [1], [4], [1], [5], [4], [3], [5], [3], [4], [5], [5], [2], [3], [4], [5], [3], [2], [3], [1], [2], [1], [4], [5], [3], [3], [3], [2], [1], [3], [2], [2], [1], [3], [2], [1], [1], [2], [2], [1], [2], [1], [2], [4], [2], [5], [4], [2], [3], [2], [3], [1], [2], [4], [2], [1], [1], [2], [2], [1], [3], [2], [5], [3], [3], [2], [5], [2], [1], [1], [3], [1], [3], [1], [2], [1], [2], [5], [5], [1], [2], 
[3], [3], [4], [1], [5], [1], [4], [2], [5], [1], [5], [1], [5], [5], [3], [1], [1], [5], [3], [2], [4], [2], [2], [4], [1], [3], [1], [4], [5], [1], [2], [2], [4], [5], [4], [1], [2], [2], [2], [4], [1], [4], [2], [1], [5], [1], [4], [1], [4], [3], [2], [4], [5], [1], [2], [3], [2], [5], [3], [3], [5], [3], [2], [5], [3], [3], [5], [3], [1], [2], [3], [3], [2], [5], [1], [2], [2], [1], [4], [1], [4], [4], [1], [2], [1], [3], [5], [3], [2], [3], [2], [4], [3], [5], [3], [4], [2], [1], [2], [1], [4], [5], [2], [3], [3], [5], [1], [5], [3], [1], [5], [1], [1], [5], [1], [3], [3], [5], [4], [1], [3], [2], [5], [4], [1], [4], [1], [5], [3], [1], [5], [4], [2], [4], [2], [2], [4], [2], [1], [2], [1], [2], [1], [5], [2], [2], [5], [1], [1], [3], [4], [3], [3], [3], [4], [1], [4], [3], [2], [4], [5], [4], [1], [1], [2], [2], [3], [2], [4], [1], [5], [1], [3], [4], [5], [2], [1], [5], [1], [4], [3], [4], [2], [2], [3], [3], [1], [2], [4], [5], [3], [4], [2], [5], [1], [5], [1], [5], [3], [2], [1], [2], [1], [1], [5], [1], [3], [3], [2], [5], [4], [2], [1], [2], [5], [2], [2], [2], [3], [2], [3], [5], [5], [2], [1], [2], [3], [2], [4], [5], [2], [1], [1], [5], [2], [2], [3], [4], [5], [4], [3], [2], [1], [3], [2], [5], [4], [5], [4], [3], [1], [5], [2], [3], [2], [2], [3], [1], [4], [2], [2], [5], [5], [4], [1], [2], [5], [4], [4], [5], [5], [5], [3], [1], [3], [4], [2], [5], [3], [2], [5], [3], [3], [1], [1], [2], [3], [5], [2], [1], [2], [2], [1], [2], [3], [3], [3], [1], [4], [4], [2], [4], [1], [5], [2], [3], [2], [5], [2], [3], [5], [3], [2], [4], [2], [1], [1], [2], [1], [1], [5], [1], [1], [1], [4], [2], [2], [2], [3], [1], [1], [2], [4], [2], [3], [1], [3], [4], [2], [1], [5], [2], [3], [4], [2], [1], [2], [3], [2], [2], [1], [5], [4], [3], [4], [2], [1], [2], [5], [4], [4], [2], [1], [1], [5], [3], [3], [3], [1], [3], [4], [4], [5], [3], [4], [5], [2], [1], [1], [4], [2], [1], [1], [3], [1], [1], [2], [1], [5], [4], [3], [1], [3], [4], [2], [2], [2], [4], [2], [2], [1], [1], [1], [1], [2], [4], [5], [1], [1], [4], [2], [4], [5], [3], [1], [2], [3], [2], [4], [4], [3], [4], [2], [1], [2], [5], [1], [3], [5], [1], [1], [3], [4], [5], [4], [1], [3], [2], [5], [3], [2], [5], [1], [1], [4], [3], [5], [3], [5], [3], [4], [3], [5], [1], [2], [1], [5], [1], [5], [4], [2], [1], [3], [5], [3], [5], [5], [5], [3], [5], [4], [3], [4], [4], [1], [1], [4], [4], [1], [5], [5], [1], [4], [5], [1], [1], [4], [2], [3], [4], [2], [1], [5], [1], [5], [3], [4], [5], [5], [2], [5], [5], [1], [4], [4], [3], [1], [4], [1], [3], [3], [5], [4], [2], [4], [4], [4], [2], [3], [3], [1], [4], [2], [2], [5], [5], [1], [4], [2], [4], [5], [1], [4], [3], [4], [3], [2], [3], [3], [2], [1], [4], [1], [4], [3], [5], [4], [1], [5], [4], [1], [3], [5], [1], [4], [1], [1], [3], [5], [2], [3], [5], [2], [2], [4], [2], [5], [4], [1], [4], [3], [4], [3], [2], [3], [5], [1], [2], [2], [2], [5], [1], [2], [5], [5], [1], [5], [3], [3], [3], [1], [1], [1], [4], [3], [1], [3], [3], [4], [3], [1], [2], [5], [1], [2], [2], [4], [2], [5], [5], [5], [2], [5], [5], [3], [4], [2], [1], [4], [1], [1], [3], [2], [1], [4], [2], [1], [4], [1], [1], [5], [1], [2], [1], [2], [4], [3], [4], [2], [1], [1], [2], [2], [2], [2], [3], [1], [2], [4], [2], [1], [3], [2], [4], [2], [1], [2], [3], [5], [1], [2], [3], [2], [5], [2], [2], [2], [1], [3], [5], [1], [3], [1], [3], [3], [2], [2], [1], [4], [5], [1], [5], [2], [2], [2], [4], [1], [4], [3], [4], [4], [4], [1], [4], [4], [5], [5], [4], [1], [5], [4], [1], [1], [2], [5], [4], [2], [1], [2], [3], [2], 
[5], [4], [2], [3], [2], [4], [1], [2], [5], [2], [3], [1], [5], [3], [1], [2], [1], [3], [3], [1], [5], [5], [2], [2], [1], [4], [4], [1], [5], [4], [4], [2], [1], [5], [4], [1], [1], [2], [5], [2], [2], [2], [5], [1], [5], [4], [4], [4], [3], [4], [4], [5], [5], [1], [1], [3], [2], [5], [1], [3], [5], [4], [3], [4], [4], [2], [5], [3], [4], [3], [3], [1], [3], [3], [5], [4], [1], [3], [1], [5], [3], [2], [2], [3], [1], [1], [1], [5], [4], [4], [2], [5], [1], [3], [4], [3], [5], [4], [4], [2], [2], [1], [2], [2], [4], [3], [5], [2], [2], [2], [2], [2], [4], [1], [3], [4], [4], [2], [2], [5], [3], [5], [1], [4], [1], [5], [1], [4], [1], [2], [1], [3], [3], [5], [2], [1], [3], [3], [1], [5], [3], [2], [4], [1], [2], [2], [2], [5], [5], [4], [4], [2], [2], [5], [1], [2], [5], [4], [4], [2], [2], [1], [1], [1], [3], [3], [1], [3], [1], [2], [5], [1], [4], [5], [1], [1], [2], [2], [4], [4], [1], [5], [1], [5], [1], [5], [3], [5], [5], [4], [5], [2], [2], [3], [1], [3], [4], [2], [3], [1], [3], [1], [5], [1], [3], [1], [1], [4], [5], [1], [3], [1], [1], [2], [4], [5], [3], [4], [5], [3], [5], [3], [5], [5], [4], [5], [3], [5], [5], [4], [4], [1], [1], [5], [5], [4], [5], [3], [4], [5], [2], [4], [1], [2], [5], [5], [4], [5], [4], [2], [5], [1], [5], [2], [1], [2], [1], [3], [4], [5], [3], [2], [5], [5], [3], [2], [5], [1], [3], [1], [2], [2], [2], [2], [2], [5], [4], [1], [5], [5], [2], [1], [4], [4], [5], [1], [2], [3], [2], [3], [2], [2], [5], [3], [2], [2], [4], [3], [1], [4], [5], [3], [2], [2], [1], [5], [3], [4], [2], [2], [3], [2], [1], [5], [1], [5], [4], [3], [2], [2], [4], [2], [2], [1], [2], [4], [5], [3], [2], [3], [2], [1], [4], [2], [3], [5], [4], [2], [5], [1], [3], [3], [1], [3], [2], [4], [5], [1], [1], [4], [2], [1], [5], [4], [1], [3], [1], [2], [2], [2], [3], [5], [1], [3], [4], [2], [2], [4], [5], [5], [4], [4], [1], [1], [5], [4], [5], [1], [3], [4], [2], [1], [5], [2], [2], [5], [1], [2], [1], [4], [3], [3], [4], [5], [3], [5], [2], [2], [3], [1], [4], [1], [1], [1], [3], [2], [1], [2], [4], [1], [2], [2], [1], [3], [4], [1], [2], [4], [1], [1], [2], [2], [2], [2], [3], [5], [4], [2], [2], [1], [2], [5], [2], [5], [1], [3], [2], [2], [4], [5], [2], [2], [2], [3], [2], [3], [4], [5], [3], [5], [1], [4], [3], [2], [4], [1], [2], [2], [5], [4], [2], [2], [1], [1], [5], [1], [3], [1], [2], [1], [2], [3], [3], [2], [3], [4], [5], [1], [2], [5], [1], [3], [3], [4], [5], [2], [3], [3], [1], [4], [2], [1], [5], [1], [5], [1], [2], [1], [3], [5], [4], [2], [1], [3], [4], [1], [5], [2], [1], [5], [1], [4], [1], [4], [3], [1], [2], [5], [4], [4], [3], [4], [5], [4], [1], [2], [4], [2], [5], [1], [4], [3], [3], [3], [3], [5], [5], [5], [2], [3], [3], [1], [1], [4], [1], [3], [2], [2], [4], [1], [4], [2], [4], [3], [3], [1], [2], [3], [1], [2], [4], [2], [2], [5], [5], [1], [2], [4], [4], [3], [2], [3], [1], [5], [5], [3], [3], [2], [2], [4], [4], [1], [1], [3], [4], [1], [4], [2], [1], [2], [3], [1], [5], [2], [4], [3], [5], [4], [2], [1], [5], [4], [4], [5], [3], [4], [5], [1], [5], [1], [1], [1], [3], [4], [1], [2], [1], [1], [2], [4], [1], [2], [5], [3], [4], [1], [3], [4], [5], [3], [1], [3], [4], [2], [5], [1], [3], [2], [4], [4], [4], [3], [2], [1], [3], [5], [4], [5], [1], [4], [2], [3], [5], [4], [3], [1], [1], [2], [5], [2], [2], [3], [2], [2], [3], [4], [5], [3], [5], [5], [2], [3], [1], [3], [5], [1], [5], [3], [5], [5], [5], [2], [1], [3], [1], [5], [4], [4], [2], [3], [5], [2], [1], [2], [3], [3], [2], [1], [4], [4], [4], [2], [3], [3], [2], [1], [1], [5], [2], [1], [1], 
[3], [3], [3], [5], [3], [2], [4], [2], [3], [5], [5], [2], [1], [3], [5], [1], [5], [3], [3], [2], [3], [1], [5], [5], [4], [4], [4], [4], [3], [4], [2], [4], [1], [1], [5], [2], [4], [5], [2], [4], [1], [4], [5], [5], [3], [3], [1], [2], [2], [4], [5], [1], [3], [2], [4], [5], [3], [1], [5], [3], [3], [4], [1], [3], [2], [3], [5], [4], [1], [3], [5], [5], [2], [1], [4], [4], [1], [5], [4], [3], [4], [1], [3], [3], [1], [5], [1], [3], [1], [4], [5], [1], [5], [2], [2], [5], [5], [5], [4], [1], [2], [2], [3], [3], [2], [3], [5], [1], [1], [4], [3], [1], [2], [1], [2], [4], [1], [1], [2], [5], [1], [1], [4], [1], [2], [3], [2], [5], [4], [5], [3], [2], [5], [3], [5], [3], [3], [2], [1], [1], [1], [4], [4], [1], [3], [5], [4], [1], [5], [2], [5], [3], [2], [1], [4], [2], [1], [3], [2], [5], [5], [5], [3], [5], [3], [5], [1], [5], [1], [3], [3], [2], [3], [4], [1], [4], [1], [2], [3], [4], [5], [5], [3], [5], [3], [1], [1], [3], [2], [4], [1], [3], [3], [5], [1], [3], [3], [2], [4], [4], [2], [4], [1], [1], [2], [3], [2], [4], [1], [4], [3], [5], [1], [2], [1], [5], [4], [4], [1], [3], [1], [2], [1], [2], [1], [1], [5], [5], [2], [4], [4], [2], [4], [2], [2], [1], [1], [3], [1], [4], [1], [4], [1], [1], [2], [2], [4], [1], [2], [4], [4], [3], [1], [2], [5], [5], [4], [3], [1], [1], [4], [2], [4], [5], [5], [3], [3], [2], [5], [1], [5], [5], [2], [1], [3], [4], [2], [1], [5], [4], [3], [3], [1], [1], [2], [2], [2], [2], [2], [5], [2], [3], [3], [4], [4], [5], [3], [5], [2], [3], [1], [1], [2], [4], [2], [4], [1], [2], [2], [3], [1], [1], [3], [3], [5], [5], [3], [2], [3], [3], [2], [4], [3], [3], [3], [3], [3], [5], [5], [4], [3], [1], [3], [1], [4], [1], [1], [1], [5], [4], [5], [4], [1], [4], [1], [1], [5], [5], [2], [5], [5], [3], [2], [1], [4], [4], [3], [2], [1], [2], [5], [1], [3], [5], [1], [1], [2], [3], [4], [4], [2], [2], [1], [3], [5], [1], [1], [3], [5], [4], [1], [5], [2], [3], [1], [3], [4], [5], [1], [3], [2], [5], [3], [5], [3], [1], [3], [2], [2], [3], [2], [4], [1], [2], [5], [2], [1], [1], [5], [4], [3], [4], [3], [3], [1], [1], [1], [2], [4], [5], [2], [1], [2], [1], [2], [4], [2], [2], [2], [2], [1], [1], [1], [2], [2], [5], [2], [2], [2], [1], [1], [1], [4], [2], [1], [1], [1], [2], [5], [4], [4], [4], [3], [2], [2], [4], [2], [4], [1], [1], [3], [3], [3], [1], [1], [3], [3], [4], [2], [1], [1], [1], [1], [2], [1], [2], [2], [2], [2], [1], [3], [1], [4], [4], [1], [4], [2], [5], [2], [1], [2], [4], [4], [3], [5], [2], [5], [2], [4], [3], [5], [3], [5], [5], [4], [2], [4], [4], [2], [3], [1], [5], [2], [3], [5], [2], [4], [1], [4], [3], [1], [3], [2], [3], [3], [2], [2], [2], [4], [3], [2], [3], [2], [5], [3], [1], [3], [3], [1], [5], [4], [4], [2], [4], [1], [2], [2], [3], [1], [4], [4], [4], [1], [5], [1], [3], [2], [3], [3], [5], [4], [2], [4], [1], [5], [5], [1], [2], [5], [4], [4], [1], [5], [2], [3], [3], [3], [4], [4], [2], [3], [2], [3], [3], [5], [1], [4], [2], [4], [5], [4], [4], [1], [3], [1], [1], [3], [5], [5], [2], [3], [3], [1], [2], [2], [4], [2], [4], [4], [1], [2], [3], [1], [2], [2], [1], [4], [1], [4], [5], [1], [1], [5], [2], [4], [1], [1], [3], [4], [2], [3], [1], [1], [3], [5], [4], [4], [4], [2], [1], [5], [5], [4], [2], [3], [4], [1], [1], [4], [4], [3], [2], [1], [5], [5], [1], [5], [4], [4], [2], [2], [2], [1], [1], [4], [1], [2], [4], [2], [2], [1], [2], [3], [2], [2], [4], [2], [4], [3], [4], [5], [3], [4], [5], [1], [3], [5], [2], [4], [2], [4], [5], [4], [1], [2], [2], [3], [5], [3], [1]]\n", 204 | "{'sport': 1, 'business': 2, 'politics': 3, 
'tech': 4, 'entertainment': 5}\n" 205 | ], 206 | "name": "stdout" 207 | } 208 | ] 209 | } 210 | ] 211 | } -------------------------------------------------------------------------------- /Week 1/Quiz 1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/07Agarg/Natural-Language-Processing-In-Tensorflow-Course/46eb21e25f73fd8644a95e64696d64dd4843e1e8/Week 1/Quiz 1.pdf -------------------------------------------------------------------------------- /Week 1/exercise_question.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Exercise-question.ipynb 3 | 4 | Automatically generated by Colaboratory. 5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%201%20-%20Exercise-question.ipynb 8 | """ 9 | """ 10 | !wget --no-check-certificate \ 11 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv \ 12 | -O /tmp/bbc-text.csv 13 | 14 | """ 15 | import csv 16 | from tensorflow.keras.preprocessing.text import Tokenizer 17 | from tensorflow.keras.preprocessing.sequence import pad_sequences 18 | 19 | 20 | #Stopwords list from https://github.com/Yoast/YoastSEO.js/blob/develop/src/config/stopwords.js 21 | # Convert it to a Python list and paste it here 22 | stopwords = [ "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "could", "did", "do", "does", "doing", "down", "during", "each", "few", "for", "from", "further", "had", "has", "have", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "it", "it's", "its", "itself", "let's", "me", "more", "most", "my", "myself", "nor", "of", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own", "same", "she", "she'd", "she'll", "she's", "should", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "we", "we'd", "we'll", "we're", "we've", "were", "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "would", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves" ] 23 | #YOUR CODE HERE 24 | 25 | sentences = [] 26 | labels = [] 27 | fields = [] 28 | with open("bbc-text.csv", 'r') as csvfile: 29 | # Your Code here 30 | reader = csv.reader(csvfile, delimiter=',') 31 | next(reader) 32 | for row in reader: 33 | labels.append(row[0]) 34 | sentence = row[1] 35 | for word in stopwords: 36 | token = " " + word + " " 37 | sentence = sentence.replace(token, " ") 38 | sentence = sentence.replace(" ", " ") 39 | sentences.append(sentence) 40 | 41 | print(len(sentences)) 42 | print(sentences[0]) 43 | 44 | #Expected output 45 | # 2225 46 | # tv future hands viewers home theatre systems plasma high-definition tvs digital video recorders moving living room way people watch tv will radically different five years time. 
according expert panel gathered annual consumer electronics show las vegas discuss new technologies will impact one favourite pastimes. us leading trend programmes content will delivered viewers via home networks cable satellite telecoms companies broadband service providers front rooms portable devices. one talked-about technologies ces digital personal video recorders (dvr pvr). set-top boxes like us s tivo uk s sky+ system allow people record store play pause forward wind tv programmes want. essentially technology allows much personalised tv. also built-in high-definition tv sets big business japan us slower take off europe lack high-definition programming. not can people forward wind adverts can also forget abiding network channel schedules putting together a-la-carte entertainment. us networks cable satellite companies worried means terms advertising revenues well brand identity viewer loyalty channels. although us leads technology moment also concern raised europe particularly growing uptake services like sky+. happens today will see nine months years time uk adam hume bbc broadcast s futurologist told bbc news website. likes bbc no issues lost advertising revenue yet. pressing issue moment commercial uk broadcasters brand loyalty important everyone. will talking content brands rather network brands said tim hanlon brand communications firm starcom mediavest. reality broadband connections anybody can producer content. added: challenge now hard promote programme much choice. means said stacey jolna senior vice president tv guide tv group way people find content want watch simplified tv viewers. means networks us terms channels take leaf google s book search engine future instead scheduler help people find want watch. kind channel model might work younger ipod generation used taking control gadgets play them. might not suit everyone panel recognised. older generations comfortable familiar schedules channel brands know getting. perhaps not want much choice put hands mr hanlon suggested. end kids just diapers pushing buttons already - everything possible available said mr hanlon. ultimately consumer will tell market want. 50 000 new gadgets technologies showcased ces many enhancing tv-watching experience. high-definition tv sets everywhere many new models lcd (liquid crystal display) tvs launched dvr capability built instead external boxes. one example launched show humax s 26-inch lcd tv 80-hour tivo dvr dvd recorder. one us s biggest satellite tv companies directtv even launched branded dvr show 100-hours recording capability instant replay search function. set can pause rewind tv 90 hours. microsoft chief bill gates announced pre-show keynote speech partnership tivo called tivotogo means people can play recorded programmes windows pcs mobile devices. reflect increasing trend freeing multimedia people can watch want want. 47 | 48 | tokenizer = Tokenizer(oov_token = "") # Your Code Here 49 | tokenizer.fit_on_texts(sentences) #(# Your Code Here) 50 | word_index = tokenizer.word_index # Your Code here 51 | print(len(word_index)) #(# Your Code Here) 52 | # Expected output 53 | # 29714 54 | 55 | sequences = tokenizer.texts_to_sequences(sentences) # Your Code Here 56 | padded = pad_sequences(sequences, padding = 'post') # Your Code here 57 | print(padded[0]) 58 | print(padded.shape) 59 | 60 | # Expected output 61 | # [ 96 176 1158 ... 
0 0 0] 62 | # (2225, 2442) 63 | 64 | # Your Code Here 65 | label_tokenizer = Tokenizer() 66 | label_tokenizer.fit_on_texts(labels) 67 | label_word_index = label_tokenizer.word_index 68 | label_seq = label_tokenizer.texts_to_sequences(labels) 69 | print(label_seq) 70 | print(label_word_index) 71 | 72 | # Expected Output 73 | # [[4], [2], [1], [1], [5], [3], [3], [1], [1], [5], [5], [2], [2], [3], [1], [2], [3], [1], [2], [4], [4], [4], [1], [1], [4], [1], [5], [4], [3], [5], [3], [4], [5], [5], [2], [3], [4], [5], [3], [2], [3], [1], [2], [1], [4], [5], [3], [3], [3], [2], [1], [3], [2], [2], [1], [3], [2], [1], [1], [2], [2], [1], [2], [1], [2], [4], [2], [5], [4], [2], [3], [2], [3], [1], [2], [4], [2], [1], [1], [2], [2], [1], [3], [2], [5], [3], [3], [2], [5], [2], [1], [1], [3], [1], [3], [1], [2], [1], [2], [5], [5], [1], [2], [3], [3], [4], [1], [5], [1], [4], [2], [5], [1], [5], [1], [5], [5], [3], [1], [1], [5], [3], [2], [4], [2], [2], [4], [1], [3], [1], [4], [5], [1], [2], [2], [4], [5], [4], [1], [2], [2], [2], [4], [1], [4], [2], [1], [5], [1], [4], [1], [4], [3], [2], [4], [5], [1], [2], [3], [2], [5], [3], [3], [5], [3], [2], [5], [3], [3], [5], [3], [1], [2], [3], [3], [2], [5], [1], [2], [2], [1], [4], [1], [4], [4], [1], [2], [1], [3], [5], [3], [2], [3], [2], [4], [3], [5], [3], [4], [2], [1], [2], [1], [4], [5], [2], [3], [3], [5], [1], [5], [3], [1], [5], [1], [1], [5], [1], [3], [3], [5], [4], [1], [3], [2], [5], [4], [1], [4], [1], [5], [3], [1], [5], [4], [2], [4], [2], [2], [4], [2], [1], [2], [1], [2], [1], [5], [2], [2], [5], [1], [1], [3], [4], [3], [3], [3], [4], [1], [4], [3], [2], [4], [5], [4], [1], [1], [2], [2], [3], [2], [4], [1], [5], [1], [3], [4], [5], [2], [1], [5], [1], [4], [3], [4], [2], [2], [3], [3], [1], [2], [4], [5], [3], [4], [2], [5], [1], [5], [1], [5], [3], [2], [1], [2], [1], [1], [5], [1], [3], [3], [2], [5], [4], [2], [1], [2], [5], [2], [2], [2], [3], [2], [3], [5], [5], [2], [1], [2], [3], [2], [4], [5], [2], [1], [1], [5], [2], [2], [3], [4], [5], [4], [3], [2], [1], [3], [2], [5], [4], [5], [4], [3], [1], [5], [2], [3], [2], [2], [3], [1], [4], [2], [2], [5], [5], [4], [1], [2], [5], [4], [4], [5], [5], [5], [3], [1], [3], [4], [2], [5], [3], [2], [5], [3], [3], [1], [1], [2], [3], [5], [2], [1], [2], [2], [1], [2], [3], [3], [3], [1], [4], [4], [2], [4], [1], [5], [2], [3], [2], [5], [2], [3], [5], [3], [2], [4], [2], [1], [1], [2], [1], [1], [5], [1], [1], [1], [4], [2], [2], [2], [3], [1], [1], [2], [4], [2], [3], [1], [3], [4], [2], [1], [5], [2], [3], [4], [2], [1], [2], [3], [2], [2], [1], [5], [4], [3], [4], [2], [1], [2], [5], [4], [4], [2], [1], [1], [5], [3], [3], [3], [1], [3], [4], [4], [5], [3], [4], [5], [2], [1], [1], [4], [2], [1], [1], [3], [1], [1], [2], [1], [5], [4], [3], [1], [3], [4], [2], [2], [2], [4], [2], [2], [1], [1], [1], [1], [2], [4], [5], [1], [1], [4], [2], [4], [5], [3], [1], [2], [3], [2], [4], [4], [3], [4], [2], [1], [2], [5], [1], [3], [5], [1], [1], [3], [4], [5], [4], [1], [3], [2], [5], [3], [2], [5], [1], [1], [4], [3], [5], [3], [5], [3], [4], [3], [5], [1], [2], [1], [5], [1], [5], [4], [2], [1], [3], [5], [3], [5], [5], [5], [3], [5], [4], [3], [4], [4], [1], [1], [4], [4], [1], [5], [5], [1], [4], [5], [1], [1], [4], [2], [3], [4], [2], [1], [5], [1], [5], [3], [4], [5], [5], [2], [5], [5], [1], [4], [4], [3], [1], [4], [1], [3], [3], [5], [4], [2], [4], [4], [4], [2], [3], [3], [1], [4], [2], [2], [5], [5], [1], [4], [2], [4], [5], [1], [4], [3], [4], [3], [2], [3], [3], [2], [1], [4], 
[1], [4], [3], [5], [4], [1], [5], [4], [1], [3], [5], [1], [4], [1], [1], [3], [5], [2], [3], [5], [2], [2], [4], [2], [5], [4], [1], [4], [3], [4], [3], [2], [3], [5], [1], [2], [2], [2], [5], [1], [2], [5], [5], [1], [5], [3], [3], [3], [1], [1], [1], [4], [3], [1], [3], [3], [4], [3], [1], [2], [5], [1], [2], [2], [4], [2], [5], [5], [5], [2], [5], [5], [3], [4], [2], [1], [4], [1], [1], [3], [2], [1], [4], [2], [1], [4], [1], [1], [5], [1], [2], [1], [2], [4], [3], [4], [2], [1], [1], [2], [2], [2], [2], [3], [1], [2], [4], [2], [1], [3], [2], [4], [2], [1], [2], [3], [5], [1], [2], [3], [2], [5], [2], [2], [2], [1], [3], [5], [1], [3], [1], [3], [3], [2], [2], [1], [4], [5], [1], [5], [2], [2], [2], [4], [1], [4], [3], [4], [4], [4], [1], [4], [4], [5], [5], [4], [1], [5], [4], [1], [1], [2], [5], [4], [2], [1], [2], [3], [2], [5], [4], [2], [3], [2], [4], [1], [2], [5], [2], [3], [1], [5], [3], [1], [2], [1], [3], [3], [1], [5], [5], [2], [2], [1], [4], [4], [1], [5], [4], [4], [2], [1], [5], [4], [1], [1], [2], [5], [2], [2], [2], [5], [1], [5], [4], [4], [4], [3], [4], [4], [5], [5], [1], [1], [3], [2], [5], [1], [3], [5], [4], [3], [4], [4], [2], [5], [3], [4], [3], [3], [1], [3], [3], [5], [4], [1], [3], [1], [5], [3], [2], [2], [3], [1], [1], [1], [5], [4], [4], [2], [5], [1], [3], [4], [3], [5], [4], [4], [2], [2], [1], [2], [2], [4], [3], [5], [2], [2], [2], [2], [2], [4], [1], [3], [4], [4], [2], [2], [5], [3], [5], [1], [4], [1], [5], [1], [4], [1], [2], [1], [3], [3], [5], [2], [1], [3], [3], [1], [5], [3], [2], [4], [1], [2], [2], [2], [5], [5], [4], [4], [2], [2], [5], [1], [2], [5], [4], [4], [2], [2], [1], [1], [1], [3], [3], [1], [3], [1], [2], [5], [1], [4], [5], [1], [1], [2], [2], [4], [4], [1], [5], [1], [5], [1], [5], [3], [5], [5], [4], [5], [2], [2], [3], [1], [3], [4], [2], [3], [1], [3], [1], [5], [1], [3], [1], [1], [4], [5], [1], [3], [1], [1], [2], [4], [5], [3], [4], [5], [3], [5], [3], [5], [5], [4], [5], [3], [5], [5], [4], [4], [1], [1], [5], [5], [4], [5], [3], [4], [5], [2], [4], [1], [2], [5], [5], [4], [5], [4], [2], [5], [1], [5], [2], [1], [2], [1], [3], [4], [5], [3], [2], [5], [5], [3], [2], [5], [1], [3], [1], [2], [2], [2], [2], [2], [5], [4], [1], [5], [5], [2], [1], [4], [4], [5], [1], [2], [3], [2], [3], [2], [2], [5], [3], [2], [2], [4], [3], [1], [4], [5], [3], [2], [2], [1], [5], [3], [4], [2], [2], [3], [2], [1], [5], [1], [5], [4], [3], [2], [2], [4], [2], [2], [1], [2], [4], [5], [3], [2], [3], [2], [1], [4], [2], [3], [5], [4], [2], [5], [1], [3], [3], [1], [3], [2], [4], [5], [1], [1], [4], [2], [1], [5], [4], [1], [3], [1], [2], [2], [2], [3], [5], [1], [3], [4], [2], [2], [4], [5], [5], [4], [4], [1], [1], [5], [4], [5], [1], [3], [4], [2], [1], [5], [2], [2], [5], [1], [2], [1], [4], [3], [3], [4], [5], [3], [5], [2], [2], [3], [1], [4], [1], [1], [1], [3], [2], [1], [2], [4], [1], [2], [2], [1], [3], [4], [1], [2], [4], [1], [1], [2], [2], [2], [2], [3], [5], [4], [2], [2], [1], [2], [5], [2], [5], [1], [3], [2], [2], [4], [5], [2], [2], [2], [3], [2], [3], [4], [5], [3], [5], [1], [4], [3], [2], [4], [1], [2], [2], [5], [4], [2], [2], [1], [1], [5], [1], [3], [1], [2], [1], [2], [3], [3], [2], [3], [4], [5], [1], [2], [5], [1], [3], [3], [4], [5], [2], [3], [3], [1], [4], [2], [1], [5], [1], [5], [1], [2], [1], [3], [5], [4], [2], [1], [3], [4], [1], [5], [2], [1], [5], [1], [4], [1], [4], [3], [1], [2], [5], [4], [4], [3], [4], [5], [4], [1], [2], [4], [2], [5], [1], [4], [3], [3], [3], [3], [5], [5], [5], [2], [3], [3], [1], 
[1], [4], [1], [3], [2], [2], [4], [1], [4], [2], [4], [3], [3], [1], [2], [3], [1], [2], [4], [2], [2], [5], [5], [1], [2], [4], [4], [3], [2], [3], [1], [5], [5], [3], [3], [2], [2], [4], [4], [1], [1], [3], [4], [1], [4], [2], [1], [2], [3], [1], [5], [2], [4], [3], [5], [4], [2], [1], [5], [4], [4], [5], [3], [4], [5], [1], [5], [1], [1], [1], [3], [4], [1], [2], [1], [1], [2], [4], [1], [2], [5], [3], [4], [1], [3], [4], [5], [3], [1], [3], [4], [2], [5], [1], [3], [2], [4], [4], [4], [3], [2], [1], [3], [5], [4], [5], [1], [4], [2], [3], [5], [4], [3], [1], [1], [2], [5], [2], [2], [3], [2], [2], [3], [4], [5], [3], [5], [5], [2], [3], [1], [3], [5], [1], [5], [3], [5], [5], [5], [2], [1], [3], [1], [5], [4], [4], [2], [3], [5], [2], [1], [2], [3], [3], [2], [1], [4], [4], [4], [2], [3], [3], [2], [1], [1], [5], [2], [1], [1], [3], [3], [3], [5], [3], [2], [4], [2], [3], [5], [5], [2], [1], [3], [5], [1], [5], [3], [3], [2], [3], [1], [5], [5], [4], [4], [4], [4], [3], [4], [2], [4], [1], [1], [5], [2], [4], [5], [2], [4], [1], [4], [5], [5], [3], [3], [1], [2], [2], [4], [5], [1], [3], [2], [4], [5], [3], [1], [5], [3], [3], [4], [1], [3], [2], [3], [5], [4], [1], [3], [5], [5], [2], [1], [4], [4], [1], [5], [4], [3], [4], [1], [3], [3], [1], [5], [1], [3], [1], [4], [5], [1], [5], [2], [2], [5], [5], [5], [4], [1], [2], [2], [3], [3], [2], [3], [5], [1], [1], [4], [3], [1], [2], [1], [2], [4], [1], [1], [2], [5], [1], [1], [4], [1], [2], [3], [2], [5], [4], [5], [3], [2], [5], [3], [5], [3], [3], [2], [1], [1], [1], [4], [4], [1], [3], [5], [4], [1], [5], [2], [5], [3], [2], [1], [4], [2], [1], [3], [2], [5], [5], [5], [3], [5], [3], [5], [1], [5], [1], [3], [3], [2], [3], [4], [1], [4], [1], [2], [3], [4], [5], [5], [3], [5], [3], [1], [1], [3], [2], [4], [1], [3], [3], [5], [1], [3], [3], [2], [4], [4], [2], [4], [1], [1], [2], [3], [2], [4], [1], [4], [3], [5], [1], [2], [1], [5], [4], [4], [1], [3], [1], [2], [1], [2], [1], [1], [5], [5], [2], [4], [4], [2], [4], [2], [2], [1], [1], [3], [1], [4], [1], [4], [1], [1], [2], [2], [4], [1], [2], [4], [4], [3], [1], [2], [5], [5], [4], [3], [1], [1], [4], [2], [4], [5], [5], [3], [3], [2], [5], [1], [5], [5], [2], [1], [3], [4], [2], [1], [5], [4], [3], [3], [1], [1], [2], [2], [2], [2], [2], [5], [2], [3], [3], [4], [4], [5], [3], [5], [2], [3], [1], [1], [2], [4], [2], [4], [1], [2], [2], [3], [1], [1], [3], [3], [5], [5], [3], [2], [3], [3], [2], [4], [3], [3], [3], [3], [3], [5], [5], [4], [3], [1], [3], [1], [4], [1], [1], [1], [5], [4], [5], [4], [1], [4], [1], [1], [5], [5], [2], [5], [5], [3], [2], [1], [4], [4], [3], [2], [1], [2], [5], [1], [3], [5], [1], [1], [2], [3], [4], [4], [2], [2], [1], [3], [5], [1], [1], [3], [5], [4], [1], [5], [2], [3], [1], [3], [4], [5], [1], [3], [2], [5], [3], [5], [3], [1], [3], [2], [2], [3], [2], [4], [1], [2], [5], [2], [1], [1], [5], [4], [3], [4], [3], [3], [1], [1], [1], [2], [4], [5], [2], [1], [2], [1], [2], [4], [2], [2], [2], [2], [1], [1], [1], [2], [2], [5], [2], [2], [2], [1], [1], [1], [4], [2], [1], [1], [1], [2], [5], [4], [4], [4], [3], [2], [2], [4], [2], [4], [1], [1], [3], [3], [3], [1], [1], [3], [3], [4], [2], [1], [1], [1], [1], [2], [1], [2], [2], [2], [2], [1], [3], [1], [4], [4], [1], [4], [2], [5], [2], [1], [2], [4], [4], [3], [5], [2], [5], [2], [4], [3], [5], [3], [5], [5], [4], [2], [4], [4], [2], [3], [1], [5], [2], [3], [5], [2], [4], [1], [4], [3], [1], [3], [2], [3], [3], [2], [2], [2], [4], [3], [2], [3], [2], [5], [3], [1], [3], [3], [1], [5], [4], [4], 
[2], [4], [1], [2], [2], [3], [1], [4], [4], [4], [1], [5], [1], [3], [2], [3], [3], [5], [4], [2], [4], [1], [5], [5], [1], [2], [5], [4], [4], [1], [5], [2], [3], [3], [3], [4], [4], [2], [3], [2], [3], [3], [5], [1], [4], [2], [4], [5], [4], [4], [1], [3], [1], [1], [3], [5], [5], [2], [3], [3], [1], [2], [2], [4], [2], [4], [4], [1], [2], [3], [1], [2], [2], [1], [4], [1], [4], [5], [1], [1], [5], [2], [4], [1], [1], [3], [4], [2], [3], [1], [1], [3], [5], [4], [4], [4], [2], [1], [5], [5], [4], [2], [3], [4], [1], [1], [4], [4], [3], [2], [1], [5], [5], [1], [5], [4], [4], [2], [2], [2], [1], [1], [4], [1], [2], [4], [2], [2], [1], [2], [3], [2], [2], [4], [2], [4], [3], [4], [5], [3], [4], [5], [1], [3], [5], [2], [4], [2], [4], [5], [4], [1], [2], [2], [3], [5], [3], [1]] 74 | # {'sport': 1, 'business': 2, 'politics': 3, 'tech': 4, 'entertainment': 5} -------------------------------------------------------------------------------- /Week 2/Course_3_Week_2_Exercise_Question.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Course 4 - Week 2 - Exercise - Question.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "accelerator": "GPU" 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "code", 19 | "metadata": { 20 | "id": "gnwiOnGyW5JK", 21 | "colab_type": "code", 22 | "colab": { 23 | "base_uri": "https://localhost:8080/", 24 | "height": 204 25 | }, 26 | "outputId": "fb4de731-64ae-4492-da83-ff37930ec2e4" 27 | }, 28 | "source": [ 29 | "import csv\n", 30 | "import tensorflow as tf\n", 31 | "import numpy as np\n", 32 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 33 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 34 | "\n", 35 | "!wget --no-check-certificate \\\n", 36 | " https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv \\\n", 37 | " -O /tmp/bbc-text.csv" 38 | ], 39 | "execution_count": 1, 40 | "outputs": [ 41 | { 42 | "output_type": "stream", 43 | "text": [ 44 | "--2019-06-22 11:51:22-- https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv\n", 45 | "Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.141.128, 2607:f8b0:400c:c06::80\n", 46 | "Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.141.128|:443... connected.\n", 47 | "HTTP request sent, awaiting response... 
200 OK\n", 48 | "Length: 5057493 (4.8M) [application/octet-stream]\n", 49 | "Saving to: ‘/tmp/bbc-text.csv’\n", 50 | "\n", 51 | "\r/tmp/bbc-text.csv 0%[ ] 0 --.-KB/s \r/tmp/bbc-text.csv 100%[===================>] 4.82M --.-KB/s in 0.06s \n", 52 | "\n", 53 | "2019-06-22 11:51:23 (82.1 MB/s) - ‘/tmp/bbc-text.csv’ saved [5057493/5057493]\n", 54 | "\n" 55 | ], 56 | "name": "stdout" 57 | } 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "metadata": { 63 | "id": "EYo6A4v5ZABQ", 64 | "colab_type": "code", 65 | "colab": {} 66 | }, 67 | "source": [ 68 | "vocab_size = 1000 # YOUR CODE HERE\n", 69 | "embedding_dim = 16 # YOUR CODE HERE\n", 70 | "max_length = 120 # YOUR CODE HERE\n", 71 | "trunc_type = 'post' # YOUR CODE HERE\n", 72 | "padding_type = 'post' # YOUR CODE HERE\n", 73 | "oov_tok = \"\" # YOUR CODE HERE\n", 74 | "training_portion = .8" 75 | ], 76 | "execution_count": 0, 77 | "outputs": [] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "metadata": { 82 | "id": "iU1qq3_SZBx_", 83 | "colab_type": "code", 84 | "colab": { 85 | "base_uri": "https://localhost:8080/", 86 | "height": 34 87 | }, 88 | "outputId": "e791d9ce-619d-4bf4-f2fd-2a1c2e54b8de" 89 | }, 90 | "source": [ 91 | "sentences = []\n", 92 | "labels = []\n", 93 | "stopwords = [ \"a\", \"about\", \"above\", \"after\", \"again\", \"against\", \"all\", \"am\", \"an\", \"and\", \"any\", \"are\", \"as\", \"at\", \"be\", \"because\", \"been\", \"before\", \"being\", \"below\", \"between\", \"both\", \"but\", \"by\", \"could\", \"did\", \"do\", \"does\", \"doing\", \"down\", \"during\", \"each\", \"few\", \"for\", \"from\", \"further\", \"had\", \"has\", \"have\", \"having\", \"he\", \"he'd\", \"he'll\", \"he's\", \"her\", \"here\", \"here's\", \"hers\", \"herself\", \"him\", \"himself\", \"his\", \"how\", \"how's\", \"i\", \"i'd\", \"i'll\", \"i'm\", \"i've\", \"if\", \"in\", \"into\", \"is\", \"it\", \"it's\", \"its\", \"itself\", \"let's\", \"me\", \"more\", \"most\", \"my\", \"myself\", \"nor\", \"of\", \"on\", \"once\", \"only\", \"or\", \"other\", \"ought\", \"our\", \"ours\", \"ourselves\", \"out\", \"over\", \"own\", \"same\", \"she\", \"she'd\", \"she'll\", \"she's\", \"should\", \"so\", \"some\", \"such\", \"than\", \"that\", \"that's\", \"the\", \"their\", \"theirs\", \"them\", \"themselves\", \"then\", \"there\", \"there's\", \"these\", \"they\", \"they'd\", \"they'll\", \"they're\", \"they've\", \"this\", \"those\", \"through\", \"to\", \"too\", \"under\", \"until\", \"up\", \"very\", \"was\", \"we\", \"we'd\", \"we'll\", \"we're\", \"we've\", \"were\", \"what\", \"what's\", \"when\", \"when's\", \"where\", \"where's\", \"which\", \"while\", \"who\", \"who's\", \"whom\", \"why\", \"why's\", \"with\", \"would\", \"you\", \"you'd\", \"you'll\", \"you're\", \"you've\", \"your\", \"yours\", \"yourself\", \"yourselves\" ]\n", 94 | "print(len(stopwords))\n", 95 | "# Expected Output\n", 96 | "# 153" 97 | ], 98 | "execution_count": 16, 99 | "outputs": [ 100 | { 101 | "output_type": "stream", 102 | "text": [ 103 | "153\n" 104 | ], 105 | "name": "stdout" 106 | } 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "metadata": { 112 | "id": "eutB2xMiZD0e", 113 | "colab_type": "code", 114 | "colab": { 115 | "base_uri": "https://localhost:8080/", 116 | "height": 88 117 | }, 118 | "outputId": "6cf64e1c-035e-40e6-9570-3bdb78f0da5c" 119 | }, 120 | "source": [ 121 | "with open(\"/tmp/bbc-text.csv\", 'r') as csvfile:\n", 122 | " # YOUR CODE HERE\n", 123 | " reader = csv.reader(csvfile, delimiter=',')\n", 124 | " next(reader)\n", 125 | " 
for row in reader:\n", 126 | " labels.append(row[0])\n", 127 | " sentence = row[1]\n", 128 | " for word in stopwords:\n", 129 | " token = \" \" + word + \" \"\n", 130 | " sentence = sentence.replace(token, \" \")\n", 131 | " sentence = sentence.replace(\" \", \" \")\n", 132 | " sentences.append(sentence)\n", 133 | " \n", 134 | " \n", 135 | "print(len(labels))\n", 136 | "print(len(sentences))\n", 137 | "print(sentences[0])\n", 138 | "# Expected Output\n", 139 | "# 2225\n", 140 | "# 2225\n", 141 | "# tv future hands viewers home theatre systems plasma high-definition tvs digital video recorders moving living room way people watch tv will radically different five years time. according expert panel gathered annual consumer electronics show las vegas discuss new technologies will impact one favourite pastimes. us leading trend programmes content will delivered viewers via home networks cable satellite telecoms companies broadband service providers front rooms portable devices. one talked-about technologies ces digital personal video recorders (dvr pvr). set-top boxes like us s tivo uk s sky+ system allow people record store play pause forward wind tv programmes want. essentially technology allows much personalised tv. also built-in high-definition tv sets big business japan us slower take off europe lack high-definition programming. not can people forward wind adverts can also forget abiding network channel schedules putting together a-la-carte entertainment. us networks cable satellite companies worried means terms advertising revenues well brand identity viewer loyalty channels. although us leads technology moment also concern raised europe particularly growing uptake services like sky+. happens today will see nine months years time uk adam hume bbc broadcast s futurologist told bbc news website. likes bbc no issues lost advertising revenue yet. pressing issue moment commercial uk broadcasters brand loyalty important everyone. will talking content brands rather network brands said tim hanlon brand communications firm starcom mediavest. reality broadband connections anybody can producer content. added: challenge now hard promote programme much choice. means said stacey jolna senior vice president tv guide tv group way people find content want watch simplified tv viewers. means networks us terms channels take leaf google s book search engine future instead scheduler help people find want watch. kind channel model might work younger ipod generation used taking control gadgets play them. might not suit everyone panel recognised. older generations comfortable familiar schedules channel brands know getting. perhaps not want much choice put hands mr hanlon suggested. end kids just diapers pushing buttons already - everything possible available said mr hanlon. ultimately consumer will tell market want. 50 000 new gadgets technologies showcased ces many enhancing tv-watching experience. high-definition tv sets everywhere many new models lcd (liquid crystal display) tvs launched dvr capability built instead external boxes. one example launched show humax s 26-inch lcd tv 80-hour tivo dvr dvd recorder. one us s biggest satellite tv companies directtv even launched branded dvr show 100-hours recording capability instant replay search function. set can pause rewind tv 90 hours. microsoft chief bill gates announced pre-show keynote speech partnership tivo called tivotogo means people can play recorded programmes windows pcs mobile devices. 
reflect increasing trend freeing multimedia people can watch want want." 142 | ], 143 | "execution_count": 17, 144 | "outputs": [ 145 | { 146 | "output_type": "stream", 147 | "text": [ 148 | "2225\n", 149 | "2225\n", 150 | "tv future hands viewers home theatre systems plasma high-definition tvs digital video recorders moving living room way people watch tv will radically different five years time. according expert panel gathered annual consumer electronics show las vegas discuss new technologies will impact one favourite pastimes. us leading trend programmes content will delivered viewers via home networks cable satellite telecoms companies broadband service providers front rooms portable devices. one talked-about technologies ces digital personal video recorders (dvr pvr). set-top boxes like us s tivo uk s sky+ system allow people record store play pause forward wind tv programmes want. essentially technology allows much personalised tv. also built-in high-definition tv sets big business japan us slower take off europe lack high-definition programming. not can people forward wind adverts can also forget abiding network channel schedules putting together a-la-carte entertainment. us networks cable satellite companies worried means terms advertising revenues well brand identity viewer loyalty channels. although us leads technology moment also concern raised europe particularly growing uptake services like sky+. happens today will see nine months years time uk adam hume bbc broadcast s futurologist told bbc news website. likes bbc no issues lost advertising revenue yet. pressing issue moment commercial uk broadcasters brand loyalty important everyone. will talking content brands rather network brands said tim hanlon brand communications firm starcom mediavest. reality broadband connections anybody can producer content. added: challenge now hard promote programme much choice. means said stacey jolna senior vice president tv guide tv group way people find content want watch simplified tv viewers. means networks us terms channels take leaf google s book search engine future instead scheduler help people find want watch. kind channel model might work younger ipod generation used taking control gadgets play them. might not suit everyone panel recognised. older generations comfortable familiar schedules channel brands know getting. perhaps not want much choice put hands mr hanlon suggested. end kids just diapers pushing buttons already - everything possible available said mr hanlon. ultimately consumer will tell market want. 50 000 new gadgets technologies showcased ces many enhancing tv-watching experience. high-definition tv sets everywhere many new models lcd (liquid crystal display) tvs launched dvr capability built instead external boxes. one example launched show humax s 26-inch lcd tv 80-hour tivo dvr dvd recorder. one us s biggest satellite tv companies directtv even launched branded dvr show 100-hours recording capability instant replay search function. set can pause rewind tv 90 hours. microsoft chief bill gates announced pre-show keynote speech partnership tivo called tivotogo means people can play recorded programmes windows pcs mobile devices. 
reflect increasing trend freeing multimedia people can watch want want.\n" 151 | ], 152 | "name": "stdout" 153 | } 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "metadata": { 159 | "id": "XfdaWh06ZGe3", 160 | "colab_type": "code", 161 | "colab": { 162 | "base_uri": "https://localhost:8080/", 163 | "height": 102 164 | }, 165 | "outputId": "b2f27c3e-2cb9-46c7-a1e1-ed91d890578e" 166 | }, 167 | "source": [ 168 | "train_size = int(len(sentences) * training_portion) # YOUR CODE HERE\n", 169 | "\n", 170 | "train_sentences = sentences[:train_size] # YOUR CODE HERE\n", 171 | "train_labels = labels[:train_size] # YOUR CODE HERE\n", 172 | "\n", 173 | "validation_sentences = sentences[train_size:] # YOUR CODE HERE\n", 174 | "validation_labels = labels[train_size:] # YOUR CODE HERE\n", 175 | "\n", 176 | "print(train_size)\n", 177 | "print(len(train_sentences))\n", 178 | "print(len(train_labels))\n", 179 | "print(len(validation_sentences))\n", 180 | "print(len(validation_labels))\n", 181 | "\n", 182 | "# Expected output (if training_portion=.8)\n", 183 | "# 1780\n", 184 | "# 1780\n", 185 | "# 1780\n", 186 | "# 445\n", 187 | "# 445" 188 | ], 189 | "execution_count": 18, 190 | "outputs": [ 191 | { 192 | "output_type": "stream", 193 | "text": [ 194 | "1780\n", 195 | "1780\n", 196 | "1780\n", 197 | "445\n", 198 | "445\n" 199 | ], 200 | "name": "stdout" 201 | } 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "metadata": { 207 | "id": "ULzA8xhwZI22", 208 | "colab_type": "code", 209 | "colab": { 210 | "base_uri": "https://localhost:8080/", 211 | "height": 119 212 | }, 213 | "outputId": "7fd933dd-0fee-45ca-fc10-532f51e6a30d" 214 | }, 215 | "source": [ 216 | "tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok) # YOUR CODE HERE\n", 217 | "tokenizer.fit_on_texts(train_sentences) #(# YOUR CODE HERE)\n", 218 | "word_index = tokenizer.word_index # YOUR CODE HERE\n", 219 | "\n", 220 | "train_sequences = tokenizer.texts_to_sequences(train_sentences) # YOUR CODE HERE\n", 221 | "train_padded = pad_sequences(train_sequences, padding = padding_type, maxlen=max_length) # YOUR CODE HERE\n", 222 | "\n", 223 | "print(len(train_sequences[0]))\n", 224 | "print(len(train_padded[0]))\n", 225 | "\n", 226 | "print(len(train_sequences[1]))\n", 227 | "print(len(train_padded[1]))\n", 228 | "\n", 229 | "print(len(train_sequences[10]))\n", 230 | "print(len(train_padded[10]))\n", 231 | "\n", 232 | "# Expected Ouput\n", 233 | "# 449\n", 234 | "# 120\n", 235 | "# 200\n", 236 | "# 120\n", 237 | "# 192\n", 238 | "# 120" 239 | ], 240 | "execution_count": 19, 241 | "outputs": [ 242 | { 243 | "output_type": "stream", 244 | "text": [ 245 | "449\n", 246 | "120\n", 247 | "200\n", 248 | "120\n", 249 | "192\n", 250 | "120\n" 251 | ], 252 | "name": "stdout" 253 | } 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "metadata": { 259 | "id": "c8PeFWzPZLW_", 260 | "colab_type": "code", 261 | "colab": { 262 | "base_uri": "https://localhost:8080/", 263 | "height": 51 264 | }, 265 | "outputId": "320703ad-222b-4e28-cb94-34e438e76a79" 266 | }, 267 | "source": [ 268 | "validation_sequences = tokenizer.texts_to_sequences(validation_sentences) # YOUR CODE HERE\n", 269 | "validation_padded = pad_sequences(validation_sequences, padding = padding_type, maxlen=max_length) # YOUR CODE HERE\n", 270 | "\n", 271 | "print(len(validation_sequences))\n", 272 | "print(validation_padded.shape)\n", 273 | "\n", 274 | "# Expected output\n", 275 | "# 445\n", 276 | "# (445, 120)" 277 | ], 278 | "execution_count": 20, 279 | "outputs": [ 280 
| { 281 | "output_type": "stream", 282 | "text": [ 283 | "445\n", 284 | "(445, 120)\n" 285 | ], 286 | "name": "stdout" 287 | } 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "metadata": { 293 | "id": "XkWiQ_FKZNp2", 294 | "colab_type": "code", 295 | "colab": {} 296 | }, 297 | "source": [ 298 | "label_tokenizer = tokenizer() # YOUR CODE HERE\n", 299 | "label_tokenizer.fit_on_texts(labels) #(# YOUR CODE HERE)\n", 300 | "\n", 301 | "training_label_seq = np.array(label_tokenizer.texts_to_sequences(train_labels)) # YOUR CODE HERE\n", 302 | "validation_label_seq = np.array(label_tokenizer.texts_to_sequences(validation_labels)) # YOUR CODE HERE\n", 303 | "\n", 304 | "print(training_label_seq[0])\n", 305 | "print(training_label_seq[1])\n", 306 | "print(training_label_seq[2])\n", 307 | "print(training_label_seq.shape)\n", 308 | "\n", 309 | "print(validation_label_seq[0])\n", 310 | "print(validation_label_seq[1])\n", 311 | "print(validation_label_seq[2])\n", 312 | "print(validation_label_seq.shape)\n", 313 | "\n", 314 | "# Expected output\n", 315 | "# [4]\n", 316 | "# [2]\n", 317 | "# [1]\n", 318 | "# (1780, 1)\n", 319 | "# [5]\n", 320 | "# [4]\n", 321 | "# [3]\n", 322 | "# (445, 1)" 323 | ], 324 | "execution_count": 0, 325 | "outputs": [] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "metadata": { 330 | "id": "HZ5um4MWZP-W", 331 | "colab_type": "code", 332 | "colab": {} 333 | }, 334 | "source": [ 335 | "model = tf.keras.Sequential([\n", 336 | "# YOUR CODE HERE\n", 337 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), \n", 338 | " tf.keras.layers.GlobalAveragePooling(), \n", 339 | " tf.keras.layers.Dense(24, activation = 'relu'), \n", 340 | " tf.keras.layers.Dense(6, activation = 'sigmoid')\n", 341 | " \n", 342 | "])\n", 343 | "model.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 344 | "model.summary()\n", 345 | "\n", 346 | "# Expected Output\n", 347 | "# Layer (type) Output Shape Param # \n", 348 | "# =================================================================\n", 349 | "# embedding (Embedding) (None, 120, 16) 16000 \n", 350 | "# _________________________________________________________________\n", 351 | "# global_average_pooling1d (Gl (None, 16) 0 \n", 352 | "# _________________________________________________________________\n", 353 | "# dense (Dense) (None, 24) 408 \n", 354 | "# _________________________________________________________________\n", 355 | "# dense_1 (Dense) (None, 6) 150 \n", 356 | "# =================================================================\n", 357 | "# Total params: 16,558\n", 358 | "# Trainable params: 16,558\n", 359 | "# Non-trainable params: 0" 360 | ], 361 | "execution_count": 0, 362 | "outputs": [] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "metadata": { 367 | "id": "XsfdxySKZSXu", 368 | "colab_type": "code", 369 | "colab": {} 370 | }, 371 | "source": [ 372 | "num_epochs = 30\n", 373 | "history = model.fit(train_padded, train_sequences, epochs=num_epochs, validation_data=(validation_padded, validation_label_seq), verbose=2) #(# YOUR CODE HERE)" 374 | ], 375 | "execution_count": 0, 376 | "outputs": [] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "metadata": { 381 | "id": "dQ0BX2apXS9u", 382 | "colab_type": "code", 383 | "colab": {} 384 | }, 385 | "source": [ 386 | "import matplotlib.pyplot as plt\n", 387 | "\n", 388 | "\n", 389 | "def plot_graphs(history, string):\n", 390 | " plt.plot(history.history[string])\n", 391 | " 
plt.plot(history.history['val_'+string])\n", 392 | " plt.xlabel(\"Epochs\")\n", 393 | " plt.ylabel(string)\n", 394 | " plt.legend([string, 'val_'+string])\n", 395 | " plt.show()\n", 396 | " \n", 397 | "plot_graphs(history, \"acc\")\n", 398 | "plot_graphs(history, \"loss\")" 399 | ], 400 | "execution_count": 0, 401 | "outputs": [] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "metadata": { 406 | "id": "w7Xc-uWxXhML", 407 | "colab_type": "code", 408 | "colab": {} 409 | }, 410 | "source": [ 411 | "reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])\n", 412 | "\n", 413 | "def decode_sentence(text):\n", 414 | " return ' '.join([reverse_word_index.get(i, '?') for i in text])\n" 415 | ], 416 | "execution_count": 0, 417 | "outputs": [] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "metadata": { 422 | "id": "OhnFA_TDXrih", 423 | "colab_type": "code", 424 | "colab": {} 425 | }, 426 | "source": [ 427 | "e = model.layers[0]\n", 428 | "weights = e.get_weights()[0]\n", 429 | "print(weights.shape) # shape: (vocab_size, embedding_dim)\n", 430 | "\n", 431 | "# Expected output\n", 432 | "# (1000, 16)" 433 | ], 434 | "execution_count": 0, 435 | "outputs": [] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "metadata": { 440 | "id": "_POzcWWAXudL", 441 | "colab_type": "code", 442 | "colab": {} 443 | }, 444 | "source": [ 445 | "import io\n", 446 | "\n", 447 | "out_v = io.open('vecs.tsv', 'w', encoding='utf-8')\n", 448 | "out_m = io.open('meta.tsv', 'w', encoding='utf-8')\n", 449 | "for word_num in range(1, vocab_size):\n", 450 | " word = reverse_word_index[word_num]\n", 451 | " embeddings = weights[word_num]\n", 452 | " out_m.write(word + \"\\n\")\n", 453 | " out_v.write('\\t'.join([str(x) for x in embeddings]) + \"\\n\")\n", 454 | "out_v.close()\n", 455 | "out_m.close()" 456 | ], 457 | "execution_count": 0, 458 | "outputs": [] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "metadata": { 463 | "id": "VmqpQMZ_XyOa", 464 | "colab_type": "code", 465 | "colab": {} 466 | }, 467 | "source": [ 468 | "try:\n", 469 | " from google.colab import files\n", 470 | "except ImportError:\n", 471 | " pass\n", 472 | "else:\n", 473 | " files.download('vecs.tsv')\n", 474 | " files.download('meta.tsv')" 475 | ], 476 | "execution_count": 0, 477 | "outputs": [] 478 | } 479 | ] 480 | } -------------------------------------------------------------------------------- /Week 2/Quiz 2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/07Agarg/Natural-Language-Processing-In-Tensorflow-Course/46eb21e25f73fd8644a95e64696d64dd4843e1e8/Week 2/Quiz 2.pdf -------------------------------------------------------------------------------- /Week 2/course_3_week_2_exercise_question.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 4 - Week 2 - Exercise - Question.ipynb 3 | 4 | Automatically generated by Colaboratory. 
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%202%20-%20Exercise%20-%20Question.ipynb 8 | """ 9 | 10 | import csv 11 | import tensorflow as tf 12 | import numpy as np 13 | from tensorflow.keras.preprocessing.text import Tokenizer 14 | from tensorflow.keras.preprocessing.sequence import pad_sequences 15 | ''' 16 | !wget --no-check-certificate \ 17 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv \ 18 | -O /tmp/bbc-text.csv 19 | ''' 20 | vocab_size = 1000 # YOUR CODE HERE 21 | embedding_dim = 16 # YOUR CODE HERE 22 | max_length = 120 # YOUR CODE HERE 23 | trunc_type = 'post' # YOUR CODE HERE 24 | padding_type = 'post' # YOUR CODE HERE 25 | oov_tok = "" # YOUR CODE HERE 26 | training_portion = .8 27 | 28 | sentences = [] 29 | labels = [] 30 | stopwords = [ "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "could", "did", "do", "does", "doing", "down", "during", "each", "few", "for", "from", "further", "had", "has", "have", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "it", "it's", "its", "itself", "let's", "me", "more", "most", "my", "myself", "nor", "of", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own", "same", "she", "she'd", "she'll", "she's", "should", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "we", "we'd", "we'll", "we're", "we've", "were", "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "would", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves" ] 31 | print(len(stopwords)) 32 | # Expected Output 33 | # 153 34 | 35 | with open("bbc-text.csv", 'r') as csvfile: 36 | # YOUR CODE HERE 37 | reader = csv.reader(csvfile, delimiter=',') 38 | next(reader) 39 | for row in reader: 40 | labels.append(row[0]) 41 | sentence = row[1] 42 | for word in stopwords: 43 | token = " " + word + " " 44 | sentence = sentence.replace(token, " ") 45 | sentence = sentence.replace(" ", " ") 46 | sentences.append(sentence) 47 | 48 | 49 | print(len(labels)) 50 | print(len(sentences)) 51 | print(sentences[0]) 52 | # Expected Output 53 | # 2225 54 | # 2225 55 | # tv future hands viewers home theatre systems plasma high-definition tvs digital video recorders moving living room way people watch tv will radically different five years time. according expert panel gathered annual consumer electronics show las vegas discuss new technologies will impact one favourite pastimes. us leading trend programmes content will delivered viewers via home networks cable satellite telecoms companies broadband service providers front rooms portable devices. one talked-about technologies ces digital personal video recorders (dvr pvr). set-top boxes like us s tivo uk s sky+ system allow people record store play pause forward wind tv programmes want. essentially technology allows much personalised tv. 
also built-in high-definition tv sets big business japan us slower take off europe lack high-definition programming. not can people forward wind adverts can also forget abiding network channel schedules putting together a-la-carte entertainment. us networks cable satellite companies worried means terms advertising revenues well brand identity viewer loyalty channels. although us leads technology moment also concern raised europe particularly growing uptake services like sky+. happens today will see nine months years time uk adam hume bbc broadcast s futurologist told bbc news website. likes bbc no issues lost advertising revenue yet. pressing issue moment commercial uk broadcasters brand loyalty important everyone. will talking content brands rather network brands said tim hanlon brand communications firm starcom mediavest. reality broadband connections anybody can producer content. added: challenge now hard promote programme much choice. means said stacey jolna senior vice president tv guide tv group way people find content want watch simplified tv viewers. means networks us terms channels take leaf google s book search engine future instead scheduler help people find want watch. kind channel model might work younger ipod generation used taking control gadgets play them. might not suit everyone panel recognised. older generations comfortable familiar schedules channel brands know getting. perhaps not want much choice put hands mr hanlon suggested. end kids just diapers pushing buttons already - everything possible available said mr hanlon. ultimately consumer will tell market want. 50 000 new gadgets technologies showcased ces many enhancing tv-watching experience. high-definition tv sets everywhere many new models lcd (liquid crystal display) tvs launched dvr capability built instead external boxes. one example launched show humax s 26-inch lcd tv 80-hour tivo dvr dvd recorder. one us s biggest satellite tv companies directtv even launched branded dvr show 100-hours recording capability instant replay search function. set can pause rewind tv 90 hours. microsoft chief bill gates announced pre-show keynote speech partnership tivo called tivotogo means people can play recorded programmes windows pcs mobile devices. reflect increasing trend freeing multimedia people can watch want want. 
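# Illustrative sanity check: a minimal sketch of what the stopword-removal loop
# above does to a single sentence. The sentence below is invented for
# illustration only (it is not a row from bbc-text.csv); it simply reuses the
# `stopwords` list defined earlier in this script.
_demo_sentence = "the match was one of the best games in years"
for _word in stopwords:
    # Only space-delimited " word " occurrences are replaced, mirroring the loop above.
    _demo_sentence = _demo_sentence.replace(" " + _word + " ", " ")
print(_demo_sentence)
# Prints "the match one best games years": "was", "of", "the" and "in" are
# stripped, but the leading "the" survives because it is not surrounded by
# spaces on both sides.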
56 | 57 | train_size = int(len(sentences) * training_portion) # YOUR CODE HERE 58 | 59 | train_sentences = sentences[:train_size] # YOUR CODE HERE 60 | train_labels = labels[:train_size] # YOUR CODE HERE 61 | 62 | validation_sentences = sentences[train_size:] # YOUR CODE HERE 63 | validation_labels = labels[train_size:] # YOUR CODE HERE 64 | 65 | print(train_size) 66 | print(len(train_sentences)) 67 | print(len(train_labels)) 68 | print(len(validation_sentences)) 69 | print(len(validation_labels)) 70 | 71 | # Expected output (if training_portion=.8) 72 | # 1780 73 | # 1780 74 | # 1780 75 | # 445 76 | # 445 77 | 78 | tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok) # YOUR CODE HERE 79 | tokenizer.fit_on_texts(train_sentences) #(# YOUR CODE HERE) 80 | word_index = tokenizer.word_index # YOUR CODE HERE 81 | 82 | train_sequences = tokenizer.texts_to_sequences(train_sentences) # YOUR CODE HERE 83 | train_padded = pad_sequences(train_sequences, padding = padding_type, maxlen=max_length) # YOUR CODE HERE 84 | 85 | print(len(train_sequences[0])) 86 | print(len(train_padded[0])) 87 | 88 | print(len(train_sequences[1])) 89 | print(len(train_padded[1])) 90 | 91 | print(len(train_sequences[10])) 92 | print(len(train_padded[10])) 93 | 94 | # Expected Ouput 95 | # 449 96 | # 120 97 | # 200 98 | # 120 99 | # 192 100 | # 120 101 | 102 | validation_sequences = tokenizer.texts_to_sequences(validation_sentences) # YOUR CODE HERE 103 | validation_padded = pad_sequences(validation_sequences, padding = padding_type, maxlen=max_length) # YOUR CODE HERE 104 | 105 | print(len(validation_sequences)) 106 | print(validation_padded.shape) 107 | 108 | # Expected output 109 | # 445 110 | # (445, 120) 111 | 112 | label_tokenizer = Tokenizer() # YOUR CODE HERE 113 | label_tokenizer.fit_on_texts(labels) #(# YOUR CODE HERE) 114 | 115 | training_label_seq = np.array(label_tokenizer.texts_to_sequences(train_labels)) # YOUR CODE HERE 116 | validation_label_seq = np.array(label_tokenizer.texts_to_sequences(validation_labels)) # YOUR CODE HERE 117 | 118 | print(training_label_seq[0]) 119 | print(training_label_seq[1]) 120 | print(training_label_seq[2]) 121 | print(training_label_seq.shape) 122 | 123 | print(validation_label_seq[0]) 124 | print(validation_label_seq[1]) 125 | print(validation_label_seq[2]) 126 | print(validation_label_seq.shape) 127 | 128 | # Expected output 129 | # [4] 130 | # [2] 131 | # [1] 132 | # (1780, 1) 133 | # [5] 134 | # [4] 135 | # [3] 136 | # (445, 1) 137 | 138 | model = tf.keras.Sequential([ 139 | # YOUR CODE HERE 140 | tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), 141 | tf.keras.layers.GlobalAveragePooling1D(), 142 | tf.keras.layers.Dense(24, activation = 'relu'), 143 | tf.keras.layers.Dense(6, activation = 'sigmoid') 144 | 145 | ]) 146 | model.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy']) 147 | model.summary() 148 | 149 | # Expected Output 150 | # Layer (type) Output Shape Param # 151 | # ================================================================= 152 | # embedding (Embedding) (None, 120, 16) 16000 153 | # _________________________________________________________________ 154 | # global_average_pooling1d (Gl (None, 16) 0 155 | # _________________________________________________________________ 156 | # dense (Dense) (None, 24) 408 157 | # _________________________________________________________________ 158 | # dense_1 (Dense) (None, 6) 150 159 | # 
================================================================= 160 | # Total params: 16,558 161 | # Trainable params: 16,558 162 | # Non-trainable params: 0 163 | 164 | num_epochs = 30 165 | history = model.fit(train_padded, training_label_seq, epochs=num_epochs, validation_data=(validation_padded, validation_label_seq), verbose=2) #(# YOUR CODE HERE) 166 | 167 | import matplotlib.pyplot as plt 168 | 169 | 170 | def plot_graphs(history, string): 171 | plt.plot(history.history[string]) 172 | plt.plot(history.history['val_'+string]) 173 | plt.xlabel("Epochs") 174 | plt.ylabel(string) 175 | plt.legend([string, 'val_'+string]) 176 | plt.show() 177 | 178 | plot_graphs(history, "acc") 179 | plot_graphs(history, "loss") 180 | 181 | reverse_word_index = dict([(value, key) for (key, value) in word_index.items()]) 182 | 183 | def decode_sentence(text): 184 | return ' '.join([reverse_word_index.get(i, '?') for i in text]) 185 | 186 | e = model.layers[0] 187 | weights = e.get_weights()[0] 188 | print(weights.shape) # shape: (vocab_size, embedding_dim) 189 | 190 | # Expected output 191 | # (1000, 16) 192 | 193 | import io 194 | 195 | out_v = io.open('vecs.tsv', 'w', encoding='utf-8') 196 | out_m = io.open('meta.tsv', 'w', encoding='utf-8') 197 | for word_num in range(1, vocab_size): 198 | word = reverse_word_index[word_num] 199 | embeddings = weights[word_num] 200 | out_m.write(word + "\n") 201 | out_v.write('\t'.join([str(x) for x in embeddings]) + "\n") 202 | out_v.close() 203 | out_m.close() 204 | 205 | try: 206 | from google.colab import files 207 | except ImportError: 208 | pass 209 | else: 210 | files.download('vecs.tsv') 211 | files.download('meta.tsv') -------------------------------------------------------------------------------- /Week 2/course_3_week_2_lesson_1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 3 - Week 2 - Lesson 1.ipynb 3 | 4 | Automatically generated by Colaboratory.
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%202%20-%20Lesson%201.ipynb 8 | """ 9 | 10 | # NOTE: PLEASE MAKE SURE YOU ARE RUNNING THIS IN A PYTHON3 ENVIRONMENT 11 | 12 | import tensorflow as tf 13 | print(tf.__version__) 14 | 15 | # This is needed for the iterator over the data 16 | # But not necessary if you have TF 2.0 installed 17 | #!pip install tensorflow==2.0.0-beta0 18 | 19 | 20 | tf.enable_eager_execution() 21 | 22 | # !pip install -q tensorflow-datasets 23 | 24 | import tensorflow_datasets as tfds 25 | imdb, info = tfds.load("imdb_reviews", with_info=True, as_supervised=True) 26 | 27 | import numpy as np 28 | 29 | train_data, test_data = imdb['train'], imdb['test'] 30 | 31 | training_sentences = [] 32 | training_labels = [] 33 | 34 | testing_sentences = [] 35 | testing_labels = [] 36 | 37 | # str(s.tonumpy()) is needed in Python3 instead of just s.numpy() 38 | for s,l in train_data: 39 | training_sentences.append(str(s.numpy())) 40 | training_labels.append(l.numpy()) 41 | 42 | for s,l in test_data: 43 | testing_sentences.append(str(s.numpy())) 44 | testing_labels.append(l.numpy()) 45 | 46 | training_labels_final = np.array(training_labels) 47 | testing_labels_final = np.array(testing_labels) 48 | 49 | vocab_size = 10000 50 | embedding_dim = 16 51 | max_length = 120 52 | trunc_type='post' 53 | oov_tok = "" 54 | 55 | 56 | from tensorflow.keras.preprocessing.text import Tokenizer 57 | from tensorflow.keras.preprocessing.sequence import pad_sequences 58 | 59 | tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok) 60 | tokenizer.fit_on_texts(training_sentences) 61 | word_index = tokenizer.word_index 62 | sequences = tokenizer.texts_to_sequences(training_sentences) 63 | padded = pad_sequences(sequences,maxlen=max_length, truncating=trunc_type) 64 | 65 | testing_sequences = tokenizer.texts_to_sequences(testing_sentences) 66 | testing_padded = pad_sequences(testing_sequences,maxlen=max_length) 67 | 68 | reverse_word_index = dict([(value, key) for (key, value) in word_index.items()]) 69 | 70 | def decode_review(text): 71 | return ' '.join([reverse_word_index.get(i, '?') for i in text]) 72 | 73 | print(decode_review(padded[1])) 74 | print(training_sentences[1]) 75 | 76 | model = tf.keras.Sequential([ 77 | tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), 78 | tf.keras.layers.Flatten(), 79 | tf.keras.layers.Dense(6, activation='relu'), 80 | tf.keras.layers.Dense(1, activation='sigmoid') 81 | ]) 82 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 83 | model.summary() 84 | 85 | num_epochs = 10 86 | model.fit(padded, training_labels_final, epochs=num_epochs, validation_data=(testing_padded, testing_labels_final)) 87 | 88 | e = model.layers[0] 89 | weights = e.get_weights()[0] 90 | print(weights.shape) # shape: (vocab_size, embedding_dim) 91 | 92 | import io 93 | 94 | out_v = io.open('vecs.tsv', 'w', encoding='utf-8') 95 | out_m = io.open('meta.tsv', 'w', encoding='utf-8') 96 | for word_num in range(1, vocab_size): 97 | word = reverse_word_index[word_num] 98 | embeddings = weights[word_num] 99 | out_m.write(word + "\n") 100 | out_v.write('\t'.join([str(x) for x in embeddings]) + "\n") 101 | out_v.close() 102 | out_m.close() 103 | 104 | try: 105 | from google.colab import files 106 | except ImportError: 107 | pass 108 | else: 109 | files.download('vecs.tsv') 110 | 
files.download('meta.tsv') 111 | 112 | sentence = "I really think this is amazing. honest." 113 | sequence = tokenizer.texts_to_sequences(sentence) 114 | print(sequence) -------------------------------------------------------------------------------- /Week 3/Course_3_Week_3_Lesson_1a.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Course 4 - Week 3 - Lesson 1a.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "rFiCyWQ-NC5D", 21 | "colab_type": "text" 22 | }, 23 | "source": [ 24 | "# Single Layer LSTM" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "metadata": { 30 | "id": "Y20Lud2ZMBhW", 31 | "colab_type": "code", 32 | "colab": {} 33 | }, 34 | "source": [ 35 | "from __future__ import absolute_import, division, print_function, unicode_literals\n", 36 | "\n", 37 | "\n", 38 | "import tensorflow_datasets as tfds\n", 39 | "import tensorflow as tf\n", 40 | "print(tf.__version__)" 41 | ], 42 | "execution_count": 0, 43 | "outputs": [] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "metadata": { 48 | "id": "Aclov8NVMJml", 49 | "colab_type": "code", 50 | "colab": {} 51 | }, 52 | "source": [ 53 | "# If the tf.__version__ is 1.x, please run this cell\n", 54 | "#!pip install tensorflow==2.0.0-beta0" 55 | ], 56 | "execution_count": 0, 57 | "outputs": [] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "metadata": { 62 | "id": "uAU8g7C0MPZE", 63 | "colab_type": "code", 64 | "colab": {} 65 | }, 66 | "source": [ 67 | "import tensorflow_datasets as tfds\n", 68 | "import tensorflow as tf\n", 69 | "print(tf.__version__)" 70 | ], 71 | "execution_count": 0, 72 | "outputs": [] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "metadata": { 77 | "id": "AW-4Vo4TMUHb", 78 | "colab_type": "code", 79 | "colab": {} 80 | }, 81 | "source": [ 82 | "# Get the data\n", 83 | "dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True)\n", 84 | "train_dataset, test_dataset = dataset['train'], dataset['test']" 85 | ], 86 | "execution_count": 0, 87 | "outputs": [] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "metadata": { 92 | "id": "DVfhKpHsPOxq", 93 | "colab_type": "code", 94 | "colab": {} 95 | }, 96 | "source": [ 97 | "tokenizer = info.features['text'].encoder" 98 | ], 99 | "execution_count": 0, 100 | "outputs": [] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "metadata": { 105 | "id": "ffvRUI0_McDS", 106 | "colab_type": "code", 107 | "colab": {} 108 | }, 109 | "source": [ 110 | "BUFFER_SIZE = 10000\n", 111 | "BATCH_SIZE = 64\n", 112 | "\n", 113 | "train_dataset = train_dataset.shuffle(BUFFER_SIZE)\n", 114 | "train_dataset = train_dataset.padded_batch(BATCH_SIZE, train_dataset.output_shapes)\n", 115 | "test_dataset = test_dataset.padded_batch(BATCH_SIZE, test_dataset.output_shapes)" 116 | ], 117 | "execution_count": 0, 118 | "outputs": [] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "metadata": { 123 | "id": "FxQooMEkMgur", 124 | "colab_type": "code", 125 | "colab": {} 126 | }, 127 | "source": [ 128 | "model = tf.keras.Sequential([\n", 129 | " tf.keras.layers.Embedding(tokenizer.vocab_size, 64),\n", 130 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),\n", 131 | " tf.keras.layers.Dense(64, activation='relu'),\n", 132 | " 
tf.keras.layers.Dense(1, activation='sigmoid')\n", 133 | "])" 134 | ], 135 | "execution_count": 0, 136 | "outputs": [] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "metadata": { 141 | "id": "QKI5dfPgMioL", 142 | "colab_type": "code", 143 | "colab": {} 144 | }, 145 | "source": [ 146 | "model.summary()" 147 | ], 148 | "execution_count": 0, 149 | "outputs": [] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "metadata": { 154 | "id": "Uip7QOVzMoMq", 155 | "colab_type": "code", 156 | "colab": {} 157 | }, 158 | "source": [ 159 | "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])" 160 | ], 161 | "execution_count": 0, 162 | "outputs": [] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "metadata": { 167 | "id": "7mlgzaRDMtF6", 168 | "colab_type": "code", 169 | "colab": {} 170 | }, 171 | "source": [ 172 | "NUM_EPOCHS = 10\n", 173 | "history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset)" 174 | ], 175 | "execution_count": 0, 176 | "outputs": [] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "metadata": { 181 | "colab_type": "code", 182 | "id": "Mp1Z7P9pYRSK", 183 | "colab": {} 184 | }, 185 | "source": [ 186 | "import matplotlib.pyplot as plt\n", 187 | "\n", 188 | "\n", 189 | "def plot_graphs(history, string):\n", 190 | " plt.plot(history.history[string])\n", 191 | " plt.plot(history.history['val_'+string])\n", 192 | " plt.xlabel(\"Epochs\")\n", 193 | " plt.ylabel(string)\n", 194 | " plt.legend([string, 'val_'+string])\n", 195 | " plt.show()" 196 | ], 197 | "execution_count": 0, 198 | "outputs": [] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "metadata": { 203 | "id": "R_sX6ilIM515", 204 | "colab_type": "code", 205 | "colab": {} 206 | }, 207 | "source": [ 208 | "plot_graphs(history, 'accuracy')" 209 | ], 210 | "execution_count": 0, 211 | "outputs": [] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "metadata": { 216 | "id": "RFEXtKtqNARB", 217 | "colab_type": "code", 218 | "colab": {} 219 | }, 220 | "source": [ 221 | "plot_graphs(history, 'loss')" 222 | ], 223 | "execution_count": 0, 224 | "outputs": [] 225 | } 226 | ] 227 | } -------------------------------------------------------------------------------- /Week 3/Course_3_Week_3_Lesson_1b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Course 4 - Week 3 - Lesson 1b.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "rFiCyWQ-NC5D", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "# Multiple Layer LSTM" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "metadata": { 31 | "id": "Y20Lud2ZMBhW", 32 | "colab_type": "code", 33 | "colab": {} 34 | }, 35 | "source": [ 36 | "from __future__ import absolute_import, division, print_function, unicode_literals\n", 37 | "\n", 38 | "\n", 39 | "import tensorflow_datasets as tfds\n", 40 | "import tensorflow as tf\n", 41 | "print(tf.__version__)" 42 | ], 43 | "execution_count": 0, 44 | "outputs": [] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "metadata": { 49 | "id": "Aclov8NVMJml", 50 | "colab_type": "code", 51 | "colab": {} 52 | }, 53 | "source": [ 54 | "# If the tf.__version__ is 1.x, please run this cell\n", 55 | "!pip install tensorflow==2.0.0-beta0" 56 
| ], 57 | "execution_count": 0, 58 | "outputs": [] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "metadata": { 63 | "id": "uAU8g7C0MPZE", 64 | "colab_type": "code", 65 | "colab": {} 66 | }, 67 | "source": [ 68 | "import tensorflow_datasets as tfds\n", 69 | "import tensorflow as tf\n", 70 | "print(tf.__version__)" 71 | ], 72 | "execution_count": 0, 73 | "outputs": [] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "metadata": { 78 | "id": "AW-4Vo4TMUHb", 79 | "colab_type": "code", 80 | "colab": {} 81 | }, 82 | "source": [ 83 | "# Get the data\n", 84 | "dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True)\n", 85 | "train_dataset, test_dataset = dataset['train'], dataset['test']\n" 86 | ], 87 | "execution_count": 0, 88 | "outputs": [] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "metadata": { 93 | "id": "L11bIR6-PKvs", 94 | "colab_type": "code", 95 | "colab": {} 96 | }, 97 | "source": [ 98 | "tokenizer = info.features['text'].encoder" 99 | ], 100 | "execution_count": 0, 101 | "outputs": [] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "metadata": { 106 | "id": "ffvRUI0_McDS", 107 | "colab_type": "code", 108 | "colab": {} 109 | }, 110 | "source": [ 111 | "BUFFER_SIZE = 10000\n", 112 | "BATCH_SIZE = 64\n", 113 | "\n", 114 | "train_dataset = train_dataset.shuffle(BUFFER_SIZE)\n", 115 | "train_dataset = train_dataset.padded_batch(BATCH_SIZE, train_dataset.output_shapes)\n", 116 | "test_dataset = test_dataset.padded_batch(BATCH_SIZE, test_dataset.output_shapes)" 117 | ], 118 | "execution_count": 0, 119 | "outputs": [] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "metadata": { 124 | "colab_type": "code", 125 | "id": "jo1jjO3vn0jo", 126 | "colab": {} 127 | }, 128 | "source": [ 129 | "model = tf.keras.Sequential([\n", 130 | " tf.keras.layers.Embedding(tokenizer.vocab_size, 64),\n", 131 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),\n", 132 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),\n", 133 | " tf.keras.layers.Dense(64, activation='relu'),\n", 134 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 135 | "])" 136 | ], 137 | "execution_count": 0, 138 | "outputs": [] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "metadata": { 143 | "id": "QKI5dfPgMioL", 144 | "colab_type": "code", 145 | "colab": {} 146 | }, 147 | "source": [ 148 | "model.summary()" 149 | ], 150 | "execution_count": 0, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "metadata": { 156 | "id": "Uip7QOVzMoMq", 157 | "colab_type": "code", 158 | "colab": {} 159 | }, 160 | "source": [ 161 | "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])" 162 | ], 163 | "execution_count": 0, 164 | "outputs": [] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "metadata": { 169 | "id": "7mlgzaRDMtF6", 170 | "colab_type": "code", 171 | "colab": {} 172 | }, 173 | "source": [ 174 | "NUM_EPOCHS = 10\n", 175 | "history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset)" 176 | ], 177 | "execution_count": 0, 178 | "outputs": [] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "metadata": { 183 | "colab_type": "code", 184 | "id": "Mp1Z7P9pYRSK", 185 | "colab": {} 186 | }, 187 | "source": [ 188 | "import matplotlib.pyplot as plt\n", 189 | "\n", 190 | "\n", 191 | "def plot_graphs(history, string):\n", 192 | " plt.plot(history.history[string])\n", 193 | " plt.plot(history.history['val_'+string])\n", 194 | " plt.xlabel(\"Epochs\")\n", 195 | " plt.ylabel(string)\n", 196 | " 
plt.legend([string, 'val_'+string])\n", 197 | " plt.show()" 198 | ], 199 | "execution_count": 0, 200 | "outputs": [] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "metadata": { 205 | "id": "R_sX6ilIM515", 206 | "colab_type": "code", 207 | "colab": {} 208 | }, 209 | "source": [ 210 | "plot_graphs(history, 'accuracy')" 211 | ], 212 | "execution_count": 0, 213 | "outputs": [] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "metadata": { 218 | "id": "RFEXtKtqNARB", 219 | "colab_type": "code", 220 | "colab": {} 221 | }, 222 | "source": [ 223 | "plot_graphs(history, 'loss')" 224 | ], 225 | "execution_count": 0, 226 | "outputs": [] 227 | } 228 | ] 229 | } -------------------------------------------------------------------------------- /Week 3/Course_3_Week_3_Lesson_1c.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Course 4 - Week 3 - Lesson 1c.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "rFiCyWQ-NC5D", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "# Multiple Layer GRU" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "metadata": { 31 | "id": "Y20Lud2ZMBhW", 32 | "colab_type": "code", 33 | "colab": {} 34 | }, 35 | "source": [ 36 | "from __future__ import absolute_import, division, print_function, unicode_literals\n", 37 | "\n", 38 | "\n", 39 | "import tensorflow_datasets as tfds\n", 40 | "import tensorflow as tf\n", 41 | "print(tf.__version__)" 42 | ], 43 | "execution_count": 0, 44 | "outputs": [] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "metadata": { 49 | "id": "Aclov8NVMJml", 50 | "colab_type": "code", 51 | "colab": {} 52 | }, 53 | "source": [ 54 | "# If the tf.__version__ is 1.x, please run this cell\n", 55 | "!pip install tensorflow==2.0.0-beta0" 56 | ], 57 | "execution_count": 0, 58 | "outputs": [] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "metadata": { 63 | "id": "uAU8g7C0MPZE", 64 | "colab_type": "code", 65 | "colab": {} 66 | }, 67 | "source": [ 68 | "import tensorflow_datasets as tfds\n", 69 | "import tensorflow as tf\n", 70 | "print(tf.__version__)" 71 | ], 72 | "execution_count": 0, 73 | "outputs": [] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "metadata": { 78 | "id": "-svP7Gy_jOiI", 79 | "colab_type": "code", 80 | "colab": {} 81 | }, 82 | "source": [ 83 | "" 84 | ], 85 | "execution_count": 0, 86 | "outputs": [] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "metadata": { 91 | "id": "AW-4Vo4TMUHb", 92 | "colab_type": "code", 93 | "colab": {} 94 | }, 95 | "source": [ 96 | "# Get the data\n", 97 | "dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True)\n", 98 | "train_dataset, test_dataset = dataset['train'], dataset['test']\n" 99 | ], 100 | "execution_count": 0, 101 | "outputs": [] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "metadata": { 106 | "id": "L11bIR6-PKvs", 107 | "colab_type": "code", 108 | "colab": {} 109 | }, 110 | "source": [ 111 | "tokenizer = info.features['text'].encoder" 112 | ], 113 | "execution_count": 0, 114 | "outputs": [] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "metadata": { 119 | "id": "ffvRUI0_McDS", 120 | "colab_type": "code", 121 | "colab": {} 122 | }, 123 | "source": [ 124 | "BUFFER_SIZE = 10000\n", 125 
| "BATCH_SIZE = 64\n", 126 | "\n", 127 | "train_dataset = train_dataset.shuffle(BUFFER_SIZE)\n", 128 | "train_dataset = train_dataset.padded_batch(BATCH_SIZE, train_dataset.output_shapes)\n", 129 | "test_dataset = test_dataset.padded_batch(BATCH_SIZE, test_dataset.output_shapes)" 130 | ], 131 | "execution_count": 0, 132 | "outputs": [] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "metadata": { 137 | "colab_type": "code", 138 | "id": "jo1jjO3vn0jo", 139 | "colab": {} 140 | }, 141 | "source": [ 142 | "model = tf.keras.Sequential([\n", 143 | " tf.keras.layers.Embedding(tokenizer.vocab_size, 64),\n", 144 | " tf.keras.layers.Conv1D(128, 5, activation='relu'),\n", 145 | " tf.keras.layers.GlobalAveragePooling1D(),\n", 146 | " tf.keras.layers.Dense(64, activation='relu'),\n", 147 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 148 | "])" 149 | ], 150 | "execution_count": 0, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "metadata": { 156 | "id": "QKI5dfPgMioL", 157 | "colab_type": "code", 158 | "colab": {} 159 | }, 160 | "source": [ 161 | "model.summary()" 162 | ], 163 | "execution_count": 0, 164 | "outputs": [] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "metadata": { 169 | "id": "Uip7QOVzMoMq", 170 | "colab_type": "code", 171 | "colab": {} 172 | }, 173 | "source": [ 174 | "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])" 175 | ], 176 | "execution_count": 0, 177 | "outputs": [] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "metadata": { 182 | "id": "7mlgzaRDMtF6", 183 | "colab_type": "code", 184 | "colab": {} 185 | }, 186 | "source": [ 187 | "NUM_EPOCHS = 10\n", 188 | "history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset)" 189 | ], 190 | "execution_count": 0, 191 | "outputs": [] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "metadata": { 196 | "colab_type": "code", 197 | "id": "Mp1Z7P9pYRSK", 198 | "colab": {} 199 | }, 200 | "source": [ 201 | "import matplotlib.pyplot as plt\n", 202 | "\n", 203 | "\n", 204 | "def plot_graphs(history, string):\n", 205 | " plt.plot(history.history[string])\n", 206 | " plt.plot(history.history['val_'+string])\n", 207 | " plt.xlabel(\"Epochs\")\n", 208 | " plt.ylabel(string)\n", 209 | " plt.legend([string, 'val_'+string])\n", 210 | " plt.show()" 211 | ], 212 | "execution_count": 0, 213 | "outputs": [] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "metadata": { 218 | "id": "R_sX6ilIM515", 219 | "colab_type": "code", 220 | "colab": {} 221 | }, 222 | "source": [ 223 | "plot_graphs(history, 'accuracy')" 224 | ], 225 | "execution_count": 0, 226 | "outputs": [] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "metadata": { 231 | "id": "RFEXtKtqNARB", 232 | "colab_type": "code", 233 | "colab": {} 234 | }, 235 | "source": [ 236 | "plot_graphs(history, 'loss')" 237 | ], 238 | "execution_count": 0, 239 | "outputs": [] 240 | } 241 | ] 242 | } -------------------------------------------------------------------------------- /Week 3/Course_3_Week_3_Lesson_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Course 3 - Week 3 - Lesson 2.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "code", 20 | "metadata": { 21 | "id": "jGwXGIXvFhXW", 
22 | "colab_type": "code", 23 | "colab": {} 24 | }, 25 | "source": [ 26 | "import json\n", 27 | "import tensorflow as tf\n", 28 | "\n", 29 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 30 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 31 | "\n", 32 | "!wget --no-check-certificate \\\n", 33 | " https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \\\n", 34 | " -O /tmp/sarcasm.json\n", 35 | "\n", 36 | "vocab_size = 1000\n", 37 | "embedding_dim = 16\n", 38 | "max_length = 120\n", 39 | "trunc_type='post'\n", 40 | "padding_type='post'\n", 41 | "oov_tok = \"\"\n", 42 | "training_size = 20000\n", 43 | "\n", 44 | "\n", 45 | "with open(\"/tmp/sarcasm.json\", 'r') as f:\n", 46 | " datastore = json.load(f)\n", 47 | "\n", 48 | "\n", 49 | "sentences = []\n", 50 | "labels = []\n", 51 | "urls = []\n", 52 | "for item in datastore:\n", 53 | " sentences.append(item['headline'])\n", 54 | " labels.append(item['is_sarcastic'])\n", 55 | "\n", 56 | "training_sentences = sentences[0:training_size]\n", 57 | "testing_sentences = sentences[training_size:]\n", 58 | "training_labels = labels[0:training_size]\n", 59 | "testing_labels = labels[training_size:]\n", 60 | "\n", 61 | "tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)\n", 62 | "tokenizer.fit_on_texts(training_sentences)\n", 63 | "\n", 64 | "word_index = tokenizer.word_index\n", 65 | "\n", 66 | "training_sequences = tokenizer.texts_to_sequences(training_sentences)\n", 67 | "training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)\n", 68 | "\n", 69 | "testing_sequences = tokenizer.texts_to_sequences(testing_sentences)\n", 70 | "testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)\n", 71 | "\n", 72 | "model = tf.keras.Sequential([\n", 73 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 74 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),\n", 75 | " tf.keras.layers.Dense(24, activation='relu'),\n", 76 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 77 | "])\n", 78 | "model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 79 | "model.summary()\n", 80 | "\n", 81 | "num_epochs = 50\n", 82 | "history = model.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=1)\n", 83 | "\n" 84 | ], 85 | "execution_count": 0, 86 | "outputs": [] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "metadata": { 91 | "id": "g9DC6dmLF8DC", 92 | "colab_type": "code", 93 | "colab": {} 94 | }, 95 | "source": [ 96 | "import matplotlib.pyplot as plt\n", 97 | "\n", 98 | "\n", 99 | "def plot_graphs(history, string):\n", 100 | " plt.plot(history.history[string])\n", 101 | " plt.plot(history.history['val_'+string])\n", 102 | " plt.xlabel(\"Epochs\")\n", 103 | " plt.ylabel(string)\n", 104 | " plt.legend([string, 'val_'+string])\n", 105 | " plt.show()\n", 106 | "\n", 107 | "plot_graphs(history, 'acc')\n", 108 | "plot_graphs(history, 'loss')" 109 | ], 110 | "execution_count": 0, 111 | "outputs": [] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "metadata": { 116 | "id": "7ZEZIUppGhdi", 117 | "colab_type": "code", 118 | "colab": {} 119 | }, 120 | "source": [ 121 | "model.save(\"test.h5\")" 122 | ], 123 | "execution_count": 0, 124 | "outputs": [] 125 | } 126 | ] 127 | } -------------------------------------------------------------------------------- /Week 
3/Course_3_Week_3_Lesson_2c.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Course 3 - Week 3 - Lesson 2c.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "code", 20 | "metadata": { 21 | "id": "jGwXGIXvFhXW", 22 | "colab_type": "code", 23 | "colab": {} 24 | }, 25 | "source": [ 26 | "import json\n", 27 | "import tensorflow as tf\n", 28 | "\n", 29 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 30 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 31 | "\n", 32 | "!wget --no-check-certificate \\\n", 33 | " https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \\\n", 34 | " -O /tmp/sarcasm.json\n", 35 | "\n", 36 | "vocab_size = 1000\n", 37 | "embedding_dim = 16\n", 38 | "max_length = 120\n", 39 | "trunc_type='post'\n", 40 | "padding_type='post'\n", 41 | "oov_tok = \"\"\n", 42 | "training_size = 20000\n", 43 | "\n", 44 | "\n", 45 | "with open(\"/tmp/sarcasm.json\", 'r') as f:\n", 46 | " datastore = json.load(f)\n", 47 | "\n", 48 | "\n", 49 | "sentences = []\n", 50 | "labels = []\n", 51 | "urls = []\n", 52 | "for item in datastore:\n", 53 | " sentences.append(item['headline'])\n", 54 | " labels.append(item['is_sarcastic'])\n", 55 | "\n", 56 | "training_sentences = sentences[0:training_size]\n", 57 | "testing_sentences = sentences[training_size:]\n", 58 | "training_labels = labels[0:training_size]\n", 59 | "testing_labels = labels[training_size:]\n", 60 | "\n", 61 | "tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)\n", 62 | "tokenizer.fit_on_texts(training_sentences)\n", 63 | "\n", 64 | "word_index = tokenizer.word_index\n", 65 | "\n", 66 | "training_sequences = tokenizer.texts_to_sequences(training_sentences)\n", 67 | "training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)\n", 68 | "\n", 69 | "testing_sequences = tokenizer.texts_to_sequences(testing_sentences)\n", 70 | "testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)\n", 71 | "\n", 72 | "model = tf.keras.Sequential([\n", 73 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 74 | " tf.keras.layers.Conv1D(128, 5, activation='relu'),\n", 75 | " tf.keras.layers.GlobalMaxPooling1D(),\n", 76 | " tf.keras.layers.Dense(24, activation='relu'),\n", 77 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 78 | "])\n", 79 | "model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 80 | "model.summary()\n", 81 | "\n", 82 | "num_epochs = 50\n", 83 | "history = model.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=1)\n", 84 | "\n" 85 | ], 86 | "execution_count": 0, 87 | "outputs": [] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "metadata": { 92 | "id": "g9DC6dmLF8DC", 93 | "colab_type": "code", 94 | "colab": {} 95 | }, 96 | "source": [ 97 | "import matplotlib.pyplot as plt\n", 98 | "\n", 99 | "\n", 100 | "def plot_graphs(history, string):\n", 101 | " plt.plot(history.history[string])\n", 102 | " plt.plot(history.history['val_'+string])\n", 103 | " plt.xlabel(\"Epochs\")\n", 104 | " 
plt.ylabel(string)\n", 105 | " plt.legend([string, 'val_'+string])\n", 106 | " plt.show()\n", 107 | "\n", 108 | "plot_graphs(history, 'acc')\n", 109 | "plot_graphs(history, 'loss')" 110 | ], 111 | "execution_count": 0, 112 | "outputs": [] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "metadata": { 117 | "id": "7ZEZIUppGhdi", 118 | "colab_type": "code", 119 | "colab": {} 120 | }, 121 | "source": [ 122 | "model.save(\"test.h5\")" 123 | ], 124 | "execution_count": 0, 125 | "outputs": [] 126 | } 127 | ] 128 | } -------------------------------------------------------------------------------- /Week 3/Course_3_Week_3_Lesson_2d.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Course 3 - Week 3 - Lesson 2d.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "code", 20 | "metadata": { 21 | "id": "P-AhVYeBWgQ3", 22 | "colab_type": "code", 23 | "colab": {} 24 | }, 25 | "source": [ 26 | "# NOTE: PLEASE MAKE SURE YOU ARE RUNNING THIS IN A PYTHON3 ENVIRONMENT\n", 27 | "\n", 28 | "import tensorflow as tf\n", 29 | "print(tf.__version__)\n", 30 | "\n", 31 | "# This is needed for the iterator over the data\n", 32 | "# But not necessary if you have TF 2.0 installed\n", 33 | "#!pip install tensorflow==2.0.0-beta0\n", 34 | "\n", 35 | "\n", 36 | "tf.enable_eager_execution()\n", 37 | "\n", 38 | "# !pip install -q tensorflow-datasets" 39 | ], 40 | "execution_count": 0, 41 | "outputs": [] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "metadata": { 46 | "id": "_IoM4VFxWpMR", 47 | "colab_type": "code", 48 | "colab": {} 49 | }, 50 | "source": [ 51 | "import tensorflow_datasets as tfds\n", 52 | "imdb, info = tfds.load(\"imdb_reviews\", with_info=True, as_supervised=True)\n" 53 | ], 54 | "execution_count": 0, 55 | "outputs": [] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "metadata": { 60 | "id": "wHQ2Ko0zl7M4", 61 | "colab_type": "code", 62 | "colab": {} 63 | }, 64 | "source": [ 65 | "import numpy as np\n", 66 | "\n", 67 | "train_data, test_data = imdb['train'], imdb['test']\n", 68 | "\n", 69 | "training_sentences = []\n", 70 | "training_labels = []\n", 71 | "\n", 72 | "testing_sentences = []\n", 73 | "testing_labels = []\n", 74 | "\n", 75 | "# str(s.tonumpy()) is needed in Python3 instead of just s.numpy()\n", 76 | "for s,l in train_data:\n", 77 | " training_sentences.append(str(s.numpy()))\n", 78 | " training_labels.append(l.numpy())\n", 79 | " \n", 80 | "for s,l in test_data:\n", 81 | " testing_sentences.append(str(s.numpy()))\n", 82 | " testing_labels.append(l.numpy())\n", 83 | " \n", 84 | "training_labels_final = np.array(training_labels)\n", 85 | "testing_labels_final = np.array(testing_labels)\n" 86 | ], 87 | "execution_count": 0, 88 | "outputs": [] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "metadata": { 93 | "id": "7n15yyMdmoH1", 94 | "colab_type": "code", 95 | "colab": {} 96 | }, 97 | "source": [ 98 | "vocab_size = 10000\n", 99 | "embedding_dim = 16\n", 100 | "max_length = 120\n", 101 | "trunc_type='post'\n", 102 | "oov_tok = \"\"\n", 103 | "\n", 104 | "\n", 105 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 106 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 107 | "\n", 108 | "tokenizer = Tokenizer(num_words = vocab_size, 
oov_token=oov_tok)\n", 109 | "tokenizer.fit_on_texts(training_sentences)\n", 110 | "word_index = tokenizer.word_index\n", 111 | "sequences = tokenizer.texts_to_sequences(training_sentences)\n", 112 | "padded = pad_sequences(sequences,maxlen=max_length, truncating=trunc_type)\n", 113 | "\n", 114 | "testing_sequences = tokenizer.texts_to_sequences(testing_sentences)\n", 115 | "testing_padded = pad_sequences(testing_sequences,maxlen=max_length)\n", 116 | "\n" 117 | ], 118 | "execution_count": 0, 119 | "outputs": [] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "metadata": { 124 | "colab_type": "code", 125 | "id": "9axf0uIXVMhO", 126 | "colab": {} 127 | }, 128 | "source": [ 129 | "reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])\n", 130 | "\n", 131 | "def decode_review(text):\n", 132 | " return ' '.join([reverse_word_index.get(i, '?') for i in text])\n", 133 | "\n", 134 | "print(decode_review(padded[1]))\n", 135 | "print(training_sentences[1])" 136 | ], 137 | "execution_count": 0, 138 | "outputs": [] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "metadata": { 143 | "id": "5NEpdhb8AxID", 144 | "colab_type": "code", 145 | "colab": {} 146 | }, 147 | "source": [ 148 | "model = tf.keras.Sequential([\n", 149 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 150 | " tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32)),\n", 151 | " tf.keras.layers.Dense(6, activation='relu'),\n", 152 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 153 | "])\n", 154 | "model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 155 | "model.summary()\n" 156 | ], 157 | "execution_count": 0, 158 | "outputs": [] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "metadata": { 163 | "id": "V5LLrXC-uNX6", 164 | "colab_type": "code", 165 | "colab": {} 166 | }, 167 | "source": [ 168 | "num_epochs = 50\n", 169 | "history = model.fit(padded, training_labels_final, epochs=num_epochs, validation_data=(testing_padded, testing_labels_final))" 170 | ], 171 | "execution_count": 0, 172 | "outputs": [] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "metadata": { 177 | "id": "nHGYuU4jPYaj", 178 | "colab_type": "code", 179 | "colab": {} 180 | }, 181 | "source": [ 182 | "import matplotlib.pyplot as plt\n", 183 | "\n", 184 | "\n", 185 | "def plot_graphs(history, string):\n", 186 | " plt.plot(history.history[string])\n", 187 | " plt.plot(history.history['val_'+string])\n", 188 | " plt.xlabel(\"Epochs\")\n", 189 | " plt.ylabel(string)\n", 190 | " plt.legend([string, 'val_'+string])\n", 191 | " plt.show()\n", 192 | "\n", 193 | "plot_graphs(history, 'accuracy')\n", 194 | "plot_graphs(history, 'loss')" 195 | ], 196 | "execution_count": 0, 197 | "outputs": [] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "metadata": { 202 | "id": "wSualgGPPK0S", 203 | "colab_type": "code", 204 | "colab": {} 205 | }, 206 | "source": [ 207 | "# Model Definition with LSTM\n", 208 | "model = tf.keras.Sequential([\n", 209 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 210 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),\n", 211 | " tf.keras.layers.Dense(6, activation='relu'),\n", 212 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 213 | "])\n", 214 | "model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 215 | "model.summary()\n" 216 | ], 217 | "execution_count": 0, 218 | "outputs": [] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "metadata": { 223 | "id": 
"K_Jc7cY3Qxke", 224 | "colab_type": "code", 225 | "colab": {} 226 | }, 227 | "source": [ 228 | "# Model Definition with Conv1D\n", 229 | "model = tf.keras.Sequential([\n", 230 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 231 | " tf.keras.layers.Conv1D(128, 5, activation='relu'),\n", 232 | " tf.keras.layers.GlobalAveragePooling1D(),\n", 233 | " tf.keras.layers.Dense(6, activation='relu'),\n", 234 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 235 | "])\n", 236 | "model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 237 | "model.summary()\n" 238 | ], 239 | "execution_count": 0, 240 | "outputs": [] 241 | } 242 | ] 243 | } -------------------------------------------------------------------------------- /Week 3/NLP_Course_Week_3_Exercise_Question.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "NLP Course - Week 3 Exercise Question.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "accelerator": "GPU" 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "code", 19 | "metadata": { 20 | "id": "hmA6EzkQJ5jt", 21 | "colab_type": "code", 22 | "colab": {} 23 | }, 24 | "source": [ 25 | "import json\n", 26 | "import tensorflow as tf\n", 27 | "import csv\n", 28 | "import random\n", 29 | "import numpy as np\n", 30 | "\n", 31 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 32 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 33 | "from tensorflow.keras.utils import to_categorical\n", 34 | "from tensorflow.keras import regularizers\n", 35 | "\n", 36 | "\n", 37 | "embedding_dim = 100\n", 38 | "max_length = 16\n", 39 | "trunc_type='post'\n", 40 | "padding_type='post'\n", 41 | "oov_tok = \"\"\n", 42 | "training_size=160000 #Your dataset size here. Experiment using smaller values (i.e. 16000), but don't forget to train on at least 160000 to see the best effects\n", 43 | "test_portion=.1\n", 44 | "\n", 45 | "corpus = []\n" 46 | ], 47 | "execution_count": 0, 48 | "outputs": [] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "metadata": { 53 | "id": "bM0l_dORKqE0", 54 | "colab_type": "code", 55 | "outputId": "ffa1b4a9-6462-4a5e-e166-9d863b732aaf", 56 | "colab": { 57 | "base_uri": "https://localhost:8080/", 58 | "height": 204 59 | } 60 | }, 61 | "source": [ 62 | "\n", 63 | "# Note that I cleaned the Stanford dataset to remove LATIN1 encoding to make it easier for Python CSV reader\n", 64 | "# You can do that yourself with:\n", 65 | "# iconv -f LATIN1 -t UTF8 training.1600000.processed.noemoticon.csv -o training_cleaned.csv\n", 66 | "# I then hosted it on my site to make it easier to use in this notebook\n", 67 | "\n", 68 | "!wget --no-check-certificate \\\n", 69 | " https://storage.googleapis.com/laurencemoroney-blog.appspot.com/training_cleaned.csv \\\n", 70 | " -O /tmp/training_cleaned.csv\n", 71 | "\n", 72 | "num_sentences = 0\n", 73 | "\n", 74 | "with open(\"/tmp/training_cleaned.csv\") as csvfile:\n", 75 | " reader = csv.reader(csvfile, delimiter=',')\n", 76 | " for row in reader:\n", 77 | " # Your Code here. Create list items where the first item is the text, found in row[5], and the second is the label. Note that the label is a '0' or a '4' in the text. When it's the former, make\n", 78 | " # your label to be 0, otherwise 1. 
Keep a count of the number of sentences in num_sentences\n", 79 | " list_item=[]\n", 80 | " list_item.append(row[5])\n", 81 | " if row[0] == '0':\n", 82 | " list_item.append(0)\n", 83 | " else:\n", 84 | " list_item.append(1)\n", 85 | " # YOUR CODE HERE\n", 86 | " num_sentences = num_sentences + 1\n", 87 | " corpus.append(list_item)\n", 88 | "\n", 89 | "\n" 90 | ], 91 | "execution_count": 2, 92 | "outputs": [ 93 | { 94 | "output_type": "stream", 95 | "text": [ 96 | "--2019-06-23 12:23:27-- https://storage.googleapis.com/laurencemoroney-blog.appspot.com/training_cleaned.csv\n", 97 | "Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.141.128, 2607:f8b0:400c:c06::80\n", 98 | "Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.141.128|:443... connected.\n", 99 | "HTTP request sent, awaiting response... 200 OK\n", 100 | "Length: 238942690 (228M) [application/octet-stream]\n", 101 | "Saving to: ‘/tmp/training_cleaned.csv’\n", 102 | "\n", 103 | "/tmp/training_clean 100%[===================>] 227.87M 181MB/s in 1.3s \n", 104 | "\n", 105 | "2019-06-23 12:23:29 (181 MB/s) - ‘/tmp/training_cleaned.csv’ saved [238942690/238942690]\n", 106 | "\n" 107 | ], 108 | "name": "stdout" 109 | } 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "metadata": { 115 | "id": "3kxblBUjEUX-", 116 | "colab_type": "code", 117 | "outputId": "b25aa6f3-d787-4d19-bbe3-f3715fb7ced2", 118 | "colab": { 119 | "base_uri": "https://localhost:8080/", 120 | "height": 68 121 | } 122 | }, 123 | "source": [ 124 | "print(num_sentences)\n", 125 | "print(len(corpus))\n", 126 | "print(corpus[1])\n", 127 | "\n", 128 | "# Expected Output:\n", 129 | "# 1600000\n", 130 | "# 1600000\n", 131 | "# [\"is upset that he can't update his Facebook by texting it... and might cry as a result School today also. Blah!\", 0]" 132 | ], 133 | "execution_count": 3, 134 | "outputs": [ 135 | { 136 | "output_type": "stream", 137 | "text": [ 138 | "1600000\n", 139 | "1600000\n", 140 | "[\"is upset that he can't update his Facebook by texting it... and might cry as a result School today also. 
Blah!\", 0]\n" 141 | ], 142 | "name": "stdout" 143 | } 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "metadata": { 149 | "id": "ohOGz24lsNAD", 150 | "colab_type": "code", 151 | "colab": {} 152 | }, 153 | "source": [ 154 | "sentences=[]\n", 155 | "labels=[]\n", 156 | "random.shuffle(corpus)\n", 157 | "for x in range(training_size):\n", 158 | " sentences.append(corpus[x][0]) #(# YOUR CODE HERE)\n", 159 | " labels.append(corpus[x][1]) #(# YOUR CODE HERE)\n", 160 | "\n", 161 | "\n", 162 | "tokenizer = Tokenizer()\n", 163 | "tokenizer.fit_on_texts(sentences) #(# YOUR CODE HERE)\n", 164 | "\n", 165 | "word_index = tokenizer.word_index\n", 166 | "vocab_size=len(word_index) #(# YOUR CODE HERE)\n", 167 | "\n", 168 | "sequences = tokenizer.texts_to_sequences(sentences) #(# YOUR CODE HERE)\n", 169 | "padded = pad_sequences(sequences, maxlen=max_length, padding = padding_type, truncating=trunc_type) #(# YOUR CODE HERE)\n", 170 | "\n", 171 | "split = int(test_portion * training_size)\n", 172 | "\n", 173 | "test_sequences = padded[0:split] #[# YOUR CODE HERE]\n", 174 | "training_sequences = padded[split:training_size]#[# YOUR CODE HERE]\n", 175 | "test_labels = labels[0:split] #[# YOUR CODE HERE]\n", 176 | "training_labels = labels[split:training_size] #[# YOUR CODE HERE]" 177 | ], 178 | "execution_count": 0, 179 | "outputs": [] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "metadata": { 184 | "id": "gIrtRem1En3N", 185 | "colab_type": "code", 186 | "outputId": "95155c4c-948a-48c0-9cee-e6531c836d98", 187 | "colab": { 188 | "base_uri": "https://localhost:8080/", 189 | "height": 51 190 | } 191 | }, 192 | "source": [ 193 | "print(vocab_size)\n", 194 | "print(word_index['i'])\n", 195 | "# Expected Output\n", 196 | "# 138858\n", 197 | "# 1" 198 | ], 199 | "execution_count": 8, 200 | "outputs": [ 201 | { 202 | "output_type": "stream", 203 | "text": [ 204 | "138876\n", 205 | "1\n" 206 | ], 207 | "name": "stdout" 208 | } 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "metadata": { 214 | "id": "C1zdgJkusRh0", 215 | "colab_type": "code", 216 | "outputId": "b6edd322-8191-45e7-cb12-08921685a72f", 217 | "colab": { 218 | "base_uri": "https://localhost:8080/", 219 | "height": 204 220 | } 221 | }, 222 | "source": [ 223 | "# Note this is the 100 dimension version of GloVe from Stanford\n", 224 | "# I unzipped and hosted it on my site to make this notebook easier\n", 225 | "!wget --no-check-certificate \\\n", 226 | " https://storage.googleapis.com/laurencemoroney-blog.appspot.com/glove.6B.100d.txt \\\n", 227 | " -O /tmp/glove.6B.100d.txt\n", 228 | "embeddings_index = {};\n", 229 | "with open('/tmp/glove.6B.100d.txt') as f:\n", 230 | " for line in f:\n", 231 | " values = line.split();\n", 232 | " word = values[0];\n", 233 | " coefs = np.asarray(values[1:], dtype='float32');\n", 234 | " embeddings_index[word] = coefs;\n", 235 | "\n", 236 | "embeddings_matrix = np.zeros((vocab_size+1, embedding_dim));\n", 237 | "for word, i in word_index.items():\n", 238 | " embedding_vector = embeddings_index.get(word);\n", 239 | " if embedding_vector is not None:\n", 240 | " embeddings_matrix[i] = embedding_vector;" 241 | ], 242 | "execution_count": 0, 243 | "outputs": [ 244 | { 245 | "output_type": "stream", 246 | "text": [ 247 | "--2019-06-07 17:55:30-- https://storage.googleapis.com/laurencemoroney-blog.appspot.com/glove.6B.100d.txt\n", 248 | "Resolving storage.googleapis.com (storage.googleapis.com)... 
64.233.183.128, 2607:f8b0:4001:c12::80\n", 249 | "Connecting to storage.googleapis.com (storage.googleapis.com)|64.233.183.128|:443... connected.\n", 250 | "HTTP request sent, awaiting response... 200 OK\n", 251 | "Length: 347116733 (331M) [text/plain]\n", 252 | "Saving to: ‘/tmp/glove.6B.100d.txt’\n", 253 | "\n", 254 | "/tmp/glove.6B.100d. 100%[===================>] 331.04M 160MB/s in 2.1s \n", 255 | "\n", 256 | "2019-06-07 17:55:33 (160 MB/s) - ‘/tmp/glove.6B.100d.txt’ saved [347116733/347116733]\n", 257 | "\n" 258 | ], 259 | "name": "stdout" 260 | } 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "metadata": { 266 | "id": "71NLk_lpFLNt", 267 | "colab_type": "code", 268 | "outputId": "97cb88db-754f-4375-fdc3-876cd6b4fdce", 269 | "colab": { 270 | "base_uri": "https://localhost:8080/", 271 | "height": 34 272 | } 273 | }, 274 | "source": [ 275 | "print(len(embeddings_matrix))\n", 276 | "# Expected Output\n", 277 | "# 138859" 278 | ], 279 | "execution_count": 0, 280 | "outputs": [ 281 | { 282 | "output_type": "stream", 283 | "text": [ 284 | "138859\n" 285 | ], 286 | "name": "stdout" 287 | } 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "metadata": { 293 | "colab_type": "code", 294 | "id": "iKKvbuEBOGFz", 295 | "colab": {} 296 | }, 297 | "source": [ 298 | "model = tf.keras.Sequential([\n", 299 | " tf.keras.layers.Embedding(vocab_size+1, embedding_dim, input_length=max_length, weights=[embeddings_matrix], trainable=False),\n", 300 | " # YOUR CODE HERE - experiment with combining different types, such as convolutions and LSTMs\n", 301 | "])\n", 302 | "model.compile(loss='binary_crossentropy', optimizer='adam', metrics='accuracy') #(# YOUR CODE HERE)\n", 303 | "model.summary()\n", 304 | "\n", 305 | "num_epochs = 50\n", 306 | "history = model.fit(training_sequences, training_labels, epochs=num_epochs, validation_data=(test_sequences, test_labels), verbose=2)\n", 307 | "\n", 308 | "print(\"Training Complete\")\n" 309 | ], 310 | "execution_count": 0, 311 | "outputs": [] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "metadata": { 316 | "id": "qxju4ItJKO8F", 317 | "colab_type": "code", 318 | "colab": {} 319 | }, 320 | "source": [ 321 | "import matplotlib.image as mpimg\n", 322 | "import matplotlib.pyplot as plt\n", 323 | "\n", 324 | "#-----------------------------------------------------------\n", 325 | "# Retrieve a list of list results on training and test data\n", 326 | "# sets for each training epoch\n", 327 | "#-----------------------------------------------------------\n", 328 | "acc=history.history['acc']\n", 329 | "val_acc=history.history['val_acc']\n", 330 | "loss=history.history['loss']\n", 331 | "val_loss=history.history['val_loss']\n", 332 | "\n", 333 | "epochs=range(len(acc)) # Get number of epochs\n", 334 | "\n", 335 | "#------------------------------------------------\n", 336 | "# Plot training and validation accuracy per epoch\n", 337 | "#------------------------------------------------\n", 338 | "plt.plot(epochs, acc, 'r')\n", 339 | "plt.plot(epochs, val_acc, 'b')\n", 340 | "plt.title('Training and validation accuracy')\n", 341 | "plt.xlabel(\"Epochs\")\n", 342 | "plt.ylabel(\"Accuracy\")\n", 343 | "plt.legend([\"Accuracy\", \"Validation Accuracy\"])\n", 344 | "\n", 345 | "plt.figure()\n", 346 | "\n", 347 | "#------------------------------------------------\n", 348 | "# Plot training and validation loss per epoch\n", 349 | "#------------------------------------------------\n", 350 | "plt.plot(epochs, loss, 'r')\n", 351 | "plt.plot(epochs, val_loss, 'b')\n", 
352 | "plt.title('Training and validation loss')\n", 353 | "plt.xlabel(\"Epochs\")\n", 354 | "plt.ylabel(\"Loss\")\n", 355 | "plt.legend([\"Loss\", \"Validation Loss\"])\n", 356 | "\n", 357 | "plt.figure()\n", 358 | "\n", 359 | "\n", 360 | "# Expected Output\n", 361 | "# A chart where the validation loss does not increase sharply!" 362 | ], 363 | "execution_count": 0, 364 | "outputs": [] 365 | } 366 | ] 367 | } -------------------------------------------------------------------------------- /Week 3/Quiz 3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/07Agarg/Natural-Language-Processing-In-Tensorflow-Course/46eb21e25f73fd8644a95e64696d64dd4843e1e8/Week 3/Quiz 3.pdf -------------------------------------------------------------------------------- /Week 3/course_3_week_3_lesson_1a.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 4 - Week 3 - Lesson 1a.ipynb 3 | 4 | Automatically generated by Colaboratory. 5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%203%20-%20Lesson%201a.ipynb 8 | 9 | # Single Layer LSTM 10 | """ 11 | 12 | from __future__ import absolute_import, division, print_function, unicode_literals 13 | 14 | 15 | import tensorflow_datasets as tfds 16 | import tensorflow as tf 17 | print(tf.__version__) 18 | 19 | # If the tf.__version__ is 1.x, please run this cell 20 | #!pip install tensorflow==2.0.0-beta0 21 | 22 | import tensorflow_datasets as tfds 23 | import tensorflow as tf 24 | print(tf.__version__) 25 | 26 | # Get the data 27 | dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True) 28 | train_dataset, test_dataset = dataset['train'], dataset['test'] 29 | 30 | tokenizer = info.features['text'].encoder 31 | 32 | BUFFER_SIZE = 10000 33 | BATCH_SIZE = 64 34 | 35 | train_dataset = train_dataset.shuffle(BUFFER_SIZE) 36 | train_dataset = train_dataset.padded_batch(BATCH_SIZE, train_dataset.output_shapes) 37 | test_dataset = test_dataset.padded_batch(BATCH_SIZE, test_dataset.output_shapes) 38 | 39 | model = tf.keras.Sequential([ 40 | tf.keras.layers.Embedding(tokenizer.vocab_size, 64), 41 | tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)), 42 | tf.keras.layers.Dense(64, activation='relu'), 43 | tf.keras.layers.Dense(1, activation='sigmoid') 44 | ]) 45 | 46 | model.summary() 47 | 48 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) 49 | 50 | NUM_EPOCHS = 10 51 | history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset) 52 | 53 | import matplotlib.pyplot as plt 54 | 55 | 56 | def plot_graphs(history, string): 57 | plt.plot(history.history[string]) 58 | plt.plot(history.history['val_'+string]) 59 | plt.xlabel("Epochs") 60 | plt.ylabel(string) 61 | plt.legend([string, 'val_'+string]) 62 | plt.show() 63 | 64 | plot_graphs(history, 'accuracy') 65 | 66 | plot_graphs(history, 'loss') -------------------------------------------------------------------------------- /Week 3/course_3_week_3_lesson_1b.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 4 - Week 3 - Lesson 1b.ipynb 3 | 4 | Automatically generated by Colaboratory. 
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%203%20-%20Lesson%201b.ipynb 8 | 9 | # Multiple Layer LSTM 10 | """ 11 | 12 | from __future__ import absolute_import, division, print_function, unicode_literals 13 | 14 | 15 | import tensorflow_datasets as tfds 16 | import tensorflow as tf 17 | print(tf.__version__) 18 | 19 | # If the tf.__version__ is 1.x, please run this cell 20 | !pip install tensorflow==2.0.0-beta0 21 | 22 | import tensorflow_datasets as tfds 23 | import tensorflow as tf 24 | print(tf.__version__) 25 | 26 | # Get the data 27 | dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True) 28 | train_dataset, test_dataset = dataset['train'], dataset['test'] 29 | 30 | tokenizer = info.features['text'].encoder 31 | 32 | BUFFER_SIZE = 10000 33 | BATCH_SIZE = 64 34 | 35 | train_dataset = train_dataset.shuffle(BUFFER_SIZE) 36 | train_dataset = train_dataset.padded_batch(BATCH_SIZE, train_dataset.output_shapes) 37 | test_dataset = test_dataset.padded_batch(BATCH_SIZE, test_dataset.output_shapes) 38 | 39 | model = tf.keras.Sequential([ 40 | tf.keras.layers.Embedding(tokenizer.vocab_size, 64), 41 | tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)), 42 | tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)), 43 | tf.keras.layers.Dense(64, activation='relu'), 44 | tf.keras.layers.Dense(1, activation='sigmoid') 45 | ]) 46 | 47 | model.summary() 48 | 49 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) 50 | 51 | NUM_EPOCHS = 10 52 | history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset) 53 | 54 | import matplotlib.pyplot as plt 55 | 56 | 57 | def plot_graphs(history, string): 58 | plt.plot(history.history[string]) 59 | plt.plot(history.history['val_'+string]) 60 | plt.xlabel("Epochs") 61 | plt.ylabel(string) 62 | plt.legend([string, 'val_'+string]) 63 | plt.show() 64 | 65 | plot_graphs(history, 'accuracy') 66 | 67 | plot_graphs(history, 'loss') -------------------------------------------------------------------------------- /Week 3/course_3_week_3_lesson_1c.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 4 - Week 3 - Lesson 1c.ipynb 3 | 4 | Automatically generated by Colaboratory. 
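
# [Editor's note -- a brief aside, sketched under the same setup as Lesson 1b above]
# When recurrent layers are stacked, every layer except the last needs
# return_sequences=True so the next layer receives the full sequence of hidden states
# rather than only the final one. Layer sizes below are illustrative.

import tensorflow as tf

stacked_lstm_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(8000, 64),
    # returns a (batch, timesteps, 128) sequence for the next Bidirectional LSTM
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    # last recurrent layer: returns a single (batch, 64) vector
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
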
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%203%20-%20Lesson%201c.ipynb 8 | 9 | # Multiple Layer GRU 10 | """ 11 | 12 | from __future__ import absolute_import, division, print_function, unicode_literals 13 | 14 | 15 | import tensorflow_datasets as tfds 16 | import tensorflow as tf 17 | print(tf.__version__) 18 | 19 | # If the tf.__version__ is 1.x, please run this cell 20 | !pip install tensorflow==2.0.0-beta0 21 | 22 | import tensorflow_datasets as tfds 23 | import tensorflow as tf 24 | print(tf.__version__) 25 | 26 | 27 | 28 | # Get the data 29 | dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True) 30 | train_dataset, test_dataset = dataset['train'], dataset['test'] 31 | 32 | tokenizer = info.features['text'].encoder 33 | 34 | BUFFER_SIZE = 10000 35 | BATCH_SIZE = 64 36 | 37 | train_dataset = train_dataset.shuffle(BUFFER_SIZE) 38 | train_dataset = train_dataset.padded_batch(BATCH_SIZE, train_dataset.output_shapes) 39 | test_dataset = test_dataset.padded_batch(BATCH_SIZE, test_dataset.output_shapes) 40 | 41 | model = tf.keras.Sequential([ 42 | tf.keras.layers.Embedding(tokenizer.vocab_size, 64), 43 | tf.keras.layers.Conv1D(128, 5, activation='relu'), 44 | tf.keras.layers.GlobalAveragePooling1D(), 45 | tf.keras.layers.Dense(64, activation='relu'), 46 | tf.keras.layers.Dense(1, activation='sigmoid') 47 | ]) 48 | 49 | model.summary() 50 | 51 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) 52 | 53 | NUM_EPOCHS = 10 54 | history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset) 55 | 56 | import matplotlib.pyplot as plt 57 | 58 | 59 | def plot_graphs(history, string): 60 | plt.plot(history.history[string]) 61 | plt.plot(history.history['val_'+string]) 62 | plt.xlabel("Epochs") 63 | plt.ylabel(string) 64 | plt.legend([string, 'val_'+string]) 65 | plt.show() 66 | 67 | plot_graphs(history, 'accuracy') 68 | 69 | plot_graphs(history, 'loss') -------------------------------------------------------------------------------- /Week 3/course_3_week_3_lesson_2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 3 - Week 3 - Lesson 2.ipynb 3 | 4 | Automatically generated by Colaboratory. 
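
# [Editor's note] In this script and the other sarcasm/IMDB files, oov_tok is shown as an
# empty string; the "<OOV>" literal appears to have been stripped during export because it
# looks like an HTML tag. The lessons intend an explicit out-of-vocabulary marker, e.g.:

from tensorflow.keras.preprocessing.text import Tokenizer

vocab_size = 1000
oov_tok = "<OOV>"
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
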
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%203%20-%20Lesson%202.ipynb 8 | """ 9 | 10 | import json 11 | import tensorflow as tf 12 | 13 | from tensorflow.keras.preprocessing.text import Tokenizer 14 | from tensorflow.keras.preprocessing.sequence import pad_sequences 15 | 16 | !wget --no-check-certificate \ 17 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \ 18 | -O /tmp/sarcasm.json 19 | 20 | vocab_size = 1000 21 | embedding_dim = 16 22 | max_length = 120 23 | trunc_type='post' 24 | padding_type='post' 25 | oov_tok = "" 26 | training_size = 20000 27 | 28 | 29 | with open("/tmp/sarcasm.json", 'r') as f: 30 | datastore = json.load(f) 31 | 32 | 33 | sentences = [] 34 | labels = [] 35 | urls = [] 36 | for item in datastore: 37 | sentences.append(item['headline']) 38 | labels.append(item['is_sarcastic']) 39 | 40 | training_sentences = sentences[0:training_size] 41 | testing_sentences = sentences[training_size:] 42 | training_labels = labels[0:training_size] 43 | testing_labels = labels[training_size:] 44 | 45 | tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok) 46 | tokenizer.fit_on_texts(training_sentences) 47 | 48 | word_index = tokenizer.word_index 49 | 50 | training_sequences = tokenizer.texts_to_sequences(training_sentences) 51 | training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type) 52 | 53 | testing_sequences = tokenizer.texts_to_sequences(testing_sentences) 54 | testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type) 55 | 56 | model = tf.keras.Sequential([ 57 | tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), 58 | tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)), 59 | tf.keras.layers.Dense(24, activation='relu'), 60 | tf.keras.layers.Dense(1, activation='sigmoid') 61 | ]) 62 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 63 | model.summary() 64 | 65 | num_epochs = 50 66 | history = model.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=1) 67 | 68 | import matplotlib.pyplot as plt 69 | 70 | 71 | def plot_graphs(history, string): 72 | plt.plot(history.history[string]) 73 | plt.plot(history.history['val_'+string]) 74 | plt.xlabel("Epochs") 75 | plt.ylabel(string) 76 | plt.legend([string, 'val_'+string]) 77 | plt.show() 78 | 79 | plot_graphs(history, 'acc') 80 | plot_graphs(history, 'loss') 81 | 82 | model.save("test.h5") -------------------------------------------------------------------------------- /Week 3/course_3_week_3_lesson_2c.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 3 - Week 3 - Lesson 2c.ipynb 3 | 4 | Automatically generated by Colaboratory. 
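
# [Editor's note -- a hedged sketch assuming a TF 2.x runtime]
# Newer tf.keras releases can reject plain Python lists passed to model.fit(), so the
# padded sequences and labels built in Lesson 2 above (and Lesson 2c below) are usually
# wrapped as NumPy arrays first; variable names follow the lesson code.

import numpy as np

training_padded = np.array(training_padded)
training_labels = np.array(training_labels)
testing_padded = np.array(testing_padded)
testing_labels = np.array(testing_labels)

history = model.fit(training_padded, training_labels,
                    epochs=num_epochs,
                    validation_data=(testing_padded, testing_labels),
                    verbose=1)
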
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%203%20-%20Lesson%202c.ipynb 8 | """ 9 | 10 | import json 11 | import tensorflow as tf 12 | 13 | from tensorflow.keras.preprocessing.text import Tokenizer 14 | from tensorflow.keras.preprocessing.sequence import pad_sequences 15 | 16 | !wget --no-check-certificate \ 17 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \ 18 | -O /tmp/sarcasm.json 19 | 20 | vocab_size = 1000 21 | embedding_dim = 16 22 | max_length = 120 23 | trunc_type='post' 24 | padding_type='post' 25 | oov_tok = "" 26 | training_size = 20000 27 | 28 | 29 | with open("/tmp/sarcasm.json", 'r') as f: 30 | datastore = json.load(f) 31 | 32 | 33 | sentences = [] 34 | labels = [] 35 | urls = [] 36 | for item in datastore: 37 | sentences.append(item['headline']) 38 | labels.append(item['is_sarcastic']) 39 | 40 | training_sentences = sentences[0:training_size] 41 | testing_sentences = sentences[training_size:] 42 | training_labels = labels[0:training_size] 43 | testing_labels = labels[training_size:] 44 | 45 | tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok) 46 | tokenizer.fit_on_texts(training_sentences) 47 | 48 | word_index = tokenizer.word_index 49 | 50 | training_sequences = tokenizer.texts_to_sequences(training_sentences) 51 | training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type) 52 | 53 | testing_sequences = tokenizer.texts_to_sequences(testing_sentences) 54 | testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type) 55 | 56 | model = tf.keras.Sequential([ 57 | tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), 58 | tf.keras.layers.Conv1D(128, 5, activation='relu'), 59 | tf.keras.layers.GlobalMaxPooling1D(), 60 | tf.keras.layers.Dense(24, activation='relu'), 61 | tf.keras.layers.Dense(1, activation='sigmoid') 62 | ]) 63 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 64 | model.summary() 65 | 66 | num_epochs = 50 67 | history = model.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=1) 68 | 69 | import matplotlib.pyplot as plt 70 | 71 | 72 | def plot_graphs(history, string): 73 | plt.plot(history.history[string]) 74 | plt.plot(history.history['val_'+string]) 75 | plt.xlabel("Epochs") 76 | plt.ylabel(string) 77 | plt.legend([string, 'val_'+string]) 78 | plt.show() 79 | 80 | plot_graphs(history, 'acc') 81 | plot_graphs(history, 'loss') 82 | 83 | model.save("test.h5") -------------------------------------------------------------------------------- /Week 3/course_3_week_3_lesson_2d.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Course 3 - Week 3 - Lesson 2d.ipynb 3 | 4 | Automatically generated by Colaboratory. 
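
# [Editor's note -- two hedged asides on the Lesson 2d code below, assuming TF 2.x]
# 1) tf.enable_eager_execution() exists only in TF 1.x; under TF 2.x eager execution is
#    already the default, so the call can be dropped or guarded:

import tensorflow as tf

if not tf.executing_eagerly():                 # always True on a TF 2.x runtime
    tf.compat.v1.enable_eager_execution()      # only relevant on a TF 1.x runtime

# 2) str(s.numpy()) keeps the bytes repr (e.g. "b'great movie'") in Python 3; decoding the
#    tensor's bytes gives cleaner text for the Tokenizer:
#        training_sentences.append(s.numpy().decode('utf8'))
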
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/Course%203%20-%20Week%203%20-%20Lesson%202d.ipynb 8 | """ 9 | 10 | # NOTE: PLEASE MAKE SURE YOU ARE RUNNING THIS IN A PYTHON3 ENVIRONMENT 11 | 12 | import tensorflow as tf 13 | print(tf.__version__) 14 | 15 | # This is needed for the iterator over the data 16 | # But not necessary if you have TF 2.0 installed 17 | #!pip install tensorflow==2.0.0-beta0 18 | 19 | 20 | tf.enable_eager_execution() 21 | 22 | # !pip install -q tensorflow-datasets 23 | 24 | import tensorflow_datasets as tfds 25 | imdb, info = tfds.load("imdb_reviews", with_info=True, as_supervised=True) 26 | 27 | import numpy as np 28 | 29 | train_data, test_data = imdb['train'], imdb['test'] 30 | 31 | training_sentences = [] 32 | training_labels = [] 33 | 34 | testing_sentences = [] 35 | testing_labels = [] 36 | 37 | # str(s.tonumpy()) is needed in Python3 instead of just s.numpy() 38 | for s,l in train_data: 39 | training_sentences.append(str(s.numpy())) 40 | training_labels.append(l.numpy()) 41 | 42 | for s,l in test_data: 43 | testing_sentences.append(str(s.numpy())) 44 | testing_labels.append(l.numpy()) 45 | 46 | training_labels_final = np.array(training_labels) 47 | testing_labels_final = np.array(testing_labels) 48 | 49 | vocab_size = 10000 50 | embedding_dim = 16 51 | max_length = 120 52 | trunc_type='post' 53 | oov_tok = "" 54 | 55 | 56 | from tensorflow.keras.preprocessing.text import Tokenizer 57 | from tensorflow.keras.preprocessing.sequence import pad_sequences 58 | 59 | tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok) 60 | tokenizer.fit_on_texts(training_sentences) 61 | word_index = tokenizer.word_index 62 | sequences = tokenizer.texts_to_sequences(training_sentences) 63 | padded = pad_sequences(sequences,maxlen=max_length, truncating=trunc_type) 64 | 65 | testing_sequences = tokenizer.texts_to_sequences(testing_sentences) 66 | testing_padded = pad_sequences(testing_sequences,maxlen=max_length) 67 | 68 | reverse_word_index = dict([(value, key) for (key, value) in word_index.items()]) 69 | 70 | def decode_review(text): 71 | return ' '.join([reverse_word_index.get(i, '?') for i in text]) 72 | 73 | print(decode_review(padded[1])) 74 | print(training_sentences[1]) 75 | 76 | model = tf.keras.Sequential([ 77 | tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), 78 | tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32)), 79 | tf.keras.layers.Dense(6, activation='relu'), 80 | tf.keras.layers.Dense(1, activation='sigmoid') 81 | ]) 82 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 83 | model.summary() 84 | 85 | num_epochs = 50 86 | history = model.fit(padded, training_labels_final, epochs=num_epochs, validation_data=(testing_padded, testing_labels_final)) 87 | 88 | import matplotlib.pyplot as plt 89 | 90 | 91 | def plot_graphs(history, string): 92 | plt.plot(history.history[string]) 93 | plt.plot(history.history['val_'+string]) 94 | plt.xlabel("Epochs") 95 | plt.ylabel(string) 96 | plt.legend([string, 'val_'+string]) 97 | plt.show() 98 | 99 | plot_graphs(history, 'accuracy') 100 | plot_graphs(history, 'loss') 101 | 102 | # Model Definition with LSTM 103 | model = tf.keras.Sequential([ 104 | tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), 105 | tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)), 106 | tf.keras.layers.Dense(6, activation='relu'), 107 | 
tf.keras.layers.Dense(1, activation='sigmoid') 108 | ]) 109 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 110 | model.summary() 111 | 112 | # Model Definition with Conv1D 113 | model = tf.keras.Sequential([ 114 | tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length), 115 | tf.keras.layers.Conv1D(128, 5, activation='relu'), 116 | tf.keras.layers.GlobalAveragePooling1D(), 117 | tf.keras.layers.Dense(6, activation='relu'), 118 | tf.keras.layers.Dense(1, activation='sigmoid') 119 | ]) 120 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 121 | model.summary() -------------------------------------------------------------------------------- /Week 3/nlp_course_week_3_exercise_question.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """NLP Course - Week 3 Exercise Question.ipynb 3 | 4 | Automatically generated by Colaboratory. 5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/NLP%20Course%20-%20Week%203%20Exercise%20Question.ipynb 8 | """ 9 | 10 | import json 11 | import tensorflow as tf 12 | import csv 13 | import random 14 | import numpy as np 15 | 16 | from tensorflow.keras.preprocessing.text import Tokenizer 17 | from tensorflow.keras.preprocessing.sequence import pad_sequences 18 | from tensorflow.keras.utils import to_categorical 19 | from tensorflow.keras import regularizers 20 | 21 | 22 | embedding_dim = 100 23 | max_length = 16 24 | trunc_type='post' 25 | padding_type='post' 26 | oov_tok = "" 27 | training_size=160000 #Your dataset size here. Experiment using smaller values (i.e. 16000), but don't forget to train on at least 160000 to see the best effects 28 | test_portion=.1 29 | 30 | corpus = [] 31 | 32 | # Note that I cleaned the Stanford dataset to remove LATIN1 encoding to make it easier for Python CSV reader 33 | # You can do that yourself with: 34 | # iconv -f LATIN1 -t UTF8 training.1600000.processed.noemoticon.csv -o training_cleaned.csv 35 | # I then hosted it on my site to make it easier to use in this notebook 36 | 37 | !wget --no-check-certificate \ 38 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/training_cleaned.csv \ 39 | -O /tmp/training_cleaned.csv 40 | 41 | num_sentences = 0 42 | 43 | with open("/tmp/training_cleaned.csv") as csvfile: 44 | reader = csv.reader(csvfile, delimiter=',') 45 | for row in reader: 46 | # Your Code here. Create list items where the first item is the text, found in row[5], and the second is the label. Note that the label is a '0' or a '4' in the text. When it's the former, make 47 | # your label to be 0, otherwise 1. Keep a count of the number of sentences in num_sentences 48 | list_item=[] 49 | list_item.append(row[5]) 50 | if row[0] == '0': 51 | list_item.append(0) 52 | else: 53 | list_item.append(1) 54 | # YOUR CODE HERE 55 | num_sentences = num_sentences + 1 56 | corpus.append(list_item) 57 | 58 | print(num_sentences) 59 | print(len(corpus)) 60 | print(corpus[1]) 61 | 62 | # Expected Output: 63 | # 1600000 64 | # 1600000 65 | # ["is upset that he can't update his Facebook by texting it... and might cry as a result School today also. 
Blah!", 0] 66 | 67 | sentences=[] 68 | labels=[] 69 | random.shuffle(corpus) 70 | for x in range(training_size): 71 | sentences.append(corpus[x][0]) #(# YOUR CODE HERE) 72 | labels.append(corpus[x][1]) #(# YOUR CODE HERE) 73 | 74 | 75 | tokenizer = Tokenizer() 76 | tokenizer.fit_on_texts(sentences) #(# YOUR CODE HERE) 77 | 78 | word_index = tokenizer.word_index 79 | vocab_size=len(word_index) #(# YOUR CODE HERE) 80 | 81 | sequences = tokenizer.texts_to_sequences(sentences) #(# YOUR CODE HERE) 82 | padded = pad_sequences(sequences, maxlen=max_length, padding = padding_type, truncating=trunc_type) #(# YOUR CODE HERE) 83 | 84 | split = int(test_portion * training_size) 85 | 86 | test_sequences = padded[0:split] #[# YOUR CODE HERE] 87 | training_sequences = padded[split:training_size]#[# YOUR CODE HERE] 88 | test_labels = labels[0:split] #[# YOUR CODE HERE] 89 | training_labels = labels[split:training_size] #[# YOUR CODE HERE] 90 | 91 | print(vocab_size) 92 | print(word_index['i']) 93 | # Expected Output 94 | # 138858 95 | # 1 96 | 97 | # Note this is the 100 dimension version of GloVe from Stanford 98 | # I unzipped and hosted it on my site to make this notebook easier 99 | !wget --no-check-certificate \ 100 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/glove.6B.100d.txt \ 101 | -O /tmp/glove.6B.100d.txt 102 | embeddings_index = {}; 103 | with open('/tmp/glove.6B.100d.txt') as f: 104 | for line in f: 105 | values = line.split(); 106 | word = values[0]; 107 | coefs = np.asarray(values[1:], dtype='float32'); 108 | embeddings_index[word] = coefs; 109 | 110 | embeddings_matrix = np.zeros((vocab_size+1, embedding_dim)); 111 | for word, i in word_index.items(): 112 | embedding_vector = embeddings_index.get(word); 113 | if embedding_vector is not None: 114 | embeddings_matrix[i] = embedding_vector; 115 | 116 | print(len(embeddings_matrix)) 117 | # Expected Output 118 | # 138859 119 | 120 | model = tf.keras.Sequential([ 121 | tf.keras.layers.Embedding(vocab_size+1, embedding_dim, input_length=max_length, weights=[embeddings_matrix], trainable=False), 122 | # YOUR CODE HERE - experiment with combining different types, such as convolutions and LSTMs 123 | ]) 124 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics='accuracy') #(# YOUR CODE HERE) 125 | model.summary() 126 | 127 | num_epochs = 50 128 | history = model.fit(training_sequences, training_labels, epochs=num_epochs, validation_data=(test_sequences, test_labels), verbose=2) 129 | 130 | print("Training Complete") 131 | 132 | import matplotlib.image as mpimg 133 | import matplotlib.pyplot as plt 134 | 135 | #----------------------------------------------------------- 136 | # Retrieve a list of list results on training and test data 137 | # sets for each training epoch 138 | #----------------------------------------------------------- 139 | acc=history.history['acc'] 140 | val_acc=history.history['val_acc'] 141 | loss=history.history['loss'] 142 | val_loss=history.history['val_loss'] 143 | 144 | epochs=range(len(acc)) # Get number of epochs 145 | 146 | #------------------------------------------------ 147 | # Plot training and validation accuracy per epoch 148 | #------------------------------------------------ 149 | plt.plot(epochs, acc, 'r') 150 | plt.plot(epochs, val_acc, 'b') 151 | plt.title('Training and validation accuracy') 152 | plt.xlabel("Epochs") 153 | plt.ylabel("Accuracy") 154 | plt.legend(["Accuracy", "Validation Accuracy"]) 155 | 156 | plt.figure() 157 | 158 | 
#------------------------------------------------ 159 | # Plot training and validation loss per epoch 160 | #------------------------------------------------ 161 | plt.plot(epochs, loss, 'r') 162 | plt.plot(epochs, val_loss, 'b') 163 | plt.title('Training and validation loss') 164 | plt.xlabel("Epochs") 165 | plt.ylabel("Loss") 166 | plt.legend(["Loss", "Validation Loss"]) 167 | 168 | plt.figure() 169 | 170 | 171 | # Expected Output 172 | # A chart where the validation loss does not increase sharply! -------------------------------------------------------------------------------- /Week 4/NLP_Week4_Exercise_Shakespeare_Question.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "NLP-Week4-Exercise-Shakespeare-Question.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python2", 12 | "display_name": "Python 2" 13 | }, 14 | "accelerator": "GPU" 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "code", 19 | "metadata": { 20 | "id": "BOwsuGQQY9OL", 21 | "colab_type": "code", 22 | "colab": {} 23 | }, 24 | "source": [ 25 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 26 | "from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional\n", 27 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 28 | "from tensorflow.keras.models import Sequential\n", 29 | "from tensorflow.keras.optimizers import Adam\n", 30 | "### YOUR CODE HERE\n", 31 | "from tensorflow.keras.regularizers import Regularizer\n", 32 | "# Figure out how to import regularizers\n", 33 | "###\n", 34 | "import tensorflow.keras.utils as ku \n", 35 | "import numpy as np " 36 | ], 37 | "execution_count": 0, 38 | "outputs": [] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "metadata": { 43 | "colab_type": "code", 44 | "id": "PRnDnCW-Z7qv", 45 | "colab": {} 46 | }, 47 | "source": [ 48 | "tokenizer = Tokenizer()\n", 49 | "!wget --no-check-certificate \\\n", 50 | " https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt \\\n", 51 | " -O /tmp/sonnets.txt\n", 52 | "data = open('/tmp/sonnets.txt').read()\n", 53 | "\n", 54 | "corpus = data.lower().split(\"\\n\")\n", 55 | "\n", 56 | "\n", 57 | "tokenizer.fit_on_texts(corpus)\n", 58 | "total_words = len(tokenizer.word_index) + 1\n", 59 | "\n", 60 | "# create input sequences using list of tokens\n", 61 | "input_sequences = []\n", 62 | "for line in corpus:\n", 63 | "\ttoken_list = tokenizer.texts_to_sequences([line])[0]\n", 64 | "\tfor i in range(1, len(token_list)):\n", 65 | "\t\tn_gram_sequence = token_list[:i+1]\n", 66 | "\t\tinput_sequences.append(n_gram_sequence)\n", 67 | "\n", 68 | "\n", 69 | "# pad sequences \n", 70 | "max_sequence_len = max([len(x) for x in input_sequences])\n", 71 | "input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))\n", 72 | "\n", 73 | "# create predictors and label\n", 74 | "predictors, label = input_sequences[:,:-1],input_sequences[:,-1]\n", 75 | "\n", 76 | "label = ku.to_categorical(label, num_classes=total_words)" 77 | ], 78 | "execution_count": 0, 79 | "outputs": [] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "metadata": { 84 | "id": "w9vH8Y59ajYL", 85 | "colab_type": "code", 86 | "colab": {} 87 | }, 88 | "source": [ 89 | "model = Sequential()\n", 90 | "model.add(Embedding(total_words, 100, input_length=max_sequence_len-1)) #(# Your Embedding Layer)\n", 91 | 
"model.add(Bidirectional(LSTM(150, return_sequences=True))) #(# An LSTM Layer)\n", 92 | "model.add(Dropout(0.2)) #(# A dropout layer)\n", 93 | "model.add(LSTM(100)) #(# Another LSTM Layer)\n", 94 | "model.add(Dense(total_words/2, activation='relu')) #(# A Dense Layer including regularizers)\n", 95 | "model.add(Dense(total_words, activation='softmax')) #(# A Dense Layer)\n", 96 | "# Pick an optimizer\n", 97 | "model.compile(loss='categorical_crossentropy', optimizer='adam', metrics='accuracy') #(# Pick a loss function and an optimizer)\n", 98 | "print(model.summary())\n" 99 | ], 100 | "execution_count": 0, 101 | "outputs": [] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "metadata": { 106 | "id": "AIg2f1HBxqof", 107 | "colab_type": "code", 108 | "colab": {} 109 | }, 110 | "source": [ 111 | " history = model.fit(predictors, label, epochs=100, verbose=1)" 112 | ], 113 | "execution_count": 0, 114 | "outputs": [] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "metadata": { 119 | "id": "1fXTEO3GJ282", 120 | "colab_type": "code", 121 | "colab": {} 122 | }, 123 | "source": [ 124 | "import matplotlib.pyplot as plt\n", 125 | "acc = history.history['acc']\n", 126 | "loss = history.history['loss']\n", 127 | "\n", 128 | "epochs = range(len(acc))\n", 129 | "\n", 130 | "plt.plot(epochs, acc, 'b', label='Training accuracy')\n", 131 | "plt.title('Training accuracy')\n", 132 | "\n", 133 | "plt.figure()\n", 134 | "\n", 135 | "plt.plot(epochs, loss, 'b', label='Training Loss')\n", 136 | "plt.title('Training loss')\n", 137 | "plt.legend()\n", 138 | "\n", 139 | "plt.show()" 140 | ], 141 | "execution_count": 0, 142 | "outputs": [] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "metadata": { 147 | "id": "6Vc6PHgxa6Hm", 148 | "colab_type": "code", 149 | "colab": {} 150 | }, 151 | "source": [ 152 | "seed_text = \"Help me Obi Wan Kenobi, you're my only hope\"\n", 153 | "next_words = 100\n", 154 | " \n", 155 | "for _ in range(next_words):\n", 156 | "\ttoken_list = tokenizer.texts_to_sequences([seed_text])[0]\n", 157 | "\ttoken_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')\n", 158 | "\tpredicted = model.predict_classes(token_list, verbose=0)\n", 159 | "\toutput_word = \"\"\n", 160 | "\tfor word, index in tokenizer.word_index.items():\n", 161 | "\t\tif index == predicted:\n", 162 | "\t\t\toutput_word = word\n", 163 | "\t\t\tbreak\n", 164 | "\tseed_text += \" \" + output_word\n", 165 | "print(seed_text)" 166 | ], 167 | "execution_count": 0, 168 | "outputs": [] 169 | } 170 | ] 171 | } -------------------------------------------------------------------------------- /Week 4/Quiz 4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/07Agarg/Natural-Language-Processing-In-Tensorflow-Course/46eb21e25f73fd8644a95e64696d64dd4843e1e8/Week 4/Quiz 4.pdf -------------------------------------------------------------------------------- /Week 4/nlp_week4_exercise_shakespeare_question.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """NLP-Week4-Exercise-Shakespeare-Question.ipynb 3 | 4 | Automatically generated by Colaboratory. 
5 | 6 | Original file is located at 7 | https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/TensorFlow%20In%20Practice/Course%203%20-%20NLP/NLP_Week4_Exercise_Shakespeare_Question.ipynb 8 | """ 9 | 10 | from tensorflow.keras.preprocessing.sequence import pad_sequences 11 | from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional 12 | from tensorflow.keras.preprocessing.text import Tokenizer 13 | from tensorflow.keras.models import Sequential 14 | from tensorflow.keras.optimizers import Adam 15 | ### YOUR CODE HERE 16 | from tensorflow.keras.regularizers import Regularizer 17 | # Figure out how to import regularizers 18 | ### 19 | import tensorflow.keras.utils as ku 20 | import numpy as np 21 | 22 | tokenizer = Tokenizer() 23 | !wget --no-check-certificate \ 24 | https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt \ 25 | -O /tmp/sonnets.txt 26 | data = open('/tmp/sonnets.txt').read() 27 | 28 | corpus = data.lower().split("\n") 29 | 30 | 31 | tokenizer.fit_on_texts(corpus) 32 | total_words = len(tokenizer.word_index) + 1 33 | 34 | # create input sequences using list of tokens 35 | input_sequences = [] 36 | for line in corpus: 37 | token_list = tokenizer.texts_to_sequences([line])[0] 38 | for i in range(1, len(token_list)): 39 | n_gram_sequence = token_list[:i+1] 40 | input_sequences.append(n_gram_sequence) 41 | 42 | 43 | # pad sequences 44 | max_sequence_len = max([len(x) for x in input_sequences]) 45 | input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')) 46 | 47 | # create predictors and label 48 | predictors, label = input_sequences[:,:-1],input_sequences[:,-1] 49 | 50 | label = ku.to_categorical(label, num_classes=total_words) 51 | 52 | model = Sequential() 53 | model.add(Embedding(total_words, 100, input_length=max_sequence_len-1)) #(# Your Embedding Layer) 54 | model.add(Bidirectional(LSTM(150, return_sequences=True))) #(# An LSTM Layer) 55 | model.add(Dropout(0.2)) #(# A dropout layer) 56 | model.add(LSTM(100)) #(# Another LSTM Layer) 57 | model.add(Dense(total_words/2, activation='relu')) #(# A Dense Layer including regularizers) 58 | model.add(Dense(total_words, activation='softmax')) #(# A Dense Layer) 59 | # Pick an optimizer 60 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics='accuracy') #(# Pick a loss function and an optimizer) 61 | print(model.summary()) 62 | 63 | history = model.fit(predictors, label, epochs=100, verbose=1) 64 | 65 | import matplotlib.pyplot as plt 66 | acc = history.history['acc'] 67 | loss = history.history['loss'] 68 | 69 | epochs = range(len(acc)) 70 | 71 | plt.plot(epochs, acc, 'b', label='Training accuracy') 72 | plt.title('Training accuracy') 73 | 74 | plt.figure() 75 | 76 | plt.plot(epochs, loss, 'b', label='Training Loss') 77 | plt.title('Training loss') 78 | plt.legend() 79 | 80 | plt.show() 81 | 82 | seed_text = "Help me Obi Wan Kenobi, you're my only hope" 83 | next_words = 100 84 | 85 | for _ in range(next_words): 86 | token_list = tokenizer.texts_to_sequences([seed_text])[0] 87 | token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre') 88 | predicted = model.predict_classes(token_list, verbose=0) 89 | output_word = "" 90 | for word, index in tokenizer.word_index.items(): 91 | if index == predicted: 92 | output_word = word 93 | break 94 | seed_text += " " + output_word 95 | print(seed_text) --------------------------------------------------------------------------------
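
# [Editor's sketch] One way to complete the Week 4 placeholders above, assuming a TF 2.x
# runtime: import the regularizers module (rather than the Regularizer base class), use
# integer division for the hidden Dense width, compile with a metrics list, and replace
# the removed predict_classes helper with an argmax over model.predict(). The helper names
# and the regularizer strength are illustrative, not the official solution.

import numpy as np
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.models import Sequential


def build_sonnet_model(total_words, max_sequence_len):
    model = Sequential([
        Embedding(total_words, 100, input_length=max_sequence_len - 1),
        Bidirectional(LSTM(150, return_sequences=True)),
        Dropout(0.2),
        LSTM(100),
        # "A Dense Layer including regularizers", as the exercise comment asks for:
        Dense(total_words // 2, activation='relu',
              kernel_regularizer=regularizers.l2(0.01)),
        Dense(total_words, activation='softmax'),
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model


# predict_classes() was removed from tf.keras models; argmax over predict() is equivalent
# for this single-sample, softmax-output case.
def next_word_id(model, token_list):
    return int(np.argmax(model.predict(token_list, verbose=0), axis=-1)[0])
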