├── 3. Natural Language Processing in TensorFlow ├── 1. Sentiment in text │ ├── Quiz1.png │ ├── C3_W1_Lab_1_tokenize_basic.ipynb │ ├── C3_W1_Lab_2_sequences_basic.ipynb │ └── C3_W1_Lab_3_sarcasm.ipynb ├── 2. Word Embeddings │ ├── Quiz2.png │ └── C3_W2_Lab_2_sarcasm_classifier.ipynb ├── 3. Sequence models │ ├── Quiz3.png │ ├── C3_W3_Lab_6_sarcasm_with_1D_convolutional.ipynb │ ├── C3_W3_Lab_5_sarcasm_with_bi_LSTM.ipynb │ ├── C3_W3_Lab_3_Conv1D.ipynb │ ├── C3_W3_Lab_2_multiple_layer_LSTM.ipynb │ ├── C3_W3_Lab_1_single_layer_LSTM.ipynb │ └── C3_W3_Lab_4_imdb_reviews_with_GRU_LSTM_Conv1D.ipynb └── 4. Sequence models and literature │ ├── Quiz4.png │ └── history.pkl ├── 2. Convolutional Neural Networks in TensorFlow ├── 3. Transfer Learning │ ├── Quiz3.png │ └── C2_W3_Lab_1_transfer_learning.ipynb ├── 1. Exploring a Larger Dataset │ ├── Quiz1.png │ └── history.pkl ├── 4. Multiclass Classifications │ └── Quiz4.png └── 2. Augmentation A technique to avoid overfitting │ ├── Quiz2.png │ ├── history_augmented.pkl │ ├── C2_W2_Lab_2_horses_v_humans_augmentation.ipynb │ └── C2_W2_Lab_1_cats_v_dogs_augmentation.ipynb ├── 4. Sequences, Time Series and Prediction ├── 1. Sequences and Prediction │ └── Quiz1.png ├── 4. Real-world time series data │ └── Quiz4.png ├── 2. Deep Neural Networks for Time Series │ ├── Quiz2.png │ └── C4_W2_Lab_1_features_and_labels.ipynb └── 3. Recurrent Neural Networks for Time Series │ └── Quiz3.png ├── 1. Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning ├── 4. Using Real-world Images │ └── Quiz4.png ├── 1. A New Programming Paradigm │ ├── quiz1.png │ ├── C1W1_Assignment.ipynb │ └── C1_W1_Lab_1_hello_world_nn.ipynb ├── 2. Introduction to Computer Vision │ ├── Quiz2.png │ ├── C1_W2_Lab_2_callbacks.ipynb │ └── C1W2_Assignment.ipynb └── 3. Enhancing Vision with Convolutional Neural Networks │ └── Quiz3.png ├── README.md └── LICENSE /3. Natural Language Processing in TensorFlow/1. Sentiment in text/Quiz1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/3. Natural Language Processing in TensorFlow/1. Sentiment in text/Quiz1.png -------------------------------------------------------------------------------- /3. Natural Language Processing in TensorFlow/2. Word Embeddings/Quiz2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/3. Natural Language Processing in TensorFlow/2. Word Embeddings/Quiz2.png -------------------------------------------------------------------------------- /3. Natural Language Processing in TensorFlow/3. Sequence models/Quiz3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/3. Natural Language Processing in TensorFlow/3. Sequence models/Quiz3.png -------------------------------------------------------------------------------- /2. Convolutional Neural Networks in TensorFlow/3. Transfer Learning/Quiz3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/2. Convolutional Neural Networks in TensorFlow/3. 
Transfer Learning/Quiz3.png -------------------------------------------------------------------------------- /4. Sequences, Time Series and Prediction/1. Sequences and Prediction/Quiz1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/4. Sequences, Time Series and Prediction/1. Sequences and Prediction/Quiz1.png -------------------------------------------------------------------------------- /4. Sequences, Time Series and Prediction/4. Real-world time series data/Quiz4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/4. Sequences, Time Series and Prediction/4. Real-world time series data/Quiz4.png -------------------------------------------------------------------------------- /2. Convolutional Neural Networks in TensorFlow/1. Exploring a Larger Dataset/Quiz1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/2. Convolutional Neural Networks in TensorFlow/1. Exploring a Larger Dataset/Quiz1.png -------------------------------------------------------------------------------- /2. Convolutional Neural Networks in TensorFlow/4. Multiclass Classifications/Quiz4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/2. Convolutional Neural Networks in TensorFlow/4. Multiclass Classifications/Quiz4.png -------------------------------------------------------------------------------- /2. Convolutional Neural Networks in TensorFlow/1. Exploring a Larger Dataset/history.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/2. Convolutional Neural Networks in TensorFlow/1. Exploring a Larger Dataset/history.pkl -------------------------------------------------------------------------------- /3. Natural Language Processing in TensorFlow/4. Sequence models and literature/Quiz4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/3. Natural Language Processing in TensorFlow/4. Sequence models and literature/Quiz4.png -------------------------------------------------------------------------------- /3. Natural Language Processing in TensorFlow/4. Sequence models and literature/history.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/3. Natural Language Processing in TensorFlow/4. Sequence models and literature/history.pkl -------------------------------------------------------------------------------- /4. Sequences, Time Series and Prediction/2. Deep Neural Networks for Time Series/Quiz2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/4. 
Sequences, Time Series and Prediction/2. Deep Neural Networks for Time Series/Quiz2.png -------------------------------------------------------------------------------- /4. Sequences, Time Series and Prediction/3. Recurrent Neural Networks for Time Series/Quiz3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/4. Sequences, Time Series and Prediction/3. Recurrent Neural Networks for Time Series/Quiz3.png -------------------------------------------------------------------------------- /2. Convolutional Neural Networks in TensorFlow/2. Augmentation A technique to avoid overfitting/Quiz2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/2. Convolutional Neural Networks in TensorFlow/2. Augmentation A technique to avoid overfitting/Quiz2.png -------------------------------------------------------------------------------- /2. Convolutional Neural Networks in TensorFlow/2. Augmentation A technique to avoid overfitting/history_augmented.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/2. Convolutional Neural Networks in TensorFlow/2. Augmentation A technique to avoid overfitting/history_augmented.pkl -------------------------------------------------------------------------------- /1. Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning/4. Using Real-world Images/Quiz4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/1. Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning/4. Using Real-world Images/Quiz4.png -------------------------------------------------------------------------------- /1. Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning/1. A New Programming Paradigm/quiz1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/1. Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning/1. A New Programming Paradigm/quiz1.png -------------------------------------------------------------------------------- /1. Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning/2. Introduction to Computer Vision/Quiz2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/1. Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning/2. Introduction to Computer Vision/Quiz2.png -------------------------------------------------------------------------------- /1. Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning/3. 
Enhancing Vision with Convolutional Neural Networks/Quiz3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/HEAD/1. Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning/3. Enhancing Vision with Convolutional Neural Networks/Quiz3.png -------------------------------------------------------------------------------- /3. Natural Language Processing in TensorFlow/1. Sentiment in text/C3_W1_Lab_1_tokenize_basic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "WuuWNeIRurUB" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "rL-LzAqpoGLC" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Tokenizer Basics\n", 19 | "\n", 20 | "In most NLP tasks, the initial step in preparing your data is to extract a vocabulary of words from your *corpus* (i.e. input texts). You will need to define how to represent the texts into numerical representations which can be used to train a neural network. These representations are called *tokens* and Tensorflow and Keras makes it easy to generate these using its APIs. You will see how to do that in the next cells." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "-nt3uR9TPrUt" 27 | }, 28 | "source": [ 29 | "## Generating the vocabulary\n", 30 | "\n", 31 | "In this notebook, you will look first at how you can provide a look up dictionary for each word. The code below takes a list of sentences, then takes each word in those sentences and assigns it to an integer. This is done using the [fit_on_texts()](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/text/Tokenizer#fit_on_texts) method and you can get the result by looking at the `word_index` property. More frequent words have a lower index." 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "id": "zaCMcjMQifQc" 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 43 | "\n", 44 | "# Define input sentences\n", 45 | "sentences = [\n", 46 | " 'i love my dog',\n", 47 | " 'I, love my cat'\n", 48 | " ]\n", 49 | "\n", 50 | "# Initialize the Tokenizer class\n", 51 | "tokenizer = Tokenizer(num_words = 100)\n", 52 | "\n", 53 | "# Generate indices for each word in the corpus\n", 54 | "tokenizer.fit_on_texts(sentences)\n", 55 | "\n", 56 | "# Get the indices and print it\n", 57 | "word_index = tokenizer.word_index\n", 58 | "print(word_index)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": { 64 | "id": "uTPWesNaRdX2" 65 | }, 66 | "source": [ 67 | "The `num_words` parameter used in the initializer specifies the maximum number of words minus one (based on frequency) to keep when generating sequences. You will see this in a later exercise. For now, the important thing to note is it does not affect how the `word_index` dictionary is generated. You can try passing `1` instead of `100` as shown on the next cell and you will arrive at the same `word_index`.\n", 68 | "\n", 69 | "Also notice that by default, all punctuation is ignored and words are converted to lower case. 
You can override these behaviors by modifying the `filters` and `lower` arguments of the `Tokenizer` class as described [here](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/text/Tokenizer#arguments). You can try modifying these in the next cell below and compare the output to the one generated above." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "id": "VX1A1pDNoVKm" 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "# Define input sentences\n", 81 | "sentences = [\n", 82 | " 'i love my dog',\n", 83 | " 'I, love my cat',\n", 84 | " 'You love my dog!'\n", 85 | "]\n", 86 | "\n", 87 | "# Initialize the Tokenizer class\n", 88 | "tokenizer = Tokenizer(num_words = 1)\n", 89 | "\n", 90 | "# Generate indices for each word in the corpus\n", 91 | "tokenizer.fit_on_texts(sentences)\n", 92 | "\n", 93 | "# Get the indices and print it\n", 94 | "word_index = tokenizer.word_index\n", 95 | "print(word_index)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": { 101 | "id": "c9LFfwBffDaj" 102 | }, 103 | "source": [ 104 | "That concludes this short exercise on tokenizing input texts!" 105 | ] 106 | } 107 | ], 108 | "metadata": { 109 | "colab": { 110 | "name": "C3_W1_Lab_1_tokenize_basic.ipynb", 111 | "private_outputs": true, 112 | "provenance": [], 113 | "toc_visible": true 114 | }, 115 | "kernelspec": { 116 | "display_name": "Python 3", 117 | "language": "python", 118 | "name": "python3" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 3 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython3", 130 | "version": "3.7.4" 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 0 135 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepLearning.AI-TensorFlow-Developer-Professional-Certificate 2 | My quiz and assignment answers for DeepLearning.AI TensorFlow Developer Professional Certificate course. 3 | 4 | Check **[Coursera Honor Code](https://www.coursera.support/s/article/209818863-Coursera-Honor-Code?language=en_US)** before you take a look at the assignments. 5 | 6 | For more you can check **[course info](https://www.deeplearning.ai/courses/tensorflow-developer-professional-certificate/)**. 
7 | 8 | ![DeepLearning AI TensorFlow Developer](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/assets/89780902/4251492b-f123-462f-9b70-7fad7898c251) 9 | 10 | ## Contents 11 | - ### [Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/1.%20Introduction%20to%20TensorFlow%20for%20Artificial%20Intelligence%2C%20Machine%20Learning%2C%20and%20Deep%20Learning) 12 | * [Week 1](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/1.%20Introduction%20to%20TensorFlow%20for%20Artificial%20Intelligence%2C%20Machine%20Learning%2C%20and%20Deep%20Learning/1.%20A%20New%20Programming%20Paradigm): A New Programming Paradigm 13 | 14 | * [Week 2](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/1.%20Introduction%20to%20TensorFlow%20for%20Artificial%20Intelligence%2C%20Machine%20Learning%2C%20and%20Deep%20Learning/2.%20Introduction%20to%20Computer%20Vision): Introduction to Computer Vision 15 | * [Week 3](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/1.%20Introduction%20to%20TensorFlow%20for%20Artificial%20Intelligence%2C%20Machine%20Learning%2C%20and%20Deep%20Learning/3.%20Enhancing%20Vision%20with%20Convolutional%20Neural%20Networks): Enhancing Vision with Convolutional Neural Networks 16 | * [Week 4](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/1.%20Introduction%20to%20TensorFlow%20for%20Artificial%20Intelligence%2C%20Machine%20Learning%2C%20and%20Deep%20Learning/4.%20Using%20Real-world%20Images): Using Real-world Images 17 | 18 |
19 | Show Certificate 20 | TensorFlow C1 Certificate 21 |
22 | - --- 23 | 24 | - ### [Convolutional Neural Networks in TensorFlow](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/2.%20Convolutional%20Neural%20Networks%20in%20TensorFlow) 25 | * [Week 1](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/2.%20Convolutional%20Neural%20Networks%20in%20TensorFlow/1.%20Exploring%20a%20Larger%20Dataset): Exploring a Larger Dataset 26 | 27 | * [Week 2](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/2.%20Convolutional%20Neural%20Networks%20in%20TensorFlow/2.%20Augmentation%20A%20technique%20to%20avoid%20overfitting): Augmentation A technique to avoid overfitting 28 | 29 | * [Week 3](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/2.%20Convolutional%20Neural%20Networks%20in%20TensorFlow/3.%20Transfer%20Learning): Transfer Learning 30 | 31 | * [Week 4](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/2.%20Convolutional%20Neural%20Networks%20in%20TensorFlow/4.%20Multiclass%20Classifications): Multiclass Classifications 32 | 33 |
34 | Show Certificate 35 | TensorFlow C2 Certificate 36 |
37 | - --- 38 | 39 | - ### [Natural Language Processing in TensorFlow](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/3.%20Natural%20Language%20Processing%20in%20TensorFlow) 40 | * [Week 1](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/3.%20Natural%20Language%20Processing%20in%20TensorFlow/1.%20Sentiment%20in%20text): Sentiment in text 41 | 42 | * [Week 2](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/3.%20Natural%20Language%20Processing%20in%20TensorFlow/2.%20Word%20Embeddings): Word Embeddings 43 | 44 | * [Week 3](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/3.%20Natural%20Language%20Processing%20in%20TensorFlow/3.%20Sequence%20models): Sequence models 45 | 46 | * [Week 4](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/3.%20Natural%20Language%20Processing%20in%20TensorFlow/4.%20Sequence%20models%20and%20literature): Sequence models and literature 47 | 48 |
49 | Show Certificate 50 | TensorFlow C3 Certificate 51 |
52 | - --- 53 | 54 | - ### [Sequences, Time Series and Prediction](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/4.%20Sequences%2C%20Time%20Series%20and%20Prediction) 55 | * [Week 1](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/4.%20Sequences%2C%20Time%20Series%20and%20Prediction/1.%20Sequences%20and%20Prediction): Sequences and Prediction 56 | 57 | * [Week 2](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/4.%20Sequences%2C%20Time%20Series%20and%20Prediction/2.%20Deep%20Neural%20Networks%20for%20Time%20Series): Deep Neural Networks for Time Series 58 | 59 | * [Week 3](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/4.%20Sequences%2C%20Time%20Series%20and%20Prediction/3.%20Recurrent%20Neural%20Networks%20for%20Time%20Series): Recurrent Neural Networks for Time Series 60 | 61 | * [Week 4](https://github.com/BurakAhmet/DeepLearning.AI-TensorFlow-Developer-Professional-Certificate/tree/main/4.%20Sequences%2C%20Time%20Series%20and%20Prediction/4.%20Real-world%20time%20series%20data): Real-world time series data 62 |
63 | Show Certificate 64 | TensorFlow C4 Certificate 65 |
66 | - --- 67 | 68 | -------------------------------------------------------------------------------- /3. Natural Language Processing in TensorFlow/1. Sentiment in text/C3_W1_Lab_2_sequences_basic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "XaBB6MjUkVBA" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "1SmE2CODfmmL" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Generating Sequences and Padding\n", 19 | "\n", 20 | "In this lab, you will look at converting your input sentences into a sequence of tokens. Similar to images in the previous course, you need to prepare text data with uniform size before feeding it to your model. You will see how to do these in the next sections." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "JiFUJg-lmTm6" 27 | }, 28 | "source": [ 29 | "## Text to Sequences\n", 30 | "\n", 31 | "In the previous lab, you saw how to generate a `word_index` dictionary to generate tokens for each word in your corpus. You can then use the result to convert each of the input sentences into a sequence of tokens. That is done using the [`texts_to_sequences()`](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/text/Tokenizer#texts_to_sequences) method as shown below." 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "id": "ArOPfBwyZtln" 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 43 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 44 | "\n", 45 | "# Define your input texts\n", 46 | "sentences = [\n", 47 | " 'I love my dog',\n", 48 | " 'I love my cat',\n", 49 | " 'You love my dog!',\n", 50 | " 'Do you think my dog is amazing?'\n", 51 | "]\n", 52 | "\n", 53 | "# Initialize the Tokenizer class\n", 54 | "tokenizer = Tokenizer(num_words = 100, oov_token=\"\")\n", 55 | "\n", 56 | "# Tokenize the input sentences\n", 57 | "tokenizer.fit_on_texts(sentences)\n", 58 | "\n", 59 | "# Get the word index dictionary\n", 60 | "word_index = tokenizer.word_index\n", 61 | "\n", 62 | "# Generate list of token sequences\n", 63 | "sequences = tokenizer.texts_to_sequences(sentences)\n", 64 | "\n", 65 | "# Print the result\n", 66 | "print(\"\\nWord Index = \" , word_index)\n", 67 | "print(\"\\nSequences = \" , sequences)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": { 73 | "id": "z56pEkF2p8c-" 74 | }, 75 | "source": [ 76 | "## Padding\n", 77 | "\n", 78 | "As mentioned in the lecture, you will usually need to pad the sequences into a uniform length because that is what your model expects. You can use the [pad_sequences](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/sequence/pad_sequences) for that. By default, it will pad according to the length of the longest sequence. You can override this with the `maxlen` argument to define a specific length. Feel free to play with the [other arguments](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/sequence/pad_sequences#args) shown in class and compare the result." 
79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "id": "qljgx1eSlEse" 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "# Pad the sequences to a uniform length\n", 90 | "padded = pad_sequences(sequences, maxlen=5)\n", 91 | "\n", 92 | "# Print the result\n", 93 | "print(\"\\nPadded Sequences:\")\n", 94 | "print(padded)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": { 100 | "id": "btEb9jI0k7Ip" 101 | }, 102 | "source": [ 103 | "## Out-of-vocabulary tokens\n", 104 | "\n", 105 | "Notice that you defined an `oov_token` when the `Tokenizer` was initialized earlier. This will be used when you have input words that are not found in the `word_index` dictionary. For example, you may decide to collect more text after your initial training and decide to not re-generate the `word_index`. You will see this in action in the cell below. Notice that the token `1` is inserted for words that are not found in the dictionary." 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "id": "4fW1NWTok72V" 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "# Try with words that the tokenizer wasn't fit to\n", 117 | "test_data = [\n", 118 | " 'i really love my dog',\n", 119 | " 'my dog loves my manatee',\n", 120 | "]\n", 121 | "\n", 122 | "# Generate the sequences\n", 123 | "test_seq = tokenizer.texts_to_sequences(test_data)\n", 124 | "\n", 125 | "# Print the word index dictionary\n", 126 | "print(\"\\nWord Index = \" , word_index)\n", 127 | "\n", 128 | "# Print the sequences with OOV\n", 129 | "print(\"\\nTest Sequence = \", test_seq)\n", 130 | "\n", 131 | "# Print the padded result\n", 132 | "padded = pad_sequences(test_seq, maxlen=10)\n", 133 | "print(\"\\nPadded Test Sequence: \")\n", 134 | "print(padded)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": { 140 | "id": "UBlQIPBqskAJ" 141 | }, 142 | "source": [ 143 | "This concludes another introduction to text data preprocessing. So far, you've just been using dummy data. In the next exercise, you will be applying the same concepts to a real-world and much larger dataset." 144 | ] 145 | } 146 | ], 147 | "metadata": { 148 | "colab": { 149 | "name": "C3_W1_Lab_2_sequences_basic.ipynb", 150 | "private_outputs": true, 151 | "provenance": [], 152 | "toc_visible": true 153 | }, 154 | "kernelspec": { 155 | "display_name": "Python 3", 156 | "language": "python", 157 | "name": "python3" 158 | }, 159 | "language_info": { 160 | "codemirror_mode": { 161 | "name": "ipython", 162 | "version": 3 163 | }, 164 | "file_extension": ".py", 165 | "mimetype": "text/x-python", 166 | "name": "python", 167 | "nbconvert_exporter": "python", 168 | "pygments_lexer": "ipython3", 169 | "version": "3.7.4" 170 | } 171 | }, 172 | "nbformat": 4, 173 | "nbformat_minor": 0 174 | } -------------------------------------------------------------------------------- /1. Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning/2. 
Introduction to Computer Vision/C1_W2_Lab_2_callbacks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "-eimtRAW5rps" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "vBNo9JrZIYG6" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Using Callbacks to Control Training\n", 19 | "\n", 20 | "In this lab, you will use the [Callbacks API](https://keras.io/api/callbacks/) to stop training when a specified metric is met. This is a useful feature so you won't need to complete all epochs when this threshold is reached. For example, if you set 1000 epochs and your desired accuracy is already reached at epoch 200, then the training will automatically stop. Let's see how this is implemented in the next sections.\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "Mcwrn9AKKVb8" 27 | }, 28 | "source": [ 29 | "## Load and Normalize the Fashion MNIST dataset\n", 30 | "\n", 31 | "Like the previous lab, you will use the Fashion MNIST dataset again for this exercise. And also as mentioned before, you will normalize the pixel values to help optimize the training." 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "id": "8LTaefqDJMIn" 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "import tensorflow as tf\n", 43 | "\n", 44 | "# Instantiate the dataset API\n", 45 | "fmnist = tf.keras.datasets.fashion_mnist\n", 46 | "\n", 47 | "# Load the dataset\n", 48 | "(x_train, y_train),(x_test, y_test) = fmnist.load_data()\n", 49 | "\n", 50 | "# Normalize the pixel values\n", 51 | "x_train, x_test = x_train / 255.0, x_test / 255.0" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": { 57 | "id": "Ia2OadhALJjS" 58 | }, 59 | "source": [ 60 | "## Creating a Callback class\n", 61 | "\n", 62 | "You can create a callback by defining a class that inherits the [tf.keras.callbacks.Callback](https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/Callback) base class. From there, you can define available methods to set where the callback will be executed. For instance below, you will use the [on_epoch_end()](https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/Callback#on_epoch_end) method to check the loss at each training epoch." 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "id": "uuRmQZWVJAJH" 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "class myCallback(tf.keras.callbacks.Callback):\n", 74 | " def on_epoch_end(self, epoch, logs={}):\n", 75 | " '''\n", 76 | " Halts the training when the loss falls below 0.4\n", 77 | "\n", 78 | " Args:\n", 79 | " epoch (integer) - index of epoch (required but unused in the function definition below)\n", 80 | " logs (dict) - metric results from the training epoch\n", 81 | " '''\n", 82 | "\n", 83 | " # Check the accuracy\n", 84 | " if(logs.get('accuracy') > 0.6):\n", 85 | "\n", 86 | " # Stop if threshold is met\n", 87 | " print(\"\\nLoss is lower than 0.6 so cancelling training!\")\n", 88 | " self.model.stop_training = True\n", 89 | "\n", 90 | "# Instantiate class\n", 91 | "callbacks = myCallback()" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": { 97 | "id": "4xlXeLkFeMn8" 98 | }, 99 | "source": [ 100 | "## Define and compile the model\n", 101 | "\n", 102 | "Next, you will define and compile the model. 
The architecture will be similar to the one you built in the previous lab. Afterwards, you will set the optimizer, loss, and metrics that you will use for training." 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": { 109 | "id": "7JXxMg3TpzER" 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "# Define the model\n", 114 | "model = tf.keras.models.Sequential([\n", 115 | " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", 116 | " tf.keras.layers.Dense(512, activation=tf.nn.relu),\n", 117 | " tf.keras.layers.Dense(10, activation=tf.nn.softmax)\n", 118 | "])\n", 119 | "\n", 120 | "# Compile the model\n", 121 | "model.compile(optimizer=tf.optimizers.Adam(),\n", 122 | " loss='sparse_categorical_crossentropy',\n", 123 | " metrics=['accuracy'])\n", 124 | "\n" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": { 130 | "id": "6eLe4cPZe-ui" 131 | }, 132 | "source": [ 133 | "### Train the model\n", 134 | "\n", 135 | "Now you are ready to train the model. To set the callback, simply set the `callbacks` parameter to the `myCallback` instance you declared before. Run the cell below and observe what happens." 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": { 142 | "id": "nLXTB32de3_e" 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "# Train the model with a callback\n", 147 | "model.fit(x_train, y_train, epochs=10, callbacks=[callbacks])" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": { 153 | "id": "fGBSkRQPff93" 154 | }, 155 | "source": [ 156 | "You will notice that the training does not need to complete all 10 epochs. By having a callback at the end of each epoch, it is able to check the training parameters and compare if it meets the threshold you set in the function definition. In this case, it will simply stop when the loss falls below `0.40` after the current epoch.\n", 157 | "\n", 158 | "*Optional Challenge: Modify the code to make the training stop when the accuracy metric exceeds 60%.*\n", 159 | "\n", 160 | "That concludes this simple exercise on callbacks!" 161 | ] 162 | } 163 | ], 164 | "metadata": { 165 | "colab": { 166 | "name": "C1_W2_Lab_2_callbacks.ipynb", 167 | "private_outputs": true, 168 | "provenance": [], 169 | "toc_visible": true 170 | }, 171 | "kernelspec": { 172 | "display_name": "Python 3", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 3 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython3", 186 | "version": "3.7.4" 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 0 191 | } -------------------------------------------------------------------------------- /3. Natural Language Processing in TensorFlow/1. 
Sentiment in text/C3_W1_Lab_3_sarcasm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "uT5yn3mFsD0g" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "SdNGfEo2u-r7" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Tokenizing the Sarcasm Dataset\n", 19 | "\n", 20 | "In this lab, you will be applying what you've learned in the past two exercises to preprocess the [News Headlines Dataset for Sarcasm Detection](https://www.kaggle.com/rmisra/news-headlines-dataset-for-sarcasm-detection/home). This contains news headlines which are labeled as sarcastic or not. You will revisit this dataset in later labs so it is good to be acquainted with it now." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "Twhyfjg0xTkg" 27 | }, 28 | "source": [ 29 | "## Download and inspect the dataset\n", 30 | "\n", 31 | "First, you will fetch the dataset and preview some of its elements." 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "id": "33W129a7xgoJ" 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# Download the dataset\n", 43 | "!wget https://storage.googleapis.com/tensorflow-1-public/course3/sarcasm.json" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": { 49 | "id": "zJHdzh9FyWa2" 50 | }, 51 | "source": [ 52 | "The dataset is saved as a [JSON](https://www.json.org/json-en.html) file and you can use Python's [`json`](https://docs.python.org/3/library/json.html) module to load it into your workspace. The cell below unpacks the JSON file into a list." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "id": "OkaBMeNDwMel" 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "import json\n", 64 | "\n", 65 | "# Load the JSON file\n", 66 | "with open(\"./sarcasm.json\", 'r') as f:\n", 67 | " datastore = json.load(f)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": { 73 | "id": "D2aSBvJVzRNV" 74 | }, 75 | "source": [ 76 | "You can inspect a few of the elements in the list. You will notice that each element consists of a dictionary with a URL link, the actual headline, and a label named `is_sarcastic`. Printed below are two elements with contrasting labels." 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "id": "RiiFcWU2xnMJ" 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "# Non-sarcastic headline\n", 88 | "print(datastore[0])\n", 89 | "\n", 90 | "# Sarcastic headline\n", 91 | "print(datastore[20000])" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": { 97 | "id": "dPuH0bBiz8LJ" 98 | }, 99 | "source": [ 100 | "With that, you can collect all urls, headlines, and labels for easier processing when using the tokenizer. For this lab, you will only need the headlines but we included the code to collect the URLs and labels as well." 
101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "id": "9pxLUQJCxkNB" 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "# Initialize lists\n", 112 | "sentences = []\n", 113 | "labels = []\n", 114 | "urls = []\n", 115 | "\n", 116 | "# Append elements in the dictionaries into each list\n", 117 | "for item in datastore:\n", 118 | " sentences.append(item['headline'])\n", 119 | " labels.append(item['is_sarcastic'])\n", 120 | " urls.append(item['article_link'])" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": { 126 | "id": "lBHSXJ5V0qqK" 127 | }, 128 | "source": [ 129 | "## Preprocessing the headlines\n", 130 | "\n", 131 | "You can convert the `sentences` list above into padded sequences by using the same methods you've been using in the past exercises. The cell below generates the `word_index` dictionary and generates the list of padded sequences for each of the 26,709 headlines." 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "id": "5OSTw3uJuvmY" 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 143 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 144 | "\n", 145 | "# Initialize the Tokenizer class\n", 146 | "tokenizer = Tokenizer(oov_token=\"\")\n", 147 | "\n", 148 | "# Generate the word index dictionary\n", 149 | "tokenizer.fit_on_texts(sentences)\n", 150 | "\n", 151 | "# Print the length of the word index\n", 152 | "word_index = tokenizer.word_index\n", 153 | "print(f'number of words in word_index: {len(word_index)}')\n", 154 | "\n", 155 | "# Print the word index\n", 156 | "print(f'word_index: {word_index}')\n", 157 | "print()\n", 158 | "\n", 159 | "# Generate and pad the sequences\n", 160 | "sequences = tokenizer.texts_to_sequences(sentences)\n", 161 | "padded = pad_sequences(sequences, padding='post')\n", 162 | "\n", 163 | "# Print a sample headline\n", 164 | "index = 2\n", 165 | "print(f'sample headline: {sentences[index]}')\n", 166 | "print(f'padded sequence: {padded[index]}')\n", 167 | "print()\n", 168 | "\n", 169 | "# Print dimensions of padded sequences\n", 170 | "print(f'shape of padded sequences: {padded.shape}')" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": { 176 | "id": "4wyLF5T036W8" 177 | }, 178 | "source": [ 179 | "This concludes the short demo on using text data preprocessing APIs on a relatively large dataset. Next week, you will start building models that can be trained on these output sequences. See you there!" 180 | ] 181 | } 182 | ], 183 | "metadata": { 184 | "colab": { 185 | "name": "C3_W1_Lab_3_sarcasm.ipynb", 186 | "private_outputs": true, 187 | "provenance": [], 188 | "toc_visible": true 189 | }, 190 | "kernelspec": { 191 | "display_name": "Python 3", 192 | "language": "python", 193 | "name": "python3" 194 | }, 195 | "language_info": { 196 | "codemirror_mode": { 197 | "name": "ipython", 198 | "version": 3 199 | }, 200 | "file_extension": ".py", 201 | "mimetype": "text/x-python", 202 | "name": "python", 203 | "nbconvert_exporter": "python", 204 | "pygments_lexer": "ipython3", 205 | "version": "3.7.4" 206 | } 207 | }, 208 | "nbformat": 4, 209 | "nbformat_minor": 0 210 | } -------------------------------------------------------------------------------- /1. Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning/1. 
A New Programming Paradigm/C1W1_Assignment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "mw2VBrBcgvGa" 7 | }, 8 | "source": [ 9 | "# Week 1 Assignment: Housing Prices\n", 10 | "\n", 11 | "In this exercise you'll try to build a neural network that predicts the price of a house according to a simple formula.\n", 12 | "\n", 13 | "Imagine that house pricing is as easy as:\n", 14 | "\n", 15 | "A house has a base cost of 50k, and every additional bedroom adds a cost of 50k. This will make a 1 bedroom house cost 100k, a 2 bedroom house cost 150k etc.\n", 16 | "\n", 17 | "How would you create a neural network that learns this relationship so that it would predict a 7 bedroom house as costing close to 400k etc.\n", 18 | "\n", 19 | "Hint: Your network might work better if you scale the house price down. You don't have to give the answer 400...it might be better to create something that predicts the number 4, and then your answer is in the 'hundreds of thousands' etc." 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "deletable": false, 27 | "editable": false 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "# IMPORTANT: This will check your notebook's metadata for grading.\n", 32 | "# Please do not continue the lab unless the output of this cell tells you to proceed. \n", 33 | "!python add_metadata.py --filename C1W1_Assignment.ipynb" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "_**NOTE:** To prevent errors from the autograder, you are not allowed to edit or delete non-graded cells in this notebook . Please only put your solutions in between the `### START CODE HERE` and `### END CODE HERE` code comments, and also refrain from adding any new cells. 
**Once you have passed this assignment** and want to experiment with any of the non-graded code, you may follow the instructions at the bottom of this notebook._" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "deletable": false, 48 | "editable": false, 49 | "id": "PUNO2E6SeURH", 50 | "tags": [ 51 | "graded" 52 | ] 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "# grader-required-cell\n", 57 | "\n", 58 | "import tensorflow as tf\n", 59 | "import numpy as np" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "deletable": false, 67 | "id": "B-74xrKrBqGJ", 68 | "tags": [ 69 | "graded" 70 | ] 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "# grader-required-cell\n", 75 | "\n", 76 | "# GRADED FUNCTION: house_model\n", 77 | "def house_model():\n", 78 | " ### START CODE HERE\n", 79 | " \n", 80 | " # Define input and output tensors with the values for houses with 1 up to 6 bedrooms\n", 81 | " # Hint: Remember to explictly set the dtype as float\n", 82 | " xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]\n", 83 | " ys = [1.0, 1.5, 2.0, 2.5, 3.0, 3.5]\n", 84 | " \n", 85 | " # Define your model (should be a model with 1 dense layer and 1 unit)\n", 86 | " # Note: you can use `tf.keras` instead of `keras`\n", 87 | " model = tf.keras.Sequential([tf.keras.layers.Dense(units=1, input_shape=[1])])\n", 88 | " \n", 89 | " # Compile your model\n", 90 | " # Set the optimizer to Stochastic Gradient Descent\n", 91 | " # and use Mean Squared Error as the loss function\n", 92 | " model.compile(optimizer=\"sgd\", loss=\"mean_squared_error\")\n", 93 | " \n", 94 | " # Train your model for 1000 epochs by feeding the i/o tensors\n", 95 | " model.fit(xs, ys, epochs=1000)\n", 96 | " \n", 97 | " ### END CODE HERE\n", 98 | " return model" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "Now that you have a function that returns a compiled and trained model when invoked, use it to get the model to predict the price of houses: " 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "deletable": false, 113 | "editable": false, 114 | "scrolled": true, 115 | "tags": [ 116 | "graded" 117 | ] 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "# grader-required-cell\n", 122 | "\n", 123 | "# Get your trained model\n", 124 | "model = house_model()" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "Now that your model has finished training it is time to test it out! You can do so by running the next cell." 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "deletable": false, 139 | "editable": false, 140 | "id": "kMlInDdSBqGK", 141 | "tags": [ 142 | "graded" 143 | ] 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "# grader-required-cell\n", 148 | "\n", 149 | "new_x = 7.0\n", 150 | "prediction = model.predict([new_x])[0]\n", 151 | "print(prediction)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "If everything went as expected you should see a prediction value very close to 4. **If not, try adjusting your code before submitting the assignment.** Notice that you can play around with the value of `new_x` to get different predictions. 
In general you should see that the network was able to learn the linear relationship between `x` and `y`, so if you use a value of 8.0 you should get a prediction close to 4.5 and so on." 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "**Congratulations on finishing this week's assignment!**\n", 166 | "\n", 167 | "You have successfully coded a neural network that learned the linear relationship between two variables. Nice job!\n", 168 | "\n", 169 | "**Keep it up!**" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "
\n", 177 | " Please click here if you want to experiment with any of the non-graded code.\n", 178 | "

Important Note: Please only do this when you've already passed the assignment to avoid problems with the autograder.\n", 179 | "

    \n", 180 | "
  1. On the notebook’s menu, click “View” > “Cell Toolbar” > “Edit Metadata”
  2. \n", 181 | "
  3. Hit the “Edit Metadata” button next to the code cell which you want to lock/unlock
  4. \n", 182 | "
  5. Set the attribute value for “editable” to:\n", 183 | "
      \n", 184 | "
    • “true” if you want to unlock it
    • \n", 185 | "
    • “false” if you want to lock it
    • \n", 186 | "
    \n", 187 | "
  6. \n", 188 | "
  7. On the notebook’s menu, click “View” > “Cell Toolbar” > “None”
  8. \n", 189 | "
\n", 190 | "

Here's a short demo of how to do the steps above: \n", 191 | "
\n", 192 | " \n", 193 | "

" 194 | ] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python 3", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.8.8" 214 | } 215 | }, 216 | "nbformat": 4, 217 | "nbformat_minor": 4 218 | } 219 | -------------------------------------------------------------------------------- /3. Natural Language Processing in TensorFlow/3. Sequence models/C3_W3_Lab_6_sarcasm_with_1D_convolutional.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "H0cmyldIoFGK" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "YEdilk144fzb" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Training a Sarcasm Detection Model using a Convolution Layer\n", 19 | "\n", 20 | "You will be doing the same steps here as the previous lab but will be using a convolution layer instead. As usual, try tweaking the parameters and observe how it affects the results.\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "pmokcpHc5u1R" 27 | }, 28 | "source": [ 29 | "## Download the Dataset" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": { 36 | "id": "dxezdGoV29Yz" 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "# Download the dataset\n", 41 | "!wget https://storage.googleapis.com/tensorflow-1-public/course3/sarcasm.json" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "id": "BTcGA2Po2_nN" 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "import json\n", 53 | "\n", 54 | "# Load the JSON file\n", 55 | "with open(\"./sarcasm.json\", 'r') as f:\n", 56 | " datastore = json.load(f)\n", 57 | "\n", 58 | "# Initialize the lists\n", 59 | "sentences = []\n", 60 | "labels = []\n", 61 | "\n", 62 | "# Collect sentences and labels into the lists\n", 63 | "for item in datastore:\n", 64 | " sentences.append(item['headline'])\n", 65 | " labels.append(item['is_sarcastic'])" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": { 71 | "id": "F2zXSds45s2P" 72 | }, 73 | "source": [ 74 | "## Split the Dataset" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "id": "baDwTn9S3ENB" 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "training_size = 20000\n", 86 | "\n", 87 | "# Split the sentences\n", 88 | "training_sentences = sentences[0:training_size]\n", 89 | "testing_sentences = sentences[training_size:]\n", 90 | "\n", 91 | "# Split the labels\n", 92 | "training_labels = labels[0:training_size]\n", 93 | "testing_labels = labels[training_size:]" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": { 99 | "id": "NdpLY-or5pTP" 100 | }, 101 | "source": [ 102 | "## Data preprocessing" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": { 109 | "id": "RHjZR4oi3LOq" 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "import numpy as np\n", 114 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 115 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 
116 | "\n", 117 | "vocab_size = 10000\n", 118 | "max_length = 120\n", 119 | "trunc_type='post'\n", 120 | "padding_type='post'\n", 121 | "oov_tok = \"\"\n", 122 | "\n", 123 | "# Initialize the Tokenizer class\n", 124 | "tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)\n", 125 | "\n", 126 | "# Generate the word index dictionary\n", 127 | "tokenizer.fit_on_texts(training_sentences)\n", 128 | "word_index = tokenizer.word_index\n", 129 | "\n", 130 | "# Generate and pad the training sequences\n", 131 | "training_sequences = tokenizer.texts_to_sequences(training_sentences)\n", 132 | "training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)\n", 133 | "\n", 134 | "# Generate and pad the testing sequences\n", 135 | "testing_sequences = tokenizer.texts_to_sequences(testing_sentences)\n", 136 | "testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)\n", 137 | "\n", 138 | "# Convert the labels lists into numpy arrays\n", 139 | "training_labels = np.array(training_labels)\n", 140 | "testing_labels = np.array(testing_labels)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": { 146 | "id": "HQBjPv_A5m1x" 147 | }, 148 | "source": [ 149 | "## Build and Compile the Model" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "id": "jGwXGIXvFhXW" 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "import tensorflow as tf\n", 161 | "\n", 162 | "# Parameters\n", 163 | "embedding_dim = 64\n", 164 | "filters = 128\n", 165 | "kernel_size = 5\n", 166 | "dense_dim = 8\n", 167 | "\n", 168 | "# Model Definition with Conv1D\n", 169 | "model_conv = tf.keras.Sequential([\n", 170 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 171 | " tf.keras.layers.Conv1D(filters, kernel_size, activation='relu'),\n", 172 | " tf.keras.layers.GlobalMaxPooling1D(),\n", 173 | " tf.keras.layers.Dense(dense_dim, activation='relu'),\n", 174 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 175 | "])\n", 176 | "\n", 177 | "# Set the training parameters\n", 178 | "model_conv.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 179 | "\n", 180 | "# Print the model summary\n", 181 | "model_conv.summary()" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": { 187 | "id": "PcXC5QG45kM7" 188 | }, 189 | "source": [ 190 | "## Train the Model" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": { 197 | "id": "oB6C55FO3z3q" 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "NUM_EPOCHS = 10\n", 202 | "\n", 203 | "# Train the model\n", 204 | "history_conv = model_conv.fit(training_padded, training_labels, epochs=NUM_EPOCHS, validation_data=(testing_padded, testing_labels))" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "id": "g9DC6dmLF8DC" 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "import matplotlib.pyplot as plt\n", 216 | "\n", 217 | "# Plot Utility\n", 218 | "def plot_graphs(history, string):\n", 219 | " plt.plot(history.history[string])\n", 220 | " plt.plot(history.history['val_'+string])\n", 221 | " plt.xlabel(\"Epochs\")\n", 222 | " plt.ylabel(string)\n", 223 | " plt.legend([string, 'val_'+string])\n", 224 | " plt.show()\n", 225 | "\n", 226 | "# Plot the accuracy and loss history\n", 227 | "plot_graphs(history_conv, 
'accuracy')\n", 228 | "plot_graphs(history_conv, 'loss')" 229 | ] 230 | } 231 | ], 232 | "metadata": { 233 | "colab": { 234 | "name": "C3_W3_Lab_6_sarcasm_with_1D_convolutional.ipynb", 235 | "private_outputs": true, 236 | "provenance": [], 237 | "toc_visible": true 238 | }, 239 | "kernelspec": { 240 | "display_name": "Python 3", 241 | "language": "python", 242 | "name": "python3" 243 | }, 244 | "language_info": { 245 | "codemirror_mode": { 246 | "name": "ipython", 247 | "version": 3 248 | }, 249 | "file_extension": ".py", 250 | "mimetype": "text/x-python", 251 | "name": "python", 252 | "nbconvert_exporter": "python", 253 | "pygments_lexer": "ipython3", 254 | "version": "3.7.4" 255 | } 256 | }, 257 | "nbformat": 4, 258 | "nbformat_minor": 0 259 | } -------------------------------------------------------------------------------- /3. Natural Language Processing in TensorFlow/3. Sequence models/C3_W3_Lab_5_sarcasm_with_bi_LSTM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "7qbosmJcj7m-" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "Q2MY4-M1zuhV" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Training a Sarcasm Detection Model using Bidirectional LSTMs\n", 19 | "\n", 20 | "In this lab, you will revisit the [News Headlines Dataset for Sarcasm Detection](https://www.kaggle.com/rmisra/news-headlines-dataset-for-sarcasm-detection/home) dataset and use it to train a Bi-LSTM Model.\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "S-AgItE6z80t" 27 | }, 28 | "source": [ 29 | "## Download the Dataset\n", 30 | "\n", 31 | "First, you will download the JSON file and extract the contents into lists." 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "id": "k_Wlz9i10Dmn" 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# Download the dataset\n", 43 | "!wget https://storage.googleapis.com/tensorflow-1-public/course3/sarcasm.json" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "id": "Pr4R0I240GOh" 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "import json\n", 55 | "\n", 56 | "# Load the JSON file\n", 57 | "with open(\"./sarcasm.json\", 'r') as f:\n", 58 | " datastore = json.load(f)\n", 59 | "\n", 60 | "# Initialize the lists\n", 61 | "sentences = []\n", 62 | "labels = []\n", 63 | "\n", 64 | "# Collect sentences and labels into the lists\n", 65 | "for item in datastore:\n", 66 | " sentences.append(item['headline'])\n", 67 | " labels.append(item['is_sarcastic'])" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": { 73 | "id": "zN9-ojV55UCR" 74 | }, 75 | "source": [ 76 | "## Split the Dataset\n", 77 | "\n", 78 | "You will then split the lists into train and test sets." 
79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "id": "50H0ZrJf035i" 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "training_size = 20000\n", 90 | "\n", 91 | "# Split the sentences\n", 92 | "training_sentences = sentences[0:training_size]\n", 93 | "testing_sentences = sentences[training_size:]\n", 94 | "\n", 95 | "# Split the labels\n", 96 | "training_labels = labels[0:training_size]\n", 97 | "testing_labels = labels[training_size:]" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": { 103 | "id": "MYVNY4tE5YbN" 104 | }, 105 | "source": [ 106 | "## Data preprocessing\n", 107 | "\n", 108 | "Next, you will generate the vocabulary and padded sequences." 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "id": "hodsUZib1Ce7" 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "import numpy as np\n", 120 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 121 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 122 | "\n", 123 | "vocab_size = 10000\n", 124 | "max_length = 120\n", 125 | "trunc_type='post'\n", 126 | "padding_type='post'\n", 127 | "oov_tok = \"<OOV>\"\n", 128 | "\n", 129 | "# Initialize the Tokenizer class\n", 130 | "tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)\n", 131 | "\n", 132 | "# Generate the word index dictionary\n", 133 | "tokenizer.fit_on_texts(training_sentences)\n", 134 | "word_index = tokenizer.word_index\n", 135 | "\n", 136 | "# Generate and pad the training sequences\n", 137 | "training_sequences = tokenizer.texts_to_sequences(training_sentences)\n", 138 | "training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)\n", 139 | "\n", 140 | "# Generate and pad the testing sequences\n", 141 | "testing_sequences = tokenizer.texts_to_sequences(testing_sentences)\n", 142 | "testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)\n", 143 | "\n", 144 | "# Convert the labels lists into numpy arrays\n", 145 | "training_labels = np.array(training_labels)\n", 146 | "testing_labels = np.array(testing_labels)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": { 152 | "id": "o23gJhj95el5" 153 | }, 154 | "source": [ 155 | "## Build and Compile the Model\n", 156 | "\n", 157 | "The architecture here is almost identical to the one you used in the previous lab with the IMDB Reviews. Try to tweak the parameters and see how it affects the training time and accuracy (both training and validation)." 
158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": { 164 | "id": "jGwXGIXvFhXW" 165 | }, 166 | "outputs": [], 167 | "source": [ 168 | "import tensorflow as tf\n", 169 | "\n", 170 | "# Parameters\n", 171 | "embedding_dim = 64\n", 172 | "lstm_dim = 64\n", 173 | "dense_dim = 32\n", 174 | "\n", 175 | "# Model Definition with LSTM\n", 176 | "model_lstm = tf.keras.Sequential([\n", 177 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 178 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(lstm_dim)),\n", 179 | " tf.keras.layers.Dense(dense_dim, activation='relu'),\n", 180 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 181 | "])\n", 182 | "\n", 183 | "# Set the training parameters\n", 184 | "model_lstm.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 185 | "\n", 186 | "# Print the model summary\n", 187 | "model_lstm.summary()" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": { 193 | "id": "krcQGm7B5g9A" 194 | }, 195 | "source": [ 196 | "## Train the Model" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": { 203 | "id": "nEKV8EMj11BW" 204 | }, 205 | "outputs": [], 206 | "source": [ 207 | "NUM_EPOCHS = 10\n", 208 | "\n", 209 | "# Train the model\n", 210 | "history_lstm = model_lstm.fit(training_padded, training_labels, epochs=NUM_EPOCHS, validation_data=(testing_padded, testing_labels))" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": { 217 | "id": "g9DC6dmLF8DC" 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "import matplotlib.pyplot as plt\n", 222 | "\n", 223 | "# Plot Utility\n", 224 | "def plot_graphs(history, string):\n", 225 | " plt.plot(history.history[string])\n", 226 | " plt.plot(history.history['val_'+string])\n", 227 | " plt.xlabel(\"Epochs\")\n", 228 | " plt.ylabel(string)\n", 229 | " plt.legend([string, 'val_'+string])\n", 230 | " plt.show()\n", 231 | "\n", 232 | "# Plot the accuracy and loss history\n", 233 | "plot_graphs(history_lstm, 'accuracy')\n", 234 | "plot_graphs(history_lstm, 'loss')" 235 | ] 236 | } 237 | ], 238 | "metadata": { 239 | "accelerator": "GPU", 240 | "colab": { 241 | "name": "C3_W3_Lab_5_sarcasm_with_bi_LSTM.ipynb", 242 | "private_outputs": true, 243 | "provenance": [], 244 | "toc_visible": true 245 | }, 246 | "kernelspec": { 247 | "display_name": "Python 3", 248 | "language": "python", 249 | "name": "python3" 250 | }, 251 | "language_info": { 252 | "codemirror_mode": { 253 | "name": "ipython", 254 | "version": 3 255 | }, 256 | "file_extension": ".py", 257 | "mimetype": "text/x-python", 258 | "name": "python", 259 | "nbconvert_exporter": "python", 260 | "pygments_lexer": "ipython3", 261 | "version": "3.7.4" 262 | } 263 | }, 264 | "nbformat": 4, 265 | "nbformat_minor": 0 266 | } -------------------------------------------------------------------------------- /3. Natural Language Processing in TensorFlow/3. 
Sequence models/C3_W3_Lab_3_Conv1D.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "1qZw1162XMO-" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "rFiCyWQ-NC5D" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Using Convolutional Neural Networks\n", 19 | "\n", 20 | "In this lab, you will look at another way of building your text classification model and this will be with a convolution layer. As you learned in Course 2 of this specialization, convolutions extract features by applying filters to the input. Let's see how you can use that for text data in the next sections." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "djvGxIRDHT5e" 27 | }, 28 | "source": [ 29 | "## Download and prepare the dataset" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": { 36 | "id": "Y20Lud2ZMBhW" 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "import tensorflow_datasets as tfds\n", 41 | "\n", 42 | "# Download the subword encoded pretokenized dataset\n", 43 | "dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True)\n", 44 | "\n", 45 | "# Get the tokenizer\n", 46 | "tokenizer = info.features['text'].encoder" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "id": "AW-4Vo4TMUHb" 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "BUFFER_SIZE = 10000\n", 58 | "BATCH_SIZE = 256\n", 59 | "\n", 60 | "# Get the train and test splits\n", 61 | "train_data, test_data = dataset['train'], dataset['test'],\n", 62 | "\n", 63 | "# Shuffle the training data\n", 64 | "train_dataset = train_data.shuffle(BUFFER_SIZE)\n", 65 | "\n", 66 | "# Batch and pad the datasets to the maximum length of the sequences\n", 67 | "train_dataset = train_dataset.padded_batch(BATCH_SIZE)\n", 68 | "test_dataset = test_data.padded_batch(BATCH_SIZE)\n" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": { 74 | "id": "nfatNr6-IAcd" 75 | }, 76 | "source": [ 77 | "## Build the Model\n", 78 | "\n", 79 | "In Course 2, you were using 2D convolution layers because you were applying it on images. For temporal data such as text sequences, you will use [Conv1D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv1D) instead so the convolution will happen over a single dimension. You will also append a pooling layer to reduce the output of the convolution layer. For this lab, you will use [GlobalMaxPooling1D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/GlobalMaxPool1D) to get the max value across the time dimension. You can also use average pooling and you will do that in the next labs. See how these layers behave as standalone layers in the cell below." 
80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "id": "Ay87qbqwIJaV" 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "import tensorflow as tf\n", 91 | "import numpy as np\n", 92 | "\n", 93 | "# Hyperparameters\n", 94 | "batch_size = 1\n", 95 | "timesteps = 20\n", 96 | "features = 20\n", 97 | "filters = 128\n", 98 | "kernel_size = 5\n", 99 | "\n", 100 | "print(f'batch_size: {batch_size}')\n", 101 | "print(f'timesteps (sequence length): {timesteps}')\n", 102 | "print(f'features (embedding size): {features}')\n", 103 | "print(f'filters: {filters}')\n", 104 | "print(f'kernel_size: {kernel_size}')\n", 105 | "\n", 106 | "# Define array input with random values\n", 107 | "random_input = np.random.rand(batch_size,timesteps,features)\n", 108 | "print(f'shape of input array: {random_input.shape}')\n", 109 | "\n", 110 | "# Pass array to convolution layer and inspect output shape\n", 111 | "conv1d = tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, activation='relu')\n", 112 | "result = conv1d(random_input)\n", 113 | "print(f'shape of conv1d output: {result.shape}')\n", 114 | "\n", 115 | "# Pass array to max pooling layer and inspect output shape\n", 116 | "gmp = tf.keras.layers.GlobalMaxPooling1D()\n", 117 | "result = gmp(result)\n", 118 | "print(f'shape of global max pooling output: {result.shape}')" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": { 124 | "id": "lNNYF7tqO7it" 125 | }, 126 | "source": [ 127 | "You can build the model by simply appending the convolution and pooling layer after the embedding layer as shown below." 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "id": "jo1jjO3vn0jo" 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "import tensorflow as tf\n", 139 | "\n", 140 | "# Hyperparameters\n", 141 | "embedding_dim = 64\n", 142 | "filters = 128\n", 143 | "kernel_size = 5\n", 144 | "dense_dim = 64\n", 145 | "\n", 146 | "# Build the model\n", 147 | "model = tf.keras.Sequential([\n", 148 | " tf.keras.layers.Embedding(tokenizer.vocab_size, embedding_dim),\n", 149 | " tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, activation='relu'),\n", 150 | " tf.keras.layers.GlobalMaxPooling1D(),\n", 151 | " tf.keras.layers.Dense(dense_dim, activation='relu'),\n", 152 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 153 | "])\n", 154 | "\n", 155 | "# Print the model summary\n", 156 | "model.summary()" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": { 163 | "id": "Uip7QOVzMoMq" 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "# Set the training parameters\n", 168 | "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": { 174 | "id": "iLJu8HEvPG0L" 175 | }, 176 | "source": [ 177 | "## Train the model\n", 178 | "\n", 179 | "Training will take around 30 seconds per epoch and you will notice that it reaches higher accuracies than the previous models you've built." 
180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "id": "7mlgzaRDMtF6" 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "NUM_EPOCHS = 10\n", 191 | "\n", 192 | "# Train the model\n", 193 | "history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": { 200 | "id": "Mp1Z7P9pYRSK" 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "import matplotlib.pyplot as plt\n", 205 | "\n", 206 | "# Plot utility\n", 207 | "def plot_graphs(history, string):\n", 208 | " plt.plot(history.history[string])\n", 209 | " plt.plot(history.history['val_'+string])\n", 210 | " plt.xlabel(\"Epochs\")\n", 211 | " plt.ylabel(string)\n", 212 | " plt.legend([string, 'val_'+string])\n", 213 | " plt.show()\n", 214 | "\n", 215 | "# Plot the accuracy and results\n", 216 | "plot_graphs(history, \"accuracy\")\n", 217 | "plot_graphs(history, \"loss\")" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": { 223 | "id": "0rD7ZS84PlUp" 224 | }, 225 | "source": [ 226 | "## Wrap Up\n", 227 | "\n", 228 | "In this lab, you explored another model architecture you can use for text classification. In the next lessons, you will revisit full word encoding of the IMDB reviews and compare which model works best when the data is prepared that way." 229 | ] 230 | } 231 | ], 232 | "metadata": { 233 | "accelerator": "GPU", 234 | "colab": { 235 | "name": "C3_W3_Lab_3_Conv1D.ipynb", 236 | "private_outputs": true, 237 | "provenance": [], 238 | "toc_visible": true 239 | }, 240 | "kernelspec": { 241 | "display_name": "Python 3", 242 | "language": "python", 243 | "name": "python3" 244 | }, 245 | "language_info": { 246 | "codemirror_mode": { 247 | "name": "ipython", 248 | "version": 3 249 | }, 250 | "file_extension": ".py", 251 | "mimetype": "text/x-python", 252 | "name": "python", 253 | "nbconvert_exporter": "python", 254 | "pygments_lexer": "ipython3", 255 | "version": "3.7.4" 256 | } 257 | }, 258 | "nbformat": 4, 259 | "nbformat_minor": 0 260 | } -------------------------------------------------------------------------------- /3. Natural Language Processing in TensorFlow/3. Sequence models/C3_W3_Lab_2_multiple_layer_LSTM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "QklQTyOeH7t_" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "rFiCyWQ-NC5D" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Multiple LSTMs\n", 19 | "\n", 20 | "In this lab, you will look at how to build a model with multiple LSTM layers. Since you know the preceding steps already (e.g. downloading datasets, preparing the data, etc.), we won't expound on it anymore so you can just focus on the model building code." 
21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "xqmDNHeByJqr" 27 | }, 28 | "source": [ 29 | "## Download and Prepare the Dataset" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": { 36 | "id": "AW-4Vo4TMUHb" 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "import tensorflow_datasets as tfds\n", 41 | "\n", 42 | "# Download the subword encoded pretokenized dataset\n", 43 | "dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True)\n", 44 | "\n", 45 | "# Get the tokenizer\n", 46 | "tokenizer = info.features['text'].encoder" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": { 52 | "id": "fF8bUh_5Ff7y" 53 | }, 54 | "source": [ 55 | "Like the previous lab, we increased the `BATCH_SIZE` here to make the training faster. If you are doing this on your local machine and have a powerful processor, feel free to use the value used in the lecture (i.e. 64) to get the same results as Laurence." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "id": "ffvRUI0_McDS" 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "BUFFER_SIZE = 10000\n", 67 | "BATCH_SIZE = 256\n", 68 | "\n", 69 | "# Get the train and test splits\n", 70 | "train_data, test_data = dataset['train'], dataset['test'],\n", 71 | "\n", 72 | "# Shuffle the training data\n", 73 | "train_dataset = train_data.shuffle(BUFFER_SIZE)\n", 74 | "\n", 75 | "# Batch and pad the datasets to the maximum length of the sequences\n", 76 | "train_dataset = train_dataset.padded_batch(BATCH_SIZE)\n", 77 | "test_dataset = test_data.padded_batch(BATCH_SIZE)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": { 83 | "id": "xcZEiG9ayNZr" 84 | }, 85 | "source": [ 86 | "## Build and Compile the Model\n", 87 | "\n", 88 | "You can build multiple layer LSTM models by simply appending another `LSTM` layer in your `Sequential` model and enabling the `return_sequences` flag to `True`. This is because an `LSTM` layer expects a sequence input so if the previous layer is also an LSTM, then it should output a sequence as well. See the code cell below that demonstrates this flag in action. You'll notice that the output dimension is in 3 dimensions `(batch_size, timesteps, features)` when `return_sequences` is True." 
89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "id": "18MsI2LU75kH" 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "import tensorflow as tf\n", 100 | "import numpy as np\n", 101 | "\n", 102 | "# Hyperparameters\n", 103 | "batch_size = 1\n", 104 | "timesteps = 20\n", 105 | "features = 16\n", 106 | "lstm_dim = 8\n", 107 | "\n", 108 | "print(f'batch_size: {batch_size}')\n", 109 | "print(f'timesteps (sequence length): {timesteps}')\n", 110 | "print(f'features (embedding size): {features}')\n", 111 | "print(f'lstm output units: {lstm_dim}')\n", 112 | "\n", 113 | "# Define array input with random values\n", 114 | "random_input = np.random.rand(batch_size,timesteps,features)\n", 115 | "print(f'shape of input array: {random_input.shape}')\n", 116 | "\n", 117 | "# Define LSTM that returns a single output\n", 118 | "lstm = tf.keras.layers.LSTM(lstm_dim)\n", 119 | "result = lstm(random_input)\n", 120 | "print(f'shape of lstm output(return_sequences=False): {result.shape}')\n", 121 | "\n", 122 | "# Define LSTM that returns a sequence\n", 123 | "lstm_rs = tf.keras.layers.LSTM(lstm_dim, return_sequences=True)\n", 124 | "result = lstm_rs(random_input)\n", 125 | "print(f'shape of lstm output(return_sequences=True): {result.shape}')" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": { 131 | "id": "6Was3BX6_50C" 132 | }, 133 | "source": [ 134 | "The next cell implements the stacked LSTM architecture." 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "id": "VPNwU1SVyTjm" 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "import tensorflow as tf\n", 146 | "\n", 147 | "# Hyperparameters\n", 148 | "embedding_dim = 64\n", 149 | "lstm1_dim = 64\n", 150 | "lstm2_dim = 32\n", 151 | "dense_dim = 64\n", 152 | "\n", 153 | "# Build the model\n", 154 | "model = tf.keras.Sequential([\n", 155 | " tf.keras.layers.Embedding(tokenizer.vocab_size, embedding_dim),\n", 156 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(lstm1_dim, return_sequences=True)),\n", 157 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(lstm2_dim)),\n", 158 | " tf.keras.layers.Dense(dense_dim, activation='relu'),\n", 159 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 160 | "])\n", 161 | "\n", 162 | "# Print the model summary\n", 163 | "model.summary()" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "id": "Uip7QOVzMoMq" 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "# Set the training parameters\n", 175 | "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": { 181 | "id": "uh39GlZP79DY" 182 | }, 183 | "source": [ 184 | "## Train the Model\n", 185 | "\n", 186 | "The additional LSTM layer will lengthen the training time compared to the previous lab. Given the default parameters we set, it will take around 2 minutes per epoch with the Colab GPU enabled." 
187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "id": "7mlgzaRDMtF6" 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "NUM_EPOCHS = 10\n", 198 | "\n", 199 | "# Train the model\n", 200 | "history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "id": "Mp1Z7P9pYRSK" 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "import matplotlib.pyplot as plt\n", 212 | "\n", 213 | "# Plot utility\n", 214 | "def plot_graphs(history, string):\n", 215 | " plt.plot(history.history[string])\n", 216 | " plt.plot(history.history['val_'+string])\n", 217 | " plt.xlabel(\"Epochs\")\n", 218 | " plt.ylabel(string)\n", 219 | " plt.legend([string, 'val_'+string])\n", 220 | " plt.show()\n", 221 | "\n", 222 | "# Plot the accuracy and results\n", 223 | "plot_graphs(history, \"accuracy\")\n", 224 | "plot_graphs(history, \"loss\")" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": { 230 | "id": "txQdN63vBlTK" 231 | }, 232 | "source": [ 233 | "## Wrap Up\n", 234 | "\n", 235 | "This lab showed how you can build deep networks by stacking LSTM layers. In the next labs, you will continue exploring other architectures you can use to implement your sentiment classification model." 236 | ] 237 | } 238 | ], 239 | "metadata": { 240 | "accelerator": "GPU", 241 | "colab": { 242 | "name": "C3_W3_Lab_2_multiple_layer_LSTM.ipynb", 243 | "private_outputs": true, 244 | "provenance": [], 245 | "toc_visible": true 246 | }, 247 | "kernelspec": { 248 | "display_name": "Python 3", 249 | "language": "python", 250 | "name": "python3" 251 | }, 252 | "language_info": { 253 | "codemirror_mode": { 254 | "name": "ipython", 255 | "version": 3 256 | }, 257 | "file_extension": ".py", 258 | "mimetype": "text/x-python", 259 | "name": "python", 260 | "nbconvert_exporter": "python", 261 | "pygments_lexer": "ipython3", 262 | "version": "3.7.4" 263 | } 264 | }, 265 | "nbformat": 4, 266 | "nbformat_minor": 0 267 | } -------------------------------------------------------------------------------- /3. Natural Language Processing in TensorFlow/3. Sequence models/C3_W3_Lab_1_single_layer_LSTM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "UR4bZpqr_0Pj" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "rFiCyWQ-NC5D" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Single Layer LSTM\n", 19 | "\n", 20 | "So far in this course, you've been using mostly basic dense layers and embeddings to build your models. It detects how the combination of words (or subwords) in the input text determines the output class. In the labs this week, you will look at other layers you can use to build your models. Most of these will deal with *Recurrent Neural Networks*, a kind of model that takes the ordering of inputs into account. This makes it suitable for different applications such as parts-of-speech tagging, music composition, language translation, and the like. For example, you may want your model to differentiate sentiments even if the words used in two sentences are the same:\n", 21 | "\n", 22 | "```\n", 23 | "1: My friends do like the movie but I don't. --> negative review\n", 24 | "2: My friends don't like the movie but I do. 
--> positive review\n", 25 | "```\n", 26 | "\n", 27 | "The first layer you will be looking at is the [*LSTM (Long Short-Term Memory)*](https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTM). In a nutshell, it computes the state of a current timestep and passes it on to the next timesteps where this state is also updated. The process repeats until the final timestep where the output computation is affected by all previous states. Not only that, it can be configured to be bidirectional so you can get the relationship of later words to earlier ones. If you want to go in-depth of how these processes work, you can look at the [Sequence Models](https://www.coursera.org/learn/nlp-sequence-models) course of the Deep Learning Specialization. For this lab, you can take advantage of Tensorflow's APIs that implements the complexities of these layers for you. This makes it easy to just plug it in to your model. Let's see how to do that in the next sections below." 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": { 33 | "id": "tfp2tBZYnE5b" 34 | }, 35 | "source": [ 36 | "## Download the dataset\n", 37 | "\n", 38 | "For this lab, you will use the `subwords8k` pre-tokenized [IMDB Reviews dataset](https://www.tensorflow.org/datasets/catalog/imdb_reviews). You will load it via Tensorflow Datasets as you've done last week:" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "id": "AW-4Vo4TMUHb" 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "import tensorflow_datasets as tfds\n", 50 | "\n", 51 | "# Download the subword encoded pretokenized dataset\n", 52 | "dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True)\n", 53 | "\n", 54 | "# Get the tokenizer\n", 55 | "tokenizer = info.features['text'].encoder" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": { 61 | "id": "YfL_2x3SoXeu" 62 | }, 63 | "source": [ 64 | "## Prepare the dataset\n", 65 | "\n", 66 | "You can then get the train and test splits and generate padded batches.\n", 67 | "\n", 68 | "*Note: To make the training go faster in this lab, you will increase the batch size that Laurence used in the lecture. In particular, you will use `256` and this takes roughly a minute to train per epoch. In the video, Laurence used `16` which takes around 4 minutes per epoch.*" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "id": "ffvRUI0_McDS" 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "BUFFER_SIZE = 10000\n", 80 | "BATCH_SIZE = 256\n", 81 | "\n", 82 | "# Get the train and test splits\n", 83 | "train_data, test_data = dataset['train'], dataset['test'],\n", 84 | "\n", 85 | "# Shuffle the training data\n", 86 | "train_dataset = train_data.shuffle(BUFFER_SIZE)\n", 87 | "\n", 88 | "# Batch and pad the datasets to the maximum length of the sequences\n", 89 | "train_dataset = train_dataset.padded_batch(BATCH_SIZE)\n", 90 | "test_dataset = test_data.padded_batch(BATCH_SIZE)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": { 96 | "id": "4HkUeYNWoi9j" 97 | }, 98 | "source": [ 99 | "## Build and compile the model\n", 100 | "\n", 101 | "Now you will build the model. You will simply swap the `Flatten` or `GlobalAveragePooling1D` from before with an `LSTM` layer. 
Moreover, you will nest it inside a [Bidirectional](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Bidirectional) layer so the passing of the sequence information goes both forwards and backwards. These additional computations will naturally make the training go slower than the models you built last week. You should take this into account when using RNNs in your own applications." 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "id": "FxQooMEkMgur" 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "import tensorflow as tf\n", 113 | "\n", 114 | "# Hyperparameters\n", 115 | "embedding_dim = 64\n", 116 | "lstm_dim = 64\n", 117 | "dense_dim = 64\n", 118 | "\n", 119 | "# Build the model\n", 120 | "model = tf.keras.Sequential([\n", 121 | " tf.keras.layers.Embedding(tokenizer.vocab_size, embedding_dim),\n", 122 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(lstm_dim)),\n", 123 | " tf.keras.layers.Dense(dense_dim, activation='relu'),\n", 124 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 125 | "])\n", 126 | "\n", 127 | "# Print the model summary\n", 128 | "model.summary()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "id": "Uip7QOVzMoMq" 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "# Set the training parameters\n", 140 | "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": { 146 | "id": "EEKm-MzDs59w" 147 | }, 148 | "source": [ 149 | "## Train the model\n", 150 | "\n", 151 | "Now you can start training. Using the default parameters above, you should reach around 98% training accuracy and 82% validation accuracy. You can visualize the results using the same plot utilities. See if you can still improve on this by modifying the hyperparameters or by training with more epochs." 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "id": "7mlgzaRDMtF6" 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "NUM_EPOCHS = 10\n", 163 | "\n", 164 | "history = model.fit(train_dataset, epochs=NUM_EPOCHS, validation_data=test_dataset)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": { 171 | "id": "Mp1Z7P9pYRSK" 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "import matplotlib.pyplot as plt\n", 176 | "\n", 177 | "# Plot utility\n", 178 | "def plot_graphs(history, string):\n", 179 | " plt.plot(history.history[string])\n", 180 | " plt.plot(history.history['val_'+string])\n", 181 | " plt.xlabel(\"Epochs\")\n", 182 | " plt.ylabel(string)\n", 183 | " plt.legend([string, 'val_'+string])\n", 184 | " plt.show()\n", 185 | "\n", 186 | "# Plot the accuracy and results\n", 187 | "plot_graphs(history, \"accuracy\")\n", 188 | "plot_graphs(history, \"loss\")" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": { 194 | "id": "c1pnGOV9ur9Y" 195 | }, 196 | "source": [ 197 | "## Wrap Up\n", 198 | "\n", 199 | "In this lab, you got a first look at using LSTM layers to build Recurrent Neural Networks. You only used a single LSTM layer but this can be stacked as well to build deeper networks. You will see how to do that in the next lab." 
200 | ] 201 | } 202 | ], 203 | "metadata": { 204 | "accelerator": "GPU", 205 | "colab": { 206 | "name": "C3_W3_Lab_1_single_layer_LSTM.ipynb", 207 | "private_outputs": true, 208 | "provenance": [], 209 | "toc_visible": true 210 | }, 211 | "kernelspec": { 212 | "display_name": "Python 3", 213 | "language": "python", 214 | "name": "python3" 215 | }, 216 | "language_info": { 217 | "codemirror_mode": { 218 | "name": "ipython", 219 | "version": 3 220 | }, 221 | "file_extension": ".py", 222 | "mimetype": "text/x-python", 223 | "name": "python", 224 | "nbconvert_exporter": "python", 225 | "pygments_lexer": "ipython3", 226 | "version": "3.7.4" 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 0 231 | } -------------------------------------------------------------------------------- /1. Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning/1. A New Programming Paradigm/C1_W1_Lab_1_hello_world_nn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "ZeBWo2khG0Cd" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "ZIAkIlfmCe1B" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: The Hello World of Deep Learning with Neural Networks" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "fA93WUy1zzWf" 25 | }, 26 | "source": [ 27 | "Like every first app, you should start with something super simple that shows the overall scaffolding for how your code works. In the case of creating neural networks, one simple case is where it learns the relationship between two numbers. So, for example, if you were writing code for a function like this, you already know the 'rules':\n", 28 | "\n", 29 | "\n", 30 | "```\n", 31 | "def hw_function(x):\n", 32 | " y = (2 * x) - 1\n", 33 | " return y\n", 34 | "```\n", 35 | "\n", 36 | "So how would you train a neural network to do the equivalent task? By using data! By feeding it with a set of x's and y's, it should be able to figure out the relationship between them.\n", 37 | "\n", 38 | "This is obviously a very different paradigm from what you might be used to. So let's step through it piece by piece.\n" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": { 44 | "id": "DzbtdRcZDO9B" 45 | }, 46 | "source": [ 47 | "## Imports\n", 48 | "\n", 49 | "Let's start with the imports. Here, you are importing [TensorFlow](https://www.tensorflow.org/) and calling it `tf` for convention and ease of use.\n", 50 | "\n", 51 | "You then import a library called [`numpy`](https://numpy.org) which helps to represent data as arrays easily and to optimize numerical operations.\n", 52 | "\n", 53 | "The framework you will use to build a neural network as a sequence of layers is called [`keras`](https://keras.io/) so you will import that too.\n" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "id": "X9uIpOS2zx7k" 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "import tensorflow as tf\n", 65 | "import numpy as np\n", 66 | "from tensorflow import keras\n", 67 | "\n", 68 | "print(tf.__version__)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": { 74 | "id": "wwJGmDrQ0EoB" 75 | }, 76 | "source": [ 77 | "## Define and Compile the Neural Network\n", 78 | "\n", 79 | "Next, you will create the simplest possible neural network. 
It has 1 layer with 1 neuron, and the input shape to it is just 1 value. You will build this model using Keras' [Sequential](https://keras.io/api/models/sequential/) class which allows you to define the network as a sequence of [layers](https://keras.io/api/layers/). You can use a single [Dense](https://keras.io/api/layers/core_layers/dense/) layer to build this simple network as shown below." 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "id": "kQFAr_xo0M4T" 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "# Build a simple Sequential model\n", 91 | "model = tf.keras.Sequential([keras.layers.Dense(units=1, input_shape=[1])])" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": { 97 | "id": "KhjZjZ-c0Ok9" 98 | }, 99 | "source": [ 100 | "Now, you will compile the neural network. When you do so, you have to specify 2 functions: a [loss](https://keras.io/api/losses/) and an [optimizer](https://keras.io/api/optimizers/).\n", 101 | "\n", 102 | "If you've seen lots of math for machine learning, here's where it's usually used. But in this case, it's nicely encapsulated in functions and classes for you. But what happens here? Let's explain...\n", 103 | "\n", 104 | "You know that in the function declared at the start of this notebook, the relationship between the numbers is `y=2x-1`. When the computer is trying to 'learn' that, it makes a guess... maybe `y=10x+10`. The `loss` function measures the guessed answers against the known correct answers and measures how well or how badly it did.\n", 105 | "\n", 106 | "It then uses the `optimizer` function to make another guess. Based on how the loss function went, it will try to minimize the loss. At that point maybe it will come up with something like `y=5x+5`, which, while still pretty bad, is closer to the correct result (i.e. the loss is lower).\n", 107 | "\n", 108 | "It will repeat this for the number of _epochs_ which you will see shortly. But first, here's how you will tell it to use [mean squared error](https://keras.io/api/losses/regression_losses/#meansquarederror-function) for the loss and [stochastic gradient descent](https://keras.io/api/optimizers/sgd/) for the optimizer. You don't need to understand the math for these yet, but you can see that they work!\n", 109 | "\n", 110 | "Over time, you will learn the different and appropriate loss and optimizer functions for different scenarios.\n" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "id": "m8YQN1H41L-Y" 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "# Compile the model\n", 122 | "model.compile(optimizer='sgd', loss='mean_squared_error')" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": { 128 | "id": "5QyOUhFw1OUX" 129 | }, 130 | "source": [ 131 | "## Providing the Data\n", 132 | "\n", 133 | "Next up, you will feed in some data. In this case, you are taking 6 X's and 6 Y's. You can see that the relationship between these is `y=2x-1`, so where `x = -1`, `y=-3` etc.\n", 134 | "\n", 135 | "The de facto standard way of declaring model inputs and outputs is to use `numpy`, a Python library that provides lots of array type data structures. You can specify these values by building numpy arrays with [`np.array()`](https://numpy.org/doc/stable/reference/generated/numpy.array.html)." 
136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": { 142 | "id": "4Dxk4q-jzEy4" 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "# Declare model inputs and outputs for training\n", 147 | "xs = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0], dtype=float)\n", 148 | "ys = np.array([-3.0, -1.0, 1.0, 3.0, 5.0, 7.0], dtype=float)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": { 154 | "id": "n_YcWRElnM_b" 155 | }, 156 | "source": [ 157 | "# Training the Neural Network\n", 158 | "\n", 159 | "The process of training the neural network, where it 'learns' the relationship between the x's and y's is in the [`model.fit()`](https://keras.io/api/models/model_training_apis/#fit-method) call. This is where it will go through the loop we spoke about above: making a guess, measuring how good or bad it is (aka the loss), using the optimizer to make another guess etc. It will do it for the number of `epochs` you specify. When you run this code, you'll see the loss on the right hand side." 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": { 166 | "id": "lpRrl7WK10Pq" 167 | }, 168 | "outputs": [], 169 | "source": [ 170 | "# Train the model\n", 171 | "model.fit(xs, ys, epochs=500)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": { 177 | "id": "kaFIr71H2OZ-" 178 | }, 179 | "source": [ 180 | "Ok, now you have a model that has been trained to learn the relationship between `x` and `y`. You can use the [`model.predict()`](https://keras.io/api/models/model_training_apis/#predict-method) method to have it figure out the `y` for a previously unknown `x`. So, for example, if `x=10`, what do you think `y` will be? Take a guess before you run this code:" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": { 187 | "id": "oxNzL4lS2Gui" 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "# Make a prediction\n", 192 | "print(model.predict([10.0]))" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": { 198 | "id": "btF2CSFH2iEX" 199 | }, 200 | "source": [ 201 | "You might have thought `19`, right? But it ended up being a little under. Why do you think that is?\n", 202 | "\n", 203 | "Remember that neural networks deal with probabilities. So given the data that we fed the model with, it calculated that there is a very high probability that the relationship between `x` and `y` is `y=2x-1`, but with only 6 data points we can't know for sure. As a result, the result for 10 is very close to 19, but not necessarily 19.\n", 204 | "\n", 205 | "As you work with neural networks, you'll see this pattern recurring. 
You will almost always deal with probabilities, not certainties, and will do a little bit of coding to figure out what the result is based on the probabilities, particularly when it comes to classification.\n" 206 | ] 207 | } 208 | ], 209 | "metadata": { 210 | "colab": { 211 | "name": "C1_W1_Lab_1_hello_world_nn.ipynb", 212 | "private_outputs": true, 213 | "provenance": [], 214 | "toc_visible": true 215 | }, 216 | "kernelspec": { 217 | "display_name": "Python 3", 218 | "language": "python", 219 | "name": "python3" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.7.4" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 0 236 | } -------------------------------------------------------------------------------- /2. Convolutional Neural Networks in TensorFlow/2. Augmentation A technique to avoid overfitting/C2_W2_Lab_2_horses_v_humans_augmentation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "zZYwDmMvnSHF" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "37v_yExZppEp" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Data Augmentation on the Horses or Humans Dataset\n", 19 | "\n", 20 | "In the previous lab, you saw how data augmentation helped improve the model's performance on unseen data. By tweaking the cat and dog training images, the model was able to learn features that are also representative of the validation data. However, applying data augmentation requires good understanding of your dataset. Simply transforming it randomly will not always yield good results.\n", 21 | "\n", 22 | "In the next cells, you will apply the same techniques to the `Horses or Humans` dataset and analyze the results." 
23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "id": "Lslf0vB3rQlU" 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "# Download the training set\n", 34 | "!wget https://storage.googleapis.com/tensorflow-1-public/course2/week3/horse-or-human.zip" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "id": "bTfUYJhTnSHK" 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "# Download the validation set\n", 46 | "!wget https://storage.googleapis.com/tensorflow-1-public/course2/week3/validation-horse-or-human.zip" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "id": "RXZT2UsyIVe_" 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "import os\n", 58 | "import zipfile\n", 59 | "\n", 60 | "# Extract the archive\n", 61 | "zip_ref = zipfile.ZipFile('./horse-or-human.zip', 'r')\n", 62 | "zip_ref.extractall('tmp/horse-or-human')\n", 63 | "\n", 64 | "zip_ref = zipfile.ZipFile('./validation-horse-or-human.zip', 'r')\n", 65 | "zip_ref.extractall('tmp/validation-horse-or-human')\n", 66 | "\n", 67 | "zip_ref.close()\n", 68 | "\n", 69 | "# Directory with training horse pictures\n", 70 | "train_horse_dir = os.path.join('tmp/horse-or-human/horses')\n", 71 | "\n", 72 | "# Directory with training human pictures\n", 73 | "train_human_dir = os.path.join('tmp/horse-or-human/humans')\n", 74 | "\n", 75 | "# Directory with validation horse pictures\n", 76 | "validation_horse_dir = os.path.join('tmp/validation-horse-or-human/horses')\n", 77 | "\n", 78 | "# Directory with validation human pictures\n", 79 | "validation_human_dir = os.path.join('tmp/validation-horse-or-human/humans')" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "id": "PixZ2s5QbYQ3" 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "import tensorflow as tf\n", 91 | "\n", 92 | "# Build the model\n", 93 | "model = tf.keras.models.Sequential([\n", 94 | " # Note the input shape is the desired size of the image 300x300 with 3 bytes color\n", 95 | " # This is the first convolution\n", 96 | " tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(300, 300, 3)),\n", 97 | " tf.keras.layers.MaxPooling2D(2, 2),\n", 98 | " # The second convolution\n", 99 | " tf.keras.layers.Conv2D(32, (3,3), activation='relu'),\n", 100 | " tf.keras.layers.MaxPooling2D(2,2),\n", 101 | " # The third convolution\n", 102 | " tf.keras.layers.Conv2D(64, (3,3), activation='relu'),\n", 103 | " tf.keras.layers.MaxPooling2D(2,2),\n", 104 | " # The fourth convolution\n", 105 | " tf.keras.layers.Conv2D(64, (3,3), activation='relu'),\n", 106 | " tf.keras.layers.MaxPooling2D(2,2),\n", 107 | " # The fifth convolution\n", 108 | " tf.keras.layers.Conv2D(64, (3,3), activation='relu'),\n", 109 | " tf.keras.layers.MaxPooling2D(2,2),\n", 110 | " # Flatten the results to feed into a DNN\n", 111 | " tf.keras.layers.Flatten(),\n", 112 | " # 512 neuron hidden layer\n", 113 | " tf.keras.layers.Dense(512, activation='relu'),\n", 114 | " # Only 1 output neuron. 
It will contain a value from 0-1 where 0 for 1 class ('horses') and 1 for the other ('humans')\n", 115 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 116 | "])" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "id": "8DHWhFP_uhq3" 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "from tensorflow.keras.optimizers import RMSprop\n", 128 | "\n", 129 | "# Set training parameters\n", 130 | "model.compile(loss='binary_crossentropy',\n", 131 | " optimizer=RMSprop(learning_rate=1e-4),\n", 132 | " metrics=['accuracy'])" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "id": "ClebU9NJg99G" 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n", 144 | "\n", 145 | "# Apply data augmentation\n", 146 | "train_datagen = ImageDataGenerator(\n", 147 | " rescale=1./255,\n", 148 | " rotation_range=40,\n", 149 | " width_shift_range=0.2,\n", 150 | " height_shift_range=0.2,\n", 151 | " shear_range=0.2,\n", 152 | " zoom_range=0.2,\n", 153 | " horizontal_flip=True,\n", 154 | " fill_mode='nearest')\n", 155 | "\n", 156 | "validation_datagen = ImageDataGenerator(rescale=1/255)\n", 157 | "\n", 158 | "# Flow training images in batches of 128 using train_datagen generator\n", 159 | "train_generator = train_datagen.flow_from_directory(\n", 160 | " 'tmp/horse-or-human/', # This is the source directory for training images\n", 161 | " target_size=(300, 300), # All images will be resized to 300x300\n", 162 | " batch_size=128,\n", 163 | " # Since we use binary_crossentropy loss, we need binary labels\n", 164 | " class_mode='binary')\n", 165 | "\n", 166 | "# Flow training images in batches of 128 using train_datagen generator\n", 167 | "validation_generator = validation_datagen.flow_from_directory(\n", 168 | " 'tmp/validation-horse-or-human/', # This is the source directory for validation images\n", 169 | " target_size=(300, 300), # All images will be resized to 300x300\n", 170 | " batch_size=32,\n", 171 | " # Since we use binary_crossentropy loss, we need binary labels\n", 172 | " class_mode='binary')" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "id": "Fb1_lgobv81m" 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "# Constant for epochs\n", 184 | "EPOCHS = 20\n", 185 | "\n", 186 | "# Train the model\n", 187 | "history = model.fit(\n", 188 | " train_generator,\n", 189 | " steps_per_epoch=8,\n", 190 | " epochs=EPOCHS,\n", 191 | " verbose=1,\n", 192 | " validation_data = validation_generator,\n", 193 | " validation_steps=8)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": { 200 | "id": "7zNPRWOVJdOH" 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "import matplotlib.pyplot as plt\n", 205 | "\n", 206 | "# Plot the model results\n", 207 | "acc = history.history['accuracy']\n", 208 | "val_acc = history.history['val_accuracy']\n", 209 | "loss = history.history['loss']\n", 210 | "val_loss = history.history['val_loss']\n", 211 | "\n", 212 | "epochs = range(len(acc))\n", 213 | "\n", 214 | "plt.plot(epochs, acc, 'r', label='Training accuracy')\n", 215 | "plt.plot(epochs, val_acc, 'b', label='Validation accuracy')\n", 216 | "plt.title('Training and validation accuracy')\n", 217 | "\n", 218 | "plt.figure()\n", 219 | "\n", 220 | "plt.plot(epochs, loss, 'r', label='Training Loss')\n", 221 | "plt.plot(epochs, 
val_loss, 'b', label='Validation Loss')\n", 222 | "plt.title('Training and validation loss')\n", 223 | "plt.legend()\n", 224 | "\n", 225 | "plt.show()" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": { 231 | "id": "hwyabYvCsvtn" 232 | }, 233 | "source": [ 234 | "As you can see in the results, the preprocessing techniques used in augmenting the data did not help much in the results. The validation accuracy is fluctuating and not trending up like the training accuracy. This might be because the additional training data generated still do not represent the features in the validation data. For example, some human or horse poses in the validation set cannot be mimicked by the image processing techniques that `ImageDataGenerator` provides. It might also be that the background of the training images are also learned so the white background of the validation set is throwing the model off even with cropping. Try looking at the validation images in the `tmp/validation-horse-or-human` directory (note: if you are using Colab, you can use the file explorer on the left to explore the images) and see if you can augment the training images to match its characteristics. If this is not possible, then at this point you can consider other techniques and you will see that in next week's lessons." 235 | ] 236 | } 237 | ], 238 | "metadata": { 239 | "accelerator": "GPU", 240 | "colab": { 241 | "name": "C2_W2_Lab_2_horses_v_humans_augmentation.ipynb", 242 | "private_outputs": true, 243 | "provenance": [] 244 | }, 245 | "kernelspec": { 246 | "display_name": "Python 3", 247 | "language": "python", 248 | "name": "python3" 249 | }, 250 | "language_info": { 251 | "codemirror_mode": { 252 | "name": "ipython", 253 | "version": 3 254 | }, 255 | "file_extension": ".py", 256 | "mimetype": "text/x-python", 257 | "name": "python", 258 | "nbconvert_exporter": "python", 259 | "pygments_lexer": "ipython3", 260 | "version": "3.7.4" 261 | } 262 | }, 263 | "nbformat": 4, 264 | "nbformat_minor": 0 265 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /1. 
Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning/2. Introduction to Computer Vision/C1W2_Assignment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "_2s0EJ5Fy4u2" 7 | }, 8 | "source": [ 9 | "# Week 2: Implementing Callbacks in TensorFlow using the MNIST Dataset\n", 10 | "\n", 11 | "In the course you learned how to do classification using Fashion MNIST, a data set containing items of clothing. There's another, similar dataset called MNIST which has items of handwriting -- the digits 0 through 9.\n", 12 | "\n", 13 | "Write an MNIST classifier that trains to 99% accuracy and stops once this threshold is achieved. In the lecture you saw how this was done for the loss but here you will be using accuracy instead.\n", 14 | "\n", 15 | "Some notes:\n", 16 | "1. Your network should succeed in less than 9 epochs.\n", 17 | "2. When it reaches 99% or greater it should print out the string \"Reached 99% accuracy so cancelling training!\" and stop training.\n", 18 | "3. If you add any additional variables, make sure you use the same names as the ones used in the class. This is important for the function signatures (the parameters and names) of the callbacks." 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 10, 24 | "metadata": { 25 | "deletable": false, 26 | "editable": false 27 | }, 28 | "outputs": [ 29 | { 30 | "name": "stdout", 31 | "output_type": "stream", 32 | "text": [ 33 | "\u001b[32mGrader metadata detected! You can proceed with the lab!\u001b[0m\r\n" 34 | ] 35 | } 36 | ], 37 | "source": [ 38 | "# IMPORTANT: This will check your notebook's metadata for grading.\n", 39 | "# Please do not continue the lab unless the output of this cell tells you to proceed. \n", 40 | "!python add_metadata.py --filename C1W2_Assignment.ipynb" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "_**NOTE:** To prevent errors from the autograder, you are not allowed to edit or delete non-graded cells in this notebook . Please only put your solutions in between the `### START CODE HERE` and `### END CODE HERE` code comments, and also refrain from adding any new cells. **Once you have passed this assignment** and want to experiment with any of the non-graded code, you may follow the instructions at the bottom of this notebook._" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 11, 53 | "metadata": { 54 | "deletable": false, 55 | "editable": false, 56 | "id": "djVOgMHty4u3", 57 | "tags": [ 58 | "graded" 59 | ] 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "# grader-required-cell\n", 64 | "\n", 65 | "import os\n", 66 | "import tensorflow as tf\n", 67 | "from tensorflow import keras" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## Load and inspect the data\n", 75 | "\n", 76 | "Begin by loading the data. A couple of things to notice:\n", 77 | "\n", 78 | "- The file `mnist.npz` is already included in the current workspace under the `data` directory. 
By default the `load_data` from Keras accepts a path relative to `~/.keras/datasets` but in this case it is stored somewhere else, as a result of this, you need to specify the full path.\n", 79 | "\n", 80 | "- `load_data` returns the train and test sets in the form of the tuples `(x_train, y_train), (x_test, y_test)` but in this exercise you will be needing only the train set so you can ignore the second tuple." 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 12, 86 | "metadata": { 87 | "deletable": false, 88 | "editable": false, 89 | "tags": [ 90 | "graded" 91 | ] 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "# grader-required-cell\n", 96 | "\n", 97 | "# Load the data\n", 98 | "\n", 99 | "# Get current working directory\n", 100 | "current_dir = os.getcwd()\n", 101 | "\n", 102 | "# Append data/mnist.npz to the previous path to get the full path\n", 103 | "data_path = os.path.join(current_dir, \"data/mnist.npz\")\n", 104 | "\n", 105 | "# Discard test set\n", 106 | "(x_train, y_train), _ = tf.keras.datasets.mnist.load_data(path=data_path)\n", 107 | " \n", 108 | "# Normalize pixel values\n", 109 | "x_train = x_train / 255.0" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "Now take a look at the shape of the training data:" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 13, 122 | "metadata": { 123 | "deletable": false, 124 | "editable": false, 125 | "tags": [ 126 | "graded" 127 | ] 128 | }, 129 | "outputs": [ 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "There are 60000 examples with shape (28, 28)\n" 135 | ] 136 | } 137 | ], 138 | "source": [ 139 | "# grader-required-cell\n", 140 | "\n", 141 | "data_shape = x_train.shape\n", 142 | "\n", 143 | "print(f\"There are {data_shape[0]} examples with shape ({data_shape[1]}, {data_shape[2]})\")" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "## Defining your callback\n", 151 | "\n", 152 | "Now it is time to create your own custom callback. For this complete the `myCallback` class and the `on_epoch_end` method in the cell below. If you need some guidance on how to proceed, check out this [link](https://www.tensorflow.org/guide/keras/custom_callback)." 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 17, 158 | "metadata": { 159 | "deletable": false, 160 | "tags": [ 161 | "graded" 162 | ] 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "# grader-required-cell\n", 167 | "\n", 168 | "# GRADED CLASS: myCallback\n", 169 | "### START CODE HERE\n", 170 | "\n", 171 | "# Remember to inherit from the correct class\n", 172 | "class myCallback(keras.callbacks.Callback):\n", 173 | " # Define the correct function signature for on_epoch_end\n", 174 | " def on_epoch_end(self, epoch, logs={}):\n", 175 | " if logs.get('accuracy') is not None and logs.get('accuracy') > 0.99: \n", 176 | " print(\"\\nReached 99% accuracy so cancelling training!\")\n", 177 | " \n", 178 | " # Stop training once the above condition is met\n", 179 | " self.model.stop_training = True\n", 180 | "\n", 181 | "### END CODE HERE\n", 182 | "\n" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "## Create and train your model\n", 190 | "\n", 191 | "Now that you have defined your callback it is time to complete the `train_mnist` function below. 
\n", 192 | "\n", 193 | "**You must set your model to train for 10 epochs and the callback should fire before the 9th epoch for you to pass this assignment.**\n", 194 | "\n", 195 | "**Hint:**\n", 196 | "- Feel free to try the architecture for the neural network that you see fit but in case you need extra help you can check out an architecture that works pretty well at the end of this notebook." 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 15, 202 | "metadata": { 203 | "deletable": false, 204 | "id": "rEHcB3kqyHZ6", 205 | "tags": [ 206 | "graded" 207 | ] 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "# grader-required-cell\n", 212 | "\n", 213 | "# GRADED FUNCTION: train_mnist\n", 214 | "def train_mnist(x_train, y_train):\n", 215 | "\n", 216 | " ### START CODE HERE\n", 217 | " \n", 218 | " # Instantiate the callback class\n", 219 | " callbacks = myCallback()\n", 220 | " \n", 221 | " # Define the model\n", 222 | " model = tf.keras.models.Sequential([ \n", 223 | " keras.layers.Flatten(input_shape=(28,28)),\n", 224 | " keras.layers.Dense(512, activation=tf.nn.relu),\n", 225 | " keras.layers.Dense(10, activation=tf.nn.softmax),\n", 226 | " ]) \n", 227 | " \n", 228 | " # Compile the model\n", 229 | " model.compile(optimizer='adam', \n", 230 | " loss='sparse_categorical_crossentropy', \n", 231 | " metrics=['accuracy']) \n", 232 | " \n", 233 | " # Fit the model for 10 epochs adding the callbacks\n", 234 | " # and save the training history\n", 235 | " history = model.fit(x_train, y_train, epochs=10, callbacks=[callbacks])\n", 236 | "\n", 237 | " ### END CODE HERE\n", 238 | "\n", 239 | " return history" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "Call the `train_mnist` passing in the appropiate parameters to get the training history:" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 18, 252 | "metadata": { 253 | "deletable": false, 254 | "editable": false, 255 | "id": "sFgpwbGly4u4", 256 | "tags": [ 257 | "graded" 258 | ] 259 | }, 260 | "outputs": [ 261 | { 262 | "name": "stdout", 263 | "output_type": "stream", 264 | "text": [ 265 | "Epoch 1/10\n", 266 | "1875/1875 [==============================] - 8s 4ms/step - loss: 0.1992 - accuracy: 0.9416\n", 267 | "Epoch 2/10\n", 268 | "1875/1875 [==============================] - 8s 4ms/step - loss: 0.0799 - accuracy: 0.9752\n", 269 | "Epoch 3/10\n", 270 | "1875/1875 [==============================] - 8s 4ms/step - loss: 0.0519 - accuracy: 0.9842\n", 271 | "Epoch 4/10\n", 272 | "1875/1875 [==============================] - 8s 4ms/step - loss: 0.0372 - accuracy: 0.9883\n", 273 | "Epoch 5/10\n", 274 | "1873/1875 [============================>.] - ETA: 0s - loss: 0.0271 - accuracy: 0.9911\n", 275 | "Reached 99% accuracy so cancelling training!\n", 276 | "1875/1875 [==============================] - 8s 4ms/step - loss: 0.0271 - accuracy: 0.9911\n" 277 | ] 278 | } 279 | ], 280 | "source": [ 281 | "# grader-required-cell\n", 282 | "\n", 283 | "hist = train_mnist(x_train, y_train)" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "If you see the message `Reached 99% accuracy so cancelling training!` printed out after less than 9 epochs it means your callback worked as expected. 
" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "## Need more help?\n", 298 | "\n", 299 | "Run the following cell to see an architecture that works well for the problem at hand:" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "# WE STRONGLY RECOMMEND YOU TO TRY YOUR OWN ARCHITECTURES FIRST\n", 309 | "# AND ONLY RUN THIS CELL IF YOU WISH TO SEE AN ANSWER\n", 310 | "\n", 311 | "import base64\n", 312 | "\n", 313 | "encoded_answer = \"CiAgIC0gQSBGbGF0dGVuIGxheWVyIHRoYXQgcmVjZWl2ZXMgaW5wdXRzIHdpdGggdGhlIHNhbWUgc2hhcGUgYXMgdGhlIGltYWdlcwogICAtIEEgRGVuc2UgbGF5ZXIgd2l0aCA1MTIgdW5pdHMgYW5kIFJlTFUgYWN0aXZhdGlvbiBmdW5jdGlvbgogICAtIEEgRGVuc2UgbGF5ZXIgd2l0aCAxMCB1bml0cyBhbmQgc29mdG1heCBhY3RpdmF0aW9uIGZ1bmN0aW9uCg==\"\n", 314 | "encoded_answer = encoded_answer.encode('ascii')\n", 315 | "answer = base64.b64decode(encoded_answer)\n", 316 | "answer = answer.decode('ascii')\n", 317 | "\n", 318 | "print(answer)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "**Congratulations on finishing this week's assignment!**\n", 326 | "\n", 327 | "You have successfully implemented a callback that gives you more control over the training loop for your model. Nice job!\n", 328 | "\n", 329 | "**Keep it up!**" 330 | ] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "metadata": {}, 335 | "source": [ 336 | "
\n", 337 | " Please click here if you want to experiment with any of the non-graded code.\n", 338 | "

Important Note: Please only do this when you've already passed the assignment to avoid problems with the autograder.\n", 339 | "

    \n", 340 | "
  1. On the notebook’s menu, click “View” > “Cell Toolbar” > “Edit Metadata”
  2. \n", 341 | "
  3. Hit the “Edit Metadata” button next to the code cell which you want to lock/unlock
  4. \n", 342 | "
  5. Set the attribute value for “editable” to:\n", 343 | "
      \n", 344 | "
    • “true” if you want to unlock it
    • \n", 345 | "
    • “false” if you want to lock it
    • \n", 346 | "
    \n", 347 | "
  6. \n", 348 | "
  7. On the notebook’s menu, click “View” > “Cell Toolbar” > “None”
  8. \n", 349 | "
\n", 350 | "

Here's a short demo of how to do the steps above: \n", 351 | "
\n", 352 | " \n", 353 | "

" 354 | ] 355 | } 356 | ], 357 | "metadata": { 358 | "kernelspec": { 359 | "display_name": "Python 3", 360 | "language": "python", 361 | "name": "python3" 362 | }, 363 | "language_info": { 364 | "codemirror_mode": { 365 | "name": "ipython", 366 | "version": 3 367 | }, 368 | "file_extension": ".py", 369 | "mimetype": "text/x-python", 370 | "name": "python", 371 | "nbconvert_exporter": "python", 372 | "pygments_lexer": "ipython3", 373 | "version": "3.8.8" 374 | } 375 | }, 376 | "nbformat": 4, 377 | "nbformat_minor": 1 378 | } 379 | -------------------------------------------------------------------------------- /2. Convolutional Neural Networks in TensorFlow/3. Transfer Learning/C2_W3_Lab_1_transfer_learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "rTjsSu5O7kqz" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "bT0to3TL2q7H" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Transfer Learning\n", 19 | "\n", 20 | "In this lab, you will see how you can use a pre-trained model to achieve good results even with a small training dataset. This is called _transfer learning_ and you do this by leveraging the trained layers of an existing model and adding your own layers to fit your application. For example, you can:\n", 21 | "\n", 22 | "1. just get the convolution layers of one model\n", 23 | "2. attach some dense layers onto it\n", 24 | "3. train just the dense network\n", 25 | "4. evaluate the results\n", 26 | "\n", 27 | "Doing this will allow you to save time building your application because you will essentially skip weeks of training time of very deep networks. You will just use the features it has learned and tweak it for your dataset. Let's see how these are done in the next sections." 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": { 33 | "id": "Qvrr8pLRzJMV" 34 | }, 35 | "source": [ 36 | "**IMPORTANT NOTE:** This notebook is designed to run as a Colab. Running the notebook on your local machine might result in some of the code blocks throwing errors." 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": { 42 | "id": "-12slkPL6_JH" 43 | }, 44 | "source": [ 45 | "## Setup the pretrained model\n", 46 | "\n", 47 | "You will need to prepare pretrained model and configure the layers that you need. For this exercise, you will use the convolution layers of the [InceptionV3](https://arxiv.org/abs/1512.00567) architecture as your base model. To do that, you need to:\n", 48 | "\n", 49 | "1. Set the input shape to fit your application. In this case. set it to `150x150x3` as you've been doing in the last few labs.\n", 50 | "\n", 51 | "2. Pick and freeze the convolution layers to take advantage of the features it has learned already.\n", 52 | "\n", 53 | "3. Add dense layers which you will train.\n", 54 | "\n", 55 | "Let's see how to do these in the next cells." 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": { 61 | "id": "3VqhFEK2Y-PK" 62 | }, 63 | "source": [ 64 | "First, in preparing the input to the model, you want to fetch the pretrained weights of the `InceptionV3` model and remove the fully connected layer at the end because you will be replacing it later. You will also specify the input shape that your model will accept. Lastly, you want to freeze the weights of these layers because they have been trained already." 
65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "id": "1xJZ5glPPCRz" 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "# Download the pre-trained weights. No top means it excludes the fully connected layer it uses for classification.\n", 76 | "!wget --no-check-certificate \\\n", 77 | " https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 \\\n", 78 | " -O /tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "id": "KsiBCpQ1VvPp" 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "from tensorflow.keras.applications.inception_v3 import InceptionV3\n", 90 | "from tensorflow.keras import layers\n", 91 | "\n", 92 | "# Set the weights file you downloaded into a variable\n", 93 | "local_weights_file = '/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'\n", 94 | "\n", 95 | "# Initialize the base model.\n", 96 | "# Set the input shape and remove the dense layers.\n", 97 | "pre_trained_model = InceptionV3(input_shape = (150, 150, 3),\n", 98 | " include_top = False,\n", 99 | " weights = None)\n", 100 | "\n", 101 | "# Load the pre-trained weights you downloaded.\n", 102 | "pre_trained_model.load_weights(local_weights_file)\n", 103 | "\n", 104 | "# Freeze the weights of the layers.\n", 105 | "for layer in pre_trained_model.layers:\n", 106 | " layer.trainable = False" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": { 112 | "id": "1y2rEnqFaa9k" 113 | }, 114 | "source": [ 115 | "You can see the summary of the model below. You can see that it is a very deep network. You can then select up to which point of the network you want to use. As Laurence showed in the exercise, you will use up to `mixed7` as your base model and add to that. This is because the original last layer might be too specialized in what it has learned so it might not translate well into your application. `mixed7` on the other hand will be more generalized and you can start with that for your application. After the exercise, feel free to modify and use other layers to see what the results you get." 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "id": "qeGP0Ust5kCR" 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "pre_trained_model.summary()\n" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "id": "jDmGO9tg5iPc" 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "# Choose `mixed7` as the last layer of your base model\n", 138 | "last_layer = pre_trained_model.get_layer('mixed7')\n", 139 | "print('last layer output shape: ', last_layer.output_shape)\n", 140 | "last_output = last_layer.output" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": { 146 | "id": "UXT9SDMK7Ioa" 147 | }, 148 | "source": [ 149 | "## Add dense layers for your classifier\n", 150 | "\n", 151 | "Next, you will add dense layers to your model. These will be the layers that you will train and is tasked with recognizing cats and dogs. You will add a [Dropout](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dropout) layer as well to regularize the output and avoid overfitting." 
152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "id": "BMXb913pbvFg" 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "from tensorflow.keras.optimizers import RMSprop\n", 163 | "from tensorflow.keras import Model\n", 164 | "\n", 165 | "# Flatten the output layer to 1 dimension\n", 166 | "x = layers.Flatten()(last_output)\n", 167 | "# Add a fully connected layer with 1,024 hidden units and ReLU activation\n", 168 | "x = layers.Dense(1024, activation='relu')(x)\n", 169 | "# Add a dropout rate of 0.2\n", 170 | "x = layers.Dropout(0.2)(x)\n", 171 | "# Add a final sigmoid layer for classification\n", 172 | "x = layers.Dense (1, activation='sigmoid')(x)\n", 173 | "\n", 174 | "# Append the dense network to the base model\n", 175 | "model = Model(pre_trained_model.input, x)\n", 176 | "\n", 177 | "# Print the model summary. See your dense network connected at the end.\n", 178 | "model.summary()" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": { 185 | "id": "SAwTTkWr56uC" 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "# Set the training parameters\n", 190 | "model.compile(optimizer = RMSprop(learning_rate=0.0001),\n", 191 | " loss = 'binary_crossentropy',\n", 192 | " metrics = ['accuracy'])" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": { 198 | "id": "aYLGw_RO7Z_X" 199 | }, 200 | "source": [ 201 | "## Prepare the dataset\n", 202 | "\n", 203 | "Now you will prepare the dataset. This is basically the same code as the one you used in the data augmentation lab." 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": { 210 | "id": "O4s8HckqGlnb" 211 | }, 212 | "outputs": [], 213 | "source": [ 214 | "# Download the dataset\n", 215 | "!wget https://storage.googleapis.com/tensorflow-1-public/course2/cats_and_dogs_filtered.zip" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": { 222 | "id": "WOV8jON3c3Jv" 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "import os\n", 227 | "import zipfile\n", 228 | "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n", 229 | "\n", 230 | "# Extract the archive\n", 231 | "zip_ref = zipfile.ZipFile(\"./cats_and_dogs_filtered.zip\", 'r')\n", 232 | "zip_ref.extractall(\"tmp/\")\n", 233 | "zip_ref.close()\n", 234 | "\n", 235 | "# Define our example directories and files\n", 236 | "base_dir = 'tmp/cats_and_dogs_filtered'\n", 237 | "\n", 238 | "train_dir = os.path.join( base_dir, 'train')\n", 239 | "validation_dir = os.path.join( base_dir, 'validation')\n", 240 | "\n", 241 | "# Directory with training cat pictures\n", 242 | "train_cats_dir = os.path.join(train_dir, 'cats')\n", 243 | "\n", 244 | "# Directory with training dog pictures\n", 245 | "train_dogs_dir = os.path.join(train_dir, 'dogs')\n", 246 | "\n", 247 | "# Directory with validation cat pictures\n", 248 | "validation_cats_dir = os.path.join(validation_dir, 'cats')\n", 249 | "\n", 250 | "# Directory with validation dog pictures\n", 251 | "validation_dogs_dir = os.path.join(validation_dir, 'dogs')\n", 252 | "\n", 253 | "# Add our data-augmentation parameters to ImageDataGenerator\n", 254 | "train_datagen = ImageDataGenerator(rescale = 1./255.,\n", 255 | " rotation_range = 40,\n", 256 | " width_shift_range = 0.2,\n", 257 | " height_shift_range = 0.2,\n", 258 | " shear_range = 0.2,\n", 259 | " zoom_range = 0.2,\n", 260 | " horizontal_flip = True)\n", 261 | 
"\n", 262 | "# Note that the validation data should not be augmented!\n", 263 | "test_datagen = ImageDataGenerator( rescale = 1.0/255. )\n", 264 | "\n", 265 | "# Flow training images in batches of 20 using train_datagen generator\n", 266 | "train_generator = train_datagen.flow_from_directory(train_dir,\n", 267 | " batch_size = 20,\n", 268 | " class_mode = 'binary',\n", 269 | " target_size = (150, 150))\n", 270 | "\n", 271 | "# Flow validation images in batches of 20 using test_datagen generator\n", 272 | "validation_generator = test_datagen.flow_from_directory( validation_dir,\n", 273 | " batch_size = 20,\n", 274 | " class_mode = 'binary',\n", 275 | " target_size = (150, 150))" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": { 281 | "id": "3m3S6AZb7h-B" 282 | }, 283 | "source": [ 284 | "## Train the model\n", 285 | "\n", 286 | "With that, you can now train the model. You will do 20 epochs and plot the results afterwards." 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": { 293 | "id": "Blhq2MAUeyGA" 294 | }, 295 | "outputs": [], 296 | "source": [ 297 | "# Train the model.\n", 298 | "history = model.fit(\n", 299 | " train_generator,\n", 300 | " validation_data = validation_generator,\n", 301 | " steps_per_epoch = 100,\n", 302 | " epochs = 20,\n", 303 | " validation_steps = 50,\n", 304 | " verbose = 2)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": { 310 | "id": "RwcB2bPj7lIx" 311 | }, 312 | "source": [ 313 | "## Evaluate the results\n", 314 | "\n", 315 | "You will use the same code to plot the results. As you can see, the validation accuracy is also trending upwards as your training accuracy improves. This is a good sign that your model is no longer overfitting!" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": { 322 | "id": "C2Fp6Se9rKuL" 323 | }, 324 | "outputs": [], 325 | "source": [ 326 | "import matplotlib.pyplot as plt\n", 327 | "acc = history.history['accuracy']\n", 328 | "val_acc = history.history['val_accuracy']\n", 329 | "loss = history.history['loss']\n", 330 | "val_loss = history.history['val_loss']\n", 331 | "\n", 332 | "epochs = range(len(acc))\n", 333 | "\n", 334 | "plt.plot(epochs, acc, 'r', label='Training accuracy')\n", 335 | "plt.plot(epochs, val_acc, 'b', label='Validation accuracy')\n", 336 | "plt.title('Training and validation accuracy')\n", 337 | "plt.legend(loc=0)\n", 338 | "plt.figure()\n", 339 | "\n", 340 | "\n", 341 | "plt.show()" 342 | ] 343 | } 344 | ], 345 | "metadata": { 346 | "accelerator": "GPU", 347 | "colab": { 348 | "name": "C2_W3_Lab_1_transfer_learning.ipynb", 349 | "private_outputs": true, 350 | "provenance": [], 351 | "toc_visible": true 352 | }, 353 | "kernelspec": { 354 | "display_name": "Python 3", 355 | "language": "python", 356 | "name": "python3" 357 | }, 358 | "language_info": { 359 | "codemirror_mode": { 360 | "name": "ipython", 361 | "version": 3 362 | }, 363 | "file_extension": ".py", 364 | "mimetype": "text/x-python", 365 | "name": "python", 366 | "nbconvert_exporter": "python", 367 | "pygments_lexer": "ipython3", 368 | "version": "3.7.4" 369 | } 370 | }, 371 | "nbformat": 4, 372 | "nbformat_minor": 0 373 | } -------------------------------------------------------------------------------- /4. Sequences, Time Series and Prediction/2. 
Deep Neural Networks for Time Series/C4_W2_Lab_1_features_and_labels.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "DuD0RncCe5dS" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "VwpmsHAGCThh" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Preparing Time Series Features and Labels\n", 19 | "\n", 20 | "In this lab, you will prepare time series data into features and labels that you can use to train a model. This is mainly achieved by a *windowing* technique where in you group consecutive measurement values into one feature and the next measurement will be the label. For example, in hourly measurements, you can use values taken at hours 1 to 11 to predict the value at hour 12. The next sections will show how you can implement this in Tensorflow.\n", 21 | "\n", 22 | "Let's begin!" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "id": "8KGZ4YDEEA9s" 29 | }, 30 | "source": [ 31 | "## Imports\n", 32 | "\n", 33 | "Tensorflow will be your lone import in this module and you'll be using methods mainly from the [tf.data API](https://www.tensorflow.org/guide/data), particularly the [tf.data.Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) class. This contains many useful methods to arrange sequences of data and you'll see that shortly." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "id": "mBw-_CJVEDxY" 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "import tensorflow as tf" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": { 50 | "id": "xBUvK1ATDR2L" 51 | }, 52 | "source": [ 53 | "## Create a Simple Dataset\n", 54 | "\n", 55 | "For this exercise, you will just use a sequence of numbers as your dataset so you can clearly see the effect of each command. For example, the cell below uses the [range()](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#range) method to generate a dataset containing numbers 0 to 9." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "id": "asEdslR_05O_" 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "# Generate a tf dataset with 10 elements (i.e. numbers 0 to 9)\n", 67 | "dataset = tf.data.Dataset.range(10)\n", 68 | "\n", 69 | "# Preview the result\n", 70 | "for val in dataset:\n", 71 | " print(val.numpy())" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": { 77 | "id": "7ci0BvcW0VM-" 78 | }, 79 | "source": [ 80 | "You will see this command several times in the next sections." 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": { 86 | "id": "j3BpTbsvGbgn" 87 | }, 88 | "source": [ 89 | "## Windowing the data\n", 90 | "\n", 91 | "As mentioned earlier, you want to group consecutive elements of your data and use that to predict a future value. This is called windowing and you can use that with the [window()](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#window) method as shown below. Here, you will take 5 elements per window (i.e. `size` parameter) and you will move this window 1 element at a time (i.e. `shift` parameter). One caveat to using this method is that each window returned is a [Dataset](https://www.tensorflow.org/guide/data#dataset_structure) in itself. 
This is a Python iterable and, as of the current version (TF 2.8), it won't show the elements if you use the `print()` method on it. It will just show a description of the data structure (e.g. `<_VariantDataset shapes: (), types: tf.int64>`)." 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "id": "Lrv_ghSt1lgQ" 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "# Generate a tf dataset with 10 elements (i.e. numbers 0 to 9)\n", 103 | "dataset = tf.data.Dataset.range(10)\n", 104 | "\n", 105 | "# Window the data\n", 106 | "dataset = dataset.window(size=5, shift=1)\n", 107 | "\n", 108 | "# Print the result\n", 109 | "for window_dataset in dataset:\n", 110 | " print(window_dataset)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": { 116 | "id": "SfnpaHHVXu4f" 117 | }, 118 | "source": [ 119 | "If you want to see the elements, you will have to iterate over each iterable. This can be done by modifying the print statement above with a nested for-loop or list comprehension. The code below shows the list comprehension while in the lecture video, you saw the for-loop." 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "id": "vpL6Bsm7W0xx" 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "# Print the result\n", 131 | "for window_dataset in dataset:\n", 132 | " print([item.numpy() for item in window_dataset])" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": { 138 | "id": "2U91p2SoIaTC" 139 | }, 140 | "source": [ 141 | "Now that you can see the elements of each window, you'll notice that the resulting sets are not sized evenly because there are no more elements after the number `9`. You can use the `drop_remainder` flag to make sure that only 5-element windows are retained." 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": { 148 | "id": "QLEq6MG-2DN2" 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "# Generate a tf dataset with 10 elements (i.e. numbers 0 to 9)\n", 153 | "dataset = tf.data.Dataset.range(10)\n", 154 | "\n", 155 | "# Window the data but only take those with the specified size\n", 156 | "dataset = dataset.window(size=5, shift=1, drop_remainder=True)\n", 157 | "\n", 158 | "# Print the result\n", 159 | "for window_dataset in dataset:\n", 160 | " print([item.numpy() for item in window_dataset])" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": { 166 | "id": "B6DL74dqMu3T" 167 | }, 168 | "source": [ 169 | "## Flatten the Windows\n", 170 | "\n", 171 | "In training the model later, you will want to prepare the windows to be [tensors](https://www.tensorflow.org/guide/tensor) instead of the `Dataset` structure. You can do that by feeding a mapping function to the [flat_map()](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#flat_map) method. This function will be applied to each window and the results will be [flattened into a single dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#flatten_a_dataset_of_windows_2). To illustrate, the code below will put all elements of a window into a single batch then flatten the result." 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": { 178 | "id": "PJ9CAHlJ2ODe" 179 | }, 180 | "outputs": [], 181 | "source": [ 182 | "# Generate a tf dataset with 10 elements (i.e. 
numbers 0 to 9)\n", 183 | "dataset = tf.data.Dataset.range(10)\n", 184 | "\n", 185 | "# Window the data but only take those with the specified size\n", 186 | "dataset = dataset.window(5, shift=1, drop_remainder=True)\n", 187 | "\n", 188 | "# Flatten the windows by putting its elements in a single batch\n", 189 | "dataset = dataset.flat_map(lambda window: window.batch(5))\n", 190 | "\n", 191 | "# Print the results\n", 192 | "for window in dataset:\n", 193 | " print(window.numpy())" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": { 199 | "id": "nxMA2L7IMx7V" 200 | }, 201 | "source": [ 202 | "## Group into features and labels\n", 203 | "\n", 204 | "Next, you will want to mark the labels in each window. For this exercise, you will do that by splitting the last element of each window from the first four. This is done with the [map()](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map) method containing a lambda function that defines the window slicing." 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "id": "DryEZ2Mz2nNV" 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "# Generate a tf dataset with 10 elements (i.e. numbers 0 to 9)\n", 216 | "dataset = tf.data.Dataset.range(10)\n", 217 | "\n", 218 | "# Window the data but only take those with the specified size\n", 219 | "dataset = dataset.window(5, shift=1, drop_remainder=True)\n", 220 | "\n", 221 | "# Flatten the windows by putting its elements in a single batch\n", 222 | "dataset = dataset.flat_map(lambda window: window.batch(5))\n", 223 | "\n", 224 | "# Create tuples with features (first four elements of the window) and labels (last element)\n", 225 | "dataset = dataset.map(lambda window: (window[:-1], window[-1]))\n", 226 | "\n", 227 | "# Print the results\n", 228 | "for x,y in dataset:\n", 229 | " print(\"x = \", x.numpy())\n", 230 | " print(\"y = \", y.numpy())\n", 231 | " print()" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": { 237 | "id": "TnueY7A6NFdg" 238 | }, 239 | "source": [ 240 | "## Shuffle the data\n", 241 | "\n", 242 | "It is good practice to shuffle your dataset to reduce *sequence bias* while training your model. This refers to the neural network overfitting to the order of inputs and consequently, it will not perform well when it does not see that particular order when testing. You don't want the sequence of training inputs to impact the network this way so it's good to shuffle them up.\n", 243 | "\n", 244 | "You can simply use the [shuffle()](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle) method to do this. The `buffer_size` parameter is required for that and as mentioned in the doc, you should put a number equal or greater than the total number of elements for better shuffling. We can see from the previous cells that the total number of windows in the dataset is `6` so we can choose this number or higher." 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": { 251 | "id": "1tl-0BOKkEtk" 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "# Generate a tf dataset with 10 elements (i.e. 
numbers 0 to 9)\n", 256 | "dataset = tf.data.Dataset.range(10)\n", 257 | "\n", 258 | "# Window the data but only take those with the specified size\n", 259 | "dataset = dataset.window(5, shift=1, drop_remainder=True)\n", 260 | "\n", 261 | "# Flatten the windows by putting its elements in a single batch\n", 262 | "dataset = dataset.flat_map(lambda window: window.batch(5))\n", 263 | "\n", 264 | "# Create tuples with features (first four elements of the window) and labels (last element)\n", 265 | "dataset = dataset.map(lambda window: (window[:-1], window[-1]))\n", 266 | "\n", 267 | "# Shuffle the windows\n", 268 | "dataset = dataset.shuffle(buffer_size=10)\n", 269 | "\n", 270 | "# Print the results\n", 271 | "for x,y in dataset:\n", 272 | " print(\"x = \", x.numpy())\n", 273 | " print(\"y = \", y.numpy())\n", 274 | " print()\n" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": { 280 | "id": "9Wr4jGaTNIk4" 281 | }, 282 | "source": [ 283 | "## Create batches for training\n", 284 | "\n", 285 | "Lastly, you will want to group your windows into batches. You can do that with the [batch()](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch) method as shown below. Simply specify the batch size and it will return a batched dataset with that number of windows. As a rule of thumb, it is also good to specify a [prefetch()](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#prefetch) step. This optimizes the execution time when the model is already training. By specifying a prefetch `buffer_size` of `1` as shown below, Tensorflow will prepare the next one batch in advance (i.e. putting it in a buffer) while the current batch is being consumed by the model. You can read more about it [here](https://towardsdatascience.com/optimising-your-input-pipeline-performance-with-tf-data-part-1-32e52a30cac4#Prefetching)." 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": { 292 | "id": "Wa0PNwxMGapy" 293 | }, 294 | "outputs": [], 295 | "source": [ 296 | "# Generate a tf dataset with 10 elements (i.e. numbers 0 to 9)\n", 297 | "dataset = tf.data.Dataset.range(10)\n", 298 | "\n", 299 | "# Window the data but only take those with the specified size\n", 300 | "dataset = dataset.window(5, shift=1, drop_remainder=True)\n", 301 | "\n", 302 | "# Flatten the windows by putting its elements in a single batch\n", 303 | "dataset = dataset.flat_map(lambda window: window.batch(5))\n", 304 | "\n", 305 | "# Create tuples with features (first four elements of the window) and labels (last element)\n", 306 | "dataset = dataset.map(lambda window: (window[:-1], window[-1]))\n", 307 | "\n", 308 | "# Shuffle the windows\n", 309 | "dataset = dataset.shuffle(buffer_size=10)\n", 310 | "\n", 311 | "# Create batches of windows\n", 312 | "dataset = dataset.batch(2).prefetch(1)\n", 313 | "\n", 314 | "# Print the results\n", 315 | "for x,y in dataset:\n", 316 | " print(\"x = \", x.numpy())\n", 317 | " print(\"y = \", y.numpy())\n", 318 | " print()\n" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": { 324 | "id": "7YiIH06unP1W" 325 | }, 326 | "source": [ 327 | "## Wrap Up\n", 328 | "\n", 329 | "This short exercise showed you how to chain different methods of the `tf.data.Dataset` class to prepare a sequence into shuffled and batched window datasets. You will be using this same concept in the next exercises when you apply it to synthetic data and use the result to train a neural network. On to the next!" 
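Putting it together: the lab builds the pipeline one call at a time, so a natural next step is to wrap the same chain into a single reusable helper. The sketch below does that for the toy 0-to-9 sequence; the function name `windowed_dataset` and its default arguments are illustrative choices, not something defined in this lab.

import numpy as np
import tensorflow as tf

def windowed_dataset(series, window_size=4, batch_size=2, shuffle_buffer=10):
    """Chain the windowing steps shown above into one reusable pipeline."""
    # Wrap the series in a tf.data.Dataset
    dataset = tf.data.Dataset.from_tensor_slices(series)

    # Window the data; each window holds window_size features plus 1 label
    dataset = dataset.window(window_size + 1, shift=1, drop_remainder=True)

    # Flatten each window (a Dataset itself) into a single batch of tensors
    dataset = dataset.flat_map(lambda window: window.batch(window_size + 1))

    # Split each window into features (all but the last element) and label (last element)
    dataset = dataset.map(lambda window: (window[:-1], window[-1]))

    # Shuffle to reduce sequence bias, then batch and prefetch
    dataset = dataset.shuffle(shuffle_buffer).batch(batch_size).prefetch(1)

    return dataset

# Same toy sequence as in the lab (numbers 0 to 9)
series = np.arange(10)
for x, y in windowed_dataset(series):
    print("x =", x.numpy())
    print("y =", y.numpy())

This mirrors the behaviour of the per-step cells above, just packaged so the same preprocessing can be reused on a real series later.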
330 | ] 331 | } 332 | ], 333 | "metadata": { 334 | "colab": { 335 | "name": "C4_W2_Lab_1_features_and_labels.ipynb", 336 | "private_outputs": true, 337 | "provenance": [], 338 | "toc_visible": true 339 | }, 340 | "kernelspec": { 341 | "display_name": "Python 3", 342 | "language": "python", 343 | "name": "python3" 344 | }, 345 | "language_info": { 346 | "codemirror_mode": { 347 | "name": "ipython", 348 | "version": 3 349 | }, 350 | "file_extension": ".py", 351 | "mimetype": "text/x-python", 352 | "name": "python", 353 | "nbconvert_exporter": "python", 354 | "pygments_lexer": "ipython3", 355 | "version": "3.7.4" 356 | } 357 | }, 358 | "nbformat": 4, 359 | "nbformat_minor": 0 360 | } -------------------------------------------------------------------------------- /3. Natural Language Processing in TensorFlow/2. Word Embeddings/C3_W2_Lab_2_sarcasm_classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "By8qnVDaX3UP" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "ZfVtdwryvvP6" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Training a binary classifier with the Sarcasm Dataset\n", 19 | "\n", 20 | "In this lab, you will revisit the [News Headlines Dataset for Sarcasm Detection](https://www.kaggle.com/rmisra/news-headlines-dataset-for-sarcasm-detection/home) from last week and proceed to build a train a model on it. The steps will be very similar to the previous lab with IMDB Reviews with just some minor modifications. You can tweak the hyperparameters and see how it affects the results. Let's begin!" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "aWIM6gplHqfx" 27 | }, 28 | "source": [ 29 | "## Download the dataset\n", 30 | "\n", 31 | "You will first download the JSON file, load it into your workspace and put the sentences and labels into lists." 
32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "id": "BQVuQrZNkPn9" 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# Download the dataset\n", 43 | "!wget https://storage.googleapis.com/tensorflow-1-public/course3/sarcasm.json" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "id": "oaLaaqhNkUPd" 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "import json\n", 55 | "\n", 56 | "# Load the JSON file\n", 57 | "with open(\"./sarcasm.json\", 'r') as f:\n", 58 | " datastore = json.load(f)\n", 59 | "\n", 60 | "# Initialize the lists\n", 61 | "sentences = []\n", 62 | "labels = []\n", 63 | "\n", 64 | "# Collect sentences and labels into the lists\n", 65 | "for item in datastore:\n", 66 | " sentences.append(item['headline'])\n", 67 | " labels.append(item['is_sarcastic'])" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": { 73 | "id": "kw1I6oNSfCxa" 74 | }, 75 | "source": [ 76 | "## Hyperparameters\n", 77 | "\n", 78 | "We placed the hyperparameters in the cell below so you can easily tweak it later:" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "id": "wpF4x5olfHX-" 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "# Number of examples to use for training\n", 90 | "training_size = 20000\n", 91 | "\n", 92 | "# Vocabulary size of the tokenizer\n", 93 | "vocab_size = 10000\n", 94 | "\n", 95 | "# Maximum length of the padded sequences\n", 96 | "max_length = 32\n", 97 | "\n", 98 | "# Output dimensions of the Embedding layer\n", 99 | "embedding_dim = 16" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": { 105 | "id": "dHibcDI0H5Zj" 106 | }, 107 | "source": [ 108 | "## Split the dataset\n", 109 | "\n", 110 | "Next, you will generate your train and test datasets. You will use the `training_size` value you set above to slice the `sentences` and `labels` lists into two sublists: one fore training and another for testing." 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "id": "S1sD-7v0kYWk" 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "# Split the sentences\n", 122 | "training_sentences = sentences[0:training_size]\n", 123 | "testing_sentences = sentences[training_size:]\n", 124 | "\n", 125 | "# Split the labels\n", 126 | "training_labels = labels[0:training_size]\n", 127 | "testing_labels = labels[training_size:]" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": { 133 | "id": "qciTzNR7IHzJ" 134 | }, 135 | "source": [ 136 | "## Preprocessing the train and test sets\n", 137 | "\n", 138 | "Now you can preprocess the text and labels so it can be consumed by the model. You use the `Tokenizer` class to create the vocabulary and the `pad_sequences` method to generate padded token sequences. You will also need to set the labels to a numpy array so it can be a valid data type for `model.fit()`." 
139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": { 145 | "id": "3u8UB0MCkZ5N" 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "import numpy as np\n", 150 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 151 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 152 | "\n", 153 | "# Parameters for padding and OOV tokens\n", 154 | "trunc_type='post'\n", 155 | "padding_type='post'\n", 156 | "oov_tok = \"\"\n", 157 | "\n", 158 | "# Initialize the Tokenizer class\n", 159 | "tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)\n", 160 | "\n", 161 | "# Generate the word index dictionary\n", 162 | "tokenizer.fit_on_texts(training_sentences)\n", 163 | "word_index = tokenizer.word_index\n", 164 | "\n", 165 | "# Generate and pad the training sequences\n", 166 | "training_sequences = tokenizer.texts_to_sequences(training_sentences)\n", 167 | "training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)\n", 168 | "\n", 169 | "# Generate and pad the testing sequences\n", 170 | "testing_sequences = tokenizer.texts_to_sequences(testing_sentences)\n", 171 | "testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)\n", 172 | "\n", 173 | "# Convert the labels lists into numpy arrays\n", 174 | "training_labels = np.array(training_labels)\n", 175 | "testing_labels = np.array(testing_labels)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": { 181 | "id": "AMF4afx2IdHo" 182 | }, 183 | "source": [ 184 | "## Build and Compile the Model\n", 185 | "\n", 186 | "Next, you will build the model. The architecture is similar to the previous lab but you will use a [GlobalAveragePooling1D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/GlobalAveragePooling1D) layer instead of `Flatten` after the Embedding. This adds the task of averaging over the sequence dimension before connecting to the dense layers. See a short demo of how this works using the snippet below. Notice that it gets the average over 3 arrays (i.e. `(10 + 1 + 1) / 3` and `(2 + 3 + 1) / 3` to arrive at the final output." 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "id": "7KDCvSc0kFOz" 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "import tensorflow as tf\n", 198 | "\n", 199 | "# Initialize a GlobalAveragePooling1D (GAP1D) layer\n", 200 | "gap1d_layer = tf.keras.layers.GlobalAveragePooling1D()\n", 201 | "\n", 202 | "# Define sample array\n", 203 | "sample_array = np.array([[[10,2],[1,3],[1,1]]])\n", 204 | "\n", 205 | "# Print shape and contents of sample array\n", 206 | "print(f'shape of sample_array = {sample_array.shape}')\n", 207 | "print(f'sample array: {sample_array}')\n", 208 | "\n", 209 | "# Pass the sample array to the GAP1D layer\n", 210 | "output = gap1d_layer(sample_array)\n", 211 | "\n", 212 | "# Print shape and contents of the GAP1D output array\n", 213 | "print(f'output shape of gap1d_layer: {output.shape}')\n", 214 | "print(f'output array of gap1d_layer: {output.numpy()}')" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": { 220 | "id": "evlU_kqOshc4" 221 | }, 222 | "source": [ 223 | "This added computation reduces the dimensionality of the model as compared to using `Flatten()` and thus, the number of training parameters will also decrease. 
See the output of `model.summary()` below and see how it compares if you swap out the pooling layer with a simple `Flatten()`." 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": { 230 | "id": "FufaT4vlkiDE" 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "# Build the model\n", 235 | "model = tf.keras.Sequential([\n", 236 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 237 | " tf.keras.layers.GlobalAveragePooling1D(),\n", 238 | " tf.keras.layers.Dense(24, activation='relu'),\n", 239 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 240 | "])\n", 241 | "\n", 242 | "# Print the model summary\n", 243 | "model.summary()" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": { 249 | "id": "GMxT5NzKtRgr" 250 | }, 251 | "source": [ 252 | "You will use the same loss, optimizer, and metrics from the previous lab." 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": { 259 | "id": "XfDt1hmYkiys" 260 | }, 261 | "outputs": [], 262 | "source": [ 263 | "# Compile the model\n", 264 | "model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": { 270 | "id": "Axtd-WQAJIUK" 271 | }, 272 | "source": [ 273 | "## Train the Model\n", 274 | "\n", 275 | "Now you will feed in the prepared datasets to train the model. If you used the default hyperparameters, you will get around 99% training accuracy and 80% validation accuracy.\n", 276 | "\n", 277 | "*Tip: You can set the `verbose` parameter of `model.fit()` to `2` to indicate that you want to print just the results per epoch. Setting it to `1` (default) displays a progress bar per epoch, while `0` silences all displays. It doesn't matter much in this Colab but when working in a production environment, you may want to set this to `2` as recommended in the [documentation](https://keras.io/api/models/model_training_apis/#fit-method).*" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": { 284 | "id": "2DTKQFf1kkyc" 285 | }, 286 | "outputs": [], 287 | "source": [ 288 | "num_epochs = 30\n", 289 | "\n", 290 | "# Train the model\n", 291 | "history = model.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=2)" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": { 297 | "id": "L_bWhGOSJLLm" 298 | }, 299 | "source": [ 300 | "## Visualize the Results\n", 301 | "\n", 302 | "You can use the cell below to plot the training results. You may notice some overfitting because your validation accuracy is slowly dropping while the training accuracy is still going up. See if you can improve it by tweaking the hyperparameters. Some example values are shown in the lectures." 
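One illustrative way to act on that advice, beyond hand-tuning the vocabulary size or embedding dimension, is to let training stop itself once the validation loss stops improving. The snippet below is a sketch rather than part of this lab: it assumes you rebuild and recompile `model` first, and it reuses the `training_padded`, `training_labels`, `testing_padded`, and `testing_labels` variables defined earlier.

import tensorflow as tf

# Sketch: stop training automatically when the validation loss stops improving.
# Rebuild and recompile the model before running this so training starts fresh.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',           # watch the validation loss
    patience=2,                   # tolerate 2 epochs without improvement
    restore_best_weights=True     # keep the weights from the best epoch
)

history = model.fit(
    training_padded, training_labels,
    epochs=30,
    validation_data=(testing_padded, testing_labels),
    callbacks=[early_stop],
    verbose=2
)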
303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "id": "2HYfBKXjkmU8" 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "import matplotlib.pyplot as plt\n", 314 | "\n", 315 | "# Plot utility\n", 316 | "def plot_graphs(history, string):\n", 317 | " plt.plot(history.history[string])\n", 318 | " plt.plot(history.history['val_'+string])\n", 319 | " plt.xlabel(\"Epochs\")\n", 320 | " plt.ylabel(string)\n", 321 | " plt.legend([string, 'val_'+string])\n", 322 | " plt.show()\n", 323 | "\n", 324 | "# Plot the accuracy and loss\n", 325 | "plot_graphs(history, \"accuracy\")\n", 326 | "plot_graphs(history, \"loss\")" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": { 332 | "id": "JN6kaxxcJQgd" 333 | }, 334 | "source": [ 335 | "## Visualize Word Embeddings\n", 336 | "\n", 337 | "As before, you can visualize the final weights of the embeddings using the [Tensorflow Embedding Projector](https://projector.tensorflow.org/)." 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": { 344 | "id": "c9MqihtEkzQ9" 345 | }, 346 | "outputs": [], 347 | "source": [ 348 | "# Get the index-word dictionary\n", 349 | "reverse_word_index = tokenizer.index_word\n", 350 | "\n", 351 | "# Get the embedding layer from the model (i.e. first layer)\n", 352 | "embedding_layer = model.layers[0]\n", 353 | "\n", 354 | "# Get the weights of the embedding layer\n", 355 | "embedding_weights = embedding_layer.get_weights()[0]\n", 356 | "\n", 357 | "# Print the shape. Expected is (vocab_size, embedding_dim)\n", 358 | "print(embedding_weights.shape)\n" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": { 365 | "id": "LoBXVffknldU" 366 | }, 367 | "outputs": [], 368 | "source": [ 369 | "import io\n", 370 | "\n", 371 | "# Open writeable files\n", 372 | "out_v = io.open('vecs.tsv', 'w', encoding='utf-8')\n", 373 | "out_m = io.open('meta.tsv', 'w', encoding='utf-8')\n", 374 | "\n", 375 | "# Initialize the loop. Start counting at `1` because `0` is just for the padding\n", 376 | "for word_num in range(1, vocab_size):\n", 377 | "\n", 378 | " # Get the word associated at the current index\n", 379 | " word_name = reverse_word_index[word_num]\n", 380 | "\n", 381 | " # Get the embedding weights associated with the current index\n", 382 | " word_embedding = embedding_weights[word_num]\n", 383 | "\n", 384 | " # Write the word name\n", 385 | " out_m.write(word_name + \"\\n\")\n", 386 | "\n", 387 | " # Write the word embedding\n", 388 | " out_v.write('\\t'.join([str(x) for x in word_embedding]) + \"\\n\")\n", 389 | "\n", 390 | "# Close the files\n", 391 | "out_v.close()\n", 392 | "out_m.close()" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": null, 398 | "metadata": { 399 | "id": "U4eZ5HtVnnEE" 400 | }, 401 | "outputs": [], 402 | "source": [ 403 | "# Import files utilities in Colab\n", 404 | "try:\n", 405 | " from google.colab import files\n", 406 | "except ImportError:\n", 407 | " pass\n", 408 | "\n", 409 | "# Download the files\n", 410 | "else:\n", 411 | " files.download('vecs.tsv')\n", 412 | " files.download('meta.tsv')" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": { 418 | "id": "1GierJvdJWMt" 419 | }, 420 | "source": [ 421 | "## Wrap Up\n", 422 | "\n", 423 | "In this lab, you were able to build a binary classifier to detect sarcasm. 
You saw some overfitting in the initial attempt and hopefully, you were able to arrive at a better set of hyperparameters.\n", 424 | "\n", 425 | "So far, you've been tokenizing datasets from scratch and you're treating the vocab size as a hyperparameter. Furthermore, you're tokenizing the texts by building a vocabulary of full words. In the next lab, you will make use of a pre-tokenized dataset that uses a vocabulary of *subwords*. For instance, instead of having a unique token for the word `Tensorflow`, it will instead have a token each for `Ten`, `sor`, and `flow`. You will see the motivation and implications of having this design in the next exercise. See you there!" 426 | ] 427 | } 428 | ], 429 | "metadata": { 430 | "colab": { 431 | "name": "C3_W2_Lab_2_sarcasm_classifier.ipynb", 432 | "private_outputs": true, 433 | "provenance": [], 434 | "toc_visible": true 435 | }, 436 | "kernelspec": { 437 | "display_name": "Python 3", 438 | "language": "python", 439 | "name": "python3" 440 | }, 441 | "language_info": { 442 | "codemirror_mode": { 443 | "name": "ipython", 444 | "version": 3 445 | }, 446 | "file_extension": ".py", 447 | "mimetype": "text/x-python", 448 | "name": "python", 449 | "nbconvert_exporter": "python", 450 | "pygments_lexer": "ipython3", 451 | "version": "3.7.4" 452 | } 453 | }, 454 | "nbformat": 4, 455 | "nbformat_minor": 0 456 | } -------------------------------------------------------------------------------- /2. Convolutional Neural Networks in TensorFlow/2. Augmentation A technique to avoid overfitting/C2_W2_Lab_1_cats_v_dogs_augmentation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "uCUOk8oaa245" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "gGxCD4mGHHjG" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Data Augmentation\n", 19 | "\n", 20 | "In the previous lessons, you saw that having a high training accuracy does not automatically mean having a good predictive model. It can still perform poorly on new data because it has overfit to the training set. In this lab, you will see how to avoid that using _data augmentation_. This increases the amount of training data by modifying the existing training data's properties. For example, in image data, you can apply different preprocessing techniques such as rotate, flip, shear, or zoom on your existing images so you can simulate other data that the model should also learn from. This way, the model would see more variety in the images during training so it will infer better on new, previously unseen data.\n", 21 | "\n", 22 | "Let's see how you can do this in the following sections." 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "id": "kJJqX4DxcQs8" 29 | }, 30 | "source": [ 31 | "## Baseline Performance\n", 32 | "\n", 33 | "You will start with a model that's very effective at learning `Cats vs Dogs` without data augmentation. It's similar to the previous models that you have used. Note that there are four convolutional layers with 32, 64, 128 and 128 convolutions respectively. The code is basically the same as in the previous lab so we won't go over the details step by step since you've already seen it before.\n", 34 | "\n", 35 | "You will train only for 20 epochs to save time but feel free to increase this if you want."
36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "id": "zJZIF29-dIRv" 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "# Download the dataset\n", 47 | "!wget https://storage.googleapis.com/tensorflow-1-public/course2/cats_and_dogs_filtered.zip" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "id": "_DyUfCTgdwa8" 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "import os\n", 59 | "import zipfile\n", 60 | "\n", 61 | "# Extract the archive\n", 62 | "zip_ref = zipfile.ZipFile(\"./cats_and_dogs_filtered.zip\", 'r')\n", 63 | "zip_ref.extractall(\"tmp/\")\n", 64 | "zip_ref.close()\n", 65 | "\n", 66 | "# Assign training and validation set directories\n", 67 | "base_dir = 'tmp/cats_and_dogs_filtered'\n", 68 | "train_dir = os.path.join(base_dir, 'train')\n", 69 | "validation_dir = os.path.join(base_dir, 'validation')\n", 70 | "\n", 71 | "# Directory with training cat pictures\n", 72 | "train_cats_dir = os.path.join(train_dir, 'cats')\n", 73 | "\n", 74 | "# Directory with training dog pictures\n", 75 | "train_dogs_dir = os.path.join(train_dir, 'dogs')\n", 76 | "\n", 77 | "# Directory with validation cat pictures\n", 78 | "validation_cats_dir = os.path.join(validation_dir, 'cats')\n", 79 | "\n", 80 | "# Directory with validation dog pictures\n", 81 | "validation_dogs_dir = os.path.join(validation_dir, 'dogs')" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": { 87 | "id": "Ub_BdOJIfZ_Q" 88 | }, 89 | "source": [ 90 | "You will place the model creation inside a function so you can easily initialize a new one when you use data augmentation later in this notebook." 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "id": "uWllK_Wad-Mx" 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "import tensorflow as tf\n", 102 | "from tensorflow.keras.optimizers import RMSprop\n", 103 | "\n", 104 | "def create_model():\n", 105 | " '''Creates a CNN with 4 convolutional layers'''\n", 106 | " model = tf.keras.models.Sequential([\n", 107 | " tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(150, 150, 3)),\n", 108 | " tf.keras.layers.MaxPooling2D(2, 2),\n", 109 | " tf.keras.layers.Conv2D(64, (3,3), activation='relu'),\n", 110 | " tf.keras.layers.MaxPooling2D(2,2),\n", 111 | " tf.keras.layers.Conv2D(128, (3,3), activation='relu'),\n", 112 | " tf.keras.layers.MaxPooling2D(2,2),\n", 113 | " tf.keras.layers.Conv2D(128, (3,3), activation='relu'),\n", 114 | " tf.keras.layers.MaxPooling2D(2,2),\n", 115 | " tf.keras.layers.Flatten(),\n", 116 | " tf.keras.layers.Dense(512, activation='relu'),\n", 117 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 118 | " ])\n", 119 | "\n", 120 | " model.compile(loss='binary_crossentropy',\n", 121 | " optimizer=RMSprop(learning_rate=1e-4),\n", 122 | " metrics=['accuracy'])\n", 123 | "\n", 124 | " return model" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": { 131 | "id": "MJPyDEzOqrKB" 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n", 136 | "\n", 137 | "# All images will be rescaled by 1./255\n", 138 | "train_datagen = ImageDataGenerator(rescale=1./255)\n", 139 | "test_datagen = ImageDataGenerator(rescale=1./255)\n", 140 | "\n", 141 | "# Flow training images in batches of 20 using train_datagen generator\n", 142 | "train_generator = 
train_datagen.flow_from_directory(\n", 143 | " train_dir, # This is the source directory for training images\n", 144 | " target_size=(150, 150), # All images will be resized to 150x150\n", 145 | " batch_size=20,\n", 146 | " # Since we use binary_crossentropy loss, we need binary labels\n", 147 | " class_mode='binary')\n", 148 | "\n", 149 | "# Flow validation images in batches of 20 using test_datagen generator\n", 150 | "validation_generator = test_datagen.flow_from_directory(\n", 151 | " validation_dir,\n", 152 | " target_size=(150, 150),\n", 153 | " batch_size=20,\n", 154 | " class_mode='binary')" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "id": "hdqUoF44esR3" 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "# Constant for epochs\n", 166 | "EPOCHS = 20\n", 167 | "\n", 168 | "# Create a new model\n", 169 | "model = create_model()\n", 170 | "\n", 171 | "# Train the model\n", 172 | "history = model.fit(\n", 173 | " train_generator,\n", 174 | " steps_per_epoch=100, # 2000 images = batch_size * steps\n", 175 | " epochs=EPOCHS,\n", 176 | " validation_data=validation_generator,\n", 177 | " validation_steps=50, # 1000 images = batch_size * steps\n", 178 | " verbose=2)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": { 184 | "id": "Y-G0Am4cguNt" 185 | }, 186 | "source": [ 187 | "You will then visualize the loss and accuracy with respect to the training and validation set. You will again use a convenience function so it can be reused later. This function accepts a [History](https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/History) object which contains the results of the `fit()` method you ran above." 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "id": "GZWPcmKWO303" 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "import matplotlib.pyplot as plt\n", 199 | "\n", 200 | "def plot_loss_acc(history):\n", 201 | " '''Plots the training and validation loss and accuracy from a history object'''\n", 202 | " acc = history.history['accuracy']\n", 203 | " val_acc = history.history['val_accuracy']\n", 204 | " loss = history.history['loss']\n", 205 | " val_loss = history.history['val_loss']\n", 206 | "\n", 207 | " epochs = range(len(acc))\n", 208 | "\n", 209 | " plt.plot(epochs, acc, 'bo', label='Training accuracy')\n", 210 | " plt.plot(epochs, val_acc, 'b', label='Validation accuracy')\n", 211 | " plt.title('Training and validation accuracy')\n", 212 | "\n", 213 | " plt.figure()\n", 214 | "\n", 215 | " plt.plot(epochs, loss, 'bo', label='Training Loss')\n", 216 | " plt.plot(epochs, val_loss, 'b', label='Validation Loss')\n", 217 | " plt.title('Training and validation loss')\n", 218 | " plt.legend()\n", 219 | "\n", 220 | " plt.show()" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": { 227 | "id": "Vojz4NYXiT_f" 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "# Plot training results\n", 232 | "plot_loss_acc(history)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": { 238 | "id": "zb81GvNov-Tg" 239 | }, 240 | "source": [ 241 | "From the results above, you'll see the training accuracy is more than 90%, and the validation accuracy is in the 70%-80% range. 
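To put rough numbers on that observation (an illustrative addition, not part of the lab), you can read the last-epoch metrics straight out of the `history` object returned by `model.fit()` above:

```python
# Compare the final training and validation accuracy of the baseline run
final_train_acc = history.history['accuracy'][-1]
final_val_acc = history.history['val_accuracy'][-1]

print(f"final training accuracy:   {final_train_acc:.3f}")
print(f"final validation accuracy: {final_val_acc:.3f}")
print(f"gap:                       {final_train_acc - final_val_acc:.3f}")
```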
This is a great example of _overfitting_ -- which in short means that it can do very well with images it has seen before, but not so well with images it hasn't.\n" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": { 247 | "id": "5KBz-vFbjLZX" 248 | }, 249 | "source": [ 250 | "## Data augmentation\n", 251 | "\n", 252 | "One simple method to avoid overfitting is to augment the images a bit. If you think about it, most pictures of a cat are very similar -- the ears are at the top, then the eyes, then the mouth etc. Things like the distance between the eyes and ears will always be quite similar too.\n", 253 | "\n", 254 | "What if you tweak the images a bit -- rotate the image, squash it, etc.? That's what image augmentation is all about. And there's an API that makes it easy!\n", 255 | "\n", 256 | "Take a look at the [ImageDataGenerator](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator) which you have been using to rescale the image. There are other properties on it that you can use to augment the image.\n", 257 | "\n", 258 | "```\n", 259 | "# Updated to do image augmentation\n", 260 | "train_datagen = ImageDataGenerator(\n", 261 | "      rotation_range=40,\n", 262 | "      width_shift_range=0.2,\n", 263 | "      height_shift_range=0.2,\n", 264 | "      shear_range=0.2,\n", 265 | "      zoom_range=0.2,\n", 266 | "      horizontal_flip=True,\n", 267 | "      fill_mode='nearest')\n", 268 | "```\n", 269 | "\n", 270 | "These are just a few of the options available. Let's quickly go over them:\n", 271 | "\n", 272 | "* `rotation_range` is a value in degrees (0–180) within which to randomly rotate pictures.\n", 273 | "* `width_shift` and `height_shift` are ranges (as a fraction of total width or height) within which to randomly translate pictures vertically or horizontally.\n", 274 | "* `shear_range` is for randomly applying shearing transformations.\n", 275 | "* `zoom_range` is for randomly zooming inside pictures.\n", 276 | "* `horizontal_flip` is for randomly flipping half of the images horizontally. This is relevant when there are no assumptions of horizontal asymmetry (e.g. real-world pictures).\n", 277 | "* `fill_mode` is the strategy used for filling in newly created pixels, which can appear after a rotation or a width/height shift.\n", 278 | "\n", 279 | "\n", 280 | "Run the next cells to see the impact on the results. The code is similar to the baseline but the definition of `train_datagen` has been updated to use the parameters described above.\n" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": { 287 | "id": "UK7_Fflgv8YC" 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "# Create new model\n", 292 | "model_for_aug = create_model()\n", 293 | "\n", 294 | "# This code has changed.
Now instead of the ImageDataGenerator just rescaling\n", 295 | "# the image, we also rotate and do other operations\n", 296 | "train_datagen = ImageDataGenerator(\n", 297 | "      rescale=1./255,\n", 298 | "      rotation_range=40,\n", 299 | "      width_shift_range=0.2,\n", 300 | "      height_shift_range=0.2,\n", 301 | "      shear_range=0.2,\n", 302 | "      zoom_range=0.2,\n", 303 | "      horizontal_flip=True,\n", 304 | "      fill_mode='nearest')\n", 305 | "\n", 306 | "test_datagen = ImageDataGenerator(rescale=1./255)\n", 307 | "\n", 308 | "# Flow training images in batches of 20 using train_datagen generator\n", 309 | "train_generator = train_datagen.flow_from_directory(\n", 310 | "        train_dir,  # This is the source directory for training images\n", 311 | "        target_size=(150, 150),  # All images will be resized to 150x150\n", 312 | "        batch_size=20,\n", 313 | "        # Since we use binary_crossentropy loss, we need binary labels\n", 314 | "        class_mode='binary')\n", 315 | "\n", 316 | "# Flow validation images in batches of 20 using test_datagen generator\n", 317 | "validation_generator = test_datagen.flow_from_directory(\n", 318 | "        validation_dir,\n", 319 | "        target_size=(150, 150),\n", 320 | "        batch_size=20,\n", 321 | "        class_mode='binary')\n", 322 | "\n", 323 | "# Train the new model\n", 324 | "history_with_aug = model_for_aug.fit(\n", 325 | "      train_generator,\n", 326 | "      steps_per_epoch=100,  # 2000 images = batch_size * steps\n", 327 | "      epochs=EPOCHS,\n", 328 | "      validation_data=validation_generator,\n", 329 | "      validation_steps=50,  # 1000 images = batch_size * steps\n", 330 | "      verbose=2)" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": { 337 | "id": "bnyRnwopT5aW" 338 | }, 339 | "outputs": [], 340 | "source": [ 341 | "# Plot the results of training with data augmentation\n", 342 | "plot_loss_acc(history_with_aug)" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "metadata": { 348 | "id": "1D1hd5fqmJUx" 349 | }, 350 | "source": [ 351 | "As you can see, the training accuracy has gone down compared to the baseline. This is expected because (as a result of data augmentation) there is more variety in the images so the model will need more runs to learn from them. The good thing is the validation accuracy is no longer stalling and is more in line with the training results. This means that the model is now performing better on unseen data.\n", 352 | "\n", 353 | "\n" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": { 359 | "id": "z4B9b6GPnKg1" 360 | }, 361 | "source": [ 362 | "## Wrap Up\n", 363 | "\n", 364 | "This exercise showed a simple trick to avoid overfitting. You can improve your baseline results by simply tweaking the same images you have already. The `ImageDataGenerator` class has built-in parameters to do just that. Try to modify the values some more in the `train_datagen` and see what results you get.\n", 365 | "\n", 366 | "Take note that this will not work for all cases. In the next lesson, Laurence will show a scenario where data augmentation will not help improve your validation accuracy."
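If you want to see what those transforms actually do to the inputs, the optional snippet below (not part of the lab) pulls one batch from the augmented `train_generator` defined above and displays a few samples; each re-run shows different random transformations.

```python
import matplotlib.pyplot as plt

# One batch of augmented images: shape (20, 150, 150, 3), already rescaled to [0, 1]
sample_images, _ = next(train_generator)

plt.figure(figsize=(12, 3))
for i in range(4):
    plt.subplot(1, 4, i + 1)
    plt.imshow(sample_images[i])
    plt.axis('off')
plt.show()
```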
367 | ] 368 | } 369 | ], 370 | "metadata": { 371 | "accelerator": "GPU", 372 | "colab": { 373 | "name": "C2_W2_Lab_1_cats_v_dogs_augmentation.ipynb", 374 | "private_outputs": true, 375 | "provenance": [], 376 | "toc_visible": true 377 | }, 378 | "kernelspec": { 379 | "display_name": "Python 3", 380 | "language": "python", 381 | "name": "python3" 382 | }, 383 | "language_info": { 384 | "codemirror_mode": { 385 | "name": "ipython", 386 | "version": 3 387 | }, 388 | "file_extension": ".py", 389 | "mimetype": "text/x-python", 390 | "name": "python", 391 | "nbconvert_exporter": "python", 392 | "pygments_lexer": "ipython3", 393 | "version": "3.7.4" 394 | } 395 | }, 396 | "nbformat": 4, 397 | "nbformat_minor": 0 398 | } -------------------------------------------------------------------------------- /3. Natural Language Processing in TensorFlow/3. Sequence models/C3_W3_Lab_4_imdb_reviews_with_GRU_LSTM_Conv1D.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "vamjGxEud8EY" 7 | }, 8 | "source": [ 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "qnpO3iadYEY2" 16 | }, 17 | "source": [ 18 | "# Ungraded Lab: Building Models for the IMDB Reviews Dataset\n", 19 | "\n", 20 | "In this lab, you will build four models and train them on the [IMDB Reviews dataset](https://www.tensorflow.org/datasets/catalog/imdb_reviews) with full word encoding. These use different layers after the embedding, namely `Flatten`, `LSTM`, `GRU`, and `Conv1D`. You will compare the performance and see which architecture might be best for this particular dataset. Let's begin!" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "-6PhPXVCa_1i" 27 | }, 28 | "source": [ 29 | "## Imports\n", 30 | "\n", 31 | "You will first import common libraries that will be used throughout the exercise." 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "id": "WA0Fi9p9ah5_" 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "import tensorflow_datasets as tfds\n", 43 | "import tensorflow as tf\n", 44 | "import numpy as np\n", 45 | "\n", 46 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 47 | "from tensorflow.keras.preprocessing.sequence import pad_sequences" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "id": "BTmnR_9dbBY9" 54 | }, 55 | "source": [ 56 | "## Download and Prepare the Dataset\n", 57 | "\n", 58 | "Next, you will download the `plain_text` version of the `IMDB Reviews` dataset."
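Once the download cell below has run, you could optionally peek at a single raw example as sketched here (an illustrative addition, not part of the lab); with `as_supervised=True`, each element is a `(text, label)` pair.

```python
# Peek at one raw review and its label (0 = negative, 1 = positive)
for review, label in imdb['train'].take(1):
    print(label.numpy())
    print(review.numpy().decode('utf8')[:200])  # first 200 characters only
```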
59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "id": "P-AhVYeBWgQ3" 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "# Download the plain text dataset\n", 70 | "imdb, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "id": "wHQ2Ko0zl7M4" 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "# Get the train and test sets\n", 82 | "train_data, test_data = imdb['train'], imdb['test']\n", 83 | "\n", 84 | "# Initialize sentences and labels lists\n", 85 | "training_sentences = []\n", 86 | "training_labels = []\n", 87 | "\n", 88 | "testing_sentences = []\n", 89 | "testing_labels = []\n", 90 | "\n", 91 | "# Loop over all training examples and save the sentences and labels\n", 92 | "for s,l in train_data:\n", 93 | "  training_sentences.append(s.numpy().decode('utf8'))\n", 94 | "  training_labels.append(l.numpy())\n", 95 | "\n", 96 | "# Loop over all test examples and save the sentences and labels\n", 97 | "for s,l in test_data:\n", 98 | "  testing_sentences.append(s.numpy().decode('utf8'))\n", 99 | "  testing_labels.append(l.numpy())\n", 100 | "\n", 101 | "# Convert labels lists to numpy array\n", 102 | "training_labels_final = np.array(training_labels)\n", 103 | "testing_labels_final = np.array(testing_labels)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": { 109 | "id": "Ygj9nleMfrAy" 110 | }, 111 | "source": [ 112 | "Unlike the subword encoded set you've been using in the previous labs, you will need to build the vocabulary from scratch and generate padded sequences. You already know how to do that with the `Tokenizer` class and `pad_sequences()` method." 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "id": "7n15yyMdmoH1" 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "# Parameters\n", 124 | "vocab_size = 10000\n", 125 | "max_length = 120\n", 126 | "trunc_type='post'\n", 127 | "oov_tok = \"<OOV>\"\n", 128 | "\n", 129 | "# Initialize the Tokenizer class\n", 130 | "tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok)\n", 131 | "\n", 132 | "# Generate the word index dictionary for the training sentences\n", 133 | "tokenizer.fit_on_texts(training_sentences)\n", 134 | "word_index = tokenizer.word_index\n", 135 | "\n", 136 | "# Generate and pad the training sequences\n", 137 | "sequences = tokenizer.texts_to_sequences(training_sentences)\n", 138 | "padded = pad_sequences(sequences,maxlen=max_length, truncating=trunc_type)\n", 139 | "\n", 140 | "# Generate and pad the test sequences\n", 141 | "testing_sequences = tokenizer.texts_to_sequences(testing_sentences)\n", 142 | "testing_padded = pad_sequences(testing_sequences,maxlen=max_length)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": { 148 | "id": "cs4GDKAFbJdq" 149 | }, 150 | "source": [ 151 | "## Plot Utility\n", 152 | "\n", 153 | "Before you define the models, you will define the function below so you can easily visualize the accuracy and loss history after training."
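Before moving on to the plot utility, here is a tiny optional illustration (not from the lab) of what the `Tokenizer` and `pad_sequences()` calls above do to an unseen sentence; the sample text is made up.

```python
sample = ["this movie was gloriously overacted and i loved every minute"]

# Words outside the fitted vocabulary map to the OOV token's index
sample_seq = tokenizer.texts_to_sequences(sample)
print(sample_seq)

# Shorter sentences are zero-padded at the front (pad_sequences defaults to padding='pre'),
# and anything longer than max_length is cut at the end because truncating='post'
sample_padded = pad_sequences(sample_seq, maxlen=max_length, truncating=trunc_type)
print(sample_padded.shape)   # (1, 120)
```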
154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "id": "nHGYuU4jPYaj" 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "import matplotlib.pyplot as plt\n", 165 | "\n", 166 | "# Plot Utility\n", 167 | "def plot_graphs(history, string):\n", 168 | " plt.plot(history.history[string])\n", 169 | " plt.plot(history.history['val_'+string])\n", 170 | " plt.xlabel(\"Epochs\")\n", 171 | " plt.ylabel(string)\n", 172 | " plt.legend([string, 'val_'+string])\n", 173 | " plt.show()" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": { 179 | "id": "bUoZJv02bP0m" 180 | }, 181 | "source": [ 182 | "## Model 1: Flatten\n", 183 | "\n", 184 | "First up is simply using a `Flatten` layer after the embedding. Its main advantage is that it is very fast to train. Observe the results below.\n", 185 | "\n", 186 | "*Note: You might see a different graph in the lectures. This is because we adjusted the `BATCH_SIZE` for training so subsequent models will train faster.*" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "id": "_SRAyulSaWAa" 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "# Parameters\n", 198 | "embedding_dim = 16\n", 199 | "dense_dim = 6\n", 200 | "\n", 201 | "# Model Definition with a Flatten layer\n", 202 | "model_flatten = tf.keras.Sequential([\n", 203 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 204 | " tf.keras.layers.Flatten(),\n", 205 | " tf.keras.layers.Dense(dense_dim, activation='relu'),\n", 206 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 207 | "])\n", 208 | "\n", 209 | "# Set the training parameters\n", 210 | "model_flatten.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 211 | "\n", 212 | "# Print the model summary\n", 213 | "model_flatten.summary()" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": { 220 | "id": "tYLZUZ3Ga1ok" 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "NUM_EPOCHS = 10\n", 225 | "BATCH_SIZE = 128\n", 226 | "\n", 227 | "# Train the model\n", 228 | "history_flatten = model_flatten.fit(padded, training_labels_final, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, validation_data=(testing_padded, testing_labels_final))" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": { 235 | "id": "fVPLbqcca6U2" 236 | }, 237 | "outputs": [], 238 | "source": [ 239 | "# Plot the accuracy and loss history\n", 240 | "plot_graphs(history_flatten, 'accuracy')\n", 241 | "plot_graphs(history_flatten, 'loss')" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": { 247 | "id": "2w_soBeUbSXu" 248 | }, 249 | "source": [ 250 | "## LSTM\n", 251 | "\n", 252 | "Next, you will use an LSTM. This is slower to train but useful in applications where the order of the tokens is important." 
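Before moving on to the LSTM, here is some back-of-the-envelope parameter arithmetic for the `Flatten` model above (an illustrative aside, not from the lab), which helps explain why it trains so quickly:

```python
# Rough parameter arithmetic for the Flatten model with the values used above
embedding_params = 10000 * 16            # vocab_size * embedding_dim = 160,000 trainable weights
flatten_features = 120 * 16              # max_length * embedding_dim = 1,920 values per review
dense_params = flatten_features * 6 + 6  # Dense(6) weights + biases = 11,526

print(embedding_params, flatten_features, dense_params)
```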
253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": { 259 | "id": "wSualgGPPK0S" 260 | }, 261 | "outputs": [], 262 | "source": [ 263 | "# Parameters\n", 264 | "embedding_dim = 16\n", 265 | "lstm_dim = 32\n", 266 | "dense_dim = 6\n", 267 | "\n", 268 | "# Model Definition with LSTM\n", 269 | "model_lstm = tf.keras.Sequential([\n", 270 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 271 | " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(lstm_dim)),\n", 272 | " tf.keras.layers.Dense(dense_dim, activation='relu'),\n", 273 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 274 | "])\n", 275 | "\n", 276 | "# Set the training parameters\n", 277 | "model_lstm.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 278 | "\n", 279 | "# Print the model summary\n", 280 | "model_lstm.summary()" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": { 287 | "id": "crEvEcQmUQiL" 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "NUM_EPOCHS = 10\n", 292 | "BATCH_SIZE = 128\n", 293 | "\n", 294 | "# Train the model\n", 295 | "history_lstm = model_lstm.fit(padded, training_labels_final, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, validation_data=(testing_padded, testing_labels_final))" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "id": "QVwnSYF-aIha" 303 | }, 304 | "outputs": [], 305 | "source": [ 306 | "# Plot the accuracy and loss history\n", 307 | "plot_graphs(history_lstm, 'accuracy')\n", 308 | "plot_graphs(history_lstm, 'loss')" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": { 314 | "id": "tcBMGJgzcXkl" 315 | }, 316 | "source": [ 317 | "## GRU\n", 318 | "\n", 319 | "The *Gated Recurrent Unit* or [GRU](https://www.tensorflow.org/api_docs/python/tf/keras/layers/GRU) is usually referred to as a simpler version of the LSTM. It can be used in applications where the sequence is important but you want faster results and can sacrifice some accuracy. You will notice in the model summary that it is a bit smaller than the LSTM and it also trains faster by a few seconds." 
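To make that size comparison concrete, here is a rough parameter count (an illustrative aside, not from the lab) for the bidirectional LSTM above versus the bidirectional GRU defined in the next cell, assuming TensorFlow 2's default `reset_after=True` for the GRU:

```python
# Recurrent-layer parameter counts with the lab's embedding_dim=16 and 32 units
embedding_dim, units = 16, 32

lstm_params = 4 * units * (embedding_dim + units + 1)  # 6,272 per direction
gru_params = 3 * units * (embedding_dim + units + 2)   # 4,800 per direction (reset_after=True)

# Bidirectional runs a forward and a backward copy, doubling each count
print("Bidirectional LSTM params:", 2 * lstm_params)   # 12,544
print("Bidirectional GRU params: ", 2 * gru_params)    # 9,600
```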
320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": { 326 | "id": "5NEpdhb8AxID" 327 | }, 328 | "outputs": [], 329 | "source": [ 330 | "import tensorflow as tf\n", 331 | "\n", 332 | "# Parameters\n", 333 | "embedding_dim = 16\n", 334 | "gru_dim = 32\n", 335 | "dense_dim = 6\n", 336 | "\n", 337 | "# Model Definition with GRU\n", 338 | "model_gru = tf.keras.Sequential([\n", 339 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 340 | " tf.keras.layers.Bidirectional(tf.keras.layers.GRU(gru_dim)),\n", 341 | " tf.keras.layers.Dense(dense_dim, activation='relu'),\n", 342 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 343 | "])\n", 344 | "\n", 345 | "# Set the training parameters\n", 346 | "model_gru.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 347 | "\n", 348 | "# Print the model summary\n", 349 | "model_gru.summary()" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": { 356 | "id": "V5LLrXC-uNX6" 357 | }, 358 | "outputs": [], 359 | "source": [ 360 | "NUM_EPOCHS = 10\n", 361 | "BATCH_SIZE = 128\n", 362 | "\n", 363 | "# Train the model\n", 364 | "history_gru = model_gru.fit(padded, training_labels_final, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, validation_data=(testing_padded, testing_labels_final))" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": { 371 | "id": "7kwU-2skSQ3E" 372 | }, 373 | "outputs": [], 374 | "source": [ 375 | "# Plot the accuracy and loss history\n", 376 | "plot_graphs(history_gru, 'accuracy')\n", 377 | "plot_graphs(history_gru, 'loss')" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": { 383 | "id": "ugToQrB-cfr5" 384 | }, 385 | "source": [ 386 | "## Convolution\n", 387 | "\n", 388 | "Lastly, you will use a convolution layer to extract features from your dataset. You will append a [GlobalAveragePooling1d](https://www.tensorflow.org/api_docs/python/tf/keras/layers/GlobalAveragePooling1D) layer to reduce the results before passing it on to the dense layers. Like the model with `Flatten`, this also trains much faster than the ones using RNN layers like `LSTM` and `GRU`." 
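Before running the next cell, this small sketch (illustrative, not from the lab) works out how `Conv1D` and `GlobalAveragePooling1D` change the tensor shape with the parameters used below:

```python
# Shape arithmetic for the convolution model
max_length, kernel_size, filters = 120, 5, 128

# Conv1D with the default padding='valid' shortens the sequence
conv_steps = max_length - kernel_size + 1
print("after Conv1D:", (conv_steps, filters))   # (116, 128)

# GlobalAveragePooling1D then averages over the 116 time steps
print("after pooling:", (filters,))             # (128,)
```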
389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": { 395 | "id": "K_Jc7cY3Qxke" 396 | }, 397 | "outputs": [], 398 | "source": [ 399 | "# Parameters\n", 400 | "embedding_dim = 16\n", 401 | "filters = 128\n", 402 | "kernel_size = 5\n", 403 | "dense_dim = 6\n", 404 | "\n", 405 | "# Model Definition with Conv1D\n", 406 | "model_conv = tf.keras.Sequential([\n", 407 | " tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n", 408 | " tf.keras.layers.Conv1D(filters, kernel_size, activation='relu'),\n", 409 | " tf.keras.layers.GlobalAveragePooling1D(),\n", 410 | " tf.keras.layers.Dense(dense_dim, activation='relu'),\n", 411 | " tf.keras.layers.Dense(1, activation='sigmoid')\n", 412 | "])\n", 413 | "\n", 414 | "# Set the training parameters\n", 415 | "model_conv.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n", 416 | "\n", 417 | "# Print the model summary\n", 418 | "model_conv.summary()" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "metadata": { 425 | "id": "aUV70isnTiFF" 426 | }, 427 | "outputs": [], 428 | "source": [ 429 | "NUM_EPOCHS = 10\n", 430 | "BATCH_SIZE = 128\n", 431 | "\n", 432 | "# Train the model\n", 433 | "history_conv = model_conv.fit(padded, training_labels_final, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, validation_data=(testing_padded, testing_labels_final))" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "metadata": { 440 | "id": "T42EmhV0XhRV" 441 | }, 442 | "outputs": [], 443 | "source": [ 444 | "# Plot the accuracy and loss history\n", 445 | "plot_graphs(history_conv, 'accuracy')\n", 446 | "plot_graphs(history_conv, 'loss')" 447 | ] 448 | }, 449 | { 450 | "cell_type": "markdown", 451 | "metadata": { 452 | "id": "UgTIZxoUkv0l" 453 | }, 454 | "source": [ 455 | "## Wrap Up\n", 456 | "\n", 457 | "Now that you've seen the results for each model, can you make a recommendation on what works best for this dataset? Do you still get the same results if you tweak some hyperparameters like the vocabulary size? Try tweaking some of the values some more so you can get more insight on what model performs best." 458 | ] 459 | } 460 | ], 461 | "metadata": { 462 | "accelerator": "GPU", 463 | "colab": { 464 | "name": "C3_W3_Lab_4_imdb_reviews_with_GRU_LSTM_Conv1D.ipynb", 465 | "private_outputs": true, 466 | "provenance": [], 467 | "toc_visible": true 468 | }, 469 | "kernelspec": { 470 | "display_name": "Python 3", 471 | "language": "python", 472 | "name": "python3" 473 | }, 474 | "language_info": { 475 | "codemirror_mode": { 476 | "name": "ipython", 477 | "version": 3 478 | }, 479 | "file_extension": ".py", 480 | "mimetype": "text/x-python", 481 | "name": "python", 482 | "nbconvert_exporter": "python", 483 | "pygments_lexer": "ipython3", 484 | "version": "3.7.4" 485 | } 486 | }, 487 | "nbformat": 4, 488 | "nbformat_minor": 0 489 | } --------------------------------------------------------------------------------
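As a closing aid for the wrap-up question in the IMDB notebook above (an illustrative addition, not part of the lab), you could line up the four models' final validation accuracies from their history objects before making a recommendation:

```python
# Compare the last-epoch validation accuracy of the four IMDB models
histories = {
    'Flatten': history_flatten,
    'LSTM': history_lstm,
    'GRU': history_gru,
    'Conv1D': history_conv,
}

for name, h in histories.items():
    print(f"{name:8s} val_accuracy: {h.history['val_accuracy'][-1]:.3f}")
```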