├── .gitignore ├── .ipynb_checkpoints └── Untitled-checkpoint.ipynb ├── LICENSE ├── README.md ├── Untitled.ipynb ├── ch1 ├── .gitignore ├── README.md ├── keras_sequential_api_hello_world.ipynb ├── keras_sequential_api_hello_world.py ├── mnist_hello_world.ipynb ├── pytorch_tensor_walkthrough.ipynb └── validate_gradient_setup.ipynb ├── ch2 ├── .gitignore ├── README.md ├── cleaning_up_datasets.ipynb ├── examining_image_datasets.ipynb ├── examining_tabular_datasets.ipynb ├── examining_text_datasets.ipynb └── fastai_dataset_walkthrough.ipynb ├── ch3 ├── .gitignore ├── README.md ├── accessing_non_curated_datasets.ipynb ├── adult_sample_model.pkl ├── adult_sample_test.csv ├── assessing_dataset_suitability.ipynb ├── loading_saved_models_trained_with_tabular_datasets.ipynb ├── saving_models_trained_with_tabular_datasets.ipynb ├── training_model_standalone_tabular_dataset.ipynb └── training_with_tabular_datasets.ipynb ├── ch4 ├── .gitignore ├── text_classifier_model.ipynb ├── text_model_training.ipynb ├── text_standalone_dataset_classifier.ipynb └── text_standalone_dataset_lm.ipynb ├── ch5 ├── .gitignore ├── training_large_recommender_systems.ipynb ├── training_recommender_systems.ipynb └── training_recommender_systems_on_standalone_dataset.ipynb ├── ch6 ├── .gitignore ├── exploring_image_location_datasets.ipynb ├── training_with_curated_image_datasets.ipynb ├── training_with_curated_multi_image_classification_datasets.ipynb └── training_with_standalone_image_datasets.ipynb ├── ch7 ├── .gitignore ├── deploy_image │ ├── fruits_360may3.pkl │ ├── static │ │ └── css │ │ │ └── main2.css │ ├── templates │ │ ├── home.html │ │ └── show-prediction.html │ ├── test_images │ │ ├── 26_100.jpg │ │ ├── 4_100.jpg │ │ └── 5_100.jpg │ └── web_flask_deploy_image_model.py └── deploy_tabular │ ├── adult_sample_model.pkl │ ├── static │ └── css │ │ └── main2.css │ ├── templates │ ├── home.html │ └── show-prediction.html │ └── web_flask_deploy.py ├── ch8 ├── training_with_curated_multi_image_classification_dataset_augmenteds.ipynb ├── training_with_image_datasets_datablock_augmented.ipynb ├── training_with_image_datasets_datablocks.ipynb ├── training_with_tabular_datasets_callbacks.ipynb └── training_with_tabular_datasets_metrics.ipynb └── untitled.txt /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pth 3 | *.pkl 4 | .ipynb_checkpoints 5 | Untitled*.* 6 | untitled*.* 7 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 4 6 | } 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial 
portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning with fastai Cookbook 2 | 3 |  Deep Learning with fastai Cookbook 4 | 5 | This is the code repository for [ Deep Learning with fastai Cookbook](https://www.packtpub.com/product/deep-learning-with-fastai-cookbook/9781800208100), published by Packt. 6 | 7 | **Leverage the easy-to-use fastai framework to unlock the power of deep learning** 8 | 9 | ## What is this book about? 10 | fastai is an easy-to-use deep learning framework built on top of PyTorch that lets you rapidly create complete deep learning solutions with as few as 10 lines of code. Both predominant low-level deep learning frameworks, TensorFlow and PyTorch, require a lot of code, even for straightforward applications. In contrast, fastai handles the messy details for you and lets you focus on applying deep learning to actually solve problems. 11 | 12 | This book covers the following exciting features: 13 | * Prepare real-world raw datasets to train fastai deep learning models 14 | * Train fastai deep learning models using text and tabular data 15 | * Create recommender systems with fastai 16 | * Find out how to assess whether fastai is a good fit for a given problem 17 | * Deploy fastai deep learning models in web applications 18 | 19 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/1800208103) today! 20 | 21 | https://www.packtpub.com/ 23 | 24 | 25 | ## Instructions and Navigations 26 | All of the code is organized into folders. For example, Chapter02. 27 | 28 | The code will look like the following: 29 | ``` 30 | for(var i = 0; i < relationship_list.length; i++) { 31 | var opt = relationship_list[i]; 32 | select_relationship.innerHTML += ""; 34 | ``` 35 | 36 | **Following is what you need for this book:** 37 | This book is for data scientists, machine learning developers, and deep learning enthusiasts looking to explore the fastai framework using a recipe-based approach. Working knowledge of the Python programming language and machine learning basics is strongly recommended to get the most out of this deep learning book. 38 | 39 | With the following software and hardware list you can run all code files present in the book (Chapter 1-8). 40 | 41 | ### Software and Hardware List 42 | 43 | | Chapter | Software required | OS required | 44 | | -------- | ------------------------------------| -----------------------------------| 45 | | 1 | Python 3.7 | Windows or Linux | 46 | | 2 | Python libraries: pandas,folium | Windows or Linux | 47 | | 3 | Jupyter notebook | Windows, Mac OS X, and Linux (Any) | 48 | | 4 | Cloud deep learning environment: Paperspace Gradient, Google Collabratory |Windows or Linux | 49 | | 5 | Deep learning frameworks, fastai, PyTorch, Keras | Windows or Linux | 50 | 51 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. 
[Click here to download it](http://www.packtpub.com/sites/default/files/downloads/Bookname_ColorImages.pdf). 52 | 53 | ### Related products 54 | * Mastering spaCy [[Packt]](https://www.packtpub.com/networking-and-servers/linux-powerful-server-administration?utm_source=github&utm_medium=repository&utm_campaign=9781788293778) [[Amazon]](https://www.amazon.com/dp/1800567685) 55 | 56 | * Automated Machine Learning [[Packt]](https://www.packtpub.com/product/automated-machine-learning/9781800567689) [[Amazon]](https://www.amazon.com/dp/1800567685) 57 | 58 | ## Get to Know the Author 59 | **Mark Ryan** 60 | is a machine learning practitioner and technology manager who is passionate about delivering end-to-end deep learning applications that solve real-world problems. Mark has worked on deep learning projects that incorporate a variety of related technologies, including Rasa chatbots, web applications, and messenger platforms. As a strong believer in democratizing technology, Mark advocates for Keras and fastai as accessible frameworks that open up deep learning to non-specialists. Mark has a degree in computer science from the University of Waterloo and a Master of Science degree in computer science from the University of Toronto. -------------------------------------------------------------------------------- /Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# imports for notebook boilerplate\n", 10 | "!pip install -Uqq fastbook\n", 11 | "import fastbook\n", 12 | "from fastbook import *\n", 13 | "from fastai.tabular.all import *" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "# set up the notebook for fast.ai\n", 23 | "fastbook.setup_book()" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "ename": "ModuleNotFoundError", 33 | "evalue": "No module named 'pandas_datareader'", 34 | "output_type": "error", 35 | "traceback": [ 36 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 37 | "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", 38 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0myaml\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# For reading stock data from yahoo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mpandas_datareader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mDataReader\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;31m# For time stamps\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 39 | "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pandas_datareader'" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "import numpy as np\n", 45 | "import pandas as pd\n", 46 | "import os\n", 47 | "import yaml\n", 48 | "# For reading stock data from yahoo\n", 49 | "from pandas_datareader.data import DataReader\n", 50 | "\n", 51 | "# For time stamps\n", 
52 | "from datetime import datetime\n" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [] 61 | } 62 | ], 63 | "metadata": { 64 | "kernelspec": { 65 | "display_name": "Python 3", 66 | "language": "python", 67 | "name": "python3" 68 | }, 69 | "language_info": { 70 | "codemirror_mode": { 71 | "name": "ipython", 72 | "version": 3 73 | }, 74 | "file_extension": ".py", 75 | "mimetype": "text/x-python", 76 | "name": "python", 77 | "nbconvert_exporter": "python", 78 | "pygments_lexer": "ipython3", 79 | "version": "3.8.6" 80 | } 81 | }, 82 | "nbformat": 4, 83 | "nbformat_minor": 4 84 | } 85 | -------------------------------------------------------------------------------- /ch1/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .ipynb_checkpoints 3 | *.html 4 | -------------------------------------------------------------------------------- /ch1/README.md: -------------------------------------------------------------------------------- 1 | **Chapter 1 readme** 2 | 3 | This folder contains the code samples related to chapter 1 "Getting Started with fast.ai": 4 | 5 | - **validate_gradient_setup.ipynb**: fast.ai environment validation notebook 6 | - **mnist_hello_world.ipynb**: fast.ai MNIST "hello world" example 7 | - **keras_sequential_api_hello_world.ipynb**: Keras MNIST "hello world" example 8 | - **pytorch_tensor_walkthrough.ipynb**: PyTorch tensor examples -------------------------------------------------------------------------------- /ch1/keras_sequential_api_hello_world.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Keras sequential API \"hello world\" model for MNIST\n", 8 | "adapted from https://github.com/tensorflow/docs/blob/master/site/en/tutorials/quickstart/beginner.ipynb" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "#import required libraries\n", 18 | "\n", 19 | "import tensorflow as tf\n", 20 | "import pydotplus\n", 21 | "from tensorflow.keras.utils import plot_model" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# define dataset\n", 31 | "mnist = tf.keras.datasets.mnist\n", 32 | "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", 33 | "x_train, x_test = x_train / 255.0, x_test / 255.0" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "WARNING:tensorflow:From c:\\users\\ryanm\\appdata\\local\\programs\\python\\python37\\lib\\site-packages\\tensorflow_core\\python\\ops\\resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", 46 | "Instructions for updating:\n", 47 | "If using Keras pass *_constraint arguments to layers.\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "# define layers for the hello world model\n", 53 | "\n", 54 | "hello_world_model = tf.keras.models.Sequential([ \n", 55 | " tf.keras.layers.Flatten(input_shape=(28, 28)), \n", 56 | " tf.keras.layers.Dense(128, activation='relu'), \n", 57 | " 
tf.keras.layers.Dropout(0.15), \n", 58 | " tf.keras.layers.Dense(10) \n", 59 | "])" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "Train on 51000 samples, validate on 9000 samples\n", 72 | "Epoch 1/10\n", 73 | "51000/51000 [==============================] - 7s 138us/sample - loss: 0.3479 - acc: 0.9011 - val_loss: 0.1577 - val_acc: 0.9572\n", 74 | "Epoch 2/10\n", 75 | "51000/51000 [==============================] - 8s 154us/sample - loss: 0.1603 - acc: 0.9534 - val_loss: 0.1117 - val_acc: 0.9679\n", 76 | "Epoch 3/10\n", 77 | "51000/51000 [==============================] - 5s 96us/sample - loss: 0.1194 - acc: 0.9647 - val_loss: 0.0946 - val_acc: 0.9730\n", 78 | "Epoch 4/10\n", 79 | "51000/51000 [==============================] - 5s 99us/sample - loss: 0.0955 - acc: 0.9706 - val_loss: 0.0859 - val_acc: 0.9748\n", 80 | "Epoch 5/10\n", 81 | "51000/51000 [==============================] - 8s 158us/sample - loss: 0.0801 - acc: 0.9753 - val_loss: 0.0817 - val_acc: 0.9748\n", 82 | "Epoch 6/10\n", 83 | "51000/51000 [==============================] - 7s 135us/sample - loss: 0.0700 - acc: 0.9785 - val_loss: 0.0735 - val_acc: 0.9767\n", 84 | "Epoch 7/10\n", 85 | "51000/51000 [==============================] - 6s 116us/sample - loss: 0.0600 - acc: 0.9815 - val_loss: 0.0706 - val_acc: 0.9784\n", 86 | "Epoch 8/10\n", 87 | "51000/51000 [==============================] - 5s 107us/sample - loss: 0.0525 - acc: 0.9833 - val_loss: 0.0769 - val_acc: 0.9776\n", 88 | "Epoch 9/10\n", 89 | "51000/51000 [==============================] - 5s 94us/sample - loss: 0.0464 - acc: 0.9851 - val_loss: 0.0692 - val_acc: 0.9791\n", 90 | "Epoch 10/10\n", 91 | "51000/51000 [==============================] - 5s 95us/sample - loss: 0.0432 - acc: 0.9856 - val_loss: 0.0740 - val_acc: 0.9777\n", 92 | "10000/10000 - 1s - loss: 0.0759 - acc: 0.9775\n", 93 | "Loss for test dataset: 0.07588852692145155\n", 94 | "Accuracy for test dataset: 0.9775\n" 95 | ] 96 | } 97 | ], 98 | "source": [ 99 | "# compile the hello world model, including specifying the loss function, optimizer, and metrics\n", 100 | "\n", 101 | "hello_world_model.compile(optimizer='adam',\n", 102 | " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", 103 | " metrics=['accuracy']) \n", 104 | "\n", 105 | "# train model\n", 106 | "\n", 107 | "history = hello_world_model.fit(x_train, y_train,\n", 108 | " batch_size=64,\n", 109 | " epochs=10,\n", 110 | " validation_split=0.15)\n", 111 | "\n", 112 | "# assess performance of the model\n", 113 | " \n", 114 | "test_scores = hello_world_model.evaluate(x_test, y_test, verbose=2) \n", 115 | "print('Loss for test dataset:', test_scores[0])\n", 116 | "print('Accuracy for test dataset:', test_scores[1])\n" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 5, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "name": "stdout", 126 | "output_type": "stream", 127 | "text": [ 128 | "10000/10000 - 1s - loss: 0.0759 - acc: 0.9775\n", 129 | "Loss for test dataset: 0.07588852692145155\n", 130 | "Accuracy for test dataset: 0.9775\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "# assess performance of the model on the test set\n", 136 | " \n", 137 | "test_scores = hello_world_model.evaluate(x_test, y_test, verbose=2) \n", 138 | "print('Loss for test dataset:', test_scores[0])\n", 139 | "print('Accuracy for test dataset:', test_scores[1])" 140 | ] 
141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [] 148 | } 149 | ], 150 | "metadata": { 151 | "kernelspec": { 152 | "display_name": "Python 3", 153 | "language": "python", 154 | "name": "python3" 155 | }, 156 | "language_info": { 157 | "codemirror_mode": { 158 | "name": "ipython", 159 | "version": 3 160 | }, 161 | "file_extension": ".py", 162 | "mimetype": "text/x-python", 163 | "name": "python", 164 | "nbconvert_exporter": "python", 165 | "pygments_lexer": "ipython3", 166 | "version": "3.8.6" 167 | } 168 | }, 169 | "nbformat": 4, 170 | "nbformat_minor": 4 171 | } 172 | -------------------------------------------------------------------------------- /ch1/keras_sequential_api_hello_world.py: -------------------------------------------------------------------------------- 1 | # hello world Keras sequential API model for MNIST 2 | # adapted from https://github.com/tensorflow/docs/blob/master/site/en/tutorials/quickstart/beginner.ipynb 3 | 4 | #import required libraries 5 | 6 | import tensorflow as tf 7 | import pydotplus 8 | from tensorflow.keras.utils import plot_model 9 | 10 | mnist = tf.keras.datasets.mnist 11 | 12 | # define inputs for the model 13 | 14 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 15 | x_train, x_test = x_train / 255.0, x_test / 255.0 16 | 17 | # define layers for the hello world model 18 | 19 | hello_world_model = tf.keras.models.Sequential([ 20 | tf.keras.layers.Flatten(input_shape=(28, 28)), 21 | tf.keras.layers.Dense(128, activation='relu'), 22 | tf.keras.layers.Dropout(0.15), 23 | tf.keras.layers.Dense(10) 24 | ]) 25 | 26 | # compile the hello world model, including specifying the loss function, optimizer, and metrics 27 | 28 | hello_world_model.compile(optimizer='adam', 29 | loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 30 | metrics=['accuracy']) 31 | 32 | # train model 33 | 34 | history = hello_world_model.fit(x_train, y_train, 35 | batch_size=64, 36 | epochs=10, 37 | validation_split=0.15) 38 | 39 | # assess performance of the model 40 | 41 | test_scores = hello_world_model.evaluate(x_test, y_test, verbose=2) 42 | print('Loss for test dataset:', test_scores[0]) 43 | print('Accuracy for test dataset:', test_scores[1]) 44 | -------------------------------------------------------------------------------- /ch1/pytorch_tensor_walkthrough.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# PyTorch tensor walkthrough\n", 8 | "- some simple examples of using PyTorch tensor objects" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "# import required library\n", 18 | "import torch" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "tensor([[1., 1., 1., 1., 1., 1., 1.],\n", 30 | " [1., 1., 1., 1., 1., 1., 1.],\n", 31 | " [1., 1., 1., 1., 1., 1., 1.],\n", 32 | " [1., 1., 1., 1., 1., 1., 1.],\n", 33 | " [1., 1., 1., 1., 1., 1., 1.]])" 34 | ] 35 | }, 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "output_type": "execute_result" 39 | } 40 | ], 41 | "source": [ 42 | "# create a 2-dimensional 5x7 tensor with value 1 in every position\n", 43 | "a = torch.ones(5, 7, dtype=torch.float)\n", 44 | "a" 45 | ] 46 | }, 47 | { 48 | 
"cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "tensor([[1., 0., 0., 0., 0., 0., 0.],\n", 56 | " [0., 1., 0., 0., 0., 0., 0.],\n", 57 | " [0., 0., 1., 0., 0., 0., 0.],\n", 58 | " [0., 0., 0., 1., 0., 0., 0.],\n", 59 | " [0., 0., 0., 0., 1., 0., 0.]])" 60 | ] 61 | }, 62 | "execution_count": 3, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "# define a non-symmetric matrix with zeros and 1's down the diagonal\n", 69 | "b = torch.eye(5,7)\n", 70 | "b" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 4, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "tensor([[1., 0., 0., 0., 0.],\n", 82 | " [0., 1., 0., 0., 0.],\n", 83 | " [0., 0., 1., 0., 0.],\n", 84 | " [0., 0., 0., 1., 0.],\n", 85 | " [0., 0., 0., 0., 1.]])" 86 | ] 87 | }, 88 | "execution_count": 4, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "# define a square identity tensor\n", 95 | "c = torch.eye(5,5)\n", 96 | "c" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "# Examine tensor elements" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/plain": [ 114 | "tensor([1., 0., 0., 0., 0., 0., 0.])" 115 | ] 116 | }, 117 | "execution_count": 5, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "b[0]" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 6, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "tensor(1.)" 135 | ] 136 | }, 137 | "execution_count": 6, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "b[0,0]" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 7, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "data": { 153 | "text/plain": [ 154 | "tensor([[0., 0., 1., 0., 0., 0., 0.],\n", 155 | " [0., 0., 0., 1., 0., 0., 0.],\n", 156 | " [0., 0., 0., 0., 1., 0., 0.]])" 157 | ] 158 | }, 159 | "execution_count": 7, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "b[2:]" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "# Do operations on the tensors" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 8, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "tensor([[2., 1., 1., 1., 1., 1., 1.],\n", 184 | " [1., 2., 1., 1., 1., 1., 1.],\n", 185 | " [1., 1., 2., 1., 1., 1., 1.],\n", 186 | " [1., 1., 1., 2., 1., 1., 1.],\n", 187 | " [1., 1., 1., 1., 2., 1., 1.]])" 188 | ] 189 | }, 190 | "execution_count": 8, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "# add two tensors with identical dimesions\n", 197 | "a_plus_b = a + b\n", 198 | "a_plus_b" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 9, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "ename": "RuntimeError", 208 | "evalue": "mat1 and mat2 shapes cannot be multiplied (5x7 and 5x5)", 209 | "output_type": "error", 210 | "traceback": [ 211 | 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 212 | "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", 213 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# multiply two tensors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0ma_mult_c\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m@\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0ma_mult_c\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 214 | "\u001b[0;31mRuntimeError\u001b[0m: mat1 and mat2 shapes cannot be multiplied (5x7 and 5x5)" 215 | ] 216 | } 217 | ], 218 | "source": [ 219 | "# multiply two tensors - this cell will produce an error\n", 220 | "a_mult_c = a@c\n", 221 | "a_mult_c" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 10, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "tensor([[1., 0., 0., 0., 0., 0., 0.],\n", 233 | " [0., 1., 0., 0., 0., 0., 0.],\n", 234 | " [0., 0., 1., 0., 0., 0., 0.],\n", 235 | " [0., 0., 0., 1., 0., 0., 0.],\n", 236 | " [0., 0., 0., 0., 1., 0., 0.],\n", 237 | " [0., 0., 0., 0., 0., 1., 0.],\n", 238 | " [0., 0., 0., 0., 0., 0., 1.]])" 239 | ] 240 | }, 241 | "execution_count": 10, 242 | "metadata": {}, 243 | "output_type": "execute_result" 244 | } 245 | ], 246 | "source": [ 247 | "# define as 7 x 7 identity tensor\n", 248 | "d = torch.eye(7,7)\n", 249 | "d" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 11, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "data": { 259 | "text/plain": [ 260 | "tensor([[1., 1., 1., 1., 1., 1., 1.],\n", 261 | " [1., 1., 1., 1., 1., 1., 1.],\n", 262 | " [1., 1., 1., 1., 1., 1., 1.],\n", 263 | " [1., 1., 1., 1., 1., 1., 1.],\n", 264 | " [1., 1., 1., 1., 1., 1., 1.]])" 265 | ] 266 | }, 267 | "execution_count": 11, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 272 | "source": [ 273 | "# multiply a tensor by the identity tensor\n", 274 | "a_mult_d = a@d\n", 275 | "a_mult_d" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 12, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/plain": [ 286 | "tensor([[1., 1., 1., 1., 1.],\n", 287 | " [1., 1., 1., 1., 1.],\n", 288 | " [1., 1., 1., 1., 1.],\n", 289 | " [1., 1., 1., 1., 1.],\n", 290 | " [1., 1., 1., 1., 1.],\n", 291 | " [1., 1., 1., 1., 1.],\n", 292 | " [1., 1., 1., 1., 1.]])" 293 | ] 294 | }, 295 | "execution_count": 12, 296 | "metadata": {}, 297 | "output_type": "execute_result" 298 | } 299 | ], 300 | "source": [ 301 | "# transpose a to get a 7x5 matrix\n", 302 | "a_trans = torch.transpose(a,0,1)\n", 303 | "a_trans" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 13, 309 | "metadata": {}, 310 | "outputs": [ 311 | { 312 | "data": { 313 | "text/plain": [ 314 | "tensor([[1., 1., 1., 1., 1.],\n", 315 | " [1., 1., 1., 1., 1.],\n", 316 | " [1., 1., 1., 1., 1.],\n", 317 | " [1., 1., 1., 1., 1.],\n", 318 | " [1., 1., 1., 1., 1.],\n", 319 | " [1., 1., 1., 1., 1.],\n", 320 | " [1., 1., 1., 1., 1.]])" 321 | ] 322 | }, 323 | "execution_count": 13, 324 | "metadata": {}, 325 | "output_type": "execute_result" 326 | } 327 | ], 328 | "source": [ 329 | "a_trans_mult_c = a_trans @ c\n", 330 | "a_trans_mult_c" 331 | ] 332 | }, 333 
| { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": {}, 337 | "outputs": [], 338 | "source": [] 339 | } 340 | ], 341 | "metadata": { 342 | "kernelspec": { 343 | "display_name": "Python 3", 344 | "language": "python", 345 | "name": "python3" 346 | }, 347 | "language_info": { 348 | "codemirror_mode": { 349 | "name": "ipython", 350 | "version": 3 351 | }, 352 | "file_extension": ".py", 353 | "mimetype": "text/x-python", 354 | "name": "python", 355 | "nbconvert_exporter": "python", 356 | "pygments_lexer": "ipython3", 357 | "version": "3.8.6" 358 | } 359 | }, 360 | "nbformat": 4, 361 | "nbformat_minor": 4 362 | } 363 | -------------------------------------------------------------------------------- /ch1/validate_gradient_setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Notebook to validate Gradient setup\n", 8 | "Run this notebook to validate that Gradient is set up correctly." 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/plain": [ 19 | "'2.1.5'" 20 | ] 21 | }, 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "output_type": "execute_result" 25 | } 26 | ], 27 | "source": [ 28 | "# validate the version of fast.ai\n", 29 | "import fastai\n", 30 | "fastai.__version__" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "Sat Jul 3 22:40:46 2021 \n", 43 | "+-----------------------------------------------------------------------------+\n", 44 | "| NVIDIA-SMI 450.36.06 Driver Version: 450.36.06 CUDA Version: 11.0 |\n", 45 | "|-------------------------------+----------------------+----------------------+\n", 46 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", 47 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", 48 | "| | | MIG M. 
|\n", 49 | "|===============================+======================+======================|\n", 50 | "| 0 Quadro P4000 On | 00000000:00:05.0 Off | N/A |\n", 51 | "| 46% 41C P0 34W / 105W | 853MiB / 8119MiB | 35% Default |\n", 52 | "| | | N/A |\n", 53 | "+-------------------------------+----------------------+----------------------+\n", 54 | " \n", 55 | "+-----------------------------------------------------------------------------+\n", 56 | "| Processes: |\n", 57 | "| GPU GI CI PID Type Process name GPU Memory |\n", 58 | "| ID ID Usage |\n", 59 | "|=============================================================================|\n", 60 | "+-----------------------------------------------------------------------------+\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "# validate access to GPUs\n", 66 | "!nvidia-smi" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.8.6" 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 4 98 | } 99 | -------------------------------------------------------------------------------- /ch2/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .ipynb_checkpoints 3 | *.html 4 | -------------------------------------------------------------------------------- /ch2/README.md: -------------------------------------------------------------------------------- 1 | **Chapter 2 readme** 2 | 3 | This folder contains the code samples related to chapter 2 "Exploring and cleaning up data with fast.ai" 4 | 5 | - **fast.ai environment validation notebook**: fastai_dataset_walkthrough.ipynb 6 | - **tabular dataset examination notebook**: examining_tabular_datasets.ipynb 7 | - **text dataset examination notebook**: examining_text_datasets.ipynb 8 | - **image dataset examination notebook**: examining_image_datasets.ipynb 9 | - **cleaning up datasets notebook**: cleaning_up_datasets.ipynb 10 | 11 | -------------------------------------------------------------------------------- /ch2/cleaning_up_datasets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "-VumfAIlrzuU" 7 | }, 8 | "source": [ 9 | "# Cleaning up datasets in fast.ai\n", 10 | "Walkthrough of how to clean up datasets using the facilities in fast.ai." 
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": { 17 | "colab": { 18 | "base_uri": "https://localhost:8080/" 19 | }, 20 | "executionInfo": { 21 | "elapsed": 11330, 22 | "status": "ok", 23 | "timestamp": 1609718586941, 24 | "user": { 25 | "displayName": "Mark Ryan", 26 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiOaJEeoxteIdEhraqpv8y7ol-feJVt-BYY9ceTIQ=s64", 27 | "userId": "08045617267833954278" 28 | }, 29 | "user_tz": 300 30 | }, 31 | "id": "_c65w6Ykr8ar", 32 | "outputId": "8fa5c207-53fe-4c93-a541-17fadd9ca175" 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "# imports for notebook boilerplate\n", 37 | "!pip install -Uqq fastbook\n", 38 | "import fastbook\n", 39 | "from fastbook import *\n", 40 | "from fastai.text.all import *\n", 41 | "from fastai.vision.all import *\n", 42 | "from fastai.tabular.all import *" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": { 49 | "colab": { 50 | "base_uri": "https://localhost:8080/" 51 | }, 52 | "executionInfo": { 53 | "elapsed": 24264, 54 | "status": "ok", 55 | "timestamp": 1609718825006, 56 | "user": { 57 | "displayName": "Mark Ryan", 58 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiOaJEeoxteIdEhraqpv8y7ol-feJVt-BYY9ceTIQ=s64", 59 | "userId": "08045617267833954278" 60 | }, 61 | "user_tz": 300 62 | }, 63 | "id": "D5SUKNQdsHqF", 64 | "outputId": "8872bfbd-98f7-4c25-d058-83ce69a950a9" 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "# set up the notebook for fast.ai\n", 69 | "fastbook.setup_book()" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "metadata": { 76 | "executionInfo": { 77 | "elapsed": 386, 78 | "status": "ok", 79 | "timestamp": 1609718856470, 80 | "user": { 81 | "displayName": "Mark Ryan", 82 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiOaJEeoxteIdEhraqpv8y7ol-feJVt-BYY9ceTIQ=s64", 83 | "userId": "08045617267833954278" 84 | }, 85 | "user_tz": 300 86 | }, 87 | "id": "JtqnrjQMt4oC" 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "# ingest the curated tabular dataset ADULT_SAMPLE\n", 92 | "path = untar_data(URLs.ADULT_SAMPLE)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 4, 98 | "metadata": { 99 | "colab": { 100 | "base_uri": "https://localhost:8080/" 101 | }, 102 | "executionInfo": { 103 | "elapsed": 461, 104 | "status": "ok", 105 | "timestamp": 1609719188933, 106 | "user": { 107 | "displayName": "Mark Ryan", 108 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiOaJEeoxteIdEhraqpv8y7ol-feJVt-BYY9ceTIQ=s64", 109 | "userId": "08045617267833954278" 110 | }, 111 | "user_tz": 300 112 | }, 113 | "id": "YbnUC8p3utZb", 114 | "outputId": "6a0ea8a4-e84b-41ba-a322-aee92534819d" 115 | }, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "age 0\n", 121 | "workclass 0\n", 122 | "fnlwgt 0\n", 123 | "education 0\n", 124 | "education-num 487\n", 125 | "marital-status 0\n", 126 | "occupation 512\n", 127 | "relationship 0\n", 128 | "race 0\n", 129 | "sex 0\n", 130 | "capital-gain 0\n", 131 | "capital-loss 0\n", 132 | "hours-per-week 0\n", 133 | "native-country 0\n", 134 | "salary 0\n", 135 | "dtype: int64" 136 | ] 137 | }, 138 | "execution_count": 4, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "# ingest the dataset into a Pandas dataframe and count the number of missing values in each column\n", 145 | "df = pd.read_csv(path/'adult.csv')\n", 146 | "df.isnull().sum()" 147 | ] 148 | }, 149 
| { 150 | "cell_type": "code", 151 | "execution_count": 5, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "# define the parameters for TabularPandas\n", 156 | "\n", 157 | "# define transforms to apply to the tabular dataset\n", 158 | "procs = [FillMissing,Categorify]\n", 159 | "# define the dependent variable (y value)\n", 160 | "dep_var = 'salary'\n", 161 | "# define columns that are continuous / categorical\n", 162 | "cont,cat = cont_cat_split(df, 1, dep_var=dep_var) " 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 6, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "# define TabularPandas using the parameters just defined\n", 172 | "df_no_missing = TabularPandas(df,procs, cat, cont,y_names = dep_var)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 7, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/html": [ 183 | "\n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | "
|   | workclass | education | marital-status | occupation | relationship | race | sex | native-country | education-num_na | age | fnlwgt | education-num | capital-gain | capital-loss | hours-per-week | salary |
| 0 | Private | Assoc-acdm | Married-civ-spouse | #na# | Wife | White | Female | United-States | False | 49 | 101320 | 12.0 | 0 | 1902 | 40 | >=50k |
| 1 | Private | Masters | Divorced | Exec-managerial | Not-in-family | White | Male | United-States | False | 44 | 236746 | 14.0 | 10520 | 0 | 45 | >=50k |
| 2 | Private | HS-grad | Divorced | #na# | Unmarried | Black | Female | United-States | True | 38 | 96185 | 10.0 | 0 | 0 | 32 | <50k |
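The `#na#` entries and the extra `education-num_na` column in the rows above come from the `FillMissing` and `Categorify` procs defined earlier in this notebook. As a reference point, here is a minimal consolidated sketch of the same cleanup recipe; it only restates calls that already appear in this notebook, and assumes `untar_data` can fetch the curated `ADULT_SAMPLE` dataset.

```
from fastai.tabular.all import *

# ingest the curated ADULT_SAMPLE tabular dataset
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')

# FillMissing fills missing continuous values (median by default) and adds a *_na flag column;
# Categorify replaces category labels with integer IDs (a missing category maps to 0, shown as '#na#')
procs = [FillMissing, Categorify]
dep_var = 'salary'
cont, cat = cont_cat_split(df, 1, dep_var=dep_var)

df_no_missing = TabularPandas(df, procs, cat, cont, y_names=dep_var)

df_no_missing.show(3)               # human-readable view: original labels, '#na#' for filled values
df_no_missing.items.head(3)         # underlying values: categories encoded as integer IDs
df_no_missing.items.isnull().sum()  # confirm that no missing values remain
```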
" 265 | ], 266 | "text/plain": [ 267 | "" 268 | ] 269 | }, 270 | "metadata": {}, 271 | "output_type": "display_data" 272 | } 273 | ], 274 | "source": [ 275 | "# show the result with the show API which preserves the original values\n", 276 | "df_no_missing.show(3)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 8, 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "data": { 286 | "text/html": [ 287 | "
\n", 288 | "\n", 301 | "\n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | "
|   | age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | salary | education-num_na |
| 0 | 49 | 5 | 101320 | 8 | 12.0 | 3 | 0 | 6 | 5 | 1 | 0 | 1902 | 40 | 40 | 1 | 1 |
| 1 | 44 | 5 | 236746 | 13 | 14.0 | 1 | 5 | 2 | 5 | 2 | 10520 | 0 | 45 | 40 | 1 | 1 |
| 2 | 38 | 5 | 96185 | 12 | 10.0 | 1 | 0 | 5 | 3 | 1 | 0 | 0 | 32 | 40 | 0 | 2 |
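The encoded values above are what a fastai model would actually consume. A sketch of the typical next step, training a tabular learner on this cleaned data, is shown below; it mirrors the chapter 3 notebooks rather than this cleaning notebook, it reuses `df`, `procs`, `cat`, `cont`, and `dep_var` from the cells above, and the split percentage, seed, batch size, and epoch count are illustrative assumptions.

```
# add a train/validation split, build DataLoaders, and train a tabular learner
# (valid_pct, seed, bs, and the epoch count are illustrative choices)
splits = RandomSplitter(valid_pct=0.2, seed=42)(range_of(df))
to = TabularPandas(df, procs, cat, cont, y_names=dep_var, splits=splits)
dls = to.dataloaders(bs=64)

learn = tabular_learner(dls, metrics=accuracy)
learn.fit_one_cycle(3)
```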
\n", 383 | "
" 384 | ], 385 | "text/plain": [ 386 | " age workclass fnlwgt education education-num marital-status \\\n", 387 | "0 49 5 101320 8 12.0 3 \n", 388 | "1 44 5 236746 13 14.0 1 \n", 389 | "2 38 5 96185 12 10.0 1 \n", 390 | "\n", 391 | " occupation relationship race sex capital-gain capital-loss \\\n", 392 | "0 0 6 5 1 0 1902 \n", 393 | "1 5 2 5 2 10520 0 \n", 394 | "2 0 5 3 1 0 0 \n", 395 | "\n", 396 | " hours-per-week native-country salary education-num_na \n", 397 | "0 40 40 1 1 \n", 398 | "1 45 40 1 1 \n", 399 | "2 32 40 0 2 " 400 | ] 401 | }, 402 | "execution_count": 8, 403 | "metadata": {}, 404 | "output_type": "execute_result" 405 | } 406 | ], 407 | "source": [ 408 | "# show the result with the items.head() API which shows the non-numeric categorical values transformed to numeric IDs\n", 409 | "df_no_missing.items.head(3)" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 9, 415 | "metadata": {}, 416 | "outputs": [ 417 | { 418 | "data": { 419 | "text/plain": [ 420 | "age 0\n", 421 | "workclass 0\n", 422 | "fnlwgt 0\n", 423 | "education 0\n", 424 | "education-num 0\n", 425 | "marital-status 0\n", 426 | "occupation 0\n", 427 | "relationship 0\n", 428 | "race 0\n", 429 | "sex 0\n", 430 | "capital-gain 0\n", 431 | "capital-loss 0\n", 432 | "hours-per-week 0\n", 433 | "native-country 0\n", 434 | "salary 0\n", 435 | "education-num_na 0\n", 436 | "dtype: int64" 437 | ] 438 | }, 439 | "execution_count": 9, 440 | "metadata": {}, 441 | "output_type": "execute_result" 442 | } 443 | ], 444 | "source": [ 445 | "# confirm that the missing values have been dealt with\n", 446 | "df_no_missing.items.isnull().sum()" 447 | ] 448 | } 449 | ], 450 | "metadata": { 451 | "colab": { 452 | "authorship_tag": "ABX9TyMIoaNLzGxbUkLOTT6nhCqr", 453 | "collapsed_sections": [], 454 | "name": "cleaning_up_datasets.ipynb", 455 | "provenance": [] 456 | }, 457 | "kernelspec": { 458 | "display_name": "Python 3", 459 | "language": "python", 460 | "name": "python3" 461 | }, 462 | "language_info": { 463 | "codemirror_mode": { 464 | "name": "ipython", 465 | "version": 3 466 | }, 467 | "file_extension": ".py", 468 | "mimetype": "text/x-python", 469 | "name": "python", 470 | "nbconvert_exporter": "python", 471 | "pygments_lexer": "ipython3", 472 | "version": "3.8.6" 473 | } 474 | }, 475 | "nbformat": 4, 476 | "nbformat_minor": 4 477 | } 478 | -------------------------------------------------------------------------------- /ch2/examining_tabular_datasets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Examining tabular datasets in fast.ai\n", 8 | "Walkthrough of how to examine tabular datasets using the facilities in fast.ai\n", 9 | "\n", 10 | "The example shown here is adapted from the paper by Howard and Gugger https://arxiv.org/pdf/2002.04688.pdf" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# imports for notebook boilerplate\n", 20 | "!pip install -Uqq fastbook\n", 21 | "import fastbook\n", 22 | "from fastbook import *\n", 23 | "from fastai.vision.all import *" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# set up the notebook for fast.ai\n", 33 | "fastbook.setup_book()" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | 
"outputs": [], 41 | "source": [ 42 | "# ingest the curated tabular dataset ADULT_SAMPLE\n", 43 | "path = untar_data(URLs.ADULT_SAMPLE)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 4, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "(#3) [Path('/storage/data/adult_sample/export.pkl'),Path('/storage/data/adult_sample/adult.csv'),Path('/storage/data/adult_sample/models')]" 55 | ] 56 | }, 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "# examine the directory structure\n", 64 | "path.ls()" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 5, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "# ingest the dataset into a Pandas dataframe\n", 74 | "df = pd.read_csv(path/'adult.csv')" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 6, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/html": [ 85 | "
\n", 86 | "\n", 99 | "\n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | "
|   | age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | salary |
| 0 | 49 | Private | 101320 | Assoc-acdm | 12.0 | Married-civ-spouse | NaN | Wife | White | Female | 0 | 1902 | 40 | United-States | >=50k |
| 1 | 44 | Private | 236746 | Masters | 14.0 | Divorced | Exec-managerial | Not-in-family | White | Male | 10520 | 0 | 45 | United-States | >=50k |
| 2 | 38 | Private | 96185 | HS-grad | NaN | Divorced | NaN | Unmarried | Black | Female | 0 | 0 | 32 | United-States | <50k |
| 3 | 38 | Self-emp-inc | 112847 | Prof-school | 15.0 | Married-civ-spouse | Prof-specialty | Husband | Asian-Pac-Islander | Male | 0 | 0 | 40 | United-States | >=50k |
| 4 | 42 | Self-emp-not-inc | 82297 | 7th-8th | NaN | Married-civ-spouse | Other-service | Wife | Black | Female | 0 | 0 | 50 | United-States | <50k |
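The cells that follow count records, unique values, and missing values in this dataframe. A few additional pandas calls are often useful at this stage; they are not part of the original notebook, just common inspection practice on a tabular dataset like this one.

```
# complementary pandas inspection calls (not in the original notebook)
df.dtypes                        # data type of each column: numeric vs. object/categorical
df.describe()                    # summary statistics for the numeric columns
df['workclass'].value_counts()   # distribution of values in one categorical column
df.sample(5, random_state=0)     # a random sample of rows, as an alternative to head()
```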
\n", 213 | "
" 214 | ], 215 | "text/plain": [ 216 | " age workclass fnlwgt education education-num \\\n", 217 | "0 49 Private 101320 Assoc-acdm 12.0 \n", 218 | "1 44 Private 236746 Masters 14.0 \n", 219 | "2 38 Private 96185 HS-grad NaN \n", 220 | "3 38 Self-emp-inc 112847 Prof-school 15.0 \n", 221 | "4 42 Self-emp-not-inc 82297 7th-8th NaN \n", 222 | "\n", 223 | " marital-status occupation relationship race \\\n", 224 | "0 Married-civ-spouse NaN Wife White \n", 225 | "1 Divorced Exec-managerial Not-in-family White \n", 226 | "2 Divorced NaN Unmarried Black \n", 227 | "3 Married-civ-spouse Prof-specialty Husband Asian-Pac-Islander \n", 228 | "4 Married-civ-spouse Other-service Wife Black \n", 229 | "\n", 230 | " sex capital-gain capital-loss hours-per-week native-country salary \n", 231 | "0 Female 0 1902 40 United-States >=50k \n", 232 | "1 Male 10520 0 45 United-States >=50k \n", 233 | "2 Female 0 0 32 United-States <50k \n", 234 | "3 Male 0 0 40 United-States >=50k \n", 235 | "4 Female 0 0 50 United-States <50k " 236 | ] 237 | }, 238 | "execution_count": 6, 239 | "metadata": {}, 240 | "output_type": "execute_result" 241 | } 242 | ], 243 | "source": [ 244 | "# examine the first few records in the dataframe\n", 245 | "df.head()" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 7, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/plain": [ 256 | "(32561, 15)" 257 | ] 258 | }, 259 | "execution_count": 7, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "# get the number of records in the dataset\n", 266 | "df.shape" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 8, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "data": { 276 | "text/plain": [ 277 | "age 73\n", 278 | "workclass 9\n", 279 | "fnlwgt 21648\n", 280 | "education 16\n", 281 | "education-num 16\n", 282 | "marital-status 7\n", 283 | "occupation 15\n", 284 | "relationship 6\n", 285 | "race 5\n", 286 | "sex 2\n", 287 | "capital-gain 119\n", 288 | "capital-loss 92\n", 289 | "hours-per-week 94\n", 290 | "native-country 42\n", 291 | "salary 2\n", 292 | "dtype: int64" 293 | ] 294 | }, 295 | "execution_count": 8, 296 | "metadata": {}, 297 | "output_type": "execute_result" 298 | } 299 | ], 300 | "source": [ 301 | "# get the count of unique values in each column of the dataset\n", 302 | "df.nunique()" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 9, 308 | "metadata": {}, 309 | "outputs": [ 310 | { 311 | "data": { 312 | "text/plain": [ 313 | "age 0\n", 314 | "workclass 0\n", 315 | "fnlwgt 0\n", 316 | "education 0\n", 317 | "education-num 487\n", 318 | "marital-status 0\n", 319 | "occupation 512\n", 320 | "relationship 0\n", 321 | "race 0\n", 322 | "sex 0\n", 323 | "capital-gain 0\n", 324 | "capital-loss 0\n", 325 | "hours-per-week 0\n", 326 | "native-country 0\n", 327 | "salary 0\n", 328 | "dtype: int64" 329 | ] 330 | }, 331 | "execution_count": 9, 332 | "metadata": {}, 333 | "output_type": "execute_result" 334 | } 335 | ], 336 | "source": [ 337 | "# count the number of missing values in each column of the dataset\n", 338 | "df.isnull().sum()" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 10, 344 | "metadata": {}, 345 | "outputs": [ 346 | { 347 | "data": { 348 | "text/html": [ 349 | "
\n", 350 | "\n", 363 | "\n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | "
|   | age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | salary |
| 2 | 38 | Private | 96185 | HS-grad | NaN | Divorced | NaN | Unmarried | Black | Female | 0 | 0 | 32 | United-States | <50k |
| 3 | 38 | Self-emp-inc | 112847 | Prof-school | 15.0 | Married-civ-spouse | Prof-specialty | Husband | Asian-Pac-Islander | Male | 0 | 0 | 40 | United-States | >=50k |
| 5 | 20 | Private | 63210 | HS-grad | 9.0 | Never-married | Handlers-cleaners | Own-child | White | Male | 0 | 0 | 15 | United-States | <50k |
| 7 | 37 | Private | 138940 | 11th | 7.0 | Married-civ-spouse | NaN | Husband | White | Male | 0 | 0 | 40 | United-States | <50k |
| 9 | 36 | Self-emp-inc | 216711 | HS-grad | NaN | Married-civ-spouse | NaN | Husband | White | Male | 99999 | 0 | 50 | ? | >=50k |
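The subset above was produced with the boolean mask `df.age <= 40`. For completeness, a few equivalent or extended selection idioms are sketched below; they are illustrative and not part of the original notebook.

```
# variations on the boolean-mask filter used above (illustrative only)

# combine conditions: rows aged 40 or under whose salary is '>=50k'
young_high_earners = df[(df.age <= 40) & (df.salary == '>=50k')]

# the same age filter, keeping only a few columns of interest
young_subset = df.loc[df.age <= 40, ['age', 'education', 'salary']]

# query() form of the same condition
df_young_q = df.query('age <= 40')
```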
\n", 477 | "
" 478 | ], 479 | "text/plain": [ 480 | " age workclass fnlwgt education education-num \\\n", 481 | "2 38 Private 96185 HS-grad NaN \n", 482 | "3 38 Self-emp-inc 112847 Prof-school 15.0 \n", 483 | "5 20 Private 63210 HS-grad 9.0 \n", 484 | "7 37 Private 138940 11th 7.0 \n", 485 | "9 36 Self-emp-inc 216711 HS-grad NaN \n", 486 | "\n", 487 | " marital-status occupation relationship race \\\n", 488 | "2 Divorced NaN Unmarried Black \n", 489 | "3 Married-civ-spouse Prof-specialty Husband Asian-Pac-Islander \n", 490 | "5 Never-married Handlers-cleaners Own-child White \n", 491 | "7 Married-civ-spouse NaN Husband White \n", 492 | "9 Married-civ-spouse NaN Husband White \n", 493 | "\n", 494 | " sex capital-gain capital-loss hours-per-week native-country salary \n", 495 | "2 Female 0 0 32 United-States <50k \n", 496 | "3 Male 0 0 40 United-States >=50k \n", 497 | "5 Male 0 0 15 United-States <50k \n", 498 | "7 Male 0 0 40 United-States <50k \n", 499 | "9 Male 99999 0 50 ? >=50k " 500 | ] 501 | }, 502 | "execution_count": 10, 503 | "metadata": {}, 504 | "output_type": "execute_result" 505 | } 506 | ], 507 | "source": [ 508 | "# get the subset of the dataset where age <= 40\n", 509 | "# streetcarjan2014[streetcarjan2014.Location == \"King and Shaw\"].Route\n", 510 | "df_young = df[df.age <= 40]\n", 511 | "df_young.head()" 512 | ] 513 | } 514 | ], 515 | "metadata": { 516 | "kernelspec": { 517 | "display_name": "Python 3", 518 | "language": "python", 519 | "name": "python3" 520 | }, 521 | "language_info": { 522 | "codemirror_mode": { 523 | "name": "ipython", 524 | "version": 3 525 | }, 526 | "file_extension": ".py", 527 | "mimetype": "text/x-python", 528 | "name": "python", 529 | "nbconvert_exporter": "python", 530 | "pygments_lexer": "ipython3", 531 | "version": "3.8.6" 532 | } 533 | }, 534 | "nbformat": 4, 535 | "nbformat_minor": 4 536 | } 537 | -------------------------------------------------------------------------------- /ch3/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .ipynb_checkpoints 3 | *.html 4 | -------------------------------------------------------------------------------- /ch3/README.md: -------------------------------------------------------------------------------- 1 | **Chapter 3 readme** 2 | 3 | This folder contains the code samples related to chapter 3 "Training models with tabular data" 4 | 5 | - **training_with_tabular_datasets.ipynb**: notebook containing end-to-end example of training a fastai deep learning model with tabular data 6 | 7 | -------------------------------------------------------------------------------- /ch3/accessing_non_curated_datasets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Accessing non-curated tabular datasets\n", 8 | "Example of making a dataset that is not curated by fastai available for training a fastai deep learning application.\n", 9 | "\n", 10 | "In this notebook we'll go through the steps in ingest the Kaggle house prices dataset: https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data\n", 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 19, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Requirement already satisfied: fastdownload in /opt/conda/envs/fastai/lib/python3.8/site-packages (0.0.5)\n", 24 | "Requirement 
already satisfied: fastprogress in /opt/conda/envs/fastai/lib/python3.8/site-packages (from fastdownload) (1.0.0)\n", 25 | "Requirement already satisfied: fastcore>=1.3.26 in /opt/conda/envs/fastai/lib/python3.8/site-packages (from fastdownload) (1.3.26)\n", 26 | "Requirement already satisfied: numpy in /opt/conda/envs/fastai/lib/python3.8/site-packages (from fastprogress->fastdownload) (1.19.4)\n", 27 | "Requirement already satisfied: packaging in /opt/conda/envs/fastai/lib/python3.8/site-packages (from fastcore>=1.3.26->fastdownload) (20.4)\n", 28 | "Requirement already satisfied: pip in /opt/conda/envs/fastai/lib/python3.8/site-packages (from fastcore>=1.3.26->fastdownload) (20.2.4)\n", 29 | "Requirement already satisfied: six in /opt/conda/envs/fastai/lib/python3.8/site-packages (from packaging->fastcore>=1.3.26->fastdownload) (1.15.0)\n", 30 | "Requirement already satisfied: pyparsing>=2.0.2 in /opt/conda/envs/fastai/lib/python3.8/site-packages (from packaging->fastcore>=1.3.26->fastdownload) (2.4.7)\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "# imports for notebook boilerplate\n", 36 | "!pip install -Uqq fastbook\n", 37 | "!pip install fastdownload\n", 38 | "import fastbook\n", 39 | "from fastbook import *\n", 40 | "from fastai.tabular.all import *\n", 41 | "from fastdownload import FastDownload\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 20, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# imports required for this notebook\n", 51 | "from kaggle import api" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 21, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "# set up the notebook for fast.ai\n", 61 | "fastbook.setup_book()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "# Accessing a Kaggle dataset\n", 69 | "\n", 70 | "The following cells assume that you have completed the following steps:\n", 71 | "- Created a Kaggle ID, if you don't already have one: https://www.kaggle.com/account/login\n", 72 | "- Log into your Kaggle ID and go through the steps to download your Kaggle API key file: kaggle.json\n", 73 | "- Uploaded your kaggle.json file to the directory /root/.kaggle in your Gradient instance\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 22, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "# copy the contents of your kaggle.json file into creds\n", 83 | "creds = '{\"username\":,\"key\":}'" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 23, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "# define the kaggle credentials path\n", 93 | "cred_path = Path('~/.kaggle/kaggle.json').expanduser()\n" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 24, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "# define a target path for this house price dataset\n", 103 | "path = URLs.path('house_price')\n" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 25, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "# need an explicit definition of file_extract\n", 113 | "def file_extract(fname, dest=None):\n", 114 | " \"Extract `fname` to `dest` using `tarfile` or `zipfile`.\"\n", 115 | " if dest is None: dest = Path(fname).parent\n", 116 | " fname = str(fname)\n", 117 | " if fname.endswith('gz'): tarfile.open(fname, 'r:gz').extractall(dest)\n", 118 | " elif fname.endswith('zip'): zipfile.ZipFile(fname 
).extractall(dest)\n", 119 | " else: raise Exception(f'Unrecognized archive: {fname}')" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 26, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "path does not exist\n" 132 | ] 133 | }, 134 | { 135 | "name": "stderr", 136 | "output_type": "stream", 137 | "text": [ 138 | "100%|██████████| 199k/199k [00:00<00:00, 5.69MB/s]" 139 | ] 140 | }, 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "Downloading house-prices-advanced-regression-techniques.zip to /root/.fastai/archive/house_price\n", 146 | "\n" 147 | ] 148 | }, 149 | { 150 | "name": "stderr", 151 | "output_type": "stream", 152 | "text": [ 153 | "\n" 154 | ] 155 | }, 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "(#4) [Path('/root/.fastai/archive/house_price/train.csv'),Path('/root/.fastai/archive/house_price/test.csv'),Path('/root/.fastai/archive/house_price/sample_submission.csv'),Path('/root/.fastai/archive/house_price/data_description.txt')]" 160 | ] 161 | }, 162 | "execution_count": 26, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "# create the target path for the dataset and copy it into /storage/archive on Gradient\n", 169 | "if not path.exists():\n", 170 | " print('path does not exist')\n", 171 | " path.mkdir()\n", 172 | " api.competition_download_cli('house-prices-advanced-regression-techniques', path=path)\n", 173 | " #d = FastDownload()\n", 174 | " #d.get(path/'house-prices-advanced-regression-techniques.zip')\n", 175 | " file_extract(path/'house-prices-advanced-regression-techniques.zip')\n", 176 | " \n", 177 | " \n", 178 | "\n", 179 | "\n", 180 | "# list the directory structure of the newly created dataset\n", 181 | "path.ls(file_type='text')\n", 182 | "\n", 183 | "\n" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "# Ingest and explore the dataset\n", 191 | "In this dataset the train and test subsets are in separate CSV files. 
Ingest each of these and explore them" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "# ingest the dataset into a Pandas dataframe\n", 201 | "df_train = pd.read_csv(path/'train.csv')" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "df_train.head()" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "df_train.shape" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "df_test = pd.read_csv(path/'test.csv')\n", 229 | "df_test.head()" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "# note the shape of test - why does it have one less column than the train dataset?\n", 239 | "df_test.shape" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "# Set target\n", 247 | "adjust target column for binary classification" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "# function to replace target values with value indicating whether the input is over or under the mean\n", 257 | "def under_over(x,mean_x):\n", 258 | " if (x <= mean_x):\n", 259 | " returner = '0'\n", 260 | " else:\n", 261 | " returner = '1'\n", 262 | " return(returner)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "# get the average of the values in the SalePrice column\n", 272 | "mean_sp = int(df_train['SalePrice'].mean())\n", 273 | "# use the under_over() function to replace the values in the SalePrice column with indicators whether the value was over or under\n", 274 | "# the average for the SalePrice column\n", 275 | "df_train['SalePrice'] = df_train['SalePrice'].apply(lambda x: under_over(x,mean_sp))\n", 276 | "df_train.head()" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "mean_sp" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "df_train['SalePrice'].value_counts()" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "# Define target, categorical and continuous columns" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": {}, 308 | "outputs": [], 309 | "source": [ 310 | "# define the dependent variable (y value)\n", 311 | "dep_var = 'SalePrice'\n", 312 | "# define columns that are continuous / categorical\n", 313 | "cont,cat = cont_cat_split(df_train, 1, dep_var=dep_var) " 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "print(\"len cont is \",len(cont))\n", 323 | "print(\"len cat is \",len(cat))" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "df_train[cat].nunique()" 333 | ] 334 
| }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "df_test[cat].nunique()" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": {}, 347 | "source": [ 348 | "# Check for missing values" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "# create a dataframe that has rows for each column in df_train with missing values and \n", 358 | "# columns for the count and ratio of missing values\n", 359 | "count = df_train.isna().sum()\n", 360 | "df_train_missing = (pd.concat([count.rename('missing_count'),\n", 361 | " count.div(len(df_train))\n", 362 | " .rename('missing_ratio')],axis = 1)\n", 363 | " .loc[count.ne(0)])" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [ 372 | "df_train_missing.head()" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": null, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "df_train_missing.shape" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": null, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [ 390 | "count2 = df_test.isna().sum()\n", 391 | "df_test_missing = (pd.concat([count2.rename('missing_count'),\n", 392 | " count2.div(len(df_test))\n", 393 | " .rename('missing_ratio')],axis = 1)\n", 394 | " .loc[count2.ne(0)])" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": {}, 401 | "outputs": [], 402 | "source": [ 403 | "df_test_missing.head()" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [ 412 | "# check to see missing value col count in test set\n", 413 | "df_test_missing.shape" 414 | ] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": {}, 419 | "source": [ 420 | "# Replace missing values" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": null, 426 | "metadata": {}, 427 | "outputs": [], 428 | "source": [ 429 | "\n", 430 | "# for categorical columns, replace missing values with the most column categorical value in that column\n", 431 | "df_train[cat] = df_train[cat].fillna(df_train[cat].mode().iloc[0])\n", 432 | "df_test[cat] = df_test[cat].fillna(df_test[cat].mode().iloc[0])\n", 433 | "# for continuous columns, replace missing values with 0\n", 434 | "df_train[cont] = df_train[cont].fillna(0.0)\n", 435 | "df_test[cont] = df_test[cont].fillna(0.0)\n" 436 | ] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "metadata": {}, 441 | "source": [ 442 | "# Confirm missing values dealt with" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": null, 448 | "metadata": {}, 449 | "outputs": [], 450 | "source": [ 451 | "# check for missing values in df_train\n", 452 | "count = df_train.isna().sum()\n", 453 | "df_train_missing = (pd.concat([count.rename('missing_count'),\n", 454 | " count.div(len(df_train))\n", 455 | " .rename('missing_ratio')],axis = 1)\n", 456 | " .loc[count.ne(0)])" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "metadata": {}, 463 | "outputs": [], 464 | "source": [ 465 | "df_train_missing" 466 | ] 467 | }, 468 | { 469 | "cell_type": "code", 470 | "execution_count": null, 471 | "metadata": {}, 472 | "outputs": [], 473 | 
"source": [ 474 | "# check for missing values in df_test\n", 475 | "count = df_test.isna().sum()\n", 476 | "df_test_missing = (pd.concat([count.rename('missing_count'),\n", 477 | " count.div(len(df_test))\n", 478 | " .rename('missing_ratio')],axis = 1)\n", 479 | " .loc[count.ne(0)])" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": null, 485 | "metadata": {}, 486 | "outputs": [], 487 | "source": [ 488 | "df_test_missing" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": {}, 494 | "source": [ 495 | "# define TabularDataLoaders" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": null, 501 | "metadata": {}, 502 | "outputs": [], 503 | "source": [ 504 | "# define TabularDataLoaders object \n", 505 | "# valid_idx: the indices to use for the validation set\n", 506 | "# what happens when we try to run this without dealing with missing values first\n", 507 | "procs = [Categorify, Normalize]\n", 508 | "dls_house=TabularDataLoaders.from_df(\n", 509 | " df_train,path,procs= procs,\n", 510 | " cat_names= cat, cont_names = cont, y_names = dep_var, \n", 511 | " valid_idx=list(range((df_train.shape[0]-100),df_train.shape[0])), \n", 512 | " bs=64)\n", 513 | " " 514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": null, 519 | "metadata": {}, 520 | "outputs": [], 521 | "source": [ 522 | "dls_house.valid.show_batch()" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": {}, 528 | "source": [ 529 | "# Define and train model" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": null, 535 | "metadata": {}, 536 | "outputs": [], 537 | "source": [ 538 | "# define and fit the model\n", 539 | "learn = tabular_learner(dls_house, layers=[200,100], metrics=accuracy)\n", 540 | "learn.fit_one_cycle(5)" 541 | ] 542 | }, 543 | { 544 | "cell_type": "markdown", 545 | "metadata": {}, 546 | "source": [ 547 | "# Apply trained model to the test dataset" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": null, 553 | "metadata": {}, 554 | "outputs": [], 555 | "source": [ 556 | "# apply model to the test set\n", 557 | "# details of test_dl here: https://docs.fast.ai/tutorial.tabular\n", 558 | "dl = learn.dls.test_dl(df_test)" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": {}, 565 | "outputs": [], 566 | "source": [ 567 | "learn.get_preds(dl=dl)\n" 568 | ] 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": null, 573 | "metadata": {}, 574 | "outputs": [], 575 | "source": [ 576 | "learn.show_results()" 577 | ] 578 | }, 579 | { 580 | "cell_type": "markdown", 581 | "metadata": {}, 582 | "source": [ 583 | "# Examine the structure of the trained model structure\n", 584 | "Use the summary() function to see the structure of the trained model, including:\n", 585 | "- the layers that make up the model\n", 586 | "- total parameters\n", 587 | "- loss function\n", 588 | "- optimizer function\n", 589 | "- callbacks" 590 | ] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": null, 595 | "metadata": {}, 596 | "outputs": [], 597 | "source": [ 598 | "learn.summary()" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": null, 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [] 607 | } 608 | ], 609 | "metadata": { 610 | "kernelspec": { 611 | "display_name": "Python 3", 612 | "language": "python", 613 | "name": "python3" 614 | }, 615 | "language_info": { 616 | 
"codemirror_mode": { 617 | "name": "ipython", 618 | "version": 3 619 | }, 620 | "file_extension": ".py", 621 | "mimetype": "text/x-python", 622 | "name": "python", 623 | "nbconvert_exporter": "python", 624 | "pygments_lexer": "ipython3", 625 | "version": "3.8.6" 626 | } 627 | }, 628 | "nbformat": 4, 629 | "nbformat_minor": 4 630 | } 631 | -------------------------------------------------------------------------------- /ch3/adult_sample_model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Deep-Learning-with-fastai-Cookbook/e692fee0e7d8de184cb57deb222123c94483acd7/ch3/adult_sample_model.pkl -------------------------------------------------------------------------------- /ch3/adult_sample_test.csv: -------------------------------------------------------------------------------- 1 | age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country 2 | 49, Private,101320, Assoc-acdm,12, Married-civ-spouse,, Wife, White, Female,0,1902,40, United-States 3 | 44, Private,236746, Masters,14, Divorced, Exec-managerial, Not-in-family, White, Male,10520,0,45, United-States 4 | 38, Private,96185, HS-grad,, Divorced,, Unmarried, Black, Female,0,0,32, United-States 5 | 38, Self-emp-inc,112847, Prof-school,15, Married-civ-spouse, Prof-specialty, Husband, Asian-Pac-Islander, Male,0,0,40, United-States 6 | 42, Self-emp-not-inc,82297, 7th-8th,, Married-civ-spouse, Other-service, Wife, Black, Female,0,0,50, United-States 7 | 20, Private,63210, HS-grad,9, Never-married, Handlers-cleaners, Own-child, White, Male,0,0,15, United-States 8 | 49, Private,44434, Some-college,10, Divorced,, Other-relative, White, Male,0,0,35, United-States 9 | 37, Private,138940, 11th,7, Married-civ-spouse,, Husband, White, Male,0,0,40, United-States 10 | 46, Private,328216, HS-grad,9, Married-civ-spouse, Craft-repair, Husband, White, Male,0,0,40, United-States 11 | 36, Self-emp-inc,216711, HS-grad,, Married-civ-spouse,, Husband, White, Male,99999,0,50, ? 
12 | 23, Private,529223, Bachelors,13, Never-married,, Own-child, Black, Male,0,0,10, United-States 13 | 18, Private,216284, 11th,, Never-married, Adm-clerical, Own-child, White, Female,0,0,20, United-States 14 | 30, Private,151989, Assoc-voc,, Married-civ-spouse,, Wife, White, Female,0,0,40, United-States 15 | 30, Private,55291, Bachelors,, Married-civ-spouse,, Husband, White, Male,0,0,40, United-States 16 | 43, Private,84661, Assoc-voc,, Married-civ-spouse, Sales, Husband, White, Male,0,0,45, United-States 17 | 51, Private,284329, HS-grad,9, Widowed,, Unmarried, White, Male,0,0,40, United-States 18 | 38, Private,170174, 10th,, Married-civ-spouse, Machine-op-inspct, Husband, White, Male,0,0,40, United-States 19 | 35, Private,261293, Masters,14, Never-married,, Not-in-family, White, Male,0,0,60, United-States 20 | 56, State-gov,274111, Masters,14, Divorced,, Not-in-family, White, Male,0,1669,40, United-States 21 | 45, Private,267967, Bachelors,, Married-civ-spouse, Prof-specialty, Husband, White, Male,0,0,45, United-States 22 | 40, Private,188942, Some-college,, Married-civ-spouse,, Wife, Black, Female,0,0,40, Puerto-Rico 23 | 26, Private,746432, HS-grad,9, Never-married, Handlers-cleaners, Own-child, Black, Male,0,0,48, United-States 24 | 46, Private,117605, 9th,, Divorced, Sales, Not-in-family, White, Male,0,0,35, United-States 25 | 29, Private,1268339, HS-grad,, Married-spouse-absent,, Own-child, Black, Male,0,0,40, United-States 26 | 49, Private,247294, HS-grad,9, Married-civ-spouse, Craft-repair, Husband, White, Male,0,0,45, United-States 27 | 55, Self-emp-inc,222615, Masters,14, Married-civ-spouse, Exec-managerial, Husband, White, Male,0,0,60, United-States 28 | 47, Self-emp-not-inc,213745, Some-college,, Divorced,, Unmarried, White, Female,0,0,45, United-States 29 | 41, Self-emp-inc,151089, Some-college,, Married-civ-spouse,, Husband, White, Male,0,0,50, United-States 30 | 27, Private,153078, Prof-school,, Never-married, Prof-specialty, Own-child, Asian-Pac-Islander, Male,0,0,40, United-States 31 | 42, Private,70055, 11th,7, Married-civ-spouse,, Husband, White, Male,0,0,45, United-States 32 | 46, Private,353824, Assoc-acdm,, Never-married, Adm-clerical, Not-in-family, White, Male,0,0,40, United-States 33 | 18, Private,91525, HS-grad,9, Never-married, Sales, Other-relative, White, Male,0,0,25, United-States 34 | 45, Private,242391, Bachelors,13, Married-civ-spouse, Exec-managerial, Husband, White, Male,0,0,40, United-States 35 | 41, Private,140590, Some-college,, Never-married, Sales, Not-in-family, Black, Male,0,0,33, United-States 36 | 47, Federal-gov,102771, Doctorate,, Married-civ-spouse,, Husband, White, Male,0,0,40, United-States 37 | 35, Local-gov,107233, HS-grad,9, Never-married,, Unmarried, Amer-Indian-Eskimo, Male,0,0,55, United-States 38 | 44, Local-gov,150171, HS-grad,, Divorced, Adm-clerical, Unmarried, White, Female,0,0,40, United-States 39 | 29, Local-gov,419722, Assoc-acdm,, Never-married,, Not-in-family, White, Male,3674,0,40, United-States 40 | 36, Self-emp-not-inc,239415, HS-grad,, Married-civ-spouse,, Husband, White, Male,0,0,35, United-States 41 | 39, Private,150125, Assoc-acdm,, Divorced,, Unmarried, Black, Female,0,0,40, United-States 42 | 23, Private,222925, HS-grad,9, Married-civ-spouse,, Own-child, White, Female,2105,0,40, United-States 43 | 19, Private,263338, HS-grad,9, Never-married, Handlers-cleaners, Own-child, White, Male,0,0,20, United-States 44 | 29, Private,157262, Masters,, Never-married, Exec-managerial, Not-in-family, White, Female,0,0,45, 
United-States 45 | 24, Private,162593, Bachelors,13, Never-married, Adm-clerical, Not-in-family, Asian-Pac-Islander, Female,0,0,40, United-States 46 | 46, Local-gov,124071, Masters,, Divorced, Exec-managerial, Unmarried, White, Female,0,0,65, United-States 47 | 30, Private,216811, HS-grad,, Married-civ-spouse,, Husband, Amer-Indian-Eskimo, Male,0,0,40, United-States 48 | 34, Private,265807, HS-grad,, Married-civ-spouse,, Husband, White, Male,0,2051,55, United-States 49 | 53, Private,231919, HS-grad,, Married-civ-spouse,, Husband, White, Male,0,0,45, United-States 50 | 29, Private,146719, HS-grad,9, Never-married, Transport-moving, Not-in-family, White, Female,0,0,45, United-States 51 | 49, Local-gov,78859, Masters,14, Widowed, Prof-specialty, Unmarried, White, Female,0,323,20, United-States 52 | 32, Local-gov,230912, Masters,14, Never-married,, Not-in-family, White, Female,4865,0,40, United-States 53 | 42, Private,36271, Bachelors,, Married-civ-spouse,, Husband, White, Male,0,0,40, United-States 54 | 20, Private,56322, Some-college,, Never-married,, Own-child, White, Male,2176,0,25, United-States 55 | 24, Private,111450, HS-grad,9, Never-married, Transport-moving, Unmarried, Black, Male,0,0,40, United-States 56 | 38, Self-emp-not-inc,133299, Assoc-acdm,, Married-civ-spouse,, Husband, White, Male,0,0,40, United-States 57 | 36, Private,156084, HS-grad,, Never-married, Sales, Own-child, White, Female,0,0,40, United-States 58 | 42, Self-emp-not-inc,344920, Some-college,10, Married-civ-spouse,, Wife, White, Female,0,0,50, United-States 59 | 58, Private,218281, Bachelors,13, Married-civ-spouse, Adm-clerical, Husband, White, Male,0,0,40, Mexico 60 | 46, Self-emp-not-inc,245724, Some-college,, Divorced,, Not-in-family, White, Male,0,0,50, United-States 61 | 34, Federal-gov,191342, Bachelors,, Married-civ-spouse,, Husband, Asian-Pac-Islander, Male,0,0,38, United-States 62 | 53, Self-emp-not-inc,263925, Bachelors,13, Married-civ-spouse,, Husband, White, Male,0,0,40, Canada 63 | 66, ?,177351, Bachelors,, Married-civ-spouse, ?, Husband, White, Male,0,2174,40, United-States 64 | 32, Self-emp-not-inc,65278, Doctorate,16, Married-civ-spouse,, Husband, White, Male,0,0,40, United-States 65 | 25, Private,190350, 10th,6, Married-civ-spouse, Other-service, Own-child, White, Female,0,0,35, United-States 66 | 47, Private,163814, 10th,, Married-civ-spouse, Craft-repair, Husband, White, Male,0,0,40, United-States 67 | 61, Private,183735, Some-college,, Married-civ-spouse, Sales, Husband, White, Male,0,0,45, United-States 68 | 54, Self-emp-inc,206964, Some-college,10, Married-civ-spouse, Sales, Husband, White, Male,0,1977,40, United-States 69 | 70, Private,102610, Some-college,10, Divorced, Other-service, Not-in-family, White, Male,0,0,80, United-States 70 | 20, Private,146706, Some-college,, Married-civ-spouse,, Other-relative, White, Female,0,0,30, United-States 71 | 23, Local-gov,129232, HS-grad,9, Never-married, Adm-clerical, Own-child, White, Male,0,0,40, United-States 72 | 61, Private,99784, Masters,, Widowed,, Not-in-family, White, Female,0,0,40, United-States 73 | 31, Private,123397, HS-grad,9, Married-civ-spouse, Transport-moving, Wife, White, Female,5178,0,35, United-States 74 | 42, Private,322385, Assoc-voc,11, Married-civ-spouse, Machine-op-inspct, Husband, White, Male,2407,0,40, United-States 75 | 31, Private,379798, Some-college,10, Married-civ-spouse,, Husband, White, Male,0,0,40, United-States 76 | 28, Private,37933, HS-grad,, Never-married, Other-service, Not-in-family, Black, Female,0,0,48, 
United-States 77 | 38, Self-emp-not-inc,65716, Assoc-voc,, Divorced,, Unmarried, White, Female,0,0,40, United-States 78 | 31, Private,194901, HS-grad,9, Married-civ-spouse,, Husband, White, Male,0,0,50, United-States 79 | 19, Private,168601, 11th,7, Never-married,, Other-relative, White, Male,0,0,30, United-States 80 | 51, Private,282744, Bachelors,13, Married-civ-spouse,, Husband, White, Male,0,0,40, Canada 81 | 41, Local-gov,183224, Masters,14, Married-civ-spouse, Prof-specialty, Wife, Asian-Pac-Islander, Female,0,0,40, Taiwan 82 | 21, Private,29810, HS-grad,9, Never-married,, Not-in-family, White, Female,0,0,50, United-States 83 | 25, Self-emp-not-inc,113948, Assoc-voc,, Married-civ-spouse,, Wife, White, Female,0,0,45, United-States 84 | 17, Private,143331, 10th,6, Never-married,, Own-child, White, Male,0,0,15, United-States 85 | 33, Private,201988, Masters,, Married-civ-spouse, Prof-specialty, Husband, White, Male,0,0,45, United-States 86 | 29, Private,243660, HS-grad,, Married-civ-spouse,, Husband, White, Male,0,0,50, United-States 87 | 33, Private,256062, Some-college,, Never-married, Adm-clerical, Unmarried, White, Female,0,0,35, Puerto-Rico 88 | 27, Private,150025, HS-grad,, Never-married,, Not-in-family, White, Male,0,0,40, Puerto-Rico 89 | 30, Private,251120, 7th-8th,, Never-married,, Not-in-family, White, Male,0,0,38, United-States 90 | 28, Private,61435, HS-grad,, Married-civ-spouse, Craft-repair, Husband, White, Male,0,0,45, United-States 91 | 22, ?,255969, 12th,8, Never-married, ?, Not-in-family, White, Male,0,0,48, United-States 92 | 18, Private,170544, 11th,, Never-married,, Own-child, White, Male,0,0,20, United-States 93 | 43, Private,117158, Some-college,, Married-civ-spouse,, Husband, White, Male,0,0,45, United-States 94 | 43, Private,248094, HS-grad,9, Married-civ-spouse, Machine-op-inspct, Husband, White, Male,0,0,40, United-States 95 | 36, Private,189674, Some-college,10, Separated,, Other-relative, Black, Female,0,0,40, United-States 96 | 39, Private,269455, HS-grad,, Married-civ-spouse, Farming-fishing, Husband, White, Male,0,0,45, United-States 97 | 48, Private,36503, Some-college,, Married-civ-spouse,, Husband, White, Male,0,0,40, United-States 98 | 33, Private,50276, Bachelors,13, Never-married, Exec-managerial, Not-in-family, White, Male,0,0,40, United-States 99 | 34, Local-gov,22641, HS-grad,9, Never-married,, Not-in-family, Amer-Indian-Eskimo, Male,0,0,40, United-States 100 | 39, Local-gov,43702, Assoc-voc,, Married-civ-spouse,, Wife, White, Female,0,0,37, United-States 101 | 28, Private,153869, 11th,, Married-civ-spouse, Craft-repair, Husband, White, Male,0,0,40, United-States 102 | 46, Private,405309, HS-grad,, Married-civ-spouse, Exec-managerial, Husband, White, Male,0,0,40, United-States 103 | 20, Private,133352, Some-college,10, Never-married, Adm-clerical, Own-child, Asian-Pac-Islander, Female,0,0,40, Vietnam 104 | 19, Private,148644, HS-grad,, Never-married,, Own-child, White, Female,0,0,40, United-States 105 | 50, Private,171852, Bachelors,, Separated,, Own-child, Other, Female,0,0,40, United-States 106 | 27, Private,203776, HS-grad,9, Married-civ-spouse,, Husband, White, Male,0,0,50, United-States 107 | 61, Self-emp-inc,148577, HS-grad,9, Married-civ-spouse,, Husband, White, Male,0,0,55, United-States 108 | 34, Self-emp-inc,62396, Bachelors,13, Never-married, Prof-specialty, Not-in-family, White, Female,0,0,62, United-States 109 | 31, Private,403468, Some-college,10, Separated, Other-service, Unmarried, White, Female,0,0,50, Mexico 110 | 17, 
Private,132636, 11th,, Never-married,, Own-child, White, Female,0,0,15, United-States 111 | 23, Local-gov,254127, Bachelors,13, Never-married,, Other-relative, White, Female,0,0,50, United-States 112 | 21, Private,163870, 10th,, Married-civ-spouse,, Husband, White, Male,3908,0,40, United-States 113 | 50, Self-emp-not-inc,124793, Some-college,10, Married-civ-spouse,, Husband, White, Male,0,0,55, United-States 114 | 32, Private,130304, HS-grad,, Married-civ-spouse,, Husband, White, Male,0,1485,48, United-States 115 | 37, Private,117381, Masters,14, Married-civ-spouse, Prof-specialty, Husband, White, Male,0,0,45, United-States 116 | 49, Self-emp-not-inc,249585, 11th,7, Married-civ-spouse, Exec-managerial, Husband, White, Male,0,0,65, United-States 117 | 56, Self-emp-inc,98418, Prof-school,, Married-civ-spouse, Prof-specialty, Husband, White, Male,99999,0,40, United-States 118 | 34, Private,208785, Assoc-acdm,12, Divorced,, Not-in-family, White, Male,0,0,50, United-States 119 | 53, Private,142411, Assoc-voc,, Divorced,, Unmarried, White, Female,0,0,40, United-States 120 | 71, ?,250263, Some-college,, Married-civ-spouse,, Husband, White, Male,3432,0,30, United-States 121 | 36, Private,199947, Some-college,, Divorced,, Own-child, White, Female,0,0,30, United-States 122 | 30, Private,426017, 11th,, Never-married, Other-service, Own-child, White, Female,0,0,19, United-States 123 | 46, Private,59767, Some-college,10, Divorced, Sales, Not-in-family, White, Male,0,0,50, United-States 124 | 27, Private,123984, Assoc-acdm,12, Never-married, Other-service, Not-in-family, Asian-Pac-Islander, Female,0,0,35, Philippines 125 | 31, Private,168906, Bachelors,, Divorced, Exec-managerial, Not-in-family, White, Female,0,0,50, United-States 126 | 52, Private,229983, Prof-school,15, Married-civ-spouse, Prof-specialty, Wife, White, Female,3103,0,30, United-States 127 | 30, ?,298577, HS-grad,9, Never-married,, Own-child, White, Male,0,0,30, United-States 128 | 50, Local-gov,145166, Masters,, Married-civ-spouse, Prof-specialty, Husband, White, Male,7298,0,40, United-States 129 | 47, Private,114459, Doctorate,16, Married-civ-spouse,, Husband, White, Male,0,0,50, United-States 130 | -------------------------------------------------------------------------------- /ch3/loading_saved_models_trained_with_tabular_datasets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Loading a saved model trained with a tabular dataset in fast.ai\n", 8 | "- Example of loading a model trained with a tabular dataset in fast.ai.\n", 9 | "\n", 10 | "\n", 11 | "The example shown here is adapted from the paper by Howard and Gugger https://arxiv.org/pdf/2002.04688.pdf" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Prepare the notebook\n", 19 | "Import the required libraries and set up the notebook" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# imports for notebook boilerplate\n", 29 | "!pip install -Uqq fastbook\n", 30 | "import fastbook\n", 31 | "from fastbook import *\n", 32 | "from fastai.tabular.all import *\n" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# set up the notebook for fast.ai\n", 42 | "fastbook.setup_book()" 43 | ] 44 | }, 45 | { 46 | "cell_type": 
"markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# Load the saved, trained model\n", 50 | "- load the saved model\n", 51 | "- load the test dataset\n", 52 | "- apply the model to a row from the test dataset" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "# set the path depending on platform\n", 62 | "if 'google.colab' in str(get_ipython()):\n", 63 | " temp_path = Path('/content/gdrive/MyDrive/fastai_cookbook/Deep-Learning-with-fastai-Cookbook/ch3/')\n", 64 | "else:\n", 65 | " temp_path = Path(os.getcwd())" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 4, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "# load the trained model\n", 75 | "learn = load_learner(os.path.join(temp_path,'adult_sample_model.pkl'))" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 5, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "# load a test dataset\n", 85 | "sample_URL = 'https://raw.githubusercontent.com/PacktPublishing/Deep-Learning-with-fastai-Cookbook/main/ch3/adult_sample_test.csv'\n", 86 | "df_test = pd.read_csv(sample_URL)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 6, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "(128, 14)" 98 | ] 99 | }, 100 | "execution_count": 6, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "df_test.shape" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 7, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "data": { 116 | "text/html": [ 117 | "
\n", 118 | "\n", 131 | "\n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-country
049Private101320Assoc-acdm12.0Married-civ-spouseNaNWifeWhiteFemale0190240United-States
144Private236746Masters14.0DivorcedExec-managerialNot-in-familyWhiteMale10520045United-States
238Private96185HS-gradNaNDivorcedNaNUnmarriedBlackFemale0032United-States
338Self-emp-inc112847Prof-school15.0Married-civ-spouseProf-specialtyHusbandAsian-Pac-IslanderMale0040United-States
442Self-emp-not-inc822977th-8thNaNMarried-civ-spouseOther-serviceWifeBlackFemale0050United-States
\n", 239 | "
" 240 | ], 241 | "text/plain": [ 242 | " age workclass fnlwgt education education-num \\\n", 243 | "0 49 Private 101320 Assoc-acdm 12.0 \n", 244 | "1 44 Private 236746 Masters 14.0 \n", 245 | "2 38 Private 96185 HS-grad NaN \n", 246 | "3 38 Self-emp-inc 112847 Prof-school 15.0 \n", 247 | "4 42 Self-emp-not-inc 82297 7th-8th NaN \n", 248 | "\n", 249 | " marital-status occupation relationship race \\\n", 250 | "0 Married-civ-spouse NaN Wife White \n", 251 | "1 Divorced Exec-managerial Not-in-family White \n", 252 | "2 Divorced NaN Unmarried Black \n", 253 | "3 Married-civ-spouse Prof-specialty Husband Asian-Pac-Islander \n", 254 | "4 Married-civ-spouse Other-service Wife Black \n", 255 | "\n", 256 | " sex capital-gain capital-loss hours-per-week native-country \n", 257 | "0 Female 0 1902 40 United-States \n", 258 | "1 Male 10520 0 45 United-States \n", 259 | "2 Female 0 0 32 United-States \n", 260 | "3 Male 0 0 40 United-States \n", 261 | "4 Female 0 0 50 United-States " 262 | ] 263 | }, 264 | "execution_count": 7, 265 | "metadata": {}, 266 | "output_type": "execute_result" 267 | } 268 | ], 269 | "source": [ 270 | "df_test.head()" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 8, 276 | "metadata": {}, 277 | "outputs": [ 278 | { 279 | "data": { 280 | "text/plain": [ 281 | "age 49\n", 282 | "workclass Private\n", 283 | "fnlwgt 101320\n", 284 | "education Assoc-acdm\n", 285 | "education-num 12\n", 286 | "marital-status Married-civ-spouse\n", 287 | "occupation NaN\n", 288 | "relationship Wife\n", 289 | "race White\n", 290 | "sex Female\n", 291 | "capital-gain 0\n", 292 | "capital-loss 1902\n", 293 | "hours-per-week 40\n", 294 | "native-country United-States\n", 295 | "Name: 0, dtype: object" 296 | ] 297 | }, 298 | "execution_count": 8, 299 | "metadata": {}, 300 | "output_type": "execute_result" 301 | } 302 | ], 303 | "source": [ 304 | "# inspect a row of the test sample\n", 305 | "df_test.iloc[0]" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 9, 311 | "metadata": {}, 312 | "outputs": [ 313 | { 314 | "data": { 315 | "text/html": [], 316 | "text/plain": [ 317 | "" 318 | ] 319 | }, 320 | "metadata": {}, 321 | "output_type": "display_data" 322 | }, 323 | { 324 | "data": { 325 | "text/plain": [ 326 | "( workclass education marital-status occupation relationship race sex \\\n", 327 | " 0 5.0 8.0 3.0 0.0 6.0 5.0 1.0 \n", 328 | " \n", 329 | " native-country education-num_na age fnlwgt education-num \\\n", 330 | " 0 40.0 1.0 49.0 101320.0 12.0 \n", 331 | " \n", 332 | " capital-gain capital-loss hours-per-week salary \n", 333 | " 0 0.0 1902.0 40.0 1.0 ,\n", 334 | " tensor(1),\n", 335 | " tensor([0.1678, 0.8322]))" 336 | ] 337 | }, 338 | "execution_count": 9, 339 | "metadata": {}, 340 | "output_type": "execute_result" 341 | } 342 | ], 343 | "source": [ 344 | "# get a prediction from the saved model on a sample test record\n", 345 | "test_sample = df_test.iloc[0]\n", 346 | "learn.predict(test_sample)" 347 | ] 348 | } 349 | ], 350 | "metadata": { 351 | "kernelspec": { 352 | "display_name": "Python 3", 353 | "language": "python", 354 | "name": "python3" 355 | }, 356 | "language_info": { 357 | "codemirror_mode": { 358 | "name": "ipython", 359 | "version": 3 360 | }, 361 | "file_extension": ".py", 362 | "mimetype": "text/x-python", 363 | "name": "python", 364 | "nbconvert_exporter": "python", 365 | "pygments_lexer": "ipython3", 366 | "version": "3.8.6" 367 | } 368 | }, 369 | "nbformat": 4, 370 | "nbformat_minor": 4 371 | } 372 | 
-------------------------------------------------------------------------------- /ch4/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .ipynb_checkpoints 3 | *.html 4 | -------------------------------------------------------------------------------- /ch4/text_classifier_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Training a text classifier model with fastai\n", 8 | "- this notebook assumes you have already run text_model_training.ipynb notebook\n", 9 | "- In this notebook, the IMDB dataset is ingested\n", 10 | "- the first section" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "#hide\n", 20 | "!pip install -Uqq fastbook\n", 21 | "import fastbook\n", 22 | "fastbook.setup_book()" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "#hide\n", 32 | "from fastbook import *\n", 33 | "from fastai.text.all import *" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# switch to control whether direct TDL or DataBlocks definition used \n", 43 | "tdl = True" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 4, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# ensure the modifier value matches the value set for modifier in text_model_training notebook\n", 53 | "modifier = \"july4_2021\"" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "# Ingest the dataset\n", 61 | "- define the path for the dataset\n", 62 | "- create a TextDataLoaders object" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "CPU times: user 2.86 ms, sys: 822 µs, total: 3.68 ms\n", 75 | "Wall time: 11.8 ms\n" 76 | ] 77 | }, 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "(#7) [Path('/storage/data/imdb/README'),Path('/storage/data/imdb/tmp_lm'),Path('/storage/data/imdb/imdb.vocab'),Path('/storage/data/imdb/tmp_clas'),Path('/storage/data/imdb/test'),Path('/storage/data/imdb/train'),Path('/storage/data/imdb/unsup')]" 82 | ] 83 | }, 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "output_type": "execute_result" 87 | } 88 | ], 89 | "source": [ 90 | "%%time\n", 91 | "# create dataloaders object\n", 92 | "path = untar_data(URLs.IMDB)\n", 93 | "path.ls()" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "# Define the text classifier" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 6, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "name": "stdout", 110 | "output_type": "stream", 111 | "text": [ 112 | "CPU times: user 7.08 s, sys: 6.01 s, total: 13.1 s\n", 113 | "Wall time: 43.7 s\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "%%time\n", 119 | "# define TextDataLoaders object\n", 120 | "dls_clas = TextDataLoaders.from_folder(path, valid='test')\n" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 7, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "'\\n├── test\\n│ ├── neg\\n│ └── 
pos\\n├── tmp_clas\\n├── tmp_lm\\n├── train\\n│ ├── neg\\n│ └── pos\\n└── unsup\\n'" 132 | ] 133 | }, 134 | "execution_count": 7, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "# directory structure of the IMDB curated dataset\n", 141 | "'''\n", 142 | "├── test\n", 143 | "│ ├── neg\n", 144 | "│ └── pos\n", 145 | "├── tmp_clas\n", 146 | "├── tmp_lm\n", 147 | "├── train\n", 148 | "│ ├── neg\n", 149 | "│ └── pos\n", 150 | "└── unsup\n", 151 | "'''" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 8, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/plain": [ 162 | "Path('/storage/data/imdb')" 163 | ] 164 | }, 165 | "execution_count": 8, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "dls_clas.path" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 9, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "data": { 181 | "text/html": [ 182 | "\n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | "
textcategory
0xxbos xxmaj match 1 : xxmaj tag xxmaj team xxmaj table xxmaj match xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley vs xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley started things off with a xxmaj tag xxmaj team xxmaj table xxmaj match against xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit . xxmaj according to the rules of the match , both opponents have to go through tables in order to get the win . xxmaj benoit and xxmaj guerrero heated up early on by taking turns hammering first xxmaj spike and then xxmaj bubba xxmaj ray . a xxmaj german xxunk by xxmaj benoit to xxmaj bubba took the wind out of the xxmaj dudley brother . xxmaj spike tried to help his brother , but the referee restrained him while xxmaj benoit and xxmaj guerreropos
1xxbos * ! ! - xxup spoilers - ! ! * \\n\\n xxmaj before i begin this , let me say that i have had both the advantages of seeing this movie on the big screen and of having seen the \" authorized xxmaj version \" of this movie , remade by xxmaj stephen xxmaj king , himself , in 1997 . \\n\\n xxmaj both advantages made me appreciate this version of \" the xxmaj shining , \" all the more . \\n\\n xxmaj also , let me say that xxmaj i 've read xxmaj mr . xxmaj king 's book , \" the xxmaj shining \" on many occasions over the years , and while i love the book and am a huge fan of his work , xxmaj stanley xxmaj kubrick 's retelling of this story is far more compelling … and xxup scary . \\n\\n xxmaj kubrickpos
2xxbos xxmaj heavy - handed moralism . xxmaj writers using characters as mouthpieces to speak for themselves . xxmaj predictable , plodding plot points ( say that five times fast ) . a child 's imitation of xxmaj britney xxmaj spears . xxmaj this film has all the earmarks of a xxmaj lifetime xxmaj special reject . \\n\\n i honestly believe that xxmaj jesus xxmaj xxunk and xxmaj julia xxmaj xxunk set out to create a thought - provoking , emotional film on a tough subject , exploring the idea that things are not always black and white , that one who is a criminal by definition is not necessarily a bad human being , and that there can be extenuating circumstances , especially when one puts the well - being of a child first . xxmaj however , their earnestness ends up being channeled into preachy dialogue and triteneg
" 208 | ], 209 | "text/plain": [ 210 | "" 211 | ] 212 | }, 213 | "metadata": {}, 214 | "output_type": "display_data" 215 | } 216 | ], 217 | "source": [ 218 | "dls_clas.show_batch(max_n=3)" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 10, 224 | "metadata": {}, 225 | "outputs": [ 226 | { 227 | "name": "stdout", 228 | "output_type": "stream", 229 | "text": [ 230 | "keep_path is: /storage/data/imdb\n" 231 | ] 232 | } 233 | ], 234 | "source": [ 235 | "# save the current path\n", 236 | "keep_path = path\n", 237 | "print(\"keep_path is: \",str(keep_path))" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 11, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "name": "stdout", 247 | "output_type": "stream", 248 | "text": [ 249 | "CPU times: user 6.89 s, sys: 1.16 s, total: 8.05 s\n", 250 | "Wall time: 5.06 s\n" 251 | ] 252 | } 253 | ], 254 | "source": [ 255 | "%%time\n", 256 | "# define a text_classifier_learner object\n", 257 | "learn_clas = text_classifier_learner(dls_clas, AWD_LSTM, \n", 258 | " metrics=accuracy).to_fp16()" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "# Fine-tune the text classifier\n", 266 | "Use the encoder created as part of training the language model to fine tune the text classifier" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 11, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "data": { 276 | "text/plain": [ 277 | "Path('/storage/data/imdb')" 278 | ] 279 | }, 280 | "execution_count": 11, 281 | "metadata": {}, 282 | "output_type": "execute_result" 283 | } 284 | ], 285 | "source": [ 286 | "# Path('/storage/data/imdb')\n", 287 | "learn_clas.path" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 12, 293 | "metadata": {}, 294 | "outputs": [ 295 | { 296 | "name": "stdout", 297 | "output_type": "stream", 298 | "text": [ 299 | "CPU times: user 339 µs, sys: 80 µs, total: 419 µs\n", 300 | "Wall time: 54.4 µs\n" 301 | ] 302 | } 303 | ], 304 | "source": [ 305 | "%%time\n", 306 | "# set the path to the location of the encoder\n", 307 | "learn_clas.path = Path('/notebooks/temp')" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 13, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "# load the encoder that was saved when the language model was trained\n", 317 | "learn_clas = learn_clas.load_encoder('ft_'+modifier)" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 14, 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "data": { 327 | "text/plain": [ 328 | "Path('/notebooks/temp')" 329 | ] 330 | }, 331 | "execution_count": 14, 332 | "metadata": {}, 333 | "output_type": "execute_result" 334 | } 335 | ], 336 | "source": [ 337 | "learn_clas.path" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 15, 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [ 346 | "# set the path back to the original path\n", 347 | "learn_clas.path = keep_path" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 16, 353 | "metadata": {}, 354 | "outputs": [ 355 | { 356 | "data": { 357 | "text/plain": [ 358 | "Path('/storage/data/imdb')" 359 | ] 360 | }, 361 | "execution_count": 16, 362 | "metadata": {}, 363 | "output_type": "execute_result" 364 | } 365 | ], 366 | "source": [ 367 | "# ch 10 style Path('/storage/data/imdb')\n", 368 | "learn_clas.path" 369 | ] 370 | }, 371 | { 372 | "cell_type": 
"code", 373 | "execution_count": null, 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "data": { 378 | "text/html": [ 379 | "\n", 380 | "
\n", 381 | " \n", 393 | " \n", 394 | " 80.00% [4/5 22:29<05:37]\n", 395 | "
\n", 396 | " \n", 397 | "\n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | "
epochtrain_lossvalid_lossaccuracytime
00.4319910.2943410.87660008:29
10.4096000.2815760.88380004:39
20.3961120.2702410.88888004:10
30.4117360.2637370.89100005:10

\n", 438 | "\n", 439 | "

\n", 440 | " \n", 452 | " \n", 453 | " 88.75% [347/391 01:33<00:11 0.3788]\n", 454 | "
\n", 455 | " " 456 | ], 457 | "text/plain": [ 458 | "" 459 | ] 460 | }, 461 | "metadata": {}, 462 | "output_type": "display_data" 463 | } 464 | ], 465 | "source": [ 466 | "%%time\n", 467 | "# fine tune the model\n", 468 | "learn_clas.fit_one_cycle(5, 2e-2)" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": null, 474 | "metadata": {}, 475 | "outputs": [], 476 | "source": [ 477 | "x, y = first(dls_clas.train)\n", 478 | "x.shape, y.shape, len(dls_clas.train)" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "metadata": {}, 485 | "outputs": [], 486 | "source": [] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": null, 491 | "metadata": {}, 492 | "outputs": [], 493 | "source": [ 494 | "learn_clas.summary()" 495 | ] 496 | }, 497 | { 498 | "cell_type": "markdown", 499 | "metadata": {}, 500 | "source": [ 501 | "# Exercise the text classifier\n", 502 | "Apply the fine-tuned text classifier on some text samples." 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": null, 508 | "metadata": {}, 509 | "outputs": [], 510 | "source": [ 511 | "preds = learn_clas.predict(\"this film shows incredibly bad writing and is a complete disaster\")" 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": null, 517 | "metadata": {}, 518 | "outputs": [], 519 | "source": [ 520 | "preds" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "metadata": {}, 527 | "outputs": [], 528 | "source": [ 529 | "preds = learn_clas.predict(\"this film shows incredible talent and is a complete triumph\")" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": null, 535 | "metadata": {}, 536 | "outputs": [], 537 | "source": [ 538 | "preds" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": null, 544 | "metadata": {}, 545 | "outputs": [], 546 | "source": [ 547 | "# save the classifier model\n", 548 | "learn_clas.path = Path('/notebooks/temp')\n", 549 | "learn_clas.save('classifier_single_epoch_'+modifier+'d')" 550 | ] 551 | } 552 | ], 553 | "metadata": { 554 | "jupytext": { 555 | "split_at_heading": true 556 | }, 557 | "kernelspec": { 558 | "display_name": "Python 3", 559 | "language": "python", 560 | "name": "python3" 561 | }, 562 | "language_info": { 563 | "codemirror_mode": { 564 | "name": "ipython", 565 | "version": 3 566 | }, 567 | "file_extension": ".py", 568 | "mimetype": "text/x-python", 569 | "name": "python", 570 | "nbconvert_exporter": "python", 571 | "pygments_lexer": "ipython3", 572 | "version": "3.8.6" 573 | } 574 | }, 575 | "nbformat": 4, 576 | "nbformat_minor": 4 577 | } 578 | -------------------------------------------------------------------------------- /ch4/text_model_training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Training a language model with fastai\n", 8 | "- train a language model with curated dataset IMDB using pre-trained model AWD_LSTM\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "#hide\n", 18 | "!pip install -Uqq fastbook\n", 19 | "import fastbook\n", 20 | "fastbook.setup_book()" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "#hide\n", 30 | "from fastbook import 
*\n", 31 | "from fastai.text.all import *" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# define timestamp string for saving models\n", 41 | "modifier = \"aug13_2021\"" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "# Training a language model\n", 49 | "- take a pretrained model and train it some more using the IMDB dataset" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "data": { 59 | "text/html": [ 60 | "\n", 61 | "
\n", 62 | " \n", 74 | " \n", 75 | " 100.00% [144441344/144440600 00:14<00:00]\n", 76 | "
\n", 77 | " " 78 | ], 79 | "text/plain": [ 80 | "" 81 | ] 82 | }, 83 | "metadata": {}, 84 | "output_type": "display_data" 85 | }, 86 | { 87 | "name": "stdout", 88 | "output_type": "stream", 89 | "text": [ 90 | "CPU times: user 27.1 s, sys: 21.2 s, total: 48.3 s\n", 91 | "Wall time: 4min\n" 92 | ] 93 | }, 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "(#7) [Path('/root/.fastai/data/imdb/imdb.vocab'),Path('/root/.fastai/data/imdb/tmp_lm'),Path('/root/.fastai/data/imdb/train'),Path('/root/.fastai/data/imdb/unsup'),Path('/root/.fastai/data/imdb/tmp_clas'),Path('/root/.fastai/data/imdb/test'),Path('/root/.fastai/data/imdb/README')]" 98 | ] 99 | }, 100 | "execution_count": 4, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "%%time\n", 107 | "# create path object\n", 108 | "path = untar_data(URLs.IMDB)\n", 109 | "path.ls()" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 5, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/html": [], 120 | "text/plain": [ 121 | "" 122 | ] 123 | }, 124 | "metadata": {}, 125 | "output_type": "display_data" 126 | }, 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "CPU times: user 34.8 s, sys: 9.84 s, total: 44.6 s\n", 132 | "Wall time: 1min 32s\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "%%time\n", 138 | "# create TextDataLoaders object\n", 139 | "get_imdb = partial(get_text_files, folders=['train', 'test', 'unsup'])\n", 140 | "dls = TextDataLoaders.from_folder(path, valid = 'test', is_lm=True, bs=16)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 6, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "text/html": [ 151 | "\n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | "
texttext_
0xxbos a light - hearted comedy , xxmaj nothing shows us a world that we sometimes wish to escape to : a world of nothing . xxmaj anything you do n't like , be it a stack of bills , a bad memory , or even hunger can disappear at your wish . xxmaj they approached this movie very well , and with an enjoyable starring duo , there were only aa light - hearted comedy , xxmaj nothing shows us a world that we sometimes wish to escape to : a world of nothing . xxmaj anything you do n't like , be it a stack of bills , a bad memory , or even hunger can disappear at your wish . xxmaj they approached this movie very well , and with an enjoyable starring duo , there were only a few
1butch , what with their thick legs and arms . xxmaj brilliant . \\n\\n 3 . xxmaj brilliant - especially the way that neatly ties in with the theme of role reversal between the sexes : so utterly original and mind - blowing . xxmaj ellen behaves like a man , wants sex all the time , while her ex xxmaj patrick wants to talk - like a girl . xxmaj xxunk, what with their thick legs and arms . xxmaj brilliant . \\n\\n 3 . xxmaj brilliant - especially the way that neatly ties in with the theme of role reversal between the sexes : so utterly original and mind - blowing . xxmaj ellen behaves like a man , wants sex all the time , while her ex xxmaj patrick wants to talk - like a girl . xxmaj xxunk .
" 172 | ], 173 | "text/plain": [ 174 | "" 175 | ] 176 | }, 177 | "metadata": {}, 178 | "output_type": "display_data" 179 | } 180 | ], 181 | "source": [ 182 | "dls.show_batch(max_n=2)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 7, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/html": [ 193 | "\n", 194 | "
\n", 195 | " \n", 207 | " \n", 208 | " 100.00% [105070592/105067061 00:10<00:00]\n", 209 | "
\n", 210 | " " 211 | ], 212 | "text/plain": [ 213 | "" 214 | ] 215 | }, 216 | "metadata": {}, 217 | "output_type": "display_data" 218 | }, 219 | { 220 | "data": { 221 | "text/html": [ 222 | "\n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | "
epoch  train_loss  valid_loss  accuracy  time
0      4.566350    4.347948    0.266946  28:51
" 242 | ], 243 | "text/plain": [ 244 | "" 245 | ] 246 | }, 247 | "metadata": {}, 248 | "output_type": "display_data" 249 | }, 250 | { 251 | "data": { 252 | "text/html": [ 253 | "\n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | "
epoch  train_loss  valid_loss  accuracy  time
0      4.133654    4.032937    0.296276  31:30
" 273 | ], 274 | "text/plain": [ 275 | "" 276 | ] 277 | }, 278 | "metadata": {}, 279 | "output_type": "display_data" 280 | }, 281 | { 282 | "name": "stdout", 283 | "output_type": "stream", 284 | "text": [ 285 | "CPU times: user 45min 30s, sys: 15min 5s, total: 1h 35s\n", 286 | "Wall time: 1h 36s\n" 287 | ] 288 | } 289 | ], 290 | "source": [ 291 | "%%time\n", 292 | "# define and train model\n", 293 | "learn = language_model_learner(dls,AWD_LSTM,metrics=accuracy).to_fp16()\n", 294 | "learn.fine_tune(1, 1e-2)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 8, 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "text/html": [], 305 | "text/plain": [ 306 | "" 307 | ] 308 | }, 309 | "metadata": {}, 310 | "output_type": "display_data" 311 | }, 312 | { 313 | "data": { 314 | "text/plain": [ 315 | "'what comes next may have been an insult to animation , corruption , and drug use . This is not as bad'" 316 | ] 317 | }, 318 | "execution_count": 8, 319 | "metadata": {}, 320 | "output_type": "execute_result" 321 | } 322 | ], 323 | "source": [ 324 | "# get prediction\n", 325 | "# preds = learn.predict('The star is', n_words=20)\n", 326 | "learn.predict(\"what comes next\", n_words=20)" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 9, 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [ 335 | "keep_path = learn.path" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 10, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "# workaround to make path writeable\n", 345 | "learn.path = Path('/notebooks/temp')" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 11, 351 | "metadata": {}, 352 | "outputs": [ 353 | { 354 | "data": { 355 | "text/plain": [ 356 | "Path('/notebooks/temp')" 357 | ] 358 | }, 359 | "execution_count": 11, 360 | "metadata": {}, 361 | "output_type": "execute_result" 362 | } 363 | ], 364 | "source": [ 365 | "learn.path" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 12, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "data": { 375 | "text/plain": [ 376 | "'models'" 377 | ] 378 | }, 379 | "execution_count": 12, 380 | "metadata": {}, 381 | "output_type": "execute_result" 382 | } 383 | ], 384 | "source": [ 385 | "learn.model_dir" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 13, 391 | "metadata": {}, 392 | "outputs": [ 393 | { 394 | "data": { 395 | "text/plain": [ 396 | "Path('/notebooks/temp/models/lm_aug13_2021.pth')" 397 | ] 398 | }, 399 | "execution_count": 13, 400 | "metadata": {}, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "learn.save('lm_'+modifier)" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 14, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "# workaround to save encoder - need to do this to later load encoder for classifier\n", 415 | "learn.save_encoder('ft_'+modifier)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": 15, 421 | "metadata": {}, 422 | "outputs": [], 423 | "source": [ 424 | "learn.export('lm_model_'+modifier+'.pkl')" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 16, 430 | "metadata": {}, 431 | "outputs": [ 432 | { 433 | "data": { 434 | "text/html": [], 435 | "text/plain": [ 436 | "" 437 | ] 438 | }, 439 | "metadata": {}, 440 | "output_type": "display_data" 441 | }, 442 | { 443 | "data": { 444 | 
"text/plain": [ 445 | "SequentialRNN (Input shape: 16)\n", 446 | "============================================================================\n", 447 | "Layer (type) Output Shape Param # Trainable \n", 448 | "============================================================================\n", 449 | " [] \n", 450 | "LSTM \n", 451 | "LSTM \n", 452 | "LSTM \n", 453 | "RNNDropout \n", 454 | "RNNDropout \n", 455 | "RNNDropout \n", 456 | "____________________________________________________________________________\n", 457 | " 16 x 72 x 60008 \n", 458 | "Linear 24063208 True \n", 459 | "RNNDropout \n", 460 | "____________________________________________________________________________\n", 461 | "\n", 462 | "Total params: 24,063,208\n", 463 | "Total trainable params: 24,063,208\n", 464 | "Total non-trainable params: 0\n", 465 | "\n", 466 | "Optimizer used: \n", 467 | "Loss function: FlattenedLoss of CrossEntropyLoss()\n", 468 | "\n", 469 | "Model unfrozen\n", 470 | "\n", 471 | "Callbacks:\n", 472 | " - TrainEvalCallback\n", 473 | " - ModelResetter\n", 474 | " - RNNCallback\n", 475 | " - MixedPrecision\n", 476 | " - Recorder\n", 477 | " - ProgressCallback" 478 | ] 479 | }, 480 | "execution_count": 16, 481 | "metadata": {}, 482 | "output_type": "execute_result" 483 | } 484 | ], 485 | "source": [ 486 | "learn.summary()" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": 17, 492 | "metadata": {}, 493 | "outputs": [ 494 | { 495 | "data": { 496 | "text/plain": [ 497 | "SequentialRNN(\n", 498 | " (0): AWD_LSTM(\n", 499 | " (encoder): Embedding(60008, 400, padding_idx=1)\n", 500 | " (encoder_dp): EmbeddingDropout(\n", 501 | " (emb): Embedding(60008, 400, padding_idx=1)\n", 502 | " )\n", 503 | " (rnns): ModuleList(\n", 504 | " (0): WeightDropout(\n", 505 | " (module): LSTM(400, 1152, batch_first=True)\n", 506 | " )\n", 507 | " (1): WeightDropout(\n", 508 | " (module): LSTM(1152, 1152, batch_first=True)\n", 509 | " )\n", 510 | " (2): WeightDropout(\n", 511 | " (module): LSTM(1152, 400, batch_first=True)\n", 512 | " )\n", 513 | " )\n", 514 | " (input_dp): RNNDropout()\n", 515 | " (hidden_dps): ModuleList(\n", 516 | " (0): RNNDropout()\n", 517 | " (1): RNNDropout()\n", 518 | " (2): RNNDropout()\n", 519 | " )\n", 520 | " )\n", 521 | " (1): LinearDecoder(\n", 522 | " (decoder): Linear(in_features=400, out_features=60008, bias=True)\n", 523 | " (output_dp): RNNDropout()\n", 524 | " )\n", 525 | ")" 526 | ] 527 | }, 528 | "execution_count": 17, 529 | "metadata": {}, 530 | "output_type": "execute_result" 531 | } 532 | ], 533 | "source": [ 534 | "learn.model" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": null, 540 | "metadata": {}, 541 | "outputs": [], 542 | "source": [] 543 | } 544 | ], 545 | "metadata": { 546 | "jupytext": { 547 | "split_at_heading": true 548 | }, 549 | "kernelspec": { 550 | "display_name": "Python 3", 551 | "language": "python", 552 | "name": "python3" 553 | }, 554 | "language_info": { 555 | "codemirror_mode": { 556 | "name": "ipython", 557 | "version": 3 558 | }, 559 | "file_extension": ".py", 560 | "mimetype": "text/x-python", 561 | "name": "python", 562 | "nbconvert_exporter": "python", 563 | "pygments_lexer": "ipython3", 564 | "version": "3.8.6" 565 | } 566 | }, 567 | "nbformat": 4, 568 | "nbformat_minor": 4 569 | } 570 | -------------------------------------------------------------------------------- /ch4/text_standalone_dataset_classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | 
"cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Training a text classifier model on a standalone dataset with fastai\n", 8 | "- This notebook ingests the Kaggle Covid tweets dataset (https://www.kaggle.com/datatattle/covid-19-nlp-text-classification)\n", 9 | "- This notebook assumes you have already run text_standalone_dataset_lm.ipynb notebook to create a language model\n", 10 | "- The encoder from the language model is used to create the text classifier" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 13, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "#hide\n", 20 | "!pip install -Uqq fastbook\n", 21 | "import fastbook\n", 22 | "fastbook.setup_book()" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 14, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "#hide\n", 32 | "from fastbook import *\n", 33 | "from fastai.text.all import *" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 15, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# ensure that value of modifier matches the value of modifier in text_standalone_dataset_lm notebook\n", 43 | "modifier = 'standalone_mar20'" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "# Ingest the dataset\n", 51 | "- define the source of the dataset\n", 52 | "- create a dataframe for the training dataset" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 16, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "CPU times: user 0 ns, sys: 3.81 ms, total: 3.81 ms\n", 65 | "Wall time: 3.24 ms\n" 66 | ] 67 | }, 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "(#2) [Path('/storage/archive/covid_tweets/train'),Path('/storage/archive/covid_tweets/test')]" 72 | ] 73 | }, 74 | "execution_count": 16, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "%%time\n", 81 | "# create dataloaders object\n", 82 | "path = URLs.path('covid_tweets')\n", 83 | "path.ls()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 17, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "# read the training CSV into a dataframe - note that the encoding parameter is needed to avoid a decode error\n", 93 | "df_train = pd.read_csv(path/'train/Corona_NLP_train.csv',encoding = \"ISO-8859-1\")" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "# Define the text classifier" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 18, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/html": [], 111 | "text/plain": [ 112 | "" 113 | ] 114 | }, 115 | "metadata": {}, 116 | "output_type": "display_data" 117 | }, 118 | { 119 | "data": { 120 | "text/html": [ 121 | "\n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | "
textcategory
0xxbos xxrep 5 ? ? ? xxrep 7 ? ? ? xxrep 7 ? xxrep 4 ? xxrep 4 ? xxrep 11 ? ? ? xxrep 6 ? xxrep 4 ? , xxrep 3 ? xxrep 3 ? ? ? xxrep 3 ? xxrep 4 ? xxrep 3 ? ? ? ? ? xxrep 4 ? ? ? xxrep 3 ? , xxrep 4 ? ? ? ? ? xxrep 6 ? xxrep 3 ? xxrep 3 ? xxrep 3 ? ? ? xxrep 3 ? \\r\\r\\n▁ xxrep 5 ? xxrep 6 ? ? ? xxrep 3 ? xxrep 4 ? xxrep 4 ? ? ? xxrep 4 ? xxrep 6 ? xxrep 4 ? xxrep 8 ? ? ? xxrep 6 ? ? ? xxrep 5 ? ? ? xxrep 3 ? xxrep 4 ? ? ? xxrep 7 ? xxrep 5 ? - xxrep 8 ? xxrep 5Neutral
1xxbos xxmaj fun xxmaj riding 4 xxmaj xxunk , xxmaj shield xxmaj bash # xxmaj cod # callofduty # xxmaj practice # xxmaj xxunk # xxmaj xxunk # xxmaj recreation # xxmaj fun # xxmaj bored # todo # xxmaj coronavirus # xxmaj quarantine # xxmaj isolation # toiletpaper # xxmaj lockdown # xxmaj art # xxmaj milk # xxmaj water # xxmaj xxunk # xxmaj weather # xxmaj cleveland # xxmaj ohio # xxmaj browns # xxup nfl # xxmaj xxunk # xxmaj poetry \\r\\r\\n https : / / t.co / xxunk via @youtubePositive
2xxbos xxmaj friends ! xxmaj it 's xxmaj march 25 , 2020 at 03:00pm- time to xxup stop xxup renting & & buy a # home from # realtor xxmaj kally ( khoelcher ( at ) gmail ( dot ) com ) of # xxmaj goodyear # xxmaj arizona # coldwellbanker ( 269)240 - 8824 . # xxup n95 masks , # gloves , & & hand # sanitizer provided to xxup prevent # coronavirus . # xxmaj avondale # xxmaj buckeye # â …", 143 | "▁ https : / / t.co / xxunkExtremely Positive
" 148 | ], 149 | "text/plain": [ 150 | "" 151 | ] 152 | }, 153 | "metadata": {}, 154 | "output_type": "display_data" 155 | }, 156 | { 157 | "name": "stdout", 158 | "output_type": "stream", 159 | "text": [ 160 | "CPU times: user 52.2 s, sys: 1.35 s, total: 53.5 s\n", 161 | "Wall time: 56.9 s\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "%%time\n", 167 | "# create TextDataLoaders object\n", 168 | "dls = TextDataLoaders.from_df(df_train, path=path, text_col='OriginalTweet',label_col='Sentiment')" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "dls.show_batch(max_n=3)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 19, 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "text/plain": [ 188 | "Path('/storage/archive/covid_tweets')" 189 | ] 190 | }, 191 | "execution_count": 19, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "dls.path" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 20, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "name": "stdout", 207 | "output_type": "stream", 208 | "text": [ 209 | "keep_path is: /storage/archive/covid_tweets\n" 210 | ] 211 | } 212 | ], 213 | "source": [ 214 | "# save the current path\n", 215 | "keep_path = path\n", 216 | "print(\"keep_path is: \",str(keep_path))" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 21, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "name": "stdout", 226 | "output_type": "stream", 227 | "text": [ 228 | "CPU times: user 5.07 s, sys: 834 ms, total: 5.91 s\n", 229 | "Wall time: 1.13 s\n" 230 | ] 231 | } 232 | ], 233 | "source": [ 234 | "%%time\n", 235 | "# define a text_classifier_learner object\n", 236 | "learn_clas = text_classifier_learner(dls, AWD_LSTM, \n", 237 | " metrics=accuracy).to_fp16()" 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": {}, 243 | "source": [ 244 | "# Fine-tune the text classifier\n", 245 | "Use the encoder created as part of training the language model to fine tune the text classifier" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 22, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/plain": [ 256 | "Path('/storage/archive/covid_tweets')" 257 | ] 258 | }, 259 | "execution_count": 22, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "# Path('/storage/data/imdb')\n", 266 | "learn_clas.path" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 23, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "name": "stdout", 276 | "output_type": "stream", 277 | "text": [ 278 | "CPU times: user 277 µs, sys: 17 µs, total: 294 µs\n", 279 | "Wall time: 49.4 µs\n" 280 | ] 281 | } 282 | ], 283 | "source": [ 284 | "%%time\n", 285 | "# set the path to the location of the encoder\n", 286 | "learn_clas.path = Path('/notebooks/temp')" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 24, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "# load the encoder that was saved when the language model was trained\n", 296 | "learn_clas = learn_clas.load_encoder('ft_standalone'+modifier)" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 25, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "data": { 306 | "text/plain": [ 
307 | "Path('/notebooks/temp')" 308 | ] 309 | }, 310 | "execution_count": 25, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "learn_clas.path" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 26, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "# set the path back to the original path\n", 326 | "learn_clas.path = keep_path" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 27, 332 | "metadata": {}, 333 | "outputs": [ 334 | { 335 | "data": { 336 | "text/plain": [ 337 | "Path('/storage/archive/covid_tweets')" 338 | ] 339 | }, 340 | "execution_count": 27, 341 | "metadata": {}, 342 | "output_type": "execute_result" 343 | } 344 | ], 345 | "source": [ 346 | "# ch 10 style Path('/storage/data/imdb')\n", 347 | "learn_clas.path" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 28, 353 | "metadata": {}, 354 | "outputs": [ 355 | { 356 | "data": { 357 | "text/html": [ 358 | "\n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | "
epoch  train_loss  valid_loss  accuracy  time
0      1.461614    1.281477    0.453286  00:37
" 378 | ], 379 | "text/plain": [ 380 | "" 381 | ] 382 | }, 383 | "metadata": {}, 384 | "output_type": "display_data" 385 | }, 386 | { 387 | "name": "stdout", 388 | "output_type": "stream", 389 | "text": [ 390 | "CPU times: user 29.9 s, sys: 7.19 s, total: 37.1 s\n", 391 | "Wall time: 37.4 s\n" 392 | ] 393 | } 394 | ], 395 | "source": [ 396 | "%%time\n", 397 | "# fine tune the model\n", 398 | "learn_clas.fit_one_cycle(1, 2e-2)" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 29, 404 | "metadata": {}, 405 | "outputs": [ 406 | { 407 | "data": { 408 | "text/plain": [ 409 | "(torch.Size([64, 166]), torch.Size([64]), 514)" 410 | ] 411 | }, 412 | "execution_count": 29, 413 | "metadata": {}, 414 | "output_type": "execute_result" 415 | } 416 | ], 417 | "source": [ 418 | "x, y = first(dls.train)\n", 419 | "x.shape, y.shape, len(dls.train)" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 31, 425 | "metadata": {}, 426 | "outputs": [ 427 | { 428 | "data": { 429 | "text/html": [ 430 | "\n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | "
epoch  train_loss  valid_loss  accuracy  time
0      None        None                  00:00
" 449 | ], 450 | "text/plain": [ 451 | "" 452 | ] 453 | }, 454 | "metadata": {}, 455 | "output_type": "display_data" 456 | }, 457 | { 458 | "data": { 459 | "text/plain": [ 460 | "SequentialRNN (Input shape: ['64 x 166'])\n", 461 | "================================================================\n", 462 | "Layer (type) Output Shape Param # Trainable \n", 463 | "================================================================\n", 464 | "LSTM ['64 x 22 x 1152', 1,852,416 False \n", 465 | "________________________________________________________________\n", 466 | "LSTM ['64 x 22 x 1152', 5,317,632 False \n", 467 | "________________________________________________________________\n", 468 | "LSTM ['64 x 22 x 400', \" 1,846,400 False \n", 469 | "________________________________________________________________\n", 470 | "RNNDropout 64 x 22 x 400 0 False \n", 471 | "________________________________________________________________\n", 472 | "RNNDropout 64 x 22 x 1152 0 False \n", 473 | "________________________________________________________________\n", 474 | "RNNDropout 64 x 22 x 1152 0 False \n", 475 | "________________________________________________________________\n", 476 | "BatchNorm1d 64 x 1200 2,400 True \n", 477 | "________________________________________________________________\n", 478 | "Dropout 64 x 1200 0 False \n", 479 | "________________________________________________________________\n", 480 | "Linear 64 x 50 60,000 True \n", 481 | "________________________________________________________________\n", 482 | "ReLU 64 x 50 0 False \n", 483 | "________________________________________________________________\n", 484 | "BatchNorm1d 64 x 50 100 True \n", 485 | "________________________________________________________________\n", 486 | "Dropout 64 x 50 0 False \n", 487 | "________________________________________________________________\n", 488 | "Linear 64 x 5 250 True \n", 489 | "________________________________________________________________\n", 490 | "\n", 491 | "Total params: 9,079,198\n", 492 | "Total trainable params: 62,750\n", 493 | "Total non-trainable params: 9,016,448\n", 494 | "\n", 495 | "Optimizer used: \n", 496 | "Loss function: FlattenedLoss of CrossEntropyLoss()\n", 497 | "\n", 498 | "Model frozen up to parameter group #4\n", 499 | "\n", 500 | "Callbacks:\n", 501 | " - ModelResetter\n", 502 | " - RNNRegularizer\n", 503 | " - ModelToHalf\n", 504 | " - TrainEvalCallback\n", 505 | " - Recorder\n", 506 | " - ProgressCallback\n", 507 | " - MixedPrecision" 508 | ] 509 | }, 510 | "execution_count": 31, 511 | "metadata": {}, 512 | "output_type": "execute_result" 513 | } 514 | ], 515 | "source": [ 516 | "learn_clas.summary()" 517 | ] 518 | }, 519 | { 520 | "cell_type": "markdown", 521 | "metadata": {}, 522 | "source": [ 523 | "# Exercise the text classifier\n", 524 | "Apply the fine-tuned text classifier on some text samples." 
525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": 41, 530 | "metadata": {}, 531 | "outputs": [ 532 | { 533 | "data": { 534 | "text/html": [], 535 | "text/plain": [ 536 | "" 537 | ] 538 | }, 539 | "metadata": {}, 540 | "output_type": "display_data" 541 | } 542 | ], 543 | "source": [ 544 | "preds = learn_clas.predict(\"the government's approach to the pandemic has been a complete disaster\")" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": 42, 550 | "metadata": {}, 551 | "outputs": [ 552 | { 553 | "data": { 554 | "text/plain": [ 555 | "('Negative',\n", 556 | " TensorText(2),\n", 557 | " TensorText([0.3328, 0.0545, 0.3551, 0.1026, 0.1551]))" 558 | ] 559 | }, 560 | "execution_count": 42, 561 | "metadata": {}, 562 | "output_type": "execute_result" 563 | } 564 | ], 565 | "source": [ 566 | "preds" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": 43, 572 | "metadata": {}, 573 | "outputs": [ 574 | { 575 | "data": { 576 | "text/html": [], 577 | "text/plain": [ 578 | "" 579 | ] 580 | }, 581 | "metadata": {}, 582 | "output_type": "display_data" 583 | } 584 | ], 585 | "source": [ 586 | "preds = learn_clas.predict(\"the new vaccines hold the promise of a quick return to economic growth\")" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 44, 592 | "metadata": {}, 593 | "outputs": [ 594 | { 595 | "data": { 596 | "text/plain": [ 597 | "('Extremely Positive',\n", 598 | " TensorText(1),\n", 599 | " TensorText([0.0565, 0.3758, 0.1528, 0.0699, 0.3450]))" 600 | ] 601 | }, 602 | "execution_count": 44, 603 | "metadata": {}, 604 | "output_type": "execute_result" 605 | } 606 | ], 607 | "source": [ 608 | "preds" 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "execution_count": 45, 614 | "metadata": {}, 615 | "outputs": [ 616 | { 617 | "data": { 618 | "text/html": [], 619 | "text/plain": [ 620 | "" 621 | ] 622 | }, 623 | "metadata": {}, 624 | "output_type": "display_data" 625 | } 626 | ], 627 | "source": [ 628 | "preds = learn_clas.predict(\"this flu is about what we would expect in a normal winter\")" 629 | ] 630 | }, 631 | { 632 | "cell_type": "code", 633 | "execution_count": 46, 634 | "metadata": {}, 635 | "outputs": [ 636 | { 637 | "data": { 638 | "text/plain": [ 639 | "('Negative',\n", 640 | " TensorText(2),\n", 641 | " TensorText([0.2712, 0.0407, 0.3615, 0.1584, 0.1682]))" 642 | ] 643 | }, 644 | "execution_count": 46, 645 | "metadata": {}, 646 | "output_type": "execute_result" 647 | } 648 | ], 649 | "source": [ 650 | "preds" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "execution_count": 47, 656 | "metadata": {}, 657 | "outputs": [ 658 | { 659 | "data": { 660 | "text/html": [], 661 | "text/plain": [ 662 | "" 663 | ] 664 | }, 665 | "metadata": {}, 666 | "output_type": "display_data" 667 | } 668 | ], 669 | "source": [ 670 | "preds = learn_clas.predict(\"the health ministry needs to pay closer attention to the vaccine rollout\")" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": 48, 676 | "metadata": {}, 677 | "outputs": [ 678 | { 679 | "data": { 680 | "text/plain": [ 681 | "('Positive',\n", 682 | " TensorText(4),\n", 683 | " TensorText([0.0927, 0.1448, 0.3081, 0.1216, 0.3327]))" 684 | ] 685 | }, 686 | "execution_count": 48, 687 | "metadata": {}, 688 | "output_type": "execute_result" 689 | } 690 | ], 691 | "source": [ 692 | "preds" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": 40, 698 | "metadata": {}, 699 | "outputs": [ 
700 | { 701 | "data": { 702 | "text/plain": [ 703 | "Path('/notebooks/temp/models/classifier_single_epoch_standalone_mar20d.pth')" 704 | ] 705 | }, 706 | "execution_count": 40, 707 | "metadata": {}, 708 | "output_type": "execute_result" 709 | } 710 | ], 711 | "source": [ 712 | "# save the classifier model\n", 713 | "learn_clas.path = Path('/notebooks/temp')\n", 714 | "learn_clas.save('classifier_single_epoch_'+modifier+'d')" 715 | ] 716 | } 717 | ], 718 | "metadata": { 719 | "jupytext": { 720 | "split_at_heading": true 721 | }, 722 | "kernelspec": { 723 | "display_name": "Python 3", 724 | "language": "python", 725 | "name": "python3" 726 | }, 727 | "language_info": { 728 | "codemirror_mode": { 729 | "name": "ipython", 730 | "version": 3 731 | }, 732 | "file_extension": ".py", 733 | "mimetype": "text/x-python", 734 | "name": "python", 735 | "nbconvert_exporter": "python", 736 | "pygments_lexer": "ipython3", 737 | "version": "3.8.6" 738 | } 739 | }, 740 | "nbformat": 4, 741 | "nbformat_minor": 4 742 | } 743 | -------------------------------------------------------------------------------- /ch4/text_standalone_dataset_lm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Training a language model on a standalone dataset with fastai\n", 8 | "- This notebook ingests the Kaggle Covid-related tweets dataset (https://www.kaggle.com/datatattle/covid-19-nlp-text-classification)\n", 9 | "- Trains a language model using pre-trained model AWD_LSTM as a starting point and fine-tuning it with the Covid-related tweets dataset\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 24, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#hide\n", 19 | "!pip install -Uqq fastbook\n", 20 | "import fastbook\n", 21 | "fastbook.setup_book()" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 25, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "#hide\n", 31 | "from fastbook import *\n", 32 | "from fastai.text.all import *\n", 33 | "import pickle " 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 26, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "modifier = 'standalone_mar20'" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# Ingest the dataset\n", 50 | "- define the source of the dataset\n", 51 | "- create a dataframe for the training dataset" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 27, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "CPU times: user 1.76 ms, sys: 3.52 ms, total: 5.28 ms\n", 64 | "Wall time: 4.68 ms\n" 65 | ] 66 | }, 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "(#2) [Path('/storage/archive/covid_tweets/train'),Path('/storage/archive/covid_tweets/test')]" 71 | ] 72 | }, 73 | "execution_count": 27, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "%%time\n", 80 | "# create dataloaders object\n", 81 | "path = URLs.path('covid_tweets')\n", 82 | "path.ls()" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 28, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "# read the training CSV into a dataframe - note that the encoding parameter is needed to avoid a decode error\n", 92 | "df_train = 
pd.read_csv(path/'train/Corona_NLP_train.csv',encoding = \"ISO-8859-1\")" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "# Create language model" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 29, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/html": [], 110 | "text/plain": [ 111 | "" 112 | ] 113 | }, 114 | "metadata": {}, 115 | "output_type": "display_data" 116 | }, 117 | { 118 | "data": { 119 | "text/html": [ 120 | "\n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | "
texttext_
0xxbos xxmaj share - prices of listed mining companies are in a downward spiral . xxmaj commodity prices across the industry have been tumbling as the industry considers the devastating xxunk of this â“black xxmaj xxunk event . https : / / t.co / xxunk # xxmaj covid_19 # xxmaj africa # xxunk # mining # economy xxbos xxmaj online xxmaj food xxmaj orders checklist place your order in advance order onlyxxmaj share - prices of listed mining companies are in a downward spiral . xxmaj commodity prices across the industry have been tumbling as the industry considers the devastating xxunk of this â“black xxmaj xxunk event . https : / / t.co / xxunk # xxmaj covid_19 # xxmaj africa # xxunk # mining # economy xxbos xxmaj online xxmaj food xxmaj orders checklist place your order in advance order only what
1https : / / t.co / xxunk xxbos xxmaj this sign posted at my local grocery store . xxmaj crazy times . # fridaythoughts # socialdistanacing # flattenthecurve # coronavirus # coronavirus2020 https : / / t.co / xxunk xxbos xxmaj dear young , healthy xxunk . xxmaj please explain why supermarket aisles nationwide seem to have been emptied of sanitary products . xxmaj how many periods do you expect to have: / / t.co / xxunk xxbos xxmaj this sign posted at my local grocery store . xxmaj crazy times . # fridaythoughts # socialdistanacing # flattenthecurve # coronavirus # coronavirus2020 https : / / t.co / xxunk xxbos xxmaj dear young , healthy xxunk . xxmaj please explain why supermarket aisles nationwide seem to have been emptied of sanitary products . xxmaj how many periods do you expect to have during
2of this # coronavirus has been a run on # toiletpaper . xxmaj if you find yourself resorting to facial tissues or paper towels , do n't flush them down the toilet . xxmaj flushing xxup anything other than toilet paper can lead to xxunk . # xxmaj sarasota https : / / t.co / xxunk xxbos xxmaj is consumer # privacy dead and can it be revived ? \\r\\r\\n xxmaj governmentsthis # coronavirus has been a run on # toiletpaper . xxmaj if you find yourself resorting to facial tissues or paper towels , do n't flush them down the toilet . xxmaj flushing xxup anything other than toilet paper can lead to xxunk . # xxmaj sarasota https : / / t.co / xxunk xxbos xxmaj is consumer # privacy dead and can it be revived ? \\r\\r\\n xxmaj governments expanded
" 146 | ], 147 | "text/plain": [ 148 | "" 149 | ] 150 | }, 151 | "metadata": {}, 152 | "output_type": "display_data" 153 | }, 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "CPU times: user 22.3 s, sys: 1.88 s, total: 24.2 s\n", 159 | "Wall time: 26.7 s\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "%%time\n", 165 | "# create TextDataLoaders object\n", 166 | "dls = TextDataLoaders.from_df(df_train, path=path, \n", 167 | " text_col='OriginalTweet',\n", 168 | " is_lm=True)\n", 169 | "dls.show_batch(max_n=3)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 30, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "text/html": [ 180 | "\n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | "
epoch  train_loss  valid_loss  accuracy  time
0      4.448665    3.958910    0.322896  02:00
" 200 | ], 201 | "text/plain": [ 202 | "" 203 | ] 204 | }, 205 | "metadata": {}, 206 | "output_type": "display_data" 207 | }, 208 | { 209 | "data": { 210 | "text/html": [ 211 | "\n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | "
epoch  train_loss  valid_loss  accuracy  time
0      4.008188    3.735647    0.343352  02:19
" 231 | ], 232 | "text/plain": [ 233 | "" 234 | ] 235 | }, 236 | "metadata": {}, 237 | "output_type": "display_data" 238 | }, 239 | { 240 | "name": "stdout", 241 | "output_type": "stream", 242 | "text": [ 243 | "CPU times: user 3min 37s, sys: 49 s, total: 4min 26s\n", 244 | "Wall time: 4min 20s\n" 245 | ] 246 | } 247 | ], 248 | "source": [ 249 | "%%time\n", 250 | "# define and train model\n", 251 | "learn = language_model_learner(dls,AWD_LSTM,\n", 252 | " metrics=accuracy).to_fp16()\n", 253 | "learn.fine_tune(1, 1e-2)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "# Exercise and save language model\n", 261 | "- try out the language model with a few examples\n", 262 | "- save the language model and the encoder" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 31, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/html": [], 273 | "text/plain": [ 274 | "" 275 | ] 276 | }, 277 | "metadata": {}, 278 | "output_type": "display_data" 279 | }, 280 | { 281 | "data": { 282 | "text/plain": [ 283 | "'what comes next to know who would get mondaymood ! Got anywhere - there d have to mean was going to go'" 284 | ] 285 | }, 286 | "execution_count": 31, 287 | "metadata": {}, 288 | "output_type": "execute_result" 289 | } 290 | ], 291 | "source": [ 292 | "# get prediction\n", 293 | "learn.predict(\"what comes next\", n_words=20)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 32, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "learn.export('/notebooks/temp/models/lm_model_standalone'+modifier)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 33, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "keep_path = learn.path" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 34, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "# workaround to make path writeable\n", 321 | "learn.path = Path('/notebooks/temp')" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 35, 327 | "metadata": {}, 328 | "outputs": [ 329 | { 330 | "data": { 331 | "text/plain": [ 332 | "Path('/notebooks/temp')" 333 | ] 334 | }, 335 | "execution_count": 35, 336 | "metadata": {}, 337 | "output_type": "execute_result" 338 | } 339 | ], 340 | "source": [ 341 | "learn.path" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 36, 347 | "metadata": {}, 348 | "outputs": [ 349 | { 350 | "data": { 351 | "text/plain": [ 352 | "'models'" 353 | ] 354 | }, 355 | "execution_count": 36, 356 | "metadata": {}, 357 | "output_type": "execute_result" 358 | } 359 | ], 360 | "source": [ 361 | "learn.model_dir" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 37, 367 | "metadata": {}, 368 | "outputs": [ 369 | { 370 | "data": { 371 | "text/plain": [ 372 | "Path('/notebooks/temp/models/lm_standalonestandalone_mar20.pth')" 373 | ] 374 | }, 375 | "execution_count": 37, 376 | "metadata": {}, 377 | "output_type": "execute_result" 378 | } 379 | ], 380 | "source": [ 381 | "learn.save('lm_standalone'+modifier)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 38, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [ 390 | "# workaround to save encoder - need to do this to later load encoder for classifier\n", 391 | "learn.save_encoder('ft_standalone'+modifier)" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | 
"execution_count": 39, 397 | "metadata": {}, 398 | "outputs": [], 399 | "source": [ 400 | "learn.path = keep_path" 401 | ] 402 | } 403 | ], 404 | "metadata": { 405 | "jupytext": { 406 | "split_at_heading": true 407 | }, 408 | "kernelspec": { 409 | "display_name": "Python 3", 410 | "language": "python", 411 | "name": "python3" 412 | }, 413 | "language_info": { 414 | "codemirror_mode": { 415 | "name": "ipython", 416 | "version": 3 417 | }, 418 | "file_extension": ".py", 419 | "mimetype": "text/x-python", 420 | "name": "python", 421 | "nbconvert_exporter": "python", 422 | "pygments_lexer": "ipython3", 423 | "version": "3.8.6" 424 | } 425 | }, 426 | "nbformat": 4, 427 | "nbformat_minor": 4 428 | } 429 | -------------------------------------------------------------------------------- /ch5/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .ipynb_checkpoints 3 | *.html 4 | -------------------------------------------------------------------------------- /ch5/training_recommender_systems.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Training a recommender system\n", 8 | "Train a recommender system using a fast.ai curated dataset\n", 9 | "\n", 10 | "The example shown here is adapted from the paper by Howard and Gugger https://arxiv.org/pdf/2002.04688.pdf" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# imports for notebook boilerplate\n", 20 | "!pip install -Uqq fastbook\n", 21 | "import fastbook\n", 22 | "from fastbook import *\n", 23 | "from fastai.collab import *" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# set up the notebook for fast.ai\n", 33 | "fastbook.setup_book()" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "# Ingest the dataset\n", 41 | "- define the path object\n", 42 | "- define a dataframe to contain the dataset" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/html": [ 53 | "\n", 54 | "
\n", 55 | " \n", 67 | " \n", 68 | " 110.72% [57344/51790 00:00<00:00]\n", 69 | "
\n", 70 | " " 71 | ], 72 | "text/plain": [ 73 | "" 74 | ] 75 | }, 76 | "metadata": {}, 77 | "output_type": "display_data" 78 | } 79 | ], 80 | "source": [ 81 | "# ingest the curated recommender system dataset ML_SAMPLE\n", 82 | "path = untar_data(URLs.ML_SAMPLE)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 4, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "(#1) [Path('/root/.fastai/data/movie_lens_sample/ratings.csv')]" 94 | ] 95 | }, 96 | "execution_count": 4, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "# examine the directory structure\n", 103 | "path.ls()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "# ingest the dataset into a Pandas dataframe\n", 113 | "df = pd.read_csv(path/'ratings.csv')" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "# Examine the dataset" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 6, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/html": [ 131 | "
\n", 132 | "\n", 145 | "\n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | "
   userId  movieId  rating   timestamp
0      73     1097     4.0  1255504951
1     561      924     3.5  1172695223
2     157      260     3.5  1291598691
3     358     1210     5.0   957481884
4     130      316     2.0  1138999234
\n", 193 | "
" 194 | ], 195 | "text/plain": [ 196 | " userId movieId rating timestamp\n", 197 | "0 73 1097 4.0 1255504951\n", 198 | "1 561 924 3.5 1172695223\n", 199 | "2 157 260 3.5 1291598691\n", 200 | "3 358 1210 5.0 957481884\n", 201 | "4 130 316 2.0 1138999234" 202 | ] 203 | }, 204 | "execution_count": 6, 205 | "metadata": {}, 206 | "output_type": "execute_result" 207 | } 208 | ], 209 | "source": [ 210 | "# examine the first few records in the dataframe\n", 211 | "df.head()" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 7, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": [ 222 | "(6031, 4)" 223 | ] 224 | }, 225 | "execution_count": 7, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "# get the number of records in the dataset\n", 232 | "df.shape" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 8, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "data": { 242 | "text/plain": [ 243 | "userId 100\n", 244 | "movieId 100\n", 245 | "rating 10\n", 246 | "timestamp 5609\n", 247 | "dtype: int64" 248 | ] 249 | }, 250 | "execution_count": 8, 251 | "metadata": {}, 252 | "output_type": "execute_result" 253 | } 254 | ], 255 | "source": [ 256 | "# get the count of unique values in each column of the dataset\n", 257 | "df.nunique()" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 9, 263 | "metadata": {}, 264 | "outputs": [ 265 | { 266 | "data": { 267 | "text/plain": [ 268 | "userId 0\n", 269 | "movieId 0\n", 270 | "rating 0\n", 271 | "timestamp 0\n", 272 | "dtype: int64" 273 | ] 274 | }, 275 | "execution_count": 9, 276 | "metadata": {}, 277 | "output_type": "execute_result" 278 | } 279 | ], 280 | "source": [ 281 | "# count the number of missing values in each column of the dataset\n", 282 | "df.isnull().sum()" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 10, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "10" 294 | ] 295 | }, 296 | "execution_count": 10, 297 | "metadata": {}, 298 | "output_type": "execute_result" 299 | } 300 | ], 301 | "source": [ 302 | "df['rating'].nunique()" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 11, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "# defined a CollabDataLoaders object\n", 312 | "dls=CollabDataLoaders.from_df(df,bs= 64)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 12, 318 | "metadata": {}, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/html": [ 323 | "\n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 
392 | " \n", 393 | " \n", 394 | "
userIdmovieIdrating
038819235.0
160712004.0
234649934.0
36071533.0
415012104.5
546027165.0
648117043.5
7134324.5
82311364.0
910527164.0
" 395 | ], 396 | "text/plain": [ 397 | "" 398 | ] 399 | }, 400 | "metadata": {}, 401 | "output_type": "display_data" 402 | } 403 | ], 404 | "source": [ 405 | "dls.show_batch()" 406 | ] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "# Define and train the model" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": 13, 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "# define the model\n", 422 | "learn=collab_learner(dls,y_range= [ 0 , 5.0 ] )" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": 14, 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "data": { 432 | "text/html": [ 433 | "\n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | "
epoch  train_loss  valid_loss  time
0      2.609334    2.528086    00:00
1      2.315725    2.012042    00:00
2      1.701957    1.337556    00:00
3      1.265640    1.117061    00:00
4      1.097266    1.089355    00:00
" 475 | ], 476 | "text/plain": [ 477 | "" 478 | ] 479 | }, 480 | "metadata": {}, 481 | "output_type": "display_data" 482 | } 483 | ], 484 | "source": [ 485 | "# train the model\n", 486 | "learn.fit_one_cycle( 5 )" 487 | ] 488 | }, 489 | { 490 | "cell_type": "markdown", 491 | "metadata": {}, 492 | "source": [ 493 | "# Exercise the trained model\n", 494 | "- define a dataframe containing test data\n", 495 | "- apply the trained model to the dataframe" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": 15, 501 | "metadata": {}, 502 | "outputs": [ 503 | { 504 | "data": { 505 | "text/html": [ 506 | "
\n", 507 | "\n", 520 | "\n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | "
   userId  movieId
0     388      153
1     607     1210
\n", 541 | "
" 542 | ], 543 | "text/plain": [ 544 | " userId movieId\n", 545 | "0 388 153\n", 546 | "1 607 1210" 547 | ] 548 | }, 549 | "execution_count": 15, 550 | "metadata": {}, 551 | "output_type": "execute_result" 552 | } 553 | ], 554 | "source": [ 555 | "#scoring_columns = ['userId','movieId','timestamp']\n", 556 | "scoring_columns = ['userId','movieId']\n", 557 | "test_df = pd.DataFrame(columns=scoring_columns)\n", 558 | "test_df.at[0,'userId'] = 388\n", 559 | "test_df.at[0,'movieId'] = 153\n", 560 | "test_df.at[1,'userId'] = 607\n", 561 | "test_df.at[1,'movieId'] = 1210\n", 562 | "test_df.head()" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": 16, 568 | "metadata": {}, 569 | "outputs": [ 570 | { 571 | "data": { 572 | "text/html": [], 573 | "text/plain": [ 574 | "" 575 | ] 576 | }, 577 | "metadata": {}, 578 | "output_type": "display_data" 579 | }, 580 | { 581 | "data": { 582 | "text/plain": [ 583 | "(tensor([2.4751, 3.6097]), None)" 584 | ] 585 | }, 586 | "execution_count": 16, 587 | "metadata": {}, 588 | "output_type": "execute_result" 589 | } 590 | ], 591 | "source": [ 592 | "dl = learn.dls.test_dl(test_df)\n", 593 | "learn.get_preds(dl=dl)" 594 | ] 595 | }, 596 | { 597 | "cell_type": "code", 598 | "execution_count": 17, 599 | "metadata": {}, 600 | "outputs": [ 601 | { 602 | "data": { 603 | "text/html": [], 604 | "text/plain": [ 605 | "" 606 | ] 607 | }, 608 | "metadata": {}, 609 | "output_type": "display_data" 610 | }, 611 | { 612 | "data": { 613 | "text/plain": [ 614 | "EmbeddingDotBias (Input shape: 64)\n", 615 | "============================================================================\n", 616 | "Layer (type) Output Shape Param # Trainable \n", 617 | "============================================================================\n", 618 | " 64 x 50 \n", 619 | "Embedding 5050 True \n", 620 | "Embedding 5050 True \n", 621 | "____________________________________________________________________________\n", 622 | " 64 x 1 \n", 623 | "Embedding 101 True \n", 624 | "Embedding 101 True \n", 625 | "____________________________________________________________________________\n", 626 | "\n", 627 | "Total params: 10,302\n", 628 | "Total trainable params: 10,302\n", 629 | "Total non-trainable params: 0\n", 630 | "\n", 631 | "Optimizer used: \n", 632 | "Loss function: FlattenedLoss of MSELoss()\n", 633 | "\n", 634 | "Model unfrozen\n", 635 | "\n", 636 | "Callbacks:\n", 637 | " - TrainEvalCallback\n", 638 | " - Recorder\n", 639 | " - ProgressCallback" 640 | ] 641 | }, 642 | "execution_count": 17, 643 | "metadata": {}, 644 | "output_type": "execute_result" 645 | } 646 | ], 647 | "source": [ 648 | "learn.summary()" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 18, 654 | "metadata": {}, 655 | "outputs": [ 656 | { 657 | "data": { 658 | "text/plain": [ 659 | "EmbeddingDotBias(\n", 660 | " (u_weight): Embedding(101, 50)\n", 661 | " (i_weight): Embedding(101, 50)\n", 662 | " (u_bias): Embedding(101, 1)\n", 663 | " (i_bias): Embedding(101, 1)\n", 664 | ")" 665 | ] 666 | }, 667 | "execution_count": 18, 668 | "metadata": {}, 669 | "output_type": "execute_result" 670 | } 671 | ], 672 | "source": [ 673 | "learn.model" 674 | ] 675 | } 676 | ], 677 | "metadata": { 678 | "kernelspec": { 679 | "display_name": "Python 3", 680 | "language": "python", 681 | "name": "python3" 682 | }, 683 | "language_info": { 684 | "codemirror_mode": { 685 | "name": "ipython", 686 | "version": 3 687 | }, 688 | "file_extension": ".py", 689 | "mimetype": "text/x-python", 690 | 
"name": "python", 691 | "nbconvert_exporter": "python", 692 | "pygments_lexer": "ipython3", 693 | "version": "3.8.6" 694 | } 695 | }, 696 | "nbformat": 4, 697 | "nbformat_minor": 4 698 | } 699 | -------------------------------------------------------------------------------- /ch6/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .ipynb_checkpoints 3 | *.html 4 | -------------------------------------------------------------------------------- /ch7/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .ipynb_checkpoints 3 | -------------------------------------------------------------------------------- /ch7/deploy_image/fruits_360may3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Deep-Learning-with-fastai-Cookbook/e692fee0e7d8de184cb57deb222123c94483acd7/ch7/deploy_image/fruits_360may3.pkl -------------------------------------------------------------------------------- /ch7/deploy_image/static/css/main2.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0.2; 3 | padding: 0; 4 | font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; 5 | color: #444; 6 | } 7 | /* 8 | * Formatting the header area 9 | */ 10 | header { 11 | background-color: #DFB887; 12 | height: 35px; 13 | width: 100%; 14 | opacity: .9; 15 | margin-bottom: 10px; 16 | } 17 | header h1.logo { 18 | margin: 0; 19 | font-size: 1.7em; 20 | color: #fff; 21 | text-transform: uppercase; 22 | float: left; 23 | } 24 | header h1.logo:hover { 25 | color: #fff; 26 | text-decoration: none; 27 | } 28 | /* 29 | * Centering the body content 30 | */ 31 | .container { 32 | width: 1200px; 33 | margin: 0 auto; 34 | } 35 | div.home { 36 | padding: 10px 0 30px 0; 37 | font-size: 1.2em; 38 | background-color: #E6E6FA; 39 | -webkit-border-radius: 6px; 40 | -moz-border-radius: 6px; 41 | border-radius: 6px; 42 | 43 | } 44 | 45 | div.about { 46 | padding: 10px 0 30px 0; 47 | background-color: #E6E6FA; 48 | -webkit-border-radius: 6px; 49 | -moz-border-radius: 6px; 50 | border-radius: 6px; 51 | } 52 | h2 { 53 | font-size: 3em; 54 | margin-top: 40px; 55 | text-align: center; 56 | letter-spacing: -2px; 57 | } 58 | h3 { 59 | font-size: 1.7em; 60 | font-weight: 100; 61 | margin-top: 30px; 62 | text-align: center; 63 | letter-spacing: -1px; 64 | color: #999; 65 | } 66 | .menu { 67 | float: right; 68 | margin-top: 8px; 69 | } 70 | .menu li { 71 | display: inline; 72 | } 73 | .menu li + li { 74 | margin-left: 35px; 75 | } 76 | .menu li a { 77 | color: #444; 78 | text-decoration: none; 79 | } 80 | -------------------------------------------------------------------------------- /ch7/deploy_image/templates/home.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Predicting the fruit or vegetable in an image using a model trained with fastai 5 | 6 | 7 | 8 | 9 |
10 |

11 | Please select the image you want to classify: 12 |

13 | 14 |

15 | 16 | 19 |

20 | 21 | 22 | 25 | 26 | 47 | 57 | 58 | 59 |
60 | 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /ch7/deploy_image/templates/show-prediction.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Page for showing prediction 5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 |

13 | Here is the prediction for the image you selected: 14 |

15 |

16 | {{ prediction.prediction_key }} 17 |

18 | 19 | 20 | 21 |
22 | 23 |
24 | 25 |
26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /ch7/deploy_image/test_images/26_100.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Deep-Learning-with-fastai-Cookbook/e692fee0e7d8de184cb57deb222123c94483acd7/ch7/deploy_image/test_images/26_100.jpg -------------------------------------------------------------------------------- /ch7/deploy_image/test_images/4_100.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Deep-Learning-with-fastai-Cookbook/e692fee0e7d8de184cb57deb222123c94483acd7/ch7/deploy_image/test_images/4_100.jpg -------------------------------------------------------------------------------- /ch7/deploy_image/test_images/5_100.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Deep-Learning-with-fastai-Cookbook/e692fee0e7d8de184cb57deb222123c94483acd7/ch7/deploy_image/test_images/5_100.jpg -------------------------------------------------------------------------------- /ch7/deploy_image/web_flask_deploy_image_model.py: -------------------------------------------------------------------------------- 1 | # example of using Flask to deploy a fastai deep learning model trained on an image dataset 2 | import json 3 | import os 4 | import urllib.request 5 | import numpy as np 6 | import pathlib 7 | temp = pathlib.PosixPath 8 | pathlib.PosixPath = pathlib.WindowsPath 9 | from flask import Flask, render_template, request 10 | from fastai.vision.all import * 11 | 12 | image_directory = "test_images" 13 | 14 | # build the path for the trained model 15 | path = Path(os.getcwd()) 16 | full_path = os.path.join(path,'fruits_360may3.pkl') 17 | print("path is:",path) 18 | print("full_path is: ",full_path) 19 | # load the model 20 | learner = load_learner(full_path) 21 | 22 | 23 | app = Flask(__name__) 24 | 25 | 26 | @app.route('/') 27 | def home(): 28 | ''' render home.html - page that is served at localhost that allows user to enter model scoring parameters''' 29 | title_text = "fastai deployment" 30 | title = {'titlename':title_text} 31 | return render_template('home.html',title=title) 32 | 33 | @app.route('/show-prediction/') 34 | def show_prediction(): 35 | ''' 36 | get the scoring parameters entered in home.html and render show-prediction.html 37 | ''' 38 | # the scoring parameters are sent to this page as parameters on the URL link from home.html 39 | # load the scoring parameter 40 | image_file_name = request.args.get("file_name") 41 | # build the fully qualified file name 42 | full_path = os.path.join(path,image_directory,image_file_name) 43 | print("full_path is: ",full_path) 44 | img = PILImage.create(full_path) 45 | # apply the model to the image 46 | pred_class, ti1, ti2 = learner.predict(img) 47 | print("pred_class is: ",pred_class) 48 | predict_string = "Predicted object is: "+pred_class 49 | # build parameter to pass on to show-prediction.html 50 | prediction = {'prediction_key':predict_string} 51 | # render the page that will show the prediction 52 | return(render_template('show-prediction.html',prediction=prediction)) 53 | 54 | 55 | 56 | if __name__ == '__main__': 57 | app.run(debug=True, host='0.0.0.0') -------------------------------------------------------------------------------- /ch7/deploy_tabular/adult_sample_model.pkl: 
--------------------------------------------------------------------------------
/ch7/deploy_tabular/adult_sample_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Deep-Learning-with-fastai-Cookbook/e692fee0e7d8de184cb57deb222123c94483acd7/ch7/deploy_tabular/adult_sample_model.pkl
--------------------------------------------------------------------------------
/ch7/deploy_tabular/static/css/main2.css:
--------------------------------------------------------------------------------
body {
    margin: 0.2;
    padding: 0;
    font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
    color: #444;
}
/*
 * Formatting the header area
 */
header {
    background-color: #DFB887;
    height: 35px;
    width: 100%;
    opacity: .9;
    margin-bottom: 10px;
}
header h1.logo {
    margin: 0;
    font-size: 1.7em;
    color: #fff;
    text-transform: uppercase;
    float: left;
}
header h1.logo:hover {
    color: #fff;
    text-decoration: none;
}
/*
 * Centering the body content
 */
.container {
    width: 1200px;
    margin: 0 auto;
}
div.home {
    padding: 10px 0 30px 0;
    font-size: 1.2em;
    background-color: #E6E6FA;
    -webkit-border-radius: 6px;
    -moz-border-radius: 6px;
    border-radius: 6px;
}

div.about {
    padding: 10px 0 30px 0;
    background-color: #E6E6FA;
    -webkit-border-radius: 6px;
    -moz-border-radius: 6px;
    border-radius: 6px;
}
h2 {
    font-size: 3em;
    margin-top: 40px;
    text-align: center;
    letter-spacing: -2px;
}
h3 {
    font-size: 1.7em;
    font-weight: 100;
    margin-top: 30px;
    text-align: center;
    letter-spacing: -1px;
    color: #999;
}
.menu {
    float: right;
    margin-top: 8px;
}
.menu li {
    display: inline;
}
.menu li + li {
    margin-left: 35px;
}
.menu li a {
    color: #444;
    text-decoration: none;
}
--------------------------------------------------------------------------------
/ch7/deploy_tabular/templates/home.html:
--------------------------------------------------------------------------------
[template markup not preserved in this dump; the visible page text follows]
Page title: Predicting whether an individual is going to have a salary of over 50k using a fastai model trained on ADULT_SAMPLE
Please select the details for the individual you want to make a salary prediction for
Select the work class:
Select the education level:
Select the marital status:
Select occupation:
Select relationship:
Select race:
Select sex:
Select native-country:
The value of the option selected is:
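The form rendered by home.html submits its values to /show-prediction/ as URL query parameters, one per input column; web_flask_deploy.py (further below) reads them back with request.args.get for each name in scoring_columns. The snippet below is a sketch of the kind of scoring URL the form produces: the column names come from the deployment script, while the values are hypothetical examples rather than anything taken from the repository.

# Sketch: build the query string that the home.html form sends to /show-prediction/
# (column names from scoring_columns in web_flask_deploy.py; values are made-up examples)
from urllib.parse import urlencode

example_inputs = {
    "age": 39, "workclass": "Private", "fnlwgt": 77516, "education": "Bachelors",
    "education-num": 13, "marital-status": "Never-married", "occupation": "Adm-clerical",
    "relationship": "Not-in-family", "race": "White", "sex": "Male",
    "capital-gain": 2174, "capital-loss": 0, "hours-per-week": 40,
    "native-country": "United-States",
}
scoring_url = "http://localhost:5000/show-prediction/?" + urlencode(example_inputs)
print(scoring_url)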
--------------------------------------------------------------------------------
/ch7/deploy_tabular/templates/show-prediction.html:
--------------------------------------------------------------------------------
[template markup not preserved in this dump; the visible page text follows]
Page title: Page for showing prediction
Here is the prediction for the individual's income:
{{ prediction.prediction_key }}
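show-prediction.html is a Jinja template: the {{ prediction.prediction_key }} placeholder is filled in from the dictionary that both deployment scripts pass to render_template. Below is a self-contained sketch of that mechanism; it uses render_template_string with an inline template string purely to keep the example standalone, whereas the real apps render the template file shown above.

# Sketch of how Flask/Jinja fills {{ prediction.prediction_key }}
from flask import Flask, render_template_string

app = Flask(__name__)
TEMPLATE = "Here is the prediction for the individual's income: {{ prediction.prediction_key }}"

with app.app_context():
    page = render_template_string(
        TEMPLATE,
        prediction={"prediction_key": "Prediction is: individual has income greater than 50k"},
    )
    print(page)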
--------------------------------------------------------------------------------
/ch7/deploy_tabular/web_flask_deploy.py:
--------------------------------------------------------------------------------
# example of using Flask to deploy a fastai deep learning model trained on a tabular dataset
import json
import os
import urllib.request
import numpy as np
import pathlib
temp = pathlib.PosixPath
pathlib.PosixPath = pathlib.WindowsPath
from flask import Flask, render_template, request
#from fastbook import *
from fastai.tabular.all import *
#from fastai.vision import *

scoring_columns = ["age","workclass","fnlwgt","education","education-num","marital-status","occupation","relationship","race","sex","capital-gain","capital-loss","hours-per-week","native-country"]

# build the path for the trained model
path = Path(os.getcwd())
full_path = os.path.join(path,'adult_sample_model.pkl')
print("path is:",path)
print("full_path is: ",full_path)
# load the model
learner = load_learner(full_path)


app = Flask(__name__)


@app.route('/')
def home():
    ''' render home.html - page that is served at localhost that allows user to enter model scoring parameters'''
    title_text = "fastai deployment"
    title = {'titlename':title_text}
    return render_template('home.html',title=title)

@app.route('/show-prediction/')
def show_prediction():
    '''
    get the scoring parameters entered in home.html and render show-prediction.html
    '''
    # the scoring parameters are sent to this page as parameters on the URL link from home.html
    # load the scoring parameter values into a dictionary indexed by the column names expected by the model
    score_values_dict = {}
    # bring the URL argument values into a Python dictionary
    for column in scoring_columns:
        # use input from home.html for scoring
        score_values_dict[column] = request.args.get(column)
    for value in score_values_dict:
        print("value for "+value+" is: "+str(score_values_dict[value]))
    # create and load the scoring parameters dataframe (containing the scoring parameters) that will be fed into the pipelines
    score_df = pd.DataFrame(columns=scoring_columns)
    # df = df.astype({"a": int, "b": complex})
    print("score_df before load is "+str(score_df))
    for col in scoring_columns:
        score_df.at[0,col] = score_values_dict[col]
    # ensure columns have the correct types
    score_df = score_df.astype({"age":np.int64,"fnlwgt":np.int64,"education-num":np.float64,"capital-gain":np.int64,"capital-loss":np.int64,"hours-per-week":np.int64})
    # print details about scoring parameters
    print("score_df: ",score_df)
    print("score_df.dtypes: ",score_df.dtypes)
    print("score_df.iloc[0]",score_df.iloc[0])
    print("shape of score_df.iloc[0] is: ",score_df.iloc[0].shape)
    pred_class,pred_idx,outputs = learner.predict(score_df.iloc[0])
    for col in scoring_columns:
        print("pred_class "+str(col)+" is: "+str(pred_class[col]))
    print("pred_idx is: "+str(pred_idx))
    print("outputs is: "+str(outputs))
    # get a result string from the value of the model's prediction
    if outputs[0] >= outputs[1]:
        predict_string = "Prediction is: individual has income less than 50k"
    else:
        predict_string = "Prediction is: individual has income greater than 50k"
    # build parameter to pass on to show-prediction.html
    prediction = {'prediction_key':predict_string}
    # render the page that will show the prediction
    return(render_template('show-prediction.html',prediction=prediction))



if __name__ == '__main__':
    app.run(debug=True, host='0.0.0.0')
--------------------------------------------------------------------------------
/untitled.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Deep-Learning-with-fastai-Cookbook/e692fee0e7d8de184cb57deb222123c94483acd7/untitled.txt
--------------------------------------------------------------------------------
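The scoring logic in web_flask_deploy.py above can also be checked without a browser or a running Flask server, which is a convenient sanity check on the exported model before deploying it. The sketch below mirrors what show_prediction() does with the form inputs; it assumes adult_sample_model.pkl sits in the current working directory, and the input values are hypothetical examples rather than values taken from the repository.

# Sketch: score the exported tabular model directly, mirroring show_prediction()
# (assumes adult_sample_model.pkl is in the working directory; values are made-up examples)
import numpy as np
import pandas as pd
from fastai.tabular.all import *

learner = load_learner("adult_sample_model.pkl")

score_df = pd.DataFrame([{
    "age": 39, "workclass": "Private", "fnlwgt": 77516, "education": "Bachelors",
    "education-num": 13.0, "marital-status": "Never-married", "occupation": "Adm-clerical",
    "relationship": "Not-in-family", "race": "White", "sex": "Male",
    "capital-gain": 2174, "capital-loss": 0, "hours-per-week": 40,
    "native-country": "United-States",
}])
# same dtype coercion the Flask route applies before calling predict
score_df = score_df.astype({"age": np.int64, "fnlwgt": np.int64, "education-num": np.float64,
                            "capital-gain": np.int64, "capital-loss": np.int64,
                            "hours-per-week": np.int64})

pred_class, pred_idx, outputs = learner.predict(score_df.iloc[0])
print(outputs)  # two values; web_flask_deploy.py compares outputs[0] and outputs[1]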