├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── course ├── 0_Check_Environment.ipynb ├── 1 First Deep Learning Model.ipynb ├── 2 Data.ipynb ├── 3 Machine Learning.ipynb ├── 4 Deep Learning Intro.ipynb ├── 5 Gradient Descent.ipynb ├── 6 Convolutional Neural Networks.ipynb ├── 8 Recurrent Neural Networks.ipynb └── 9 Improving performance.ipynb ├── data ├── HR_comma_sep.csv ├── banknotes.csv ├── banknotes.png ├── cansim-0800020-eng-6674700030567901031.csv ├── diabetes.csv ├── generator │ └── class 0 │ │ └── squirrel.jpeg ├── housing-data.csv ├── international-airline-passengers.csv ├── iris.csv ├── iss.jpg ├── sms.wav ├── titanic-train.csv ├── us_retail_sales.csv ├── user_visit_duration.csv ├── weight-height.csv └── wines.csv ├── environment.yml ├── solutions ├── 2 Data exploration Exercises Solution.ipynb ├── 3 Machine Learning Exercises Solution.ipynb ├── 4 Deep Learning Intro Exercises Solution.ipynb ├── 5 Gradient Descent Exercises Solution.ipynb ├── 6 Convolutional Neural Networks Exercises Solution.ipynb ├── 8 Recurrent Neural Networks Exercises Solutions.ipynb └── 9 Improving performance Exercises Solutions.ipynb └── tests └── test_nb.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .floydexpt 3 | .floydignore 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other info into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # IPython Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | venv/ 87 | ENV/ 88 | 89 | # Spyder project settings 90 | .spyderproject 91 | 92 | # Rope project settings 93 | .ropeproject 94 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: python 3 | python: 4 | - "3.7" 5 | install: 6 | - sudo apt-get update 7 | # Download and install Miniconda. (This runs on every build; a conditional 8 | # check could skip the download when the cached version is the same.)
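  # For example, a conditional variant could look like this (a sketch only,
  # not active in this config):
  # - if [ ! -f miniconda.sh ]; then wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; fi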
9 | - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 10 | - bash miniconda.sh -b -p $HOME/miniconda 11 | - export PATH="$HOME/miniconda/bin:$PATH" 12 | - hash -r 13 | - conda config --set always_yes yes --set changeps1 no 14 | - conda update -q conda 15 | # Useful for debugging any issues with conda 16 | - conda info -a 17 | 18 | - conda env create -q -n test-environment python=$TRAVIS_PYTHON_VERSION -f environment.yml 19 | - source activate test-environment 20 | 21 | script: 22 | - travis_wait 30 py.test -v 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | COPYRIGHT 2 | 3 | All contributions by Francesco Mosconi: 4 | Copyright (c) 2017, Francesco Mosconi. 5 | All rights reserved. 6 | 7 | All contributions by Catalit LLC: 8 | Copyright (c) 2017, Catalit LLC. 9 | All rights reserved. 10 | 11 | All other contributions: 12 | Copyright (c) 2015, the respective contributors. 13 | All rights reserved. 14 | 15 | Each contributor holds copyright over their respective contributions. 16 | The project versioning (Git) records all such contribution source information. 17 | MIT License 18 | 19 | Copyright (c) 2017 20 | 21 | Permission is hereby granted, free of charge, to any person obtaining a copy 22 | of this software and associated documentation files (the "Software"), to deal 23 | in the Software without restriction, including without limitation the rights 24 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 25 | copies of the Software, and to permit persons to whom the Software is 26 | furnished to do so, subject to the following conditions: 27 | 28 | The above copyright notice and this permission notice shall be included in all 29 | copies or substantial portions of the Software. 30 | 31 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 32 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 33 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 34 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 35 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 36 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 37 | SOFTWARE. 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## [Check our Zero To Deep Learning 5 day bootcamp. New dates are out!](https://www.zerotodeeplearning.com/?utm_source=github.com&utm_medium=affiliate&utm_campaign=https%3A%2F%2Fgithub.com%2FDataweekends%2Fzero_to_deep_learning_video&utm_content=README.md) 2 | 3 | ------ 4 | 5 | # Zero to Deep Learning® Video Course 6 | 7 | Welcome to the Zero to Deep Learning® Video Course repository. 8 | 9 | ## Get started guide 10 | 11 | #### Clone this repository on your local computer 12 | 13 | ``` 14 | git clone https://github.com/Dataweekends/zero_to_deep_learning_video.git 15 | ``` 16 | 17 | #### Download and Install Anaconda Python 3.7 18 | 19 | https://www.anaconda.com/distribution/ 20 | 21 | #### Change to course folder 22 | 23 | ``` 24 | cd zero_to_deep_learning_video 25 | ``` 26 | 27 | #### Create the course environment 28 | 29 | ``` 30 | conda env create 31 | ``` 32 | 33 | wait for the environment to create. 
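This reads the `environment.yml` file at the top of the repository and builds an environment named `ztdl` (the name comes from the `name:` field inside `environment.yml`). Should conda not pick the file up automatically, the explicit equivalent is:

```
conda env create -f environment.yml
```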
34 | 35 | #### Activate the environment (Mac/Linux) 36 | ``` 37 | conda activate ztdl 38 | ``` 39 | 40 | #### Activate the environment (Windows) 41 | ``` 42 | conda activate ztdl 43 | ``` 44 | 45 | Check that your prompt changed to 46 | 47 | ``` 48 | (ztdl) $ 49 | ``` 50 | 51 | #### Launch Jupyter Notebook 52 | 53 | ``` 54 | jupyter notebook 55 | ``` 56 | 57 | #### Open your browser to 58 | 59 | ``` 60 | http://localhost:8888 61 | ``` 62 | 63 | #### Run the Check environment Notebook 64 | 65 | Go to the course folder, open the notebook `0_Check_Environment.ipynb` and run it. If you see the message: 66 | 67 | Houston we are go! 68 | 69 | You are good to go! Enjoy! 70 | 71 | 72 | #### Troubleshooting installation 73 | If for some reason you don't see `Houston we are go!`, the simplest solution is to delete the environment and start from scratch. 74 | 75 | To remove the environment: 76 | 77 | - close the browser and go back to your terminal 78 | - stop jupyter notebook (CTRL-C) 79 | - deactivate the environment (Mac/Linux): 80 | 81 | ``` 82 | conda deactivate 83 | ``` 84 | 85 | - deactivate the environment (Windows 10): 86 | 87 | ``` 88 | conda deactivate 89 | ``` 90 | 91 | - delete the environment: 92 | 93 | ``` 94 | conda remove -y -n ztdl --all 95 | ``` 96 | 97 | - restart from environment creation and make sure that each step completes to the end. 98 | 99 | #### Updating Conda 100 | 101 | One thing you can also try is to update your conda executable. This may help if you already had Anaconda installed on your system. 102 | 103 | ``` 104 | conda update conda 105 | ``` 106 | 107 | These instructions have been tested on: 108 | 109 | - macOS Catalina 10.15.7 110 | - Ubuntu 18.04 111 | - Windows 10 112 | 113 | ## Running the course on Google Colaboratory with free GPU support 114 | 115 | Google offers a free platform to run Jupyter notebooks called Google Colaboratory. You need a Gmail or Google Apps email address to use it. 116 | 117 | Follow these steps: 118 | 119 | 1. Open your browser and go to https://colab.research.google.com/ 120 | 2. Choose the **GITHUB** tab and paste the repository address: `https://github.com/Dataweekends/zero_to_deep_learning_video` in the search bar. 121 | 3. Click on the notebook you would like to run 122 | 4. Enable GPU support in the `Edit -> Notebook Settings` menu 123 | 5. Enjoy running the notebook with GPU support! 124 | 6. If the notebook loads data from the repo you will have to download the data too. Follow these steps to do that: 125 | 1. Create a code cell at the top of the notebook 126 | 2. Clone the repository in Colab: 127 | ``` 128 | !git clone https://github.com/Dataweekends/zero_to_deep_learning_video.git 129 | ``` 130 | 3. Replace the `../data` path with `zero_to_deep_learning_video/data` in the cell that loads the data. 131 | 7. Enjoy! -------------------------------------------------------------------------------- /course/0_Check_Environment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Check Environment\n", 8 | "This notebook checks that you have correctly created the environment and that all packages needed are installed."
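For a quick programmatic version of the same check — a minimal sketch that simply looks for the environment name in the interpreter path — you can run:

```python
import sys
# the active interpreter should live under an envs/ztdl/ directory
assert 'ztdl' in sys.executable, sys.executable
```

The cells below walk through the same verification step by step.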
9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## Environment\n", 16 | "\n", 17 | "The next command should return a line like (Mac/Linux):\n", 18 | "\n", 19 | " //anaconda/envs/ztdl/bin/python\n", 20 | "\n", 21 | "or like (Windows 10):\n", 22 | "\n", 23 | " C:\\\\\\\\Anaconda3\\\\envs\\\\ztdl\\\\python.exe\n", 24 | "\n", 25 | "In particular you should make sure that you are using the python executable from within the course environment.\n", 26 | "\n", 27 | "If that's not the case do this:\n", 28 | "\n", 29 | "1. close this notebook\n", 30 | "2. go to the terminal and stop jupyer notebook\n", 31 | "3. make sure that you have activated the environment, you should see a prompt like:\n", 32 | "\n", 33 | " (ztdl) $\n", 34 | "4. (optional) if you don't see that prompt activate the environment:\n", 35 | " - mac/linux:\n", 36 | " \n", 37 | " conda activate ztdl\n", 38 | "\n", 39 | " - windows:\n", 40 | "\n", 41 | " activate ztdl\n", 42 | "5. restart jupyter notebook" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "import os\n", 52 | "import sys\n", 53 | "sys.executable" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "## Python 3.7\n", 61 | "\n", 62 | "The next line should say that you're using Python 3.7.x from Anaconda. At the time of publication it looks like this (Mac/Linux):\n", 63 | "\n", 64 | " Python 3.7.3 (default, Mar 27 2019, 22:11:17)\n", 65 | " [GCC 7.3.0] :: Anaconda, Inc. on linux\n", 66 | " Type \"help\", \"copyright\", \"credits\" or \"license\" for more information.\n", 67 | "\n", 68 | "or like this (Windows 10):\n", 69 | "\n", 70 | " Python 3.7.3 (default, Apr 24 2019, 15:29:51) [MSC v.1915 64 bit (AMD64)] :: Anaconda, Inc. on win32\n", 71 | " Type \"help\", \"copyright\", \"credits\" or \"license\" for more information.\n", 72 | "\n", 73 | "but date and exact version of GCC may change in the future.\n", 74 | "\n", 75 | "If you see a different version of python, go back to the previous step and make sure you created and activated the environment correctly." 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "import sys\n", 85 | "sys.version" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "## Jupyter\n", 93 | "\n", 94 | "Check that Jupyter is running from within the environment. The next line should look like (Mac/Linux):\n", 95 | "\n", 96 | " //anaconda/envs/ztdl/lib/python3.6/site-packages/jupyter.py'\n", 97 | "\n", 98 | "or like this (Windows 10):\n", 99 | "\n", 100 | " C:\\\\Users\\\\\\\\Anaconda3\\\\envs\\\\ztdl\\\\lib\\\\site-packages\\\\jupyter.py" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "import jupyter\n", 110 | "jupyter.__file__" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "## Other packages\n", 118 | "\n", 119 | "Here we will check that all the packages are installed and have the correct versions. 
If everything is ok you should see:\n", 120 | " \n", 123 | " Houston we are go!\n", 124 | "\n", 125 | "If there's any issue here please make sure you have checked the previous steps and if it's all good please send us a question in the Q&A forum." 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "import pip\n", 135 | "import numpy\n", 136 | "import jupyter\n", 137 | "import matplotlib\n", 138 | "import sklearn\n", 139 | "import scipy\n", 140 | "import pandas\n", 141 | "import PIL\n", 142 | "import seaborn\n", 143 | "import tensorflow\n", 144 | "\n", 145 | "\n", 146 | "def check_version(pkg, version):\n", 147 | " actual = pkg.__version__.split('.')\n", 148 | " if len(actual) == 3:\n", 149 | " actual_major = '.'.join(actual[:2])\n", 150 | " elif len(actual) == 2:\n", 151 | " actual_major = '.'.join(actual)\n", 152 | " else:\n", 153 | " raise NotImplementedError(pkg.__name__ +\n", 154 | " \" actual version: \" +\n", 155 | " pkg.__version__)\n", 156 | " try:\n", 157 | " assert(actual_major == version)\n", 158 | " except Exception as ex:\n", 159 | " print(\"{} {}\\t=> {}\".format(pkg.__name__,\n", 160 | " version,\n", 161 | " pkg.__version__))\n", 162 | " raise ex\n", 163 | "\n", 164 | "check_version(pip, '21.0')\n", 165 | "check_version(numpy, '1.19')\n", 166 | "check_version(matplotlib, '3.3')\n", 167 | "check_version(sklearn, '0.24')\n", 168 | "check_version(scipy, '1.6')\n", 169 | "check_version(pandas, '1.2')\n", 170 | "check_version(PIL, '8.2')\n", 171 | "check_version(seaborn, '0.11')\n", 172 | "check_version(tensorflow, '2.5')\n", 173 | "\n", 174 | "print(\"Houston we are go!\")" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "Python 3", 188 | "language": "python", 189 | "name": "python3" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 3 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython3", 201 | "version": "3.7.10" 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 2 206 | } 207 | -------------------------------------------------------------------------------- /course/1 First Deep Learning Model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# First Deep Learning Model" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "%matplotlib inline\n", 18 | "import matplotlib.pyplot as plt" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "from sklearn.datasets import make_circles" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "X, y = make_circles(n_samples=1000,\n", 37 | " noise=0.1,\n", 38 | " factor=0.2,\n", 39 | " random_state=0)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 |
"source": [ 48 | "X" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "X.shape" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "plt.figure(figsize=(5, 5))\n", 67 | "plt.plot(X[y==0, 0], X[y==0, 1], 'ob', alpha=0.5)\n", 68 | "plt.plot(X[y==1, 0], X[y==1, 1], 'xr', alpha=0.5)\n", 69 | "plt.xlim(-1.5, 1.5)\n", 70 | "plt.ylim(-1.5, 1.5)\n", 71 | "plt.legend(['0', '1'])\n", 72 | "plt.title(\"Blue circles and Red crosses\")" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "from tensorflow.keras.models import Sequential\n", 82 | "from tensorflow.keras.layers import Dense\n", 83 | "from tensorflow.keras.optimizers import SGD" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "model = Sequential()" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "model.add(Dense(4, input_shape=(2,), activation='tanh'))" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "model.add(Dense(1, activation='sigmoid'))" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "model.compile(SGD(learning_rate=0.5), 'binary_crossentropy', metrics=['accuracy'])" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "model.fit(X, y, epochs=20)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "hticks = np.linspace(-1.5, 1.5, 101)\n", 138 | "vticks = np.linspace(-1.5, 1.5, 101)\n", 139 | "aa, bb = np.meshgrid(hticks, vticks)\n", 140 | "ab = np.c_[aa.ravel(), bb.ravel()]\n", 141 | "c = model.predict(ab)\n", 142 | "cc = c.reshape(aa.shape)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "plt.figure(figsize=(5, 5))\n", 152 | "plt.contourf(aa, bb, cc, cmap='bwr', alpha=0.2)\n", 153 | "plt.plot(X[y==0, 0], X[y==0, 1], 'ob', alpha=0.5)\n", 154 | "plt.plot(X[y==1, 0], X[y==1, 1], 'xr', alpha=0.5)\n", 155 | "plt.xlim(-1.5, 1.5)\n", 156 | "plt.ylim(-1.5, 1.5)\n", 157 | "plt.legend(['0', '1'])\n", 158 | "plt.title(\"Blue circles and Red crosses\")" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [] 167 | } 168 | ], 169 | "metadata": { 170 | "kernelspec": { 171 | "display_name": "Python 3", 172 | "language": "python", 173 | "name": "python3" 174 | }, 175 | "language_info": { 176 | "codemirror_mode": { 177 | "name": "ipython", 178 | "version": 3 179 | }, 180 | "file_extension": ".py", 181 | "mimetype": "text/x-python", 182 | "name": "python", 183 | "nbconvert_exporter": "python", 184 | "pygments_lexer": "ipython3", 185 | "version": "3.7.10" 186 | } 187 | }, 188 | "nbformat": 4, 189 | "nbformat_minor": 2 190 | } 191 | -------------------------------------------------------------------------------- /course/2 
Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data Exploration with Pandas" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "%matplotlib inline\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "import pandas as pd\n", 19 | "import numpy as np" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "df = pd.read_csv('../data/titanic-train.csv')" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "type(df)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "df.head()" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "df.info()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "df.describe()" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### Indexing" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "df.iloc[3]" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "df.loc[0:4,'Ticket']" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "df['Ticket'].head()" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "df[['Embarked', 'Ticket']].head()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "### Selections" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "df[df['Age'] > 70]" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "df['Age'] > 70" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "df.query(\"Age > 70\")" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "df[(df['Age'] == 11) & (df['SibSp'] == 5)]" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "df[(df.Age == 11) | (df.SibSp == 5)]" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "df.query('(Age == 11) | (SibSp == 5)')" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "### Unique Values" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | 
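"# the distinct ports of embarkation in the Titanic data: 'S', 'C', 'Q' (NaN = missing)\n",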
"df['Embarked'].unique()" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "### Sorting" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "df.sort_values('Age', ascending = False).head()" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "### Aggregations" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "df['Survived'].value_counts()" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "df['Pclass'].value_counts()" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "df.groupby(['Pclass', 'Survived'])['PassengerId'].count()" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "df['Age'].min()" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "df['Age'].max()" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "df['Age'].mean()" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "df['Age'].median()" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "mean_age_by_survived = df.groupby('Survived')['Age'].mean()\n", 280 | "mean_age_by_survived" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "std_age_by_survived = df.groupby('Survived')['Age'].std()\n", 290 | "std_age_by_survived" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "### Merge" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "df1 = mean_age_by_survived.round(0).reset_index()\n", 307 | "df2 = std_age_by_survived.round(0).reset_index()" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "df1" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "df2" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "df3 = pd.merge(df1, df2, on='Survived')" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "df3" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "df3.columns = ['Survived', 'Average Age', 'Age Standard Deviation']" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": 
{}, 359 | "outputs": [], 360 | "source": [ 361 | "df3" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "### Pivot Tables" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "metadata": {}, 375 | "outputs": [], 376 | "source": [ 377 | "df.pivot_table(index='Pclass',\n", 378 | " columns='Survived',\n", 379 | " values='PassengerId',\n", 380 | " aggfunc='count')" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": {}, 386 | "source": [ 387 | "### Correlations" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": null, 393 | "metadata": {}, 394 | "outputs": [], 395 | "source": [ 396 | "df['IsFemale'] = df['Sex'] == 'female'" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": {}, 403 | "outputs": [], 404 | "source": [ 405 | "correlated_with_survived = df.corr()['Survived'].sort_values()\n", 406 | "correlated_with_survived" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": null, 412 | "metadata": {}, 413 | "outputs": [], 414 | "source": [ 415 | "correlated_with_survived.iloc[:-1].plot(kind='bar',\n", 416 | " title='Titanic Passengers: correlation with survival');" 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | "# Visual Data Exploration with Matplotlib" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": {}, 430 | "outputs": [], 431 | "source": [ 432 | "data1 = np.random.normal(0, 0.1, 1000)\n", 433 | "data2 = np.random.normal(1, 0.4, 1000) + np.linspace(0, 1, 1000)\n", 434 | "data3 = 2 + np.random.random(1000) * np.linspace(1, 5, 1000)\n", 435 | "data4 = np.random.normal(3, 0.2, 1000) + 0.3 * np.sin(np.linspace(0, 20, 1000))" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": null, 441 | "metadata": {}, 442 | "outputs": [], 443 | "source": [ 444 | "data = np.vstack([data1, data2, data3, data4]).transpose()" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": null, 450 | "metadata": {}, 451 | "outputs": [], 452 | "source": [ 453 | "df = pd.DataFrame(data, columns=['data1', 'data2', 'data3', 'data4'])\n", 454 | "df.head()" 455 | ] 456 | }, 457 | { 458 | "cell_type": "markdown", 459 | "metadata": {}, 460 | "source": [ 461 | "### Line Plot" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": null, 467 | "metadata": {}, 468 | "outputs": [], 469 | "source": [ 470 | "df.plot(title='Line plot');" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": null, 476 | "metadata": {}, 477 | "outputs": [], 478 | "source": [ 479 | "plt.plot(df)\n", 480 | "plt.title('Line plot')\n", 481 | "plt.legend(['data1', 'data2', 'data3', 'data4']);" 482 | ] 483 | }, 484 | { 485 | "cell_type": "markdown", 486 | "metadata": {}, 487 | "source": [ 488 | "### Scatter Plot" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [ 497 | "df.plot(style='.');" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "metadata": {}, 504 | "outputs": [], 505 | "source": [ 506 | "_ = df.plot(kind='scatter', x='data1', y='data2',\n", 507 | " xlim=(-1.5, 1.5), ylim=(0, 3))" 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "### Histograms" 515 | ] 516 | }, 517 
| { 518 | "cell_type": "code", 519 | "execution_count": null, 520 | "metadata": {}, 521 | "outputs": [], 522 | "source": [ 523 | "df.plot(kind='hist',\n", 524 | " bins=50,\n", 525 | " title='Histogram',\n", 526 | " alpha=0.6);" 527 | ] 528 | }, 529 | { 530 | "cell_type": "markdown", 531 | "metadata": {}, 532 | "source": [ 533 | "### Cumulative distribution" 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": null, 539 | "metadata": {}, 540 | "outputs": [], 541 | "source": [ 542 | "df.plot(kind='hist',\n", 543 | " bins=100,\n", 544 | " title='Cumulative distributions',\n", 545 | " density=True,\n", 546 | " cumulative=True,\n", 547 | " alpha=0.4);" 548 | ] 549 | }, 550 | { 551 | "cell_type": "markdown", 552 | "metadata": {}, 553 | "source": [ 554 | "### Box Plot" 555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": null, 560 | "metadata": {}, 561 | "outputs": [], 562 | "source": [ 563 | "df.plot(kind='box',\n", 564 | " title='Boxplot');" 565 | ] 566 | }, 567 | { 568 | "cell_type": "markdown", 569 | "metadata": {}, 570 | "source": [ 571 | "### Subplots" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": {}, 578 | "outputs": [], 579 | "source": [ 580 | "fig, ax = plt.subplots(2, 2, figsize=(5, 5))\n", 581 | "\n", 582 | "df.plot(ax=ax[0][0],\n", 583 | " title='Line plot')\n", 584 | "\n", 585 | "df.plot(ax=ax[0][1],\n", 586 | " style='o',\n", 587 | " title='Scatter plot')\n", 588 | "\n", 589 | "df.plot(ax=ax[1][0],\n", 590 | " kind='hist',\n", 591 | " bins=50,\n", 592 | " title='Histogram')\n", 593 | "\n", 594 | "df.plot(ax=ax[1][1],\n", 595 | " kind='box',\n", 596 | " title='Boxplot')\n", 597 | "\n", 598 | "plt.tight_layout()" 599 | ] 600 | }, 601 | { 602 | "cell_type": "markdown", 603 | "metadata": {}, 604 | "source": [ 605 | "### Pie charts" 606 | ] 607 | }, 608 | { 609 | "cell_type": "code", 610 | "execution_count": null, 611 | "metadata": {}, 612 | "outputs": [], 613 | "source": [ 614 | "gt01 = df['data1'] > 0.1\n", 615 | "piecounts = gt01.value_counts()\n", 616 | "piecounts" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": null, 622 | "metadata": {}, 623 | "outputs": [], 624 | "source": [ 625 | "piecounts.plot(kind='pie',\n", 626 | " figsize=(5, 5),\n", 627 | " explode=[0, 0.15],\n", 628 | " labels=['<= 0.1', '> 0.1'],\n", 629 | " autopct='%1.1f%%',\n", 630 | " shadow=True,\n", 631 | " startangle=90,\n", 632 | " fontsize=16);" 633 | ] 634 | }, 635 | { 636 | "cell_type": "markdown", 637 | "metadata": {}, 638 | "source": [ 639 | "### Hexbin plot" 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": null, 645 | "metadata": {}, 646 | "outputs": [], 647 | "source": [ 648 | "data = np.vstack([np.random.normal((0, 0), 2, size=(1000, 2)),\n", 649 | " np.random.normal((9, 9), 3, size=(2000, 2))])\n", 650 | "df = pd.DataFrame(data, columns=['x', 'y'])" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "execution_count": null, 656 | "metadata": {}, 657 | "outputs": [], 658 | "source": [ 659 | "df.head()" 660 | ] 661 | }, 662 | { 663 | "cell_type": "code", 664 | "execution_count": null, 665 | "metadata": {}, 666 | "outputs": [], 667 | "source": [ 668 | "df.plot();" 669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": null, 674 | "metadata": {}, 675 | "outputs": [], 676 | "source": [ 677 | "df.plot(kind='kde');" 678 | ] 679 | }, 680 | { 681 | "cell_type": "code", 682 | "execution_count": null, 683 | "metadata": {}, 684 | 
"outputs": [], 685 | "source": [ 686 | "df.plot(kind='hexbin', x='x', y='y', bins=100, cmap='rainbow');" 687 | ] 688 | }, 689 | { 690 | "cell_type": "markdown", 691 | "metadata": {}, 692 | "source": [ 693 | "# Unstructured data" 694 | ] 695 | }, 696 | { 697 | "cell_type": "markdown", 698 | "metadata": {}, 699 | "source": [ 700 | "### Images" 701 | ] 702 | }, 703 | { 704 | "cell_type": "code", 705 | "execution_count": null, 706 | "metadata": {}, 707 | "outputs": [], 708 | "source": [ 709 | "from PIL import Image" 710 | ] 711 | }, 712 | { 713 | "cell_type": "code", 714 | "execution_count": null, 715 | "metadata": {}, 716 | "outputs": [], 717 | "source": [ 718 | "img = Image.open('../data/iss.jpg')\n", 719 | "img" 720 | ] 721 | }, 722 | { 723 | "cell_type": "code", 724 | "execution_count": null, 725 | "metadata": {}, 726 | "outputs": [], 727 | "source": [ 728 | "type(img)" 729 | ] 730 | }, 731 | { 732 | "cell_type": "code", 733 | "execution_count": null, 734 | "metadata": {}, 735 | "outputs": [], 736 | "source": [ 737 | "imgarray = np.asarray(img)" 738 | ] 739 | }, 740 | { 741 | "cell_type": "code", 742 | "execution_count": null, 743 | "metadata": {}, 744 | "outputs": [], 745 | "source": [ 746 | "type(imgarray)" 747 | ] 748 | }, 749 | { 750 | "cell_type": "code", 751 | "execution_count": null, 752 | "metadata": {}, 753 | "outputs": [], 754 | "source": [ 755 | "imgarray.shape" 756 | ] 757 | }, 758 | { 759 | "cell_type": "code", 760 | "execution_count": null, 761 | "metadata": {}, 762 | "outputs": [], 763 | "source": [ 764 | "imgarray.ravel().shape" 765 | ] 766 | }, 767 | { 768 | "cell_type": "code", 769 | "execution_count": null, 770 | "metadata": {}, 771 | "outputs": [], 772 | "source": [ 773 | "435 * 640 * 3" 774 | ] 775 | }, 776 | { 777 | "cell_type": "markdown", 778 | "metadata": {}, 779 | "source": [ 780 | "### Sound" 781 | ] 782 | }, 783 | { 784 | "cell_type": "code", 785 | "execution_count": null, 786 | "metadata": {}, 787 | "outputs": [], 788 | "source": [ 789 | "from scipy.io import wavfile" 790 | ] 791 | }, 792 | { 793 | "cell_type": "code", 794 | "execution_count": null, 795 | "metadata": {}, 796 | "outputs": [], 797 | "source": [ 798 | "rate, snd = wavfile.read(filename='../data/sms.wav')" 799 | ] 800 | }, 801 | { 802 | "cell_type": "code", 803 | "execution_count": null, 804 | "metadata": {}, 805 | "outputs": [], 806 | "source": [ 807 | "from IPython.display import Audio" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": null, 813 | "metadata": {}, 814 | "outputs": [], 815 | "source": [ 816 | "Audio(data=snd, rate=rate)" 817 | ] 818 | }, 819 | { 820 | "cell_type": "code", 821 | "execution_count": null, 822 | "metadata": {}, 823 | "outputs": [], 824 | "source": [ 825 | "len(snd)" 826 | ] 827 | }, 828 | { 829 | "cell_type": "code", 830 | "execution_count": null, 831 | "metadata": {}, 832 | "outputs": [], 833 | "source": [ 834 | "snd" 835 | ] 836 | }, 837 | { 838 | "cell_type": "code", 839 | "execution_count": null, 840 | "metadata": {}, 841 | "outputs": [], 842 | "source": [ 843 | "plt.plot(snd)" 844 | ] 845 | }, 846 | { 847 | "cell_type": "code", 848 | "execution_count": null, 849 | "metadata": {}, 850 | "outputs": [], 851 | "source": [ 852 | "_ = plt.specgram(snd, NFFT=1024, Fs=44100)\n", 853 | "plt.ylabel('Frequency (Hz)')\n", 854 | "plt.xlabel('Time (s)')" 855 | ] 856 | }, 857 | { 858 | "cell_type": "markdown", 859 | "metadata": {}, 860 | "source": [ 861 | "# Data Exploration Exercises" 862 | ] 863 | }, 864 | { 865 | "cell_type": "markdown", 866 | 
"metadata": {}, 867 | "source": [ 868 | "## Exercise 1\n", 869 | "- load the dataset: `../data/international-airline-passengers.csv`\n", 870 | "- inspect it using the `.info()` and `.head()` commands\n", 871 | "- use the function [`pd.to_datetime()`](http://pandas.pydata.org/pandas-docs/version/0.20/generated/pandas.to_datetime.html) to change the column type of 'Month' to a datatime type\n", 872 | "- set the index of df to be a datetime index using the column 'Month' and the `df.set_index()` method\n", 873 | "- choose the appropriate plot and display the data\n", 874 | "- choose appropriate scale\n", 875 | "- label the axes" 876 | ] 877 | }, 878 | { 879 | "cell_type": "code", 880 | "execution_count": null, 881 | "metadata": {}, 882 | "outputs": [], 883 | "source": [] 884 | }, 885 | { 886 | "cell_type": "markdown", 887 | "metadata": {}, 888 | "source": [ 889 | "## Exercise 2\n", 890 | "- load the dataset: `../data/weight-height.csv`\n", 891 | "- inspect it\n", 892 | "- plot it using a scatter plot with Weight as a function of Height\n", 893 | "- plot the male and female populations with 2 different colors on a new scatter plot\n", 894 | "- remember to label the axes" 895 | ] 896 | }, 897 | { 898 | "cell_type": "code", 899 | "execution_count": null, 900 | "metadata": {}, 901 | "outputs": [], 902 | "source": [] 903 | }, 904 | { 905 | "cell_type": "markdown", 906 | "metadata": {}, 907 | "source": [ 908 | "## Exercise 3\n", 909 | "- plot the histogram of the heights for males and for females on the same plot\n", 910 | "- use alpha to control transparency in the plot comand\n", 911 | "- plot a vertical line at the mean of each population using `plt.axvline()`" 912 | ] 913 | }, 914 | { 915 | "cell_type": "code", 916 | "execution_count": null, 917 | "metadata": {}, 918 | "outputs": [], 919 | "source": [] 920 | }, 921 | { 922 | "cell_type": "markdown", 923 | "metadata": {}, 924 | "source": [ 925 | "## Exercise 4\n", 926 | "- plot the weights of the males and females using a box plot\n", 927 | "- which one is easier to read?\n", 928 | "- (remember to put in titles, axes and legends)" 929 | ] 930 | }, 931 | { 932 | "cell_type": "code", 933 | "execution_count": null, 934 | "metadata": {}, 935 | "outputs": [], 936 | "source": [] 937 | }, 938 | { 939 | "cell_type": "markdown", 940 | "metadata": {}, 941 | "source": [ 942 | "## Exercise 5\n", 943 | "- load the dataset: `../data/titanic-train.csv`\n", 944 | "- learn about scattermatrix here: http://pandas.pydata.org/pandas-docs/stable/visualization.html\n", 945 | "- display the data using a scattermatrix" 946 | ] 947 | }, 948 | { 949 | "cell_type": "code", 950 | "execution_count": null, 951 | "metadata": {}, 952 | "outputs": [], 953 | "source": [] 954 | } 955 | ], 956 | "metadata": { 957 | "kernelspec": { 958 | "display_name": "Python 3", 959 | "language": "python", 960 | "name": "python3" 961 | }, 962 | "language_info": { 963 | "codemirror_mode": { 964 | "name": "ipython", 965 | "version": 3 966 | }, 967 | "file_extension": ".py", 968 | "mimetype": "text/x-python", 969 | "name": "python", 970 | "nbconvert_exporter": "python", 971 | "pygments_lexer": "ipython3", 972 | "version": "3.7.10" 973 | } 974 | }, 975 | "nbformat": 4, 976 | "nbformat_minor": 2 977 | } 978 | -------------------------------------------------------------------------------- /course/4 Deep Learning Intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | 
"# Deep Learning Intro" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "%matplotlib inline\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "import pandas as pd\n", 19 | "import numpy as np" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Shallow and Deep Networks" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "from sklearn.datasets import make_moons\n", 36 | "\n", 37 | "X, y = make_moons(n_samples=1000, noise=0.1, random_state=0)\n", 38 | "plt.plot(X[y==0, 0], X[y==0, 1], 'ob', alpha=0.5)\n", 39 | "plt.plot(X[y==1, 0], X[y==1, 1], 'xr', alpha=0.5)\n", 40 | "plt.legend(['0', '1'])" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "X.shape" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "from sklearn.model_selection import train_test_split" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "X_train, X_test, y_train, y_test = train_test_split(X, y,\n", 68 | " test_size=0.3,\n", 69 | " random_state=42)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "from tensorflow.keras.models import Sequential\n", 79 | "from tensorflow.keras.layers import Dense\n", 80 | "from tensorflow.keras.optimizers import SGD, Adam" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### Shallow Model" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "model = Sequential()\n", 97 | "model.add(Dense(1, input_shape=(2,), activation='sigmoid'))\n", 98 | "model.compile(Adam(learning_rate=0.05), 'binary_crossentropy', metrics=['accuracy'])" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "model.fit(X_train, y_train, epochs=200, verbose=0)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "results = model.evaluate(X_test, y_test)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "results" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "print(\"The Accuracy score on the Train set is:\\t{:0.3f}\".format(results[1]))" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "def plot_decision_boundary(model, X, y):\n", 144 | " amin, bmin = X.min(axis=0) - 0.1\n", 145 | " amax, bmax = X.max(axis=0) + 0.1\n", 146 | " hticks = np.linspace(amin, amax, 101)\n", 147 | " vticks = np.linspace(bmin, bmax, 101)\n", 148 | " \n", 149 | " aa, bb = np.meshgrid(hticks, vticks)\n", 150 | " ab = np.c_[aa.ravel(), bb.ravel()]\n", 151 | " \n", 152 | " c = model.predict(ab)\n", 153 | " cc = c.reshape(aa.shape)\n", 154 | "\n", 
155 | " plt.figure(figsize=(12, 8))\n", 156 | " plt.contourf(aa, bb, cc, cmap='bwr', alpha=0.2)\n", 157 | " plt.plot(X[y==0, 0], X[y==0, 1], 'ob', alpha=0.5)\n", 158 | " plt.plot(X[y==1, 0], X[y==1, 1], 'xr', alpha=0.5)\n", 159 | " plt.legend(['0', '1'])\n", 160 | " \n", 161 | "plot_decision_boundary(model, X, y)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "### Deep model" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "model = Sequential()\n", 178 | "model.add(Dense(4, input_shape=(2,), activation='tanh'))\n", 179 | "model.add(Dense(2, activation='tanh'))\n", 180 | "model.add(Dense(1, activation='sigmoid'))\n", 181 | "model.compile(Adam(learning_rate=0.05), 'binary_crossentropy', metrics=['accuracy'])" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "model.fit(X_train, y_train, epochs=100, verbose=0)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "model.evaluate(X_test, y_test)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "from sklearn.metrics import accuracy_score, confusion_matrix" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "y_train_pred = model.predict_classes(X_train)\n", 218 | "y_test_pred = model.predict_classes(X_test)\n", 219 | "\n", 220 | "print(\"The Accuracy score on the Train set is:\\t{:0.3f}\".format(accuracy_score(y_train, y_train_pred)))\n", 221 | "print(\"The Accuracy score on the Test set is:\\t{:0.3f}\".format(accuracy_score(y_test, y_test_pred)))" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "plot_decision_boundary(model, X, y)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "## Multiclass classification\n", 238 | "\n", 239 | "### The Iris dataset" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "df = pd.read_csv('../data/iris.csv')" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "import seaborn as sns\n", 258 | "sns.pairplot(df, hue=\"species\")" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "df.head()" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "X = df.drop('species', axis=1)\n", 277 | "X.head()" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "target_names = df['species'].unique()\n", 287 | "target_names" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "target_dict = {n:i for i, n in enumerate(target_names)}\n", 297 | "target_dict" 
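, "  # -> e.g. {'setosa': 0, 'versicolor': 1, 'virginica': 2} for the iris data"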
298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "y = df['species'].map(target_dict)\n", 307 | "y.head()" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "from tensorflow.keras.utils import to_categorical" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "y_cat = to_categorical(y)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "y_cat[:10]" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "X_train, X_test, y_train, y_test = train_test_split(X.values, y_cat,\n", 344 | " test_size=0.2)" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "metadata": {}, 351 | "outputs": [], 352 | "source": [ 353 | "model = Sequential()\n", 354 | "model.add(Dense(3, input_shape=(4,), activation='softmax'))\n", 355 | "model.compile(Adam(learning_rate=0.1),\n", 356 | " loss='categorical_crossentropy',\n", 357 | " metrics=['accuracy'])" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [ 366 | "model.fit(X_train, y_train, epochs=20, validation_split=0.1)" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [ 375 | "y_pred = model.predict(X_test)" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [ 384 | "y_pred[:5]" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "y_test_class = np.argmax(y_test, axis=1)\n", 394 | "y_pred_class = np.argmax(y_pred, axis=1)" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": {}, 401 | "outputs": [], 402 | "source": [ 403 | "from sklearn.metrics import classification_report" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [ 412 | "print(classification_report(y_test_class, y_pred_class))" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": null, 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "confusion_matrix(y_test_class, y_pred_class)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "## Exercise 1" 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": { 434 | "collapsed": true 435 | }, 436 | "source": [ 437 | "The [Pima Indians dataset](https://archive.ics.uci.edu/ml/datasets/diabetes) is a very famous dataset distributed by UCI and originally collected from the National Institute of Diabetes and Digestive and Kidney Diseases. It contains data from clinical exams for women age 21 and above of Pima Indian origin. 
The objective is to predict based on diagnostic measurements whether a patient has diabetes.\n", 438 | "\n", 439 | "It has the following features:\n", 440 | "\n", 441 | "- Pregnancies: Number of times pregnant\n", 442 | "- Glucose: Plasma glucose concentration at 2 hours in an oral glucose tolerance test\n", 443 | "- BloodPressure: Diastolic blood pressure (mm Hg)\n", 444 | "- SkinThickness: Triceps skin fold thickness (mm)\n", 445 | "- Insulin: 2-Hour serum insulin (mu U/ml)\n", 446 | "- BMI: Body mass index (weight in kg/(height in m)^2)\n", 447 | "- DiabetesPedigreeFunction: Diabetes pedigree function\n", 448 | "- Age: Age (years)\n", 449 | "\n", 450 | "The last column is the outcome, and it is a binary variable.\n", 451 | "\n", 452 | "In this first exercise we will explore it through the following steps:\n", 453 | "\n", 454 | "1. Load the ../data/diabetes.csv dataset, use pandas to explore the range of each feature\n", 455 | "- For each feature draw a histogram. Bonus points if you draw all the histograms in the same figure.\n", 456 | "- Explore correlations of features with the outcome column. You can do this in several ways, for example using the `sns.pairplot` we used above or drawing a heatmap of the correlations.\n", 457 | "- Do features need standardization? If so what standardization technique will you use? MinMax? Standard?\n", 458 | "- Prepare your final `X` and `y` variables to be used by a ML model. Make sure you define your target variable well. Will you need dummy columns?" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": null, 464 | "metadata": {}, 465 | "outputs": [], 466 | "source": [] 467 | }, 468 | { 469 | "cell_type": "markdown", 470 | "metadata": {}, 471 | "source": [ 472 | "## Exercise 2" 473 | ] 474 | }, 475 | { 476 | "cell_type": "markdown", 477 | "metadata": { 478 | "collapsed": true 479 | }, 480 | "source": [ 481 | "Build a fully connected NN model that predicts diabetes. Follow these steps:\n", 482 | "\n", 483 | "1. Split your data in a train/test with a test size of 20% and a `random_state = 22`\n", 484 | "- define a sequential model with at least one inner layer. You will have to make choices for the following things:\n", 485 | " - what is the size of the input?\n", 486 | " - how many nodes will you use in each layer?\n", 487 | " - what is the size of the output?\n", 488 | " - what activation functions will you use in the inner layers?\n", 489 | " - what activation function will you use at output?\n", 490 | " - what loss function will you use?\n", 491 | " - what optimizer will you use?\n", 492 | "- fit your model on the training set, using a validation_split of 0.1\n", 493 | "- test your trained model on the test data from the train/test split\n", 494 | "- check the accuracy score, the confusion matrix and the classification report" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": null, 500 | "metadata": {}, 501 | "outputs": [], 502 | "source": [] 503 | }, 504 | { 505 | "cell_type": "markdown", 506 | "metadata": {}, 507 | "source": [ 508 | "## Exercise 3\n", 509 | "Compare your work with the results presented in [this notebook](https://www.kaggle.com/futurist/d/uciml/pima-indians-diabetes-database/pima-data-visualisation-and-machine-learning). Are your Neural Network results better or worse than the results obtained by traditional Machine Learning techniques?\n", 510 | "\n", 511 | "- Try training a Support Vector Machine or a Random Forest model on the exact same train/test split. 
Is the performance better or worse?\n", 512 | "- Try restricting your features to only 4 features like in the suggested notebook. How does model performance change?" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": null, 518 | "metadata": {}, 519 | "outputs": [], 520 | "source": [] 521 | }, 522 | { 523 | "cell_type": "markdown", 524 | "metadata": {}, 525 | "source": [ 526 | "## Exercise 4\n", 527 | "\n", 528 | "[Tensorflow playground](http://playground.tensorflow.org/) is a web-based neural network demo. It is really useful to develop an intuition about what happens when you change architecture, activation function or other parameters. Try playing with it for a few minutes. You don't need to understand the meaning of every knob and button in the page, just get a sense for what happens if you change something. In the next chapter we'll explore these things in more detail.\n" 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": null, 534 | "metadata": {}, 535 | "outputs": [], 536 | "source": [] 537 | } 538 | ], 539 | "metadata": { 540 | "kernelspec": { 541 | "display_name": "Python 3", 542 | "language": "python", 543 | "name": "python3" 544 | }, 545 | "language_info": { 546 | "codemirror_mode": { 547 | "name": "ipython", 548 | "version": 3 549 | }, 550 | "file_extension": ".py", 551 | "mimetype": "text/x-python", 552 | "name": "python", 553 | "nbconvert_exporter": "python", 554 | "pygments_lexer": "ipython3", 555 | "version": "3.7.10" 556 | } 557 | }, 558 | "nbformat": 4, 559 | "nbformat_minor": 2 560 | } 561 | -------------------------------------------------------------------------------- /course/8 Recurrent Neural Networks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Recurrent Neural Networks" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import numpy as np\n", 18 | "%matplotlib inline\n", 19 | "import matplotlib.pyplot as plt" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Time series forecasting" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "df = pd.read_csv('../data/cansim-0800020-eng-6674700030567901031.csv',\n", 36 | " skiprows=6, skipfooter=9,\n", 37 | " engine='python')\n", 38 | "df.head()" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "from pandas.tseries.offsets import MonthEnd" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "df['Adjustments'] = pd.to_datetime(df['Adjustments']) + MonthEnd(1)\n", 57 | "df = df.set_index('Adjustments')\n", 58 | "df.head()" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "df.plot()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "split_date = pd.Timestamp('01-01-2011')" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "train = 
df.loc[:split_date, ['Unadjusted']]\n", 86 | "test = df.loc[split_date:, ['Unadjusted']]" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "ax = train.plot()\n", 96 | "test.plot(ax=ax)\n", 97 | "plt.legend(['train', 'test'])" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "from sklearn.preprocessing import MinMaxScaler\n", 107 | "\n", 108 | "sc = MinMaxScaler()\n", 109 | "\n", 110 | "train_sc = sc.fit_transform(train)\n", 111 | "test_sc = sc.transform(test)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "train_sc[:4]" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "X_train = train_sc[:-1]\n", 130 | "y_train = train_sc[1:]\n", 131 | "\n", 132 | "X_test = test_sc[:-1]\n", 133 | "y_test = test_sc[1:]" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "### Fully connected predictor" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "from tensorflow.keras.models import Sequential\n", 150 | "from tensorflow.keras.layers import Dense\n", 151 | "import tensorflow.keras.backend as K\n", 152 | "from tensorflow.keras.callbacks import EarlyStopping" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "K.clear_session()\n", 162 | "\n", 163 | "model = Sequential()\n", 164 | "model.add(Dense(12, input_dim=1, activation='relu'))\n", 165 | "model.add(Dense(1))\n", 166 | "model.compile(loss='mean_squared_error', optimizer='adam')\n", 167 | "model.summary()" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "model.fit(X_train, y_train, epochs=200,\n", 186 | " batch_size=2, verbose=1,\n", 187 | " callbacks=[early_stop])" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "y_pred = model.predict(X_test)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "plt.plot(y_test)\n", 206 | "plt.plot(y_pred)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "### Recurrent predictor" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [ 222 | "from tensorflow.keras.layers import LSTM" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "X_train.shape" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "#3D tensor with shape (batch_size, 
timesteps, input_dim)\n", 241 | "X_train[:, None].shape" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "X_train_t = X_train[:, None]\n", 251 | "X_test_t = X_test[:, None]" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "K.clear_session()\n", 261 | "model = Sequential()\n", 262 | "\n", 263 | "model.add(LSTM(6, input_shape=(1, 1)))\n", 264 | "\n", 265 | "model.add(Dense(1))\n", 266 | "\n", 267 | "model.compile(loss='mean_squared_error', optimizer='adam')" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "model.fit(X_train_t, y_train,\n", 277 | " epochs=100, batch_size=1, verbose=1,\n", 278 | " callbacks=[early_stop])" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "y_pred = model.predict(X_test_t)\n", 288 | "plt.plot(y_test)\n", 289 | "plt.plot(y_pred)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "## Windows" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "train_sc.shape" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "metadata": {}, 312 | "outputs": [], 313 | "source": [ 314 | "train_sc_df = pd.DataFrame(train_sc, columns=['Scaled'], index=train.index)\n", 315 | "test_sc_df = pd.DataFrame(test_sc, columns=['Scaled'], index=test.index)\n", 316 | "train_sc_df.head()" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "for s in range(1, 13):\n", 326 | " train_sc_df['shift_{}'.format(s)] = train_sc_df['Scaled'].shift(s)\n", 327 | " test_sc_df['shift_{}'.format(s)] = test_sc_df['Scaled'].shift(s)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [ 336 | "train_sc_df.head(13)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "X_train = train_sc_df.dropna().drop('Scaled', axis=1)\n", 346 | "y_train = train_sc_df.dropna()[['Scaled']]\n", 347 | "\n", 348 | "X_test = test_sc_df.dropna().drop('Scaled', axis=1)\n", 349 | "y_test = test_sc_df.dropna()[['Scaled']]" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "X_train.head()" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "X_train.shape" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "X_train = X_train.values\n", 377 | "X_test= X_test.values\n", 378 | "\n", 379 | "y_train = y_train.values\n", 380 | "y_test = y_test.values" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": {}, 386 | "source": [ 387 | "### Fully Connected on Windows" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": null, 393 | "metadata": {}, 
394 | "outputs": [], 395 | "source": [ 396 | "K.clear_session()\n", 397 | "\n", 398 | "model = Sequential()\n", 399 | "model.add(Dense(12, input_dim=12, activation='relu'))\n", 400 | "model.add(Dense(1))\n", 401 | "model.compile(loss='mean_squared_error', optimizer='adam')\n", 402 | "model.summary()" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": null, 408 | "metadata": {}, 409 | "outputs": [], 410 | "source": [ 411 | "model.fit(X_train, y_train, epochs=200,\n", 412 | " batch_size=1, verbose=1, callbacks=[early_stop])" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": null, 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "y_pred = model.predict(X_test)\n", 422 | "plt.plot(y_test)\n", 423 | "plt.plot(y_pred)" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "### LSTM on Windows" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": null, 436 | "metadata": {}, 437 | "outputs": [], 438 | "source": [ 439 | "X_train_t = X_train.reshape(X_train.shape[0], 1, 12)\n", 440 | "X_test_t = X_test.reshape(X_test.shape[0], 1, 12)" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": null, 446 | "metadata": {}, 447 | "outputs": [], 448 | "source": [ 449 | "X_train_t.shape" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": null, 455 | "metadata": {}, 456 | "outputs": [], 457 | "source": [ 458 | "K.clear_session()\n", 459 | "model = Sequential()\n", 460 | "\n", 461 | "model.add(LSTM(6, input_shape=(1, 12)))\n", 462 | "\n", 463 | "model.add(Dense(1))\n", 464 | "\n", 465 | "model.compile(loss='mean_squared_error', optimizer='adam')" 466 | ] 467 | }, 468 | { 469 | "cell_type": "code", 470 | "execution_count": null, 471 | "metadata": {}, 472 | "outputs": [], 473 | "source": [ 474 | "model.summary()" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": null, 480 | "metadata": {}, 481 | "outputs": [], 482 | "source": [ 483 | "model.fit(X_train_t, y_train, epochs=100,\n", 484 | " batch_size=1, verbose=1, callbacks=[early_stop])" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": null, 490 | "metadata": {}, 491 | "outputs": [], 492 | "source": [ 493 | "y_pred = model.predict(X_test_t)\n", 494 | "plt.plot(y_test)\n", 495 | "plt.plot(y_pred)" 496 | ] 497 | }, 498 | { 499 | "cell_type": "markdown", 500 | "metadata": {}, 501 | "source": [ 502 | "## Exercise 1\n", 503 | "\n", 504 | "In the model above we reshaped the input shape to: `(num_samples, 1, 12)`, i.e. we treated a window of 12 months as a vector of 12 coordinates that we simultaneously passed to all the LSTM nodes. An alternative way to look at the problem is to reshape the input to `(num_samples, 12, 1)`. This means we consider each input window as a sequence of 12 values that we will pass in sequence to the LSTM. In principle this looks like a more accurate description of our situation. But does it yield better predictions? Let's check it.\n", 505 | "\n", 506 | "- Reshape `X_train` and `X_test` so that they represent a set of univariate sequences\n", 507 | "- retrain the same LSTM(6) model, you'll have to adapt the `input_shape`\n", 508 | "- check the performance of this new model, is it better at predicting the test data?" 
509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": null, 514 | "metadata": {}, 515 | "outputs": [], 516 | "source": [] 517 | }, 518 | { 519 | "cell_type": "markdown", 520 | "metadata": { 521 | "collapsed": true 522 | }, 523 | "source": [ 524 | "## Exercise 2\n", 525 | "\n", 526 | "RNN models can be applied to images too. In general we can apply them to any data where there's a connection between nearby units. Let's see how we can easily build a model that works with images.\n", 527 | "\n", 528 | "- Load the MNIST data; by now you should be able to do it blindfolded :)\n", 529 | "- reshape it so that an image looks like a long sequence of pixels\n", 530 | "- create a recurrent model and train it on the training data\n", 531 | "- how does it perform compared to a fully connected network? How does it compare to Convolutional Neural Networks?\n", 532 | "\n", 533 | "(feel free to run this exercise on a cloud GPU if it's too slow on your laptop; a sketch of one approach follows below)"
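, "\n", "_A minimal sketch of one tractable variant that reads each 28x28 image as a sequence of 28 rows of 28 pixels (the literal pixel-by-pixel reading would use `input_shape=(784, 1)` and trains much more slowly; the layer size and number of epochs are illustrative):_\n", "\n", "```python\n", "from tensorflow.keras.datasets import mnist\n", "\n", "(X_train, y_train), (X_test, y_test) = mnist.load_data()\n", "# scale pixels to [0, 1]; each image is already (28, 28) = (timesteps, features)\n", "X_train = X_train.astype('float32') / 255.0\n", "X_test = X_test.astype('float32') / 255.0\n", "\n", "K.clear_session()\n", "model = Sequential()\n", "model.add(LSTM(32, input_shape=(28, 28)))\n", "model.add(Dense(10, activation='softmax'))\n", "model.compile(loss='sparse_categorical_crossentropy', optimizer='adam',\n", "              metrics=['accuracy'])\n", "model.fit(X_train, y_train, epochs=2, batch_size=128, validation_split=0.1)\n", "model.evaluate(X_test, y_test)\n", "```" 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": null, 539 | "metadata": {}, 540 | "outputs": [], 541 | "source": [] 542 | } 543 | ], 544 | "metadata": { 545 | "kernelspec": { 546 | "display_name": "Python 3", 547 | "language": "python", 548 | "name": "python3" 549 | }, 550 | "language_info": { 551 | "codemirror_mode": { 552 | "name": "ipython", 553 | "version": 3 554 | }, 555 | "file_extension": ".py", 556 | "mimetype": "text/x-python", 557 | "name": "python", 558 | "nbconvert_exporter": "python", 559 | "pygments_lexer": "ipython3", 560 | "version": "3.7.10" 561 | } 562 | }, 563 | "nbformat": 4, 564 | "nbformat_minor": 2 565 | } 566 | -------------------------------------------------------------------------------- /data/banknotes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dataweekends/zero_to_deep_learning_video/efff3403504906c1e38c23f4d35fd3cb98af1950/data/banknotes.png -------------------------------------------------------------------------------- /data/cansim-0800020-eng-6674700030567901031.csv: -------------------------------------------------------------------------------- 1 | "Table 080-0020 Retail trade, sales by the North American Industry Classification System (NAICS), monthly (dollars x 1,000)(2,3,4,5,6)" 2 | Survey or program details: 3 | Retail Trade Survey (Monthly) - 2406 4 | Monthly Retail Trade Survey (Department Store Organizations) - 2408 5 | Geography,Canada,Canada 6 | North American Industry Classification System (NAICS),Retail trade [44-45] ,Retail trade [44-45] 7 | Adjustments,Unadjusted,Seasonally adjusted 8 | Jan-1991,12588862,15026890 9 | Feb-1991,12154321,15304585 10 | Mar-1991,14337072,15413591 11 | Apr-1991,15108570,15293409 12 | May-1991,17225734,15676083 13 | Jun-1991,16342833,15507931 14 | Jul-1991,15996243,15556313 15 | Aug-1991,16064910,15430645 16 | Sep-1991,15015317,15427313 17 | Oct-1991,15606864,15410250 18 | Nov-1991,16237366,15662790 19 | Dec-1991,18381340,15349625 20 | Jan-1992,13084963,15477875 21 | Feb-1992,12773972,15513022 22 | Mar-1992,14198775,15527933 23 | Apr-1992,15558390,15708556 24 | May-1992,16776396,15642000 25 | Jun-1992,16716231,15823989 26 | Jul-1992,16637483,15869453 27 | Aug-1992,15842075,15844631 28 | Sep-1992,15812400,15983239 29 | Oct-1992,16562268,16125835 30 | Nov-1992,16015869,16049478 31 | Dec-1992,19682921,16095727 32 | Jan-1993,13672727,16408864 33 | Feb-1993,12900733,16239039 34 | Mar-1993,15211859,16314960 35 | Apr-1993,16642246,16577426 36 | 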
May-1993,17442405,16472045 37 | Jun-1993,17444074,16351907 38 | Jul-1993,17610326,16712914 39 | Aug-1993,16645660,16703413 40 | Sep-1993,16790330,16755338 41 | Oct-1993,16921755,16819382 42 | Nov-1993,17124609,16958202 43 | Dec-1993,20928208,17021436 44 | Jan-1994,14005058,17076164 45 | Feb-1994,13799079,17393150 46 | Mar-1994,16865149,17890903 47 | Apr-1994,17494589,17507688 48 | May-1994,18739509,17775079 49 | Jun-1994,19323481,17882069 50 | Jul-1994,18297834,17785800 51 | Aug-1994,18101290,17881976 52 | Sep-1994,18161417,17952647 53 | Oct-1994,17998875,18193703 54 | Nov-1994,18516766,18264676 55 | Dec-1994,22688647,18387840 56 | Jan-1995,14927996,18337565 57 | Feb-1995,14520623,18259470 58 | Mar-1995,17457477,18225708 59 | Apr-1995,17774107,18217661 60 | May-1995,19740889,18333051 61 | Jun-1995,20319460,18503481 62 | Jul-1995,18747299,18407254 63 | Aug-1995,19280525,18720783 64 | Sep-1995,18860566,18628735 65 | Oct-1995,18177152,18412692 66 | Nov-1995,18962903,18506305 67 | Dec-1995,22308880,18525162 68 | Jan-1996,15379086,18531426 69 | Feb-1996,15521981,18657652 70 | Mar-1996,17613469,18774049 71 | Apr-1996,18421405,18739023 72 | May-1996,20624568,18758009 73 | Jun-1996,20099348,18977805 74 | Jul-1996,19423284,18914063 75 | Aug-1996,19889359,19071178 76 | Sep-1996,18589571,19019991 77 | Oct-1996,19686383,19488074 78 | Nov-1996,20293165,19820074 79 | Dec-1996,22897980,19688254 80 | Jan-1997,16882321,19857365 81 | Feb-1997,16033605,20141489 82 | Mar-1997,18225453,20056949 83 | Apr-1997,20432272,20215340 84 | May-1997,22594727,20386953 85 | Jun-1997,21577744,20466883 86 | Jul-1997,21570145,20681230 87 | Aug-1997,21065784,20605349 88 | Sep-1997,20532806,20646564 89 | Oct-1997,21491163,21037021 90 | Nov-1997,20904746,20973561 91 | Dec-1997,25507180,21749250 92 | Jan-1998,17736224,20776214 93 | Feb-1998,16797018,21153779 94 | Mar-1998,19408883,21041225 95 | Apr-1998,21501677,21504619 96 | May-1998,23312947,21504262 97 | Jun-1998,22654803,21247311 98 | Jul-1998,22594775,21385620 99 | Aug-1998,21512734,21335980 100 | Sep-1998,21645562,21660645 101 | Oct-1998,21994089,21565457 102 | Nov-1998,21461344,21714061 103 | Dec-1998,25874332,21605209 104 | Jan-1999,18438151,22074043 105 | Feb-1999,17658952,22286260 106 | Mar-1999,21082603,22402680 107 | Apr-1999,22587382,22389229 108 | May-1999,23892100,22300484 109 | Jun-1999,24036828,22450487 110 | Jul-1999,23994614,22614164 111 | Aug-1999,22926469,22806183 112 | Sep-1999,22984278,22817165 113 | Oct-1999,22813633,22967565 114 | Nov-1999,22972959,23036527 115 | Dec-1999,28143999,23387176 116 | Jan-2000,19324692,23434451 117 | Feb-2000,19140440,23378327 118 | Mar-2000,22918829,23813646 119 | Apr-2000,22914155,23537859 120 | May-2000,25659687,23644020 121 | Jun-2000,25945400,23841011 122 | Jul-2000,24821347,24204193 123 | Aug-2000,25102965,24266358 124 | Sep-2000,24710257,24495699 125 | Oct-2000,23687124,24330740 126 | Nov-2000,24556357,24373656 127 | Dec-2000,29057176,24518477 128 | Jan-2001,20607642,24640517 129 | Feb-2001,19444855,24477976 130 | Mar-2001,23652255,24583988 131 | Apr-2001,24370700,24944482 132 | May-2001,27585889,25143416 133 | Jun-2001,27243919,25190078 134 | Jul-2001,25507932,24813831 135 | Aug-2001,26322941,25017925 136 | Sep-2001,24263969,24703734 137 | Oct-2001,24917747,25149772 138 | Nov-2001,26048646,25657476 139 | Dec-2001,30481412,26124705 140 | Jan-2002,22361219,26496729 141 | Feb-2002,20787209,26105990 142 | Mar-2002,24642692,26072633 143 | Apr-2002,26405170,26657565 144 | May-2002,29087583,26165998 145 | 
Jun-2002,28363263,26768924 146 | Jul-2002,27912328,26620588 147 | Aug-2002,28202300,26767279 148 | Sep-2002,26054411,26620259 149 | Oct-2002,27131743,27061844 150 | Nov-2002,27276942,27015215 151 | Dec-2002,31300554,27172388 152 | Jan-2003,23301871,27137545 153 | Feb-2003,21980804,27622538 154 | Mar-2003,25468203,27275170 155 | Apr-2003,27059495,27176306 156 | May-2003,30417563,27484615 157 | Jun-2003,28912102,27569750 158 | Jul-2003,29492832,27707861 159 | Aug-2003,29102135,28020755 160 | Sep-2003,27467571,27841344 161 | Oct-2003,28223631,27825024 162 | Nov-2003,27391422,27777531 163 | Dec-2003,32325789,27704978 164 | Jan-2004,23778728,27935993 165 | Feb-2004,23008594,28719948 166 | Mar-2004,26967793,28689514 167 | Apr-2004,28592026,28254086 168 | May-2004,30479247,28554094 169 | Jun-2004,30711705,28550528 170 | Jul-2004,30898334,28616168 171 | Aug-2004,29535183,28836665 172 | Sep-2004,29245397,29243662 173 | Oct-2004,29445711,29561177 174 | Nov-2004,29232659,29901036 175 | Dec-2004,34561103,29593609 176 | Jan-2005,24498615,29888781 177 | Feb-2005,24028226,30460620 178 | Mar-2005,28600602,30006264 179 | Apr-2005,30600811,29940271 180 | May-2005,31948565,29935878 181 | Jun-2005,32967426,30590992 182 | Jul-2005,32620077,30800241 183 | Aug-2005,32025283,30647393 184 | Sep-2005,30914826,30600008 185 | Oct-2005,30241532,30887481 186 | Nov-2005,30828069,31012789 187 | Dec-2005,36726743,31230056 188 | Jan-2006,25993203,31747679 189 | Feb-2006,25128165,31744450 190 | Mar-2006,30760061,31871717 191 | Apr-2006,32106585,32354405 192 | May-2006,34894460,32048264 193 | Jun-2006,35049112,32242363 194 | Jul-2006,34341547,33040218 195 | Aug-2006,35045180,33007575 196 | Sep-2006,33056559,32492838 197 | Oct-2006,31830349,32595465 198 | Nov-2006,32663281,32814138 199 | Dec-2006,38605976,33515367 200 | Jan-2007,27777968,33221023 201 | Feb-2007,26548520,33466188 202 | Mar-2007,32818504,33910296 203 | Apr-2007,33621240,34385868 204 | May-2007,38434319,34789277 205 | Jun-2007,37555708,34436278 206 | Jul-2007,35635889,34430726 207 | Aug-2007,36978090,34725483 208 | Sep-2007,34057842,34341226 209 | Oct-2007,34070363,34305870 210 | Nov-2007,35091406,34958995 211 | Dec-2007,40006665,35625285 212 | Jan-2008,30525699,35933156 213 | Feb-2008,29418898,35526909 214 | Mar-2008,32925876,35530142 215 | Apr-2008,36272111,35914167 216 | May-2008,39778972,36006888 217 | Jun-2008,37842321,36400129 218 | Jul-2008,38632038,36403867 219 | Aug-2008,37775417,36137537 220 | Sep-2008,36138751,36390521 221 | Oct-2008,36158245,35747571 222 | Nov-2008,34230901,34661690 223 | Dec-2008,38256712,33303365 224 | Jan-2009,29192654,33747849 225 | Feb-2009,26804723,33869426 226 | Mar-2009,31356949,33894022 227 | Apr-2009,33942769,33930673 228 | May-2009,37316515,34345160 229 | Jun-2009,36865690,34735113 230 | Jul-2009,37191480,34755444 231 | Aug-2009,36049418,35039739 232 | Sep-2009,35537357,35239195 233 | Oct-2009,36133694,35330869 234 | Nov-2009,34354756,35250896 235 | Dec-2009,40969765,35577384 236 | Jan-2010,30668321,36080033 237 | Feb-2010,28632551,36051781 238 | Mar-2010,34967182,37001843 239 | Apr-2010,36469949,36148495 240 | May-2010,38424455,36041318 241 | Jun-2010,38973462,36350588 242 | Jul-2010,38932294,36295314 243 | Aug-2010,37395330,36515170 244 | Sep-2010,36923390,36632898 245 | Oct-2010,37014326,36879707 246 | Nov-2010,37408825,37568029 247 | Dec-2010,43147947,37392857 248 | Jan-2011,31191594,37392259 249 | Feb-2011,29797949,37437926 250 | Mar-2011,36099866,37617167 251 | Apr-2011,38035760,37755408 252 | 
May-2011,40046516,37723958 253 | Jun-2011,40839556,38228307 254 | Jul-2011,39832282,37925826 255 | Aug-2011,39541248,37976798 256 | Sep-2011,38877263,38181654 257 | Oct-2011,38203872,38623692 258 | Nov-2011,39174736,38779553 259 | Dec-2011,45089701,39087795 260 | Jan-2012,32361808,39102435 261 | Feb-2012,32087072,38968001 262 | Mar-2012,37933733,39201228 263 | Apr-2012,37775805,38920526 264 | May-2012,42584571,38841267 265 | Jun-2012,41789242,38773515 266 | Jul-2012,40130908,38854126 267 | Aug-2012,41321526,38854279 268 | Sep-2012,39069513,39058649 269 | Oct-2012,39487597,39277317 270 | Nov-2012,40095933,39224805 271 | Dec-2012,43489091,39050651 272 | Jan-2013,33574671,39523536 273 | Feb-2013,31636843,39710038 274 | Mar-2013,37561378,39811962 275 | Apr-2013,39401295,39655045 276 | May-2013,44577490,40295930 277 | Jun-2013,42169145,39992542 278 | Jul-2013,42417829,40388278 279 | Aug-2013,43237460,40660890 280 | Sep-2013,40170270,40631164 281 | Oct-2013,41560987,40813306 282 | Nov-2013,41893714,40798569 283 | Dec-2013,44796794,40716614 284 | Jan-2014,34980327,40976155 285 | Feb-2014,32905708,41256280 286 | Mar-2014,38460091,41242344 287 | Apr-2014,41809373,41852467 288 | May-2014,46379543,41906455 289 | Jun-2014,44178750,42457216 290 | Jul-2014,45285331,42562972 291 | Aug-2014,44359733,42456214 292 | Sep-2014,43017529,42685882 293 | Oct-2014,43775478,42690228 294 | Nov-2014,42968326,42603501 295 | Dec-2014,46887481,42317955 296 | Jan-2015,34820395,40971992 297 | Feb-2015,33174923,41801906 298 | Mar-2015,39444291,42420253 299 | Apr-2015,42297319,42331926 300 | May-2015,46670930,42721761 301 | Jun-2015,45584849,42989280 302 | Jul-2015,46295664,43154020 303 | Aug-2015,44793347,43309509 304 | Sep-2015,43999627,43303889 305 | Oct-2015,44507776,43378904 306 | Nov-2015,43696305,43921767 307 | Dec-2015,48097829,43078048 308 | Jan-2016,36415115,43977584 309 | Feb-2016,35649450,44205540 310 | Mar-2016,41403762,43839996 311 | Apr-2016,44881587,44181416 312 | May-2016,47337082,44176591 313 | Jun-2016,47399117,44162244 314 | Jul-2016,46321314,44110862 315 | Aug-2016,46201453,44216280 316 | Sep-2016,45528702,44534797 317 | Oct-2016,44770113,45061618 318 | Nov-2016,46285062,45141762 319 | Dec-2016,50016137,44943929 320 | Jan-2017,37628452,45952103 321 | Footnotes: 322 | 2,The total for retail trade excludes North American Industry Classification System (NAICS) 454. 323 | 3,"This CANSIM table replaces archived table 80-0014, 80-0015 and 80-0017." 324 | 4,"Quality indicator: Code A=Excellent. Code B=Very good. Code C=Good. Code D=Acceptable. Code E=Poor, use with caution. Code F=Unreliable (data not published)." 325 | 5,"Data for Northwest Territories includes Nunavut, from 1991-01 to 1998-12." 326 | 6,"In April 2013, data from 2004 onwards will be based on the 2012 North American Industry Classification System (NAICS). Data prior to 2004 will continue to be based on the 2007 North American Industry Classification System (NAICS)." 327 | Source: 328 | "Statistics Canada. 
Table 080-0020 - Retail trade, sales by the North American Industry Classification System (NAICS), monthly (dollars)" 329 | "(accessed: April 19, 2017)" 330 | -------------------------------------------------------------------------------- /data/generator/class 0/squirrel.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dataweekends/zero_to_deep_learning_video/efff3403504906c1e38c23f4d35fd3cb98af1950/data/generator/class 0/squirrel.jpeg -------------------------------------------------------------------------------- /data/housing-data.csv: -------------------------------------------------------------------------------- 1 | sqft,bdrms,age,price 2 | 2104,3,70,399900 3 | 1600,3,28,329900 4 | 2400,3,44,369000 5 | 1416,2,49,232000 6 | 3000,4,75,539900 7 | 1985,4,61,299900 8 | 1534,3,12,314900 9 | 1427,3,57,198999 10 | 1380,3,14,212000 11 | 1494,3,15,242500 12 | 1940,4,7,239999 13 | 2000,3,27,347000 14 | 1890,3,45,329999 15 | 4478,5,49,699900 16 | 1268,3,58,259900 17 | 2300,4,77,449900 18 | 1320,2,62,299900 19 | 1236,3,78,199900 20 | 2609,4,5,499998 21 | 3031,4,21,599000 22 | 1767,3,44,252900 23 | 1888,2,79,255000 24 | 1604,3,13,242900 25 | 1962,4,53,259900 26 | 3890,3,36,573900 27 | 1100,3,60,249900 28 | 1458,3,29,464500 29 | 2526,3,13,469000 30 | 2200,3,28,475000 31 | 2637,3,25,299900 32 | 1839,2,40,349900 33 | 1000,1,5,169900 34 | 2040,4,75,314900 35 | 3137,3,67,579900 36 | 1811,4,24,285900 37 | 1437,3,50,249900 38 | 1239,3,22,229900 39 | 2132,4,28,345000 40 | 4215,4,66,549000 41 | 2162,4,43,287000 42 | 1664,2,40,368500 43 | 2238,3,37,329900 44 | 2567,4,57,314000 45 | 1200,3,76,299000 46 | 852,2,70,179900 47 | 1852,4,64,299900 48 | 1203,3,11,239500 49 | -------------------------------------------------------------------------------- /data/international-airline-passengers.csv: -------------------------------------------------------------------------------- 1 | "Month","Thousand Passengers" 2 | "1949-01",112 3 | "1949-02",118 4 | "1949-03",132 5 | "1949-04",129 6 | "1949-05",121 7 | "1949-06",135 8 | "1949-07",148 9 | "1949-08",148 10 | "1949-09",136 11 | "1949-10",119 12 | "1949-11",104 13 | "1949-12",118 14 | "1950-01",115 15 | "1950-02",126 16 | "1950-03",141 17 | "1950-04",135 18 | "1950-05",125 19 | "1950-06",149 20 | "1950-07",170 21 | "1950-08",170 22 | "1950-09",158 23 | "1950-10",133 24 | "1950-11",114 25 | "1950-12",140 26 | "1951-01",145 27 | "1951-02",150 28 | "1951-03",178 29 | "1951-04",163 30 | "1951-05",172 31 | "1951-06",178 32 | "1951-07",199 33 | "1951-08",199 34 | "1951-09",184 35 | "1951-10",162 36 | "1951-11",146 37 | "1951-12",166 38 | "1952-01",171 39 | "1952-02",180 40 | "1952-03",193 41 | "1952-04",181 42 | "1952-05",183 43 | "1952-06",218 44 | "1952-07",230 45 | "1952-08",242 46 | "1952-09",209 47 | "1952-10",191 48 | "1952-11",172 49 | "1952-12",194 50 | "1953-01",196 51 | "1953-02",196 52 | "1953-03",236 53 | "1953-04",235 54 | "1953-05",229 55 | "1953-06",243 56 | "1953-07",264 57 | "1953-08",272 58 | "1953-09",237 59 | "1953-10",211 60 | "1953-11",180 61 | "1953-12",201 62 | "1954-01",204 63 | "1954-02",188 64 | "1954-03",235 65 | "1954-04",227 66 | "1954-05",234 67 | "1954-06",264 68 | "1954-07",302 69 | "1954-08",293 70 | "1954-09",259 71 | "1954-10",229 72 | "1954-11",203 73 | "1954-12",229 74 | "1955-01",242 75 | "1955-02",233 76 | "1955-03",267 77 | "1955-04",269 78 | "1955-05",270 79 | "1955-06",315 80 | "1955-07",364 81 | "1955-08",347 82 | "1955-09",312 83 | "1955-10",274 84 | 
"1955-11",237 85 | "1955-12",278 86 | "1956-01",284 87 | "1956-02",277 88 | "1956-03",317 89 | "1956-04",313 90 | "1956-05",318 91 | "1956-06",374 92 | "1956-07",413 93 | "1956-08",405 94 | "1956-09",355 95 | "1956-10",306 96 | "1956-11",271 97 | "1956-12",306 98 | "1957-01",315 99 | "1957-02",301 100 | "1957-03",356 101 | "1957-04",348 102 | "1957-05",355 103 | "1957-06",422 104 | "1957-07",465 105 | "1957-08",467 106 | "1957-09",404 107 | "1957-10",347 108 | "1957-11",305 109 | "1957-12",336 110 | "1958-01",340 111 | "1958-02",318 112 | "1958-03",362 113 | "1958-04",348 114 | "1958-05",363 115 | "1958-06",435 116 | "1958-07",491 117 | "1958-08",505 118 | "1958-09",404 119 | "1958-10",359 120 | "1958-11",310 121 | "1958-12",337 122 | "1959-01",360 123 | "1959-02",342 124 | "1959-03",406 125 | "1959-04",396 126 | "1959-05",420 127 | "1959-06",472 128 | "1959-07",548 129 | "1959-08",559 130 | "1959-09",463 131 | "1959-10",407 132 | "1959-11",362 133 | "1959-12",405 134 | "1960-01",417 135 | "1960-02",391 136 | "1960-03",419 137 | "1960-04",461 138 | "1960-05",472 139 | "1960-06",535 140 | "1960-07",622 141 | "1960-08",606 142 | "1960-09",508 143 | "1960-10",461 144 | "1960-11",390 145 | "1960-12",432 -------------------------------------------------------------------------------- /data/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.2,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.6,1.4,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 
74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /data/iss.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dataweekends/zero_to_deep_learning_video/efff3403504906c1e38c23f4d35fd3cb98af1950/data/iss.jpg -------------------------------------------------------------------------------- /data/sms.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dataweekends/zero_to_deep_learning_video/efff3403504906c1e38c23f4d35fd3cb98af1950/data/sms.wav -------------------------------------------------------------------------------- /data/us_retail_sales.csv: -------------------------------------------------------------------------------- 1 | YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC 1992,164083,164260,163747,164759,165617,166098,167305,167797,169407,170681,171025,172995 1993,175078,173770,172328,176766,178445,178201,180759,180692,181800,182910,184746,186339 
1994,185128,188077,191588,191632,190940,193196,193763,196157,197754,199579,199723,200670 1995,201583,198383,200230,201048,202993,205507,204959,206529,206978,206157,208661,210434 1996,208731,212011,213855,214644,216304,216059,216374,216355,219240,221039,220989,221898 1997,223524,225409,226136,224588,222906,226048,228738,229317,230284,229822,230486,231197 1998,231605,231664,233043,235976,237055,238958,237423,236412,238542,242531,244307,246577 1999,246891,249510,250657,252418,254738,255472,257441,260253,261352,261825,264883,269876 2000,268091,272020,275214,271004,271418,273440,272638,272943,277523,276973,275923,275736 2001,278916,278799,276468,280804,281540,280399,279522,281423,276095,294613,286960,283708 2002,283577,285061,284263,288820,284994,287401,290427,292582,288434,289634,291475,293819 2003,295294,291178,296347,295643,296395,299662,302788,307745,305916,304824,308551,307362 2004,309225,311427,316935,313531,318962,314971,318532,318945,324607,326680,327837,331877 2005,329187,332847,332517,337608,334551,344034,346715,342950,343711,344390,347047,347441 2006,357626,354614,356357,357320,356423,357763,359217,360497,358468,358435,359256,364300 2007,363462,364090,367714,366065,370887,367933,369472,370982,373138,375048,378371,375349 2008,375303,371661,371961,373877,375790,376584,374774,372091,366167,352900,339942,332065 2009,336977,335765,330219,331314,334290,339532,340286,346705,338356,341590,344911,346258 2010,346349,346972,354625,357086,354430,353830,354536,356560,359440,363855,367352,369299 2011,372041,375003,378472,380284,380398,383081,382966,383541,387313,390302,391478,391695 2012,395280,400181,401830,400095,399463,395723,397516,401937,405707,405598,407327,409468 2013,412207,416783,412514,413391,414207,415989,417962,417101,417682,419686,421056,422891 2014,419195,424657,429707,433082,434161,435587,435429,438598,437932,439829,441445,437996 2015,435881,433579,440463,440355,444400,444747,447112,447738,447641,446489,448724,450831 2016,446757,448895,447757,453397,454135,457409,457849,457722,462284,465321,466028,470616 2017,473104,471865,470844,,,,,,,,, -------------------------------------------------------------------------------- /data/user_visit_duration.csv: -------------------------------------------------------------------------------- 1 | Time (min),Buy 2 | 2.0,0 3 | 0.6833333333333333,0 4 | 3.216666666666667,1 5 | 0.9,0 6 | 1.5333333333333334,1 7 | 2.8833333333333333,1 8 | 0.8,0 9 | 1.4666666666666666,0 10 | 1.1166666666666667,0 11 | 0.6,0 12 | 1.35,1 13 | 3.183333333333333,1 14 | 2.7666666666666666,0 15 | 2.183333333333333,1 16 | 1.5,1 17 | 1.3333333333333333,1 18 | 1.5333333333333334,0 19 | 0.7833333333333333,0 20 | 2.9833333333333334,1 21 | 4.15,1 22 | 0.85,0 23 | 2.033333333333333,1 24 | 1.6,0 25 | 2.6166666666666667,1 26 | 2.683333333333333,1 27 | 1.95,0 28 | 0.4666666666666667,1 29 | 2.716666666666667,1 30 | 2.333333333333333,1 31 | 3.4166666666666665,1 32 | 0.26666666666666666,0 33 | 1.3833333333333333,1 34 | 0.5166666666666667,0 35 | 2.7,1 36 | 2.05,0 37 | 2.95,1 38 | 1.2333333333333334,0 39 | 3.6166666666666663,1 40 | 1.4333333333333331,1 41 | 2.066666666666667,0 42 | 2.066666666666667,1 43 | 1.5,0 44 | 2.433333333333333,0 45 | 2.95,1 46 | 2.216666666666667,1 47 | 0.9166666666666666,0 48 | 2.1,1 49 | 3.75,1 50 | 1.0,0 51 | 0.0,0 52 | 2.65,1 53 | 1.55,0 54 | 1.0666666666666669,0 55 | 2.0166666666666666,1 56 | 0.0,0 57 | 0.0,0 58 | 0.6666666666666666,0 59 | 2.5166666666666666,1 60 | 1.0666666666666669,0 61 | 1.25,0 62 | 2.95,1 63 | 0.0,0 64 | 1.9666666666666663,0 65 | 2.2,1 66 | 
2.9,1 67 | 3.85,1 68 | 2.3833333333333333,1 69 | 2.083333333333333,1 70 | 3.183333333333333,1 71 | 3.8666666666666663,1 72 | 2.183333333333333,0 73 | 2.833333333333333,1 74 | 2.7333333333333334,1 75 | 1.3833333333333333,0 76 | 1.1666666666666667,0 77 | 0.38333333333333336,0 78 | 1.1666666666666667,0 79 | 1.5166666666666666,0 80 | 3.216666666666667,1 81 | 1.1333333333333333,0 82 | 0.7,0 83 | 0.8166666666666667,0 84 | 3.883333333333333,1 85 | 2.216666666666667,1 86 | 0.75,0 87 | 2.566666666666667,0 88 | 0.0,0 89 | 0.0,0 90 | 1.7666666666666666,1 91 | 1.6833333333333331,1 92 | 0.21666666666666667,0 93 | 0.0,0 94 | 2.8833333333333333,1 95 | 2.466666666666667,1 96 | 1.2666666666666666,0 97 | 3.75,1 98 | 3.883333333333333,1 99 | 1.5666666666666669,0 100 | 1.6666666666666667,0 101 | 2.15,1 102 | -------------------------------------------------------------------------------- /data/wines.csv: -------------------------------------------------------------------------------- 1 | Class,Alcohol,Malic_acid,Ash,Alcalinity_of_ash,Magnesium,Total_phenols,Flavanoids,Nonflavanoid_phenols,Proanthocyanins,Color_intensity,Hue,OD280-OD315_of_diluted_wines,Proline 2 | 1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065 3 | 1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050 4 | 1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185 5 | 1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480 6 | 1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735 7 | 1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450 8 | 1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290 9 | 1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295 10 | 1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045 11 | 1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045 12 | 1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510 13 | 1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280 14 | 1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320 15 | 1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150 16 | 1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547 17 | 1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310 18 | 1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280 19 | 1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130 20 | 1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680 21 | 1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845 22 | 1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780 23 | 1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770 24 | 1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035 25 | 1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015 26 | 1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845 27 | 1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830 28 | 1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195 29 | 1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285 30 | 1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915 31 | 1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035 32 | 1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285 33 | 1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515 34 | 1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990 35 | 1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235 36 | 1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095 37 | 
1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920 38 | 1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880 39 | 1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105 40 | 1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020 41 | 1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760 42 | 1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795 43 | 1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035 44 | 1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095 45 | 1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680 46 | 1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885 47 | 1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080 48 | 1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065 49 | 1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985 50 | 1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060 51 | 1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260 52 | 1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150 53 | 1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265 54 | 1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190 55 | 1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375 56 | 1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060 57 | 1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120 58 | 1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970 59 | 1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270 60 | 1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285 61 | 2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520 62 | 2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680 63 | 2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450 64 | 2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630 65 | 2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420 66 | 2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355 67 | 2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678 68 | 2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502 69 | 2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510 70 | 2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750 71 | 2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718 72 | 2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870 73 | 2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410 74 | 2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472 75 | 2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985 76 | 2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886 77 | 2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428 78 | 2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392 79 | 2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500 80 | 2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750 81 | 2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463 82 | 2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278 83 | 2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714 84 | 2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630 85 | 2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515 86 | 2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520 87 | 2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450 88 | 2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495 89 | 2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562 90 | 
2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680 91 | 2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625 92 | 2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480 93 | 2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450 94 | 2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495 95 | 2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290 96 | 2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345 97 | 2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937 98 | 2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625 99 | 2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428 100 | 2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660 101 | 2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406 102 | 2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710 103 | 2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562 104 | 2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438 105 | 2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415 106 | 2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672 107 | 2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315 108 | 2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510 109 | 2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488 110 | 2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312 111 | 2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680 112 | 2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562 113 | 2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325 114 | 2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607 115 | 2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434 116 | 2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385 117 | 2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407 118 | 2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495 119 | 2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345 120 | 2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372 121 | 2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564 122 | 2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625 123 | 2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465 124 | 2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365 125 | 2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380 126 | 2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380 127 | 2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378 128 | 2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352 129 | 2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466 130 | 2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342 131 | 2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580 132 | 3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630 133 | 3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530 134 | 3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560 135 | 3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600 136 | 3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650 137 | 3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695 138 | 3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720 139 | 3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515 140 | 3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580 141 | 3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590 142 | 3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600 143 | 3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780 144 | 
3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520 145 | 3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550 146 | 3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855 147 | 3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830 148 | 3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415 149 | 3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625 150 | 3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650 151 | 3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550 152 | 3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500 153 | 3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480 154 | 3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425 155 | 3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675 156 | 3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640 157 | 3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725 158 | 3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480 159 | 3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880 160 | 3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660 161 | 3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620 162 | 3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520 163 | 3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680 164 | 3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570 165 | 3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675 166 | 3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615 167 | 3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520 168 | 3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695 169 | 3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685 170 | 3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750 171 | 3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630 172 | 3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510 173 | 3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470 174 | 3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660 175 | 3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740 176 | 3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750 177 | 3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835 178 | 3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840 179 | 3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560 -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: ztdl 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.7.* 6 | - bz2file==0.98 7 | - cython==0.29.* 8 | - pip==21.0.* 9 | - numpy==1.19.* 10 | - jupyter==1.0.* 11 | - matplotlib==3.3.* 12 | - setuptools==52.0.* 13 | - scikit-learn==0.24.* 14 | - scipy==1.6.* 15 | - pandas==1.2.* 16 | - pillow==8.2.* 17 | - seaborn==0.11.* 18 | - pytest==6.2.* 19 | - twisted==21.2.* 20 | - pip: 21 | - tensorflow==2.5.* 22 | -------------------------------------------------------------------------------- /solutions/2 Data exploration Exercises Solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "\n", 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "\n", 14 | "import pandas as pd" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | 
"## Exercise 1\n", 22 | "- load the dataset: `../data/international-airline-passengers.csv`\n", 23 | "- inspect it using the `.info()` and `.head()` commands\n", 24 | "- use the function `pd.to_datetime()` to change the column type of 'Month' to a datatime type\n", 25 | "- set the index of df to be a datetime index using the column 'Month' and the `df.set_index()` method\n", 26 | "- choose the appropriate plot and display the data\n", 27 | "- choose appropriate scale\n", 28 | "- label the axes" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "# - load the dataset: ../data/international-airline-passengers.csv\n", 38 | "df = pd.read_csv('../data/international-airline-passengers.csv')" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# - inspect it using the .info() and .head() commands\n", 48 | "df.info()" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "df.head()" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "# - use the function to_datetime() to change the column type of 'Month' to a datatime type\n", 67 | "# - set the index of df to be a datetime index using the column 'Month' and tthe set_index() method\n", 68 | "\n", 69 | "df['Month'] = pd.to_datetime(df['Month'])\n", 70 | "df = df.set_index('Month')" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "df.head()" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "# - choose the appropriate plot and display the data\n", 89 | "# - choose appropriate scale\n", 90 | "# - label the axes\n", 91 | "\n", 92 | "df.plot();" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## Exercise 2\n", 100 | "- load the dataset: `../data/weight-height.csv`\n", 101 | "- inspect it\n", 102 | "- plot it using a scatter plot with Weight as a function of Height\n", 103 | "- plot the male and female populations with 2 different colors on a new scatter plot\n", 104 | "- remember to label the axes" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "# - load the dataset: ../data/weight-height.csv\n", 114 | "# - inspect it\n", 115 | "df = pd.read_csv('../data/weight-height.csv')\n", 116 | "df.head()" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "df.info()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "df.describe()" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "scrolled": true 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "df['Gender'].value_counts()" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "# - plot it using a scatter plot with Weight as a function of Height\n", 155 | "_ = 
df.plot(kind='scatter', x='Height', y='Weight');" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "# - plot the male and female populations with 2 different colors on a new scatter plot\n", 165 | "# - remember to label the axes\n", 166 | "\n", 167 | "# this can be done in several ways, showing 2 here:\n", 168 | "males = df[df['Gender'] == 'Male']\n", 169 | "females = df.query('Gender == \"Female\"')\n", 170 | "fig, ax = plt.subplots()\n", 171 | "\n", 172 | "males.plot(kind='scatter', x='Height', y='Weight',\n", 173 | " ax=ax, color='blue', alpha=0.3,\n", 174 | " title='Male & Female Populations')\n", 175 | "\n", 176 | "females.plot(kind='scatter', x='Height', y='Weight',\n", 177 | " ax=ax, color='red', alpha=0.3);" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "df['Gendercolor'] = df['Gender'].map({'Male': 'blue', 'Female': 'red'})\n", 187 | "df.head()" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "df.plot(kind='scatter', \n", 197 | " x='Height',\n", 198 | " y='Weight',\n", 199 | " c=df['Gendercolor'],\n", 200 | " alpha=0.3,\n", 201 | " title='Male & Female Populations');" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "fig, ax = plt.subplots()\n", 211 | "ax.plot(males['Height'], males['Weight'], 'ob', \n", 212 | " females['Height'], females['Weight'], 'or', alpha=0.3)\n", 213 | "plt.xlabel('Height')\n", 214 | "plt.ylabel('Weight')\n", 215 | "plt.title('Male & Female Populations');" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": { 221 | "collapsed": true 222 | }, 223 | "source": [ 224 | "## Exercise 3\n", 225 | "- plot the histogram of the heights for males and for females on the same plot\n", 226 | "- use alpha to control transparency in the plot command\n", 227 | "- plot a vertical line at the mean of each population using `plt.axvline()`" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "males['Height'].plot(kind='hist',\n", 237 | " bins=50,\n", 238 | " range=(50, 80),\n", 239 | " alpha=0.3,\n", 240 | " color='blue')\n", 241 | "\n", 242 | "females['Height'].plot(kind='hist',\n", 243 | " bins=50,\n", 244 | " range=(50, 80),\n", 245 | " alpha=0.3,\n", 246 | " color='red')\n", 247 | "\n", 248 | "plt.title('Height distribution')\n", 249 | "plt.legend([\"Males\", \"Females\"])\n", 250 | "plt.xlabel(\"Height (in)\")\n", 251 | "\n", 252 | "\n", 253 | "plt.axvline(males['Height'].mean(), color='blue', linewidth=2)\n", 254 | "plt.axvline(females['Height'].mean(), color='red', linewidth=2);" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "males['Height'].plot(kind='hist',\n", 264 | " bins=200,\n", 265 | " range=(50, 80),\n", 266 | " alpha=0.3,\n", 267 | " color='blue',\n", 268 | " cumulative=True,\n", 269 | " density=True)\n", 270 | "\n", 271 | "females['Height'].plot(kind='hist',\n", 272 | " bins=200,\n", 273 | " range=(50, 80),\n", 274 | " alpha=0.3,\n", 275 | " color='red',\n", 276 | " cumulative=True,\n", 277 | " density=True)\n", 278 | "\n", 279 | 
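"# horizontal guides at cumulative fractions 0.2, 0.5 and 0.8: where they cross each curve marks that population's 20th, 50th and 80th percentiles\n",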
"plt.title('Height distribution')\n", 280 | "plt.legend([\"Males\", \"Females\"])\n", 281 | "plt.xlabel(\"Heigth (in)\")\n", 282 | "\n", 283 | "plt.axhline(0.8)\n", 284 | "plt.axhline(0.5)\n", 285 | "plt.axhline(0.2);" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "## Exercise 4\n", 293 | "- plot the weights of the males and females using a box plot\n", 294 | "- which one is easier to read?\n", 295 | "- (remember to put in titles, axes and legends)" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [ 304 | "dfpvt = df.pivot(columns = 'Gender', values = 'Weight')" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "dfpvt.head()" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "dfpvt.info()" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": {}, 329 | "outputs": [], 330 | "source": [ 331 | "dfpvt.plot(kind='box')\n", 332 | "plt.title('Weight Box Plot')\n", 333 | "plt.ylabel(\"Weight (lbs)\");" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "## Exercise 5\n", 341 | "- load the dataset: `../data/titanic-train.csv`\n", 342 | "- learn about scattermatrix here: http://pandas.pydata.org/pandas-docs/stable/visualization.html\n", 343 | "- display the data using a scattermatrix" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "df = pd.read_csv('../data/titanic-train.csv')\n", 353 | "df.head()" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": null, 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [ 362 | "from pandas.plotting import scatter_matrix" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "_ = scatter_matrix(df.drop('PassengerId', axis=1), figsize=(10, 10))" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [] 380 | } 381 | ], 382 | "metadata": { 383 | "anaconda-cloud": {}, 384 | "kernelspec": { 385 | "display_name": "Python 3", 386 | "language": "python", 387 | "name": "python3" 388 | }, 389 | "language_info": { 390 | "codemirror_mode": { 391 | "name": "ipython", 392 | "version": 3 393 | }, 394 | "file_extension": ".py", 395 | "mimetype": "text/x-python", 396 | "name": "python", 397 | "nbconvert_exporter": "python", 398 | "pygments_lexer": "ipython3", 399 | "version": "3.7.10" 400 | } 401 | }, 402 | "nbformat": 4, 403 | "nbformat_minor": 1 404 | } 405 | -------------------------------------------------------------------------------- /solutions/3 Machine Learning Exercises Solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Machine Learning Exercises Solution" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "%matplotlib inline\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "import pandas 
as pd\n", 19 | "import numpy as np" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Exercise 1\n", 27 | "\n", 28 | "You've just been hired at a real estate investment firm and they would like you to build a model for pricing houses. You are given a dataset that contains data for house prices and a few features like number of bedrooms, size in square feet and age of the house. Let's see if you can build a model that is able to predict the price. In this exercise we extend what we have learned about linear regression to a dataset with more than one feature. Here are the steps to complete it:\n", 29 | "\n", 30 | "1. Load the dataset ../data/housing-data.csv\n", 31 | "- plot the histograms for each feature\n", 32 | "- create 2 variables called X and y: X shall be a matrix with 3 columns (sqft,bdrms,age) and y shall be a vector with 1 column (price)\n", 33 | "- create a linear regression model in Keras with the appropriate number of inputs and output\n", 34 | "- split the data into train and test with a 20% test size\n", 35 | "- train the model on the training set and check its accuracy on training and test set\n", 36 | "- how's your model doing? Is the loss growing smaller?\n", 37 | "- try to improve your model with these experiments:\n", 38 | " - normalize the input features with one of the rescaling techniques mentioned above\n", 39 | " - use a different value for the learning rate of your model\n", 40 | " - use a different optimizer\n", 41 | "- once you're satisfied with training, check the R2score on the test set" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# Load the dataset ../data/housing-data.csv\n", 51 | "df = pd.read_csv('../data/housing-data.csv')\n", 52 | "df.head()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "df.columns" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# plot the histograms for each feature\n", 71 | "plt.figure(figsize=(15, 5))\n", 72 | "for i, feature in enumerate(df.columns):\n", 73 | " plt.subplot(1, 4, i+1)\n", 74 | " df[feature].plot(kind='hist', title=feature)\n", 75 | " plt.xlabel(feature)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "# create 2 variables called X and y:\n", 85 | "# X shall be a matrix with 3 columns (sqft,bdrms,age)\n", 86 | "# and y shall be a vector with 1 column (price)\n", 87 | "X = df[['sqft', 'bdrms', 'age']].values\n", 88 | "y = df['price'].values" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "X" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "y" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "from tensorflow.keras.models import Sequential\n", 116 | "from tensorflow.keras.layers import Dense\n", 117 | "from tensorflow.keras.optimizers import Adam" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "# create a 
linear regression model in Keras\n", 127 | "# with the appropriate number of inputs and output\n", 128 | "model = Sequential()\n", 129 | "model.add(Dense(1, input_shape=(3,)))\n", 130 | "model.compile(Adam(learning_rate=0.8), 'mean_squared_error')" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "from sklearn.model_selection import train_test_split" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "# split the data into train and test with a 20% test size\n", 149 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "len(X_train)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "len(X)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "# train the model on the training set and check its accuracy on training and test set\n", 177 | "# how's your model doing? Is the loss growing smaller?\n", 178 | "model.fit(X_train, y_train, epochs=10)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "df.describe()" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "from sklearn.metrics import r2_score" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "# check the R2score on training and test set (probably very bad)\n", 206 | "\n", 207 | "y_train_pred = model.predict(X_train)\n", 208 | "y_test_pred = model.predict(X_test)\n", 209 | "\n", 210 | "print(\"The R2 score on the Train set is:\\t{:0.3f}\".format(r2_score(y_train, y_train_pred)))\n", 211 | "print(\"The R2 score on the Test set is:\\t{:0.3f}\".format(r2_score(y_test, y_test_pred)))" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "# try to improve your model with these experiments:\n", 221 | "# - normalize the input features with one of the rescaling techniques mentioned above\n", 222 | "# - use a different value for the learning rate of your model\n", 223 | "# - use a different optimizer\n", 224 | "df['sqft1000'] = df['sqft']/1000.0\n", 225 | "df['age10'] = df['age']/10.0\n", 226 | "df['price100k'] = df['price']/1e5" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "X = df[['sqft1000', 'bdrms', 'age10']].values\n", 236 | "y = df['price100k'].values" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "model = Sequential()\n", 255 | "model.add(Dense(1, input_dim=3))\n", 
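"# with sqft, age and price rescaled to O(1) values (sqft1000, age10, price100k),\n", "# a moderate learning rate like 0.1 should converge much more smoothly\n",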
256 | "model.compile(Adam(learning_rate=0.1), 'mean_squared_error')\n", 257 | "model.fit(X_train, y_train, epochs=20)" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "# once you're satisfied with training, check the R2score on the test set\n", 267 | "\n", 268 | "y_train_pred = model.predict(X_train)\n", 269 | "y_test_pred = model.predict(X_test)\n", 270 | "\n", 271 | "print(\"The R2 score on the Train set is:\\t{:0.3f}\".format(r2_score(y_train, y_train_pred)))\n", 272 | "print(\"The R2 score on the Test set is:\\t{:0.3f}\".format(r2_score(y_test, y_test_pred)))" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "model.fit(X_train, y_train, epochs=40, verbose=0)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "# once you're satisfied with training, check the R2score on the test set\n", 291 | "\n", 292 | "y_train_pred = model.predict(X_train)\n", 293 | "y_test_pred = model.predict(X_test)\n", 294 | "\n", 295 | "print(\"The R2 score on the Train set is:\\t{:0.3f}\".format(r2_score(y_train, y_train_pred)))\n", 296 | "print(\"The R2 score on the Test set is:\\t{:0.3f}\".format(r2_score(y_test, y_test_pred)))" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "## Exercise 2\n", 304 | "\n", 305 | "Your boss was extremely happy with your work on the housing price prediction model and decided to entrust you with a more challenging task. They've seen a lot of people leave the company recently and they would like to understand why that's happening. They have collected historical data on employees and they would like you to build a model that is able to predict which employee will leave next. The would like a model that is better than random guessing. They also prefer false negatives than false positives, in this first phase. Fields in the dataset include:\n", 306 | "\n", 307 | "- Employee satisfaction level\n", 308 | "- Last evaluation\n", 309 | "- Number of projects\n", 310 | "- Average monthly hours\n", 311 | "- Time spent at the company\n", 312 | "- Whether they have had a work accident\n", 313 | "- Whether they have had a promotion in the last 5 years\n", 314 | "- Department\n", 315 | "- Salary\n", 316 | "- Whether the employee has left\n", 317 | "\n", 318 | "Your goal is to predict the binary outcome variable `left` using the rest of the data. Since the outcome is binary, this is a classification problem. Here are some things you may want to try out:\n", 319 | "\n", 320 | "1. load the dataset at ../data/HR_comma_sep.csv, inspect it with `.head()`, `.info()` and `.describe()`.\n", 321 | "- Establish a benchmark: what would be your accuracy score if you predicted everyone stay?\n", 322 | "- Check if any feature needs rescaling. You may plot a histogram of the feature to decide which rescaling method is more appropriate.\n", 323 | "- convert the categorical features into binary dummy columns. 
You will then have to combine them with the numerical features using `pd.concat`.\n", 324 | "- do the usual train/test split with a 20% test size\n", 325 | "- play around with learning rate and optimizer\n", 326 | "- check the confusion matrix, precision and recall\n", 327 | "- check if you still get the same results if you use a 5-Fold cross validation on all the data\n", 328 | "- Is the model good enough for your boss?\n", 329 | "\n", 330 | "As you will see in this exercise, a logistic regression model is not good enough to help your boss. In the next chapter we will learn how to go beyond linear models.\n", 331 | "\n", 332 | "This dataset comes from https://www.kaggle.com/ludobenistant/hr-analytics/ and is released under [CC BY-SA 4.0 License](https://creativecommons.org/licenses/by-sa/4.0/)." 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "# load the dataset at ../data/HR_comma_sep.csv, inspect it with `.head()`, `.info()` and `.describe()`.\n", 342 | "\n", 343 | "df = pd.read_csv('../data/HR_comma_sep.csv')" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "df.head()" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "df.info()" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": {}, 368 | "outputs": [], 369 | "source": [ 370 | "df.describe()" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "metadata": {}, 377 | "outputs": [], 378 | "source": [ 379 | "# Establish a benchmark: what would be your accuracy score if you predicted that everyone stays?\n", 380 | "\n", 381 | "df.left.value_counts() / len(df)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "Predicting 0 all the time would yield an accuracy of 76%." 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [ 397 | "# Check if any feature needs rescaling.\n", 398 | "# You may plot a histogram of the feature to decide which rescaling method is more appropriate.\n", 399 | "df['average_montly_hours'].plot(kind='hist');" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": {}, 406 | "outputs": [], 407 | "source": [ 408 | "df['average_montly_hours_100'] = df['average_montly_hours']/100.0" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": null, 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [ 417 | "df['average_montly_hours_100'].plot(kind='hist');" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": {}, 424 | "outputs": [], 425 | "source": [ 426 | "df['time_spend_company'].plot(kind='hist');" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 432 | "metadata": {}, 433 | "outputs": [], 434 | "source": [ 435 | "# convert the categorical features into binary dummy columns.\n", 436 | "# You will then have to combine them with\n", 437 | "# the numerical features using `pd.concat`.\n", 438 | "df_dummies = pd.get_dummies(df[['sales', 'salary']])" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": {}, 445 | 
"outputs": [], 446 | "source": [ 447 | "df_dummies.head()\n" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "df.columns" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "metadata": {}, 463 | "outputs": [], 464 | "source": [ 465 | "X = pd.concat([df[['satisfaction_level', 'last_evaluation', 'number_project',\n", 466 | " 'time_spend_company', 'Work_accident',\n", 467 | " 'promotion_last_5years', 'average_montly_hours_100']],\n", 468 | " df_dummies], axis=1).values\n", 469 | "y = df['left'].values" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": null, 475 | "metadata": {}, 476 | "outputs": [], 477 | "source": [ 478 | "X.shape" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "metadata": {}, 485 | "outputs": [], 486 | "source": [ 487 | "# do the usual train/test split with a 20% test size\n", 488 | "\n", 489 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": null, 495 | "metadata": {}, 496 | "outputs": [], 497 | "source": [ 498 | "# play around with learning rate and optimizer\n", 499 | "\n", 500 | "model = Sequential()\n", 501 | "model.add(Dense(1, input_dim=20, activation='sigmoid'))\n", 502 | "model.compile(Adam(learning_rate=0.5), 'binary_crossentropy', metrics=['accuracy'])" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": null, 508 | "metadata": {}, 509 | "outputs": [], 510 | "source": [ 511 | "model.summary()" 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": null, 517 | "metadata": {}, 518 | "outputs": [], 519 | "source": [ 520 | "model.fit(X_train, y_train, epochs=10)" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "metadata": {}, 527 | "outputs": [], 528 | "source": [ 529 | "y_test_pred = model.predict_classes(X_test)" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": null, 535 | "metadata": {}, 536 | "outputs": [], 537 | "source": [ 538 | "from sklearn.metrics import confusion_matrix, classification_report" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": null, 544 | "metadata": {}, 545 | "outputs": [], 546 | "source": [ 547 | "def pretty_confusion_matrix(y_true, y_pred, labels=[\"False\", \"True\"]):\n", 548 | " cm = confusion_matrix(y_true, y_pred)\n", 549 | " pred_labels = ['Predicted '+ l for l in labels]\n", 550 | " df = pd.DataFrame(cm, index=labels, columns=pred_labels)\n", 551 | " return df" 552 | ] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "execution_count": null, 557 | "metadata": {}, 558 | "outputs": [], 559 | "source": [ 560 | "# check the confusion matrix, precision and recall\n", 561 | "\n", 562 | "pretty_confusion_matrix(y_test, y_test_pred, labels=['Stay', 'Leave'])" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": null, 568 | "metadata": {}, 569 | "outputs": [], 570 | "source": [ 571 | "print(classification_report(y_test, y_test_pred))" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": {}, 578 | "outputs": [], 579 | "source": [ 580 | "from tensorflow.keras.wrappers.scikit_learn import KerasClassifier" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": null, 586 | "metadata": {}, 587 | "outputs": 
[], 588 | "source": [ 589 | "# check if you still get the same results if you use a 5-Fold cross validation on all the data\n", 590 | "\n", 591 | "def build_logistic_regression_model():\n", 592 | " model = Sequential()\n", 593 | " model.add(Dense(1, input_dim=20, activation='sigmoid'))\n", 594 | " model.compile(Adam(learning_rate=0.5), 'binary_crossentropy', metrics=['accuracy'])\n", 595 | " return model\n", 596 | "\n", 597 | "model = KerasClassifier(build_fn=build_logistic_regression_model,\n", 598 | " epochs=10, verbose=0)" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": null, 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [ 607 | "from sklearn.model_selection import KFold, cross_val_score" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": null, 613 | "metadata": {}, 614 | "outputs": [], 615 | "source": [ 616 | "cv = KFold(5, shuffle=True)\n", 617 | "scores = cross_val_score(model, X, y, cv=cv)\n", 618 | "\n", 619 | "print(\"The cross validation accuracy is {:0.4f} ± {:0.4f}\".format(scores.mean(), scores.std()))" 620 | ] 621 | }, 622 | { 623 | "cell_type": "code", 624 | "execution_count": null, 625 | "metadata": {}, 626 | "outputs": [], 627 | "source": [ 628 | "scores" 629 | ] 630 | }, 631 | { 632 | "cell_type": "code", 633 | "execution_count": null, 634 | "metadata": {}, 635 | "outputs": [], 636 | "source": [ 637 | "# Is the model good enough for your boss?" 638 | ] 639 | }, 640 | { 641 | "cell_type": "markdown", 642 | "metadata": {}, 643 | "source": [ 644 | "No, the model is not good enough for my boss, since it performs no better than the benchmark." 645 | ] 646 | } 647 | ], 648 | "metadata": { 649 | "kernelspec": { 650 | "display_name": "Python 3", 651 | "language": "python", 652 | "name": "python3" 653 | }, 654 | "language_info": { 655 | "codemirror_mode": { 656 | "name": "ipython", 657 | "version": 3 658 | }, 659 | "file_extension": ".py", 660 | "mimetype": "text/x-python", 661 | "name": "python", 662 | "nbconvert_exporter": "python", 663 | "pygments_lexer": "ipython3", 664 | "version": "3.7.10" 665 | } 666 | }, 667 | "nbformat": 4, 668 | "nbformat_minor": 2 669 | } 670 | -------------------------------------------------------------------------------- /solutions/4 Deep Learning Intro Exercises Solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Deep Learning Intro" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "%matplotlib inline\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "import pandas as pd\n", 19 | "import numpy as np" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Exercise 1" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "source": [ 35 | "The [Pima Indians dataset](https://archive.ics.uci.edu/ml/datasets/diabetes) is a very famous dataset distributed by UCI and originally collected from the National Institute of Diabetes and Digestive and Kidney Diseases. It contains data from clinical exams for women aged 21 and above of Pima Indian origin. 
The objective is to predict, based on diagnostic measurements, whether a patient has diabetes.\n", 36 | "\n", 37 | "It has the following features:\n", 38 | "\n", 39 | "- Pregnancies: Number of times pregnant\n", 40 | "- Glucose: Plasma glucose concentration at 2 hours in an oral glucose tolerance test\n", 41 | "- BloodPressure: Diastolic blood pressure (mm Hg)\n", 42 | "- SkinThickness: Triceps skin fold thickness (mm)\n", 43 | "- Insulin: 2-Hour serum insulin (mu U/ml)\n", 44 | "- BMI: Body mass index (weight in kg/(height in m)^2)\n", 45 | "- DiabetesPedigreeFunction: Diabetes pedigree function\n", 46 | "- Age: Age (years)\n", 47 | "\n", 48 | "The last column is the outcome, and it is a binary variable.\n", 49 | "\n", 50 | "In this first exercise we will explore it through the following steps:\n", 51 | "\n", 52 | "1. Load the ../data/diabetes.csv dataset, use pandas to explore the range of each feature\n", 53 | "- For each feature draw a histogram. Bonus points if you draw all the histograms in the same figure.\n", 54 | "- Explore correlations of features with the outcome column. You can do this in several ways, for example using the `sns.pairplot` we used above or drawing a heatmap of the correlations.\n", 55 | "- Do features need standardization? If so, what standardization technique will you use? MinMax? Standard?\n", 56 | "- Prepare your final `X` and `y` variables to be used by a ML model. Make sure you define your target variable well. Will you need dummy columns?" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "df = pd.read_csv('../data/diabetes.csv')\n", 66 | "df.head()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "_ = df.hist(figsize=(12, 10))" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "import seaborn as sns" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "sns.pairplot(df, hue='Outcome');" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "sns.heatmap(df.corr(), annot=True)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "df.info()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "df.describe()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "from sklearn.preprocessing import StandardScaler" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "from tensorflow.keras.utils import to_categorical" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "sc = StandardScaler()\n", 148 | "X = sc.fit_transform(df.drop('Outcome', axis=1))\n", 149 | "y = df['Outcome'].values\n", 150 | "y_cat = to_categorical(y)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": 
[], 158 | "source": [ 159 | "X.shape" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "y_cat.shape" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "## Exercise 2" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": { 181 | "collapsed": true 182 | }, 183 | "source": [ 184 | "Build a fully connected NN model that predicts diabetes. Follow these steps:\n", 185 | "\n", 186 | "1. Split your data in a train/test with a test size of 20% and a `random_state = 22`\n", 187 | "- define a sequential model with at least one inner layer. You will have to make choices for the following things:\n", 188 | " - what is the size of the input?\n", 189 | " - how many nodes will you use in each layer?\n", 190 | " - what is the size of the output?\n", 191 | " - what activation functions will you use in the inner layers?\n", 192 | " - what activation function will you use at output?\n", 193 | " - what loss function will you use?\n", 194 | " - what optimizer will you use?\n", 195 | "- fit your model on the training set, using a validation_split of 0.1\n", 196 | "- test your trained model on the test data from the train/test split\n", 197 | "- check the accuracy score, the confusion matrix and the classification report" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "X.shape" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "from sklearn.model_selection import train_test_split" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "X_train, X_test, y_train, y_test = train_test_split(X, y_cat,\n", 225 | " random_state=22,\n", 226 | " test_size=0.2)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "from tensorflow.keras.models import Sequential\n", 236 | "from tensorflow.keras.layers import Dense\n", 237 | "from tensorflow.keras.optimizers import Adam" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "model = Sequential()\n", 247 | "model.add(Dense(32, input_shape=(8,), activation='relu'))\n", 248 | "model.add(Dense(32, activation='relu'))\n", 249 | "model.add(Dense(2, activation='softmax'))\n", 250 | "model.compile(Adam(learning_rate=0.05),\n", 251 | " loss='categorical_crossentropy',\n", 252 | " metrics=['accuracy'])" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "model.summary()" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "32*8 + 32" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "model.fit(X_train, y_train, epochs=20, verbose=2, validation_split=0.1)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "y_pred = model.predict(X_test)" 289 | 
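, "  # softmax probabilities for each class, shape (n_samples, 2)"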
] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "y_test_class = np.argmax(y_test, axis=1)\n", 298 | "y_pred_class = np.argmax(y_pred, axis=1)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "from sklearn.metrics import accuracy_score\n", 308 | "from sklearn.metrics import classification_report\n", 309 | "from sklearn.metrics import confusion_matrix" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "pd.Series(y_test_class).value_counts() / len(y_test_class)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "accuracy_score(y_test_class, y_pred_class)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [ 336 | "print(classification_report(y_test_class, y_pred_class))" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "confusion_matrix(y_test_class, y_pred_class)" 346 | ] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "## Exercise 3\n", 353 | "Compare your work with the results presented in [this notebook](https://www.kaggle.com/sheshu/pima-data-visualisation-and-machine-learning). Are your Neural Network results better or worse than the results obtained by traditional Machine Learning techniques?\n", 354 | "\n", 355 | "- Try training a Support Vector Machine or a Random Forest model on the exact same train/test split. Is the performance better or worse?\n", 356 | "- Try restricting your features to only 4 features like in the suggested notebook. How does model performance change?" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "from sklearn.ensemble import RandomForestClassifier\n", 366 | "from sklearn.svm import SVC\n", 367 | "from sklearn.naive_bayes import GaussianNB\n", 368 | "\n", 369 | "for mod in [RandomForestClassifier(), SVC(), GaussianNB()]:\n", 370 | " mod.fit(X_train, y_train[:, 1])\n", 371 | " y_pred = mod.predict(X_test)\n", 372 | " print(\"=\"*80)\n", 373 | " print(mod)\n", 374 | " print(\"-\"*80)\n", 375 | " print(\"Accuracy score: {:0.3}\".format(accuracy_score(y_test_class,\n", 376 | " y_pred)))\n", 377 | " print(\"Confusion Matrix:\")\n", 378 | " print(confusion_matrix(y_test_class, y_pred))\n", 379 | " print()" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "## Exercise 4\n", 387 | "\n", 388 | "[TensorFlow Playground](http://playground.tensorflow.org/) is a web-based neural network demo. It is really useful to develop an intuition about what happens when you change architecture, activation function or other parameters. Try playing with it for a few minutes. You don't need to understand the meaning of every knob and button in the page, just get a sense for what happens if you change something. 
In the next chapter we'll explore these things in more detail.\n" 389 | ] 390 | } 391 | ], 392 | "metadata": { 393 | "kernelspec": { 394 | "display_name": "Python 3", 395 | "language": "python", 396 | "name": "python3" 397 | }, 398 | "language_info": { 399 | "codemirror_mode": { 400 | "name": "ipython", 401 | "version": 3 402 | }, 403 | "file_extension": ".py", 404 | "mimetype": "text/x-python", 405 | "name": "python", 406 | "nbconvert_exporter": "python", 407 | "pygments_lexer": "ipython3", 408 | "version": "3.7.10" 409 | } 410 | }, 411 | "nbformat": 4, 412 | "nbformat_minor": 2 413 | } 414 | -------------------------------------------------------------------------------- /solutions/5 Gradient Descent Exercises Solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Gradient Descent" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "%matplotlib inline\n", 19 | "import matplotlib.pyplot as plt" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "### Exercise 1\n", 27 | "\n", 28 | "You've just been hired at a wine company and they would like you to help them build a model that predicts the quality of their wine based on several measurements. They give you a dataset with wine measurements.\n", 29 | "\n", 30 | "- Load the ../data/wines.csv into Pandas\n", 31 | "- Use the column called \"Class\" as target\n", 32 | "- Check how many classes there are in the target, and if necessary use dummy columns for a multi-class classification\n", 33 | "- Use all the other columns as features, check their range and distribution (using seaborn pairplot)\n", 34 | "- Rescale all the features using either MinMaxScaler or StandardScaler\n", 35 | "- Build a deep model with at least 1 hidden layer to classify the data\n", 36 | "- Choose the cost function: what will you use? Mean Squared Error? Binary Cross-Entropy? Categorical Cross-Entropy?\n", 37 | "- Choose an optimizer\n", 38 | "- Choose a value for the learning rate, you may want to try with several values\n", 39 | "- Choose a batch size\n", 40 | "- Train your model on all the data using a `validation_split=0.2`. 
Can you converge to 100% validation accuracy?\n", 41 | "- What's the minimum number of epochs to converge?\n", 42 | "- Repeat the training several times to verify how stable your results are" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "df = pd.read_csv('../data/wines.csv')" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "df.head()" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "y = df['Class']" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "y.value_counts()" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "y_cat = pd.get_dummies(y)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "y_cat.head()" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "X = df.drop('Class', axis=1)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "X.shape" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "import seaborn as sns" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "sns.pairplot(df, hue='Class')" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "from sklearn.preprocessing import StandardScaler" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "sc = StandardScaler()" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "Xsc = sc.fit_transform(X)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "from tensorflow.keras.models import Sequential\n", 169 | "from tensorflow.keras.layers import Dense\n", 170 | "from tensorflow.keras.optimizers import SGD, Adam, Adadelta, RMSprop\n", 171 | "import tensorflow.keras.backend as K" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "K.clear_session()\n", 181 | "model = Sequential()\n", 182 | "model.add(Dense(5, input_shape=(13,),\n", 183 | " kernel_initializer='he_normal',\n", 184 | " activation='relu'))\n", 185 | "model.add(Dense(3, activation='softmax'))\n", 186 | "\n", 187 | "model.compile(RMSprop(learning_rate=0.1),\n", 188 | " 'categorical_crossentropy',\n", 189 | " metrics=['accuracy'])\n", 190 | "\n", 191 | "model.fit(Xsc, y_cat.values,\n", 192 | " batch_size=8,\n", 193 | " epochs=10,\n", 194 | " verbose=1,\n", 195 | " validation_split=0.2)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 
201 | "source": [ 202 | "### Exercise 2\n", 203 | "\n", 204 | "Since this dataset has 13 features we can only visualize pairs of features like we did in the Paired plot. We could however exploit the fact that a neural network is a function to extract 2 high level features to represent our data.\n", 205 | "\n", 206 | "- Build a deep fully connected network with the following structure:\n", 207 | " - Layer 1: 8 nodes\n", 208 | " - Layer 2: 5 nodes\n", 209 | " - Layer 3: 2 nodes\n", 210 | " - Output : 3 nodes\n", 211 | "- Choose activation functions, inizializations, optimizer and learning rate so that it converges to 100% accuracy within 20 epochs (not easy)\n", 212 | "- Remember to train the model on the scaled data\n", 213 | "- Define a Feature Function like we did above between the input of the 1st layer and the output of the 3rd layer\n", 214 | "- Calculate the features and plot them on a 2-dimensional scatter plot\n", 215 | "- Can we distinguish the 3 classes well?\n" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "K.clear_session()\n", 225 | "model = Sequential()\n", 226 | "model.add(Dense(8, input_shape=(13,),\n", 227 | " kernel_initializer='he_normal', activation='tanh'))\n", 228 | "model.add(Dense(5, kernel_initializer='he_normal', activation='tanh'))\n", 229 | "model.add(Dense(2, kernel_initializer='he_normal', activation='tanh'))\n", 230 | "model.add(Dense(3, activation='softmax'))\n", 231 | "\n", 232 | "model.compile(RMSprop(learning_rate=0.05),\n", 233 | " 'categorical_crossentropy',\n", 234 | " metrics=['accuracy'])\n", 235 | "\n", 236 | "model.fit(Xsc, y_cat.values,\n", 237 | " batch_size=16,\n", 238 | " epochs=20,\n", 239 | " verbose=1)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "model.summary()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "inp = model.layers[0].input\n", 258 | "out = model.layers[2].output" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "features_function = K.function([inp], [out])" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "features = features_function([Xsc])[0]" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "features.shape" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "plt.scatter(features[:, 0], features[:, 1], c=y)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "### Exercise 3\n", 302 | "\n", 303 | "Keras functional API. So far we've always used the Sequential model API in Keras. However, Keras also offers a Functional API, which is much more powerful. You can find its [documentation here](https://keras.io/getting-started/functional-api-guide/). 
Let's see how we can leverage it.\n", 304 | "\n", 305 | "- define an input layer called `inputs`\n", 306 | "- define two hidden layers as before, one with 8 nodes, one with 5 nodes\n", 307 | "- define a `second_to_last` layer with 2 nodes\n", 308 | "- define an output layer with 3 nodes\n", 309 | "- create a model that connects the input and output\n", 310 | "- train it and make sure that it converges\n", 311 | "- define a function between inputs and second_to_last layer\n", 312 | "- recalculate the features and plot them" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "from tensorflow.keras.layers import Input\n", 322 | "from tensorflow.keras.models import Model" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": {}, 329 | "outputs": [], 330 | "source": [ 331 | "K.clear_session()\n", 332 | "\n", 333 | "inputs = Input(shape=(13,))\n", 334 | "x = Dense(8, kernel_initializer='he_normal', activation='tanh')(inputs)\n", 335 | "x = Dense(5, kernel_initializer='he_normal', activation='tanh')(x)\n", 336 | "second_to_last = Dense(2, kernel_initializer='he_normal',\n", 337 | " activation='tanh')(x)\n", 338 | "outputs = Dense(3, activation='softmax')(second_to_last)\n", 339 | "\n", 340 | "model = Model(inputs=inputs, outputs=outputs)\n", 341 | "\n", 342 | "model.compile(RMSprop(learning_rate=0.05),\n", 343 | " 'categorical_crossentropy',\n", 344 | " metrics=['accuracy'])\n", 345 | "\n", 346 | "model.fit(Xsc, y_cat.values, batch_size=16, epochs=20, verbose=1)" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "features_function = K.function([inputs], [second_to_last])" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "features = features_function([Xsc])[0]" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "plt.scatter(features[:, 0], features[:, 1], c=y)" 374 | ] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "metadata": {}, 379 | "source": [ 380 | "## Exercise 4 \n", 381 | "\n", 382 | "Keras offers the possibility to call a function at each epoch. These are Callbacks, and their [documentation is here](https://keras.io/callbacks/). Callbacks allow us to add some neat functionality. 
In this exercise we'll explore a few of them.\n", 383 | "\n", 384 | "- Split the data into train and test sets with a test_size = 0.3 and random_state=42\n", 385 | "- Reset and recompile your model\n", 386 | "- train the model on the train data using `validation_data=(X_test, y_test)`\n", 387 | "- Use the `EarlyStopping` callback to stop your training if the `val_loss` doesn't improve\n", 388 | "- Use the `ModelCheckpoint` callback to save the best model to disk while training progresses\n", 389 | "- Use the `TensorBoard` callback to output your training information to a `/tmp/` subdirectory\n", 390 | "- Watch the next video for an overview of TensorBoard" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": null, 396 | "metadata": {}, 397 | "outputs": [], 398 | "source": [ 399 | "from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": {}, 406 | "outputs": [], 407 | "source": [ 408 | "checkpointer = ModelCheckpoint(filepath=\"/tmp/udemy/weights.hdf5\",\n", 409 | " verbose=1, save_best_only=True)" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": {}, 416 | "outputs": [], 417 | "source": [ 418 | "earlystopper = EarlyStopping(monitor='val_loss', min_delta=0,\n", 419 | " patience=1, verbose=1, mode='auto')" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "metadata": {}, 426 | "outputs": [], 427 | "source": [ 428 | "tensorboard = TensorBoard(log_dir='/tmp/udemy/tensorboard/')" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "from sklearn.model_selection import train_test_split" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": null, 443 | "metadata": {}, 444 | "outputs": [], 445 | "source": [ 446 | "X_train, X_test, y_train, y_test = train_test_split(Xsc, y_cat.values,\n", 447 | " test_size=0.3,\n", 448 | " random_state=42)" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": null, 454 | "metadata": {}, 455 | "outputs": [], 456 | "source": [ 457 | "K.clear_session()\n", 458 | "\n", 459 | "inputs = Input(shape=(13,))\n", 460 | "\n", 461 | "x = Dense(8, kernel_initializer='he_normal', activation='tanh')(inputs)\n", 462 | "x = Dense(5, kernel_initializer='he_normal', activation='tanh')(x)\n", 463 | "second_to_last = Dense(2, kernel_initializer='he_normal',\n", 464 | " activation='tanh')(x)\n", 465 | "outputs = Dense(3, activation='softmax')(second_to_last)\n", 466 | "\n", 467 | "model = Model(inputs=inputs, outputs=outputs)\n", 468 | "\n", 469 | "model.compile(RMSprop(learning_rate=0.05), 'categorical_crossentropy',\n", 470 | " metrics=['accuracy'])\n", 471 | "\n", 472 | "model.fit(X_train, y_train, batch_size=32,\n", 473 | " epochs=20, verbose=2,\n", 474 | " validation_data=(X_test, y_test),\n", 475 | " callbacks=[checkpointer, earlystopper, tensorboard])" 476 | ] 477 | }, 478 | { 479 | "cell_type": "markdown", 480 | "metadata": {}, 481 | "source": [ 482 | "Run TensorBoard with the command:\n", 483 | "\n", 484 | "    tensorboard --logdir /tmp/udemy/tensorboard/\n", 485 | "    \n", 486 | "and open your browser at http://localhost:6006" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": {}, 493 | "outputs": [], 494 | "source": [] 495 | } 496 | ], 497 | 
"metadata": { 498 | "kernelspec": { 499 | "display_name": "Python 3", 500 | "language": "python", 501 | "name": "python3" 502 | }, 503 | "language_info": { 504 | "codemirror_mode": { 505 | "name": "ipython", 506 | "version": 3 507 | }, 508 | "file_extension": ".py", 509 | "mimetype": "text/x-python", 510 | "name": "python", 511 | "nbconvert_exporter": "python", 512 | "pygments_lexer": "ipython3", 513 | "version": "3.7.10" 514 | } 515 | }, 516 | "nbformat": 4, 517 | "nbformat_minor": 2 518 | } 519 | -------------------------------------------------------------------------------- /solutions/6 Convolutional Neural Networks Exercises Solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Convolutional Neural Networks Exercises Solution" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import numpy as np\n", 18 | "%matplotlib inline\n", 19 | "import matplotlib.pyplot as plt" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "from tensorflow.keras.utils import to_categorical" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "from tensorflow.keras.models import Sequential\n", 38 | "from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten\n", 39 | "import tensorflow.keras.backend as K" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "source": [ 48 | "### Exercise 1\n", 49 | "\n", 50 | "You've been hired by a shipping company to overhaul the way they route mail, parcels and packages. They want to build an image recognition system capable of recognizing the digits in the zipcode on a package, so that it can be automatically routed to the correct location.\n", 51 | "You are tasked to build the digit recognition system. Luckily, you can rely on the MNIST dataset for the intial training of your model!\n", 52 | "\n", 53 | "Build a deep convolutional neural network with at least two convolutional and two pooling layers before the fully connected layer.\n", 54 | "\n", 55 | "- Start from the network we have just built\n", 56 | "- Insert a `Conv2D` layer after the first `MaxPool2D`, give it 64 filters.\n", 57 | "- Insert a `MaxPool2D` after that one\n", 58 | "- Insert an `Activation` layer\n", 59 | "- retrain the model\n", 60 | "- does performance improve?\n", 61 | "- how many parameters does this new model have? More or less than the previous model? Why?\n", 62 | "- how long did this second model take to train? Longer or shorter than the previous model? Why?\n", 63 | "- did it perform better or worse than the previous model?" 
64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "from tensorflow.keras.datasets import mnist" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "(X_train, y_train), (X_test, y_test) = mnist.load_data('/tmp/mnist.npz')" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "X_train.shape" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "X_train = X_train.astype('float32') / 255.0\n", 100 | "X_test = X_test.astype('float32') / 255.0\n", 101 | "\n", 102 | "X_train = X_train.reshape(-1, 28, 28, 1)\n", 103 | "X_test = X_test.reshape(-1, 28, 28, 1)\n", 104 | "\n", 105 | "y_train_cat = to_categorical(y_train, 10)\n", 106 | "y_test_cat = to_categorical(y_test, 10)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "K.clear_session()\n", 116 | "\n", 117 | "model = Sequential()\n", 118 | "\n", 119 | "model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))\n", 120 | "model.add(MaxPool2D(pool_size=(2, 2)))\n", 121 | "\n", 122 | "model.add(Conv2D(64, (3, 3), activation='relu'))\n", 123 | "model.add(MaxPool2D(pool_size=(2, 2)))\n", 124 | "\n", 125 | "model.add(Flatten())\n", 126 | "\n", 127 | "model.add(Dense(128, activation='relu'))\n", 128 | "\n", 129 | "model.add(Dense(10, activation='softmax'))\n", 130 | "\n", 131 | "model.compile(loss='categorical_crossentropy',\n", 132 | " optimizer='rmsprop',\n", 133 | " metrics=['accuracy'])" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "model.summary()" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "model.fit(X_train, y_train_cat, batch_size=128,\n", 152 | " epochs=2, verbose=1, validation_split=0.3)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "model.evaluate(X_test, y_test_cat)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "### Exercise 2\n", 169 | "\n", 170 | "Pleased with your performance on the digit recognition task, your boss decides to challenge you with a harder task. Their online branch allows people to upload images to a website that generates and prints a postcard that is shipped to its destination. Your boss would like to know what images people are loading on the site in order to provide targeted advertising on the same page, so he asks you to build an image recognition system capable of recognizing a few objects. Luckily for you, there's a dataset ready-made with a collection of labeled images. 
This is the [CIFAR-10 dataset](http://www.cs.toronto.edu/~kriz/cifar.html), a very famous dataset that contains images for 10 different categories:\n", 171 | "\n", 172 | "- airplane\n", 173 | "- automobile\n", 174 | "- bird\n", 175 | "- cat\n", 176 | "- deer\n", 177 | "- dog\n", 178 | "- frog\n", 179 | "- horse\n", 180 | "- ship\n", 181 | "- truck\n", 182 | "\n", 183 | "In this exercise we will reach the limit of what you can achieve on your laptop and get ready for the next session on cloud GPUs.\n", 184 | "\n", 185 | "Here's what you have to do:\n", 186 | "- load the cifar10 dataset using `keras.datasets.cifar10.load_data()`\n", 187 | "- display a few images, see how hard/easy it is for you to recognize an object with such low resolution\n", 188 | "- check the shape of X_train, does it need reshape?\n", 189 | "- check the scale of X_train, does it need rescaling?\n", 190 | "- check the shape of y_train, does it need reshape?\n", 191 | "- build a model with the following architecture, and choose the parameters and activation functions for each of the layers:\n", 192 | " - conv2d\n", 193 | " - conv2d\n", 194 | " - maxpool\n", 195 | " - conv2d\n", 196 | " - conv2d\n", 197 | " - maxpool\n", 198 | " - flatten\n", 199 | " - dense\n", 200 | " - output\n", 201 | "- compile the model and check the number of parameters\n", 202 | "- attempt to train the model with the optimizer of your choice. How fast does training proceed?\n", 203 | "- If training is too slow (as expected) stop the execution and move to the next session!" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "from tensorflow.keras.datasets import cifar10" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "(X_train, y_train), (X_test, y_test) = cifar10.load_data()" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "X_train.shape" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "plt.imshow(X_train[1])" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "X_train = X_train.astype('float32') / 255.0\n", 249 | "X_test = X_test.astype('float32') / 255.0" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "y_train.shape" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "y_train_cat = to_categorical(y_train, 10)\n", 268 | "y_test_cat = to_categorical(y_test, 10)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [ 277 | "y_train_cat.shape" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "model = Sequential()\n", 287 | "model.add(Conv2D(32, (3, 3),\n", 288 | " padding='same',\n", 
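" # 'same' padding keeps the 32x32 spatial size through this convolution\n",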
289 | " input_shape=(32, 32, 3),\n", 290 | " activation='relu'))\n", 291 | "model.add(Conv2D(32, (3, 3), activation='relu'))\n", 292 | "model.add(MaxPool2D(pool_size=(2, 2)))\n", 293 | "\n", 294 | "model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))\n", 295 | "model.add(Conv2D(64, (3, 3), activation='relu'))\n", 296 | "model.add(MaxPool2D(pool_size=(2, 2)))\n", 297 | "\n", 298 | "model.add(Flatten())\n", 299 | "model.add(Dense(512, activation='relu'))\n", 300 | "model.add(Dense(10, activation='softmax'))" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "model.compile(loss='categorical_crossentropy',\n", 310 | " optimizer='rmsprop',\n", 311 | " metrics=['accuracy'])" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "model.summary()" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "model.fit(X_train, y_train_cat,\n", 330 | " batch_size=32,\n", 331 | " epochs=2,\n", 332 | " validation_data=(X_test, y_test_cat),\n", 333 | " shuffle=True)" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [] 342 | } 343 | ], 344 | "metadata": { 345 | "kernelspec": { 346 | "display_name": "Python 3", 347 | "language": "python", 348 | "name": "python3" 349 | }, 350 | "language_info": { 351 | "codemirror_mode": { 352 | "name": "ipython", 353 | "version": 3 354 | }, 355 | "file_extension": ".py", 356 | "mimetype": "text/x-python", 357 | "name": "python", 358 | "nbconvert_exporter": "python", 359 | "pygments_lexer": "ipython3", 360 | "version": "3.7.10" 361 | } 362 | }, 363 | "nbformat": 4, 364 | "nbformat_minor": 2 365 | } 366 | -------------------------------------------------------------------------------- /solutions/8 Recurrent Neural Networks Exercises Solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Recurrent Neural Networks" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import numpy as np\n", 18 | "%matplotlib inline\n", 19 | "import matplotlib.pyplot as plt" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Time series forecasting" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "from pandas.tseries.offsets import MonthEnd\n", 36 | "\n", 37 | "df = pd.read_csv('../data/cansim-0800020-eng-6674700030567901031.csv',\n", 38 | " skiprows=6, skipfooter=9,\n", 39 | " engine='python')\n", 40 | "\n", 41 | "df['Adjustments'] = pd.to_datetime(df['Adjustments']) + MonthEnd(1)\n", 42 | "df = df.set_index('Adjustments')\n", 43 | "df.head()" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "split_date = pd.Timestamp('01-01-2011')" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "train = df.loc[:split_date, ['Unadjusted']]\n", 62 | 
"test = df.loc[split_date:, ['Unadjusted']]" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "from sklearn.preprocessing import MinMaxScaler\n", 72 | "\n", 73 | "sc = MinMaxScaler()\n", 74 | "\n", 75 | "train_sc = sc.fit_transform(train)\n", 76 | "test_sc = sc.transform(test)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "train_sc_df = pd.DataFrame(train_sc, columns=['Scaled'], index=train.index)\n", 86 | "test_sc_df = pd.DataFrame(test_sc, columns=['Scaled'], index=test.index)\n", 87 | "\n", 88 | "for s in range(1, 13):\n", 89 | " train_sc_df['shift_{}'.format(s)] = train_sc_df['Scaled'].shift(s)\n", 90 | " test_sc_df['shift_{}'.format(s)] = test_sc_df['Scaled'].shift(s)\n", 91 | "\n", 92 | "X_train = train_sc_df.dropna().drop('Scaled', axis=1)\n", 93 | "y_train = train_sc_df.dropna()[['Scaled']]\n", 94 | "\n", 95 | "X_test = test_sc_df.dropna().drop('Scaled', axis=1)\n", 96 | "y_test = test_sc_df.dropna()[['Scaled']]\n", 97 | "\n", 98 | "X_train = X_train.values\n", 99 | "X_test= X_test.values\n", 100 | "\n", 101 | "y_train = y_train.values\n", 102 | "y_test = y_test.values" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "X_train.shape" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "## Exercise 1\n", 119 | "\n", 120 | "In the model above we reshaped the input shape to: `(num_samples, 1, 12)`, i.e. we treated a window of 12 months as a vector of 12 coordinates that we simultaneously passed to all the LSTM nodes. An alternative way to look at the problem is to reshape the input to `(num_samples, 12, 1)`. This means we consider each input window as a sequence of 12 values that we will pass in sequence to the LSTM. In principle this looks like a more accurate description of our situation. But does it yield better predictions? Let's check it.\n", 121 | "\n", 122 | "- Reshape `X_train` and `X_test` so that they represent a set of univariate sequences\n", 123 | "- retrain the same LSTM(6) model, you'll have to adapt the `input_shape`\n", 124 | "- check the performance of this new model, is it better at predicting the test data?" 
125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "X_train_t = X_train.reshape(X_train.shape[0], 12, 1)\n", 134 | "X_test_t = X_test.reshape(X_test.shape[0], 12, 1)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "X_train_t.shape" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "from tensorflow.keras.models import Sequential\n", 153 | "from tensorflow.keras.layers import LSTM, Dense\n", 154 | "import tensorflow.keras.backend as K\n", 155 | "from tensorflow.keras.callbacks import EarlyStopping" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "K.clear_session()\n", 165 | "model = Sequential()\n", 166 | "\n", 167 | "model.add(LSTM(6, input_shape=(12, 1)))\n", 168 | "\n", 169 | "model.add(Dense(1))\n", 170 | "\n", 171 | "model.compile(loss='mean_squared_error', optimizer='adam')" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "model.summary()" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "model.fit(X_train_t, y_train, epochs=600,\n", 199 | " batch_size=32, verbose=0, callbacks=[early_stop])" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "y_pred = model.predict(X_test_t)\n", 209 | "plt.plot(y_test)\n", 210 | "plt.plot(y_pred)" 211 | ] 212 | },
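{ "cell_type": "markdown", "metadata": {}, "source": [ "*Added sketch (not in the original solution):* to answer \"is it better at predicting the test data?\" with a number rather than a plot alone, one can compute the mean squared error on the (scaled) test set and compare it with the value obtained from the `(num_samples, 1, 12)` model." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# possible check: test MSE on the scaled values (same units as the training loss)\n", "test_mse = ((y_test - y_pred) ** 2).mean()\n", "print('Test MSE: {:.5f}'.format(test_mse))" ] },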
213 | { 214 | "cell_type": "markdown", 215 | "metadata": { 216 | "collapsed": true 217 | }, 218 | "source": [ 219 | "## Exercise 2\n", 220 | "\n", 221 | "RNN models can be applied to images too. In general we can apply them to any data where there's a connection between nearby units. Let's see how we can easily build a model that works with images.\n", 222 | "\n", 223 | "- Load the MNIST data; by now you should be able to do it blindfolded :)\n", 224 | "- reshape it so that an image looks like a long sequence of pixels\n", 225 | "- create a recurrent model and train it on the training data\n", 226 | "- how does it perform compared to a fully connected network? How does it compare to Convolutional Neural Networks?\n", 227 | "\n", 228 | "(feel free to run this exercise on a cloud GPU if it's too slow on your laptop)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "from tensorflow.keras.datasets import mnist\n", 238 | "from tensorflow.keras.utils import to_categorical" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "(X_train, y_train), (X_test, y_test) = mnist.load_data()\n", 248 | "X_train = X_train.astype('float32') / 255.0\n", 249 | "X_test = X_test.astype('float32') / 255.0\n", 250 | "y_train_cat = to_categorical(y_train, 10)\n", 251 | "y_test_cat = to_categorical(y_test, 10)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "X_train = X_train.reshape(X_train.shape[0], -1, 1)\n", 261 | "X_test = X_test.reshape(X_test.shape[0], -1, 1)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "print(X_train.shape)\n", 271 | "print(X_test.shape)\n", 272 | "print(y_train_cat.shape)\n", 273 | "print(y_test_cat.shape)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "# define the model\n", 283 | "K.clear_session()\n", 284 | "model = Sequential()\n", 285 | "model.add(LSTM(32, input_shape=X_train.shape[1:]))\n", 286 | "model.add(Dense(10, activation='softmax'))\n", 287 | "\n", 288 | "# compile the model\n", 289 | "model.compile(loss='categorical_crossentropy',\n", 290 | " optimizer='rmsprop',\n", 291 | " metrics=['accuracy'])\n", 292 | "\n", 293 | "model.fit(X_train, y_train_cat,\n", 294 | " batch_size=32,\n", 295 | " epochs=100,\n", 296 | " validation_split=0.3,\n", 297 | " shuffle=True,\n", 298 | " verbose=2,\n", 299 | " )\n", 300 | "\n", 301 | "model.evaluate(X_test, y_test_cat)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": {}, 308 | "outputs": [], 309 | "source": [] 310 | } 311 | ], 312 | "metadata": { 313 | "kernelspec": { 314 | "display_name": "Python 3", 315 | "language": "python", 316 | "name": "python3" 317 | }, 318 | "language_info": { 319 | "codemirror_mode": { 320 | "name": "ipython", 321 | "version": 3 322 | }, 323 | "file_extension": ".py", 324 | "mimetype": "text/x-python", 325 | "name": "python", 326 | "nbconvert_exporter": "python", 327 | "pygments_lexer": "ipython3", 328 | "version": "3.7.10" 329 | } 330 | }, 331 | "nbformat": 4, 332 | "nbformat_minor": 2 333 | } 334 | -------------------------------------------------------------------------------- /solutions/9 Improving performance Exercises Solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 9 Improving performance Exercises Solutions" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import numpy as np\n", 18 | "%matplotlib inline\n", 19 | "import matplotlib.pyplot as plt" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## 
Exercise 1\n", 27 | "\n", 28 | "- Reload the IMDB data keeping only the first 20000 most common words\n", 29 | "- pad the reviews to a shorter length (eg. 70 or 80), this time make sure you keep the first part of the review if it's longer than the maximum length\n", 30 | "- re run the model (remember to set max_features correctly)\n", 31 | "- does it train faster this time?\n", 32 | "- do you get a better performance?" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "from tensorflow.keras.datasets import imdb\n", 42 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 43 | "from tensorflow.keras.models import Sequential\n", 44 | "from tensorflow.keras.layers import Embedding, LSTM, Dense" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "max_features = 20000\n", 54 | "skip_top = 200" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "(X_train, y_train), (X_test, y_test) = imdb.load_data('/tmp/imdb.npz',\n", 64 | " num_words=max_features,\n", 65 | " start_char=1,\n", 66 | " oov_char=2,\n", 67 | " index_from=3)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "X_train.shape" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "maxlen = 80" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "X_train_pad = pad_sequences(X_train, maxlen=maxlen, truncating='post')\n", 95 | "X_test_pad = pad_sequences(X_test, maxlen=maxlen, truncating='post')" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "model = Sequential()\n", 105 | "model.add(Embedding(max_features, 128))\n", 106 | "model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))\n", 107 | "model.add(Dense(1, activation='sigmoid'))\n", 108 | "\n", 109 | "model.compile(loss='binary_crossentropy',\n", 110 | " optimizer='adam',\n", 111 | " metrics=['accuracy'])" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "X_train[0]" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "model.fit(X_train_pad, y_train,\n", 130 | " batch_size=32,\n", 131 | " epochs=2,\n", 132 | " validation_split=0.3)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "score, acc = model.evaluate(X_test_pad, y_test)\n", 142 | "print('Test score:', score)\n", 143 | "print('Test accuracy:', acc)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "## Exercise 2\n", 151 | "\n", 152 | "- Reload the digits data as above\n", 153 | "- define a function repeated_training_reg_dropout that adds regularization and dropout to a fully connected network\n", 154 | "- compare the performance with/witouth dropout and regularization like we did for batch normalization\n", 155 | "- do you get a better 
performance?" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "from sklearn.datasets import load_digits\n", 165 | "from tensorflow.keras.utils import to_categorical\n", 166 | "from sklearn.model_selection import train_test_split\n", 167 | "from tensorflow.keras.layers import Dropout\n", 168 | "import tensorflow.keras.backend as K" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "digits = load_digits()\n", 178 | "X, y = digits.data, digits.target\n", 179 | "y_cat = to_categorical(y)\n", 180 | "\n", 181 | "X_train, X_test, y_train, y_test = train_test_split(X, y_cat, test_size=0.3)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "def repeated_training_reg_dropout(X_train,\n", 191 | " y_train,\n", 192 | " X_test,\n", 193 | " y_test,\n", 194 | " units=512,\n", 195 | " activation='sigmoid',\n", 196 | " optimizer='sgd',\n", 197 | " do_dropout=False,\n", 198 | " rate=0.3,\n", 199 | " kernel_regularizer='l2',\n", 200 | " epochs=10,\n", 201 | " repeats=3):\n", 202 | " histories = []\n", 203 | " \n", 204 | " for repeat in range(repeats):\n", 205 | " K.clear_session()\n", 206 | "\n", 207 | " model = Sequential()\n", 208 | " \n", 209 | " # first fully connected layer\n", 210 | " model.add(Dense(units,\n", 211 | " input_shape=X_train.shape[1:],\n", 212 | " kernel_initializer='normal',\n", 213 | " kernel_regularizer=kernel_regularizer,\n", 214 | " activation=activation))\n", 215 | " if do_dropout:\n", 216 | " model.add(Dropout(rate))\n", 217 | "\n", 218 | " # second fully connected layer\n", 219 | " model.add(Dense(units,\n", 220 | " kernel_initializer='normal',\n", 221 | " kernel_regularizer=kernel_regularizer,\n", 222 | " activation=activation))\n", 223 | " if do_dropout:\n", 224 | " model.add(Dropout(rate))\n", 225 | "\n", 226 | " # third fully connected layer\n", 227 | " model.add(Dense(units,\n", 228 | " kernel_initializer='normal',\n", 229 | " kernel_regularizer=kernel_regularizer,\n", 230 | " activation=activation))\n", 231 | " if do_dropout:\n", 232 | " model.add(Dropout(rate))\n", 233 | "\n", 234 | " # output layer\n", 235 | " model.add(Dense(10, activation='softmax'))\n", 236 | " \n", 237 | " model.compile(optimizer,\n", 238 | " 'categorical_crossentropy',\n", 239 | " metrics=['accuracy'])\n", 240 | "\n", 241 | " h = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs, verbose=0)\n", 242 | " histories.append([h.history['accuracy'], h.history['val_accuracy']])\n", 243 | " print(repeat, end=' ')\n", 244 | "\n", 245 | " histories = np.array(histories)\n", 246 | " \n", 247 | " # calculate mean and standard deviation across repeats:\n", 248 | " mean_acc = histories.mean(axis=0)\n", 249 | " std_acc = histories.std(axis=0)\n", 250 | " print()\n", 251 | " \n", 252 | " return mean_acc[0], std_acc[0], mean_acc[1], std_acc[1]" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "mean_acc, std_acc, mean_acc_val, std_acc_val = repeated_training_reg_dropout(X_train,\n", 262 | " y_train,\n", 263 | " X_test,\n", 264 | " y_test,\n", 265 | " do_dropout=False)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | 
"source": [ 274 | "mean_acc_do, std_acc_do, mean_acc_val_do, std_acc_val_do = repeated_training_reg_dropout(X_train,\n", 275 | " y_train,\n", 276 | " X_test,\n", 277 | " y_test,\n", 278 | " do_dropout=True)" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "def plot_mean_std(m, s):\n", 288 | " plt.plot(m)\n", 289 | " plt.fill_between(range(len(m)), m-s, m+s, alpha=0.1)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "plot_mean_std(mean_acc, std_acc)\n", 299 | "plot_mean_std(mean_acc_val, std_acc_val)\n", 300 | "plot_mean_std(mean_acc_do, std_acc_do)\n", 301 | "plot_mean_std(mean_acc_val_do, std_acc_val_do)\n", 302 | "plt.ylim(0, 1.01)\n", 303 | "plt.title(\"Dropout and Regularization Accuracy\")\n", 304 | "plt.xlabel('Epochs')\n", 305 | "plt.ylabel('Accuracy')\n", 306 | "plt.legend(['Train', 'Test', 'Train with Dropout and Regularization', 'Test with Dropout and Regularization'], loc='best')" 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": {}, 312 | "source": [ 313 | "## Exercise 3\n", 314 | "\n", 315 | "This is a very long and complex exercise, that should give you an idea of a real world scenario. Feel free to look at the solution if you feel lost. Also, feel free to run this with a GPU, in which case you don't need to download the data.\n", 316 | "\n", 317 | "If you are running this locally, download and unpack the male/female pictures from [here](https://www.dropbox.com/s/nov493om2jmh2gp/male_female.tgz?dl=0). These images and labels were obtained from [Crowdflower](https://www.crowdflower.com/data-for-everyone/).\n", 318 | "\n", 319 | "Your goal is to build an image classifier that will recognize the gender of a person from pictures.\n", 320 | "\n", 321 | "- Have a look at the directory structure and inspect a couple of pictures\n", 322 | "- Design a model that will take a color image of size 64x64 as input and return a binary output (female=0/male=1)\n", 323 | "- Feel free to introduce any regularization technique in your model (Dropout, Batch Normalization, Weight Regularization)\n", 324 | "- Compile your model with an optimizer of your choice\n", 325 | "- Using `ImageDataGenerator`, define a train generator that will augment your images with some geometric transformations. Feel free to choose the parameters that make sense to you.\n", 326 | "- Define also a test generator, whose only purpose is to rescale the pixels by 1./255\n", 327 | "- use the function `flow_from_directory` to generate batches from the train and test folders. Make sure you set the `target_size` to 64x64.\n", 328 | "- Use the `model.fit_generator` function to fit the model on the batches generated from the ImageDataGenerator. Since you are streaming and augmenting the data in real time you will have to decide how many batches make an epoch and how many epochs you want to run\n", 329 | "- Train your model (you should get to at least 85% accuracy)\n", 330 | "- Once you are satisfied with your training, check a few of the misclassified pictures. Are those sensible errors?" 
331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": {}, 337 | "outputs": [], 338 | "source": [ 339 | "# If you are running this locally\n", 340 | "# uncomment the next 4 lines to download, extract and set the data path:\n", 341 | "# !wget 'https://www.dropbox.com/s/nov493om2jmh2gp/male_female.tgz?dl=1' -O ../data/male_female.tgz\n", 342 | "# data_path = '../data/male_female'\n", 343 | "# !mkdir -p {data_path}\n", 344 | "# !tar -xzvf ../data/male_female.tgz --directory {data_path}" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "metadata": {}, 351 | "outputs": [], 352 | "source": [ 353 | "from tensorflow.keras.layers import Conv2D\n", 354 | "from tensorflow.keras.layers import MaxPooling2D\n", 355 | "from tensorflow.keras.layers import Flatten\n", 356 | "from tensorflow.keras.layers import BatchNormalization\n", 357 | "from itertools import islice\n", 358 | "from tensorflow.keras.preprocessing.image import ImageDataGenerator" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "K.clear_session()\n", 368 | "\n", 369 | "model = Sequential()\n", 370 | "model.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), activation = 'relu'))\n", 371 | "model.add(MaxPooling2D(pool_size = (2, 2)))\n", 372 | "model.add(BatchNormalization())\n", 373 | "\n", 374 | "model.add(Conv2D(64, (3, 3), activation = 'relu'))\n", 375 | "model.add(MaxPooling2D(pool_size = (2, 2)))\n", 376 | "model.add(BatchNormalization())\n", 377 | "\n", 378 | "model.add(Conv2D(64, (3, 3), activation = 'relu'))\n", 379 | "model.add(MaxPooling2D(pool_size = (2, 2)))\n", 380 | "model.add(BatchNormalization())\n", 381 | "\n", 382 | "model.add(Flatten())\n", 383 | "\n", 384 | "model.add(Dense(128, activation = 'relu'))\n", 385 | "model.add(Dense(1, activation = 'sigmoid'))" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": {}, 392 | "outputs": [], 393 | "source": [ 394 | "model.compile(optimizer = 'adam',\n", 395 | " loss = 'binary_crossentropy',\n", 396 | " metrics = ['accuracy'])\n", 397 | "\n", 398 | "model.summary()" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": null, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "train_gen = ImageDataGenerator(rescale = 1./255,\n", 408 | " width_shift_range=0.1,\n", 409 | " height_shift_range=0.1,\n", 410 | " rotation_range = 10,\n", 411 | " shear_range = 0.2,\n", 412 | " zoom_range = 0.2,\n", 413 | " horizontal_flip = True)\n", 414 | "\n", 415 | "test_gen = ImageDataGenerator(rescale = 1./255)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "metadata": {}, 422 | "outputs": [], 423 | "source": [ 424 | "train = train_gen.flow_from_directory(data_path + '/train',\n", 425 | " target_size = (64, 64),\n", 426 | " batch_size = 16,\n", 427 | " class_mode = 'binary')\n", 428 | "\n", 429 | "test = test_gen.flow_from_directory(data_path + '/test',\n", 430 | " target_size = (64, 64),\n", 431 | " batch_size = 16,\n", 432 | " class_mode = 'binary')" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "model.fit(train,\n", 442 | " steps_per_epoch = 800,\n", 443 | " epochs = 200,\n", 444 | " validation_data = test,\n", 445 | " validation_steps = 200)" 446 | ] 447 | }, 448 | { 449 | "cell_type": 
"code", 450 | "execution_count": null, 451 | "metadata": {}, 452 | "outputs": [], 453 | "source": [ 454 | "X_test = []\n", 455 | "y_test = []\n", 456 | "for ts in islice(test, 50):\n", 457 | " X_test.append(ts[0])\n", 458 | " y_test.append(ts[1])\n", 459 | "\n", 460 | "X_test = np.concatenate(X_test)\n", 461 | "y_test = np.concatenate(y_test)" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": null, 467 | "metadata": {}, 468 | "outputs": [], 469 | "source": [ 470 | "y_pred = model.predict_classes(X_test).ravel()" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": null, 476 | "metadata": {}, 477 | "outputs": [], 478 | "source": [ 479 | "np.argwhere(y_test != y_pred).ravel()" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": null, 485 | "metadata": {}, 486 | "outputs": [], 487 | "source": [ 488 | "plt.imshow(X_test[14])" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [] 497 | } 498 | ], 499 | "metadata": { 500 | "kernelspec": { 501 | "display_name": "Python 3", 502 | "language": "python", 503 | "name": "python3" 504 | }, 505 | "language_info": { 506 | "codemirror_mode": { 507 | "name": "ipython", 508 | "version": 3 509 | }, 510 | "file_extension": ".py", 511 | "mimetype": "text/x-python", 512 | "name": "python", 513 | "nbconvert_exporter": "python", 514 | "pygments_lexer": "ipython3", 515 | "version": "3.7.10" 516 | } 517 | }, 518 | "nbformat": 4, 519 | "nbformat_minor": 2 520 | } 521 | -------------------------------------------------------------------------------- /tests/test_nb.py: -------------------------------------------------------------------------------- 1 | # tests that too long to execute on Travis are temporarily commented out 2 | # TODO: find a way to fix this 3 | 4 | import subprocess 5 | import tempfile 6 | 7 | 8 | def _exec_notebook(path): 9 | with tempfile.NamedTemporaryFile(suffix=".ipynb") as fout: 10 | args = ["jupyter", "nbconvert", "--to", "notebook", "--execute", 11 | "--ExecutePreprocessor.timeout=1000", 12 | "--output", fout.name, path] 13 | subprocess.check_call(args) 14 | 15 | 16 | def test_0(): 17 | _exec_notebook('course/0_Check_Environment.ipynb') 18 | 19 | 20 | def test_1(): 21 | _exec_notebook('course/1 First Deep Learning Model.ipynb') 22 | 23 | 24 | def test_2(): 25 | _exec_notebook('course/2 Data.ipynb') 26 | 27 | 28 | def test_3(): 29 | _exec_notebook('course/3 Machine Learning.ipynb') 30 | 31 | 32 | def test_4(): 33 | _exec_notebook('course/4 Deep Learning Intro.ipynb') 34 | 35 | 36 | def test_5(): 37 | _exec_notebook('course/5 Gradient Descent.ipynb') 38 | 39 | 40 | def test_6(): 41 | _exec_notebook('course/6 Convolutional Neural Networks.ipynb') 42 | 43 | 44 | def test_8(): 45 | _exec_notebook('course/8 Recurrent Neural Networks.ipynb') 46 | 47 | 48 | def test_9(): 49 | _exec_notebook('course/9 Improving performance.ipynb') 50 | 51 | 52 | def test_2_sol(): 53 | _exec_notebook('solutions/2 Data exploration Exercises Solution.ipynb') 54 | 55 | 56 | def test_3_sol(): 57 | _exec_notebook('solutions/3 Machine Learning Exercises Solution.ipynb') 58 | 59 | 60 | def test_4_sol(): 61 | _exec_notebook('solutions/4 Deep Learning Intro Exercises Solution.ipynb') 62 | 63 | 64 | def test_5_sol(): 65 | _exec_notebook('solutions/5 Gradient Descent Exercises Solution.ipynb') 66 | 67 | 68 | def test_6_sol(): 69 | _exec_notebook('solutions/6 Convolutional Neural Networks Exercises Solution.ipynb') 70 
491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [] 497 | } 498 | ], 499 | "metadata": { 500 | "kernelspec": { 501 | "display_name": "Python 3", 502 | "language": "python", 503 | "name": "python3" 504 | }, 505 | "language_info": { 506 | "codemirror_mode": { 507 | "name": "ipython", 508 | "version": 3 509 | }, 510 | "file_extension": ".py", 511 | "mimetype": "text/x-python", 512 | "name": "python", 513 | "nbconvert_exporter": "python", 514 | "pygments_lexer": "ipython3", 515 | "version": "3.7.10" 516 | } 517 | }, 518 | "nbformat": 4, 519 | "nbformat_minor": 2 520 | } 521 | -------------------------------------------------------------------------------- /tests/test_nb.py: -------------------------------------------------------------------------------- 1 | # tests that take too long to execute on Travis are temporarily commented out 2 | # TODO: find a way to fix this 3 | 4 | import subprocess 5 | import tempfile 6 | 7 | 8 | def _exec_notebook(path): 9 | with tempfile.NamedTemporaryFile(suffix=".ipynb") as fout: 10 | args = ["jupyter", "nbconvert", "--to", "notebook", "--execute", 11 | "--ExecutePreprocessor.timeout=1000", 12 | "--output", fout.name, path] 13 | subprocess.check_call(args) 14 | 15 | 16 | def test_0(): 17 | _exec_notebook('course/0_Check_Environment.ipynb') 18 | 19 | 20 | def test_1(): 21 | _exec_notebook('course/1 First Deep Learning Model.ipynb') 22 | 23 | 24 | def test_2(): 25 | _exec_notebook('course/2 Data.ipynb') 26 | 27 | 28 | def test_3(): 29 | _exec_notebook('course/3 Machine Learning.ipynb') 30 | 31 | 32 | def test_4(): 33 | _exec_notebook('course/4 Deep Learning Intro.ipynb') 34 | 35 | 36 | def test_5(): 37 | _exec_notebook('course/5 Gradient Descent.ipynb') 38 | 39 | 40 | def test_6(): 41 | _exec_notebook('course/6 Convolutional Neural Networks.ipynb') 42 | 43 | 44 | def test_8(): 45 | _exec_notebook('course/8 Recurrent Neural Networks.ipynb') 46 | 47 | 48 | def test_9(): 49 | _exec_notebook('course/9 Improving performance.ipynb') 50 | 51 | 52 | def test_2_sol(): 53 | _exec_notebook('solutions/2 Data exploration Exercises Solution.ipynb') 54 | 55 | 56 | def test_3_sol(): 57 | _exec_notebook('solutions/3 Machine Learning Exercises Solution.ipynb') 58 | 59 | 60 | def test_4_sol(): 61 | _exec_notebook('solutions/4 Deep Learning Intro Exercises Solution.ipynb') 62 | 63 | 64 | def test_5_sol(): 65 | _exec_notebook('solutions/5 Gradient Descent Exercises Solution.ipynb') 66 | 67 | 68 | def test_6_sol(): 69 | _exec_notebook('solutions/6 Convolutional Neural Networks Exercises Solution.ipynb') 70 | 71 | 72 | def test_8_sol(): 73 | _exec_notebook('solutions/8 Recurrent Neural Networks Exercises Solutions.ipynb') 74 | 75 | 76 | def test_9_sol(): 77 | _exec_notebook('solutions/9 Improving performance Exercises Solutions.ipynb') 78 | --------------------------------------------------------------------------------