├── .gitignore
├── .ipynb_checkpoints
    └── 09. Neural Networks Advanced - Python-checkpoint.ipynb
├── 01. Introduction to Jupyter Notebooks and Data - Python.ipynb
├── 02. Linear Regression - Python.ipynb
├── 03. Multiple Linear Regression - Python.ipynb
├── 04. Polynomial Regression - Python.ipynb
├── 05. Logistic Regression - Python.ipynb
├── 06. Support Vector Machines - Python.ipynb
├── 07. Advanced SVMs - Python.ipynb
├── 08. Neural Networks Introduction - Python.ipynb
├── 09. Neural Networks Advanced - Python.ipynb
├── 10. Convolutional Neural Networks - Python.ipynb
├── 11. Recurrent Neural Networks - Python.ipynb
├── 12. Clustering - Python.ipynb
├── CODE_OF_CONDUCT.md
├── Data
    ├── Arthur tales.txt
    ├── PrionData.csv
    ├── The Time Machine.txt
    ├── chocolate data multiple linear regression.txt
    ├── chocolate data.txt
    ├── dog_data.csv
    ├── football data.txt
    ├── football_data.csv
    ├── traffic_by_hour.csv
    └── trees.csv
├── LICENSE
├── Models
    └── arthur-model-epoch-30.hdf5
├── README.md
└── SECURITY.md


/.gitignore:
--------------------------------------------------------------------------------
  1 | ## Ignore Visual Studio temporary files, build results, and
  2 | ## files generated by popular Visual Studio add-ons.
  3 | ##
  4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
  5 | 
  6 | # User-specific files
  7 | *.suo
  8 | *.user
  9 | *.userosscache
 10 | *.sln.docstates
 11 | 
 12 | # User-specific files (MonoDevelop/Xamarin Studio)
 13 | *.userprefs
 14 | 
 15 | # Build results
 16 | [Dd]ebug/
 17 | [Dd]ebugPublic/
 18 | [Rr]elease/
 19 | [Rr]eleases/
 20 | x64/
 21 | x86/
 22 | bld/
 23 | [Bb]in/
 24 | [Oo]bj/
 25 | [Ll]og/
 26 | 
 27 | # Visual Studio 2015/2017 cache/options directory
 28 | .vs/
 29 | # Uncomment if you have tasks that create the project's static files in wwwroot
 30 | #wwwroot/
 31 | 
 32 | # Visual Studio 2017 auto generated files
 33 | Generated\ Files/
 34 | 
 35 | # MSTest test Results
 36 | [Tt]est[Rr]esult*/
 37 | [Bb]uild[Ll]og.*
 38 | 
 39 | # NUNIT
 40 | *.VisualState.xml
 41 | TestResult.xml
 42 | 
 43 | # Build Results of an ATL Project
 44 | [Dd]ebugPS/
 45 | [Rr]eleasePS/
 46 | dlldata.c
 47 | 
 48 | # Benchmark Results
 49 | BenchmarkDotNet.Artifacts/
 50 | 
 51 | # .NET Core
 52 | project.lock.json
 53 | project.fragment.lock.json
 54 | artifacts/
 55 | **/Properties/launchSettings.json
 56 | 
 57 | # StyleCop
 58 | StyleCopReport.xml
 59 | 
 60 | # Files built by Visual Studio
 61 | *_i.c
 62 | *_p.c
 63 | *_i.h
 64 | *.ilk
 65 | *.meta
 66 | *.obj
 67 | *.iobj
 68 | *.pch
 69 | *.pdb
 70 | *.ipdb
 71 | *.pgc
 72 | *.pgd
 73 | *.rsp
 74 | *.sbr
 75 | *.tlb
 76 | *.tli
 77 | *.tlh
 78 | *.tmp
 79 | *.tmp_proj
 80 | *.log
 81 | *.vspscc
 82 | *.vssscc
 83 | .builds
 84 | *.pidb
 85 | *.svclog
 86 | *.scc
 87 | 
 88 | # Chutzpah Test files
 89 | _Chutzpah*
 90 | 
 91 | # Visual C++ cache files
 92 | ipch/
 93 | *.aps
 94 | *.ncb
 95 | *.opendb
 96 | *.opensdf
 97 | *.sdf
 98 | *.cachefile
 99 | *.VC.db
100 | *.VC.VC.opendb
101 | 
102 | # Visual Studio profiler
103 | *.psess
104 | *.vsp
105 | *.vspx
106 | *.sap
107 | 
108 | # Visual Studio Trace Files
109 | *.e2e
110 | 
111 | # TFS 2012 Local Workspace
112 | $tf/
113 | 
114 | # Guidance Automation Toolkit
115 | *.gpState
116 | 
117 | # ReSharper is a .NET coding add-in
118 | _ReSharper*/
119 | *.[Rr]e[Ss]harper
120 | *.DotSettings.user
121 | 
122 | # JustCode is a .NET coding add-in
123 | .JustCode
124 | 
125 | # TeamCity is a build add-in
126 | _TeamCity*
127 | 
128 | # DotCover is a Code Coverage Tool
129 | *.dotCover
130 | 
131 | # AxoCover is a Code Coverage Tool
132 | .axoCover/*
133 | !.axoCover/settings.json
134 | 
135 | # Visual Studio code coverage results
136 | *.coverage
137 | *.coveragexml
138 | 
139 | # NCrunch
140 | _NCrunch_*
141 | .*crunch*.local.xml
142 | nCrunchTemp_*
143 | 
144 | # MightyMoose
145 | *.mm.*
146 | AutoTest.Net/
147 | 
148 | # Web workbench (sass)
149 | .sass-cache/
150 | 
151 | # Installshield output folder
152 | [Ee]xpress/
153 | 
154 | # DocProject is a documentation generator add-in
155 | DocProject/buildhelp/
156 | DocProject/Help/*.HxT
157 | DocProject/Help/*.HxC
158 | DocProject/Help/*.hhc
159 | DocProject/Help/*.hhk
160 | DocProject/Help/*.hhp
161 | DocProject/Help/Html2
162 | DocProject/Help/html
163 | 
164 | # Click-Once directory
165 | publish/
166 | 
167 | # Publish Web Output
168 | *.[Pp]ublish.xml
169 | *.azurePubxml
170 | # Note: Comment the next line if you want to checkin your web deploy settings,
171 | # but database connection strings (with potential passwords) will be unencrypted
172 | *.pubxml
173 | *.publishproj
174 | 
175 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
176 | # checkin your Azure Web App publish settings, but sensitive information contained
177 | # in these scripts will be unencrypted
178 | PublishScripts/
179 | 
180 | # NuGet Packages
181 | *.nupkg
182 | # The packages folder can be ignored because of Package Restore
183 | **/[Pp]ackages/*
184 | # except build/, which is used as an MSBuild target.
185 | !**/[Pp]ackages/build/
186 | # Uncomment if necessary however generally it will be regenerated when needed
187 | #!**/[Pp]ackages/repositories.config
188 | # NuGet v3's project.json files produces more ignorable files
189 | *.nuget.props
190 | *.nuget.targets
191 | 
192 | # Microsoft Azure Build Output
193 | csx/
194 | *.build.csdef
195 | 
196 | # Microsoft Azure Emulator
197 | ecf/
198 | rcf/
199 | 
200 | # Windows Store app package directories and files
201 | AppPackages/
202 | BundleArtifacts/
203 | Package.StoreAssociation.xml
204 | _pkginfo.txt
205 | *.appx
206 | 
207 | # Visual Studio cache files
208 | # files ending in .cache can be ignored
209 | *.[Cc]ache
210 | # but keep track of directories ending in .cache
211 | !*.[Cc]ache/
212 | 
213 | # Others
214 | ClientBin/
215 | ~$*
216 | *~
217 | *.dbmdl
218 | *.dbproj.schemaview
219 | *.jfm
220 | *.pfx
221 | *.publishsettings
222 | orleans.codegen.cs
223 | 
224 | # Including strong name files can present a security risk 
225 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
226 | #*.snk
227 | 
228 | # Since there are multiple workflows, uncomment next line to ignore bower_components
229 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
230 | #bower_components/
231 | 
232 | # RIA/Silverlight projects
233 | Generated_Code/
234 | 
235 | # Backup & report files from converting an old project file
236 | # to a newer Visual Studio version. Backup files are not needed,
237 | # because we have git ;-)
238 | _UpgradeReport_Files/
239 | Backup*/
240 | UpgradeLog*.XML
241 | UpgradeLog*.htm
242 | ServiceFabricBackup/
243 | *.rptproj.bak
244 | 
245 | # SQL Server files
246 | *.mdf
247 | *.ldf
248 | *.ndf
249 | 
250 | # Business Intelligence projects
251 | *.rdl.data
252 | *.bim.layout
253 | *.bim_*.settings
254 | *.rptproj.rsuser
255 | 
256 | # Microsoft Fakes
257 | FakesAssemblies/
258 | 
259 | # GhostDoc plugin setting file
260 | *.GhostDoc.xml
261 | 
262 | # Node.js Tools for Visual Studio
263 | .ntvs_analysis.dat
264 | node_modules/
265 | 
266 | # Visual Studio 6 build log
267 | *.plg
268 | 
269 | # Visual Studio 6 workspace options file
270 | *.opt
271 | 
272 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
273 | *.vbw
274 | 
275 | # Visual Studio LightSwitch build output
276 | **/*.HTMLClient/GeneratedArtifacts
277 | **/*.DesktopClient/GeneratedArtifacts
278 | **/*.DesktopClient/ModelManifest.xml
279 | **/*.Server/GeneratedArtifacts
280 | **/*.Server/ModelManifest.xml
281 | _Pvt_Extensions
282 | 
283 | # Paket dependency manager
284 | .paket/paket.exe
285 | paket-files/
286 | 
287 | # FAKE - F# Make
288 | .fake/
289 | 
290 | # JetBrains Rider
291 | .idea/
292 | *.sln.iml
293 | 
294 | # CodeRush
295 | .cr/
296 | 
297 | # Python Tools for Visual Studio (PTVS)
298 | __pycache__/
299 | *.pyc
300 | 
301 | # Cake - Uncomment if you are using it
302 | # tools/**
303 | # !tools/packages.config
304 | 
305 | # Tabs Studio
306 | *.tss
307 | 
308 | # Telerik's JustMock configuration file
309 | *.jmconfig
310 | 
311 | # BizTalk build output
312 | *.btp.cs
313 | *.btm.cs
314 | *.odx.cs
315 | *.xsd.cs
316 | 
317 | # OpenCover UI analysis results
318 | OpenCover/
319 | 
320 | # Azure Stream Analytics local run output 
321 | ASALocalRun/
322 | 
323 | # MSBuild Binary and Structured Log
324 | *.binlog
325 | 
326 | # NVidia Nsight GPU debugger configuration file
327 | *.nvuser
328 | 
329 | # MFractors (Xamarin productivity tool) working folder 
330 | .mfractor/
331 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/09. Neural Networks Advanced - Python-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Exercise 9 - Advanced Neural Networks\n",
  8 |     "==========\n",
  9 |     "\n",
 10 |     "There are many factors that influence how well a neural network might perform. AI practitioners tend to play around with the structure of the hidden layers, the activation functions used, and the optimisation function.\n",
 11 |     "\n",
 12 |     "In this exercise we will look at how changing these parameters impacts the accuracy performance of our network."
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "markdown",
 17 |    "metadata": {},
 18 |    "source": [
 19 |     "Step 1\n",
 20 |     "------\n",
 21 |     "\n",
 22 |     "In this exercise we will use the same dog dataset as in exercise 8, building on what we learnt before and trying different parameters for a network to try and improve performance.\n",
 23 |     "\n",
 24 |     "Let's start by opening up our data set and setting up our train and test sets.\n",
 25 |     "\n",
 26 |     "#### __Run the code__ below."
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": null,
 32 |    "metadata": {},
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "# Run this!\n",
 36 |     "\n",
 37 |     "# Here we set a randomisation seed for replicatability.\n",
 38 |     "import os\n",
 39 |     "os.environ['PYTHONHASHSEED'] = '0'\n",
 40 |     "seed = 6\n",
 41 |     "import random as rn\n",
 42 |     "rn.seed(seed)\n",
 43 |     "import numpy as np\n",
 44 |     "np.random.seed(seed)\n",
 45 |     "\n",
 46 |     "import warnings\n",
 47 |     "warnings.filterwarnings(\"ignore\")\n",
 48 |     "\n",
 49 |     "from keras import backend as K\n",
 50 |     "import keras\n",
 51 |     "\n",
 52 |     "print('keras using %s backend'%keras.backend.backend())\n",
 53 |     "import pandas as pd\n",
 54 |     "from sklearn.preprocessing import OneHotEncoder\n",
 55 |     "# Sets up the graphing configuration\n",
 56 |     "import matplotlib.pyplot as graph\n",
 57 |     "%matplotlib inline\n",
 58 |     "graph.rcParams['figure.figsize'] = (15,5)\n",
 59 |     "graph.rcParams[\"font.family\"] = 'DejaVu Sans'\n",
 60 |     "graph.rcParams[\"font.size\"] = '12'\n",
 61 |     "graph.rcParams['image.cmap'] = 'rainbow'"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": null,
 67 |    "metadata": {},
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "# Run this too!\n",
 71 |     "# This gets our data ready\n",
 72 |     "\n",
 73 |     "# Load the data\n",
 74 |     "dataset = pd.read_csv('Data/dog_data.csv')\n",
 75 |     "\n",
 76 |     "# Separate out the features\n",
 77 |     "features = dataset.drop(['breed'], axis = 1)\n",
 78 |     "\n",
 79 |     "# Sets the target one-hot vectors\n",
 80 |     "target = OneHotEncoder(sparse = False).fit_transform(np.transpose([dataset['breed']]))\n",
 81 |     "\n",
 82 |     "# Take the first 4/5 of the data and assign it to training\n",
 83 |     "train_X = features.values[:160]\n",
 84 |     "train_Y = target[:160]\n",
 85 |     "\n",
 86 |     "# Take the last 1/5 of the data and assign it to testing\n",
 87 |     "test_X = features.values[160:]\n",
 88 |     "test_Y = target[160:]"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "Step 2\n",
 96 |     "------\n",
 97 |     "\n",
 98 |     "The box below contains methods to help us quickly change the structure. Don't edit them - just run the box.\n",
 99 |     "\n",
100 |     "The __train_network__ method allows us to change:\n",
101 |     "* the number of layers\n",
102 |     "* the activation functions the layers use\n",
103 |     "* the optimizer of the model\n",
104 |     "* the number of training cycles for the model (__epochs__)\n",
105 |     "\n",
106 |     "The plot_acc and bar_acc just plot our models so we can easily see how well they do.\n",
107 |     "\n",
108 |     "Don't worry about the code - it is simply to make the next steps easier.\n",
109 |     "\n",
110 |     "#### __Run the code__ below."
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 1,
116 |    "metadata": {},
117 |    "outputs": [],
118 |    "source": [
119 |     "# Run this!\n",
120 |     "# Below are a few helper methods. Do not edit these.\n",
121 |     "\n",
122 |     "def train_network(structure, activation, optimizer, epochs):\n",
123 |     "    \n",
124 |     "    os.environ['PYTHONHASHSEED'] = '0'\n",
125 |     "    rn.seed(seed)\n",
126 |     "    np.random.seed(seed)\n",
127 |     "    \n",
128 |     "    # This initialises the model\n",
129 |     "    model = keras.models.Sequential()\n",
130 |     "    \n",
131 |     "    # This is our input + the first hidden layer 1\n",
132 |     "    model.add(keras.layers.Dense(units = structure[1], input_dim = structure[0], activation = activation)) \n",
133 |     "    \n",
134 |     "    # Hidden layer 2, if not ignored (of size 0)\n",
135 |     "    if structure[2] > 0:\n",
136 |     "        model.add(keras.layers.Dense(units = structure[2], activation = activation))\n",
137 |     "        \n",
138 |     "    # Output layer\n",
139 |     "    model.add(keras.layers.Dense(units=structure[-1], activation = \"softmax\"))\n",
140 |     "    \n",
141 |     "    # Compiles the model with parameters\n",
142 |     "    model.compile(loss = 'categorical_crossentropy', optimizer = optimizer, metrics = ['accuracy'])\n",
143 |     "    \n",
144 |     "    # This tells the us training has started, so we know that it's actually running\n",
145 |     "    print('training... ', end = '')\n",
146 |     "\n",
147 |     "    # This trains the network\n",
148 |     "    training_stats = model.fit(train_X, train_Y, batch_size = 1, epochs = epochs, verbose = 0, shuffle = False)\n",
149 |     "    \n",
150 |     "    # Results!\n",
151 |     "    print('train_acc: %0.3f, test_acc: %0.3f' %(training_stats.history['accuracy'][-1], \n",
152 |     "                                                model.evaluate(test_X, test_Y, verbose = 0)[1]))\n",
153 |     "    \n",
154 |     "    # This returns the results and the model for use outside the function\n",
155 |     "    return training_stats, model\n",
156 |     "\n",
157 |     "# Plots our evaluations in a line graph to see how they compare\n",
158 |     "def plot_acc(train_acc, test_acc, title):\n",
159 |     "    # Plots the training and testing accuracy lines\n",
160 |     "    training_accuracy, = graph.plot(train_acc, label = 'Training Accuracy')\n",
161 |     "    testing_accuracy, = graph.plot(test_acc, label = 'Testing Accuracy')\n",
162 |     "    graph.legend(handles = [training_accuracy, testing_accuracy])\n",
163 |     "    \n",
164 |     "    # Plots guide lines along y = 0 and y = 1 to help visualise\n",
165 |     "    xp = np.linspace(0, train_acc.shape[0] - 1, 10 * train_acc.shape[0])\n",
166 |     "    graph.plot(xp, np.full(xp.shape, 1), c = 'k', linestyle = ':', alpha = 0.5)\n",
167 |     "    graph.plot(xp, np.full(xp.shape, 0), c = 'k', linestyle = ':', alpha = 0.5)\n",
168 |     "    \n",
169 |     "    graph.xticks(range(0, train_acc.shape[0]), range(1, train_acc.shape[0] + 1))\n",
170 |     "    graph.ylim(0,1)\n",
171 |     "    graph.title(title)\n",
172 |     "    \n",
173 |     "    graph.show()\n",
174 |     "\n",
175 |     "# Plots our evaluations in a bar chart to see how they compare\n",
176 |     "def bar_acc(train_acc, test_acc, title, xticks):\n",
177 |     "    index = range(1, train_acc.shape[0] + 1)\n",
178 |     "    \n",
179 |     "    # Plots the training and testing accuracy bars\n",
180 |     "    training_accuracy = graph.bar(index, train_acc, 0.4, align = 'center')\n",
181 |     "    testing_accuracy = graph.bar(index, test_acc, 0.4, align = 'edge')\n",
182 |     "    graph.legend((training_accuracy[0], testing_accuracy[0]), ('Training Accuracy', 'Testing Accuracy'))\n",
183 |     "    \n",
184 |     "    graph.xticks(index, xticks)\n",
185 |     "    graph.title(title)\n",
186 |     "    \n",
187 |     "    graph.show()"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "markdown",
192 |    "metadata": {},
193 |    "source": [
194 |     "Step 3\n",
195 |     "------\n",
196 |     "\n",
197 |     "Let's first look at how different layer sizes impact performance.\n",
198 |     "\n",
199 |     "Let's look at a network with just one hidden layer. We'll see how it performs with 1 to 10 nodes.\n",
200 |     "\n",
201 |     "### In the cell below replace:\n",
202 |     "#### 1. `<addHidden1>` with `hidden1`\n",
203 |     "#### 2. `<addTrainAcc>` with `train_acc`\n",
204 |     "#### 3. `<addTestAcc>` with `test_acc`\n",
205 |     "#### and then __run the code__."
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "code",
210 |    "execution_count": null,
211 |    "metadata": {},
212 |    "outputs": [],
213 |    "source": [
214 |     "# Initialises empty arrays into which to append new values.\n",
215 |     "train_acc = np.empty((0))\n",
216 |     "test_acc = np.empty((0))\n",
217 |     "\n",
218 |     "for hidden1 in range (1,11):\n",
219 |     "    print('Evaluating model with %i hidden neurons... ' %hidden1, end = '')\n",
220 |     "\n",
221 |     "###\n",
222 |     "# REPLACE <addHidden1> BELOW WITH hidden1\n",
223 |     "###\n",
224 |     "    training_stats, model = train_network(structure = [3, <addHidden1>, <addHidden1>, 3], \n",
225 |     "                                          activation = 'relu', optimizer = 'RMSprop', epochs = 12)\n",
226 |     "###\n",
227 |     "    \n",
228 |     "    train_acc = np.append(train_acc, training_stats.history['accuracy'][-1])\n",
229 |     "    test_acc = np.append(test_acc, model.evaluate(test_X, test_Y, verbose = 0)[1])\n",
230 |     "\n",
231 |     "###\n",
232 |     "# REPLACE <addTrainAcc> WITH train_acc AND <addTestAcc> WITH test_acc\n",
233 |     "###\n",
234 |     "plot_acc(<addTrainAcc>, <addTestAcc>, 'hidden layer size performance comparison')\n",
235 |     "###"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "markdown",
240 |    "metadata": {},
241 |    "source": [
242 |     "So, experimenting with different sizes of hidden layers can dramatically improve your results.\n",
243 |     "\n",
244 |     "Step 4\n",
245 |     "------\n",
246 |     "\n",
247 |     "Now we'll look at how different activation functions impact the performance.\n",
248 |     "\n",
249 |     "There's lots we will try, just remember it is common to try both `relu` and `tanh` first.\n",
250 |     "\n",
251 |     "### In the cell below replace:\n",
252 |     "#### 1. `<addActivation>` with `activation`\n",
253 |     "#### 2. `<addActivationFunctions>` with `activation_functions`\n",
254 |     "#### and then __run the code__."
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "code",
259 |    "execution_count": null,
260 |    "metadata": {
261 |     "scrolled": false
262 |    },
263 |    "outputs": [],
264 |    "source": [
265 |     "train_acc = np.empty((0))\n",
266 |     "test_acc = np.empty((0))\n",
267 |     "\n",
268 |     "# Makes a list of the activation functions we wish to compare\n",
269 |     "activation_functions = ['elu', 'selu', 'relu', 'tanh', 'sigmoid', \n",
270 |     "                        'hard_sigmoid', 'softplus', 'softsign', 'linear']\n",
271 |     "\n",
272 |     "for activation in activation_functions:\n",
273 |     "    print('Evaluating model with %s hidden layer activation function... ' %activation, end = '')\n",
274 |     "\n",
275 |     "###\n",
276 |     "# REPLACE <addActivation> WITH activation\n",
277 |     "###\n",
278 |     "    training_stats, model = train_network(structure = [3, 4, 2, 3],\n",
279 |     "                                          activation = <addActivation>, optimizer = 'RMSprop', epochs = 12)\n",
280 |     "###\n",
281 |     "    \n",
282 |     "    train_acc = np.append(train_acc, training_stats.history['accuracy'][-1])\n",
283 |     "    test_acc = np.append(test_acc, model.evaluate(test_X, test_Y, verbose=0)[1])\n",
284 |     "    \n",
285 |     "###\n",
286 |     "# REPLACE THE <addActivationFunctions> BELOW WITH activation_functions\n",
287 |     "###\n",
288 |     "bar_acc(train_acc, test_acc, 'activation function performance comparison using (4,2) hidden layer', <addActivationFunctions>)\n",
289 |     "###"
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "markdown",
294 |    "metadata": {},
295 |    "source": [
296 |     "There's quite a lot of variance there. It's always good to quickly test different activation functions first.\n",
297 |     "\n",
298 |     "Next, lets try changing the shape of the hidden layers.\n",
299 |     "\n",
300 |     "#### Replace `<updateHere>`'s with `3` and run the code."
301 |    ]
302 |   },
303 |   {
304 |    "cell_type": "code",
305 |    "execution_count": null,
306 |    "metadata": {},
307 |    "outputs": [],
308 |    "source": [
309 |     "train_acc = np.empty((0))\n",
310 |     "test_acc = np.empty((0))\n",
311 |     "\n",
312 |     "activation_functions = ['elu', 'selu', 'relu', 'tanh', 'sigmoid',\n",
313 |     "                        'hard_sigmoid', 'softplus', 'softsign', 'linear']\n",
314 |     "\n",
315 |     "for activation in activation_functions:\n",
316 |     "    print('Evaluating model with %s hidden layer activation function... ' %activation, end='')\n",
317 |     "    \n",
318 |     "\n",
319 |     "# The value you choose for <updateHere> below will change the size of the hidden layers. Lets try changing them both to 3 for now\n",
320 |     "# (but you can have a play around with different numbers if you want)\n",
321 |     "###\n",
322 |     "# REPLACE THE <updateHere>'s BELOW WITH 3\n",
323 |     "###\n",
324 |     "    training_stats, model = train_network(structure = [3, <updateHere>, <updateHere>, 3], \n",
325 |     "                                          activation = activation, optimizer = 'RMSprop', epochs = 12)\n",
326 |     "###\n",
327 |     "    \n",
328 |     "    train_acc = np.append(train_acc, training_stats.history['accuracy'][-1])\n",
329 |     "    test_acc = np.append(test_acc, model.evaluate(test_X, test_Y, verbose=0)[1])\n",
330 |     "    \n",
331 |     "bar_acc(train_acc, test_acc, 'activation function performance comparison using (3,3) hidden layer', activation_functions)"
332 |    ]
333 |   },
334 |   {
335 |    "cell_type": "markdown",
336 |    "metadata": {},
337 |    "source": [
338 |     "Step 5\n",
339 |     "-----\n",
340 |     "\n",
341 |     "The __optimisation function__ is the last major parameter of the network architecture. It changes how the network is trained - so it can have a __very large impact on training time and end performance__.\n",
342 |     "\n",
343 |     "Note: this step won't always provide the same results every time it is run. Optimizers such as SGD will give different results.\n",
344 |     "\n",
345 |     "#### Replace `<addOptimizer>` with `optimizer` and run the code."
346 |    ]
347 |   },
348 |   {
349 |    "cell_type": "code",
350 |    "execution_count": null,
351 |    "metadata": {},
352 |    "outputs": [],
353 |    "source": [
354 |     "train_acc = np.empty((0))\n",
355 |     "test_acc = np.empty((0))\n",
356 |     "\n",
357 |     "# This is a list of the optimisation functions for us to compare\n",
358 |     "optimization_functions = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta',\n",
359 |     "                          'Adam', 'Adamax', 'Nadam']\n",
360 |     "\n",
361 |     "for optimizer in optimization_functions:\n",
362 |     "    print('Evaluating model with %s optimizer... ' %optimizer, end='')\n",
363 |     "    \n",
364 |     "    \n",
365 |     "# The <addOptimizer> below is where we specify the optimizer in the code    \n",
366 |     "###\n",
367 |     "# REPLACE THE <addOptimizer> BELOW WITH optimizer\n",
368 |     "###\n",
369 |     "    training_stats, model = train_network(structure = [3, 4, 2, 3],\n",
370 |     "                                          activation = 'relu', optimizer = <addOptimizer>, epochs = 12)\n",
371 |     "###\n",
372 |     "\n",
373 |     "# This is recording our data for the plot\n",
374 |     "    train_acc = np.append(train_acc, training_stats.history['accuracy'][-1])\n",
375 |     "    test_acc = np.append(test_acc, model.evaluate(test_X, test_Y, verbose=0)[1])\n",
376 |     "\n",
377 |     "# And now, the plot!    \n",
378 |     "bar_acc(train_acc, test_acc, 'optimizer performance comparison using (4,2) hidden layer', optimization_functions)"
379 |    ]
380 |   },
381 |   {
382 |    "cell_type": "markdown",
383 |    "metadata": {},
384 |    "source": [
385 |     "Step 6\n",
386 |     "-------\n",
387 |     "\n",
388 |     "Let's try to combine what we've seen above and try to create a neural network that performs better than what we made in exercise 7, where we used the structure `[3,4,2,3]`, the activation function `relu`, and the optimiser `SGD` (Stochastic Gradient Descent).\n",
389 |     "\n",
390 |     "### In the cell below replace:\n",
391 |     "#### 1. `<layerSize>`'s with numbers of your choice (how many nodes the hidden layers will have)\n",
392 |     "#### 2. `<activationFunction>` with one of the following: `'relu'`, `'softsign'`, `'tanh'`, `'elu'`, `'selu'`, `'softplus'`, `'linear'`\n",
393 |     "#### 3. `<optimiser>` with one of the following: `'SGD'`, `'adam'`, `'RMSprop'`, `'Adagrad'`, `'Adadelta'`, `'Adamax'`, `'Nadam'`\n",
394 |     "#### and then __run the code__."
395 |    ]
396 |   },
397 |   {
398 |    "cell_type": "code",
399 |    "execution_count": null,
400 |    "metadata": {},
401 |    "outputs": [],
402 |    "source": [
403 |     "###\n",
404 |     "# REPLACE THE <layerSize>'s' BELOW WITH PARAMETERS TO TEST A NEW NEURAL NETWORK e.g. 4 and 2\n",
405 |     "###\n",
406 |     "structure = [3, <layerSize>, <layerSize>, 3]\n",
407 |     "###\n",
408 |     "\n",
409 |     "###\n",
410 |     "# REPLACE <activationFunction> WITH ONE OF THE FOLLOWING: 'relu', 'softsign', 'tanh', 'elu', 'selu', 'softplus', 'linear'\n",
411 |     "###\n",
412 |     "activation = <activationFunction>\n",
413 |     "###\n",
414 |     "\n",
415 |     "###\n",
416 |     "# REPLACE <optimiser> WITH ONE OF THE FOLLOWING: 'SGD', 'adam', 'RMSprop', 'Adagrad', 'Adadelta', 'Adamax', 'Nadam'\n",
417 |     "###\n",
418 |     "optimizer = <optimiser>\n",
419 |     "###\n",
420 |     "\n",
421 |     "training_stats, model = train_network(structure, activation, optimizer, epochs = 24)\n",
422 |     "\n",
423 |     "# We can plot our training statistics to see how it developed over time\n",
424 |     "accuracy, = graph.plot(training_stats.history['accuracy'], label = 'Accuracy')\n",
425 |     "training_loss, = graph.plot(training_stats.history['loss'], label = 'Training Loss')\n",
426 |     "graph.legend(handles = [accuracy, training_loss])\n",
427 |     "loss = np.array(training_stats.history['loss'])\n",
428 |     "xp = np.linspace(0, loss.shape[0], 10 * loss.shape[0])\n",
429 |     "graph.plot(xp, np.full(xp.shape, 1), c = 'k', linestyle = ':', alpha = 0.5)\n",
430 |     "graph.plot(xp, np.full(xp.shape, 0), c = 'k', linestyle = ':', alpha = 0.5)\n",
431 |     "graph.show()"
432 |    ]
433 |   },
434 |   {
435 |    "cell_type": "markdown",
436 |    "metadata": {},
437 |    "source": [
438 |     "How does it look? Were we able to beat the other network? Try out a number of different configurations to see how they perform!"
439 |    ]
440 |   },
441 |   {
442 |    "cell_type": "markdown",
443 |    "metadata": {},
444 |    "source": [
445 |     "Conclusion\n",
446 |     "-------\n",
447 |     "\n",
448 |     "We've compared how different neural network architecture parameters influence accuracy performance, and we've tried to combine them in such a way that we maximise this performance."
449 |    ]
450 |   }
451 |  ],
452 |  "metadata": {
453 |   "kernelspec": {
454 |    "display_name": "Python 3",
455 |    "language": "python",
456 |    "name": "python3"
457 |   },
458 |   "language_info": {
459 |    "codemirror_mode": {
460 |     "name": "ipython",
461 |     "version": 3
462 |    },
463 |    "file_extension": ".py",
464 |    "mimetype": "text/x-python",
465 |    "name": "python",
466 |    "nbconvert_exporter": "python",
467 |    "pygments_lexer": "ipython3",
468 |    "version": "3.7.3"
469 |   }
470 |  },
471 |  "nbformat": 4,
472 |  "nbformat_minor": 2
473 | }
474 | 


--------------------------------------------------------------------------------
/01. Introduction to Jupyter Notebooks and Data - Python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "metadata": {},
  5 |       "cell_type": "markdown",
  6 |       "source": "# Welcome to Azure Notebooks!\n\nPython is a free, open source programming language which is extremely popular for statistical analysis and AI.\n\nHere, we will give you a taste of what using python is like.\n\nLet's get started. We’ve provided the data for you, and cleaned it up so it’s ready for analysis. You can __move through the steps by clicking on the run button__ just above this notebook."
  7 |     },
  8 |     {
  9 |       "metadata": {},
 10 |       "cell_type": "markdown",
 11 |       "source": "Exercise 1 - Introduction To Jupyter Notebooks\n==========================\n\nThe purpose of this exercise is to get you familiar with using Jupyter Notebooks. Don't worry if you find the coding difficult - this is not a Python course. You will slowly learn more as you go and you definitely don't need to understand every line of code.\n\nStep 1\n--------\n\nThese notebooks contain places where you can execute code, like below.\n\nGive it a go. Click on the code below, then press `Run` in the toolbar above (or press __Shift+Enter__) to run the code."
 12 |     },
 13 |     {
 14 |       "metadata": {
 15 |         "trusted": true
 16 |       },
 17 |       "cell_type": "code",
 18 |       "source": "print(\"The code ran successfully!\")",
 19 |       "execution_count": null,
 20 |       "outputs": []
 21 |     },
 22 |     {
 23 |       "metadata": {},
 24 |       "cell_type": "markdown",
 25 |       "source": "If all went well, the code should have printed a message for you.\n\nAt the start of most programming exercises we have to load things to help us do things easily, like creating graphs. \n\nClick on the code below, then __hit the `Run` button to load graphing capabilities for later in the exercise__."
 26 |     },
 27 |     {
 28 |       "metadata": {
 29 |         "trusted": true
 30 |       },
 31 |       "cell_type": "code",
 32 |       "source": "import warnings\nwarnings.filterwarnings(\"ignore\")\nimport matplotlib.pyplot as graph",
 33 |       "execution_count": null,
 34 |       "outputs": []
 35 |     },
 36 |     {
 37 |       "metadata": {
 38 |         "slideshow": {
 39 |           "slide_type": "slide"
 40 |         }
 41 |       },
 42 |       "cell_type": "markdown",
 43 |       "source": "Step 2\n--------\n\nLet's get it to print a message you choose this time. \n\n#### Below, write a message between the quotation marks then run the cell.\n\nIt is okay to use spaces, numbers, or letters. Your message should look red. For example, `print(\"this is my message\")`."
 44 |     },
 45 |     {
 46 |       "metadata": {
 47 |         "trusted": true
 48 |       },
 49 |       "cell_type": "code",
 50 |       "source": "###\n# WRITE A MESSAGE BETWEEN THE SPEECH MARKS IN THE LINE BELOW, THEN HIT RUN.\n###\nprint(\"type something here!\")\n###\n\n# It's ok to use spaces, numbers, or letters. Your message should look red.\n# For example: print(\"this is my message\")",
 51 |       "execution_count": null,
 52 |       "outputs": []
 53 |     },
 54 |     {
 55 |       "metadata": {},
 56 |       "cell_type": "markdown",
 57 |       "source": "You will notice hash symbols (`#`). Anything after a `#` is ignored by the computer. This lets us leave notes for you to read so that you understand the code better."
 58 |     },
 59 |     {
 60 |       "metadata": {},
 61 |       "cell_type": "markdown",
 62 |       "source": "Step 3\n--------\n\nPython lets us save things and use them later. In this exercise we will save your message"
 63 |     },
 64 |     {
 65 |       "metadata": {
 66 |         "trusted": true
 67 |       },
 68 |       "cell_type": "code",
 69 |       "source": "###\n# WRITE A MESSAGE BETWEEN THE SPEECH MARKS IN THE LINE BELOW, THEN PRESS RUN\n###\nmy_message = \"\"\n###\n\nprint(my_message) ",
 70 |       "execution_count": null,
 71 |       "outputs": []
 72 |     },
 73 |     {
 74 |       "metadata": {},
 75 |       "cell_type": "markdown",
 76 |       "source": "Okay, what's happened here? \n\nIn the real world we might put something in an envelope (like a letter, or picture). On the envelope we write something (give it a name), like \"my_letter_for_alice\".\n\nIn a computer, we do something similar. The thing that holds information (like the envelope) is called a **variable**. We also give each one a name. \n\nActually, you've already done this.\n\nFirst, you made a message, then you saved it to a **variable** called 'my_message':\n```\nmy_message = \"this is my message!\"\n              ↑↑↑\n              the message you made\n \nmy_message = \"this is my message!\"\n          ↑↑↑\n          the equals sign means to save it to the variable on the left\n     \nmy_message = \"this is my message!\"\n↑↑↑\nthis is the name of your variable. They must never have spaces in them.\n```"
 77 |     },
 78 |     {
 79 |       "metadata": {},
 80 |       "cell_type": "markdown",
 81 |       "source": "Step 4\n-------\n\nLet's try using variables again, but save a number inside our variable this time. Remember, the variable is on the *left hand side* of the `=` assignment symbol and is the equivalent of a labelled box. The information on the *right hand side* is the information we want to store inside the variable (or a box in our analogy).\n\n#### In the cell below replace `<addNumber>` with any number you choose.\n\nThen __run the code__."
 82 |     },
 83 |     {
 84 |       "metadata": {
 85 |         "trusted": true
 86 |       },
 87 |       "cell_type": "code",
 88 |       "source": "###\n# REPLACE <addNumber> BELOW WITH ANY NUMBER\n###\nmy_first_number = <addNumber>\n###\n\nprint(my_first_number)\nprint(my_first_number)",
 89 |       "execution_count": null,
 90 |       "outputs": []
 91 |     },
 92 |     {
 93 |       "metadata": {},
 94 |       "cell_type": "markdown",
 95 |       "source": "What happened here?\n\nIn the real world, we might then do something with this information. For example, we might choose to read it. We can read it as many times as we like.\n\nOn the computer, we can also do things with this information. Here, you asked the computer to print the message to the screen twice.\n\n```\nprint(my_first_number) \nprint(my_first_number)\n```"
 96 |     },
 97 |     {
 98 |       "metadata": {},
 99 |       "cell_type": "markdown",
100 |       "source": "How did you do this though?\n\n```\nprint(....)\n↑↑↑\n```\nthis is what you are asking the computer to do. It is a **method** called print. There are many methods available. Soon, we will use methods that make graphs.\n```\nprint(....)\n     ↑    ↑\n```\nmethods have round brackets. What you write here between these is given to the method. \n```\nprint(my_first_number)\n      ↑↑↑\n```\nIn this case, we gave it 'my_first_number', and it took it and printed it to the screen.               \n      \n\nStep 5\n-------\n\nOk, let's make a graph from some data.\n\n#### In the cell below replace the `<addNumber>`'s with any number you choose\n\nThen __run the code__ to make a graph."
101 |     },
102 |     {
103 |       "metadata": {
104 |         "trusted": true
105 |       },
106 |       "cell_type": "code",
107 |       "source": "# These are our x values\nx_values = [1, 2, 3]\n\n###\n# BELOW INSIDE THE SQUARE BRACKETS, REPLACE THE <addNumber>'S WITH EACH WITH A NUMBER\n###\ny_values = [<addNumber>, <addNumber>, <addNumber>]\n###\n\n# When you've done that, run the cell\n# For example, you could change like this: y_values = [3, 1, 7]\n\n# This makes a bar graph. We give it our x and y values\ngraph.bar(x_values, y_values)",
108 |       "execution_count": null,
109 |       "outputs": []
110 |     },
111 |     {
112 |       "metadata": {},
113 |       "cell_type": "markdown",
114 |       "source": "This is very simple, but here x and y are our data.\n\nIf you'd like, have a play with the code:\n* change x and y values and see how the graph changes. Make sure they have the same count of numbers in them.\n* change `graph.bar` to `graph.scatter` to change the type of graph\n\n\nStep 6\n----------------\n\nFrom time to time, we will load data from text files, rather than write it into the code. You can't see these text files in your browser because they are saved on the server running this website. We can load them using code, though. Let's load one up, look at it, then graph it.\n\n#### In the cell below write `print(data.head())` then __run the code__."
115 |     },
116 |     {
117 |       "metadata": {
118 |         "trusted": true
119 |       },
120 |       "cell_type": "code",
121 |       "source": "import pandas as pd\n\n# The next line loads information about chocolate bars and saves it in a variable called 'data'\ndataset = pd.read_csv('Data/chocolate data.txt', index_col = False, sep = '\\t')\n\n### \n# WRITE print(dataset.head()) BELOW TO PREVIEW THE DATA ---###\n###\n\n###",
122 |       "execution_count": null,
123 |       "outputs": []
124 |     },
125 |     {
126 |       "metadata": {},
127 |       "cell_type": "markdown",
128 |       "source": "Each row (horizontal) shows information about one chocolate bar. For example, the first chocolate bar was:\n* 185 grams\n* 65% cocoa\n* 11% sugar\n* 24% milk\n* and a customer said they were 47% happy with it\n\nWe would probably say that our chocolate bar features were weight, cocoa %, sugar % and milk %\n\nConclusion\n----------------\n\n__Well done__ that's the end of programming exercise one.\n\nYou can now go back to the course and click __'Next Step'__ to move onto some key concepts of AI - models and error.\n\n\nOptional Step 7\n----------------\nWhen we say \"optional\" we mean exercises that might help you learn, but you don't have to do. \n\nWe can graph some of these features in scatter plot. Let's put cocoa_percent on the x-axis and customer happiness on the y axis.\n\n#### In the cell below replace `<addYValues>` with `customer_happiness` and then __run the code__."
129 |     },
130 |     {
131 |       "metadata": {
132 |         "trusted": true
133 |       },
134 |       "cell_type": "code",
135 |       "source": "x_values = dataset.cocoa_percent\n\n###\n# REPLACE <addYValues> BELOW WITH customer_happiness\n###\ny_values = dataset.<addYValues>\n###\n\ngraph.scatter(x_values, y_values)",
136 |       "execution_count": null,
137 |       "outputs": []
138 |     },
139 |     {
140 |       "metadata": {},
141 |       "cell_type": "markdown",
142 |       "source": "In this graph, every chocolate bar is one point. Later, we will analyse this data with AI."
143 |     }
144 |   ],
145 |   "metadata": {
146 |     "kernelspec": {
147 |       "name": "python36",
148 |       "display_name": "Python 3.6",
149 |       "language": "python"
150 |     },
151 |     "language_info": {
152 |       "mimetype": "text/x-python",
153 |       "nbconvert_exporter": "python",
154 |       "name": "python",
155 |       "pygments_lexer": "ipython3",
156 |       "version": "3.6.6",
157 |       "file_extension": ".py",
158 |       "codemirror_mode": {
159 |         "version": 3,
160 |         "name": "ipython"
161 |       }
162 |     }
163 |   },
164 |   "nbformat": 4,
165 |   "nbformat_minor": 2
166 | }


--------------------------------------------------------------------------------
/02. Linear Regression - Python.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |   "cells": [
 3 |     {
 4 |       "metadata": {
 5 |         "collapsed": true
 6 |       },
 7 |       "cell_type": "markdown",
 8 |       "source": "Exercise 2 - Simple Linear Regression\n=====================\n\nWe want to know how to make our chocolate-bar customers happier. To do this, we need to know which chocolate bar _features_ predict customer happiness. For example, customers may be happier when chocolate bars are bigger, or when they contain more cocoa. \n\nWe have data on customer happiness when eating chocolate bars with different features. Lets look at the relationship between happiness and bar size.\n\nStep 1\n--\n\nFirst, lets have a look at our data.\n\n#### In the cell below replace the text `<printDataHere>` with `print(dataset.head())` and then press __Run__ in the toolbar above (or press __Shift+Enter__)."
 9 |     },
10 |     {
11 |       "metadata": {
12 |         "scrolled": true,
13 |         "trusted": true
14 |       },
15 |       "cell_type": "code",
16 |       "source": "import warnings\nwarnings.filterwarnings(\"ignore\")\nimport pandas as pd\nimport matplotlib.pyplot as graph\nimport statsmodels.formula.api as smf\nfrom scipy import stats\n\ndataset = pd.read_csv('Data/chocolate data.txt', index_col=False, sep=\"\\t\",header=0)\n    \n###\n# REPLACE <PrintDataHere> WITH print(dataset.head())\n###\n<PrintDataHere>\n###",
17 |       "execution_count": null,
18 |       "outputs": []
19 |     },
20 |     {
21 |       "metadata": {},
22 |       "cell_type": "markdown",
23 |       "source": "The data represents 100 different variations of chocolate bars and the measured customer happiness for each one. \n\nStep 2\n--\n\nWe want to know which chocolate bar features make customers happy.\n\nThe example below shows a linear regression between __cocoa percentage__ and __happiness__. You can read through the comments to understand what is happening. \n\n#### __Run the code__ to to see the output visualized."
24 |     },
25 |     {
26 |       "metadata": {
27 |         "trusted": true
28 |       },
29 |       "cell_type": "code",
30 |       "source": "# Run this cell!\n\n# DO NOT EDIT ANY OF THIS CODE\n\n# Define a function to perform a linear regression\ndef PerformLinearRegression(formula):\n\n    # This performs linear regression\n    lm = smf.ols(formula = formula, data = dataset).fit()\n\n    featureName=formula.split(\" \")[-1]\n    \n    # get the data for the x parameter (our feature)\n    train_X=dataset[featureName]\n    \n    # This makes and shows a graph\n    intercept=lm.params[0]\n    slope=lm.params[1]\n    line = slope * train_X + intercept\n    graph.plot(train_X, line, '-', c = 'red')\n    graph.scatter(train_X, dataset.customer_happiness)\n    graph.ylabel('customer_happiness')\n    graph.xlabel(featureName)\n    graph.show()\n\n# This performs the linear regression steps listed above\n# The text in red is the formula for our regression\nPerformLinearRegression('customer_happiness ~ cocoa_percent')",
31 |       "execution_count": null,
32 |       "outputs": []
33 |     },
34 |     {
35 |       "metadata": {},
36 |       "cell_type": "markdown",
37 |       "source": "In the scatter plot above, each point represents an observation for a single chocolate bar.\n\nIt seems that __more cocoa makes customers more happy__. We can tell, because as we increase the amount of cocoa (x axis) the amount of customer happiness (y axis) increases. \n\nStep 3\n------\n\nLet's look at some other features.\n\n#### Below, replace the text `<addFeatureHere>` with __`weight`__ to see if heavier chocolate bars make people happier.\n\nAlso try the variables `sugar_percent` and  `milk_percent` to see if these improve customers' experiences."
38 |     },
39 |     {
40 |       "metadata": {
41 |         "trusted": true
42 |       },
43 |       "cell_type": "code",
44 |       "source": "###\n# CHANGE <addFeatureHere> TO weight IN THE LINE BELOW\n###\nPerformLinearRegression('customer_happiness ~ <addFeatureHere>')\n###",
45 |       "execution_count": null,
46 |       "outputs": []
47 |     },
48 |     {
49 |       "metadata": {},
50 |       "cell_type": "markdown",
51 |       "source": "It looks like heavier chocolate bars make customers happier. The amount of milk or sugar, however, don't seem to make customers happier. \n\nConclusion\n---\nYou have run a simple linear regression. This told us that if we want to make a chocolate bar that will make customers happy, it should be large and contain a lot of cocoa.\n\nWell done! You can now go back to the course and click __'Next Step'__ to move onto using linear regression with multiple features."
52 |     }
53 |   ],
54 |   "metadata": {
55 |     "kernelspec": {
56 |       "name": "python36",
57 |       "display_name": "Python 3.6",
58 |       "language": "python"
59 |     },
60 |     "language_info": {
61 |       "mimetype": "text/x-python",
62 |       "nbconvert_exporter": "python",
63 |       "name": "python",
64 |       "pygments_lexer": "ipython3",
65 |       "version": "3.6.6",
66 |       "file_extension": ".py",
67 |       "codemirror_mode": {
68 |         "version": 3,
69 |         "name": "ipython"
70 |       }
71 |     }
72 |   },
73 |   "nbformat": 4,
74 |   "nbformat_minor": 2
75 | }


--------------------------------------------------------------------------------
/03. Multiple Linear Regression - Python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "metadata": {
  5 |         "collapsed": true
  6 |       },
  7 |       "cell_type": "markdown",
  8 |       "source": "Exercise 3 - Multiple Linear Regression\n===================\n\nFrom the previous exercise, we know that customers are happier with chocolate bars that are large and have high amounts of cocoa. Customers may feel differently when they have to pay for these bars though.\n\nIn this exercise, we will try to find the chocolate bar that best suits customers, taking into account the cocoa content, size, and price.\n\nStep 1\n------\n\nFirstly, lets have a look at our data.\n\nThe data is from survey of how happy customers were with chocolate bars they purchased.\n\n#### Replace `<printDataHere>` with `print(dataset.head())` below, and __run the code__."
  9 |     },
 10 |     {
 11 |       "metadata": {
 12 |         "trusted": true
 13 |       },
 14 |       "cell_type": "code",
 15 |       "source": "# This sets up the graphing configuration\nimport warnings\nwarnings.filterwarnings(\"ignore\")\nimport matplotlib.pyplot as graph\n%matplotlib inline\ngraph.rcParams['figure.figsize'] = (15,5)\ngraph.rcParams[\"font.family\"] = 'DejaVu Sans'\ngraph.rcParams[\"font.size\"] = '12'\nimport pandas as pd\nimport statsmodels.formula.api as smf\n\n# Imports our new data set!\ndataset = pd.read_csv('Data/chocolate data multiple linear regression.txt', index_col=False, sep=\"\\t\",header=0)\n \n###    \n# REPLACE <printDataHere> with print(dataset.head())\n###\n<printDataHere>\n###",
 16 |       "execution_count": null,
 17 |       "outputs": []
 18 |     },
 19 |     {
 20 |       "metadata": {},
 21 |       "cell_type": "markdown",
 22 |       "source": "Step 2\n------\n\nPreviously we found that customers like a high percentage of cocoa and heavier bars of chocolate. Large bars of chocolate cost more money, though, which might make customers less inclined to purchase them.\n\nLet's perform a simple linear regression to see the relationship between __customer happiness__ and chocolate bar __weight__ when the cost of the chocolate was taken into consideration for the survey.\n\n#### In the cell below find the text `<addFeatureHere>` and replace it with `weight` and __run the code__."
 23 |     },
 24 |     {
 25 |       "metadata": {
 26 |         "trusted": true
 27 |       },
 28 |       "cell_type": "code",
 29 |       "source": "###\n# REPLACE <addFeatureHere> BELOW WITH weight\n###\nformula = 'customer_happiness ~ <addFeatureHere>'\n###\n\n# This performs linear regression\nlm = smf.ols(formula = formula, data = dataset).fit()\n\nfeatureName = formula.split(\" \")[-1]\n\n# Get the data for the x parameter (the feature)\nx = dataset[featureName]\n\n# This makes and shows a graph\nintercept = lm.params[0]\nslope = lm.params[1]\nline = slope * x + intercept\ngraph.plot(x, line, '-', c = 'red')\ngraph.scatter(x, dataset.customer_happiness)\ngraph.ylabel('Customer Happiness')\ngraph.xlabel(featureName)\ngraph.show()",
 30 |       "execution_count": null,
 31 |       "outputs": []
 32 |     },
 33 |     {
 34 |       "metadata": {},
 35 |       "cell_type": "markdown",
 36 |       "source": "Customer happiness still increases with larger bars of chocolate. However, many data points (blue) are a long way from our trendline (red). This means that this line doesn't describe the data very well. It is likely that there are other features of the chocolate that are influencing customer happiness.\n\nRepeat the above exercise, looking at `cocoa_percent` in place of `weight` and run the code again. You should see a similar trend."
 37 |     },
 38 |     {
 39 |       "metadata": {},
 40 |       "cell_type": "markdown",
 41 |       "source": "Step 3\n------\n\nWe can check how well our data fit by getting the R² values. These range between 0 - 1, where 1 is a perfect fit. What is a 'good' or 'bad' fit depends on several things, but for our purposes here numbers below ~0.3 will mean a poor fit.\n\nOur linear model is saved under the name `lm`.\n\nThe linear model for simple linear regression we just ran, \"weight vs. customer happiness\", is saved under `lm`. Let's determine the R² value of this model. \n\n#### Print out the R² value of this model by replacing the text `<addFunctionCallHere>` with `rsquared` and then __run the code__."
 42 |     },
 43 |     {
 44 |       "metadata": {
 45 |         "trusted": true
 46 |       },
 47 |       "cell_type": "code",
 48 |       "source": "###\n# REPLACE <addFunctionCallHere> BELOW WITH rsquared TO PRINT THE R² VALUE\n###\nprint(lm.<addFunctionCallHere>)\n###",
 49 |       "execution_count": null,
 50 |       "outputs": []
 51 |     },
 52 |     {
 53 |       "metadata": {},
 54 |       "cell_type": "markdown",
 55 |       "source": "We have a value below 0.3, which means it is a poor fit.\n\nStep 4\n------\n\nThe problem with our chocolate bar survey is that the chocolate bar variables aren't controlled; cost, bar weight, and cocoa percent are different for every chocolate bar.\n\nWe want to see the relationship between cocoa content and customer happiness, but cost and block weight are also influencing customer happiness.\n\nWe *could* run another survey, giving away chocolate bars that are all the same weight for free (i.e. weight and cost are constant), and ask people how happy they are with the chocolate bar given varying percentages of cocoa. However, this would be expensive and time consuming.\n\n__Alternatively, we can use multiple linear regression__. Multiple linear regression can give us the relationship between each _feature_ and customer happiness. These are provided as _coefficients_ (slopes). Positive numbers indicate a positive relationship (i.e. customer happiness increases as this feature increases), negative numbers indicate a negative relationship (customer happiness decreases as this feature increases). Unlike _simple_ linear regression, these relationships should be independent. That means that our relationship between cocoa content and customer happiness should not be influenced strongly by bar weight or cost. \n\n### Below, replace: \n#### 1. `<addCocoaPercentHere>` with `cocoa_percent` \n#### 2. `<addCostPercentHere>` with `cost`\n### then __run the code__."
 56 |     },
 57 |     {
 58 |       "metadata": {
 59 |         "trusted": true
 60 |       },
 61 |       "cell_type": "code",
 62 |       "source": "###\n# IN THE LINE BELOW REPLACE <addCocoaPercentHere> WITH cocoa_percent AND <addCostPercentHere> WITH cost\n###\nformula = 'customer_happiness ~ weight + <addCocoaPercentHere> + <addCostPercentHere>'\n###\n\n# This creates a new model with all three features\nlm = smf.ols(formula = formula, data = dataset).fit()\n\n# Print the coefficients (slopes) of our new model\nprint(lm.params)\n\nprint(\"R²: \" + str(lm.rsquared))",
 63 |       "execution_count": null,
 64 |       "outputs": []
 65 |     },
 66 |     {
 67 |       "metadata": {},
 68 |       "cell_type": "markdown",
 69 |       "source": "If we inspect the table, we can see that `weight` and `cocoa_percent` are positive numbers, telling us they both independently increase customer happiness, but also that cost decreases it. \n\nThe R² value is also much higher than before. This means model fits much better now.\n\nStep 5\n------\n\nFrom our linear regression, we have an equation that predicts customer happiness. It looks like so:\n\n`customer_happiness = -9.34 + weight * 0.106 + cocoa_percent * 31.9 + cost * -1.31`\n\nWe might also know that, for our company, the cost of manufacturing and shipping each bar can be calculated as:\n\n`cost = (0.05 * weight + weight * cocoa_percent)^2 * 0.0004`\n\nFrom this, we can calculate the best bar for our customers, by balancing the cost against how happy the customer is likely to be with this product.\n\nLets plots this in 3D to see what our optimum chocolate bar should be.\n\nBelow, complete the calculation for customer happiness.\n\n#### Replace `<addThirdCoefHere>` and `<addFourthCoefHere>` as described in the comments and then __run the code__."
 70 |     },
 71 |     {
 72 |       "metadata": {
 73 |         "trusted": true
 74 |       },
 75 |       "cell_type": "code",
 76 |       "source": "import math\nimport numpy as np\nfrom mpl_toolkits.mplot3d import Axes3D\ndef CalculateCustomerHappiness(weight, cocoa_percent):\n    # This calculates the customer happiness for a given bar of chocolate\n    cocoa_fraction = cocoa_percent / 100\n    cost = (weight * 0.05 + weight * cocoa_fraction)**2 * 0.0004\n    \n    # First coefficient\n    coeff_intercept = lm.params[0]\n    \n    # Second coefficient\n    coeff_weight = lm.params[1]\n    \n    # Third coefficient\n    coeff_cocoa = lm.params[2]\n    \n    # Fourth coefficient\n    coeff_cost = lm.params[3]\n    \n    ### \n    # REPLACE THE <addThirdCoefHere> AND <addFourthCoefHere> BELOW WITH THE THIRD AND FOURTH COEFFICIENTS\n    # YOU'LL FIND THEM JUST ABOVE THESE COMMENTS!\n    # THE FIRST TWO HAVE ALREADY BEEN COMPLETED FOR YOU\n    ###\n    customer_happiness = (coeff_intercept) + (weight * coeff_weight) + (cocoa_fraction * <addThirdCoefHere>) + (cost * <addFourthCoefHere>)\n    ###\n    \n    return customer_happiness\n       \n    \n# Don't edit this part! This creates our graph.\ndef Graph3d():\n    # This creates a 3D graph of likely customer happiness with different types of chocolate bar\n    fig = graph.figure()\n    ax = fig.add_subplot(111, projection='3d')\n\n    cocoaPercentages=range(0,100,5)\n    for weight in range(50,250,5):\n        happiness=[]\n        for cocoa in cocoaPercentages:\n            happiness.append(CalculateCustomerHappiness(weight,cocoa))\n\n        ax.scatter(np.full(len(happiness),weight), cocoaPercentages, happiness,c=happiness)\n\n    ax.set_xlabel('Chocolate Bar Weight')\n    ax.set_ylabel('Cocoa %')\n    ax.set_zlabel('Customer happiness')\n\n    graph.show()\n    \nGraph3d()",
 77 |       "execution_count": null,
 78 |       "outputs": []
 79 |     },
 80 |     {
 81 |       "metadata": {},
 82 |       "cell_type": "markdown",
 83 |       "source": "In the graph above, higher values in the graph (yellow) show higher customer happiness. We can see that our optimum bar should be around 100g and contain a high amount of cocoa. For large bars of chocolate, a cocoa content of around 50% appears to be ideal.\n\nNote how this is different to our earlier work with _simple_ linear regression. With that, we assumed a large bar with very high amount of cocoa was what customers would want."
 84 |     },
 85 |     {
 86 |       "metadata": {
 87 |         "trusted": false
 88 |       },
 89 |       "cell_type": "markdown",
 90 |       "source": "Conclusion\n==========\n\nThat's it! You can go back to the course now and click on __'Next Step'__ to carry on with our introduction to regression."
 91 |     },
 92 |     {
 93 |       "metadata": {
 94 |         "trusted": true
 95 |       },
 96 |       "cell_type": "code",
 97 |       "source": "",
 98 |       "execution_count": null,
 99 |       "outputs": []
100 |     }
101 |   ],
102 |   "metadata": {
103 |     "kernelspec": {
104 |       "name": "python36",
105 |       "display_name": "Python 3.6",
106 |       "language": "python"
107 |     },
108 |     "language_info": {
109 |       "mimetype": "text/x-python",
110 |       "nbconvert_exporter": "python",
111 |       "name": "python",
112 |       "pygments_lexer": "ipython3",
113 |       "version": "3.6.6",
114 |       "file_extension": ".py",
115 |       "codemirror_mode": {
116 |         "version": 3,
117 |         "name": "ipython"
118 |       }
119 |     }
120 |   },
121 |   "nbformat": 4,
122 |   "nbformat_minor": 2
123 | }


--------------------------------------------------------------------------------
/04. Polynomial Regression - Python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "metadata": {},
  5 |       "cell_type": "markdown",
  6 |       "source": "Exercise 4 - Polynomial Regression\n========\n\nSometimes our data doesn't have a linear relationship, but we still want to predict an outcome.\n\nSuppose we want to predict how satisfied people might be with a piece of fruit, we would expect satisfaction would be low if the fruit was under ripened or over ripened. Satisfaction would be high in between underripened and overripened.\n\nThis is not something linear regression will help us with, so we can turn to polynomial regression to help us make predictions for these more complex non-linear relationships!"
  7 |     },
  8 |     {
  9 |       "metadata": {},
 10 |       "cell_type": "markdown",
 11 |       "source": "Step 1\n------\n\nIn this exercise we will look at a dataset analysing internet traffic over the course of the day. Observations were made every hour over the course of several days. Suppose we want to predict the level of traffic we might see at any time during the day, how might we do this?\n\nLet's start by opening up our data and having a look at it.\n\n#### In the cell below replace the text `<printDataHere>` with `print(dataset.head())`, and __run the code__ to see the data."
 12 |     },
 13 |     {
 14 |       "metadata": {
 15 |         "trusted": true
 16 |       },
 17 |       "cell_type": "code",
 18 |       "source": "# This sets up the graphing configuration\nimport warnings\nwarnings.filterwarnings(\"ignore\")\nimport matplotlib.pyplot as graph\n%matplotlib inline\ngraph.rcParams['figure.figsize'] = (15,5)\ngraph.rcParams[\"font.family\"] = \"DejaVu Sans\"\ngraph.rcParams[\"font.size\"] = \"12\"\ngraph.rcParams['image.cmap'] = 'rainbow'\ngraph.rcParams['axes.facecolor'] = 'white'\ngraph.rcParams['figure.facecolor'] = 'white'\nimport numpy as np\nimport pandas as pd\n\ndataset = pd.read_csv('Data/traffic_by_hour.csv')\n\n###\n# BELOW, REPLACE <printDataHere> WITH print(dataset.head()) TO PREVIEW THE DATASET ---###\n###\n<printDataHere>\n###",
 19 |       "execution_count": null,
 20 |       "outputs": []
 21 |     },
 22 |     {
 23 |       "metadata": {},
 24 |       "cell_type": "markdown",
 25 |       "source": "Step 2\n-----\n\nNext we're going to flip the data with the transpose method - our rows will become columns and our columns will become rows. Transpose is commonly used to reshape data so we can use it. Let's try it out.\n\n#### In the cell below find the text `<addCallToTranspose>` and replace it with `transpose`"
 26 |     },
 27 |     {
 28 |       "metadata": {
 29 |         "trusted": true
 30 |       },
 31 |       "cell_type": "code",
 32 |       "source": "### \n# REPLACE THE <addCallToTranspose> BELOW WITH transpose\n###\ndataset_T = np.<addCallToTranspose>(dataset)\n###\n\nprint(dataset_T)",
 33 |       "execution_count": null,
 34 |       "outputs": []
 35 |     },
 36 |     {
 37 |       "metadata": {},
 38 |       "cell_type": "markdown",
 39 |       "source": "Now lets visualize the data. \n\n#### Replace the text `<addSampleHere>` with `sample` and then __run the code__."
 40 |     },
 41 |     {
 42 |       "metadata": {
 43 |         "scrolled": true,
 44 |         "trusted": true
 45 |       },
 46 |       "cell_type": "code",
 47 |       "source": "# Let's visualise the data!\n\n###\n# REPLACE <addSampleHere> BELOW WITH sample\n###\nfor <addSampleHere> in range(0, dataset_T.shape[1]):\n    graph.plot(dataset.columns.values, dataset_T[sample])\n###\n\ngraph.xlabel('Time of day')\ngraph.ylabel('Internet traffic (Gbps)')\ngraph.show()",
 48 |       "execution_count": null,
 49 |       "outputs": []
 50 |     },
 51 |     {
 52 |       "metadata": {},
 53 |       "cell_type": "markdown",
 54 |       "source": "Step 3\n-----\n\nThis all looks a bit busy, let's see if we can draw out a clearer pattern by taking the __average values__ for each hour.\n\n#### In the cell below find all occurances of `<replaceWithHour>` and replace them with `hour` and then __run the code__."
 55 |     },
 56 |     {
 57 |       "metadata": {
 58 |         "trusted": true
 59 |       },
 60 |       "cell_type": "code",
 61 |       "source": "# We want to look at the mean values for each hour.\n\nhours = dataset.columns.values\n\n###\n# REPLACE THE <replaceWithHour>'s BELOW WITH hour\n###\ntrain_Y = [dataset[<replaceWithHour>].mean() for <replaceWithHour> in hours]  # This will be our outcome we measure (label) - amount of internet traffic\ntrain_X = np.transpose([int(<replaceWithHour>) for <replaceWithHour> in hours]) # This is our feature - time of day\n###\n\n# This makes our graph, don't edit!\ngraph.scatter(train_X, train_Y)\nfor sample in range(0,dataset_T.shape[1]):\n    graph.plot(hours, dataset_T[sample], alpha=0.25)\ngraph.xlabel('Time of day')\ngraph.ylabel('Internet traffic (Gbps)')\ngraph.show()",
 62 |       "execution_count": null,
 63 |       "outputs": []
 64 |     },
 65 |     {
 66 |       "metadata": {},
 67 |       "cell_type": "markdown",
 68 |       "source": "This alone could help us make a prediction if we wanted to know the expected traffic exactly on the hour.\n\nBut, we'll need to be a bit more clever if we want to make a __good__ prediction of times in between."
 69 |     },
 70 |     {
 71 |       "metadata": {},
 72 |       "cell_type": "markdown",
 73 |       "source": "Step 4\n------\n\nLet's use the midpoints in between the hours to analyse the relationship between the __time of day__ and the __amount of internet traffic__.\n\nNumpy's `polyfit(x,y,d)` function allows us to do polynomial regression, or more precisely least squares polynomial fit.\n\nWe specify a __feature $x$ (time of day)__, our __label $y$ (the amount of traffic)__, and the __degree $d$ of the polynomial (how curvy the line is)__.\n\n#### In the cell below find the text `<replaceWithDegree>`, replace it with the value `1` then __run the code__."
 74 |     },
 75 |     {
 76 |       "metadata": {
 77 |         "trusted": true
 78 |       },
 79 |       "cell_type": "code",
 80 |       "source": "# Polynomials of degree 1 are linear!\n# Lets include this one just for comparison\n\n###\n# REPLACE THE <replaceWithDegree> BELOW WITH 1\n###\npoly_1 = np.polyfit(train_X, train_Y, <replaceWithDegree>)\n###",
 81 |       "execution_count": null,
 82 |       "outputs": []
 83 |     },
 84 |     {
 85 |       "metadata": {},
 86 |       "cell_type": "markdown",
 87 |       "source": "Let's also compare a few higher-degree polynomials.\n\n#### Replace the `<replaceWithDegree>`'s below with numbers, as directed in the comments."
 88 |     },
 89 |     {
 90 |       "metadata": {
 91 |         "scrolled": true,
 92 |         "trusted": true
 93 |       },
 94 |       "cell_type": "code",
 95 |       "source": "###\n# REPLACE THE <replaceWithDegree>'s BELOW WITH 2, 3, AND THEN 4\n###\npoly_2 = np.polyfit(train_X, train_Y, <replaceWithDegree>)\npoly_3 = np.polyfit(train_X, train_Y, <replaceWithDegree>)\npoly_4 = np.polyfit(train_X, train_Y, <replaceWithDegree>)\n###\n\n# Let's plot it!\ngraph.scatter(train_X, train_Y)\nxp = np.linspace(0, 24, 100)\n\n# black dashed linear degree 1\ngraph.plot(xp, np.polyval(poly_1, xp), 'k--')\n# red degree 2\ngraph.plot(xp, np.polyval(poly_2, xp), 'r-')\n# blue degree 3\ngraph.plot(xp, np.polyval(poly_3, xp), 'b-') \n# yellow degree 4\ngraph.plot(xp, np.polyval(poly_4, xp), 'y-') \n\ngraph.xticks(train_X, dataset.columns.values)\ngraph.xlabel('Time of day')\ngraph.ylabel('Internet traffic (Gbps)')\ngraph.show()",
 96 |       "execution_count": null,
 97 |       "outputs": []
 98 |     },
 99 |     {
100 |       "metadata": {},
101 |       "cell_type": "markdown",
102 |       "source": "None of these polynomials do a great job of generalising the data. Let's try a few more.\n\n#### Follow the instructions in the comments to replace the `<replaceWithDegree>`'s and then __run the code__."
103 |     },
104 |     {
105 |       "metadata": {
106 |         "trusted": true
107 |       },
108 |       "cell_type": "code",
109 |       "source": "###\n# REPLACE THE <replaceWithDegree>'s 5, 6, AND 7\n###\npoly_5 = np.polyfit(train_X, train_Y, <replaceWithDegree>)\npoly_6 = np.polyfit(train_X, train_Y, <replaceWithDegree>)\npoly_7 = np.polyfit(train_X, train_Y, <replaceWithDegree>)\n###\n\n# Let's plot it!\ngraph.scatter(train_X, train_Y)\nxp = np.linspace(0, 24, 100)\n\n# black dashed linear degree 1\ngraph.plot(xp, np.polyval(poly_1, xp), 'k--')\n# red degree 5\ngraph.plot(xp, np.polyval(poly_5, xp), 'r-') \n# blue degree 6\ngraph.plot(xp, np.polyval(poly_6, xp), 'b-') \n# yellow degree 7\ngraph.plot(xp, np.polyval(poly_7, xp), 'y-') \n\ngraph.xticks(train_X, dataset.columns.values)\ngraph.xlabel('Time of day')\ngraph.ylabel('Internet traffic (Gbps)')\ngraph.show()",
110 |       "execution_count": null,
111 |       "outputs": []
112 |     },
113 |     {
114 |       "metadata": {},
115 |       "cell_type": "markdown",
116 |       "source": "It looks like the 5th and 6th degree polynomials have an identical curve. This looks like a good curve to use.\n\nWe could perhaps use an even higher degree polynomial to fit it even more tightly, but we don't want to overfit the curve, since we want just a generalisation of the relationship.\n\nLet's see how our degree 6 polynomial compares to the real data.\n\n#### Replace the text `<replaceWithPoly6>` with `poly_6` and __run the code__."
117 |     },
118 |     {
119 |       "metadata": {
120 |         "trusted": true
121 |       },
122 |       "cell_type": "code",
123 |       "source": "for row in range(0,dataset_T.shape[1]):\n    graph.plot(dataset.columns.values, dataset_T[row], alpha = 0.5)\n\n###\n# REPLACE <replaceWithPoly6> BELOW WITH poly_6 - THE POLYNOMIAL WE WISH TO VISUALIZE\n###    \ngraph.plot(xp, np.polyval(<replaceWithPoly6>, xp), 'k-')\n###\n\ngraph.xlabel('Time of day')\ngraph.ylabel('Internet traffic (Gbps)')\ngraph.show()",
124 |       "execution_count": null,
125 |       "outputs": []
126 |     },
127 |     {
128 |       "metadata": {},
129 |       "cell_type": "markdown",
130 |       "source": "Step 5\n------\n\nNow let's try using this model to make a prediction for a time between 00 and 24.\n\n#### In the cell below follow the instructions in the code to replace `<replaceWithTime>` and `<replaceWithPoly6>` then __run the code__."
131 |     },
132 |     {
133 |       "metadata": {
134 |         "trusted": true
135 |       },
136 |       "cell_type": "code",
137 |       "source": "###\n# REPLACE <replaceWithTime> BELOW WITH 12.5 (this represents the time 12:30)\n###\ntime = <replaceWithTime>\n###\n\n###\n# REPLACE <replaceWithPoly6> BELOW WITH poly_6 SO WE CAN VISUALIZE THE 6TH DEGREE POLYNOMIAL MODEL\n###\npred = np.polyval(<replaceWithPoly6>, time)\n###\n\nprint(\"at t=%s, predicted internet traffic is %s Gbps\"%(time,pred))\n\n# Now let's visualise it\ngraph.plot(xp, np.polyval(poly_6, xp), 'y-')\n\ngraph.plot(time, pred, 'ko') # result point\ngraph.plot(np.linspace(0, time, 2), np.full([2], pred), dashes=[6, 3], color='black') # dashed lines (to y-axis)\ngraph.plot(np.full([2], time), np.linspace(0, pred, 2), dashes=[6, 3], color='black') # dashed lines (to x-axis)\n\ngraph.xticks(train_X, dataset.columns.values)\ngraph.ylim(0, 60)\ngraph.title('expected traffic throughout the day')\ngraph.xlabel('time of day')\ngraph.ylabel('internet traffic (Gbps)')\n\ngraph.show()",
138 |       "execution_count": null,
139 |       "outputs": []
140 |     },
141 |     {
142 |       "metadata": {},
143 |       "cell_type": "markdown",
144 |       "source": "Conclusion\n-----\n\nAnd there we have it! You have made a polynomial regression model and used it for analysis! This models gives us a prediction for the level of internet traffic we should expect to see at any given time of day.\n\nYou can go back to the course and either click __'Next Step'__ to start an optional step with tips on how to better work with AI models, or you can go to the next module where instead of predicting numbers we predict categories."
145 |     }
146 |   ],
147 |   "metadata": {
148 |     "kernelspec": {
149 |       "name": "python36",
150 |       "display_name": "Python 3.6",
151 |       "language": "python"
152 |     },
153 |     "language_info": {
154 |       "mimetype": "text/x-python",
155 |       "nbconvert_exporter": "python",
156 |       "name": "python",
157 |       "pygments_lexer": "ipython3",
158 |       "version": "3.6.6",
159 |       "file_extension": ".py",
160 |       "codemirror_mode": {
161 |         "version": 3,
162 |         "name": "ipython"
163 |       }
164 |     }
165 |   },
166 |   "nbformat": 4,
167 |   "nbformat_minor": 2
168 | }


--------------------------------------------------------------------------------
/05. Logistic Regression - Python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "metadata": {
  5 |         "collapsed": true
  6 |       },
  7 |       "cell_type": "markdown",
  8 |       "source": "Exercise 5 - Logistic Regression\n=====\n\nLogistic regression predicts binary (yes/no) events. For example, we may want to predict if someone will arrive at work on time, or if a person shopping will buy a product. \n\nThis exercise will demonstrate simple logistic regression: predicting an outcome from only one feature.\n\nStep 1\n-----\n\nWe want to place a bet on the outcome of the next football (soccer) match. It is the final of a competition, so there will not be a draw. We have historical data about our favourite team playing in matches such as this. Complete the exercise below to preview our data.\n\n### In the cell below replace:\n#### 1. `<addFilePath>` with `'Data/football data.txt' ` (including the quotation marks)\n#### 2. `<printDataHere>` with `print(dataset.head())`\n\n#### and then __run the code__."
  9 |     },
 10 |     {
 11 |       "metadata": {
 12 |         "trusted": true
 13 |       },
 14 |       "cell_type": "code",
 15 |       "source": "# This part sets up the graphing configuration\nimport warnings\nwarnings.filterwarnings(\"ignore\")\nimport matplotlib.pyplot as graph\n%matplotlib inline\ngraph.rcParams['figure.figsize'] = (15,5)\ngraph.rcParams[\"font.family\"] = 'DejaVu Sans'\ngraph.rcParams[\"font.size\"] = '12'\ngraph.rcParams['image.cmap'] = 'rainbow'\nimport pandas as pd\n\n\n###\n# REPLACE <addFilePath> BELOW WITH 'Data/football data.txt' (INCLUDING THE QUOTES) TO LOAD THE DATA FROM THAT FILE\n###\ndataset = pd.read_csv(<addFilePath>, index_col = False, sep = '\\t', header = 0)\n###\n\n###\n# REPLACE <printDataHere> BELOW WITH print(dataset.head()) TO PREVIEW OUR DATASET\n###\n<printDataHere>\n###",
 16 |       "execution_count": null,
 17 |       "outputs": []
 18 |     },
 19 |     {
 20 |       "metadata": {},
 21 |       "cell_type": "markdown",
 22 |       "source": "This data shows the average goals per match of our team for that season in the left column. In the right column it lists a 1 if our team won the competition or a 0 if they did not.\n\nStep 2\n----\n\nLet's graph the data so we have a better idea of what's going on here. Complete the exercise below to make an x-y scatter plot.\n\n### In the cell below replace:\n#### 1. `<addWonCompetition>` with `'won_competition'`\n#### 2. `<addAverageGoals>` with `'average_goals_per_match'`\n#### then __run the code__."
 23 |     },
 24 |     {
 25 |       "metadata": {
 26 |         "trusted": true
 27 |       },
 28 |       "cell_type": "code",
 29 |       "source": "###\n# REPLACE <addWonCompetition> BELOW WITH 'won_competition' (INCLUDING THE QUOTES)\n###\ntrain_Y = dataset[<addWonCompetition>]\n###\n\n###\n# REPLACE <addAverageGoals> BELOW WITH 'average_goals_per_match' (INCLUDING THE QUOTES)\n###\ntrain_X = dataset[<addAverageGoals>]\n###\n\n# The 'won_competition' will be displayed on the vertical axis (y axis)\n# The 'average_goals_per_match' will be displayed on the horizontal axis (x axis)\n\ngraph.scatter(train_X, train_Y, c = train_Y, marker = 'D')\n\ngraph.yticks([0, 1], ['No', 'Yes'])\ngraph.ylabel(\"Competition Win\")\ngraph.ylim([-0.5, 1.5])\ngraph.xlabel(\"Average number of goals scored per match\")\n\ngraph.show()",
 30 |       "execution_count": null,
 31 |       "outputs": []
 32 |     },
 33 |     {
 34 |       "metadata": {},
 35 |       "cell_type": "markdown",
 36 |       "source": "We can see from this graph that generally, when our team has a good score average, they tend to win the competition.\n\nStep 3\n----\n\nHow can we predict whether the team will win this season? Let's apply AI to this problem, by making a logisitic regression model using this data and then graph it. This will tell us whether we will likely win this season.\n\n#### Below replace `<buildLinearRegression>` with `linear_model.LogisticRegression()` and then __run the code__."
 37 |     },
 38 |     {
 39 |       "metadata": {
 40 |         "trusted": true
 41 |       },
 42 |       "cell_type": "code",
 43 |       "source": "import numpy as np\nfrom sklearn import linear_model\n\n# Here we build a logistic regression model\n\n###\n# REPLACE <buildLinearRegression> BELOW WITH linear_model.LogisticRegression() TO BUILD A LOGISTIC REGRESSION MODEL\n###\nclf = <buildLinearRegression>\n###\n\n# This step fits (calculates) the model\n# We are using our feature (x - number of goals scored) and our outcome/label (y - won/lost)\nclf.fit(train_X[:, np.newaxis], train_Y)\n\n# This works out the loss\ndef sigmoid(train_X):\n    return 1 / (1 + np.exp(-train_X))\nX_test = np.linspace(0, 3, 300)\nloss = sigmoid(X_test * clf.coef_ + clf.intercept_).ravel()",
 44 |       "execution_count": null,
 45 |       "outputs": []
 46 |     },
 47 |     {
 48 |       "metadata": {},
 49 |       "cell_type": "markdown",
 50 |       "source": "Alright, that's the model done. Now __run the code__ below to graph it."
 51 |     },
 52 |     {
 53 |       "metadata": {
 54 |         "trusted": true
 55 |       },
 56 |       "cell_type": "code",
 57 |       "source": "# This makes the graph\n# The data points\ngraph.scatter(train_X, train_Y, c = train_Y, marker = 'D')\n# The curve\ngraph.plot(X_test, loss, color = 'gold', linewidth = 3)\n# Define the y-axis\ngraph.yticks([0, 1], ['No = 0.0', 'Yes = 1.0'])\ngraph.ylabel(\"Competition Win Likelihood\")\ngraph.xlabel(\"Average number of goals per match\")\ngraph.show()",
 58 |       "execution_count": null,
 59 |       "outputs": []
 60 |     },
 61 |     {
 62 |       "metadata": {},
 63 |       "cell_type": "markdown",
 64 |       "source": "We now have a line fit to our data. This yellow line is our logistic regression model.\n\nStep 4\n------\n\nWe can read the model above like so:\n* Take the average number of goals per match for the current year. Let's say it is 2.5.\n* Find 2.5 on the x-axis. \n* What value (on the y axis) does the line have at x=2.5?\n* If this value is above 0.5, then the model thinks our team will win this year. If it is less than 0.5, it thinks our team will lose.\n\nBecause this line is just a mathematical function (equation) we don't have to do this visually.\n\nIn the exercise below, __choose the number of goals you want to evaluate__.\n\nThe code will calculate the probability that our team will win with your chosen number of goals in the match.\n\n### In the cell below replace:\n#### 1. `<numberOfGoals>` with the number of goals in a year (any number from 0 to 3)\n#### 2. `<replaceWithP>` with `p`\n#### then __run the code__."
 65 |     },
 66 |     {
 67 |       "metadata": {
 68 |         "trusted": true
 69 |       },
 70 |       "cell_type": "code",
 71 |       "source": "###\n# REPLACE <numberOfGoals> BELOW WITH THE NUMBER OF GOALS IN A MATCH THIS YEAR. USE ANY NUMBER FROM 0 TO 3\n###\np = <numberOfGoals>\n###\n\n# Next we're going to use our model again - clf is the name of our model.\n# We'll use a method to predict the probability of a positive result\n# Use the variable p which we just made in this method.\n\n###\n# REPLACE <replaceWithP> BELOW WITH p TO PREDICT USING THIS VALUE\n###\nprobOfWinning = clf.predict_proba([[ <replaceWithP> ]])[0][1]\n###\n\n# This prints out the result\nprint(\"Probability of winning this year\")\nprint(str(probOfWinning * 100) + \"%\")\n\n# This plots the result\ngraph.scatter(train_X, train_Y, c = train_Y, marker = 'D')\ngraph.yticks([0, probOfWinning, 1], ['No = 0.0', round(probOfWinning,3), 'Yes = 1.0'])\ngraph.plot(X_test, loss, color = 'gold', linewidth = 3)\n\ngraph.plot(p, probOfWinning, 'ko') # result point\ngraph.plot(np.linspace(0, p, 2), np.full([2],probOfWinning), dashes = [6, 3], color = 'black') # dashed lines (to y-axis)\ngraph.plot(np.full([2],p), np.linspace(0, probOfWinning, 2), dashes = [6, 3], color = 'black') # dashed lines (to x-axis)\n\ngraph.ylabel(\"Competition Win Likelihood\")\ngraph.xlabel(\"Average number of goals per match\")\ngraph.show()",
 72 |       "execution_count": null,
 73 |       "outputs": []
 74 |     },
 75 |     {
 76 |       "metadata": {},
 77 |       "cell_type": "markdown",
 78 |       "source": "Conclusion\n-----\n\nWell done! We have calculated the likelihood that our team will win this year's competition.\n\nYou can go back to the course now and click __'Next Step'__ "
 79 |     },
 80 |     {
 81 |       "metadata": {},
 82 |       "cell_type": "markdown",
 83 |       "source": "Optional: Step 5\n-----\n\nOf course, these predictions are only one model.\n\nLet's return to what we did in step 3, but we'll replace `linear_model.LogisticRegression()` with `linear_model.LogisticRegression(C=200)`. This will tell the model to make a steeper decision boundary. Then repeat Step 4 with this boundary. Did your results change?\n\nThere are methods we can use to choose sensible parameters for many models. This is currently outside the scope of this course, but it is important to remember that a model is only as good as the data we give it, the parameters we choose, and the assumptions we make.\n\n#### Follow the instructions in the cell below to replace `<numberOfGoals>` and `<buildLinearRegression>` and __run the code__."
 84 |     },
 85 |     {
 86 |       "metadata": {
 87 |         "trusted": true
 88 |       },
 89 |       "cell_type": "code",
 90 |       "source": "# Let's do that again.\n# We will repeat what we did in step 3, but change the decision boundary.\n\nimport numpy as np\nfrom sklearn import linear_model\n\n###\n# REPLACE THE <numberOfGoals> WITH THE NUMBER OF GOALS YOU WANT TO EVALUATE\n###\np = <numberOfGoals>\n###\n\n# Here we build the new logistic regression model.\n# The C=200 is where we change the decision boundary.\n###\n# REPLACE <buildLinearRegression> BELOW WITH linear_model.LogisticRegression(C=200) TO BUILD A LOGISTIC REGRESSION MODEL\n###\nclf = <buildLinearRegression>\n###\n\n# This step fits (calculates) the model\n# We are using our feature (x - number of goals scored) and our outcome/label (y - won/lost)\nclf.fit(train_X[:, np.newaxis], train_Y)\n\n# This works out the loss\ndef sigmoid(train_X):\n    return 1 / (1 + np.exp(-train_X))\nX_test = np.linspace(0, 3, 300)\nloss = sigmoid(X_test * clf.coef_ + clf.intercept_).ravel()\n\n# This makes the prediction for your chosen number of goals.\nprobOfWinning = clf.predict_proba([[p]])[0][1]\n\n# This prints out the result.\nprint(\"Probability of winning this year\")\nprint(str(probOfWinning * 100) + \"%\")\n\n# This plots the result.\ngraph.scatter(train_X, train_Y, c = train_Y, marker = 'D')\ngraph.yticks([0, probOfWinning, 1], ['No = 0.0', round(probOfWinning,3), 'Yes = 1.0'])\ngraph.plot(X_test, loss, color = 'gold', linewidth = 3)\n\ngraph.plot(p, probOfWinning, 'ko') # result point\ngraph.plot(np.linspace(0, p, 2), np.full([2],probOfWinning), dashes = [6, 3], color = 'black') # dashed lines (to y-axis)\ngraph.plot(np.full([2],p), np.linspace(0, probOfWinning, 2), dashes = [6, 3], color = 'black') # dashed lines (to x-axis)\n\ngraph.ylabel(\"Competition Win Likelihood\")\ngraph.xlabel(\"Average number of goals per match\")\ngraph.show()",
 91 |       "execution_count": null,
 92 |       "outputs": []
 93 |     }
 94 |   ],
 95 |   "metadata": {
 96 |     "kernelspec": {
 97 |       "name": "python36",
 98 |       "display_name": "Python 3.6",
 99 |       "language": "python"
100 |     },
101 |     "language_info": {
102 |       "mimetype": "text/x-python",
103 |       "nbconvert_exporter": "python",
104 |       "name": "python",
105 |       "pygments_lexer": "ipython3",
106 |       "version": "3.6.6",
107 |       "file_extension": ".py",
108 |       "codemirror_mode": {
109 |         "version": 3,
110 |         "name": "ipython"
111 |       }
112 |     }
113 |   },
114 |   "nbformat": 4,
115 |   "nbformat_minor": 2
116 | }


--------------------------------------------------------------------------------
/06. Support Vector Machines - Python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "metadata": {
  5 |         "collapsed": true
  6 |       },
  7 |       "cell_type": "markdown",
  8 |       "source": "Exercise 6 - Support Vector Machines\n=====\n\nSupport vector machines (SVMs) let us predict categories. This exercise will demonstrate a simple support vector machine that can predict a category from a small number of features. \n\nOur problem is that we want to be able to categorise which type of tree an new specimen belongs to. To do this, we will use features of three different types of trees to train an SVM. \n\n__Run the code__ in the cell below."
  9 |     },
 10 |     {
 11 |       "metadata": {
 12 |         "trusted": true
 13 |       },
 14 |       "cell_type": "code",
 15 |       "source": "# Run this code!\n# It sets up the graphing configuration.\nimport warnings\nwarnings.filterwarnings(\"ignore\")\nimport matplotlib.pyplot as graph\n%matplotlib inline\ngraph.rcParams['figure.figsize'] = (15,5)\ngraph.rcParams[\"font.family\"] = 'DejaVu Sans'\ngraph.rcParams[\"font.size\"] = '12'\ngraph.rcParams['image.cmap'] = 'rainbow'",
 16 |       "execution_count": null,
 17 |       "outputs": []
 18 |     },
 19 |     {
 20 |       "metadata": {},
 21 |       "cell_type": "markdown",
 22 |       "source": "Step 1\n-----\n\nFirst, we will take a look at the raw data first to see what features we have.\n\n#### Replace `<printDataHere>` with `print(dataset.head())` and then __run the code__."
 23 |     },
 24 |     {
 25 |       "metadata": {
 26 |         "trusted": true
 27 |       },
 28 |       "cell_type": "code",
 29 |       "source": "import pandas as pd\nimport numpy as np\n\n# Loads the SVM library\nfrom sklearn import svm\n\n# Loads the dataset\ndataset = pd.read_csv('Data/trees.csv')\n\n###\n# REPLACE <printDataHere> with print(dataset.head()) TO PREVIEW THE DATASET\n###\n<printDataHere>\n###",
 30 |       "execution_count": null,
 31 |       "outputs": []
 32 |     },
 33 |     {
 34 |       "metadata": {},
 35 |       "cell_type": "markdown",
 36 |       "source": "It looks like we have _four features_ (leaf_width, leaf_length, trunk_girth, trunk_height) and _one label_ (tree_type).\n\nLet's plot it.\n\n__Run the code__ in the cell below."
 37 |     },
 38 |     {
 39 |       "metadata": {
 40 |         "trusted": true
 41 |       },
 42 |       "cell_type": "code",
 43 |       "source": "# Run this code to plot the leaf features\n\n# This extracts the features. drop() deletes the column we state (tree_type), leaving on the features\nallFeatures = dataset.drop(['tree_type'], axis = 1)\n\n# This keeps only the column we state (tree_type), leaving only our label\nlabels = np.array(dataset['tree_type'])\n\n#Plots the graph\nX = allFeatures['leaf_width']\nY = allFeatures['leaf_length']\ncolor=labels\ngraph.scatter(X, Y, c = color)\ngraph.title('classification plot for leaf features')\ngraph.xlabel('leaf width')\ngraph.ylabel('leaf length')\ngraph.legend()\ngraph.show()",
 44 |       "execution_count": null,
 45 |       "outputs": []
 46 |     },
 47 |     {
 48 |       "metadata": {},
 49 |       "cell_type": "markdown",
 50 |       "source": "__Run the code__ in the cell below to plot the trunk features"
 51 |     },
 52 |     {
 53 |       "metadata": {
 54 |         "trusted": true
 55 |       },
 56 |       "cell_type": "code",
 57 |       "source": "# Run this code to plot the trunk features\ngraph.scatter(allFeatures['trunk_girth'], allFeatures['trunk_height'], c = labels)\ngraph.title('Classification plot for trunk features')\ngraph.xlabel('trunk girth')\ngraph.ylabel('trunk height')\ngraph.show()",
 58 |       "execution_count": null,
 59 |       "outputs": []
 60 |     },
 61 |     {
 62 |       "metadata": {},
 63 |       "cell_type": "markdown",
 64 |       "source": "Step 2\n-----\n\nLets make a support vector machine.\n\nThe syntax for a support vector machine is as follows:\n\n__`model = svm.SVC().fit(features, labels)`__\n\nYour features set will be called __`train_X`__ and your labels set will be called __`train_Y`__\n\n#### Let's first run the SVM in the cell below using the first two features, the leaf features."
 65 |     },
 66 |     {
 67 |       "metadata": {
 68 |         "trusted": true
 69 |       },
 70 |       "cell_type": "code",
 71 |       "source": "# Sets up the feature and target sets for leaf features\n\n# Feature 1\nfeature_one = allFeatures['leaf_width'].values\n\n# Feature 2\nfeature_two = allFeatures['leaf_length'].values\n\n# Features\ntrain_X = np.asarray([feature_one, feature_two]).transpose()\n\n# Labels\ntrain_Y = labels \n\n# Fits the SVM model\n###\n# REPLACE THE <makeSVM> WITH THE CODE TO MAKE A SVM MODEL AS ABOVE\n###\nmodel = <makeSVM>\n###\nprint(\"Model ready. Now plot it to see the result.\")",
 72 |       "execution_count": null,
 73 |       "outputs": []
 74 |     },
 75 |     {
 76 |       "metadata": {},
 77 |       "cell_type": "markdown",
 78 |       "source": "#### Let's plot it! Run the cell below to visualise the SVM with our dataset."
 79 |     },
 80 |     {
 81 |       "metadata": {
 82 |         "trusted": true
 83 |       },
 84 |       "cell_type": "code",
 85 |       "source": "# Run this to plots the SVM model\nX_min, X_max = train_X[:, 0].min() - 1, train_X[:, 0].max() + 1\nY_min, Y_max = train_X[:, 1].min() - 1, train_X[:, 1].max() + 1\n\nXX, YY = np.meshgrid(np.arange(X_min, X_max, .02), np.arange(Y_min, Y_max, .02))\nZ = model.predict(np.c_[XX.ravel(), YY.ravel()]).reshape(XX.shape)\n\ngraph.scatter(feature_one, feature_two, c = train_Y, cmap = graph.cm.rainbow, zorder = 10, edgecolor = 'k', s = 40)\ngraph.contourf(XX, YY, Z, cmap = graph.cm.rainbow, alpha = 1.0)\ngraph.contour(XX, YY, Z, colors = 'k', linestyles = '--', alpha=0.5)\n\ngraph.title('SVM plot for leaf features')\ngraph.xlabel('leaf width')\ngraph.ylabel('leaf length')\n\ngraph.show()",
 86 |       "execution_count": null,
 87 |       "outputs": []
 88 |     },
 89 |     {
 90 |       "metadata": {},
 91 |       "cell_type": "markdown",
 92 |       "source": "The graph shows three colored zones that the SVM has chosen to group the datapoints in. Color, here, means type of tree. As we can see, the zones correspond reasonably well with the actual tree types of our training data. This means that the SVM can group, for its training data, quite well calculate tree type based on leaf features.\n\n\nNow let's do the same using trunk features.\n\n### In the cell below replace:\n#### 1. `<addTrunkGirth>` with `'trunk_girth'`\n#### 2. `<addTrunkHeight>` with `'trunk_height'`\n#### Then __run the code__."
 93 |     },
 94 |     {
 95 |       "metadata": {
 96 |         "trusted": true
 97 |       },
 98 |       "cell_type": "code",
 99 |       "source": "# Feature 1\n###--- REPLACE THE <addTrunkGirth> BELOW WITH 'trunk_girth' (INCLUDING THE QUOTES) ---###\n###\ntrunk_girth = allFeatures[<addTrunkGirth>].values\n###\n\n# Feature 2\n###--- REPLACE THE <addTrunkHeight> BELOW WITH 'trunk_height' (INCLUDING THE QUOTES) ---###\ntrunk_height = allFeatures[<addTrunkHeight>].values\n###\n\n# Features\ntrunk_features = np.asarray([trunk_girth, trunk_height]).transpose()\n\n# Fits the SVM model\nmodel = svm.SVC().fit(trunk_features, train_Y)\n\n# Plots the SVM model\nX_min, X_max = trunk_features[:, 0].min() - 1, trunk_features[:, 0].max() + 1\nY_min, Y_max = trunk_features[:, 1].min() - 1, trunk_features[:, 1].max() + 1\n\nXX, YY = np.meshgrid(np.arange(X_min, X_max, .02), np.arange(Y_min, Y_max, .02))\nZ = model.predict(np.c_[XX.ravel(), YY.ravel()]).reshape(XX.shape)\n\ngraph.scatter(trunk_girth, trunk_height, c = train_Y, cmap = graph.cm.rainbow, zorder = 10, edgecolor = 'k', s = 40)\ngraph.contourf(XX, YY, Z, cmap = graph.cm.rainbow, alpha = 1.0)\ngraph.contour(XX, YY, Z, colors = 'k', linestyles = '--', alpha = 0.5)\n\ngraph.title('SVM plot for leaf features')\ngraph.xlabel('trunk girth')\ngraph.ylabel('trunk height')\n\ngraph.show()",
100 |       "execution_count": null,
101 |       "outputs": []
102 |     },
103 |     {
104 |       "metadata": {},
105 |       "cell_type": "markdown",
106 |       "source": "Conclusion\n-------\n\nAnd that's it! You've made a simple support vector machine that can predict the type of tree based on the leaf and trunk measurements!\n\nYou can go back to the course now and click __'Next Step'__ to move onto how we can test AI models."
107 |     }
108 |   ],
109 |   "metadata": {
110 |     "kernelspec": {
111 |       "name": "python36",
112 |       "display_name": "Python 3.6",
113 |       "language": "python"
114 |     },
115 |     "language_info": {
116 |       "mimetype": "text/x-python",
117 |       "nbconvert_exporter": "python",
118 |       "name": "python",
119 |       "pygments_lexer": "ipython3",
120 |       "version": "3.6.6",
121 |       "file_extension": ".py",
122 |       "codemirror_mode": {
123 |         "version": 3,
124 |         "name": "ipython"
125 |       }
126 |     }
127 |   },
128 |   "nbformat": 4,
129 |   "nbformat_minor": 2
130 | }


--------------------------------------------------------------------------------
/07. Advanced SVMs - Python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "metadata": {
  5 |         "collapsed": true
  6 |       },
  7 |       "cell_type": "markdown",
  8 |       "source": "Exercise 7 - Advanced Support Vector Machines\n=====\n\nSupport vector machines let us predict catergories. In this example we will be looking at practically using SVMs by formatting data correctly, visualising the SVM model and then evaluating the SVM model.\n\nWe will be looking at __prions__ - misfolded proteins that are associated with several fatal neurodegenerative diseases (kind of like Daleks, if you have seen Doctor Who). Looking at examples of proteins mass and weight, we will build a predictive model to detect prions in blood samples.\n\n#### Run the code below to set up the graphing features for this notebook."
  9 |     },
 10 |     {
 11 |       "metadata": {
 12 |         "trusted": true
 13 |       },
 14 |       "cell_type": "code",
 15 |       "source": "# Run this code!\n# It sets up the graphing configuration\nimport warnings\nwarnings.filterwarnings(\"ignore\")\nimport matplotlib.pyplot as graph\n%matplotlib inline\ngraph.rcParams['figure.figsize'] = (15,5)\ngraph.rcParams[\"font.family\"] = 'DejaVu Sans'\ngraph.rcParams[\"font.size\"] = '12'\ngraph.rcParams['image.cmap'] = 'rainbow'",
 16 |       "execution_count": null,
 17 |       "outputs": []
 18 |     },
 19 |     {
 20 |       "metadata": {},
 21 |       "cell_type": "markdown",
 22 |       "source": "Step 1\n-----\n\nLets load up the data first, and save it temporarily as rawData. Our dataset is called \"PrionData.csv\".\n\n#### Replace `<addPathToData>`  with `'Data/PrionData.csv'` and then __Run the code__."
 23 |     },
 24 |     {
 25 |       "metadata": {
 26 |         "trusted": true
 27 |       },
 28 |       "cell_type": "code",
 29 |       "source": "import pandas as pd\nimport numpy as np\n\n###\n# REPLACE <addPathToData> BELOW WITH 'Data/PrionData.csv' (INCLUDING THE QUOTES) TO LOAD THE DATA FROM THAT FILE \n###\nrawData = pd.read_csv(<addPathToData>)\n###",
 30 |       "execution_count": null,
 31 |       "outputs": []
 32 |     },
 33 |     {
 34 |       "metadata": {},
 35 |       "cell_type": "markdown",
 36 |       "source": "Step 2\n-----\n\nLets take a look at the data.\n\n#### In the cell below replace the text `<printDataHere>` with `print(rawData.head())` and then __Run the code__."
 37 |     },
 38 |     {
 39 |       "metadata": {
 40 |         "trusted": true
 41 |       },
 42 |       "cell_type": "code",
 43 |       "source": "###\n# REPLACE <printDataHere> with print(rawData.head()) TO VIEW THE TOP 5 DATA POINTS OF THE DATA SET\n###\n<printDataHere>\n###",
 44 |       "execution_count": null,
 45 |       "outputs": []
 46 |     },
 47 |     {
 48 |       "metadata": {},
 49 |       "cell_type": "markdown",
 50 |       "source": "Looks like we have an extra column, this happens regularly when exporting data sets from a program like Excel and then importing them into a dataframe.\n\nStep 3\n-----\n\nLets get rid of that extra column, and then check that it's gone.\n\n#### __Run the code__ below."
 51 |     },
 52 |     {
 53 |       "metadata": {
 54 |         "trusted": true
 55 |       },
 56 |       "cell_type": "code",
 57 |       "source": "# Run this box to remove the extra column.\ndataset = rawData.drop(['Unnamed: 0'], axis = 1)\nprint(dataset.head())",
 58 |       "execution_count": null,
 59 |       "outputs": []
 60 |     },
 61 |     {
 62 |       "metadata": {},
 63 |       "cell_type": "markdown",
 64 |       "source": "All gone!\n\nStep 4\n-----\n\nLet's graph the data set to better understand what we're working with.\n\nLooking at the output of the last step we can see the 'categories' we're looking at is called __prion_status__ (the label).\n\n### In the cell below replace:\n#### 1. `<addMass>` with `'mass'`\n#### 2. `<addWeight>` with `'weight'`\n#### then __run the code__.\n"
 65 |     },
 66 |     {
 67 |       "metadata": {
 68 |         "trusted": true
 69 |       },
 70 |       "cell_type": "code",
 71 |       "source": "###\n# REPLACE THE <addMass> BELOW WITH 'mass' (INCLUDING THE QUOTES)\n###\nX = dataset[<addMass>]\n###\n\n##\n# REPLACE THE <addWeight> BELOW WITH 'weight' (INCLUDING THE QUOTES)\n###\nY = dataset[<addWeight>]\n###\n\n# This makes a list that says which items are prions and which are not\ntarget = dataset['prion_status'] == 'prion'\n\ngraph.scatter(X, Y, c = target, zorder = 10, s = 40)\n\ngraph.title(\"Classification plot for prion data\")\ngraph.ylabel(\"Mass\")\ngraph.xlabel(\"Weight\")\n\ngraph.show()",
 72 |       "execution_count": null,
 73 |       "outputs": []
 74 |     },
 75 |     {
 76 |       "metadata": {},
 77 |       "cell_type": "markdown",
 78 |       "source": "Step 5\n-------\n\nLet's split up our data into test and training sets. We'll start by checking the total number of instances in our dataset by using the DataFrame attribute *shape*. The first number is the one we want.\n\n#### In the cell below replace `<addShape>` with `shape` and then __Run the code__."
 79 |     },
 80 |     {
 81 |       "metadata": {
 82 |         "trusted": true
 83 |       },
 84 |       "cell_type": "code",
 85 |       "source": "###\n# REPLACE THE <addShape> BELOW WITH THE NAME OF THE ATTRIBUTE WE WANT TO LOOK AT - shape\n###\ndataset.<addShape>\n###",
 86 |       "execution_count": null,
 87 |       "outputs": []
 88 |     },
 89 |     {
 90 |       "metadata": {},
 91 |       "cell_type": "markdown",
 92 |       "source": "Step 6\n-----\n\nStep 5 has told us that we have nearly 500 data points. We'll use 400 examples for our training set, and the remainder for our test set.\n\n#### Replace the `<add400>` below with `400` and run the cell."
 93 |     },
 94 |     {
 95 |       "metadata": {
 96 |         "trusted": true
 97 |       },
 98 |       "cell_type": "code",
 99 |       "source": "# This makes our training set out of the first 400 examples\ntrain_X = dataset.drop(['prion_status'], 1).truncate(after = 399)\ntrain_Y = dataset['prion_status'].truncate(after = 399)\n\n###\n# REPLACE THE <add400> BELOW WITH 400 TO MAKE THE TEST SET OUT OF THE REMAINING EXAMPLES\n###\ntest_X = dataset.drop(['prion_status'], 1).truncate(before = <add400>).reset_index(drop = True)\ntest_Y = dataset['prion_status'].truncate(before = <add400>).reset_index(drop = True)\n###",
100 |       "execution_count": null,
101 |       "outputs": []
102 |     },
103 |     {
104 |       "metadata": {},
105 |       "cell_type": "markdown",
106 |       "source": "Step 7\n-----\n\nWell done! Lets look at a summary of our training data.\n\n#### In the cell below replace `<addDescribe>` with `describe()` then __run the code__."
107 |     },
108 |     {
109 |       "metadata": {
110 |         "trusted": true
111 |       },
112 |       "cell_type": "code",
113 |       "source": "###\n# REPLACE THE <addDescribe> BELOW WITH 'describe()'\n###\nprint(train_X.<addDescribe>)\nprint(train_Y.<addDescribe>)\n###",
114 |       "execution_count": null,
115 |       "outputs": []
116 |     },
117 |     {
118 |       "metadata": {},
119 |       "cell_type": "markdown",
120 |       "source": "314 non-prions out of 400, which means there's 86 prions in there. That looks about right if we refer to the graph we made in Step 4.\n\nLet's take a look at our test set too.\n\n#### Use the `describe()` function again, this time looking at __test__ instead of train."
121 |     },
122 |     {
123 |       "metadata": {
124 |         "trusted": true
125 |       },
126 |       "cell_type": "code",
127 |       "source": "###\n# REPLACE THE <addDescribe> BELOW WITH describe()\n###\nprint(test_X.<addDescribe>)\nprint(test_Y.<addDescribe>)\n###",
128 |       "execution_count": null,
129 |       "outputs": []
130 |     },
131 |     {
132 |       "metadata": {},
133 |       "cell_type": "markdown",
134 |       "source": "Looks good to me! Alright, enough of that - lets make an SVM.\n\nStep 8\n-----\n\nBelow we will make an SVM, similar to the previous exercise.\n\nRemember, the syntax for SVM's is:\n\n`SVM_Model = svm.SVC().fit(features, labels)`\n\n### In the cell below replace:\n#### 1. `<addFeatures>` with `train_X`\n#### 2. `<addLabels>` with `train_Y`\n#### and then __run the code__."
135 |     },
136 |     {
137 |       "metadata": {
138 |         "trusted": true
139 |       },
140 |       "cell_type": "code",
141 |       "source": "from sklearn import svm\n\n###\n# REPLACE <addFeatures> WITH train_X and <addLabels> WITH train_Y\n###\nSVM_Model = svm.SVC(gamma = 'auto').fit(<addFeatures>, <addLabels>)\n###\nprint(\"done!\")",
142 |       "execution_count": null,
143 |       "outputs": []
144 |     },
145 |     {
146 |       "metadata": {},
147 |       "cell_type": "markdown",
148 |       "source": "Well done! We've made a SVM Model from our training set.\n\nStep 9\n-----\n\nLets use our model to make some predictions. __Run the code__ in the cell below."
149 |     },
150 |     {
151 |       "metadata": {
152 |         "trusted": true
153 |       },
154 |       "cell_type": "code",
155 |       "source": "# Don't edit this! Just hit run to plot the graph\n\n\n#This makes a plot of our SVM\ndef plot_SVM(clf, data, target):\n    #Make a list of which are prions\n    is_prion = target == 'prion'\n\n    graph.scatter(data['mass'], data['weight'], c = is_prion, zorder = 10, edgecolor = 'k', s = 40)\n    \n    # Put the result into a colour plot\n    XX, YY = np.mgrid[0:1:255j, 0:1:255j]\n    Z = clf.decision_function(np.c_[XX.ravel(), YY.ravel()]).reshape(XX.shape)\n    graph.pcolormesh(XX, YY, Z > 0)\n    graph.contour(XX, YY, Z, colors = ['k', 'k', 'k'], linestyles = ['--', '-', '--'], levels = [-.5, 0, .5])\n    \n    graph.ylim(0, 1)\n    graph.xlim(0, 1)\n    \n    graph.show()\n\n#Call the code to plot our SVM\nplot_SVM(SVM_Model, train_X, train_Y)",
156 |       "execution_count": null,
157 |       "outputs": []
158 |     },
159 |     {
160 |       "metadata": {},
161 |       "cell_type": "markdown",
162 |       "source": "Step 10\n-------\n\nThe SVM has done a reasonable job of separating our test dataset into two. Now lets take a look at our test set.\n\nRemember our syntax for plotting SVM's is: `plot_SVM(SVM_Model, features, labels)`\n\nAdd our __test__ set below to see how it looks.\n\n### In the cell below replace:\n#### 1. `<addTestX>` with `test_X`\n#### 2. `<addTestY>` with `test_Y`\n#### and then __run the code__."
163 |     },
164 |     {
165 |       "metadata": {
166 |         "trusted": true
167 |       },
168 |       "cell_type": "code",
169 |       "source": "###\n# REPLACE <addTestX> WITH test_X AND <addTestY> WITH test_Y\n###\nplot_SVM(SVM_Model, <addTestX>, <addTestY>)\n###",
170 |       "execution_count": null,
171 |       "outputs": []
172 |     },
173 |     {
174 |       "metadata": {},
175 |       "cell_type": "markdown",
176 |       "source": "Step 11\n-----\n\nGraphing is a good way to see how our model has done, but sometimes numbers can be better. Lets calculate the accuracy of our SVM in each dataset.\n\n### In the cell below replace:\n#### 1. `<addTrainX>` with `train_X`\n#### 2. `<addTestX>` with `test_X`\n#### 3. `<addTrainY>` with `train_Y`\n#### 4. `<addTestY>` with `test_Y`\n#### and then __run the code__."
177 |     },
178 |     {
179 |       "metadata": {
180 |         "trusted": true
181 |       },
182 |       "cell_type": "code",
183 |       "source": "###\n# REPLACE <addTrainX> WITH train_X AND <addTestX> with test_X FEATURE SETS TO GENERATE THE PREDICTIONS\n###\ntrain_P = SVM_Model.predict(<addTrainX>.values)\ntest_P = SVM_Model.predict(<addTestX>.values)\n###\n\n# This function evaluates the SVM's accuracy\ndef evaluate_SVM(pred, real, name):\n    matches = pred == real #see where predicted and real are the same\n    accuracy = sum(matches)/len(matches)*100 #convert to percent\n    print(name, \"Set Accuracy:\", accuracy, \"%\") \n\n\n###\n# REPLACE <addTrainY> WITH train_Y AND <addTestY> with test_Y\n###\nevaluate_SVM(train_P, <addTrainY>, 'Train')\nevaluate_SVM(test_P, <addTestY>, 'Test')\n###",
184 |       "execution_count": null,
185 |       "outputs": []
186 |     },
187 |     {
188 |       "metadata": {},
189 |       "cell_type": "markdown",
190 |       "source": "That's a good result. \n\nConclusion\n------\n\nWell done! We've taken a data set, cleaned and prepared it, made a SVM, and then evaluated it. Well done!\n\nYou can go back to the course now, or you can try using different kernels with your SVM below.\n\nOPTIONAL: Step 12\n-----\n\nWant to have a play around with different kernels for your SVM models? It's really easy!\n\nThe standard kernel is a Radial Basis Function kernel. But there's a few more you can choose from - linear (`linear`), polynomial (`poly`), and sigmoid (`sigmoid`). Lets try them out.\n\nIf you wanted to use a linear kernel, all you need to do is add `kernel='linear'` to your model. Like this:\n\n`SVM_Model = svm.SVC(kernel='linear')`\n\nGive it a go with all the different kernels below. The first one is done for you\n\n#### Run the cell below"
191 |     },
192 |     {
193 |       "metadata": {
194 |         "trusted": true
195 |       },
196 |       "cell_type": "code",
197 |       "source": "def assess_SVM(SVM_Model):\n    # Plot the new linear SVM model\n    plot_SVM(SVM_Model, train_X, train_Y)\n    plot_SVM(SVM_Model, test_X, test_Y)\n\n    # Use the model to predict the training and test sets.\n    train_P = SVM_Model.predict(train_X.values)\n    test_P = SVM_Model.predict(test_X.values)\n\n    # Evaluate the model using the training and test sets\n    evaluate_SVM(train_P, train_Y, 'Train')\n    evaluate_SVM(test_P, test_Y, 'Test')\n\n# Make a new linear SVM model\nSVM_Model = svm.SVC(kernel = 'linear').fit(train_X, train_Y)\n\nassess_SVM(SVM_Model)",
198 |       "execution_count": null,
199 |       "outputs": []
200 |     },
201 |     {
202 |       "metadata": {},
203 |       "cell_type": "markdown",
204 |       "source": "You can see the hyperplane is a linear line!\n\nNow lets try a sigmoid kernel.\n\n#### Replace `<replaceThis>` with `'sigmoid'` then run the cell."
205 |     },
206 |     {
207 |       "metadata": {
208 |         "trusted": true
209 |       },
210 |       "cell_type": "code",
211 |       "source": "# Make a new sigmoid SVM model\n\n###\n# REPLACE THE <replaceThis> BELOW WITH 'sigmoid' (INCLUDING THE QUOTES)\n###\nSVM_Model = svm.SVC(kernel = <replaceThis>, gamma = 4, coef0 = 0).fit(train_X, train_Y)\n###\nassess_SVM(SVM_Model)",
212 |       "execution_count": null,
213 |       "outputs": []
214 |     },
215 |     {
216 |       "metadata": {},
217 |       "cell_type": "markdown",
218 |       "source": "Perhaps a sigmoid kernel isn't a good idea for this data set....\n\nLets try a polynomial kernel\n\n#### Replace `<replaceWithPoly>` with `'polynomial'` then run the cell."
219 |     },
220 |     {
221 |       "metadata": {
222 |         "trusted": true
223 |       },
224 |       "cell_type": "code",
225 |       "source": "# Make a new polynomial SVM model\n\n###\n# REPLACE THE <replaceWithPoly> BELOW WITH 'poly' (INCLUDING THE QUOTES)\n###\nSVM_Model = svm.SVC(kernel = <replaceWithPoly>, gamma = 10, degree = 3, coef0 = 0).fit(train_X, train_Y)\n###\n\nassess_SVM(SVM_Model)",
226 |       "execution_count": null,
227 |       "outputs": []
228 |     },
229 |     {
230 |       "metadata": {},
231 |       "cell_type": "markdown",
232 |       "source": "If we were to carry on analyzing prions like this, polynomial looks like a good choice. If the data set was more complicated we could try different degrees for the polynomial to see which one was the most accurate. This is part of __`tuning`__ a model.\n\nWell done!"
233 |     }
234 |   ],
235 |   "metadata": {
236 |     "kernelspec": {
237 |       "name": "python36",
238 |       "display_name": "Python 3.6",
239 |       "language": "python"
240 |     },
241 |     "language_info": {
242 |       "mimetype": "text/x-python",
243 |       "nbconvert_exporter": "python",
244 |       "name": "python",
245 |       "pygments_lexer": "ipython3",
246 |       "version": "3.6.6",
247 |       "file_extension": ".py",
248 |       "codemirror_mode": {
249 |         "version": 3,
250 |         "name": "ipython"
251 |       }
252 |     }
253 |   },
254 |   "nbformat": 4,
255 |   "nbformat_minor": 2
256 | }


--------------------------------------------------------------------------------
/08. Neural Networks Introduction - Python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "metadata": {
  5 |         "collapsed": true
  6 |       },
  7 |       "cell_type": "markdown",
  8 |       "source": "Exercise 8 - Introduction to Neural Networks\n=======\n\nOriginally hypothesised in the 1940s, neural networks are now one of the main tools used in modern AI. Neural networks can be used for both regression and categorisation applications. Recent advances with storage, processing power, and open-source tools have allowed many successful applications of neural networks in medical diagnosis, filtering explicit content, speech recognition and machine translation.\n\nIn this exercise we will compare three dog breeds, using their age, weight, and height. We will make a neural network model to classify the breeds of the dogs based on these features.\n\nNote: It's extremely common for AI practitioners to use a template such as the one below for making neural networks quickly. After you are done, feel free to play around with the template to get a feel of how you can easily adjust a neural network to your problems using Keras.\n\n__Run the code__ in the cell below."
  9 |     },
 10 |     {
 11 |       "metadata": {
 12 |         "trusted": true
 13 |       },
 14 |       "cell_type": "code",
 15 |       "source": "# Run this!\nimport warnings\nwarnings.filterwarnings(\"ignore\")\nimport tensorflow as tf\nimport keras\nprint('keras using %s backend'%keras.backend.backend())\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as graph\n%matplotlib inline\ngraph.rcParams['figure.figsize'] = (15,5)\ngraph.rcParams[\"font.family\"] = 'DejaVu Sans'\ngraph.rcParams[\"font.size\"] = '12'\ngraph.rcParams['image.cmap'] = 'rainbow'",
 16 |       "execution_count": null,
 17 |       "outputs": []
 18 |     },
 19 |     {
 20 |       "metadata": {},
 21 |       "cell_type": "markdown",
 22 |       "source": "Step 1\n------\n\nLet's start by opening up our data and having a look at it.\n\n#### In the cell below replace the `<printDataHere>` with `print(dataset.head())` and then __Run the code__ in the cell below."
 23 |     },
 24 |     {
 25 |       "metadata": {
 26 |         "trusted": true
 27 |       },
 28 |       "cell_type": "code",
 29 |       "source": "# This loads the dataset\ndataset = pd.read_csv('Data/dog_data.csv')\n\n###\n# REPLACE <printDataHere> with print(dataset.head()) TO PREVIEW OUR DATASET\n###\n<printDataHere>\n###\n\n# This tells us the shape of the data set\nprint(\"Shape of data set:\", dataset.shape)\n\n# Defines the feature dataframe\nfeatures = dataset.drop(['breed'], axis = 1)",
 30 |       "execution_count": null,
 31 |       "outputs": []
 32 |     },
 33 |     {
 34 |       "metadata": {},
 35 |       "cell_type": "markdown",
 36 |       "source": "So we've got data from __200 dogs__. The features are their __age__, __weight__, and __height__.\n\nStep 2\n------\n\nOur labels are three breeds of dogs, represented as numbers in our dataset, as `0`, `1`, and `2`. \n\nFor a neural network these numbers are misleading, as they might imply that breed `1` is closer to breed `2` than breed `0` is, in some way. But that is not the case here.\n\nTo allow the neural network to predict categories properly we represent categories as one-hot vectors. The labels (dog breeds) will go from being represented as `0`, `1`, and `2` to this:\n\n| breed 0 | breed 1 | breed 2 |\n|:------- |:------- |:------- |\n| `1 0 0` | `0 1 0` | `0 0 1` |\n\nSo the if the 1 is in the first position, the neural network knows that it's breed 0.\n\nIf the 1 is in the second position, the neural network knows that it's breed 1, and so on.\n\n#### Replace `<addLabels>` with `labels` and __Run the code__."
 37 |     },
 38 |     {
 39 |       "metadata": {
 40 |         "trusted": true
 41 |       },
 42 |       "cell_type": "code",
 43 |       "source": "from sklearn.preprocessing import OneHotEncoder\n\n# This sets the  labels (numerical)\nlabels = np.array(dataset['breed'])\n\n###\n# REPLACE THE <addLabels> BELOW WITH labels\n###\nonehot = OneHotEncoder(sparse = False).fit_transform(np.transpose([<addLabels>]))\n###\n\nprint(onehot[:5])",
 44 |       "execution_count": null,
 45 |       "outputs": []
 46 |     },
 47 |     {
 48 |       "metadata": {},
 49 |       "cell_type": "markdown",
 50 |       "source": "There we go!\n\nStep 3\n-------\n\nBefore we make our model, let's get our test set and training set ready.\n\nWe've got data on 200 dogs, so we'll use 160 for a training set, and 40 for our test set.\n\n#### Run the code below to set up our training at test sets."
 51 |     },
 52 |     {
 53 |       "metadata": {
 54 |         "trusted": true
 55 |       },
 56 |       "cell_type": "code",
 57 |       "source": "# Run this! This sets up our training and test sets.\n\n# This takes the first 160 examples for our training set\ntrain_X = features.values[:160]\ntrain_Y = onehot[:160]\n\n# This takes the last 40 examples of the 200 for our test set\ntest_X = features.values[160:]\ntest_Y = onehot[160:]",
 58 |       "execution_count": null,
 59 |       "outputs": []
 60 |     },
 61 |     {
 62 |       "metadata": {},
 63 |       "cell_type": "markdown",
 64 |       "source": "## Step 4\n\nThat's our data ready. Now it's time to make your first neural network model!\n\nThis is the standard syntax for a model in Keras. You can always play around with adding in extra hidden layers and changing their size and activation functions later.\n\nOur **first layer** is our **input layer**, with **3 nodes** because we have three features.\n\nOur __second layer__ is our 1st hidden layer, so let's try **4 nodes** for it.\n\nOur __third layer__ is our second hidden layer, let's try **2 nodes** for it.\n\nOur **final layer** will be the **output layer**, in which we have **3 nodes**, one for each of the dog breeds.\n\n### In the cell below replace:\n#### 1. `<addSequential>` with `Sequential()`\n#### 2. `<inputNodes>` with `3`\n#### 3. `<hiddenNotes1>` with `4`\n#### 4. `<hiddenNotes2>` with `2`\n#### 5. `<outputNotes>` with `3`\n\n#### and then __run the code__."
 65 |     },
 66 |     {
 67 |       "metadata": {
 68 |         "trusted": true
 69 |       },
 70 |       "cell_type": "code",
 71 |       "source": "# Set a randomisation seed for replicatability.\nnp.random.seed(6)\n\n\n# This creates our base model for us to add to\n###\n# REPLACE THE <addSequential> BELOW WITH Sequential()\n###\nmodel = keras.models.<addSequential>\n###\n\n###\n# REPLACE THE <inputNodes>, <hiddenNotes1>, <hiddenNotes2>, <outputNotes> BELOW WITH THE APPROPRIATE NUMBERS OF NODES AS DESCRIBED ABOVE\n###\nstructure = [<inputNodes>, <hiddenNotes1>, <hiddenNotes2>, <outputNotes>]\n###\n\n# Input layer + hidden layer 1\nmodel.add(keras.layers.Dense(units=structure[1], input_dim = structure[0], activation = 'relu'))\n\n# Hidden layer 2\nmodel.add(keras.layers.Dense(units=structure[2], activation = 'relu'))\n\n# Output layer - note that the activation function is softmax\n# Softmax will predict a category and provide a value for how likely this is the correct prediction.\nmodel.add(keras.layers.Dense(units=structure[3], activation = tf.nn.softmax))\n\nprint(\"Layer structure:\", structure)",
 72 |       "execution_count": null,
 73 |       "outputs": []
 74 |     },
 75 |     {
 76 |       "metadata": {},
 77 |       "cell_type": "markdown",
 78 |       "source": "Expected output:  \n`Layer structure: [3, 4, 2, 3]`\n\nAlright, that's your first model ready.\n\n('tanh' is another common activation function if you want to try it instead of relu, but it doesn't perform very well here)\n\nStep 5\n-------\n\nNext up we'll compile it and see how it runs.\n\nThere's a few parameters you can chose that change how the model trains, and end up changing how the model performs.\n\nWe will use some standard parameters for now. Feel free to experiment with some different parameters later on.\n\nIf this doesn't work, check your input the correct size for the input and output layers in step 4 (3 nodes each).\n\n### In the cell below replace:\n#### 1. `<addLoss>` with `'categorical_crossentropy'`\n#### 2. `<addOptimizer>` with `sgd'`\n#### 3. `<addMetric>` with `'accuracy'`\n#### 4. `<addTrainX>` with `train_X`\n#### 5. `<addTrainY>` with `train_Y`\n\n#### and then __run the code__."
 79 |     },
 80 |     {
 81 |       "metadata": {
 82 |         "trusted": true
 83 |       },
 84 |       "cell_type": "code",
 85 |       "source": "# Let's compile the model\n\n###\n# REPLACE THE <addLoss> WITH 'categorical_crossentropy', <addOptimizer> WITH 'sgd', AND <addMetric> with 'accuracy' (INCLUDING THE QUOTES)\n###\nmodel.compile(loss = <addLoss>, optimizer = <addOptimizer>, metrics = [<addMetric>])\n###\n\n# Time to fit the model\nprint('Starting training')\n\n###\n# REPLACE THE <addTrainX> WITH train_X AND <addTrainY> WITH train_Y\n###\ntraining_stats = model.fit(<addTrainX>, <addTrainY>, batch_size = 1, epochs = 24, verbose = 0)\n###\n\nprint('Training finished')\nprint('Training Evaluation: loss = %0.3f, accuracy = %0.2f%%'\n      %(training_stats.history['loss'][-1], 100 * training_stats.history['acc'][-1]))",
 86 |       "execution_count": null,
 87 |       "outputs": []
 88 |     },
 89 |     {
 90 |       "metadata": {},
 91 |       "cell_type": "markdown",
 92 |       "source": "See? Neural networks aren't too hard.\n\n`'adam'` is another popular optimizer if you want to try it instead of `'sgd'`\n\n#### Lets plot it! Run the cell below."
 93 |     },
 94 |     {
 95 |       "metadata": {
 96 |         "trusted": true
 97 |       },
 98 |       "cell_type": "code",
 99 |       "source": "# Run this!\n\naccuracy, = graph.plot(training_stats.history['acc'],label = 'Accuracy')\ntraining_loss, = graph.plot(training_stats.history['loss'],label = 'Training Loss')\n\n\ngraph.legend(handles = [accuracy,training_loss])\nloss = np.array(training_stats.history['loss'])\nxp = np.linspace(0, loss.shape[0], 10 * loss.shape[0])\ngraph.plot(xp, np.full(xp.shape, 1), c = 'k', linestyle = ':', alpha = 0.5)\ngraph.plot(xp, np.full(xp.shape, 0), c = 'k', linestyle = ':', alpha = 0.5)\ngraph.show()",
100 |       "execution_count": null,
101 |       "outputs": []
102 |     },
103 |     {
104 |       "metadata": {},
105 |       "cell_type": "markdown",
106 |       "source": "Step 6\n------\n\nNow that our model is trained and ready, let's see how it performs on our test data!\n\nIt's important to test a model on data that it has never seen before, to make sure it doesn't overfit. Now let's evaluate it against the test set.\n\n### In the cell below replace:\n#### 1. `<addTestX>` with `test_X`\n#### 2. `<addTestY>` with `test_Y`\n#### and then __run the code__."
107 |     },
108 |     {
109 |       "metadata": {
110 |         "trusted": true
111 |       },
112 |       "cell_type": "code",
113 |       "source": "###\n# REPLACE <addTestX> WITH test_X AND <addTestY> with test_Y\n###\nevaluation = model.evaluate(<addTestX>, <addTestY>, verbose=0)\n###\n\nprint('Test Set Evaluation: loss = %0.6f, accuracy = %0.2f' %(evaluation[0], 100*evaluation[1]))",
114 |       "execution_count": null,
115 |       "outputs": []
116 |     },
117 |     {
118 |       "metadata": {},
119 |       "cell_type": "markdown",
120 |       "source": "It seems to be very accurate with the random seed that we set, but let's see how it predicts something completely new and unclassified!\n\nCome up with a brand new sample of the format `[age, weight, height]` to test it with.\n\n#### Replace the `<addNumber>`'s below with any numbers you want."
121 |     },
122 |     {
123 |       "metadata": {
124 |         "trusted": true
125 |       },
126 |       "cell_type": "code",
127 |       "source": "###\n# REPLACE THE <addNumber> BELOW WITH A WHATEVER NUMBERS YOU WANT, e.g. [9, 7, 7]\n###\n# [age, weight, height]\nnew_sample = [<addNumber>, <addNumber>, <addNumber>]\n###",
128 |       "execution_count": null,
129 |       "outputs": []
130 |     },
131 |     {
132 |       "metadata": {},
133 |       "cell_type": "markdown",
134 |       "source": "Let's have a look at where our new sample sits in comparison to our dataset.\n\n#### Replace the `<addNewSample>`'s below with `new_sample`"
135 |     },
136 |     {
137 |       "metadata": {
138 |         "trusted": true
139 |       },
140 |       "cell_type": "code",
141 |       "source": "# Plots out the age-weight relationship\n\n###\n# REPLACE THE <addNewSample> BELOW WITH new_sample\n###\ngraph.plot(<addNewSample>[0], <addNewSample>[1], 'ko', marker='x')\n###\n\ngraph.scatter(train_X[:,0], train_X[:,1], c = labels[:160])\ngraph.title('samples by age and weight')\ngraph.xlabel('age')\ngraph.ylabel('weight')\ngraph.show()\n\n# Plot out the age-height relationship\n\n###\n# REPLACE THE <addNewSample> BELOW WITH new_sample\n###\ngraph.plot(<addNewSample>[0], <addNewSample>[2], 'ko', marker='x')\n###\n\ngraph.scatter(train_X[:,0], train_X[:,2], c = labels[:160])\ngraph.title('samples by age and height')\ngraph.xlabel('age')\ngraph.ylabel('height')\ngraph.show()",
142 |       "execution_count": null,
143 |       "outputs": []
144 |     },
145 |     {
146 |       "metadata": {},
147 |       "cell_type": "markdown",
148 |       "source": "Looks alright? Now let's see what breed of dog the model says it is!\n\n#### Replace `<addNewSample>` with `new_sample`"
149 |     },
150 |     {
151 |       "metadata": {
152 |         "trusted": true
153 |       },
154 |       "cell_type": "code",
155 |       "source": "###\n# REPLACE THE <addNewSample> BELOW WITH new_sample\n###\npredicted = model.predict(np.array([<addNewSample>]))\nprint('Breed prediction for %s:' %(<addNewSample>))\n###\n\nprint(np.around(predicted[0],2))\nprint('Breed %s, with %i%% certainty.' %(np.argmax(predicted), np.round(100 * predicted[:, np.argmax(predicted)][0])))",
156 |       "execution_count": null,
157 |       "outputs": []
158 |     },
159 |     {
160 |       "metadata": {},
161 |       "cell_type": "markdown",
162 |       "source": "Breed `0` should be purple, breed `1` should be green, and breed `2` should be red."
163 |     },
164 |     {
165 |       "metadata": {},
166 |       "cell_type": "markdown",
167 |       "source": "Conclusion\n------\n\nWe've built a simple neural network to help us predict dog breeds! In the next exercise we'll look into neural networks with a bit more depth, and at the factors that influence how well it learns.\n\nIf you want to play around with this neural network and a new data set, just remember to set your input and output sizes correctly."
168 |     }
169 |   ],
170 |   "metadata": {
171 |     "kernelspec": {
172 |       "name": "python36",
173 |       "display_name": "Python 3.6",
174 |       "language": "python"
175 |     },
176 |     "language_info": {
177 |       "mimetype": "text/x-python",
178 |       "nbconvert_exporter": "python",
179 |       "name": "python",
180 |       "pygments_lexer": "ipython3",
181 |       "version": "3.6.6",
182 |       "file_extension": ".py",
183 |       "codemirror_mode": {
184 |         "version": 3,
185 |         "name": "ipython"
186 |       }
187 |     }
188 |   },
189 |   "nbformat": 4,
190 |   "nbformat_minor": 2
191 | }


--------------------------------------------------------------------------------
/09. Neural Networks Advanced - Python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Exercise 9 - Advanced Neural Networks\n",
  8 |     "==========\n",
  9 |     "\n",
 10 |     "There are many factors that influence how well a neural network might perform. AI practitioners tend to play around with the structure of the hidden layers, the activation functions used, and the optimisation function.\n",
 11 |     "\n",
 12 |     "In this exercise we will look at how changing these parameters impacts the accuracy performance of our network."
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "markdown",
 17 |    "metadata": {},
 18 |    "source": [
 19 |     "Step 1\n",
 20 |     "------\n",
 21 |     "\n",
 22 |     "In this exercise we will use the same dog dataset as in exercise 8, building on what we learnt before and trying different parameters for a network to try and improve performance.\n",
 23 |     "\n",
 24 |     "Let's start by opening up our data set and setting up our train and test sets.\n",
 25 |     "\n",
 26 |     "#### __Run the code__ below."
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": null,
 32 |    "metadata": {},
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "# Run this!\n",
 36 |     "\n",
 37 |     "# Here we set a randomisation seed for replicatability.\n",
 38 |     "import os\n",
 39 |     "os.environ['PYTHONHASHSEED'] = '0'\n",
 40 |     "seed = 6\n",
 41 |     "import random as rn\n",
 42 |     "rn.seed(seed)\n",
 43 |     "import numpy as np\n",
 44 |     "np.random.seed(seed)\n",
 45 |     "\n",
 46 |     "import warnings\n",
 47 |     "warnings.filterwarnings(\"ignore\")\n",
 48 |     "\n",
 49 |     "from keras import backend as K\n",
 50 |     "import keras\n",
 51 |     "\n",
 52 |     "print('keras using %s backend'%keras.backend.backend())\n",
 53 |     "import pandas as pd\n",
 54 |     "from sklearn.preprocessing import OneHotEncoder\n",
 55 |     "# Sets up the graphing configuration\n",
 56 |     "import matplotlib.pyplot as graph\n",
 57 |     "%matplotlib inline\n",
 58 |     "graph.rcParams['figure.figsize'] = (15,5)\n",
 59 |     "graph.rcParams[\"font.family\"] = 'DejaVu Sans'\n",
 60 |     "graph.rcParams[\"font.size\"] = '12'\n",
 61 |     "graph.rcParams['image.cmap'] = 'rainbow'"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": null,
 67 |    "metadata": {},
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "# Run this too!\n",
 71 |     "# This gets our data ready\n",
 72 |     "\n",
 73 |     "# Load the data\n",
 74 |     "dataset = pd.read_csv('Data/dog_data.csv')\n",
 75 |     "\n",
 76 |     "# Separate out the features\n",
 77 |     "features = dataset.drop(['breed'], axis = 1)\n",
 78 |     "\n",
 79 |     "# Sets the target one-hot vectors\n",
 80 |     "target = OneHotEncoder(sparse = False).fit_transform(np.transpose([dataset['breed']]))\n",
 81 |     "\n",
 82 |     "# Take the first 4/5 of the data and assign it to training\n",
 83 |     "train_X = features.values[:160]\n",
 84 |     "train_Y = target[:160]\n",
 85 |     "\n",
 86 |     "# Take the last 1/5 of the data and assign it to testing\n",
 87 |     "test_X = features.values[160:]\n",
 88 |     "test_Y = target[160:]"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "Step 2\n",
 96 |     "------\n",
 97 |     "\n",
 98 |     "The box below contains methods to help us quickly change the structure. Don't edit them - just run the box.\n",
 99 |     "\n",
100 |     "The __train_network__ method allows us to change:\n",
101 |     "* the number of layers\n",
102 |     "* the activation functions the layers use\n",
103 |     "* the optimizer of the model\n",
104 |     "* the number of training cycles for the model (__epochs__)\n",
105 |     "\n",
106 |     "The plot_acc and bar_acc just plot our models so we can easily see how well they do.\n",
107 |     "\n",
108 |     "Don't worry about the code - it is simply to make the next steps easier.\n",
109 |     "\n",
110 |     "#### __Run the code__ below."
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 1,
116 |    "metadata": {},
117 |    "outputs": [],
118 |    "source": [
119 |     "# Run this!\n",
120 |     "# Below are a few helper methods. Do not edit these.\n",
121 |     "\n",
122 |     "def train_network(structure, activation, optimizer, epochs):\n",
123 |     "    \n",
124 |     "    os.environ['PYTHONHASHSEED'] = '0'\n",
125 |     "    rn.seed(seed)\n",
126 |     "    np.random.seed(seed)\n",
127 |     "    \n",
128 |     "    # This initialises the model\n",
129 |     "    model = keras.models.Sequential()\n",
130 |     "    \n",
131 |     "    # This is our input + the first hidden layer 1\n",
132 |     "    model.add(keras.layers.Dense(units = structure[1], input_dim = structure[0], activation = activation)) \n",
133 |     "    \n",
134 |     "    # Hidden layer 2, if not ignored (of size 0)\n",
135 |     "    if structure[2] > 0:\n",
136 |     "        model.add(keras.layers.Dense(units = structure[2], activation = activation))\n",
137 |     "        \n",
138 |     "    # Output layer\n",
139 |     "    model.add(keras.layers.Dense(units=structure[-1], activation = \"softmax\"))\n",
140 |     "    \n",
141 |     "    # Compiles the model with parameters\n",
142 |     "    model.compile(loss = 'categorical_crossentropy', optimizer = optimizer, metrics = ['accuracy'])\n",
143 |     "    \n",
144 |     "    # This tells the us training has started, so we know that it's actually running\n",
145 |     "    print('training... ', end = '')\n",
146 |     "\n",
147 |     "    # This trains the network\n",
148 |     "    training_stats = model.fit(train_X, train_Y, batch_size = 1, epochs = epochs, verbose = 0, shuffle = False)\n",
149 |     "    \n",
150 |     "    # Results!\n",
151 |     "    print('train_acc: %0.3f, test_acc: %0.3f' %(training_stats.history['accuracy'][-1], \n",
152 |     "                                                model.evaluate(test_X, test_Y, verbose = 0)[1]))\n",
153 |     "    \n",
154 |     "    # This returns the results and the model for use outside the function\n",
155 |     "    return training_stats, model\n",
156 |     "\n",
157 |     "# Plots our evaluations in a line graph to see how they compare\n",
158 |     "def plot_acc(train_acc, test_acc, title):\n",
159 |     "    # Plots the training and testing accuracy lines\n",
160 |     "    training_accuracy, = graph.plot(train_acc, label = 'Training Accuracy')\n",
161 |     "    testing_accuracy, = graph.plot(test_acc, label = 'Testing Accuracy')\n",
162 |     "    graph.legend(handles = [training_accuracy, testing_accuracy])\n",
163 |     "    \n",
164 |     "    # Plots guide lines along y = 0 and y = 1 to help visualise\n",
165 |     "    xp = np.linspace(0, train_acc.shape[0] - 1, 10 * train_acc.shape[0])\n",
166 |     "    graph.plot(xp, np.full(xp.shape, 1), c = 'k', linestyle = ':', alpha = 0.5)\n",
167 |     "    graph.plot(xp, np.full(xp.shape, 0), c = 'k', linestyle = ':', alpha = 0.5)\n",
168 |     "    \n",
169 |     "    graph.xticks(range(0, train_acc.shape[0]), range(1, train_acc.shape[0] + 1))\n",
170 |     "    graph.ylim(0,1)\n",
171 |     "    graph.title(title)\n",
172 |     "    \n",
173 |     "    graph.show()\n",
174 |     "\n",
175 |     "# Plots our evaluations in a bar chart to see how they compare\n",
176 |     "def bar_acc(train_acc, test_acc, title, xticks):\n",
177 |     "    index = range(1, train_acc.shape[0] + 1)\n",
178 |     "    \n",
179 |     "    # Plots the training and testing accuracy bars\n",
180 |     "    training_accuracy = graph.bar(index, train_acc, 0.4, align = 'center')\n",
181 |     "    testing_accuracy = graph.bar(index, test_acc, 0.4, align = 'edge')\n",
182 |     "    graph.legend((training_accuracy[0], testing_accuracy[0]), ('Training Accuracy', 'Testing Accuracy'))\n",
183 |     "    \n",
184 |     "    graph.xticks(index, xticks)\n",
185 |     "    graph.title(title)\n",
186 |     "    \n",
187 |     "    graph.show()"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "markdown",
192 |    "metadata": {},
193 |    "source": [
194 |     "Step 3\n",
195 |     "------\n",
196 |     "\n",
197 |     "Let's first look at how different layer sizes impact performance.\n",
198 |     "\n",
199 |     "Let's look at a network with just one hidden layer. We'll see how it performs with 1 to 10 nodes.\n",
200 |     "\n",
201 |     "### In the cell below replace:\n",
202 |     "#### 1. `<addHidden1>` with `hidden1`\n",
203 |     "#### 2. `<addTrainAcc>` with `train_acc`\n",
204 |     "#### 3. `<addTestAcc>` with `test_acc`\n",
205 |     "#### and then __run the code__."
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "code",
210 |    "execution_count": null,
211 |    "metadata": {},
212 |    "outputs": [],
213 |    "source": [
214 |     "# Initialises empty arrays into which to append new values.\n",
215 |     "train_acc = np.empty((0))\n",
216 |     "test_acc = np.empty((0))\n",
217 |     "\n",
218 |     "for hidden1 in range (1,11):\n",
219 |     "    print('Evaluating model with %i hidden neurons... ' %hidden1, end = '')\n",
220 |     "\n",
221 |     "###\n",
222 |     "# REPLACE <addHidden1> BELOW WITH hidden1\n",
223 |     "###\n",
224 |     "    training_stats, model = train_network(structure = [3, <addHidden1>, <addHidden1>, 3], \n",
225 |     "                                          activation = 'relu', optimizer = 'RMSprop', epochs = 12)\n",
226 |     "###\n",
227 |     "    \n",
228 |     "    train_acc = np.append(train_acc, training_stats.history['accuracy'][-1])\n",
229 |     "    test_acc = np.append(test_acc, model.evaluate(test_X, test_Y, verbose = 0)[1])\n",
230 |     "\n",
231 |     "###\n",
232 |     "# REPLACE <addTrainAcc> WITH train_acc AND <addTestAcc> WITH test_acc\n",
233 |     "###\n",
234 |     "plot_acc(<addTrainAcc>, <addTestAcc>, 'hidden layer size performance comparison')\n",
235 |     "###"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "markdown",
240 |    "metadata": {},
241 |    "source": [
242 |     "So, experimenting with different sizes of hidden layers can dramatically improve your results.\n",
243 |     "\n",
244 |     "Step 4\n",
245 |     "------\n",
246 |     "\n",
247 |     "Now we'll look at how different activation functions impact the performance.\n",
248 |     "\n",
249 |     "There's lots we will try, just remember it is common to try both `relu` and `tanh` first.\n",
250 |     "\n",
251 |     "### In the cell below replace:\n",
252 |     "#### 1. `<addActivation>` with `activation`\n",
253 |     "#### 2. `<addActivationFunctions>` with `activation_functions`\n",
254 |     "#### and then __run the code__."
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "code",
259 |    "execution_count": null,
260 |    "metadata": {
261 |     "scrolled": false
262 |    },
263 |    "outputs": [],
264 |    "source": [
265 |     "train_acc = np.empty((0))\n",
266 |     "test_acc = np.empty((0))\n",
267 |     "\n",
268 |     "# Makes a list of the activation functions we wish to compare\n",
269 |     "activation_functions = ['elu', 'selu', 'relu', 'tanh', 'sigmoid', \n",
270 |     "                        'hard_sigmoid', 'softplus', 'softsign', 'linear']\n",
271 |     "\n",
272 |     "for activation in activation_functions:\n",
273 |     "    print('Evaluating model with %s hidden layer activation function... ' %activation, end = '')\n",
274 |     "\n",
275 |     "###\n",
276 |     "# REPLACE <addActivation> WITH activation\n",
277 |     "###\n",
278 |     "    training_stats, model = train_network(structure = [3, 4, 2, 3],\n",
279 |     "                                          activation = <addActivation>, optimizer = 'RMSprop', epochs = 12)\n",
280 |     "###\n",
281 |     "    \n",
282 |     "    train_acc = np.append(train_acc, training_stats.history['accuracy'][-1])\n",
283 |     "    test_acc = np.append(test_acc, model.evaluate(test_X, test_Y, verbose=0)[1])\n",
284 |     "    \n",
285 |     "###\n",
286 |     "# REPLACE THE <addActivationFunctions> BELOW WITH activation_functions\n",
287 |     "###\n",
288 |     "bar_acc(train_acc, test_acc, 'activation function performance comparison using (4,2) hidden layer', <addActivationFunctions>)\n",
289 |     "###"
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "markdown",
294 |    "metadata": {},
295 |    "source": [
296 |     "There's quite a lot of variance there. It's always good to quickly test different activation functions first.\n",
297 |     "\n",
298 |     "Next, lets try changing the shape of the hidden layers.\n",
299 |     "\n",
300 |     "#### Replace `<updateHere>`'s with `3` and run the code."
301 |    ]
302 |   },
303 |   {
304 |    "cell_type": "code",
305 |    "execution_count": null,
306 |    "metadata": {},
307 |    "outputs": [],
308 |    "source": [
309 |     "train_acc = np.empty((0))\n",
310 |     "test_acc = np.empty((0))\n",
311 |     "\n",
312 |     "activation_functions = ['elu', 'selu', 'relu', 'tanh', 'sigmoid',\n",
313 |     "                        'hard_sigmoid', 'softplus', 'softsign', 'linear']\n",
314 |     "\n",
315 |     "for activation in activation_functions:\n",
316 |     "    print('Evaluating model with %s hidden layer activation function... ' %activation, end='')\n",
317 |     "    \n",
318 |     "\n",
319 |     "# The value you choose for <updateHere> below will change the size of the hidden layers. Lets try changing them both to 3 for now\n",
320 |     "# (but you can have a play around with different numbers if you want)\n",
321 |     "###\n",
322 |     "# REPLACE THE <updateHere>'s BELOW WITH 3\n",
323 |     "###\n",
324 |     "    training_stats, model = train_network(structure = [3, <updateHere>, <updateHere>, 3], \n",
325 |     "                                          activation = activation, optimizer = 'RMSprop', epochs = 12)\n",
326 |     "###\n",
327 |     "    \n",
328 |     "    train_acc = np.append(train_acc, training_stats.history['accuracy'][-1])\n",
329 |     "    test_acc = np.append(test_acc, model.evaluate(test_X, test_Y, verbose=0)[1])\n",
330 |     "    \n",
331 |     "bar_acc(train_acc, test_acc, 'activation function performance comparison using (3,3) hidden layer', activation_functions)"
332 |    ]
333 |   },
334 |   {
335 |    "cell_type": "markdown",
336 |    "metadata": {},
337 |    "source": [
338 |     "Step 5\n",
339 |     "-----\n",
340 |     "\n",
341 |     "The __optimisation function__ is the last major parameter of the network architecture. It changes how the network is trained - so it can have a __very large impact on training time and end performance__.\n",
342 |     "\n",
343 |     "Note: this step won't always provide the same results every time it is run. Optimizers such as SGD will give different results.\n",
344 |     "\n",
345 |     "#### Replace `<addOptimizer>` with `optimizer` and run the code."
346 |    ]
347 |   },
348 |   {
349 |    "cell_type": "code",
350 |    "execution_count": null,
351 |    "metadata": {},
352 |    "outputs": [],
353 |    "source": [
354 |     "train_acc = np.empty((0))\n",
355 |     "test_acc = np.empty((0))\n",
356 |     "\n",
357 |     "# This is a list of the optimisation functions for us to compare\n",
358 |     "optimization_functions = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta',\n",
359 |     "                          'Adam', 'Adamax', 'Nadam']\n",
360 |     "\n",
361 |     "for optimizer in optimization_functions:\n",
362 |     "    print('Evaluating model with %s optimizer... ' %optimizer, end='')\n",
363 |     "    \n",
364 |     "    \n",
365 |     "# The <addOptimizer> below is where we specify the optimizer in the code    \n",
366 |     "###\n",
367 |     "# REPLACE THE <addOptimizer> BELOW WITH optimizer\n",
368 |     "###\n",
369 |     "    training_stats, model = train_network(structure = [3, 4, 2, 3],\n",
370 |     "                                          activation = 'relu', optimizer = <addOptimizer>, epochs = 12)\n",
371 |     "###\n",
372 |     "\n",
373 |     "# This is recording our data for the plot\n",
374 |     "    train_acc = np.append(train_acc, training_stats.history['accuracy'][-1])\n",
375 |     "    test_acc = np.append(test_acc, model.evaluate(test_X, test_Y, verbose=0)[1])\n",
376 |     "\n",
377 |     "# And now, the plot!    \n",
378 |     "bar_acc(train_acc, test_acc, 'optimizer performance comparison using (4,2) hidden layer', optimization_functions)"
379 |    ]
380 |   },
381 |   {
382 |    "cell_type": "markdown",
383 |    "metadata": {},
384 |    "source": [
385 |     "Step 6\n",
386 |     "-------\n",
387 |     "\n",
388 |     "Let's try to combine what we've seen above and try to create a neural network that performs better than what we made in exercise 7, where we used the structure `[3,4,2,3]`, the activation function `relu`, and the optimiser `SGD` (Stochastic Gradient Descent).\n",
389 |     "\n",
390 |     "### In the cell below replace:\n",
391 |     "#### 1. `<layerSize>`'s with numbers of your choice (how many nodes the hidden layers will have)\n",
392 |     "#### 2. `<activationFunction>` with one of the following: `'relu'`, `'softsign'`, `'tanh'`, `'elu'`, `'selu'`, `'softplus'`, `'linear'`\n",
393 |     "#### 3. `<optimiser>` with one of the following: `'SGD'`, `'adam'`, `'RMSprop'`, `'Adagrad'`, `'Adadelta'`, `'Adamax'`, `'Nadam'`\n",
394 |     "#### and then __run the code__."
395 |    ]
396 |   },
397 |   {
398 |    "cell_type": "code",
399 |    "execution_count": null,
400 |    "metadata": {},
401 |    "outputs": [],
402 |    "source": [
403 |     "###\n",
404 |     "# REPLACE THE <layerSize>'s' BELOW WITH PARAMETERS TO TEST A NEW NEURAL NETWORK e.g. 4 and 2\n",
405 |     "###\n",
406 |     "structure = [3, <layerSize>, <layerSize>, 3]\n",
407 |     "###\n",
408 |     "\n",
409 |     "###\n",
410 |     "# REPLACE <activationFunction> WITH ONE OF THE FOLLOWING: 'relu', 'softsign', 'tanh', 'elu', 'selu', 'softplus', 'linear'\n",
411 |     "###\n",
412 |     "activation = <activationFunction>\n",
413 |     "###\n",
414 |     "\n",
415 |     "###\n",
416 |     "# REPLACE <optimiser> WITH ONE OF THE FOLLOWING: 'SGD', 'adam', 'RMSprop', 'Adagrad', 'Adadelta', 'Adamax', 'Nadam'\n",
417 |     "###\n",
418 |     "optimizer = <optimiser>\n",
419 |     "###\n",
420 |     "\n",
421 |     "training_stats, model = train_network(structure, activation, optimizer, epochs = 24)\n",
422 |     "\n",
423 |     "# We can plot our training statistics to see how it developed over time\n",
424 |     "accuracy, = graph.plot(training_stats.history['accuracy'], label = 'Accuracy')\n",
425 |     "training_loss, = graph.plot(training_stats.history['loss'], label = 'Training Loss')\n",
426 |     "graph.legend(handles = [accuracy, training_loss])\n",
427 |     "loss = np.array(training_stats.history['loss'])\n",
428 |     "xp = np.linspace(0, loss.shape[0], 10 * loss.shape[0])\n",
429 |     "graph.plot(xp, np.full(xp.shape, 1), c = 'k', linestyle = ':', alpha = 0.5)\n",
430 |     "graph.plot(xp, np.full(xp.shape, 0), c = 'k', linestyle = ':', alpha = 0.5)\n",
431 |     "graph.show()"
432 |    ]
433 |   },
434 |   {
435 |    "cell_type": "markdown",
436 |    "metadata": {},
437 |    "source": [
438 |     "How does it look? Were we able to beat the other network? Try out a number of different configurations to see how they perform!"
439 |    ]
440 |   },
441 |   {
442 |    "cell_type": "markdown",
443 |    "metadata": {},
444 |    "source": [
445 |     "Conclusion\n",
446 |     "-------\n",
447 |     "\n",
448 |     "We've compared how different neural network architecture parameters influence accuracy performance, and we've tried to combine them in such a way that we maximise this performance."
449 |    ]
450 |   }
451 |  ],
452 |  "metadata": {
453 |   "kernelspec": {
454 |    "display_name": "Python 3",
455 |    "language": "python",
456 |    "name": "python3"
457 |   },
458 |   "language_info": {
459 |    "codemirror_mode": {
460 |     "name": "ipython",
461 |     "version": 3
462 |    },
463 |    "file_extension": ".py",
464 |    "mimetype": "text/x-python",
465 |    "name": "python",
466 |    "nbconvert_exporter": "python",
467 |    "pygments_lexer": "ipython3",
468 |    "version": "3.7.3"
469 |   }
470 |  },
471 |  "nbformat": 4,
472 |  "nbformat_minor": 2
473 | }
474 | 


--------------------------------------------------------------------------------
/10. Convolutional Neural Networks - Python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "metadata": {},
  5 |       "cell_type": "markdown",
  6 |       "source": "Convolutional Neural Networks\n======\n\nConvolutional neural networks (CNNs) are a class of deep neural networks, most commonly used in computer vision applications.\n\nConvolutional refers the network pre-processing data for you - traditionally this pre-processing was performed by data scientists. The neural network can learn how to do pre-processing *itself* by applying filters for things such as edge detection."
  7 |     },
  8 |     {
  9 |       "metadata": {},
 10 |       "cell_type": "markdown",
 11 |       "source": "Step 1\n-----\n\nIn this exercise we will train a CNN to recognise handwritten digits, using the MNIST digit dataset.\n\nThis is a very common exercise and data set to learn from.\n\nLet's start by loading our dataset and setting up our train, validation, and test sets.\n\n#### Run the code below to import our required libraries and set up the graphing features."
 12 |     },
 13 |     {
 14 |       "metadata": {
 15 |         "trusted": true
 16 |       },
 17 |       "cell_type": "code",
 18 |       "source": "# Run this!\nimport warnings\nwarnings.filterwarnings(\"ignore\")\nimport tensorflow as tf\nimport numpy as np\nimport keras\nfrom keras.models import Sequential\nfrom keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D\nprint('keras using %s backend'%keras.backend.backend())\nimport matplotlib.pyplot as graph\n%matplotlib inline\ngraph.rcParams['figure.figsize'] = (15,5)\ngraph.rcParams[\"font.family\"] = 'DejaVu Sans'\ngraph.rcParams[\"font.size\"] = '12'\ngraph.rcParams['image.cmap'] = 'rainbow'",
 19 |       "execution_count": null,
 20 |       "outputs": []
 21 |     },
 22 |     {
 23 |       "metadata": {},
 24 |       "cell_type": "markdown",
 25 |       "source": "### In the cell below replace:\n#### 1. `<addTrainX>` with `train_X`\n#### 2. `<addTrainY>` with `train_Y`\n#### 3. `<addValidX>` with `valid_X`\n#### 4. `<addValidY>` with `valid_Y`\n#### 5. `<addTextX>` with `test_X`\n#### 6. `<addTextY>` with `test_Y`\n#### and then __run the code__."
 26 |     },
 27 |     {
 28 |       "metadata": {
 29 |         "trusted": true
 30 |       },
 31 |       "cell_type": "code",
 32 |       "source": "# Here we import the dataset, and split it into the training, validation, and test sets.\nfrom keras.datasets import mnist\n\n# This is our training data, with 6400 samples.\n###\n# REPLACE <addTrainX> WITH train_X AND <addTrainY> WITH train_Y\n###\n<addTrainX> = mnist.load_data()[0][0][:6400].astype('float32')\n<addTrainY> = mnist.load_data()[0][1][:6400]\n###\n\n# This is our validation data, with 1600 samples.\n###\n# REPLACE <addValidX> WITH valid_X AND <addValidY> WITH valid_Y\n###\n<addValidX> = mnist.load_data()[1][0][:1600].astype('float32')\n<addValidY> = mnist.load_data()[1][1][:1600]\n###\n\n# This is our test data, with 2000 samples.\n###\n# REPLACE <addTextX> WITH test_X AND <addTextY> WITH test_Y\n###\n<addTextX> = mnist.load_data()[1][0][-2000:].astype('float32')\n<addTextY> = mnist.load_data()[1][1][-2000:]\n###\n\nprint('train_X:', train_X.shape, end = '')\nprint(', train_Y:', train_Y.shape)\nprint('valid_X:', valid_X.shape, end = '')\nprint(', valid_Y:', valid_Y.shape)\nprint('test_X:', test_X.shape, end = '')\nprint(', test_Y:', test_Y.shape)",
 33 |       "execution_count": null,
 34 |       "outputs": []
 35 |     },
 36 |     {
 37 |       "metadata": {},
 38 |       "cell_type": "markdown",
 39 |       "source": "So we have 6400 training samples, 1600 validation samples, and 2000 test samples.\n\nEach sample is an greyscale image - 28 pixels wide and 28 pixels high. Each pixel is really a number from 0 to 255 - 0 being fully black, 255 being fully white. When we graph the 28x28 numbers, we can see the image.\n\nLet's have a look at one of our samples.\n\n#### Replace `<addSample>` with `train_X[0]` (you can change 0 to any number between 0 and 6400 if you like)"
 40 |     },
 41 |     {
 42 |       "metadata": {
 43 |         "trusted": true
 44 |       },
 45 |       "cell_type": "code",
 46 |       "source": "###\n# REPLACE THE <addSample> BELOW WITH train_X[0] OR ANOTHER SAMPLE e.g. train_X[1] or train_X[2]\n###\ngraph.imshow(<addSample>, cmap = 'gray', interpolation = 'nearest')\n###\n\ngraph.show()",
 47 |       "execution_count": null,
 48 |       "outputs": []
 49 |     },
 50 |     {
 51 |       "metadata": {},
 52 |       "cell_type": "markdown",
 53 |       "source": "Step 2\n---\n\nThe neural network will use the 28x28 values of each image to predict what each image represents.\n\nAs each value is between 0 and 255, we'll scale the values down by dividing by 255 (this makes it faster for the Neural Network to train).\n\nWe need to reshape our data to get it working well with our neural network. \n\n### In the cell below replace:\n#### 1. `<addRehape>` with `reshape`\n#### 2. `<completeCalculation>` with `/255`\n#### and then __run the code__."
 54 |     },
 55 |     {
 56 |       "metadata": {
 57 |         "trusted": true
 58 |       },
 59 |       "cell_type": "code",
 60 |       "source": "# First off, let's reshape our X sets so that they fit the convolutional layers.\n\n# This gets the image dimensions - 28\ndim = train_X[0].shape[0]\n\n###\n# REPLACE THE <addRehape> BELOW WITH reshape\n###\ntrain_X = train_X.<addRehape>(train_X.shape[0], dim, dim, 1)\nvalid_X = valid_X.<addRehape>(valid_X.shape[0], dim, dim, 1)\ntest_X = test_X.<addRehape>(test_X.shape[0], dim, dim, 1)\n###\n\n# Next up - feature scaling.\n# We scale the values so they are between 0 and 1, instead of 0 and 255.\n\n###\n# REPLACE THE <completeCalculation> BELOW WITH /255\n###\ntrain_X = train_X <completeCalculation>\nvalid_X = valid_X <completeCalculation>\ntest_X = test_X <completeCalculation>\n###\n\n\n# Now we print the label for the first example\nprint(train_Y[0])",
 61 |       "execution_count": null,
 62 |       "outputs": []
 63 |     },
 64 |     {
 65 |       "metadata": {},
 66 |       "cell_type": "markdown",
 67 |       "source": "Expected output:  \n`5`\n\nThe label is a number - the number we see when we view the image.\n\nWe need represent this number as a one-hot vector, so the neural network knows it is a category.\n\nKeras can convert these labels into one-hot vectors easily with the function - `to_categorical`\n\n#### Replace `<addCategorical>` with `to_categorical`"
 68 |     },
 69 |     {
 70 |       "metadata": {
 71 |         "trusted": true
 72 |       },
 73 |       "cell_type": "code",
 74 |       "source": "###\n# REPLACE THE <addCategorical> BELOW WITH to_categorical\n###\ntrain_Y = keras.utils.<addCategorical>(train_Y, 10)\nvalid_Y = keras.utils.<addCategorical>(valid_Y, 10)\ntest_Y = keras.utils.<addCategorical>(test_Y, 10)\n###\n\n# 10 being the number of categories (numbers 0 to 9)\n\nprint(train_Y[0])",
 75 |       "execution_count": null,
 76 |       "outputs": []
 77 |     },
 78 |     {
 79 |       "metadata": {},
 80 |       "cell_type": "markdown",
 81 |       "source": "Expected output:  \n`[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]`\n\nStep 3\n-----\n\nAll ready! Time to build another neural network.\n\n#### Replace `<addSequential>` with `Sequential()` and run the code."
 82 |     },
 83 |     {
 84 |       "metadata": {
 85 |         "trusted": true
 86 |       },
 87 |       "cell_type": "code",
 88 |       "source": "# Sets a randomisation seed for replicatability.\nnp.random.seed(6)\n\n###\n# REPLACE THE <addSequential> BELOW WITH Sequential() (don't forget the () )\n###\nmodel = <addSequential>\n###",
 89 |       "execution_count": null,
 90 |       "outputs": []
 91 |     },
 92 |     {
 93 |       "metadata": {},
 94 |       "cell_type": "markdown",
 95 |       "source": "The __Convolutional__ in Convolutional Neural Networks refers the pre-processing the network can do itself.\n\n#### Replace `<addConv2d>` with `Conv2D`"
 96 |     },
 97 |     {
 98 |       "metadata": {
 99 |         "trusted": true
100 |       },
101 |       "cell_type": "code",
102 |       "source": "###\n# REPLACE THE <addConv2D> BELOW WITH Conv2D\n###\nmodel.add(<addConv2D>(28, kernel_size = (3, 3), activation = 'relu', input_shape = (dim, dim, 1)))\nmodel.add(<addConv2D>(56, (3, 3), activation = 'relu'))\n###",
103 |       "execution_count": null,
104 |       "outputs": []
105 |     },
106 |     {
107 |       "metadata": {},
108 |       "cell_type": "markdown",
109 |       "source": "Next up we'll:\n* Add pooling layers.\n* Apply dropout.\n* Flatten the data to a vector (the output of step 2 is a vector).\n\n### In the cell below replace:\n#### 1. `<addMaxPooling2D>` with `MaxPooling2D`\n#### 2. `<addDropout>` with `Dropout`\n#### 3. `<addFlatten>` with `Flatten()`\n\n#### and then __run the code__."
110 |     },
111 |     {
112 |       "metadata": {
113 |         "trusted": true
114 |       },
115 |       "cell_type": "code",
116 |       "source": "# Pooling layers help speed up training time and make features it detects more robust.\n# They act by downsampling the data - reducing the data size and complexity.\n\n###\n# REPLACE THE <addMaxPooling2D> BELOW WITH MaxPooling2D\n###\nmodel.add(<addMaxPooling2D>(pool_size = (2, 2)))\n###\n\n# Dropout is a technique to help prevent overfitting\n# It makes nodes 'dropout' - turning them off randomly.\n\n###\n# REPLACE THE <addDropout> BELOW WITH Dropout\n###\nmodel.add(<addDropout>(0.125))\n###\n\n\n###\n# REPLACE THE <addFlatten> BELOW WITH Flatten()\n###\nmodel.add(<addFlatten>)\n###",
117 |       "execution_count": null,
118 |       "outputs": []
119 |     },
120 |     {
121 |       "metadata": {},
122 |       "cell_type": "markdown",
123 |       "source": "#### Replace `<updateHere>` with 10 and run the code."
124 |     },
125 |     {
126 |       "metadata": {
127 |         "trusted": true
128 |       },
129 |       "cell_type": "code",
130 |       "source": "# Dense layers perform classification - we have extracted the features with the convolutional pre-processing\nmodel.add(Dense(128, activation='relu'))\n\n# More dropout!\nmodel.add(Dropout(0.25))\n\n# Next is our output layer\n# Softmax outputs the probability for each category\n###\n# REPLACE <updateHere> BELOW WITH 10, THE NUMBER OF CLASSES (DIGITS 0 TO 9)\n###\nmodel.add(Dense(<updateHere>, activation=tf.nn.softmax))\n###\n\n# And finally, we compile.\nmodel.compile(loss='categorical_crossentropy', optimizer='Adamax', metrics=['accuracy'])",
131 |       "execution_count": null,
132 |       "outputs": []
133 |     },
134 |     {
135 |       "metadata": {},
136 |       "cell_type": "markdown",
137 |       "source": "Step 4\n-----\n\nLet's train it!\n\n### In the cell below replace:\n#### 1. `<addTrainX>` with `train_X `\n#### 2. `<addTrainY>` with `train_Y`\n#### 3. `<addValidX>` with `valid_X`\n#### 4. `<addValidY>` with `valid_Y`\n#### 5. `<addEvaluate>` with `evaluate`\n\n#### and then __run the code__."
138 |     },
139 |     {
140 |       "metadata": {
141 |         "trusted": true
142 |       },
143 |       "cell_type": "code",
144 |       "source": "###\n# REPLACE THE <addTrainX> WITH train_X, <addTrainY> WITH train_Y, <addValidX> WITH valid_X, AND <addValidY> WITH valid_Y\n###\ntraining_stats = model.fit(<addTrainX>, <addTrainY>, batch_size = 128, epochs = 12, verbose = 1, validation_data = (<addValidX>, <addValidY>))\n###\n\n###\n# REPLACE THE <addEvaluate> BELOW WITH evaluate\n###\nevaluation = model.<addEvaluate>(test_X, test_Y, verbose=0)\n###\n\nprint('Test Set Evaluation: loss = %0.6f, accuracy = %0.2f' %(evaluation[0], 100 * evaluation[1]))\n\n# We can plot our training statistics to see how it developed over time\naccuracy, = graph.plot(training_stats.history['acc'], label = 'Accuracy')\ntraining_loss, = graph.plot(training_stats.history['loss'], label = 'Training Loss')\ngraph.legend(handles = [accuracy, training_loss])\nloss = np.array(training_stats.history['loss'])\nxp = np.linspace(0,loss.shape[0],10 * loss.shape[0])\ngraph.plot(xp, np.full(xp.shape, 1), c = 'k', linestyle = ':', alpha = 0.5)\ngraph.plot(xp, np.full(xp.shape, 0), c = 'k', linestyle = ':', alpha = 0.5)\ngraph.show()",
145 |       "execution_count": null,
146 |       "outputs": []
147 |     },
148 |     {
149 |       "metadata": {},
150 |       "cell_type": "markdown",
151 |       "source": "## Step 5\n\nLet's test it on a new sample that it hasn't seen, and see how it classifies it!\n\n#### Replace `<addNumber>` with any number between 0 and 1999, then run the code."
152 |     },
153 |     {
154 |       "metadata": {
155 |         "trusted": true
156 |       },
157 |       "cell_type": "code",
158 |       "source": "###\n# REPLACE THE <addNumber> WITH ANY NUMBER BETWEEN 0 AND 1999\n###\nsample = test_X[<addNumber>].reshape(dim, dim)\n###\n\ngraph.imshow(sample, cmap = 'gray', interpolation = 'nearest')\ngraph.show()\n\nprediction = model.predict(sample.reshape(1, dim, dim, 1))\nprint('prediction: %i' %(np.argmax(prediction)))",
159 |       "execution_count": null,
160 |       "outputs": []
161 |     },
162 |     {
163 |       "metadata": {},
164 |       "cell_type": "markdown",
165 |       "source": "How is the prediction? Does it look right?"
166 |     },
167 |     {
168 |       "metadata": {},
169 |       "cell_type": "markdown",
170 |       "source": "Conclusion\n------\n\nCongratulations! We've built a convolutional neural network that is able to recognise handwritten digits with very high accuracy.\n\nCNN's are very complex - you're not expected to understand everything (or most things) we covered here. They take a lot of time and practise to properly understand each aspect of them.\n\nHere we used:  \n* __Feature scaling__ - reducing the range of the values. This helps improve training time.\n* __Convolutional layers__ - network layers that pre-process the data for us. These apply filters to extract features for the neural network to analyze.\n* __Pooling layers__ - part of the Convolutional layers. They apply filters downsample the data - extracting features.\n* __Dropout__ - a regularization technique to help prevent overfitting.\n* __Dense layers__ - neural network layers which perform classification on the features extracted by the convolutional layers and downsampled by the pooling layers.\n* __Softmax__ - an activation function which outputs the probability for each category."
171 |     }
172 |   ],
173 |   "metadata": {
174 |     "kernelspec": {
175 |       "name": "python36",
176 |       "display_name": "Python 3.6",
177 |       "language": "python"
178 |     },
179 |     "language_info": {
180 |       "mimetype": "text/x-python",
181 |       "nbconvert_exporter": "python",
182 |       "name": "python",
183 |       "pygments_lexer": "ipython3",
184 |       "version": "3.6.6",
185 |       "file_extension": ".py",
186 |       "codemirror_mode": {
187 |         "version": 3,
188 |         "name": "ipython"
189 |       }
190 |     }
191 |   },
192 |   "nbformat": 4,
193 |   "nbformat_minor": 2
194 | }


--------------------------------------------------------------------------------
/11. Recurrent Neural Networks - Python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "metadata": {
  5 |         "collapsed": true
  6 |       },
  7 |       "cell_type": "markdown",
  8 |       "source": "Exercise 11 - Recurrent Neural Networks\n========\n\nA recurrent neural network (RNN) is a class of neural network that excels when your data can be treated as a sequence - such as text, music, speech recognition, connected handwriting, or data over a time period. \n\nRNN's can analyse or predict a word based on the previous words in a sentence - they allow a connection between previous information and current information.\n\nThis exercise looks at implementing a LSTM RNN to generate new characters after learning from a large sample of text. LSTMs are a special type of RNN which dramatically improves the model’s ability to connect previous data to current data where there is a long gap.\n\nWe will train an RNN model using a novel written by H. G. Wells - The Time Machine."
  9 |     },
 10 |     {
 11 |       "metadata": {},
 12 |       "cell_type": "markdown",
 13 |       "source": "Step 1\n------\n\nLet's start by loading our libraries and text file. This might take a few minutes.\n\n#### Run the cell below to import the necessary libraries."
 14 |     },
 15 |     {
 16 |       "metadata": {
 17 |         "trusted": false
 18 |       },
 19 |       "cell_type": "code",
 20 |       "source": "%%capture\n# Run this!\nfrom keras.models import load_model\nfrom keras.models import Sequential\nfrom keras.layers import Dense, Activation, LSTM\nfrom keras.callbacks import LambdaCallback, ModelCheckpoint\nimport numpy as np\nimport random, sys, io, string",
 21 |       "execution_count": null,
 22 |       "outputs": []
 23 |     },
 24 |     {
 25 |       "metadata": {},
 26 |       "cell_type": "markdown",
 27 |       "source": "#### Replace the `<addFileName>` with `The Time Machine`"
 28 |     },
 29 |     {
 30 |       "metadata": {
 31 |         "trusted": false
 32 |       },
 33 |       "cell_type": "code",
 34 |       "source": "###\n# REPLACE THE <addFileName> BELOW WITH The Time Machine\n###\ntext = io.open('Data/<addFileName>.txt', encoding = 'UTF-8').read()\n###\n\n# Let's have a look at some of the text\nprint(text[0:198])\n\n# This cuts out punctuation and make all the characters lower case\ntext = text.lower().translate(str.maketrans(\"\", \"\", string.punctuation))\n\n# Character index dictionary\ncharset = sorted(list(set(text)))\nindex_from_char = dict((c, i) for i, c in enumerate(charset))\nchar_from_index = dict((i, c) for i, c in enumerate(charset))\n\nprint('text length: %s characters' %len(text))\nprint('unique characters: %s' %len(charset))",
 35 |       "execution_count": null,
 36 |       "outputs": []
 37 |     },
 38 |     {
 39 |       "metadata": {},
 40 |       "cell_type": "markdown",
 41 |       "source": "Expected output:  \n```The Time Traveller (for so it will be convenient to speak of him) was expounding a recondite matter to us. His pale grey eyes shone and twinkled, and his usually pale face was flushed and animated.\ntext length: 174201 characters\nunique characters: 39```\n\nStep 2\n-----\n\nNext we'll divide the text into sequences of 40 characters.\n\nThen for each sequence we'll make a training set - the following character will be the correct output for the test set.\n\n### In the cell below replace:\n#### 1. `<sequenceLength>` with `40`\n#### 2. `<step>` with `4`\n#### and then __run the code__. "
 42 |     },
 43 |     {
 44 |       "metadata": {
 45 |         "trusted": false
 46 |       },
 47 |       "cell_type": "code",
 48 |       "source": "###\n# REPLACE <sequenceLength> WITH 40 AND <step> WITH 4\n###\nsequence_length = <sequenceLength>\nstep = <step>\n###\n\nsequences = []\ntarget_chars = []\nfor i in range(0, len(text) - sequence_length, step):\n    sequences.append([text[i: i + sequence_length]])\n    target_chars.append(text[i + sequence_length])\nprint('number of training sequences:', len(sequences))",
 49 |       "execution_count": null,
 50 |       "outputs": []
 51 |     },
 52 |     {
 53 |       "metadata": {},
 54 |       "cell_type": "markdown",
 55 |       "source": "Expected output:\n`number of training sequences: 43541`\n\n#### Replace `<addSequences>` with `sequences` and run the code."
 56 |     },
 57 |     {
 58 |       "metadata": {
 59 |         "trusted": false
 60 |       },
 61 |       "cell_type": "code",
 62 |       "source": "# One-hot vectorise\n\nX = np.zeros((len(sequences), sequence_length, len(charset)), dtype=np.bool)\ny = np.zeros((len(sequences), len(charset)), dtype=np.bool)\n\n###\n# REPLACE THE <addSequences> BELOW WITH sequences\n###\nfor n, sequence in enumerate(<addSequences>):\n###\n    for m, character in enumerate(list(sequence[0])):\n        X[n, m, index_from_char[character]] = 1\n    y[n, index_from_char[target_chars[n]]] = 1",
 63 |       "execution_count": null,
 64 |       "outputs": []
 65 |     },
 66 |     {
 67 |       "metadata": {},
 68 |       "cell_type": "markdown",
 69 |       "source": "Step 3\n------\n\nLet's build our model, using a single LSTM layer of 128 units. We'll keep the model simple for now, so that training does not take too long.\n\n### In the cell below replace:\n#### 1. `<addLSTM>` with `LSTM`\n#### 2. `<addLayerSize>` with `128`\n#### 3. `<addSoftmaxFunction>` with `'softmax`\n#### and then __run the code__."
 70 |     },
 71 |     {
 72 |       "metadata": {
 73 |         "trusted": false
 74 |       },
 75 |       "cell_type": "code",
 76 |       "source": "model = Sequential()\n\n###\n# REPLACE THE <addLSTM> BELOW WITH LSTM (use uppercase) AND <addLayerSize> WITH 128\n###\nmodel.add(<addLSTM>(<addLayerSize>, input_shape = (X.shape[1], X.shape[2])))\n###\n\n###\n# REPLACE THE <addSoftmaxFunction> with 'softmax' (INCLUDING THE QUOTES)\n###\nmodel.add(Dense(y.shape[1], activation = <addSoftMaxFunction>))\n###\n\nmodel.compile(loss = 'categorical_crossentropy', optimizer = 'Adam')",
 77 |       "execution_count": null,
 78 |       "outputs": []
 79 |     },
 80 |     {
 81 |       "metadata": {},
 82 |       "cell_type": "markdown",
 83 |       "source": "The code below generates text at the end of an epoch (one training cycle). This allows us to see how the model is performing as it trains. If you're making a large neural network with a long training time it's useful to check in on the model as see if the text generating is legible as it trains, as overtraining may occur and the output of the model turn to nonsense.\n\nThe code below will also save a model if it is the best performing model, so we can use it later.\n\n#### Run the code below, but don't change it"
 84 |     },
 85 |     {
 86 |       "metadata": {
 87 |         "trusted": false
 88 |       },
 89 |       "cell_type": "code",
 90 |       "source": "# Run this, but do not edit.\n# It helps generate the text and save the model epochs.\n\n# Generate new text\ndef on_epoch_end(epoch, _):\n    diversity = 0.5\n    print('\\n### Generating text with diversity %0.2f' %(diversity))\n\n    start = random.randint(0, len(text) - sequence_length - 1)\n    seed = text[start: start + sequence_length]\n    print('### Generating with seed: \"%s\"' %seed[:40])\n\n    output = seed[:40].lower().translate(str.maketrans(\"\", \"\", string.punctuation))\n    print(output, end = '')\n\n    for i in range(500):\n        x_pred = np.zeros((1, sequence_length, len(charset)))\n        for t, char in enumerate(output):\n            x_pred[0, t, index_from_char[char]] = 1.\n\n        predictions = model.predict(x_pred, verbose=0)[0]\n        exp_preds = np.exp(np.log(np.asarray(predictions).astype('float64')) / diversity)\n        next_index = np.argmax(np.random.multinomial(1, exp_preds / np.sum(exp_preds), 1))\n        next_char = char_from_index[next_index]\n\n        output = output[1:] + next_char\n\n        print(next_char, end = '')\n    print()\nprint_callback = LambdaCallback(on_epoch_end=on_epoch_end)\n\n# Save the model\ncheckpoint = ModelCheckpoint('Models/model-epoch-{epoch:02d}.hdf5', \n                             monitor = 'loss', verbose = 1, save_best_only = True, mode = 'min')",
 91 |       "execution_count": null,
 92 |       "outputs": []
 93 |     },
 94 |     {
 95 |       "metadata": {},
 96 |       "cell_type": "markdown",
 97 |       "source": "The code below will start the model to train. This may take a long time. Feel free to stop the training with the `square stop button` to the right of the `Run button` in the toolbar.\n\nLater in the exercise, we will load a pretrained model.\n\n### In the cell below replace:\n#### 1. `<addPrintCallback>` with `print_callback`\n#### 2. `<addCheckpoint>` with `checkpoint`\n#### and then __run the code__."
 98 |     },
 99 |     {
100 |       "metadata": {
101 |         "trusted": false
102 |       },
103 |       "cell_type": "code",
104 |       "source": "###\n# REPLACE <addPrintCallback> WITH print_callback AND <addCheckpoint> WITH checkpoint\n###\nmodel.fit(X, y, batch_size = 128, epochs = 3, callbacks = [<addPrintCallback>, <addCheckpoint>])\n###",
105 |       "execution_count": null,
106 |       "outputs": []
107 |     },
108 |     {
109 |       "metadata": {},
110 |       "cell_type": "markdown",
111 |       "source": "The output won't appear to be very good. But then, this dataset is small, and we have trained it only for a short time using a rather small RNN. How might it look if we upscaled things?\n\nStep 5\n------\n\nWe could improve our model by:\n* Having a larger training set.\n* Increasing the number of LSTM units.\n* Training it for longer\n* Experimenting with difference activation functions, optimization functions etc\n\nTraining this would still take far too long on most computers to see good results - so we've trained a model already for you.\n\nThis model uses a different dataset - a few of the King Arthur tales pasted together. The model used:\n* sequences of 50 characters\n* Two LSTM layers (512 units each)\n* A dropout of 0.5 after each LSTM layer\n* Only 30 epochs (we'd recomend 100-200)\n\nLet's try importing this model that has already been trained.\n\n#### Replace `<addLoadModel>` with `load_model` and run the code."
112 |     },
113 |     {
114 |       "metadata": {
115 |         "trusted": false
116 |       },
117 |       "cell_type": "code",
118 |       "source": "from keras.models import load_model\nprint(\"loading model... \", end = '')\n\n###\n# REPLACE <addLoadModel> BELOW WITH load_model\n###\nmodel = <addLoadModel>('Models/arthur-model-epoch-30.hdf5')\n###\nmodel.compile(loss = 'categorical_crossentropy', optimizer = 'Adam')\n###\n\nprint(\"model loaded\")",
119 |       "execution_count": null,
120 |       "outputs": []
121 |     },
122 |     {
123 |       "metadata": {},
124 |       "cell_type": "markdown",
125 |       "source": "Step 6\n-------\n\nNow let's use this model to generate some new text!\n\n#### Replace `<addFilePath>` with `'Data/Arthur tales.txt'`"
126 |     },
127 |     {
128 |       "metadata": {
129 |         "trusted": false
130 |       },
131 |       "cell_type": "code",
132 |       "source": "###\n# REPLACE <addFilePath> BELOW WITH 'Data/Arthur tales.txt' (INCLUDING THE QUOTATION MARKS)\n###\ntext = io.open(<addFilePath>, encoding='UTF-8').read()\n###\n\n# Cut out punctuation and make lower case\ntext = text.lower().translate(str.maketrans(\"\", \"\", string.punctuation))\n\n# Character index dictionary\ncharset = sorted(list(set(text)))\nindex_from_char = dict((c, i) for i, c in enumerate(charset))\nchar_from_index = dict((i, c) for i, c in enumerate(charset))\n\nprint('text length: %s characters' %len(text))\nprint('unique characters: %s' %len(charset))",
133 |       "execution_count": null,
134 |       "outputs": []
135 |     },
136 |     {
137 |       "metadata": {},
138 |       "cell_type": "markdown",
139 |       "source": "### In the cell below replace:\n#### 1. `<sequenceLength>` with `50`\n#### 2. `<writeSentence>` with a sentence of your own, at least 50 characters long.\n#### 3. `<numCharsToGenerate>` with the number of characters you want to generate (choose a large number, like 1500)\n#### and then __run the code__."
140 |     },
141 |     {
142 |       "metadata": {
143 |         "trusted": false
144 |       },
145 |       "cell_type": "code",
146 |       "source": "# Generate text\n\ndiversity = 0.5\nprint('\\n### Generating text with diversity %0.2f' %(diversity))\n\n###\n# REPLACE <sequenceLength> BELOW WITH 50\n###\nsequence_length = <sequenceLength>\n###\n\n# Next we'll make a starting point for our text generator\n\n###\n# REPLACE <writeSentence> WITH A SENTENCE OF AT LEAST 50 CHARACTERS\n###\nseed = \"<writeSentence>\"\n###\n\nseed = seed.lower().translate(str.maketrans(\"\", \"\", string.punctuation))\n\n###\n# OR, ALTERNATIVELY, UNCOMMENT THE FOLLOWING TWO LINES AND GRAB A RANDOM STRING FROM THE TEXT FILE\n###\n\n#start = random.randint(0, len(text) - sequence_length - 1)\n#seed = text[start: start + sequence_length]\n\n###\n\nprint('### Generating with seed: \"%s\"' %seed[:40])\n\noutput = seed[:sequence_length].lower().translate(str.maketrans(\"\", \"\", string.punctuation))\nprint(output, end = '')\n\n###\n# REPLACE THE <numCharsToGenerate> BELOW WITH THE NUMBER OF CHARACTERS WE WISH TO GENERATE, e.g. 1500\n###\nfor i in range(<numCharsToGenerate>):\n###\n    x_pred = np.zeros((1, sequence_length, len(charset)))\n    for t, char in enumerate(output):\n        x_pred[0, t, index_from_char[char]] = 1.\n\n    predictions = model.predict(x_pred, verbose=0)[0]\n    exp_preds = np.exp(np.log(np.asarray(predictions).astype('float64')) / diversity)\n    next_index = np.argmax(np.random.multinomial(1, exp_preds / np.sum(exp_preds), 1))\n    next_char = char_from_index[next_index]\n\n    output = output[1:] + next_char\n\n    print(next_char, end = '')\nprint()",
147 |       "execution_count": null,
148 |       "outputs": []
149 |     },
150 |     {
151 |       "metadata": {},
152 |       "cell_type": "markdown",
153 |       "source": "How does it look? Does it seem intelligible?\n\nConclusion\n--------\n\nWe have trained an RNN that learns to predict characters based on a text sequence. We have trained a lightweight model from scratch, as well as imported a pre-trained model and generated new text from that."
154 |     }
155 |   ],
156 |   "metadata": {
157 |     "kernelspec": {
158 |       "name": "python36",
159 |       "display_name": "Python 3.6",
160 |       "language": "python"
161 |     },
162 |     "language_info": {
163 |       "mimetype": "text/x-python",
164 |       "nbconvert_exporter": "python",
165 |       "name": "python",
166 |       "pygments_lexer": "ipython3",
167 |       "version": "3.6.6",
168 |       "file_extension": ".py",
169 |       "codemirror_mode": {
170 |         "version": 3,
171 |         "name": "ipython"
172 |       }
173 |     }
174 |   },
175 |   "nbformat": 4,
176 |   "nbformat_minor": 2
177 | }


--------------------------------------------------------------------------------
/12. Clustering - Python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "metadata": {},
  5 |       "cell_type": "markdown",
  6 |       "source": "Clustering\n======\n\nWhen a data set doesn’t have labels we can use unsupervised learning to find some kind of structure in the data - allowing us to discover patterns or groupings.\n\nCluster analysis is a method of finding groupings, known as clusters, in datasets. As the data sets are unlabelled, cluster analysis tries to group similar examples using the examples features.\n\nK-means clustering lives true to its name - it separates examples into k number of clusters (so if k is 5, it will divide the examples into 5 clusters) and it partitions the examples by the average (mean) of the clusters."
  7 |     },
  8 |     {
  9 |       "metadata": {},
 10 |       "cell_type": "markdown",
 11 |       "source": "Step 1\n-----\n\nIn this exercise we will look at using k-means clustering to categorise a few different datasets.\n\nLet's start by first creating three clusters.\n\n#### Run the code below to set up the graphing features."
 12 |     },
 13 |     {
 14 |       "metadata": {
 15 |         "trusted": true
 16 |       },
 17 |       "cell_type": "code",
 18 |       "source": "# This sets up the graphs\nimport warnings\nwarnings.filterwarnings(\"ignore\")\nimport matplotlib.pyplot as graph\n%matplotlib inline\ngraph.rcParams['figure.figsize'] = (15,5)\ngraph.rcParams[\"font.family\"] = 'DejaVu Sans'\ngraph.rcParams[\"font.size\"] = '12'\ngraph.rcParams['image.cmap'] = 'rainbow'",
 19 |       "execution_count": null,
 20 |       "outputs": []
 21 |     },
 22 |     {
 23 |       "metadata": {},
 24 |       "cell_type": "markdown",
 25 |       "source": "### In the cell below replace:\n#### 1. `<addClusterData>` with `cluster_data`\n#### 2. `<addOutput>` with `output`\n#### and then __run the code__."
 26 |     },
 27 |     {
 28 |       "metadata": {
 29 |         "trusted": true
 30 |       },
 31 |       "cell_type": "code",
 32 |       "source": "# Let's make some data!\nimport numpy as np\nfrom sklearn import datasets\n\n###\n# REPLACE <addClusterData> WITH cluster_data AND <addOutput> WITH output\n###\n<addClusterData>, <addOutput> = datasets.make_classification(n_samples = 500, n_features = 2, n_informative = 2, n_redundant = 0, n_repeated = 0,\n                                                    n_classes = 3, n_clusters_per_class = 1, class_sep = 1.25, random_state = 6)\n###\n\n# Let's visualise it\ngraph.scatter(cluster_data.T[0], cluster_data.T[1])\ngraph.show()",
 33 |       "execution_count": null,
 34 |       "outputs": []
 35 |     },
 36 |     {
 37 |       "metadata": {},
 38 |       "cell_type": "markdown",
 39 |       "source": "Now let's see how k-means performs on a dataset like this!"
 40 |     },
 41 |     {
 42 |       "metadata": {},
 43 |       "cell_type": "markdown",
 44 |       "source": "### In the cell below replace:\n#### 1. `<addKMeans>` with `KMeans`\n#### 2. `<addFit>` with `fit`\n#### 3. `<addClusterCenters>` with `k_means.cluster_centers_`\n#### 4. `<addLabels>` with `k_means.labels_`\n#### and then __run the code__."
 45 |     },
 46 |     {
 47 |       "metadata": {
 48 |         "trusted": true
 49 |       },
 50 |       "cell_type": "code",
 51 |       "source": "from sklearn.cluster import KMeans\n\n###\n# REPLACE <addKMeans> WITH KMeans\n###\nk_means = <addKMeans>(n_clusters=3)\n###\n\n###\n# REPLACE <addFit> WITH fit\n###\nk_means.<addFit>(cluster_data)\n###\n\n# Let's visualise it\n###\n# REPLACE <addClusterCenters> BELOW WITH k_means.cluster_centers_\n###\nfor mean in <addClusterCenters>:\n    graph.plot(mean[0], mean[1], 'ko', marker = '+', markersize = 20)\n###\n\n###\n# REPLACE <addLabels> BELOW WITH k_means.labels_\n###\ngraph.scatter(cluster_data.T[0], cluster_data.T[1], c = <addLabels>)\n###\n\ngraph.show()",
 52 |       "execution_count": null,
 53 |       "outputs": []
 54 |     },
 55 |     {
 56 |       "metadata": {},
 57 |       "cell_type": "markdown",
 58 |       "source": "It performs rather well, by the looks of it! But we already knew that it had three clusters, sometimes it might not be so clear. "
 59 |     },
 60 |     {
 61 |       "metadata": {},
 62 |       "cell_type": "markdown",
 63 |       "source": "## Step 2\n\nLet's generate another dataset in which it may be a little less obvious how many classes it contains.\n\n#### Replace `<addMakeClassification>` with `datasets.make_classification` and run the code."
 64 |     },
 65 |     {
 66 |       "metadata": {
 67 |         "trusted": true
 68 |       },
 69 |       "cell_type": "code",
 70 |       "source": "###\n# REPLACE <addMakeClassification> BELOW WITH datasets.make_classification\n###\ncluster_data, output = <addMakeClassification>(n_samples = 500, n_features = 2, n_informative = 2, n_redundant = 0, n_repeated = 0, \n                                            n_classes = 4, n_clusters_per_class = 1, class_sep = 1.25, random_state = 6)\n###\n\ngraph.scatter(cluster_data.T[0], cluster_data.T[1])\ngraph.show()",
 71 |       "execution_count": null,
 72 |       "outputs": []
 73 |     },
 74 |     {
 75 |       "metadata": {},
 76 |       "cell_type": "markdown",
 77 |       "source": "In instances where we do not know how many classes to expect, it is handy to run k-means multiple times and compare how the data looks when divided up into a differing number of classes. Let's try that now.\n\n#### Replace `<addNHere>` with `n` and run the code"
 78 |     },
 79 |     {
 80 |       "metadata": {
 81 |         "trusted": true
 82 |       },
 83 |       "cell_type": "code",
 84 |       "source": "###\n# REPLACE <addNHere> BELOW WITH n\n###\nfor <addNHere> in range(2,6):\n    k_means = KMeans(n_clusters = <addNHere>).fit(cluster_data)\n###\n\n    for mean in k_means.cluster_centers_:\n        graph.plot(mean[0], mean[1], 'ko', marker = '+', markersize = 20)\n    graph.scatter(cluster_data.T[0], cluster_data.T[1], c = k_means.labels_)\n    graph.show()",
 85 |       "execution_count": null,
 86 |       "outputs": []
 87 |     },
 88 |     {
 89 |       "metadata": {},
 90 |       "cell_type": "markdown",
 91 |       "source": "Which one do you think best splits the data?"
 92 |     },
 93 |     {
 94 |       "metadata": {},
 95 |       "cell_type": "markdown",
 96 |       "source": "Step 3\n========\n\nK-means clustering performs well enough on clustered data like that, but let's try it out on a dataset that is not so linear.\n\n#### Replace `<addMakeCircles>` with `make_circles` and run the code."
 97 |     },
 98 |     {
 99 |       "metadata": {
100 |         "trusted": true
101 |       },
102 |       "cell_type": "code",
103 |       "source": "###\n# REPLACE <addMakeCircles> BELOW WITH make_circles\n###\nring_data, target = datasets.<addMakeCircles>(n_samples = 500, factor = .5, noise = 0.05, random_state = 6)\n###\n\ngraph.scatter(ring_data.T[0], ring_data.T[1], c = target)\ngraph.show()",
104 |       "execution_count": null,
105 |       "outputs": []
106 |     },
107 |     {
108 |       "metadata": {},
109 |       "cell_type": "markdown",
110 |       "source": "We can clearly distinguish two \"clusters\", that is, the two rings of datapoints.\n\nLet's see how k-means handles a dataset like this.\n\n#### Replace `<addRingData>` with `ring_data` and run the code"
111 |     },
112 |     {
113 |       "metadata": {
114 |         "trusted": true
115 |       },
116 |       "cell_type": "code",
117 |       "source": "###\n# REPLACE <addRingData> BELOW WITH ring_data\n###\nk_means = KMeans(n_clusters = 2).fit(<addRingData>)\n###\n\nfor mean in k_means.cluster_centers_:\n    graph.plot(mean[0], mean[1], 'ko', marker = '+', markersize = 20)\ngraph.scatter(ring_data.T[0], ring_data.T[1], c = k_means.labels_)\ngraph.show()",
118 |       "execution_count": null,
119 |       "outputs": []
120 |     },
121 |     {
122 |       "metadata": {},
123 |       "cell_type": "markdown",
124 |       "source": "K-means clearly has difficulty solving this.\n\nAs we are using it, there is no way for k-means to place two means to label this data set correctly."
125 |     },
126 |     {
127 |       "metadata": {},
128 |       "cell_type": "markdown",
129 |       "source": "Step 4\n------\n\nBut, we can try another way. We can use another feature - distance away from the centre.\n\nLet's see if k-means is able to classify the two data clusters with this new feature.\n\n#### Replace `<addSqrt>` with `np.sqrt` and run the code."
130 |     },
131 |     {
132 |       "metadata": {
133 |         "trusted": true
134 |       },
135 |       "cell_type": "code",
136 |       "source": "distance_from_center = []\nfor sample in ring_data:\n###\n# REPLACE <addSqrt> BELOW WITH np.sqrt\n###\n    z = 4 * <addSqrt>(sample[0]**2 + sample[1]**2)\n###\n    distance_from_center.append(z)\n# Make it a three-dimensional dataset\nring_data = np.concatenate((ring_data, np.array(distance_from_center).reshape(-1, 1)), axis = 1)\n\ngraph.scatter(ring_data.T[0], ring_data.T[1], c = ring_data.T[2])\ngraph.show()",
137 |       "execution_count": null,
138 |       "outputs": []
139 |     },
140 |     {
141 |       "metadata": {},
142 |       "cell_type": "markdown",
143 |       "source": "Looks like it will work, so let's plot all three features.\n\n### In the cell below replace:\n#### 1. `<addProjection>` with `projection='3d'`\n#### 2. `<addRingDataT>` with `ring_data.T[2]`\n#### and then __run the code__. "
144 |     },
145 |     {
146 |       "metadata": {
147 |         "trusted": true
148 |       },
149 |       "cell_type": "code",
150 |       "source": "from mpl_toolkits.mplot3d import Axes3D\n\nfig = graph.figure()\n###\n# REPLACE <addProjection> BELOW WITH projection='3d'\n###\nax = fig.add_subplot(111, <addProjection>)\n###\n\n###\n# REPLACE <addRingDataT> BELOW WITH ring_data.T[2]\n###\nax.scatter(ring_data.T[0], ring_data.T[1], <addRingDataT>, c = target)\n###\n\nax.view_init(30, 45)\ngraph.show()",
151 |       "execution_count": null,
152 |       "outputs": []
153 |     },
154 |     {
155 |       "metadata": {},
156 |       "cell_type": "markdown",
157 |       "source": "Let's see how k-means deals with the data now that it has 3 features!\n\n### In the cell below replace:\n#### 1. `<addRingData>` with `ring_data`\n#### 2. `<addLabels>` with `k_means.labels_`\n#### and then __run the code__."
158 |     },
159 |     {
160 |       "metadata": {
161 |         "trusted": true
162 |       },
163 |       "cell_type": "code",
164 |       "source": "###\n# REPLACE <addRingData> BELOW WITH ring_data\n###\nk_means = KMeans(n_clusters = 2, random_state = 0).fit(<addRingData>)\n###\n\nfig = graph.figure()\nax = fig.add_subplot(111, projection='3d')\nfor mean in k_means.cluster_centers_:\n    ax.scatter(mean[0], mean[1], mean[2], c='black', marker='+', s=50) # plot the cluster centres\n    \n###\n# REPLACE <addLabels> BELOW WITH k_means.labels_\n###\nax.scatter(ring_data.T[0], ring_data.T[1], ring_data.T[2], c = <addLabels>)\n###\n\n# We can plot a hyperplane that separates the two rings\nhp_X, hp_Y = np.array(np.meshgrid(np.linspace(-1, 1, 11), np.linspace(-1, 1, 11)))\nhp_Z = np.full(hp_X.shape, np.abs(k_means.cluster_centers_[0][2] - k_means.cluster_centers_[1][2] / 2))\nax.plot_wireframe(hp_X, hp_Y, hp_Z, rstride = 1, cstride = 1, \n                  color = 'k', linewidth = 1, linestyle = 'solid', alpha = 0.5)\n\nax.view_init(20, 45)\nax.set_zlabel('new axis')\ngraph.show()",
165 |       "execution_count": null,
166 |       "outputs": []
167 |     },
168 |     {
169 |       "metadata": {},
170 |       "cell_type": "markdown",
171 |       "source": "You can see the `+` that indicates the center of the clusters. Looks good!\n\nStep 5\n------\n\nSome data we cannot manipulate like that. Let's have a look at a different type of data distribution.\n\n#### Replace `<addMakeMoons>` with `datasets.make_moons` and run the code."
172 |     },
173 |     {
174 |       "metadata": {
175 |         "trusted": true
176 |       },
177 |       "cell_type": "code",
178 |       "source": "###\n# REPLACE <addMakeMoons> BELOW WITH datasets.make_moons\n###\ncrescent_data, output = <addMakeMoons>(n_samples = 500, noise = .05)\n###\n\ngraph.scatter(crescent_data.T[0], crescent_data.T[1], c = target)\ngraph.show()",
179 |       "execution_count": null,
180 |       "outputs": []
181 |     },
182 |     {
183 |       "metadata": {},
184 |       "cell_type": "markdown",
185 |       "source": "Let's try fitting it.\n\n#### Replace `<addCrescentData>` with `crescent_data` and run the code."
186 |     },
187 |     {
188 |       "metadata": {
189 |         "trusted": true
190 |       },
191 |       "cell_type": "code",
192 |       "source": "# Below we run KMeans on crescent_data using n_clusters = 2\n###\n# REPLACE <addCrescentData> WITH crescent_data\n###\nk_means = KMeans(n_clusters = 2).fit(<addCrescentData>)\n###\n\nfor mean in k_means.cluster_centers_:\n    graph.plot(mean[0], mean[1], 'ko', marker = '+', markersize = 20)\ngraph.scatter(crescent_data.T[0], crescent_data.T[1], c = k_means.labels_)\ngraph.show()",
193 |       "execution_count": null,
194 |       "outputs": []
195 |     },
196 |     {
197 |       "metadata": {},
198 |       "cell_type": "markdown",
199 |       "source": "Again, a similar issue as with the circle data.\n\nBut k-means is just one method for clustering, other methods don't have quite the same restrictions as k-means.\n\nStep 6\n------\n\nSpectral clustering is a clustering method that aims to cluster data that is in some way connected - but not necessarily distributed.\n\n### In the cell below replace:\n#### 1. `<addSpectralClustering>` with `SpectralClustering`\n#### 2. `<addCrescentData>` with `crescent_data`\n#### 3. `<addLabels>` with `labels_`\n#### and then __run the code__."
200 |     },
201 |     {
202 |       "metadata": {
203 |         "trusted": true
204 |       },
205 |       "cell_type": "code",
206 |       "source": "from sklearn import cluster\n\n###\n# REPLACE <addSpectralClustering> BELOW WITH SpectralClustering\n###\nspectral = cluster.<addSpectralClustering>(n_clusters = 2, eigen_solver = 'arpack', affinity = 'nearest_neighbors')\n###\n\n###\n# REPLACE <addCrescentData> BELOW WITH crescent_data\n###\nlabels_ = spectral.fit_predict(<addCrescentData>)\n###\n\n### \n# REPLACE <addLabels> BELOW WITH labels_\n###\ngraph.scatter(crescent_data.T[0], crescent_data.T[1], c = <addLabels>)\n###\ngraph.show()",
207 |       "execution_count": null,
208 |       "outputs": []
209 |     },
210 |     {
211 |       "metadata": {},
212 |       "cell_type": "markdown",
213 |       "source": "### In the cell below replace:\n#### 1. `<addSpectralClustering>` with `SpectralClustering`\n#### 2. `<addRingData>` with `ring_data`\n#### 3. `<addLabels>` with `labels_`\n#### and then __run the code__."
214 |     },
215 |     {
216 |       "metadata": {
217 |         "trusted": true
218 |       },
219 |       "cell_type": "code",
220 |       "source": "# Let's use spectral clustering on the ring_data\n\n###\n# REPLACE <addSpectralClustering> BELOW WITH SpectralClustering\n###\nspectral = cluster.<addSpectralClustering>(n_clusters = 2, eigen_solver = 'arpack', affinity = 'nearest_neighbors')\n###\n\n###\n# REPLACE <addRingData> BELOW WITH ring_data\n###\nlabels_ = spectral.fit_predict(<addRingData>)\n###\n\n###\n# REPLACE <addLabels> BELOW WITH labels_\n###\ngraph.scatter(ring_data.T[0], ring_data.T[1], c = <addLabels>)\n###\ngraph.show()",
221 |       "execution_count": null,
222 |       "outputs": []
223 |     },
224 |     {
225 |       "metadata": {},
226 |       "cell_type": "markdown",
227 |       "source": "Does it classify the data in the correct clusters?"
228 |     },
229 |     {
230 |       "metadata": {},
231 |       "cell_type": "markdown",
232 |       "source": "## Conclusion\n\nWe have learnt two important clustering methods, k-means and spectral clustering, and used them on a variety of datasets where one might be more appropriate to use than the other."
233 |     }
234 |   ],
235 |   "metadata": {
236 |     "kernelspec": {
237 |       "name": "python36",
238 |       "display_name": "Python 3.6",
239 |       "language": "python"
240 |     },
241 |     "language_info": {
242 |       "mimetype": "text/x-python",
243 |       "nbconvert_exporter": "python",
244 |       "name": "python",
245 |       "pygments_lexer": "ipython3",
246 |       "version": "3.6.6",
247 |       "file_extension": ".py",
248 |       "codemirror_mode": {
249 |         "version": 3,
250 |         "name": "ipython"
251 |       }
252 |     }
253 |   },
254 |   "nbformat": 4,
255 |   "nbformat_minor": 2
256 | }


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Microsoft Open Source Code of Conduct
 2 | 
 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
 4 | 
 5 | Resources:
 6 | 
 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
10 | 


--------------------------------------------------------------------------------
/Data/chocolate data multiple linear regression.txt:
--------------------------------------------------------------------------------
  1 | weight	cocoa_percent	cost	customer_happiness
  2 | 247	0.11	0.25	29
  3 | 192	0.82	10.44	29
  4 | 106	0.01	0	6
  5 | 78	0.04	0.01	4
  6 | 213	0.39	2.56	30
  7 | 188	0.05	0.04	19
  8 | 190	0.38	2.23	28
  9 | 154	0.45	2.48	24
 10 | 79	0.38	0.89	13
 11 | 165	0.36	1.68	24
 12 | 175	0.84	9.77	28
 13 | 206	0.79	10.41	30
 14 | 227	0.26	1.23	30
 15 | 204	0.96	15.12	27
 16 | 148	0.17	0.34	17
 17 | 60	0.8	3.07	22
 18 | 200	0.86	11.9	28
 19 | 148	0.63	4.64	26
 20 | 225	0.19	0.68	28
 21 | 61	0.02	0	0
 22 | 174	0.85	10.01	23
 23 | 93	0.72	3.82	18
 24 | 184	0.85	10.72	24
 25 | 243	0.95	17.49	23
 26 | 234	0.81	12.37	27
 27 | 92	0.4	1.16	11
 28 | 239	0.43	3.58	29
 29 | 101	0.66	3.49	18
 30 | 142	0.32	1.16	16
 31 | 151	0.45	2.42	20
 32 | 157	0.4	2.05	19
 33 | 219	0.21	0.8	23
 34 | 109	0.23	0.47	9
 35 | 80	0.89	5.02	21
 36 | 173	0.68	6.48	24
 37 | 118	0.23	0.5	11
 38 | 162	0.53	3.6	22
 39 | 162	0.08	0.09	12
 40 | 222	0.22	0.87	24
 41 | 194	0.46	3.27	25
 42 | 61	0.18	0.16	1
 43 | 157	0.55	3.74	21
 44 | 101	0.85	5.85	21
 45 | 55	0.84	3.08	18
 46 | 213	0.35	2.13	26
 47 | 244	0.08	0.13	24
 48 | 239	0.05	0.05	22
 49 | 119	0.68	4.44	20
 50 | 162	0.29	1.1	18
 51 | 89	0.24	0.42	7
 52 | 131	0.37	1.41	16
 53 | 156	0.61	4.62	22
 54 | 108	0.73	4.64	20
 55 | 95	1	7.57	22
 56 | 147	0.76	6.72	22
 57 | 127	0.15	0.23	9
 58 | 149	0.46	2.54	20
 59 | 103	0.94	7.33	21
 60 | 121	0.76	5.65	21
 61 | 193	0.88	11.85	24
 62 | 117	0.85	6.79	21
 63 | 203	0.59	5.62	26
 64 | 110	0.73	4.73	20
 65 | 135	0.99	10.56	22
 66 | 90	0.54	2.1	15
 67 | 209	0.52	4.53	27
 68 | 194	0.55	4.78	26
 69 | 73	0.95	5.22	21
 70 | 132	0.59	3.72	20
 71 | 166	0.04	0.02	11
 72 | 214	0.58	5.8	27
 73 | 122	0.64	4.05	19
 74 | 64	0.19	0.18	2
 75 | 205	0.62	6.3	26
 76 | 55	0.17	0.13	0
 77 | 208	0.31	1.61	22
 78 | 216	0.4	2.7	24
 79 | 149	0.41	1.96	16
 80 | 128	0.73	5.53	19
 81 | 73	1	5.81	20
 82 | 61	0.29	0.42	3
 83 | 159	0.19	0.47	13
 84 | 166	0.24	0.76	15
 85 | 178	0.95	12.98	21
 86 | 116	0.02	0	1
 87 | 74	0.32	0.62	5
 88 | 133	0.94	9.42	20
 89 | 208	0.57	5.41	25
 90 | 137	0.34	1.23	13
 91 | 107	0.02	0	0
 92 | 203	0.17	0.46	6
 93 | 182	0.45	2.98	9
 94 | 189	0.23	0.83	6
 95 | 149	0.21	0.53	0
 96 | 243	0.46	4.17	10
 97 | 204	0.73	8.77	7
 98 | 224	0.05	0.04	0
 99 | 170	0.52	3.67	1
100 | 216	0.66	7.46	5
101 | 233	0.07	0.09	0


--------------------------------------------------------------------------------
/Data/chocolate data.txt:
--------------------------------------------------------------------------------
  1 | weight	cocoa_percent	sugar_percent	milk_percent	customer_happiness
  2 | 185	65	11	24	47
  3 | 247	44	34	22	55
  4 | 133	33	21	47	35
  5 | 145	30	38	32	34
  6 | 110	22	70	7	40
  7 | 134	25	38	37	40
  8 | 196	18	34	48	41
  9 | 118	45	38	17	38
 10 | 235	45	12	43	50
 11 | 107	8	2	90	25
 12 | 106	10	72	18	22
 13 | 151	42	20	38	47
 14 | 144	18	36	46	26
 15 | 160	39	40	21	47
 16 | 59	17	47	37	22
 17 | 221	39	28	33	46
 18 | 81	57	32	10	48
 19 | 135	19	36	45	27
 20 | 85	43	20	37	28
 21 | 246	35	43	22	52
 22 | 150	9	49	42	31
 23 | 124	49	32	20	43
 24 | 216	11	12	76	49
 25 | 227	15	26	60	43
 26 | 105	13	61	26	27
 27 | 27	61	9	30	30
 28 | 225	48	2	51	58
 29 | 50	43	29	28	23
 30 | 156	53	44	4	40
 31 | 155	76	24	0	54
 32 | 67	12	44	44	21
 33 | 41	26	53	21	34
 34 | 204	46	31	22	61
 35 | 211	35	41	23	58
 36 | 208	14	57	29	47
 37 | 162	43	41	15	39
 38 | 15	40	13	47	32
 39 | 133	23	28	49	28
 40 | 76	36	28	37	40
 41 | 18	76	13	11	30
 42 | 131	61	23	16	39
 43 | 231	34	14	52	49
 44 | 66	9	48	43	15
 45 | 242	41	42	17	47
 46 | 79	0	54	46	17
 47 | 51	15	44	41	24
 48 | 90	26	72	2	24
 49 | 1	3	33	65	7
 50 | 67	19	44	37	16
 51 | 176	50	21	29	41
 52 | 46	64	24	12	47
 53 | 81	24	53	24	19
 54 | 250	32	40	28	44
 55 | 240	48	8	44	56
 56 | 175	30	38	32	35
 57 | 11	11	72	17	11
 58 | 58	29	64	7	26
 59 | 234	48	11	41	52
 60 | 95	11	57	32	32
 61 | 97	31	54	15	23
 62 | 32	79	3	18	35
 63 | 234	41	25	34	51
 64 | 237	71	20	9	70
 65 | 116	47	38	15	35
 66 | 31	12	75	13	13
 67 | 83	2	33	65	16
 68 | 192	30	68	2	41
 69 | 1	24	29	47	10
 70 | 65	42	16	42	37
 71 | 22	43	32	25	31
 72 | 140	44	5	51	51
 73 | 212	38	55	6	62
 74 | 67	13	72	15	14
 75 | 112	24	59	18	26
 76 | 234	42	41	17	56
 77 | 233	21	28	51	60
 78 | 2	28	10	62	12
 79 | 81	37	41	22	35
 80 | 222	90	5	5	60
 81 | 223	39	21	40	54
 82 | 181	9	40	51	52
 83 | 34	46	28	26	22
 84 | 132	19	45	36	29
 85 | 103	39	23	38	41
 86 | 45	35	36	29	18
 87 | 40	13	52	35	22
 88 | 216	12	73	15	41
 89 | 107	41	23	36	32
 90 | 42	47	33	20	31
 91 | 165	44	30	26	52
 92 | 33	53	3	45	34
 93 | 162	5	56	39	36
 94 | 249	4	14	82	52
 95 | 144	79	3	18	45
 96 | 215	3	20	78	41
 97 | 194	0	12	88	29
 98 | 95	34	20	46	26
 99 | 44	29	40	31	19
100 | 218	46	25	28	49
101 | 169	88	1	12	57
102 | 


--------------------------------------------------------------------------------
/Data/dog_data.csv:
--------------------------------------------------------------------------------
  1 | age,weight,height,breed
  2 | 9.47E+00,6.20E+00,6.80E+00,1
  3 | 7.97E+00,8.63E+00,8.92E+00,0
  4 | 9.51E+00,6.40E+00,5.78E+00,1
  5 | 8.96E+00,8.82E+00,6.28E+00,2
  6 | 8.37E+00,3.89E+00,5.62E+00,1
  7 | 9.46E+00,9.56E+00,5.77E+00,2
  8 | 1.04E+01,1.10E+01,7.78E+00,0
  9 | 9.08E+00,7.10E+00,5.79E+00,1
 10 | 9.53E+00,9.29E+00,5.03E+00,2
 11 | 8.57E+00,5.09E+00,4.05E+00,1
 12 | 8.77E+00,6.17E+00,4.89E+00,1
 13 | 8.63E+00,3.92E+00,4.62E+00,1
 14 | 7.67E+00,7.54E+00,4.90E+00,2
 15 | 8.25E+00,7.94E+00,4.75E+00,2
 16 | 8.54E+00,8.73E+00,9.10E+00,0
 17 | 9.22E+00,8.47E+00,8.51E+00,0
 18 | 8.28E+00,8.86E+00,5.12E+00,2
 19 | 8.52E+00,4.98E+00,4.74E+00,1
 20 | 9.82E+00,8.66E+00,8.92E+00,0
 21 | 9.71E+00,5.82E+00,4.57E+00,1
 22 | 8.34E+00,5.68E+00,5.49E+00,1
 23 | 8.51E+00,8.01E+00,9.13E+00,2
 24 | 7.19E+00,7.98E+00,2.78E+00,2
 25 | 8.53E+00,9.33E+00,5.32E+00,2
 26 | 9.49E+00,9.29E+00,9.14E+00,0
 27 | 1.05E+01,1.06E+01,6.68E+00,2
 28 | 8.90E+00,7.16E+00,1.01E+01,0
 29 | 9.06E+00,4.01E+00,5.37E+00,1
 30 | 9.76E+00,6.54E+00,5.01E+00,1
 31 | 8.42E+00,3.38E+00,4.16E+00,1
 32 | 8.65E+00,2.78E+00,5.80E+00,1
 33 | 9.12E+00,5.26E+00,5.87E+00,1
 34 | 9.97E+00,1.05E+01,6.36E+00,2
 35 | 9.02E+00,8.54E+00,9.15E+00,0
 36 | 8.34E+00,6.47E+00,8.31E+00,0
 37 | 8.98E+00,5.44E+00,4.89E+00,1
 38 | 8.55E+00,8.07E+00,1.02E+01,0
 39 | 9.54E+00,1.03E+01,7.62E+00,0
 40 | 8.38E+00,4.50E+00,3.79E+00,1
 41 | 8.11E+00,8.52E+00,8.57E+00,0
 42 | 8.01E+00,8.40E+00,1.07E+01,0
 43 | 8.14E+00,7.33E+00,9.01E+00,0
 44 | 7.72E+00,8.81E+00,8.34E+00,0
 45 | 9.57E+00,8.51E+00,9.32E+00,0
 46 | 9.52E+00,9.32E+00,5.19E+00,2
 47 | 7.63E+00,7.79E+00,3.97E+00,2
 48 | 8.30E+00,8.63E+00,7.75E+00,0
 49 | 1.05E+01,1.04E+01,6.51E+00,2
 50 | 1.02E+01,9.13E+00,8.06E+00,0
 51 | 8.18E+00,4.05E+00,6.22E+00,1
 52 | 8.27E+00,8.49E+00,5.44E+00,2
 53 | 8.90E+00,9.29E+00,5.22E+00,2
 54 | 9.58E+00,9.08E+00,4.58E+00,2
 55 | 8.97E+00,4.78E+00,4.93E+00,1
 56 | 1.07E+01,1.06E+01,8.65E+00,0
 57 | 9.59E+00,5.10E+00,4.93E+00,1
 58 | 9.55E+00,6.27E+00,5.41E+00,1
 59 | 8.16E+00,7.70E+00,1.11E+01,0
 60 | 1.06E+01,1.01E+01,9.82E+00,0
 61 | 9.31E+00,4.98E+00,5.40E+00,1
 62 | 8.24E+00,7.84E+00,9.13E+00,0
 63 | 9.67E+00,9.99E+00,4.78E+00,2
 64 | 9.54E+00,1.08E+01,8.42E+00,0
 65 | 9.19E+00,7.52E+00,1.04E+01,0
 66 | 8.86E+00,8.69E+00,6.01E+00,2
 67 | 7.96E+00,6.26E+00,5.51E+00,1
 68 | 1.13E+01,1.06E+01,8.73E+00,0
 69 | 8.71E+00,6.21E+00,6.66E+00,1
 70 | 1.19E+01,1.14E+01,6.02E+00,2
 71 | 8.77E+00,5.61E+00,5.22E+00,1
 72 | 8.09E+00,8.37E+00,4.10E+00,2
 73 | 8.10E+00,8.05E+00,8.51E+00,0
 74 | 8.84E+00,9.21E+00,5.38E+00,2
 75 | 8.12E+00,3.23E+00,2.21E+00,1
 76 | 9.14E+00,8.98E+00,4.96E+00,2
 77 | 1.20E+01,1.18E+01,7.09E+00,2
 78 | 8.67E+00,8.22E+00,3.54E+00,2
 79 | 9.04E+00,1.01E+01,9.34E+00,0
 80 | 9.27E+00,8.58E+00,8.30E+00,0
 81 | 1.07E+01,9.59E+00,9.66E+00,0
 82 | 9.10E+00,9.51E+00,9.18E+00,0
 83 | 8.30E+00,8.36E+00,9.00E+00,0
 84 | 1.03E+01,1.17E+01,7.35E+00,0
 85 | 8.27E+00,7.69E+00,3.44E+00,2
 86 | 8.70E+00,8.60E+00,5.31E+00,2
 87 | 8.10E+00,4.51E+00,4.19E+00,1
 88 | 9.17E+00,8.35E+00,9.87E+00,0
 89 | 8.64E+00,8.85E+00,6.20E+00,2
 90 | 8.20E+00,8.37E+00,4.14E+00,2
 91 | 6.90E+00,6.87E+00,2.67E+00,2
 92 | 7.30E+00,7.36E+00,2.74E+00,2
 93 | 1.01E+01,6.19E+00,5.54E+00,1
 94 | 9.67E+00,1.02E+01,6.13E+00,2
 95 | 8.28E+00,8.56E+00,1.02E+01,0
 96 | 9.93E+00,9.29E+00,8.54E+00,0
 97 | 1.04E+01,1.05E+01,6.94E+00,2
 98 | 8.06E+00,7.52E+00,1.04E+01,0
 99 | 1.03E+01,5.82E+00,4.62E+00,1
100 | 7.68E+00,8.26E+00,1.01E+01,0
101 | 9.71E+00,9.50E+00,5.56E+00,2
102 | 9.82E+00,4.99E+00,5.14E+00,1
103 | 8.98E+00,9.47E+00,4.22E+00,2
104 | 7.62E+00,4.42E+00,5.05E+00,1
105 | 7.54E+00,8.87E+00,8.99E+00,0
106 | 8.65E+00,6.18E+00,4.55E+00,1
107 | 7.83E+00,7.86E+00,4.99E+00,2
108 | 8.17E+00,5.29E+00,5.17E+00,1
109 | 9.13E+00,9.19E+00,1.05E+01,0
110 | 8.96E+00,4.09E+00,3.45E+00,1
111 | 1.00E+01,1.03E+01,6.87E+00,2
112 | 8.78E+00,4.21E+00,3.11E+00,1
113 | 7.12E+00,7.93E+00,1.01E+01,0
114 | 8.67E+00,9.00E+00,9.38E+00,0
115 | 9.40E+00,6.34E+00,5.17E+00,1
116 | 8.53E+00,4.10E+00,3.87E+00,1
117 | 1.06E+01,1.08E+01,6.48E+00,2
118 | 8.76E+00,8.48E+00,4.46E+00,2
119 | 9.51E+00,9.40E+00,5.03E+00,2
120 | 9.18E+00,4.23E+00,5.77E+00,1
121 | 1.03E+01,9.66E+00,6.60E+00,2
122 | 8.96E+00,8.75E+00,4.96E+00,2
123 | 8.43E+00,8.11E+00,8.75E+00,0
124 | 8.37E+00,8.74E+00,5.70E+00,2
125 | 9.06E+00,5.17E+00,4.72E+00,1
126 | 1.11E+01,1.11E+01,6.91E+00,2
127 | 7.32E+00,7.54E+00,9.31E+00,0
128 | 8.92E+00,9.39E+00,5.48E+00,2
129 | 8.82E+00,4.71E+00,5.26E+00,1
130 | 7.77E+00,7.99E+00,3.44E+00,2
131 | 8.10E+00,8.37E+00,9.02E+00,0
132 | 8.48E+00,8.78E+00,4.96E+00,2
133 | 8.29E+00,8.05E+00,1.06E+01,0
134 | 9.72E+00,6.69E+00,4.87E+00,1
135 | 1.02E+01,6.34E+00,4.95E+00,1
136 | 9.60E+00,7.27E+00,6.92E+00,1
137 | 8.50E+00,7.83E+00,8.60E+00,0
138 | 8.95E+00,8.89E+00,9.44E+00,0
139 | 8.45E+00,9.00E+00,8.75E+00,0
140 | 1.12E+01,1.18E+01,7.13E+00,0
141 | 8.37E+00,2.08E+00,4.73E+00,1
142 | 9.30E+00,6.46E+00,5.95E+00,1
143 | 8.96E+00,9.07E+00,5.08E+00,2
144 | 9.51E+00,9.77E+00,9.19E+00,0
145 | 8.84E+00,9.42E+00,6.13E+00,2
146 | 1.06E+01,9.96E+00,5.77E+00,2
147 | 9.16E+00,8.96E+00,6.41E+00,2
148 | 8.17E+00,4.25E+00,4.59E+00,1
149 | 7.92E+00,8.90E+00,9.10E+00,0
150 | 8.91E+00,3.64E+00,5.13E+00,1
151 | 8.98E+00,5.03E+00,4.29E+00,1
152 | 9.15E+00,4.39E+00,5.62E+00,1
153 | 9.40E+00,9.52E+00,4.84E+00,2
154 | 7.99E+00,7.93E+00,3.98E+00,2
155 | 9.24E+00,9.99E+00,8.83E+00,0
156 | 9.87E+00,1.05E+01,7.99E+00,0
157 | 9.30E+00,3.72E+00,4.02E+00,1
158 | 9.39E+00,5.08E+00,4.51E+00,1
159 | 9.02E+00,8.60E+00,8.35E+00,0
160 | 9.36E+00,6.89E+00,6.13E+00,1
161 | 9.23E+00,1.03E+01,1.01E+01,0
162 | 8.96E+00,4.15E+00,4.88E+00,1
163 | 8.64E+00,8.22E+00,9.16E+00,0
164 | 1.08E+01,1.06E+01,6.74E+00,2
165 | 1.05E+01,1.02E+01,9.06E+00,0
166 | 8.10E+00,9.20E+00,5.62E+00,2
167 | 8.77E+00,8.67E+00,4.10E+00,2
168 | 8.63E+00,8.09E+00,4.86E+00,2
169 | 9.20E+00,9.53E+00,6.55E+00,2
170 | 7.68E+00,7.59E+00,9.77E+00,0
171 | 1.09E+01,1.06E+01,6.38E+00,2
172 | 8.38E+00,8.69E+00,5.06E+00,2
173 | 8.30E+00,8.34E+00,9.32E+00,0
174 | 8.99E+00,4.19E+00,5.63E+00,1
175 | 8.74E+00,8.71E+00,5.55E+00,2
176 | 8.50E+00,3.55E+00,5.81E+00,1
177 | 9.87E+00,9.56E+00,7.98E+00,0
178 | 6.82E+00,6.46E+00,2.48E+00,2
179 | 8.70E+00,2.93E+00,4.36E+00,1
180 | 1.01E+01,6.75E+00,5.12E+00,1
181 | 1.18E+01,1.21E+01,8.03E+00,2
182 | 9.40E+00,5.21E+00,6.14E+00,1
183 | 7.70E+00,8.80E+00,9.61E+00,0
184 | 8.92E+00,5.24E+00,4.35E+00,1
185 | 9.59E+00,4.62E+00,6.07E+00,1
186 | 9.60E+00,5.42E+00,7.63E+00,1
187 | 9.53E+00,9.59E+00,5.81E+00,2
188 | 9.32E+00,9.32E+00,6.27E+00,2
189 | 1.02E+01,1.01E+01,8.69E+00,1
190 | 9.00E+00,9.68E+00,6.44E+00,2
191 | 1.21E+01,1.11E+01,6.95E+00,0
192 | 1.04E+01,4.64E+00,6.01E+00,1
193 | 7.50E+00,7.72E+00,9.99E+00,0
194 | 9.49E+00,4.28E+00,6.47E+00,1
195 | 8.95E+00,5.77E+00,6.43E+00,1
196 | 8.90E+00,4.67E+00,2.88E+00,1
197 | 1.02E+01,9.90E+00,5.02E+00,2
198 | 7.77E+00,3.96E+00,4.17E+00,1
199 | 9.38E+00,6.98E+00,5.89E+00,1
200 | 9.00E+00,9.06E+00,9.59E+00,0
201 | 8.26E+00,8.85E+00,1.02E+01,0
202 | 


--------------------------------------------------------------------------------
/Data/football data.txt:
--------------------------------------------------------------------------------
  1 | average_goals_per_match	won_competition
  2 | 2.422870462	1
  3 | 2.824477516	1
  4 | 0.571688038	0
  5 | 1.055027667	0
  6 | 0.394192269	0
  7 | 0.754099232	0
  8 | 0.962959667	0
  9 | 1.994727613	0
 10 | 0.456755473	0
 11 | 0.525435057	0
 12 | 1.891407683	0
 13 | 1.018292157	0
 14 | 2.641061388	1
 15 | 1.081919124	0
 16 | 1.584087989	0
 17 | 1.587817681	0
 18 | 2.459575476	1
 19 | 1.170237541	0
 20 | 2.821653731	1
 21 | 2.05399727	0
 22 | 0.451411638	0
 23 | 1.486331674	0
 24 | 2.023428035	0
 25 | 2.843421156	1
 26 | 2.18239352	0
 27 | 2.508448909	1
 28 | 1.514173157	0
 29 | 1.460308	0
 30 | 1.779336362	0
 31 | 2.258397839	0
 32 | 2.607770127	1
 33 | 0.09945028	0
 34 | 2.35292296	1
 35 | 2.732122873	1
 36 | 2.8009988	1
 37 | 1.375195574	0
 38 | 0.971946125	0
 39 | 2.558268873	1
 40 | 2.565131087	1
 41 | 1.32548955	0
 42 | 2.334994306	0
 43 | 0.277073998	0
 44 | 0.261170366	0
 45 | 2.486403854	1
 46 | 2.969708759	1
 47 | 1.156309517	0
 48 | 2.84905351	1
 49 | 2.43635455	1
 50 | 0.754363317	0
 51 | 2.742626634	1
 52 | 1.224594963	0
 53 | 0.430516008	0
 54 | 0.887394082	0
 55 | 0.208938758	0
 56 | 1.520957714	0
 57 | 1.163998189	0
 58 | 2.81519393	1
 59 | 2.866950623	1
 60 | 0.35201577	0
 61 | 1.00306916	0
 62 | 0.089511949	0
 63 | 2.540442771	1
 64 | 0.547311147	0
 65 | 1.79477098	0
 66 | 1.48085737	0
 67 | 2.31644846	1
 68 | 0.763619073	0
 69 | 0.143390622	0
 70 | 0.193967012	0
 71 | 0.381415979	0
 72 | 2.632311728	1
 73 | 1.470286787	0
 74 | 0.498301326	0
 75 | 0.819245999	0
 76 | 1.869586655	0
 77 | 2.132291437	0
 78 | 0.145980681	0
 79 | 1.253342554	0
 80 | 1.647209079	0
 81 | 0.926896356	0
 82 | 2.282348031	1
 83 | 0.012578271	0
 84 | 0.455925289	0
 85 | 2.680673352	1
 86 | 1.985915665	0
 87 | 0.085881894	0
 88 | 2.689432655	1
 89 | 2.508641	1
 90 | 2.45187147	1
 91 | 1.238382828	0
 92 | 2.284923233	1
 93 | 0.101472759	0
 94 | 0.533641322	0
 95 | 1.73402536	0
 96 | 2.180275628	0
 97 | 2.642701936	1
 98 | 0.388674174	0
 99 | 2.226984467	0
100 | 1.045053442	0
101 | 2.216692543	0


--------------------------------------------------------------------------------
/Data/football_data.csv:
--------------------------------------------------------------------------------
  1 | ﻿average_goals_per_match,won_competition
  2 | 2.422870462,1
  3 | 2.824477516,1
  4 | 0.571688038,0
  5 | 1.055027667,0
  6 | 0.394192269,0
  7 | 0.754099232,0
  8 | 0.962959667,0
  9 | 1.994727613,0
 10 | 0.456755473,0
 11 | 0.525435057,0
 12 | 1.891407683,0
 13 | 1.018292157,0
 14 | 2.641061388,1
 15 | 1.081919124,0
 16 | 1.584087989,0
 17 | 1.587817681,0
 18 | 2.459575476,1
 19 | 1.170237541,0
 20 | 2.821653731,1
 21 | 2.05399727,0
 22 | 0.451411638,0
 23 | 1.486331674,0
 24 | 2.023428035,0
 25 | 2.843421156,1
 26 | 2.18239352,0
 27 | 2.508448909,1
 28 | 1.514173157,0
 29 | 1.460308,0
 30 | 1.779336362,0
 31 | 2.258397839,0
 32 | 2.607770127,1
 33 | 0.09945028,0
 34 | 2.35292296,1
 35 | 2.732122873,1
 36 | 2.8009988,1
 37 | 1.375195574,0
 38 | 0.971946125,0
 39 | 2.558268873,1
 40 | 2.565131087,1
 41 | 1.32548955,0
 42 | 2.334994306,0
 43 | 0.277073998,0
 44 | 0.261170366,0
 45 | 2.486403854,1
 46 | 2.969708759,1
 47 | 1.156309517,0
 48 | 2.84905351,1
 49 | 2.43635455,1
 50 | 0.754363317,0
 51 | 2.742626634,1
 52 | 1.224594963,0
 53 | 0.430516008,0
 54 | 0.887394082,0
 55 | 0.208938758,0
 56 | 1.520957714,0
 57 | 1.163998189,0
 58 | 2.81519393,1
 59 | 2.866950623,1
 60 | 0.35201577,0
 61 | 1.00306916,0
 62 | 0.089511949,0
 63 | 2.540442771,1
 64 | 0.547311147,0
 65 | 1.79477098,0
 66 | 1.48085737,0
 67 | 2.31644846,1
 68 | 0.763619073,0
 69 | 0.143390622,0
 70 | 0.193967012,0
 71 | 0.381415979,0
 72 | 2.632311728,1
 73 | 1.470286787,0
 74 | 0.498301326,0
 75 | 0.819245999,0
 76 | 1.869586655,0
 77 | 2.132291437,0
 78 | 0.145980681,0
 79 | 1.253342554,0
 80 | 1.647209079,0
 81 | 0.926896356,0
 82 | 2.282348031,1
 83 | 0.012578271,0
 84 | 0.455925289,0
 85 | 2.680673352,1
 86 | 1.985915665,0
 87 | 0.085881894,0
 88 | 2.689432655,1
 89 | 2.508641,1
 90 | 2.45187147,1
 91 | 1.238382828,0
 92 | 2.284923233,1
 93 | 0.101472759,0
 94 | 0.533641322,0
 95 | 1.73402536,0
 96 | 2.180275628,0
 97 | 2.642701936,1
 98 | 0.388674174,0
 99 | 2.226984467,0
100 | 1.045053442,0
101 | 2.216692543,0
102 | 


--------------------------------------------------------------------------------
/Data/traffic_by_hour.csv:
--------------------------------------------------------------------------------
1 | 00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,21,22,23
2 | 4.360655421386891106e+01,2.471415243168735998e+01,9.302910970866314244e+00,3.694417015169733531e+00,9.324994928878604483e+00,9.837653313714728398e+00,7.960156509601787000e+00,2.129209806855379128e+01,2.771412586402762201e+01,4.670921084052981342e+01,3.911199949015489352e+01,4.742874469954385575e+01,4.345939414008118007e+01,3.904657935639097843e+01,4.171486034093191364e+01,3.813035688692960434e+01,4.277975145346606212e+01,4.130417935632857507e+01,4.949913745519085495e+01,4.356621060502504861e+01,4.333981356792399708e+01,6.409661727874893700e+01,5.958220793355829414e+01,4.281970155221666374e+01
3 | 4.458483461102591150e+01,1.960434764035231581e+01,9.480831719723372686e+00,1.347690534797838424e+01,1.446522395805932604e+01,6.014082826041636132e+00,2.267967131865366781e+01,1.819289802099414288e+01,2.878376226399169013e+01,4.011397231467096702e+01,4.614933381836575421e+01,4.375361104497339682e+01,4.531261797021947046e+01,3.465456885850920088e+01,5.136445722452378959e+01,3.581937927938652422e+01,5.324305644941320281e+01,4.991026677530656741e+01,4.521989462961121120e+01,5.200261896989397314e+01,5.681758056516863320e+01,6.135913191794563204e+01,5.028792564895855577e+01,4.038354412487803557e+01
4 | 3.320856146056010516e+01,2.958418081618512119e+01,2.720763330588727413e+01,1.124323327934973094e+01,1.222980541796953169e+01,5.072605297463047336e+00,6.111837866899005434e+00,2.617679230349706643e+01,3.524648307585961504e+01,3.822043200812598229e+01,3.090295064127423785e+01,5.046242188322992206e+01,4.186584869132914122e+01,4.362873596980531232e+01,3.773802942037548291e+01,4.210401295001258148e+01,5.464266677003922013e+01,4.965617376126637339e+01,3.477964093080311869e+01,4.530579139639098685e+01,4.181824635262458401e+01,6.114016284549821023e+01,6.144635319762286940e+01,5.881157571704308396e+01
5 | 3.502665485183174354e+01,2.036754951472195785e+01,2.144528489571132468e+01,7.449591532631174573e+00,2.232114991577189400e+00,8.104623220511466997e+00,9.095804670617658516e+00,1.949946259191193576e+01,3.768956690121524389e+01,3.390709349829828056e+01,3.101834889182442012e+01,4.337981435125371377e+01,4.033062481032064994e+01,4.179804092660128845e+01,3.235427385361062136e+01,3.611236563329332228e+01,5.382150804401013033e+01,3.586998953767924547e+01,4.183091027514376492e+01,4.692259473776898204e+01,4.267652627221272610e+01,6.013905435990915294e+01,6.163977217887768489e+01,4.467098795974052194e+01
6 | 4.016319422608915346e+01,1.993632753640963173e+01,1.806648016188032813e+01,1.210993983210695646e+01,1.087853935093216329e+01,9.766026740065232303e+00,1.950476123450128796e+01,1.031387494130530769e+01,2.850912829110281166e+01,3.080974586707072405e+01,3.632650856885130253e+01,4.589394114695814153e+01,3.151274296264309527e+01,3.723943695070546767e+01,3.750943082970208309e+01,5.441648446512536452e+01,3.680134250606158020e+01,4.921699104313118767e+01,4.392759526930144176e+01,4.065717511095378711e+01,4.435037140017104917e+01,5.190988563720762272e+01,6.167439508720916308e+01,4.672716979505646862e+01
7 | 4.916939064389787006e+01,2.445518800786128821e+01,1.239135992063409653e+01,1.070533695787615258e+01,6.511395138985639264e+00,2.178534480043876442e+01,1.925732145248098703e+01,2.327378212446163985e+01,2.966100596772361797e+01,3.460858234351567120e+01,3.867958481988834052e+01,4.825450197089693916e+01,4.458540354744464906e+01,3.356191487925229211e+01,3.939223788729094622e+01,5.470800707845620536e+01,4.804269775089670702e+01,3.668272237596980290e+01,4.784333915301860429e+01,4.587219623494701182e+01,4.163642201315494873e+01,5.404916943574897203e+01,5.370873094384637625e+01,5.547372381993804424e+01


--------------------------------------------------------------------------------
/Data/trees.csv:
--------------------------------------------------------------------------------
  1 | leaf_width,leaf_length,trunk_girth,trunk_height,tree_type
  2 | 5.13E+00,6.18E+00,8.26E+00,8.74E+00,0
  3 | 7.49E+00,4.02E+00,8.07E+00,6.78E+00,0
  4 | 9.22E+00,4.16E+00,5.46E+00,8.45E+00,1
  5 | 6.98E+00,1.11E+01,6.96E+00,4.06E+00,2
  6 | 3.46E+00,5.19E+00,8.72E+00,1.04E+01,0
  7 | 4.55E+00,5.15E+00,9.01E+00,9.64E+00,0
  8 | 4.95E+00,1.04E+01,6.33E+00,4.49E+00,2
  9 | 7.64E+00,2.58E+00,9.73E+00,7.75E+00,0
 10 | 8.69E+00,4.35E+00,4.37E+00,8.82E+00,1
 11 | 7.21E+00,3.62E+00,8.71E+00,7.43E+00,0
 12 | 6.48E+00,1.15E+01,8.20E+00,3.85E+00,2
 13 | 8.52E+00,3.67E+00,5.99E+00,9.70E+00,1
 14 | 6.35E+00,8.18E+00,4.50E+00,6.14E+00,2
 15 | 6.61E+00,5.29E+00,6.80E+00,6.07E+00,0
 16 | 5.70E+00,5.08E+00,8.30E+00,8.10E+00,0
 17 | 4.73E+00,7.88E+00,4.03E+00,5.10E+00,2
 18 | 8.15E+00,5.08E+00,5.43E+00,1.05E+01,1
 19 | 5.42E+00,8.67E+00,4.57E+00,4.57E+00,2
 20 | 3.31E+00,5.46E+00,9.85E+00,1.04E+01,0
 21 | 6.50E+00,4.39E+00,9.15E+00,8.29E+00,0
 22 | 9.31E+00,5.18E+00,5.66E+00,8.93E+00,1
 23 | 2.79E+00,9.93E+00,6.56E+00,3.12E+00,2
 24 | 5.56E+00,9.99E+00,6.06E+00,4.56E+00,2
 25 | 5.87E+00,6.92E+00,2.89E+00,6.25E+00,2
 26 | 3.99E+00,6.54E+00,2.91E+00,6.36E+00,2
 27 | 5.18E+00,9.02E+00,4.42E+00,5.62E+00,2
 28 | 6.07E+00,3.81E+00,9.77E+00,9.44E+00,0
 29 | 8.78E+00,4.35E+00,4.44E+00,8.85E+00,1
 30 | 4.12E+00,7.14E+00,7.83E+00,8.16E+00,0
 31 | 5.10E+00,5.86E+00,8.55E+00,8.37E+00,0
 32 | 3.34E+00,9.27E+00,5.25E+00,3.98E+00,2
 33 | 4.88E+00,9.19E+00,5.89E+00,4.93E+00,2
 34 | 4.70E+00,6.18E+00,9.26E+00,8.09E+00,0
 35 | 5.32E+00,9.59E+00,5.23E+00,4.85E+00,2
 36 | 9.01E+00,7.84E+00,3.10E+00,9.07E+00,1
 37 | 1.01E+01,5.94E+00,6.45E+00,8.39E+00,1
 38 | 5.71E+00,6.16E+00,2.24E+00,8.06E+00,2
 39 | 3.01E+00,5.78E+00,9.61E+00,1.06E+01,0
 40 | 5.56E+00,4.63E+00,9.24E+00,9.44E+00,0
 41 | 4.39E+00,7.97E+00,4.49E+00,5.15E+00,2
 42 | 3.67E+00,5.17E+00,9.34E+00,9.81E+00,0
 43 | 3.68E+00,8.62E+00,5.35E+00,4.48E+00,2
 44 | 3.64E+00,6.14E+00,8.99E+00,9.09E+00,0
 45 | 8.92E+00,5.78E+00,5.08E+00,9.39E+00,1
 46 | 5.84E+00,4.81E+00,8.86E+00,8.05E+00,0
 47 | 8.11E+00,9.31E+00,4.70E+00,5.62E+00,2
 48 | 5.41E+00,5.81E+00,7.48E+00,7.84E+00,0
 49 | 1.00E+01,8.15E+00,4.89E+00,8.69E+00,1
 50 | 5.05E+00,8.51E+00,4.89E+00,5.52E+00,2
 51 | 2.06E+00,6.52E+00,9.68E+00,1.04E+01,0
 52 | 1.10E+01,4.56E+00,7.22E+00,6.97E+00,1
 53 | 5.27E+00,4.25E+00,8.19E+00,9.16E+00,0
 54 | 4.31E+00,5.11E+00,1.09E+01,1.06E+01,0
 55 | 7.48E+00,5.17E+00,7.65E+00,6.13E+00,0
 56 | 5.75E+00,4.78E+00,8.19E+00,8.02E+00,0
 57 | 3.73E+00,9.85E+00,5.46E+00,4.45E+00,2
 58 | 3.03E+00,1.05E+01,6.70E+00,2.78E+00,2
 59 | 4.99E+00,9.67E+00,5.29E+00,4.81E+00,2
 60 | 3.97E+00,6.25E+00,7.10E+00,8.14E+00,0
 61 | 6.12E+00,9.76E+00,5.79E+00,4.45E+00,2
 62 | 5.51E+00,4.12E+00,1.01E+01,9.38E+00,0
 63 | 8.12E+00,5.01E+00,2.81E+00,9.33E+00,1
 64 | 4.37E+00,6.64E+00,7.70E+00,7.72E+00,0
 65 | 4.03E+00,7.25E+00,3.44E+00,5.95E+00,2
 66 | 1.01E+01,5.55E+00,5.15E+00,7.42E+00,1
 67 | 9.20E+00,7.27E+00,4.76E+00,9.44E+00,1
 68 | 8.63E+00,5.95E+00,4.65E+00,9.68E+00,1
 69 | 6.72E+00,5.02E+00,7.74E+00,7.16E+00,0
 70 | 6.23E+00,4.27E+00,8.95E+00,7.94E+00,0
 71 | 4.66E+00,5.47E+00,9.49E+00,9.13E+00,0
 72 | 1.06E+01,7.05E+00,5.37E+00,7.60E+00,1
 73 | 6.90E+00,3.11E+00,8.50E+00,7.86E+00,0
 74 | 9.01E+00,3.80E+00,4.98E+00,8.48E+00,1
 75 | 8.53E+00,4.52E+00,5.02E+00,9.58E+00,1
 76 | 9.40E+00,5.34E+00,5.26E+00,8.64E+00,1
 77 | 1.03E+01,3.09E+00,6.88E+00,7.04E+00,1
 78 | 1.01E+01,5.69E+00,4.99E+00,7.61E+00,1
 79 | 1.01E+01,5.40E+00,5.75E+00,7.94E+00,1
 80 | 7.07E+00,3.58E+00,8.95E+00,7.79E+00,0
 81 | 7.67E+00,5.31E+00,3.30E+00,1.03E+01,1
 82 | 3.92E+00,7.08E+00,3.35E+00,5.54E+00,2
 83 | 5.17E+00,9.49E+00,5.81E+00,4.65E+00,2
 84 | 4.53E+00,5.58E+00,9.38E+00,9.35E+00,0
 85 | 3.79E+00,5.93E+00,8.36E+00,8.99E+00,0
 86 | 5.27E+00,8.53E+00,4.45E+00,5.00E+00,2
 87 | 4.26E+00,5.84E+00,9.39E+00,9.95E+00,0
 88 | 9.05E+00,7.22E+00,3.80E+00,9.33E+00,1
 89 | 5.86E+00,9.61E+00,6.11E+00,5.93E+00,2
 90 | 4.22E+00,5.76E+00,9.20E+00,1.00E+01,0
 91 | 9.61E+00,6.40E+00,4.47E+00,8.33E+00,1
 92 | 6.20E+00,8.41E+00,4.77E+00,5.19E+00,2
 93 | 5.94E+00,4.02E+00,8.40E+00,8.65E+00,0
 94 | 7.70E+00,4.67E+00,3.81E+00,1.02E+01,1
 95 | 5.00E+00,5.62E+00,7.93E+00,8.02E+00,0
 96 | 5.46E+00,4.58E+00,1.08E+01,1.02E+01,0
 97 | 5.55E+00,8.15E+00,3.50E+00,5.31E+00,0
 98 | 5.92E+00,9.84E+00,6.16E+00,5.53E+00,2
 99 | 4.87E+00,8.81E+00,4.87E+00,4.72E+00,2
100 | 6.61E+00,7.36E+00,2.62E+00,6.53E+00,2
101 | 2.83E+00,5.99E+00,8.54E+00,9.73E+00,0
102 | 4.18E+00,5.00E+00,8.57E+00,9.30E+00,0
103 | 2.54E+00,9.60E+00,6.12E+00,3.65E+00,2
104 | 5.28E+00,7.55E+00,2.97E+00,6.02E+00,2
105 | 5.08E+00,8.39E+00,3.93E+00,5.24E+00,2
106 | 5.92E+00,4.94E+00,7.95E+00,8.02E+00,0
107 | 5.84E+00,8.72E+00,4.61E+00,5.86E+00,2
108 | 5.32E+00,8.29E+00,4.14E+00,5.12E+00,2
109 | 5.88E+00,1.04E+01,6.83E+00,4.06E+00,2
110 | 6.59E+00,8.95E+00,4.38E+00,4.97E+00,2
111 | 1.02E+01,5.59E+00,5.44E+00,7.76E+00,1
112 | 5.58E+00,8.32E+00,4.88E+00,5.17E+00,2
113 | 5.49E+00,8.55E+00,4.29E+00,6.14E+00,2
114 | 5.08E+00,5.57E+00,7.69E+00,8.07E+00,0
115 | 5.19E+00,4.69E+00,9.93E+00,9.36E+00,0
116 | 8.47E+00,3.43E+00,4.63E+00,9.08E+00,1
117 | 7.81E+00,3.76E+00,3.88E+00,9.86E+00,1
118 | 8.01E+00,4.20E+00,3.12E+00,9.29E+00,1
119 | 9.27E+00,4.56E+00,5.31E+00,8.65E+00,1
120 | 5.45E+00,9.25E+00,5.36E+00,5.48E+00,2
121 | 4.64E+00,9.64E+00,5.61E+00,4.85E+00,2
122 | 9.69E+00,3.71E+00,6.06E+00,8.01E+00,1
123 | 5.85E+00,4.81E+00,8.57E+00,7.75E+00,0
124 | 5.83E+00,9.24E+00,4.72E+00,4.79E+00,2
125 | 5.46E+00,5.08E+00,8.39E+00,8.13E+00,0
126 | 3.57E+00,5.34E+00,1.04E+01,1.03E+01,0
127 | 6.85E+00,1.11E+00,4.48E+00,1.06E+01,1
128 | 1.01E+01,3.93E+00,7.03E+00,7.87E+00,1
129 | 8.21E+00,4.32E+00,5.20E+00,1.01E+01,1
130 | 9.67E+00,5.02E+00,7.91E+00,9.23E+00,1
131 | 1.08E+01,5.49E+00,7.08E+00,7.28E+00,1
132 | 1.01E+01,5.75E+00,6.46E+00,8.39E+00,1
133 | 8.80E+00,3.99E+00,4.87E+00,8.84E+00,0
134 | 9.31E+00,5.24E+00,6.48E+00,9.25E+00,1
135 | 5.56E+00,9.08E+00,5.01E+00,4.57E+00,2
136 | 5.65E+00,4.48E+00,9.20E+00,8.85E+00,0
137 | 8.75E+00,4.16E+00,4.94E+00,9.11E+00,1
138 | 1.04E+01,4.94E+00,6.81E+00,7.56E+00,1
139 | 8.67E+00,2.93E+00,6.75E+00,9.51E+00,1
140 | 8.44E+00,4.18E+00,4.45E+00,9.18E+00,1
141 | 9.27E+00,5.37E+00,6.06E+00,9.23E+00,1
142 | 1.04E+01,5.92E+00,6.91E+00,8.19E+00,1
143 | 4.66E+00,4.64E+00,1.03E+01,9.59E+00,0
144 | 7.63E+00,9.19E+00,4.31E+00,5.58E+00,2
145 | 1.07E+01,5.35E+00,7.17E+00,7.69E+00,1
146 | 7.84E+00,4.19E+00,4.32E+00,1.00E+01,1
147 | 7.35E+00,8.98E+00,4.12E+00,5.59E+00,2
148 | 9.02E+00,3.42E+00,6.66E+00,9.06E+00,1
149 | 9.15E+00,2.81E+00,6.53E+00,8.70E+00,1
150 | 9.13E+00,3.60E+00,5.20E+00,8.35E+00,1
151 | 4.04E+00,6.60E+00,1.88E+00,6.53E+00,2
152 | 4.67E+00,1.02E+01,5.92E+00,4.06E+00,2
153 | 5.47E+00,4.77E+00,9.05E+00,8.35E+00,0
154 | 6.95E+00,3.95E+00,1.91E+00,1.03E+01,1
155 | 4.93E+00,4.60E+00,9.49E+00,9.21E+00,0
156 | 9.51E+00,4.21E+00,5.65E+00,8.20E+00,1
157 | 6.60E+00,4.28E+00,8.30E+00,7.75E+00,0
158 | 4.93E+00,5.33E+00,9.11E+00,8.55E+00,0
159 | 5.96E+00,9.36E+00,5.49E+00,5.03E+00,2
160 | 7.09E+00,4.09E+00,8.47E+00,6.88E+00,0
161 | 9.85E+00,6.31E+00,5.49E+00,8.42E+00,1
162 | 4.87E+00,1.13E+01,7.62E+00,3.42E+00,2
163 | 8.54E+00,5.68E+00,4.73E+00,9.75E+00,1
164 | 7.36E+00,7.37E+00,1.78E+00,1.10E+01,1
165 | 4.17E+00,6.11E+00,8.44E+00,9.02E+00,0
166 | 7.49E+00,8.27E+00,3.76E+00,6.25E+00,2
167 | 4.23E+00,9.41E+00,5.83E+00,5.04E+00,2
168 | 6.46E+00,9.95E+00,5.59E+00,5.11E+00,2
169 | 8.68E+00,3.93E+00,6.34E+00,9.75E+00,1
170 | 5.00E+00,7.49E+00,3.10E+00,5.63E+00,2
171 | 7.85E+00,4.84E+00,4.77E+00,1.07E+01,1
172 | 5.38E+00,9.24E+00,4.77E+00,4.48E+00,2
173 | 5.26E+00,5.68E+00,8.93E+00,8.84E+00,1
174 | 9.50E+00,7.24E+00,4.32E+00,8.80E+00,1
175 | 5.67E+00,8.60E+00,4.80E+00,5.45E+00,2
176 | 3.69E+00,9.97E+00,6.64E+00,4.05E+00,2
177 | 6.18E+00,3.63E+00,8.39E+00,8.83E+00,0
178 | 9.74E+00,5.23E+00,7.00E+00,9.07E+00,1
179 | 5.00E+00,5.10E+00,8.64E+00,9.12E+00,0
180 | 9.68E+00,6.63E+00,5.12E+00,8.85E+00,1
181 | 4.60E+00,4.18E+00,9.99E+00,9.71E+00,0
182 | 5.21E+00,5.61E+00,8.09E+00,8.01E+00,0
183 | 2.84E+00,5.86E+00,9.53E+00,1.07E+01,0
184 | 9.65E+00,6.07E+00,6.31E+00,9.11E+00,1
185 | 2.65E+00,6.69E+00,9.18E+00,1.07E+01,0
186 | 4.99E+00,1.04E+01,6.61E+00,3.95E+00,2
187 | 3.35E+00,1.02E+01,6.53E+00,3.33E+00,2
188 | 8.94E+00,4.19E+00,4.17E+00,8.34E+00,1
189 | 4.10E+00,4.38E+00,1.05E+01,1.10E+01,0
190 | 3.69E+00,7.75E+00,3.82E+00,5.14E+00,2
191 | 9.15E+00,4.52E+00,4.77E+00,8.58E+00,1
192 | 4.67E+00,4.25E+00,1.02E+01,9.46E+00,0
193 | 3.94E+00,9.39E+00,5.20E+00,4.43E+00,2
194 | 7.51E+00,7.45E+00,2.47E+00,6.13E+00,2
195 | 2.87E+00,7.84E+00,3.56E+00,4.77E+00,2
196 | 6.96E+00,1.05E+01,6.22E+00,4.69E+00,2
197 | 8.65E+00,5.83E+00,4.63E+00,9.64E+00,1
198 | 8.39E+00,2.96E+00,4.14E+00,8.56E+00,1
199 | 4.99E+00,4.47E+00,9.17E+00,9.66E+00,0
200 | 9.58E+00,4.53E+00,5.68E+00,8.29E+00,1
201 | 6.45E+00,3.04E+00,9.37E+00,8.89E+00,0
202 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 |     MIT License
 2 | 
 3 |     Copyright (c) Microsoft Corporation.
 4 | 
 5 |     Permission is hereby granted, free of charge, to any person obtaining a copy
 6 |     of this software and associated documentation files (the "Software"), to deal
 7 |     in the Software without restriction, including without limitation the rights
 8 |     to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 |     copies of the Software, and to permit persons to whom the Software is
10 |     furnished to do so, subject to the following conditions:
11 | 
12 |     The above copyright notice and this permission notice shall be included in all
13 |     copies or substantial portions of the Software.
14 | 
15 |     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 |     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 |     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 |     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 |     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 |     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 |     SOFTWARE
22 | 


--------------------------------------------------------------------------------
/Models/arthur-model-epoch-30.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrosoftDocs/ms-learn-ml-crash-course-python/efc4ff685a61e033a8a9ffa0f5ba8956a1528405/Models/arthur-model-epoch-30.hdf5


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ML Crash Course Python Programming Exercises
 2 | 
 3 | Welcome! In this library we have the Python programming exercises for the ML Crash Course.
 4 | 
 5 | These notebooks contains the programming exercises used in the [ML Crash Course](https://docs.microsoft.com/learn/paths/ml-crash-course). While you could explore the examples without following the tutorial, it's strongly recommended that you follow along with the course on Microsoft Learn.
 6 | 
 7 | ## Getting started
 8 | 
 9 | If you aren't already completing ML Crash Course you can visit the [learning path](https://docs.microsoft.com/learn/paths/ml-crash-course).
10 | 
11 | ### Setting up Azure Notebooks
12 | 
13 | * Go to [Azure Notebooks projects](https://notebooks.azure.com/home/projects#).
14 | * Click on Upload GitHub Repo.
15 | * Click on the "GitHub repository" box and paste in ```MicrosoftDocs/ms-learn-ml-crash-course-python```.
16 | * Click the Import button.
17 | 
18 | ## Troubleshooting
19 | 
20 | Below are some common issues and their solutions when completing the exercises in this Azure Notebooks.
21 | 
22 | ### Links to the exercises don't work
23 | 
24 | The links to the exercises on MS Learn assume you have set up your library, and kept the library ID as 'ms-learn-ml-crash-course-pytho' - if you haven't then the links won't work.
25 | 
26 | #### Solution
27 | 
28 | * Click [here](https://notebooks.azure.com/home/libraries) to go to your libraries
29 | * Right clicking on your library and select __Settings__
30 | * Change the Library ID back to ```ms-learn-ml-crash-course-pytho```.
31 | 
32 | You can also just go to your library and right click the programming exercise you wish to do and click '__Run__'
33 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | <!-- BEGIN MICROSOFT SECURITY.MD V0.0.3 BLOCK -->
 2 | 
 3 | ## Security
 4 | 
 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
 6 | 
 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets Microsoft's [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)) of a security vulnerability, please report it to us as described below.
 8 | 
 9 | ## Reporting Security Issues
10 | 
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 | 
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
14 | 
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com).  If possible, encrypt your message with our PGP key; please download it from the the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
16 | 
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
18 | 
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 | 
21 |   * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 |   * Full paths of source file(s) related to the manifestation of the issue
23 |   * The location of the affected source code (tag/branch/commit or direct URL)
24 |   * Any special configuration required to reproduce the issue
25 |   * Step-by-step instructions to reproduce the issue
26 |   * Proof-of-concept or exploit code (if possible)
27 |   * Impact of the issue, including how an attacker might exploit the issue
28 | 
29 | This information will help us triage your report more quickly.
30 | 
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.
32 | 
33 | ## Preferred Languages
34 | 
35 | We prefer all communications to be in English.
36 | 
37 | ## Policy
38 | 
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).
40 | 
41 | <!-- END MICROSOFT SECURITY.MD BLOCK -->
42 | 


--------------------------------------------------------------------------------