├── Breast Cancer Analysis.ipynb ├── Cryptocurrency Market Analysis.ipynb ├── Digit Recognizer Using CNN.ipynb ├── Exploratory Data Analysis of House Prices.ipynb ├── Intro to BigQuery .ipynb ├── K_Nearest_Neighbors ├── K-Nearest Neighbors On Social Network Ads.ipynb ├── Social_Network_Ads.csv ├── classification_template.R ├── classification_template.py ├── knn.R └── knn.py ├── Natural Language Processing Personal Notes.ipynb ├── R Practice [Under Construction].ipynb ├── README.md ├── Regression - Machine Learning.ipynb ├── Simple Linear Regression.ipynb ├── Stock Market Analysis for Tech Stocks.ipynb ├── data ├── breast_cancer.csv ├── house_prices_test.csv ├── house_prices_train.csv ├── kc_house_data.csv ├── linear_regression_test.csv └── linear_regression_train.csv ├── digit_recognizer.ipynb ├── kaggle_titanic.ipynb └── webscraping san diego apartments.ipynb /Breast Cancer Analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "We are going to analyze the Breast Cancer Wisconsin (Diagnostic) dataset. This dataset comes from the UCI Machine Learning Repository and can be found [here](https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29).\n", 8 | "\n", 9 | "Attribute Information on the dataset can be found [here](https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.names).\n", 10 | "***" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# here we will import the libraries used for machine learning\n", 20 | "import numpy as np # linear algebra\n", 21 | "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv), data manipulation as in SQL\n", 22 | "import matplotlib.pyplot as plt # used for plotting graphs\n", 23 | "import seaborn as sns # used for statistical and interactive plots; my preferred plotting library\n", 24 | "%matplotlib inline\n", 25 | "from sklearn.linear_model import LogisticRegression # to apply logistic regression\n", 26 | "from sklearn.model_selection import train_test_split # to split the data into two parts\n", 27 | "from sklearn.model_selection import GridSearchCV,KFold # for parameter tuning\n", 28 | "from sklearn.ensemble import RandomForestClassifier # for the random forest classifier\n", 29 | "from sklearn.naive_bayes import GaussianNB\n", 30 | "from sklearn.neighbors import KNeighborsClassifier\n", 31 | "from sklearn.tree import DecisionTreeClassifier\n", 32 | "from sklearn import svm # for Support Vector Machine\n", 33 | "from sklearn import metrics # to check the error and accuracy of the model\n", 34 | "# Any results you write to the current directory are saved as output.\n", 35 | "# don't worry if the model_selection import fails on older scikit-learn versions; cross_validation can be used instead" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 7, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "df = pd.read_csv('data/breast_cancer.csv',header = 0)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 8, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/html": [ 57 | "
[rendered HTML table output omitted: it showed the df.head() preview (5 rows × 33 columns), which is reproduced in the text/plain output below]
" 223 | ], 224 | "text/plain": [ 225 | " id diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n", 226 | "0 842302 M 17.99 10.38 122.80 1001.0 \n", 227 | "1 842517 M 20.57 17.77 132.90 1326.0 \n", 228 | "2 84300903 M 19.69 21.25 130.00 1203.0 \n", 229 | "3 84348301 M 11.42 20.38 77.58 386.1 \n", 230 | "4 84358402 M 20.29 14.34 135.10 1297.0 \n", 231 | "\n", 232 | " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n", 233 | "0 0.11840 0.27760 0.3001 0.14710 \n", 234 | "1 0.08474 0.07864 0.0869 0.07017 \n", 235 | "2 0.10960 0.15990 0.1974 0.12790 \n", 236 | "3 0.14250 0.28390 0.2414 0.10520 \n", 237 | "4 0.10030 0.13280 0.1980 0.10430 \n", 238 | "\n", 239 | " ... texture_worst perimeter_worst area_worst smoothness_worst \\\n", 240 | "0 ... 17.33 184.60 2019.0 0.1622 \n", 241 | "1 ... 23.41 158.80 1956.0 0.1238 \n", 242 | "2 ... 25.53 152.50 1709.0 0.1444 \n", 243 | "3 ... 26.50 98.87 567.7 0.2098 \n", 244 | "4 ... 16.67 152.20 1575.0 0.1374 \n", 245 | "\n", 246 | " compactness_worst concavity_worst concave points_worst symmetry_worst \\\n", 247 | "0 0.6656 0.7119 0.2654 0.4601 \n", 248 | "1 0.1866 0.2416 0.1860 0.2750 \n", 249 | "2 0.4245 0.4504 0.2430 0.3613 \n", 250 | "3 0.8663 0.6869 0.2575 0.6638 \n", 251 | "4 0.2050 0.4000 0.1625 0.2364 \n", 252 | "\n", 253 | " fractal_dimension_worst Unnamed: 32 \n", 254 | "0 0.11890 NaN \n", 255 | "1 0.08902 NaN \n", 256 | "2 0.08758 NaN \n", 257 | "3 0.17300 NaN \n", 258 | "4 0.07678 NaN \n", 259 | "\n", 260 | "[5 rows x 33 columns]" 261 | ] 262 | }, 263 | "execution_count": 8, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "df.head()" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 9, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "name": "stdout", 279 | "output_type": "stream", 280 | "text": [ 281 | "\n", 282 | "RangeIndex: 569 entries, 0 to 568\n", 283 | "Data columns (total 33 columns):\n", 284 | "id 569 non-null int64\n", 285 | "diagnosis 569 non-null object\n", 286 | "radius_mean 569 non-null float64\n", 287 | "texture_mean 569 non-null float64\n", 288 | "perimeter_mean 569 non-null float64\n", 289 | "area_mean 569 non-null float64\n", 290 | "smoothness_mean 569 non-null float64\n", 291 | "compactness_mean 569 non-null float64\n", 292 | "concavity_mean 569 non-null float64\n", 293 | "concave points_mean 569 non-null float64\n", 294 | "symmetry_mean 569 non-null float64\n", 295 | "fractal_dimension_mean 569 non-null float64\n", 296 | "radius_se 569 non-null float64\n", 297 | "texture_se 569 non-null float64\n", 298 | "perimeter_se 569 non-null float64\n", 299 | "area_se 569 non-null float64\n", 300 | "smoothness_se 569 non-null float64\n", 301 | "compactness_se 569 non-null float64\n", 302 | "concavity_se 569 non-null float64\n", 303 | "concave points_se 569 non-null float64\n", 304 | "symmetry_se 569 non-null float64\n", 305 | "fractal_dimension_se 569 non-null float64\n", 306 | "radius_worst 569 non-null float64\n", 307 | "texture_worst 569 non-null float64\n", 308 | "perimeter_worst 569 non-null float64\n", 309 | "area_worst 569 non-null float64\n", 310 | "smoothness_worst 569 non-null float64\n", 311 | "compactness_worst 569 non-null float64\n", 312 | "concavity_worst 569 non-null float64\n", 313 | "concave points_worst 569 non-null float64\n", 314 | "symmetry_worst 569 non-null float64\n", 315 | "fractal_dimension_worst 569 non-null float64\n", 316 | "Unnamed: 32 0 non-null float64\n", 317 | "dtypes: float64(31), 
int64(1), object(1)\n", 318 | "memory usage: 146.8+ KB\n" 319 | ] 320 | } 321 | ], 322 | "source": [ 323 | "df.info()" 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "***\n", 331 | "Looking at the data, we can drop unnecessary columns: `id` and `Unnamed: 32` have no value for us." 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": { 338 | "collapsed": true 339 | }, 340 | "outputs": [], 341 | "source": [] 342 | } 343 | ], 344 | "metadata": { 345 | "kernelspec": { 346 | "display_name": "Python 3", 347 | "language": "python", 348 | "name": "python3" 349 | }, 350 | "language_info": { 351 | "codemirror_mode": { 352 | "name": "ipython", 353 | "version": 3 354 | }, 355 | "file_extension": ".py", 356 | "mimetype": "text/x-python", 357 | "name": "python", 358 | "nbconvert_exporter": "python", 359 | "pygments_lexer": "ipython3", 360 | "version": "3.6.3" 361 | } 362 | }, 363 | "nbformat": 4, 364 | "nbformat_minor": 2 365 | } 366 | -------------------------------------------------------------------------------- /Digit Recognizer Using CNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#importing necessary libraries\n", 12 | "\n", 13 | "import pandas as pd\n", 14 | "import numpy as np\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "import matplotlib.image as mpimg\n", 17 | "import seaborn as sns\n", 18 | "%matplotlib inline\n", 19 | "\n", 20 | "np.random.seed(2)\n", 21 | "\n", 22 | "from sklearn.model_selection import train_test_split\n", 23 | "from sklearn.metrics import confusion_matrix\n", 24 | "import itertools\n", 25 | "\n", 26 | "from keras.utils.np_utils import to_categorical \n", 27 | "from keras.models import Sequential\n", 28 | "from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D\n", 29 | "from keras.optimizers import RMSprop\n", 30 | "from keras.preprocessing.image import ImageDataGenerator\n", 31 | "from keras.callbacks import ReduceLROnPlateau\n", 32 | "\n", 33 | "sns.set(style='white',context='notebook',palette='deep')" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "#let's load our data\n", 45 | "train = pd.read_csv()\n", 46 | "test = pd.read_csv()\n" 47 | ] 48 | } 49 | ], 50 | "metadata": { 51 | "kernelspec": { 52 | "display_name": "Python 3", 53 | "language": "python", 54 | "name": "python3" 55 | }, 56 | "language_info": { 57 | "codemirror_mode": { 58 | "name": "ipython", 59 | "version": 3 60 | }, 61 | "file_extension": ".py", 62 | "mimetype": "text/x-python", 63 | "name": "python", 64 | "nbconvert_exporter": "python", 65 | "pygments_lexer": "ipython3", 66 | "version": "3.6.3" 67 | } 68 | }, 69 | "nbformat": 4, 70 | "nbformat_minor": 2 71 | } 72 | -------------------------------------------------------------------------------- /Intro to BigQuery .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from google.cloud import bigquery\n", 11 | "from bq_helper import BigQueryHelper" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 |
"execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "QUERY = \"\"\"\n", 21 | " SELECT\n", 22 | " extract(DAYOFYEAR from date_local) as day_of_year,\n", 23 | " aqi\n", 24 | " FROM\n", 25 | " `bigquery-public-data.epa_historical_air_quality.pm25_frm_daily_summary`\n", 26 | " WHERE\n", 27 | " city_name = \"Los Angeles\"\n", 28 | " AND state_name = \"California\"\n", 29 | " AND sample_duration = \"24 HOUR\"\n", 30 | " AND poc = 1\n", 31 | " AND EXTRACT(YEAR FROM date_local) = 2015\n", 32 | " ORDER BY day_of_year\n", 33 | " \"\"\"" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": { 40 | "scrolled": true 41 | }, 42 | "outputs": [ 43 | { 44 | "ename": "DefaultCredentialsError", 45 | "evalue": "Could not automatically determine credentials. Please set GOOGLE_APPLICATION_CREDENTIALS or\nexplicitly create credential and re-run the application. For more\ninformation, please see\nhttps://developers.google.com/accounts/docs/application-default-credentials.", 46 | "output_type": "error", 47 | "traceback": [ 48 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 49 | "\u001b[0;31mDefaultCredentialsError\u001b[0m Traceback (most recent call last)", 50 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mbq_assistant\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBigQueryHelper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"bigquery-public-data\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"epa_historical_air_quality\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 51 | "\u001b[0;32m~/src/bq-helper/bq_helper.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, active_project, dataset_name)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mproject_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mactive_project\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataset_name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclient\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbigquery\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mClient\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 26\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__dataset_ref\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclient\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproject\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mproject_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 52 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/google/cloud/bigquery/client.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, project, credentials, _http)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mproject\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcredentials\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_http\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 125\u001b[0m super(Client, self).__init__(\n\u001b[0;32m--> 126\u001b[0;31m project=project, credentials=credentials, _http=_http)\n\u001b[0m\u001b[1;32m 127\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_connection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mConnection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 128\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 53 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/google/cloud/client.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, project, credentials, _http)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproject\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcredentials\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_http\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 211\u001b[0;31m \u001b[0m_ClientProjectMixin\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproject\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mproject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 212\u001b[0m \u001b[0mClient\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcredentials\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcredentials\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_http\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0m_http\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 54 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/google/cloud/client.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, project)\u001b[0m\n\u001b[1;32m 163\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproject\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 165\u001b[0;31m \u001b[0mproject\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_determine_default\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 166\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mproject\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m raise EnvironmentError('Project was not passed and could not be '\n", 55 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/google/cloud/client.py\u001b[0m in \u001b[0;36m_determine_default\u001b[0;34m(project)\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0m_determine_default\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[0;34m\"\"\"Helper: use default project detection.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 178\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_determine_default_project\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 179\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 56 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/google/cloud/_helpers.py\u001b[0m in \u001b[0;36m_determine_default_project\u001b[0;34m(project)\u001b[0m\n\u001b[1;32m 177\u001b[0m \"\"\"\n\u001b[1;32m 178\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mproject\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 179\u001b[0;31m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproject\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgoogle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauth\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefault\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 180\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mproject\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 181\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 57 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/google/auth/_default.py\u001b[0m in \u001b[0;36mdefault\u001b[0;34m(scopes, request)\u001b[0m\n\u001b[1;32m 281\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcredentials\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meffective_project_id\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 283\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDefaultCredentialsError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_HELP_MESSAGE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 58 | "\u001b[0;31mDefaultCredentialsError\u001b[0m: Could not automatically determine credentials. Please set GOOGLE_APPLICATION_CREDENTIALS or\nexplicitly create credential and re-run the application. For more\ninformation, please see\nhttps://developers.google.com/accounts/docs/application-default-credentials." 
59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "bq_assistant = BigQueryHelper(\"bigquery-public-data\", \"epa_historical_air_quality\")" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "df = bq_assistant.query_to_pandas(QUERY)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "df.plot(x='day_of_year', y='aqi', style='.');" 82 | ] 83 | } 84 | ], 85 | "metadata": { 86 | "kernelspec": { 87 | "display_name": "Python 3", 88 | "language": "python", 89 | "name": "python3" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 3 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython3", 101 | "version": "3.6.4" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 2 106 | } 107 | -------------------------------------------------------------------------------- /K_Nearest_Neighbors/Social_Network_Ads.csv: -------------------------------------------------------------------------------- 1 | User ID,Gender,Age,EstimatedSalary,Purchased 2 | 15624510,Male,19,19000,0 3 | 15810944,Male,35,20000,0 4 | 15668575,Female,26,43000,0 5 | 15603246,Female,27,57000,0 6 | 15804002,Male,19,76000,0 7 | 15728773,Male,27,58000,0 8 | 15598044,Female,27,84000,0 9 | 15694829,Female,32,150000,1 10 | 15600575,Male,25,33000,0 11 | 15727311,Female,35,65000,0 12 | 15570769,Female,26,80000,0 13 | 15606274,Female,26,52000,0 14 | 15746139,Male,20,86000,0 15 | 15704987,Male,32,18000,0 16 | 15628972,Male,18,82000,0 17 | 15697686,Male,29,80000,0 18 | 15733883,Male,47,25000,1 19 | 15617482,Male,45,26000,1 20 | 15704583,Male,46,28000,1 21 | 15621083,Female,48,29000,1 22 | 15649487,Male,45,22000,1 23 | 15736760,Female,47,49000,1 24 | 15714658,Male,48,41000,1 25 | 15599081,Female,45,22000,1 26 | 15705113,Male,46,23000,1 27 | 15631159,Male,47,20000,1 28 | 15792818,Male,49,28000,1 29 | 15633531,Female,47,30000,1 30 | 15744529,Male,29,43000,0 31 | 15669656,Male,31,18000,0 32 | 15581198,Male,31,74000,0 33 | 15729054,Female,27,137000,1 34 | 15573452,Female,21,16000,0 35 | 15776733,Female,28,44000,0 36 | 15724858,Male,27,90000,0 37 | 15713144,Male,35,27000,0 38 | 15690188,Female,33,28000,0 39 | 15689425,Male,30,49000,0 40 | 15671766,Female,26,72000,0 41 | 15782806,Female,27,31000,0 42 | 15764419,Female,27,17000,0 43 | 15591915,Female,33,51000,0 44 | 15772798,Male,35,108000,0 45 | 15792008,Male,30,15000,0 46 | 15715541,Female,28,84000,0 47 | 15639277,Male,23,20000,0 48 | 15798850,Male,25,79000,0 49 | 15776348,Female,27,54000,0 50 | 15727696,Male,30,135000,1 51 | 15793813,Female,31,89000,0 52 | 15694395,Female,24,32000,0 53 | 15764195,Female,18,44000,0 54 | 15744919,Female,29,83000,0 55 | 15671655,Female,35,23000,0 56 | 15654901,Female,27,58000,0 57 | 15649136,Female,24,55000,0 58 | 15775562,Female,23,48000,0 59 | 15807481,Male,28,79000,0 60 | 15642885,Male,22,18000,0 61 | 15789109,Female,32,117000,0 62 | 15814004,Male,27,20000,0 63 | 15673619,Male,25,87000,0 64 | 15595135,Female,23,66000,0 65 | 15583681,Male,32,120000,1 66 | 15605000,Female,59,83000,0 67 | 15718071,Male,24,58000,0 68 | 15679760,Male,24,19000,0 69 | 15654574,Female,23,82000,0 70 | 15577178,Female,22,63000,0 71 | 15595324,Female,31,68000,0 72 | 15756932,Male,25,80000,0 73 | 15726358,Female,24,27000,0 74 | 15595228,Female,20,23000,0 75 | 
15782530,Female,33,113000,0 76 | 15592877,Male,32,18000,0 77 | 15651983,Male,34,112000,1 78 | 15746737,Male,18,52000,0 79 | 15774179,Female,22,27000,0 80 | 15667265,Female,28,87000,0 81 | 15655123,Female,26,17000,0 82 | 15595917,Male,30,80000,0 83 | 15668385,Male,39,42000,0 84 | 15709476,Male,20,49000,0 85 | 15711218,Male,35,88000,0 86 | 15798659,Female,30,62000,0 87 | 15663939,Female,31,118000,1 88 | 15694946,Male,24,55000,0 89 | 15631912,Female,28,85000,0 90 | 15768816,Male,26,81000,0 91 | 15682268,Male,35,50000,0 92 | 15684801,Male,22,81000,0 93 | 15636428,Female,30,116000,0 94 | 15809823,Male,26,15000,0 95 | 15699284,Female,29,28000,0 96 | 15786993,Female,29,83000,0 97 | 15709441,Female,35,44000,0 98 | 15710257,Female,35,25000,0 99 | 15582492,Male,28,123000,1 100 | 15575694,Male,35,73000,0 101 | 15756820,Female,28,37000,0 102 | 15766289,Male,27,88000,0 103 | 15593014,Male,28,59000,0 104 | 15584545,Female,32,86000,0 105 | 15675949,Female,33,149000,1 106 | 15672091,Female,19,21000,0 107 | 15801658,Male,21,72000,0 108 | 15706185,Female,26,35000,0 109 | 15789863,Male,27,89000,0 110 | 15720943,Male,26,86000,0 111 | 15697997,Female,38,80000,0 112 | 15665416,Female,39,71000,0 113 | 15660200,Female,37,71000,0 114 | 15619653,Male,38,61000,0 115 | 15773447,Male,37,55000,0 116 | 15739160,Male,42,80000,0 117 | 15689237,Male,40,57000,0 118 | 15679297,Male,35,75000,0 119 | 15591433,Male,36,52000,0 120 | 15642725,Male,40,59000,0 121 | 15701962,Male,41,59000,0 122 | 15811613,Female,36,75000,0 123 | 15741049,Male,37,72000,0 124 | 15724423,Female,40,75000,0 125 | 15574305,Male,35,53000,0 126 | 15678168,Female,41,51000,0 127 | 15697020,Female,39,61000,0 128 | 15610801,Male,42,65000,0 129 | 15745232,Male,26,32000,0 130 | 15722758,Male,30,17000,0 131 | 15792102,Female,26,84000,0 132 | 15675185,Male,31,58000,0 133 | 15801247,Male,33,31000,0 134 | 15725660,Male,30,87000,0 135 | 15638963,Female,21,68000,0 136 | 15800061,Female,28,55000,0 137 | 15578006,Male,23,63000,0 138 | 15668504,Female,20,82000,0 139 | 15687491,Male,30,107000,1 140 | 15610403,Female,28,59000,0 141 | 15741094,Male,19,25000,0 142 | 15807909,Male,19,85000,0 143 | 15666141,Female,18,68000,0 144 | 15617134,Male,35,59000,0 145 | 15783029,Male,30,89000,0 146 | 15622833,Female,34,25000,0 147 | 15746422,Female,24,89000,0 148 | 15750839,Female,27,96000,1 149 | 15749130,Female,41,30000,0 150 | 15779862,Male,29,61000,0 151 | 15767871,Male,20,74000,0 152 | 15679651,Female,26,15000,0 153 | 15576219,Male,41,45000,0 154 | 15699247,Male,31,76000,0 155 | 15619087,Female,36,50000,0 156 | 15605327,Male,40,47000,0 157 | 15610140,Female,31,15000,0 158 | 15791174,Male,46,59000,0 159 | 15602373,Male,29,75000,0 160 | 15762605,Male,26,30000,0 161 | 15598840,Female,32,135000,1 162 | 15744279,Male,32,100000,1 163 | 15670619,Male,25,90000,0 164 | 15599533,Female,37,33000,0 165 | 15757837,Male,35,38000,0 166 | 15697574,Female,33,69000,0 167 | 15578738,Female,18,86000,0 168 | 15762228,Female,22,55000,0 169 | 15614827,Female,35,71000,0 170 | 15789815,Male,29,148000,1 171 | 15579781,Female,29,47000,0 172 | 15587013,Male,21,88000,0 173 | 15570932,Male,34,115000,0 174 | 15794661,Female,26,118000,0 175 | 15581654,Female,34,43000,0 176 | 15644296,Female,34,72000,0 177 | 15614420,Female,23,28000,0 178 | 15609653,Female,35,47000,0 179 | 15594577,Male,25,22000,0 180 | 15584114,Male,24,23000,0 181 | 15673367,Female,31,34000,0 182 | 15685576,Male,26,16000,0 183 | 15774727,Female,31,71000,0 184 | 15694288,Female,32,117000,1 185 | 15603319,Male,33,43000,0 186 | 
15759066,Female,33,60000,0 187 | 15814816,Male,31,66000,0 188 | 15724402,Female,20,82000,0 189 | 15571059,Female,33,41000,0 190 | 15674206,Male,35,72000,0 191 | 15715160,Male,28,32000,0 192 | 15730448,Male,24,84000,0 193 | 15662067,Female,19,26000,0 194 | 15779581,Male,29,43000,0 195 | 15662901,Male,19,70000,0 196 | 15689751,Male,28,89000,0 197 | 15667742,Male,34,43000,0 198 | 15738448,Female,30,79000,0 199 | 15680243,Female,20,36000,0 200 | 15745083,Male,26,80000,0 201 | 15708228,Male,35,22000,0 202 | 15628523,Male,35,39000,0 203 | 15708196,Male,49,74000,0 204 | 15735549,Female,39,134000,1 205 | 15809347,Female,41,71000,0 206 | 15660866,Female,58,101000,1 207 | 15766609,Female,47,47000,0 208 | 15654230,Female,55,130000,1 209 | 15794566,Female,52,114000,0 210 | 15800890,Female,40,142000,1 211 | 15697424,Female,46,22000,0 212 | 15724536,Female,48,96000,1 213 | 15735878,Male,52,150000,1 214 | 15707596,Female,59,42000,0 215 | 15657163,Male,35,58000,0 216 | 15622478,Male,47,43000,0 217 | 15779529,Female,60,108000,1 218 | 15636023,Male,49,65000,0 219 | 15582066,Male,40,78000,0 220 | 15666675,Female,46,96000,0 221 | 15732987,Male,59,143000,1 222 | 15789432,Female,41,80000,0 223 | 15663161,Male,35,91000,1 224 | 15694879,Male,37,144000,1 225 | 15593715,Male,60,102000,1 226 | 15575002,Female,35,60000,0 227 | 15622171,Male,37,53000,0 228 | 15795224,Female,36,126000,1 229 | 15685346,Male,56,133000,1 230 | 15691808,Female,40,72000,0 231 | 15721007,Female,42,80000,1 232 | 15794253,Female,35,147000,1 233 | 15694453,Male,39,42000,0 234 | 15813113,Male,40,107000,1 235 | 15614187,Male,49,86000,1 236 | 15619407,Female,38,112000,0 237 | 15646227,Male,46,79000,1 238 | 15660541,Male,40,57000,0 239 | 15753874,Female,37,80000,0 240 | 15617877,Female,46,82000,0 241 | 15772073,Female,53,143000,1 242 | 15701537,Male,42,149000,1 243 | 15736228,Male,38,59000,0 244 | 15780572,Female,50,88000,1 245 | 15769596,Female,56,104000,1 246 | 15586996,Female,41,72000,0 247 | 15722061,Female,51,146000,1 248 | 15638003,Female,35,50000,0 249 | 15775590,Female,57,122000,1 250 | 15730688,Male,41,52000,0 251 | 15753102,Female,35,97000,1 252 | 15810075,Female,44,39000,0 253 | 15723373,Male,37,52000,0 254 | 15795298,Female,48,134000,1 255 | 15584320,Female,37,146000,1 256 | 15724161,Female,50,44000,0 257 | 15750056,Female,52,90000,1 258 | 15609637,Female,41,72000,0 259 | 15794493,Male,40,57000,0 260 | 15569641,Female,58,95000,1 261 | 15815236,Female,45,131000,1 262 | 15811177,Female,35,77000,0 263 | 15680587,Male,36,144000,1 264 | 15672821,Female,55,125000,1 265 | 15767681,Female,35,72000,0 266 | 15600379,Male,48,90000,1 267 | 15801336,Female,42,108000,1 268 | 15721592,Male,40,75000,0 269 | 15581282,Male,37,74000,0 270 | 15746203,Female,47,144000,1 271 | 15583137,Male,40,61000,0 272 | 15680752,Female,43,133000,0 273 | 15688172,Female,59,76000,1 274 | 15791373,Male,60,42000,1 275 | 15589449,Male,39,106000,1 276 | 15692819,Female,57,26000,1 277 | 15727467,Male,57,74000,1 278 | 15734312,Male,38,71000,0 279 | 15764604,Male,49,88000,1 280 | 15613014,Female,52,38000,1 281 | 15759684,Female,50,36000,1 282 | 15609669,Female,59,88000,1 283 | 15685536,Male,35,61000,0 284 | 15750447,Male,37,70000,1 285 | 15663249,Female,52,21000,1 286 | 15638646,Male,48,141000,0 287 | 15734161,Female,37,93000,1 288 | 15631070,Female,37,62000,0 289 | 15761950,Female,48,138000,1 290 | 15649668,Male,41,79000,0 291 | 15713912,Female,37,78000,1 292 | 15586757,Male,39,134000,1 293 | 15596522,Male,49,89000,1 294 | 15625395,Male,55,39000,1 295 | 15760570,Male,37,77000,0 
296 | 15566689,Female,35,57000,0 297 | 15725794,Female,36,63000,0 298 | 15673539,Male,42,73000,1 299 | 15705298,Female,43,112000,1 300 | 15675791,Male,45,79000,0 301 | 15747043,Male,46,117000,1 302 | 15736397,Female,58,38000,1 303 | 15678201,Male,48,74000,1 304 | 15720745,Female,37,137000,1 305 | 15637593,Male,37,79000,1 306 | 15598070,Female,40,60000,0 307 | 15787550,Male,42,54000,0 308 | 15603942,Female,51,134000,0 309 | 15733973,Female,47,113000,1 310 | 15596761,Male,36,125000,1 311 | 15652400,Female,38,50000,0 312 | 15717893,Female,42,70000,0 313 | 15622585,Male,39,96000,1 314 | 15733964,Female,38,50000,0 315 | 15753861,Female,49,141000,1 316 | 15747097,Female,39,79000,0 317 | 15594762,Female,39,75000,1 318 | 15667417,Female,54,104000,1 319 | 15684861,Male,35,55000,0 320 | 15742204,Male,45,32000,1 321 | 15623502,Male,36,60000,0 322 | 15774872,Female,52,138000,1 323 | 15611191,Female,53,82000,1 324 | 15674331,Male,41,52000,0 325 | 15619465,Female,48,30000,1 326 | 15575247,Female,48,131000,1 327 | 15695679,Female,41,60000,0 328 | 15713463,Male,41,72000,0 329 | 15785170,Female,42,75000,0 330 | 15796351,Male,36,118000,1 331 | 15639576,Female,47,107000,1 332 | 15693264,Male,38,51000,0 333 | 15589715,Female,48,119000,1 334 | 15769902,Male,42,65000,0 335 | 15587177,Male,40,65000,0 336 | 15814553,Male,57,60000,1 337 | 15601550,Female,36,54000,0 338 | 15664907,Male,58,144000,1 339 | 15612465,Male,35,79000,0 340 | 15810800,Female,38,55000,0 341 | 15665760,Male,39,122000,1 342 | 15588080,Female,53,104000,1 343 | 15776844,Male,35,75000,0 344 | 15717560,Female,38,65000,0 345 | 15629739,Female,47,51000,1 346 | 15729908,Male,47,105000,1 347 | 15716781,Female,41,63000,0 348 | 15646936,Male,53,72000,1 349 | 15768151,Female,54,108000,1 350 | 15579212,Male,39,77000,0 351 | 15721835,Male,38,61000,0 352 | 15800515,Female,38,113000,1 353 | 15591279,Male,37,75000,0 354 | 15587419,Female,42,90000,1 355 | 15750335,Female,37,57000,0 356 | 15699619,Male,36,99000,1 357 | 15606472,Male,60,34000,1 358 | 15778368,Male,54,70000,1 359 | 15671387,Female,41,72000,0 360 | 15573926,Male,40,71000,1 361 | 15709183,Male,42,54000,0 362 | 15577514,Male,43,129000,1 363 | 15778830,Female,53,34000,1 364 | 15768072,Female,47,50000,1 365 | 15768293,Female,42,79000,0 366 | 15654456,Male,42,104000,1 367 | 15807525,Female,59,29000,1 368 | 15574372,Female,58,47000,1 369 | 15671249,Male,46,88000,1 370 | 15779744,Male,38,71000,0 371 | 15624755,Female,54,26000,1 372 | 15611430,Female,60,46000,1 373 | 15774744,Male,60,83000,1 374 | 15629885,Female,39,73000,0 375 | 15708791,Male,59,130000,1 376 | 15793890,Female,37,80000,0 377 | 15646091,Female,46,32000,1 378 | 15596984,Female,46,74000,0 379 | 15800215,Female,42,53000,0 380 | 15577806,Male,41,87000,1 381 | 15749381,Female,58,23000,1 382 | 15683758,Male,42,64000,0 383 | 15670615,Male,48,33000,1 384 | 15715622,Female,44,139000,1 385 | 15707634,Male,49,28000,1 386 | 15806901,Female,57,33000,1 387 | 15775335,Male,56,60000,1 388 | 15724150,Female,49,39000,1 389 | 15627220,Male,39,71000,0 390 | 15672330,Male,47,34000,1 391 | 15668521,Female,48,35000,1 392 | 15807837,Male,48,33000,1 393 | 15592570,Male,47,23000,1 394 | 15748589,Female,45,45000,1 395 | 15635893,Male,60,42000,1 396 | 15757632,Female,39,59000,0 397 | 15691863,Female,46,41000,1 398 | 15706071,Male,51,23000,1 399 | 15654296,Female,50,20000,1 400 | 15755018,Male,36,33000,0 401 | 15594041,Female,49,36000,1 -------------------------------------------------------------------------------- /K_Nearest_Neighbors/classification_template.R: 
-------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting classifier to the Training set 23 | # Create your classifier here 24 | 25 | # Predicting the Test set results 26 | y_pred = predict(classifier, newdata = test_set[-3]) 27 | 28 | # Making the Confusion Matrix 29 | cm = table(test_set[, 3], y_pred) 30 | 31 | # Visualising the Training set results 32 | library(ElemStatLearn) 33 | set = training_set 34 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 35 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 36 | grid_set = expand.grid(X1, X2) 37 | colnames(grid_set) = c('Age', 'EstimatedSalary') 38 | y_grid = predict(classifier, newdata = grid_set) 39 | plot(set[, -3], 40 | main = 'Classifier (Training set)', 41 | xlab = 'Age', ylab = 'Estimated Salary', 42 | xlim = range(X1), ylim = range(X2)) 43 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 44 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 45 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 46 | 47 | # Visualising the Test set results 48 | library(ElemStatLearn) 49 | set = test_set 50 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 51 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 52 | grid_set = expand.grid(X1, X2) 53 | colnames(grid_set) = c('Age', 'EstimatedSalary') 54 | y_grid = predict(classifier, newdata = grid_set) 55 | plot(set[, -3], main = 'Classifier (Test set)', 56 | xlab = 'Age', ylab = 'Estimated Salary', 57 | xlim = range(X1), ylim = range(X2)) 58 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 59 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 60 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) -------------------------------------------------------------------------------- /K_Nearest_Neighbors/classification_template.py: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.cross_validation import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting classifier to the Training set 24 | # Create your classifier here 25 
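# For illustration only (not part of the original template): any scikit-learn
# classifier can be created and fitted at this step. For example, the K-NN
# classifier from knn.py in this same folder:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)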
| 26 | # Predicting the Test set results 27 | y_pred = classifier.predict(X_test) 28 | 29 | # Making the Confusion Matrix 30 | from sklearn.metrics import confusion_matrix 31 | cm = confusion_matrix(y_test, y_pred) 32 | 33 | # Visualising the Training set results 34 | from matplotlib.colors import ListedColormap 35 | X_set, y_set = X_train, y_train 36 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 37 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 38 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 39 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 40 | plt.xlim(X1.min(), X1.max()) 41 | plt.ylim(X2.min(), X2.max()) 42 | for i, j in enumerate(np.unique(y_set)): 43 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 44 | c = ListedColormap(('red', 'green'))(i), label = j) 45 | plt.title('Classifier (Training set)') 46 | plt.xlabel('Age') 47 | plt.ylabel('Estimated Salary') 48 | plt.legend() 49 | plt.show() 50 | 51 | # Visualising the Test set results 52 | from matplotlib.colors import ListedColormap 53 | X_set, y_set = X_test, y_test 54 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 55 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 56 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 57 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 58 | plt.xlim(X1.min(), X1.max()) 59 | plt.ylim(X2.min(), X2.max()) 60 | for i, j in enumerate(np.unique(y_set)): 61 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 62 | c = ListedColormap(('red', 'green'))(i), label = j) 63 | plt.title('Classifier (Test set)') 64 | plt.xlabel('Age') 65 | plt.ylabel('Estimated Salary') 66 | plt.legend() 67 | plt.show() -------------------------------------------------------------------------------- /K_Nearest_Neighbors/knn.R: -------------------------------------------------------------------------------- 1 | # K-Nearest Neighbors (K-NN) 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting K-NN to the Training set and Predicting the Test set results 23 | library(class) 24 | y_pred = knn(train = training_set[, -3], 25 | test = test_set[, -3], 26 | cl = training_set[, 3], 27 | k = 5, 28 | prob = TRUE) 29 | 30 | # Making the Confusion Matrix 31 | cm = table(test_set[, 3], y_pred) 32 | 33 | # Visualising the Training set results 34 | library(ElemStatLearn) 35 | set = training_set 36 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 37 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 38 | grid_set = expand.grid(X1, X2) 39 | colnames(grid_set) = c('Age', 'EstimatedSalary') 40 | y_grid = knn(train = training_set[, -3], test = grid_set, cl = training_set[, 3], k = 5) 41 | plot(set[, 
-3], 42 | main = 'K-NN (Training set)', 43 | xlab = 'Age', ylab = 'Estimated Salary', 44 | xlim = range(X1), ylim = range(X2)) 45 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 46 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 47 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 48 | 49 | # Visualising the Test set results 50 | library(ElemStatLearn) 51 | set = test_set 52 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 53 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 54 | grid_set = expand.grid(X1, X2) 55 | colnames(grid_set) = c('Age', 'EstimatedSalary') 56 | y_grid = knn(train = training_set[, -3], test = grid_set, cl = training_set[, 3], k = 5) 57 | plot(set[, -3], 58 | main = 'K-NN (Test set)', 59 | xlab = 'Age', ylab = 'Estimated Salary', 60 | xlim = range(X1), ylim = range(X2)) 61 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 62 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 63 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) -------------------------------------------------------------------------------- /K_Nearest_Neighbors/knn.py: -------------------------------------------------------------------------------- 1 | # K-Nearest Neighbors (K-NN) 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.cross_validation import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting K-NN to the Training set 24 | from sklearn.neighbors import KNeighborsClassifier 25 | classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2) 26 | classifier.fit(X_train, y_train) 27 | 28 | # Predicting the Test set results 29 | y_pred = classifier.predict(X_test) 30 | 31 | # Making the Confusion Matrix 32 | from sklearn.metrics import confusion_matrix 33 | cm = confusion_matrix(y_test, y_pred) 34 | 35 | # Visualising the Training set results 36 | from matplotlib.colors import ListedColormap 37 | X_set, y_set = X_train, y_train 38 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 39 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 40 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 41 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 42 | plt.xlim(X1.min(), X1.max()) 43 | plt.ylim(X2.min(), X2.max()) 44 | for i, j in enumerate(np.unique(y_set)): 45 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 46 | c = ListedColormap(('red', 'green'))(i), label = j) 47 | plt.title('K-NN (Training set)') 48 | plt.xlabel('Age') 49 | plt.ylabel('Estimated Salary') 50 | plt.legend() 51 | plt.show() 52 | 53 | # Visualising the Test set results 54 | from matplotlib.colors import ListedColormap 55 | X_set, y_set = X_test, y_test 56 | X1, X2 = np.meshgrid(np.arange(start = 
X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 57 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 58 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 59 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 60 | plt.xlim(X1.min(), X1.max()) 61 | plt.ylim(X2.min(), X2.max()) 62 | for i, j in enumerate(np.unique(y_set)): 63 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 64 | c = ListedColormap(('red', 'green'))(i), label = j) 65 | plt.title('K-NN (Test set)') 66 | plt.xlabel('Age') 67 | plt.ylabel('Estimated Salary') 68 | plt.legend() 69 | plt.show() -------------------------------------------------------------------------------- /Natural Language Processing Personal Notes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import nltk" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "Download necessary packages." 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "nltk.download()" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Part 1 - Tokenizing\n", 31 | "Form of grouping\n", 32 | " Word and sentence tokenizers\n", 33 | " Corpora - body of text, ex: medical journals, presidential speeches, English language\n", 34 | " Lexicon - words and their meainings\n", 35 | " ex: investor speak vs regular speak" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "from nltk.tokenize import sent_tokenize, word_tokenize" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 4, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "example_text = \"Hello there, how are you doing today? The weather is great and Python is awesome. 
The sky is pink\"" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "Separate sentences using sent_tokenize" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 5, 66 | "metadata": { 67 | "scrolled": false 68 | }, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "['Hello there, how are you doing today?', 'The weather is great and Python is awesome.', 'The sky is pink']\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "print(sent_tokenize(example_text))" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "Separate words using word_tokenize" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 6, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "['Hello', 'there', ',', 'how', 'are', 'you', 'doing', 'today', '?', 'The', 'weather', 'is', 'great', 'and', 'Python', 'is', 'awesome', '.', 'The', 'sky', 'is', 'pink']\n" 99 | ] 100 | } 101 | ], 102 | "source": [ 103 | "print(word_tokenize(example_text))" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "Print each token on its own line" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 7, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "Hello\n", 123 | "there\n", 124 | ",\n", 125 | "how\n", 126 | "are\n", 127 | "you\n", 128 | "doing\n", 129 | "today\n", 130 | "?\n", 131 | "The\n", 132 | "weather\n", 133 | "is\n", 134 | "great\n", 135 | "and\n", 136 | "Python\n", 137 | "is\n", 138 | "awesome\n", 139 | ".\n", 140 | "The\n", 141 | "sky\n", 142 | "is\n", 143 | "pink\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "for i in word_tokenize(example_text):\n", 149 | " print(i)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "### Part 2 - Stop Words\n", 157 | "What are stop words? Words that you want to filter out of an analysis: basically filler words that add little meaning."
158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 8, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "from nltk.corpus import stopwords\n", 167 | "from nltk.tokenize import word_tokenize" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 9, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "example_sentence = \"This is an example showing off stop word filtration.\"\n", 177 | "#set of stop words already predfined by nltk\n", 178 | "stop_words = set(stopwords.words(\"english\")) " 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 10, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/plain": [ 189 | "{'a',\n", 190 | " 'about',\n", 191 | " 'above',\n", 192 | " 'after',\n", 193 | " 'again',\n", 194 | " 'against',\n", 195 | " 'ain',\n", 196 | " 'all',\n", 197 | " 'am',\n", 198 | " 'an',\n", 199 | " 'and',\n", 200 | " 'any',\n", 201 | " 'are',\n", 202 | " 'aren',\n", 203 | " \"aren't\",\n", 204 | " 'as',\n", 205 | " 'at',\n", 206 | " 'be',\n", 207 | " 'because',\n", 208 | " 'been',\n", 209 | " 'before',\n", 210 | " 'being',\n", 211 | " 'below',\n", 212 | " 'between',\n", 213 | " 'both',\n", 214 | " 'but',\n", 215 | " 'by',\n", 216 | " 'can',\n", 217 | " 'couldn',\n", 218 | " \"couldn't\",\n", 219 | " 'd',\n", 220 | " 'did',\n", 221 | " 'didn',\n", 222 | " \"didn't\",\n", 223 | " 'do',\n", 224 | " 'does',\n", 225 | " 'doesn',\n", 226 | " \"doesn't\",\n", 227 | " 'doing',\n", 228 | " 'don',\n", 229 | " \"don't\",\n", 230 | " 'down',\n", 231 | " 'during',\n", 232 | " 'each',\n", 233 | " 'few',\n", 234 | " 'for',\n", 235 | " 'from',\n", 236 | " 'further',\n", 237 | " 'had',\n", 238 | " 'hadn',\n", 239 | " \"hadn't\",\n", 240 | " 'has',\n", 241 | " 'hasn',\n", 242 | " \"hasn't\",\n", 243 | " 'have',\n", 244 | " 'haven',\n", 245 | " \"haven't\",\n", 246 | " 'having',\n", 247 | " 'he',\n", 248 | " 'her',\n", 249 | " 'here',\n", 250 | " 'hers',\n", 251 | " 'herself',\n", 252 | " 'him',\n", 253 | " 'himself',\n", 254 | " 'his',\n", 255 | " 'how',\n", 256 | " 'i',\n", 257 | " 'if',\n", 258 | " 'in',\n", 259 | " 'into',\n", 260 | " 'is',\n", 261 | " 'isn',\n", 262 | " \"isn't\",\n", 263 | " 'it',\n", 264 | " \"it's\",\n", 265 | " 'its',\n", 266 | " 'itself',\n", 267 | " 'just',\n", 268 | " 'll',\n", 269 | " 'm',\n", 270 | " 'ma',\n", 271 | " 'me',\n", 272 | " 'mightn',\n", 273 | " \"mightn't\",\n", 274 | " 'more',\n", 275 | " 'most',\n", 276 | " 'mustn',\n", 277 | " \"mustn't\",\n", 278 | " 'my',\n", 279 | " 'myself',\n", 280 | " 'needn',\n", 281 | " \"needn't\",\n", 282 | " 'no',\n", 283 | " 'nor',\n", 284 | " 'not',\n", 285 | " 'now',\n", 286 | " 'o',\n", 287 | " 'of',\n", 288 | " 'off',\n", 289 | " 'on',\n", 290 | " 'once',\n", 291 | " 'only',\n", 292 | " 'or',\n", 293 | " 'other',\n", 294 | " 'our',\n", 295 | " 'ours',\n", 296 | " 'ourselves',\n", 297 | " 'out',\n", 298 | " 'over',\n", 299 | " 'own',\n", 300 | " 're',\n", 301 | " 's',\n", 302 | " 'same',\n", 303 | " 'shan',\n", 304 | " \"shan't\",\n", 305 | " 'she',\n", 306 | " \"she's\",\n", 307 | " 'should',\n", 308 | " \"should've\",\n", 309 | " 'shouldn',\n", 310 | " \"shouldn't\",\n", 311 | " 'so',\n", 312 | " 'some',\n", 313 | " 'such',\n", 314 | " 't',\n", 315 | " 'than',\n", 316 | " 'that',\n", 317 | " \"that'll\",\n", 318 | " 'the',\n", 319 | " 'their',\n", 320 | " 'theirs',\n", 321 | " 'them',\n", 322 | " 'themselves',\n", 323 | " 'then',\n", 324 | " 'there',\n", 325 | " 'these',\n", 326 | " 
'they',\n", 327 | " 'this',\n", 328 | " 'those',\n", 329 | " 'through',\n", 330 | " 'to',\n", 331 | " 'too',\n", 332 | " 'under',\n", 333 | " 'until',\n", 334 | " 'up',\n", 335 | " 've',\n", 336 | " 'very',\n", 337 | " 'was',\n", 338 | " 'wasn',\n", 339 | " \"wasn't\",\n", 340 | " 'we',\n", 341 | " 'were',\n", 342 | " 'weren',\n", 343 | " \"weren't\",\n", 344 | " 'what',\n", 345 | " 'when',\n", 346 | " 'where',\n", 347 | " 'which',\n", 348 | " 'while',\n", 349 | " 'who',\n", 350 | " 'whom',\n", 351 | " 'why',\n", 352 | " 'will',\n", 353 | " 'with',\n", 354 | " 'won',\n", 355 | " \"won't\",\n", 356 | " 'wouldn',\n", 357 | " \"wouldn't\",\n", 358 | " 'y',\n", 359 | " 'you',\n", 360 | " \"you'd\",\n", 361 | " \"you'll\",\n", 362 | " \"you're\",\n", 363 | " \"you've\",\n", 364 | " 'your',\n", 365 | " 'yours',\n", 366 | " 'yourself',\n", 367 | " 'yourselves'}" 368 | ] 369 | }, 370 | "execution_count": 10, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "stop_words" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 11, 382 | "metadata": {}, 383 | "outputs": [ 384 | { 385 | "name": "stdout", 386 | "output_type": "stream", 387 | "text": [ 388 | "['This', 'example', 'showing', 'stop', 'word', 'filtration', '.']\n" 389 | ] 390 | } 391 | ], 392 | "source": [ 393 | "words = word_tokenize(example_sentence)\n", 394 | "filtered_sentence = []\n", 395 | "for w in words:\n", 396 | " if w not in stop_words:\n", 397 | " filtered_sentence.append(w)\n", 398 | "print(filtered_sentence) \n" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": {}, 404 | "source": [ 405 | "We can see from the previous sentence that some words have been removed." 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 13, 411 | "metadata": {}, 412 | "outputs": [ 413 | { 414 | "data": { 415 | "text/plain": [ 416 | "['This', 'example', 'showing', 'stop', 'word', 'filtration', '.']" 417 | ] 418 | }, 419 | "execution_count": 13, 420 | "metadata": {}, 421 | "output_type": "execute_result" 422 | } 423 | ], 424 | "source": [ 425 | "#short hand version of previous code\n", 426 | "filtered_sentence = [w for w in words if not w in stop_words]\n", 427 | "print(filtered_sentence)" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": {}, 434 | "outputs": [], 435 | "source": [] 436 | } 437 | ], 438 | "metadata": { 439 | "kernelspec": { 440 | "display_name": "Python 3", 441 | "language": "python", 442 | "name": "python3" 443 | } 444 | }, 445 | "nbformat": 4, 446 | "nbformat_minor": 2 447 | } 448 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data Science and Machine Learning Portfolio 2 | Repository containing portfolio of data science projects completed for academic, self learning, and professional purposes. Presented in the form of Jupyter Notebooks. 3 | 4 | Tools 5 | - **Python**: NumPy, Pandas, Seaborn, Matplotlib 6 | - **Machine Learning**: scikit-learn, TensorFlow, keras 7 | 8 | ## Contents 9 | - ## Machine Learning 10 | - [Convolution Neural Network - Digit Recognizer](https://github.com/melvfnz/data_science_portfolio/blob/master/digit_recognizer.ipynb): Convolutional Neural Network that learns to recognize sequences of digits using data generated by concatenating images from MNIST (Recognizes a digit based on an image). 
11 | - [K-Nearest Neighbors - Social Network Ads Dataset](https://github.com/melvfernandez/data_science_portfolio/blob/master/K_Nearest_Neighbors/K-Nearest%20Neighbors%20On%20Social%20Network%20Ads.ipynb): Using K-NN to classify which customers targeted by a social network ad bought an SUV. 12 | - [Monte Carlo Model - Cryptocurrency](https://github.com/melvfernandez/data_science_portfolio/blob/master/Cryptocurrency%20Market%20Analysis.ipynb): Using a probabilistic model of cryptocurrency prices to find an approximate solution to a numerical problem that would be difficult to solve by other methods. 13 | - [Machine Learning Regression - Financial Market](https://github.com/melvfnz/data_science_portfolio/blob/master/Regression%20-%20Machine%20Learning.ipynb): Importing financial and economic data from quandl to create a simple regression. 14 | 15 | - ## Data Analysis and Visualization (Python Programming) 16 | - [Cryptocurrency Market Analysis](https://github.com/melvfernandez/data_science_portfolio/blob/master/Cryptocurrency%20Market%20Analysis.ipynb): Based on my stock market analysis of tech stocks. Covers change in price over time, daily returns, and behaviour prediction. 17 | - [Stock Market Analysis of Tech Stocks](https://github.com/melvfernandez/data__scientist_portfolio/blob/master/Stock%20Market%20Analysis%20for%20Tech%20Stocks.ipynb): Analysis of technology stocks including change in price over time, daily returns, and stock behaviour prediction. 18 | - [Exploratory Data Analysis - Titanic Passenger Information](https://github.com/melvfnz/data_science_portfolio/blob/master/kaggle_titanic.ipynb): Simple analysis of passengers on board the Titanic answering common questions with visualizations. 19 | - [Exploratory Data Analysis - House Prices](https://github.com/melvfernandez/data_science_portfolio/blob/master/Exploratory%20Data%20Analysis%20of%20House%20Prices.ipynb): Simple analysis of house prices including quick visualizations with correlation plots and heat maps. 20 | - [Simple Linear Regression](https://github.com/melvfernandez/data_science_portfolio/blob/master/Simple%20Linear%20Regression.ipynb): Small playground to summarize and study the relationship between two continuous variables from a randomized dataset (a condensed sketch of that workflow follows below). 
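A condensed, illustrative sketch of the workflow the Simple Linear Regression notebook follows (load the CSVs, drop missing rows, fit, evaluate). The file paths and the `x`/`y` column names come from the `data/` folder in this repository; the R² line at the end is an addition for a quick check, while the notebook itself visualizes the predictions instead.

```python
import pandas as pd
from sklearn.linear_model import LinearRegression

# Load the train/test splits shipped in data/ and drop any rows with missing values.
train = pd.read_csv('data/linear_regression_train.csv').dropna()
test = pd.read_csv('data/linear_regression_test.csv').dropna()

# Fit a one-feature linear model: y ~ x.
model = LinearRegression()
model.fit(train[['x']], train['y'])

# Evaluate on the held-out test split (score() returns R^2 for regressors).
r2 = model.score(test[['x']], test['y'])
print(f"slope={model.coef_[0]:.3f}  intercept={model.intercept_:.3f}  R^2={r2:.3f}")
```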
21 | 22 | 23 | 24 | - ## Minor Projects 25 | - [Personal Notes on Natural Language Processing Toolkit](https://github.com/melvfnz/data_science_portfolio/blob/master/Natural%20Language%20Processing%20Personal%20Notes.ipynb) 26 | - [Intro to BigQuery](https://github.com/melvfnz/data_science_portfolio/blob/master/Intro%20to%20BigQuery%20.ipynb) 27 | - [Breast Cancer Analysis](https://github.com/melvfnz/data_science_portfolio/blob/master/Breast%20Cancer%20Analysis.ipynb) 28 | 29 | If you enjoyed what you saw and would like to chat about the portfolio, work opportunities, or collaboration, feel free to contact me on: 30 | - [LinkedIn](https://www.linkedin.com/in/melvfernandez/) 31 | - [Twitter](https://twitter.com/melvfnz) 32 | 33 | 34 | -------------------------------------------------------------------------------- /Regression - Machine Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import quandl" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 4, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "df = quandl.get('WIKI/GOOGL')" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 9, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | "
OpenHighLowCloseVolumeEx-DividendSplit RatioAdj. OpenAdj. HighAdj. LowAdj. CloseAdj. Volume
Date
2004-08-19100.01104.0695.96100.33544659000.00.01.050.15983952.19110948.12856850.32284244659000.0
2004-08-20101.01109.08100.50108.31022834300.00.01.050.66138754.70888150.40559754.32268922834300.0
2004-08-23110.76113.48109.05109.40018256100.00.01.055.55148256.91569354.69383554.86937718256100.0
2004-08-24111.24111.60103.57104.87015247300.00.01.055.79222555.97278351.94535052.59736315247300.0
2004-08-25104.76108.00103.88106.0009188600.00.01.052.54219354.16720952.10083053.1641139188600.0
\n", 155 | "
" 156 | ], 157 | "text/plain": [ 158 | " Open High Low Close Volume Ex-Dividend \\\n", 159 | "Date \n", 160 | "2004-08-19 100.01 104.06 95.96 100.335 44659000.0 0.0 \n", 161 | "2004-08-20 101.01 109.08 100.50 108.310 22834300.0 0.0 \n", 162 | "2004-08-23 110.76 113.48 109.05 109.400 18256100.0 0.0 \n", 163 | "2004-08-24 111.24 111.60 103.57 104.870 15247300.0 0.0 \n", 164 | "2004-08-25 104.76 108.00 103.88 106.000 9188600.0 0.0 \n", 165 | "\n", 166 | " Split Ratio Adj. Open Adj. High Adj. Low Adj. Close \\\n", 167 | "Date \n", 168 | "2004-08-19 1.0 50.159839 52.191109 48.128568 50.322842 \n", 169 | "2004-08-20 1.0 50.661387 54.708881 50.405597 54.322689 \n", 170 | "2004-08-23 1.0 55.551482 56.915693 54.693835 54.869377 \n", 171 | "2004-08-24 1.0 55.792225 55.972783 51.945350 52.597363 \n", 172 | "2004-08-25 1.0 52.542193 54.167209 52.100830 53.164113 \n", 173 | "\n", 174 | " Adj. Volume \n", 175 | "Date \n", 176 | "2004-08-19 44659000.0 \n", 177 | "2004-08-20 22834300.0 \n", 178 | "2004-08-23 18256100.0 \n", 179 | "2004-08-24 15247300.0 \n", 180 | "2004-08-25 9188600.0 " 181 | ] 182 | }, 183 | "execution_count": 9, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "df.head()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 7, 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/html": [ 200 | "
\n", 201 | "\n", 214 | "\n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | "
OpenHighLowCloseVolumeEx-DividendSplit RatioAdj. OpenAdj. HighAdj. LowAdj. CloseAdj. Volume
count3399.0000003399.0000003399.0000003399.0000003.399000e+033399.0000003399.03399.0000003399.0000003399.0000003399.0000003.399000e+03
mean592.816524598.217079586.871826592.6062087.860173e+060.1671001.0404.057168407.566178400.164700403.9336567.860173e+06
std223.452561224.167098222.472709223.4419638.263983e+069.7420670.0251.615480253.030208249.968507251.6491488.263983e+06
min99.090000101.74000095.960000100.0100005.211410e+050.0000001.049.69841451.02751748.12856850.1598395.211410e+05
25%460.065000464.510000455.000000459.8350002.460999e+060.0000001.0230.744787232.974169228.204445230.6294312.460999e+06
50%563.760000568.750000558.580000563.7700005.113400e+060.0000001.0299.143427301.114512296.916553298.8826225.113400e+06
75%731.935000736.070000722.475050730.1400001.034390e+070.0000001.0558.970488562.317784554.172844558.2666841.034390e+07
max1226.8000001228.8800001218.6000001220.1700008.215110e+07567.9716681.01188.0000001198.0000001184.0600001187.5600008.215110e+07
\n", 355 | "
" 356 | ], 357 | "text/plain": [ 358 | " Open High Low Close Volume \\\n", 359 | "count 3399.000000 3399.000000 3399.000000 3399.000000 3.399000e+03 \n", 360 | "mean 592.816524 598.217079 586.871826 592.606208 7.860173e+06 \n", 361 | "std 223.452561 224.167098 222.472709 223.441963 8.263983e+06 \n", 362 | "min 99.090000 101.740000 95.960000 100.010000 5.211410e+05 \n", 363 | "25% 460.065000 464.510000 455.000000 459.835000 2.460999e+06 \n", 364 | "50% 563.760000 568.750000 558.580000 563.770000 5.113400e+06 \n", 365 | "75% 731.935000 736.070000 722.475050 730.140000 1.034390e+07 \n", 366 | "max 1226.800000 1228.880000 1218.600000 1220.170000 8.215110e+07 \n", 367 | "\n", 368 | " Ex-Dividend Split Ratio Adj. Open Adj. High Adj. Low \\\n", 369 | "count 3399.000000 3399.0 3399.000000 3399.000000 3399.000000 \n", 370 | "mean 0.167100 1.0 404.057168 407.566178 400.164700 \n", 371 | "std 9.742067 0.0 251.615480 253.030208 249.968507 \n", 372 | "min 0.000000 1.0 49.698414 51.027517 48.128568 \n", 373 | "25% 0.000000 1.0 230.744787 232.974169 228.204445 \n", 374 | "50% 0.000000 1.0 299.143427 301.114512 296.916553 \n", 375 | "75% 0.000000 1.0 558.970488 562.317784 554.172844 \n", 376 | "max 567.971668 1.0 1188.000000 1198.000000 1184.060000 \n", 377 | "\n", 378 | " Adj. Close Adj. Volume \n", 379 | "count 3399.000000 3.399000e+03 \n", 380 | "mean 403.933656 7.860173e+06 \n", 381 | "std 251.649148 8.263983e+06 \n", 382 | "min 50.159839 5.211410e+05 \n", 383 | "25% 230.629431 2.460999e+06 \n", 384 | "50% 298.882622 5.113400e+06 \n", 385 | "75% 558.266684 1.034390e+07 \n", 386 | "max 1187.560000 8.215110e+07 " 387 | ] 388 | }, 389 | "execution_count": 7, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "df.describe()" 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "metadata": {}, 401 | "source": [ 402 | "Let us recreate our dataframe with columns that we want" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": 10, 408 | "metadata": {}, 409 | "outputs": [], 410 | "source": [ 411 | "df = df[['Adj. Open','Adj. High','Adj. Low','Adj. Close','Adj. Volume',]]" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": 11, 417 | "metadata": {}, 418 | "outputs": [ 419 | { 420 | "data": { 421 | "text/html": [ 422 | "
\n", 423 | "\n", 436 | "\n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | "
Adj. OpenAdj. HighAdj. LowAdj. CloseAdj. Volume
Date
2004-08-1950.15983952.19110948.12856850.32284244659000.0
2004-08-2050.66138754.70888150.40559754.32268922834300.0
2004-08-2355.55148256.91569354.69383554.86937718256100.0
2004-08-2455.79222555.97278351.94535052.59736315247300.0
2004-08-2552.54219354.16720952.10083053.1641139188600.0
\n", 498 | "
" 499 | ], 500 | "text/plain": [ 501 | " Adj. Open Adj. High Adj. Low Adj. Close Adj. Volume\n", 502 | "Date \n", 503 | "2004-08-19 50.159839 52.191109 48.128568 50.322842 44659000.0\n", 504 | "2004-08-20 50.661387 54.708881 50.405597 54.322689 22834300.0\n", 505 | "2004-08-23 55.551482 56.915693 54.693835 54.869377 18256100.0\n", 506 | "2004-08-24 55.792225 55.972783 51.945350 52.597363 15247300.0\n", 507 | "2004-08-25 52.542193 54.167209 52.100830 53.164113 9188600.0" 508 | ] 509 | }, 510 | "execution_count": 11, 511 | "metadata": {}, 512 | "output_type": "execute_result" 513 | } 514 | ], 515 | "source": [ 516 | "df.head()" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": 15, 522 | "metadata": {}, 523 | "outputs": [], 524 | "source": [ 525 | "df['HL_PCT'] = (df['Adj. High'] - df['Adj. Close']) / df['Adj. Close']*100.0" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": 16, 531 | "metadata": {}, 532 | "outputs": [ 533 | { 534 | "data": { 535 | "text/html": [ 536 | "
\n", 537 | "\n", 550 | "\n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | "
Adj. OpenAdj. HighAdj. LowAdj. CloseAdj. VolumeHL_PCT
Date
2004-08-1950.15983952.19110948.12856850.32284244659000.03.712563
2004-08-2050.66138754.70888150.40559754.32268922834300.00.710922
2004-08-2355.55148256.91569354.69383554.86937718256100.03.729433
2004-08-2455.79222555.97278351.94535052.59736315247300.06.417469
2004-08-2552.54219354.16720952.10083053.1641139188600.01.886792
\n", 619 | "
" 620 | ], 621 | "text/plain": [ 622 | " Adj. Open Adj. High Adj. Low Adj. Close Adj. Volume HL_PCT\n", 623 | "Date \n", 624 | "2004-08-19 50.159839 52.191109 48.128568 50.322842 44659000.0 3.712563\n", 625 | "2004-08-20 50.661387 54.708881 50.405597 54.322689 22834300.0 0.710922\n", 626 | "2004-08-23 55.551482 56.915693 54.693835 54.869377 18256100.0 3.729433\n", 627 | "2004-08-24 55.792225 55.972783 51.945350 52.597363 15247300.0 6.417469\n", 628 | "2004-08-25 52.542193 54.167209 52.100830 53.164113 9188600.0 1.886792" 629 | ] 630 | }, 631 | "execution_count": 16, 632 | "metadata": {}, 633 | "output_type": "execute_result" 634 | } 635 | ], 636 | "source": [ 637 | "df.head()" 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": 17, 643 | "metadata": {}, 644 | "outputs": [], 645 | "source": [ 646 | "df['PCT_CHANGE'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": 19, 652 | "metadata": {}, 653 | "outputs": [ 654 | { 655 | "data": { 656 | "text/html": [ 657 | "
\n", 658 | "\n", 671 | "\n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | "
Adj. CloseHL_PCTPCT_CHANGEAdj. Volume
Date
2004-08-1950.3228423.7125630.32496844659000.0
2004-08-2054.3226890.7109227.22700722834300.0
2004-08-2354.8693773.729433-1.22788018256100.0
2004-08-2452.5973636.417469-5.72635715247300.0
2004-08-2553.1641131.8867921.1836589188600.0
\n", 726 | "
" 727 | ], 728 | "text/plain": [ 729 | " Adj. Close HL_PCT PCT_CHANGE Adj. Volume\n", 730 | "Date \n", 731 | "2004-08-19 50.322842 3.712563 0.324968 44659000.0\n", 732 | "2004-08-20 54.322689 0.710922 7.227007 22834300.0\n", 733 | "2004-08-23 54.869377 3.729433 -1.227880 18256100.0\n", 734 | "2004-08-24 52.597363 6.417469 -5.726357 15247300.0\n", 735 | "2004-08-25 53.164113 1.886792 1.183658 9188600.0" 736 | ] 737 | }, 738 | "execution_count": 19, 739 | "metadata": {}, 740 | "output_type": "execute_result" 741 | } 742 | ], 743 | "source": [ 744 | "df = df[['Adj. Close','HL_PCT','PCT_CHANGE','Adj. Volume']]\n", 745 | "df.head()" 746 | ] 747 | }, 748 | { 749 | "cell_type": "code", 750 | "execution_count": null, 751 | "metadata": {}, 752 | "outputs": [], 753 | "source": [ 754 | "forescast_col = 'Adj. Close'\n", 755 | "df.fillna('-99999', inplace=True)\n", 756 | "\n", 757 | "forecast_out = int(math.ceil(0.1*len(df)))" 758 | ] 759 | } 760 | ], 761 | "metadata": { 762 | "kernelspec": { 763 | "display_name": "Python 3", 764 | "language": "python", 765 | "name": "python3" 766 | }, 767 | "language_info": { 768 | "codemirror_mode": { 769 | "name": "ipython", 770 | "version": 3 771 | }, 772 | "file_extension": ".py", 773 | "mimetype": "text/x-python", 774 | "name": "python", 775 | "nbconvert_exporter": "python", 776 | "pygments_lexer": "ipython3", 777 | "version": "3.6.4" 778 | } 779 | }, 780 | "nbformat": 4, 781 | "nbformat_minor": 2 782 | } 783 | -------------------------------------------------------------------------------- /Simple Linear Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This is simply a playground to perform commands from pandas, numpy, scipy, and sklearn on a randomized dataset.\n", 8 | "\n", 9 | "## Process for a simple linear regression\n", 10 | " 1. Load the data\n", 11 | " 2. Clean the data\n", 12 | " 3. Observe the statistics\n", 13 | " 4. Train the model with the training data\n", 14 | " 5. Test the train data with the test data" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "#first off import required libraries\n", 26 | "import pandas as pd\n", 27 | "import numpy as np\n", 28 | "import matplotlib.pyplot as plt\n", 29 | "import math\n", 30 | "from scipy import stats as st\n", 31 | "from sklearn import linear_model\n", 32 | "%matplotlib inline" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "#let's load our data\n", 44 | "training_set = pd.read_csv('data/linear_regression_train.csv')\n", 45 | "test_set = pd.read_csv('data/linear_regression_test.csv')" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "#let's now do a simple clean by dropping any null values\n", 57 | "training_set = training_set.dropna()\n", 58 | "test_set = test_set.dropna()" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/html": [ 69 | "
\n", 70 | "\n", 83 | "\n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | "
xy
count699.000000699.000000
mean50.01430649.939869
std28.95456029.109217
min0.000000-3.839981
25%25.00000024.929968
50%49.00000048.973020
75%75.00000074.929911
max100.000000108.871618
\n", 134 | "
" 135 | ], 136 | "text/plain": [ 137 | " x y\n", 138 | "count 699.000000 699.000000\n", 139 | "mean 50.014306 49.939869\n", 140 | "std 28.954560 29.109217\n", 141 | "min 0.000000 -3.839981\n", 142 | "25% 25.000000 24.929968\n", 143 | "50% 49.000000 48.973020\n", 144 | "75% 75.000000 74.929911\n", 145 | "max 100.000000 108.871618" 146 | ] 147 | }, 148 | "execution_count": 4, 149 | "metadata": {}, 150 | "output_type": "execute_result" 151 | } 152 | ], 153 | "source": [ 154 | "#we can view quick statistics using .describe()\n", 155 | "training_set.describe()" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 5, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/html": [ 166 | "
\n", 167 | "\n", 180 | "\n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | "
xy
count300.000000300.000000
mean50.93666751.205051
std28.50428629.071481
min0.000000-3.467884
25%27.00000025.676502
50%53.00000052.170557
75%73.00000074.303007
max100.000000105.591837
\n", 231 | "
" 232 | ], 233 | "text/plain": [ 234 | " x y\n", 235 | "count 300.000000 300.000000\n", 236 | "mean 50.936667 51.205051\n", 237 | "std 28.504286 29.071481\n", 238 | "min 0.000000 -3.467884\n", 239 | "25% 27.000000 25.676502\n", 240 | "50% 53.000000 52.170557\n", 241 | "75% 73.000000 74.303007\n", 242 | "max 100.000000 105.591837" 243 | ] 244 | }, 245 | "execution_count": 5, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "test_set.describe()" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "We can observe that the median and mean are relatively close, saying that the data isn't skewed by any outliers." 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 9, 264 | "metadata": { 265 | "collapsed": true 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "#let's seperate our data into x and y \n", 270 | "x_training_set = training_set.as_matrix(['x'])\n", 271 | "y_training_set = training_set.as_matrix(['y'])\n", 272 | "\n", 273 | "x_test_set = test_set.as_matrix(['x'])\n", 274 | "y_test_set = test_set.as_matrix(['y'])" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 15, 280 | "metadata": {}, 281 | "outputs": [ 282 | { 283 | "ename": "AttributeError", 284 | "evalue": "Unknown property figsize", 285 | "output_type": "error", 286 | "traceback": [ 287 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 288 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", 289 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m#we can now plot our data to view the relationship\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtitle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Relationship between X and Y'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscatter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_training_set\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_training_set\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfigsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m12\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 290 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/matplotlib/pyplot.py\u001b[0m in \u001b[0;36mscatter\u001b[0;34m(x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, hold, data, **kwargs)\u001b[0m\n\u001b[1;32m 3355\u001b[0m \u001b[0mvmin\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvmin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvmax\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvmax\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0malpha\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3356\u001b[0m \u001b[0mlinewidths\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlinewidths\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverts\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mverts\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3357\u001b[0;31m edgecolors=edgecolors, data=data, 
**kwargs)\n\u001b[0m\u001b[1;32m 3358\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3359\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hold\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwashold\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 291 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py\u001b[0m in \u001b[0;36minner\u001b[0;34m(ax, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1708\u001b[0m warnings.warn(msg % (label_namer, func.__name__),\n\u001b[1;32m 1709\u001b[0m RuntimeWarning, stacklevel=2)\n\u001b[0;32m-> 1710\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1711\u001b[0m \u001b[0mpre_doc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__doc__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1712\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpre_doc\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 292 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_axes.py\u001b[0m in \u001b[0;36mscatter\u001b[0;34m(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, **kwargs)\u001b[0m\n\u001b[1;32m 4097\u001b[0m )\n\u001b[1;32m 4098\u001b[0m \u001b[0mcollection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmtransforms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIdentityTransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4099\u001b[0;31m \u001b[0mcollection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4100\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4101\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcolors\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 293 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/matplotlib/artist.py\u001b[0m in \u001b[0;36mupdate\u001b[0;34m(self, props)\u001b[0m\n\u001b[1;32m 845\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 846\u001b[0m ret = [_update_property(self, k, v)\n\u001b[0;32m--> 847\u001b[0;31m for k, v in props.items()]\n\u001b[0m\u001b[1;32m 848\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 849\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meventson\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstore\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 294 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/matplotlib/artist.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 845\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 846\u001b[0m ret = [_update_property(self, k, v)\n\u001b[0;32m--> 847\u001b[0;31m for k, v in props.items()]\n\u001b[0m\u001b[1;32m 848\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 849\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meventson\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstore\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 295 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/matplotlib/artist.py\u001b[0m in \u001b[0;36m_update_property\u001b[0;34m(self, k, v)\u001b[0m\n\u001b[1;32m 838\u001b[0m \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'set_'\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 839\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 840\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Unknown property %s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 841\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 842\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 296 | "\u001b[0;31mAttributeError\u001b[0m: Unknown property figsize" 297 | ] 298 | }, 299 | { 300 | "data": { 301 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEICAYAAABcVE8dAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAFH1JREFUeJzt3X+0ZWV93/H3ByaAAQTjjF3CDAyJ\nQ3RKG9EbxJU2QEUz0BUmtSZCaw0uCtEU0UqMWFNlYWpXNdbEBouDscQYwdE0OrVk4RIJRMsggz+I\nA6UdBwJTQAZElJAI6Ld/7D2Zw+XeOXvuz5l53q+1zpr949n7fO9zz/2cfZ599p5UFZKkfd9+i12A\nJGlhGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8BuS5OQk22ax/WVJ/v1c1jTFc1SS502z7l8m+fwM\n93txko/PrjpNJcnK/ve2ZLFr0a4Z+HuZJHcl+Zskjya5P8kVSQ6Zh+c5O8mXRpdV1eur6t1z/VxD\nVdUfV9UrFvp594Y3iyR/kmTdpGWfSfL7i1VTX8MfJ/nopGUnJXkoyXMXq65WGfh7p1+sqkOAFwLH\nA29f5Hq0+P4N8M+TnAKQ5NV0r42LFrUquAA4PcnLAZIcBFwOXFhV9y1qZQ0y8PdiVXU/cA1d8AOQ\n5MAkv5Pk7iTf7odhnjHV9kkuSvKtJN9PcluSf9YvfwFwGfDS/pPEd/vlVyT57ZHtz02yJcl3kmxI\ncsTIukry+iT/N8nDSS5Nkn7d85Jcn+SRJA8m+eSk0k6dZrunfOron+OCJFv7/bwvya5e0wcl+WT/\n8341yc+M7OuI/ih5e5I7k1zQL18D/Dvg1X1ffCPJKUn+cmTbLyT5ysj8l5L80q7226/bb+R38FCS\n9Ul+ol+3Y5jkV/vf5YNJ3jHdD9a/Fi4ELk9yFPBB4Neq6tGp2ic5IcmNSb6b5L4kv5/kgEl9O93v\nb//+NfZgkq3AP91FXQ8BbwTWJTkYeBfwraq6YrptNI+qysde9ADuAk7tp5cDfwn83sj63wU2AD8B\nHAr8D+A/9utOBraNtP1l4Ai6N/5XA38NPLdfdzbwpUnPfQXw2/30PwEeBF4EHAj8F+CGkbYFfA44\nHDgK2A6s6dddCbyjf96DgH80cLun1NS3va7/WY8C/g/wr6fpt4uBJ4BXAT8G/AZwZz+9H3AL8E7g\nAOAnga3AL4xs+/GRfR0E/A2wFFgC3A/c2/f3M/p1zx6w3zcDG/vf44HAh4Er+3Ur+5/v8n6fPwP8\nAHjBmNfHNf3v5Q/HtHsxcGJf/0rgduDNA38Prwf+N7Ci7/vr+vZLdvF8n6Z7XT4EHLXYf0etPha9\nAB+7+QvrAv9R4Pv9H9m1wOH9utCF9k+NtH8pcGc/fTIjgT/Fvr8OrO2nnxKu/bIr2Bn4fwC8d2Td\nIX2gruzni6cG+Xrgon76Y8A6YPkUNexqu6fU1LddMzL/68C10/xsFwMbR+b3A+4D/jHwEuDuSe3f\nDvy3kW0/Pmn9XwCv7EPz832da4BTgFv7NuP2ezvwspF1z+37cEcI12gfAV8Bzhzz+vitfruX7+br\n6s3Anw78PXwReP3IulcwPvD/Xv+6fdNi/w21/PCs+t7pl6rqC0lOAj5Bd6T5XWAZ8OPALf2nb+je\nBPafaidJXgu8hS5coAvtpQNrOAL46o6Zqno0yUPAkXRvStAd+e7wWL9/gN8E3g18JcnDwPuravTE\n3nTbTeWekem/6usa27aqfpTuG0tH0IXVETuGrnr704X6dK6nfwPtpx8GTqI7Cr++b3P0mP0eDfxp\nkh+NrP8hXTjuMLgvkqyi++TyIeD9SV5cVU9M0/ZY4D8DE3SvmSV0n0ZGTffcR/D0ft+lqvp2kgeB\nzePaav44hr8Xq6rr6Y66f6df9CDdcMLfr6rD+8dh1Z3gfYokR9MNF5wPPLuqDge+SfcGAV0I7sq9\ndIG1Y38H0w1j/L8Bdd9fVedW1RHArwEfyjRfxRx
gxcj0UX1dY9v2Y/3L+/b30H0KOnzkcWhVnb6j\n5Cn2tSPwf76fvp4u8E9iZ+CP2+89wGmT1h9UVWP7cLJ+fP0jdEN6b6T7pPe2XWzyX+mGZVZV1TPp\nzlNkF+1H3cfT+117AQN/7/e7wMuTvLCqfkQX4h9I8hyAJEcm+YUptjuYLsi29+1eBxw3sv7bwPLR\nE3mTfAJ4XZIXJjkQeA9wU1XdNa7gJL+cZHk/+3Bfxw/HbTeNtyZ5VpIVwJuAySeAR704ySvTfV/8\nzXRH4xvphkq+l+RtSZ7Rn5Q8LsnP9tt9G1g56YTw/wJ+GjgB+EpVbaZ7A3wJcEPfZtx+LwP+Q//m\nS5JlSdbOsB/eQPfp7D396+Ac4DeTPH+a9ocC3wMe7du8YTeeaz1wQZLlSZ7F4n8TSAMZ+Hu5qtpO\nNya+44KotwFbgI1Jvgd8gS6YJm93G/B+4Ea6QPsHwJdHmnyR7uP3/f1H8cnbX9s/55/QHfH9FHDm\nwLJ/FrgpyaN0J/LeVFV3Dtx2ss/SDUV8HfifdOcWdtX21XRvMv8KeGVVPVFVPwR+ke7bTnfSfVL6\nCHBYv92n+n8fSvJVgKr6a7ohrc1V9Xi//kbgr6rqgb7NuP3+Xv/zfz7J9+nefF6yux3Qv9m9Bzhn\nRy0jv9/Ld3y7ZpLfAP4F3bmgy9n1G+Vkl9OdHP4GXR/8992tWYsjVf4HKNo7JSm6IYkti12LtDfw\nCF+SGjE28JN8NMkDSb45zfok+WC6C3BuTfKiuS9TkjRbQ47wr6D7fvF0TgNW9Y/z6M7+S/OuquJw\njjTc2MCvqhuA7+yiyVrgY9XZCBweb4okSXucubjw6kieehHGtn7Z026MlOQ8uk8BHHzwwS9+/vOn\n+8aYJGkqt9xyy4NVtWwm285F4E/1la8pv/pTVevoLqlnYmKiNm3aNAdPL0ntSDL2yubpzMW3dLbx\n1Kvudly9KEnag8xF4G8AXtt/W+dE4JHyPteStMcZO6ST5Eq6e4Ys7W829S66W8pSVZcBVwOn013d\n+RjwuvkqVpI0c2MDv6rOGrO+6P63HUnSHswrbSWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJ\naoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RG\nGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSB\nL0mNMPAlqREGviQ1wsCXpEYMCvwka5LckWRLkoumWH9UkuuSfC3JrUlOn/tSJUmzMTbwk+wPXAqc\nBqwGzkqyelKz3wLWV9XxwJnAh+a6UEnS7Aw5wj8B2FJVW6vqceAqYO2kNgU8s58+DLh37kqUJM2F\nIYF/JHDPyPy2ftmoi4HXJNkGXA28caodJTkvyaYkm7Zv3z6DciVJMzUk8DPFspo0fxZwRVUtB04H\n/ijJ0/ZdVeuqaqKqJpYtW7b71UqSZmxI4G8DVozML+fpQzbnAOsBqupG4CBg6VwUKEmaG0MC/2Zg\nVZJjkhxAd1J2w6Q2dwMvA0jyArrAd8xGkvYgYwO/qp4EzgeuAW6n+zbO5iSXJDmjb3YhcG6SbwBX\nAmdX1eRhH0nSIloypFFVXU13MnZ02TtHpm8Dfm5uS5MkzSWvtJWkRhj4ktQIA1+SGmHgS1IjDHxJ\naoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RG\nGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSB\nL0mNMPAlqREGviQ1wsCXpEYY+JLUiEGBn2RNkjuSbEly0TRtfiXJbUk2J/nE3JYpSZqtJeMaJNkf\nuBR4ObANuDnJhqq6baTNKuDtwM9V1cNJnjNfBUuSZmbIEf4JwJaq2lpVjwNXAWsntTkXuLSqHgao\nqgfmtkxJ0mwNCfwjgXtG5rf1y0YdCxyb5MtJNiZZM9WOkpyXZFOSTdu3b59ZxZKkGRkS+JliWU2a\nXwKsAk4GzgI+kuTwp21Uta6qJqpqYtmyZbtbqyRpFoYE/jZgxcj8cuDeKdp8tqqeqKo7gTvo3gAk\nSXuIIYF/M7AqyTFJDgDOBDZMavMZ4BSAJEvphni2zmWhkqTZGRv4VfUkcD5wDXA7sL6qNie5JMkZ\nfbNrgIeS3AZcB7y1qh6ar6IlSbsvVZOH4xfGxMREbdq0aVGeW5L2VkluqaqJmWzrlbaS1AgDX5Ia\nYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREG\nviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RGGPiS1AgDX5IaYeBL\nUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjBgV+kjVJ7kiyJclFu2j3qiSV\nZGLuSpQkzYWxgZ9kf+BS4DRgNXBWktVTtDsUuAC4aa6LlCTN3pAj/BOALVW1taoeB64C1k7R7t3A\ne4G/ncP6JElzZEjgHwncMzK/rV/2d5IcD6yoqs/takdJzkuyKcmm7du373axkqSZGxL4mWJZ/d3K\nZD/gA8CF43ZUVeuqaqKqJpYtWza8SknSrA0J/G3AipH55cC9I/OHAscBf57kLuBEYIMnbiVpzzIk\n8G8GViU5JskBwJnAhh0rq+qRqlpaVSuraiWwETijqjbNS8WSpBkZG/hV9SRwPnANcDuwvqo2J7kk\nyRnzXaAkaW4sGdKoqq4Grp607J3TtD159mVJkuaaV9pKUiMMfElqhIEvSY0w8CWpEQa+JDXCwJek\nRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqE\ngS9JjTDwJakRBr4kNcLAl6RGGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4\nktQIA1+SGmHgS1IjDHxJasSgwE+yJskdSbYkuWiK9W9JcluSW5Ncm+TouS9VkjQbYwM/yf7ApcBp\nwGrgrCSrJzX7GjBRVf8Q+DTw3rkuVJI0O0OO8E8AtlTV1qp6HLgKWDvaoKquq6rH+tmNwPK5LVOS\nNFtDAv9I4J6R+W39sumcA/zZVCuSnJdkU5JN27dvH16lJGnWhgR+plhWUzZMXgNMAO+ban1Vrauq\niaqaWLZs2fAqJUmztmRAm23AipH55cC9kxslORV4B3BSVf1gbsqTJM2VIUf4NwOrkhyT5ADgTGDD\naIMkxwMfBs6oqgfmvkxJ0myNDfyqehI4H7gGuB1YX1Wbk1yS5Iy+2fuAQ4BPJfl6kg3T7E6StEiG\nDOlQVV
cDV09a9s6R6VPnuC5J0hzzSltJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtS\nIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RGGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXC\nwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8\nSWqEgS9JjTDwJakRgwI/yZokdyTZkuSiKdYfmOST/fqbkqyc60IlSbMzNvCT7A9cCpwGrAbOSrJ6\nUrNzgIer6nnAB4D/NNeFSpJmZ8gR/gnAlqraWlWPA1cBaye1WQv8YT/9aeBlSTJ3ZUqSZmvJgDZH\nAveMzG8DXjJdm6p6MskjwLOBB0cbJTkPOK+f/UGSb86k6H3QUib1VcPsi53si53si51+eqYbDgn8\nqY7UawZtqKp1wDqAJJuqamLA8+/z7Iud7Iud7Iud7Iudkmya6bZDhnS2AStG5pcD907XJskS4DDg\nOzMtSpI094YE/s3AqiTHJDkAOBPYMKnNBuBX++lXAV+sqqcd4UuSFs/YIZ1+TP584Bpgf+CjVbU5\nySXApqraAPwB8EdJttAd2Z854LnXzaLufY19sZN9sZN9sZN9sdOM+yIeiEtSG7zSVpIaYeBLUiPm\nPfC9LcNOA/riLUluS3JrkmuTHL0YdS6EcX0x0u5VSSrJPvuVvCF9keRX+tfG5iSfWOgaF8qAv5Gj\nklyX5Gv938npi1HnfEvy0SQPTHetUjof7Pvp1iQvGrTjqpq3B91J3m8BPwkcAHwDWD2pza8Dl/XT\nZwKfnM+aFusxsC9OAX68n35Dy33RtzsUuAHYCEwsdt2L+LpYBXwNeFY//5zFrnsR+2Id8IZ+ejVw\n12LXPU998fPAi4BvTrP+dODP6K6BOhG4ach+5/sI39sy7DS2L6rquqp6rJ/dSHfNw75oyOsC4N3A\ne4G/XcjiFtiQvjgXuLSqHgaoqgcWuMaFMqQvCnhmP30YT78maJ9QVTew62uZ1gIfq85G4PAkzx23\n3/kO/Kluy3DkdG2q6klgx20Z9jVD+mLUOXTv4PuisX2R5HhgRVV9biELWwRDXhfHAscm+XKSjUnW\nLFh1C2tIX1wMvCbJNuBq4I0LU9oeZ3fzBBh2a4XZmLPbMuwDBv+cSV4DTAAnzWtFi2eXfZFkP7q7\nrp69UAUtoiGviyV0wzon033q+4skx1XVd+e5toU2pC/OAq6oqvcneSnd9T/HVdWP5r+8PcqMcnO+\nj/C9LcNOQ/qCJKcC7wDOqKofLFBtC21cXxwKHAf8eZK76MYoN+yjJ26H/o18tqqeqKo7gTvo3gD2\nNUP64hxgPUBV3QgcRHdjtdYMypPJ5jvwvS3DTmP7oh/G+DBd2O+r47Qwpi+q6pGqWlpVK6tqJd35\njDOqasY3jdqDDfkb+QzdCX2SLKUb4tm6oFUujCF9cTfwMoAkL6AL/O0LWuWeYQPw2v7bOicCj1TV\nfeM2mtchnZq/2zLsdQb2xfuAQ4BP9eet766qMxat6HkysC+aMLAvrgFekeQ24IfAW6vqocWren4M\n7IsLgcuT/Fu6IYyz98UDxCRX0g3hLe3PV7wL+DGAqrqM7vzF6cAW4DHgdYP2uw/2lSRpCl5pK0mN\nMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSI/4/rev6i5m3RygAAAAASUVORK5CYII=\n", 302 | "text/plain": [ 303 | "" 304 | ] 305 | }, 306 | "metadata": {}, 307 | "output_type": "display_data" 308 | } 309 | ], 310 | "source": [ 311 | "#we can now plot our data to view the relationship\n", 312 | "plt.title('Relationship between X and Y')\n", 313 | "plt.scatter(x_training_set, y_training_set)\n", 314 | "plt.show()" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 11, 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "data": { 324 | "text/plain": [ 325 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)" 326 | ] 327 | }, 328 | "execution_count": 11, 329 | "metadata": {}, 330 | "output_type": "execute_result" 331 | } 332 | ], 333 | "source": [ 334 | "#after some observations and visualizations, time to train our data\n", 335 | "#let's create the linear regression object\n", 336 | "lm = linear_model.LinearRegression()\n", 337 | "#then fit it\n", 338 | "lm.fit(x_training_set,y_training_set)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 14, 344 | "metadata": {}, 345 | "outputs": [ 346 | { 347 | "data": { 348 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3X2cXGV99/HPL5tBJoDZIEHJQggK\nBkFuEliBmuoNwduICqQoIkJFS4u1tRWx0WCpgMWS3vEW8OVTURQoCuHJiIIN1oAPqaAJCSJCFBES\nNgGCZAOShWw2v/uPc83m7Ow5Z87szuzMznzfr1de2Tlz5pxr5sxcv3M9m7sjIiJSbkKjEyAiIs1J\nAUJERBIpQIiISCIFCBERSaQAISIiiRQgREQkkQJEA5jZGWZ2Z6PTUWJmRTP7npltMbObGpiOq83s\nkjE+55/M7NVjec5Gq/ZzbsR1GSkzczM7MPz9VTP7lzE45wfM7Gd1OO5FZnZdrY9bjXEdIMzsfWa2\nMvzIN5rZD8zszxudrkrc/Vvu/tZGpyPm3cArgVe4+6nxJ8zs/5jZU2a2V2zby8zsITP70FgntNbc\nfXd3f7Ta15nZjJAZTRxtGpopA65XZhc7/t1m9mL4zT5jZrea2T71OJe7/627/2vONP11PdIw3o3b\nAGFm5wGXA/9GlLlNB74MnNzIdFVSiwylDvYHfuvu28ufcPcfAt8HrohtvgDYCFw5NsmTFvMRd98d\neC3QCVyWtJOZdYxpqmQ4dx93/4DJwJ+AUzP2eRlRANkQ/l0OvCw8dyzwBPAJ4GmizG4+8Hbgt8Cz\nwKdix7oIuBlYAjwP3AccHnt+IfD78NxvgL+IPfcBYAXRj+BZ4JKw7WfheQvPPQ1sAX4FvD72Pq8F\nNgGPE2XME2LH/RnwOWAz8AfghIzP43XA3UAv8CBwUth+MbAN6A+f6dkpn/cTwDuA14fzvSblPA8B\n74w9ngg8AxwRHt8EPBne60+AQ2P7Xg1cEn9/Zcd24MDY9f0csA54CvgqUAzP7UUU1HrDZ/7T0ueW\nkN74Ma8GvgTcHq7lvRnvc1147Z/Cvz8L2/8qfAabgWXA/lnXGTgnfPbbwnG+l3K+K4D1wHPAKuBN\nZd/PG8N35flwfbtjz88m+s4+T/QdvqH0OSd8R14EBkJaevN8LsDBwA/DZ70WeE/G9/Bu4K9jj/8e\n+HXsPF8B7gBeAN6SdZ3DaxYQ/X43hM++/HpeEtv3ZGBN+Ax/D7wN+Gx4vy+G9/zFSu8JeAVwWzjO\nL4B/pey7Gtv3v4gCYnzb/cApOa/rdfE8q+w4jwFvCX9PYGc+9MfwfdgzPLcrcF3Y3gv8Enhlrry2\n2sy5Gf6FC7sdmJixz2eAe4C9ganA/wD/GvuwtwOfBgrA3xBlwt8G9gAODV+YV8cuVD9RVUwB+Cei\nDLkQnj8VmBYu0mnhy71PeO4D4Vz/QJRZFhkaIOaFL0YnUSbyuthrrwW+G9I0gyh4nR07bn9Iewfw\nYaIfiSV8FgXgEeBTwC7AXKIf+szyL2LG53li+CL/Ajg3Y79PA9+KPX4H8HDs8V+F91MK4Gtiz11N\n/gBxOdGPdM9wvO8Bl4bnLiXKSArh35uSPpeEY15NlCEcFa7Vt4AbUl43I7x2Ymzb/PA5vy68/gLg\nf3Jc58H3nfG5nkmUMU0EPk4UZHeNXb8XiW5wOsL7vyc8twvRzcXHwmfx7vC9STxfyuee+rkAu4Xv\nxQfDc0cQ3RAcmnL8uwkBgiiQLwf+M3aeLcAcot/SrhWu89uIgsbrQzq+nXA9S9+no8Kx/084dhdw\ncHma8rwnogB7Y9jv9UBP+WcWO9b7gRWxx4cQZdKlm9VK1zVvgDiXKL/bl+i39R/A9eG5D4XPbVL4\nfhwJvDxXXjvSTLqR/4AzgCcr7PN74O2xx/OAx2Ifdh/QER7vEb5YR8f2XwXMj12oe2LPTSC6a3lT\nyrnXACfHfnDr0n6ERJn1b4FjiN3lhgv5EnBIbNuHgLtjx3gk9tyk8B5elZCeN4UvXvz41wMXlX8R\nK3ymNwErSbkbD/scSBR8JoXH3wI+nbJvZ0jz5PD4anIECKIM9gWG3sX+GfCH8PdniALrgTneU3mG\n8vXYc28nFtzKXjeD4QHiB8RKYOF7spWoCi/xOpe/7yp+A5sJpdhw/f479twhQF/4+82U3TgQ3SxV\nGyASPxeiG6Kflu3/H8CFKce/O3wmvUQZ67eAqbHzXBvbt9J1/gawKPbcaxOu5yWxNF2WkaZ4gEh9\nT0S/y35CcAnP/Vv5ZxZ7bo/wHvYPjz8LfKOK65o3QDwEHB97bp+QzolEN2X/A/yvar5j7j5u2yD+\nCOxVoT5/GtGdU8njYdvgMdx9IPzdF/5/KvZ8H7B77PH60h/uvoOoymUagJm938zWmFmvmfUS3VXs\nlfTacu6+HPgiURH+KTO70sxeHl5fuvuLv4eu2OMnY8fZGv6Mp7lkGrA+pDvtWHk8SJQx7Ejbwd0f\nIfqynmhmk4CTiO7sMLMOM1tkZr83s+eIvuAw9LPKYypRQFwV+8z/K2wHWEx0J3+nmT1qZgurOPaT\nsb+3kvx5ptkfuCKWpmeJMrmujOuci5l9PHQM2BKOPZmhn1t5uncNv49pQI+HXCOIf6fySvtc9geO\nLr3nkLYzgFdlHOsf3b3T3bvc/Qx33xR7Lv5bqXSdp5Xtn/W+9iO6acwj6z1NJcp0c53X3Z8nqpp7\nb9j0XqKgCOS6rnntD3wnlt6HiKrOXgn8J1F15w1mtsHM/q+ZFfIcdLwGiJ8TFannZ+yzgehDK5ke\nto3UfqU/zGwCUVFug5ntD3wN+AhRL6BO4NdEGUNJ/Mc5jLt/wd2PJKraei1RveozRHcA5e+hZwRp\n3wDsF9I92mPlcT1wOlGd729C0AB4X9j2FqIfwoyw3coPQHTXNan0wMziGc4zRAH80JDRdLr7ZI8a\nPnH359394+7+aqKqsfPM7PiavbtI0jVdD3wolqZOdy+6+/+EdCVd57RjDTKzNwGfBN4DTAnfsS0k\nf27lNgJdZhbfd3qV7yvLeuDHZe95d3f/cJXHSTp/5nUmem/7xfbPel/rgdfkOGdp37T3tImoyjjv\neSH8Hszsz4iqmO+Cqq9r+e+hg52BspTmE8rSvKu797h7v7tf7O6HAG8E3klU9VXRuAwQ7r6FqK77\nS2Y238wmmVnBzE4ws/8bdrseuMDMpoYump8maqgZqSPN7JRwV3YuUfXPPUT1kE70xcHMPkhUgsjF\nzN5gZkeHiP4CoZEwlG5uBD5rZnuEQHTeCN/DveHYnwif07FEGecNIzhWHjcAbyVqF/l2bPseRJ/b\nH4m+7P+WcYz7gUPNbJaZ7UpU3AYGS3BfAy4zs70BzKzLzOaFv99pZgeGTPE5ojupgfITjNImYAcQ\nH0PxVeB8Mzs0pGOymZ0a/k68zuF1T5Udp9weRJnSJmCi
mX0ayFv6+Hl47T+a2UQzO4WoPj7NU8C+\nZrZLzuN/H3itmf1l+G4Vwnt9Xc7Xp6p0nYl+Hx8ws0NCafXCjMNdBXzQzI43swnhOAeH58o//9T3\nFH6XtwIXhXznEOCsCm/lDqIbvc8AS2Il8Gqu62+JSoXvCN+hC4jaGkq+SpRX7A8Q8r2Tw9/Hmdlh\nIag8R3Tjmev3MC4DBIC7f54ow7yA6ANeT3QXvzTscglRffmvgAeIenGMpq/5d4nqJjcDf0nUC6Hf\n3X8D/D+iH+JTwGFEvZbyejnRj2AzUVH1j0S9NiBq2H4BeJSox9K3iepdq+Lu24iqek4guiv7MvB+\nd3+42mPlPN9Gos/jjUS9ZkquJXqPPUS9ve7JOMZviX5Q/w38juj9x32SqBrpnlBd9d/AzPDcQeHx\nn0I6vuzud4/qTQ1P31ai+uQVoVh/jLt/B/h3oqL8c0QlyRPCS7Ku81XAIeE4SxluGVH7xm/Da18k\no9qyLJ3bgFOI2hY2E32Hb814yXKiqsQnzeyZHMd/nuhm4L1EJdUniT6Dl2W9rgqp19ndf0DUiL08\n7LM8I52/IGp0vozoLv3H7CydXwG828w2m9kXcrynjxBVsT1J1M7xzaw34O4vEX3mb2HoDVPu6xpu\niv8O+DrR7+cFomrukiuIGvPvNLPniX5bR4fnXkXUC/M5oqqnH5PzRtOGVk1KEjO7iKjh68xGp0VE\nZKyM2xKEiIjUlwKEiIgkUhWTiIgkUglCREQSNePEcbnttddePmPGjEYnQ0RkXFm1atUz7j610n7j\nOkDMmDGDlStXNjoZIiLjipnlGk2vKiYREUmkACEiIokUIEREJJEChIiIJFKAEBGRROO6F5OISLtZ\nurqHxcvWsqG3j2mdRRbMm8n82dUu7ZKPAoSIyDixdHUPC266n/4d0QwYPb19LLjpfoC6BAlVMYmI\njBMX3fbgYHAo6d/hXHTbg3U5nwKEiMg40dvXX9X20VKAEBGRRAoQIiLjxG67dFS1fbQUIERExolC\nR3KWnbZ9tBQgRETGiS0pbQ1p20dL3VxFRJpEpTEO0zqL9PT2DXvdtM5iXdKjEoSISBNYurqH8299\ngJ7ePpxojMP5tz7A0tU9g/ssmDeTYmFoe0Ox0MGCeTPrkqa6BQgz+4aZPW1mv45t29PMfmhmvwv/\nTwnbzcy+YGaPmNmvzOyIeqVLRKQZLV62lr7+gSHb+voHWLxs7eDj+bO7uPSUw+jqLGJAV2eRS085\nbFyOpL4a+CJwbWzbQuBH7r7IzBaGx58ETgAOCv+OBr4S/hcRGZeqnRJjQ0LVUWl7/FiTiwXM6pXq\noepWgnD3nwDPlm0+Gbgm/H0NMD+2/VqP3AN0mtk+9UqbiEg95akuKpfWjjC5WBhyrN6+fjZv7c99\n3NEY6zaIV7r7RoDw/95hexewPrbfE2HbMGZ2jpmtNLOVmzZtqmtiRURGIk91Ubm09gUzhh2rmuOO\nRrM0UicVmDxhG+5+pbt3u3v31KkV19wWEam5pat7mLNoOQcsvJ05i5YPu4PPqi5Kk9a+0Lu1chfW\nrOOOxlh3c33KzPZx942hCunpsP0JYL/YfvsCG8Y4bSIiFZWqj0p39aVqHtg5o2pWd9Sk9oTerf1M\n6ywy4xVFntzyIg48ueVFVj7+LJ2TCmyuECRapZvrbcBZ4e+zgO/Gtr8/9GY6BthSqooSEWkmeaqP\nkqqLjCiYnLtkTWp7worfP8uAR5UnA+5cd886nqswCK6e3VzrVoIws+uBY4G9zOwJ4EJgEXCjmZ0N\nrANODbvfAbwdeATYCnywXukSERmNPNVHpZLE4mVrB0sSiXXmOQxkvLBrvC4Y5O6npzx1fMK+Dvx9\nvdIiIlIreUczz5/dxfzZXcz+zJ0Vq4hGasXCuXU5bomm2hARqcKCeTOHrOpW0tPbx6yL7xxsU+ic\nVMC9fms1dBYLdTlunAKEiEi1UgaqxYNBvUoNAIUJxkUnHVq345coQIiIVGHxsrX0ZzUM1Fm92x3i\nFCBERKpQrzEHeXR1Fuve7hDXLAPlRETGhXqNOYi7/LRZYzpraxqVIEREcigNcEvqwVQtI73b65RJ\nhSHdZPNO9lcPChAiIhUsXd3Dgpvvr9j2YMCkXTp4YVv63EmlNgRg2DELHcaFJ0aNz6Vuso2kACEi\nUsHF33swV8O0Q2ZwMIaPXWh0KSGLAoSISEzSOg616rJaPhdTMwaFOAUIEZEgayK+0SoWOjju4KkV\nJ/prJurFJCISpE3EN1qlqbvvenhT1etENJIChIhIUI8xDlMmFVixcC7zZ3eNaJ2IRlIVk4i0nJHW\n86dNxJfVLbUSj70w70R/zUIlCBFpKSNZD7rkuIOTV6kczcQaW2LzM6UtKzrWA+DyUoAQkZYykvWg\nS+56uPbr3MdLB2nLijZjAzWoiklEWsxo6vlr3RaQVDpohgFweakEISItJa0+P089fy3bApq9dJCH\nAoSItJTR1PMnvTaLEU2sF68yuvy0WTy26B2DPZfGM1UxiUhLmT+7i5WPP8v1965nwJ0OM951ZFSt\nU6l3U9Ja0lmmdRbHVZVRtRQgRKSlLF3dwy2rehgI/UsH3LllVdSD6ZZVPRVHMZcy/EoT9DVz76Na\nUYAQkZaS1oupVKIo37542dohASJeyphcLAxbY3pLX3/Tz6FUKwoQItJS0noilQeHpP3L52Lq7eun\nWOjgstNmtXwwSKIAISItJXU0tA0d1VzSOamQuRhQUimjXagXk4i0lLReTMWJydndi/0DgyOv0zTr\nXEn1pgAhIi0lbbRyX/+OxP37+ndUnLG1WedKqjdVMYlI06vFIjtpVU+VtENvpTQKECLSlOLtAvHZ\nVCstspO26M+7juwa0s0Vosx/18KE1BXjutqkt1KahlQxmdnHzOxBM/u1mV1vZrua2QFmdq+Z/c7M\nlpjZLo1Im4g0XnxGVhg+m2rW5Htp3VzvenhTYtXThScemthmcflps1piNPRojHkJwsy6gH8EDnH3\nPjO7EXgv8HbgMne/wcy+CpwNfGWs0ycijZeUyZerdlK+Db19maOex8s60WOpUVVME4GimfUDk4CN\nwFzgfeH5a4CLUIAQaStZ3U3LTessJrZNjGRRnlaeLmM0xjxAuHuPmX0OWAf0AXcCq4Bed98ednsC\n0NUSaSPlbQdZioUOjjt46pCpMHp6+zh3yRpg+Apw7dzQPBqNqGKaApwMHAD0AjcBJyTsmjjs0czO\nAc4BmD59ep1SKSKjVW3PozzVShBl/n39A1x3z7rUfZydQaLdG5pHoxFVTG8B/uDumwDM7FbgjUCn\nmU0MpYh9gQ1JL3b3K4ErAbq7u0ezEqCI1ElaTyJI7nlU2iePvD/6UnBYsXBuzldIuUb0YloHHGNm\nk8zMgOOB3wB3Ae8O+5wFfLcBaRORHJau7mHOouUcsPB25ixaPmy955Es+9lhVvN0tusI6FoZ8wDh\n7vcCNwP3AQ+
ENFwJfBI4z8weAV4BXDXWaRORyuJdUJ2dpYN4kBjJsp9pk+mNRruOgK6VhvRicvcL\ngQvLNj8KHNWA5IhIFbJKB6Xqo7SeRA7MWbSc4w6eyl0PbxrSPtE1wpHOadQwPXqai0lEqpKndJC1\ndGdPbx/X3bNuWAnkuIOnUpgwtJqpMME485jpuZYB7SwWmDKpMGQQnBqmR0dTbYhIVfKMMyhf9rOS\nvv4Bbv/VxqjrUZxB9/570r3/noM9oibt0sHWbQM4UbvF6UfvxyXzDxvlu5IkChAiUpUF82ay4Kb7\n6d+xM+MvTLAh1Tnly37mkTQfUv+As3jZ2iHH7py0C5/9C3VbHQsKECJSvYQ7/bi8YxryKFVBVeoy\nW4sZX2UotUGISFUWL1s7OHq5pHSnX1LL7qUdZhW7zObpWSXVU4AQkarkaaSuVffSYqEj11rSIxl3\nIZUpQIhIVdIy//j2rF5MeZV6InXlON9Ixl1IZQoQIlKVtDWf4w3J8WU/R6q0FkOe8+UJWlI9BQgR\nqcr82V2868iuwakxOsx415HDp8ueP7uLFQvnDmvPzqOrrMts0kI/8fPlCSJSPfViEpFc0pYAHXDn\nllU9dO+/JzB84Z3JxQK9fclLeiZJytgrrddQek69mGrLvA7zn4yV7u5uX7lyZaOTIdLy8qzV0Fks\n8NL2HcPWfN7hzkvbdwzbv1Sy6JxUwB229PUrYx8jZrbK3bsr7acShIgMkTSeIM+4hqRSQtZrHHhs\n0TtGm1ypIwUIEUmtPiofpCbtRQFCpM2VVx+VVzrXKzh0Fgt1Oa7UjgKESJu6YOkDuSfTq7XCBOOi\nkw4d8/NKdRQgRNrQBUsfyFzTuZ60RvT4oQAh0oauv3d9w86tNaLHDw2UE2lDjahWgvqsOy31owAh\n0oYalVGffvR+DTmvjIwChEgbqlVGnRVoioUJlFYQ7bBo6VCt/Da+qA1CpA1dMv8w/rDpT6z4/bOj\nOs4xr57Cfeu2DBs9rfWgW4NKECJtaOnqHu5btyXXvlmVUY/9sa/iRHoyfqkEIdIi0pbcjG8vzXuU\nd/K8YqEjc6Dcht6+ihPpyfilACHSAspHQ5emyFj5+LPcsqpncPvmrflnVU1a6rOc1ltobQoQIuNY\nfA6lcn39AyMeKV2p5FDaR+sttDa1QYiMU6VSQ1JwKBlJcKi01Gd8H1UttTaVIESaVFqbQkmeKbgn\nGOyoMkbERzqXz+SqHkrtRQFCpAmltSnAztXTskoOJR1VBoj4uAat0iYVA4SZneLut1baVg0z6wS+\nDryeaHbhvwLWAkuAGcBjwHvcffNIzyHSzOKlg8nFAmbQu7U/c4Gevv4BFi9bO5hBd5hVrELqH76Q\nW6byAXTqodTe8rRBXJCw7Z9Hed4rgP9y94OBw4GHgIXAj9z9IOBH4bFIy4m3HThRl9PNW/txdpYU\n0koHG2LbazmfkkY6S5LUEoSZzQPeBnSZ2edjT70cqPK+ZMhxXw68GfgAgLtvA7aZ2cnAsWG3a4C7\ngU+O9DwizapS20Ff/0Bq6SDerXTKpEJV3VaTFDqMxe8+XKUESZRVxfQ08GvgReDB2PbnGd3d/auB\nTcA3zexwYBXwUeCV7r4RwN03mtneSS82s3OAcwCmT58+imSINMaGHG0HA+7DupqWdyt9sRYrvTVm\nUlcZJ1KrmNx9tbtfBcwE/hP4sbtf5e43uvszozjnROAI4CvuPht4gSoCjrtf6e7d7t49derUUSRD\npDHyDC7rMEudwmLp6h7mLFpOX7UNDAn6dziLl60d9XGkNeVpgzgeeAD4IYCZzTKz74zinE8AT7j7\nveHxzUQB4ykz2yecYx+iEoxIy1kwbybFQkfmPmntC0tX97Dg5vtz9WDKK0+JRtpTnm6unwGOBu4C\ncPc1ZnbgSE/o7k+a2Xozm+nua4kC0G/Cv7OAReH/7470HCLNImssQ9oIaIDOYiGxm6vh9A/Utl5I\n02VImjwBot/de23ovO+j/Yb+A/AtM9sFeBT4IFFp5kYzOxtYB5w6ynOINFSlsQyl6qLywWhG8mR6\nlQbFlZx5TNQ2l2eaDU2XIVnyBIiHzOw9wAQzO4CoQfme0ZzU3dcA3QlPHT+a44o0kzxjGZJKE6O9\n+7rr4U2sWDh3sMtqpTEX6sEkafIEiI8Anybq2vodYBnwqXomSqQVpNXtl28vlSZmf+bOUXdbzTq+\nSLUqBgh3f4FoPMInzWwPd3++/skSGf+mdRYT2xjS6vxrERyyji9SrayBcv8M3OLuD4e2gu8DR5nZ\nS8Dp7r58rBIpMh4tmDczsX2hp7eP2Z+5E3fY0rezqqcW1KYgtZTVzfV9RPMjAbwf2BXYC5gLXFrn\ndImMe/Nndw2ZNtvY2b6weWs/vX07p9f42JI1oz6fpuCWWssKENvcB7tAvA34trtvd/cHgUL9kyYy\n/s2f3cWKhXPp6ixmNj6PtmH6zGOms2LhXAUHqamsNoiXzOx1RAPW5gKfiD03qa6pEmkSeXoAVVq3\nAeo/GO2uhzfV9fjSnrICxMeB24iqla5w90cBzOztwK/GIG0iDVU+RiE+NqGnt48FN98/bM3npHUb\nACYXC4ljG2pFo6GlHrLmYlrh7ge5+xR3vyi2/Q53f8+YpE6kgSrNuto/4Hz73nWJYx3OXbKG15x/\nBxcsjYLF0HGmtaeeS1IPWlFOJEWeu/Ks1doG3LnunnVAVC1VL+q5JPWSZ7I+kbZUq7vy6+9dX7c7\nfPVcknqqGCDMbFgpI2mbSKs57uDaTCc/4J5rBtdqdXUW1XNJ6ipPRv8Loum4K20TGVcq9T6qVc+g\nDrNcM7hWQ9VKMhayRlLvDewDFM3sMKJxPhAtOapurtLUKmX+lWZahdr1DDr96P0Gjzt/dhcHLLw9\nddxDxwRjIKthg6jkoEn2ZCxkVTG9A/gisC/wpdi/TwH/Uv+kiYxMKfPv6e0bHKl8/q0PsHR1z+A+\nWTOtlkwuJo8HTeuQ1NVZ5MxjptMRuix1mHHmMdMHZ1UtSWuP6Oos8v9OPXxw5HXaPqpWkrGSWoJw\n928SrRv9Hne/cQzTJDIipVJDUhVO+TTbeWZaTeuaOmmXDrZt30F/7E6/MMEG7+rLA0K5pDmaSlVG\nWetEqFpJxlqeNoi9zezl7v6cmX2VqO3hfHf/UZ3TJpJbUoZaLp7555lpNW121Re2JZwjJZhUWlEu\nrQoszz4i9ZYnQJzj7l80s7cSVTd9GLgSOLKuKROpQqVBbTA088+aaXXOouUsmDeTCZY9ziGuf8CH\nlFAg34pyWbSOgzRannEQpZ/ICcA33X1VzteJjJlKDcrxzH/p6p7MmVZLGXne4JCWhjztHCLNLE9G\nf7+Z3QGcCPzAzHZn9JNPitRU1kC0pMy/FCTSZlrNu/5zVhryrign0qzyBIgPAhcBR7n7VqJ1Ic6u\nZ6JEqpU0EK1Y6GDKpEJi5h+/i69Fhp3UgJwWtDRvkowXFQOEuw8AryZq
ewAo5nmdyFiKVxkZO6eg\nSJsDaUNvH0tX9zBn0fIRF4dL7dJp012kBS31RJLxwnauCZSyg9kXiRYIerO7v87M9gSWufsbxiKB\nWbq7u33lypWNToY0sTmLlif2VuosFnhp+44RVSVB/sFqedaKEBlrZrbK3bsr7ZenF9Mb3f0IM1sN\n4O7PhjWqRZpe2pgDs+rbGUYyglk9kWQ8yxMg+s1sAqGdz8xeAeyoa6pEaqR8PEFpVbi0MQ5pSiOY\nRdpJ1lxME919O9H0GrcAU83sYuA9wMVjlD4RYHRVNfHRyefduCaz+6oZTJxg9A/ERkl3mNoNpC1l\nlSB+ARzh7tea2SrgLUTtcqe6+6/HJHUi5JtYL+k15QHlU7f+quLYBneGd+JWp25pU1m9kQYnD3D3\nB939Cne/XMFBxlq1A87SJuvb2l+5ZrTDbMgcSwD9O1yD26QtZZUgpprZeWlPuvvnR3NiM+sAVgI9\n7v5OMzsAuAHYE7gP+Et33zaac0hrSBun0NPbx2vOv4PTj95vyAR5aQGlkmKhI3U/DW6TdpRVgugA\ndgf2SPk3Wh8FHoo9/nfgMnc/CNiMBuNJkDbtNuxc9/mCpQ8MbhtJZl4ay5A21bYGt0k7yipBbHT3\nz9TjpGa2L9F6E58FzjMzA+YC7wu7XEM0evsr9Ti/jC9p027HXX/v+sFSRNpMrWnK12zQNNsikVxt\nEHVwOfAJdnaXfQXQG3pNATzJrCfBAAATgUlEQVQBJLY+mtk5ZrbSzFZu2lSbJSGluaWNho4bcB+c\niG/BvJkUOvJ9fScYdO+/5+DjtBHZGssg7SirBHF8PU5oZu8Ennb3VWZ2bGlzwq6JfUfc/Uqi6cbp\n7u5W/5Jxqppuq5OLBXr7KgeJnt4+PrZkTVWdjnY4w6bp1uA2kUjWinLP1umcc4CTzOztRBP/vZyo\nRNEZG3uxL7ChTueXBqu222qeKqaSkdwxlOZl0pQYIkON+aR77n6+u+/r7jOA9wLL3f0M4C7g3WG3\ns4DvjnXaZGxU2201TxXTaEwuFiquYS3SjpppVtZPEjVYP0LUJnFVg9MjdZLVbbXUjhBXzx5EafMy\naWEfkQYHCHe/293fGf5+1N2PcvcD3f1Ud3+pkWmT+snqtpp09540bfZolE/TnTUluEg7yzNZn0hN\nVWpTiN+9xyfZ27UwoepJ9iCa2tssqqpKal9YvGxtYrdYjX2QdqcAIWMuT5tCT28f5y5Zs/M1ff25\nu67G5ZmFNW1KcI19kHbXTG0Q0iZGemfeP+BV9WjKm8lr7INIMpUgZMwtmDeTBTffP2RK7byyFkDc\nbZcOtm4bwIkm3XvXkfnHM2jsg8hwChBSE1WPI6jxEMcpkwq82L9j8LAD7tyyqofu/fdUxi8yQqpi\nklFLm147bRzB4mVrh02pPVqbt/arq6pIjSlAyKjlHfi2dHUPcxYtz5xIL6uJoTDBmDIpvYtsEnVV\nFRk5BQgZtbRMOL596eoeFtx8f2Zw6OosctlpswYbi6dMKkRdVMNzi089nAtPPHTYmIhioYPOlLEV\n6qoqMnJqg5BMaW0L8e0YiW0K8QFxF3/vwcxGaSOqmlq8bG2ueZDK0wSapluk1hQgJFXapHorH3+W\nW1b17MyMU/L9eJfUrAFu8fiSZ73prB5HmnBPpHYUICRVWtvC9feuZyCrv2mQd5K98iOV2i+qzdzV\nVVWkttQGIanS2hbyBAcYWsWU1kZQ7blFZOwoQEiqtAbejpzDmeO7XXTSoRQm5B8GrcZlkcZTgJBU\nSbOoFgsdnH70frlmV41XMc2f3cXiUw+nK0fGr8ZlkeagACGp0uYoumT+YUO2p5ULyqf1nj+7ixUL\n52YGCc2DJNI81EgtmUbT8Ns/sCNxe9rsqQoMIs1FAUKqVt79Nc0L2waYdfGdqWsxqEuqSHNTgGhj\n8cFuk2OL6nROKuAOW/r6EwfHZY2GLtfbt7MdonyMgwKCSHNTgGhT5aWAeEYeH9SWOjhuhEY6xkFE\nxp4CRJtKGgSXpprBcXnkHeNQ9RTiIlJT6sXUpqodiFYpOFQz06oDcxYtT50OHKqfQlxEak8Bok1V\nOxAta3BchxmnHbUfqz/9Vi4/bVauMRJ51ozQ+g4ijaUA0WbiazLkHddcaXBcafW2pat7ho2d6CwW\nUksWWRl+ninERaS+1AbRRsobpp3UmboHdcXq/rv33zO1F1O88Tmph9IBC29PPE9ahj+ts5h4Hk3B\nITJ2VIJoA6VSw7lL1gyrtqnU7Lxi4dzBzL40Ejqt5JF1d5+WsadtT5vmQ1NwiIwdlSDGoTy9e+Jj\nFiqVEtJ0dRYTz9U5qZC4vkNnRiN12ujptAxfg+lEGk8BYpxJW8QHdmaqSVVJWTqLBV7avmNY5n3c\nwVNTzpV8xKyOTiPJ8DWYTqSxxjxAmNl+wLXAq4AdwJXufoWZ7QksAWYAjwHvcffNY52+ZpfVuyee\nCecd41AsdHDRSYcOvi6eeaedK82WvuwFgpThi4wvjShBbAc+7u73mdkewCoz+yHwAeBH7r7IzBYC\nC4FPNiB9TS1P7568PX06zHjXkV1D2hjiPrZkTVVpUwOySGsZ80Zqd9/o7veFv58HHgK6gJOBa8Ju\n1wDzxzpt40Gext6stoC4AXeuu2cdMxbenjhwLe1cUyYV1IAs0gYa2ovJzGYAs4F7gVe6+0aIggiw\nd8przjGzlWa2ctOmTWOV1KaRp3fPSGbE6Ont42NL1gwJFmnnuvDEQxPXiVD1kUhrMa/R/DpVn9hs\nd+DHwGfd/VYz63X3ztjzm919StYxuru7feXKlfVOatNJ68U0ktlW05TWZwD1JBJpNWa2yt27K+7X\niABhZgXg+8Ayd/982LYWONbdN5rZPsDd7p5ZZ9GuASJJ3jUaqtHVWWTFwrk1O56INIe8AaIRvZgM\nuAp4qBQcgtuAs4BF4f/vjnXamkG8dJC2LsMZX/s5K37/7OBr5rxmTx77Y19NgwNoWguRdjfmJQgz\n+3Pgp8ADRN1cAT5F1A5xIzAdWAec6u7PJh4kaLUSRKVSQLHQwb5TduV3T78wJulRCUKkNTVtCcLd\nf0b6OvfHj2Vamk2l8Qt9/QN1Cw7lo63VK0lENBdTExltlU6eabbTXHbaLPVKEpEhNNVGE0mbwTSv\nXQsTeNnECUOWD82jw0yjnEVkGJUgmsiCeTMpdORdpWG4zVv7eWn7DnbbJbkk8bKJyZf79KP3G/E5\nRaR1KUA0m1H2GejrH+CFbcntGNu27+DMY6YPrg7XYcaZx0znkvmHje6kItKSVMXURBYvW0v/jvr1\nKpvWWeSS+YcpIIhILgoQTaCWI6Ah6pG0a6Ej99oLIiJJFCBGIc/CPXmOcd6SNYMDQmrBgUtPOUxT\nZIjIqChAjFCehXvyOP/WX9U0OEDUTVW9kkRktBQgRijPwj1QuZTR11/b8KCqJBGpFQWIEUob1NbT\n28ecRcsHM+lalDLy6lJVkoj
UkALECGUNaisFgpdNnFCxlDHBoBYdlww0b5KI1JTGQYxQ0mI6cX39\nA6kjmuOlj/cdPT3X+SoNn9NynyJSaypBVFDehnDcwVO56+FNbOjtY3KxwK6FCWzeWt3UFtM6i1V3\nbc0qZKjdQUTqQQEiQ1JPpevuWTf4fG9fP8VCR1XVRMVCB8cdPLVmi/uo3UFE6kUBIkOl6beBqjL5\nUmae57hxZsnrTGu9BhGpJ7VBZKjVyGbYmZnPn91V1bTehQ7jjKOnD2vvULWSiNSbAkSG0qR2lXQW\nC5kN1uWZed4G5a7OIovffTiXzD+MS085TOs1iMiYUhVThoEcy7EWJhgXnXQowGBj9uRiATPo3dqf\nODhuwbyZFdsgyrutamS0iIy1tg4QlUY577ZLR+rU2YNCISOegZeO25vSu6m0X1YvJnVbFZFGa9sq\nplIPpZ7ePpydg9uWru4Z3GdrpeAA9A84i5etreq4EAWJFQvncvlps9S+ICJNqW0DRNZcSiV5BzjH\nG53zHDdu/uwutS+ISFNq2yqmtJ5E8e0dZrnaIeLVQXmOW07tCyLSjNq2BJFWxx/fnmet5rw9lNSm\nICLjTdsGiKS5lMoz+0vmH8ac1+w5ZJ+D9t5tsDqoM0y18bEla5izaDlLV/fkOq6IyHjQtgEiT93/\n0tU93Lduy5DXPbH5RRbMm8llp83ipe072Ly1f0hjNKA2BRFpCeY56tibVXd3t69cubJux5+zaHli\nN9SuUF2U9pymvxCRZmZmq9y9u9J+bdtInUfWokDVvkZEZLxpqiomM3ubma01s0fMbGG9z7d0dQ9z\nFi3ngIW3D7YhxI2kYVmN0SLSKpomQJhZB/Al4ATgEOB0MzukXufLM6Ct0qJA5dQYLSKtpJmqmI4C\nHnH3RwHM7AbgZOA3tTxJ1kI95cuBxqfE2BACSRqtyyAiraaZAkQXsD72+Ang6FqeoHwBoCTlbQjx\nQWxZjdZqmBaRVtM0VUwkL7s87KbdzM4xs5VmtnLTpk1VnSDPQj0Oie0RkG/shIhIq2imAPEEEB+6\nvC+woXwnd7/S3bvdvXvq1KlVnSBvD6OsCfY0xkFE2kUzVTH9EjjIzA4AeoD3Au+r5QkmFwv09iVP\nwV2uvD2iRPMmiUi7aJoA4e7bzewjwDKgA/iGuz9Yy3PkXCBuUC2XHBURGW+aJkAAuPsdwB31Ov7m\nlAV80uRdclREpBU1UxtE3VWb4eeZ6ltEpFW1VYCoNsPv0qhoEWljbRUgqsnw1X1VRNpdWwWIpHEM\npUqnKZMKdBYL6r4qIhI0VSN1vZVPnTFN02OIiKRqqwABGscgIpJXW1UxiYhIfgoQIiKSSAFCREQS\nKUCIiEgiBQgREUlkPo6nkzCzTcDjozjEXsAzNUrOeNBu7xf0ntuF3nN19nf3iusljOsAMVpmttLd\nuxudjrHSbu8X9J7bhd5zfaiKSUREEilAiIhIonYPEFc2OgFjrN3eL+g9twu95zpo6zYIERFJ1+4l\nCBERSaEAISIiidoyQJjZ28xsrZk9YmYLG52eejCz/czsLjN7yMweNLOPhu17mtkPzex34f8pjU5r\nLZlZh5mtNrPvh8cHmNm94f0uMbNdGp3GWjOzTjO72cweDtf7z1r5OpvZx8J3+tdmdr2Z7dpq19nM\nvmFmT5vZr2PbEq+pRb4Q8rNfmdkRtUpH2wUIM+sAvgScABwCnG5mhzQ2VXWxHfi4u78OOAb4+/A+\nFwI/cveDgB+Fx63ko8BDscf/DlwW3u9m4OyGpKq+rgD+y90PBg4nev8teZ3NrAv4R6Db3V8PdADv\npfWu89XA28q2pV3TE4CDwr9zgK/UKhFtFyCAo4BH3P1Rd98G3ACc3OA01Zy7b3T3+8LfzxNlGl1E\n7/WasNs1wPzGpLD2zGxf4B3A18NjA+YCN4ddWur9ApjZy4E3A1cBuPs2d++lha8z0To2RTObCEwC\nNtJi19ndfwI8W7Y57ZqeDFzrkXuATjPbpxbpaMcA0QWsjz1+ImxrWWY2A5gN3Au80t03QhREgL0b\nl7Kauxz4BLAjPH4F0Ovu28PjVrzWrwY2Ad8MVWtfN7PdaNHr7O49wOeAdUSBYQuwita/zpB+TeuW\np7VjgLCEbS3b19fMdgduAc519+canZ56MbN3Ak+7+6r45oRdW+1aTwSOAL7i7rOBF2iR6qQkod79\nZOAAYBqwG1EVS7lWu85Z6vY9b8cA8QSwX+zxvsCGBqWlrsysQBQcvuXut4bNT5WKn+H/pxuVvhqb\nA5xkZo8RVRvOJSpRdIaqCGjNa/0E8IS73xse30wUMFr1Or8F+IO7b3L3fuBW4I20/nWG9Gtatzyt\nHQPEL4GDQq+HXYgauG5rcJpqLtS/XwU85O6fjz11G3BW+Pss4LtjnbZ6cPfz3X1fd59BdE2Xu/sZ\nwF3Au8NuLfN+S9z9SWC9mc0Mm44HfkOLXmeiqqVjzGxS+I6X3m9LX+cg7ZreBrw/9GY6BthSqooa\nrbYcSW1mbye6u+wAvuHun21wkmrOzP4c+CnwADvr5D9F1A5xIzCd6Md2qruXN4aNa2Z2LPBP7v5O\nM3s1UYliT2A1cKa7v9TI9NWamc0iapjfBXgU+CDRzV9LXmczuxg4jain3mrgr4nq3FvmOpvZ9cCx\nRFN6PwVcCCwl4ZqGQPlFol5PW4EPuvvKmqSjHQOEiIhU1o5VTCIikoMChIiIJFKAEBGRRAoQIiKS\nSAFCREQSKUDIuGRmA2a2JszoeZOZTRrFsY6Nzf56UtYMv2Hm1L8bwTkuMrN/Gmkaa30ckTwUIGS8\n6nP3WWFGz23A38afDIOGqv5+u/tt7r4oY5dOoOoAITIeKUBIK/gpcKCZzQjrIXwZuA/Yz8zeamY/\nN7P7QkljdxhcE+RhM/sZcErpQGb2ATP7Yvj7lWb2HTO7P/x7I7AIeE0ovSwO+y0ws1+Gufgvjh3r\nny1ad+S/gZmUMbPJZvZYKZCF0cHrzaxgZn8Tjnm/md2SVEIys7vNrDv8vVeYZqS0JsbiWJo+FLbv\nY2Y/iZW83lSLD19alwKEjGth/p0TiEaMQ5QRXxubuO4C4C3ufgSwEjjPzHYFvgacCLwJeFXK4b8A\n/NjdDyea3+hBoonwfh9KLwvM7K1E8/AfBcwCjjSzN5vZkURTfswmCkBvKD+4u28B7gf+d9h0IrCs\nNMeQu78hnPshqlvf4Gyi6RbeEM77N2Z2APC+cPxZROtGrKnimNKGJlbeRaQpFc2slMH9lGjeqWnA\n42FOfIgWSjoEWBHNRsAuwM+Bg4kmfPsdgJldR7TQSrm5wPsB3H0A2GLDV2Z7a/i3OjzenShg7AF8\nx923hnOkzfe1hGjaiLuIAsqXw/bXm9klRFVauwPLsj6MhDT9LzMrzU00OaTpl8A3wiSOS91dAUIy\nKUDIeNUX7oQHhSDwQnwT8EN3P71sv1nUbjpoAy519/8oO8e5Oc9xG3Cpme0JHAksD9uv
Bua7+/1m\n9gGieXnKbWdnLcCuZWn6B3cfFlTM7M1Eiyr9p5ktdvdrc6RR2pSqmKSV3QPMMbMDYbCO/7XAw8AB\nZvaasN/pKa//EfDh8NoOi1Zve56odFCyDPirWNtGl5ntDfwE+AszK5rZHkTVR8O4+5+AXxAtG/r9\nUFIhnGNjuNs/IyV9jxEFFdg5k2kpTR8Or8XMXmtmu5nZ/kRrZnyNqMRVs7WLpTWpBCEty903hbvv\n683sZWHzBe7+WzM7B7jdzJ4Bfga8PuEQHwWuNLOzgQHgw+7+czNbYdFi8j8I7RCvA34eSjB/IppJ\n9D4zW0JUz/84UTVYmiXATQwtJfwL0cy7jxO1r+wx/GV8DrjRzP6SnSUPiGZ2nQHcF2b63ES0POWx\nwAIz6w/pfH9GmkQ0m6uIiCRTFZOIiCRSgBARkUQKECIikkgBQkREEilAiIhIIgUIERFJpAAhIiKJ\n/j9qBqvLroC2NwAAAABJRU5ErkJggg==\n", 349 | "text/plain": [ 350 | "" 351 | ] 352 | }, 353 | "metadata": {}, 354 | "output_type": "display_data" 355 | } 356 | ], 357 | "source": [ 358 | "#we can now run the model with the test data\n", 359 | "y_predicted = lm.predict(x_test_set)\n", 360 | "\n", 361 | "plt.title('Comparison of Y values in test and the Predicted values')\n", 362 | "plt.ylabel('Test Set')\n", 363 | "plt.xlabel('Predicted values')\n", 364 | "plt.scatter(y_predicted, y_test_set)\n", 365 | "plt.show()" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "As expected we get a really good fit!" 373 | ] 374 | } 375 | ], 376 | "metadata": { 377 | "kernelspec": { 378 | "display_name": "Python 3", 379 | "language": "python", 380 | "name": "python3" 381 | }, 382 | "language_info": { 383 | "codemirror_mode": { 384 | "name": "ipython", 385 | "version": 3 386 | }, 387 | "file_extension": ".py", 388 | "mimetype": "text/x-python", 389 | "name": "python", 390 | "nbconvert_exporter": "python", 391 | "pygments_lexer": "ipython3", 392 | "version": "3.6.3" 393 | } 394 | }, 395 | "nbformat": 4, 396 | "nbformat_minor": 2 397 | } 398 | -------------------------------------------------------------------------------- /data/linear_regression_test.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 77,79.77515201 3 | 21,23.17727887 4 | 22,25.60926156 5 | 20,17.85738813 6 | 36,41.84986439 7 | 15,9.805234876 8 | 62,58.87465933 9 | 95,97.61793701 10 | 20,18.39512747 11 | 5,8.746747654 12 | 4,2.811415826 13 | 19,17.09537241 14 | 96,95.14907176 15 | 62,61.38800663 16 | 36,40.24701716 17 | 15,14.82248589 18 | 65,66.95806869 19 | 14,16.63507984 20 | 87,90.65513736 21 | 69,77.22982636 22 | 89,92.11906278 23 | 51,46.91387709 24 | 89,89.82634442 25 | 27,21.71380347 26 | 97,97.41206981 27 | 58,57.01631363 28 | 79,78.31056542 29 | 21,19.1315097 30 | 93,93.03483388 31 | 27,26.59112396 32 | 99,97.55155344 33 | 31,31.43524822 34 | 33,35.12724777 35 | 80,78.61042432 36 | 28,33.07112825 37 | 47,51.69967172 38 | 53,53.62235225 39 | 69,69.46306072 40 | 28,27.42497237 41 | 33,36.34644189 42 | 91,95.06140858 43 | 71,68.16724757 44 | 50,50.96155532 45 | 76,78.04237454 46 | 4,5.607664865 47 | 37,36.11334779 48 | 70,67.2352155 49 | 68,65.01324035 50 | 40,38.14753871 51 | 35,34.31141446 52 | 94,95.28503937 53 | 88,87.84749912 54 | 52,54.08170635 55 | 31,31.93063515 56 | 59,59.61247085 57 | 0,-1.040114209 58 | 39,47.49374765 59 | 64,62.60089773 60 | 69,70.9146434 61 | 57,56.14834113 62 | 13,14.05572877 63 | 72,68.11367147 64 | 76,75.59701346 65 | 61,59.225745 66 | 82,85.45504157 67 | 18,17.76197116 68 | 41,38.68888682 69 | 50,50.96343637 70 | 55,51.83503872 71 | 13,17.0761107 72 | 46,46.56141773 73 | 13,10.34754461 74 | 79,77.91032969 75 | 53,50.17008622 76 | 15,13.25690647 77 | 28,31.32274932 78 | 81,73.9308764 79 | 69,74.45114379 80 | 52,52.01932286 81 | 84,83.68820499 82 | 68,70.3698748 83 | 27,23.44479161 84 | 56,49.83051801 85 | 48,49.88226593 86 | 
40,41.04525583 87 | 39,33.37834391 88 | 82,81.29750133 89 | 100,105.5918375 90 | 59,56.82457013 91 | 43,48.67252645 92 | 67,67.02150613 93 | 38,38.43076389 94 | 63,58.61466887 95 | 91,89.12377509 96 | 60,60.9105427 97 | 14,13.83959878 98 | 21,16.89085185 99 | 87,84.06676818 100 | 73,70.34969772 101 | 32,33.38474138 102 | 2,-1.63296825 103 | 82,88.54475895 104 | 19,17.44047622 105 | 74,75.69298554 106 | 42,41.97607107 107 | 12,12.59244741 108 | 1,0.275307261 109 | 90,98.13258005 110 | 89,87.45721555 111 | 0,-2.344738542 112 | 41,39.3294153 113 | 16,16.68715211 114 | 94,96.58888601 115 | 97,97.70342201 116 | 66,67.01715955 117 | 24,25.63476257 118 | 17,13.41310757 119 | 90,95.15647284 120 | 13,9.744164258 121 | 0,-3.467883789 122 | 64,62.82816355 123 | 96,97.27405461 124 | 98,95.58017185 125 | 12,7.468501839 126 | 41,45.44599591 127 | 47,46.69013968 128 | 78,74.4993599 129 | 20,21.63500655 130 | 89,91.59548851 131 | 29,26.49487961 132 | 64,67.38654703 133 | 75,74.25362837 134 | 12,12.07991648 135 | 25,21.32273728 136 | 28,29.31770045 137 | 30,26.48713683 138 | 65,68.94699774 139 | 59,59.10598995 140 | 64,64.37521087 141 | 53,60.20758349 142 | 71,70.34329706 143 | 97,97.1082562 144 | 73,75.7584178 145 | 9,10.80462727 146 | 12,12.11219941 147 | 63,63.28312382 148 | 99,98.03017721 149 | 60,63.19354354 150 | 35,34.8534823 151 | 2,-2.819913974 152 | 60,59.8313966 153 | 32,29.38505024 154 | 94,97.00148372 155 | 84,85.18657275 156 | 63,61.74063192 157 | 22,18.84798163 158 | 81,78.79008525 159 | 93,95.12400481 160 | 33,30.48881287 161 | 7,10.41468095 162 | 42,38.98317436 163 | 46,46.11021062 164 | 54,52.45103628 165 | 16,21.16523945 166 | 49,52.28620611 167 | 43,44.18863945 168 | 95,97.13832018 169 | 66,67.22008001 170 | 21,18.98322306 171 | 35,24.3884599 172 | 80,79.44769523 173 | 37,40.03504862 174 | 54,53.32005764 175 | 56,54.55446979 176 | 1,-2.761182595 177 | 32,37.80182795 178 | 58,57.48741435 179 | 32,36.06292994 180 | 46,49.83538167 181 | 72,74.68953276 182 | 17,14.86159401 183 | 97,101.0697879 184 | 93,99.43577876 185 | 91,91.69240746 186 | 37,34.12473248 187 | 4,6.079390073 188 | 54,59.07247174 189 | 51,56.43046022 190 | 27,30.49412933 191 | 46,48.35172635 192 | 92,89.73153611 193 | 73,72.86282528 194 | 77,80.97144285 195 | 91,91.36566374 196 | 61,60.07137496 197 | 99,99.87382707 198 | 4,8.655714172 199 | 72,69.39858505 200 | 19,19.38780134 201 | 57,53.11628433 202 | 78,78.39683006 203 | 26,25.75612514 204 | 74,75.07484683 205 | 90,92.88772282 206 | 66,69.45498498 207 | 13,13.12109842 208 | 40,48.09843134 209 | 77,79.3142548 210 | 67,68.48820749 211 | 75,73.2300846 212 | 23,24.68362712 213 | 45,41.90368917 214 | 59,62.22635684 215 | 44,45.96396877 216 | 23,23.52647153 217 | 55,51.80035866 218 | 55,51.10774273 219 | 95,95.79747345 220 | 12,9.241138977 221 | 4,7.646529763 222 | 7,9.281699753 223 | 100,103.5266162 224 | 48,47.41006725 225 | 42,42.03835773 226 | 96,96.11982476 227 | 39,38.05766408 228 | 100,105.4503788 229 | 87,88.80306911 230 | 14,15.49301141 231 | 14,12.42624606 232 | 37,40.00709598 233 | 5,5.634030902 234 | 88,87.36938931 235 | 91,89.73951993 236 | 65,66.61499643 237 | 74,72.9138853 238 | 56,57.19103506 239 | 16,11.21710477 240 | 5,0.676076749 241 | 28,28.15668543 242 | 92,95.3958003 243 | 46,52.05490703 244 | 54,59.70864577 245 | 39,36.79224762 246 | 44,37.08457698 247 | 31,24.18437976 248 | 68,67.28725332 249 | 86,82.870594 250 | 90,89.899991 251 | 38,36.94173178 252 | 21,19.87562242 253 | 95,90.71481654 254 | 56,61.09367762 255 | 60,60.11134958 256 | 65,64.83296316 257 | 
78,81.40381769 258 | 89,92.40217686 259 | 6,2.576625376 260 | 67,63.80768172 261 | 36,38.67780759 262 | 16,16.82839701 263 | 100,99.78687252 264 | 45,44.68913433 265 | 73,71.00377824 266 | 57,51.57326718 267 | 20,19.87846479 268 | 76,79.50341495 269 | 34,34.58876491 270 | 55,55.7383467 271 | 72,68.19721905 272 | 55,55.81628509 273 | 8,9.391416798 274 | 56,56.01448111 275 | 72,77.9969477 276 | 58,55.37049953 277 | 6,11.89457829 278 | 96,94.79081712 279 | 23,25.69041546 280 | 58,53.52042319 281 | 23,18.31396758 282 | 19,21.42637785 283 | 25,30.41303282 284 | 64,67.68142149 285 | 21,17.0854783 286 | 59,60.91792707 287 | 19,14.99514319 288 | 16,16.74923937 289 | 42,41.46923883 290 | 43,42.84526108 291 | 61,59.12912974 292 | 92,91.30863673 293 | 11,8.673336357 294 | 41,39.31485292 295 | 1,5.313686205 296 | 8,5.405220518 297 | 71,68.5458879 298 | 46,47.33487629 299 | 55,54.09063686 300 | 62,63.29717058 301 | 47,52.45946688 -------------------------------------------------------------------------------- /data/linear_regression_train.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 24,21.54945196 3 | 50,47.46446305 4 | 15,17.21865634 5 | 38,36.58639803 6 | 87,87.28898389 7 | 36,32.46387493 8 | 12,10.78089683 9 | 81,80.7633986 10 | 25,24.61215147 11 | 5,6.963319071 12 | 16,11.23757338 13 | 16,13.53290206 14 | 24,24.60323899 15 | 39,39.40049976 16 | 54,48.43753838 17 | 60,61.69900319 18 | 26,26.92832418 19 | 73,70.4052055 20 | 29,29.34092408 21 | 31,25.30895192 22 | 68,69.02934339 23 | 87,84.99484703 24 | 58,57.04310305 25 | 54,50.5921991 26 | 84,83.02772202 27 | 58,57.05752706 28 | 49,47.95883341 29 | 20,24.34226432 30 | 90,94.68488281 31 | 48,48.03970696 32 | 4,7.08132338 33 | 25,21.99239907 34 | 42,42.33151664 35 | 0,0.329089443 36 | 60,61.92303698 37 | 93,91.17716423 38 | 39,39.45358014 39 | 7,5.996069607 40 | 21,22.59015942 41 | 68,61.18044414 42 | 84,85.02778957 43 | 0,-1.28631089 44 | 58,61.94273962 45 | 19,21.96033347 46 | 36,33.66194193 47 | 19,17.60946242 48 | 59,58.5630564 49 | 51,52.82390762 50 | 19,22.1363481 51 | 33,35.07467353 52 | 85,86.18822311 53 | 44,42.63227697 54 | 5,4.09817744 55 | 59,61.2229864 56 | 14,17.70677576 57 | 9,11.85312574 58 | 75,80.23051695 59 | 69,62.64931741 60 | 10,9.616859804 61 | 17,20.02797699 62 | 58,61.7510743 63 | 74,71.61010303 64 | 21,23.77154623 65 | 51,51.90142035 66 | 19,22.66073682 67 | 50,50.02897927 68 | 24,26.68794368 69 | 0,0.376911899 70 | 12,6.806419002 71 | 75,77.33986001 72 | 21,28.90260209 73 | 64,66.7346608 74 | 5,0.707510638 75 | 58,57.07748383 76 | 32,28.41453196 77 | 41,44.46272123 78 | 7,7.459605998 79 | 4,2.316708112 80 | 5,4.928546187 81 | 49,52.50336074 82 | 90,91.19109623 83 | 3,8.489164326 84 | 11,6.963371967 85 | 32,31.97989959 86 | 83,81.4281205 87 | 25,22.62365422 88 | 83,78.52505087 89 | 26,25.80714057 90 | 76,73.51081775 91 | 95,91.775467 92 | 53,49.21863516 93 | 77,80.50445387 94 | 42,50.05636123 95 | 25,25.46292549 96 | 54,55.32164264 97 | 55,59.1244888 98 | 0,1.100686692 99 | 73,71.98020786 100 | 35,30.13666408 101 | 86,83.88427405 102 | 90,89.91004752 103 | 13,8.335654576 104 | 46,47.88388961 105 | 46,45.00397413 106 | 32,31.15664574 107 | 8,9.190375682 108 | 71,74.83135003 109 | 28,30.23177607 110 | 24,24.21914027 111 | 56,57.87219151 112 | 49,50.61728392 113 | 79,78.67470043 114 | 90,86.236707 115 | 89,89.10409255 116 | 41,43.26595082 117 | 27,26.68273277 118 | 58,59.46383041 119 | 26,28.90055826 120 | 31,31.300416 121 | 70,71.1433266 122 | 71,68.4739206 123 | 
39,39.98238856 124 | 7,4.075776144 125 | 48,47.85817542 126 | 56,51.20390217 127 | 45,43.9367213 128 | 41,38.13626679 129 | 3,3.574661632 130 | 37,36.4139958 131 | 24,22.21908523 132 | 68,63.5312572 133 | 47,49.86702787 134 | 27,21.53140009 135 | 68,64.05710234 136 | 74,70.77549842 137 | 95,92.15749762 138 | 79,81.22259156 139 | 21,25.10114067 140 | 95,94.08853397 141 | 54,53.25166165 142 | 56,59.16236621 143 | 80,75.24148428 144 | 26,28.22325833 145 | 25,25.33323728 146 | 8,6.364615703 147 | 95,95.4609216 148 | 94,88.64183756 149 | 54,58.70318693 150 | 7,6.815491279 151 | 99,99.40394676 152 | 36,32.77049249 153 | 48,47.0586788 154 | 65,60.53321778 155 | 42,40.30929858 156 | 93,89.42222685 157 | 86,86.82132066 158 | 26,26.11697543 159 | 51,53.26657596 160 | 100,96.62327888 161 | 94,95.78441027 162 | 6,6.047286687 163 | 24,24.47387908 164 | 75,75.96844763 165 | 7,3.829381009 166 | 53,52.51703683 167 | 73,72.80457527 168 | 16,14.10999096 169 | 80,80.86087062 170 | 77,77.01988215 171 | 89,86.26972444 172 | 80,77.13735466 173 | 55,51.47649476 174 | 19,17.34557531 175 | 56,57.72853572 176 | 47,44.15029394 177 | 56,59.24362743 178 | 2,-1.053275611 179 | 82,86.79002254 180 | 57,60.14031858 181 | 44,44.04222058 182 | 26,24.5227488 183 | 52,52.95305521 184 | 41,43.16133498 185 | 44,45.67562576 186 | 3,-2.830749501 187 | 31,29.19693178 188 | 97,96.49812401 189 | 21,22.5453232 190 | 17,20.10741433 191 | 7,4.035430253 192 | 61,61.14568518 193 | 10,13.97163653 194 | 52,55.34529893 195 | 10,12.18441166 196 | 65,64.00077658 197 | 71,70.3188322 198 | 4,-0.936895047 199 | 24,18.91422276 200 | 26,23.87590331 201 | 51,47.5775361 202 | 42,43.2736092 203 | 62,66.48278755 204 | 74,75.72605529 205 | 77,80.59643338 206 | 3,-2.235879852 207 | 50,47.04654956 208 | 24,21.59635575 209 | 37,32.87558963 210 | 58,57.95782956 211 | 52,52.24760027 212 | 27,24.58286902 213 | 14,12.12573805 214 | 100,100.0158026 215 | 3530.15736917 216 | 72,74.04682658 217 | 5,1.611947467 218 | 71,70.36836307 219 | 54,52.26831735 220 | 84,83.1286166 221 | 42,43.64765048 222 | 54,49.44785426 223 | 74,72.6356699 224 | 54,52.78130641 225 | 53,57.11195136 226 | 78,79.1050629 227 | 97,101.6228548 228 | 49,53.5825402 229 | 71,68.92139297 230 | 48,46.9666961 231 | 51,51.02642868 232 | 89,85.52073551 233 | 99,99.51685756 234 | 93,94.63911256 235 | 49,46.78357742 236 | 18,21.21321959 237 | 65,58.37266004 238 | 83,87.22059677 239 | 100,102.4967859 240 | 41,43.88314335 241 | 52,53.06655757 242 | 29,26.33464785 243 | 97,98.52008934 244 | 7,9.400497579 245 | 51,52.94026699 246 | 58,53.83020877 247 | 50,45.94511142 248 | 67,65.0132736 249 | 89,86.5069584 250 | 76,75.63280796 251 | 35,36.78035027 252 | 99,100.5328916 253 | 31,29.04466136 254 | 52,51.70352433 255 | 11,9.199954718 256 | 66,71.70015848 257 | 50,49.82634062 258 | 39,37.49971096 259 | 60,53.65084683 260 | 35,33.92561965 261 | 53,49.92639685 262 | 14,8.148154262 263 | 49,49.72359037 264 | 16,16.16712757 265 | 76,75.30033002 266 | 13,9.577368568 267 | 51,48.38088357 268 | 70,72.95331671 269 | 98,92.59573853 270 | 86,88.85523586 271 | 100,99.00361771 272 | 46,45.09439571 273 | 51,46.94362684 274 | 50,48.33449605 275 | 91,94.92329574 276 | 48,47.78165248 277 | 81,81.28960746 278 | 38,37.83155021 279 | 40,39.69185252 280 | 79,76.92664854 281 | 96,88.02990531 282 | 60,56.99178872 283 | 70,72.58929383 284 | 44,44.98103442 285 | 11,11.99017641 286 | 6,1.919513328 287 | 5,1.628826073 288 | 72,66.27746655 289 | 55,57.53887255 290 | 95,94.70291077 291 | 41,41.21469904 292 | 25,25.04169243 293 | 
1,3.778209914 294 | 55,50.50711779 295 | 4,9.682408486 296 | 48,48.88147608 297 | 55,54.40348599 298 | 75,71.70233156 299 | 68,69.35848388 300 | 100,99.98491591 301 | 25,26.03323718 302 | 75,75.48910307 303 | 34,36.59623056 304 | 38,40.95102191 305 | 92,86.78316267 306 | 21,15.50701184 307 | 88,85.86077871 308 | 75,79.20610113 309 | 76,80.80643766 310 | 44,48.59717283 311 | 10,13.93415049 312 | 21,27.3051179 313 | 16,14.00226297 314 | 32,33.67416 315 | 13,13.11612884 316 | 26,24.76649193 317 | 70,73.68477876 318 | 77,77.53149541 319 | 77,76.24503196 320 | 88,88.0578931 321 | 35,35.02445799 322 | 24,21.65857739 323 | 17,17.33681562 324 | 91,94.36778957 325 | 32,33.43396307 326 | 36,32.52179399 327 | 89,90.57741298 328 | 69,71.25634126 329 | 30,31.23212856 330 | 6,5.398840061 331 | 22,18.56241391 332 | 67,71.97121038 333 | 9,5.225759566 334 | 74,73.5964342 335 | 50,49.76948983 336 | 85,82.69087513 337 | 3,1.652309089 338 | 0,-3.836652144 339 | 59,62.03811556 340 | 62,61.26514581 341 | 17,13.24991628 342 | 90,88.61672694 343 | 23,21.13655528 344 | 19,23.85017475 345 | 93,92.01203405 346 | 14,10.26712261 347 | 58,54.14681616 348 | 87,87.00645713 349 | 37,37.69447352 350 | 20,19.62278654 351 | 35,34.78561007 352 | 63,62.03190983 353 | 56,52.67003801 354 | 62,58.09031476 355 | 98,97.19448821 356 | 90,90.50155298 357 | 51,50.5123462 358 | 93,94.45211871 359 | 22,21.10794636 360 | 38,37.36298431 361 | 13,10.28574844 362 | 98,96.04932416 363 | 99,100.0953697 364 | 31,30.6063167 365 | 94,96.19000542 366 | 73,71.30828034 367 | 37,34.59311043 368 | 23,19.02332876 369 | 11,10.76669688 370 | 88,90.5799868 371 | 47,48.71787679 372 | 79,78.74139764 373 | 91,85.23492274 374 | 71,71.65789964 375 | 10,8.938990554 376 | 39,39.89606046 377 | 92,91.85091116 378 | 99,99.11200375 379 | 28,26.22196486 380 | 32,33.21584226 381 | 32,35.72392691 382 | 75,76.88604495 383 | 99,99.30874567 384 | 27,25.77161074 385 | 64,67.85169407 386 | 98,98.50371084 387 | 38,31.11331895 388 | 46,45.51171028 389 | 13,12.65537808 390 | 96,95.56065366 391 | 9,9.526431641 392 | 34,36.10893209 393 | 49,46.43628318 394 | 1,-3.83998112 395 | 50,48.97302037 396 | 94,93.25305499 397 | 27,23.47650968 398 | 20,17.13551132 399 | 12,14.55896144 400 | 45,41.53992729 401 | 91,91.64730552 402 | 61,66.16652565 403 | 10,9.230857489 404 | 47,47.41377893 405 | 33,34.76441561 406 | 84,86.10796637 407 | 24,21.81267954 408 | 48,48.89963951 409 | 48,46.78108638 410 | 9,12.91328547 411 | 93,94.55203143 412 | 99,94.97068753 413 | 8,2.379172481 414 | 20,21.47982988 415 | 38,35.79795462 416 | 78,82.0763803 417 | 81,78.87097714 418 | 42,47.2492425 419 | 95,96.18852325 420 | 78,78.38491927 421 | 44,42.94274064 422 | 68,64.43231595 423 | 87,84.21191485 424 | 58,57.3069783 425 | 52,52.52101436 426 | 26,25.7440243 427 | 75,75.42283401 428 | 48,53.62523007 429 | 71,75.14466308 430 | 77,74.12151511 431 | 34,36.24807243 432 | 24,20.21665898 433 | 70,66.94758118 434 | 29,34.07278254 435 | 76,73.13850045 436 | 98,92.85929155 437 | 28,28.36793808 438 | 87,85.59308727 439 | 9,10.68453755 440 | 87,86.10708624 441 | 33,33.22031418 442 | 64,66.09563422 443 | 17,19.30486546 444 | 49,48.84542083 445 | 95,93.73176312 446 | 75,75.45758614 447 | 89,91.24239226 448 | 81,87.15690853 449 | 25,25.53752833 450 | 47,46.06629478 451 | 50,49.65277661 452 | 5,7.382244165 453 | 68,71.11189935 454 | 84,83.50570521 455 | 8,8.791139893 456 | 41,33.30638903 457 | 26,26.40362524 458 | 89,91.72960726 459 | 78,82.53030719 460 | 34,36.67762733 461 | 92,86.98450355 462 | 27,32.34784175 463 | 
12,16.78353974 464 | 2,1.576584383 465 | 22,17.4618141 466 | 0,2.116113029 467 | 26,24.34804332 468 | 50,48.29491198 469 | 84,85.52145453 470 | 70,73.71434779 471 | 66,63.15189497 472 | 42,38.46213684 473 | 19,19.47100788 474 | 94,94.07428225 475 | 71,67.92051286 476 | 19,22.58096241 477 | 16,16.01629889 478 | 49,48.43307886 479 | 29,29.6673599 480 | 29,26.65566328 481 | 86,86.28206739 482 | 50,50.82304924 483 | 86,88.57251713 484 | 30,32.59980745 485 | 23,21.02469368 486 | 20,20.72894979 487 | 16,20.38051187 488 | 57,57.25180153 489 | 8,6.967537054 490 | 8,10.240085 491 | 62,64.94841088 492 | 55,55.35893915 493 | 30,31.24365589 494 | 86,90.72048818 495 | 62,58.750127 496 | 51,55.85003198 497 | 61,60.19925869 498 | 86,85.03295412 499 | 61,60.38823085 500 | 21,18.44679787 501 | 81,82.18839247 502 | 97,94.2963344 503 | 5,7.682024586 504 | 61,61.01858089 505 | 47,53.60562216 506 | 98,94.47728801 507 | 30,27.9645947 508 | 63,62.55662585 509 | 0,1.406254414 510 | 100,101.7003412 511 | 18,13.84973988 512 | 30,28.99769315 513 | 98,99.04315693 514 | 16,15.56135514 515 | 22,24.63528393 516 | 55,53.98393374 517 | 43,42.91449728 518 | 75,74.29662112 519 | 91,91.17012883 520 | 46,49.42440876 521 | 85,82.47683519 522 | 55,56.15303953 523 | 36,37.17063131 524 | 49,46.36928662 525 | 94,97.02383456 526 | 43,40.83182104 527 | 22,24.08498313 528 | 37,41.14386358 529 | 24,21.97388066 530 | 95,100.740897 531 | 61,61.19971596 532 | 75,74.39517002 533 | 68,69.04377173 534 | 58,56.68718792 535 | 5,5.860391715 536 | 53,55.72021356 537 | 80,79.22021816 538 | 83,86.30177517 539 | 25,25.26971886 540 | 34,36.33294447 541 | 26,27.65574228 542 | 90,94.79690531 543 | 60,58.67366671 544 | 49,56.15934471 545 | 19,18.40919388 546 | 92,86.26936988 547 | 29,26.59436195 548 | 8,8.452520159 549 | 57,56.18131518 550 | 29,27.65452669 551 | 19,20.87391785 552 | 81,77.83354439 553 | 50,50.01787825 554 | 15,9.290856256 555 | 70,75.0284725 556 | 39,38.3037698 557 | 43,44.70786405 558 | 21,22.51016575 559 | 98,102.4959452 560 | 86,86.76845244 561 | 16,13.89748578 562 | 25,24.81824269 563 | 31,33.94224862 564 | 93,92.26970059 565 | 67,68.73365081 566 | 49,47.38516883 567 | 25,32.37576914 568 | 88,87.67388681 569 | 54,54.57648371 570 | 21,18.06450222 571 | 8,7.896539841 572 | 32,35.00341078 573 | 35,36.72823317 574 | 67,65.84975426 575 | 90,89.59295492 576 | 59,61.69026202 577 | 15,11.60499315 578 | 67,71.0826803 579 | 42,43.71901164 580 | 44,41.57421008 581 | 77,74.25552425 582 | 68,66.28310437 583 | 36,36.62438077 584 | 11,10.32374866 585 | 10,7.156457657 586 | 65,67.88603132 587 | 98,101.1097591 588 | 98,98.6132033 589 | 49,50.19083844 590 | 31,27.83896261 591 | 56,55.9249564 592 | 70,76.47340872 593 | 91,92.05756378 594 | 25,27.35245439 595 | 54,55.32083476 596 | 39,41.39990349 597 | 91,93.59057024 598 | 3,5.297054029 599 | 22,21.01429422 600 | 2,2.267059451 601 | 2,-0.121860502 602 | 65,66.49546208 603 | 71,73.83637687 604 | 42,42.10140878 605 | 76,77.35135732 606 | 43,41.02251779 607 | 8,14.75305272 608 | 86,83.28199022 609 | 87,89.93374342 610 | 3,2.286571686 611 | 58,55.61421297 612 | 62,62.15313408 613 | 89,89.55803528 614 | 95,94.00291863 615 | 28,26.78023848 616 | 0,-0.764537626 617 | 1,0.282866003 618 | 49,44.26800515 619 | 21,19.85174138 620 | 46,47.15960005 621 | 11,8.359366572 622 | 89,92.08157084 623 | 37,41.88734051 624 | 29,30.5413129 625 | 44,46.87654473 626 | 96,96.35659485 627 | 16,17.9170699 628 | 74,71.67949917 629 | 35,32.64997554 630 | 42,39.34482965 631 | 16,17.03401999 632 | 56,52.87524074 633 | 
18,15.85414849 634 | 100,108.8716183 635 | 54,49.30477253 636 | 92,89.4749477 637 | 63,63.67348242 638 | 81,83.78410946 639 | 73,73.51136922 640 | 48,46.80297244 641 | 1,5.809946802 642 | 85,85.23027975 643 | 14,10.58213964 644 | 25,21.37698317 645 | 45,46.0537745 646 | 98,95.2389253 647 | 97,94.15149206 648 | 58,54.54868046 649 | 93,87.36260449 650 | 88,88.47741598 651 | 89,84.48045678 652 | 47,48.79647071 653 | 6,10.76675683 654 | 34,30.48882921 655 | 30,29.76846185 656 | 16,13.51574749 657 | 86,86.12955884 658 | 40,43.30022747 659 | 52,51.92110232 660 | 15,16.49185287 661 | 4,7.998073432 662 | 95,97.66689567 663 | 99,89.80545367 664 | 35,38.07166567 665 | 58,60.27852322 666 | 10,6.709195759 667 | 16,18.35488924 668 | 53,56.37058203 669 | 58,62.80064204 670 | 42,41.25155632 671 | 24,19.42637541 672 | 84,82.88935804 673 | 64,63.61364981 674 | 12,11.29627199 675 | 61,60.02274882 676 | 75,72.60339326 677 | 15,11.87964573 678 | 100,100.7012737 679 | 43,45.12420809 680 | 13,14.81106804 681 | 48,48.09368034 682 | 45,42.29145672 683 | 52,52.73389794 684 | 34,36.72396986 685 | 30,28.64535198 686 | 65,62.16675273 687 | 100,95.58459518 688 | 67,66.04325304 689 | 99,99.9566225 690 | 45,46.14941984 691 | 87,89.13754963 692 | 73,69.71787806 693 | 9,12.31736648 694 | 81,78.20296268 695 | 72,71.30995371 696 | 81,81.45544709 697 | 58,58.59500642 698 | 93,94.62509374 699 | 82,88.60376995 700 | 66,63.64868529 701 | 97,94.9752655 -------------------------------------------------------------------------------- /digit_recognizer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Deep Learning Project\n", 8 | "## Build a Digit Recognition Program" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "In this project, we will design and implement a deep learning model that learns to recognize sequences of digits. We will train the model using synthetic data generated by concatenating character images from [MNIST](http://yann.lecun.com/exdb/mnist/). \n", 16 | "\n", 17 | "To produce a synthetic sequence of digits for testing, we will limit the to sequences to up to five digits, and use five classifiers on top of your deep network. We will incorporate an additional ‘blank’ character to account for shorter number sequences.\n", 18 | "\n", 19 | "We will use ** Keras ** to implement the model. You can read more about Keras at [keras.io](https://keras.io/)." 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "### Implementation\n", 27 | "\n", 28 | "Let's start by importing the modules we'll require fot this project." 
29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "name": "stderr", 38 | "output_type": "stream", 39 | "text": [ 40 | "Using TensorFlow backend.\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "#Module Imports\n", 46 | "from __future__ import print_function\n", 47 | "import random\n", 48 | "from os import listdir\n", 49 | "import glob\n", 50 | "\n", 51 | "import numpy as np\n", 52 | "from scipy import misc\n", 53 | "import tensorflow as tf\n", 54 | "import h5py\n", 55 | "\n", 56 | "from keras.datasets import mnist\n", 57 | "from keras.utils import np_utils\n", 58 | "\n", 59 | "import matplotlib.pyplot as plt\n", 60 | "%matplotlib inline" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 2, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "#Setting the random seed so that the results are reproducible. \n", 70 | "random.seed(101)\n", 71 | "\n", 72 | "#Setting variables for MNIST image dimensions\n", 73 | "mnist_image_height = 28\n", 74 | "mnist_image_width = 28" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "name": "stdout", 84 | "output_type": "stream", 85 | "text": [ 86 | "Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz\n", 87 | "11493376/11490434 [==============================] - 11s 1us/step\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "#Import MNIST data from keras\n", 93 | "(X_train, y_train), (X_test, y_test) = mnist.load_data()" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 4, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "Shape of training dataset: (60000, 28, 28)\n", 106 | "Shape of test dataset: (10000, 28, 28)\n", 107 | "Label for image: 5\n" 108 | ] 109 | }, 110 | { 111 | "data": { 112 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADgpJREFUeJzt3X+MVfWZx/HPs1j+kKI4aQRCYSnEYJW4082IjSWrxkzVDQZHrekkJjQapn8wiU02ZA3/VNNgyCrslmiamaZYSFpKE3VB0iw0otLGZuKIWC0srTFsO3IDNTjywx9kmGf/mEMzxbnfe+fec++5zPN+JeT+eM6558kNnznn3O+592vuLgDx/EPRDQAoBuEHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaAIPxDUZc3cmJlxOSHQYO5u1SxX157fzO40syNm9q6ZPVrPawFoLqv12n4zmybpj5I6JQ1Jel1St7sfSqzDnh9osGbs+ZdJetfd33P3c5J+IWllHa8HoInqCf88SX8Z93goe+7vmFmPmQ2a2WAd2wKQs3o+8Jvo0OJzh/Xu3i+pX+KwH2gl9ez5hyTNH/f4y5KO1dcOgGapJ/yvS7rGzL5iZtMlfVvSrnzaAtBoNR/2u/uImfVK2iNpmqQt7v6H3DoD0FA1D/XVtDHO+YGGa8pFPgAuXYQfCIrwA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQhB8IivADQRF+ICjCDwRF+IGgCD8QFOEHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeCIvxAUIQfCIrwA0ERfiAowg8EVfMU3ZJkZkclnZZ0XtKIu3fk0RTyM23atGT9yiuvbOj2e3t7y9Yuv/zy5LpLlixJ1tesWZOsP/XUU2Vr3d3dyXU//fTTZH3Dhg3J+uOPP56st4K6wp+5zd0/yOF1ADQRh/1AUPWG3yXtNbM3zKwnj4YANEe9h/3fcPdjZna1pF+b2f+6+/7xC2R/FPjDALSYuvb87n4suz0h6QVJyyZYpt/dO/gwEGgtNYffzGaY2cwL9yV9U9I7eTUGoLHqOeyfLekFM7vwOj939//JpSsADVdz+N39PUn/lGMvU9aCBQuS9enTpyfrN998c7K+fPnysrVZs2Yl173vvvuS9SINDQ0l65s3b07Wu7q6ytZOnz6dXPett95K1l999dVk/VLAUB8QFOEHgiL8QFCEHwiK8ANBEX4gKHP35m3MrHkba6L29vZkfd++fcl6o79W26pGR0eT9YceeihZP3PmTM3bLpVKyfqHH36YrB85cqTmbTeau1s1y7HnB4Ii/EBQhB8IivADQRF+ICjCDwRF+IGgGOfPQVtbW7I+MDCQrC9atCjPdnJVqffh4eFk/bbbbitbO3fuXHLdqNc/1ItxfgBJhB8IivADQRF+ICjCDwRF+IGgCD8QVB6z9IZ38uTJZH3t2rXJ+ooVK5L1N998M1mv9BPWKQcPHkzWOzs7k/WzZ88m69dff33Z2iOPPJJcF43Fnh8IivADQRF+ICjCDwRF+IGgCD8QFOEHgqr4fX4z2yJphaQT7r40e65N0g5JCyUdlfSAu6d/6FxT9/v89briiiuS9UrTSff19ZWtPfzww8l1H3zwwWR9+/btyTpaT57f5/+ppDsveu5RSS+5+zWSXsoeA7iEVAy/u++XdPElbCslbc3ub5V0T859AWiwWs/5Z7t7SZKy26vzawlAMzT82n4z65HU0+jtAJicWvf8x81sriRltyfKLeju/e7e4e4dNW4LQAPUGv5dklZl91dJ2plPOwCapWL4zWy7pN9JWmJmQ2b2sKQNkjrN7E+SOrPHAC4hFc/53b27TOn2nHsJ69SpU3Wt/9FHH9W87urVq5P1HTt2JOujo6M1bxvF4go/ICjCDwRF+IGgCD8QFOEHgiL8QFBM0T0FzJgxo2ztxRdfTK57yy23JOt33XVXsr53795kHc3HFN0Akgg/EBThB4Ii/EBQhB8IivADQRF+ICjG+ae4xYsXJ+sHDhxI1oeHh5P1l19+OVkfHBwsW3vmmWeS6zbz/+ZUwjg/gCTCDwRF+IGgCD8QFOEHgiL8QFCEHwiKcf7gurq6kvVnn302WZ85c2bN2163bl2yvm3btmS9VCrVvO2pjHF+AEmEHwiK8ANBEX4gKMIPBEX4gaAIPxBUxXF+M9siaYWkE+6+NHvuMUmrJf01W2ydu/+q4sYY57/kLF26NFnftGlTsn777bXP5N7X15esr1+/Pll///33a972pSzPcf6fSrpzguf/093bs38Vgw+gtVQMv7vvl3SyCb0AaKJ6zvl7zez3ZrbFzK7KrSMATVFr+H8kabGkdkklSRvLLWhmPWY2aGblf8wNQNPVFH53P+7u5919VNKPJS1LLNvv7h3u3lFrkwDyV1P4zWzuuIddkt7Jpx0AzXJZpQXMbLukWyV9ycyGJH1f0q1m1i7JJR2V9N0G9gigAfg+P+oya9asZP3uu+8uW6v0WwFm6eHqffv2JeudnZ3J+lTF9/kBJBF+ICjCDwRF+IGgCD8QFOEHgmKoD4X57LPPkvXLLktfhjIyMpKs33HHHWVrr7zySnLdSxlDfQCSCD8QFOEHgiL8QFCEHwiK8ANBEX4gqIrf50dsN9xwQ7J+//33J+s33nhj2VqlcfxKDh06lKzv37+/rtef6tjzA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQjPNPcUuWLEnWe3t7k/V77703WZ8zZ86ke6rW+fPnk/VSqZSsj46O5tnOlMOeHwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeCqjjOb2bzJW2TNEfSqKR+d/+hmbVJ2iFpoaSjkh5w9w8b12pclcbSu7u7y9YqjeMvXLiwlpZyMTg4mKyvX78+Wd+1a1ee7YRTzZ5/RNK/uftXJX1d0hozu07So5JecvdrJL2UPQZwiagYfncvufuB7P5pSYclzZO0UtLWbLGtku5pVJMA8jepc34zWyjpa5IGJM1295I09gdC0tV5Nwegcaq+tt/MvijpOUnfc/dTZlVNByYz65HUU1t7ABqlqj2/mX1BY8H/mbs/nz193MzmZvW5kk5MtK6797t7h7t35NEwgHxUDL+N7eJ/Iumwu28aV9olaVV2f5Wknfm3B6BRKk7RbWbLJf1G0tsaG+qTpHUaO+//paQFkv4s6VvufrLCa4Wconv27NnJ+nXXXZesP/3008n6tddeO+me8jIwMJCsP/nkk2VrO3em9xd8Jbc21U7RXfGc391/K6nci90+maYAtA6u8AOCIvxAUIQfCIrwA0ERfiAowg8ExU93V6mtra1sra+vL7lue3t7sr5o0aKaesrDa6+9lqxv3LgxWd+zZ0+y/sknn0y6JzQHe34gKMIPBEX4gaAIPxAU4QeCIvxAUIQfCCrMOP9NN92UrK9duzZZX7ZsWdnavHnzauopLx9//HHZ2ubNm5PrPvHEE8n62bNna+oJrY89PxAU4QeCIvxAUIQfCIrwA0ERfiAowg8EFWacv6urq6
56PQ4dOpSs7969O1kfGRlJ1lPfuR8eHk6ui7jY8wNBEX4gKMIPBEX4gaAIPxAU4QeCIvxAUObu6QXM5kvaJmmOpFFJ/e7+QzN7TNJqSX/NFl3n7r+q8FrpjQGom7tbNctVE/65kua6+wEzmynpDUn3SHpA0hl3f6rapgg/0HjVhr/iFX7uXpJUyu6fNrPDkor96RoAdZvUOb+ZLZT0NUkD2VO9ZvZ7M9tiZleVWafHzAbNbLCuTgHkquJh/98WNPuipFclrXf3581stqQPJLmkH2js1OChCq/BYT/QYLmd80uSmX1B0m5Je9x90wT1hZJ2u/vSCq9D+IEGqzb8FQ/7zcwk/UTS4fHBzz4IvKBL0juTbRJAcar5tH+5pN9IeltjQ32StE5St6R2jR32H5X03ezDwdRrsecHGizXw/68EH6g8XI77AcwNRF+ICjCDwRF+IGgCD8QFOEHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeCavYU3R9I+r9xj7+UPdeKWrW3Vu1Lorda5dnbP1a7YFO/z/+5jZsNuntHYQ0ktGpvrdqXRG+1Kqo3DvuBoAg/EFTR4e8vePsprdpbq/Yl0VutCumt0HN+AMUpes8PoCCFhN/M7jSzI2b2rpk9WkQP5ZjZUTN728wOFj3FWDYN2gkze2fcc21m9msz+1N2O+E0aQX19piZvZ+9dwfN7F8L6m2+mb1sZofN7A9m9kj2fKHvXaKvQt63ph/2m9k0SX+U1ClpSNLrkrrd/VBTGynDzI5K6nD3wseEzexfJJ2RtO3CbEhm9h+STrr7huwP51Xu/u8t0ttjmuTMzQ3qrdzM0t9Rge9dnjNe56GIPf8ySe+6+3vufk7SLyStLKCPlufu+yWdvOjplZK2Zve3auw/T9OV6a0luHvJ3Q9k909LujCzdKHvXaKvQhQR/nmS/jLu8ZBaa8pvl7TXzN4ws56im5nA7AszI2W3Vxfcz8UqztzcTBfNLN0y710tM17nrYjwTzSbSCsNOXzD3f9Z0l2S1mSHt6jOjyQt1tg0biVJG4tsJptZ+jlJ33P3U0X2Mt4EfRXyvhUR/iFJ88c9/rKkYwX0MSF3P5bdnpD0gsZOU1rJ8QuTpGa3Jwru52/c/bi7n3f3UUk/VoHvXTaz9HOSfubuz2dPF/7eTdRXUe9bEeF/XdI1ZvYVM5su6duSdhXQx+eY2YzsgxiZ2QxJ31TrzT68S9Kq7P4qSTsL7OXvtMrMzeVmllbB712rzXhdyEU+2VDGf0maJmmLu69vehMTMLNFGtvbS2PfePx5kb2Z2XZJt2rsW1/HJX1f0n9L+qWkBZL+LOlb7t70D97K9HarJjlzc4N6Kzez9IAKfO/ynPE6l364wg+IiSv8gKAIPxAU4QeCIvxAUIQfCIrwA0ERfiAowg8E9f/Ex0YKZYOZcwAAAABJRU5ErkJggg==\n", 113 | "text/plain": [ 114 | "" 115 | ] 116 | }, 117 | "metadata": {}, 118 | "output_type": "display_data" 119 | } 120 | ], 121 | "source": [ 122 | "#Checking the downloaded data\n", 123 | "print(\"Shape of training dataset: {}\".format(np.shape(X_train)))\n", 124 | "print(\"Shape of test dataset: {}\".format(np.shape(X_test)))\n", 125 | "\n", 126 | "\n", 127 | "plt.figure()\n", 128 | "plt.imshow(X_train[0], cmap='gray')\n", 129 | "\n", 130 | "print(\"Label for image: {}\".format(y_train[0]))" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### Building synthetic data" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "The MNIST dataset is very popular for beginner Deep Learning projects. So, to add a twist to the tale, we're going to predict images that can contain 1 to 5 digits. We'll have to change the architecture of our deep learning model for this, but before that, we'll need to generate this dataset first.\n", 145 | "\n", 146 | "To generate the synthetic training data, we will first start by randomly picking out up to 5 individual digits out from the MNIST training set. The individual images will be then stacked together, and blanks will be used to make up the number of digits if there were less than 5. By this approach, we could increase the size of our training data. We'll build around 60,000 such examples.\n", 147 | "\n", 148 | "While concatenating images together, we'll also build the labels for each image. First, labels for single digits will be arranged in tuples of 5. Labels 0-9 will be used for digits 0-9, and a 10 will be used to indicate a blank.\n", 149 | "\n", 150 | "The same approach will be used to build the test data, but using the MNIST test set for individual digits, for 10,000 synthetic test images.\n", 151 | "\n", 152 | "\n", 153 | "\n", 154 | "Let's write a function that does this." 
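A tiny illustration of the label scheme just described (an editorial example in plain Python; the digit values are arbitrary): a shorter sequence is padded out to five positions with the blank label 10.

```python
# Blank positions are coded as 10, so a two-digit example pads to five labels.
digits = [2, 7]                                   # e.g. an image built from a "2" and a "7"
label = tuple(digits + [10] * (5 - len(digits)))  # pad to five positions
print(label)                                      # (2, 7, 10, 10, 10)
```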
155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 5, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "def build_synth_data(data,labels,dataset_size):\n", 164 | " \n", 165 | " #Define synthetic image dimensions\n", 166 | " synth_img_height = 64\n", 167 | " synth_img_width = 64\n", 168 | " \n", 169 | " #Define synthetic data\n", 170 | " synth_data = np.ndarray(shape=(dataset_size,synth_img_height,synth_img_width),\n", 171 | " dtype=np.float32)\n", 172 | " \n", 173 | " #Define synthetic labels\n", 174 | " synth_labels = [] \n", 175 | " \n", 176 | " #For a loop till the size of the synthetic dataset\n", 177 | " for i in range(0,dataset_size):\n", 178 | " \n", 179 | " #Pick a random number of digits to be in the dataset\n", 180 | " num_digits = random.randint(1,5)\n", 181 | " \n", 182 | " #Randomly sampling indices to extract digits + labels afterwards\n", 183 | " s_indices = [random.randint(0,len(data)-1) for p in range(0,num_digits)]\n", 184 | " \n", 185 | " #stitch images together\n", 186 | " new_image = np.hstack([X_train[index] for index in s_indices])\n", 187 | " #stitch the labels together\n", 188 | " new_label = [y_train[index] for index in s_indices]\n", 189 | " \n", 190 | " \n", 191 | " #Loop till number of digits - 5, to concatenate blanks images, and blank labels together\n", 192 | " for j in range(0,5-num_digits):\n", 193 | " new_image = np.hstack([new_image,np.zeros(shape=(mnist_image_height,\n", 194 | " mnist_image_width))])\n", 195 | " new_label.append(10) #Might need to remove this step\n", 196 | " \n", 197 | " #Resize image\n", 198 | " new_image = misc.imresize(new_image,(64,64))\n", 199 | " \n", 200 | " #Assign the image to synth_data\n", 201 | " synth_data[i,:,:] = new_image\n", 202 | " \n", 203 | " #Assign the label to synth_data\n", 204 | " synth_labels.append(tuple(new_label))\n", 205 | " \n", 206 | " \n", 207 | " #Return the synthetic dataset\n", 208 | " return synth_data,synth_labels" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 6, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "name": "stderr", 218 | "output_type": "stream", 219 | "text": [ 220 | "/Users/melvynnfernandez/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:36: DeprecationWarning: `imresize` is deprecated!\n", 221 | "`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.\n", 222 | "Use ``skimage.transform.resize`` instead.\n" 223 | ] 224 | } 225 | ], 226 | "source": [ 227 | "#Building the training dataset\n", 228 | "X_synth_train,y_synth_train = build_synth_data(X_train,y_train,60000)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 7, 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "name": "stderr", 238 | "output_type": "stream", 239 | "text": [ 240 | "/Users/melvynnfernandez/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:36: DeprecationWarning: `imresize` is deprecated!\n", 241 | "`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.\n", 242 | "Use ``skimage.transform.resize`` instead.\n" 243 | ] 244 | } 245 | ], 246 | "source": [ 247 | "#Building the test dataset\n", 248 | "X_synth_test,y_synth_test = build_synth_data(X_test,y_test,10000)" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 8, 254 | "metadata": {}, 255 | "outputs": [ 256 | { 257 | "data": { 258 | "text/plain": [ 259 | "(1, 4, 1, 9, 7)" 260 | ] 261 | }, 262 | "execution_count": 8, 263 | "metadata": {}, 264 | "output_type": 
"execute_result" 265 | }, 266 | { 267 | "data": { 268 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP4AAAD8CAYAAABXXhlaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAG4NJREFUeJztnW2sVeWVx/9LUEEUEXnpFShvUoGqBaSIYWIRC3U6bfFDa2ybCTMh4UtnYjOdVJ1JJu1kJrFp0nY+mCZ0dMqHtmptOxjaVA1gEWJFGAHl5V7e4crLBQRFtCp2zYez73Y9y3vuPfees599Tp//L7k5a59nn3vW3eesu9d61vOsJaoKQkhaXFK2AoSQ+NDwCUkQGj4hCULDJyRBaPiEJAgNn5AEoeETkiB1Gb6I3CUi7SKyT0QeaJRShJBikYEu4BGRQQA6ACwG0AngJQBfVdVdjVOPEFIEg+t47TwA+1T1AACIyGMAlgKoavgiwmWChBSMqkpf59Tj6o8DcNQcd2bPEUKanHru+D39V/nIHV1EVgBYUcf7EEIaTD2G3wlggjkeD+CYP0lVVwJYCdDVJ6RZqMfVfwnANBGZLCKXAbgXwFONUYsQUiQDvuOr6kUR+QcATwMYBOBRVd3ZMM0IIYUx4HTegN6Mrj4hhVP0rD4hpEWh4ROSIDR8QhKEhk9IgtDwCUkQGj4hCULDJyRBaPiEJAgNn5AEoeETkiA0fEIShIZPSILQ8AlJEBo+IQlCwyckQWj4hCQIDZ+QBKHhE5IgNHxCEoSGT0iC1FNXv3Ta2tqC4/Pnz+fyW2+9FVudmrj00ktz+brrrgvGTpw4kcvvvvtuNJ08Ih/WavzYxz6Wy2PHjg3Oa29vz+V33nmneMVIw+Adn5AEoeETkiA0fEISpKVj/Dlz5gTHNubct29fbHVqYtiwYbm8cOHCYOx3v/tdLp86dSqWSh/Bxvj2Gi9ZsiQ47/vf/34ud3Z2Fq8YaRh93vFF5FER6RKRV81zI0XkWRHZmz1eU6yahJBGUour/1MAd7nnHgCwVlWnAVibHRNCWoQ+XX1V3SAik9zTSwEszORVAJ4DcH8D9aqJT37yk8HxG2+8kcvN6upfeeWVuTx//vxgbNOmTblcpqt/ySUf3g9uuummXL777ruD837yk5/kMl391mKgk3tjVfU4AGSPYxqnEiGkaAqf3BORFQBWFP0+hJDaGajhnxSRNlU9LiJtALqqnaiqKwGsBBrTJtu6odZtBoBrr702l+3MdKZHvW/dEEaOHJnLU6dODcas/mWGKvYajxo1Kpevuuqq4Lyrr746mk6ksQzU1X8KwLJMXgZgdWPUIYTEoJZ03i8AvADgBhHpFJHlAB4CsFhE9gJYnB0TQlqEWmb1v1pl6M4G60IIiUTLrdyzsbuP8W082qzYOH7KlCnBmI3/y2TQoEG5bK/p8OHDg/P8MWkduFafkASh4ROSIC3n6lusSwoAgwc3359jU2NA6N4PHTq013PLwqbpbLETnyJtFmya8ZZbbgnGRowYkctPP/10LqdeOKQ5vmmEkKjQ8AlJEBo+IQnSfEFxH9g408fEzRjj+7jYxviXX355bHV6xOto42Jf0LQZsWnFxYsXB2N2WfTzzz+fy4zxCSHJQcMnJEGazzeuA1uzvlm47LLLguMJEybksk9HloUPmT7xiU/ksl1pePHixeC8999/v1jFquCv6Y033pjLc+fODcZsXwCfPo1Jb/0Urrnmw8p1o0ePzmUfjmzdurXqWH/hHZ+QBKHhE5IgLefq24IaH3zwQTBmXcBmKcQxZMiQ4Ni7ec2Az4bMnj07l+2MuW/r9ac//alYxargr+lnPvOZXJ4xY0YwZr8j/nUxsRkce32BUGdbh/HMmTPBeR0dHblMV58Q0m9o+IQkCA2fkARpuRjfxu4+HVZmuqYavliIbTXt5x38nEUsfDpv4sSJuWxbfp0+fTo47+233y5WsSpcccUVwfGsWbNyedy4ccHY66+/nssxi4P6eZOPf/zjuTxv3rxgzLYps/MVXV1hDVtbqMWP9Rfe8QlJEBo+IQnScq6+dUutGwqEK8mapWiEXfkGhCuzvGtfVnqsVnwK6cKFC6Xo4T/3MWM+bOTkP3ebRhs/fnwu21VwReDDkc997nO5bN15IAz/bIjgaxracGHPnj116cc7PiEJQsMnJEFo+IQkSEvH+D6OKisd1ht255U/Pnv2bDBWVozv4+Jq8yNvvvlmcFxWjO97+Nl5kz//+c/BWG+74orEp3GXLFmSy7fddltNv8P/nT5VWQ+1tNCaICLrRWS3iOwUkfuy50eKyLMisjd7vKav30UIaQ5qcfUvAviWqs4AMB/AN0RkJoAHAKxV1WkA1mbHhJAWoJbeeccBHM/k8yKyG8A4AEsBLMxOWwXgOQD3F6JlC2Pr1wHhDrdm2e3ma//ZtlnWdf7jH/8YnHf+/PliFavC5MmTg2Ob3vNpOpsCi4ktAAKEqcSB0sjdhf2a3BORSQBmA3gRwNjsn0L3P4cx1V9JCGkmap7cE5ErAfwKwDdV9c1aF8iIyAoAKwamHiGkCGq644vIpagY/c9U9dfZ0ydFpC0bbwPQ464BVV2pqnNVdW5P44SQ+PR5x5fKrf0RALtV9Qdm6CkAywA8lD2uLkTDFse3vv5LivHfeuutYhWrgm8vbmP8zZs3B2NltfL2/Qjsjsda8V61LzJaD7W4+gsA/C2AV0RkW/bcv6Bi8E+IyHIARwB8pWFaEUIKpZZZ/Y0AqgX0dzZWHUJIDFpu5V4rYHdYTZo0KRizBRT8KrOyikH2tnLPFgvxrn3MlZJ2FZut+w8AnZ2duexdfbvaLWYo5dN59jth9QWAvXv35vIbb7yRy7feemtwXiN3nHKtPiEJQsMnJEHo6heA3Rgyffr0YOz48eO57It0lDUD7UMOW9DEuvpl9SYAwhWQ/ppaV/mFF14IxmyoFXNTkS0OAoRh0f79+4OxJ598sscxHy40Et7xCUkQGj4hCULDJyRBGOMXgF1h5WO9I0eO5LKP6X2sHQufJmqW9t0We638qrh169bl8tGjR4Ox9vb2XLZ9F3yRi0bvNPRFYo4dO5bLfgWknZewqT6fPvUrPeuBd3xCEoSGT0iC0NUvAOvq+40VJ0+ezGW/2cTXtIuFTT8CH61b3wzYGna+VdqpU6dy2a8mPHz4cC4vWrQol339PRsSFMGWLVtyec2aNcHYa6+9lsu2pqTfPNXIcIR3fEIShIZPSILQ8AlJkJaL8W3qybcibhZsjOzj5xMnTuTyxYsXg7Gy6tT7WNKmouwy3dh9C+xnbfsReH3tNfUpUdva2y7f9cUvOzo6crkRS5N9SnT79u25vGnTpqqvs7sJ/fyQ3blXL7zjE5IgNHxCEqQ5feVesO6PTzv5llRlccMNN+Sydxtt6qZMbNrI7wKzrvR7772Xy2+//Xbxihmsjtdff30u+
xDp0KFDueyvtw0RbrzxxlzesWNHcN4f/vCHqr9/IPr67+bOnTtr+h1WX/v7gDBtWS+84xOSIDR8QhKk5Vx964b6jqTNQiu4+nbGvFldfTszPnXq1Fz2rrhdneevty12MnPmzFy2n5F/r4G6+vaaelffbx6qhi3H7jMDNkNRL7zjE5IgNHxCEoSGT0iCtFyMb2vP+xbUA43N6sWvJLPtks6dOxeM2TiwrDr6Hh+P2r/Hrtbz6aWisSszbezrV7DZuQdfVKRazOx3ujW6CIqfa6h1fsS2APc6vfPOO/UrltHnJykiQ0Rks4hsF5GdIvLd7PnJIvKiiOwVkcdFpHGNvQghhVLLv/B3ASxS1U8BmAXgLhGZD+B7AH6oqtMAnAWwvDg1CSGNpJbeeQqgu/jXpdmPAlgE4GvZ86sAfAfAjxuvYoh1j6+++upgrBld/ddffz0Ys+5rma6+dUW9Hvbvse5l7E1RdoOTddl9wRKrow9HfLfibvzn0ghXv1rrMaD39l32ddbVH2i4UAs1BW0iMijrlNsF4FkA+wGcU9VuS+sEMK7a6wkhzUVNhq+qH6jqLADjAcwDMKOn03p6rYisEJEtIrKlp3FCSHz6NU2rqucAPAdgPoARItLt+40HcKzKa1aq6lxVnVuPooSQxtFn0CYiowG8r6rnRGQogM+iMrG3HsCXATwGYBmA1UUq2o2N4XyhAluHPGafN1+jfezYsbnsiy7Y9Fgj2x73F3t9fHxrr7GN6/31LppqhSd9Os/Wm/c62qW+dnfbgQMHgvMaEePba+pr4Pf2+23xUDs/5Jd3N7Kufi2zNW0AVonIIFQ8hCdUdY2I7ALwmIj8B4CXATzSMK0IIYVSy6z+DgCze3j+ACrxPiGkxWi5lXu90ciaZP3BryC0O8J27doVjNm0TlnpR09vK8JsSi22q2+xYZEvuGJ3EPrWVTfddFMuHzx4MJdrLYwxUHz6rjdX36albV1A3/Lbti+vF67VJyRBaPiEJEjLufp2dtfPituNFzFn9X1HXLvpxdaDA0J3zZerLmuWv7f3tRtbYrv6NqNgZ/htGzIgvKa2Zh0Qus42w2K70gKN+b7Y3+E73fb2+21oaL9LNjQBGlvenHd8QhKEhk9IgtDwCUmQlovxberM78SKWQzSvrdvd23TdD7FaOM0n87zxRWLxMb1fgdbtV1mMedNgDA1Z9OK+/fvD86z19GvorRzQnZHXhHtwOz18YUx7TyEn1Ox7bzsTkn/Oxp5/XnHJyRBaPiEJMhflKvfyJpkfWHdNZsyAkK3zrv6NiXmXf2YhS7stfMpMPu3NboWXX+wm1esq+832FgX2P8t9noX7epbfMrRri70TJgwIZer6QvQ1SeE1AkNn5AEoeETkiAtEePbmHPUqFFVz/O10ovEpona2tqCMTvX4Isn2BjOL4H1qagisdfUv2+ZBUIsds7DFzS12N5/S5cuDcbsNT5z5kwuFz130dHRERzb76ZP29rCLVbHRrbF9vCOT0iC0PAJSZCWc/XtKjOf3rhw4ULVsUZjXc/rrrsuGLMrCL2rb91qn77zBT2KxF5T3268WVx966b35urbUOvuu+8Oxmwa0LYzK8LVt9857+rbFK//3OnqE0KiQMMnJEFawtW3q8yGDx+ey371VcyVe1YPn2mwhTjuuOOOYMyuLPP14WJu0mmG9+0LGybZVY4333xzcJ5tqWXdZj927FiPrR8KobeWWTYLAYQrPw8fPlzT76gX3vEJSRAaPiEJQsMnJEFaLsa3Nch9jN/IFkN9YWN1WywRCHW85557gjG748yn0UiITc/a3W0LFiwIzrNzOz7tZ4ud+pZUZeFXetoYf/Pmzbnc246+eqn5jp+1yn5ZRNZkx5NF5EUR2Ssij4tIed0WCCH9oj+u/n0Adpvj7wH4oapOA3AWwPJGKkYIKY6aXH0RGQ/gbwD8J4B/ksrSrkUAvpadsgrAdwD8uAAdA1ffusre1S/SNfLYVJxN3wHh6rxPf/rTwVi1DrDNil1xFjNdCoTpLLsacvr06cF5va2Ks8Us7Mq92NjVkOPGjQvGbGrYhiZFtlir9Y7/IwDfBtC9zvFaAOdUtVuzTgDjenohIaT56NPwReQLALpUdat9uodTe1wcLyIrRGSLiGwZoI6EkAZTi6u/AMCXROTzAIYAGI6KBzBCRAZnd/3xAHpcFqWqKwGsBAARiVufmRDSI30avqo+COBBABCRhQD+WVW/LiK/BPBlAI8BWAZgdVFK2rjY7rbyMX7MttNWJ1/00y6B7a1mfbO0ye4NG9f79tRFY9OztmipX7Jrd9r5nYX2dTHngDz2OzJx4sRgzM5R2H55RRYLqWcBz/2oTPTtQyXmf6QxKhFCiqZfC3hU9TkAz2XyAQDzGq8SIaRoWm7lni3OUGY6z7phviWyTef51XlWZ9/yy4cMzYBNqdmdbjGwrr5tJ+ULlthr6guw2PCkTFffhqi+D4MNp+wOwiKLyTTfN40QUjg0fEISpCVcfTtLbt1o77rF3KTT1dWVyxs3bgzGbrnllly2nVCBcCWZ3cwDfHQFYCz8TLg9tm50zOsLhLPde/furaqHDbX8akh7vcvMotjv7fXXXx+MHTlyJJdjlYjnHZ+QBKHhE5IgNHxCEqQlYny748rGwT7WK7I4ocfWPN+0aVMwZtNNvm3z8ePHc9nH+HbnYUxaIca3rbG9HjZl5693M8b4U6dODcY2bNiQy4zxCSGFQcMnJEFawtW3K9qs229rsgEfXclXJNbdfOWVV4Ix6yrv2rUrGLMpyAkTJgRjdpWf/R1FrOCy4YivRW/fz25ysa53bNrb23P50UcfDcbs9+OLX/xiMGZDhDKxdfZ8d2Ib/sUKR3jHJyRBaPiEJAgNn5AEaYkY38a7Nsb3qY8iCxd4bKy+c+fOYMz2PxsyZEgwZvum3XvvvcGYTVUWHePbtJfv5VYtxi9zd9v+/ftz+eGHHw7G7FzJ7bffHozZpb4x8SlSG+P7pdknTpzI5VjzVLzjE5IgNHxCEqQlXH3r3lvX2dYgB+Km82xYYd3hno4t1l32hThsmsemqIoIYeyOx952BVqX1buvMbFhnQ/x7LXyY3blXkxswRgAmDlzZi77NLQNDYssvmHhHZ+QBKHhE5IgLeHq23pltnWV3SgDxHX1B4qtr+ZdfdtKybriRazmstfKvpfHrib07amaEe/qe7c6FvZ7CgBz5szJ5ZMnTwZjPmSNAe/4hCQIDZ+QBKHhE5IgzR+0IYxBbTFFnzaLuXJvoPTWdtrOZRSdOrPzBr3V87dxfTPW/QfCFK+fDylrtaEvCDJt2rRc3rNnTzBmV+7FoibDF5FDAM4D+ADARVWdKyIjATwOYBKAQwDuUdW4zdUIIQOiP//C71DVWao6Nzt+AMBaVZ0GYG12TAhpAepx9ZcCWJjJq1DpqXd/nfr0C5++i7XqqR5sOOJrBNrQpejUpP39vYVIVsdmTZfaTUY+jVYWviOuTdX64iyxaxkCtd/xFcAzIrJVRFZkz41V1eMAkD2OKUJBQkjjqfWOv0BVj4nIGADP
isiePl+Rkf2jWNHniYSQaNR0x1fVY9ljF4DfoNIe+6SItAFA9thV5bUrVXWumRsghJRMn3d8ERkG4BJVPZ/JSwD8O4CnACwD8FD2uLooJW0MauUy66QPFDsP4Zfs2jbUMWP83t7Lphyb9Xo3Y4w/ZcqU4NgufbaFQ4FyYvxaXP2xAH6T5ZUHA/i5qv5eRF4C8ISILAdwBMBXilOTENJI+jR8VT0A4FM9PH8GwJ1FKEUIKZaWWLlnV1+99tpruexXPLVCOs+6y/ZvAcLdhkX/Lda9PHPmTDBm3dR9+/blcswWZf1h9OjRuVxmsRC7qnTGjBnBmG3l3dnZGU2najTnGkxCSKHQ8AlJEBo+IQnSEjG+jUdtXHzs2LHgvFaI8W3qzPZMA5onxrcpU1vPnjF+79gY3xbXBMJKQM3wveUdn5AEoeETkiAt4epbF3PDhg25fPTo0TLUqQvrRu/YsSMY6+rqcdVzIdgU6fr164Oxc+fO5fL27dtzuVl25/nioHbn28GDB4OxmIU47Oo835bMhnFnz5ZftoJ3fEIShIZPSIK0hKtvZ6CfeeaZXD59+nRwXivM6ltXf9u2bcFYzFlz6wL/9re/DcY2bdqUy0eOHMnlZrm+tjYhELrYu3fvDsZiboCxLdBGjRoVjB04cCCX7WassuAdn5AEoeETkiA0fEISpCVifLujza4ka0VsnOxX7sXEpuY6OjpK02Mg+BbU9pr6GD9m8RAb4w8dOjQYs62wyyi84eEdn5AEoeETkiAt4eoTYvFutE2P+fZUMbGuvk992oImzQDv+IQkCA2fkASh4ROSIIzxScthW3cD4a7GQ4cORdbmQ4YNG5bLfvk1Y3xCSOnQ8AlJELr6pOU4f/58cPz888/ncszCGx67GtKHHL7OXtnUdMcXkREi8qSI7BGR3SJym4iMFJFnRWRv9nhN0coSQhpDra7+fwH4vapOR6Wd1m4ADwBYq6rTAKzNjgkhLUAt3XKHA7gdwN8BgKq+B+A9EVkKYGF22ioAzwG4vwglCbF4V3/jxo25XGZHX+vq28IbwEfbpZVNLXf8KQBOAfgfEXlZRP47a5c9VlWPA0D2OKZAPQkhDaQWwx8MYA6AH6vqbAAX0A+3XkRWiMgWEdkyQB0JIQ2mFsPvBNCpqi9mx0+i8o/gpIi0AUD22GNtaFVdqapzVXVuIxQmhNRPnzG+qp4QkaMicoOqtgO4E8Cu7GcZgIeyx9WFakpIht/5VmZcb7Htr9etWxeMNVv7sVrz+P8I4GcichmAAwD+HhVv4QkRWQ7gCICvFKMiIaTR1GT4qroNQE+u+p2NVYcQEgOJWStdRJqjMDshBWA36di2XgBw8uTJXLa9FYpAVftsGcy1+oQkCA2fkASh4ROSIIzxCfkLgzE+IaRHaPiEJEjsQhynARwGMCqTy6QZdACoh4d6hPRXj4m1nBQ1xs/fVGRL2Wv3m0EH6kE9ytKDrj4hCULDJyRByjL8lSW9r6UZdACoh4d6hBSiRykxPiGkXOjqE5IgUQ1fRO4SkXYR2Sci0aryisijItIlIq+a56KXBxeRCSKyPitRvlNE7itDFxEZIiKbRWR7psd3s+cni8iLmR6PZ/UXCkdEBmX1HNeUpYeIHBKRV0RkW3eZuJK+I1FK2UczfBEZBOBhAH8NYCaAr4rIzEhv/1MAd7nnyigPfhHAt1R1BoD5AL6RXYPYurwLYJGqfgrALAB3ich8AN8D8MNMj7MAlhesRzf3oVKyvZuy9LhDVWeZ9FkZ35E4pexVNcoPgNsAPG2OHwTwYMT3nwTgVXPcDqAtk9sAtMfSxeiwGsDiMnUBcAWA/wNwKyoLRQb39HkV+P7jsy/zIgBrAEhJehwCMMo9F/VzATAcwEFkc29F6hHT1R8H4Kg57syeK4tSy4OLyCQAswG8WIYumXu9DZUiqc8C2A/gnKp2F7CL9fn8CMC3AXRXp7i2JD0UwDMislVEVmTPxf5copWyj2n4Pe0YSjKlICJXAvgVgG+q6ptl6KCqH6jqLFTuuPMAzOjptCJ1EJEvAOhS1a326dh6ZCxQ1TmohKLfEJHbI7ynp65S9v0hpuF3AphgjscDKLOTYE3lwRuNiFyKitH/TFV/XaYuAKCq51DpgjQfwAgR6d6/EePzWQDgSyJyCMBjqLj7PypBD6jqseyxC8BvUPlnGPtzqauUfX+IafgvAZiWzdheBuBeAE9FfH/PU6iUBQcilQcXEQHwCIDdqvqDsnQRkdEiMiKThwL4LCqTSOsBfDmWHqr6oKqOV9VJqHwf1qnq12PrISLDROSqbhnAEgCvIvLnoqonABwVkRuyp7pL2Tdej6InTdwkxecBdKAST/5rxPf9BYDjAN5H5b/qclRiybUA9maPIyPo8VeouK07AGzLfj4fWxcANwN4OdPjVQD/lj0/BcBmAPsA/BLA5RE/o4UA1pShR/Z+27Ofnd3fzZK+I7MAbMk+m/8FcE0RenDlHiEJwpV7hCQIDZ+QBKHhE5IgNHxCEoSGT0iC0PAJSRAaPiEJQsMnJEH+H7w2jgGBltfZAAAAAElFTkSuQmCC\n", 269 | "text/plain": [ 270 | "" 271 | ] 272 | }, 273 | "metadata": {}, 274 | "output_type": "display_data" 275 | } 276 | ], 277 | "source": [ 278 | "#checking a sample\n", 279 | "plt.figure()\n", 280 | "plt.imshow(X_synth_train[232], cmap='gray')\n", 281 | "\n", 282 | "y_synth_train[232]" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "Looks like things work as we expect them to. Let's prepare the datset and labels so that keras can handle them." 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "### Preparatory Preprocessing\n", 297 | "\n", 298 | "#### Preprocessing Labels for model\n", 299 | "\n", 300 | "The labels are going to be encoded to \"One Hot\" arrays, to make them compatible with Keras. Note that, as the our Deep Learning model will have 5 classifiers, we'll need 5 such One Hot arrays, one for each digit position in the image. 
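As a quick aside on this encoding (an editorial check, not a cell from the notebook): the blank label 10 lands in the last of the 11 one-hot slots, which can be confirmed with the same `np_utils` helper imported earlier.

```python
from keras.utils import np_utils

# The blank class (label 10) maps to a length-11 vector whose only 1 is in the final slot.
print(np_utils.to_categorical(10, 11))
```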
" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 9, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "#Converting labels to One-hot representations of shape (set_size,digits,classes)\n", 310 | "possible_classes = 11\n", 311 | "\n", 312 | "def convert_labels(labels):\n", 313 | " \n", 314 | " #As per Keras conventions, the multiple labels need to be of the form [array_digit1,...5]\n", 315 | " #Each digit array will be of shape (60000,11)\n", 316 | " \n", 317 | " #Code below could be better, but cba for now. \n", 318 | " \n", 319 | " #Declare output ndarrays\n", 320 | " dig0_arr = np.ndarray(shape=(len(labels),possible_classes))\n", 321 | " dig1_arr = np.ndarray(shape=(len(labels),possible_classes))\n", 322 | " dig2_arr = np.ndarray(shape=(len(labels),possible_classes))\n", 323 | " dig3_arr = np.ndarray(shape=(len(labels),possible_classes)) #5 for digits, 11 for possible classes \n", 324 | " dig4_arr = np.ndarray(shape=(len(labels),possible_classes))\n", 325 | " \n", 326 | " for index,label in enumerate(labels):\n", 327 | " \n", 328 | " #Using np_utils from keras to OHE the labels in the image\n", 329 | " dig0_arr[index,:] = np_utils.to_categorical(label[0],possible_classes)\n", 330 | " dig1_arr[index,:] = np_utils.to_categorical(label[1],possible_classes)\n", 331 | " dig2_arr[index,:] = np_utils.to_categorical(label[2],possible_classes)\n", 332 | " dig3_arr[index,:] = np_utils.to_categorical(label[3],possible_classes)\n", 333 | " dig4_arr[index,:] = np_utils.to_categorical(label[4],possible_classes)\n", 334 | " \n", 335 | " return [dig0_arr,dig1_arr,dig2_arr,dig3_arr,dig4_arr]" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 10, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "train_labels = convert_labels(y_synth_train)\n", 345 | "test_labels = convert_labels(y_synth_test)" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 11, 351 | "metadata": {}, 352 | "outputs": [ 353 | { 354 | "data": { 355 | "text/plain": [ 356 | "(60000, 11)" 357 | ] 358 | }, 359 | "execution_count": 11, 360 | "metadata": {}, 361 | "output_type": "execute_result" 362 | } 363 | ], 364 | "source": [ 365 | "#Checking the shape of the OHE array for the first digit position\n", 366 | "np.shape(train_labels[0])" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 12, 372 | "metadata": {}, 373 | "outputs": [ 374 | { 375 | "data": { 376 | "text/plain": [ 377 | "array([ 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.])" 378 | ] 379 | }, 380 | "execution_count": 12, 381 | "metadata": {}, 382 | "output_type": "execute_result" 383 | } 384 | ], 385 | "source": [ 386 | "np_utils.to_categorical(y_synth_train[234][0],11)" 387 | ] 388 | }, 389 | { 390 | "cell_type": "markdown", 391 | "metadata": {}, 392 | "source": [ 393 | "#### Preprocessing Images for model" 394 | ] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | "The function below will pre-process the images so that they can be handled by keras." 
401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": 13, 406 | "metadata": {}, 407 | "outputs": [], 408 | "source": [ 409 | "def prep_data_keras(img_data):\n", 410 | " \n", 411 | " #Reshaping data for keras, with tensorflow as backend\n", 412 | " img_data = img_data.reshape(len(img_data),64,64,1)\n", 413 | " \n", 414 | " #Converting everything to floats\n", 415 | " img_data = img_data.astype('float32')\n", 416 | " \n", 417 | " #Normalizing values between 0 and 1\n", 418 | " img_data /= 255\n", 419 | " \n", 420 | " return img_data" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 14, 426 | "metadata": {}, 427 | "outputs": [], 428 | "source": [ 429 | "train_images = prep_data_keras(X_synth_train)\n", 430 | "test_images = prep_data_keras(X_synth_test)" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 15, 436 | "metadata": {}, 437 | "outputs": [ 438 | { 439 | "data": { 440 | "text/plain": [ 441 | "(60000, 64, 64, 1)" 442 | ] 443 | }, 444 | "execution_count": 15, 445 | "metadata": {}, 446 | "output_type": "execute_result" 447 | } 448 | ], 449 | "source": [ 450 | "np.shape(train_images)" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": 16, 456 | "metadata": {}, 457 | "outputs": [ 458 | { 459 | "data": { 460 | "text/plain": [ 461 | "(10000, 64, 64, 1)" 462 | ] 463 | }, 464 | "execution_count": 16, 465 | "metadata": {}, 466 | "output_type": "execute_result" 467 | } 468 | ], 469 | "source": [ 470 | "np.shape(test_images)" 471 | ] 472 | }, 473 | { 474 | "cell_type": "markdown", 475 | "metadata": {}, 476 | "source": [ 477 | "### Model Building" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": {}, 484 | "outputs": [], 485 | "source": [ 486 | "#Importing relevant keras modules\n", 487 | "from keras.models import Sequential, Model\n", 488 | "from keras.layers import Dense, Dropout, Activation, Flatten, Input\n", 489 | "from keras.layers import Convolution2D, MaxPooling2D" 490 | ] 491 | }, 492 | { 493 | "cell_type": "markdown", 494 | "metadata": {}, 495 | "source": [ 496 | "We're going to use a Convolutional Neural Network for our network. \n", 497 | "\n", 498 | "Starting with a 2D Convolutional layer, we'll use ReLU activations after every Convolutional Layer. \n", 499 | "\n", 500 | "After the second CovLayer + ReLU, we'll add 2DMaxPooling, and a dropout to make the model robust to overfitting. A flattening layer will be added to make the data ready for classification layers, which were in the form of Dense Layers, of the same size as the no. of classes (11 for us), activated using softmax to give us the probability of each class." 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": null, 506 | "metadata": {}, 507 | "outputs": [ 508 | { 509 | "name": "stderr", 510 | "output_type": "stream", 511 | "text": [ 512 | "/Users/melvynnfernandez/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:23: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (3, 3), padding=\"same\")`\n", 513 | "/Users/melvynnfernandez/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:25: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (3, 3))`\n", 514 | "/Users/melvynnfernandez/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:46: UserWarning: Update your `Model` call to the Keras 2 API: `Model(inputs=Tensor(\"in..., outputs=[
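To make the architecture described above concrete, here is a minimal editorial sketch in the Keras 2 functional API; it is not the notebook's own cell. The 32-filter 3x3 convolutions (the first one padded) and the multi-output `Model(inputs=..., outputs=[...])` call follow the deprecation warnings shown in the output, while the dropout rate, optimizer, and head names are assumptions.

```python
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation

# Shared convolutional trunk over the 64x64 grey-scale input
inputs = Input(shape=(64, 64, 1))
x = Conv2D(32, (3, 3), padding='same')(inputs)
x = Activation('relu')(x)
x = Conv2D(32, (3, 3))(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.25)(x)          # assumed rate
x = Flatten()(x)

# Five classification heads, one per digit position, 11 classes each (0-9 plus blank)
outputs = [Dense(11, activation='softmax', name='digit_{}'.format(i))(x) for i in range(5)]

model = Model(inputs=inputs, outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# With five heads, fit() takes the list of five one-hot arrays built earlier as targets:
# model.fit(train_images, train_labels, batch_size=128, epochs=12,
#           validation_data=(test_images, test_labels))
```

Because the model has five outputs, the `train_labels` list returned by `convert_labels` can be passed to `fit` as-is, one target array per digit position.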