├── Breast Cancer Analysis.ipynb
├── Cryptocurrency Market Analysis.ipynb
├── Digit Recognizer Using CNN.ipynb
├── Exploratory Data Analysis of House Prices.ipynb
├── Intro to BigQuery .ipynb
├── K_Nearest_Neighbors
│   ├── K-Nearest Neighbors On Social Network Ads.ipynb
│   ├── Social_Network_Ads.csv
│   ├── classification_template.R
│   ├── classification_template.py
│   ├── knn.R
│   └── knn.py
├── Natural Language Processing Personal Notes.ipynb
├── R Practice [Under Construction].ipynb
├── README.md
├── Regression - Machine Learning.ipynb
├── Simple Linear Regression.ipynb
├── Stock Market Analysis for Tech Stocks.ipynb
├── data
│   ├── breast_cancer.csv
│   ├── house_prices_test.csv
│   ├── house_prices_train.csv
│   ├── kc_house_data.csv
│   ├── linear_regression_test.csv
│   └── linear_regression_train.csv
├── digit_recognizer.ipynb
├── kaggle_titanic.ipynb
└── webscraping san diego apartments.ipynb
/Breast Cancer Analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "We are going to analyze the Wisconsin breast cancer (diagnostic) dataset. It comes from the UCI Machine Learning Repository and can be found [here](https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29).\n",
8 | "\n",
9 | "Attribute Information on the dataset can be found [here](https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.names).\n",
10 | "***"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "# Import the libraries used in this analysis\n",
20 | "import numpy as np # linear algebra\n",
21 | "import pandas as pd # data processing and CSV file I/O (e.g. pd.read_csv)\n",
22 | "import matplotlib.pyplot as plt # plotting\n",
23 | "import seaborn as sns # statistical data visualization\n",
24 | "%matplotlib inline\n",
25 | "from sklearn.linear_model import LogisticRegression # logistic regression\n",
26 | "from sklearn.model_selection import train_test_split # to split the data into training and test sets\n",
27 | "from sklearn.model_selection import GridSearchCV, KFold # for hyperparameter tuning\n",
28 | "from sklearn.ensemble import RandomForestClassifier # random forest classifier\n",
29 | "from sklearn.naive_bayes import GaussianNB\n",
30 | "from sklearn.neighbors import KNeighborsClassifier\n",
31 | "from sklearn.tree import DecisionTreeClassifier\n",
32 | "from sklearn import svm # support vector machine\n",
33 | "from sklearn import metrics # to check the error and accuracy of the model\n",
34 | "# Note: on older scikit-learn versions, cross_validation can be used in place of model_selection"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 7,
41 | "metadata": {
42 | "collapsed": true
43 | },
44 | "outputs": [],
45 | "source": [
46 | "df = pd.read_csv('data/breast_cancer.csv',header = 0)"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 8,
52 | "metadata": {},
53 | "outputs": [
54 | {
55 | "data": {
224 | "text/plain": [
225 | " id diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n",
226 | "0 842302 M 17.99 10.38 122.80 1001.0 \n",
227 | "1 842517 M 20.57 17.77 132.90 1326.0 \n",
228 | "2 84300903 M 19.69 21.25 130.00 1203.0 \n",
229 | "3 84348301 M 11.42 20.38 77.58 386.1 \n",
230 | "4 84358402 M 20.29 14.34 135.10 1297.0 \n",
231 | "\n",
232 | " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n",
233 | "0 0.11840 0.27760 0.3001 0.14710 \n",
234 | "1 0.08474 0.07864 0.0869 0.07017 \n",
235 | "2 0.10960 0.15990 0.1974 0.12790 \n",
236 | "3 0.14250 0.28390 0.2414 0.10520 \n",
237 | "4 0.10030 0.13280 0.1980 0.10430 \n",
238 | "\n",
239 | " ... texture_worst perimeter_worst area_worst smoothness_worst \\\n",
240 | "0 ... 17.33 184.60 2019.0 0.1622 \n",
241 | "1 ... 23.41 158.80 1956.0 0.1238 \n",
242 | "2 ... 25.53 152.50 1709.0 0.1444 \n",
243 | "3 ... 26.50 98.87 567.7 0.2098 \n",
244 | "4 ... 16.67 152.20 1575.0 0.1374 \n",
245 | "\n",
246 | " compactness_worst concavity_worst concave points_worst symmetry_worst \\\n",
247 | "0 0.6656 0.7119 0.2654 0.4601 \n",
248 | "1 0.1866 0.2416 0.1860 0.2750 \n",
249 | "2 0.4245 0.4504 0.2430 0.3613 \n",
250 | "3 0.8663 0.6869 0.2575 0.6638 \n",
251 | "4 0.2050 0.4000 0.1625 0.2364 \n",
252 | "\n",
253 | " fractal_dimension_worst Unnamed: 32 \n",
254 | "0 0.11890 NaN \n",
255 | "1 0.08902 NaN \n",
256 | "2 0.08758 NaN \n",
257 | "3 0.17300 NaN \n",
258 | "4 0.07678 NaN \n",
259 | "\n",
260 | "[5 rows x 33 columns]"
261 | ]
262 | },
263 | "execution_count": 8,
264 | "metadata": {},
265 | "output_type": "execute_result"
266 | }
267 | ],
268 | "source": [
269 | "df.head()"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 9,
275 | "metadata": {},
276 | "outputs": [
277 | {
278 | "name": "stdout",
279 | "output_type": "stream",
280 | "text": [
281 | "<class 'pandas.core.frame.DataFrame'>\n",
282 | "RangeIndex: 569 entries, 0 to 568\n",
283 | "Data columns (total 33 columns):\n",
284 | "id 569 non-null int64\n",
285 | "diagnosis 569 non-null object\n",
286 | "radius_mean 569 non-null float64\n",
287 | "texture_mean 569 non-null float64\n",
288 | "perimeter_mean 569 non-null float64\n",
289 | "area_mean 569 non-null float64\n",
290 | "smoothness_mean 569 non-null float64\n",
291 | "compactness_mean 569 non-null float64\n",
292 | "concavity_mean 569 non-null float64\n",
293 | "concave points_mean 569 non-null float64\n",
294 | "symmetry_mean 569 non-null float64\n",
295 | "fractal_dimension_mean 569 non-null float64\n",
296 | "radius_se 569 non-null float64\n",
297 | "texture_se 569 non-null float64\n",
298 | "perimeter_se 569 non-null float64\n",
299 | "area_se 569 non-null float64\n",
300 | "smoothness_se 569 non-null float64\n",
301 | "compactness_se 569 non-null float64\n",
302 | "concavity_se 569 non-null float64\n",
303 | "concave points_se 569 non-null float64\n",
304 | "symmetry_se 569 non-null float64\n",
305 | "fractal_dimension_se 569 non-null float64\n",
306 | "radius_worst 569 non-null float64\n",
307 | "texture_worst 569 non-null float64\n",
308 | "perimeter_worst 569 non-null float64\n",
309 | "area_worst 569 non-null float64\n",
310 | "smoothness_worst 569 non-null float64\n",
311 | "compactness_worst 569 non-null float64\n",
312 | "concavity_worst 569 non-null float64\n",
313 | "concave points_worst 569 non-null float64\n",
314 | "symmetry_worst 569 non-null float64\n",
315 | "fractal_dimension_worst 569 non-null float64\n",
316 | "Unnamed: 32 0 non-null float64\n",
317 | "dtypes: float64(31), int64(1), object(1)\n",
318 | "memory usage: 146.8+ KB\n"
319 | ]
320 | }
321 | ],
322 | "source": [
323 | "df.info()"
324 | ]
325 | },
326 | {
327 | "cell_type": "markdown",
328 | "metadata": {},
329 | "source": [
330 | "***\n",
331 | "Looking at the data, we can drop the columns that carry no predictive value: `id` is just an identifier and `Unnamed: 32` is entirely empty. The next cell sketches this clean-up step."
332 | ]
333 | },
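{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch of the clean-up step described above (not part of the original run).\n",
"# Assumes `df` is the DataFrame loaded earlier; axis=1 drops columns rather than rows.\n",
"df = df.drop(['id', 'Unnamed: 32'], axis=1)\n",
"df.head()"
]
},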
334 | {
335 | "cell_type": "code",
336 | "execution_count": null,
337 | "metadata": {
338 | "collapsed": true
339 | },
340 | "outputs": [],
341 | "source": []
342 | }
343 | ],
344 | "metadata": {
345 | "kernelspec": {
346 | "display_name": "Python 3",
347 | "language": "python",
348 | "name": "python3"
349 | },
350 | "language_info": {
351 | "codemirror_mode": {
352 | "name": "ipython",
353 | "version": 3
354 | },
355 | "file_extension": ".py",
356 | "mimetype": "text/x-python",
357 | "name": "python",
358 | "nbconvert_exporter": "python",
359 | "pygments_lexer": "ipython3",
360 | "version": "3.6.3"
361 | }
362 | },
363 | "nbformat": 4,
364 | "nbformat_minor": 2
365 | }
366 |
--------------------------------------------------------------------------------
/Digit Recognizer Using CNN.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "#importing necessary libraries\n",
12 | "\n",
13 | "import pandas as pd\n",
14 | "import numpy as np\n",
15 | "import matplotlib.pyplot as plt\n",
16 | "import matplotlib.image as mpimg\n",
17 | "import seaborn as sns\n",
18 | "%matplotlib inline\n",
19 | "\n",
20 | "np.random.seed(2)\n",
21 | "\n",
22 | "from sklearn.model_selection import train_test_split\n",
23 | "from sklearn.metrics import confusion_matrix\n",
24 | "import itertools\n",
25 | "\n",
26 | "from keras.utils.np_utils import to_categorical \n",
27 | "from keras.models import Sequential\n",
28 | "from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D\n",
29 | "from keras.optimizers import RMSprop\n",
30 | "from keras.preprocessing.image import ImageDataGenerator\n",
31 | "from keras.callbacks import ReduceLROnPlateau\n",
32 | "\n",
33 | "sns.set(style='white',context='notebook',palette='deep')"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {
40 | "collapsed": true
41 | },
42 | "outputs": [],
43 | "source": [
44 | "# Load the data (file names assumed here: the standard Kaggle digit-recognizer CSVs)\n",
45 | "train = pd.read_csv('train.csv')\n",
46 | "test = pd.read_csv('test.csv')\n"
47 | ]
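},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal illustrative CNN sketch, assuming `train` has a 'label' column followed by\n",
"# 784 pixel columns (as in the Kaggle digit-recognizer data); not the tuned final model.\n",
"y_train = to_categorical(train['label'], num_classes=10)\n",
"X_train = train.drop('label', axis=1).values.reshape(-1, 28, 28, 1) / 255.0\n",
"\n",
"model = Sequential()\n",
"model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))\n",
"model.add(MaxPool2D(pool_size=(2, 2)))\n",
"model.add(Flatten())\n",
"model.add(Dense(128, activation='relu'))\n",
"model.add(Dropout(0.5))\n",
"model.add(Dense(10, activation='softmax'))\n",
"model.compile(optimizer=RMSprop(), loss='categorical_crossentropy', metrics=['accuracy'])\n",
"model.summary()"
]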
48 | }
49 | ],
50 | "metadata": {
51 | "kernelspec": {
52 | "display_name": "Python 3",
53 | "language": "python",
54 | "name": "python3"
55 | },
56 | "language_info": {
57 | "codemirror_mode": {
58 | "name": "ipython",
59 | "version": 3
60 | },
61 | "file_extension": ".py",
62 | "mimetype": "text/x-python",
63 | "name": "python",
64 | "nbconvert_exporter": "python",
65 | "pygments_lexer": "ipython3",
66 | "version": "3.6.3"
67 | }
68 | },
69 | "nbformat": 4,
70 | "nbformat_minor": 2
71 | }
72 |
--------------------------------------------------------------------------------
/Intro to BigQuery .ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "from google.cloud import bigquery\n",
11 | "from bq_helper import BigQueryHelper"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "QUERY = \"\"\"\n",
21 | " SELECT\n",
22 | " extract(DAYOFYEAR from date_local) as day_of_year,\n",
23 | " aqi\n",
24 | " FROM\n",
25 | " `bigquery-public-data.epa_historical_air_quality.pm25_frm_daily_summary`\n",
26 | " WHERE\n",
27 | " city_name = \"Los Angeles\"\n",
28 | " AND state_name = \"California\"\n",
29 | " AND sample_duration = \"24 HOUR\"\n",
30 | " AND poc = 1\n",
31 | " AND EXTRACT(YEAR FROM date_local) = 2015\n",
32 | " ORDER BY day_of_year\n",
33 | " \"\"\""
34 | ]
35 | },
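{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional setup sketch: BigQuery looks for application-default credentials, so point\n",
"# GOOGLE_APPLICATION_CREDENTIALS at a service-account key before creating a client.\n",
"# The path below is a hypothetical placeholder.\n",
"import os\n",
"os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/path/to/service_account_key.json'"
]
},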
36 | {
37 | "cell_type": "code",
38 | "execution_count": 3,
39 | "metadata": {
40 | "scrolled": true
41 | },
42 | "outputs": [
43 | {
44 | "ename": "DefaultCredentialsError",
45 | "evalue": "Could not automatically determine credentials. Please set GOOGLE_APPLICATION_CREDENTIALS or\nexplicitly create credential and re-run the application. For more\ninformation, please see\nhttps://developers.google.com/accounts/docs/application-default-credentials.",
46 | "output_type": "error",
47 | "traceback": [
48 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
49 | "\u001b[0;31mDefaultCredentialsError\u001b[0m Traceback (most recent call last)",
50 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mbq_assistant\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBigQueryHelper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"bigquery-public-data\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"epa_historical_air_quality\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
51 | "\u001b[0;32m~/src/bq-helper/bq_helper.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, active_project, dataset_name)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mproject_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mactive_project\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataset_name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclient\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbigquery\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mClient\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 26\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__dataset_ref\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclient\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproject\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mproject_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
52 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/google/cloud/bigquery/client.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, project, credentials, _http)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproject\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcredentials\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_http\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 125\u001b[0m super(Client, self).__init__(\n\u001b[0;32m--> 126\u001b[0;31m project=project, credentials=credentials, _http=_http)\n\u001b[0m\u001b[1;32m 127\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_connection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mConnection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 128\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
53 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/google/cloud/client.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, project, credentials, _http)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproject\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcredentials\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_http\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 211\u001b[0;31m \u001b[0m_ClientProjectMixin\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproject\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mproject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 212\u001b[0m \u001b[0mClient\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcredentials\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcredentials\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_http\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0m_http\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
54 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/google/cloud/client.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, project)\u001b[0m\n\u001b[1;32m 163\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproject\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 165\u001b[0;31m \u001b[0mproject\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_determine_default\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 166\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mproject\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m raise EnvironmentError('Project was not passed and could not be '\n",
55 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/google/cloud/client.py\u001b[0m in \u001b[0;36m_determine_default\u001b[0;34m(project)\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_determine_default\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[0;34m\"\"\"Helper: use default project detection.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 178\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_determine_default_project\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 179\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
56 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/google/cloud/_helpers.py\u001b[0m in \u001b[0;36m_determine_default_project\u001b[0;34m(project)\u001b[0m\n\u001b[1;32m 177\u001b[0m \"\"\"\n\u001b[1;32m 178\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mproject\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 179\u001b[0;31m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproject\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgoogle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauth\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefault\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 180\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mproject\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 181\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
57 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/google/auth/_default.py\u001b[0m in \u001b[0;36mdefault\u001b[0;34m(scopes, request)\u001b[0m\n\u001b[1;32m 281\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcredentials\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meffective_project_id\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 283\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDefaultCredentialsError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_HELP_MESSAGE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
58 | "\u001b[0;31mDefaultCredentialsError\u001b[0m: Could not automatically determine credentials. Please set GOOGLE_APPLICATION_CREDENTIALS or\nexplicitly create credential and re-run the application. For more\ninformation, please see\nhttps://developers.google.com/accounts/docs/application-default-credentials."
59 | ]
60 | }
61 | ],
62 | "source": [
63 | "bq_assistant = BigQueryHelper(\"bigquery-public-data\", \"epa_historical_air_quality\")"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "df = bq_assistant.query_to_pandas(QUERY)"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "metadata": {},
79 | "outputs": [],
80 | "source": [
81 | "df.plot(x='day_of_year', y='aqi', style='.');"
82 | ]
83 | }
84 | ],
85 | "metadata": {
86 | "kernelspec": {
87 | "display_name": "Python 3",
88 | "language": "python",
89 | "name": "python3"
90 | },
91 | "language_info": {
92 | "codemirror_mode": {
93 | "name": "ipython",
94 | "version": 3
95 | },
96 | "file_extension": ".py",
97 | "mimetype": "text/x-python",
98 | "name": "python",
99 | "nbconvert_exporter": "python",
100 | "pygments_lexer": "ipython3",
101 | "version": "3.6.4"
102 | }
103 | },
104 | "nbformat": 4,
105 | "nbformat_minor": 2
106 | }
107 |
--------------------------------------------------------------------------------
/K_Nearest_Neighbors/Social_Network_Ads.csv:
--------------------------------------------------------------------------------
1 | User ID,Gender,Age,EstimatedSalary,Purchased
2 | 15624510,Male,19,19000,0
3 | 15810944,Male,35,20000,0
4 | 15668575,Female,26,43000,0
5 | 15603246,Female,27,57000,0
6 | 15804002,Male,19,76000,0
7 | 15728773,Male,27,58000,0
8 | 15598044,Female,27,84000,0
9 | 15694829,Female,32,150000,1
10 | 15600575,Male,25,33000,0
11 | 15727311,Female,35,65000,0
12 | 15570769,Female,26,80000,0
13 | 15606274,Female,26,52000,0
14 | 15746139,Male,20,86000,0
15 | 15704987,Male,32,18000,0
16 | 15628972,Male,18,82000,0
17 | 15697686,Male,29,80000,0
18 | 15733883,Male,47,25000,1
19 | 15617482,Male,45,26000,1
20 | 15704583,Male,46,28000,1
21 | 15621083,Female,48,29000,1
22 | 15649487,Male,45,22000,1
23 | 15736760,Female,47,49000,1
24 | 15714658,Male,48,41000,1
25 | 15599081,Female,45,22000,1
26 | 15705113,Male,46,23000,1
27 | 15631159,Male,47,20000,1
28 | 15792818,Male,49,28000,1
29 | 15633531,Female,47,30000,1
30 | 15744529,Male,29,43000,0
31 | 15669656,Male,31,18000,0
32 | 15581198,Male,31,74000,0
33 | 15729054,Female,27,137000,1
34 | 15573452,Female,21,16000,0
35 | 15776733,Female,28,44000,0
36 | 15724858,Male,27,90000,0
37 | 15713144,Male,35,27000,0
38 | 15690188,Female,33,28000,0
39 | 15689425,Male,30,49000,0
40 | 15671766,Female,26,72000,0
41 | 15782806,Female,27,31000,0
42 | 15764419,Female,27,17000,0
43 | 15591915,Female,33,51000,0
44 | 15772798,Male,35,108000,0
45 | 15792008,Male,30,15000,0
46 | 15715541,Female,28,84000,0
47 | 15639277,Male,23,20000,0
48 | 15798850,Male,25,79000,0
49 | 15776348,Female,27,54000,0
50 | 15727696,Male,30,135000,1
51 | 15793813,Female,31,89000,0
52 | 15694395,Female,24,32000,0
53 | 15764195,Female,18,44000,0
54 | 15744919,Female,29,83000,0
55 | 15671655,Female,35,23000,0
56 | 15654901,Female,27,58000,0
57 | 15649136,Female,24,55000,0
58 | 15775562,Female,23,48000,0
59 | 15807481,Male,28,79000,0
60 | 15642885,Male,22,18000,0
61 | 15789109,Female,32,117000,0
62 | 15814004,Male,27,20000,0
63 | 15673619,Male,25,87000,0
64 | 15595135,Female,23,66000,0
65 | 15583681,Male,32,120000,1
66 | 15605000,Female,59,83000,0
67 | 15718071,Male,24,58000,0
68 | 15679760,Male,24,19000,0
69 | 15654574,Female,23,82000,0
70 | 15577178,Female,22,63000,0
71 | 15595324,Female,31,68000,0
72 | 15756932,Male,25,80000,0
73 | 15726358,Female,24,27000,0
74 | 15595228,Female,20,23000,0
75 | 15782530,Female,33,113000,0
76 | 15592877,Male,32,18000,0
77 | 15651983,Male,34,112000,1
78 | 15746737,Male,18,52000,0
79 | 15774179,Female,22,27000,0
80 | 15667265,Female,28,87000,0
81 | 15655123,Female,26,17000,0
82 | 15595917,Male,30,80000,0
83 | 15668385,Male,39,42000,0
84 | 15709476,Male,20,49000,0
85 | 15711218,Male,35,88000,0
86 | 15798659,Female,30,62000,0
87 | 15663939,Female,31,118000,1
88 | 15694946,Male,24,55000,0
89 | 15631912,Female,28,85000,0
90 | 15768816,Male,26,81000,0
91 | 15682268,Male,35,50000,0
92 | 15684801,Male,22,81000,0
93 | 15636428,Female,30,116000,0
94 | 15809823,Male,26,15000,0
95 | 15699284,Female,29,28000,0
96 | 15786993,Female,29,83000,0
97 | 15709441,Female,35,44000,0
98 | 15710257,Female,35,25000,0
99 | 15582492,Male,28,123000,1
100 | 15575694,Male,35,73000,0
101 | 15756820,Female,28,37000,0
102 | 15766289,Male,27,88000,0
103 | 15593014,Male,28,59000,0
104 | 15584545,Female,32,86000,0
105 | 15675949,Female,33,149000,1
106 | 15672091,Female,19,21000,0
107 | 15801658,Male,21,72000,0
108 | 15706185,Female,26,35000,0
109 | 15789863,Male,27,89000,0
110 | 15720943,Male,26,86000,0
111 | 15697997,Female,38,80000,0
112 | 15665416,Female,39,71000,0
113 | 15660200,Female,37,71000,0
114 | 15619653,Male,38,61000,0
115 | 15773447,Male,37,55000,0
116 | 15739160,Male,42,80000,0
117 | 15689237,Male,40,57000,0
118 | 15679297,Male,35,75000,0
119 | 15591433,Male,36,52000,0
120 | 15642725,Male,40,59000,0
121 | 15701962,Male,41,59000,0
122 | 15811613,Female,36,75000,0
123 | 15741049,Male,37,72000,0
124 | 15724423,Female,40,75000,0
125 | 15574305,Male,35,53000,0
126 | 15678168,Female,41,51000,0
127 | 15697020,Female,39,61000,0
128 | 15610801,Male,42,65000,0
129 | 15745232,Male,26,32000,0
130 | 15722758,Male,30,17000,0
131 | 15792102,Female,26,84000,0
132 | 15675185,Male,31,58000,0
133 | 15801247,Male,33,31000,0
134 | 15725660,Male,30,87000,0
135 | 15638963,Female,21,68000,0
136 | 15800061,Female,28,55000,0
137 | 15578006,Male,23,63000,0
138 | 15668504,Female,20,82000,0
139 | 15687491,Male,30,107000,1
140 | 15610403,Female,28,59000,0
141 | 15741094,Male,19,25000,0
142 | 15807909,Male,19,85000,0
143 | 15666141,Female,18,68000,0
144 | 15617134,Male,35,59000,0
145 | 15783029,Male,30,89000,0
146 | 15622833,Female,34,25000,0
147 | 15746422,Female,24,89000,0
148 | 15750839,Female,27,96000,1
149 | 15749130,Female,41,30000,0
150 | 15779862,Male,29,61000,0
151 | 15767871,Male,20,74000,0
152 | 15679651,Female,26,15000,0
153 | 15576219,Male,41,45000,0
154 | 15699247,Male,31,76000,0
155 | 15619087,Female,36,50000,0
156 | 15605327,Male,40,47000,0
157 | 15610140,Female,31,15000,0
158 | 15791174,Male,46,59000,0
159 | 15602373,Male,29,75000,0
160 | 15762605,Male,26,30000,0
161 | 15598840,Female,32,135000,1
162 | 15744279,Male,32,100000,1
163 | 15670619,Male,25,90000,0
164 | 15599533,Female,37,33000,0
165 | 15757837,Male,35,38000,0
166 | 15697574,Female,33,69000,0
167 | 15578738,Female,18,86000,0
168 | 15762228,Female,22,55000,0
169 | 15614827,Female,35,71000,0
170 | 15789815,Male,29,148000,1
171 | 15579781,Female,29,47000,0
172 | 15587013,Male,21,88000,0
173 | 15570932,Male,34,115000,0
174 | 15794661,Female,26,118000,0
175 | 15581654,Female,34,43000,0
176 | 15644296,Female,34,72000,0
177 | 15614420,Female,23,28000,0
178 | 15609653,Female,35,47000,0
179 | 15594577,Male,25,22000,0
180 | 15584114,Male,24,23000,0
181 | 15673367,Female,31,34000,0
182 | 15685576,Male,26,16000,0
183 | 15774727,Female,31,71000,0
184 | 15694288,Female,32,117000,1
185 | 15603319,Male,33,43000,0
186 | 15759066,Female,33,60000,0
187 | 15814816,Male,31,66000,0
188 | 15724402,Female,20,82000,0
189 | 15571059,Female,33,41000,0
190 | 15674206,Male,35,72000,0
191 | 15715160,Male,28,32000,0
192 | 15730448,Male,24,84000,0
193 | 15662067,Female,19,26000,0
194 | 15779581,Male,29,43000,0
195 | 15662901,Male,19,70000,0
196 | 15689751,Male,28,89000,0
197 | 15667742,Male,34,43000,0
198 | 15738448,Female,30,79000,0
199 | 15680243,Female,20,36000,0
200 | 15745083,Male,26,80000,0
201 | 15708228,Male,35,22000,0
202 | 15628523,Male,35,39000,0
203 | 15708196,Male,49,74000,0
204 | 15735549,Female,39,134000,1
205 | 15809347,Female,41,71000,0
206 | 15660866,Female,58,101000,1
207 | 15766609,Female,47,47000,0
208 | 15654230,Female,55,130000,1
209 | 15794566,Female,52,114000,0
210 | 15800890,Female,40,142000,1
211 | 15697424,Female,46,22000,0
212 | 15724536,Female,48,96000,1
213 | 15735878,Male,52,150000,1
214 | 15707596,Female,59,42000,0
215 | 15657163,Male,35,58000,0
216 | 15622478,Male,47,43000,0
217 | 15779529,Female,60,108000,1
218 | 15636023,Male,49,65000,0
219 | 15582066,Male,40,78000,0
220 | 15666675,Female,46,96000,0
221 | 15732987,Male,59,143000,1
222 | 15789432,Female,41,80000,0
223 | 15663161,Male,35,91000,1
224 | 15694879,Male,37,144000,1
225 | 15593715,Male,60,102000,1
226 | 15575002,Female,35,60000,0
227 | 15622171,Male,37,53000,0
228 | 15795224,Female,36,126000,1
229 | 15685346,Male,56,133000,1
230 | 15691808,Female,40,72000,0
231 | 15721007,Female,42,80000,1
232 | 15794253,Female,35,147000,1
233 | 15694453,Male,39,42000,0
234 | 15813113,Male,40,107000,1
235 | 15614187,Male,49,86000,1
236 | 15619407,Female,38,112000,0
237 | 15646227,Male,46,79000,1
238 | 15660541,Male,40,57000,0
239 | 15753874,Female,37,80000,0
240 | 15617877,Female,46,82000,0
241 | 15772073,Female,53,143000,1
242 | 15701537,Male,42,149000,1
243 | 15736228,Male,38,59000,0
244 | 15780572,Female,50,88000,1
245 | 15769596,Female,56,104000,1
246 | 15586996,Female,41,72000,0
247 | 15722061,Female,51,146000,1
248 | 15638003,Female,35,50000,0
249 | 15775590,Female,57,122000,1
250 | 15730688,Male,41,52000,0
251 | 15753102,Female,35,97000,1
252 | 15810075,Female,44,39000,0
253 | 15723373,Male,37,52000,0
254 | 15795298,Female,48,134000,1
255 | 15584320,Female,37,146000,1
256 | 15724161,Female,50,44000,0
257 | 15750056,Female,52,90000,1
258 | 15609637,Female,41,72000,0
259 | 15794493,Male,40,57000,0
260 | 15569641,Female,58,95000,1
261 | 15815236,Female,45,131000,1
262 | 15811177,Female,35,77000,0
263 | 15680587,Male,36,144000,1
264 | 15672821,Female,55,125000,1
265 | 15767681,Female,35,72000,0
266 | 15600379,Male,48,90000,1
267 | 15801336,Female,42,108000,1
268 | 15721592,Male,40,75000,0
269 | 15581282,Male,37,74000,0
270 | 15746203,Female,47,144000,1
271 | 15583137,Male,40,61000,0
272 | 15680752,Female,43,133000,0
273 | 15688172,Female,59,76000,1
274 | 15791373,Male,60,42000,1
275 | 15589449,Male,39,106000,1
276 | 15692819,Female,57,26000,1
277 | 15727467,Male,57,74000,1
278 | 15734312,Male,38,71000,0
279 | 15764604,Male,49,88000,1
280 | 15613014,Female,52,38000,1
281 | 15759684,Female,50,36000,1
282 | 15609669,Female,59,88000,1
283 | 15685536,Male,35,61000,0
284 | 15750447,Male,37,70000,1
285 | 15663249,Female,52,21000,1
286 | 15638646,Male,48,141000,0
287 | 15734161,Female,37,93000,1
288 | 15631070,Female,37,62000,0
289 | 15761950,Female,48,138000,1
290 | 15649668,Male,41,79000,0
291 | 15713912,Female,37,78000,1
292 | 15586757,Male,39,134000,1
293 | 15596522,Male,49,89000,1
294 | 15625395,Male,55,39000,1
295 | 15760570,Male,37,77000,0
296 | 15566689,Female,35,57000,0
297 | 15725794,Female,36,63000,0
298 | 15673539,Male,42,73000,1
299 | 15705298,Female,43,112000,1
300 | 15675791,Male,45,79000,0
301 | 15747043,Male,46,117000,1
302 | 15736397,Female,58,38000,1
303 | 15678201,Male,48,74000,1
304 | 15720745,Female,37,137000,1
305 | 15637593,Male,37,79000,1
306 | 15598070,Female,40,60000,0
307 | 15787550,Male,42,54000,0
308 | 15603942,Female,51,134000,0
309 | 15733973,Female,47,113000,1
310 | 15596761,Male,36,125000,1
311 | 15652400,Female,38,50000,0
312 | 15717893,Female,42,70000,0
313 | 15622585,Male,39,96000,1
314 | 15733964,Female,38,50000,0
315 | 15753861,Female,49,141000,1
316 | 15747097,Female,39,79000,0
317 | 15594762,Female,39,75000,1
318 | 15667417,Female,54,104000,1
319 | 15684861,Male,35,55000,0
320 | 15742204,Male,45,32000,1
321 | 15623502,Male,36,60000,0
322 | 15774872,Female,52,138000,1
323 | 15611191,Female,53,82000,1
324 | 15674331,Male,41,52000,0
325 | 15619465,Female,48,30000,1
326 | 15575247,Female,48,131000,1
327 | 15695679,Female,41,60000,0
328 | 15713463,Male,41,72000,0
329 | 15785170,Female,42,75000,0
330 | 15796351,Male,36,118000,1
331 | 15639576,Female,47,107000,1
332 | 15693264,Male,38,51000,0
333 | 15589715,Female,48,119000,1
334 | 15769902,Male,42,65000,0
335 | 15587177,Male,40,65000,0
336 | 15814553,Male,57,60000,1
337 | 15601550,Female,36,54000,0
338 | 15664907,Male,58,144000,1
339 | 15612465,Male,35,79000,0
340 | 15810800,Female,38,55000,0
341 | 15665760,Male,39,122000,1
342 | 15588080,Female,53,104000,1
343 | 15776844,Male,35,75000,0
344 | 15717560,Female,38,65000,0
345 | 15629739,Female,47,51000,1
346 | 15729908,Male,47,105000,1
347 | 15716781,Female,41,63000,0
348 | 15646936,Male,53,72000,1
349 | 15768151,Female,54,108000,1
350 | 15579212,Male,39,77000,0
351 | 15721835,Male,38,61000,0
352 | 15800515,Female,38,113000,1
353 | 15591279,Male,37,75000,0
354 | 15587419,Female,42,90000,1
355 | 15750335,Female,37,57000,0
356 | 15699619,Male,36,99000,1
357 | 15606472,Male,60,34000,1
358 | 15778368,Male,54,70000,1
359 | 15671387,Female,41,72000,0
360 | 15573926,Male,40,71000,1
361 | 15709183,Male,42,54000,0
362 | 15577514,Male,43,129000,1
363 | 15778830,Female,53,34000,1
364 | 15768072,Female,47,50000,1
365 | 15768293,Female,42,79000,0
366 | 15654456,Male,42,104000,1
367 | 15807525,Female,59,29000,1
368 | 15574372,Female,58,47000,1
369 | 15671249,Male,46,88000,1
370 | 15779744,Male,38,71000,0
371 | 15624755,Female,54,26000,1
372 | 15611430,Female,60,46000,1
373 | 15774744,Male,60,83000,1
374 | 15629885,Female,39,73000,0
375 | 15708791,Male,59,130000,1
376 | 15793890,Female,37,80000,0
377 | 15646091,Female,46,32000,1
378 | 15596984,Female,46,74000,0
379 | 15800215,Female,42,53000,0
380 | 15577806,Male,41,87000,1
381 | 15749381,Female,58,23000,1
382 | 15683758,Male,42,64000,0
383 | 15670615,Male,48,33000,1
384 | 15715622,Female,44,139000,1
385 | 15707634,Male,49,28000,1
386 | 15806901,Female,57,33000,1
387 | 15775335,Male,56,60000,1
388 | 15724150,Female,49,39000,1
389 | 15627220,Male,39,71000,0
390 | 15672330,Male,47,34000,1
391 | 15668521,Female,48,35000,1
392 | 15807837,Male,48,33000,1
393 | 15592570,Male,47,23000,1
394 | 15748589,Female,45,45000,1
395 | 15635893,Male,60,42000,1
396 | 15757632,Female,39,59000,0
397 | 15691863,Female,46,41000,1
398 | 15706071,Male,51,23000,1
399 | 15654296,Female,50,20000,1
400 | 15755018,Male,36,33000,0
401 | 15594041,Female,49,36000,1
--------------------------------------------------------------------------------
/K_Nearest_Neighbors/classification_template.R:
--------------------------------------------------------------------------------
1 | # Classification template
2 |
3 | # Importing the dataset
4 | dataset = read.csv('Social_Network_Ads.csv')
5 | dataset = dataset[3:5]
6 |
7 | # Encoding the target feature as factor
8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1))
9 |
10 | # Splitting the dataset into the Training set and Test set
11 | # install.packages('caTools')
12 | library(caTools)
13 | set.seed(123)
14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75)
15 | training_set = subset(dataset, split == TRUE)
16 | test_set = subset(dataset, split == FALSE)
17 |
18 | # Feature Scaling
19 | training_set[-3] = scale(training_set[-3])
20 | test_set[-3] = scale(test_set[-3])
21 |
22 | # Fitting classifier to the Training set
23 | # Create your classifier here
24 |
25 | # Predicting the Test set results
26 | y_pred = predict(classifier, newdata = test_set[-3])
27 |
28 | # Making the Confusion Matrix
29 | cm = table(test_set[, 3], y_pred)
30 |
31 | # Visualising the Training set results
32 | library(ElemStatLearn)
33 | set = training_set
34 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
35 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
36 | grid_set = expand.grid(X1, X2)
37 | colnames(grid_set) = c('Age', 'EstimatedSalary')
38 | y_grid = predict(classifier, newdata = grid_set)
39 | plot(set[, -3],
40 | main = 'Classifier (Training set)',
41 | xlab = 'Age', ylab = 'Estimated Salary',
42 | xlim = range(X1), ylim = range(X2))
43 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
44 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
45 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
46 |
47 | # Visualising the Test set results
48 | library(ElemStatLearn)
49 | set = test_set
50 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
51 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
52 | grid_set = expand.grid(X1, X2)
53 | colnames(grid_set) = c('Age', 'EstimatedSalary')
54 | y_grid = predict(classifier, newdata = grid_set)
55 | plot(set[, -3], main = 'Classifier (Test set)',
56 | xlab = 'Age', ylab = 'Estimated Salary',
57 | xlim = range(X1), ylim = range(X2))
58 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
59 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
60 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
--------------------------------------------------------------------------------
/K_Nearest_Neighbors/classification_template.py:
--------------------------------------------------------------------------------
1 | # Classification template
2 |
3 | # Importing the libraries
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 | import pandas as pd
7 |
8 | # Importing the dataset
9 | dataset = pd.read_csv('Social_Network_Ads.csv')
10 | X = dataset.iloc[:, [2, 3]].values
11 | y = dataset.iloc[:, 4].values
12 |
13 | # Splitting the dataset into the Training set and Test set
14 | from sklearn.model_selection import train_test_split
15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
16 |
17 | # Feature Scaling
18 | from sklearn.preprocessing import StandardScaler
19 | sc = StandardScaler()
20 | X_train = sc.fit_transform(X_train)
21 | X_test = sc.transform(X_test)
22 |
23 | # Fitting classifier to the Training set
24 | # Create your classifier here
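# For illustration only: any scikit-learn classifier can be plugged in here.
# Logistic regression is used as a stand-in example (swap in the estimator you need).
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)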
25 |
26 | # Predicting the Test set results
27 | y_pred = classifier.predict(X_test)
28 |
29 | # Making the Confusion Matrix
30 | from sklearn.metrics import confusion_matrix
31 | cm = confusion_matrix(y_test, y_pred)
32 |
33 | # Visualising the Training set results
34 | from matplotlib.colors import ListedColormap
35 | X_set, y_set = X_train, y_train
36 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
37 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
38 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
39 | alpha = 0.75, cmap = ListedColormap(('red', 'green')))
40 | plt.xlim(X1.min(), X1.max())
41 | plt.ylim(X2.min(), X2.max())
42 | for i, j in enumerate(np.unique(y_set)):
43 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
44 | c = ListedColormap(('red', 'green'))(i), label = j)
45 | plt.title('Classifier (Training set)')
46 | plt.xlabel('Age')
47 | plt.ylabel('Estimated Salary')
48 | plt.legend()
49 | plt.show()
50 |
51 | # Visualising the Test set results
52 | from matplotlib.colors import ListedColormap
53 | X_set, y_set = X_test, y_test
54 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
55 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
56 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
57 | alpha = 0.75, cmap = ListedColormap(('red', 'green')))
58 | plt.xlim(X1.min(), X1.max())
59 | plt.ylim(X2.min(), X2.max())
60 | for i, j in enumerate(np.unique(y_set)):
61 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
62 | c = ListedColormap(('red', 'green'))(i), label = j)
63 | plt.title('Classifier (Test set)')
64 | plt.xlabel('Age')
65 | plt.ylabel('Estimated Salary')
66 | plt.legend()
67 | plt.show()
--------------------------------------------------------------------------------
/K_Nearest_Neighbors/knn.R:
--------------------------------------------------------------------------------
1 | # K-Nearest Neighbors (K-NN)
2 |
3 | # Importing the dataset
4 | dataset = read.csv('Social_Network_Ads.csv')
5 | dataset = dataset[3:5]
6 |
7 | # Encoding the target feature as factor
8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1))
9 |
10 | # Splitting the dataset into the Training set and Test set
11 | # install.packages('caTools')
12 | library(caTools)
13 | set.seed(123)
14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75)
15 | training_set = subset(dataset, split == TRUE)
16 | test_set = subset(dataset, split == FALSE)
17 |
18 | # Feature Scaling
19 | training_set[-3] = scale(training_set[-3])
20 | test_set[-3] = scale(test_set[-3])
21 |
22 | # Fitting K-NN to the Training set and Predicting the Test set results
23 | library(class)
24 | y_pred = knn(train = training_set[, -3],
25 | test = test_set[, -3],
26 | cl = training_set[, 3],
27 | k = 5,
28 | prob = TRUE)
29 |
30 | # Making the Confusion Matrix
31 | cm = table(test_set[, 3], y_pred)
32 |
33 | # Visualising the Training set results
34 | library(ElemStatLearn)
35 | set = training_set
36 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
37 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
38 | grid_set = expand.grid(X1, X2)
39 | colnames(grid_set) = c('Age', 'EstimatedSalary')
40 | y_grid = knn(train = training_set[, -3], test = grid_set, cl = training_set[, 3], k = 5)
41 | plot(set[, -3],
42 | main = 'K-NN (Training set)',
43 | xlab = 'Age', ylab = 'Estimated Salary',
44 | xlim = range(X1), ylim = range(X2))
45 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
46 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
47 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
48 |
49 | # Visualising the Test set results
50 | library(ElemStatLearn)
51 | set = test_set
52 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
53 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
54 | grid_set = expand.grid(X1, X2)
55 | colnames(grid_set) = c('Age', 'EstimatedSalary')
56 | y_grid = knn(train = training_set[, -3], test = grid_set, cl = training_set[, 3], k = 5)
57 | plot(set[, -3],
58 | main = 'K-NN (Test set)',
59 | xlab = 'Age', ylab = 'Estimated Salary',
60 | xlim = range(X1), ylim = range(X2))
61 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
62 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
63 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
--------------------------------------------------------------------------------
/K_Nearest_Neighbors/knn.py:
--------------------------------------------------------------------------------
1 | # K-Nearest Neighbors (K-NN)
2 |
3 | # Importing the libraries
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 | import pandas as pd
7 |
8 | # Importing the dataset
9 | dataset = pd.read_csv('Social_Network_Ads.csv')
10 | X = dataset.iloc[:, [2, 3]].values
11 | y = dataset.iloc[:, 4].values
12 |
13 | # Splitting the dataset into the Training set and Test set
14 | from sklearn.model_selection import train_test_split
15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
16 |
17 | # Feature Scaling
18 | from sklearn.preprocessing import StandardScaler
19 | sc = StandardScaler()
20 | X_train = sc.fit_transform(X_train)
21 | X_test = sc.transform(X_test)
22 |
23 | # Fitting K-NN to the Training set
24 | from sklearn.neighbors import KNeighborsClassifier
25 | classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
26 | classifier.fit(X_train, y_train)
27 |
28 | # Predicting the Test set results
29 | y_pred = classifier.predict(X_test)
30 |
31 | # Making the Confusion Matrix
32 | from sklearn.metrics import confusion_matrix
33 | cm = confusion_matrix(y_test, y_pred)
34 |
35 | # Visualising the Training set results
36 | from matplotlib.colors import ListedColormap
37 | X_set, y_set = X_train, y_train
38 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
39 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
40 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
41 | alpha = 0.75, cmap = ListedColormap(('red', 'green')))
42 | plt.xlim(X1.min(), X1.max())
43 | plt.ylim(X2.min(), X2.max())
44 | for i, j in enumerate(np.unique(y_set)):
45 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
46 | c = ListedColormap(('red', 'green'))(i), label = j)
47 | plt.title('K-NN (Training set)')
48 | plt.xlabel('Age')
49 | plt.ylabel('Estimated Salary')
50 | plt.legend()
51 | plt.show()
52 |
53 | # Visualising the Test set results
54 | from matplotlib.colors import ListedColormap
55 | X_set, y_set = X_test, y_test
56 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
57 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
58 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
59 | alpha = 0.75, cmap = ListedColormap(('red', 'green')))
60 | plt.xlim(X1.min(), X1.max())
61 | plt.ylim(X2.min(), X2.max())
62 | for i, j in enumerate(np.unique(y_set)):
63 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
64 | c = ListedColormap(('red', 'green'))(i), label = j)
65 | plt.title('K-NN (Test set)')
66 | plt.xlabel('Age')
67 | plt.ylabel('Estimated Salary')
68 | plt.legend()
69 | plt.show()
--------------------------------------------------------------------------------
/Natural Language Processing Personal Notes.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import nltk"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "Download necessary packages."
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "nltk.download()"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {},
29 | "source": [
30 | "## Part 1 - Tokenizing\n",
31 | "Tokenizing is a form of grouping.\n",
32 | " Word and sentence tokenizers split text into words and sentences.\n",
33 | " Corpora - a body of text, e.g. medical journals, presidential speeches, the English language\n",
34 | " Lexicon - words and their meanings,\n",
35 | " e.g. investor speak vs. regular speak"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 3,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "from nltk.tokenize import sent_tokenize, word_tokenize"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 4,
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "example_text = \"Hello there, how are you doing today? The weather is great and Python is awesome. The sky is pink\""
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "Separate sentences using sent_tokenize."
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 5,
66 | "metadata": {
67 | "scrolled": false
68 | },
69 | "outputs": [
70 | {
71 | "name": "stdout",
72 | "output_type": "stream",
73 | "text": [
74 | "['Hello there, how are you doing today?', 'The weather is great and Python is awesome.', 'The sky is pink']\n"
75 | ]
76 | }
77 | ],
78 | "source": [
79 | "print(sent_tokenize(example_text))"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "Separate words using word_tokenize."
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 6,
92 | "metadata": {},
93 | "outputs": [
94 | {
95 | "name": "stdout",
96 | "output_type": "stream",
97 | "text": [
98 | "['Hello', 'there', ',', 'how', 'are', 'you', 'doing', 'today', '?', 'The', 'weather', 'is', 'great', 'and', 'Python', 'is', 'awesome', '.', 'The', 'sky', 'is', 'pink']\n"
99 | ]
100 | }
101 | ],
102 | "source": [
103 | "print(word_tokenize(example_text))"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | "Print each token on its own line."
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 7,
116 | "metadata": {},
117 | "outputs": [
118 | {
119 | "name": "stdout",
120 | "output_type": "stream",
121 | "text": [
122 | "Hello\n",
123 | "there\n",
124 | ",\n",
125 | "how\n",
126 | "are\n",
127 | "you\n",
128 | "doing\n",
129 | "today\n",
130 | "?\n",
131 | "The\n",
132 | "weather\n",
133 | "is\n",
134 | "great\n",
135 | "and\n",
136 | "Python\n",
137 | "is\n",
138 | "awesome\n",
139 | ".\n",
140 | "The\n",
141 | "sky\n",
142 | "is\n",
143 | "pink\n"
144 | ]
145 | }
146 | ],
147 | "source": [
148 | "for i in word_tokenize(example_text):\n",
149 | " print(i)"
150 | ]
151 | },
152 | {
153 | "cell_type": "markdown",
154 | "metadata": {},
155 | "source": [
156 | "## Part 2 - Stop Words\n",
157 | "What are stop words? Words that you want to filter out of an analysis: basically filler words that carry little useful meaning."
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": 8,
163 | "metadata": {},
164 | "outputs": [],
165 | "source": [
166 | "from nltk.corpus import stopwords\n",
167 | "from nltk.tokenize import word_tokenize"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 9,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": [
176 | "example_sentence = \"This is an example showing off stop word filtration.\"\n",
177 | "# set of stop words already predefined by nltk\n",
178 | "stop_words = set(stopwords.words(\"english\")) "
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 10,
184 | "metadata": {},
185 | "outputs": [
186 | {
187 | "data": {
188 | "text/plain": [
189 | "{'a',\n",
190 | " 'about',\n",
191 | " 'above',\n",
192 | " 'after',\n",
193 | " 'again',\n",
194 | " 'against',\n",
195 | " 'ain',\n",
196 | " 'all',\n",
197 | " 'am',\n",
198 | " 'an',\n",
199 | " 'and',\n",
200 | " 'any',\n",
201 | " 'are',\n",
202 | " 'aren',\n",
203 | " \"aren't\",\n",
204 | " 'as',\n",
205 | " 'at',\n",
206 | " 'be',\n",
207 | " 'because',\n",
208 | " 'been',\n",
209 | " 'before',\n",
210 | " 'being',\n",
211 | " 'below',\n",
212 | " 'between',\n",
213 | " 'both',\n",
214 | " 'but',\n",
215 | " 'by',\n",
216 | " 'can',\n",
217 | " 'couldn',\n",
218 | " \"couldn't\",\n",
219 | " 'd',\n",
220 | " 'did',\n",
221 | " 'didn',\n",
222 | " \"didn't\",\n",
223 | " 'do',\n",
224 | " 'does',\n",
225 | " 'doesn',\n",
226 | " \"doesn't\",\n",
227 | " 'doing',\n",
228 | " 'don',\n",
229 | " \"don't\",\n",
230 | " 'down',\n",
231 | " 'during',\n",
232 | " 'each',\n",
233 | " 'few',\n",
234 | " 'for',\n",
235 | " 'from',\n",
236 | " 'further',\n",
237 | " 'had',\n",
238 | " 'hadn',\n",
239 | " \"hadn't\",\n",
240 | " 'has',\n",
241 | " 'hasn',\n",
242 | " \"hasn't\",\n",
243 | " 'have',\n",
244 | " 'haven',\n",
245 | " \"haven't\",\n",
246 | " 'having',\n",
247 | " 'he',\n",
248 | " 'her',\n",
249 | " 'here',\n",
250 | " 'hers',\n",
251 | " 'herself',\n",
252 | " 'him',\n",
253 | " 'himself',\n",
254 | " 'his',\n",
255 | " 'how',\n",
256 | " 'i',\n",
257 | " 'if',\n",
258 | " 'in',\n",
259 | " 'into',\n",
260 | " 'is',\n",
261 | " 'isn',\n",
262 | " \"isn't\",\n",
263 | " 'it',\n",
264 | " \"it's\",\n",
265 | " 'its',\n",
266 | " 'itself',\n",
267 | " 'just',\n",
268 | " 'll',\n",
269 | " 'm',\n",
270 | " 'ma',\n",
271 | " 'me',\n",
272 | " 'mightn',\n",
273 | " \"mightn't\",\n",
274 | " 'more',\n",
275 | " 'most',\n",
276 | " 'mustn',\n",
277 | " \"mustn't\",\n",
278 | " 'my',\n",
279 | " 'myself',\n",
280 | " 'needn',\n",
281 | " \"needn't\",\n",
282 | " 'no',\n",
283 | " 'nor',\n",
284 | " 'not',\n",
285 | " 'now',\n",
286 | " 'o',\n",
287 | " 'of',\n",
288 | " 'off',\n",
289 | " 'on',\n",
290 | " 'once',\n",
291 | " 'only',\n",
292 | " 'or',\n",
293 | " 'other',\n",
294 | " 'our',\n",
295 | " 'ours',\n",
296 | " 'ourselves',\n",
297 | " 'out',\n",
298 | " 'over',\n",
299 | " 'own',\n",
300 | " 're',\n",
301 | " 's',\n",
302 | " 'same',\n",
303 | " 'shan',\n",
304 | " \"shan't\",\n",
305 | " 'she',\n",
306 | " \"she's\",\n",
307 | " 'should',\n",
308 | " \"should've\",\n",
309 | " 'shouldn',\n",
310 | " \"shouldn't\",\n",
311 | " 'so',\n",
312 | " 'some',\n",
313 | " 'such',\n",
314 | " 't',\n",
315 | " 'than',\n",
316 | " 'that',\n",
317 | " \"that'll\",\n",
318 | " 'the',\n",
319 | " 'their',\n",
320 | " 'theirs',\n",
321 | " 'them',\n",
322 | " 'themselves',\n",
323 | " 'then',\n",
324 | " 'there',\n",
325 | " 'these',\n",
326 | " 'they',\n",
327 | " 'this',\n",
328 | " 'those',\n",
329 | " 'through',\n",
330 | " 'to',\n",
331 | " 'too',\n",
332 | " 'under',\n",
333 | " 'until',\n",
334 | " 'up',\n",
335 | " 've',\n",
336 | " 'very',\n",
337 | " 'was',\n",
338 | " 'wasn',\n",
339 | " \"wasn't\",\n",
340 | " 'we',\n",
341 | " 'were',\n",
342 | " 'weren',\n",
343 | " \"weren't\",\n",
344 | " 'what',\n",
345 | " 'when',\n",
346 | " 'where',\n",
347 | " 'which',\n",
348 | " 'while',\n",
349 | " 'who',\n",
350 | " 'whom',\n",
351 | " 'why',\n",
352 | " 'will',\n",
353 | " 'with',\n",
354 | " 'won',\n",
355 | " \"won't\",\n",
356 | " 'wouldn',\n",
357 | " \"wouldn't\",\n",
358 | " 'y',\n",
359 | " 'you',\n",
360 | " \"you'd\",\n",
361 | " \"you'll\",\n",
362 | " \"you're\",\n",
363 | " \"you've\",\n",
364 | " 'your',\n",
365 | " 'yours',\n",
366 | " 'yourself',\n",
367 | " 'yourselves'}"
368 | ]
369 | },
370 | "execution_count": 10,
371 | "metadata": {},
372 | "output_type": "execute_result"
373 | }
374 | ],
375 | "source": [
376 | "stop_words"
377 | ]
378 | },
379 | {
380 | "cell_type": "code",
381 | "execution_count": 11,
382 | "metadata": {},
383 | "outputs": [
384 | {
385 | "name": "stdout",
386 | "output_type": "stream",
387 | "text": [
388 | "['This', 'example', 'showing', 'stop', 'word', 'filtration', '.']\n"
389 | ]
390 | }
391 | ],
392 | "source": [
393 | "words = word_tokenize(example_sentence)\n",
394 | "filtered_sentence = []\n",
395 | "for w in words:\n",
396 | " if w not in stop_words:\n",
397 | " filtered_sentence.append(w)\n",
398 | "print(filtered_sentence) \n"
399 | ]
400 | },
401 | {
402 | "cell_type": "markdown",
403 | "metadata": {},
404 | "source": [
405 | "We can see from the previous sentence that some words have been removed."
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": 13,
411 | "metadata": {},
412 | "outputs": [
413 | {
414 | "data": {
415 | "text/plain": [
416 | "['This', 'example', 'showing', 'stop', 'word', 'filtration', '.']"
417 | ]
418 | },
419 | "execution_count": 13,
420 | "metadata": {},
421 | "output_type": "execute_result"
422 | }
423 | ],
424 | "source": [
425 | "#short hand version of previous code\n",
426 | "filtered_sentence = [w for w in words if not w in stop_words]\n",
427 | "print(filtered_sentence)"
428 | ]
429 | },
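For reference, a self-contained sketch of the stop-word filtering performed in the two cells above. The `nltk.download` calls and the exact text of `example_sentence` are assumptions (both are set up earlier in the notebook); everything else mirrors the cells shown here.

```python
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

nltk.download('stopwords')  # one-time download of the English stop-word list
nltk.download('punkt')      # one-time download of the tokenizer models

# stand-in for the example_sentence defined earlier in the notebook
example_sentence = "This is an example showing off stop word filtration."

stop_words = set(stopwords.words('english'))
words = word_tokenize(example_sentence)

# keep only the tokens that are not stop words
filtered_sentence = [w for w in words if w not in stop_words]
print(filtered_sentence)
# expected output, matching the cell above:
# ['This', 'example', 'showing', 'stop', 'word', 'filtration', '.']
```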
430 | {
431 | "cell_type": "code",
432 | "execution_count": null,
433 | "metadata": {},
434 | "outputs": [],
435 | "source": []
436 | }
437 | ],
438 | "metadata": {
439 | "kernelspec": {
440 | "display_name": "Python 3",
441 | "language": "python",
442 | "name": "python3"
443 | }
444 | },
445 | "nbformat": 4,
446 | "nbformat_minor": 2
447 | }
448 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Data Science and Machine Learning Portfolio
2 | Repository containing a portfolio of data science projects completed for academic, self-learning, and professional purposes, presented in the form of Jupyter Notebooks.
3 |
4 | Tools
5 | - **Python**: NumPy, Pandas, Seaborn, Matplotlib
6 | - **Machine Learning**: scikit-learn, TensorFlow, Keras
7 |
8 | ## Contents
9 | - ## Machine Learning
10 | - [Convolutional Neural Network - Digit Recognizer](https://github.com/melvfnz/data_science_portfolio/blob/master/digit_recognizer.ipynb): Convolutional neural network that learns to recognize sequences of digits, trained on data generated by concatenating images from MNIST (i.e., it recognizes digits from an image).
11 | - [K-Nearest Neighbors - Social Network Ads Dataset](https://github.com/melvfernandez/data_science_portfolio/blob/master/K_Nearest_Neighbors/K-Nearest%20Neighbors%20On%20Social%20Network%20Ads.ipynb): Using K-NN to classify which customers purchased an SUV advertised to them on a social network.
12 | - [Monte Carlo Model - Cryptocurrency](https://github.com/melvfernandez/data_science_portfolio/blob/master/Cryptocurrency%20Market%20Analysis.ipynb): Using a probabilistic (Monte Carlo) model of cryptocurrency prices to approximate quantities that would be difficult to compute by other methods (a minimal sketch of the idea follows this list).
13 | - [Machine Learning Regression - Financial Market](https://github.com/melvfnz/data_science_portfolio/blob/master/Regression%20-%20Machine%20Learning.ipynb): Importing financial and economic data from Quandl to create a simple regression.
14 |
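In rough outline, the Monte Carlo approach in the cryptocurrency notebook amounts to simulating many random price paths from a return distribution and summarizing them. The sketch below is illustrative only; all numbers are placeholders, not results from the notebook.

```python
import numpy as np

np.random.seed(42)
last_price = 100.0        # placeholder starting price
mu, sigma = 0.001, 0.05   # placeholder mean / std of daily returns
days, runs = 365, 1000    # horizon and number of simulated paths

paths = np.zeros((runs, days))
paths[:, 0] = last_price
for t in range(1, days):
    # draw one random daily return per simulated path
    shocks = np.random.normal(mu, sigma, runs)
    paths[:, t] = paths[:, t - 1] * (1 + shocks)

final = paths[:, -1]
print("mean simulated price after one year:", final.mean())
print("1% worst-case price:", np.percentile(final, 1))
```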
15 | - ## Data Analysis and Visualization (Python Programming)
16 | - [Cryptocurrency Market Analysis](https://github.com/melvfernandez/data_science_portfolio/blob/master/Cryptocurrency%20Market%20Analysis.ipynb): Based on my stock market analysis of tech stocks: change in price over time, daily returns, and behaviour prediction.
17 | - [Stock Market Analysis of Tech Stocks](https://github.com/melvfernandez/data__scientist_portfolio/blob/master/Stock%20Market%20Analysis%20for%20Tech%20Stocks.ipynb): Analysis of technology stocks including change in price over time, daily returns, and stock behaviour prediction.
18 | - [Exploratory Data Analysis - Titanic Passenger Information](https://github.com/melvfnz/data_science_portfolio/blob/master/kaggle_titanic.ipynb): Simple analysis of passengers on board the Titanic answering common questions with visualizations.
19 | - [Exploratory Data Analysis - House Prices](https://github.com/melvfernandez/data_science_portfolio/blob/master/Exploratory%20Data%20Analysis%20of%20House%20Prices.ipynb): Simple analysis of house prices including quick visualizations with correlation plots and heat maps.
20 | - [Simple Linear Regression](https://github.com/melvfernandez/data_science_portfolio/blob/master/Simple%20Linear%20Regression.ipynb): Small playground to summarize and study relationships between two continuous variables from a randomized dataset.
21 |
22 |
23 |
24 | - ## Minor Projects
25 | - [Personal Notes on Natural Language Processing Toolkit](https://github.com/melvfnz/data_science_portfolio/blob/master/Natural%20Language%20Processing%20Personal%20Notes.ipynb)
26 | - [Intro to BigQuery](https://github.com/melvfnz/data_science_portfolio/blob/master/Intro%20to%20BigQuery%20.ipynb)
27 | - [Breast Cancer Analysis](https://github.com/melvfnz/data_science_portfolio/blob/master/Breast%20Cancer%20Analysis.ipynb)
28 |
29 | If you enjoyed what you saw and want to have a chat about the portfolio, work opportunities, or collaboration, feel free to contact me on:
30 | - [LinkedIn](https://www.linkedin.com/in/melvfernandez/)
31 | - [Twitter](https://twitter.com/melvfnz)
32 |
33 |
34 |
--------------------------------------------------------------------------------
/Regression - Machine Learning.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import quandl"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 4,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "df = quandl.get('WIKI/GOOGL')"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 9,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "data": {
29 | "text/html": [
30 | "\n",
31 | "\n",
44 | "
\n",
45 | " \n",
46 | " \n",
47 | " | \n",
48 | " Open | \n",
49 | " High | \n",
50 | " Low | \n",
51 | " Close | \n",
52 | " Volume | \n",
53 | " Ex-Dividend | \n",
54 | " Split Ratio | \n",
55 | " Adj. Open | \n",
56 | " Adj. High | \n",
57 | " Adj. Low | \n",
58 | " Adj. Close | \n",
59 | " Adj. Volume | \n",
60 | "
\n",
61 | " \n",
62 | " Date | \n",
63 | " | \n",
64 | " | \n",
65 | " | \n",
66 | " | \n",
67 | " | \n",
68 | " | \n",
69 | " | \n",
70 | " | \n",
71 | " | \n",
72 | " | \n",
73 | " | \n",
74 | " | \n",
75 | "
\n",
76 | " \n",
77 | " \n",
78 | " \n",
79 | " 2004-08-19 | \n",
80 | " 100.01 | \n",
81 | " 104.06 | \n",
82 | " 95.96 | \n",
83 | " 100.335 | \n",
84 | " 44659000.0 | \n",
85 | " 0.0 | \n",
86 | " 1.0 | \n",
87 | " 50.159839 | \n",
88 | " 52.191109 | \n",
89 | " 48.128568 | \n",
90 | " 50.322842 | \n",
91 | " 44659000.0 | \n",
92 | "
\n",
93 | " \n",
94 | " 2004-08-20 | \n",
95 | " 101.01 | \n",
96 | " 109.08 | \n",
97 | " 100.50 | \n",
98 | " 108.310 | \n",
99 | " 22834300.0 | \n",
100 | " 0.0 | \n",
101 | " 1.0 | \n",
102 | " 50.661387 | \n",
103 | " 54.708881 | \n",
104 | " 50.405597 | \n",
105 | " 54.322689 | \n",
106 | " 22834300.0 | \n",
107 | "
\n",
108 | " \n",
109 | " 2004-08-23 | \n",
110 | " 110.76 | \n",
111 | " 113.48 | \n",
112 | " 109.05 | \n",
113 | " 109.400 | \n",
114 | " 18256100.0 | \n",
115 | " 0.0 | \n",
116 | " 1.0 | \n",
117 | " 55.551482 | \n",
118 | " 56.915693 | \n",
119 | " 54.693835 | \n",
120 | " 54.869377 | \n",
121 | " 18256100.0 | \n",
122 | "
\n",
123 | " \n",
124 | " 2004-08-24 | \n",
125 | " 111.24 | \n",
126 | " 111.60 | \n",
127 | " 103.57 | \n",
128 | " 104.870 | \n",
129 | " 15247300.0 | \n",
130 | " 0.0 | \n",
131 | " 1.0 | \n",
132 | " 55.792225 | \n",
133 | " 55.972783 | \n",
134 | " 51.945350 | \n",
135 | " 52.597363 | \n",
136 | " 15247300.0 | \n",
137 | "
\n",
138 | " \n",
139 | " 2004-08-25 | \n",
140 | " 104.76 | \n",
141 | " 108.00 | \n",
142 | " 103.88 | \n",
143 | " 106.000 | \n",
144 | " 9188600.0 | \n",
145 | " 0.0 | \n",
146 | " 1.0 | \n",
147 | " 52.542193 | \n",
148 | " 54.167209 | \n",
149 | " 52.100830 | \n",
150 | " 53.164113 | \n",
151 | " 9188600.0 | \n",
152 | "
\n",
153 | " \n",
154 | "
\n",
155 | "
"
156 | ],
157 | "text/plain": [
158 | " Open High Low Close Volume Ex-Dividend \\\n",
159 | "Date \n",
160 | "2004-08-19 100.01 104.06 95.96 100.335 44659000.0 0.0 \n",
161 | "2004-08-20 101.01 109.08 100.50 108.310 22834300.0 0.0 \n",
162 | "2004-08-23 110.76 113.48 109.05 109.400 18256100.0 0.0 \n",
163 | "2004-08-24 111.24 111.60 103.57 104.870 15247300.0 0.0 \n",
164 | "2004-08-25 104.76 108.00 103.88 106.000 9188600.0 0.0 \n",
165 | "\n",
166 | " Split Ratio Adj. Open Adj. High Adj. Low Adj. Close \\\n",
167 | "Date \n",
168 | "2004-08-19 1.0 50.159839 52.191109 48.128568 50.322842 \n",
169 | "2004-08-20 1.0 50.661387 54.708881 50.405597 54.322689 \n",
170 | "2004-08-23 1.0 55.551482 56.915693 54.693835 54.869377 \n",
171 | "2004-08-24 1.0 55.792225 55.972783 51.945350 52.597363 \n",
172 | "2004-08-25 1.0 52.542193 54.167209 52.100830 53.164113 \n",
173 | "\n",
174 | " Adj. Volume \n",
175 | "Date \n",
176 | "2004-08-19 44659000.0 \n",
177 | "2004-08-20 22834300.0 \n",
178 | "2004-08-23 18256100.0 \n",
179 | "2004-08-24 15247300.0 \n",
180 | "2004-08-25 9188600.0 "
181 | ]
182 | },
183 | "execution_count": 9,
184 | "metadata": {},
185 | "output_type": "execute_result"
186 | }
187 | ],
188 | "source": [
189 | "df.head()"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": 7,
195 | "metadata": {},
196 | "outputs": [
197 | {
198 | "data": {
199 | "text/html": [
200 | "\n",
201 | "\n",
214 | "
\n",
215 | " \n",
216 | " \n",
217 | " | \n",
218 | " Open | \n",
219 | " High | \n",
220 | " Low | \n",
221 | " Close | \n",
222 | " Volume | \n",
223 | " Ex-Dividend | \n",
224 | " Split Ratio | \n",
225 | " Adj. Open | \n",
226 | " Adj. High | \n",
227 | " Adj. Low | \n",
228 | " Adj. Close | \n",
229 | " Adj. Volume | \n",
230 | "
\n",
231 | " \n",
232 | " \n",
233 | " \n",
234 | " count | \n",
235 | " 3399.000000 | \n",
236 | " 3399.000000 | \n",
237 | " 3399.000000 | \n",
238 | " 3399.000000 | \n",
239 | " 3.399000e+03 | \n",
240 | " 3399.000000 | \n",
241 | " 3399.0 | \n",
242 | " 3399.000000 | \n",
243 | " 3399.000000 | \n",
244 | " 3399.000000 | \n",
245 | " 3399.000000 | \n",
246 | " 3.399000e+03 | \n",
247 | "
\n",
248 | " \n",
249 | " mean | \n",
250 | " 592.816524 | \n",
251 | " 598.217079 | \n",
252 | " 586.871826 | \n",
253 | " 592.606208 | \n",
254 | " 7.860173e+06 | \n",
255 | " 0.167100 | \n",
256 | " 1.0 | \n",
257 | " 404.057168 | \n",
258 | " 407.566178 | \n",
259 | " 400.164700 | \n",
260 | " 403.933656 | \n",
261 | " 7.860173e+06 | \n",
262 | "
\n",
263 | " \n",
264 | " std | \n",
265 | " 223.452561 | \n",
266 | " 224.167098 | \n",
267 | " 222.472709 | \n",
268 | " 223.441963 | \n",
269 | " 8.263983e+06 | \n",
270 | " 9.742067 | \n",
271 | " 0.0 | \n",
272 | " 251.615480 | \n",
273 | " 253.030208 | \n",
274 | " 249.968507 | \n",
275 | " 251.649148 | \n",
276 | " 8.263983e+06 | \n",
277 | "
\n",
278 | " \n",
279 | " min | \n",
280 | " 99.090000 | \n",
281 | " 101.740000 | \n",
282 | " 95.960000 | \n",
283 | " 100.010000 | \n",
284 | " 5.211410e+05 | \n",
285 | " 0.000000 | \n",
286 | " 1.0 | \n",
287 | " 49.698414 | \n",
288 | " 51.027517 | \n",
289 | " 48.128568 | \n",
290 | " 50.159839 | \n",
291 | " 5.211410e+05 | \n",
292 | "
\n",
293 | " \n",
294 | " 25% | \n",
295 | " 460.065000 | \n",
296 | " 464.510000 | \n",
297 | " 455.000000 | \n",
298 | " 459.835000 | \n",
299 | " 2.460999e+06 | \n",
300 | " 0.000000 | \n",
301 | " 1.0 | \n",
302 | " 230.744787 | \n",
303 | " 232.974169 | \n",
304 | " 228.204445 | \n",
305 | " 230.629431 | \n",
306 | " 2.460999e+06 | \n",
307 | "
\n",
308 | " \n",
309 | " 50% | \n",
310 | " 563.760000 | \n",
311 | " 568.750000 | \n",
312 | " 558.580000 | \n",
313 | " 563.770000 | \n",
314 | " 5.113400e+06 | \n",
315 | " 0.000000 | \n",
316 | " 1.0 | \n",
317 | " 299.143427 | \n",
318 | " 301.114512 | \n",
319 | " 296.916553 | \n",
320 | " 298.882622 | \n",
321 | " 5.113400e+06 | \n",
322 | "
\n",
323 | " \n",
324 | " 75% | \n",
325 | " 731.935000 | \n",
326 | " 736.070000 | \n",
327 | " 722.475050 | \n",
328 | " 730.140000 | \n",
329 | " 1.034390e+07 | \n",
330 | " 0.000000 | \n",
331 | " 1.0 | \n",
332 | " 558.970488 | \n",
333 | " 562.317784 | \n",
334 | " 554.172844 | \n",
335 | " 558.266684 | \n",
336 | " 1.034390e+07 | \n",
337 | "
\n",
338 | " \n",
339 | " max | \n",
340 | " 1226.800000 | \n",
341 | " 1228.880000 | \n",
342 | " 1218.600000 | \n",
343 | " 1220.170000 | \n",
344 | " 8.215110e+07 | \n",
345 | " 567.971668 | \n",
346 | " 1.0 | \n",
347 | " 1188.000000 | \n",
348 | " 1198.000000 | \n",
349 | " 1184.060000 | \n",
350 | " 1187.560000 | \n",
351 | " 8.215110e+07 | \n",
352 | "
\n",
353 | " \n",
354 | "
\n",
355 | "
"
356 | ],
357 | "text/plain": [
358 | " Open High Low Close Volume \\\n",
359 | "count 3399.000000 3399.000000 3399.000000 3399.000000 3.399000e+03 \n",
360 | "mean 592.816524 598.217079 586.871826 592.606208 7.860173e+06 \n",
361 | "std 223.452561 224.167098 222.472709 223.441963 8.263983e+06 \n",
362 | "min 99.090000 101.740000 95.960000 100.010000 5.211410e+05 \n",
363 | "25% 460.065000 464.510000 455.000000 459.835000 2.460999e+06 \n",
364 | "50% 563.760000 568.750000 558.580000 563.770000 5.113400e+06 \n",
365 | "75% 731.935000 736.070000 722.475050 730.140000 1.034390e+07 \n",
366 | "max 1226.800000 1228.880000 1218.600000 1220.170000 8.215110e+07 \n",
367 | "\n",
368 | " Ex-Dividend Split Ratio Adj. Open Adj. High Adj. Low \\\n",
369 | "count 3399.000000 3399.0 3399.000000 3399.000000 3399.000000 \n",
370 | "mean 0.167100 1.0 404.057168 407.566178 400.164700 \n",
371 | "std 9.742067 0.0 251.615480 253.030208 249.968507 \n",
372 | "min 0.000000 1.0 49.698414 51.027517 48.128568 \n",
373 | "25% 0.000000 1.0 230.744787 232.974169 228.204445 \n",
374 | "50% 0.000000 1.0 299.143427 301.114512 296.916553 \n",
375 | "75% 0.000000 1.0 558.970488 562.317784 554.172844 \n",
376 | "max 567.971668 1.0 1188.000000 1198.000000 1184.060000 \n",
377 | "\n",
378 | " Adj. Close Adj. Volume \n",
379 | "count 3399.000000 3.399000e+03 \n",
380 | "mean 403.933656 7.860173e+06 \n",
381 | "std 251.649148 8.263983e+06 \n",
382 | "min 50.159839 5.211410e+05 \n",
383 | "25% 230.629431 2.460999e+06 \n",
384 | "50% 298.882622 5.113400e+06 \n",
385 | "75% 558.266684 1.034390e+07 \n",
386 | "max 1187.560000 8.215110e+07 "
387 | ]
388 | },
389 | "execution_count": 7,
390 | "metadata": {},
391 | "output_type": "execute_result"
392 | }
393 | ],
394 | "source": [
395 | "df.describe()"
396 | ]
397 | },
398 | {
399 | "cell_type": "markdown",
400 | "metadata": {},
401 | "source": [
402 | "Let us recreate our dataframe with columns that we want"
403 | ]
404 | },
405 | {
406 | "cell_type": "code",
407 | "execution_count": 10,
408 | "metadata": {},
409 | "outputs": [],
410 | "source": [
411 | "df = df[['Adj. Open','Adj. High','Adj. Low','Adj. Close','Adj. Volume',]]"
412 | ]
413 | },
414 | {
415 | "cell_type": "code",
416 | "execution_count": 11,
417 | "metadata": {},
418 | "outputs": [
419 | {
420 | "data": {
421 | "text/html": [
422 | "\n",
423 | "\n",
436 | "
\n",
437 | " \n",
438 | " \n",
439 | " | \n",
440 | " Adj. Open | \n",
441 | " Adj. High | \n",
442 | " Adj. Low | \n",
443 | " Adj. Close | \n",
444 | " Adj. Volume | \n",
445 | "
\n",
446 | " \n",
447 | " Date | \n",
448 | " | \n",
449 | " | \n",
450 | " | \n",
451 | " | \n",
452 | " | \n",
453 | "
\n",
454 | " \n",
455 | " \n",
456 | " \n",
457 | " 2004-08-19 | \n",
458 | " 50.159839 | \n",
459 | " 52.191109 | \n",
460 | " 48.128568 | \n",
461 | " 50.322842 | \n",
462 | " 44659000.0 | \n",
463 | "
\n",
464 | " \n",
465 | " 2004-08-20 | \n",
466 | " 50.661387 | \n",
467 | " 54.708881 | \n",
468 | " 50.405597 | \n",
469 | " 54.322689 | \n",
470 | " 22834300.0 | \n",
471 | "
\n",
472 | " \n",
473 | " 2004-08-23 | \n",
474 | " 55.551482 | \n",
475 | " 56.915693 | \n",
476 | " 54.693835 | \n",
477 | " 54.869377 | \n",
478 | " 18256100.0 | \n",
479 | "
\n",
480 | " \n",
481 | " 2004-08-24 | \n",
482 | " 55.792225 | \n",
483 | " 55.972783 | \n",
484 | " 51.945350 | \n",
485 | " 52.597363 | \n",
486 | " 15247300.0 | \n",
487 | "
\n",
488 | " \n",
489 | " 2004-08-25 | \n",
490 | " 52.542193 | \n",
491 | " 54.167209 | \n",
492 | " 52.100830 | \n",
493 | " 53.164113 | \n",
494 | " 9188600.0 | \n",
495 | "
\n",
496 | " \n",
497 | "
\n",
498 | "
"
499 | ],
500 | "text/plain": [
501 | " Adj. Open Adj. High Adj. Low Adj. Close Adj. Volume\n",
502 | "Date \n",
503 | "2004-08-19 50.159839 52.191109 48.128568 50.322842 44659000.0\n",
504 | "2004-08-20 50.661387 54.708881 50.405597 54.322689 22834300.0\n",
505 | "2004-08-23 55.551482 56.915693 54.693835 54.869377 18256100.0\n",
506 | "2004-08-24 55.792225 55.972783 51.945350 52.597363 15247300.0\n",
507 | "2004-08-25 52.542193 54.167209 52.100830 53.164113 9188600.0"
508 | ]
509 | },
510 | "execution_count": 11,
511 | "metadata": {},
512 | "output_type": "execute_result"
513 | }
514 | ],
515 | "source": [
516 | "df.head()"
517 | ]
518 | },
519 | {
520 | "cell_type": "code",
521 | "execution_count": 15,
522 | "metadata": {},
523 | "outputs": [],
524 | "source": [
525 | "df['HL_PCT'] = (df['Adj. High'] - df['Adj. Close']) / df['Adj. Close']*100.0"
526 | ]
527 | },
528 | {
529 | "cell_type": "code",
530 | "execution_count": 16,
531 | "metadata": {},
532 | "outputs": [
533 | {
534 | "data": {
535 | "text/html": [
536 | "\n",
537 | "\n",
550 | "
\n",
551 | " \n",
552 | " \n",
553 | " | \n",
554 | " Adj. Open | \n",
555 | " Adj. High | \n",
556 | " Adj. Low | \n",
557 | " Adj. Close | \n",
558 | " Adj. Volume | \n",
559 | " HL_PCT | \n",
560 | "
\n",
561 | " \n",
562 | " Date | \n",
563 | " | \n",
564 | " | \n",
565 | " | \n",
566 | " | \n",
567 | " | \n",
568 | " | \n",
569 | "
\n",
570 | " \n",
571 | " \n",
572 | " \n",
573 | " 2004-08-19 | \n",
574 | " 50.159839 | \n",
575 | " 52.191109 | \n",
576 | " 48.128568 | \n",
577 | " 50.322842 | \n",
578 | " 44659000.0 | \n",
579 | " 3.712563 | \n",
580 | "
\n",
581 | " \n",
582 | " 2004-08-20 | \n",
583 | " 50.661387 | \n",
584 | " 54.708881 | \n",
585 | " 50.405597 | \n",
586 | " 54.322689 | \n",
587 | " 22834300.0 | \n",
588 | " 0.710922 | \n",
589 | "
\n",
590 | " \n",
591 | " 2004-08-23 | \n",
592 | " 55.551482 | \n",
593 | " 56.915693 | \n",
594 | " 54.693835 | \n",
595 | " 54.869377 | \n",
596 | " 18256100.0 | \n",
597 | " 3.729433 | \n",
598 | "
\n",
599 | " \n",
600 | " 2004-08-24 | \n",
601 | " 55.792225 | \n",
602 | " 55.972783 | \n",
603 | " 51.945350 | \n",
604 | " 52.597363 | \n",
605 | " 15247300.0 | \n",
606 | " 6.417469 | \n",
607 | "
\n",
608 | " \n",
609 | " 2004-08-25 | \n",
610 | " 52.542193 | \n",
611 | " 54.167209 | \n",
612 | " 52.100830 | \n",
613 | " 53.164113 | \n",
614 | " 9188600.0 | \n",
615 | " 1.886792 | \n",
616 | "
\n",
617 | " \n",
618 | "
\n",
619 | "
"
620 | ],
621 | "text/plain": [
622 | " Adj. Open Adj. High Adj. Low Adj. Close Adj. Volume HL_PCT\n",
623 | "Date \n",
624 | "2004-08-19 50.159839 52.191109 48.128568 50.322842 44659000.0 3.712563\n",
625 | "2004-08-20 50.661387 54.708881 50.405597 54.322689 22834300.0 0.710922\n",
626 | "2004-08-23 55.551482 56.915693 54.693835 54.869377 18256100.0 3.729433\n",
627 | "2004-08-24 55.792225 55.972783 51.945350 52.597363 15247300.0 6.417469\n",
628 | "2004-08-25 52.542193 54.167209 52.100830 53.164113 9188600.0 1.886792"
629 | ]
630 | },
631 | "execution_count": 16,
632 | "metadata": {},
633 | "output_type": "execute_result"
634 | }
635 | ],
636 | "source": [
637 | "df.head()"
638 | ]
639 | },
640 | {
641 | "cell_type": "code",
642 | "execution_count": 17,
643 | "metadata": {},
644 | "outputs": [],
645 | "source": [
646 | "df['PCT_CHANGE'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0"
647 | ]
648 | },
649 | {
650 | "cell_type": "code",
651 | "execution_count": 19,
652 | "metadata": {},
653 | "outputs": [
654 | {
655 | "data": {
656 | "text/html": [
657 | "\n",
658 | "\n",
671 | "
\n",
672 | " \n",
673 | " \n",
674 | " | \n",
675 | " Adj. Close | \n",
676 | " HL_PCT | \n",
677 | " PCT_CHANGE | \n",
678 | " Adj. Volume | \n",
679 | "
\n",
680 | " \n",
681 | " Date | \n",
682 | " | \n",
683 | " | \n",
684 | " | \n",
685 | " | \n",
686 | "
\n",
687 | " \n",
688 | " \n",
689 | " \n",
690 | " 2004-08-19 | \n",
691 | " 50.322842 | \n",
692 | " 3.712563 | \n",
693 | " 0.324968 | \n",
694 | " 44659000.0 | \n",
695 | "
\n",
696 | " \n",
697 | " 2004-08-20 | \n",
698 | " 54.322689 | \n",
699 | " 0.710922 | \n",
700 | " 7.227007 | \n",
701 | " 22834300.0 | \n",
702 | "
\n",
703 | " \n",
704 | " 2004-08-23 | \n",
705 | " 54.869377 | \n",
706 | " 3.729433 | \n",
707 | " -1.227880 | \n",
708 | " 18256100.0 | \n",
709 | "
\n",
710 | " \n",
711 | " 2004-08-24 | \n",
712 | " 52.597363 | \n",
713 | " 6.417469 | \n",
714 | " -5.726357 | \n",
715 | " 15247300.0 | \n",
716 | "
\n",
717 | " \n",
718 | " 2004-08-25 | \n",
719 | " 53.164113 | \n",
720 | " 1.886792 | \n",
721 | " 1.183658 | \n",
722 | " 9188600.0 | \n",
723 | "
\n",
724 | " \n",
725 | "
\n",
726 | "
"
727 | ],
728 | "text/plain": [
729 | " Adj. Close HL_PCT PCT_CHANGE Adj. Volume\n",
730 | "Date \n",
731 | "2004-08-19 50.322842 3.712563 0.324968 44659000.0\n",
732 | "2004-08-20 54.322689 0.710922 7.227007 22834300.0\n",
733 | "2004-08-23 54.869377 3.729433 -1.227880 18256100.0\n",
734 | "2004-08-24 52.597363 6.417469 -5.726357 15247300.0\n",
735 | "2004-08-25 53.164113 1.886792 1.183658 9188600.0"
736 | ]
737 | },
738 | "execution_count": 19,
739 | "metadata": {},
740 | "output_type": "execute_result"
741 | }
742 | ],
743 | "source": [
744 | "df = df[['Adj. Close','HL_PCT','PCT_CHANGE','Adj. Volume']]\n",
745 | "df.head()"
746 | ]
747 | },
748 | {
749 | "cell_type": "code",
750 | "execution_count": null,
751 | "metadata": {},
752 | "outputs": [],
753 | "source": [
754 | "forescast_col = 'Adj. Close'\n",
755 | "df.fillna('-99999', inplace=True)\n",
756 | "\n",
757 | "forecast_out = int(math.ceil(0.1*len(df)))"
758 | ]
759 | }
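The notebook stops after computing `forecast_out`. A plausible continuation of this setup — a sketch only, not the author's code; the label shift, split, and model choice below are assumptions — would shift `forecast_col` to form labels and fit a regression on the engineered features.

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# each row's label is the Adj. Close price forecast_out days in the future
df['label'] = df[forecast_col].shift(-forecast_out)
df.dropna(inplace=True)   # the last forecast_out rows have no label

X = np.array(df.drop(columns=['label']))   # Adj. Close, HL_PCT, PCT_CHANGE, Adj. Volume
y = np.array(df['label'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
clf = LinearRegression()
clf.fit(X_train, y_train)
print("R^2 on held-out data:", clf.score(X_test, y_test))
```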
760 | ],
761 | "metadata": {
762 | "kernelspec": {
763 | "display_name": "Python 3",
764 | "language": "python",
765 | "name": "python3"
766 | },
767 | "language_info": {
768 | "codemirror_mode": {
769 | "name": "ipython",
770 | "version": 3
771 | },
772 | "file_extension": ".py",
773 | "mimetype": "text/x-python",
774 | "name": "python",
775 | "nbconvert_exporter": "python",
776 | "pygments_lexer": "ipython3",
777 | "version": "3.6.4"
778 | }
779 | },
780 | "nbformat": 4,
781 | "nbformat_minor": 2
782 | }
783 |
--------------------------------------------------------------------------------
/Simple Linear Regression.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "This is simply a playground to perform commands from pandas, numpy, scipy, and sklearn on a randomized dataset.\n",
8 | "\n",
9 | "## Process for a simple linear regression\n",
10 | " 1. Load the data\n",
11 | " 2. Clean the data\n",
12 | " 3. Observe the statistics\n",
13 | " 4. Train the model with the training data\n",
14 | " 5. Test the train data with the test data"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {
21 | "collapsed": true
22 | },
23 | "outputs": [],
24 | "source": [
25 | "#first off import required libraries\n",
26 | "import pandas as pd\n",
27 | "import numpy as np\n",
28 | "import matplotlib.pyplot as plt\n",
29 | "import math\n",
30 | "from scipy import stats as st\n",
31 | "from sklearn import linear_model\n",
32 | "%matplotlib inline"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 2,
38 | "metadata": {
39 | "collapsed": true
40 | },
41 | "outputs": [],
42 | "source": [
43 | "#let's load our data\n",
44 | "training_set = pd.read_csv('data/linear_regression_train.csv')\n",
45 | "test_set = pd.read_csv('data/linear_regression_test.csv')"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 3,
51 | "metadata": {
52 | "collapsed": true
53 | },
54 | "outputs": [],
55 | "source": [
56 | "#let's now do a simple clean by dropping any null values\n",
57 | "training_set = training_set.dropna()\n",
58 | "test_set = test_set.dropna()"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 4,
64 | "metadata": {},
65 | "outputs": [
66 | {
67 | "data": {
68 | "text/html": [
69 | "\n",
70 | "\n",
83 | "
\n",
84 | " \n",
85 | " \n",
86 | " | \n",
87 | " x | \n",
88 | " y | \n",
89 | "
\n",
90 | " \n",
91 | " \n",
92 | " \n",
93 | " count | \n",
94 | " 699.000000 | \n",
95 | " 699.000000 | \n",
96 | "
\n",
97 | " \n",
98 | " mean | \n",
99 | " 50.014306 | \n",
100 | " 49.939869 | \n",
101 | "
\n",
102 | " \n",
103 | " std | \n",
104 | " 28.954560 | \n",
105 | " 29.109217 | \n",
106 | "
\n",
107 | " \n",
108 | " min | \n",
109 | " 0.000000 | \n",
110 | " -3.839981 | \n",
111 | "
\n",
112 | " \n",
113 | " 25% | \n",
114 | " 25.000000 | \n",
115 | " 24.929968 | \n",
116 | "
\n",
117 | " \n",
118 | " 50% | \n",
119 | " 49.000000 | \n",
120 | " 48.973020 | \n",
121 | "
\n",
122 | " \n",
123 | " 75% | \n",
124 | " 75.000000 | \n",
125 | " 74.929911 | \n",
126 | "
\n",
127 | " \n",
128 | " max | \n",
129 | " 100.000000 | \n",
130 | " 108.871618 | \n",
131 | "
\n",
132 | " \n",
133 | "
\n",
134 | "
"
135 | ],
136 | "text/plain": [
137 | " x y\n",
138 | "count 699.000000 699.000000\n",
139 | "mean 50.014306 49.939869\n",
140 | "std 28.954560 29.109217\n",
141 | "min 0.000000 -3.839981\n",
142 | "25% 25.000000 24.929968\n",
143 | "50% 49.000000 48.973020\n",
144 | "75% 75.000000 74.929911\n",
145 | "max 100.000000 108.871618"
146 | ]
147 | },
148 | "execution_count": 4,
149 | "metadata": {},
150 | "output_type": "execute_result"
151 | }
152 | ],
153 | "source": [
154 | "#we can view quick statistics using .describe()\n",
155 | "training_set.describe()"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": 5,
161 | "metadata": {},
162 | "outputs": [
163 | {
164 | "data": {
165 | "text/html": [
166 | "\n",
167 | "\n",
180 | "
\n",
181 | " \n",
182 | " \n",
183 | " | \n",
184 | " x | \n",
185 | " y | \n",
186 | "
\n",
187 | " \n",
188 | " \n",
189 | " \n",
190 | " count | \n",
191 | " 300.000000 | \n",
192 | " 300.000000 | \n",
193 | "
\n",
194 | " \n",
195 | " mean | \n",
196 | " 50.936667 | \n",
197 | " 51.205051 | \n",
198 | "
\n",
199 | " \n",
200 | " std | \n",
201 | " 28.504286 | \n",
202 | " 29.071481 | \n",
203 | "
\n",
204 | " \n",
205 | " min | \n",
206 | " 0.000000 | \n",
207 | " -3.467884 | \n",
208 | "
\n",
209 | " \n",
210 | " 25% | \n",
211 | " 27.000000 | \n",
212 | " 25.676502 | \n",
213 | "
\n",
214 | " \n",
215 | " 50% | \n",
216 | " 53.000000 | \n",
217 | " 52.170557 | \n",
218 | "
\n",
219 | " \n",
220 | " 75% | \n",
221 | " 73.000000 | \n",
222 | " 74.303007 | \n",
223 | "
\n",
224 | " \n",
225 | " max | \n",
226 | " 100.000000 | \n",
227 | " 105.591837 | \n",
228 | "
\n",
229 | " \n",
230 | "
\n",
231 | "
"
232 | ],
233 | "text/plain": [
234 | " x y\n",
235 | "count 300.000000 300.000000\n",
236 | "mean 50.936667 51.205051\n",
237 | "std 28.504286 29.071481\n",
238 | "min 0.000000 -3.467884\n",
239 | "25% 27.000000 25.676502\n",
240 | "50% 53.000000 52.170557\n",
241 | "75% 73.000000 74.303007\n",
242 | "max 100.000000 105.591837"
243 | ]
244 | },
245 | "execution_count": 5,
246 | "metadata": {},
247 | "output_type": "execute_result"
248 | }
249 | ],
250 | "source": [
251 | "test_set.describe()"
252 | ]
253 | },
254 | {
255 | "cell_type": "markdown",
256 | "metadata": {},
257 | "source": [
258 | "We can observe that the median and mean are relatively close, saying that the data isn't skewed by any outliers."
259 | ]
260 | },
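One quick way to back up the "not skewed" observation above is to compute the skewness directly. A small self-contained sketch; the column name `y` and the CSV path match the cells earlier in this notebook.

```python
import pandas as pd
from scipy import stats as st

training_set = pd.read_csv('data/linear_regression_train.csv').dropna()

# skewness near zero and a small mean-median gap both indicate a roughly
# symmetric distribution, i.e. little influence from outliers
print("skew of y:", st.skew(training_set['y']))
print("mean - median of y:", training_set['y'].mean() - training_set['y'].median())
```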
261 | {
262 | "cell_type": "code",
263 | "execution_count": 9,
264 | "metadata": {
265 | "collapsed": true
266 | },
267 | "outputs": [],
268 | "source": [
269 | "#let's seperate our data into x and y \n",
270 | "x_training_set = training_set.as_matrix(['x'])\n",
271 | "y_training_set = training_set.as_matrix(['y'])\n",
272 | "\n",
273 | "x_test_set = test_set.as_matrix(['x'])\n",
274 | "y_test_set = test_set.as_matrix(['y'])"
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": 15,
280 | "metadata": {},
281 | "outputs": [
282 | {
283 | "ename": "AttributeError",
284 | "evalue": "Unknown property figsize",
285 | "output_type": "error",
286 | "traceback": [
287 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
288 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
289 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m#we can now plot our data to view the relationship\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtitle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Relationship between X and Y'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscatter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_training_set\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_training_set\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfigsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m12\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
290 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/matplotlib/pyplot.py\u001b[0m in \u001b[0;36mscatter\u001b[0;34m(x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, hold, data, **kwargs)\u001b[0m\n\u001b[1;32m 3355\u001b[0m \u001b[0mvmin\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvmin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvmax\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvmax\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0malpha\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3356\u001b[0m \u001b[0mlinewidths\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlinewidths\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverts\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mverts\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3357\u001b[0;31m edgecolors=edgecolors, data=data, **kwargs)\n\u001b[0m\u001b[1;32m 3358\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3359\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hold\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwashold\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
291 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py\u001b[0m in \u001b[0;36minner\u001b[0;34m(ax, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1708\u001b[0m warnings.warn(msg % (label_namer, func.__name__),\n\u001b[1;32m 1709\u001b[0m RuntimeWarning, stacklevel=2)\n\u001b[0;32m-> 1710\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1711\u001b[0m \u001b[0mpre_doc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__doc__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1712\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpre_doc\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
292 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_axes.py\u001b[0m in \u001b[0;36mscatter\u001b[0;34m(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, **kwargs)\u001b[0m\n\u001b[1;32m 4097\u001b[0m )\n\u001b[1;32m 4098\u001b[0m \u001b[0mcollection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmtransforms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIdentityTransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4099\u001b[0;31m \u001b[0mcollection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4100\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4101\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcolors\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
293 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/matplotlib/artist.py\u001b[0m in \u001b[0;36mupdate\u001b[0;34m(self, props)\u001b[0m\n\u001b[1;32m 845\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 846\u001b[0m ret = [_update_property(self, k, v)\n\u001b[0;32m--> 847\u001b[0;31m for k, v in props.items()]\n\u001b[0m\u001b[1;32m 848\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 849\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meventson\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstore\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
294 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/matplotlib/artist.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 845\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 846\u001b[0m ret = [_update_property(self, k, v)\n\u001b[0;32m--> 847\u001b[0;31m for k, v in props.items()]\n\u001b[0m\u001b[1;32m 848\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 849\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meventson\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstore\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
295 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/matplotlib/artist.py\u001b[0m in \u001b[0;36m_update_property\u001b[0;34m(self, k, v)\u001b[0m\n\u001b[1;32m 838\u001b[0m \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'set_'\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 839\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 840\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Unknown property %s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 841\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 842\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
296 | "\u001b[0;31mAttributeError\u001b[0m: Unknown property figsize"
297 | ]
298 | },
299 | {
300 | "data": {
301 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEICAYAAABcVE8dAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAFH1JREFUeJzt3X+0ZWV93/H3ByaAAQTjjF3CDAyJ\nQ3RKG9EbxJU2QEUz0BUmtSZCaw0uCtEU0UqMWFNlYWpXNdbEBouDscQYwdE0OrVk4RIJRMsggz+I\nA6UdBwJTQAZElJAI6Ld/7D2Zw+XeOXvuz5l53q+1zpr949n7fO9zz/2cfZ599p5UFZKkfd9+i12A\nJGlhGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8BuS5OQk22ax/WVJ/v1c1jTFc1SS502z7l8m+fwM\n93txko/PrjpNJcnK/ve2ZLFr0a4Z+HuZJHcl+Zskjya5P8kVSQ6Zh+c5O8mXRpdV1eur6t1z/VxD\nVdUfV9UrFvp594Y3iyR/kmTdpGWfSfL7i1VTX8MfJ/nopGUnJXkoyXMXq65WGfh7p1+sqkOAFwLH\nA29f5Hq0+P4N8M+TnAKQ5NV0r42LFrUquAA4PcnLAZIcBFwOXFhV9y1qZQ0y8PdiVXU/cA1d8AOQ\n5MAkv5Pk7iTf7odhnjHV9kkuSvKtJN9PcluSf9YvfwFwGfDS/pPEd/vlVyT57ZHtz02yJcl3kmxI\ncsTIukry+iT/N8nDSS5Nkn7d85Jcn+SRJA8m+eSk0k6dZrunfOron+OCJFv7/bwvya5e0wcl+WT/\n8341yc+M7OuI/ih5e5I7k1zQL18D/Dvg1X1ffCPJKUn+cmTbLyT5ysj8l5L80q7226/bb+R38FCS\n9Ul+ol+3Y5jkV/vf5YNJ3jHdD9a/Fi4ELk9yFPBB4Neq6tGp2ic5IcmNSb6b5L4kv5/kgEl9O93v\nb//+NfZgkq3AP91FXQ8BbwTWJTkYeBfwraq6YrptNI+qysde9ADuAk7tp5cDfwn83sj63wU2AD8B\nHAr8D+A/9utOBraNtP1l4Ai6N/5XA38NPLdfdzbwpUnPfQXw2/30PwEeBF4EHAj8F+CGkbYFfA44\nHDgK2A6s6dddCbyjf96DgH80cLun1NS3va7/WY8C/g/wr6fpt4uBJ4BXAT8G/AZwZz+9H3AL8E7g\nAOAnga3AL4xs+/GRfR0E/A2wFFgC3A/c2/f3M/p1zx6w3zcDG/vf44HAh4Er+3Ur+5/v8n6fPwP8\nAHjBmNfHNf3v5Q/HtHsxcGJf/0rgduDNA38Prwf+N7Ci7/vr+vZLdvF8n6Z7XT4EHLXYf0etPha9\nAB+7+QvrAv9R4Pv9H9m1wOH9utCF9k+NtH8pcGc/fTIjgT/Fvr8OrO2nnxKu/bIr2Bn4fwC8d2Td\nIX2gruzni6cG+Xrgon76Y8A6YPkUNexqu6fU1LddMzL/68C10/xsFwMbR+b3A+4D/jHwEuDuSe3f\nDvy3kW0/Pmn9XwCv7EPz832da4BTgFv7NuP2ezvwspF1z+37cEcI12gfAV8Bzhzz+vitfruX7+br\n6s3Anw78PXwReP3IulcwPvD/Xv+6fdNi/w21/PCs+t7pl6rqC0lOAj5Bd6T5XWAZ8OPALf2nb+je\nBPafaidJXgu8hS5coAvtpQNrOAL46o6Zqno0yUPAkXRvStAd+e7wWL9/gN8E3g18JcnDwPuravTE\n3nTbTeWekem/6usa27aqfpTuG0tH0IXVETuGrnr704X6dK6nfwPtpx8GTqI7Cr++b3P0mP0eDfxp\nkh+NrP8hXTjuMLgvkqyi++TyIeD9SV5cVU9M0/ZY4D8DE3SvmSV0n0ZGTffcR/D0ft+lqvp2kgeB\nzePaav44hr8Xq6rr6Y66f6df9CDdcMLfr6rD+8dh1Z3gfYokR9MNF5wPPLuqDge+SfcGAV0I7sq9\ndIG1Y38H0w1j/L8Bdd9fVedW1RHArwEfyjRfxRxgxcj0UX1dY9v2Y/3L+/b30H0KOnzkcWhVnb6j\n5Cn2tSPwf76fvp4u8E9iZ+CP2+89wGmT1h9UVWP7cLJ+fP0jdEN6b6T7pPe2XWzyX+mGZVZV1TPp\nzlNkF+1H3cfT+117AQN/7/e7wMuTvLCqfkQX4h9I8hyAJEcm+YUptjuYLsi29+1eBxw3sv7bwPLR\nE3mTfAJ4XZIXJjkQeA9wU1XdNa7gJL+cZHk/+3Bfxw/HbTeNtyZ5VpIVwJuAySeAR704ySvTfV/8\nzXRH4xvphkq+l+RtSZ7Rn5Q8LsnP9tt9G1g56YTw/wJ+GjgB+EpVbaZ7A3wJcEPfZtx+LwP+Q//m\nS5JlSdbOsB/eQPfp7D396+Ac4DeTPH+a9ocC3wMe7du8YTeeaz1wQZLlSZ7F4n8TSAMZ+Hu5qtpO\nNya+44KotwFbgI1Jvgd8gS6YJm93G/B+4Ea6QPsHwJdHmnyR7uP3/f1H8cnbX9s/55/QHfH9FHDm\nwLJ/FrgpyaN0J/LeVFV3Dtx2ss/SDUV8HfifdOcWdtX21XRvMv8KeGVVPVFVPwR+ke7bTnfSfVL6\nCHBYv92n+n8fSvJVgKr6a7ohrc1V9Xi//kbgr6rqgb7NuP3+Xv/zfz7J9+nefF6yux3Qv9m9Bzhn\nRy0jv9/Ld3y7ZpLfAP4F3bmgy9n1G+Vkl9OdHP4GXR/8992tWYsjVf4HKNo7JSm6IYkti12LtDfw\nCF+SGjE28JN8NMkDSb45zfok+WC6C3BuTfKiuS9TkjRbQ47wr6D7fvF0TgNW9Y/z6M7+S/OuquJw\njjTc2MCvqhuA7+yiyVrgY9XZCBweb4okSXucubjw6kieehHGtn7Z026MlOQ8uk8BHHzwwS9+/vOn\n+8aYJGkqt9xyy4NVtWwm285F4E/1la8pv/pTVevoLqlnYmKiNm3aNAdPL0ntSDL2yubpzMW3dLbx\n1Kvudly9KEnag8xF4G8AXtt/W+dE4JHyPteStMcZO6ST5Eq6e4Ys7W829S66W8pSVZcBVwOn013d\n+RjwuvkqVpI0c2MDv6rOGrO+6P63HUnSHswrbSWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJ\naoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RG\nGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSB\nL0mNMPAlqREGviQ1wsCXpEYMCvwka5LckWRLkoumWH9UkuuSfC3JrUlOn/tSJUmzMTbwk+wPXAqc\nBqwGzkqyelKz3wLWV9XxwJnAh+a6UEnS7Aw5wj8B2FJVW6vqceAqYO2kNgU8s58+DLh37kqUJM2F\nIYF/JHDPyPy2ftmoi4HXJNkGX
A28caodJTkvyaYkm7Zv3z6DciVJMzUk8DPFspo0fxZwRVUtB04H\n/ijJ0/ZdVeuqaqKqJpYtW7b71UqSZmxI4G8DVozML+fpQzbnAOsBqupG4CBg6VwUKEmaG0MC/2Zg\nVZJjkhxAd1J2w6Q2dwMvA0jyArrAd8xGkvYgYwO/qp4EzgeuAW6n+zbO5iSXJDmjb3YhcG6SbwBX\nAmdX1eRhH0nSIloypFFVXU13MnZ02TtHpm8Dfm5uS5MkzSWvtJWkRhj4ktQIA1+SGmHgS1IjDHxJ\naoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RG\nGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSB\nL0mNMPAlqREGviQ1wsCXpEYY+JLUiEGBn2RNkjuSbEly0TRtfiXJbUk2J/nE3JYpSZqtJeMaJNkf\nuBR4ObANuDnJhqq6baTNKuDtwM9V1cNJnjNfBUuSZmbIEf4JwJaq2lpVjwNXAWsntTkXuLSqHgao\nqgfmtkxJ0mwNCfwjgXtG5rf1y0YdCxyb5MtJNiZZM9WOkpyXZFOSTdu3b59ZxZKkGRkS+JliWU2a\nXwKsAk4GzgI+kuTwp21Uta6qJqpqYtmyZbtbqyRpFoYE/jZgxcj8cuDeKdp8tqqeqKo7gTvo3gAk\nSXuIIYF/M7AqyTFJDgDOBDZMavMZ4BSAJEvphni2zmWhkqTZGRv4VfUkcD5wDXA7sL6qNie5JMkZ\nfbNrgIeS3AZcB7y1qh6ar6IlSbsvVZOH4xfGxMREbdq0aVGeW5L2VkluqaqJmWzrlbaS1AgDX5Ia\nYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREG\nviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RGGPiS1AgDX5IaYeBL\nUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjBgV+kjVJ7kiyJclFu2j3qiSV\nZGLuSpQkzYWxgZ9kf+BS4DRgNXBWktVTtDsUuAC4aa6LlCTN3pAj/BOALVW1taoeB64C1k7R7t3A\ne4G/ncP6JElzZEjgHwncMzK/rV/2d5IcD6yoqs/takdJzkuyKcmm7du373axkqSZGxL4mWJZ/d3K\nZD/gA8CF43ZUVeuqaqKqJpYtWza8SknSrA0J/G3AipH55cC9I/OHAscBf57kLuBEYIMnbiVpzzIk\n8G8GViU5JskBwJnAhh0rq+qRqlpaVSuraiWwETijqjbNS8WSpBkZG/hV9SRwPnANcDuwvqo2J7kk\nyRnzXaAkaW4sGdKoqq4Grp607J3TtD159mVJkuaaV9pKUiMMfElqhIEvSY0w8CWpEQa+JDXCwJek\nRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqE\ngS9JjTDwJakRBr4kNcLAl6RGGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4\nktQIA1+SGmHgS1IjDHxJasSgwE+yJskdSbYkuWiK9W9JcluSW5Ncm+TouS9VkjQbYwM/yf7ApcBp\nwGrgrCSrJzX7GjBRVf8Q+DTw3rkuVJI0O0OO8E8AtlTV1qp6HLgKWDvaoKquq6rH+tmNwPK5LVOS\nNFtDAv9I4J6R+W39sumcA/zZVCuSnJdkU5JN27dvH16lJGnWhgR+plhWUzZMXgNMAO+ban1Vrauq\niaqaWLZs2fAqJUmztmRAm23AipH55cC9kxslORV4B3BSVf1gbsqTJM2VIUf4NwOrkhyT5ADgTGDD\naIMkxwMfBs6oqgfmvkxJ0myNDfyqehI4H7gGuB1YX1Wbk1yS5Iy+2fuAQ4BPJfl6kg3T7E6StEiG\nDOlQVVcDV09a9s6R6VPnuC5J0hzzSltJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtS\nIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RGGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXC\nwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8\nSWqEgS9JjTDwJakRgwI/yZokdyTZkuSiKdYfmOST/fqbkqyc60IlSbMzNvCT7A9cCpwGrAbOSrJ6\nUrNzgIer6nnAB4D/NNeFSpJmZ8gR/gnAlqraWlWPA1cBaye1WQv8YT/9aeBlSTJ3ZUqSZmvJgDZH\nAveMzG8DXjJdm6p6MskjwLOBB0cbJTkPOK+f/UGSb86k6H3QUib1VcPsi53si53si51+eqYbDgn8\nqY7UawZtqKp1wDqAJJuqamLA8+/z7Iud7Iud7Iud7Iudkmya6bZDhnS2AStG5pcD907XJskS4DDg\nOzMtSpI094YE/s3AqiTHJDkAOBPYMKnNBuBX++lXAV+sqqcd4UuSFs/YIZ1+TP584Bpgf+CjVbU5\nySXApqraAPwB8EdJttAd2Z854LnXzaLufY19sZN9sZN9sZN9sdOM+yIeiEtSG7zSVpIaYeBLUiPm\nPfC9LcNOA/riLUluS3JrkmuTHL0YdS6EcX0x0u5VSSrJPvuVvCF9keRX+tfG5iSfWOgaF8qAv5Gj\nklyX5Gv938npi1HnfEvy0SQPTHetUjof7Pvp1iQvGrTjqpq3B91J3m8BPwkcAHwDWD2pza8Dl/XT\nZwKfnM+aFusxsC9OAX68n35Dy33RtzsUuAHYCEwsdt2L+LpYBXwNeFY//5zFrnsR+2Id8IZ+ejVw\n12LXPU998fPAi4BvTrP+dODP6K6BOhG4ach+5/sI39sy7DS2L6rquqp6rJ/dSHfNw75oyOsC4N3A\ne4G/XcjiFtiQvjgXuLSqHgaoqgcWuMaFMqQvCnhmP30YT78maJ9QVTew62uZ1gIfq85G4PAkzx23\n3/kO/Kluy3DkdG2q6klgx20Z9jVD+mLUOXTv4PuisX2R5HhgRVV9biELWwRDXhfHAscm+XKSjUnW\nLFh1C2tIX1wMvCbJNuBq4I0LU9oeZ3fzBBh2a4XZmLPbMuwDBv+cSV4DTAAnzWtFi2eXfZFkP7q7\nrp69UAUtoiGviyV0wzon033q+4skx1XVd+e5toU2pC/OAq6oqvcneSnd9T/HVdWP5r+8PcqMcnO+\nj/C9LcNOQ/qCJKcC7wDOqKofLFBtC21cXxwKHAf8eZK76MYoN+yjJ26H/o18tqqeqKo7gTvo3gD2\nNUP64hxgPUBV3QgcRHdjtdYMypPJ5jvwvS3DTmP7oh/G+DBd2O+r47Qwpi+q6pGqWlpVK6
tqJd35\njDOqasY3jdqDDfkb+QzdCX2SLKUb4tm6oFUujCF9cTfwMoAkL6AL/O0LWuWeYQPw2v7bOicCj1TV\nfeM2mtchnZq/2zLsdQb2xfuAQ4BP9eet766qMxat6HkysC+aMLAvrgFekeQ24IfAW6vqocWren4M\n7IsLgcuT/Fu6IYyz98UDxCRX0g3hLe3PV7wL+DGAqrqM7vzF6cAW4DHgdYP2uw/2lSRpCl5pK0mN\nMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSI/4/rev6i5m3RygAAAAASUVORK5CYII=\n",
302 | "text/plain": [
303 | ""
304 | ]
305 | },
306 | "metadata": {},
307 | "output_type": "display_data"
308 | }
309 | ],
310 | "source": [
311 | "#we can now plot our data to view the relationship\n",
312 | "plt.title('Relationship between X and Y')\n",
313 | "plt.scatter(x_training_set, y_training_set)\n",
314 | "plt.show()"
315 | ]
316 | },
317 | {
318 | "cell_type": "code",
319 | "execution_count": 11,
320 | "metadata": {},
321 | "outputs": [
322 | {
323 | "data": {
324 | "text/plain": [
325 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
326 | ]
327 | },
328 | "execution_count": 11,
329 | "metadata": {},
330 | "output_type": "execute_result"
331 | }
332 | ],
333 | "source": [
334 | "#after some observations and visualizations, time to train our data\n",
335 | "#let's create the linear regression object\n",
336 | "lm = linear_model.LinearRegression()\n",
337 | "#then fit it\n",
338 | "lm.fit(x_training_set,y_training_set)"
339 | ]
340 | },
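A typical follow-up to the fit above — a sketch that reuses the variables defined in earlier cells (`lm`, `x_test_set`, `y_test_set`, `plt`); the actual notebook may evaluate the model differently.

```python
# predict on the held-out set and report the coefficient of determination
predictions = lm.predict(x_test_set)
print("R^2 on the test set:", lm.score(x_test_set, y_test_set))

plt.title('Test set: actual vs. fitted')
plt.scatter(x_test_set, y_test_set, label='actual')
plt.plot(x_test_set, predictions, color='red', label='fitted line')
plt.legend()
plt.show()
```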
341 | {
342 | "cell_type": "code",
343 | "execution_count": 14,
344 | "metadata": {},
345 | "outputs": [
346 | {
347 | "data": {
348 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3X2cXGV99/HPL5tBJoDZIEHJQggK\nBkFuEliBmuoNwduICqQoIkJFS4u1tRWx0WCpgMWS3vEW8OVTURQoCuHJiIIN1oAPqaAJCSJCFBES\nNgGCZAOShWw2v/uPc83m7Ow5Z87szuzMznzfr1de2Tlz5pxr5sxcv3M9m7sjIiJSbkKjEyAiIs1J\nAUJERBIpQIiISCIFCBERSaQAISIiiRQgREQkkQJEA5jZGWZ2Z6PTUWJmRTP7npltMbObGpiOq83s\nkjE+55/M7NVjec5Gq/ZzbsR1GSkzczM7MPz9VTP7lzE45wfM7Gd1OO5FZnZdrY9bjXEdIMzsfWa2\nMvzIN5rZD8zszxudrkrc/Vvu/tZGpyPm3cArgVe4+6nxJ8zs/5jZU2a2V2zby8zsITP70FgntNbc\nfXd3f7Ta15nZjJAZTRxtGpopA65XZhc7/t1m9mL4zT5jZrea2T71OJe7/627/2vONP11PdIw3o3b\nAGFm5wGXA/9GlLlNB74MnNzIdFVSiwylDvYHfuvu28ufcPcfAt8HrohtvgDYCFw5NsmTFvMRd98d\neC3QCVyWtJOZdYxpqmQ4dx93/4DJwJ+AUzP2eRlRANkQ/l0OvCw8dyzwBPAJ4GmizG4+8Hbgt8Cz\nwKdix7oIuBlYAjwP3AccHnt+IfD78NxvgL+IPfcBYAXRj+BZ4JKw7WfheQvPPQ1sAX4FvD72Pq8F\nNgGPE2XME2LH/RnwOWAz8AfghIzP43XA3UAv8CBwUth+MbAN6A+f6dkpn/cTwDuA14fzvSblPA8B\n74w9ngg8AxwRHt8EPBne60+AQ2P7Xg1cEn9/Zcd24MDY9f0csA54CvgqUAzP7UUU1HrDZ/7T0ueW\nkN74Ma8GvgTcHq7lvRnvc1147Z/Cvz8L2/8qfAabgWXA/lnXGTgnfPbbwnG+l3K+K4D1wHPAKuBN\nZd/PG8N35flwfbtjz88m+s4+T/QdvqH0OSd8R14EBkJaevN8LsDBwA/DZ70WeE/G9/Bu4K9jj/8e\n+HXsPF8B7gBeAN6SdZ3DaxYQ/X43hM++/HpeEtv3ZGBN+Ax/D7wN+Gx4vy+G9/zFSu8JeAVwWzjO\nL4B/pey7Gtv3v4gCYnzb/cApOa/rdfE8q+w4jwFvCX9PYGc+9MfwfdgzPLcrcF3Y3gv8Enhlrry2\n2sy5Gf6FC7sdmJixz2eAe4C9ganA/wD/GvuwtwOfBgrA3xBlwt8G9gAODV+YV8cuVD9RVUwB+Cei\nDLkQnj8VmBYu0mnhy71PeO4D4Vz/QJRZFhkaIOaFL0YnUSbyuthrrwW+G9I0gyh4nR07bn9Iewfw\nYaIfiSV8FgXgEeBTwC7AXKIf+szyL2LG53li+CL/Ajg3Y79PA9+KPX4H8HDs8V+F91MK4Gtiz11N\n/gBxOdGPdM9wvO8Bl4bnLiXKSArh35uSPpeEY15NlCEcFa7Vt4AbUl43I7x2Ymzb/PA5vy68/gLg\nf3Jc58H3nfG5nkmUMU0EPk4UZHeNXb8XiW5wOsL7vyc8twvRzcXHwmfx7vC9STxfyuee+rkAu4Xv\nxQfDc0cQ3RAcmnL8uwkBgiiQLwf+M3aeLcAcot/SrhWu89uIgsbrQzq+nXA9S9+no8Kx/084dhdw\ncHma8rwnogB7Y9jv9UBP+WcWO9b7gRWxx4cQZdKlm9VK1zVvgDiXKL/bl+i39R/A9eG5D4XPbVL4\nfhwJvDxXXjvSTLqR/4AzgCcr7PN74O2xx/OAx2Ifdh/QER7vEb5YR8f2XwXMj12oe2LPTSC6a3lT\nyrnXACfHfnDr0n6ERJn1b4FjiN3lhgv5EnBIbNuHgLtjx3gk9tyk8B5elZCeN4UvXvz41wMXlX8R\nK3ymNwErSbkbD/scSBR8JoXH3wI+nbJvZ0jz5PD4anIECKIM9gWG3sX+GfCH8PdniALrgTneU3mG\n8vXYc28nFtzKXjeD4QHiB8RKYOF7spWoCi/xOpe/7yp+A5sJpdhw/f479twhQF/4+82U3TgQ3SxV\nGyASPxeiG6Kflu3/H8CFKce/O3wmvUQZ67eAqbHzXBvbt9J1/gawKPbcaxOu5yWxNF2WkaZ4gEh9\nT0S/y35CcAnP/Vv5ZxZ7bo/wHvYPjz8LfKOK65o3QDwEHB97bp+QzolEN2X/A/yvar5j7j5u2yD+\nCOxVoT5/GtGdU8njYdvgMdx9IPzdF/5/KvZ8H7B77PH60h/uvoOoymUagJm938zWmFmvmfUS3VXs\nlfTacu6+HPgiURH+KTO70sxeHl5fuvuLv4eu2OMnY8fZGv6Mp7lkGrA+pDvtWHk8SJQx7Ejbwd0f\nIfqynmhmk4CTiO7sMLMOM1tkZr83s+eIvuAw9LPKYypRQFwV+8z/K2wHWEx0J3+nmT1qZgurOPaT\nsb+3kvx5ptkfuCKWpmeJMrmujOuci5l9PHQM2BKOPZmhn1t5uncNv49pQI+HXCOIf6fySvtc9geO\nLr3nkLYzgFdlHOsf3b3T3bvc/Qx33xR7Lv5bqXSdp5Xtn/W+9iO6acwj6z1NJcp0c53X3Z8nqpp7\nb9j0XqKgCOS6rnntD3wnlt6HiKrOXgn8J1F15w1mtsHM/q+ZFfIcdLwGiJ8TFannZ+yzgehDK5ke\nto3UfqU/zGwCUVFug5ntD3wN+AhRL6BO4NdEGUNJ/Mc5jLt/wd2PJKraei1RveozRHcA5e+hZwRp\n3wDsF9I92mPlcT1wOlGd729C0AB4X9j2FqIfwoyw3coPQHTXNan0wMziGc4zRAH80JDRdLr7ZI8a\nPnH359394+7+aqKqsfPM7PiavbtI0jVdD3wolqZOdy+6+/+EdCVd57RjDTKzNwGfBN4DTAnfsS0k\nf27lNgJdZhbfd3qV7yvLeuDHZe95d3f/cJXHSTp/5nUmem/7xfbPel/rgdfkOGdp37T3tImoyjjv\neSH8Hszsz4iqmO+Cqq9r+e+hg52BspTmE8rSvKu797h7v7tf7O6HAG8E3klU9VXRuAwQ7r6FqK77\nS2Y238wmmVnBzE4ws/8bdrseuMDMpoYump8maqgZqSPN7JRwV3YuUfXPPUT1kE70xcHMPkhUgsjF\nzN5gZkeHiP4CoZEwlG5uBD5rZnuEQHTeCN/DveHYnwif07FEGecNIzhWHjcAbyVqF/l2bPseRJ/b\nH4m+7P+WcYz7gUPNbJaZ7UpU3AYGS3BfAy4zs70BzKzLzOaFv99pZgeGTPE5ojupgfITjNImYAcQ\nH0PxVeB8Mzs0pGOymZ0a/k68z
uF1T5Udp9weRJnSJmCimX0ayFv6+Hl47T+a2UQzO4WoPj7NU8C+\nZrZLzuN/H3itmf1l+G4Vwnt9Xc7Xp6p0nYl+Hx8ws0NCafXCjMNdBXzQzI43swnhOAeH58o//9T3\nFH6XtwIXhXznEOCsCm/lDqIbvc8AS2Il8Gqu62+JSoXvCN+hC4jaGkq+SpRX7A8Q8r2Tw9/Hmdlh\nIag8R3Tjmev3MC4DBIC7f54ow7yA6ANeT3QXvzTscglRffmvgAeIenGMpq/5d4nqJjcDf0nUC6Hf\n3X8D/D+iH+JTwGFEvZbyejnRj2AzUVH1j0S9NiBq2H4BeJSox9K3iepdq+Lu24iqek4guiv7MvB+\nd3+42mPlPN9Gos/jjUS9ZkquJXqPPUS9ve7JOMZviX5Q/w38juj9x32SqBrpnlBd9d/AzPDcQeHx\nn0I6vuzud4/qTQ1P31ai+uQVoVh/jLt/B/h3oqL8c0QlyRPCS7Ku81XAIeE4SxluGVH7xm/Da18k\no9qyLJ3bgFOI2hY2E32Hb814yXKiqsQnzeyZHMd/nuhm4L1EJdUniT6Dl2W9rgqp19ndf0DUiL08\n7LM8I52/IGp0vozoLv3H7CydXwG828w2m9kXcrynjxBVsT1J1M7xzaw34O4vEX3mb2HoDVPu6xpu\niv8O+DrR7+cFomrukiuIGvPvNLPniX5bR4fnXkXUC/M5oqqnH5PzRtOGVk1KEjO7iKjh68xGp0VE\nZKyM2xKEiIjUlwKEiIgkUhWTiIgkUglCREQSNePEcbnttddePmPGjEYnQ0RkXFm1atUz7j610n7j\nOkDMmDGDlStXNjoZIiLjipnlGk2vKiYREUmkACEiIokUIEREJJEChIiIJFKAEBGRROO6F5OISLtZ\nurqHxcvWsqG3j2mdRRbMm8n82dUu7ZKPAoSIyDixdHUPC266n/4d0QwYPb19LLjpfoC6BAlVMYmI\njBMX3fbgYHAo6d/hXHTbg3U5nwKEiMg40dvXX9X20VKAEBGRRAoQIiLjxG67dFS1fbQUIERExolC\nR3KWnbZ9tBQgRETGiS0pbQ1p20dL3VxFRJpEpTEO0zqL9PT2DXvdtM5iXdKjEoSISBNYurqH8299\ngJ7ePpxojMP5tz7A0tU9g/ssmDeTYmFoe0Ox0MGCeTPrkqa6BQgz+4aZPW1mv45t29PMfmhmvwv/\nTwnbzcy+YGaPmNmvzOyIeqVLRKQZLV62lr7+gSHb+voHWLxs7eDj+bO7uPSUw+jqLGJAV2eRS085\nbFyOpL4a+CJwbWzbQuBH7r7IzBaGx58ETgAOCv+OBr4S/hcRGZeqnRJjQ0LVUWl7/FiTiwXM6pXq\noepWgnD3nwDPlm0+Gbgm/H0NMD+2/VqP3AN0mtk+9UqbiEg95akuKpfWjjC5WBhyrN6+fjZv7c99\n3NEY6zaIV7r7RoDw/95hexewPrbfE2HbMGZ2jpmtNLOVmzZtqmtiRURGIk91Ubm09gUzhh2rmuOO\nRrM0UicVmDxhG+5+pbt3u3v31KkV19wWEam5pat7mLNoOQcsvJ05i5YPu4PPqi5Kk9a+0Lu1chfW\nrOOOxlh3c33KzPZx942hCunpsP0JYL/YfvsCG8Y4bSIiFZWqj0p39aVqHtg5o2pWd9Sk9oTerf1M\n6ywy4xVFntzyIg48ueVFVj7+LJ2TCmyuECRapZvrbcBZ4e+zgO/Gtr8/9GY6BthSqooSEWkmeaqP\nkqqLjCiYnLtkTWp7worfP8uAR5UnA+5cd886nqswCK6e3VzrVoIws+uBY4G9zOwJ4EJgEXCjmZ0N\nrANODbvfAbwdeATYCnywXukSERmNPNVHpZLE4mVrB0sSiXXmOQxkvLBrvC4Y5O6npzx1fMK+Dvx9\nvdIiIlIreUczz5/dxfzZXcz+zJ0Vq4hGasXCuXU5bomm2hARqcKCeTOHrOpW0tPbx6yL7xxsU+ic\nVMC9fms1dBYLdTlunAKEiEi1UgaqxYNBvUoNAIUJxkUnHVq345coQIiIVGHxsrX0ZzUM1Fm92x3i\nFCBERKpQrzEHeXR1Fuve7hDXLAPlRETGhXqNOYi7/LRZYzpraxqVIEREcigNcEvqwVQtI73b65RJ\nhSHdZPNO9lcPChAiIhUsXd3Dgpvvr9j2YMCkXTp4YVv63EmlNgRg2DELHcaFJ0aNz6Vuso2kACEi\nUsHF33swV8O0Q2ZwMIaPXWh0KSGLAoSISEzSOg616rJaPhdTMwaFOAUIEZEgayK+0SoWOjju4KkV\nJ/prJurFJCISpE3EN1qlqbvvenhT1etENJIChIhIUI8xDlMmFVixcC7zZ3eNaJ2IRlIVk4i0nJHW\n86dNxJfVLbUSj70w70R/zUIlCBFpKSNZD7rkuIOTV6kczcQaW2LzM6UtKzrWA+DyUoAQkZYykvWg\nS+56uPbr3MdLB2nLijZjAzWoiklEWsxo6vlr3RaQVDpohgFweakEISItJa0+P089fy3bApq9dJCH\nAoSItJTR1PMnvTaLEU2sF68yuvy0WTy26B2DPZfGM1UxiUhLmT+7i5WPP8v1965nwJ0OM951ZFSt\nU6l3U9Ja0lmmdRbHVZVRtRQgRKSlLF3dwy2rehgI/UsH3LllVdSD6ZZVPRVHMZcy/EoT9DVz76Na\nUYAQkZaS1oupVKIo37542dohASJeyphcLAxbY3pLX3/Tz6FUKwoQItJS0noilQeHpP3L52Lq7eun\nWOjgstNmtXwwSKIAISItJXU0tA0d1VzSOamQuRhQUimjXagXk4i0lLReTMWJydndi/0DgyOv0zTr\nXEn1pgAhIi0lbbRyX/+OxP37+ndUnLG1WedKqjdVMYlI06vFIjtpVU+VtENvpTQKECLSlOLtAvHZ\nVCstspO26M+7juwa0s0Vosx/18KE1BXjutqkt1KahlQxmdnHzOxBM/u1mV1vZrua2QFmdq+Z/c7M\nlpjZLo1Im4g0XnxGVhg+m2rW5Htp3VzvenhTYtXThScemthmcflps1piNPRojHkJwsy6gH8EDnH3\nPjO7EXgv8HbgMne/wcy+CpwNfGWs0ycijZeUyZerdlK+Db19maOex8s60WOpUVVME4GimfUDk4CN\nwFzgfeH5a4CLUIAQaStZ3U3LTessJrZNjGRRnlaeLmM0xjxAuHuPmX0OWAf0AXcCq4Bed98ednsC\n0NUSaSPlbQdZioUOjjt46pCpMHp6+zh3yRpg+Apw7dzQPBqNqGKaApwMHAD0AjcBJyTsmjjs0czO\nAc4BmD59ep1SKSKjVW3PozzVShBl/n39A1x3z7rUfZydQaLdG5pHoxFVTG8B/uDumwDM7FbgjUCn\nmU0MpYh9gQ1JL3b3K4ErAbq7u0ezEqCI1ElaTyJI7nlU2iePvD/6UnBYsXBuzldIuUb0YloHHGNm\nk8zMgOOB3wB3Ae8O+5wFfLcBaRORHJau7mHOouUcsPB25ixaPmy955Es+9lhVvN0tusI6F
oZ8wDh\n7vcCNwP3AQ+ENFwJfBI4z8weAV4BXDXWaRORyuJdUJ2dpYN4kBjJsp9pk+mNRruOgK6VhvRicvcL\ngQvLNj8KHNWA5IhIFbJKB6Xqo7SeRA7MWbSc4w6eyl0PbxrSPtE1wpHOadQwPXqai0lEqpKndJC1\ndGdPbx/X3bNuWAnkuIOnUpgwtJqpMME485jpuZYB7SwWmDKpMGQQnBqmR0dTbYhIVfKMMyhf9rOS\nvv4Bbv/VxqjrUZxB9/570r3/noM9oibt0sHWbQM4UbvF6UfvxyXzDxvlu5IkChAiUpUF82ay4Kb7\n6d+xM+MvTLAh1Tnly37mkTQfUv+As3jZ2iHH7py0C5/9C3VbHQsKECJSvYQ7/bi8YxryKFVBVeoy\nW4sZX2UotUGISFUWL1s7OHq5pHSnX1LL7qUdZhW7zObpWSXVU4AQkarkaaSuVffSYqEj11rSIxl3\nIZUpQIhIVdIy//j2rF5MeZV6InXlON9Ixl1IZQoQIlKVtDWf4w3J8WU/R6q0FkOe8+UJWlI9BQgR\nqcr82V2868iuwakxOsx415HDp8ueP7uLFQvnDmvPzqOrrMts0kI/8fPlCSJSPfViEpFc0pYAHXDn\nllU9dO+/JzB84Z3JxQK9fclLeiZJytgrrddQek69mGrLvA7zn4yV7u5uX7lyZaOTIdLy8qzV0Fks\n8NL2HcPWfN7hzkvbdwzbv1Sy6JxUwB229PUrYx8jZrbK3bsr7acShIgMkTSeIM+4hqRSQtZrHHhs\n0TtGm1ypIwUIEUmtPiofpCbtRQFCpM2VVx+VVzrXKzh0Fgt1Oa7UjgKESJu6YOkDuSfTq7XCBOOi\nkw4d8/NKdRQgRNrQBUsfyFzTuZ60RvT4oQAh0oauv3d9w86tNaLHDw2UE2lDjahWgvqsOy31owAh\n0oYalVGffvR+DTmvjIwChEgbqlVGnRVoioUJlFYQ7bBo6VCt/Da+qA1CpA1dMv8w/rDpT6z4/bOj\nOs4xr57Cfeu2DBs9rfWgW4NKECJtaOnqHu5btyXXvlmVUY/9sa/iRHoyfqkEIdIi0pbcjG8vzXuU\nd/K8YqEjc6Dcht6+ihPpyfilACHSAspHQ5emyFj5+LPcsqpncPvmrflnVU1a6rOc1ltobQoQIuNY\nfA6lcn39AyMeKV2p5FDaR+sttDa1QYiMU6VSQ1JwKBlJcKi01Gd8H1UttTaVIESaVFqbQkmeKbgn\nGOyoMkbERzqXz+SqHkrtRQFCpAmltSnAztXTskoOJR1VBoj4uAat0iYVA4SZneLut1baVg0z6wS+\nDryeaHbhvwLWAkuAGcBjwHvcffNIzyHSzOKlg8nFAmbQu7U/c4Gevv4BFi9bO5hBd5hVrELqH76Q\nW6byAXTqodTe8rRBXJCw7Z9Hed4rgP9y94OBw4GHgIXAj9z9IOBH4bFIy4m3HThRl9PNW/txdpYU\n0koHG2LbazmfkkY6S5LUEoSZzQPeBnSZ2edjT70cqPK+ZMhxXw68GfgAgLtvA7aZ2cnAsWG3a4C7\ngU+O9DwizapS20Ff/0Bq6SDerXTKpEJV3VaTFDqMxe8+XKUESZRVxfQ08GvgReDB2PbnGd3d/auB\nTcA3zexwYBXwUeCV7r4RwN03mtneSS82s3OAcwCmT58+imSINMaGHG0HA+7DupqWdyt9sRYrvTVm\nUlcZJ1KrmNx9tbtfBcwE/hP4sbtf5e43uvszozjnROAI4CvuPht4gSoCjrtf6e7d7t49derUUSRD\npDHyDC7rMEudwmLp6h7mLFpOX7UNDAn6dziLl60d9XGkNeVpgzgeeAD4IYCZzTKz74zinE8AT7j7\nveHxzUQB4ykz2yecYx+iEoxIy1kwbybFQkfmPmntC0tX97Dg5vtz9WDKK0+JRtpTnm6unwGOBu4C\ncPc1ZnbgSE/o7k+a2Xozm+nua4kC0G/Cv7OAReH/7470HCLNImssQ9oIaIDOYiGxm6vh9A/Utl5I\n02VImjwBot/de23ovO+j/Yb+A/AtM9sFeBT4IFFp5kYzOxtYB5w6ynOINFSlsQyl6qLywWhG8mR6\nlQbFlZx5TNQ2l2eaDU2XIVnyBIiHzOw9wAQzO4CoQfme0ZzU3dcA3QlPHT+a44o0kzxjGZJKE6O9\n+7rr4U2sWDh3sMtqpTEX6sEkafIEiI8Anybq2vodYBnwqXomSqQVpNXtl28vlSZmf+bOUXdbzTq+\nSLUqBgh3f4FoPMInzWwPd3++/skSGf+mdRYT2xjS6vxrERyyji9SrayBcv8M3OLuD4e2gu8DR5nZ\nS8Dp7r58rBIpMh4tmDczsX2hp7eP2Z+5E3fY0rezqqcW1KYgtZTVzfV9RPMjAbwf2BXYC5gLXFrn\ndImMe/Nndw2ZNtvY2b6weWs/vX07p9f42JI1oz6fpuCWWssKENvcB7tAvA34trtvd/cHgUL9kyYy\n/s2f3cWKhXPp6ixmNj6PtmH6zGOms2LhXAUHqamsNoiXzOx1RAPW5gKfiD03qa6pEmkSeXoAVVq3\nAeo/GO2uhzfV9fjSnrICxMeB24iqla5w90cBzOztwK/GIG0iDVU+RiE+NqGnt48FN98/bM3npHUb\nACYXC4ljG2pFo6GlHrLmYlrh7ge5+xR3vyi2/Q53f8+YpE6kgSrNuto/4Hz73nWJYx3OXbKG15x/\nBxcsjYLF0HGmtaeeS1IPWlFOJEWeu/Ks1doG3LnunnVAVC1VL+q5JPWSZ7I+kbZUq7vy6+9dX7c7\nfPVcknqqGCDMbFgpI2mbSKs57uDaTCc/4J5rBtdqdXUW1XNJ6ipPRv8Loum4K20TGVcq9T6qVc+g\nDrNcM7hWQ9VKMhayRlLvDewDFM3sMKJxPhAtOapurtLUKmX+lWZahdr1DDr96P0Gjzt/dhcHLLw9\nddxDxwRjIKthg6jkoEn2ZCxkVTG9A/gisC/wpdi/TwH/Uv+kiYxMKfPv6e0bHKl8/q0PsHR1z+A+\nWTOtlkwuJo8HTeuQ1NVZ5MxjptMRuix1mHHmMdMHZ1UtSWuP6Oos8v9OPXxw5HXaPqpWkrGSWoJw\n928SrRv9Hne/cQzTJDIipVJDUhVO+TTbeWZaTeuaOmmXDrZt30F/7E6/MMEG7+rLA0K5pDmaSlVG\nWetEqFpJxlqeNoi9zezl7v6cmX2VqO3hfHf/UZ3TJpJbUoZaLp7555lpNW121Re2JZwjJZhUWlEu\nrQoszz4i9ZYnQJzj7l80s7cSVTd9GLgSOLKuKROpQqVBbTA088+aaXXOouUsmDeTCZY9ziGuf8CH\nlFAg34pyWbSOgzRannEQpZ/ICcA33X1VzteJjJlKDcrxzH/p6p7MmVZLGXne4JCWhjztHCLNLE9G\nf7+Z3QGcCPzAzHZn9JNPitRU1kC0pMy/FCTSZlrNu/5zVhryrign0qzyBIgPAhcBR7n7VqJ1Ic6u\nZ6JEqpU0EK1Y6GDKpEJi5h+/i69Fhp3UgJwWt
DRvkowXFQOEuw8AryZqewAo5nmdyFiKVxkZO6eg\nSJsDaUNvH0tX9zBn0fIRF4dL7dJp012kBS31RJLxwnauCZSyg9kXiRYIerO7v87M9gSWufsbxiKB\nWbq7u33lypWNToY0sTmLlif2VuosFnhp+44RVSVB/sFqedaKEBlrZrbK3bsr7ZenF9Mb3f0IM1sN\n4O7PhjWqRZpe2pgDs+rbGUYyglk9kWQ8yxMg+s1sAqGdz8xeAeyoa6pEaqR8PEFpVbi0MQ5pSiOY\nRdpJ1lxME919O9H0GrcAU83sYuA9wMVjlD4RYHRVNfHRyefduCaz+6oZTJxg9A/ERkl3mNoNpC1l\nlSB+ARzh7tea2SrgLUTtcqe6+6/HJHUi5JtYL+k15QHlU7f+quLYBneGd+JWp25pU1m9kQYnD3D3\nB939Cne/XMFBxlq1A87SJuvb2l+5ZrTDbMgcSwD9O1yD26QtZZUgpprZeWlPuvvnR3NiM+sAVgI9\n7v5OMzsAuAHYE7gP+Et33zaac0hrSBun0NPbx2vOv4PTj95vyAR5aQGlkmKhI3U/DW6TdpRVgugA\ndgf2SPk3Wh8FHoo9/nfgMnc/CNiMBuNJkDbtNuxc9/mCpQ8MbhtJZl4ay5A21bYGt0k7yipBbHT3\nz9TjpGa2L9F6E58FzjMzA+YC7wu7XEM0evsr9Ti/jC9p027HXX/v+sFSRNpMrWnK12zQNNsikVxt\nEHVwOfAJdnaXfQXQG3pNATzJrCfBAAATgUlEQVQBJLY+mtk5ZrbSzFZu2lSbJSGluaWNho4bcB+c\niG/BvJkUOvJ9fScYdO+/5+DjtBHZGssg7SirBHF8PU5oZu8Ennb3VWZ2bGlzwq6JfUfc/Uqi6cbp\n7u5W/5Jxqppuq5OLBXr7KgeJnt4+PrZkTVWdjnY4w6bp1uA2kUjWinLP1umcc4CTzOztRBP/vZyo\nRNEZG3uxL7ChTueXBqu222qeKqaSkdwxlOZl0pQYIkON+aR77n6+u+/r7jOA9wLL3f0M4C7g3WG3\ns4DvjnXaZGxU2201TxXTaEwuFiquYS3SjpppVtZPEjVYP0LUJnFVg9MjdZLVbbXUjhBXzx5EafMy\naWEfkQYHCHe/293fGf5+1N2PcvcD3f1Ud3+pkWmT+snqtpp09540bfZolE/TnTUluEg7yzNZn0hN\nVWpTiN+9xyfZ27UwoepJ9iCa2tssqqpKal9YvGxtYrdYjX2QdqcAIWMuT5tCT28f5y5Zs/M1ff25\nu67G5ZmFNW1KcI19kHbXTG0Q0iZGemfeP+BV9WjKm8lr7INIMpUgZMwtmDeTBTffP2RK7byyFkDc\nbZcOtm4bwIkm3XvXkfnHM2jsg8hwChBSE1WPI6jxEMcpkwq82L9j8LAD7tyyqofu/fdUxi8yQqpi\nklFLm147bRzB4mVrh02pPVqbt/arq6pIjSlAyKjlHfi2dHUPcxYtz5xIL6uJoTDBmDIpvYtsEnVV\nFRk5BQgZtbRMOL596eoeFtx8f2Zw6OosctlpswYbi6dMKkRdVMNzi089nAtPPHTYmIhioYPOlLEV\n6qoqMnJqg5BMaW0L8e0YiW0K8QFxF3/vwcxGaSOqmlq8bG2ueZDK0wSapluk1hQgJFXapHorH3+W\nW1b17MyMU/L9eJfUrAFu8fiSZ73prB5HmnBPpHYUICRVWtvC9feuZyCrv2mQd5K98iOV2i+qzdzV\nVVWkttQGIanS2hbyBAcYWsWU1kZQ7blFZOwoQEiqtAbejpzDmeO7XXTSoRQm5B8GrcZlkcZTgJBU\nSbOoFgsdnH70frlmV41XMc2f3cXiUw+nK0fGr8ZlkeagACGp0uYoumT+YUO2p5ULyqf1nj+7ixUL\n52YGCc2DJNI81EgtmUbT8Ns/sCNxe9rsqQoMIs1FAUKqVt79Nc0L2waYdfGdqWsxqEuqSHNTgGhj\n8cFuk2OL6nROKuAOW/r6EwfHZY2GLtfbt7MdonyMgwKCSHNTgGhT5aWAeEYeH9SWOjhuhEY6xkFE\nxp4CRJtKGgSXpprBcXnkHeNQ9RTiIlJT6sXUpqodiFYpOFQz06oDcxYtT50OHKqfQlxEak8Bok1V\nOxAta3BchxmnHbUfqz/9Vi4/bVauMRJ51ozQ+g4ijaUA0WbiazLkHddcaXBcafW2pat7ho2d6CwW\nUksWWRl+ninERaS+1AbRRsobpp3UmboHdcXq/rv33zO1F1O88Tmph9IBC29PPE9ahj+ts5h4Hk3B\nITJ2VIJoA6VSw7lL1gyrtqnU7Lxi4dzBzL40Ejqt5JF1d5+WsadtT5vmQ1NwiIwdlSDGoTy9e+Jj\nFiqVEtJ0dRYTz9U5qZC4vkNnRiN12ujptAxfg+lEGk8BYpxJW8QHdmaqSVVJWTqLBV7avmNY5n3c\nwVNTzpV8xKyOTiPJ8DWYTqSxxjxAmNl+wLXAq4AdwJXufoWZ7QksAWYAjwHvcffNY52+ZpfVuyee\nCecd41AsdHDRSYcOvi6eeaedK82WvuwFgpThi4wvjShBbAc+7u73mdkewCoz+yHwAeBH7r7IzBYC\nC4FPNiB9TS1P7568PX06zHjXkV1D2hjiPrZkTVVpUwOySGsZ80Zqd9/o7veFv58HHgK6gJOBa8Ju\n1wDzxzpt40Gext6stoC4AXeuu2cdMxbenjhwLe1cUyYV1IAs0gYa2ovJzGYAs4F7gVe6+0aIggiw\nd8przjGzlWa2ctOmTWOV1KaRp3fPSGbE6Ont42NL1gwJFmnnuvDEQxPXiVD1kUhrMa/R/DpVn9hs\nd+DHwGfd/VYz63X3ztjzm919StYxuru7feXKlfVOatNJ68U0ktlW05TWZwD1JBJpNWa2yt27K+7X\niABhZgXg+8Ayd/982LYWONbdN5rZPsDd7p5ZZ9GuASJJ3jUaqtHVWWTFwrk1O56INIe8AaIRvZgM\nuAp4qBQcgtuAs4BF4f/vjnXamkG8dJC2LsMZX/s5K37/7OBr5rxmTx77Y19NgwNoWguRdjfmJQgz\n+3Pgp8ADRN1cAT5F1A5xIzAdWAec6u7PJh4kaLUSRKVSQLHQwb5TduV3T78wJulRCUKkNTVtCcLd\nf0b6OvfHj2Vamk2l8Qt9/QN1Cw7lo63VK0lENBdTExltlU6eabbTXHbaLPVKEpEhNNVGE0mbwTSv\nXQsTeNnECUOWD82jw0yjnEVkGJUgmsiCeTMpdORdpWG4zVv7eWn7DnbbJbkk8bKJyZf79KP3G/E5\nRaR1KUA0m1H2GejrH+CFbcntGNu27+DMY6YPrg7XYcaZx0znkvmHje6kItKSVMXURBYvW0v/jvr1\nKpvWWeSS+YcpIIhILgoQTaCWI6Ah6pG0a6Ej99oLIiJJFCBGIc/CPXmOcd6SNYMDQmrBgUtPOUxT\nZIjIqChAjFCehXvyOP/WX9U0OEDUTVW9kkRktBQgRijPwj1QuZTR11/b8KCqJBGpFQWIEUob1NbT\n28ec
RcsHM+lalDLy6lJVkojUkALECGUNaisFgpdNnFCxlDHBoBYdlww0b5KI1JTGQYxQ0mI6cX39\nA6kjmuOlj/cdPT3X+SoNn9NynyJSaypBVFDehnDcwVO56+FNbOjtY3KxwK6FCWzeWt3UFtM6i1V3\nbc0qZKjdQUTqQQEiQ1JPpevuWTf4fG9fP8VCR1XVRMVCB8cdPLVmi/uo3UFE6kUBIkOl6beBqjL5\nUmae57hxZsnrTGu9BhGpJ7VBZKjVyGbYmZnPn91V1bTehQ7jjKOnD2vvULWSiNSbAkSG0qR2lXQW\nC5kN1uWZed4G5a7OIovffTiXzD+MS085TOs1iMiYUhVThoEcy7EWJhgXnXQowGBj9uRiATPo3dqf\nODhuwbyZFdsgyrutamS0iIy1tg4QlUY577ZLR+rU2YNCISOegZeO25vSu6m0X1YvJnVbFZFGa9sq\nplIPpZ7ePpydg9uWru4Z3GdrpeAA9A84i5etreq4EAWJFQvncvlps9S+ICJNqW0DRNZcSiV5BzjH\nG53zHDdu/uwutS+ISFNq2yqmtJ5E8e0dZrnaIeLVQXmOW07tCyLSjNq2BJFWxx/fnmet5rw9lNSm\nICLjTdsGiKS5lMoz+0vmH8ac1+w5ZJ+D9t5tsDqoM0y18bEla5izaDlLV/fkOq6IyHjQtgEiT93/\n0tU93Lduy5DXPbH5RRbMm8llp83ipe072Ly1f0hjNKA2BRFpCeY56tibVXd3t69cubJux5+zaHli\nN9SuUF2U9pymvxCRZmZmq9y9u9J+bdtInUfWokDVvkZEZLxpqiomM3ubma01s0fMbGG9z7d0dQ9z\nFi3ngIW3D7YhxI2kYVmN0SLSKpomQJhZB/Al4ATgEOB0MzukXufLM6Ct0qJA5dQYLSKtpJmqmI4C\nHnH3RwHM7AbgZOA3tTxJ1kI95cuBxqfE2BACSRqtyyAiraaZAkQXsD72+Ang6FqeoHwBoCTlbQjx\nQWxZjdZqmBaRVtM0VUwkL7s87KbdzM4xs5VmtnLTpk1VnSDPQj0Oie0RkG/shIhIq2imAPEEEB+6\nvC+woXwnd7/S3bvdvXvq1KlVnSBvD6OsCfY0xkFE2kUzVTH9EjjIzA4AeoD3Au+r5QkmFwv09iVP\nwV2uvD2iRPMmiUi7aJoA4e7bzewjwDKgA/iGuz9Yy3PkXCBuUC2XHBURGW+aJkAAuPsdwB31Ov7m\nlAV80uRdclREpBU1UxtE3VWb4eeZ6ltEpFW1VYCoNsPv0qhoEWljbRUgqsnw1X1VRNpdWwWIpHEM\npUqnKZMKdBYL6r4qIhI0VSN1vZVPnTFN02OIiKRqqwABGscgIpJXW1UxiYhIfgoQIiKSSAFCREQS\nKUCIiEgiBQgREUlkPo6nkzCzTcDjozjEXsAzNUrOeNBu7xf0ntuF3nN19nf3iusljOsAMVpmttLd\nuxudjrHSbu8X9J7bhd5zfaiKSUREEilAiIhIonYPEFc2OgFjrN3eL+g9twu95zpo6zYIERFJ1+4l\nCBERSaEAISIiidoyQJjZ28xsrZk9YmYLG52eejCz/czsLjN7yMweNLOPhu17mtkPzex34f8pjU5r\nLZlZh5mtNrPvh8cHmNm94f0uMbNdGp3GWjOzTjO72cweDtf7z1r5OpvZx8J3+tdmdr2Z7dpq19nM\nvmFmT5vZr2PbEq+pRb4Q8rNfmdkRtUpH2wUIM+sAvgScABwCnG5mhzQ2VXWxHfi4u78OOAb4+/A+\nFwI/cveDgB+Fx63ko8BDscf/DlwW3u9m4OyGpKq+rgD+y90PBg4nev8teZ3NrAv4R6Db3V8PdADv\npfWu89XA28q2pV3TE4CDwr9zgK/UKhFtFyCAo4BH3P1Rd98G3ACc3OA01Zy7b3T3+8LfzxNlGl1E\n7/WasNs1wPzGpLD2zGxf4B3A18NjA+YCN4ddWur9ApjZy4E3A1cBuPs2d++lha8z0To2RTObCEwC\nNtJi19ndfwI8W7Y57ZqeDFzrkXuATjPbpxbpaMcA0QWsjz1+ImxrWWY2A5gN3Au80t03QhREgL0b\nl7Kauxz4BLAjPH4F0Ovu28PjVrzWrwY2Ad8MVWtfN7PdaNHr7O49wOeAdUSBYQuwita/zpB+TeuW\np7VjgLCEbS3b19fMdgduAc519+canZ56MbN3Ak+7+6r45oRdW+1aTwSOAL7i7rOBF2iR6qQkod79\nZOAAYBqwG1EVS7lWu85Z6vY9b8cA8QSwX+zxvsCGBqWlrsysQBQcvuXut4bNT5WKn+H/pxuVvhqb\nA5xkZo8RVRvOJSpRdIaqCGjNa/0E8IS73xse30wUMFr1Or8F+IO7b3L3fuBW4I20/nWG9Gtatzyt\nHQPEL4GDQq+HXYgauG5rcJpqLtS/XwU85O6fjz11G3BW+Pss4LtjnbZ6cPfz3X1fd59BdE2Xu/sZ\nwF3Au8NuLfN+S9z9SWC9mc0Mm44HfkOLXmeiqqVjzGxS+I6X3m9LX+cg7ZreBrw/9GY6BthSqooa\nrbYcSW1mbye6u+wAvuHun21wkmrOzP4c+CnwADvr5D9F1A5xIzCd6Md2qruXN4aNa2Z2LPBP7v5O\nM3s1UYliT2A1cKa7v9TI9NWamc0iapjfBXgU+CDRzV9LXmczuxg4jain3mrgr4nq3FvmOpvZ9cCx\nRFN6PwVcCCwl4ZqGQPlFol5PW4EPuvvKmqSjHQOEiIhU1o5VTCIikoMChIiIJFKAEBGRRAoQIiKS\nSAFCREQSKUDIuGRmA2a2JszoeZOZTRrFsY6Nzf56UtYMv2Hm1L8bwTkuMrN/Gmkaa30ckTwUIGS8\n6nP3WWFGz23A38afDIOGqv5+u/tt7r4oY5dOoOoAITIeKUBIK/gpcKCZzQjrIXwZuA/Yz8zeamY/\nN7P7QkljdxhcE+RhM/sZcErpQGb2ATP7Yvj7lWb2HTO7P/x7I7AIeE0ovSwO+y0ws1+Gufgvjh3r\nny1ad+S/gZmUMbPJZvZYKZCF0cHrzaxgZn8Tjnm/md2SVEIys7vNrDv8vVeYZqS0JsbiWJo+FLbv\nY2Y/iZW83lSLD19alwKEjGth/p0TiEaMQ5QRXxubuO4C4C3ufgSwEjjPzHYFvgacCLwJeFXK4b8A\n/NjdDyea3+hBoonwfh9KLwvM7K1E8/AfBcwCjjSzN5vZkURTfswmCkBvKD+4u28B7gf+d9h0IrCs\nNMeQu78hnPshqlvf4Gyi6RbeEM77N2Z2APC+cPxZROtGrKnimNKGJlbeRaQpFc2slMH9lGjeqWnA\n42FOfIgWSjoEWBHNRsAuwM+Bg4kmfPsdgJldR7TQSrm5wPsB3H0A2GLDV2Z7a/i3OjzenShg7AF8\nx923hnOkzfe1hGjaiLuIAsqXw/bXm9klRFVauwPLsj6MhDT9LzMrzU00OaTpl8A3wiSOS91dAUIy\nKUDIeNUX7oQHhSDwQnwT8EN3P71sv1nUbjpoAy519/8oO8e5O
c9xG3Cpme0JHAksD9uvBua7+/1m\n9gGieXnKbWdnLcCuZWn6B3cfFlTM7M1Eiyr9p5ktdvdrc6RR2pSqmKSV3QPMMbMDYbCO/7XAw8AB\nZvaasN/pKa//EfDh8NoOi1Zve56odFCyDPirWNtGl5ntDfwE+AszK5rZHkTVR8O4+5+AXxAtG/r9\nUFIhnGNjuNs/IyV9jxEFFdg5k2kpTR8Or8XMXmtmu5nZ/kRrZnyNqMRVs7WLpTWpBCEty903hbvv\n683sZWHzBe7+WzM7B7jdzJ4Bfga8PuEQHwWuNLOzgQHgw+7+czNbYdFi8j8I7RCvA34eSjB/IppJ\n9D4zW0JUz/84UTVYmiXATQwtJfwL0cy7jxO1r+wx/GV8DrjRzP6SnSUPiGZ2nQHcF2b63ES0POWx\nwAIz6w/pfH9GmkQ0m6uIiCRTFZOIiCRSgBARkUQKECIikkgBQkREEilAiIhIIgUIERFJpAAhIiKJ\n/j9qBqvLroC2NwAAAABJRU5ErkJggg==\n",
349 | "text/plain": [
350 | ""
351 | ]
352 | },
353 | "metadata": {},
354 | "output_type": "display_data"
355 | }
356 | ],
357 | "source": [
358 | "#we can now run the model with the test data\n",
359 | "y_predicted = lm.predict(x_test_set)\n",
360 | "\n",
361 | "plt.title('Comparison of Y values in test and the Predicted values')\n",
362 | "plt.ylabel('Test Set')\n",
363 | "plt.xlabel('Predicted values')\n",
364 | "plt.scatter(y_predicted, y_test_set)\n",
365 | "plt.show()"
366 | ]
367 | },
368 | {
369 | "cell_type": "markdown",
370 | "metadata": {},
371 | "source": [
372 | "As expected we get a really good fit!"
373 | ]
374 | }
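A visual check like the scatter plot above can be backed up with numbers. The sketch below is a possible follow-up cell, not part of the original notebook; it assumes the `y_test_set` and `y_predicted` arrays from the cells above are still in scope.

```python
# Hypothetical follow-up cell: quantify the fit shown in the scatter plot.
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

r2 = r2_score(y_test_set, y_predicted)                        # proportion of variance explained
rmse = np.sqrt(mean_squared_error(y_test_set, y_predicted))   # typical size of a prediction error

print("R^2 : {:.4f}".format(r2))
print("RMSE: {:.4f}".format(rmse))
```

An R² close to 1 together with an RMSE that is small relative to the spread of y would confirm the "good fit" conclusion drawn from the plot.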
375 | ],
376 | "metadata": {
377 | "kernelspec": {
378 | "display_name": "Python 3",
379 | "language": "python",
380 | "name": "python3"
381 | },
382 | "language_info": {
383 | "codemirror_mode": {
384 | "name": "ipython",
385 | "version": 3
386 | },
387 | "file_extension": ".py",
388 | "mimetype": "text/x-python",
389 | "name": "python",
390 | "nbconvert_exporter": "python",
391 | "pygments_lexer": "ipython3",
392 | "version": "3.6.3"
393 | }
394 | },
395 | "nbformat": 4,
396 | "nbformat_minor": 2
397 | }
398 |
--------------------------------------------------------------------------------
/data/linear_regression_test.csv:
--------------------------------------------------------------------------------
1 | x,y
2 | 77,79.77515201
3 | 21,23.17727887
4 | 22,25.60926156
5 | 20,17.85738813
6 | 36,41.84986439
7 | 15,9.805234876
8 | 62,58.87465933
9 | 95,97.61793701
10 | 20,18.39512747
11 | 5,8.746747654
12 | 4,2.811415826
13 | 19,17.09537241
14 | 96,95.14907176
15 | 62,61.38800663
16 | 36,40.24701716
17 | 15,14.82248589
18 | 65,66.95806869
19 | 14,16.63507984
20 | 87,90.65513736
21 | 69,77.22982636
22 | 89,92.11906278
23 | 51,46.91387709
24 | 89,89.82634442
25 | 27,21.71380347
26 | 97,97.41206981
27 | 58,57.01631363
28 | 79,78.31056542
29 | 21,19.1315097
30 | 93,93.03483388
31 | 27,26.59112396
32 | 99,97.55155344
33 | 31,31.43524822
34 | 33,35.12724777
35 | 80,78.61042432
36 | 28,33.07112825
37 | 47,51.69967172
38 | 53,53.62235225
39 | 69,69.46306072
40 | 28,27.42497237
41 | 33,36.34644189
42 | 91,95.06140858
43 | 71,68.16724757
44 | 50,50.96155532
45 | 76,78.04237454
46 | 4,5.607664865
47 | 37,36.11334779
48 | 70,67.2352155
49 | 68,65.01324035
50 | 40,38.14753871
51 | 35,34.31141446
52 | 94,95.28503937
53 | 88,87.84749912
54 | 52,54.08170635
55 | 31,31.93063515
56 | 59,59.61247085
57 | 0,-1.040114209
58 | 39,47.49374765
59 | 64,62.60089773
60 | 69,70.9146434
61 | 57,56.14834113
62 | 13,14.05572877
63 | 72,68.11367147
64 | 76,75.59701346
65 | 61,59.225745
66 | 82,85.45504157
67 | 18,17.76197116
68 | 41,38.68888682
69 | 50,50.96343637
70 | 55,51.83503872
71 | 13,17.0761107
72 | 46,46.56141773
73 | 13,10.34754461
74 | 79,77.91032969
75 | 53,50.17008622
76 | 15,13.25690647
77 | 28,31.32274932
78 | 81,73.9308764
79 | 69,74.45114379
80 | 52,52.01932286
81 | 84,83.68820499
82 | 68,70.3698748
83 | 27,23.44479161
84 | 56,49.83051801
85 | 48,49.88226593
86 | 40,41.04525583
87 | 39,33.37834391
88 | 82,81.29750133
89 | 100,105.5918375
90 | 59,56.82457013
91 | 43,48.67252645
92 | 67,67.02150613
93 | 38,38.43076389
94 | 63,58.61466887
95 | 91,89.12377509
96 | 60,60.9105427
97 | 14,13.83959878
98 | 21,16.89085185
99 | 87,84.06676818
100 | 73,70.34969772
101 | 32,33.38474138
102 | 2,-1.63296825
103 | 82,88.54475895
104 | 19,17.44047622
105 | 74,75.69298554
106 | 42,41.97607107
107 | 12,12.59244741
108 | 1,0.275307261
109 | 90,98.13258005
110 | 89,87.45721555
111 | 0,-2.344738542
112 | 41,39.3294153
113 | 16,16.68715211
114 | 94,96.58888601
115 | 97,97.70342201
116 | 66,67.01715955
117 | 24,25.63476257
118 | 17,13.41310757
119 | 90,95.15647284
120 | 13,9.744164258
121 | 0,-3.467883789
122 | 64,62.82816355
123 | 96,97.27405461
124 | 98,95.58017185
125 | 12,7.468501839
126 | 41,45.44599591
127 | 47,46.69013968
128 | 78,74.4993599
129 | 20,21.63500655
130 | 89,91.59548851
131 | 29,26.49487961
132 | 64,67.38654703
133 | 75,74.25362837
134 | 12,12.07991648
135 | 25,21.32273728
136 | 28,29.31770045
137 | 30,26.48713683
138 | 65,68.94699774
139 | 59,59.10598995
140 | 64,64.37521087
141 | 53,60.20758349
142 | 71,70.34329706
143 | 97,97.1082562
144 | 73,75.7584178
145 | 9,10.80462727
146 | 12,12.11219941
147 | 63,63.28312382
148 | 99,98.03017721
149 | 60,63.19354354
150 | 35,34.8534823
151 | 2,-2.819913974
152 | 60,59.8313966
153 | 32,29.38505024
154 | 94,97.00148372
155 | 84,85.18657275
156 | 63,61.74063192
157 | 22,18.84798163
158 | 81,78.79008525
159 | 93,95.12400481
160 | 33,30.48881287
161 | 7,10.41468095
162 | 42,38.98317436
163 | 46,46.11021062
164 | 54,52.45103628
165 | 16,21.16523945
166 | 49,52.28620611
167 | 43,44.18863945
168 | 95,97.13832018
169 | 66,67.22008001
170 | 21,18.98322306
171 | 35,24.3884599
172 | 80,79.44769523
173 | 37,40.03504862
174 | 54,53.32005764
175 | 56,54.55446979
176 | 1,-2.761182595
177 | 32,37.80182795
178 | 58,57.48741435
179 | 32,36.06292994
180 | 46,49.83538167
181 | 72,74.68953276
182 | 17,14.86159401
183 | 97,101.0697879
184 | 93,99.43577876
185 | 91,91.69240746
186 | 37,34.12473248
187 | 4,6.079390073
188 | 54,59.07247174
189 | 51,56.43046022
190 | 27,30.49412933
191 | 46,48.35172635
192 | 92,89.73153611
193 | 73,72.86282528
194 | 77,80.97144285
195 | 91,91.36566374
196 | 61,60.07137496
197 | 99,99.87382707
198 | 4,8.655714172
199 | 72,69.39858505
200 | 19,19.38780134
201 | 57,53.11628433
202 | 78,78.39683006
203 | 26,25.75612514
204 | 74,75.07484683
205 | 90,92.88772282
206 | 66,69.45498498
207 | 13,13.12109842
208 | 40,48.09843134
209 | 77,79.3142548
210 | 67,68.48820749
211 | 75,73.2300846
212 | 23,24.68362712
213 | 45,41.90368917
214 | 59,62.22635684
215 | 44,45.96396877
216 | 23,23.52647153
217 | 55,51.80035866
218 | 55,51.10774273
219 | 95,95.79747345
220 | 12,9.241138977
221 | 4,7.646529763
222 | 7,9.281699753
223 | 100,103.5266162
224 | 48,47.41006725
225 | 42,42.03835773
226 | 96,96.11982476
227 | 39,38.05766408
228 | 100,105.4503788
229 | 87,88.80306911
230 | 14,15.49301141
231 | 14,12.42624606
232 | 37,40.00709598
233 | 5,5.634030902
234 | 88,87.36938931
235 | 91,89.73951993
236 | 65,66.61499643
237 | 74,72.9138853
238 | 56,57.19103506
239 | 16,11.21710477
240 | 5,0.676076749
241 | 28,28.15668543
242 | 92,95.3958003
243 | 46,52.05490703
244 | 54,59.70864577
245 | 39,36.79224762
246 | 44,37.08457698
247 | 31,24.18437976
248 | 68,67.28725332
249 | 86,82.870594
250 | 90,89.899991
251 | 38,36.94173178
252 | 21,19.87562242
253 | 95,90.71481654
254 | 56,61.09367762
255 | 60,60.11134958
256 | 65,64.83296316
257 | 78,81.40381769
258 | 89,92.40217686
259 | 6,2.576625376
260 | 67,63.80768172
261 | 36,38.67780759
262 | 16,16.82839701
263 | 100,99.78687252
264 | 45,44.68913433
265 | 73,71.00377824
266 | 57,51.57326718
267 | 20,19.87846479
268 | 76,79.50341495
269 | 34,34.58876491
270 | 55,55.7383467
271 | 72,68.19721905
272 | 55,55.81628509
273 | 8,9.391416798
274 | 56,56.01448111
275 | 72,77.9969477
276 | 58,55.37049953
277 | 6,11.89457829
278 | 96,94.79081712
279 | 23,25.69041546
280 | 58,53.52042319
281 | 23,18.31396758
282 | 19,21.42637785
283 | 25,30.41303282
284 | 64,67.68142149
285 | 21,17.0854783
286 | 59,60.91792707
287 | 19,14.99514319
288 | 16,16.74923937
289 | 42,41.46923883
290 | 43,42.84526108
291 | 61,59.12912974
292 | 92,91.30863673
293 | 11,8.673336357
294 | 41,39.31485292
295 | 1,5.313686205
296 | 8,5.405220518
297 | 71,68.5458879
298 | 46,47.33487629
299 | 55,54.09063686
300 | 62,63.29717058
301 | 47,52.45946688
--------------------------------------------------------------------------------
/data/linear_regression_train.csv:
--------------------------------------------------------------------------------
1 | x,y
2 | 24,21.54945196
3 | 50,47.46446305
4 | 15,17.21865634
5 | 38,36.58639803
6 | 87,87.28898389
7 | 36,32.46387493
8 | 12,10.78089683
9 | 81,80.7633986
10 | 25,24.61215147
11 | 5,6.963319071
12 | 16,11.23757338
13 | 16,13.53290206
14 | 24,24.60323899
15 | 39,39.40049976
16 | 54,48.43753838
17 | 60,61.69900319
18 | 26,26.92832418
19 | 73,70.4052055
20 | 29,29.34092408
21 | 31,25.30895192
22 | 68,69.02934339
23 | 87,84.99484703
24 | 58,57.04310305
25 | 54,50.5921991
26 | 84,83.02772202
27 | 58,57.05752706
28 | 49,47.95883341
29 | 20,24.34226432
30 | 90,94.68488281
31 | 48,48.03970696
32 | 4,7.08132338
33 | 25,21.99239907
34 | 42,42.33151664
35 | 0,0.329089443
36 | 60,61.92303698
37 | 93,91.17716423
38 | 39,39.45358014
39 | 7,5.996069607
40 | 21,22.59015942
41 | 68,61.18044414
42 | 84,85.02778957
43 | 0,-1.28631089
44 | 58,61.94273962
45 | 19,21.96033347
46 | 36,33.66194193
47 | 19,17.60946242
48 | 59,58.5630564
49 | 51,52.82390762
50 | 19,22.1363481
51 | 33,35.07467353
52 | 85,86.18822311
53 | 44,42.63227697
54 | 5,4.09817744
55 | 59,61.2229864
56 | 14,17.70677576
57 | 9,11.85312574
58 | 75,80.23051695
59 | 69,62.64931741
60 | 10,9.616859804
61 | 17,20.02797699
62 | 58,61.7510743
63 | 74,71.61010303
64 | 21,23.77154623
65 | 51,51.90142035
66 | 19,22.66073682
67 | 50,50.02897927
68 | 24,26.68794368
69 | 0,0.376911899
70 | 12,6.806419002
71 | 75,77.33986001
72 | 21,28.90260209
73 | 64,66.7346608
74 | 5,0.707510638
75 | 58,57.07748383
76 | 32,28.41453196
77 | 41,44.46272123
78 | 7,7.459605998
79 | 4,2.316708112
80 | 5,4.928546187
81 | 49,52.50336074
82 | 90,91.19109623
83 | 3,8.489164326
84 | 11,6.963371967
85 | 32,31.97989959
86 | 83,81.4281205
87 | 25,22.62365422
88 | 83,78.52505087
89 | 26,25.80714057
90 | 76,73.51081775
91 | 95,91.775467
92 | 53,49.21863516
93 | 77,80.50445387
94 | 42,50.05636123
95 | 25,25.46292549
96 | 54,55.32164264
97 | 55,59.1244888
98 | 0,1.100686692
99 | 73,71.98020786
100 | 35,30.13666408
101 | 86,83.88427405
102 | 90,89.91004752
103 | 13,8.335654576
104 | 46,47.88388961
105 | 46,45.00397413
106 | 32,31.15664574
107 | 8,9.190375682
108 | 71,74.83135003
109 | 28,30.23177607
110 | 24,24.21914027
111 | 56,57.87219151
112 | 49,50.61728392
113 | 79,78.67470043
114 | 90,86.236707
115 | 89,89.10409255
116 | 41,43.26595082
117 | 27,26.68273277
118 | 58,59.46383041
119 | 26,28.90055826
120 | 31,31.300416
121 | 70,71.1433266
122 | 71,68.4739206
123 | 39,39.98238856
124 | 7,4.075776144
125 | 48,47.85817542
126 | 56,51.20390217
127 | 45,43.9367213
128 | 41,38.13626679
129 | 3,3.574661632
130 | 37,36.4139958
131 | 24,22.21908523
132 | 68,63.5312572
133 | 47,49.86702787
134 | 27,21.53140009
135 | 68,64.05710234
136 | 74,70.77549842
137 | 95,92.15749762
138 | 79,81.22259156
139 | 21,25.10114067
140 | 95,94.08853397
141 | 54,53.25166165
142 | 56,59.16236621
143 | 80,75.24148428
144 | 26,28.22325833
145 | 25,25.33323728
146 | 8,6.364615703
147 | 95,95.4609216
148 | 94,88.64183756
149 | 54,58.70318693
150 | 7,6.815491279
151 | 99,99.40394676
152 | 36,32.77049249
153 | 48,47.0586788
154 | 65,60.53321778
155 | 42,40.30929858
156 | 93,89.42222685
157 | 86,86.82132066
158 | 26,26.11697543
159 | 51,53.26657596
160 | 100,96.62327888
161 | 94,95.78441027
162 | 6,6.047286687
163 | 24,24.47387908
164 | 75,75.96844763
165 | 7,3.829381009
166 | 53,52.51703683
167 | 73,72.80457527
168 | 16,14.10999096
169 | 80,80.86087062
170 | 77,77.01988215
171 | 89,86.26972444
172 | 80,77.13735466
173 | 55,51.47649476
174 | 19,17.34557531
175 | 56,57.72853572
176 | 47,44.15029394
177 | 56,59.24362743
178 | 2,-1.053275611
179 | 82,86.79002254
180 | 57,60.14031858
181 | 44,44.04222058
182 | 26,24.5227488
183 | 52,52.95305521
184 | 41,43.16133498
185 | 44,45.67562576
186 | 3,-2.830749501
187 | 31,29.19693178
188 | 97,96.49812401
189 | 21,22.5453232
190 | 17,20.10741433
191 | 7,4.035430253
192 | 61,61.14568518
193 | 10,13.97163653
194 | 52,55.34529893
195 | 10,12.18441166
196 | 65,64.00077658
197 | 71,70.3188322
198 | 4,-0.936895047
199 | 24,18.91422276
200 | 26,23.87590331
201 | 51,47.5775361
202 | 42,43.2736092
203 | 62,66.48278755
204 | 74,75.72605529
205 | 77,80.59643338
206 | 3,-2.235879852
207 | 50,47.04654956
208 | 24,21.59635575
209 | 37,32.87558963
210 | 58,57.95782956
211 | 52,52.24760027
212 | 27,24.58286902
213 | 14,12.12573805
214 | 100,100.0158026
215 | 3530.15736917
216 | 72,74.04682658
217 | 5,1.611947467
218 | 71,70.36836307
219 | 54,52.26831735
220 | 84,83.1286166
221 | 42,43.64765048
222 | 54,49.44785426
223 | 74,72.6356699
224 | 54,52.78130641
225 | 53,57.11195136
226 | 78,79.1050629
227 | 97,101.6228548
228 | 49,53.5825402
229 | 71,68.92139297
230 | 48,46.9666961
231 | 51,51.02642868
232 | 89,85.52073551
233 | 99,99.51685756
234 | 93,94.63911256
235 | 49,46.78357742
236 | 18,21.21321959
237 | 65,58.37266004
238 | 83,87.22059677
239 | 100,102.4967859
240 | 41,43.88314335
241 | 52,53.06655757
242 | 29,26.33464785
243 | 97,98.52008934
244 | 7,9.400497579
245 | 51,52.94026699
246 | 58,53.83020877
247 | 50,45.94511142
248 | 67,65.0132736
249 | 89,86.5069584
250 | 76,75.63280796
251 | 35,36.78035027
252 | 99,100.5328916
253 | 31,29.04466136
254 | 52,51.70352433
255 | 11,9.199954718
256 | 66,71.70015848
257 | 50,49.82634062
258 | 39,37.49971096
259 | 60,53.65084683
260 | 35,33.92561965
261 | 53,49.92639685
262 | 14,8.148154262
263 | 49,49.72359037
264 | 16,16.16712757
265 | 76,75.30033002
266 | 13,9.577368568
267 | 51,48.38088357
268 | 70,72.95331671
269 | 98,92.59573853
270 | 86,88.85523586
271 | 100,99.00361771
272 | 46,45.09439571
273 | 51,46.94362684
274 | 50,48.33449605
275 | 91,94.92329574
276 | 48,47.78165248
277 | 81,81.28960746
278 | 38,37.83155021
279 | 40,39.69185252
280 | 79,76.92664854
281 | 96,88.02990531
282 | 60,56.99178872
283 | 70,72.58929383
284 | 44,44.98103442
285 | 11,11.99017641
286 | 6,1.919513328
287 | 5,1.628826073
288 | 72,66.27746655
289 | 55,57.53887255
290 | 95,94.70291077
291 | 41,41.21469904
292 | 25,25.04169243
293 | 1,3.778209914
294 | 55,50.50711779
295 | 4,9.682408486
296 | 48,48.88147608
297 | 55,54.40348599
298 | 75,71.70233156
299 | 68,69.35848388
300 | 100,99.98491591
301 | 25,26.03323718
302 | 75,75.48910307
303 | 34,36.59623056
304 | 38,40.95102191
305 | 92,86.78316267
306 | 21,15.50701184
307 | 88,85.86077871
308 | 75,79.20610113
309 | 76,80.80643766
310 | 44,48.59717283
311 | 10,13.93415049
312 | 21,27.3051179
313 | 16,14.00226297
314 | 32,33.67416
315 | 13,13.11612884
316 | 26,24.76649193
317 | 70,73.68477876
318 | 77,77.53149541
319 | 77,76.24503196
320 | 88,88.0578931
321 | 35,35.02445799
322 | 24,21.65857739
323 | 17,17.33681562
324 | 91,94.36778957
325 | 32,33.43396307
326 | 36,32.52179399
327 | 89,90.57741298
328 | 69,71.25634126
329 | 30,31.23212856
330 | 6,5.398840061
331 | 22,18.56241391
332 | 67,71.97121038
333 | 9,5.225759566
334 | 74,73.5964342
335 | 50,49.76948983
336 | 85,82.69087513
337 | 3,1.652309089
338 | 0,-3.836652144
339 | 59,62.03811556
340 | 62,61.26514581
341 | 17,13.24991628
342 | 90,88.61672694
343 | 23,21.13655528
344 | 19,23.85017475
345 | 93,92.01203405
346 | 14,10.26712261
347 | 58,54.14681616
348 | 87,87.00645713
349 | 37,37.69447352
350 | 20,19.62278654
351 | 35,34.78561007
352 | 63,62.03190983
353 | 56,52.67003801
354 | 62,58.09031476
355 | 98,97.19448821
356 | 90,90.50155298
357 | 51,50.5123462
358 | 93,94.45211871
359 | 22,21.10794636
360 | 38,37.36298431
361 | 13,10.28574844
362 | 98,96.04932416
363 | 99,100.0953697
364 | 31,30.6063167
365 | 94,96.19000542
366 | 73,71.30828034
367 | 37,34.59311043
368 | 23,19.02332876
369 | 11,10.76669688
370 | 88,90.5799868
371 | 47,48.71787679
372 | 79,78.74139764
373 | 91,85.23492274
374 | 71,71.65789964
375 | 10,8.938990554
376 | 39,39.89606046
377 | 92,91.85091116
378 | 99,99.11200375
379 | 28,26.22196486
380 | 32,33.21584226
381 | 32,35.72392691
382 | 75,76.88604495
383 | 99,99.30874567
384 | 27,25.77161074
385 | 64,67.85169407
386 | 98,98.50371084
387 | 38,31.11331895
388 | 46,45.51171028
389 | 13,12.65537808
390 | 96,95.56065366
391 | 9,9.526431641
392 | 34,36.10893209
393 | 49,46.43628318
394 | 1,-3.83998112
395 | 50,48.97302037
396 | 94,93.25305499
397 | 27,23.47650968
398 | 20,17.13551132
399 | 12,14.55896144
400 | 45,41.53992729
401 | 91,91.64730552
402 | 61,66.16652565
403 | 10,9.230857489
404 | 47,47.41377893
405 | 33,34.76441561
406 | 84,86.10796637
407 | 24,21.81267954
408 | 48,48.89963951
409 | 48,46.78108638
410 | 9,12.91328547
411 | 93,94.55203143
412 | 99,94.97068753
413 | 8,2.379172481
414 | 20,21.47982988
415 | 38,35.79795462
416 | 78,82.0763803
417 | 81,78.87097714
418 | 42,47.2492425
419 | 95,96.18852325
420 | 78,78.38491927
421 | 44,42.94274064
422 | 68,64.43231595
423 | 87,84.21191485
424 | 58,57.3069783
425 | 52,52.52101436
426 | 26,25.7440243
427 | 75,75.42283401
428 | 48,53.62523007
429 | 71,75.14466308
430 | 77,74.12151511
431 | 34,36.24807243
432 | 24,20.21665898
433 | 70,66.94758118
434 | 29,34.07278254
435 | 76,73.13850045
436 | 98,92.85929155
437 | 28,28.36793808
438 | 87,85.59308727
439 | 9,10.68453755
440 | 87,86.10708624
441 | 33,33.22031418
442 | 64,66.09563422
443 | 17,19.30486546
444 | 49,48.84542083
445 | 95,93.73176312
446 | 75,75.45758614
447 | 89,91.24239226
448 | 81,87.15690853
449 | 25,25.53752833
450 | 47,46.06629478
451 | 50,49.65277661
452 | 5,7.382244165
453 | 68,71.11189935
454 | 84,83.50570521
455 | 8,8.791139893
456 | 41,33.30638903
457 | 26,26.40362524
458 | 89,91.72960726
459 | 78,82.53030719
460 | 34,36.67762733
461 | 92,86.98450355
462 | 27,32.34784175
463 | 12,16.78353974
464 | 2,1.576584383
465 | 22,17.4618141
466 | 0,2.116113029
467 | 26,24.34804332
468 | 50,48.29491198
469 | 84,85.52145453
470 | 70,73.71434779
471 | 66,63.15189497
472 | 42,38.46213684
473 | 19,19.47100788
474 | 94,94.07428225
475 | 71,67.92051286
476 | 19,22.58096241
477 | 16,16.01629889
478 | 49,48.43307886
479 | 29,29.6673599
480 | 29,26.65566328
481 | 86,86.28206739
482 | 50,50.82304924
483 | 86,88.57251713
484 | 30,32.59980745
485 | 23,21.02469368
486 | 20,20.72894979
487 | 16,20.38051187
488 | 57,57.25180153
489 | 8,6.967537054
490 | 8,10.240085
491 | 62,64.94841088
492 | 55,55.35893915
493 | 30,31.24365589
494 | 86,90.72048818
495 | 62,58.750127
496 | 51,55.85003198
497 | 61,60.19925869
498 | 86,85.03295412
499 | 61,60.38823085
500 | 21,18.44679787
501 | 81,82.18839247
502 | 97,94.2963344
503 | 5,7.682024586
504 | 61,61.01858089
505 | 47,53.60562216
506 | 98,94.47728801
507 | 30,27.9645947
508 | 63,62.55662585
509 | 0,1.406254414
510 | 100,101.7003412
511 | 18,13.84973988
512 | 30,28.99769315
513 | 98,99.04315693
514 | 16,15.56135514
515 | 22,24.63528393
516 | 55,53.98393374
517 | 43,42.91449728
518 | 75,74.29662112
519 | 91,91.17012883
520 | 46,49.42440876
521 | 85,82.47683519
522 | 55,56.15303953
523 | 36,37.17063131
524 | 49,46.36928662
525 | 94,97.02383456
526 | 43,40.83182104
527 | 22,24.08498313
528 | 37,41.14386358
529 | 24,21.97388066
530 | 95,100.740897
531 | 61,61.19971596
532 | 75,74.39517002
533 | 68,69.04377173
534 | 58,56.68718792
535 | 5,5.860391715
536 | 53,55.72021356
537 | 80,79.22021816
538 | 83,86.30177517
539 | 25,25.26971886
540 | 34,36.33294447
541 | 26,27.65574228
542 | 90,94.79690531
543 | 60,58.67366671
544 | 49,56.15934471
545 | 19,18.40919388
546 | 92,86.26936988
547 | 29,26.59436195
548 | 8,8.452520159
549 | 57,56.18131518
550 | 29,27.65452669
551 | 19,20.87391785
552 | 81,77.83354439
553 | 50,50.01787825
554 | 15,9.290856256
555 | 70,75.0284725
556 | 39,38.3037698
557 | 43,44.70786405
558 | 21,22.51016575
559 | 98,102.4959452
560 | 86,86.76845244
561 | 16,13.89748578
562 | 25,24.81824269
563 | 31,33.94224862
564 | 93,92.26970059
565 | 67,68.73365081
566 | 49,47.38516883
567 | 25,32.37576914
568 | 88,87.67388681
569 | 54,54.57648371
570 | 21,18.06450222
571 | 8,7.896539841
572 | 32,35.00341078
573 | 35,36.72823317
574 | 67,65.84975426
575 | 90,89.59295492
576 | 59,61.69026202
577 | 15,11.60499315
578 | 67,71.0826803
579 | 42,43.71901164
580 | 44,41.57421008
581 | 77,74.25552425
582 | 68,66.28310437
583 | 36,36.62438077
584 | 11,10.32374866
585 | 10,7.156457657
586 | 65,67.88603132
587 | 98,101.1097591
588 | 98,98.6132033
589 | 49,50.19083844
590 | 31,27.83896261
591 | 56,55.9249564
592 | 70,76.47340872
593 | 91,92.05756378
594 | 25,27.35245439
595 | 54,55.32083476
596 | 39,41.39990349
597 | 91,93.59057024
598 | 3,5.297054029
599 | 22,21.01429422
600 | 2,2.267059451
601 | 2,-0.121860502
602 | 65,66.49546208
603 | 71,73.83637687
604 | 42,42.10140878
605 | 76,77.35135732
606 | 43,41.02251779
607 | 8,14.75305272
608 | 86,83.28199022
609 | 87,89.93374342
610 | 3,2.286571686
611 | 58,55.61421297
612 | 62,62.15313408
613 | 89,89.55803528
614 | 95,94.00291863
615 | 28,26.78023848
616 | 0,-0.764537626
617 | 1,0.282866003
618 | 49,44.26800515
619 | 21,19.85174138
620 | 46,47.15960005
621 | 11,8.359366572
622 | 89,92.08157084
623 | 37,41.88734051
624 | 29,30.5413129
625 | 44,46.87654473
626 | 96,96.35659485
627 | 16,17.9170699
628 | 74,71.67949917
629 | 35,32.64997554
630 | 42,39.34482965
631 | 16,17.03401999
632 | 56,52.87524074
633 | 18,15.85414849
634 | 100,108.8716183
635 | 54,49.30477253
636 | 92,89.4749477
637 | 63,63.67348242
638 | 81,83.78410946
639 | 73,73.51136922
640 | 48,46.80297244
641 | 1,5.809946802
642 | 85,85.23027975
643 | 14,10.58213964
644 | 25,21.37698317
645 | 45,46.0537745
646 | 98,95.2389253
647 | 97,94.15149206
648 | 58,54.54868046
649 | 93,87.36260449
650 | 88,88.47741598
651 | 89,84.48045678
652 | 47,48.79647071
653 | 6,10.76675683
654 | 34,30.48882921
655 | 30,29.76846185
656 | 16,13.51574749
657 | 86,86.12955884
658 | 40,43.30022747
659 | 52,51.92110232
660 | 15,16.49185287
661 | 4,7.998073432
662 | 95,97.66689567
663 | 99,89.80545367
664 | 35,38.07166567
665 | 58,60.27852322
666 | 10,6.709195759
667 | 16,18.35488924
668 | 53,56.37058203
669 | 58,62.80064204
670 | 42,41.25155632
671 | 24,19.42637541
672 | 84,82.88935804
673 | 64,63.61364981
674 | 12,11.29627199
675 | 61,60.02274882
676 | 75,72.60339326
677 | 15,11.87964573
678 | 100,100.7012737
679 | 43,45.12420809
680 | 13,14.81106804
681 | 48,48.09368034
682 | 45,42.29145672
683 | 52,52.73389794
684 | 34,36.72396986
685 | 30,28.64535198
686 | 65,62.16675273
687 | 100,95.58459518
688 | 67,66.04325304
689 | 99,99.9566225
690 | 45,46.14941984
691 | 87,89.13754963
692 | 73,69.71787806
693 | 9,12.31736648
694 | 81,78.20296268
695 | 72,71.30995371
696 | 81,81.45544709
697 | 58,58.59500642
698 | 93,94.62509374
699 | 82,88.60376995
700 | 66,63.64868529
701 | 97,94.9752655
--------------------------------------------------------------------------------
/digit_recognizer.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Deep Learning Project\n",
8 | "## Build a Digit Recognition Program"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
15 | "In this project, we will design and implement a deep learning model that learns to recognize sequences of digits. We will train the model using synthetic data generated by concatenating character images from [MNIST](http://yann.lecun.com/exdb/mnist/). \n",
16 | "\n",
17 | "To produce a synthetic sequence of digits for testing, we will limit the to sequences to up to five digits, and use five classifiers on top of your deep network. We will incorporate an additional ‘blank’ character to account for shorter number sequences.\n",
18 | "\n",
19 | "We will use ** Keras ** to implement the model. You can read more about Keras at [keras.io](https://keras.io/)."
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "### Implementation\n",
27 | "\n",
28 | "Let's start by importing the modules we'll require fot this project."
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 1,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "name": "stderr",
38 | "output_type": "stream",
39 | "text": [
40 | "Using TensorFlow backend.\n"
41 | ]
42 | }
43 | ],
44 | "source": [
45 | "#Module Imports\n",
46 | "from __future__ import print_function\n",
47 | "import random\n",
48 | "from os import listdir\n",
49 | "import glob\n",
50 | "\n",
51 | "import numpy as np\n",
52 | "from scipy import misc\n",
53 | "import tensorflow as tf\n",
54 | "import h5py\n",
55 | "\n",
56 | "from keras.datasets import mnist\n",
57 | "from keras.utils import np_utils\n",
58 | "\n",
59 | "import matplotlib.pyplot as plt\n",
60 | "%matplotlib inline"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 2,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "#Setting the random seed so that the results are reproducible. \n",
70 | "random.seed(101)\n",
71 | "\n",
72 | "#Setting variables for MNIST image dimensions\n",
73 | "mnist_image_height = 28\n",
74 | "mnist_image_width = 28"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 3,
80 | "metadata": {},
81 | "outputs": [
82 | {
83 | "name": "stdout",
84 | "output_type": "stream",
85 | "text": [
86 | "Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz\n",
87 | "11493376/11490434 [==============================] - 11s 1us/step\n"
88 | ]
89 | }
90 | ],
91 | "source": [
92 | "#Import MNIST data from keras\n",
93 | "(X_train, y_train), (X_test, y_test) = mnist.load_data()"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": 4,
99 | "metadata": {},
100 | "outputs": [
101 | {
102 | "name": "stdout",
103 | "output_type": "stream",
104 | "text": [
105 | "Shape of training dataset: (60000, 28, 28)\n",
106 | "Shape of test dataset: (10000, 28, 28)\n",
107 | "Label for image: 5\n"
108 | ]
109 | },
110 | {
111 | "data": {
112 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADgpJREFUeJzt3X+MVfWZx/HPs1j+kKI4aQRCYSnEYJW4082IjSWrxkzVDQZHrekkJjQapn8wiU02ZA3/VNNgyCrslmiamaZYSFpKE3VB0iw0otLGZuKIWC0srTFsO3IDNTjywx9kmGf/mEMzxbnfe+fec++5zPN+JeT+eM6558kNnznn3O+592vuLgDx/EPRDQAoBuEHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaAIPxDUZc3cmJlxOSHQYO5u1SxX157fzO40syNm9q6ZPVrPawFoLqv12n4zmybpj5I6JQ1Jel1St7sfSqzDnh9osGbs+ZdJetfd33P3c5J+IWllHa8HoInqCf88SX8Z93goe+7vmFmPmQ2a2WAd2wKQs3o+8Jvo0OJzh/Xu3i+pX+KwH2gl9ez5hyTNH/f4y5KO1dcOgGapJ/yvS7rGzL5iZtMlfVvSrnzaAtBoNR/2u/uImfVK2iNpmqQt7v6H3DoD0FA1D/XVtDHO+YGGa8pFPgAuXYQfCIrwA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQhB8IivADQRF+ICjCDwRF+IGgCD8QFOEHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeCIvxAUIQfCIrwA0ERfiAowg8EVfMU3ZJkZkclnZZ0XtKIu3fk0RTyM23atGT9yiuvbOj2e3t7y9Yuv/zy5LpLlixJ1tesWZOsP/XUU2Vr3d3dyXU//fTTZH3Dhg3J+uOPP56st4K6wp+5zd0/yOF1ADQRh/1AUPWG3yXtNbM3zKwnj4YANEe9h/3fcPdjZna1pF+b2f+6+/7xC2R/FPjDALSYuvb87n4suz0h6QVJyyZYpt/dO/gwEGgtNYffzGaY2cwL9yV9U9I7eTUGoLHqOeyfLekFM7vwOj939//JpSsADVdz+N39PUn/lGMvU9aCBQuS9enTpyfrN998c7K+fPnysrVZs2Yl173vvvuS9SINDQ0l65s3b07Wu7q6ytZOnz6dXPett95K1l999dVk/VLAUB8QFOEHgiL8QFCEHwiK8ANBEX4gKHP35m3MrHkba6L29vZkfd++fcl6o79W26pGR0eT9YceeihZP3PmTM3bLpVKyfqHH36YrB85cqTmbTeau1s1y7HnB4Ii/EBQhB8IivADQRF+ICjCDwRF+IGgGOfPQVtbW7I+MDCQrC9atCjPdnJVqffh4eFk/bbbbitbO3fuXHLdqNc/1ItxfgBJhB8IivADQRF+ICjCDwRF+IGgCD8QVB6z9IZ38uTJZH3t2rXJ+ooVK5L1N998M1mv9BPWKQcPHkzWOzs7k/WzZ88m69dff33Z2iOPPJJcF43Fnh8IivADQRF+ICjCDwRF+IGgCD8QFOEHgqr4fX4z2yJphaQT7r40e65N0g5JCyUdlfSAu6d/6FxT9/v89briiiuS9UrTSff19ZWtPfzww8l1H3zwwWR9+/btyTpaT57f5/+ppDsveu5RSS+5+zWSXsoeA7iEVAy/u++XdPElbCslbc3ub5V0T859AWiwWs/5Z7t7SZKy26vzawlAMzT82n4z65HU0+jtAJicWvf8x81sriRltyfKLeju/e7e4e4dNW4LQAPUGv5dklZl91dJ2plPOwCapWL4zWy7pN9JWmJmQ2b2sKQNkjrN7E+SOrPHAC4hFc/53b27TOn2nHsJ69SpU3Wt/9FHH9W87urVq5P1HTt2JOujo6M1bxvF4go/ICjCDwRF+IGgCD8QFOEHgiL8QFBM0T0FzJgxo2ztxRdfTK57yy23JOt33XVXsr53795kHc3HFN0Akgg/EBThB4Ii/EBQhB8IivADQRF+ICjG+ae4xYsXJ+sHDhxI1oeHh5P1l19+OVkfHBwsW3vmmWeS6zbz/+ZUwjg/gCTCDwRF+IGgCD8QFOEHgiL8QFCEHwiKcf7gurq6kvVnn302WZ85c2bN2163bl2yvm3btmS9VCrVvO2pjHF+AEmEHwiK8ANBEX4gKMIPBEX4gaAIPxBUxXF+M9siaYWkE+6+NHvuMUmrJf01W2ydu/+q4sYY57/kLF26NFnftGlTsn777bXP5N7X15esr1+/Pll///33a972pSzPcf6fSrpzguf/093bs38Vgw+gtVQMv7vvl3SyCb0AaKJ6zvl7zez3ZrbFzK7KrSMATVFr+H8kabGkdkklSRvLLWhmPWY2aGblf8wNQNPVFH53P+7u5919VNKPJS1LLNvv7h3u3lFrkwDyV1P4zWzuuIddkt7Jpx0AzXJZpQXMbLukWyV9ycyGJH1f0q1m1i7JJR2V9N0G9gigAfg+P+oya9asZP3uu+8uW6v0WwFm6eHqffv2JeudnZ3J+lTF9/kBJBF+ICjCDwRF+IGgCD8QFOEHgmKoD4X57LPPkvXLLktfhjIyMpKs33HHHWVrr7zySnLdSxlDfQCSCD8QFOEHgiL8QFCEHwiK8ANBEX4gqIrf50dsN9xwQ7J+//33J+s33nhj2VqlcfxKDh06lKzv37+/rtef6tjzA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQjPNPcUuWLEnWe3t7k/V77703WZ8zZ86ke6rW+fPnk/VSqZSsj46O5tnOlMOeHwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeCqjjOb2bzJW2TNEfSqKR+d/+hmbVJ2iFpoaSjkh5w9w8b12pclcbSu7u7y9YqjeMvXLiwlpZyMTg4mKyvX78+Wd+1a1ee7YRTzZ5/RNK/uftXJX1d0hozu07So5JecvdrJL2UPQZwiagYfncvufuB7P5pSYclzZO0UtLWbLGtku5pVJMA8jepc34zWyjpa5IGJM1295I09gdC0tV5Nwegcaq+tt/MvijpOUnfc/dTZlVNByYz65HUU1t7ABqlqj2/mX1BY8H/mbs/nz193MzmZvW5kk5MtK6797t7h7t35NEwgHxUDL+N7eJ/Iumwu28aV9olaVV2f5Wknfm3B6BRKk7RbWbLJf1G0tsaG+qTpHUaO+//paQFkv4s6VvufrLCa4Wconv27NnJ+nXXXZesP/3008n6tddeO+me8jIwMJCsP/nkk2VrO3em9xd8Jbc21U7RXfGc391/K6nci90+maYAtA6u8AOCIvxAUIQfCIrwA0ERfiAowg8ExU93V6mtra1sra+vL7lue3t7sr5o0aKaesrDa6+9lqxv3LgxWd+zZ0+y/sknn0y6JzQHe34gKMIPBEX4gaAIPxAU4QeCIvxAUIQfCCrMOP9NN92UrK9duzZZX7ZsWdnavHnzauopLx9//HHZ2ubNm5PrPvHEE8n62bNna+oJrY89PxAU4QeCIvxAUIQfCIrwA0E
RfiAowg8EFWacv6urq656PQ4dOpSs7969O1kfGRlJ1lPfuR8eHk6ui7jY8wNBEX4gKMIPBEX4gaAIPxAU4QeCIvxAUObu6QXM5kvaJmmOpFFJ/e7+QzN7TNJqSX/NFl3n7r+q8FrpjQGom7tbNctVE/65kua6+wEzmynpDUn3SHpA0hl3f6rapgg/0HjVhr/iFX7uXpJUyu6fNrPDkor96RoAdZvUOb+ZLZT0NUkD2VO9ZvZ7M9tiZleVWafHzAbNbLCuTgHkquJh/98WNPuipFclrXf3581stqQPJLmkH2js1OChCq/BYT/QYLmd80uSmX1B0m5Je9x90wT1hZJ2u/vSCq9D+IEGqzb8FQ/7zcwk/UTS4fHBzz4IvKBL0juTbRJAcar5tH+5pN9IeltjQ32StE5St6R2jR32H5X03ezDwdRrsecHGizXw/68EH6g8XI77AcwNRF+ICjCDwRF+IGgCD8QFOEHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeCavYU3R9I+r9xj7+UPdeKWrW3Vu1Lorda5dnbP1a7YFO/z/+5jZsNuntHYQ0ktGpvrdqXRG+1Kqo3DvuBoAg/EFTR4e8vePsprdpbq/Yl0VutCumt0HN+AMUpes8PoCCFhN/M7jSzI2b2rpk9WkQP5ZjZUTN728wOFj3FWDYN2gkze2fcc21m9msz+1N2O+E0aQX19piZvZ+9dwfN7F8L6m2+mb1sZofN7A9m9kj2fKHvXaKvQt63ph/2m9k0SX+U1ClpSNLrkrrd/VBTGynDzI5K6nD3wseEzexfJJ2RtO3CbEhm9h+STrr7huwP51Xu/u8t0ttjmuTMzQ3qrdzM0t9Rge9dnjNe56GIPf8ySe+6+3vufk7SLyStLKCPlufu+yWdvOjplZK2Zve3auw/T9OV6a0luHvJ3Q9k909LujCzdKHvXaKvQhQR/nmS/jLu8ZBaa8pvl7TXzN4ws56im5nA7AszI2W3Vxfcz8UqztzcTBfNLN0y710tM17nrYjwTzSbSCsNOXzD3f9Z0l2S1mSHt6jOjyQt1tg0biVJG4tsJptZ+jlJ33P3U0X2Mt4EfRXyvhUR/iFJ88c9/rKkYwX0MSF3P5bdnpD0gsZOU1rJ8QuTpGa3Jwru52/c/bi7n3f3UUk/VoHvXTaz9HOSfubuz2dPF/7eTdRXUe9bEeF/XdI1ZvYVM5su6duSdhXQx+eY2YzsgxiZ2QxJ31TrzT68S9Kq7P4qSTsL7OXvtMrMzeVmllbB712rzXhdyEU+2VDGf0maJmmLu69vehMTMLNFGtvbS2PfePx5kb2Z2XZJt2rsW1/HJX1f0n9L+qWkBZL+LOlb7t70D97K9HarJjlzc4N6Kzez9IAKfO/ynPE6l364wg+IiSv8gKAIPxAU4QeCIvxAUIQfCIrwA0ERfiAowg8E9f/Ex0YKZYOZcwAAAABJRU5ErkJggg==\n",
113 | "text/plain": [
114 | ""
115 | ]
116 | },
117 | "metadata": {},
118 | "output_type": "display_data"
119 | }
120 | ],
121 | "source": [
122 | "#Checking the downloaded data\n",
123 | "print(\"Shape of training dataset: {}\".format(np.shape(X_train)))\n",
124 | "print(\"Shape of test dataset: {}\".format(np.shape(X_test)))\n",
125 | "\n",
126 | "\n",
127 | "plt.figure()\n",
128 | "plt.imshow(X_train[0], cmap='gray')\n",
129 | "\n",
130 | "print(\"Label for image: {}\".format(y_train[0]))"
131 | ]
132 | },
133 | {
134 | "cell_type": "markdown",
135 | "metadata": {},
136 | "source": [
137 | "### Building synthetic data"
138 | ]
139 | },
140 | {
141 | "cell_type": "markdown",
142 | "metadata": {},
143 | "source": [
144 | "The MNIST dataset is very popular for beginner Deep Learning projects. So, to add a twist to the tale, we're going to predict images that can contain 1 to 5 digits. We'll have to change the architecture of our deep learning model for this, but before that, we'll need to generate this dataset first.\n",
145 | "\n",
146 | "To generate the synthetic training data, we will first start by randomly picking out up to 5 individual digits out from the MNIST training set. The individual images will be then stacked together, and blanks will be used to make up the number of digits if there were less than 5. By this approach, we could increase the size of our training data. We'll build around 60,000 such examples.\n",
147 | "\n",
148 | "While concatenating images together, we'll also build the labels for each image. First, labels for single digits will be arranged in tuples of 5. Labels 0-9 will be used for digits 0-9, and a 10 will be used to indicate a blank.\n",
149 | "\n",
150 | "The same approach will be used to build the test data, but using the MNIST test set for individual digits, for 10,000 synthetic test images.\n",
151 | "\n",
152 | "\n",
153 | "\n",
154 | "Let's write a function that does this."
155 | ]
156 | },
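A minimal NumPy sketch of the stacking-and-padding scheme described above; the random arrays below are stand-ins for real MNIST digit crops, and the digit values are made up purely for illustration.

```python
import numpy as np

# Pretend these three 28x28 crops are the MNIST digits 3, 1 and 7.
digits = [np.random.rand(28, 28) for _ in range(3)]
labels = [3, 1, 7]

image = np.hstack(digits)                 # shape (28, 84)
for _ in range(5 - len(digits)):          # pad to 5 positions with blank images
    image = np.hstack([image, np.zeros((28, 28))])
    labels.append(10)                     # label 10 marks a blank position

print(image.shape)     # (28, 140); the real pipeline then resizes this to (64, 64)
print(tuple(labels))   # (3, 1, 7, 10, 10)
```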
157 | {
158 | "cell_type": "code",
159 | "execution_count": 5,
160 | "metadata": {},
161 | "outputs": [],
162 | "source": [
163 | "def build_synth_data(data,labels,dataset_size):\n",
164 | " \n",
165 | " #Define synthetic image dimensions\n",
166 | " synth_img_height = 64\n",
167 | " synth_img_width = 64\n",
168 | " \n",
169 | " #Define synthetic data\n",
170 | " synth_data = np.ndarray(shape=(dataset_size,synth_img_height,synth_img_width),\n",
171 | " dtype=np.float32)\n",
172 | " \n",
173 | " #Define synthetic labels\n",
174 | " synth_labels = [] \n",
175 | " \n",
176 | " #For a loop till the size of the synthetic dataset\n",
177 | " for i in range(0,dataset_size):\n",
178 | " \n",
179 | " #Pick a random number of digits to be in the dataset\n",
180 | " num_digits = random.randint(1,5)\n",
181 | " \n",
182 | " #Randomly sampling indices to extract digits + labels afterwards\n",
183 | " s_indices = [random.randint(0,len(data)-1) for p in range(0,num_digits)]\n",
184 | " \n",
185 | " #stitch images together\n",
186 | " new_image = np.hstack([X_train[index] for index in s_indices])\n",
187 | " #stitch the labels together\n",
188 | " new_label = [y_train[index] for index in s_indices]\n",
189 | " \n",
190 | " \n",
191 | " #Loop till number of digits - 5, to concatenate blanks images, and blank labels together\n",
192 | " for j in range(0,5-num_digits):\n",
193 | " new_image = np.hstack([new_image,np.zeros(shape=(mnist_image_height,\n",
194 | " mnist_image_width))])\n",
195 | " new_label.append(10) #Might need to remove this step\n",
196 | " \n",
197 | " #Resize image\n",
198 | " new_image = misc.imresize(new_image,(64,64))\n",
199 | " \n",
200 | " #Assign the image to synth_data\n",
201 | " synth_data[i,:,:] = new_image\n",
202 | " \n",
203 | " #Assign the label to synth_data\n",
204 | " synth_labels.append(tuple(new_label))\n",
205 | " \n",
206 | " \n",
207 | " #Return the synthetic dataset\n",
208 | " return synth_data,synth_labels"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": 6,
214 | "metadata": {},
215 | "outputs": [
216 | {
217 | "name": "stderr",
218 | "output_type": "stream",
219 | "text": [
220 | "/Users/melvynnfernandez/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:36: DeprecationWarning: `imresize` is deprecated!\n",
221 | "`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.\n",
222 | "Use ``skimage.transform.resize`` instead.\n"
223 | ]
224 | }
225 | ],
226 | "source": [
227 | "#Building the training dataset\n",
228 | "X_synth_train,y_synth_train = build_synth_data(X_train,y_train,60000)"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 7,
234 | "metadata": {},
235 | "outputs": [
236 | {
237 | "name": "stderr",
238 | "output_type": "stream",
239 | "text": [
240 | "/Users/melvynnfernandez/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:36: DeprecationWarning: `imresize` is deprecated!\n",
241 | "`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.\n",
242 | "Use ``skimage.transform.resize`` instead.\n"
243 | ]
244 | }
245 | ],
246 | "source": [
247 | "#Building the test dataset\n",
248 | "X_synth_test,y_synth_test = build_synth_data(X_test,y_test,10000)"
249 | ]
250 | },
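The deprecation warnings above come from `scipy.misc.imresize`, and the warning itself points at `skimage.transform.resize` as the replacement. The helper below is a sketch of that swap (an assumption, not what the notebook actually runs); note that `resize` returns floats and uses different interpolation, so the resized pixels will not match `imresize` exactly.

```python
# Hypothetical replacement for the deprecated misc.imresize(new_image, (64, 64)) call.
import numpy as np
from skimage.transform import resize

def resize_to_64(img):
    # preserve_range keeps the original 0-255 scale instead of rescaling to [0, 1]
    return resize(img, (64, 64), preserve_range=True, anti_aliasing=True).astype(np.float32)
```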
251 | {
252 | "cell_type": "code",
253 | "execution_count": 8,
254 | "metadata": {},
255 | "outputs": [
256 | {
257 | "data": {
258 | "text/plain": [
259 | "(1, 4, 1, 9, 7)"
260 | ]
261 | },
262 | "execution_count": 8,
263 | "metadata": {},
264 | "output_type": "execute_result"
265 | },
266 | {
267 | "data": {
268 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP4AAAD8CAYAAABXXhlaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAG4NJREFUeJztnW2sVeWVx/9LUEEUEXnpFShvUoGqBaSIYWIRC3U6bfFDa2ybCTMh4UtnYjOdVJ1JJu1kJrFp0nY+mCZ0dMqHtmptOxjaVA1gEWJFGAHl5V7e4crLBQRFtCp2zYez73Y9y3vuPfees599Tp//L7k5a59nn3vW3eesu9d61vOsJaoKQkhaXFK2AoSQ+NDwCUkQGj4hCULDJyRBaPiEJAgNn5AEoeETkiB1Gb6I3CUi7SKyT0QeaJRShJBikYEu4BGRQQA6ACwG0AngJQBfVdVdjVOPEFIEg+t47TwA+1T1AACIyGMAlgKoavgiwmWChBSMqkpf59Tj6o8DcNQcd2bPEUKanHru+D39V/nIHV1EVgBYUcf7EEIaTD2G3wlggjkeD+CYP0lVVwJYCdDVJ6RZqMfVfwnANBGZLCKXAbgXwFONUYsQUiQDvuOr6kUR+QcATwMYBOBRVd3ZMM0IIYUx4HTegN6Mrj4hhVP0rD4hpEWh4ROSIDR8QhKEhk9IgtDwCUkQGj4hCULDJyRBaPiEJAgNn5AEoeETkiA0fEIShIZPSILQ8AlJEBo+IQlCwyckQWj4hCQIDZ+QBKHhE5IgNHxCEoSGT0iC1FNXv3Ta2tqC4/Pnz+fyW2+9FVudmrj00ktz+brrrgvGTpw4kcvvvvtuNJ08Ih/WavzYxz6Wy2PHjg3Oa29vz+V33nmneMVIw+Adn5AEoeETkiA0fEISpKVj/Dlz5gTHNubct29fbHVqYtiwYbm8cOHCYOx3v/tdLp86dSqWSh/Bxvj2Gi9ZsiQ47/vf/34ud3Z2Fq8YaRh93vFF5FER6RKRV81zI0XkWRHZmz1eU6yahJBGUour/1MAd7nnHgCwVlWnAVibHRNCWoQ+XX1V3SAik9zTSwEszORVAJ4DcH8D9aqJT37yk8HxG2+8kcvN6upfeeWVuTx//vxgbNOmTblcpqt/ySUf3g9uuummXL777ruD837yk5/kMl391mKgk3tjVfU4AGSPYxqnEiGkaAqf3BORFQBWFP0+hJDaGajhnxSRNlU9LiJtALqqnaiqKwGsBBrTJtu6odZtBoBrr702l+3MdKZHvW/dEEaOHJnLU6dODcas/mWGKvYajxo1Kpevuuqq4Lyrr746mk6ksQzU1X8KwLJMXgZgdWPUIYTEoJZ03i8AvADgBhHpFJHlAB4CsFhE9gJYnB0TQlqEWmb1v1pl6M4G60IIiUTLrdyzsbuP8W082qzYOH7KlCnBmI3/y2TQoEG5bK/p8OHDg/P8MWkduFafkASh4ROSIC3n6lusSwoAgwc3359jU2NA6N4PHTq013PLwqbpbLETnyJtFmya8ZZbbgnGRowYkctPP/10LqdeOKQ5vmmEkKjQ8AlJEBo+IQnSfEFxH9g408fEzRjj+7jYxviXX355bHV6xOto42Jf0LQZsWnFxYsXB2N2WfTzzz+fy4zxCSHJQcMnJEGazzeuA1uzvlm47LLLguMJEybksk9HloUPmT7xiU/ksl1pePHixeC8999/v1jFquCv6Y033pjLc+fODcZsXwCfPo1Jb/0Urrnmw8p1o0ePzmUfjmzdurXqWH/hHZ+QBKHhE5IgLefq24IaH3zwQTBmXcBmKcQxZMiQ4Ni7ec2Az4bMnj07l+2MuW/r9ac//alYxargr+lnPvOZXJ4xY0YwZr8j/nUxsRkce32BUGdbh/HMmTPBeR0dHblMV58Q0m9o+IQkCA2fkARpuRjfxu4+HVZmuqYavliIbTXt5x38nEUsfDpv4sSJuWxbfp0+fTo47+233y5WsSpcccUVwfGsWbNyedy4ccHY66+/nssxi4P6eZOPf/zjuTxv3rxgzLYps/MVXV1hDVtbqMWP9Rfe8QlJEBo+IQnScq6+dUutGwqEK8mapWiEXfkGhCuzvGtfVnqsVnwK6cKFC6Xo4T/3MWM+bOTkP3ebRhs/fnwu21VwReDDkc997nO5bN15IAz/bIjgaxracGHPnj116cc7PiEJQsMnJEFo+IQkSEvH+D6OKisd1ht255U/Pnv2bDBWVozv4+Jq8yNvvvlmcFxWjO97+Nl5kz//+c/BWG+74orEp3GXLFmSy7fddltNv8P/nT5VWQ+1tNCaICLrRWS3iOwUkfuy50eKyLMisjd7vKav30UIaQ5qcfUvAviWqs4AMB/AN0RkJoAHAKxV1WkA1mbHhJAWoJbeeccBHM/k8yKyG8A4AEsBLMxOWwXgOQD3F6JlC2Pr1wHhDrdm2e3ma//ZtlnWdf7jH/8YnHf+/PliFavC5MmTg2Ob3vNpOpsCi4ktAAKEqcSB0sjdhf2a3BORSQBmA3gRwNjsn0L3P4cx1V9JCGkmap7cE5ErAfwKwDdV9c1aF8iIyAoAKwamHiGkCGq644vIpagY/c9U9dfZ0ydFpC0bbwPQ464BVV2pqnNVdW5P44SQ+PR5x5fKrf0RALtV9Qdm6CkAywA8lD2uLkTDFse3vv5LivHfeuutYhWrgm8vbmP8zZs3B2NltfL2/Qjsjsda8V61LzJaD7W4+gsA/C2AV0RkW/bcv6Bi8E+IyHIARwB8pWFaEUIKpZZZ/Y0AqgX0dzZWHUJIDFpu5V4rYHdYTZo0KRizBRT8KrOyikH2tnLPFgvxrn3MlZJ2FZut+w8AnZ2duexdfbvaLWYo5dN59jth9QWAvXv35vIbb7yRy7feemtwXiN3nHKtPiEJQsMnJEHo6heA3Rgyffr0YOz48eO57It0lDUD7UMOW9DEuvpl9SYAwhWQ/ppaV/mFF14IxmyoFXNTkS0OAoRh0f79+4OxJ598sscxHy40Et7xCUkQGj4hCULDJyRBGOMXgF1h5WO9I0eO5LKP6X2sHQufJmqW9t0We638qrh169bl8tGjR4Ox9vb2XLZ9F3yRi0bvNPRFYo4dO5bLfgWknZewqT6fPvUrPeuBd3xCEoSGT0iC0NUvAOvq+40VJ0+ezGW/2cTXtIuFTT8CH61b3wzYGna+VdqpU6dy2a8mPHz4cC4vWrQol339PRsSFMGWLVtyec2aNcHYa6+9lsu2pqTfPNXIcIR3fEIShIZPSILQ8AlJkJaL8W3qybcibhZsjOzj5xMnTuTyxYsXg7Gy6tT7WNKmouwy3dh9C+xnbfsReH3tNfUpUdva2y7f9cUvOzo6crkRS5N9SnT79u25vGnTpqqvs7sJ/fyQ3blXL7zjE5IgNHxCEqQ5feVesO6PTzv5llRlccMNN+Sydxtt6qZMbNrI7wKzrvR7772Xy2+//Xbxihmsjtdff30u+xDp0KFDueyvtw0RbrzxxlzesWNHcN4f/vCHqr9/IPr67+bOnTt
r+h1WX/v7gDBtWS+84xOSIDR8QhKk5Vx964b6jqTNQiu4+nbGvFldfTszPnXq1Fz2rrhdneevty12MnPmzFy2n5F/r4G6+vaaelffbx6qhi3H7jMDNkNRL7zjE5IgNHxCEoSGT0iCtFyMb2vP+xbUA43N6sWvJLPtks6dOxeM2TiwrDr6Hh+P2r/Hrtbz6aWisSszbezrV7DZuQdfVKRazOx3ujW6CIqfa6h1fsS2APc6vfPOO/UrltHnJykiQ0Rks4hsF5GdIvLd7PnJIvKiiOwVkcdFpHGNvQghhVLLv/B3ASxS1U8BmAXgLhGZD+B7AH6oqtMAnAWwvDg1CSGNpJbeeQqgu/jXpdmPAlgE4GvZ86sAfAfAjxuvYoh1j6+++upgrBld/ddffz0Ys+5rma6+dUW9Hvbvse5l7E1RdoOTddl9wRKrow9HfLfibvzn0ghXv1rrMaD39l32ddbVH2i4UAs1BW0iMijrlNsF4FkA+wGcU9VuS+sEMK7a6wkhzUVNhq+qH6jqLADjAcwDMKOn03p6rYisEJEtIrKlp3FCSHz6NU2rqucAPAdgPoARItLt+40HcKzKa1aq6lxVnVuPooSQxtFn0CYiowG8r6rnRGQogM+iMrG3HsCXATwGYBmA1UUq2o2N4XyhAluHPGafN1+jfezYsbnsiy7Y9Fgj2x73F3t9fHxrr7GN6/31LppqhSd9Os/Wm/c62qW+dnfbgQMHgvMaEePba+pr4Pf2+23xUDs/5Jd3N7Kufi2zNW0AVonIIFQ8hCdUdY2I7ALwmIj8B4CXATzSMK0IIYVSy6z+DgCze3j+ACrxPiGkxWi5lXu90ciaZP3BryC0O8J27doVjNm0TlnpR09vK8JsSi22q2+xYZEvuGJ3EPrWVTfddFMuHzx4MJdrLYwxUHz6rjdX36albV1A3/Lbti+vF67VJyRBaPiEJEjLufp2dtfPituNFzFn9X1HXLvpxdaDA0J3zZerLmuWv7f3tRtbYrv6NqNgZ/htGzIgvKa2Zh0Qus42w2K70gKN+b7Y3+E73fb2+21oaL9LNjQBGlvenHd8QhKEhk9IgtDwCUmQlovxberM78SKWQzSvrdvd23TdD7FaOM0n87zxRWLxMb1fgdbtV1mMedNgDA1Z9OK+/fvD86z19GvorRzQnZHXhHtwOz18YUx7TyEn1Ox7bzsTkn/Oxp5/XnHJyRBaPiEJMhflKvfyJpkfWHdNZsyAkK3zrv6NiXmXf2YhS7stfMpMPu3NboWXX+wm1esq+832FgX2P8t9noX7epbfMrRri70TJgwIZer6QvQ1SeE1AkNn5AEoeETkiAtEePbmHPUqFFVz/O10ovEpona2tqCMTvX4Isn2BjOL4H1qagisdfUv2+ZBUIsds7DFzS12N5/S5cuDcbsNT5z5kwuFz130dHRERzb76ZP29rCLVbHRrbF9vCOT0iC0PAJSZCWc/XtKjOf3rhw4ULVsUZjXc/rrrsuGLMrCL2rb91qn77zBT2KxF5T3268WVx966b35urbUOvuu+8Oxmwa0LYzK8LVt9857+rbFK//3OnqE0KiQMMnJEFawtW3q8yGDx+ey371VcyVe1YPn2mwhTjuuOOOYMyuLPP14WJu0mmG9+0LGybZVY4333xzcJ5tqWXdZj927FiPrR8KobeWWTYLAYQrPw8fPlzT76gX3vEJSRAaPiEJQsMnJEFaLsa3Nch9jN/IFkN9YWN1WywRCHW85557gjG748yn0UiITc/a3W0LFiwIzrNzOz7tZ4ud+pZUZeFXetoYf/Pmzbnc246+eqn5jp+1yn5ZRNZkx5NF5EUR2Ssij4tIed0WCCH9oj+u/n0Adpvj7wH4oapOA3AWwPJGKkYIKY6aXH0RGQ/gbwD8J4B/ksrSrkUAvpadsgrAdwD8uAAdA1ffusre1S/SNfLYVJxN3wHh6rxPf/rTwVi1DrDNil1xFjNdCoTpLLsacvr06cF5va2Ks8Us7Mq92NjVkOPGjQvGbGrYhiZFtlir9Y7/IwDfBtC9zvFaAOdUtVuzTgDjenohIaT56NPwReQLALpUdat9uodTe1wcLyIrRGSLiGwZoI6EkAZTi6u/AMCXROTzAIYAGI6KBzBCRAZnd/3xAHpcFqWqKwGsBAARiVufmRDSI30avqo+COBBABCRhQD+WVW/LiK/BPBlAI8BWAZgdVFK2rjY7rbyMX7MttNWJ1/00y6B7a1mfbO0ye4NG9f79tRFY9OztmipX7Jrd9r5nYX2dTHngDz2OzJx4sRgzM5R2H55RRYLqWcBz/2oTPTtQyXmf6QxKhFCiqZfC3hU9TkAz2XyAQDzGq8SIaRoWm7lni3OUGY6z7phviWyTef51XlWZ9/yy4cMzYBNqdmdbjGwrr5tJ+ULlthr6guw2PCkTFffhqi+D4MNp+wOwiKLyTTfN40QUjg0fEISpCVcfTtLbt1o77rF3KTT1dWVyxs3bgzGbrnllly2nVCBcCWZ3cwDfHQFYCz8TLg9tm50zOsLhLPde/furaqHDbX8akh7vcvMotjv7fXXXx+MHTlyJJdjlYjnHZ+QBKHhE5IgNHxCEqQlYny748rGwT7WK7I4ocfWPN+0aVMwZtNNvm3z8ePHc9nH+HbnYUxaIca3rbG9HjZl5693M8b4U6dODcY2bNiQy4zxCSGFQcMnJEFawtW3K9qs229rsgEfXclXJNbdfOWVV4Ix6yrv2rUrGLMpyAkTJgRjdpWf/R1FrOCy4YivRW/fz25ysa53bNrb23P50UcfDcbs9+OLX/xiMGZDhDKxdfZ8d2Ib/sUKR3jHJyRBaPiEJAgNn5AEaYkY38a7Nsb3qY8iCxd4bKy+c+fOYMz2PxsyZEgwZvum3XvvvcGYTVUWHePbtJfv5VYtxi9zd9v+/ftz+eGHHw7G7FzJ7bffHozZpb4x8SlSG+P7pdknTpzI5VjzVLzjE5IgNHxCEqQlXH3r3lvX2dYgB+Km82xYYd3hno4t1l32hThsmsemqIoIYeyOx952BVqX1buvMbFhnQ/x7LXyY3blXkxswRgAmDlzZi77NLQNDYssvmHhHZ+QBKHhE5IgLeHq23pltnWV3SgDxHX1B4qtr+ZdfdtKybriRazmstfKvpfHrib07amaEe/qe7c6FvZ7CgBz5szJ5ZMnTwZjPmSNAe/4hCQIDZ+QBKHhE5IgzR+0IYxBbTFFnzaLuXJvoPTWdtrOZRSdOrPzBr3V87dxfTPW/QfCFK+fDylrtaEvCDJt2rRc3rNnTzBmV+7FoibDF5FDAM4D+ADARVWdKyIjATwOYBKAQwDuUdW4zdUIIQOiP//C71DVWao6Nzt+AMBaVZ0GYG12TAhpAepx9ZcCWJjJq1DpqXd/nfr0C5++i7XqqR5sOOJrBNrQpejUpP39vYVIVsdmTZfaTUY+jVYWviOuTdX64iyxaxkCtd/xFcAzIrJVRFZkz41V1eMAkD2OKUJBQkjjqfWOv0BVj4nIGADPisiePl+Rkf2jWNHniYSQaNR0x1fVY9ljF4DfoNIe+6SItAFA9t
hV5bUrVXWumRsghJRMn3d8ERkG4BJVPZ/JSwD8O4CnACwD8FD2uLooJW0MauUy66QPFDsP4Zfs2jbUMWP83t7Lphyb9Xo3Y4w/ZcqU4NgufbaFQ4FyYvxaXP2xAH6T5ZUHA/i5qv5eRF4C8ISILAdwBMBXilOTENJI+jR8VT0A4FM9PH8GwJ1FKEUIKZaWWLlnV1+99tpruexXPLVCOs+6y/ZvAcLdhkX/Lda9PHPmTDBm3dR9+/blcswWZf1h9OjRuVxmsRC7qnTGjBnBmG3l3dnZGU2najTnGkxCSKHQ8AlJEBo+IQnSEjG+jUdtXHzs2LHgvFaI8W3qzPZMA5onxrcpU1vPnjF+79gY3xbXBMJKQM3wveUdn5AEoeETkiAt4epbF3PDhg25fPTo0TLUqQvrRu/YsSMY6+rqcdVzIdgU6fr164Oxc+fO5fL27dtzuVl25/nioHbn28GDB4OxmIU47Oo835bMhnFnz5ZftoJ3fEIShIZPSIK0hKtvZ6CfeeaZXD59+nRwXivM6ltXf9u2bcFYzFlz6wL/9re/DcY2bdqUy0eOHMnlZrm+tjYhELrYu3fvDsZiboCxLdBGjRoVjB04cCCX7WassuAdn5AEoeETkiA0fEISpCVifLujza4ka0VsnOxX7sXEpuY6OjpK02Mg+BbU9pr6GD9m8RAb4w8dOjQYs62wyyi84eEdn5AEoeETkiAt4eoTYvFutE2P+fZUMbGuvk992oImzQDv+IQkCA2fkASh4ROSIIzxScthW3cD4a7GQ4cORdbmQ4YNG5bLfvk1Y3xCSOnQ8AlJELr6pOU4f/58cPz888/ncszCGx67GtKHHL7OXtnUdMcXkREi8qSI7BGR3SJym4iMFJFnRWRv9nhN0coSQhpDra7+fwH4vapOR6Wd1m4ADwBYq6rTAKzNjgkhLUAt3XKHA7gdwN8BgKq+B+A9EVkKYGF22ioAzwG4vwglCbF4V3/jxo25XGZHX+vq28IbwEfbpZVNLXf8KQBOAfgfEXlZRP47a5c9VlWPA0D2OKZAPQkhDaQWwx8MYA6AH6vqbAAX0A+3XkRWiMgWEdkyQB0JIQ2mFsPvBNCpqi9mx0+i8o/gpIi0AUD22GNtaFVdqapzVXVuIxQmhNRPnzG+qp4QkaMicoOqtgO4E8Cu7GcZgIeyx9WFakpIht/5VmZcb7Htr9etWxeMNVv7sVrz+P8I4GcichmAAwD+HhVv4QkRWQ7gCICvFKMiIaTR1GT4qroNQE+u+p2NVYcQEgOJWStdRJqjMDshBWA36di2XgBw8uTJXLa9FYpAVftsGcy1+oQkCA2fkASh4ROSIIzxCfkLgzE+IaRHaPiEJEjsQhynARwGMCqTy6QZdACoh4d6hPRXj4m1nBQ1xs/fVGRL2Wv3m0EH6kE9ytKDrj4hCULDJyRByjL8lSW9r6UZdACoh4d6hBSiRykxPiGkXOjqE5IgUQ1fRO4SkXYR2Sci0aryisijItIlIq+a56KXBxeRCSKyPitRvlNE7itDFxEZIiKbRWR7psd3s+cni8iLmR6PZ/UXCkdEBmX1HNeUpYeIHBKRV0RkW3eZuJK+I1FK2UczfBEZBOBhAH8NYCaAr4rIzEhv/1MAd7nnyigPfhHAt1R1BoD5AL6RXYPYurwLYJGqfgrALAB3ich8AN8D8MNMj7MAlhesRzf3oVKyvZuy9LhDVWeZ9FkZ35E4pexVNcoPgNsAPG2OHwTwYMT3nwTgVXPcDqAtk9sAtMfSxeiwGsDiMnUBcAWA/wNwKyoLRQb39HkV+P7jsy/zIgBrAEhJehwCMMo9F/VzATAcwEFkc29F6hHT1R8H4Kg57syeK4tSy4OLyCQAswG8WIYumXu9DZUiqc8C2A/gnKp2F7CL9fn8CMC3AXRXp7i2JD0UwDMislVEVmTPxf5copWyj2n4Pe0YSjKlICJXAvgVgG+q6ptl6KCqH6jqLFTuuPMAzOjptCJ1EJEvAOhS1a326dh6ZCxQ1TmohKLfEJHbI7ynp65S9v0hpuF3AphgjscDKLOTYE3lwRuNiFyKitH/TFV/XaYuAKCq51DpgjQfwAgR6d6/EePzWQDgSyJyCMBjqLj7PypBD6jqseyxC8BvUPlnGPtzqauUfX+IafgvAZiWzdheBuBeAE9FfH/PU6iUBQcilQcXEQHwCIDdqvqDsnQRkdEiMiKThwL4LCqTSOsBfDmWHqr6oKqOV9VJqHwf1qnq12PrISLDROSqbhnAEgCvIvLnoqonABwVkRuyp7pL2Tdej6InTdwkxecBdKAST/5rxPf9BYDjAN5H5b/qclRiybUA9maPIyPo8VeouK07AGzLfj4fWxcANwN4OdPjVQD/lj0/BcBmAPsA/BLA5RE/o4UA1pShR/Z+27Ofnd3fzZK+I7MAbMk+m/8FcE0RenDlHiEJwpV7hCQIDZ+QBKHhE5IgNHxCEoSGT0iC0PAJSRAaPiEJQsMnJEH+H7w2jgGBltfZAAAAAElFTkSuQmCC\n",
269 | "text/plain": [
270 | ""
271 | ]
272 | },
273 | "metadata": {},
274 | "output_type": "display_data"
275 | }
276 | ],
277 | "source": [
278 | "#checking a sample\n",
279 | "plt.figure()\n",
280 | "plt.imshow(X_synth_train[232], cmap='gray')\n",
281 | "\n",
282 | "y_synth_train[232]"
283 | ]
284 | },
285 | {
286 | "cell_type": "markdown",
287 | "metadata": {},
288 | "source": [
289 | "Looks like things work as we expect them to. Let's prepare the datset and labels so that keras can handle them."
290 | ]
291 | },
292 | {
293 | "cell_type": "markdown",
294 | "metadata": {},
295 | "source": [
296 | "### Preparatory Preprocessing\n",
297 | "\n",
298 | "#### Preprocessing Labels for model\n",
299 | "\n",
300 | "The labels are going to be encoded to \"One Hot\" arrays, to make them compatible with Keras. Note that, as the our Deep Learning model will have 5 classifiers, we'll need 5 such One Hot arrays, one for each digit position in the image. "
301 | ]
302 | },
303 | {
304 | "cell_type": "code",
305 | "execution_count": 9,
306 | "metadata": {},
307 | "outputs": [],
308 | "source": [
309 | "#Converting labels to One-hot representations of shape (set_size,digits,classes)\n",
310 | "possible_classes = 11\n",
311 | "\n",
312 | "def convert_labels(labels):\n",
313 | " \n",
314 | " #As per Keras conventions, the multiple labels need to be of the form [array_digit1,...5]\n",
315 | " #Each digit array will be of shape (60000,11)\n",
316 | " \n",
317 | " #Code below could be better, but cba for now. \n",
318 | " \n",
319 | " #Declare output ndarrays\n",
320 | " dig0_arr = np.ndarray(shape=(len(labels),possible_classes))\n",
321 | " dig1_arr = np.ndarray(shape=(len(labels),possible_classes))\n",
322 | " dig2_arr = np.ndarray(shape=(len(labels),possible_classes))\n",
323 | " dig3_arr = np.ndarray(shape=(len(labels),possible_classes)) #5 for digits, 11 for possible classes \n",
324 | " dig4_arr = np.ndarray(shape=(len(labels),possible_classes))\n",
325 | " \n",
326 | " for index,label in enumerate(labels):\n",
327 | " \n",
328 | " #Using np_utils from keras to OHE the labels in the image\n",
329 | " dig0_arr[index,:] = np_utils.to_categorical(label[0],possible_classes)\n",
330 | " dig1_arr[index,:] = np_utils.to_categorical(label[1],possible_classes)\n",
331 | " dig2_arr[index,:] = np_utils.to_categorical(label[2],possible_classes)\n",
332 | " dig3_arr[index,:] = np_utils.to_categorical(label[3],possible_classes)\n",
333 | " dig4_arr[index,:] = np_utils.to_categorical(label[4],possible_classes)\n",
334 | " \n",
335 | " return [dig0_arr,dig1_arr,dig2_arr,dig3_arr,dig4_arr]"
336 | ]
337 | },
338 | {
339 | "cell_type": "code",
340 | "execution_count": 10,
341 | "metadata": {},
342 | "outputs": [],
343 | "source": [
344 | "train_labels = convert_labels(y_synth_train)\n",
345 | "test_labels = convert_labels(y_synth_test)"
346 | ]
347 | },
348 | {
349 | "cell_type": "code",
350 | "execution_count": 11,
351 | "metadata": {},
352 | "outputs": [
353 | {
354 | "data": {
355 | "text/plain": [
356 | "(60000, 11)"
357 | ]
358 | },
359 | "execution_count": 11,
360 | "metadata": {},
361 | "output_type": "execute_result"
362 | }
363 | ],
364 | "source": [
365 | "#Checking the shape of the OHE array for the first digit position\n",
366 | "np.shape(train_labels[0])"
367 | ]
368 | },
369 | {
370 | "cell_type": "code",
371 | "execution_count": 12,
372 | "metadata": {},
373 | "outputs": [
374 | {
375 | "data": {
376 | "text/plain": [
377 | "array([ 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.])"
378 | ]
379 | },
380 | "execution_count": 12,
381 | "metadata": {},
382 | "output_type": "execute_result"
383 | }
384 | ],
385 | "source": [
386 | "np_utils.to_categorical(y_synth_train[234][0],11)"
387 | ]
388 | },
389 | {
390 | "cell_type": "markdown",
391 | "metadata": {},
392 | "source": [
393 | "#### Preprocessing Images for model"
394 | ]
395 | },
396 | {
397 | "cell_type": "markdown",
398 | "metadata": {},
399 | "source": [
400 | "The function below will pre-process the images so that they can be handled by keras."
401 | ]
402 | },
403 | {
404 | "cell_type": "code",
405 | "execution_count": 13,
406 | "metadata": {},
407 | "outputs": [],
408 | "source": [
409 | "def prep_data_keras(img_data):\n",
410 | " \n",
411 | " #Reshaping data for keras, with tensorflow as backend\n",
412 | " img_data = img_data.reshape(len(img_data),64,64,1)\n",
413 | " \n",
414 | " #Converting everything to floats\n",
415 | " img_data = img_data.astype('float32')\n",
416 | " \n",
417 | " #Normalizing values between 0 and 1\n",
418 | " img_data /= 255\n",
419 | " \n",
420 | " return img_data"
421 | ]
422 | },
423 | {
424 | "cell_type": "code",
425 | "execution_count": 14,
426 | "metadata": {},
427 | "outputs": [],
428 | "source": [
429 | "train_images = prep_data_keras(X_synth_train)\n",
430 | "test_images = prep_data_keras(X_synth_test)"
431 | ]
432 | },
433 | {
434 | "cell_type": "code",
435 | "execution_count": 15,
436 | "metadata": {},
437 | "outputs": [
438 | {
439 | "data": {
440 | "text/plain": [
441 | "(60000, 64, 64, 1)"
442 | ]
443 | },
444 | "execution_count": 15,
445 | "metadata": {},
446 | "output_type": "execute_result"
447 | }
448 | ],
449 | "source": [
450 | "np.shape(train_images)"
451 | ]
452 | },
453 | {
454 | "cell_type": "code",
455 | "execution_count": 16,
456 | "metadata": {},
457 | "outputs": [
458 | {
459 | "data": {
460 | "text/plain": [
461 | "(10000, 64, 64, 1)"
462 | ]
463 | },
464 | "execution_count": 16,
465 | "metadata": {},
466 | "output_type": "execute_result"
467 | }
468 | ],
469 | "source": [
470 | "np.shape(test_images)"
471 | ]
472 | },
473 | {
474 | "cell_type": "markdown",
475 | "metadata": {},
476 | "source": [
477 | "### Model Building"
478 | ]
479 | },
480 | {
481 | "cell_type": "code",
482 | "execution_count": null,
483 | "metadata": {},
484 | "outputs": [],
485 | "source": [
486 | "#Importing relevant keras modules\n",
487 | "from keras.models import Sequential, Model\n",
488 | "from keras.layers import Dense, Dropout, Activation, Flatten, Input\n",
489 | "from keras.layers import Convolution2D, MaxPooling2D"
490 | ]
491 | },
492 | {
493 | "cell_type": "markdown",
494 | "metadata": {},
495 | "source": [
496 | "We're going to use a Convolutional Neural Network for our network. \n",
497 | "\n",
498 | "Starting with a 2D Convolutional layer, we'll use ReLU activations after every Convolutional Layer. \n",
499 | "\n",
500 | "After the second CovLayer + ReLU, we'll add 2DMaxPooling, and a dropout to make the model robust to overfitting. A flattening layer will be added to make the data ready for classification layers, which were in the form of Dense Layers, of the same size as the no. of classes (11 for us), activated using softmax to give us the probability of each class."
501 | ]
502 | },
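A minimal sketch of the architecture just described, written against the Keras 2 functional API (which the warnings in the next cell hint at). The filter counts, pooling size, dropout rate, and optimizer here are assumptions; only the two conv + ReLU blocks, the max pooling, dropout, flatten step, and the five 11-way softmax heads come from the description above.

```python
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Dropout, Flatten, Dense

inputs = Input(shape=(64, 64, 1))                        # 64x64 grayscale synthetic images

x = Conv2D(32, (3, 3), padding='same', activation='relu')(inputs)
x = Conv2D(32, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.25)(x)                                     # guard against overfitting
x = Flatten()(x)

# One 11-way softmax head per digit position (digits 0-9 plus the blank class).
outputs = [Dense(11, activation='softmax', name='digit_{}'.format(i))(x) for i in range(5)]

model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
```

With the `train_labels` list produced by `convert_labels`, training such a model would look like `model.fit(train_images, train_labels, ...)`.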
503 | {
504 | "cell_type": "code",
505 | "execution_count": null,
506 | "metadata": {},
507 | "outputs": [
508 | {
509 | "name": "stderr",
510 | "output_type": "stream",
511 | "text": [
512 | "/Users/melvynnfernandez/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:23: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (3, 3), padding=\"same\")`\n",
513 | "/Users/melvynnfernandez/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:25: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (3, 3))`\n",
514 | "/Users/melvynnfernandez/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:46: UserWarning: Update your `Model` call to the Keras 2 API: `Model(inputs=Tensor(\"in..., outputs=[