├── .DS_Store
├── .ipynb_checkpoints
├── 0001_RFECV-checkpoint.ipynb
├── 0002_EfficientNet-checkpoint.ipynb
├── 0003_Top3PandasFunctions-checkpoint.ipynb
├── 0004_TPOT-checkpoint.ipynb
├── 0005_PyCaret-checkpoint.ipynb
├── 0006_PyCaretRegression-checkpoint.ipynb
└── 0007_PyCaretClassification-checkpoint.ipynb
├── 0001_RFECV.ipynb
├── 0002_MLDeploy
├── .DS_Store
├── .ipynb_checkpoints
│ └── Test-checkpoint.ipynb
├── Test.ipynb
├── app.py
├── modeler
│ ├── .ipynb_checkpoints
│ │ └── Modeler-checkpoint.py
│ ├── Modeler.py
│ ├── __init__.py
│ └── __pycache__
│ │ ├── Modeler.cpython-37.pyc
│ │ └── __init__.cpython-37.pyc
└── models
│ └── iris.model
├── 0003_Top3PandasFunctions.ipynb
├── 0004_TPOT.ipynb
├── 0005_PyCaret.ipynb
├── 0006_PyCaretRegression.ipynb
├── 0007_PyCaretClassification.ipynb
├── data
└── RealEstate.csv
└── img
└── LogoSingleRow.png
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/betterdatascience/YouTube/cf24c5cf3192eb7b17c25d94c710d3f941818537/.DS_Store
--------------------------------------------------------------------------------
/.ipynb_checkpoints/0002_EfficientNet-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "- EfficientNet rethinks the way in which we scale the CNNs up\n",
8 | "- One way of scaling is adding more layers - ResNet18 to ResNet200\n",
9 | "- Width scaling: adding more feature maps\n",
10 | "- Depth scaling: more layers\n",
11 | "- Resolution scaling: larger input images\n",
12 | "- Compound scaling: combination of increasing width, adding more layers, and enlarging the resolution\n",
13 | "- Compound scaling is a basic idea in EfficientNet - the idea is to balance upsampling of width, depth, and resolution by scaling with a constant ratio\n",
14 | "\n",
15 | "- Intuition: if the input image is bigger, then the network needs more layers to increase the receptive field and more channels to capture more fine-grained patterns on the bigger image"
16 | ]
17 | }
18 | ],
19 | "metadata": {
20 | "kernelspec": {
21 | "display_name": "Python 3",
22 | "language": "python",
23 | "name": "python3"
24 | },
25 | "language_info": {
26 | "codemirror_mode": {
27 | "name": "ipython",
28 | "version": 3
29 | },
30 | "file_extension": ".py",
31 | "mimetype": "text/x-python",
32 | "name": "python",
33 | "nbconvert_exporter": "python",
34 | "pygments_lexer": "ipython3",
35 | "version": "3.7.6"
36 | }
37 | },
38 | "nbformat": 4,
39 | "nbformat_minor": 4
40 | }
41 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/0003_Top3PandasFunctions-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "
\n",
8 | "

\n",
9 | "
\n",
10 | "\n",
11 | "# Top 3 Pandas Functions"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "import pandas as pd"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 2,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " PassengerId | \n",
50 | " Survived | \n",
51 | " Pclass | \n",
52 | " Name | \n",
53 | " Sex | \n",
54 | " Age | \n",
55 | " SibSp | \n",
56 | " Parch | \n",
57 | " Ticket | \n",
58 | " Fare | \n",
59 | " Cabin | \n",
60 | " Embarked | \n",
61 | "
\n",
62 | " \n",
63 | " \n",
64 | " \n",
65 | " 0 | \n",
66 | " 1 | \n",
67 | " 0 | \n",
68 | " 3 | \n",
69 | " Braund, Mr. Owen Harris | \n",
70 | " male | \n",
71 | " 22.0 | \n",
72 | " 1 | \n",
73 | " 0 | \n",
74 | " A/5 21171 | \n",
75 | " 7.2500 | \n",
76 | " NaN | \n",
77 | " S | \n",
78 | "
\n",
79 | " \n",
80 | " 1 | \n",
81 | " 2 | \n",
82 | " 1 | \n",
83 | " 1 | \n",
84 | " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
85 | " female | \n",
86 | " 38.0 | \n",
87 | " 1 | \n",
88 | " 0 | \n",
89 | " PC 17599 | \n",
90 | " 71.2833 | \n",
91 | " C85 | \n",
92 | " C | \n",
93 | "
\n",
94 | " \n",
95 | " 2 | \n",
96 | " 3 | \n",
97 | " 1 | \n",
98 | " 3 | \n",
99 | " Heikkinen, Miss. Laina | \n",
100 | " female | \n",
101 | " 26.0 | \n",
102 | " 0 | \n",
103 | " 0 | \n",
104 | " STON/O2. 3101282 | \n",
105 | " 7.9250 | \n",
106 | " NaN | \n",
107 | " S | \n",
108 | "
\n",
109 | " \n",
110 | " 3 | \n",
111 | " 4 | \n",
112 | " 1 | \n",
113 | " 1 | \n",
114 | " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
115 | " female | \n",
116 | " 35.0 | \n",
117 | " 1 | \n",
118 | " 0 | \n",
119 | " 113803 | \n",
120 | " 53.1000 | \n",
121 | " C123 | \n",
122 | " S | \n",
123 | "
\n",
124 | " \n",
125 | " 4 | \n",
126 | " 5 | \n",
127 | " 0 | \n",
128 | " 3 | \n",
129 | " Allen, Mr. William Henry | \n",
130 | " male | \n",
131 | " 35.0 | \n",
132 | " 0 | \n",
133 | " 0 | \n",
134 | " 373450 | \n",
135 | " 8.0500 | \n",
136 | " NaN | \n",
137 | " S | \n",
138 | "
\n",
139 | " \n",
140 | "
\n",
141 | "
"
142 | ],
143 | "text/plain": [
144 | " PassengerId Survived Pclass \\\n",
145 | "0 1 0 3 \n",
146 | "1 2 1 1 \n",
147 | "2 3 1 3 \n",
148 | "3 4 1 1 \n",
149 | "4 5 0 3 \n",
150 | "\n",
151 | " Name Sex Age SibSp \\\n",
152 | "0 Braund, Mr. Owen Harris male 22.0 1 \n",
153 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
154 | "2 Heikkinen, Miss. Laina female 26.0 0 \n",
155 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
156 | "4 Allen, Mr. William Henry male 35.0 0 \n",
157 | "\n",
158 | " Parch Ticket Fare Cabin Embarked \n",
159 | "0 0 A/5 21171 7.2500 NaN S \n",
160 | "1 0 PC 17599 71.2833 C85 C \n",
161 | "2 0 STON/O2. 3101282 7.9250 NaN S \n",
162 | "3 0 113803 53.1000 C123 S \n",
163 | "4 0 373450 8.0500 NaN S "
164 | ]
165 | },
166 | "execution_count": 2,
167 | "metadata": {},
168 | "output_type": "execute_result"
169 | }
170 | ],
171 | "source": [
172 | "df = pd.read_csv('https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')\n",
173 | "df.head()"
174 | ]
175 | },
176 | {
177 | "cell_type": "markdown",
178 | "metadata": {},
179 | "source": [
180 | "# 1. apply()"
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "execution_count": 3,
186 | "metadata": {},
187 | "outputs": [],
188 | "source": [
189 | "def extract_title(string):\n",
190 | " string = string.split(',')[-1]\n",
191 | " string = string.strip()\n",
192 | " string = string.split('.')[0]\n",
193 | " return string"
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": 4,
199 | "metadata": {},
200 | "outputs": [
201 | {
202 | "data": {
203 | "text/plain": [
204 | "0 Mr\n",
205 | "1 Mrs\n",
206 | "2 Miss\n",
207 | "3 Mrs\n",
208 | "4 Mr\n",
209 | "Name: Name, dtype: object"
210 | ]
211 | },
212 | "execution_count": 4,
213 | "metadata": {},
214 | "output_type": "execute_result"
215 | }
216 | ],
217 | "source": [
218 | "titles = df['Name'].apply(extract_title)\n",
219 | "titles[:5]"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 5,
225 | "metadata": {},
226 | "outputs": [
227 | {
228 | "data": {
229 | "text/plain": [
230 | "0 Mr\n",
231 | "1 Mrs\n",
232 | "2 Miss\n",
233 | "3 Mrs\n",
234 | "4 Mr\n",
235 | "Name: Name, dtype: object"
236 | ]
237 | },
238 | "execution_count": 5,
239 | "metadata": {},
240 | "output_type": "execute_result"
241 | }
242 | ],
243 | "source": [
244 | "titles = df['Name'].apply(lambda x: x.split(',')[-1].strip().split('.')[0])\n",
245 | "titles[:5]"
246 | ]
247 | },
248 | {
249 | "cell_type": "markdown",
250 | "metadata": {},
251 | "source": [
252 | "# 2. value_counts()"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 6,
258 | "metadata": {},
259 | "outputs": [
260 | {
261 | "data": {
262 | "text/plain": [
263 | "Mr 517\n",
264 | "Miss 182\n",
265 | "Mrs 125\n",
266 | "Master 40\n",
267 | "Dr 7\n",
268 | "Rev 6\n",
269 | "Mlle 2\n",
270 | "Col 2\n",
271 | "Major 2\n",
272 | "Mme 1\n",
273 | "Jonkheer 1\n",
274 | "Sir 1\n",
275 | "Lady 1\n",
276 | "Don 1\n",
277 | "the Countess 1\n",
278 | "Capt 1\n",
279 | "Ms 1\n",
280 | "Name: Name, dtype: int64"
281 | ]
282 | },
283 | "execution_count": 6,
284 | "metadata": {},
285 | "output_type": "execute_result"
286 | }
287 | ],
288 | "source": [
289 | "titles.value_counts()"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 8,
295 | "metadata": {},
296 | "outputs": [
297 | {
298 | "data": {
299 | "text/plain": [
300 | "Mr 58.024691\n",
301 | "Miss 20.426487\n",
302 | "Mrs 14.029181\n",
303 | "Master 4.489338\n",
304 | "Dr 0.785634\n",
305 | "Rev 0.673401\n",
306 | "Mlle 0.224467\n",
307 | "Col 0.224467\n",
308 | "Major 0.224467\n",
309 | "Mme 0.112233\n",
310 | "Jonkheer 0.112233\n",
311 | "Sir 0.112233\n",
312 | "Lady 0.112233\n",
313 | "Don 0.112233\n",
314 | "the Countess 0.112233\n",
315 | "Capt 0.112233\n",
316 | "Ms 0.112233\n",
317 | "Name: Name, dtype: float64"
318 | ]
319 | },
320 | "execution_count": 8,
321 | "metadata": {},
322 | "output_type": "execute_result"
323 | }
324 | ],
325 | "source": [
326 | "titles.value_counts(normalize=True) * 100"
327 | ]
328 | },
329 | {
330 | "cell_type": "markdown",
331 | "metadata": {},
332 | "source": [
333 | "# 3. get_dummies()"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 9,
339 | "metadata": {},
340 | "outputs": [
341 | {
342 | "data": {
343 | "text/html": [
344 | "\n",
345 | "\n",
358 | "
\n",
359 | " \n",
360 | " \n",
361 | " | \n",
362 | " C | \n",
363 | " Q | \n",
364 | " S | \n",
365 | "
\n",
366 | " \n",
367 | " \n",
368 | " \n",
369 | " 0 | \n",
370 | " 0 | \n",
371 | " 0 | \n",
372 | " 1 | \n",
373 | "
\n",
374 | " \n",
375 | " 1 | \n",
376 | " 1 | \n",
377 | " 0 | \n",
378 | " 0 | \n",
379 | "
\n",
380 | " \n",
381 | " 2 | \n",
382 | " 0 | \n",
383 | " 0 | \n",
384 | " 1 | \n",
385 | "
\n",
386 | " \n",
387 | " 3 | \n",
388 | " 0 | \n",
389 | " 0 | \n",
390 | " 1 | \n",
391 | "
\n",
392 | " \n",
393 | " 4 | \n",
394 | " 0 | \n",
395 | " 0 | \n",
396 | " 1 | \n",
397 | "
\n",
398 | " \n",
399 | "
\n",
400 | "
"
401 | ],
402 | "text/plain": [
403 | " C Q S\n",
404 | "0 0 0 1\n",
405 | "1 1 0 0\n",
406 | "2 0 0 1\n",
407 | "3 0 0 1\n",
408 | "4 0 0 1"
409 | ]
410 | },
411 | "execution_count": 9,
412 | "metadata": {},
413 | "output_type": "execute_result"
414 | }
415 | ],
416 | "source": [
417 | "emb = pd.get_dummies(df['Embarked'])\n",
418 | "emb.head()"
419 | ]
420 | },
421 | {
422 | "cell_type": "code",
423 | "execution_count": 10,
424 | "metadata": {},
425 | "outputs": [
426 | {
427 | "data": {
428 | "text/html": [
429 | "\n",
430 | "\n",
443 | "
\n",
444 | " \n",
445 | " \n",
446 | " | \n",
447 | " Embarked_C | \n",
448 | " Embarked_Q | \n",
449 | " Embarked_S | \n",
450 | "
\n",
451 | " \n",
452 | " \n",
453 | " \n",
454 | " 0 | \n",
455 | " 0 | \n",
456 | " 0 | \n",
457 | " 1 | \n",
458 | "
\n",
459 | " \n",
460 | " 1 | \n",
461 | " 1 | \n",
462 | " 0 | \n",
463 | " 0 | \n",
464 | "
\n",
465 | " \n",
466 | " 2 | \n",
467 | " 0 | \n",
468 | " 0 | \n",
469 | " 1 | \n",
470 | "
\n",
471 | " \n",
472 | " 3 | \n",
473 | " 0 | \n",
474 | " 0 | \n",
475 | " 1 | \n",
476 | "
\n",
477 | " \n",
478 | " 4 | \n",
479 | " 0 | \n",
480 | " 0 | \n",
481 | " 1 | \n",
482 | "
\n",
483 | " \n",
484 | "
\n",
485 | "
"
486 | ],
487 | "text/plain": [
488 | " Embarked_C Embarked_Q Embarked_S\n",
489 | "0 0 0 1\n",
490 | "1 1 0 0\n",
491 | "2 0 0 1\n",
492 | "3 0 0 1\n",
493 | "4 0 0 1"
494 | ]
495 | },
496 | "execution_count": 10,
497 | "metadata": {},
498 | "output_type": "execute_result"
499 | }
500 | ],
501 | "source": [
502 | "emb = pd.get_dummies(df['Embarked'], prefix='Embarked')\n",
503 | "emb.head()"
504 | ]
505 | },
506 | {
507 | "cell_type": "code",
508 | "execution_count": 11,
509 | "metadata": {},
510 | "outputs": [
511 | {
512 | "data": {
513 | "text/html": [
514 | "\n",
515 | "\n",
528 | "
\n",
529 | " \n",
530 | " \n",
531 | " | \n",
532 | " Embarked_Q | \n",
533 | " Embarked_S | \n",
534 | "
\n",
535 | " \n",
536 | " \n",
537 | " \n",
538 | " 0 | \n",
539 | " 0 | \n",
540 | " 1 | \n",
541 | "
\n",
542 | " \n",
543 | " 1 | \n",
544 | " 0 | \n",
545 | " 0 | \n",
546 | "
\n",
547 | " \n",
548 | " 2 | \n",
549 | " 0 | \n",
550 | " 1 | \n",
551 | "
\n",
552 | " \n",
553 | " 3 | \n",
554 | " 0 | \n",
555 | " 1 | \n",
556 | "
\n",
557 | " \n",
558 | " 4 | \n",
559 | " 0 | \n",
560 | " 1 | \n",
561 | "
\n",
562 | " \n",
563 | "
\n",
564 | "
"
565 | ],
566 | "text/plain": [
567 | " Embarked_Q Embarked_S\n",
568 | "0 0 1\n",
569 | "1 0 0\n",
570 | "2 0 1\n",
571 | "3 0 1\n",
572 | "4 0 1"
573 | ]
574 | },
575 | "execution_count": 11,
576 | "metadata": {},
577 | "output_type": "execute_result"
578 | }
579 | ],
580 | "source": [
581 | "emb = pd.get_dummies(df['Embarked'], prefix='Embarked', drop_first=True)\n",
582 | "emb.head()"
583 | ]
584 | }
585 | ],
586 | "metadata": {
587 | "kernelspec": {
588 | "display_name": "Python 3",
589 | "language": "python",
590 | "name": "python3"
591 | },
592 | "language_info": {
593 | "codemirror_mode": {
594 | "name": "ipython",
595 | "version": 3
596 | },
597 | "file_extension": ".py",
598 | "mimetype": "text/x-python",
599 | "name": "python",
600 | "nbconvert_exporter": "python",
601 | "pygments_lexer": "ipython3",
602 | "version": "3.7.6"
603 | }
604 | },
605 | "nbformat": 4,
606 | "nbformat_minor": 4
607 | }
608 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/0004_TPOT-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "\n",
8 | "

\n",
9 | "
\n",
10 | "\n",
11 | "# TPOT"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "name": "stderr",
21 | "output_type": "stream",
22 | "text": [
23 | "/opt/anaconda3/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
24 | " return f(*args, **kwds)\n"
25 | ]
26 | }
27 | ],
28 | "source": [
29 | "import pandas as pd\n",
30 | "from sklearn.preprocessing import StandardScaler\n",
31 | "from sklearn.model_selection import train_test_split"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 2,
37 | "metadata": {},
38 | "outputs": [
39 | {
40 | "data": {
41 | "text/html": [
42 | "\n",
43 | "\n",
56 | "
\n",
57 | " \n",
58 | " \n",
59 | " | \n",
60 | " PassengerId | \n",
61 | " Survived | \n",
62 | " Pclass | \n",
63 | " Name | \n",
64 | " Sex | \n",
65 | " Age | \n",
66 | " SibSp | \n",
67 | " Parch | \n",
68 | " Ticket | \n",
69 | " Fare | \n",
70 | " Cabin | \n",
71 | " Embarked | \n",
72 | "
\n",
73 | " \n",
74 | " \n",
75 | " \n",
76 | " 0 | \n",
77 | " 1 | \n",
78 | " 0 | \n",
79 | " 3 | \n",
80 | " Braund, Mr. Owen Harris | \n",
81 | " male | \n",
82 | " 22.0 | \n",
83 | " 1 | \n",
84 | " 0 | \n",
85 | " A/5 21171 | \n",
86 | " 7.2500 | \n",
87 | " NaN | \n",
88 | " S | \n",
89 | "
\n",
90 | " \n",
91 | " 1 | \n",
92 | " 2 | \n",
93 | " 1 | \n",
94 | " 1 | \n",
95 | " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
96 | " female | \n",
97 | " 38.0 | \n",
98 | " 1 | \n",
99 | " 0 | \n",
100 | " PC 17599 | \n",
101 | " 71.2833 | \n",
102 | " C85 | \n",
103 | " C | \n",
104 | "
\n",
105 | " \n",
106 | " 2 | \n",
107 | " 3 | \n",
108 | " 1 | \n",
109 | " 3 | \n",
110 | " Heikkinen, Miss. Laina | \n",
111 | " female | \n",
112 | " 26.0 | \n",
113 | " 0 | \n",
114 | " 0 | \n",
115 | " STON/O2. 3101282 | \n",
116 | " 7.9250 | \n",
117 | " NaN | \n",
118 | " S | \n",
119 | "
\n",
120 | " \n",
121 | " 3 | \n",
122 | " 4 | \n",
123 | " 1 | \n",
124 | " 1 | \n",
125 | " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
126 | " female | \n",
127 | " 35.0 | \n",
128 | " 1 | \n",
129 | " 0 | \n",
130 | " 113803 | \n",
131 | " 53.1000 | \n",
132 | " C123 | \n",
133 | " S | \n",
134 | "
\n",
135 | " \n",
136 | " 4 | \n",
137 | " 5 | \n",
138 | " 0 | \n",
139 | " 3 | \n",
140 | " Allen, Mr. William Henry | \n",
141 | " male | \n",
142 | " 35.0 | \n",
143 | " 0 | \n",
144 | " 0 | \n",
145 | " 373450 | \n",
146 | " 8.0500 | \n",
147 | " NaN | \n",
148 | " S | \n",
149 | "
\n",
150 | " \n",
151 | "
\n",
152 | "
"
153 | ],
154 | "text/plain": [
155 | " PassengerId Survived Pclass \\\n",
156 | "0 1 0 3 \n",
157 | "1 2 1 1 \n",
158 | "2 3 1 3 \n",
159 | "3 4 1 1 \n",
160 | "4 5 0 3 \n",
161 | "\n",
162 | " Name Sex Age SibSp \\\n",
163 | "0 Braund, Mr. Owen Harris male 22.0 1 \n",
164 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
165 | "2 Heikkinen, Miss. Laina female 26.0 0 \n",
166 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
167 | "4 Allen, Mr. William Henry male 35.0 0 \n",
168 | "\n",
169 | " Parch Ticket Fare Cabin Embarked \n",
170 | "0 0 A/5 21171 7.2500 NaN S \n",
171 | "1 0 PC 17599 71.2833 C85 C \n",
172 | "2 0 STON/O2. 3101282 7.9250 NaN S \n",
173 | "3 0 113803 53.1000 C123 S \n",
174 | "4 0 373450 8.0500 NaN S "
175 | ]
176 | },
177 | "execution_count": 2,
178 | "metadata": {},
179 | "output_type": "execute_result"
180 | }
181 | ],
182 | "source": [
183 | "data = pd.read_csv('https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')\n",
184 | "data.head()"
185 | ]
186 | },
187 | {
188 | "cell_type": "markdown",
189 | "metadata": {},
190 | "source": [
191 | "## Data preparation"
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": 3,
197 | "metadata": {},
198 | "outputs": [],
199 | "source": [
200 | "data.drop(['Ticket', 'PassengerId'], axis=1, inplace=True)\n",
201 | "\n",
202 | "gender_mapper = {'male': 0, 'female': 1}\n",
203 | "data['Sex'].replace(gender_mapper, inplace=True)\n",
204 | "\n",
205 | "data['Title'] = data['Name'].apply(lambda x: x.split(',')[1].strip().split(' ')[0])\n",
206 | "data['Title'] = [0 if x in ['Mr.', 'Miss.', 'Mrs.'] else 1 for x in data['Title']]\n",
207 | "data = data.rename(columns={'Title': 'Title_Unusual'})\n",
208 | "data.drop('Name', axis=1, inplace=True)\n",
209 | "\n",
210 | "data['Cabin_Known'] = [0 if str(x) == 'nan' else 1 for x in data['Cabin']]\n",
211 | "data.drop('Cabin', axis=1, inplace=True)\n",
212 | "\n",
213 | "emb_dummies = pd.get_dummies(data['Embarked'], drop_first=True, prefix='Embarked')\n",
214 | "data = pd.concat([data, emb_dummies], axis=1)\n",
215 | "data.drop('Embarked', axis=1, inplace=True)\n",
216 | "\n",
217 | "data['Age'] = data['Age'].fillna(int(data['Age'].mean()))"
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": 4,
223 | "metadata": {},
224 | "outputs": [
225 | {
226 | "data": {
227 | "text/html": [
228 | "\n",
229 | "\n",
242 | "
\n",
243 | " \n",
244 | " \n",
245 | " | \n",
246 | " Survived | \n",
247 | " Pclass | \n",
248 | " Sex | \n",
249 | " Age | \n",
250 | " SibSp | \n",
251 | " Parch | \n",
252 | " Fare | \n",
253 | " Title_Unusual | \n",
254 | " Cabin_Known | \n",
255 | " Embarked_Q | \n",
256 | " Embarked_S | \n",
257 | "
\n",
258 | " \n",
259 | " \n",
260 | " \n",
261 | " 0 | \n",
262 | " 0 | \n",
263 | " 3 | \n",
264 | " 0 | \n",
265 | " 22.0 | \n",
266 | " 1 | \n",
267 | " 0 | \n",
268 | " 7.2500 | \n",
269 | " 0 | \n",
270 | " 0 | \n",
271 | " 0 | \n",
272 | " 1 | \n",
273 | "
\n",
274 | " \n",
275 | " 1 | \n",
276 | " 1 | \n",
277 | " 1 | \n",
278 | " 1 | \n",
279 | " 38.0 | \n",
280 | " 1 | \n",
281 | " 0 | \n",
282 | " 71.2833 | \n",
283 | " 0 | \n",
284 | " 1 | \n",
285 | " 0 | \n",
286 | " 0 | \n",
287 | "
\n",
288 | " \n",
289 | " 2 | \n",
290 | " 1 | \n",
291 | " 3 | \n",
292 | " 1 | \n",
293 | " 26.0 | \n",
294 | " 0 | \n",
295 | " 0 | \n",
296 | " 7.9250 | \n",
297 | " 0 | \n",
298 | " 0 | \n",
299 | " 0 | \n",
300 | " 1 | \n",
301 | "
\n",
302 | " \n",
303 | " 3 | \n",
304 | " 1 | \n",
305 | " 1 | \n",
306 | " 1 | \n",
307 | " 35.0 | \n",
308 | " 1 | \n",
309 | " 0 | \n",
310 | " 53.1000 | \n",
311 | " 0 | \n",
312 | " 1 | \n",
313 | " 0 | \n",
314 | " 1 | \n",
315 | "
\n",
316 | " \n",
317 | " 4 | \n",
318 | " 0 | \n",
319 | " 3 | \n",
320 | " 0 | \n",
321 | " 35.0 | \n",
322 | " 0 | \n",
323 | " 0 | \n",
324 | " 8.0500 | \n",
325 | " 0 | \n",
326 | " 0 | \n",
327 | " 0 | \n",
328 | " 1 | \n",
329 | "
\n",
330 | " \n",
331 | "
\n",
332 | "
"
333 | ],
334 | "text/plain": [
335 | " Survived Pclass Sex Age SibSp Parch Fare Title_Unusual \\\n",
336 | "0 0 3 0 22.0 1 0 7.2500 0 \n",
337 | "1 1 1 1 38.0 1 0 71.2833 0 \n",
338 | "2 1 3 1 26.0 0 0 7.9250 0 \n",
339 | "3 1 1 1 35.0 1 0 53.1000 0 \n",
340 | "4 0 3 0 35.0 0 0 8.0500 0 \n",
341 | "\n",
342 | " Cabin_Known Embarked_Q Embarked_S \n",
343 | "0 0 0 1 \n",
344 | "1 1 0 0 \n",
345 | "2 0 0 1 \n",
346 | "3 1 0 1 \n",
347 | "4 0 0 1 "
348 | ]
349 | },
350 | "execution_count": 4,
351 | "metadata": {},
352 | "output_type": "execute_result"
353 | }
354 | ],
355 | "source": [
356 | "data.head()"
357 | ]
358 | },
359 | {
360 | "cell_type": "markdown",
361 | "metadata": {},
362 | "source": [
363 | "## Train Test split and Scaling"
364 | ]
365 | },
366 | {
367 | "cell_type": "code",
368 | "execution_count": 5,
369 | "metadata": {},
370 | "outputs": [],
371 | "source": [
372 | "X = data.drop('Survived', axis=1)\n",
373 | "y = data['Survived']\n",
374 | "\n",
375 | "X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": 6,
381 | "metadata": {},
382 | "outputs": [],
383 | "source": [
384 | "ss = StandardScaler()\n",
385 | "X_train_scaled = ss.fit_transform(X_train)\n",
386 | "X_test_scaled = ss.transform(X_test)"
387 | ]
388 | },
389 | {
390 | "cell_type": "markdown",
391 | "metadata": {},
392 | "source": [
393 | "# TPOT"
394 | ]
395 | },
396 | {
397 | "cell_type": "code",
398 | "execution_count": 7,
399 | "metadata": {},
400 | "outputs": [
401 | {
402 | "name": "stderr",
403 | "output_type": "stream",
404 | "text": [
405 | "/opt/anaconda3/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
406 | " return f(*args, **kwds)\n",
407 | "/opt/anaconda3/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
408 | " return f(*args, **kwds)\n",
409 | "/opt/anaconda3/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
410 | " return f(*args, **kwds)\n",
411 | "/opt/anaconda3/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
412 | " return f(*args, **kwds)\n"
413 | ]
414 | },
415 | {
416 | "data": {
417 | "application/vnd.jupyter.widget-view+json": {
418 | "model_id": "486d08b62e1d41f09461fea3e3eb29ba",
419 | "version_major": 2,
420 | "version_minor": 0
421 | },
422 | "text/plain": [
423 | "HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', style=ProgressStyle(description_w…"
424 | ]
425 | },
426 | "metadata": {},
427 | "output_type": "display_data"
428 | },
429 | {
430 | "name": "stdout",
431 | "output_type": "stream",
432 | "text": [
433 | "\n",
434 | "Generation 1 - Current best internal CV score: 0.8216389244558258\n",
435 | "Generation 2 - Current best internal CV score: 0.8216389244558258\n",
436 | "Generation 3 - Current best internal CV score: 0.8244361272530286\n",
437 | "Generation 4 - Current best internal CV score: 0.8244361272530287\n",
438 | "10.02 minutes have elapsed. TPOT will close down.\n",
439 | "TPOT closed during evaluation in one generation.\n",
440 | "WARNING: TPOT may not provide a good pipeline if TPOT is stopped/interrupted in a early generation.\n",
441 | "\n",
442 | "\n",
443 | "TPOT closed prematurely. Will use the current best pipeline.\n",
444 | "\n",
445 | "Best pipeline: GradientBoostingClassifier(input_matrix, learning_rate=0.1, max_depth=10, max_features=0.55, min_samples_leaf=16, min_samples_split=3, n_estimators=100, subsample=0.6000000000000001)\n"
446 | ]
447 | },
448 | {
449 | "data": {
450 | "text/plain": [
451 | "TPOTClassifier(config_dict=None, crossover_rate=0.1, cv=5,\n",
452 | " disable_update_check=False, early_stop=None, generations=100,\n",
453 | " log_file=,\n",
454 | " max_eval_time_mins=5, max_time_mins=10, memory=None,\n",
455 | " mutation_rate=0.9, n_jobs=1, offspring_size=None,\n",
456 | " periodic_checkpoint_folder=None, population_size=100,\n",
457 | " random_state=None, scoring=None, subsample=1.0, template=None,\n",
458 | " use_dask=False, verbosity=2, warm_start=False)"
459 | ]
460 | },
461 | "execution_count": 7,
462 | "metadata": {},
463 | "output_type": "execute_result"
464 | }
465 | ],
466 | "source": [
467 | "from tpot import TPOTClassifier\n",
468 | "\n",
469 | "tpot = TPOTClassifier(verbosity=2, max_time_mins=10)\n",
470 | "tpot.fit(X_train_scaled, y_train)"
471 | ]
472 | },
473 | {
474 | "cell_type": "code",
475 | "execution_count": null,
476 | "metadata": {},
477 | "outputs": [],
478 | "source": []
479 | },
480 | {
481 | "cell_type": "code",
482 | "execution_count": 8,
483 | "metadata": {},
484 | "outputs": [
485 | {
486 | "data": {
487 | "text/plain": [
488 | "Pipeline(memory=None,\n",
489 | " steps=[('gradientboostingclassifier',\n",
490 | " GradientBoostingClassifier(ccp_alpha=0.0,\n",
491 | " criterion='friedman_mse', init=None,\n",
492 | " learning_rate=0.1, loss='deviance',\n",
493 | " max_depth=10, max_features=0.55,\n",
494 | " max_leaf_nodes=None,\n",
495 | " min_impurity_decrease=0.0,\n",
496 | " min_impurity_split=None,\n",
497 | " min_samples_leaf=16,\n",
498 | " min_samples_split=3,\n",
499 | " min_weight_fraction_leaf=0.0,\n",
500 | " n_estimators=100,\n",
501 | " n_iter_no_change=None,\n",
502 | " presort='deprecated',\n",
503 | " random_state=None,\n",
504 | " subsample=0.6000000000000001,\n",
505 | " tol=0.0001, validation_fraction=0.1,\n",
506 | " verbose=0, warm_start=False))],\n",
507 | " verbose=False)"
508 | ]
509 | },
510 | "execution_count": 8,
511 | "metadata": {},
512 | "output_type": "execute_result"
513 | }
514 | ],
515 | "source": [
516 | "tpot.fitted_pipeline_"
517 | ]
518 | },
519 | {
520 | "cell_type": "code",
521 | "execution_count": 9,
522 | "metadata": {},
523 | "outputs": [
524 | {
525 | "data": {
526 | "text/plain": [
527 | "0.8491620111731844"
528 | ]
529 | },
530 | "execution_count": 9,
531 | "metadata": {},
532 | "output_type": "execute_result"
533 | }
534 | ],
535 | "source": [
536 | "tpot.score(X_test_scaled, y_test)"
537 | ]
538 | }
539 | ],
540 | "metadata": {
541 | "kernelspec": {
542 | "display_name": "Python 3",
543 | "language": "python",
544 | "name": "python3"
545 | },
546 | "language_info": {
547 | "codemirror_mode": {
548 | "name": "ipython",
549 | "version": 3
550 | },
551 | "file_extension": ".py",
552 | "mimetype": "text/x-python",
553 | "name": "python",
554 | "nbconvert_exporter": "python",
555 | "pygments_lexer": "ipython3",
556 | "version": "3.7.6"
557 | }
558 | },
559 | "nbformat": 4,
560 | "nbformat_minor": 4
561 | }
562 |
--------------------------------------------------------------------------------
/0001_RFECV.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "\n",
8 | "

\n",
9 | "
\n",
10 | "\n",
11 | "# Recursive Feature Elimination with Cross-Validation"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "## Imports"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "import numpy as np\n",
28 | "import pandas as pd\n",
29 | "import matplotlib.pyplot as plt\n",
30 | "\n",
31 | "from sklearn.model_selection import StratifiedKFold\n",
32 | "from sklearn.feature_selection import RFECV\n",
33 | "from sklearn.ensemble import RandomForestClassifier\n",
34 | "\n",
35 | "import warnings\n",
36 | "warnings.filterwarnings('ignore')"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 3,
42 | "metadata": {},
43 | "outputs": [
44 | {
45 | "data": {
46 | "text/html": [
47 | "\n",
48 | "\n",
61 | "
\n",
62 | " \n",
63 | " \n",
64 | " | \n",
65 | " PassengerId | \n",
66 | " Survived | \n",
67 | " Pclass | \n",
68 | " Name | \n",
69 | " Sex | \n",
70 | " Age | \n",
71 | " SibSp | \n",
72 | " Parch | \n",
73 | " Ticket | \n",
74 | " Fare | \n",
75 | " Cabin | \n",
76 | " Embarked | \n",
77 | "
\n",
78 | " \n",
79 | " \n",
80 | " \n",
81 | " 0 | \n",
82 | " 1 | \n",
83 | " 0 | \n",
84 | " 3 | \n",
85 | " Braund, Mr. Owen Harris | \n",
86 | " male | \n",
87 | " 22.0 | \n",
88 | " 1 | \n",
89 | " 0 | \n",
90 | " A/5 21171 | \n",
91 | " 7.2500 | \n",
92 | " NaN | \n",
93 | " S | \n",
94 | "
\n",
95 | " \n",
96 | " 1 | \n",
97 | " 2 | \n",
98 | " 1 | \n",
99 | " 1 | \n",
100 | " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
101 | " female | \n",
102 | " 38.0 | \n",
103 | " 1 | \n",
104 | " 0 | \n",
105 | " PC 17599 | \n",
106 | " 71.2833 | \n",
107 | " C85 | \n",
108 | " C | \n",
109 | "
\n",
110 | " \n",
111 | " 2 | \n",
112 | " 3 | \n",
113 | " 1 | \n",
114 | " 3 | \n",
115 | " Heikkinen, Miss. Laina | \n",
116 | " female | \n",
117 | " 26.0 | \n",
118 | " 0 | \n",
119 | " 0 | \n",
120 | " STON/O2. 3101282 | \n",
121 | " 7.9250 | \n",
122 | " NaN | \n",
123 | " S | \n",
124 | "
\n",
125 | " \n",
126 | " 3 | \n",
127 | " 4 | \n",
128 | " 1 | \n",
129 | " 1 | \n",
130 | " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
131 | " female | \n",
132 | " 35.0 | \n",
133 | " 1 | \n",
134 | " 0 | \n",
135 | " 113803 | \n",
136 | " 53.1000 | \n",
137 | " C123 | \n",
138 | " S | \n",
139 | "
\n",
140 | " \n",
141 | " 4 | \n",
142 | " 5 | \n",
143 | " 0 | \n",
144 | " 3 | \n",
145 | " Allen, Mr. William Henry | \n",
146 | " male | \n",
147 | " 35.0 | \n",
148 | " 0 | \n",
149 | " 0 | \n",
150 | " 373450 | \n",
151 | " 8.0500 | \n",
152 | " NaN | \n",
153 | " S | \n",
154 | "
\n",
155 | " \n",
156 | "
\n",
157 | "
"
158 | ],
159 | "text/plain": [
160 | " PassengerId Survived Pclass \\\n",
161 | "0 1 0 3 \n",
162 | "1 2 1 1 \n",
163 | "2 3 1 3 \n",
164 | "3 4 1 1 \n",
165 | "4 5 0 3 \n",
166 | "\n",
167 | " Name Sex Age SibSp \\\n",
168 | "0 Braund, Mr. Owen Harris male 22.0 1 \n",
169 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
170 | "2 Heikkinen, Miss. Laina female 26.0 0 \n",
171 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
172 | "4 Allen, Mr. William Henry male 35.0 0 \n",
173 | "\n",
174 | " Parch Ticket Fare Cabin Embarked \n",
175 | "0 0 A/5 21171 7.2500 NaN S \n",
176 | "1 0 PC 17599 71.2833 C85 C \n",
177 | "2 0 STON/O2. 3101282 7.9250 NaN S \n",
178 | "3 0 113803 53.1000 C123 S \n",
179 | "4 0 373450 8.0500 NaN S "
180 | ]
181 | },
182 | "execution_count": 3,
183 | "metadata": {},
184 | "output_type": "execute_result"
185 | }
186 | ],
187 | "source": [
188 | "data = pd.read_csv('https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')\n",
189 | "data.head()"
190 | ]
191 | },
192 | {
193 | "cell_type": "markdown",
194 | "metadata": {},
195 | "source": [
196 | "## Data cleaning and preparation"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": 4,
202 | "metadata": {},
203 | "outputs": [],
204 | "source": [
205 | "data.drop(['Ticket', 'PassengerId'], axis=1, inplace=True)\n",
206 | "\n",
207 | "gender_mapper = {'male': 0, 'female': 1}\n",
208 | "data['Sex'].replace(gender_mapper, inplace=True)\n",
209 | "\n",
210 | "data['Title'] = data['Name'].apply(lambda x: x.split(',')[1].strip().split(' ')[0])\n",
211 | "data['Title'] = [0 if x in ['Mr.', 'Miss.', 'Mrs.'] else 1 for x in data['Title']]\n",
212 | "data = data.rename(columns={'Title': 'Title_Unusual'})\n",
213 | "data.drop('Name', axis=1, inplace=True)\n",
214 | "\n",
215 | "data['Cabin_Known'] = [0 if str(x) == 'nan' else 1 for x in data['Cabin']]\n",
216 | "data.drop('Cabin', axis=1, inplace=True)\n",
217 | "\n",
218 | "emb_dummies = pd.get_dummies(data['Embarked'], drop_first=True, prefix='Embarked')\n",
219 | "data = pd.concat([data, emb_dummies], axis=1)\n",
220 | "data.drop('Embarked', axis=1, inplace=True)\n",
221 | "\n",
222 | "data['Age'] = data['Age'].fillna(int(data['Age'].mean()))"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": 5,
228 | "metadata": {},
229 | "outputs": [
230 | {
231 | "data": {
232 | "text/html": [
233 | "\n",
234 | "\n",
247 | "
\n",
248 | " \n",
249 | " \n",
250 | " | \n",
251 | " Survived | \n",
252 | " Pclass | \n",
253 | " Sex | \n",
254 | " Age | \n",
255 | " SibSp | \n",
256 | " Parch | \n",
257 | " Fare | \n",
258 | " Title_Unusual | \n",
259 | " Cabin_Known | \n",
260 | " Embarked_Q | \n",
261 | " Embarked_S | \n",
262 | "
\n",
263 | " \n",
264 | " \n",
265 | " \n",
266 | " 0 | \n",
267 | " 0 | \n",
268 | " 3 | \n",
269 | " 0 | \n",
270 | " 22.0 | \n",
271 | " 1 | \n",
272 | " 0 | \n",
273 | " 7.2500 | \n",
274 | " 0 | \n",
275 | " 0 | \n",
276 | " 0 | \n",
277 | " 1 | \n",
278 | "
\n",
279 | " \n",
280 | " 1 | \n",
281 | " 1 | \n",
282 | " 1 | \n",
283 | " 1 | \n",
284 | " 38.0 | \n",
285 | " 1 | \n",
286 | " 0 | \n",
287 | " 71.2833 | \n",
288 | " 0 | \n",
289 | " 1 | \n",
290 | " 0 | \n",
291 | " 0 | \n",
292 | "
\n",
293 | " \n",
294 | " 2 | \n",
295 | " 1 | \n",
296 | " 3 | \n",
297 | " 1 | \n",
298 | " 26.0 | \n",
299 | " 0 | \n",
300 | " 0 | \n",
301 | " 7.9250 | \n",
302 | " 0 | \n",
303 | " 0 | \n",
304 | " 0 | \n",
305 | " 1 | \n",
306 | "
\n",
307 | " \n",
308 | " 3 | \n",
309 | " 1 | \n",
310 | " 1 | \n",
311 | " 1 | \n",
312 | " 35.0 | \n",
313 | " 1 | \n",
314 | " 0 | \n",
315 | " 53.1000 | \n",
316 | " 0 | \n",
317 | " 1 | \n",
318 | " 0 | \n",
319 | " 1 | \n",
320 | "
\n",
321 | " \n",
322 | " 4 | \n",
323 | " 0 | \n",
324 | " 3 | \n",
325 | " 0 | \n",
326 | " 35.0 | \n",
327 | " 0 | \n",
328 | " 0 | \n",
329 | " 8.0500 | \n",
330 | " 0 | \n",
331 | " 0 | \n",
332 | " 0 | \n",
333 | " 1 | \n",
334 | "
\n",
335 | " \n",
336 | "
\n",
337 | "
"
338 | ],
339 | "text/plain": [
340 | " Survived Pclass Sex Age SibSp Parch Fare Title_Unusual \\\n",
341 | "0 0 3 0 22.0 1 0 7.2500 0 \n",
342 | "1 1 1 1 38.0 1 0 71.2833 0 \n",
343 | "2 1 3 1 26.0 0 0 7.9250 0 \n",
344 | "3 1 1 1 35.0 1 0 53.1000 0 \n",
345 | "4 0 3 0 35.0 0 0 8.0500 0 \n",
346 | "\n",
347 | " Cabin_Known Embarked_Q Embarked_S \n",
348 | "0 0 0 1 \n",
349 | "1 1 0 0 \n",
350 | "2 0 0 1 \n",
351 | "3 1 0 1 \n",
352 | "4 0 0 1 "
353 | ]
354 | },
355 | "execution_count": 5,
356 | "metadata": {},
357 | "output_type": "execute_result"
358 | }
359 | ],
360 | "source": [
361 | "data.head()"
362 | ]
363 | },
364 | {
365 | "cell_type": "markdown",
366 | "metadata": {},
367 | "source": [
368 | "## Remove correlated features "
369 | ]
370 | },
371 | {
372 | "cell_type": "code",
373 | "execution_count": 6,
374 | "metadata": {},
375 | "outputs": [],
376 | "source": [
377 | "correlated_features = set()\n",
378 | "correlation_matrix = data.drop('Survived', axis=1).corr()\n",
379 | "\n",
380 | "for i in range(len(correlation_matrix.columns)):\n",
381 | " for j in range(i):\n",
382 | " if abs(correlation_matrix.iloc[i, j]) > 0.8:\n",
383 | " colname = correlation_matrix.columns[i]\n",
384 | " correlated_features.add(colname)"
385 | ]
386 | },
387 | {
388 | "cell_type": "code",
389 | "execution_count": 7,
390 | "metadata": {},
391 | "outputs": [
392 | {
393 | "data": {
394 | "text/plain": [
395 | "set()"
396 | ]
397 | },
398 | "execution_count": 7,
399 | "metadata": {},
400 | "output_type": "execute_result"
401 | }
402 | ],
403 | "source": [
404 | "correlated_features"
405 | ]
406 | },
407 | {
408 | "cell_type": "markdown",
409 | "metadata": {},
410 | "source": [
411 | "## Running RFECV"
412 | ]
413 | },
414 | {
415 | "cell_type": "code",
416 | "execution_count": 8,
417 | "metadata": {},
418 | "outputs": [
419 | {
420 | "data": {
421 | "text/plain": [
422 | "RFECV(cv=StratifiedKFold(n_splits=10, random_state=None, shuffle=False),\n",
423 | " estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,\n",
424 | " class_weight=None, criterion='gini',\n",
425 | " max_depth=None, max_features='auto',\n",
426 | " max_leaf_nodes=None, max_samples=None,\n",
427 | " min_impurity_decrease=0.0,\n",
428 | " min_impurity_split=None,\n",
429 | " min_samples_leaf=1, min_samples_split=2,\n",
430 | " min_weight_fraction_leaf=0.0,\n",
431 | " n_estimators=100, n_jobs=None,\n",
432 | " oob_score=False, random_state=101,\n",
433 | " verbose=0, warm_start=False),\n",
434 | " min_features_to_select=1, n_jobs=None, scoring='accuracy', step=1,\n",
435 | " verbose=0)"
436 | ]
437 | },
438 | "execution_count": 8,
439 | "metadata": {},
440 | "output_type": "execute_result"
441 | }
442 | ],
443 | "source": [
444 | "X = data.drop('Survived', axis=1)\n",
445 | "target = data['Survived']\n",
446 | "\n",
447 | "rfc = RandomForestClassifier(random_state=101)\n",
448 | "rfecv = RFECV(estimator=rfc, step=1, cv=StratifiedKFold(10), scoring='accuracy')\n",
449 | "rfecv.fit(X, target)"
450 | ]
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": 9,
455 | "metadata": {},
456 | "outputs": [
457 | {
458 | "name": "stdout",
459 | "output_type": "stream",
460 | "text": [
461 | "Optimal number of features: 4\n"
462 | ]
463 | }
464 | ],
465 | "source": [
466 | "print('Optimal number of features: {}'.format(rfecv.n_features_))"
467 | ]
468 | },
469 | {
470 | "cell_type": "code",
471 | "execution_count": 10,
472 | "metadata": {},
473 | "outputs": [
474 | {
475 | "data": {
476 | "image/png": "\n",
477 | "text/plain": [
478 | "<Figure size 1152x648 with 1 Axes>"
479 | ]
480 | },
481 | "metadata": {
482 | "needs_background": "light"
483 | },
484 | "output_type": "display_data"
485 | }
486 | ],
487 | "source": [
488 | "plt.figure(figsize=(16, 9))\n",
489 | "plt.title('Recursive Feature Elimination with Cross-Validation', fontsize=18, fontweight='bold', pad=20)\n",
490 | "plt.xlabel('Number of features selected', fontsize=14, labelpad=20)\n",
491 | "plt.ylabel('% Correct Classification', fontsize=14, labelpad=20)\n",
492 | "plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_, color='#303F9F', linewidth=3)\n",
493 | "plt.show()"
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "execution_count": 11,
499 | "metadata": {},
500 | "outputs": [
501 | {
502 | "name": "stdout",
503 | "output_type": "stream",
504 | "text": [
505 | "[3 4 6 7 8 9]\n"
506 | ]
507 | }
508 | ],
509 | "source": [
510 | "print(np.where(rfecv.support_ == False)[0])\n",
511 | "\n",
512 | "X.drop(X.columns[np.where(rfecv.support_ == False)[0]], axis=1, inplace=True)"
513 | ]
514 | },
515 | {
516 | "cell_type": "code",
517 | "execution_count": 12,
518 | "metadata": {},
519 | "outputs": [
520 | {
521 | "data": {
522 | "text/plain": [
523 | "array([0.09550623, 0.27979401, 0.28835754, 0.33634222])"
524 | ]
525 | },
526 | "execution_count": 12,
527 | "metadata": {},
528 | "output_type": "execute_result"
529 | }
530 | ],
531 | "source": [
532 | "rfecv.estimator_.feature_importances_"
533 | ]
534 | },
535 | {
536 | "cell_type": "code",
537 | "execution_count": 13,
538 | "metadata": {},
539 | "outputs": [
540 | {
541 | "data": {
542 | "image/png": "\n",
543 | "text/plain": [
544 | "<Figure size 1152x720 with 1 Axes>"
545 | ]
546 | },
547 | "metadata": {
548 | "needs_background": "light"
549 | },
550 | "output_type": "display_data"
551 | }
552 | ],
553 | "source": [
554 | "dset = pd.DataFrame()\n",
555 | "dset['attr'] = X.columns\n",
556 | "dset['importance'] = rfecv.estimator_.feature_importances_\n",
557 | "dset = dset.sort_values(by='importance', ascending=False)\n",
558 | "\n",
559 | "plt.figure(figsize=(16, 10))\n",
560 | "plt.barh(y=dset['attr'], width=dset['importance'], color='#1976D2')\n",
561 | "plt.title('RFECV - Feature importances', fontsize=20, fontweight='bold', pad=20)\n",
562 | "plt.xlabel('Importance', fontsize=14, labelpad=20)\n",
563 | "plt.show()"
564 | ]
565 | }
566 | ],
567 | "metadata": {
568 | "kernelspec": {
569 | "display_name": "Python 3",
570 | "language": "python",
571 | "name": "python3"
572 | },
573 | "language_info": {
574 | "codemirror_mode": {
575 | "name": "ipython",
576 | "version": 3
577 | },
578 | "file_extension": ".py",
579 | "mimetype": "text/x-python",
580 | "name": "python",
581 | "nbconvert_exporter": "python",
582 | "pygments_lexer": "ipython3",
583 | "version": "3.7.6"
584 | }
585 | },
586 | "nbformat": 4,
587 | "nbformat_minor": 4
588 | }
589 |
--------------------------------------------------------------------------------
/0002_MLDeploy/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/betterdatascience/YouTube/cf24c5cf3192eb7b17c25d94c710d3f941818537/0002_MLDeploy/.DS_Store
--------------------------------------------------------------------------------
/0002_MLDeploy/.ipynb_checkpoints/Test-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 4
6 | }
7 |
--------------------------------------------------------------------------------
/0002_MLDeploy/Test.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import requests"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 5,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "res = requests.post(\n",
19 | " url='http://localhost:5000/predict',\n",
20 | " json={\n",
21 | " 'sepal_length': 5.0,\n",
22 | " 'sepal_width': 3.2,\n",
23 | " 'petal_length': 1.5,\n",
24 | " 'petal_width': 0.3\n",
25 | " }\n",
26 | ")"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 6,
32 | "metadata": {},
33 | "outputs": [
34 | {
35 | "data": {
36 | "text/plain": [
37 | "<Response [200]>"
38 | ]
39 | },
40 | "execution_count": 6,
41 | "metadata": {},
42 | "output_type": "execute_result"
43 | }
44 | ],
45 | "source": [
46 | "res"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 8,
52 | "metadata": {},
53 | "outputs": [
54 | {
55 | "data": {
56 | "text/plain": [
57 | "{'Class': 'setosa',\n",
58 | " 'Input': {'PetalLength': 1.5,\n",
59 | " 'PetalWidth': 0.3,\n",
60 | " 'SepalLength': 5.0,\n",
61 | " 'SepalWidth': 3.2}}"
62 | ]
63 | },
64 | "execution_count": 8,
65 | "metadata": {},
66 | "output_type": "execute_result"
67 | }
68 | ],
69 | "source": [
70 | "res.json()"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": null,
76 | "metadata": {},
77 | "outputs": [],
78 | "source": []
79 | }
80 | ],
81 | "metadata": {
82 | "kernelspec": {
83 | "display_name": "Python 3",
84 | "language": "python",
85 | "name": "python3"
86 | },
87 | "language_info": {
88 | "codemirror_mode": {
89 | "name": "ipython",
90 | "version": 3
91 | },
92 | "file_extension": ".py",
93 | "mimetype": "text/x-python",
94 | "name": "python",
95 | "nbconvert_exporter": "python",
96 | "pygments_lexer": "ipython3",
97 | "version": "3.7.6"
98 | }
99 | },
100 | "nbformat": 4,
101 | "nbformat_minor": 4
102 | }
103 |
--------------------------------------------------------------------------------
/0002_MLDeploy/app.py:
--------------------------------------------------------------------------------
1 | import os
2 | import joblib
3 | from flask import Flask, jsonify, request
4 | from flask_restful import Api, Resource
5 | from modeler.Modeler import Modeler
6 |
7 | app = Flask(__name__)
8 | api = Api(app)
9 |
10 |
class Predict(Resource):
    """REST resource that classifies a single iris measurement sent as JSON."""

    # JSON keys the request body must provide, in the order they are passed
    # to Modeler.predict().
    FEATURES = ('sepal_length', 'sepal_width', 'petal_length', 'petal_width')

    @staticmethod
    def post():
        """Handle POST /predict.

        Expects a JSON object containing every key listed in ``FEATURES``.

        Returns:
            A JSON response echoing the input under 'Input' and the predicted
            label under 'Class'. If the body is not a JSON object, or any
            required key is absent, a JSON response with a 'message' field and
            HTTP status 400 is returned instead.
        """
        # silent=True yields None for an absent/invalid JSON body instead of
        # raising, so the failure can be reported as a clean 400.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            response = jsonify({'message': 'Request body must be a JSON object'})
            response.status_code = 400
            return response

        missing = [name for name in Predict.FEATURES if name not in data]
        if missing:
            response = jsonify({'message': 'Missing required fields: ' + ', '.join(missing)})
            response.status_code = 400
            return response

        sepal_length = data['sepal_length']
        sepal_width = data['sepal_width']
        petal_length = data['petal_length']
        petal_width = data['petal_width']

        m = Modeler()
        # Train when no usable model was loaded. Checking the loaded model
        # (rather than the file's existence) also covers a model file that is
        # present but could not be read.
        if m.model is None:
            m.fit()
        prediction = m.predict([sepal_length, sepal_width, petal_length, petal_width])
        return jsonify({
            'Input': {
                'SepalLength': sepal_length,
                'SepalWidth': sepal_width,
                'PetalLength': petal_length,
                'PetalWidth': petal_width
            },
            'Class': prediction
        })
33 |
# Expose the prediction resource at POST /predict.
api.add_resource(Predict, '/predict')

if __name__ == '__main__':
    # Debug mode enables the interactive debugger, which must never be
    # reachable in a deployed service. Keep it off unless explicitly requested
    # with FLASK_DEBUG=1 (or "true") in the environment.
    app.run(debug=os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true'))
--------------------------------------------------------------------------------
/0002_MLDeploy/modeler/.ipynb_checkpoints/Modeler-checkpoint.py:
--------------------------------------------------------------------------------
1 | import os
2 | import joblib
3 | import pandas as pd
4 | from sklearn.tree import DecisionTreeClassifier
5 |
6 |
class Modeler:
    """Train, persist, and serve a decision-tree classifier for the iris data.

    Attributes:
        df: Training data, read from ``DATA_URL`` the first time it is accessed.
        model: The fitted classifier, or ``None`` when nothing could be loaded
            from ``MODEL_PATH`` and ``fit()`` has not been called yet.
    """

    # CSV used for training; fit() expects it to contain a 'species' column.
    DATA_URL = 'https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv'
    # Location of the persisted model, relative to the working directory.
    MODEL_PATH = 'models/iris.model'

    def __init__(self):
        # The dataset is fetched lazily (see ``df``) so that creating an
        # instance purely for prediction does not require a network download.
        self._df = None
        self.model = self._load_model()

    @property
    def df(self):
        """Training DataFrame, downloaded from ``DATA_URL`` on first access."""
        if self._df is None:
            self._df = pd.read_csv(self.DATA_URL)
        return self._df

    @df.setter
    def df(self, value):
        self._df = value

    def _load_model(self):
        """Return the model stored at ``MODEL_PATH``, or None if it cannot be loaded."""
        # NOTE: joblib files are pickle-based; only load model files you trust.
        try:
            return joblib.load(self.MODEL_PATH)
        except Exception:
            # Best effort: a missing or unreadable file means "not trained yet".
            return None

    def fit(self):
        """Train a DecisionTreeClassifier on ``df`` and save it to ``MODEL_PATH``."""
        X = self.df.drop('species', axis=1)
        y = self.df['species']
        self.model = DecisionTreeClassifier().fit(X, y)
        # Create the target directory first so the dump cannot fail on a
        # fresh checkout that lacks it.
        model_dir = os.path.dirname(self.MODEL_PATH)
        if model_dir:
            os.makedirs(model_dir, exist_ok=True)
        joblib.dump(self.model, self.MODEL_PATH)

    def predict(self, measurement):
        """Predict the label for one measurement.

        Args:
            measurement: Sequence of exactly four values: sepal_length,
                sepal_width, petal_length, petal_width.

        Returns:
            The first (and only) element of the model's prediction.

        Raises:
            RuntimeError: No model is loaded and none can be read from disk.
            ValueError: ``measurement`` does not contain exactly four values.
        """
        if self.model is None:
            # The model may have been written to disk after this instance
            # was created; try once more before giving up.
            self.model = self._load_model()
        if self.model is None:
            raise RuntimeError('Model not trained yet. Call .fit() before making predictions')
        if len(measurement) != 4:
            raise ValueError(f'Expected sepal_length, sepal_width, petal_length, petal_width, but got {measurement}')
        prediction = self.model.predict([measurement])
        return prediction[0]
--------------------------------------------------------------------------------
/0002_MLDeploy/modeler/Modeler.py:
--------------------------------------------------------------------------------
1 | import os
2 | import joblib
3 | import pandas as pd
4 | from sklearn.tree import DecisionTreeClassifier
5 |
6 |
class Modeler:
    """Train, persist, and serve a decision-tree classifier for the iris data.

    Attributes:
        df: Training data, read from ``DATA_URL`` the first time it is accessed.
        model: The fitted classifier, or ``None`` when nothing could be loaded
            from ``MODEL_PATH`` and ``fit()`` has not been called yet.
    """

    # CSV used for training; fit() expects it to contain a 'species' column.
    DATA_URL = 'https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv'
    # Location of the persisted model, relative to the working directory.
    MODEL_PATH = 'models/iris.model'

    def __init__(self):
        # The dataset is fetched lazily (see ``df``) so that creating an
        # instance purely for prediction does not require a network download.
        self._df = None
        self.model = self._load_model()

    @property
    def df(self):
        """Training DataFrame, downloaded from ``DATA_URL`` on first access."""
        if self._df is None:
            self._df = pd.read_csv(self.DATA_URL)
        return self._df

    @df.setter
    def df(self, value):
        self._df = value

    def _load_model(self):
        """Return the model stored at ``MODEL_PATH``, or None if it cannot be loaded."""
        # NOTE: joblib files are pickle-based; only load model files you trust.
        try:
            return joblib.load(self.MODEL_PATH)
        except Exception:
            # Best effort: a missing or unreadable file means "not trained yet".
            return None

    def fit(self):
        """Train a DecisionTreeClassifier on ``df`` and save it to ``MODEL_PATH``."""
        X = self.df.drop('species', axis=1)
        y = self.df['species']
        self.model = DecisionTreeClassifier().fit(X, y)
        # Create the target directory first so the dump cannot fail on a
        # fresh checkout that lacks it.
        model_dir = os.path.dirname(self.MODEL_PATH)
        if model_dir:
            os.makedirs(model_dir, exist_ok=True)
        joblib.dump(self.model, self.MODEL_PATH)

    def predict(self, measurement):
        """Predict the label for one measurement.

        Args:
            measurement: Sequence of exactly four values: sepal_length,
                sepal_width, petal_length, petal_width.

        Returns:
            The first (and only) element of the model's prediction.

        Raises:
            RuntimeError: No model is loaded and none can be read from disk.
            ValueError: ``measurement`` does not contain exactly four values.
        """
        if self.model is None:
            # The model may have been written to disk after this instance
            # was created; try once more before giving up.
            self.model = self._load_model()
        if self.model is None:
            raise RuntimeError('Model not trained yet. Call .fit() before making predictions')
        if len(measurement) != 4:
            raise ValueError(f'Expected sepal_length, sepal_width, petal_length, petal_width, but got {measurement}')
        prediction = self.model.predict([measurement])
        return prediction[0]
--------------------------------------------------------------------------------
/0002_MLDeploy/modeler/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/betterdatascience/YouTube/cf24c5cf3192eb7b17c25d94c710d3f941818537/0002_MLDeploy/modeler/__init__.py
--------------------------------------------------------------------------------
/0002_MLDeploy/modeler/__pycache__/Modeler.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/betterdatascience/YouTube/cf24c5cf3192eb7b17c25d94c710d3f941818537/0002_MLDeploy/modeler/__pycache__/Modeler.cpython-37.pyc
--------------------------------------------------------------------------------
/0002_MLDeploy/modeler/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/betterdatascience/YouTube/cf24c5cf3192eb7b17c25d94c710d3f941818537/0002_MLDeploy/modeler/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/0002_MLDeploy/models/iris.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/betterdatascience/YouTube/cf24c5cf3192eb7b17c25d94c710d3f941818537/0002_MLDeploy/models/iris.model
--------------------------------------------------------------------------------
/0003_Top3PandasFunctions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "\n",
8 | "

\n",
9 | "
\n",
10 | "\n",
11 | "# Top 3 Pandas Functions"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "import pandas as pd"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 2,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " PassengerId | \n",
50 | " Survived | \n",
51 | " Pclass | \n",
52 | " Name | \n",
53 | " Sex | \n",
54 | " Age | \n",
55 | " SibSp | \n",
56 | " Parch | \n",
57 | " Ticket | \n",
58 | " Fare | \n",
59 | " Cabin | \n",
60 | " Embarked | \n",
61 | "
\n",
62 | " \n",
63 | " \n",
64 | " \n",
65 | " 0 | \n",
66 | " 1 | \n",
67 | " 0 | \n",
68 | " 3 | \n",
69 | " Braund, Mr. Owen Harris | \n",
70 | " male | \n",
71 | " 22.0 | \n",
72 | " 1 | \n",
73 | " 0 | \n",
74 | " A/5 21171 | \n",
75 | " 7.2500 | \n",
76 | " NaN | \n",
77 | " S | \n",
78 | "
\n",
79 | " \n",
80 | " 1 | \n",
81 | " 2 | \n",
82 | " 1 | \n",
83 | " 1 | \n",
84 | " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
85 | " female | \n",
86 | " 38.0 | \n",
87 | " 1 | \n",
88 | " 0 | \n",
89 | " PC 17599 | \n",
90 | " 71.2833 | \n",
91 | " C85 | \n",
92 | " C | \n",
93 | "
\n",
94 | " \n",
95 | " 2 | \n",
96 | " 3 | \n",
97 | " 1 | \n",
98 | " 3 | \n",
99 | " Heikkinen, Miss. Laina | \n",
100 | " female | \n",
101 | " 26.0 | \n",
102 | " 0 | \n",
103 | " 0 | \n",
104 | " STON/O2. 3101282 | \n",
105 | " 7.9250 | \n",
106 | " NaN | \n",
107 | " S | \n",
108 | "
\n",
109 | " \n",
110 | " 3 | \n",
111 | " 4 | \n",
112 | " 1 | \n",
113 | " 1 | \n",
114 | " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
115 | " female | \n",
116 | " 35.0 | \n",
117 | " 1 | \n",
118 | " 0 | \n",
119 | " 113803 | \n",
120 | " 53.1000 | \n",
121 | " C123 | \n",
122 | " S | \n",
123 | "
\n",
124 | " \n",
125 | " 4 | \n",
126 | " 5 | \n",
127 | " 0 | \n",
128 | " 3 | \n",
129 | " Allen, Mr. William Henry | \n",
130 | " male | \n",
131 | " 35.0 | \n",
132 | " 0 | \n",
133 | " 0 | \n",
134 | " 373450 | \n",
135 | " 8.0500 | \n",
136 | " NaN | \n",
137 | " S | \n",
138 | "
\n",
139 | " \n",
140 | "
\n",
141 | "
"
142 | ],
143 | "text/plain": [
144 | " PassengerId Survived Pclass \\\n",
145 | "0 1 0 3 \n",
146 | "1 2 1 1 \n",
147 | "2 3 1 3 \n",
148 | "3 4 1 1 \n",
149 | "4 5 0 3 \n",
150 | "\n",
151 | " Name Sex Age SibSp \\\n",
152 | "0 Braund, Mr. Owen Harris male 22.0 1 \n",
153 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
154 | "2 Heikkinen, Miss. Laina female 26.0 0 \n",
155 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
156 | "4 Allen, Mr. William Henry male 35.0 0 \n",
157 | "\n",
158 | " Parch Ticket Fare Cabin Embarked \n",
159 | "0 0 A/5 21171 7.2500 NaN S \n",
160 | "1 0 PC 17599 71.2833 C85 C \n",
161 | "2 0 STON/O2. 3101282 7.9250 NaN S \n",
162 | "3 0 113803 53.1000 C123 S \n",
163 | "4 0 373450 8.0500 NaN S "
164 | ]
165 | },
166 | "execution_count": 2,
167 | "metadata": {},
168 | "output_type": "execute_result"
169 | }
170 | ],
171 | "source": [
172 | "df = pd.read_csv('https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')\n",
173 | "df.head()"
174 | ]
175 | },
176 | {
177 | "cell_type": "markdown",
178 | "metadata": {},
179 | "source": [
180 | "# 1. apply()"
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "execution_count": 3,
186 | "metadata": {},
187 | "outputs": [],
188 | "source": [
189 | "def extract_title(string):\n",
190 | " string = string.split(',')[-1]\n",
191 | " string = string.strip()\n",
192 | " string = string.split('.')[0]\n",
193 | " return string"
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": 4,
199 | "metadata": {},
200 | "outputs": [
201 | {
202 | "data": {
203 | "text/plain": [
204 | "0 Mr\n",
205 | "1 Mrs\n",
206 | "2 Miss\n",
207 | "3 Mrs\n",
208 | "4 Mr\n",
209 | "Name: Name, dtype: object"
210 | ]
211 | },
212 | "execution_count": 4,
213 | "metadata": {},
214 | "output_type": "execute_result"
215 | }
216 | ],
217 | "source": [
218 | "titles = df['Name'].apply(extract_title)\n",
219 | "titles[:5]"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 5,
225 | "metadata": {},
226 | "outputs": [
227 | {
228 | "data": {
229 | "text/plain": [
230 | "0 Mr\n",
231 | "1 Mrs\n",
232 | "2 Miss\n",
233 | "3 Mrs\n",
234 | "4 Mr\n",
235 | "Name: Name, dtype: object"
236 | ]
237 | },
238 | "execution_count": 5,
239 | "metadata": {},
240 | "output_type": "execute_result"
241 | }
242 | ],
243 | "source": [
244 | "titles = df['Name'].apply(lambda x: x.split(',')[-1].strip().split('.')[0])\n",
245 | "titles[:5]"
246 | ]
247 | },
248 | {
249 | "cell_type": "markdown",
250 | "metadata": {},
251 | "source": [
252 | "# 2. value_counts()"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 6,
258 | "metadata": {},
259 | "outputs": [
260 | {
261 | "data": {
262 | "text/plain": [
263 | "Mr 517\n",
264 | "Miss 182\n",
265 | "Mrs 125\n",
266 | "Master 40\n",
267 | "Dr 7\n",
268 | "Rev 6\n",
269 | "Mlle 2\n",
270 | "Col 2\n",
271 | "Major 2\n",
272 | "Mme 1\n",
273 | "Jonkheer 1\n",
274 | "Sir 1\n",
275 | "Lady 1\n",
276 | "Don 1\n",
277 | "the Countess 1\n",
278 | "Capt 1\n",
279 | "Ms 1\n",
280 | "Name: Name, dtype: int64"
281 | ]
282 | },
283 | "execution_count": 6,
284 | "metadata": {},
285 | "output_type": "execute_result"
286 | }
287 | ],
288 | "source": [
289 | "titles.value_counts()"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 8,
295 | "metadata": {},
296 | "outputs": [
297 | {
298 | "data": {
299 | "text/plain": [
300 | "Mr 58.024691\n",
301 | "Miss 20.426487\n",
302 | "Mrs 14.029181\n",
303 | "Master 4.489338\n",
304 | "Dr 0.785634\n",
305 | "Rev 0.673401\n",
306 | "Mlle 0.224467\n",
307 | "Col 0.224467\n",
308 | "Major 0.224467\n",
309 | "Mme 0.112233\n",
310 | "Jonkheer 0.112233\n",
311 | "Sir 0.112233\n",
312 | "Lady 0.112233\n",
313 | "Don 0.112233\n",
314 | "the Countess 0.112233\n",
315 | "Capt 0.112233\n",
316 | "Ms 0.112233\n",
317 | "Name: Name, dtype: float64"
318 | ]
319 | },
320 | "execution_count": 8,
321 | "metadata": {},
322 | "output_type": "execute_result"
323 | }
324 | ],
325 | "source": [
326 | "titles.value_counts(normalize=True) * 100"
327 | ]
328 | },
329 | {
330 | "cell_type": "markdown",
331 | "metadata": {},
332 | "source": [
333 | "# 3. get_dummies()"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 9,
339 | "metadata": {},
340 | "outputs": [
341 | {
342 | "data": {
343 | "text/html": [
344 | "\n",
345 | "\n",
358 | "
\n",
359 | " \n",
360 | " \n",
361 | " | \n",
362 | " C | \n",
363 | " Q | \n",
364 | " S | \n",
365 | "
\n",
366 | " \n",
367 | " \n",
368 | " \n",
369 | " 0 | \n",
370 | " 0 | \n",
371 | " 0 | \n",
372 | " 1 | \n",
373 | "
\n",
374 | " \n",
375 | " 1 | \n",
376 | " 1 | \n",
377 | " 0 | \n",
378 | " 0 | \n",
379 | "
\n",
380 | " \n",
381 | " 2 | \n",
382 | " 0 | \n",
383 | " 0 | \n",
384 | " 1 | \n",
385 | "
\n",
386 | " \n",
387 | " 3 | \n",
388 | " 0 | \n",
389 | " 0 | \n",
390 | " 1 | \n",
391 | "
\n",
392 | " \n",
393 | " 4 | \n",
394 | " 0 | \n",
395 | " 0 | \n",
396 | " 1 | \n",
397 | "
\n",
398 | " \n",
399 | "
\n",
400 | "
"
401 | ],
402 | "text/plain": [
403 | " C Q S\n",
404 | "0 0 0 1\n",
405 | "1 1 0 0\n",
406 | "2 0 0 1\n",
407 | "3 0 0 1\n",
408 | "4 0 0 1"
409 | ]
410 | },
411 | "execution_count": 9,
412 | "metadata": {},
413 | "output_type": "execute_result"
414 | }
415 | ],
416 | "source": [
417 | "emb = pd.get_dummies(df['Embarked'])\n",
418 | "emb.head()"
419 | ]
420 | },
421 | {
422 | "cell_type": "code",
423 | "execution_count": 10,
424 | "metadata": {},
425 | "outputs": [
426 | {
427 | "data": {
428 | "text/html": [
429 | "\n",
430 | "\n",
443 | "
\n",
444 | " \n",
445 | " \n",
446 | " | \n",
447 | " Embarked_C | \n",
448 | " Embarked_Q | \n",
449 | " Embarked_S | \n",
450 | "
\n",
451 | " \n",
452 | " \n",
453 | " \n",
454 | " 0 | \n",
455 | " 0 | \n",
456 | " 0 | \n",
457 | " 1 | \n",
458 | "
\n",
459 | " \n",
460 | " 1 | \n",
461 | " 1 | \n",
462 | " 0 | \n",
463 | " 0 | \n",
464 | "
\n",
465 | " \n",
466 | " 2 | \n",
467 | " 0 | \n",
468 | " 0 | \n",
469 | " 1 | \n",
470 | "
\n",
471 | " \n",
472 | " 3 | \n",
473 | " 0 | \n",
474 | " 0 | \n",
475 | " 1 | \n",
476 | "
\n",
477 | " \n",
478 | " 4 | \n",
479 | " 0 | \n",
480 | " 0 | \n",
481 | " 1 | \n",
482 | "
\n",
483 | " \n",
484 | "
\n",
485 | "
"
486 | ],
487 | "text/plain": [
488 | " Embarked_C Embarked_Q Embarked_S\n",
489 | "0 0 0 1\n",
490 | "1 1 0 0\n",
491 | "2 0 0 1\n",
492 | "3 0 0 1\n",
493 | "4 0 0 1"
494 | ]
495 | },
496 | "execution_count": 10,
497 | "metadata": {},
498 | "output_type": "execute_result"
499 | }
500 | ],
501 | "source": [
502 | "emb = pd.get_dummies(df['Embarked'], prefix='Embarked')\n",
503 | "emb.head()"
504 | ]
505 | },
506 | {
507 | "cell_type": "code",
508 | "execution_count": 11,
509 | "metadata": {},
510 | "outputs": [
511 | {
512 | "data": {
513 | "text/html": [
514 | "\n",
515 | "\n",
528 | "
\n",
529 | " \n",
530 | " \n",
531 | " | \n",
532 | " Embarked_Q | \n",
533 | " Embarked_S | \n",
534 | "
\n",
535 | " \n",
536 | " \n",
537 | " \n",
538 | " 0 | \n",
539 | " 0 | \n",
540 | " 1 | \n",
541 | "
\n",
542 | " \n",
543 | " 1 | \n",
544 | " 0 | \n",
545 | " 0 | \n",
546 | "
\n",
547 | " \n",
548 | " 2 | \n",
549 | " 0 | \n",
550 | " 1 | \n",
551 | "
\n",
552 | " \n",
553 | " 3 | \n",
554 | " 0 | \n",
555 | " 1 | \n",
556 | "
\n",
557 | " \n",
558 | " 4 | \n",
559 | " 0 | \n",
560 | " 1 | \n",
561 | "
\n",
562 | " \n",
563 | "
\n",
564 | "
"
565 | ],
566 | "text/plain": [
567 | " Embarked_Q Embarked_S\n",
568 | "0 0 1\n",
569 | "1 0 0\n",
570 | "2 0 1\n",
571 | "3 0 1\n",
572 | "4 0 1"
573 | ]
574 | },
575 | "execution_count": 11,
576 | "metadata": {},
577 | "output_type": "execute_result"
578 | }
579 | ],
580 | "source": [
581 | "emb = pd.get_dummies(df['Embarked'], prefix='Embarked', drop_first=True)\n",
582 | "emb.head()"
583 | ]
584 | }
585 | ],
586 | "metadata": {
587 | "kernelspec": {
588 | "display_name": "Python 3",
589 | "language": "python",
590 | "name": "python3"
591 | },
592 | "language_info": {
593 | "codemirror_mode": {
594 | "name": "ipython",
595 | "version": 3
596 | },
597 | "file_extension": ".py",
598 | "mimetype": "text/x-python",
599 | "name": "python",
600 | "nbconvert_exporter": "python",
601 | "pygments_lexer": "ipython3",
602 | "version": "3.7.6"
603 | }
604 | },
605 | "nbformat": 4,
606 | "nbformat_minor": 4
607 | }
608 |
--------------------------------------------------------------------------------
/0004_TPOT.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "\n",
8 | "

\n",
9 | "
\n",
10 | "\n",
11 | "# TPOT"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "name": "stderr",
21 | "output_type": "stream",
22 | "text": [
23 | "/opt/anaconda3/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
24 | " return f(*args, **kwds)\n"
25 | ]
26 | }
27 | ],
28 | "source": [
29 | "import pandas as pd\n",
30 | "from sklearn.preprocessing import StandardScaler\n",
31 | "from sklearn.model_selection import train_test_split"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 2,
37 | "metadata": {},
38 | "outputs": [
39 | {
40 | "data": {
41 | "text/html": [
42 | "\n",
43 | "\n",
56 | "
\n",
57 | " \n",
58 | " \n",
59 | " | \n",
60 | " PassengerId | \n",
61 | " Survived | \n",
62 | " Pclass | \n",
63 | " Name | \n",
64 | " Sex | \n",
65 | " Age | \n",
66 | " SibSp | \n",
67 | " Parch | \n",
68 | " Ticket | \n",
69 | " Fare | \n",
70 | " Cabin | \n",
71 | " Embarked | \n",
72 | "
\n",
73 | " \n",
74 | " \n",
75 | " \n",
76 | " 0 | \n",
77 | " 1 | \n",
78 | " 0 | \n",
79 | " 3 | \n",
80 | " Braund, Mr. Owen Harris | \n",
81 | " male | \n",
82 | " 22.0 | \n",
83 | " 1 | \n",
84 | " 0 | \n",
85 | " A/5 21171 | \n",
86 | " 7.2500 | \n",
87 | " NaN | \n",
88 | " S | \n",
89 | "
\n",
90 | " \n",
91 | " 1 | \n",
92 | " 2 | \n",
93 | " 1 | \n",
94 | " 1 | \n",
95 | " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
96 | " female | \n",
97 | " 38.0 | \n",
98 | " 1 | \n",
99 | " 0 | \n",
100 | " PC 17599 | \n",
101 | " 71.2833 | \n",
102 | " C85 | \n",
103 | " C | \n",
104 | "
\n",
105 | " \n",
106 | " 2 | \n",
107 | " 3 | \n",
108 | " 1 | \n",
109 | " 3 | \n",
110 | " Heikkinen, Miss. Laina | \n",
111 | " female | \n",
112 | " 26.0 | \n",
113 | " 0 | \n",
114 | " 0 | \n",
115 | " STON/O2. 3101282 | \n",
116 | " 7.9250 | \n",
117 | " NaN | \n",
118 | " S | \n",
119 | "
\n",
120 | " \n",
121 | " 3 | \n",
122 | " 4 | \n",
123 | " 1 | \n",
124 | " 1 | \n",
125 | " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
126 | " female | \n",
127 | " 35.0 | \n",
128 | " 1 | \n",
129 | " 0 | \n",
130 | " 113803 | \n",
131 | " 53.1000 | \n",
132 | " C123 | \n",
133 | " S | \n",
134 | "
\n",
135 | " \n",
136 | " 4 | \n",
137 | " 5 | \n",
138 | " 0 | \n",
139 | " 3 | \n",
140 | " Allen, Mr. William Henry | \n",
141 | " male | \n",
142 | " 35.0 | \n",
143 | " 0 | \n",
144 | " 0 | \n",
145 | " 373450 | \n",
146 | " 8.0500 | \n",
147 | " NaN | \n",
148 | " S | \n",
149 | "
\n",
150 | " \n",
151 | "
\n",
152 | "
"
153 | ],
154 | "text/plain": [
155 | " PassengerId Survived Pclass \\\n",
156 | "0 1 0 3 \n",
157 | "1 2 1 1 \n",
158 | "2 3 1 3 \n",
159 | "3 4 1 1 \n",
160 | "4 5 0 3 \n",
161 | "\n",
162 | " Name Sex Age SibSp \\\n",
163 | "0 Braund, Mr. Owen Harris male 22.0 1 \n",
164 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
165 | "2 Heikkinen, Miss. Laina female 26.0 0 \n",
166 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
167 | "4 Allen, Mr. William Henry male 35.0 0 \n",
168 | "\n",
169 | " Parch Ticket Fare Cabin Embarked \n",
170 | "0 0 A/5 21171 7.2500 NaN S \n",
171 | "1 0 PC 17599 71.2833 C85 C \n",
172 | "2 0 STON/O2. 3101282 7.9250 NaN S \n",
173 | "3 0 113803 53.1000 C123 S \n",
174 | "4 0 373450 8.0500 NaN S "
175 | ]
176 | },
177 | "execution_count": 2,
178 | "metadata": {},
179 | "output_type": "execute_result"
180 | }
181 | ],
182 | "source": [
183 | "data = pd.read_csv('https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')\n",
184 | "data.head()"
185 | ]
186 | },
187 | {
188 | "cell_type": "markdown",
189 | "metadata": {},
190 | "source": [
191 | "## Data preparation"
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": 3,
197 | "metadata": {},
198 | "outputs": [],
199 | "source": [
200 | "data.drop(['Ticket', 'PassengerId'], axis=1, inplace=True)  # columns not used as features\n",
201 | "\n",
202 | "gender_mapper = {'male': 0, 'female': 1}\n",
203 | "data['Sex'] = data['Sex'].map(gender_mapper)  # assign back: an inplace replace on a column selection does not update `data` under pandas Copy-on-Write\n",
204 | "\n",
205 | "data['Title'] = data['Name'].apply(lambda x: x.split(',')[1].strip().split(' ')[0])  # first token after the comma, e.g. 'Mr.'\n",
206 | "data['Title'] = [0 if x in ['Mr.', 'Miss.', 'Mrs.'] else 1 for x in data['Title']]  # 1 = any title other than Mr./Miss./Mrs.\n",
207 | "data = data.rename(columns={'Title': 'Title_Unusual'})\n",
208 | "data.drop('Name', axis=1, inplace=True)\n",
209 | "\n",
210 | "data['Cabin_Known'] = data['Cabin'].notna().astype(int)  # 1 when a cabin value is present, 0 when it is missing\n",
211 | "data.drop('Cabin', axis=1, inplace=True)\n",
212 | "\n",
213 | "emb_dummies = pd.get_dummies(data['Embarked'], drop_first=True, prefix='Embarked', dtype=int)  # dtype=int keeps 0/1 columns (pandas >= 2.0 defaults to bool)\n",
214 | "data = pd.concat([data, emb_dummies], axis=1)\n",
215 | "data.drop('Embarked', axis=1, inplace=True)\n",
216 | "\n",
217 | "data['Age'] = data['Age'].fillna(int(data['Age'].mean()))  # fill missing ages with the mean age truncated to an integer"
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": 4,
223 | "metadata": {},
224 | "outputs": [
225 | {
226 | "data": {
227 | "text/html": [
228 | "\n",
229 | "\n",
242 | "
\n",
243 | " \n",
244 | " \n",
245 | " | \n",
246 | " Survived | \n",
247 | " Pclass | \n",
248 | " Sex | \n",
249 | " Age | \n",
250 | " SibSp | \n",
251 | " Parch | \n",
252 | " Fare | \n",
253 | " Title_Unusual | \n",
254 | " Cabin_Known | \n",
255 | " Embarked_Q | \n",
256 | " Embarked_S | \n",
257 | "
\n",
258 | " \n",
259 | " \n",
260 | " \n",
261 | " 0 | \n",
262 | " 0 | \n",
263 | " 3 | \n",
264 | " 0 | \n",
265 | " 22.0 | \n",
266 | " 1 | \n",
267 | " 0 | \n",
268 | " 7.2500 | \n",
269 | " 0 | \n",
270 | " 0 | \n",
271 | " 0 | \n",
272 | " 1 | \n",
273 | "
\n",
274 | " \n",
275 | " 1 | \n",
276 | " 1 | \n",
277 | " 1 | \n",
278 | " 1 | \n",
279 | " 38.0 | \n",
280 | " 1 | \n",
281 | " 0 | \n",
282 | " 71.2833 | \n",
283 | " 0 | \n",
284 | " 1 | \n",
285 | " 0 | \n",
286 | " 0 | \n",
287 | "
\n",
288 | " \n",
289 | " 2 | \n",
290 | " 1 | \n",
291 | " 3 | \n",
292 | " 1 | \n",
293 | " 26.0 | \n",
294 | " 0 | \n",
295 | " 0 | \n",
296 | " 7.9250 | \n",
297 | " 0 | \n",
298 | " 0 | \n",
299 | " 0 | \n",
300 | " 1 | \n",
301 | "
\n",
302 | " \n",
303 | " 3 | \n",
304 | " 1 | \n",
305 | " 1 | \n",
306 | " 1 | \n",
307 | " 35.0 | \n",
308 | " 1 | \n",
309 | " 0 | \n",
310 | " 53.1000 | \n",
311 | " 0 | \n",
312 | " 1 | \n",
313 | " 0 | \n",
314 | " 1 | \n",
315 | "
\n",
316 | " \n",
317 | " 4 | \n",
318 | " 0 | \n",
319 | " 3 | \n",
320 | " 0 | \n",
321 | " 35.0 | \n",
322 | " 0 | \n",
323 | " 0 | \n",
324 | " 8.0500 | \n",
325 | " 0 | \n",
326 | " 0 | \n",
327 | " 0 | \n",
328 | " 1 | \n",
329 | "
\n",
330 | " \n",
331 | "
\n",
332 | "
"
333 | ],
334 | "text/plain": [
335 | " Survived Pclass Sex Age SibSp Parch Fare Title_Unusual \\\n",
336 | "0 0 3 0 22.0 1 0 7.2500 0 \n",
337 | "1 1 1 1 38.0 1 0 71.2833 0 \n",
338 | "2 1 3 1 26.0 0 0 7.9250 0 \n",
339 | "3 1 1 1 35.0 1 0 53.1000 0 \n",
340 | "4 0 3 0 35.0 0 0 8.0500 0 \n",
341 | "\n",
342 | " Cabin_Known Embarked_Q Embarked_S \n",
343 | "0 0 0 1 \n",
344 | "1 1 0 0 \n",
345 | "2 0 0 1 \n",
346 | "3 1 0 1 \n",
347 | "4 0 0 1 "
348 | ]
349 | },
350 | "execution_count": 4,
351 | "metadata": {},
352 | "output_type": "execute_result"
353 | }
354 | ],
355 | "source": [
356 | "data.head()"
357 | ]
358 | },
359 | {
360 | "cell_type": "markdown",
361 | "metadata": {},
362 | "source": [
363 | "## Train/test split and scaling"
364 | ]
365 | },
366 | {
367 | "cell_type": "code",
368 | "execution_count": 5,
369 | "metadata": {},
370 | "outputs": [],
371 | "source": [
372 | "X = data.drop('Survived', axis=1)\n",
373 | "y = data['Survived']\n",
374 | "\n",
375 | "X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": 6,
381 | "metadata": {},
382 | "outputs": [],
383 | "source": [
384 | "ss = StandardScaler()\n",
385 | "X_train_scaled = ss.fit_transform(X_train)\n",
386 | "X_test_scaled = ss.transform(X_test)"
387 | ]
388 | },
389 | {
390 | "cell_type": "markdown",
391 | "metadata": {},
392 | "source": [
393 | "# TPOT"
394 | ]
395 | },
396 | {
397 | "cell_type": "code",
398 | "execution_count": 7,
399 | "metadata": {},
400 | "outputs": [
401 | {
402 | "name": "stderr",
403 | "output_type": "stream",
404 | "text": [
405 | "/opt/anaconda3/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
406 | " return f(*args, **kwds)\n",
407 | "/opt/anaconda3/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
408 | " return f(*args, **kwds)\n",
409 | "/opt/anaconda3/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
410 | " return f(*args, **kwds)\n",
411 | "/opt/anaconda3/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
412 | " return f(*args, **kwds)\n"
413 | ]
414 | },
415 | {
416 | "data": {
417 | "application/vnd.jupyter.widget-view+json": {
418 | "model_id": "486d08b62e1d41f09461fea3e3eb29ba",
419 | "version_major": 2,
420 | "version_minor": 0
421 | },
422 | "text/plain": [
423 | "HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', style=ProgressStyle(description_w…"
424 | ]
425 | },
426 | "metadata": {},
427 | "output_type": "display_data"
428 | },
429 | {
430 | "name": "stdout",
431 | "output_type": "stream",
432 | "text": [
433 | "\n",
434 | "Generation 1 - Current best internal CV score: 0.8216389244558258\n",
435 | "Generation 2 - Current best internal CV score: 0.8216389244558258\n",
436 | "Generation 3 - Current best internal CV score: 0.8244361272530286\n",
437 | "Generation 4 - Current best internal CV score: 0.8244361272530287\n",
438 | "10.02 minutes have elapsed. TPOT will close down.\n",
439 | "TPOT closed during evaluation in one generation.\n",
440 | "WARNING: TPOT may not provide a good pipeline if TPOT is stopped/interrupted in a early generation.\n",
441 | "\n",
442 | "\n",
443 | "TPOT closed prematurely. Will use the current best pipeline.\n",
444 | "\n",
445 | "Best pipeline: GradientBoostingClassifier(input_matrix, learning_rate=0.1, max_depth=10, max_features=0.55, min_samples_leaf=16, min_samples_split=3, n_estimators=100, subsample=0.6000000000000001)\n"
446 | ]
447 | },
448 | {
449 | "data": {
450 | "text/plain": [
451 | "TPOTClassifier(config_dict=None, crossover_rate=0.1, cv=5,\n",
452 | " disable_update_check=False, early_stop=None, generations=100,\n",
453 | " log_file=,\n",
454 | " max_eval_time_mins=5, max_time_mins=10, memory=None,\n",
455 | " mutation_rate=0.9, n_jobs=1, offspring_size=None,\n",
456 | " periodic_checkpoint_folder=None, population_size=100,\n",
457 | " random_state=None, scoring=None, subsample=1.0, template=None,\n",
458 | " use_dask=False, verbosity=2, warm_start=False)"
459 | ]
460 | },
461 | "execution_count": 7,
462 | "metadata": {},
463 | "output_type": "execute_result"
464 | }
465 | ],
466 | "source": [
467 | "from tpot import TPOTClassifier\n",
468 | "\n",
469 | "tpot = TPOTClassifier(verbosity=2, max_time_mins=10)\n",
470 | "tpot.fit(X_train_scaled, y_train)"
471 | ]
472 | },
473 | {
474 | "cell_type": "code",
475 | "execution_count": null,
476 | "metadata": {},
477 | "outputs": [],
478 | "source": []
479 | },
480 | {
481 | "cell_type": "code",
482 | "execution_count": 8,
483 | "metadata": {},
484 | "outputs": [
485 | {
486 | "data": {
487 | "text/plain": [
488 | "Pipeline(memory=None,\n",
489 | " steps=[('gradientboostingclassifier',\n",
490 | " GradientBoostingClassifier(ccp_alpha=0.0,\n",
491 | " criterion='friedman_mse', init=None,\n",
492 | " learning_rate=0.1, loss='deviance',\n",
493 | " max_depth=10, max_features=0.55,\n",
494 | " max_leaf_nodes=None,\n",
495 | " min_impurity_decrease=0.0,\n",
496 | " min_impurity_split=None,\n",
497 | " min_samples_leaf=16,\n",
498 | " min_samples_split=3,\n",
499 | " min_weight_fraction_leaf=0.0,\n",
500 | " n_estimators=100,\n",
501 | " n_iter_no_change=None,\n",
502 | " presort='deprecated',\n",
503 | " random_state=None,\n",
504 | " subsample=0.6000000000000001,\n",
505 | " tol=0.0001, validation_fraction=0.1,\n",
506 | " verbose=0, warm_start=False))],\n",
507 | " verbose=False)"
508 | ]
509 | },
510 | "execution_count": 8,
511 | "metadata": {},
512 | "output_type": "execute_result"
513 | }
514 | ],
515 | "source": [
516 | "tpot.fitted_pipeline_"
517 | ]
518 | },
519 | {
520 | "cell_type": "code",
521 | "execution_count": 9,
522 | "metadata": {},
523 | "outputs": [
524 | {
525 | "data": {
526 | "text/plain": [
527 | "0.8491620111731844"
528 | ]
529 | },
530 | "execution_count": 9,
531 | "metadata": {},
532 | "output_type": "execute_result"
533 | }
534 | ],
535 | "source": [
536 | "tpot.score(X_test_scaled, y_test)"
537 | ]
538 | }
539 | ],
540 | "metadata": {
541 | "kernelspec": {
542 | "display_name": "Python 3",
543 | "language": "python",
544 | "name": "python3"
545 | },
546 | "language_info": {
547 | "codemirror_mode": {
548 | "name": "ipython",
549 | "version": 3
550 | },
551 | "file_extension": ".py",
552 | "mimetype": "text/x-python",
553 | "name": "python",
554 | "nbconvert_exporter": "python",
555 | "pygments_lexer": "ipython3",
556 | "version": "3.7.6"
557 | }
558 | },
559 | "nbformat": 4,
560 | "nbformat_minor": 4
561 | }
562 |
--------------------------------------------------------------------------------
/data/RealEstate.csv:
--------------------------------------------------------------------------------
1 | No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area
2 | 1,2012.917,32,84.87882,10,24.98298,121.54024,37.9
3 | 2,2012.917,19.5,306.5947,9,24.98034,121.53951,42.2
4 | 3,2013.583,13.3,561.9845,5,24.98746,121.54391,47.3
5 | 4,2013.500,13.3,561.9845,5,24.98746,121.54391,54.8
6 | 5,2012.833,5,390.5684,5,24.97937,121.54245,43.1
7 | 6,2012.667,7.1,2175.03,3,24.96305,121.51254,32.1
8 | 7,2012.667,34.5,623.4731,7,24.97933,121.53642,40.3
9 | 8,2013.417,20.3,287.6025,6,24.98042,121.54228,46.7
10 | 9,2013.500,31.7,5512.038,1,24.95095,121.48458,18.8
11 | 10,2013.417,17.9,1783.18,3,24.96731,121.51486,22.1
12 | 11,2013.083,34.8,405.2134,1,24.97349,121.53372,41.4
13 | 12,2013.333,6.3,90.45606,9,24.97433,121.5431,58.1
14 | 13,2012.917,13,492.2313,5,24.96515,121.53737,39.3
15 | 14,2012.667,20.4,2469.645,4,24.96108,121.51046,23.8
16 | 15,2013.500,13.2,1164.838,4,24.99156,121.53406,34.3
17 | 16,2013.583,35.7,579.2083,2,24.9824,121.54619,50.5
18 | 17,2013.250,0,292.9978,6,24.97744,121.54458,70.1
19 | 18,2012.750,17.7,350.8515,1,24.97544,121.53119,37.4
20 | 19,2013.417,16.9,368.1363,8,24.9675,121.54451,42.3
21 | 20,2012.667,1.5,23.38284,7,24.96772,121.54102,47.7
22 | 21,2013.417,4.5,2275.877,3,24.96314,121.51151,29.3
23 | 22,2013.417,10.5,279.1726,7,24.97528,121.54541,51.6
24 | 23,2012.917,14.7,1360.139,1,24.95204,121.54842,24.6
25 | 24,2013.083,10.1,279.1726,7,24.97528,121.54541,47.9
26 | 25,2013.000,39.6,480.6977,4,24.97353,121.53885,38.8
27 | 26,2013.083,29.3,1487.868,2,24.97542,121.51726,27
28 | 27,2012.667,3.1,383.8624,5,24.98085,121.54391,56.2
29 | 28,2013.250,10.4,276.449,5,24.95593,121.53913,33.6
30 | 29,2013.500,19.2,557.478,4,24.97419,121.53797,47
31 | 30,2013.083,7.1,451.2438,5,24.97563,121.54694,57.1
32 | 31,2013.500,25.9,4519.69,0,24.94826,121.49587,22.1
33 | 32,2012.750,29.6,769.4034,7,24.98281,121.53408,25
34 | 33,2012.750,37.9,488.5727,1,24.97349,121.53451,34.2
35 | 34,2013.250,16.5,323.655,6,24.97841,121.54281,49.3
36 | 35,2012.750,15.4,205.367,7,24.98419,121.54243,55.1
37 | 36,2013.500,13.9,4079.418,0,25.01459,121.51816,27.3
38 | 37,2012.917,14.7,1935.009,2,24.96386,121.51458,22.9
39 | 38,2013.167,12,1360.139,1,24.95204,121.54842,25.3
40 | 39,2012.667,3.1,577.9615,6,24.97201,121.54722,47.7
41 | 40,2013.167,16.2,289.3248,5,24.98203,121.54348,46.2
42 | 41,2013.000,13.6,4082.015,0,24.94155,121.50381,15.9
43 | 42,2013.500,16.8,4066.587,0,24.94297,121.50342,18.2
44 | 43,2013.417,36.1,519.4617,5,24.96305,121.53758,34.7
45 | 44,2012.750,34.4,512.7871,6,24.98748,121.54301,34.1
46 | 45,2013.583,2.7,533.4762,4,24.97445,121.54765,53.9
47 | 46,2013.083,36.6,488.8193,8,24.97015,121.54494,38.3
48 | 47,2013.417,21.7,463.9623,9,24.9703,121.54458,42
49 | 48,2013.583,35.9,640.7391,3,24.97563,121.53715,61.5
50 | 49,2013.417,24.2,4605.749,0,24.94684,121.49578,13.4
51 | 50,2012.667,29.4,4510.359,1,24.94925,121.49542,13.2
52 | 51,2013.417,21.7,512.5487,4,24.974,121.53842,44.2
53 | 52,2013.083,31.3,1758.406,1,24.95402,121.55282,20.7
54 | 53,2013.583,32.1,1438.579,3,24.97419,121.5175,27
55 | 54,2013.083,13.3,492.2313,5,24.96515,121.53737,38.9
56 | 55,2013.083,16.1,289.3248,5,24.98203,121.54348,51.7
57 | 56,2012.833,31.7,1160.632,0,24.94968,121.53009,13.7
58 | 57,2013.417,33.6,371.2495,8,24.97254,121.54059,41.9
59 | 58,2012.917,3.5,56.47425,7,24.95744,121.53711,53.5
60 | 59,2013.500,30.3,4510.359,1,24.94925,121.49542,22.6
61 | 60,2013.083,13.3,336.0532,5,24.95776,121.53438,42.4
62 | 61,2013.417,11,1931.207,2,24.96365,121.51471,21.3
63 | 62,2013.500,5.3,259.6607,6,24.97585,121.54516,63.2
64 | 63,2012.917,17.2,2175.877,3,24.96303,121.51254,27.7
65 | 64,2013.583,2.6,533.4762,4,24.97445,121.54765,55
66 | 65,2013.333,17.5,995.7554,0,24.96305,121.54915,25.3
67 | 66,2013.417,40.1,123.7429,8,24.97635,121.54329,44.3
68 | 67,2013.000,1,193.5845,6,24.96571,121.54089,50.7
69 | 68,2013.500,8.5,104.8101,5,24.96674,121.54067,56.8
70 | 69,2013.417,30.4,464.223,6,24.97964,121.53805,36.2
71 | 70,2012.833,12.5,561.9845,5,24.98746,121.54391,42
72 | 71,2013.583,6.6,90.45606,9,24.97433,121.5431,59
73 | 72,2013.083,35.5,640.7391,3,24.97563,121.53715,40.8
74 | 73,2013.583,32.5,424.5442,8,24.97587,121.53913,36.3
75 | 74,2013.167,13.8,4082.015,0,24.94155,121.50381,20
76 | 75,2012.917,6.8,379.5575,10,24.98343,121.53762,54.4
77 | 76,2013.500,12.3,1360.139,1,24.95204,121.54842,29.5
78 | 77,2013.583,35.9,616.4004,3,24.97723,121.53767,36.8
79 | 78,2012.833,20.5,2185.128,3,24.96322,121.51237,25.6
80 | 79,2012.917,38.2,552.4371,2,24.97598,121.53381,29.8
81 | 80,2013.000,18,1414.837,1,24.95182,121.54887,26.5
82 | 81,2013.500,11.8,533.4762,4,24.97445,121.54765,40.3
83 | 82,2013.000,30.8,377.7956,6,24.96427,121.53964,36.8
84 | 83,2013.083,13.2,150.9347,7,24.96725,121.54252,48.1
85 | 84,2012.917,25.3,2707.392,3,24.96056,121.50831,17.7
86 | 85,2013.083,15.1,383.2805,7,24.96735,121.54464,43.7
87 | 86,2012.750,0,338.9679,9,24.96853,121.54413,50.8
88 | 87,2012.833,1.8,1455.798,1,24.9512,121.549,27
89 | 88,2013.583,16.9,4066.587,0,24.94297,121.50342,18.3
90 | 89,2012.917,8.9,1406.43,0,24.98573,121.52758,48
91 | 90,2013.500,23,3947.945,0,24.94783,121.50243,25.3
92 | 91,2012.833,0,274.0144,1,24.9748,121.53059,45.4
93 | 92,2013.250,9.1,1402.016,0,24.98569,121.5276,43.2
94 | 93,2012.917,20.6,2469.645,4,24.96108,121.51046,21.8
95 | 94,2012.917,31.9,1146.329,0,24.9492,121.53076,16.1
96 | 95,2012.917,40.9,167.5989,5,24.9663,121.54026,41
97 | 96,2012.917,8,104.8101,5,24.96674,121.54067,51.8
98 | 97,2013.417,6.4,90.45606,9,24.97433,121.5431,59.5
99 | 98,2013.083,28.4,617.4424,3,24.97746,121.53299,34.6
100 | 99,2013.417,16.4,289.3248,5,24.98203,121.54348,51
101 | 100,2013.417,6.4,90.45606,9,24.97433,121.5431,62.2
102 | 101,2013.500,17.5,964.7496,4,24.98872,121.53411,38.2
103 | 102,2012.833,12.7,170.1289,1,24.97371,121.52984,32.9
104 | 103,2013.083,1.1,193.5845,6,24.96571,121.54089,54.4
105 | 104,2012.750,0,208.3905,6,24.95618,121.53844,45.7
106 | 105,2012.667,32.7,392.4459,6,24.96398,121.5425,30.5
107 | 106,2012.833,0,292.9978,6,24.97744,121.54458,71
108 | 107,2013.083,17.2,189.5181,8,24.97707,121.54308,47.1
109 | 108,2013.333,12.2,1360.139,1,24.95204,121.54842,26.6
110 | 109,2013.417,31.4,592.5006,2,24.9726,121.53561,34.1
111 | 110,2013.583,4,2147.376,3,24.96299,121.51284,28.4
112 | 111,2013.083,8.1,104.8101,5,24.96674,121.54067,51.6
113 | 112,2013.583,33.3,196.6172,7,24.97701,121.54224,39.4
114 | 113,2013.417,9.9,2102.427,3,24.96044,121.51462,23.1
115 | 114,2013.333,14.8,393.2606,6,24.96172,121.53812,7.6
116 | 115,2012.667,30.6,143.8383,8,24.98155,121.54142,53.3
117 | 116,2013.083,20.6,737.9161,2,24.98092,121.54739,46.4
118 | 117,2013.000,30.9,6396.283,1,24.94375,121.47883,12.2
119 | 118,2013.000,13.6,4197.349,0,24.93885,121.50383,13
120 | 119,2013.500,25.3,1583.722,3,24.96622,121.51709,30.6
121 | 120,2013.500,16.6,289.3248,5,24.98203,121.54348,59.6
122 | 121,2013.167,13.3,492.2313,5,24.96515,121.53737,31.3
123 | 122,2013.500,13.6,492.2313,5,24.96515,121.53737,48
124 | 123,2013.250,31.5,414.9476,4,24.98199,121.54464,32.5
125 | 124,2013.417,0,185.4296,0,24.9711,121.5317,45.5
126 | 125,2012.917,9.9,279.1726,7,24.97528,121.54541,57.4
127 | 126,2013.167,1.1,193.5845,6,24.96571,121.54089,48.6
128 | 127,2013.083,38.6,804.6897,4,24.97838,121.53477,62.9
129 | 128,2013.250,3.8,383.8624,5,24.98085,121.54391,55
130 | 129,2013.083,41.3,124.9912,6,24.96674,121.54039,60.7
131 | 130,2013.417,38.5,216.8329,7,24.98086,121.54162,41
132 | 131,2013.250,29.6,535.527,8,24.98092,121.53653,37.5
133 | 132,2013.500,4,2147.376,3,24.96299,121.51284,30.7
134 | 133,2013.167,26.6,482.7581,5,24.97433,121.53863,37.5
135 | 134,2012.833,18,373.3937,8,24.9866,121.54082,39.5
136 | 135,2012.667,33.4,186.9686,6,24.96604,121.54211,42.2
137 | 136,2012.917,18.9,1009.235,0,24.96357,121.54951,20.8
138 | 137,2012.750,11.4,390.5684,5,24.97937,121.54245,46.8
139 | 138,2013.500,13.6,319.0708,6,24.96495,121.54277,47.4
140 | 139,2013.167,10,942.4664,0,24.97843,121.52406,43.5
141 | 140,2012.667,12.9,492.2313,5,24.96515,121.53737,42.5
142 | 141,2013.250,16.2,289.3248,5,24.98203,121.54348,51.4
143 | 142,2013.333,5.1,1559.827,3,24.97213,121.51627,28.9
144 | 143,2013.417,19.8,640.6071,5,24.97017,121.54647,37.5
145 | 144,2013.500,13.6,492.2313,5,24.96515,121.53737,40.1
146 | 145,2013.083,11.9,1360.139,1,24.95204,121.54842,28.4
147 | 146,2012.917,2.1,451.2438,5,24.97563,121.54694,45.5
148 | 147,2012.750,0,185.4296,0,24.9711,121.5317,52.2
149 | 148,2012.750,3.2,489.8821,8,24.97017,121.54494,43.2
150 | 149,2013.500,16.4,3780.59,0,24.93293,121.51203,45.1
151 | 150,2012.667,34.9,179.4538,8,24.97349,121.54245,39.7
152 | 151,2013.250,35.8,170.7311,7,24.96719,121.54269,48.5
153 | 152,2013.500,4.9,387.7721,9,24.98118,121.53788,44.7
154 | 153,2013.333,12,1360.139,1,24.95204,121.54842,28.9
155 | 154,2013.250,6.5,376.1709,6,24.95418,121.53713,40.9
156 | 155,2013.500,16.9,4066.587,0,24.94297,121.50342,20.7
157 | 156,2013.167,13.8,4082.015,0,24.94155,121.50381,15.6
158 | 157,2013.583,30.7,1264.73,0,24.94883,121.52954,18.3
159 | 158,2013.250,16.1,815.9314,4,24.97886,121.53464,35.6
160 | 159,2013.000,11.6,390.5684,5,24.97937,121.54245,39.4
161 | 160,2012.667,15.5,815.9314,4,24.97886,121.53464,37.4
162 | 161,2012.917,3.5,49.66105,8,24.95836,121.53756,57.8
163 | 162,2013.417,19.2,616.4004,3,24.97723,121.53767,39.6
164 | 163,2012.750,16,4066.587,0,24.94297,121.50342,11.6
165 | 164,2013.500,8.5,104.8101,5,24.96674,121.54067,55.5
166 | 165,2012.833,0,185.4296,0,24.9711,121.5317,55.2
167 | 166,2012.917,13.7,1236.564,1,24.97694,121.55391,30.6
168 | 167,2013.417,0,292.9978,6,24.97744,121.54458,73.6
169 | 168,2013.417,28.2,330.0854,8,24.97408,121.54011,43.4
170 | 169,2013.083,27.6,515.1122,5,24.96299,121.5432,37.4
171 | 170,2013.417,8.4,1962.628,1,24.95468,121.55481,23.5
172 | 171,2013.333,24,4527.687,0,24.94741,121.49628,14.4
173 | 172,2013.083,3.6,383.8624,5,24.98085,121.54391,58.8
174 | 173,2013.583,6.6,90.45606,9,24.97433,121.5431,58.1
175 | 174,2013.083,41.3,401.8807,4,24.98326,121.5446,35.1
176 | 175,2013.417,4.3,432.0385,7,24.9805,121.53778,45.2
177 | 176,2013.083,30.2,472.1745,3,24.97005,121.53758,36.5
178 | 177,2012.833,13.9,4573.779,0,24.94867,121.49507,19.2
179 | 178,2013.083,33,181.0766,9,24.97697,121.54262,42
180 | 179,2013.500,13.1,1144.436,4,24.99176,121.53456,36.7
181 | 180,2013.083,14,438.8513,1,24.97493,121.5273,42.6
182 | 181,2012.667,26.9,4449.27,0,24.94898,121.49621,15.5
183 | 182,2013.167,11.6,201.8939,8,24.98489,121.54121,55.9
184 | 183,2013.500,13.5,2147.376,3,24.96299,121.51284,23.6
185 | 184,2013.500,17,4082.015,0,24.94155,121.50381,18.8
186 | 185,2012.750,14.1,2615.465,0,24.95495,121.56174,21.8
187 | 186,2012.750,31.4,1447.286,3,24.97285,121.5173,21.5
188 | 187,2013.167,20.9,2185.128,3,24.96322,121.51237,25.7
189 | 188,2013.000,8.9,3078.176,0,24.95464,121.56627,22
190 | 189,2012.917,34.8,190.0392,8,24.97707,121.54312,44.3
191 | 190,2012.917,16.3,4066.587,0,24.94297,121.50342,20.5
192 | 191,2013.500,35.3,616.5735,8,24.97945,121.53642,42.3
193 | 192,2013.167,13.2,750.0704,2,24.97371,121.54951,37.8
194 | 193,2013.167,43.8,57.58945,7,24.9675,121.54069,42.7
195 | 194,2013.417,9.7,421.479,5,24.98246,121.54477,49.3
196 | 195,2013.500,15.2,3771.895,0,24.93363,121.51158,29.3
197 | 196,2013.333,15.2,461.1016,5,24.95425,121.5399,34.6
198 | 197,2013.000,22.8,707.9067,2,24.981,121.54713,36.6
199 | 198,2013.250,34.4,126.7286,8,24.96881,121.54089,48.2
200 | 199,2013.083,34,157.6052,7,24.96628,121.54196,39.1
201 | 200,2013.417,18.2,451.6419,8,24.96945,121.5449,31.6
202 | 201,2013.417,17.4,995.7554,0,24.96305,121.54915,25.5
203 | 202,2013.417,13.1,561.9845,5,24.98746,121.54391,45.9
204 | 203,2012.917,38.3,642.6985,3,24.97559,121.53713,31.5
205 | 204,2012.667,15.6,289.3248,5,24.98203,121.54348,46.1
206 | 205,2013.000,18,1414.837,1,24.95182,121.54887,26.6
207 | 206,2013.083,12.8,1449.722,3,24.97289,121.51728,21.4
208 | 207,2013.250,22.2,379.5575,10,24.98343,121.53762,44
209 | 208,2013.083,38.5,665.0636,3,24.97503,121.53692,34.2
210 | 209,2012.750,11.5,1360.139,1,24.95204,121.54842,26.2
211 | 210,2012.833,34.8,175.6294,8,24.97347,121.54271,40.9
212 | 211,2013.500,5.2,390.5684,5,24.97937,121.54245,52.2
213 | 212,2013.083,0,274.0144,1,24.9748,121.53059,43.5
214 | 213,2013.333,17.6,1805.665,2,24.98672,121.52091,31.1
215 | 214,2013.083,6.2,90.45606,9,24.97433,121.5431,58
216 | 215,2013.583,18.1,1783.18,3,24.96731,121.51486,20.9
217 | 216,2013.333,19.2,383.7129,8,24.972,121.54477,48.1
218 | 217,2013.250,37.8,590.9292,1,24.97153,121.53559,39.7
219 | 218,2012.917,28,372.6242,6,24.97838,121.54119,40.8
220 | 219,2013.417,13.6,492.2313,5,24.96515,121.53737,43.8
221 | 220,2012.750,29.3,529.7771,8,24.98102,121.53655,40.2
222 | 221,2013.333,37.2,186.5101,9,24.97703,121.54265,78.3
223 | 222,2013.333,9,1402.016,0,24.98569,121.5276,38.5
224 | 223,2013.583,30.6,431.1114,10,24.98123,121.53743,48.5
225 | 224,2013.250,9.1,1402.016,0,24.98569,121.5276,42.3
226 | 225,2013.333,34.5,324.9419,6,24.97814,121.5417,46
227 | 226,2013.250,1.1,193.5845,6,24.96571,121.54089,49
228 | 227,2013.000,16.5,4082.015,0,24.94155,121.50381,12.8
229 | 228,2012.917,32.4,265.0609,8,24.98059,121.53986,40.2
230 | 229,2013.417,11.9,3171.329,0,25.00115,121.51776,46.6
231 | 230,2013.583,31,1156.412,0,24.9489,121.53095,19
232 | 231,2013.500,4,2147.376,3,24.96299,121.51284,33.4
233 | 232,2012.833,16.2,4074.736,0,24.94235,121.50357,14.7
234 | 233,2012.917,27.1,4412.765,1,24.95032,121.49587,17.4
235 | 234,2013.333,39.7,333.3679,9,24.98016,121.53932,32.4
236 | 235,2013.250,8,2216.612,4,24.96007,121.51361,23.9
237 | 236,2012.750,12.9,250.631,7,24.96606,121.54297,39.3
238 | 237,2013.167,3.6,373.8389,10,24.98322,121.53765,61.9
239 | 238,2013.167,13,732.8528,0,24.97668,121.52518,39
240 | 239,2013.083,12.8,732.8528,0,24.97668,121.52518,40.6
241 | 240,2013.500,18.1,837.7233,0,24.96334,121.54767,29.7
242 | 241,2013.083,11,1712.632,2,24.96412,121.5167,28.8
243 | 242,2013.500,13.7,250.631,7,24.96606,121.54297,41.4
244 | 243,2012.833,2,2077.39,3,24.96357,121.51329,33.4
245 | 244,2013.417,32.8,204.1705,8,24.98236,121.53923,48.2
246 | 245,2013.083,4.8,1559.827,3,24.97213,121.51627,21.7
247 | 246,2013.417,7.5,639.6198,5,24.97258,121.54814,40.8
248 | 247,2013.417,16.4,389.8219,6,24.96412,121.54273,40.6
249 | 248,2013.333,21.7,1055.067,0,24.96211,121.54928,23.1
250 | 249,2013.000,19,1009.235,0,24.96357,121.54951,22.3
251 | 250,2012.833,18,6306.153,1,24.95743,121.47516,15
252 | 251,2013.167,39.2,424.7132,7,24.97429,121.53917,30
253 | 252,2012.917,31.7,1159.454,0,24.9496,121.53018,13.8
254 | 253,2012.833,5.9,90.45606,9,24.97433,121.5431,52.7
255 | 254,2012.667,30.4,1735.595,2,24.96464,121.51623,25.9
256 | 255,2012.667,1.1,329.9747,5,24.98254,121.54395,51.8
257 | 256,2013.417,31.5,5512.038,1,24.95095,121.48458,17.4
258 | 257,2012.667,14.6,339.2289,1,24.97519,121.53151,26.5
259 | 258,2013.250,17.3,444.1334,1,24.97501,121.5273,43.9
260 | 259,2013.417,0,292.9978,6,24.97744,121.54458,63.3
261 | 260,2013.083,17.7,837.7233,0,24.96334,121.54767,28.8
262 | 261,2013.250,17,1485.097,4,24.97073,121.517,30.7
263 | 262,2013.167,16.2,2288.011,3,24.95885,121.51359,24.4
264 | 263,2012.917,15.9,289.3248,5,24.98203,121.54348,53
265 | 264,2013.417,3.9,2147.376,3,24.96299,121.51284,31.7
266 | 265,2013.167,32.6,493.657,7,24.96968,121.54522,40.6
267 | 266,2012.833,15.7,815.9314,4,24.97886,121.53464,38.1
268 | 267,2013.250,17.8,1783.18,3,24.96731,121.51486,23.7
269 | 268,2012.833,34.7,482.7581,5,24.97433,121.53863,41.1
270 | 269,2013.417,17.2,390.5684,5,24.97937,121.54245,40.1
271 | 270,2013.000,17.6,837.7233,0,24.96334,121.54767,23
272 | 271,2013.333,10.8,252.5822,1,24.9746,121.53046,117.5
273 | 272,2012.917,17.7,451.6419,8,24.96945,121.5449,26.5
274 | 273,2012.750,13,492.2313,5,24.96515,121.53737,40.5
275 | 274,2013.417,13.2,170.1289,1,24.97371,121.52984,29.3
276 | 275,2013.167,27.5,394.0173,7,24.97305,121.53994,41
277 | 276,2012.667,1.5,23.38284,7,24.96772,121.54102,49.7
278 | 277,2013.000,19.1,461.1016,5,24.95425,121.5399,34
279 | 278,2013.417,21.2,2185.128,3,24.96322,121.51237,27.7
280 | 279,2012.750,0,208.3905,6,24.95618,121.53844,44
281 | 280,2013.417,2.6,1554.25,3,24.97026,121.51642,31.1
282 | 281,2013.250,2.3,184.3302,6,24.96581,121.54086,45.4
283 | 282,2013.333,4.7,387.7721,9,24.98118,121.53788,44.8
284 | 283,2012.917,2,1455.798,1,24.9512,121.549,25.6
285 | 284,2013.417,33.5,1978.671,2,24.98674,121.51844,23.5
286 | 285,2012.917,15,383.2805,7,24.96735,121.54464,34.4
287 | 286,2013.167,30.1,718.2937,3,24.97509,121.53644,55.3
288 | 287,2012.917,5.9,90.45606,9,24.97433,121.5431,56.3
289 | 288,2013.000,19.2,461.1016,5,24.95425,121.5399,32.9
290 | 289,2013.583,16.6,323.6912,6,24.97841,121.5428,51
291 | 290,2013.333,13.9,289.3248,5,24.98203,121.54348,44.5
292 | 291,2013.083,37.7,490.3446,0,24.97217,121.53471,37
293 | 292,2012.833,3.4,56.47425,7,24.95744,121.53711,54.4
294 | 293,2013.083,17.5,395.6747,5,24.95674,121.534,24.5
295 | 294,2012.667,12.6,383.2805,7,24.96735,121.54464,42.5
296 | 295,2013.500,26.4,335.5273,6,24.9796,121.5414,38.1
297 | 296,2013.167,18.2,2179.59,3,24.96299,121.51252,21.8
298 | 297,2012.750,12.5,1144.436,4,24.99176,121.53456,34.1
299 | 298,2012.833,34.9,567.0349,4,24.97003,121.5458,28.5
300 | 299,2013.333,16.7,4082.015,0,24.94155,121.50381,16.7
301 | 300,2013.167,33.2,121.7262,10,24.98178,121.54059,46.1
302 | 301,2013.083,2.5,156.2442,4,24.96696,121.53992,36.9
303 | 302,2012.750,38,461.7848,0,24.97229,121.53445,35.7
304 | 303,2013.500,16.5,2288.011,3,24.95885,121.51359,23.2
305 | 304,2013.500,38.3,439.7105,0,24.97161,121.53423,38.4
306 | 305,2013.417,20,1626.083,3,24.96622,121.51668,29.4
307 | 306,2013.083,16.2,289.3248,5,24.98203,121.54348,55
308 | 307,2013.500,14.4,169.9803,1,24.97369,121.52979,50.2
309 | 308,2012.833,10.3,3079.89,0,24.9546,121.56627,24.7
310 | 309,2013.417,16.4,289.3248,5,24.98203,121.54348,53
311 | 310,2013.250,30.3,1264.73,0,24.94883,121.52954,19.1
312 | 311,2013.583,16.4,1643.499,2,24.95394,121.55174,24.7
313 | 312,2013.167,21.3,537.7971,4,24.97425,121.53814,42.2
314 | 313,2013.583,35.4,318.5292,9,24.97071,121.54069,78
315 | 314,2013.333,8.3,104.8101,5,24.96674,121.54067,42.8
316 | 315,2013.250,3.7,577.9615,6,24.97201,121.54722,41.6
317 | 316,2013.083,15.6,1756.411,2,24.9832,121.51812,27.3
318 | 317,2013.250,13.3,250.631,7,24.96606,121.54297,42
319 | 318,2012.750,15.6,752.7669,2,24.97795,121.53451,37.5
320 | 319,2013.333,7.1,379.5575,10,24.98343,121.53762,49.8
321 | 320,2013.250,34.6,272.6783,5,24.95562,121.53872,26.9
322 | 321,2012.750,13.5,4197.349,0,24.93885,121.50383,18.6
323 | 322,2012.917,16.9,964.7496,4,24.98872,121.53411,37.7
324 | 323,2013.000,12.9,187.4823,1,24.97388,121.52981,33.1
325 | 324,2013.417,28.6,197.1338,6,24.97631,121.54436,42.5
326 | 325,2012.667,12.4,1712.632,2,24.96412,121.5167,31.3
327 | 326,2013.083,36.6,488.8193,8,24.97015,121.54494,38.1
328 | 327,2013.500,4.1,56.47425,7,24.95744,121.53711,62.1
329 | 328,2013.417,3.5,757.3377,3,24.97538,121.54971,36.7
330 | 329,2012.833,15.9,1497.713,3,24.97003,121.51696,23.6
331 | 330,2013.000,13.6,4197.349,0,24.93885,121.50383,19.2
332 | 331,2013.083,32,1156.777,0,24.94935,121.53046,12.8
333 | 332,2013.333,25.6,4519.69,0,24.94826,121.49587,15.6
334 | 333,2013.167,39.8,617.7134,2,24.97577,121.53475,39.6
335 | 334,2012.750,7.8,104.8101,5,24.96674,121.54067,38.4
336 | 335,2012.917,30,1013.341,5,24.99006,121.5346,22.8
337 | 336,2013.583,27.3,337.6016,6,24.96431,121.54063,36.5
338 | 337,2012.833,5.1,1867.233,2,24.98407,121.51748,35.6
339 | 338,2012.833,31.3,600.8604,5,24.96871,121.54651,30.9
340 | 339,2012.917,31.5,258.186,9,24.96867,121.54331,36.3
341 | 340,2013.333,1.7,329.9747,5,24.98254,121.54395,50.4
342 | 341,2013.333,33.6,270.8895,0,24.97281,121.53265,42.9
343 | 342,2013.000,13,750.0704,2,24.97371,121.54951,37
344 | 343,2012.667,5.7,90.45606,9,24.97433,121.5431,53.5
345 | 344,2013.000,33.5,563.2854,8,24.98223,121.53597,46.6
346 | 345,2013.500,34.6,3085.17,0,24.998,121.5155,41.2
347 | 346,2012.667,0,185.4296,0,24.9711,121.5317,37.9
348 | 347,2013.417,13.2,1712.632,2,24.96412,121.5167,30.8
349 | 348,2013.583,17.4,6488.021,1,24.95719,121.47353,11.2
350 | 349,2012.833,4.6,259.6607,6,24.97585,121.54516,53.7
351 | 350,2012.750,7.8,104.8101,5,24.96674,121.54067,47
352 | 351,2013.000,13.2,492.2313,5,24.96515,121.53737,42.3
353 | 352,2012.833,4,2180.245,3,24.96324,121.51241,28.6
354 | 353,2012.833,18.4,2674.961,3,24.96143,121.50827,25.7
355 | 354,2013.500,4.1,2147.376,3,24.96299,121.51284,31.3
356 | 355,2013.417,12.2,1360.139,1,24.95204,121.54842,30.1
357 | 356,2013.250,3.8,383.8624,5,24.98085,121.54391,60.7
358 | 357,2012.833,10.3,211.4473,1,24.97417,121.52999,45.3
359 | 358,2013.417,0,338.9679,9,24.96853,121.54413,44.9
360 | 359,2013.167,1.1,193.5845,6,24.96571,121.54089,45.1
361 | 360,2013.500,5.6,2408.993,0,24.95505,121.55964,24.7
362 | 361,2012.667,32.9,87.30222,10,24.983,121.54022,47.1
363 | 362,2013.083,41.4,281.205,8,24.97345,121.54093,63.3
364 | 363,2013.417,17.1,967.4,4,24.98872,121.53408,40
365 | 364,2013.500,32.3,109.9455,10,24.98182,121.54086,48
366 | 365,2013.417,35.3,614.1394,7,24.97913,121.53666,33.1
367 | 366,2012.917,17.3,2261.432,4,24.96182,121.51222,29.5
368 | 367,2012.750,14.2,1801.544,1,24.95153,121.55254,24.8
369 | 368,2012.833,15,1828.319,2,24.96464,121.51531,20.9
370 | 369,2013.417,18.2,350.8515,1,24.97544,121.53119,43.1
371 | 370,2012.667,20.2,2185.128,3,24.96322,121.51237,22.8
372 | 371,2012.750,15.9,289.3248,5,24.98203,121.54348,42.1
373 | 372,2013.500,4.1,312.8963,5,24.95591,121.53956,51.7
374 | 373,2013.000,33.9,157.6052,7,24.96628,121.54196,41.5
375 | 374,2013.083,0,274.0144,1,24.9748,121.53059,52.2
376 | 375,2013.250,5.4,390.5684,5,24.97937,121.54245,49.5
377 | 376,2013.250,21.7,1157.988,0,24.96165,121.55011,23.8
378 | 377,2013.417,14.7,1717.193,2,24.96447,121.51649,30.5
379 | 378,2013.333,3.9,49.66105,8,24.95836,121.53756,56.8
380 | 379,2013.333,37.3,587.8877,8,24.97077,121.54634,37.4
381 | 380,2013.333,0,292.9978,6,24.97744,121.54458,69.7
382 | 381,2013.333,14.1,289.3248,5,24.98203,121.54348,53.3
383 | 382,2013.417,8,132.5469,9,24.98298,121.53981,47.3
384 | 383,2013.000,16.3,3529.564,0,24.93207,121.51597,29.3
385 | 384,2012.667,29.1,506.1144,4,24.97845,121.53889,40.3
386 | 385,2012.750,16.1,4066.587,0,24.94297,121.50342,12.9
387 | 386,2013.000,18.3,82.88643,10,24.983,121.54026,46.6
388 | 387,2012.833,0,185.4296,0,24.9711,121.5317,55.3
389 | 388,2013.250,16.2,2103.555,3,24.96042,121.51462,25.6
390 | 389,2013.500,10.4,2251.938,4,24.95957,121.51353,27.3
391 | 390,2013.250,40.9,122.3619,8,24.96756,121.5423,67.7
392 | 391,2013.500,32.8,377.8302,9,24.97151,121.5435,38.6
393 | 392,2013.583,6.2,1939.749,1,24.95155,121.55387,31.3
394 | 393,2013.083,42.7,443.802,6,24.97927,121.53874,35.3
395 | 394,2013.000,16.9,967.4,4,24.98872,121.53408,40.3
396 | 395,2013.500,32.6,4136.271,1,24.95544,121.4963,24.7
397 | 396,2012.917,21.2,512.5487,4,24.974,121.53842,42.5
398 | 397,2012.667,37.1,918.6357,1,24.97198,121.55063,31.9
399 | 398,2013.417,13.1,1164.838,4,24.99156,121.53406,32.2
400 | 399,2013.417,14.7,1717.193,2,24.96447,121.51649,23
401 | 400,2012.917,12.7,170.1289,1,24.97371,121.52984,37.3
402 | 401,2013.250,26.8,482.7581,5,24.97433,121.53863,35.5
403 | 402,2013.083,7.6,2175.03,3,24.96305,121.51254,27.7
404 | 403,2012.833,12.7,187.4823,1,24.97388,121.52981,28.5
405 | 404,2012.667,30.9,161.942,9,24.98353,121.53966,39.7
406 | 405,2013.333,16.4,289.3248,5,24.98203,121.54348,41.2
407 | 406,2012.667,23,130.9945,6,24.95663,121.53765,37.2
408 | 407,2013.167,1.9,372.1386,7,24.97293,121.54026,40.5
409 | 408,2013.000,5.2,2408.993,0,24.95505,121.55964,22.3
410 | 409,2013.417,18.5,2175.744,3,24.9633,121.51243,28.1
411 | 410,2013.000,13.7,4082.015,0,24.94155,121.50381,15.4
412 | 411,2012.667,5.6,90.45606,9,24.97433,121.5431,50
413 | 412,2013.250,18.8,390.9696,7,24.97923,121.53986,40.6
414 | 413,2013.000,8.1,104.8101,5,24.96674,121.54067,52.5
415 | 414,2013.500,6.5,90.45606,9,24.97433,121.5431,63.9
416 |
--------------------------------------------------------------------------------
/img/LogoSingleRow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/betterdatascience/YouTube/cf24c5cf3192eb7b17c25d94c710d3f941818537/img/LogoSingleRow.png
--------------------------------------------------------------------------------