├── ProjectReport.pdf
├── ReportSummary.pdf
├── README.md
├── LICENSE
├── .gitignore
├── GradientBoosting.ipynb
└── SVR.ipynb
/ProjectReport.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pavini11/SkyLens/HEAD/ProjectReport.pdf
--------------------------------------------------------------------------------
/ReportSummary.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pavini11/SkyLens/HEAD/ReportSummary.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Time-Series Forecasting for Air Quality Prediction
2 | Air Quality Index (AQI) prediction using LSTM, MLR, SVR, Random Forest, and Gradient Boosting.
3 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Pavini Jain
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/GradientBoosting.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "import seaborn as sns\n",
13 | "import matplotlib.pyplot as plt\n",
14 | "\n",
15 | "import warnings\n",
16 | "warnings.filterwarnings(\"ignore\")\n",
17 | "warnings.simplefilter(action='ignore', category=FutureWarning)"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 3,
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "df_x = pd.read_csv(\"train_x.csv\")\n",
27 | "df_y = pd.read_csv(\"train_y.csv\")\n",
28 | "df_x1 = pd.read_csv(\"test_x.csv\")\n",
29 | "df_y1 = pd.read_csv(\"test_y.csv\")"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 4,
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "data": {
39 | "text/html": [
40 | "
\n",
41 | "\n",
54 | "
\n",
55 | " \n",
56 | " \n",
57 | " | \n",
58 | " Date | \n",
59 | " PM2.5 | \n",
60 | " PM10 | \n",
61 | " NO2 | \n",
62 | " NOx | \n",
63 | " CO | \n",
64 | " SO2 | \n",
65 | " O3 | \n",
66 | " temp | \n",
67 | " max_temp | \n",
68 | " min_temp | \n",
69 | " humid | \n",
70 | " visible | \n",
71 | " wind | \n",
72 | "
\n",
73 | " \n",
74 | " \n",
75 | " \n",
76 | " | 0 | \n",
77 | " 2016-01-01 | \n",
78 | " 56.40 | \n",
79 | " 95.08 | \n",
80 | " 51.73 | \n",
81 | " 34.31 | \n",
82 | " 0.69 | \n",
83 | " 7.44 | \n",
84 | " 55.96 | \n",
85 | " 20.1 | \n",
86 | " 28.4 | \n",
87 | " 15.7 | \n",
88 | " 54.0 | \n",
89 | " 6.0 | \n",
90 | " 3.0 | \n",
91 | "
\n",
92 | " \n",
93 | " | 1 | \n",
94 | " 2016-01-02 | \n",
95 | " 53.69 | \n",
96 | " 93.92 | \n",
97 | " 73.09 | \n",
98 | " 54.77 | \n",
99 | " 0.76 | \n",
100 | " 8.67 | \n",
101 | " 34.06 | \n",
102 | " 20.8 | \n",
103 | " 28.3 | \n",
104 | " 12.9 | \n",
105 | " 45.0 | \n",
106 | " 6.3 | \n",
107 | " 1.1 | \n",
108 | "
\n",
109 | " \n",
110 | " | 2 | \n",
111 | " 2016-01-03 | \n",
112 | " 62.35 | \n",
113 | " 99.34 | \n",
114 | " 77.77 | \n",
115 | " 55.60 | \n",
116 | " 0.96 | \n",
117 | " 9.10 | \n",
118 | " 47.62 | \n",
119 | " 21.4 | \n",
120 | " 29.0 | \n",
121 | " 13.7 | \n",
122 | " 45.0 | \n",
123 | " 6.3 | \n",
124 | " 0.4 | \n",
125 | "
\n",
126 | " \n",
127 | " | 3 | \n",
128 | " 2016-01-04 | \n",
129 | " 64.96 | \n",
130 | " 104.62 | \n",
131 | " 77.07 | \n",
132 | " 50.81 | \n",
133 | " 0.89 | \n",
134 | " 8.87 | \n",
135 | " 50.62 | \n",
136 | " 21.5 | \n",
137 | " 29.0 | \n",
138 | " 15.2 | \n",
139 | " 47.0 | \n",
140 | " 6.9 | \n",
141 | " 2.2 | \n",
142 | "
\n",
143 | " \n",
144 | " | 4 | \n",
145 | " 2016-01-05 | \n",
146 | " 70.15 | \n",
147 | " 105.12 | \n",
148 | " 79.43 | \n",
149 | " 56.05 | \n",
150 | " 0.85 | \n",
151 | " 9.41 | \n",
152 | " 36.94 | \n",
153 | " 20.9 | \n",
154 | " 28.4 | \n",
155 | " 14.7 | \n",
156 | " 51.0 | \n",
157 | " 6.3 | \n",
158 | " 1.1 | \n",
159 | "
\n",
160 | " \n",
161 | "
\n",
162 | "
"
163 | ],
164 | "text/plain": [
165 | " Date PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp \\\n",
166 | "0 2016-01-01 56.40 95.08 51.73 34.31 0.69 7.44 55.96 20.1 28.4 \n",
167 | "1 2016-01-02 53.69 93.92 73.09 54.77 0.76 8.67 34.06 20.8 28.3 \n",
168 | "2 2016-01-03 62.35 99.34 77.77 55.60 0.96 9.10 47.62 21.4 29.0 \n",
169 | "3 2016-01-04 64.96 104.62 77.07 50.81 0.89 8.87 50.62 21.5 29.0 \n",
170 | "4 2016-01-05 70.15 105.12 79.43 56.05 0.85 9.41 36.94 20.9 28.4 \n",
171 | "\n",
172 | " min_temp humid visible wind \n",
173 | "0 15.7 54.0 6.0 3.0 \n",
174 | "1 12.9 45.0 6.3 1.1 \n",
175 | "2 13.7 45.0 6.3 0.4 \n",
176 | "3 15.2 47.0 6.9 2.2 \n",
177 | "4 14.7 51.0 6.3 1.1 "
178 | ]
179 | },
180 | "execution_count": 4,
181 | "metadata": {},
182 | "output_type": "execute_result"
183 | }
184 | ],
185 | "source": [
186 | "df_x.head(5)"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 5,
192 | "metadata": {},
193 | "outputs": [
194 | {
195 | "data": {
196 | "text/html": [
197 | "\n",
198 | "\n",
211 | "
\n",
212 | " \n",
213 | " \n",
214 | " | \n",
215 | " Date | \n",
216 | " PM2.5 | \n",
217 | " PM10 | \n",
218 | " NO2 | \n",
219 | " NOx | \n",
220 | " CO | \n",
221 | " SO2 | \n",
222 | " O3 | \n",
223 | " temp | \n",
224 | " max_temp | \n",
225 | " min_temp | \n",
226 | " humid | \n",
227 | " visible | \n",
228 | " wind | \n",
229 | "
\n",
230 | " \n",
231 | " \n",
232 | " \n",
233 | " | 0 | \n",
234 | " 01-01-2020 | \n",
235 | " 30.71 | \n",
236 | " 70.80 | \n",
237 | " 27.31 | \n",
238 | " 26.74 | \n",
239 | " 1.07 | \n",
240 | " 8.56 | \n",
241 | " 37.39 | \n",
242 | " 22.0 | \n",
243 | " 27.2 | \n",
244 | " 19.2 | \n",
245 | " 75 | \n",
246 | " 6.9 | \n",
247 | " 3.3 | \n",
248 | "
\n",
249 | " \n",
250 | " | 1 | \n",
251 | " 02-01-2020 | \n",
252 | " 25.78 | \n",
253 | " 62.73 | \n",
254 | " 28.05 | \n",
255 | " 30.18 | \n",
256 | " 1.16 | \n",
257 | " 7.52 | \n",
258 | " 33.11 | \n",
259 | " 22.2 | \n",
260 | " 26.0 | \n",
261 | " 19.4 | \n",
262 | " 77 | \n",
263 | " 5.5 | \n",
264 | " 2.0 | \n",
265 | "
\n",
266 | " \n",
267 | " | 2 | \n",
268 | " 03-01-2020 | \n",
269 | " 29.66 | \n",
270 | " 69.62 | \n",
271 | " 26.26 | \n",
272 | " 26.92 | \n",
273 | " 0.99 | \n",
274 | " 6.40 | \n",
275 | " 32.98 | \n",
276 | " 23.4 | \n",
277 | " 28.4 | \n",
278 | " 19.4 | \n",
279 | " 68 | \n",
280 | " 6.9 | \n",
281 | " 3.5 | \n",
282 | "
\n",
283 | " \n",
284 | " | 3 | \n",
285 | " 04-01-2020 | \n",
286 | " 52.62 | \n",
287 | " 106.81 | \n",
288 | " 31.56 | \n",
289 | " 41.89 | \n",
290 | " 1.39 | \n",
291 | " 6.94 | \n",
292 | " 37.35 | \n",
293 | " 23.6 | \n",
294 | " 30.5 | \n",
295 | " 19.0 | \n",
296 | " 63 | \n",
297 | " 6.3 | \n",
298 | " 1.3 | \n",
299 | "
\n",
300 | " \n",
301 | " | 4 | \n",
302 | " 05-01-2020 | \n",
303 | " 52.64 | \n",
304 | " 97.94 | \n",
305 | " 29.10 | \n",
306 | " 29.52 | \n",
307 | " 1.01 | \n",
308 | " 7.01 | \n",
309 | " 45.10 | \n",
310 | " 24.2 | \n",
311 | " 30.5 | \n",
312 | " 20.4 | \n",
313 | " 63 | \n",
314 | " 6.3 | \n",
315 | " 1.3 | \n",
316 | "
\n",
317 | " \n",
318 | "
\n",
319 | "
"
320 | ],
321 | "text/plain": [
322 | " Date PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp \\\n",
323 | "0 01-01-2020 30.71 70.80 27.31 26.74 1.07 8.56 37.39 22.0 27.2 \n",
324 | "1 02-01-2020 25.78 62.73 28.05 30.18 1.16 7.52 33.11 22.2 26.0 \n",
325 | "2 03-01-2020 29.66 69.62 26.26 26.92 0.99 6.40 32.98 23.4 28.4 \n",
326 | "3 04-01-2020 52.62 106.81 31.56 41.89 1.39 6.94 37.35 23.6 30.5 \n",
327 | "4 05-01-2020 52.64 97.94 29.10 29.52 1.01 7.01 45.10 24.2 30.5 \n",
328 | "\n",
329 | " min_temp humid visible wind \n",
330 | "0 19.2 75 6.9 3.3 \n",
331 | "1 19.4 77 5.5 2.0 \n",
332 | "2 19.4 68 6.9 3.5 \n",
333 | "3 19.0 63 6.3 1.3 \n",
334 | "4 20.4 63 6.3 1.3 "
335 | ]
336 | },
337 | "execution_count": 5,
338 | "metadata": {},
339 | "output_type": "execute_result"
340 | }
341 | ],
342 | "source": [
343 | "df_x1.head(5)"
344 | ]
345 | },
346 | {
347 | "cell_type": "code",
348 | "execution_count": 6,
349 | "metadata": {},
350 | "outputs": [
351 | {
352 | "data": {
353 | "text/html": [
354 | "\n",
355 | "\n",
368 | "
\n",
369 | " \n",
370 | " \n",
371 | " | \n",
372 | " Date | \n",
373 | " AQI | \n",
374 | "
\n",
375 | " \n",
376 | " \n",
377 | " \n",
378 | " | 0 | \n",
379 | " 2016-01-01 | \n",
380 | " 101.0 | \n",
381 | "
\n",
382 | " \n",
383 | " | 1 | \n",
384 | " 2016-01-02 | \n",
385 | " 92.0 | \n",
386 | "
\n",
387 | " \n",
388 | " | 2 | \n",
389 | " 2016-01-03 | \n",
390 | " 108.0 | \n",
391 | "
\n",
392 | " \n",
393 | " | 3 | \n",
394 | " 2016-01-04 | \n",
395 | " 109.0 | \n",
396 | "
\n",
397 | " \n",
398 | " | 4 | \n",
399 | " 2016-01-05 | \n",
400 | " 114.0 | \n",
401 | "
\n",
402 | " \n",
403 | "
\n",
404 | "
"
405 | ],
406 | "text/plain": [
407 | " Date AQI\n",
408 | "0 2016-01-01 101.0\n",
409 | "1 2016-01-02 92.0\n",
410 | "2 2016-01-03 108.0\n",
411 | "3 2016-01-04 109.0\n",
412 | "4 2016-01-05 114.0"
413 | ]
414 | },
415 | "execution_count": 6,
416 | "metadata": {},
417 | "output_type": "execute_result"
418 | }
419 | ],
420 | "source": [
421 | "df_y.head(5)"
422 | ]
423 | },
424 | {
425 | "cell_type": "code",
426 | "execution_count": 7,
427 | "metadata": {},
428 | "outputs": [
429 | {
430 | "data": {
431 | "text/html": [
432 | "\n",
433 | "\n",
446 | "
\n",
447 | " \n",
448 | " \n",
449 | " | \n",
450 | " Date | \n",
451 | " AQI | \n",
452 | "
\n",
453 | " \n",
454 | " \n",
455 | " \n",
456 | " | 0 | \n",
457 | " 2020-01-01 | \n",
458 | " 82.0 | \n",
459 | "
\n",
460 | " \n",
461 | " | 1 | \n",
462 | " 2020-01-02 | \n",
463 | " 81.0 | \n",
464 | "
\n",
465 | " \n",
466 | " | 2 | \n",
467 | " 2020-01-03 | \n",
468 | " 85.0 | \n",
469 | "
\n",
470 | " \n",
471 | " | 3 | \n",
472 | " 2020-01-04 | \n",
473 | " 95.0 | \n",
474 | "
\n",
475 | " \n",
476 | " | 4 | \n",
477 | " 2020-01-05 | \n",
478 | " 118.0 | \n",
479 | "
\n",
480 | " \n",
481 | "
\n",
482 | "
"
483 | ],
484 | "text/plain": [
485 | " Date AQI\n",
486 | "0 2020-01-01 82.0\n",
487 | "1 2020-01-02 81.0\n",
488 | "2 2020-01-03 85.0\n",
489 | "3 2020-01-04 95.0\n",
490 | "4 2020-01-05 118.0"
491 | ]
492 | },
493 | "execution_count": 7,
494 | "metadata": {},
495 | "output_type": "execute_result"
496 | }
497 | ],
498 | "source": [
499 | "df_y1.head(5)"
500 | ]
501 | },
502 | {
503 | "cell_type": "code",
504 | "execution_count": 8,
505 | "metadata": {},
506 | "outputs": [
507 | {
508 | "name": "stdout",
509 | "output_type": "stream",
510 | "text": [
511 | "<class 'pandas.core.frame.DataFrame'>\n",
512 | "RangeIndex: 1441 entries, 0 to 1440\n",
513 | "Data columns (total 14 columns):\n",
514 | "Date 1441 non-null object\n",
515 | "PM2.5 1441 non-null float64\n",
516 | "PM10 1441 non-null float64\n",
517 | "NO2 1441 non-null float64\n",
518 | "NOx 1441 non-null float64\n",
519 | "CO 1441 non-null float64\n",
520 | "SO2 1441 non-null float64\n",
521 | "O3 1441 non-null float64\n",
522 | "temp 1441 non-null float64\n",
523 | "max_temp 1441 non-null float64\n",
524 | "min_temp 1441 non-null float64\n",
525 | "humid 1441 non-null float64\n",
526 | "visible 1441 non-null float64\n",
527 | "wind 1441 non-null float64\n",
528 | "dtypes: float64(13), object(1)\n",
529 | "memory usage: 157.7+ KB\n"
530 | ]
531 | }
532 | ],
533 | "source": [
534 | "df_x.info()"
535 | ]
536 | },
537 | {
538 | "cell_type": "code",
539 | "execution_count": 9,
540 | "metadata": {},
541 | "outputs": [
542 | {
543 | "name": "stdout",
544 | "output_type": "stream",
545 | "text": [
546 | "<class 'pandas.core.frame.DataFrame'>\n",
547 | "RangeIndex: 1441 entries, 0 to 1440\n",
548 | "Data columns (total 2 columns):\n",
549 | "Date 1441 non-null object\n",
550 | "AQI 1441 non-null float64\n",
551 | "dtypes: float64(1), object(1)\n",
552 | "memory usage: 22.6+ KB\n"
553 | ]
554 | }
555 | ],
556 | "source": [
557 | "df_y.info()"
558 | ]
559 | },
560 | {
561 | "cell_type": "code",
562 | "execution_count": 10,
563 | "metadata": {},
564 | "outputs": [
565 | {
566 | "name": "stdout",
567 | "output_type": "stream",
568 | "text": [
569 | "<class 'pandas.core.frame.DataFrame'>\n",
570 | "RangeIndex: 182 entries, 0 to 181\n",
571 | "Data columns (total 14 columns):\n",
572 | "Date 182 non-null object\n",
573 | "PM2.5 182 non-null float64\n",
574 | "PM10 182 non-null float64\n",
575 | "NO2 182 non-null float64\n",
576 | "NOx 182 non-null float64\n",
577 | "CO 182 non-null float64\n",
578 | "SO2 182 non-null float64\n",
579 | "O3 182 non-null float64\n",
580 | "temp 182 non-null float64\n",
581 | "max_temp 182 non-null float64\n",
582 | "min_temp 182 non-null float64\n",
583 | "humid 182 non-null int64\n",
584 | "visible 182 non-null float64\n",
585 | "wind 182 non-null float64\n",
586 | "dtypes: float64(12), int64(1), object(1)\n",
587 | "memory usage: 20.0+ KB\n"
588 | ]
589 | }
590 | ],
591 | "source": [
592 | "df_x1.info()"
593 | ]
594 | },
595 | {
596 | "cell_type": "code",
597 | "execution_count": 11,
598 | "metadata": {},
599 | "outputs": [
600 | {
601 | "name": "stdout",
602 | "output_type": "stream",
603 | "text": [
604 | "<class 'pandas.core.frame.DataFrame'>\n",
605 | "RangeIndex: 182 entries, 0 to 181\n",
606 | "Data columns (total 2 columns):\n",
607 | "Date 182 non-null object\n",
608 | "AQI 182 non-null float64\n",
609 | "dtypes: float64(1), object(1)\n",
610 | "memory usage: 2.9+ KB\n"
611 | ]
612 | }
613 | ],
614 | "source": [
615 | "df_y1.info()"
616 | ]
617 | },
618 | {
619 | "cell_type": "code",
620 | "execution_count": 12,
621 | "metadata": {},
622 | "outputs": [],
623 | "source": [
624 | "df_x[\"Date\"] = pd.to_datetime(df_x[\"Date\"])  # training dates display as YYYY-MM-DD in head()\n",
625 | "df_x1[\"Date\"] = pd.to_datetime(df_x1[\"Date\"], dayfirst=True)  # parse the test frame's own dates; they are DD-MM-YYYY (01-01-2020, 02-01-2020, ...)"
626 | ]
627 | },
628 | {
629 | "cell_type": "code",
630 | "execution_count": 13,
631 | "metadata": {},
632 | "outputs": [
633 | {
634 | "name": "stdout",
635 | "output_type": "stream",
636 | "text": [
637 | "<class 'pandas.core.frame.DataFrame'>\n",
638 | "RangeIndex: 1441 entries, 0 to 1440\n",
639 | "Data columns (total 14 columns):\n",
640 | "Date 1441 non-null datetime64[ns]\n",
641 | "PM2.5 1441 non-null float64\n",
642 | "PM10 1441 non-null float64\n",
643 | "NO2 1441 non-null float64\n",
644 | "NOx 1441 non-null float64\n",
645 | "CO 1441 non-null float64\n",
646 | "SO2 1441 non-null float64\n",
647 | "O3 1441 non-null float64\n",
648 | "temp 1441 non-null float64\n",
649 | "max_temp 1441 non-null float64\n",
650 | "min_temp 1441 non-null float64\n",
651 | "humid 1441 non-null float64\n",
652 | "visible 1441 non-null float64\n",
653 | "wind 1441 non-null float64\n",
654 | "dtypes: datetime64[ns](1), float64(13)\n",
655 | "memory usage: 157.7 KB\n"
656 | ]
657 | }
658 | ],
659 | "source": [
660 | "df_x.info()"
661 | ]
662 | },
663 | {
664 | "cell_type": "code",
665 | "execution_count": 14,
666 | "metadata": {},
667 | "outputs": [
668 | {
669 | "name": "stdout",
670 | "output_type": "stream",
671 | "text": [
672 | "<class 'pandas.core.frame.DataFrame'>\n",
673 | "RangeIndex: 182 entries, 0 to 181\n",
674 | "Data columns (total 14 columns):\n",
675 | "Date 182 non-null datetime64[ns]\n",
676 | "PM2.5 182 non-null float64\n",
677 | "PM10 182 non-null float64\n",
678 | "NO2 182 non-null float64\n",
679 | "NOx 182 non-null float64\n",
680 | "CO 182 non-null float64\n",
681 | "SO2 182 non-null float64\n",
682 | "O3 182 non-null float64\n",
683 | "temp 182 non-null float64\n",
684 | "max_temp 182 non-null float64\n",
685 | "min_temp 182 non-null float64\n",
686 | "humid 182 non-null int64\n",
687 | "visible 182 non-null float64\n",
688 | "wind 182 non-null float64\n",
689 | "dtypes: datetime64[ns](1), float64(12), int64(1)\n",
690 | "memory usage: 20.0 KB\n"
691 | ]
692 | }
693 | ],
694 | "source": [
695 | "df_x1.info()"
696 | ]
697 | },
698 | {
699 | "cell_type": "code",
700 | "execution_count": 15,
701 | "metadata": {},
702 | "outputs": [
703 | {
704 | "data": {
705 | "text/plain": [
706 | ""
707 | ]
708 | },
709 | "execution_count": 15,
710 | "metadata": {},
711 | "output_type": "execute_result"
712 | },
713 | {
714 | "data": {
715 | "image/png": "\n",
716 | "text/plain": [
717 | ""
718 | ]
719 | },
720 | "metadata": {
721 | "needs_background": "light"
722 | },
723 | "output_type": "display_data"
724 | }
725 | ],
726 | "source": [
727 | "sns.heatmap(df_x.isnull(),cbar=False, yticklabels=False)"
728 | ]
729 | },
730 | {
731 | "cell_type": "code",
732 | "execution_count": 16,
733 | "metadata": {},
734 | "outputs": [
735 | {
736 | "data": {
737 | "text/plain": [
738 | ""
739 | ]
740 | },
741 | "execution_count": 16,
742 | "metadata": {},
743 | "output_type": "execute_result"
744 | },
745 | {
746 | "data": {
747 | "image/png": "\n",
748 | "text/plain": [
749 | ""
750 | ]
751 | },
752 | "metadata": {
753 | "needs_background": "light"
754 | },
755 | "output_type": "display_data"
756 | }
757 | ],
758 | "source": [
759 | "sns.heatmap(df_x1.isnull(),cbar=False, yticklabels=False)"
760 | ]
761 | },
762 | {
763 | "cell_type": "code",
764 | "execution_count": 17,
765 | "metadata": {},
766 | "outputs": [],
767 | "source": [
768 | "x_train = df_x.drop(['Date'], axis = 1)\n",
769 | "y_train = df_y[\"AQI\"]\n",
770 | "x_test = df_x1.drop(['Date'], axis = 1)\n",
771 | "y_test = df_y1[\"AQI\"]"
772 | ]
773 | },
774 | {
775 | "cell_type": "code",
776 | "execution_count": 18,
777 | "metadata": {},
778 | "outputs": [
779 | {
780 | "data": {
781 | "text/html": [
782 | "\n",
783 | "\n",
796 | "
\n",
797 | " \n",
798 | " \n",
799 | " | \n",
800 | " PM2.5 | \n",
801 | " PM10 | \n",
802 | " NO2 | \n",
803 | " NOx | \n",
804 | " CO | \n",
805 | " SO2 | \n",
806 | " O3 | \n",
807 | " temp | \n",
808 | " max_temp | \n",
809 | " min_temp | \n",
810 | " humid | \n",
811 | " visible | \n",
812 | " wind | \n",
813 | "
\n",
814 | " \n",
815 | " \n",
816 | " \n",
817 | " | 0 | \n",
818 | " 56.40 | \n",
819 | " 95.08 | \n",
820 | " 51.73 | \n",
821 | " 34.31 | \n",
822 | " 0.69 | \n",
823 | " 7.44 | \n",
824 | " 55.96 | \n",
825 | " 20.1 | \n",
826 | " 28.4 | \n",
827 | " 15.7 | \n",
828 | " 54.0 | \n",
829 | " 6.0 | \n",
830 | " 3.0 | \n",
831 | "
\n",
832 | " \n",
833 | " | 1 | \n",
834 | " 53.69 | \n",
835 | " 93.92 | \n",
836 | " 73.09 | \n",
837 | " 54.77 | \n",
838 | " 0.76 | \n",
839 | " 8.67 | \n",
840 | " 34.06 | \n",
841 | " 20.8 | \n",
842 | " 28.3 | \n",
843 | " 12.9 | \n",
844 | " 45.0 | \n",
845 | " 6.3 | \n",
846 | " 1.1 | \n",
847 | "
\n",
848 | " \n",
849 | " | 2 | \n",
850 | " 62.35 | \n",
851 | " 99.34 | \n",
852 | " 77.77 | \n",
853 | " 55.60 | \n",
854 | " 0.96 | \n",
855 | " 9.10 | \n",
856 | " 47.62 | \n",
857 | " 21.4 | \n",
858 | " 29.0 | \n",
859 | " 13.7 | \n",
860 | " 45.0 | \n",
861 | " 6.3 | \n",
862 | " 0.4 | \n",
863 | "
\n",
864 | " \n",
865 | " | 3 | \n",
866 | " 64.96 | \n",
867 | " 104.62 | \n",
868 | " 77.07 | \n",
869 | " 50.81 | \n",
870 | " 0.89 | \n",
871 | " 8.87 | \n",
872 | " 50.62 | \n",
873 | " 21.5 | \n",
874 | " 29.0 | \n",
875 | " 15.2 | \n",
876 | " 47.0 | \n",
877 | " 6.9 | \n",
878 | " 2.2 | \n",
879 | "
\n",
880 | " \n",
881 | " | 4 | \n",
882 | " 70.15 | \n",
883 | " 105.12 | \n",
884 | " 79.43 | \n",
885 | " 56.05 | \n",
886 | " 0.85 | \n",
887 | " 9.41 | \n",
888 | " 36.94 | \n",
889 | " 20.9 | \n",
890 | " 28.4 | \n",
891 | " 14.7 | \n",
892 | " 51.0 | \n",
893 | " 6.3 | \n",
894 | " 1.1 | \n",
895 | "
\n",
896 | " \n",
897 | "
\n",
898 | "
"
899 | ],
900 | "text/plain": [
901 | " PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp min_temp \\\n",
902 | "0 56.40 95.08 51.73 34.31 0.69 7.44 55.96 20.1 28.4 15.7 \n",
903 | "1 53.69 93.92 73.09 54.77 0.76 8.67 34.06 20.8 28.3 12.9 \n",
904 | "2 62.35 99.34 77.77 55.60 0.96 9.10 47.62 21.4 29.0 13.7 \n",
905 | "3 64.96 104.62 77.07 50.81 0.89 8.87 50.62 21.5 29.0 15.2 \n",
906 | "4 70.15 105.12 79.43 56.05 0.85 9.41 36.94 20.9 28.4 14.7 \n",
907 | "\n",
908 | " humid visible wind \n",
909 | "0 54.0 6.0 3.0 \n",
910 | "1 45.0 6.3 1.1 \n",
911 | "2 45.0 6.3 0.4 \n",
912 | "3 47.0 6.9 2.2 \n",
913 | "4 51.0 6.3 1.1 "
914 | ]
915 | },
916 | "execution_count": 18,
917 | "metadata": {},
918 | "output_type": "execute_result"
919 | }
920 | ],
921 | "source": [
922 | "x_train.head(5)"
923 | ]
924 | },
925 | {
926 | "cell_type": "code",
927 | "execution_count": 19,
928 | "metadata": {},
929 | "outputs": [
930 | {
931 | "data": {
932 | "text/html": [
933 | "\n",
934 | "\n",
947 | "
\n",
948 | " \n",
949 | " \n",
950 | " | \n",
951 | " PM2.5 | \n",
952 | " PM10 | \n",
953 | " NO2 | \n",
954 | " NOx | \n",
955 | " CO | \n",
956 | " SO2 | \n",
957 | " O3 | \n",
958 | " temp | \n",
959 | " max_temp | \n",
960 | " min_temp | \n",
961 | " humid | \n",
962 | " visible | \n",
963 | " wind | \n",
964 | "
\n",
965 | " \n",
966 | " \n",
967 | " \n",
968 | " | 0 | \n",
969 | " 30.71 | \n",
970 | " 70.80 | \n",
971 | " 27.31 | \n",
972 | " 26.74 | \n",
973 | " 1.07 | \n",
974 | " 8.56 | \n",
975 | " 37.39 | \n",
976 | " 22.0 | \n",
977 | " 27.2 | \n",
978 | " 19.2 | \n",
979 | " 75 | \n",
980 | " 6.9 | \n",
981 | " 3.3 | \n",
982 | "
\n",
983 | " \n",
984 | " | 1 | \n",
985 | " 25.78 | \n",
986 | " 62.73 | \n",
987 | " 28.05 | \n",
988 | " 30.18 | \n",
989 | " 1.16 | \n",
990 | " 7.52 | \n",
991 | " 33.11 | \n",
992 | " 22.2 | \n",
993 | " 26.0 | \n",
994 | " 19.4 | \n",
995 | " 77 | \n",
996 | " 5.5 | \n",
997 | " 2.0 | \n",
998 | "
\n",
999 | " \n",
1000 | " | 2 | \n",
1001 | " 29.66 | \n",
1002 | " 69.62 | \n",
1003 | " 26.26 | \n",
1004 | " 26.92 | \n",
1005 | " 0.99 | \n",
1006 | " 6.40 | \n",
1007 | " 32.98 | \n",
1008 | " 23.4 | \n",
1009 | " 28.4 | \n",
1010 | " 19.4 | \n",
1011 | " 68 | \n",
1012 | " 6.9 | \n",
1013 | " 3.5 | \n",
1014 | "
\n",
1015 | " \n",
1016 | " | 3 | \n",
1017 | " 52.62 | \n",
1018 | " 106.81 | \n",
1019 | " 31.56 | \n",
1020 | " 41.89 | \n",
1021 | " 1.39 | \n",
1022 | " 6.94 | \n",
1023 | " 37.35 | \n",
1024 | " 23.6 | \n",
1025 | " 30.5 | \n",
1026 | " 19.0 | \n",
1027 | " 63 | \n",
1028 | " 6.3 | \n",
1029 | " 1.3 | \n",
1030 | "
\n",
1031 | " \n",
1032 | " | 4 | \n",
1033 | " 52.64 | \n",
1034 | " 97.94 | \n",
1035 | " 29.10 | \n",
1036 | " 29.52 | \n",
1037 | " 1.01 | \n",
1038 | " 7.01 | \n",
1039 | " 45.10 | \n",
1040 | " 24.2 | \n",
1041 | " 30.5 | \n",
1042 | " 20.4 | \n",
1043 | " 63 | \n",
1044 | " 6.3 | \n",
1045 | " 1.3 | \n",
1046 | "
\n",
1047 | " \n",
1048 | "
\n",
1049 | "
"
1050 | ],
1051 | "text/plain": [
1052 | " PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp min_temp \\\n",
1053 | "0 30.71 70.80 27.31 26.74 1.07 8.56 37.39 22.0 27.2 19.2 \n",
1054 | "1 25.78 62.73 28.05 30.18 1.16 7.52 33.11 22.2 26.0 19.4 \n",
1055 | "2 29.66 69.62 26.26 26.92 0.99 6.40 32.98 23.4 28.4 19.4 \n",
1056 | "3 52.62 106.81 31.56 41.89 1.39 6.94 37.35 23.6 30.5 19.0 \n",
1057 | "4 52.64 97.94 29.10 29.52 1.01 7.01 45.10 24.2 30.5 20.4 \n",
1058 | "\n",
1059 | " humid visible wind \n",
1060 | "0 75 6.9 3.3 \n",
1061 | "1 77 5.5 2.0 \n",
1062 | "2 68 6.9 3.5 \n",
1063 | "3 63 6.3 1.3 \n",
1064 | "4 63 6.3 1.3 "
1065 | ]
1066 | },
1067 | "execution_count": 19,
1068 | "metadata": {},
1069 | "output_type": "execute_result"
1070 | }
1071 | ],
1072 | "source": [
1073 | "x_test.head(5)"
1074 | ]
1075 | },
1076 | {
1077 | "cell_type": "code",
1078 | "execution_count": 20,
1079 | "metadata": {},
1080 | "outputs": [
1081 | {
1082 | "data": {
1083 | "text/plain": [
1084 | "0 101.0\n",
1085 | "1 92.0\n",
1086 | "2 108.0\n",
1087 | "3 109.0\n",
1088 | "4 114.0\n",
1089 | "Name: AQI, dtype: float64"
1090 | ]
1091 | },
1092 | "execution_count": 20,
1093 | "metadata": {},
1094 | "output_type": "execute_result"
1095 | }
1096 | ],
1097 | "source": [
1098 | "y_train.head(5)"
1099 | ]
1100 | },
1101 | {
1102 | "cell_type": "code",
1103 | "execution_count": 21,
1104 | "metadata": {},
1105 | "outputs": [
1106 | {
1107 | "data": {
1108 | "text/plain": [
1109 | "0 82.0\n",
1110 | "1 81.0\n",
1111 | "2 85.0\n",
1112 | "3 95.0\n",
1113 | "4 118.0\n",
1114 | "Name: AQI, dtype: float64"
1115 | ]
1116 | },
1117 | "execution_count": 21,
1118 | "metadata": {},
1119 | "output_type": "execute_result"
1120 | }
1121 | ],
1122 | "source": [
1123 | "y_test.head(5)"
1124 | ]
1125 | },
1126 | {
1127 | "cell_type": "code",
1128 | "execution_count": 22,
1129 | "metadata": {},
1130 | "outputs": [],
1131 | "source": [
1132 | "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
1133 | "sc_x = StandardScaler()\n",
1134 | "sc_y = StandardScaler()\n",
1135 | "x_train = sc_x.fit_transform(x_train)\n",
1136 | "x_test = sc_x.transform(x_test)\n",
1137 | "y_train = sc_y.fit_transform(np.array(y_train).reshape(-1,1))\n",
1138 | "y_test = sc_y.transform(np.array(y_test).reshape(-1,1))"
1139 | ]
1140 | },
1141 | {
1142 | "cell_type": "code",
1143 | "execution_count": 23,
1144 | "metadata": {},
1145 | "outputs": [
1146 | {
1147 | "data": {
1148 | "text/plain": [
1149 | "array([[-0.27610501],\n",
1150 | " [-0.30227705],\n",
1151 | " [-0.1975889 ],\n",
1152 | " [ 0.06413148],\n",
1153 | " [ 0.66608835]])"
1154 | ]
1155 | },
1156 | "execution_count": 23,
1157 | "metadata": {},
1158 | "output_type": "execute_result"
1159 | }
1160 | ],
1161 | "source": [
1162 | "y_test[:5]"
1163 | ]
1164 | },
1165 | {
1166 | "cell_type": "code",
1167 | "execution_count": 24,
1168 | "metadata": {},
1169 | "outputs": [
1170 | {
1171 | "data": {
1172 | "text/plain": [
1173 | "GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,\n",
1174 | " learning_rate=0.1, loss='ls', max_depth=3,\n",
1175 | " max_features=None, max_leaf_nodes=None,\n",
1176 | " min_impurity_decrease=0.0, min_impurity_split=None,\n",
1177 | " min_samples_leaf=1, min_samples_split=2,\n",
1178 | " min_weight_fraction_leaf=0.0, n_estimators=100,\n",
1179 | " n_iter_no_change=None, presort='auto',\n",
1180 | " random_state=None, subsample=1.0, tol=0.0001,\n",
1181 | " validation_fraction=0.1, verbose=0, warm_start=False)"
1182 | ]
1183 | },
1184 | "execution_count": 24,
1185 | "metadata": {},
1186 | "output_type": "execute_result"
1187 | }
1188 | ],
1189 | "source": [
1190 | "from sklearn.ensemble import GradientBoostingRegressor\n",
1191 | "regressor = GradientBoostingRegressor()\n",
1192 | "regressor.fit(x_train, y_train.ravel())  # y_train is an (n, 1) column after scaling; fit expects a 1-D target"
1193 | ]
1194 | },
1195 | {
1196 | "cell_type": "code",
1197 | "execution_count": 25,
1198 | "metadata": {},
1199 | "outputs": [
1200 | {
1201 | "data": {
1202 | "text/plain": [
1203 | "array([-0.30664803, 0.02744575, -0.45522541, 0.56821673, 0.39152823,\n",
1204 | " -0.33634286, -0.45985512, -0.13761617, 0.21430141, 0.06167805,\n",
1205 | " -0.43960067, -0.05112361, 0.40469433, 0.91172871, 0.24935004,\n",
1206 | " 0.34019051, 0.25525236, -0.59217263, -0.56262186, -0.01497731,\n",
1207 | " 0.0695675 , -0.02144429, 0.04325328, 0.15946079, 0.18446 ,\n",
1208 | " 0.25582022, 0.30949316, 0.40278977, 0.4255181 , 0.2840163 ,\n",
1209 | " 0.42057773, 0.45941366, -0.24938813, -0.25418846, -0.42233104,\n",
1210 | " 0.11668701, 0.33066576, 0.29736599, 0.32064904, 0.40746019,\n",
1211 | " -0.16741748, -0.11139299, -0.30030267, 0.00940919, 0.52902124,\n",
1212 | " 0.71358506, 0.4200336 , 0.26898954, 0.4043111 , 0.51775921,\n",
1213 | " 0.24915816, -0.20381793, -0.32190889, 0.13897562, -0.13482573,\n",
1214 | " 0.45736285, 0.2023384 , 0.32740856, 0.14504189, 0.24373536,\n",
1215 | " 0.4673738 , 0.65162784, 1.76219074, 0.63494524, 0.62830402,\n",
1216 | " 0.30860789, 0.10367325, -0.05551396, 0.20358276, 0.30723921,\n",
1217 | " 0.29236797, 0.1460285 , -0.06520416, -0.35593281, 0.07230334,\n",
1218 | " -0.43199656, -0.50446128, -0.31805938, 0.44619972, 0.50666258,\n",
1219 | " 0.37658415, -0.03478484, 0.09924211, 0.2197565 , -0.86057172,\n",
1220 | " -0.94029625, -0.77387695, -0.65316783, -0.22228412, 0.1078362 ,\n",
1221 | " -0.12004117, -0.17820506, -0.07598379, -0.11422471, -0.05717854,\n",
1222 | " -0.63267181, -0.18355135, -0.35640078, -0.47804469, 0.18869273,\n",
1223 | " -0.64542504, -0.57419083, -0.55506317, -0.49390715, -0.42547427,\n",
1224 | " -0.48798456, -0.5072359 , -0.29127774, -0.49157359, -0.85665164,\n",
1225 | " -0.93764569, -0.76543852, -0.69058052, -0.60870061, -0.25151876,\n",
1226 | " -0.89037835, -0.64424282, -0.74117626, -0.55070149, -0.79329866,\n",
1227 | " -0.66968387, -0.56791011, -0.56984256, -0.48223239, -0.30810351,\n",
1228 | " -0.14926282, -0.44770745, -0.47163955, -0.72383692, -0.79947907,\n",
1229 | " -0.54709498, -0.4843061 , -0.86389296, -0.90695394, -0.61251399,\n",
1230 | " -0.81859382, -0.39373544, -0.30160905, -0.25753693, -0.82518774,\n",
1231 | " -0.91156371, -0.83574485, -0.63531245, -0.46670179, -0.58265605,\n",
1232 | " -0.63542767, -0.51736277, -0.46767405, -0.77992992, -0.80571348,\n",
1233 | " -0.8893096 , -0.7368831 , -1.12757017, -1.29664219, -1.33817791,\n",
1234 | " -1.12177791, -0.85475676, -0.83829278, -1.06097844, -0.87068831,\n",
1235 | " -0.83739172, -0.78359487, -1.05402323, -1.15873018, -0.97677208,\n",
1236 | " -1.1738632 , -1.08756149, -0.87845 , -1.05953758, -0.89726162,\n",
1237 | " -0.92808345, -1.07917673, -1.09615972, -1.04304939, -0.91874159,\n",
1238 | " -0.85788747, -0.96075367, -1.11091323, -1.15244895, -1.11325572,\n",
1239 | " -1.02376198, -1.1516053 ])"
1240 | ]
1241 | },
1242 | "execution_count": 25,
1243 | "metadata": {},
1244 | "output_type": "execute_result"
1245 | }
1246 | ],
1247 | "source": [
1248 | "y_prediction = regressor.predict(x_test)\n",
1249 | "y_prediction "
1250 | ]
1251 | },
1252 | {
1253 | "cell_type": "code",
1254 | "execution_count": 26,
1255 | "metadata": {},
1256 | "outputs": [
1257 | {
1258 | "name": "stdout",
1259 | "output_type": "stream",
1260 | "text": [
1261 | "R2 SCORE is 0.770856636180251\n",
1262 | "mean_sqrd_error is 0.058365602156151296\n",
1263 | "Root mean squared error of is 0.24158973934368838\n",
1264 | "Mean Absolute error is 0.1872874239791087\n"
1265 | ]
1266 | }
1267 | ],
1268 | "source": [
1269 | "from sklearn.metrics import r2_score\n",
1270 | "from sklearn.metrics import mean_squared_error\n",
1271 | "from sklearn.metrics import mean_absolute_error\n",
1272 | "score = r2_score(y_test,y_prediction)\n",
1273 | "mean_error = mean_squared_error(y_test,y_prediction)\n",
1274 | "mae = mean_absolute_error(y_test,y_prediction)\n",
1275 | "print(\"R2 SCORE is\", score)\n",
1276 | "print(\"mean_sqrd_error is \", mean_error)\n",
1277 | "print(\"Root mean squared error of is\",np.sqrt(mean_error))\n",
1278 | "print(\"Mean Absolute error is\", mae)"
1279 | ]
1280 | },
1281 | {
1282 | "cell_type": "code",
1283 | "execution_count": 27,
1284 | "metadata": {},
1285 | "outputs": [
1286 | {
1287 | "data": {
1288 | "text/plain": [
1289 | "array([[ 82.],\n",
1290 | " [ 81.],\n",
1291 | " [ 85.],\n",
1292 | " [ 95.],\n",
1293 | " [118.]])"
1294 | ]
1295 | },
1296 | "execution_count": 27,
1297 | "metadata": {},
1298 | "output_type": "execute_result"
1299 | }
1300 | ],
1301 | "source": [
1302 | "y_test = sc_y.inverse_transform(y_test)\n",
1303 | "y_test[:5]"
1304 | ]
1305 | },
1306 | {
1307 | "cell_type": "code",
1308 | "execution_count": 28,
1309 | "metadata": {},
1310 | "outputs": [
1311 | {
1312 | "data": {
1313 | "text/plain": [
1314 | "array([ 80.83299016, 93.59828519, 75.15603902, 114.26045103,\n",
1315 | " 107.50940979])"
1316 | ]
1317 | },
1318 | "execution_count": 28,
1319 | "metadata": {},
1320 | "output_type": "execute_result"
1321 | }
1322 | ],
1323 | "source": [
1324 | "y_prediction = sc_y.inverse_transform(y_prediction.reshape(-1, 1)).ravel()\n",
1325 | "y_prediction[:5]"
1326 | ]
1327 | },
1328 | {
1329 | "cell_type": "code",
1330 | "execution_count": 29,
1331 | "metadata": {},
1332 | "outputs": [
1333 | {
1334 | "data": {
1335 | "image/png": "\n",
1336 | "text/plain": [
1337 | ""
1338 | ]
1339 | },
1340 | "metadata": {
1341 | "needs_background": "light"
1342 | },
1343 | "output_type": "display_data"
1344 | }
1345 | ],
1346 | "source": [
1347 | "df1 = pd.DataFrame({'Actual': y_test[100:120].flatten(), 'Predicted': y_prediction[100:120].flatten()})\n",
1348 | "df1.plot(kind='line')\n",
1349 | "plt.grid(which='major', linestyle='-', linewidth='0.5', color='green')\n",
1350 | "plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black')\n",
1351 | "plt.show()"
1352 | ]
1353 | },
1354 | {
1355 | "cell_type": "code",
1356 | "execution_count": 30,
1357 | "metadata": {},
1358 | "outputs": [
1359 | {
1360 | "data": {
1361 | "text/html": [
1362 | "\n",
1363 | "\n",
1376 | "
\n",
1377 | " \n",
1378 | " \n",
1379 | " | \n",
1380 | " ACTUAL AQI | \n",
1381 | " PREDICTED AQI | \n",
1382 | " DIFFERNCE | \n",
1383 | " PERCENTAGE ERROR | \n",
1384 | "
\n",
1385 | " \n",
1386 | " \n",
1387 | " \n",
1388 | " | 0 | \n",
1389 | " 82.0 | \n",
1390 | " 80.832990 | \n",
1391 | " 1.167010 | \n",
1392 | " 1.423183 | \n",
1393 | "
\n",
1394 | " \n",
1395 | " | 1 | \n",
1396 | " 81.0 | \n",
1397 | " 93.598285 | \n",
1398 | " -12.598285 | \n",
1399 | " 15.553439 | \n",
1400 | "
\n",
1401 | " \n",
1402 | " | 2 | \n",
1403 | " 85.0 | \n",
1404 | " 75.156039 | \n",
1405 | " 9.843961 | \n",
1406 | " 11.581131 | \n",
1407 | "
\n",
1408 | " \n",
1409 | " | 3 | \n",
1410 | " 95.0 | \n",
1411 | " 114.260451 | \n",
1412 | " -19.260451 | \n",
1413 | " 20.274159 | \n",
1414 | "
\n",
1415 | " \n",
1416 | " | 4 | \n",
1417 | " 118.0 | \n",
1418 | " 107.509410 | \n",
1419 | " 10.490590 | \n",
1420 | " 8.890331 | \n",
1421 | "
\n",
1422 | " \n",
1423 | " | 5 | \n",
1424 | " 81.0 | \n",
1425 | " 79.698389 | \n",
1426 | " 1.301611 | \n",
1427 | " 1.606927 | \n",
1428 | "
\n",
1429 | " \n",
1430 | " | 6 | \n",
1431 | " 75.0 | \n",
1432 | " 74.979144 | \n",
1433 | " 0.020856 | \n",
1434 | " 0.027808 | \n",
1435 | "
\n",
1436 | " \n",
1437 | " | 7 | \n",
1438 | " 93.0 | \n",
1439 | " 87.291481 | \n",
1440 | " 5.708519 | \n",
1441 | " 6.138193 | \n",
1442 | "
\n",
1443 | " \n",
1444 | " | 8 | \n",
1445 | " 101.0 | \n",
1446 | " 100.737800 | \n",
1447 | " 0.262200 | \n",
1448 | " 0.259604 | \n",
1449 | "
\n",
1450 | " \n",
1451 | " | 9 | \n",
1452 | " 94.0 | \n",
1453 | " 94.906258 | \n",
1454 | " -0.906258 | \n",
1455 | " 0.964104 | \n",
1456 | "
\n",
1457 | " \n",
1458 | " | 10 | \n",
1459 | " 78.0 | \n",
1460 | " 75.753040 | \n",
1461 | " 2.246960 | \n",
1462 | " 2.880717 | \n",
1463 | "
\n",
1464 | " \n",
1465 | " | 11 | \n",
1466 | " 79.0 | \n",
1467 | " 90.596251 | \n",
1468 | " -11.596251 | \n",
1469 | " 14.678798 | \n",
1470 | "
\n",
1471 | " \n",
1472 | " | 12 | \n",
1473 | " 106.0 | \n",
1474 | " 108.012470 | \n",
1475 | " -2.012470 | \n",
1476 | " 1.898556 | \n",
1477 | "
\n",
1478 | " \n",
1479 | " | 13 | \n",
1480 | " 121.0 | \n",
1481 | " 127.385603 | \n",
1482 | " -6.385603 | \n",
1483 | " 5.277358 | \n",
1484 | "
\n",
1485 | " \n",
1486 | " | 14 | \n",
1487 | " 111.0 | \n",
1488 | " 102.076964 | \n",
1489 | " 8.923036 | \n",
1490 | " 8.038772 | \n",
1491 | "
\n",
1492 | " \n",
1493 | "
\n",
1494 | "
"
1495 | ],
1496 | "text/plain": [
1497 | " ACTUAL AQI PREDICTED AQI DIFFERNCE PERCENTAGE ERROR\n",
1498 | "0 82.0 80.832990 1.167010 1.423183\n",
1499 | "1 81.0 93.598285 -12.598285 15.553439\n",
1500 | "2 85.0 75.156039 9.843961 11.581131\n",
1501 | "3 95.0 114.260451 -19.260451 20.274159\n",
1502 | "4 118.0 107.509410 10.490590 8.890331\n",
1503 | "5 81.0 79.698389 1.301611 1.606927\n",
1504 | "6 75.0 74.979144 0.020856 0.027808\n",
1505 | "7 93.0 87.291481 5.708519 6.138193\n",
1506 | "8 101.0 100.737800 0.262200 0.259604\n",
1507 | "9 94.0 94.906258 -0.906258 0.964104\n",
1508 | "10 78.0 75.753040 2.246960 2.880717\n",
1509 | "11 79.0 90.596251 -11.596251 14.678798\n",
1510 | "12 106.0 108.012470 -2.012470 1.898556\n",
1511 | "13 121.0 127.385603 -6.385603 5.277358\n",
1512 | "14 111.0 102.076964 8.923036 8.038772"
1513 | ]
1514 | },
1515 | "execution_count": 30,
1516 | "metadata": {},
1517 | "output_type": "execute_result"
1518 | }
1519 | ],
1520 | "source": [
1521 | "result = pd.DataFrame()\n",
1522 | "result[\"ACTUAL AQI\"] = y_test.flatten()\n",
1523 | "result[\"PREDICTED AQI\"] = y_prediction\n",
1524 | "result[\"DIFFERNCE\"] = result[\"ACTUAL AQI\"] - result[\"PREDICTED AQI\"]\n",
1525 | "result[\"PERCENTAGE ERROR\"] = ( abs(result[\"ACTUAL AQI\"] - result[\"PREDICTED AQI\"] ) / result[\"ACTUAL AQI\"] ) * 100\n",
1526 | "result.head(15)"
1527 | ]
1528 | },
1529 | {
1530 | "cell_type": "code",
1531 | "execution_count": null,
1532 | "metadata": {},
1533 | "outputs": [],
1534 | "source": []
1535 | }
1536 | ],
1537 | "metadata": {
1538 | "kernelspec": {
1539 | "display_name": "Python 3",
1540 | "language": "python",
1541 | "name": "python3"
1542 | },
1543 | "language_info": {
1544 | "codemirror_mode": {
1545 | "name": "ipython",
1546 | "version": 3
1547 | },
1548 | "file_extension": ".py",
1549 | "mimetype": "text/x-python",
1550 | "name": "python",
1551 | "nbconvert_exporter": "python",
1552 | "pygments_lexer": "ipython3",
1553 | "version": "3.7.3"
1554 | }
1555 | },
1556 | "nbformat": 4,
1557 | "nbformat_minor": 2
1558 | }
1559 |
--------------------------------------------------------------------------------
/SVR.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "import seaborn as sns\n",
13 | "import matplotlib.pyplot as plt\n",
14 | "\n",
15 | "import warnings\n",
16 | "warnings.filterwarnings(\"ignore\")\n",
17 | "warnings.simplefilter(action='ignore', category=FutureWarning)"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 2,
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "df_x = pd.read_csv(\"train_x.csv\")\n",
27 | "df_y = pd.read_csv(\"train_y.csv\")\n",
28 | "df_x1 = pd.read_csv(\"test_x.csv\")\n",
29 | "df_y1 = pd.read_csv(\"test_y.csv\")"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 3,
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "data": {
39 | "text/html": [
40 | "\n",
41 | "\n",
54 | "
\n",
55 | " \n",
56 | " \n",
57 | " | \n",
58 | " Date | \n",
59 | " PM2.5 | \n",
60 | " PM10 | \n",
61 | " NO2 | \n",
62 | " NOx | \n",
63 | " CO | \n",
64 | " SO2 | \n",
65 | " O3 | \n",
66 | " temp | \n",
67 | " max_temp | \n",
68 | " min_temp | \n",
69 | " humid | \n",
70 | " visible | \n",
71 | " wind | \n",
72 | "
\n",
73 | " \n",
74 | " \n",
75 | " \n",
76 | " | 0 | \n",
77 | " 2016-01-01 | \n",
78 | " 56.40 | \n",
79 | " 95.08 | \n",
80 | " 51.73 | \n",
81 | " 34.31 | \n",
82 | " 0.69 | \n",
83 | " 7.44 | \n",
84 | " 55.96 | \n",
85 | " 20.1 | \n",
86 | " 28.4 | \n",
87 | " 15.7 | \n",
88 | " 54.0 | \n",
89 | " 6.0 | \n",
90 | " 3.0 | \n",
91 | "
\n",
92 | " \n",
93 | " | 1 | \n",
94 | " 2016-01-02 | \n",
95 | " 53.69 | \n",
96 | " 93.92 | \n",
97 | " 73.09 | \n",
98 | " 54.77 | \n",
99 | " 0.76 | \n",
100 | " 8.67 | \n",
101 | " 34.06 | \n",
102 | " 20.8 | \n",
103 | " 28.3 | \n",
104 | " 12.9 | \n",
105 | " 45.0 | \n",
106 | " 6.3 | \n",
107 | " 1.1 | \n",
108 | "
\n",
109 | " \n",
110 | " | 2 | \n",
111 | " 2016-01-03 | \n",
112 | " 62.35 | \n",
113 | " 99.34 | \n",
114 | " 77.77 | \n",
115 | " 55.60 | \n",
116 | " 0.96 | \n",
117 | " 9.10 | \n",
118 | " 47.62 | \n",
119 | " 21.4 | \n",
120 | " 29.0 | \n",
121 | " 13.7 | \n",
122 | " 45.0 | \n",
123 | " 6.3 | \n",
124 | " 0.4 | \n",
125 | "
\n",
126 | " \n",
127 | " | 3 | \n",
128 | " 2016-01-04 | \n",
129 | " 64.96 | \n",
130 | " 104.62 | \n",
131 | " 77.07 | \n",
132 | " 50.81 | \n",
133 | " 0.89 | \n",
134 | " 8.87 | \n",
135 | " 50.62 | \n",
136 | " 21.5 | \n",
137 | " 29.0 | \n",
138 | " 15.2 | \n",
139 | " 47.0 | \n",
140 | " 6.9 | \n",
141 | " 2.2 | \n",
142 | "
\n",
143 | " \n",
144 | " | 4 | \n",
145 | " 2016-01-05 | \n",
146 | " 70.15 | \n",
147 | " 105.12 | \n",
148 | " 79.43 | \n",
149 | " 56.05 | \n",
150 | " 0.85 | \n",
151 | " 9.41 | \n",
152 | " 36.94 | \n",
153 | " 20.9 | \n",
154 | " 28.4 | \n",
155 | " 14.7 | \n",
156 | " 51.0 | \n",
157 | " 6.3 | \n",
158 | " 1.1 | \n",
159 | "
\n",
160 | " \n",
161 | "
\n",
162 | "
"
163 | ],
164 | "text/plain": [
165 | " Date PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp \\\n",
166 | "0 2016-01-01 56.40 95.08 51.73 34.31 0.69 7.44 55.96 20.1 28.4 \n",
167 | "1 2016-01-02 53.69 93.92 73.09 54.77 0.76 8.67 34.06 20.8 28.3 \n",
168 | "2 2016-01-03 62.35 99.34 77.77 55.60 0.96 9.10 47.62 21.4 29.0 \n",
169 | "3 2016-01-04 64.96 104.62 77.07 50.81 0.89 8.87 50.62 21.5 29.0 \n",
170 | "4 2016-01-05 70.15 105.12 79.43 56.05 0.85 9.41 36.94 20.9 28.4 \n",
171 | "\n",
172 | " min_temp humid visible wind \n",
173 | "0 15.7 54.0 6.0 3.0 \n",
174 | "1 12.9 45.0 6.3 1.1 \n",
175 | "2 13.7 45.0 6.3 0.4 \n",
176 | "3 15.2 47.0 6.9 2.2 \n",
177 | "4 14.7 51.0 6.3 1.1 "
178 | ]
179 | },
180 | "execution_count": 3,
181 | "metadata": {},
182 | "output_type": "execute_result"
183 | }
184 | ],
185 | "source": [
186 | "df_x.head(5)"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 4,
192 | "metadata": {},
193 | "outputs": [
194 | {
195 | "data": {
196 | "text/html": [
197 | "\n",
198 | "\n",
211 | "
\n",
212 | " \n",
213 | " \n",
214 | " | \n",
215 | " Date | \n",
216 | " PM2.5 | \n",
217 | " PM10 | \n",
218 | " NO2 | \n",
219 | " NOx | \n",
220 | " CO | \n",
221 | " SO2 | \n",
222 | " O3 | \n",
223 | " temp | \n",
224 | " max_temp | \n",
225 | " min_temp | \n",
226 | " humid | \n",
227 | " visible | \n",
228 | " wind | \n",
229 | "
\n",
230 | " \n",
231 | " \n",
232 | " \n",
233 | " | 0 | \n",
234 | " 01-01-2020 | \n",
235 | " 30.71 | \n",
236 | " 70.80 | \n",
237 | " 27.31 | \n",
238 | " 26.74 | \n",
239 | " 1.07 | \n",
240 | " 8.56 | \n",
241 | " 37.39 | \n",
242 | " 22.0 | \n",
243 | " 27.2 | \n",
244 | " 19.2 | \n",
245 | " 75 | \n",
246 | " 6.9 | \n",
247 | " 3.3 | \n",
248 | "
\n",
249 | " \n",
250 | " | 1 | \n",
251 | " 02-01-2020 | \n",
252 | " 25.78 | \n",
253 | " 62.73 | \n",
254 | " 28.05 | \n",
255 | " 30.18 | \n",
256 | " 1.16 | \n",
257 | " 7.52 | \n",
258 | " 33.11 | \n",
259 | " 22.2 | \n",
260 | " 26.0 | \n",
261 | " 19.4 | \n",
262 | " 77 | \n",
263 | " 5.5 | \n",
264 | " 2.0 | \n",
265 | "
\n",
266 | " \n",
267 | " | 2 | \n",
268 | " 03-01-2020 | \n",
269 | " 29.66 | \n",
270 | " 69.62 | \n",
271 | " 26.26 | \n",
272 | " 26.92 | \n",
273 | " 0.99 | \n",
274 | " 6.40 | \n",
275 | " 32.98 | \n",
276 | " 23.4 | \n",
277 | " 28.4 | \n",
278 | " 19.4 | \n",
279 | " 68 | \n",
280 | " 6.9 | \n",
281 | " 3.5 | \n",
282 | "
\n",
283 | " \n",
284 | " | 3 | \n",
285 | " 04-01-2020 | \n",
286 | " 52.62 | \n",
287 | " 106.81 | \n",
288 | " 31.56 | \n",
289 | " 41.89 | \n",
290 | " 1.39 | \n",
291 | " 6.94 | \n",
292 | " 37.35 | \n",
293 | " 23.6 | \n",
294 | " 30.5 | \n",
295 | " 19.0 | \n",
296 | " 63 | \n",
297 | " 6.3 | \n",
298 | " 1.3 | \n",
299 | "
\n",
300 | " \n",
301 | " | 4 | \n",
302 | " 05-01-2020 | \n",
303 | " 52.64 | \n",
304 | " 97.94 | \n",
305 | " 29.10 | \n",
306 | " 29.52 | \n",
307 | " 1.01 | \n",
308 | " 7.01 | \n",
309 | " 45.10 | \n",
310 | " 24.2 | \n",
311 | " 30.5 | \n",
312 | " 20.4 | \n",
313 | " 63 | \n",
314 | " 6.3 | \n",
315 | " 1.3 | \n",
316 | "
\n",
317 | " \n",
318 | "
\n",
319 | "
"
320 | ],
321 | "text/plain": [
322 | " Date PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp \\\n",
323 | "0 01-01-2020 30.71 70.80 27.31 26.74 1.07 8.56 37.39 22.0 27.2 \n",
324 | "1 02-01-2020 25.78 62.73 28.05 30.18 1.16 7.52 33.11 22.2 26.0 \n",
325 | "2 03-01-2020 29.66 69.62 26.26 26.92 0.99 6.40 32.98 23.4 28.4 \n",
326 | "3 04-01-2020 52.62 106.81 31.56 41.89 1.39 6.94 37.35 23.6 30.5 \n",
327 | "4 05-01-2020 52.64 97.94 29.10 29.52 1.01 7.01 45.10 24.2 30.5 \n",
328 | "\n",
329 | " min_temp humid visible wind \n",
330 | "0 19.2 75 6.9 3.3 \n",
331 | "1 19.4 77 5.5 2.0 \n",
332 | "2 19.4 68 6.9 3.5 \n",
333 | "3 19.0 63 6.3 1.3 \n",
334 | "4 20.4 63 6.3 1.3 "
335 | ]
336 | },
337 | "execution_count": 4,
338 | "metadata": {},
339 | "output_type": "execute_result"
340 | }
341 | ],
342 | "source": [
343 | "df_x1.head(5)"
344 | ]
345 | },
346 | {
347 | "cell_type": "code",
348 | "execution_count": 5,
349 | "metadata": {},
350 | "outputs": [
351 | {
352 | "data": {
353 | "text/html": [
354 | "\n",
355 | "\n",
368 | "
\n",
369 | " \n",
370 | " \n",
371 | " | \n",
372 | " Date | \n",
373 | " AQI | \n",
374 | "
\n",
375 | " \n",
376 | " \n",
377 | " \n",
378 | " | 0 | \n",
379 | " 2016-01-01 | \n",
380 | " 101.0 | \n",
381 | "
\n",
382 | " \n",
383 | " | 1 | \n",
384 | " 2016-01-02 | \n",
385 | " 92.0 | \n",
386 | "
\n",
387 | " \n",
388 | " | 2 | \n",
389 | " 2016-01-03 | \n",
390 | " 108.0 | \n",
391 | "
\n",
392 | " \n",
393 | " | 3 | \n",
394 | " 2016-01-04 | \n",
395 | " 109.0 | \n",
396 | "
\n",
397 | " \n",
398 | " | 4 | \n",
399 | " 2016-01-05 | \n",
400 | " 114.0 | \n",
401 | "
\n",
402 | " \n",
403 | "
\n",
404 | "
"
405 | ],
406 | "text/plain": [
407 | " Date AQI\n",
408 | "0 2016-01-01 101.0\n",
409 | "1 2016-01-02 92.0\n",
410 | "2 2016-01-03 108.0\n",
411 | "3 2016-01-04 109.0\n",
412 | "4 2016-01-05 114.0"
413 | ]
414 | },
415 | "execution_count": 5,
416 | "metadata": {},
417 | "output_type": "execute_result"
418 | }
419 | ],
420 | "source": [
421 | "df_y.head(5)"
422 | ]
423 | },
424 | {
425 | "cell_type": "code",
426 | "execution_count": 6,
427 | "metadata": {},
428 | "outputs": [
429 | {
430 | "data": {
431 | "text/html": [
432 | "\n",
433 | "\n",
446 | "
\n",
447 | " \n",
448 | " \n",
449 | " | \n",
450 | " Date | \n",
451 | " AQI | \n",
452 | "
\n",
453 | " \n",
454 | " \n",
455 | " \n",
456 | " | 0 | \n",
457 | " 2020-01-01 | \n",
458 | " 82.0 | \n",
459 | "
\n",
460 | " \n",
461 | " | 1 | \n",
462 | " 2020-01-02 | \n",
463 | " 81.0 | \n",
464 | "
\n",
465 | " \n",
466 | " | 2 | \n",
467 | " 2020-01-03 | \n",
468 | " 85.0 | \n",
469 | "
\n",
470 | " \n",
471 | " | 3 | \n",
472 | " 2020-01-04 | \n",
473 | " 95.0 | \n",
474 | "
\n",
475 | " \n",
476 | " | 4 | \n",
477 | " 2020-01-05 | \n",
478 | " 118.0 | \n",
479 | "
\n",
480 | " \n",
481 | "
\n",
482 | "
"
483 | ],
484 | "text/plain": [
485 | " Date AQI\n",
486 | "0 2020-01-01 82.0\n",
487 | "1 2020-01-02 81.0\n",
488 | "2 2020-01-03 85.0\n",
489 | "3 2020-01-04 95.0\n",
490 | "4 2020-01-05 118.0"
491 | ]
492 | },
493 | "execution_count": 6,
494 | "metadata": {},
495 | "output_type": "execute_result"
496 | }
497 | ],
498 | "source": [
499 | "df_y1.head(5)"
500 | ]
501 | },
502 | {
503 | "cell_type": "code",
504 | "execution_count": 7,
505 | "metadata": {},
506 | "outputs": [
507 | {
508 | "name": "stdout",
509 | "output_type": "stream",
510 | "text": [
511 | "\n",
512 | "RangeIndex: 1441 entries, 0 to 1440\n",
513 | "Data columns (total 14 columns):\n",
514 | "Date 1441 non-null object\n",
515 | "PM2.5 1441 non-null float64\n",
516 | "PM10 1441 non-null float64\n",
517 | "NO2 1441 non-null float64\n",
518 | "NOx 1441 non-null float64\n",
519 | "CO 1441 non-null float64\n",
520 | "SO2 1441 non-null float64\n",
521 | "O3 1441 non-null float64\n",
522 | "temp 1441 non-null float64\n",
523 | "max_temp 1441 non-null float64\n",
524 | "min_temp 1441 non-null float64\n",
525 | "humid 1441 non-null float64\n",
526 | "visible 1441 non-null float64\n",
527 | "wind 1441 non-null float64\n",
528 | "dtypes: float64(13), object(1)\n",
529 | "memory usage: 157.7+ KB\n"
530 | ]
531 | }
532 | ],
533 | "source": [
534 | "df_x.info()"
535 | ]
536 | },
537 | {
538 | "cell_type": "code",
539 | "execution_count": 8,
540 | "metadata": {},
541 | "outputs": [
542 | {
543 | "name": "stdout",
544 | "output_type": "stream",
545 | "text": [
546 | "\n",
547 | "RangeIndex: 1441 entries, 0 to 1440\n",
548 | "Data columns (total 2 columns):\n",
549 | "Date 1441 non-null object\n",
550 | "AQI 1441 non-null float64\n",
551 | "dtypes: float64(1), object(1)\n",
552 | "memory usage: 22.6+ KB\n"
553 | ]
554 | }
555 | ],
556 | "source": [
557 | "df_y.info()"
558 | ]
559 | },
560 | {
561 | "cell_type": "code",
562 | "execution_count": 9,
563 | "metadata": {},
564 | "outputs": [
565 | {
566 | "name": "stdout",
567 | "output_type": "stream",
568 | "text": [
569 | "\n",
570 | "RangeIndex: 182 entries, 0 to 181\n",
571 | "Data columns (total 14 columns):\n",
572 | "Date 182 non-null object\n",
573 | "PM2.5 182 non-null float64\n",
574 | "PM10 182 non-null float64\n",
575 | "NO2 182 non-null float64\n",
576 | "NOx 182 non-null float64\n",
577 | "CO 182 non-null float64\n",
578 | "SO2 182 non-null float64\n",
579 | "O3 182 non-null float64\n",
580 | "temp 182 non-null float64\n",
581 | "max_temp 182 non-null float64\n",
582 | "min_temp 182 non-null float64\n",
583 | "humid 182 non-null int64\n",
584 | "visible 182 non-null float64\n",
585 | "wind 182 non-null float64\n",
586 | "dtypes: float64(12), int64(1), object(1)\n",
587 | "memory usage: 20.0+ KB\n"
588 | ]
589 | }
590 | ],
591 | "source": [
592 | "df_x1.info()"
593 | ]
594 | },
595 | {
596 | "cell_type": "code",
597 | "execution_count": 10,
598 | "metadata": {},
599 | "outputs": [
600 | {
601 | "name": "stdout",
602 | "output_type": "stream",
603 | "text": [
604 | "\n",
605 | "RangeIndex: 182 entries, 0 to 181\n",
606 | "Data columns (total 2 columns):\n",
607 | "Date 182 non-null object\n",
608 | "AQI 182 non-null float64\n",
609 | "dtypes: float64(1), object(1)\n",
610 | "memory usage: 2.9+ KB\n"
611 | ]
612 | }
613 | ],
614 | "source": [
615 | "df_y1.info()"
616 | ]
617 | },
618 | {
619 | "cell_type": "code",
620 | "execution_count": 11,
621 | "metadata": {},
622 | "outputs": [],
623 | "source": [
624 | "df_x[\"Date\"] = pd.to_datetime(df_x[\"Date\"])\n",
625 | "df_x1[\"Date\"] = pd.to_datetime(df_x1[\"Date\"], dayfirst=True)"
626 | ]
627 | },
628 | {
629 | "cell_type": "code",
630 | "execution_count": 12,
631 | "metadata": {},
632 | "outputs": [
633 | {
634 | "name": "stdout",
635 | "output_type": "stream",
636 | "text": [
637 | "\n",
638 | "RangeIndex: 1441 entries, 0 to 1440\n",
639 | "Data columns (total 14 columns):\n",
640 | "Date 1441 non-null datetime64[ns]\n",
641 | "PM2.5 1441 non-null float64\n",
642 | "PM10 1441 non-null float64\n",
643 | "NO2 1441 non-null float64\n",
644 | "NOx 1441 non-null float64\n",
645 | "CO 1441 non-null float64\n",
646 | "SO2 1441 non-null float64\n",
647 | "O3 1441 non-null float64\n",
648 | "temp 1441 non-null float64\n",
649 | "max_temp 1441 non-null float64\n",
650 | "min_temp 1441 non-null float64\n",
651 | "humid 1441 non-null float64\n",
652 | "visible 1441 non-null float64\n",
653 | "wind 1441 non-null float64\n",
654 | "dtypes: datetime64[ns](1), float64(13)\n",
655 | "memory usage: 157.7 KB\n"
656 | ]
657 | }
658 | ],
659 | "source": [
660 | "df_x.info()"
661 | ]
662 | },
663 | {
664 | "cell_type": "code",
665 | "execution_count": 13,
666 | "metadata": {},
667 | "outputs": [
668 | {
669 | "name": "stdout",
670 | "output_type": "stream",
671 | "text": [
672 | "\n",
673 | "RangeIndex: 182 entries, 0 to 181\n",
674 | "Data columns (total 14 columns):\n",
675 | "Date 182 non-null datetime64[ns]\n",
676 | "PM2.5 182 non-null float64\n",
677 | "PM10 182 non-null float64\n",
678 | "NO2 182 non-null float64\n",
679 | "NOx 182 non-null float64\n",
680 | "CO 182 non-null float64\n",
681 | "SO2 182 non-null float64\n",
682 | "O3 182 non-null float64\n",
683 | "temp 182 non-null float64\n",
684 | "max_temp 182 non-null float64\n",
685 | "min_temp 182 non-null float64\n",
686 | "humid 182 non-null int64\n",
687 | "visible 182 non-null float64\n",
688 | "wind 182 non-null float64\n",
689 | "dtypes: datetime64[ns](1), float64(12), int64(1)\n",
690 | "memory usage: 20.0 KB\n"
691 | ]
692 | }
693 | ],
694 | "source": [
695 | "df_x1.info()"
696 | ]
697 | },
698 | {
699 | "cell_type": "code",
700 | "execution_count": 14,
701 | "metadata": {},
702 | "outputs": [
703 | {
704 | "data": {
705 | "text/plain": [
706 | ""
707 | ]
708 | },
709 | "execution_count": 14,
710 | "metadata": {},
711 | "output_type": "execute_result"
712 | },
713 | {
714 | "data": {
715 | "image/png": "\n",
716 | "text/plain": [
717 | ""
718 | ]
719 | },
720 | "metadata": {
721 | "needs_background": "light"
722 | },
723 | "output_type": "display_data"
724 | }
725 | ],
726 | "source": [
727 | "sns.heatmap(df_x.isnull(),cbar=False, yticklabels=False)"
728 | ]
729 | },
730 | {
731 | "cell_type": "code",
732 | "execution_count": 15,
733 | "metadata": {},
734 | "outputs": [
735 | {
736 | "data": {
737 | "text/plain": [
738 | ""
739 | ]
740 | },
741 | "execution_count": 15,
742 | "metadata": {},
743 | "output_type": "execute_result"
744 | },
745 | {
746 | "data": {
747 | "image/png": "\n",
748 | "text/plain": [
749 | ""
750 | ]
751 | },
752 | "metadata": {
753 | "needs_background": "light"
754 | },
755 | "output_type": "display_data"
756 | }
757 | ],
758 | "source": [
759 | "sns.heatmap(df_x1.isnull(),cbar=False, yticklabels=False)"
760 | ]
761 | },
762 | {
763 | "cell_type": "code",
764 | "execution_count": 16,
765 | "metadata": {},
766 | "outputs": [],
767 | "source": [
768 | "x_train = df_x.drop(['Date'], axis = 1)\n",
769 | "y_train = df_y[\"AQI\"]\n",
770 | "x_test = df_x1.drop(['Date'], axis = 1)\n",
771 | "y_test = df_y1[\"AQI\"]"
772 | ]
773 | },
774 | {
775 | "cell_type": "code",
776 | "execution_count": 17,
777 | "metadata": {},
778 | "outputs": [
779 | {
780 | "data": {
781 | "text/html": [
782 | "\n",
783 | "\n",
796 | "
\n",
797 | " \n",
798 | " \n",
799 | " | \n",
800 | " PM2.5 | \n",
801 | " PM10 | \n",
802 | " NO2 | \n",
803 | " NOx | \n",
804 | " CO | \n",
805 | " SO2 | \n",
806 | " O3 | \n",
807 | " temp | \n",
808 | " max_temp | \n",
809 | " min_temp | \n",
810 | " humid | \n",
811 | " visible | \n",
812 | " wind | \n",
813 | "
\n",
814 | " \n",
815 | " \n",
816 | " \n",
817 | " | 0 | \n",
818 | " 56.40 | \n",
819 | " 95.08 | \n",
820 | " 51.73 | \n",
821 | " 34.31 | \n",
822 | " 0.69 | \n",
823 | " 7.44 | \n",
824 | " 55.96 | \n",
825 | " 20.1 | \n",
826 | " 28.4 | \n",
827 | " 15.7 | \n",
828 | " 54.0 | \n",
829 | " 6.0 | \n",
830 | " 3.0 | \n",
831 | "
\n",
832 | " \n",
833 | " | 1 | \n",
834 | " 53.69 | \n",
835 | " 93.92 | \n",
836 | " 73.09 | \n",
837 | " 54.77 | \n",
838 | " 0.76 | \n",
839 | " 8.67 | \n",
840 | " 34.06 | \n",
841 | " 20.8 | \n",
842 | " 28.3 | \n",
843 | " 12.9 | \n",
844 | " 45.0 | \n",
845 | " 6.3 | \n",
846 | " 1.1 | \n",
847 | "
\n",
848 | " \n",
849 | " | 2 | \n",
850 | " 62.35 | \n",
851 | " 99.34 | \n",
852 | " 77.77 | \n",
853 | " 55.60 | \n",
854 | " 0.96 | \n",
855 | " 9.10 | \n",
856 | " 47.62 | \n",
857 | " 21.4 | \n",
858 | " 29.0 | \n",
859 | " 13.7 | \n",
860 | " 45.0 | \n",
861 | " 6.3 | \n",
862 | " 0.4 | \n",
863 | "
\n",
864 | " \n",
865 | " | 3 | \n",
866 | " 64.96 | \n",
867 | " 104.62 | \n",
868 | " 77.07 | \n",
869 | " 50.81 | \n",
870 | " 0.89 | \n",
871 | " 8.87 | \n",
872 | " 50.62 | \n",
873 | " 21.5 | \n",
874 | " 29.0 | \n",
875 | " 15.2 | \n",
876 | " 47.0 | \n",
877 | " 6.9 | \n",
878 | " 2.2 | \n",
879 | "
\n",
880 | " \n",
881 | " | 4 | \n",
882 | " 70.15 | \n",
883 | " 105.12 | \n",
884 | " 79.43 | \n",
885 | " 56.05 | \n",
886 | " 0.85 | \n",
887 | " 9.41 | \n",
888 | " 36.94 | \n",
889 | " 20.9 | \n",
890 | " 28.4 | \n",
891 | " 14.7 | \n",
892 | " 51.0 | \n",
893 | " 6.3 | \n",
894 | " 1.1 | \n",
895 | "
\n",
896 | " \n",
897 | "
\n",
898 | "
"
899 | ],
900 | "text/plain": [
901 | " PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp min_temp \\\n",
902 | "0 56.40 95.08 51.73 34.31 0.69 7.44 55.96 20.1 28.4 15.7 \n",
903 | "1 53.69 93.92 73.09 54.77 0.76 8.67 34.06 20.8 28.3 12.9 \n",
904 | "2 62.35 99.34 77.77 55.60 0.96 9.10 47.62 21.4 29.0 13.7 \n",
905 | "3 64.96 104.62 77.07 50.81 0.89 8.87 50.62 21.5 29.0 15.2 \n",
906 | "4 70.15 105.12 79.43 56.05 0.85 9.41 36.94 20.9 28.4 14.7 \n",
907 | "\n",
908 | " humid visible wind \n",
909 | "0 54.0 6.0 3.0 \n",
910 | "1 45.0 6.3 1.1 \n",
911 | "2 45.0 6.3 0.4 \n",
912 | "3 47.0 6.9 2.2 \n",
913 | "4 51.0 6.3 1.1 "
914 | ]
915 | },
916 | "execution_count": 17,
917 | "metadata": {},
918 | "output_type": "execute_result"
919 | }
920 | ],
921 | "source": [
922 | "x_train.head(5)"
923 | ]
924 | },
925 | {
926 | "cell_type": "code",
927 | "execution_count": 18,
928 | "metadata": {},
929 | "outputs": [
930 | {
931 | "data": {
932 | "text/html": [
933 | "\n",
934 | "\n",
947 | "
\n",
948 | " \n",
949 | " \n",
950 | " | \n",
951 | " PM2.5 | \n",
952 | " PM10 | \n",
953 | " NO2 | \n",
954 | " NOx | \n",
955 | " CO | \n",
956 | " SO2 | \n",
957 | " O3 | \n",
958 | " temp | \n",
959 | " max_temp | \n",
960 | " min_temp | \n",
961 | " humid | \n",
962 | " visible | \n",
963 | " wind | \n",
964 | "
\n",
965 | " \n",
966 | " \n",
967 | " \n",
968 | " | 0 | \n",
969 | " 30.71 | \n",
970 | " 70.80 | \n",
971 | " 27.31 | \n",
972 | " 26.74 | \n",
973 | " 1.07 | \n",
974 | " 8.56 | \n",
975 | " 37.39 | \n",
976 | " 22.0 | \n",
977 | " 27.2 | \n",
978 | " 19.2 | \n",
979 | " 75 | \n",
980 | " 6.9 | \n",
981 | " 3.3 | \n",
982 | "
\n",
983 | " \n",
984 | " | 1 | \n",
985 | " 25.78 | \n",
986 | " 62.73 | \n",
987 | " 28.05 | \n",
988 | " 30.18 | \n",
989 | " 1.16 | \n",
990 | " 7.52 | \n",
991 | " 33.11 | \n",
992 | " 22.2 | \n",
993 | " 26.0 | \n",
994 | " 19.4 | \n",
995 | " 77 | \n",
996 | " 5.5 | \n",
997 | " 2.0 | \n",
998 | "
\n",
999 | " \n",
1000 | " | 2 | \n",
1001 | " 29.66 | \n",
1002 | " 69.62 | \n",
1003 | " 26.26 | \n",
1004 | " 26.92 | \n",
1005 | " 0.99 | \n",
1006 | " 6.40 | \n",
1007 | " 32.98 | \n",
1008 | " 23.4 | \n",
1009 | " 28.4 | \n",
1010 | " 19.4 | \n",
1011 | " 68 | \n",
1012 | " 6.9 | \n",
1013 | " 3.5 | \n",
1014 | "
\n",
1015 | " \n",
1016 | " | 3 | \n",
1017 | " 52.62 | \n",
1018 | " 106.81 | \n",
1019 | " 31.56 | \n",
1020 | " 41.89 | \n",
1021 | " 1.39 | \n",
1022 | " 6.94 | \n",
1023 | " 37.35 | \n",
1024 | " 23.6 | \n",
1025 | " 30.5 | \n",
1026 | " 19.0 | \n",
1027 | " 63 | \n",
1028 | " 6.3 | \n",
1029 | " 1.3 | \n",
1030 | "
\n",
1031 | " \n",
1032 | " | 4 | \n",
1033 | " 52.64 | \n",
1034 | " 97.94 | \n",
1035 | " 29.10 | \n",
1036 | " 29.52 | \n",
1037 | " 1.01 | \n",
1038 | " 7.01 | \n",
1039 | " 45.10 | \n",
1040 | " 24.2 | \n",
1041 | " 30.5 | \n",
1042 | " 20.4 | \n",
1043 | " 63 | \n",
1044 | " 6.3 | \n",
1045 | " 1.3 | \n",
1046 | "
\n",
1047 | " \n",
1048 | "
\n",
1049 | "
"
1050 | ],
1051 | "text/plain": [
1052 | " PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp min_temp \\\n",
1053 | "0 30.71 70.80 27.31 26.74 1.07 8.56 37.39 22.0 27.2 19.2 \n",
1054 | "1 25.78 62.73 28.05 30.18 1.16 7.52 33.11 22.2 26.0 19.4 \n",
1055 | "2 29.66 69.62 26.26 26.92 0.99 6.40 32.98 23.4 28.4 19.4 \n",
1056 | "3 52.62 106.81 31.56 41.89 1.39 6.94 37.35 23.6 30.5 19.0 \n",
1057 | "4 52.64 97.94 29.10 29.52 1.01 7.01 45.10 24.2 30.5 20.4 \n",
1058 | "\n",
1059 | " humid visible wind \n",
1060 | "0 75 6.9 3.3 \n",
1061 | "1 77 5.5 2.0 \n",
1062 | "2 68 6.9 3.5 \n",
1063 | "3 63 6.3 1.3 \n",
1064 | "4 63 6.3 1.3 "
1065 | ]
1066 | },
1067 | "execution_count": 18,
1068 | "metadata": {},
1069 | "output_type": "execute_result"
1070 | }
1071 | ],
1072 | "source": [
1073 | "x_test.head(5)"
1074 | ]
1075 | },
1076 | {
1077 | "cell_type": "code",
1078 | "execution_count": 19,
1079 | "metadata": {},
1080 | "outputs": [
1081 | {
1082 | "data": {
1083 | "text/plain": [
1084 | "0 101.0\n",
1085 | "1 92.0\n",
1086 | "2 108.0\n",
1087 | "3 109.0\n",
1088 | "4 114.0\n",
1089 | "Name: AQI, dtype: float64"
1090 | ]
1091 | },
1092 | "execution_count": 19,
1093 | "metadata": {},
1094 | "output_type": "execute_result"
1095 | }
1096 | ],
1097 | "source": [
1098 | "y_train.head(5)"
1099 | ]
1100 | },
1101 | {
1102 | "cell_type": "code",
1103 | "execution_count": 20,
1104 | "metadata": {},
1105 | "outputs": [
1106 | {
1107 | "data": {
1108 | "text/plain": [
1109 | "0 82.0\n",
1110 | "1 81.0\n",
1111 | "2 85.0\n",
1112 | "3 95.0\n",
1113 | "4 118.0\n",
1114 | "Name: AQI, dtype: float64"
1115 | ]
1116 | },
1117 | "execution_count": 20,
1118 | "metadata": {},
1119 | "output_type": "execute_result"
1120 | }
1121 | ],
1122 | "source": [
1123 | "y_test.head(5)"
1124 | ]
1125 | },
1126 | {
1127 | "cell_type": "code",
1128 | "execution_count": 21,
1129 | "metadata": {},
1130 | "outputs": [],
1131 | "source": [
1132 | "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
1133 | "sc_x = StandardScaler()\n",
1134 | "sc_y = StandardScaler()\n",
1135 | "x_train = sc_x.fit_transform(x_train)\n",
1136 | "x_test = sc_x.transform(x_test)\n",
1137 | "y_train = sc_y.fit_transform(np.array(y_train).reshape(-1,1))\n",
1138 | "y_test = sc_y.transform(np.array(y_test).reshape(-1,1))"
1139 | ]
1140 | },
1141 | {
1142 | "cell_type": "code",
1143 | "execution_count": 22,
1144 | "metadata": {},
1145 | "outputs": [
1146 | {
1147 | "data": {
1148 | "text/plain": [
1149 | "array([[-0.27610501],\n",
1150 | " [-0.30227705],\n",
1151 | " [-0.1975889 ],\n",
1152 | " [ 0.06413148],\n",
1153 | " [ 0.66608835]])"
1154 | ]
1155 | },
1156 | "execution_count": 22,
1157 | "metadata": {},
1158 | "output_type": "execute_result"
1159 | }
1160 | ],
1161 | "source": [
1162 | "y_test[:5]"
1163 | ]
1164 | },
1165 | {
1166 | "cell_type": "code",
1167 | "execution_count": 23,
1168 | "metadata": {},
1169 | "outputs": [
1170 | {
1171 | "data": {
1172 | "text/plain": [
1173 | "SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,\n",
1174 | " gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,\n",
1175 | " tol=0.001, verbose=False)"
1176 | ]
1177 | },
1178 | "execution_count": 23,
1179 | "metadata": {},
1180 | "output_type": "execute_result"
1181 | }
1182 | ],
1183 | "source": [
1184 | "from sklearn.svm import SVR\n",
1185 | "regressor = SVR(kernel = 'rbf')\n",
1186 | "regressor.fit(x_train, y_train.ravel())  # y_train is (n, 1) after scaling; SVR expects a 1-D target"
1187 | ]
1188 | },
1189 | {
1190 | "cell_type": "code",
1191 | "execution_count": 24,
1192 | "metadata": {},
1193 | "outputs": [
1194 | {
1195 | "data": {
1196 | "text/plain": [
1197 | "array([-0.42595944, -0.32311139, -0.37435607, 0.56882821, 0.35833741,\n",
1198 | " -0.4289785 , -0.57286549, -0.26249298, 0.01577009, -0.11099087,\n",
1199 | " -0.32003543, -0.21896574, 0.40781822, 0.70554794, 0.22467137,\n",
1200 | " 0.26272955, 0.05300658, -0.43237994, -0.50516794, -0.15245204,\n",
1201 | " -0.16191589, -0.16050723, -0.09468641, 0.01183634, 0.24495599,\n",
1202 | " 0.43675849, 0.50363451, 0.22383874, 0.36682982, 0.68494588,\n",
1203 | " 0.37600183, 0.16605308, -0.30409398, -0.25732645, -0.17879819,\n",
1204 | " -0.28231346, 0.22057641, 0.25122338, 0.13562446, 0.16869742,\n",
1205 | " -0.27649917, -0.19226034, -0.34176244, -0.07255308, 0.79432799,\n",
1206 | " 0.99324748, 0.03093437, 0.16210712, 0.31583795, 0.47365842,\n",
1207 | " 0.11191249, -0.22742264, -0.26317195, 0.01138717, -0.16583801,\n",
1208 | " 0.34501398, 0.20441671, 0.14925122, 0.16647227, 0.23584846,\n",
1209 | " 0.44561196, 0.74555276, 1.77403237, 0.98925211, 0.6941801 ,\n",
1210 | " 0.04527876, 0.08692571, -0.15628333, -0.03071387, 0.15375035,\n",
1211 | " 0.03870278, 0.00487757, -0.18599819, -0.30738972, -0.12022113,\n",
1212 | " -0.37615108, -0.56692846, -0.31514682, 0.19595979, 0.30663164,\n",
1213 | " 0.4771285 , -0.28227281, -0.05911738, 0.04322423, -0.68263281,\n",
1214 | " -0.73819769, -0.50290478, -0.46696796, -0.38069815, -0.36181173,\n",
1215 | " -0.20645508, -0.36911442, -0.27281806, -0.22542369, -0.37897064,\n",
1216 | " -0.38816626, -0.72536073, -0.98441423, -0.7442325 , -0.71863761,\n",
1217 | " -0.72144794, -0.74704061, -0.58227917, -0.50226986, -0.33404258,\n",
1218 | " -0.48810957, -0.6112883 , -0.42955452, -0.65229843, -0.87307501,\n",
1219 | " -1.00287225, -0.82285002, -0.25709449, -0.75710873, -0.60362084,\n",
1220 | " -0.26031897, -0.91369875, -0.95760632, -0.81297347, -0.14054648,\n",
1221 | " -0.7561288 , -0.61221255, -0.69518032, -0.60207119, -0.37488619,\n",
1222 | " -0.20918184, -0.54138463, -0.49764821, -0.84623359, -0.73307276,\n",
1223 | " -0.60311798, -0.65324264, -0.86782537, -0.94903969, -0.59868767,\n",
1224 | " -0.80292309, -0.57618004, -0.85284508, -0.63479565, -0.51516231,\n",
1225 | " -0.51806576, -0.91674212, -0.68377782, -0.57208282, -0.84884395,\n",
1226 | " -0.80278251, -0.75762862, -0.58761396, -0.79499592, -0.81003283,\n",
1227 | " -0.87723899, -0.83168805, -1.13971526, -1.3723071 , -1.24599784,\n",
1228 | " -1.21094141, -0.90016809, -1.01965268, -1.02627238, -1.06362168,\n",
1229 | " -1.0106033 , -0.74411236, -0.97876786, -1.04877468, -0.96290783,\n",
1230 | " -1.11445661, -1.01896601, -0.86198109, -0.94589339, -0.78238535,\n",
1231 | " -0.85986483, -0.96586506, -1.06933763, -0.99446313, -0.97199095,\n",
1232 | " -0.98809041, -0.86366581, -0.9923654 , -1.1113921 , -1.06807415,\n",
1233 | " -0.08591312, -1.22178744])"
1234 | ]
1235 | },
1236 | "execution_count": 24,
1237 | "metadata": {},
1238 | "output_type": "execute_result"
1239 | }
1240 | ],
1241 | "source": [
1242 | "y_prediction = regressor.predict(x_test)\n",
1243 | "y_prediction"
1244 | ]
1245 | },
1246 | {
1247 | "cell_type": "code",
1248 | "execution_count": 25,
1249 | "metadata": {},
1250 | "outputs": [
1251 | {
1252 | "name": "stdout",
1253 | "output_type": "stream",
1254 | "text": [
1255 | "R2 SCORE is 0.8063001007104242\n",
1256 | "mean_sqrd_error is 0.04933772059187857\n",
1257 | "Root mean squared error of is 0.22212095937096654\n",
1258 | "Mean Absolute error is 0.16675010409400015\n"
1259 | ]
1260 | }
1261 | ],
1262 | "source": [
1263 | "from sklearn.metrics import r2_score\n",
1264 | "from sklearn.metrics import mean_squared_error\n",
1265 | "from sklearn.metrics import mean_absolute_error\n",
1266 | "score = r2_score(y_test,y_prediction)\n",
1267 | "mean_error = mean_squared_error(y_test,y_prediction)\n",
1268 | "mae = mean_absolute_error(y_test,y_prediction)\n",
1269 | "print(\"R2 SCORE is\", score)\n",
1270 | "print(\"mean_sqrd_error is \", mean_error)\n",
1271 | "print(\"Root mean squared error of is\",np.sqrt(mean_error))\n",
1272 | "print(\"Mean Absolute error is\", mae)"
1273 | ]
1274 | },
1275 | {
1276 | "cell_type": "code",
1277 | "execution_count": 26,
1278 | "metadata": {},
1279 | "outputs": [
1280 | {
1281 | "data": {
1282 | "text/plain": [
1283 | "array([[ 82.],\n",
1284 | " [ 81.],\n",
1285 | " [ 85.],\n",
1286 | " [ 95.],\n",
1287 | " [118.]])"
1288 | ]
1289 | },
1290 | "execution_count": 26,
1291 | "metadata": {},
1292 | "output_type": "execute_result"
1293 | }
1294 | ],
1295 | "source": [
1296 | "y_test = sc_y.inverse_transform(y_test)\n",
1297 | "y_test[:5]"
1298 | ]
1299 | },
1300 | {
1301 | "cell_type": "code",
1302 | "execution_count": 27,
1303 | "metadata": {},
1304 | "outputs": [
1305 | {
1306 | "data": {
1307 | "text/plain": [
1308 | "array([ 76.27425416, 80.20394657, 78.24595278, 114.28381494,\n",
1309 | " 106.24123124])"
1310 | ]
1311 | },
1312 | "execution_count": 27,
1313 | "metadata": {},
1314 | "output_type": "execute_result"
1315 | }
1316 | ],
1317 | "source": [
1318 | "y_prediction = sc_y.inverse_transform(y_prediction.reshape(-1, 1)).ravel()  # scaler was fitted on 2-D data; reshape in, flatten back to 1-D\n",
1319 | "y_prediction[:5]"
1320 | ]
1321 | },
1322 | {
1323 | "cell_type": "code",
1324 | "execution_count": 28,
1325 | "metadata": {},
1326 | "outputs": [
1327 | {
1328 | "data": {
1329 | "image/png": "\n",
1330 | "text/plain": [
1331 | ""
1332 | ]
1333 | },
1334 | "metadata": {
1335 | "needs_background": "light"
1336 | },
1337 | "output_type": "display_data"
1338 | }
1339 | ],
1340 | "source": [
1341 | "df1 = pd.DataFrame({'Actual': y_test[100:120].flatten(), 'Predicted': y_prediction[100:120].flatten()})\n",
1342 | "df1.plot(kind='line')\n",
1343 | "plt.grid(which='major', linestyle='-', linewidth='0.5', color='green')\n",
1344 | "plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black')\n",
1345 | "plt.show()"
1346 | ]
1347 | },
1348 | {
1349 | "cell_type": "code",
1350 | "execution_count": 29,
1351 | "metadata": {},
1352 | "outputs": [
1353 | {
1354 | "data": {
1355 | "text/html": [
1356 | "\n",
1357 | "\n",
1370 | "
\n",
1371 | " \n",
1372 | " \n",
1373 | " | \n",
1374 | " ACTUAL AQI | \n",
1375 | " PREDICTED AQI | \n",
1376 | " DIFFERENCE | \n",
1377 | " PERCENTAGE ERROR | \n",
1378 | "
\n",
1379 | " \n",
1380 | " \n",
1381 | " \n",
1382 | " | 0 | \n",
1383 | " 82.0 | \n",
1384 | " 76.274254 | \n",
1385 | " 5.725746 | \n",
1386 | " 6.982617 | \n",
1387 | "
\n",
1388 | " \n",
1389 | " | 1 | \n",
1390 | " 81.0 | \n",
1391 | " 80.203947 | \n",
1392 | " 0.796053 | \n",
1393 | " 0.982782 | \n",
1394 | "
\n",
1395 | " \n",
1396 | " | 2 | \n",
1397 | " 85.0 | \n",
1398 | " 78.245953 | \n",
1399 | " 6.754047 | \n",
1400 | " 7.945938 | \n",
1401 | "
\n",
1402 | " \n",
1403 | " | 3 | \n",
1404 | " 95.0 | \n",
1405 | " 114.283815 | \n",
1406 | " -19.283815 | \n",
1407 | " 20.298753 | \n",
1408 | "
\n",
1409 | " \n",
1410 | " | 4 | \n",
1411 | " 118.0 | \n",
1412 | " 106.241231 | \n",
1413 | " 11.758769 | \n",
1414 | " 9.965058 | \n",
1415 | "
\n",
1416 | " \n",
1417 | " | 5 | \n",
1418 | " 81.0 | \n",
1419 | " 76.158900 | \n",
1420 | " 4.841100 | \n",
1421 | " 5.976667 | \n",
1422 | "
\n",
1423 | " \n",
1424 | " | 6 | \n",
1425 | " 75.0 | \n",
1426 | " 70.661163 | \n",
1427 | " 4.338837 | \n",
1428 | " 5.785116 | \n",
1429 | "
\n",
1430 | " \n",
1431 | " | 7 | \n",
1432 | " 93.0 | \n",
1433 | " 82.520098 | \n",
1434 | " 10.479902 | \n",
1435 | " 11.268712 | \n",
1436 | "
\n",
1437 | " \n",
1438 | " | 8 | \n",
1439 | " 101.0 | \n",
1440 | " 93.152173 | \n",
1441 | " 7.847827 | \n",
1442 | " 7.770126 | \n",
1443 | "
\n",
1444 | " \n",
1445 | " | 9 | \n",
1446 | " 94.0 | \n",
1447 | " 88.308799 | \n",
1448 | " 5.691201 | \n",
1449 | " 6.054469 | \n",
1450 | "
\n",
1451 | " \n",
1452 | " | 10 | \n",
1453 | " 78.0 | \n",
1454 | " 80.321475 | \n",
1455 | " -2.321475 | \n",
1456 | " 2.976250 | \n",
1457 | "
\n",
1458 | " \n",
1459 | " | 11 | \n",
1460 | " 79.0 | \n",
1461 | " 84.183218 | \n",
1462 | " -5.183218 | \n",
1463 | " 6.561036 | \n",
1464 | "
\n",
1465 | " \n",
1466 | " | 12 | \n",
1467 | " 106.0 | \n",
1468 | " 108.131829 | \n",
1469 | " -2.131829 | \n",
1470 | " 2.011160 | \n",
1471 | "
\n",
1472 | " \n",
1473 | " | 13 | \n",
1474 | " 121.0 | \n",
1475 | " 119.507700 | \n",
1476 | " 1.492300 | \n",
1477 | " 1.233306 | \n",
1478 | "
\n",
1479 | " \n",
1480 | " | 14 | \n",
1481 | " 111.0 | \n",
1482 | " 101.134023 | \n",
1483 | " 9.865977 | \n",
1484 | " 8.888267 | \n",
1485 | "
\n",
1486 | " \n",
1487 | "
\n",
1488 | "
"
1489 | ],
1490 | "text/plain": [
1491 | " ACTUAL AQI PREDICTED AQI DIFFERENCE PERCENTAGE ERROR\n",
1492 | "0 82.0 76.274254 5.725746 6.982617\n",
1493 | "1 81.0 80.203947 0.796053 0.982782\n",
1494 | "2 85.0 78.245953 6.754047 7.945938\n",
1495 | "3 95.0 114.283815 -19.283815 20.298753\n",
1496 | "4 118.0 106.241231 11.758769 9.965058\n",
1497 | "5 81.0 76.158900 4.841100 5.976667\n",
1498 | "6 75.0 70.661163 4.338837 5.785116\n",
1499 | "7 93.0 82.520098 10.479902 11.268712\n",
1500 | "8 101.0 93.152173 7.847827 7.770126\n",
1501 | "9 94.0 88.308799 5.691201 6.054469\n",
1502 | "10 78.0 80.321475 -2.321475 2.976250\n",
1503 | "11 79.0 84.183218 -5.183218 6.561036\n",
1504 | "12 106.0 108.131829 -2.131829 2.011160\n",
1505 | "13 121.0 119.507700 1.492300 1.233306\n",
1506 | "14 111.0 101.134023 9.865977 8.888267"
1507 | ]
1508 | },
1509 | "execution_count": 29,
1510 | "metadata": {},
1511 | "output_type": "execute_result"
1512 | }
1513 | ],
1514 | "source": [
1515 | "result = pd.DataFrame()\n",
1516 | "result[\"ACTUAL AQI\"] = y_test.flatten()\n",
1517 | "result[\"PREDICTED AQI\"] = y_prediction\n",
1518 | "result[\"DIFFERENCE\"] = result[\"ACTUAL AQI\"] - result[\"PREDICTED AQI\"]\n",
1519 | "result[\"PERCENTAGE ERROR\"] = ( abs(result[\"ACTUAL AQI\"] - result[\"PREDICTED AQI\"] ) / result[\"ACTUAL AQI\"] ) * 100\n",
1520 | "result.head(15)"
1521 | ]
1522 | },
1523 | {
1524 | "cell_type": "code",
1525 | "execution_count": null,
1526 | "metadata": {},
1527 | "outputs": [],
1528 | "source": []
1529 | }
1530 | ],
1531 | "metadata": {
1532 | "kernelspec": {
1533 | "display_name": "Python 3",
1534 | "language": "python",
1535 | "name": "python3"
1536 | },
1537 | "language_info": {
1538 | "codemirror_mode": {
1539 | "name": "ipython",
1540 | "version": 3
1541 | },
1542 | "file_extension": ".py",
1543 | "mimetype": "text/x-python",
1544 | "name": "python",
1545 | "nbconvert_exporter": "python",
1546 | "pygments_lexer": "ipython3",
1547 | "version": "3.7.3"
1548 | }
1549 | },
1550 | "nbformat": 4,
1551 | "nbformat_minor": 2
1552 | }
1553 |
--------------------------------------------------------------------------------