├── ProjectReport.pdf ├── ReportSummary.pdf ├── README.md ├── LICENSE ├── .gitignore ├── GradientBoosting.ipynb └── SVR.ipynb /ProjectReport.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavini11/SkyLens/HEAD/ProjectReport.pdf -------------------------------------------------------------------------------- /ReportSummary.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavini11/SkyLens/HEAD/ReportSummary.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Time-series-forecasting-of-Air-Quality-Prediction 2 | AQI Prediction using LSTM, MLR, SVR, Random Forest, Gradient Boosting 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Pavini Jain 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /GradientBoosting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "import seaborn as sns\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "\n", 15 | "import warnings\n", 16 | "warnings.filterwarnings(\"ignore\")\n", 17 | "warnings.simplefilter(action='ignore', category=FutureWarning)" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 3, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "df_x = pd.read_csv(\"train_x.csv\")\n", 27 | "df_y = pd.read_csv(\"train_y.csv\")\n", 28 | "df_x1 = pd.read_csv(\"test_x.csv\")\n", 29 | "df_y1 = pd.read_csv(\"test_y.csv\")" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 4, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/html": [ 40 | "
\n", 41 | "\n", 54 | "\n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | "
DatePM2.5PM10NO2NOxCOSO2O3tempmax_tempmin_temphumidvisiblewind
02016-01-0156.4095.0851.7334.310.697.4455.9620.128.415.754.06.03.0
12016-01-0253.6993.9273.0954.770.768.6734.0620.828.312.945.06.31.1
22016-01-0362.3599.3477.7755.600.969.1047.6221.429.013.745.06.30.4
32016-01-0464.96104.6277.0750.810.898.8750.6221.529.015.247.06.92.2
42016-01-0570.15105.1279.4356.050.859.4136.9420.928.414.751.06.31.1
\n", 162 | "
" 163 | ], 164 | "text/plain": [ 165 | " Date PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp \\\n", 166 | "0 2016-01-01 56.40 95.08 51.73 34.31 0.69 7.44 55.96 20.1 28.4 \n", 167 | "1 2016-01-02 53.69 93.92 73.09 54.77 0.76 8.67 34.06 20.8 28.3 \n", 168 | "2 2016-01-03 62.35 99.34 77.77 55.60 0.96 9.10 47.62 21.4 29.0 \n", 169 | "3 2016-01-04 64.96 104.62 77.07 50.81 0.89 8.87 50.62 21.5 29.0 \n", 170 | "4 2016-01-05 70.15 105.12 79.43 56.05 0.85 9.41 36.94 20.9 28.4 \n", 171 | "\n", 172 | " min_temp humid visible wind \n", 173 | "0 15.7 54.0 6.0 3.0 \n", 174 | "1 12.9 45.0 6.3 1.1 \n", 175 | "2 13.7 45.0 6.3 0.4 \n", 176 | "3 15.2 47.0 6.9 2.2 \n", 177 | "4 14.7 51.0 6.3 1.1 " 178 | ] 179 | }, 180 | "execution_count": 4, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "df_x.head(5)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 5, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/html": [ 197 | "
\n", 198 | "\n", 211 | "\n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | "
DatePM2.5PM10NO2NOxCOSO2O3tempmax_tempmin_temphumidvisiblewind
001-01-202030.7170.8027.3126.741.078.5637.3922.027.219.2756.93.3
102-01-202025.7862.7328.0530.181.167.5233.1122.226.019.4775.52.0
203-01-202029.6669.6226.2626.920.996.4032.9823.428.419.4686.93.5
304-01-202052.62106.8131.5641.891.396.9437.3523.630.519.0636.31.3
405-01-202052.6497.9429.1029.521.017.0145.1024.230.520.4636.31.3
\n", 319 | "
" 320 | ], 321 | "text/plain": [ 322 | " Date PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp \\\n", 323 | "0 01-01-2020 30.71 70.80 27.31 26.74 1.07 8.56 37.39 22.0 27.2 \n", 324 | "1 02-01-2020 25.78 62.73 28.05 30.18 1.16 7.52 33.11 22.2 26.0 \n", 325 | "2 03-01-2020 29.66 69.62 26.26 26.92 0.99 6.40 32.98 23.4 28.4 \n", 326 | "3 04-01-2020 52.62 106.81 31.56 41.89 1.39 6.94 37.35 23.6 30.5 \n", 327 | "4 05-01-2020 52.64 97.94 29.10 29.52 1.01 7.01 45.10 24.2 30.5 \n", 328 | "\n", 329 | " min_temp humid visible wind \n", 330 | "0 19.2 75 6.9 3.3 \n", 331 | "1 19.4 77 5.5 2.0 \n", 332 | "2 19.4 68 6.9 3.5 \n", 333 | "3 19.0 63 6.3 1.3 \n", 334 | "4 20.4 63 6.3 1.3 " 335 | ] 336 | }, 337 | "execution_count": 5, 338 | "metadata": {}, 339 | "output_type": "execute_result" 340 | } 341 | ], 342 | "source": [ 343 | "df_x1.head(5)" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 6, 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "data": { 353 | "text/html": [ 354 | "
\n", 355 | "\n", 368 | "\n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | "
DateAQI
02016-01-01101.0
12016-01-0292.0
22016-01-03108.0
32016-01-04109.0
42016-01-05114.0
\n", 404 | "
" 405 | ], 406 | "text/plain": [ 407 | " Date AQI\n", 408 | "0 2016-01-01 101.0\n", 409 | "1 2016-01-02 92.0\n", 410 | "2 2016-01-03 108.0\n", 411 | "3 2016-01-04 109.0\n", 412 | "4 2016-01-05 114.0" 413 | ] 414 | }, 415 | "execution_count": 6, 416 | "metadata": {}, 417 | "output_type": "execute_result" 418 | } 419 | ], 420 | "source": [ 421 | "df_y.head(5)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 7, 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "data": { 431 | "text/html": [ 432 | "
\n", 433 | "\n", 446 | "\n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | "
DateAQI
02020-01-0182.0
12020-01-0281.0
22020-01-0385.0
32020-01-0495.0
42020-01-05118.0
\n", 482 | "
" 483 | ], 484 | "text/plain": [ 485 | " Date AQI\n", 486 | "0 2020-01-01 82.0\n", 487 | "1 2020-01-02 81.0\n", 488 | "2 2020-01-03 85.0\n", 489 | "3 2020-01-04 95.0\n", 490 | "4 2020-01-05 118.0" 491 | ] 492 | }, 493 | "execution_count": 7, 494 | "metadata": {}, 495 | "output_type": "execute_result" 496 | } 497 | ], 498 | "source": [ 499 | "df_y1.head(5)" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": 8, 505 | "metadata": {}, 506 | "outputs": [ 507 | { 508 | "name": "stdout", 509 | "output_type": "stream", 510 | "text": [ 511 | "\n", 512 | "RangeIndex: 1441 entries, 0 to 1440\n", 513 | "Data columns (total 14 columns):\n", 514 | "Date 1441 non-null object\n", 515 | "PM2.5 1441 non-null float64\n", 516 | "PM10 1441 non-null float64\n", 517 | "NO2 1441 non-null float64\n", 518 | "NOx 1441 non-null float64\n", 519 | "CO 1441 non-null float64\n", 520 | "SO2 1441 non-null float64\n", 521 | "O3 1441 non-null float64\n", 522 | "temp 1441 non-null float64\n", 523 | "max_temp 1441 non-null float64\n", 524 | "min_temp 1441 non-null float64\n", 525 | "humid 1441 non-null float64\n", 526 | "visible 1441 non-null float64\n", 527 | "wind 1441 non-null float64\n", 528 | "dtypes: float64(13), object(1)\n", 529 | "memory usage: 157.7+ KB\n" 530 | ] 531 | } 532 | ], 533 | "source": [ 534 | "df_x.info()" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": 9, 540 | "metadata": {}, 541 | "outputs": [ 542 | { 543 | "name": "stdout", 544 | "output_type": "stream", 545 | "text": [ 546 | "\n", 547 | "RangeIndex: 1441 entries, 0 to 1440\n", 548 | "Data columns (total 2 columns):\n", 549 | "Date 1441 non-null object\n", 550 | "AQI 1441 non-null float64\n", 551 | "dtypes: float64(1), object(1)\n", 552 | "memory usage: 22.6+ KB\n" 553 | ] 554 | } 555 | ], 556 | "source": [ 557 | "df_y.info()" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 10, 563 | "metadata": {}, 564 | "outputs": [ 565 | { 566 | "name": 
"stdout", 567 | "output_type": "stream", 568 | "text": [ 569 | "\n", 570 | "RangeIndex: 182 entries, 0 to 181\n", 571 | "Data columns (total 14 columns):\n", 572 | "Date 182 non-null object\n", 573 | "PM2.5 182 non-null float64\n", 574 | "PM10 182 non-null float64\n", 575 | "NO2 182 non-null float64\n", 576 | "NOx 182 non-null float64\n", 577 | "CO 182 non-null float64\n", 578 | "SO2 182 non-null float64\n", 579 | "O3 182 non-null float64\n", 580 | "temp 182 non-null float64\n", 581 | "max_temp 182 non-null float64\n", 582 | "min_temp 182 non-null float64\n", 583 | "humid 182 non-null int64\n", 584 | "visible 182 non-null float64\n", 585 | "wind 182 non-null float64\n", 586 | "dtypes: float64(12), int64(1), object(1)\n", 587 | "memory usage: 20.0+ KB\n" 588 | ] 589 | } 590 | ], 591 | "source": [ 592 | "df_x1.info()" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": 11, 598 | "metadata": {}, 599 | "outputs": [ 600 | { 601 | "name": "stdout", 602 | "output_type": "stream", 603 | "text": [ 604 | "\n", 605 | "RangeIndex: 182 entries, 0 to 181\n", 606 | "Data columns (total 2 columns):\n", 607 | "Date 182 non-null object\n", 608 | "AQI 182 non-null float64\n", 609 | "dtypes: float64(1), object(1)\n", 610 | "memory usage: 2.9+ KB\n" 611 | ] 612 | } 613 | ], 614 | "source": [ 615 | "df_y1.info()" 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": 12, 621 | "metadata": {}, 622 | "outputs": [], 623 | "source": [ 624 | "df_x[\"Date\"] = pd.to_datetime(df_x[\"Date\"])\n", 625 | "df_x1[\"Date\"] = pd.to_datetime(df_x1[\"Date\"], dayfirst=True)" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": 13, 631 | "metadata": {}, 632 | "outputs": [ 633 | { 634 | "name": "stdout", 635 | "output_type": "stream", 636 | "text": [ 637 | "\n", 638 | "RangeIndex: 1441 entries, 0 to 1440\n", 639 | "Data columns (total 14 columns):\n", 640 | "Date 1441 non-null datetime64[ns]\n", 641 | "PM2.5 1441 non-null float64\n", 642 | "PM10 
1441 non-null float64\n", 643 | "NO2 1441 non-null float64\n", 644 | "NOx 1441 non-null float64\n", 645 | "CO 1441 non-null float64\n", 646 | "SO2 1441 non-null float64\n", 647 | "O3 1441 non-null float64\n", 648 | "temp 1441 non-null float64\n", 649 | "max_temp 1441 non-null float64\n", 650 | "min_temp 1441 non-null float64\n", 651 | "humid 1441 non-null float64\n", 652 | "visible 1441 non-null float64\n", 653 | "wind 1441 non-null float64\n", 654 | "dtypes: datetime64[ns](1), float64(13)\n", 655 | "memory usage: 157.7 KB\n" 656 | ] 657 | } 658 | ], 659 | "source": [ 660 | "df_x.info()" 661 | ] 662 | }, 663 | { 664 | "cell_type": "code", 665 | "execution_count": 14, 666 | "metadata": {}, 667 | "outputs": [ 668 | { 669 | "name": "stdout", 670 | "output_type": "stream", 671 | "text": [ 672 | "\n", 673 | "RangeIndex: 182 entries, 0 to 181\n", 674 | "Data columns (total 14 columns):\n", 675 | "Date 182 non-null datetime64[ns]\n", 676 | "PM2.5 182 non-null float64\n", 677 | "PM10 182 non-null float64\n", 678 | "NO2 182 non-null float64\n", 679 | "NOx 182 non-null float64\n", 680 | "CO 182 non-null float64\n", 681 | "SO2 182 non-null float64\n", 682 | "O3 182 non-null float64\n", 683 | "temp 182 non-null float64\n", 684 | "max_temp 182 non-null float64\n", 685 | "min_temp 182 non-null float64\n", 686 | "humid 182 non-null int64\n", 687 | "visible 182 non-null float64\n", 688 | "wind 182 non-null float64\n", 689 | "dtypes: datetime64[ns](1), float64(12), int64(1)\n", 690 | "memory usage: 20.0 KB\n" 691 | ] 692 | } 693 | ], 694 | "source": [ 695 | "df_x1.info()" 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": 15, 701 | "metadata": {}, 702 | "outputs": [ 703 | { 704 | "data": { 705 | "text/plain": [ 706 | "" 707 | ] 708 | }, 709 | "execution_count": 15, 710 | "metadata": {}, 711 | "output_type": "execute_result" 712 | }, 713 | { 714 | "data": { 715 | "image/png": "\n", 716 | "text/plain": [ 717 | "
" 718 | ] 719 | }, 720 | "metadata": { 721 | "needs_background": "light" 722 | }, 723 | "output_type": "display_data" 724 | } 725 | ], 726 | "source": [ 727 | "sns.heatmap(df_x.isnull(),cbar=False, yticklabels=False)" 728 | ] 729 | }, 730 | { 731 | "cell_type": "code", 732 | "execution_count": 16, 733 | "metadata": {}, 734 | "outputs": [ 735 | { 736 | "data": { 737 | "text/plain": [ 738 | "" 739 | ] 740 | }, 741 | "execution_count": 16, 742 | "metadata": {}, 743 | "output_type": "execute_result" 744 | }, 745 | { 746 | "data": { 747 | "image/png": "\n", 748 | "text/plain": [ 749 | "
" 750 | ] 751 | }, 752 | "metadata": { 753 | "needs_background": "light" 754 | }, 755 | "output_type": "display_data" 756 | } 757 | ], 758 | "source": [ 759 | "sns.heatmap(df_x1.isnull(),cbar=False, yticklabels=False)" 760 | ] 761 | }, 762 | { 763 | "cell_type": "code", 764 | "execution_count": 17, 765 | "metadata": {}, 766 | "outputs": [], 767 | "source": [ 768 | "x_train = df_x.drop(['Date'], axis = 1)\n", 769 | "y_train = df_y[\"AQI\"]\n", 770 | "x_test = df_x1.drop(['Date'], axis = 1)\n", 771 | "y_test = df_y1[\"AQI\"]" 772 | ] 773 | }, 774 | { 775 | "cell_type": "code", 776 | "execution_count": 18, 777 | "metadata": {}, 778 | "outputs": [ 779 | { 780 | "data": { 781 | "text/html": [ 782 | "
\n", 783 | "\n", 796 | "\n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | "
PM2.5PM10NO2NOxCOSO2O3tempmax_tempmin_temphumidvisiblewind
056.4095.0851.7334.310.697.4455.9620.128.415.754.06.03.0
153.6993.9273.0954.770.768.6734.0620.828.312.945.06.31.1
262.3599.3477.7755.600.969.1047.6221.429.013.745.06.30.4
364.96104.6277.0750.810.898.8750.6221.529.015.247.06.92.2
470.15105.1279.4356.050.859.4136.9420.928.414.751.06.31.1
\n", 898 | "
" 899 | ], 900 | "text/plain": [ 901 | " PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp min_temp \\\n", 902 | "0 56.40 95.08 51.73 34.31 0.69 7.44 55.96 20.1 28.4 15.7 \n", 903 | "1 53.69 93.92 73.09 54.77 0.76 8.67 34.06 20.8 28.3 12.9 \n", 904 | "2 62.35 99.34 77.77 55.60 0.96 9.10 47.62 21.4 29.0 13.7 \n", 905 | "3 64.96 104.62 77.07 50.81 0.89 8.87 50.62 21.5 29.0 15.2 \n", 906 | "4 70.15 105.12 79.43 56.05 0.85 9.41 36.94 20.9 28.4 14.7 \n", 907 | "\n", 908 | " humid visible wind \n", 909 | "0 54.0 6.0 3.0 \n", 910 | "1 45.0 6.3 1.1 \n", 911 | "2 45.0 6.3 0.4 \n", 912 | "3 47.0 6.9 2.2 \n", 913 | "4 51.0 6.3 1.1 " 914 | ] 915 | }, 916 | "execution_count": 18, 917 | "metadata": {}, 918 | "output_type": "execute_result" 919 | } 920 | ], 921 | "source": [ 922 | "x_train.head(5)" 923 | ] 924 | }, 925 | { 926 | "cell_type": "code", 927 | "execution_count": 19, 928 | "metadata": {}, 929 | "outputs": [ 930 | { 931 | "data": { 932 | "text/html": [ 933 | "
\n", 934 | "\n", 947 | "\n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | "
PM2.5PM10NO2NOxCOSO2O3tempmax_tempmin_temphumidvisiblewind
030.7170.8027.3126.741.078.5637.3922.027.219.2756.93.3
125.7862.7328.0530.181.167.5233.1122.226.019.4775.52.0
229.6669.6226.2626.920.996.4032.9823.428.419.4686.93.5
352.62106.8131.5641.891.396.9437.3523.630.519.0636.31.3
452.6497.9429.1029.521.017.0145.1024.230.520.4636.31.3
\n", 1049 | "
" 1050 | ], 1051 | "text/plain": [ 1052 | " PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp min_temp \\\n", 1053 | "0 30.71 70.80 27.31 26.74 1.07 8.56 37.39 22.0 27.2 19.2 \n", 1054 | "1 25.78 62.73 28.05 30.18 1.16 7.52 33.11 22.2 26.0 19.4 \n", 1055 | "2 29.66 69.62 26.26 26.92 0.99 6.40 32.98 23.4 28.4 19.4 \n", 1056 | "3 52.62 106.81 31.56 41.89 1.39 6.94 37.35 23.6 30.5 19.0 \n", 1057 | "4 52.64 97.94 29.10 29.52 1.01 7.01 45.10 24.2 30.5 20.4 \n", 1058 | "\n", 1059 | " humid visible wind \n", 1060 | "0 75 6.9 3.3 \n", 1061 | "1 77 5.5 2.0 \n", 1062 | "2 68 6.9 3.5 \n", 1063 | "3 63 6.3 1.3 \n", 1064 | "4 63 6.3 1.3 " 1065 | ] 1066 | }, 1067 | "execution_count": 19, 1068 | "metadata": {}, 1069 | "output_type": "execute_result" 1070 | } 1071 | ], 1072 | "source": [ 1073 | "x_test.head(5)" 1074 | ] 1075 | }, 1076 | { 1077 | "cell_type": "code", 1078 | "execution_count": 20, 1079 | "metadata": {}, 1080 | "outputs": [ 1081 | { 1082 | "data": { 1083 | "text/plain": [ 1084 | "0 101.0\n", 1085 | "1 92.0\n", 1086 | "2 108.0\n", 1087 | "3 109.0\n", 1088 | "4 114.0\n", 1089 | "Name: AQI, dtype: float64" 1090 | ] 1091 | }, 1092 | "execution_count": 20, 1093 | "metadata": {}, 1094 | "output_type": "execute_result" 1095 | } 1096 | ], 1097 | "source": [ 1098 | "y_train.head(5)" 1099 | ] 1100 | }, 1101 | { 1102 | "cell_type": "code", 1103 | "execution_count": 21, 1104 | "metadata": {}, 1105 | "outputs": [ 1106 | { 1107 | "data": { 1108 | "text/plain": [ 1109 | "0 82.0\n", 1110 | "1 81.0\n", 1111 | "2 85.0\n", 1112 | "3 95.0\n", 1113 | "4 118.0\n", 1114 | "Name: AQI, dtype: float64" 1115 | ] 1116 | }, 1117 | "execution_count": 21, 1118 | "metadata": {}, 1119 | "output_type": "execute_result" 1120 | } 1121 | ], 1122 | "source": [ 1123 | "y_test.head(5)" 1124 | ] 1125 | }, 1126 | { 1127 | "cell_type": "code", 1128 | "execution_count": 22, 1129 | "metadata": {}, 1130 | "outputs": [], 1131 | "source": [ 1132 | "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n", 1133 | 
"sc_x = StandardScaler()\n", 1134 | "sc_y = StandardScaler()\n", 1135 | "x_train = sc_x.fit_transform(x_train)\n", 1136 | "x_test = sc_x.transform(x_test)\n", 1137 | "y_train = sc_y.fit_transform(np.array(y_train).reshape(-1,1))\n", 1138 | "y_test = sc_y.transform(np.array(y_test).reshape(-1,1))" 1139 | ] 1140 | }, 1141 | { 1142 | "cell_type": "code", 1143 | "execution_count": 23, 1144 | "metadata": {}, 1145 | "outputs": [ 1146 | { 1147 | "data": { 1148 | "text/plain": [ 1149 | "array([[-0.27610501],\n", 1150 | " [-0.30227705],\n", 1151 | " [-0.1975889 ],\n", 1152 | " [ 0.06413148],\n", 1153 | " [ 0.66608835]])" 1154 | ] 1155 | }, 1156 | "execution_count": 23, 1157 | "metadata": {}, 1158 | "output_type": "execute_result" 1159 | } 1160 | ], 1161 | "source": [ 1162 | "y_test[:5]" 1163 | ] 1164 | }, 1165 | { 1166 | "cell_type": "code", 1167 | "execution_count": 24, 1168 | "metadata": {}, 1169 | "outputs": [ 1170 | { 1171 | "data": { 1172 | "text/plain": [ 1173 | "GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,\n", 1174 | " learning_rate=0.1, loss='ls', max_depth=3,\n", 1175 | " max_features=None, max_leaf_nodes=None,\n", 1176 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 1177 | " min_samples_leaf=1, min_samples_split=2,\n", 1178 | " min_weight_fraction_leaf=0.0, n_estimators=100,\n", 1179 | " n_iter_no_change=None, presort='auto',\n", 1180 | " random_state=None, subsample=1.0, tol=0.0001,\n", 1181 | " validation_fraction=0.1, verbose=0, warm_start=False)" 1182 | ] 1183 | }, 1184 | "execution_count": 24, 1185 | "metadata": {}, 1186 | "output_type": "execute_result" 1187 | } 1188 | ], 1189 | "source": [ 1190 | "from sklearn.ensemble import GradientBoostingRegressor\n", 1191 | "regressor = GradientBoostingRegressor()\n", 1192 | "regressor.fit(x_train,y_train)" 1193 | ] 1194 | }, 1195 | { 1196 | "cell_type": "code", 1197 | "execution_count": 25, 1198 | "metadata": {}, 1199 | "outputs": [ 1200 | { 1201 | "data": { 1202 | 
"text/plain": [ 1203 | "array([-0.30664803, 0.02744575, -0.45522541, 0.56821673, 0.39152823,\n", 1204 | " -0.33634286, -0.45985512, -0.13761617, 0.21430141, 0.06167805,\n", 1205 | " -0.43960067, -0.05112361, 0.40469433, 0.91172871, 0.24935004,\n", 1206 | " 0.34019051, 0.25525236, -0.59217263, -0.56262186, -0.01497731,\n", 1207 | " 0.0695675 , -0.02144429, 0.04325328, 0.15946079, 0.18446 ,\n", 1208 | " 0.25582022, 0.30949316, 0.40278977, 0.4255181 , 0.2840163 ,\n", 1209 | " 0.42057773, 0.45941366, -0.24938813, -0.25418846, -0.42233104,\n", 1210 | " 0.11668701, 0.33066576, 0.29736599, 0.32064904, 0.40746019,\n", 1211 | " -0.16741748, -0.11139299, -0.30030267, 0.00940919, 0.52902124,\n", 1212 | " 0.71358506, 0.4200336 , 0.26898954, 0.4043111 , 0.51775921,\n", 1213 | " 0.24915816, -0.20381793, -0.32190889, 0.13897562, -0.13482573,\n", 1214 | " 0.45736285, 0.2023384 , 0.32740856, 0.14504189, 0.24373536,\n", 1215 | " 0.4673738 , 0.65162784, 1.76219074, 0.63494524, 0.62830402,\n", 1216 | " 0.30860789, 0.10367325, -0.05551396, 0.20358276, 0.30723921,\n", 1217 | " 0.29236797, 0.1460285 , -0.06520416, -0.35593281, 0.07230334,\n", 1218 | " -0.43199656, -0.50446128, -0.31805938, 0.44619972, 0.50666258,\n", 1219 | " 0.37658415, -0.03478484, 0.09924211, 0.2197565 , -0.86057172,\n", 1220 | " -0.94029625, -0.77387695, -0.65316783, -0.22228412, 0.1078362 ,\n", 1221 | " -0.12004117, -0.17820506, -0.07598379, -0.11422471, -0.05717854,\n", 1222 | " -0.63267181, -0.18355135, -0.35640078, -0.47804469, 0.18869273,\n", 1223 | " -0.64542504, -0.57419083, -0.55506317, -0.49390715, -0.42547427,\n", 1224 | " -0.48798456, -0.5072359 , -0.29127774, -0.49157359, -0.85665164,\n", 1225 | " -0.93764569, -0.76543852, -0.69058052, -0.60870061, -0.25151876,\n", 1226 | " -0.89037835, -0.64424282, -0.74117626, -0.55070149, -0.79329866,\n", 1227 | " -0.66968387, -0.56791011, -0.56984256, -0.48223239, -0.30810351,\n", 1228 | " -0.14926282, -0.44770745, -0.47163955, -0.72383692, -0.79947907,\n", 1229 | " 
-0.54709498, -0.4843061 , -0.86389296, -0.90695394, -0.61251399,\n", 1230 | " -0.81859382, -0.39373544, -0.30160905, -0.25753693, -0.82518774,\n", 1231 | " -0.91156371, -0.83574485, -0.63531245, -0.46670179, -0.58265605,\n", 1232 | " -0.63542767, -0.51736277, -0.46767405, -0.77992992, -0.80571348,\n", 1233 | " -0.8893096 , -0.7368831 , -1.12757017, -1.29664219, -1.33817791,\n", 1234 | " -1.12177791, -0.85475676, -0.83829278, -1.06097844, -0.87068831,\n", 1235 | " -0.83739172, -0.78359487, -1.05402323, -1.15873018, -0.97677208,\n", 1236 | " -1.1738632 , -1.08756149, -0.87845 , -1.05953758, -0.89726162,\n", 1237 | " -0.92808345, -1.07917673, -1.09615972, -1.04304939, -0.91874159,\n", 1238 | " -0.85788747, -0.96075367, -1.11091323, -1.15244895, -1.11325572,\n", 1239 | " -1.02376198, -1.1516053 ])" 1240 | ] 1241 | }, 1242 | "execution_count": 25, 1243 | "metadata": {}, 1244 | "output_type": "execute_result" 1245 | } 1246 | ], 1247 | "source": [ 1248 | "y_prediction = regressor.predict(x_test)\n", 1249 | "y_prediction " 1250 | ] 1251 | }, 1252 | { 1253 | "cell_type": "code", 1254 | "execution_count": 26, 1255 | "metadata": {}, 1256 | "outputs": [ 1257 | { 1258 | "name": "stdout", 1259 | "output_type": "stream", 1260 | "text": [ 1261 | "R2 SCORE is 0.770856636180251\n", 1262 | "mean_sqrd_error is 0.058365602156151296\n", 1263 | "Root mean squared error of is 0.24158973934368838\n", 1264 | "Mean Absolute error is 0.1872874239791087\n" 1265 | ] 1266 | } 1267 | ], 1268 | "source": [ 1269 | "from sklearn.metrics import r2_score\n", 1270 | "from sklearn.metrics import mean_squared_error\n", 1271 | "from sklearn.metrics import mean_absolute_error\n", 1272 | "score = r2_score(y_test,y_prediction)\n", 1273 | "mean_error = mean_squared_error(y_test,y_prediction)\n", 1274 | "mae = mean_absolute_error(y_test,y_prediction)\n", 1275 | "print(\"R2 SCORE is\", score)\n", 1276 | "print(\"mean_sqrd_error is \", mean_error)\n", 1277 | "print(\"Root mean squared error of 
is\",np.sqrt(mean_error))\n", 1278 | "print(\"Mean Absolute error is\", mae)" 1279 | ] 1280 | }, 1281 | { 1282 | "cell_type": "code", 1283 | "execution_count": 27, 1284 | "metadata": {}, 1285 | "outputs": [ 1286 | { 1287 | "data": { 1288 | "text/plain": [ 1289 | "array([[ 82.],\n", 1290 | " [ 81.],\n", 1291 | " [ 85.],\n", 1292 | " [ 95.],\n", 1293 | " [118.]])" 1294 | ] 1295 | }, 1296 | "execution_count": 27, 1297 | "metadata": {}, 1298 | "output_type": "execute_result" 1299 | } 1300 | ], 1301 | "source": [ 1302 | "y_test = sc_y.inverse_transform(y_test)\n", 1303 | "y_test[:5]" 1304 | ] 1305 | }, 1306 | { 1307 | "cell_type": "code", 1308 | "execution_count": 28, 1309 | "metadata": {}, 1310 | "outputs": [ 1311 | { 1312 | "data": { 1313 | "text/plain": [ 1314 | "array([ 80.83299016, 93.59828519, 75.15603902, 114.26045103,\n", 1315 | " 107.50940979])" 1316 | ] 1317 | }, 1318 | "execution_count": 28, 1319 | "metadata": {}, 1320 | "output_type": "execute_result" 1321 | } 1322 | ], 1323 | "source": [ 1324 | "y_prediction = sc_y.inverse_transform(y_prediction)\n", 1325 | "y_prediction[:5]" 1326 | ] 1327 | }, 1328 | { 1329 | "cell_type": "code", 1330 | "execution_count": 29, 1331 | "metadata": {}, 1332 | "outputs": [ 1333 | { 1334 | "data": { 1335 | "image/png": "\n", 1336 | "text/plain": [ 1337 | "
" 1338 | ] 1339 | }, 1340 | "metadata": { 1341 | "needs_background": "light" 1342 | }, 1343 | "output_type": "display_data" 1344 | } 1345 | ], 1346 | "source": [ 1347 | "df1 = pd.DataFrame({'Actual': y_test[100:120].flatten(), 'Predicted': y_prediction[100:120].flatten()})\n", 1348 | "df1.plot(kind='line')\n", 1349 | "plt.grid(which='major', linestyle='-', linewidth='0.5', color='green')\n", 1350 | "plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black')\n", 1351 | "plt.show()" 1352 | ] 1353 | }, 1354 | { 1355 | "cell_type": "code", 1356 | "execution_count": 30, 1357 | "metadata": {}, 1358 | "outputs": [ 1359 | { 1360 | "data": { 1361 | "text/html": [ 1362 | "
\n", 1363 | "\n", 1376 | "\n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | "
ACTUAL AQIPREDICTED AQIDIFFERNCEPERCENTAGE ERROR
082.080.8329901.1670101.423183
181.093.598285-12.59828515.553439
285.075.1560399.84396111.581131
395.0114.260451-19.26045120.274159
4118.0107.50941010.4905908.890331
581.079.6983891.3016111.606927
675.074.9791440.0208560.027808
793.087.2914815.7085196.138193
8101.0100.7378000.2622000.259604
994.094.906258-0.9062580.964104
1078.075.7530402.2469602.880717
1179.090.596251-11.59625114.678798
12106.0108.012470-2.0124701.898556
13121.0127.385603-6.3856035.277358
14111.0102.0769648.9230368.038772
\n", 1494 | "
" 1495 | ], 1496 | "text/plain": [ 1497 | " ACTUAL AQI PREDICTED AQI DIFFERNCE PERCENTAGE ERROR\n", 1498 | "0 82.0 80.832990 1.167010 1.423183\n", 1499 | "1 81.0 93.598285 -12.598285 15.553439\n", 1500 | "2 85.0 75.156039 9.843961 11.581131\n", 1501 | "3 95.0 114.260451 -19.260451 20.274159\n", 1502 | "4 118.0 107.509410 10.490590 8.890331\n", 1503 | "5 81.0 79.698389 1.301611 1.606927\n", 1504 | "6 75.0 74.979144 0.020856 0.027808\n", 1505 | "7 93.0 87.291481 5.708519 6.138193\n", 1506 | "8 101.0 100.737800 0.262200 0.259604\n", 1507 | "9 94.0 94.906258 -0.906258 0.964104\n", 1508 | "10 78.0 75.753040 2.246960 2.880717\n", 1509 | "11 79.0 90.596251 -11.596251 14.678798\n", 1510 | "12 106.0 108.012470 -2.012470 1.898556\n", 1511 | "13 121.0 127.385603 -6.385603 5.277358\n", 1512 | "14 111.0 102.076964 8.923036 8.038772" 1513 | ] 1514 | }, 1515 | "execution_count": 30, 1516 | "metadata": {}, 1517 | "output_type": "execute_result" 1518 | } 1519 | ], 1520 | "source": [ 1521 | "result = pd.DataFrame()\n", 1522 | "result[\"ACTUAL AQI\"] = y_test.flatten()\n", 1523 | "result[\"PREDICTED AQI\"] = y_prediction\n", 1524 | "result[\"DIFFERNCE\"] = result[\"ACTUAL AQI\"] - result[\"PREDICTED AQI\"]\n", 1525 | "result[\"PERCENTAGE ERROR\"] = ( abs(result[\"ACTUAL AQI\"] - result[\"PREDICTED AQI\"] ) / result[\"ACTUAL AQI\"] ) * 100\n", 1526 | "result.head(15)" 1527 | ] 1528 | }, 1529 | { 1530 | "cell_type": "code", 1531 | "execution_count": null, 1532 | "metadata": {}, 1533 | "outputs": [], 1534 | "source": [] 1535 | } 1536 | ], 1537 | "metadata": { 1538 | "kernelspec": { 1539 | "display_name": "Python 3", 1540 | "language": "python", 1541 | "name": "python3" 1542 | }, 1543 | "language_info": { 1544 | "codemirror_mode": { 1545 | "name": "ipython", 1546 | "version": 3 1547 | }, 1548 | "file_extension": ".py", 1549 | "mimetype": "text/x-python", 1550 | "name": "python", 1551 | "nbconvert_exporter": "python", 1552 | "pygments_lexer": "ipython3", 1553 | "version": "3.7.3" 1554 | } 
1555 | }, 1556 | "nbformat": 4, 1557 | "nbformat_minor": 2 1558 | } 1559 | -------------------------------------------------------------------------------- /SVR.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "import seaborn as sns\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "\n", 15 | "import warnings\n", 16 | "warnings.filterwarnings(\"ignore\")\n", 17 | "warnings.simplefilter(action='ignore', category=FutureWarning)" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "df_x = pd.read_csv(\"train_x.csv\")\n", 27 | "df_y = pd.read_csv(\"train_y.csv\")\n", 28 | "df_x1 = pd.read_csv(\"test_x.csv\")\n", 29 | "df_y1 = pd.read_csv(\"test_y.csv\")" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/html": [ 40 | "
\n", 41 | "\n", 54 | "\n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | "
DatePM2.5PM10NO2NOxCOSO2O3tempmax_tempmin_temphumidvisiblewind
02016-01-0156.4095.0851.7334.310.697.4455.9620.128.415.754.06.03.0
12016-01-0253.6993.9273.0954.770.768.6734.0620.828.312.945.06.31.1
22016-01-0362.3599.3477.7755.600.969.1047.6221.429.013.745.06.30.4
32016-01-0464.96104.6277.0750.810.898.8750.6221.529.015.247.06.92.2
42016-01-0570.15105.1279.4356.050.859.4136.9420.928.414.751.06.31.1
\n", 162 | "
" 163 | ], 164 | "text/plain": [ 165 | " Date PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp \\\n", 166 | "0 2016-01-01 56.40 95.08 51.73 34.31 0.69 7.44 55.96 20.1 28.4 \n", 167 | "1 2016-01-02 53.69 93.92 73.09 54.77 0.76 8.67 34.06 20.8 28.3 \n", 168 | "2 2016-01-03 62.35 99.34 77.77 55.60 0.96 9.10 47.62 21.4 29.0 \n", 169 | "3 2016-01-04 64.96 104.62 77.07 50.81 0.89 8.87 50.62 21.5 29.0 \n", 170 | "4 2016-01-05 70.15 105.12 79.43 56.05 0.85 9.41 36.94 20.9 28.4 \n", 171 | "\n", 172 | " min_temp humid visible wind \n", 173 | "0 15.7 54.0 6.0 3.0 \n", 174 | "1 12.9 45.0 6.3 1.1 \n", 175 | "2 13.7 45.0 6.3 0.4 \n", 176 | "3 15.2 47.0 6.9 2.2 \n", 177 | "4 14.7 51.0 6.3 1.1 " 178 | ] 179 | }, 180 | "execution_count": 3, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "df_x.head(5)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 4, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/html": [ 197 | "
\n", 198 | "\n", 211 | "\n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | "
DatePM2.5PM10NO2NOxCOSO2O3tempmax_tempmin_temphumidvisiblewind
001-01-202030.7170.8027.3126.741.078.5637.3922.027.219.2756.93.3
102-01-202025.7862.7328.0530.181.167.5233.1122.226.019.4775.52.0
203-01-202029.6669.6226.2626.920.996.4032.9823.428.419.4686.93.5
304-01-202052.62106.8131.5641.891.396.9437.3523.630.519.0636.31.3
405-01-202052.6497.9429.1029.521.017.0145.1024.230.520.4636.31.3
\n", 319 | "
" 320 | ], 321 | "text/plain": [ 322 | " Date PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp \\\n", 323 | "0 01-01-2020 30.71 70.80 27.31 26.74 1.07 8.56 37.39 22.0 27.2 \n", 324 | "1 02-01-2020 25.78 62.73 28.05 30.18 1.16 7.52 33.11 22.2 26.0 \n", 325 | "2 03-01-2020 29.66 69.62 26.26 26.92 0.99 6.40 32.98 23.4 28.4 \n", 326 | "3 04-01-2020 52.62 106.81 31.56 41.89 1.39 6.94 37.35 23.6 30.5 \n", 327 | "4 05-01-2020 52.64 97.94 29.10 29.52 1.01 7.01 45.10 24.2 30.5 \n", 328 | "\n", 329 | " min_temp humid visible wind \n", 330 | "0 19.2 75 6.9 3.3 \n", 331 | "1 19.4 77 5.5 2.0 \n", 332 | "2 19.4 68 6.9 3.5 \n", 333 | "3 19.0 63 6.3 1.3 \n", 334 | "4 20.4 63 6.3 1.3 " 335 | ] 336 | }, 337 | "execution_count": 4, 338 | "metadata": {}, 339 | "output_type": "execute_result" 340 | } 341 | ], 342 | "source": [ 343 | "df_x1.head(5)" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 5, 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "data": { 353 | "text/html": [ 354 | "
\n", 355 | "\n", 368 | "\n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | "
DateAQI
02016-01-01101.0
12016-01-0292.0
22016-01-03108.0
32016-01-04109.0
42016-01-05114.0
\n", 404 | "
" 405 | ], 406 | "text/plain": [ 407 | " Date AQI\n", 408 | "0 2016-01-01 101.0\n", 409 | "1 2016-01-02 92.0\n", 410 | "2 2016-01-03 108.0\n", 411 | "3 2016-01-04 109.0\n", 412 | "4 2016-01-05 114.0" 413 | ] 414 | }, 415 | "execution_count": 5, 416 | "metadata": {}, 417 | "output_type": "execute_result" 418 | } 419 | ], 420 | "source": [ 421 | "df_y.head(5)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 6, 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "data": { 431 | "text/html": [ 432 | "
\n", 433 | "\n", 446 | "\n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | "
DateAQI
02020-01-0182.0
12020-01-0281.0
22020-01-0385.0
32020-01-0495.0
42020-01-05118.0
\n", 482 | "
" 483 | ], 484 | "text/plain": [ 485 | " Date AQI\n", 486 | "0 2020-01-01 82.0\n", 487 | "1 2020-01-02 81.0\n", 488 | "2 2020-01-03 85.0\n", 489 | "3 2020-01-04 95.0\n", 490 | "4 2020-01-05 118.0" 491 | ] 492 | }, 493 | "execution_count": 6, 494 | "metadata": {}, 495 | "output_type": "execute_result" 496 | } 497 | ], 498 | "source": [ 499 | "df_y1.head(5)" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": 7, 505 | "metadata": {}, 506 | "outputs": [ 507 | { 508 | "name": "stdout", 509 | "output_type": "stream", 510 | "text": [ 511 | "\n", 512 | "RangeIndex: 1441 entries, 0 to 1440\n", 513 | "Data columns (total 14 columns):\n", 514 | "Date 1441 non-null object\n", 515 | "PM2.5 1441 non-null float64\n", 516 | "PM10 1441 non-null float64\n", 517 | "NO2 1441 non-null float64\n", 518 | "NOx 1441 non-null float64\n", 519 | "CO 1441 non-null float64\n", 520 | "SO2 1441 non-null float64\n", 521 | "O3 1441 non-null float64\n", 522 | "temp 1441 non-null float64\n", 523 | "max_temp 1441 non-null float64\n", 524 | "min_temp 1441 non-null float64\n", 525 | "humid 1441 non-null float64\n", 526 | "visible 1441 non-null float64\n", 527 | "wind 1441 non-null float64\n", 528 | "dtypes: float64(13), object(1)\n", 529 | "memory usage: 157.7+ KB\n" 530 | ] 531 | } 532 | ], 533 | "source": [ 534 | "df_x.info()" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": 8, 540 | "metadata": {}, 541 | "outputs": [ 542 | { 543 | "name": "stdout", 544 | "output_type": "stream", 545 | "text": [ 546 | "\n", 547 | "RangeIndex: 1441 entries, 0 to 1440\n", 548 | "Data columns (total 2 columns):\n", 549 | "Date 1441 non-null object\n", 550 | "AQI 1441 non-null float64\n", 551 | "dtypes: float64(1), object(1)\n", 552 | "memory usage: 22.6+ KB\n" 553 | ] 554 | } 555 | ], 556 | "source": [ 557 | "df_y.info()" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 9, 563 | "metadata": {}, 564 | "outputs": [ 565 | { 566 | "name": 
"stdout", 567 | "output_type": "stream", 568 | "text": [ 569 | "\n", 570 | "RangeIndex: 182 entries, 0 to 181\n", 571 | "Data columns (total 14 columns):\n", 572 | "Date 182 non-null object\n", 573 | "PM2.5 182 non-null float64\n", 574 | "PM10 182 non-null float64\n", 575 | "NO2 182 non-null float64\n", 576 | "NOx 182 non-null float64\n", 577 | "CO 182 non-null float64\n", 578 | "SO2 182 non-null float64\n", 579 | "O3 182 non-null float64\n", 580 | "temp 182 non-null float64\n", 581 | "max_temp 182 non-null float64\n", 582 | "min_temp 182 non-null float64\n", 583 | "humid 182 non-null int64\n", 584 | "visible 182 non-null float64\n", 585 | "wind 182 non-null float64\n", 586 | "dtypes: float64(12), int64(1), object(1)\n", 587 | "memory usage: 20.0+ KB\n" 588 | ] 589 | } 590 | ], 591 | "source": [ 592 | "df_x1.info()" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": 10, 598 | "metadata": {}, 599 | "outputs": [ 600 | { 601 | "name": "stdout", 602 | "output_type": "stream", 603 | "text": [ 604 | "\n", 605 | "RangeIndex: 182 entries, 0 to 181\n", 606 | "Data columns (total 2 columns):\n", 607 | "Date 182 non-null object\n", 608 | "AQI 182 non-null float64\n", 609 | "dtypes: float64(1), object(1)\n", 610 | "memory usage: 2.9+ KB\n" 611 | ] 612 | } 613 | ], 614 | "source": [ 615 | "df_y1.info()" 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": 11, 621 | "metadata": {}, 622 | "outputs": [], 623 | "source": [ 624 | "df_x[\"Date\"] = pd.to_datetime(df_x[\"Date\"])\n", 625 | "df_x1[\"Date\"] = pd.to_datetime(df_x[\"Date\"])" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": 12, 631 | "metadata": {}, 632 | "outputs": [ 633 | { 634 | "name": "stdout", 635 | "output_type": "stream", 636 | "text": [ 637 | "\n", 638 | "RangeIndex: 1441 entries, 0 to 1440\n", 639 | "Data columns (total 14 columns):\n", 640 | "Date 1441 non-null datetime64[ns]\n", 641 | "PM2.5 1441 non-null float64\n", 642 | "PM10 
1441 non-null float64\n", 643 | "NO2 1441 non-null float64\n", 644 | "NOx 1441 non-null float64\n", 645 | "CO 1441 non-null float64\n", 646 | "SO2 1441 non-null float64\n", 647 | "O3 1441 non-null float64\n", 648 | "temp 1441 non-null float64\n", 649 | "max_temp 1441 non-null float64\n", 650 | "min_temp 1441 non-null float64\n", 651 | "humid 1441 non-null float64\n", 652 | "visible 1441 non-null float64\n", 653 | "wind 1441 non-null float64\n", 654 | "dtypes: datetime64[ns](1), float64(13)\n", 655 | "memory usage: 157.7 KB\n" 656 | ] 657 | } 658 | ], 659 | "source": [ 660 | "df_x.info()" 661 | ] 662 | }, 663 | { 664 | "cell_type": "code", 665 | "execution_count": 13, 666 | "metadata": {}, 667 | "outputs": [ 668 | { 669 | "name": "stdout", 670 | "output_type": "stream", 671 | "text": [ 672 | "\n", 673 | "RangeIndex: 182 entries, 0 to 181\n", 674 | "Data columns (total 14 columns):\n", 675 | "Date 182 non-null datetime64[ns]\n", 676 | "PM2.5 182 non-null float64\n", 677 | "PM10 182 non-null float64\n", 678 | "NO2 182 non-null float64\n", 679 | "NOx 182 non-null float64\n", 680 | "CO 182 non-null float64\n", 681 | "SO2 182 non-null float64\n", 682 | "O3 182 non-null float64\n", 683 | "temp 182 non-null float64\n", 684 | "max_temp 182 non-null float64\n", 685 | "min_temp 182 non-null float64\n", 686 | "humid 182 non-null int64\n", 687 | "visible 182 non-null float64\n", 688 | "wind 182 non-null float64\n", 689 | "dtypes: datetime64[ns](1), float64(12), int64(1)\n", 690 | "memory usage: 20.0 KB\n" 691 | ] 692 | } 693 | ], 694 | "source": [ 695 | "df_x1.info()" 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": 14, 701 | "metadata": {}, 702 | "outputs": [ 703 | { 704 | "data": { 705 | "text/plain": [ 706 | "" 707 | ] 708 | }, 709 | "execution_count": 14, 710 | "metadata": {}, 711 | "output_type": "execute_result" 712 | }, 713 | { 714 | "data": { 715 | "image/png": "\n", 716 | "text/plain": [ 717 | "
" 718 | ] 719 | }, 720 | "metadata": { 721 | "needs_background": "light" 722 | }, 723 | "output_type": "display_data" 724 | } 725 | ], 726 | "source": [ 727 | "sns.heatmap(df_x.isnull(),cbar=False, yticklabels=False)" 728 | ] 729 | }, 730 | { 731 | "cell_type": "code", 732 | "execution_count": 15, 733 | "metadata": {}, 734 | "outputs": [ 735 | { 736 | "data": { 737 | "text/plain": [ 738 | "" 739 | ] 740 | }, 741 | "execution_count": 15, 742 | "metadata": {}, 743 | "output_type": "execute_result" 744 | }, 745 | { 746 | "data": { 747 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAAEjCAYAAACRoNIGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAXOElEQVR4nO3de7iu9ZzH8fd3V1tNB+yUUpddCU0qoRxjVBimSDETGkxHh04yDBdGQo6lyCmHHKYRplGKSpecSu0OVHsXJSTKKUyEivSdP373s/ezntZee7fu3/Pbz67367rW1Vr30/7e9177WZ91/453ZCaSpDbmrOgLkKR7EkNXkhoydCWpIUNXkhoydCWpIUNXkhpadcYX527kfDJJuotu/8sNsbTXvNOVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyN
CVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqyNCVpIYMXUlqKTOrfQAH1KxnfeuvLPVX5mu3ftv6te90D6hcz/rWX1nqr8zXbv2G9e1ekKSGDF1Jaqh26H6kcj3rW39lqb8yX7v1G9aPrpNYktSA3QuS1JChK0kNGbqS1NCqK/oCNNkiYi0gM/NPK/paNB4RMRfYAkjg6sz8ywq+pLu1KgNpETEfeHBmfjUi1gBWzcybe9bcJjMXdp+vBrwGeDRwBfDWzPxzheveAtgN2Ijyhvs5cFpmfr9C7VWA/YCNgbMy89tDr70hM9/a9xxdrX0z8+Mj531DZh7Rs+7LgdcCawIB3Ay8MzM/2KfuUP0tKN/3CzPzj0PHn56ZZ1WoPwcgM+/oQmUr4CeZ+bu+tUfOswHlfZnAxZn5y0p1NwPeCzwOuAO4ADgsM39co/7QeXYBPgz8iPLvvCnwksw8s0fN0ynfj2ll5rNmW3voHAHsBWyWmW+OiAcCG2TmRT3rLmLma9+mT32o0L0QEfsDJwPHd4c2Bk7tWxf45NDn7wA2B44G1qC8SXqJiNcAn6W80S4CLu4+PykiXtu3PuX78Q/Ab4H3RcR7hl7bo0L9gZ0j4oyI2DAitgIWAGv3KRgRbwB2BZ6cmetm5jxgR+AZ3Wu9RMQhwBeBg4ErImK3oZffVqH+s4FfADd0tc8FjgIWRsQz+9YfOs9+lPfOHsBzgQURsU+l8p8BPg9sADwA+B/gpEq1hx0N7JiZT87Mf6D8Ox/Ts+ZRXd1rgVuAj3Yff6TcNNXwQcovpOd3X98MfKBC3V2BZwJndR97dR9nUHKuvwprki8D5gKXDh1bVKHupSPnWK37PICFFer/YFBz5Phc4JoK9RcOfb4qZZ7fF4B7Df/danwAewK/AX4KPKFCvauB1ac5vgbwgwr1FwFrdZ9vAlwCHDr6797nvUMJq02BPwAP7Y7PBy6p+H2/Glh36Ot1Kc3zGrUvnObYgprvm67mt0a+jtFjtWov7dgsa3939P0CXF7x+/Lt5Tk2m48afbq3ZeZfyt0+RMSqzHB7fhfcOyJ2p9yN3ysz/wqlczEiatS/g3IHcd3I8Q271/qaO/gkM28HDoiINwJfA9aqUB+AiHgwcCjwv8DfAy+MiEuzZ/dLZt46zbFbIqLG92aV7LoUMvMnEfFk4OSumyoq1Ce7Zn5E/DQzr+6OXTfodqjkesod1sDNwM8q1f561+L6LOXnaU/gyxExDyDrdZNcGRFnUO6qE/hn4OKI2KM7zxd61F4vIjbLrkskIjYF1ut7wZ2/dl1p2dVejzo/twNrRsQOmXleV//xlK623mqE7jcj4nXAGhHxVODlwOk16gKDvp8FEXH/zPxV14f2mwr1XwGcExHXsOQH5YGUboyDKtS/ZLR/Mkvf08+BD1WoP3A6cFCW/vQAXknpKnlYj5rXR8TOmXnO8MGI2JnSbO/rlxGxbWZeBpCZf4yIXYETgK0r1Cci5mTmHcA+Q8dWYeiXYQU3ABdGxBcpP/y7ARdFxCsBMvM9M/3hZdiz++9LRo7v051rsx61h60O/IrSFQZwIzCP0sROSutstg4DvhERg37oTbjz32e23gecAqwfEUdSund6d30N2Rc4ISLu3X19E0PvpT56D6R1dw77Ak+j3KV8JTM/WuHaxq679kdTBnSCcudycWb+bYVe2F0QEetk5h9Gjj04M6/pUfNhlD7X84DvUH74tgeeAOyWmVf2uGQiYmPg9pxm0CkinpBDg46zrL89pQsDyi/RpAwU3R94Ym
ae2Kf+0HkOn+n17DmYeXcQEfeizIwAuCozb6tYewtgZ8rP7jlZYQB8mnOsQ8nJ31erWSF0D83M9y7rWE0RscF0P7AV66+VQyPqPeqsDxxIuetM4HvABzPzV31rj7N+RGxO6RN9SFc7gCuBa4AbMvNHfeoPnWdHhq49M79eqe5qwJGUO5PrKF1UG1MGZ1836KqaZN1d+S6Uu8PFLdKed8/TnWdTyoDm6HlmPcMgInbKzK8NuihG9emyGHSvLE2tbpful8VzuPP35c29a1cI3e9m5iNHjl2amY/oVXjmc345M3cZY/2fZuYDe9Z4AmUE+pOUu8UAHgm8GNirwt3c2OpHxJco4bRw5Ph2wOGZ2WsGQERsRGm23srUa18D2D0zb+hZ/xjKDI7Dspu62N2xHAX8OTNf0af+0Hm2A15PGaAb/sHsPa2o62e9lXLHvrivsvbdc0RcDnx8mvN8s0fNIzLz8Ij4xDQvZ2bOupkeEddSfklP1/efmVml2yUizgJ+T3l/Lm75ZubRvWvPNnQj4vnAC4AdKFNyBtYG/paZT+l7ceM06Heb7iXg9VmmSfWpvwB4WWZeOnJ8W+D4zHzMpNaPiCsyc6ulvLYoM3v1u0bEKcAXM/OTI8dfBDwnM3eb9g8uf/1rgIfkyJu7u3u8KjMf3Kf+UL2rgVdz58AaHZydTe2FNcJ7Oc5zYd/34t3RTD8DffUZSDufMqhyP8qcvIGbgYXT/onJ8jbg3cDt07xWY4R7ndFABMjMyyKi1zzaBvVXn+G1NXrWBtgyM3cfPZiZn46I11eon6OB2x38W6WZLwM3ZuZpFesNOzMinpaZZ4+p/sB7u77ps4HF/a2Z+d2+hSPiPsCLuHMT/ZC+tbv6e1Bu+hI4NzNrrA8YOD8its7MRcv+X++aWYdu99v8OsoE5eoiYmvKhOqNgDOB12Tm/3WvXZSZj+55iu8Cp2bmd6Y59349a3dl4r6Dax46OI86oT7O+hdHxP6jA6IRsS+ludXXKtMd7AY2p33tLvpeRLwoMz89Uv9fgasq1B84PCI+BpzD1MDqM+I/sAA4pfue/JXSAsvMXKdC7WFbAy8EdmLJ3Xp2X/d1BuXvMaUlUENEfJAySDpYMPLSiHhqZh5Y6RQ7AP/WdWfcxpLvf/+uowp9uo8FjqPMEZ1L+aH5U983R0ScB7yV8o+2H7A38KzM/FGNPuOIeCjwu8y8cZrX7l9hMOoAYH/gVZSAB3gU8E7ghMw8fml/dkXXj4j7U6bj/IUlIbsd5d93976DmBFxLGXO4yuy29MhItakrIS6te+d0FCf8S1MnX1Rpc946DwnUkbmr2QosPr0WQ7V/jHwbMpCo7Fteh0RVwHb5Bj2W5huvKdi7SuBrQbfm+6X06LM7DNVcrj+/OmO1+g6qrFy4xLKb5xLKYG7N3BkhbqXjXy9I2X0/LF0q1Em/YOypPBblKXAv+0+f+ZKVH9Hysj2wcBOFeuuRuna+Q0lFC+hzA89Cphb8Tw7ddd+CLDzGP59e6+8nKH2V4A546o/dJ7PAeuPqfZhlBuDDSlzf+cB8yrV/gIwf+jr+cBJFequ0/133nQfNa69xp3uJZm53XDHf0Scn5mP71n3cuBJOTQ/LiK2oay8mpeZ6/asP2NfXFbYlEPT6+bRXk+ZcL45Jdx3pTT935SVN6UZl4j4KHBMZn5vDLU/SVkAcSZTuy5qTxn7BrANZUHN8HlqbEpzIGXq3k0sWaWa2WOGQSzZTOfelNbLRd3XjwHOz54D+BHxpczcdSmzJHpd+0CNFWl/jrKL02UR8S7K4FqN5XLvpHRZLBgcyMyFUVZF/WeF+o+jrEQ7CbiQSstPB6Is+V2azMy3THL9MTseeEqWZcX3pexmdjCwLWWPiueuyIu7C3YAXjyOfj/KZjHXUrp0aq6iGzXjAo+eXglsnpk1VpAOHFWx1p1k5q7dp+dRWo7nZmbNcYAqd7rzKcsI51KaE/emTND/Yf/LG59u+tBTKbsUbQ
N8mdI86bXaaqj+v09zeE3K6r11M7PX/gvjrj9OEXF5Zj68+/wDlFkAb+q+viwzt12R17e8xtrvt+Qca+aY9zKOqVuz/h1lb4xeW7N2dU8DnpcVtmFtLSJ2ovxSfSKlxXEpJYB7L/qqtZ/uegA5zaBUj5rNmv/d6pPnU/oZ35yZx9Wq3dVfm7Ipzb6UjUWOzsxfryz1a4uIK4BtM/P2biDngMz81uC1HNP8yHGIiB0ogfWJ7udgrcy8tkLdx1EWLayVmQ+MiIdT9rl9ed/aI+fZHziA0mX3oCgbKH04M3euUPsUyorDrzO162LWA6URcV5m7hARNzN1Y63qszu6G7PtKd1fLwVuycwtZv5Tyzbr7oWICErT5CDKX3hORNwOHJcVlsox5uY/LA7bXSiBuwllE40a030G9edRmlh7AZ8CHpkjU7wmuf4YnUTZKOk3lBkG5wKD5cfV1riPWze/dTvgocAnKAOEJ1L2qOjrWOAfgdMAMvPyiHhShbqjDqTsP3Jhd55roiwvr+FU6uytvVhm7tD9t8Zc96WKiHMoLccLKO/P7WvdyPTp030F5c21/eA3e5Td7j8UEYdlZt+NkDdgSfP/BdRv/n+K8jSBM4EjMrPW5sqD+u+mbG79EWDrrLCXQ8v645SZR3Zv6g2Bs3NJc2sOpW93ZbE78Ai6KXuZ+fNKC1/o6v0sYsq9xjg2YhrX1qxk5qdq1JlORDwIuD4zb4uyNeg2wKcz86ZKp1hImYK5FeVG4KaIuCAzb+lbuM8y4EuBp452kndNrLOz4t4L42j+R9kXdrivbPCNqNJM6erfRlnxVr0ZNO76WrbBIp3BfNRurvEFNQbSIuJk4D3A+ynTJA8BtsvM5/WtPXKed1FmF7yI8gvv5ZTNh3qvDByaATBFjRkAEXEZpZWxCWV63WmUzer/qW/tkfOsRZkG+yrK44Du1bdmnzvd1aYblczMG6Ps8tTbOJv/mTnWJyGv7PW1XD4fEccD9+n6RvehrKKs4aWUZ6RtRJledzYlEGt7LWUsYBFlr9szst7WrNsNfb46ZYP0XnuaDLmjGxPYHTg2M4/rbgSriIiDKINoj6KsvD2BqXvMzFqf0J1pBUvv1S0Nmv+rU97Ym1OaEidkecKDtLzWozw36w+Uft03ArU2enpoZu41fCDKznK9dqebxsHdiPzioI1KW7Nm5m9HDh3brTSdabrj8vprlE23XkzZcB1Kn3ota1BaGt+pnQt9uhf+xtTm+eKXKM/X6vUNaND8/xxlTfu5wDOA6zLz0D41dc8y3TLXqLQ72FJqV19Wu5TzVNmaNSKG686h3Pm+bDBdsGftLSk3TRdk5klR9gXeMzPf0bf2uPXZ8KbGxiQz1R9383nL7LYojIiPU1a2SMsUES+jNPU3i4jhHfXWpuedaDdV7PGU54sNbz+6DnU2AxqcZzBAvenI9My1KUvKaziaJTdLtwM/oXQx9NatAjxk6OtrKU8Nn3g1VqSNRYPm/+KnB3R9QxVL627uM5Rur7dT+kQHbs7+S5jnUh5cuiolAAf+QN2Vei22Zn0Gd376wvOAWU8pjYjPZ+a/RMQiph9AHvsexH1VWRwxDuNu/o90jwSlD+fPOPqvCRAR82da2RYRx2Xm2KfXddOkZrV9a5SnL9xEmVJX5ekLEbFhZv6iawVcxMjTl2uuBhyXib3TZczN/3F3j0h9LEd41FiAsTxm2tB+WTbOzKdXuxIgMwdPo16bsofH7yiPqT85Kz17cNwmedrRlOb/irwQ6R6sT1P4/CgPI6guM4/IsnfugcADKCscvzqOc9U2yXe6D4+IwaPFA1ij+9rmvzTBhvpbVwX2jrIhe+1d2AZ+DfySMvhXa/nyWE1s6Nr8l2bUauR3NufZddn/Sz/dDJI9WTJXev8cw77G4zCxoSvdk0XE6pl568ix+w2tAu29eGE5vfCu/oFGg1nzKY97uqzBuaqa2NkL0j1Z10TfPzMXdF8/B3h7Zj6k8nn2oDwwYH
3KXa3dd2Nm6EoTqBuAOgH4BmWgaF1gv8y8vvJ5fkh5rt73a9bV0hm60oSKiGcD/0VZsPCkHMPTWCLi25nZavqZsE9Xmkjd3PQHUfaJfQhwekS8PzM/UPlUl3QLkU5l6tMdqm3mr6kMXWkyXUHpTkjg2oh4LGXXq9rWoazEfNrQsaTiE1Q0ld0LktSQd7rSBOoeEPl2YEuGluLWeOpCV/8/MvNdEXEc0z/dYdYPj9TMDF1pMn2C8uDXYyhPo92bugsiBrMVLqHSM9G0fOxekCZQRHwnMx8VEYuGNn46NzOfWPk82wOvY+r2iyvFFokrK+90pcl0a0TMAa7pntd1A+PZW+BE4NWUZ6TdMYb6GuGdrjSBujvQ7wP3Ad5CmWXwrsy8sPJ5zsvMHWrW1MwMXWkCRcR2wOspewwMnjdYvdkfETtTnrZ9Ds7TbcLuBWky/Tdtmv17A1tQgn1wHufpjpGhK02mGzPztGX/b709fDBQpzYMXWkyHR4RH2P8zf4FEbHlyrIX7d2BfbrSBIqIEynN/isZavZn5j6Vz/N9yh4P1zK+pztoiKErTaDh+bljPs/86Y6vDE/VXVnZvSBNpibNfsO1Pe90pQlks//uy9CVJpDN/rsvQ1eSGpqzoi9Aku5JDF1JasjQlaSGDF1JasjQlaSG/h+WB7E69hZlGwAAAABJRU5ErkJggg==\n", 748 | "text/plain": [ 749 | "
" 750 | ] 751 | }, 752 | "metadata": { 753 | "needs_background": "light" 754 | }, 755 | "output_type": "display_data" 756 | } 757 | ], 758 | "source": [ 759 | "sns.heatmap(df_x1.isnull(),cbar=False, yticklabels=False)" 760 | ] 761 | }, 762 | { 763 | "cell_type": "code", 764 | "execution_count": 16, 765 | "metadata": {}, 766 | "outputs": [], 767 | "source": [ 768 | "x_train = df_x.drop(['Date'], axis = 1)\n", 769 | "y_train = df_y[\"AQI\"]\n", 770 | "x_test = df_x1.drop(['Date'], axis = 1)\n", 771 | "y_test = df_y1[\"AQI\"]" 772 | ] 773 | }, 774 | { 775 | "cell_type": "code", 776 | "execution_count": 17, 777 | "metadata": {}, 778 | "outputs": [ 779 | { 780 | "data": { 781 | "text/html": [ 782 | "
\n", 783 | "\n", 796 | "\n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | "
PM2.5PM10NO2NOxCOSO2O3tempmax_tempmin_temphumidvisiblewind
056.4095.0851.7334.310.697.4455.9620.128.415.754.06.03.0
153.6993.9273.0954.770.768.6734.0620.828.312.945.06.31.1
262.3599.3477.7755.600.969.1047.6221.429.013.745.06.30.4
364.96104.6277.0750.810.898.8750.6221.529.015.247.06.92.2
470.15105.1279.4356.050.859.4136.9420.928.414.751.06.31.1
\n", 898 | "
" 899 | ], 900 | "text/plain": [ 901 | " PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp min_temp \\\n", 902 | "0 56.40 95.08 51.73 34.31 0.69 7.44 55.96 20.1 28.4 15.7 \n", 903 | "1 53.69 93.92 73.09 54.77 0.76 8.67 34.06 20.8 28.3 12.9 \n", 904 | "2 62.35 99.34 77.77 55.60 0.96 9.10 47.62 21.4 29.0 13.7 \n", 905 | "3 64.96 104.62 77.07 50.81 0.89 8.87 50.62 21.5 29.0 15.2 \n", 906 | "4 70.15 105.12 79.43 56.05 0.85 9.41 36.94 20.9 28.4 14.7 \n", 907 | "\n", 908 | " humid visible wind \n", 909 | "0 54.0 6.0 3.0 \n", 910 | "1 45.0 6.3 1.1 \n", 911 | "2 45.0 6.3 0.4 \n", 912 | "3 47.0 6.9 2.2 \n", 913 | "4 51.0 6.3 1.1 " 914 | ] 915 | }, 916 | "execution_count": 17, 917 | "metadata": {}, 918 | "output_type": "execute_result" 919 | } 920 | ], 921 | "source": [ 922 | "x_train.head(5)" 923 | ] 924 | }, 925 | { 926 | "cell_type": "code", 927 | "execution_count": 18, 928 | "metadata": {}, 929 | "outputs": [ 930 | { 931 | "data": { 932 | "text/html": [ 933 | "
\n", 934 | "\n", 947 | "\n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | "
PM2.5PM10NO2NOxCOSO2O3tempmax_tempmin_temphumidvisiblewind
030.7170.8027.3126.741.078.5637.3922.027.219.2756.93.3
125.7862.7328.0530.181.167.5233.1122.226.019.4775.52.0
229.6669.6226.2626.920.996.4032.9823.428.419.4686.93.5
352.62106.8131.5641.891.396.9437.3523.630.519.0636.31.3
452.6497.9429.1029.521.017.0145.1024.230.520.4636.31.3
\n", 1049 | "
" 1050 | ], 1051 | "text/plain": [ 1052 | " PM2.5 PM10 NO2 NOx CO SO2 O3 temp max_temp min_temp \\\n", 1053 | "0 30.71 70.80 27.31 26.74 1.07 8.56 37.39 22.0 27.2 19.2 \n", 1054 | "1 25.78 62.73 28.05 30.18 1.16 7.52 33.11 22.2 26.0 19.4 \n", 1055 | "2 29.66 69.62 26.26 26.92 0.99 6.40 32.98 23.4 28.4 19.4 \n", 1056 | "3 52.62 106.81 31.56 41.89 1.39 6.94 37.35 23.6 30.5 19.0 \n", 1057 | "4 52.64 97.94 29.10 29.52 1.01 7.01 45.10 24.2 30.5 20.4 \n", 1058 | "\n", 1059 | " humid visible wind \n", 1060 | "0 75 6.9 3.3 \n", 1061 | "1 77 5.5 2.0 \n", 1062 | "2 68 6.9 3.5 \n", 1063 | "3 63 6.3 1.3 \n", 1064 | "4 63 6.3 1.3 " 1065 | ] 1066 | }, 1067 | "execution_count": 18, 1068 | "metadata": {}, 1069 | "output_type": "execute_result" 1070 | } 1071 | ], 1072 | "source": [ 1073 | "x_test.head(5)" 1074 | ] 1075 | }, 1076 | { 1077 | "cell_type": "code", 1078 | "execution_count": 19, 1079 | "metadata": {}, 1080 | "outputs": [ 1081 | { 1082 | "data": { 1083 | "text/plain": [ 1084 | "0 101.0\n", 1085 | "1 92.0\n", 1086 | "2 108.0\n", 1087 | "3 109.0\n", 1088 | "4 114.0\n", 1089 | "Name: AQI, dtype: float64" 1090 | ] 1091 | }, 1092 | "execution_count": 19, 1093 | "metadata": {}, 1094 | "output_type": "execute_result" 1095 | } 1096 | ], 1097 | "source": [ 1098 | "y_train.head(5)" 1099 | ] 1100 | }, 1101 | { 1102 | "cell_type": "code", 1103 | "execution_count": 20, 1104 | "metadata": {}, 1105 | "outputs": [ 1106 | { 1107 | "data": { 1108 | "text/plain": [ 1109 | "0 82.0\n", 1110 | "1 81.0\n", 1111 | "2 85.0\n", 1112 | "3 95.0\n", 1113 | "4 118.0\n", 1114 | "Name: AQI, dtype: float64" 1115 | ] 1116 | }, 1117 | "execution_count": 20, 1118 | "metadata": {}, 1119 | "output_type": "execute_result" 1120 | } 1121 | ], 1122 | "source": [ 1123 | "y_test.head(5)" 1124 | ] 1125 | }, 1126 | { 1127 | "cell_type": "code", 1128 | "execution_count": 21, 1129 | "metadata": {}, 1130 | "outputs": [], 1131 | "source": [ 1132 | "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n", 1133 | 
"sc_x = StandardScaler()\n", 1134 | "sc_y = StandardScaler()\n", 1135 | "x_train = sc_x.fit_transform(x_train)\n", 1136 | "x_test = sc_x.transform(x_test)\n", 1137 | "y_train = sc_y.fit_transform(np.array(y_train).reshape(-1,1))\n", 1138 | "y_test = sc_y.transform(np.array(y_test).reshape(-1,1))" 1139 | ] 1140 | }, 1141 | { 1142 | "cell_type": "code", 1143 | "execution_count": 22, 1144 | "metadata": {}, 1145 | "outputs": [ 1146 | { 1147 | "data": { 1148 | "text/plain": [ 1149 | "array([[-0.27610501],\n", 1150 | " [-0.30227705],\n", 1151 | " [-0.1975889 ],\n", 1152 | " [ 0.06413148],\n", 1153 | " [ 0.66608835]])" 1154 | ] 1155 | }, 1156 | "execution_count": 22, 1157 | "metadata": {}, 1158 | "output_type": "execute_result" 1159 | } 1160 | ], 1161 | "source": [ 1162 | "y_test[:5]" 1163 | ] 1164 | }, 1165 | { 1166 | "cell_type": "code", 1167 | "execution_count": 23, 1168 | "metadata": {}, 1169 | "outputs": [ 1170 | { 1171 | "data": { 1172 | "text/plain": [ 1173 | "SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,\n", 1174 | " gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,\n", 1175 | " tol=0.001, verbose=False)" 1176 | ] 1177 | }, 1178 | "execution_count": 23, 1179 | "metadata": {}, 1180 | "output_type": "execute_result" 1181 | } 1182 | ], 1183 | "source": [ 1184 | "from sklearn.svm import SVR\n", 1185 | "regressor = SVR(kernel = 'rbf')\n", 1186 | "regressor.fit(x_train,y_train)" 1187 | ] 1188 | }, 1189 | { 1190 | "cell_type": "code", 1191 | "execution_count": 24, 1192 | "metadata": {}, 1193 | "outputs": [ 1194 | { 1195 | "data": { 1196 | "text/plain": [ 1197 | "array([-0.42595944, -0.32311139, -0.37435607, 0.56882821, 0.35833741,\n", 1198 | " -0.4289785 , -0.57286549, -0.26249298, 0.01577009, -0.11099087,\n", 1199 | " -0.32003543, -0.21896574, 0.40781822, 0.70554794, 0.22467137,\n", 1200 | " 0.26272955, 0.05300658, -0.43237994, -0.50516794, -0.15245204,\n", 1201 | " -0.16191589, -0.16050723, -0.09468641, 0.01183634, 0.24495599,\n", 1202 
| " 0.43675849, 0.50363451, 0.22383874, 0.36682982, 0.68494588,\n", 1203 | " 0.37600183, 0.16605308, -0.30409398, -0.25732645, -0.17879819,\n", 1204 | " -0.28231346, 0.22057641, 0.25122338, 0.13562446, 0.16869742,\n", 1205 | " -0.27649917, -0.19226034, -0.34176244, -0.07255308, 0.79432799,\n", 1206 | " 0.99324748, 0.03093437, 0.16210712, 0.31583795, 0.47365842,\n", 1207 | " 0.11191249, -0.22742264, -0.26317195, 0.01138717, -0.16583801,\n", 1208 | " 0.34501398, 0.20441671, 0.14925122, 0.16647227, 0.23584846,\n", 1209 | " 0.44561196, 0.74555276, 1.77403237, 0.98925211, 0.6941801 ,\n", 1210 | " 0.04527876, 0.08692571, -0.15628333, -0.03071387, 0.15375035,\n", 1211 | " 0.03870278, 0.00487757, -0.18599819, -0.30738972, -0.12022113,\n", 1212 | " -0.37615108, -0.56692846, -0.31514682, 0.19595979, 0.30663164,\n", 1213 | " 0.4771285 , -0.28227281, -0.05911738, 0.04322423, -0.68263281,\n", 1214 | " -0.73819769, -0.50290478, -0.46696796, -0.38069815, -0.36181173,\n", 1215 | " -0.20645508, -0.36911442, -0.27281806, -0.22542369, -0.37897064,\n", 1216 | " -0.38816626, -0.72536073, -0.98441423, -0.7442325 , -0.71863761,\n", 1217 | " -0.72144794, -0.74704061, -0.58227917, -0.50226986, -0.33404258,\n", 1218 | " -0.48810957, -0.6112883 , -0.42955452, -0.65229843, -0.87307501,\n", 1219 | " -1.00287225, -0.82285002, -0.25709449, -0.75710873, -0.60362084,\n", 1220 | " -0.26031897, -0.91369875, -0.95760632, -0.81297347, -0.14054648,\n", 1221 | " -0.7561288 , -0.61221255, -0.69518032, -0.60207119, -0.37488619,\n", 1222 | " -0.20918184, -0.54138463, -0.49764821, -0.84623359, -0.73307276,\n", 1223 | " -0.60311798, -0.65324264, -0.86782537, -0.94903969, -0.59868767,\n", 1224 | " -0.80292309, -0.57618004, -0.85284508, -0.63479565, -0.51516231,\n", 1225 | " -0.51806576, -0.91674212, -0.68377782, -0.57208282, -0.84884395,\n", 1226 | " -0.80278251, -0.75762862, -0.58761396, -0.79499592, -0.81003283,\n", 1227 | " -0.87723899, -0.83168805, -1.13971526, -1.3723071 , -1.24599784,\n", 1228 | " 
-1.21094141, -0.90016809, -1.01965268, -1.02627238, -1.06362168,\n", 1229 | " -1.0106033 , -0.74411236, -0.97876786, -1.04877468, -0.96290783,\n", 1230 | " -1.11445661, -1.01896601, -0.86198109, -0.94589339, -0.78238535,\n", 1231 | " -0.85986483, -0.96586506, -1.06933763, -0.99446313, -0.97199095,\n", 1232 | " -0.98809041, -0.86366581, -0.9923654 , -1.1113921 , -1.06807415,\n", 1233 | " -0.08591312, -1.22178744])" 1234 | ] 1235 | }, 1236 | "execution_count": 24, 1237 | "metadata": {}, 1238 | "output_type": "execute_result" 1239 | } 1240 | ], 1241 | "source": [ 1242 | "y_prediction = regressor.predict(x_test)\n", 1243 | "y_prediction" 1244 | ] 1245 | }, 1246 | { 1247 | "cell_type": "code", 1248 | "execution_count": 25, 1249 | "metadata": {}, 1250 | "outputs": [ 1251 | { 1252 | "name": "stdout", 1253 | "output_type": "stream", 1254 | "text": [ 1255 | "R2 SCORE is 0.8063001007104242\n", 1256 | "mean_sqrd_error is 0.04933772059187857\n", 1257 | "Root mean squared error of is 0.22212095937096654\n", 1258 | "Mean Absolute error is 0.16675010409400015\n" 1259 | ] 1260 | } 1261 | ], 1262 | "source": [ 1263 | "from sklearn.metrics import r2_score\n", 1264 | "from sklearn.metrics import mean_squared_error\n", 1265 | "from sklearn.metrics import mean_absolute_error\n", 1266 | "score = r2_score(y_test,y_prediction)\n", 1267 | "mean_error = mean_squared_error(y_test,y_prediction)\n", 1268 | "mae = mean_absolute_error(y_test,y_prediction)\n", 1269 | "print(\"R2 SCORE is\", score)\n", 1270 | "print(\"mean_sqrd_error is \", mean_error)\n", 1271 | "print(\"Root mean squared error of is\",np.sqrt(mean_error))\n", 1272 | "print(\"Mean Absolute error is\", mae)" 1273 | ] 1274 | }, 1275 | { 1276 | "cell_type": "code", 1277 | "execution_count": 26, 1278 | "metadata": {}, 1279 | "outputs": [ 1280 | { 1281 | "data": { 1282 | "text/plain": [ 1283 | "array([[ 82.],\n", 1284 | " [ 81.],\n", 1285 | " [ 85.],\n", 1286 | " [ 95.],\n", 1287 | " [118.]])" 1288 | ] 1289 | }, 1290 | 
"execution_count": 26, 1291 | "metadata": {}, 1292 | "output_type": "execute_result" 1293 | } 1294 | ], 1295 | "source": [ 1296 | "y_test = sc_y.inverse_transform(y_test)\n", 1297 | "y_test[:5]" 1298 | ] 1299 | }, 1300 | { 1301 | "cell_type": "code", 1302 | "execution_count": 27, 1303 | "metadata": {}, 1304 | "outputs": [ 1305 | { 1306 | "data": { 1307 | "text/plain": [ 1308 | "array([ 76.27425416, 80.20394657, 78.24595278, 114.28381494,\n", 1309 | " 106.24123124])" 1310 | ] 1311 | }, 1312 | "execution_count": 27, 1313 | "metadata": {}, 1314 | "output_type": "execute_result" 1315 | } 1316 | ], 1317 | "source": [ 1318 | "y_prediction = sc_y.inverse_transform(y_prediction)\n", 1319 | "y_prediction[:5]" 1320 | ] 1321 | }, 1322 | { 1323 | "cell_type": "code", 1324 | "execution_count": 28, 1325 | "metadata": {}, 1326 | "outputs": [ 1327 | { 1328 | "data": { 1329 | "image/png": "\n", 1330 | "text/plain": [ 1331 | "
" 1332 | ] 1333 | }, 1334 | "metadata": { 1335 | "needs_background": "light" 1336 | }, 1337 | "output_type": "display_data" 1338 | } 1339 | ], 1340 | "source": [ 1341 | "df1 = pd.DataFrame({'Actual': y_test[100:120].flatten(), 'Predicted': y_prediction[100:120].flatten()})\n", 1342 | "df1.plot(kind='line')\n", 1343 | "plt.grid(which='major', linestyle='-', linewidth='0.5', color='green')\n", 1344 | "plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black')\n", 1345 | "plt.show()" 1346 | ] 1347 | }, 1348 | { 1349 | "cell_type": "code", 1350 | "execution_count": 29, 1351 | "metadata": {}, 1352 | "outputs": [ 1353 | { 1354 | "data": { 1355 | "text/html": [ 1356 | "
\n", 1357 | "\n", 1370 | "\n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | "
ACTUAL AQIPREDICTED AQIDIFFERENCEPERCENTAGE ERROR
082.076.2742545.7257466.982617
181.080.2039470.7960530.982782
285.078.2459536.7540477.945938
395.0114.283815-19.28381520.298753
4118.0106.24123111.7587699.965058
581.076.1589004.8411005.976667
675.070.6611634.3388375.785116
793.082.52009810.47990211.268712
8101.093.1521737.8478277.770126
994.088.3087995.6912016.054469
1078.080.321475-2.3214752.976250
1179.084.183218-5.1832186.561036
12106.0108.131829-2.1318292.011160
13121.0119.5077001.4923001.233306
14111.0101.1340239.8659778.888267
\n", 1488 | "
" 1489 | ], 1490 | "text/plain": [ 1491 | " ACTUAL AQI PREDICTED AQI DIFFERENCE PERCENTAGE ERROR\n", 1492 | "0 82.0 76.274254 5.725746 6.982617\n", 1493 | "1 81.0 80.203947 0.796053 0.982782\n", 1494 | "2 85.0 78.245953 6.754047 7.945938\n", 1495 | "3 95.0 114.283815 -19.283815 20.298753\n", 1496 | "4 118.0 106.241231 11.758769 9.965058\n", 1497 | "5 81.0 76.158900 4.841100 5.976667\n", 1498 | "6 75.0 70.661163 4.338837 5.785116\n", 1499 | "7 93.0 82.520098 10.479902 11.268712\n", 1500 | "8 101.0 93.152173 7.847827 7.770126\n", 1501 | "9 94.0 88.308799 5.691201 6.054469\n", 1502 | "10 78.0 80.321475 -2.321475 2.976250\n", 1503 | "11 79.0 84.183218 -5.183218 6.561036\n", 1504 | "12 106.0 108.131829 -2.131829 2.011160\n", 1505 | "13 121.0 119.507700 1.492300 1.233306\n", 1506 | "14 111.0 101.134023 9.865977 8.888267" 1507 | ] 1508 | }, 1509 | "execution_count": 29, 1510 | "metadata": {}, 1511 | "output_type": "execute_result" 1512 | } 1513 | ], 1514 | "source": [ 1515 | "result = pd.DataFrame()\n", 1516 | "result[\"ACTUAL AQI\"] = y_test.flatten()\n", 1517 | "result[\"PREDICTED AQI\"] = y_prediction\n", 1518 | "result[\"DIFFERENCE\"] = result[\"ACTUAL AQI\"] - result[\"PREDICTED AQI\"]\n", 1519 | "result[\"PERCENTAGE ERROR\"] = ( abs(result[\"ACTUAL AQI\"] - result[\"PREDICTED AQI\"] ) / result[\"ACTUAL AQI\"] ) * 100\n", 1520 | "result.head(15)" 1521 | ] 1522 | }, 1523 | { 1524 | "cell_type": "code", 1525 | "execution_count": null, 1526 | "metadata": {}, 1527 | "outputs": [], 1528 | "source": [] 1529 | } 1530 | ], 1531 | "metadata": { 1532 | "kernelspec": { 1533 | "display_name": "Python 3", 1534 | "language": "python", 1535 | "name": "python3" 1536 | }, 1537 | "language_info": { 1538 | "codemirror_mode": { 1539 | "name": "ipython", 1540 | "version": 3 1541 | }, 1542 | "file_extension": ".py", 1543 | "mimetype": "text/x-python", 1544 | "name": "python", 1545 | "nbconvert_exporter": "python", 1546 | "pygments_lexer": "ipython3", 1547 | "version": "3.7.3" 1548 | } 
1549 | }, 1550 | "nbformat": 4, 1551 | "nbformat_minor": 2 1552 | } 1553 | --------------------------------------------------------------------------------