├── .DS_Store ├── .gitattributes ├── README.md ├── data1 ├── .DS_Store ├── .ipynb_checkpoints │ ├── Binary-checkpoint.ipynb │ ├── Binary_final_v1-checkpoint.ipynb │ ├── Binary_kalman-checkpoint.ipynb │ ├── Logistic-checkpoint.ipynb │ ├── RUL-checkpoint.ipynb │ ├── Untitled-checkpoint.ipynb │ ├── multinomial-checkpoint.ipynb │ └── tpot-checkpoint.ipynb ├── Binary_final_v1.ipynb ├── PM_test_01.txt ├── PM_train_01.txt ├── PM_truth_01.txt ├── RUL.ipynb ├── predictive_binary_final.h5 └── predictive_regression_kalhman.h5 ├── data2 ├── .DS_Store ├── .ipynb_checkpoints │ ├── Binary_final_v1-checkpoint.ipynb │ ├── BinomialPredictiveMaintenance02-checkpoint.ipynb │ └── RUL-checkpoint.ipynb ├── Binary_final_v1.ipynb ├── PM_test_02.txt ├── PM_train_02.txt ├── PM_truth_02.txt ├── RUL.ipynb ├── predictive_binary_final.h5 └── predictive_regression_kalhman.h5 ├── data3 ├── .DS_Store ├── .ipynb_checkpoints │ ├── BinomialPredictiveMaintenance03-checkpoint.ipynb │ └── RUL-checkpoint.ipynb ├── PM_test_03.txt ├── PM_train_03.txt ├── PM_truth_03.txt └── RUL.ipynb └── data4 ├── .DS_Store ├── .ipynb_checkpoints ├── Binary_final_v1-checkpoint.ipynb ├── BinomialPredictiveMaintenance04-checkpoint.ipynb └── RUL-checkpoint.ipynb ├── Binary_final_v1.ipynb ├── PM_test_04.txt ├── PM_train_04.txt ├── PM_truth_04.txt ├── RUL.ipynb ├── predictive_binary_final.h5 └── predictive_regression_kalhman.h5 /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/.DS_Store -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # RUL - Siemens MakeItReal Hackathon 2017 2 | Remaining useful life (RUL) prediction for the NASA Turbofan dataset, developed at the Siemens MakeItReal Hackathon 2017 3 | 4 | Files: 5 | The data1, data2 and data3 folders contain analysis, modelling and RUL prediction for each dataset, in order of increasing complexity and noise 6 | 7 | Hackathon details: https://www.hackerearth.com/sprints/makeitreal/
8 | My experience: https://soham97.github.io/posts/2017/10/Siemens-MakeITReal-Hackathon/
9 | dataset: https://ti.arc.nasa.gov/tech/dash/groups/pcoe/prognostic-data-repository/ 10 | -------------------------------------------------------------------------------- /data1/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data1/.DS_Store -------------------------------------------------------------------------------- /data1/.ipynb_checkpoints/Logistic-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "\n", 13 | "# Setting seed for reproducability\n", 14 | "np.random.seed(1234) \n", 15 | "PYTHONHASHSEED = 0\n", 16 | "from sklearn import preprocessing\n", 17 | "from sklearn.metrics import confusion_matrix, recall_score, precision_score\n", 18 | "%matplotlib inline" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "# read training data \n", 28 | "train_df = pd.read_csv('PM_train_01.txt', sep=\" \", header=None)\n", 29 | "train_df.drop(train_df.columns[[26, 27]], axis=1, inplace=True)\n", 30 | "train_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n", 31 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n", 32 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# read test data\n", 42 | "test_df = pd.read_csv('PM_test_01.txt', sep=\" \", header=None)\n", 43 | "test_df.drop(test_df.columns[[26, 27]], axis=1, inplace=True)\n", 44 | "test_df.columns 
= ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n", 45 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n", 46 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 4, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "# read ground truth data\n", 56 | "truth_df = pd.read_csv('PM_truth_01.txt', sep=\" \", header=None)\n", 57 | "truth_df.drop(truth_df.columns[[1]], axis=1, inplace=True)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 5, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/html": [ 68 | "
\n", 69 | "\n", 82 | "\n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s12s13s14s15s16s17s18s19s20s21
011-0.0007-0.0004100.0518.67641.821589.701400.6014.62...521.662388.028138.628.41950.033922388100.039.0623.4190
1120.0019-0.0003100.0518.67642.151591.821403.1414.62...522.282388.078131.498.43180.033922388100.039.0023.4236
213-0.00430.0003100.0518.67642.351587.991404.2014.62...522.422388.038133.238.41780.033902388100.038.9523.3442
3140.00070.0000100.0518.67642.351582.791401.8714.62...522.862388.088133.838.36820.033922388100.038.8823.3739
415-0.0019-0.0002100.0518.67642.371582.851406.2214.62...522.192388.048133.808.42940.033932388100.038.9023.4044
\n", 232 | "

5 rows × 26 columns

\n", 233 | "
" 234 | ], 235 | "text/plain": [ 236 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 237 | "0 1 1 -0.0007 -0.0004 100.0 518.67 641.82 1589.70 1400.60 \n", 238 | "1 1 2 0.0019 -0.0003 100.0 518.67 642.15 1591.82 1403.14 \n", 239 | "2 1 3 -0.0043 0.0003 100.0 518.67 642.35 1587.99 1404.20 \n", 240 | "3 1 4 0.0007 0.0000 100.0 518.67 642.35 1582.79 1401.87 \n", 241 | "4 1 5 -0.0019 -0.0002 100.0 518.67 642.37 1582.85 1406.22 \n", 242 | "\n", 243 | " s5 ... s12 s13 s14 s15 s16 s17 s18 s19 \\\n", 244 | "0 14.62 ... 521.66 2388.02 8138.62 8.4195 0.03 392 2388 100.0 \n", 245 | "1 14.62 ... 522.28 2388.07 8131.49 8.4318 0.03 392 2388 100.0 \n", 246 | "2 14.62 ... 522.42 2388.03 8133.23 8.4178 0.03 390 2388 100.0 \n", 247 | "3 14.62 ... 522.86 2388.08 8133.83 8.3682 0.03 392 2388 100.0 \n", 248 | "4 14.62 ... 522.19 2388.04 8133.80 8.4294 0.03 393 2388 100.0 \n", 249 | "\n", 250 | " s20 s21 \n", 251 | "0 39.06 23.4190 \n", 252 | "1 39.00 23.4236 \n", 253 | "2 38.95 23.3442 \n", 254 | "3 38.88 23.3739 \n", 255 | "4 38.90 23.4044 \n", 256 | "\n", 257 | "[5 rows x 26 columns]" 258 | ] 259 | }, 260 | "execution_count": 5, 261 | "metadata": {}, 262 | "output_type": "execute_result" 263 | } 264 | ], 265 | "source": [ 266 | "train_df = train_df.sort_values(['id','cycle'])\n", 267 | "train_df.head()" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 6, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "data": { 277 | "text/html": [ 278 | "
\n", 279 | "\n", 292 | "\n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s13s14s15s16s17s18s19s20s21RUL
011-0.0007-0.0004100.0518.67641.821589.701400.6014.62...2388.028138.628.41950.033922388100.039.0623.4190191
1120.0019-0.0003100.0518.67642.151591.821403.1414.62...2388.078131.498.43180.033922388100.039.0023.4236190
213-0.00430.0003100.0518.67642.351587.991404.2014.62...2388.038133.238.41780.033902388100.038.9523.3442189
3140.00070.0000100.0518.67642.351582.791401.8714.62...2388.088133.838.36820.033922388100.038.8823.3739188
415-0.0019-0.0002100.0518.67642.371582.851406.2214.62...2388.048133.808.42940.033932388100.038.9023.4044187
\n", 442 | "

5 rows × 27 columns

\n", 443 | "
" 444 | ], 445 | "text/plain": [ 446 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 447 | "0 1 1 -0.0007 -0.0004 100.0 518.67 641.82 1589.70 1400.60 \n", 448 | "1 1 2 0.0019 -0.0003 100.0 518.67 642.15 1591.82 1403.14 \n", 449 | "2 1 3 -0.0043 0.0003 100.0 518.67 642.35 1587.99 1404.20 \n", 450 | "3 1 4 0.0007 0.0000 100.0 518.67 642.35 1582.79 1401.87 \n", 451 | "4 1 5 -0.0019 -0.0002 100.0 518.67 642.37 1582.85 1406.22 \n", 452 | "\n", 453 | " s5 ... s13 s14 s15 s16 s17 s18 s19 s20 \\\n", 454 | "0 14.62 ... 2388.02 8138.62 8.4195 0.03 392 2388 100.0 39.06 \n", 455 | "1 14.62 ... 2388.07 8131.49 8.4318 0.03 392 2388 100.0 39.00 \n", 456 | "2 14.62 ... 2388.03 8133.23 8.4178 0.03 390 2388 100.0 38.95 \n", 457 | "3 14.62 ... 2388.08 8133.83 8.3682 0.03 392 2388 100.0 38.88 \n", 458 | "4 14.62 ... 2388.04 8133.80 8.4294 0.03 393 2388 100.0 38.90 \n", 459 | "\n", 460 | " s21 RUL \n", 461 | "0 23.4190 191 \n", 462 | "1 23.4236 190 \n", 463 | "2 23.3442 189 \n", 464 | "3 23.3739 188 \n", 465 | "4 23.4044 187 \n", 466 | "\n", 467 | "[5 rows x 27 columns]" 468 | ] 469 | }, 470 | "execution_count": 6, 471 | "metadata": {}, 472 | "output_type": "execute_result" 473 | } 474 | ], 475 | "source": [ 476 | "# Data Labeling - generate column RUL\n", 477 | "rul = pd.DataFrame(train_df.groupby('id')['cycle'].max()).reset_index()\n", 478 | "rul.columns = ['id', 'max']\n", 479 | "train_df = train_df.merge(rul, on=['id'], how='left')\n", 480 | "train_df['RUL'] = train_df['max'] - train_df['cycle']\n", 481 | "train_df.drop('max', axis=1, inplace=True)\n", 482 | "train_df.head()" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 7, 488 | "metadata": {}, 489 | "outputs": [ 490 | { 491 | "data": { 492 | "text/html": [ 493 | "
\n", 494 | "\n", 507 | "\n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s15s16s17s18s19s20s21RULlabel1label2
011-0.0007-0.0004100.0518.67641.821589.701400.6014.62...8.41950.033922388100.039.0623.419019100
1120.0019-0.0003100.0518.67642.151591.821403.1414.62...8.43180.033922388100.039.0023.423619000
213-0.00430.0003100.0518.67642.351587.991404.2014.62...8.41780.033902388100.038.9523.344218900
3140.00070.0000100.0518.67642.351582.791401.8714.62...8.36820.033922388100.038.8823.373918800
415-0.0019-0.0002100.0518.67642.371582.851406.2214.62...8.42940.033932388100.038.9023.404418700
\n", 657 | "

5 rows × 29 columns

\n", 658 | "
" 659 | ], 660 | "text/plain": [ 661 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 662 | "0 1 1 -0.0007 -0.0004 100.0 518.67 641.82 1589.70 1400.60 \n", 663 | "1 1 2 0.0019 -0.0003 100.0 518.67 642.15 1591.82 1403.14 \n", 664 | "2 1 3 -0.0043 0.0003 100.0 518.67 642.35 1587.99 1404.20 \n", 665 | "3 1 4 0.0007 0.0000 100.0 518.67 642.35 1582.79 1401.87 \n", 666 | "4 1 5 -0.0019 -0.0002 100.0 518.67 642.37 1582.85 1406.22 \n", 667 | "\n", 668 | " s5 ... s15 s16 s17 s18 s19 s20 s21 RUL label1 \\\n", 669 | "0 14.62 ... 8.4195 0.03 392 2388 100.0 39.06 23.4190 191 0 \n", 670 | "1 14.62 ... 8.4318 0.03 392 2388 100.0 39.00 23.4236 190 0 \n", 671 | "2 14.62 ... 8.4178 0.03 390 2388 100.0 38.95 23.3442 189 0 \n", 672 | "3 14.62 ... 8.3682 0.03 392 2388 100.0 38.88 23.3739 188 0 \n", 673 | "4 14.62 ... 8.4294 0.03 393 2388 100.0 38.90 23.4044 187 0 \n", 674 | "\n", 675 | " label2 \n", 676 | "0 0 \n", 677 | "1 0 \n", 678 | "2 0 \n", 679 | "3 0 \n", 680 | "4 0 \n", 681 | "\n", 682 | "[5 rows x 29 columns]" 683 | ] 684 | }, 685 | "execution_count": 7, 686 | "metadata": {}, 687 | "output_type": "execute_result" 688 | } 689 | ], 690 | "source": [ 691 | "# generate label columns for training data\n", 692 | "w1 = 30\n", 693 | "w0 = 15\n", 694 | "train_df['label1'] = np.where(train_df['RUL'] <= w1, 1, 0 )\n", 695 | "train_df['label2'] = train_df['label1']\n", 696 | "train_df.loc[train_df['RUL'] <= w0, 'label2'] = 2\n", 697 | "train_df.head()" 698 | ] 699 | }, 700 | { 701 | "cell_type": "code", 702 | "execution_count": 8, 703 | "metadata": {}, 704 | "outputs": [ 705 | { 706 | "data": { 707 | "text/html": [ 708 | "
\n", 709 | "\n", 722 | "\n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s16s17s18s19s20s21RULlabel1label2cycle_norm
0110.4597700.1666670.00.00.1837350.4068020.3097570.0...0.00.3333330.00.00.7131780.724662191000.00000
1120.6091950.2500000.00.00.2831330.4530190.3526330.0...0.00.3333330.00.00.6666670.731014190000.00277
2130.2528740.7500000.00.00.3433730.3695230.3705270.0...0.00.1666670.00.00.6279070.621375189000.00554
3140.5402300.5000000.00.00.3433730.2561590.3311950.0...0.00.3333330.00.00.5736430.662386188000.00831
4150.3908050.3333330.00.00.3493980.2574670.4046250.0...0.00.4166670.00.00.5891470.704502187000.01108
\n", 872 | "

5 rows × 30 columns

\n", 873 | "
" 874 | ], 875 | "text/plain": [ 876 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 877 | "0 1 1 0.459770 0.166667 0.0 0.0 0.183735 0.406802 0.309757 \n", 878 | "1 1 2 0.609195 0.250000 0.0 0.0 0.283133 0.453019 0.352633 \n", 879 | "2 1 3 0.252874 0.750000 0.0 0.0 0.343373 0.369523 0.370527 \n", 880 | "3 1 4 0.540230 0.500000 0.0 0.0 0.343373 0.256159 0.331195 \n", 881 | "4 1 5 0.390805 0.333333 0.0 0.0 0.349398 0.257467 0.404625 \n", 882 | "\n", 883 | " s5 ... s16 s17 s18 s19 s20 s21 RUL label1 \\\n", 884 | "0 0.0 ... 0.0 0.333333 0.0 0.0 0.713178 0.724662 191 0 \n", 885 | "1 0.0 ... 0.0 0.333333 0.0 0.0 0.666667 0.731014 190 0 \n", 886 | "2 0.0 ... 0.0 0.166667 0.0 0.0 0.627907 0.621375 189 0 \n", 887 | "3 0.0 ... 0.0 0.333333 0.0 0.0 0.573643 0.662386 188 0 \n", 888 | "4 0.0 ... 0.0 0.416667 0.0 0.0 0.589147 0.704502 187 0 \n", 889 | "\n", 890 | " label2 cycle_norm \n", 891 | "0 0 0.00000 \n", 892 | "1 0 0.00277 \n", 893 | "2 0 0.00554 \n", 894 | "3 0 0.00831 \n", 895 | "4 0 0.01108 \n", 896 | "\n", 897 | "[5 rows x 30 columns]" 898 | ] 899 | }, 900 | "execution_count": 8, 901 | "metadata": {}, 902 | "output_type": "execute_result" 903 | } 904 | ], 905 | "source": [ 906 | "# MinMax normalization\n", 907 | "train_df['cycle_norm'] = train_df['cycle']\n", 908 | "cols_normalize = train_df.columns.difference(['id','cycle','RUL','label1','label2'])\n", 909 | "min_max_scaler = preprocessing.MinMaxScaler()\n", 910 | "norm_train_df = pd.DataFrame(min_max_scaler.fit_transform(train_df[cols_normalize]), \n", 911 | " columns=cols_normalize, \n", 912 | " index=train_df.index)\n", 913 | "join_df = train_df[train_df.columns.difference(cols_normalize)].join(norm_train_df)\n", 914 | "train_df = join_df.reindex(columns = train_df.columns)\n", 915 | "train_df.head()" 916 | ] 917 | }, 918 | { 919 | "cell_type": "code", 920 | "execution_count": 9, 921 | "metadata": {}, 922 | "outputs": [ 923 | { 924 | "data": { 925 | "text/html": [ 926 | "
\n", 927 | "\n", 940 | "\n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 
1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s13s14s15s16s17s18s19s20s21cycle_norm
0110.6321840.7500000.00.00.5451810.3106610.2694130.0...0.2205880.1321600.3089650.00.3333330.00.00.5581400.6618340.00000
1120.3448280.2500000.00.00.1506020.3795510.2223160.0...0.2647060.2047680.2131590.00.4166670.00.00.6821710.6868270.00277
2130.5172410.5833330.00.00.3765060.3466320.3222480.0...0.2205880.1556400.4586380.00.4166670.00.00.7286820.7213480.00554
3140.7413790.5000000.00.00.3704820.2851540.4080010.0...0.2500000.1700900.2570220.00.2500000.00.00.6666670.6621100.00831
4150.5804600.5000000.00.00.3915660.3520820.3320390.0...0.2205880.1527510.3008850.00.1666670.00.00.6589150.7163770.01108
\n", 1090 | "

5 rows × 27 columns

\n", 1091 | "
" 1092 | ], 1093 | "text/plain": [ 1094 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 1095 | "0 1 1 0.632184 0.750000 0.0 0.0 0.545181 0.310661 0.269413 \n", 1096 | "1 1 2 0.344828 0.250000 0.0 0.0 0.150602 0.379551 0.222316 \n", 1097 | "2 1 3 0.517241 0.583333 0.0 0.0 0.376506 0.346632 0.322248 \n", 1098 | "3 1 4 0.741379 0.500000 0.0 0.0 0.370482 0.285154 0.408001 \n", 1099 | "4 1 5 0.580460 0.500000 0.0 0.0 0.391566 0.352082 0.332039 \n", 1100 | "\n", 1101 | " s5 ... s13 s14 s15 s16 s17 s18 s19 \\\n", 1102 | "0 0.0 ... 0.220588 0.132160 0.308965 0.0 0.333333 0.0 0.0 \n", 1103 | "1 0.0 ... 0.264706 0.204768 0.213159 0.0 0.416667 0.0 0.0 \n", 1104 | "2 0.0 ... 0.220588 0.155640 0.458638 0.0 0.416667 0.0 0.0 \n", 1105 | "3 0.0 ... 0.250000 0.170090 0.257022 0.0 0.250000 0.0 0.0 \n", 1106 | "4 0.0 ... 0.220588 0.152751 0.300885 0.0 0.166667 0.0 0.0 \n", 1107 | "\n", 1108 | " s20 s21 cycle_norm \n", 1109 | "0 0.558140 0.661834 0.00000 \n", 1110 | "1 0.682171 0.686827 0.00277 \n", 1111 | "2 0.728682 0.721348 0.00554 \n", 1112 | "3 0.666667 0.662110 0.00831 \n", 1113 | "4 0.658915 0.716377 0.01108 \n", 1114 | "\n", 1115 | "[5 rows x 27 columns]" 1116 | ] 1117 | }, 1118 | "execution_count": 9, 1119 | "metadata": {}, 1120 | "output_type": "execute_result" 1121 | } 1122 | ], 1123 | "source": [ 1124 | "test_df['cycle_norm'] = test_df['cycle']\n", 1125 | "norm_test_df = pd.DataFrame(min_max_scaler.transform(test_df[cols_normalize]), \n", 1126 | " columns=cols_normalize, \n", 1127 | " index=test_df.index)\n", 1128 | "test_join_df = test_df[test_df.columns.difference(cols_normalize)].join(norm_test_df)\n", 1129 | "test_df = test_join_df.reindex(columns = test_df.columns)\n", 1130 | "test_df = test_df.reset_index(drop=True)\n", 1131 | "test_df.head()" 1132 | ] 1133 | }, 1134 | { 1135 | "cell_type": "code", 1136 | "execution_count": 10, 1137 | "metadata": {}, 1138 | "outputs": [], 1139 | "source": [ 1140 | "# generate column max for test data\n", 1141 | "rul = 
pd.DataFrame(test_df.groupby('id')['cycle'].max()).reset_index()\n", 1142 | "rul.columns = ['id', 'max']\n", 1143 | "truth_df.columns = ['more']\n", 1144 | "truth_df['id'] = truth_df.index + 1\n", 1145 | "truth_df['max'] = rul['max'] + truth_df['more']\n", 1146 | "truth_df.drop('more', axis=1, inplace=True)" 1147 | ] 1148 | }, 1149 | { 1150 | "cell_type": "code", 1151 | "execution_count": 11, 1152 | "metadata": {}, 1153 | "outputs": [ 1154 | { 1155 | "data": { 1156 | "text/html": [ 1157 | "
\n", 1158 | "\n", 1171 | "\n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " 
\n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s14s15s16s17s18s19s20s21cycle_normRUL
0110.6321840.7500000.00.00.5451810.3106610.2694130.0...0.1321600.3089650.00.3333330.00.00.5581400.6618340.00000142
1120.3448280.2500000.00.00.1506020.3795510.2223160.0...0.2047680.2131590.00.4166670.00.00.6821710.6868270.00277141
2130.5172410.5833330.00.00.3765060.3466320.3222480.0...0.1556400.4586380.00.4166670.00.00.7286820.7213480.00554140
3140.7413790.5000000.00.00.3704820.2851540.4080010.0...0.1700900.2570220.00.2500000.00.00.6666670.6621100.00831139
4150.5804600.5000000.00.00.3915660.3520820.3320390.0...0.1527510.3008850.00.1666670.00.00.6589150.7163770.01108138
\n", 1321 | "

5 rows × 28 columns

\n", 1322 | "
" 1323 | ], 1324 | "text/plain": [ 1325 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 1326 | "0 1 1 0.632184 0.750000 0.0 0.0 0.545181 0.310661 0.269413 \n", 1327 | "1 1 2 0.344828 0.250000 0.0 0.0 0.150602 0.379551 0.222316 \n", 1328 | "2 1 3 0.517241 0.583333 0.0 0.0 0.376506 0.346632 0.322248 \n", 1329 | "3 1 4 0.741379 0.500000 0.0 0.0 0.370482 0.285154 0.408001 \n", 1330 | "4 1 5 0.580460 0.500000 0.0 0.0 0.391566 0.352082 0.332039 \n", 1331 | "\n", 1332 | " s5 ... s14 s15 s16 s17 s18 s19 s20 s21 \\\n", 1333 | "0 0.0 ... 0.132160 0.308965 0.0 0.333333 0.0 0.0 0.558140 0.661834 \n", 1334 | "1 0.0 ... 0.204768 0.213159 0.0 0.416667 0.0 0.0 0.682171 0.686827 \n", 1335 | "2 0.0 ... 0.155640 0.458638 0.0 0.416667 0.0 0.0 0.728682 0.721348 \n", 1336 | "3 0.0 ... 0.170090 0.257022 0.0 0.250000 0.0 0.0 0.666667 0.662110 \n", 1337 | "4 0.0 ... 0.152751 0.300885 0.0 0.166667 0.0 0.0 0.658915 0.716377 \n", 1338 | "\n", 1339 | " cycle_norm RUL \n", 1340 | "0 0.00000 142 \n", 1341 | "1 0.00277 141 \n", 1342 | "2 0.00554 140 \n", 1343 | "3 0.00831 139 \n", 1344 | "4 0.01108 138 \n", 1345 | "\n", 1346 | "[5 rows x 28 columns]" 1347 | ] 1348 | }, 1349 | "execution_count": 11, 1350 | "metadata": {}, 1351 | "output_type": "execute_result" 1352 | } 1353 | ], 1354 | "source": [ 1355 | "# generate RUL for test data\n", 1356 | "test_df = test_df.merge(truth_df, on=['id'], how='left')\n", 1357 | "test_df['RUL'] = test_df['max'] - test_df['cycle']\n", 1358 | "test_df.drop('max', axis=1, inplace=True)\n", 1359 | "test_df.head()" 1360 | ] 1361 | }, 1362 | { 1363 | "cell_type": "code", 1364 | "execution_count": 12, 1365 | "metadata": {}, 1366 | "outputs": [ 1367 | { 1368 | "data": { 1369 | "text/html": [ 1370 | "
\n", 1371 | "\n", 1384 | "\n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " 
\n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s16s17s18s19s20s21cycle_normRULlabel1label2
0110.6321840.7500000.00.00.5451810.3106610.2694130.0...0.00.3333330.00.00.5581400.6618340.0000014200
1120.3448280.2500000.00.00.1506020.3795510.2223160.0...0.00.4166670.00.00.6821710.6868270.0027714100
2130.5172410.5833330.00.00.3765060.3466320.3222480.0...0.00.4166670.00.00.7286820.7213480.0055414000
3140.7413790.5000000.00.00.3704820.2851540.4080010.0...0.00.2500000.00.00.6666670.6621100.0083113900
4150.5804600.5000000.00.00.3915660.3520820.3320390.0...0.00.1666670.00.00.6589150.7163770.0110813800
\n", 1534 | "

5 rows × 30 columns

\n", 1535 | "
" 1536 | ], 1537 | "text/plain": [ 1538 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 1539 | "0 1 1 0.632184 0.750000 0.0 0.0 0.545181 0.310661 0.269413 \n", 1540 | "1 1 2 0.344828 0.250000 0.0 0.0 0.150602 0.379551 0.222316 \n", 1541 | "2 1 3 0.517241 0.583333 0.0 0.0 0.376506 0.346632 0.322248 \n", 1542 | "3 1 4 0.741379 0.500000 0.0 0.0 0.370482 0.285154 0.408001 \n", 1543 | "4 1 5 0.580460 0.500000 0.0 0.0 0.391566 0.352082 0.332039 \n", 1544 | "\n", 1545 | " s5 ... s16 s17 s18 s19 s20 s21 cycle_norm RUL \\\n", 1546 | "0 0.0 ... 0.0 0.333333 0.0 0.0 0.558140 0.661834 0.00000 142 \n", 1547 | "1 0.0 ... 0.0 0.416667 0.0 0.0 0.682171 0.686827 0.00277 141 \n", 1548 | "2 0.0 ... 0.0 0.416667 0.0 0.0 0.728682 0.721348 0.00554 140 \n", 1549 | "3 0.0 ... 0.0 0.250000 0.0 0.0 0.666667 0.662110 0.00831 139 \n", 1550 | "4 0.0 ... 0.0 0.166667 0.0 0.0 0.658915 0.716377 0.01108 138 \n", 1551 | "\n", 1552 | " label1 label2 \n", 1553 | "0 0 0 \n", 1554 | "1 0 0 \n", 1555 | "2 0 0 \n", 1556 | "3 0 0 \n", 1557 | "4 0 0 \n", 1558 | "\n", 1559 | "[5 rows x 30 columns]" 1560 | ] 1561 | }, 1562 | "execution_count": 12, 1563 | "metadata": {}, 1564 | "output_type": "execute_result" 1565 | } 1566 | ], 1567 | "source": [ 1568 | "# generate label columns w0 and w1 for test data\n", 1569 | "test_df['label1'] = np.where(test_df['RUL'] <= w1, 1, 0 )\n", 1570 | "test_df['label2'] = test_df['label1']\n", 1571 | "test_df.loc[test_df['RUL'] <= w0, 'label2'] = 2\n", 1572 | "test_df.head()" 1573 | ] 1574 | }, 1575 | { 1576 | "cell_type": "code", 1577 | "execution_count": 13, 1578 | "metadata": {}, 1579 | "outputs": [], 1580 | "source": [ 1581 | "def gen_label(df):\n", 1582 | " y = df['label1']\n", 1583 | " df.drop(['label1','label2','RUL'],axis=1,inplace=True)\n", 1584 | " return y,df" 1585 | ] 1586 | }, 1587 | { 1588 | "cell_type": "code", 1589 | "execution_count": 14, 1590 | "metadata": {}, 1591 | "outputs": [ 1592 | { 1593 | "name": "stdout", 1594 | "output_type": "stream", 1595 
| "text": [ 1596 | "(20631,) (20631, 27)\n", 1597 | "(13096,) (13096, 27)\n" 1598 | ] 1599 | } 1600 | ], 1601 | "source": [ 1602 | "y_train,X_train = gen_label(train_df)\n", 1603 | "print(y_train.shape,X_train.shape)\n", 1604 | "y_test,X_test = gen_label(test_df)\n", 1605 | "print(y_test.shape,X_test.shape)" 1606 | ] 1607 | }, 1608 | { 1609 | "cell_type": "code", 1610 | "execution_count": 15, 1611 | "metadata": {}, 1612 | "outputs": [], 1613 | "source": [ 1614 | "from sklearn.model_selection import train_test_split\n", 1615 | "\n", 1616 | "X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.075, random_state=42)" 1617 | ] 1618 | }, 1619 | { 1620 | "cell_type": "code", 1621 | "execution_count": 16, 1622 | "metadata": {}, 1623 | "outputs": [ 1624 | { 1625 | "name": "stdout", 1626 | "output_type": "stream", 1627 | "text": [ 1628 | "acc for val: 0.9683462532299741\n" 1629 | ] 1630 | } 1631 | ], 1632 | "source": [ 1633 | "from sklearn.linear_model import LogisticRegression\n", 1634 | "from sklearn.metrics import accuracy_score\n", 1635 | "\n", 1636 | "lr = LogisticRegression(C=40)\n", 1637 | "lr.fit(X_train,y_train)\n", 1638 | "y_pred = lr.predict(X_val)\n", 1639 | "print('acc for val: ',accuracy_score(y_val,y_pred))" 1640 | ] 1641 | }, 1642 | { 1643 | "cell_type": "code", 1644 | "execution_count": 17, 1645 | "metadata": {}, 1646 | "outputs": [ 1647 | { 1648 | "name": "stdout", 1649 | "output_type": "stream", 1650 | "text": [ 1651 | "Confusion matrix\n", 1652 | "- x-axis is true labels.\n", 1653 | "- y-axis is predicted labels\n" 1654 | ] 1655 | }, 1656 | { 1657 | "data": { 1658 | "text/plain": [ 1659 | "array([[1314, 19],\n", 1660 | " [ 30, 185]])" 1661 | ] 1662 | }, 1663 | "execution_count": 17, 1664 | "metadata": {}, 1665 | "output_type": "execute_result" 1666 | } 1667 | ], 1668 | "source": [ 1669 | "print('Confusion matrix\\n- x-axis is true labels.\\n- y-axis is predicted labels')\n", 1670 | "cm = confusion_matrix(y_val, y_pred)\n", 1671 
| "cm" 1672 | ] 1673 | }, 1674 | { 1675 | "cell_type": "code", 1676 | "execution_count": 18, 1677 | "metadata": {}, 1678 | "outputs": [ 1679 | { 1680 | "name": "stdout", 1681 | "output_type": "stream", 1682 | "text": [ 1683 | "precision = 0.9068627450980392 \n", 1684 | " recall = 0.8604651162790697\n" 1685 | ] 1686 | } 1687 | ], 1688 | "source": [ 1689 | "# compute precision and recall\n", 1690 | "precision = precision_score(y_val, y_pred)\n", 1691 | "recall = recall_score(y_val, y_pred)\n", 1692 | "print( 'precision = ', precision, '\\n', 'recall = ', recall)" 1693 | ] 1694 | }, 1695 | { 1696 | "cell_type": "code", 1697 | "execution_count": 19, 1698 | "metadata": {}, 1699 | "outputs": [ 1700 | { 1701 | "name": "stdout", 1702 | "output_type": "stream", 1703 | "text": [ 1704 | "acc for test: 0.9858735491753207\n" 1705 | ] 1706 | } 1707 | ], 1708 | "source": [ 1709 | "y_pred_test = lr.predict(X_test)\n", 1710 | "print('acc for test: ',accuracy_score(y_test,y_pred_test))" 1711 | ] 1712 | }, 1713 | { 1714 | "cell_type": "code", 1715 | "execution_count": 20, 1716 | "metadata": {}, 1717 | "outputs": [ 1718 | { 1719 | "name": "stdout", 1720 | "output_type": "stream", 1721 | "text": [ 1722 | "Confusion matrix\n", 1723 | "- x-axis is true labels.\n", 1724 | "- y-axis is predicted labels\n" 1725 | ] 1726 | }, 1727 | { 1728 | "data": { 1729 | "text/plain": [ 1730 | "array([[12715, 49],\n", 1731 | " [ 136, 196]])" 1732 | ] 1733 | }, 1734 | "execution_count": 20, 1735 | "metadata": {}, 1736 | "output_type": "execute_result" 1737 | } 1738 | ], 1739 | "source": [ 1740 | "print('Confusion matrix\\n- x-axis is true labels.\\n- y-axis is predicted labels')\n", 1741 | "cm = confusion_matrix(y_test, y_pred_test)\n", 1742 | "cm" 1743 | ] 1744 | }, 1745 | { 1746 | "cell_type": "code", 1747 | "execution_count": 21, 1748 | "metadata": {}, 1749 | "outputs": [ 1750 | { 1751 | "name": "stdout", 1752 | "output_type": "stream", 1753 | "text": [ 1754 | "Precision: 0.8 \n", 1755 | " Recall: 
0.5903614457831325 \n", 1756 | " F1-score: 0.6793760831889082\n" 1757 | ] 1758 | } 1759 | ], 1760 | "source": [ 1761 | "# compute precision and recall\n", 1762 | "precision_test = precision_score(y_test, y_pred_test)\n", 1763 | "recall_test = recall_score(y_test, y_pred_test)\n", 1764 | "f1_test = 2 * (precision_test * recall_test) / (precision_test + recall_test)\n", 1765 | "print( 'Precision: ', precision_test, '\\n', 'Recall: ', recall_test,'\\n', 'F1-score:', f1_test )" 1766 | ] 1767 | }, 1768 | { 1769 | "cell_type": "code", 1770 | "execution_count": 22, 1771 | "metadata": {}, 1772 | "outputs": [ 1773 | { 1774 | "data": { 1775 | "text/html": [ 1776 | "
\n", 1777 | "\n", 1790 | "\n", 1791 | " \n", 1792 | " \n", 1793 | " \n", 1794 | " \n", 1795 | " \n", 1796 | " \n", 1797 | " \n", 1798 | " \n", 1799 | " \n", 1800 | " \n", 1801 | " \n", 1802 | " \n", 1803 | " \n", 1804 | " \n", 1805 | " \n", 1806 | " \n", 1807 | " \n", 1808 | " \n", 1809 | "
AccuracyPrecisionRecallF1-score
logistic0.9858740.80.5903610.679376
\n", 1810 | "
" 1811 | ], 1812 | "text/plain": [ 1813 | " Accuracy Precision Recall F1-score\n", 1814 | "logistic 0.985874 0.8 0.590361 0.679376" 1815 | ] 1816 | }, 1817 | "execution_count": 22, 1818 | "metadata": {}, 1819 | "output_type": "execute_result" 1820 | } 1821 | ], 1822 | "source": [ 1823 | "results_df = pd.DataFrame([[accuracy_score(y_test,y_pred_test),precision_test,recall_test,f1_test],],\n", 1824 | " columns = ['Accuracy', 'Precision', 'Recall', 'F1-score'],\n", 1825 | " index = ['logistic'])\n", 1826 | "results_df" 1827 | ] 1828 | }, 1829 | { 1830 | "cell_type": "code", 1831 | "execution_count": null, 1832 | "metadata": {}, 1833 | "outputs": [], 1834 | "source": [] 1835 | } 1836 | ], 1837 | "metadata": { 1838 | "kernelspec": { 1839 | "display_name": "Python 3", 1840 | "language": "python", 1841 | "name": "python3" 1842 | }, 1843 | "language_info": { 1844 | "codemirror_mode": { 1845 | "name": "ipython", 1846 | "version": 3 1847 | }, 1848 | "file_extension": ".py", 1849 | "mimetype": "text/x-python", 1850 | "name": "python", 1851 | "nbconvert_exporter": "python", 1852 | "pygments_lexer": "ipython3", 1853 | "version": "3.6.2" 1854 | } 1855 | }, 1856 | "nbformat": 4, 1857 | "nbformat_minor": 2 1858 | } 1859 | -------------------------------------------------------------------------------- /data1/.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /data1/.ipynb_checkpoints/tpot-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | 
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 13 | " from ._conv import register_converters as _register_converters\n", 14 | "Using TensorFlow backend.\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "import keras\n", 20 | "import pandas as pd\n", 21 | "import numpy as np\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "\n", 24 | "# Setting seed for reproducability\n", 25 | "np.random.seed(1234) \n", 26 | "PYTHONHASHSEED = 0\n", 27 | "from sklearn import preprocessing\n", 28 | "from sklearn.metrics import confusion_matrix, recall_score, precision_score\n", 29 | "from keras.models import Sequential\n", 30 | "from keras.layers import Dense, Dropout, LSTM, Activation\n", 31 | "%matplotlib inline" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# read training data \n", 41 | "train_df = pd.read_csv('PM_train_01.txt', sep=\" \", header=None)\n", 42 | "train_df.drop(train_df.columns[[26, 27]], axis=1, inplace=True)\n", 43 | "train_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n", 44 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n", 45 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "train_df.columns\n", 55 | "cols = ['setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n", 56 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n", 57 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": 
{ 67 | "text/plain": [ 68 | "'setting1'" 69 | ] 70 | }, 71 | "execution_count": 4, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "train_df.columns[2]" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 5, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "# read test data\n", 87 | "test_df = pd.read_csv('PM_test_01.txt', sep=\" \", header=None)\n", 88 | "test_df.drop(test_df.columns[[26, 27]], axis=1, inplace=True)\n", 89 | "test_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n", 90 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n", 91 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 6, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "# read ground truth data\n", 101 | "truth_df = pd.read_csv('PM_truth_01.txt', sep=\" \", header=None)\n", 102 | "truth_df.drop(truth_df.columns[[1]], axis=1, inplace=True)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 7, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "data": { 112 | "text/html": [ 113 | "
\n", 114 | "\n", 127 | "\n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s12s13s14s15s16s17s18s19s20s21
011-0.0007-0.0004100.0518.67641.821589.701400.6014.62...521.662388.028138.628.41950.033922388100.039.0623.4190
1120.0019-0.0003100.0518.67642.151591.821403.1414.62...522.282388.078131.498.43180.033922388100.039.0023.4236
213-0.00430.0003100.0518.67642.351587.991404.2014.62...522.422388.038133.238.41780.033902388100.038.9523.3442
3140.00070.0000100.0518.67642.351582.791401.8714.62...522.862388.088133.838.36820.033922388100.038.8823.3739
415-0.0019-0.0002100.0518.67642.371582.851406.2214.62...522.192388.048133.808.42940.033932388100.038.9023.4044
\n", 277 | "

5 rows × 26 columns

\n", 278 | "
" 279 | ], 280 | "text/plain": [ 281 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 282 | "0 1 1 -0.0007 -0.0004 100.0 518.67 641.82 1589.70 1400.60 \n", 283 | "1 1 2 0.0019 -0.0003 100.0 518.67 642.15 1591.82 1403.14 \n", 284 | "2 1 3 -0.0043 0.0003 100.0 518.67 642.35 1587.99 1404.20 \n", 285 | "3 1 4 0.0007 0.0000 100.0 518.67 642.35 1582.79 1401.87 \n", 286 | "4 1 5 -0.0019 -0.0002 100.0 518.67 642.37 1582.85 1406.22 \n", 287 | "\n", 288 | " s5 ... s12 s13 s14 s15 s16 s17 s18 s19 \\\n", 289 | "0 14.62 ... 521.66 2388.02 8138.62 8.4195 0.03 392 2388 100.0 \n", 290 | "1 14.62 ... 522.28 2388.07 8131.49 8.4318 0.03 392 2388 100.0 \n", 291 | "2 14.62 ... 522.42 2388.03 8133.23 8.4178 0.03 390 2388 100.0 \n", 292 | "3 14.62 ... 522.86 2388.08 8133.83 8.3682 0.03 392 2388 100.0 \n", 293 | "4 14.62 ... 522.19 2388.04 8133.80 8.4294 0.03 393 2388 100.0 \n", 294 | "\n", 295 | " s20 s21 \n", 296 | "0 39.06 23.4190 \n", 297 | "1 39.00 23.4236 \n", 298 | "2 38.95 23.3442 \n", 299 | "3 38.88 23.3739 \n", 300 | "4 38.90 23.4044 \n", 301 | "\n", 302 | "[5 rows x 26 columns]" 303 | ] 304 | }, 305 | "execution_count": 7, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "train_df = train_df.sort_values(['id','cycle'])\n", 312 | "train_df.head()" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 8, 318 | "metadata": {}, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/html": [ 323 | "
\n", 324 | "\n", 337 | "\n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s13s14s15s16s17s18s19s20s21RUL
011-0.0007-0.0004100.0518.67641.821589.701400.6014.62...2388.028138.628.41950.033922388100.039.0623.4190191
1120.0019-0.0003100.0518.67642.151591.821403.1414.62...2388.078131.498.43180.033922388100.039.0023.4236190
213-0.00430.0003100.0518.67642.351587.991404.2014.62...2388.038133.238.41780.033902388100.038.9523.3442189
3140.00070.0000100.0518.67642.351582.791401.8714.62...2388.088133.838.36820.033922388100.038.8823.3739188
415-0.0019-0.0002100.0518.67642.371582.851406.2214.62...2388.048133.808.42940.033932388100.038.9023.4044187
\n", 487 | "

5 rows × 27 columns

\n", 488 | "
" 489 | ], 490 | "text/plain": [ 491 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 492 | "0 1 1 -0.0007 -0.0004 100.0 518.67 641.82 1589.70 1400.60 \n", 493 | "1 1 2 0.0019 -0.0003 100.0 518.67 642.15 1591.82 1403.14 \n", 494 | "2 1 3 -0.0043 0.0003 100.0 518.67 642.35 1587.99 1404.20 \n", 495 | "3 1 4 0.0007 0.0000 100.0 518.67 642.35 1582.79 1401.87 \n", 496 | "4 1 5 -0.0019 -0.0002 100.0 518.67 642.37 1582.85 1406.22 \n", 497 | "\n", 498 | " s5 ... s13 s14 s15 s16 s17 s18 s19 s20 \\\n", 499 | "0 14.62 ... 2388.02 8138.62 8.4195 0.03 392 2388 100.0 39.06 \n", 500 | "1 14.62 ... 2388.07 8131.49 8.4318 0.03 392 2388 100.0 39.00 \n", 501 | "2 14.62 ... 2388.03 8133.23 8.4178 0.03 390 2388 100.0 38.95 \n", 502 | "3 14.62 ... 2388.08 8133.83 8.3682 0.03 392 2388 100.0 38.88 \n", 503 | "4 14.62 ... 2388.04 8133.80 8.4294 0.03 393 2388 100.0 38.90 \n", 504 | "\n", 505 | " s21 RUL \n", 506 | "0 23.4190 191 \n", 507 | "1 23.4236 190 \n", 508 | "2 23.3442 189 \n", 509 | "3 23.3739 188 \n", 510 | "4 23.4044 187 \n", 511 | "\n", 512 | "[5 rows x 27 columns]" 513 | ] 514 | }, 515 | "execution_count": 8, 516 | "metadata": {}, 517 | "output_type": "execute_result" 518 | } 519 | ], 520 | "source": [ 521 | "# Data Labeling - generate column RUL\n", 522 | "rul = pd.DataFrame(train_df.groupby('id')['cycle'].max()).reset_index()\n", 523 | "rul.columns = ['id', 'max']\n", 524 | "train_df = train_df.merge(rul, on=['id'], how='left')\n", 525 | "train_df['RUL'] = train_df['max'] - train_df['cycle']\n", 526 | "train_df.drop('max', axis=1, inplace=True)\n", 527 | "train_df.head()" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": 9, 533 | "metadata": {}, 534 | "outputs": [ 535 | { 536 | "data": { 537 | "text/html": [ 538 | "
\n", 539 | "\n", 552 | "\n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s15s16s17s18s19s20s21RULlabel1label2
011-0.0007-0.0004100.0518.67641.821589.701400.6014.62...8.41950.033922388100.039.0623.419019100
1120.0019-0.0003100.0518.67642.151591.821403.1414.62...8.43180.033922388100.039.0023.423619000
213-0.00430.0003100.0518.67642.351587.991404.2014.62...8.41780.033902388100.038.9523.344218900
3140.00070.0000100.0518.67642.351582.791401.8714.62...8.36820.033922388100.038.8823.373918800
415-0.0019-0.0002100.0518.67642.371582.851406.2214.62...8.42940.033932388100.038.9023.404418700
\n", 702 | "

5 rows × 29 columns

\n", 703 | "
" 704 | ], 705 | "text/plain": [ 706 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 707 | "0 1 1 -0.0007 -0.0004 100.0 518.67 641.82 1589.70 1400.60 \n", 708 | "1 1 2 0.0019 -0.0003 100.0 518.67 642.15 1591.82 1403.14 \n", 709 | "2 1 3 -0.0043 0.0003 100.0 518.67 642.35 1587.99 1404.20 \n", 710 | "3 1 4 0.0007 0.0000 100.0 518.67 642.35 1582.79 1401.87 \n", 711 | "4 1 5 -0.0019 -0.0002 100.0 518.67 642.37 1582.85 1406.22 \n", 712 | "\n", 713 | " s5 ... s15 s16 s17 s18 s19 s20 s21 RUL label1 \\\n", 714 | "0 14.62 ... 8.4195 0.03 392 2388 100.0 39.06 23.4190 191 0 \n", 715 | "1 14.62 ... 8.4318 0.03 392 2388 100.0 39.00 23.4236 190 0 \n", 716 | "2 14.62 ... 8.4178 0.03 390 2388 100.0 38.95 23.3442 189 0 \n", 717 | "3 14.62 ... 8.3682 0.03 392 2388 100.0 38.88 23.3739 188 0 \n", 718 | "4 14.62 ... 8.4294 0.03 393 2388 100.0 38.90 23.4044 187 0 \n", 719 | "\n", 720 | " label2 \n", 721 | "0 0 \n", 722 | "1 0 \n", 723 | "2 0 \n", 724 | "3 0 \n", 725 | "4 0 \n", 726 | "\n", 727 | "[5 rows x 29 columns]" 728 | ] 729 | }, 730 | "execution_count": 9, 731 | "metadata": {}, 732 | "output_type": "execute_result" 733 | } 734 | ], 735 | "source": [ 736 | "# generate label columns for training data\n", 737 | "w1 = 30\n", 738 | "w0 = 15\n", 739 | "train_df['label1'] = np.where(train_df['RUL'] <= w1, 1, 0 )\n", 740 | "train_df['label2'] = train_df['label1']\n", 741 | "train_df.loc[train_df['RUL'] <= w0, 'label2'] = 2\n", 742 | "train_df.head()" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": 10, 748 | "metadata": {}, 749 | "outputs": [ 750 | { 751 | "data": { 752 | "text/html": [ 753 | "
\n", 754 | "\n", 767 | "\n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s16s17s18s19s20s21RULlabel1label2cycle_norm
0110.4597700.1666670.00.00.1837350.4068020.3097570.0...0.00.3333330.00.00.7131780.724662191000.00000
1120.6091950.2500000.00.00.2831330.4530190.3526330.0...0.00.3333330.00.00.6666670.731014190000.00277
2130.2528740.7500000.00.00.3433730.3695230.3705270.0...0.00.1666670.00.00.6279070.621375189000.00554
3140.5402300.5000000.00.00.3433730.2561590.3311950.0...0.00.3333330.00.00.5736430.662386188000.00831
4150.3908050.3333330.00.00.3493980.2574670.4046250.0...0.00.4166670.00.00.5891470.704502187000.01108
\n", 917 | "

5 rows × 30 columns

\n", 918 | "
" 919 | ], 920 | "text/plain": [ 921 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 922 | "0 1 1 0.459770 0.166667 0.0 0.0 0.183735 0.406802 0.309757 \n", 923 | "1 1 2 0.609195 0.250000 0.0 0.0 0.283133 0.453019 0.352633 \n", 924 | "2 1 3 0.252874 0.750000 0.0 0.0 0.343373 0.369523 0.370527 \n", 925 | "3 1 4 0.540230 0.500000 0.0 0.0 0.343373 0.256159 0.331195 \n", 926 | "4 1 5 0.390805 0.333333 0.0 0.0 0.349398 0.257467 0.404625 \n", 927 | "\n", 928 | " s5 ... s16 s17 s18 s19 s20 s21 RUL label1 \\\n", 929 | "0 0.0 ... 0.0 0.333333 0.0 0.0 0.713178 0.724662 191 0 \n", 930 | "1 0.0 ... 0.0 0.333333 0.0 0.0 0.666667 0.731014 190 0 \n", 931 | "2 0.0 ... 0.0 0.166667 0.0 0.0 0.627907 0.621375 189 0 \n", 932 | "3 0.0 ... 0.0 0.333333 0.0 0.0 0.573643 0.662386 188 0 \n", 933 | "4 0.0 ... 0.0 0.416667 0.0 0.0 0.589147 0.704502 187 0 \n", 934 | "\n", 935 | " label2 cycle_norm \n", 936 | "0 0 0.00000 \n", 937 | "1 0 0.00277 \n", 938 | "2 0 0.00554 \n", 939 | "3 0 0.00831 \n", 940 | "4 0 0.01108 \n", 941 | "\n", 942 | "[5 rows x 30 columns]" 943 | ] 944 | }, 945 | "execution_count": 10, 946 | "metadata": {}, 947 | "output_type": "execute_result" 948 | } 949 | ], 950 | "source": [ 951 | "# MinMax normalization\n", 952 | "train_df['cycle_norm'] = train_df['cycle']\n", 953 | "cols_normalize = train_df.columns.difference(['id','cycle','RUL','label1','label2'])\n", 954 | "min_max_scaler = preprocessing.MinMaxScaler()\n", 955 | "norm_train_df = pd.DataFrame(min_max_scaler.fit_transform(train_df[cols_normalize]), \n", 956 | " columns=cols_normalize, \n", 957 | " index=train_df.index)\n", 958 | "join_df = train_df[train_df.columns.difference(cols_normalize)].join(norm_train_df)\n", 959 | "train_df = join_df.reindex(columns = train_df.columns)\n", 960 | "train_df.head()" 961 | ] 962 | }, 963 | { 964 | "cell_type": "code", 965 | "execution_count": 11, 966 | "metadata": {}, 967 | "outputs": [ 968 | { 969 | "data": { 970 | "text/html": [ 971 | "
\n", 972 | "\n", 985 | "\n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " 
\n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s13s14s15s16s17s18s19s20s21cycle_norm
0110.6321840.7500000.00.00.5451810.3106610.2694130.0...0.2205880.1321600.3089650.00.3333330.00.00.5581400.6618340.00000
1120.3448280.2500000.00.00.1506020.3795510.2223160.0...0.2647060.2047680.2131590.00.4166670.00.00.6821710.6868270.00277
2130.5172410.5833330.00.00.3765060.3466320.3222480.0...0.2205880.1556400.4586380.00.4166670.00.00.7286820.7213480.00554
3140.7413790.5000000.00.00.3704820.2851540.4080010.0...0.2500000.1700900.2570220.00.2500000.00.00.6666670.6621100.00831
4150.5804600.5000000.00.00.3915660.3520820.3320390.0...0.2205880.1527510.3008850.00.1666670.00.00.6589150.7163770.01108
\n", 1135 | "

5 rows × 27 columns

\n", 1136 | "
" 1137 | ], 1138 | "text/plain": [ 1139 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 1140 | "0 1 1 0.632184 0.750000 0.0 0.0 0.545181 0.310661 0.269413 \n", 1141 | "1 1 2 0.344828 0.250000 0.0 0.0 0.150602 0.379551 0.222316 \n", 1142 | "2 1 3 0.517241 0.583333 0.0 0.0 0.376506 0.346632 0.322248 \n", 1143 | "3 1 4 0.741379 0.500000 0.0 0.0 0.370482 0.285154 0.408001 \n", 1144 | "4 1 5 0.580460 0.500000 0.0 0.0 0.391566 0.352082 0.332039 \n", 1145 | "\n", 1146 | " s5 ... s13 s14 s15 s16 s17 s18 s19 \\\n", 1147 | "0 0.0 ... 0.220588 0.132160 0.308965 0.0 0.333333 0.0 0.0 \n", 1148 | "1 0.0 ... 0.264706 0.204768 0.213159 0.0 0.416667 0.0 0.0 \n", 1149 | "2 0.0 ... 0.220588 0.155640 0.458638 0.0 0.416667 0.0 0.0 \n", 1150 | "3 0.0 ... 0.250000 0.170090 0.257022 0.0 0.250000 0.0 0.0 \n", 1151 | "4 0.0 ... 0.220588 0.152751 0.300885 0.0 0.166667 0.0 0.0 \n", 1152 | "\n", 1153 | " s20 s21 cycle_norm \n", 1154 | "0 0.558140 0.661834 0.00000 \n", 1155 | "1 0.682171 0.686827 0.00277 \n", 1156 | "2 0.728682 0.721348 0.00554 \n", 1157 | "3 0.666667 0.662110 0.00831 \n", 1158 | "4 0.658915 0.716377 0.01108 \n", 1159 | "\n", 1160 | "[5 rows x 27 columns]" 1161 | ] 1162 | }, 1163 | "execution_count": 11, 1164 | "metadata": {}, 1165 | "output_type": "execute_result" 1166 | } 1167 | ], 1168 | "source": [ 1169 | "test_df['cycle_norm'] = test_df['cycle']\n", 1170 | "norm_test_df = pd.DataFrame(min_max_scaler.transform(test_df[cols_normalize]), \n", 1171 | " columns=cols_normalize, \n", 1172 | " index=test_df.index)\n", 1173 | "test_join_df = test_df[test_df.columns.difference(cols_normalize)].join(norm_test_df)\n", 1174 | "test_df = test_join_df.reindex(columns = test_df.columns)\n", 1175 | "test_df = test_df.reset_index(drop=True)\n", 1176 | "test_df.head()" 1177 | ] 1178 | }, 1179 | { 1180 | "cell_type": "code", 1181 | "execution_count": 12, 1182 | "metadata": {}, 1183 | "outputs": [], 1184 | "source": [ 1185 | "# generate column max for test data\n", 1186 | "rul = 
pd.DataFrame(test_df.groupby('id')['cycle'].max()).reset_index()\n", 1187 | "rul.columns = ['id', 'max']\n", 1188 | "truth_df.columns = ['more']\n", 1189 | "truth_df['id'] = truth_df.index + 1\n", 1190 | "truth_df['max'] = rul['max'] + truth_df['more']\n", 1191 | "truth_df.drop('more', axis=1, inplace=True)" 1192 | ] 1193 | }, 1194 | { 1195 | "cell_type": "code", 1196 | "execution_count": 13, 1197 | "metadata": {}, 1198 | "outputs": [ 1199 | { 1200 | "data": { 1201 | "text/html": [ 1202 | "
\n", 1203 | "\n", 1216 | "\n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " 
\n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s14s15s16s17s18s19s20s21cycle_normRUL
0110.6321840.7500000.00.00.5451810.3106610.2694130.0...0.1321600.3089650.00.3333330.00.00.5581400.6618340.00000142
1120.3448280.2500000.00.00.1506020.3795510.2223160.0...0.2047680.2131590.00.4166670.00.00.6821710.6868270.00277141
2130.5172410.5833330.00.00.3765060.3466320.3222480.0...0.1556400.4586380.00.4166670.00.00.7286820.7213480.00554140
3140.7413790.5000000.00.00.3704820.2851540.4080010.0...0.1700900.2570220.00.2500000.00.00.6666670.6621100.00831139
4150.5804600.5000000.00.00.3915660.3520820.3320390.0...0.1527510.3008850.00.1666670.00.00.6589150.7163770.01108138
\n", 1366 | "

5 rows × 28 columns

\n", 1367 | "
" 1368 | ], 1369 | "text/plain": [ 1370 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 1371 | "0 1 1 0.632184 0.750000 0.0 0.0 0.545181 0.310661 0.269413 \n", 1372 | "1 1 2 0.344828 0.250000 0.0 0.0 0.150602 0.379551 0.222316 \n", 1373 | "2 1 3 0.517241 0.583333 0.0 0.0 0.376506 0.346632 0.322248 \n", 1374 | "3 1 4 0.741379 0.500000 0.0 0.0 0.370482 0.285154 0.408001 \n", 1375 | "4 1 5 0.580460 0.500000 0.0 0.0 0.391566 0.352082 0.332039 \n", 1376 | "\n", 1377 | " s5 ... s14 s15 s16 s17 s18 s19 s20 s21 \\\n", 1378 | "0 0.0 ... 0.132160 0.308965 0.0 0.333333 0.0 0.0 0.558140 0.661834 \n", 1379 | "1 0.0 ... 0.204768 0.213159 0.0 0.416667 0.0 0.0 0.682171 0.686827 \n", 1380 | "2 0.0 ... 0.155640 0.458638 0.0 0.416667 0.0 0.0 0.728682 0.721348 \n", 1381 | "3 0.0 ... 0.170090 0.257022 0.0 0.250000 0.0 0.0 0.666667 0.662110 \n", 1382 | "4 0.0 ... 0.152751 0.300885 0.0 0.166667 0.0 0.0 0.658915 0.716377 \n", 1383 | "\n", 1384 | " cycle_norm RUL \n", 1385 | "0 0.00000 142 \n", 1386 | "1 0.00277 141 \n", 1387 | "2 0.00554 140 \n", 1388 | "3 0.00831 139 \n", 1389 | "4 0.01108 138 \n", 1390 | "\n", 1391 | "[5 rows x 28 columns]" 1392 | ] 1393 | }, 1394 | "execution_count": 13, 1395 | "metadata": {}, 1396 | "output_type": "execute_result" 1397 | } 1398 | ], 1399 | "source": [ 1400 | "# generate RUL for test data\n", 1401 | "test_df = test_df.merge(truth_df, on=['id'], how='left')\n", 1402 | "test_df['RUL'] = test_df['max'] - test_df['cycle']\n", 1403 | "test_df.drop('max', axis=1, inplace=True)\n", 1404 | "test_df.head()" 1405 | ] 1406 | }, 1407 | { 1408 | "cell_type": "code", 1409 | "execution_count": 14, 1410 | "metadata": {}, 1411 | "outputs": [ 1412 | { 1413 | "data": { 1414 | "text/html": [ 1415 | "
\n", 1416 | "\n", 1429 | "\n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 | " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " 
\n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s16s17s18s19s20s21cycle_normRULlabel1label2
0110.6321840.7500000.00.00.5451810.3106610.2694130.0...0.00.3333330.00.00.5581400.6618340.0000014200
1120.3448280.2500000.00.00.1506020.3795510.2223160.0...0.00.4166670.00.00.6821710.6868270.0027714100
2130.5172410.5833330.00.00.3765060.3466320.3222480.0...0.00.4166670.00.00.7286820.7213480.0055414000
3140.7413790.5000000.00.00.3704820.2851540.4080010.0...0.00.2500000.00.00.6666670.6621100.0083113900
4150.5804600.5000000.00.00.3915660.3520820.3320390.0...0.00.1666670.00.00.6589150.7163770.0110813800
\n", 1579 | "

5 rows × 30 columns

\n", 1580 | "
" 1581 | ], 1582 | "text/plain": [ 1583 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 1584 | "0 1 1 0.632184 0.750000 0.0 0.0 0.545181 0.310661 0.269413 \n", 1585 | "1 1 2 0.344828 0.250000 0.0 0.0 0.150602 0.379551 0.222316 \n", 1586 | "2 1 3 0.517241 0.583333 0.0 0.0 0.376506 0.346632 0.322248 \n", 1587 | "3 1 4 0.741379 0.500000 0.0 0.0 0.370482 0.285154 0.408001 \n", 1588 | "4 1 5 0.580460 0.500000 0.0 0.0 0.391566 0.352082 0.332039 \n", 1589 | "\n", 1590 | " s5 ... s16 s17 s18 s19 s20 s21 cycle_norm RUL \\\n", 1591 | "0 0.0 ... 0.0 0.333333 0.0 0.0 0.558140 0.661834 0.00000 142 \n", 1592 | "1 0.0 ... 0.0 0.416667 0.0 0.0 0.682171 0.686827 0.00277 141 \n", 1593 | "2 0.0 ... 0.0 0.416667 0.0 0.0 0.728682 0.721348 0.00554 140 \n", 1594 | "3 0.0 ... 0.0 0.250000 0.0 0.0 0.666667 0.662110 0.00831 139 \n", 1595 | "4 0.0 ... 0.0 0.166667 0.0 0.0 0.658915 0.716377 0.01108 138 \n", 1596 | "\n", 1597 | " label1 label2 \n", 1598 | "0 0 0 \n", 1599 | "1 0 0 \n", 1600 | "2 0 0 \n", 1601 | "3 0 0 \n", 1602 | "4 0 0 \n", 1603 | "\n", 1604 | "[5 rows x 30 columns]" 1605 | ] 1606 | }, 1607 | "execution_count": 14, 1608 | "metadata": {}, 1609 | "output_type": "execute_result" 1610 | } 1611 | ], 1612 | "source": [ 1613 | "# generate label columns w0 and w1 for test data\n", 1614 | "test_df['label1'] = np.where(test_df['RUL'] <= w1, 1, 0 )\n", 1615 | "test_df['label2'] = test_df['label1']\n", 1616 | "test_df.loc[test_df['RUL'] <= w0, 'label2'] = 2\n", 1617 | "test_df.head()" 1618 | ] 1619 | }, 1620 | { 1621 | "cell_type": "code", 1622 | "execution_count": 15, 1623 | "metadata": {}, 1624 | "outputs": [], 1625 | "source": [ 1626 | "y_train = train_df['RUL']\n", 1627 | "y_test = test_df['RUL']\n", 1628 | "train_df.drop(['label1','label2','RUL'],axis=1,inplace=True)\n", 1629 | "test_df.drop(['label1','label2','RUL'],axis=1,inplace=True)" 1630 | ] 1631 | }, 1632 | { 1633 | "cell_type": "code", 1634 | "execution_count": 16, 1635 | "metadata": {}, 1636 | "outputs": [ 1637 | 
{ 1638 | "data": { 1639 | "text/html": [ 1640 | "
\n", 1641 | "\n", 1654 | "\n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1672 | " \n", 1673 | " \n", 1674 | " \n", 1675 | " \n", 1676 | " \n", 1677 | " \n", 1678 | " \n", 1679 | " \n", 1680 | " \n", 1681 | " \n", 1682 | " \n", 1683 | " \n", 1684 | " \n", 1685 | " \n", 1686 | " \n", 1687 | " \n", 1688 | " \n", 1689 | " \n", 1690 | " \n", 1691 | " \n", 1692 | " \n", 1693 | " \n", 1694 | " \n", 1695 | " \n", 1696 | " \n", 1697 | " \n", 1698 | " \n", 1699 | " \n", 1700 | " \n", 1701 | " \n", 1702 | " \n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " \n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | " \n", 1778 | " \n", 1779 | " \n", 1780 | " \n", 1781 | " \n", 1782 | " \n", 1783 | " \n", 1784 | " \n", 1785 | " \n", 1786 | " \n", 1787 | " \n", 1788 | " \n", 1789 | " \n", 1790 | " \n", 1791 | " \n", 1792 | " \n", 1793 | " \n", 1794 | " \n", 1795 | " 
\n", 1796 | " \n", 1797 | " \n", 1798 | " \n", 1799 | " \n", 1800 | " \n", 1801 | " \n", 1802 | " \n", 1803 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s13s14s15s16s17s18s19s20s21cycle_norm
0110.4597700.1666670.00.00.1837350.4068020.3097570.0...0.2058820.1996080.3639860.00.3333330.00.00.7131780.7246620.00000
1120.6091950.2500000.00.00.2831330.4530190.3526330.0...0.2794120.1628130.4113120.00.3333330.00.00.6666670.7310140.00277
2130.2528740.7500000.00.00.3433730.3695230.3705270.0...0.2205880.1717930.3574450.00.1666670.00.00.6279070.6213750.00554
3140.5402300.5000000.00.00.3433730.2561590.3311950.0...0.2941180.1748890.1666030.00.3333330.00.00.5736430.6623860.00831
4150.3908050.3333330.00.00.3493980.2574670.4046250.0...0.2352940.1747340.4020780.00.4166670.00.00.5891470.7045020.01108
\n", 1804 | "

5 rows × 27 columns

\n", 1805 | "
" 1806 | ], 1807 | "text/plain": [ 1808 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 1809 | "0 1 1 0.459770 0.166667 0.0 0.0 0.183735 0.406802 0.309757 \n", 1810 | "1 1 2 0.609195 0.250000 0.0 0.0 0.283133 0.453019 0.352633 \n", 1811 | "2 1 3 0.252874 0.750000 0.0 0.0 0.343373 0.369523 0.370527 \n", 1812 | "3 1 4 0.540230 0.500000 0.0 0.0 0.343373 0.256159 0.331195 \n", 1813 | "4 1 5 0.390805 0.333333 0.0 0.0 0.349398 0.257467 0.404625 \n", 1814 | "\n", 1815 | " s5 ... s13 s14 s15 s16 s17 s18 s19 \\\n", 1816 | "0 0.0 ... 0.205882 0.199608 0.363986 0.0 0.333333 0.0 0.0 \n", 1817 | "1 0.0 ... 0.279412 0.162813 0.411312 0.0 0.333333 0.0 0.0 \n", 1818 | "2 0.0 ... 0.220588 0.171793 0.357445 0.0 0.166667 0.0 0.0 \n", 1819 | "3 0.0 ... 0.294118 0.174889 0.166603 0.0 0.333333 0.0 0.0 \n", 1820 | "4 0.0 ... 0.235294 0.174734 0.402078 0.0 0.416667 0.0 0.0 \n", 1821 | "\n", 1822 | " s20 s21 cycle_norm \n", 1823 | "0 0.713178 0.724662 0.00000 \n", 1824 | "1 0.666667 0.731014 0.00277 \n", 1825 | "2 0.627907 0.621375 0.00554 \n", 1826 | "3 0.573643 0.662386 0.00831 \n", 1827 | "4 0.589147 0.704502 0.01108 \n", 1828 | "\n", 1829 | "[5 rows x 27 columns]" 1830 | ] 1831 | }, 1832 | "execution_count": 16, 1833 | "metadata": {}, 1834 | "output_type": "execute_result" 1835 | } 1836 | ], 1837 | "source": [ 1838 | "train_df.head()" 1839 | ] 1840 | }, 1841 | { 1842 | "cell_type": "code", 1843 | "execution_count": 17, 1844 | "metadata": {}, 1845 | "outputs": [ 1846 | { 1847 | "name": "stdout", 1848 | "output_type": "stream", 1849 | "text": [ 1850 | "Version 0.9.2 of tpot is outdated. 
Version 0.9.3 was released 2 days ago.\n" 1851 | ] 1852 | } 1853 | ], 1854 | "source": [ 1855 | "from tpot import TPOTRegressor\n", 1856 | "tpot = TPOTRegressor(generations=10, scoring ='neg_mean_squared_error', population_size=50, verbosity=2, cv = 3)" 1857 | ] 1858 | }, 1859 | { 1860 | "cell_type": "code", 1861 | "execution_count": 18, 1862 | "metadata": {}, 1863 | "outputs": [ 1864 | { 1865 | "name": "stderr", 1866 | "output_type": "stream", 1867 | "text": [ 1868 | "Optimization Progress: 18%|█▊ | 100/550 [16:05<2:28:00, 19.74s/pipeline]" 1869 | ] 1870 | }, 1871 | { 1872 | "name": "stdout", 1873 | "output_type": "stream", 1874 | "text": [ 1875 | "Generation 1 - Current best internal CV score: -1336.629916208506\n" 1876 | ] 1877 | }, 1878 | { 1879 | "name": "stderr", 1880 | "output_type": "stream", 1881 | "text": [ 1882 | "Optimization Progress: 27%|██▋ | 151/550 [25:07<1:16:36, 11.52s/pipeline] " 1883 | ] 1884 | }, 1885 | { 1886 | "name": "stdout", 1887 | "output_type": "stream", 1888 | "text": [ 1889 | "Generation 2 - Current best internal CV score: -1316.3456588624992\n" 1890 | ] 1891 | }, 1892 | { 1893 | "name": "stderr", 1894 | "output_type": "stream", 1895 | "text": [ 1896 | "Optimization Progress: 37%|███▋ | 201/550 [30:05<1:07:40, 11.63s/pipeline]" 1897 | ] 1898 | }, 1899 | { 1900 | "name": "stdout", 1901 | "output_type": "stream", 1902 | "text": [ 1903 | "Generation 3 - Current best internal CV score: -1316.3456588624992\n" 1904 | ] 1905 | }, 1906 | { 1907 | "name": "stderr", 1908 | "output_type": "stream", 1909 | "text": [ 1910 | "Optimization Progress: 46%|████▌ | 251/550 [34:30<22:03, 4.43s/pipeline] " 1911 | ] 1912 | }, 1913 | { 1914 | "name": "stdout", 1915 | "output_type": "stream", 1916 | "text": [ 1917 | "Generation 4 - Current best internal CV score: -1316.3456588624992\n" 1918 | ] 1919 | }, 1920 | { 1921 | "name": "stderr", 1922 | "output_type": "stream", 1923 | "text": [ 1924 | "Optimization Progress: 55%|█████▍ | 301/550 [40:37<27:54, 
6.73s/pipeline] " 1925 | ] 1926 | }, 1927 | { 1928 | "name": "stdout", 1929 | "output_type": "stream", 1930 | "text": [ 1931 | "Generation 5 - Current best internal CV score: -1316.3456588624992\n" 1932 | ] 1933 | }, 1934 | { 1935 | "name": "stderr", 1936 | "output_type": "stream", 1937 | "text": [ 1938 | "Optimization Progress: 64%|██████▍ | 351/550 [47:44<57:47, 17.42s/pipeline] " 1939 | ] 1940 | }, 1941 | { 1942 | "name": "stdout", 1943 | "output_type": "stream", 1944 | "text": [ 1945 | "Generation 6 - Current best internal CV score: -1316.3456588624992\n" 1946 | ] 1947 | }, 1948 | { 1949 | "name": "stderr", 1950 | "output_type": "stream", 1951 | "text": [ 1952 | "Optimization Progress: 73%|███████▎ | 401/550 [53:10<12:27, 5.01s/pipeline] " 1953 | ] 1954 | }, 1955 | { 1956 | "name": "stdout", 1957 | "output_type": "stream", 1958 | "text": [ 1959 | "Generation 7 - Current best internal CV score: -1316.3456588624992\n" 1960 | ] 1961 | }, 1962 | { 1963 | "name": "stderr", 1964 | "output_type": "stream", 1965 | "text": [ 1966 | "Optimization Progress: 82%|████████▏ | 451/550 [56:19<05:31, 3.35s/pipeline]" 1967 | ] 1968 | }, 1969 | { 1970 | "name": "stdout", 1971 | "output_type": "stream", 1972 | "text": [ 1973 | "Generation 8 - Current best internal CV score: -1316.3456588624992\n" 1974 | ] 1975 | }, 1976 | { 1977 | "name": "stderr", 1978 | "output_type": "stream", 1979 | "text": [ 1980 | "Optimization Progress: 91%|█████████ | 501/550 [1:01:28<04:54, 6.00s/pipeline]" 1981 | ] 1982 | }, 1983 | { 1984 | "name": "stdout", 1985 | "output_type": "stream", 1986 | "text": [ 1987 | "Generation 9 - Current best internal CV score: -1316.3456588624992\n" 1988 | ] 1989 | }, 1990 | { 1991 | "name": "stderr", 1992 | "output_type": "stream", 1993 | "text": [ 1994 | " \r" 1995 | ] 1996 | }, 1997 | { 1998 | "name": "stdout", 1999 | "output_type": "stream", 2000 | "text": [ 2001 | "Generation 10 - Current best internal CV score: -1316.2679104580218\n", 2002 | "\n", 2003 | "Best 
pipeline: ExtraTreesRegressor(ZeroCount(input_matrix), bootstrap=True, max_features=0.3, min_samples_leaf=11, min_samples_split=3, n_estimators=100)\n" 2004 | ] 2005 | }, 2006 | { 2007 | "data": { 2008 | "text/plain": [ 2009 | "TPOTRegressor(config_dict={'sklearn.linear_model.ElasticNetCV': {'l1_ratio': array([0. , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,\n", 2010 | " 0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1. ]), 'tol': [1e-05, 0.0001, 0.001, 0.01, 0.1]}, 'sklearn.ensemble.ExtraTreesRegressor': {'n_estimato....3 , 0.35, 0.4 , 0.45, 0.5 , 0.55,\n", 2011 | " 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1. ])}}}},\n", 2012 | " crossover_rate=0.1, cv=3, disable_update_check=False,\n", 2013 | " early_stop=None, generations=10, max_eval_time_mins=5,\n", 2014 | " max_time_mins=None, memory=None, mutation_rate=0.9, n_jobs=1,\n", 2015 | " offspring_size=50, periodic_checkpoint_folder=None,\n", 2016 | " population_size=50, random_state=None, scoring=None, subsample=1.0,\n", 2017 | " verbosity=2, warm_start=False)" 2018 | ] 2019 | }, 2020 | "execution_count": 18, 2021 | "metadata": {}, 2022 | "output_type": "execute_result" 2023 | } 2024 | ], 2025 | "source": [ 2026 | "tpot.fit(train_df, y_train)" 2027 | ] 2028 | }, 2029 | { 2030 | "cell_type": "code", 2031 | "execution_count": 19, 2032 | "metadata": {}, 2033 | "outputs": [ 2034 | { 2035 | "data": { 2036 | "text/plain": [ 2037 | "True" 2038 | ] 2039 | }, 2040 | "execution_count": 19, 2041 | "metadata": {}, 2042 | "output_type": "execute_result" 2043 | } 2044 | ], 2045 | "source": [ 2046 | "tpot.export('rul_mse_tpot.py')" 2047 | ] 2048 | }, 2049 | { 2050 | "cell_type": "code", 2051 | "execution_count": null, 2052 | "metadata": {}, 2053 | "outputs": [], 2054 | "source": [] 2055 | } 2056 | ], 2057 | "metadata": { 2058 | "kernelspec": { 2059 | "display_name": "Python 3", 2060 | "language": "python", 2061 | "name": "python3" 2062 | }, 2063 | "language_info": { 2064 | "codemirror_mode": { 2065 | 
"name": "ipython", 2066 | "version": 3 2067 | }, 2068 | "file_extension": ".py", 2069 | "mimetype": "text/x-python", 2070 | "name": "python", 2071 | "nbconvert_exporter": "python", 2072 | "pygments_lexer": "ipython3", 2073 | "version": "3.6.2" 2074 | } 2075 | }, 2076 | "nbformat": 4, 2077 | "nbformat_minor": 2 2078 | } 2079 | -------------------------------------------------------------------------------- /data1/PM_truth_01.txt: -------------------------------------------------------------------------------- 1 | 112 2 | 98 3 | 69 4 | 82 5 | 91 6 | 93 7 | 91 8 | 95 9 | 111 10 | 96 11 | 97 12 | 124 13 | 95 14 | 107 15 | 83 16 | 84 17 | 50 18 | 28 19 | 87 20 | 16 21 | 57 22 | 111 23 | 113 24 | 20 25 | 145 26 | 119 27 | 66 28 | 97 29 | 90 30 | 115 31 | 8 32 | 48 33 | 106 34 | 7 35 | 11 36 | 19 37 | 21 38 | 50 39 | 142 40 | 28 41 | 18 42 | 10 43 | 59 44 | 109 45 | 114 46 | 47 47 | 135 48 | 92 49 | 21 50 | 79 51 | 114 52 | 29 53 | 26 54 | 97 55 | 137 56 | 15 57 | 103 58 | 37 59 | 114 60 | 100 61 | 21 62 | 54 63 | 72 64 | 28 65 | 128 66 | 14 67 | 77 68 | 8 69 | 121 70 | 94 71 | 118 72 | 50 73 | 131 74 | 126 75 | 113 76 | 10 77 | 34 78 | 107 79 | 63 80 | 90 81 | 8 82 | 9 83 | 137 84 | 58 85 | 118 86 | 89 87 | 116 88 | 115 89 | 136 90 | 28 91 | 38 92 | 20 93 | 85 94 | 55 95 | 128 96 | 137 97 | 82 98 | 59 99 | 117 100 | 20 101 | -------------------------------------------------------------------------------- /data1/predictive_binary_final.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data1/predictive_binary_final.h5 -------------------------------------------------------------------------------- /data1/predictive_regression_kalhman.h5: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data1/predictive_regression_kalhman.h5 -------------------------------------------------------------------------------- /data2/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data2/.DS_Store -------------------------------------------------------------------------------- /data2/PM_truth_02.txt: -------------------------------------------------------------------------------- 1 | 18 2 | 79 3 | 106 4 | 110 5 | 15 6 | 155 7 | 6 8 | 90 9 | 11 10 | 79 11 | 6 12 | 73 13 | 30 14 | 11 15 | 37 16 | 67 17 | 68 18 | 99 19 | 22 20 | 54 21 | 97 22 | 10 23 | 142 24 | 77 25 | 88 26 | 163 27 | 126 28 | 138 29 | 83 30 | 78 31 | 75 32 | 11 33 | 53 34 | 173 35 | 63 36 | 100 37 | 151 38 | 55 39 | 48 40 | 37 41 | 44 42 | 27 43 | 18 44 | 6 45 | 15 46 | 112 47 | 131 48 | 13 49 | 122 50 | 13 51 | 98 52 | 53 53 | 52 54 | 106 55 | 103 56 | 152 57 | 123 58 | 26 59 | 178 60 | 73 61 | 169 62 | 39 63 | 39 64 | 14 65 | 11 66 | 121 67 | 86 68 | 56 69 | 115 70 | 17 71 | 148 72 | 104 73 | 78 74 | 86 75 | 98 76 | 36 77 | 94 78 | 52 79 | 91 80 | 15 81 | 141 82 | 74 83 | 146 84 | 17 85 | 47 86 | 194 87 | 21 88 | 79 89 | 97 90 | 8 91 | 9 92 | 73 93 | 183 94 | 97 95 | 73 96 | 49 97 | 31 98 | 97 99 | 9 100 | 14 101 | 106 102 | 8 103 | 8 104 | 106 105 | 116 106 | 120 107 | 61 108 | 168 109 | 35 110 | 80 111 | 9 112 | 50 113 | 151 114 | 78 115 | 91 116 | 7 117 | 181 118 | 150 119 | 106 120 | 15 121 | 67 122 | 145 123 | 180 124 | 7 125 | 179 126 | 124 127 | 82 128 | 108 129 | 79 130 | 121 131 | 120 132 | 39 133 | 38 134 | 9 135 | 167 136 | 87 137 | 88 138 | 7 139 | 51 140 | 55 141 | 155 142 | 47 143 | 81 144 | 43 145 | 98 146 | 10 147 | 92 148 | 11 149 | 165 150 | 34 151 | 115 152 | 59 153 | 99 154 | 103 155 | 108 156 | 83 157 | 171 158 | 15 159 | 9 
160 | 42 161 | 13 162 | 41 163 | 88 164 | 14 165 | 155 166 | 188 167 | 96 168 | 82 169 | 135 170 | 182 171 | 36 172 | 107 173 | 14 174 | 95 175 | 142 176 | 23 177 | 6 178 | 144 179 | 35 180 | 97 181 | 68 182 | 14 183 | 67 184 | 191 185 | 19 186 | 10 187 | 158 188 | 183 189 | 43 190 | 12 191 | 148 192 | 13 193 | 37 194 | 122 195 | 80 196 | 93 197 | 132 198 | 32 199 | 103 200 | 174 201 | 111 202 | 68 203 | 192 204 | 121 205 | 134 206 | 48 207 | 85 208 | 8 209 | 23 210 | 8 211 | 6 212 | 57 213 | 83 214 | 172 215 | 101 216 | 81 217 | 86 218 | 165 219 | 73 220 | 121 221 | 139 222 | 75 223 | 151 224 | 145 225 | 11 226 | 108 227 | 14 228 | 126 229 | 61 230 | 85 231 | 8 232 | 101 233 | 153 234 | 89 235 | 190 236 | 12 237 | 62 238 | 134 239 | 101 240 | 121 241 | 167 242 | 17 243 | 161 244 | 181 245 | 16 246 | 152 247 | 148 248 | 56 249 | 111 250 | 23 251 | 84 252 | 12 253 | 43 254 | 48 255 | 122 256 | 191 257 | 56 258 | 131 259 | 51 260 | -------------------------------------------------------------------------------- /data2/predictive_binary_final.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data2/predictive_binary_final.h5 -------------------------------------------------------------------------------- /data2/predictive_regression_kalhman.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data2/predictive_regression_kalhman.h5 -------------------------------------------------------------------------------- /data3/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data3/.DS_Store 
-------------------------------------------------------------------------------- /data3/.ipynb_checkpoints/RUL-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 13 | " from ._conv import register_converters as _register_converters\n", 14 | "Using TensorFlow backend.\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "import keras" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import pandas as pd\n", 29 | "import numpy as np\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "\n", 32 | "# Setting seed for reproducibility\n", 33 | "np.random.seed(1234) \n", 34 | "PYTHONHASHSEED = 0\n", 35 | "from sklearn import preprocessing\n", 36 | "from sklearn.metrics import confusion_matrix, recall_score, precision_score\n", 37 | "from keras.models import Sequential\n", 38 | "from keras.layers import Dense, Dropout, LSTM, Activation\n", 39 | "%matplotlib inline" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "from pykalman import KalmanFilter" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "# read training data \n", 58 | "train_df = pd.read_csv('PM_train_03.txt', sep=\" \", header=None)\n", 59 | "train_df.drop(train_df.columns[[26, 27]], axis=1, inplace=True)\n", 60 | "train_df.columns = ['id', 
'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n", 61 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n", 62 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/html": [ 73 | "
\n", 74 | "\n", 87 | "\n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s12s13s14s15s16s17s18s19s20s21
011-0.00050.0004100.0518.67642.361583.231396.8414.62...522.312388.018145.328.42460.033912388100.039.1123.3537
1120.0008-0.0003100.0518.67642.501584.691396.8914.62...522.422388.038152.858.44030.033922388100.038.9923.4491
213-0.0014-0.0002100.0518.67642.181582.351405.6114.62...522.032388.008150.178.39010.033912388100.038.8523.3669
314-0.00200.0001100.0518.67642.921585.611392.2714.62...522.492388.088146.568.38780.033922388100.038.9623.2951
4150.00160.0000100.0518.67641.681588.631397.6514.62...522.582388.038147.808.38690.033922388100.039.1423.4583
\n", 237 | "

5 rows × 26 columns

\n", 238 | "
" 239 | ], 240 | "text/plain": [ 241 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 242 | "0 1 1 -0.0005 0.0004 100.0 518.67 642.36 1583.23 1396.84 \n", 243 | "1 1 2 0.0008 -0.0003 100.0 518.67 642.50 1584.69 1396.89 \n", 244 | "2 1 3 -0.0014 -0.0002 100.0 518.67 642.18 1582.35 1405.61 \n", 245 | "3 1 4 -0.0020 0.0001 100.0 518.67 642.92 1585.61 1392.27 \n", 246 | "4 1 5 0.0016 0.0000 100.0 518.67 641.68 1588.63 1397.65 \n", 247 | "\n", 248 | " s5 ... s12 s13 s14 s15 s16 s17 s18 s19 \\\n", 249 | "0 14.62 ... 522.31 2388.01 8145.32 8.4246 0.03 391 2388 100.0 \n", 250 | "1 14.62 ... 522.42 2388.03 8152.85 8.4403 0.03 392 2388 100.0 \n", 251 | "2 14.62 ... 522.03 2388.00 8150.17 8.3901 0.03 391 2388 100.0 \n", 252 | "3 14.62 ... 522.49 2388.08 8146.56 8.3878 0.03 392 2388 100.0 \n", 253 | "4 14.62 ... 522.58 2388.03 8147.80 8.3869 0.03 392 2388 100.0 \n", 254 | "\n", 255 | " s20 s21 \n", 256 | "0 39.11 23.3537 \n", 257 | "1 38.99 23.4491 \n", 258 | "2 38.85 23.3669 \n", 259 | "3 38.96 23.2951 \n", 260 | "4 39.14 23.4583 \n", 261 | "\n", 262 | "[5 rows x 26 columns]" 263 | ] 264 | }, 265 | "execution_count": 5, 266 | "metadata": {}, 267 | "output_type": "execute_result" 268 | } 269 | ], 270 | "source": [ 271 | "train_df.head()" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 6, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "train_df.columns\n", 281 | "cols = ['setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n", 282 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n", 283 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 7, 289 | "metadata": {}, 290 | "outputs": [ 291 | { 292 | "data": { 293 | "text/plain": [ 294 | "'setting1'" 295 | ] 296 | }, 297 | "execution_count": 7, 298 | "metadata": {}, 299 | "output_type": "execute_result" 300 | } 301 | ], 302 | "source": [ 303 | "train_df.columns[2]" 304 | 
] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 8, 309 | "metadata": {}, 310 | "outputs": [ 311 | { 312 | "name": "stdout", 313 | "output_type": "stream", 314 | "text": [ 315 | "setting1\n" 316 | ] 317 | }, 318 | { 319 | "name": "stderr", 320 | "output_type": "stream", 321 | "text": [ 322 | "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/scipy/linalg/basic.py:1226: RuntimeWarning: internal gelsd driver lwork query error, required iwork dimension not returned. This is likely the result of LAPACK bug 0038, fixed in LAPACK 3.2.2 (released July 21, 2010). Falling back to 'gelss' driver.\n", 323 | " warnings.warn(mesg, RuntimeWarning)\n" 324 | ] 325 | }, 326 | { 327 | "name": "stdout", 328 | "output_type": "stream", 329 | "text": [ 330 | "setting2\n", 331 | "setting3\n", 332 | "s1\n", 333 | "s2\n", 334 | "s3\n", 335 | "s4\n", 336 | "s5\n", 337 | "s6\n", 338 | "s7\n", 339 | "s8\n", 340 | "s9\n", 341 | "s10\n", 342 | "s11\n", 343 | "s12\n", 344 | "s13\n", 345 | "s14\n", 346 | "s15\n", 347 | "s16\n", 348 | "s17\n", 349 | "s18\n", 350 | "s19\n", 351 | "s20\n", 352 | "s21\n" 353 | ] 354 | } 355 | ], 356 | "source": [ 357 | "for cols in train_df.columns:\n", 358 | " if cols == 'id':\n", 359 | " continue;\n", 360 | " if cols == 'cycle':\n", 361 | " continue;\n", 362 | " else:\n", 363 | " print(cols)\n", 364 | " kf = KalmanFilter(transition_matrices = [1],\n", 365 | " observation_matrices = [1],\n", 366 | " initial_state_mean = train_df[cols].values[0],\n", 367 | " initial_state_covariance = 1,\n", 368 | " observation_covariance=1,\n", 369 | " transition_covariance=.01)\n", 370 | " state_means,_ = kf.filter(train_df[cols].values)\n", 371 | " train_df[cols] = state_means.flatten()" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "# read test data\n", 381 | "test_df = pd.read_csv('PM_test_03.txt', sep=\" \", header=None)\n", 
382 | "test_df.drop(test_df.columns[[26, 27]], axis=1, inplace=True)\n", 383 | "test_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n", 384 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n", 385 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": {}, 392 | "outputs": [ 393 | { 394 | "name": "stdout", 395 | "output_type": "stream", 396 | "text": [ 397 | "setting1\n", 398 | "setting2\n", 399 | "setting3\n", 400 | "s1\n", 401 | "s2\n", 402 | "s3\n", 403 | "s4\n", 404 | "s5\n", 405 | "s6\n" 406 | ] 407 | } 408 | ], 409 | "source": [ 410 | "for cols in test_df.columns:\n", 411 | " if cols == 'id':\n", 412 | " continue;\n", 413 | " if cols == 'cycle':\n", 414 | " continue;\n", 415 | " else:\n", 416 | " print(cols)\n", 417 | " kf = KalmanFilter(transition_matrices = [1],\n", 418 | " observation_matrices = [1],\n", 419 | " initial_state_mean = test_df[cols].values[0],\n", 420 | " initial_state_covariance = 1,\n", 421 | " observation_covariance=1,\n", 422 | " transition_covariance=.01)\n", 423 | " state_means,_ = kf.filter(test_df[cols].values)\n", 424 | " test_df[cols] = state_means.flatten()" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": {}, 431 | "outputs": [], 432 | "source": [ 433 | "# read ground truth data\n", 434 | "truth_df = pd.read_csv('PM_truth_03.txt', sep=\" \", header=None)\n", 435 | "truth_df.drop(truth_df.columns[[1]], axis=1, inplace=True)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": null, 441 | "metadata": {}, 442 | "outputs": [], 443 | "source": [ 444 | "train_df = train_df.sort_values(['id','cycle'])\n", 445 | "train_df.head()" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": {}, 452 | "outputs": [], 453 | "source": [ 454 | "# Data Labeling - generate 
column RUL\n", 455 | "rul = pd.DataFrame(train_df.groupby('id')['cycle'].max()).reset_index()\n", 456 | "rul.columns = ['id', 'max']\n", 457 | "train_df = train_df.merge(rul, on=['id'], how='left')\n", 458 | "train_df['RUL'] = train_df['max'] - train_df['cycle']\n", 459 | "train_df.drop('max', axis=1, inplace=True)\n", 460 | "train_df.head()" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": null, 466 | "metadata": {}, 467 | "outputs": [], 468 | "source": [ 469 | "# generate label columns for training data\n", 470 | "w1 = 30\n", 471 | "w0 = 15\n", 472 | "train_df['label1'] = np.where(train_df['RUL'] <= w1, 1, 0 )\n", 473 | "train_df['label2'] = train_df['label1']\n", 474 | "train_df.loc[train_df['RUL'] <= w0, 'label2'] = 2\n", 475 | "train_df.head()" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": null, 481 | "metadata": {}, 482 | "outputs": [], 483 | "source": [ 484 | "# MinMax normalization\n", 485 | "train_df['cycle_norm'] = train_df['cycle']\n", 486 | "cols_normalize = train_df.columns.difference(['id','cycle','RUL','label1','label2'])\n", 487 | "min_max_scaler = preprocessing.MinMaxScaler()\n", 488 | "norm_train_df = pd.DataFrame(min_max_scaler.fit_transform(train_df[cols_normalize]), \n", 489 | " columns=cols_normalize, \n", 490 | " index=train_df.index)\n", 491 | "join_df = train_df[train_df.columns.difference(cols_normalize)].join(norm_train_df)\n", 492 | "train_df = join_df.reindex(columns = train_df.columns)\n", 493 | "train_df.head()" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "metadata": {}, 500 | "outputs": [], 501 | "source": [ 502 | "test_df['cycle_norm'] = test_df['cycle']\n", 503 | "norm_test_df = pd.DataFrame(min_max_scaler.transform(test_df[cols_normalize]), \n", 504 | " columns=cols_normalize, \n", 505 | " index=test_df.index)\n", 506 | "test_join_df = test_df[test_df.columns.difference(cols_normalize)].join(norm_test_df)\n", 507 | "test_df = 
test_join_df.reindex(columns = test_df.columns)\n", 508 | "test_df = test_df.reset_index(drop=True)\n", 509 | "test_df.head()" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "# generate column max for test data\n", 519 | "rul = pd.DataFrame(test_df.groupby('id')['cycle'].max()).reset_index()\n", 520 | "rul.columns = ['id', 'max']\n", 521 | "truth_df.columns = ['more']\n", 522 | "truth_df['id'] = truth_df.index + 1\n", 523 | "truth_df['max'] = rul['max'] + truth_df['more']\n", 524 | "truth_df.drop('more', axis=1, inplace=True)" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": null, 530 | "metadata": {}, 531 | "outputs": [], 532 | "source": [ 533 | "# generate RUL for test data\n", 534 | "test_df = test_df.merge(truth_df, on=['id'], how='left')\n", 535 | "test_df['RUL'] = test_df['max'] - test_df['cycle']\n", 536 | "test_df.drop('max', axis=1, inplace=True)\n", 537 | "test_df.head()" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": null, 543 | "metadata": {}, 544 | "outputs": [], 545 | "source": [ 546 | "# generate label columns w0 and w1 for test data\n", 547 | "test_df['label1'] = np.where(test_df['RUL'] <= w1, 1, 0 )\n", 548 | "test_df['label2'] = test_df['label1']\n", 549 | "test_df.loc[test_df['RUL'] <= w0, 'label2'] = 2\n", 550 | "test_df.head()" 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "execution_count": null, 556 | "metadata": {}, 557 | "outputs": [], 558 | "source": [ 559 | "train_df.drop(['label1','label2'],axis=1,inplace=True)\n", 560 | "test_df.drop(['label1','label2'],axis=1,inplace=True)" 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": null, 566 | "metadata": {}, 567 | "outputs": [], 568 | "source": [ 569 | "# pick a large window size of 50 cycles\n", 570 | "sequence_length = 50" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | 
"execution_count": null, 576 | "metadata": {}, 577 | "outputs": [], 578 | "source": [ 579 | "# preparing data for visualizations \n", 580 | "# window of 50 cycles prior to a failure point for engine id 3\n", 581 | "engine_id3 = test_df[test_df['id'] == 3]\n", 582 | "engine_id3_50cycleWindow = engine_id3[engine_id3['RUL'] <= engine_id3['RUL'].min() + 50]\n", 583 | "cols1 = ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10']\n", 584 | "engine_id3_50cycleWindow1 = engine_id3_50cycleWindow[cols1]\n", 585 | "cols2 = ['s11', 's12', 's13', 's14', 's15', 's16', 's17', 's18', 's19', 's20', 's21']\n", 586 | "engine_id3_50cycleWindow2 = engine_id3_50cycleWindow[cols2]" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": null, 592 | "metadata": {}, 593 | "outputs": [], 594 | "source": [ 595 | "# plotting sensor data for engine ID 3 prior to a failure point - sensors 1-10 \n", 596 | "ax1 = engine_id3_50cycleWindow1.plot(subplots=True, sharex=True, figsize=(20,20))" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": null, 602 | "metadata": {}, 603 | "outputs": [], 604 | "source": [ 605 | "# plotting sensor data for engine ID 3 prior to a failure point - sensors 11-21 \n", 606 | "ax2 = engine_id3_50cycleWindow2.plot(subplots=True, sharex=True, figsize=(20,20))" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": null, 612 | "metadata": {}, 613 | "outputs": [], 614 | "source": [ 615 | "# function to reshape features into (samples, time steps, features) \n", 616 | "def gen_sequence(id_df, seq_length, seq_cols):\n", 617 | " \"\"\" Only sequences that meet the window-length are considered, no padding is used. This means for testing\n", 618 | " we need to drop those which are below the window-length. 
An alternative would be to pad sequences so that\n", 619 | " we can use shorter ones \"\"\"\n", 620 | " data_array = id_df[seq_cols].values\n", 621 | " num_elements = data_array.shape[0]\n", 622 | " for start, stop in zip(range(0, num_elements-seq_length), range(seq_length, num_elements)):\n", 623 | " yield data_array[start:stop, :]" 624 | ] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "execution_count": null, 629 | "metadata": {}, 630 | "outputs": [], 631 | "source": [ 632 | "# pick the feature columns \n", 633 | "sensor_cols = ['s' + str(i) for i in range(1,22)]\n", 634 | "sequence_cols = ['setting1', 'setting2', 'setting3', 'cycle_norm']\n", 635 | "sequence_cols.extend(sensor_cols)" 636 | ] 637 | }, 638 | { 639 | "cell_type": "code", 640 | "execution_count": null, 641 | "metadata": {}, 642 | "outputs": [], 643 | "source": [ 644 | "# generator for the sequences\n", 645 | "seq_gen = (list(gen_sequence(train_df[train_df['id']==id], sequence_length, sequence_cols)) \n", 646 | " for id in train_df['id'].unique())" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": null, 652 | "metadata": {}, 653 | "outputs": [], 654 | "source": [ 655 | "# generate sequences and convert to numpy array\n", 656 | "seq_array = np.concatenate(list(seq_gen)).astype(np.float32)\n", 657 | "seq_array.shape" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [ 666 | "# function to generate labels\n", 667 | "def gen_labels(id_df, seq_length, label):\n", 668 | " data_array = id_df[label].values\n", 669 | " num_elements = data_array.shape[0]\n", 670 | " return data_array[seq_length:num_elements, :]" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": null, 676 | "metadata": {}, 677 | "outputs": [], 678 | "source": [ 679 | "# generate labels\n", 680 | "label_gen = [gen_labels(train_df[train_df['id']==id], sequence_length, ['RUL']) \n", 681 | " for id in 
train_df['id'].unique()]\n", 682 | "label_array = np.concatenate(label_gen).astype(np.float32)\n", 683 | "label_array.shape" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": null, 689 | "metadata": {}, 690 | "outputs": [], 691 | "source": [ 692 | "from keras.layers import Bidirectional" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": null, 698 | "metadata": {}, 699 | "outputs": [], 700 | "source": [ 701 | "# build the network\n", 702 | "nb_features = seq_array.shape[2]\n", 703 | "nb_out = label_array.shape[1]\n", 704 | "\n", 705 | "model = Sequential()\n", 706 | "# model.add(Bidirectional(LSTM(\n", 707 | "# units=100,\n", 708 | "# return_sequences=True),\n", 709 | "# input_shape=(sequence_length, nb_features)))\n", 710 | "model.add(LSTM(\n", 711 | " units=100,\n", 712 | " return_sequences=True,\n", 713 | " input_shape=(sequence_length, nb_features)))\n", 714 | "model.add(Dropout(0.4))\n", 715 | "\n", 716 | "model.add(LSTM(\n", 717 | " units=100,\n", 718 | " return_sequences=False))\n", 719 | "model.add(Dropout(0.4))\n", 720 | "\n", 721 | "# model.add(Dense(units=100,activation='relu'))\n", 722 | "# model.add(Dropout(0.2))\n", 723 | "# model.add(Dense(units=100,activation='relu'))\n", 724 | "# model.add(Dropout(0.2))\n", 725 | "# model.add(Dense(units=100,activation='relu'))\n", 726 | "# model.add(Dropout(0.2))\n", 727 | "model.add(Dense(units=1, activation='relu'))\n", 728 | "model.add(Activation(\"relu\"))\n", 729 | "model.compile(loss=\"mse\", optimizer=\"rmsprop\", metrics=['mse'])" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": null, 735 | "metadata": {}, 736 | "outputs": [], 737 | "source": [ 738 | "print(model.summary())" 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "execution_count": null, 744 | "metadata": {}, 745 | "outputs": [], 746 | "source": [ 747 | "from keras.callbacks import EarlyStopping, ModelCheckpoint\n", 748 | "\n", 749 | "STAMP = 
'predictive_regression_kalhman'\n", 750 | "print(STAMP)\n", 751 | "\n", 752 | "early_stopping =EarlyStopping(monitor='val_loss', patience=10)\n", 753 | "bst_model_path = STAMP + '.h5'\n", 754 | "model_checkpoint = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)" 755 | ] 756 | }, 757 | { 758 | "cell_type": "code", 759 | "execution_count": null, 760 | "metadata": { 761 | "scrolled": false 762 | }, 763 | "outputs": [], 764 | "source": [ 765 | "%%time\n", 766 | "# fit the network\n", 767 | "hist = model.fit(seq_array, label_array, epochs=1000, batch_size=200, validation_split=0.05, verbose=1,callbacks=[early_stopping, model_checkpoint])" 768 | ] 769 | }, 770 | { 771 | "cell_type": "code", 772 | "execution_count": null, 773 | "metadata": {}, 774 | "outputs": [], 775 | "source": [ 776 | "import matplotlib.pyplot as plt\n", 777 | "\n", 778 | "#plots\n", 779 | "# list all data in history\n", 780 | "print(hist.history.keys())\n", 781 | "# summarize history for accuracy\n", 782 | "plt.plot(hist.history['mean_squared_error'])\n", 783 | "plt.plot(hist.history['val_mean_squared_error'])\n", 784 | "plt.title('mse')\n", 785 | "plt.ylabel('mean_squared_error')\n", 786 | "plt.xlabel('epoch')\n", 787 | "plt.legend(['train', 'test'], loc='upper left')\n", 788 | "plt.show()\n", 789 | "# summarize history for loss\n", 790 | "plt.plot(hist.history['loss'])\n", 791 | "plt.plot(hist.history['val_loss'])\n", 792 | "plt.title('model loss')\n", 793 | "plt.ylabel('loss')\n", 794 | "plt.xlabel('epoch')\n", 795 | "plt.legend(['train', 'test'], loc='upper left')\n", 796 | "plt.show()" 797 | ] 798 | }, 799 | { 800 | "cell_type": "code", 801 | "execution_count": null, 802 | "metadata": {}, 803 | "outputs": [], 804 | "source": [ 805 | "model.load_weights(bst_model_path)" 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": null, 811 | "metadata": {}, 812 | "outputs": [], 813 | "source": [ 814 | "# training metrics\n", 815 | "scores = 
model.evaluate(seq_array, label_array, verbose=1, batch_size=200)\n", 816 | "print('Score: {}'.format(scores[1]))" 817 | ] 818 | }, 819 | { 820 | "cell_type": "code", 821 | "execution_count": null, 822 | "metadata": {}, 823 | "outputs": [], 824 | "source": [ 825 | "seq_array_test_last = [test_df[test_df['id']==id][sequence_cols].values[-sequence_length:] \n", 826 | " for id in test_df['id'].unique() if len(test_df[test_df['id']==id]) >= sequence_length]\n", 827 | "\n", 828 | "seq_array_test_last = np.asarray(seq_array_test_last).astype(np.float32)\n", 829 | "seq_array_test_last.shape" 830 | ] 831 | }, 832 | { 833 | "cell_type": "code", 834 | "execution_count": null, 835 | "metadata": {}, 836 | "outputs": [], 837 | "source": [ 838 | "y_mask = [len(test_df[test_df['id']==id]) >= sequence_length for id in test_df['id'].unique()]" 839 | ] 840 | }, 841 | { 842 | "cell_type": "code", 843 | "execution_count": null, 844 | "metadata": {}, 845 | "outputs": [], 846 | "source": [ 847 | "label_array_test_last = test_df.groupby('id')['RUL'].nth(-1)[y_mask].values\n", 848 | "label_array_test_last = label_array_test_last.reshape(label_array_test_last.shape[0],1).astype(np.float32)\n", 849 | "label_array_test_last.shape" 850 | ] 851 | }, 852 | { 853 | "cell_type": "code", 854 | "execution_count": null, 855 | "metadata": {}, 856 | "outputs": [], 857 | "source": [ 858 | "print(seq_array_test_last.shape)\n", 859 | "print(label_array_test_last.shape)" 860 | ] 861 | }, 862 | { 863 | "cell_type": "code", 864 | "execution_count": null, 865 | "metadata": {}, 866 | "outputs": [], 867 | "source": [ 868 | "# test metrics\n", 869 | "import math\n", 870 | "scores_test = model.evaluate(seq_array_test_last, label_array_test_last, verbose=2)\n", 871 | "print('Test Score: %.2f MSE (%.2f RMSE)' % (scores_test[0], math.sqrt(scores_test[0])))" 872 | ] 873 | }, 874 | { 875 | "cell_type": "code", 876 | "execution_count": null, 877 | "metadata": {}, 878 | "outputs": [], 879 | "source": [ 880 | "pred = 
model.predict(seq_array_test_last)\n", 881 | "pred" 882 | ] 883 | }, 884 | { 885 | "cell_type": "code", 886 | "execution_count": null, 887 | "metadata": {}, 888 | "outputs": [], 889 | "source": [ 890 | "diff = []\n", 891 | "ratio = []\n", 892 | "pred = model.predict(seq_array_test_last)\n", 893 | "for u in range(len(label_array_test_last)):\n", 894 | " pr = pred[u][0]\n", 895 | " ratio.append((label_array_test_last[u] / pr) - 1)\n", 896 | " diff.append(abs(label_array_test_last[u] - pr))" 897 | ] 898 | }, 899 | { 900 | "cell_type": "code", 901 | "execution_count": null, 902 | "metadata": {}, 903 | "outputs": [], 904 | "source": [ 905 | "import matplotlib.pyplot as plt2\n", 906 | "\n", 907 | "plt2.plot(pred, color='red', label='Prediction')\n", 908 | "plt2.plot(label_array_test_last, color='blue', label='Ground Truth')\n", 909 | "plt2.legend(loc='upper left')\n", 910 | "plt2.show()" 911 | ] 912 | }, 913 | { 914 | "cell_type": "code", 915 | "execution_count": null, 916 | "metadata": {}, 917 | "outputs": [], 918 | "source": [ 919 | "error = pd.DataFrame((label_array_test_last - pred),columns=['error'])\n", 920 | "error.describe()" 921 | ] 922 | }, 923 | { 924 | "cell_type": "code", 925 | "execution_count": null, 926 | "metadata": {}, 927 | "outputs": [], 928 | "source": [ 929 | "pred_cutoff = pd.DataFrame(pred,columns=['rul'])\n", 930 | "pred_cutoff.loc[pred_cutoff['rul'] > 140,'rul'] = 140\n", 931 | "pred_cutoff.head()" 932 | ] 933 | }, 934 | { 935 | "cell_type": "code", 936 | "execution_count": null, 937 | "metadata": {}, 938 | "outputs": [], 939 | "source": [ 940 | "from sklearn.metrics import mean_squared_error\n", 941 | "\n", 942 | "cutoffscore = mean_squared_error(label_array_test_last,pred_cutoff)\n", 943 | "print('Test Score: %.2f MSE (%.2f RMSE)' % (cutoffscore, math.sqrt(cutoffscore)))" 944 | ] 945 | }, 946 | { 947 | "cell_type": "code", 948 | "execution_count": null, 949 | "metadata": {}, 950 | "outputs": [], 951 | "source": [] 952 | } 953 | ], 954 | 
"metadata": { 955 | "kernelspec": { 956 | "display_name": "Python 3", 957 | "language": "python", 958 | "name": "python3" 959 | }, 960 | "language_info": { 961 | "codemirror_mode": { 962 | "name": "ipython", 963 | "version": 3 964 | }, 965 | "file_extension": ".py", 966 | "mimetype": "text/x-python", 967 | "name": "python", 968 | "nbconvert_exporter": "python", 969 | "pygments_lexer": "ipython3", 970 | "version": "3.6.2" 971 | } 972 | }, 973 | "nbformat": 4, 974 | "nbformat_minor": 2 975 | } 976 | -------------------------------------------------------------------------------- /data3/PM_truth_03.txt: -------------------------------------------------------------------------------- 1 | 44 2 | 51 3 | 27 4 | 120 5 | 101 6 | 99 7 | 71 8 | 55 9 | 55 10 | 66 11 | 77 12 | 115 13 | 115 14 | 31 15 | 108 16 | 56 17 | 136 18 | 132 19 | 85 20 | 56 21 | 18 22 | 119 23 | 78 24 | 9 25 | 58 26 | 11 27 | 88 28 | 144 29 | 124 30 | 89 31 | 79 32 | 55 33 | 71 34 | 65 35 | 87 36 | 137 37 | 145 38 | 22 39 | 8 40 | 41 41 | 131 42 | 115 43 | 128 44 | 69 45 | 111 46 | 7 47 | 137 48 | 55 49 | 135 50 | 11 51 | 78 52 | 120 53 | 87 54 | 87 55 | 55 56 | 93 57 | 88 58 | 40 59 | 49 60 | 128 61 | 129 62 | 58 63 | 117 64 | 28 65 | 115 66 | 87 67 | 92 68 | 103 69 | 100 70 | 63 71 | 35 72 | 45 73 | 99 74 | 117 75 | 45 76 | 27 77 | 86 78 | 20 79 | 18 80 | 133 81 | 15 82 | 6 83 | 145 84 | 104 85 | 56 86 | 25 87 | 68 88 | 144 89 | 41 90 | 51 91 | 81 92 | 14 93 | 67 94 | 10 95 | 127 96 | 113 97 | 123 98 | 17 99 | 8 100 | 28 101 | -------------------------------------------------------------------------------- /data3/RUL.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/h5py/__init__.py:36: 
FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 13 | " from ._conv import register_converters as _register_converters\n", 14 | "Using TensorFlow backend.\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "import keras" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import pandas as pd\n", 29 | "import numpy as np\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "\n", 32 | "# Setting seed for reproducibility\n", 33 | "np.random.seed(1234) \n", 34 | "PYTHONHASHSEED = 0\n", 35 | "from sklearn import preprocessing\n", 36 | "from sklearn.metrics import confusion_matrix, recall_score, precision_score\n", 37 | "from keras.models import Sequential\n", 38 | "from keras.layers import Dense, Dropout, LSTM, Activation\n", 39 | "%matplotlib inline" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "from pykalman import KalmanFilter" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "# read training data \n", 58 | "train_df = pd.read_csv('PM_train_03.txt', sep=\" \", header=None)\n", 59 | "train_df.drop(train_df.columns[[26, 27]], axis=1, inplace=True)\n", 60 | "train_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n", 61 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n", 62 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/html": [ 73 | "
\n", 74 | "\n", 87 | "\n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | "
idcyclesetting1setting2setting3s1s2s3s4s5...s12s13s14s15s16s17s18s19s20s21
011-0.00050.0004100.0518.67642.361583.231396.8414.62...522.312388.018145.328.42460.033912388100.039.1123.3537
1120.0008-0.0003100.0518.67642.501584.691396.8914.62...522.422388.038152.858.44030.033922388100.038.9923.4491
213-0.0014-0.0002100.0518.67642.181582.351405.6114.62...522.032388.008150.178.39010.033912388100.038.8523.3669
314-0.00200.0001100.0518.67642.921585.611392.2714.62...522.492388.088146.568.38780.033922388100.038.9623.2951
4150.00160.0000100.0518.67641.681588.631397.6514.62...522.582388.038147.808.38690.033922388100.039.1423.4583
\n", 237 | "

5 rows × 26 columns

\n", 238 | "
" 239 | ], 240 | "text/plain": [ 241 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n", 242 | "0 1 1 -0.0005 0.0004 100.0 518.67 642.36 1583.23 1396.84 \n", 243 | "1 1 2 0.0008 -0.0003 100.0 518.67 642.50 1584.69 1396.89 \n", 244 | "2 1 3 -0.0014 -0.0002 100.0 518.67 642.18 1582.35 1405.61 \n", 245 | "3 1 4 -0.0020 0.0001 100.0 518.67 642.92 1585.61 1392.27 \n", 246 | "4 1 5 0.0016 0.0000 100.0 518.67 641.68 1588.63 1397.65 \n", 247 | "\n", 248 | " s5 ... s12 s13 s14 s15 s16 s17 s18 s19 \\\n", 249 | "0 14.62 ... 522.31 2388.01 8145.32 8.4246 0.03 391 2388 100.0 \n", 250 | "1 14.62 ... 522.42 2388.03 8152.85 8.4403 0.03 392 2388 100.0 \n", 251 | "2 14.62 ... 522.03 2388.00 8150.17 8.3901 0.03 391 2388 100.0 \n", 252 | "3 14.62 ... 522.49 2388.08 8146.56 8.3878 0.03 392 2388 100.0 \n", 253 | "4 14.62 ... 522.58 2388.03 8147.80 8.3869 0.03 392 2388 100.0 \n", 254 | "\n", 255 | " s20 s21 \n", 256 | "0 39.11 23.3537 \n", 257 | "1 38.99 23.4491 \n", 258 | "2 38.85 23.3669 \n", 259 | "3 38.96 23.2951 \n", 260 | "4 39.14 23.4583 \n", 261 | "\n", 262 | "[5 rows x 26 columns]" 263 | ] 264 | }, 265 | "execution_count": 5, 266 | "metadata": {}, 267 | "output_type": "execute_result" 268 | } 269 | ], 270 | "source": [ 271 | "train_df.head()" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 6, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "train_df.columns\n", 281 | "cols = ['setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n", 282 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n", 283 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 7, 289 | "metadata": {}, 290 | "outputs": [ 291 | { 292 | "data": { 293 | "text/plain": [ 294 | "'setting1'" 295 | ] 296 | }, 297 | "execution_count": 7, 298 | "metadata": {}, 299 | "output_type": "execute_result" 300 | } 301 | ], 302 | "source": [ 303 | "train_df.columns[2]" 304 | 
] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 8, 309 | "metadata": {}, 310 | "outputs": [ 311 | { 312 | "name": "stdout", 313 | "output_type": "stream", 314 | "text": [ 315 | "setting1\n" 316 | ] 317 | }, 318 | { 319 | "name": "stderr", 320 | "output_type": "stream", 321 | "text": [ 322 | "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/scipy/linalg/basic.py:1226: RuntimeWarning: internal gelsd driver lwork query error, required iwork dimension not returned. This is likely the result of LAPACK bug 0038, fixed in LAPACK 3.2.2 (released July 21, 2010). Falling back to 'gelss' driver.\n", 323 | " warnings.warn(mesg, RuntimeWarning)\n" 324 | ] 325 | }, 326 | { 327 | "name": "stdout", 328 | "output_type": "stream", 329 | "text": [ 330 | "setting2\n", 331 | "setting3\n", 332 | "s1\n", 333 | "s2\n", 334 | "s3\n", 335 | "s4\n", 336 | "s5\n", 337 | "s6\n", 338 | "s7\n", 339 | "s8\n", 340 | "s9\n", 341 | "s10\n", 342 | "s11\n", 343 | "s12\n", 344 | "s13\n", 345 | "s14\n", 346 | "s15\n", 347 | "s16\n", 348 | "s17\n", 349 | "s18\n", 350 | "s19\n", 351 | "s20\n", 352 | "s21\n" 353 | ] 354 | } 355 | ], 356 | "source": [ 357 | "for cols in train_df.columns:\n", 358 | " if cols == 'id':\n", 359 | " continue;\n", 360 | " if cols == 'cycle':\n", 361 | " continue;\n", 362 | " else:\n", 363 | " print(cols)\n", 364 | " kf = KalmanFilter(transition_matrices = [1],\n", 365 | " observation_matrices = [1],\n", 366 | " initial_state_mean = train_df[cols].values[0],\n", 367 | " initial_state_covariance = 1,\n", 368 | " observation_covariance=1,\n", 369 | " transition_covariance=.01)\n", 370 | " state_means,_ = kf.filter(train_df[cols].values)\n", 371 | " train_df[cols] = state_means.flatten()" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "# read test data\n", 381 | "test_df = pd.read_csv('PM_test_03.txt', sep=\" \", header=None)\n", 
382 | "test_df.drop(test_df.columns[[26, 27]], axis=1, inplace=True)\n", 383 | "test_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n", 384 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n", 385 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": {}, 392 | "outputs": [ 393 | { 394 | "name": "stdout", 395 | "output_type": "stream", 396 | "text": [ 397 | "setting1\n", 398 | "setting2\n", 399 | "setting3\n", 400 | "s1\n", 401 | "s2\n", 402 | "s3\n", 403 | "s4\n", 404 | "s5\n", 405 | "s6\n" 406 | ] 407 | } 408 | ], 409 | "source": [ 410 | "for cols in test_df.columns:\n", 411 | " if cols == 'id':\n", 412 | " continue;\n", 413 | " if cols == 'cycle':\n", 414 | " continue;\n", 415 | " else:\n", 416 | " print(cols)\n", 417 | " kf = KalmanFilter(transition_matrices = [1],\n", 418 | " observation_matrices = [1],\n", 419 | " initial_state_mean = test_df[cols].values[0],\n", 420 | " initial_state_covariance = 1,\n", 421 | " observation_covariance=1,\n", 422 | " transition_covariance=.01)\n", 423 | " state_means,_ = kf.filter(test_df[cols].values)\n", 424 | " test_df[cols] = state_means.flatten()" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": {}, 431 | "outputs": [], 432 | "source": [ 433 | "# read ground truth data\n", 434 | "truth_df = pd.read_csv('PM_truth_03.txt', sep=\" \", header=None)\n", 435 | "truth_df.drop(truth_df.columns[[1]], axis=1, inplace=True)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": null, 441 | "metadata": {}, 442 | "outputs": [], 443 | "source": [ 444 | "train_df = train_df.sort_values(['id','cycle'])\n", 445 | "train_df.head()" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": {}, 452 | "outputs": [], 453 | "source": [ 454 | "# Data Labeling - generate 
column RUL\n", 455 | "rul = pd.DataFrame(train_df.groupby('id')['cycle'].max()).reset_index()\n", 456 | "rul.columns = ['id', 'max']\n", 457 | "train_df = train_df.merge(rul, on=['id'], how='left')\n", 458 | "train_df['RUL'] = train_df['max'] - train_df['cycle']\n", 459 | "train_df.drop('max', axis=1, inplace=True)\n", 460 | "train_df.head()" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": null, 466 | "metadata": {}, 467 | "outputs": [], 468 | "source": [ 469 | "# generate label columns for training data\n", 470 | "w1 = 30\n", 471 | "w0 = 15\n", 472 | "train_df['label1'] = np.where(train_df['RUL'] <= w1, 1, 0 )\n", 473 | "train_df['label2'] = train_df['label1']\n", 474 | "train_df.loc[train_df['RUL'] <= w0, 'label2'] = 2\n", 475 | "train_df.head()" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": null, 481 | "metadata": {}, 482 | "outputs": [], 483 | "source": [ 484 | "# MinMax normalization\n", 485 | "train_df['cycle_norm'] = train_df['cycle']\n", 486 | "cols_normalize = train_df.columns.difference(['id','cycle','RUL','label1','label2'])\n", 487 | "min_max_scaler = preprocessing.MinMaxScaler()\n", 488 | "norm_train_df = pd.DataFrame(min_max_scaler.fit_transform(train_df[cols_normalize]), \n", 489 | " columns=cols_normalize, \n", 490 | " index=train_df.index)\n", 491 | "join_df = train_df[train_df.columns.difference(cols_normalize)].join(norm_train_df)\n", 492 | "train_df = join_df.reindex(columns = train_df.columns)\n", 493 | "train_df.head()" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "metadata": {}, 500 | "outputs": [], 501 | "source": [ 502 | "test_df['cycle_norm'] = test_df['cycle']\n", 503 | "norm_test_df = pd.DataFrame(min_max_scaler.transform(test_df[cols_normalize]), \n", 504 | " columns=cols_normalize, \n", 505 | " index=test_df.index)\n", 506 | "test_join_df = test_df[test_df.columns.difference(cols_normalize)].join(norm_test_df)\n", 507 | "test_df = 
test_join_df.reindex(columns = test_df.columns)\n", 508 | "test_df = test_df.reset_index(drop=True)\n", 509 | "test_df.head()" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "# generate column max for test data\n", 519 | "rul = pd.DataFrame(test_df.groupby('id')['cycle'].max()).reset_index()\n", 520 | "rul.columns = ['id', 'max']\n", 521 | "truth_df.columns = ['more']\n", 522 | "truth_df['id'] = truth_df.index + 1\n", 523 | "truth_df['max'] = rul['max'] + truth_df['more']\n", 524 | "truth_df.drop('more', axis=1, inplace=True)" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": null, 530 | "metadata": {}, 531 | "outputs": [], 532 | "source": [ 533 | "# generate RUL for test data\n", 534 | "test_df = test_df.merge(truth_df, on=['id'], how='left')\n", 535 | "test_df['RUL'] = test_df['max'] - test_df['cycle']\n", 536 | "test_df.drop('max', axis=1, inplace=True)\n", 537 | "test_df.head()" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": null, 543 | "metadata": {}, 544 | "outputs": [], 545 | "source": [ 546 | "# generate label columns w0 and w1 for test data\n", 547 | "test_df['label1'] = np.where(test_df['RUL'] <= w1, 1, 0 )\n", 548 | "test_df['label2'] = test_df['label1']\n", 549 | "test_df.loc[test_df['RUL'] <= w0, 'label2'] = 2\n", 550 | "test_df.head()" 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "execution_count": null, 556 | "metadata": {}, 557 | "outputs": [], 558 | "source": [ 559 | "train_df.drop(['label1','label2'],axis=1,inplace=True)\n", 560 | "test_df.drop(['label1','label2'],axis=1,inplace=True)" 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": null, 566 | "metadata": {}, 567 | "outputs": [], 568 | "source": [ 569 | "# pick a large window size of 50 cycles\n", 570 | "sequence_length = 50" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | 
"execution_count": null, 576 | "metadata": {}, 577 | "outputs": [], 578 | "source": [ 579 | "# preparing data for visualizations \n", 580 | "# window of 50 cycles prior to a failure point for engine id 3\n", 581 | "engine_id3 = test_df[test_df['id'] == 3]\n", 582 | "engine_id3_50cycleWindow = engine_id3[engine_id3['RUL'] <= engine_id3['RUL'].min() + 50]\n", 583 | "cols1 = ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10']\n", 584 | "engine_id3_50cycleWindow1 = engine_id3_50cycleWindow[cols1]\n", 585 | "cols2 = ['s11', 's12', 's13', 's14', 's15', 's16', 's17', 's18', 's19', 's20', 's21']\n", 586 | "engine_id3_50cycleWindow2 = engine_id3_50cycleWindow[cols2]" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": null, 592 | "metadata": {}, 593 | "outputs": [], 594 | "source": [ 595 | "# plotting sensor data for engine ID 3 prior to a failure point - sensors 1-10 \n", 596 | "ax1 = engine_id3_50cycleWindow1.plot(subplots=True, sharex=True, figsize=(20,20))" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": null, 602 | "metadata": {}, 603 | "outputs": [], 604 | "source": [ 605 | "# plotting sensor data for engine ID 3 prior to a failure point - sensors 11-21 \n", 606 | "ax2 = engine_id3_50cycleWindow2.plot(subplots=True, sharex=True, figsize=(20,20))" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": null, 612 | "metadata": {}, 613 | "outputs": [], 614 | "source": [ 615 | "# function to reshape features into (samples, time steps, features) \n", 616 | "def gen_sequence(id_df, seq_length, seq_cols):\n", 617 | " \"\"\" Only sequences that meet the window-length are considered, no padding is used. This means for testing\n", 618 | " we need to drop those which are below the window-length. 
An alternative would be to pad sequences so that\n", 619 | " we can use shorter ones \"\"\"\n", 620 | " data_array = id_df[seq_cols].values\n", 621 | " num_elements = data_array.shape[0]\n", 622 | " for start, stop in zip(range(0, num_elements-seq_length), range(seq_length, num_elements)):\n", 623 | " yield data_array[start:stop, :]" 624 | ] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "execution_count": null, 629 | "metadata": {}, 630 | "outputs": [], 631 | "source": [ 632 | "# pick the feature columns \n", 633 | "sensor_cols = ['s' + str(i) for i in range(1,22)]\n", 634 | "sequence_cols = ['setting1', 'setting2', 'setting3', 'cycle_norm']\n", 635 | "sequence_cols.extend(sensor_cols)" 636 | ] 637 | }, 638 | { 639 | "cell_type": "code", 640 | "execution_count": null, 641 | "metadata": {}, 642 | "outputs": [], 643 | "source": [ 644 | "# generator for the sequences\n", 645 | "seq_gen = (list(gen_sequence(train_df[train_df['id']==id], sequence_length, sequence_cols)) \n", 646 | " for id in train_df['id'].unique())" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": null, 652 | "metadata": {}, 653 | "outputs": [], 654 | "source": [ 655 | "# generate sequences and convert to numpy array\n", 656 | "seq_array = np.concatenate(list(seq_gen)).astype(np.float32)\n", 657 | "seq_array.shape" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [ 666 | "# function to generate labels\n", 667 | "def gen_labels(id_df, seq_length, label):\n", 668 | " data_array = id_df[label].values\n", 669 | " num_elements = data_array.shape[0]\n", 670 | " return data_array[seq_length:num_elements, :]" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": null, 676 | "metadata": {}, 677 | "outputs": [], 678 | "source": [ 679 | "# generate labels\n", 680 | "label_gen = [gen_labels(train_df[train_df['id']==id], sequence_length, ['RUL']) \n", 681 | " for id in 
train_df['id'].unique()]\n", 682 | "label_array = np.concatenate(label_gen).astype(np.float32)\n", 683 | "label_array.shape" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": null, 689 | "metadata": {}, 690 | "outputs": [], 691 | "source": [ 692 | "from keras.layers import Bidirectional" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": null, 698 | "metadata": {}, 699 | "outputs": [], 700 | "source": [ 701 | "# build the network\n", 702 | "nb_features = seq_array.shape[2]\n", 703 | "nb_out = label_array.shape[1]\n", 704 | "\n", 705 | "model = Sequential()\n", 706 | "# model.add(Bidirectional(LSTM(\n", 707 | "# units=100,\n", 708 | "# return_sequences=True),\n", 709 | "# input_shape=(sequence_length, nb_features)))\n", 710 | "model.add(LSTM(\n", 711 | " units=100,\n", 712 | " return_sequences=True,\n", 713 | " input_shape=(sequence_length, nb_features)))\n", 714 | "model.add(Dropout(0.4))\n", 715 | "\n", 716 | "model.add(LSTM(\n", 717 | " units=100,\n", 718 | " return_sequences=False))\n", 719 | "model.add(Dropout(0.4))\n", 720 | "\n", 721 | "# model.add(Dense(units=100,activation='relu'))\n", 722 | "# model.add(Dropout(0.2))\n", 723 | "# model.add(Dense(units=100,activation='relu'))\n", 724 | "# model.add(Dropout(0.2))\n", 725 | "# model.add(Dense(units=100,activation='relu'))\n", 726 | "# model.add(Dropout(0.2))\n", 727 | "model.add(Dense(units=1, activation='relu'))\n", 728 | "model.add(Activation(\"relu\"))\n", 729 | "model.compile(loss=\"mse\", optimizer=\"rmsprop\", metrics=['mse'])" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": null, 735 | "metadata": {}, 736 | "outputs": [], 737 | "source": [ 738 | "print(model.summary())" 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "execution_count": null, 744 | "metadata": {}, 745 | "outputs": [], 746 | "source": [ 747 | "from keras.callbacks import EarlyStopping, ModelCheckpoint\n", 748 | "\n", 749 | "STAMP = 
'predictive_regression_kalhman'\n", 750 | "print(STAMP)\n", 751 | "\n", 752 | "early_stopping =EarlyStopping(monitor='val_loss', patience=10)\n", 753 | "bst_model_path = STAMP + '.h5'\n", 754 | "model_checkpoint = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)" 755 | ] 756 | }, 757 | { 758 | "cell_type": "code", 759 | "execution_count": null, 760 | "metadata": { 761 | "scrolled": false 762 | }, 763 | "outputs": [], 764 | "source": [ 765 | "%%time\n", 766 | "# fit the network\n", 767 | "hist = model.fit(seq_array, label_array, epochs=1000, batch_size=200, validation_split=0.05, verbose=1,callbacks=[early_stopping, model_checkpoint])" 768 | ] 769 | }, 770 | { 771 | "cell_type": "code", 772 | "execution_count": null, 773 | "metadata": {}, 774 | "outputs": [], 775 | "source": [ 776 | "import matplotlib.pyplot as plt\n", 777 | "\n", 778 | "#plots\n", 779 | "# list all data in history\n", 780 | "print(hist.history.keys())\n", 781 | "# summarize history for mean squared error\n", 782 | "plt.plot(hist.history['mean_squared_error'])\n", 783 | "plt.plot(hist.history['val_mean_squared_error'])\n", 784 | "plt.title('mse')\n", 785 | "plt.ylabel('mean_squared_error')\n", 786 | "plt.xlabel('epoch')\n", 787 | "plt.legend(['train', 'test'], loc='upper left')\n", 788 | "plt.show()\n", 789 | "# summarize history for loss\n", 790 | "plt.plot(hist.history['loss'])\n", 791 | "plt.plot(hist.history['val_loss'])\n", 792 | "plt.title('model loss')\n", 793 | "plt.ylabel('loss')\n", 794 | "plt.xlabel('epoch')\n", 795 | "plt.legend(['train', 'test'], loc='upper left')\n", 796 | "plt.show()" 797 | ] 798 | }, 799 | { 800 | "cell_type": "code", 801 | "execution_count": null, 802 | "metadata": {}, 803 | "outputs": [], 804 | "source": [ 805 | "model.load_weights(bst_model_path)" 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": null, 811 | "metadata": {}, 812 | "outputs": [], 813 | "source": [ 814 | "# training metrics\n", 815 | "scores = 
model.evaluate(seq_array, label_array, verbose=1, batch_size=200)\n", 816 | "print('Score: {}'.format(scores[1]))" 817 | ] 818 | }, 819 | { 820 | "cell_type": "code", 821 | "execution_count": null, 822 | "metadata": {}, 823 | "outputs": [], 824 | "source": [ 825 | "seq_array_test_last = [test_df[test_df['id']==id][sequence_cols].values[-sequence_length:] \n", 826 | " for id in test_df['id'].unique() if len(test_df[test_df['id']==id]) >= sequence_length]\n", 827 | "\n", 828 | "seq_array_test_last = np.asarray(seq_array_test_last).astype(np.float32)\n", 829 | "seq_array_test_last.shape" 830 | ] 831 | }, 832 | { 833 | "cell_type": "code", 834 | "execution_count": null, 835 | "metadata": {}, 836 | "outputs": [], 837 | "source": [ 838 | "y_mask = [len(test_df[test_df['id']==id]) >= sequence_length for id in test_df['id'].unique()]" 839 | ] 840 | }, 841 | { 842 | "cell_type": "code", 843 | "execution_count": null, 844 | "metadata": {}, 845 | "outputs": [], 846 | "source": [ 847 | "label_array_test_last = test_df.groupby('id')['RUL'].nth(-1)[y_mask].values\n", 848 | "label_array_test_last = label_array_test_last.reshape(label_array_test_last.shape[0],1).astype(np.float32)\n", 849 | "label_array_test_last.shape" 850 | ] 851 | }, 852 | { 853 | "cell_type": "code", 854 | "execution_count": null, 855 | "metadata": {}, 856 | "outputs": [], 857 | "source": [ 858 | "print(seq_array_test_last.shape)\n", 859 | "print(label_array_test_last.shape)" 860 | ] 861 | }, 862 | { 863 | "cell_type": "code", 864 | "execution_count": null, 865 | "metadata": {}, 866 | "outputs": [], 867 | "source": [ 868 | "# test metrics\n", 869 | "import math\n", 870 | "scores_test = model.evaluate(seq_array_test_last, label_array_test_last, verbose=2)\n", 871 | "print('Test Score: %.2f MSE (%.2f RMSE)' % (scores_test[0], math.sqrt(scores_test[0])))" 872 | ] 873 | }, 874 | { 875 | "cell_type": "code", 876 | "execution_count": null, 877 | "metadata": {}, 878 | "outputs": [], 879 | "source": [ 880 | "pred = 
model.predict(seq_array_test_last)\n", 881 | "pred" 882 | ] 883 | }, 884 | { 885 | "cell_type": "code", 886 | "execution_count": null, 887 | "metadata": {}, 888 | "outputs": [], 889 | "source": [ 890 | "diff = []\n", 891 | "ratio = []\n", 892 | "pred = model.predict(seq_array_test_last)\n", 893 | "for u in range(len(label_array_test_last)):\n", 894 | " pr = pred[u][0]\n", 895 | " ratio.append((label_array_test_last[u] / pr) - 1)\n", 896 | " diff.append(abs(label_array_test_last[u] - pr))" 897 | ] 898 | }, 899 | { 900 | "cell_type": "code", 901 | "execution_count": null, 902 | "metadata": {}, 903 | "outputs": [], 904 | "source": [ 905 | "import matplotlib.pyplot as plt2\n", 906 | "\n", 907 | "plt2.plot(pred, color='red', label='Prediction')\n", 908 | "plt2.plot(label_array_test_last, color='blue', label='Ground Truth')\n", 909 | "plt2.legend(loc='upper left')\n", 910 | "plt2.show()" 911 | ] 912 | }, 913 | { 914 | "cell_type": "code", 915 | "execution_count": null, 916 | "metadata": {}, 917 | "outputs": [], 918 | "source": [ 919 | "error = pd.DataFrame((label_array_test_last - pred),columns=['error'])\n", 920 | "error.describe()" 921 | ] 922 | }, 923 | { 924 | "cell_type": "code", 925 | "execution_count": null, 926 | "metadata": {}, 927 | "outputs": [], 928 | "source": [ 929 | "pred_cutoff = pd.DataFrame(pred,columns=['rul'])\n", 930 | "pred_cutoff.loc[pred_cutoff['rul'] > 140,'rul'] = 140\n", 931 | "pred_cutoff.head()" 932 | ] 933 | }, 934 | { 935 | "cell_type": "code", 936 | "execution_count": null, 937 | "metadata": {}, 938 | "outputs": [], 939 | "source": [ 940 | "from sklearn.metrics import mean_squared_error\n", 941 | "\n", 942 | "cutoffscore = mean_squared_error(label_array_test_last,pred_cutoff)\n", 943 | "print('Test Score: %.2f MSE (%.2f RMSE)' % (cutoffscore, math.sqrt(cutoffscore)))" 944 | ] 945 | }, 946 | { 947 | "cell_type": "code", 948 | "execution_count": null, 949 | "metadata": {}, 950 | "outputs": [], 951 | "source": [] 952 | } 953 | ], 954 | 
"metadata": { 955 | "kernelspec": { 956 | "display_name": "Python 3", 957 | "language": "python", 958 | "name": "python3" 959 | }, 960 | "language_info": { 961 | "codemirror_mode": { 962 | "name": "ipython", 963 | "version": 3 964 | }, 965 | "file_extension": ".py", 966 | "mimetype": "text/x-python", 967 | "name": "python", 968 | "nbconvert_exporter": "python", 969 | "pygments_lexer": "ipython3", 970 | "version": "3.6.2" 971 | } 972 | }, 973 | "nbformat": 4, 974 | "nbformat_minor": 2 975 | } 976 | -------------------------------------------------------------------------------- /data4/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data4/.DS_Store -------------------------------------------------------------------------------- /data4/PM_truth_04.txt: -------------------------------------------------------------------------------- 1 | 22 2 | 39 3 | 107 4 | 75 5 | 149 6 | 78 7 | 94 8 | 14 9 | 99 10 | 162 11 | 143 12 | 7 13 | 71 14 | 105 15 | 12 16 | 160 17 | 162 18 | 104 19 | 194 20 | 82 21 | 91 22 | 11 23 | 26 24 | 142 25 | 39 26 | 92 27 | 76 28 | 124 29 | 64 30 | 118 31 | 6 32 | 22 33 | 147 34 | 126 35 | 36 36 | 73 37 | 89 38 | 11 39 | 151 40 | 10 41 | 97 42 | 30 43 | 42 44 | 60 45 | 85 46 | 134 47 | 34 48 | 45 49 | 24 50 | 86 51 | 119 52 | 151 53 | 142 54 | 176 55 | 157 56 | 67 57 | 97 58 | 8 59 | 154 60 | 139 61 | 51 62 | 33 63 | 184 64 | 46 65 | 12 66 | 133 67 | 46 68 | 46 69 | 12 70 | 33 71 | 15 72 | 176 73 | 23 74 | 89 75 | 124 76 | 163 77 | 25 78 | 74 79 | 78 80 | 114 81 | 96 82 | 10 83 | 172 84 | 166 85 | 115 86 | 70 87 | 94 88 | 56 89 | 86 90 | 96 91 | 50 92 | 73 93 | 154 94 | 129 95 | 171 96 | 71 97 | 105 98 | 113 99 | 37 100 | 7 101 | 13 102 | 22 103 | 9 104 | 120 105 | 100 106 | 107 107 | 41 108 | 153 109 | 126 110 | 59 111 | 18 112 | 66 113 | 13 114 | 14 115 | 139 116 | 13 117 | 75 118 | 8 119 | 109 120 
| 137 121 | 41 122 | 192 123 | 23 124 | 86 125 | 184 126 | 15 127 | 195 128 | 126 129 | 120 130 | 165 131 | 101 132 | 116 133 | 126 134 | 36 135 | 7 136 | 122 137 | 159 138 | 88 139 | 173 140 | 146 141 | 130 142 | 108 143 | 53 144 | 162 145 | 59 146 | 100 147 | 56 148 | 145 149 | 76 150 | 57 151 | 31 152 | 88 153 | 173 154 | 34 155 | 7 156 | 133 157 | 172 158 | 6 159 | 22 160 | 83 161 | 82 162 | 84 163 | 95 164 | 174 165 | 111 166 | 72 167 | 109 168 | 87 169 | 179 170 | 158 171 | 126 172 | 12 173 | 8 174 | 10 175 | 123 176 | 103 177 | 12 178 | 106 179 | 12 180 | 32 181 | 37 182 | 116 183 | 15 184 | 10 185 | 46 186 | 142 187 | 24 188 | 135 189 | 56 190 | 43 191 | 178 192 | 71 193 | 104 194 | 15 195 | 166 196 | 89 197 | 36 198 | 11 199 | 92 200 | 96 201 | 59 202 | 13 203 | 167 204 | 151 205 | 154 206 | 109 207 | 116 208 | 91 209 | 11 210 | 88 211 | 108 212 | 76 213 | 14 214 | 89 215 | 145 216 | 17 217 | 66 218 | 154 219 | 41 220 | 182 221 | 73 222 | 39 223 | 58 224 | 14 225 | 145 226 | 88 227 | 162 228 | 189 229 | 120 230 | 98 231 | 33 232 | 184 233 | 110 234 | 68 235 | 24 236 | 75 237 | 18 238 | 16 239 | 166 240 | 98 241 | 176 242 | 81 243 | 118 244 | 35 245 | 131 246 | 194 247 | 112 248 | 26 249 | -------------------------------------------------------------------------------- /data4/predictive_binary_final.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data4/predictive_binary_final.h5 -------------------------------------------------------------------------------- /data4/predictive_regression_kalhman.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data4/predictive_regression_kalhman.h5 --------------------------------------------------------------------------------