├── .DS_Store
├── .gitattributes
├── README.md
├── data1
├── .DS_Store
├── .ipynb_checkpoints
│ ├── Binary-checkpoint.ipynb
│ ├── Binary_final_v1-checkpoint.ipynb
│ ├── Binary_kalman-checkpoint.ipynb
│ ├── Logistic-checkpoint.ipynb
│ ├── RUL-checkpoint.ipynb
│ ├── Untitled-checkpoint.ipynb
│ ├── multinomial-checkpoint.ipynb
│ └── tpot-checkpoint.ipynb
├── Binary_final_v1.ipynb
├── PM_test_01.txt
├── PM_train_01.txt
├── PM_truth_01.txt
├── RUL.ipynb
├── predictive_binary_final.h5
└── predictive_regression_kalhman.h5
├── data2
├── .DS_Store
├── .ipynb_checkpoints
│ ├── Binary_final_v1-checkpoint.ipynb
│ ├── BinomialPredictiveMaintenance02-checkpoint.ipynb
│ └── RUL-checkpoint.ipynb
├── Binary_final_v1.ipynb
├── PM_test_02.txt
├── PM_train_02.txt
├── PM_truth_02.txt
├── RUL.ipynb
├── predictive_binary_final.h5
└── predictive_regression_kalhman.h5
├── data3
├── .DS_Store
├── .ipynb_checkpoints
│ ├── BinomialPredictiveMaintenance03-checkpoint.ipynb
│ └── RUL-checkpoint.ipynb
├── PM_test_03.txt
├── PM_train_03.txt
├── PM_truth_03.txt
└── RUL.ipynb
└── data4
├── .DS_Store
├── .ipynb_checkpoints
├── Binary_final_v1-checkpoint.ipynb
├── BinomialPredictiveMaintenance04-checkpoint.ipynb
└── RUL-checkpoint.ipynb
├── Binary_final_v1.ipynb
├── PM_test_04.txt
├── PM_train_04.txt
├── PM_truth_04.txt
├── RUL.ipynb
├── predictive_binary_final.h5
└── predictive_regression_kalhman.h5
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/.DS_Store
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RUL - Siemens MakeItReal Hackathon 2017
2 | Remaining useful life (RUL) prediction for the NASA Turbofan dataset, developed at the Siemens MakeItReal Hackathon 2017
3 |
4 | Files:
5 | The data1, data2 and data3 folders contain the analysis, modelling and RUL prediction for each dataset, in order of increasing complexity and noise
6 |
7 | Hackathon details: https://www.hackerearth.com/sprints/makeitreal/
8 | My experience: https://soham97.github.io/posts/2017/10/Siemens-MakeITReal-Hackathon/
9 | Dataset: https://ti.arc.nasa.gov/tech/dash/groups/pcoe/prognostic-data-repository/
10 |
--------------------------------------------------------------------------------
/data1/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data1/.DS_Store
--------------------------------------------------------------------------------
/data1/.ipynb_checkpoints/Logistic-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "# Setting seed for reproducibility\n",
14 | "np.random.seed(1234) \n",
15 | "PYTHONHASHSEED = 0\n",
16 | "from sklearn import preprocessing\n",
17 | "from sklearn.metrics import confusion_matrix, recall_score, precision_score\n",
18 | "%matplotlib inline"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "# read training data \n",
28 | "train_df = pd.read_csv('PM_train_01.txt', sep=\" \", header=None)\n",
29 | "train_df.drop(train_df.columns[[26, 27]], axis=1, inplace=True)\n",
30 | "train_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n",
31 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n",
32 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 3,
38 | "metadata": {},
39 | "outputs": [],
40 | "source": [
41 | "# read test data\n",
42 | "test_df = pd.read_csv('PM_test_01.txt', sep=\" \", header=None)\n",
43 | "test_df.drop(test_df.columns[[26, 27]], axis=1, inplace=True)\n",
44 | "test_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n",
45 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n",
46 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 4,
52 | "metadata": {},
53 | "outputs": [],
54 | "source": [
55 | "# read ground truth data\n",
56 | "truth_df = pd.read_csv('PM_truth_01.txt', sep=\" \", header=None)\n",
57 | "truth_df.drop(truth_df.columns[[1]], axis=1, inplace=True)"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 5,
63 | "metadata": {},
64 | "outputs": [
65 | {
66 | "data": {
67 | "text/html": [
68 | "
\n",
69 | "\n",
82 | "
\n",
83 | " \n",
84 | " \n",
85 | " | \n",
86 | " id | \n",
87 | " cycle | \n",
88 | " setting1 | \n",
89 | " setting2 | \n",
90 | " setting3 | \n",
91 | " s1 | \n",
92 | " s2 | \n",
93 | " s3 | \n",
94 | " s4 | \n",
95 | " s5 | \n",
96 | " ... | \n",
97 | " s12 | \n",
98 | " s13 | \n",
99 | " s14 | \n",
100 | " s15 | \n",
101 | " s16 | \n",
102 | " s17 | \n",
103 | " s18 | \n",
104 | " s19 | \n",
105 | " s20 | \n",
106 | " s21 | \n",
107 | "
\n",
108 | " \n",
109 | " \n",
110 | " \n",
111 | " 0 | \n",
112 | " 1 | \n",
113 | " 1 | \n",
114 | " -0.0007 | \n",
115 | " -0.0004 | \n",
116 | " 100.0 | \n",
117 | " 518.67 | \n",
118 | " 641.82 | \n",
119 | " 1589.70 | \n",
120 | " 1400.60 | \n",
121 | " 14.62 | \n",
122 | " ... | \n",
123 | " 521.66 | \n",
124 | " 2388.02 | \n",
125 | " 8138.62 | \n",
126 | " 8.4195 | \n",
127 | " 0.03 | \n",
128 | " 392 | \n",
129 | " 2388 | \n",
130 | " 100.0 | \n",
131 | " 39.06 | \n",
132 | " 23.4190 | \n",
133 | "
\n",
134 | " \n",
135 | " 1 | \n",
136 | " 1 | \n",
137 | " 2 | \n",
138 | " 0.0019 | \n",
139 | " -0.0003 | \n",
140 | " 100.0 | \n",
141 | " 518.67 | \n",
142 | " 642.15 | \n",
143 | " 1591.82 | \n",
144 | " 1403.14 | \n",
145 | " 14.62 | \n",
146 | " ... | \n",
147 | " 522.28 | \n",
148 | " 2388.07 | \n",
149 | " 8131.49 | \n",
150 | " 8.4318 | \n",
151 | " 0.03 | \n",
152 | " 392 | \n",
153 | " 2388 | \n",
154 | " 100.0 | \n",
155 | " 39.00 | \n",
156 | " 23.4236 | \n",
157 | "
\n",
158 | " \n",
159 | " 2 | \n",
160 | " 1 | \n",
161 | " 3 | \n",
162 | " -0.0043 | \n",
163 | " 0.0003 | \n",
164 | " 100.0 | \n",
165 | " 518.67 | \n",
166 | " 642.35 | \n",
167 | " 1587.99 | \n",
168 | " 1404.20 | \n",
169 | " 14.62 | \n",
170 | " ... | \n",
171 | " 522.42 | \n",
172 | " 2388.03 | \n",
173 | " 8133.23 | \n",
174 | " 8.4178 | \n",
175 | " 0.03 | \n",
176 | " 390 | \n",
177 | " 2388 | \n",
178 | " 100.0 | \n",
179 | " 38.95 | \n",
180 | " 23.3442 | \n",
181 | "
\n",
182 | " \n",
183 | " 3 | \n",
184 | " 1 | \n",
185 | " 4 | \n",
186 | " 0.0007 | \n",
187 | " 0.0000 | \n",
188 | " 100.0 | \n",
189 | " 518.67 | \n",
190 | " 642.35 | \n",
191 | " 1582.79 | \n",
192 | " 1401.87 | \n",
193 | " 14.62 | \n",
194 | " ... | \n",
195 | " 522.86 | \n",
196 | " 2388.08 | \n",
197 | " 8133.83 | \n",
198 | " 8.3682 | \n",
199 | " 0.03 | \n",
200 | " 392 | \n",
201 | " 2388 | \n",
202 | " 100.0 | \n",
203 | " 38.88 | \n",
204 | " 23.3739 | \n",
205 | "
\n",
206 | " \n",
207 | " 4 | \n",
208 | " 1 | \n",
209 | " 5 | \n",
210 | " -0.0019 | \n",
211 | " -0.0002 | \n",
212 | " 100.0 | \n",
213 | " 518.67 | \n",
214 | " 642.37 | \n",
215 | " 1582.85 | \n",
216 | " 1406.22 | \n",
217 | " 14.62 | \n",
218 | " ... | \n",
219 | " 522.19 | \n",
220 | " 2388.04 | \n",
221 | " 8133.80 | \n",
222 | " 8.4294 | \n",
223 | " 0.03 | \n",
224 | " 393 | \n",
225 | " 2388 | \n",
226 | " 100.0 | \n",
227 | " 38.90 | \n",
228 | " 23.4044 | \n",
229 | "
\n",
230 | " \n",
231 | "
\n",
232 | "
5 rows × 26 columns
\n",
233 | "
"
234 | ],
235 | "text/plain": [
236 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
237 | "0 1 1 -0.0007 -0.0004 100.0 518.67 641.82 1589.70 1400.60 \n",
238 | "1 1 2 0.0019 -0.0003 100.0 518.67 642.15 1591.82 1403.14 \n",
239 | "2 1 3 -0.0043 0.0003 100.0 518.67 642.35 1587.99 1404.20 \n",
240 | "3 1 4 0.0007 0.0000 100.0 518.67 642.35 1582.79 1401.87 \n",
241 | "4 1 5 -0.0019 -0.0002 100.0 518.67 642.37 1582.85 1406.22 \n",
242 | "\n",
243 | " s5 ... s12 s13 s14 s15 s16 s17 s18 s19 \\\n",
244 | "0 14.62 ... 521.66 2388.02 8138.62 8.4195 0.03 392 2388 100.0 \n",
245 | "1 14.62 ... 522.28 2388.07 8131.49 8.4318 0.03 392 2388 100.0 \n",
246 | "2 14.62 ... 522.42 2388.03 8133.23 8.4178 0.03 390 2388 100.0 \n",
247 | "3 14.62 ... 522.86 2388.08 8133.83 8.3682 0.03 392 2388 100.0 \n",
248 | "4 14.62 ... 522.19 2388.04 8133.80 8.4294 0.03 393 2388 100.0 \n",
249 | "\n",
250 | " s20 s21 \n",
251 | "0 39.06 23.4190 \n",
252 | "1 39.00 23.4236 \n",
253 | "2 38.95 23.3442 \n",
254 | "3 38.88 23.3739 \n",
255 | "4 38.90 23.4044 \n",
256 | "\n",
257 | "[5 rows x 26 columns]"
258 | ]
259 | },
260 | "execution_count": 5,
261 | "metadata": {},
262 | "output_type": "execute_result"
263 | }
264 | ],
265 | "source": [
266 | "train_df = train_df.sort_values(['id','cycle'])\n",
267 | "train_df.head()"
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": 6,
273 | "metadata": {},
274 | "outputs": [
275 | {
276 | "data": {
277 | "text/html": [
278 | "\n",
279 | "\n",
292 | "
\n",
293 | " \n",
294 | " \n",
295 | " | \n",
296 | " id | \n",
297 | " cycle | \n",
298 | " setting1 | \n",
299 | " setting2 | \n",
300 | " setting3 | \n",
301 | " s1 | \n",
302 | " s2 | \n",
303 | " s3 | \n",
304 | " s4 | \n",
305 | " s5 | \n",
306 | " ... | \n",
307 | " s13 | \n",
308 | " s14 | \n",
309 | " s15 | \n",
310 | " s16 | \n",
311 | " s17 | \n",
312 | " s18 | \n",
313 | " s19 | \n",
314 | " s20 | \n",
315 | " s21 | \n",
316 | " RUL | \n",
317 | "
\n",
318 | " \n",
319 | " \n",
320 | " \n",
321 | " 0 | \n",
322 | " 1 | \n",
323 | " 1 | \n",
324 | " -0.0007 | \n",
325 | " -0.0004 | \n",
326 | " 100.0 | \n",
327 | " 518.67 | \n",
328 | " 641.82 | \n",
329 | " 1589.70 | \n",
330 | " 1400.60 | \n",
331 | " 14.62 | \n",
332 | " ... | \n",
333 | " 2388.02 | \n",
334 | " 8138.62 | \n",
335 | " 8.4195 | \n",
336 | " 0.03 | \n",
337 | " 392 | \n",
338 | " 2388 | \n",
339 | " 100.0 | \n",
340 | " 39.06 | \n",
341 | " 23.4190 | \n",
342 | " 191 | \n",
343 | "
\n",
344 | " \n",
345 | " 1 | \n",
346 | " 1 | \n",
347 | " 2 | \n",
348 | " 0.0019 | \n",
349 | " -0.0003 | \n",
350 | " 100.0 | \n",
351 | " 518.67 | \n",
352 | " 642.15 | \n",
353 | " 1591.82 | \n",
354 | " 1403.14 | \n",
355 | " 14.62 | \n",
356 | " ... | \n",
357 | " 2388.07 | \n",
358 | " 8131.49 | \n",
359 | " 8.4318 | \n",
360 | " 0.03 | \n",
361 | " 392 | \n",
362 | " 2388 | \n",
363 | " 100.0 | \n",
364 | " 39.00 | \n",
365 | " 23.4236 | \n",
366 | " 190 | \n",
367 | "
\n",
368 | " \n",
369 | " 2 | \n",
370 | " 1 | \n",
371 | " 3 | \n",
372 | " -0.0043 | \n",
373 | " 0.0003 | \n",
374 | " 100.0 | \n",
375 | " 518.67 | \n",
376 | " 642.35 | \n",
377 | " 1587.99 | \n",
378 | " 1404.20 | \n",
379 | " 14.62 | \n",
380 | " ... | \n",
381 | " 2388.03 | \n",
382 | " 8133.23 | \n",
383 | " 8.4178 | \n",
384 | " 0.03 | \n",
385 | " 390 | \n",
386 | " 2388 | \n",
387 | " 100.0 | \n",
388 | " 38.95 | \n",
389 | " 23.3442 | \n",
390 | " 189 | \n",
391 | "
\n",
392 | " \n",
393 | " 3 | \n",
394 | " 1 | \n",
395 | " 4 | \n",
396 | " 0.0007 | \n",
397 | " 0.0000 | \n",
398 | " 100.0 | \n",
399 | " 518.67 | \n",
400 | " 642.35 | \n",
401 | " 1582.79 | \n",
402 | " 1401.87 | \n",
403 | " 14.62 | \n",
404 | " ... | \n",
405 | " 2388.08 | \n",
406 | " 8133.83 | \n",
407 | " 8.3682 | \n",
408 | " 0.03 | \n",
409 | " 392 | \n",
410 | " 2388 | \n",
411 | " 100.0 | \n",
412 | " 38.88 | \n",
413 | " 23.3739 | \n",
414 | " 188 | \n",
415 | "
\n",
416 | " \n",
417 | " 4 | \n",
418 | " 1 | \n",
419 | " 5 | \n",
420 | " -0.0019 | \n",
421 | " -0.0002 | \n",
422 | " 100.0 | \n",
423 | " 518.67 | \n",
424 | " 642.37 | \n",
425 | " 1582.85 | \n",
426 | " 1406.22 | \n",
427 | " 14.62 | \n",
428 | " ... | \n",
429 | " 2388.04 | \n",
430 | " 8133.80 | \n",
431 | " 8.4294 | \n",
432 | " 0.03 | \n",
433 | " 393 | \n",
434 | " 2388 | \n",
435 | " 100.0 | \n",
436 | " 38.90 | \n",
437 | " 23.4044 | \n",
438 | " 187 | \n",
439 | "
\n",
440 | " \n",
441 | "
\n",
442 | "
5 rows × 27 columns
\n",
443 | "
"
444 | ],
445 | "text/plain": [
446 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
447 | "0 1 1 -0.0007 -0.0004 100.0 518.67 641.82 1589.70 1400.60 \n",
448 | "1 1 2 0.0019 -0.0003 100.0 518.67 642.15 1591.82 1403.14 \n",
449 | "2 1 3 -0.0043 0.0003 100.0 518.67 642.35 1587.99 1404.20 \n",
450 | "3 1 4 0.0007 0.0000 100.0 518.67 642.35 1582.79 1401.87 \n",
451 | "4 1 5 -0.0019 -0.0002 100.0 518.67 642.37 1582.85 1406.22 \n",
452 | "\n",
453 | " s5 ... s13 s14 s15 s16 s17 s18 s19 s20 \\\n",
454 | "0 14.62 ... 2388.02 8138.62 8.4195 0.03 392 2388 100.0 39.06 \n",
455 | "1 14.62 ... 2388.07 8131.49 8.4318 0.03 392 2388 100.0 39.00 \n",
456 | "2 14.62 ... 2388.03 8133.23 8.4178 0.03 390 2388 100.0 38.95 \n",
457 | "3 14.62 ... 2388.08 8133.83 8.3682 0.03 392 2388 100.0 38.88 \n",
458 | "4 14.62 ... 2388.04 8133.80 8.4294 0.03 393 2388 100.0 38.90 \n",
459 | "\n",
460 | " s21 RUL \n",
461 | "0 23.4190 191 \n",
462 | "1 23.4236 190 \n",
463 | "2 23.3442 189 \n",
464 | "3 23.3739 188 \n",
465 | "4 23.4044 187 \n",
466 | "\n",
467 | "[5 rows x 27 columns]"
468 | ]
469 | },
470 | "execution_count": 6,
471 | "metadata": {},
472 | "output_type": "execute_result"
473 | }
474 | ],
475 | "source": [
476 | "# Data Labeling - generate column RUL\n",
477 | "rul = pd.DataFrame(train_df.groupby('id')['cycle'].max()).reset_index()\n",
478 | "rul.columns = ['id', 'max']\n",
479 | "train_df = train_df.merge(rul, on=['id'], how='left')\n",
480 | "train_df['RUL'] = train_df['max'] - train_df['cycle']\n",
481 | "train_df.drop('max', axis=1, inplace=True)\n",
482 | "train_df.head()"
483 | ]
484 | },
485 | {
486 | "cell_type": "code",
487 | "execution_count": 7,
488 | "metadata": {},
489 | "outputs": [
490 | {
491 | "data": {
492 | "text/html": [
493 | "\n",
494 | "\n",
507 | "
\n",
508 | " \n",
509 | " \n",
510 | " | \n",
511 | " id | \n",
512 | " cycle | \n",
513 | " setting1 | \n",
514 | " setting2 | \n",
515 | " setting3 | \n",
516 | " s1 | \n",
517 | " s2 | \n",
518 | " s3 | \n",
519 | " s4 | \n",
520 | " s5 | \n",
521 | " ... | \n",
522 | " s15 | \n",
523 | " s16 | \n",
524 | " s17 | \n",
525 | " s18 | \n",
526 | " s19 | \n",
527 | " s20 | \n",
528 | " s21 | \n",
529 | " RUL | \n",
530 | " label1 | \n",
531 | " label2 | \n",
532 | "
\n",
533 | " \n",
534 | " \n",
535 | " \n",
536 | " 0 | \n",
537 | " 1 | \n",
538 | " 1 | \n",
539 | " -0.0007 | \n",
540 | " -0.0004 | \n",
541 | " 100.0 | \n",
542 | " 518.67 | \n",
543 | " 641.82 | \n",
544 | " 1589.70 | \n",
545 | " 1400.60 | \n",
546 | " 14.62 | \n",
547 | " ... | \n",
548 | " 8.4195 | \n",
549 | " 0.03 | \n",
550 | " 392 | \n",
551 | " 2388 | \n",
552 | " 100.0 | \n",
553 | " 39.06 | \n",
554 | " 23.4190 | \n",
555 | " 191 | \n",
556 | " 0 | \n",
557 | " 0 | \n",
558 | "
\n",
559 | " \n",
560 | " 1 | \n",
561 | " 1 | \n",
562 | " 2 | \n",
563 | " 0.0019 | \n",
564 | " -0.0003 | \n",
565 | " 100.0 | \n",
566 | " 518.67 | \n",
567 | " 642.15 | \n",
568 | " 1591.82 | \n",
569 | " 1403.14 | \n",
570 | " 14.62 | \n",
571 | " ... | \n",
572 | " 8.4318 | \n",
573 | " 0.03 | \n",
574 | " 392 | \n",
575 | " 2388 | \n",
576 | " 100.0 | \n",
577 | " 39.00 | \n",
578 | " 23.4236 | \n",
579 | " 190 | \n",
580 | " 0 | \n",
581 | " 0 | \n",
582 | "
\n",
583 | " \n",
584 | " 2 | \n",
585 | " 1 | \n",
586 | " 3 | \n",
587 | " -0.0043 | \n",
588 | " 0.0003 | \n",
589 | " 100.0 | \n",
590 | " 518.67 | \n",
591 | " 642.35 | \n",
592 | " 1587.99 | \n",
593 | " 1404.20 | \n",
594 | " 14.62 | \n",
595 | " ... | \n",
596 | " 8.4178 | \n",
597 | " 0.03 | \n",
598 | " 390 | \n",
599 | " 2388 | \n",
600 | " 100.0 | \n",
601 | " 38.95 | \n",
602 | " 23.3442 | \n",
603 | " 189 | \n",
604 | " 0 | \n",
605 | " 0 | \n",
606 | "
\n",
607 | " \n",
608 | " 3 | \n",
609 | " 1 | \n",
610 | " 4 | \n",
611 | " 0.0007 | \n",
612 | " 0.0000 | \n",
613 | " 100.0 | \n",
614 | " 518.67 | \n",
615 | " 642.35 | \n",
616 | " 1582.79 | \n",
617 | " 1401.87 | \n",
618 | " 14.62 | \n",
619 | " ... | \n",
620 | " 8.3682 | \n",
621 | " 0.03 | \n",
622 | " 392 | \n",
623 | " 2388 | \n",
624 | " 100.0 | \n",
625 | " 38.88 | \n",
626 | " 23.3739 | \n",
627 | " 188 | \n",
628 | " 0 | \n",
629 | " 0 | \n",
630 | "
\n",
631 | " \n",
632 | " 4 | \n",
633 | " 1 | \n",
634 | " 5 | \n",
635 | " -0.0019 | \n",
636 | " -0.0002 | \n",
637 | " 100.0 | \n",
638 | " 518.67 | \n",
639 | " 642.37 | \n",
640 | " 1582.85 | \n",
641 | " 1406.22 | \n",
642 | " 14.62 | \n",
643 | " ... | \n",
644 | " 8.4294 | \n",
645 | " 0.03 | \n",
646 | " 393 | \n",
647 | " 2388 | \n",
648 | " 100.0 | \n",
649 | " 38.90 | \n",
650 | " 23.4044 | \n",
651 | " 187 | \n",
652 | " 0 | \n",
653 | " 0 | \n",
654 | "
\n",
655 | " \n",
656 | "
\n",
657 | "
5 rows × 29 columns
\n",
658 | "
"
659 | ],
660 | "text/plain": [
661 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
662 | "0 1 1 -0.0007 -0.0004 100.0 518.67 641.82 1589.70 1400.60 \n",
663 | "1 1 2 0.0019 -0.0003 100.0 518.67 642.15 1591.82 1403.14 \n",
664 | "2 1 3 -0.0043 0.0003 100.0 518.67 642.35 1587.99 1404.20 \n",
665 | "3 1 4 0.0007 0.0000 100.0 518.67 642.35 1582.79 1401.87 \n",
666 | "4 1 5 -0.0019 -0.0002 100.0 518.67 642.37 1582.85 1406.22 \n",
667 | "\n",
668 | " s5 ... s15 s16 s17 s18 s19 s20 s21 RUL label1 \\\n",
669 | "0 14.62 ... 8.4195 0.03 392 2388 100.0 39.06 23.4190 191 0 \n",
670 | "1 14.62 ... 8.4318 0.03 392 2388 100.0 39.00 23.4236 190 0 \n",
671 | "2 14.62 ... 8.4178 0.03 390 2388 100.0 38.95 23.3442 189 0 \n",
672 | "3 14.62 ... 8.3682 0.03 392 2388 100.0 38.88 23.3739 188 0 \n",
673 | "4 14.62 ... 8.4294 0.03 393 2388 100.0 38.90 23.4044 187 0 \n",
674 | "\n",
675 | " label2 \n",
676 | "0 0 \n",
677 | "1 0 \n",
678 | "2 0 \n",
679 | "3 0 \n",
680 | "4 0 \n",
681 | "\n",
682 | "[5 rows x 29 columns]"
683 | ]
684 | },
685 | "execution_count": 7,
686 | "metadata": {},
687 | "output_type": "execute_result"
688 | }
689 | ],
690 | "source": [
691 | "# generate label columns for training data\n",
692 | "w1 = 30\n",
693 | "w0 = 15\n",
694 | "train_df['label1'] = np.where(train_df['RUL'] <= w1, 1, 0 )\n",
695 | "train_df['label2'] = train_df['label1']\n",
696 | "train_df.loc[train_df['RUL'] <= w0, 'label2'] = 2\n",
697 | "train_df.head()"
698 | ]
699 | },
700 | {
701 | "cell_type": "code",
702 | "execution_count": 8,
703 | "metadata": {},
704 | "outputs": [
705 | {
706 | "data": {
707 | "text/html": [
708 | "\n",
709 | "\n",
722 | "
\n",
723 | " \n",
724 | " \n",
725 | " | \n",
726 | " id | \n",
727 | " cycle | \n",
728 | " setting1 | \n",
729 | " setting2 | \n",
730 | " setting3 | \n",
731 | " s1 | \n",
732 | " s2 | \n",
733 | " s3 | \n",
734 | " s4 | \n",
735 | " s5 | \n",
736 | " ... | \n",
737 | " s16 | \n",
738 | " s17 | \n",
739 | " s18 | \n",
740 | " s19 | \n",
741 | " s20 | \n",
742 | " s21 | \n",
743 | " RUL | \n",
744 | " label1 | \n",
745 | " label2 | \n",
746 | " cycle_norm | \n",
747 | "
\n",
748 | " \n",
749 | " \n",
750 | " \n",
751 | " 0 | \n",
752 | " 1 | \n",
753 | " 1 | \n",
754 | " 0.459770 | \n",
755 | " 0.166667 | \n",
756 | " 0.0 | \n",
757 | " 0.0 | \n",
758 | " 0.183735 | \n",
759 | " 0.406802 | \n",
760 | " 0.309757 | \n",
761 | " 0.0 | \n",
762 | " ... | \n",
763 | " 0.0 | \n",
764 | " 0.333333 | \n",
765 | " 0.0 | \n",
766 | " 0.0 | \n",
767 | " 0.713178 | \n",
768 | " 0.724662 | \n",
769 | " 191 | \n",
770 | " 0 | \n",
771 | " 0 | \n",
772 | " 0.00000 | \n",
773 | "
\n",
774 | " \n",
775 | " 1 | \n",
776 | " 1 | \n",
777 | " 2 | \n",
778 | " 0.609195 | \n",
779 | " 0.250000 | \n",
780 | " 0.0 | \n",
781 | " 0.0 | \n",
782 | " 0.283133 | \n",
783 | " 0.453019 | \n",
784 | " 0.352633 | \n",
785 | " 0.0 | \n",
786 | " ... | \n",
787 | " 0.0 | \n",
788 | " 0.333333 | \n",
789 | " 0.0 | \n",
790 | " 0.0 | \n",
791 | " 0.666667 | \n",
792 | " 0.731014 | \n",
793 | " 190 | \n",
794 | " 0 | \n",
795 | " 0 | \n",
796 | " 0.00277 | \n",
797 | "
\n",
798 | " \n",
799 | " 2 | \n",
800 | " 1 | \n",
801 | " 3 | \n",
802 | " 0.252874 | \n",
803 | " 0.750000 | \n",
804 | " 0.0 | \n",
805 | " 0.0 | \n",
806 | " 0.343373 | \n",
807 | " 0.369523 | \n",
808 | " 0.370527 | \n",
809 | " 0.0 | \n",
810 | " ... | \n",
811 | " 0.0 | \n",
812 | " 0.166667 | \n",
813 | " 0.0 | \n",
814 | " 0.0 | \n",
815 | " 0.627907 | \n",
816 | " 0.621375 | \n",
817 | " 189 | \n",
818 | " 0 | \n",
819 | " 0 | \n",
820 | " 0.00554 | \n",
821 | "
\n",
822 | " \n",
823 | " 3 | \n",
824 | " 1 | \n",
825 | " 4 | \n",
826 | " 0.540230 | \n",
827 | " 0.500000 | \n",
828 | " 0.0 | \n",
829 | " 0.0 | \n",
830 | " 0.343373 | \n",
831 | " 0.256159 | \n",
832 | " 0.331195 | \n",
833 | " 0.0 | \n",
834 | " ... | \n",
835 | " 0.0 | \n",
836 | " 0.333333 | \n",
837 | " 0.0 | \n",
838 | " 0.0 | \n",
839 | " 0.573643 | \n",
840 | " 0.662386 | \n",
841 | " 188 | \n",
842 | " 0 | \n",
843 | " 0 | \n",
844 | " 0.00831 | \n",
845 | "
\n",
846 | " \n",
847 | " 4 | \n",
848 | " 1 | \n",
849 | " 5 | \n",
850 | " 0.390805 | \n",
851 | " 0.333333 | \n",
852 | " 0.0 | \n",
853 | " 0.0 | \n",
854 | " 0.349398 | \n",
855 | " 0.257467 | \n",
856 | " 0.404625 | \n",
857 | " 0.0 | \n",
858 | " ... | \n",
859 | " 0.0 | \n",
860 | " 0.416667 | \n",
861 | " 0.0 | \n",
862 | " 0.0 | \n",
863 | " 0.589147 | \n",
864 | " 0.704502 | \n",
865 | " 187 | \n",
866 | " 0 | \n",
867 | " 0 | \n",
868 | " 0.01108 | \n",
869 | "
\n",
870 | " \n",
871 | "
\n",
872 | "
5 rows × 30 columns
\n",
873 | "
"
874 | ],
875 | "text/plain": [
876 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
877 | "0 1 1 0.459770 0.166667 0.0 0.0 0.183735 0.406802 0.309757 \n",
878 | "1 1 2 0.609195 0.250000 0.0 0.0 0.283133 0.453019 0.352633 \n",
879 | "2 1 3 0.252874 0.750000 0.0 0.0 0.343373 0.369523 0.370527 \n",
880 | "3 1 4 0.540230 0.500000 0.0 0.0 0.343373 0.256159 0.331195 \n",
881 | "4 1 5 0.390805 0.333333 0.0 0.0 0.349398 0.257467 0.404625 \n",
882 | "\n",
883 | " s5 ... s16 s17 s18 s19 s20 s21 RUL label1 \\\n",
884 | "0 0.0 ... 0.0 0.333333 0.0 0.0 0.713178 0.724662 191 0 \n",
885 | "1 0.0 ... 0.0 0.333333 0.0 0.0 0.666667 0.731014 190 0 \n",
886 | "2 0.0 ... 0.0 0.166667 0.0 0.0 0.627907 0.621375 189 0 \n",
887 | "3 0.0 ... 0.0 0.333333 0.0 0.0 0.573643 0.662386 188 0 \n",
888 | "4 0.0 ... 0.0 0.416667 0.0 0.0 0.589147 0.704502 187 0 \n",
889 | "\n",
890 | " label2 cycle_norm \n",
891 | "0 0 0.00000 \n",
892 | "1 0 0.00277 \n",
893 | "2 0 0.00554 \n",
894 | "3 0 0.00831 \n",
895 | "4 0 0.01108 \n",
896 | "\n",
897 | "[5 rows x 30 columns]"
898 | ]
899 | },
900 | "execution_count": 8,
901 | "metadata": {},
902 | "output_type": "execute_result"
903 | }
904 | ],
905 | "source": [
906 | "# MinMax normalization\n",
907 | "train_df['cycle_norm'] = train_df['cycle']\n",
908 | "cols_normalize = train_df.columns.difference(['id','cycle','RUL','label1','label2'])\n",
909 | "min_max_scaler = preprocessing.MinMaxScaler()\n",
910 | "norm_train_df = pd.DataFrame(min_max_scaler.fit_transform(train_df[cols_normalize]), \n",
911 | " columns=cols_normalize, \n",
912 | " index=train_df.index)\n",
913 | "join_df = train_df[train_df.columns.difference(cols_normalize)].join(norm_train_df)\n",
914 | "train_df = join_df.reindex(columns = train_df.columns)\n",
915 | "train_df.head()"
916 | ]
917 | },
918 | {
919 | "cell_type": "code",
920 | "execution_count": 9,
921 | "metadata": {},
922 | "outputs": [
923 | {
924 | "data": {
925 | "text/html": [
926 | "\n",
927 | "\n",
940 | "
\n",
941 | " \n",
942 | " \n",
943 | " | \n",
944 | " id | \n",
945 | " cycle | \n",
946 | " setting1 | \n",
947 | " setting2 | \n",
948 | " setting3 | \n",
949 | " s1 | \n",
950 | " s2 | \n",
951 | " s3 | \n",
952 | " s4 | \n",
953 | " s5 | \n",
954 | " ... | \n",
955 | " s13 | \n",
956 | " s14 | \n",
957 | " s15 | \n",
958 | " s16 | \n",
959 | " s17 | \n",
960 | " s18 | \n",
961 | " s19 | \n",
962 | " s20 | \n",
963 | " s21 | \n",
964 | " cycle_norm | \n",
965 | "
\n",
966 | " \n",
967 | " \n",
968 | " \n",
969 | " 0 | \n",
970 | " 1 | \n",
971 | " 1 | \n",
972 | " 0.632184 | \n",
973 | " 0.750000 | \n",
974 | " 0.0 | \n",
975 | " 0.0 | \n",
976 | " 0.545181 | \n",
977 | " 0.310661 | \n",
978 | " 0.269413 | \n",
979 | " 0.0 | \n",
980 | " ... | \n",
981 | " 0.220588 | \n",
982 | " 0.132160 | \n",
983 | " 0.308965 | \n",
984 | " 0.0 | \n",
985 | " 0.333333 | \n",
986 | " 0.0 | \n",
987 | " 0.0 | \n",
988 | " 0.558140 | \n",
989 | " 0.661834 | \n",
990 | " 0.00000 | \n",
991 | "
\n",
992 | " \n",
993 | " 1 | \n",
994 | " 1 | \n",
995 | " 2 | \n",
996 | " 0.344828 | \n",
997 | " 0.250000 | \n",
998 | " 0.0 | \n",
999 | " 0.0 | \n",
1000 | " 0.150602 | \n",
1001 | " 0.379551 | \n",
1002 | " 0.222316 | \n",
1003 | " 0.0 | \n",
1004 | " ... | \n",
1005 | " 0.264706 | \n",
1006 | " 0.204768 | \n",
1007 | " 0.213159 | \n",
1008 | " 0.0 | \n",
1009 | " 0.416667 | \n",
1010 | " 0.0 | \n",
1011 | " 0.0 | \n",
1012 | " 0.682171 | \n",
1013 | " 0.686827 | \n",
1014 | " 0.00277 | \n",
1015 | "
\n",
1016 | " \n",
1017 | " 2 | \n",
1018 | " 1 | \n",
1019 | " 3 | \n",
1020 | " 0.517241 | \n",
1021 | " 0.583333 | \n",
1022 | " 0.0 | \n",
1023 | " 0.0 | \n",
1024 | " 0.376506 | \n",
1025 | " 0.346632 | \n",
1026 | " 0.322248 | \n",
1027 | " 0.0 | \n",
1028 | " ... | \n",
1029 | " 0.220588 | \n",
1030 | " 0.155640 | \n",
1031 | " 0.458638 | \n",
1032 | " 0.0 | \n",
1033 | " 0.416667 | \n",
1034 | " 0.0 | \n",
1035 | " 0.0 | \n",
1036 | " 0.728682 | \n",
1037 | " 0.721348 | \n",
1038 | " 0.00554 | \n",
1039 | "
\n",
1040 | " \n",
1041 | " 3 | \n",
1042 | " 1 | \n",
1043 | " 4 | \n",
1044 | " 0.741379 | \n",
1045 | " 0.500000 | \n",
1046 | " 0.0 | \n",
1047 | " 0.0 | \n",
1048 | " 0.370482 | \n",
1049 | " 0.285154 | \n",
1050 | " 0.408001 | \n",
1051 | " 0.0 | \n",
1052 | " ... | \n",
1053 | " 0.250000 | \n",
1054 | " 0.170090 | \n",
1055 | " 0.257022 | \n",
1056 | " 0.0 | \n",
1057 | " 0.250000 | \n",
1058 | " 0.0 | \n",
1059 | " 0.0 | \n",
1060 | " 0.666667 | \n",
1061 | " 0.662110 | \n",
1062 | " 0.00831 | \n",
1063 | "
\n",
1064 | " \n",
1065 | " 4 | \n",
1066 | " 1 | \n",
1067 | " 5 | \n",
1068 | " 0.580460 | \n",
1069 | " 0.500000 | \n",
1070 | " 0.0 | \n",
1071 | " 0.0 | \n",
1072 | " 0.391566 | \n",
1073 | " 0.352082 | \n",
1074 | " 0.332039 | \n",
1075 | " 0.0 | \n",
1076 | " ... | \n",
1077 | " 0.220588 | \n",
1078 | " 0.152751 | \n",
1079 | " 0.300885 | \n",
1080 | " 0.0 | \n",
1081 | " 0.166667 | \n",
1082 | " 0.0 | \n",
1083 | " 0.0 | \n",
1084 | " 0.658915 | \n",
1085 | " 0.716377 | \n",
1086 | " 0.01108 | \n",
1087 | "
\n",
1088 | " \n",
1089 | "
\n",
1090 | "
5 rows × 27 columns
\n",
1091 | "
"
1092 | ],
1093 | "text/plain": [
1094 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
1095 | "0 1 1 0.632184 0.750000 0.0 0.0 0.545181 0.310661 0.269413 \n",
1096 | "1 1 2 0.344828 0.250000 0.0 0.0 0.150602 0.379551 0.222316 \n",
1097 | "2 1 3 0.517241 0.583333 0.0 0.0 0.376506 0.346632 0.322248 \n",
1098 | "3 1 4 0.741379 0.500000 0.0 0.0 0.370482 0.285154 0.408001 \n",
1099 | "4 1 5 0.580460 0.500000 0.0 0.0 0.391566 0.352082 0.332039 \n",
1100 | "\n",
1101 | " s5 ... s13 s14 s15 s16 s17 s18 s19 \\\n",
1102 | "0 0.0 ... 0.220588 0.132160 0.308965 0.0 0.333333 0.0 0.0 \n",
1103 | "1 0.0 ... 0.264706 0.204768 0.213159 0.0 0.416667 0.0 0.0 \n",
1104 | "2 0.0 ... 0.220588 0.155640 0.458638 0.0 0.416667 0.0 0.0 \n",
1105 | "3 0.0 ... 0.250000 0.170090 0.257022 0.0 0.250000 0.0 0.0 \n",
1106 | "4 0.0 ... 0.220588 0.152751 0.300885 0.0 0.166667 0.0 0.0 \n",
1107 | "\n",
1108 | " s20 s21 cycle_norm \n",
1109 | "0 0.558140 0.661834 0.00000 \n",
1110 | "1 0.682171 0.686827 0.00277 \n",
1111 | "2 0.728682 0.721348 0.00554 \n",
1112 | "3 0.666667 0.662110 0.00831 \n",
1113 | "4 0.658915 0.716377 0.01108 \n",
1114 | "\n",
1115 | "[5 rows x 27 columns]"
1116 | ]
1117 | },
1118 | "execution_count": 9,
1119 | "metadata": {},
1120 | "output_type": "execute_result"
1121 | }
1122 | ],
1123 | "source": [
1124 | "test_df['cycle_norm'] = test_df['cycle']\n",
1125 | "norm_test_df = pd.DataFrame(min_max_scaler.transform(test_df[cols_normalize]), \n",
1126 | " columns=cols_normalize, \n",
1127 | " index=test_df.index)\n",
1128 | "test_join_df = test_df[test_df.columns.difference(cols_normalize)].join(norm_test_df)\n",
1129 | "test_df = test_join_df.reindex(columns = test_df.columns)\n",
1130 | "test_df = test_df.reset_index(drop=True)\n",
1131 | "test_df.head()"
1132 | ]
1133 | },
1134 | {
1135 | "cell_type": "code",
1136 | "execution_count": 10,
1137 | "metadata": {},
1138 | "outputs": [],
1139 | "source": [
1140 | "# generate column max for test data\n",
1141 | "rul = pd.DataFrame(test_df.groupby('id')['cycle'].max()).reset_index()\n",
1142 | "rul.columns = ['id', 'max']\n",
1143 | "truth_df.columns = ['more']\n",
1144 | "truth_df['id'] = truth_df.index + 1\n",
1145 | "truth_df['max'] = rul['max'] + truth_df['more']\n",
1146 | "truth_df.drop('more', axis=1, inplace=True)"
1147 | ]
1148 | },
1149 | {
1150 | "cell_type": "code",
1151 | "execution_count": 11,
1152 | "metadata": {},
1153 | "outputs": [
1154 | {
1155 | "data": {
1156 | "text/html": [
1157 | "\n",
1158 | "\n",
1171 | "
\n",
1172 | " \n",
1173 | " \n",
1174 | " | \n",
1175 | " id | \n",
1176 | " cycle | \n",
1177 | " setting1 | \n",
1178 | " setting2 | \n",
1179 | " setting3 | \n",
1180 | " s1 | \n",
1181 | " s2 | \n",
1182 | " s3 | \n",
1183 | " s4 | \n",
1184 | " s5 | \n",
1185 | " ... | \n",
1186 | " s14 | \n",
1187 | " s15 | \n",
1188 | " s16 | \n",
1189 | " s17 | \n",
1190 | " s18 | \n",
1191 | " s19 | \n",
1192 | " s20 | \n",
1193 | " s21 | \n",
1194 | " cycle_norm | \n",
1195 | " RUL | \n",
1196 | "
\n",
1197 | " \n",
1198 | " \n",
1199 | " \n",
1200 | " 0 | \n",
1201 | " 1 | \n",
1202 | " 1 | \n",
1203 | " 0.632184 | \n",
1204 | " 0.750000 | \n",
1205 | " 0.0 | \n",
1206 | " 0.0 | \n",
1207 | " 0.545181 | \n",
1208 | " 0.310661 | \n",
1209 | " 0.269413 | \n",
1210 | " 0.0 | \n",
1211 | " ... | \n",
1212 | " 0.132160 | \n",
1213 | " 0.308965 | \n",
1214 | " 0.0 | \n",
1215 | " 0.333333 | \n",
1216 | " 0.0 | \n",
1217 | " 0.0 | \n",
1218 | " 0.558140 | \n",
1219 | " 0.661834 | \n",
1220 | " 0.00000 | \n",
1221 | " 142 | \n",
1222 | "
\n",
1223 | " \n",
1224 | " 1 | \n",
1225 | " 1 | \n",
1226 | " 2 | \n",
1227 | " 0.344828 | \n",
1228 | " 0.250000 | \n",
1229 | " 0.0 | \n",
1230 | " 0.0 | \n",
1231 | " 0.150602 | \n",
1232 | " 0.379551 | \n",
1233 | " 0.222316 | \n",
1234 | " 0.0 | \n",
1235 | " ... | \n",
1236 | " 0.204768 | \n",
1237 | " 0.213159 | \n",
1238 | " 0.0 | \n",
1239 | " 0.416667 | \n",
1240 | " 0.0 | \n",
1241 | " 0.0 | \n",
1242 | " 0.682171 | \n",
1243 | " 0.686827 | \n",
1244 | " 0.00277 | \n",
1245 | " 141 | \n",
1246 | "
\n",
1247 | " \n",
1248 | " 2 | \n",
1249 | " 1 | \n",
1250 | " 3 | \n",
1251 | " 0.517241 | \n",
1252 | " 0.583333 | \n",
1253 | " 0.0 | \n",
1254 | " 0.0 | \n",
1255 | " 0.376506 | \n",
1256 | " 0.346632 | \n",
1257 | " 0.322248 | \n",
1258 | " 0.0 | \n",
1259 | " ... | \n",
1260 | " 0.155640 | \n",
1261 | " 0.458638 | \n",
1262 | " 0.0 | \n",
1263 | " 0.416667 | \n",
1264 | " 0.0 | \n",
1265 | " 0.0 | \n",
1266 | " 0.728682 | \n",
1267 | " 0.721348 | \n",
1268 | " 0.00554 | \n",
1269 | " 140 | \n",
1270 | "
\n",
1271 | " \n",
1272 | " 3 | \n",
1273 | " 1 | \n",
1274 | " 4 | \n",
1275 | " 0.741379 | \n",
1276 | " 0.500000 | \n",
1277 | " 0.0 | \n",
1278 | " 0.0 | \n",
1279 | " 0.370482 | \n",
1280 | " 0.285154 | \n",
1281 | " 0.408001 | \n",
1282 | " 0.0 | \n",
1283 | " ... | \n",
1284 | " 0.170090 | \n",
1285 | " 0.257022 | \n",
1286 | " 0.0 | \n",
1287 | " 0.250000 | \n",
1288 | " 0.0 | \n",
1289 | " 0.0 | \n",
1290 | " 0.666667 | \n",
1291 | " 0.662110 | \n",
1292 | " 0.00831 | \n",
1293 | " 139 | \n",
1294 | "
\n",
1295 | " \n",
1296 | " 4 | \n",
1297 | " 1 | \n",
1298 | " 5 | \n",
1299 | " 0.580460 | \n",
1300 | " 0.500000 | \n",
1301 | " 0.0 | \n",
1302 | " 0.0 | \n",
1303 | " 0.391566 | \n",
1304 | " 0.352082 | \n",
1305 | " 0.332039 | \n",
1306 | " 0.0 | \n",
1307 | " ... | \n",
1308 | " 0.152751 | \n",
1309 | " 0.300885 | \n",
1310 | " 0.0 | \n",
1311 | " 0.166667 | \n",
1312 | " 0.0 | \n",
1313 | " 0.0 | \n",
1314 | " 0.658915 | \n",
1315 | " 0.716377 | \n",
1316 | " 0.01108 | \n",
1317 | " 138 | \n",
1318 | "
\n",
1319 | " \n",
1320 | "
\n",
1321 | "
5 rows × 28 columns
\n",
1322 | "
"
1323 | ],
1324 | "text/plain": [
1325 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
1326 | "0 1 1 0.632184 0.750000 0.0 0.0 0.545181 0.310661 0.269413 \n",
1327 | "1 1 2 0.344828 0.250000 0.0 0.0 0.150602 0.379551 0.222316 \n",
1328 | "2 1 3 0.517241 0.583333 0.0 0.0 0.376506 0.346632 0.322248 \n",
1329 | "3 1 4 0.741379 0.500000 0.0 0.0 0.370482 0.285154 0.408001 \n",
1330 | "4 1 5 0.580460 0.500000 0.0 0.0 0.391566 0.352082 0.332039 \n",
1331 | "\n",
1332 | " s5 ... s14 s15 s16 s17 s18 s19 s20 s21 \\\n",
1333 | "0 0.0 ... 0.132160 0.308965 0.0 0.333333 0.0 0.0 0.558140 0.661834 \n",
1334 | "1 0.0 ... 0.204768 0.213159 0.0 0.416667 0.0 0.0 0.682171 0.686827 \n",
1335 | "2 0.0 ... 0.155640 0.458638 0.0 0.416667 0.0 0.0 0.728682 0.721348 \n",
1336 | "3 0.0 ... 0.170090 0.257022 0.0 0.250000 0.0 0.0 0.666667 0.662110 \n",
1337 | "4 0.0 ... 0.152751 0.300885 0.0 0.166667 0.0 0.0 0.658915 0.716377 \n",
1338 | "\n",
1339 | " cycle_norm RUL \n",
1340 | "0 0.00000 142 \n",
1341 | "1 0.00277 141 \n",
1342 | "2 0.00554 140 \n",
1343 | "3 0.00831 139 \n",
1344 | "4 0.01108 138 \n",
1345 | "\n",
1346 | "[5 rows x 28 columns]"
1347 | ]
1348 | },
1349 | "execution_count": 11,
1350 | "metadata": {},
1351 | "output_type": "execute_result"
1352 | }
1353 | ],
1354 | "source": [
1355 | "# generate RUL for test data\n",
1356 | "test_df = test_df.merge(truth_df, on=['id'], how='left')\n",
1357 | "test_df['RUL'] = test_df['max'] - test_df['cycle']\n",
1358 | "test_df.drop('max', axis=1, inplace=True)\n",
1359 | "test_df.head()"
1360 | ]
1361 | },
1362 | {
1363 | "cell_type": "code",
1364 | "execution_count": 12,
1365 | "metadata": {},
1366 | "outputs": [
1367 | {
1368 | "data": {
1369 | "text/html": [
1370 | "\n",
1371 | "\n",
1384 | "
\n",
1385 | " \n",
1386 | " \n",
1387 | " | \n",
1388 | " id | \n",
1389 | " cycle | \n",
1390 | " setting1 | \n",
1391 | " setting2 | \n",
1392 | " setting3 | \n",
1393 | " s1 | \n",
1394 | " s2 | \n",
1395 | " s3 | \n",
1396 | " s4 | \n",
1397 | " s5 | \n",
1398 | " ... | \n",
1399 | " s16 | \n",
1400 | " s17 | \n",
1401 | " s18 | \n",
1402 | " s19 | \n",
1403 | " s20 | \n",
1404 | " s21 | \n",
1405 | " cycle_norm | \n",
1406 | " RUL | \n",
1407 | " label1 | \n",
1408 | " label2 | \n",
1409 | "
\n",
1410 | " \n",
1411 | " \n",
1412 | " \n",
1413 | " 0 | \n",
1414 | " 1 | \n",
1415 | " 1 | \n",
1416 | " 0.632184 | \n",
1417 | " 0.750000 | \n",
1418 | " 0.0 | \n",
1419 | " 0.0 | \n",
1420 | " 0.545181 | \n",
1421 | " 0.310661 | \n",
1422 | " 0.269413 | \n",
1423 | " 0.0 | \n",
1424 | " ... | \n",
1425 | " 0.0 | \n",
1426 | " 0.333333 | \n",
1427 | " 0.0 | \n",
1428 | " 0.0 | \n",
1429 | " 0.558140 | \n",
1430 | " 0.661834 | \n",
1431 | " 0.00000 | \n",
1432 | " 142 | \n",
1433 | " 0 | \n",
1434 | " 0 | \n",
1435 | "
\n",
1436 | " \n",
1437 | " 1 | \n",
1438 | " 1 | \n",
1439 | " 2 | \n",
1440 | " 0.344828 | \n",
1441 | " 0.250000 | \n",
1442 | " 0.0 | \n",
1443 | " 0.0 | \n",
1444 | " 0.150602 | \n",
1445 | " 0.379551 | \n",
1446 | " 0.222316 | \n",
1447 | " 0.0 | \n",
1448 | " ... | \n",
1449 | " 0.0 | \n",
1450 | " 0.416667 | \n",
1451 | " 0.0 | \n",
1452 | " 0.0 | \n",
1453 | " 0.682171 | \n",
1454 | " 0.686827 | \n",
1455 | " 0.00277 | \n",
1456 | " 141 | \n",
1457 | " 0 | \n",
1458 | " 0 | \n",
1459 | "
\n",
1460 | " \n",
1461 | " 2 | \n",
1462 | " 1 | \n",
1463 | " 3 | \n",
1464 | " 0.517241 | \n",
1465 | " 0.583333 | \n",
1466 | " 0.0 | \n",
1467 | " 0.0 | \n",
1468 | " 0.376506 | \n",
1469 | " 0.346632 | \n",
1470 | " 0.322248 | \n",
1471 | " 0.0 | \n",
1472 | " ... | \n",
1473 | " 0.0 | \n",
1474 | " 0.416667 | \n",
1475 | " 0.0 | \n",
1476 | " 0.0 | \n",
1477 | " 0.728682 | \n",
1478 | " 0.721348 | \n",
1479 | " 0.00554 | \n",
1480 | " 140 | \n",
1481 | " 0 | \n",
1482 | " 0 | \n",
1483 | "
\n",
1484 | " \n",
1485 | " 3 | \n",
1486 | " 1 | \n",
1487 | " 4 | \n",
1488 | " 0.741379 | \n",
1489 | " 0.500000 | \n",
1490 | " 0.0 | \n",
1491 | " 0.0 | \n",
1492 | " 0.370482 | \n",
1493 | " 0.285154 | \n",
1494 | " 0.408001 | \n",
1495 | " 0.0 | \n",
1496 | " ... | \n",
1497 | " 0.0 | \n",
1498 | " 0.250000 | \n",
1499 | " 0.0 | \n",
1500 | " 0.0 | \n",
1501 | " 0.666667 | \n",
1502 | " 0.662110 | \n",
1503 | " 0.00831 | \n",
1504 | " 139 | \n",
1505 | " 0 | \n",
1506 | " 0 | \n",
1507 | "
\n",
1508 | " \n",
1509 | " 4 | \n",
1510 | " 1 | \n",
1511 | " 5 | \n",
1512 | " 0.580460 | \n",
1513 | " 0.500000 | \n",
1514 | " 0.0 | \n",
1515 | " 0.0 | \n",
1516 | " 0.391566 | \n",
1517 | " 0.352082 | \n",
1518 | " 0.332039 | \n",
1519 | " 0.0 | \n",
1520 | " ... | \n",
1521 | " 0.0 | \n",
1522 | " 0.166667 | \n",
1523 | " 0.0 | \n",
1524 | " 0.0 | \n",
1525 | " 0.658915 | \n",
1526 | " 0.716377 | \n",
1527 | " 0.01108 | \n",
1528 | " 138 | \n",
1529 | " 0 | \n",
1530 | " 0 | \n",
1531 | "
\n",
1532 | " \n",
1533 | "
\n",
1534 | "
5 rows × 30 columns
\n",
1535 | "
"
1536 | ],
1537 | "text/plain": [
1538 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
1539 | "0 1 1 0.632184 0.750000 0.0 0.0 0.545181 0.310661 0.269413 \n",
1540 | "1 1 2 0.344828 0.250000 0.0 0.0 0.150602 0.379551 0.222316 \n",
1541 | "2 1 3 0.517241 0.583333 0.0 0.0 0.376506 0.346632 0.322248 \n",
1542 | "3 1 4 0.741379 0.500000 0.0 0.0 0.370482 0.285154 0.408001 \n",
1543 | "4 1 5 0.580460 0.500000 0.0 0.0 0.391566 0.352082 0.332039 \n",
1544 | "\n",
1545 | " s5 ... s16 s17 s18 s19 s20 s21 cycle_norm RUL \\\n",
1546 | "0 0.0 ... 0.0 0.333333 0.0 0.0 0.558140 0.661834 0.00000 142 \n",
1547 | "1 0.0 ... 0.0 0.416667 0.0 0.0 0.682171 0.686827 0.00277 141 \n",
1548 | "2 0.0 ... 0.0 0.416667 0.0 0.0 0.728682 0.721348 0.00554 140 \n",
1549 | "3 0.0 ... 0.0 0.250000 0.0 0.0 0.666667 0.662110 0.00831 139 \n",
1550 | "4 0.0 ... 0.0 0.166667 0.0 0.0 0.658915 0.716377 0.01108 138 \n",
1551 | "\n",
1552 | " label1 label2 \n",
1553 | "0 0 0 \n",
1554 | "1 0 0 \n",
1555 | "2 0 0 \n",
1556 | "3 0 0 \n",
1557 | "4 0 0 \n",
1558 | "\n",
1559 | "[5 rows x 30 columns]"
1560 | ]
1561 | },
1562 | "execution_count": 12,
1563 | "metadata": {},
1564 | "output_type": "execute_result"
1565 | }
1566 | ],
1567 | "source": [
1568 | "# generate label columns w0 and w1 for test data\n",
1569 | "test_df['label1'] = np.where(test_df['RUL'] <= w1, 1, 0 )\n",
1570 | "test_df['label2'] = test_df['label1']\n",
1571 | "test_df.loc[test_df['RUL'] <= w0, 'label2'] = 2\n",
1572 | "test_df.head()"
1573 | ]
1574 | },
1575 | {
1576 | "cell_type": "code",
1577 | "execution_count": 13,
1578 | "metadata": {},
1579 | "outputs": [],
1580 | "source": [
1581 | "def gen_label(df):\n",
1582 | " y = df['label1']\n",
1583 | " df.drop(['label1','label2','RUL'],axis=1,inplace=True)\n",
1584 | " return y,df"
1585 | ]
1586 | },
1587 | {
1588 | "cell_type": "code",
1589 | "execution_count": 14,
1590 | "metadata": {},
1591 | "outputs": [
1592 | {
1593 | "name": "stdout",
1594 | "output_type": "stream",
1595 | "text": [
1596 | "(20631,) (20631, 27)\n",
1597 | "(13096,) (13096, 27)\n"
1598 | ]
1599 | }
1600 | ],
1601 | "source": [
1602 | "y_train,X_train = gen_label(train_df)\n",
1603 | "print(y_train.shape,X_train.shape)\n",
1604 | "y_test,X_test = gen_label(test_df)\n",
1605 | "print(y_test.shape,X_test.shape)"
1606 | ]
1607 | },
1608 | {
1609 | "cell_type": "code",
1610 | "execution_count": 15,
1611 | "metadata": {},
1612 | "outputs": [],
1613 | "source": [
1614 | "from sklearn.model_selection import train_test_split\n",
1615 | "\n",
1616 | "X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.075, random_state=42)"
1617 | ]
1618 | },
1619 | {
1620 | "cell_type": "code",
1621 | "execution_count": 16,
1622 | "metadata": {},
1623 | "outputs": [
1624 | {
1625 | "name": "stdout",
1626 | "output_type": "stream",
1627 | "text": [
1628 | "acc for val: 0.9683462532299741\n"
1629 | ]
1630 | }
1631 | ],
1632 | "source": [
1633 | "from sklearn.linear_model import LogisticRegression\n",
1634 | "from sklearn.metrics import accuracy_score\n",
1635 | "\n",
1636 | "lr = LogisticRegression(C=40)\n",
1637 | "lr.fit(X_train,y_train)\n",
1638 | "y_pred = lr.predict(X_val)\n",
1639 | "print('acc for val: ',accuracy_score(y_val,y_pred))"
1640 | ]
1641 | },
1642 | {
1643 | "cell_type": "code",
1644 | "execution_count": 17,
1645 | "metadata": {},
1646 | "outputs": [
1647 | {
1648 | "name": "stdout",
1649 | "output_type": "stream",
1650 | "text": [
1651 | "Confusion matrix\n",
1652 | "- x-axis is true labels.\n",
1653 | "- y-axis is predicted labels\n"
1654 | ]
1655 | },
1656 | {
1657 | "data": {
1658 | "text/plain": [
1659 | "array([[1314, 19],\n",
1660 | " [ 30, 185]])"
1661 | ]
1662 | },
1663 | "execution_count": 17,
1664 | "metadata": {},
1665 | "output_type": "execute_result"
1666 | }
1667 | ],
1668 | "source": [
1669 | "print('Confusion matrix\\n- x-axis is true labels.\\n- y-axis is predicted labels')\n",
1670 | "cm = confusion_matrix(y_val, y_pred)\n",
1671 | "cm"
1672 | ]
1673 | },
1674 | {
1675 | "cell_type": "code",
1676 | "execution_count": 18,
1677 | "metadata": {},
1678 | "outputs": [
1679 | {
1680 | "name": "stdout",
1681 | "output_type": "stream",
1682 | "text": [
1683 | "precision = 0.9068627450980392 \n",
1684 | " recall = 0.8604651162790697\n"
1685 | ]
1686 | }
1687 | ],
1688 | "source": [
1689 | "# compute precision and recall\n",
1690 | "precision = precision_score(y_val, y_pred)\n",
1691 | "recall = recall_score(y_val, y_pred)\n",
1692 | "print( 'precision = ', precision, '\\n', 'recall = ', recall)"
1693 | ]
1694 | },
1695 | {
1696 | "cell_type": "code",
1697 | "execution_count": 19,
1698 | "metadata": {},
1699 | "outputs": [
1700 | {
1701 | "name": "stdout",
1702 | "output_type": "stream",
1703 | "text": [
1704 | "acc for test: 0.9858735491753207\n"
1705 | ]
1706 | }
1707 | ],
1708 | "source": [
1709 | "y_pred_test = lr.predict(X_test)\n",
1710 | "print('acc for test: ',accuracy_score(y_test,y_pred_test))"
1711 | ]
1712 | },
1713 | {
1714 | "cell_type": "code",
1715 | "execution_count": 20,
1716 | "metadata": {},
1717 | "outputs": [
1718 | {
1719 | "name": "stdout",
1720 | "output_type": "stream",
1721 | "text": [
1722 | "Confusion matrix\n",
1723 | "- x-axis is true labels.\n",
1724 | "- y-axis is predicted labels\n"
1725 | ]
1726 | },
1727 | {
1728 | "data": {
1729 | "text/plain": [
1730 | "array([[12715, 49],\n",
1731 | " [ 136, 196]])"
1732 | ]
1733 | },
1734 | "execution_count": 20,
1735 | "metadata": {},
1736 | "output_type": "execute_result"
1737 | }
1738 | ],
1739 | "source": [
1740 | "print('Confusion matrix\\n- x-axis is true labels.\\n- y-axis is predicted labels')\n",
1741 | "cm = confusion_matrix(y_test, y_pred_test)\n",
1742 | "cm"
1743 | ]
1744 | },
1745 | {
1746 | "cell_type": "code",
1747 | "execution_count": 21,
1748 | "metadata": {},
1749 | "outputs": [
1750 | {
1751 | "name": "stdout",
1752 | "output_type": "stream",
1753 | "text": [
1754 | "Precision: 0.8 \n",
1755 | " Recall: 0.5903614457831325 \n",
1756 | " F1-score: 0.6793760831889082\n"
1757 | ]
1758 | }
1759 | ],
1760 | "source": [
1761 | "# compute precision and recall\n",
1762 | "precision_test = precision_score(y_test, y_pred_test)\n",
1763 | "recall_test = recall_score(y_test, y_pred_test)\n",
1764 | "f1_test = 2 * (precision_test * recall_test) / (precision_test + recall_test)\n",
1765 | "print( 'Precision: ', precision_test, '\\n', 'Recall: ', recall_test,'\\n', 'F1-score:', f1_test )"
1766 | ]
1767 | },
1768 | {
1769 | "cell_type": "code",
1770 | "execution_count": 22,
1771 | "metadata": {},
1772 | "outputs": [
1773 | {
1774 | "data": {
1775 | "text/html": [
1776 | "\n",
1777 | "\n",
1790 | "
\n",
1791 | " \n",
1792 | " \n",
1793 | " | \n",
1794 | " Accuracy | \n",
1795 | " Precision | \n",
1796 | " Recall | \n",
1797 | " F1-score | \n",
1798 | "
\n",
1799 | " \n",
1800 | " \n",
1801 | " \n",
1802 | " logistic | \n",
1803 | " 0.985874 | \n",
1804 | " 0.8 | \n",
1805 | " 0.590361 | \n",
1806 | " 0.679376 | \n",
1807 | "
\n",
1808 | " \n",
1809 | "
\n",
1810 | "
"
1811 | ],
1812 | "text/plain": [
1813 | " Accuracy Precision Recall F1-score\n",
1814 | "logistic 0.985874 0.8 0.590361 0.679376"
1815 | ]
1816 | },
1817 | "execution_count": 22,
1818 | "metadata": {},
1819 | "output_type": "execute_result"
1820 | }
1821 | ],
1822 | "source": [
1823 | "results_df = pd.DataFrame([[accuracy_score(y_test,y_pred_test),precision_test,recall_test,f1_test],],\n",
1824 | " columns = ['Accuracy', 'Precision', 'Recall', 'F1-score'],\n",
1825 | " index = ['logistic'])\n",
1826 | "results_df"
1827 | ]
1828 | },
1829 | {
1830 | "cell_type": "code",
1831 | "execution_count": null,
1832 | "metadata": {},
1833 | "outputs": [],
1834 | "source": []
1835 | }
1836 | ],
1837 | "metadata": {
1838 | "kernelspec": {
1839 | "display_name": "Python 3",
1840 | "language": "python",
1841 | "name": "python3"
1842 | },
1843 | "language_info": {
1844 | "codemirror_mode": {
1845 | "name": "ipython",
1846 | "version": 3
1847 | },
1848 | "file_extension": ".py",
1849 | "mimetype": "text/x-python",
1850 | "name": "python",
1851 | "nbconvert_exporter": "python",
1852 | "pygments_lexer": "ipython3",
1853 | "version": "3.6.2"
1854 | }
1855 | },
1856 | "nbformat": 4,
1857 | "nbformat_minor": 2
1858 | }
1859 |
--------------------------------------------------------------------------------
/data1/.ipynb_checkpoints/Untitled-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 2
6 | }
7 |
--------------------------------------------------------------------------------
/data1/.ipynb_checkpoints/tpot-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stderr",
10 | "output_type": "stream",
11 | "text": [
12 | "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
13 | " from ._conv import register_converters as _register_converters\n",
14 | "Using TensorFlow backend.\n"
15 | ]
16 | }
17 | ],
18 | "source": [
19 | "import keras\n",
20 | "import pandas as pd\n",
21 | "import numpy as np\n",
22 | "import matplotlib.pyplot as plt\n",
23 | "\n",
24 | "# Setting seed for reproducibility\n",
25 | "np.random.seed(1234) \n",
26 | "PYTHONHASHSEED = 0\n",
27 | "from sklearn import preprocessing\n",
28 | "from sklearn.metrics import confusion_matrix, recall_score, precision_score\n",
29 | "from keras.models import Sequential\n",
30 | "from keras.layers import Dense, Dropout, LSTM, Activation\n",
31 | "%matplotlib inline"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 2,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "# read training data \n",
41 | "train_df = pd.read_csv('PM_train_01.txt', sep=\" \", header=None)\n",
42 | "train_df.drop(train_df.columns[[26, 27]], axis=1, inplace=True)\n",
43 | "train_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n",
44 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n",
45 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 3,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "train_df.columns\n",
55 | "cols = ['setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n",
56 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n",
57 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 4,
63 | "metadata": {},
64 | "outputs": [
65 | {
66 | "data": {
67 | "text/plain": [
68 | "'setting1'"
69 | ]
70 | },
71 | "execution_count": 4,
72 | "metadata": {},
73 | "output_type": "execute_result"
74 | }
75 | ],
76 | "source": [
77 | "train_df.columns[2]"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 5,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": [
86 | "# read test data\n",
87 | "test_df = pd.read_csv('PM_test_01.txt', sep=\" \", header=None)\n",
88 | "test_df.drop(test_df.columns[[26, 27]], axis=1, inplace=True)\n",
89 | "test_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n",
90 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n",
91 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 6,
97 | "metadata": {},
98 | "outputs": [],
99 | "source": [
100 | "# read ground truth data\n",
101 | "truth_df = pd.read_csv('PM_truth_01.txt', sep=\" \", header=None)\n",
102 | "truth_df.drop(truth_df.columns[[1]], axis=1, inplace=True)"
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": 7,
108 | "metadata": {},
109 | "outputs": [
110 | {
111 | "data": {
112 | "text/html": [
113 | "\n",
114 | "\n",
127 | "
\n",
128 | " \n",
129 | " \n",
130 | " | \n",
131 | " id | \n",
132 | " cycle | \n",
133 | " setting1 | \n",
134 | " setting2 | \n",
135 | " setting3 | \n",
136 | " s1 | \n",
137 | " s2 | \n",
138 | " s3 | \n",
139 | " s4 | \n",
140 | " s5 | \n",
141 | " ... | \n",
142 | " s12 | \n",
143 | " s13 | \n",
144 | " s14 | \n",
145 | " s15 | \n",
146 | " s16 | \n",
147 | " s17 | \n",
148 | " s18 | \n",
149 | " s19 | \n",
150 | " s20 | \n",
151 | " s21 | \n",
152 | "
\n",
153 | " \n",
154 | " \n",
155 | " \n",
156 | " 0 | \n",
157 | " 1 | \n",
158 | " 1 | \n",
159 | " -0.0007 | \n",
160 | " -0.0004 | \n",
161 | " 100.0 | \n",
162 | " 518.67 | \n",
163 | " 641.82 | \n",
164 | " 1589.70 | \n",
165 | " 1400.60 | \n",
166 | " 14.62 | \n",
167 | " ... | \n",
168 | " 521.66 | \n",
169 | " 2388.02 | \n",
170 | " 8138.62 | \n",
171 | " 8.4195 | \n",
172 | " 0.03 | \n",
173 | " 392 | \n",
174 | " 2388 | \n",
175 | " 100.0 | \n",
176 | " 39.06 | \n",
177 | " 23.4190 | \n",
178 | "
\n",
179 | " \n",
180 | " 1 | \n",
181 | " 1 | \n",
182 | " 2 | \n",
183 | " 0.0019 | \n",
184 | " -0.0003 | \n",
185 | " 100.0 | \n",
186 | " 518.67 | \n",
187 | " 642.15 | \n",
188 | " 1591.82 | \n",
189 | " 1403.14 | \n",
190 | " 14.62 | \n",
191 | " ... | \n",
192 | " 522.28 | \n",
193 | " 2388.07 | \n",
194 | " 8131.49 | \n",
195 | " 8.4318 | \n",
196 | " 0.03 | \n",
197 | " 392 | \n",
198 | " 2388 | \n",
199 | " 100.0 | \n",
200 | " 39.00 | \n",
201 | " 23.4236 | \n",
202 | "
\n",
203 | " \n",
204 | " 2 | \n",
205 | " 1 | \n",
206 | " 3 | \n",
207 | " -0.0043 | \n",
208 | " 0.0003 | \n",
209 | " 100.0 | \n",
210 | " 518.67 | \n",
211 | " 642.35 | \n",
212 | " 1587.99 | \n",
213 | " 1404.20 | \n",
214 | " 14.62 | \n",
215 | " ... | \n",
216 | " 522.42 | \n",
217 | " 2388.03 | \n",
218 | " 8133.23 | \n",
219 | " 8.4178 | \n",
220 | " 0.03 | \n",
221 | " 390 | \n",
222 | " 2388 | \n",
223 | " 100.0 | \n",
224 | " 38.95 | \n",
225 | " 23.3442 | \n",
226 | "
\n",
227 | " \n",
228 | " 3 | \n",
229 | " 1 | \n",
230 | " 4 | \n",
231 | " 0.0007 | \n",
232 | " 0.0000 | \n",
233 | " 100.0 | \n",
234 | " 518.67 | \n",
235 | " 642.35 | \n",
236 | " 1582.79 | \n",
237 | " 1401.87 | \n",
238 | " 14.62 | \n",
239 | " ... | \n",
240 | " 522.86 | \n",
241 | " 2388.08 | \n",
242 | " 8133.83 | \n",
243 | " 8.3682 | \n",
244 | " 0.03 | \n",
245 | " 392 | \n",
246 | " 2388 | \n",
247 | " 100.0 | \n",
248 | " 38.88 | \n",
249 | " 23.3739 | \n",
250 | "
\n",
251 | " \n",
252 | " 4 | \n",
253 | " 1 | \n",
254 | " 5 | \n",
255 | " -0.0019 | \n",
256 | " -0.0002 | \n",
257 | " 100.0 | \n",
258 | " 518.67 | \n",
259 | " 642.37 | \n",
260 | " 1582.85 | \n",
261 | " 1406.22 | \n",
262 | " 14.62 | \n",
263 | " ... | \n",
264 | " 522.19 | \n",
265 | " 2388.04 | \n",
266 | " 8133.80 | \n",
267 | " 8.4294 | \n",
268 | " 0.03 | \n",
269 | " 393 | \n",
270 | " 2388 | \n",
271 | " 100.0 | \n",
272 | " 38.90 | \n",
273 | " 23.4044 | \n",
274 | "
\n",
275 | " \n",
276 | "
\n",
277 | "
5 rows × 26 columns
\n",
278 | "
"
279 | ],
280 | "text/plain": [
281 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
282 | "0 1 1 -0.0007 -0.0004 100.0 518.67 641.82 1589.70 1400.60 \n",
283 | "1 1 2 0.0019 -0.0003 100.0 518.67 642.15 1591.82 1403.14 \n",
284 | "2 1 3 -0.0043 0.0003 100.0 518.67 642.35 1587.99 1404.20 \n",
285 | "3 1 4 0.0007 0.0000 100.0 518.67 642.35 1582.79 1401.87 \n",
286 | "4 1 5 -0.0019 -0.0002 100.0 518.67 642.37 1582.85 1406.22 \n",
287 | "\n",
288 | " s5 ... s12 s13 s14 s15 s16 s17 s18 s19 \\\n",
289 | "0 14.62 ... 521.66 2388.02 8138.62 8.4195 0.03 392 2388 100.0 \n",
290 | "1 14.62 ... 522.28 2388.07 8131.49 8.4318 0.03 392 2388 100.0 \n",
291 | "2 14.62 ... 522.42 2388.03 8133.23 8.4178 0.03 390 2388 100.0 \n",
292 | "3 14.62 ... 522.86 2388.08 8133.83 8.3682 0.03 392 2388 100.0 \n",
293 | "4 14.62 ... 522.19 2388.04 8133.80 8.4294 0.03 393 2388 100.0 \n",
294 | "\n",
295 | " s20 s21 \n",
296 | "0 39.06 23.4190 \n",
297 | "1 39.00 23.4236 \n",
298 | "2 38.95 23.3442 \n",
299 | "3 38.88 23.3739 \n",
300 | "4 38.90 23.4044 \n",
301 | "\n",
302 | "[5 rows x 26 columns]"
303 | ]
304 | },
305 | "execution_count": 7,
306 | "metadata": {},
307 | "output_type": "execute_result"
308 | }
309 | ],
310 | "source": [
311 | "train_df = train_df.sort_values(['id','cycle'])\n",
312 | "train_df.head()"
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "execution_count": 8,
318 | "metadata": {},
319 | "outputs": [
320 | {
321 | "data": {
322 | "text/html": [
323 | "\n",
324 | "\n",
337 | "
\n",
338 | " \n",
339 | " \n",
340 | " | \n",
341 | " id | \n",
342 | " cycle | \n",
343 | " setting1 | \n",
344 | " setting2 | \n",
345 | " setting3 | \n",
346 | " s1 | \n",
347 | " s2 | \n",
348 | " s3 | \n",
349 | " s4 | \n",
350 | " s5 | \n",
351 | " ... | \n",
352 | " s13 | \n",
353 | " s14 | \n",
354 | " s15 | \n",
355 | " s16 | \n",
356 | " s17 | \n",
357 | " s18 | \n",
358 | " s19 | \n",
359 | " s20 | \n",
360 | " s21 | \n",
361 | " RUL | \n",
362 | "
\n",
363 | " \n",
364 | " \n",
365 | " \n",
366 | " 0 | \n",
367 | " 1 | \n",
368 | " 1 | \n",
369 | " -0.0007 | \n",
370 | " -0.0004 | \n",
371 | " 100.0 | \n",
372 | " 518.67 | \n",
373 | " 641.82 | \n",
374 | " 1589.70 | \n",
375 | " 1400.60 | \n",
376 | " 14.62 | \n",
377 | " ... | \n",
378 | " 2388.02 | \n",
379 | " 8138.62 | \n",
380 | " 8.4195 | \n",
381 | " 0.03 | \n",
382 | " 392 | \n",
383 | " 2388 | \n",
384 | " 100.0 | \n",
385 | " 39.06 | \n",
386 | " 23.4190 | \n",
387 | " 191 | \n",
388 | "
\n",
389 | " \n",
390 | " 1 | \n",
391 | " 1 | \n",
392 | " 2 | \n",
393 | " 0.0019 | \n",
394 | " -0.0003 | \n",
395 | " 100.0 | \n",
396 | " 518.67 | \n",
397 | " 642.15 | \n",
398 | " 1591.82 | \n",
399 | " 1403.14 | \n",
400 | " 14.62 | \n",
401 | " ... | \n",
402 | " 2388.07 | \n",
403 | " 8131.49 | \n",
404 | " 8.4318 | \n",
405 | " 0.03 | \n",
406 | " 392 | \n",
407 | " 2388 | \n",
408 | " 100.0 | \n",
409 | " 39.00 | \n",
410 | " 23.4236 | \n",
411 | " 190 | \n",
412 | "
\n",
413 | " \n",
414 | " 2 | \n",
415 | " 1 | \n",
416 | " 3 | \n",
417 | " -0.0043 | \n",
418 | " 0.0003 | \n",
419 | " 100.0 | \n",
420 | " 518.67 | \n",
421 | " 642.35 | \n",
422 | " 1587.99 | \n",
423 | " 1404.20 | \n",
424 | " 14.62 | \n",
425 | " ... | \n",
426 | " 2388.03 | \n",
427 | " 8133.23 | \n",
428 | " 8.4178 | \n",
429 | " 0.03 | \n",
430 | " 390 | \n",
431 | " 2388 | \n",
432 | " 100.0 | \n",
433 | " 38.95 | \n",
434 | " 23.3442 | \n",
435 | " 189 | \n",
436 | "
\n",
437 | " \n",
438 | " 3 | \n",
439 | " 1 | \n",
440 | " 4 | \n",
441 | " 0.0007 | \n",
442 | " 0.0000 | \n",
443 | " 100.0 | \n",
444 | " 518.67 | \n",
445 | " 642.35 | \n",
446 | " 1582.79 | \n",
447 | " 1401.87 | \n",
448 | " 14.62 | \n",
449 | " ... | \n",
450 | " 2388.08 | \n",
451 | " 8133.83 | \n",
452 | " 8.3682 | \n",
453 | " 0.03 | \n",
454 | " 392 | \n",
455 | " 2388 | \n",
456 | " 100.0 | \n",
457 | " 38.88 | \n",
458 | " 23.3739 | \n",
459 | " 188 | \n",
460 | "
\n",
461 | " \n",
462 | " 4 | \n",
463 | " 1 | \n",
464 | " 5 | \n",
465 | " -0.0019 | \n",
466 | " -0.0002 | \n",
467 | " 100.0 | \n",
468 | " 518.67 | \n",
469 | " 642.37 | \n",
470 | " 1582.85 | \n",
471 | " 1406.22 | \n",
472 | " 14.62 | \n",
473 | " ... | \n",
474 | " 2388.04 | \n",
475 | " 8133.80 | \n",
476 | " 8.4294 | \n",
477 | " 0.03 | \n",
478 | " 393 | \n",
479 | " 2388 | \n",
480 | " 100.0 | \n",
481 | " 38.90 | \n",
482 | " 23.4044 | \n",
483 | " 187 | \n",
484 | "
\n",
485 | " \n",
486 | "
\n",
487 | "
5 rows × 27 columns
\n",
488 | "
"
489 | ],
490 | "text/plain": [
491 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
492 | "0 1 1 -0.0007 -0.0004 100.0 518.67 641.82 1589.70 1400.60 \n",
493 | "1 1 2 0.0019 -0.0003 100.0 518.67 642.15 1591.82 1403.14 \n",
494 | "2 1 3 -0.0043 0.0003 100.0 518.67 642.35 1587.99 1404.20 \n",
495 | "3 1 4 0.0007 0.0000 100.0 518.67 642.35 1582.79 1401.87 \n",
496 | "4 1 5 -0.0019 -0.0002 100.0 518.67 642.37 1582.85 1406.22 \n",
497 | "\n",
498 | " s5 ... s13 s14 s15 s16 s17 s18 s19 s20 \\\n",
499 | "0 14.62 ... 2388.02 8138.62 8.4195 0.03 392 2388 100.0 39.06 \n",
500 | "1 14.62 ... 2388.07 8131.49 8.4318 0.03 392 2388 100.0 39.00 \n",
501 | "2 14.62 ... 2388.03 8133.23 8.4178 0.03 390 2388 100.0 38.95 \n",
502 | "3 14.62 ... 2388.08 8133.83 8.3682 0.03 392 2388 100.0 38.88 \n",
503 | "4 14.62 ... 2388.04 8133.80 8.4294 0.03 393 2388 100.0 38.90 \n",
504 | "\n",
505 | " s21 RUL \n",
506 | "0 23.4190 191 \n",
507 | "1 23.4236 190 \n",
508 | "2 23.3442 189 \n",
509 | "3 23.3739 188 \n",
510 | "4 23.4044 187 \n",
511 | "\n",
512 | "[5 rows x 27 columns]"
513 | ]
514 | },
515 | "execution_count": 8,
516 | "metadata": {},
517 | "output_type": "execute_result"
518 | }
519 | ],
520 | "source": [
521 | "# Data Labeling - generate column RUL\n",
522 | "rul = pd.DataFrame(train_df.groupby('id')['cycle'].max()).reset_index()\n",
523 | "rul.columns = ['id', 'max']\n",
524 | "train_df = train_df.merge(rul, on=['id'], how='left')\n",
525 | "train_df['RUL'] = train_df['max'] - train_df['cycle']\n",
526 | "train_df.drop('max', axis=1, inplace=True)\n",
527 | "train_df.head()"
528 | ]
529 | },
530 | {
531 | "cell_type": "code",
532 | "execution_count": 9,
533 | "metadata": {},
534 | "outputs": [
535 | {
536 | "data": {
537 | "text/html": [
538 | "\n",
539 | "\n",
552 | "
\n",
553 | " \n",
554 | " \n",
555 | " | \n",
556 | " id | \n",
557 | " cycle | \n",
558 | " setting1 | \n",
559 | " setting2 | \n",
560 | " setting3 | \n",
561 | " s1 | \n",
562 | " s2 | \n",
563 | " s3 | \n",
564 | " s4 | \n",
565 | " s5 | \n",
566 | " ... | \n",
567 | " s15 | \n",
568 | " s16 | \n",
569 | " s17 | \n",
570 | " s18 | \n",
571 | " s19 | \n",
572 | " s20 | \n",
573 | " s21 | \n",
574 | " RUL | \n",
575 | " label1 | \n",
576 | " label2 | \n",
577 | "
\n",
578 | " \n",
579 | " \n",
580 | " \n",
581 | " 0 | \n",
582 | " 1 | \n",
583 | " 1 | \n",
584 | " -0.0007 | \n",
585 | " -0.0004 | \n",
586 | " 100.0 | \n",
587 | " 518.67 | \n",
588 | " 641.82 | \n",
589 | " 1589.70 | \n",
590 | " 1400.60 | \n",
591 | " 14.62 | \n",
592 | " ... | \n",
593 | " 8.4195 | \n",
594 | " 0.03 | \n",
595 | " 392 | \n",
596 | " 2388 | \n",
597 | " 100.0 | \n",
598 | " 39.06 | \n",
599 | " 23.4190 | \n",
600 | " 191 | \n",
601 | " 0 | \n",
602 | " 0 | \n",
603 | "
\n",
604 | " \n",
605 | " 1 | \n",
606 | " 1 | \n",
607 | " 2 | \n",
608 | " 0.0019 | \n",
609 | " -0.0003 | \n",
610 | " 100.0 | \n",
611 | " 518.67 | \n",
612 | " 642.15 | \n",
613 | " 1591.82 | \n",
614 | " 1403.14 | \n",
615 | " 14.62 | \n",
616 | " ... | \n",
617 | " 8.4318 | \n",
618 | " 0.03 | \n",
619 | " 392 | \n",
620 | " 2388 | \n",
621 | " 100.0 | \n",
622 | " 39.00 | \n",
623 | " 23.4236 | \n",
624 | " 190 | \n",
625 | " 0 | \n",
626 | " 0 | \n",
627 | "
\n",
628 | " \n",
629 | " 2 | \n",
630 | " 1 | \n",
631 | " 3 | \n",
632 | " -0.0043 | \n",
633 | " 0.0003 | \n",
634 | " 100.0 | \n",
635 | " 518.67 | \n",
636 | " 642.35 | \n",
637 | " 1587.99 | \n",
638 | " 1404.20 | \n",
639 | " 14.62 | \n",
640 | " ... | \n",
641 | " 8.4178 | \n",
642 | " 0.03 | \n",
643 | " 390 | \n",
644 | " 2388 | \n",
645 | " 100.0 | \n",
646 | " 38.95 | \n",
647 | " 23.3442 | \n",
648 | " 189 | \n",
649 | " 0 | \n",
650 | " 0 | \n",
651 | "
\n",
652 | " \n",
653 | " 3 | \n",
654 | " 1 | \n",
655 | " 4 | \n",
656 | " 0.0007 | \n",
657 | " 0.0000 | \n",
658 | " 100.0 | \n",
659 | " 518.67 | \n",
660 | " 642.35 | \n",
661 | " 1582.79 | \n",
662 | " 1401.87 | \n",
663 | " 14.62 | \n",
664 | " ... | \n",
665 | " 8.3682 | \n",
666 | " 0.03 | \n",
667 | " 392 | \n",
668 | " 2388 | \n",
669 | " 100.0 | \n",
670 | " 38.88 | \n",
671 | " 23.3739 | \n",
672 | " 188 | \n",
673 | " 0 | \n",
674 | " 0 | \n",
675 | "
\n",
676 | " \n",
677 | " 4 | \n",
678 | " 1 | \n",
679 | " 5 | \n",
680 | " -0.0019 | \n",
681 | " -0.0002 | \n",
682 | " 100.0 | \n",
683 | " 518.67 | \n",
684 | " 642.37 | \n",
685 | " 1582.85 | \n",
686 | " 1406.22 | \n",
687 | " 14.62 | \n",
688 | " ... | \n",
689 | " 8.4294 | \n",
690 | " 0.03 | \n",
691 | " 393 | \n",
692 | " 2388 | \n",
693 | " 100.0 | \n",
694 | " 38.90 | \n",
695 | " 23.4044 | \n",
696 | " 187 | \n",
697 | " 0 | \n",
698 | " 0 | \n",
699 | "
\n",
700 | " \n",
701 | "
\n",
702 | "
5 rows × 29 columns
\n",
703 | "
"
704 | ],
705 | "text/plain": [
706 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
707 | "0 1 1 -0.0007 -0.0004 100.0 518.67 641.82 1589.70 1400.60 \n",
708 | "1 1 2 0.0019 -0.0003 100.0 518.67 642.15 1591.82 1403.14 \n",
709 | "2 1 3 -0.0043 0.0003 100.0 518.67 642.35 1587.99 1404.20 \n",
710 | "3 1 4 0.0007 0.0000 100.0 518.67 642.35 1582.79 1401.87 \n",
711 | "4 1 5 -0.0019 -0.0002 100.0 518.67 642.37 1582.85 1406.22 \n",
712 | "\n",
713 | " s5 ... s15 s16 s17 s18 s19 s20 s21 RUL label1 \\\n",
714 | "0 14.62 ... 8.4195 0.03 392 2388 100.0 39.06 23.4190 191 0 \n",
715 | "1 14.62 ... 8.4318 0.03 392 2388 100.0 39.00 23.4236 190 0 \n",
716 | "2 14.62 ... 8.4178 0.03 390 2388 100.0 38.95 23.3442 189 0 \n",
717 | "3 14.62 ... 8.3682 0.03 392 2388 100.0 38.88 23.3739 188 0 \n",
718 | "4 14.62 ... 8.4294 0.03 393 2388 100.0 38.90 23.4044 187 0 \n",
719 | "\n",
720 | " label2 \n",
721 | "0 0 \n",
722 | "1 0 \n",
723 | "2 0 \n",
724 | "3 0 \n",
725 | "4 0 \n",
726 | "\n",
727 | "[5 rows x 29 columns]"
728 | ]
729 | },
730 | "execution_count": 9,
731 | "metadata": {},
732 | "output_type": "execute_result"
733 | }
734 | ],
735 | "source": [
736 | "# generate label columns for training data\n",
737 | "w1 = 30\n",
738 | "w0 = 15\n",
739 | "train_df['label1'] = np.where(train_df['RUL'] <= w1, 1, 0 )\n",
740 | "train_df['label2'] = train_df['label1']\n",
741 | "train_df.loc[train_df['RUL'] <= w0, 'label2'] = 2\n",
742 | "train_df.head()"
743 | ]
744 | },
745 | {
746 | "cell_type": "code",
747 | "execution_count": 10,
748 | "metadata": {},
749 | "outputs": [
750 | {
751 | "data": {
752 | "text/html": [
753 | "\n",
754 | "\n",
767 | "
\n",
768 | " \n",
769 | " \n",
770 | " | \n",
771 | " id | \n",
772 | " cycle | \n",
773 | " setting1 | \n",
774 | " setting2 | \n",
775 | " setting3 | \n",
776 | " s1 | \n",
777 | " s2 | \n",
778 | " s3 | \n",
779 | " s4 | \n",
780 | " s5 | \n",
781 | " ... | \n",
782 | " s16 | \n",
783 | " s17 | \n",
784 | " s18 | \n",
785 | " s19 | \n",
786 | " s20 | \n",
787 | " s21 | \n",
788 | " RUL | \n",
789 | " label1 | \n",
790 | " label2 | \n",
791 | " cycle_norm | \n",
792 | "
\n",
793 | " \n",
794 | " \n",
795 | " \n",
796 | " 0 | \n",
797 | " 1 | \n",
798 | " 1 | \n",
799 | " 0.459770 | \n",
800 | " 0.166667 | \n",
801 | " 0.0 | \n",
802 | " 0.0 | \n",
803 | " 0.183735 | \n",
804 | " 0.406802 | \n",
805 | " 0.309757 | \n",
806 | " 0.0 | \n",
807 | " ... | \n",
808 | " 0.0 | \n",
809 | " 0.333333 | \n",
810 | " 0.0 | \n",
811 | " 0.0 | \n",
812 | " 0.713178 | \n",
813 | " 0.724662 | \n",
814 | " 191 | \n",
815 | " 0 | \n",
816 | " 0 | \n",
817 | " 0.00000 | \n",
818 | "
\n",
819 | " \n",
820 | " 1 | \n",
821 | " 1 | \n",
822 | " 2 | \n",
823 | " 0.609195 | \n",
824 | " 0.250000 | \n",
825 | " 0.0 | \n",
826 | " 0.0 | \n",
827 | " 0.283133 | \n",
828 | " 0.453019 | \n",
829 | " 0.352633 | \n",
830 | " 0.0 | \n",
831 | " ... | \n",
832 | " 0.0 | \n",
833 | " 0.333333 | \n",
834 | " 0.0 | \n",
835 | " 0.0 | \n",
836 | " 0.666667 | \n",
837 | " 0.731014 | \n",
838 | " 190 | \n",
839 | " 0 | \n",
840 | " 0 | \n",
841 | " 0.00277 | \n",
842 | "
\n",
843 | " \n",
844 | " 2 | \n",
845 | " 1 | \n",
846 | " 3 | \n",
847 | " 0.252874 | \n",
848 | " 0.750000 | \n",
849 | " 0.0 | \n",
850 | " 0.0 | \n",
851 | " 0.343373 | \n",
852 | " 0.369523 | \n",
853 | " 0.370527 | \n",
854 | " 0.0 | \n",
855 | " ... | \n",
856 | " 0.0 | \n",
857 | " 0.166667 | \n",
858 | " 0.0 | \n",
859 | " 0.0 | \n",
860 | " 0.627907 | \n",
861 | " 0.621375 | \n",
862 | " 189 | \n",
863 | " 0 | \n",
864 | " 0 | \n",
865 | " 0.00554 | \n",
866 | "
\n",
867 | " \n",
868 | " 3 | \n",
869 | " 1 | \n",
870 | " 4 | \n",
871 | " 0.540230 | \n",
872 | " 0.500000 | \n",
873 | " 0.0 | \n",
874 | " 0.0 | \n",
875 | " 0.343373 | \n",
876 | " 0.256159 | \n",
877 | " 0.331195 | \n",
878 | " 0.0 | \n",
879 | " ... | \n",
880 | " 0.0 | \n",
881 | " 0.333333 | \n",
882 | " 0.0 | \n",
883 | " 0.0 | \n",
884 | " 0.573643 | \n",
885 | " 0.662386 | \n",
886 | " 188 | \n",
887 | " 0 | \n",
888 | " 0 | \n",
889 | " 0.00831 | \n",
890 | "
\n",
891 | " \n",
892 | " 4 | \n",
893 | " 1 | \n",
894 | " 5 | \n",
895 | " 0.390805 | \n",
896 | " 0.333333 | \n",
897 | " 0.0 | \n",
898 | " 0.0 | \n",
899 | " 0.349398 | \n",
900 | " 0.257467 | \n",
901 | " 0.404625 | \n",
902 | " 0.0 | \n",
903 | " ... | \n",
904 | " 0.0 | \n",
905 | " 0.416667 | \n",
906 | " 0.0 | \n",
907 | " 0.0 | \n",
908 | " 0.589147 | \n",
909 | " 0.704502 | \n",
910 | " 187 | \n",
911 | " 0 | \n",
912 | " 0 | \n",
913 | " 0.01108 | \n",
914 | "
\n",
915 | " \n",
916 | "
\n",
917 | "
5 rows × 30 columns
\n",
918 | "
"
919 | ],
920 | "text/plain": [
921 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
922 | "0 1 1 0.459770 0.166667 0.0 0.0 0.183735 0.406802 0.309757 \n",
923 | "1 1 2 0.609195 0.250000 0.0 0.0 0.283133 0.453019 0.352633 \n",
924 | "2 1 3 0.252874 0.750000 0.0 0.0 0.343373 0.369523 0.370527 \n",
925 | "3 1 4 0.540230 0.500000 0.0 0.0 0.343373 0.256159 0.331195 \n",
926 | "4 1 5 0.390805 0.333333 0.0 0.0 0.349398 0.257467 0.404625 \n",
927 | "\n",
928 | " s5 ... s16 s17 s18 s19 s20 s21 RUL label1 \\\n",
929 | "0 0.0 ... 0.0 0.333333 0.0 0.0 0.713178 0.724662 191 0 \n",
930 | "1 0.0 ... 0.0 0.333333 0.0 0.0 0.666667 0.731014 190 0 \n",
931 | "2 0.0 ... 0.0 0.166667 0.0 0.0 0.627907 0.621375 189 0 \n",
932 | "3 0.0 ... 0.0 0.333333 0.0 0.0 0.573643 0.662386 188 0 \n",
933 | "4 0.0 ... 0.0 0.416667 0.0 0.0 0.589147 0.704502 187 0 \n",
934 | "\n",
935 | " label2 cycle_norm \n",
936 | "0 0 0.00000 \n",
937 | "1 0 0.00277 \n",
938 | "2 0 0.00554 \n",
939 | "3 0 0.00831 \n",
940 | "4 0 0.01108 \n",
941 | "\n",
942 | "[5 rows x 30 columns]"
943 | ]
944 | },
945 | "execution_count": 10,
946 | "metadata": {},
947 | "output_type": "execute_result"
948 | }
949 | ],
950 | "source": [
951 | "# MinMax normalization\n",
952 | "train_df['cycle_norm'] = train_df['cycle']\n",
953 | "cols_normalize = train_df.columns.difference(['id','cycle','RUL','label1','label2'])\n",
954 | "min_max_scaler = preprocessing.MinMaxScaler()\n",
955 | "norm_train_df = pd.DataFrame(min_max_scaler.fit_transform(train_df[cols_normalize]), \n",
956 | " columns=cols_normalize, \n",
957 | " index=train_df.index)\n",
958 | "join_df = train_df[train_df.columns.difference(cols_normalize)].join(norm_train_df)\n",
959 | "train_df = join_df.reindex(columns = train_df.columns)\n",
960 | "train_df.head()"
961 | ]
962 | },
963 | {
964 | "cell_type": "code",
965 | "execution_count": 11,
966 | "metadata": {},
967 | "outputs": [
968 | {
969 | "data": {
970 | "text/html": [
971 | "\n",
972 | "\n",
985 | "
\n",
986 | " \n",
987 | " \n",
988 | " | \n",
989 | " id | \n",
990 | " cycle | \n",
991 | " setting1 | \n",
992 | " setting2 | \n",
993 | " setting3 | \n",
994 | " s1 | \n",
995 | " s2 | \n",
996 | " s3 | \n",
997 | " s4 | \n",
998 | " s5 | \n",
999 | " ... | \n",
1000 | " s13 | \n",
1001 | " s14 | \n",
1002 | " s15 | \n",
1003 | " s16 | \n",
1004 | " s17 | \n",
1005 | " s18 | \n",
1006 | " s19 | \n",
1007 | " s20 | \n",
1008 | " s21 | \n",
1009 | " cycle_norm | \n",
1010 | "
\n",
1011 | " \n",
1012 | " \n",
1013 | " \n",
1014 | " 0 | \n",
1015 | " 1 | \n",
1016 | " 1 | \n",
1017 | " 0.632184 | \n",
1018 | " 0.750000 | \n",
1019 | " 0.0 | \n",
1020 | " 0.0 | \n",
1021 | " 0.545181 | \n",
1022 | " 0.310661 | \n",
1023 | " 0.269413 | \n",
1024 | " 0.0 | \n",
1025 | " ... | \n",
1026 | " 0.220588 | \n",
1027 | " 0.132160 | \n",
1028 | " 0.308965 | \n",
1029 | " 0.0 | \n",
1030 | " 0.333333 | \n",
1031 | " 0.0 | \n",
1032 | " 0.0 | \n",
1033 | " 0.558140 | \n",
1034 | " 0.661834 | \n",
1035 | " 0.00000 | \n",
1036 | "
\n",
1037 | " \n",
1038 | " 1 | \n",
1039 | " 1 | \n",
1040 | " 2 | \n",
1041 | " 0.344828 | \n",
1042 | " 0.250000 | \n",
1043 | " 0.0 | \n",
1044 | " 0.0 | \n",
1045 | " 0.150602 | \n",
1046 | " 0.379551 | \n",
1047 | " 0.222316 | \n",
1048 | " 0.0 | \n",
1049 | " ... | \n",
1050 | " 0.264706 | \n",
1051 | " 0.204768 | \n",
1052 | " 0.213159 | \n",
1053 | " 0.0 | \n",
1054 | " 0.416667 | \n",
1055 | " 0.0 | \n",
1056 | " 0.0 | \n",
1057 | " 0.682171 | \n",
1058 | " 0.686827 | \n",
1059 | " 0.00277 | \n",
1060 | "
\n",
1061 | " \n",
1062 | " 2 | \n",
1063 | " 1 | \n",
1064 | " 3 | \n",
1065 | " 0.517241 | \n",
1066 | " 0.583333 | \n",
1067 | " 0.0 | \n",
1068 | " 0.0 | \n",
1069 | " 0.376506 | \n",
1070 | " 0.346632 | \n",
1071 | " 0.322248 | \n",
1072 | " 0.0 | \n",
1073 | " ... | \n",
1074 | " 0.220588 | \n",
1075 | " 0.155640 | \n",
1076 | " 0.458638 | \n",
1077 | " 0.0 | \n",
1078 | " 0.416667 | \n",
1079 | " 0.0 | \n",
1080 | " 0.0 | \n",
1081 | " 0.728682 | \n",
1082 | " 0.721348 | \n",
1083 | " 0.00554 | \n",
1084 | "
\n",
1085 | " \n",
1086 | " 3 | \n",
1087 | " 1 | \n",
1088 | " 4 | \n",
1089 | " 0.741379 | \n",
1090 | " 0.500000 | \n",
1091 | " 0.0 | \n",
1092 | " 0.0 | \n",
1093 | " 0.370482 | \n",
1094 | " 0.285154 | \n",
1095 | " 0.408001 | \n",
1096 | " 0.0 | \n",
1097 | " ... | \n",
1098 | " 0.250000 | \n",
1099 | " 0.170090 | \n",
1100 | " 0.257022 | \n",
1101 | " 0.0 | \n",
1102 | " 0.250000 | \n",
1103 | " 0.0 | \n",
1104 | " 0.0 | \n",
1105 | " 0.666667 | \n",
1106 | " 0.662110 | \n",
1107 | " 0.00831 | \n",
1108 | "
\n",
1109 | " \n",
1110 | " 4 | \n",
1111 | " 1 | \n",
1112 | " 5 | \n",
1113 | " 0.580460 | \n",
1114 | " 0.500000 | \n",
1115 | " 0.0 | \n",
1116 | " 0.0 | \n",
1117 | " 0.391566 | \n",
1118 | " 0.352082 | \n",
1119 | " 0.332039 | \n",
1120 | " 0.0 | \n",
1121 | " ... | \n",
1122 | " 0.220588 | \n",
1123 | " 0.152751 | \n",
1124 | " 0.300885 | \n",
1125 | " 0.0 | \n",
1126 | " 0.166667 | \n",
1127 | " 0.0 | \n",
1128 | " 0.0 | \n",
1129 | " 0.658915 | \n",
1130 | " 0.716377 | \n",
1131 | " 0.01108 | \n",
1132 | "
\n",
1133 | " \n",
1134 | "
\n",
1135 | "
5 rows × 27 columns
\n",
1136 | "
"
1137 | ],
1138 | "text/plain": [
1139 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
1140 | "0 1 1 0.632184 0.750000 0.0 0.0 0.545181 0.310661 0.269413 \n",
1141 | "1 1 2 0.344828 0.250000 0.0 0.0 0.150602 0.379551 0.222316 \n",
1142 | "2 1 3 0.517241 0.583333 0.0 0.0 0.376506 0.346632 0.322248 \n",
1143 | "3 1 4 0.741379 0.500000 0.0 0.0 0.370482 0.285154 0.408001 \n",
1144 | "4 1 5 0.580460 0.500000 0.0 0.0 0.391566 0.352082 0.332039 \n",
1145 | "\n",
1146 | " s5 ... s13 s14 s15 s16 s17 s18 s19 \\\n",
1147 | "0 0.0 ... 0.220588 0.132160 0.308965 0.0 0.333333 0.0 0.0 \n",
1148 | "1 0.0 ... 0.264706 0.204768 0.213159 0.0 0.416667 0.0 0.0 \n",
1149 | "2 0.0 ... 0.220588 0.155640 0.458638 0.0 0.416667 0.0 0.0 \n",
1150 | "3 0.0 ... 0.250000 0.170090 0.257022 0.0 0.250000 0.0 0.0 \n",
1151 | "4 0.0 ... 0.220588 0.152751 0.300885 0.0 0.166667 0.0 0.0 \n",
1152 | "\n",
1153 | " s20 s21 cycle_norm \n",
1154 | "0 0.558140 0.661834 0.00000 \n",
1155 | "1 0.682171 0.686827 0.00277 \n",
1156 | "2 0.728682 0.721348 0.00554 \n",
1157 | "3 0.666667 0.662110 0.00831 \n",
1158 | "4 0.658915 0.716377 0.01108 \n",
1159 | "\n",
1160 | "[5 rows x 27 columns]"
1161 | ]
1162 | },
1163 | "execution_count": 11,
1164 | "metadata": {},
1165 | "output_type": "execute_result"
1166 | }
1167 | ],
1168 | "source": [
1169 | "test_df['cycle_norm'] = test_df['cycle']\n",
1170 | "norm_test_df = pd.DataFrame(min_max_scaler.transform(test_df[cols_normalize]), \n",
1171 | " columns=cols_normalize, \n",
1172 | " index=test_df.index)\n",
1173 | "test_join_df = test_df[test_df.columns.difference(cols_normalize)].join(norm_test_df)\n",
1174 | "test_df = test_join_df.reindex(columns = test_df.columns)\n",
1175 | "test_df = test_df.reset_index(drop=True)\n",
1176 | "test_df.head()"
1177 | ]
1178 | },
1179 | {
1180 | "cell_type": "code",
1181 | "execution_count": 12,
1182 | "metadata": {},
1183 | "outputs": [],
1184 | "source": [
1185 | "# generate column max for test data\n",
1186 | "rul = pd.DataFrame(test_df.groupby('id')['cycle'].max()).reset_index()\n",
1187 | "rul.columns = ['id', 'max']\n",
1188 | "truth_df.columns = ['more']\n",
1189 | "truth_df['id'] = truth_df.index + 1\n",
1190 | "truth_df['max'] = rul['max'] + truth_df['more']\n",
1191 | "truth_df.drop('more', axis=1, inplace=True)"
1192 | ]
1193 | },
1194 | {
1195 | "cell_type": "code",
1196 | "execution_count": 13,
1197 | "metadata": {},
1198 | "outputs": [
1199 | {
1200 | "data": {
1201 | "text/html": [
1202 | "\n",
1203 | "\n",
1216 | "
\n",
1217 | " \n",
1218 | " \n",
1219 | " | \n",
1220 | " id | \n",
1221 | " cycle | \n",
1222 | " setting1 | \n",
1223 | " setting2 | \n",
1224 | " setting3 | \n",
1225 | " s1 | \n",
1226 | " s2 | \n",
1227 | " s3 | \n",
1228 | " s4 | \n",
1229 | " s5 | \n",
1230 | " ... | \n",
1231 | " s14 | \n",
1232 | " s15 | \n",
1233 | " s16 | \n",
1234 | " s17 | \n",
1235 | " s18 | \n",
1236 | " s19 | \n",
1237 | " s20 | \n",
1238 | " s21 | \n",
1239 | " cycle_norm | \n",
1240 | " RUL | \n",
1241 | "
\n",
1242 | " \n",
1243 | " \n",
1244 | " \n",
1245 | " 0 | \n",
1246 | " 1 | \n",
1247 | " 1 | \n",
1248 | " 0.632184 | \n",
1249 | " 0.750000 | \n",
1250 | " 0.0 | \n",
1251 | " 0.0 | \n",
1252 | " 0.545181 | \n",
1253 | " 0.310661 | \n",
1254 | " 0.269413 | \n",
1255 | " 0.0 | \n",
1256 | " ... | \n",
1257 | " 0.132160 | \n",
1258 | " 0.308965 | \n",
1259 | " 0.0 | \n",
1260 | " 0.333333 | \n",
1261 | " 0.0 | \n",
1262 | " 0.0 | \n",
1263 | " 0.558140 | \n",
1264 | " 0.661834 | \n",
1265 | " 0.00000 | \n",
1266 | " 142 | \n",
1267 | "
\n",
1268 | " \n",
1269 | " 1 | \n",
1270 | " 1 | \n",
1271 | " 2 | \n",
1272 | " 0.344828 | \n",
1273 | " 0.250000 | \n",
1274 | " 0.0 | \n",
1275 | " 0.0 | \n",
1276 | " 0.150602 | \n",
1277 | " 0.379551 | \n",
1278 | " 0.222316 | \n",
1279 | " 0.0 | \n",
1280 | " ... | \n",
1281 | " 0.204768 | \n",
1282 | " 0.213159 | \n",
1283 | " 0.0 | \n",
1284 | " 0.416667 | \n",
1285 | " 0.0 | \n",
1286 | " 0.0 | \n",
1287 | " 0.682171 | \n",
1288 | " 0.686827 | \n",
1289 | " 0.00277 | \n",
1290 | " 141 | \n",
1291 | "
\n",
1292 | " \n",
1293 | " 2 | \n",
1294 | " 1 | \n",
1295 | " 3 | \n",
1296 | " 0.517241 | \n",
1297 | " 0.583333 | \n",
1298 | " 0.0 | \n",
1299 | " 0.0 | \n",
1300 | " 0.376506 | \n",
1301 | " 0.346632 | \n",
1302 | " 0.322248 | \n",
1303 | " 0.0 | \n",
1304 | " ... | \n",
1305 | " 0.155640 | \n",
1306 | " 0.458638 | \n",
1307 | " 0.0 | \n",
1308 | " 0.416667 | \n",
1309 | " 0.0 | \n",
1310 | " 0.0 | \n",
1311 | " 0.728682 | \n",
1312 | " 0.721348 | \n",
1313 | " 0.00554 | \n",
1314 | " 140 | \n",
1315 | "
\n",
1316 | " \n",
1317 | " 3 | \n",
1318 | " 1 | \n",
1319 | " 4 | \n",
1320 | " 0.741379 | \n",
1321 | " 0.500000 | \n",
1322 | " 0.0 | \n",
1323 | " 0.0 | \n",
1324 | " 0.370482 | \n",
1325 | " 0.285154 | \n",
1326 | " 0.408001 | \n",
1327 | " 0.0 | \n",
1328 | " ... | \n",
1329 | " 0.170090 | \n",
1330 | " 0.257022 | \n",
1331 | " 0.0 | \n",
1332 | " 0.250000 | \n",
1333 | " 0.0 | \n",
1334 | " 0.0 | \n",
1335 | " 0.666667 | \n",
1336 | " 0.662110 | \n",
1337 | " 0.00831 | \n",
1338 | " 139 | \n",
1339 | "
\n",
1340 | " \n",
1341 | " 4 | \n",
1342 | " 1 | \n",
1343 | " 5 | \n",
1344 | " 0.580460 | \n",
1345 | " 0.500000 | \n",
1346 | " 0.0 | \n",
1347 | " 0.0 | \n",
1348 | " 0.391566 | \n",
1349 | " 0.352082 | \n",
1350 | " 0.332039 | \n",
1351 | " 0.0 | \n",
1352 | " ... | \n",
1353 | " 0.152751 | \n",
1354 | " 0.300885 | \n",
1355 | " 0.0 | \n",
1356 | " 0.166667 | \n",
1357 | " 0.0 | \n",
1358 | " 0.0 | \n",
1359 | " 0.658915 | \n",
1360 | " 0.716377 | \n",
1361 | " 0.01108 | \n",
1362 | " 138 | \n",
1363 | "
\n",
1364 | " \n",
1365 | "
\n",
1366 | "
5 rows × 28 columns
\n",
1367 | "
"
1368 | ],
1369 | "text/plain": [
1370 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
1371 | "0 1 1 0.632184 0.750000 0.0 0.0 0.545181 0.310661 0.269413 \n",
1372 | "1 1 2 0.344828 0.250000 0.0 0.0 0.150602 0.379551 0.222316 \n",
1373 | "2 1 3 0.517241 0.583333 0.0 0.0 0.376506 0.346632 0.322248 \n",
1374 | "3 1 4 0.741379 0.500000 0.0 0.0 0.370482 0.285154 0.408001 \n",
1375 | "4 1 5 0.580460 0.500000 0.0 0.0 0.391566 0.352082 0.332039 \n",
1376 | "\n",
1377 | " s5 ... s14 s15 s16 s17 s18 s19 s20 s21 \\\n",
1378 | "0 0.0 ... 0.132160 0.308965 0.0 0.333333 0.0 0.0 0.558140 0.661834 \n",
1379 | "1 0.0 ... 0.204768 0.213159 0.0 0.416667 0.0 0.0 0.682171 0.686827 \n",
1380 | "2 0.0 ... 0.155640 0.458638 0.0 0.416667 0.0 0.0 0.728682 0.721348 \n",
1381 | "3 0.0 ... 0.170090 0.257022 0.0 0.250000 0.0 0.0 0.666667 0.662110 \n",
1382 | "4 0.0 ... 0.152751 0.300885 0.0 0.166667 0.0 0.0 0.658915 0.716377 \n",
1383 | "\n",
1384 | " cycle_norm RUL \n",
1385 | "0 0.00000 142 \n",
1386 | "1 0.00277 141 \n",
1387 | "2 0.00554 140 \n",
1388 | "3 0.00831 139 \n",
1389 | "4 0.01108 138 \n",
1390 | "\n",
1391 | "[5 rows x 28 columns]"
1392 | ]
1393 | },
1394 | "execution_count": 13,
1395 | "metadata": {},
1396 | "output_type": "execute_result"
1397 | }
1398 | ],
1399 | "source": [
1400 | "# generate RUL for test data\n",
1401 | "test_df = test_df.merge(truth_df, on=['id'], how='left')\n",
1402 | "test_df['RUL'] = test_df['max'] - test_df['cycle']\n",
1403 | "test_df.drop('max', axis=1, inplace=True)\n",
1404 | "test_df.head()"
1405 | ]
1406 | },
1407 | {
1408 | "cell_type": "code",
1409 | "execution_count": 14,
1410 | "metadata": {},
1411 | "outputs": [
1412 | {
1413 | "data": {
1414 | "text/html": [
1415 | "\n",
1416 | "\n",
1429 | "
\n",
1430 | " \n",
1431 | " \n",
1432 | " | \n",
1433 | " id | \n",
1434 | " cycle | \n",
1435 | " setting1 | \n",
1436 | " setting2 | \n",
1437 | " setting3 | \n",
1438 | " s1 | \n",
1439 | " s2 | \n",
1440 | " s3 | \n",
1441 | " s4 | \n",
1442 | " s5 | \n",
1443 | " ... | \n",
1444 | " s16 | \n",
1445 | " s17 | \n",
1446 | " s18 | \n",
1447 | " s19 | \n",
1448 | " s20 | \n",
1449 | " s21 | \n",
1450 | " cycle_norm | \n",
1451 | " RUL | \n",
1452 | " label1 | \n",
1453 | " label2 | \n",
1454 | "
\n",
1455 | " \n",
1456 | " \n",
1457 | " \n",
1458 | " 0 | \n",
1459 | " 1 | \n",
1460 | " 1 | \n",
1461 | " 0.632184 | \n",
1462 | " 0.750000 | \n",
1463 | " 0.0 | \n",
1464 | " 0.0 | \n",
1465 | " 0.545181 | \n",
1466 | " 0.310661 | \n",
1467 | " 0.269413 | \n",
1468 | " 0.0 | \n",
1469 | " ... | \n",
1470 | " 0.0 | \n",
1471 | " 0.333333 | \n",
1472 | " 0.0 | \n",
1473 | " 0.0 | \n",
1474 | " 0.558140 | \n",
1475 | " 0.661834 | \n",
1476 | " 0.00000 | \n",
1477 | " 142 | \n",
1478 | " 0 | \n",
1479 | " 0 | \n",
1480 | "
\n",
1481 | " \n",
1482 | " 1 | \n",
1483 | " 1 | \n",
1484 | " 2 | \n",
1485 | " 0.344828 | \n",
1486 | " 0.250000 | \n",
1487 | " 0.0 | \n",
1488 | " 0.0 | \n",
1489 | " 0.150602 | \n",
1490 | " 0.379551 | \n",
1491 | " 0.222316 | \n",
1492 | " 0.0 | \n",
1493 | " ... | \n",
1494 | " 0.0 | \n",
1495 | " 0.416667 | \n",
1496 | " 0.0 | \n",
1497 | " 0.0 | \n",
1498 | " 0.682171 | \n",
1499 | " 0.686827 | \n",
1500 | " 0.00277 | \n",
1501 | " 141 | \n",
1502 | " 0 | \n",
1503 | " 0 | \n",
1504 | "
\n",
1505 | " \n",
1506 | " 2 | \n",
1507 | " 1 | \n",
1508 | " 3 | \n",
1509 | " 0.517241 | \n",
1510 | " 0.583333 | \n",
1511 | " 0.0 | \n",
1512 | " 0.0 | \n",
1513 | " 0.376506 | \n",
1514 | " 0.346632 | \n",
1515 | " 0.322248 | \n",
1516 | " 0.0 | \n",
1517 | " ... | \n",
1518 | " 0.0 | \n",
1519 | " 0.416667 | \n",
1520 | " 0.0 | \n",
1521 | " 0.0 | \n",
1522 | " 0.728682 | \n",
1523 | " 0.721348 | \n",
1524 | " 0.00554 | \n",
1525 | " 140 | \n",
1526 | " 0 | \n",
1527 | " 0 | \n",
1528 | "
\n",
1529 | " \n",
1530 | " 3 | \n",
1531 | " 1 | \n",
1532 | " 4 | \n",
1533 | " 0.741379 | \n",
1534 | " 0.500000 | \n",
1535 | " 0.0 | \n",
1536 | " 0.0 | \n",
1537 | " 0.370482 | \n",
1538 | " 0.285154 | \n",
1539 | " 0.408001 | \n",
1540 | " 0.0 | \n",
1541 | " ... | \n",
1542 | " 0.0 | \n",
1543 | " 0.250000 | \n",
1544 | " 0.0 | \n",
1545 | " 0.0 | \n",
1546 | " 0.666667 | \n",
1547 | " 0.662110 | \n",
1548 | " 0.00831 | \n",
1549 | " 139 | \n",
1550 | " 0 | \n",
1551 | " 0 | \n",
1552 | "
\n",
1553 | " \n",
1554 | " 4 | \n",
1555 | " 1 | \n",
1556 | " 5 | \n",
1557 | " 0.580460 | \n",
1558 | " 0.500000 | \n",
1559 | " 0.0 | \n",
1560 | " 0.0 | \n",
1561 | " 0.391566 | \n",
1562 | " 0.352082 | \n",
1563 | " 0.332039 | \n",
1564 | " 0.0 | \n",
1565 | " ... | \n",
1566 | " 0.0 | \n",
1567 | " 0.166667 | \n",
1568 | " 0.0 | \n",
1569 | " 0.0 | \n",
1570 | " 0.658915 | \n",
1571 | " 0.716377 | \n",
1572 | " 0.01108 | \n",
1573 | " 138 | \n",
1574 | " 0 | \n",
1575 | " 0 | \n",
1576 | "
\n",
1577 | " \n",
1578 | "
\n",
1579 | "
5 rows × 30 columns
\n",
1580 | "
"
1581 | ],
1582 | "text/plain": [
1583 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
1584 | "0 1 1 0.632184 0.750000 0.0 0.0 0.545181 0.310661 0.269413 \n",
1585 | "1 1 2 0.344828 0.250000 0.0 0.0 0.150602 0.379551 0.222316 \n",
1586 | "2 1 3 0.517241 0.583333 0.0 0.0 0.376506 0.346632 0.322248 \n",
1587 | "3 1 4 0.741379 0.500000 0.0 0.0 0.370482 0.285154 0.408001 \n",
1588 | "4 1 5 0.580460 0.500000 0.0 0.0 0.391566 0.352082 0.332039 \n",
1589 | "\n",
1590 | " s5 ... s16 s17 s18 s19 s20 s21 cycle_norm RUL \\\n",
1591 | "0 0.0 ... 0.0 0.333333 0.0 0.0 0.558140 0.661834 0.00000 142 \n",
1592 | "1 0.0 ... 0.0 0.416667 0.0 0.0 0.682171 0.686827 0.00277 141 \n",
1593 | "2 0.0 ... 0.0 0.416667 0.0 0.0 0.728682 0.721348 0.00554 140 \n",
1594 | "3 0.0 ... 0.0 0.250000 0.0 0.0 0.666667 0.662110 0.00831 139 \n",
1595 | "4 0.0 ... 0.0 0.166667 0.0 0.0 0.658915 0.716377 0.01108 138 \n",
1596 | "\n",
1597 | " label1 label2 \n",
1598 | "0 0 0 \n",
1599 | "1 0 0 \n",
1600 | "2 0 0 \n",
1601 | "3 0 0 \n",
1602 | "4 0 0 \n",
1603 | "\n",
1604 | "[5 rows x 30 columns]"
1605 | ]
1606 | },
1607 | "execution_count": 14,
1608 | "metadata": {},
1609 | "output_type": "execute_result"
1610 | }
1611 | ],
1612 | "source": [
1613 | "# generate label columns w0 and w1 for test data\n",
1614 | "test_df['label1'] = np.where(test_df['RUL'] <= w1, 1, 0 )\n",
1615 | "test_df['label2'] = test_df['label1']\n",
1616 | "test_df.loc[test_df['RUL'] <= w0, 'label2'] = 2\n",
1617 | "test_df.head()"
1618 | ]
1619 | },
1620 | {
1621 | "cell_type": "code",
1622 | "execution_count": 15,
1623 | "metadata": {},
1624 | "outputs": [],
1625 | "source": [
1626 | "y_train = train_df['RUL']\n",
1627 | "y_test = test_df['RUL']\n",
1628 | "train_df.drop(['label1','label2','RUL'],axis=1,inplace=True)\n",
1629 | "test_df.drop(['label1','label2','RUL'],axis=1,inplace=True)"
1630 | ]
1631 | },
1632 | {
1633 | "cell_type": "code",
1634 | "execution_count": 16,
1635 | "metadata": {},
1636 | "outputs": [
1637 | {
1638 | "data": {
1639 | "text/html": [
1640 | "\n",
1641 | "\n",
1654 | "
\n",
1655 | " \n",
1656 | " \n",
1657 | " | \n",
1658 | " id | \n",
1659 | " cycle | \n",
1660 | " setting1 | \n",
1661 | " setting2 | \n",
1662 | " setting3 | \n",
1663 | " s1 | \n",
1664 | " s2 | \n",
1665 | " s3 | \n",
1666 | " s4 | \n",
1667 | " s5 | \n",
1668 | " ... | \n",
1669 | " s13 | \n",
1670 | " s14 | \n",
1671 | " s15 | \n",
1672 | " s16 | \n",
1673 | " s17 | \n",
1674 | " s18 | \n",
1675 | " s19 | \n",
1676 | " s20 | \n",
1677 | " s21 | \n",
1678 | " cycle_norm | \n",
1679 | "
\n",
1680 | " \n",
1681 | " \n",
1682 | " \n",
1683 | " 0 | \n",
1684 | " 1 | \n",
1685 | " 1 | \n",
1686 | " 0.459770 | \n",
1687 | " 0.166667 | \n",
1688 | " 0.0 | \n",
1689 | " 0.0 | \n",
1690 | " 0.183735 | \n",
1691 | " 0.406802 | \n",
1692 | " 0.309757 | \n",
1693 | " 0.0 | \n",
1694 | " ... | \n",
1695 | " 0.205882 | \n",
1696 | " 0.199608 | \n",
1697 | " 0.363986 | \n",
1698 | " 0.0 | \n",
1699 | " 0.333333 | \n",
1700 | " 0.0 | \n",
1701 | " 0.0 | \n",
1702 | " 0.713178 | \n",
1703 | " 0.724662 | \n",
1704 | " 0.00000 | \n",
1705 | "
\n",
1706 | " \n",
1707 | " 1 | \n",
1708 | " 1 | \n",
1709 | " 2 | \n",
1710 | " 0.609195 | \n",
1711 | " 0.250000 | \n",
1712 | " 0.0 | \n",
1713 | " 0.0 | \n",
1714 | " 0.283133 | \n",
1715 | " 0.453019 | \n",
1716 | " 0.352633 | \n",
1717 | " 0.0 | \n",
1718 | " ... | \n",
1719 | " 0.279412 | \n",
1720 | " 0.162813 | \n",
1721 | " 0.411312 | \n",
1722 | " 0.0 | \n",
1723 | " 0.333333 | \n",
1724 | " 0.0 | \n",
1725 | " 0.0 | \n",
1726 | " 0.666667 | \n",
1727 | " 0.731014 | \n",
1728 | " 0.00277 | \n",
1729 | "
\n",
1730 | " \n",
1731 | " 2 | \n",
1732 | " 1 | \n",
1733 | " 3 | \n",
1734 | " 0.252874 | \n",
1735 | " 0.750000 | \n",
1736 | " 0.0 | \n",
1737 | " 0.0 | \n",
1738 | " 0.343373 | \n",
1739 | " 0.369523 | \n",
1740 | " 0.370527 | \n",
1741 | " 0.0 | \n",
1742 | " ... | \n",
1743 | " 0.220588 | \n",
1744 | " 0.171793 | \n",
1745 | " 0.357445 | \n",
1746 | " 0.0 | \n",
1747 | " 0.166667 | \n",
1748 | " 0.0 | \n",
1749 | " 0.0 | \n",
1750 | " 0.627907 | \n",
1751 | " 0.621375 | \n",
1752 | " 0.00554 | \n",
1753 | "
\n",
1754 | " \n",
1755 | " 3 | \n",
1756 | " 1 | \n",
1757 | " 4 | \n",
1758 | " 0.540230 | \n",
1759 | " 0.500000 | \n",
1760 | " 0.0 | \n",
1761 | " 0.0 | \n",
1762 | " 0.343373 | \n",
1763 | " 0.256159 | \n",
1764 | " 0.331195 | \n",
1765 | " 0.0 | \n",
1766 | " ... | \n",
1767 | " 0.294118 | \n",
1768 | " 0.174889 | \n",
1769 | " 0.166603 | \n",
1770 | " 0.0 | \n",
1771 | " 0.333333 | \n",
1772 | " 0.0 | \n",
1773 | " 0.0 | \n",
1774 | " 0.573643 | \n",
1775 | " 0.662386 | \n",
1776 | " 0.00831 | \n",
1777 | "
\n",
1778 | " \n",
1779 | " 4 | \n",
1780 | " 1 | \n",
1781 | " 5 | \n",
1782 | " 0.390805 | \n",
1783 | " 0.333333 | \n",
1784 | " 0.0 | \n",
1785 | " 0.0 | \n",
1786 | " 0.349398 | \n",
1787 | " 0.257467 | \n",
1788 | " 0.404625 | \n",
1789 | " 0.0 | \n",
1790 | " ... | \n",
1791 | " 0.235294 | \n",
1792 | " 0.174734 | \n",
1793 | " 0.402078 | \n",
1794 | " 0.0 | \n",
1795 | " 0.416667 | \n",
1796 | " 0.0 | \n",
1797 | " 0.0 | \n",
1798 | " 0.589147 | \n",
1799 | " 0.704502 | \n",
1800 | " 0.01108 | \n",
1801 | "
\n",
1802 | " \n",
1803 | "
\n",
1804 | "
5 rows × 27 columns
\n",
1805 | "
"
1806 | ],
1807 | "text/plain": [
1808 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
1809 | "0 1 1 0.459770 0.166667 0.0 0.0 0.183735 0.406802 0.309757 \n",
1810 | "1 1 2 0.609195 0.250000 0.0 0.0 0.283133 0.453019 0.352633 \n",
1811 | "2 1 3 0.252874 0.750000 0.0 0.0 0.343373 0.369523 0.370527 \n",
1812 | "3 1 4 0.540230 0.500000 0.0 0.0 0.343373 0.256159 0.331195 \n",
1813 | "4 1 5 0.390805 0.333333 0.0 0.0 0.349398 0.257467 0.404625 \n",
1814 | "\n",
1815 | " s5 ... s13 s14 s15 s16 s17 s18 s19 \\\n",
1816 | "0 0.0 ... 0.205882 0.199608 0.363986 0.0 0.333333 0.0 0.0 \n",
1817 | "1 0.0 ... 0.279412 0.162813 0.411312 0.0 0.333333 0.0 0.0 \n",
1818 | "2 0.0 ... 0.220588 0.171793 0.357445 0.0 0.166667 0.0 0.0 \n",
1819 | "3 0.0 ... 0.294118 0.174889 0.166603 0.0 0.333333 0.0 0.0 \n",
1820 | "4 0.0 ... 0.235294 0.174734 0.402078 0.0 0.416667 0.0 0.0 \n",
1821 | "\n",
1822 | " s20 s21 cycle_norm \n",
1823 | "0 0.713178 0.724662 0.00000 \n",
1824 | "1 0.666667 0.731014 0.00277 \n",
1825 | "2 0.627907 0.621375 0.00554 \n",
1826 | "3 0.573643 0.662386 0.00831 \n",
1827 | "4 0.589147 0.704502 0.01108 \n",
1828 | "\n",
1829 | "[5 rows x 27 columns]"
1830 | ]
1831 | },
1832 | "execution_count": 16,
1833 | "metadata": {},
1834 | "output_type": "execute_result"
1835 | }
1836 | ],
1837 | "source": [
1838 | "train_df.head()"
1839 | ]
1840 | },
1841 | {
1842 | "cell_type": "code",
1843 | "execution_count": 17,
1844 | "metadata": {},
1845 | "outputs": [
1846 | {
1847 | "name": "stdout",
1848 | "output_type": "stream",
1849 | "text": [
1850 | "Version 0.9.2 of tpot is outdated. Version 0.9.3 was released 2 days ago.\n"
1851 | ]
1852 | }
1853 | ],
1854 | "source": [
1855 | "from tpot import TPOTRegressor\n",
1856 | "tpot = TPOTRegressor(generations=10, scoring ='neg_mean_squared_error', population_size=50, verbosity=2, cv = 3)"
1857 | ]
1858 | },
1859 | {
1860 | "cell_type": "code",
1861 | "execution_count": 18,
1862 | "metadata": {},
1863 | "outputs": [
1864 | {
1865 | "name": "stderr",
1866 | "output_type": "stream",
1867 | "text": [
1868 | "Optimization Progress: 18%|█▊ | 100/550 [16:05<2:28:00, 19.74s/pipeline]"
1869 | ]
1870 | },
1871 | {
1872 | "name": "stdout",
1873 | "output_type": "stream",
1874 | "text": [
1875 | "Generation 1 - Current best internal CV score: -1336.629916208506\n"
1876 | ]
1877 | },
1878 | {
1879 | "name": "stderr",
1880 | "output_type": "stream",
1881 | "text": [
1882 | "Optimization Progress: 27%|██▋ | 151/550 [25:07<1:16:36, 11.52s/pipeline] "
1883 | ]
1884 | },
1885 | {
1886 | "name": "stdout",
1887 | "output_type": "stream",
1888 | "text": [
1889 | "Generation 2 - Current best internal CV score: -1316.3456588624992\n"
1890 | ]
1891 | },
1892 | {
1893 | "name": "stderr",
1894 | "output_type": "stream",
1895 | "text": [
1896 | "Optimization Progress: 37%|███▋ | 201/550 [30:05<1:07:40, 11.63s/pipeline]"
1897 | ]
1898 | },
1899 | {
1900 | "name": "stdout",
1901 | "output_type": "stream",
1902 | "text": [
1903 | "Generation 3 - Current best internal CV score: -1316.3456588624992\n"
1904 | ]
1905 | },
1906 | {
1907 | "name": "stderr",
1908 | "output_type": "stream",
1909 | "text": [
1910 | "Optimization Progress: 46%|████▌ | 251/550 [34:30<22:03, 4.43s/pipeline] "
1911 | ]
1912 | },
1913 | {
1914 | "name": "stdout",
1915 | "output_type": "stream",
1916 | "text": [
1917 | "Generation 4 - Current best internal CV score: -1316.3456588624992\n"
1918 | ]
1919 | },
1920 | {
1921 | "name": "stderr",
1922 | "output_type": "stream",
1923 | "text": [
1924 | "Optimization Progress: 55%|█████▍ | 301/550 [40:37<27:54, 6.73s/pipeline] "
1925 | ]
1926 | },
1927 | {
1928 | "name": "stdout",
1929 | "output_type": "stream",
1930 | "text": [
1931 | "Generation 5 - Current best internal CV score: -1316.3456588624992\n"
1932 | ]
1933 | },
1934 | {
1935 | "name": "stderr",
1936 | "output_type": "stream",
1937 | "text": [
1938 | "Optimization Progress: 64%|██████▍ | 351/550 [47:44<57:47, 17.42s/pipeline] "
1939 | ]
1940 | },
1941 | {
1942 | "name": "stdout",
1943 | "output_type": "stream",
1944 | "text": [
1945 | "Generation 6 - Current best internal CV score: -1316.3456588624992\n"
1946 | ]
1947 | },
1948 | {
1949 | "name": "stderr",
1950 | "output_type": "stream",
1951 | "text": [
1952 | "Optimization Progress: 73%|███████▎ | 401/550 [53:10<12:27, 5.01s/pipeline] "
1953 | ]
1954 | },
1955 | {
1956 | "name": "stdout",
1957 | "output_type": "stream",
1958 | "text": [
1959 | "Generation 7 - Current best internal CV score: -1316.3456588624992\n"
1960 | ]
1961 | },
1962 | {
1963 | "name": "stderr",
1964 | "output_type": "stream",
1965 | "text": [
1966 | "Optimization Progress: 82%|████████▏ | 451/550 [56:19<05:31, 3.35s/pipeline]"
1967 | ]
1968 | },
1969 | {
1970 | "name": "stdout",
1971 | "output_type": "stream",
1972 | "text": [
1973 | "Generation 8 - Current best internal CV score: -1316.3456588624992\n"
1974 | ]
1975 | },
1976 | {
1977 | "name": "stderr",
1978 | "output_type": "stream",
1979 | "text": [
1980 | "Optimization Progress: 91%|█████████ | 501/550 [1:01:28<04:54, 6.00s/pipeline]"
1981 | ]
1982 | },
1983 | {
1984 | "name": "stdout",
1985 | "output_type": "stream",
1986 | "text": [
1987 | "Generation 9 - Current best internal CV score: -1316.3456588624992\n"
1988 | ]
1989 | },
1990 | {
1991 | "name": "stderr",
1992 | "output_type": "stream",
1993 | "text": [
1994 | " \r"
1995 | ]
1996 | },
1997 | {
1998 | "name": "stdout",
1999 | "output_type": "stream",
2000 | "text": [
2001 | "Generation 10 - Current best internal CV score: -1316.2679104580218\n",
2002 | "\n",
2003 | "Best pipeline: ExtraTreesRegressor(ZeroCount(input_matrix), bootstrap=True, max_features=0.3, min_samples_leaf=11, min_samples_split=3, n_estimators=100)\n"
2004 | ]
2005 | },
2006 | {
2007 | "data": {
2008 | "text/plain": [
2009 | "TPOTRegressor(config_dict={'sklearn.linear_model.ElasticNetCV': {'l1_ratio': array([0. , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,\n",
2010 | " 0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1. ]), 'tol': [1e-05, 0.0001, 0.001, 0.01, 0.1]}, 'sklearn.ensemble.ExtraTreesRegressor': {'n_estimato....3 , 0.35, 0.4 , 0.45, 0.5 , 0.55,\n",
2011 | " 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1. ])}}}},\n",
2012 | " crossover_rate=0.1, cv=3, disable_update_check=False,\n",
2013 | " early_stop=None, generations=10, max_eval_time_mins=5,\n",
2014 | " max_time_mins=None, memory=None, mutation_rate=0.9, n_jobs=1,\n",
2015 | " offspring_size=50, periodic_checkpoint_folder=None,\n",
2016 | " population_size=50, random_state=None, scoring=None, subsample=1.0,\n",
2017 | " verbosity=2, warm_start=False)"
2018 | ]
2019 | },
2020 | "execution_count": 18,
2021 | "metadata": {},
2022 | "output_type": "execute_result"
2023 | }
2024 | ],
2025 | "source": [
2026 | "tpot.fit(train_df, y_train)"
2027 | ]
2028 | },
2029 | {
2030 | "cell_type": "code",
2031 | "execution_count": 19,
2032 | "metadata": {},
2033 | "outputs": [
2034 | {
2035 | "data": {
2036 | "text/plain": [
2037 | "True"
2038 | ]
2039 | },
2040 | "execution_count": 19,
2041 | "metadata": {},
2042 | "output_type": "execute_result"
2043 | }
2044 | ],
2045 | "source": [
2046 | "tpot.export('rul_mse_tpot.py')"
2047 | ]
2048 | },
2049 | {
2050 | "cell_type": "code",
2051 | "execution_count": null,
2052 | "metadata": {},
2053 | "outputs": [],
2054 | "source": []
2055 | }
2056 | ],
2057 | "metadata": {
2058 | "kernelspec": {
2059 | "display_name": "Python 3",
2060 | "language": "python",
2061 | "name": "python3"
2062 | },
2063 | "language_info": {
2064 | "codemirror_mode": {
2065 | "name": "ipython",
2066 | "version": 3
2067 | },
2068 | "file_extension": ".py",
2069 | "mimetype": "text/x-python",
2070 | "name": "python",
2071 | "nbconvert_exporter": "python",
2072 | "pygments_lexer": "ipython3",
2073 | "version": "3.6.2"
2074 | }
2075 | },
2076 | "nbformat": 4,
2077 | "nbformat_minor": 2
2078 | }
2079 |
--------------------------------------------------------------------------------
/data1/PM_truth_01.txt:
--------------------------------------------------------------------------------
1 | 112
2 | 98
3 | 69
4 | 82
5 | 91
6 | 93
7 | 91
8 | 95
9 | 111
10 | 96
11 | 97
12 | 124
13 | 95
14 | 107
15 | 83
16 | 84
17 | 50
18 | 28
19 | 87
20 | 16
21 | 57
22 | 111
23 | 113
24 | 20
25 | 145
26 | 119
27 | 66
28 | 97
29 | 90
30 | 115
31 | 8
32 | 48
33 | 106
34 | 7
35 | 11
36 | 19
37 | 21
38 | 50
39 | 142
40 | 28
41 | 18
42 | 10
43 | 59
44 | 109
45 | 114
46 | 47
47 | 135
48 | 92
49 | 21
50 | 79
51 | 114
52 | 29
53 | 26
54 | 97
55 | 137
56 | 15
57 | 103
58 | 37
59 | 114
60 | 100
61 | 21
62 | 54
63 | 72
64 | 28
65 | 128
66 | 14
67 | 77
68 | 8
69 | 121
70 | 94
71 | 118
72 | 50
73 | 131
74 | 126
75 | 113
76 | 10
77 | 34
78 | 107
79 | 63
80 | 90
81 | 8
82 | 9
83 | 137
84 | 58
85 | 118
86 | 89
87 | 116
88 | 115
89 | 136
90 | 28
91 | 38
92 | 20
93 | 85
94 | 55
95 | 128
96 | 137
97 | 82
98 | 59
99 | 117
100 | 20
101 |
--------------------------------------------------------------------------------
/data1/predictive_binary_final.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data1/predictive_binary_final.h5
--------------------------------------------------------------------------------
/data1/predictive_regression_kalhman.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data1/predictive_regression_kalhman.h5
--------------------------------------------------------------------------------
/data2/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data2/.DS_Store
--------------------------------------------------------------------------------
/data2/PM_truth_02.txt:
--------------------------------------------------------------------------------
1 | 18
2 | 79
3 | 106
4 | 110
5 | 15
6 | 155
7 | 6
8 | 90
9 | 11
10 | 79
11 | 6
12 | 73
13 | 30
14 | 11
15 | 37
16 | 67
17 | 68
18 | 99
19 | 22
20 | 54
21 | 97
22 | 10
23 | 142
24 | 77
25 | 88
26 | 163
27 | 126
28 | 138
29 | 83
30 | 78
31 | 75
32 | 11
33 | 53
34 | 173
35 | 63
36 | 100
37 | 151
38 | 55
39 | 48
40 | 37
41 | 44
42 | 27
43 | 18
44 | 6
45 | 15
46 | 112
47 | 131
48 | 13
49 | 122
50 | 13
51 | 98
52 | 53
53 | 52
54 | 106
55 | 103
56 | 152
57 | 123
58 | 26
59 | 178
60 | 73
61 | 169
62 | 39
63 | 39
64 | 14
65 | 11
66 | 121
67 | 86
68 | 56
69 | 115
70 | 17
71 | 148
72 | 104
73 | 78
74 | 86
75 | 98
76 | 36
77 | 94
78 | 52
79 | 91
80 | 15
81 | 141
82 | 74
83 | 146
84 | 17
85 | 47
86 | 194
87 | 21
88 | 79
89 | 97
90 | 8
91 | 9
92 | 73
93 | 183
94 | 97
95 | 73
96 | 49
97 | 31
98 | 97
99 | 9
100 | 14
101 | 106
102 | 8
103 | 8
104 | 106
105 | 116
106 | 120
107 | 61
108 | 168
109 | 35
110 | 80
111 | 9
112 | 50
113 | 151
114 | 78
115 | 91
116 | 7
117 | 181
118 | 150
119 | 106
120 | 15
121 | 67
122 | 145
123 | 180
124 | 7
125 | 179
126 | 124
127 | 82
128 | 108
129 | 79
130 | 121
131 | 120
132 | 39
133 | 38
134 | 9
135 | 167
136 | 87
137 | 88
138 | 7
139 | 51
140 | 55
141 | 155
142 | 47
143 | 81
144 | 43
145 | 98
146 | 10
147 | 92
148 | 11
149 | 165
150 | 34
151 | 115
152 | 59
153 | 99
154 | 103
155 | 108
156 | 83
157 | 171
158 | 15
159 | 9
160 | 42
161 | 13
162 | 41
163 | 88
164 | 14
165 | 155
166 | 188
167 | 96
168 | 82
169 | 135
170 | 182
171 | 36
172 | 107
173 | 14
174 | 95
175 | 142
176 | 23
177 | 6
178 | 144
179 | 35
180 | 97
181 | 68
182 | 14
183 | 67
184 | 191
185 | 19
186 | 10
187 | 158
188 | 183
189 | 43
190 | 12
191 | 148
192 | 13
193 | 37
194 | 122
195 | 80
196 | 93
197 | 132
198 | 32
199 | 103
200 | 174
201 | 111
202 | 68
203 | 192
204 | 121
205 | 134
206 | 48
207 | 85
208 | 8
209 | 23
210 | 8
211 | 6
212 | 57
213 | 83
214 | 172
215 | 101
216 | 81
217 | 86
218 | 165
219 | 73
220 | 121
221 | 139
222 | 75
223 | 151
224 | 145
225 | 11
226 | 108
227 | 14
228 | 126
229 | 61
230 | 85
231 | 8
232 | 101
233 | 153
234 | 89
235 | 190
236 | 12
237 | 62
238 | 134
239 | 101
240 | 121
241 | 167
242 | 17
243 | 161
244 | 181
245 | 16
246 | 152
247 | 148
248 | 56
249 | 111
250 | 23
251 | 84
252 | 12
253 | 43
254 | 48
255 | 122
256 | 191
257 | 56
258 | 131
259 | 51
260 |
--------------------------------------------------------------------------------
/data2/predictive_binary_final.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data2/predictive_binary_final.h5
--------------------------------------------------------------------------------
/data2/predictive_regression_kalhman.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data2/predictive_regression_kalhman.h5
--------------------------------------------------------------------------------
/data3/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data3/.DS_Store
--------------------------------------------------------------------------------
/data3/.ipynb_checkpoints/RUL-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stderr",
10 | "output_type": "stream",
11 | "text": [
12 | "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
13 | " from ._conv import register_converters as _register_converters\n",
14 | "Using TensorFlow backend.\n"
15 | ]
16 | }
17 | ],
18 | "source": [
19 | "import keras"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 2,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "import pandas as pd\n",
29 | "import numpy as np\n",
30 | "import matplotlib.pyplot as plt\n",
31 | "\n",
32 | "# Setting seed for reproducibility\n",
33 | "np.random.seed(1234) \n",
34 | "PYTHONHASHSEED = 0\n",
35 | "from sklearn import preprocessing\n",
36 | "from sklearn.metrics import confusion_matrix, recall_score, precision_score\n",
37 | "from keras.models import Sequential\n",
38 | "from keras.layers import Dense, Dropout, LSTM, Activation\n",
39 | "%matplotlib inline"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 3,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "from pykalman import KalmanFilter"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 4,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "# read training data \n",
58 | "train_df = pd.read_csv('PM_train_03.txt', sep=\" \", header=None)\n",
59 | "train_df.drop(train_df.columns[[26, 27]], axis=1, inplace=True)\n",
60 | "train_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n",
61 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n",
62 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 5,
68 | "metadata": {},
69 | "outputs": [
70 | {
71 | "data": {
72 | "text/html": [
73 | "\n",
74 | "\n",
87 | "
\n",
88 | " \n",
89 | " \n",
90 | " | \n",
91 | " id | \n",
92 | " cycle | \n",
93 | " setting1 | \n",
94 | " setting2 | \n",
95 | " setting3 | \n",
96 | " s1 | \n",
97 | " s2 | \n",
98 | " s3 | \n",
99 | " s4 | \n",
100 | " s5 | \n",
101 | " ... | \n",
102 | " s12 | \n",
103 | " s13 | \n",
104 | " s14 | \n",
105 | " s15 | \n",
106 | " s16 | \n",
107 | " s17 | \n",
108 | " s18 | \n",
109 | " s19 | \n",
110 | " s20 | \n",
111 | " s21 | \n",
112 | "
\n",
113 | " \n",
114 | " \n",
115 | " \n",
116 | " 0 | \n",
117 | " 1 | \n",
118 | " 1 | \n",
119 | " -0.0005 | \n",
120 | " 0.0004 | \n",
121 | " 100.0 | \n",
122 | " 518.67 | \n",
123 | " 642.36 | \n",
124 | " 1583.23 | \n",
125 | " 1396.84 | \n",
126 | " 14.62 | \n",
127 | " ... | \n",
128 | " 522.31 | \n",
129 | " 2388.01 | \n",
130 | " 8145.32 | \n",
131 | " 8.4246 | \n",
132 | " 0.03 | \n",
133 | " 391 | \n",
134 | " 2388 | \n",
135 | " 100.0 | \n",
136 | " 39.11 | \n",
137 | " 23.3537 | \n",
138 | "
\n",
139 | " \n",
140 | " 1 | \n",
141 | " 1 | \n",
142 | " 2 | \n",
143 | " 0.0008 | \n",
144 | " -0.0003 | \n",
145 | " 100.0 | \n",
146 | " 518.67 | \n",
147 | " 642.50 | \n",
148 | " 1584.69 | \n",
149 | " 1396.89 | \n",
150 | " 14.62 | \n",
151 | " ... | \n",
152 | " 522.42 | \n",
153 | " 2388.03 | \n",
154 | " 8152.85 | \n",
155 | " 8.4403 | \n",
156 | " 0.03 | \n",
157 | " 392 | \n",
158 | " 2388 | \n",
159 | " 100.0 | \n",
160 | " 38.99 | \n",
161 | " 23.4491 | \n",
162 | "
\n",
163 | " \n",
164 | " 2 | \n",
165 | " 1 | \n",
166 | " 3 | \n",
167 | " -0.0014 | \n",
168 | " -0.0002 | \n",
169 | " 100.0 | \n",
170 | " 518.67 | \n",
171 | " 642.18 | \n",
172 | " 1582.35 | \n",
173 | " 1405.61 | \n",
174 | " 14.62 | \n",
175 | " ... | \n",
176 | " 522.03 | \n",
177 | " 2388.00 | \n",
178 | " 8150.17 | \n",
179 | " 8.3901 | \n",
180 | " 0.03 | \n",
181 | " 391 | \n",
182 | " 2388 | \n",
183 | " 100.0 | \n",
184 | " 38.85 | \n",
185 | " 23.3669 | \n",
186 | "
\n",
187 | " \n",
188 | " 3 | \n",
189 | " 1 | \n",
190 | " 4 | \n",
191 | " -0.0020 | \n",
192 | " 0.0001 | \n",
193 | " 100.0 | \n",
194 | " 518.67 | \n",
195 | " 642.92 | \n",
196 | " 1585.61 | \n",
197 | " 1392.27 | \n",
198 | " 14.62 | \n",
199 | " ... | \n",
200 | " 522.49 | \n",
201 | " 2388.08 | \n",
202 | " 8146.56 | \n",
203 | " 8.3878 | \n",
204 | " 0.03 | \n",
205 | " 392 | \n",
206 | " 2388 | \n",
207 | " 100.0 | \n",
208 | " 38.96 | \n",
209 | " 23.2951 | \n",
210 | "
\n",
211 | " \n",
212 | " 4 | \n",
213 | " 1 | \n",
214 | " 5 | \n",
215 | " 0.0016 | \n",
216 | " 0.0000 | \n",
217 | " 100.0 | \n",
218 | " 518.67 | \n",
219 | " 641.68 | \n",
220 | " 1588.63 | \n",
221 | " 1397.65 | \n",
222 | " 14.62 | \n",
223 | " ... | \n",
224 | " 522.58 | \n",
225 | " 2388.03 | \n",
226 | " 8147.80 | \n",
227 | " 8.3869 | \n",
228 | " 0.03 | \n",
229 | " 392 | \n",
230 | " 2388 | \n",
231 | " 100.0 | \n",
232 | " 39.14 | \n",
233 | " 23.4583 | \n",
234 | "
\n",
235 | " \n",
236 | "
\n",
237 | "
5 rows × 26 columns
\n",
238 | "
"
239 | ],
240 | "text/plain": [
241 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
242 | "0 1 1 -0.0005 0.0004 100.0 518.67 642.36 1583.23 1396.84 \n",
243 | "1 1 2 0.0008 -0.0003 100.0 518.67 642.50 1584.69 1396.89 \n",
244 | "2 1 3 -0.0014 -0.0002 100.0 518.67 642.18 1582.35 1405.61 \n",
245 | "3 1 4 -0.0020 0.0001 100.0 518.67 642.92 1585.61 1392.27 \n",
246 | "4 1 5 0.0016 0.0000 100.0 518.67 641.68 1588.63 1397.65 \n",
247 | "\n",
248 | " s5 ... s12 s13 s14 s15 s16 s17 s18 s19 \\\n",
249 | "0 14.62 ... 522.31 2388.01 8145.32 8.4246 0.03 391 2388 100.0 \n",
250 | "1 14.62 ... 522.42 2388.03 8152.85 8.4403 0.03 392 2388 100.0 \n",
251 | "2 14.62 ... 522.03 2388.00 8150.17 8.3901 0.03 391 2388 100.0 \n",
252 | "3 14.62 ... 522.49 2388.08 8146.56 8.3878 0.03 392 2388 100.0 \n",
253 | "4 14.62 ... 522.58 2388.03 8147.80 8.3869 0.03 392 2388 100.0 \n",
254 | "\n",
255 | " s20 s21 \n",
256 | "0 39.11 23.3537 \n",
257 | "1 38.99 23.4491 \n",
258 | "2 38.85 23.3669 \n",
259 | "3 38.96 23.2951 \n",
260 | "4 39.14 23.4583 \n",
261 | "\n",
262 | "[5 rows x 26 columns]"
263 | ]
264 | },
265 | "execution_count": 5,
266 | "metadata": {},
267 | "output_type": "execute_result"
268 | }
269 | ],
270 | "source": [
271 | "train_df.head()"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": 6,
277 | "metadata": {},
278 | "outputs": [],
279 | "source": [
280 | "train_df.columns\n",
281 | "cols = ['setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n",
282 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n",
283 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']"
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "execution_count": 7,
289 | "metadata": {},
290 | "outputs": [
291 | {
292 | "data": {
293 | "text/plain": [
294 | "'setting1'"
295 | ]
296 | },
297 | "execution_count": 7,
298 | "metadata": {},
299 | "output_type": "execute_result"
300 | }
301 | ],
302 | "source": [
303 | "train_df.columns[2]"
304 | ]
305 | },
306 | {
307 | "cell_type": "code",
308 | "execution_count": 8,
309 | "metadata": {},
310 | "outputs": [
311 | {
312 | "name": "stdout",
313 | "output_type": "stream",
314 | "text": [
315 | "setting1\n"
316 | ]
317 | },
318 | {
319 | "name": "stderr",
320 | "output_type": "stream",
321 | "text": [
322 | "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/scipy/linalg/basic.py:1226: RuntimeWarning: internal gelsd driver lwork query error, required iwork dimension not returned. This is likely the result of LAPACK bug 0038, fixed in LAPACK 3.2.2 (released July 21, 2010). Falling back to 'gelss' driver.\n",
323 | " warnings.warn(mesg, RuntimeWarning)\n"
324 | ]
325 | },
326 | {
327 | "name": "stdout",
328 | "output_type": "stream",
329 | "text": [
330 | "setting2\n",
331 | "setting3\n",
332 | "s1\n",
333 | "s2\n",
334 | "s3\n",
335 | "s4\n",
336 | "s5\n",
337 | "s6\n",
338 | "s7\n",
339 | "s8\n",
340 | "s9\n",
341 | "s10\n",
342 | "s11\n",
343 | "s12\n",
344 | "s13\n",
345 | "s14\n",
346 | "s15\n",
347 | "s16\n",
348 | "s17\n",
349 | "s18\n",
350 | "s19\n",
351 | "s20\n",
352 | "s21\n"
353 | ]
354 | }
355 | ],
356 | "source": [
357 | "for cols in train_df.columns:\n",
358 | " if cols == 'id':\n",
359 | " continue;\n",
360 | " if cols == 'cycle':\n",
361 | " continue;\n",
362 | " else:\n",
363 | " print(cols)\n",
364 | " kf = KalmanFilter(transition_matrices = [1],\n",
365 | " observation_matrices = [1],\n",
366 | " initial_state_mean = train_df[cols].values[0],\n",
367 | " initial_state_covariance = 1,\n",
368 | " observation_covariance=1,\n",
369 | " transition_covariance=.01)\n",
370 | " state_means,_ = kf.filter(train_df[cols].values)\n",
371 | " train_df[cols] = state_means.flatten()"
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": null,
377 | "metadata": {},
378 | "outputs": [],
379 | "source": [
380 | "# read test data\n",
381 | "test_df = pd.read_csv('PM_test_03.txt', sep=\" \", header=None)\n",
382 | "test_df.drop(test_df.columns[[26, 27]], axis=1, inplace=True)\n",
383 | "test_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n",
384 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n",
385 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": null,
391 | "metadata": {},
392 | "outputs": [
393 | {
394 | "name": "stdout",
395 | "output_type": "stream",
396 | "text": [
397 | "setting1\n",
398 | "setting2\n",
399 | "setting3\n",
400 | "s1\n",
401 | "s2\n",
402 | "s3\n",
403 | "s4\n",
404 | "s5\n",
405 | "s6\n"
406 | ]
407 | }
408 | ],
409 | "source": [
410 | "for cols in test_df.columns:\n",
411 | " if cols == 'id':\n",
412 | " continue;\n",
413 | " if cols == 'cycle':\n",
414 | " continue;\n",
415 | " else:\n",
416 | " print(cols)\n",
417 | " kf = KalmanFilter(transition_matrices = [1],\n",
418 | " observation_matrices = [1],\n",
419 | " initial_state_mean = test_df[cols].values[0],\n",
420 | " initial_state_covariance = 1,\n",
421 | " observation_covariance=1,\n",
422 | " transition_covariance=.01)\n",
423 | " state_means,_ = kf.filter(test_df[cols].values)\n",
424 | " test_df[cols] = state_means.flatten()"
425 | ]
426 | },
427 | {
428 | "cell_type": "code",
429 | "execution_count": null,
430 | "metadata": {},
431 | "outputs": [],
432 | "source": [
433 | "# read ground truth data\n",
434 | "truth_df = pd.read_csv('PM_truth_03.txt', sep=\" \", header=None)\n",
435 | "truth_df.drop(truth_df.columns[[1]], axis=1, inplace=True)"
436 | ]
437 | },
438 | {
439 | "cell_type": "code",
440 | "execution_count": null,
441 | "metadata": {},
442 | "outputs": [],
443 | "source": [
444 | "train_df = train_df.sort_values(['id','cycle'])\n",
445 | "train_df.head()"
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": null,
451 | "metadata": {},
452 | "outputs": [],
453 | "source": [
454 | "# Data Labeling - generate column RUL\n",
455 | "rul = pd.DataFrame(train_df.groupby('id')['cycle'].max()).reset_index()\n",
456 | "rul.columns = ['id', 'max']\n",
457 | "train_df = train_df.merge(rul, on=['id'], how='left')\n",
458 | "train_df['RUL'] = train_df['max'] - train_df['cycle']\n",
459 | "train_df.drop('max', axis=1, inplace=True)\n",
460 | "train_df.head()"
461 | ]
462 | },
463 | {
464 | "cell_type": "code",
465 | "execution_count": null,
466 | "metadata": {},
467 | "outputs": [],
468 | "source": [
469 | "# generate label columns for training data\n",
470 | "w1 = 30\n",
471 | "w0 = 15\n",
472 | "train_df['label1'] = np.where(train_df['RUL'] <= w1, 1, 0 )\n",
473 | "train_df['label2'] = train_df['label1']\n",
474 | "train_df.loc[train_df['RUL'] <= w0, 'label2'] = 2\n",
475 | "train_df.head()"
476 | ]
477 | },
478 | {
479 | "cell_type": "code",
480 | "execution_count": null,
481 | "metadata": {},
482 | "outputs": [],
483 | "source": [
484 | "# MinMax normalization\n",
485 | "train_df['cycle_norm'] = train_df['cycle']\n",
486 | "cols_normalize = train_df.columns.difference(['id','cycle','RUL','label1','label2'])\n",
487 | "min_max_scaler = preprocessing.MinMaxScaler()\n",
488 | "norm_train_df = pd.DataFrame(min_max_scaler.fit_transform(train_df[cols_normalize]), \n",
489 | " columns=cols_normalize, \n",
490 | " index=train_df.index)\n",
491 | "join_df = train_df[train_df.columns.difference(cols_normalize)].join(norm_train_df)\n",
492 | "train_df = join_df.reindex(columns = train_df.columns)\n",
493 | "train_df.head()"
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "execution_count": null,
499 | "metadata": {},
500 | "outputs": [],
501 | "source": [
502 | "test_df['cycle_norm'] = test_df['cycle']\n",
503 | "norm_test_df = pd.DataFrame(min_max_scaler.transform(test_df[cols_normalize]), \n",
504 | " columns=cols_normalize, \n",
505 | " index=test_df.index)\n",
506 | "test_join_df = test_df[test_df.columns.difference(cols_normalize)].join(norm_test_df)\n",
507 | "test_df = test_join_df.reindex(columns = test_df.columns)\n",
508 | "test_df = test_df.reset_index(drop=True)\n",
509 | "test_df.head()"
510 | ]
511 | },
512 | {
513 | "cell_type": "code",
514 | "execution_count": null,
515 | "metadata": {},
516 | "outputs": [],
517 | "source": [
518 | "# generate column max for test data\n",
519 | "rul = pd.DataFrame(test_df.groupby('id')['cycle'].max()).reset_index()\n",
520 | "rul.columns = ['id', 'max']\n",
521 | "truth_df.columns = ['more']\n",
522 | "truth_df['id'] = truth_df.index + 1\n",
523 | "truth_df['max'] = rul['max'] + truth_df['more']\n",
524 | "truth_df.drop('more', axis=1, inplace=True)"
525 | ]
526 | },
527 | {
528 | "cell_type": "code",
529 | "execution_count": null,
530 | "metadata": {},
531 | "outputs": [],
532 | "source": [
533 | "# generate RUL for test data\n",
534 | "test_df = test_df.merge(truth_df, on=['id'], how='left')\n",
535 | "test_df['RUL'] = test_df['max'] - test_df['cycle']\n",
536 | "test_df.drop('max', axis=1, inplace=True)\n",
537 | "test_df.head()"
538 | ]
539 | },
540 | {
541 | "cell_type": "code",
542 | "execution_count": null,
543 | "metadata": {},
544 | "outputs": [],
545 | "source": [
546 | "# generate label columns label1 and label2 (thresholds w1 and w0) for test data\n",
547 | "test_df['label1'] = np.where(test_df['RUL'] <= w1, 1, 0 )\n",
548 | "test_df['label2'] = test_df['label1']\n",
549 | "test_df.loc[test_df['RUL'] <= w0, 'label2'] = 2\n",
550 | "test_df.head()"
551 | ]
552 | },
553 | {
554 | "cell_type": "code",
555 | "execution_count": null,
556 | "metadata": {},
557 | "outputs": [],
558 | "source": [
559 | "train_df.drop(['label1','label2'],axis=1,inplace=True)\n",
560 | "test_df.drop(['label1','label2'],axis=1,inplace=True)"
561 | ]
562 | },
563 | {
564 | "cell_type": "code",
565 | "execution_count": null,
566 | "metadata": {},
567 | "outputs": [],
568 | "source": [
569 | "# pick a large window size of 50 cycles\n",
570 | "sequence_length = 50"
571 | ]
572 | },
573 | {
574 | "cell_type": "code",
575 | "execution_count": null,
576 | "metadata": {},
577 | "outputs": [],
578 | "source": [
579 | "# preparing data for visualizations \n",
580 | "# rows of test engine id 3 whose RUL is within 50 cycles of its minimum (final recorded) RUL\n",
581 | "engine_id3 = test_df[test_df['id'] == 3]\n",
582 | "engine_id3_50cycleWindow = engine_id3[engine_id3['RUL'] <= engine_id3['RUL'].min() + 50]\n",
583 | "cols1 = ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10']\n",
584 | "engine_id3_50cycleWindow1 = engine_id3_50cycleWindow[cols1]\n",
585 | "cols2 = ['s11', 's12', 's13', 's14', 's15', 's16', 's17', 's18', 's19', 's20', 's21']\n",
586 | "engine_id3_50cycleWindow2 = engine_id3_50cycleWindow[cols2]"
587 | ]
588 | },
589 | {
590 | "cell_type": "code",
591 | "execution_count": null,
592 | "metadata": {},
593 | "outputs": [],
594 | "source": [
595 | "# plotting sensor data for engine ID 3 prior to a failure point - sensors 1-10 \n",
596 | "ax1 = engine_id3_50cycleWindow1.plot(subplots=True, sharex=True, figsize=(20,20))"
597 | ]
598 | },
599 | {
600 | "cell_type": "code",
601 | "execution_count": null,
602 | "metadata": {},
603 | "outputs": [],
604 | "source": [
605 | "# plotting sensor data for engine ID 3 prior to a failure point - sensors 11-21 \n",
606 | "ax2 = engine_id3_50cycleWindow2.plot(subplots=True, sharex=True, figsize=(20,20))"
607 | ]
608 | },
609 | {
610 | "cell_type": "code",
611 | "execution_count": null,
612 | "metadata": {},
613 | "outputs": [],
614 | "source": [
615 | "# function to reshape features into (samples, time steps, features) \n",
616 | "def gen_sequence(id_df, seq_length, seq_cols):\n",
617 | " \"\"\" Only sequences that meet the window-length are considered, no padding is used. This means for testing\n",
618 | " we need to drop those which are below the window-length. An alternative would be to pad sequences so that\n",
619 | " we can use shorter ones \"\"\"\n",
620 | " data_array = id_df[seq_cols].values\n",
621 | " num_elements = data_array.shape[0]\n",
622 | " for start, stop in zip(range(0, num_elements-seq_length), range(seq_length, num_elements)):\n",
623 | " yield data_array[start:stop, :]"
624 | ]
625 | },
626 | {
627 | "cell_type": "code",
628 | "execution_count": null,
629 | "metadata": {},
630 | "outputs": [],
631 | "source": [
632 | "# pick the feature columns \n",
633 | "sensor_cols = ['s' + str(i) for i in range(1,22)]\n",
634 | "sequence_cols = ['setting1', 'setting2', 'setting3', 'cycle_norm']\n",
635 | "sequence_cols.extend(sensor_cols)"
636 | ]
637 | },
638 | {
639 | "cell_type": "code",
640 | "execution_count": null,
641 | "metadata": {},
642 | "outputs": [],
643 | "source": [
644 | "# generator for the sequences\n",
645 | "seq_gen = (list(gen_sequence(train_df[train_df['id']==id], sequence_length, sequence_cols)) \n",
646 | " for id in train_df['id'].unique())"
647 | ]
648 | },
649 | {
650 | "cell_type": "code",
651 | "execution_count": null,
652 | "metadata": {},
653 | "outputs": [],
654 | "source": [
655 | "# generate sequences and convert to numpy array\n",
656 | "seq_array = np.concatenate(list(seq_gen)).astype(np.float32)\n",
657 | "seq_array.shape"
658 | ]
659 | },
660 | {
661 | "cell_type": "code",
662 | "execution_count": null,
663 | "metadata": {},
664 | "outputs": [],
665 | "source": [
666 | "# function to generate labels\n",
667 | "def gen_labels(id_df, seq_length, label):\n",
668 | " data_array = id_df[label].values\n",
669 | " num_elements = data_array.shape[0]\n",
670 | " return data_array[seq_length:num_elements, :]"
671 | ]
672 | },
673 | {
674 | "cell_type": "code",
675 | "execution_count": null,
676 | "metadata": {},
677 | "outputs": [],
678 | "source": [
679 | "# generate labels\n",
680 | "label_gen = [gen_labels(train_df[train_df['id']==id], sequence_length, ['RUL']) \n",
681 | " for id in train_df['id'].unique()]\n",
682 | "label_array = np.concatenate(label_gen).astype(np.float32)\n",
683 | "label_array.shape"
684 | ]
685 | },
686 | {
687 | "cell_type": "code",
688 | "execution_count": null,
689 | "metadata": {},
690 | "outputs": [],
691 | "source": [
692 | "from keras.layers import Bidirectional"
693 | ]
694 | },
695 | {
696 | "cell_type": "code",
697 | "execution_count": null,
698 | "metadata": {},
699 | "outputs": [],
700 | "source": [
701 | "# build the network\n",
702 | "nb_features = seq_array.shape[2]\n",
703 | "nb_out = label_array.shape[1]\n",
704 | "\n",
705 | "model = Sequential()\n",
706 | "# model.add(Bidirectional(LSTM(\n",
707 | "# units=100,\n",
708 | "# return_sequences=True),\n",
709 | "# input_shape=(sequence_length, nb_features)))\n",
710 | "model.add(LSTM(\n",
711 | " units=100,\n",
712 | " return_sequences=True,\n",
713 | " input_shape=(sequence_length, nb_features)))\n",
714 | "model.add(Dropout(0.4))\n",
715 | "\n",
716 | "model.add(LSTM(\n",
717 | " units=100,\n",
718 | " return_sequences=False))\n",
719 | "model.add(Dropout(0.4))\n",
720 | "\n",
721 | "# model.add(Dense(units=100,activation='relu'))\n",
722 | "# model.add(Dropout(0.2))\n",
723 | "# model.add(Dense(units=100,activation='relu'))\n",
724 | "# model.add(Dropout(0.2))\n",
725 | "# model.add(Dense(units=100,activation='relu'))\n",
726 | "# model.add(Dropout(0.2))\n",
727 | "model.add(Dense(units=1, activation='relu'))\n",
728 | "model.add(Activation(\"relu\"))\n",
729 | "model.compile(loss=\"mse\", optimizer=\"rmsprop\", metrics=['mse'])"
730 | ]
731 | },
732 | {
733 | "cell_type": "code",
734 | "execution_count": null,
735 | "metadata": {},
736 | "outputs": [],
737 | "source": [
738 | "print(model.summary())"
739 | ]
740 | },
741 | {
742 | "cell_type": "code",
743 | "execution_count": null,
744 | "metadata": {},
745 | "outputs": [],
746 | "source": [
747 | "from keras.callbacks import EarlyStopping, ModelCheckpoint\n",
748 | "\n",
749 | "STAMP = 'predictive_regression_kalhman'\n",
750 | "print(STAMP)\n",
751 | "\n",
752 | "early_stopping =EarlyStopping(monitor='val_loss', patience=10)\n",
753 | "bst_model_path = STAMP + '.h5'\n",
754 | "model_checkpoint = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)"
755 | ]
756 | },
757 | {
758 | "cell_type": "code",
759 | "execution_count": null,
760 | "metadata": {
761 | "scrolled": false
762 | },
763 | "outputs": [],
764 | "source": [
765 | "%%time\n",
766 | "# fit the network\n",
767 | "hist = model.fit(seq_array, label_array, epochs=1000, batch_size=200, validation_split=0.05, verbose=1,callbacks=[early_stopping, model_checkpoint])"
768 | ]
769 | },
770 | {
771 | "cell_type": "code",
772 | "execution_count": null,
773 | "metadata": {},
774 | "outputs": [],
775 | "source": [
776 | "import matplotlib.pyplot as plt\n",
777 | "\n",
778 | "#plots\n",
779 | "# list all data in history\n",
780 | "print(hist.history.keys())\n",
781 | "# summarize history for mean squared error\n",
782 | "plt.plot(hist.history['mean_squared_error'])\n",
783 | "plt.plot(hist.history['val_mean_squared_error'])\n",
784 | "plt.title('mse')\n",
785 | "plt.ylabel('mean_squared_error')\n",
786 | "plt.xlabel('epoch')\n",
787 | "plt.legend(['train', 'test'], loc='upper left')\n",
788 | "plt.show()\n",
789 | "# summarize history for loss\n",
790 | "plt.plot(hist.history['loss'])\n",
791 | "plt.plot(hist.history['val_loss'])\n",
792 | "plt.title('model loss')\n",
793 | "plt.ylabel('loss')\n",
794 | "plt.xlabel('epoch')\n",
795 | "plt.legend(['train', 'test'], loc='upper left')\n",
796 | "plt.show()"
797 | ]
798 | },
799 | {
800 | "cell_type": "code",
801 | "execution_count": null,
802 | "metadata": {},
803 | "outputs": [],
804 | "source": [
805 | "model.load_weights(bst_model_path)"
806 | ]
807 | },
808 | {
809 | "cell_type": "code",
810 | "execution_count": null,
811 | "metadata": {},
812 | "outputs": [],
813 | "source": [
814 | "# training metrics\n",
815 | "scores = model.evaluate(seq_array, label_array, verbose=1, batch_size=200)\n",
816 | "print('Score: {}'.format(scores[1]))"
817 | ]
818 | },
819 | {
820 | "cell_type": "code",
821 | "execution_count": null,
822 | "metadata": {},
823 | "outputs": [],
824 | "source": [
825 | "seq_array_test_last = [test_df[test_df['id']==id][sequence_cols].values[-sequence_length:] \n",
826 | " for id in test_df['id'].unique() if len(test_df[test_df['id']==id]) >= sequence_length]\n",
827 | "\n",
828 | "seq_array_test_last = np.asarray(seq_array_test_last).astype(np.float32)\n",
829 | "seq_array_test_last.shape"
830 | ]
831 | },
832 | {
833 | "cell_type": "code",
834 | "execution_count": null,
835 | "metadata": {},
836 | "outputs": [],
837 | "source": [
838 | "y_mask = [len(test_df[test_df['id']==id]) >= sequence_length for id in test_df['id'].unique()]"
839 | ]
840 | },
841 | {
842 | "cell_type": "code",
843 | "execution_count": null,
844 | "metadata": {},
845 | "outputs": [],
846 | "source": [
847 | "label_array_test_last = test_df.groupby('id')['RUL'].nth(-1)[y_mask].values\n",
848 | "label_array_test_last = label_array_test_last.reshape(label_array_test_last.shape[0],1).astype(np.float32)\n",
849 | "label_array_test_last.shape"
850 | ]
851 | },
852 | {
853 | "cell_type": "code",
854 | "execution_count": null,
855 | "metadata": {},
856 | "outputs": [],
857 | "source": [
858 | "print(seq_array_test_last.shape)\n",
859 | "print(label_array_test_last.shape)"
860 | ]
861 | },
862 | {
863 | "cell_type": "code",
864 | "execution_count": null,
865 | "metadata": {},
866 | "outputs": [],
867 | "source": [
868 | "# test metrics\n",
869 | "import math\n",
870 | "scores_test = model.evaluate(seq_array_test_last, label_array_test_last, verbose=2)\n",
871 | "print('Test Score: %.2f MSE (%.2f RMSE)' % (scores_test[0], math.sqrt(scores_test[0])))"
872 | ]
873 | },
874 | {
875 | "cell_type": "code",
876 | "execution_count": null,
877 | "metadata": {},
878 | "outputs": [],
879 | "source": [
880 | "pred = model.predict(seq_array_test_last)\n",
881 | "pred"
882 | ]
883 | },
884 | {
885 | "cell_type": "code",
886 | "execution_count": null,
887 | "metadata": {},
888 | "outputs": [],
889 | "source": [
890 | "diff = []\n",
891 | "ratio = []\n",
892 | "pred = model.predict(seq_array_test_last)\n",
893 | "for u in range(len(label_array_test_last)):\n",
894 | " pr = pred[u][0]\n",
895 | " ratio.append((label_array_test_last[u] / pr) - 1)\n",
896 | " diff.append(abs(label_array_test_last[u] - pr))"
897 | ]
898 | },
899 | {
900 | "cell_type": "code",
901 | "execution_count": null,
902 | "metadata": {},
903 | "outputs": [],
904 | "source": [
905 | "import matplotlib.pyplot as plt2\n",
906 | "\n",
907 | "plt2.plot(pred, color='red', label='Prediction')\n",
908 | "plt2.plot(label_array_test_last, color='blue', label='Ground Truth')\n",
909 | "plt2.legend(loc='upper left')\n",
910 | "plt2.show()"
911 | ]
912 | },
913 | {
914 | "cell_type": "code",
915 | "execution_count": null,
916 | "metadata": {},
917 | "outputs": [],
918 | "source": [
919 | "error = pd.DataFrame((label_array_test_last - pred),columns=['error'])\n",
920 | "error.describe()"
921 | ]
922 | },
923 | {
924 | "cell_type": "code",
925 | "execution_count": null,
926 | "metadata": {},
927 | "outputs": [],
928 | "source": [
929 | "pred_cutoff = pd.DataFrame(pred,columns=['rul'])\n",
930 | "pred_cutoff.loc[pred_cutoff['rul'] > 140,'rul'] = 140\n",
931 | "pred_cutoff.head()"
932 | ]
933 | },
934 | {
935 | "cell_type": "code",
936 | "execution_count": null,
937 | "metadata": {},
938 | "outputs": [],
939 | "source": [
940 | "from sklearn.metrics import mean_squared_error\n",
941 | "\n",
942 | "cutoffscore = mean_squared_error(label_array_test_last,pred_cutoff)\n",
943 | "print('Test Score: %.2f MSE (%.2f RMSE)' % (cutoffscore, math.sqrt(cutoffscore)))"
944 | ]
945 | },
946 | {
947 | "cell_type": "code",
948 | "execution_count": null,
949 | "metadata": {},
950 | "outputs": [],
951 | "source": []
952 | }
953 | ],
954 | "metadata": {
955 | "kernelspec": {
956 | "display_name": "Python 3",
957 | "language": "python",
958 | "name": "python3"
959 | },
960 | "language_info": {
961 | "codemirror_mode": {
962 | "name": "ipython",
963 | "version": 3
964 | },
965 | "file_extension": ".py",
966 | "mimetype": "text/x-python",
967 | "name": "python",
968 | "nbconvert_exporter": "python",
969 | "pygments_lexer": "ipython3",
970 | "version": "3.6.2"
971 | }
972 | },
973 | "nbformat": 4,
974 | "nbformat_minor": 2
975 | }
976 |
--------------------------------------------------------------------------------
/data3/PM_truth_03.txt:
--------------------------------------------------------------------------------
1 | 44
2 | 51
3 | 27
4 | 120
5 | 101
6 | 99
7 | 71
8 | 55
9 | 55
10 | 66
11 | 77
12 | 115
13 | 115
14 | 31
15 | 108
16 | 56
17 | 136
18 | 132
19 | 85
20 | 56
21 | 18
22 | 119
23 | 78
24 | 9
25 | 58
26 | 11
27 | 88
28 | 144
29 | 124
30 | 89
31 | 79
32 | 55
33 | 71
34 | 65
35 | 87
36 | 137
37 | 145
38 | 22
39 | 8
40 | 41
41 | 131
42 | 115
43 | 128
44 | 69
45 | 111
46 | 7
47 | 137
48 | 55
49 | 135
50 | 11
51 | 78
52 | 120
53 | 87
54 | 87
55 | 55
56 | 93
57 | 88
58 | 40
59 | 49
60 | 128
61 | 129
62 | 58
63 | 117
64 | 28
65 | 115
66 | 87
67 | 92
68 | 103
69 | 100
70 | 63
71 | 35
72 | 45
73 | 99
74 | 117
75 | 45
76 | 27
77 | 86
78 | 20
79 | 18
80 | 133
81 | 15
82 | 6
83 | 145
84 | 104
85 | 56
86 | 25
87 | 68
88 | 144
89 | 41
90 | 51
91 | 81
92 | 14
93 | 67
94 | 10
95 | 127
96 | 113
97 | 123
98 | 17
99 | 8
100 | 28
101 |
--------------------------------------------------------------------------------
/data3/RUL.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stderr",
10 | "output_type": "stream",
11 | "text": [
12 | "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
13 | " from ._conv import register_converters as _register_converters\n",
14 | "Using TensorFlow backend.\n"
15 | ]
16 | }
17 | ],
18 | "source": [
19 | "import keras"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 2,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "import pandas as pd\n",
29 | "import numpy as np\n",
30 | "import matplotlib.pyplot as plt\n",
31 | "\n",
32 | "# Setting seed for reproducibility\n",
33 | "np.random.seed(1234) \n",
34 | "PYTHONHASHSEED = 0\n",
35 | "from sklearn import preprocessing\n",
36 | "from sklearn.metrics import confusion_matrix, recall_score, precision_score\n",
37 | "from keras.models import Sequential\n",
38 | "from keras.layers import Dense, Dropout, LSTM, Activation\n",
39 | "%matplotlib inline"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 3,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "from pykalman import KalmanFilter"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 4,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "# read training data \n",
58 | "train_df = pd.read_csv('PM_train_03.txt', sep=\" \", header=None)\n",
59 | "train_df.drop(train_df.columns[[26, 27]], axis=1, inplace=True)\n",
60 | "train_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n",
61 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n",
62 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 5,
68 | "metadata": {},
69 | "outputs": [
70 | {
71 | "data": {
72 | "text/html": [
73 | "\n",
74 | "\n",
87 | "
\n",
88 | " \n",
89 | " \n",
90 | " | \n",
91 | " id | \n",
92 | " cycle | \n",
93 | " setting1 | \n",
94 | " setting2 | \n",
95 | " setting3 | \n",
96 | " s1 | \n",
97 | " s2 | \n",
98 | " s3 | \n",
99 | " s4 | \n",
100 | " s5 | \n",
101 | " ... | \n",
102 | " s12 | \n",
103 | " s13 | \n",
104 | " s14 | \n",
105 | " s15 | \n",
106 | " s16 | \n",
107 | " s17 | \n",
108 | " s18 | \n",
109 | " s19 | \n",
110 | " s20 | \n",
111 | " s21 | \n",
112 | "
\n",
113 | " \n",
114 | " \n",
115 | " \n",
116 | " 0 | \n",
117 | " 1 | \n",
118 | " 1 | \n",
119 | " -0.0005 | \n",
120 | " 0.0004 | \n",
121 | " 100.0 | \n",
122 | " 518.67 | \n",
123 | " 642.36 | \n",
124 | " 1583.23 | \n",
125 | " 1396.84 | \n",
126 | " 14.62 | \n",
127 | " ... | \n",
128 | " 522.31 | \n",
129 | " 2388.01 | \n",
130 | " 8145.32 | \n",
131 | " 8.4246 | \n",
132 | " 0.03 | \n",
133 | " 391 | \n",
134 | " 2388 | \n",
135 | " 100.0 | \n",
136 | " 39.11 | \n",
137 | " 23.3537 | \n",
138 | "
\n",
139 | " \n",
140 | " 1 | \n",
141 | " 1 | \n",
142 | " 2 | \n",
143 | " 0.0008 | \n",
144 | " -0.0003 | \n",
145 | " 100.0 | \n",
146 | " 518.67 | \n",
147 | " 642.50 | \n",
148 | " 1584.69 | \n",
149 | " 1396.89 | \n",
150 | " 14.62 | \n",
151 | " ... | \n",
152 | " 522.42 | \n",
153 | " 2388.03 | \n",
154 | " 8152.85 | \n",
155 | " 8.4403 | \n",
156 | " 0.03 | \n",
157 | " 392 | \n",
158 | " 2388 | \n",
159 | " 100.0 | \n",
160 | " 38.99 | \n",
161 | " 23.4491 | \n",
162 | "
\n",
163 | " \n",
164 | " 2 | \n",
165 | " 1 | \n",
166 | " 3 | \n",
167 | " -0.0014 | \n",
168 | " -0.0002 | \n",
169 | " 100.0 | \n",
170 | " 518.67 | \n",
171 | " 642.18 | \n",
172 | " 1582.35 | \n",
173 | " 1405.61 | \n",
174 | " 14.62 | \n",
175 | " ... | \n",
176 | " 522.03 | \n",
177 | " 2388.00 | \n",
178 | " 8150.17 | \n",
179 | " 8.3901 | \n",
180 | " 0.03 | \n",
181 | " 391 | \n",
182 | " 2388 | \n",
183 | " 100.0 | \n",
184 | " 38.85 | \n",
185 | " 23.3669 | \n",
186 | "
\n",
187 | " \n",
188 | " 3 | \n",
189 | " 1 | \n",
190 | " 4 | \n",
191 | " -0.0020 | \n",
192 | " 0.0001 | \n",
193 | " 100.0 | \n",
194 | " 518.67 | \n",
195 | " 642.92 | \n",
196 | " 1585.61 | \n",
197 | " 1392.27 | \n",
198 | " 14.62 | \n",
199 | " ... | \n",
200 | " 522.49 | \n",
201 | " 2388.08 | \n",
202 | " 8146.56 | \n",
203 | " 8.3878 | \n",
204 | " 0.03 | \n",
205 | " 392 | \n",
206 | " 2388 | \n",
207 | " 100.0 | \n",
208 | " 38.96 | \n",
209 | " 23.2951 | \n",
210 | "
\n",
211 | " \n",
212 | " 4 | \n",
213 | " 1 | \n",
214 | " 5 | \n",
215 | " 0.0016 | \n",
216 | " 0.0000 | \n",
217 | " 100.0 | \n",
218 | " 518.67 | \n",
219 | " 641.68 | \n",
220 | " 1588.63 | \n",
221 | " 1397.65 | \n",
222 | " 14.62 | \n",
223 | " ... | \n",
224 | " 522.58 | \n",
225 | " 2388.03 | \n",
226 | " 8147.80 | \n",
227 | " 8.3869 | \n",
228 | " 0.03 | \n",
229 | " 392 | \n",
230 | " 2388 | \n",
231 | " 100.0 | \n",
232 | " 39.14 | \n",
233 | " 23.4583 | \n",
234 | "
\n",
235 | " \n",
236 | "
\n",
237 | "
5 rows × 26 columns
\n",
238 | "
"
239 | ],
240 | "text/plain": [
241 | " id cycle setting1 setting2 setting3 s1 s2 s3 s4 \\\n",
242 | "0 1 1 -0.0005 0.0004 100.0 518.67 642.36 1583.23 1396.84 \n",
243 | "1 1 2 0.0008 -0.0003 100.0 518.67 642.50 1584.69 1396.89 \n",
244 | "2 1 3 -0.0014 -0.0002 100.0 518.67 642.18 1582.35 1405.61 \n",
245 | "3 1 4 -0.0020 0.0001 100.0 518.67 642.92 1585.61 1392.27 \n",
246 | "4 1 5 0.0016 0.0000 100.0 518.67 641.68 1588.63 1397.65 \n",
247 | "\n",
248 | " s5 ... s12 s13 s14 s15 s16 s17 s18 s19 \\\n",
249 | "0 14.62 ... 522.31 2388.01 8145.32 8.4246 0.03 391 2388 100.0 \n",
250 | "1 14.62 ... 522.42 2388.03 8152.85 8.4403 0.03 392 2388 100.0 \n",
251 | "2 14.62 ... 522.03 2388.00 8150.17 8.3901 0.03 391 2388 100.0 \n",
252 | "3 14.62 ... 522.49 2388.08 8146.56 8.3878 0.03 392 2388 100.0 \n",
253 | "4 14.62 ... 522.58 2388.03 8147.80 8.3869 0.03 392 2388 100.0 \n",
254 | "\n",
255 | " s20 s21 \n",
256 | "0 39.11 23.3537 \n",
257 | "1 38.99 23.4491 \n",
258 | "2 38.85 23.3669 \n",
259 | "3 38.96 23.2951 \n",
260 | "4 39.14 23.4583 \n",
261 | "\n",
262 | "[5 rows x 26 columns]"
263 | ]
264 | },
265 | "execution_count": 5,
266 | "metadata": {},
267 | "output_type": "execute_result"
268 | }
269 | ],
270 | "source": [
271 | "train_df.head()"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": 6,
277 | "metadata": {},
278 | "outputs": [],
279 | "source": [
280 | "train_df.columns\n",
281 | "cols = ['setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n",
282 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n",
283 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']"
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "execution_count": 7,
289 | "metadata": {},
290 | "outputs": [
291 | {
292 | "data": {
293 | "text/plain": [
294 | "'setting1'"
295 | ]
296 | },
297 | "execution_count": 7,
298 | "metadata": {},
299 | "output_type": "execute_result"
300 | }
301 | ],
302 | "source": [
303 | "train_df.columns[2]"
304 | ]
305 | },
306 | {
307 | "cell_type": "code",
308 | "execution_count": 8,
309 | "metadata": {},
310 | "outputs": [
311 | {
312 | "name": "stdout",
313 | "output_type": "stream",
314 | "text": [
315 | "setting1\n"
316 | ]
317 | },
318 | {
319 | "name": "stderr",
320 | "output_type": "stream",
321 | "text": [
322 | "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/scipy/linalg/basic.py:1226: RuntimeWarning: internal gelsd driver lwork query error, required iwork dimension not returned. This is likely the result of LAPACK bug 0038, fixed in LAPACK 3.2.2 (released July 21, 2010). Falling back to 'gelss' driver.\n",
323 | " warnings.warn(mesg, RuntimeWarning)\n"
324 | ]
325 | },
326 | {
327 | "name": "stdout",
328 | "output_type": "stream",
329 | "text": [
330 | "setting2\n",
331 | "setting3\n",
332 | "s1\n",
333 | "s2\n",
334 | "s3\n",
335 | "s4\n",
336 | "s5\n",
337 | "s6\n",
338 | "s7\n",
339 | "s8\n",
340 | "s9\n",
341 | "s10\n",
342 | "s11\n",
343 | "s12\n",
344 | "s13\n",
345 | "s14\n",
346 | "s15\n",
347 | "s16\n",
348 | "s17\n",
349 | "s18\n",
350 | "s19\n",
351 | "s20\n",
352 | "s21\n"
353 | ]
354 | }
355 | ],
356 | "source": [
357 | "for cols in train_df.columns:\n",
358 | " if cols == 'id':\n",
359 | " continue;\n",
360 | " if cols == 'cycle':\n",
361 | " continue;\n",
362 | " else:\n",
363 | " print(cols)\n",
364 | " kf = KalmanFilter(transition_matrices = [1],\n",
365 | " observation_matrices = [1],\n",
366 | " initial_state_mean = train_df[cols].values[0],\n",
367 | " initial_state_covariance = 1,\n",
368 | " observation_covariance=1,\n",
369 | " transition_covariance=.01)\n",
370 | " state_means,_ = kf.filter(train_df[cols].values)\n",
371 | " train_df[cols] = state_means.flatten()"
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": null,
377 | "metadata": {},
378 | "outputs": [],
379 | "source": [
380 | "# read test data\n",
381 | "test_df = pd.read_csv('PM_test_03.txt', sep=\" \", header=None)\n",
382 | "test_df.drop(test_df.columns[[26, 27]], axis=1, inplace=True)\n",
383 | "test_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',\n",
384 | " 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',\n",
385 | " 's15', 's16', 's17', 's18', 's19', 's20', 's21']"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": null,
391 | "metadata": {},
392 | "outputs": [
393 | {
394 | "name": "stdout",
395 | "output_type": "stream",
396 | "text": [
397 | "setting1\n",
398 | "setting2\n",
399 | "setting3\n",
400 | "s1\n",
401 | "s2\n",
402 | "s3\n",
403 | "s4\n",
404 | "s5\n",
405 | "s6\n"
406 | ]
407 | }
408 | ],
409 | "source": [
410 | "for cols in test_df.columns:\n",
411 | " if cols == 'id':\n",
412 | " continue;\n",
413 | " if cols == 'cycle':\n",
414 | " continue;\n",
415 | " else:\n",
416 | " print(cols)\n",
417 | " kf = KalmanFilter(transition_matrices = [1],\n",
418 | " observation_matrices = [1],\n",
419 | " initial_state_mean = test_df[cols].values[0],\n",
420 | " initial_state_covariance = 1,\n",
421 | " observation_covariance=1,\n",
422 | " transition_covariance=.01)\n",
423 | " state_means,_ = kf.filter(test_df[cols].values)\n",
424 | " test_df[cols] = state_means.flatten()"
425 | ]
426 | },
427 | {
428 | "cell_type": "code",
429 | "execution_count": null,
430 | "metadata": {},
431 | "outputs": [],
432 | "source": [
433 | "# read ground truth data\n",
434 | "truth_df = pd.read_csv('PM_truth_03.txt', sep=\" \", header=None)\n",
435 | "truth_df.drop(truth_df.columns[[1]], axis=1, inplace=True)"
436 | ]
437 | },
438 | {
439 | "cell_type": "code",
440 | "execution_count": null,
441 | "metadata": {},
442 | "outputs": [],
443 | "source": [
444 | "train_df = train_df.sort_values(['id','cycle'])\n",
445 | "train_df.head()"
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": null,
451 | "metadata": {},
452 | "outputs": [],
453 | "source": [
454 | "# Data Labeling - generate column RUL\n",
455 | "rul = pd.DataFrame(train_df.groupby('id')['cycle'].max()).reset_index()\n",
456 | "rul.columns = ['id', 'max']\n",
457 | "train_df = train_df.merge(rul, on=['id'], how='left')\n",
458 | "train_df['RUL'] = train_df['max'] - train_df['cycle']\n",
459 | "train_df.drop('max', axis=1, inplace=True)\n",
460 | "train_df.head()"
461 | ]
462 | },
463 | {
464 | "cell_type": "code",
465 | "execution_count": null,
466 | "metadata": {},
467 | "outputs": [],
468 | "source": [
469 | "# generate label columns for training data\n",
470 | "w1 = 30\n",
471 | "w0 = 15\n",
472 | "train_df['label1'] = np.where(train_df['RUL'] <= w1, 1, 0 )\n",
473 | "train_df['label2'] = train_df['label1']\n",
474 | "train_df.loc[train_df['RUL'] <= w0, 'label2'] = 2\n",
475 | "train_df.head()"
476 | ]
477 | },
478 | {
479 | "cell_type": "code",
480 | "execution_count": null,
481 | "metadata": {},
482 | "outputs": [],
483 | "source": [
484 | "# MinMax normalization\n",
485 | "train_df['cycle_norm'] = train_df['cycle']\n",
486 | "cols_normalize = train_df.columns.difference(['id','cycle','RUL','label1','label2'])\n",
487 | "min_max_scaler = preprocessing.MinMaxScaler()\n",
488 | "norm_train_df = pd.DataFrame(min_max_scaler.fit_transform(train_df[cols_normalize]), \n",
489 | " columns=cols_normalize, \n",
490 | " index=train_df.index)\n",
491 | "join_df = train_df[train_df.columns.difference(cols_normalize)].join(norm_train_df)\n",
492 | "train_df = join_df.reindex(columns = train_df.columns)\n",
493 | "train_df.head()"
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "execution_count": null,
499 | "metadata": {},
500 | "outputs": [],
501 | "source": [
502 | "test_df['cycle_norm'] = test_df['cycle']\n",
503 | "norm_test_df = pd.DataFrame(min_max_scaler.transform(test_df[cols_normalize]), \n",
504 | " columns=cols_normalize, \n",
505 | " index=test_df.index)\n",
506 | "test_join_df = test_df[test_df.columns.difference(cols_normalize)].join(norm_test_df)\n",
507 | "test_df = test_join_df.reindex(columns = test_df.columns)\n",
508 | "test_df = test_df.reset_index(drop=True)\n",
509 | "test_df.head()"
510 | ]
511 | },
512 | {
513 | "cell_type": "code",
514 | "execution_count": null,
515 | "metadata": {},
516 | "outputs": [],
517 | "source": [
518 | "# generate column max for test data\n",
519 | "rul = pd.DataFrame(test_df.groupby('id')['cycle'].max()).reset_index()\n",
520 | "rul.columns = ['id', 'max']\n",
521 | "truth_df.columns = ['more']\n",
522 | "truth_df['id'] = truth_df.index + 1\n",
523 | "truth_df['max'] = rul['max'] + truth_df['more']\n",
524 | "truth_df.drop('more', axis=1, inplace=True)"
525 | ]
526 | },
527 | {
528 | "cell_type": "code",
529 | "execution_count": null,
530 | "metadata": {},
531 | "outputs": [],
532 | "source": [
533 | "# generate RUL for test data\n",
534 | "test_df = test_df.merge(truth_df, on=['id'], how='left')\n",
535 | "test_df['RUL'] = test_df['max'] - test_df['cycle']\n",
536 | "test_df.drop('max', axis=1, inplace=True)\n",
537 | "test_df.head()"
538 | ]
539 | },
540 | {
541 | "cell_type": "code",
542 | "execution_count": null,
543 | "metadata": {},
544 | "outputs": [],
545 | "source": [
546 | "# generate label columns w0 and w1 for test data\n",
547 | "test_df['label1'] = np.where(test_df['RUL'] <= w1, 1, 0 )\n",
548 | "test_df['label2'] = test_df['label1']\n",
549 | "test_df.loc[test_df['RUL'] <= w0, 'label2'] = 2\n",
550 | "test_df.head()"
551 | ]
552 | },
553 | {
554 | "cell_type": "code",
555 | "execution_count": null,
556 | "metadata": {},
557 | "outputs": [],
558 | "source": [
559 | "train_df.drop(['label1','label2'],axis=1,inplace=True)\n",
560 | "test_df.drop(['label1','label2'],axis=1,inplace=True)"
561 | ]
562 | },
563 | {
564 | "cell_type": "code",
565 | "execution_count": null,
566 | "metadata": {},
567 | "outputs": [],
568 | "source": [
569 | "# pick a large window size of 50 cycles\n",
570 | "sequence_length = 50"
571 | ]
572 | },
573 | {
574 | "cell_type": "code",
575 | "execution_count": null,
576 | "metadata": {},
577 | "outputs": [],
578 | "source": [
579 | "# preparing data for visualizations \n",
580 | "# rows of test engine id 3 whose RUL is within 50 cycles of that engine's minimum RUL\n",
581 | "engine_id3 = test_df[test_df['id'] == 3]\n",
582 | "engine_id3_50cycleWindow = engine_id3[engine_id3['RUL'] <= engine_id3['RUL'].min() + 50]\n",
583 | "cols1 = ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10']\n",
584 | "engine_id3_50cycleWindow1 = engine_id3_50cycleWindow[cols1]\n",
585 | "cols2 = ['s11', 's12', 's13', 's14', 's15', 's16', 's17', 's18', 's19', 's20', 's21']\n",
586 | "engine_id3_50cycleWindow2 = engine_id3_50cycleWindow[cols2]"
587 | ]
588 | },
589 | {
590 | "cell_type": "code",
591 | "execution_count": null,
592 | "metadata": {},
593 | "outputs": [],
594 | "source": [
595 | "# plotting sensor data for engine ID 3 prior to a failure point - sensors 1-10 \n",
596 | "ax1 = engine_id3_50cycleWindow1.plot(subplots=True, sharex=True, figsize=(20,20))"
597 | ]
598 | },
599 | {
600 | "cell_type": "code",
601 | "execution_count": null,
602 | "metadata": {},
603 | "outputs": [],
604 | "source": [
605 | "# plotting sensor data for engine ID 3 prior to a failure point - sensors 11-21 \n",
606 | "ax2 = engine_id3_50cycleWindow2.plot(subplots=True, sharex=True, figsize=(20,20))"
607 | ]
608 | },
609 | {
610 | "cell_type": "code",
611 | "execution_count": null,
612 | "metadata": {},
613 | "outputs": [],
614 | "source": [
615 | "# function to reshape features into (samples, time steps, features) \n",
616 | "def gen_sequence(id_df, seq_length, seq_cols):\n",
617 | " \"\"\" Only sequences that meet the window-length are considered, no padding is used. This means for testing\n",
618 | " we need to drop those which are below the window-length. An alternative would be to pad sequences so that\n",
619 | " we can use shorter ones \"\"\"\n",
620 | " data_array = id_df[seq_cols].values\n",
621 | " num_elements = data_array.shape[0]\n",
622 | " for start, stop in zip(range(0, num_elements-seq_length), range(seq_length, num_elements)):\n",
623 | " yield data_array[start:stop, :]"
624 | ]
625 | },
626 | {
627 | "cell_type": "code",
628 | "execution_count": null,
629 | "metadata": {},
630 | "outputs": [],
631 | "source": [
632 | "# pick the feature columns \n",
633 | "sensor_cols = ['s' + str(i) for i in range(1,22)]\n",
634 | "sequence_cols = ['setting1', 'setting2', 'setting3', 'cycle_norm']\n",
635 | "sequence_cols.extend(sensor_cols)"
636 | ]
637 | },
638 | {
639 | "cell_type": "code",
640 | "execution_count": null,
641 | "metadata": {},
642 | "outputs": [],
643 | "source": [
644 | "# generator for the sequences\n",
645 | "seq_gen = (list(gen_sequence(train_df[train_df['id']==id], sequence_length, sequence_cols)) \n",
646 | " for id in train_df['id'].unique())"
647 | ]
648 | },
649 | {
650 | "cell_type": "code",
651 | "execution_count": null,
652 | "metadata": {},
653 | "outputs": [],
654 | "source": [
655 | "# generate sequences and convert to numpy array\n",
656 | "seq_array = np.concatenate(list(seq_gen)).astype(np.float32)\n",
657 | "seq_array.shape"
658 | ]
659 | },
660 | {
661 | "cell_type": "code",
662 | "execution_count": null,
663 | "metadata": {},
664 | "outputs": [],
665 | "source": [
666 | "# function to generate labels\n",
667 | "def gen_labels(id_df, seq_length, label):\n",
668 | " data_array = id_df[label].values\n",
669 | " num_elements = data_array.shape[0]\n",
670 | " return data_array[seq_length:num_elements, :]"
671 | ]
672 | },
673 | {
674 | "cell_type": "code",
675 | "execution_count": null,
676 | "metadata": {},
677 | "outputs": [],
678 | "source": [
679 | "# generate labels\n",
680 | "label_gen = [gen_labels(train_df[train_df['id']==id], sequence_length, ['RUL']) \n",
681 | " for id in train_df['id'].unique()]\n",
682 | "label_array = np.concatenate(label_gen).astype(np.float32)\n",
683 | "label_array.shape"
684 | ]
685 | },
686 | {
687 | "cell_type": "code",
688 | "execution_count": null,
689 | "metadata": {},
690 | "outputs": [],
691 | "source": [
692 | "from keras.layers import Bidirectional"
693 | ]
694 | },
695 | {
696 | "cell_type": "code",
697 | "execution_count": null,
698 | "metadata": {},
699 | "outputs": [],
700 | "source": [
701 | "# build the network\n",
702 | "nb_features = seq_array.shape[2]\n",
703 | "nb_out = label_array.shape[1]\n",
704 | "\n",
705 | "model = Sequential()\n",
706 | "# model.add(Bidirectional(LSTM(\n",
707 | "# units=100,\n",
708 | "# return_sequences=True),\n",
709 | "# input_shape=(sequence_length, nb_features)))\n",
710 | "model.add(LSTM(\n",
711 | " units=100,\n",
712 | " return_sequences=True,\n",
713 | " input_shape=(sequence_length, nb_features)))\n",
714 | "model.add(Dropout(0.4))\n",
715 | "\n",
716 | "model.add(LSTM(\n",
717 | " units=100,\n",
718 | " return_sequences=False))\n",
719 | "model.add(Dropout(0.4))\n",
720 | "\n",
721 | "# model.add(Dense(units=100,activation='relu'))\n",
722 | "# model.add(Dropout(0.2))\n",
723 | "# model.add(Dense(units=100,activation='relu'))\n",
724 | "# model.add(Dropout(0.2))\n",
725 | "# model.add(Dense(units=100,activation='relu'))\n",
726 | "# model.add(Dropout(0.2))\n",
727 | "model.add(Dense(units=1, activation='relu'))\n",
728 | "model.add(Activation(\"relu\"))\n",
729 | "model.compile(loss=\"mse\", optimizer=\"rmsprop\", metrics=['mse'])"
730 | ]
731 | },
732 | {
733 | "cell_type": "code",
734 | "execution_count": null,
735 | "metadata": {},
736 | "outputs": [],
737 | "source": [
738 | "print(model.summary())"
739 | ]
740 | },
741 | {
742 | "cell_type": "code",
743 | "execution_count": null,
744 | "metadata": {},
745 | "outputs": [],
746 | "source": [
747 | "from keras.callbacks import EarlyStopping, ModelCheckpoint\n",
748 | "\n",
749 | "STAMP = 'predictive_regression_kalhman'\n",
750 | "print(STAMP)\n",
751 | "\n",
752 | "early_stopping =EarlyStopping(monitor='val_loss', patience=10)\n",
753 | "bst_model_path = STAMP + '.h5'\n",
754 | "model_checkpoint = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)"
755 | ]
756 | },
757 | {
758 | "cell_type": "code",
759 | "execution_count": null,
760 | "metadata": {
761 | "scrolled": false
762 | },
763 | "outputs": [],
764 | "source": [
765 | "%%time\n",
766 | "# fit the network\n",
767 | "hist = model.fit(seq_array, label_array, epochs=1000, batch_size=200, validation_split=0.05, verbose=1,callbacks=[early_stopping, model_checkpoint])"
768 | ]
769 | },
770 | {
771 | "cell_type": "code",
772 | "execution_count": null,
773 | "metadata": {},
774 | "outputs": [],
775 | "source": [
776 | "import matplotlib.pyplot as plt\n",
777 | "\n",
778 | "#plots\n",
779 | "# list all data in history\n",
780 | "print(hist.history.keys())\n",
781 | "# summarize history for mean squared error\n",
782 | "plt.plot(hist.history['mean_squared_error'])\n",
783 | "plt.plot(hist.history['val_mean_squared_error'])\n",
784 | "plt.title('mse')\n",
785 | "plt.ylabel('mean_squared_error')\n",
786 | "plt.xlabel('epoch')\n",
787 | "plt.legend(['train', 'test'], loc='upper left')\n",
788 | "plt.show()\n",
789 | "# summarize history for loss\n",
790 | "plt.plot(hist.history['loss'])\n",
791 | "plt.plot(hist.history['val_loss'])\n",
792 | "plt.title('model loss')\n",
793 | "plt.ylabel('loss')\n",
794 | "plt.xlabel('epoch')\n",
795 | "plt.legend(['train', 'test'], loc='upper left')\n",
796 | "plt.show()"
797 | ]
798 | },
799 | {
800 | "cell_type": "code",
801 | "execution_count": null,
802 | "metadata": {},
803 | "outputs": [],
804 | "source": [
805 | "model.load_weights(bst_model_path)"
806 | ]
807 | },
808 | {
809 | "cell_type": "code",
810 | "execution_count": null,
811 | "metadata": {},
812 | "outputs": [],
813 | "source": [
814 | "# training metrics\n",
815 | "scores = model.evaluate(seq_array, label_array, verbose=1, batch_size=200)\n",
816 | "print('Score: {}'.format(scores[1]))"
817 | ]
818 | },
819 | {
820 | "cell_type": "code",
821 | "execution_count": null,
822 | "metadata": {},
823 | "outputs": [],
824 | "source": [
825 | "seq_array_test_last = [test_df[test_df['id']==id][sequence_cols].values[-sequence_length:] \n",
826 | " for id in test_df['id'].unique() if len(test_df[test_df['id']==id]) >= sequence_length]\n",
827 | "\n",
828 | "seq_array_test_last = np.asarray(seq_array_test_last).astype(np.float32)\n",
829 | "seq_array_test_last.shape"
830 | ]
831 | },
832 | {
833 | "cell_type": "code",
834 | "execution_count": null,
835 | "metadata": {},
836 | "outputs": [],
837 | "source": [
838 | "y_mask = [len(test_df[test_df['id']==id]) >= sequence_length for id in test_df['id'].unique()]"
839 | ]
840 | },
841 | {
842 | "cell_type": "code",
843 | "execution_count": null,
844 | "metadata": {},
845 | "outputs": [],
846 | "source": [
847 | "label_array_test_last = test_df.groupby('id')['RUL'].nth(-1)[y_mask].values\n",
848 | "label_array_test_last = label_array_test_last.reshape(label_array_test_last.shape[0],1).astype(np.float32)\n",
849 | "label_array_test_last.shape"
850 | ]
851 | },
852 | {
853 | "cell_type": "code",
854 | "execution_count": null,
855 | "metadata": {},
856 | "outputs": [],
857 | "source": [
858 | "print(seq_array_test_last.shape)\n",
859 | "print(label_array_test_last.shape)"
860 | ]
861 | },
862 | {
863 | "cell_type": "code",
864 | "execution_count": null,
865 | "metadata": {},
866 | "outputs": [],
867 | "source": [
868 | "# test metrics\n",
869 | "import math\n",
870 | "scores_test = model.evaluate(seq_array_test_last, label_array_test_last, verbose=2)\n",
871 | "print('Test Score: %.2f MSE (%.2f RMSE)' % (scores_test[0], math.sqrt(scores_test[0])))"
872 | ]
873 | },
874 | {
875 | "cell_type": "code",
876 | "execution_count": null,
877 | "metadata": {},
878 | "outputs": [],
879 | "source": [
880 | "pred = model.predict(seq_array_test_last)\n",
881 | "pred"
882 | ]
883 | },
884 | {
885 | "cell_type": "code",
886 | "execution_count": null,
887 | "metadata": {},
888 | "outputs": [],
889 | "source": [
890 | "diff = []\n",
891 | "ratio = []\n",
892 | "pred = model.predict(seq_array_test_last)\n",
893 | "for u in range(len(label_array_test_last)):\n",
894 | " pr = pred[u][0]\n",
895 | " ratio.append((label_array_test_last[u] / pr) - 1)\n",
896 | " diff.append(abs(label_array_test_last[u] - pr))"
897 | ]
898 | },
899 | {
900 | "cell_type": "code",
901 | "execution_count": null,
902 | "metadata": {},
903 | "outputs": [],
904 | "source": [
905 | "import matplotlib.pyplot as plt2\n",
906 | "\n",
907 | "plt2.plot(pred, color='red', label='Prediction')\n",
908 | "plt2.plot(label_array_test_last, color='blue', label='Ground Truth')\n",
909 | "plt2.legend(loc='upper left')\n",
910 | "plt2.show()"
911 | ]
912 | },
913 | {
914 | "cell_type": "code",
915 | "execution_count": null,
916 | "metadata": {},
917 | "outputs": [],
918 | "source": [
919 | "error = pd.DataFrame((label_array_test_last - pred),columns=['error'])\n",
920 | "error.describe()"
921 | ]
922 | },
923 | {
924 | "cell_type": "code",
925 | "execution_count": null,
926 | "metadata": {},
927 | "outputs": [],
928 | "source": [
929 | "pred_cutoff = pd.DataFrame(pred,columns=['rul'])\n",
930 | "pred_cutoff.loc[pred_cutoff['rul'] > 140,'rul'] = 140\n",
931 | "pred_cutoff.head()"
932 | ]
933 | },
934 | {
935 | "cell_type": "code",
936 | "execution_count": null,
937 | "metadata": {},
938 | "outputs": [],
939 | "source": [
940 | "from sklearn.metrics import mean_squared_error\n",
941 | "\n",
942 | "cutoffscore = mean_squared_error(label_array_test_last,pred_cutoff)\n",
943 | "print('Test Score: %.2f MSE (%.2f RMSE)' % (cutoffscore, math.sqrt(cutoffscore)))"
944 | ]
945 | },
946 | {
947 | "cell_type": "code",
948 | "execution_count": null,
949 | "metadata": {},
950 | "outputs": [],
951 | "source": []
952 | }
953 | ],
954 | "metadata": {
955 | "kernelspec": {
956 | "display_name": "Python 3",
957 | "language": "python",
958 | "name": "python3"
959 | },
960 | "language_info": {
961 | "codemirror_mode": {
962 | "name": "ipython",
963 | "version": 3
964 | },
965 | "file_extension": ".py",
966 | "mimetype": "text/x-python",
967 | "name": "python",
968 | "nbconvert_exporter": "python",
969 | "pygments_lexer": "ipython3",
970 | "version": "3.6.2"
971 | }
972 | },
973 | "nbformat": 4,
974 | "nbformat_minor": 2
975 | }
976 |
--------------------------------------------------------------------------------
/data4/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data4/.DS_Store
--------------------------------------------------------------------------------
/data4/PM_truth_04.txt:
--------------------------------------------------------------------------------
1 | 22
2 | 39
3 | 107
4 | 75
5 | 149
6 | 78
7 | 94
8 | 14
9 | 99
10 | 162
11 | 143
12 | 7
13 | 71
14 | 105
15 | 12
16 | 160
17 | 162
18 | 104
19 | 194
20 | 82
21 | 91
22 | 11
23 | 26
24 | 142
25 | 39
26 | 92
27 | 76
28 | 124
29 | 64
30 | 118
31 | 6
32 | 22
33 | 147
34 | 126
35 | 36
36 | 73
37 | 89
38 | 11
39 | 151
40 | 10
41 | 97
42 | 30
43 | 42
44 | 60
45 | 85
46 | 134
47 | 34
48 | 45
49 | 24
50 | 86
51 | 119
52 | 151
53 | 142
54 | 176
55 | 157
56 | 67
57 | 97
58 | 8
59 | 154
60 | 139
61 | 51
62 | 33
63 | 184
64 | 46
65 | 12
66 | 133
67 | 46
68 | 46
69 | 12
70 | 33
71 | 15
72 | 176
73 | 23
74 | 89
75 | 124
76 | 163
77 | 25
78 | 74
79 | 78
80 | 114
81 | 96
82 | 10
83 | 172
84 | 166
85 | 115
86 | 70
87 | 94
88 | 56
89 | 86
90 | 96
91 | 50
92 | 73
93 | 154
94 | 129
95 | 171
96 | 71
97 | 105
98 | 113
99 | 37
100 | 7
101 | 13
102 | 22
103 | 9
104 | 120
105 | 100
106 | 107
107 | 41
108 | 153
109 | 126
110 | 59
111 | 18
112 | 66
113 | 13
114 | 14
115 | 139
116 | 13
117 | 75
118 | 8
119 | 109
120 | 137
121 | 41
122 | 192
123 | 23
124 | 86
125 | 184
126 | 15
127 | 195
128 | 126
129 | 120
130 | 165
131 | 101
132 | 116
133 | 126
134 | 36
135 | 7
136 | 122
137 | 159
138 | 88
139 | 173
140 | 146
141 | 130
142 | 108
143 | 53
144 | 162
145 | 59
146 | 100
147 | 56
148 | 145
149 | 76
150 | 57
151 | 31
152 | 88
153 | 173
154 | 34
155 | 7
156 | 133
157 | 172
158 | 6
159 | 22
160 | 83
161 | 82
162 | 84
163 | 95
164 | 174
165 | 111
166 | 72
167 | 109
168 | 87
169 | 179
170 | 158
171 | 126
172 | 12
173 | 8
174 | 10
175 | 123
176 | 103
177 | 12
178 | 106
179 | 12
180 | 32
181 | 37
182 | 116
183 | 15
184 | 10
185 | 46
186 | 142
187 | 24
188 | 135
189 | 56
190 | 43
191 | 178
192 | 71
193 | 104
194 | 15
195 | 166
196 | 89
197 | 36
198 | 11
199 | 92
200 | 96
201 | 59
202 | 13
203 | 167
204 | 151
205 | 154
206 | 109
207 | 116
208 | 91
209 | 11
210 | 88
211 | 108
212 | 76
213 | 14
214 | 89
215 | 145
216 | 17
217 | 66
218 | 154
219 | 41
220 | 182
221 | 73
222 | 39
223 | 58
224 | 14
225 | 145
226 | 88
227 | 162
228 | 189
229 | 120
230 | 98
231 | 33
232 | 184
233 | 110
234 | 68
235 | 24
236 | 75
237 | 18
238 | 16
239 | 166
240 | 98
241 | 176
242 | 81
243 | 118
244 | 35
245 | 131
246 | 194
247 | 112
248 | 26
249 |
--------------------------------------------------------------------------------
/data4/predictive_binary_final.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data4/predictive_binary_final.h5
--------------------------------------------------------------------------------
/data4/predictive_regression_kalhman.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soham97/Remaining_useful_life_NASA/39cb40d2b13f4cf16829df67a809e4ce46beb3a7/data4/predictive_regression_kalhman.h5
--------------------------------------------------------------------------------