├── .gitignore ├── Dataset ├── PM_test.txt ├── PM_train.txt └── PM_truth.txt ├── Images ├── test_engine_sample.gif ├── train_engine_sample.gif └── val_engine_sample.gif ├── LICENSE ├── Models ├── baseline_gru_model_weights ├── baseline_model └── baseline_wtte_model_weights ├── README.md ├── _site ├── Dataset │ ├── PM_test.txt │ ├── PM_train.txt │ └── PM_truth.txt ├── Images │ ├── test_engine_sample.gif │ ├── train_engine_sample.gif │ └── val_engine_sample.gif ├── Models │ ├── baseline_gru_model_weights │ ├── baseline_model │ └── baseline_wtte_model_weights ├── Turbofan Engine │ └── regression_model.h5 ├── assets │ └── images │ │ ├── header_free.jpg │ │ ├── rnn-time-to-event-notebook_16_1.png │ │ ├── rnn-time-to-event-notebook_19_1.png │ │ ├── rnn-time-to-event-notebook_26_1.png │ │ ├── rnn-time-to-event-notebook_33_1.png │ │ ├── rnn-time-to-event-notebook_35_0.png │ │ ├── rnn-time-to-event-notebook_37_1.png │ │ ├── rnn-time-to-event-notebook_46_1.png │ │ ├── rnn-time-to-event-notebook_49_1.png │ │ └── rnn-time-to-event-notebook_51_0.png ├── requirements.txt ├── rnn-time-to-event-notebook.ipynb ├── rnn-time-to-event-notebook.md ├── rnn-time-to-event-notebook │ └── index.html └── rnn-time-to-event-notebook_files │ ├── rnn-time-to-event-notebook_16_1.png │ ├── rnn-time-to-event-notebook_19_1.png │ ├── rnn-time-to-event-notebook_26_1.png │ ├── rnn-time-to-event-notebook_33_1.png │ ├── rnn-time-to-event-notebook_35_0.png │ ├── rnn-time-to-event-notebook_37_1.png │ ├── rnn-time-to-event-notebook_46_1.png │ ├── rnn-time-to-event-notebook_49_1.png │ └── rnn-time-to-event-notebook_51_0.png ├── requirements.txt ├── rnn-time-to-event-notebook.ipynb ├── rnn-time-to-event-notebook.md ├── rnn-time-to-event-notebook_files ├── rnn-time-to-event-notebook_16_1.png ├── rnn-time-to-event-notebook_19_1.png ├── rnn-time-to-event-notebook_26_1.png ├── rnn-time-to-event-notebook_33_1.png ├── rnn-time-to-event-notebook_35_0.png ├── rnn-time-to-event-notebook_37_1.png ├── 
rnn-time-to-event-notebook_46_1.png ├── rnn-time-to-event-notebook_49_1.png └── rnn-time-to-event-notebook_51_0.png └── slides ├── RNN - Time To Event.odp ├── RNN - Time To Event.pdf └── RNN - Time To Event.pptx /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 
-------------------------------------------------------------------------------- /Dataset/PM_truth.txt: -------------------------------------------------------------------------------- 1 | 112 2 | 98 3 | 69 4 | 82 5 | 91 6 | 93 7 | 91 8 | 95 9 | 111 10 | 96 11 | 97 12 | 124 13 | 95 14 | 107 15 | 83 16 | 84 17 | 50 18 | 28 19 | 87 20 | 16 21 | 57 22 | 111 23 | 113 24 | 20 25 | 145 26 | 119 27 | 66 28 | 97 29 | 90 30 | 115 31 | 8 32 | 48 33 | 106 34 | 7 35 | 11 36 | 19 37 | 21 38 | 50 39 | 142 40 | 28 41 | 18 42 | 10 43 | 59 44 | 109 45 | 114 46 | 47 47 | 135 48 | 92 49 | 21 50 | 79 51 | 114 52 | 29 53 | 26 54 | 97 55 | 137 56 | 15 57 | 103 58 | 37 59 | 114 60 | 100 61 | 21 62 | 54 63 | 72 64 | 28 65 | 128 66 | 14 67 | 77 68 | 8 69 | 121 70 | 94 71 | 118 72 | 50 73 | 131 74 | 126 75 | 113 76 | 10 77 | 34 78 | 107 79 | 63 80 | 90 81 | 8 82 | 9 83 | 137 84 | 58 85 | 118 86 | 89 87 | 116 88 | 115 89 | 136 90 | 28 91 | 38 92 | 20 93 | 85 94 | 55 95 | 128 96 | 137 97 | 82 98 | 59 99 | 117 100 | 20 101 | -------------------------------------------------------------------------------- /Images/test_engine_sample.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/Images/test_engine_sample.gif -------------------------------------------------------------------------------- /Images/train_engine_sample.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/Images/train_engine_sample.gif -------------------------------------------------------------------------------- /Images/val_engine_sample.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/Images/val_engine_sample.gif 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Manel Maragall 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Models/baseline_gru_model_weights: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/Models/baseline_gru_model_weights -------------------------------------------------------------------------------- /Models/baseline_model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/Models/baseline_model -------------------------------------------------------------------------------- /Models/baseline_wtte_model_weights: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/Models/baseline_wtte_model_weights -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rnn-time-to-event 2 | An approximation of Recurrent Neural Networks to predict the Time to an Event 3 | 4 | 5 | ![Alt Text](https://github.com/Manelmc/rnn-time-to-event/blob/master/Images/train_engine_sample.gif) 6 | 7 | # Notebook 8 | 9 | Predictive Maintenance for the Turbofan Engine Dataset 10 | 11 | [View in Colaboratory](https://colab.research.google.com/github/Manelmc/rnn-time-to-event/blob/master/predictive-maintenance-turbofan-engine.ipynb) 12 | 13 | ## Data Preparation 14 | 15 | 16 | 17 | ```python 18 | import keras 19 | import keras.backend as K 20 | 21 | print "Keras version", keras.__version__ 22 | 23 | import pandas as pd 24 | import numpy as np 25 | import matplotlib.pyplot as plt 26 | 27 | 28 | # Setting seed for reproducibility 29 | SEED = 42 30 | np.random.seed(SEED) 31 | ``` 32 | 33 | Using TensorFlow 
backend. 34 | 35 | 36 | Keras version 2.1.6 37 | 38 | 39 | 40 | ```python 41 | !mkdir Dataset 42 | !mkdir Models 43 | 44 | !wget -q https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/master/Dataset/PM_test.txt -O Dataset/PM_test.txt 45 | !wget -q https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/master/Dataset/PM_train.txt -O Dataset/PM_train.txt 46 | !wget -q https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/master/Dataset/PM_truth.txt -O Dataset/PM_truth.txt 47 | 48 | !ls Dataset 49 | ``` 50 | 51 | PM_test.txt PM_train.txt PM_truth.txt 52 | 53 | 54 | ### Turbofan Train Set 55 | 56 | 57 | ```python 58 | from sklearn import preprocessing 59 | 60 | # read training data - It is the aircraft engine run-to-failure data. 61 | train_df = pd.read_csv('Dataset/PM_train.txt', sep=" ", header=None) 62 | train_df.drop(train_df.columns[[26, 27]], axis=1, inplace=True) 63 | train_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3', 64 | 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14', 65 | 's15', 's16', 's17', 's18', 's19', 's20', 's21'] 66 | 67 | train_df = train_df.sort_values(['id','cycle']) 68 | 69 | # Data Labeling - generate column RUL (Remaining Useful Life or Time to Failure) 70 | rul = pd.DataFrame(train_df.groupby('id')['cycle'].max()).reset_index() 71 | rul.columns = ['id', 'max'] 72 | train_df = train_df.merge(rul, on=['id'], how='left') 73 | train_df['RUL'] = train_df['max'] - train_df['cycle'] 74 | train_df.drop('max', axis=1, inplace=True) 75 | 76 | # MinMax normalization (from 0 to 1) 77 | train_df['cycle_norm'] = train_df['cycle'] 78 | cols_normalize = train_df.columns.difference(['id','cycle','RUL','label1','label2']) 79 | min_max_scaler = preprocessing.MinMaxScaler() 80 | norm_train_df = pd.DataFrame(min_max_scaler.fit_transform(train_df[cols_normalize]), 81 | columns=cols_normalize, 82 | index=train_df.index) 83 | join_df = 
train_df[train_df.columns.difference(cols_normalize)].join(norm_train_df) 84 | train_df = join_df.reindex(columns = train_df.columns) 85 | 86 | train_df[train_df["id"] == 1].tail() 87 | ``` 88 | 89 | 90 | 91 | 92 |
93 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 |
idcyclesetting1setting2setting3s1s2s3s4s5...s14s15s16s17s18s19s20s21RULcycle_norm
18711880.1149430.7500000.00.00.7650600.6832350.6841660.0...0.0915990.7533670.00.6666670.00.00.2868220.08920240.518006
18811890.4655170.6666670.00.00.8945780.5478530.7724510.0...0.0906700.7441320.00.5833330.00.00.2635660.30171230.520776
18911900.3448280.5833330.00.00.7319280.6143450.7376770.0...0.0652290.7595230.00.8333330.00.00.2713180.23929920.523546
19011910.5000000.1666670.00.00.6415660.6827990.7346390.0...0.0757040.7406690.00.5000000.00.00.2403100.32491010.526316
19111920.5517240.5000000.00.00.7018070.6620890.7587780.0...0.0567140.7171990.00.6666670.00.00.2635660.09762500.529086
256 |

5 rows × 28 columns

257 |
258 | 259 | 260 | 261 | ### Turbofan Test Set 262 | 263 | 264 | ```python 265 | from sklearn import preprocessing 266 | 267 | # read test data - It is the aircraft engine operating data without failure events recorded. 268 | test_df = pd.read_csv('Dataset/PM_test.txt', sep=" ", header=None) 269 | test_df.drop(test_df.columns[[26, 27]], axis=1, inplace=True) 270 | test_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3', 271 | 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14', 272 | 's15', 's16', 's17', 's18', 's19', 's20', 's21'] 273 | 274 | # MinMax normalization (from 0 to 1) 275 | test_df['cycle_norm'] = test_df['cycle'] 276 | norm_test_df = pd.DataFrame(min_max_scaler.transform(test_df[cols_normalize]), 277 | columns=cols_normalize, 278 | index=test_df.index) 279 | test_join_df = test_df[test_df.columns.difference(cols_normalize)].join(norm_test_df) 280 | test_df = test_join_df.reindex(columns = test_df.columns) 281 | test_df = test_df.reset_index(drop=True) 282 | 283 | # read ground truth data - It contains the information of true remaining cycles for each engine in the testing data. 284 | truth_df = pd.read_csv('Dataset/PM_truth.txt', sep=" ", header=None) 285 | truth_df.drop(truth_df.columns[[1]], axis=1, inplace=True) 286 | 287 | # generate column max for test data 288 | rul = pd.DataFrame(test_df.groupby('id')['cycle'].max()).reset_index() 289 | rul.columns = ['id', 'max'] 290 | truth_df.columns = ['more'] 291 | truth_df['id'] = truth_df.index + 1 292 | truth_df['max'] = rul['max'] + truth_df['more'] 293 | truth_df.drop('more', axis=1, inplace=True) 294 | 295 | # generate RUL for test data 296 | test_df = test_df.merge(truth_df, on=['id'], how='left') 297 | test_df['RUL'] = test_df['max'] - test_df['cycle'] 298 | test_df.drop('max', axis=1, inplace=True) 299 | 300 | test_df[test_df["id"] == 1].tail() 301 | ``` 302 | 303 | 304 | 305 | 306 |
307 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 |
idcyclesetting1setting2setting3s1s2s3s4s5...s14s15s16s17s18s19s20s21cycle_normRUL
261270.4597700.5833330.00.00.2620480.3403100.3048620.0...0.1408810.4790300.00.3333330.00.00.5658910.6888980.072022116
271280.6264370.9166670.00.00.2168670.5059950.3214040.0...0.1803590.4697960.00.3333330.00.00.5348840.6296600.074792115
281290.5804600.5833330.00.00.2228920.3512100.2677250.0...0.1712770.3705270.00.3333330.00.00.6821710.6460920.077562114
291300.3563220.8333330.00.00.4759040.3200350.3160030.0...0.1798430.3312810.00.2500000.00.00.7364340.7079540.080332113
301310.4655170.8333330.00.00.4126510.2219320.2812290.0...0.1556920.2981920.00.4166670.00.00.5193800.6365640.083102112
470 |

5 rows × 28 columns

471 |
472 | 473 | 474 | 475 | ### Apply right padding to all the sequences 476 | 477 | 478 | ```python 479 | def pad_sequence(df, max_seq_length, mask=0): 480 | """ 481 | Applies right padding to a sequences until max_seq_length with mask 482 | """ 483 | return np.pad(df.values, ((0, max_seq_length - df.values.shape[0]), (0,0)), 484 | "constant", constant_values=mask) 485 | 486 | def pad_engines(df, cols, max_batch_len, mask=0): 487 | """ 488 | Applies right padding to the columns "cols" of all the engines 489 | """ 490 | return np.array([pad_sequence(df[df['id'] == batch_id][cols], max_batch_len, mask=mask) 491 | for batch_id in df['id'].unique()]) 492 | 493 | max_batch_len = train_df['id'].value_counts().max() 494 | train_cols = ['s' + str(i) for i in range(1,22)] + ['setting1', 'setting2', 'setting3', 'cycle_norm'] 495 | test_cols = ["RUL"] 496 | 497 | X = pad_engines(train_df, train_cols, max_batch_len) 498 | Y = pad_engines(train_df, test_cols, max_batch_len) 499 | ``` 500 | 501 | ### Split into train, validation and test 502 | 503 | 504 | ```python 505 | from sklearn.model_selection import train_test_split 506 | 507 | # Split into train and validation 508 | train_X, val_X, train_Y, val_Y = train_test_split(X, Y, test_size=0.20, random_state=SEED) 509 | 510 | # Test set from CMAPSS 511 | test_X = pad_engines(test_df, train_cols, max_batch_len) 512 | test_Y = pad_engines(test_df, test_cols, max_batch_len) 513 | 514 | # In the WTTE-RNN architecture we will predict 2 parameters (alpha and beta) 515 | # alpha is initialised to 1 516 | train_Y_wtte = np.concatenate((train_Y, np.ones(train_Y.shape)), axis=2) 517 | val_Y_wtte = np.concatenate((val_Y, np.ones(val_Y.shape)), axis=2) 518 | test_Y_wtte = np.concatenate((test_Y, np.ones(test_Y.shape)), axis=2) 519 | 520 | print "Train:\n", " X:", train_X.shape, "\n Y:", train_Y.shape, "\n Y_wtte:", train_Y_wtte.shape 521 | print "\nValidation:\n", " X:", val_X.shape, "\n Y:", val_Y.shape, "\n Y_wtte:", val_Y_wtte.shape 522 | 
print "\nTest:\n", " X:", test_X.shape, "\n Y:", test_Y.shape, "\n Y_wtte:", test_Y_wtte.shape 523 | ``` 524 | 525 | Train: 526 | X: (80, 362, 25) 527 | Y: (80, 362, 1) 528 | Y_wtte: (80, 362, 2) 529 | 530 | Validation: 531 | X: (20, 362, 25) 532 | Y: (20, 362, 1) 533 | Y_wtte: (20, 362, 2) 534 | 535 | Test: 536 | X: (100, 362, 25) 537 | Y: (100, 362, 1) 538 | Y_wtte: (100, 362, 2) 539 | 540 | 541 | ## Baseline 542 | 543 | 544 | ```python 545 | from keras.layers import Masking 546 | from keras.layers.core import Activation 547 | from keras.models import Sequential 548 | from keras.layers import Dense, LSTM, TimeDistributed 549 | from keras.callbacks import EarlyStopping, ModelCheckpoint 550 | 551 | # Model path 552 | baseline_path = "baseline_model" 553 | 554 | # Callbacks 555 | early_stopping = EarlyStopping(monitor='val_loss', 556 | min_delta=0, 557 | patience=30, 558 | verbose=0, 559 | mode='min') 560 | checkpoint = ModelCheckpoint(baseline_path, 561 | monitor='val_loss', 562 | save_best_only=True, 563 | mode='min', 564 | verbose=0) 565 | # dimensions of the model 566 | nb_features = train_X.shape[2] 567 | nb_out = train_Y.shape[2] 568 | 569 | model = Sequential() 570 | # Masking layer so the right padding is ignored 571 | # at each layer of the network 572 | model.add(Masking(mask_value=0., 573 | input_shape=(max_batch_len, nb_features))) 574 | # Then there s an LSTM layer with 100 units 575 | # Recurrent Dropout is also applied after each 576 | # LSTM layer to control overfitting. 
577 | model.add(LSTM( 578 | units=100, 579 | recurrent_dropout=0.2, 580 | return_sequences=True)) 581 | # followed by another LSTM layer with 50 units 582 | model.add(LSTM( 583 | units=50, 584 | recurrent_dropout=0.2, 585 | return_sequences=True)) 586 | # Final layer is a Time-Distributed Dense layer 587 | # with a single unit with an Exponential activation 588 | model.add(TimeDistributed(Dense(nb_out, activation=K.exp))) 589 | model.compile(loss="mse", optimizer=keras.optimizers.RMSprop()) 590 | 591 | print(model.summary()) 592 | 593 | # fit the network 594 | history = model.fit(train_X, train_Y, epochs=500, batch_size=16, 595 | validation_data=(val_X, val_Y), shuffle=True, 596 | verbose=2, callbacks = [early_stopping, checkpoint]) 597 | 598 | # list all data in history 599 | print(history.history.keys()) 600 | ``` 601 | 602 | _________________________________________________________________ 603 | Layer (type) Output Shape Param # 604 | ================================================================= 605 | masking_1 (Masking) (None, 362, 25) 0 606 | _________________________________________________________________ 607 | lstm_1 (LSTM) (None, 362, 100) 50400 608 | _________________________________________________________________ 609 | lstm_2 (LSTM) (None, 362, 50) 30200 610 | _________________________________________________________________ 611 | time_distributed_1 (TimeDist (None, 362, 1) 51 612 | ================================================================= 613 | Total params: 80,651 614 | Trainable params: 80,651 615 | Non-trainable params: 0 616 | _________________________________________________________________ 617 | None 618 | Train on 80 samples, validate on 20 samples 619 | ... 
620 | - 15s - loss: 1204.1237 - val_loss: 621.4485 621 | Epoch 312/500 622 | - 15s - loss: 1293.4628 - val_loss: 611.2367 623 | Epoch 313/500 624 | - 15s - loss: 1410.6540 - val_loss: 599.2881 625 | Epoch 314/500 626 | - 15s - loss: 1280.4136 - val_loss: 651.2672 627 | Epoch 315/500 628 | - 15s - loss: 1233.0307 - val_loss: 634.8255 629 | Epoch 316/500 630 | - 15s - loss: 1339.8630 - val_loss: 702.0963 631 | Epoch 317/500 632 | - 14s - loss: 1249.2757 - val_loss: 789.5427 633 | Epoch 318/500 634 | - 15s - loss: 1364.1424 - val_loss: 834.3046 635 | ['loss', 'val_loss'] 636 | 637 | 638 | 639 | ```python 640 | # Execute if training in Colaboratory (preferably from Chrome) 641 | # Downloads the model after the training finishes 642 | 643 | from google.colab import files 644 | files.download(baseline_path) 645 | 646 | # Move the model to the expected folder 647 | !mv {baseline_path} Models/ 648 | ``` 649 | 650 | 651 | ```python 652 | # Validation loss vs the Training loss 653 | 654 | %matplotlib inline 655 | 656 | plt.plot(history.history["loss"]) 657 | plt.plot(history.history["val_loss"]) 658 | ``` 659 | 660 | 661 | 662 | 663 | [] 664 | 665 | 666 | 667 | 668 | ![png](rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_16_1.png) 669 | 670 | 671 | 672 | ```python 673 | # Execute if you want to upload a model to Colaboratory 674 | 675 | from google.colab import files 676 | uploaded = files.upload() 677 | 678 | for fn in uploaded.keys(): 679 | print('User uploaded file "{name}" with length {length} bytes'.format( 680 | name=fn, length=len(uploaded[fn]))) 681 | ``` 682 | 683 | 684 | 685 | 686 | 687 | Upload widget is only available when the cell has been executed in the 688 | current browser session. Please rerun this cell to enable. 
689 | 690 | 691 | 692 | 693 | 694 | ```python 695 | from keras.models import load_model 696 | 697 | # It's important to load the model after the training 698 | # The keras Checkpoint will save the best model in terms 699 | # of the validation loss in the specified path 700 | model = load_model("Models/" + baseline_path, custom_objects={"exp": K.exp}) 701 | ``` 702 | 703 | 704 | ```python 705 | %matplotlib inline 706 | from math import sqrt 707 | 708 | from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 709 | 710 | # We save the validation errors to later compare the models 711 | validation_baseline = model.predict(val_X).flatten() 712 | 713 | def evaluate_and_plot(model, evaluation_data, weibull_function=None): 714 | """ 715 | Generate scores dataframe and plot the RUL 716 | """ 717 | fig = plt.figure() 718 | i = 1 719 | score_df = pd.DataFrame({"Method": ["MAE", "RMSE", "R2"]}) 720 | for name_set, train_set, test_set in evaluation_data: 721 | if weibull_function is None: 722 | y_pred = model.predict(train_set).flatten() 723 | else: 724 | y_pred = [weibull_function(alpha, beta) 725 | for batch in model.predict(train_set) 726 | for beta, alpha in batch] 727 | l = test_set[:,:,0].flatten() 728 | # To validate we remove the right padding 729 | y_true = np.ma.compressed(np.ma.masked_where(l==0, l)) 730 | y_pred = np.ma.compressed(np.ma.masked_where(l==0, y_pred)) 731 | score_mae = "{0:.2f}".format(mean_absolute_error(y_true, y_pred)) 732 | score_rmse = "{0:.2f}".format(sqrt(mean_squared_error(y_true, y_pred))) 733 | score_r2 = "{0:.3f}".format(r2_score(y_true, y_pred)) 734 | score_df[name_set] = [score_mae, score_rmse, score_r2] 735 | ax = fig.add_subplot(6, 1, i) 736 | ax.title.set_text(name_set) 737 | ax.title.set_fontsize(20) 738 | i += 1 739 | plt.plot(y_pred[0:2500]) 740 | plt.plot(y_true[0:2500]) 741 | ax = fig.add_subplot(6, 1, i) 742 | i += 1 743 | plt.plot(y_pred[2500:5000]) 744 | plt.plot(y_true[2500:5000]) 745 | 
plt.subplots_adjust(hspace=0.45) 746 | fig.set_size_inches(15, i*2.2) 747 | return score_df.T 748 | 749 | evaluate_and_plot(model, 750 | [("Train", train_X, train_Y), 751 | ("Validation", val_X, val_Y), 752 | ("Test", test_X, test_Y)]) 753 | ``` 754 | 755 | 756 | 757 | 758 |
759 | 772 | 773 | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | 784 | 785 | 786 | 787 | 788 | 789 | 790 | 791 | 792 | 793 | 794 | 795 | 796 | 797 | 798 | 799 | 800 | 801 | 802 | 803 | 804 | 805 | 806 | 807 |
012
MethodMAERMSER2
Train21.1933.570.766
Validation17.3623.980.866
Test27.0337.410.598
808 |
809 | 810 | 811 | 812 | 813 | ![png](rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_19_1.png) 814 | 815 | 816 | ## Adapting to WTTE-RNN 817 | 818 | 819 | ```python 820 | # Install wtte package from Martinsson 821 | 822 | !pip install wtte 823 | ``` 824 | 825 | Collecting wtte 826 | Downloading https://files.pythonhosted.org/packages/95/0e/8affc53f47d4ceb69fc80484fd87ad886c6cab7f4ce0add38076b6092d76/wtte-1.1.1-py2.py3-none-any.whl 827 | Requirement already satisfied: scipy in /usr/local/lib/python2.7/dist-packages (from wtte) (0.19.1) 828 | Requirement already satisfied: numpy in /usr/local/lib/python2.7/dist-packages (from wtte) (1.14.5) 829 | Requirement already satisfied: keras>=2.0 in /usr/local/lib/python2.7/dist-packages (from wtte) (2.1.6) 830 | Requirement already satisfied: pandas in /usr/local/lib/python2.7/dist-packages (from wtte) (0.22.0) 831 | Collecting six==1.10.0 (from wtte) 832 | Downloading https://files.pythonhosted.org/packages/c8/0a/b6723e1bc4c516cb687841499455a8505b44607ab535be01091c0f24f079/six-1.10.0-py2.py3-none-any.whl 833 | Requirement already satisfied: pyyaml in /usr/local/lib/python2.7/dist-packages (from keras>=2.0->wtte) (3.13) 834 | Requirement already satisfied: h5py in /usr/local/lib/python2.7/dist-packages (from keras>=2.0->wtte) (2.8.0) 835 | Requirement already satisfied: pytz>=2011k in /usr/local/lib/python2.7/dist-packages (from pandas->wtte) (2018.5) 836 | Requirement already satisfied: python-dateutil in /usr/local/lib/python2.7/dist-packages (from pandas->wtte) (2.5.3) 837 | Installing collected packages: six, wtte 838 | Found existing installation: six 1.11.0 839 | Uninstalling six-1.11.0: 840 | Successfully uninstalled six-1.11.0 841 | Successfully installed six-1.10.0 wtte-1.1.1 842 | 843 | 844 | 845 | ```python 846 | # Loss and activation functions from Martinsson 847 | # These are not used in the final version because 848 | # the wtte package has useful regularization tools 849 | 850 | def 
weibull_loglik_discrete(y_true, y_pred, epsilon=K.epsilon()): 851 | y = y_true[..., 0] 852 | u = y_true[..., 1] 853 | a = y_pred[..., 0] 854 | b = y_pred[..., 1] 855 | 856 | hazard0 = K.pow((y + epsilon) / a, b) 857 | hazard1 = K.pow((y + 1.0) / a, b) 858 | 859 | loss = u * K.log(K.exp(hazard1 - hazard0) - (1.0 - epsilon)) - hazard1 860 | return -loss 861 | 862 | def activation_weibull(y_true): 863 | a = y_true[..., 0] 864 | b = y_true[..., 1] 865 | 866 | a = K.exp(a) 867 | b = K.sigmoid(b) 868 | return K.stack([a, b], axis=-1) 869 | ``` 870 | 871 | 872 | ```python 873 | from keras.layers import Masking 874 | from keras.layers.core import Activation 875 | from keras.models import Sequential 876 | from keras.layers import Dense, LSTM, TimeDistributed, Lambda 877 | from keras.callbacks import EarlyStopping, TerminateOnNaN, ModelCheckpoint 878 | import wtte.weibull as weibull 879 | import wtte.wtte as wtte 880 | 881 | # Since we use a lambda in the last layer the model 882 | # is not saved well in keras, instead we save the weights. 
883 | # This requires compiling the model to load the weights 884 | baseline_wtte_path = "baseline_wtte_model_weights" 885 | # Callbacks 886 | early_stopping = EarlyStopping(monitor='val_loss', 887 | min_delta=0, 888 | patience=30, 889 | verbose=0, 890 | mode='min') 891 | checkpoint = ModelCheckpoint(baseline_wtte_path, 892 | monitor='val_loss', 893 | save_best_only=True, 894 | save_weights_only=True, 895 | mode='min', 896 | verbose=0) 897 | 898 | nb_features = train_X.shape[2] 899 | nb_out = train_Y.shape[1] 900 | 901 | model = Sequential() 902 | 903 | model.add(Masking(mask_value=0., 904 | input_shape=(max_batch_len, nb_features))) 905 | model.add(LSTM( 906 | input_shape=(None, nb_features), 907 | units=100, 908 | recurrent_dropout=0.2, 909 | return_sequences=True)) 910 | model.add(LSTM( 911 | units=50, 912 | recurrent_dropout=0.2, 913 | return_sequences=True)) 914 | model.add(TimeDistributed(Dense(2))) 915 | # uncomment this line and comment the next to use 916 | # activation_weibull function: 917 | # model.add(Activation(activation_weibull)) 918 | model.add(Lambda(wtte.output_lambda, 919 | arguments={# Initialization value around it's scale 920 | "init_alpha": np.nanmean(train_Y_wtte[:,0]), 921 | # Set a maximum 922 | "max_beta_value": 10.0 923 | }, 924 | )) 925 | # Same for the loss "weibull_loglik_discrete" 926 | # model.compile(loss=weibull_loglik_discrete, optimizer='rmsprop') 927 | # We use clipping on the loss 928 | loss = wtte.Loss(kind='discrete', clip_prob=1e-5).loss_function 929 | 930 | model.compile(loss=loss, optimizer='rmsprop') 931 | ``` 932 | 933 | 934 | ```python 935 | print(model.summary()) 936 | 937 | # fit the network 938 | history = model.fit(train_X, train_Y_wtte, epochs=500, batch_size=16, 939 | validation_data=(val_X, val_Y_wtte), shuffle=True, verbose=2, 940 | callbacks = [early_stopping, checkpoint, TerminateOnNaN()]) 941 | 942 | # list all data in history 943 | print(history.history.keys()) 944 | ``` 945 | 946 | 
_________________________________________________________________ 947 | Layer (type) Output Shape Param # 948 | ================================================================= 949 | masking_4 (Masking) (None, None, 25) 0 950 | _________________________________________________________________ 951 | lstm_7 (LSTM) (None, None, 100) 50400 952 | _________________________________________________________________ 953 | lstm_8 (LSTM) (None, None, 50) 30200 954 | _________________________________________________________________ 955 | time_distributed_4 (TimeDist (None, None, 2) 102 956 | _________________________________________________________________ 957 | lambda_2 (Lambda) (None, None, 2) 0 958 | ================================================================= 959 | Total params: 80,702 960 | Trainable params: 80,702 961 | Non-trainable params: 0 962 | _________________________________________________________________ 963 | None 964 | Train on 80 samples, validate on 20 samples 965 | ... 966 | Epoch 352/500 967 | - 12s - loss: 2.5586 - val_loss: 2.4429 968 | Epoch 353/500 969 | - 13s - loss: 2.5923 - val_loss: 2.5299 970 | Epoch 354/500 971 | - 12s - loss: 2.6591 - val_loss: 2.4070 972 | Epoch 355/500 973 | - 12s - loss: 2.5594 - val_loss: 2.5139 974 | Epoch 356/500 975 | - 13s - loss: 2.5870 - val_loss: 2.4082 976 | Epoch 357/500 977 | - 12s - loss: 2.6275 - val_loss: 2.4218 978 | ['loss', 'val_loss'] 979 | 980 | 981 | 982 | ```python 983 | # Execute if training in Colaboratory (preferably from Chrome) 984 | # Downloads the model after the training finishes 985 | 986 | from google.colab import files 987 | files.download(baseline_wtte_path) 988 | 989 | # Move the model to the expected folder 990 | !mv {baseline_wtte_path} Models/ 991 | ``` 992 | 993 | 994 | ```python 995 | %matplotlib inline 996 | 997 | 998 | plt.plot(history.history["loss"]) 999 | plt.plot(history.history["val_loss"]) 1000 | ``` 1001 | 1002 | 1003 | 1004 | 1005 | [] 1006 | 1007 | 1008 | 1009 | 1010 | 
![png](rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_26_1.png) 1011 | 1012 | 1013 | 1014 | ```python 1015 | # Execute if you want to upload a model to Collaboratory 1016 | 1017 | from google.colab import files 1018 | uploaded = files.upload() 1019 | 1020 | for fn in uploaded.keys(): 1021 | print('User uploaded file "{name}" with length {length} bytes'.format( 1022 | name=fn, length=len(uploaded[fn]))) 1023 | ``` 1024 | 1025 | 1026 | 1027 | 1028 | 1029 | Upload widget is only available when the cell has been executed in the 1030 | current browser session. Please rerun this cell to enable. 1031 | 1032 | 1033 | 1034 | 1035 | Saving baseline_wtte_model_weights (1) to baseline_wtte_model_weights (1) 1036 | User uploaded file "baseline_wtte_model_weights (1)" with length 340528 bytes 1037 | 1038 | 1039 | 1040 | ```python 1041 | # Compile model first to load weights 1042 | 1043 | model.load_weights("Models/" + baseline_wtte_path) 1044 | ``` 1045 | 1046 | ### Weibull Methods 1047 | 1048 | $\mu = \beta\Gamma(1 + \alpha^{-1})$ 1049 | 1050 | $\sigma^2 = \beta^2[\Gamma(1 + 2\alpha^{-1}) - \Gamma^2(1 + \alpha^{-1})]$ 1051 | 1052 | $mode = \beta\frac{\alpha-1}{\alpha}^{1/\alpha}$ 1053 | 1054 | Inverse CDF $ = \beta (-\log(1 - x))^\frac{1}{\alpha} $ when $ 0 1 else 0 1065 | 1066 | def median_weibull(alpha, beta): 1067 | return beta*(log(2)**(1./alpha)) 1068 | 1069 | def var_weibull(alpha, beta): 1070 | return beta**2*(gamma(1 + 2./alpha) - gamma(1 + 1./alpha)**2) 1071 | 1072 | def pdf_weibull(x, alpha, beta): 1073 | return (alpha/beta)*(x/beta)**(alpha - 1)*np.exp(-(x/beta)**alpha) 1074 | 1075 | def inverse_cdf_weibull(x, alpha, beta): 1076 | return beta*np.power((-np.log(1.-x)), 1./alpha) 1077 | 1078 | def survival_weibull(x, alpha, beta): 1079 | return np.e**-((x/beta)**alpha) 1080 | ``` 1081 | 1082 | ### Mean, Mode and Median 1083 | 1084 | 1085 | 1086 | 
![](https://upload.wikimedia.org/wikipedia/commons/thumb/3/33/Visualisation_mode_median_mean.svg/150px-Visualisation_mode_median_mean.svg.png) 1087 | 1088 | 1089 | ```python 1090 | %matplotlib inline 1091 | 1092 | print "Mode" 1093 | print evaluate_and_plot(model, 1094 | [("Train", train_X, train_Y_wtte), 1095 | ("Validation", val_X, val_Y_wtte), 1096 | ("Test", test_X, test_Y_wtte)], 1097 | weibull_function = mode_weibull) 1098 | 1099 | # comment the next line to visualise the plot for the mode 1100 | plt.close() 1101 | 1102 | print "\nMedian" 1103 | print evaluate_and_plot(model, 1104 | [("Train", train_X, train_Y_wtte), 1105 | ("Validation", val_X, val_Y_wtte), 1106 | ("Test", test_X, test_Y_wtte)], 1107 | weibull_function = median_weibull) 1108 | 1109 | # comment the next line to visualise the plot for the median 1110 | plt.close() 1111 | 1112 | # We save the validation errors to later compare the models 1113 | validation_wtte = [mean_weibull(alpha, beta) 1114 | for batch in model.predict(val_X) 1115 | for beta, alpha in batch] 1116 | 1117 | print "\nMean" 1118 | print evaluate_and_plot(model, 1119 | [("Train", train_X, train_Y_wtte), 1120 | ("Validation", val_X, val_Y_wtte), 1121 | ("Test", test_X, test_Y_wtte)], 1122 | weibull_function = mean_weibull) 1123 | ``` 1124 | 1125 | Mode 1126 | 0 1 2 1127 | Method MAE RMSE R2 1128 | Train 21.53 34.69 0.750 1129 | Validation 17.94 26.48 0.836 1130 | Test 27.46 38.59 0.572 1131 | 1132 | Median 1133 | 0 1 2 1134 | Method MAE RMSE R2 1135 | Train 21.05 33.51 0.767 1136 | Validation 17.79 25.48 0.848 1137 | Test 26.72 37.49 0.596 1138 | 1139 | Mean 1140 | 0 1 2 1141 | Method MAE RMSE R2 1142 | Train 20.94 33.14 0.772 1143 | Validation 17.79 25.26 0.851 1144 | Test 26.51 37.22 0.602 1145 | 1146 | 1147 | 1148 | ![png](rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_33_1.png) 1149 | 1150 | 1151 | ### Evolution of the pdf through the cycles of an engine (PLOT) 1152 | 1153 | 1154 | ```python 1155 | import random 
1156 | 1157 | import seaborn as sns 1158 | 1159 | 1160 | random.seed(SEED) 1161 | lot = random.sample(train_X, 3) 1162 | random.seed(SEED) 1163 | lot += random.sample(val_X, 3) 1164 | random.seed(SEED) 1165 | lot += random.sample(test_X, 3) 1166 | 1167 | palette = list(reversed(sns.color_palette("RdBu_r", 250))) 1168 | 1169 | fig = plt.figure() 1170 | j = 1 1171 | for batch in lot: 1172 | size = batch[~np.all(batch == 0, axis=1)].shape[0] 1173 | y_pred_wtte = model.predict(batch.reshape(1, max_batch_len, nb_features))[0] 1174 | y_pred_wtte = y_pred_wtte[:size] 1175 | x = np.arange(1, 400.) 1176 | 1177 | freq = 5 1178 | ax = fig.add_subplot(3, 3, j) 1179 | 1180 | i=0 1181 | for beta, alpha in y_pred_wtte[0::freq][2:]: 1182 | mean = mode_weibull(alpha, beta) 1183 | color=palette[int(mean)] if i < len(palette) else palette[-1] 1184 | plt.plot(x, pdf_weibull(x, alpha, beta), color=color) 1185 | i += 1 1186 | ax.set_ylim([0, 0.07]) 1187 | ax.set_xlim([0, 300]) 1188 | ax.set_yticklabels([]) 1189 | if j == 2: 1190 | ax.title.set_text("Train") 1191 | elif j == 5: 1192 | ax.title.set_text("Validation") 1193 | elif j == 8: 1194 | ax.title.set_text("Test") 1195 | j += 1 1196 | 1197 | plt.subplots_adjust(wspace=0.15, hspace=0.25) 1198 | fig.set_size_inches(10,10) 1199 | ``` 1200 | 1201 | 1202 | ![png](rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_35_0.png) 1203 | 1204 | 1205 | ### Confidence Interval of the Weibull Distribution 1206 | 1207 | 1208 | ```python 1209 | %matplotlib inline 1210 | 1211 | from scipy.stats import dweibull 1212 | 1213 | batch = lot[0] 1214 | size = batch[~np.all(batch == 0, axis=1)].shape[0] 1215 | y_pred_wtte = model.predict(batch.reshape(1, max_batch_len, nb_features))[0] 1216 | y_pred_wtte = y_pred_wtte[:size] 1217 | 1218 | fig = plt.figure() 1219 | fig.add_subplot(1,1,1) 1220 | for beta, alpha in y_pred_wtte[0::20]: 1221 | x = np.arange(1, 300.) 
1222 | mean = mean_weibull(alpha, beta) 1223 | sigma = np.sqrt(var_weibull(alpha, beta)) 1224 | plt.plot(x, pdf_weibull(x, alpha, beta), color=palette[int(mean)]) 1225 | # alpha is the shape parameter 1226 | conf = dweibull.interval(0.95, alpha, loc=mean, scale=sigma) 1227 | plt.fill([conf[0]] + list(np.arange(conf[0], conf[1])) + [conf[1]], 1228 | [0] + list(pdf_weibull(np.arange(conf[0], conf[1]), alpha, beta)) + [0], 1229 | color=palette[int(mean)], alpha=0.5) 1230 | 1231 | axes = plt.gca() 1232 | axes.set_ylim([0., 0.06]) 1233 | axes.set_xlim([0., 300.]) 1234 | fig.set_size_inches(10,5) 1235 | ``` 1236 | 1237 | /anaconda2/envs/ALL_BF/lib/python2.7/site-packages/ipykernel_launcher.py:16: RuntimeWarning: invalid value encountered in power 1238 | app.launch_new_instance() 1239 | 1240 | 1241 | 1242 | ![png](rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_37_1.png) 1243 | 1244 | 1245 | ### Evolution of the pdf through the cycles of an engine (GIFs) 1246 | 1247 | 1248 | ```python 1249 | import sys 1250 | import random 1251 | from math import gamma 1252 | 1253 | from matplotlib.animation import FuncAnimation 1254 | from scipy.stats import dweibull 1255 | 1256 | 1257 | def generate_gif(y_pred, y_true, path, freq=2): 1258 | # remove mask if exists 1259 | y_true = y_true[y_true != 0] 1260 | y_pred = y_pred[:y_true.shape[0]] 1261 | 1262 | frames = zip(y_true, y_pred) 1263 | 1264 | # pad, w_pad, h_pad, and rect 1265 | fig = plt.figure() 1266 | global ax1, ax2 1267 | ax1 = fig.add_subplot(1,2,1) 1268 | ax2 = fig.add_subplot(1,2,2) 1269 | fig.set_tight_layout(True) 1270 | x = np.arange(1, 300.) 
1271 | beta, alpha = y_pred[0] 1272 | line1, = ax1.plot(x, pdf_weibull(x, alpha, beta)) 1273 | global i, acc_y_true, acc_y_pred 1274 | i = 0 1275 | predict_mean = mean_weibull(alpha, beta) 1276 | ax2.plot(i, y_true[0], 'bo', label="True", ms=2.5) 1277 | ax2.plot(i, predict_mean, 'o', color="orange", label="Predicted", ms=2.5) 1278 | ax2.legend(loc="upper right") 1279 | # limits 1280 | ax1.set_ylim([0, 0.07]) 1281 | ax2.set_ylim([0, y_true[0] + 10]) 1282 | ax2.set_xlim([0, len(frames)/freq + 2]) 1283 | ax2.set_xticklabels([]) 1284 | # acc values 1285 | acc_y_true = [] 1286 | acc_y_pred = [] 1287 | 1288 | def update(instant): 1289 | y_true_t, y_pred_t = instant 1290 | beta, alpha = y_pred_t 1291 | # print y_true 1292 | pdf = pdf_weibull(x, alpha, beta) 1293 | line1.set_ydata(pdf) 1294 | global i, acc_y_true, acc_y_pred 1295 | i += 1 1296 | mean = mean_weibull(alpha, beta) 1297 | sigma = np.sqrt(var_weibull(alpha, beta)) 1298 | acc_y_pred += [mean] 1299 | acc_y_true += [y_true_t] 1300 | ax2.plot(range(len(acc_y_true)), acc_y_true, 'b', label="True") 1301 | ax2.plot(range(len(acc_y_pred)), acc_y_pred, color="orange", label="Predicted") 1302 | conf = dweibull.interval(0.95, alpha, loc=mean, scale=sigma) 1303 | ax1.set_title("PDF Weibull Distrib. 
(Mean: " + "{0:.1f}".format(mean) 1304 | + ", Std: " + "{0:.1f}".format(sigma) + ")" 1305 | + " CI 95%: [{0:.1f}, {1:.1f}]".format(*conf)) 1306 | ax2.set_title("Real RUL: " + str(y_true_t) + " cycles") 1307 | 1308 | fig.set_size_inches(15,4) 1309 | anim = FuncAnimation(fig, update, frames=frames[0::freq]) 1310 | anim.save(path, writer="imagemagick") 1311 | plt.close() 1312 | 1313 | random.seed(SEED) 1314 | batch_X, batch_Y = random.choice(zip(train_X, train_Y)) 1315 | y_pred_wtte = model.predict(batch_X.reshape(1, max_batch_len, nb_features))[0] 1316 | gif_path = "Images/train_engine_sample.gif" 1317 | generate_gif(y_pred_wtte, batch_Y, gif_path, freq=2) 1318 | 1319 | print "Train Sample" 1320 | from IPython.display import HTML 1321 | HTML('') 1322 | ``` 1323 | 1324 | Train Sample 1325 | 1326 | 1327 | 1328 | 1329 | 1330 | 1331 | 1332 | 1333 | 1334 | 1335 | ```python 1336 | random.seed(SEED) 1337 | batch_X, batch_Y = random.choice(zip(val_X, val_Y)) 1338 | y_pred_wtte = model.predict(batch_X.reshape(1, max_batch_len, nb_features))[0] 1339 | gif_path = "Images/val_engine_sample.gif" 1340 | generate_gif(y_pred_wtte, batch_Y, gif_path, freq=2) 1341 | 1342 | print "Validation Sample" 1343 | from IPython.display import HTML 1344 | HTML('') 1345 | ``` 1346 | 1347 | Validation Sample 1348 | 1349 | 1350 | 1351 | 1352 | 1353 | 1354 | 1355 | 1356 | 1357 | 1358 | ```python 1359 | random.seed(SEED) 1360 | batch_X, batch_Y = random.choice(zip(test_X, test_Y)) 1361 | y_pred_wtte = model.predict(batch_X.reshape(1, max_batch_len, nb_features))[0] 1362 | gif_path = "Images/test_engine_sample.gif" 1363 | generate_gif(y_pred_wtte, batch_Y, gif_path, freq=2) 1364 | 1365 | print "Test Sample" 1366 | from IPython.display import HTML 1367 | HTML('') 1368 | ``` 1369 | 1370 | Test Sample 1371 | 1372 | 1373 | 1374 | 1375 | 1376 | 1377 | 1378 | 1379 | 1380 | ## GRU variant 1381 | 1382 | 1383 | ```python 1384 | from keras.layers import Masking 1385 | from keras.layers.core import Activation 
1386 | from keras.models import Sequential 1387 | from keras.layers import Dense, GRU, TimeDistributed, Lambda 1388 | from keras.callbacks import EarlyStopping, TerminateOnNaN, ModelCheckpoint 1389 | import wtte.weibull as weibull 1390 | import wtte.wtte as wtte 1391 | 1392 | baseline_gru_path = "baseline_gru_model_weights" 1393 | 1394 | # Callbacks 1395 | early_stopping = EarlyStopping(monitor='val_loss', 1396 | min_delta=0, 1397 | patience=30, 1398 | verbose=0, 1399 | mode='min') 1400 | checkpoint = ModelCheckpoint(baseline_gru_path, 1401 | monitor='val_loss', 1402 | save_best_only=True, 1403 | save_weights_only=True, 1404 | mode='min', 1405 | verbose=0) 1406 | 1407 | nb_features = train_X.shape[2] 1408 | nb_out = train_Y.shape[1] 1409 | 1410 | init_alpha = np.nanmean(train_Y_wtte[:,0]) 1411 | 1412 | model = Sequential() 1413 | model.add(Masking(mask_value=0., 1414 | input_shape=(max_batch_len, nb_features))) 1415 | # We substitute LSTM for GRU 1416 | model.add(GRU( 1417 | input_shape=(None, nb_features), 1418 | units=100, 1419 | recurrent_dropout=0.2, 1420 | return_sequences=True)) 1421 | model.add(GRU( 1422 | units=50, 1423 | recurrent_dropout=0.2, 1424 | return_sequences=True)) 1425 | model.add(TimeDistributed(Dense(2))) 1426 | model.add(Lambda(wtte.output_lambda, 1427 | arguments={# Initialization value around it's scale 1428 | "init_alpha": np.nanmean(train_Y_wtte[:,0]), 1429 | # Set a maximum 1430 | "max_beta_value": 10.0, 1431 | # We set the scalefactor to avoid exploding gradients 1432 | "scalefactor": 0.25 1433 | }, 1434 | )) 1435 | loss = wtte.Loss(kind='discrete', clip_prob=1e-5).loss_function 1436 | model.compile(loss=loss, optimizer='rmsprop') 1437 | ``` 1438 | 1439 | 1440 | ```python 1441 | print(model.summary()) 1442 | 1443 | # fit the network 1444 | history = model.fit(train_X, train_Y_wtte, epochs=500, batch_size=16, 1445 | validation_data=(val_X, val_Y_wtte), shuffle=True, verbose=2, 1446 | callbacks = [early_stopping, checkpoint, 
TerminateOnNaN()]) 1447 | 1448 | # list all data in history 1449 | print(history.history.keys()) 1450 | ``` 1451 | 1452 | _________________________________________________________________ 1453 | Layer (type) Output Shape Param # 1454 | ================================================================= 1455 | masking_6 (Masking) (None, None, 25) 0 1456 | _________________________________________________________________ 1457 | gru_6 (GRU) (None, None, 100) 37800 1458 | _________________________________________________________________ 1459 | gru_7 (GRU) (None, None, 50) 22650 1460 | _________________________________________________________________ 1461 | time_distributed_5 (TimeDist (None, None, 2) 102 1462 | _________________________________________________________________ 1463 | lambda_5 (Lambda) (None, None, 2) 0 1464 | ================================================================= 1465 | Total params: 60,552 1466 | Trainable params: 60,552 1467 | Non-trainable params: 0 1468 | _________________________________________________________________ 1469 | None 1470 | Train on 80 samples, validate on 20 samples 1471 | ... 
1472 | Epoch 379/500 1473 | - 4s - loss: 2.5791 - val_loss: 2.4811 1474 | Epoch 380/500 1475 | - 4s - loss: 2.4674 - val_loss: 2.3694 1476 | Epoch 381/500 1477 | - 4s - loss: 2.4272 - val_loss: 2.3636 1478 | Epoch 382/500 1479 | - 4s - loss: 2.4483 - val_loss: 2.4244 1480 | Epoch 383/500 1481 | - 4s - loss: 2.4518 - val_loss: 2.4219 1482 | Epoch 384/500 1483 | - 4s - loss: 2.4448 - val_loss: 2.3649 1484 | Epoch 385/500 1485 | - 4s - loss: 2.5142 - val_loss: 2.3681 1486 | Epoch 386/500 1487 | - 4s - loss: 2.4157 - val_loss: 2.4423 1488 | ['loss', 'val_loss'] 1489 | 1490 | 1491 | 1492 | ```python 1493 | # Execute if training in Colaboratory (preferably from Chrome) 1494 | # Downloads the model after the training finishes 1495 | 1496 | from google.colab import files 1497 | files.download(baseline_gru_path) 1498 | 1499 | # Move the model to the expected folder 1500 | !mv baseline_gru_path Models/ 1501 | ``` 1502 | 1503 | 1504 | ```python 1505 | %matplotlib inline 1506 | 1507 | plt.plot(history.history["loss"], color="blue") 1508 | plt.plot(history.history["val_loss"], color="green") 1509 | ``` 1510 | 1511 | 1512 | 1513 | 1514 | [] 1515 | 1516 | 1517 | 1518 | 1519 | ![png](rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_46_1.png) 1520 | 1521 | 1522 | 1523 | ```python 1524 | # Execute if you want to upload a model to Collaboratory 1525 | 1526 | from google.colab import files 1527 | uploaded = files.upload() 1528 | 1529 | for fn in uploaded.keys(): 1530 | print('User uploaded file "{name}" with length {length} bytes'.format( 1531 | name=fn, length=len(uploaded[fn]))) 1532 | ``` 1533 | 1534 | 1535 | ```python 1536 | # Compile model first to load weights 1537 | 1538 | model.load_weights("Models/" + baseline_gru_path) 1539 | ``` 1540 | 1541 | 1542 | ```python 1543 | # We save the validation errors to later compare the models 1544 | validation_gru = [mean_weibull(alpha, beta) 1545 | for batch in model.predict(val_X) 1546 | for beta, alpha in batch] 1547 | 1548 | 
evaluate_and_plot(model, 1549 | [("Train", train_X, train_Y_wtte), 1550 | ("Validation", val_X, val_Y_wtte), 1551 | ("Test", test_X, test_Y_wtte)], 1552 | weibull_function = mean_weibull) 1553 | ``` 1554 | 1555 | 1556 | 1557 | 1558 |
1559 | 1572 | 1573 | 1574 | 1575 | 1576 | 1577 | 1578 | 1579 | 1580 | 1581 | 1582 | 1583 | 1584 | 1585 | 1586 | 1587 | 1588 | 1589 | 1590 | 1591 | 1592 | 1593 | 1594 | 1595 | 1596 | 1597 | 1598 | 1599 | 1600 | 1601 | 1602 | 1603 | 1604 | 1605 | 1606 | 1607 |
012
MethodMAERMSER2
Train15.9425.900.861
Validation18.3027.460.824
Test25.8236.700.613
1608 |
1609 | 1610 | 1611 | 1612 | 1613 | ![png](rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_49_1.png) 1614 | 1615 | 1616 | # Result 1617 | 1618 | The are three models: 1619 | - baseline 1620 | - baseline WTTE-RNN LSTM 1621 | - baseline WTTE-RNN GRU 1622 | 1623 | The mean is used as the expected value of the RUL. 1624 | 1625 | 1626 | ```python 1627 | %matplotlib inline 1628 | import seaborn as sns 1629 | 1630 | l = val_Y.flatten() 1631 | y_true = np.ma.compressed(np.ma.masked_where(l==0, l)) 1632 | y_pred_baseline = np.ma.compressed(np.ma.masked_where(l==0, validation_baseline)) 1633 | y_pred_wtte = np.ma.compressed(np.ma.masked_where(l==0, validation_wtte)) 1634 | y_pred_gru = np.ma.compressed(np.ma.masked_where(l==0, validation_gru)) 1635 | 1636 | 1637 | fig = plt.figure() 1638 | ax = fig.add_subplot(1, 1, 1) 1639 | ax.violinplot([y_pred_baseline - y_true, 1640 | y_pred_wtte - y_true, 1641 | y_pred_gru - y_true]) 1642 | 1643 | ax.set_xticklabels([]) 1644 | plt.figtext(0.21, 0.1, ' Baseline') 1645 | plt.figtext(0.480, 0.1, ' Baseline WTTE') 1646 | plt.figtext(0.76, 0.1, ' Baseline GRU') 1647 | 1648 | fig.set_size_inches(15, 10) 1649 | ``` 1650 | 1651 | 1652 | ![png](rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_51_0.png) 1653 | -------------------------------------------------------------------------------- /_site/Dataset/PM_truth.txt: -------------------------------------------------------------------------------- 1 | 112 2 | 98 3 | 69 4 | 82 5 | 91 6 | 93 7 | 91 8 | 95 9 | 111 10 | 96 11 | 97 12 | 124 13 | 95 14 | 107 15 | 83 16 | 84 17 | 50 18 | 28 19 | 87 20 | 16 21 | 57 22 | 111 23 | 113 24 | 20 25 | 145 26 | 119 27 | 66 28 | 97 29 | 90 30 | 115 31 | 8 32 | 48 33 | 106 34 | 7 35 | 11 36 | 19 37 | 21 38 | 50 39 | 142 40 | 28 41 | 18 42 | 10 43 | 59 44 | 109 45 | 114 46 | 47 47 | 135 48 | 92 49 | 21 50 | 79 51 | 114 52 | 29 53 | 26 54 | 97 55 | 137 56 | 15 57 | 103 58 | 37 59 | 114 60 | 100 61 | 21 62 | 54 63 | 72 64 | 28 65 | 128 66 | 14 
67 | 77 68 | 8 69 | 121 70 | 94 71 | 118 72 | 50 73 | 131 74 | 126 75 | 113 76 | 10 77 | 34 78 | 107 79 | 63 80 | 90 81 | 8 82 | 9 83 | 137 84 | 58 85 | 118 86 | 89 87 | 116 88 | 115 89 | 136 90 | 28 91 | 38 92 | 20 93 | 85 94 | 55 95 | 128 96 | 137 97 | 82 98 | 59 99 | 117 100 | 20 101 | -------------------------------------------------------------------------------- /_site/Images/test_engine_sample.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/Images/test_engine_sample.gif -------------------------------------------------------------------------------- /_site/Images/train_engine_sample.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/Images/train_engine_sample.gif -------------------------------------------------------------------------------- /_site/Images/val_engine_sample.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/Images/val_engine_sample.gif -------------------------------------------------------------------------------- /_site/Models/baseline_gru_model_weights: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/Models/baseline_gru_model_weights -------------------------------------------------------------------------------- /_site/Models/baseline_model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/Models/baseline_model 
-------------------------------------------------------------------------------- /_site/Models/baseline_wtte_model_weights: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/Models/baseline_wtte_model_weights -------------------------------------------------------------------------------- /_site/Turbofan Engine/regression_model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/Turbofan Engine/regression_model.h5 -------------------------------------------------------------------------------- /_site/assets/images/header_free.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/assets/images/header_free.jpg -------------------------------------------------------------------------------- /_site/assets/images/rnn-time-to-event-notebook_16_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/assets/images/rnn-time-to-event-notebook_16_1.png -------------------------------------------------------------------------------- /_site/assets/images/rnn-time-to-event-notebook_19_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/assets/images/rnn-time-to-event-notebook_19_1.png -------------------------------------------------------------------------------- /_site/assets/images/rnn-time-to-event-notebook_26_1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/assets/images/rnn-time-to-event-notebook_26_1.png -------------------------------------------------------------------------------- /_site/assets/images/rnn-time-to-event-notebook_33_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/assets/images/rnn-time-to-event-notebook_33_1.png -------------------------------------------------------------------------------- /_site/assets/images/rnn-time-to-event-notebook_35_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/assets/images/rnn-time-to-event-notebook_35_0.png -------------------------------------------------------------------------------- /_site/assets/images/rnn-time-to-event-notebook_37_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/assets/images/rnn-time-to-event-notebook_37_1.png -------------------------------------------------------------------------------- /_site/assets/images/rnn-time-to-event-notebook_46_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/assets/images/rnn-time-to-event-notebook_46_1.png -------------------------------------------------------------------------------- /_site/assets/images/rnn-time-to-event-notebook_49_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/assets/images/rnn-time-to-event-notebook_49_1.png -------------------------------------------------------------------------------- /_site/assets/images/rnn-time-to-event-notebook_51_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/assets/images/rnn-time-to-event-notebook_51_0.png -------------------------------------------------------------------------------- /_site/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.4.0 2 | altair==2.2.2 3 | astor==0.7.1 4 | backports-abc==0.5 5 | backports.functools-lru-cache==1.5 6 | backports.shutil-get-terminal-size==1.0.0 7 | backports.weakref==1.0.post1 8 | beautifulsoup4==4.6.3 9 | bleach==2.1.4 10 | cachetools==2.1.0 11 | certifi==2018.8.24 12 | chardet==3.0.4 13 | configparser==3.5.0 14 | crcmod==1.7 15 | cycler==0.10.0 16 | decorator==4.3.0 17 | entrypoints==0.2.3 18 | enum34==1.1.6 19 | funcsigs==1.0.2 20 | functools32==3.2.3.post2 21 | future==0.16.0 22 | futures==3.2.0 23 | gast==0.2.0 24 | google-api-core==1.3.0 25 | google-api-python-client==1.6.7 26 | google-auth==1.4.2 27 | google-auth-httplib2==0.0.3 28 | google-auth-oauthlib==0.2.0 29 | google-cloud-bigquery==1.1.0 30 | google-cloud-core==0.28.1 31 | google-cloud-language==1.0.2 32 | google-cloud-storage==1.8.0 33 | google-cloud-translate==1.3.1 34 | google-colab==0.0.1a1 35 | google-resumable-media==0.3.1 36 | googleapis-common-protos==1.5.3 37 | grpcio==1.14.1 38 | h5py==2.8.0 39 | html5lib==1.0.1 40 | httplib2==0.11.3 41 | idna==2.6 42 | ipykernel==4.6.1 43 | ipython==5.5.0 44 | ipython-genutils==0.2.0 45 | Jinja2==2.10 46 | joblib==0.12.2 47 | jsonschema==2.6.0 48 | jupyter-client==5.2.3 49 | jupyter-core==4.4.0 50 | Keras==2.1.6 51 | 
Markdown==2.6.11 52 | MarkupSafe==1.0 53 | matplotlib==2.1.2 54 | mistune==0.8.3 55 | mock==2.0.0 56 | mpmath==1.0.0 57 | nbconvert==5.3.1 58 | nbformat==4.4.0 59 | networkx==2.1 60 | nltk==3.2.5 61 | notebook==5.2.2 62 | numpy==1.14.5 63 | oauth2client==4.1.2 64 | oauthlib==2.1.0 65 | olefile==0.45.1 66 | opencv-python==3.4.2.17 67 | pandas==0.22.0 68 | pandas-gbq==0.4.1 69 | pandocfilters==1.4.2 70 | pathlib2==2.3.2 71 | patsy==0.5.0 72 | pbr==4.2.0 73 | pexpect==4.6.0 74 | pickleshare==0.7.4 75 | Pillow==4.0.0 76 | plotly==1.12.12 77 | portpicker==1.2.0 78 | prompt-toolkit==1.0.15 79 | protobuf==3.6.1 80 | psutil==5.4.7 81 | ptyprocess==0.6.0 82 | pyasn1==0.4.4 83 | pyasn1-modules==0.2.2 84 | Pygments==2.1.3 85 | pymc3==3.5 86 | pyparsing==2.2.0 87 | pystache==0.5.4 88 | python-dateutil==2.5.3 89 | pytz==2018.5 90 | PyWavelets==0.5.2 91 | PyYAML==3.13 92 | pyzmq==16.0.4 93 | requests==2.18.4 94 | requests-oauthlib==1.0.0 95 | rsa==3.4.2 96 | scandir==1.9.0 97 | scikit-image==0.13.1 98 | scikit-learn==0.19.2 99 | scipy==0.19.1 100 | seaborn==0.7.1 101 | simplegeneric==0.8.1 102 | singledispatch==3.4.0.3 103 | six==1.10.0 104 | statsmodels==0.8.0 105 | subprocess32==3.5.2 106 | sympy==1.1.1 107 | tensorboard==1.10.0 108 | tensorflow==1.10.0 109 | tensorflow-hub==0.1.1 110 | termcolor==1.1.0 111 | terminado==0.8.1 112 | testpath==0.3.1 113 | Theano==1.0.2 114 | toolz==0.9.0 115 | tornado==4.5.3 116 | tqdm==4.25.0 117 | traitlets==4.3.2 118 | typing==3.6.4 119 | uritemplate==3.0.0 120 | urllib3==1.22 121 | vega-datasets==0.5.0 122 | wcwidth==0.1.7 123 | webencodings==0.5.1 124 | Werkzeug==0.14.1 125 | wtte==1.1.1 126 | xgboost==0.7.post4 127 | -------------------------------------------------------------------------------- /_site/rnn-time-to-event-notebook.md: -------------------------------------------------------------------------------- 1 | 2 | [View in 
Colaboratory](https://colab.research.google.com/github/Manelmc/rnn-time-to-event/blob/master/predictive-maintenance-turbofan-engine.ipynb) 3 | 4 | # Predictive Maintenance for the Turbofan Engine Dataset 5 | 6 | 7 | ## Data Preparation 8 | 9 | 10 | ```python 11 | import keras 12 | import keras.backend as K 13 | 14 | print "Keras version", keras.__version__ 15 | 16 | import pandas as pd 17 | import numpy as np 18 | import matplotlib.pyplot as plt 19 | 20 | 21 | # Setting seed for reproducibility 22 | SEED = 42 23 | np.random.seed(SEED) 24 | ``` 25 | 26 | Using TensorFlow backend. 27 | 28 | 29 | Keras version 2.1.6 30 | 31 | 32 | 33 | ```python 34 | !mkdir Dataset 35 | !mkdir Models 36 | 37 | !wget -q https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/master/Dataset/PM_test.txt -O Dataset/PM_test.txt 38 | !wget -q https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/master/Dataset/PM_train.txt -O Dataset/PM_train.txt 39 | !wget -q https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/master/Dataset/PM_truth.txt -O Dataset/PM_truth.txt 40 | 41 | !ls Dataset 42 | ``` 43 | 44 | PM_test.txt PM_train.txt PM_truth.txt 45 | 46 | 47 | ### Turbofan Train Set 48 | 49 | 50 | ```python 51 | from sklearn import preprocessing 52 | 53 | # read training data - It is the aircraft engine run-to-failure data. 
54 | train_df = pd.read_csv('Dataset/PM_train.txt', sep=" ", header=None) 55 | train_df.drop(train_df.columns[[26, 27]], axis=1, inplace=True) 56 | train_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3', 57 | 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14', 58 | 's15', 's16', 's17', 's18', 's19', 's20', 's21'] 59 | 60 | train_df = train_df.sort_values(['id','cycle']) 61 | 62 | # Data Labeling - generate column RUL (Remaining Useful Life or Time to Failure) 63 | rul = pd.DataFrame(train_df.groupby('id')['cycle'].max()).reset_index() 64 | rul.columns = ['id', 'max'] 65 | train_df = train_df.merge(rul, on=['id'], how='left') 66 | train_df['RUL'] = train_df['max'] - train_df['cycle'] 67 | train_df.drop('max', axis=1, inplace=True) 68 | 69 | # MinMax normalization (from 0 to 1) 70 | train_df['cycle_norm'] = train_df['cycle'] 71 | cols_normalize = train_df.columns.difference(['id','cycle','RUL','label1','label2']) 72 | min_max_scaler = preprocessing.MinMaxScaler() 73 | norm_train_df = pd.DataFrame(min_max_scaler.fit_transform(train_df[cols_normalize]), 74 | columns=cols_normalize, 75 | index=train_df.index) 76 | join_df = train_df[train_df.columns.difference(cols_normalize)].join(norm_train_df) 77 | train_df = join_df.reindex(columns = train_df.columns) 78 | 79 | train_df[train_df["id"] == 1].tail() 80 | ``` 81 | 82 | 83 | 84 | 85 |
86 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 |
idcyclesetting1setting2setting3s1s2s3s4s5...s14s15s16s17s18s19s20s21RULcycle_norm
18711880.1149430.7500000.00.00.7650600.6832350.6841660.0...0.0915990.7533670.00.6666670.00.00.2868220.08920240.518006
18811890.4655170.6666670.00.00.8945780.5478530.7724510.0...0.0906700.7441320.00.5833330.00.00.2635660.30171230.520776
18911900.3448280.5833330.00.00.7319280.6143450.7376770.0...0.0652290.7595230.00.8333330.00.00.2713180.23929920.523546
19011910.5000000.1666670.00.00.6415660.6827990.7346390.0...0.0757040.7406690.00.5000000.00.00.2403100.32491010.526316
19111920.5517240.5000000.00.00.7018070.6620890.7587780.0...0.0567140.7171990.00.6666670.00.00.2635660.09762500.529086
249 |

5 rows × 28 columns

250 |
251 | 252 | 253 | 254 | ### Turbofan Test Set 255 | 256 | 257 | ```python 258 | from sklearn import preprocessing 259 | 260 | # read test data - It is the aircraft engine operating data without failure events recorded. 261 | test_df = pd.read_csv('Dataset/PM_test.txt', sep=" ", header=None) 262 | test_df.drop(test_df.columns[[26, 27]], axis=1, inplace=True) 263 | test_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3', 264 | 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14', 265 | 's15', 's16', 's17', 's18', 's19', 's20', 's21'] 266 | 267 | # MinMax normalization (from 0 to 1) 268 | test_df['cycle_norm'] = test_df['cycle'] 269 | norm_test_df = pd.DataFrame(min_max_scaler.transform(test_df[cols_normalize]), 270 | columns=cols_normalize, 271 | index=test_df.index) 272 | test_join_df = test_df[test_df.columns.difference(cols_normalize)].join(norm_test_df) 273 | test_df = test_join_df.reindex(columns = test_df.columns) 274 | test_df = test_df.reset_index(drop=True) 275 | 276 | # read ground truth data - It contains the information of true remaining cycles for each engine in the testing data. 277 | truth_df = pd.read_csv('Dataset/PM_truth.txt', sep=" ", header=None) 278 | truth_df.drop(truth_df.columns[[1]], axis=1, inplace=True) 279 | 280 | # generate column max for test data 281 | rul = pd.DataFrame(test_df.groupby('id')['cycle'].max()).reset_index() 282 | rul.columns = ['id', 'max'] 283 | truth_df.columns = ['more'] 284 | truth_df['id'] = truth_df.index + 1 285 | truth_df['max'] = rul['max'] + truth_df['more'] 286 | truth_df.drop('more', axis=1, inplace=True) 287 | 288 | # generate RUL for test data 289 | test_df = test_df.merge(truth_df, on=['id'], how='left') 290 | test_df['RUL'] = test_df['max'] - test_df['cycle'] 291 | test_df.drop('max', axis=1, inplace=True) 292 | 293 | test_df[test_df["id"] == 1].tail() 294 | ``` 295 | 296 | 297 | 298 | 299 |
300 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 |
idcyclesetting1setting2setting3s1s2s3s4s5...s14s15s16s17s18s19s20s21cycle_normRUL
261270.4597700.5833330.00.00.2620480.3403100.3048620.0...0.1408810.4790300.00.3333330.00.00.5658910.6888980.072022116
271280.6264370.9166670.00.00.2168670.5059950.3214040.0...0.1803590.4697960.00.3333330.00.00.5348840.6296600.074792115
281290.5804600.5833330.00.00.2228920.3512100.2677250.0...0.1712770.3705270.00.3333330.00.00.6821710.6460920.077562114
291300.3563220.8333330.00.00.4759040.3200350.3160030.0...0.1798430.3312810.00.2500000.00.00.7364340.7079540.080332113
301310.4655170.8333330.00.00.4126510.2219320.2812290.0...0.1556920.2981920.00.4166670.00.00.5193800.6365640.083102112
463 |

5 rows × 28 columns

464 |
465 | 466 | 467 | 468 | ### Apply right padding to all the sequences 469 | 470 | 471 | ```python 472 | def pad_sequence(df, max_seq_length, mask=0): 473 | """ 474 | Applies right padding to a sequences until max_seq_length with mask 475 | """ 476 | return np.pad(df.values, ((0, max_seq_length - df.values.shape[0]), (0,0)), 477 | "constant", constant_values=mask) 478 | 479 | def pad_engines(df, cols, max_batch_len, mask=0): 480 | """ 481 | Applies right padding to the columns "cols" of all the engines 482 | """ 483 | return np.array([pad_sequence(df[df['id'] == batch_id][cols], max_batch_len, mask=mask) 484 | for batch_id in df['id'].unique()]) 485 | 486 | max_batch_len = train_df['id'].value_counts().max() 487 | train_cols = ['s' + str(i) for i in range(1,22)] + ['setting1', 'setting2', 'setting3', 'cycle_norm'] 488 | test_cols = ["RUL"] 489 | 490 | X = pad_engines(train_df, train_cols, max_batch_len) 491 | Y = pad_engines(train_df, test_cols, max_batch_len) 492 | ``` 493 | 494 | ### Split into train, validation and test 495 | 496 | 497 | ```python 498 | from sklearn.model_selection import train_test_split 499 | 500 | # Split into train and validation 501 | train_X, val_X, train_Y, val_Y = train_test_split(X, Y, test_size=0.20, random_state=SEED) 502 | 503 | # Test set from CMAPSS 504 | test_X = pad_engines(test_df, train_cols, max_batch_len) 505 | test_Y = pad_engines(test_df, test_cols, max_batch_len) 506 | 507 | # In the WTTE-RNN architecture we will predict 2 parameters (alpha and beta) 508 | # alpha is initialised to 1 509 | train_Y_wtte = np.concatenate((train_Y, np.ones(train_Y.shape)), axis=2) 510 | val_Y_wtte = np.concatenate((val_Y, np.ones(val_Y.shape)), axis=2) 511 | test_Y_wtte = np.concatenate((test_Y, np.ones(test_Y.shape)), axis=2) 512 | 513 | print "Train:\n", " X:", train_X.shape, "\n Y:", train_Y.shape, "\n Y_wtte:", train_Y_wtte.shape 514 | print "\nValidation:\n", " X:", val_X.shape, "\n Y:", val_Y.shape, "\n Y_wtte:", val_Y_wtte.shape 515 | 
print "\nTest:\n", " X:", test_X.shape, "\n Y:", test_Y.shape, "\n Y_wtte:", test_Y_wtte.shape 516 | ``` 517 | 518 | Train: 519 | X: (80, 362, 25) 520 | Y: (80, 362, 1) 521 | Y_wtte: (80, 362, 2) 522 | 523 | Validation: 524 | X: (20, 362, 25) 525 | Y: (20, 362, 1) 526 | Y_wtte: (20, 362, 2) 527 | 528 | Test: 529 | X: (100, 362, 25) 530 | Y: (100, 362, 1) 531 | Y_wtte: (100, 362, 2) 532 | 533 | 534 | ## Baseline 535 | 536 | 537 | ```python 538 | from keras.layers import Masking 539 | from keras.layers.core import Activation 540 | from keras.models import Sequential 541 | from keras.layers import Dense, LSTM, TimeDistributed 542 | from keras.callbacks import EarlyStopping, ModelCheckpoint 543 | 544 | # Model path 545 | baseline_path = "baseline_model" 546 | 547 | # Callbacks 548 | early_stopping = EarlyStopping(monitor='val_loss', 549 | min_delta=0, 550 | patience=30, 551 | verbose=0, 552 | mode='min') 553 | checkpoint = ModelCheckpoint(baseline_path, 554 | monitor='val_loss', 555 | save_best_only=True, 556 | mode='min', 557 | verbose=0) 558 | # dimensions of the model 559 | nb_features = train_X.shape[2] 560 | nb_out = train_Y.shape[2] 561 | 562 | model = Sequential() 563 | # Masking layer so the right padding is ignored 564 | # at each layer of the network 565 | model.add(Masking(mask_value=0., 566 | input_shape=(max_batch_len, nb_features))) 567 | # Then there s an LSTM layer with 100 units 568 | # Recurrent Dropout is also applied after each 569 | # LSTM layer to control overfitting. 
570 | model.add(LSTM( 571 | units=100, 572 | recurrent_dropout=0.2, 573 | return_sequences=True)) 574 | # followed by another LSTM layer with 50 units 575 | model.add(LSTM( 576 | units=50, 577 | recurrent_dropout=0.2, 578 | return_sequences=True)) 579 | # Final layer is a Time-Distributed Dense layer 580 | # with a single unit with an Exponential activation 581 | model.add(TimeDistributed(Dense(nb_out, activation=K.exp))) 582 | model.compile(loss="mse", optimizer=keras.optimizers.RMSprop()) 583 | 584 | print(model.summary()) 585 | 586 | # fit the network 587 | history = model.fit(train_X, train_Y, epochs=500, batch_size=16, 588 | validation_data=(val_X, val_Y), shuffle=True, 589 | verbose=2, callbacks = [early_stopping, checkpoint]) 590 | 591 | # list all data in history 592 | print(history.history.keys()) 593 | ``` 594 | 595 | _________________________________________________________________ 596 | Layer (type) Output Shape Param # 597 | ================================================================= 598 | masking_1 (Masking) (None, 362, 25) 0 599 | _________________________________________________________________ 600 | lstm_1 (LSTM) (None, 362, 100) 50400 601 | _________________________________________________________________ 602 | lstm_2 (LSTM) (None, 362, 50) 30200 603 | _________________________________________________________________ 604 | time_distributed_1 (TimeDist (None, 362, 1) 51 605 | ================================================================= 606 | Total params: 80,651 607 | Trainable params: 80,651 608 | Non-trainable params: 0 609 | _________________________________________________________________ 610 | ... 
611 | - 14s - loss: 1145.8300 - val_loss: 684.7579 612 | Epoch 309/500 613 | - 15s - loss: 1483.2823 - val_loss: 665.0914 614 | Epoch 310/500 615 | - 15s - loss: 1484.7324 - val_loss: 676.9185 616 | Epoch 311/500 617 | - 15s - loss: 1204.1237 - val_loss: 621.4485 618 | Epoch 312/500 619 | - 15s - loss: 1293.4628 - val_loss: 611.2367 620 | Epoch 313/500 621 | - 15s - loss: 1410.6540 - val_loss: 599.2881 622 | Epoch 314/500 623 | - 15s - loss: 1280.4136 - val_loss: 651.2672 624 | Epoch 315/500 625 | - 15s - loss: 1233.0307 - val_loss: 634.8255 626 | Epoch 316/500 627 | - 15s - loss: 1339.8630 - val_loss: 702.0963 628 | Epoch 317/500 629 | - 14s - loss: 1249.2757 - val_loss: 789.5427 630 | Epoch 318/500 631 | - 15s - loss: 1364.1424 - val_loss: 834.3046 632 | ['loss', 'val_loss'] 633 | 634 | 635 | 636 | ```python 637 | # Execute if training in Colaboratory (preferably from Chrome) 638 | # Downloads the model after the training finishes 639 | 640 | from google.colab import files 641 | files.download(baseline_path) 642 | 643 | # Move the model to the expected folder 644 | !mv baseline_path Models/ 645 | ``` 646 | 647 | 648 | ```python 649 | # Validation loss vs the Training loss 650 | 651 | %matplotlib inline 652 | 653 | plt.plot(history.history["loss"]) 654 | plt.plot(history.history["val_loss"]) 655 | ``` 656 | 657 | 658 | 659 | 660 | [] 661 | 662 | 663 | 664 | 665 | ![png](assets/images/rnn-time-to-event-notebook_16_1.png) 666 | 667 | 668 | 669 | ```python 670 | # Execute if you want to upload a model to Collaboratory 671 | 672 | from google.colab import files 673 | uploaded = files.upload() 674 | 675 | for fn in uploaded.keys(): 676 | print('User uploaded file "{name}" with length {length} bytes'.format( 677 | name=fn, length=len(uploaded[fn]))) 678 | ``` 679 | 680 | 681 | 682 | 683 | 684 | Upload widget is only available when the cell has been executed in the 685 | current browser session. Please rerun this cell to enable. 
686 | 687 | 688 | 689 | 690 | 691 | ```python 692 | from keras.models import load_model 693 | 694 | # It's important to load the model after the training 695 | # The keras Checkpoint will save the best model in terms 696 | # of the validation loss in the specified path 697 | model = load_model("Models/" + baseline_path, custom_objects={"exp": K.exp}) 698 | ``` 699 | 700 | 701 | ```python 702 | %matplotlib inline 703 | from math import sqrt 704 | 705 | from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 706 | 707 | # We save the validation errors to later compare the models 708 | validation_baseline = model.predict(val_X).flatten() 709 | 710 | def evaluate_and_plot(model, evaluation_data, weibull_function=None): 711 | """ 712 | Generate scores dataframe and plot the RUL 713 | """ 714 | fig = plt.figure() 715 | i = 1 716 | score_df = pd.DataFrame({"Method": ["MAE", "RMSE", "R2"]}) 717 | for name_set, train_set, test_set in evaluation_data: 718 | if weibull_function is None: 719 | y_pred = model.predict(train_set).flatten() 720 | else: 721 | y_pred = [weibull_function(alpha, beta) 722 | for batch in model.predict(train_set) 723 | for beta, alpha in batch] 724 | l = test_set[:,:,0].flatten() 725 | # To validate we remove the right padding 726 | y_true = np.ma.compressed(np.ma.masked_where(l==0, l)) 727 | y_pred = np.ma.compressed(np.ma.masked_where(l==0, y_pred)) 728 | score_mae = "{0:.2f}".format(mean_absolute_error(y_true, y_pred)) 729 | score_rmse = "{0:.2f}".format(sqrt(mean_squared_error(y_true, y_pred))) 730 | score_r2 = "{0:.3f}".format(r2_score(y_true, y_pred)) 731 | score_df[name_set] = [score_mae, score_rmse, score_r2] 732 | ax = fig.add_subplot(6, 1, i) 733 | ax.title.set_text(name_set) 734 | ax.title.set_fontsize(20) 735 | i += 1 736 | plt.plot(y_pred[0:2500]) 737 | plt.plot(y_true[0:2500]) 738 | ax = fig.add_subplot(6, 1, i) 739 | i += 1 740 | plt.plot(y_pred[2500:5000]) 741 | plt.plot(y_true[2500:5000]) 742 | 
plt.subplots_adjust(hspace=0.45) 743 | fig.set_size_inches(15, i*2.2) 744 | return score_df.T 745 | 746 | evaluate_and_plot(model, 747 | [("Train", train_X, train_Y), 748 | ("Validation", val_X, val_Y), 749 | ("Test", test_X, test_Y)]) 750 | ``` 751 | 752 | 753 | 754 | 755 |
756 | 769 | 770 | 771 | 772 | 773 | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | 784 | 785 | 786 | 787 | 788 | 789 | 790 | 791 | 792 | 793 | 794 | 795 | 796 | 797 | 798 | 799 | 800 | 801 | 802 | 803 | 804 |
012
MethodMAERMSER2
Train21.1933.570.766
Validation17.3623.980.866
Test27.0337.410.598
805 |
806 | 807 | 808 | 809 | 810 | ![png](assets/images/rnn-time-to-event-notebook_19_1.png) 811 | 812 | 813 | ## Adapting to WTTE-RNN 814 | 815 | 816 | ```python 817 | # Install wtte package from Martinsson 818 | 819 | !pip install wtte 820 | ``` 821 | 822 | Collecting wtte 823 | Downloading https://files.pythonhosted.org/packages/95/0e/8affc53f47d4ceb69fc80484fd87ad886c6cab7f4ce0add38076b6092d76/wtte-1.1.1-py2.py3-none-any.whl 824 | Requirement already satisfied: scipy in /usr/local/lib/python2.7/dist-packages (from wtte) (0.19.1) 825 | Requirement already satisfied: numpy in /usr/local/lib/python2.7/dist-packages (from wtte) (1.14.5) 826 | Requirement already satisfied: keras>=2.0 in /usr/local/lib/python2.7/dist-packages (from wtte) (2.1.6) 827 | Requirement already satisfied: pandas in /usr/local/lib/python2.7/dist-packages (from wtte) (0.22.0) 828 | Collecting six==1.10.0 (from wtte) 829 | Downloading https://files.pythonhosted.org/packages/c8/0a/b6723e1bc4c516cb687841499455a8505b44607ab535be01091c0f24f079/six-1.10.0-py2.py3-none-any.whl 830 | Requirement already satisfied: pyyaml in /usr/local/lib/python2.7/dist-packages (from keras>=2.0->wtte) (3.13) 831 | Requirement already satisfied: h5py in /usr/local/lib/python2.7/dist-packages (from keras>=2.0->wtte) (2.8.0) 832 | Requirement already satisfied: pytz>=2011k in /usr/local/lib/python2.7/dist-packages (from pandas->wtte) (2018.5) 833 | Requirement already satisfied: python-dateutil in /usr/local/lib/python2.7/dist-packages (from pandas->wtte) (2.5.3) 834 | Installing collected packages: six, wtte 835 | Found existing installation: six 1.11.0 836 | Uninstalling six-1.11.0: 837 | Successfully uninstalled six-1.11.0 838 | Successfully installed six-1.10.0 wtte-1.1.1 839 | 840 | 841 | 842 | ```python 843 | # Loss and activation functions from Martinsson 844 | # These are not used in the final version because 845 | # the wtte package has useful regularization tools 846 | 847 | def weibull_loglik_discrete(y_true, 
y_pred, epsilon=K.epsilon()): 848 | y = y_true[..., 0] 849 | u = y_true[..., 1] 850 | a = y_pred[..., 0] 851 | b = y_pred[..., 1] 852 | 853 | hazard0 = K.pow((y + epsilon) / a, b) 854 | hazard1 = K.pow((y + 1.0) / a, b) 855 | 856 | loss = u * K.log(K.exp(hazard1 - hazard0) - (1.0 - epsilon)) - hazard1 857 | return -loss 858 | 859 | def activation_weibull(y_true): 860 | a = y_true[..., 0] 861 | b = y_true[..., 1] 862 | 863 | a = K.exp(a) 864 | b = K.sigmoid(b) 865 | return K.stack([a, b], axis=-1) 866 | ``` 867 | 868 | 869 | ```python 870 | from keras.layers import Masking 871 | from keras.layers.core import Activation 872 | from keras.models import Sequential 873 | from keras.layers import Dense, LSTM, TimeDistributed, Lambda 874 | from keras.callbacks import EarlyStopping, TerminateOnNaN, ModelCheckpoint 875 | import wtte.weibull as weibull 876 | import wtte.wtte as wtte 877 | 878 | # Since we use a lambda in the last layer the model 879 | # is not saved well in keras, instead we save the weights. 
880 | # This requires compiling the model to load the weights 881 | baseline_wtte_path = "baseline_wtte_model_weights" 882 | # Callbacks 883 | early_stopping = EarlyStopping(monitor='val_loss', 884 | min_delta=0, 885 | patience=30, 886 | verbose=0, 887 | mode='min') 888 | checkpoint = ModelCheckpoint(baseline_wtte_path, 889 | monitor='val_loss', 890 | save_best_only=True, 891 | save_weights_only=True, 892 | mode='min', 893 | verbose=0) 894 | 895 | nb_features = train_X.shape[2] 896 | nb_out = train_Y.shape[1] 897 | 898 | model = Sequential() 899 | 900 | model.add(Masking(mask_value=0., 901 | input_shape=(max_batch_len, nb_features))) 902 | model.add(LSTM( 903 | input_shape=(None, nb_features), 904 | units=100, 905 | recurrent_dropout=0.2, 906 | return_sequences=True)) 907 | model.add(LSTM( 908 | units=50, 909 | recurrent_dropout=0.2, 910 | return_sequences=True)) 911 | model.add(TimeDistributed(Dense(2))) 912 | # uncomment this line and comment the next to use 913 | # activation_weibull function: 914 | # model.add(Activation(activation_weibull)) 915 | model.add(Lambda(wtte.output_lambda, 916 | arguments={# Initialization value around it's scale 917 | "init_alpha": np.nanmean(train_Y_wtte[:,0]), 918 | # Set a maximum 919 | "max_beta_value": 10.0 920 | }, 921 | )) 922 | # Same for the loss "weibull_loglik_discrete" 923 | # model.compile(loss=weibull_loglik_discrete, optimizer='rmsprop') 924 | # We use clipping on the loss 925 | loss = wtte.Loss(kind='discrete', clip_prob=1e-5).loss_function 926 | 927 | model.compile(loss=loss, optimizer='rmsprop') 928 | ``` 929 | 930 | 931 | ```python 932 | print(model.summary()) 933 | 934 | # fit the network 935 | history = model.fit(train_X, train_Y_wtte, epochs=500, batch_size=16, 936 | validation_data=(val_X, val_Y_wtte), shuffle=True, verbose=2, 937 | callbacks = [early_stopping, checkpoint, TerminateOnNaN()]) 938 | 939 | # list all data in history 940 | print(history.history.keys()) 941 | ``` 942 | 943 | 
_________________________________________________________________ 944 | Layer (type) Output Shape Param # 945 | ================================================================= 946 | masking_4 (Masking) (None, None, 25) 0 947 | _________________________________________________________________ 948 | lstm_7 (LSTM) (None, None, 100) 50400 949 | _________________________________________________________________ 950 | lstm_8 (LSTM) (None, None, 50) 30200 951 | _________________________________________________________________ 952 | time_distributed_4 (TimeDist (None, None, 2) 102 953 | _________________________________________________________________ 954 | lambda_2 (Lambda) (None, None, 2) 0 955 | ================================================================= 956 | Total params: 80,702 957 | Trainable params: 80,702 958 | Non-trainable params: 0 959 | _________________________________________________________________ 960 | ... 961 | - 12s - loss: 2.5586 - val_loss: 2.4429 962 | Epoch 353/500 963 | - 13s - loss: 2.5923 - val_loss: 2.5299 964 | Epoch 354/500 965 | - 12s - loss: 2.6591 - val_loss: 2.4070 966 | Epoch 355/500 967 | - 12s - loss: 2.5594 - val_loss: 2.5139 968 | Epoch 356/500 969 | - 13s - loss: 2.5870 - val_loss: 2.4082 970 | Epoch 357/500 971 | - 12s - loss: 2.6275 - val_loss: 2.4218 972 | ['loss', 'val_loss'] 973 | 974 | 975 | 976 | ```python 977 | # Execute if training in Colaboratory (preferably from Chrome) 978 | # Downloads the model after the training finishes 979 | 980 | from google.colab import files 981 | files.download(baseline_wtte_path) 982 | 983 | # Move the model to the expected folder 984 | !mv baseline_wtte_path Models/ 985 | ``` 986 | 987 | 988 | ```python 989 | %matplotlib inline 990 | 991 | 992 | plt.plot(history.history["loss"]) 993 | plt.plot(history.history["val_loss"]) 994 | ``` 995 | 996 | 997 | 998 | 999 | [] 1000 | 1001 | 1002 | 1003 | 1004 | ![png](assets/images/rnn-time-to-event-notebook_26_1.png) 1005 | 1006 | 1007 | 1008 | 
```python 1009 | # Execute if you want to upload a model to Collaboratory 1010 | 1011 | from google.colab import files 1012 | uploaded = files.upload() 1013 | 1014 | for fn in uploaded.keys(): 1015 | print('User uploaded file "{name}" with length {length} bytes'.format( 1016 | name=fn, length=len(uploaded[fn]))) 1017 | ``` 1018 | 1019 | 1020 | 1021 | 1022 | 1023 | Upload widget is only available when the cell has been executed in the 1024 | current browser session. Please rerun this cell to enable. 1025 | 1026 | 1027 | 1028 | 1029 | Saving baseline_wtte_model_weights (1) to baseline_wtte_model_weights (1) 1030 | User uploaded file "baseline_wtte_model_weights (1)" with length 340528 bytes 1031 | 1032 | 1033 | 1034 | ```python 1035 | # Compile model first to load weights 1036 | 1037 | model.load_weights("Models/" + baseline_wtte_path) 1038 | ``` 1039 | 1040 | ### Weibull Methods 1041 | 1042 | $\mu = \beta\Gamma(1 + \alpha^{-1})$ 1043 | 1044 | $\sigma^2 = \beta^2[\Gamma(1 + 2\alpha^{-1}) - \Gamma^2(1 + \alpha^{-1})]$ 1045 | 1046 | $mode = \beta\frac{\alpha-1}{\alpha}^{1/\alpha}$ 1047 | 1048 | Inverse CDF $ = \beta (-\log(1 - x))^\frac{1}{\alpha} $ when $ 0 1 else 0 1059 | 1060 | def median_weibull(alpha, beta): 1061 | return beta*(log(2)**(1./alpha)) 1062 | 1063 | def var_weibull(alpha, beta): 1064 | return beta**2*(gamma(1 + 2./alpha) - gamma(1 + 1./alpha)**2) 1065 | 1066 | def pdf_weibull(x, alpha, beta): 1067 | return (alpha/beta)*(x/beta)**(alpha - 1)*np.exp(-(x/beta)**alpha) 1068 | 1069 | def inverse_cdf_weibull(x, alpha, beta): 1070 | return beta*np.power((-np.log(1.-x)), 1./alpha) 1071 | 1072 | def survival_weibull(x, alpha, beta): 1073 | return np.e**-((x/beta)**alpha) 1074 | ``` 1075 | 1076 | ### Mean, Mode and Median 1077 | 1078 | 1079 | 1080 | ![](https://upload.wikimedia.org/wikipedia/commons/thumb/3/33/Visualisation_mode_median_mean.svg/150px-Visualisation_mode_median_mean.svg.png) 1081 | 1082 | 1083 | ```python 1084 | %matplotlib inline 1085 | 1086 | 
print "Mode" 1087 | print evaluate_and_plot(model, 1088 | [("Train", train_X, train_Y_wtte), 1089 | ("Validation", val_X, val_Y_wtte), 1090 | ("Test", test_X, test_Y_wtte)], 1091 | weibull_function = mode_weibull) 1092 | 1093 | # comment the next line to visualise the plot for the mode 1094 | plt.close() 1095 | 1096 | print "\nMedian" 1097 | print evaluate_and_plot(model, 1098 | [("Train", train_X, train_Y_wtte), 1099 | ("Validation", val_X, val_Y_wtte), 1100 | ("Test", test_X, test_Y_wtte)], 1101 | weibull_function = median_weibull) 1102 | 1103 | # comment the next line to visualise the plot for the median 1104 | plt.close() 1105 | 1106 | # We save the validation errors to later compare the models 1107 | validation_wtte = [mean_weibull(alpha, beta) 1108 | for batch in model.predict(val_X) 1109 | for beta, alpha in batch] 1110 | 1111 | print "\nMean" 1112 | print evaluate_and_plot(model, 1113 | [("Train", train_X, train_Y_wtte), 1114 | ("Validation", val_X, val_Y_wtte), 1115 | ("Test", test_X, test_Y_wtte)], 1116 | weibull_function = mean_weibull) 1117 | ``` 1118 | 1119 | Mode 1120 | 0 1 2 1121 | Method MAE RMSE R2 1122 | Train 21.53 34.69 0.750 1123 | Validation 17.94 26.48 0.836 1124 | Test 27.46 38.59 0.572 1125 | 1126 | Median 1127 | 0 1 2 1128 | Method MAE RMSE R2 1129 | Train 21.05 33.51 0.767 1130 | Validation 17.79 25.48 0.848 1131 | Test 26.72 37.49 0.596 1132 | 1133 | Mean 1134 | 0 1 2 1135 | Method MAE RMSE R2 1136 | Train 20.94 33.14 0.772 1137 | Validation 17.79 25.26 0.851 1138 | Test 26.51 37.22 0.602 1139 | 1140 | 1141 | 1142 | ![png](assets/images/rnn-time-to-event-notebook_33_1.png) 1143 | 1144 | 1145 | ### Evolution of the pdf through the cycles of an engine (PLOT) 1146 | 1147 | 1148 | ```python 1149 | import random 1150 | 1151 | import seaborn as sns 1152 | 1153 | 1154 | random.seed(SEED) 1155 | lot = random.sample(train_X, 3) 1156 | random.seed(SEED) 1157 | lot += random.sample(val_X, 3) 1158 | random.seed(SEED) 1159 | lot += 
random.sample(test_X, 3) 1160 | 1161 | palette = list(reversed(sns.color_palette("RdBu_r", 250))) 1162 | 1163 | fig = plt.figure() 1164 | j = 1 1165 | for batch in lot: 1166 | size = batch[~np.all(batch == 0, axis=1)].shape[0] 1167 | y_pred_wtte = model.predict(batch.reshape(1, max_batch_len, nb_features))[0] 1168 | y_pred_wtte = y_pred_wtte[:size] 1169 | x = np.arange(1, 400.) 1170 | 1171 | freq = 5 1172 | ax = fig.add_subplot(3, 3, j) 1173 | 1174 | i=0 1175 | for beta, alpha in y_pred_wtte[0::freq][2:]: 1176 | mean = mode_weibull(alpha, beta) 1177 | color=palette[int(mean)] if i < len(palette) else palette[-1] 1178 | plt.plot(x, pdf_weibull(x, alpha, beta), color=color) 1179 | i += 1 1180 | ax.set_ylim([0, 0.07]) 1181 | ax.set_xlim([0, 300]) 1182 | ax.set_yticklabels([]) 1183 | if j == 2: 1184 | ax.title.set_text("Train") 1185 | elif j == 5: 1186 | ax.title.set_text("Validation") 1187 | elif j == 8: 1188 | ax.title.set_text("Test") 1189 | j += 1 1190 | 1191 | plt.subplots_adjust(wspace=0.15, hspace=0.25) 1192 | fig.set_size_inches(10,10) 1193 | ``` 1194 | 1195 | 1196 | ![png](assets/images/rnn-time-to-event-notebook_35_0.png) 1197 | 1198 | 1199 | ### Confidence Interval of the Weibull Distribution 1200 | 1201 | 1202 | ```python 1203 | %matplotlib inline 1204 | 1205 | from scipy.stats import dweibull 1206 | 1207 | batch = lot[0] 1208 | size = batch[~np.all(batch == 0, axis=1)].shape[0] 1209 | y_pred_wtte = model.predict(batch.reshape(1, max_batch_len, nb_features))[0] 1210 | y_pred_wtte = y_pred_wtte[:size] 1211 | 1212 | fig = plt.figure() 1213 | fig.add_subplot(1,1,1) 1214 | for beta, alpha in y_pred_wtte[0::20]: 1215 | x = np.arange(1, 300.) 
1216 | mean = mean_weibull(alpha, beta) 1217 | sigma = np.sqrt(var_weibull(alpha, beta)) 1218 | plt.plot(x, pdf_weibull(x, alpha, beta), color=palette[int(mean)]) 1219 | # alpha is the shape parameter 1220 | conf = dweibull.interval(0.95, alpha, loc=mean, scale=sigma) 1221 | plt.fill([conf[0]] + list(np.arange(conf[0], conf[1])) + [conf[1]], 1222 | [0] + list(pdf_weibull(np.arange(conf[0], conf[1]), alpha, beta)) + [0], 1223 | color=palette[int(mean)], alpha=0.5) 1224 | 1225 | axes = plt.gca() 1226 | axes.set_ylim([0., 0.06]) 1227 | axes.set_xlim([0., 300.]) 1228 | fig.set_size_inches(10,5) 1229 | ``` 1230 | 1231 | /anaconda2/envs/ALL_BF/lib/python2.7/site-packages/ipykernel_launcher.py:16: RuntimeWarning: invalid value encountered in power 1232 | app.launch_new_instance() 1233 | 1234 | 1235 | 1236 | ![png](assets/images/rnn-time-to-event-notebook_37_1.png) 1237 | 1238 | 1239 | ### Evolution of the pdf through the cycles of an engine (GIFs) 1240 | 1241 | 1242 | ```python 1243 | import sys 1244 | import random 1245 | from math import gamma 1246 | 1247 | from matplotlib.animation import FuncAnimation 1248 | from scipy.stats import dweibull 1249 | 1250 | 1251 | def generate_gif(y_pred, y_true, path, freq=2): 1252 | # remove mask if exists 1253 | y_true = y_true[y_true != 0] 1254 | y_pred = y_pred[:y_true.shape[0]] 1255 | 1256 | frames = zip(y_true, y_pred) 1257 | 1258 | # pad, w_pad, h_pad, and rect 1259 | fig = plt.figure() 1260 | global ax1, ax2 1261 | ax1 = fig.add_subplot(1,2,1) 1262 | ax2 = fig.add_subplot(1,2,2) 1263 | fig.set_tight_layout(True) 1264 | x = np.arange(1, 300.) 
1265 | beta, alpha = y_pred[0] 1266 | line1, = ax1.plot(x, pdf_weibull(x, alpha, beta)) 1267 | global i, acc_y_true, acc_y_pred 1268 | i = 0 1269 | predict_mean = mean_weibull(alpha, beta) 1270 | ax2.plot(i, y_true[0], 'bo', label="True", ms=2.5) 1271 | ax2.plot(i, predict_mean, 'o', color="orange", label="Predicted", ms=2.5) 1272 | ax2.legend(loc="upper right") 1273 | # limits 1274 | ax1.set_ylim([0, 0.07]) 1275 | ax2.set_ylim([0, y_true[0] + 10]) 1276 | ax2.set_xlim([0, len(frames)/freq + 2]) 1277 | ax2.set_xticklabels([]) 1278 | # acc values 1279 | acc_y_true = [] 1280 | acc_y_pred = [] 1281 | 1282 | def update(instant): 1283 | y_true_t, y_pred_t = instant 1284 | beta, alpha = y_pred_t 1285 | # print y_true 1286 | pdf = pdf_weibull(x, alpha, beta) 1287 | line1.set_ydata(pdf) 1288 | global i, acc_y_true, acc_y_pred 1289 | i += 1 1290 | mean = mean_weibull(alpha, beta) 1291 | sigma = np.sqrt(var_weibull(alpha, beta)) 1292 | acc_y_pred += [mean] 1293 | acc_y_true += [y_true_t] 1294 | ax2.plot(range(len(acc_y_true)), acc_y_true, 'b', label="True") 1295 | ax2.plot(range(len(acc_y_pred)), acc_y_pred, color="orange", label="Predicted") 1296 | conf = dweibull.interval(0.95, alpha, loc=mean, scale=sigma) 1297 | ax1.set_title("PDF Weibull Distrib. 
(Mean: " + "{0:.1f}".format(mean) 1298 | + ", Std: " + "{0:.1f}".format(sigma) + ")" 1299 | + " CI 95%: [{0:.1f}, {1:.1f}]".format(*conf)) 1300 | ax2.set_title("Real RUL: " + str(y_true_t) + " cycles") 1301 | 1302 | fig.set_size_inches(15,4) 1303 | anim = FuncAnimation(fig, update, frames=frames[0::freq]) 1304 | anim.save(path, writer="imagemagick") 1305 | plt.close() 1306 | 1307 | random.seed(SEED) 1308 | batch_X, batch_Y = random.choice(zip(train_X, train_Y)) 1309 | y_pred_wtte = model.predict(batch_X.reshape(1, max_batch_len, nb_features))[0] 1310 | gif_path = "Images/train_engine_sample.gif" 1311 | generate_gif(y_pred_wtte, batch_Y, gif_path, freq=2) 1312 | 1313 | print "Train Sample" 1314 | from IPython.display import HTML 1315 | HTML('') 1316 | ``` 1317 | 1318 | Train Sample 1319 | 1320 | 1321 | 1322 | 1323 | 1324 | 1325 | 1326 | 1327 | 1328 | 1329 | ```python 1330 | random.seed(SEED) 1331 | batch_X, batch_Y = random.choice(zip(val_X, val_Y)) 1332 | y_pred_wtte = model.predict(batch_X.reshape(1, max_batch_len, nb_features))[0] 1333 | gif_path = "Images/val_engine_sample.gif" 1334 | generate_gif(y_pred_wtte, batch_Y, gif_path, freq=2) 1335 | 1336 | print "Validation Sample" 1337 | from IPython.display import HTML 1338 | HTML('') 1339 | ``` 1340 | 1341 | Validation Sample 1342 | 1343 | 1344 | 1345 | 1346 | 1347 | 1348 | 1349 | 1350 | 1351 | 1352 | ```python 1353 | random.seed(SEED) 1354 | batch_X, batch_Y = random.choice(zip(test_X, test_Y)) 1355 | y_pred_wtte = model.predict(batch_X.reshape(1, max_batch_len, nb_features))[0] 1356 | gif_path = "Images/test_engine_sample.gif" 1357 | generate_gif(y_pred_wtte, batch_Y, gif_path, freq=2) 1358 | 1359 | print "Test Sample" 1360 | from IPython.display import HTML 1361 | HTML('') 1362 | ``` 1363 | 1364 | Test Sample 1365 | 1366 | 1367 | 1368 | 1369 | 1370 | 1371 | 1372 | 1373 | 1374 | ## GRU variant 1375 | 1376 | 1377 | ```python 1378 | from keras.layers import Masking 1379 | from keras.layers.core import Activation 
1380 | from keras.models import Sequential
1381 | from keras.layers import Dense, GRU, TimeDistributed, Lambda
1382 | from keras.callbacks import EarlyStopping, TerminateOnNaN, ModelCheckpoint
1383 | import wtte.weibull as weibull
1384 | import wtte.wtte as wtte
1385 | 
1386 | baseline_gru_path = "baseline_gru_model_weights"
1387 | 
1388 | # Callbacks: stop once val_loss has not improved for 30 epochs, and keep
1389 | # only the best weights seen so far on disk.
1390 | early_stopping = EarlyStopping(monitor='val_loss',
1391 |                                min_delta=0,
1392 |                                patience=30,
1393 |                                verbose=0,
1394 |                                mode='min')
1395 | checkpoint = ModelCheckpoint(baseline_gru_path,
1396 |                              monitor='val_loss',
1397 |                              save_best_only=True,
1398 |                              save_weights_only=True,
1399 |                              mode='min',
1400 |                              verbose=0)
1401 | 
1402 | nb_features = train_X.shape[2]
1403 | nb_out = train_Y.shape[1]
1404 | 
1405 | # Initialization value for alpha, around the scale of the observed
1406 | # times-to-event (reused below instead of recomputing the same mean).
1407 | init_alpha = np.nanmean(train_Y_wtte[:,0])
1408 | 
1409 | model = Sequential()
1410 | model.add(Masking(mask_value=0.,
1411 |                   input_shape=(max_batch_len, nb_features)))
1412 | # We substitute LSTM for GRU. The input shape is already declared by the
1413 | # Masking layer above, so the GRU layers simply infer it; redeclaring
1414 | # input_shape=(None, nb_features) here was redundant and conflicting.
1415 | model.add(GRU(
1416 |     units=100,
1417 |     recurrent_dropout=0.2,
1418 |     return_sequences=True))
1419 | model.add(GRU(
1420 |     units=50,
1421 |     recurrent_dropout=0.2,
1422 |     return_sequences=True))
1423 | # Two outputs per timestep: the Weibull (alpha, beta) parameters
1424 | model.add(TimeDistributed(Dense(2)))
1425 | model.add(Lambda(wtte.output_lambda,
1426 |                  arguments={# Initialization value around its scale
1427 |                             "init_alpha": init_alpha,
1428 |                             # Set a maximum
1429 |                             "max_beta_value": 10.0,
1430 |                             # We set the scalefactor to avoid exploding gradients
1431 |                             "scalefactor": 0.25
1432 |                            },
1433 |                  ))
1434 | loss = wtte.Loss(kind='discrete', clip_prob=1e-5).loss_function
1435 | model.compile(loss=loss, optimizer='rmsprop')
1436 | ```
1437 | 
1438 | 
1439 | ```python
1440 | print(model.summary())
1441 | 
1442 | # fit the network
1443 | history = model.fit(train_X, train_Y_wtte, epochs=500, batch_size=16,
1444 |                     validation_data=(val_X, val_Y_wtte), shuffle=True, verbose=2,
1445 |                     callbacks = [early_stopping, checkpoint, TerminateOnNaN()])
1446 | 
1447 | # list all data in history
1448 | print(history.history.keys())
1449 | ```
1450 | 
1451 |     _________________________________________________________________
1452 |     Layer (type)                 Output Shape              Param #   
1453 |     =================================================================
1454 |     masking_6 (Masking)          (None, None, 25)          0         
1455 |     _________________________________________________________________
1456 |     gru_6 (GRU)                  (None, None, 100)         37800     
1457 |     _________________________________________________________________
1458 |     gru_7 (GRU)                  (None, None, 50)          22650     
1459 |     _________________________________________________________________
1460 |     time_distributed_5 (TimeDist (None, None, 2)           102       
1461 |     _________________________________________________________________
1462 |     lambda_5 (Lambda)            (None, None, 2)           0         
1463 |     =================================================================
1464 |     Total params: 60,552
1465 |     Trainable params: 60,552
1466 |     Non-trainable params: 0
1467 |     _________________________________________________________________
1468 | 
1469 |     ...
1465 |     Epoch 379/500
1466 |      - 4s - loss: 2.5791 - val_loss: 2.4811
1467 |     Epoch 380/500
1468 |      - 4s - loss: 2.4674 - val_loss: 2.3694
1469 |     Epoch 381/500
1470 |      - 4s - loss: 2.4272 - val_loss: 2.3636
1471 |     Epoch 382/500
1472 |      - 4s - loss: 2.4483 - val_loss: 2.4244
1473 |     Epoch 383/500
1474 |      - 4s - loss: 2.4518 - val_loss: 2.4219
1475 |     Epoch 384/500
1476 |      - 4s - loss: 2.4448 - val_loss: 2.3649
1477 |     Epoch 385/500
1478 |      - 4s - loss: 2.5142 - val_loss: 2.3681
1479 |     Epoch 386/500
1480 |      - 4s - loss: 2.4157 - val_loss: 2.4423
1481 |     ['loss', 'val_loss']
1482 | 
1483 | 
1484 | 
1485 | ```python
1486 | # Execute if training in Colaboratory (preferably from Chrome)
1487 | # Downloads the model after the training finishes
1488 | 
1489 | from google.colab import files
1490 | files.download(baseline_gru_path)
1491 | 
1492 | # Move the model to the expected folder.
1493 | # The braces expand the Python variable inside the shell command;
1494 | # the previous `!mv baseline_gru_path Models/` tried to move a file
1495 | # literally named "baseline_gru_path" instead of the saved weights.
1496 | !mv {baseline_gru_path} Models/
1497 | ```
1498 | 
1499 | 
1500 | ```python
1501 | %matplotlib inline
1502 | 
1503 | plt.plot(history.history["loss"], color="blue")
1504 | plt.plot(history.history["val_loss"], color="green")
1505 | ```
1506 | 
1507 | 
1508 | 
1509 | 
1510 |     []
1511 | 
1512 | 
1513 | 
1514 | 
1515 | ![png](assets/images/rnn-time-to-event-notebook_46_1.png)
1516 | 
1517 | 
1518 | 
1519 | ```python
1520 | # Execute if you want to upload a model to Collaboratory
1521 | 
1522 | from google.colab import files
1523 | uploaded = files.upload()
1524 | 
1525 | for fn in uploaded.keys():
1526 |   print('User uploaded file "{name}" with length {length} bytes'.format(
1527 |       name=fn, length=len(uploaded[fn])))
1528 | ```
1529 | 
1530 | 
1531 | ```python
1532 | # Compile model first to load weights
1533 | 
1534 | model.load_weights("Models/" + baseline_gru_path)
1535 | ```
1536 | 
1537 | 
1538 | ```python
1539 | # We save the validation errors to later compare the models.
1540 | # NOTE(review): wtte.output_lambda emits [alpha, beta] in that order, so the
1541 | # `for beta, alpha in batch` unpacking below looks swapped — confirm against
1542 | # how validation_wtte is computed in the LSTM section before changing it.
1543 | validation_gru = [mean_weibull(alpha, beta)
1544 |                   for batch in model.predict(val_X)
1545 |                   for beta, alpha in batch]
1546 | 
1547 | 
1548 | evaluate_and_plot(model,
1549 |                   [("Train", train_X, train_Y_wtte),
1550 |                    ("Validation", val_X, val_Y_wtte),
1551 |                    ("Test", test_X, test_Y_wtte)],
1552 |                   weibull_function = mean_weibull)
1553 | ```
1554 | 
1555 | 
1556 | 
1557 | 
1552 | 1565 | 1566 | 1567 | 1568 | 1569 | 1570 | 1571 | 1572 | 1573 | 1574 | 1575 | 1576 | 1577 | 1578 | 1579 | 1580 | 1581 | 1582 | 1583 | 1584 | 1585 | 1586 | 1587 | 1588 | 1589 | 1590 | 1591 | 1592 | 1593 | 1594 | 1595 | 1596 | 1597 | 1598 | 1599 | 1600 |
012
MethodMAERMSER2
Train20.9433.140.772
Validation17.7925.260.851
Test26.5137.220.602
1601 |
1602 | 
1603 | 
1604 | 
1605 | 
1606 | ![png](assets/images/rnn-time-to-event-notebook_49_1.png)
1607 | 
1608 | 
1609 | # Result
1610 | 
1611 | There are three models:
1612 | - baseline
1613 | - baseline WTTE-RNN LSTM
1614 | - baseline WTTE-RNN GRU
1615 | 
1616 | The mean is used as the expected value of the RUL.
1617 | 
1618 | 
1619 | ```python
1620 | %matplotlib inline
1621 | import seaborn as sns
1622 | 
1623 | l = val_Y.flatten()
1624 | y_true = np.ma.compressed(np.ma.masked_where(l==0, l))
1625 | y_pred_baseline = np.ma.compressed(np.ma.masked_where(l==0, validation_baseline))
1626 | y_pred_wtte = np.ma.compressed(np.ma.masked_where(l==0, validation_wtte))
1627 | y_pred_gru = np.ma.compressed(np.ma.masked_where(l==0, validation_gru))
1628 | 
1629 | 
1630 | fig = plt.figure()
1631 | ax = fig.add_subplot(1, 1, 1)
1632 | ax.violinplot([y_pred_baseline - y_true,
1633 |                y_pred_wtte - y_true,
1634 |                y_pred_gru - y_true])
1635 | 
1636 | ax.set_xticklabels([])
1637 | plt.figtext(0.21, 0.1, ' Baseline')
1638 | plt.figtext(0.480, 0.1, ' Baseline WTTE')
1639 | plt.figtext(0.76, 0.1, ' Baseline GRU')
1640 | 
1641 | fig.set_size_inches(15, 10)
1642 | ```
1643 | 
1644 | 
1645 | ![png](assets/images/rnn-time-to-event-notebook_51_0.png)
1646 | 
1647 | 
1648 | 
1649 | ```python
1650 | 
1651 | ```
1652 | 
--------------------------------------------------------------------------------
/_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_16_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_16_1.png
--------------------------------------------------------------------------------
/_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_19_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_19_1.png -------------------------------------------------------------------------------- /_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_26_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_26_1.png -------------------------------------------------------------------------------- /_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_33_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_33_1.png -------------------------------------------------------------------------------- /_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_35_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_35_0.png -------------------------------------------------------------------------------- /_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_37_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_37_1.png -------------------------------------------------------------------------------- /_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_46_1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_46_1.png -------------------------------------------------------------------------------- /_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_49_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_49_1.png -------------------------------------------------------------------------------- /_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_51_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/_site/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_51_0.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.4.0 2 | altair==2.2.2 3 | astor==0.7.1 4 | backports-abc==0.5 5 | backports.functools-lru-cache==1.5 6 | backports.shutil-get-terminal-size==1.0.0 7 | backports.weakref==1.0.post1 8 | beautifulsoup4==4.6.3 9 | bleach==2.1.4 10 | cachetools==2.1.0 11 | certifi==2018.8.24 12 | chardet==3.0.4 13 | configparser==3.5.0 14 | crcmod==1.7 15 | cycler==0.10.0 16 | decorator==4.3.0 17 | entrypoints==0.2.3 18 | enum34==1.1.6 19 | funcsigs==1.0.2 20 | functools32==3.2.3.post2 21 | future==0.16.0 22 | futures==3.2.0 23 | gast==0.2.0 24 | google-api-core==1.3.0 25 | google-api-python-client==1.6.7 26 | google-auth==1.4.2 27 | google-auth-httplib2==0.0.3 28 | google-auth-oauthlib==0.2.0 29 | google-cloud-bigquery==1.1.0 
30 | google-cloud-core==0.28.1 31 | google-cloud-language==1.0.2 32 | google-cloud-storage==1.8.0 33 | google-cloud-translate==1.3.1 34 | google-colab==0.0.1a1 35 | google-resumable-media==0.3.1 36 | googleapis-common-protos==1.5.3 37 | grpcio==1.14.1 38 | h5py==2.8.0 39 | html5lib==1.0.1 40 | httplib2==0.11.3 41 | idna==2.6 42 | ipykernel==4.6.1 43 | ipython==5.5.0 44 | ipython-genutils==0.2.0 45 | Jinja2==2.10 46 | joblib==0.12.2 47 | jsonschema==2.6.0 48 | jupyter-client==5.2.3 49 | jupyter-core==4.4.0 50 | Keras==2.1.6 51 | Markdown==2.6.11 52 | MarkupSafe==1.0 53 | matplotlib==2.1.2 54 | mistune==0.8.3 55 | mock==2.0.0 56 | mpmath==1.0.0 57 | nbconvert==5.3.1 58 | nbformat==4.4.0 59 | networkx==2.1 60 | nltk==3.2.5 61 | notebook==5.2.2 62 | numpy==1.14.5 63 | oauth2client==4.1.2 64 | oauthlib==2.1.0 65 | olefile==0.45.1 66 | opencv-python==3.4.2.17 67 | pandas==0.22.0 68 | pandas-gbq==0.4.1 69 | pandocfilters==1.4.2 70 | pathlib2==2.3.2 71 | patsy==0.5.0 72 | pbr==4.2.0 73 | pexpect==4.6.0 74 | pickleshare==0.7.4 75 | Pillow==4.0.0 76 | plotly==1.12.12 77 | portpicker==1.2.0 78 | prompt-toolkit==1.0.15 79 | protobuf==3.6.1 80 | psutil==5.4.7 81 | ptyprocess==0.6.0 82 | pyasn1==0.4.4 83 | pyasn1-modules==0.2.2 84 | Pygments==2.1.3 85 | pymc3==3.5 86 | pyparsing==2.2.0 87 | pystache==0.5.4 88 | python-dateutil==2.5.3 89 | pytz==2018.5 90 | PyWavelets==0.5.2 91 | PyYAML==3.13 92 | pyzmq==16.0.4 93 | requests==2.18.4 94 | requests-oauthlib==1.0.0 95 | rsa==3.4.2 96 | scandir==1.9.0 97 | scikit-image==0.13.1 98 | scikit-learn==0.19.2 99 | scipy==0.19.1 100 | seaborn==0.7.1 101 | simplegeneric==0.8.1 102 | singledispatch==3.4.0.3 103 | six==1.10.0 104 | statsmodels==0.8.0 105 | subprocess32==3.5.2 106 | sympy==1.1.1 107 | tensorboard==1.10.0 108 | tensorflow==1.10.0 109 | tensorflow-hub==0.1.1 110 | termcolor==1.1.0 111 | terminado==0.8.1 112 | testpath==0.3.1 113 | Theano==1.0.2 114 | toolz==0.9.0 115 | tornado==4.5.3 116 | tqdm==4.25.0 117 | traitlets==4.3.2 118 | 
typing==3.6.4 119 | uritemplate==3.0.0 120 | urllib3==1.22 121 | vega-datasets==0.5.0 122 | wcwidth==0.1.7 123 | webencodings==0.5.1 124 | Werkzeug==0.14.1 125 | wtte==1.1.1 126 | xgboost==0.7.post4 127 | -------------------------------------------------------------------------------- /rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_16_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_16_1.png -------------------------------------------------------------------------------- /rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_19_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_19_1.png -------------------------------------------------------------------------------- /rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_26_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_26_1.png -------------------------------------------------------------------------------- /rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_33_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_33_1.png -------------------------------------------------------------------------------- /rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_35_0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_35_0.png -------------------------------------------------------------------------------- /rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_37_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_37_1.png -------------------------------------------------------------------------------- /rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_46_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_46_1.png -------------------------------------------------------------------------------- /rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_49_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_49_1.png -------------------------------------------------------------------------------- /rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_51_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/rnn-time-to-event-notebook_files/rnn-time-to-event-notebook_51_0.png -------------------------------------------------------------------------------- /slides/RNN - Time To Event.odp: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/slides/RNN - Time To Event.odp -------------------------------------------------------------------------------- /slides/RNN - Time To Event.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/slides/RNN - Time To Event.pdf -------------------------------------------------------------------------------- /slides/RNN - Time To Event.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Manelmc/rnn-time-to-event/274055bbf537695958657c4bc701c70843c79698/slides/RNN - Time To Event.pptx --------------------------------------------------------------------------------