├── .gitignore ├── Project_Report.pdf ├── src ├── ensemble │ ├── utils.py │ ├── preprocess.py │ └── main.py ├── critical_point │ ├── utils.py │ ├── config.py │ ├── main.py │ └── preprocess.py └── rnn │ ├── config.py │ ├── main.py │ ├── model.py │ └── preprocess.py ├── notebooks ├── rnn_results_generator.ipynb ├── ensembling-approach.ipynb ├── critical-point-approaches.ipynb └── rnn-approaches.ipynb ├── README.md └── data └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /data/output 2 | /data/input 3 | /**/__pycache__ 4 | /**/catboost_info -------------------------------------------------------------------------------- /Project_Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whois-pk/RUL-Prediction-of-Li-ion-Batteries/HEAD/Project_Report.pdf -------------------------------------------------------------------------------- /src/ensemble/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import mean_squared_error 3 | 4 | 5 | def get_scores(y_true, y_pred): 6 | return np.sqrt(mean_squared_error(y_true, y_pred)) 7 | 8 | 9 | def get_preds(model, data_x): 10 | return model.predict(data_x).clip(min=0) 11 | -------------------------------------------------------------------------------- /src/critical_point/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import mean_squared_error 3 | 4 | 5 | def get_scores(y_true, y_pred): 6 | return np.sqrt(mean_squared_error(y_true, y_pred)) 7 | 8 | 9 | def get_preds(model, data_x): 10 | return model.predict(data_x).clip(min=0) 11 | -------------------------------------------------------------------------------- /src/critical_point/config.py: -------------------------------------------------------------------------------- 1 | experiment1 = ["B0005", "B0006", "B0007", "B0018"] 2 | experiment2 = ["B0025", "B0026", "B0027", "B0028"] 3 | experiment3 = ["B0029", "B0030", "B0031", "B0032"] 4 | experiment4 = ["B0033", "B0034", "B0036"] 5 | experiment5 = ["B0038", "B0039", "B0040"] 6 | experiment6 = ["B0041", "B0042", "B0043", "B0044"] 7 | experiment7 = ["B0045", "B0046", "B0047", "B0048"] 8 | experiment8 = ["B0049", "B0050", "B0051", "B0052"] 9 | experiment9 = ["B0053", "B0054", "B0055", "B0056"] 10 | -------------------------------------------------------------------------------- /src/rnn/config.py: -------------------------------------------------------------------------------- 1 | # Original Parameters 2 | # LEARNING_RATE = 0.000003 3 | # REGULARIZATION = 0.0002 4 | # NUM_EPOCHS = 500 5 | # BATCH_SIZE = 32 6 | # IS_TRAINING = True 7 | 8 | LEARNING_RATE = 0.0007 9 | REGULARIZATION = 0.0002 10 | NUM_EPOCHS = 200 11 | BATCH_SIZE = 64 12 | EARLY_STOPPING = 25 13 | STEP_LR = LEARNING_RATE/100 14 | IS_TRAINING = True 15 | 16 | experiment1 = ["B0005", "B0006", "B0007", "B0018"] 17 | experiment2 = ["B0025", "B0026", "B0027", "B0028"] 18 | experiment3 = ["B0029", "B0030", "B0031", "B0032"] 19 | experiment4 = ["B0033", "B0034", "B0036"] 20 | experiment5 = ["B0038", "B0039", "B0040"] 21 | experiment6 = ["B0041", "B0042", "B0043", "B0044"] 22 | experiment7 = ["B0045", "B0046", "B0047", "B0048"] 23 | experiment8 = ["B0049", "B0050", "B0051", "B0052"] 24 | experiment9 = ["B0053", "B0054", "B0055", "B0056"] 25 | 
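The two `utils.py` modules above reduce scoring to a single RMSE value computed on predictions clipped at zero (capacity cannot be negative), and the `config.py` files group the NASA battery IDs into nine experiments. Below is a minimal sketch of how these pieces fit together with `get_exp_based_df` from `src/critical_point/preprocess.py` (shown further down in this dump); it assumes the script is run from inside `src/critical_point/` so the flat imports resolve, and it is an illustration rather than a file that ships with the repository:

```python
# Minimal sketch (not a repository file): score one regressor on one experiment
# group using the helpers defined in src/critical_point/.
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

from config import experiment1            # ["B0005", "B0006", "B0007", "B0018"]
from preprocess import get_exp_based_df   # critical-point features per discharge cycle
from utils import get_scores, get_preds   # RMSE + predictions clipped at zero

df_x, df_y = get_exp_based_df(experiment1)
train_x, test_x, train_y, test_y = train_test_split(
    df_x, df_y, test_size=0.2, random_state=0)

model = RandomForestRegressor().fit(train_x, train_y)
print("Test RMSE:", get_scores(test_y, get_preds(model, test_x)))
```

`src/critical_point/main.py` below does essentially this in a loop over all nine experiment groups and thirteen regressor types.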
-------------------------------------------------------------------------------- /src/ensemble/preprocess.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | def preprocess() : 5 | path = "../input/rul-prediction-for-liion-batteries-prediction/Cleaned" 6 | path = "../../data/output/RNN/cleaned_results" 7 | exps_dict = {} 8 | exps = os.listdir(path) 9 | for exp in os.listdir(path): 10 | exps_dict[exp] = [] 11 | for m in os.listdir(f"{path}/{exp}"): 12 | exps_dict[exp].append(m) 13 | 14 | pd.DataFrame(exps_dict) 15 | 16 | 17 | ensemble_dict = {} 18 | for exp in exps: 19 | df = pd.DataFrame() 20 | for m in exps_dict[exp]: 21 | model_name = m.split('_')[0] 22 | temp = pd.read_csv(f"{path}/{exp}/{m}").rename( 23 | columns={"Capacity": f"{model_name}_Cap", "model_predict": f"{model_name}_pred"}) 24 | curr_cols = temp.columns.tolist() 25 | temp = temp.rename(columns={x: x.lower() for x in curr_cols}) 26 | df = pd.concat([df, temp], axis=1) 27 | ensemble_dict[exp] = df 28 | 29 | ensemble_df = {} 30 | for exp in exps: 31 | print(exp) 32 | print(ensemble_dict[exp].columns.tolist()) 33 | temp = ensemble_dict[exp].drop( 34 | columns=['gru_cap', 'bigru_cap', 'bilstm_cap']).rename(columns={'lstm_cap': 'cap'}) 35 | df_x = temp.drop(columns=['cap']) 36 | df_y = temp['cap'] 37 | ensemble_df[exp] = [df_x, df_y] 38 | 39 | return exps,exps_dict,ensemble_dict,ensemble_df -------------------------------------------------------------------------------- /src/rnn/main.py: -------------------------------------------------------------------------------- 1 | from sklearn.model_selection import train_test_split 2 | import pandas as pd 3 | from rnn.config import * 4 | from src.config import * 5 | from model import get_model 6 | import tensorflow as tf 7 | from preprocess import get_exp_based_df 8 | 9 | if __name__ == "__main__": 10 | # experiment1, experiment2, experiment3, experiment4, experiment5, experiment6, experiment7, experiment8, experiment9 11 | df_x, df_y = get_exp_based_df(experiment1) 12 | train_x, test_x, train_y, test_y = train_test_split( 13 | df_x, df_y, test_size=0.2, random_state=0) 14 | test_x, val_x, test_y, val_y = train_test_split( 15 | test_x, test_y, test_size=0.5, random_state=0) 16 | 17 | input_shape = (train_x.shape[1], train_x.shape[2]) 18 | model, experiment_name = get_model(input_shape, rnn_layer='gru', 19 | name='gru', is_bidirectional=False) 20 | 21 | data_path = "./model" 22 | if IS_TRAINING: 23 | def scheduler(epoch, lr): 24 | if epoch < 10: 25 | return lr+STEP_LR 26 | elif epoch % 5 == 0: 27 | return lr*0.99 28 | return lr 29 | 30 | early_stopping = tf.keras.callbacks.EarlyStopping( 31 | monitor='loss', patience=EARLY_STOPPING) 32 | lr_scheduler = tf.keras.callbacks.LearningRateScheduler(scheduler) 33 | 34 | history = model.fit(train_x, train_y, epochs=NUM_EPOCHS, batch_size=BATCH_SIZE, 35 | verbose=1, validation_split=0.1, callbacks=[early_stopping, lr_scheduler]) 36 | 37 | model.save(data_path + '../../output/results/trained_model/%s.h5' % 38 | experiment_name) 39 | 40 | hist_df = pd.DataFrame(history.history) 41 | hist_csv_file = data_path + \ 42 | '../../output/results/trained_model/%s_history.csv' % experiment_name 43 | with open(hist_csv_file, mode='w') as f: 44 | hist_df.to_csv(f) 45 | history = history.history 46 | 47 | model.evaluate(train_x, train_y) 48 | model.evaluate(val_x, val_y) 49 | model.evaluate(test_x, test_y) 50 | 
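Besides the saved model, `rnn/main.py` above writes the Keras training history to a `<experiment_name>_history.csv` file. A short sketch of how that file can be inspected afterwards; the file name is only an example of the timestamped pattern produced by `get_model`, not a path guaranteed to exist on disk:

```python
# Hedged sketch (not part of rnn/main.py): plot the training curves it writes out.
# The CSV name below is illustrative; point it at the *_history.csv from your run.
import pandas as pd
import matplotlib.pyplot as plt

hist = pd.read_csv("2022-04-23-18-00-00_gru_rul_nasa_randomized_history.csv")

# 'loss'/'val_loss' come from the Huber objective; 'rmse'/'val_rmse' come from the
# RootMeanSquaredError metric registered in model.compile().
hist[["loss", "val_loss"]].plot()
plt.xlabel("epoch")
plt.ylabel("Huber loss")
plt.show()
```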
-------------------------------------------------------------------------------- /src/ensemble/main.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from preprocess import preprocess 3 | from utils import * 4 | 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor 7 | from sklearn.svm import LinearSVR, NuSVR, SVR 8 | from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor 9 | from sklearn.linear_model import LinearRegression, HuberRegressor 10 | from sklearn.neighbors import KNeighborsRegressor 11 | 12 | from catboost import CatBoostRegressor 13 | from lightgbm import LGBMRegressor 14 | from xgboost import XGBRegressor 15 | 16 | 17 | if __name__ == "__main__": 18 | exps, exps_dict, ensemble_dict, ensemble_df = preprocess() 19 | model_results = pd.DataFrame() 20 | for exp in exps: 21 | print(exp) 22 | df_x, df_y = ensemble_df[exp] 23 | train_x, test_x, train_y, test_y = train_test_split( 24 | df_x, df_y, test_size=0.2, random_state=0) 25 | test_x, val_x, test_y, val_y = train_test_split( 26 | test_x, test_y, test_size=0.5, random_state=0) 27 | 28 | print(train_x.shape, test_x.shape, train_y.shape, test_y.shape) 29 | 30 | algos = (LinearRegression, HuberRegressor, KNeighborsRegressor, LinearSVR, NuSVR, 31 | SVR, DecisionTreeRegressor, ExtraTreeRegressor, RandomForestRegressor, ExtraTreesRegressor, 32 | XGBRegressor, LGBMRegressor, CatBoostRegressor) 33 | 34 | params = { 35 | 'silent': True 36 | } 37 | 38 | for algo in algos: 39 | model = algo() 40 | if type(model).__name__ == 'CatBoostRegressor': 41 | model = algo(**params) 42 | # print(type(model).__name__) 43 | model.fit(train_x, train_y) 44 | 45 | model_results_train = get_scores( 46 | train_y, get_preds(model, train_x)) 47 | model_results_val = get_scores(val_y, get_preds(model, val_x)) 48 | model_results_test = get_scores(test_y, get_preds(model, test_x)) 49 | data = {"Train": model_results_train, 50 | "Val": model_results_val, 51 | "Test": model_results_test} 52 | temp = pd.DataFrame(data, index=[f'{exp}_{type(model).__name__}']) 53 | model_results = model_results.append(temp) 54 | 55 | model_results.to_csv('../../data/output/Ensemble/ensemble_results.csv') 56 | -------------------------------------------------------------------------------- /src/critical_point/main.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from config import * 3 | from preprocess import get_exp_based_df 4 | from utils import * 5 | 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor 8 | from sklearn.svm import LinearSVR, NuSVR, SVR 9 | from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor 10 | from sklearn.linear_model import LinearRegression, HuberRegressor 11 | from sklearn.neighbors import KNeighborsRegressor 12 | 13 | from catboost import CatBoostRegressor 14 | from lightgbm import LGBMRegressor 15 | from xgboost import XGBRegressor 16 | 17 | if __name__ == "__main__": 18 | exps = [experiment1, experiment2, experiment3, experiment4, 19 | experiment5, experiment6, experiment7, experiment8, experiment9] 20 | exp_no = 0 21 | model_results = pd.DataFrame() 22 | for exp in exps: 23 | exp_no += 1 24 | df_x, df_y = get_exp_based_df(exp) 25 | train_x, test_x, train_y, test_y = train_test_split( 26 | df_x, df_y, test_size=0.2, random_state=0) 27 | test_x, 
val_x, test_y, val_y = train_test_split( 28 | test_x, test_y, test_size=0.5, random_state=0) 29 | 30 | print(train_x.shape, test_x.shape, train_y.shape, test_y.shape) 31 | 32 | algos = (LinearRegression, HuberRegressor, KNeighborsRegressor, LinearSVR, NuSVR, 33 | SVR, DecisionTreeRegressor, ExtraTreeRegressor, RandomForestRegressor, ExtraTreesRegressor, 34 | XGBRegressor, LGBMRegressor, CatBoostRegressor) 35 | 36 | params = { 37 | 'silent': True 38 | } 39 | 40 | for algo in algos: 41 | model = algo() 42 | model_name = type(model).__name__ 43 | if model_name == 'CatBoostRegressor': 44 | model = algo(**params) 45 | model.fit(train_x, train_y) 46 | 47 | model_results_train = get_scores( 48 | train_y, get_preds(model, train_x)) 49 | model_results_val = get_scores(val_y, get_preds(model, val_x)) 50 | model_results_test = get_scores(test_y, get_preds(model, test_x)) 51 | data = {"Train": model_results_train, 52 | "Val": model_results_val, 53 | "Test": model_results_test} 54 | temp = pd.DataFrame(data, index=[f'Exp_{exp_no}_{model_name}']) 55 | model_results = model_results.append(temp) 56 | 57 | model_results.to_csv( 58 | f'../../data/output/Critical_Point/critical_point_results.csv') 59 | -------------------------------------------------------------------------------- /notebooks/rnn_results_generator.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Untitled0.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyOYDDIel/3KUFFUrZOCX/QK"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":5,"metadata":{"id":"CS62sKGvj8Ii","executionInfo":{"status":"ok","timestamp":1650686327909,"user_tz":-330,"elapsed":3,"user":{"displayName":"TUSHAR BAUSKAR","userId":"04632303064409665284"}}},"outputs":[],"source":["import pandas as pd\n","import os"]},{"cell_type":"code","source":["from google.colab import drive\n","drive.mount('/content/drive')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"qomSiccbkOtl","executionInfo":{"status":"ok","timestamp":1650686280263,"user_tz":-330,"elapsed":31694,"user":{"displayName":"TUSHAR BAUSKAR","userId":"04632303064409665284"}},"outputId":"604ffb18-4299-4c09-ae40-e49cd2ded069"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/drive\n"]}]},{"cell_type":"code","source":["INPUT_DIR = '/content/drive/MyDrive/project/Results'\n","OUTPUT_DIR = '/content/drive/MyDrive/project/Cleaned'"],"metadata":{"id":"yt_i0q5vkT5Y","executionInfo":{"status":"ok","timestamp":1650686510770,"user_tz":-330,"elapsed":495,"user":{"displayName":"TUSHAR BAUSKAR","userId":"04632303064409665284"}}},"execution_count":13,"outputs":[]},{"cell_type":"code","source":["os.listdir(INPUT_DIR)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"KBMDd_IYkjlm","executionInfo":{"status":"ok","timestamp":1650686330329,"user_tz":-330,"elapsed":4,"user":{"displayName":"TUSHAR BAUSKAR","userId":"04632303064409665284"}},"outputId":"8a12fbf3-2d9b-4d62-a97e-855b8082797c"},"execution_count":6,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['Experiment 1',\n"," 'Experiment 2',\n"," 'Experiment 3',\n"," 'Experiment 4',\n"," 'Experiment 5',\n"," 'Experiment 6',\n"," 'Experiment 7',\n"," 'Experiment 8',\n"," 'Experiment 9',\n"," 'Untitled0.ipynb']"]},"metadata":{},"execution_count":6}]},{"cell_type":"code","source":["for 
i in range(1, 10):\n"," exp_dir = f'{INPUT_DIR}/Experiment {i}'\n"," for results in os.listdir(exp_dir):\n"," df_path = os.path.join(exp_dir, results)\n"," # print(df_path)\n"," df = pd.read_csv(df_path)\n"," # print(df.columns)\n"," df = df[['Capacity', 'model_predict']]\n"," df.to_csv(os.path.join(OUTPUT_DIR, f'Experiment {i}', results), index=False)"],"metadata":{"id":"Wqa1fNFgkliV","executionInfo":{"status":"ok","timestamp":1650686858518,"user_tz":-330,"elapsed":30480,"user":{"displayName":"TUSHAR BAUSKAR","userId":"04632303064409665284"}}},"execution_count":22,"outputs":[]},{"cell_type":"code","source":[""],"metadata":{"id":"Vmwp0v0MktHx"},"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /src/rnn/model.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import tensorflow as tf 4 | from tensorflow import keras 5 | from tensorflow.keras import layers, regularizers 6 | 7 | from tensorflow.keras.models import Model 8 | from tensorflow.keras.layers import Dense, Input 9 | from tensorflow.keras.layers import LSTM, GRU, Bidirectional 10 | from rnn.config import * 11 | 12 | 13 | class RNNLayers(layers.Layer): 14 | def __init__(self, rnn_layers_hidden_node, rnn_layer_type, is_bidirectional=False): 15 | super(RNNLayers, self).__init__() 16 | self.rnn_layers = [] 17 | print(rnn_layer_type) 18 | 19 | for hidden_node in rnn_layers_hidden_node: 20 | if is_bidirectional: 21 | self.rnn_layers.append(Bidirectional(rnn_layer_type(hidden_node, activation='selu', return_sequences=True, 22 | kernel_regularizer=regularizers.l2(REGULARIZATION)))) 23 | else: 24 | self.rnn_layers.append(rnn_layer_type(hidden_node, activation='selu', return_sequences=True, 25 | kernel_regularizer=regularizers.l2(REGULARIZATION))) 26 | 27 | def call(self, input_tensor, training): 28 | x = None 29 | for layer in self.rnn_layers: 30 | if x is None: 31 | x = layer(input_tensor, training=training) 32 | else: 33 | x = layer(x, training=training) 34 | return x 35 | 36 | 37 | class DenseLayers(layers.Layer): 38 | def __init__(self, dense_layer_hidden_nodes): 39 | super(DenseLayers, self).__init__() 40 | self.dense_layers = [] 41 | for hidden_node in dense_layer_hidden_nodes: 42 | self.dense_layers.append(Dense(hidden_node, activation='selu', 43 | kernel_regularizer=regularizers.l2(REGULARIZATION))) 44 | 45 | self.dense_layers.append(Dense(1, activation='linear')) 46 | 47 | def call(self, input_tensor, training): 48 | x = None 49 | for layer in self.dense_layers: 50 | if x is None: 51 | x = layer(input_tensor, training=training) 52 | else: 53 | x = layer(x, training=training) 54 | return x 55 | 56 | 57 | class RULModel(Model): 58 | def __init__(self, input_shape, rnn_layers_hidden_node, dense_layer_hidden_nodes, rnn_layer_type, is_bidirectional=False): 59 | super(RULModel, self).__init__() 60 | self.ip_shape = input_shape 61 | self.rnn = RNNLayers(rnn_layers_hidden_node, 62 | rnn_layer_type, is_bidirectional) 63 | self.dense = DenseLayers(dense_layer_hidden_nodes) 64 | 65 | def call(self, input_tensor, training): 66 | x = self.rnn(input_tensor, training) 67 | x = self.dense(x, training) 68 | return x 69 | 70 | def model(self): 71 | x = Input(shape=self.ip_shape) 72 | return Model(inputs=[x], outputs=self.call(x)) 73 | 74 | 75 | def get_model(input_shape, rnn_layer='', name='', is_bidirectional=False): 76 | EXPERIMENT = f"{name}_rul_nasa_randomized" 77 | 78 | experiment_name = time.strftime("%Y-%m-%d-%H-%M-%S") 
+ '_' + EXPERIMENT 79 | print(experiment_name) 80 | 81 | # Model definition 82 | opt = tf.keras.optimizers.Adam(lr=LEARNING_RATE) 83 | rnn_layers_hidden_node = [256, 256, 256, 128, 128, 128, 64, 64, 64] 84 | dense_layer_hidden_nodes = [64, 64, 32, 32] 85 | rnn_layer_type = LSTM if rnn_layer == 'lstm' else GRU 86 | 87 | model = RULModel(input_shape, rnn_layers_hidden_node, 88 | dense_layer_hidden_nodes, rnn_layer_type, is_bidirectional) 89 | model.compile(optimizer=opt, loss='huber', metrics=[ 90 | 'mse', 'mae', 'mape', tf.keras.metrics.RootMeanSquaredError(name='rmse')]) 91 | model.build((1, input_shape[0], input_shape[1])) 92 | model.summary() 93 | return model, experiment_name 94 | -------------------------------------------------------------------------------- /src/rnn/preprocess.py: -------------------------------------------------------------------------------- 1 | from sklearn.model_selection import train_test_split 2 | import os 3 | import numpy as np 4 | import pandas as pd 5 | from scipy.io import loadmat 6 | 7 | 8 | def to_padded_numpy(l, shape): 9 | padded_array = np.zeros(shape) 10 | padded_array[:len(l)] = l 11 | return padded_array 12 | 13 | 14 | def preprocess_data_to_cycles(): 15 | path = "../../data/input/battery-data-set" 16 | dis = os.listdir(path) 17 | dis_mat = [] 18 | battery_grp = {} 19 | 20 | for i in dis: 21 | filtered_list = list(filter(lambda x: x.split( 22 | '.')[-1] == 'mat', os.listdir(f"{path}/{i}"))) 23 | battery_grp[i.split('BatteryAgingARC')[-1][1:] 24 | ] = list(map(lambda x: x.split('.')[0], filtered_list)) 25 | dis_mat.extend(list(map(lambda x: f"{path}/{i}/{x}", filtered_list))) 26 | 27 | battery_grp['5_6_7_18'] = battery_grp['FY08Q4'] 28 | del battery_grp['FY08Q4'] 29 | 30 | bs = [x.split('/')[-1].split('.')[0] for x in dis_mat] 31 | 32 | ds = [] 33 | for b in dis_mat: 34 | ds.append(loadmat(b)) 35 | 36 | types = [] 37 | times = [] 38 | ambient_temperatures = [] 39 | datas = [] 40 | 41 | for i in range(len(ds)): 42 | x = ds[i][bs[i]]["cycle"][0][0][0] 43 | ambient_temperatures.append( 44 | list(map(lambda y: y[0][0], x['ambient_temperature']))) 45 | types.append(x['type']) 46 | times.append(x['time']) 47 | datas.append(x['data']) 48 | 49 | batteries = [] 50 | cycles = [] 51 | for i in range(len(ds)): 52 | batteries.append(bs[i]) 53 | cycles.append(datas[i].size) 54 | 55 | battery_cycle_df = pd.DataFrame( 56 | {'Battery': batteries, 'Cycle': cycles}).sort_values('Battery', ascending=True) 57 | battery_cycle_df.drop_duplicates(inplace=True) 58 | 59 | Cycles = {} 60 | params = ['Voltage_measured', 'Current_measured', 'Temperature_measured', 61 | 'Current_load', 'Voltage_load', 'Time', 'Capacity', ] 62 | 63 | for i in range(len(bs)): 64 | Cycles[bs[i]] = {} 65 | Cycles[bs[i]]['count'] = 0 66 | for param in params: 67 | Cycles[bs[i]][param] = [] 68 | for j in range(datas[i].size): 69 | if types[i][j] == 'discharge': 70 | Cycles[bs[i]][param].append(datas[i][j][param][0][0][0]) 71 | 72 | cap = [] 73 | amb_temp = [] 74 | for j in range(datas[i].size): 75 | if types[i][j] == 'discharge': 76 | cap.append(datas[i][j]['Capacity'][0][0][0]) 77 | amb_temp.append(ambient_temperatures[i][j]) 78 | 79 | Cycles[bs[i]]['Capacity'] = np.array(cap) 80 | Cycles[bs[i]]['ambient_temperatures'] = np.array(amb_temp) 81 | Cycles = pd.DataFrame(Cycles) 82 | 83 | return Cycles 84 | 85 | 86 | def get_exp_based_df(exp): 87 | Cycles = preprocess_data_to_cycles() 88 | df_all = pd.DataFrame({}) 89 | max_len = 0 90 | 91 | exp_try_out = exp 92 | 93 | for bat in exp_try_out: 94 | df = 
pd.DataFrame({}) 95 | cols = ['Voltage_measured', 'Current_measured', 'Temperature_measured', 96 | 'Current_load', 'Voltage_load', 'Time', 'Capacity', 'ambient_temperatures'] 97 | for col in cols: 98 | df[col] = Cycles[bat][col] 99 | max_l = np.max(df['Time'].apply(lambda x: len(x)).values) 100 | max_len = max(max_l, max_len) 101 | df_all = pd.concat([df_all, df], ignore_index=True) 102 | 103 | df = df_all.reset_index(drop=True) 104 | df 105 | 106 | for i, j in enumerate(df['Capacity']): 107 | try: 108 | if len(j): 109 | df['Capacity'][i] = j[0] 110 | else: 111 | df['Capacity'][i] = 0 112 | except: 113 | pass 114 | 115 | df_x = df.drop(columns=['Capacity', 'ambient_temperatures']).values 116 | df_y = df['Capacity'].values 117 | 118 | n, m = df_x.shape[0], df_x.shape[1] 119 | temp2 = np.zeros((n, m, max_len)) 120 | for i in range(n): 121 | for j in range(m): 122 | temp2[i][j] = to_padded_numpy(df_x[i][j], max_len) 123 | 124 | df_x = temp2 125 | return df_x, df_y 126 | -------------------------------------------------------------------------------- /src/critical_point/preprocess.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import os 4 | from scipy.io import loadmat 5 | 6 | 7 | def preprocess_data_to_cycles(): 8 | path = "../../data/input/battery-data-set" 9 | dis = os.listdir(path) 10 | dis_mat = [] 11 | battery_grp = {} 12 | 13 | for i in dis: 14 | filtered_list = list(filter(lambda x: x.split( 15 | '.')[-1] == 'mat', os.listdir(f"{path}/{i}"))) 16 | battery_grp[i.split('BatteryAgingARC')[-1][1:] 17 | ] = list(map(lambda x: x.split('.')[0], filtered_list)) 18 | dis_mat.extend(list(map(lambda x: f"{path}/{i}/{x}", filtered_list))) 19 | 20 | battery_grp['5_6_7_18'] = battery_grp['FY08Q4'] 21 | del battery_grp['FY08Q4'] 22 | 23 | bs = [x.split('/')[-1].split('.')[0] for x in dis_mat] 24 | 25 | ds = [] 26 | for b in dis_mat: 27 | ds.append(loadmat(b)) 28 | 29 | types = [] 30 | times = [] 31 | ambient_temperatures = [] 32 | datas = [] 33 | 34 | for i in range(len(ds)): 35 | x = ds[i][bs[i]]["cycle"][0][0][0] 36 | ambient_temperatures.append( 37 | list(map(lambda y: y[0][0], x['ambient_temperature']))) 38 | types.append(x['type']) 39 | times.append(x['time']) 40 | datas.append(x['data']) 41 | 42 | batteries = [] 43 | cycles = [] 44 | for i in range(len(ds)): 45 | batteries.append(bs[i]) 46 | cycles.append(datas[i].size) 47 | 48 | battery_cycle_df = pd.DataFrame( 49 | {'Battery': batteries, 'Cycle': cycles}).sort_values('Battery', ascending=True) 50 | battery_cycle_df.drop_duplicates(inplace=True) 51 | 52 | Cycles = {} 53 | params = ['Voltage_measured', 'Current_measured', 'Temperature_measured', 54 | 'Current_load', 'Voltage_load', 'Time', 'Capacity', ] 55 | 56 | for i in range(len(bs)): 57 | Cycles[bs[i]] = {} 58 | Cycles[bs[i]]['count'] = 0 59 | for param in params: 60 | Cycles[bs[i]][param] = [] 61 | for j in range(datas[i].size): 62 | if types[i][j] == 'discharge': 63 | Cycles[bs[i]][param].append(datas[i][j][param][0][0][0]) 64 | 65 | cap = [] 66 | amb_temp = [] 67 | for j in range(datas[i].size): 68 | if types[i][j] == 'discharge': 69 | cap.append(datas[i][j]['Capacity'][0][0][0]) 70 | amb_temp.append(ambient_temperatures[i][j]) 71 | 72 | Cycles[bs[i]]['Capacity'] = np.array(cap) 73 | Cycles[bs[i]]['ambient_temperatures'] = np.array(amb_temp) 74 | Cycles = pd.DataFrame(Cycles) 75 | 76 | return Cycles 77 | 78 | 79 | def get_exp_based_df(exp): 80 | Cycles = preprocess_data_to_cycles() 81 | df_all = 
pd.DataFrame({}) 82 | max_len = 0 83 | 84 | exp_try_out = exp 85 | 86 | for bat in exp_try_out: 87 | df = pd.DataFrame({}) 88 | cols = ['Voltage_measured', 'Current_measured', 'Temperature_measured', 89 | 'Current_load', 'Voltage_load', 'Time', 'Capacity', 'ambient_temperatures'] 90 | for col in cols: 91 | df[col] = Cycles[bat][col] 92 | max_l = np.max(df['Time'].apply(lambda x: len(x)).values) 93 | max_len = max(max_l, max_len) 94 | df_all = pd.concat([df_all, df], ignore_index=True) 95 | 96 | df = df_all.reset_index(drop=True) 97 | 98 | for i, j in enumerate(df['Capacity']): 99 | try: 100 | if len(j): 101 | df['Capacity'][i] = j[0] 102 | else: 103 | df['Capacity'][i] = 0 104 | except: 105 | pass 106 | 107 | # CRITICAL TIME POINTS FOR A CYCLE 108 | # We will only these critical points for furthur training 109 | 110 | # TEMPERATURE_MEASURED 111 | # => Time at highest temperature 112 | 113 | # VOLTAGE_MEASURED 114 | # => Time at lowest Voltage 115 | 116 | # VOLTAGE_LOAD 117 | # => First time it drops below 1 volt after 1500 time 118 | 119 | def getTemperatureMeasuredCritical(tm, time): 120 | high = 0 121 | critical = 0 122 | for i in range(len(tm)): 123 | if (tm[i] > high): 124 | high = tm[i] 125 | critical = time[i] 126 | return critical 127 | 128 | def getVoltageMeasuredCritical(vm, time): 129 | low = 1e9 130 | critical = 0 131 | for i in range(len(vm)): 132 | if (vm[i] < low): 133 | low = vm[i] 134 | critical = time[i] 135 | return critical 136 | 137 | def getVoltageLoadCritical(vl, time): 138 | for i in range(len(vl)): 139 | if (time[i] > 1500 and vl[i] < 1): 140 | return time[i] 141 | return -1 142 | 143 | def fun(x): 144 | cap = x['Capacity'] 145 | amb_temp = x['ambient_temperatures'] 146 | volt_load_c = getVoltageLoadCritical(x['Voltage_load'], x['Time']) 147 | volt_meas_c = getVoltageMeasuredCritical( 148 | x['Voltage_measured'], x['Time']) 149 | temp_meas_c = getTemperatureMeasuredCritical( 150 | x['Temperature_measured'], x['Time']) 151 | 152 | data = { 153 | 'Capacity': cap, 154 | 'ambient_temperatures': amb_temp, 155 | 'Critical_Voltage_load': volt_load_c, 156 | 'Critical_Voltage_measured': volt_meas_c, 157 | 'Critical_Temperature_measured': temp_meas_c, 158 | } 159 | data_idx = [ 160 | 'Capacity', 'ambient_temperatures', 'Critical_Voltage_load', 161 | 'Critical_Voltage_measured', 'Critical_Temperature_measured', 162 | ] 163 | y = pd.Series(data, index=data_idx) 164 | 165 | return y 166 | 167 | df = df.apply(lambda x: fun(x), axis=1) 168 | 169 | df_x = df.drop(columns=['Capacity', 'ambient_temperatures']).values 170 | df_y = df['Capacity'].values 171 | 172 | return df_x, df_y 173 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RUL-Prediction-for-Li-ion-Batteries 2 | With its use seen in critical areas of safety and security, it is essential for lithium-ion batteries to be reliable. Prediction of the Remaining Useful Life (RUL) can give insights into the health of the battery. Variations of Recurrent Neural Networks (RNN) are employed to learn the capacity degradation trajectories of lithium-ion batteries. Using several regressor models as the baseline, an ensemble of RNNs is created to overcome the shortcomings of one RNN over the other. The critical point approach and the data-driven approach for regressor models and neural network models respectively help predict the RUL. 
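Concretely, the ensembling stage consumes per-experiment CSVs of RNN outputs: each RNN's per-cycle capacity prediction becomes a feature column and the measured capacity is the target (see `src/ensemble/preprocess.py`). A minimal sketch of that stacking idea, assuming a hypothetical merged file that already carries the lower-cased column names the preprocessing step produces:

```python
# Hedged sketch of the stacking idea behind src/ensemble/: the four RNNs'
# per-cycle predictions are features, the measured capacity ('cap') is the
# target, and a classical regressor learns how to combine them.
# "experiment1_merged_predictions.csv" is a hypothetical file name.
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

df = pd.read_csv("experiment1_merged_predictions.csv")
X = df[["lstm_pred", "bilstm_pred", "gru_pred", "bigru_pred"]]
y = df["cap"]

train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=0)
ensemble = RandomForestRegressor().fit(train_x, train_y)
print("Held-out R^2 of the stacked model:", ensemble.score(test_x, test_y))
```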
3 | 4 | 5 | ## Report 6 | [Project Report](https://github.com/utsavk28/RUL-Prediction-for-Li-ion-Batteries/blob/main/Project_Report.pdf) 7 | 8 | ## Results 9 | 10 | ### Various RNN Model Results

| Experiment | Model | Training RMSE | Testing RMSE | Validation RMSE |
|---|---|---|---|---|
| Experiment 1 | LSTM | 0.0312 | 0.0304 | 0.0311 |
|  | BiLSTM | 0.287 | 0.2792 | 0.3259 |
|  | GRU | 0.0278 | 0.0342 | 0.0356 |
|  | BiGRU | 0.0901 | 0.0945 | 0.1059 |
| Experiment 2 | LSTM | 0.019 | 0.0173 | 0.0122 |
|  | BiLSTM | 0.5521 | 0.1376 | 0.4871 |
|  | GRU | 0.0962 | 0.0868 | 0.1957 |
|  | BiGRU | 1.568 | 1.3482 | 1.7741 |
| Experiment 3 | LSTM | 0.0183 | 0.0336 | 0.0542 |
|  | BiLSTM | 0.107 | 0.1108 | 0.1237 |
|  | GRU | 0.029 | 0.0425 | 0.0516 |
|  | BiGRU | 0.034 | 0.0454 | 0.053 |
| Experiment 4 | LSTM | 0.0248 | 0.0236 | 0.0232 |
|  | BiLSTM | 0.2583 | 0.2232 | 0.1943 |
|  | GRU | 0.0152 | 0.0242 | 0.0452 |
|  | BiGRU | 0.2186 | 0.2282 | 0.1775 |
| Experiment 5 | LSTM | 0.0145 | 0.0981 | 0.1329 |
|  | BiLSTM | 1.2602 | 1.1037 | 0.9943 |
|  | GRU | 0.0253 | 0.0811 | 0.1761 |
|  | BiGRU | 0.3437 | 0.4544 | 0.4535 |
| Experiment 6 | LSTM | 0.0123 | 0.0189 | 0.0277 |
|  | BiLSTM | 0.7338 | 0.6262 | 0.6111 |
|  | GRU | 0.0967 | 0.1094 | 0.2436 |
|  | BiGRU | 0.138 | 0.2562 | 0.2627 |
| Experiment 7 | LSTM | 0.0132 | 0.0253 | 0.0245 |
|  | BiLSTM | 0.2486 | 0.3564 | 0.3278 |
|  | GRU | 0.0578 | 0.0645 | 0.0689 |
|  | BiGRU | 0.1896 | 0.2486 | 0.2156 |
| Experiment 8 | LSTM | 0.0226 | 0.0356 | 0.0312 |
|  | BiLSTM | 0.227 | 0.2792 | 0.4123 |
|  | GRU | 0.0156 | 0.0236 | 0.0384 |
|  | BiGRU | 0.0689 | 0.1562 | 0.1047 |
| Experiment 9 | LSTM | 0.0196 | 0.0265 | 0.0241 |
|  | BiLSTM | 0.3568 | 0.3956 | 0.4256 |
|  | GRU | 0.0452 | 0.0546 | 0.0514 |
|  | BiGRU | 0.0918 | 0.1256 | 0.1298 |

51 | 52 | ### Ensembling Results

| Experiment | Model | Train RMSE | Validation RMSE | Test RMSE |
|---|---|---|---|---|
| Experiment 1 | Experiment1_CatBoostRegressor | 0.007525 | 0.027265 | 0.020191 |
|  | Experiment1_ExtraTreesRegressor | 2.05E-15 | 0.032114 | 0.019708 |
| Experiment 2 | Experiment 2_LGBMRegressor | 0.015742 | 0.01869 | 0.121217 |
|  | Experiment 2_RandomForestRegressor | 0.008446 | 0.016777 | 0.118291 |
| Experiment 3 | Experiment 3_ExtraTreesRegressor | 1.97E-15 | 0.027225 | 0.050309 |
|  | Experiment 3_XGBRegressor | 0.001102 | 0.032008 | 0.052786 |
| Experiment 4 | Experiment 4_ExtraTreeRegressor | 0 | 0.038955 | 0.096077 |
|  | Experiment 4_LGBMRegressor | 0.064483 | 0.076645 | 0.094264 |
| Experiment 5 | Experiment 5_DecisionTreeRegressor | 0 | 0.036995 | 0.027503 |
|  | Experiment 5_RandomForestRegressor | 0.023868 | 0.033271 | 0.033192 |
| Experiment 6 | Experiment 6_HuberRegressor | 0.012126 | 0.018583 | 0.020066 |
|  | Experiment 6_LinearRegression | 0.01206 | 0.018489 | 0.019934 |
| Experiment 7 | Experiment 7_HuberRegressor | 0.015001 | 0.026221 | 0.013487 |
|  | Experiment 7_LinearRegression | 0.014968 | 0.026394 | 0.013531 |
|  | Experiment 7_LinearSVR | 0.016352 | 0.029895 | 0.01475 |
| Experiment 8 | Experiment 8_DecisionTreeRegressor | 0 | 0.075652 | 0.339136 |
|  | Experiment 8_LinearSVR | 0.042245 | 0.108166 | 0.342323 |
| Experiment 9 | Experiment 9_CatBoostRegressor | 0.006819 | 0.043317 | 0.031779 |
|  | Experiment 9_ExtraTreesRegressor | 1.59E-15 | 0.039875 | 0.031513 |
|  | Experiment 9_LGBMRegressor | 0.04493 | 0.124974 | 0.031858 |
77 | 78 | 79 | ## References 80 | ### Time Series 81 | 1. [Time Series Prediction: How Is It Different From Other Machine Learning? [ML Engineer Explains] ](https://neptune.ai/blog/time-series-prediction-vs-machine-learning) 82 | 2. [A Comprehensive Guide to Time Series Analysis](https://www.analyticsvidhya.com/blog/2021/10/a-comprehensive-guide-to-time-series-analysis/) 83 | 3. [A RoadMap to Time-Series Analysis](https://medium.com/featurepreneur/a-roadmap-for-time-series-analysis-3faf49b2126) 84 | 85 | ### Papers 86 | 1. [A review of lithium-ion battery state of charge estimation and management system in electric vehicle applications: Challenges and recommendations](https://www.sciencedirect.com/science/article/abs/pii/S1364032117306275) 87 | 2. [Remaining Useful Life Prediction and State of Health Diagnosis of Lithium-Ion Battery Based on Second-Order Central Difference Particle Filter](https://ieeexplore.ieee.org/document/9000971) 88 | 3. [Data-Driven Remaining Useful Life Prediction for Lithium-Ion Batteries Using Multi-Charging Profile Framework: A Recurrent Neural Network Approach](https://www.mdpi.com/2071-1050/13/23/13333) 89 | 4. [Novel Statistical Analysis Approach for Remaining Useful Life Prediction of Lithium-Ion Battery](https://ieeexplore.ieee.org/document/9579982) 90 | 91 | 92 | ### RUL Prediction Code 93 | 1. [RUL Prediction for Li-ion Batteries using Critical Point](https://github.com/yash0530/RUL-Prediction-for-Li-ion-Batteries) 94 | 2. [Estimation of the Remaining Useful Life (RUL) of Lithium-ion batteries using Deep LSTMs.](https://github.com/MichaelBosello/battery-rul-estimation) 95 | 3. [Transformer Network for Remaining Useful Life Prediction of Lithium-Ion Batteries](https://github.com/XiuzeZhou/RUL) 96 | -------------------------------------------------------------------------------- /notebooks/ensembling-approach.ipynb: -------------------------------------------------------------------------------- 1 | {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"pygments_lexer":"ipython3","nbconvert_exporter":"python","version":"3.6.4","file_extension":".py","codemirror_mode":{"name":"ipython","version":3},"name":"python","mimetype":"text/x-python"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"import os\nimport numpy as np\nimport pandas as pd\nimport scipy\nfrom scipy.io import loadmat\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nimport shutil\n\npath = \"../input/rul-prediction-for-liion-batteries-prediction/Cleaned\"","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2022-04-23T18:16:47.54031Z","iopub.execute_input":"2022-04-23T18:16:47.540654Z","iopub.status.idle":"2022-04-23T18:16:48.878834Z","shell.execute_reply.started":"2022-04-23T18:16:47.540567Z","shell.execute_reply":"2022-04-23T18:16:48.877757Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"exps_dict = {}\nexps = os.listdir(path)\nfor exp in os.listdir(path) :\n exps_dict[exp] = []\n for m in os.listdir(f\"{path}/{exp}\") :\n 
exps_dict[exp].append(m)\n\npd.DataFrame(exps_dict)","metadata":{"execution":{"iopub.status.busy":"2022-04-23T18:16:48.880911Z","iopub.execute_input":"2022-04-23T18:16:48.881155Z","iopub.status.idle":"2022-04-23T18:16:48.963913Z","shell.execute_reply.started":"2022-04-23T18:16:48.881126Z","shell.execute_reply":"2022-04-23T18:16:48.963007Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from sklearn.metrics import max_error,mean_absolute_error,mean_squared_error,mean_squared_log_error\nfrom sklearn.metrics import median_absolute_error,mean_absolute_percentage_error,r2_score\n\ndef get_scores(y_true,y_pred) :\n return {\n 'max_error':max_error(y_true,y_pred),\n 'mean_absolute_error':mean_absolute_error(y_true,y_pred),\n 'mean_squared_error':mean_squared_error(y_true,y_pred),\n 'mean_squared_log_error':mean_squared_log_error(y_true,y_pred),\n 'median_absolute_error':median_absolute_error(y_true,y_pred),\n 'mean_absolute_percentage_error':mean_absolute_percentage_error(y_true,y_pred),\n 'r2_score':r2_score(y_true,y_pred)\n }\n\ndef get_preds(model,data_x) :\n return model.predict(data_x).clip(min=0)","metadata":{"execution":{"iopub.status.busy":"2022-04-23T18:16:48.965323Z","iopub.execute_input":"2022-04-23T18:16:48.965595Z","iopub.status.idle":"2022-04-23T18:16:49.199721Z","shell.execute_reply.started":"2022-04-23T18:16:48.965562Z","shell.execute_reply":"2022-04-23T18:16:49.198973Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"ensemble_dict = {}\nfor exp in exps:\n df = pd.DataFrame()\n for m in exps_dict[exp] :\n# print(f\"{path}/{exp}/{m}\")\n model_name = m.split('_')[0]\n temp = pd.read_csv(f\"{path}/{exp}/{m}\").rename(columns={\"Capacity\":f\"{model_name}_Cap\",\"model_predict\":f\"{model_name}_pred\"})\n curr_cols = temp.columns.tolist()\n temp = temp.rename(columns={x:x.lower() for x in curr_cols})\n# print(temp.columns.tolist())\n df = pd.concat([df,temp],axis=1)\n ensemble_dict[exp] = df\n# ensemble_dict","metadata":{"execution":{"iopub.status.busy":"2022-04-23T18:16:49.201514Z","iopub.execute_input":"2022-04-23T18:16:49.201932Z","iopub.status.idle":"2022-04-23T18:16:49.521399Z","shell.execute_reply.started":"2022-04-23T18:16:49.201882Z","shell.execute_reply":"2022-04-23T18:16:49.520607Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"temp=None\nensemble_df = {}\nfor exp in exps :\n print(exp)\n print(ensemble_dict[exp].columns.tolist())\n temp=ensemble_dict[exp].drop(columns=['gru_cap','bigru_cap','bilstm_cap']).rename(columns={'lstm_cap':'cap'})\n df_x = temp.drop(columns=['cap'])\n df_y = temp['cap']\n ensemble_df[exp] = [df_x,df_y]\n# ensemble_df","metadata":{"execution":{"iopub.status.busy":"2022-04-23T18:16:49.522638Z","iopub.execute_input":"2022-04-23T18:16:49.523344Z","iopub.status.idle":"2022-04-23T18:16:49.559861Z","shell.execute_reply.started":"2022-04-23T18:16:49.523305Z","shell.execute_reply":"2022-04-23T18:16:49.558695Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from sklearn.linear_model import LinearRegression,HuberRegressor\nfrom sklearn.naive_bayes import BernoulliNB,CategoricalNB,ComplementNB,GaussianNB,MultinomialNB\nfrom sklearn.neighbors import KNeighborsRegressor\nfrom sklearn.svm import LinearSVR,NuSVR,SVR\nfrom sklearn.tree import DecisionTreeRegressor,ExtraTreeRegressor\nfrom sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor,VotingRegressor\nfrom xgboost import XGBRegressor\nfrom lightgbm 
import LGBMRegressor\nfrom catboost import CatBoostRegressor","metadata":{"execution":{"iopub.status.busy":"2022-04-23T18:16:49.561812Z","iopub.execute_input":"2022-04-23T18:16:49.562394Z","iopub.status.idle":"2022-04-23T18:16:51.310496Z","shell.execute_reply.started":"2022-04-23T18:16:49.562343Z","shell.execute_reply":"2022-04-23T18:16:51.309546Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from sklearn.model_selection import train_test_split\n\nfor exp in exps :\n print(exp)\n df_x,df_y = ensemble_df[exp]\n train_x,test_x,train_y,test_y = train_test_split(df_x,df_y,test_size=0.2,random_state=0)\n test_x,val_x,test_y,val_y = train_test_split(test_x,test_y,test_size=0.5,random_state=0)\n\n print(train_x.shape,test_x.shape,train_y.shape,test_y.shape)\n \n algos = (LinearRegression,HuberRegressor,KNeighborsRegressor,LinearSVR,NuSVR,\n SVR, DecisionTreeRegressor,ExtraTreeRegressor,RandomForestRegressor,ExtraTreesRegressor,\n XGBRegressor,LGBMRegressor,CatBoostRegressor)\n\n params = {\n 'silent':True\n }\n\n for algo in algos :\n model = algo()\n if type(model).__name__ == 'CatBoostRegressor' :\n model = algo(**params)\n print(type(model).__name__)\n model.fit(train_x,train_y)\n\n model_results_train = pd.Series(get_scores(train_y,get_preds(model,train_x)))\n model_results_val = pd.Series(get_scores(val_y,get_preds(model,val_x)))\n model_results_test = pd.Series(get_scores(test_y,get_preds(model,test_x)))\n data = {\"Train\": model_results_train,\n \"Val\": model_results_val,\n \"Test\": model_results_test}\n model_results = pd.DataFrame(data)\n print(model_results)\n print(\"~\"*100)\n print(\"\\n\")","metadata":{"execution":{"iopub.status.busy":"2022-04-23T18:16:51.311762Z","iopub.execute_input":"2022-04-23T18:16:51.312013Z","iopub.status.idle":"2022-04-23T18:17:07.040068Z","shell.execute_reply.started":"2022-04-23T18:16:51.311982Z","shell.execute_reply":"2022-04-23T18:17:07.038972Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import numpy as np\nfrom sklearn.model_selection import train_test_split\n\nfrom sklearn.metrics import max_error,mean_absolute_error,mean_squared_error,mean_squared_log_error\nfrom sklearn.metrics import median_absolute_error,mean_absolute_percentage_error,r2_score\n\ndef get_scores(y_true,y_pred) :\n return np.sqrt(mean_squared_error(y_true,y_pred))\n\nmodel_results = pd.DataFrame()\nfor exp in exps :\n print(exp)\n df_x,df_y = ensemble_df[exp]\n train_x,test_x,train_y,test_y = train_test_split(df_x,df_y,test_size=0.2,random_state=0)\n test_x,val_x,test_y,val_y = train_test_split(test_x,test_y,test_size=0.5,random_state=0)\n\n print(train_x.shape,test_x.shape,train_y.shape,test_y.shape)\n \n algos = (LinearRegression,HuberRegressor,KNeighborsRegressor,LinearSVR,NuSVR,\n SVR, DecisionTreeRegressor,ExtraTreeRegressor,RandomForestRegressor,ExtraTreesRegressor,\n XGBRegressor,LGBMRegressor,CatBoostRegressor)\n\n params = {\n 'silent':True\n }\n\n for algo in algos :\n model = algo()\n if type(model).__name__ == 'CatBoostRegressor' :\n model = algo(**params)\n# print(type(model).__name__)\n model.fit(train_x,train_y)\n\n model_results_train = get_scores(train_y,get_preds(model,train_x))\n model_results_val = get_scores(val_y,get_preds(model,val_x))\n model_results_test = get_scores(test_y,get_preds(model,test_x))\n data = {\"Train\": model_results_train,\n \"Val\": model_results_val,\n \"Test\": model_results_test}\n temp = pd.DataFrame(data,index=[f'{exp}_{type(model).__name__}'])\n 
model_results = model_results.append(temp)\n# print(temp)\n# print(model_results)\n# print(\"~\"*100)\n# print(\"\\n\")\n# break\n# break\nmodel_results","metadata":{"execution":{"iopub.status.busy":"2022-04-23T18:23:03.468523Z","iopub.execute_input":"2022-04-23T18:23:03.468853Z","iopub.status.idle":"2022-04-23T18:23:18.585659Z","shell.execute_reply.started":"2022-04-23T18:23:03.468819Z","shell.execute_reply":"2022-04-23T18:23:18.584467Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"model_results.to_csv('ensemble_results.csv')","metadata":{"execution":{"iopub.status.busy":"2022-04-23T18:26:59.553136Z","iopub.execute_input":"2022-04-23T18:26:59.55412Z","iopub.status.idle":"2022-04-23T18:26:59.562775Z","shell.execute_reply.started":"2022-04-23T18:26:59.55407Z","shell.execute_reply":"2022-04-23T18:26:59.561464Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import numpy as np\nfrom sklearn.model_selection import train_test_split\n\nfrom sklearn.metrics import max_error,mean_absolute_error,mean_squared_error,mean_squared_log_error\nfrom sklearn.metrics import median_absolute_error,mean_absolute_percentage_error,r2_score\n\ndef get_scores(y_true,y_pred) :\n return np.sqrt(mean_squared_error(y_true,y_pred))\n\nmodel_results = pd.DataFrame()\nfor exp in exps :\n print(exp)\n df_x,df_y = ensemble_df[exp]\n train_x,test_x,train_y,test_y = train_test_split(df_x,df_y,test_size=0.2,random_state=0)\n test_x,val_x,test_y,val_y = train_test_split(test_x,test_y,test_size=0.5,random_state=0)\n\n print(train_x.shape,test_x.shape,train_y.shape,test_y.shape)\n \n algos = (LinearRegression,HuberRegressor,KNeighborsRegressor,LinearSVR,NuSVR,\n SVR, DecisionTreeRegressor,ExtraTreeRegressor,RandomForestRegressor,ExtraTreesRegressor,\n XGBRegressor,LGBMRegressor,CatBoostRegressor)\n\n params = {\n 'silent':True\n }\n\n for algo in algos :\n model = algo()\n if type(model).__name__ == 'CatBoostRegressor' :\n model = algo(**params)\n model.fit(train_x,train_y)\n\n model_results_train = get_scores(train_y,get_preds(model,train_x))\n model_results_val = get_scores(val_y,get_preds(model,val_x))\n model_results_test = get_scores(test_y,get_preds(model,test_x))\n data = {\"Train\": model_results_train,\n \"Val\": model_results_val,\n \"Test\": model_results_test}\n temp = pd.DataFrame(data,index=[f'{exp}_{type(model).__name__}'])\n model_results = model_results.append(temp)\nmodel_results","metadata":{},"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /notebooks/critical-point-approaches.ipynb: -------------------------------------------------------------------------------- 1 | {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"pygments_lexer":"ipython3","nbconvert_exporter":"python","version":"3.6.4","file_extension":".py","codemirror_mode":{"name":"ipython","version":3},"name":"python","mimetype":"text/x-python"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"import os\nimport numpy as np\nimport pandas as pd\nimport scipy\nimport scipy.io\nfrom scipy.io import loadmat\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nimport shutil\nimport math\nimport ntpath\nimport sys\nimport logging\nimport time\nimport random\n\nfrom importlib import reload\nimport plotly.graph_objects as go\n\nimport tensorflow as tf\nfrom tensorflow import 
keras","metadata":{"id":"XEFyZIgMivab","execution":{"iopub.status.busy":"2022-04-23T18:31:28.983972Z","iopub.execute_input":"2022-04-23T18:31:28.984351Z","iopub.status.idle":"2022-04-23T18:31:35.00018Z","shell.execute_reply.started":"2022-04-23T18:31:28.984255Z","shell.execute_reply":"2022-04-23T18:31:34.998964Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"path = \"../input/battery-data-set\"\ndis = os.listdir(path)\ndis_mat = []\nbattery_grp = {\n \n}\nfor i in dis :\n filtered_list = list(filter(lambda x:x.split('.')[-1] == 'mat',os.listdir(f\"{path}/{i}\")))\n battery_grp[i.split('BatteryAgingARC')[-1][1:]] = list(map(lambda x:x.split('.')[0],filtered_list))\n dis_mat.extend(list(map(lambda x:f\"{path}/{i}/{x}\",filtered_list)))\n\nbattery_grp['5_6_7_18'] = battery_grp['FY08Q4']\ndel battery_grp['FY08Q4']\n\ndis_mat\nbattery_grp","metadata":{"id":"Qiwf6CzR_8As","outputId":"9f265d26-432a-4196-ee0b-f34aa21674f3","execution":{"iopub.status.busy":"2022-04-23T18:31:35.002136Z","iopub.execute_input":"2022-04-23T18:31:35.00261Z","iopub.status.idle":"2022-04-23T18:31:35.042527Z","shell.execute_reply.started":"2022-04-23T18:31:35.002556Z","shell.execute_reply":"2022-04-23T18:31:35.041898Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from scipy.io import loadmat\n\nbs = [x.split('/')[-1].split('.')[0] for x in dis_mat]\nprint(bs)\n\nds = []\nfor b in dis_mat:\n ds.append(loadmat(b))\n\ntypes = []\ntimes = []\nambient_temperatures = []\ndatas = []\n\nfor i in range(len(ds)):\n x = ds[i][bs[i]][\"cycle\"][0][0][0]\n# ambient_temperatures.append(x['ambient_temperature'])\n ambient_temperatures.append(list(map(lambda y:y[0][0],x['ambient_temperature'])))\n types.append(x['type'])\n times.append(x['time'])\n datas.append(x['data']) \n \nbatteries = []\ncycles = []\nfor i in range(len(ds)):\n batteries.append(bs[i])\n cycles.append(datas[i].size)\n \nbattery_cycle_df = pd.DataFrame({'Battery':batteries,'Cycle':cycles}).sort_values('Battery',ascending=True)\nbattery_cycle_df.drop_duplicates(inplace=True)\nbattery_cycle_df","metadata":{"id":"OQAeH9e1_8At","outputId":"fe4376d3-ad98-4c69-e675-d6ea4e607ea4","execution":{"iopub.status.busy":"2022-04-23T18:31:35.046305Z","iopub.execute_input":"2022-04-23T18:31:35.046575Z","iopub.status.idle":"2022-04-23T18:31:40.415541Z","shell.execute_reply.started":"2022-04-23T18:31:35.046546Z","shell.execute_reply":"2022-04-23T18:31:40.41459Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"Cycles = {}\nparams = ['Voltage_measured','Current_measured','Temperature_measured',\n 'Current_load','Voltage_load','Time','Capacity',]\n\nfor i in range(len(bs)):\n Cycles[bs[i]] = {}\n Cycles[bs[i]]['count'] = 0 # This is true for battery B0005, 06, 07\n# ambient_temperatures[i]\n for param in params:\n Cycles[bs[i]][param] = []\n for j in range(datas[i].size):\n if types[i][j] == 'discharge':\n Cycles[bs[i]][param].append(datas[i][j][param][0][0][0])\n \n cap = []\n amb_temp = []\n for j in range(datas[i].size):\n if types[i][j] == 'discharge':\n cap.append(datas[i][j]['Capacity'][0][0][0])\n amb_temp.append(ambient_temperatures[i][j])\n \n Cycles[bs[i]]['Capacity'] = np.array(cap)\n Cycles[bs[i]]['ambient_temperatures'] = np.array(amb_temp)\nCycles = pd.DataFrame(Cycles)\n\nbattery_count = []\nfor b in battery_cycle_df['Battery'].values.tolist() :\n Cycles[b]['count'] = 
len(Cycles[b]['Capacity'])\nCycles","metadata":{"id":"IxenDRjM_8Au","outputId":"12535861-c1b3-41af-9f5f-2d3a74b709d1","execution":{"iopub.status.busy":"2022-04-23T18:31:40.41723Z","iopub.execute_input":"2022-04-23T18:31:40.417466Z","iopub.status.idle":"2022-04-23T18:31:51.800179Z","shell.execute_reply.started":"2022-04-23T18:31:40.417433Z","shell.execute_reply":"2022-04-23T18:31:51.799333Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"Cycles['B0052']","metadata":{"id":"-H5AOvPQ_8Au","outputId":"7e873356-9409-44a5-8bf9-28eb908144a3","execution":{"iopub.status.busy":"2022-04-23T18:31:51.801431Z","iopub.execute_input":"2022-04-23T18:31:51.801737Z","iopub.status.idle":"2022-04-23T18:31:51.909296Z","shell.execute_reply.started":"2022-04-23T18:31:51.801702Z","shell.execute_reply":"2022-04-23T18:31:51.908353Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"Cycles.loc['count',:].reset_index()\nbattery_cycle_df\ncompare_df = Cycles.loc['count',:].reset_index().set_index('index').join(battery_cycle_df.set_index('Battery')).rename(columns={'count':'After','Cycle':'Before'})\ncompare_df[['Before','After']]","metadata":{"id":"SzYECRbS_8Av","outputId":"c6ec60ac-da2d-42b1-ee25-1e3a06a4c2a4","execution":{"iopub.status.busy":"2022-04-23T18:31:51.910548Z","iopub.execute_input":"2022-04-23T18:31:51.910861Z","iopub.status.idle":"2022-04-23T18:31:51.937292Z","shell.execute_reply.started":"2022-04-23T18:31:51.910827Z","shell.execute_reply":"2022-04-23T18:31:51.936275Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"pd.DataFrame(Cycles['B0047'])","metadata":{"id":"zYkXgpbM_8Av","outputId":"a123fb76-2041-4efd-ca6e-4ebea0a7553e","execution":{"iopub.status.busy":"2022-04-23T18:31:51.938539Z","iopub.execute_input":"2022-04-23T18:31:51.938808Z","iopub.status.idle":"2022-04-23T18:31:52.531289Z","shell.execute_reply.started":"2022-04-23T18:31:51.938776Z","shell.execute_reply":"2022-04-23T18:31:52.530608Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df = pd.DataFrame({})\ncols = ['Voltage_measured','Current_measured','Temperature_measured',\n 'Current_load','Voltage_load','Time','Capacity','ambient_temperatures']\nfor col in cols :\n df[col] = Cycles['B0047'][col]\nmax_len = np.max(df['Time'].apply(lambda x:len(x)).values)\ndf_x = df.drop(columns=['Capacity']).values\ndf_y = df['Capacity'].values\ndf","metadata":{"id":"EuBsJlFW_8Aw","outputId":"04a6ecb6-6456-4ddb-aa1b-50ef86c54f9f","execution":{"iopub.status.busy":"2022-04-23T18:31:52.532514Z","iopub.execute_input":"2022-04-23T18:31:52.532893Z","iopub.status.idle":"2022-04-23T18:31:52.644266Z","shell.execute_reply.started":"2022-04-23T18:31:52.532852Z","shell.execute_reply":"2022-04-23T18:31:52.64359Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"batteries","metadata":{"id":"HeDfzDH3_8Aw","outputId":"f560a437-dd5e-4901-9c87-72461d0f5b43","execution":{"iopub.status.busy":"2022-04-23T18:31:52.645688Z","iopub.execute_input":"2022-04-23T18:31:52.646125Z","iopub.status.idle":"2022-04-23T18:31:52.652655Z","shell.execute_reply.started":"2022-04-23T18:31:52.646073Z","shell.execute_reply":"2022-04-23T18:31:52.651584Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df_all = pd.DataFrame({})\nmax_len = 0\n\nexperiment1 = [\"B0005\", \"B0006\", \"B0007\", \"B0018\"]\nexperiment2 = [\"B0025\", \"B0026\", \"B0027\", \"B0028\"]\nexperiment3 = [\"B0029\", 
\"B0030\", \"B0031\", \"B0032\"]\nexperiment4 = [\"B0033\", \"B0034\", \"B0036\"]\nexperiment5 = [\"B0038\", \"B0039\", \"B0040\"]\nexperiment6 = [\"B0041\", \"B0042\", \"B0043\", \"B0044\"]\nexperiment7 = [\"B0045\", \"B0046\", \"B0047\", \"B0048\"]\nexperiment8 = [\"B0049\", \"B0050\", \"B0051\", \"B0052\"]\nexperiment9 = [\"B0053\", \"B0054\", \"B0055\", \"B0056\"]\n\nexp_try_out = experiment1 # replace karke wo experiment wala dalo\nresults_name = \"critical_point_exp1_results\"\n\nfor bat in exp_try_out:\n df = pd.DataFrame({})\n cols = ['Voltage_measured','Current_measured','Temperature_measured',\n 'Current_load','Voltage_load','Time','Capacity','ambient_temperatures']\n for col in cols :\n df[col] = Cycles[bat][col]\n max_l = np.max(df['Time'].apply(lambda x:len(x)).values)\n max_len = max(max_l,max_len)\n df_all = pd.concat([df_all,df],ignore_index=True)\n \ndf=df_all.reset_index(drop=True)\ndf\n\n\nflag = False\nfor i,j in enumerate(df['Capacity']) :\n try :\n if len(j) :\n df['Capacity'][i] = j[0] \n else :\n df['Capacity'][i] = 0\n flag = True\n except :\n pass\nprint(flag)\n\n## CRITICAL TIME POINTS FOR A CYCLE\n## We will only these critical points for furthur training\n\n## TEMPERATURE_MEASURED\n## => Time at highest temperature\n\n## VOLTAGE_MEASURED\n## => Time at lowest Voltage\n\n## VOLTAGE_LOAD\n## => First time it drops below 1 volt after 1500 time\n\n\ndef getTemperatureMeasuredCritical(tm, time):\n high = 0\n critical = 0\n for i in range(len(tm)):\n if (tm[i] > high):\n high = tm[i]\n critical = time[i]\n return critical\n\ndef getVoltageMeasuredCritical(vm, time):\n low = 1e9\n critical = 0\n for i in range(len(vm)):\n if (vm[i] < low):\n low = vm[i]\n critical = time[i]\n return critical\n\ndef getVoltageLoadCritical(vl, time):\n for i in range(len(vl)):\n if (time[i] > 1500 and vl[i] < 1):\n return time[i]\n return -1\n\n\ndef fun(x) :\n cap = x['Capacity']\n amb_temp = x['ambient_temperatures']\n volt_load_c = getVoltageLoadCritical(x['Voltage_load'],x['Time'])\n volt_meas_c = getVoltageMeasuredCritical(x['Voltage_measured'],x['Time'])\n temp_meas_c = getTemperatureMeasuredCritical(x['Temperature_measured'],x['Time'])\n \n data = {\n 'Capacity':cap,\n 'ambient_temperatures':amb_temp,\n 'Critical_Voltage_load':volt_load_c,\n 'Critical_Voltage_measured':volt_meas_c,\n 'Critical_Temperature_measured':temp_meas_c,\n }\n data_idx = [\n 'Capacity', 'ambient_temperatures','Critical_Voltage_load',\n 'Critical_Voltage_measured','Critical_Temperature_measured',\n ]\n y = pd.Series(data,index=data_idx)\n\n return y\ndf = df.apply(lambda x:fun(x),axis=1)\ndf","metadata":{"id":"zC8KMf8E_8Ax","outputId":"22a60593-68f7-4f79-80ec-0e21da1ed976","execution":{"iopub.status.busy":"2022-04-23T18:31:52.655602Z","iopub.execute_input":"2022-04-23T18:31:52.65583Z","iopub.status.idle":"2022-04-23T18:31:53.603135Z","shell.execute_reply.started":"2022-04-23T18:31:52.655803Z","shell.execute_reply":"2022-04-23T18:31:53.602266Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from sklearn.metrics import max_error,mean_absolute_error,mean_squared_error,mean_squared_log_error\nfrom sklearn.metrics import median_absolute_error,mean_absolute_percentage_error,r2_score\n\ndef get_scores(y_true,y_pred) :\n return {\n 'max_error':max_error(y_true,y_pred),\n 'mean_absolute_error':mean_absolute_error(y_true,y_pred),\n 'mean_squared_error':mean_squared_error(y_true,y_pred),\n 'mean_squared_log_error':mean_squared_log_error(y_true,y_pred),\n 
'median_absolute_error':median_absolute_error(y_true,y_pred),\n 'mean_absolute_percentage_error':mean_absolute_percentage_error(y_true,y_pred),\n 'r2_score':r2_score(y_true,y_pred)\n }\n\ndef get_preds(model,data_x) :\n return model.predict(data_x).clip(min=0)\n\n# print(get_scores(train_y,get_preds(lr,train_x)))\n# print(get_scores(test_y,get_preds(lr,test_x)))\n\ndf_x = df.drop(columns=['Capacity']).values\ndf_y = df['Capacity'].values\n\nfrom sklearn.model_selection import train_test_split\ntrain_x,test_x,train_y,test_y = train_test_split(df_x,df_y,test_size=0.2,random_state=0)\ntest_x,val_x,test_y,val_y = train_test_split(test_x,test_y,test_size=0.5,random_state=0)\n\ntrain_x.shape,test_x.shape,val_x.shape,train_y.shape,test_y.shape,val_y.shape","metadata":{"execution":{"iopub.status.busy":"2022-04-23T18:31:53.604506Z","iopub.execute_input":"2022-04-23T18:31:53.604879Z","iopub.status.idle":"2022-04-23T18:31:53.819688Z","shell.execute_reply.started":"2022-04-23T18:31:53.60484Z","shell.execute_reply":"2022-04-23T18:31:53.818561Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import pickle\nfrom sklearn.linear_model import LinearRegression\n\nlr = LinearRegression()\nlr.fit(train_x,train_y)\nmodel_name = \"LR\"\n# print(get_scores(train_y,get_preds(lr,train_x)))\n# print(get_scores(val_y,get_preds(lr,val_x)))\n# print(get_scores(test_y,get_preds(lr,test_x)))\n\nlr_results_train = pd.Series(get_scores(train_y,get_preds(lr,train_x)))\nlr_results_val = pd.Series(get_scores(val_y,get_preds(lr,val_x)))\nlr_results_test = pd.Series(get_scores(test_y,get_preds(lr,test_x)))\ndata = {\"Train\": lr_results_train,\n \"Val\": lr_results_val,\n \"Test\": lr_results_test}\nlr_results = pd.DataFrame(data)\nlr_results.to_csv(f\"{model_name}_{results_name}.csv\")\npickle.dump(lr, open(f\"{model_name}_{results_name}\", 'wb'))\nlr_results ","metadata":{"execution":{"iopub.status.busy":"2022-04-23T18:31:53.821105Z","iopub.execute_input":"2022-04-23T18:31:53.821435Z","iopub.status.idle":"2022-04-23T18:31:53.941367Z","shell.execute_reply.started":"2022-04-23T18:31:53.821399Z","shell.execute_reply":"2022-04-23T18:31:53.940426Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from sklearn.linear_model import LinearRegression,HuberRegressor\nfrom sklearn.naive_bayes import BernoulliNB,CategoricalNB,ComplementNB,GaussianNB,MultinomialNB\nfrom sklearn.neighbors import KNeighborsRegressor\nfrom sklearn.svm import LinearSVR,NuSVR,SVR\nfrom sklearn.tree import DecisionTreeRegressor,ExtraTreeRegressor\nfrom sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor,VotingRegressor\nfrom xgboost import XGBRegressor\nfrom lightgbm import LGBMRegressor\nfrom catboost import CatBoostRegressor\n\nalgos = (LinearRegression,HuberRegressor,KNeighborsRegressor,\nLinearSVR,NuSVR,\n# SVR, DecisionTreeRegressor,ExtraTreeRegressor,RandomForestRegressor,ExtraTreesRegressor,\nXGBRegressor,LGBMRegressor,CatBoostRegressor)\n\nparams = {\n 'silent':True\n}\n\nfor algo in algos :\n model = algo()\n if type(model).__name__ == 'CatBoostRegressor' :\n model = algo(**params)\n print(type(model).__name__)\n model.fit(train_x,train_y)\n\n model_results_train = pd.Series(get_scores(train_y,get_preds(model,train_x)))\n model_results_val = pd.Series(get_scores(val_y,get_preds(model,val_x)))\n model_results_test = pd.Series(get_scores(test_y,get_preds(model,test_x)))\n data = {\"Train\": model_results_train,\n \"Val\": model_results_val,\n \"Test\": 
model_results_test}\n model_results = pd.DataFrame(data)\n print(model_results)\n print(\"~\"*100)\n print(\"\\n\")\n# break","metadata":{"execution":{"iopub.status.busy":"2022-04-23T18:31:53.942823Z","iopub.execute_input":"2022-04-23T18:31:53.943067Z","iopub.status.idle":"2022-04-23T18:31:57.012738Z","shell.execute_reply.started":"2022-04-23T18:31:53.943035Z","shell.execute_reply":"2022-04-23T18:31:57.011657Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import numpy as np\nfrom sklearn.model_selection import train_test_split\n\nfrom sklearn.metrics import max_error,mean_absolute_error,mean_squared_error,mean_squared_log_error\nfrom sklearn.metrics import median_absolute_error,mean_absolute_percentage_error,r2_score\n\ndef get_scores(y_true,y_pred) :\n return np.sqrt(mean_squared_error(y_true,y_pred))\n\nmodel_results = pd.DataFrame()\n\nalgos = (LinearRegression,HuberRegressor,KNeighborsRegressor,\nLinearSVR,NuSVR,\n# SVR, DecisionTreeRegressor,ExtraTreeRegressor,RandomForestRegressor,ExtraTreesRegressor,\nXGBRegressor,LGBMRegressor,CatBoostRegressor)\n\nparams = {\n 'silent':True\n}\n\nfor algo in algos :\n model = algo()\n if type(model).__name__ == 'CatBoostRegressor' :\n model = algo(**params)\n print(type(model).__name__)\n model.fit(train_x,train_y)\n\n model_results_train = (get_scores(train_y,get_preds(model,train_x)))\n model_results_val = (get_scores(val_y,get_preds(model,val_x)))\n model_results_test = (get_scores(test_y,get_preds(model,test_x)))\n data = {\"Train\": model_results_train,\n \"Val\": model_results_val,\n \"Test\": model_results_test}\n temp = pd.DataFrame(data,index=[f'Critical-Point_{type(model).__name__}'])\n model_results = model_results.append(temp)\nmodel_results","metadata":{"execution":{"iopub.status.busy":"2022-04-23T18:33:48.450875Z","iopub.execute_input":"2022-04-23T18:33:48.451745Z","iopub.status.idle":"2022-04-23T18:33:49.937922Z","shell.execute_reply.started":"2022-04-23T18:33:48.451703Z","shell.execute_reply":"2022-04-23T18:33:49.936799Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"model_results.to_csv('critical_point_results.csv')","metadata":{"execution":{"iopub.status.busy":"2022-04-23T18:35:18.466436Z","iopub.execute_input":"2022-04-23T18:35:18.466724Z","iopub.status.idle":"2022-04-23T18:35:18.473232Z","shell.execute_reply.started":"2022-04-23T18:35:18.466693Z","shell.execute_reply":"2022-04-23T18:35:18.472061Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # Data 2 | ## Dataset 3 | ## Dataset Overview 4 | The dataset has been collected from a custom-built battery prognostics testbed at the NASA Ames Prognostics Center of Excellence. The Lithium-ion batteries were run through 2 different operational profiles (charging and discharging) at different temperatures. The experiments were stopped when the batteries reached the end-of-life (EOL) criterion of 30% fade in rated capacity (from 2 Ah to 1.4 Ah). 5 | ### Dataset Structure 6 | ##### Charge 7 | 1. Voltage_measured: Battery terminal voltage (Volts) 8 | 2. Current_measured: Battery output current (Amps) 9 | 3. Temperature_measured: Battery temperature (degree C) 10 | 4. Current_charge: Current measured at charger (Amps) 11 | 5. 
Voltage_charge: Voltage measured at charger (Volts) 12 | 6. Time: Time vector for the cycle (secs) 13 | 14 | ##### Discharge 15 | 1. Voltage_measured: Battery terminal voltage (Volts) 16 | 2. Current_measured: Battery output current (Amps) 17 | 3. Temperature_measured: Battery temperature (degree C) 18 | 4. Current_charge: Current measured at load (Amps) 19 | 5. Voltage_charge: Voltage measured at load (Volts) 20 | 6. Time: Time vector for the cycle (secs) 21 | 7. Capacity: Battery capacity (Ahr) for discharge till 2.7V 22 | 23 | ##### Impedance 24 | 1. Sense_current: Current in sense branch (Amps) 25 | 2. Battery_current: Current in battery branch (Amps) 26 | 3. Current_ratio: Ratio of the above currents 27 | 4. Battery_impedance: Battery impedance (Ohms) computed from raw data 28 | 5. Rectified_impedance: Calibrated and smoothed battery impedance (Ohms) 29 | 6. Re: Estimated electrolyte resistance (Ohms) 30 | 7. Rct: Estimated charge transfer resistance (Ohms) 31 | 32 | 33 | ## Folder Structure & Files 34 | ``` 35 | 📁 data/ 36 | ├─📁 input/ 37 | │ └─📁 battery-data-set/ 38 | │ ├─📁 BatteryAgingARC-FY08Q4/ 39 | │ │ ├─📄 B0005.mat 40 | │ │ ├─📄 B0006.mat 41 | │ │ ├─📄 B0007.mat 42 | │ │ ├─📄 B0018.mat 43 | │ │ └─📄 README.txt 44 | │ ├─📁 BatteryAgingARC_25-44/ 45 | │ │ ├─📄 B0025.mat 46 | │ │ ├─📄 B0026.mat 47 | │ │ ├─📄 B0027.mat 48 | │ │ ├─📄 B0028.mat 49 | │ │ ├─📄 B0029.mat 50 | │ │ ├─📄 B0030.mat 51 | │ │ ├─📄 B0031.mat 52 | │ │ ├─📄 B0032.mat 53 | │ │ ├─📄 B0033.mat 54 | │ │ ├─📄 B0034.mat 55 | │ │ ├─📄 B0036.mat 56 | │ │ ├─📄 B0038.mat 57 | │ │ ├─📄 B0039.mat 58 | │ │ ├─📄 B0040.mat 59 | │ │ ├─📄 B0041.mat 60 | │ │ ├─📄 B0042.mat 61 | │ │ ├─📄 B0043.mat 62 | │ │ ├─📄 B0044.mat 63 | │ │ ├─📄 README_25_26_27_28.txt 64 | │ │ ├─📄 README_29_30_31_32.txt 65 | │ │ ├─📄 README_33_34_36.txt 66 | │ │ ├─📄 README_38_39_40.txt 67 | │ │ └─📄 README_41_42_43_44.txt 68 | │ ├─📁 BatteryAgingARC_25_26_27_28_P1/ 69 | │ │ ├─📄 B0025.mat 70 | │ │ ├─📄 B0026.mat 71 | │ │ ├─📄 B0027.mat 72 | │ │ ├─📄 B0028.mat 73 | │ │ └─📄 README.txt 74 | │ ├─📁 BatteryAgingARC_45_46_47_48/ 75 | │ │ ├─📄 B0045.mat 76 | │ │ ├─📄 B0046.mat 77 | │ │ ├─📄 B0047.mat 78 | │ │ ├─📄 B0048.mat 79 | │ │ └─📄 README_45_46_47_48.txt 80 | │ ├─📁 BatteryAgingARC_49_50_51_52/ 81 | │ │ ├─📄 B0049.mat 82 | │ │ ├─📄 B0050.mat 83 | │ │ ├─📄 B0051.mat 84 | │ │ ├─📄 B0052.mat 85 | │ │ └─📄 README_49_50_51_52.txt 86 | │ └─📁 BatteryAgingARC_53_54_55_56/ 87 | │ ├─📄 B0053.mat 88 | │ ├─📄 B0054.mat 89 | │ ├─📄 B0055.mat 90 | │ ├─📄 B0056.mat 91 | │ └─📄 README_53_54_55_56.txt 92 | ├─📁 output/ 93 | │ ├─📁 Critical_Point/ 94 | │ │ └─📄 critical_point_results.csv 95 | │ ├─📁 Ensemble/ 96 | │ │ └─📄 ensemble_results.csv 97 | │ └─📁 RNN/ 98 | │ ├─📁 cleaned_results/ 99 | │ │ ├─📁 Experiment 1/ 100 | │ │ │ ├─📄 BIGRU_EXP_1.csv 101 | │ │ │ ├─📄 BILSTM_EXP_1.csv 102 | │ │ │ ├─📄 GRU_EXP_1.csv 103 | │ │ │ └─📄 LSTM_EXP_1.csv 104 | │ │ ├─📁 Experiment 2/ 105 | │ │ │ ├─📄 bigru_exp_2_results.csv 106 | │ │ │ ├─📄 bilstm_exp_2_results.csv 107 | │ │ │ ├─📄 gru_exp_2_results.csv 108 | │ │ │ └─📄 lstm_exp_2_results.csv 109 | │ │ ├─📁 Experiment 3/ 110 | │ │ │ ├─📄 bigur_exp_3_results.csv 111 | │ │ │ ├─📄 bilstm_exp_3_results.csv 112 | │ │ │ ├─📄 gru_exp_3_results.csv 113 | │ │ │ └─📄 lstm_exp_3_results.csv 114 | │ │ ├─📁 Experiment 4/ 115 | │ │ │ ├─📄 33_34_36_BiGRU_results.csv 116 | │ │ │ ├─📄 33_34_36_BiLSTM_results.csv 117 | │ │ │ ├─📄 33_34_36_GRU_results.csv 118 | │ │ │ └─📄 33_34_36_LSTM_results.csv 119 | │ │ ├─📁 Experiment 5/ 120 | │ │ │ ├─📄 38_39_40_BiGRU_results.csv 121 | │ │ │ ├─📄 38_39_40_BiLSTM_results.csv 122 | │ │ │ ├─📄 
38_39_40_GRU_results.csv 123 | │ │ │ └─📄 38_39_40_LSTM_results.csv 124 | │ │ ├─📁 Experiment 6/ 125 | │ │ │ ├─📄 41_42_43_44_BiGRU_results.csv 126 | │ │ │ ├─📄 41_42_43_44_BiLSTM_results.csv 127 | │ │ │ ├─📄 41_42_43_44_GRU_results.csv 128 | │ │ │ └─📄 41_42_43_44_LSTM_results.csv 129 | │ │ ├─📁 Experiment 7/ 130 | │ │ │ ├─📄 7_BiGRU_results.csv 131 | │ │ │ ├─📄 7_BiLSTM_results.csv 132 | │ │ │ ├─📄 7_GRU_results.csv 133 | │ │ │ └─📄 7_LSTM_results.csv 134 | │ │ ├─📁 Experiment 8/ 135 | │ │ │ ├─📄 8_BiGRU_results.csv 136 | │ │ │ ├─📄 8_BiLSTM_results.csv 137 | │ │ │ ├─📄 8_GRU_results.csv 138 | │ │ │ └─📄 8_LSTM_results.csv 139 | │ │ └─📁 Experiment 9/ 140 | │ │ ├─📄 9_BiGRU_results.csv 141 | │ │ ├─📄 9_BiLSTM_results.csv 142 | │ │ ├─📄 9_GRU_results.csv 143 | │ │ └─📄 9_LSTM_results.csv 144 | │ ├─📁 models/ 145 | │ │ ├─📁 Exp4/ 146 | │ │ │ ├─📄 33_34_36_BiGRU.csv 147 | │ │ │ ├─📄 33_34_36_BiGRU.h5 148 | │ │ │ ├─📄 33_34_36_BiLSTM.csv 149 | │ │ │ ├─📄 33_34_36_BiLSTM.h5 150 | │ │ │ ├─📄 33_34_36_GRU.csv 151 | │ │ │ ├─📄 33_34_36_GRU.h5 152 | │ │ │ ├─📄 33_34_36_LSTM.csv 153 | │ │ │ └─📄 33_34_36_LSTM.h5 154 | │ │ ├─📁 Exp5/ 155 | │ │ │ ├─📄 38_39_40_BiGRU.csv 156 | │ │ │ ├─📄 38_39_40_BiGRU.h5 157 | │ │ │ ├─📄 38_39_40_BiLSTM.csv 158 | │ │ │ ├─📄 38_39_40_BiLSTM.h5 159 | │ │ │ ├─📄 38_39_40_GRU.csv 160 | │ │ │ ├─📄 38_39_40_GRU.h5 161 | │ │ │ ├─📄 38_39_40_LSTM.csv 162 | │ │ │ └─📄 38_39_40_LSTM.h5 163 | │ │ ├─📁 Exp6/ 164 | │ │ │ ├─📄 41_42_43_44_BiGRU.csv 165 | │ │ │ ├─📄 41_42_43_44_BiGRU.h5 166 | │ │ │ ├─📄 41_42_43_44_BiLSTM.csv 167 | │ │ │ ├─📄 41_42_43_44_BiLSTM.h5 168 | │ │ │ ├─📄 41_42_43_44_GRU.csv 169 | │ │ │ ├─📄 41_42_43_44_GRU.h5 170 | │ │ │ ├─📄 41_42_43_44_LSTM.csv 171 | │ │ │ └─📄 41_42_43_44_LSTM.h5 172 | │ │ ├─📁 Experiment 1/ 173 | │ │ │ ├─📄 bigru_exp_1.csv 174 | │ │ │ ├─📄 bigru_exp_1.h5 175 | │ │ │ ├─📄 bilstm_exp_1.csv 176 | │ │ │ ├─📄 bilstm_exp_1.h5 177 | │ │ │ ├─📄 gru_exp_1.csv 178 | │ │ │ ├─📄 gru_exp_1.h5 179 | │ │ │ ├─📄 lstm_exp_1.csv 180 | │ │ │ └─📄 lstm_exp_1.h5 181 | │ │ ├─📁 Experiment 2/ 182 | │ │ │ ├─📄 bigru_exp_2.csv 183 | │ │ │ ├─📄 bigru_exp_2.h5 184 | │ │ │ ├─📄 bilstm_exp_2.csv 185 | │ │ │ ├─📄 bilstm_exp_2.h5 186 | │ │ │ ├─📄 gru_exp_2.csv 187 | │ │ │ ├─📄 gru_exp_2.h5 188 | │ │ │ ├─📄 lstm_exp_2.csv 189 | │ │ │ └─📄 lstm_exp_2.h5 190 | │ │ ├─📁 Experiment 3/ 191 | │ │ │ ├─📄 bigru_exp_3.csv 192 | │ │ │ ├─📄 bigur_exp_3.h5 193 | │ │ │ ├─📄 bilstm_exp_3.csv 194 | │ │ │ ├─📄 bilstm_exp_3.h5 195 | │ │ │ ├─📄 gru_exp_3.csv 196 | │ │ │ ├─📄 gru_exp_3.h5 197 | │ │ │ ├─📄 lstm_exp_3.csv 198 | │ │ │ └─📄 lstm_exp_3.h5 199 | │ │ ├─📁 Experiment 7/ 200 | │ │ │ ├─📄 7_BiGRU.csv 201 | │ │ │ ├─📄 7_BiGRU.h5 202 | │ │ │ ├─📄 7_BiLSTM.csv 203 | │ │ │ ├─📄 7_BiLSTM.h5 204 | │ │ │ ├─📄 7_GRU.csv 205 | │ │ │ ├─📄 7_GRU.h5 206 | │ │ │ ├─📄 7_LSTM.csv 207 | │ │ │ └─📄 7_LSTM.h5 208 | │ │ ├─📁 Experiment8/ 209 | │ │ │ ├─📄 8_BiGRU.csv 210 | │ │ │ ├─📄 8_BiGRU.h5 211 | │ │ │ ├─📄 8_BiLSTM.csv 212 | │ │ │ ├─📄 8_BiLSTM.h5 213 | │ │ │ ├─📄 8_GRU.csv 214 | │ │ │ ├─📄 8_GRU.h5 215 | │ │ │ ├─📄 8_LSTM.csv 216 | │ │ │ └─📄 8_LSTM.h5 217 | │ │ └─📁 Experiment9/ 218 | │ │ ├─📄 9_BiGRU.csv 219 | │ │ ├─📄 9_BiGRU.h5 220 | │ │ ├─📄 9_BiLSTM.csv 221 | │ │ ├─📄 9_BiLSTM.h5 222 | │ │ ├─📄 9_GRU.csv 223 | │ │ ├─📄 9_GRU.h5 224 | │ │ ├─📄 9_LSTM.csv 225 | │ │ └─📄 9_LSTM.h5 226 | │ └─📁 results/ 227 | │ ├─📁 Experiment 1/ 228 | │ │ ├─📄 BIGRU_EXP_1.csv 229 | │ │ ├─📄 BILSTM_EXP_1.csv 230 | │ │ ├─📄 GRU_EXP_1.csv 231 | │ │ └─📄 LSTM_EXP_1.csv 232 | │ ├─📁 Experiment 2/ 233 | │ │ ├─📄 bigru_exp_2_results.csv 234 | │ │ ├─📄 bilstm_exp_2_results.csv 235 | │ │ ├─📄 gru_exp_2_results.csv 236 | │ │ 
└─📄 lstm_exp_2_results.csv 237 | │ ├─📁 Experiment 3/ 238 | │ │ ├─📄 bigur_exp_3_results.csv 239 | │ │ ├─📄 bilstm_exp_3_results.csv 240 | │ │ ├─📄 gru_exp_3_results.csv 241 | │ │ └─📄 lstm_exp_3_results.csv 242 | │ ├─📁 Experiment 4/ 243 | │ │ ├─📄 33_34_36_BiGRU_results.csv 244 | │ │ ├─📄 33_34_36_BiLSTM_results.csv 245 | │ │ ├─📄 33_34_36_GRU_results.csv 246 | │ │ └─📄 33_34_36_LSTM_results.csv 247 | │ ├─📁 Experiment 5/ 248 | │ │ ├─📄 38_39_40_BiGRU_results.csv 249 | │ │ ├─📄 38_39_40_BiLSTM_results.csv 250 | │ │ ├─📄 38_39_40_GRU_results.csv 251 | │ │ └─📄 38_39_40_LSTM_results.csv 252 | │ ├─📁 Experiment 6/ 253 | │ │ ├─📄 41_42_43_44_BiGRU_results.csv 254 | │ │ ├─📄 41_42_43_44_BiLSTM_results.csv 255 | │ │ ├─📄 41_42_43_44_GRU_results.csv 256 | │ │ └─📄 41_42_43_44_LSTM_results.csv 257 | │ ├─📁 Experiment 7/ 258 | │ │ ├─📄 7_BiGRU_results.csv 259 | │ │ ├─📄 7_GRU_results.csv 260 | │ │ └─📄 7_LSTM_results.csv 261 | │ ├─📁 Experiment 8/ 262 | │ │ ├─📄 8_BiGRU_results.csv 263 | │ │ ├─📄 8_BiLSTM_results.csv 264 | │ │ ├─📄 8_GRU_results.csv 265 | │ │ └─📄 8_LSTM_results.csv 266 | │ └─📁 Experiment 9/ 267 | │ ├─📄 9_BiGRU_results.csv 268 | │ ├─📄 9_BiLSTM_results.csv 269 | │ ├─📄 9_GRU_results.csv 270 | │ └─📄 9_LSTM_results.csv 271 | └─📄 README.md 272 | >>> sd.seedir('./data', style='emoji', indent=4) 273 | 📁 data/ 274 | ├───📁 input/ 275 | │ └───📁 battery-data-set/ 276 | │ ├───📁 BatteryAgingARC-FY08Q4/ 277 | │ │ ├───📄 B0005.mat 278 | │ │ ├───📄 B0006.mat 279 | │ │ ├───📄 B0007.mat 280 | │ │ ├───📄 B0018.mat 281 | │ │ └───📄 README.txt 282 | │ ├───📁 BatteryAgingARC_25-44/ 283 | │ │ ├───📄 B0025.mat 284 | │ │ ├───📄 B0026.mat 285 | │ │ ├───📄 B0027.mat 286 | │ │ ├───📄 B0028.mat 287 | │ │ ├───📄 B0029.mat 288 | │ │ ├───📄 B0030.mat 289 | │ │ ├───📄 B0031.mat 290 | │ │ ├───📄 B0032.mat 291 | │ │ ├───📄 B0033.mat 292 | │ │ ├───📄 B0034.mat 293 | │ │ ├───📄 B0036.mat 294 | │ │ ├───📄 B0038.mat 295 | │ │ ├───📄 B0039.mat 296 | │ │ ├───📄 B0040.mat 297 | │ │ ├───📄 B0041.mat 298 | │ │ ├───📄 B0042.mat 299 | │ │ ├───📄 B0043.mat 300 | │ │ ├───📄 B0044.mat 301 | │ │ ├───📄 README_25_26_27_28.txt 302 | │ │ ├───📄 README_29_30_31_32.txt 303 | │ │ ├───📄 README_33_34_36.txt 304 | │ │ ├───📄 README_38_39_40.txt 305 | │ │ └───📄 README_41_42_43_44.txt 306 | │ ├───📁 BatteryAgingARC_25_26_27_28_P1/ 307 | │ │ ├───📄 B0025.mat 308 | │ │ ├───📄 B0026.mat 309 | │ │ ├───📄 B0027.mat 310 | │ │ ├───📄 B0028.mat 311 | │ │ └───📄 README.txt 312 | │ ├───📁 BatteryAgingARC_45_46_47_48/ 313 | │ │ ├───📄 B0045.mat 314 | │ │ ├───📄 B0046.mat 315 | │ │ ├───📄 B0047.mat 316 | │ │ ├───📄 B0048.mat 317 | │ │ └───📄 README_45_46_47_48.txt 318 | │ ├───📁 BatteryAgingARC_49_50_51_52/ 319 | │ │ ├───📄 B0049.mat 320 | │ │ ├───📄 B0050.mat 321 | │ │ ├───📄 B0051.mat 322 | │ │ ├───📄 B0052.mat 323 | │ │ └───📄 README_49_50_51_52.txt 324 | │ └───📁 BatteryAgingARC_53_54_55_56/ 325 | │ ├───📄 B0053.mat 326 | │ ├───📄 B0054.mat 327 | │ ├───📄 B0055.mat 328 | │ ├───📄 B0056.mat 329 | │ └───📄 README_53_54_55_56.txt 330 | ├───📁 output/ 331 | │ ├───📁 Critical_Point/ 332 | │ │ └───📄 critical_point_results.csv 333 | │ ├───📁 Ensemble/ 334 | │ │ └───📄 ensemble_results.csv 335 | │ └───📁 RNN/ 336 | │ ├───📁 cleaned_results/ 337 | │ │ ├───📁 Experiment 1/ 338 | │ │ │ ├───📄 BIGRU_EXP_1.csv 339 | │ │ │ ├───📄 BILSTM_EXP_1.csv 340 | │ │ │ ├───📄 GRU_EXP_1.csv 341 | │ │ │ └───📄 LSTM_EXP_1.csv 342 | │ │ ├───📁 Experiment 2/ 343 | │ │ │ ├───📄 bigru_exp_2_results.csv 344 | │ │ │ ├───📄 bilstm_exp_2_results.csv 345 | │ │ │ ├───📄 gru_exp_2_results.csv 346 | │ │ │ └───📄 lstm_exp_2_results.csv 347 | │ │ ├───📁 Experiment 3/ 348 | │ │ │ 
├───📄 bigur_exp_3_results.csv 349 | │ │ │ ├───📄 bilstm_exp_3_results.csv 350 | │ │ │ ├───📄 gru_exp_3_results.csv 351 | │ │ │ └───📄 lstm_exp_3_results.csv 352 | │ │ ├───📁 Experiment 4/ 353 | │ │ │ ├───📄 33_34_36_BiGRU_results.csv 354 | │ │ │ ├───📄 33_34_36_BiLSTM_results.csv 355 | │ │ │ ├───📄 33_34_36_GRU_results.csv 356 | │ │ │ └───📄 33_34_36_LSTM_results.csv 357 | │ │ ├───📁 Experiment 5/ 358 | │ │ │ ├───📄 38_39_40_BiGRU_results.csv 359 | │ │ │ ├───📄 38_39_40_BiLSTM_results.csv 360 | │ │ │ ├───📄 38_39_40_GRU_results.csv 361 | │ │ │ └───📄 38_39_40_LSTM_results.csv 362 | │ │ ├───📁 Experiment 6/ 363 | │ │ │ ├───📄 41_42_43_44_BiGRU_results.csv 364 | │ │ │ ├───📄 41_42_43_44_BiLSTM_results.csv 365 | │ │ │ ├───📄 41_42_43_44_GRU_results.csv 366 | │ │ │ └───📄 41_42_43_44_LSTM_results.csv 367 | │ │ ├───📁 Experiment 7/ 368 | │ │ │ ├───📄 7_BiGRU_results.csv 369 | │ │ │ ├───📄 7_BiLSTM_results.csv 370 | │ │ │ ├───📄 7_GRU_results.csv 371 | │ │ │ └───📄 7_LSTM_results.csv 372 | │ │ ├───📁 Experiment 8/ 373 | │ │ │ ├───📄 8_BiGRU_results.csv 374 | │ │ │ ├───📄 8_BiLSTM_results.csv 375 | │ │ │ ├───📄 8_GRU_results.csv 376 | │ │ │ └───📄 8_LSTM_results.csv 377 | │ │ └───📁 Experiment 9/ 378 | │ │ ├───📄 9_BiGRU_results.csv 379 | │ │ ├───📄 9_BiLSTM_results.csv 380 | │ │ ├───📄 9_GRU_results.csv 381 | │ │ └───📄 9_LSTM_results.csv 382 | │ ├───📁 models/ 383 | │ │ ├───📁 Exp4/ 384 | │ │ │ ├───📄 33_34_36_BiGRU.csv 385 | │ │ │ ├───📄 33_34_36_BiGRU.h5 386 | │ │ │ ├───📄 33_34_36_BiLSTM.csv 387 | │ │ │ ├───📄 33_34_36_BiLSTM.h5 388 | │ │ │ ├───📄 33_34_36_GRU.csv 389 | │ │ │ ├───📄 33_34_36_GRU.h5 390 | │ │ │ ├───📄 33_34_36_LSTM.csv 391 | │ │ │ └───📄 33_34_36_LSTM.h5 392 | │ │ ├───📁 Exp5/ 393 | │ │ │ ├───📄 38_39_40_BiGRU.csv 394 | │ │ │ ├───📄 38_39_40_BiGRU.h5 395 | │ │ │ ├───📄 38_39_40_BiLSTM.csv 396 | │ │ │ ├───📄 38_39_40_BiLSTM.h5 397 | │ │ │ ├───📄 38_39_40_GRU.csv 398 | │ │ │ ├───📄 38_39_40_GRU.h5 399 | │ │ │ ├───📄 38_39_40_LSTM.csv 400 | │ │ │ └───📄 38_39_40_LSTM.h5 401 | │ │ ├───📁 Exp6/ 402 | │ │ │ ├───📄 41_42_43_44_BiGRU.csv 403 | │ │ │ ├───📄 41_42_43_44_BiGRU.h5 404 | │ │ │ ├───📄 41_42_43_44_BiLSTM.csv 405 | │ │ │ ├───📄 41_42_43_44_BiLSTM.h5 406 | │ │ │ ├───📄 41_42_43_44_GRU.csv 407 | │ │ │ ├───📄 41_42_43_44_GRU.h5 408 | │ │ │ ├───📄 41_42_43_44_LSTM.csv 409 | │ │ │ └───📄 41_42_43_44_LSTM.h5 410 | │ │ ├───📁 Experiment 1/ 411 | │ │ │ ├───📄 bigru_exp_1.csv 412 | │ │ │ ├───📄 bigru_exp_1.h5 413 | │ │ │ ├───📄 bilstm_exp_1.csv 414 | │ │ │ ├───📄 bilstm_exp_1.h5 415 | │ │ │ ├───📄 gru_exp_1.csv 416 | │ │ │ ├───📄 gru_exp_1.h5 417 | │ │ │ ├───📄 lstm_exp_1.csv 418 | │ │ │ └───📄 lstm_exp_1.h5 419 | │ │ ├───📁 Experiment 2/ 420 | │ │ │ ├───📄 bigru_exp_2.csv 421 | │ │ │ ├───📄 bigru_exp_2.h5 422 | │ │ │ ├───📄 bilstm_exp_2.csv 423 | │ │ │ ├───📄 bilstm_exp_2.h5 424 | │ │ │ ├───📄 gru_exp_2.csv 425 | │ │ │ ├───📄 gru_exp_2.h5 426 | │ │ │ ├───📄 lstm_exp_2.csv 427 | │ │ │ └───📄 lstm_exp_2.h5 428 | │ │ ├───📁 Experiment 3/ 429 | │ │ │ ├───📄 bigru_exp_3.csv 430 | │ │ │ ├───📄 bigur_exp_3.h5 431 | │ │ │ ├───📄 bilstm_exp_3.csv 432 | │ │ │ ├───📄 bilstm_exp_3.h5 433 | │ │ │ ├───📄 gru_exp_3.csv 434 | │ │ │ ├───📄 gru_exp_3.h5 435 | │ │ │ ├───📄 lstm_exp_3.csv 436 | │ │ │ └───📄 lstm_exp_3.h5 437 | │ │ ├───📁 Experiment 7/ 438 | │ │ │ ├───📄 7_BiGRU.csv 439 | │ │ │ ├───📄 7_BiGRU.h5 440 | │ │ │ ├───📄 7_BiLSTM.csv 441 | │ │ │ ├───📄 7_BiLSTM.h5 442 | │ │ │ ├───📄 7_GRU.csv 443 | │ │ │ ├───📄 7_GRU.h5 444 | │ │ │ ├───📄 7_LSTM.csv 445 | │ │ │ └───📄 7_LSTM.h5 446 | │ │ ├───📁 Experiment8/ 447 | │ │ │ ├───📄 8_BiGRU.csv 448 | │ │ │ ├───📄 8_BiGRU.h5 449 | │ │ │ ├───📄 
8_BiLSTM.csv 450 | │ │ │ ├───📄 8_BiLSTM.h5 451 | │ │ │ ├───📄 8_GRU.csv 452 | │ │ │ ├───📄 8_GRU.h5 453 | │ │ │ ├───📄 8_LSTM.csv 454 | │ │ │ └───📄 8_LSTM.h5 455 | │ │ └───📁 Experiment9/ 456 | │ │ ├───📄 9_BiGRU.csv 457 | │ │ ├───📄 9_BiLSTM.csv 458 | │ │ ├───📄 9_BiLSTM.h5 459 | │ │ ├───📄 9_GRU.csv 460 | │ │ ├───📄 9_GRU.h5 461 | │ │ ├───📄 9_LSTM.csv 462 | │ │ └───📄 9_LSTM.h5 463 | │ └───📁 results/ 464 | │ ├───📁 Experiment 1/ 465 | │ │ ├───📄 BIGRU_EXP_1.csv 466 | │ │ ├───📄 BILSTM_EXP_1.csv 467 | │ │ ├───📄 GRU_EXP_1.csv 468 | │ │ └───📄 LSTM_EXP_1.csv 469 | │ ├───📁 Experiment 2/ 470 | │ │ ├───📄 bigru_exp_2_results.csv 471 | │ │ ├───📄 bilstm_exp_2_results.csv 472 | │ │ ├───📄 gru_exp_2_results.csv 473 | │ │ └───📄 lstm_exp_2_results.csv 474 | │ ├───📁 Experiment 3/ 475 | │ │ ├───📄 bigur_exp_3_results.csv 476 | │ │ ├───📄 bilstm_exp_3_results.csv 477 | │ │ ├───📄 gru_exp_3_results.csv 478 | │ │ └───📄 lstm_exp_3_results.csv 479 | │ ├───📁 Experiment 4/ 480 | │ │ ├───📄 33_34_36_BiGRU_results.csv 481 | │ │ ├───📄 33_34_36_BiLSTM_results.csv 482 | │ │ ├───📄 33_34_36_GRU_results.csv 483 | │ │ └───📄 33_34_36_LSTM_results.csv 484 | │ ├───📁 Experiment 5/ 485 | │ │ ├───📄 38_39_40_BiGRU_results.csv 486 | │ │ ├───📄 38_39_40_BiLSTM_results.csv 487 | │ │ ├───📄 38_39_40_GRU_results.csv 488 | │ │ └───📄 38_39_40_LSTM_results.csv 489 | │ ├───📁 Experiment 6/ 490 | │ │ ├───📄 41_42_43_44_BiGRU_results.csv 491 | │ │ ├───📄 41_42_43_44_BiLSTM_results.csv 492 | │ │ ├───📄 41_42_43_44_GRU_results.csv 493 | │ │ └───📄 41_42_43_44_LSTM_results.csv 494 | │ ├───📁 Experiment 7/ 495 | │ │ ├───📄 7_BiGRU_results.csv 496 | │ │ ├───📄 7_BiLSTM_results.csv 497 | │ │ ├───📄 7_GRU_results.csv 498 | │ │ └───📄 7_LSTM_results.csv 499 | │ ├───📁 Experiment 8/ 500 | │ │ ├───📄 8_BiGRU_results.csv 501 | │ │ ├───📄 8_BiLSTM_results.csv 502 | │ │ ├───📄 8_GRU_results.csv 503 | │ │ └───📄 8_LSTM_results.csv 504 | │ └───📁 Experiment 9/ 505 | │ ├───📄 9_BiGRU_results.csv 506 | │ ├───📄 9_BiLSTM_results.csv 507 | │ ├───📄 9_GRU_results.csv 508 | │ └───📄 9_LSTM_results.csv 509 | └───📄 README.md 510 | ``` 511 | -------------------------------------------------------------------------------- /notebooks/rnn-approaches.ipynb: -------------------------------------------------------------------------------- 1 | {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"pygments_lexer":"ipython3","nbconvert_exporter":"python","version":"3.6.4","file_extension":".py","codemirror_mode":{"name":"ipython","version":3},"name":"python","mimetype":"text/x-python"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"import os\nimport numpy as np\nimport pandas as pd\nimport scipy\nfrom scipy.io import loadmat\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nimport shutil","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2022-04-17T09:33:24.564076Z","iopub.execute_input":"2022-04-17T09:33:24.564764Z","iopub.status.idle":"2022-04-17T09:33:24.569306Z","shell.execute_reply.started":"2022-04-17T09:33:24.564725Z","shell.execute_reply":"2022-04-17T09:33:24.568526Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"path = \"../input/battery-data-set\"\ndis = os.listdir(path)\ndis_mat = []\nbattery_grp = {\n \n}\nfor i in dis :\n filtered_list = list(filter(lambda x:x.split('.')[-1] == 'mat',os.listdir(f\"{path}/{i}\")))\n battery_grp[i.split('BatteryAgingARC')[-1][1:]] = 
list(map(lambda x:x.split('.')[0],filtered_list))\n dis_mat.extend(list(map(lambda x:f\"{path}/{i}/{x}\",filtered_list)))\n\nbattery_grp['5_6_7_18'] = battery_grp['FY08Q4']\ndel battery_grp['FY08Q4']\n\ndis_mat\nbattery_grp","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:24.575725Z","iopub.execute_input":"2022-04-17T09:33:24.576399Z","iopub.status.idle":"2022-04-17T09:33:24.592866Z","shell.execute_reply.started":"2022-04-17T09:33:24.57637Z","shell.execute_reply":"2022-04-17T09:33:24.592089Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from scipy.io import loadmat\n\nbs = [x.split('/')[-1].split('.')[0] for x in dis_mat]\nprint(bs)\n\nds = []\nfor b in dis_mat:\n ds.append(loadmat(b))\n\ntypes = []\ntimes = []\nambient_temperatures = []\ndatas = []\n\nfor i in range(len(ds)):\n x = ds[i][bs[i]][\"cycle\"][0][0][0]\n# ambient_temperatures.append(x['ambient_temperature'])\n ambient_temperatures.append(list(map(lambda y:y[0][0],x['ambient_temperature'])))\n types.append(x['type'])\n times.append(x['time'])\n datas.append(x['data']) \n \nbatteries = []\ncycles = []\nfor i in range(len(ds)):\n batteries.append(bs[i])\n cycles.append(datas[i].size)\n \nbattery_cycle_df = pd.DataFrame({'Battery':batteries,'Cycle':cycles}).sort_values('Battery',ascending=True)\nbattery_cycle_df.drop_duplicates(inplace=True)\nbattery_cycle_df","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:24.596053Z","iopub.execute_input":"2022-04-17T09:33:24.596262Z","iopub.status.idle":"2022-04-17T09:33:27.222541Z","shell.execute_reply.started":"2022-04-17T09:33:24.596236Z","shell.execute_reply":"2022-04-17T09:33:27.22181Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"Cycles = {}\nparams = ['Voltage_measured','Current_measured','Temperature_measured',\n 'Current_load','Voltage_load','Time','Capacity',]\n\nfor i in range(len(bs)):\n Cycles[bs[i]] = {}\n Cycles[bs[i]]['count'] = 0 # This is true for battery B0005, 06, 07\n# ambient_temperatures[i]\n for param in params:\n Cycles[bs[i]][param] = []\n for j in range(datas[i].size):\n if types[i][j] == 'discharge':\n Cycles[bs[i]][param].append(datas[i][j][param][0][0][0])\n \n cap = []\n amb_temp = []\n for j in range(datas[i].size):\n if types[i][j] == 'discharge':\n cap.append(datas[i][j]['Capacity'][0][0][0])\n amb_temp.append(ambient_temperatures[i][j])\n \n Cycles[bs[i]]['Capacity'] = np.array(cap)\n Cycles[bs[i]]['ambient_temperatures'] = np.array(amb_temp)\nCycles = pd.DataFrame(Cycles)\n\nbattery_count = []\nfor b in battery_cycle_df['Battery'].values.tolist() :\n Cycles[b]['count'] = len(Cycles[b]['Capacity'])\nCycles","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:27.224167Z","iopub.execute_input":"2022-04-17T09:33:27.224599Z","iopub.status.idle":"2022-04-17T09:33:36.569726Z","shell.execute_reply.started":"2022-04-17T09:33:27.224561Z","shell.execute_reply":"2022-04-17T09:33:36.569043Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"Cycles['B0052']","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:36.571186Z","iopub.execute_input":"2022-04-17T09:33:36.571639Z","iopub.status.idle":"2022-04-17T09:33:36.66157Z","shell.execute_reply.started":"2022-04-17T09:33:36.571602Z","shell.execute_reply":"2022-04-17T09:33:36.66084Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"Cycles.loc['count',:].reset_index()\nbattery_cycle_df\ncompare_df = 
Cycles.loc['count',:].reset_index().set_index('index').join(battery_cycle_df.set_index('Battery')).rename(columns={'count':'After','Cycle':'Before'})\ncompare_df[['Before','After']]","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:36.663447Z","iopub.execute_input":"2022-04-17T09:33:36.663671Z","iopub.status.idle":"2022-04-17T09:33:36.680817Z","shell.execute_reply.started":"2022-04-17T09:33:36.663639Z","shell.execute_reply":"2022-04-17T09:33:36.679846Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"pd.DataFrame(Cycles['B0047'])","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:36.682466Z","iopub.execute_input":"2022-04-17T09:33:36.682881Z","iopub.status.idle":"2022-04-17T09:33:37.169602Z","shell.execute_reply.started":"2022-04-17T09:33:36.68284Z","shell.execute_reply":"2022-04-17T09:33:37.16889Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df = pd.DataFrame({})\ncols = ['Voltage_measured','Current_measured','Temperature_measured',\n 'Current_load','Voltage_load','Time','Capacity','ambient_temperatures']\nfor col in cols :\n df[col] = Cycles['B0047'][col]\nmax_len = np.max(df['Time'].apply(lambda x:len(x)).values)\ndf_x = df.drop(columns=['Capacity']).values\ndf_y = df['Capacity'].values\ndf","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:37.171212Z","iopub.execute_input":"2022-04-17T09:33:37.171656Z","iopub.status.idle":"2022-04-17T09:33:37.277257Z","shell.execute_reply.started":"2022-04-17T09:33:37.171616Z","shell.execute_reply":"2022-04-17T09:33:37.276251Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"batteries","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:37.278911Z","iopub.execute_input":"2022-04-17T09:33:37.279221Z","iopub.status.idle":"2022-04-17T09:33:37.287296Z","shell.execute_reply.started":"2022-04-17T09:33:37.279166Z","shell.execute_reply":"2022-04-17T09:33:37.286326Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df_all = pd.DataFrame({})\nmax_len = 0\n\nexperiment1 = [\"B0005\", \"B0006\", \"B0007\", \"B0018\"]\nexperiment2 = [\"B0025\", \"B0026\", \"B0027\", \"B0028\"]\nexperiment3 = [\"B0029\", \"B0030\", \"B0031\", \"B0032\"]\nexperiment4 = [\"B0033\", \"B0034\", \"B0036\"]\nexperiment5 = [\"B0038\", \"B0039\", \"B0040\"]\nexperiment6 = [\"B0041\", \"B0042\", \"B0043\", \"B0044\"]\nexperiment7 = [\"B0045\", \"B0046\", \"B0047\", \"B0048\"]\nexperiment8 = [\"B0049\", \"B0050\", \"B0051\", \"B0052\"]\nexperiment9 = [\"B0053\", \"B0054\", \"B0055\", \"B0056\"]\n\nexp_try_out = experiment1 # replace with whichever experiment list you want to run\n\nfor bat in exp_try_out:\n df = pd.DataFrame({})\n cols = ['Voltage_measured','Current_measured','Temperature_measured',\n 'Current_load','Voltage_load','Time','Capacity','ambient_temperatures']\n for col in cols :\n df[col] = Cycles[bat][col]\n max_l = np.max(df['Time'].apply(lambda x:len(x)).values)\n max_len = max(max_l,max_len)\n df_all = pd.concat([df_all,df],ignore_index=True)\n \ndf=df_all.reset_index(drop=True)\ndf","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:37.289253Z","iopub.execute_input":"2022-04-17T09:33:37.289981Z","iopub.status.idle":"2022-04-17T09:33:37.486579Z","shell.execute_reply.started":"2022-04-17T09:33:37.289908Z","shell.execute_reply":"2022-04-17T09:33:37.485512Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"flag = False\nfor i,j in 
enumerate(df['Capacity']) :\n try :\n if len(j) :\n df['Capacity'][i] = j[0] \n else :\n df['Capacity'][i] = 0\n flag = True\n except :\n pass\nprint(flag)","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:37.491962Z","iopub.execute_input":"2022-04-17T09:33:37.492788Z","iopub.status.idle":"2022-04-17T09:33:37.505542Z","shell.execute_reply.started":"2022-04-17T09:33:37.492717Z","shell.execute_reply":"2022-04-17T09:33:37.503997Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# df[1719:1780]","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:37.509233Z","iopub.execute_input":"2022-04-17T09:33:37.509532Z","iopub.status.idle":"2022-04-17T09:33:37.517206Z","shell.execute_reply.started":"2022-04-17T09:33:37.509504Z","shell.execute_reply":"2022-04-17T09:33:37.51624Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.shape,max_len","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:37.518786Z","iopub.execute_input":"2022-04-17T09:33:37.519264Z","iopub.status.idle":"2022-04-17T09:33:37.52605Z","shell.execute_reply.started":"2022-04-17T09:33:37.519227Z","shell.execute_reply":"2022-04-17T09:33:37.525081Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"def to_padded_numpy(l, shape):\n padded_array = np.zeros(shape)\n# print(l)\n padded_array[:len(l)] = l\n return padded_array\n\ndf_x = df.drop(columns=['Capacity','ambient_temperatures']).values\ndf_y = df['Capacity'].values\nambient_temperatures = df['ambient_temperatures'].values\nn,m = df_x.shape[0], df_x.shape[1]\ntemp2 = np.zeros((n,m,max_len))\nfor i in range(n) :\n for j in range(m) :\n temp2[i][j] = to_padded_numpy(df_x[i][j],max_len)\n\ndf_x = temp2.reshape(n,m*max_len)\ndf_x = np.concatenate((df_x, ambient_temperatures.reshape(ambient_temperatures.shape[0],1)), axis=1)\ndf_x.shape","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:37.527324Z","iopub.execute_input":"2022-04-17T09:33:37.527752Z","iopub.status.idle":"2022-04-17T09:33:37.549783Z","shell.execute_reply.started":"2022-04-17T09:33:37.527709Z","shell.execute_reply":"2022-04-17T09:33:37.549064Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"split = 20\ntrain_size = df_x.shape[0]*(100-split)//100\ntest_size = df_x.shape[0]-train_size\n\ntrain_x,test_x = df_x[:train_size],df_x[train_size:]\ntrain_y,test_y = df_y[:train_size],df_y[train_size:]\ntrain_x.shape,test_x.shape,train_y.shape,test_y.shape","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:37.551005Z","iopub.execute_input":"2022-04-17T09:33:37.551459Z","iopub.status.idle":"2022-04-17T09:33:37.559877Z","shell.execute_reply.started":"2022-04-17T09:33:37.551403Z","shell.execute_reply":"2022-04-17T09:33:37.559202Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from sklearn.linear_model import LinearRegression\n\nlr = LinearRegression()\nlr.fit(train_x,train_y)","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:37.561335Z","iopub.execute_input":"2022-04-17T09:33:37.561766Z","iopub.status.idle":"2022-04-17T09:33:37.628768Z","shell.execute_reply.started":"2022-04-17T09:33:37.561733Z","shell.execute_reply":"2022-04-17T09:33:37.62809Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from sklearn.metrics import max_error,mean_absolute_error,mean_squared_error,mean_squared_log_error\nfrom sklearn.metrics import 
median_absolute_error,mean_absolute_percentage_error,r2_score\n\ndef get_scores(y_true,y_pred) :\n return {\n 'max_error':max_error(y_true,y_pred),\n 'mean_absolute_error':mean_absolute_error(y_true,y_pred),\n 'mean_squared_error':mean_squared_error(y_true,y_pred),\n 'mean_squared_log_error':mean_squared_log_error(y_true,y_pred),\n 'median_absolute_error':median_absolute_error(y_true,y_pred),\n 'mean_absolute_percentage_error':mean_absolute_percentage_error(y_true,y_pred),\n 'r2_score':r2_score(y_true,y_pred)\n }\n\ndef get_preds(model,data_x) :\n return model.predict(data_x).clip(min=0)\n\nprint(get_scores(train_y,get_preds(lr,train_x)))\nprint(get_scores(test_y,get_preds(lr,test_x)))\n\n# for i,j in enumerate(lr.predict(train_x)) :\n# print(train_y[i],j)","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:33:37.630281Z","iopub.execute_input":"2022-04-17T09:33:37.63082Z","iopub.status.idle":"2022-04-17T09:33:37.659326Z","shell.execute_reply.started":"2022-04-17T09:33:37.630754Z","shell.execute_reply":"2022-04-17T09:33:37.65816Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"def to_padded_numpy(l, shape):\n padded_array = np.zeros(shape)\n# print(l)\n padded_array[:len(l)] = l\n return padded_array\n\ndf_x = df.drop(columns=['Capacity','ambient_temperatures']).values\ndf_y = df['Capacity'].values\nambient_temperatures = df['ambient_temperatures'].values\nn,m = df_x.shape[0], df_x.shape[1]\ntemp2 = np.zeros((n,m,max_len))\nfor i in range(n) :\n for j in range(m) :\n temp2[i][j] = to_padded_numpy(df_x[i][j],max_len)\n\ndf_x = temp2\n# df_x = temp2.reshape(n,m*max_len)\n# df_x = np.concatenate((df_x, ambient_temperatures.reshape(ambient_temperatures.shape[0],1)), axis=1)\ndf_x.shape\n\ntest_split = 10\nval_split = 10\ntrain_size = df_x.shape[0]*(100-(test_split+val_split))//100\nval_size = df_x.shape[0]*(val_split)//100\ntest_size = df_x.shape[0]-train_size-val_size\n\n# train_x,test_x = df_x[:train_size],df_x[train_size:]\n# train_y,test_y = df_y[:train_size],df_y[train_size:]\n\n# train_x,train_y = df_x[:train_size],df_y[:train_size]\n# val_x,val_y = df_x[train_size:train_size+val_size],df_y[train_size:train_size+val_size]\n# test_x,test_y = df_x[train_size+val_size:],df_y[train_size+val_size:]\n\n# train_x=np.asarray(train_x).astype('float32')\n# test_x=np.asarray(test_x).astype('float32')\n# train_y=np.asarray(train_y).astype('float32')\n# test_y=np.asarray(test_y).astype('float32')\n\nfrom sklearn.model_selection import train_test_split\ntrain_x,test_x,train_y,test_y = train_test_split(df_x,df_y,test_size=0.2,random_state=0)\ntest_x,val_x,test_y,val_y = train_test_split(test_x,test_y,test_size=0.5,random_state=0)\n\ntemp = train_x[0][0][0]\ntrain_x,test_x,train_y,test_y = train_test_split(df_x,df_y,test_size=0.2,random_state=0)\ntest_x,val_x,test_y,val_y = train_test_split(test_x,test_y,test_size=0.5,random_state=0)\n\n# ((508, 6, 371), (65, 6, 371), (508,), (65,))\ntrain_x.shape,test_x.shape,train_y.shape,test_y.shape","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:34:33.235984Z","iopub.execute_input":"2022-04-17T09:34:33.236263Z","iopub.status.idle":"2022-04-17T09:34:33.26302Z","shell.execute_reply.started":"2022-04-17T09:34:33.236231Z","shell.execute_reply":"2022-04-17T09:34:33.262375Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"train_x[0][0][0]\nassert train_x[0][0][0] == 
temp","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:34:34.068922Z","iopub.execute_input":"2022-04-17T09:34:34.069199Z","iopub.status.idle":"2022-04-17T09:34:34.074342Z","shell.execute_reply.started":"2022-04-17T09:34:34.069151Z","shell.execute_reply":"2022-04-17T09:34:34.073527Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"for i,j in enumerate(train_y) :\n try :\n len(j)\n print(i)\n# break\n except :\n# print(e)\n# break\n temp = 1","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:34:49.12225Z","iopub.execute_input":"2022-04-17T09:34:49.122512Z","iopub.status.idle":"2022-04-17T09:34:49.126625Z","shell.execute_reply.started":"2022-04-17T09:34:49.122484Z","shell.execute_reply":"2022-04-17T09:34:49.125974Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import numpy as np\nimport pandas as pd\nimport scipy.io\nimport math\nimport os\nimport ntpath\nimport sys\nimport logging\nimport time\nimport sys\nimport random\n\nfrom importlib import reload\nimport plotly.graph_objects as go\n\nimport tensorflow as tf\nfrom tensorflow import keras\nfrom tensorflow.keras import layers, regularizers\n\nfrom tensorflow.keras.models import Sequential\nfrom tensorflow.keras.layers import Dense, Dropout, Activation\nfrom tensorflow.keras.optimizers import SGD, Adam\nfrom tensorflow.keras.layers import LSTM, Embedding, RepeatVector, TimeDistributed, Masking, Bidirectional\nfrom tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LambdaCallback\n\nIS_TRAINING = True\n\n# Original Parameters\n# LEARNING_RATE = 0.000003\n# REGULARIZATION = 0.0002\n# NUM_EPOCHS = 500\n# BATCH_SIZE = 32\n\nLEARNING_RATE = 0.0007\nREGULARIZATION = 0.0002\nNUM_EPOCHS = 200\nBATCH_SIZE = 64\nEARLY_STOPPING=25\nSTEP_LR = LEARNING_RATE/100\n\nif IS_TRAINING:\n EXPERIMENT = \"lstm_rul_nasa_randomized\"\n\n experiment_name = time.strftime(\"%Y-%m-%d-%H-%M-%S\") + '_' + EXPERIMENT\n print(experiment_name)\n\n # Model definition\n\n opt = tf.keras.optimizers.Adam(lr=LEARNING_RATE)\n\n model = Sequential()\n model.add(Masking(input_shape=(train_x.shape[1], train_x.shape[2])))\n# model.add(LSTM(256, activation='selu',return_sequences=True,kernel_regularizer=regularizers.l2(REGULARIZATION)))\n model.add(LSTM(256, activation='selu',return_sequences=True,kernel_regularizer=regularizers.l2(REGULARIZATION)))\n model.add(LSTM(256, activation='selu',return_sequences=True,kernel_regularizer=regularizers.l2(REGULARIZATION)))\n# model.add(LSTM(128, activation='selu',return_sequences=True,kernel_regularizer=regularizers.l2(REGULARIZATION)))\n model.add(LSTM(128, activation='selu',return_sequences=True,kernel_regularizer=regularizers.l2(REGULARIZATION)))\n model.add(LSTM(128, activation='selu',return_sequences=True,kernel_regularizer=regularizers.l2(REGULARIZATION)))\n# model.add(LSTM(64, activation='selu',return_sequences=True,kernel_regularizer=regularizers.l2(REGULARIZATION)))\n model.add(LSTM(64, activation='selu',return_sequences=True,kernel_regularizer=regularizers.l2(REGULARIZATION)))\n model.add(LSTM(64, activation='selu',return_sequences=False,kernel_regularizer=regularizers.l2(REGULARIZATION)))\n model.add(Dense(64, activation='selu', kernel_regularizer=regularizers.l2(REGULARIZATION)))\n model.add(Dense(64, activation='selu', kernel_regularizer=regularizers.l2(REGULARIZATION)))\n model.add(Dense(32, activation='selu', kernel_regularizer=regularizers.l2(REGULARIZATION)))\n model.add(Dense(32, activation='selu', 
kernel_regularizer=regularizers.l2(REGULARIZATION)))\n model.add(Dense(1, activation='linear'))\n model.summary()\n \n model.compile(optimizer=opt, loss='huber', metrics=['mse', 'mae', 'mape', tf.keras.metrics.RootMeanSquaredError(name='rmse')])\n \ndata_path=\"./model\"\nif IS_TRAINING:\n def scheduler(epoch, lr):\n if epoch < 10:\n return lr+STEP_LR\n elif epoch%5 == 0:\n return lr*0.99\n return lr\n \n early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=EARLY_STOPPING)\n lr_scheduler = tf.keras.callbacks.LearningRateScheduler(scheduler) \n \n history = model.fit(train_x, train_y,epochs=NUM_EPOCHS, batch_size=BATCH_SIZE,verbose=1, validation_split=0.1,callbacks=[early_stopping,lr_scheduler])\n \n model.save(data_path + 'results/trained_model/%s.h5' % experiment_name)\n\n hist_df = pd.DataFrame(history.history)\n hist_csv_file = data_path + 'results/trained_model/%s_history.csv' % experiment_name\n with open(hist_csv_file, mode='w') as f:\n hist_df.to_csv(f)\n history = history.history","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:34:50.187429Z","iopub.execute_input":"2022-04-17T09:34:50.187881Z","iopub.status.idle":"2022-04-17T09:36:19.528807Z","shell.execute_reply.started":"2022-04-17T09:34:50.187845Z","shell.execute_reply":"2022-04-17T09:36:19.528086Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"model.evaluate(train_x, train_y)\nmodel.evaluate(val_x, val_y)\nmodel.evaluate(test_x, test_y)","metadata":{"execution":{"iopub.status.busy":"2022-04-17T09:36:19.530344Z","iopub.execute_input":"2022-04-17T09:36:19.530595Z","iopub.status.idle":"2022-04-17T09:36:19.724427Z","shell.execute_reply.started":"2022-04-17T09:36:19.530559Z","shell.execute_reply":"2022-04-17T09:36:19.723782Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]} --------------------------------------------------------------------------------