├── .gitignore ├── README.md ├── transformer_helper.py ├── rolling_and_plot_tf.py └── transform_notebook.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | .DS_Store 3 | __pycache__/ 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Transformers + TensorFlow and Pandas for SOC Estimation 2 | 3 | **The Testing branch is the most up to date** 4 | 5 | Repo with the Decoder implemented: [Attar's Github Repo](https://github.com/att-ar/transform_decode_soc) 6 | 7 | Building a transformer neural network using TensorFlow and Transformers in Python with the goal of predicting Li-ion State of Charge based on real-time voltage, current and delta time data. 8 | 9 | This transformer is composed of only the encoder layer, and it uses Batch Normalization instead of the Layer Normalization found in NLP. 10 | This was done because literature said these two changes proved significantly more effective than the NLP application of transformers. 11 | 12 | The transformers' input will be voltage, current, delta time and previous SOC points in a batch of windowed data of shape:
13 | ```(G.batch_size, G.window_size, G.num_features)``` 14 | 15 | The voltage, current and soc data will be from time: $$t - \text{windowsize} \rightarrow t$$
# (repository-dump text sharing these physical lines is preserved verbatim as comments)
# 16 | The output should be the SOC prediction at time $t + 1$ for each batch, the output shape should be `(G.batch_size, 1)`
# 17 |
# --------------------------------------------------------------------------------
# /transformer_helper.py:
# --------------------------------------------------------------------------------
from os import environ

# The log level has to be in the environment before TensorFlow is imported for
# it to reliably take effect, so it is set ahead of the import below.
environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import numpy as np

try:
    import tensorflow as tf
except ImportError:  # keeps the NumPy-only helper (get_angles) importable
    tf = None


def get_angles(pos, k, d: int):
    """
    Get angles to be used in the positional encoding vectors

    Arguments:
        pos -- Column vector containing the positions [[0], [1], ...,[N-1]]
        k --   Row vector containing the dimension span [[0, 1, 2, ..., d-1]]
        d --   Encoding size

    Returns:
        angles -- (pos, d) np.array; entry [p, k] is p / 10000 ** (2 * (k // 2) / d)
    """
    # Dimensions 2i and 2i+1 share the same frequency index i
    i = k // 2
    # Broadcasting (N, 1) against (1, d) yields the full (N, d) grid of angles
    return pos / (10000 ** (2 * i / d))


def positional_encoding(positions: int, d: int):
    """
    Precomputes a matrix with all the positional encodings

    Arguments:
        positions - Maximum number of positions to be encoded
        d - Encoding size

    Returns:
        pos_encoding - (1, positions, d) float32 tensor with the positional encodings

    Raises:
        ImportError - if TensorFlow is not installed
    """
    if tf is None:
        raise ImportError("tensorflow is required for positional_encoding()")

    angle_rads = get_angles(np.arange(positions)[:, np.newaxis],
                            np.arange(d)[np.newaxis, :],
                            d)

    # apply sin to even indices 2i
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])

    # apply cos to odd indices 2i+1
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])

    # add the leading axis of size 1 so the encoding broadcasts over a batch axis
    pos_encoding = angle_rads[np.newaxis, :, :]

    # casts tensor to float dtype
    return tf.cast(pos_encoding, dtype=tf.float32)


def create_look_ahead_mask(sequence_length):
    """
    Returns a lower triangular matrix filled with ones
    (ones on and below the main diagonal, zeros above it).

    Row i therefore has ones only in columns 0..i.
    NOTE(review): how the mask is consumed is not visible in this module --
    confirm that the attention layer treats 1 as "may attend".

    Arguments:
        sequence_length -- matrix size (sequence length is the number of time steps per input
                           input.shape = [batch_size, sequence_length, num_features])

    Returns:
        mask -- (sequence_length, sequence_length) float tensor

    Raises:
        ImportError -- if TensorFlow is not installed

    Example: create_look_ahead_mask(3) holds the values
        [[1., 0., 0.],
         [1., 1., 0.],
         [1., 1., 1.]]
    """
    if tf is None:
        raise ImportError("tensorflow is required for create_look_ahead_mask()")

    # band_part(x, -1, 0) keeps the whole lower triangle and nothing above the
    # diagonal. The matrix is built 2-D directly: tf.Tensor has no .squeeze()
    # method, so the former `mask.squeeze()` call could not work.
    return tf.linalg.band_part(
        tf.ones((sequence_length, sequence_length)), -1, 0)


# --------------------------------------------------------------------------------
# /rolling_and_plot_tf.py:
# --------------------------------------------------------------------------------
import pandas as pd
import numpy as np

# Plotting / scikit-learn / torch are only needed by some functions. They are
# imported optionally so the windowing helpers work without them; functions
# that need a missing package raise ImportError when called.
try:
    import plotly.express as px
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots
except ImportError:
    px = go = make_subplots = None

try:
    from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler
    from sklearn.model_selection import train_test_split
except ImportError:
    MaxAbsScaler = MinMaxScaler = train_test_split = None

try:
    import torch  # not used below; kept so the module namespace is unchanged
except ImportError:
    torch = None


def helper(value, j):
    '''
    helper function for data_plot()

    Picks the style value for the j-th trace of one DataFrame:
        "None" (the fill value for a missing option) -> None
        list                                         -> value[j], or None when j is out of range
        anything else (a single value)               -> value for j == 0, otherwise None
    '''
    # isinstance guard: only a real string is compared against the "None" marker
    if isinstance(value, str) and value == "None":
        return None
    if isinstance(value, list):
        return value[j] if j < len(value) else None
    # not a list so only one value, it applies to the first trace only
    return value if j == 0 else None


def data_plot(data=None, x=None, y=None,
              x_title=None, y_title=None, title=None,
              **kwargs):
    '''
    list of pandas.DataFrame, list of str, list of str, list of str, kwargs -> plotly plot object

    Precondition: If an argument has multiple objects, they must be in a list (can have nested lists).
                  The order of the arguments must be in the same order as the DataFrames.
                  There must be the same number of x columns as y columns passed.

    ex) data_plot(
            data = [df1, df2],
            x = [ "SOC", "SOC-Dis" ],
            y = [ "OCV", "OCV-Dis" ],
            mode = ["lines+markers", "markers"],
            color = ["mintcream", "darkorchid"]
            )

    This function takes one or more DataFrames, columns from the respective DataFrames to be plot on x and y-axes.
    It also takes the mode of plotting desired for the DataFrames and optional keyword arguments.
    It outputs a plotly plot of the data from the columns that were passed.

    Returns:
    the plotly figure, or -- when the arguments are inconsistent -- a str starting with "Error:".
    NOTE(review): errors are reported as returned strings (kept for backward
    compatibility); callers should check the result before calling `.show()`.

    Raises:
    ImportError if plotly is not installed

    Parameters:
    `data` DataFrame or list of DataFrames

    `x` list of columns or nested lists of columns
        example of each option in order:
        x = ["SOC-Dis"]
        x = ["SOC-Dis","SOC-Chg","SOC"]
        x = [ ["Test Time (sec)","Step Time (sec)"], "Step"]
            Test Time and Step Time are both from the same DataFrame; there must be two y columns as well.

    `y` list of columns or nested lists of columns
        View `x` for help

    `x_title` str
        the name of the x_axis to be displayed
        else None

    `y_title` str
        the name of the y_axis to be displayed
        else None

    `title` str
        The title of the Plot
        default None will not add a title

    **kwargs: (alphabetical order)
    Only the four keys below are read; any other keyword is ignored.

    `color` str, list of str, nested lists of str:
        same principle as above arguments,
        assigns the color of the individual data lines.
        if no value is passed for a plot, plotly will do it automatically.

        The 'color' property is a color and may be specified as:
          - A hex string (e.g. '#ff0000')
          - An rgb/rgba string (e.g. 'rgb(255,0,0)')
          - An hsl/hsla string (e.g. 'hsl(0,100%,50%)')
          - An hsv/hsva string (e.g. 'hsv(0,100%,100%)')
          - A named CSS color:
                aliceblue, antiquewhite, aqua, aquamarine, azure,
                beige, bisque, black, blanchedalmond, blue,
                blueviolet, brown, burlywood, cadetblue,
                chartreuse, chocolate, coral, cornflowerblue,
                cornsilk, crimson, cyan, darkblue, darkcyan,
                darkgoldenrod, darkgray, darkgrey, darkgreen,
                darkkhaki, darkmagenta, darkolivegreen, darkorange,
                darkorchid, darkred, darksalmon, darkseagreen,
                darkslateblue, darkslategray, darkslategrey,
                darkturquoise, darkviolet, deeppink, deepskyblue,
                dimgray, dimgrey, dodgerblue, firebrick,
                floralwhite, forestgreen, fuchsia, gainsboro,
                ghostwhite, gold, goldenrod, gray, grey, green,
                greenyellow, honeydew, hotpink, indianred, indigo,
                ivory, khaki, lavender, lavenderblush, lawngreen,
                lemonchiffon, lightblue, lightcoral, lightcyan,
                lightgoldenrodyellow, lightgray, lightgrey,
                lightgreen, lightpink, lightsalmon, lightseagreen,
                lightskyblue, lightslategray, lightslategrey,
                lightsteelblue, lightyellow, lime, limegreen,
                linen, magenta, maroon, mediumaquamarine,
                mediumblue, mediumorchid, mediumpurple,
                mediumseagreen, mediumslateblue, mediumspringgreen,
                mediumturquoise, mediumvioletred, midnightblue,
                mintcream, mistyrose, moccasin, navajowhite, navy,
                oldlace, olive, olivedrab, orange, orangered,
                orchid, palegoldenrod, palegreen, paleturquoise,
                palevioletred, papayawhip, peachpuff, peru, pink,
                plum, powderblue, purple, red, rosybrown,
                royalblue, rebeccapurple, saddlebrown, salmon,
                sandybrown, seagreen, seashell, sienna, silver,
                skyblue, slateblue, slategray, slategrey, snow,
                springgreen, steelblue, tan, teal, thistle, tomato,
                turquoise, violet, wheat, white, whitesmoke,
                yellow, yellowgreen
          - A number that will be interpreted as a color
            according to scatter.marker.colorscale
          - A list or array of any of the above

    `mode` str, list of str, nested lists of str:
        default None: no mode is forced, plotly applies its own default
        Note: str must be one of "lines", "markers", "lines+markers" which are self-explanatory
        example of each option in order:
        mode = "markers"
        mode = ["lines+markers", "lines"]
        mode = ["lines+markers",["lines","lines"]]

    `name` str, list of str, nested list of strs
        same principle as above arguments
        assigns the names of the individual data lines to be displayed in the legend

    `size` int/float, list of int/float or nested lists of int/float
        same principle as above arguments
        assigns the size of the individual data lines
        if no value is passed, plotly will do it automatically.


    >>>df1 = generate_ocv_pts("JMFM_12_SOC_OCV_Test_220411.txt", to_csv = False)
    >>>df2 = ocv_estimate(df1, to_csv = False)
    >>>data_plot(data = [df1,df2],
          x=[ ["SOC-Chg","SOC-Dis"],"SOC" ],
          y = [ ["OCV-Chg","OCV-Dis"], "OCV" ],
          title = "JMFM-12 OCV vs. SOC Curve",
          x_title = "SOC (%)",
          y_title = "OCV (V)",
          mode = [ ["markers","markers"] ],
          color = [ ["violet","lightcoral"], "darkorchid"],
          name = [ ["Charge-OCV","Discharge-OCV"], "OCV"],
          size = [[4.5,4.5]]
         )
    figure...
    '''
    if go is None or make_subplots is None:
        raise ImportError("plotly is required for data_plot()")

    # A bare DataFrame counts as exactly one frame; len() of it would be its
    # row count, which is not what the argument checks below are about.
    n_frames = len(data) if isinstance(data, list) else 1

    if len(x) != n_frames or len(y) != n_frames:
        return ("Error: x and y columns passed must match the number of DataFrames passed\n"
                "Use nested lists for multiple columns from the same DataFrame\n")

    for key, plural in (("mode", "modes"), ("color", "colors"),
                        ("name", "names"), ("size", "sizes")):
        option = kwargs.get(key)
        if isinstance(option, list) and len(option) > n_frames:
            return f"Error: passed more {plural} than DataFrames"

    # One row per DataFrame; each cell holds a column name / style value or a
    # list of them (one entry per trace drawn from that DataFrame).
    frame = pd.DataFrame(data={"x": x, "y": y})

    for i in ["color", "mode", "name", "size"]:
        frame = frame.join(
            pd.Series(kwargs.get(i), name=i, dtype="object"),
            how="outer")

    # "None" marks options that were not supplied; helper() maps it back to None
    frame.fillna("None", inplace=True)

    figure = make_subplots(
        x_title=x_title, y_title=y_title, subplot_titles=[title])

    for i in frame.index:
        use_data = data[i] if isinstance(data, list) else data

        x_cols = frame.loc[i, "x"]
        y_cols = frame.loc[i, "y"]
        if not isinstance(x_cols, list):
            # single column pair: handle it as a one-trace list
            x_cols, y_cols = [x_cols], [y_cols]

        for j, use_x in enumerate(x_cols):
            use_y = y_cols[j]  # y must provide one column per x column
            figure.add_trace(
                go.Scatter(
                    x=use_data[use_x], y=use_data[use_y],
                    mode=helper(frame.loc[i, "mode"], j),
                    marker={
                        "size": helper(frame.loc[i, "size"], j),
                        "color": helper(frame.loc[i, "color"], j)},
                    name=helper(frame.loc[i, "name"], j))
            )
    return figure


# -------------------------------------------------------

def normalize(data: pd.DataFrame, capacity: float):
    '''
    pd.DataFrame -> pd.DataFrame
    Precondition: "delta t" is removed from the DataFrame

    Normalizes the data by applying sklearn.preprocessing functions
    Voltage is scaled between 0 and 1;
    Current is scaled to become C-rate (divided by `capacity`)
    SOC is scaled between 0 and 1 (just divided by 100)

    The input DataFrame is left untouched; the scaled values are written to a copy.
    Prints the variance and mean of every column of the scaled data.

    Output:
        normalized pd.DataFrame

    Raises:
        ImportError if scikit-learn is not installed
    '''
    if MinMaxScaler is None:
        raise ImportError("scikit-learn is required for normalize()")

    # Work on a copy: callers typically pass a slice of a larger frame, and
    # writing into it would either mutate their data or trigger pandas'
    # chained-assignment warning.
    data = data.copy()

    data["current"] = data["current"] / capacity
    # fit_transform returns an (n, 1) array; flatten it back to one column
    data["voltage"] = MinMaxScaler((0, 1)).fit_transform(
        data["voltage"].values.reshape(-1, 1)).ravel()
    data["soc"] = data["soc"] / 100.

    print(f'''Scaled stats:

    variance:\n{data.var(axis = 0)},

    mean:\n{data.mean(axis=0)}''')

    return data

# -------------------------------------------------------


def _rolling_windows(frame, window_size):
    '''
    pd.DataFrame, int -> float32 np.array of shape (n_windows, window_size, n_columns)

    Builds the full-length windows ending at row `window_size`, `window_size + 1`, ...
    (the window ending at row `window_size - 1` is skipped, so that the windows
    line up with labels taken from row `window_size + 1` onwards of the original data).
    Returns an empty array when `frame` has `window_size` rows or fewer.
    '''
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")
    values = frame.to_numpy()
    windows = [values[end - window_size + 1:end + 1]
               for end in range(window_size, len(values))]
    return np.array(windows, dtype="float32")


def rolling_split_trial(df, window_size):
    '''
    implements rolling window sectioning
    Input features: delta_t (only if the "delta t" column exists), I, V at time t,
    and SOC at time t-1
    Prediction at time t uses the features given

    Returns (x, y):
        x float32 array (n_windows, window_size, n_features)
        y float32 array (n_windows, 1), the SOC that follows each window
    '''
    if "delta t" in df.columns:
        col = ["delta t", "current", "voltage"]
    else:
        col = ["current", "voltage"]
    df_x = (df[col].iloc[1:].reset_index(drop=True)  # staggered right by one
            .join(
        df["soc"].iloc[:-1].reset_index(drop=True),  # staggered left by one
        how="outer"
    ))

    # staggered right by one
    df_y = df["soc"].iloc[window_size + 1:].values[:, np.newaxis]

    return _rolling_windows(df_x, window_size), np.array(df_y, dtype="float32")


def rolling_split(df, window_size, test_size=0.1, train=True):
    '''
    Precondition: "delta t" is not in the columns
    implements rolling window sectioning
    Three input features: I, V, SOC all at time t-1
    The prediction of SOC at time t uses no other information

    With `train=True` returns a shuffled and windowed dataset using
    sklearn.model_selection.train_test_split: (x_train, x_test, y_train, y_test)
    With `train=False` returns the windows in chronological order: (x, y)

    Parameters:
    `window_size` int
        the number of consecutive data points needed to form a data window
    `test_size` float, 0 < test_size <= 0.2
        the ratio of data points allocated to the dev/test set
        Should never exceed 0.2
    `train` bool
        whether to shuffle and split the windows (see above)

    Raises:
    ValueError if "delta t" is a column or `test_size` is out of range
    TypeError if `test_size` is not a float
    ImportError if `train=True` and scikit-learn is not installed
    '''
    # Explicit checks instead of `assert`, which is stripped under `python -O`
    if "delta t" in df.columns:
        raise ValueError('"delta t" must be removed from the DataFrame')
    if not isinstance(test_size, float):
        raise TypeError("test_size must be a float")
    if not 0 < test_size <= 0.2:
        raise ValueError("test_size must satisfy 0 < test_size <= 0.2")

    # staggered left by one: the last row can only ever be a label
    df_x = _rolling_windows(
        df[["current", "voltage", "soc"]].iloc[:-1], window_size)

    df_y = np.array(df["soc"].iloc[window_size + 1:].values,
                    dtype="float32")[:, np.newaxis]

    if not train:
        return df_x, df_y

    if train_test_split is None:
        raise ImportError("scikit-learn is required for rolling_split(train=True)")
    return train_test_split(df_x, df_y, test_size=test_size, shuffle=True)

# ----------------------------------------------------------------
# Validation


def validate(model, dataloader, dev=True):
    '''
    tensorflow model, tensorflow DataSet -> pd.DataFrame, prints 2 floats and a Plotly plot

    !! Tensorflow version, not the original PyTorch version
    This function runs a tf.data.Dataset through the model and prints the max and min
    predicted SOC, it also prints a Plotly plot of the predictions versus the labels
    This function outputs a pandas.DataFrame of the predictions with their corresponding labels
    (columns "pred", "labels" and the 1-based "point" used as x-axis).

    Parameters:
    `model`
        object whose `predict(dataloader, verbose=1)` returns an array of predictions
        assumes one prediction per sample, e.g. shape (n, 1) -- TODO confirm for other models
    `dataloader`
        iterable of (inputs, labels) batches, labels must offer `.numpy()`
    `dev` bool
        whether or not it's the developmental set
        use False if it's the entire dataset
    '''

    aggregate = model.predict(dataloader, verbose=1)
    print("Max pred: ", aggregate.max(), "\tMin pred: ", aggregate.min())

    # Flatten both sides to 1-D so they pair up element by element, and keep
    # only as many labels as there are predictions.
    predictions = np.asarray(aggregate).reshape(-1)
    np_labels = np.concatenate(
        [label.numpy() for _, label in dataloader], axis=0
    ).reshape(-1)[:len(predictions)]

    visualize = pd.DataFrame(data={"pred": predictions,
                                   "labels": np_labels})

    if dev:  # if it is the dev set, the values need to be sorted by value
        visualize.sort_values("labels", inplace=True)
        # if it is the entire dataset, it is already sorted chronologically which is more important

    # reset_index returns a new frame; the result has to be kept
    visualize = visualize.reset_index(drop=True)

    visualize["point"] = list(range(1, len(visualize) + 1))
    # 0.01 in the denominator avoids dividing by zero for labels equal to 0
    print("Percent Accuracy:", np.mean(
        100.0 - np.abs(predictions - np_labels) / (np_labels + 0.01) * 100))

    fig = data_plot(data=visualize,
                    x=[["point", "point"]],
                    y=[["pred", "labels"]],
                    x_title="Data Point",
                    y_title="SOC",
                    title="Predicted vs Actual SOC",
                    name=[["predictions", "labels"]],
                    mode=[["lines", "lines"]],
                    color=[["red", "yellow"]]
                    )
    fig.show()
    return visualize

# --------------------------------------------------------------------------------
# /transform_notebook.ipynb:
# --------------------------------------------------------------------------------
# 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "h_Iw1qCBlT-z" 7 | }, 8 | "source": [ 9 | "\n", 10 | "## Import" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "id": "cqXowf9MlT-1" 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "from google.colab import drive\n", 22 | "drive.mount('/content/drive')" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "id":
"eh1JdQmwlT-3" 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "!cp /content/drive/MyDrive/transformer_soc/rolling_and_plot_tf.py .\n", 34 | "!cp /content/drive/MyDrive/transformer_soc/sim_data.csv .\n", 35 | "!cp /content/drive/MyDrive/transformer_soc/transformer_helper.py ." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "id": "_OpwqWL2QH5G" 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "# from os import environ\n", 47 | "# environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"1\"\n", 48 | "# removes tensorflow warnings triggered because of Tensorflow incompatibility with my Apple M1 chip.\n", 49 | "# ignore this when using a non Apple Silicon device, ie. Google Colab or the likes.\n", 50 | "\n", 51 | "import tensorflow as tf\n", 52 | "from tensorflow.keras.layers import MultiHeadAttention, Dense, Input, Dropout, BatchNormalization\n", 53 | "import tensorflow.keras.backend as K\n", 54 | "from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint\n", 55 | "\n", 56 | "from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler\n", 57 | "from sklearn.model_selection import train_test_split\n", 58 | "\n", 59 | "from dataclasses import dataclass" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "id": "Z6BKLL9B3vIZ" 66 | }, 67 | "source": [ 68 | "Cells Below is **only for TPUs**\n", 69 | "\n", 70 | "---\n", 71 | "\n" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "id": "WMA_zsLY3x6O" 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "# import os\n", 83 | "# resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')\n", 84 | "# tf.config.experimental_connect_to_cluster(resolver)\n", 85 | "# # This is the TPU initialization code that has to be at the beginning.\n", 86 | "# tf.tpu.experimental.initialize_tpu_system(resolver)\n", 87 | "# print(\"All devices: \", tf.config.list_logical_devices('TPU'))\n", 88 | "\n", 89 | "# 
strategy = tf.distribute.TPUStrategy(resolver)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": { 95 | "id": "0K8Ni6bD4Mge" 96 | }, 97 | "source": [ 98 | "\n", 99 | "\n", 100 | "---\n", 101 | "\n" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "id": "_DOA-JbhlT-4" 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "import numpy as np\n", 113 | "import pandas as pd\n", 114 | "\n", 115 | "!pip install jupyterplot\n", 116 | "from jupyterplot import ProgressPlot as PP\n", 117 | "\n", 118 | "from transformer_helper import *\n", 119 | "from rolling_and_plot_tf import data_plot, rolling_split, normalize, validate\n", 120 | "\n", 121 | "%reload_ext autoreload\n", 122 | "%autoreload 2" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": { 128 | "id": "pvorie1ElT-5" 129 | }, 130 | "source": [ 131 | "Will have to figure out how to set device to cuda in TensorFlow" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": { 137 | "id": "RUteRx9dlT-5" 138 | }, 139 | "source": [ 140 | "## Table of Contents\n", 141 | "\n", 142 | "- [Import](#0)\n", 143 | "- [JupyterPlot](#jup)\n", 144 | "- [Preprocessing](#win)\n", 145 | "- [Encoder](#enc)\n", 146 | " - [Encoder Layer](#enc-lay)\n", 147 | " - [Full Encoder](#full-enc)\n", 148 | "- [Transformer](#transform)\n", 149 | "- [Callbacks & Learn Rate Scheduler](#loss)\n", 150 | "- [Training](#train)\n", 151 | "- [Validate](#val)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": { 157 | "id": "0EL21GdslT-5" 158 | }, 159 | "source": [ 160 | "# Literature:\n", 161 | "\n", 162 | "\n", 163 | "According to [A Transformer-based Framework for Multivariate Time Series Representation Learning](https://dl.acm.org/doi/abs/10.1145/3447548.3467401):\n", 164 | "Using **Batch Normalization is significantly more effective** for multivariate time-series than using the traditional Layer Normalization method 
found in NLP.\n", 165 | "\n", 166 | "In addition, according to [Deep learning approach towards accurate state of charge estimation for lithium-ion batteries using self-supervised transformer model](https://www.nature.com/articles/s41598-021-98915-8#Sec9):\n", 167 | "Using a transformer network while **forgoing the Decoder Layer** is more effective for the application of State-of-Charge estimation." 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": { 173 | "id": "VG0gPyv0oDBi" 174 | }, 175 | "source": [ 176 | "$\\large{Self\\ Attention}$\n", 177 | "$$\n", 178 | "\\text { Attention }(Q, K, V)=\\operatorname{softmax}\\left(\\frac{Q K^{T}}{\\sqrt{d_{k}}}+{M}\\right) V\n", 179 | "$$" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": { 185 | "id": "k2DSwSOZlT-7" 186 | }, 187 | "source": [ 188 | "$\\large{Input}$\n", 189 | "\n", 190 | "Voltage, Current, SOC at times:\n", 191 | "$$t - window\\_size - 1 \\rightarrow t - 1 $$" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": { 197 | "id": "Bw-WpE1ulT-9" 198 | }, 199 | "source": [ 200 | "**Note**\n", 201 | "\n", 202 | "Cannot use embedding layers with battery data because of floating point values and negative values" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": { 209 | "id": "WStD-7ytlT-9" 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "@dataclass\n", 214 | "class G:\n", 215 | " #preprocess\n", 216 | " capacity = 18.02 # cell capacity in Ampere hours\n", 217 | " window_time = 96 #seconds\n", 218 | " window_size = 32\n", 219 | " slicing = window_time // window_size\n", 220 | " batch_size = 16\n", 221 | " #network\n", 222 | " dense_dim = 32\n", 223 | " model_dim = 128\n", 224 | " num_features = 3 # current, voltage, and soc at t minus G.window_size -> t minus 1\n", 225 | " num_heads = 16\n", 226 | " num_layers = 6\n", 227 | " #learning_rate_scheduler\n", 228 | " T_i = 1\n", 229 | " T_mult = 
2\n", 230 | " T_cur = 0.0\n", 231 | " #training\n", 232 | " epochs = 256 #should be a power of T_mult because of cosine annealing with warm restarts scheduler\n", 233 | " learning_rate = 0.0045\n", 234 | " min_learning_rate = 6e-11\n", 235 | "# weight_decay = 0.0 #No weight decay param in the the keras optimizers" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": { 241 | "id": "prIueTe-lT-9" 242 | }, 243 | "source": [ 244 | "\n", 245 | "# Preprocessing" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": { 252 | "id": "il6DI4Z7lT--" 253 | }, 254 | "outputs": [], 255 | "source": [ 256 | "# from google.colab import files\n", 257 | "file = pd.read_csv(\"/content/sim_data.csv\")\n", 258 | "#if using sim_data.csv:\n", 259 | "file[\"soc\"] *= 100.0" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": { 266 | "id": "SLQrFOvrlT--" 267 | }, 268 | "outputs": [], 269 | "source": [ 270 | "data_plot(data = [file],\n", 271 | " title=\"OCV v SOC\",\n", 272 | " x = [\"test time (sec)\"],\n", 273 | " y = [\"soc\"],\n", 274 | " markers = \"lines\",\n", 275 | " color = \"darkorchid\",\n", 276 | " x_title = \"Test Time (sec)\",\n", 277 | " y_title = \"SOC\"\n", 278 | " )" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": { 285 | "id": "_f7QighFlT--" 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "file = normalize(file.loc[:,[\"current\",\"voltage\",\"soc\"]].iloc[::G.slicing], G.capacity)\n", 290 | "#uses sklearn.preprocessing" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": { 297 | "id": "x79KvZ3ilT--" 298 | }, 299 | "outputs": [], 300 | "source": [ 301 | "x_train, x_test, y_train, y_test = rolling_split(file, G.window_size, train=True)\n", 302 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n", 303 | "#uses sklearn.model_selection\n", 
304 | "\n", 305 | "x_train = tf.data.Dataset.from_tensor_slices(x_train)\n", 306 | "y_train = tf.data.Dataset.from_tensor_slices(y_train)\n", 307 | "x_test = tf.data.Dataset.from_tensor_slices(x_test)\n", 308 | "y_test = tf.data.Dataset.from_tensor_slices(y_test)\n", 309 | "\n", 310 | "train_dataloader = tf.data.Dataset.zip((x_train, y_train)).batch(G.batch_size, drop_remainder=True)\n", 311 | "test_dataloader = tf.data.Dataset.zip((x_test, y_test)).batch(G.batch_size, drop_remainder=True)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": { 318 | "id": "yRmivoyVlT-_" 319 | }, 320 | "outputs": [], 321 | "source": [ 322 | "for x,y in train_dataloader:\n", 323 | " print(f\"Shape of X [window, features]: {x.shape}\")\n", 324 | " print(f\"Shape of y: {y.shape} {y.dtype}\")\n", 325 | " break" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": { 331 | "id": "blS0pEpTqRVI" 332 | }, 333 | "source": [ 334 | "\n", 335 | "# Encoder" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": { 342 | "id": "sC5vJhz29vZR" 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "def FullyConnected():\n", 347 | " return tf.keras.Sequential([\n", 348 | " tf.keras.layers.Dense(G.dense_dim, activation='relu',\n", 349 | " kernel_initializer = tf.keras.initializers.HeNormal(),\n", 350 | " bias_initializer = tf.keras.initializers.RandomUniform(minval=0.005, maxval = 0.08)\n", 351 | " ),\n", 352 | " # (G.batch_size, G.window_size, G.dense_dim)\n", 353 | " tf.keras.layers.BatchNormalization(momentum = 0.98, epsilon=5e-4),\n", 354 | " tf.keras.layers.Dense(G.dense_dim, activation='relu',\n", 355 | " kernel_initializer = tf.keras.initializers.HeNormal(),\n", 356 | " bias_initializer = tf.keras.initializers.RandomUniform(minval=0.001, maxval = 0.01)\n", 357 | " ),\n", 358 | " # (G.batch_size, G.window_size, G.dense_dim)\n", 359 | " tf.keras.layers.BatchNormalization(momentum = 
0.95, epsilon=5e-4)\n", 360 | " ])" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": { 366 | "id": "R65WbX5wqYYH" 367 | }, 368 | "source": [ 369 | "\n", 370 | "### Encoder Layer" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "metadata": { 377 | "id": "tIufbrc-9_2u" 378 | }, 379 | "outputs": [], 380 | "source": [ 381 | "class EncoderLayer(tf.keras.layers.Layer):\n", 382 | " \"\"\"\n", 383 | " The encoder layer is composed by a multi-head self-attention mechanism,\n", 384 | " followed by a simple, positionwise fully connected feed-forward network. \n", 385 | " This archirecture includes a residual connection around each of the two \n", 386 | " sub-layers, followed by batch normalization.\n", 387 | " \"\"\"\n", 388 | " def __init__(self,\n", 389 | " num_heads,\n", 390 | " num_features,\n", 391 | " dense_dim,\n", 392 | " dropout_rate,\n", 393 | " batchnorm_eps):\n", 394 | " super(EncoderLayer, self).__init__()\n", 395 | "\n", 396 | " self.mha = MultiHeadAttention(\n", 397 | " num_heads = num_heads,\n", 398 | " key_dim = dense_dim,\n", 399 | " dropout = dropout_rate,\n", 400 | " kernel_initializer = tf.keras.initializers.HeNormal(),\n", 401 | " # kernel_regularizer = tf.keras.regularizers.L2(1e-4),\n", 402 | " bias_initializer = tf.keras.initializers.RandomUniform(minval=0.001, maxval = 0.01)\n", 403 | " )\n", 404 | " \n", 405 | " #feed-forward-network\n", 406 | " self.ffn = FullyConnected()\n", 407 | " \n", 408 | " \n", 409 | " self.batchnorm1 = BatchNormalization(momentum = 0.95, epsilon=batchnorm_eps)\n", 410 | " self.batchnorm2 = BatchNormalization(momentum = 0.95, epsilon=batchnorm_eps)\n", 411 | "\n", 412 | " self.dropout_ffn = Dropout(dropout_rate)\n", 413 | " \n", 414 | " def call(self, x, training):\n", 415 | " \"\"\"\n", 416 | " Forward pass for the Encoder Layer\n", 417 | " \n", 418 | " Arguments:\n", 419 | " x -- Tensor of shape (G.batch_size, G.window_size, G.num_features)\n", 420 | " 
training -- Boolean, set to true to activate\n", 421 | " the training mode for dropout layers\n", 422 | " Returns:\n", 423 | " encoder_layer_out -- Tensor of shape (G.batch_size, G.window_size, G.num_features)\n", 424 | " \"\"\"\n", 425 | " # Dropout is added by Keras automatically if the dropout parameter is non-zero during training\n", 426 | " \n", 427 | " attn_output = self.mha(query = x,\n", 428 | " value = x) # Self attention\n", 429 | " \n", 430 | " out1 = self.batchnorm1(tf.add(x, attn_output)) # (G.batch_size, G.window_size, G.dense_dim)\n", 431 | " \n", 432 | " ffn_output = self.ffn(out1)\n", 433 | " \n", 434 | " ffn_output = self.dropout_ffn(ffn_output) # (G.batch_size, G.window_size, G.dense_dim)\n", 435 | " \n", 436 | " encoder_layer_out = self.batchnorm2(tf.add(ffn_output, out1))\n", 437 | " # (G.batch_size, G.window_size, G.dense_dim)\n", 438 | " return encoder_layer_out" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": { 444 | "id": "IKgObFUUlT_B" 445 | }, 446 | "source": [ 447 | "\n", 448 | "### Full Encoder" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": null, 454 | "metadata": { 455 | "id": "7j2Tjr0K0t0I" 456 | }, 457 | "outputs": [], 458 | "source": [ 459 | "class Encoder(tf.keras.layers.Layer):\n", 460 | " \"\"\"\n", 461 | " The entire Encoder starts by passing the input to an embedding layer \n", 462 | " and using positional encoding to then pass the output through a stack of\n", 463 | " encoder Layers\n", 464 | " \n", 465 | " \"\"\" \n", 466 | " def __init__(self,\n", 467 | " num_layers = G.num_layers,\n", 468 | " num_heads = G.num_heads,\n", 469 | " num_features = G.num_features,\n", 470 | " dense_dim = G.dense_dim,\n", 471 | " maximum_position_encoding = G.window_size,\n", 472 | " dropout_rate=0.15,\n", 473 | " batchnorm_eps=1e-4):\n", 474 | " \n", 475 | " super(Encoder, self).__init__()\n", 476 | "\n", 477 | " self.num_layers = num_layers\n", 478 | "\n", 479 | " #linear input layer\n", 
480 | " self.lin_input = tf.keras.layers.Dense(dense_dim, activation=\"relu\")\n", 481 | " \n", 482 | " self.pos_encoding = positional_encoding(maximum_position_encoding, \n", 483 | " dense_dim)\n", 484 | "\n", 485 | "\n", 486 | " self.enc_layers = [EncoderLayer(num_heads = num_heads,\n", 487 | " num_features = num_features,\n", 488 | " dense_dim = dense_dim,\n", 489 | " dropout_rate = dropout_rate,\n", 490 | " batchnorm_eps = batchnorm_eps) \n", 491 | " for _ in range(self.num_layers)]\n", 492 | " \n", 493 | " def call(self, x, training):\n", 494 | " \"\"\"\n", 495 | " Forward pass for the Encoder\n", 496 | " \n", 497 | " Arguments:\n", 498 | " x -- Tensor of shape (G.batch_size, G.window_size, G.num_features)\n", 499 | " training -- Boolean, set to true to activate\n", 500 | " the training mode for dropout layers\n", 501 | " mask -- Boolean mask to ensure that the padding is not \n", 502 | " treated as part of the input\n", 503 | " Returns:\n", 504 | " Tensor of shape (G.batch_size, G.dense_dim)\n", 505 | " \"\"\"\n", 506 | " x = self.lin_input(x)\n", 507 | " seq_len = tf.shape(x)[1]\n", 508 | " x += self.pos_encoding[:, :seq_len, :]\n", 509 | " \n", 510 | " for i in range(self.num_layers):\n", 511 | " x = self.enc_layers[i](x, training)\n", 512 | " \n", 513 | " # only need the final time's data : time = t-1 from the window\n", 514 | " # x has shape (G.batch_size, G.window_size, G.dense_dim)\n", 515 | " # but I am only returning time t-1:\n", 516 | " return x[:, -1, :] # (G.batch_size, G.dense_dim)" 517 | ] 518 | }, 519 | { 520 | "cell_type": "markdown", 521 | "metadata": { 522 | "id": "_U2F58rnlT_C" 523 | }, 524 | "source": [ 525 | " \n", 526 | "# Transformer" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": null, 532 | "metadata": { 533 | "id": "QHymPmaj-2ba" 534 | }, 535 | "outputs": [], 536 | "source": [ 537 | "class Transformer(tf.keras.Model):\n", 538 | " \"\"\"\n", 539 | " Encoder-only transformer: an Encoder followed by a dense output stack (there is no Decoder)\n", 
540 | " \"\"\"\n", 541 | " def __init__(self,\n", 542 | " num_layers = G.num_layers,\n", 543 | " num_heads = G.num_heads,\n", 544 | " dense_dim = G.dense_dim,\n", 545 | " max_positional_encoding_input = G.window_size,\n", 546 | " max_positional_encoding_target = G.window_size):\n", 547 | " super(Transformer, self).__init__()\n", 548 | "\n", 549 | "\n", 550 | " self.encoder = Encoder()\n", 551 | "\n", 552 | " self.final_stack = tf.keras.Sequential([\n", 553 | " tf.keras.layers.Dense(\n", 554 | " dense_dim, activation = \"relu\",\n", 555 | " kernel_initializer = tf.keras.initializers.HeNormal(),\n", 556 | " bias_initializer = tf.keras.initializers.RandomUniform(minval=0.001, maxval = 0.02)\n", 557 | " ),\n", 558 | " tf.keras.layers.BatchNormalization(momentum = 0.97, epsilon=5e-4),\n", 559 | "\n", 560 | " tf.keras.layers.Dense(\n", 561 | " 1, activation = \"sigmoid\",\n", 562 | " bias_initializer = tf.keras.initializers.RandomUniform(minval=0.001, maxval = 0.005)\n", 563 | " )\n", 564 | " ])\n", 565 | " \n", 566 | " def call(self, x, training):\n", 567 | " \"\"\"\n", 568 | " Forward pass for the entire Transformer\n", 569 | " Arguments:\n", 570 | " x -- tf.data.Dataset containing batch inputs and targets\n", 571 | " batched & windowed voltage, current and soc data with batched soc targets\n", 572 | " training -- Boolean, set to true to activate\n", 573 | " the training mode for dropout and batchnorm layers\n", 574 | " Returns:\n", 575 | " final_output -- SOC prediction at time t\n", 576 | " \n", 577 | " \"\"\"\n", 578 | " enc_output = self.encoder(x, training) # (G.batch_size, G.dense_dim)\n", 579 | " \n", 580 | " final_output = self.final_stack(enc_output) # (G.batch_size, 1)\n", 581 | "\n", 582 | "\n", 583 | " \n", 584 | " return final_output" 585 | ] 586 | }, 587 | { 588 | "cell_type": "markdown", 589 | "metadata": { 590 | "id": "kiILRshLv9Bx" 591 | }, 592 | "source": [ 593 | "## Note:\n", 594 | "\n", 595 | "The `training` argument in the model and layer calls 
sets the `keras.backend.learning_phase()` value to the appropriate value for the use case.\n", 596 | "i.e.\n", 597 | "- If I am using the train_loop(), `training` is set to True, which means Dropout layers randomly drop units and BatchNormalization layers normalize with the statistics of the current batch (while updating their moving averages).\n", 598 | "- If I am using the test_loop(), `training` is set to False, which means Dropout layers are disabled (inputs pass through unchanged) and BatchNormalization layers stay active but normalize with their stored moving mean and variance." 599 | ] 600 | }, 601 | { 602 | "cell_type": "markdown", 603 | "metadata": { 604 | "id": "Q6IncgGX4z_9" 605 | }, 606 | "source": [ 607 | "If Using **TPUs** use the cell right below this text\n", 608 | "\n", 609 | "---\n", 610 | "\n" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": null, 616 | "metadata": { 617 | "id": "un5xiWL644Uf" 618 | }, 619 | "outputs": [], 620 | "source": [ 621 | "# tf.keras.backend.clear_session()\n", 622 | "# with strategy.scope():\n", 623 | "# model = Transformer()" 624 | ] 625 | }, 626 | { 627 | "cell_type": "markdown", 628 | "metadata": { 629 | "id": "yeCjW7VP44fP" 630 | }, 631 | "source": [ 632 | "\n", 633 | "\n", 634 | "---\n", 635 | "\n" 636 | ] 637 | }, 638 | { 639 | "cell_type": "markdown", 640 | "metadata": { 641 | "id": "nJ8bVUEh45Mj" 642 | }, 643 | "source": [ 644 | "If **not using TPUs**:\n", 645 | "\n", 646 | "---\n", 647 | "\n" 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": null, 653 | "metadata": { 654 | "id": "ovllyglWlT_C" 655 | }, 656 | "outputs": [], 657 | "source": [ 658 | "tf.keras.backend.clear_session()\n", 659 | "model = Transformer()\n", 660 | "model.build((G.batch_size, G.window_size, G.num_features))\n", 661 | "model.summary(expand_nested=True)" 662 | ] 663 | }, 664 | { 665 | "cell_type": "markdown", 666 | "metadata": { 667 | "id": "SWtYX-8348Z1" 668 | }, 669 | "source": [ 670 | "\n", 671 | "\n", 672 | "---\n", 673 | "\n" 674 | ] 675 | }, 676 | { 677 | "cell_type": "code", 678 | "execution_count": null, 679 | "metadata": { 680 | "id": "JUcLoUmWlT_D" 681 | }, 682 | "outputs": 
[], 683 | "source": [ 684 | "model.load_weights(\"/content/drive/MyDrive/transformer_soc/model_weights.tf\")" 685 | ] 686 | }, 687 | { 688 | "cell_type": "markdown", 689 | "metadata": { 690 | "id": "yYtQv1TtlT_D" 691 | }, 692 | "source": [ 693 | "\n", 694 | "# Callbacks and Scheduler" 695 | ] 696 | }, 697 | { 698 | "cell_type": "markdown", 699 | "metadata": { 700 | "id": "aTN3TiSblT_D" 701 | }, 702 | "source": [ 703 | "**Learning Rate Scheduler**\n", 704 | "\n", 705 | "Cosine Annealing with Warm Restarts proposed by Loshchilov and Hutter in [SGDR: Stochastic Gradient Descent with Warm Restarts](https://doi.org/10.48550/arXiv.1608.03983)" 706 | ] 707 | }, 708 | { 709 | "cell_type": "markdown", 710 | "metadata": { 711 | "id": "xWt1eUd9o6WA" 712 | }, 713 | "source": [ 714 | "$$\\mu_t = \\mu_{min} + \\frac{1}{2}(\\mu_{max} - \\mu_{min})\\cdot (1 + \\cos (\\frac{T_{cur}}{T_i}\\pi))$$\n", 715 | "\n", 716 | "Where:\n", 717 | " - $\\mu$ is the learning_rate, subscript $t$ is for time = $t$\n", 718 | " - $T_{cur}$ is the number of epochs since the last restart\n", 719 | " - $T_i$ is the number of epochs between two restarts\n", 720 | "\n", 721 | "Note:\n", 722 | " - When $T_{cur} = T_i \\rightarrow \\mu_t = \\mu_{min}$\n", 723 | " - When $T_{cur} = 0 \\rightarrow \\mu_t = \\mu_{max}$" 724 | ] 725 | }, 726 | { 727 | "cell_type": "markdown", 728 | "metadata": { 729 | "id": "sLjZ7ICoSGif" 730 | }, 731 | "source": [ 732 | "---\n", 733 | "**The Cell below is for the LambdaCallback Class in keras in order to implement Cosine Annealing with Warm Restarts** ↓\n", 734 | "\n", 735 | "Used with callbacks in model.fit()\n", 736 | "\n", 737 | "---" 738 | ] 739 | }, 740 | { 741 | "cell_type": "code", 742 | "execution_count": null, 743 | "metadata": { 744 | "id": "mZg1uSmDQMTZ" 745 | }, 746 | "outputs": [], 747 | "source": [ 748 | "def schedule(batch, logs):\n", 749 | " '''\n", 750 | " This is the on_batch_end function for the LambdaCallback Class\n", 751 | " I am trying to see if I can use the 
model.compile(), model.fit(), model.evaluate(), trio with\n", 752 | " Cosine Annealing with Warm Restarts\n", 753 | " Sets the learning rate of the optimizer based on the schedule described above (returns nothing)\n", 754 | " \n", 755 | " Call after every batch\n", 756 | " '''\n", 757 | " \n", 758 | " mu_i = G.min_learning_rate + 0.5 * (\n", 759 | " G.learning_rate - G.min_learning_rate) * (\n", 760 | " 1 + tf.math.cos(np.pi * G.T_cur / G.T_i))\n", 761 | " \n", 762 | " G.T_cur += G.batch_size / len(x_train)\n", 763 | " if np.isclose(G.T_cur, G.T_i):\n", 764 | " G.T_i *= G.T_mult\n", 765 | " G.T_cur = 0.0\n", 766 | " K.set_value(model.optimizer.learning_rate, mu_i)" 767 | ] 768 | }, 769 | { 770 | "cell_type": "markdown", 771 | "metadata": { 772 | "id": "bzZcCFve2o5O" 773 | }, 774 | "source": [ 775 | "**Progress Plot Callback**" 776 | ] 777 | }, 778 | { 779 | "cell_type": "code", 780 | "execution_count": null, 781 | "metadata": { 782 | "id": "ZeomH0iN2o5O" 783 | }, 784 | "outputs": [], 785 | "source": [ 786 | "class ProgressCallback(tf.keras.callbacks.Callback):\n", 787 | " def on_epoch_end(self, epoch, logs = None):\n", 788 | " train_loss = logs[\"loss\"]\n", 789 | " train_acc = 100.0 - logs[\"mean_absolute_percentage_error\"]\n", 790 | " test_loss = logs[\"val_loss\"]\n", 791 | " test_acc = 100.0 - logs[\"val_mean_absolute_percentage_error\"]\n", 792 | " global pp\n", 793 | " pp.update([[train_loss, test_loss],\n", 794 | " [train_acc, test_acc]])" 795 | ] 796 | }, 797 | { 798 | "cell_type": "markdown", 799 | "metadata": { 800 | "id": "A699g9Sp2o5P" 801 | }, 802 | "source": [ 803 | "**Save Model Progress Callback**\n", 804 | "\n", 805 | "Does not work with TPUs" 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": null, 811 | "metadata": { 812 | "id": "dTdfb-br2o5P" 813 | }, 814 | "outputs": [], 815 | "source": [ 816 | "class SaveModel(tf.keras.callbacks.Callback):\n", 817 | " def on_epoch_end(self, epoch, logs = None):\n", 818 | " if epoch != 0 and epoch % 15 == 0:\n", 
819 | " self.model.save_weights(\"/content/drive/MyDrive/transformer_soc/model_weights.h5\")" 820 | ] 821 | }, 822 | { 823 | "cell_type": "markdown", 824 | "metadata": { 825 | "id": "Mgkzjt8NReWS" 826 | }, 827 | "source": [ 828 | "**Early Stopping and Saving Best Model checkpoint Callbacks**" 829 | ] 830 | }, 831 | { 832 | "cell_type": "code", 833 | "execution_count": null, 834 | "metadata": { 835 | "id": "_-Lh-49NRb_r" 836 | }, 837 | "outputs": [], 838 | "source": [ 839 | "model_options = tf.saved_model.SaveOptions(experimental_io_device=\"/job:localhost\")\n", 840 | "# earlystopping = EarlyStopping(monitor='val_mean_absolute_percentage_error', patience=150, verbose=0, mode='min')\n", 841 | "mcp_save = ModelCheckpoint('/content/drive/MyDrive/transformer_soc/tpu_model_weights', save_format = \"tf\", save_best_only=True, monitor='val_mean_absolute_percentage_error', mode='min', options = model_options)" 842 | ] 843 | }, 844 | { 845 | "cell_type": "code", 846 | "execution_count": null, 847 | "metadata": { 848 | "id": "hg7FmZOHlT_E" 849 | }, 850 | "outputs": [], 851 | "source": [ 852 | "loss_object = tf.keras.losses.LogCosh()\n", 853 | "\n", 854 | "optimizer = tf.keras.optimizers.Adam(learning_rate = G.learning_rate,\n", 855 | " beta_1 = 0.9,\n", 856 | " beta_2 = 0.999\n", 857 | " )\n", 858 | "\n", 859 | "#cos_anneal is for the model.fit() call\n", 860 | "cos_anneal = tf.keras.callbacks.LambdaCallback(on_batch_end = schedule)\n", 861 | "\n", 862 | "#progress plot callback\n", 863 | "pp_update = ProgressCallback()\n", 864 | "\n", 865 | "#model parameters save callback\n", 866 | "model_save = SaveModel() #This is optional" 867 | ] 868 | }, 869 | { 870 | "cell_type": "markdown", 871 | "metadata": { 872 | "id": "45--3qknlT_H" 873 | }, 874 | "source": [ 875 | "\n", 876 | "# Training\n", 877 | "\n", 878 | "**There are two compile calls, one requires a TPU**" 879 | ] 880 | }, 881 | { 882 | "cell_type": "code", 883 | "execution_count": null, 884 | "metadata": { 885 | "id": 
"ynnk3or6-FMd" 886 | }, 887 | "outputs": [], 888 | "source": [ 889 | "pp = PP(plot_names = [\"Mean Log Loss\", \"% Accuracy\"],\n", 890 | " line_names = [\"Train Loop\", \"Test Loop\"],\n", 891 | " x_label = \"epochs\"\n", 892 | " )\n", 893 | "\n", 894 | "# ##### if using a TPU:\n", 895 | "# with strategy.scope():\n", 896 | "# model.compile(optimizer, loss_object, steps_per_execution = 3, metrics=[\"mean_absolute_percentage_error\"])\n", 897 | "\n", 898 | "##### else:\n", 899 | "# model.compile(optimizer, loss_object, metrics=[\"mean_absolute_percentage_error\"])\n", 900 | "## Dont compile after training, it causes issues.\n", 901 | "\n", 902 | "#-----------------------------------------------------------------\n", 903 | "#Note: can add `model_save` to the callbacks list in model.fit()\n", 904 | "# it saves the model params to the google drive every 15 epochs\n", 905 | "#-------------------------------------------------------------------\n", 906 | "\n", 907 | "steps_per_epoch = len(train_dataloader) // G.epochs\n", 908 | "validation_steps = len(test_dataloader) // G.epochs\n", 909 | "\n", 910 | "history = model.fit(train_dataloader,\n", 911 | " batch_size = G.batch_size,\n", 912 | " epochs = G.epochs,\n", 913 | " verbose = 1,\n", 914 | " steps_per_epoch = steps_per_epoch,\n", 915 | " callbacks = [cos_anneal, pp_update],\n", 916 | " validation_data = test_dataloader,\n", 917 | " validation_steps = validation_steps\n", 918 | " )" 919 | ] 920 | }, 921 | { 922 | "cell_type": "code", 923 | "execution_count": null, 924 | "metadata": { 925 | "id": "9yF6RygxlT_I" 926 | }, 927 | "outputs": [], 928 | "source": [ 929 | "model.save(\"/content/drive/MyDrive/transformer_soc/tpu_model.h5\") #doesnt work with TPUs" 930 | ] 931 | }, 932 | { 933 | "cell_type": "code", 934 | "execution_count": null, 935 | "metadata": { 936 | "id": "ljBF-U_vIjrL" 937 | }, 938 | "outputs": [], 939 | "source": [ 940 | "#works with TPUs\n", 941 | "checkpoint = tf.train.Checkpoint(model = model)\n", 942 | 
"options = tf.train.CheckpointOptions(experimental_io_device=\"/job:localhost\")\n", 943 | "checkpoint.save(\"/content/drive/MyDrive/transformer_soc/tpu_model/ckpt\", options=options)" 944 | ] 945 | }, 946 | { 947 | "cell_type": "markdown", 948 | "metadata": { 949 | "id": "L5pSwH7QlT_I" 950 | }, 951 | "source": [ 952 | "\n", 953 | "# Validate" 954 | ] 955 | }, 956 | { 957 | "cell_type": "markdown", 958 | "metadata": { 959 | "id": "JYr2y9eulT_I" 960 | }, 961 | "source": [ 962 | "**Dev Set**" 963 | ] 964 | }, 965 | { 966 | "cell_type": "code", 967 | "execution_count": null, 968 | "metadata": { 969 | "id": "CuY9saCblT_I", 970 | "scrolled": true 971 | }, 972 | "outputs": [], 973 | "source": [ 974 | "visualize_dev = validate(model, test_dataloader, dev = True)" 975 | ] 976 | }, 977 | { 978 | "cell_type": "markdown", 979 | "metadata": { 980 | "id": "v5uLkWkLlT_I" 981 | }, 982 | "source": [ 983 | "**Entire Dataset**" 984 | ] 985 | }, 986 | { 987 | "cell_type": "code", 988 | "execution_count": null, 989 | "metadata": { 990 | "id": "gjvsvbIllT_I" 991 | }, 992 | "outputs": [], 993 | "source": [ 994 | "x_set, y_set = rolling_split(file, G.window_size, train = False)\n", 995 | "\n", 996 | "x_set = tf.data.Dataset.from_tensor_slices(x_set)\n", 997 | "y_set = tf.data.Dataset.from_tensor_slices(y_set)\n", 998 | "\n", 999 | "set_dataloader = tf.data.Dataset.zip((x_set, y_set)).batch(G.batch_size, drop_remainder=True)\n", 1000 | "\n", 1001 | "visualize = validate(model, set_dataloader, dev = False)" 1002 | ] 1003 | } 1004 | ], 1005 | "metadata": { 1006 | "accelerator": "TPU", 1007 | "colab": { 1008 | "name": "transform_notebook.ipynb", 1009 | "provenance": [] 1010 | }, 1011 | "gpuClass": "standard", 1012 | "kernelspec": { 1013 | "display_name": "Python 3 (ipykernel)", 1014 | "language": "python", 1015 | "name": "python3" 1016 | }, 1017 | "language_info": { 1018 | "codemirror_mode": { 1019 | "name": "ipython", 1020 | "version": 3 1021 | }, 1022 | "file_extension": ".py", 1023 | 
"mimetype": "text/x-python", 1024 | "name": "python", 1025 | "nbconvert_exporter": "python", 1026 | "pygments_lexer": "ipython3", 1027 | "version": "3.10.5" 1028 | } 1029 | }, 1030 | "nbformat": 4, 1031 | "nbformat_minor": 1 1032 | } 1033 | --------------------------------------------------------------------------------