├── .gitignore
├── README.md
├── transformer_helper.py
├── rolling_and_plot_tf.py
└── transform_notebook.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints/
2 | .DS_Store
3 | __pycache__/
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Transformers + TensorFlow and Pandas for SOC Estimation
2 |
3 | **The Testing branch is the most up to date**
4 |
5 | Repo with the Decoder implemented: [Attar's Github Repo](https://github.com/att-ar/transform_decode_soc)
6 |
7 | Building a transformer neural network using TensorFlow and Transformers in Python with the goal of predicting Li-ion State of Charge based on real-time voltage, current and delta time data.
8 |
9 | This transformer is composed of only the encoder layer, and it uses Batch Normalization instead of the Layer Normalization found in NLP.
10 | This was done because literature said these two changes proved significantly more effective than the NLP application of transformers.
11 |
12 | The transformers' input will be voltage, current, delta time and previous SOC points in a batch of windowed data of shape:
13 | ```(G.batch_size, G.window_size, G.num_features)```
14 |
15 | The voltage, current and soc data will be from time: $$t - \text{windowsize} \rightarrow t$$
16 | The output should be the SOC prediction at time $t + 1$ for each batch, the output shape should be `(G.batch_size, 1)`
17 |
--------------------------------------------------------------------------------
/transformer_helper.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from os import environ
4 | environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
5 |
6 |
def get_angles(pos, k, d: int):
    """
    Compute the raw angles that feed the sinusoidal positional encoding.

    Arguments:
        pos -- Column vector of positions, e.g. [[0], [1], ..., [N-1]]
        k --   Row vector spanning the encoding dimensions, e.g. [[0, 1, ..., d-1]]
        d --   Encoding size

    Returns:
        pos / 10000 ** (2 * (k // 2) / d); for the column/row inputs
        described above the two broadcast into a (N, d) array.
    """
    # Dimensions 2i and 2i+1 share one frequency, so both map to the same i.
    pair_index = k // 2
    exponent = 2 * pair_index / d
    return pos / (10000 ** exponent)
25 |
26 |
def positional_encoding(positions: int, d: int):
    """
    Build the full table of sinusoidal positional encodings ahead of time.

    Arguments:
        positions -- Maximum number of positions to be encoded
        d --         Encoding size

    Returns:
        pos_encoding -- float32 tensor of shape (1, positions, d)
    """
    position_column = np.arange(positions)[:, np.newaxis]
    dimension_row = np.arange(d)[np.newaxis, :]
    table = get_angles(position_column, dimension_row, d)

    # Even columns (2i) carry the sine, odd columns (2i+1) the cosine.
    table[:, 0::2] = np.sin(table[:, 0::2])
    table[:, 1::2] = np.cos(table[:, 1::2])

    # Add the leading axis of size one so the table broadcasts over a batch.
    batched = table[np.newaxis, :, :].reshape(1, positions, d)

    return tf.cast(batched, dtype=tf.float32)
52 |
53 |
def create_look_ahead_mask(sequence_length):
    """
    Returns a lower triangular matrix of ones (ones on and below the diagonal,
    zeros above it).

    Row i therefore has ones only in columns 0..i.
    NOTE(review): presumably meant to be used as an attention mask so that a
    time step cannot see later time steps -- confirm against the caller.

    Arguments:
        sequence_length -- matrix size (sequence length is the number of time steps per input
                           input.shape = [batch_size, sequence_length, num_features])

    Returns:
        mask -- (sequence_length, sequence_length) float32 tensor

    >>> create_look_ahead_mask(5)
    <tf.Tensor: shape=(5, 5), dtype=float32, numpy=
    array([[1., 0., 0., 0., 0.],
           [1., 1., 0., 0., 0.],
           [1., 1., 1., 0., 0.],
           [1., 1., 1., 1., 0.],
           [1., 1., 1., 1., 1.]], dtype=float32)>
    """
    # Build the 2-D matrix directly: tf.Tensor has no .squeeze() method, and
    # squeezing a (1, 1, 1) mask would also collapse it to a scalar.
    # band_part(x, -1, 0) keeps the whole lower triangle and the diagonal.
    return tf.linalg.band_part(
        tf.ones((sequence_length, sequence_length)), -1, 0)
77 |
--------------------------------------------------------------------------------
/rolling_and_plot_tf.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
4 | import plotly.express as px
5 | import plotly.graph_objects as go
6 | from plotly.subplots import make_subplots
7 |
8 | from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler
9 | from sklearn.model_selection import train_test_split
10 |
11 | import torch
12 |
13 |
def helper(value, j):
    '''
    helper function for data_plot()

    Picks the styling value that belongs to the j-th trace of one DataFrame:
    - the placeholder string "None" means "nothing was passed" -> None
    - a list that has an element j -> that element
    - anything else is treated as a single value that applies to the
      first trace only (j == 0); later traces get None
    '''
    if value == "None":
        return None
    if type(value) is list and j < len(value):
        return value[j]
    # Either a scalar, or a list too short to have an element j.
    return value if j == 0 else None
27 |
28 |
def data_plot(data=None, x=None, y=None,
              x_title=None, y_title=None, title=None,
              **kwargs):
    '''
    DataFrame or list of DataFrames, list of str, list of str, str, str, str, kwargs
    -> plotly figure

    Draws one scatter trace per (x column, y column) pair on a single figure.

    Precondition: If an argument has multiple objects, they must be in a list
                  (can have nested lists).
                  The order of the arguments must be in the same order as the DataFrames.
                  There must be the same number of x columns as y columns passed.

    ex) data_plot(
            data = [df1, df2],
            x = [ "SOC", "SOC-Dis" ],
            y = [ "OCV", "OCV-Dis" ],
            mode = ["lines+markers", "markers"],
            color = ["mintcream", "darkorchid"]
        )

    Parameters:
    `data` DataFrame or list of DataFrames

    `x` list of columns or nested lists of columns, one entry per DataFrame
        example of each option in order:
            x = ["SOC-Dis"]
            x = ["SOC-Dis","SOC-Chg","SOC"]
            x = [ ["Test Time (sec)","Step Time (sec)"], "Step"]
                Test Time and Step Time are both from the same DataFrame;
                there must be two y columns as well.

    `y` list of columns or nested lists of columns
        View `x` for help

    `x_title` str
        the name of the x_axis to be displayed
        else None

    `y_title` str
        the name of the y_axis to be displayed
        else None

    `title` str
        The title of the Plot
        default None will not add a title

    **kwargs: (alphabetical order)

    `color` str, list of str, nested lists of str:
        same principle as above arguments,
        assigns the color of the individual data lines.
        It is forwarded unchanged as the plotly marker color, so it may be
        anything plotly accepts there, e.g. a hex string ('#ff0000'),
        an rgb/rgba, hsl/hsla or hsv/hsva string, or a named CSS color
        such as "mintcream" or "darkorchid" (see the plotly documentation
        for the full list of names).
        if no value is passed for a plot, plotly will do it automatically.

    `mode` str, list of str, nested lists of str:
        forwarded unchanged to plotly; None when not passed.
        Note: str must be one of "lines", "markers", "lines+markers"
        example of each option in order:
            mode = "markers"
            mode = ["lines+markers", "lines"]
            mode = ["lines+markers",["lines","lines"]]

    `name` str, list of str, nested list of strs
        same principle as above arguments
        assigns the names of the individual data lines to be displayed in the legend

    `size` int/float, list of int/float or nested lists of int/float
        same principle as above arguments
        assigns the marker size of the individual data lines
        if no value is passed, plotly will do it automatically.

    Returns:
        the plotly figure.
        Invalid arguments do not raise: a str starting with "Error:" is
        returned instead, so callers should check the type of the result.

    >>>df1 = generate_ocv_pts("JMFM_12_SOC_OCV_Test_220411.txt", to_csv = False)
    >>>df2 = ocv_estimate(df1, to_csv = False)
    >>>data_plot(data = [df1,df2],
          x=[ ["SOC-Chg","SOC-Dis"],"SOC" ],
          y = [ ["OCV-Chg","OCV-Dis"], "OCV" ],
          title = "JMFM-12 OCV vs. SOC Curve",
          x_title = "SOC (%)",
          y_title = "OCV (V)",
          mode = [ ["markers","markers"] ],
          color = [ ["violet","lightcoral"], "darkorchid"],
          name = [ ["Charge-OCV","Discharge-OCV"], "OCV"],
          size = [[4.5,4.5]]
         )
    figure...
    '''
    # A bare DataFrame counts as exactly one frame. len(DataFrame) is its row
    # count, which must never be used as "number of DataFrames".
    n_frames = len(data) if isinstance(data, list) else 1

    if len(x) != n_frames or len(y) != n_frames:
        return '''Error: x and y columns passed much match the number of DataFrames passed
        Use nested lists for multiple columns from the same DataFrame
        '''

    # A list-valued style argument may be shorter than the number of frames
    # (the rest fall back to plotly defaults) but never longer.
    for key, plural in (("mode", "modes"), ("color", "colors"),
                        ("name", "names"), ("size", "sizes")):
        value = kwargs.get(key)
        if isinstance(value, list) and len(value) > n_frames:
            return f"Error: passed more {plural} than DataFrames"

    # One row per DataFrame; each cell is either a single value or a nested
    # list holding one value per trace of that DataFrame.
    frame = pd.DataFrame(data={"x": x, "y": y})

    for i in ["color", "mode", "name", "size"]:
        frame = frame.join(
            pd.Series(kwargs.get(i), name=i, dtype="object"),
            how="outer")

    # helper() understands the string "None" as "nothing was passed".
    frame.fillna("None", inplace=True)

    figure = make_subplots(
        x_title=x_title, y_title=y_title, subplot_titles=[title])

    for i in frame.index:
        use_data = data[i] if isinstance(data, list) else data

        x_cols = frame.loc[i, "x"]
        y_cols = frame.loc[i, "y"]
        if not isinstance(x_cols, list):
            # Single column pair: handle it as a one-element nested list so
            # both cases share the trace-building code below.
            x_cols, y_cols = [x_cols], [y_cols]

        for j, use_x in enumerate(x_cols):
            use_y = y_cols[j]
            figure.add_trace(
                go.Scatter(
                    x=use_data[use_x], y=use_data[use_y],
                    mode=helper(frame.loc[i, "mode"], j),
                    marker={
                        "size": helper(frame.loc[i, "size"], j),
                        "color": helper(frame.loc[i, "color"], j)},
                    name=helper(frame.loc[i, "name"], j))
            )
    return figure
241 |
242 |
243 | # -------------------------------------------------------
244 |
def normalize(data: pd.DataFrame, capacity: float):
    '''
    pd.DataFrame -> pd.DataFrame
    Precondition: "delta t" is removed from the DataFrame

    Normalizes the data and prints the per-column variance and mean of the result.
    Voltage is scaled between 0 and 1 with sklearn's MinMaxScaler;
    Current is divided by `capacity` to become C-rate
    SOC is scaled between 0 and 1 (just divided by 100)

    The input DataFrame is left untouched; the scaling is applied to a copy.

    Parameters:
    `data` pd.DataFrame with the columns "current", "voltage" and "soc"
    `capacity` float
        value the current column is divided by

    Output:
        normalized pd.DataFrame
    '''
    # Work on a copy: callers often pass a .loc/.iloc slice, and writing into
    # that would either raise SettingWithCopyWarning or silently modify the
    # caller's data.
    data = data.copy()

    data["current"] = data["current"] / capacity
    # fit_transform needs a 2-D (n, 1) input and returns the same shape;
    # flatten it again before storing it as a column.
    data["voltage"] = MinMaxScaler((0, 1)).fit_transform(
        data["voltage"].values.reshape(-1, 1)).ravel()
    data["soc"] = data["soc"] / 100.

    print(f'''Scaled stats:

    variance:\n{data.var(axis = 0)},

    mean:\n{data.mean(axis=0)}''')

    return data
270 |
271 | # -------------------------------------------------------
272 |
273 |
def rolling_split_trial(df, window_size):
    '''
    Cuts the data into overlapping windows of `window_size` consecutive rows.

    Each row of a window holds current and voltage (and "delta t" when that
    column exists) at time t together with SOC at time t-1.
    The label of a window is the SOC at the time of its last row.

    Returns (x, y) as float32 arrays:
        x -- (num_windows, window_size, num_features)
        y -- (num_windows, 1)
    '''
    feature_cols = ["current", "voltage"]
    if "delta t" in df.columns:
        feature_cols = ["delta t"] + feature_cols

    # Features start one row later than SOC, so after re-indexing row r pairs
    # the measurements at r+1 with the SOC at r.
    shifted_features = df[feature_cols].iloc[1:].reset_index(drop=True)
    previous_soc = df["soc"].iloc[:-1].reset_index(drop=True)
    inputs = shifted_features.join(previous_soc, how="outer")

    roller = inputs.rolling(window=window_size,
                            min_periods=window_size - 2,
                            method="table")
    windows = []
    for chunk in roller:
        windows.append(chunk.values)
    # Iterating a rolling object also yields the short start-up windows;
    # drop the first `window_size` of them.
    windows = windows[window_size:]

    targets = df["soc"].iloc[window_size + 1:].values[:, np.newaxis]

    return np.array(windows, dtype="float32"), np.array(targets, dtype="float32")
300 |
301 |
def rolling_split(df, window_size, test_size=0.1, train=True):
    '''
    Precondition: "delta t" is not in the columns

    Cuts the data into overlapping windows of `window_size` consecutive rows.
    Three input features per row: current, voltage and SOC.
    The label of a window is the SOC of the row right after its last row,
    so the prediction of SOC at time t only uses information up to t-1.

    Parameters:
    `window_size` int
        the number of consecutive data points needed to form a data window
    `test_size` float, greater than 0 and at most 0.2
        the ratio of data points allocated to the dev/test set
        (checked even when `train` is False)
    `train` bool
        True: shuffle and split with sklearn.model_selection.train_test_split
        False: keep every window in its original order

    Returns (float32 arrays):
        train=True  -> the train_test_split result for (x, y)
        train=False -> (x, y) with
                       x (num_windows, window_size, 3) and y (num_windows, 1)

    Raises AssertionError when the precondition or the test_size limits are violated.
    '''
    assert "delta t" not in df.columns
    assert isinstance(test_size, float)
    assert 0 < test_size <= 0.2

    # The last row can only ever be a label, never part of an input window.
    history = df[["current", "voltage", "soc"]].iloc[:-1]
    roller = history.rolling(window=window_size,
                             min_periods=window_size - 2,
                             method="table")
    windows = []
    for chunk in roller:
        windows.append(chunk.values)
    # Iterating a rolling object also yields the short start-up windows;
    # drop the first `window_size` of them.
    windows = windows[window_size:]

    targets = df["soc"].iloc[window_size + 1:].values

    features = np.array(windows, dtype="float32")
    labels = np.array(targets, dtype="float32")[:, np.newaxis]

    if not train:
        return (features, labels)

    return train_test_split(features,
                            labels,
                            test_size=test_size,
                            shuffle=True)
342 |
343 | # ----------------------------------------------------------------
344 | # Validation
345 |
def validate(model, dataloader, dev=True):
    '''
    tensorflow model, tensorflow DataSet -> pd.DataFrame, prints 2 floats and a Plotly plot

    !! Tensorflow version, not the original PyTorch version
    This function runs a tf.data.Dataset through the model and prints the max and min
    predicted SOC and a percent accuracy, it also shows a Plotly plot of the
    predictions versus the labels.
    This function outputs a pandas.DataFrame of the predictions with their corresponding labels.

    Parameters:
    `model`
        object whose predict(dataloader, verbose=1) returns an array of predictions
    `dataloader`
        iterable of (inputs, labels) batches; labels must offer .numpy()
    `dev` bool
        whether or not it's the developmental set
        use False if it's the entire dataset

    Output:
        pd.DataFrame with the columns "pred", "labels" and "point"
        ("point" numbers the rows 1..N in their final order)
    '''

    aggregate = model.predict(dataloader, verbose = 1)
    print("Max pred: ", aggregate.max(), "\tMin pred: ", aggregate.min())

    # Stitch the batches together first and only then cut to the number of
    # predictions; slicing the list of batches would count batches, not rows.
    np_labels = np.concatenate(
        [label.numpy() for _, label in dataloader], axis = 0)[:len(aggregate)]

    visualize = pd.DataFrame(data={"pred": aggregate.squeeze(),
                                   "labels": np_labels.squeeze()})

    if dev:  # if it is the dev set, the values need to be sorted by value
        visualize.sort_values("labels", inplace=True)
        # if it is the entire dataset, it is already sorted chronologically which is more important

    # reset_index returns a new frame; without the assignment the sorted
    # frame would keep its shuffled index.
    visualize = visualize.reset_index(drop=True)

    visualize["point"] = list(range(1, len(visualize) + 1))
    # The 0.01 in the denominator avoids a division by zero for labels of 0.
    print("Percent Accuracy:", np.mean(100.0 - abs((aggregate - np_labels))/(np_labels+0.01) * 100))

    fig = data_plot(data=visualize,
                    x=[["point", "point"]],
                    y=[["pred", "labels"]],
                    x_title="Data Point",
                    y_title="SOC",
                    title="Predicted vs Actual SOC",
                    name=[["predictions", "labels"]],
                    mode=[["lines", "lines"]],
                    color=[["red", "yellow"]]
                    )
    fig.show()
    return visualize
391 |
--------------------------------------------------------------------------------
/transform_notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "h_Iw1qCBlT-z"
7 | },
8 | "source": [
9 | "\n",
10 | "## Import"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": null,
16 | "metadata": {
17 | "id": "cqXowf9MlT-1"
18 | },
19 | "outputs": [],
20 | "source": [
21 | "from google.colab import drive\n",
22 | "drive.mount('/content/drive')"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": null,
28 | "metadata": {
29 | "id": "eh1JdQmwlT-3"
30 | },
31 | "outputs": [],
32 | "source": [
33 | "!cp /content/drive/MyDrive/transformer_soc/rolling_and_plot_tf.py .\n",
34 | "!cp /content/drive/MyDrive/transformer_soc/sim_data.csv .\n",
35 | "!cp /content/drive/MyDrive/transformer_soc/transformer_helper.py ."
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {
42 | "id": "_OpwqWL2QH5G"
43 | },
44 | "outputs": [],
45 | "source": [
46 | "# from os import environ\n",
47 | "# environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"1\"\n",
48 | "# removes tensorflow warnings triggered because of Tensorflow incompatibility with my Apple M1 chip.\n",
49 | "# ignore this when using a non Apple Silicon device, ie. Google Colab or the likes.\n",
50 | "\n",
51 | "import tensorflow as tf\n",
52 | "from tensorflow.keras.layers import MultiHeadAttention, Dense, Input, Dropout, BatchNormalization\n",
53 | "import tensorflow.keras.backend as K\n",
54 | "from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint\n",
55 | "\n",
56 | "from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler\n",
57 | "from sklearn.model_selection import train_test_split\n",
58 | "\n",
59 | "from dataclasses import dataclass"
60 | ]
61 | },
62 | {
63 | "cell_type": "markdown",
64 | "metadata": {
65 | "id": "Z6BKLL9B3vIZ"
66 | },
67 | "source": [
68 | "The cells below are **only for TPUs**\n",
69 | "\n",
70 | "---\n",
71 | "\n"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": null,
77 | "metadata": {
78 | "id": "WMA_zsLY3x6O"
79 | },
80 | "outputs": [],
81 | "source": [
82 | "# import os\n",
83 | "# resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')\n",
84 | "# tf.config.experimental_connect_to_cluster(resolver)\n",
85 | "# # This is the TPU initialization code that has to be at the beginning.\n",
86 | "# tf.tpu.experimental.initialize_tpu_system(resolver)\n",
87 | "# print(\"All devices: \", tf.config.list_logical_devices('TPU'))\n",
88 | "\n",
89 | "# strategy = tf.distribute.TPUStrategy(resolver)"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {
95 | "id": "0K8Ni6bD4Mge"
96 | },
97 | "source": [
98 | "\n",
99 | "\n",
100 | "---\n",
101 | "\n"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {
108 | "id": "_DOA-JbhlT-4"
109 | },
110 | "outputs": [],
111 | "source": [
112 | "import numpy as np\n",
113 | "import pandas as pd\n",
114 | "\n",
115 | "!pip install jupyterplot\n",
116 | "from jupyterplot import ProgressPlot as PP\n",
117 | "\n",
118 | "from transformer_helper import *\n",
119 | "from rolling_and_plot_tf import data_plot, rolling_split, normalize, validate\n",
120 | "\n",
121 | "%reload_ext autoreload\n",
122 | "%autoreload 2"
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {
128 | "id": "pvorie1ElT-5"
129 | },
130 | "source": [
131 | "Will have to figure out how to set device to cuda in TensorFlow"
132 | ]
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {
137 | "id": "RUteRx9dlT-5"
138 | },
139 | "source": [
140 | "## Table of Contents\n",
141 | "\n",
142 | "- [Import](#0)\n",
143 | "- [JupyterPlot](#jup)\n",
144 | "- [Preprocessing](#win)\n",
145 | "- [Encoder](#enc)\n",
146 | " - [Encoder Layer](#enc-lay)\n",
147 | " - [Full Encoder](#full-enc)\n",
148 | "- [Transformer](#transform)\n",
149 | "- [Callbacks & Learn Rate Scheduler](#loss)\n",
150 | "- [Training](#train)\n",
151 | "- [Validate](#val)"
152 | ]
153 | },
154 | {
155 | "cell_type": "markdown",
156 | "metadata": {
157 | "id": "0EL21GdslT-5"
158 | },
159 | "source": [
160 | "# Literature:\n",
161 | "\n",
162 | "\n",
163 | "According to [A Transformer-based Framework for Multivariate Time Series Representation Learning](https://dl.acm.org/doi/abs/10.1145/3447548.3467401):\n",
164 | "Using **Batch Normalization is significantly more effective** for multivariate time-series than using the traditional Layer Normalization method found in NLP.\n",
165 | "\n",
166 | "In addition, according to [Deep learning approach towards accurate state of charge estimation for lithium-ion batteries using self-supervised transformer model](https://www.nature.com/articles/s41598-021-98915-8#Sec9):\n",
167 | "Using a transformer network while **forgoing the Decoder Layer** is more effective for the application of State-of-Charge estimation."
168 | ]
169 | },
170 | {
171 | "cell_type": "markdown",
172 | "metadata": {
173 | "id": "VG0gPyv0oDBi"
174 | },
175 | "source": [
176 | "$\\large{Self\\ Attention}$\n",
177 | "$$\n",
178 | "\\text { Attention }(Q, K, V)=\\operatorname{softmax}\\left(\\frac{Q K^{T}}{\\sqrt{d_{k}}}+{M}\\right) V\n",
179 | "$$"
180 | ]
181 | },
182 | {
183 | "cell_type": "markdown",
184 | "metadata": {
185 | "id": "k2DSwSOZlT-7"
186 | },
187 | "source": [
188 | "$\\large{Input}$\n",
189 | "\n",
190 | "Voltage, Current, SOC at times:\n",
191 | "$$t - window\\_size - 1 \\rightarrow t - 1 $$"
192 | ]
193 | },
194 | {
195 | "cell_type": "markdown",
196 | "metadata": {
197 | "id": "Bw-WpE1ulT-9"
198 | },
199 | "source": [
200 | "**Note**\n",
201 | "\n",
202 | "Cannot use embedding layers with battery data because of floating point values and negative values"
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": null,
208 | "metadata": {
209 | "id": "WStD-7ytlT-9"
210 | },
211 | "outputs": [],
212 | "source": [
213 | "@dataclass\n",
214 | "class G:\n",
215 | " #preprocess\n",
216 | " capacity = 18.02 # cell capacity in Ampere hours\n",
217 | " window_time = 96 #seconds\n",
218 | " window_size = 32\n",
219 | " slicing = window_time // window_size\n",
220 | " batch_size = 16\n",
221 | " #network\n",
222 | " dense_dim = 32\n",
223 | " model_dim = 128\n",
224 | " num_features = 3 # current, voltage, and soc at t minus G.window_size -> t minus 1\n",
225 | " num_heads = 16\n",
226 | " num_layers = 6\n",
227 | " #learning_rate_scheduler\n",
228 | " T_i = 1\n",
229 | " T_mult = 2\n",
230 | " T_cur = 0.0\n",
231 | " #training\n",
232 | " epochs = 256 #should be a power of T_mult because of cosine annealing with warm restarts scheduler\n",
233 | " learning_rate = 0.0045\n",
234 | " min_learning_rate = 6e-11\n",
235 | "# weight_decay = 0.0 #No weight decay param in the the keras optimizers"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "metadata": {
241 | "id": "prIueTe-lT-9"
242 | },
243 | "source": [
244 | "\n",
245 | "# Preprocessing"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": null,
251 | "metadata": {
252 | "id": "il6DI4Z7lT--"
253 | },
254 | "outputs": [],
255 | "source": [
256 | "# from google.colab import files\n",
257 | "file = pd.read_csv(\"/content/sim_data.csv\")\n",
258 | "#if using sim_data.csv:\n",
259 | "file[\"soc\"] *= 100.0"
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": null,
265 | "metadata": {
266 | "id": "SLQrFOvrlT--"
267 | },
268 | "outputs": [],
269 | "source": [
270 | "data_plot(data = [file],\n",
271 | " title=\"OCV v SOC\",\n",
272 | " x = [\"test time (sec)\"],\n",
273 | " y = [\"soc\"],\n",
274 | " markers = \"lines\",\n",
275 | " color = \"darkorchid\",\n",
276 | " x_title = \"Test Time (sec)\",\n",
277 | " y_title = \"SOC\"\n",
278 | " )"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": null,
284 | "metadata": {
285 | "id": "_f7QighFlT--"
286 | },
287 | "outputs": [],
288 | "source": [
289 | "file = normalize(file.loc[:,[\"current\",\"voltage\",\"soc\"]].iloc[::G.slicing], G.capacity)\n",
290 | "#uses sklearn.preprocessing"
291 | ]
292 | },
293 | {
294 | "cell_type": "code",
295 | "execution_count": null,
296 | "metadata": {
297 | "id": "x79KvZ3ilT--"
298 | },
299 | "outputs": [],
300 | "source": [
301 | "x_train, x_test, y_train, y_test = rolling_split(file, G.window_size, train=True)\n",
302 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
303 | "#uses sklearn.model_selection\n",
304 | "\n",
305 | "x_train = tf.data.Dataset.from_tensor_slices(x_train)\n",
306 | "y_train = tf.data.Dataset.from_tensor_slices(y_train)\n",
307 | "x_test = tf.data.Dataset.from_tensor_slices(x_test)\n",
308 | "y_test = tf.data.Dataset.from_tensor_slices(y_test)\n",
309 | "\n",
310 | "train_dataloader = tf.data.Dataset.zip((x_train, y_train)).batch(G.batch_size, drop_remainder=True)\n",
311 | "test_dataloader = tf.data.Dataset.zip((x_test, y_test)).batch(G.batch_size, drop_remainder=True)"
312 | ]
313 | },
314 | {
315 | "cell_type": "code",
316 | "execution_count": null,
317 | "metadata": {
318 | "id": "yRmivoyVlT-_"
319 | },
320 | "outputs": [],
321 | "source": [
322 | "for x,y in train_dataloader:\n",
323 | " print(f\"Shape of X [window, features]: {x.shape}\")\n",
324 | " print(f\"Shape of y: {y.shape} {y.dtype}\")\n",
325 | " break"
326 | ]
327 | },
328 | {
329 | "cell_type": "markdown",
330 | "metadata": {
331 | "id": "blS0pEpTqRVI"
332 | },
333 | "source": [
334 | "\n",
335 | "# Encoder"
336 | ]
337 | },
338 | {
339 | "cell_type": "code",
340 | "execution_count": null,
341 | "metadata": {
342 | "id": "sC5vJhz29vZR"
343 | },
344 | "outputs": [],
345 | "source": [
346 | "def FullyConnected():\n",
347 | " return tf.keras.Sequential([\n",
348 | " tf.keras.layers.Dense(G.dense_dim, activation='relu',\n",
349 | " kernel_initializer = tf.keras.initializers.HeNormal(),\n",
350 | " bias_initializer = tf.keras.initializers.RandomUniform(minval=0.005, maxval = 0.08)\n",
351 | " ),\n",
352 | " # (G.batch_size, G.window_size, G.dense_dim)\n",
353 | " tf.keras.layers.BatchNormalization(momentum = 0.98, epsilon=5e-4),\n",
354 | " tf.keras.layers.Dense(G.dense_dim, activation='relu',\n",
355 | " kernel_initializer = tf.keras.initializers.HeNormal(),\n",
356 | " bias_initializer = tf.keras.initializers.RandomUniform(minval=0.001, maxval = 0.01)\n",
357 | " ),\n",
358 | " # (G.batch_size, G.window_size, G.dense_dim)\n",
359 | " tf.keras.layers.BatchNormalization(momentum = 0.95, epsilon=5e-4)\n",
360 | " ])"
361 | ]
362 | },
363 | {
364 | "cell_type": "markdown",
365 | "metadata": {
366 | "id": "R65WbX5wqYYH"
367 | },
368 | "source": [
369 | "\n",
370 | "### Encoder Layer"
371 | ]
372 | },
373 | {
374 | "cell_type": "code",
375 | "execution_count": null,
376 | "metadata": {
377 | "id": "tIufbrc-9_2u"
378 | },
379 | "outputs": [],
380 | "source": [
381 | "class EncoderLayer(tf.keras.layers.Layer):\n",
382 | " \"\"\"\n",
383 | " The encoder layer is composed by a multi-head self-attention mechanism,\n",
384 | " followed by a simple, positionwise fully connected feed-forward network. \n",
385 | " This architecture includes a residual connection around each of the two \n",
386 | " sub-layers, followed by batch normalization.\n",
387 | " \"\"\"\n",
388 | " def __init__(self,\n",
389 | " num_heads,\n",
390 | " num_features,\n",
391 | " dense_dim,\n",
392 | " dropout_rate,\n",
393 | " batchnorm_eps):\n",
394 | " super(EncoderLayer, self).__init__()\n",
395 | "\n",
396 | " self.mha = MultiHeadAttention(\n",
397 | " num_heads = num_heads,\n",
398 | " key_dim = dense_dim,\n",
399 | " dropout = dropout_rate,\n",
400 | " kernel_initializer = tf.keras.initializers.HeNormal(),\n",
401 | " # kernel_regularizer = tf.keras.regularizers.L2(1e-4),\n",
402 | " bias_initializer = tf.keras.initializers.RandomUniform(minval=0.001, maxval = 0.01)\n",
403 | " )\n",
404 | " \n",
405 | " #feed-forward-network\n",
406 | " self.ffn = FullyConnected()\n",
407 | " \n",
408 | " \n",
409 | " self.batchnorm1 = BatchNormalization(momentum = 0.95, epsilon=batchnorm_eps)\n",
410 | " self.batchnorm2 = BatchNormalization(momentum = 0.95, epsilon=batchnorm_eps)\n",
411 | "\n",
412 | " self.dropout_ffn = Dropout(dropout_rate)\n",
413 | " \n",
414 | " def call(self, x, training):\n",
415 | " \"\"\"\n",
416 | " Forward pass for the Encoder Layer\n",
417 | " \n",
418 | " Arguments:\n",
419 | " x -- Tensor of shape (G.batch_size, G.window_size, G.num_features)\n",
420 | " training -- Boolean, set to true to activate\n",
421 | " the training mode for dropout layers\n",
422 | " Returns:\n",
423 | " encoder_layer_out -- Tensor of shape (G.batch_size, G.window_size, G.num_features)\n",
424 | " \"\"\"\n",
425 | " # Dropout is added by Keras automatically if the dropout parameter is non-zero during training\n",
426 | " \n",
427 | " attn_output = self.mha(query = x,\n",
428 | " value = x) # Self attention\n",
429 | " \n",
430 | " out1 = self.batchnorm1(tf.add(x, attn_output)) # (G.batch_size, G.window_size, G.dense_dim)\n",
431 | " \n",
432 | " ffn_output = self.ffn(out1)\n",
433 | " \n",
434 | " ffn_output = self.dropout_ffn(ffn_output) # (G.batch_size, G.window_size, G.dense_dim)\n",
435 | " \n",
436 | " encoder_layer_out = self.batchnorm2(tf.add(ffn_output, out1))\n",
437 | " # (G.batch_size, G.window_size, G.dense_dim)\n",
438 | " return encoder_layer_out"
439 | ]
440 | },
441 | {
442 | "cell_type": "markdown",
443 | "metadata": {
444 | "id": "IKgObFUUlT_B"
445 | },
446 | "source": [
447 | "\n",
448 | "### Full Encoder"
449 | ]
450 | },
451 | {
452 | "cell_type": "code",
453 | "execution_count": null,
454 | "metadata": {
455 | "id": "7j2Tjr0K0t0I"
456 | },
457 | "outputs": [],
458 | "source": [
459 | "class Encoder(tf.keras.layers.Layer):\n",
460 | " \"\"\"\n",
461 | " The entire Encoder starts by passing the input to an embedding layer \n",
462 | " and using positional encoding to then pass the output through a stack of\n",
463 | " encoder Layers\n",
464 | " \n",
465 | " \"\"\" \n",
466 | " def __init__(self,\n",
467 | " num_layers = G.num_layers,\n",
468 | " num_heads = G.num_heads,\n",
469 | " num_features = G.num_features,\n",
470 | " dense_dim = G.dense_dim,\n",
471 | " maximum_position_encoding = G.window_size,\n",
472 | " dropout_rate=0.15,\n",
473 | " batchnorm_eps=1e-4):\n",
474 | " \n",
475 | " super(Encoder, self).__init__()\n",
476 | "\n",
477 | " self.num_layers = num_layers\n",
478 | "\n",
479 | " #linear input layer\n",
480 | " self.lin_input = tf.keras.layers.Dense(dense_dim, activation=\"relu\")\n",
481 | " \n",
482 | " self.pos_encoding = positional_encoding(maximum_position_encoding, \n",
483 | " dense_dim)\n",
484 | "\n",
485 | "\n",
486 | " self.enc_layers = [EncoderLayer(num_heads = num_heads,\n",
487 | " num_features = num_features,\n",
488 | " dense_dim = dense_dim,\n",
489 | " dropout_rate = dropout_rate,\n",
490 | " batchnorm_eps = batchnorm_eps) \n",
491 | " for _ in range(self.num_layers)]\n",
492 | " \n",
493 | " def call(self, x, training):\n",
494 | " \"\"\"\n",
495 | " Forward pass for the Encoder\n",
496 | " \n",
497 | " Arguments:\n",
498 | " x -- Tensor of shape (G.batch_size, G.window_size, G.num_features)\n",
499 | " training -- Boolean, set to true to activate\n",
500 | " the training mode for dropout layers\n",
501 | " mask -- Boolean mask to ensure that the padding is not \n",
502 | " treated as part of the input\n",
503 | " Returns:\n",
504 | " Tensor of shape (G.batch_size, G.dense_dim)\n",
505 | " \"\"\"\n",
506 | " x = self.lin_input(x)\n",
507 | " seq_len = tf.shape(x)[1]\n",
508 | " x += self.pos_encoding[:, :seq_len, :]\n",
509 | " \n",
510 | " for i in range(self.num_layers):\n",
511 | " x = self.enc_layers[i](x, training)\n",
512 | " \n",
513 | " # only need the final time's data : time = t-1 from the window\n",
514 | " # x has shape (G.batch_size, G.window_size, G.dense_dim)\n",
515 | " # but I am only returning time t-1:\n",
516 | " return x[:, -1, :] # (G.batch_size, G.dense_dim)"
517 | ]
518 | },
519 | {
520 | "cell_type": "markdown",
521 | "metadata": {
522 | "id": "_U2F58rnlT_C"
523 | },
524 | "source": [
525 | " \n",
526 | "# Transformer"
527 | ]
528 | },
529 | {
530 | "cell_type": "code",
531 | "execution_count": null,
532 | "metadata": {
533 | "id": "QHymPmaj-2ba"
534 | },
535 | "outputs": [],
536 | "source": [
537 | "class Transformer(tf.keras.Model):\n",
538 | " \"\"\"\n",
539 | " Complete transformer with an Encoder and a Decoder\n",
540 | " \"\"\"\n",
541 | " def __init__(self,\n",
542 | " num_layers = G.num_layers,\n",
543 | " num_heads = G.num_heads,\n",
544 | " dense_dim = G.dense_dim,\n",
545 | " max_positional_encoding_input = G.window_size,\n",
546 | " max_positional_encoding_target = G.window_size):\n",
547 | " super(Transformer, self).__init__()\n",
548 | "\n",
549 | "\n",
550 | " self.encoder = Encoder()\n",
551 | "\n",
552 | " self.final_stack = tf.keras.Sequential([\n",
553 | " tf.keras.layers.Dense(\n",
554 | " dense_dim, activation = \"relu\",\n",
555 | " kernel_initializer = tf.keras.initializers.HeNormal(),\n",
556 | " bias_initializer = tf.keras.initializers.RandomUniform(minval=0.001, maxval = 0.02)\n",
557 | " ),\n",
558 | " tf.keras.layers.BatchNormalization(momentum = 0.97, epsilon=5e-4),\n",
559 | "\n",
560 | " tf.keras.layers.Dense(\n",
561 | " 1, activation = \"sigmoid\",\n",
562 | " bias_initializer = tf.keras.initializers.RandomUniform(minval=0.001, maxval = 0.005)\n",
563 | " )\n",
564 | " ])\n",
565 | " \n",
566 | " def call(self, x, training):\n",
567 | " \"\"\"\n",
568 | " Forward pass for the entire Transformer\n",
569 | " Arguments:\n",
570 | " x -- tf.data.Dataset containing batch inputs and targets\n",
571 | " batched & windowed voltage, current and soc data with batched soc targets\n",
572 | " training -- Boolean, set to true to activate\n",
573 | " the training mode for dropout and batchnorm layers\n",
574 | " Returns:\n",
575 | " final_output -- SOC prediction at time t\n",
576 | " \n",
577 | " \"\"\"\n",
578 | " enc_output = self.encoder(x, training) # (G.batch_size, G.dense_dim)\n",
579 | " \n",
580 | " final_output = self.final_stack(enc_output) # (G.batch_size, 1)\n",
581 | "\n",
582 | "\n",
583 | " \n",
584 | " return final_output"
585 | ]
586 | },
587 | {
588 | "cell_type": "markdown",
589 | "metadata": {
590 | "id": "kiILRshLv9Bx"
591 | },
592 | "source": [
593 | "## Note:\n",
594 | "\n",
595 | "The `training` argument in the model and layer calls sets the `keras.backend.learning_phase()` value to the appropriate value for the use case.\n",
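596 | "i.e.\n",
597 | "- If I am using the train_loop(), `training` is set to True, which means the Dropout layers randomly drop units and the BatchNormalization layers normalize with the statistics of the current batch (while updating their moving averages).\n",
598 | "- If I am using the test_loop(), `training` is set to False, which means the Dropout layers are disabled and the BatchNormalization layers normalize with their stored moving mean and variance instead of batch statistics."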
599 | ]
600 | },
601 | {
602 | "cell_type": "markdown",
603 | "metadata": {
604 | "id": "Q6IncgGX4z_9"
605 | },
606 | "source": [
607 | "If using **TPUs**, use the cell directly below this text\n",
608 | "\n",
609 | "---\n",
610 | "\n"
611 | ]
612 | },
613 | {
614 | "cell_type": "code",
615 | "execution_count": null,
616 | "metadata": {
617 | "id": "un5xiWL644Uf"
618 | },
619 | "outputs": [],
620 | "source": [
621 | "# tf.keras.backend.clear_session()\n",
622 | "# with strategy.scope():\n",
623 | "# model = Transformer()"
624 | ]
625 | },
626 | {
627 | "cell_type": "markdown",
628 | "metadata": {
629 | "id": "yeCjW7VP44fP"
630 | },
631 | "source": [
632 | "\n",
633 | "\n",
634 | "---\n",
635 | "\n"
636 | ]
637 | },
638 | {
639 | "cell_type": "markdown",
640 | "metadata": {
641 | "id": "nJ8bVUEh45Mj"
642 | },
643 | "source": [
644 | "If **not using TPUs**:\n",
645 | "\n",
646 | "---\n",
647 | "\n"
648 | ]
649 | },
650 | {
651 | "cell_type": "code",
652 | "execution_count": null,
653 | "metadata": {
654 | "id": "ovllyglWlT_C"
655 | },
656 | "outputs": [],
657 | "source": [
658 | "tf.keras.backend.clear_session()\n",
659 | "model = Transformer()\n",
660 | "model.build((G.batch_size, G.window_size, G.num_features))\n",
661 | "model.summary(expand_nested=True)"
662 | ]
663 | },
664 | {
665 | "cell_type": "markdown",
666 | "metadata": {
667 | "id": "SWtYX-8348Z1"
668 | },
669 | "source": [
670 | "\n",
671 | "\n",
672 | "---\n",
673 | "\n"
674 | ]
675 | },
676 | {
677 | "cell_type": "code",
678 | "execution_count": null,
679 | "metadata": {
680 | "id": "JUcLoUmWlT_D"
681 | },
682 | "outputs": [],
683 | "source": [
684 | "model.load_weights(\"/content/drive/MyDrive/transformer_soc/model_weights.tf\")"
685 | ]
686 | },
687 | {
688 | "cell_type": "markdown",
689 | "metadata": {
690 | "id": "yYtQv1TtlT_D"
691 | },
692 | "source": [
693 | "\n",
694 | "# Callbacks and Scheduler"
695 | ]
696 | },
697 | {
698 | "cell_type": "markdown",
699 | "metadata": {
700 | "id": "aTN3TiSblT_D"
701 | },
702 | "source": [
703 | "**Learning Rate Scheduler**\n",
704 | "\n",
705 | "Cosine Annealing with Warm Restarts, proposed by Loshchilov and Hutter in [SGDR: Stochastic Gradient Descent with Warm Restarts](https://doi.org/10.48550/arXiv.1608.03983)"
706 | ]
707 | },
708 | {
709 | "cell_type": "markdown",
710 | "metadata": {
711 | "id": "xWt1eUd9o6WA"
712 | },
713 | "source": [
714 | "$$\\mu_t = \\mu_{min} + \\frac{1}{2}(\\mu_{max} - \\mu_{min})\\cdot (1 + \\cos (\\frac{T_{cur}}{T_i}\\pi))$$\n",
715 | "\n",
716 | "Where:\n",
717 | " - $\\mu_t$ is the learning rate at time $t$; $\\mu_{min}$ and $\\mu_{max}$ are the minimum and maximum learning rates\n",
718 | " - $T_{cur}$ is the number of epochs since the last restart\n",
719 | " - $T_i$ is the number of epochs between two restarts\n",
720 | "\n",
721 | "Note:\n",
722 | " - When $T_{cur} = T_i \\rightarrow \\mu_t = \\mu_{min}$\n",
723 | " - When $T_{cur} = 0 \\rightarrow \\mu_t = \\mu_{max}$"
724 | ]
725 | },
726 | {
727 | "cell_type": "markdown",
728 | "metadata": {
729 | "id": "sLjZ7ICoSGif"
730 | },
731 | "source": [
732 | "---\n",
733 | "**The cell below defines the function that is passed to the Keras `LambdaCallback` class in order to implement Cosine Annealing with Warm Restarts** ↓\n",
734 | "\n",
735 | "Used with callbacks in model.fit()\n",
736 | "\n",
737 | "---"
738 | ]
739 | },
740 | {
741 | "cell_type": "code",
742 | "execution_count": null,
743 | "metadata": {
744 | "id": "mZg1uSmDQMTZ"
745 | },
746 | "outputs": [],
747 | "source": [
748 | "def schedule(batch, logs):\n",
749 | " '''\n",
750 | " This is a dummy function for the LearningRateScheduler Class\n",
751 | " I am trying to see if I can use the model.compile(), model.fit(), model.evaluate(), trio with\n",
752 | " Cosine Annealing with Warm Restarts\n",
753 | " Returns a new learning rate based on the schedule described below\n",
754 | " \n",
755 | " Call after every batch\n",
756 | " '''\n",
757 | " \n",
758 | " mu_i = G.min_learning_rate + 0.5 * (\n",
759 | " G.learning_rate - G.min_learning_rate) * (\n",
760 | " 1 + tf.math.cos(np.pi * G.T_cur / G.T_i))\n",
761 | " \n",
762 | " G.T_cur += G.batch_size / len(x_train)\n",
763 | " if np.isclose(G.T_cur, G.T_i):\n",
764 | " G.T_i *= G.T_mult\n",
765 | " G.T_cur = 0.0\n",
766 | " K.set_value(model.optimizer.learning_rate, mu_i)"
767 | ]
768 | },
769 | {
770 | "cell_type": "markdown",
771 | "metadata": {
772 | "id": "bzZcCFve2o5O"
773 | },
774 | "source": [
775 | "**Progress Plot Callback**"
776 | ]
777 | },
778 | {
779 | "cell_type": "code",
780 | "execution_count": null,
781 | "metadata": {
782 | "id": "ZeomH0iN2o5O"
783 | },
784 | "outputs": [],
785 | "source": [
786 | "class ProgressCallback(tf.keras.callbacks.Callback):\n",
787 | " def on_epoch_end(self, epoch, logs = None):\n",
788 | " train_loss = logs[\"loss\"]\n",
789 | " train_acc = 100.0 - logs[\"mean_absolute_percentage_error\"]\n",
790 | " test_loss = logs[\"val_loss\"]\n",
791 | " test_acc = 100.0 - logs[\"val_mean_absolute_percentage_error\"]\n",
792 | " global pp\n",
793 | " pp.update([[train_loss, test_loss],\n",
794 | " [train_acc, test_acc]])"
795 | ]
796 | },
797 | {
798 | "cell_type": "markdown",
799 | "metadata": {
800 | "id": "A699g9Sp2o5P"
801 | },
802 | "source": [
803 | "**Save Model Progress Callback**\n",
804 | "\n",
805 | "Does not work with TPUs"
806 | ]
807 | },
808 | {
809 | "cell_type": "code",
810 | "execution_count": null,
811 | "metadata": {
812 | "id": "dTdfb-br2o5P"
813 | },
814 | "outputs": [],
815 | "source": [
816 | "class SaveModel(tf.keras.callbacks.Callback):\n",
817 | " def on_epoch_end(self, epoch, logs = None):\n",
818 | " if epoch != 0 and epoch % 15 == 0:\n",
819 | " self.model.save_weights(\"/content/drive/MyDrive/transformer_soc/model_weights.h5\")"
820 | ]
821 | },
822 | {
823 | "cell_type": "markdown",
824 | "metadata": {
825 | "id": "Mgkzjt8NReWS"
826 | },
827 | "source": [
828 | "**Early Stopping and Saving Best Model checkpoint Callbacks**"
829 | ]
830 | },
831 | {
832 | "cell_type": "code",
833 | "execution_count": null,
834 | "metadata": {
835 | "id": "_-Lh-49NRb_r"
836 | },
837 | "outputs": [],
838 | "source": [
839 | "model_options = tf.saved_model.SaveOptions(experimental_io_device=\"/job:localhost\")\n",
840 | "# earlystopping = EarlyStopping(monitor='val_mean_absolute_percentage_error', patience=150, verbose=0, mode='min')\n",
841 | "mcp_save = ModelCheckpoint('/content/drive/MyDrive/transformer_soc/tpu_model_weights', save_format = \"tf\", save_best_only=True, monitor='val_mean_absolute_percentage_error', mode='min', options = model_options)"
842 | ]
843 | },
844 | {
845 | "cell_type": "code",
846 | "execution_count": null,
847 | "metadata": {
848 | "id": "hg7FmZOHlT_E"
849 | },
850 | "outputs": [],
851 | "source": [
852 | "loss_object = tf.keras.losses.LogCosh()\n",
853 | "\n",
854 | "optimizer = tf.keras.optimizers.Adam(learning_rate = G.learning_rate,\n",
855 | " beta_1 = 0.9,\n",
856 | " beta_2 = 0.999\n",
857 | " )\n",
858 | "\n",
859 | "#cos_anneal is for the model.fit() call\n",
860 | "cos_anneal = tf.keras.callbacks.LambdaCallback(on_batch_end = schedule)\n",
861 | "\n",
862 | "#progress plot callback\n",
863 | "pp_update = ProgressCallback()\n",
864 | "\n",
865 | "#model parameters save callback\n",
866 | "model_save = SaveModel() #This is optional"
867 | ]
868 | },
869 | {
870 | "cell_type": "markdown",
871 | "metadata": {
872 | "id": "45--3qknlT_H"
873 | },
874 | "source": [
875 | "\n",
876 | "# Training\n",
877 | "\n",
878 | "**There are two compile calls, one requires a TPU**"
879 | ]
880 | },
881 | {
882 | "cell_type": "code",
883 | "execution_count": null,
884 | "metadata": {
885 | "id": "ynnk3or6-FMd"
886 | },
887 | "outputs": [],
888 | "source": [
889 | "pp = PP(plot_names = [\"Mean Log Loss\", \"% Accuracy\"],\n",
890 | " line_names = [\"Train Loop\", \"Test Loop\"],\n",
891 | " x_label = \"epochs\"\n",
892 | " )\n",
893 | "\n",
894 | "# ##### if using a TPU:\n",
895 | "# with strategy.scope():\n",
896 | "# model.compile(optimizer, loss_object, steps_per_execution = 3, metrics=[\"mean_absolute_percentage_error\"])\n",
897 | "\n",
898 | "##### else:\n",
899 | "# model.compile(optimizer, loss_object, metrics=[\"mean_absolute_percentage_error\"])\n",
900 | "## Don't compile again after training; it causes issues.\n",
901 | "\n",
902 | "#-----------------------------------------------------------------\n",
903 | "#Note: can add `model_save` to the callbacks list in model.fit()\n",
904 | "# it saves the model params to the google drive every 15 epochs\n",
905 | "#-------------------------------------------------------------------\n",
906 | "\n",
907 | "steps_per_epoch = len(train_dataloader) // G.epochs\n",
908 | "validation_steps = len(test_dataloader) // G.epochs\n",
909 | "\n",
910 | "history = model.fit(train_dataloader,\n",
911 | " batch_size = G.batch_size,\n",
912 | " epochs = G.epochs,\n",
913 | " verbose = 1,\n",
914 | " steps_per_epoch = steps_per_epoch,\n",
915 | " callbacks = [cos_anneal, pp_update],\n",
916 | " validation_data = test_dataloader,\n",
917 | " validation_steps = validation_steps\n",
918 | " )"
919 | ]
920 | },
921 | {
922 | "cell_type": "code",
923 | "execution_count": null,
924 | "metadata": {
925 | "id": "9yF6RygxlT_I"
926 | },
927 | "outputs": [],
928 | "source": [
929 | "model.save(\"/content/drive/MyDrive/transformer_soc/tpu_model.h5\") #doesnt work with TPUs"
930 | ]
931 | },
932 | {
933 | "cell_type": "code",
934 | "execution_count": null,
935 | "metadata": {
936 | "id": "ljBF-U_vIjrL"
937 | },
938 | "outputs": [],
939 | "source": [
940 | "#works with TPUs\n",
941 | "checkpoint = tf.train.Checkpoint(model = model)\n",
942 | "options = tf.train.CheckpointOptions(experimental_io_device=\"/job:localhost\")\n",
943 | "checkpoint.save(\"/content/drive/MyDrive/transformer_soc/tpu_model/ckpt\", options=options)"
944 | ]
945 | },
946 | {
947 | "cell_type": "markdown",
948 | "metadata": {
949 | "id": "L5pSwH7QlT_I"
950 | },
951 | "source": [
952 | "\n",
953 | "# Validate"
954 | ]
955 | },
956 | {
957 | "cell_type": "markdown",
958 | "metadata": {
959 | "id": "JYr2y9eulT_I"
960 | },
961 | "source": [
962 | "**Dev Set**"
963 | ]
964 | },
965 | {
966 | "cell_type": "code",
967 | "execution_count": null,
968 | "metadata": {
969 | "id": "CuY9saCblT_I",
970 | "scrolled": true
971 | },
972 | "outputs": [],
973 | "source": [
974 | "visualize_dev = validate(model, test_dataloader, dev = True)"
975 | ]
976 | },
977 | {
978 | "cell_type": "markdown",
979 | "metadata": {
980 | "id": "v5uLkWkLlT_I"
981 | },
982 | "source": [
983 | "**Entire Dataset**"
984 | ]
985 | },
986 | {
987 | "cell_type": "code",
988 | "execution_count": null,
989 | "metadata": {
990 | "id": "gjvsvbIllT_I"
991 | },
992 | "outputs": [],
993 | "source": [
994 | "x_set, y_set = rolling_split(file, G.window_size, train = False)\n",
995 | "\n",
996 | "x_set = tf.data.Dataset.from_tensor_slices(x_set)\n",
997 | "y_set = tf.data.Dataset.from_tensor_slices(y_set)\n",
998 | "\n",
999 | "set_dataloader = tf.data.Dataset.zip((x_set, y_set)).batch(G.batch_size, drop_remainder=True)\n",
1000 | "\n",
1001 | "visualize = validate(model, set_dataloader, dev = False)"
1002 | ]
1003 | }
1004 | ],
1005 | "metadata": {
1006 | "accelerator": "TPU",
1007 | "colab": {
1008 | "name": "transform_notebook.ipynb",
1009 | "provenance": []
1010 | },
1011 | "gpuClass": "standard",
1012 | "kernelspec": {
1013 | "display_name": "Python 3 (ipykernel)",
1014 | "language": "python",
1015 | "name": "python3"
1016 | },
1017 | "language_info": {
1018 | "codemirror_mode": {
1019 | "name": "ipython",
1020 | "version": 3
1021 | },
1022 | "file_extension": ".py",
1023 | "mimetype": "text/x-python",
1024 | "name": "python",
1025 | "nbconvert_exporter": "python",
1026 | "pygments_lexer": "ipython3",
1027 | "version": "3.10.5"
1028 | }
1029 | },
1030 | "nbformat": 4,
1031 | "nbformat_minor": 1
1032 | }
1033 |
--------------------------------------------------------------------------------