├── src
│   ├── .gitkeep
│   ├── __init__.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── dataset.py
│   │   └── prepare_data.py
│   ├── engine
│   │   ├── __init__.py
│   │   ├── predictor.py
│   │   └── pytorch_trainer.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── load_checkpoint.py
│   │   └── lstm_model.py
│   └── utils
│       ├── __init__.py
│       ├── metrics.py
│       ├── load_checkpoint.py
│       ├── mlflow_logger.py
│       └── training.py
├── notebooks
│   ├── .gitkeep
│   └── weather-prediction.ipynb
├── results
│   ├── .gitkeep
│   ├── mae.png
│   ├── mse.png
│   ├── training_metrics.png
│   └── training_report.txt
├── .dvc
│   ├── .gitignore
│   └── config
├── requirements.txt
├── .gitignore
├── best_model.pt.dvc
├── dataset.csv.dvc
├── processed_dataset.csv.dvc
├── .dvcignore
├── config
│   ├── predict.yaml
│   ├── mmscaler_values.yaml
│   ├── pt_training.yaml
│   └── prepare_data.json
├── predict.py
└── README.md

--------------------------------------------------------------------------------
/src/.gitkeep:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/notebooks/.gitkeep:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/results/.gitkeep:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/src/data/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/src/engine/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/src/models/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.dvc/.gitignore:
--------------------------------------------------------------------------------
/config.local
/tmp
/cache

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sagnik1511/samay_yantra/HEAD/requirements.txt

--------------------------------------------------------------------------------
/results/mae.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sagnik1511/samay_yantra/HEAD/results/mae.png

--------------------------------------------------------------------------------
/results/mse.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sagnik1511/samay_yantra/HEAD/results/mse.png

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
venv
.idea/
__pycache__/
mlruns/
dataset.csv
processed_dataset.csv
best_model.pt
--------------------------------------------------------------------------------
/best_model.pt.dvc:
--------------------------------------------------------------------------------
outs:
- md5: 1886f4f99f525d703077729a9a07c8ce
  size: 6072
  path: best_model.pt

--------------------------------------------------------------------------------
/dataset.csv.dvc:
--------------------------------------------------------------------------------
outs:
- md5: cedea3f0adbde8941aa869c5d011f1f9
  size: 120605973
  path: dataset.csv

--------------------------------------------------------------------------------
/results/training_metrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sagnik1511/samay_yantra/HEAD/results/training_metrics.png

--------------------------------------------------------------------------------
/.dvc/config:
--------------------------------------------------------------------------------
[core]
    remote = storage
['remote "storage"']
    url = gdrive://1OFD5KiaSD2GPs3VR-8fHB0dSnRfXVyNF

--------------------------------------------------------------------------------
/processed_dataset.csv.dvc:
--------------------------------------------------------------------------------
outs:
- md5: 8a1a8b779d09205871e67ccc4092033e
  size: 171641999
  path: processed_dataset.csv

--------------------------------------------------------------------------------
/.dvcignore:
--------------------------------------------------------------------------------
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore

--------------------------------------------------------------------------------
/src/utils/metrics.py:
--------------------------------------------------------------------------------
from sklearn.metrics import mean_absolute_error

# Metrics tracked alongside the MSE training loss; add entries here to
# report additional metrics during training and validation.
metrics_ = {
    "mae": mean_absolute_error
}

--------------------------------------------------------------------------------
/config/predict.yaml:
--------------------------------------------------------------------------------
dataset:
  path: "processed_dataset.csv"
  seq_length: 20
model:
  num_classes: 10
  input_size: 10
  hidden_size: 2
  num_layers: 1
  checkpoint_path: "best_model.pt"

--------------------------------------------------------------------------------
/config/mmscaler_values.yaml:
--------------------------------------------------------------------------------
max_values:
- 1020.07
- 37.28
- 100.0
- 28.32
- 18.13
- 28.82
- 1393.54
- 16.83
- 14.0
- 1219.32
min_values:
- 913.6
- -23.01
- 12.95
- 0.79
- 0.5
- 0.8
- 1059.45
- -0.39
- 0.0
- 0.0

--------------------------------------------------------------------------------
/src/utils/load_checkpoint.py:
--------------------------------------------------------------------------------
import torch
from src.models.lstm_model import LSTM


def load_best_model(model_config):
    path = "best_model.pt"
    # map_location keeps loading safe on CPU-only machines.
    chkp = torch.load(path, map_location="cpu")
    model = LSTM(**model_config)
    model.load_state_dict(chkp["model"])

    return model
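
--------------------------------------------------------------------------------
Example: inverting the min-max scaling (illustrative; not a repo file)
--------------------------------------------------------------------------------
The per-column extrema stored in config/mmscaler_values.yaml are enough to undo
the scaling without refitting a scaler. A minimal sketch of that inverse
transform, mirroring what anti_transform in src/engine/predictor.py does below;
inverse_scale is a hypothetical helper, and the sample values are illustrative:

import yaml

with open("config/mmscaler_values.yaml") as f:
    scaler = yaml.safe_load(f)

def inverse_scale(row):
    """Map model outputs in [0, 1] back to physical units, column by column."""
    return [
        v * (mx - mn) + mn
        for v, mn, mx in zip(row, scaler["min_values"], scaler["max_values"])
    ]

# e.g. a scaled pressure of 0.5 maps back to 913.6 + 0.5 * (1020.07 - 913.6)
print(inverse_scale([0.5] * 10)[0])  # ~966.835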
--------------------------------------------------------------------------------
/config/pt_training.yaml:
--------------------------------------------------------------------------------
dataset:
  path: "processed_dataset.csv"
  batch_size: 5000
  seq_length: 20
  split_ratio: 0.8
model:
  num_classes: 10
  input_size: 10
  hidden_size: 2
  num_layers: 1
optimizer:
  lr: 0.0005
training_hp:
  num_epochs: 10
  log_index: 10

--------------------------------------------------------------------------------
/src/models/load_checkpoint.py:
--------------------------------------------------------------------------------
import torch
from src.models.lstm_model import LSTM


def load_model(path, model_param, device="cpu"):
    chkp = torch.load(path, map_location=device)
    # "checkpoint_path" sits in the same config block but is not an LSTM argument.
    del model_param["checkpoint_path"]
    model = LSTM(**model_param)
    model.load_state_dict(chkp["model"])

    return model

--------------------------------------------------------------------------------
/notebooks/weather-prediction.ipynb:
--------------------------------------------------------------------------------
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"pygments_lexer":"ipython3","nbconvert_exporter":"python","version":"3.6.4","file_extension":".py","codemirror_mode":{"name":"ipython","version":3},"name":"python","mimetype":"text/x-python"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"print(\"NOOBS code in Jupyter Notebook!\")","metadata":{},"execution_count":null,"outputs":[]}]}

--------------------------------------------------------------------------------
/src/utils/mlflow_logger.py:
--------------------------------------------------------------------------------
import mlflow
from urllib.parse import urlparse


def log_pt_models(model, hparams, results):
    with mlflow.start_run():
        for key in ["batch_size", "seq_length", "split_ratio"]:
            mlflow.log_param(key, hparams["dataset"][key])
        for key in ["model", "optimizer", "training_hp"]:
            mlflow.log_params(hparams[key])
        mlflow.log_metrics(results)

        # The model registry is unavailable when tracking to a local file store.
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        if tracking_url_type_store != "file":
            mlflow.pytorch.log_model(model, "model", registered_model_name="LSTM_Model")
        else:
            mlflow.pytorch.log_model(model, "model")

--------------------------------------------------------------------------------
/config/prepare_data.json:
--------------------------------------------------------------------------------
{
    "data_path": {
        "input": "dataset.csv",
        "output": "processed_dataset.csv",
        "mmscaler_values": "config/mmscaler_values.yaml"
    },
    "imp_features": [
        "Date Time",
        "p (mbar)",
        "T (degC)",
        "rh (%)",
        "VPact (mbar)",
        "sh (g/kg)",
        "H2OC (mmol/mol)",
        "rho (g/m**3)",
        "wv (m/s)",
        "rain (mm)",
        "SWDR (W/m**2)"
    ],
    "renamed_cols": [
        "time",
        "pressure",
        "temperature",
        "relative_humidity",
        "vapour_pressure",
        "specific_humidity",
        "water_vap_concentration",
        "airtight",
        "wind_speed",
        "rain",
        "SWDR"
    ],
    "interpolation_method": "slinear"
}
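
--------------------------------------------------------------------------------
Example: config-driven column selection and renaming (illustrative; not a repo file)
--------------------------------------------------------------------------------
A minimal sketch of how the imp_features/renamed_cols pairing in this config is
applied; the real work happens in fetch_imp_features in src/data/prepare_data.py
further down. The two-column frame here is a toy, not the actual dataset:

import pandas as pd

job_config = {
    "imp_features": ["Date Time", "T (degC)"],
    "renamed_cols": ["time", "temperature"],
}

df = pd.DataFrame({"Date Time": ["2021-02-01 08:50:00"],
                   "T (degC)": [4.2],
                   "unused": [0]})

# Keep only the configured columns, then rename them positionally.
df = df[job_config["imp_features"]]
df.columns = job_config["renamed_cols"]
print(df)  # columns: time, temperature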
--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
from src.engine.predictor import fetch_index, predict_results
from src.models.load_checkpoint import load_model
import yaml


def predict(user_input, config_path):

    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    print(config)
    model = load_model(config["model"]["checkpoint_path"], config["model"])
    index = fetch_index(user_input, config["dataset"]["seq_length"], config["dataset"]["path"])
    print(f"index : {index}")
    ans = predict_results(config["dataset"]["path"], index, model, config["dataset"]["seq_length"])

    return ans


if __name__ == "__main__":
    user_inp = "2021-02-01 08:50:00"
    ans = predict(user_inp, "config/predict.yaml")
    print(ans)

--------------------------------------------------------------------------------
/src/data/dataset.py:
--------------------------------------------------------------------------------
from torch.utils.data import Dataset, DataLoader


class WeatherDataset(Dataset):

    def __init__(self, meta_df, seq_length):
        self.meta_df = meta_df
        self.seq_length = seq_length

    def __len__(self):
        return len(self.meta_df) - self.seq_length - 1

    def __getitem__(self, index):
        # X: seq_length consecutive rows of features; y: the row that follows.
        X = self.meta_df.iloc[index: index + self.seq_length, 1:].to_numpy().astype("float32")
        y = self.meta_df.iloc[index + self.seq_length, 1:].to_numpy().astype("float32")

        return X, y


def split_data(dataframe, split_ratio=0.8):
    df = dataframe.copy()
    split_index = int(split_ratio * len(df))
    train_df = df.iloc[:split_index]
    val_df = df.iloc[split_index:]

    return train_df, val_df


def create_loaders(dataframe, split_ratio, batch_size, seq_length):
    train_set, val_set = split_data(dataframe=dataframe, split_ratio=split_ratio)
    train_ds = WeatherDataset(train_set, seq_length)
    val_ds = WeatherDataset(val_set, seq_length)

    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=True)

    return train_dl, val_dl

--------------------------------------------------------------------------------
/src/models/lstm_model.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn


class LSTM(nn.Module):

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, device):
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        _, (h_out, _) = self.lstm(x, (h_0, c_0))
        # h_out: (num_layers, batch, hidden_size) -> (batch, hidden_size) for num_layers=1.
        h_out = h_out.view(-1, self.hidden_size)
        out = self.sigmoid(self.fc(h_out))
        return out


def test():
    b, s, c = 4, 12, 10
    rand_value = torch.rand(b, s, c)

    model = LSTM(num_classes=c, input_size=c, hidden_size=2, num_layers=1)
    op = model(rand_value, "cpu")
    print(op.shape)


if __name__ == "__main__":
    test()
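
--------------------------------------------------------------------------------
Example: tracing tensor shapes through the LSTM (illustrative; not a repo file)
--------------------------------------------------------------------------------
To make the shapes concrete, a small sketch tracing one batch through the model;
it mirrors the test() helper above with the required device argument, and the
dimensions follow the configs (seq_length=20, 10 features):

import torch
from src.models.lstm_model import LSTM

model = LSTM(num_classes=10, input_size=10, hidden_size=2, num_layers=1)

x = torch.rand(4, 20, 10)   # (batch, seq_length, features)
out = model(x, "cpu")
# nn.LSTM returns h_out of shape (num_layers, batch, hidden_size) = (1, 4, 2);
# view(-1, hidden_size) gives (4, 2) and the fc layer maps that to (4, 10).
print(out.shape)            # torch.Size([4, 10])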
--------------------------------------------------------------------------------
/src/engine/predictor.py:
--------------------------------------------------------------------------------
import pandas as pd
from torch import from_numpy
import yaml
from datetime import datetime
import numpy as np
from tqdm import tqdm


def fetch_index(given_time, seq_length, csv_path):
    # Earliest timestamp that has a full seq_length history behind it.
    min_range = pd.read_csv(csv_path, usecols=[0],
                            skiprows=lambda x: x not in [seq_length, seq_length + 1]).to_numpy().reshape(-1)[0]
    # Last timestamp present in the processed CSV, and its row index.
    max_range = "2021-01-01 00:10:00"
    fin_index = 893646

    if given_time < min_range:
        raise ValueError(f"Requested time {given_time} predates the usable data range (starts at {min_range}).")
    else:
        if max_range > given_time:
            timestamps = pd.read_csv(csv_path, usecols=["time"])
            index = timestamps[timestamps["time"] == given_time].index[0]
        else:
            # Future timestamp: count 10-minute steps past the end of the data.
            fmt = '%Y-%m-%d %H:%M:%S'
            t1 = datetime.strptime(max_range, fmt)
            t2 = datetime.strptime(given_time, fmt)
            diff = (t2 - t1).total_seconds() // 600
            index = int(fin_index + diff)

    return index


def load_mmscaler_values():
    with open("config/mmscaler_values.yaml", "r") as f:
        scaler_values = yaml.safe_load(f)
    return scaler_values


def predict_single_record(model, record_arr, device="cpu"):
    record_arr = from_numpy(record_arr).unsqueeze(0)
    op = model(record_arr, device).squeeze(0)
    if op.device != "cpu":
        op = op.cpu()
    op = op.detach().numpy().reshape(-1)
    return op.tolist()


def anti_transform(op_arr):
    # Undo the per-column min-max scaling using the stored extrema.
    scaler_dict = load_mmscaler_values()
    for index in range(len(op_arr)):
        op_arr[index] = (op_arr[index] * (scaler_dict["max_values"][index]
                         - scaler_dict["min_values"][index])) + scaler_dict["min_values"][index]
    ans = [round(el, 3) for el in op_arr.tolist()]
    return ans


def predict_results(csv_path, index, model, seq_len):
    if index < 893646:
        # The timestamp exists on disk: read that single row directly.
        df = pd.read_csv(csv_path, skiprows=lambda x: x not in [index + 1], header=None)
        df = df.to_numpy()[:, 1:].reshape(-1)
        ans = anti_transform(df)
    else:
        # Future timestamp: roll the model forward autoregressively,
        # feeding each prediction back into the input window.
        num_repeats = index - 893646
        data = pd.read_csv(csv_path,
                           skiprows=lambda x: x not in [i for i in range(893646 - seq_len, 893646)], header=None)
        data = data.to_numpy()[:, 1:]
        temp_data = data.astype("float32")
        print(f"Predicting future for {num_repeats} iterations")
        for _ in tqdm(range(num_repeats)):
            op = predict_single_record(model, temp_data)
            temp_data = temp_data[1:, :].tolist()
            temp_data.append(op)
            temp_data = np.array(temp_data).astype("float32")

        ans = anti_transform(temp_data[-1, :])

    return ans
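
--------------------------------------------------------------------------------
Example: the future-index arithmetic (illustrative; not a repo file)
--------------------------------------------------------------------------------
The index arithmetic assumes one record every 10 minutes (600 s) and that row
893646 corresponds to "2021-01-01 00:10:00", the last timestamp in the processed
CSV. A worked example of the computation fetch_index performs for the default
request in predict.py:

from datetime import datetime

fmt = "%Y-%m-%d %H:%M:%S"
t1 = datetime.strptime("2021-01-01 00:10:00", fmt)   # last on-disk timestamp
t2 = datetime.strptime("2021-02-01 08:50:00", fmt)   # requested future time

steps = (t2 - t1).total_seconds() // 600             # 10-minute steps ahead
print(int(893646 + steps))                           # 893646 + 4516 = 898162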
--------------------------------------------------------------------------------
/src/data/prepare_data.py:
--------------------------------------------------------------------------------
import pandas as pd
import os
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import json
import yaml


def fetch_imp_features(dataframe, job_config):
    fet_cols = job_config["imp_features"]
    renamed_cols = job_config["renamed_cols"]
    assert len(fet_cols) == len(renamed_cols), "Size mismatch, cannot rename columns."
    df = dataframe.copy()
    df = df[fet_cols]
    df.columns = renamed_cols

    return df


def process_outliers_to_nan(dataframe):
    # -9999.0 / -9999.99 are the dataset's missing-value sentinels.
    df = dataframe.copy()
    df.replace(-9999.0, np.nan, inplace=True)
    df.replace(-9999.990, np.nan, inplace=True)
    df["wind_speed"] = df["wind_speed"].replace(28.4900, np.nan)

    return df


def perform_interpolation(dataframe, job_config):

    method = job_config["interpolation_method"]
    df = dataframe.copy()
    for fet in df.columns[1:]:
        df[fet] = df[fet].interpolate(method=method)

    return df


def perform_scaling(dataframe):
    df = dataframe.copy()
    mm_scalers = [MinMaxScaler() for _ in range(len(df.columns) - 1)]
    mm_scaler_values = {
        "min_values": [],
        "max_values": []
    }
    for index, fet in enumerate(df.columns[1:]):
        df[fet] = mm_scalers[index].fit_transform(df[fet].to_numpy().reshape(-1, 1)).reshape(-1)
        mm_scaler_values["min_values"].append(mm_scalers[index].data_min_.tolist()[0])
        mm_scaler_values["max_values"].append(mm_scalers[index].data_max_.tolist()[0])

    return df, mm_scaler_values


def prepare_dataset(job_config):

    ip_path = job_config["data_path"]["input"]
    op_path = job_config["data_path"]["output"]
    mmscaler_conf_path = job_config["data_path"]["mmscaler_values"]
    assert os.path.isfile(ip_path), "Source data not found ..."
    dataframe = pd.read_csv(ip_path, parse_dates=["Date Time"])
    print("Data Loaded ...")
    df = dataframe.copy()
    df = fetch_imp_features(dataframe=df, job_config=job_config)
    print("Important features filtered ...")
    df = process_outliers_to_nan(dataframe=df)
    df.drop_duplicates("time", inplace=True)
    df = perform_interpolation(dataframe=df, job_config=job_config)
    print("Performed interpolation ...")
    df, scaler_values = perform_scaling(dataframe=df)
    print("Performed scaling ...")
    with open(mmscaler_conf_path, "w") as f:
        yaml.dump(scaler_values, f)
    print("Scaler values stored ...")
    df.to_csv(op_path, index=False)
    print(f"Processed data stored to {op_path}")


def main():
    job_config_path = os.path.join(os.getcwd(), "config", "prepare_data.json")
    with open(job_config_path, "r") as f:
        job_config = json.load(f)
    print(job_config)
    prepare_dataset(job_config=job_config)


if __name__ == "__main__":
    main()
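
--------------------------------------------------------------------------------
Example: what "slinear" interpolation does (illustrative; not a repo file)
--------------------------------------------------------------------------------
For intuition, a small sketch of how interpolate(method="slinear") fills the
NaNs introduced by the sentinel replacement; the toy series is invented, and
"slinear" requires SciPy:

import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, np.nan, 4.0])
# "slinear" delegates to SciPy's first-order spline; on an evenly spaced
# index it behaves like straight linear interpolation.
print(s.interpolate(method="slinear").tolist())   # [1.0, 2.0, 3.0, 4.0]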
--------------------------------------------------------------------------------
/src/utils/training.py:
--------------------------------------------------------------------------------
import pandas as pd
from src.models.lstm_model import LSTM
from src.data.dataset import create_loaders
import matplotlib.pyplot as plt
import torch


def init_objects(job_config):
    dataframe = pd.read_csv(job_config["dataset"]["path"])
    train_dl, val_dl = create_loaders(dataframe=dataframe,
                                      split_ratio=job_config["dataset"]["split_ratio"],
                                      batch_size=job_config["dataset"]["batch_size"],
                                      seq_length=job_config["dataset"]["seq_length"])
    print("Dataloaders Generated...")
    model = LSTM(num_classes=job_config["model"]["num_classes"],
                 input_size=job_config["model"]["input_size"],
                 hidden_size=job_config["model"]["hidden_size"],
                 num_layers=job_config["model"]["num_layers"])
    print("Model Generated...")
    print(model)
    optim = torch.optim.Adam(params=model.parameters(),
                             lr=job_config["optimizer"]["lr"])
    print("Optimizer Generated...")
    loss_fn = torch.nn.MSELoss()

    return (train_dl, val_dl), model, optim, loss_fn


def save_best_model_on_loss(curr_losses, best_losses, model, optim, track_on="validation"):
    assert track_on in ["training", "validation"]
    curr_train_loss, curr_val_loss = curr_losses
    best_train_loss, best_val_loss = best_losses
    if track_on == "training":
        flag = save_best_model(curr_loss=curr_train_loss,
                               best_loss=best_train_loss,
                               model=model,
                               optim=optim)
    else:
        flag = save_best_model(curr_loss=curr_val_loss,
                               best_loss=best_val_loss,
                               model=model,
                               optim=optim)
    if flag:
        return curr_losses
    else:
        return best_losses


def save_best_model(curr_loss, best_loss, model, optim):
    if curr_loss <= best_loss:
        weights = {
            "model": model.state_dict(),
            "optim": optim.state_dict()
        }
        torch.save(weights, "best_model.pt")
        print("Model Updated...")
        print(f"Current best loss : {'%.6f' % curr_loss}")
        return True
    else:
        print("Model not updated...")
        print(f"Current best loss : {'%.6f' % best_loss}")
        return False


def save_training_curve(train_dict, val_dict):
    for name in val_dict.keys():
        plt.figure(figsize=(20, 6))
        plt.plot(train_dict[name], label=f"train_{name}")
        plt.plot(val_dict[name], label=f"val_{name}")
        plt.legend()
        plt.savefig(f"results/{name}.png")
        plt.close()


def update_metric_dict(true, pred, metric_dict, res_dict):
    if true.device != "cpu":
        true = true.cpu()
        pred = pred.cpu()
    true = true.detach().numpy().reshape(-1)
    pred = pred.detach().numpy().reshape(-1)
    for name, metric in metric_dict.items():
        res_dict[name].append(metric(true, pred))

    return res_dict
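
--------------------------------------------------------------------------------
Example: checkpoint round trip (illustrative; not a repo file)
--------------------------------------------------------------------------------
The checkpoint written by save_best_model is a plain dict holding the "model"
and "optim" state dicts, which is exactly what the two loaders expect. A hedged
round-trip sketch under those assumptions:

import torch
from src.models.lstm_model import LSTM

model = LSTM(num_classes=10, input_size=10, hidden_size=2, num_layers=1)
optim = torch.optim.Adam(model.parameters(), lr=0.0005)

# Save in the same {"model": ..., "optim": ...} layout used by save_best_model.
torch.save({"model": model.state_dict(), "optim": optim.state_dict()}, "best_model.pt")

# Restore later; map_location keeps this safe on CPU-only machines.
chkp = torch.load("best_model.pt", map_location="cpu")
model.load_state_dict(chkp["model"])
optim.load_state_dict(chkp["optim"])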
--------------------------------------------------------------------------------
/src/engine/pytorch_trainer.py:
--------------------------------------------------------------------------------
import yaml
import torch
import numpy as np
from src.utils.training import (init_objects,
                                save_best_model_on_loss,
                                save_training_curve,
                                update_metric_dict)
from src.utils.mlflow_logger import log_pt_models
from src.utils.load_checkpoint import load_best_model
from src.utils.metrics import metrics_


def trainer(model, train_dl, val_dl, loss_fn, optim, job_config):
    training_hp = job_config["training_hp"]
    epochs = training_hp["num_epochs"]
    log_index = training_hp["log_index"]
    best_train_loss = torch.inf
    best_val_loss = torch.inf
    train_loss_array, val_loss_array = [], []
    train_metric_results = {k: [] for k, _ in metrics_.items()}
    val_metric_results = {k: [] for k, _ in metrics_.items()}
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Device found : {device}")
    model.to(device)
    print('Model loaded on device...')
    for epoch in range(epochs):
        print(f"Epoch {epoch + 1} :")
        train_epoch_metric_results = {k: [] for k, _ in metrics_.items()}
        val_epoch_metric_results = {k: [] for k, _ in metrics_.items()}
        train_loss = val_loss = 0.0
        model.train()
        for index, (x, y) in enumerate(train_dl):
            if device != "cpu":
                x = x.cuda()
                y = y.cuda()
            op = model(x, device)
            train_epoch_metric_results = update_metric_dict(y, op, metrics_, train_epoch_metric_results)
            curr_loss = loss_fn(op, y)
            if index % log_index == 0:
                print(f"Step {index} Loss : {'%.6f' % curr_loss.item()}")
            train_loss += curr_loss.item()
            # Clear stale gradients before backprop so steps don't accumulate.
            optim.zero_grad()
            curr_loss.backward()
            optim.step()
        model.eval()
        with torch.no_grad():
            for x, y in val_dl:
                if device != "cpu":
                    x = x.cuda()
                    y = y.cuda()
                op = model(x, device)
                val_epoch_metric_results = update_metric_dict(y, op, metrics_, val_epoch_metric_results)
                curr_loss = loss_fn(op, y)
                val_loss += curr_loss.item()
        print(f"Train Loss : {'%.6f' % train_loss} || Validation Loss : {'%.6f' % val_loss}")
        train_res = {k: np.mean(v) for k, v in train_epoch_metric_results.items()}
        val_res = {k: np.mean(v) for k, v in val_epoch_metric_results.items()}
        print(f"Train Metric Results : {train_res}")
        print(f"Validation Metric Results : {val_res}")
        for tot, epo in zip([train_metric_results, val_metric_results],
                            [train_res, val_res]):
            for name, val in epo.items():
                tot[name].append(val)
        train_loss_array.append(train_loss)
        val_loss_array.append(val_loss)
        best_train_loss, best_val_loss = save_best_model_on_loss(curr_losses=(train_loss, val_loss),
                                                                 best_losses=(best_train_loss, best_val_loss),
                                                                 model=model,
                                                                 optim=optim)
        print("\n")
    results = {
        "training_mse": best_train_loss,
        "validation_mse": best_val_loss
    }
    best_model = load_best_model(job_config["model"])
    log_pt_models(model=best_model,
                  hparams=job_config,
                  results=results)
    train_metric_results["mse"] = train_loss_array
    val_metric_results["mse"] = val_loss_array
    save_training_curve(train_metric_results, val_metric_results)
    print("Training Completed...")


def main():
    job_config_path = "config/pt_training.yaml"
    with open(job_config_path, "r") as f:
        job_config = yaml.safe_load(f)
    print("Configuration Loaded...")
    print(job_config)
    (train_dl, val_dl), lstm_model, optim, loss_fn = init_objects(job_config)
    trainer(model=lstm_model,
            train_dl=train_dl,
            val_dl=val_dl,
            loss_fn=loss_fn,
            optim=optim,
            job_config=job_config)


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------