├── GP_PV.ipynb ├── LUBE_PV.ipynb ├── NGBoost_PV.ipynb └── heatmap.ipynb /GP_PV.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "from matplotlib import pyplot as plt\n", 12 | "import seaborn as sns\n", 13 | "import time\n", 14 | "from datetime import datetime\n", 15 | "\n", 16 | "from sklearn import preprocessing\n", 17 | "from sklearn.preprocessing import MinMaxScaler\n", 18 | "from sklearn.preprocessing import StandardScaler\n", 19 | "from sklearn.metrics import mean_squared_error\n", 20 | "from sklearn.metrics import mean_absolute_error\n", 21 | "\n", 22 | "import scipy.stats as stats\n", 23 | "\n", 24 | "import math\n", 25 | "from sklearn.metrics import mean_absolute_error as mae\n", 26 | "from sklearn.metrics import mean_squared_error\n", 27 | "from math import sqrt\n", 28 | "\n", 29 | "import torch\n", 30 | "import gpytorch\n", 31 | "from gpytorch.kernels import RBFKernel as RBF\n", 32 | "from gpytorch.kernels import ScaleKernel as C\n", 33 | "from gpytorch.kernels import PeriodicKernel as Per\n", 34 | "from gpytorch.kernels import RQKernel as RQ\n", 35 | "from gpytorch.kernels import MaternKernel as M\n", 36 | "from gpytorch.kernels import PolynomialKernel\n", 37 | "\n", 38 | "import properscoring as prscore" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "## Read and preprocess the dataset" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "df = pd.read_csv('power_weather_data.csv')\n", 55 | "\n", 56 | "# csv file MUST contain 'date' and 'Power' fields\n", 57 | "# optional: weather data" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 
# --- Read and preprocess the dataset (continued) ---------------------------
# Parse the timestamp column; the CSV stores it as month/day/year hour:minute.
df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y %H:%M')

# Calendar features. The vectorised .dt accessor replaces a per-row
# .apply(lambda ...) and yields the same hour / month values.
df['hour'] = df['date'].dt.hour
df['month'] = df['date'].dt.month

# Cyclic encoding of the month so that December and January end up adjacent.
# df['hour_sin'] = np.sin(df['hour'] * 2 * np.pi/24)
# df['hour_cos'] = np.cos(df['hour'] * 2 * np.pi/24)
df['month_sin'] = np.sin(df['month'] * 2 * np.pi/12)
df['month_cos'] = np.cos(df['month'] * 2 * np.pi/12)

# Keep only the 06:00-21:59 rows (16 hours per day).
df = df[(df['hour']>=6) & (df['hour']<=21)]

# The raw month is superseded by its sin/cos encoding; 'hour' stays a feature.
# df = df.drop(['hour', 'month'], axis=1)
df = df.drop(['month'], axis=1)

# Lagged power as autoregressive inputs (previous three samples).
# NOTE(review): the shift is applied after the hour filter, so the lags of the
# first rows of each day come from the last rows of the previous day rather
# than from 15/30/45 minutes earlier - confirm this is intended.
P = df['Power']

PowerData = pd.concat([P.shift(3), P.shift(2), P.shift(1)], axis=1)
PowerData.columns = ['t-45', 't-30', 't-15']

df = pd.concat([df, PowerData.reindex(df.index)], axis=1)

# NOTE(review): fillna(0) zero-fills every column with missing values, not
# only the three leading lag rows - confirm the other columns have no gaps.
df = df.fillna(0)

# --- Hyperparameters -------------------------------------------------------
# Each entry is a [first_day, last_day] window; the model is trained and
# evaluated once per window.
weeks = [['2018-03-01', '2019-03-15']]

# Number of days at the end of each window that are held out for testing.
val_days = 14
24\n", 144 | "n_points_day = 4 * 16" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "## Set the dataframes" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "dfs = []\n", 161 | "\n", 162 | "for w in weeks:\n", 163 | " \n", 164 | " w_start = datetime.strptime(w[0]+\" 00:00\", '%Y-%m-%d %H:%M')\n", 165 | " w_end = datetime.strptime(w[1]+\" 23:59\", '%Y-%m-%d %H:%M')\n", 166 | " \n", 167 | " dfs.append(df[(df['date'] > w_start) & (df['date'] < w_end)])\n", 168 | " \n", 169 | "n_sets = len(dfs)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "## Train Test Split" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "X_train_ = []\n", 186 | "X_test_ = []\n", 187 | "y_train_ = []\n", 188 | "y_test = []\n", 189 | "\n", 190 | "x_scaler = []\n", 191 | "y_scaler = []\n", 192 | "\n", 193 | "t_train = []\n", 194 | "t_test = []\n", 195 | "\n", 196 | "for i in range(len(dfs)):\n", 197 | "\n", 198 | " train = dfs[i][:int(-n_points_day*val_days)]\n", 199 | " test = dfs[i][int(-n_points_day*val_days):]\n", 200 | " \n", 201 | " X_tr = train.drop(['Power','date'], axis=1).values\n", 202 | " X_t = test.drop(['Power','date'], axis=1).values\n", 203 | " \n", 204 | " y_tr = train['Power'].values\n", 205 | " y_t = test['Power'].values\n", 206 | " \n", 207 | " x_sc = MinMaxScaler()\n", 208 | " y_sc = MinMaxScaler(feature_range=(-1,1))\n", 209 | "# x_sc = StandardScaler()\n", 210 | "# y_sc = StandardScaler()\n", 211 | " x_sc.fit(X_tr)\n", 212 | " y_sc.fit(y_tr.reshape(-1, 1))\n", 213 | " x_scaler.append(x_sc)\n", 214 | " y_scaler.append(y_sc)\n", 215 | " \n", 216 | " X_train_.append(x_sc.transform(X_tr))\n", 217 | " X_test_.append(x_sc.transform(X_t))\n", 218 | " 
class ExactGPModel(gpytorch.models.ExactGP):
    """Exact GP regressor: constant mean plus a scaled rational-quadratic kernel.

    Args:
        X_train: training inputs handed to ``gpytorch.models.ExactGP``.
        y_train: training targets handed to ``gpytorch.models.ExactGP``.
        likelihood: gpytorch likelihood attached to the model.
    """

    def __init__(self, X_train, y_train, likelihood):
        super().__init__(X_train, y_train, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        # Fully qualified kernel names are used instead of one-letter import
        # aliases such as `C`: a short alias is easily rebound by an unrelated
        # notebook variable, after which re-running this cell would fail.
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RQKernel())

    def forward(self, x):
        """Return the GP prior at ``x`` as a ``MultivariateNormal``."""
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
def PICP_func(y, lower, upper):
    """Prediction-interval coverage probability.

    Returns the fraction of observations ``y[k]`` that satisfy
    ``lower[k] <= y[k] <= upper[k]`` (both bounds inclusive).
    """
    n_covered = sum(
        1 for k, observed in enumerate(y) if lower[k] <= observed <= upper[k]
    )
    return n_covered / len(y)
def _lag_feature_from_prediction(y_scaled, x_sc, y_sc, col):
    """Convert a GP prediction into the scaled value of lag column ``col``.

    The target and the input features are scaled by two different
    MinMaxScalers, so a prediction (target-scaler units) must first be mapped
    back to real power and then through the affine transform
    ``x * scale_ + min_`` that the feature scaler learned for that column.
    """
    power = y_sc.inverse_transform(np.array([[y_scaled]]))[0, 0]
    return power * x_sc.scale_[col] + x_sc.min_[col]


for i in range(len(dfs)):

    print(i)

    # Unpacking
    model = models[i]
    likelihood = likelihoods[i]
    # Work on a copy: the recursive forecast overwrites the lag columns, and
    # mutating X_test[i] itself would make this cell give different numbers
    # every time it is re-run.
    X_t = X_test[i].clone()
    y_t = y_test[i]
    x_sc = x_scaler[i]
    y_sc = y_scaler[i]

    model.eval()
    likelihood.eval()

    # Inference only: without no_grad the in-place writes below would chain
    # every step of the recursion into one autograd graph.
    with torch.no_grad():
        # Multi-step-ahead prediction. The first three test rows still carry
        # measured lags; from row 3 onwards the last three columns
        # (t-45, t-30, t-15) are replaced by the model's own previous outputs.
        history = [model(X_t[k].unsqueeze(0)).mean.item() for k in range(3)]
        for j in range(3, X_t.shape[0]):
            for col, y_scaled in zip((-3, -2, -1), history):
                X_t[j, col] = _lag_feature_from_prediction(y_scaled, x_sc, y_sc, col)
            y_next = model(X_t[j].unsqueeze(0)).mean.item()
            history = history[1:] + [y_next]
        # end of multi-step ahead

        # Predictive distribution including observation noise.
        f_pred_i = likelihood(model(X_t))
        lower, upper = f_pred_i.confidence_region()

    # Back to real power units.
    lower = y_sc.inverse_transform(lower.numpy().reshape(-1, 1)).flatten()
    upper = y_sc.inverse_transform(upper.numpy().reshape(-1, 1)).flatten()
    real_y_test = y_t.flatten()

    mean = (upper+lower)/2
    # confidence_region() returns mean +/- 2 standard deviations, so the
    # half-width is divided by 2 (not 1.96) to recover sigma.
    std = (mean - lower)/2

    # Deterministic metrics
    MAE = mean_absolute_error(real_y_test, mean)
    # sqrt of the MSE instead of `squared=False`, which newer scikit-learn
    # releases no longer accept.
    RMSE = np.sqrt(mean_squared_error(real_y_test, mean))
    MBE = np.mean(mean - real_y_test)
    print(f'MAE: {MAE:.3f}')
    print(f'RMSE: {RMSE:.3f}')
    print(f'MBE: {MBE:.3f}')

    # Probabilistic metrics
    PICP = PICP_func(real_y_test, lower, upper)
    PINAW = PINAW_func(real_y_test, lower, upper)
    # Deliberately not named `C`: that name is the ScaleKernel import alias.
    crps_values = prscore.crps_gaussian(real_y_test, mu=mean, sig=std)
    CRPS = crps_values.mean()
    print(f'PICP: {PICP:.3f}')
    print(f'PINAW: {PINAW:.3f}')
    print(f'CRPS: {CRPS:.3f}')
    print('\n')
mean_squared_error\n", 18 | "from sklearn.metrics import mean_absolute_error\n", 19 | "\n", 20 | "import scipy.stats as stats\n", 21 | "\n", 22 | "from sklearn.metrics import mean_absolute_error as mae\n", 23 | "from math import sqrt\n", 24 | "\n", 25 | "import torch\n", 26 | "from torch import nn, optim\n", 27 | "import torch.optim as optim\n", 28 | "import torch.nn.functional as F\n", 29 | "from torch.optim.optimizer import Optimizer\n", 30 | "\n", 31 | "import time\n", 32 | "\n", 33 | "import properscoring as prscore\n", 34 | "\n", 35 | "import math\n", 36 | "from torch.autograd import Variable" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Read and preprocess the dataset" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "df = pd.read_csv('power_weather_data.csv')\n", 53 | "\n", 54 | "# csv file MUST contain 'date' and 'Power' fields\n", 55 | "# optional: weather data" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y %H:%M')" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "df['hour'] = df['date'].apply(lambda x: x.hour )\n", 74 | "df['month'] = df['date'].apply(lambda x: x.month)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "# df['hour_sin'] = np.sin(df['hour'] * 2 * np.pi/24)\n", 84 | "# df['hour_cos'] = np.cos(df['hour'] * 2 * np.pi/24)\n", 85 | "df['month_sin'] = np.sin(df['month'] * 2 * np.pi/12)\n", 86 | "df['month_cos'] = np.cos(df['month'] * 2 * np.pi/12)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | 
"source": [ 95 | "df = df[(df['hour']>=6) & (df['hour']<=21)]" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "# df = df.drop(['hour', 'month'], axis=1)\n", 105 | "df = df.drop(['month'], axis=1)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "P = df['Power']\n", 115 | "\n", 116 | "PowerData = pd.concat([P.shift(3), P.shift(2), P.shift(1)], axis=1)\n", 117 | "PowerData.columns = ['t-45', 't-30', 't-15']\n", 118 | "\n", 119 | "df = pd.concat([df, PowerData.reindex(df.index)], axis=1)\n", 120 | " \n", 121 | "df = df.fillna(0)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "## Hyperparameters" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "weeks = [['2018-03-01', '2019-03-15']]\n", 138 | "\n", 139 | "val_days = 14\n", 140 | "\n", 141 | "# n_points_day = 4 * 24\n", 142 | "n_points_day = 4 * 16" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "## Set the dataframes" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "dfs = []\n", 159 | "\n", 160 | "for w in weeks:\n", 161 | " \n", 162 | " w_start = datetime.strptime(w[0]+\" 00:00\", '%Y-%m-%d %H:%M')\n", 163 | " w_end = datetime.strptime(w[1]+\" 23:59\", '%Y-%m-%d %H:%M')\n", 164 | " \n", 165 | " dfs.append(df[(df['date'] > w_start) & (df['date'] < w_end)])\n", 166 | " \n", 167 | "n_sets = len(dfs)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "## Train Test Split" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | 
"metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "X_train_ = []\n", 184 | "X_test_ = []\n", 185 | "y_train_ = []\n", 186 | "y_test_ = []\n", 187 | "\n", 188 | "x_scaler = []\n", 189 | "y_scaler = []\n", 190 | "\n", 191 | "t_train = []\n", 192 | "t_test = []\n", 193 | "\n", 194 | "for i in range(len(dfs)):\n", 195 | "\n", 196 | " train = dfs[i][:int(-n_points_day*val_days)]\n", 197 | " test = dfs[i][int(-n_points_day*val_days):]\n", 198 | " \n", 199 | " X_tr = train.drop(['Power','date'], axis=1).values\n", 200 | " X_t = test.drop(['Power','date'], axis=1).values\n", 201 | " \n", 202 | " y_tr = train['Power'].values\n", 203 | " y_t = test['Power'].values\n", 204 | " \n", 205 | " x_sc = MinMaxScaler()\n", 206 | " y_sc = MinMaxScaler()\n", 207 | "# x_sc = StandardScaler()\n", 208 | "# y_sc = StandardScaler()\n", 209 | " x_sc.fit(X_tr)\n", 210 | " y_sc.fit(y_tr.reshape(-1, 1))\n", 211 | " x_scaler.append(x_sc)\n", 212 | " y_scaler.append(y_sc)\n", 213 | " \n", 214 | " X_train_.append(x_sc.transform(X_tr))\n", 215 | " X_test_.append(x_sc.transform(X_t))\n", 216 | " y_train_.append(y_sc.transform(y_tr.reshape(-1, 1)) + 0.001)\n", 217 | " y_test_.append(y_sc.transform(y_t.reshape(-1, 1)) + 0.001)\n", 218 | " \n", 219 | " t_train.append(dfs[i].iloc[:int(-n_points_day*val_days)]['date'].values)\n", 220 | " t_test.append(dfs[i].iloc[int(-n_points_day*val_days):]['date'].values)" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "X_train = []\n", 230 | "X_test = []\n", 231 | "y_train = []\n", 232 | "y_test = []\n", 233 | "\n", 234 | "for i in range(len(dfs)):\n", 235 | " X_train.append(torch.from_numpy(X_train_[i]).float())\n", 236 | " X_test.append(torch.from_numpy(X_test_[i]).float())\n", 237 | " \n", 238 | " y_tr = torch.from_numpy(y_train_[i]).float()\n", 239 | " y_train.append(torch.squeeze(y_tr))\n", 240 | " y_t = torch.from_numpy(y_test_[i]).float()\n", 241 | 
def CWC(y_pred, y_true, penalty=None, target_picp=0.95):
    """Coverage width-based criterion (CWC) used as the LUBE loss.

    ``CWC = (MPIW / R) * (1 + exp(-penalty * (PICP - target_picp)))`` where
    PICP is the share of targets strictly inside their interval, MPIW is the
    square root of the mean absolute interval width (as in the original
    implementation) and R is the range of the targets.

    Args:
        y_pred: tensor of interval bounds; column 0 is the upper bound and
            column 1 the lower bound. Any shape that reshapes to (n, 2).
        y_true: tensor with the n target values.
        penalty: exponential penalty strength. Defaults to the notebook-level
            hyperparameter ``eta`` when omitted.
        target_picp: nominal coverage the penalty is centred on.

    Returns:
        Scalar tensor holding the loss. Everything stays in torch on the
        device of ``y_pred``, so CUDA inputs work without a NumPy round-trip.
    """
    if penalty is None:
        penalty = eta

    bounds = y_pred.reshape(-1, 2)
    upper, lower = bounds[:, 0], bounds[:, 1]
    target = y_true.reshape(-1).to(bounds.device)

    # PICP: prediction-interval coverage probability (strict inequalities).
    covered = (lower < target) & (target < upper)
    picp = covered.sum().item() / target.numel()

    # Width term and normalising range of the targets.
    mpiw = torch.sqrt(torch.mean(torch.abs(upper - lower)))
    target_range = torch.max(target) - torch.min(target)

    loss = (mpiw / target_range) * (1 + math.exp(-penalty * (picp - target_picp)))

    # Callers invoke .backward() on the loss even though simulated annealing
    # never reads gradients; keep that call valid for inputs that are not
    # attached to autograd.
    if not loss.requires_grad:
        loss.requires_grad_(True)
    return loss
class UniformSampler(object):
    """Draws tensors of uniformly distributed values.

    'float' samples come from ``uniform_(minval, maxval)``; 'int' and 'long'
    samples come from ``random_(minval, maxval + 1)``, i.e. ``maxval`` itself
    can be drawn.
    """

    def __init__(self, minval, maxval, dtype='float', cuda=False):
        # Legacy typed-tensor constructors live under torch or torch.cuda.
        backend = torch.cuda if cuda else torch
        self.minval, self.maxval = minval, maxval
        self.cuda = cuda
        self.dtype_str = dtype
        if dtype == 'float':
            self.dtype = backend.FloatTensor
        elif dtype == 'int':
            self.dtype = backend.IntTensor
        elif dtype == 'long':
            self.dtype = backend.LongTensor
        else:
            # Same failure mode as an unknown key in a lookup table.
            raise KeyError(dtype)

    def sample(self, size):
        """Return a freshly allocated tensor of shape ``size`` filled with samples."""
        kind = self.dtype_str
        if kind not in ('float', 'int', 'long'):
            raise Exception("unknown dtype")

        block = self.dtype(*size)
        if kind == 'float':
            return block.uniform_(self.minval, self.maxval)
        return block.random_(self.minval, self.maxval + 1)
class SimulatedAnnealing(Optimizer):
    """Gradient-free simulated-annealing optimizer.

    On every ``step`` each parameter tensor is rescaled to unit norm and then
    perturbed with noise drawn from ``sampler``. The perturbed weights are kept
    when they lower the loss, or otherwise with the Metropolis probability
    ``exp((old - new) / tau)``; if rejected, the previous weights are restored.

    Args:
        params: iterable of parameters (or parameter groups) to optimise.
        sampler: object exposing ``sample(size)`` that returns a noise tensor.
        tau0: initial temperature.
        anneal_rate: decay rate of the smooth (exponential) schedule.
        min_temp: lower bound for the temperature.
        anneal_every: the temperature is updated whenever the per-tensor
            iteration counter is a positive multiple of this value.
        hard: if True use the multiplicative schedule ``tau *= hard_rate``,
            otherwise ``tau0 * exp(-anneal_rate * iteration)``.
        hard_rate: multiplicative factor of the hard schedule.

    NOTE(review): the noise is not scaled by the temperature and the weights
    are projected to unit norm before every perturbation; both are kept
    exactly as found - confirm they are intended.
    """

    def __init__(self, params, sampler, tau0=5.0, anneal_rate=0.0003,
                 min_temp=1e-5, anneal_every=10, hard=True, hard_rate=0.95):
        defaults = dict(sampler=sampler, tau0=tau0, tau=tau0, anneal_rate=anneal_rate,
                        min_temp=min_temp, anneal_every=anneal_every,
                        hard=hard, hard_rate=hard_rate, iteration=0)
        super(SimulatedAnnealing, self).__init__(params, defaults)

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable): re-evaluates the model and returns the loss.
                Required, because the loss must be computed before and after
                the perturbation.

        Returns:
            The loss of the weights that are in place after the step.
        """
        if closure is None:
            raise Exception("loss closure is required to do SA")

        loss = closure()
        final_loss = loss

        for group in self.param_groups:
            # Keep a copy of the parameters in case the move is rejected.
            cloned_params = [p.clone() for p in group['params']]

            for p in group['params']:
                # Anneal tau when the counter hits a multiple of anneal_every.
                # The counter advances once per parameter tensor, not per step.
                if group['iteration'] > 0 and group['iteration'] % group['anneal_every'] == 0:
                    if not group['hard']:
                        # smoother annealing: consider using this over hard annealing
                        rate = -group['anneal_rate'] * group['iteration']
                        group['tau'] = np.maximum(group['tau0'] * np.exp(rate),
                                                  group['min_temp'])
                    else:
                        # hard annealing
                        group['tau'] = np.maximum(group['hard_rate'] * group['tau'],
                                                  group['min_temp'])

                random_perturbation = group['sampler'].sample(p.data.size())
                # The sampler allocates on its own device (CPU unless created
                # with cuda=True); move the noise next to the parameter so the
                # in-place add also works for models living on the GPU.
                random_perturbation = random_perturbation.to(p.data.device)

                # An all-zero tensor has norm 0; dividing by it would turn the
                # whole tensor into NaN, so such tensors skip the projection.
                norm = torch.norm(p.data)
                if norm > 0:
                    p.data = p.data / norm
                p.data.add_(random_perturbation)
                group['iteration'] += 1

            # Re-evaluate the loss with the perturbed params; if the move is
            # rejected, swap the saved params back in.
            loss_perturbed = closure()
            final_loss, is_swapped_back = self.anneal(loss, loss_perturbed, group['tau'])
            if is_swapped_back:
                for p, pbkp in zip(group['params'], cloned_params):
                    p.data = pbkp.data

            # With several parameter groups, later groups are compared against
            # the loss of the weights that are actually in place.
            loss = final_loss

        return final_loss

    def anneal(self, loss, loss_perturbed, tau):
        '''Decide whether to keep the perturbed weights.

        Returns ``(loss, is_swapped_back)``: the loss of the weights that are
        kept, and True when the caller must restore the previous weights.
        '''
        if loss_perturbed.data < loss.data:
            # Strict improvement is always accepted.
            return loss_perturbed, False

        # Metropolis criterion for a move that does not improve the loss.
        ap = torch.exp((loss - loss_perturbed) / tau)
        draw = np.random.rand()
        print("old = ", loss.data, "| new = ", loss_perturbed.data,
              " | ap = ", ap.data, " | tau = ", tau, " | r = ", draw)

        if ap.data > draw:
            return loss_perturbed, False

        return loss, True
def PINAW_func(y, lower, upper):
    """Prediction-interval normalised average width.

    Mean interval width ``upper - lower`` divided by the range
    ``max(y) - min(y)`` of the observations.
    """
    observed_range = np.max(y) - np.min(y)
    average_width = np.mean(upper - lower)
    return average_width / observed_range
y_scaler[i].inverse_transform(upper_train.reshape(-1, 1))\n", 585 | " lower_train = y_scaler[i].inverse_transform(lower_train.reshape(-1, 1))\n", 586 | " \n", 587 | " upper = y_scaler[i].inverse_transform(upper.reshape(-1, 1))\n", 588 | " lower = y_scaler[i].inverse_transform(lower.reshape(-1, 1))\n", 589 | " \n", 590 | " real_y_test = real_y_test.flatten()\n", 591 | " real_y_train = real_y_train.flatten()\n", 592 | " \n", 593 | " lower_train = lower_train.flatten()\n", 594 | " upper_train = upper_train.flatten()\n", 595 | " \n", 596 | " lower = lower.flatten()\n", 597 | " upper = upper.flatten()\n", 598 | " \n", 599 | " for j in range(len(lower)):\n", 600 | " if lower[j]<10e-6:\n", 601 | " lower[j]=0\n", 602 | " \n", 603 | " mean = (upper+lower)/2\n", 604 | " std = (mean - lower)/1.96\n", 605 | " \n", 606 | " # Deterministic metrics\n", 607 | " MAE = mean_absolute_error(real_y_test, mean)\n", 608 | " RMSE = mean_squared_error(real_y_test, mean, squared=False)\n", 609 | " MBE = np.mean(mean - real_y_test)\n", 610 | " print(f'MAE: {MAE:.3f}')\n", 611 | " print(f'RMSE: {RMSE:.3f}')\n", 612 | " print(f'MBE: {MBE:.3f}')\n", 613 | " \n", 614 | " # Probabilistic metrics\n", 615 | " PICP = PICP_func(real_y_test, lower, upper)\n", 616 | " PINAW = PINAW_func(real_y_test, lower, upper)\n", 617 | " C = prscore.crps_gaussian(real_y_test, mu=mean, sig=std)\n", 618 | " CRPS = C.mean()\n", 619 | " print(f'PICP: {PICP:.3f}')\n", 620 | " print(f'PINAW: {PINAW:.3f}')\n", 621 | " print(f'CRPS: {CRPS:.3f}')\n", 622 | " print('\\n')" 623 | ] 624 | } 625 | ], 626 | "metadata": { 627 | "kernelspec": { 628 | "display_name": "Python 3", 629 | "language": "python", 630 | "name": "python3" 631 | }, 632 | "language_info": { 633 | "codemirror_mode": { 634 | "name": "ipython", 635 | "version": 3 636 | }, 637 | "file_extension": ".py", 638 | "mimetype": "text/x-python", 639 | "name": "python", 640 | "nbconvert_exporter": "python", 641 | "pygments_lexer": "ipython3", 642 | "version": "3.7.7" 643 
| } 644 | }, 645 | "nbformat": 4, 646 | "nbformat_minor": 4 647 | } 648 | -------------------------------------------------------------------------------- /NGBoost_PV.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "from matplotlib import pyplot as plt\n", 12 | "import seaborn as sns\n", 13 | "import time\n", 14 | "from datetime import datetime\n", 15 | "\n", 16 | "from sklearn.preprocessing import MinMaxScaler\n", 17 | "from sklearn.metrics import mean_squared_error\n", 18 | "from sklearn.metrics import mean_absolute_error\n", 19 | "\n", 20 | "from sklearn.tree import DecisionTreeRegressor\n", 21 | "\n", 22 | "from ngboost import NGBRegressor\n", 23 | "\n", 24 | "import properscoring as prscore\n", 25 | "\n", 26 | "import pickle\n", 27 | "from pathlib import Path\n", 28 | "import os" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Read and preprocess the dataset" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "df = pd.read_csv('power_weather_data.csv')\n", 45 | "\n", 46 | "# csv file MUST contain 'date' and 'Power' fields\n", 47 | "# optional: weather data" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y %H:%M')" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "df['hour'] = df['date'].apply(lambda x: x.hour )\n", 66 | "df['month'] = df['date'].apply(lambda x: x.month)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 
| "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "# df['hour_sin'] = np.sin(df['hour'] * 2 * np.pi/24)\n", 76 | "# df['hour_cos'] = np.cos(df['hour'] * 2 * np.pi/24)\n", 77 | "df['month_sin'] = np.sin(df['month'] * 2 * np.pi/12)\n", 78 | "df['month_cos'] = np.cos(df['month'] * 2 * np.pi/12)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "df = df[(df['hour']>=6) & (df['hour']<=21)]" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "# df = df.drop(['hour', 'month'], axis=1)\n", 97 | "df = df.drop(['month'], axis=1)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "P = df['Power']\n", 107 | "\n", 108 | "PowerData = pd.concat([P.shift(3), P.shift(2), P.shift(1)], axis=1)\n", 109 | "PowerData.columns = ['t-45', 't-30', 't-15']\n", 110 | "\n", 111 | "df = pd.concat([df, PowerData.reindex(df.index)], axis=1)\n", 112 | " \n", 113 | "df = df.fillna(0)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Hyperparameters" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "weeks = [['2018-03-01', '2019-03-15']]\n", 130 | "\n", 131 | "val_days = 14\n", 132 | "\n", 133 | "# n_points_day = 4 * 24\n", 134 | "n_points_day = 4 * 16" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "## Set the dataframes" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "dfs = []\n", 151 | "\n", 152 | "for w in weeks:\n", 153 | " \n", 154 | " w_start = datetime.strptime(w[0]+\" 00:00\", '%Y-%m-%d 
%H:%M')\n", 155 | " w_end = datetime.strptime(w[1]+\" 23:59\", '%Y-%m-%d %H:%M')\n", 156 | " \n", 157 | " dfs.append(df[(df['date'] > w_start) & (df['date'] < w_end)])\n", 158 | " \n", 159 | "n_sets = len(dfs)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "## Train Test Split" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "X_train_ = []\n", 176 | "X_test_ = []\n", 177 | "y_train_ = []\n", 178 | "y_test_ = []\n", 179 | "\n", 180 | "x_scaler = []\n", 181 | "y_scaler = []\n", 182 | "\n", 183 | "t_train = []\n", 184 | "t_test = []\n", 185 | "\n", 186 | "for i in range(n_sets):\n", 187 | "\n", 188 | " train = dfs[i][:int(-n_points_day*val_days)]\n", 189 | " test = dfs[i][int(-n_points_day*val_days):]\n", 190 | " \n", 191 | " X_tr = train.drop(['Power','date'], axis=1).values\n", 192 | " X_t = test.drop(['Power','date'], axis=1).values\n", 193 | " \n", 194 | " y_tr = train['Power'].values\n", 195 | " y_t = test['Power'].values\n", 196 | " \n", 197 | " x_sc = MinMaxScaler()\n", 198 | " y_sc = MinMaxScaler()\n", 199 | "# x_sc = StandardScaler()\n", 200 | "# y_sc = StandardScaler()\n", 201 | " x_sc.fit(X_tr)\n", 202 | " y_sc.fit(y_tr.reshape(-1, 1)) #reshape only because fit needs a 2d array\n", 203 | " x_scaler.append(x_sc)\n", 204 | " y_scaler.append(y_sc)\n", 205 | " \n", 206 | " X_train_.append(x_sc.transform(X_tr))\n", 207 | " X_test_.append(x_sc.transform(X_t))\n", 208 | " y_train_.append(y_sc.transform(y_tr.reshape(-1, 1)))\n", 209 | " y_test_.append(y_sc.transform(y_t.reshape(-1, 1)))\n", 210 | " \n", 211 | " t_train.append(dfs[i].iloc[:int(-n_points_day*val_days)]['date'].values)\n", 212 | " t_test.append(dfs[i].iloc[int(-n_points_day*val_days):]['date'].values)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [] 
221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "X_train = X_train_\n", 229 | "X_test = X_test_\n", 230 | "y_train = y_train_\n", 231 | "y_test = y_test_" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "## NGBoost" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "tree_learner = DecisionTreeRegressor(\n", 248 | " criterion=\"friedman_mse\",\n", 249 | " min_samples_split=2,\n", 250 | " min_samples_leaf=1,\n", 251 | " min_weight_fraction_leaf=0.0,\n", 252 | " max_depth=3,\n", 253 | " splitter=\"best\",\n", 254 | " random_state=None,\n", 255 | ")" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "scrolled": true 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "ngbs = []\n", 267 | "\n", 268 | "start = time.time()\n", 269 | "\n", 270 | "for i in range(n_sets):\n", 271 | " \n", 272 | " X_train_i = X_train[i]\n", 273 | " y_train_i = y_train[i]\n", 274 | "\n", 275 | " ngb = NGBRegressor(Base=tree_learner, n_estimators=1000).fit(X_train_i, y_train_i.ravel())\n", 276 | " \n", 277 | " ngbs.append(ngb)\n", 278 | "\n", 279 | "end = time.time()\n", 280 | "print((end - start)/n_sets)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "## Evaluation" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "y = []\n", 297 | "y_hat = []\n", 298 | "upper_hat = []\n", 299 | "lower_hat = []\n", 300 | "\n", 301 | "for i in range(n_sets):\n", 302 | " \n", 303 | " ngb = ngbs[i]\n", 304 | " X_test_i = X_test[i]\n", 305 | " y_test_i = y_test[i]\n", 306 | " \n", 307 | " # For multi-step ahead prediction\n", 308 | " y_first = 
ngb.predict(X_test_i[:3])\n", 309 | " \n", 310 | " y_3 = y_first[3-3]\n", 311 | " y_2 = y_first[3-2]\n", 312 | " y_1 = y_first[3-1]\n", 313 | " for j in range(3, X_test[i].shape[0]):\n", 314 | " X_test_i[j][-3] = y_3\n", 315 | " X_test_i[j][-2] = y_2\n", 316 | " X_test_i[j][-1] = y_1\n", 317 | " y_pred_j = ngb.pred_dist(X_test_i[j].reshape(1, -1)).loc\n", 318 | " y_3 = y_2\n", 319 | " y_2 = y_1\n", 320 | " y_1 = y_pred_j\n", 321 | " # end of multi-step ahead\n", 322 | " \n", 323 | " y_pred = ngb.predict(X_test_i)\n", 324 | " y_dists = ngb.pred_dist(X_test_i)\n", 325 | " \n", 326 | " mean = y_dists.loc\n", 327 | " std = y_dists.scale\n", 328 | " \n", 329 | " mean = y_scaler[i].inverse_transform(mean.reshape(1, -1))\n", 330 | " std = y_scaler[i].inverse_transform(std.reshape(1, -1))\n", 331 | " mean = mean.flatten()\n", 332 | " std = std.flatten()\n", 333 | " \n", 334 | " real_y_test = y_scaler[i].inverse_transform(y_test_i)\n", 335 | " real_y_test = real_y_test.flatten()\n", 336 | " \n", 337 | " lower = []\n", 338 | " upper = []\n", 339 | " for s in range(1,4):\n", 340 | " lower = lower + [mean - s * std]\n", 341 | " upper = upper + [mean + s * std]\n", 342 | " \n", 343 | " y_hat.append(mean)\n", 344 | " y.append(real_y_test)\n", 345 | " lower_hat.append(lower)\n", 346 | " upper_hat.append(upper)\n", 347 | " \n", 348 | " # Deterministic metrics\n", 349 | " MAE = mean_absolute_error(real_y_test, mean)\n", 350 | " RMSE = mean_squared_error(real_y_test, mean, squared=False)\n", 351 | " MBE = np.mean(mean - real_y_test)\n", 352 | " print(f'MAE: {MAE:.3f}')\n", 353 | " print(f'RMSE: {RMSE:.3f}')\n", 354 | " print(f'MBE: {MBE:.3f}')\n", 355 | " \n", 356 | " # Probabilistic metrics\n", 357 | " PICP = PICP_func(real_y_test, lower[1], upper[1])\n", 358 | " PINAW = PINAW_func(real_y_test, lower[1], upper[1])\n", 359 | " C = prscore.crps_gaussian(real_y_test, mu=mean, sig=std)\n", 360 | " CRPS = C.mean()\n", 361 | " print(f'PICP: {PICP:.3f}')\n", 362 | " print(f'PINAW: 
{PINAW:.3f}')\n", 363 | " print(f'CRPS: {CRPS:.3f}')\n", 364 | " print('\\n') " 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "## SHAP" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "import shap\n", 381 | "shap.initjs()" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": null, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [ 390 | "i = 0\n", 391 | "ngb = ngbs[i]\n", 392 | "\n", 393 | "features = list(dfs[i].columns)[2:]\n", 394 | "\n", 395 | "explainer = shap.TreeExplainer(ngb, model_output=0) # menan (point forecast): model_output=0, std (uncertainty): model_output=1 \n", 396 | "shap_values = explainer.shap_values(X_train[i])" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "## SHAP Summary Plots" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [ 412 | "%matplotlib notebook\n", 413 | "shap.summary_plot(shap_values, X_train[i], feature_names=features, show=True, plot_size=(15,8))" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": null, 419 | "metadata": {}, 420 | "outputs": [], 421 | "source": [ 422 | "%matplotlib notebook\n", 423 | "shap.summary_plot(shap_values, X_train[i], feature_names=features, show=True, plot_size=(15,8), plot_type='bar')" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "## SHAP Interaction Plots" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": null, 436 | "metadata": {}, 437 | "outputs": [], 438 | "source": [ 439 | "# Feature indeces:\n", 440 | "# 0: Temperature\n", 441 | "# 1: Humidity\n", 442 | "# 2: precipitation\n", 443 | "# 3: wind speed\n", 444 | "# 4: radiation\n", 445 | "# 5: hour\n", 
446 | "# 6: month_sin\n", 447 | "# 7: month_cos\n", 448 | "# 8: t-45\n", 449 | "# 9: t-30\n", 450 | "# 10: t-15" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "metadata": {}, 457 | "outputs": [], 458 | "source": [ 459 | "shap_interaction_values = explainer.shap_interaction_values(X_train[i])" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "metadata": {}, 466 | "outputs": [], 467 | "source": [ 468 | "%matplotlib inline\n", 469 | "shap.dependence_plot((10,4), shap_interaction_values, X_tr, feature_names=features, ax=ax)" 470 | ] 471 | }, 472 | { 473 | "cell_type": "markdown", 474 | "metadata": {}, 475 | "source": [ 476 | "## Force plots" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [ 485 | "%matplotlib notebook\n", 486 | "shap.force_plot(explainer.expected_value, shap_values[851,:], features=features,link='logit', matplotlib=True, figsize=(10, 3),contribution_threshold=0.025 )" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": {}, 493 | "outputs": [], 494 | "source": [ 495 | "dfs[i].iloc[851]" 496 | ] 497 | } 498 | ], 499 | "metadata": { 500 | "kernelspec": { 501 | "display_name": "Python 3", 502 | "language": "python", 503 | "name": "python3" 504 | }, 505 | "language_info": { 506 | "codemirror_mode": { 507 | "name": "ipython", 508 | "version": 3 509 | }, 510 | "file_extension": ".py", 511 | "mimetype": "text/x-python", 512 | "name": "python", 513 | "nbconvert_exporter": "python", 514 | "pygments_lexer": "ipython3", 515 | "version": "3.7.10" 516 | } 517 | }, 518 | "nbformat": 4, 519 | "nbformat_minor": 4 520 | } 521 | -------------------------------------------------------------------------------- /heatmap.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": 
"code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "from matplotlib import pyplot as plt\n", 12 | "import seaborn as sns\n", 13 | "import time\n", 14 | "from datetime import datetime" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Read and preprocess the datasets" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "df1 = pd.read_csv('power_weather_data1.csv')\n", 31 | "df2 = pd.read_csv('power_weather_data2.csv')" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "df1['date'] = pd.to_datetime(df1['date'], format='%m/%d/%Y %H:%M')\n", 41 | "\n", 42 | "df2['date'] = pd.to_datetime(df2['date'], format='%m/%d/%Y %H:%M')" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "df1['hour'] = df1['date'].apply(lambda x: x.hour )\n", 52 | "df1['month'] = df1['date'].apply(lambda x: x.month)\n", 53 | "\n", 54 | "df2['hour'] = df2['date'].apply(lambda x: x.hour )\n", 55 | "df2['month'] = df2['date'].apply(lambda x: x.month)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "df1['month_sin'] = np.sin(df1['month'] * 2 * np.pi/12)\n", 65 | "df1['month_cos'] = np.cos(df1['month'] * 2 * np.pi/12)\n", 66 | "\n", 67 | "df2['month_sin'] = np.sin(df2['month'] * 2 * np.pi/12)\n", 68 | "df2['month_cos'] = np.cos(df2['month'] * 2 * np.pi/12)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "df1 = df1[(df1['hour']>=6) & (df1['hour']<=21)]\n", 78 | "\n", 79 | "df2 = df2[(df2['hour']>=6) & 
(df2['hour']<=21)]" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "df1 = df1.drop(['month'], axis=1)\n", 89 | "\n", 90 | "df2 = df2.drop(['month'], axis=1)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "max_P1 = max(df1['Power'])\n", 100 | "max_P2 = max(df2['Power'])" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "df1['Power'] = df1['Power'] / max_P1\n", 110 | "\n", 111 | "P = df1['Power']\n", 112 | "\n", 113 | "PowerData = pd.concat([P.shift(3), P.shift(2), P.shift(1)], axis=1)\n", 114 | "PowerData.columns = ['t-45', 't-30', 't-15']\n", 115 | "\n", 116 | "df1 = pd.concat([df1, PowerData.reindex(df1.index)], axis=1)\n", 117 | " \n", 118 | "df1 = df1.fillna(0)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "df2['Power'] = df2['Power'] / max_P2\n", 128 | "\n", 129 | "P = df2['Power']\n", 130 | "\n", 131 | "PowerData = pd.concat([P.shift(3), P.shift(2), P.shift(1)], axis=1)\n", 132 | "PowerData.columns = ['t-45', 't-30', 't-15']\n", 133 | "\n", 134 | "df2 = pd.concat([df2, PowerData.reindex(df2.index)], axis=1)\n", 135 | " \n", 136 | "df2 = df2.fillna(0)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "df = pd.concat([df1, df2])" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## Heatmap" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "fig = plt.figure(figsize=(15,9))\n", 162 | "b = sns.heatmap(df.corr(), annot=True, 
cmap='coolwarm')" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "## Boxenplot" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "fig = plt.figure(figsize=(12,8))\n", 179 | "b = sns.boxenplot(x='hour', y='Power', data=df, color='green')" 180 | ] 181 | } 182 | ], 183 | "metadata": { 184 | "kernelspec": { 185 | "display_name": "Python 3", 186 | "language": "python", 187 | "name": "python3" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 3 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython3", 199 | "version": "3.7.7" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 4 204 | } 205 | --------------------------------------------------------------------------------