├── GP_PV.ipynb
├── LUBE_PV.ipynb
├── NGBoost_PV.ipynb
└── heatmap.ipynb


/GP_PV.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import pandas as pd\n",
 10 |     "import numpy as np\n",
 11 |     "from matplotlib import pyplot as plt\n",
 12 |     "import seaborn as sns\n",
 13 |     "import time\n",
 14 |     "from datetime import datetime\n",
 15 |     "\n",
 16 |     "from sklearn import preprocessing\n",
 17 |     "from sklearn.preprocessing import MinMaxScaler\n",
 18 |     "from sklearn.preprocessing import StandardScaler\n",
 19 |     "from sklearn.metrics import mean_squared_error\n",
 20 |     "from sklearn.metrics import mean_absolute_error\n",
 21 |     "\n",
 22 |     "import scipy.stats as stats\n",
 23 |     "\n",
 24 |     "import math\n",
 25 |     "from sklearn.metrics import mean_absolute_error as mae\n",
 26 |     "from sklearn.metrics import mean_squared_error\n",
 27 |     "from math import sqrt\n",
 28 |     "\n",
 29 |     "import torch\n",
 30 |     "import gpytorch\n",
 31 |     "from gpytorch.kernels import RBFKernel as RBF\n",
 32 |     "from gpytorch.kernels import ScaleKernel as C\n",
 33 |     "from gpytorch.kernels import PeriodicKernel as Per\n",
 34 |     "from gpytorch.kernels import RQKernel as RQ\n",
 35 |     "from gpytorch.kernels import MaternKernel as M\n",
 36 |     "from gpytorch.kernels import PolynomialKernel\n",
 37 |     "\n",
 38 |     "import properscoring as prscore"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "markdown",
 43 |    "metadata": {},
 44 |    "source": [
 45 |     "## Read and preprocess the dataset"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": null,
 51 |    "metadata": {},
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "df = pd.read_csv('power_weather_data.csv')\n",
 55 |     "\n",
 56 |     "# The CSV must contain a 'date' column (text in month/day/year hour:minute form) and a 'Power' column.\n",
 57 |     "# Any other columns (e.g. weather data) are optional; they end up as extra model features."
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": null,
 63 |    "metadata": {},
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y %H:%M')  # raises on values that do not match the format"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": null,
 72 |    "metadata": {},
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "df['hour'] = df['date'].dt.hour    # vectorized datetime accessor instead of a per-row Python lambda\n",
 76 |     "df['month'] = df['date'].dt.month"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": null,
 82 |    "metadata": {},
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "# Encode the month on the unit circle so that December and January end up adjacent.\n",
 86 |     "month_angle = df['month'] * 2 * np.pi/12\n",
 87 |     "df['month_sin'] = np.sin(month_angle)\n",
 88 |     "df['month_cos'] = np.cos(month_angle)"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": null,
 94 |    "metadata": {},
 95 |    "outputs": [],
 96 |    "source": [
 97 |     "df = df[df['hour'].between(6, 21)]  # keep hours 6..21 (both ends included)"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": null,
103 |    "metadata": {},
104 |    "outputs": [],
105 |    "source": [
106 |     "# 'hour' stays in as a raw feature; only the raw 'month' column goes (month_sin/month_cos remain).\n",
107 |     "df = df.drop(columns=['month'])"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": null,
113 |    "metadata": {},
114 |    "outputs": [],
115 |    "source": [
116 |     "P = df['Power']\n",
117 |     "\n",
118 |     "# The three previous samples become lag features. shift() works on rows and runs after the\n",
119 |     "# hour filter, so the first rows of a day take their lags from the last rows of the previous day.\n",
120 |     "PowerData = pd.concat({'t-45': P.shift(3), 't-30': P.shift(2), 't-15': P.shift(1)}, axis=1)\n",
121 |     "\n",
122 |     "# Every NaN in the frame (including the first three lag rows) is replaced by 0.\n",
123 |     "df = pd.concat([df, PowerData.reindex(df.index)], axis=1).fillna(0)"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "markdown",
128 |    "metadata": {},
129 |    "source": [
130 |     "## Hyperparameters"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": null,
136 |    "metadata": {},
137 |    "outputs": [],
138 |    "source": [
139 |     "weeks = [['2018-03-01', '2019-03-15']]  # list of [start_date, end_date] ranges; one dataset is built per range\n",
140 |     "# Number of days at the end of each range that are held out as the test set.\n",
141 |     "val_days = 14\n",
142 |     "# Rows per day after the hour filter (16 hours kept); assumes 4 samples per hour - TODO confirm.\n",
143 |     "# n_points_day = 4 * 24\n",
144 |     "n_points_day = 4 * 16"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "## Set the dataframes"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": null,
157 |    "metadata": {},
158 |    "outputs": [],
159 |    "source": [
160 |     "dfs = []\n",
161 |     "# Slice the preprocessed frame into one DataFrame per [start, end] range listed in `weeks`.\n",
162 |     "for w in weeks:\n",
163 |     "    # Range limits: start day at 00:00, end day at 23:59.\n",
164 |     "    w_start = datetime.strptime(w[0]+\" 00:00\", '%Y-%m-%d %H:%M')\n",
165 |     "    w_end = datetime.strptime(w[1]+\" 23:59\", '%Y-%m-%d %H:%M')\n",
166 |     "    # Both comparisons are strict, so rows stamped exactly at a limit are left out.\n",
167 |     "    dfs.append(df[(df['date'] > w_start) & (df['date'] < w_end)])\n",
168 |     "    \n",
169 |     "n_sets = len(dfs)"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "markdown",
174 |    "metadata": {},
175 |    "source": [
176 |     "## Train Test Split"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": null,
182 |    "metadata": {},
183 |    "outputs": [],
184 |    "source": [
185 |     "X_train_, X_test_ = [], []\n",
186 |     "y_train_, y_test = [], []\n",
187 |     "\n",
188 |     "x_scaler, y_scaler = [], []\n",
189 |     "\n",
190 |     "t_train, t_test = [], []\n",
191 |     "\n",
192 |     "# Negative offset: the last val_days * n_points_day rows of each dataset form the test set.\n",
193 |     "split_at = int(-n_points_day*val_days)\n",
194 |     "\n",
195 |     "for i in range(len(dfs)):\n",
196 |     "\n",
197 |     "    train = dfs[i][:split_at]\n",
198 |     "    test = dfs[i][split_at:]\n",
199 |     "\n",
200 |     "    # Every column except the target and the timestamp is a feature.\n",
201 |     "    X_tr = train.drop(['Power','date'], axis=1).values\n",
202 |     "    X_t = test.drop(['Power','date'], axis=1).values\n",
203 |     "\n",
204 |     "    y_tr = train['Power'].values\n",
205 |     "    y_t = test['Power'].values\n",
206 |     "\n",
207 |     "    # Scalers are fitted on the training part only: features to [0, 1], targets to [-1, 1].\n",
208 |     "    x_sc = MinMaxScaler().fit(X_tr)\n",
209 |     "    y_sc = MinMaxScaler(feature_range=(-1,1)).fit(y_tr.reshape(-1, 1))\n",
210 |     "    x_scaler.append(x_sc)\n",
211 |     "    y_scaler.append(y_sc)\n",
212 |     "\n",
213 |     "    X_train_.append(x_sc.transform(X_tr))\n",
214 |     "    X_test_.append(x_sc.transform(X_t))\n",
215 |     "    y_train_.append(y_sc.transform(y_tr.reshape(-1, 1)))\n",
216 |     "    # Test targets are stored unscaled, in the units of the 'Power' column.\n",
217 |     "    y_test.append(y_t)\n",
218 |     "\n",
219 |     "    # Timestamps of both parts.\n",
220 |     "    t_train.append(train['date'].values)\n",
221 |     "    t_test.append(test['date'].values)\n",
222 |     "\n",
223 |     "    "
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": null,
229 |    "metadata": {},
230 |    "outputs": [],
231 |    "source": [
232 |     "# Wrap the scaled NumPy arrays as tensors; from_numpy keeps the array dtype\n",
233 |     "# and shares memory with the source array.\n",
234 |     "X_train = [torch.from_numpy(scaled) for scaled in X_train_]\n",
235 |     "X_test = [torch.from_numpy(scaled) for scaled in X_test_]\n",
236 |     "\n",
237 |     "# The scaled targets have shape (n, 1); they are flattened to (n,).\n",
238 |     "y_train = []\n",
239 |     "for i in range(len(dfs)):\n",
240 |     "    y_tr = torch.from_numpy(y_train_[i])\n",
241 |     "    y_train.append(torch.flatten(y_tr))"
242 |    ]
243 |   },
244 |   {
245 |    "cell_type": "markdown",
246 |    "metadata": {},
247 |    "source": [
248 |     "## GP Model"
249 |    ]
250 |   },
251 |   {
252 |    "cell_type": "code",
253 |    "execution_count": null,
254 |    "metadata": {},
255 |    "outputs": [],
256 |    "source": [
257 |     "class ExactGPModel(gpytorch.models.ExactGP):  # exact GP: constant mean, scaled rational-quadratic kernel\n",
258 |     "    def __init__(self, X_train, y_train, likelihood):\n",
259 |     "        super().__init__(X_train, y_train, likelihood)\n",
260 |     "        self.mean_module = gpytorch.means.ConstantMean()\n",
261 |     "        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RQKernel())  # spelled out: the short aliases C/RQ are rebindable notebook globals\n",
262 |     "\n",
263 |     "    def forward(self, x):  # prior mean and covariance at x, packed as a MultivariateNormal\n",
264 |     "        mean_x = self.mean_module(x)\n",
265 |     "        covar_x = self.covar_module(x)\n",
266 |     "        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)"
267 |    ]
268 |   },
269 |   {
270 |    "cell_type": "code",
271 |    "execution_count": null,
272 |    "metadata": {
273 |     "scrolled": true
274 |    },
275 |    "outputs": [],
276 |    "source": [
277 |     "training_iter = 1000\n",
278 |     "train_loss = []\n",
279 |     "\n",
280 |     "models = []\n",
281 |     "likelihoods = []\n",
282 |     "\n",
283 |     "start = time.time()\n",
284 |     "\n",
285 |     "for i in range(len(dfs)):\n",
286 |     "    \n",
287 |     "    print(i)\n",
288 |     "    X_tr = X_train[i]\n",
289 |     "    y_tr = y_train[i]\n",
290 |     "    \n",
291 |     "    likelihood = gpytorch.likelihoods.GaussianLikelihood()\n",
292 |     "    model = ExactGPModel(X_tr, y_tr, likelihood)\n",
293 |     "    # Cast the parameters to double precision\n",
294 |     "    model = model.double()\n",
295 |     "    likelihood = likelihood.double()\n",
296 |     "\n",
297 |     "    # Find optimal model hyperparameters\n",
298 |     "    model.train()\n",
299 |     "    likelihood.train()\n",
300 |     "\n",
301 |     "    # Use the adam optimizer\n",
302 |     "    optimizer = torch.optim.Adam([{'params': model.parameters()}], lr=0.07)\n",
303 |     "\n",
304 |     "    # Loss for GPs - the marginal log likelihood\n",
305 |     "    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)\n",
306 |     "\n",
307 |     "    ite = np.arange(training_iter, dtype=float)  # iteration indices 0 .. training_iter-1\n",
308 |     "    loss_all = []\n",
309 |     "    \n",
310 |     "    for j in range(training_iter):\n",
311 |     "        # Zero gradients from previous iteration\n",
312 |     "        optimizer.zero_grad()\n",
313 |     "        # Output from model\n",
314 |     "        output = model(X_tr)\n",
315 |     "        # Calculate loss and backprop gradients\n",
316 |     "        loss = -mll(output, y_tr)\n",
317 |     "        loss.backward()\n",
318 |     "\n",
319 |     "        optimizer.step()\n",
320 |     "        # item() copies the scalar out of the graph; a list append avoids re-allocating an array every step\n",
321 |     "        loss_all.append(loss.item())\n",
322 |     "        \n",
323 |     "    # One loss curve (NumPy array) per dataset\n",
324 |     "    train_loss.append(np.array(loss_all))\n",
325 |     "    models.append(model)\n",
326 |     "    likelihoods.append(likelihood)\n",
327 |     "\n",
328 |     "# Average wall-clock training time per dataset, in seconds\n",
329 |     "end = time.time()\n",
330 |     "print((end - start)/len(dfs))"
331 |    ]
332 |   },
333 |   {
334 |    "cell_type": "markdown",
335 |    "metadata": {},
336 |    "source": [
337 |     "## Evaluation"
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "code",
342 |    "execution_count": null,
343 |    "metadata": {},
344 |    "outputs": [],
345 |    "source": [
346 |     "def PICP_func(y, lower, upper):\n",
347 |     "    # PI coverage probability: share of observations y[k] with lower[k] <= y[k] <= upper[k].\n",
348 |     "    inside = sum(\n",
349 |     "        1 for idx, obs in enumerate(y) if lower[idx] <= obs <= upper[idx]\n",
350 |     "    )\n",
351 |     "    # An empty y raises ZeroDivisionError.\n",
352 |     "    return inside / len(y)\n",
353 |     "\n",
354 |     "def PINAW_func(y, lower, upper):\n",
355 |     "    # PI normalized average width: mean interval width divided by the range of y.\n",
356 |     "    widths = upper - lower\n",
357 |     "    y_range = np.max(y) - np.min(y)\n",
358 |     "    \n",
359 |     "    return np.mean(widths) / y_range"
360 |    ]
361 |   },
362 |   {
363 |    "cell_type": "code",
364 |    "execution_count": null,
365 |    "metadata": {},
366 |    "outputs": [],
367 |    "source": [
368 |     "# Evaluate every trained GP on its held-out days.\n",
369 |     "for i in range(len(dfs)):\n",
370 |     "    \n",
371 |     "    print(i)\n",
372 |     "    \n",
373 |     "    # Unpacking\n",
374 |     "    model = models[i]\n",
375 |     "    likelihood = likelihoods[i]\n",
376 |     "    # Work on a copy: the lag columns are overwritten below and X_test must stay intact\n",
377 |     "    X_t = X_test[i].clone()\n",
378 |     "    y_t = y_test[i]\n",
379 |     "    x_sc = x_scaler[i]\n",
380 |     "    y_sc = y_scaler[i]\n",
381 |     "    \n",
382 |     "    model.eval()\n",
383 |     "    likelihood.eval()\n",
384 |     "    \n",
385 |     "    # Predictions live on the target scale (y_sc), the lag columns on the feature scale (x_sc).\n",
386 |     "    # MinMaxScaler.transform is X * scale_ + min_, so this affine map takes a scaled prediction\n",
387 |     "    # back to power units and then onto the scale of each of the last three (lag) columns.\n",
388 |     "    lag_gain = x_sc.scale_[-3:] / y_sc.scale_[0]\n",
389 |     "    lag_bias = x_sc.min_[-3:] - y_sc.min_[0] * lag_gain\n",
390 |     "    \n",
391 |     "    # Inference only: no autograd graph is needed\n",
392 |     "    with torch.no_grad():\n",
393 |     "        # For multi-step ahead prediction: the first three rows keep their measured lags,\n",
394 |     "        # every later row receives the three most recent predictions as t-45, t-30, t-15.\n",
395 |     "        y_45 = model(X_t[0].unsqueeze(0)).mean.item()\n",
396 |     "        y_30 = model(X_t[1].unsqueeze(0)).mean.item()\n",
397 |     "        y_15 = model(X_t[2].unsqueeze(0)).mean.item()\n",
398 |     "        for j in range(3, X_t.shape[0]):\n",
399 |     "            X_t[j, -3] = y_45 * lag_gain[0] + lag_bias[0]\n",
400 |     "            X_t[j, -2] = y_30 * lag_gain[1] + lag_bias[1]\n",
401 |     "            X_t[j, -1] = y_15 * lag_gain[2] + lag_bias[2]\n",
402 |     "            y_45, y_30 = y_30, y_15\n",
403 |     "            y_15 = model(X_t[j].unsqueeze(0)).mean.item()\n",
404 |     "        # end of multi-step ahead\n",
405 |     "        \n",
406 |     "        # Predictive distribution including the observation noise\n",
407 |     "        f_pred_i = likelihood(model(X_t))\n",
408 |     "        # gpytorch's confidence_region() is the mean -/+ 2 standard deviations\n",
409 |     "        lower, upper = f_pred_i.confidence_region()\n",
410 |     "    \n",
411 |     "    lower = lower.detach().numpy()\n",
412 |     "    upper = upper.detach().numpy()\n",
413 |     "    \n",
414 |     "    # Back to the original power units\n",
415 |     "    lower = y_sc.inverse_transform(lower.reshape(-1, 1)).flatten()\n",
416 |     "    upper = y_sc.inverse_transform(upper.reshape(-1, 1)).flatten()\n",
417 |     "    real_y_test = y_t.flatten()\n",
418 |     "    \n",
419 |     "    mean = (upper+lower)/2\n",
420 |     "    # The interval spans 4 standard deviations (2 on each side), hence /4 rather than /(2*1.96)\n",
421 |     "    std = (upper - lower)/4\n",
422 |     "    \n",
423 |     "    # Deterministic metrics\n",
424 |     "    MAE = mean_absolute_error(real_y_test, mean)\n",
425 |     "    # Explicit square root: the squared=False switch is gone from recent scikit-learn releases\n",
426 |     "    RMSE = np.sqrt(mean_squared_error(real_y_test, mean))\n",
427 |     "    MBE = np.mean(mean - real_y_test)\n",
428 |     "    print(f'MAE: {MAE:.3f}')\n",
429 |     "    print(f'RMSE: {RMSE:.3f}')\n",
430 |     "    print(f'MBE: {MBE:.3f}')\n",
431 |     "    \n",
432 |     "    # Probabilistic metrics\n",
433 |     "    PICP = PICP_func(real_y_test, lower, upper)\n",
434 |     "    PINAW = PINAW_func(real_y_test, lower, upper)\n",
435 |     "    # Deliberately not named C: that would rebind the ScaleKernel alias from the imports cell\n",
436 |     "    crps_values = prscore.crps_gaussian(real_y_test, mu=mean, sig=std)\n",
437 |     "    CRPS = crps_values.mean()\n",
438 |     "    print(f'PICP: {PICP:.3f}')\n",
439 |     "    print(f'PINAW: {PINAW:.3f}')\n",
440 |     "    print(f'CRPS: {CRPS:.3f}')\n",
441 |     "    # Blank separator between datasets\n",
442 |     "    print('\\n')"
443 |    ]
444 |   }
445 |  ],
446 |  "metadata": {
447 |   "kernelspec": {
448 |    "display_name": "Python 3",
449 |    "language": "python",
450 |    "name": "python3"
451 |   },
452 |   "language_info": {
453 |    "codemirror_mode": {
454 |     "name": "ipython",
455 |     "version": 3
456 |    },
457 |    "file_extension": ".py",
458 |    "mimetype": "text/x-python",
459 |    "name": "python",
460 |    "nbconvert_exporter": "python",
461 |    "pygments_lexer": "ipython3",
462 |    "version": "3.7.7"
463 |   }
464 |  },
465 |  "nbformat": 4,
466 |  "nbformat_minor": 4
467 | }
468 | 


--------------------------------------------------------------------------------
/LUBE_PV.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import pandas as pd\n",
 10 |     "import numpy as np\n",
 11 |     "from matplotlib import pyplot as plt\n",
 12 |     "import seaborn as sns\n",
 13 |     "import time\n",
 14 |     "from datetime import datetime\n",
 15 |     "\n",
 16 |     "from sklearn.preprocessing import MinMaxScaler\n",
 17 |     "from sklearn.metrics import mean_squared_error\n",
 18 |     "from sklearn.metrics import mean_absolute_error\n",
 19 |     "\n",
 20 |     "import scipy.stats as stats\n",
 21 |     "\n",
 22 |     "from sklearn.metrics import mean_absolute_error as mae\n",
 23 |     "from math import sqrt\n",
 24 |     "\n",
 25 |     "import torch\n",
 26 |     "from torch import nn, optim\n",
 27 |     "import torch.optim as optim\n",
 28 |     "import torch.nn.functional as F\n",
 29 |     "from torch.optim.optimizer import Optimizer\n",
 30 |     "\n",
 31 |     "import time\n",
 32 |     "\n",
 33 |     "import properscoring as prscore\n",
 34 |     "\n",
 35 |     "import math\n",
 36 |     "from torch.autograd import Variable"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "## Read and preprocess the dataset"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "df = pd.read_csv('power_weather_data.csv')\n",
 53 |     "\n",
 54 |     "# The CSV must contain a 'date' column (text in month/day/year hour:minute form) and a 'Power' column.\n",
 55 |     "# Any other columns (e.g. weather data) are optional; they end up as extra model features."
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": null,
 61 |    "metadata": {},
 62 |    "outputs": [],
 63 |    "source": [
 64 |     "df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y %H:%M')  # raises on values that do not match the format"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": null,
 70 |    "metadata": {},
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "df['hour'] = df['date'].dt.hour    # vectorized datetime accessor instead of a per-row Python lambda\n",
 74 |     "df['month'] = df['date'].dt.month"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": null,
 80 |    "metadata": {},
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "# Encode the month on the unit circle so that December and January end up adjacent.\n",
 84 |     "month_angle = df['month'] * 2 * np.pi/12\n",
 85 |     "df['month_sin'] = np.sin(month_angle)\n",
 86 |     "df['month_cos'] = np.cos(month_angle)"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": null,
 92 |    "metadata": {},
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "df = df[df['hour'].between(6, 21)]  # keep hours 6..21 (both ends included)"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "metadata": {},
102 |    "outputs": [],
103 |    "source": [
104 |     "# 'hour' stays in as a raw feature; only the raw 'month' column goes (month_sin/month_cos remain).\n",
105 |     "df = df.drop(columns=['month'])"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": null,
111 |    "metadata": {},
112 |    "outputs": [],
113 |    "source": [
114 |     "P = df['Power']\n",
115 |     "\n",
116 |     "# The three previous samples become lag features. shift() works on rows and runs after the\n",
117 |     "# hour filter, so the first rows of a day take their lags from the last rows of the previous day.\n",
118 |     "PowerData = pd.concat({'t-45': P.shift(3), 't-30': P.shift(2), 't-15': P.shift(1)}, axis=1)\n",
119 |     "\n",
120 |     "# Every NaN in the frame (including the first three lag rows) is replaced by 0.\n",
121 |     "df = pd.concat([df, PowerData.reindex(df.index)], axis=1).fillna(0)"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "markdown",
126 |    "metadata": {},
127 |    "source": [
128 |     "## Hyperparameters"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": null,
134 |    "metadata": {},
135 |    "outputs": [],
136 |    "source": [
137 |     "weeks = [['2018-03-01', '2019-03-15']]  # list of [start_date, end_date] ranges; one dataset is built per range\n",
138 |     "# Number of days at the end of each range that are held out as the test set.\n",
139 |     "val_days = 14\n",
140 |     "# Rows per day after the hour filter (16 hours kept); assumes 4 samples per hour - TODO confirm.\n",
141 |     "# n_points_day = 4 * 24\n",
142 |     "n_points_day = 4 * 16"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "markdown",
147 |    "metadata": {},
148 |    "source": [
149 |     "## Set the dataframes"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": null,
155 |    "metadata": {},
156 |    "outputs": [],
157 |    "source": [
158 |     "dfs = []\n",
159 |     "# Slice the preprocessed frame into one DataFrame per [start, end] range listed in `weeks`.\n",
160 |     "for w in weeks:\n",
161 |     "    # Range limits: start day at 00:00, end day at 23:59.\n",
162 |     "    w_start = datetime.strptime(w[0]+\" 00:00\", '%Y-%m-%d %H:%M')\n",
163 |     "    w_end = datetime.strptime(w[1]+\" 23:59\", '%Y-%m-%d %H:%M')\n",
164 |     "    # Both comparisons are strict, so rows stamped exactly at a limit are left out.\n",
165 |     "    dfs.append(df[(df['date'] > w_start) & (df['date'] < w_end)])\n",
166 |     "    \n",
167 |     "n_sets = len(dfs)"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "markdown",
172 |    "metadata": {},
173 |    "source": [
174 |     "## Train Test Split"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": null,
180 |    "metadata": {},
181 |    "outputs": [],
182 |    "source": [
183 |     "X_train_, X_test_ = [], []\n",
184 |     "y_train_, y_test_ = [], []\n",
185 |     "\n",
186 |     "x_scaler, y_scaler = [], []\n",
187 |     "\n",
188 |     "t_train, t_test = [], []\n",
189 |     "\n",
190 |     "# Negative offset: the last val_days * n_points_day rows of each dataset form the test set.\n",
191 |     "split_at = int(-n_points_day*val_days)\n",
192 |     "\n",
193 |     "for i in range(len(dfs)):\n",
194 |     "\n",
195 |     "    train = dfs[i][:split_at]\n",
196 |     "    test = dfs[i][split_at:]\n",
197 |     "\n",
198 |     "    # Every column except the target and the timestamp is a feature.\n",
199 |     "    X_tr = train.drop(['Power','date'], axis=1).values\n",
200 |     "    X_t = test.drop(['Power','date'], axis=1).values\n",
201 |     "\n",
202 |     "    y_tr = train['Power'].values\n",
203 |     "    y_t = test['Power'].values\n",
204 |     "\n",
205 |     "    # Scalers are fitted on the training part only; both map to [0, 1].\n",
206 |     "    x_sc = MinMaxScaler().fit(X_tr)\n",
207 |     "    y_sc = MinMaxScaler().fit(y_tr.reshape(-1, 1))\n",
208 |     "    x_scaler.append(x_sc)\n",
209 |     "    y_scaler.append(y_sc)\n",
210 |     "\n",
211 |     "    X_train_.append(x_sc.transform(X_tr))\n",
212 |     "    X_test_.append(x_sc.transform(X_t))\n",
213 |     "    # Scaled targets are shifted up by 0.001 (train and test alike);\n",
214 |     "    # NOTE(review): presumably to keep them strictly positive - confirm.\n",
215 |     "    y_train_.append(y_sc.transform(y_tr.reshape(-1, 1)) + 0.001)\n",
216 |     "    y_test_.append(y_sc.transform(y_t.reshape(-1, 1)) + 0.001)\n",
217 |     "\n",
218 |     "    # Timestamps of both parts.\n",
219 |     "    t_train.append(train['date'].values)\n",
220 |     "    t_test.append(test['date'].values)"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": null,
226 |    "metadata": {},
227 |    "outputs": [],
228 |    "source": [
229 |     "# Wrap the scaled NumPy arrays as tensors and cast them to float32,\n",
230 |     "# the default dtype of the nn.Linear layers used below.\n",
231 |     "X_train = [torch.from_numpy(scaled).float() for scaled in X_train_]\n",
232 |     "X_test = [torch.from_numpy(scaled).float() for scaled in X_test_]\n",
233 |     "\n",
234 |     "# The scaled targets have shape (n, 1); squeeze drops the size-1 axis.\n",
235 |     "y_train = []\n",
236 |     "y_test = []\n",
237 |     "for i in range(len(dfs)):\n",
238 |     "    y_tr = torch.from_numpy(y_train_[i]).float()\n",
239 |     "    y_train.append(torch.squeeze(y_tr))\n",
240 |     "    y_t = torch.from_numpy(y_test_[i]).float()\n",
241 |     "    y_test.append(torch.squeeze(y_t))"
242 |    ]
243 |   },
244 |   {
245 |    "cell_type": "markdown",
246 |    "metadata": {},
247 |    "source": [
248 |     "## LUBE"
249 |    ]
250 |   },
251 |   {
252 |    "cell_type": "code",
253 |    "execution_count": null,
254 |    "metadata": {},
255 |    "outputs": [],
256 |    "source": [
257 |     "import torch.nn as nn\n",
258 |     "import torch.nn.functional as F\n",
259 |     "\n",
260 |     "n_neurons = 50  # width of the single hidden layer\n",
261 |     "eta = 50        # exponent of the coverage penalty inside CWC\n",
262 |     "\n",
263 |     "class Net(nn.Module):\n",
264 |     "    # One hidden ReLU layer followed by two sigmoid outputs per sample; CWC reads them as (upper, lower).\n",
265 |     "    def __init__(self, n_features):\n",
266 |     "        super().__init__()\n",
267 |     "        self.fc1 = nn.Linear(n_features, n_neurons)\n",
268 |     "        self.fc2 = nn.Linear(n_neurons, 2)\n",
269 |     "    def forward(self, x):\n",
270 |     "        return torch.sigmoid(self.fc2(F.relu(self.fc1(x))))\n"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "code",
275 |    "execution_count": null,
276 |    "metadata": {},
277 |    "outputs": [],
278 |    "source": [
279 |     "def CWC(y_pred, y_true):\n",
280 |     "    # Coverage width-based criterion (CWC), used as the training loss of the LUBE network.\n",
281 |     "    #\n",
282 |     "    # y_pred: (n, 2) tensor of bounds; column 0 is the upper bound, column 1 the lower bound.\n",
283 |     "    # y_true: tensor holding the n targets.\n",
284 |     "    # Returns a 0-dim tensor:\n",
285 |     "    #     sqrt(mean |upper - lower|) / range(y_true) * (1 + exp(-eta * (PICP - 0.95)))\n",
286 |     "    # where PICP is the share of targets strictly inside their interval and eta is the\n",
287 |     "    # notebook-level penalty exponent.\n",
288 |     "    #\n",
289 |     "    # Everything stays on y_pred's device under no_grad: nothing goes through NumPy\n",
290 |     "    # (Tensor.numpy() fails for CUDA tensors) and no autograd graph is recorded.\n",
291 |     "    # NOTE(review): the width term is the square root of the mean width - confirm this is intended.\n",
292 |     "    with torch.no_grad():\n",
293 |     "        upper = y_pred[:, 0].float()\n",
294 |     "        lower = y_pred[:, 1].float()\n",
295 |     "        target = y_true.to(y_pred.device).reshape(-1).float()\n",
296 |     "\n",
297 |     "        # calculate PICP: PI coverage probability (strict inequalities on both sides)\n",
298 |     "        inside = (lower < target) & (target < upper)\n",
299 |     "        PICP = inside.sum().item() / len(target)\n",
300 |     "\n",
301 |     "        # calculate MPIW\n",
302 |     "        MPIW = torch.sqrt(torch.mean(torch.abs(upper - lower)))\n",
303 |     "\n",
304 |     "        # range of the targets, used to normalise the width\n",
305 |     "        R = torch.max(target) - torch.min(target)\n",
306 |     "\n",
307 |     "        # the exponential penalty grows once PICP drops below 0.95\n",
308 |     "        penalty = 1 + 1*math.exp(-eta*(PICP-0.95))\n",
309 |     "\n",
310 |     "    return (MPIW/R)*penalty\n",
311 |     "\n"
312 |    ]
313 |   },
314 |   {
315 |    "cell_type": "code",
316 |    "execution_count": null,
317 |    "metadata": {},
318 |    "outputs": [],
319 |    "source": [
320 |     "class UniformSampler(object):\n",
321 |     "    # Draws tensors with uniformly distributed entries: floats through uniform_(minval, maxval),\n",
322 |     "    # ints/longs through random_(minval, maxval + 1).\n",
323 |     "    def __init__(self, minval, maxval, dtype='float', cuda=False):\n",
324 |     "        self.minval, self.maxval = minval, maxval\n",
325 |     "        self.cuda = cuda\n",
326 |     "        self.dtype_str = dtype\n",
327 |     "        # Legacy typed tensor constructors; an unsupported dtype name raises KeyError right here.\n",
328 |     "        backend = torch.cuda if cuda else torch\n",
329 |     "        constructors = {\n",
330 |     "            'float': backend.FloatTensor,\n",
331 |     "            'int': backend.IntTensor,\n",
332 |     "            'long': backend.LongTensor,\n",
333 |     "        }\n",
334 |     "        self.dtype = constructors[dtype]\n",
335 |     "\n",
336 |     "    def sample(self, size):\n",
337 |     "        if self.dtype_str == 'float':\n",
338 |     "            return self.dtype(*size).uniform_(self.minval, self.maxval)\n",
339 |     "        if self.dtype_str in ('int', 'long'):\n",
340 |     "            # random_ excludes its upper limit, so maxval + 1 keeps maxval reachable\n",
341 |     "            return self.dtype(*size).random_(self.minval, self.maxval + 1)\n",
342 |     "        # Only reachable if dtype_str was changed after construction.\n",
343 |     "        raise Exception(\"unknown dtype\")\n",
344 |     "\n",
345 |     "\n",
346 |     "class GaussianSampler(object):\n",
347 |     "    # Draws tensors with normal(mu, sigma) entries; integer dtypes are obtained\n",
348 |     "    # by casting the float sample.\n",
349 |     "    def __init__(self, mu, sigma, dtype='float', cuda=False):\n",
350 |     "        self.mu, self.sigma = mu, sigma\n",
351 |     "        self.cuda = cuda\n",
352 |     "        self.dtype_str = dtype\n",
353 |     "        # Legacy typed tensor constructors; an unsupported dtype name raises KeyError right here.\n",
354 |     "        backend = torch.cuda if cuda else torch\n",
355 |     "        constructors = {\n",
356 |     "            'float': backend.FloatTensor,\n",
357 |     "            'int': backend.IntTensor,\n",
358 |     "            'long': backend.LongTensor,\n",
359 |     "        }\n",
360 |     "        self.dtype = constructors[dtype]\n",
361 |     "\n",
362 |     "    def sample(self, size):\n",
363 |     "        # int/long tensors cannot be filled with normal draws directly: sample floats, then cast\n",
364 |     "        float_ctor = torch.cuda.FloatTensor if self.cuda else torch.FloatTensor\n",
365 |     "        draw = float_ctor(*size).normal_(self.mu, self.sigma)\n",
366 |     "        if self.dtype_str in ('int', 'long'):\n",
367 |     "            return draw.type(self.dtype)\n",
368 |     "        return draw"
369 |     "\n",
370 |     "\n",
371 |     "class SimulatedAnnealing(Optimizer):  # gradient-free optimizer: random perturbation + Metropolis acceptance\n",
372 |     "    def __init__(self, params, sampler, tau0=5.0, anneal_rate=0.0003,\n",
373 |     "                 min_temp=1e-5, anneal_every=10, hard=True, hard_rate=0.95):\n",
374 |     "        defaults = dict(sampler=sampler, tau0=tau0, tau=tau0, anneal_rate=anneal_rate,\n",
375 |     "                        min_temp=min_temp, anneal_every=anneal_every,\n",
376 |     "                        hard=hard, hard_rate=hard_rate, iteration=0)\n",
377 |     "        super(SimulatedAnnealing, self).__init__(params, defaults)\n",
378 |     "    # tau is the temperature: it starts at tau0, is lowered when iteration % anneal_every == 0 and never drops below min_temp.\n",
379 |     "    # hard=True multiplies tau by hard_rate; hard=False sets tau = tau0 * exp(-anneal_rate * iteration).\n",
380 |     "    def step(self, closure=None):\n",
381 |     "        \"\"\"Performs a single annealing step and returns the loss that was kept.\n",
382 |     "        Arguments:\n",
383 |     "            closure (callable): re-evaluates the model and returns the loss;\n",
384 |     "                required here, an Exception is raised when it is None.\n",
385 |     "        \"\"\"\n",
386 |     "        if closure is None:\n",
387 |     "            raise Exception(\"loss closure is required to do SA\")\n",
388 |     "        # loss of the current (unperturbed) parameters\n",
389 |     "        loss = closure()\n",
390 |     "\n",
391 |     "        for group in self.param_groups:\n",
392 |     "            # the sampler draws the random perturbation;\n",
393 |     "            # this local is not used below, group['sampler'] is read again instead\n",
394 |     "            sampler = group['sampler']\n",
395 |     "\n",
396 |     "            # clone all of the params to keep in case we need to swap back\n",
397 |     "            cloned_params = [p.clone() for p in group['params']]\n",
398 |     "\n",
399 |     "            for p in group['params']:\n",
400 |     "                # anneal tau if it matches the requirements\n",
401 |     "                if group['iteration'] > 0 and group['iteration'] % group['anneal_every'] == 0:\n",
402 |     "                    if not group['hard']:\n",
403 |     "                        # smoother annealing: consider using this over hard annealing\n",
404 |     "                        rate = -group['anneal_rate'] * group['iteration']\n",
405 |     "                        group['tau'] = np.maximum(group['tau0'] * np.exp(rate),\n",
406 |     "                                                  group['min_temp'])\n",
407 |     "                    else:\n",
408 |     "                        # hard annealing\n",
409 |     "                        group['tau'] = np.maximum(group['hard_rate'] * group['tau'],\n",
410 |     "                                                  group['min_temp'])\n",
411 |     "                # the tensor is rescaled to unit norm first, then the random perturbation is added\n",
412 |     "                random_perturbation = group['sampler'].sample(p.data.size())\n",
413 |     "                p.data = p.data / torch.norm(p.data)\n",
414 |     "                p.data.add_(random_perturbation)\n",
415 |     "                group['iteration'] += 1  # advances once per parameter tensor, not once per step() call\n",
416 |     "\n",
417 |     "            # re-evaluate the loss function with the perturbed params\n",
418 |     "            # if we didn't accept the new params, then swap back and return\n",
419 |     "            loss_perturbed = closure()\n",
420 |     "            final_loss, is_swapped_back = self.anneal(loss, loss_perturbed, group['tau'])\n",
421 |     "            if is_swapped_back:\n",
422 |     "                for p, pbkp in zip(group['params'], cloned_params):\n",
423 |     "                    p.data = pbkp.data\n",
424 |     "            # NOTE(review): this return sits inside the group loop, so only the first param group is ever processed\n",
425 |     "            return final_loss \n",
426 |     "\n",
427 |     "\n",
428 |     "    def anneal(self, loss, loss_perturbed, tau):\n",
429 |     "        '''returns (loss to keep, swap_back); swap_back is True when the perturbation is rejected'''\n",
430 |     "        def acceptance_prob(old, new, temp):\n",
431 |     "            return torch.exp((old - new)/(temp))\n",
432 |     "\n",
433 |     "        if loss_perturbed.data < loss.data:\n",
434 |     "            # strictly lower loss: the perturbation is always accepted\n",
435 |     "            return loss_perturbed, False\n",
436 |     "        else:\n",
437 |     "            # evaluate the metropolis criterion\n",
438 |     "            ap = acceptance_prob(loss, loss_perturbed, tau)\n",
439 |     "            random = np.random.rand()\n",
440 |     "            print(\"old = \", loss.data, \"| new = \", loss_perturbed.data,\n",
441 |     "                  \" | ap = \", ap.data, \" | tau = \", tau, \" | r = \", random)\n",
442 |     "            # a worse loss is still accepted with probability exp((old - new) / tau)\n",
443 |     "            if ap.data > random:\n",
444 |     "                return loss_perturbed, False\n",
445 |     "\n",
446 |     "            return loss, True\n",
447 |     "        "
448 |    ]
449 |   },
450 |   {
451 |    "cell_type": "code",
452 |    "execution_count": null,
453 |    "metadata": {},
454 |    "outputs": [],
455 |    "source": [
456 |     "def closure():  # reads the globals net / X_train_i / y_train_i rebound by the loop below\n",
457 |     "    y_pred_train = net(X_train_i)\n",
458 |     "    loss = CWC(y_pred_train, y_train_i)\n",
459 |     "    return loss\n",
460 |     "\n",
461 |     "t_loss = []\n",
462 |     "nets = []\n",
463 |     "\n",
464 |     "start = time.time()\n",
465 |     "\n",
466 |     "for i in range(len(dfs)):\n",
467 |     "    # fresh network, sampler and annealing optimizer for every dataset\n",
468 |     "    net = Net(X_train[i].shape[1])\n",
469 |     "\n",
470 |     "    sampler = GaussianSampler(mu=0, sigma=1) #sampler = UniformSampler(minval=-0.5, maxval=0.5)\n",
471 |     "    optimizer = SimulatedAnnealing(net.parameters(), sampler=sampler)\n",
472 |     "    \n",
473 |     "    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
474 |     "    X_train_i = X_train[i].to(device)\n",
475 |     "    y_train_i = y_train[i].to(device)\n",
476 |     "    X_test_i = X_test[i].to(device)\n",
477 |     "    y_test_i = y_test[i].to(device)\n",
478 |     "    net = net.to(device)\n",
479 |     "    \n",
480 |     "    ite = []\n",
481 |     "    loss_all = []\n",
482 |     "    \n",
483 |     "    for epoch in range(1000):\n",
484 |     "        y_pred_train = net(X_train_i)\n",
485 |     "        y_pred_train = torch.squeeze(y_pred_train)\n",
486 |     "        train_loss = CWC(y_pred_train, y_train_i)\n",
487 |     "        train_loss = train_loss.to(device)\n",
488 |     "        # bookkeeping: .cpu() is needed because .numpy() only works on host (CPU) tensors\n",
489 |     "\n",
490 |     "        ite = np.append(ite, epoch)\n",
491 |     "        loss_all = np.append(loss_all, train_loss.detach().cpu().numpy())\n",
492 |     "        # step() perturbs the weights and calls closure() to score the perturbed network\n",
493 |     "        optimizer.zero_grad()\n",
494 |     "        train_loss.backward()\n",
495 |     "        optimizer.step(closure)\n",
496 |     "        \n",
497 |     "    \n",
498 |     "    t_loss.append(loss_all)\n",
499 |     "    nets.append(net)\n",
500 |     "    \n",
501 |     "    \n",
502 |     "end = time.time()\n",
503 |     "print((end - start)/len(dfs))"
504 |    ]
505 |   },
506 |   {
507 |    "cell_type": "markdown",
508 |    "metadata": {},
509 |    "source": [
510 |     "## Evaluation"
511 |    ]
512 |   },
513 |   {
514 |    "cell_type": "code",
515 |    "execution_count": null,
516 |    "metadata": {},
517 |    "outputs": [],
518 |    "source": [
519 |     "def PICP_func(y, lower, upper):\n",
520 |     "    '''Coverage probability: fraction of targets y[k] inside the closed interval [lower[k], upper[k]].'''\n",
521 |     "    # count the covered targets, pairing each target with its own bounds\n",
522 |     "    hits = sum(\n",
523 |     "        1 for k, target in enumerate(y) if lower[k] <= target <= upper[k]\n",
524 |     "    )\n",
525 |     "    return hits / len(y)\n",
526 |     "\n",
527 |     "def PINAW_func(y, lower, upper):\n",
528 |     "    '''Normalized average interval width: mean(upper - lower) divided by (max(y) - min(y)).'''\n",
529 |     "    avg_width = np.mean(upper - lower)\n",
530 |     "    target_range = np.max(y) - np.min(y)\n",
531 |     "    # dividing by the observed target range makes the width scale-free\n",
532 |     "    return avg_width / target_range"
533 |    ]
534 |   },
535 |   {
536 |    "cell_type": "code",
537 |    "execution_count": null,
538 |    "metadata": {},
539 |    "outputs": [],
540 |    "source": [
541 |     "for i in range(len(dfs)):\n",
542 |     "    # evaluate on the CPU: X_train / X_test are host tensors and every output is converted with .numpy()\n",
543 |     "    net = nets[i].cpu()\n",
544 |     "    \n",
545 |     "    y_pred_train = net(X_train[i])\n",
546 |     "    y_pred_train= y_pred_train.detach().numpy()\n",
547 |     "    \n",
548 |     "    y_train_i = y_train[i].cpu()\n",
549 |     "    y_train_i = y_train_i.detach().numpy()\n",
550 |     "    \n",
551 |     "    # the point forecast fed back as a lag is the midpoint of the two network outputs\n",
552 |     "    # For multi-step ahead prediction\n",
553 |     "    y_45_ = net(X_test[i][0].unsqueeze(0)).detach().numpy()\n",
554 |     "    y_45 = ((y_45_.T[0] + y_45_.T[1]) / 2)[0]\n",
555 |     "    y_30_ = net(X_test[i][1].unsqueeze(0)).detach().numpy()\n",
556 |     "    y_30 = ((y_30_.T[0] + y_30_.T[1]) / 2)[0]\n",
557 |     "    y_15_ = net(X_test[i][2].unsqueeze(0)).detach().numpy()\n",
558 |     "    y_15 = ((y_15_.T[0] + y_15_.T[1]) / 2)[0]\n",
559 |     "    for j in range(3, X_test[i].shape[0]):\n",
560 |     "        X_test[i][j][-3] = torch.tensor(y_45)\n",
561 |     "        X_test[i][j][-2] = torch.tensor(y_30)\n",
562 |     "        X_test[i][j][-1] = torch.tensor(y_15)\n",
563 |     "        y_pred_j_ = net(X_test[i][j].unsqueeze(0)).detach().numpy()\n",
564 |     "        y_pred_j = ((y_pred_j_.T[0] + y_pred_j_.T[1]) / 2)[0]\n",
565 |     "        y_45 = y_30\n",
566 |     "        y_30 = y_15\n",
567 |     "        y_15 = y_pred_j\n",
568 |     "    # end of multi-step ahead\n",
569 |     "    \n",
570 |     "    y_pred_test = net(X_test[i])\n",
571 |     "    y_pred_test= y_pred_test.detach().numpy()\n",
572 |     "    y_test_i = y_test[i].cpu()\n",
573 |     "    y_test_i = y_test_i.detach().numpy()\n",
574 |     "    # column 0 of the network output is used as the upper bound, column 1 as the lower bound\n",
575 |     "    upper_train = y_pred_train.T[0]\n",
576 |     "    lower_train = y_pred_train.T[1]\n",
577 |     "    \n",
578 |     "    upper = y_pred_test.T[0]\n",
579 |     "    lower = y_pred_test.T[1]\n",
580 |     "    # map targets and bounds back to the original scale of the target\n",
581 |     "    real_y_train = y_scaler[i].inverse_transform(y_train_i.reshape(-1, 1))\n",
582 |     "    real_y_test = y_scaler[i].inverse_transform(y_test_i.reshape(-1, 1))\n",
583 |     "    \n",
584 |     "    upper_train = y_scaler[i].inverse_transform(upper_train.reshape(-1, 1))\n",
585 |     "    lower_train = y_scaler[i].inverse_transform(lower_train.reshape(-1, 1))\n",
586 |     "    \n",
587 |     "    upper = y_scaler[i].inverse_transform(upper.reshape(-1, 1))\n",
588 |     "    lower = y_scaler[i].inverse_transform(lower.reshape(-1, 1))\n",
589 |     "    \n",
590 |     "    real_y_test = real_y_test.flatten()\n",
591 |     "    real_y_train = real_y_train.flatten()\n",
592 |     "    \n",
593 |     "    lower_train = lower_train.flatten()\n",
594 |     "    upper_train = upper_train.flatten()\n",
595 |     "    \n",
596 |     "    lower = lower.flatten()\n",
597 |     "    upper = upper.flatten()\n",
598 |     "    # lower bounds below 10e-6 (i.e. 1e-5), including negative ones, are clamped to zero\n",
599 |     "    for j in range(len(lower)):\n",
600 |     "        if lower[j]<10e-6:\n",
601 |     "            lower[j]=0\n",
602 |     "    # the interval is read as mean +/- 1.96 std to derive a Gaussian for the CRPS\n",
603 |     "    mean = (upper+lower)/2\n",
604 |     "    std = (mean - lower)/1.96\n",
605 |     "    \n",
606 |     "    # Deterministic metrics\n",
607 |     "    MAE = mean_absolute_error(real_y_test, mean)\n",
608 |     "    RMSE = mean_squared_error(real_y_test, mean, squared=False)\n",
609 |     "    MBE = np.mean(mean - real_y_test)\n",
610 |     "    print(f'MAE: {MAE:.3f}')\n",
611 |     "    print(f'RMSE: {RMSE:.3f}')\n",
612 |     "    print(f'MBE: {MBE:.3f}')\n",
613 |     "    \n",
614 |     "    # Probabilistic metrics\n",
615 |     "    PICP = PICP_func(real_y_test, lower, upper)\n",
616 |     "    PINAW = PINAW_func(real_y_test, lower, upper)\n",
617 |     "    C = prscore.crps_gaussian(real_y_test, mu=mean, sig=std)\n",
618 |     "    CRPS = C.mean()\n",
619 |     "    print(f'PICP: {PICP:.3f}')\n",
620 |     "    print(f'PINAW: {PINAW:.3f}')\n",
621 |     "    print(f'CRPS: {CRPS:.3f}')\n",
622 |     "    print('\\n')"
623 |    ]
624 |   }
625 |  ],
626 |  "metadata": {
627 |   "kernelspec": {
628 |    "display_name": "Python 3",
629 |    "language": "python",
630 |    "name": "python3"
631 |   },
632 |   "language_info": {
633 |    "codemirror_mode": {
634 |     "name": "ipython",
635 |     "version": 3
636 |    },
637 |    "file_extension": ".py",
638 |    "mimetype": "text/x-python",
639 |    "name": "python",
640 |    "nbconvert_exporter": "python",
641 |    "pygments_lexer": "ipython3",
642 |    "version": "3.7.7"
643 |   }
644 |  },
645 |  "nbformat": 4,
646 |  "nbformat_minor": 4
647 | }
648 | 


--------------------------------------------------------------------------------
/NGBoost_PV.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import pandas as pd\n",
 10 |     "import numpy as np\n",
 11 |     "from matplotlib import pyplot as plt\n",
 12 |     "import seaborn as sns\n",
 13 |     "import time\n",
 14 |     "from datetime import datetime\n",
 15 |     "\n",
 16 |     "from sklearn.preprocessing import MinMaxScaler\n",
 17 |     "from sklearn.metrics import mean_squared_error\n",
 18 |     "from sklearn.metrics import mean_absolute_error\n",
 19 |     "\n",
 20 |     "from sklearn.tree import DecisionTreeRegressor\n",
 21 |     "\n",
 22 |     "from ngboost import NGBRegressor\n",
 23 |     "\n",
 24 |     "import properscoring as prscore\n",
 25 |     "\n",
 26 |     "import pickle\n",
 27 |     "from pathlib import Path\n",
 28 |     "import os"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "metadata": {},
 34 |    "source": [
 35 |     "## Read and preprocess the dataset"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": null,
 41 |    "metadata": {},
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "df = pd.read_csv('power_weather_data.csv')\n",
 45 |     "\n",
 46 |     "# csv file MUST contain 'date' and 'Power' fields\n",
 47 |     "# optional: weather data"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": null,
 53 |    "metadata": {},
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y %H:%M')"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": null,
 62 |    "metadata": {},
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "df['hour'] = df['date'].dt.hour    # vectorized datetime accessor instead of a per-row lambda\n",
 66 |     "df['month'] = df['date'].dt.month"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": null,
 72 |    "metadata": {},
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "# df['hour_sin'] = np.sin(df['hour'] * 2 * np.pi/24)\n",
 76 |     "# df['hour_cos'] = np.cos(df['hour'] * 2 * np.pi/24)\n",
 77 |     "df['month_sin'] = np.sin(df['month'] * 2 * np.pi/12)\n",
 78 |     "df['month_cos'] = np.cos(df['month'] * 2 * np.pi/12)"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": null,
 84 |    "metadata": {},
 85 |    "outputs": [],
 86 |    "source": [
 87 |     "df = df[(df['hour']>=6) & (df['hour']<=21)]"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": null,
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "# df = df.drop(['hour', 'month'], axis=1)\n",
 97 |     "df = df.drop(['month'], axis=1)"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": null,
103 |    "metadata": {},
104 |    "outputs": [],
105 |    "source": [
106 |     "P = df['Power']\n",
107 |     "# lagged copies of Power: shift(k) pairs every row with the Power value k rows earlier\n",
108 |     "PowerData = pd.concat([P.shift(3), P.shift(2), P.shift(1)], axis=1)\n",
109 |     "PowerData.columns = ['t-45', 't-30', 't-15']\n",
110 |     "# NOTE(review): rows outside hours 6-21 were dropped earlier, so (assuming chronological order) the first rows of a day get lags from the previous day's last rows - confirm this is intended\n",
111 |     "df = pd.concat([df, PowerData.reindex(df.index)], axis=1)\n",
112 |     "# fillna(0) zero-fills every remaining NaN in df, not only the leading lag rows\n",
113 |     "df = df.fillna(0)"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "markdown",
118 |    "metadata": {},
119 |    "source": [
120 |     "## Hyperparameters"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": null,
126 |    "metadata": {},
127 |    "outputs": [],
128 |    "source": [
129 |     "weeks = [['2018-03-01', '2019-03-15']]\n",
130 |     "\n",
131 |     "val_days = 14\n",
132 |     "\n",
133 |     "# n_points_day = 4 * 24\n",
134 |     "n_points_day = 4 * 16"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "markdown",
139 |    "metadata": {},
140 |    "source": [
141 |     "## Set the dataframes"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": null,
147 |    "metadata": {},
148 |    "outputs": [],
149 |    "source": [
150 |     "dfs = []\n",
151 |     "# one dataframe per [first_day, last_day] pair listed in weeks\n",
152 |     "for w in weeks:\n",
153 |     "    # the window runs from 00:00 of the first day to 23:59 of the last day\n",
154 |     "    w_start = datetime.strptime(w[0]+\" 00:00\", '%Y-%m-%d %H:%M')\n",
155 |     "    w_end = datetime.strptime(w[1]+\" 23:59\", '%Y-%m-%d %H:%M')\n",
156 |     "    # both comparisons are strict, so rows stamped exactly at w_start or w_end are left out\n",
157 |     "    dfs.append(df[(df['date'] > w_start) & (df['date'] < w_end)])\n",
158 |     "    \n",
159 |     "n_sets = len(dfs)"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "markdown",
164 |    "metadata": {},
165 |    "source": [
166 |     "## Train Test Split"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": null,
172 |    "metadata": {},
173 |    "outputs": [],
174 |    "source": [
175 |     "X_train_ = []\n",
176 |     "X_test_ = []\n",
177 |     "y_train_ = []\n",
178 |     "y_test_ = []\n",
179 |     "\n",
180 |     "x_scaler = []\n",
181 |     "y_scaler = []\n",
182 |     "\n",
183 |     "t_train = []\n",
184 |     "t_test = []\n",
185 |     "\n",
186 |     "for i in range(n_sets):\n",
187 |     "    # the last n_points_day * val_days rows form the test set, everything before them is training data\n",
188 |     "    train = dfs[i][:int(-n_points_day*val_days)]\n",
189 |     "    test = dfs[i][int(-n_points_day*val_days):]\n",
190 |     "    # features are every column except the target Power and the timestamp\n",
191 |     "    X_tr = train.drop(['Power','date'], axis=1).values\n",
192 |     "    X_t = test.drop(['Power','date'], axis=1).values\n",
193 |     "    \n",
194 |     "    y_tr = train['Power'].values\n",
195 |     "    y_t = test['Power'].values\n",
196 |     "    # scalers are fitted on the training part only and then applied to both parts\n",
197 |     "    x_sc = MinMaxScaler()\n",
198 |     "    y_sc = MinMaxScaler()\n",
199 |     "#     x_sc = StandardScaler()\n",
200 |     "#     y_sc = StandardScaler()\n",
201 |     "    x_sc.fit(X_tr)\n",
202 |     "    y_sc.fit(y_tr.reshape(-1, 1))  #reshape only because fit needs a 2d array\n",
203 |     "    x_scaler.append(x_sc)\n",
204 |     "    y_scaler.append(y_sc)\n",
205 |     "    # the scaled targets are stored as (n, 1) columns\n",
206 |     "    X_train_.append(x_sc.transform(X_tr))\n",
207 |     "    X_test_.append(x_sc.transform(X_t))\n",
208 |     "    y_train_.append(y_sc.transform(y_tr.reshape(-1, 1)))\n",
209 |     "    y_test_.append(y_sc.transform(y_t.reshape(-1, 1)))\n",
210 |     "    # timestamps of both parts, split at the same row offset\n",
211 |     "    t_train.append(dfs[i].iloc[:int(-n_points_day*val_days)]['date'].values)\n",
212 |     "    t_test.append(dfs[i].iloc[int(-n_points_day*val_days):]['date'].values)"
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "code",
217 |    "execution_count": null,
218 |    "metadata": {},
219 |    "outputs": [],
220 |    "source": []
221 |   },
222 |   {
223 |    "cell_type": "code",
224 |    "execution_count": null,
225 |    "metadata": {},
226 |    "outputs": [],
227 |    "source": [
228 |     "X_train = X_train_\n",
229 |     "X_test = X_test_\n",
230 |     "y_train = y_train_\n",
231 |     "y_test = y_test_"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "markdown",
236 |    "metadata": {},
237 |    "source": [
238 |     "## NGBoost"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": null,
244 |    "metadata": {},
245 |    "outputs": [],
246 |    "source": [
247 |     "tree_learner = DecisionTreeRegressor(\n",
248 |     "    criterion=\"friedman_mse\",\n",
249 |     "    min_samples_split=2,\n",
250 |     "    min_samples_leaf=1,\n",
251 |     "    min_weight_fraction_leaf=0.0,\n",
252 |     "    max_depth=3,\n",
253 |     "    splitter=\"best\",\n",
254 |     "    random_state=None,\n",
255 |     ")"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "code",
260 |    "execution_count": null,
261 |    "metadata": {
262 |     "scrolled": true
263 |    },
264 |    "outputs": [],
265 |    "source": [
266 |     "ngbs = []\n",
267 |     "# one NGBoost model is fitted per dataset; the cell prints the average wall-clock fit time per dataset\n",
268 |     "start = time.time()\n",
269 |     "\n",
270 |     "for i in range(n_sets):\n",
271 |     "    \n",
272 |     "    X_train_i = X_train[i]\n",
273 |     "    y_train_i = y_train[i]\n",
274 |     "    # ravel() flattens the (n, 1) scaled target column into the 1-D vector passed to fit\n",
275 |     "    ngb = NGBRegressor(Base=tree_learner, n_estimators=1000).fit(X_train_i, y_train_i.ravel())\n",
276 |     "    # NOTE(review): no random_state is set here or on tree_learner, so repeated runs may differ - confirm\n",
277 |     "    ngbs.append(ngb)\n",
278 |     "\n",
279 |     "end = time.time()\n",
280 |     "print((end - start)/n_sets)"
281 |    ]
282 |   },
283 |   {
284 |    "cell_type": "markdown",
285 |    "metadata": {},
286 |    "source": [
287 |     "## Evaluation"
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": null,
293 |    "metadata": {},
294 |    "outputs": [],
295 |    "source": [
296 |     "y = []\n",
297 |     "y_hat = []\n",
298 |     "upper_hat = []\n",
299 |     "lower_hat = []\n",
300 |     "\n",
301 |     "for i in range(n_sets):\n",
302 |     "    \n",
303 |     "    ngb = ngbs[i]\n",
304 |     "    X_test_i = X_test[i].copy()  # copy: the rolling forecast below overwrites the three lag columns\n",
305 |     "    y_test_i = y_test[i]\n",
306 |     "    \n",
307 |     "    # For multi-step ahead prediction\n",
308 |     "    y_first = ngb.predict(X_test_i[:3])\n",
309 |     "    \n",
310 |     "    y_3 = y_first[3-3]\n",
311 |     "    y_2 = y_first[3-2]\n",
312 |     "    y_1 = y_first[3-1]\n",
313 |     "    for j in range(3, X_test[i].shape[0]):\n",
314 |     "        X_test_i[j][-3] = y_3\n",
315 |     "        X_test_i[j][-2] = y_2\n",
316 |     "        X_test_i[j][-1] = y_1\n",
317 |     "        y_pred_j = ngb.pred_dist(X_test_i[j].reshape(1, -1)).loc[0]\n",
318 |     "        y_3 = y_2\n",
319 |     "        y_2 = y_1\n",
320 |     "        y_1 = y_pred_j\n",
321 |     "    # end of multi-step ahead\n",
322 |     "    \n",
323 |     "    y_pred = ngb.predict(X_test_i)\n",
324 |     "    y_dists = ngb.pred_dist(X_test_i)\n",
325 |     "    \n",
326 |     "    mean = y_dists.loc\n",
327 |     "    std = y_dists.scale\n",
328 |     "    # back to original units: the mean gets the full inverse transform, the std is only rescaled (no offset may be added to a spread)\n",
329 |     "    mean = y_scaler[i].inverse_transform(mean.reshape(-1, 1))\n",
330 |     "    std = std / y_scaler[i].scale_[0]\n",
331 |     "    mean = mean.flatten()\n",
332 |     "    std = std.flatten()\n",
333 |     "    \n",
334 |     "    real_y_test = y_scaler[i].inverse_transform(y_test_i)\n",
335 |     "    real_y_test = real_y_test.flatten()\n",
336 |     "    # lower[k] / upper[k] hold the mean -/+ (k + 1) std bands for k = 0, 1, 2\n",
337 |     "    lower = []\n",
338 |     "    upper = []\n",
339 |     "    for s in range(1,4):\n",
340 |     "        lower = lower + [mean - s * std]\n",
341 |     "        upper = upper + [mean + s * std]\n",
342 |     "    \n",
343 |     "    y_hat.append(mean)\n",
344 |     "    y.append(real_y_test)\n",
345 |     "    lower_hat.append(lower)\n",
346 |     "    upper_hat.append(upper)\n",
347 |     "    \n",
348 |     "    # Deterministic metrics\n",
349 |     "    MAE = mean_absolute_error(real_y_test, mean)\n",
350 |     "    RMSE = mean_squared_error(real_y_test, mean, squared=False)\n",
351 |     "    MBE = np.mean(mean - real_y_test)\n",
352 |     "    print(f'MAE: {MAE:.3f}')\n",
353 |     "    print(f'RMSE: {RMSE:.3f}')\n",
354 |     "    print(f'MBE: {MBE:.3f}')\n",
355 |     "    # coverage and width of the mean +/- 2 std band, computed inline because this notebook defines no PICP_func / PINAW_func\n",
356 |     "    # Probabilistic metrics\n",
357 |     "    PICP = np.mean((lower[1] <= real_y_test) & (real_y_test <= upper[1]))\n",
358 |     "    PINAW = np.mean(upper[1] - lower[1]) / (np.max(real_y_test) - np.min(real_y_test))\n",
359 |     "    C = prscore.crps_gaussian(real_y_test, mu=mean, sig=std)\n",
360 |     "    CRPS = C.mean()\n",
361 |     "    print(f'PICP: {PICP:.3f}')\n",
362 |     "    print(f'PINAW: {PINAW:.3f}')\n",
363 |     "    print(f'CRPS: {CRPS:.3f}')\n",
364 |     "    print('\\n')"
365 |    ]
366 |   },
367 |   {
368 |    "cell_type": "markdown",
369 |    "metadata": {},
370 |    "source": [
371 |     "## SHAP"
372 |    ]
373 |   },
374 |   {
375 |    "cell_type": "code",
376 |    "execution_count": null,
377 |    "metadata": {},
378 |    "outputs": [],
379 |    "source": [
380 |     "import shap\n",
381 |     "shap.initjs()"
382 |    ]
383 |   },
384 |   {
385 |    "cell_type": "code",
386 |    "execution_count": null,
387 |    "metadata": {},
388 |    "outputs": [],
389 |    "source": [
390 |     "i = 0\n",
391 |     "ngb = ngbs[i]\n",
392 |     "# feature names must follow the column order of the training matrix, which was built by dropping Power and date\n",
393 |     "features = list(dfs[i].drop(['Power', 'date'], axis=1).columns)\n",
394 |     "\n",
395 |     "explainer = shap.TreeExplainer(ngb, model_output=0)  # mean (point forecast): model_output=0, std (uncertainty): model_output=1\n",
396 |     "shap_values = explainer.shap_values(X_train[i])"
397 |    ]
398 |   },
399 |   {
400 |    "cell_type": "markdown",
401 |    "metadata": {},
402 |    "source": [
403 |     "## SHAP Summary Plots"
404 |    ]
405 |   },
406 |   {
407 |    "cell_type": "code",
408 |    "execution_count": null,
409 |    "metadata": {},
410 |    "outputs": [],
411 |    "source": [
412 |     "%matplotlib notebook\n",
413 |     "shap.summary_plot(shap_values, X_train[i], feature_names=features, show=True, plot_size=(15,8))"
414 |    ]
415 |   },
416 |   {
417 |    "cell_type": "code",
418 |    "execution_count": null,
419 |    "metadata": {},
420 |    "outputs": [],
421 |    "source": [
422 |     "%matplotlib notebook\n",
423 |     "shap.summary_plot(shap_values, X_train[i], feature_names=features, show=True, plot_size=(15,8), plot_type='bar')"
424 |    ]
425 |   },
426 |   {
427 |    "cell_type": "markdown",
428 |    "metadata": {},
429 |    "source": [
430 |     "## SHAP Interaction Plots"
431 |    ]
432 |   },
433 |   {
434 |    "cell_type": "code",
435 |    "execution_count": null,
436 |    "metadata": {},
437 |    "outputs": [],
438 |    "source": [
439 |     "# Feature indices:\n",
440 |     "# 0: Temperature\n",
441 |     "# 1: Humidity\n",
442 |     "# 2: precipitation\n",
443 |     "# 3: wind speed\n",
444 |     "# 4: radiation\n",
445 |     "# 5: hour\n",
446 |     "# 6: month_sin\n",
447 |     "# 7: month_cos\n",
448 |     "# 8: t-45\n",
449 |     "# 9: t-30\n",
450 |     "# 10: t-15"
451 |    ]
452 |   },
453 |   {
454 |    "cell_type": "code",
455 |    "execution_count": null,
456 |    "metadata": {},
457 |    "outputs": [],
458 |    "source": [
459 |     "shap_interaction_values = explainer.shap_interaction_values(X_train[i])"
460 |    ]
461 |   },
462 |   {
463 |    "cell_type": "code",
464 |    "execution_count": null,
465 |    "metadata": {},
466 |    "outputs": [],
467 |    "source": [
468 |     "%matplotlib inline\n",
469 |     "shap.dependence_plot((10,4), shap_interaction_values, x_scaler[i].inverse_transform(X_train[i]), feature_names=features)  # feature values in original units; shap creates its own axes"
470 |    ]
471 |   },
472 |   {
473 |    "cell_type": "markdown",
474 |    "metadata": {},
475 |    "source": [
476 |     "## Force plots"
477 |    ]
478 |   },
479 |   {
480 |    "cell_type": "code",
481 |    "execution_count": null,
482 |    "metadata": {},
483 |    "outputs": [],
484 |    "source": [
485 |     "%matplotlib notebook\n",
486 |     "shap.force_plot(explainer.expected_value, shap_values[851,:], features=features,link='logit', matplotlib=True, figsize=(10, 3),contribution_threshold=0.025 )"
487 |    ]
488 |   },
489 |   {
490 |    "cell_type": "code",
491 |    "execution_count": null,
492 |    "metadata": {},
493 |    "outputs": [],
494 |    "source": [
495 |     "dfs[i].iloc[851]"
496 |    ]
497 |   }
498 |  ],
499 |  "metadata": {
500 |   "kernelspec": {
501 |    "display_name": "Python 3",
502 |    "language": "python",
503 |    "name": "python3"
504 |   },
505 |   "language_info": {
506 |    "codemirror_mode": {
507 |     "name": "ipython",
508 |     "version": 3
509 |    },
510 |    "file_extension": ".py",
511 |    "mimetype": "text/x-python",
512 |    "name": "python",
513 |    "nbconvert_exporter": "python",
514 |    "pygments_lexer": "ipython3",
515 |    "version": "3.7.10"
516 |   }
517 |  },
518 |  "nbformat": 4,
519 |  "nbformat_minor": 4
520 | }
521 | 


--------------------------------------------------------------------------------
/heatmap.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import pandas as pd\n",
 10 |     "import numpy as np\n",
 11 |     "from matplotlib import pyplot as plt\n",
 12 |     "import seaborn as sns\n",
 13 |     "import time\n",
 14 |     "from datetime import datetime"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "## Read and preprocess the datasets"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": null,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "df1 = pd.read_csv('power_weather_data1.csv')\n",
 31 |     "df2 = pd.read_csv('power_weather_data2.csv')"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": null,
 37 |    "metadata": {},
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "df1['date'] = pd.to_datetime(df1['date'], format='%m/%d/%Y %H:%M')\n",
 41 |     "\n",
 42 |     "df2['date'] = pd.to_datetime(df2['date'], format='%m/%d/%Y %H:%M')"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": null,
 48 |    "metadata": {},
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "df1['hour'] = df1['date'].dt.hour\n",
 52 |     "df1['month'] = df1['date'].dt.month\n",
 53 |     "# vectorized datetime accessors (no per-row Python lambda)\n",
 54 |     "df2['hour'] = df2['date'].dt.hour\n",
 55 |     "df2['month'] = df2['date'].dt.month"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": null,
 61 |    "metadata": {},
 62 |    "outputs": [],
 63 |    "source": [
 64 |     "df1['month_sin'] = np.sin(df1['month'] * 2 * np.pi/12)\n",
 65 |     "df1['month_cos'] = np.cos(df1['month'] * 2 * np.pi/12)\n",
 66 |     "\n",
 67 |     "df2['month_sin'] = np.sin(df2['month'] * 2 * np.pi/12)\n",
 68 |     "df2['month_cos'] = np.cos(df2['month'] * 2 * np.pi/12)"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": null,
 74 |    "metadata": {},
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "df1 = df1[(df1['hour']>=6) & (df1['hour']<=21)]\n",
 78 |     "\n",
 79 |     "df2 = df2[(df2['hour']>=6) & (df2['hour']<=21)]"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": null,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "df1 = df1.drop(['month'], axis=1)\n",
 89 |     "\n",
 90 |     "df2 = df2.drop(['month'], axis=1)"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": null,
 96 |    "metadata": {},
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "max_P1 = max(df1['Power'])\n",
100 |     "max_P2 = max(df2['Power'])"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": null,
106 |    "metadata": {},
107 |    "outputs": [],
108 |    "source": [
109 |     "df1['Power'] = df1['Power'] / max_P1\n",
110 |     "\n",
111 |     "P = df1['Power']\n",
112 |     "\n",
113 |     "PowerData = pd.concat([P.shift(3), P.shift(2), P.shift(1)], axis=1)\n",
114 |     "PowerData.columns = ['t-45', 't-30', 't-15']\n",
115 |     "\n",
116 |     "df1 = pd.concat([df1, PowerData.reindex(df1.index)], axis=1)\n",
117 |     "    \n",
118 |     "df1 = df1.fillna(0)"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": null,
124 |    "metadata": {},
125 |    "outputs": [],
126 |    "source": [
127 |     "df2['Power'] = df2['Power'] / max_P2\n",
128 |     "\n",
129 |     "P = df2['Power']\n",
130 |     "\n",
131 |     "PowerData = pd.concat([P.shift(3), P.shift(2), P.shift(1)], axis=1)\n",
132 |     "PowerData.columns = ['t-45', 't-30', 't-15']\n",
133 |     "\n",
134 |     "df2 = pd.concat([df2, PowerData.reindex(df2.index)], axis=1)\n",
135 |     "    \n",
136 |     "df2 = df2.fillna(0)"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": null,
142 |    "metadata": {},
143 |    "outputs": [],
144 |    "source": [
145 |     "df = pd.concat([df1, df2])"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "markdown",
150 |    "metadata": {},
151 |    "source": [
152 |     "## Heatmap"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": null,
158 |    "metadata": {},
159 |    "outputs": [],
160 |    "source": [
161 |     "fig = plt.figure(figsize=(15,9))\n",
162 |     "b = sns.heatmap(df.select_dtypes(include=['number', 'bool']).corr(), annot=True, cmap='coolwarm')  # numeric columns only: df still holds the datetime 'date' column"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "markdown",
167 |    "metadata": {},
168 |    "source": [
169 |     "## Boxenplot"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": null,
175 |    "metadata": {},
176 |    "outputs": [],
177 |    "source": [
178 |     "fig = plt.figure(figsize=(12,8))\n",
179 |     "b = sns.boxenplot(x='hour', y='Power', data=df, color='green')"
180 |    ]
181 |   }
182 |  ],
183 |  "metadata": {
184 |   "kernelspec": {
185 |    "display_name": "Python 3",
186 |    "language": "python",
187 |    "name": "python3"
188 |   },
189 |   "language_info": {
190 |    "codemirror_mode": {
191 |     "name": "ipython",
192 |     "version": 3
193 |    },
194 |    "file_extension": ".py",
195 |    "mimetype": "text/x-python",
196 |    "name": "python",
197 |    "nbconvert_exporter": "python",
198 |    "pygments_lexer": "ipython3",
199 |    "version": "3.7.7"
200 |   }
201 |  },
202 |  "nbformat": 4,
203 |  "nbformat_minor": 4
204 | }
205 | 


--------------------------------------------------------------------------------