├── README.md
├── simulations
│   └── code
│       ├── .DS_Store
│       ├── .Rhistory
│       ├── README.md
│       ├── aggregation.R
│       ├── analysis.R
│       ├── analysis_utils.R
│       ├── bights.R
│       ├── code.R
│       ├── config_paths.R
│       ├── main.R
│       ├── mint.R
│       ├── nicefigs.R
│       ├── packages.R
│       ├── permutations.R
│       ├── runsim.sh
│       ├── simulate.R
│       └── utils.R
└── smart-meters
    └── code
        ├── MinT_ecov.R
        ├── README.md
        ├── aggregation.R
        ├── aggregation_merge.R
        ├── basef.R
        ├── config_general.R
        ├── config_paths.R
        ├── config_splitting.R
        ├── init_1_rawmeters.R
        ├── init_2_MyHierarchy.R
        ├── init_3_bottomlevel_meters.R
        ├── init_4_aggregated_meters.R
        ├── makebf_byhalfour.R
        ├── multiplot.R
        ├── nicefigs.R
        ├── permutations.R
        ├── plot_calendar_effects.R
        ├── plot_coherency.R
        ├── plot_coverage.R
        ├── plot_forecasts.R
        ├── plot_parameters.R
        ├── plot_series.R
        ├── plot_tree.R
        ├── results.R
        ├── results_utils.R
        ├── run_aggregation.sh
        ├── run_basef.sh
        ├── utils.R
        ├── utils_ets.R
        ├── utils_hts.R
        └── utils_kde.R

/README.md:
--------------------------------------------------------------------------------
1 | # Source code for *Hierarchical Probabilistic Forecasting of Electricity Demand with Smart Meter Data* by Souhaib Ben Taieb, James W. Taylor, and Rob J. Hyndman.
2 | 
3 | 1. The code for the simulation experiments is in [simulations](simulations/code).
4 | 
5 | 2. The code for the smart meter experiments is in [smart-meters](smart-meters/code).
6 | 
--------------------------------------------------------------------------------
list_simulations[[i]]$samples_true 457 | samples_true <- aperm(samples_true, c(2, 1, 3)) 458 | samples_method <- sapply(samples_pred, function(mat){ 459 | mat[, j, h] 460 | }) 461 | sapply(colnames(samples_method), function(mycol){ 462 | res_test <- ks.test(samples_method[, mycol], samples_true[, j, h]) 463 | #as.numeric(res_test$statistic) 464 | browser() 465 | as.numeric(res_test$pvalue) 466 | }) 467 | }) 468 | }, simplify = "array") 469 | mat_test <- sapply(seq(7), function(j){ 470 | sapply(seq(nb_simulations), function(i){ 471 | samples_pred <- list_simulations[[i]]$samples_pred 472 | samples_true <- list_simulations[[i]]$samples_true 473 | samples_true <- aperm(samples_true, c(2, 1, 3)) 474 | samples_method <- sapply(samples_pred, function(mat){ 475 | mat[, j, h] 476 | }) 477 | sapply(colnames(samples_method), function(mycol){ 478 | res_test <- ks.test(samples_method[, mycol], samples_true[, j, h]) 479 | #as.numeric(res_test$statistic) 480 | browser() 481 | as.numeric(res_test$p.value) 482 | }) 483 | }) 484 | }, simplify = "array") 485 | mat_test <- aperm(mat_test, c(2, 1, 3)) 486 | #par(mfrow = c(2, 4)) 487 | par(mfrow = c(2, 3)) 488 | for(j in seq(7)){ 489 | boxplot(mat_test[, , j]) 490 | } 491 | mat_test <- sapply(seq(7), function(j){ 492 | sapply(seq(nb_simulations), function(i){ 493 | samples_pred <- list_simulations[[i]]$samples_pred 494 | samples_true <- list_simulations[[i]]$samples_true 495 | samples_true <- aperm(samples_true, c(2, 1, 3)) 496 | samples_method <- sapply(samples_pred, function(mat){ 497 | mat[, j, h] 498 | }) 499 | sapply(colnames(samples_method), function(mycol){ 500 | res_test <- ks.test(samples_method[, mycol], samples_true[, j, h]) 501 | #as.numeric(res_test$statistic) 502 | #browser() 503 | as.numeric(res_test$p.value) 504 | }) 505 | }) 506 | }, simplify = "array") 507 | mat_test <- aperm(mat_test, c(2, 1, 3)) 508 | #par(mfrow = c(2, 4)) 509 | par(mfrow = c(2, 3)) 510 | for(j in seq(7)){ 511 | boxplot(mat_test[, , j]) 512 | } 513 | -------------------------------------------------------------------------------- /simulations/code/README.md: -------------------------------------------------------------------------------- 1 | # Source code for the simulation experiments of the paper *Hierarchical Probabilistic Forecasting of Electricity Demand with Smart Meter Data* by Ben Taieb, Souhaib, Taylor, James, and Hyndman, Rob. 2 | 3 | 4 | The code implements all the results for the simulation experiments in Section 7. 5 | 6 | # Usage 7 | 8 | 1. Specify the scenario in *runsim.sh*, i.e. specify experiment, marg, Tlearning, M, Trueparam, etc. See runsim.sh for some examples. 9 | 2. Run the script *runsim.sh* 10 | ``` 11 | ./runsim.sh 12 | ``` 13 | 3. To produce the figures with the results, run the script *analysis.R* with the right scenario, as in step 1. 14 | ``` 15 | ./analysis.sh 16 | ``` 17 | 4. The results will then be available in the folder *work*. 18 | -------------------------------------------------------------------------------- /simulations/code/aggregation.R: -------------------------------------------------------------------------------- 1 | # This function applies the permutations to the samples generated from the marginal distributions. 2 | # In other words, it restores the dependences between the variables. 
3 | permutate_samples <- function(my_bights, my_samples, list_matpermutations){ 4 | 5 | n_permutations <- unique(sapply(list_matpermutations, nrow)) 6 | stopifnot(length(n_permutations) == 1) 7 | 8 | stopifnot(nrow(my_samples) == n_permutations) 9 | 10 | itree <- my_bights$itree 11 | aggSeries <- my_bights$anames 12 | bottomSeries <- my_bights$bnames 13 | A <- my_bights$A 14 | ########## 15 | # compute the parsing order of the aggregate nodes 16 | leaves <- V(itree)[degree(itree, mode="out") == 0] 17 | agg_nodes <- V(itree)[degree(itree, mode="out") != 0] 18 | 19 | depth_aggnodes <- sapply(agg_nodes, function(agg_node){ 20 | vec <- distances(itree, agg_node, leaves, mode = "out") 21 | max( vec[which(vec!=Inf)]) 22 | }) 23 | 24 | ordered_agg_nodes_names <- names(sort(depth_aggnodes)) 25 | ordered_agg_nodes <- V(itree)[match(ordered_agg_nodes_names, V(itree)$name)] 26 | ########## 27 | 28 | base_samples <- my_samples 29 | 30 | base_samples_bottom <- my_samples[, seq(my_bights$naggts + 1, my_bights$nts)] 31 | colnames(base_samples_bottom) <- my_bights$bnames 32 | 33 | perm_samples_bottom <- base_samples_bottom 34 | variables <- colnames(perm_samples_bottom) 35 | 36 | mat_test <- NULL 37 | # PERM-BU 38 | for(inode in seq_along(ordered_agg_nodes)){ 39 | #print(inode) 40 | 41 | agg_node <- ordered_agg_nodes[inode] 42 | idseries_agg <- names(agg_node) 43 | iagg <- match(idseries_agg, aggSeries) 44 | children_nodes <- ego(itree, order = 1, nodes = agg_node, mode = "out")[[1]][-1] 45 | nkids <- length(children_nodes) 46 | 47 | mat_permutations <- list_matpermutations[[idseries_agg]] 48 | 49 | ranks_historical <- mat_permutations 50 | stopifnot(all(colnames(ranks_historical) == names(children_nodes))) 51 | 52 | depth_node <- depth_aggnodes[match(idseries_agg, names(depth_aggnodes))] 53 | 54 | samples_children <- matrix(NA, nrow = M, ncol = nkids) 55 | 56 | columns_agg <- which(children_nodes %in% agg_nodes) 57 | columns_bottom <- which(children_nodes %in% leaves) 58 | children_names <- names(children_nodes) 59 | 60 | # Extracting/computing the samples for each child 61 | if(length(columns_agg) > 0){ 62 | id_agg_children <- match(children_names[columns_agg], aggSeries) 63 | samples_agg_children <- as.matrix(t(tcrossprod(A[id_agg_children, , drop = F], perm_samples_bottom))) 64 | samples_children[, columns_agg] <- samples_agg_children 65 | } 66 | 67 | if(length(columns_bottom) > 0){ 68 | id_bottom_children <- match(children_names[columns_bottom], bottomSeries) 69 | samples_children[, columns_bottom] <- perm_samples_bottom[, id_bottom_children] 70 | } 71 | 72 | # Computing the ranks of the samples for each child 73 | ranks_samples_children <- sapply(seq(ncol(samples_children)), function(j){ 74 | rank(samples_children[, j], ties.method = "random") 75 | }) 76 | 77 | index_mat <- sapply(seq(nkids), function(j){ 78 | res <- match(ranks_historical[, j], ranks_samples_children[, j]) 79 | stopifnot(all(!is.na(res))) 80 | res 81 | }) 82 | 83 | # Permutating the rows 84 | if(length(columns_bottom) > 0){ 85 | perm_samples_bottom[, id_bottom_children] <- sapply(seq_along(id_bottom_children), function(j){ 86 | perm_samples_bottom[index_mat[, columns_bottom[j]], id_bottom_children[j]] 87 | }) 88 | } 89 | 90 | if(length(columns_agg) > 0){ 91 | res <- lapply(seq_along(id_agg_children), function(j){ 92 | id <- which(A[id_agg_children[j], ] == 1) 93 | perm_samples_bottom[index_mat[, columns_agg[j]], id, drop = F] 94 | }) 95 | ids <- lapply(id_agg_children, function(id_agg_child){ 96 | which(A[id_agg_child, ] == 1) 97 | 
}) 98 | ids <- unlist(ids) 99 | perm_samples_bottom[, ids] <- do.call(cbind, res) 100 | } 101 | 102 | }# agg node 103 | 104 | perm_samples_bottom 105 | } 106 | -------------------------------------------------------------------------------- /simulations/code/analysis.R: -------------------------------------------------------------------------------- 1 | # This script produces the figures with all the accuracy measures. 2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("utils.R") 5 | source("nicefigs.R") 6 | source("analysis_utils.R") 7 | library(igraph) 8 | library(kSamples) 9 | library(car) 10 | graphics.off() 11 | 12 | probs <- seq(1, 99)/100 13 | 14 | # task can be: "pvalue" "mse" "wasserstein" 15 | task <- "wasserstein" 16 | # experiment can be: "small" "newlarge" 17 | experiment <- "newlarge" 18 | 19 | # T_learn <- 10000 20 | # M <- 1000 21 | T_learn <- 500 22 | M <- 500 23 | marg <- "norm" 24 | #marg <- "t" 25 | use.trueparam <- FALSE #TRUE 26 | tag <- "jasa" 27 | set_jobs <- seq(1, 12) 28 | 29 | h <- 1 30 | all_simulations <- NULL 31 | for(idjob in set_jobs){ 32 | print(idjob) 33 | filetoload <- nameResfile(experiment, marg, T_learn, M, use.trueparam, idjob, tag) 34 | load(filetoload) 35 | all_simulations <- c(all_simulations, list_simulations) 36 | } 37 | 38 | idnull <- which(sapply(all_simulations, is.null)) 39 | if(length(idnull) > 0){ 40 | all_simulations <- all_simulations[-which(sapply(all_simulations, is.null))] 41 | } 42 | 43 | nb_simulations <- length(all_simulations) 44 | print(nb_simulations) 45 | 46 | 47 | load(file.path(rdata.folder, paste("info_", experiment, ".Rdata", sep = ""))) 48 | nts <- my_bights$nts 49 | naggts <- my_bights$naggts 50 | node_names <- names(V(my_bights$itree)) 51 | 52 | 53 | for(i in seq_along(all_simulations)){ 54 | x_mintshrink <- all_simulations[[i]]$samples_pred$mintshrink 55 | x_indepbu <- all_simulations[[i]]$samples_pred$indepbu 56 | x_indepbumintshrink <- x_indepbu 57 | 58 | mean_mintshrink <- apply(x_mintshrink, c(2, 3), mean) 59 | mean_indepbu <- apply(x_indepbu, c(2, 3), mean) 60 | 61 | nvar <- dim(x_indepbumintshrink)[1] 62 | for(k in seq(nvar)){ 63 | x_indepbumintshrink[k, , ] <- x_indepbu[k, , ] - mean_indepbu + mean_mintshrink 64 | } 65 | 66 | all_simulations[[i]]$samples_pred$indepbumintshrink <- x_indepbumintshrink 67 | } 68 | 69 | 70 | mycols <- c("red", "green" , "grey", "blue", "purple", "yellow", "orange") 71 | # "mintdiagonal" "mintshrink" "base" "depbu" "depbumint" "indepbu" "indepbumintshrink" 72 | 73 | name_methods <- c(names(all_simulations[[1]]$samples_pred), "true") 74 | 75 | id1 <- id2 <- seq(M) 76 | 77 | id_print <- c(1, 2, 30) 78 | #id_print <- c(1, 2, 30, 40, 50, 60) 79 | 80 | mat_test <- sapply(id_print, function(j){ 81 | if(j%%10 == 0) 82 | print(j) 83 | 84 | sapply(seq(nb_simulations), function(i){ 85 | 86 | samples_pred <- all_simulations[[i]]$samples_pred 87 | samples_true <- all_simulations[[i]]$samples_true 88 | samples_true <- aperm(samples_true, c(2, 1, 3)) 89 | 90 | samples_method <- sapply(samples_pred, function(mat){ 91 | mat[, j, h] 92 | }) 93 | 94 | x2 <- samples_true[, j, h] 95 | if(task == "mse"){ 96 | mu <- mean(x2) 97 | RET <- sapply(colnames(samples_method), function(mycol){ 98 | x1 <- samples_method[, mycol] 99 | (mean(x1) - mu)^2 100 | }) 101 | }else if(task == "pvalue"){ 102 | RET <- sapply(colnames(samples_method), function(mycol){ 103 | x1 <- samples_method[, mycol] 104 | ks.test(x1[id1], x2[id2])$p.value 105 | }) 106 | }else if(task == "wasserstein"){ 107 | true_quantiles <- 
quantile(x2, probs)
108 | RET <- sapply(colnames(samples_method), function(mycol){
109 | x1 <- samples_method[, mycol]
110 | log(mean( (quantile(x1, probs) - true_quantiles)^2))
111 | })
112 | }
113 | RET
114 | })
115 | }, simplify = "array")
116 | 
117 | mat_test <- aperm(mat_test, c(2, 1, 3))
118 | print(dimnames(mat_test))
119 | 
120 | 
121 | ######### BOXPLOTS (pvalues, mse, wasserstein)
122 | myfile <- namePdffile(experiment, marg, T_learn, M, use.trueparam,
123 | paste(tag, "-", task, sep = "") )
124 | 
125 | if(T_learn == 500){
126 | savepdf(myfile, width = 21 * 0.9, height = 29.7 * 0.25)
127 | par(mfrow = c(1, 3), mar=c(5.05,3,2,2))
128 | }else{
129 | savepdf(myfile, width = 21 * 0.9, height = 29.7 * 0.20)
130 | par(mfrow = c(1, 3), mar=c(1,3,2,2))
131 | }
132 | 
133 | 
134 | for(j in seq(dim(mat_test)[3])){
135 | mymat <- mat_test[, , j]
136 | 
137 | if(T_learn == 500){
138 | colnames(mymat) <- sapply(colnames(mymat), bettername)
139 | myxaxt <- NULL
140 | }else{
141 | myxaxt <- 'n'
142 | }
143 | 
144 | if(task != "mse"){
145 | if(j == 1){
146 | mymain <- "Level 1 (top level)"
147 | }else if(j == 3){
148 | mymain <- "Level 3 (bottom level)"
149 | }else{
150 | mymain <- "Level 2"
151 | #paste("Level ", j, ifelse(j == 3, "(bottom level)", "") )
152 | }
153 | 
154 | 
155 | if(task == "pvalue"){
156 | boxplot(mymat, las=2, ylab = "p-value", main = mymain,
157 | col = mycols, yaxt = 'n', xaxt = myxaxt, cex.axis = 1, outline = FALSE)
158 | axis(2, at = c(0, 0.25, 0.5, 0.75, 1), cex.axis = 1.2)
159 | abline(h = c(0, 0.25, 0.5, 0.75, 1), lwd = 0.1)
160 | }else{
161 | boxplot(mymat, las=2, ylab = "2-Wasserstein distance (log scale)",
162 | main = mymain, col = mycols, xaxt = myxaxt, cex.axis = 1, outline = FALSE)
163 | }
164 | 
165 | }else{
166 | boxplot(mymat, las=2, ylab = "MSE",
167 | main = paste("Level ", j, ifelse(j == 3, "(bottom level)", "") ),
168 | col = mycols, cex.axis = 1.2,
169 | outline = FALSE)
170 | }
171 | }
172 | 
173 | dev.off()
--------------------------------------------------------------------------------
/simulations/code/analysis_utils.R:
--------------------------------------------------------------------------------
1 | 
2 | bettername <- function(namemethod, option = 1){
3 | if(namemethod == "mintdiagonal"){
4 | #RET <- "NL-MD"
5 | RET <- ifelse(option == 1, "Norm- \n MinTDiag", "Norm-MinTDiag")
6 | }else if(namemethod == "mintshrink"){
7 | #RET <- "NL-MS"
8 | RET <- ifelse(option == 1, "Norm- \n MinTShrink", "Norm-MinTShrink")
9 | }else if(namemethod == "base"){
10 | RET <- "BASE"
11 | }else if(namemethod == "depbu"){
12 | #RET <- "DB-NM"
13 | RET <- ifelse(option == 1, "DepBU- \n NoMinT", "DepBU-NoMinT")
14 | }else if(namemethod == "depbumint"){
15 | #RET <- "DB-MS"
16 | RET <- ifelse(option == 1, "DepBU- \n MinTShrink", "DepBU-MinTShrink")
17 | }else if(namemethod == "indepbu"){
18 | #RET <- "IB-NM"
19 | RET <- ifelse(option == 1, "IndepBU- \n NoMinT", "IndepBU-NoMinT")
20 | }else if(namemethod == "indepbumintshrink"){
21 | #RET <- "IB-MS"
22 | RET <- ifelse(option == 1, "IndepBU- \n MinTShrink", "IndepBU-MinTShrink")
23 | }else{
24 | RET <- namemethod
25 | }
26 | RET
27 | }
--------------------------------------------------------------------------------
/simulations/code/bights.R:
--------------------------------------------------------------------------------
1 | # This script defines the "bights" object, i.e. the big hierarchical time series, with associated functions.
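
A minimal sketch of the structure this file builds, assuming only the Matrix package (a toy hierarchy with one aggregate and two bottom series; all names are illustrative): the summing matrix S = rbind(A, I) maps the bottom series to all series, and the bottom-up projection P_BU maps forecasts for all series back to a coherent set.

```
library(Matrix)
A <- Matrix(c(1, 1), nrow = 1, sparse = TRUE)  # the aggregate is the sum of the two bottom series
bts <- matrix(rnorm(20), ncol = 2)             # 10 observations of the two bottom series
S <- rbind(A, Diagonal(2))                     # summing matrix: bottom series -> all series
yts <- cbind(as.matrix(t(A %*% t(bts))), bts)  # aggregate first, then bottom, as in bights()
stopifnot(all.equal(yts[, 1], bts[, 1] + bts[, 2]))
# S %*% P_BU turns any (possibly incoherent) vector of forecasts for all
# series into a coherent one by keeping the bottom rows and re-aggregating
P_BU <- cbind(matrix(0, 2, 1), diag(2))
yhat <- rnorm(3)                               # incoherent forecasts: aggregate + 2 bottom
ytilde <- as.numeric(S %*% P_BU %*% yhat)
stopifnot(all.equal(ytilde[1], ytilde[2] + ytilde[3]))
```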
2 | make.data <- function(obj_bights, list_subsets, H){
3 | list_basef <- lapply(list_subsets, function(ts_split){
4 | subseries(obj_bights, ts_split = ts_split, H = H, do.forecast = T, keep.data = F)
5 | })
6 | 
7 | Yhat <- simplify2array(lapply(list_basef, "[[", "predictions"))
8 | Y <- simplify2array(lapply(list_basef, "[[", "future_data"))
9 | list(Yhat = Yhat, Y = Y)
10 | }
11 | 
12 | bights <- function(bts, A) {
13 | nbts <- ncol(bts)
14 | naggts <- nrow(A)
15 | nts <- naggts + nbts
16 | Tobs <- nrow(bts)
17 | 
18 | A <- methods::as(A, "sparseMatrix")
19 | S <- rbind(A, Diagonal(nbts))
20 | S <- methods::as(S, "sparseMatrix")
21 | 
22 | yts <- matrix(NA, nrow = nrow(bts), ncol = nts)
23 | 
24 | if (nbts <= 1L) {
25 | stop("Argument bts must be a multivariate time series.", call. = FALSE)
26 | }
27 | 
28 | yts[, seq(naggts)] <- as.matrix(t(A %*% t(bts)))
29 | yts[, seq(naggts + 1, nts)] <- bts
30 | 
31 | output <- structure(
32 | list(yts = yts, A = A, S = S, nbts = nbts, naggts = naggts, nts = nts, Tobs = Tobs),
33 | class = c("bights")
34 | )
35 | return(output)
36 | }
37 | 
38 | 
39 | subseries <- function(bights, ts_split, H = 1, do.forecast = FALSE, keep.data = FALSE) {
40 | 
41 | learn_data <- bights$yts[seq(ts_split[1], ts_split[2]), ]
42 | 
43 | interval_future <- ts_split[2] + seq(1, H)
44 | 
45 | future_data <- bights$yts[interval_future, , drop = F]
46 | 
47 | predictions <- NULL
48 | if(do.forecast){
49 | predictions <- sapply(seq(bights$nts), function(j){
50 | model <- fit_fct(learn_data[, j], "ARIMA")
51 | forecast(model, h = H)$mean
52 | })
53 | }
54 | 
55 | output <- list(future_data = future_data, predictions = predictions)
56 | 
57 | 
58 | if(keep.data)
59 | output <- c(output, list(learn_data = learn_data))
60 | 
61 | return(output)
62 | }
--------------------------------------------------------------------------------
/simulations/code/code.R:
--------------------------------------------------------------------------------
1 | # This script defines some functions to generate the base forecasts.
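
For reference, a single-series sketch of the rolling-origin scheme that `makebf` and `rolling.forecast` implement below (toy AR(1) data; unlike the real code, this refits only at the first origin, uses a growing window, and omits the parallelism and residual bookkeeping):

```
library(forecast)
set.seed(1)
series <- as.numeric(arima.sim(list(ar = 0.6), n = 120))
H <- 2
origins <- seq(100, 118)                  # forecast origins (end of each learning window)
mf <- future <- matrix(NA, length(origins), H)
for (i in seq_along(origins)) {
  learn <- series[1:origins[i]]
  if (i == 1) {
    model <- auto.arima(learn, seasonal = FALSE, stationary = TRUE)
  } else {
    model <- Arima(learn, model = model)  # reapply the fitted model to the longer sample
  }
  f <- forecast(model, h = H, level = seq(2, 98, by = 2))
  mf[i, ] <- as.numeric(f$mean)
  future[i, ] <- series[origins[i] + seq_len(H)]
  # Quantile forecasts at horizon h are read off as c(rev(f$lower[h, ]), f$upper[h, ]),
  # as in rolling.forecast() below
}
mean((mf[, 1] - future[, 1])^2)           # 1-step-ahead MSE of the base forecasts
```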
2 | makebf <- function(obj_bights, list_subsets, H, config_forecast_agg, config_forecast_bot, refit_step, mc.cores = 1){ 3 | n <- obj_bights$nts 4 | m <- obj_bights$nbts 5 | results <- vector("list", n) 6 | 7 | results <- mclapply(seq(n), function(j){ 8 | if(j <= n-m){ 9 | config_forecast <- config_forecast_agg 10 | }else{ 11 | config_forecast <- config_forecast_bot 12 | } 13 | 14 | rolling.forecast(obj_bights$yts[, j], list_subsets, H, config_forecast, refit_step = refit_step, j) 15 | }, mc.cores = mc.cores) 16 | 17 | allmf <- simplify2array(lapply(results, "[[", "mf")) 18 | allfuture <- simplify2array(lapply(results, "[[", "future")) 19 | allqf <- lapply(results, "[[", "list_qf") 20 | allresiduals <- simplify2array(lapply(results, "[[", "e_residuals")) 21 | 22 | allmf <- aperm(allmf, c(3, 1, 2)) 23 | allfuture <- aperm(allfuture, c(3, 1, 2)) 24 | allresiduals <- t(allresiduals) 25 | 26 | list(allmf = allmf, allfuture = allfuture, allqf = allqf, allresiduals = allresiduals) 27 | 28 | } 29 | 30 | rolling.forecast <- function(series, list_subsets, H, config_forecast, refit_step, idseries){ 31 | n_subsets <- length(list_subsets) 32 | mf <- future <- matrix(NA, nrow = n_subsets, ncol = H) 33 | list_qf <- vector("list", n_subsets) 34 | 35 | fit_fct <- config_forecast$fit_fct 36 | refit_fct <- config_forecast$refit_fct 37 | param_fit_fct <- config_forecast$param_fit_fct 38 | param_refit_fct <- config_forecast$param_refit_fct 39 | param_forecast <- config_forecast$param_forecast 40 | 41 | 42 | for(i in seq(n_subsets)){ 43 | ts_split <- list_subsets[[i]] 44 | if(is.ts(series)){ 45 | learn_series <- subset(series, start = ts_split[1], end = ts_split[2]) 46 | }else{ 47 | learn_series <- series[seq(ts_split[1], ts_split[2])] 48 | } 49 | 50 | future[i, ] <- series[ts_split[2] + seq(1, H)] 51 | 52 | 53 | if( (i-1) %% refit_step == 0){ 54 | if(use.trueparam && idseries > my_bights$naggts){ 55 | ar_param <- obj_simul$param$ar_param[[idseries - my_bights$naggts]] 56 | ma_param <- obj_simul$param$ma_param[[idseries - my_bights$naggts]] 57 | stopifnot(!is.null(ar_param) && !is.null(ma_param)) 58 | model <- Arima(y = learn_series, order = c(length(ar_param), 0, length(ma_param)), 59 | fixed = c(ar_param, ma_param, 0)) 60 | }else{ 61 | model <- do.call(fit_fct, c(list(y = learn_series), param_fit_fct)) 62 | } 63 | 64 | }else{ 65 | model <- do.call(refit_fct, c(list(y = learn_series, model = model), param_refit_fct)) 66 | } 67 | 68 | if(i == 1){ 69 | e_residuals <- as.numeric(resid(model)) 70 | } 71 | 72 | 73 | f <- do.call(forecast, c(list(object = model, h = H), param_forecast)) 74 | mf[i, ] <- f$mean 75 | quantf <- sapply(seq(H), function(h){ 76 | c(rev(f$lower[h, ]), f$upper[h, ]) 77 | }) 78 | list_qf[[i]] <- quantf 79 | 80 | #nsamples <- param_forecast$npaths 81 | #f <- t(replicate(nsamples, simulate(model, bootstrap = do.bootstrap.residuals, nsim = H, future = T))) 82 | #mf[i, ] <- apply(f, 2, mean) 83 | #list_qf[[i]] <- f 84 | } 85 | 86 | output <- list(future = future, mf = mf, list_qf = list_qf, e_residuals = e_residuals) 87 | } 88 | 89 | 90 | makeINFO <- function(tags){ 91 | myedges <- data.frame(rbind(cbind(tags[, 1], tags[, 2]), 92 | cbind(tags[, 2], tags[, 3]))) 93 | itree <- graph.data.frame(myedges) 94 | itree <- simplify(itree, remove.loops = F) 95 | 96 | 97 | # Compute A - for each agg. 
node, compute the associated leaves
98 | all.nodes.names <- V(itree)$name
99 | agg.nodes.names <- aggSeries <- all.nodes.names[which(degree(itree, V(itree), "out")!=0)]
100 | n_agg <- length(agg.nodes.names)
101 | 
102 | bottomSeries <- tags[, ncol(tags)]
103 | n_bottom <- ncol(bts)
104 | A <- matrix(0, nrow = n_agg, ncol = n_bottom)
105 | 
106 | for(i in seq_along(agg.nodes.names)){
107 | agg.node.name <- agg.nodes.names[i]
108 | reachable <- which(shortest.paths(itree, agg.node.name, mode="out") != Inf)
109 | terminal.nodes <- reachable[which(degree(itree, reachable, mode="out") == 0)]
110 | terminal.nodes.names <- all.nodes.names[terminal.nodes]
111 | ids <- match(terminal.nodes.names, bottomSeries)
112 | stopifnot(all(!is.na(ids)))
113 | A[i, ids] <- 1
114 | }
115 | output <- list(bottomSeries = bottomSeries, aggSeries = aggSeries, itree = itree,
116 | A = A, n_agg = n_agg, n_bottom = n_bottom)
117 | return(output)
118 | }
119 | 
120 | 
121 | makeINFO2 <- function(tags){
122 | myedges <- data.frame(do.call(rbind, lapply(seq(ncol(tags) - 1), function(j){
123 | cbind(tags[, j], tags[, j+1])
124 | })))
125 | 
126 | itree <- graph.data.frame(myedges)
127 | itree <- simplify(itree, remove.loops = F)
128 | 
129 | 
130 | # Compute A - for each agg. node, compute the associated leaves
131 | all.nodes.names <- V(itree)$name
132 | agg.nodes.names <- aggSeries <- all.nodes.names[which(degree(itree, V(itree), "out")!=0)]
133 | n_agg <- length(agg.nodes.names)
134 | 
135 | bottomSeries <- tags[, ncol(tags)]
136 | n_bottom <- length(bottomSeries)
137 | A <- matrix(0, nrow = n_agg, ncol = n_bottom)
138 | 
139 | for(i in seq_along(agg.nodes.names)){
140 | agg.node.name <- agg.nodes.names[i]
141 | reachable <- which(shortest.paths(itree, agg.node.name, mode="out") != Inf)
142 | terminal.nodes <- reachable[which(degree(itree, reachable, mode="out") == 0)]
143 | terminal.nodes.names <- all.nodes.names[terminal.nodes]
144 | ids <- match(terminal.nodes.names, bottomSeries)
145 | stopifnot(all(!is.na(ids)))
146 | A[i, ids] <- 1
147 | }
148 | output <- list(bottomSeries = bottomSeries, aggSeries = aggSeries, itree = itree,
149 | A = A, n_agg = n_agg, n_bottom = n_bottom)
150 | return(output)
151 | }
--------------------------------------------------------------------------------
/simulations/code/config_paths.R:
--------------------------------------------------------------------------------
1 | 
2 | main.folder <- "../"
3 | work.folder <- file.path(main.folder, "work")
4 | rdata.folder <- file.path(work.folder, "rdata")
5 | pdf.folder <- file.path(work.folder, "pdfs")
6 | rout.folder <- file.path(work.folder, "rout")
--------------------------------------------------------------------------------
/simulations/code/main.R:
--------------------------------------------------------------------------------
1 | # This is the main script, which computes the base forecasts as well as all the revised forecasts from the other methods.
2 | # The arguments are:
3 | # experiment: the experiment you want to perform ("small", "newlarge")
4 | # marg: the distribution of the errors ("norm", "t")
5 | # T_learn: the size of the learning set (training + validation)
6 | # M: the number of observations per sample (when sampling from the predictive distributions)
7 | # idjob: the job id (useful when using distributed computing, see runsim.sh)
8 | # nb_simulations: the number of simulations to perform
9 | # nbcores: the number of cores used by the job
10 | # use.trueparam: do you want to use the true AR parameters?
(TRUE, FALSE) 11 | rm(list = ls()) 12 | assign("last.warning", NULL, envir = baseenv()) 13 | args = (commandArgs(TRUE)) 14 | if(length(args) == 0){ 15 | experiment <- "newlarge" 16 | marg <- "norm" 17 | T_learn <- 500 18 | M <- 500 19 | idjob <- 1986 20 | nb_simulations <- 1000 21 | nbcores <- 1 22 | use.trueparam <- TRUE 23 | }else{ 24 | 25 | for(i in 1:length(args)){ 26 | print(args[[i]]) 27 | } 28 | 29 | experiment <- args[[1]] 30 | marg <- args[[2]] 31 | T_learn <- as.integer(args[[3]]) 32 | M <- as.integer(args[[4]]) 33 | use.trueparam <- as.logical(args[[5]]) 34 | idjob <- as.integer(args[[6]]) 35 | nb_simulations <- as.integer(args[[7]]) 36 | nbcores <- as.integer(args[[8]]) 37 | } 38 | 39 | set.seed(idjob) 40 | 41 | graphics.off() 42 | 43 | source("config_paths.R") 44 | source("packages.R") 45 | source("bights.R") 46 | source("simulate.R") 47 | source("utils.R") 48 | source("mint.R") 49 | source("code.R") 50 | source("permutations.R") 51 | source("aggregation.R") 52 | source("nicefigs.R") 53 | 54 | tag <- "jasa" 55 | do.recovery <- TRUE 56 | 57 | fixed.dgp <- FALSE 58 | do.bootstrap.residuals <- TRUE 59 | do.correction <- FALSE 60 | 61 | print(fixed.dgp) 62 | print(use.trueparam) 63 | print(do.bootstrap.residuals) 64 | print(do.correction) 65 | 66 | if(use.trueparam){ 67 | print("True parameters are used at the bottom and the top levels !") 68 | } 69 | 70 | print(experiment) 71 | print(marg) 72 | print(M) 73 | 74 | q_probs <- runif(M) 75 | n_true_paths <- M 76 | n_forecast_paths <- M 77 | 78 | H <- 2 79 | mc.cores.basef <- nbcores 80 | refit_step <- 40 81 | T_test <- ifelse(do.recovery, H, 500) 82 | T_all <- T_learn + T_test 83 | n_warm <- 500 84 | n_simul <- T_all 85 | 86 | print(T_learn) 87 | 88 | obj.path <- NULL 89 | if(do.recovery){ 90 | obj.path <- list(npaths = n_true_paths) 91 | } 92 | 93 | config_forecast_bot <- list(fit_fct = auto.arima, refit_fct = Arima, 94 | param_fit_fct = list(seasonal = FALSE, stationary = TRUE, approximation = FALSE, ic = "bic"), # max.p = 2, max.q = 2, 95 | param_refit_fct = list(use.initial.values = TRUE), 96 | param_forecast = list( bootstrap = do.bootstrap.residuals, npaths = n_forecast_paths )) 97 | 98 | config_forecast_agg <- list(fit_fct = auto.arima, refit_fct = Arima, 99 | param_fit_fct = list(seasonal = FALSE, stationary = TRUE, approximation = FALSE, ic = "bic"), 100 | param_refit_fct = list(use.initial.values = TRUE), 101 | param_forecast = list( bootstrap = do.bootstrap.residuals, npaths = n_forecast_paths )) 102 | 103 | list_simulations <- vector("list", nb_simulations) 104 | for(i in seq_along(list_simulations)){ 105 | 106 | print(paste(i, " - Start ALL -", base::date(), sep = "")) 107 | 108 | if(experiment == "newlarge"){ 109 | obj_simul <- list() 110 | obj_simul$param <- NULL 111 | obj_simul$param$ar_param <- obj_simul$param$ma_param <- NULL 112 | 113 | ngroups <- 25 114 | list_params <- vector("list", ngroups) 115 | 116 | res <- NULL 117 | for(igroup in seq(ngroups)){ 118 | NM <- cbind(paste("T", igroup, sep = "") , 119 | c( paste("A", igroup, sep = "") , paste("B", igroup, sep = "") , 120 | paste("C", igroup, sep = "") , paste("D", igroup, sep = "") )) 121 | res <- rbind(res, NM) 122 | } 123 | res <- cbind("T", res) 124 | 125 | tags <- cbind(res[, 1], sapply(seq(2, ncol(res)), function(j)( 126 | apply(res[, seq(j)], 1, paste, collapse = "") 127 | ))) 128 | 129 | myinfo <- makeINFO2(tags) 130 | A <- myinfo$A 131 | 132 | bts <- matrix(NA, nrow = n_simul, ncol = ngroups * 4) 133 | sample_paths_bottom <- array(NA, c(H, ngroups * 4, 
obj.path$npaths)) 134 | for(igroup in seq(ngroups)){ 135 | res_sim <- simulate_small_hts(n_simul, marg, obj.path) 136 | obj_simul$param$ar_param <- c(obj_simul$param$ar_param, res_sim$param$ar_param) 137 | obj_simul$param$ma_param <- c(obj_simul$param$ma_param, res_sim$param$ma_param) 138 | 139 | id <- seq((igroup - 1) * 4 + 1, (igroup - 1) * 4 + 4 ) 140 | bts[, id] <- res_sim$bts 141 | sample_paths_bottom[, id, ] <- res_sim$sample_paths 142 | } 143 | 144 | }else if(experiment == "small"){ 145 | obj_simul <- simulate_small_hts(n_simul, marg, obj.path) 146 | bts <- obj_simul$bts 147 | 148 | NM <- cbind(rep("T", 4), rep(c("A", "B"), each = 2), rep(c("A", "B"), 2)) 149 | tags <- cbind(NM[, 1], sapply(seq(2, ncol(NM)), function(j)( 150 | apply(NM[, seq(j)], 1, paste, collapse = "") 151 | ))) 152 | 153 | myinfo <- makeINFO(tags) 154 | A <- myinfo$A 155 | sample_paths_bottom <- obj_simul$sample_paths 156 | 157 | } 158 | ########### 159 | my_bights <- bights(bts, A) 160 | my_bights$itree <- myinfo$itree 161 | my_bights$bnames <- myinfo$bottomSeries 162 | my_bights$anames <- myinfo$aggSeries 163 | my_bights$allnames <- c(myinfo$aggSeries, myinfo$bottomSeries) 164 | 165 | 166 | sample_paths <- sapply(seq(H), function(h){ 167 | as.matrix(my_bights$S %*% sample_paths_bottom[h, , ]) 168 | }, simplify = 'array') 169 | 170 | print(paste("Data simulated: ", " ", base::date())) 171 | 172 | P_BU <- pbu(my_bights) 173 | 174 | infofile <- file.path(rdata.folder, paste("info_", experiment, ".Rdata", sep = "")) 175 | save(file = infofile, list = c("my_bights")) 176 | 177 | list_subsets_test <- lapply(seq(T_learn, T_all - H), function(i){c(i - T_learn + 1, i)}) 178 | 179 | 180 | results <- makebf(my_bights, list_subsets_test, H = H, 181 | config_forecast_agg = config_forecast_agg, config_forecast_bot = config_forecast_bot, 182 | refit_step = refit_step, mc.cores = mc.cores.basef) 183 | 184 | e_hat <- results$allresiduals 185 | row.names(e_hat) <- my_bights$allnames 186 | e_hat <- e_hat[, sample(seq(ncol(e_hat)), M)] 187 | 188 | mint_methods <- list(mintdiagonal = "diagonal", mintshrink = "shrink") 189 | list_MINT <- lapply(mint_methods, function(wmethod){ 190 | compute_pmint(my_bights, method = wmethod, residuals = e_hat) 191 | }) 192 | 193 | 194 | # MINT 195 | res_allmint <- lapply(list_MINT, function(mylist){ 196 | res_obj <- mylist 197 | means_mint <- sapply(seq(H), function(h){ 198 | as.matrix(my_bights$S %*% res_obj$P_MINT %*% results$allmf[, , h]) 199 | }, simplify = "array") 200 | variances_mint <- diag(res_obj$V) 201 | list(means_mint = means_mint, variances_mint = variances_mint) 202 | }) 203 | 204 | e_tilde <- my_bights$S %*% list_MINT$mintshrink$P_MINT %*% e_hat 205 | 206 | # Make permutations 207 | obj_permutations_ehat <- make_permutations(my_bights, t(e_hat)) 208 | obj_permutations_etilde <- make_permutations(my_bights, t(e_tilde)) 209 | 210 | 211 | n_test <- length(list_subsets_test) 212 | test_depbu_samples <- test_base_samples <- test_mint_samples <- test_indepbu_samples <- vector("list", n_test) 213 | test_results <- vector("list", n_test) 214 | for(itest in seq(n_test)){ 215 | 216 | if(itest %% 100 == 0) 217 | print(itest) 218 | 219 | samples_almint <- lapply(res_allmint, function(res_mint){ 220 | mint_samples <- sapply(seq(my_bights$nts), function(j){ 221 | sapply(seq(H), function(h){ 222 | m <- res_mint$means_mint[j, itest, h] 223 | v <- res_mint$variances_mint[j] 224 | qnorm(q_probs, m, sqrt(v)) 225 | }) 226 | }, simplify = "array") 227 | mint_samples <- aperm(mint_samples, c(1, 3, 2)) 228 
| })
229 | 
230 | test_results[[itest]] <- c(test_results[[itest]], samples_almint)
231 | 
232 | # sample from all basef
233 | base_samples <- sapply(seq(my_bights$nts), function(i){
234 | sapply(seq(H), function(h){
235 | qf <- results$allqf[[i]][[itest]][, h]
236 | sample(qf)
237 | })
238 | }, simplify = "array")
239 | base_samples <- aperm(base_samples, c(1, 3, 2))
240 | 
241 | test_results[[itest]]$base <- base_samples
242 | 
243 | # DEP-BU AND INDEP-BU
244 | depbu_samples <- indepbu_samples <- depbumint_samples <- array(NA, dim(base_samples))
245 | for(h in seq(H)){
246 | samples_h <- base_samples[, , h]
247 | 
248 | # DEP-BU
249 | permuted_samples_bottom <- permutate_samples(my_bights, samples_h, obj_permutations_ehat$list_matpermutations) # CAREFUL HERE
250 | tpermuted_samples_bottom <- t(permuted_samples_bottom)
251 | depbu_samples[, , h] <- as.matrix(t(my_bights$S %*% tpermuted_samples_bottom))
252 | 
253 | if(do.correction){
254 | permuted_samples_bottom <- permutate_samples(my_bights, samples_h, obj_permutations_etilde$list_matpermutations)
255 | tpermuted_samples_bottom <- t(permuted_samples_bottom)
256 | }
257 | 
258 | # DEP-BU + MINT
259 | btilde <- P_BU %*% res_allmint[["mintshrink"]]$means_mint[, itest, h]
260 | meanrevised_samples_bottom <- tpermuted_samples_bottom - apply(permuted_samples_bottom, 2, mean) + btilde
261 | if(do.correction){
262 | meanrevised_samples_bottom <- (meanrevised_samples_bottom/apply(permuted_samples_bottom, 2, sd) ) * sqrt(P_BU %*% res_allmint[["mintshrink"]]$variances_mint)
263 | }
264 | depbumint_samples[, , h] <- as.matrix(t(my_bights$S %*% meanrevised_samples_bottom))
265 | 
266 | # INDEP-BU
267 | indepbu_samples[, , h] <- as.matrix(t(my_bights$S %*% P_BU %*% t(samples_h)))
268 | }
269 | 
270 | test_results[[itest]]$depbu <- depbu_samples
271 | test_results[[itest]]$depbumint <- depbumint_samples
272 | test_results[[itest]]$indepbu <- indepbu_samples
273 | 
274 | } # test
275 | 
276 | 
277 | 
278 | if(do.recovery){
279 | list_simulations[[i]] <- list(samples_pred = test_results[[1]], samples_true = sample_paths)
280 | }else{
281 | err_test <- lapply(seq(H), function(h){
282 | lapply(seq(n_test), function(itest){ # itest
283 | lapply(test_results[[itest]], function(mat_method){ # imethod
284 | obs <- results$allfuture[, itest, h]
285 | X <- mat_method[, , h]
286 | 
287 | crps <- compute_crps(X, obs)
288 | 
289 | squared_erors <- (apply(X, 2, mean) - obs)^2
290 | 
291 | qs <- compute_qscores(X, obs)
292 | qs_tails <- apply(qs, 2, function(x){ mean(x * weights_tails) })
293 | qs_uniform <- apply(qs, 2, function(x){ mean(x * weights_uniform) })
294 | qs_rtail <- apply(qs, 2, function(x){ mean(x * weights_rtail) })
295 | qs_ltail <- apply(qs, 2, function(x){ mean(x * weights_ltail) })
296 | 
297 | # DO NOT SAVE qs (too big)
298 | list(crps = crps, squared_erors = squared_erors, qs_tails = qs_tails, qs_rtail = qs_rtail, qs_ltail = qs_ltail, qs_uniform = qs_uniform)
299 | })
300 | })
301 | })
302 | list_simulations[[i]] <- err_test
303 | 
304 | }
305 | if(i %% 5 == 0){
306 | filetosave <- nameResfile(experiment, marg, T_learn, M, use.trueparam, idjob, tag)
307 | save(file = filetosave, list = c("list_simulations"))
308 | }
309 | 
310 | }
311 | filetosave <- nameResfile(experiment, marg, T_learn, M, use.trueparam, idjob, tag)
312 | save(file = filetosave, list = c("list_simulations"))
313 | 
--------------------------------------------------------------------------------
/simulations/code/mint.R:
-------------------------------------------------------------------------------- 1 | # All the functions needed to compute the MinT forecasts. 2 | compute_pmint <- function(objhts, method = "diagonal", residuals){ 3 | J <- Matrix(cbind(matrix(0, nrow = objhts$nbts, ncol = objhts$nts - objhts$nbts), diag(objhts$nbts)), sparse = TRUE) 4 | U <- Matrix(rbind(diag(objhts$nts - objhts$nbts), -t(objhts$A)), sparse = TRUE) 5 | P_BU <- cbind(matrix(0, objhts$nbts, objhts$nts - objhts$nbts), diag(objhts$nbts)) 6 | 7 | #if(!is.null(residuals)) 8 | # R1 <- t(residuals) 9 | 10 | R1 <- t(residuals) 11 | 12 | if(is.null(method)) 13 | method <- "diagonal" 14 | 15 | if(method == "diagonal"){ 16 | # Diagonal matrix 17 | W <- Diagonal(x = vec_w(R1)) 18 | }else if(method == "shrink"){ 19 | # Shrunk matrix 20 | target_diagonal <- lowerD(R1) 21 | shrink_results <- shrink.estim(R1, target_diagonal) 22 | W <- shrink_results$shrink.cov 23 | }else if(method == "ols"){ 24 | W <- diag(objhts$nts) 25 | }else if(method == "sample"){ 26 | n <- nrow(R1) 27 | W <- crossprod(R1) / n 28 | if(is.positive.definite(W)==FALSE) 29 | { 30 | stop("MinT needs covariance matrix to be positive definite.", call. = FALSE) 31 | } 32 | } 33 | 34 | MAT1 <- W %*% U 35 | MAT2 <- crossprod(U,MAT1) 36 | MAT3 <- tcrossprod(solve(MAT2), U) 37 | C1 <- J %*% MAT1 38 | P_MINT <- P_BU - C1 %*% MAT3 39 | 40 | S <- objhts$S 41 | n_agg <- objhts$naggts 42 | n_total <- objhts$nts 43 | V <- S %*% P_MINT %*% W %*% t(P_MINT) %*% t(S) 44 | 45 | #V_agg <- diag(V)[seq(n_agg)] 46 | #V_bot <- diag(V)[seq(n_agg + 1, n_total)] 47 | #list(V_agg = V_agg, V_bot = V_bot) 48 | 49 | 50 | list(P_MINT = P_MINT, W = W, V = V) 51 | } 52 | 53 | shrink.estim <- function(x, tar) 54 | { 55 | if (is.matrix(x) == TRUE && is.numeric(x) == FALSE) 56 | stop("The data matrix must be numeric!") 57 | p <- ncol(x) 58 | n <- nrow(x) 59 | 60 | covm <- crossprod(x) / n 61 | corm <- cov2cor(covm) 62 | xs <- scale(x, center = FALSE, scale = sqrt(diag(covm))) 63 | v <- (1/(n * (n - 1))) * (crossprod(xs^2) - 1/n * (crossprod(xs))^2) 64 | diag(v) <- 0 65 | corapn <- cov2cor(tar) 66 | d <- (corm - corapn)^2 67 | lambda <- sum(v)/sum(d) 68 | lambda <- max(min(lambda, 1), 0) 69 | shrink.cov <- lambda * tar + (1 - lambda) * covm 70 | return(list(shrink.cov = shrink.cov, lambda = lambda )) 71 | } 72 | 73 | vec_w <- function(x){ 74 | n <- nrow(x) 75 | apply(x, 2, crossprod) / n 76 | } 77 | lowerD <- function(x) 78 | { 79 | n <- nrow(x) 80 | return(diag(vec_w(x))) 81 | } 82 | -------------------------------------------------------------------------------- /simulations/code/nicefigs.R: -------------------------------------------------------------------------------- 1 | # Create nice R figures 2 | 3 | savepdf <- function(file, width=16, height=10) 4 | { 5 | .._fname <- paste(file,".pdf",sep="") 6 | pdf(.._fname, width=width/2.54, height=height/2.54, 7 | pointsize=10) 8 | par(mgp=c(2.2,0.45,0), tcl=-0.4, mar=c(3.3,3.6,1.1,1.1)) 9 | assign(".._fname",.._fname,envir=.GlobalEnv) 10 | } 11 | endpdf <- function() 12 | { 13 | dev.off() 14 | system(paste("pdfcrop",.._fname,.._fname)) 15 | } 16 | -------------------------------------------------------------------------------- /simulations/code/packages.R: -------------------------------------------------------------------------------- 1 | library(MASS) 2 | library(forecast) 3 | library(Matrix) 4 | library(SGL) 5 | library(glmnet) 6 | library(parallel) 7 | library(matrixcalc) 8 | 9 | library(igraph) 10 | library(plotrix) 11 | library(copula) 12 | library(sn) 13 
| library(scoringRules) 14 | library(propagate) 15 | library(SuppDists) 16 | library(hts) -------------------------------------------------------------------------------- /simulations/code/permutations.R: -------------------------------------------------------------------------------- 1 | # This function computes the permutations needed by aggregation.R to restore the dependences between the variables of interest. 2 | make_permutations <- function(my_bights, residuals){ 3 | 4 | itree <- my_bights$itree 5 | mat_residuals <- residuals 6 | n_resid <- nrow(mat_residuals) 7 | 8 | # compute the parsing order of the aggregate nodes 9 | leaves <- V(itree)[degree(itree, mode="out") == 0] 10 | agg_nodes <- V(itree)[degree(itree, mode="out") != 0] 11 | all_nodes <- V(itree) 12 | 13 | stopifnot(all(names(all_nodes) == colnames(residuals))) 14 | 15 | list_matpermutations <- list_vecties <- vector("list", length(agg_nodes)) 16 | 17 | for(inode in seq_along(agg_nodes)){ 18 | agg_node <- agg_nodes[inode] 19 | idseries_agg <- names(agg_node) 20 | children_nodes <- ego(itree, order = 1, nodes = agg_node, mode = "out")[[1]][-1] 21 | 22 | idchildren <- match(children_nodes, all_nodes) 23 | mat_residuals <- residuals[, idchildren] 24 | 25 | vec_ties <- sapply(seq(ncol(mat_residuals)), function(j){ 26 | (nrow(mat_residuals) - length(unique(mat_residuals[, j]))) / nrow(mat_residuals) 27 | }) * 100 28 | 29 | mat_residuals <- tail(mat_residuals, M) 30 | mat_permutations <- apply(mat_residuals, 2, rank, ties.method = "random") 31 | colnames(mat_permutations) <- names(children_nodes) 32 | 33 | list_matpermutations[[inode]] <- mat_permutations 34 | list_vecties[[inode]] <- vec_ties 35 | } 36 | list_matpermutations <- setNames(list_matpermutations, names(agg_nodes)) 37 | list_vecties <- setNames(list_vecties, names(agg_nodes)) 38 | list(list_matpermutations = list_matpermutations, list_vecties = list_vecties) 39 | } 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /simulations/code/runsim.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | full_path=$(realpath $0) 4 | dir_path=$(dirname $full_path) 5 | mypath=$(dirname $dir_path ) 6 | 7 | rscript="main.R" 8 | 9 | experiment="small" && marg="norm" && Tlearning=500 && M=500 && Trueparam="FALSE" 10 | # experiment="newlarge" && marg="norm" && Tlearning=500 && M=500 && Trueparam="FALSE" 11 | # experiment="small" && marg="norm" && Tlearning=10000 && M=1000 && Trueparam="FALSE" 12 | # experiment="newlarge" && marg="norm" && Tlearning=10000 && M=1000 && Trueparam="FALSE" 13 | 14 | nbsimul=84 15 | nbcores=3 16 | alljobs=$(seq 1 12) 17 | 18 | for idjob in ${alljobs[@]} 19 | do 20 | flag="$experiment-$Tlearning-$idjob" 21 | echo "$flag" 22 | Rscript --vanilla $rscript $experiment $marg $Tlearning $M $Trueparam $idjob $nbsimul $nbcores > "$mypath/work/rout/$flag.Rout" 2> "$mypath/work/rout/$flag.err" & 23 | done 24 | -------------------------------------------------------------------------------- /simulations/code/simulate.R: -------------------------------------------------------------------------------- 1 | # This script contains all the functions to simulate the AR models as well as the true distribution of the future observations. 
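
A minimal sketch, on a toy AR(1) and with illustrative names, of how this script represents the "true" predictive distribution: the data-generating model, with its coefficients held fixed rather than estimated, is re-attached to the observations up to the forecast origin, and future sample paths are drawn from it (mirroring the `model_path`/`sample_paths` step in dataGen() below, except that the innovations here are drawn internally rather than passed via `innov`).

```
library(forecast)
set.seed(1)
n <- 300; H <- 2; npaths <- 1000
y <- as.numeric(arima.sim(list(ar = 0.6), n = n))
# Condition on the sample up to the origin, with the true AR coefficient
# (and a zero mean) held fixed
model <- Arima(head(y, n - H), order = c(1, 0, 0), fixed = c(0.6, 0))
paths <- replicate(npaths, as.numeric(simulate(model, nsim = H, future = TRUE)))
dim(paths)                                # H x npaths: one column per sample path
quantile(paths[1, ], c(0.05, 0.5, 0.95))  # Monte Carlo h = 1 predictive quantiles
```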
2 | transform_sample <- function(X, mu_theo, sd_theo){ 3 | mu <- apply(X, 2, mean) 4 | sdeviation <- apply(X, 2, sd) 5 | t( ((t(X) - mu + mu_theo)/sdeviation) * sd_theo ) 6 | } 7 | 8 | gen.innov <- function(n, marg, meanVec, covarianceMat){ 9 | 10 | mycor <- diag(diag(covarianceMat)^(-.5)) %*% covarianceMat %*% diag(diag(covarianceMat)^(-.5)) 11 | 12 | nvar <- ncol(covarianceMat) 13 | correlations <- mycor[lower.tri(mycor)] 14 | mycop <- normalCopula(correlations, dispstr = "un", dim = nvar) 15 | margins <- rep(marg, nvar) 16 | if(marg == "norm"){ 17 | parameters <- lapply(seq(nvar), function(i){ 18 | list(mean = meanVec[i], sd = sqrt(covarianceMat[i, i]) ) 19 | }) 20 | #mvrnorm(n = n_simul, mus, Sigma = Sigma) 21 | }else if(marg == "t"){ 22 | list_param <- list(df = 6) #list(df = 4) # df = 10 too big 23 | parameters <- rep(list(list_param), nvar) 24 | }else if(marg == "sn"){ 25 | list_param <- list(alpha = -5, omega = 5) 26 | parameters <- rep(list(list_param), nvar) 27 | }else if(marg == "mst"){ 28 | list_param <- list(alpha = -20, omega = 3, nu = 7) 29 | parameters <- rep(list(list_param), nvar) 30 | }else if(marg == "mixture"){ 31 | probs <- c(0.3,0.7) 32 | mus <- c(-2, 2) 33 | 34 | list_param <- list(probs = probs, mus = mus, sds = sqrt(c(1,1)) ) 35 | parameters <- rep(list(list_param), nvar) 36 | } 37 | my.mvdc <- mvdc(mycop, margins, parameters) 38 | innov <- rMvdc(n, my.mvdc) 39 | #innov <- scale(innov, scale = F) 40 | } 41 | 42 | orderGen <- function(n.bot) 43 | { 44 | 45 | order.diff <- rep(0, n.bot) 46 | order.ar <- sample(0:2, n.bot, replace = TRUE) 47 | order.ma <- sample(0:2, n.bot, replace = TRUE) 48 | order.d <- cbind(order.ar, order.diff, order.ma) 49 | 50 | 51 | ar.d <- matrix(, n.bot, 2) 52 | ma.d <- matrix(, n.bot, 2) 53 | 54 | for(j in 1:n.bot) 55 | { 56 | order.t <- order.d[j, ] 57 | 58 | # define AR coefficients 59 | ar.coeff <- c() 60 | if(order.t[1]==0) 61 | { 62 | ar.d[j, 1] <- NA 63 | } 64 | 65 | ar.coeff1 <- 0 66 | ar.coeff2 <- 0 67 | if(order.t[1]==1) 68 | { 69 | ar.coeff1 <- runif(1, 0.5, 0.7) 70 | ar.coeff <- ar.coeff1 71 | ar.d[j, 1] <- ar.coeff 72 | } 73 | if(order.t[1]==2) 74 | { 75 | ar.coeff2 <- runif(1, 0.5, 0.7) 76 | lower.ar.b <- ar.coeff2 - 0.9 77 | upper.ar.b <- 0.9 - ar.coeff2 78 | ar.coeff1 <- runif(1, lower.ar.b, upper.ar.b) 79 | ar.coeff <- c(ar.coeff1, ar.coeff2) 80 | ar.d[j, 1:2] <- ar.coeff 81 | } 82 | 83 | # define MA coefficients 84 | ma.coeff <- c() 85 | if(order.t[3]==0) 86 | { 87 | ma.d[j, 1] <- NA 88 | } 89 | ma.coeff1 <- 0 90 | ma.coeff2 <- 0 91 | if(order.t[3]==1) 92 | { 93 | ma.coeff1 <- runif(1, 0.5, 0.7) 94 | ma.coeff <- ma.coeff1 95 | ma.d[j, 1] <- ma.coeff 96 | } 97 | if(order.t[3]==2) 98 | { 99 | ma.coeff2 <- runif(1, 0.5, 0.7) 100 | lower.ma.b <- -1 * (0.9 + ma.coeff2) / ((0.9+0.7)/0.5) 101 | upper.ma.b <- -1 * lower.ma.b 102 | ma.coeff1 <- runif(1, lower.ma.b, upper.ma.b) 103 | ma.coeff <- c(ma.coeff1, ma.coeff2) 104 | ma.d[j, 1:2] <- ma.coeff 105 | } 106 | } 107 | return(list(ar.d, order.d, ma.d)) 108 | } 109 | 110 | dataGen <- function(n, n.bot, var.mat, npaths) 111 | { 112 | order.gen <- ordergenCor(n.bot) 113 | ar.d <- order.gen[[1]] 114 | order.d <- order.gen[[2]] 115 | ma.d <- order.gen[[3]] 116 | data.bot <- matrix(NA, n, n.bot) 117 | 118 | if(!is.matrix(var.mat)){ 119 | nvar <- 1 120 | Sigma <- matrix(var.mat) 121 | }else{ 122 | nvar <- n.bot 123 | Sigma <- var.mat 124 | } 125 | 126 | if(nvar == 1){ # Common pattern 127 | #innov_burnin <- rmvnorm(n_warm, mean = rep(0, nvar), sigma = Sigma) 128 | #innov_insample <- 
rmvnorm(n, mean = rep(0, nvar), sigma = Sigma) 129 | #innov_future <- rmvnorm(npaths * H, mean = rep(0, nvar), sigma = Sigma) 130 | 131 | innov_burnin <- matrix(rnorm(n_warm, mean = 0, sd = Sigma), ncol = 1) 132 | innov_insample <- matrix(rnorm(n, mean = 0, sd = Sigma), ncol = 1) 133 | #innov_future <- matrix(rnorm(npaths * H, mean = 0, sd = Sigma), ncol = 1) 134 | 135 | innov_future <- sapply(seq(H), function(h){ 136 | x <- matrix(rnorm(npaths, mean = 0, sd = Sigma), ncol = 1) 137 | transform_sample(x, rep(0, nvar), sqrt(diag(Sigma)) ) 138 | }, simplify = "array") 139 | 140 | }else{ 141 | 142 | allsd <- sqrt(diag(Sigma)) 143 | innov_burnin <- matrix(NA, nrow = n_warm, ncol = nvar) 144 | innov_insample <- matrix(NA, nrow = n, ncol = nvar) 145 | innov_future <- array(NA, c(npaths, nvar, H)) 146 | for(j in seq(nvar)){ 147 | innov_burnin[, j] <- rnorm(n_warm, mean = 0, sd = allsd[j]) 148 | innov_insample[, j] <- rnorm(n, mean = 0, sd = allsd[j]) 149 | innov_future[, j , ] <- matrix(rnorm(npaths * H, mean = 0, sd = allsd[j]), nrow = npaths, ncol = H) 150 | } 151 | 152 | } 153 | 154 | list_paths <- vector("list", nvar) 155 | for(j in 1:nvar) 156 | { 157 | 158 | # generating data from a ARIMA model 159 | data.bot[, j] <- arima.sim(list(order = order.d[j, ], ar = na.omit(ar.d[j, ]), ma = na.omit(ma.d[j, ])), n, 160 | n.start = n_warm, start.innov = innov_burnin[, j], 161 | innov = innov_insample[, j])[(order.d[j, 2] + 1):(n + order.d[j, 2])] 162 | 163 | y <- head(data.bot[, j], -H) # IMPORTANT 164 | model_path <- Arima(y, order = order.d[j, ], fixed = c(na.omit(ar.d[j, ]), na.omit(ma.d[j, ]), 0)) 165 | 166 | 167 | sample_paths <- sapply(seq(npaths), function(ipath){ 168 | #id <- seq((ipath - 1) * H + 1, ipath * H) 169 | #simulate(model_path, future = TRUE, nsim = H, innov = innov_future[id, j]) 170 | simulate(model_path, future = TRUE, nsim = H, innov = innov_future[ipath, j, ]) 171 | }, simplify = "array") 172 | list_paths[[j]] <- sample_paths 173 | 174 | } 175 | 176 | spaths <- simplify2array(list_paths) 177 | 178 | return(list(data.bot, order.gen, spaths)) 179 | } 180 | 181 | ordergenCor <- ordergenCom <- ordergenNoise <- orderGen 182 | data.genCom <- data.genCor <- data.genNoise <- dataGen 183 | 184 | 185 | simulate_large_hts <- function(n, obj.path){ 186 | 187 | npaths <- obj.path$npaths 188 | 189 | #nodes <- list(6, rep(4, 6), rep(4, 24), rep(4, 96), rep(4, 384)) 190 | #nodes <- list(6, rep(4, 6), rep(4, 24), rep(4, 96)) 191 | nodes <- list(6, rep(4, 6), rep(4, 24)) 192 | 193 | gmat <- hts:::GmatrixH(nodes) 194 | gmat <- apply(gmat, 1, table) 195 | n.tot <- sum(unlist(nodes)) + 1 196 | n.bot <- sum(nodes[[length(nodes)]]) 197 | 198 | 199 | 200 | # Generating data for the common pattern 201 | varCom <- 0.005 202 | #### 203 | obj.genCom <- data.genCom(n, 1, var = varCom, npaths) 204 | 205 | dataCom <- matrix(obj.genCom[[1]], ncol = 1) # ONE DIMENSIONAL 206 | allCom <- dataCom[, rep(1, times = n.bot)] 207 | # Only 2 series out of 4 at the bottom level contains the common pattern 208 | idxCom <- c(seq(1, n.bot, 4), seq(2, n.bot, 4)) 209 | allCom[, idxCom] <- 0 210 | 211 | 212 | allCom.path <- sapply(seq(npaths), function(ipath){ 213 | datComP <- matrix(obj.genCom[[3]][, ipath, 1], ncol = 1) 214 | allComP <- datComP[, rep(1, times = n.bot)] 215 | allComP[, idxCom] <- 0 216 | allComP 217 | }, simplify = "array") 218 | 219 | # Adding noise to the common pattern 220 | varRange <- list(0.4, 0.4, 0.4, 0.4, 0.4) # variance of the noise for each level (level 1 to level 5) 221 | 222 | # Generating data 
for the correlated pattern 223 | bCor <- runif(n.bot/4, 0.3, 0.8) # block correlations 224 | bCorMat <- lapply(bCor, function(x){ 225 | y <- matrix(x, 4, 4) 226 | diag(y) = 1 227 | return(y) 228 | }) 229 | 230 | corMatB <- bdiag(bCorMat) # correlation matrix at the bottom level 231 | varVec <- runif(n.bot, 0.05, 0.1) 232 | varMat <- as.matrix(Diagonal(x = sqrt(varVec)) %*% corMatB %*% Diagonal(x = sqrt(varVec))) # cov matrix 233 | #### 234 | 235 | obj.genCor <- data.genCor(n, n.bot, varMat, npaths) 236 | allCor <- obj.genCor[[1]] # generates data with correlated errors 237 | 238 | allCor.path <- obj.genCor[[3]] 239 | 240 | 241 | if(FALSE){ 242 | 243 | # generates white noise errors for level 1 to level 4 (level 5: bottom level ignored) 244 | noiseL <- noiseL.path <- list() 245 | for(i in 1:(length(nodes)-1)) 246 | { 247 | nodesLv <- sum(nodes[[i]]) 248 | nLv <- nodesLv / 2 249 | var.mat <- diag(nLv) 250 | diag(var.mat) <- rep(varRange[[i]], nLv) 251 | #### 252 | datL <- rmvnorm(n, rep(0, nLv), var.mat) 253 | datL <- datL[, rep(1:ncol(datL), each = 2)] # replicating to get the data for -ve part 254 | sign.vec <- rep(c(1, -1), nLv) # adding +/- into the data 255 | datL <- t(t(datL) * sign.vec / as.numeric(gmat[[i+1]])) # contribution that passes to the bottom level 256 | datL <- datL[, rep(1:ncol(datL), times = gmat[[i+1]])] # all noise series at the bottom level 257 | noiseL[[i]] <- datL 258 | #### 259 | #datL.path <- rmvnorm(n, rep(0, nLv), var.mat) 260 | noiseL.path[[i]] <- sapply(seq(npaths), function(ipath){ 261 | datLpath <- rmvnorm(H, rep(0, nLv), var.mat) 262 | datLpath <- datLpath[, rep(1:ncol(datLpath), each = 2)] # replicating to get the data for -ve part 263 | datLpath <- t(t(datLpath) * sign.vec / as.numeric(gmat[[i+1]])) # contribution that passes to the bottom level 264 | datLpath <- datLpath[, rep(1:ncol(datLpath), times = gmat[[i+1]])] # all noise series at the bottom level 265 | datLpath 266 | }, simplify = "array") 267 | 268 | } 269 | 270 | 271 | 272 | # generate ARMA series for the noise at the bottom level 273 | var.mat <- diag(n.bot/2) 274 | diag(var.mat) <- rep(varRange[[length(nodes)]], sum(nodes[[length(nodes)]])/2) 275 | #### 276 | obj.genNoise <- data.genNoise(n, n.bot/2, var.mat, npaths) 277 | 278 | noiseB <- obj.genNoise[[1]] 279 | noiseB <- noiseB[, rep(1:ncol(noiseB), each = 2)] 280 | sign.vec <- rep(c(1, -1), n.bot/2) 281 | noiseB <- t(t(noiseB) * sign.vec) 282 | noiseB[, idxCom] <- 0L # adding noise only to common component 283 | 284 | noiseB.path <- sapply(seq(npaths), function(ipath){ 285 | noiseBp <- obj.genNoise[[3]][, ipath, ] 286 | noiseBp <- noiseBp[, rep(1:ncol(noiseBp), each = 2)] 287 | sign.vec <- rep(c(1, -1), n.bot/2) 288 | noiseBp <- t(t(noiseBp) * sign.vec) 289 | noiseBp[, idxCom] <- 0L 290 | noiseBp 291 | }, simplify = "array") 292 | 293 | }# IF FALSE 294 | 295 | # common + correlated + noise 296 | if(FALSE){ 297 | allB <- allCom + allCor + Reduce("+", noiseL) + noiseB 298 | 299 | allB.path <- sapply(seq(npaths), function(ipath){ 300 | allCom.path[, , ipath] + allCor.path[, ipath, ] + Reduce("+", noiseL.path)[, , ipath] + noiseB.path[, , ipath] 301 | }, simplify = "array") 302 | sample_paths <- allB.path 303 | }else if(TRUE){ 304 | allB <- allCom + allCor 305 | allB.path <- sapply(seq(npaths), function(ipath){ 306 | allCom.path[, , ipath] + allCor.path[, ipath, ] 307 | }, simplify = "array") 308 | sample_paths <- allB.path 309 | 310 | #browser() 311 | }else if(FALSE){ 312 | 313 | varCom <- 0.005 314 | obj.genCom <- data.genCom(n, 1, var = 
varCom, npaths) 315 | dataCom <- matrix(obj.genCom[[1]], ncol = 1) # ONE DIMENSIONAL 316 | allCom <- dataCom[, rep(1, times = n.bot)] 317 | # Only 2 series out of 4 at the bottom level contains the common pattern 318 | idxCom <- c(seq(1, n.bot, 4), seq(2, n.bot, 4)) 319 | #allCom[, idxCom] <- 0 320 | allCom[, idxCom] <- rnorm(nrow(allCom)) 321 | allCom.path <- sapply(seq(npaths), function(ipath){ 322 | datComP <- matrix(obj.genCom[[3]][, ipath, 1], ncol = 1) 323 | allComP <- datComP[, rep(1, times = n.bot)] 324 | #allComP[, idxCom] <- 0 325 | allComP[, idxCom] <- rnorm(nrow(allComP)) 326 | allComP 327 | }, simplify = "array") 328 | 329 | allB <- allCom 330 | allB.path <- sapply(seq(npaths), function(ipath){ 331 | allCom.path[, , ipath] 332 | }, simplify = "array") 333 | sample_paths <- allB.path 334 | } 335 | 336 | 337 | 338 | 339 | dat.new <- ts(allB, frequency = 1L) 340 | sim.hts <- hts(dat.new, nodes = nodes) 341 | #ally <- allts(sim.hts) 342 | S <- smatrix(sim.hts) 343 | A <- head(S, nrow(S) - ncol(S)) 344 | 345 | list(A = A, bts = allB, sim.hts = sim.hts, sample_paths = sample_paths) 346 | } 347 | 348 | simulate_small_hts <- function(n_simul, marg = NULL, obj.path = NULL){ 349 | p <- 2; d <- 0; q <- 1 350 | 351 | ar_param <- ma_param <- vector("list", 4) 352 | for(j in seq(4)){ 353 | if(fixed.dgp){ 354 | phi_2 <- 0.5327375 #runif(1, min = 0.5, max = 0.7) 355 | phi_1 <- -0.2739356 #runif(1, min = phi_2 - 1, max = 1 - phi_2) 356 | theta_1 <- 0.5469495 #runif(1, min = 0.5, max = 0.7) 357 | }else{ 358 | phi_2 <- runif(1, min = 0.5, max = 0.7) 359 | # !!!!! runif(1, min = phi_2 - 1, max = 1 - phi_2) causes non-stationarity 360 | # ROB'S EXPLANATION: if not using the true parameters, there is a bias, which gets worse close to the boundary 361 | # phi_1 <- runif(1, min = phi_2 - 0.9, max = 0.9 - phi_2) 362 | phi_1 <- runif(1, min = phi_2 - 0.71, max = 0.71 - phi_2) 363 | theta_1 <- runif(1, min = 0.5, max = 0.7) 364 | } 365 | ar_param[[j]] <- c(phi_1, phi_2); 366 | ma_param[[j]] <- c(theta_1) 367 | } 368 | 369 | mean_process <- 0 370 | mus <- rep(0, 4) 371 | #a <- 3.3; 372 | #Sigma <- rbind(c(a, 3, 2, 1), c(3, a, 2, 1), c(2, 2, a, 3), c(1, 1, 3, a)) 373 | varVec <- rep(1, 4) #varVec <- rep(2, 4) 374 | 375 | corMatB <- rbind(c(1, 0.7, 0.2, 0.3), 376 | c(0.7, 1, 0.3, 0.2), 377 | c(0.2, 0.3, 1, 0.6), 378 | c(0.3, 0.2, 0.6, 1)) 379 | # corMatB <- diag(4) 380 | 381 | 382 | Sigma <- as.matrix(Diagonal(x = sqrt(varVec)) %*% corMatB %*% Diagonal(x = sqrt(varVec))) 383 | 384 | innov_insample <- gen.innov(n_simul, marg = marg, mus, Sigma) 385 | innov_burnin <- gen.innov(n_warm, marg = marg, mus, Sigma) 386 | 387 | #n.start <- p + q 388 | #start.innov <- rep(0, n.start) 389 | 390 | bts <- sapply(seq(4), function(j){ 391 | #arima.sim(n = n_simul, list(order = c(p, d, q), ar = ar_param, ma = ma_param), 392 | # n.start = n.start, start.innov = start.innov, innov = innov_insample[, j]) 393 | arima.sim(n = n_simul, list(order = c(p, d, q), ar = ar_param[[j]], ma = ma_param[[j]]), 394 | n.start = n_warm, start.innov = innov_burnin[, j], innov = innov_insample[, j]) 395 | }) 396 | 397 | if(!is.null(obj.path)){ 398 | stopifnot(T_test == H) 399 | 400 | npaths <- obj.path$npaths 401 | 402 | models_path <- lapply(seq(4), function(j){ 403 | y <- head(bts[, j], -H) # IMPORTANT !!!
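        # Added note: the last H points are held out so that the model object
        # (built with the true coefficients via `fixed`) conditions only on the
        # learning sample when simulate() draws the future sample paths below.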
404 | Arima(y, order = c(p, d, q), fixed = c(ar_param[[j]], ma_param[[j]], mean_process)) 405 | }) 406 | 407 | innov_future <- gen.innov(npaths * H, marg = marg, mus, Sigma) 408 | sample_paths <- sapply(seq(npaths), function(ipath){ 409 | #print(ipath) 410 | #innov_future <- gen.innov(H, marg = marg) 411 | id <- seq((ipath - 1) * H + 1, ipath * H) 412 | sapply(seq(4), function(j){ 413 | # simulate(models_path[[j]], future = TRUE, nsim = H, innov = innov_future[, j]) 414 | simulate(models_path[[j]], future = TRUE, nsim = H, innov = innov_future[id, j]) 415 | }) 416 | }, simplify = "array") 417 | #innov_future <- mvrnorm(n = H, mus, Sigma = Sigma) 418 | #paths <- t(replicate(npaths, simulate(model, future = TRUE, nsim = H, innov = innov_future[, j]))) 419 | 420 | }else{ 421 | sample_paths <- NULL 422 | } 423 | 424 | list(bts = bts, param = list(ar_param = ar_param, ma_param = ma_param), sample_paths = sample_paths) 425 | } 426 | 427 | 428 | 429 | 430 | -------------------------------------------------------------------------------- /simulations/code/utils.R: -------------------------------------------------------------------------------- 1 | # Some useful functions. 2 | nameResfile <- function(experiment, marg, T_learn, M, use.trueparam, idjob, tag){ 3 | file.path(rdata.folder, 4 | paste(experiment, "_", marg, "_", T_learn, "_", M, "_", use.trueparam, "_", idjob, "_", tag, ".Rdata", sep = "")) 5 | } 6 | 7 | namePdffile <- function(experiment, marg, T_learn, M, use.trueparam, tag){ 8 | file.path(pdf.folder, 9 | paste(experiment, "_", marg, "_", T_learn, "_", M, "_", use.trueparam, "_", tag, sep = "")) 10 | } 11 | 12 | pbu <- function(objhts){ 13 | P_BU <- Matrix(0, nrow = objhts$nbts, ncol = objhts$nts, sparse = T) 14 | P_BU[cbind(seq(objhts$nbts), seq(objhts$nts - objhts$nbts + 1, objhts$nts)) ] <- 1 15 | P_BU 16 | } 17 | 18 | compute_crps <- function(X, obs){ 19 | sapply(seq(ncol(X)), function(j){ 20 | crps_sample(y = obs[j], dat = X[, j]) # from ScoringRules package 21 | }) 22 | } 23 | 24 | compute_qscores <- function(X, obs){ 25 | X_sorted <- apply(X, 2, sort) 26 | sapply(seq(ncol(X)), function(j){ 27 | qf <- X_sorted[, j] 28 | 2 * ((obs[j] <= qf) - q_probs) * (qf - obs[j]) 29 | }) 30 | } -------------------------------------------------------------------------------- /smart-meters/code/MinT_ecov.R: -------------------------------------------------------------------------------- 1 | # This script computes the covariance of the one-step-ahead insample residuals (matrix W) for MinT. 
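# Added sketch (illustrative only, not executed): with the one-step residuals
# arranged as rows = time and columns = series, the main W estimators computed
# below are (i) the residual variances on the diagonal, (ii) the full sample
# covariance, and (iii) a shrinkage of the sample covariance towards its
# diagonal. vec_w(), lowerD() and shrink.estim() are assumed to be provided by
# the source()'d utilities further down.
if (FALSE) {
  R1 <- matrix(rnorm(200 * 5), nrow = 200, ncol = 5)    # toy residual matrix
  W_diag   <- Diagonal(x = vec_w(R1))                   # "diagonal"
  W_sample <- crossprod(R1) / nrow(R1)                  # "sample"
  W_shrink <- shrink.estim(R1, lowerD(R1))$shrink.cov   # "shrink"
}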
2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("config_general.R") 5 | source("config_splitting.R") 6 | source("utils.R") 7 | source("utils_hts.R") 8 | 9 | library(Matrix) 10 | library(igraph) 11 | 12 | load(file.path(work.folder, "myinfo.Rdata")) 13 | 14 | n_bottom <- length(bottomSeries) 15 | n_total <- n_agg + n_bottom 16 | 17 | R_onestep <- matrix(NA, nrow = length(learn$id) - n_past_obs_kd, ncol = n_total) 18 | 19 | for(do.agg in c(TRUE, FALSE)){ 20 | if(do.agg){ 21 | set_series <- aggSeries 22 | algo <- algo.agg 23 | }else{ 24 | set_series <- bottomSeries 25 | algo <- algo.bottom 26 | } 27 | 28 | mat_residuals <- sapply(seq_along(set_series), function(j){ 29 | if(j%%100 == 0) 30 | print(j) 31 | 32 | idseries <- set_series[j] 33 | if(algo == "KD-IC-NML"){ 34 | resid_MINT_file <- file.path(insample.folder, algo, paste("residuals_MINT_", idseries, "_", algo, ".Rdata", sep = "")) 35 | load(resid_MINT_file) # residuals_MINT 36 | e_vec <- c(rep(NA, n_past_obs_kd), residuals_MINT) 37 | }else if(algo == "DYNREG" || algo == "DETS"){ 38 | resid_MINT_file <- file.path(insample.folder, algo, paste("residuals_MINT_", idseries, "_", algo, "_", 1, ".Rdata", sep = "")) 39 | load(resid_MINT_file) 40 | e_vec <- residuals_MINT 41 | } 42 | e_vec 43 | }) 44 | print(dim(mat_residuals)) 45 | mat_residuals <- tail(mat_residuals, -n_past_obs_kd) 46 | if(do.agg){ 47 | R_onestep[, seq(n_agg)] <- mat_residuals 48 | }else{ 49 | R_onestep[, seq(n_agg + 1, n_total)] <- mat_residuals 50 | } 51 | } 52 | 53 | covmethods <- c("shrink", "blockdiagonalshrink", "diagonal", "sample") 54 | 55 | for(covmethod in covmethods){ 56 | W1file <- file.path(work.folder, "wmatrices", paste("W1_", algo.agg, "_", algo.bottom, "_", covmethod, ".Rdata", sep = "")) 57 | if(covmethod == "diagonal"){ 58 | W1 <- Diagonal(x = vec_w(R_onestep)) 59 | }else if(covmethod == "sample"){ 60 | W1 <- crossprod(R_onestep) / nrow(R_onestep) 61 | }else if(covmethod == "shrink"){ 62 | target_diagonal <- lowerD(R_onestep) 63 | shrink_results <- shrink.estim(R_onestep, target_diagonal) 64 | W1 <- shrink_results$shrink.cov 65 | }else if(covmethod == "blockdiagonalshrink"){ 66 | load(file.path(work.folder, "wmatrices", paste("W1_", algo.agg, "_", algo.bottom, "_", "shrink", ".Rdata", sep = ""))) 67 | W1_bdiagonal <- W1 68 | W1_bdiagonal[seq(n_agg), seq(n_agg + 1, n_total)] <- 0 69 | W1_bdiagonal[seq(n_agg + 1, n_total), seq(n_agg)] <- 0 70 | W1 <- W1_bdiagonal 71 | }else{ 72 | stop("error !") 73 | } 74 | save(file = W1file, list = c("W1")) 75 | } 76 | 77 | if(TRUE){ 78 | allh <- tail(calendar$periodOfDay[learn$id], -n_past_obs_kd) 79 | 80 | for(h in seq(48)){ 81 | 82 | #id <- which(allh == h) 83 | seth <- c(h -1, h, h + 1) 84 | if(h == 1){ 85 | seth <- c(48, 1, 2) 86 | }else if(h == 48){ 87 | seth <- c(47, 48, 1) 88 | } 89 | stopifnot(h %in% seq(48)) 90 | 91 | id <- which(allh %in% seth) 92 | 93 | R <- R_onestep[id, ] 94 | 95 | # shrink 96 | Whfile <- file.path(work.folder, "wmatrices", paste("W_", h, "_", algo.agg, "_", algo.bottom, "_", "shrink", ".Rdata", sep = "")) 97 | target_diagonal <- lowerD(R) 98 | shrink_results <- shrink.estim(R, target_diagonal) 99 | W1 <- shrink_results$shrink.cov 100 | save(file = Whfile, list = c("W1")) 101 | 102 | # diagonal 103 | Whfile <- file.path(work.folder, "wmatrices", paste("W_", h, "_", algo.agg, "_", algo.bottom, "_", "diagonal", ".Rdata", sep = "")) 104 | W1 <- Diagonal(x = vec_w(R)) 105 | save(file = Whfile, list = c("W1")) 106 | } 107 | } 108 | 
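# Added sketch (illustrative only): how a W matrix saved above enters MinT.
# Following the same algebra as compute_pmint() in the simulations code, the
# reconciliation projection is P = P_BU - J W U (U' W U)^{-1} U', where J = P_BU
# selects the bottom rows and U encodes the aggregation constraints.
# Toy hierarchy: one aggregate and two bottom series.
if (FALSE) {
  S <- rbind(c(1, 1), diag(2))              # summing matrix; aggregation row A = (1, 1)
  A <- matrix(c(1, 1), nrow = 1)
  J <- cbind(matrix(0, 2, 1), diag(2))      # picks out the bottom series
  U <- rbind(diag(1), -t(A))
  W <- diag(c(2, 1, 1))                     # e.g. a diagonal W estimate
  P <- J - J %*% W %*% U %*% solve(t(U) %*% W %*% U) %*% t(U)
  y_hat <- c(3.5, 1, 2)                     # incoherent base forecasts (3.5 != 1 + 2)
  S %*% P %*% y_hat                         # coherent forecasts: c(3.25, 1.125, 2.125)
}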
-------------------------------------------------------------------------------- /smart-meters/code/README.md: -------------------------------------------------------------------------------- 1 | # Source code for the smart meter experiments of the paper *Hierarchical Probabilistic Forecasting of Electricity Demand with Smart Meter Data* by Ben Taieb, Souhaib, Taylor, James, and Hyndman, Rob. 2 | 3 | The code computes all the results in Section 6 and produces all the figures in the paper. 4 | 5 | # Usage 6 | 7 | 1. Download the following files: *edrp_elec.csv*, *edrp_metadata.xlsx* and *edrp_geography_data.xlsx* from https://discover.ukdataservice.ac.uk/catalogue/?sn=7591 (registration required). 8 | 2. Save these files in "../rawdata". 9 | 3. Apply a preprocessing step and save the results in new Rdata files (slow). 10 | ``` 11 | source("init_1_rawmeters.R") 12 | source("init_2_MyHierarchy.R") 13 | source("init_3_bottomlevel_meters.R") 14 | source("init_4_aggregated_meters.R") 15 | ``` 16 | 4. Specify the scenario for the bottom-level forecasts (see run_basef.sh) and run the following script (slow): 17 | ``` 18 | ./run_basef.sh 19 | ``` 20 | 5. Specify the scenario for the aggregate-level forecasts (see run_basef.sh) and run the script again (slow): 21 | ``` 22 | ./run_basef.sh 23 | ``` 24 | 6. Compute the covariance matrix W for the MinT method. 25 | ``` 26 | source("MinT_ecov.R") 27 | ``` 28 | 7. Compute the permutations (empirical copulas). 29 | ``` 30 | source("permutations.R") 31 | ``` 32 | 8. Merge the results by half hour. 33 | ``` 34 | source("makebf_byhalfour.R") 35 | ``` 36 | 9. Compute the forecasts for the aggregated series (slow). 37 | ``` 38 | ./run_aggregation.sh 39 | ``` 40 | 10. Merge the aggregation results. 41 | ``` 42 | source("aggregation_merge.R") 43 | ``` 44 | 11. Produce the figures with all the accuracy measures. 45 | ``` 46 | source("results.R") 47 | ``` 48 | 49 | To produce all the figures, run the following scripts: 50 | ``` 51 | source("plot_calendar_effects.R") 52 | source("plot_coherency.R") 53 | source("plot_coverage.R") 54 | source("plot_forecasts.R") 55 | source("plot_parameters.R") 56 | source("plot_series.R") 57 | source("plot_tree.R") 58 | ``` 59 | 60 | 61 | -------------------------------------------------------------------------------- /smart-meters/code/aggregation.R: -------------------------------------------------------------------------------- 1 | # This script uses the results generated by "permutations.R" to compute the forecasts for the aggregated series. 2 | # It allows the computations to be distributed, computing the aggregations only for a subset of the observations in the test set. 3 | # By doing so, we avoid computing all 4416 test-set observations in one shot. 4 | # This script also computes the coverage of the 50% and 90% prediction intervals, and saves it in Rdata files.
5 | # The arguments are 6 | # idjob: the id of the job 7 | # allidtest: the set of observations for which we want to compute the aggregations 8 | rm(list = ls()) 9 | args = (commandArgs(TRUE)) 10 | if(length(args) == 0){ 11 | idjob <- 1 12 | allidtest <- seq(48) #1:123 #1:4 #1:1104 13 | }else{ 14 | idjob <- as.numeric(args[[1]]) 15 | allidtest <- NULL 16 | for(i in seq(2, length(args))){ 17 | allidtest <- c(allidtest, as.numeric(args[[i]])) 18 | } 19 | } 20 | source("config_paths.R") 21 | source("config_general.R") 22 | source("config_splitting.R") 23 | source("utils.R") 24 | library(igraph) 25 | library(Matrix) 26 | library(quadprog) 27 | 28 | set.seed(1986) 29 | 30 | computeParam <- function(m, v){ 31 | list(mulog = log(m/sqrt(1+(v/m^2))), sdlog = sqrt(log(1+(v/m^2)))) 32 | } 33 | 34 | compute_crps <- function(methods, n, mat_samples, observations){ 35 | res <- sapply(seq_along(methods), function(imethod){ 36 | sapply(seq(n), function(i){ 37 | crps_sampling(mat_samples[, i, imethod], observations[i]) 38 | }) 39 | }) 40 | colnames(res) <- methods 41 | res 42 | } 43 | 44 | weighted_crps <- function(qscores, weights){ 45 | sum(qscores * weights)/length(qscores) 46 | } 47 | 48 | compute_qscores <- function(methods, n, mat_samples, observations){ 49 | sorted_samples <- apply(mat_samples, c(2, 3), sort) 50 | qscores <- sapply(seq(n), function(i){ 51 | obs <- observations[i] 52 | sapply(seq_along(methods), function(imethod){ 53 | qf <- sorted_samples[, i, imethod] 54 | 2 * ((obs <= qf) - q_probs) * (qf - obs) 55 | }) 56 | }, simplify = 'array') 57 | qscores 58 | } 59 | 60 | print(allidtest) 61 | 62 | load(file.path(work.folder, "myinfo.Rdata")) 63 | n_bottom <- length(bottomSeries) 64 | 65 | bot_methods <- c("BASE", "BASE-MINTshrink", "MINTdiag", "MINTshrink") 66 | agg_methods <- c("BASE", "INDEPBU", "PERMBU", "PERMBU-MINTshrink", "INDEPBU-MINTshrink", "MINTdiag", "MINTshrink") 67 | 68 | 69 | do.mint <- any(c(grepl("MINT", bot_methods), grepl("MINT", agg_methods))) 70 | do.mintvar <- any(c(grepl("MINTdiag", bot_methods), grepl("MINTdiag", agg_methods), grepl("MINTshrink", bot_methods), grepl("MINTshrink", agg_methods))) 71 | 72 | if(do.mint){ 73 | #load(file.path(work.folder, "wmatrices", paste("W1_", algo.agg, "_", algo.bottom, "_", "shrink", ".Rdata", sep = ""))) 74 | #W1shrink <- W1 75 | 76 | #load(file.path(work.folder, "wmatrices", paste("W1_", algo.agg, "_", algo.bottom, "_", "diagonal", ".Rdata", sep = ""))) 77 | #W1diag <- W1 78 | 79 | J <- Matrix(cbind(matrix(0, nrow = n_bottom, ncol = n_agg), diag(n_bottom)), sparse = TRUE) 80 | U <- Matrix(rbind(diag(n_agg), -t(Sagg)), sparse = TRUE) 81 | } 82 | 83 | P_bu <- cbind(matrix(0, nrow = n_bottom, ncol = n_agg), diag(n_bottom)) 84 | n_total <- n_agg + n_bottom 85 | weights_GTOP <- rep(1, n_agg + n_bottom) 86 | Rmat <- diag(sqrt(weights_GTOP)) 87 | 88 | ########## 89 | # compute the parsing order of the aggregate nodes 90 | leaves <- V(itree)[degree(itree, mode="out") == 0] 91 | agg_nodes <- V(itree)[degree(itree, mode="out") != 0] 92 | 93 | depth_aggnodes <- sapply(agg_nodes, function(agg_node){ 94 | vec <- distances(itree, agg_node, leaves, mode = "out") 95 | max( vec[which(vec!=Inf)]) 96 | }) 97 | 98 | ordered_agg_nodes_names <- names(sort(depth_aggnodes)) 99 | ordered_agg_nodes <- V(itree)[match(ordered_agg_nodes_names, V(itree)$name)] 100 | ########## 101 | 102 | ntest <- length(test$id) 103 | 104 | list_crps_agg <- list_wcrps_agg <- list_qscores_agg <- list_mse_agg <- vector("list", ntest) 105 | list_crps_bot <- list_wcrps_bot <- 
list_mse_bot <- vector("list", ntest) 106 | 107 | list_samples_agg <- vector("list", ntest) 108 | sum_overtest_qscores_agg <- sum_overtest_qscores_bot <- 0 109 | 110 | # LOADING PERMUTATION FILE 111 | perm_file <- file.path(permutations.folder, paste("perm_", algo.agg, "_", algo.bottom, ".Rdata", sep = "")) 112 | load(perm_file) # "list_matpermutations" "list_vecties" 113 | 114 | # Generate samples 115 | for(idtest in allidtest){ 116 | print(idtest) 117 | print(base::date()) 118 | 119 | res_byidtest_file <- file.path(work.folder, "byidtest", paste("results_byidtest_", algo.agg, "_", algo.bottom, "_", idtest, ".Rdata", sep = "")) 120 | load(res_byidtest_file) 121 | 122 | iday <- getInfo(idtest)$iday 123 | hour <- getInfo(idtest)$hour 124 | 125 | for(do.agg in c(TRUE, FALSE)){ 126 | if(do.agg){ 127 | set_series <- aggSeries 128 | algo <- algo.agg 129 | base_samples_agg <- matrix(NA, nrow = M, ncol = length(set_series)) 130 | colnames(base_samples_agg) <- set_series 131 | }else{ 132 | set_series <- bottomSeries 133 | algo <- algo.bottom 134 | base_samples_bottom <- matrix(NA, nrow = M, ncol = length(set_series)) 135 | colnames(base_samples_bottom) <- set_series 136 | } 137 | for(j in seq_along(set_series)){ 138 | 139 | idseries <- set_series[j] 140 | 141 | if(do.agg){ 142 | invcdf <- approxfun(taus, QF_agg_idtest[, j], rule = 2) 143 | base_samples_agg[, j] <- invcdf(q_probs) 144 | }else{ 145 | invcdf <- approxfun(taus, QF_bottom_idtest[, j], rule = 2) 146 | base_samples_bottom[, j] <- invcdf(q_probs) 147 | } 148 | }# series 149 | 150 | }# agg and bottom 151 | 152 | # rank_X <- apply(Q, 2, rank, ties.method = "random") 153 | # I know that the rank of each observations is 1 --> M 154 | perm_samples_bottom <- base_samples_bottom 155 | variables <- colnames(perm_samples_bottom) 156 | 157 | mat_test <- NULL 158 | # PERM-BU 159 | for(inode in seq_along(ordered_agg_nodes)){ 160 | 161 | agg_node <- ordered_agg_nodes[inode] 162 | idseries_agg <- names(agg_node) 163 | iagg <- match(idseries_agg, aggSeries) 164 | children_nodes <- ego(itree, order = 1, nodes = agg_node, mode = "out")[[1]][-1] 165 | nkids <- length(children_nodes) 166 | 167 | 168 | # load permutation file 169 | mat_permutations <- list_matpermutations[[idseries_agg]] 170 | 171 | ranks_historical <- mat_permutations 172 | stopifnot(all(colnames(ranks_historical) == names(children_nodes))) 173 | 174 | depth_node <- depth_aggnodes[match(idseries_agg, names(depth_aggnodes))] 175 | 176 | samples_children <- matrix(NA, nrow = M, ncol = nkids) 177 | 178 | columns_agg <- which(children_nodes %in% agg_nodes) 179 | columns_bottom <- which(children_nodes %in% leaves) 180 | children_names <- names(children_nodes) 181 | 182 | # Extracting/computing the samples for each child 183 | if(length(columns_agg) > 0){ 184 | id_agg_children <- match(children_names[columns_agg], aggSeries) 185 | samples_agg_children <- t(tcrossprod(Sagg[id_agg_children, , drop = F], perm_samples_bottom)) 186 | samples_children[, columns_agg] <- samples_agg_children 187 | } 188 | 189 | if(length(columns_bottom) > 0){ 190 | id_bottom_children <- match(children_names[columns_bottom], bottomSeries) 191 | samples_children[, columns_bottom] <- perm_samples_bottom[, id_bottom_children] 192 | } 193 | 194 | # Computing the ranks of the samples for each child 195 | ranks_samples_children <- sapply(seq(ncol(samples_children)), function(j){ 196 | rank(samples_children[, j], ties.method = "random") 197 | }) 198 | 199 | index_mat <- sapply(seq(nkids), function(j){ 200 | res <- 
match(ranks_historical[, j], ranks_samples_children[, j]) 201 | stopifnot(all(!is.na(res))) 202 | res 203 | }) 204 | 205 | # Permutating the rows 206 | if(length(columns_bottom) > 0){ 207 | perm_samples_bottom[, id_bottom_children] <- sapply(seq_along(id_bottom_children), function(j){ 208 | perm_samples_bottom[index_mat[, columns_bottom[j]], id_bottom_children[j]] 209 | }) 210 | } 211 | 212 | if(length(columns_agg) > 0){ 213 | res <- lapply(seq_along(id_agg_children), function(j){ 214 | id <- which(Sagg[id_agg_children[j], ] == 1) 215 | #print(id) 216 | #print("---") 217 | perm_samples_bottom[index_mat[, columns_agg[j]], id, drop = F] 218 | }) 219 | ids <- lapply(id_agg_children, function(id_agg_child){ 220 | which(Sagg[id_agg_child, ] == 1) 221 | }) 222 | ids <- unlist(ids) 223 | perm_samples_bottom[, ids] <- do.call(cbind, res) 224 | } 225 | 226 | }# agg node 227 | 228 | ###### MINT ############ 229 | # adjustments 230 | if(do.mint){ 231 | #print("MINT") 232 | 233 | hwanted <- idtest%%48 234 | if(hwanted == 0) 235 | hwanted <- 48 236 | 237 | Whfile <- file.path(work.folder, "wmatrices", paste("W_", hwanted, "_", algo.agg, "_", algo.bottom, "_", "shrink", ".Rdata", sep = "")) 238 | load(Whfile) 239 | W1shrink <- W1 240 | 241 | Whfile <- file.path(work.folder, "wmatrices", paste("W_", hwanted, "_", algo.agg, "_", algo.bottom, "_", "diagonal", ".Rdata", sep = "")) 242 | load(Whfile) 243 | W1diag <- W1 244 | 245 | b_hat <- mean_bottom_idtest 246 | a_hat <- mean_agg_idtest 247 | 248 | y_hat <- c(a_hat, b_hat) 249 | adj_bottom_MINTshrink <- as.numeric(mint_betastar(W1shrink, y_hat = y_hat)) 250 | adj_agg_MINTshrink <- as.numeric(Sagg %*% adj_bottom_MINTshrink) 251 | revisedMINTshrink_bottom_idtest <- mean_bottom_idtest + adj_bottom_MINTshrink 252 | 253 | adj_bottom_MINTdiag <- as.numeric(mint_betastar(W1diag, y_hat = y_hat)) 254 | revisedMINTdiag_bottom_idtest <- mean_bottom_idtest + adj_bottom_MINTdiag 255 | 256 | 257 | # MINT Variance 258 | if(do.mintvar){ 259 | S <- rbind(Sagg, diag(n_bottom)) 260 | #P_shrink <- mint_pmatrix(W1shrink) 261 | #V_MINTshrink <- S %*% P_shrink %*% W1shrink %*% t(P_shrink) %*% t(S) 262 | #V_MINTshrink_agg <- diag(V_MINTshrink)[seq(n_agg)] 263 | #V_MINTshrink_bot <- diag(V_MINTshrink)[seq(n_agg + 1, n_total)] 264 | 265 | myfct <- function(W){ 266 | P <- mint_pmatrix(W) 267 | V <- S %*% P %*% W %*% t(P) %*% t(S) 268 | V_agg <- diag(V)[seq(n_agg)] 269 | V_bot <- diag(V)[seq(n_agg + 1, n_total)] 270 | list(V_agg = V_agg, V_bot = V_bot) 271 | } 272 | res_shrink <- myfct(W1shrink) 273 | V_MINTshrink_agg <- res_shrink$V_agg 274 | V_MINTshrink_bot <- res_shrink$V_bot 275 | 276 | res_diag <- myfct(W1diag) 277 | V_MINTdiag_agg <- res_diag$V_agg 278 | V_MINTdiag_bot <- res_diag$V_bot 279 | 280 | } 281 | # a_tilde_test <- Sagg %*% P_mint %*% y_hat 282 | 283 | # library(MASS) 284 | # n_bottom 285 | # Xstandard <- mvrnorm(n = M, rep(0, n_bottom), diag(n_bottom)) 286 | # s <- svd(Sigma) 287 | # b_tilde <- as.numeric(P_mint %*% t(t(y_hat))) 288 | # V_bottom <- V_mint[seq(n_agg + 1, n_total), seq(n_agg + 1, n_total)] 289 | # X <- mvrnorm(n = M, b_tilde, V_bottom) 290 | # Y <- t(Sagg %*% t(X)) 291 | 292 | } 293 | ######################## 294 | 295 | ######################## BOTTOM 296 | samples_bot <- array(NA, c(M, n_bottom, length(bot_methods))) 297 | meanf_bot <- matrix(NA, nrow = n_bottom, ncol = length(bot_methods)) 298 | for(ibot_method in seq_along(bot_methods)){ 299 | bot_method <- bot_methods[ibot_method] 300 | if(bot_method == "BASE"){ 301 | samples_bot_method <- 
base_samples_bottom 302 | meanf_bot_method <- mean_bottom_idtest 303 | }else if(bot_method == "BASE-MINTshrink"){ 304 | samples_bot_method <- t(t(base_samples_bottom) + adj_bottom_MINTshrink) 305 | meanf_bot_method <- revisedMINTshrink_bottom_idtest 306 | }else if(bot_method == "BASE-MCOMB"){ 307 | samples_bot_method <- t(t(base_samples_bottom) + adj_bottom_mcomb) 308 | meanf_bot_method <- revisedmean_bottom_idtest 309 | }else if(bot_method == "BASE-MCOMBRECON"){ 310 | samples_bot_method <- t(t(base_samples_bottom) + adj_bottom_mcombrecon) 311 | meanf_bot_method <- rev_and_reconcilied_bottom_mean_idtest 312 | }else if(bot_method == "MINTshrink" || bot_method == "MINTdiag"){ 313 | 314 | if(bot_method == "MINTshrink"){ 315 | meanf_bot_method <- revisedMINTshrink_bottom_idtest 316 | varf_bot_method <- V_MINTshrink_bot 317 | }else if(bot_method == "MINTdiag"){ 318 | meanf_bot_method <- revisedMINTdiag_bottom_idtest 319 | varf_bot_method <- V_MINTdiag_bot 320 | } 321 | id_negative <- which(meanf_bot_method < 0) 322 | meanf_bot_method[id_negative] <- mean_bottom_idtest[id_negative] 323 | 324 | samples_bot_method <- sapply(seq(n_bottom), function(ibot){ 325 | m <- meanf_bot_method[ibot] 326 | v <- varf_bot_method[ibot] 327 | resparam <- computeParam(m, v) 328 | qlnorm(q_probs, resparam$mulog, resparam$sdlog) 329 | }) 330 | 331 | }else{ 332 | stop("error") 333 | } 334 | samples_bot[, , ibot_method] <- samples_bot_method 335 | meanf_bot[, ibot_method] <- meanf_bot_method 336 | } 337 | 338 | # MSE 339 | mse_matrix_bottom <- (meanf_bot - obs_bottom_idtest)^2 340 | list_mse_bot[[idtest]] <- mse_matrix_bottom 341 | 342 | # CRPS 343 | botmethods_crps <- compute_crps(bot_methods, n_bottom, samples_bot, obs_bottom_idtest) 344 | list_crps_bot[[idtest]] <- botmethods_crps 345 | 346 | # QS 347 | qscores_bottom <- compute_qscores(bot_methods, n_bottom, samples_bot, obs_bottom_idtest) 348 | sum_overtest_qscores_bot <- sum_overtest_qscores_bot + qscores_bottom 349 | 350 | ######################## AGG 351 | samples_agg <- array(NA, c(M, n_agg, length(agg_methods))) 352 | meanf_agg <- matrix(NA, nrow = n_agg, ncol = length(agg_methods)) 353 | 354 | for(iagg_method in seq_along(agg_methods)){ 355 | agg_method <- agg_methods[iagg_method] 356 | if(agg_method == "BASE"){ 357 | samples_agg_method <- base_samples_agg 358 | meanf_agg_method <- mean_agg_idtest 359 | }else if(agg_method == "INDEPBU"){ 360 | samples_agg_method <- t(tcrossprod(Sagg, apply(base_samples_bottom, 2, sample))) 361 | meanf_agg_method <- (Sagg %*% mean_bottom_idtest) 362 | }else if(agg_method == "PERMBU"){ 363 | samples_agg_method <- t(tcrossprod(Sagg, perm_samples_bottom)) 364 | meanf_agg_method <- (Sagg %*% mean_bottom_idtest) 365 | #}else if(agg_method == "INDEPBU-MINT"){ 366 | # samples_agg_method <- t(t(samples_agg[, , match("INDEPBU", agg_methods)]) + adj_agg_MINT) 367 | }else if(agg_method == "PERMBU-MINTshrink"){ 368 | samples_agg_method <- t(t(samples_agg[, , match("PERMBU" , agg_methods)]) + adj_agg_MINTshrink) 369 | meanf_agg_method <- (Sagg %*% revisedMINTshrink_bottom_idtest) 370 | }else if(agg_method == "INDEPBU-MINTshrink"){ 371 | samples_agg_method <- t(t(samples_agg[, , match("INDEPBU" , agg_methods)]) + adj_agg_MINTshrink) 372 | meanf_agg_method <- (Sagg %*% revisedMINTshrink_bottom_idtest) 373 | }else if(agg_method == "PERMBU-MCOMB"){ 374 | samples_agg_method <- t(t(samples_agg[, , match("PERMBU" , agg_methods)]) + adj_agg_mcomb) 375 | meanf_agg_method <- (Sagg %*% revisedmean_bottom_idtest) 376 | }else if(agg_method == 
"PERMBU-MCOMBRECON"){ 377 | samples_agg_method <- t(t(samples_agg[, , match("PERMBU" , agg_methods)]) + adj_agg_mcombrecon) 378 | meanf_agg_method <- (Sagg %*% rev_and_reconcilied_bottom_mean_idtest) 379 | }else if(agg_method == "PERMBU-MCOMBUNRECON"){ 380 | samples_agg_method <- t(t(samples_agg[, , match("PERMBU" , agg_methods)]) + adj_agg_mcombunrecon) 381 | meanf_agg_method <- revisedmean_agg_idtest 382 | }else if(agg_method == "MINTdiag" || agg_method == "MINTshrink"){ 383 | 384 | if(agg_method == "MINTshrink"){ 385 | meanf_agg_method <- (Sagg %*% revisedMINTshrink_bottom_idtest) 386 | varf_agg_method <- V_MINTshrink_agg 387 | 388 | }else if(agg_method == "MINTdiag"){ 389 | meanf_agg_method <- (Sagg %*% revisedMINTdiag_bottom_idtest) 390 | varf_agg_method <- V_MINTdiag_agg 391 | } 392 | sd_agg_method <- sqrt(varf_agg_method) 393 | 394 | samples_agg_method <- sapply(seq(n_agg), function(iagg){ 395 | resparam <- computeParam(meanf_agg_method[iagg], varf_agg_method[iagg]) 396 | qlnorm(q_probs, resparam$mulog, resparam$sdlog) 397 | #qnorm(q_probs, meanf_agg_method[iagg], sd_agg_method[iagg]) 398 | }) 399 | 400 | }else{ 401 | stop("error") 402 | } 403 | samples_agg[, , iagg_method] <- samples_agg_method 404 | meanf_agg[, iagg_method] <- meanf_agg_method 405 | } 406 | 407 | # MSE 408 | mse_matrix_agg <- (meanf_agg - obs_agg_idtest)^2 409 | list_mse_agg[[idtest]] <- mse_matrix_agg 410 | 411 | # CRPS 412 | aggmethods_crps <- compute_crps(agg_methods, n_agg, samples_agg, obs_agg_idtest) 413 | list_crps_agg[[idtest]] <- aggmethods_crps 414 | 415 | # QS 416 | qscores_agg <- compute_qscores(agg_methods, n_agg, samples_agg, obs_agg_idtest) 417 | sum_overtest_qscores_agg <- sum_overtest_qscores_agg + qscores_agg 418 | 419 | weights_uniform <- 1 420 | #weights_center <- q_probs * (1 - q_probs) 421 | weights_tails_bis <- (2 * q_probs - 1)^4 422 | weights_tails <- (2 * q_probs - 1)^2 423 | weights_rtail <- q_probs^2 424 | weights_ltail <- (1 - q_probs)^2 425 | weights_matrix <- rbind(weights_uniform, weights_tails_bis, weights_tails, weights_rtail, weights_ltail) 426 | 427 | aggmethods_wcrps <- sapply(seq(n_agg), function(iagg){ 428 | sapply(seq_along(agg_methods), function(imethod){ 429 | (weights_matrix %*% qscores_agg[, imethod, iagg])/M 430 | }) 431 | }, simplify = "array") 432 | #aggmethods_wcrps <- aperm(aggmethods_wcrps, c(1, 3, 2)) 433 | list_wcrps_agg[[idtest]] <- aperm(aggmethods_wcrps, c(1, 3, 2)) 434 | 435 | # FOR BOTTOM 436 | botmethods_wcrps <- sapply(seq(n_bottom), function(ibot){ 437 | sapply(seq_along(bot_methods), function(imethod){ 438 | (weights_matrix %*% qscores_bottom[, imethod, ibot])/M 439 | }) 440 | }, simplify = "array") 441 | # botmethods_wcrps <- aperm(botmethods_wcrps, c(1, 3, 2)) 442 | list_wcrps_bot[[idtest]] <- aperm(botmethods_wcrps, c(1, 3, 2)) 443 | 444 | print(warnings()) 445 | assign("last.warning", NULL, envir = baseenv()) 446 | 447 | do.coverage <- TRUE 448 | if(do.coverage){ 449 | coverage_agg <- sapply(seq_along(agg_methods), function(imethod){ 450 | QF_agg_idtest <- apply(samples_agg[, , imethod], 2, quantile, taus) 451 | agg_90 <- (obs_agg_idtest >= QF_agg_idtest[which(taus == 0.05), ] & obs_agg_idtest <= QF_agg_idtest[which(taus == 0.95), ]) 452 | agg_50 <- (obs_agg_idtest >= QF_agg_idtest[which(taus == 0.25), ] & obs_agg_idtest <= QF_agg_idtest[which(taus == 0.75), ]) 453 | rbind(agg_50, agg_90) 454 | }, simplify = "array") 455 | 456 | coverage_bot <- sapply(seq_along(bot_methods), function(imethod){ 457 | QF_bottom_idtest <- apply(samples_bot[, , imethod], 
2, quantile, taus) 458 | bot_90 <- (obs_bottom_idtest >= QF_bottom_idtest[which(taus == 0.05), ] & obs_bottom_idtest <= QF_bottom_idtest[which(taus == 0.95), ]) 459 | bot_50 <- (obs_bottom_idtest >= QF_bottom_idtest[which(taus == 0.25), ] & obs_bottom_idtest <= QF_bottom_idtest[which(taus == 0.75), ]) 460 | rbind(bot_50, bot_90) 461 | }, simplify = "array") 462 | 463 | coverage_idtest <- file.path(coverage.folder, paste("coverage_", algo.agg, "_", algo.bottom, "_", idtest, ".Rdata", sep = "")) 464 | save(file = coverage_idtest, list = c("coverage_bot", "coverage_agg")) 465 | } 466 | 467 | }# END IDTEST 468 | 469 | avg_qscores_agg <- sum_overtest_qscores_agg/length(allidtest) 470 | avg_qscores_bot <- sum_overtest_qscores_bot/length(allidtest) 471 | 472 | res_job <- file.path(loss.folder, paste("results_HTS_", algo.agg, "_", algo.bottom, "_", idjob, ".Rdata", sep = "")) 473 | #save(file = res_job, list = c("list_crps_agg", "list_crps_bot", "list_mse_bot", "list_mse_agg", "list_wcrps_agg", "list_wcrps_bot", "avg_qscores_agg", "avg_qscores_bot")) 474 | 475 | if(FALSE){ 476 | if(idjob %in% c(1)){ 477 | samples_job <- file.path(work.folder, "samples_agg", paste("samples_agg_", algo.agg, "_", algo.bottom, "_", idjob, ".Rdata", sep = "")) 478 | save(file = samples_job, list = c("list_samples_agg")) 479 | } 480 | } 481 | -------------------------------------------------------------------------------- /smart-meters/code/aggregation_merge.R: -------------------------------------------------------------------------------- 1 | # This script simply merges the results from all the Rdata files produced by "aggregation.R". 2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("config_general.R") 5 | source("config_splitting.R") 6 | source("utils.R") 7 | library(igraph) 8 | 9 | load(file.path(work.folder, "myinfo.Rdata")) 10 | 11 | node_nbkids <- apply(Sagg, 1, sum) 12 | node_order <- sort(node_nbkids, index = T, decreasing = T)$ix 13 | 14 | ntest <- length(test$id) 15 | n_bottom <- length(bottomSeries) 16 | 17 | do.twentyfour <- FALSE 18 | if(do.twentyfour){ 19 | tday <- tday[seq(1, 48, 2)] 20 | } 21 | 22 | #nbperjob <- 276 23 | #nbperjob <- 69 24 | #njobs <- ntest/nbperjob 25 | 26 | #nbperjob <- 123 27 | #njobs <- 36 28 | 29 | #nbperjob <- 130 30 | #njobs <- 34 31 | 32 | #nbperjob <- 138 33 | #njobs <- 32 34 | 35 | 36 | nbperjob <- 368 37 | njobs <- 12 38 | 39 | leaves <- V(itree)[degree(itree, mode="out") == 0] 40 | agg_nodes <- V(itree)[degree(itree, mode="out") != 0] 41 | 42 | depth_aggnodes <- sapply(agg_nodes, function(agg_node){ 43 | vec <- distances(itree, agg_node, leaves, mode = "out") 44 | max( vec[which(vec!=Inf)]) 45 | }) 46 | 47 | #agg_methods <- c("BASE", "NAIVEBU", "PERMBU", "NAIVEBU-MINT", "PERMBU-MINT") 48 | #color.agg <- c("black", "orange", "darkblue") 49 | #bot_methods <- c("BASE", "BASE-MINT") 50 | #color.bot <- c("black") 51 | 52 | #agg_methods <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT", "PERMBU-MEANCOMB") 53 | #color.agg <- c("grey", "orange", "cyan", "purple", "darkblue") 54 | #bot_methods <- c("BASE", "BASE-MINT", "BASE-MEANCOMB") 55 | #color.bot <- c("black", "purple", "darkblue") 56 | 57 | #agg_methods <- c("BASE", "NAIVEBU", "PERMBU") 58 | #color.agg <- c("grey", "orange", "cyan") 59 | #bot_methods <- c("BASE", "BASE-MINT") 60 | #color.bot <- c("black", "purple") 61 | 62 | #bot_methods <- c("BASE", "BASE-MINT", "BASE-MCOMB", "BASE-MCOMBRECON") 63 | #color.bot <- c("black", "purple", "darkgreen", "darkblue") 64 | #agg_methods <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT",
"PERMBU-MCOMB", "PERMBU-MCOMBRECON") 65 | #color.agg <- c("grey", "orange", "cyan", "purple", "darkgreen", "darkblue") 66 | 67 | 68 | 69 | ##### JASA PAPER 70 | bot_methods <- c("BASE", "BASE-MINT", "MINTdiag", "MINTshrink") 71 | color.bot <- c("black", "purple", "red", "green") 72 | 73 | agg_methods <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT", "NAIVEBU-MINT", "MINTdiag", "MINTshrink") 74 | color.agg <- c("black", "orange", "purple", "cyan", "pink", "red", "green") 75 | 76 | pch.agg <- c(8, 0, 2, 2, 0, 1, 3) 77 | pch.bot <- c(8, 2, 1, 3) 78 | lty.agg <- c(1, 3, 2, 2, 3, 6, 5) 79 | lty.bot <- c(1, 2, 6, 5) 80 | 81 | 82 | if(FALSE){ 83 | bot_methods <- c("BASE", "BASE-MINT", "BASE-MCOMB", "BASE-MCOMBRECON", "PROBMINT") 84 | color.bot <- c("black", "purple", "darkgreen", "darkblue", "green") 85 | agg_methods <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT", "PERMBU-MCOMB", 86 | "PERMBU-MCOMBRECON", "PERMBU-MCOMBUNRECON", "NAIVEBU-MINT", "PROBMINT") 87 | color.agg <- c("black", "orange", "cyan", "purple", "darkgreen", "darkblue", "red", "pink", "green") 88 | 89 | agg_better_names <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT", "PERMBU-GTOP1", "PERMBU-GTOP2", "PERMBU-COMB") 90 | bot_better_names <- c("BASE", "PERMBU-MINT", "PERMBU-GTOP1", "PERMBU-GTOP2") 91 | } 92 | if(FALSE){ 93 | bot_methods <- c("BASE", "BASE-MINT", "BASE-MCOMB", "BASE-MCOMBRECON") 94 | color.bot <- c("black", "purple", "darkgreen", "darkblue") 95 | agg_methods <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT", "PERMBU-MCOMB", 96 | "PERMBU-MCOMBRECON", "PERMBU-MCOMBUNRECON") 97 | color.agg <- c("black", "orange", "cyan", "purple", "darkgreen", "darkblue", "red") 98 | 99 | agg_better_names <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT", "PERMBU-GTOP1", "PERMBU-GTOP2", "PERMBU-COMB") 100 | bot_better_names <- c("BASE", "PERMBU-MINT", "PERMBU-GTOP1", "PERMBU-GTOP2") 101 | } 102 | 103 | 104 | 105 | wcrps_agg <- array(NA, c(5, n_agg, ntest, length(agg_methods))) 106 | crps_agg <- array(NA, c(n_agg, ntest, length(agg_methods))) 107 | 108 | wcrps_bottom <- array(NA, c(5, n_bottom, ntest, length(bot_methods))) 109 | crps_bottom <- array(NA, c(n_bottom, ntest, length(bot_methods))) 110 | 111 | mse_agg <- array(NA, c(n_agg, ntest, length(agg_methods))) 112 | mse_bottom <- array(NA, c(n_bottom, ntest, length(bot_methods))) 113 | 114 | total_qscores_agg <- total_qscores_bot <- 0 115 | 116 | for(idjob in seq(njobs)){ 117 | print(idjob) 118 | allidtest <- (idjob - 1) * nbperjob + seq(nbperjob) 119 | 120 | if(nbperjob == 123 && idjob == 36){ 121 | allidtest <- 4306:4416 122 | #allidtest <- 4291:4416 123 | } 124 | 125 | res_job <- file.path(loss.folder, paste("results_HTS_", algo.agg, "_", algo.bottom, "_", idjob, ".Rdata", sep = "")) 126 | load(res_job) 127 | 128 | # crps agg 129 | list_crps_agg_nonull <- list_crps_agg[-which(sapply(list_crps_agg, is.null))] 130 | mat_crps_agg <- sapply(seq_along(list_crps_agg_nonull), function(i){list_crps_agg_nonull[[i]]}, simplify = 'array') 131 | 132 | # wcrps agg 133 | list_wcrps_agg_nonull <- list_wcrps_agg[-which(sapply(list_wcrps_agg, is.null))] 134 | mat_wcrps_agg <- sapply(seq_along(list_wcrps_agg_nonull), function(i){list_wcrps_agg_nonull[[i]]}, simplify = 'array') 135 | 136 | # wcrps bot 137 | list_wcrps_bot_nonull <- list_wcrps_bot[-which(sapply(list_wcrps_bot, is.null))] 138 | mat_wcrps_bot <- sapply(seq_along(list_wcrps_bot_nonull), function(i){list_wcrps_bot_nonull[[i]]}, simplify = 'array') 139 | 140 | # crps bot 141 | list_crps_bot_nonull <- list_crps_bot[-which(sapply(list_crps_bot, 
is.null))] 142 | mat_crps_bot <- sapply(seq_along(list_crps_bot_nonull), function(i){list_crps_bot_nonull[[i]]}, simplify = 'array') 143 | 144 | 145 | # 146 | crps_bottom[, allidtest, ] <- aperm(mat_crps_bot, c(1, 3, 2)) 147 | wcrps_bottom[, , allidtest, ] <- aperm(mat_wcrps_bot, c(1, 2, 4, 3)) 148 | 149 | crps_agg[, allidtest,] <- aperm(mat_crps_agg, c(1, 3, 2)) 150 | wcrps_agg[, , allidtest,] <- aperm(mat_wcrps_agg, c(1, 2, 4, 3)) 151 | 152 | 153 | total_qscores_agg <- total_qscores_agg + avg_qscores_agg 154 | total_qscores_bot <- total_qscores_bot + avg_qscores_bot 155 | 156 | list_mse_agg_nonull <- list_mse_agg[-which(sapply(list_mse_agg, is.null))] 157 | mat_mse_agg <- sapply(seq_along(list_mse_agg_nonull), function(i){list_mse_agg_nonull[[i]]}, simplify = 'array') 158 | list_mse_bot_nonull <- list_mse_bot[-which(sapply(list_mse_bot, is.null))] 159 | mat_mse_bot <- sapply(seq_along(list_mse_bot_nonull), function(i){list_mse_bot_nonull[[i]]}, simplify = 'array') 160 | 161 | mse_agg[, allidtest,] <- aperm(mat_mse_agg, c(1, 3, 2)) 162 | mse_bottom[, allidtest,] <- aperm(mat_mse_bot, c(1, 3, 2)) 163 | } 164 | 165 | total_qscores_agg <- total_qscores_agg / njobs 166 | total_qscores_bot <- total_qscores_bot / njobs 167 | 168 | 169 | # crps_agg total_qscores_agg 170 | # crps_bottom total_qscores_bot 171 | 172 | # AGG MSE 173 | mse_agg_byhour <- sapply(seq(n_agg), function(iagg){ 174 | sapply(seq_along(agg_methods), function(imethod){ 175 | res <- apply(matrix(mse_agg[iagg, , imethod], ncol = 48, byrow = T), 2, mean) 176 | 177 | if(do.twentyfour){ 178 | res <- sapply(seq(1, 48, by = 2), function(i){ 179 | mean(res[seq(i, i+1)]) 180 | }) 181 | } 182 | res 183 | }) 184 | }, simplify = 'array') 185 | 186 | # BOT MSE 187 | mse_bot_byhour <- sapply(seq(n_bottom), function(ibot){ 188 | sapply(seq_along(bot_methods), function(imethod){ 189 | res <- apply(matrix(mse_bottom[ibot, , imethod], ncol = 48, byrow = T), 2, mean) 190 | if(do.twentyfour){ 191 | res <- sapply(seq(1, 48, by = 2), function(i){ 192 | mean(res[seq(i, i+1)]) 193 | }) 194 | } 195 | res 196 | }) 197 | }, simplify = 'array') 198 | 199 | # BOT CRPS 200 | crps_bot_byhour <- sapply(seq(n_bottom), function(ibot){ 201 | sapply(seq_along(bot_methods), function(imethod){ 202 | res <- apply(matrix(crps_bottom[ibot, , imethod], ncol = 48, byrow = T), 2, mean) 203 | if(do.twentyfour){ 204 | res <- sapply(seq(1, 48, by = 2), function(i){ 205 | mean(res[seq(i, i+1)]) 206 | }) 207 | } 208 | res 209 | }) 210 | }, simplify = 'array') 211 | 212 | # AGG CRPS 213 | crps_agg_byhour <- sapply(seq(n_agg), function(iagg){ 214 | sapply(seq_along(agg_methods), function(imethod){ 215 | res <- apply(matrix(crps_agg[iagg, , imethod], ncol = 48, byrow = T), 2, mean) 216 | if(do.twentyfour){ 217 | res <- sapply(seq(1, 48, by = 2), function(i){ 218 | mean(res[seq(i, i+1)]) 219 | }) 220 | } 221 | res 222 | }) 223 | }, simplify = 'array') 224 | 225 | # AGG WCRPS 226 | wcrps_agg_byhour <- sapply(seq(5), function(iweight){ 227 | sapply(seq(n_agg), function(iagg){ 228 | sapply(seq_along(agg_methods), function(imethod){ 229 | res <- apply(matrix(wcrps_agg[iweight, iagg, , imethod], ncol = 48, byrow = T), 2, mean) 230 | if(do.twentyfour){ 231 | res <- sapply(seq(1, 48, by = 2), function(i){ 232 | mean(res[seq(i, i+1)]) 233 | }) 234 | } 235 | res 236 | }) 237 | }, simplify = 'array') 238 | }, simplify = 'array') 239 | 240 | wcrps_agg_byhour <- aperm(wcrps_agg_byhour, c(4, 1, 2, 3)) 241 | 242 | # BOT WCRPS 243 | wcrps_bot_byhour <- sapply(seq(5), function(iweight){ 244 | 
sapply(seq(n_bottom), function(ibot){ 245 | sapply(seq_along(bot_methods), function(imethod){ 246 | res <- apply(matrix(wcrps_bottom[iweight, ibot, , imethod], ncol = 48, byrow = T), 2, mean) 247 | if(do.twentyfour){ 248 | res <- sapply(seq(1, 48, by = 2), function(i){ 249 | mean(res[seq(i, i+1)]) 250 | }) 251 | } 252 | res 253 | }) 254 | }, simplify = 'array') 255 | }, simplify = 'array') 256 | 257 | wcrps_bot_byhour <- aperm(wcrps_bot_byhour, c(4, 1, 2, 3)) 258 | 259 | 260 | res_info <- getInfoNode("nb_nodes") 261 | #res_info <- getInfoNode("kwh") 262 | agg_nodes_order <- sort(res_info$info_nodes_agg, index = T, decreasing = T)$ix 263 | bot_nodes_order <- sort(res_info$info_nodes_bottom, index = T, decreasing = T)$ix 264 | 265 | 266 | -------------------------------------------------------------------------------- /smart-meters/code/basef.R: -------------------------------------------------------------------------------- 1 | # This script produces the base forecasts for both the bottom-level and aggregate-level series. 2 | # KDE is used for the bottom-level series, and double exponential smoothing is used for the aggregate-level series. 3 | # The arguments include 4 | # "do.agg": forecasts are produced for the aggregate-level series? (True or False) 5 | # "algo": the forecasting algorithm ("KD-IC-NML", "DETS" or "Uncond") 6 | # "alliseries": the index of all series for which we want to produce the forecasts 7 | rm(list = ls()) 8 | print(base::date()) 9 | args = (commandArgs(TRUE)) 10 | if(length(args) == 0){ 11 | do.agg <- F 12 | algo <- c("KD-IC-NML") 13 | #alliseries <- c(1, 20, 100) 14 | alliseries <- 320 15 | }else{ 16 | 17 | for(i in 1:length(args)){ 18 | print(args[[i]]) 19 | } 20 | 21 | algo <- args[[1]] 22 | do.agg <- as.logical(args[[2]]) 23 | alliseries <- NULL 24 | for(i in seq(3, length(args))){ 25 | alliseries <- c(alliseries, as.numeric(args[[i]])) 26 | } 27 | } 28 | 29 | print(algo) 30 | 31 | source("config_paths.R") 32 | source("config_general.R") 33 | source("config_splitting.R") 34 | source("utils.R") 35 | 36 | library(parallel) 37 | library(fBasics) 38 | library(msm) 39 | library(gtools) 40 | library(forecast) 41 | library(abind) 42 | library(glmnet) 43 | 44 | load(file.path(work.folder, "myinfo.Rdata")) 45 | 46 | algos_allowed <- c("Uncond", "KD-IC-NML", "DETS") 47 | stopifnot(algo %in% algos_allowed) 48 | 49 | for(iseries in alliseries){ 50 | 51 | print(base::date()) 52 | print(iseries) 53 | if(do.agg){ 54 | idseries <- aggSeries[iseries] 55 | load(file.path(aggseries.folder, paste("series-", idseries, ".Rdata", sep = ""))) 56 | }else{ 57 | idseries <- bottomSeries[iseries] 58 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 59 | } 60 | 61 | print(algo) 62 | res_file <- file.path(basef.folder, algo, paste("results_", idseries, "_", algo, ".Rdata", sep = "")) 63 | dir.create(file.path(basef.folder, algo), showWarnings = FALSE) 64 | 65 | if(algo == "DETS"){ 66 | 67 | do.logtrans <- FALSE 68 | if(do.logtrans){ 69 | my_ts <- log(demand) 70 | }else{ 71 | my_ts <- demand 72 | } 73 | 74 | ids_future <- test$id 75 | nb_futuredays <- length(seq_testing_interval)/48 76 | 77 | all_qf <- all_mf <- all_sd <- all_mfsample <- vector("list", nb_futuredays) 78 | mydays <- seq(1, nb_futuredays) 79 | 80 | for(id_future_day in mydays){ 81 | print(id_future_day) 82 | print(base::date()) 83 | 84 | if(id_future_day == 1){ 85 | ids_past <- learn$id 86 | n_past_obs <- length(ids_past) 87 | }else{ 88 | n_past_obs <- n_past_obs_tbats 89 | ids_past <- 
tail(learn$id, n_past_obs) 90 | } 91 | 92 | offset_nhours <- (id_future_day - 1) * 48 93 | ids_future_hours <- ids_future[offset_nhours + seq(1, 48)] 94 | 95 | if(offset_nhours > 0){ 96 | ids_past_actual <- c(ids_past, ids_future)[offset_nhours + seq(n_past_obs)] 97 | }else{ 98 | ids_past_actual <- ids_past 99 | } 100 | 101 | ypast <- as.numeric(my_ts[ids_past_actual]) 102 | 103 | do.optimize <- (id_future_day - 1) %% 7 == 0 104 | 105 | 106 | # initialization 107 | a <- 1/336 * (mean(ypast[seq(336)]) - mean(ypast[336 + seq(336)])) 108 | b <- mean(diff(ypast[seq(336)])) 109 | T_0 <- (a+b)/2 110 | l_start <- mean(ypast[seq(2 * 336)]) - 336.5 * T_0 111 | 112 | # days 113 | nb_obs <- 7 * m_1 114 | indices <- seq(nb_obs) 115 | smoothed_line <- ma(ypast[indices], m_1) 116 | #indices <- seq(m_1/2 + 1, nb_obs - m_1/2) 117 | x <- ypast[indices] - smoothed_line[indices] 118 | mat <- matrix(x, ncol = 48, byrow = T) 119 | D <- apply(mat, 2, mean, na.rm = T) 120 | D <- D - mean(D) 121 | 122 | # weeks 123 | nb_weeks <- 4 124 | indices <- seq(nb_weeks * m_2) 125 | smoothed_line <- ma(ypast[indices], m_2) 126 | x <- ypast[indices] - smoothed_line[indices] - rep(D, nb_weeks * 7) 127 | mat <- matrix(x, ncol = 336, byrow = T) 128 | W <- apply(mat, 2, mean, na.rm = T) 129 | W <- W - mean(W) 130 | 131 | e_0 <- rep(0, m_2) 132 | l_0 <- rep(l_start, m_2) 133 | d_0 <- rep(D, 7) 134 | w_0 <- W 135 | 136 | ### 137 | if(do.optimize){ 138 | N <- 100 139 | THETA <- matrix(runif(N * 4), ncol = 4) 140 | E <- sapply(seq(nrow(THETA)), function(i){ 141 | #print(i) 142 | func_to_optimize(THETA[i, ], y = ypast, e_0 = e_0, l_0 = l_0, d_0 = d_0, w_0 = w_0, do.forecast = FALSE) 143 | }) 144 | id <- sort(E, index = T)$ix[1] 145 | res_optim <- optim(THETA[id, ], fn = func_to_optimize, y = ypast, e_0 = e_0, l_0 = l_0, d_0 = d_0, w_0 = w_0, do.forecast = F, 146 | method = "L-BFGS-B", lower = 0, upper = 1) 147 | if(id_future_day == 1){ 148 | param_file <- file.path(basef.folder, algo, paste("parameters_", idseries, "_", algo, ".Rdata", sep = "")) 149 | save(file = param_file, list = c("res_optim")) 150 | #stop("done") 151 | } 152 | } 153 | 154 | obj_forecast <- iterate(res_optim$par, ypast, e_0, l_0, d_0, w_0, do.forecast = T) 155 | 156 | 157 | if(id_future_day == 1){ 158 | dir.create(file.path(insample.folder, algo), recursive = TRUE, showWarnings = FALSE) 159 | # insample mean 160 | all_mu <- obj_forecast$yhat 161 | insample_condmean_file <- file.path(insample.folder, algo, paste("condmean_", idseries, "_", algo, "_", id_future_day, ".Rdata", sep = "")) 162 | save(file = insample_condmean_file, list = c("all_mu")) 163 | 164 | # residuals COPULA 165 | e_residuals <- obj_forecast$residuals 166 | resid_file <- file.path(insample.folder, algo, paste("residuals_", idseries, "_", algo, "_", id_future_day, ".Rdata", sep = "")) 167 | save(file = resid_file, list = c("e_residuals")) 168 | 169 | # residuals MINT 170 | residuals_MINT <- obj_forecast$residuals 171 | resid_MINT_file <- file.path(insample.folder, algo, paste("residuals_MINT_", idseries, "_", algo, "_", id_future_day, ".Rdata", sep = "")) 172 | save(file = resid_MINT_file, list = c("residuals_MINT")) 173 | } 174 | 175 | all_mf[[id_future_day]] <- obj_forecast$mf 176 | all_qf[[id_future_day]] <- obj_forecast$qf 177 | 178 | all_mfsample[[id_future_day]] <- obj_forecast$mfsample 179 | 180 | } 181 | list_save <- c("all_qf", "all_mf", "all_mfsample") 182 | save(file = res_file, list = list_save) 183 | 184 | }else if(algo == "Uncond"){ 185 | qFlearn <- quantile(demand[learn$id], taus) 
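    # Added note: "Uncond" is the unconditional benchmark; the learning-set
    # quantiles (qFlearn) and mean are replicated unchanged for every
    # half-hour of the test period.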
186 | qFtest <- matrix(rep(qFlearn, length(test$id)), ncol = length(test$id))
187 | 
188 | mFlearn <- mean(demand[learn$id])
189 | mFtest <- rep(mFlearn, length(test$id))
190 | save(file = res_file, list = c("qFtest", "mFtest"))
191 | 
192 | }else if(grepl("KD-D", algo) || grepl("KD-IC", algo)){
193 | 
194 | 
195 | if(grepl("TRC", algo)){
196 | mykernel <- "truncated"
197 | }else if(grepl("NML", algo)){
198 | mykernel <- "normal"
199 | }else if(grepl("LNL", algo)){
200 | mykernel <- "lognormal"
201 | }else{
202 | mykernel <- "normal"
203 | }
204 | 
205 | ### LEARNING
206 | res_learning <- predictkde("learning")
207 | 
208 | results_crps <- sapply(res_learning$results, function(list_vectors){ # collect the nested per-day results into a numeric array
209 | sapply(list_vectors, function(list_two){
210 | sapply(list_two, function(vector){
211 | identity(vector)
212 | }, simplify = "array")
213 | 
214 | }, simplify = "array")
215 | }, simplify = "array")
216 | 
217 | ic_days <- res_learning$ic_days
218 | 
219 | idbest_bandwiths <- idbest_lambda <- NULL
220 | for(ic in seq(3)){
221 | err <- apply(results_crps[, , , which(ic_days == ic)], c(1, 3), mean)
222 | idbest <- which(err == min(err), arr.ind = T)
223 | 
224 | idbest_bandwiths <- c(idbest_bandwiths, idbest[1, 1])
225 | idbest_lambda <- c(idbest_lambda, idbest[1, 2])
226 | }
227 | selected_bandwiths_ic <- res_learning$bandwiths[idbest_bandwiths]
228 | selected_lambdas_ic <- res_learning$lambdas[idbest_lambda]
229 | 
230 | param_file <- file.path(basef.folder, algo, paste("parameters_", idseries, "_", algo, ".Rdata", sep = ""))
231 | bandwiths <- res_learning$bandwiths # grid of candidate bandwidths considered during learning
232 | save(file = param_file, 
233 | list = c("selected_bandwiths_ic", "selected_lambdas_ic", "bandwiths"))
234 | 
235 | ### TESTING
236 | res_testing <- predictkde("testing", selected_bandwiths = selected_bandwiths_ic, selected_lambdas = selected_lambdas_ic)
237 | 
238 | # all_crps <- getItem(res_testing$results, "crps")
239 | all_qf <- getfromlist(res_testing$results, "q_hat")
240 | all_mf <- getfromlist(res_testing$results, "mu_hat")
241 | all_varf <- getfromlist(res_testing$results, "var_hat")
242 | 
243 | save(file = res_file, list = c("all_qf", "all_mf", "all_varf"))
244 | 
245 | ### IN SAMPLE INFO
246 | res_insample_info <- predictkde("insample_info", selected_bandwiths = selected_bandwiths_ic, selected_lambdas = selected_lambdas_ic)
247 | 
248 | # residuals
249 | all_residuals <- getfromlist(res_insample_info$results, "residuals")
250 | e_residuals_unscaled <- unlist(all_residuals)
251 | all_var <- getfromlist(res_insample_info$results, "var_hat")
252 | all_mu <- getfromlist(res_insample_info$results, "mu_hat")
253 | 
254 | all_varhat <- unlist(all_var)
255 | e_residuals <- e_residuals_unscaled/sqrt(all_varhat)
256 | 
257 | # save residuals COPULA
258 | dir.create(file.path(insample.folder, algo), recursive = TRUE, showWarnings = FALSE)
259 | resid_file <- file.path(insample.folder, algo, paste("residuals_", idseries, "_", algo, ".Rdata", sep = ""))
260 | save(file = resid_file, list = c("e_residuals"))
261 | 
262 | # save residuals MINT
263 | residuals_MINT <- e_residuals_unscaled
264 | resid_MINT_file <- file.path(insample.folder, algo, paste("residuals_MINT_", idseries, "_", algo, ".Rdata", sep = ""))
265 | save(file = resid_MINT_file, list = c("residuals_MINT"))
266 | 
267 | # extract insample quantiles, standardized by the conditional mean and sd
268 | all_qf_insample <- getfromlist(res_insample_info$results, "q_hat")
269 | 
270 | all_qfe_insample <- lapply(seq_along(all_qf_insample), function(iday){
271 | t((t(all_qf_insample[[iday]]) - all_mu[[iday]])/sqrt(all_var[[iday]]))
272
| }) 273 | 274 | insample_condmean_file <- file.path(insample.folder, algo, paste("condmean_", idseries, "_", algo, ".Rdata", sep = "")) 275 | save(file = insample_condmean_file, list = c("all_mu")) 276 | } 277 | } 278 | -------------------------------------------------------------------------------- /smart-meters/code/config_general.R: -------------------------------------------------------------------------------- 1 | source("nicefigs.R") 2 | 3 | hour <- rep(c("00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", 4 | "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"), each = 2) 5 | minutes <- rep(c("00", "30"), 24) 6 | tday <- paste(hour, minutes, sep=":") 7 | 8 | abbr.dweek <- c("Mon","Tue","Wed","Thu","Fri", "Sat","Sun") 9 | 10 | taus <- rev(seq(1, 99)/100) 11 | 12 | hours_night <- night_hours <- c(seq(1, 12), 46, 47, 48) 13 | hours_day <- day_hours <- setdiff(seq(1, 48), hours_night) 14 | hours_all <- c(hours_night, hours_day) 15 | index_hours <- sort(hours_all, index = T)$ix 16 | 17 | iday_withmodels <- c(1, seq(10, 90, 10)) 18 | 19 | #n_past_obs_kd <- 90 *48 20 | n_past_obs_kd <- 60 *48 21 | n_past_obs_tbats <- (31 + 28 + 31 + 30)*48 22 | 23 | M <- 120 * 48 # 5760 24 | 25 | q_probs <- seq(M)/(M + 1) 26 | 27 | m_1 <- 48 28 | m_2 <- 336 29 | 30 | algo.agg <- "DETS" 31 | algo.bottom <- "KD-IC-NML" 32 | 33 | min_bandwith <- 10^-6 34 | 35 | print("----") 36 | print(algo.agg) 37 | print(algo.bottom) 38 | print("----") 39 | 40 | -------------------------------------------------------------------------------- /smart-meters/code/config_paths.R: -------------------------------------------------------------------------------- 1 | main.folder <- "../" 2 | 3 | rawdata.folder <- file.path(main.folder, "rawdata") 4 | procdata.folder <- file.path(main.folder, "procdata") 5 | work.folder <- file.path(main.folder, "work") 6 | 7 | initmeters.folder <- file.path(procdata.folder, "initmeters") 8 | mymeters.folder <- file.path(procdata.folder , "mymeters") 9 | aggseries.folder <- file.path(procdata.folder , "aggseries") 10 | 11 | basef.folder <- file.path(work.folder , "basef") 12 | results.folder <- file.path(work.folder , "pdfs") 13 | loss.folder <- file.path(work.folder , "loss") 14 | insample.folder <- file.path(work.folder , "insample") 15 | permutations.folder <- file.path(work.folder , "permutations") 16 | coverage.folder <- file.path(work.folder , "coverage") 17 | 18 | #list.folders <- c(rawdata.folder, procdata.folder, work.folder, initmeters.folder, mymeters.folder, aggseries.folder, 19 | # basef.folder, results.folder, loss.folder, insample.folder, permutations.folder, coverage.folder) 20 | #sapply(list.folders, dir.create) -------------------------------------------------------------------------------- /smart-meters/code/config_splitting.R: -------------------------------------------------------------------------------- 1 | # This script defines the data split into training, validation and test data. 2 | # It also defines some calendar information for the data. 
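# The split below works by generating a 30-minute POSIXct sequence for each
# period and matching it against the sequence for the complete observation
# window, so train$id, validation$id, learn$id and test$id are integer
# positions into the full series. A small sketch of the mechanism (toy dates,
# illustrative *_demo names):
if(FALSE){
  library(lubridate)
  full_demo <- seq(ymd_hms("2010-01-01 00:00:00"), ymd_hms("2010-01-02 23:30:00"), by = "30 min")
  test_demo <- seq(ymd_hms("2010-01-02 00:00:00"), ymd_hms("2010-01-02 23:30:00"), by = "30 min")
  id_demo <- match(test_demo, full_demo)   # positions 49..96 of the full sequence
  stopifnot(identical(id_demo, 49:96))
}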
3 | library(lubridate) 4 | #load(file.path(working.folder, "myinfo.Rdata")) 5 | 6 | startTrain <- ymd_hms("2009-4-20 00:00:00"); endTrain <- ymd_hms("2010-3-31 23:30:00") 7 | startVal <- ymd_hms("2010-4-01 00:00:00"); endVal <- ymd_hms("2010-4-30 23:30:00") 8 | startLearn <- startTrain; endLearn <- endVal 9 | startTest <- ymd_hms("2010-5-01 00:00:00"); endTest <- ymd_hms("2010-7-31 23:30:00") 10 | startObs <- startTrain; endObs <- endTest 11 | 12 | 13 | complete_interval <- lubridate::interval(startObs, endObs) 14 | seq_complete_interval <- seq(int_start(complete_interval), int_end(complete_interval), by = "30 min") 15 | 16 | training_interval <- lubridate::interval(startTrain, endTrain) # 11 months of training 17 | seq_training_interval <- seq(lubridate::int_start(training_interval), lubridate::int_end(training_interval), by = "30 min") 18 | 19 | validation_interval <- lubridate::interval(startVal, endVal) # 1 month of validation 20 | seq_validation_interval <- seq(lubridate::int_start(validation_interval), lubridate::int_end(validation_interval), by = "30 min") 21 | 22 | learning_interval <- lubridate::union(training_interval, validation_interval) 23 | seq_learning_interval <- seq(lubridate::int_start(learning_interval), lubridate::int_end(learning_interval), by = "30 min") 24 | 25 | testing_interval <- lubridate::interval(startTest, endTest) # 3 months of testing 26 | seq_testing_interval <- seq(lubridate::int_start(testing_interval), lubridate::int_end(testing_interval), by = "30 min") 27 | 28 | 29 | stopifnot(lubridate::union(learning_interval, testing_interval) == complete_interval) 30 | 31 | train <- validation <- learn <- test <- NULL 32 | train$id <- match(seq_training_interval, seq_complete_interval) 33 | validation$id <- match(seq_validation_interval, seq_complete_interval) 34 | learn$id <- match(seq_learning_interval, seq_complete_interval) 35 | test$id <- match(seq_testing_interval, seq_complete_interval) 36 | 37 | # Calendar variables for the period 38 | calendar <- NULL 39 | calendar$dweek <- lubridate::wday(seq_complete_interval) 40 | calendar$dweek <- (calendar$dweek - 1) + ((calendar$dweek - 1) == 0)*7 41 | 42 | calendar$periodOfDay <- 2*(lubridate::hour(seq_complete_interval) + 1) - (lubridate::minute(seq_complete_interval) == 0) 43 | calendar$tyear <- lubridate::yday(seq_complete_interval) 44 | calendar$periodOfWeek <- (calendar$dweek - 1)*48 + calendar$periodOfDay 45 | calendar$year <- lubridate::year(seq_complete_interval) 46 | 47 | periodOfCycle <- calendar$periodOfWeek 48 | periodOfCycle[which(periodOfCycle %in% seq(1, 5*48))] <- 1 49 | periodOfCycle[which(periodOfCycle %in% seq(5*48 + 1, 6*48))] <- 2 50 | periodOfCycle[which(periodOfCycle %in% seq(6*48 + 1, 7*48))] <- 3 51 | calendar$periodOfCycle <- periodOfCycle 52 | 53 | hol2009England <- c("2009-1-01", "2009-4-10", "2009-4-13", "2009-5-04", "2009-5-25", "2009-8-31", "2009-12-25", "2009-12-28") 54 | hol2010England <- c("2010-1-01", "2010-4-2", "2010-4-5", "2010-5-3", "2010-5-31", "2010-8-30", "2010-12-27", "2010-12-28") 55 | holEngland <- ymd(c(hol2009England, hol2010England)) 56 | 57 | hol2009Scotland <- c("2009-1-01", "2009-1-02", "2009-4-10", "2009-5-04", "2009-5-25", "2009-8-3", "2009-12-25", "2009-12-28") 58 | hol2010Scotland <- c("2010-1-01", "2010-1-02", "2010-1-03", "2010-1-04", "2010-4-2", "2010-5-3", "2010-5-31", "2010-8-2", "2010-12-27", "2010-12-28") 59 | holScotland <- ymd(c(hol2009Scotland, hol2010Scotland)) 60 | 61 | makeHol <- function(holset){ 62 | daysInPeriod <- 
seq(int_start(complete_interval), int_end(complete_interval), by = "day") 63 | typeday <- rep("WD", length(daysInPeriod)) 64 | 65 | id <- na.omit(match(holset, daysInPeriod)) 66 | typeday[id] <- "NWD" 67 | 68 | dayOfWeek <- lubridate::wday(daysInPeriod) 69 | typeday[which(dayOfWeek == 7 | dayOfWeek == 1)] <- "NWD" 70 | 71 | hol <- as.numeric(typeday !="WD" & dayOfWeek != 7 & dayOfWeek != 1) 72 | hols <- numeric(length(hol)) 73 | 74 | for(i in 2:(length(hol)-1)) 75 | { 76 | if(hol[i-1] == 1 & hol[i] == 0) 77 | hols[i] <- 1 78 | else if(hol[i+1] == 1 & hol[i] == 0) 79 | hols[i] <- -1 80 | } 81 | hols <- hols + 2*hol 82 | hols <- factor(hols, levels = c(0,-1,2,1), labels = c("Normal","Day before","Holiday","Day after")) 83 | return(hols) 84 | } 85 | 86 | #calendar$holidayEngland <- rep(makeHol(holEngland), each = 48) 87 | #calendar$holidayScotland <- rep(makeHol(holScotland), each = 48) 88 | myhol <- ymd(intersect(c(hol2009England, hol2010England), c(hol2009Scotland, hol2010Scotland))) 89 | calendar$holiday <- rep(makeHol(myhol), each = 48) 90 | -------------------------------------------------------------------------------- /smart-meters/code/init_1_rawmeters.R: -------------------------------------------------------------------------------- 1 | # This script loads the raw data and generates multiple (raw) Rdata files 2 | # (metadata, geographical information, smart meter observations, etc) 3 | 4 | rm(list = ls()) 5 | source("config_paths.R") 6 | 7 | library(data.table) 8 | library(dplyr) 9 | library(lubridate) 10 | library(gdata) 11 | library(parallel) 12 | 13 | 14 | # Create Rdata files and save them in S3 15 | DT <- fread(file.path(rawdata.folder, "edrp_elec.csv")) 16 | DT <- DT %>% rename(IDMETER = ANON_ID) 17 | 18 | ######### GEO DATA and META DATA ######### 19 | metaDT <- tbl_df(read.xls(file.path(rawdata.folder, "edrp_metadata.xlsx"), nrows = 14319, skip = 1)) %>% 20 | mutate_each(funs(ymd_hm), firstAdvance) %>% 21 | mutate_each(funs(ymd_hm), lastAdvance) %>% 22 | rename(IDMETER = Hhold_ID) 23 | 24 | geodemoDT <- tbl_df(read.xls(file.path(rawdata.folder, "edrp_geography_data.xlsx"))) %>% 25 | rename(IDMETER = anonID) %>% # replace "--" entries in NUTS1 by "---" and in NUTS4 by "-------" 26 | mutate(NUTS1 = ifelse(NUTS1 == "--", "---", as.character(NUTS1))) %>% 27 | mutate(NUTS4 = ifelse(NUTS4 == "--" | NUTS4 == "", paste(NUTS1, "----", sep = ""), as.character(NUTS4))) %>% 28 | mutate(NUTS2 = substr(NUTS4, 1, 4)) %>% 29 | mutate(NUTS3 = substr(NUTS4, 1, 5)) 30 | 31 | geodemoDT <- geodemoDT %>% 32 | mutate(ACORN_Category = ifelse(ACORN_Category == "" | is.na(ACORN_Category), "-", ACORN_Category)) %>% 33 | mutate(ACORN_Group = as.numeric(ifelse(ACORN_Group == "" | is.na(ACORN_Group), "99", ACORN_Group))) %>% 34 | mutate(ACORN_Type = as.numeric(ifelse(ACORN_Type == "" | is.na(ACORN_Type), "99", ACORN_Type))) %>% 35 | mutate(DEMO1 = paste("D", ACORN_Category, sep = ""), 36 | DEMO2 = paste(DEMO1, sprintf("%02d", ACORN_Group), sep = ""), 37 | DEMO3 = paste(DEMO2, sprintf("%02d", ACORN_Type), sep = "")) 38 | 39 | # infoDT <- inner_join(metaDT, geodemoDT, by = "IDMETER") # 14319 meters (not 14621 meters = 16249 - 1628) 40 | infoDT <- inner_join(metaDT, geodemoDT, by = "IDMETER") 41 | 42 | 43 | allmeters <- infoDT %>% dplyr::select(IDMETER) %>% .$IDMETER 44 | 45 | print("Making info file") 46 | # Create the info file 47 | save(file = file.path(work.folder, "info.Rdata") , list = c("infoDT", "allmeters")) 48 | 49 | # Create a file for each meter 50 | do.it <- TRUE 51 | if(do.it){ 52 | 
print("Making files for each meter") 53 | setmeters <- allmeters 54 | 55 | setmeters <- allmeters[6685:length(allmeters)] 56 | 57 | res <- lapply(setmeters, function(idmeter){ 58 | 59 | if(idmeter%%100 == 0) 60 | print(idmeter) 61 | 62 | infoMeter <- infoDT %>% filter(IDMETER == idmeter) %>% select(firstAdvance, lastAdvance) 63 | firstAdvance <- infoMeter %>% .$firstAdvance 64 | lastAdvance <- infoMeter %>% .$lastAdvance 65 | alldates <- seq(firstAdvance, lastAdvance, by = "30 min") 66 | 67 | navec <- rep(NA, length(alldates)) 68 | dataset <- tbl_df(data.frame(ELECKWH = navec)) # ELECKWH 69 | 70 | #dataset <- tbl_df(data.frame(TIME = alldates, HH = 2*(hour(alldates) + 1) - (minute(alldates) == 0), ELECKWH = navec)) # TIME HH ELECKWH 71 | 72 | meterdata <- filter(DT, IDMETER == idmeter) %>% 73 | mutate_each(funs(dmy_hms), ADVANCEDATETIME) %>% 74 | arrange(ADVANCEDATETIME) # IDMETER ADVANCEDATETIME HH ELECKWH 75 | 76 | datetime <- select(meterdata, ADVANCEDATETIME) %>% .$ADVANCEDATETIME 77 | index <- match(datetime, alldates) 78 | stopifnot(all(!is.na(index))) 79 | 80 | dataset[index, c("ELECKWH")] <- select(meterdata, ELECKWH) 81 | 82 | #dataset[index, c("TIME", "ELECKWH")] <- select(meterdata, ADVANCEDATETIME, ELECKWH) 83 | #dataset <- dataset %>% 84 | # mutate(year = year(TIME)) %>% 85 | # mutate(month = month(TIME)) %>% 86 | # mutate(day = day(TIME)) %>% 87 | # mutate(dayOfWeek = lubridate::wday(TIME, label = T)) %>% 88 | # mutate(dayOfYear = yday(TIME)) 89 | 90 | stopifnot(nrow(dataset) > 0) 91 | save(file = file.path(initmeters.folder, paste("meter-", idmeter, ".Rdata", sep = "")) , list = c("dataset")) 92 | }) 93 | } 94 | 95 | 96 | -------------------------------------------------------------------------------- /smart-meters/code/init_2_MyHierarchy.R: -------------------------------------------------------------------------------- 1 | # This script builds the hierarchy (based on geographical information). 2 | # It also cleans the data and select the meters with the lowest number of 3 | # missing values. 
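# The selection below keeps a meter only if its advance interval covers the
# whole observation window and less than 1% of its readings are missing
# (pctFound > 0.99). A hedged sketch of that filter on toy counts
# (*_demo names are illustrative):
if(FALSE){
  n_demo <- c(20000, 20000, 20000)        # readings recorded per toy meter
  n_na_demo <- c(10, 500, 0)              # missing readings per toy meter
  pctFound_demo <- 1 - n_na_demo/n_demo
  which(pctFound_demo > 0.99)             # meters 1 and 3 survive the cut
}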
4 | 5 | rm(list = ls()) 6 | library(lubridate) 7 | library(gdata) 8 | library(dplyr) 9 | library(igraph) 10 | source("config_paths.R") 11 | source("config_splitting.R") 12 | 13 | makelist <- function(vecintervals){ 14 | sol <- lapply(seq(length(vecintervals)), function(i){list(vecintervals[i])}) 15 | mylist <- lapply(sol, "[[", 1) 16 | } 17 | 18 | load(file.path(work.folder, "info.Rdata")) 19 | 20 | # "UKG" "UKL" "UKJ" "UKI" "UKM" "UKF" "---" "UKK" "UKD" 21 | myregion <- "UKF" 22 | 23 | subInfoDT <- infoDT %>% 24 | filter(NUTS1 %in% myregion) 25 | 26 | allintervals <- subInfoDT %>% 27 | transmute(interval = lubridate::interval(firstAdvance, lastAdvance)) %>% 28 | .$interval 29 | 30 | listintervals <- makelist(allintervals) 31 | 32 | myinterval <- interval(startObs, endObs) 33 | seq_myinterval <- seq(startObs, endObs, by = "30 min") 34 | 35 | matches <- lapply(listintervals, function(oneinterval){ lubridate::intersect(oneinterval, myinterval) == myinterval }) 36 | metersInInterval <- subInfoDT[which(unlist(matches)), ] %>% .$IDMETER 37 | print(length(metersInInterval)) 38 | 39 | 40 | pctFound <- n <- n_na <- n_expected <- n_avail <- numeric(length(metersInInterval)) + NA 41 | listmissing <- vector("list", length(metersInInterval)) 42 | for(i in seq_along(metersInInterval)){ 43 | print(i) 44 | meter <- metersInInterval[i] 45 | 46 | infoMeter <- subInfoDT %>% filter(IDMETER == meter) %>% select(firstAdvance, lastAdvance) 47 | firstAdvance <- infoMeter %>% .$firstAdvance 48 | lastAdvance <- infoMeter %>% .$lastAdvance 49 | alldates <- seq(firstAdvance, lastAdvance, by = "30 min") 50 | ids <- match(seq_myinterval, alldates) 51 | stopifnot(all(!is.na(ids))) 52 | 53 | load(file.path(initmeters.folder, paste("meter-", meter, ".Rdata", sep = ""))) 54 | n[i] <- nrow(dataset) 55 | n_expected[i] <- length(alldates) 56 | n_na[i] <- length(which(is.na(dataset[ids, 1]))) 57 | n_avail[i] <- n_expected[i] - n_na[i] 58 | pctFound[i] <- 1 - (n_na[i]/n[i]) 59 | } 60 | 61 | id <- which(pctFound > 0.99) 62 | finalmeters <- metersInInterval[id] 63 | 64 | # Keep meters with few missing values 65 | x <- subInfoDT %>% filter(IDMETER %in% finalmeters) 66 | 67 | # Keep meters with complete NUTS information 68 | x <- x %>% filter(!grepl("-", NUTS4)) 69 | 70 | # Remove few weird meters 71 | x <- x %>% filter(!IDMETER %in% c(6228, 13154, 9503)) 72 | 73 | # Some meter with high consumption during the night 74 | x <- x %>% filter(!IDMETER %in% c(12874L, 6951L, 14738L, 925L, 8255L)) 75 | 76 | # Each node must have at least two children nodes (NUTS HIERARCHY) 77 | idset <- which(x[, "NUTS4"] == "UKF2100") 78 | res <- split(idset, c(1,2)) 79 | x[res[[1]], "NUTS4"] <- "UKF2100" 80 | x[res[[2]], "NUTS4"] <- "UKF2101" 81 | 82 | idset <- which(x[, "NUTS4"] == "UKF2202") 83 | res <- split(idset, c(1,2)) 84 | x[res[[1]], "NUTS4"] <- "UKF2202" 85 | x[res[[2]], "NUTS4"] <- "UKF2209" 86 | 87 | idset <- which(x[, "NUTS4"] == "UKF1100") 88 | res <- split(idset, c(1,2)) 89 | x[res[[1]], "NUTS4"] <- "UKF1100" 90 | x[res[[2]], "NUTS4"] <- "UKF1101" 91 | 92 | idset <- which(x[, "NUTS4"] == "UKF1400") 93 | res <- split(idset, c(1,2)) 94 | x[res[[1]], "NUTS4"] <- "UKF1400" 95 | x[res[[2]], "NUTS4"] <- "UKF1401" 96 | 97 | for(mynuts4 in c("UKF3004", "UKF3006")){ 98 | idset <- which(x[, "NUTS4"] == mynuts4) 99 | x[idset, "NUTS4"] <- paste("UKF31", substr(mynuts4, 6,7), sep = "") 100 | x[idset, "NUTS3"] <- "UKF31" 101 | } 102 | 103 | # Remove some branches in DEMO HIERARCHY 104 | x <- x %>% filter(DEMO2 != "D517", DEMO1 != "D2") 105 | 106 | # 
Save myinfo.Rdata 107 | myinfoDT <- x 108 | bottomSeries <- myinfoDT %>% .$IDMETER 109 | n_bottom <- length(bottomSeries) 110 | 111 | myedges <- data.frame(rbind(cbind(myinfoDT$NUTS1,myinfoDT$NUTS2), 112 | cbind(myinfoDT$NUTS2, myinfoDT$NUTS3), 113 | cbind(myinfoDT$NUTS3, myinfoDT$NUTS4), 114 | cbind(myinfoDT$NUTS4, myinfoDT$IDMETER))) 115 | itree <- graph.data.frame(myedges) 116 | itree <- simplify(itree, remove.loops = F) 117 | # plot(itree, layout = layout.reingold.tilford(itree, root=1, circular=T), vertex.label.cex = 0.4, vertex.size = 1, vertex.label.dist = .2) 118 | # MUCH BETTER: plot(itree, layout = layout.reingold.tilford(itree, root=1, circular=T), vertex.size=0, vertex.label=NA, edge.arrow.size=0) 119 | #browser() 120 | 121 | # Compute Sagg - for each agg. node, compute the associated leafs 122 | all.nodes.names <- V(itree)$name 123 | agg.nodes.names <- aggSeries <- all.nodes.names[which(degree(itree, V(itree), "out")!=0)] 124 | n_agg <- length(agg.nodes.names) 125 | Sagg <- matrix(0, nrow = n_agg, ncol = n_bottom) 126 | 127 | for(i in seq_along(agg.nodes.names)){ 128 | agg.node.name <- agg.nodes.names[i] 129 | reachable <- which(shortest.paths(itree, agg.node.name, mode="out") != Inf) 130 | terminal.nodes <- reachable[which(degree(itree, reachable, mode="out") == 0)] 131 | #print(terminal.nodes) 132 | terminal.nodes.names <- all.nodes.names[terminal.nodes] 133 | #myinfoDT %>% filter(IDMETER %in% all.nodes.names[terminal.nodes]) %>% select(NUTS4) 134 | ids <- match(terminal.nodes.names, bottomSeries) 135 | stopifnot(all(!is.na(ids))) 136 | Sagg[i, ids] <- 1 137 | } 138 | 139 | save(file = file.path(work.folder, "myinfo.Rdata") , list = c("myinfoDT", "bottomSeries", "itree", "Sagg", "aggSeries", "n_agg", "n_bottom")) 140 | 141 | 142 | -------------------------------------------------------------------------------- /smart-meters/code/init_3_bottomlevel_meters.R: -------------------------------------------------------------------------------- 1 | # This script will load the Rdata files for the smart meters and preprocess it. 2 | # A new Rdata file will be saved with the "cleaned" data (one per smart meter). 3 | 4 | rm(list = ls()) 5 | library(lubridate) 6 | library(gdata) 7 | library(dplyr) 8 | source("config_paths.R") 9 | source("config_splitting.R") 10 | 11 | load(file.path(work.folder, "myinfo.Rdata")) 12 | 13 | ################ Computing the replecements id for special days ################ 14 | holidays_toprocess <- c("2009-5-04", "2009-5-25", "2009-8-31", "2009-12-25", 15 | "2009-12-28", "2010-1-01", "2010-4-2", "2010-4-5", "2010-5-3", "2010-5-31") 16 | 17 | index_final <- seq(length(seq_complete_interval)) 18 | 19 | for(i in seq_along(holidays_toprocess)){ 20 | 21 | target <- ymd(holidays_toprocess[i]) 22 | 23 | idhours <- which(year(seq_complete_interval) == year(target) & 24 | month(seq_complete_interval) == month(target) & 25 | day(seq_complete_interval) == day(target)) 26 | 27 | stopifnot(length(idhours) == 48) 28 | 29 | if(i %in% c(1, 2, 3, 8, 9, 10)){ 30 | id <- 48 31 | }else if(i %in% c(4, 6, 7)){ 32 | id <- -48 33 | }else if(i == 5){ 34 | id <- -48*2 35 | } 36 | #1. Replace mon by tue \\ +48 37 | #2. Replace mon by tue \\ +48 38 | #3. Replace mon by tue \\ +48 39 | #4. 24/12 replaced by 23nd \\ -48 40 | #5. 25/12 replaced by 23nd \\ -48 *2 41 | #6. 1/1 replaced by 31/12 \\ -48 42 | #7. 24/4 Fri by Thu \\ -48 43 | #8. 5/4 Mon by Tue \\ +48 44 | #9. 3/5 same \\ +48 45 | #10. 
31/5 same \\ +48 46 | index_final[idhours] <- index_final[idhours + id] 47 | } 48 | ################################################################################ 49 | 50 | for(idseries in bottomSeries){ 51 | print(idseries) 52 | load(file.path(initmeters.folder, paste("meter-", idseries, ".Rdata", sep = ""))) 53 | 54 | ### 55 | infoMeter <- myinfoDT %>% filter(IDMETER == idseries) %>% select(firstAdvance, lastAdvance) 56 | firstAdvance <- infoMeter %>% .$firstAdvance 57 | lastAdvance <- infoMeter %>% .$lastAdvance 58 | alldates <- seq(firstAdvance, lastAdvance, by = "30 min") 59 | ids <- match(seq_complete_interval, alldates) 60 | stopifnot(all(!is.na(ids))) 61 | 62 | load(file.path(initmeters.folder, paste("meter-", idseries, ".Rdata", sep = ""))) 63 | obs <- dataset[ids,] %>% .$ELECKWH 64 | npoints <- length(obs) 65 | ### 66 | 67 | ################ Filling missing observations ############### 68 | id_na <- which(is.na(obs)) 69 | if(length(id_na) > 0){ 70 | 71 | for(id in id_na){ 72 | 73 | idrepl <- id + c(-48 *2, - 48, -2, -1, 1, 2, 48, 48 * 2) 74 | 75 | idok <- idrepl[which(idrepl <= npoints & idrepl > 0)] 76 | stopifnot(length(idok) > 0) 77 | 78 | repl <- obs[idok] 79 | repl <- repl[which(!is.na(repl))] 80 | stopifnot(length(repl) > 0) 81 | 82 | val <- median(repl) 83 | obs[id] <- val 84 | } 85 | } 86 | 87 | # Replacing special days 88 | obs <- obs[index_final] 89 | demand <- obs 90 | 91 | #if(idseries == bottomSeries[272]){ 92 | # stop("done") 93 | #} 94 | 95 | #if(FALSE){ 96 | # reverting the discretaziation process 97 | breaks <- sort(unique(demand)) 98 | sds <- numeric(length(breaks)) 99 | for(i in seq_along(breaks)){ 100 | if(i == 1){ # some have a lot of repetitions of the min 101 | sds[i] <- mean(c((breaks[i] - 0)/3, (breaks[i+1] - breaks[i])/3)) 102 | 103 | }else if(i == length(breaks)){ # maximum 3 reptitions of the max (so, not important) 104 | sds[i] <- (breaks[i] - breaks[i-1])/10 105 | }else{ 106 | #(i != 1 && i != length(breaks)){ 107 | 108 | sds[i] <- mean(c((breaks[i] - breaks[i -1])/3, (breaks[i+1] - breaks[i])/3)) 109 | } 110 | } 111 | demand_perturbed <- rnorm(length(demand), mean = demand, sd = sds[match(demand, breaks)]) 112 | demand_perturbed[which(demand_perturbed < 0)] <- 0 # very few will be negative 113 | 114 | demand <- demand_perturbed 115 | #} 116 | 117 | save(file = file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = "")) , list = c("demand")) 118 | } 119 | 120 | 121 | if(FALSE){ 122 | for(idseries in bottomSeries){ 123 | print(idseries) 124 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 125 | load(file.path(initmeters.folder, paste("meter-", idseries, ".Rdata", sep = ""))) 126 | 127 | infoMeter <- myinfoDT %>% filter(IDMETER == idseries) %>% select(firstAdvance, lastAdvance) 128 | firstAdvance <- infoMeter %>% .$firstAdvance 129 | lastAdvance <- infoMeter %>% .$lastAdvance 130 | alldates <- seq(firstAdvance, lastAdvance, by = "30 min") 131 | ids <- match(seq_complete_interval, alldates) 132 | 133 | myid <- 48 * 7 * 20 + seq(48 * 7 * 2) 134 | 135 | 136 | 137 | x <- dataset[ids[myid], 1] %>% .$ELECKWH 138 | plot.ts(demand[myid]) 139 | lines(x, col = "red") 140 | } 141 | dev.off() 142 | } 143 | -------------------------------------------------------------------------------- /smart-meters/code/init_4_aggregated_meters.R: -------------------------------------------------------------------------------- 1 | # This script will load the cleaned data computed using the script "init_3_bottomlevel_meters.R". 
2 | # It will produce the aggregated series based on the hierarchy defined in "init_2_MyHierarchy.R". 3 | # These series will be saved in seperate Rdata files. 4 | 5 | rm(list = ls()) 6 | library(dplyr) 7 | source("config_paths.R") 8 | 9 | 10 | load(file.path(work.folder, "myinfo.Rdata")) 11 | # "myinfoDT", "bottomSeries", "itree", "Sagg", "aggSeries", "n_agg", "n_bottom" 12 | 13 | #hierarchy <- "NUTS" 14 | 15 | nobs <- 22464 16 | 17 | bottom_series <- matrix(NA, nrow = nobs, ncol = n_bottom) 18 | for(j in seq_along(bottomSeries)){ 19 | if(j %% 100 == 0) 20 | print(j) 21 | 22 | idseries <- bottomSeries[j] 23 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 24 | bottom_series[, j] <- demand 25 | } 26 | stopifnot(all(!apply(apply(bottom_series, 2, is.na), 2, any))) 27 | 28 | 29 | agg_series <- tcrossprod(Sagg, bottom_series) 30 | 31 | for(iagg in seq(n_agg)){ 32 | demand <- agg_series[iagg, ] 33 | code <- aggSeries[iagg] 34 | save(file = file.path(aggseries.folder, paste("series-", code, ".Rdata", sep = "")) , list = c("demand")) 35 | } 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /smart-meters/code/makebf_byhalfour.R: -------------------------------------------------------------------------------- 1 | # This script gives a new representation of the base forecasts to simplify the computational load 2 | # when applying the permutations. It creates new Rdata files where each file gives the forecasts by half-hour. 3 | rm(list = ls()) 4 | 5 | source("config_paths.R") 6 | source("config_general.R") 7 | source("config_splitting.R") 8 | source("utils.R") 9 | 10 | library(ff) 11 | load(file.path(work.folder, "myinfo.Rdata")) 12 | 13 | ntest <- length(test$id) 14 | 15 | QF_bottom <- vector("list", length(bottomSeries)) 16 | QF_agg <- vector("list", length(aggSeries)) 17 | 18 | obs_agg <- revisedmean_agg <- mean_agg <- matrix(NA, nrow = length(aggSeries), ncol = ntest) 19 | obs_bottom <- revisedmean_bottom <- mean_bottom <- matrix(NA, nrow = length(bottomSeries), ncol = ntest) 20 | 21 | for(do.agg in c(TRUE, FALSE)){ 22 | 23 | if(do.agg){ 24 | set_series <- aggSeries 25 | algo <- algo.agg 26 | }else{ 27 | set_series <- bottomSeries 28 | algo <- algo.bottom 29 | } 30 | 31 | for(j in seq_along(set_series)){ 32 | # if(j%%100 == 0) 33 | print(j) 34 | 35 | if(do.agg){ 36 | QF_agg[[j]] <- matrix(NA, nrow = length(taus), ncol = ntest) 37 | }else{ 38 | QF_bottom[[j]] <- matrix(NA, nrow = length(taus), ncol = ntest) 39 | } 40 | 41 | idseries <- set_series[j] 42 | 43 | load(file.path(basef.folder, algo, paste("results_", idseries, "_", algo, ".Rdata", sep = ""))) 44 | 45 | load(file.path(work.folder, "revisedf", paste("revised_meanf_", algo.agg, "_", algo.bottom, "_", idseries, ".Rdata", sep = "")) ) 46 | # mu_revised_alltest 47 | 48 | if(do.agg){ 49 | load(file.path(aggseries.folder, paste("series-", idseries, ".Rdata", sep = ""))) 50 | obs_agg[j, ] <- demand[test$id] 51 | revisedmean_agg[j, ] <- mu_revised_alltest 52 | mean_agg[j, ] <- unlist(all_mf) 53 | }else{ 54 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 55 | obs_bottom[j, ] <- demand[test$id] 56 | revisedmean_bottom[j, ] <- mu_revised_alltest 57 | mean_bottom[j, ] <- unlist(all_mf) 58 | } 59 | 60 | 61 | for(idtest in seq(ntest)){ 62 | iday <- getInfo(idtest)$iday 63 | hour <- getInfo(idtest)$hour 64 | 65 | if(do.agg){ 66 | QF_agg[[j]][, idtest] <- all_qf[[iday]][, hour] 67 | }else{ 68 | QF_bottom[[j]][, idtest] <- 
all_qf[[iday]][, hour] 69 | } 70 | }# idtest 71 | }# series 72 | }# AGG and BOTTOM 73 | 74 | #stop("done") 75 | dir.create(file.path(work.folder, "byidtest"), recursive = TRUE, showWarnings = FALSE) 76 | 77 | for(idtest in seq(ntest)){ 78 | print(idtest) 79 | res_byidtest_file <- file.path(work.folder, "byidtest", paste("results_byidtest_", algo.agg, "_", algo.bottom, "_", idtest, ".Rdata", sep = "")) 80 | 81 | QF_agg_idtest <- sapply(seq(length(aggSeries)), function(j){ 82 | QF_agg[[j]][, idtest] 83 | }) 84 | 85 | QF_bottom_idtest <- sapply(seq(length(bottomSeries)), function(j){ 86 | QF_bottom[[j]][, idtest] 87 | }) 88 | 89 | obs_agg_idtest <- obs_agg[, idtest] 90 | obs_bottom_idtest <- obs_bottom[, idtest] 91 | 92 | revisedmean_bottom_idtest <- revisedmean_bottom[, idtest] 93 | revisedmean_agg_idtest <- revisedmean_agg[, idtest] 94 | 95 | mean_bottom_idtest <- mean_bottom[, idtest] 96 | mean_agg_idtest <- mean_agg[, idtest] 97 | 98 | save(file = res_byidtest_file, list = c("QF_agg_idtest", "QF_bottom_idtest", 99 | "obs_agg_idtest", "obs_bottom_idtest", 100 | "revisedmean_bottom_idtest", 'revisedmean_agg_idtest', 101 | "mean_agg_idtest", "mean_bottom_idtest")) 102 | } 103 | -------------------------------------------------------------------------------- /smart-meters/code/multiplot.R: -------------------------------------------------------------------------------- 1 | # Multiple plot function 2 | # 3 | # ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects) 4 | # - cols: Number of columns in layout 5 | # - layout: A matrix specifying the layout. If present, 'cols' is ignored. 6 | # 7 | # If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE), 8 | # then plot 1 will go in the upper left, 2 will go in the upper right, and 9 | # 3 will go all the way across the bottom. 10 | # 11 | multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { 12 | library(grid) 13 | 14 | # Make a list from the ... 
arguments and plotlist 15 | plots <- c(list(...), plotlist) 16 | 17 | numPlots = length(plots) 18 | 19 | # If layout is NULL, then use 'cols' to determine layout 20 | if (is.null(layout)) { 21 | # Make the panel 22 | # ncol: Number of columns of plots 23 | # nrow: Number of rows needed, calculated from # of cols 24 | layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), 25 | ncol = cols, nrow = ceiling(numPlots/cols)) 26 | } 27 | 28 | if (numPlots==1) { 29 | print(plots[[1]]) 30 | 31 | } else { 32 | # Set up the page 33 | grid.newpage() 34 | pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) 35 | 36 | # Make each plot, in the correct location 37 | for (i in 1:numPlots) { 38 | # Get the i,j matrix positions of the regions that contain this subplot 39 | matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) 40 | 41 | print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, 42 | layout.pos.col = matchidx$col)) 43 | } 44 | } 45 | } -------------------------------------------------------------------------------- /smart-meters/code/nicefigs.R: -------------------------------------------------------------------------------- 1 | # Create nice R figures 2 | 3 | savepdf <- function(file, width=16, height=10) 4 | { 5 | .._fname <- paste(file,".pdf",sep="") 6 | pdf(.._fname, width=width/2.54, height=height/2.54, 7 | pointsize=10) 8 | par(mgp=c(2.2,0.45,0), tcl=-0.4, mar=c(3.3,3.6,1.1,1.1)) 9 | assign(".._fname",.._fname,envir=.GlobalEnv) 10 | } 11 | endpdf <- function() 12 | { 13 | dev.off() 14 | system(paste("pdfcrop",.._fname,.._fname)) 15 | } 16 | -------------------------------------------------------------------------------- /smart-meters/code/permutations.R: -------------------------------------------------------------------------------- 1 | # This script computes the permutations to apply to the samples from the marginals 2 | # in order to restore the dependences among the variables. 
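# Key idea: samples drawn independently from each child's marginal forecast are
# re-ordered with the ranks of that child's historical in-sample residuals.
# Re-ordering changes no marginal, but it restores the empirical rank
# dependence between sibling series. A toy sketch (illustrative *_demo names):
if(FALSE){
  set.seed(1)
  res_a <- rnorm(100); res_b <- res_a + rnorm(100, sd = 0.1)   # dependent residuals
  perm_demo <- apply(cbind(res_a, res_b), 2, rank, ties.method = "random")
  samp_a <- sort(rnorm(100)); samp_b <- sort(rgamma(100, 2))   # independent marginal samples
  shuffled <- cbind(samp_a[perm_demo[, 1]], samp_b[perm_demo[, 2]])
  cor(shuffled)[1, 2]   # close to cor(res_a, res_b); the marginals are untouched
}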
3 | rm(list = ls()) 4 | source("config_paths.R") 5 | source("config_general.R") 6 | source("config_splitting.R") 7 | source("utils.R") 8 | #library(parallel) 9 | library(igraph) 10 | 11 | load(file.path(work.folder, "myinfo.Rdata")) 12 | 13 | # compute the parsing order of the aggregate nodes 14 | leaves <- V(itree)[degree(itree, mode="out") == 0] 15 | agg_nodes <- V(itree)[degree(itree, mode="out") != 0] 16 | 17 | list_matpermutations <- list_vecties <- vector("list", length(agg_nodes)) 18 | 19 | for(inode in seq_along(agg_nodes)){ 20 | agg_node <- agg_nodes[inode] 21 | idseries_agg <- names(agg_node) 22 | children_nodes <- ego(itree, order = 1, nodes = agg_node, mode = "out")[[1]][-1] 23 | print(children_nodes) 24 | 25 | mat_residuals <- sapply(seq_along(children_nodes), function(inode){ 26 | child_node <- children_nodes[inode] 27 | isbottom <- (child_node %in% leaves) 28 | idseries <- names(child_node) 29 | if(isbottom){ 30 | resid_file <- file.path(insample.folder, algo.bottom, paste("residuals_", idseries, "_", algo.bottom, ".Rdata", sep = "")) 31 | load(resid_file) 32 | e_residuals <- c(rep(NA, n_past_obs_kd), e_residuals) 33 | }else{ 34 | resid_file <- file.path(insample.folder, algo.agg, paste("residuals_", idseries, "_", algo.agg, "_", 1, ".Rdata", sep = "")) 35 | load(resid_file) 36 | #e_residuals 37 | } 38 | e_residuals 39 | }) 40 | 41 | # remove the first few rows (not available for KD) 42 | mat_residuals <- tail(mat_residuals, -n_past_obs_kd) 43 | 44 | #hour_alli <- getInfo(tail(learn$id, -n_past_obs_kd))$hour 45 | id_all <- tail(learn$id, -n_past_obs_kd) 46 | 47 | # compute ranks by time of day 48 | n_resid <- nrow(mat_residuals) 49 | stopifnot(n_resid %% 48 == 0) 50 | 51 | vec_ties <- sapply(seq(ncol(mat_residuals)), function(j){ 52 | (nrow(mat_residuals) - length(unique(mat_residuals[, j]))) / nrow(mat_residuals) 53 | }) * 100 54 | 55 | mat_residuals <- tail(mat_residuals, M) # 5760 instead of 15168 56 | mat_permutations <- apply(mat_residuals, 2, rank, ties.method = "random") 57 | colnames(mat_permutations) <- names(children_nodes) 58 | 59 | list_matpermutations[[inode]] <- mat_permutations 60 | list_vecties[[inode]] <- vec_ties 61 | #perm_file <- file.path(permutations.folder, paste("perm_", algo.agg, "_", algo.bottom, "_", idseries_agg, ".Rdata", sep = "")) 62 | #save(file = perm_file, list = c("mat_permutations", "vec_ties")) 63 | } 64 | 65 | list_matpermutations <- setNames(list_matpermutations, names(agg_nodes)) 66 | list_vecties <- setNames(list_vecties, names(agg_nodes)) 67 | perm_file <- file.path(permutations.folder, paste("perm_", algo.agg, "_", algo.bottom, ".Rdata", sep = "")) 68 | save(file = perm_file, list = c("list_matpermutations", "list_vecties")) 69 | -------------------------------------------------------------------------------- /smart-meters/code/plot_calendar_effects.R: -------------------------------------------------------------------------------- 1 | # This script produces the figure showing the electricity demand for each day of the week, and various aggregations. 
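# The weekday profiles below come from folding a half-hourly series into a
# (days x 48) matrix and averaging the rows that share a day of week. Sketch
# on toy data (*_demo names are illustrative):
if(FALSE){
  demand_demo <- rnorm(48 * 14, mean = 1)              # two toy weeks
  demand_day_demo <- matrix(demand_demo, ncol = 48, byrow = TRUE)
  dweek_demo <- rep(1:7, 2)                            # day of week of each row
  prof_demo <- sapply(1:7, function(d) colMeans(demand_day_demo[dweek_demo == d, , drop = FALSE]))
  dim(prof_demo)                                       # 48 x 7: one daily profile per weekday
}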
2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("config_general.R") 5 | source("config_splitting.R") 6 | source("utils.R") 7 | 8 | load(file.path(work.folder, "myinfo.Rdata")) 9 | 10 | node_nbkids <- apply(Sagg, 1, sum) 11 | node_order <- sort(node_nbkids, index = T, decreasing = T)$ix 12 | 13 | day_dweek <- matrix(calendar$dweek, ncol = 48, byrow = T)[, 1] 14 | dweeks <- unique(day_dweek) 15 | 16 | if(FALSE){ 17 | # all meters 18 | savepdf(file.path(results.folder, paste("cycle_all_meters", sep = ""))) 19 | par(mfrow = c(2, 2)) 20 | set_ids <- seq(n_bottom) 21 | for(i in set_ids){ 22 | idseries <- bottomSeries[i] 23 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 24 | demand <- demand/max(demand) 25 | demand_day <- matrix(demand, ncol = 48, byrow = T) 26 | res <- sapply(dweeks, function(dweek){ 27 | apply(demand_day[which(day_dweek == dweek), ], 2, mean) 28 | }) 29 | matplot(res, type = 'l', lty = 1) 30 | } 31 | endpdf() 32 | 33 | 34 | # average over all meters 35 | MAT <- array(NA, c(n_bottom, 48, 7)) 36 | #matrix(NA, nrow = n_bottom, ncol = 48) 37 | 38 | for(iseries in seq(n_bottom)){ 39 | idseries <- bottomSeries[iseries] 40 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 41 | demand <- demand/max(demand) 42 | demand_day <- matrix(demand, ncol = 48, byrow = T) 43 | 44 | res <- sapply(dweeks, function(dweek){ 45 | apply(demand_day[which(day_dweek == dweek), ], 2, mean) 46 | }) 47 | MAT[iseries, , ] <- res 48 | } 49 | 50 | savepdf(file.path(results.folder, paste("cycle_meters", sep = "")), height = 27 * 0.25, width = 21) 51 | par(mfrow = c(1, 3)) 52 | 53 | # average over meters 54 | v <- apply(MAT, c(2, 3), mean) 55 | matplot(v, type = 'l', lty = 1, xlab = "", ylab = "Electricity demand (scaled)") 56 | 57 | set_ids <- c(1223, 1233) 58 | for(i in set_ids){ 59 | idseries <- bottomSeries[i] 60 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 61 | demand <- demand/max(demand) 62 | demand_day <- matrix(demand, ncol = 48, byrow = T) 63 | res <- sapply(dweeks, function(dweek){ 64 | apply(demand_day[which(day_dweek == dweek), ], 2, mean) 65 | }) 66 | matplot(res, type = 'l', lty = 1, ylab = "Electricity demand (scaled)") 67 | } 68 | endpdf() 69 | }#FALSE 70 | 71 | 72 | #### 73 | MAT_bottom <- array(NA, c(n_bottom, 48, 7)) 74 | MAT_agg <- array(NA, c(n_agg, 48, 7)) 75 | #matrix(NA, nrow = n_bottom, ncol = 48) 76 | 77 | for(iseries in seq(n_bottom)){ 78 | idseries <- bottomSeries[iseries] 79 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 80 | demand_day <- matrix(demand, ncol = 48, byrow = T) 81 | 82 | res <- sapply(dweeks, function(dweek){ 83 | apply(demand_day[which(day_dweek == dweek), ], 2, mean) 84 | }) 85 | MAT_bottom[iseries, , ] <- res 86 | } 87 | for(iseries in seq(n_agg)){ 88 | idseries <- aggSeries[iseries] 89 | load(file.path(aggseries.folder, paste("series-", idseries, ".Rdata", sep = ""))) 90 | demand_day <- matrix(demand, ncol = 48, byrow = T) 91 | 92 | res <- sapply(dweeks, function(dweek){ 93 | apply(demand_day[which(day_dweek == dweek), ], 2, mean) 94 | }) 95 | MAT_agg[iseries, , ] <- res 96 | } 97 | 98 | #pdf(file.path(results.folder, paste("IC.pdf", sep = "")), width = 21 * 0.9, height = 27 * 0.5) 99 | #par(mfrow = c(2, 3)) 100 | 101 | do.color <- TRUE 102 | if(do.color){ 103 | savepdf(file.path(results.folder, paste("IC_COLOR", sep = "")), height = 27 * 0.46) 104 | par(mfrow = c(3, 2)) 105 | my_cex <- 1 106 | my_pch <- c(3, 
4, 5, 6, 7, 1, 2) 107 | mycol <- c("brown", "grey", "green", "yellow", "black", "blue", "red") 108 | }else{ 109 | savepdf(file.path(results.folder, paste("IC_BLACK", sep = "")), height = 27 * 0.5) 110 | par(mfrow = c(3, 2)) 111 | my_cex <- .7 112 | my_pch <- c(3, 4, 5, 6, 7, 1, 2) 113 | mycol <- "black" 114 | } 115 | 116 | 117 | itday <- c(1, seq(8, 48, by = 8)) 118 | 119 | # id <- node_order 120 | #id <- c(1, 4, 7, 39, 52) 121 | id <- c(1, 7, 39, 50) 122 | 123 | 124 | for(iagg in node_order[id]){ 125 | ind <- which(Sagg[iagg, ] == 1) 126 | v <- MAT_agg[iagg, , ] 127 | 128 | matplot(v, 129 | ylab = "Consumption (kWh)", main = paste(length(ind), " aggregated meters" , sep = ""), 130 | xaxt = "n", xlab = "Time of Day", 131 | type = 'o', pch = my_pch, col = mycol, cex = my_cex) 132 | 133 | 134 | #axis(1, labels = tday[seq(1, 48, by = 8)], at = seq(1, 48, by = 8)) 135 | axis(1, labels = tday[itday], at = itday) 136 | if(iagg == 1){ 137 | 138 | legend("topleft", abbr.dweek[c(6, 7, 1, 2, 3, 4, 5)], 139 | pch = my_pch[c(6, 7, 1, 2, 3, 4, 5)], col = mycol[c(6, 7, 1, 2, 3, 4, 5)], 140 | lty = 1, cex = my_cex, bty = "n") 141 | 142 | } 143 | } 144 | 145 | #matplot(MAT[50, , ], type = 'l', lty = my_lty, ylab = "Consumption (kWh)", main = 1, xaxt = "n", xlab = "Time of Day", col = "black") 146 | matplot(MAT_bottom[50, , ], ylab = "Consumption (kWh)", main = "Individual smart meter", xaxt = "n", xlab = "Time of Day", col = "black", type = 'l') 147 | if(is.null(my_pch)){ 148 | matlines(MAT_bottom[50, , ], lty = 1, col = mycol, cex = my_cex) 149 | }else{ 150 | matpoints(MAT_bottom[50, , ], pch = my_pch, col = mycol, cex = my_cex) 151 | } 152 | axis(1, labels = tday[itday], at = itday) 153 | #axis(1, labels = tday[seq(1, 48, by = 8)], at = seq(1, 48, by = 8)) 154 | 155 | dev.off() 156 | 157 | 158 | -------------------------------------------------------------------------------- /smart-meters/code/plot_coherency.R: -------------------------------------------------------------------------------- 1 | # This script produces the figure which shows the incoherency errors. 
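# "Incoherency" here is the gap between a forecast issued directly at an
# aggregate node and the aggregate of its children's forecasts; for the means
# this is r = mu_agg - Sagg %*% mu_bottom, as computed further down. A tiny
# numeric sketch (*_demo values are illustrative):
if(FALSE){
  Sagg_demo <- matrix(c(1, 1, 1,    # total over three meters
                        1, 1, 0),   # sub-aggregate over the first two
                      nrow = 2, byrow = TRUE)
  mu_bottom_demo <- c(1.0, 2.0, 0.5)
  mu_agg_demo <- c(3.6, 3.1)        # base forecasts made at the aggregate level
  mu_agg_demo - as.numeric(Sagg_demo %*% mu_bottom_demo)   # coherency errors: 0.1 0.1
}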
2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("config_general.R") 5 | source("config_splitting.R") 6 | source("utils.R") 7 | source("multiplot.R") 8 | 9 | library(reshape2) 10 | library(ggplot2) 11 | 12 | nhours <- 48 13 | 14 | gplot_matrix <- function(X, my_title = ""){ 15 | nbrow <- nrow(X) 16 | Z <- melt(t(X)) 17 | Zp <- Z 18 | Zp$Var2 <- nbrow + 1 - Zp$Var2 19 | 20 | #mybreaks <- seq(1, nhours, by = 6) 21 | mybreaks <- c(1, seq(8, nhours, by = 8)) 22 | #mybreaks <- c(1, seq(33, nhours, by = 48)) 23 | mylabels <- rep(tday, nhours/48)[mybreaks] 24 | 25 | p <- ggplot(data = Zp, aes(x = Var1, y = Var2)) + 26 | geom_tile(aes(fill = value), colour = "white") + 27 | #scale_fill_gradient2(name = expression(log(abs(tilde(epsilon)))), low="darkblue", high="darkgreen", guide="colorbar")+ 28 | scale_fill_gradient2(name = "", low="white", high="black", guide="colorbar")+ 29 | scale_x_continuous(breaks = mybreaks, labels = mylabels) + 30 | xlab("Hour of the day") + 31 | ylab("Aggregate series") + 32 | theme_bw() + 33 | ggtitle(my_title) 34 | 35 | # geom_vline(xintercept = seq(1, nhours, by = 48), linetype=3) + 36 | 37 | p 38 | } 39 | 40 | load(file.path(work.folder, "myinfo.Rdata")) 41 | 42 | rank <- sort(apply(Sagg, 1, sum), index = T, decreasing = T)$ix 43 | 44 | node_nbkids <- apply(Sagg, 1, sum) 45 | node_order <- sort(node_nbkids, index = T, decreasing = T)$ix 46 | 47 | algo.agg <- "DETS" 48 | algo.bottom <- "KD-IC-NML" 49 | 50 | ntotal <- n_agg + n_bottom 51 | 52 | meanf <- matrix(NA, nrow = ntotal, ncol = 4416) 53 | 54 | prob_coherency <- prob_coherency_10 <- prob_coherency_50 <- prob_coherency_90 <- matrix(NA, nrow = n_agg, ncol = 48) 55 | allres <- vector("list", 48) 56 | allres[1:48] <- 0 57 | 58 | for(idtest in seq(4416)){ 59 | print(idtest) 60 | 61 | res_byidtest_file <- file.path(work.folder, "byidtest", paste("results_byidtest_", algo.agg, "_", algo.bottom, "_", idtest, ".Rdata", sep = "")) 62 | load(res_byidtest_file) 63 | # "QF_agg_idtest", "QF_bottom_idtest", "obs_agg_idtest", "obs_bottom_idtest", 64 | # "revisedmean_bottom_idtest", 'revisedmean_agg_idtest', "mean_agg_idtest", "mean_bottom_idtest" 65 | 66 | meanf[seq(1, n_agg), idtest] <- mean_agg_idtest 67 | meanf[seq(n_agg + 1, n_agg + n_bottom), idtest] <- mean_bottom_idtest 68 | 69 | if(FALSE){ 70 | base_samples_agg <- matrix(NA, nrow = M, ncol = n_agg) 71 | base_samples_bottom <- matrix(NA, nrow = M, ncol = n_bottom) 72 | 73 | for(j in seq(n_agg)){ 74 | invcdf <- approxfun(taus, QF_agg_idtest[, j], rule = 2) 75 | base_samples_agg[, j] <- invcdf(q_probs) 76 | } 77 | 78 | for(j in seq(n_bottom)){ 79 | invcdf <- approxfun(taus, QF_bottom_idtest[, j], rule = 2) 80 | base_samples_bottom[, j] <- invcdf(q_probs) 81 | } 82 | 83 | sum_agg <- t(Sagg %*% t(base_samples_bottom)) 84 | q_sum_agg <- apply(sum_agg, 2, sort) 85 | q_agg <- apply(base_samples_agg, 2, sort) 86 | 87 | res <- (q_sum_agg - q_agg)^2 88 | 89 | hour <- idtest%%48 90 | if(hour == 0) 91 | hour <- 48 92 | 93 | allres[[hour]] <- allres[[hour]] + res 94 | 95 | #prob_coherency[, idtest] <- apply(res, 2, mean) 96 | #prob_coherency_10[, idtest] <- res[which.min(abs(q_probs - 0.1)), ] 97 | #prob_coherency_50[, idtest] <- res[which.min(abs(q_probs - 0.5)), ] 98 | #prob_coherency_90[, idtest] <- res[which.min(abs(q_probs - 0.9)), ] 99 | 100 | } 101 | 102 | } 103 | 104 | meanres <- lapply(allres, function(mat){ 105 | mat/92 106 | }) 107 | 108 | stop("done") 109 | 110 | 111 | 112 | 113 | listp <- vector("list", 8) 114 | 115 | for(p in seq(8)){ 116 | weights <- 
numeric(M) 117 | if(p == 1){ 118 | weights[which.min(abs(q_probs - 0.05))] <- 1 119 | my_title <- "0.05" 120 | }else if(p == 2){ 121 | weights[which.min(abs(q_probs - 0.25))] <- 1 122 | my_title <- "0.25" 123 | }else if(p == 3){ 124 | weights[which.min(abs(q_probs - 0.5))] <- 1 125 | my_title <- "0.5" 126 | }else if(p == 4){ 127 | weights[which.min(abs(q_probs - 0.75))] <- 1 128 | my_title <- "0.75" 129 | }else if(p == 5){ 130 | weights[which.min(abs(q_probs - 0.95))] <- 1 131 | my_title <- "0.95" 132 | }else if(p == 6){ 133 | #weights[which(q_probs <= 0.05 | q_probs >= 0.95)] <- 1 134 | #my_title <- "both tails" 135 | 136 | weights[which(q_probs <= 0.05 | q_probs >= 0)] <- 1 137 | my_title <- "lower tails" 138 | 139 | }else if(p == 7){ 140 | weights[which(q_probs > 0.05 | q_probs < 0.9)] <- 1 141 | my_title <- "center" 142 | }else if(p == 8){ 143 | weights[seq(M)] <- 1 144 | my_title <- "all" 145 | } 146 | weights <- weights/sum(weights) 147 | 148 | res <- sapply(meanres, function(mat){ 149 | t(weights) %*% mat 150 | }) 151 | 152 | listp[[p]] <- gplot_matrix(log(res[rank, ]), my_title) 153 | } 154 | multiplot(listp[[1]], listp[[2]], listp[[3]], listp[[4]], listp[[5]], listp[[6]] , listp[[7]], listp[[8]], cols=3) 155 | 156 | ##### 157 | savepdf(file.path(results.folder, paste("prob-coherency", sep = "") )) 158 | print(listp[[8]]) 159 | endpdf() 160 | ##### 161 | 162 | do.relative <- FALSE 163 | ##### 164 | if(do.relative){ 165 | r <- (meanf[seq(1, n_agg), ] - Sagg %*% meanf[seq(n_agg + 1, n_agg + n_bottom), ])/meanf[seq(1, n_agg), ] 166 | #r <- (meanf[seq(1, n_agg), ] - Sagg %*% meanf[seq(n_agg + 1, n_agg + n_bottom), ])/Sagg %*% meanf[seq(n_agg + 1, n_agg + n_bottom), ] 167 | }else{ 168 | r <- meanf[seq(1, n_agg), ] - Sagg %*% meanf[seq(n_agg + 1, n_agg + n_bottom), ] 169 | } 170 | 171 | #r <- r[rank, ] 172 | #r <- r[node_order, ] 173 | #X <- log(abs(r[, seq(48)])) 174 | 175 | v <- sapply(node_order, function(iagg){ 176 | #log(abs(matrix(r[iagg, ], ncol = 48, byrow = T))) 177 | #abs(matrix(r[iagg, ], ncol = 48, byrow = T)) 178 | matrix(r[iagg, ], ncol = 48, byrow = T) 179 | }, simplify = "array") 180 | 181 | 182 | ######### 183 | itday <- c(1, seq(12, 48, by = 12)) 184 | id <- c(1, 7, 39, 50) 185 | id <- c(1, 7) 186 | 187 | savepdf(file.path(results.folder, paste("mean-coherency", sep = "") )) 188 | #par(mfrow = c(2, 2)) 189 | par(mfrow = c(1, 2)) 190 | for(iagg in node_order[id]){ 191 | ind <- which(Sagg[iagg, ] == 1) 192 | boxplot(v[, , iagg], xaxt = "n", main = paste(length(ind), " aggregated meters" , sep = ""), ylab = "Coherency errors", xlab = "Time of day") 193 | axis(1, labels = tday[itday], at = itday, cex = .8) 194 | abline(h = 0) 195 | } 196 | endpdf() 197 | ######### 198 | 199 | v <- aperm(v, c(1, 3, 2)) 200 | X <- apply(v, c(2, 3), mean) 201 | 202 | 203 | if(do.relative){ 204 | #X <- abs(X) 205 | }else{ 206 | X <- log(abs(X)) 207 | } 208 | 209 | 210 | 211 | savepdf(file.path(results.folder, paste("mean-coherency", sep = "") )) 212 | print(gplot_matrix(X)) 213 | endpdf() 214 | 215 | 216 | MAT <- log(1 + abs(X)) 217 | savepdf(file.path(results.folder, paste("mean-coherency", sep = "") )) 218 | print(gplot_matrix(MAT)) 219 | endpdf() 220 | ##### 221 | 222 | 223 | p1 <- gplot_matrix(log(prob_coherency[rank, ])) 224 | p2 <- gplot_matrix(log(prob_coherency_10[rank, ])) 225 | p3 <- gplot_matrix(log(prob_coherency_50[rank, ])) 226 | p4 <- gplot_matrix(log(prob_coherency_90[rank, ])) 227 | multiplot(p1, p2, p3, p4, cols=2) 228 | 229 | 230 | 231 | 232 | 233 | 
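# For the probabilistic checks above, bottom-level samples are summed with Sagg
# and their order statistics (empirical quantiles) are compared with those of
# the direct aggregate forecast; the weight vectors then emphasise single
# quantile levels, the tails, the centre, or the whole distribution. A sketch
# of the weighted discrepancy (*_demo names are illustrative):
if(FALSE){
  M_demo <- 1000
  q_sum_demo <- sort(rnorm(M_demo, sd = 1.1))   # quantiles of the summed bottom samples
  q_agg_demo <- sort(rnorm(M_demo, sd = 1.0))   # quantiles of the direct aggregate forecast
  res_demo <- (q_sum_demo - q_agg_demo)^2
  w_demo <- rep(1/M_demo, M_demo)               # the "all" weighting used in the loop above
  drop(t(w_demo) %*% res_demo)                  # weighted mean squared discrepancy
}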
-------------------------------------------------------------------------------- /smart-meters/code/plot_coverage.R: --------------------------------------------------------------------------------
1 | # This script produces the figure showing the coverage of the 50% and 90% prediction intervals.
2 | # The coverage rates were computed in aggregation.R.
3 | rm(list = ls())
4 | source("config_paths.R")
5 | source("config_general.R")
6 | source("config_splitting.R")
7 | source("utils.R")
8 | 
9 | load(file.path(work.folder, "myinfo.Rdata"))
10 | 
11 | algo.agg <- "DETS"
12 | algo.bottom <- "KD-IC-NML"
13 | 
14 | all_coverage_bot <- all_coverage_agg <- 0
15 | 
16 | all_idtest <- seq(4416)
17 | # all_idtest <- c(seq(1, 3000), seq(3400, 4416))
18 | 
19 | for(idtest in all_idtest){
20 | if(idtest%%100==0)
21 | print(idtest)
22 | coverage_idtest <- file.path(coverage.folder, paste("coverage_", algo.agg, "_", algo.bottom, "_", idtest, ".Rdata", sep = ""))
23 | load(coverage_idtest)
24 | # c("coverage_bot", "coverage_agg") # 2 1578 4
25 | all_coverage_bot <- all_coverage_bot + coverage_bot
26 | all_coverage_agg <- all_coverage_agg + coverage_agg
27 | }
28 | all_coverage_bot <- all_coverage_bot / length(all_idtest)
29 | all_coverage_agg <- all_coverage_agg / length(all_idtest)
30 | 
31 | 
32 | bot_methods <- c("BASE", "BASE-MINT", "MINTdiag", "MINTshrink")
33 | bot_colors <- c("grey", "purple", "red", "green")
34 | 
35 | agg_methods <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT", "NAIVEBU-MINT", "MINTdiag", "MINTshrink")
36 | agg_colors <- c("grey", "orange", "purple", "cyan", "pink", "red", "green")
37 | agg_colors[match(c("PERMBU-MINT", "NAIVEBU-MINT"), agg_methods)] <- agg_colors[match(c("PERMBU", "NAIVEBU"), agg_methods)]
38 | # The BASE color was changed from black to grey
39 | 
40 | 
41 | #agg_newnames <- c("BASE", "IndepBU", "DepBU", "DepBU-MinTShrink", "IndepBU-MinTShrink", "LogN-MinTDiag", "LogN-MinTShrink")
42 | agg_newnames <- c("BASE", "IndepBU- \n NoMinT", "DepBU- \n NoMinT", 
43 | "DepBU- \n MinTShrink", "IndepBU- \n MinTShrink", "LogN- \n MinTDiag", "LogN- \n MinTShrink")
44 | 
45 | #c("BASE", "BASE-MINT", "MINTdiag", "MINTshrink")
46 | #c("BASE", "BASE", "BASE", "BASE-MINT", "BASE-MINT", "MINTdiag", "MINTshrink")
47 | id_matching <- c(1, 1, 1, 2, 2, 3, 4)
48 | 
49 | myorder <- c(6, 7, 1, 3, 4, 2, 5)
50 | 
51 | savepdf(file.path(results.folder, "coverage"), height = 6)
52 | par(mfrow = c(1, 2))
53 | for(i in 1:2){
54 | if(i == 1){
55 | cov90 <- all_coverage_agg[2, , ]
56 | cov50 <- all_coverage_agg[1, , ]
57 | mycolors <- agg_colors
58 | }else{
59 | cov90 <- all_coverage_bot[2, , id_matching]
60 | cov50 <- all_coverage_bot[1, , id_matching]
61 | mycolors <- bot_colors[id_matching]
62 | }
63 | colnames(cov90) <- agg_newnames
64 | colnames(cov50) <- agg_newnames
65 | 
66 | # change the display order
67 | cov90 <- cov90[, myorder]
68 | cov50 <- cov50[, myorder]
69 | mycolors <- mycolors[myorder]
70 | 
71 | boxplot(cov50, outline = F, ylim = c(0.2, 1), las = 2, cex.axis = .6, yaxt = 'n', col = mycolors, 
72 | ylab = "Coverage rate")
73 | axis(2, at = c(0.5, 0.9), cex.axis = 0.6, labels = c("50%", "90%"))
74 | 
75 | boxplot(cov90, outline = F, ylim = c(0.2, 1), las = 2, cex.axis = .6, add = T, yaxt = 'n', col = mycolors)
76 | abline(h = c(0.5, 0.9), lwd = 0.1)
77 | 
78 | }
79 | dev.off()
80 | -------------------------------------------------------------------------------- /smart-meters/code/plot_forecasts.R: --------------------------------------------------------------------------------
1 | # This script produces 
a plot for one-day-ahead probabilistic forecasts (with 50% and 95% prediction intervals). 2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("config_general.R") 5 | source("config_splitting.R") 6 | source("utils.R") 7 | 8 | load(file.path(work.folder, "myinfo.Rdata")) 9 | 10 | plot.permsamples <- FALSE 11 | do.jasa <- TRUE 12 | 13 | if(plot.permsamples){ 14 | 15 | do.agg <- T 16 | algo.agg <- "DETS" 17 | algo.bottom <- "KD-IC-NML" 18 | 19 | alliseries <- c(1) 20 | idays <- seq(1, 2, by = 1) 21 | idays <- 1 22 | algorithms <- c("INDEPBU", "PERMBU", "MINTshrink", "INDEPBU-MINTshrink", "PERMBU-MINTshrink", "BASE") 23 | agg_methods <- c("BASE", "INDEPBU", "PERMBU", "PERMBU-MINTshrink", "INDEPBU-MINTshrink", "MINTdiag", "MINTshrink") 24 | 25 | nbperjob <- 368 26 | 27 | QF_agg <- array(NA, c(M, length(algorithms), 48*2)) 28 | allidtest <- seq(1, 48*2) 29 | idjob <- 1 30 | samples_job <- file.path(work.folder, "samples_agg", paste("samples_agg_", algo.agg, "_", algo.bottom, "_", idjob, ".Rdata", sep = "")) 31 | load(samples_job) 32 | 33 | list_samples_agg_nonull <- list_samples_agg[-which(sapply(list_samples_agg, is.null))] 34 | BIGARRAY <- sapply(seq_along(list_samples_agg_nonull), function(i){list_samples_agg_nonull[[i]]}, simplify = 'array') 35 | QF_agg[, , allidtest] <- BIGARRAY[, alliseries, match(algorithms, agg_methods), ] 36 | 37 | mf_agg <- apply(QF_agg, c(2, 3), mean) 38 | qf_agg <- apply(QF_agg, c(2, 3), quantile, prob = taus) 39 | all_qf <- lapply(idays, function(iday){ 40 | qf_agg[, , (iday - 1) * 48 + seq(48) ] 41 | }) 42 | all_mf <- lapply(idays, function(iday){ 43 | mf_agg[, (iday - 1) * 48 + seq(48) ] 44 | }) 45 | 46 | 47 | }else{ 48 | do.agg <- F 49 | alliseries <- seq(200) #c(1267) 50 | algorithms <- c("KD-IC-NML") 51 | 52 | idays <- seq(1, 92, by = 1) 53 | idays <- 1 54 | 55 | if(do.jasa){ 56 | series_isagg <- c(TRUE, FALSE) # actual 57 | alliseries <- c(1, 34) # actual 58 | idays <- c(11) # bettter coverage 59 | } 60 | } 61 | 62 | only.future <- FALSE 63 | 64 | if(do.jasa){ 65 | tag <- "example" 66 | savepdf(file.path(results.folder, paste("PLOT_forecasts_", tag, sep = "")), height = 27 * 0.25, width = 21) 67 | par(mfrow = c(1, 2)) 68 | }else{ 69 | tag <- "allmethods" 70 | savepdf(file.path(results.folder, paste("PLOT_forecasts_", tag, sep = "") )) 71 | } 72 | 73 | for(iseries in alliseries){ 74 | 75 | if(do.jasa){ 76 | do.agg <- series_isagg[which(iseries == alliseries)] 77 | if(do.agg){ 78 | algorithms <- "DETS" 79 | }else{ 80 | algorithms <- "KD-IC-NML" 81 | } 82 | } 83 | 84 | print(iseries) 85 | if(do.agg){ 86 | idseries <- aggSeries[iseries] 87 | load(file.path(aggseries.folder, paste("series-", idseries, ".Rdata", sep = ""))) 88 | }else{ 89 | idseries <- bottomSeries[iseries] 90 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 91 | } 92 | 93 | 94 | if(!do.jasa){ 95 | #par(mfrow = c(2, 2)) 96 | par(mfrow = c(2, 3)) 97 | } 98 | 99 | list_load <- vector("list", length(algorithms)) 100 | for(ialgo in seq_along(algorithms)){ 101 | algo <- algorithms[ialgo] 102 | algo_load <- algo 103 | 104 | if(plot.permsamples){ 105 | qf <- lapply(idays, function(iday){ 106 | all_qf[[iday]][, ialgo, ] 107 | }) 108 | mf <- lapply(idays, function(iday){ 109 | all_mf[[iday]][ialgo, ] 110 | }) 111 | 112 | list_load[[ialgo]] <- list(all_qf = qf, all_mf = mf) 113 | }else{ 114 | res_file <- file.path(basef.folder, algo, paste("results_", idseries, "_", algo, ".Rdata", sep = "")) 115 | load(res_file) 116 | 117 | if(algo_load == "KD-IC-NML"){ 118 | 
list_load[[ialgo]] <- list(all_qf = all_qf, all_mf = all_mf) #res_testing 119 | }else if(algo_load == "TBATS" || algo_load == "DYNREG" || algo_load == "DETS"){ 120 | list_load[[ialgo]] <- list(all_qf = all_qf, all_mf = all_mf) 121 | #list_load[[ialgo]] <- list(all_qf = all_qf, all_mf = all_mf, all_mfsample = all_mfsample) 122 | }else if(algo_load == "Uncond"){ 123 | list_load[[ialgo]] <- list(qFtest = qFtest, mFtest = mFtest) 124 | } 125 | } 126 | }#algo 127 | 128 | 129 | for(iday in idays){ 130 | 131 | day_min <- Inf 132 | day_max <- -Inf 133 | for(ialgo in seq_along(algorithms)){ 134 | day_min <- pmin(day_min, min(list_load[[ialgo]]$all_qf[[iday]])) 135 | day_max <- pmax(day_max, max(list_load[[ialgo]]$all_qf[[iday]])) 136 | } 137 | 138 | print(iday) 139 | for(ialgo in seq_along(algorithms)){ 140 | 141 | algo <- algorithms[ialgo] 142 | algo_load <- algo 143 | 144 | if(algo_load == "KD-IC-NML" || algo_load == "TBATS" || algo_load == "DYNREG" || algo_load == "DETS" || plot.permsamples){ 145 | 146 | all_qf <- list_load[[ialgo]]$all_qf 147 | all_mf <- list_load[[ialgo]]$all_mf 148 | mu_hat <- matrix(unlist(all_mf), ncol = 48, byrow = T) 149 | 150 | qf_allhours <- all_qf[[iday]] 151 | 152 | }else if(algo_load == "Uncond"){ 153 | qFtest <- list_load[[ialgo]]$qFtest 154 | mFtest <- list_load[[ialgo]]$mFtest 155 | 156 | qf_allhours <- qFtest 157 | mu_hat <- matrix(mFtest, ncol = 48, byrow = T) 158 | } 159 | 160 | rownames(qf_allhours) <- paste(taus*100, "%", sep = "") 161 | 162 | future <- demand[test$id[(iday - 1) * 48 + seq(1, 48)]] 163 | subtaus <- c("5%", "25%", "75%", "95%") 164 | #subtaus <- c("1%", "25%", "75%", "99%") 165 | 166 | mymain <- ifelse(algo == "KD-IC-NML", "Individual smart meter", ifelse(algo == "DETS", "Top aggregated series", algo)) 167 | 168 | #myYLIM <- c(0, max(c(future, qf_allhours[subtaus, ]), na.rm = T)) 169 | myYLIM <- c(day_min, day_max) 170 | 171 | plotQF(qf_allhours, future, subtaus, id = seq(48), only.future = only.future, 172 | main = mymain, xlab = "Time of day", ylab = "Consumption (kWh)", xaxt='n', cex.lab = 1.2, ylim = myYLIM) 173 | 174 | #axis(1, labels = tday, at = seq(1, 48)) 175 | itday <- c(1, seq(8, 48, by = 8)) 176 | axis(1, labels = tday[itday], at = itday, cex.axis=0.9) 177 | 178 | lines(mu_hat[iday, ], col = "red") 179 | 180 | }# ALGO 181 | 182 | }# DAY 183 | 184 | #dev.off() 185 | } # iseries 186 | dev.off() 187 | -------------------------------------------------------------------------------- /smart-meters/code/plot_parameters.R: -------------------------------------------------------------------------------- 1 | # This script produces the table showing the values for the different parameters of the exponential smoothing methods. 
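# Presumably (following a Taylor-style double seasonal exponential smoothing
# formulation, as suggested by the Greek labels below): alpha smooths the
# level, delta the intra-day cycle, omega the intra-week cycle, and phi is an
# adjustment parameter; each aggregate series contributes one row of four
# estimates. A sketch of the collection step with toy values (*_demo names
# are illustrative):
if(FALSE){
  pars_demo <- list(c(0.9, 0.1, 0.2, 0.3), c(0.8, 0.2, 0.1, 0.4))
  MAT_demo <- do.call(rbind, pars_demo)
  colnames(MAT_demo) <- c("phi", "alpha", "delta", "omega")
  MAT_demo
}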
2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("config_general.R") 5 | source("config_splitting.R") 6 | source("utils.R") 7 | library(Hmisc) 8 | 9 | load(file.path(work.folder, "myinfo.Rdata")) 10 | 11 | 12 | node_nbkids <- apply(Sagg, 1, sum) 13 | node_order <- sort(node_nbkids, index = T, decreasing = T)$ix 14 | 15 | algo <- "DETS" 16 | 17 | MAT <- matrix(NA, nrow = n_agg, ncol = 4) 18 | for(i in seq_along(aggSeries)){ 19 | idseries <- aggSeries[i] 20 | param_file <- file.path(basef.folder, algo, paste("parameters_", idseries, "_", algo, ".Rdata", sep = "")) 21 | if(file.exists(param_file)){ 22 | load(param_file) 23 | MAT[i, ] <- res_optim$par 24 | }else{ 25 | print(paste("file does not exist for ", i, sep = "")) 26 | } 27 | } 28 | MAT <- MAT[node_order, ] 29 | 30 | savepdf(file.path(results.folder, paste("DETS", sep = ""))) 31 | par(mfrow = c(2, 2)) 32 | xlab <- "aggregate series" 33 | plot(MAT[, 1], xlab = xlab, ylab = expression(phi)) 34 | plot(MAT[, 2], xlab = xlab, ylab = expression(alpha)) 35 | plot(MAT[, 3], xlab = xlab, ylab = expression(delta)) 36 | plot(MAT[, 4], xlab = xlab, ylab = expression(omega)) 37 | endpdf() 38 | 39 | 40 | savepdf(file.path(results.folder, paste("DETS", sep = ""))) 41 | par(mfrow = c(2, 2)) 42 | xlab <- "number of aggregated meters (log scale) " 43 | for(j in seq(4)){ 44 | if(j == 1){ 45 | my_ylab <- expression(phi) 46 | }else if(j == 2){ 47 | my_ylab <- expression(alpha) 48 | }else if(j == 3){ 49 | my_ylab <- expression(delta) 50 | }else if(j == 4){ 51 | my_ylab <- expression(omega) 52 | } 53 | plot(log(node_nbkids), MAT[, j], xlab = xlab, ylab = my_ylab) 54 | } 55 | endpdf() 56 | 57 | id <- c(1, 7, 39, 50) 58 | #id <- c(1, 3, 5, 7, 11, 25, 31, 37, 46, 50) 59 | nb <- apply(Sagg, 1, sum)[node_order[id]] 60 | 61 | #MAT_toprint <- data.frame(t(MAT)[, id]) 62 | #greeks <- c(phi = "$\\phi$", alpha="$\\alpha$", delta = "$\\delta$", omega = "$\\omega$") 63 | 64 | MAT_toprint <- data.frame(t(MAT)[c(2, 3, 4, 1), id]) 65 | greeks <- c(alpha="$\\alpha$", delta = "$\\delta$", omega = "$\\omega$", phi = "$\\phi$") 66 | 67 | row.names(MAT_toprint) <- greeks 68 | colnames(MAT_toprint) <- nb 69 | MAT_toprint <- format.df(MAT_toprint, dec = 3) 70 | 71 | latex(MAT_toprint, file= file.path(results.folder, paste("TABLE_DETS.tex")), 72 | title = "", cgroup = "Number of aggregated meters", label = "tab:param_dets", 73 | caption = "Parameters of the exponential smoothing method at different levels of aggregation.") 74 | 75 | 76 | MAT_bandwiths <- MAT_decay <- matrix(NA, nrow = n_bottom, ncol = 3) 77 | 78 | algo <- "KD-IC-NML" 79 | for(i in seq_along(bottomSeries)){ 80 | print(i) 81 | idseries <- bottomSeries[i] 82 | param_file <- file.path(basef.folder, algo, paste("parameters_", idseries, "_", algo, ".Rdata", sep = "")) 83 | 84 | if(file.exists(param_file)){ 85 | load(param_file) 86 | MAT_bandwiths[i, ] <- selected_bandwiths_ic 87 | MAT_decay[i, ] <- selected_lambdas_ic 88 | }else{ 89 | print(paste("file does not exist for ", i, sep = "")) 90 | } 91 | } 92 | 93 | -------------------------------------------------------------------------------- /smart-meters/code/plot_series.R: -------------------------------------------------------------------------------- 1 | # This script produces the figure which shows the electricity demand for different levels of aggregation. 
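# Sagg is the aggregation matrix of the hierarchy: entry [i, j] is 1 when
# bottom-level meter j belongs to aggregate node i, so apply(Sagg, 1, sum)
# counts the meters under each node and node_order below ranks the nodes from
# most to least aggregated. A toy illustration (hypothetical two-node hierarchy
# over three meters, not the paper's data; harmless here since rm(list = ls())
# follows):
Sagg_toy <- rbind(c(1, 1, 1),  # total: all three meters
                  c(1, 1, 0))  # sub-aggregate: meters 1 and 2
apply(Sagg_toy, 1, sum)  # -> 3 2
sort(apply(Sagg_toy, 1, sum), index.return = TRUE, decreasing = TRUE)$ix  # -> 1 2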
2 | rm(list = ls()) 3 | 4 | source("config_paths.R") 5 | source("config_general.R") 6 | source("config_splitting.R") 7 | source("utils.R") 8 | 9 | library(parallel) 10 | library(fBasics) 11 | library(msm) 12 | library(gtools) 13 | library(forecast) 14 | library(abind) 15 | library(glmnet) 16 | 17 | load(file.path(work.folder, "myinfo.Rdata")) 18 | 19 | node_nbkids <- apply(Sagg, 1, sum) 20 | node_order <- sort(node_nbkids, index = T, decreasing = T)$ix 21 | 22 | offset <- 48 * 7 * 4 23 | 24 | #nb_points <- 48 * 7 * 3 25 | nb_points <- 48 * 7 * 2 26 | nb_points <- 48 * 7 27 | 28 | alldemand <- NULL 29 | 30 | for(iseries in node_order){ 31 | idseries <- aggSeries[iseries] 32 | load(file.path(aggseries.folder, paste("series-", idseries, ".Rdata", sep = ""))) 33 | alldemand <- cbind(alldemand, demand[offset + seq(nb_points)]) 34 | } 35 | #id <- seq(1, 55, by = 10) 36 | #id <- c(1, 4, 7, 39, 52) 37 | id <- c(1, 7, 39, 50) 38 | 39 | 40 | alldemand <- alldemand[, node_order[id]] 41 | load(file.path(mymeters.folder, paste("mymeter-", bottomSeries[511], ".Rdata", sep = ""))) 42 | alldemand <- cbind(alldemand, demand[seq(nb_points)]) 43 | #alldemand <- scale(alldemand) 44 | colnames(alldemand) <- c(node_nbkids[node_order[id]], 1) 45 | 46 | 47 | 48 | savepdf(file.path(results.folder, "demand-bylevel")) 49 | my.ts.panel <- function(x, col = col, bg = bg, pch = pch, type = type, vpos=48*7, ...){ 50 | lines(x, col = col, bg = bg, pch = pch, type = type, ...) 51 | #abline(v=vpos) 52 | } 53 | plot.ts(alldemand, panel=my.ts.panel, nc = 1, axes = F, xlab = "", xaxt = "n", main = "") 54 | abline(v = seq(1, 48 * 7, by = 48), lty = 3) 55 | 56 | v <- seq(24, 48 * 7, by = 48) 57 | axis(1, labels = abbr.dweek, at=v , tick = F) 58 | 59 | endpdf() 60 | # main = paste(node_nbkids[node_order[id]], " aggregated meters", sep = ""), -------------------------------------------------------------------------------- /smart-meters/code/plot_tree.R: -------------------------------------------------------------------------------- 1 | # This script produces the figure which shows the hierarchy in the form of a circular graph. 
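# The hierarchy is drawn with igraph using a circular Reingold-Tilford layout.
# A self-contained toy sketch of the same plotting idea, on a synthetic
# three-level tree (13 nodes, 3 children each) rather than the paper's
# hierarchy:
#
#   library(igraph)
#   g_toy <- make_tree(13, children = 3, mode = "out")
#   plot(g_toy, layout = layout.reingold.tilford(g_toy, root = 1, circular = TRUE),
#        vertex.size = 3, vertex.label = NA, edge.arrow.size = 0)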
2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("config_general.R") 5 | source("config_splitting.R") 6 | source("utils.R") 7 | library(igraph) 8 | library(Matrix) 9 | 10 | load(file.path(work.folder, "myinfo.Rdata")) 11 | 12 | #savepdf(file.path(results.folder, paste("hierarchy", sep = "") )) 13 | #plot(itree, layout = 14 | # layout.reingold.tilford(itree, root=1, circular=T), vertex.size=0, vertex.label=NA, edge.arrow.size=0) 15 | #dev.off() 16 | 17 | g <- itree 18 | v_names <- names(V(g)) 19 | v_names[seq(2, length(v_names))] <- substr(v_names[seq(2, length(v_names))], 4, 7) 20 | v_names[seq(56, length(v_names))] <- "" 21 | g <- set.vertex.attribute(g, "name", value=v_names) 22 | 23 | savepdf(file.path(results.folder, paste("hierarchy-plot", sep = "") )) 24 | 25 | #plot(g, layout = layout.reingold.tilford(g, root=1, circular=T), vertex.size=0, edge.arrow.size=0, vertex.label.cex = .7, 26 | # vertex.label.dist=.3, vertex.label.degree = .30) 27 | #myvsize <- c(apply(Sagg, 1, sum), rep(0, ncol(Sagg))) 28 | #plot(g, layout = layout.reingold.tilford(g, root=1, circular=T), vertex.size=myvsize/90, edge.arrow.size=0, vertex.label.cex = .7) 29 | 30 | #myvsize <- log(c(apply(Sagg, 1, sum), rep(1, ncol(Sagg)))) 31 | myvsize <- c(apply(Sagg, 1, sum), rep(1, ncol(Sagg)))/56 32 | 33 | plot(g, layout = layout.reingold.tilford(g, root=1, circular=T), 34 | vertex.size=myvsize, edge.arrow.size=0, vertex.label= NA, vertex.color = "white") 35 | 36 | endpdf() 37 | 38 | #myvsize <- apply(Sagg, 1, sum)/60 39 | #res <- getInfoNode("kwh") 40 | res <- getInfoNode("nb_nodes") 41 | newg <- delete_vertices(g, seq(56, length(V(g)))) 42 | V(newg)$color <- "white" 43 | 44 | myvsize <- res$info_nodes_agg/15 45 | savepdf(file.path(results.folder, paste("hierarchy-plot-size", sep = "") )) 46 | plot(newg, layout = layout.reingold.tilford(newg, root=1), vertex.size = myvsize, vertex.label= NA, edge.arrow.size=0) 47 | endpdf() 48 | 49 | myvsize <- sqrt(res$info_nodes_agg) 50 | savepdf(file.path(results.folder, paste("hierarchy-plot-sqrtsize", sep = "") )) 51 | plot(newg, layout = layout.reingold.tilford(newg, root=1), vertex.size = myvsize, vertex.label= NA, edge.arrow.size=0) 52 | endpdf() 53 | 54 | 55 | myvsize <- res$info_nodes_agg/20 56 | savepdf(file.path(results.folder, paste("hierarchy-plot-size", sep = "") ) ) 57 | plot(newg, layout = layout_as_tree(newg, root=1), vertex.size = myvsize, vertex.label= NA, edge.arrow.size=0) 58 | endpdf() 59 | 60 | #myvsize <- res$info_nodes_agg/20 61 | #plot(newg, layout = layout.reingold.tilford(newg, root=1, circular=T), vertex.size = myvsize, vertex.label= NA, edge.arrow.size=0) 62 | 63 | -------------------------------------------------------------------------------- /smart-meters/code/results.R: -------------------------------------------------------------------------------- 1 | # This script produces the figures with all the forecast accuracy measures. 2 | 3 | # Note: You need to run the script "aggregation_merge.R" first to generate all the results files. 
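# When do.skill is set to TRUE below, every accuracy measure is reported as a
# skill score, i.e. the percentage improvement over the BASE forecasts:
#   skill = 100 * (score_BASE - score_method) / score_BASE
# For example, a method with CRPS 0.40 against a BASE CRPS of 0.50
# (hypothetical numbers) has a skill of 100 * (0.50 - 0.40) / 0.50 = 20%.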
4 | 5 | source("results_utils.R") 6 | shifter <- function(x, n = 1) { 7 | #if (n == 0) x else c(tail(x, -n), head(x, n)) 8 | if (n == 0) x else c(tail(x, n), head(x, -n)) 9 | } 10 | 11 | color.agg[match(c("PERMBU-MINT", "NAIVEBU-MINT"), agg_methods)] <- color.agg[match(c("PERMBU", "NAIVEBU"), agg_methods)] 12 | 13 | do.skill <- FALSE 14 | do.colors <- TRUE 15 | 16 | 17 | better_names <- function(x){ 18 | if(measure == "MSE" || measure == "RMSE"){ 19 | x[which(x == "NAIVEBU")] <- "NoMinT" #"IndepBU and DepBU" 20 | x[which(x == "MINTdiag")] <- "MinTDiag" 21 | x[which(x == "MINTshrink")] <- "MinTShrink" 22 | }else if(measure == "CRPS" || measure == "CRPS Tails"){ 23 | x[which(x == "NAIVEBU")] <- "IndepBU" 24 | x[which(x == "PERMBU")] <- "DepBU" 25 | x[which(x == "NAIVEBU-MINT")] <- "IndepBU-MinTShrink" 26 | x[which(x == "PERMBU-MINT")] <- "DepBU-MinTShrink" 27 | 28 | x[which(x == "MINTdiag")] <- "LogN-MinTDiag" 29 | x[which(x == "MINTshrink")] <- "LogN-MinTShrink" 30 | } 31 | 32 | return(x) 33 | } 34 | 35 | #grouping_hours <- c(10, 8, 8, 8, 8, 6) 36 | #grouping_hours <- c(10, 20, 18) 37 | #grouping_hours <- c(16, 22 ,10) 38 | #grouping_hours <- rep(2, 24) 39 | #myfactor_hours <- rep(seq(length( grouping_hours )), times = grouping_hours) 40 | 41 | 42 | myfactor_hours <- numeric(48) 43 | myfactor_hours[which(seq(1, 48) %in% c(seq(45, 48), seq(1, 12)))] <- 3 44 | myfactor_hours[which(seq(48) %in% seq(13, 28))] <- 1 45 | myfactor_hours[which(seq(48) %in% seq(29, 44))] <- 2 46 | myfactor_hours <- as.factor(myfactor_hours) 47 | grouping_hours <- table(myfactor_hours) 48 | 49 | grouping_series_agg <- c(rep(1, 6), 8, 5, 7, 10, 9, 5, 5) 50 | myfactor_series_agg <- rep(seq(length( grouping_series_agg )), times = grouping_series_agg) 51 | 52 | grouping_series_bot <- 526*3 53 | myfactor_series_bot <- rep(seq(length( grouping_series_bot )), times = grouping_series_bot) 54 | 55 | measures <- c("CRPS", "CRPS Tails", "MSE", "RMSE") 56 | for(measure in measures){ 57 | print(measure) 58 | list_mat <- get_mat(measure, do.skill = FALSE) 59 | 60 | #mysteps <- 1:2 61 | mysteps <- 2 62 | for(mystep in mysteps){ 63 | if(measure == "MSE" || measure == "RMSE"){ 64 | if(mystep == 1){ 65 | #algos <- c("BASE", "NAIVEBU") 66 | algos <- c("NAIVEBU") 67 | }else{ 68 | #algos <- c("BASE", "MINTdiag", "MINTshrink") 69 | algos <- c("NAIVEBU", "MINTdiag", "MINTshrink") 70 | } 71 | }else{ 72 | if(mystep == 1){ 73 | #algos <- c("BASE", "NAIVEBU", "PERMBU") 74 | algos <- c("NAIVEBU", "PERMBU") 75 | }else{ 76 | #algos <- c("BASE", "PERMBU-MINT", "NAIVEBU-MINT", "MINTdiag", "MINTshrink") 77 | algos <- c("PERMBU-MINT", "NAIVEBU-MINT", "MINTdiag", "MINTshrink") 78 | } 79 | } 80 | id_wanted_agg <- match(algos, agg_methods) 81 | id_wanted_bot <- match(sapply(algos, to_aggname), bot_methods) 82 | if(do.colors){ 83 | mycolors <- color.agg[id_wanted_agg] 84 | }else{ 85 | mycolors <- "black" 86 | } 87 | 88 | filename <- paste("_REVIEW_RESULTS_JASA_", gsub(" ", "", measure, fixed = TRUE), "_", mystep, "_", 89 | ifelse(do.colors, "COLOR", "BLACK"), "_", ifelse(do.skill, "SKILL", "ABSOLUTE"), sep = "") 90 | 91 | savepdf(file.path(results.folder, filename), height = 5) 92 | par(mfrow = c(1, 3)) 93 | 94 | results_bot <- list_mat$res_bot 95 | results_bot_avg <- apply(results_bot, c(2, 3), function(x){ tapply(x, myfactor_hours, mean) }) 96 | results_bot_avg <- apply(results_bot_avg, c(1, 2), function(x){ tapply(x, myfactor_series_bot, mean) }) 97 | x_nbbot <- 1 98 | 99 | results_agg <- list_mat$res_agg 100 | results_agg <- results_agg[, , 
node_order] 101 | results_agg_avg <- apply(results_agg, c(2, 3), function(x){ tapply(x, myfactor_hours, mean) }) 102 | results_agg_avg <- apply(results_agg_avg, c(1, 2), function(x){ tapply(x, myfactor_series_agg, mean) }) 103 | results_agg_avg <- aperm(results_agg_avg, c(2, 3, 1)) 104 | x_nbagg <- tapply(res_info$info_nodes_agg[node_order], myfactor_series_agg, mean) 105 | 106 | mse_ylim <- vector("list", 3) 107 | mse_ylim[[1]] <- NULL #c(-10, 7) 108 | mse_ylim[[2]] <- c(-15, 5) #c(-45, 10) 109 | mse_ylim[[3]] <- c(-80, 20) #c(-60, 15) 110 | 111 | 112 | for(k in seq( length(grouping_hours) ) ){ 113 | my_ylim <- NULL 114 | if( (measure == "MSE" || measure == "RMSE") && k <= 3){ 115 | my_ylim <- mse_ylim[[k]] 116 | } 117 | 118 | #maink <- paste(tday[range(which(k == myfactor_hours))], collapse = " - ") 119 | id <- which(myfactor_hours == k) 120 | if(k == 3){ 121 | id <- shifter(id, 4) 122 | } 123 | maink <- paste(tday[c(head(id, 1), tail(id, 1))], collapse = " - ") 124 | 125 | i_base <- match("BASE", bot_methods) 126 | if(do.skill){ 127 | u_bot <- t( (results_bot_avg[k, i_base] - t(results_bot_avg[k, id_wanted_bot]))/results_bot_avg[k, i_base]) 128 | u_bot <- t(u_bot) * 100 129 | }else{ 130 | u_bot <- t(results_bot_avg[k, id_wanted_bot]) 131 | } 132 | 133 | 134 | i_base <- match("BASE", agg_methods) 135 | if(do.skill){ 136 | u_agg <- t( (results_agg_avg[k, i_base, ] - t(results_agg_avg[k, id_wanted_agg, ]))/results_agg_avg[k, i_base, ]) 137 | u_agg <- t(u_agg) * 100 138 | 139 | }else{ 140 | u_agg <- t(results_agg_avg[k, id_wanted_agg, ]) 141 | } 142 | 143 | if(length(id_wanted_agg) == 1) 144 | u_agg <- t(u_agg) 145 | 146 | u_all <- rbind(u_agg, u_bot) 147 | x_all <- c(x_nbagg, x_nbbot) 148 | 149 | if(do.skill){ 150 | matplot(log10(x_all), u_all, 151 | type = 'o', pch = pch.agg[id_wanted_agg], lty = 1, col = mycolors, 152 | xlab = "Number of aggregated meters", 153 | ylab = paste(measure, " skill (%)", sep = ""), main = maink, 154 | ylim = my_ylim, xaxt = "n", 155 | cex = 1.2) 156 | abline(h = 0) 157 | }else{ 158 | matplot(log10(x_all), u_all, 159 | type = 'o', pch = pch.agg[id_wanted_agg], lty = 1, col = mycolors, 160 | xlab = "Number of aggregated meters", 161 | ylab = paste(measure, ifelse(measure == "RMSE", " (kWh)", "") , sep = ""), main = maink, xaxt = "n", 162 | cex = 1.2) 163 | } 164 | 165 | #axis(1, at = log10(x_all), labels = log(x_all)) 166 | x <- c(0, 1, 2, 3) 167 | axis(1, at = x, labels = 10^x) 168 | 169 | if(k == 1){ 170 | myplace <- ifelse(measure == "RMSE" & !do.skill, "topleft", "bottomleft") 171 | myinset <- c(0, 0) 172 | if(measure == "CRPS"){ 173 | myinset <- c(0.1,0) 174 | } 175 | legend(myplace, better_names(agg_methods[id_wanted_agg]), 176 | lty = 1, pch = pch.agg[id_wanted_agg], col = mycolors, cex = 1.1, bty = "n", 177 | inset = myinset) 178 | } 179 | 180 | 181 | 182 | } # k 183 | dev.off() 184 | }# id 185 | } # measures 186 | 187 | -------------------------------------------------------------------------------- /smart-meters/code/results_utils.R: -------------------------------------------------------------------------------- 1 | # Various functions used in the "results.R" script. 
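# get_mat(measure, do.skill) returns list(res_agg, res_bot), score arrays that
# results.R indexes as [half-hour of day, method, series]. to_aggname() maps an
# aggregate-level method label to the bottom-level method whose forecasts it
# builds on, e.g.
#   to_aggname("PERMBU")       # -> "BASE"
#   to_aggname("PERMBU-MINT")  # -> "BASE-MINT"
#   to_aggname("MINTshrink")   # -> "MINTshrink" (unchanged)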
2 | 3 | get_mat <- function(measure, do.skill){ 4 | if(grepl("CRPS", measure)){ 5 | if(measure == "CRPS"){ 6 | iweight <- 1 7 | }else if(measure == "CRPS Tails"){ 8 | iweight <- 2 9 | }else if(measure == "CRPS Right tail"){ 10 | iweight <- 4 11 | }else if(measure == "CRPS Left tail"){ 12 | iweight <- 5 13 | } 14 | res_agg <- wcrps_agg_byhour[iweight, , , ] 15 | res_bot <- wcrps_bot_byhour[iweight, , , ] 16 | }else if(measure == "MSE"){ 17 | res_agg <- mse_agg_byhour 18 | res_bot <- mse_bot_byhour 19 | }else if(measure == "RMSE"){ 20 | res_agg <- sqrt(mse_agg_byhour) 21 | res_bot <- sqrt(mse_bot_byhour) 22 | }else if(measure == "QS"){ 23 | res_agg <- total_qscores_agg 24 | res_bot <- total_qscores_bot 25 | }else{ 26 | stop("error") 27 | } 28 | 29 | if(do.skill){ 30 | mat_agg_skill <- sapply(seq_along(agg_methods), function(iaggmethod){ 31 | (res_agg[, match("BASE", agg_methods), ] - res_agg[, iaggmethod, ])/res_agg[, match("BASE", agg_methods), ] 32 | }, simplify = 'array') 33 | 34 | mat_bot_skill <- sapply(seq_along(bot_methods), function(ibotmethod){ 35 | (res_bot[, match("BASE", bot_methods), ] - res_bot[, ibotmethod, ])/res_bot[, match("BASE", bot_methods), ] 36 | }, simplify = 'array') 37 | 38 | #browser() 39 | res_agg <- aperm(mat_agg_skill, c(1, 3, 2)) 40 | res_bot <- aperm(mat_bot_skill, c(1, 3, 2)) 41 | } 42 | 43 | list(res_agg = res_agg, res_bot = res_bot) 44 | } 45 | 46 | to_aggname <- function(algo){ 47 | if(algo %in% c("NAIVEBU", "PERMBU")) 48 | { 49 | res <- "BASE" 50 | }else if(algo %in% c("PERMBU-MINT", "NAIVEBU-MINT")){ 51 | # res <- "MINTshrink" 52 | res <- "BASE-MINT" 53 | }else{ 54 | res <- algo 55 | } 56 | res 57 | } 58 | 59 | -------------------------------------------------------------------------------- /smart-meters/code/run_aggregation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | full_path=$(realpath $0) 4 | dir_path=$(dirname $full_path) 5 | mypath=$(dirname $dir_path ) 6 | 7 | rscript="aggregation.R" 8 | 9 | njobs=12 10 | nperjobs=368 11 | 12 | allijobs=$(seq 1 $njobs ) 13 | 14 | for ijob in ${allijobs[@]} 15 | do 16 | start=$(( 0 + ($ijob - 1) * $nperjobs + 1 )) 17 | end=$(( 0 + ($ijob - 1) * $nperjobs + $nperjobs )) 18 | allidtest=( $(seq $start $end ) ) 19 | 20 | echo "${allidtest[@]}" 21 | Rscript --vanilla $rscript $ijob ${allidtest[@]} > "$mypath/work/rout/aggregation-$ijob.Rout" 2> "$mypath/work/rout/aggregation-$ijob.err" & 22 | done 23 | 24 | 25 | -------------------------------------------------------------------------------- /smart-meters/code/run_basef.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | full_path=$(realpath $0) 4 | dir_path=$(dirname $full_path) 5 | mypath=$(dirname $dir_path ) 6 | 7 | rscript="basef.R" 8 | 9 | doagg=FALSE && tag="bottom" && algo="KD-IC-NML" && njobs=4 && nperjobs=395 # (bottom-level forecasts) 10 | # doagg=TRUE && tag="agg" && algo="DETS" && njobs=28 && nperjobs=2 # (aggregate-level forecasts) 11 | 12 | allijobs=$(seq 1 $njobs ) 13 | 14 | for ijob in ${allijobs[@]} 15 | do 16 | start=$(( 0 + ($ijob - 1)* ($nperjobs) + 1 )) 17 | end=$(( 0 + ($ijob - 1)* ($nperjobs) + ($nperjobs) )) 18 | alliseries=( $(seq $start $end ) ) 19 | 20 | echo "${alliseries[@]}" 21 | 22 | Rscript --vanilla $rscript $algo $doagg ${alliseries[@]} > "$mypath/work/rout/basef-$tag-$algo-$ijob.Rout" 2> "$mypath/work/rout/basef-$tag-$algo-$ijob.err" & 23 | 24 | done 25 | 26 | 27 | 28 |
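# Job arithmetic (same pattern as run_aggregation.sh): job i covers the
# contiguous series ids (i - 1) * nperjobs + 1 .. i * nperjobs, so the active
# bottom-level setting (njobs=4, nperjobs=395) spans series 1..1580, and the
# commented aggregate-level setting (njobs=28, nperjobs=2) spans series 1..56.
# Each Rscript call is backgrounded with "&", so all jobs run in parallel.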
-------------------------------------------------------------------------------- /smart-meters/code/utils.R: -------------------------------------------------------------------------------- 1 | # Various useful functions used by many scripts. 2 | source("utils_ets.R") 3 | source("utils_kde.R") 4 | library(scoringRules) 5 | 6 | getInfoNode <- function(typeinfo) 7 | { 8 | if(typeinfo == "nb_nodes"){ 9 | info_nodes_agg <- apply(Sagg, 1, sum) 10 | info_nodes_bottom <- rep(1, n_bottom) 11 | }else if(typeinfo == "kwh"){ 12 | for(do.agg in c(TRUE, FALSE)){ 13 | nseries <- ifelse(do.agg, n_agg, n_bottom) 14 | x <- numeric(nseries) 15 | for(iseries in seq(nseries)){ 16 | if(do.agg){ 17 | idseries <- aggSeries[iseries] 18 | load(file.path(aggseries.folder, paste("series-", idseries, ".Rdata", sep = ""))) 19 | }else{ 20 | idseries <- bottomSeries[iseries] 21 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 22 | } 23 | #x[iseries] <- mean(demand) 24 | x[iseries] <- mean(apply(matrix(demand, nrow = 2), 2, sum)) 25 | } 26 | if(do.agg){ 27 | info_nodes_agg <- x 28 | }else{ 29 | info_nodes_bottom <- x 30 | } 31 | } 32 | } 33 | list(info_nodes_agg = info_nodes_agg, info_nodes_bottom = info_nodes_bottom) 34 | } 35 | 36 | gtop <- function(ycheck, R, nagg, ntotal, Sagg, P_bu){ 37 | Rinv <- solve(R) #Rinv <- diag(1 / sqrt(weights_GTOP)) 38 | A <- R 39 | A2 <- t(A) %*% A # A2 = R^T %*% R 40 | 41 | dvec <- A2 %*% ycheck 42 | Dmat <- Rinv 43 | Amat <- diag(ntotal)[seq(nagg), ] - Sagg %*% P_bu 44 | meq <- nagg 45 | Amat <- t(Amat) 46 | res <- solve.QP(Dmat, dvec, Amat, bvec = rep(0, meq), meq = meq, factorized = TRUE) 47 | #print(res) 48 | yproj <- res$solution 49 | } 50 | 51 | fourier.series = function(t,terms,period) 52 | { 53 | n = length(t) 54 | X = matrix(NA, nrow=n, ncol=2*terms) 55 | for(i in 1:terms) 56 | { 57 | X[,2*i-1] = sin(2*pi*i*t/period) 58 | X[,2*i] = cos(2*pi*i*t/period) 59 | } 60 | colnames(X) = paste(c("S","C"),rep(1:terms,rep(2,terms)),sep="") 61 | return(X) 62 | } 63 | 64 | backtransform_log <- function(x, fvar){ 65 | exp(x) * (1 + 0.5 * fvar) 66 | } 67 | 68 | 69 | mint_betastar <- function(W, y_hat){ 70 | MAT1 <- W %*% U 71 | MAT2 <- crossprod(U,MAT1) 72 | MAT3 <- tcrossprod(solve(MAT2), U) 73 | C1 <- J %*% MAT1 74 | C2 <- MAT3 %*% y_hat 75 | adj <- C1 %*% C2 76 | -adj 77 | } 78 | 79 | mint_pmatrix <- function(W){ 80 | MAT1 <- W %*% U 81 | MAT2 <- crossprod(U,MAT1) 82 | MAT3 <- tcrossprod(solve(MAT2), U) 83 | C1 <- J %*% MAT1 84 | J - C1 %*% MAT3 85 | } 86 | 87 | hasnoBottom <- function(algo){ 88 | grepl("BU", algo) & !grepl("NNLS", algo) 89 | } 90 | 91 | getInfo <- function(idtest){ 92 | iday <- floor((idtest-1)/48) + 1 93 | hour <- (idtest -1)%%48 + 1 94 | list(iday = iday, hour = hour) 95 | } 96 | 97 | crps_mixture <- function(mus, vars, weights, x_query){ 98 | M <- length(mus) 99 | 100 | sigmas <- sqrt(vars); x_centered <- (x_query - mus) 101 | 102 | #comp1 <- sum(2 * sigmas * dnorm(x_centered / sigmas) + x_centered * (2 * pnorm(x_centered / sigmas) - 1))/M 103 | comp1_part1 <- 2 * sigmas * dnorm(x_centered / sigmas) + x_centered * (2 * pnorm(x_centered / sigmas) - 1) 104 | comp1 <- sum(weights * comp1_part1) 105 | 106 | ids <- permutations(n = M, r = 2, repeats.allowed=T) 107 | 108 | mudiffs <- mus[ids[, 1]] - mus[ids[, 2]] 109 | varsums <- vars[ids[, 1]] + vars[ids[, 2]] 110 | wproducts <- weights[ids[, 1]] * weights[ids[, 2]] 111 | 112 | sigmasums <- sqrt(varsums) 113 | comp2_part1 <- 2 * sigmasums * dnorm(mudiffs / sigmasums) + mudiffs * (2 * 
pnorm(mudiffs / sigmasums) - 1) 114 | comp2 <- sum(wproducts * comp2_part1) 115 | 116 | #comp2 <- (1/M^2) * sum(2 * sigmasums * dnorm(mudiffs / sigmasums) + 117 | # mudiffs * (2 * pnorm(mudiffs / sigmasums) - 1)) 118 | 119 | comp1 - 0.5 * comp2 120 | } 121 | 122 | crps_sampling <- function(X, obs){ 123 | X <- sample(X) 124 | Xprime <- diff(X) 125 | mean(abs(X - obs)) - 0.5 * mean(abs(Xprime)) 126 | } 127 | 128 | 129 | 130 | getfromlist <- function(mylist, item = c("crps", "residuals", "squared_error", "q_hat", "tauhat", "mu_hat", "var_hat")){ 131 | lapply(mylist, function(daylist){ 132 | sapply(daylist, function(hourlist){ 133 | hourlist[[item]] 134 | }, simplify = "array") 135 | }) 136 | } 137 | 138 | 139 | getItem <- function(mylist, item, order_hours){ 140 | item_night <- getfromlist(mylist$res_nighthours, item) 141 | item_day <- getfromlist(mylist$res_dayhours, item) 142 | if(length(dim(item_night)) == 3){ 143 | item_all <- abind(item_night, item_day, along = 2) 144 | item_all <- item_all[, order_hours, ] 145 | res <- lapply(seq(dim(item_all)[3]), function(iday){ 146 | item_all[, , iday] 147 | }) 148 | }else if(length(dim(item_night)) == 2){ 149 | item_all <- rbind(item_night, item_day) 150 | item_all <- item_all[order_hours, ] 151 | res <- lapply(seq(ncol(item_all)), function(iday){ 152 | item_all[, iday] 153 | }) 154 | } 155 | res 156 | } 157 | quantile_loss <- function(y, f, tau){ 158 | mean(tau*(y - f)*((y - f) >= 0) - (1-tau)*(y - f)*((y - f) < 0)) 159 | } 160 | 161 | check_function <- function(y, f, tau){ 162 | tau*(y - f)*((y - f) >= 0) - (1-tau)*(y - f)*((y - f) < 0) 163 | } 164 | 165 | 166 | makeSuperposed<-function(datatable,maxy,colorvar,maintitle=NULL,xlab,ylab,do.plot=TRUE, densities = NULL) 167 | { 168 | if(length(colorvar)!=ncol(datatable)-1) 169 | stop("Error color !") 170 | 171 | if(!is.null(densities) && length(densities) != ncol(datatable)-1 ){ 172 | stop("Error densities ! ") 173 | } 174 | 175 | #ylim<-c(0,1.1*max(maxy,apply(datatable[,-1],1,sum))) 176 | ylim<-c(0,maxy) 177 | xx <- c(datatable[,1], rev(datatable[,1])) 178 | 179 | if(do.plot){ 180 | plot(x=datatable[,1], y=datatable[,2], ylim=ylim,type='l',ylab=ylab, xlab=xlab, main=maintitle) 181 | # plot(x=datatable[,1], y=datatable[,2], ylim=ylim,type='l',ylab=ylab, xlab=xlab, main=maintitle) 182 | 183 | } 184 | 185 | a<-datatable[,2] 186 | yysrc2 <- c(rep(0, nrow(datatable)), rev(a)) 187 | 188 | allvar <- seq(2,ncol(datatable)) 189 | for(variable in allvar) 190 | { 191 | id <- which(variable==allvar) 192 | 193 | mydensity <- NULL 194 | if(!is.null(densities)){ 195 | mydensity <- densities[id] 196 | if(is.na(mydensity)) 197 | mydensity <- NULL 198 | } 199 | 200 | polygon(xx, yysrc2, col=colorvar[variable-1],border=NA, density = mydensity) 201 | if(variable != ncol(datatable)) 202 | { 203 | b<-rev(apply(datatable[,2:(variable+1),drop=F],1,sum)) 204 | yysrc2 <- c(a,b) 205 | a<-rev(b) 206 | } 207 | } 208 | 209 | } 210 | 211 | ######## 212 | plotQF <- function(quantilef, obs, taus, id, only.future = FALSE, ...) 213 | { 214 | qf <- quantilef[taus, id] 215 | medianf <- quantilef["50%", id] 216 | future <- obs[id] 217 | 218 | # Plotting 219 | matplot(id, cbind(future, t(qf)), type = "n", ...) 
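# type = "n" only sets up the axes, with limits wide enough for both the
# observations and all quantile rows; the interval polygons, observed points
# and median line are layered onto this empty canvas below.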
220 | if(!only.future){ 221 | x <- seq(ncol(qf)) 222 | xx <- c(x, rev(x)) 223 | 224 | nbrow <- nrow(qf) 225 | colorvar <- c("grey", "lightblue", "grey") 226 | idcol <- 1 227 | for(row in seq(nbrow, 2, by =-1)){ 228 | yy <- c(qf[row, ], rev(qf[row - 1,])) 229 | polygon(xx, yy, col=colorvar[idcol],border=NA) 230 | idcol <- idcol +1 231 | } 232 | } 233 | points(id, future, pch = 20) 234 | if(!only.future){ 235 | lines(medianf) 236 | } 237 | } 238 | 239 | 240 | allqs <- function(qf, obs, taus){ 241 | allqs <- NULL 242 | for(id in seq_along(taus)){ 243 | alpha <- taus[id] 244 | #print(alpha) 245 | qs <- quantileScore(obs, qf[id, ], alpha, breaks = c(-10, union(quantile(qf[id, ]), quantile(qf[id, ]))) )$qs.orig 246 | allqs <- c(allqs, qs) 247 | 248 | } 249 | allqs 250 | } 251 | 252 | -------------------------------------------------------------------------------- /smart-meters/code/utils_ets.R: -------------------------------------------------------------------------------- 1 | # Functions needed to compute exponential smoothing forecasts. 2 | iterate <- function(theta, y, e_0, l_0, d_0, w_0, do.forecast = FALSE){ 3 | phi <- theta[1] 4 | alpha <- theta[2] 5 | delta <- theta[3] 6 | omega <- theta[4] 7 | 8 | n <- length(y) 9 | yhat <- e <- l <- d <- w <- numeric(n) + NA 10 | if(do.forecast){ 11 | H <- 48 12 | stopifnot(H <= 48) 13 | nf <- n + H 14 | ysim <- yhat <- e <- l <- d <- w <- numeric(nf) + NA 15 | } 16 | 17 | 18 | yhat_local_0 <- e_local_0 <- l_local_0 <- d_local_0 <- w_local_0 <- z <- numeric(m_2 * 2) + NA 19 | e_local_0[seq(1, m_2)] <- e_0 20 | l_local_0[seq(1, m_2)] <- l_0 21 | d_local_0[seq(1, m_2)] <- d_0 22 | w_local_0[seq(1, m_2)] <- w_0 23 | z[seq(m_2 + 1, 2 * m_2)] <- y[seq(m_2)] 24 | 25 | for(i in seq(m_2 + 1, 2 * m_2)){ 26 | 27 | yhat_local_0[i] <- l_local_0[i - 1] + d_local_0[i - m_1] + w_local_0[i - m_2] + phi * e_local_0[i - 1] 28 | e_local_0[i] <- z[i] - (l_local_0[i - 1] + d_local_0[i - m_1] + w_local_0[i - m_2]) # added - m_2 for y 29 | l_local_0[i] <- l_local_0[i - 1] + alpha * e_local_0[i] 30 | d_local_0[i] <- d_local_0[i - m_1] + delta * e_local_0[i] 31 | w_local_0[i] <- w_local_0[i - m_2] + omega * e_local_0[i] 32 | } 33 | yhat[seq(m_2)] <- yhat_local_0[m_2 + seq(1, m_2)] 34 | e[seq(m_2)] <- e_local_0[m_2 + seq(1, m_2)] 35 | l[seq(m_2)] <- l_local_0[m_2 + seq(1, m_2)] 36 | d[seq(m_2)] <- d_local_0[m_2 + seq(1, m_2)] 37 | w[seq(m_2)] <- w_local_0[m_2 + seq(1, m_2)] 38 | 39 | for(i in seq(m_2 + 1, n)){ 40 | yhat[i] <- l[i - 1] + d[i - m_1] + w[i - m_2] + phi * e[i - 1] 41 | e[i] <- y[i] - (l[i - 1] + d[i - m_1] + w[i - m_2]) 42 | l[i] <- l[i - 1] + alpha * e[i] 43 | d[i] <- d[i - m_1] + delta * e[i] 44 | w[i] <- w[i - m_2] + omega * e[i] 45 | } 46 | 47 | # mean forecast 48 | if(do.forecast){ 49 | 50 | for(h in seq(H)){ 51 | i <- n 52 | stopifnot(h <= m_1) 53 | # yhat[i + h] <- l[i] + (alpha * phi * (1 - phi^(h - 1))) / ((1 - phi) * e[i]) + d[i - m_1 + h] + w[i - m_2 + h] + phi^h * e[i] 54 | yhat[i + h] <- l[i] + (alpha * phi * (1 - phi^(h - 1)) / (1 - phi)) * e[i] + d[i - m_1 + h] + w[i - m_2 + h] + phi^h * e[i] 55 | } 56 | mf <- yhat[n + seq(H)] 57 | } 58 | 59 | # sample paths + quantile forecast 60 | if(do.forecast){ 61 | K <- 5000 62 | samples <- matrix(NA, nrow = K, ncol = H) 63 | 64 | residuals <- y - head(yhat, -H) 65 | mat_residuals <- matrix(residuals, ncol = 48, byrow = T) 66 | # bootstrap vectors of size 48 from the residuals 67 | ind <- sample(seq(nrow(mat_residuals)), K, replace = TRUE) 68 | 69 | for(k in seq(K)){ 70 | varepsilon <- mat_residuals[ind[k], ] 
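# varepsilon holds one bootstrapped day (48 half-hours) of in-sample residuals;
# pushing it through the state recursions below yields a simulated sample path
# that preserves the intraday dependence of the forecast errors.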
71 | for(h in seq(H)){ 72 | i <- n + h 73 | ysim[i] <- l[i - 1] + d[i - m_1] + w[i - m_2] + phi * e[i - 1] + varepsilon[h] 74 | e[i] <- ysim[i] - (l[i - 1] + d[i - m_1] + w[i - m_2]) 75 | l[i] <- l[i - 1] + alpha * e[i] 76 | d[i] <- d[i - m_1] + delta * e[i] 77 | w[i] <- w[i - m_2] + omega * e[i] 78 | } 79 | samples[k, ] <- ysim[n + seq(H)] 80 | } 81 | qf <- apply(samples, 2, quantile, prob = taus) 82 | # matplot(t(qf), type = 'l', lty = 1) 83 | mfsample <- apply(samples, 2, mean) 84 | 85 | yhat <- head(yhat, -H) 86 | e <- head(e, -H) 87 | l <- head(l, -H) 88 | d <- head(d, -H) 89 | w <- head(w, -H) 90 | } 91 | 92 | if(do.forecast){ 93 | RET <- list(yhat = yhat, e = e, l = l, d = d, w = w, mf = mf, qf = qf, residuals = residuals, mfsample = mfsample) 94 | }else{ 95 | RET <- list(yhat = yhat) 96 | } 97 | RET 98 | } 99 | 100 | func_to_optimize <- function(theta, y, e_0, l_0, d_0, w_0, do.forecast){ 101 | obj <- iterate(theta, y, e_0, l_0, d_0, w_0, do.forecast) 102 | mean((y - obj$yhat)^2) 103 | } 104 | 105 | -------------------------------------------------------------------------------- /smart-meters/code/utils_hts.R: -------------------------------------------------------------------------------- 1 | # Functions needed to compute MinT forecasts. 2 | 3 | shrink.estim <- function(x, tar) 4 | { 5 | if (is.matrix(x) == TRUE && is.numeric(x) == FALSE) 6 | stop("The data matrix must be numeric!") 7 | p <- ncol(x) 8 | n <- nrow(x) 9 | 10 | covm <- crossprod(x) / n 11 | corm <- cov2cor(covm) 12 | xs <- scale(x, center = FALSE, scale = sqrt(diag(covm))) 13 | v <- (1/(n * (n - 1))) * (crossprod(xs^2) - 1/n * (crossprod(xs))^2) 14 | diag(v) <- 0 15 | corapn <- cov2cor(tar) 16 | d <- (corm - corapn)^2 17 | lambda <- sum(v)/sum(d) 18 | lambda <- max(min(lambda, 1), 0) 19 | shrink.cov <- lambda * tar + (1 - lambda) * covm 20 | return(list(shrink.cov = shrink.cov, lambda = lambda )) 21 | } 22 | 23 | vec_w <- function(x){ 24 | n <- nrow(x) 25 | apply(x, 2, crossprod) / n 26 | } 27 | lowerD <- function(x) 28 | { 29 | n <- nrow(x) 30 | return(diag(vec_w(x))) 31 | } 32 | -------------------------------------------------------------------------------- /smart-meters/code/utils_kde.R: -------------------------------------------------------------------------------- 1 | # Functions needed to compute forecasts based on kernel density estimates. 2 | 3 | predictkde <- function(task = c("learning", "testing", "insample_info"), selected_bandwiths = NULL, selected_lambdas = NULL){ 4 | 5 | n_past_obs <- n_past_obs_kd 6 | 7 | if(task == "learning"){ 8 | 9 | ids_past <- tail(train$id, n_past_obs) 10 | ids_future <- validation$id 11 | 12 | ##### Bandwith interval ##### 13 | #n_base <- length(train$id) 14 | n_base <- n_past_obs 15 | if(grepl("KD-D", algo)){ 16 | n_approx <- n_base/48 17 | }else if(grepl("KD-IC", algo)){ 18 | #n_approx <- n_base/144 # 144 = 48 * 3 19 | n_approx <- (n_base/336)*4 #(should be 5 for weekdays and 3 for weekends. 
So (3+5)/2 = 4) 20 | }else{ 21 | stop("error in algo") 22 | } 23 | 24 | n_approx <- 8 25 | 26 | stopifnot(!is.null(n_approx)) 27 | 28 | if(mykernel == "normal" || mykernel == "truncated"){ 29 | x_samples <- sample(demand[ids_past], n_approx) 30 | }else if(mykernel == "lognormal"){ 31 | x_samples <- log(sample(demand[ids_past], n_approx)) 32 | } 33 | 34 | res <- sapply(seq(10), function(l){ 35 | sapply(seq(48), function(h){bw.nrd(sample(demand[ids_past][seq(h, length(ids_past), by = 48)], n_approx))}) 36 | }) 37 | bw_normal <- max(apply(res, 1, mean)) 38 | 39 | bandwiths_vec <- seq(min_bandwith, bw_normal, length.out = 5) 40 | bandwiths_subvec1 <- seq(bandwiths_vec[1], bandwiths_vec[2], length.out = 5) 41 | bandwiths_subvec2 <- seq(bandwiths_vec[2], bandwiths_vec[3], length.out = 5) 42 | bandwiths <- c(bandwiths_subvec1, bandwiths_subvec2, bandwiths_vec[-seq(3)]) 43 | #bandwiths <- seq(10^-4, bw_normal, length.out = 15) 44 | 45 | stopifnot(all(bandwiths>0)) 46 | 47 | nb_futuredays <- length(seq_validation_interval)/48 48 | 49 | #lambdas <- c(0.8, 0.90, 0.95, 1) 50 | lambdas <- c(0.2, 0.4, 0.6, 0.8, 0.90, 0.95, 1) 51 | 52 | }else if(task == "testing"){ 53 | stopifnot(length(selected_bandwiths) == 3) 54 | 55 | ids_past <- tail(learn$id, n_past_obs) 56 | ids_future <- test$id 57 | 58 | nb_futuredays <- length(seq_testing_interval)/48 59 | }else if(task == "insample_info"){ 60 | stopifnot(length(selected_bandwiths) == 3) 61 | 62 | ids_past <- head(learn$id, n_past_obs) 63 | ids_future <- tail(learn$id, -n_past_obs) 64 | nb_futuredays <- length(ids_future)/48 65 | } 66 | 67 | results <- vector("list", nb_futuredays) 68 | 69 | ic_days <- calendar$periodOfCycle[ids_future][seq(1, length(ids_future), by = 48)] 70 | 71 | 72 | for(id_future_day in seq(1, nb_futuredays)){ 73 | 74 | offset_nhours <- (id_future_day - 1) * 48 75 | 76 | ids_future_hours <- ids_future[offset_nhours + seq(1, 48)] 77 | 78 | 79 | if(offset_nhours > 0){ 80 | #ids_past_actual <- c(tail(ids_past, -offset_nhours), head(ids_future, offset_nhours)) 81 | ids_past_actual <- c(ids_past, ids_future)[offset_nhours + seq(n_past_obs)] 82 | }else{ 83 | ids_past_actual <- ids_past 84 | } 85 | 86 | # if the day is of IC type 1, 2 or 3, use different bandwidths 87 | # bandwiths is either a vector (learning) or a single number (testing) 88 | if(task == "testing" || task == "insample_info"){ 89 | ic_day <- ic_days[id_future_day] 90 | bandwiths <- selected_bandwiths[ic_day] 91 | lambdas <- selected_lambdas[ic_day] 92 | } 93 | 94 | # results[[id_future_day]] <- lapply(ids_future_hours, function(id){kde(id, ids_past_actual, bandwiths, lambda, task)}) 95 | if(length(lambdas) == 1){ 96 | results[[id_future_day]] <- lapply(ids_future_hours, function(id){kde(id, ids_past_actual, bandwiths, lambdas, task)}) 97 | }else{ 98 | results[[id_future_day]] <- lapply(lambdas, function(lambda){ 99 | lapply(ids_future_hours, function(id){kde(id, ids_past_actual, bandwiths, lambda, task)}) 100 | }) 101 | } 102 | 103 | } 104 | list(results = results, bandwiths = bandwiths, lambdas = lambdas, ic_days = ic_days) 105 | } 106 | 107 | ###################################################### 108 | kde <- function(id_query, ids_data, bandwiths, lambda, task){ 109 | #print(id_query) 110 | #### 111 | if(algo == "KD-U"){ 112 | ids_data_kept <- ids_data 113 | }else if(grepl("KD-D", algo)){ 114 | ids_data_kept <- ids_data[which(calendar$periodOfDay[ids_data] == calendar$periodOfDay[id_query])] 115 | }else if(grepl("KD-IC", algo)){ 116 | 117 | if(calendar$periodOfCycle[id_query] == 1){ # MONDAY TO FRIDAY 118 |
is_selected <- calendar$periodOfCycle[ids_data] == calendar$periodOfCycle[id_query] & 119 | calendar$periodOfDay[ids_data] == calendar$periodOfDay[id_query] 120 | }else{ # SATURDAY AND SUNDAY 121 | if(calendar$periodOfDay[id_query] == 1){ 122 | is_selected <- (calendar$periodOfCycle[ids_data] == calendar$periodOfCycle[id_query] & 123 | calendar$periodOfDay[ids_data] %in% (calendar$periodOfDay[id_query] + seq(0, 1))) | 124 | (calendar$dweek[ids_data] == (calendar$dweek[id_query] - 1) & calendar$periodOfDay[ids_data] == 48) 125 | }else if(calendar$periodOfDay[id_query] == 48){ 126 | is_selected <- (calendar$periodOfCycle[ids_data] == calendar$periodOfCycle[id_query] & 127 | calendar$periodOfDay[ids_data] %in% (calendar$periodOfDay[id_query] + seq(-1, 0))) | 128 | (calendar$dweek[ids_data] == (calendar$dweek[id_query]%%7 + 1) & calendar$periodOfDay[ids_data] == 1) 129 | }else{ 130 | is_selected <- calendar$periodOfCycle[ids_data] == calendar$periodOfCycle[id_query] & 131 | calendar$periodOfDay[ids_data] %in% (calendar$periodOfDay[id_query] + seq(-1, 1)) 132 | } 133 | } 134 | 135 | is_selected <- calendar$periodOfWeek[ids_data] == calendar$periodOfWeek[id_query] 136 | 137 | ids_data_kept <- ids_data[which(is_selected)] 138 | n <- length(ids_data_kept) 139 | normalized_weights <- rep(1, n)/n 140 | 141 | do.weighting <- TRUE 142 | if(do.weighting){ 143 | weights_all <- lambda^floor((tail(ids_data) - ids_data)/336) 144 | weights_selected <- weights_all[which(is_selected)] 145 | normalized_weights <- weights_selected/sum(weights_selected) 146 | } 147 | } 148 | 149 | 150 | x <- demand[ids_data_kept] 151 | #### 152 | 153 | n <- length(x) 154 | minx <- min(x) 155 | maxx <- max(x) 156 | #logx <- log(x) 157 | 158 | lower_bound <- 0 #minx 159 | r <- 1 # 3 160 | 161 | #xgrid <- c(seq(from = minx , to = quantile(x, .9), length = 90), seq(from = quantile(x, .91), to = maxx , length = 10)) 162 | xgrid <- c(0, 163 | seq(from = minx , to = quantile(x, .9), length = 190), 164 | seq(from = quantile(x, .91), to = maxx, length = 10), 165 | seq(from = maxx, to = maxx + r* max(bandwiths), length = 3)) 166 | xgrid <- sort(unique(xgrid)) 167 | 168 | vec_crps <- residuals <- squared_error <- mu_hat <- var_hat <- numeric(length(bandwiths)) 169 | for(i in seq_along(bandwiths)){ 170 | h <- bandwiths[i] 171 | vech <- rep(h, length(x)) 172 | 173 | if(mykernel == "normal"){ 174 | 175 | ids_boundary <- which(x <= (lower_bound + r * h)) 176 | vech[ids_boundary] <- pmax((x[ids_boundary] - lower_bound) / r, min_bandwith) 177 | 178 | } 179 | 180 | cdfs <- sapply(seq(length(x)), function(i){ 181 | xi <- x[i] 182 | if(mykernel == "normal"){ 183 | #pnorm((xgrid - obs)/h)/(n) 184 | #pnorm((xgrid - xi)/vech[i])/(n) 185 | pnorm((xgrid - xi)/vech[i]) * normalized_weights[i] 186 | }else if(mykernel == "lognormal"){ 187 | plnorm(xgrid, meanlog = log(xi), sdlog = vech[i], lower.tail = TRUE, log.p = FALSE)/n 188 | }else if(mykernel == "truncated"){ 189 | ptnorm(xgrid, mean = xi, sd = vech[i], lower = lowerx, upper = upperx, lower.tail = TRUE, log.p = FALSE)/n 190 | } 191 | }) 192 | cdf <- rowSums(cdfs) 193 | 194 | if(task == "insample_info"){ 195 | 196 | mycdf <- function(xq){ 197 | sum(sapply(seq(length(x)), function(i){ 198 | xi <- x[i] 199 | pnorm((xq - xi)/vech[i]) * normalized_weights[i] 200 | })) 201 | } 202 | } 203 | 204 | 205 | # Mean forecasts 206 | if(mykernel == "normal"){ 207 | all_mus <- x 208 | }else if(mykernel == "lognormal"){ 209 | all_mus <- sapply(x, function(xi){ exp(log(xi) + (h^2)/2) }) 210 | }else if(mykernel == 
"truncated"){ 211 | all_mus <- sapply(x, function(xi){ 212 | alpha <- (lowerx - xi)/h 213 | beta <- (upperx - xi)/h 214 | xi + ((dnorm(alpha) - dnorm(beta))*h) / (pnorm(beta) - pnorm(alpha)) 215 | }) 216 | } 217 | #mu_hat[i] <- sum(all_mus)/n 218 | mu_hat[i] <- sum(all_mus * normalized_weights) 219 | 220 | if(mykernel == "normal"){ 221 | #var_hat[i] <- sum(x^2)/n + sum(vech^2)/n - (mu_hat[i])^2 222 | var_hat[i] <- sum(normalized_weights * ((x - mu_hat[i])^2 + vech^2)) 223 | } 224 | 225 | obs <- demand[id_query] 226 | 227 | 228 | if(task == "insample_info"){ 229 | upit <- mycdf(obs) 230 | #upit <- ecdf(x)(obs) 231 | } 232 | 233 | residuals[i] <- obs - mu_hat[i] 234 | squared_error[i] <- (residuals[i])^2 235 | 236 | if(task != "insample_info"){ 237 | if(mykernel == "normal"){ 238 | 239 | vec_crps[i] <- crps_mixture(x, vech, normalized_weights, obs) 240 | 241 | }else{ 242 | invkcdf <- approxfun(cdf, xgrid, rule = 2) 243 | X1 <- invkcdf(runif(1000)) 244 | vec_crps[i] <- crps_sampling(X1, obs) 245 | #vec_crps[i] <- mean(abs(X1 - obs)) - 0.5 * mean(abs(X1 - invkcdf(u2))) 246 | } 247 | } 248 | 249 | if(task != "learning"){ 250 | invcdf <- approxfun(cdf, xgrid, rule = 2) 251 | q_hat <- invcdf(taus) 252 | } 253 | 254 | }# bandwiths 255 | 256 | 257 | if(task == "learning"){ 258 | ret <- vec_crps #list(crps = crps) 259 | }else if(task == "testing"){ 260 | ret <- list(crps = vec_crps, squared_error = squared_error, q_hat = q_hat, mu_hat = mu_hat, var_hat = var_hat) 261 | }else if(task == "insample_info"){ 262 | ret <- list(residuals = residuals, q_hat = q_hat, mu_hat = mu_hat, var_hat = var_hat, upit = upit) 263 | }else{ 264 | stop("ERROR ...") 265 | } 266 | ret 267 | } --------------------------------------------------------------------------------