├── README.md
├── simulations
│   └── code
│       ├── .DS_Store
│       ├── .Rhistory
│       ├── README.md
│       ├── aggregation.R
│       ├── analysis.R
│       ├── analysis_utils.R
│       ├── bights.R
│       ├── code.R
│       ├── config_paths.R
│       ├── main.R
│       ├── mint.R
│       ├── nicefigs.R
│       ├── packages.R
│       ├── permutations.R
│       ├── runsim.sh
│       ├── simulate.R
│       └── utils.R
└── smart-meters
    └── code
        ├── MinT_ecov.R
        ├── README.md
        ├── aggregation.R
        ├── aggregation_merge.R
        ├── basef.R
        ├── config_general.R
        ├── config_paths.R
        ├── config_splitting.R
        ├── init_1_rawmeters.R
        ├── init_2_MyHierarchy.R
        ├── init_3_bottomlevel_meters.R
        ├── init_4_aggregated_meters.R
        ├── makebf_byhalfour.R
        ├── multiplot.R
        ├── nicefigs.R
        ├── permutations.R
        ├── plot_calendar_effects.R
        ├── plot_coherency.R
        ├── plot_coverage.R
        ├── plot_forecasts.R
        ├── plot_parameters.R
        ├── plot_series.R
        ├── plot_tree.R
        ├── results.R
        ├── results_utils.R
        ├── run_aggregation.sh
        ├── run_basef.sh
        ├── utils.R
        ├── utils_ets.R
        ├── utils_hts.R
        └── utils_kde.R

/README.md:
--------------------------------------------------------------------------------
1 | # Source code for *Hierarchical Probabilistic Forecasting of Electricity Demand with Smart Meter Data* by Souhaib Ben Taieb, James W. Taylor, and Rob J. Hyndman.
2 | 
3 | 1. The code for the simulation experiments is in [simulations](simulations/code).
4 | 
5 | 2. The code for the smart meter experiments is in [smart-meters](smart-meters/code).
6 | 
--------------------------------------------------------------------------------
list_simulations[[i]]$samples_true 457 | samples_true <- aperm(samples_true, c(2, 1, 3)) 458 | samples_method <- sapply(samples_pred, function(mat){ 459 | mat[, j, h] 460 | }) 461 | sapply(colnames(samples_method), function(mycol){ 462 | res_test <- ks.test(samples_method[, mycol], samples_true[, j, h]) 463 | #as.numeric(res_test$statistic) 464 | browser() 465 | as.numeric(res_test$pvalue) 466 | }) 467 | }) 468 | }, simplify = "array") 469 | mat_test <- sapply(seq(7), function(j){ 470 | sapply(seq(nb_simulations), function(i){ 471 | samples_pred <- list_simulations[[i]]$samples_pred 472 | samples_true <- list_simulations[[i]]$samples_true 473 | samples_true <- aperm(samples_true, c(2, 1, 3)) 474 | samples_method <- sapply(samples_pred, function(mat){ 475 | mat[, j, h] 476 | }) 477 | sapply(colnames(samples_method), function(mycol){ 478 | res_test <- ks.test(samples_method[, mycol], samples_true[, j, h]) 479 | #as.numeric(res_test$statistic) 480 | browser() 481 | as.numeric(res_test$p.value) 482 | }) 483 | }) 484 | }, simplify = "array") 485 | mat_test <- aperm(mat_test, c(2, 1, 3)) 486 | #par(mfrow = c(2, 4)) 487 | par(mfrow = c(2, 3)) 488 | for(j in seq(7)){ 489 | boxplot(mat_test[, , j]) 490 | } 491 | mat_test <- sapply(seq(7), function(j){ 492 | sapply(seq(nb_simulations), function(i){ 493 | samples_pred <- list_simulations[[i]]$samples_pred 494 | samples_true <- list_simulations[[i]]$samples_true 495 | samples_true <- aperm(samples_true, c(2, 1, 3)) 496 | samples_method <- sapply(samples_pred, function(mat){ 497 | mat[, j, h] 498 | }) 499 | sapply(colnames(samples_method), function(mycol){ 500 | res_test <- ks.test(samples_method[, mycol], samples_true[, j, h]) 501 | #as.numeric(res_test$statistic) 502 | #browser() 503 | as.numeric(res_test$p.value) 504 | }) 505 | }) 506 | }, simplify = "array") 507 | mat_test <- aperm(mat_test, c(2, 1, 3)) 508 | #par(mfrow = c(2, 4)) 509 | par(mfrow = c(2, 3)) 510 | for(j in seq(7)){ 511 | boxplot(mat_test[, , j]) 512 | } 513 | -------------------------------------------------------------------------------- /simulations/code/README.md: -------------------------------------------------------------------------------- 1 | # Source code for the simulation experiments of the paper *Hierarchical Probabilistic Forecasting of Electricity Demand with Smart Meter Data* by Ben Taieb, Souhaib, Taylor, James, and Hyndman, Rob. 2 | 3 | 4 | The code implements all the results for the simulation experiments in Section 7. 5 | 6 | # Usage 7 | 8 | 1. Specify the scenario in *runsim.sh*, i.e. specify experiment, marg, Tlearning, M, Trueparam, etc. See runsim.sh for some examples. 9 | 2. Run the script *runsim.sh* 10 | ``` 11 | ./runsim.sh 12 | ``` 13 | 3. To produce the figures with the results, run the script *analysis.R* with the right scenario, as in step 1. 14 | ``` 15 | ./analysis.sh 16 | ``` 17 | 4. The results will then be available in the folder *work*. 18 | -------------------------------------------------------------------------------- /simulations/code/aggregation.R: -------------------------------------------------------------------------------- 1 | # This function applies the permutations to the samples generated from the marginal distributions. 2 | # In other words, it restores the dependences between the variables. 
3 | permutate_samples <- function(my_bights, my_samples, list_matpermutations){ 4 | 5 | n_permutations <- unique(sapply(list_matpermutations, nrow)) 6 | stopifnot(length(n_permutations) == 1) 7 | 8 | stopifnot(nrow(my_samples) == n_permutations) 9 | 10 | itree <- my_bights$itree 11 | aggSeries <- my_bights$anames 12 | bottomSeries <- my_bights$bnames 13 | A <- my_bights$A 14 | ########## 15 | # compute the parsing order of the aggregate nodes 16 | leaves <- V(itree)[degree(itree, mode="out") == 0] 17 | agg_nodes <- V(itree)[degree(itree, mode="out") != 0] 18 | 19 | depth_aggnodes <- sapply(agg_nodes, function(agg_node){ 20 | vec <- distances(itree, agg_node, leaves, mode = "out") 21 | max( vec[which(vec!=Inf)]) 22 | }) 23 | 24 | ordered_agg_nodes_names <- names(sort(depth_aggnodes)) 25 | ordered_agg_nodes <- V(itree)[match(ordered_agg_nodes_names, V(itree)$name)] 26 | ########## 27 | 28 | base_samples <- my_samples 29 | 30 | base_samples_bottom <- my_samples[, seq(my_bights$naggts + 1, my_bights$nts)] 31 | colnames(base_samples_bottom) <- my_bights$bnames 32 | 33 | perm_samples_bottom <- base_samples_bottom 34 | variables <- colnames(perm_samples_bottom) 35 | 36 | mat_test <- NULL 37 | # PERM-BU 38 | for(inode in seq_along(ordered_agg_nodes)){ 39 | #print(inode) 40 | 41 | agg_node <- ordered_agg_nodes[inode] 42 | idseries_agg <- names(agg_node) 43 | iagg <- match(idseries_agg, aggSeries) 44 | children_nodes <- ego(itree, order = 1, nodes = agg_node, mode = "out")[[1]][-1] 45 | nkids <- length(children_nodes) 46 | 47 | mat_permutations <- list_matpermutations[[idseries_agg]] 48 | 49 | ranks_historical <- mat_permutations 50 | stopifnot(all(colnames(ranks_historical) == names(children_nodes))) 51 | 52 | depth_node <- depth_aggnodes[match(idseries_agg, names(depth_aggnodes))] 53 | 54 | samples_children <- matrix(NA, nrow = M, ncol = nkids) 55 | 56 | columns_agg <- which(children_nodes %in% agg_nodes) 57 | columns_bottom <- which(children_nodes %in% leaves) 58 | children_names <- names(children_nodes) 59 | 60 | # Extracting/computing the samples for each child 61 | if(length(columns_agg) > 0){ 62 | id_agg_children <- match(children_names[columns_agg], aggSeries) 63 | samples_agg_children <- as.matrix(t(tcrossprod(A[id_agg_children, , drop = F], perm_samples_bottom))) 64 | samples_children[, columns_agg] <- samples_agg_children 65 | } 66 | 67 | if(length(columns_bottom) > 0){ 68 | id_bottom_children <- match(children_names[columns_bottom], bottomSeries) 69 | samples_children[, columns_bottom] <- perm_samples_bottom[, id_bottom_children] 70 | } 71 | 72 | # Computing the ranks of the samples for each child 73 | ranks_samples_children <- sapply(seq(ncol(samples_children)), function(j){ 74 | rank(samples_children[, j], ties.method = "random") 75 | }) 76 | 77 | index_mat <- sapply(seq(nkids), function(j){ 78 | res <- match(ranks_historical[, j], ranks_samples_children[, j]) 79 | stopifnot(all(!is.na(res))) 80 | res 81 | }) 82 | 83 | # Permutating the rows 84 | if(length(columns_bottom) > 0){ 85 | perm_samples_bottom[, id_bottom_children] <- sapply(seq_along(id_bottom_children), function(j){ 86 | perm_samples_bottom[index_mat[, columns_bottom[j]], id_bottom_children[j]] 87 | }) 88 | } 89 | 90 | if(length(columns_agg) > 0){ 91 | res <- lapply(seq_along(id_agg_children), function(j){ 92 | id <- which(A[id_agg_children[j], ] == 1) 93 | perm_samples_bottom[index_mat[, columns_agg[j]], id, drop = F] 94 | }) 95 | ids <- lapply(id_agg_children, function(id_agg_child){ 96 | which(A[id_agg_child, ] == 1) 97 | 
}) 98 | ids <- unlist(ids) 99 | perm_samples_bottom[, ids] <- do.call(cbind, res) 100 | } 101 | 102 | }# agg node 103 | 104 | perm_samples_bottom 105 | } 106 | -------------------------------------------------------------------------------- /simulations/code/analysis.R: -------------------------------------------------------------------------------- 1 | # This script produces the figures with all the accuracy measures. 2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("utils.R") 5 | source("nicefigs.R") 6 | source("analysis_utils.R") 7 | library(igraph) 8 | library(kSamples) 9 | library(car) 10 | graphics.off() 11 | 12 | probs <- seq(1, 99)/100 13 | 14 | # task can be: "pvalue" "mse" "wasserstein" 15 | task <- "wasserstein" 16 | # experiment can be: "small" "newlarge" 17 | experiment <- "newlarge" 18 | 19 | # T_learn <- 10000 20 | # M <- 1000 21 | T_learn <- 500 22 | M <- 500 23 | marg <- "norm" 24 | #marg <- "t" 25 | use.trueparam <- FALSE #TRUE 26 | tag <- "jasa" 27 | set_jobs <- seq(1, 12) 28 | 29 | h <- 1 30 | all_simulations <- NULL 31 | for(idjob in set_jobs){ 32 | print(idjob) 33 | filetoload <- nameResfile(experiment, marg, T_learn, M, use.trueparam, idjob, tag) 34 | load(filetoload) 35 | all_simulations <- c(all_simulations, list_simulations) 36 | } 37 | 38 | idnull <- which(sapply(all_simulations, is.null)) 39 | if(length(idnull) > 0){ 40 | all_simulations <- all_simulations[-which(sapply(all_simulations, is.null))] 41 | } 42 | 43 | nb_simulations <- length(all_simulations) 44 | print(nb_simulations) 45 | 46 | 47 | load(file.path(rdata.folder, paste("info_", experiment, ".Rdata", sep = ""))) 48 | nts <- my_bights$nts 49 | naggts <- my_bights$naggts 50 | node_names <- names(V(my_bights$itree)) 51 | 52 | 53 | for(i in seq_along(all_simulations)){ 54 | x_mintshrink <- all_simulations[[i]]$samples_pred$mintshrink 55 | x_indepbu <- all_simulations[[i]]$samples_pred$indepbu 56 | x_indepbumintshrink <- x_indepbu 57 | 58 | mean_mintshrink <- apply(x_mintshrink, c(2, 3), mean) 59 | mean_indepbu <- apply(x_indepbu, c(2, 3), mean) 60 | 61 | nvar <- dim(x_indepbumintshrink)[1] 62 | for(k in seq(nvar)){ 63 | x_indepbumintshrink[k, , ] <- x_indepbu[k, , ] - mean_indepbu + mean_mintshrink 64 | } 65 | 66 | all_simulations[[i]]$samples_pred$indepbumintshrink <- x_indepbumintshrink 67 | } 68 | 69 | 70 | mycols <- c("red", "green" , "grey", "blue", "purple", "yellow", "orange") 71 | # "mintdiagonal" "mintshrink" "base" "depbu" "depbumint" "indepbu" "indepbumintshrink" 72 | 73 | name_methods <- c(names(all_simulations[[1]]$samples_pred), "true") 74 | 75 | id1 <- id2 <- seq(M) 76 | 77 | id_print <- c(1, 2, 30) 78 | #id_print <- c(1, 2, 30, 40, 50, 60) 79 | 80 | mat_test <- sapply(id_print, function(j){ 81 | if(j%%10 == 0) 82 | print(j) 83 | 84 | sapply(seq(nb_simulations), function(i){ 85 | 86 | samples_pred <- all_simulations[[i]]$samples_pred 87 | samples_true <- all_simulations[[i]]$samples_true 88 | samples_true <- aperm(samples_true, c(2, 1, 3)) 89 | 90 | samples_method <- sapply(samples_pred, function(mat){ 91 | mat[, j, h] 92 | }) 93 | 94 | x2 <- samples_true[, j, h] 95 | if(task == "mse"){ 96 | mu <- mean(x2) 97 | RET <- sapply(colnames(samples_method), function(mycol){ 98 | x1 <- samples_method[, mycol] 99 | (mean(x1) - mu)^2 100 | }) 101 | }else if(task == "pvalue"){ 102 | RET <- sapply(colnames(samples_method), function(mycol){ 103 | x1 <- samples_method[, mycol] 104 | ks.test(x1[id1], x2[id2])$p.value 105 | }) 106 | }else if(task == "wasserstein"){ 107 | true_quantiles <- 
quantile(x2, probs)
108 | RET <- sapply(colnames(samples_method), function(mycol){
109 | x1 <- samples_method[, mycol]
110 | log(mean( (quantile(x1, probs) - true_quantiles)^2))
111 | })
112 | }
113 | RET
114 | })
115 | }, simplify = "array")
116 | 
117 | mat_test <- aperm(mat_test, c(2, 1, 3))
118 | print(dimnames(mat_test))
119 | 
120 | 
121 | ######### BOXPLOTS (pvalues, mse, wasserstein)
122 | myfile <- namePdffile(experiment, marg, T_learn, M, use.trueparam,
123 | paste(tag, "-", task, sep = "") )
124 | 
125 | if(T_learn == 500){
126 | savepdf(myfile, width = 21 * 0.9, height = 29.7 * 0.25)
127 | par(mfrow = c(1, 3), mar=c(5.05,3,2,2))
128 | }else{
129 | savepdf(myfile, width = 21 * 0.9, height = 29.7 * 0.20)
130 | par(mfrow = c(1, 3), mar=c(1,3,2,2))
131 | }
132 | 
133 | 
134 | for(j in seq(dim(mat_test)[3])){
135 | mymat <- mat_test[, , j]
136 | 
137 | if(T_learn == 500){
138 | colnames(mymat) <- sapply(colnames(mymat), bettername)
139 | myxaxt <- NULL
140 | }else{
141 | myxaxt <- 'n'
142 | }
143 | 
144 | if(task != "mse"){
145 | if(j == 1){
146 | mymain <- "Level 1 (top level)"
147 | }else if(j == 3){
148 | mymain <- "Level 3 (bottom level)"
149 | }else{
150 | mymain <- "Level 2"
151 | #paste("Level ", j, ifelse(j == 3, "(bottom level)", "") )
152 | }
153 | 
154 | 
155 | if(task == "pvalue"){
156 | boxplot(mymat, las=2, ylab = "p-value", main = mymain,
157 | col = mycols, yaxt = 'n', xaxt = myxaxt, cex.axis = 1, outline = FALSE)
158 | axis(2, at = c(0, 0.25, 0.5, 0.75, 1), cex.axis = 1.2)
159 | abline(h = c(0, 0.25, 0.5, 0.75, 1), lwd = 0.1)
160 | }else{
161 | boxplot(mymat, las=2, ylab = "2-Wasserstein distance (log scale)",
162 | main = mymain, col = mycols, xaxt = myxaxt, cex.axis = 1, outline = FALSE)
163 | }
164 | 
165 | }else{
166 | boxplot(mymat, las=2, ylab = "MSE",
167 | main = paste("Level ", j, ifelse(j == 3, "(bottom level)", "") ),
168 | col = mycols, cex.axis = 1.2,
169 | outline = FALSE)
170 | }
171 | }
172 | 
173 | dev.off()
--------------------------------------------------------------------------------
/simulations/code/analysis_utils.R:
--------------------------------------------------------------------------------
1 | 
2 | bettername <- function(namemethod, option = 1){
3 | if(namemethod == "mintdiagonal"){
4 | #RET <- "NL-MD"
5 | RET <- ifelse(option == 1, "Norm- \n MinTDiag", "Norm-MinTDiag")
6 | }else if(namemethod == "mintshrink"){
7 | #RET <- "NL-MS"
8 | RET <- ifelse(option == 1, "Norm- \n MinTShrink", "Norm-MinTShrink")
9 | }else if(namemethod == "base"){
10 | RET <- "BASE"
11 | }else if(namemethod == "depbu"){
12 | #RET <- "DB-NM"
13 | RET <- ifelse(option == 1, "DepBU- \n NoMinT", "DepBU-NoMinT")
14 | }else if(namemethod == "depbumint"){
15 | #RET <- "DB-MS"
16 | RET <- ifelse(option == 1, "DepBU- \n MinTShrink", "DepBU-MinTShrink")
17 | }else if(namemethod == "indepbu"){
18 | #RET <- "IB-NM"
19 | RET <- ifelse(option == 1, "IndepBU- \n NoMinT", "IndepBU-NoMinT")
20 | }else if(namemethod == "indepbumintshrink"){
21 | #RET <- "IB-MS"
22 | RET <- ifelse(option == 1, "IndepBU- \n MinTShrink", "IndepBU-MinTShrink")
23 | }else{
24 | RET <- namemethod
25 | }
26 | RET
27 | }
--------------------------------------------------------------------------------
/simulations/code/bights.R:
--------------------------------------------------------------------------------
1 | # This script defines the "bights" object, i.e. the big hierarchical time series, with associated functions.
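
A minimal sketch of the structure this file builds, assuming only the Matrix package (a toy hierarchy with one aggregate and two bottom series; all names are illustrative): the summing matrix S = rbind(A, I) maps the bottom series to all series, and the bottom-up projection P_BU maps forecasts for all series back to a coherent set.

```
library(Matrix)
A <- Matrix(c(1, 1), nrow = 1, sparse = TRUE)  # the aggregate is the sum of the two bottom series
bts <- matrix(rnorm(20), ncol = 2)             # 10 observations of the two bottom series
S <- rbind(A, Diagonal(2))                     # summing matrix: bottom series -> all series
yts <- cbind(as.matrix(t(A %*% t(bts))), bts)  # aggregate first, then bottom, as in bights()
stopifnot(all.equal(yts[, 1], bts[, 1] + bts[, 2]))
# S %*% P_BU turns any (possibly incoherent) vector of forecasts for all
# series into a coherent one by keeping the bottom rows and re-aggregating
P_BU <- cbind(matrix(0, 2, 1), diag(2))
yhat <- rnorm(3)                               # incoherent forecasts: aggregate + 2 bottom
ytilde <- as.numeric(S %*% P_BU %*% yhat)
stopifnot(all.equal(ytilde[1], ytilde[2] + ytilde[3]))
```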
2 | make.data <- function(obj_bights, list_subsets, H){
3 | list_basef <- lapply(list_subsets, function(ts_split){
4 | subseries(obj_bights, ts_split = ts_split, H = H, do.forecast = T, keep.data = F)
5 | })
6 | 
7 | Yhat <- simplify2array(lapply(list_basef, "[[", "predictions"))
8 | Y <- simplify2array(lapply(list_basef, "[[", "future_data"))
9 | list(Yhat = Yhat, Y = Y)
10 | }
11 | 
12 | bights <- function(bts, A) {
13 | nbts <- ncol(bts)
14 | naggts <- nrow(A)
15 | nts <- naggts + nbts
16 | Tobs <- nrow(bts)
17 | 
18 | A <- methods::as(A, "sparseMatrix")
19 | S <- rbind(A, Diagonal(nbts))
20 | S <- methods::as(S, "sparseMatrix")
21 | 
22 | yts <- matrix(NA, nrow = nrow(bts), ncol = nts)
23 | 
24 | if (nbts <= 1L) {
25 | stop("Argument bts must be a multivariate time series.", call. = FALSE)
26 | }
27 | 
28 | yts[, seq(naggts)] <- as.matrix(t(A %*% t(bts)))
29 | yts[, seq(naggts + 1, nts)] <- bts
30 | 
31 | output <- structure(
32 | list(yts = yts, A = A, S = S, nbts = nbts, naggts = naggts, nts = nts, Tobs = Tobs),
33 | class = c("bights")
34 | )
35 | return(output)
36 | }
37 | 
38 | 
39 | subseries <- function(bights, ts_split, H = 1, do.forecast = FALSE, keep.data = FALSE) {
40 | 
41 | learn_data <- bights$yts[seq(ts_split[1], ts_split[2]), ]
42 | 
43 | interval_future <- ts_split[2] + seq(1, H)
44 | 
45 | future_data <- bights$yts[interval_future, , drop = F]
46 | 
47 | predictions <- NULL
48 | if(do.forecast){
49 | predictions <- sapply(seq(bights$nts), function(j){
50 | model <- fit_fct(learn_data[, j], "ARIMA")
51 | forecast(model, h = H)$mean
52 | })
53 | }
54 | 
55 | output <- list(future_data = future_data, predictions = predictions)
56 | 
57 | 
58 | if(keep.data)
59 | output <- c(output, list(learn_data = learn_data))
60 | 
61 | return(output)
62 | }
--------------------------------------------------------------------------------
/simulations/code/code.R:
--------------------------------------------------------------------------------
1 | # This script defines some functions to generate the base forecasts.
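
For reference, a single-series sketch of the rolling-origin scheme that `makebf` and `rolling.forecast` implement below (toy AR(1) data; unlike the real code, this refits only at the first origin, uses a growing window, and omits the parallelism and residual bookkeeping):

```
library(forecast)
set.seed(1)
series <- as.numeric(arima.sim(list(ar = 0.6), n = 120))
H <- 2
origins <- seq(100, 118)                  # forecast origins (end of each learning window)
mf <- future <- matrix(NA, length(origins), H)
for (i in seq_along(origins)) {
  learn <- series[1:origins[i]]
  if (i == 1) {
    model <- auto.arima(learn, seasonal = FALSE, stationary = TRUE)
  } else {
    model <- Arima(learn, model = model)  # reapply the fitted model to the longer sample
  }
  f <- forecast(model, h = H, level = seq(2, 98, by = 2))
  mf[i, ] <- as.numeric(f$mean)
  future[i, ] <- series[origins[i] + seq_len(H)]
  # Quantile forecasts at horizon h are read off as c(rev(f$lower[h, ]), f$upper[h, ]),
  # as in rolling.forecast() below
}
mean((mf[, 1] - future[, 1])^2)           # 1-step-ahead MSE of the base forecasts
```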
2 | makebf <- function(obj_bights, list_subsets, H, config_forecast_agg, config_forecast_bot, refit_step, mc.cores = 1){ 3 | n <- obj_bights$nts 4 | m <- obj_bights$nbts 5 | results <- vector("list", n) 6 | 7 | results <- mclapply(seq(n), function(j){ 8 | if(j <= n-m){ 9 | config_forecast <- config_forecast_agg 10 | }else{ 11 | config_forecast <- config_forecast_bot 12 | } 13 | 14 | rolling.forecast(obj_bights$yts[, j], list_subsets, H, config_forecast, refit_step = refit_step, j) 15 | }, mc.cores = mc.cores) 16 | 17 | allmf <- simplify2array(lapply(results, "[[", "mf")) 18 | allfuture <- simplify2array(lapply(results, "[[", "future")) 19 | allqf <- lapply(results, "[[", "list_qf") 20 | allresiduals <- simplify2array(lapply(results, "[[", "e_residuals")) 21 | 22 | allmf <- aperm(allmf, c(3, 1, 2)) 23 | allfuture <- aperm(allfuture, c(3, 1, 2)) 24 | allresiduals <- t(allresiduals) 25 | 26 | list(allmf = allmf, allfuture = allfuture, allqf = allqf, allresiduals = allresiduals) 27 | 28 | } 29 | 30 | rolling.forecast <- function(series, list_subsets, H, config_forecast, refit_step, idseries){ 31 | n_subsets <- length(list_subsets) 32 | mf <- future <- matrix(NA, nrow = n_subsets, ncol = H) 33 | list_qf <- vector("list", n_subsets) 34 | 35 | fit_fct <- config_forecast$fit_fct 36 | refit_fct <- config_forecast$refit_fct 37 | param_fit_fct <- config_forecast$param_fit_fct 38 | param_refit_fct <- config_forecast$param_refit_fct 39 | param_forecast <- config_forecast$param_forecast 40 | 41 | 42 | for(i in seq(n_subsets)){ 43 | ts_split <- list_subsets[[i]] 44 | if(is.ts(series)){ 45 | learn_series <- subset(series, start = ts_split[1], end = ts_split[2]) 46 | }else{ 47 | learn_series <- series[seq(ts_split[1], ts_split[2])] 48 | } 49 | 50 | future[i, ] <- series[ts_split[2] + seq(1, H)] 51 | 52 | 53 | if( (i-1) %% refit_step == 0){ 54 | if(use.trueparam && idseries > my_bights$naggts){ 55 | ar_param <- obj_simul$param$ar_param[[idseries - my_bights$naggts]] 56 | ma_param <- obj_simul$param$ma_param[[idseries - my_bights$naggts]] 57 | stopifnot(!is.null(ar_param) && !is.null(ma_param)) 58 | model <- Arima(y = learn_series, order = c(length(ar_param), 0, length(ma_param)), 59 | fixed = c(ar_param, ma_param, 0)) 60 | }else{ 61 | model <- do.call(fit_fct, c(list(y = learn_series), param_fit_fct)) 62 | } 63 | 64 | }else{ 65 | model <- do.call(refit_fct, c(list(y = learn_series, model = model), param_refit_fct)) 66 | } 67 | 68 | if(i == 1){ 69 | e_residuals <- as.numeric(resid(model)) 70 | } 71 | 72 | 73 | f <- do.call(forecast, c(list(object = model, h = H), param_forecast)) 74 | mf[i, ] <- f$mean 75 | quantf <- sapply(seq(H), function(h){ 76 | c(rev(f$lower[h, ]), f$upper[h, ]) 77 | }) 78 | list_qf[[i]] <- quantf 79 | 80 | #nsamples <- param_forecast$npaths 81 | #f <- t(replicate(nsamples, simulate(model, bootstrap = do.bootstrap.residuals, nsim = H, future = T))) 82 | #mf[i, ] <- apply(f, 2, mean) 83 | #list_qf[[i]] <- f 84 | } 85 | 86 | output <- list(future = future, mf = mf, list_qf = list_qf, e_residuals = e_residuals) 87 | } 88 | 89 | 90 | makeINFO <- function(tags){ 91 | myedges <- data.frame(rbind(cbind(tags[, 1], tags[, 2]), 92 | cbind(tags[, 2], tags[, 3]))) 93 | itree <- graph.data.frame(myedges) 94 | itree <- simplify(itree, remove.loops = F) 95 | 96 | 97 | # Compute A - for each agg. 
node, compute the associated leaves
98 | all.nodes.names <- V(itree)$name
99 | agg.nodes.names <- aggSeries <- all.nodes.names[which(degree(itree, V(itree), "out")!=0)]
100 | n_agg <- length(agg.nodes.names)
101 | 
102 | bottomSeries <- tags[, ncol(tags)]
103 | n_bottom <- ncol(bts)
104 | A <- matrix(0, nrow = n_agg, ncol = n_bottom)
105 | 
106 | for(i in seq_along(agg.nodes.names)){
107 | agg.node.name <- agg.nodes.names[i]
108 | reachable <- which(shortest.paths(itree, agg.node.name, mode="out") != Inf)
109 | terminal.nodes <- reachable[which(degree(itree, reachable, mode="out") == 0)]
110 | terminal.nodes.names <- all.nodes.names[terminal.nodes]
111 | ids <- match(terminal.nodes.names, bottomSeries)
112 | stopifnot(all(!is.na(ids)))
113 | A[i, ids] <- 1
114 | }
115 | output <- list(bottomSeries = bottomSeries, aggSeries = aggSeries, itree = itree,
116 | A = A, n_agg = n_agg, n_bottom = n_bottom)
117 | return(output)
118 | }
119 | 
120 | 
121 | makeINFO2 <- function(tags){
122 | myedges <- data.frame(do.call(rbind, lapply(seq(ncol(tags) - 1), function(j){
123 | cbind(tags[, j], tags[, j+1])
124 | })))
125 | 
126 | itree <- graph.data.frame(myedges)
127 | itree <- simplify(itree, remove.loops = F)
128 | 
129 | 
130 | # Compute A - for each agg. node, compute the associated leaves
131 | all.nodes.names <- V(itree)$name
132 | agg.nodes.names <- aggSeries <- all.nodes.names[which(degree(itree, V(itree), "out")!=0)]
133 | n_agg <- length(agg.nodes.names)
134 | 
135 | bottomSeries <- tags[, ncol(tags)]
136 | n_bottom <- length(bottomSeries)
137 | A <- matrix(0, nrow = n_agg, ncol = n_bottom)
138 | 
139 | for(i in seq_along(agg.nodes.names)){
140 | agg.node.name <- agg.nodes.names[i]
141 | reachable <- which(shortest.paths(itree, agg.node.name, mode="out") != Inf)
142 | terminal.nodes <- reachable[which(degree(itree, reachable, mode="out") == 0)]
143 | terminal.nodes.names <- all.nodes.names[terminal.nodes]
144 | ids <- match(terminal.nodes.names, bottomSeries)
145 | stopifnot(all(!is.na(ids)))
146 | A[i, ids] <- 1
147 | }
148 | output <- list(bottomSeries = bottomSeries, aggSeries = aggSeries, itree = itree,
149 | A = A, n_agg = n_agg, n_bottom = n_bottom)
150 | return(output)
151 | }
--------------------------------------------------------------------------------
/simulations/code/config_paths.R:
--------------------------------------------------------------------------------
1 | 
2 | main.folder <- "../"
3 | work.folder <- file.path(main.folder, "work")
4 | rdata.folder <- file.path(work.folder, "rdata")
5 | pdf.folder <- file.path(work.folder, "pdfs")
6 | rout.folder <- file.path(work.folder, "rout")
--------------------------------------------------------------------------------
/simulations/code/main.R:
--------------------------------------------------------------------------------
1 | # This is the main script, which computes the base forecasts as well as all the revised forecasts from the other methods.
2 | # The arguments are:
3 | # experiment: the experiment you want to perform ("small", "newlarge")
4 | # marg: the distribution of the errors ("norm", "t")
5 | # T_learn: the size of the learning set (training + validation)
6 | # M: the number of observations per sample (when sampling from the predictive distributions)
7 | # idjob: the job id (useful when using distributed computing, see runsim.sh)
8 | # nb_simulations: the number of simulations to perform
9 | # nbcores: the number of cores used by the job
10 | # use.trueparam: do you want to use the true AR parameters?
(TRUE, FALSE) 11 | rm(list = ls()) 12 | assign("last.warning", NULL, envir = baseenv()) 13 | args = (commandArgs(TRUE)) 14 | if(length(args) == 0){ 15 | experiment <- "newlarge" 16 | marg <- "norm" 17 | T_learn <- 500 18 | M <- 500 19 | idjob <- 1986 20 | nb_simulations <- 1000 21 | nbcores <- 1 22 | use.trueparam <- TRUE 23 | }else{ 24 | 25 | for(i in 1:length(args)){ 26 | print(args[[i]]) 27 | } 28 | 29 | experiment <- args[[1]] 30 | marg <- args[[2]] 31 | T_learn <- as.integer(args[[3]]) 32 | M <- as.integer(args[[4]]) 33 | use.trueparam <- as.logical(args[[5]]) 34 | idjob <- as.integer(args[[6]]) 35 | nb_simulations <- as.integer(args[[7]]) 36 | nbcores <- as.integer(args[[8]]) 37 | } 38 | 39 | set.seed(idjob) 40 | 41 | graphics.off() 42 | 43 | source("config_paths.R") 44 | source("packages.R") 45 | source("bights.R") 46 | source("simulate.R") 47 | source("utils.R") 48 | source("mint.R") 49 | source("code.R") 50 | source("permutations.R") 51 | source("aggregation.R") 52 | source("nicefigs.R") 53 | 54 | tag <- "jasa" 55 | do.recovery <- TRUE 56 | 57 | fixed.dgp <- FALSE 58 | do.bootstrap.residuals <- TRUE 59 | do.correction <- FALSE 60 | 61 | print(fixed.dgp) 62 | print(use.trueparam) 63 | print(do.bootstrap.residuals) 64 | print(do.correction) 65 | 66 | if(use.trueparam){ 67 | print("True parameters are used at the bottom and the top levels !") 68 | } 69 | 70 | print(experiment) 71 | print(marg) 72 | print(M) 73 | 74 | q_probs <- runif(M) 75 | n_true_paths <- M 76 | n_forecast_paths <- M 77 | 78 | H <- 2 79 | mc.cores.basef <- nbcores 80 | refit_step <- 40 81 | T_test <- ifelse(do.recovery, H, 500) 82 | T_all <- T_learn + T_test 83 | n_warm <- 500 84 | n_simul <- T_all 85 | 86 | print(T_learn) 87 | 88 | obj.path <- NULL 89 | if(do.recovery){ 90 | obj.path <- list(npaths = n_true_paths) 91 | } 92 | 93 | config_forecast_bot <- list(fit_fct = auto.arima, refit_fct = Arima, 94 | param_fit_fct = list(seasonal = FALSE, stationary = TRUE, approximation = FALSE, ic = "bic"), # max.p = 2, max.q = 2, 95 | param_refit_fct = list(use.initial.values = TRUE), 96 | param_forecast = list( bootstrap = do.bootstrap.residuals, npaths = n_forecast_paths )) 97 | 98 | config_forecast_agg <- list(fit_fct = auto.arima, refit_fct = Arima, 99 | param_fit_fct = list(seasonal = FALSE, stationary = TRUE, approximation = FALSE, ic = "bic"), 100 | param_refit_fct = list(use.initial.values = TRUE), 101 | param_forecast = list( bootstrap = do.bootstrap.residuals, npaths = n_forecast_paths )) 102 | 103 | list_simulations <- vector("list", nb_simulations) 104 | for(i in seq_along(list_simulations)){ 105 | 106 | print(paste(i, " - Start ALL -", base::date(), sep = "")) 107 | 108 | if(experiment == "newlarge"){ 109 | obj_simul <- list() 110 | obj_simul$param <- NULL 111 | obj_simul$param$ar_param <- obj_simul$param$ma_param <- NULL 112 | 113 | ngroups <- 25 114 | list_params <- vector("list", ngroups) 115 | 116 | res <- NULL 117 | for(igroup in seq(ngroups)){ 118 | NM <- cbind(paste("T", igroup, sep = "") , 119 | c( paste("A", igroup, sep = "") , paste("B", igroup, sep = "") , 120 | paste("C", igroup, sep = "") , paste("D", igroup, sep = "") )) 121 | res <- rbind(res, NM) 122 | } 123 | res <- cbind("T", res) 124 | 125 | tags <- cbind(res[, 1], sapply(seq(2, ncol(res)), function(j)( 126 | apply(res[, seq(j)], 1, paste, collapse = "") 127 | ))) 128 | 129 | myinfo <- makeINFO2(tags) 130 | A <- myinfo$A 131 | 132 | bts <- matrix(NA, nrow = n_simul, ncol = ngroups * 4) 133 | sample_paths_bottom <- array(NA, c(H, ngroups * 4, 
obj.path$npaths)) 134 | for(igroup in seq(ngroups)){ 135 | res_sim <- simulate_small_hts(n_simul, marg, obj.path) 136 | obj_simul$param$ar_param <- c(obj_simul$param$ar_param, res_sim$param$ar_param) 137 | obj_simul$param$ma_param <- c(obj_simul$param$ma_param, res_sim$param$ma_param) 138 | 139 | id <- seq((igroup - 1) * 4 + 1, (igroup - 1) * 4 + 4 ) 140 | bts[, id] <- res_sim$bts 141 | sample_paths_bottom[, id, ] <- res_sim$sample_paths 142 | } 143 | 144 | }else if(experiment == "small"){ 145 | obj_simul <- simulate_small_hts(n_simul, marg, obj.path) 146 | bts <- obj_simul$bts 147 | 148 | NM <- cbind(rep("T", 4), rep(c("A", "B"), each = 2), rep(c("A", "B"), 2)) 149 | tags <- cbind(NM[, 1], sapply(seq(2, ncol(NM)), function(j)( 150 | apply(NM[, seq(j)], 1, paste, collapse = "") 151 | ))) 152 | 153 | myinfo <- makeINFO(tags) 154 | A <- myinfo$A 155 | sample_paths_bottom <- obj_simul$sample_paths 156 | 157 | } 158 | ########### 159 | my_bights <- bights(bts, A) 160 | my_bights$itree <- myinfo$itree 161 | my_bights$bnames <- myinfo$bottomSeries 162 | my_bights$anames <- myinfo$aggSeries 163 | my_bights$allnames <- c(myinfo$aggSeries, myinfo$bottomSeries) 164 | 165 | 166 | sample_paths <- sapply(seq(H), function(h){ 167 | as.matrix(my_bights$S %*% sample_paths_bottom[h, , ]) 168 | }, simplify = 'array') 169 | 170 | print(paste("Data simulated: ", " ", base::date())) 171 | 172 | P_BU <- pbu(my_bights) 173 | 174 | infofile <- file.path(rdata.folder, paste("info_", experiment, ".Rdata", sep = "")) 175 | save(file = infofile, list = c("my_bights")) 176 | 177 | list_subsets_test <- lapply(seq(T_learn, T_all - H), function(i){c(i - T_learn + 1, i)}) 178 | 179 | 180 | results <- makebf(my_bights, list_subsets_test, H = H, 181 | config_forecast_agg = config_forecast_agg, config_forecast_bot = config_forecast_bot, 182 | refit_step = refit_step, mc.cores = mc.cores.basef) 183 | 184 | e_hat <- results$allresiduals 185 | row.names(e_hat) <- my_bights$allnames 186 | e_hat <- e_hat[, sample(seq(ncol(e_hat)), M)] 187 | 188 | mint_methods <- list(mintdiagonal = "diagonal", mintshrink = "shrink") 189 | list_MINT <- lapply(mint_methods, function(wmethod){ 190 | compute_pmint(my_bights, method = wmethod, residuals = e_hat) 191 | }) 192 | 193 | 194 | # MINT 195 | res_allmint <- lapply(list_MINT, function(mylist){ 196 | res_obj <- mylist 197 | means_mint <- sapply(seq(H), function(h){ 198 | as.matrix(my_bights$S %*% res_obj$P_MINT %*% results$allmf[, , h]) 199 | }, simplify = "array") 200 | variances_mint <- diag(res_obj$V) 201 | list(means_mint = means_mint, variances_mint = variances_mint) 202 | }) 203 | 204 | e_tilde <- my_bights$S %*% list_MINT$mintshrink$P_MINT %*% e_hat 205 | 206 | # Make permutations 207 | obj_permutations_ehat <- make_permutations(my_bights, t(e_hat)) 208 | obj_permutations_etilde <- make_permutations(my_bights, t(e_tilde)) 209 | 210 | 211 | n_test <- length(list_subsets_test) 212 | test_depbu_samples <- test_base_samples <- test_mint_samples <- test_indepbu_samples <- vector("list", n_test) 213 | test_results <- vector("list", n_test) 214 | for(itest in seq(n_test)){ 215 | 216 | if(itest %% 100 == 0) 217 | print(itest) 218 | 219 | samples_almint <- lapply(res_allmint, function(res_mint){ 220 | mint_samples <- sapply(seq(my_bights$nts), function(j){ 221 | sapply(seq(H), function(h){ 222 | m <- res_mint$means_mint[j, itest, h] 223 | v <- res_mint$variances_mint[j] 224 | qnorm(q_probs, m, sqrt(v)) 225 | }) 226 | }, simplify = "array") 227 | mint_samples <- aperm(mint_samples, c(1, 3, 2)) 228 
| })
229 | 
230 | test_results[[itest]] <- c(test_results[[itest]], samples_almint)
231 | 
232 | # sample from all basef
233 | base_samples <- sapply(seq(my_bights$nts), function(i){
234 | sapply(seq(H), function(h){
235 | qf <- results$allqf[[i]][[itest]][, h]
236 | sample(qf)
237 | })
238 | }, simplify = "array")
239 | base_samples <- aperm(base_samples, c(1, 3, 2))
240 | 
241 | test_results[[itest]]$base <- base_samples
242 | 
243 | # DEP-BU AND INDEP-BU
244 | depbu_samples <- indepbu_samples <- depbumint_samples <- array(NA, dim(base_samples))
245 | for(h in seq(H)){
246 | samples_h <- base_samples[, , h]
247 | 
248 | # DEP-BU
249 | permuted_samples_bottom <- permutate_samples(my_bights, samples_h, obj_permutations_ehat$list_matpermutations) # CAREFUL HERE
250 | tpermuted_samples_bottom <- t(permuted_samples_bottom)
251 | depbu_samples[, , h] <- as.matrix(t(my_bights$S %*% tpermuted_samples_bottom))
252 | 
253 | if(do.correction){
254 | permuted_samples_bottom <- permutate_samples(my_bights, samples_h, obj_permutations_etilde$list_matpermutations)
255 | tpermuted_samples_bottom <- t(permuted_samples_bottom)
256 | }
257 | 
258 | # DEP-BU + MINT
259 | btilde <- P_BU %*% res_allmint[["mintshrink"]]$means_mint[, itest, h]
260 | meanrevised_samples_bottom <- tpermuted_samples_bottom - apply(permuted_samples_bottom, 2, mean) + btilde
261 | if(do.correction){
262 | meanrevised_samples_bottom <- (meanrevised_samples_bottom/apply(permuted_samples_bottom, 2, sd) ) * sqrt(P_BU %*% res_allmint[["mintshrink"]]$variances_mint)
263 | }
264 | depbumint_samples[, , h] <- as.matrix(t(my_bights$S %*% meanrevised_samples_bottom))
265 | 
266 | # INDEP-BU
267 | indepbu_samples[, , h] <- as.matrix(t(my_bights$S %*% P_BU %*% t(samples_h)))
268 | }
269 | 
270 | test_results[[itest]]$depbu <- depbu_samples
271 | test_results[[itest]]$depbumint <- depbumint_samples
272 | test_results[[itest]]$indepbu <- indepbu_samples
273 | 
274 | } # test
275 | 
276 | 
277 | 
278 | if(do.recovery){
279 | list_simulations[[i]] <- list(samples_pred = test_results[[1]], samples_true = sample_paths)
280 | }else{
281 | err_test <- lapply(seq(H), function(h){
282 | lapply(seq(n_test), function(itest){ # itest
283 | lapply(test_results[[itest]], function(mat_method){ # imethod
284 | obs <- results$allfuture[, itest, h]
285 | X <- mat_method[, , h]
286 | 
287 | crps <- compute_crps(X, obs)
288 | 
289 | squared_erors <- (apply(X, 2, mean) - obs)^2
290 | 
291 | qs <- compute_qscores(X, obs)
292 | qs_tails <- apply(qs, 2, function(x){ mean(x * weights_tails) })
293 | qs_uniform <- apply(qs, 2, function(x){ mean(x * weights_uniform) })
294 | qs_rtail <- apply(qs, 2, function(x){ mean(x * weights_rtail) })
295 | qs_ltail <- apply(qs, 2, function(x){ mean(x * weights_ltail) })
296 | 
297 | # DO NOT SAVE qs (too big)
298 | list(crps = crps, squared_erors = squared_erors, qs_tails = qs_tails, qs_rtail = qs_rtail, qs_ltail = qs_ltail, qs_uniform = qs_uniform)
299 | })
300 | })
301 | })
302 | list_simulations[[i]] <- err_test
303 | 
304 | }
305 | if(i %% 5 == 0){
306 | filetosave <- nameResfile(experiment, marg, T_learn, M, use.trueparam, idjob, tag)
307 | save(file = filetosave, list = c("list_simulations"))
308 | }
309 | 
310 | }
311 | filetosave <- nameResfile(experiment, marg, T_learn, M, use.trueparam, idjob, tag)
312 | save(file = filetosave, list = c("list_simulations"))
313 | 
--------------------------------------------------------------------------------
/simulations/code/mint.R:
-------------------------------------------------------------------------------- 1 | # All the functions needed to compute the MinT forecasts. 2 | compute_pmint <- function(objhts, method = "diagonal", residuals){ 3 | J <- Matrix(cbind(matrix(0, nrow = objhts$nbts, ncol = objhts$nts - objhts$nbts), diag(objhts$nbts)), sparse = TRUE) 4 | U <- Matrix(rbind(diag(objhts$nts - objhts$nbts), -t(objhts$A)), sparse = TRUE) 5 | P_BU <- cbind(matrix(0, objhts$nbts, objhts$nts - objhts$nbts), diag(objhts$nbts)) 6 | 7 | #if(!is.null(residuals)) 8 | # R1 <- t(residuals) 9 | 10 | R1 <- t(residuals) 11 | 12 | if(is.null(method)) 13 | method <- "diagonal" 14 | 15 | if(method == "diagonal"){ 16 | # Diagonal matrix 17 | W <- Diagonal(x = vec_w(R1)) 18 | }else if(method == "shrink"){ 19 | # Shrunk matrix 20 | target_diagonal <- lowerD(R1) 21 | shrink_results <- shrink.estim(R1, target_diagonal) 22 | W <- shrink_results$shrink.cov 23 | }else if(method == "ols"){ 24 | W <- diag(objhts$nts) 25 | }else if(method == "sample"){ 26 | n <- nrow(R1) 27 | W <- crossprod(R1) / n 28 | if(is.positive.definite(W)==FALSE) 29 | { 30 | stop("MinT needs covariance matrix to be positive definite.", call. = FALSE) 31 | } 32 | } 33 | 34 | MAT1 <- W %*% U 35 | MAT2 <- crossprod(U,MAT1) 36 | MAT3 <- tcrossprod(solve(MAT2), U) 37 | C1 <- J %*% MAT1 38 | P_MINT <- P_BU - C1 %*% MAT3 39 | 40 | S <- objhts$S 41 | n_agg <- objhts$naggts 42 | n_total <- objhts$nts 43 | V <- S %*% P_MINT %*% W %*% t(P_MINT) %*% t(S) 44 | 45 | #V_agg <- diag(V)[seq(n_agg)] 46 | #V_bot <- diag(V)[seq(n_agg + 1, n_total)] 47 | #list(V_agg = V_agg, V_bot = V_bot) 48 | 49 | 50 | list(P_MINT = P_MINT, W = W, V = V) 51 | } 52 | 53 | shrink.estim <- function(x, tar) 54 | { 55 | if (is.matrix(x) == TRUE && is.numeric(x) == FALSE) 56 | stop("The data matrix must be numeric!") 57 | p <- ncol(x) 58 | n <- nrow(x) 59 | 60 | covm <- crossprod(x) / n 61 | corm <- cov2cor(covm) 62 | xs <- scale(x, center = FALSE, scale = sqrt(diag(covm))) 63 | v <- (1/(n * (n - 1))) * (crossprod(xs^2) - 1/n * (crossprod(xs))^2) 64 | diag(v) <- 0 65 | corapn <- cov2cor(tar) 66 | d <- (corm - corapn)^2 67 | lambda <- sum(v)/sum(d) 68 | lambda <- max(min(lambda, 1), 0) 69 | shrink.cov <- lambda * tar + (1 - lambda) * covm 70 | return(list(shrink.cov = shrink.cov, lambda = lambda )) 71 | } 72 | 73 | vec_w <- function(x){ 74 | n <- nrow(x) 75 | apply(x, 2, crossprod) / n 76 | } 77 | lowerD <- function(x) 78 | { 79 | n <- nrow(x) 80 | return(diag(vec_w(x))) 81 | } 82 | -------------------------------------------------------------------------------- /simulations/code/nicefigs.R: -------------------------------------------------------------------------------- 1 | # Create nice R figures 2 | 3 | savepdf <- function(file, width=16, height=10) 4 | { 5 | .._fname <- paste(file,".pdf",sep="") 6 | pdf(.._fname, width=width/2.54, height=height/2.54, 7 | pointsize=10) 8 | par(mgp=c(2.2,0.45,0), tcl=-0.4, mar=c(3.3,3.6,1.1,1.1)) 9 | assign(".._fname",.._fname,envir=.GlobalEnv) 10 | } 11 | endpdf <- function() 12 | { 13 | dev.off() 14 | system(paste("pdfcrop",.._fname,.._fname)) 15 | } 16 | -------------------------------------------------------------------------------- /simulations/code/packages.R: -------------------------------------------------------------------------------- 1 | library(MASS) 2 | library(forecast) 3 | library(Matrix) 4 | library(SGL) 5 | library(glmnet) 6 | library(parallel) 7 | library(matrixcalc) 8 | 9 | library(igraph) 10 | library(plotrix) 11 | library(copula) 12 | library(sn) 13 
| library(scoringRules) 14 | library(propagate) 15 | library(SuppDists) 16 | library(hts) -------------------------------------------------------------------------------- /simulations/code/permutations.R: -------------------------------------------------------------------------------- 1 | # This function computes the permutations needed by aggregation.R to restore the dependences between the variables of interest. 2 | make_permutations <- function(my_bights, residuals){ 3 | 4 | itree <- my_bights$itree 5 | mat_residuals <- residuals 6 | n_resid <- nrow(mat_residuals) 7 | 8 | # compute the parsing order of the aggregate nodes 9 | leaves <- V(itree)[degree(itree, mode="out") == 0] 10 | agg_nodes <- V(itree)[degree(itree, mode="out") != 0] 11 | all_nodes <- V(itree) 12 | 13 | stopifnot(all(names(all_nodes) == colnames(residuals))) 14 | 15 | list_matpermutations <- list_vecties <- vector("list", length(agg_nodes)) 16 | 17 | for(inode in seq_along(agg_nodes)){ 18 | agg_node <- agg_nodes[inode] 19 | idseries_agg <- names(agg_node) 20 | children_nodes <- ego(itree, order = 1, nodes = agg_node, mode = "out")[[1]][-1] 21 | 22 | idchildren <- match(children_nodes, all_nodes) 23 | mat_residuals <- residuals[, idchildren] 24 | 25 | vec_ties <- sapply(seq(ncol(mat_residuals)), function(j){ 26 | (nrow(mat_residuals) - length(unique(mat_residuals[, j]))) / nrow(mat_residuals) 27 | }) * 100 28 | 29 | mat_residuals <- tail(mat_residuals, M) 30 | mat_permutations <- apply(mat_residuals, 2, rank, ties.method = "random") 31 | colnames(mat_permutations) <- names(children_nodes) 32 | 33 | list_matpermutations[[inode]] <- mat_permutations 34 | list_vecties[[inode]] <- vec_ties 35 | } 36 | list_matpermutations <- setNames(list_matpermutations, names(agg_nodes)) 37 | list_vecties <- setNames(list_vecties, names(agg_nodes)) 38 | list(list_matpermutations = list_matpermutations, list_vecties = list_vecties) 39 | } 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /simulations/code/runsim.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | full_path=$(realpath $0) 4 | dir_path=$(dirname $full_path) 5 | mypath=$(dirname $dir_path ) 6 | 7 | rscript="main.R" 8 | 9 | experiment="small" && marg="norm" && Tlearning=500 && M=500 && Trueparam="FALSE" 10 | # experiment="newlarge" && marg="norm" && Tlearning=500 && M=500 && Trueparam="FALSE" 11 | # experiment="small" && marg="norm" && Tlearning=10000 && M=1000 && Trueparam="FALSE" 12 | # experiment="newlarge" && marg="norm" && Tlearning=10000 && M=1000 && Trueparam="FALSE" 13 | 14 | nbsimul=84 15 | nbcores=3 16 | alljobs=$(seq 1 12) 17 | 18 | for idjob in ${alljobs[@]} 19 | do 20 | flag="$experiment-$Tlearning-$idjob" 21 | echo "$flag" 22 | Rscript --vanilla $rscript $experiment $marg $Tlearning $M $Trueparam $idjob $nbsimul $nbcores > "$mypath/work/rout/$flag.Rout" 2> "$mypath/work/rout/$flag.err" & 23 | done 24 | -------------------------------------------------------------------------------- /simulations/code/simulate.R: -------------------------------------------------------------------------------- 1 | # This script contains all the functions to simulate the AR models as well as the true distribution of the future observations. 
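
A minimal sketch, on a toy AR(1) and with illustrative names, of how this script represents the "true" predictive distribution: the data-generating model, with its coefficients held fixed rather than estimated, is re-attached to the observations up to the forecast origin, and future sample paths are drawn from it (mirroring the `model_path`/`sample_paths` step in dataGen() below, except that the innovations here are drawn internally rather than passed via `innov`).

```
library(forecast)
set.seed(1)
n <- 300; H <- 2; npaths <- 1000
y <- as.numeric(arima.sim(list(ar = 0.6), n = n))
# Condition on the sample up to the origin, with the true AR coefficient
# (and a zero mean) held fixed
model <- Arima(head(y, n - H), order = c(1, 0, 0), fixed = c(0.6, 0))
paths <- replicate(npaths, as.numeric(simulate(model, nsim = H, future = TRUE)))
dim(paths)                                # H x npaths: one column per sample path
quantile(paths[1, ], c(0.05, 0.5, 0.95))  # Monte Carlo h = 1 predictive quantiles
```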
2 | transform_sample <- function(X, mu_theo, sd_theo){ 3 | mu <- apply(X, 2, mean) 4 | sdeviation <- apply(X, 2, sd) 5 | t( ((t(X) - mu + mu_theo)/sdeviation) * sd_theo ) 6 | } 7 | 8 | gen.innov <- function(n, marg, meanVec, covarianceMat){ 9 | 10 | mycor <- diag(diag(covarianceMat)^(-.5)) %*% covarianceMat %*% diag(diag(covarianceMat)^(-.5)) 11 | 12 | nvar <- ncol(covarianceMat) 13 | correlations <- mycor[lower.tri(mycor)] 14 | mycop <- normalCopula(correlations, dispstr = "un", dim = nvar) 15 | margins <- rep(marg, nvar) 16 | if(marg == "norm"){ 17 | parameters <- lapply(seq(nvar), function(i){ 18 | list(mean = meanVec[i], sd = sqrt(covarianceMat[i, i]) ) 19 | }) 20 | #mvrnorm(n = n_simul, mus, Sigma = Sigma) 21 | }else if(marg == "t"){ 22 | list_param <- list(df = 6) #list(df = 4) # df = 10 too big 23 | parameters <- rep(list(list_param), nvar) 24 | }else if(marg == "sn"){ 25 | list_param <- list(alpha = -5, omega = 5) 26 | parameters <- rep(list(list_param), nvar) 27 | }else if(marg == "mst"){ 28 | list_param <- list(alpha = -20, omega = 3, nu = 7) 29 | parameters <- rep(list(list_param), nvar) 30 | }else if(marg == "mixture"){ 31 | probs <- c(0.3,0.7) 32 | mus <- c(-2, 2) 33 | 34 | list_param <- list(probs = probs, mus = mus, sds = sqrt(c(1,1)) ) 35 | parameters <- rep(list(list_param), nvar) 36 | } 37 | my.mvdc <- mvdc(mycop, margins, parameters) 38 | innov <- rMvdc(n, my.mvdc) 39 | #innov <- scale(innov, scale = F) 40 | } 41 | 42 | orderGen <- function(n.bot) 43 | { 44 | 45 | order.diff <- rep(0, n.bot) 46 | order.ar <- sample(0:2, n.bot, replace = TRUE) 47 | order.ma <- sample(0:2, n.bot, replace = TRUE) 48 | order.d <- cbind(order.ar, order.diff, order.ma) 49 | 50 | 51 | ar.d <- matrix(, n.bot, 2) 52 | ma.d <- matrix(, n.bot, 2) 53 | 54 | for(j in 1:n.bot) 55 | { 56 | order.t <- order.d[j, ] 57 | 58 | # define AR coefficients 59 | ar.coeff <- c() 60 | if(order.t[1]==0) 61 | { 62 | ar.d[j, 1] <- NA 63 | } 64 | 65 | ar.coeff1 <- 0 66 | ar.coeff2 <- 0 67 | if(order.t[1]==1) 68 | { 69 | ar.coeff1 <- runif(1, 0.5, 0.7) 70 | ar.coeff <- ar.coeff1 71 | ar.d[j, 1] <- ar.coeff 72 | } 73 | if(order.t[1]==2) 74 | { 75 | ar.coeff2 <- runif(1, 0.5, 0.7) 76 | lower.ar.b <- ar.coeff2 - 0.9 77 | upper.ar.b <- 0.9 - ar.coeff2 78 | ar.coeff1 <- runif(1, lower.ar.b, upper.ar.b) 79 | ar.coeff <- c(ar.coeff1, ar.coeff2) 80 | ar.d[j, 1:2] <- ar.coeff 81 | } 82 | 83 | # define MA coefficients 84 | ma.coeff <- c() 85 | if(order.t[3]==0) 86 | { 87 | ma.d[j, 1] <- NA 88 | } 89 | ma.coeff1 <- 0 90 | ma.coeff2 <- 0 91 | if(order.t[3]==1) 92 | { 93 | ma.coeff1 <- runif(1, 0.5, 0.7) 94 | ma.coeff <- ma.coeff1 95 | ma.d[j, 1] <- ma.coeff 96 | } 97 | if(order.t[3]==2) 98 | { 99 | ma.coeff2 <- runif(1, 0.5, 0.7) 100 | lower.ma.b <- -1 * (0.9 + ma.coeff2) / ((0.9+0.7)/0.5) 101 | upper.ma.b <- -1 * lower.ma.b 102 | ma.coeff1 <- runif(1, lower.ma.b, upper.ma.b) 103 | ma.coeff <- c(ma.coeff1, ma.coeff2) 104 | ma.d[j, 1:2] <- ma.coeff 105 | } 106 | } 107 | return(list(ar.d, order.d, ma.d)) 108 | } 109 | 110 | dataGen <- function(n, n.bot, var.mat, npaths) 111 | { 112 | order.gen <- ordergenCor(n.bot) 113 | ar.d <- order.gen[[1]] 114 | order.d <- order.gen[[2]] 115 | ma.d <- order.gen[[3]] 116 | data.bot <- matrix(NA, n, n.bot) 117 | 118 | if(!is.matrix(var.mat)){ 119 | nvar <- 1 120 | Sigma <- matrix(var.mat) 121 | }else{ 122 | nvar <- n.bot 123 | Sigma <- var.mat 124 | } 125 | 126 | if(nvar == 1){ # Common pattern 127 | #innov_burnin <- rmvnorm(n_warm, mean = rep(0, nvar), sigma = Sigma) 128 | #innov_insample <- 
rmvnorm(n, mean = rep(0, nvar), sigma = Sigma) 129 | #innov_future <- rmvnorm(npaths * H, mean = rep(0, nvar), sigma = Sigma) 130 | 131 | innov_burnin <- matrix(rnorm(n_warm, mean = 0, sd = Sigma), ncol = 1) 132 | innov_insample <- matrix(rnorm(n, mean = 0, sd = Sigma), ncol = 1) 133 | #innov_future <- matrix(rnorm(npaths * H, mean = 0, sd = Sigma), ncol = 1) 134 | 135 | innov_future <- sapply(seq(H), function(h){ 136 | x <- matrix(rnorm(npaths, mean = 0, sd = Sigma), ncol = 1) 137 | transform_sample(x, rep(0, nvar), sqrt(diag(Sigma)) ) 138 | }, simplify = "array") 139 | 140 | }else{ 141 | 142 | allsd <- sqrt(diag(Sigma)) 143 | innov_burnin <- matrix(NA, nrow = n_warm, ncol = nvar) 144 | innov_insample <- matrix(NA, nrow = n, ncol = nvar) 145 | innov_future <- array(NA, c(npaths, nvar, H)) 146 | for(j in seq(nvar)){ 147 | innov_burnin[, j] <- rnorm(n_warm, mean = 0, sd = allsd[j]) 148 | innov_insample[, j] <- rnorm(n, mean = 0, sd = allsd[j]) 149 | innov_future[, j , ] <- matrix(rnorm(npaths * H, mean = 0, sd = allsd[j]), nrow = npaths, ncol = H) 150 | } 151 | 152 | } 153 | 154 | list_paths <- vector("list", nvar) 155 | for(j in 1:nvar) 156 | { 157 | 158 | # generating data from a ARIMA model 159 | data.bot[, j] <- arima.sim(list(order = order.d[j, ], ar = na.omit(ar.d[j, ]), ma = na.omit(ma.d[j, ])), n, 160 | n.start = n_warm, start.innov = innov_burnin[, j], 161 | innov = innov_insample[, j])[(order.d[j, 2] + 1):(n + order.d[j, 2])] 162 | 163 | y <- head(data.bot[, j], -H) # IMPORTANT 164 | model_path <- Arima(y, order = order.d[j, ], fixed = c(na.omit(ar.d[j, ]), na.omit(ma.d[j, ]), 0)) 165 | 166 | 167 | sample_paths <- sapply(seq(npaths), function(ipath){ 168 | #id <- seq((ipath - 1) * H + 1, ipath * H) 169 | #simulate(model_path, future = TRUE, nsim = H, innov = innov_future[id, j]) 170 | simulate(model_path, future = TRUE, nsim = H, innov = innov_future[ipath, j, ]) 171 | }, simplify = "array") 172 | list_paths[[j]] <- sample_paths 173 | 174 | } 175 | 176 | spaths <- simplify2array(list_paths) 177 | 178 | return(list(data.bot, order.gen, spaths)) 179 | } 180 | 181 | ordergenCor <- ordergenCom <- ordergenNoise <- orderGen 182 | data.genCom <- data.genCor <- data.genNoise <- dataGen 183 | 184 | 185 | simulate_large_hts <- function(n, obj.path){ 186 | 187 | npaths <- obj.path$npaths 188 | 189 | #nodes <- list(6, rep(4, 6), rep(4, 24), rep(4, 96), rep(4, 384)) 190 | #nodes <- list(6, rep(4, 6), rep(4, 24), rep(4, 96)) 191 | nodes <- list(6, rep(4, 6), rep(4, 24)) 192 | 193 | gmat <- hts:::GmatrixH(nodes) 194 | gmat <- apply(gmat, 1, table) 195 | n.tot <- sum(unlist(nodes)) + 1 196 | n.bot <- sum(nodes[[length(nodes)]]) 197 | 198 | 199 | 200 | # Generating data for the common pattern 201 | varCom <- 0.005 202 | #### 203 | obj.genCom <- data.genCom(n, 1, var = varCom, npaths) 204 | 205 | dataCom <- matrix(obj.genCom[[1]], ncol = 1) # ONE DIMENSIONAL 206 | allCom <- dataCom[, rep(1, times = n.bot)] 207 | # Only 2 series out of 4 at the bottom level contains the common pattern 208 | idxCom <- c(seq(1, n.bot, 4), seq(2, n.bot, 4)) 209 | allCom[, idxCom] <- 0 210 | 211 | 212 | allCom.path <- sapply(seq(npaths), function(ipath){ 213 | datComP <- matrix(obj.genCom[[3]][, ipath, 1], ncol = 1) 214 | allComP <- datComP[, rep(1, times = n.bot)] 215 | allComP[, idxCom] <- 0 216 | allComP 217 | }, simplify = "array") 218 | 219 | # Adding noise to the common pattern 220 | varRange <- list(0.4, 0.4, 0.4, 0.4, 0.4) # variance of the noise for each level (level 1 to level 5) 221 | 222 | # Generating data 
for the correlated pattern 223 | bCor <- runif(n.bot/4, 0.3, 0.8) # block correlations 224 | bCorMat <- lapply(bCor, function(x){ 225 | y <- matrix(x, 4, 4) 226 | diag(y) = 1 227 | return(y) 228 | }) 229 | 230 | corMatB <- bdiag(bCorMat) # correlation matrix at the bottom level 231 | varVec <- runif(n.bot, 0.05, 0.1) 232 | varMat <- as.matrix(Diagonal(x = sqrt(varVec)) %*% corMatB %*% Diagonal(x = sqrt(varVec))) # cov matrix 233 | #### 234 | 235 | obj.genCor <- data.genCor(n, n.bot, varMat, npaths) 236 | allCor <- obj.genCor[[1]] # generates data with correlated errors 237 | 238 | allCor.path <- obj.genCor[[3]] 239 | 240 | 241 | if(FALSE){ 242 | 243 | # generates white noise errors for level 1 to level 4 (level 5: bottom level ignored) 244 | noiseL <- noiseL.path <- list() 245 | for(i in 1:(length(nodes)-1)) 246 | { 247 | nodesLv <- sum(nodes[[i]]) 248 | nLv <- nodesLv / 2 249 | var.mat <- diag(nLv) 250 | diag(var.mat) <- rep(varRange[[i]], nLv) 251 | #### 252 | datL <- rmvnorm(n, rep(0, nLv), var.mat) 253 | datL <- datL[, rep(1:ncol(datL), each = 2)] # replicating to get the data for -ve part 254 | sign.vec <- rep(c(1, -1), nLv) # adding +/- into the data 255 | datL <- t(t(datL) * sign.vec / as.numeric(gmat[[i+1]])) # contribution that passes to the bottom level 256 | datL <- datL[, rep(1:ncol(datL), times = gmat[[i+1]])] # all noise series at the bottom level 257 | noiseL[[i]] <- datL 258 | #### 259 | #datL.path <- rmvnorm(n, rep(0, nLv), var.mat) 260 | noiseL.path[[i]] <- sapply(seq(npaths), function(ipath){ 261 | datLpath <- rmvnorm(H, rep(0, nLv), var.mat) 262 | datLpath <- datLpath[, rep(1:ncol(datLpath), each = 2)] # replicating to get the data for -ve part 263 | datLpath <- t(t(datLpath) * sign.vec / as.numeric(gmat[[i+1]])) # contribution that passes to the bottom level 264 | datLpath <- datLpath[, rep(1:ncol(datLpath), times = gmat[[i+1]])] # all noise series at the bottom level 265 | datLpath 266 | }, simplify = "array") 267 | 268 | } 269 | 270 | 271 | 272 | # generate ARMA series for the noise at the bottom level 273 | var.mat <- diag(n.bot/2) 274 | diag(var.mat) <- rep(varRange[[length(nodes)]], sum(nodes[[length(nodes)]])/2) 275 | #### 276 | obj.genNoise <- data.genNoise(n, n.bot/2, var.mat, npaths) 277 | 278 | noiseB <- obj.genNoise[[1]] 279 | noiseB <- noiseB[, rep(1:ncol(noiseB), each = 2)] 280 | sign.vec <- rep(c(1, -1), n.bot/2) 281 | noiseB <- t(t(noiseB) * sign.vec) 282 | noiseB[, idxCom] <- 0L # adding noise only to common component 283 | 284 | noiseB.path <- sapply(seq(npaths), function(ipath){ 285 | noiseBp <- obj.genNoise[[3]][, ipath, ] 286 | noiseBp <- noiseBp[, rep(1:ncol(noiseBp), each = 2)] 287 | sign.vec <- rep(c(1, -1), n.bot/2) 288 | noiseBp <- t(t(noiseBp) * sign.vec) 289 | noiseBp[, idxCom] <- 0L 290 | noiseBp 291 | }, simplify = "array") 292 | 293 | }# IF FALSE 294 | 295 | # common + correlated + noise 296 | if(FALSE){ 297 | allB <- allCom + allCor + Reduce("+", noiseL) + noiseB 298 | 299 | allB.path <- sapply(seq(npaths), function(ipath){ 300 | allCom.path[, , ipath] + allCor.path[, ipath, ] + Reduce("+", noiseL.path)[, , ipath] + noiseB.path[, , ipath] 301 | }, simplify = "array") 302 | sample_paths <- allB.path 303 | }else if(TRUE){ 304 | allB <- allCom + allCor 305 | allB.path <- sapply(seq(npaths), function(ipath){ 306 | allCom.path[, , ipath] + allCor.path[, ipath, ] 307 | }, simplify = "array") 308 | sample_paths <- allB.path 309 | 310 | #browser() 311 | }else if(FALSE){ 312 | 313 | varCom <- 0.005 314 | obj.genCom <- data.genCom(n, 1, var = 
varCom, npaths) 315 | dataCom <- matrix(obj.genCom[[1]], ncol = 1) # ONE DIMENSIONAL 316 | allCom <- dataCom[, rep(1, times = n.bot)] 317 | # Only 2 series out of 4 at the bottom level contains the common pattern 318 | idxCom <- c(seq(1, n.bot, 4), seq(2, n.bot, 4)) 319 | #allCom[, idxCom] <- 0 320 | allCom[, idxCom] <- rnorm(nrow(allCom)) 321 | allCom.path <- sapply(seq(npaths), function(ipath){ 322 | datComP <- matrix(obj.genCom[[3]][, ipath, 1], ncol = 1) 323 | allComP <- datComP[, rep(1, times = n.bot)] 324 | #allComP[, idxCom] <- 0 325 | allComP[, idxCom] <- rnorm(nrow(allComP)) 326 | allComP 327 | }, simplify = "array") 328 | 329 | allB <- allCom 330 | allB.path <- sapply(seq(npaths), function(ipath){ 331 | allCom.path[, , ipath] 332 | }, simplify = "array") 333 | sample_paths <- allB.path 334 | } 335 | 336 | 337 | 338 | 339 | dat.new <- ts(allB, frequency = 1L) 340 | sim.hts <- hts(dat.new, nodes = nodes) 341 | #ally <- allts(sim.hts) 342 | S <- smatrix(sim.hts) 343 | A <- head(S, nrow(S) - ncol(S)) 344 | 345 | list(A = A, bts = allB, sim.hts = sim.hts, sample_paths = sample_paths) 346 | } 347 | 348 | simulate_small_hts <- function(n_simul, marg = NULL, obj.path = NULL){ 349 | p <- 2; d <- 0; q <- 1 350 | 351 | ar_param <- ma_param <- vector("list", 4) 352 | for(j in seq(4)){ 353 | if(fixed.dgp){ 354 | phi_2 <- 0.5327375 #runif(1, min = 0.5, max = 0.7) 355 | phi_1 <- -0.2739356 #runif(1, min = phi_2 - 1, max = 1 - phi_2) 356 | theta_1 <- 0.5469495 #runif(1, min = 0.5, max = 0.7) 357 | }else{ 358 | phi_2 <- runif(1, min = 0.5, max = 0.7) 359 | # !!!!! runif(1, min = phi_2 - 1, max = 1 - phi_2) causes non-stationarity 360 | # ROB'S EXPLANATION: if not using the true parameters, there is a bias, which gets worse close to the boundary 361 | # phi_1 <- runif(1, min = phi_2 - 0.9, max = 0.9 - phi_2) 362 | phi_1 <- runif(1, min = phi_2 - 0.71, max = 0.71 - phi_2) 363 | theta_1 <- runif(1, min = 0.5, max = 0.7) 364 | } 365 | ar_param[[j]] <- c(phi_1, phi_2); 366 | ma_param[[j]] <- c(theta_1) 367 | } 368 | 369 | mean_process <- 0 370 | mus <- rep(0, 4) 371 | #a <- 3.3; 372 | #Sigma <- rbind(c(a, 3, 2, 1), c(3, a, 2, 1), c(2, 2, a, 3), c(1, 1, 3, a)) 373 | varVec <- rep(1, 4) #varVec <- rep(2, 4) 374 | 375 | corMatB <- rbind(c(1, 0.7, 0.2, 0.3), 376 | c(0.7, 1, 0.3, 0.2), 377 | c(0.2, 0.3, 1, 0.6), 378 | c(0.3, 0.2, 0.6, 1)) 379 | # corMatB <- diag(4) 380 | 381 | 382 | Sigma <- as.matrix(Diagonal(x = sqrt(varVec)) %*% corMatB %*% Diagonal(x = sqrt(varVec))) 383 | 384 | innov_insample <- gen.innov(n_simul, marg = marg, mus, Sigma) 385 | innov_burnin <- gen.innov(n_warm, marg = marg, mus, Sigma) 386 | 387 | #n.start <- p + q 388 | #start.innov <- rep(0, n.start) 389 | 390 | bts <- sapply(seq(4), function(j){ 391 | #arima.sim(n = n_simul, list(order = c(p, d, q), ar = ar_param, ma = ma_param), 392 | # n.start = n.start, start.innov = start.innov, innov = innov_insample[, j]) 393 | arima.sim(n = n_simul, list(order = c(p, d, q), ar = ar_param[[j]], ma = ma_param[[j]]), 394 | n.start = n_warm, start.innov = innov_burnin[, j], innov = innov_insample[, j]) 395 | }) 396 | 397 | if(!is.null(obj.path)){ 398 | stopifnot(T_test == H) 399 | 400 | npaths <- obj.path$npaths 401 | 402 | models_path <- lapply(seq(4), function(j){ 403 | y <- head(bts[, j], -H) # IMPORTANT !!!
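        # Added note: the last H points are held out so that the model object
        # (built with the true coefficients via `fixed`) conditions only on the
        # learning sample when simulate() draws the future sample paths below.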
404 | Arima(y, order = c(p, d, q), fixed = c(ar_param[[j]], ma_param[[j]], mean_process)) 405 | }) 406 | 407 | innov_future <- gen.innov(npaths * H, marg = marg, mus, Sigma) 408 | sample_paths <- sapply(seq(npaths), function(ipath){ 409 | #print(ipath) 410 | #innov_future <- gen.innov(H, marg = marg) 411 | id <- seq((ipath - 1) * H + 1, ipath * H) 412 | sapply(seq(4), function(j){ 413 | # simulate(models_path[[j]], future = TRUE, nsim = H, innov = innov_future[, j]) 414 | simulate(models_path[[j]], future = TRUE, nsim = H, innov = innov_future[id, j]) 415 | }) 416 | }, simplify = "array") 417 | #innov_future <- mvrnorm(n = H, mus, Sigma = Sigma) 418 | #paths <- t(replicate(npaths, simulate(model, future = TRUE, nsim = H, innov = innov_future[, j]))) 419 | 420 | }else{ 421 | sample_paths <- NULL 422 | } 423 | 424 | list(bts = bts, param = list(ar_param = ar_param, ma_param = ma_param), sample_paths = sample_paths) 425 | } 426 | 427 | 428 | 429 | 430 | -------------------------------------------------------------------------------- /simulations/code/utils.R: -------------------------------------------------------------------------------- 1 | # Some useful functions. 2 | nameResfile <- function(experiment, marg, T_learn, M, use.trueparam, idjob, tag){ 3 | file.path(rdata.folder, 4 | paste(experiment, "_", marg, "_", T_learn, "_", M, "_", use.trueparam, "_", idjob, "_", tag, ".Rdata", sep = "")) 5 | } 6 | 7 | namePdffile <- function(experiment, marg, T_learn, M, use.trueparam, tag){ 8 | file.path(pdf.folder, 9 | paste(experiment, "_", marg, "_", T_learn, "_", M, "_", use.trueparam, "_", tag, sep = "")) 10 | } 11 | 12 | pbu <- function(objhts){ 13 | P_BU <- Matrix(0, nrow = objhts$nbts, ncol = objhts$nts, sparse = T) 14 | P_BU[cbind(seq(objhts$nbts), seq(objhts$nts - objhts$nbts + 1, objhts$nts)) ] <- 1 15 | P_BU 16 | } 17 | 18 | compute_crps <- function(X, obs){ 19 | sapply(seq(ncol(X)), function(j){ 20 | crps_sample(y = obs[j], dat = X[, j]) # from ScoringRules package 21 | }) 22 | } 23 | 24 | compute_qscores <- function(X, obs){ 25 | X_sorted <- apply(X, 2, sort) 26 | sapply(seq(ncol(X)), function(j){ 27 | qf <- X_sorted[, j] 28 | 2 * ((obs[j] <= qf) - q_probs) * (qf - obs[j]) 29 | }) 30 | } -------------------------------------------------------------------------------- /smart-meters/code/MinT_ecov.R: -------------------------------------------------------------------------------- 1 | # This script computes the covariance of the one-step-ahead insample residuals (matrix W) for MinT. 
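# Added sketch (illustrative only, not executed): with the one-step residuals
# arranged as rows = time and columns = series, the main W estimators computed
# below are (i) the residual variances on the diagonal, (ii) the full sample
# covariance, and (iii) a shrinkage of the sample covariance towards its
# diagonal. vec_w(), lowerD() and shrink.estim() are assumed to be provided by
# the source()'d utilities further down.
if (FALSE) {
  R1 <- matrix(rnorm(200 * 5), nrow = 200, ncol = 5)    # toy residual matrix
  W_diag   <- Diagonal(x = vec_w(R1))                   # "diagonal"
  W_sample <- crossprod(R1) / nrow(R1)                  # "sample"
  W_shrink <- shrink.estim(R1, lowerD(R1))$shrink.cov   # "shrink"
}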
2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("config_general.R") 5 | source("config_splitting.R") 6 | source("utils.R") 7 | source("utils_hts.R") 8 | 9 | library(Matrix) 10 | library(igraph) 11 | 12 | load(file.path(work.folder, "myinfo.Rdata")) 13 | 14 | n_bottom <- length(bottomSeries) 15 | n_total <- n_agg + n_bottom 16 | 17 | R_onestep <- matrix(NA, nrow = length(learn$id) - n_past_obs_kd, ncol = n_total) 18 | 19 | for(do.agg in c(TRUE, FALSE)){ 20 | if(do.agg){ 21 | set_series <- aggSeries 22 | algo <- algo.agg 23 | }else{ 24 | set_series <- bottomSeries 25 | algo <- algo.bottom 26 | } 27 | 28 | mat_residuals <- sapply(seq_along(set_series), function(j){ 29 | if(j%%100 == 0) 30 | print(j) 31 | 32 | idseries <- set_series[j] 33 | if(algo == "KD-IC-NML"){ 34 | resid_MINT_file <- file.path(insample.folder, algo, paste("residuals_MINT_", idseries, "_", algo, ".Rdata", sep = "")) 35 | load(resid_MINT_file) # residuals_MINT 36 | e_vec <- c(rep(NA, n_past_obs_kd), residuals_MINT) 37 | }else if(algo == "DYNREG" || algo == "DETS"){ 38 | resid_MINT_file <- file.path(insample.folder, algo, paste("residuals_MINT_", idseries, "_", algo, "_", 1, ".Rdata", sep = "")) 39 | load(resid_MINT_file) 40 | e_vec <- residuals_MINT 41 | } 42 | e_vec 43 | }) 44 | print(dim(mat_residuals)) 45 | mat_residuals <- tail(mat_residuals, -n_past_obs_kd) 46 | if(do.agg){ 47 | R_onestep[, seq(n_agg)] <- mat_residuals 48 | }else{ 49 | R_onestep[, seq(n_agg + 1, n_total)] <- mat_residuals 50 | } 51 | } 52 | 53 | covmethods <- c("shrink", "blockdiagonalshrink", "diagonal", "sample") 54 | 55 | for(covmethod in covmethods){ 56 | W1file <- file.path(work.folder, "wmatrices", paste("W1_", algo.agg, "_", algo.bottom, "_", covmethod, ".Rdata", sep = "")) 57 | if(covmethod == "diagonal"){ 58 | W1 <- Diagonal(x = vec_w(R_onestep)) 59 | }else if(covmethod == "sample"){ 60 | W1 <- crossprod(R_onestep) / nrow(R_onestep) 61 | }else if(covmethod == "shrink"){ 62 | target_diagonal <- lowerD(R_onestep) 63 | shrink_results <- shrink.estim(R_onestep, target_diagonal) 64 | W1 <- shrink_results$shrink.cov 65 | }else if(covmethod == "blockdiagonalshrink"){ 66 | load(file.path(work.folder, "wmatrices", paste("W1_", algo.agg, "_", algo.bottom, "_", "shrink", ".Rdata", sep = ""))) 67 | W1_bdiagonal <- W1 68 | W1_bdiagonal[seq(n_agg), seq(n_agg + 1, n_total)] <- 0 69 | W1_bdiagonal[seq(n_agg + 1, n_total), seq(n_agg)] <- 0 70 | W1 <- W1_bdiagonal 71 | }else{ 72 | stop("error !") 73 | } 74 | save(file = W1file, list = c("W1")) 75 | } 76 | 77 | if(TRUE){ 78 | allh <- tail(calendar$periodOfDay[learn$id], -n_past_obs_kd) 79 | 80 | for(h in seq(48)){ 81 | 82 | #id <- which(allh == h) 83 | seth <- c(h -1, h, h + 1) 84 | if(h == 1){ 85 | seth <- c(48, 1, 2) 86 | }else if(h == 48){ 87 | seth <- c(47, 48, 1) 88 | } 89 | stopifnot(h %in% seq(48)) 90 | 91 | id <- which(allh %in% seth) 92 | 93 | R <- R_onestep[id, ] 94 | 95 | # shrink 96 | Whfile <- file.path(work.folder, "wmatrices", paste("W_", h, "_", algo.agg, "_", algo.bottom, "_", "shrink", ".Rdata", sep = "")) 97 | target_diagonal <- lowerD(R) 98 | shrink_results <- shrink.estim(R, target_diagonal) 99 | W1 <- shrink_results$shrink.cov 100 | save(file = Whfile, list = c("W1")) 101 | 102 | # diagonal 103 | Whfile <- file.path(work.folder, "wmatrices", paste("W_", h, "_", algo.agg, "_", algo.bottom, "_", "diagonal", ".Rdata", sep = "")) 104 | W1 <- Diagonal(x = vec_w(R)) 105 | save(file = Whfile, list = c("W1")) 106 | } 107 | } 108 | 
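# Added sketch (illustrative only): how a W matrix saved above enters MinT.
# Following the same algebra as compute_pmint() in the simulations code, the
# reconciliation projection is P = P_BU - J W U (U' W U)^{-1} U', where J = P_BU
# selects the bottom rows and U encodes the aggregation constraints.
# Toy hierarchy: one aggregate and two bottom series.
if (FALSE) {
  S <- rbind(c(1, 1), diag(2))              # summing matrix; aggregation row A = (1, 1)
  A <- matrix(c(1, 1), nrow = 1)
  J <- cbind(matrix(0, 2, 1), diag(2))      # picks out the bottom series
  U <- rbind(diag(1), -t(A))
  W <- diag(c(2, 1, 1))                     # e.g. a diagonal W estimate
  P <- J - J %*% W %*% U %*% solve(t(U) %*% W %*% U) %*% t(U)
  y_hat <- c(3.5, 1, 2)                     # incoherent base forecasts (3.5 != 1 + 2)
  S %*% P %*% y_hat                         # coherent forecasts: c(3.25, 1.125, 2.125)
}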
-------------------------------------------------------------------------------- /smart-meters/code/README.md: -------------------------------------------------------------------------------- 1 | # Source code for the smart meter experiments of the paper *Hierarchical Probabilistic Forecasting of Electricity Demand with Smart Meter Data* by Ben Taieb, Souhaib, Taylor, James, and Hyndman, Rob. 2 | 3 | The code computes all the results in Section 6 and produces all the figures in the paper. 4 | 5 | # Usage 6 | 7 | 1. Download the following files: *edrp_elec.csv*, *edrp_metadata.xlsx* and *edrp_geography_data.xlsx* from https://discover.ukdataservice.ac.uk/catalogue/?sn=7591 (registration required). 8 | 2. Save these files in "../rawdata". 9 | 3. Apply a preprocessing step and save the results in new Rdata files (slow). 10 | ``` 11 | source("init_1_rawmeters.R") 12 | source("init_2_MyHierarchy.R") 13 | source("init_3_bottomlevel_meters.R") 14 | source("init_4_aggregated_meters.R") 15 | ``` 16 | 4. Specify the scenario for the bottom-level forecasts (see run_basef.sh) and run the following script (slow): 17 | ``` 18 | ./run_basef.sh 19 | ``` 20 | 5. Specify the scenario for the aggregate-level forecasts (see run_basef.sh) and run the script again (slow): 21 | ``` 22 | ./run_basef.sh 23 | ``` 24 | 6. Compute the covariance matrix W for the MinT method. 25 | ``` 26 | source("MinT_ecov.R") 27 | ``` 28 | 7. Compute the permutations (empirical copulas). 29 | ``` 30 | source("permutations.R") 31 | ``` 32 | 8. Merge the results by half hour. 33 | ``` 34 | source("makebf_byhalfour.R") 35 | ``` 36 | 9. Compute the forecasts for the aggregated series (slow). 37 | ``` 38 | ./run_aggregation.sh 39 | ``` 40 | 10. Merge the aggregation results. 41 | ``` 42 | source("aggregation_merge.R") 43 | ``` 44 | 11. Produce the figures with all the accuracy measures. 45 | ``` 46 | source("results.R") 47 | ``` 48 | 49 | To produce all the figures, run the following scripts: 50 | ``` 51 | source("plot_calendar_effects.R") 52 | source("plot_coherency.R") 53 | source("plot_coverage.R") 54 | source("plot_forecasts.R") 55 | source("plot_parameters.R") 56 | source("plot_series.R") 57 | source("plot_tree.R") 58 | ``` 59 | 60 | 61 | -------------------------------------------------------------------------------- /smart-meters/code/aggregation.R: -------------------------------------------------------------------------------- 1 | # This script uses the results generated by "permutations.R" to compute the forecasts for the aggregated series. 2 | # It allows the computations to be distributed, computing the aggregations only for a subset of the observations in the test set. 3 | # By doing so, we avoid computing all 4416 test-set observations in one shot. 4 | # This script also computes the coverage of the 50% and 90% prediction intervals, and saves it in Rdata files.
5 | # The arguments are 6 | # idjob: the id of the job 7 | # allidtest: the set of observations for which we want to compute the aggregations 8 | rm(list = ls()) 9 | args = (commandArgs(TRUE)) 10 | if(length(args) == 0){ 11 | idjob <- 1 12 | allidtest <- seq(48) #1:123 #1:4 #1:1104 13 | }else{ 14 | idjob <- as.numeric(args[[1]]) 15 | allidtest <- NULL 16 | for(i in seq(2, length(args))){ 17 | allidtest <- c(allidtest, as.numeric(args[[i]])) 18 | } 19 | } 20 | source("config_paths.R") 21 | source("config_general.R") 22 | source("config_splitting.R") 23 | source("utils.R") 24 | library(igraph) 25 | library(Matrix) 26 | library(quadprog) 27 | 28 | set.seed(1986) 29 | 30 | computeParam <- function(m, v){ 31 | list(mulog = log(m/sqrt(1+(v/m^2))), sdlog = sqrt(log(1+(v/m^2)))) 32 | } 33 | 34 | compute_crps <- function(methods, n, mat_samples, observations){ 35 | res <- sapply(seq_along(methods), function(imethod){ 36 | sapply(seq(n), function(i){ 37 | crps_sampling(mat_samples[, i, imethod], observations[i]) 38 | }) 39 | }) 40 | colnames(res) <- methods 41 | res 42 | } 43 | 44 | weighted_crps <- function(qscores, weights){ 45 | sum(qscores * weights)/length(qscores) 46 | } 47 | 48 | compute_qscores <- function(methods, n, mat_samples, observations){ 49 | sorted_samples <- apply(mat_samples, c(2, 3), sort) 50 | qscores <- sapply(seq(n), function(i){ 51 | obs <- observations[i] 52 | sapply(seq_along(methods), function(imethod){ 53 | qf <- sorted_samples[, i, imethod] 54 | 2 * ((obs <= qf) - q_probs) * (qf - obs) 55 | }) 56 | }, simplify = 'array') 57 | qscores 58 | } 59 | 60 | print(allidtest) 61 | 62 | load(file.path(work.folder, "myinfo.Rdata")) 63 | n_bottom <- length(bottomSeries) 64 | 65 | bot_methods <- c("BASE", "BASE-MINTshrink", "MINTdiag", "MINTshrink") 66 | agg_methods <- c("BASE", "INDEPBU", "PERMBU", "PERMBU-MINTshrink", "INDEPBU-MINTshrink", "MINTdiag", "MINTshrink") 67 | 68 | 69 | do.mint <- any(c(grepl("MINT", bot_methods), grepl("MINT", agg_methods))) 70 | do.mintvar <- any(c(grepl("MINTdiag", bot_methods), grepl("MINTdiag", agg_methods), grepl("MINTshrink", bot_methods), grepl("MINTshrink", agg_methods))) 71 | 72 | if(do.mint){ 73 | #load(file.path(work.folder, "wmatrices", paste("W1_", algo.agg, "_", algo.bottom, "_", "shrink", ".Rdata", sep = ""))) 74 | #W1shrink <- W1 75 | 76 | #load(file.path(work.folder, "wmatrices", paste("W1_", algo.agg, "_", algo.bottom, "_", "diagonal", ".Rdata", sep = ""))) 77 | #W1diag <- W1 78 | 79 | J <- Matrix(cbind(matrix(0, nrow = n_bottom, ncol = n_agg), diag(n_bottom)), sparse = TRUE) 80 | U <- Matrix(rbind(diag(n_agg), -t(Sagg)), sparse = TRUE) 81 | } 82 | 83 | P_bu <- cbind(matrix(0, nrow = n_bottom, ncol = n_agg), diag(n_bottom)) 84 | n_total <- n_agg + n_bottom 85 | weights_GTOP <- rep(1, n_agg + n_bottom) 86 | Rmat <- diag(sqrt(weights_GTOP)) 87 | 88 | ########## 89 | # compute the parsing order of the aggregate nodes 90 | leaves <- V(itree)[degree(itree, mode="out") == 0] 91 | agg_nodes <- V(itree)[degree(itree, mode="out") != 0] 92 | 93 | depth_aggnodes <- sapply(agg_nodes, function(agg_node){ 94 | vec <- distances(itree, agg_node, leaves, mode = "out") 95 | max( vec[which(vec!=Inf)]) 96 | }) 97 | 98 | ordered_agg_nodes_names <- names(sort(depth_aggnodes)) 99 | ordered_agg_nodes <- V(itree)[match(ordered_agg_nodes_names, V(itree)$name)] 100 | ########## 101 | 102 | ntest <- length(test$id) 103 | 104 | list_crps_agg <- list_wcrps_agg <- list_qscores_agg <- list_mse_agg <- vector("list", ntest) 105 | list_crps_bot <- list_wcrps_bot <- 
list_mse_bot <- vector("list", ntest) 106 | 107 | list_samples_agg <- vector("list", ntest) 108 | sum_overtest_qscores_agg <- sum_overtest_qscores_bot <- 0 109 | 110 | # LOADING PERMUTATION FILE 111 | perm_file <- file.path(permutations.folder, paste("perm_", algo.agg, "_", algo.bottom, ".Rdata", sep = "")) 112 | load(perm_file) # "list_matpermutations" "list_vecties" 113 | 114 | # Generate samples 115 | for(idtest in allidtest){ 116 | print(idtest) 117 | print(base::date()) 118 | 119 | res_byidtest_file <- file.path(work.folder, "byidtest", paste("results_byidtest_", algo.agg, "_", algo.bottom, "_", idtest, ".Rdata", sep = "")) 120 | load(res_byidtest_file) 121 | 122 | iday <- getInfo(idtest)$iday 123 | hour <- getInfo(idtest)$hour 124 | 125 | for(do.agg in c(TRUE, FALSE)){ 126 | if(do.agg){ 127 | set_series <- aggSeries 128 | algo <- algo.agg 129 | base_samples_agg <- matrix(NA, nrow = M, ncol = length(set_series)) 130 | colnames(base_samples_agg) <- set_series 131 | }else{ 132 | set_series <- bottomSeries 133 | algo <- algo.bottom 134 | base_samples_bottom <- matrix(NA, nrow = M, ncol = length(set_series)) 135 | colnames(base_samples_bottom) <- set_series 136 | } 137 | for(j in seq_along(set_series)){ 138 | 139 | idseries <- set_series[j] 140 | 141 | if(do.agg){ 142 | invcdf <- approxfun(taus, QF_agg_idtest[, j], rule = 2) 143 | base_samples_agg[, j] <- invcdf(q_probs) 144 | }else{ 145 | invcdf <- approxfun(taus, QF_bottom_idtest[, j], rule = 2) 146 | base_samples_bottom[, j] <- invcdf(q_probs) 147 | } 148 | }# series 149 | 150 | }# agg and bottom 151 | 152 | # rank_X <- apply(Q, 2, rank, ties.method = "random") 153 | # I know that the rank of each observations is 1 --> M 154 | perm_samples_bottom <- base_samples_bottom 155 | variables <- colnames(perm_samples_bottom) 156 | 157 | mat_test <- NULL 158 | # PERM-BU 159 | for(inode in seq_along(ordered_agg_nodes)){ 160 | 161 | agg_node <- ordered_agg_nodes[inode] 162 | idseries_agg <- names(agg_node) 163 | iagg <- match(idseries_agg, aggSeries) 164 | children_nodes <- ego(itree, order = 1, nodes = agg_node, mode = "out")[[1]][-1] 165 | nkids <- length(children_nodes) 166 | 167 | 168 | # load permutation file 169 | mat_permutations <- list_matpermutations[[idseries_agg]] 170 | 171 | ranks_historical <- mat_permutations 172 | stopifnot(all(colnames(ranks_historical) == names(children_nodes))) 173 | 174 | depth_node <- depth_aggnodes[match(idseries_agg, names(depth_aggnodes))] 175 | 176 | samples_children <- matrix(NA, nrow = M, ncol = nkids) 177 | 178 | columns_agg <- which(children_nodes %in% agg_nodes) 179 | columns_bottom <- which(children_nodes %in% leaves) 180 | children_names <- names(children_nodes) 181 | 182 | # Extracting/computing the samples for each child 183 | if(length(columns_agg) > 0){ 184 | id_agg_children <- match(children_names[columns_agg], aggSeries) 185 | samples_agg_children <- t(tcrossprod(Sagg[id_agg_children, , drop = F], perm_samples_bottom)) 186 | samples_children[, columns_agg] <- samples_agg_children 187 | } 188 | 189 | if(length(columns_bottom) > 0){ 190 | id_bottom_children <- match(children_names[columns_bottom], bottomSeries) 191 | samples_children[, columns_bottom] <- perm_samples_bottom[, id_bottom_children] 192 | } 193 | 194 | # Computing the ranks of the samples for each child 195 | ranks_samples_children <- sapply(seq(ncol(samples_children)), function(j){ 196 | rank(samples_children[, j], ties.method = "random") 197 | }) 198 | 199 | index_mat <- sapply(seq(nkids), function(j){ 200 | res <- 
match(ranks_historical[, j], ranks_samples_children[, j]) 201 | stopifnot(all(!is.na(res))) 202 | res 203 | }) 204 | 205 | # Permutating the rows 206 | if(length(columns_bottom) > 0){ 207 | perm_samples_bottom[, id_bottom_children] <- sapply(seq_along(id_bottom_children), function(j){ 208 | perm_samples_bottom[index_mat[, columns_bottom[j]], id_bottom_children[j]] 209 | }) 210 | } 211 | 212 | if(length(columns_agg) > 0){ 213 | res <- lapply(seq_along(id_agg_children), function(j){ 214 | id <- which(Sagg[id_agg_children[j], ] == 1) 215 | #print(id) 216 | #print("---") 217 | perm_samples_bottom[index_mat[, columns_agg[j]], id, drop = F] 218 | }) 219 | ids <- lapply(id_agg_children, function(id_agg_child){ 220 | which(Sagg[id_agg_child, ] == 1) 221 | }) 222 | ids <- unlist(ids) 223 | perm_samples_bottom[, ids] <- do.call(cbind, res) 224 | } 225 | 226 | }# agg node 227 | 228 | ###### MINT ############ 229 | # adjustments 230 | if(do.mint){ 231 | #print("MINT") 232 | 233 | hwanted <- idtest%%48 234 | if(hwanted == 0) 235 | hwanted <- 48 236 | 237 | Whfile <- file.path(work.folder, "wmatrices", paste("W_", hwanted, "_", algo.agg, "_", algo.bottom, "_", "shrink", ".Rdata", sep = "")) 238 | load(Whfile) 239 | W1shrink <- W1 240 | 241 | Whfile <- file.path(work.folder, "wmatrices", paste("W_", hwanted, "_", algo.agg, "_", algo.bottom, "_", "diagonal", ".Rdata", sep = "")) 242 | load(Whfile) 243 | W1diag <- W1 244 | 245 | b_hat <- mean_bottom_idtest 246 | a_hat <- mean_agg_idtest 247 | 248 | y_hat <- c(a_hat, b_hat) 249 | adj_bottom_MINTshrink <- as.numeric(mint_betastar(W1shrink, y_hat = y_hat)) 250 | adj_agg_MINTshrink <- as.numeric(Sagg %*% adj_bottom_MINTshrink) 251 | revisedMINTshrink_bottom_idtest <- mean_bottom_idtest + adj_bottom_MINTshrink 252 | 253 | adj_bottom_MINTdiag <- as.numeric(mint_betastar(W1diag, y_hat = y_hat)) 254 | revisedMINTdiag_bottom_idtest <- mean_bottom_idtest + adj_bottom_MINTdiag 255 | 256 | 257 | # MINT Variance 258 | if(do.mintvar){ 259 | S <- rbind(Sagg, diag(n_bottom)) 260 | #P_shrink <- mint_pmatrix(W1shrink) 261 | #V_MINTshrink <- S %*% P_shrink %*% W1shrink %*% t(P_shrink) %*% t(S) 262 | #V_MINTshrink_agg <- diag(V_MINTshrink)[seq(n_agg)] 263 | #V_MINTshrink_bot <- diag(V_MINTshrink)[seq(n_agg + 1, n_total)] 264 | 265 | myfct <- function(W){ 266 | P <- mint_pmatrix(W) 267 | V <- S %*% P %*% W %*% t(P) %*% t(S) 268 | V_agg <- diag(V)[seq(n_agg)] 269 | V_bot <- diag(V)[seq(n_agg + 1, n_total)] 270 | list(V_agg = V_agg, V_bot = V_bot) 271 | } 272 | res_shrink <- myfct(W1shrink) 273 | V_MINTshrink_agg <- res_shrink$V_agg 274 | V_MINTshrink_bot <- res_shrink$V_bot 275 | 276 | res_diag <- myfct(W1diag) 277 | V_MINTdiag_agg <- res_diag$V_agg 278 | V_MINTdiag_bot <- res_diag$V_bot 279 | 280 | } 281 | # a_tilde_test <- Sagg %*% P_mint %*% y_hat 282 | 283 | # library(MASS) 284 | # n_bottom 285 | # Xstandard <- mvrnorm(n = M, rep(0, n_bottom), diag(n_bottom)) 286 | # s <- svd(Sigma) 287 | # b_tilde <- as.numeric(P_mint %*% t(t(y_hat))) 288 | # V_bottom <- V_mint[seq(n_agg + 1, n_total), seq(n_agg + 1, n_total)] 289 | # X <- mvrnorm(n = M, b_tilde, V_bottom) 290 | # Y <- t(Sagg %*% t(X)) 291 | 292 | } 293 | ######################## 294 | 295 | ######################## BOTTOM 296 | samples_bot <- array(NA, c(M, n_bottom, length(bot_methods))) 297 | meanf_bot <- matrix(NA, nrow = n_bottom, ncol = length(bot_methods)) 298 | for(ibot_method in seq_along(bot_methods)){ 299 | bot_method <- bot_methods[ibot_method] 300 | if(bot_method == "BASE"){ 301 | samples_bot_method <- 
base_samples_bottom 302 | meanf_bot_method <- mean_bottom_idtest 303 | }else if(bot_method == "BASE-MINTshrink"){ 304 | samples_bot_method <- t(t(base_samples_bottom) + adj_bottom_MINTshrink) 305 | meanf_bot_method <- revisedMINTshrink_bottom_idtest 306 | }else if(bot_method == "BASE-MCOMB"){ 307 | samples_bot_method <- t(t(base_samples_bottom) + adj_bottom_mcomb) 308 | meanf_bot_method <- revisedmean_bottom_idtest 309 | }else if(bot_method == "BASE-MCOMBRECON"){ 310 | samples_bot_method <- t(t(base_samples_bottom) + adj_bottom_mcombrecon) 311 | meanf_bot_method <- rev_and_reconcilied_bottom_mean_idtest 312 | }else if(bot_method == "MINTshrink" || bot_method == "MINTdiag"){ 313 | 314 | if(bot_method == "MINTshrink"){ 315 | meanf_bot_method <- revisedMINTshrink_bottom_idtest 316 | varf_bot_method <- V_MINTshrink_bot 317 | }else if(bot_method == "MINTdiag"){ 318 | meanf_bot_method <- revisedMINTdiag_bottom_idtest 319 | varf_bot_method <- V_MINTdiag_bot 320 | } 321 | id_negative <- which(meanf_bot_method < 0) 322 | meanf_bot_method[id_negative] <- mean_bottom_idtest[id_negative] 323 | 324 | samples_bot_method <- sapply(seq(n_bottom), function(ibot){ 325 | m <- meanf_bot_method[ibot] 326 | v <- varf_bot_method[ibot] 327 | resparam <- computeParam(m, v) 328 | qlnorm(q_probs, resparam$mulog, resparam$sdlog) 329 | }) 330 | 331 | }else{ 332 | stop("error") 333 | } 334 | samples_bot[, , ibot_method] <- samples_bot_method 335 | meanf_bot[, ibot_method] <- meanf_bot_method 336 | } 337 | 338 | # MSE 339 | mse_matrix_bottom <- (meanf_bot - obs_bottom_idtest)^2 340 | list_mse_bot[[idtest]] <- mse_matrix_bottom 341 | 342 | # CRPS 343 | botmethods_crps <- compute_crps(bot_methods, n_bottom, samples_bot, obs_bottom_idtest) 344 | list_crps_bot[[idtest]] <- botmethods_crps 345 | 346 | # QS 347 | qscores_bottom <- compute_qscores(bot_methods, n_bottom, samples_bot, obs_bottom_idtest) 348 | sum_overtest_qscores_bot <- sum_overtest_qscores_bot + qscores_bottom 349 | 350 | ######################## AGG 351 | samples_agg <- array(NA, c(M, n_agg, length(agg_methods))) 352 | meanf_agg <- matrix(NA, nrow = n_agg, ncol = length(agg_methods)) 353 | 354 | for(iagg_method in seq_along(agg_methods)){ 355 | agg_method <- agg_methods[iagg_method] 356 | if(agg_method == "BASE"){ 357 | samples_agg_method <- base_samples_agg 358 | meanf_agg_method <- mean_agg_idtest 359 | }else if(agg_method == "INDEPBU"){ 360 | samples_agg_method <- t(tcrossprod(Sagg, apply(base_samples_bottom, 2, sample))) 361 | meanf_agg_method <- (Sagg %*% mean_bottom_idtest) 362 | }else if(agg_method == "PERMBU"){ 363 | samples_agg_method <- t(tcrossprod(Sagg, perm_samples_bottom)) 364 | meanf_agg_method <- (Sagg %*% mean_bottom_idtest) 365 | #}else if(agg_method == "INDEPBU-MINT"){ 366 | # samples_agg_method <- t(t(samples_agg[, , match("INDEPBU", agg_methods)]) + adj_agg_MINT) 367 | }else if(agg_method == "PERMBU-MINTshrink"){ 368 | samples_agg_method <- t(t(samples_agg[, , match("PERMBU" , agg_methods)]) + adj_agg_MINTshrink) 369 | meanf_agg_method <- (Sagg %*% revisedMINTshrink_bottom_idtest) 370 | }else if(agg_method == "INDEPBU-MINTshrink"){ 371 | samples_agg_method <- t(t(samples_agg[, , match("INDEPBU" , agg_methods)]) + adj_agg_MINTshrink) 372 | meanf_agg_method <- (Sagg %*% revisedMINTshrink_bottom_idtest) 373 | }else if(agg_method == "PERMBU-MCOMB"){ 374 | samples_agg_method <- t(t(samples_agg[, , match("PERMBU" , agg_methods)]) + adj_agg_mcomb) 375 | meanf_agg_method <- (Sagg %*% revisedmean_bottom_idtest) 376 | }else if(agg_method == 
"PERMBU-MCOMBRECON"){ 377 | samples_agg_method <- t(t(samples_agg[, , match("PERMBU" , agg_methods)]) + adj_agg_mcombrecon) 378 | meanf_agg_method <- (Sagg %*% rev_and_reconcilied_bottom_mean_idtest) 379 | }else if(agg_method == "PERMBU-MCOMBUNRECON"){ 380 | samples_agg_method <- t(t(samples_agg[, , match("PERMBU" , agg_methods)]) + adj_agg_mcombunrecon) 381 | meanf_agg_method <- revisedmean_agg_idtest 382 | }else if(agg_method == "MINTdiag" || agg_method == "MINTshrink"){ 383 | 384 | if(agg_method == "MINTshrink"){ 385 | meanf_agg_method <- (Sagg %*% revisedMINTshrink_bottom_idtest) 386 | varf_agg_method <- V_MINTshrink_agg 387 | 388 | }else if(agg_method == "MINTdiag"){ 389 | meanf_agg_method <- (Sagg %*% revisedMINTdiag_bottom_idtest) 390 | varf_agg_method <- V_MINTdiag_agg 391 | } 392 | sd_agg_method <- sqrt(varf_agg_method) 393 | 394 | samples_agg_method <- sapply(seq(n_agg), function(iagg){ 395 | resparam <- computeParam(meanf_agg_method[iagg], varf_agg_method[iagg]) 396 | qlnorm(q_probs, resparam$mulog, resparam$sdlog) 397 | #qnorm(q_probs, meanf_agg_method[iagg], sd_agg_method[iagg]) 398 | }) 399 | 400 | }else{ 401 | stop("error") 402 | } 403 | samples_agg[, , iagg_method] <- samples_agg_method 404 | meanf_agg[, iagg_method] <- meanf_agg_method 405 | } 406 | 407 | # MSE 408 | mse_matrix_agg <- (meanf_agg - obs_agg_idtest)^2 409 | list_mse_agg[[idtest]] <- mse_matrix_agg 410 | 411 | # CRPS 412 | aggmethods_crps <- compute_crps(agg_methods, n_agg, samples_agg, obs_agg_idtest) 413 | list_crps_agg[[idtest]] <- aggmethods_crps 414 | 415 | # QS 416 | qscores_agg <- compute_qscores(agg_methods, n_agg, samples_agg, obs_agg_idtest) 417 | sum_overtest_qscores_agg <- sum_overtest_qscores_agg + qscores_agg 418 | 419 | weights_uniform <- 1 420 | #weights_center <- q_probs * (1 - q_probs) 421 | weights_tails_bis <- (2 * q_probs - 1)^4 422 | weights_tails <- (2 * q_probs - 1)^2 423 | weights_rtail <- q_probs^2 424 | weights_ltail <- (1 - q_probs)^2 425 | weights_matrix <- rbind(weights_uniform, weights_tails_bis, weights_tails, weights_rtail, weights_ltail) 426 | 427 | aggmethods_wcrps <- sapply(seq(n_agg), function(iagg){ 428 | sapply(seq_along(agg_methods), function(imethod){ 429 | (weights_matrix %*% qscores_agg[, imethod, iagg])/M 430 | }) 431 | }, simplify = "array") 432 | #aggmethods_wcrps <- aperm(aggmethods_wcrps, c(1, 3, 2)) 433 | list_wcrps_agg[[idtest]] <- aperm(aggmethods_wcrps, c(1, 3, 2)) 434 | 435 | # FOR BOTTOM 436 | botmethods_wcrps <- sapply(seq(n_bottom), function(ibot){ 437 | sapply(seq_along(bot_methods), function(imethod){ 438 | (weights_matrix %*% qscores_bottom[, imethod, ibot])/M 439 | }) 440 | }, simplify = "array") 441 | # botmethods_wcrps <- aperm(botmethods_wcrps, c(1, 3, 2)) 442 | list_wcrps_bot[[idtest]] <- aperm(botmethods_wcrps, c(1, 3, 2)) 443 | 444 | print(warnings()) 445 | assign("last.warning", NULL, envir = baseenv()) 446 | 447 | do.coverage <- TRUE 448 | if(do.coverage){ 449 | coverage_agg <- sapply(seq_along(agg_methods), function(imethod){ 450 | QF_agg_idtest <- apply(samples_agg[, , imethod], 2, quantile, taus) 451 | agg_90 <- (obs_agg_idtest >= QF_agg_idtest[which(taus == 0.05), ] & obs_agg_idtest <= QF_agg_idtest[which(taus == 0.95), ]) 452 | agg_50 <- (obs_agg_idtest >= QF_agg_idtest[which(taus == 0.25), ] & obs_agg_idtest <= QF_agg_idtest[which(taus == 0.75), ]) 453 | rbind(agg_50, agg_90) 454 | }, simplify = "array") 455 | 456 | coverage_bot <- sapply(seq_along(bot_methods), function(imethod){ 457 | QF_bottom_idtest <- apply(samples_bot[, , imethod], 
2, quantile, taus) 458 | bot_90 <- (obs_bottom_idtest >= QF_bottom_idtest[which(taus == 0.05), ] & obs_bottom_idtest <= QF_bottom_idtest[which(taus == 0.95), ]) 459 | bot_50 <- (obs_bottom_idtest >= QF_bottom_idtest[which(taus == 0.25), ] & obs_bottom_idtest <= QF_bottom_idtest[which(taus == 0.75), ]) 460 | rbind(bot_50, bot_90) 461 | }, simplify = "array") 462 | 463 | coverage_idtest <- file.path(coverage.folder, paste("coverage_", algo.agg, "_", algo.bottom, "_", idtest, ".Rdata", sep = "")) 464 | save(file = coverage_idtest, list = c("coverage_bot", "coverage_agg")) 465 | } 466 | 467 | }# END IDTEST 468 | 469 | avg_qscores_agg <- sum_overtest_qscores_agg/length(allidtest) 470 | avg_qscores_bot <- sum_overtest_qscores_bot/length(allidtest) 471 | 472 | res_job <- file.path(loss.folder, paste("results_HTS_", algo.agg, "_", algo.bottom, "_", idjob, ".Rdata", sep = "")) 473 | #save(file = res_job, list = c("list_crps_agg", "list_crps_bot", "list_mse_bot", "list_mse_agg", "list_wcrps_agg", "list_wcrps_bot", "avg_qscores_agg", "avg_qscores_bot")) 474 | 475 | if(FALSE){ 476 | if(idjob %in% c(1)){ 477 | samples_job <- file.path(work.folder, "samples_agg", paste("samples_agg_", algo.agg, "_", algo.bottom, "_", idjob, ".Rdata", sep = "")) 478 | save(file = samples_job, list = c("list_samples_agg")) 479 | } 480 | } 481 | -------------------------------------------------------------------------------- /smart-meters/code/aggregation_merge.R: -------------------------------------------------------------------------------- 1 | # This script simply merges the results from all the Rdata files produced by "aggregation.R". 2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("config_general.R") 5 | source("config_splitting.R") 6 | source("utils.R") 7 | library(igraph) 8 | 9 | load(file.path(work.folder, "myinfo.Rdata")) 10 | 11 | node_nbkids <- apply(Sagg, 1, sum) 12 | node_order <- sort(node_nbkids, index = T, decreasing = T)$ix 13 | 14 | ntest <- length(test$id) 15 | n_bottom <- length(bottomSeries) 16 | 17 | do.twentyfour <- FALSE 18 | if(do.twentyfour){ 19 | tday <- tday[seq(1, 48, 2)] 20 | } 21 | 22 | #nbperjob <- 276 23 | #nbperjob <- 69 24 | #njobs <- ntest/nbperjob 25 | 26 | #nbperjob <- 123 27 | #njobs <- 36 28 | 29 | #nbperjob <- 130 30 | #njobs <- 34 31 | 32 | #nbperjob <- 138 33 | #njobs <- 32 34 | 35 | 36 | nbperjob <- 368 37 | njobs <- 12 38 | 39 | leaves <- V(itree)[degree(itree, mode="out") == 0] 40 | agg_nodes <- V(itree)[degree(itree, mode="out") != 0] 41 | 42 | depth_aggnodes <- sapply(agg_nodes, function(agg_node){ 43 | vec <- distances(itree, agg_node, leaves, mode = "out") 44 | max( vec[which(vec!=Inf)]) 45 | }) 46 | 47 | #agg_methods <- c("BASE", "NAIVEBU", "PERMBU", "NAIVEBU-MINT", "PERMBU-MINT") 48 | #color.agg <- c("black", "orange", "darkblue") 49 | #bot_methods <- c("BASE", "BASE-MINT") 50 | #color.bot <- c("black") 51 | 52 | #agg_methods <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT", "PERMBU-MEANCOMB") 53 | #color.agg <- c("grey", "orange", "cyan", "purple", "darkblue") 54 | #bot_methods <- c("BASE", "BASE-MINT", "BASE-MEANCOMB") 55 | #color.bot <- c("black", "purple", "darkblue") 56 | 57 | #agg_methods <- c("BASE", "NAIVEBU", "PERMBU") 58 | #color.agg <- c("grey", "orange", "cyan") 59 | #bot_methods <- c("BASE", "BASE-MINT") 60 | #color.bot <- c("black", "purple") 61 | 62 | #bot_methods <- c("BASE", "BASE-MINT", "BASE-MCOMB", "BASE-MCOMBRECON") 63 | #color.bot <- c("black", "purple", "darkgreen", "darkblue") 64 | #agg_methods <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT",
"PERMBU-MCOMB", "PERMBU-MCOMBRECON") 65 | #color.agg <- c("grey", "orange", "cyan", "purple", "darkgreen", "darkblue") 66 | 67 | 68 | 69 | ##### JASA PAPER 70 | bot_methods <- c("BASE", "BASE-MINT", "MINTdiag", "MINTshrink") 71 | color.bot <- c("black", "purple", "red", "green") 72 | 73 | agg_methods <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT", "NAIVEBU-MINT", "MINTdiag", "MINTshrink") 74 | color.agg <- c("black", "orange", "purple", "cyan", "pink", "red", "green") 75 | 76 | pch.agg <- c(8, 0, 2, 2, 0, 1, 3) 77 | pch.bot <- c(8, 2, 1, 3) 78 | lty.agg <- c(1, 3, 2, 2, 3, 6, 5) 79 | lty.bot <- c(1, 2, 6, 5) 80 | 81 | 82 | if(FALSE){ 83 | bot_methods <- c("BASE", "BASE-MINT", "BASE-MCOMB", "BASE-MCOMBRECON", "PROBMINT") 84 | color.bot <- c("black", "purple", "darkgreen", "darkblue", "green") 85 | agg_methods <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT", "PERMBU-MCOMB", 86 | "PERMBU-MCOMBRECON", "PERMBU-MCOMBUNRECON", "NAIVEBU-MINT", "PROBMINT") 87 | color.agg <- c("black", "orange", "cyan", "purple", "darkgreen", "darkblue", "red", "pink", "green") 88 | 89 | agg_better_names <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT", "PERMBU-GTOP1", "PERMBU-GTOP2", "PERMBU-COMB") 90 | bot_better_names <- c("BASE", "PERMBU-MINT", "PERMBU-GTOP1", "PERMBU-GTOP2") 91 | } 92 | if(FALSE){ 93 | bot_methods <- c("BASE", "BASE-MINT", "BASE-MCOMB", "BASE-MCOMBRECON") 94 | color.bot <- c("black", "purple", "darkgreen", "darkblue") 95 | agg_methods <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT", "PERMBU-MCOMB", 96 | "PERMBU-MCOMBRECON", "PERMBU-MCOMBUNRECON") 97 | color.agg <- c("black", "orange", "cyan", "purple", "darkgreen", "darkblue", "red") 98 | 99 | agg_better_names <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT", "PERMBU-GTOP1", "PERMBU-GTOP2", "PERMBU-COMB") 100 | bot_better_names <- c("BASE", "PERMBU-MINT", "PERMBU-GTOP1", "PERMBU-GTOP2") 101 | } 102 | 103 | 104 | 105 | wcrps_agg <- array(NA, c(5, n_agg, ntest, length(agg_methods))) 106 | crps_agg <- array(NA, c(n_agg, ntest, length(agg_methods))) 107 | 108 | wcrps_bottom <- array(NA, c(5, n_bottom, ntest, length(bot_methods))) 109 | crps_bottom <- array(NA, c(n_bottom, ntest, length(bot_methods))) 110 | 111 | mse_agg <- array(NA, c(n_agg, ntest, length(agg_methods))) 112 | mse_bottom <- array(NA, c(n_bottom, ntest, length(bot_methods))) 113 | 114 | total_qscores_agg <- total_qscores_bot <- 0 115 | 116 | for(idjob in seq(njobs)){ 117 | print(idjob) 118 | allidtest <- (idjob - 1) * nbperjob + seq(nbperjob) 119 | 120 | if(nbperjob == 123 && idjob == 36){ 121 | allidtest <- 4306:4416 122 | #allidtest <- 4291:4416 123 | } 124 | 125 | res_job <- file.path(loss.folder, paste("results_HTS_", algo.agg, "_", algo.bottom, "_", idjob, ".Rdata", sep = "")) 126 | load(res_job) 127 | 128 | # crps agg 129 | list_crps_agg_nonull <- list_crps_agg[-which(sapply(list_crps_agg, is.null))] 130 | mat_crps_agg <- sapply(seq_along(list_crps_agg_nonull), function(i){list_crps_agg_nonull[[i]]}, simplify = 'array') 131 | 132 | # wcrps agg 133 | list_wcrps_agg_nonull <- list_wcrps_agg[-which(sapply(list_wcrps_agg, is.null))] 134 | mat_wcrps_agg <- sapply(seq_along(list_wcrps_agg_nonull), function(i){list_wcrps_agg_nonull[[i]]}, simplify = 'array') 135 | 136 | # wcrps bot 137 | list_wcrps_bot_nonull <- list_wcrps_bot[-which(sapply(list_wcrps_bot, is.null))] 138 | mat_wcrps_bot <- sapply(seq_along(list_wcrps_bot_nonull), function(i){list_wcrps_bot_nonull[[i]]}, simplify = 'array') 139 | 140 | # crps bot 141 | list_crps_bot_nonull <- list_crps_bot[-which(sapply(list_crps_bot, 
is.null))] 142 | mat_crps_bot <- sapply(seq_along(list_crps_bot_nonull), function(i){list_crps_bot_nonull[[i]]}, simplify = 'array') 143 | 144 | 145 | # 146 | crps_bottom[, allidtest, ] <- aperm(mat_crps_bot, c(1, 3, 2)) 147 | wcrps_bottom[, , allidtest, ] <- aperm(mat_wcrps_bot, c(1, 2, 4, 3)) 148 | 149 | crps_agg[, allidtest,] <- aperm(mat_crps_agg, c(1, 3, 2)) 150 | wcrps_agg[, , allidtest,] <- aperm(mat_wcrps_agg, c(1, 2, 4, 3)) 151 | 152 | 153 | total_qscores_agg <- total_qscores_agg + avg_qscores_agg 154 | total_qscores_bot <- total_qscores_bot + avg_qscores_bot 155 | 156 | list_mse_agg_nonull <- list_mse_agg[-which(sapply(list_mse_agg, is.null))] 157 | mat_mse_agg <- sapply(seq_along(list_mse_agg_nonull), function(i){list_mse_agg_nonull[[i]]}, simplify = 'array') 158 | list_mse_bot_nonull <- list_mse_bot[-which(sapply(list_mse_bot, is.null))] 159 | mat_mse_bot <- sapply(seq_along(list_mse_bot_nonull), function(i){list_mse_bot_nonull[[i]]}, simplify = 'array') 160 | 161 | mse_agg[, allidtest,] <- aperm(mat_mse_agg, c(1, 3, 2)) 162 | mse_bottom[, allidtest,] <- aperm(mat_mse_bot, c(1, 3, 2)) 163 | } 164 | 165 | total_qscores_agg <- total_qscores_agg / njobs 166 | total_qscores_bot <- total_qscores_bot / njobs 167 | 168 | 169 | # crps_agg total_qscores_agg 170 | # crps_bottom total_qscores_bot 171 | 172 | # AGG MSE 173 | mse_agg_byhour <- sapply(seq(n_agg), function(iagg){ 174 | sapply(seq_along(agg_methods), function(imethod){ 175 | res <- apply(matrix(mse_agg[iagg, , imethod], ncol = 48, byrow = T), 2, mean) 176 | 177 | if(do.twentyfour){ 178 | res <- sapply(seq(1, 48, by = 2), function(i){ 179 | mean(res[seq(i, i+1)]) 180 | }) 181 | } 182 | res 183 | }) 184 | }, simplify = 'array') 185 | 186 | # BOT MSE 187 | mse_bot_byhour <- sapply(seq(n_bottom), function(ibot){ 188 | sapply(seq_along(bot_methods), function(imethod){ 189 | res <- apply(matrix(mse_bottom[ibot, , imethod], ncol = 48, byrow = T), 2, mean) 190 | if(do.twentyfour){ 191 | res <- sapply(seq(1, 48, by = 2), function(i){ 192 | mean(res[seq(i, i+1)]) 193 | }) 194 | } 195 | res 196 | }) 197 | }, simplify = 'array') 198 | 199 | # BOT CRPS 200 | crps_bot_byhour <- sapply(seq(n_bottom), function(ibot){ 201 | sapply(seq_along(bot_methods), function(imethod){ 202 | res <- apply(matrix(crps_bottom[ibot, , imethod], ncol = 48, byrow = T), 2, mean) 203 | if(do.twentyfour){ 204 | res <- sapply(seq(1, 48, by = 2), function(i){ 205 | mean(res[seq(i, i+1)]) 206 | }) 207 | } 208 | res 209 | }) 210 | }, simplify = 'array') 211 | 212 | # AGG CRPS 213 | crps_agg_byhour <- sapply(seq(n_agg), function(iagg){ 214 | sapply(seq_along(agg_methods), function(imethod){ 215 | res <- apply(matrix(crps_agg[iagg, , imethod], ncol = 48, byrow = T), 2, mean) 216 | if(do.twentyfour){ 217 | res <- sapply(seq(1, 48, by = 2), function(i){ 218 | mean(res[seq(i, i+1)]) 219 | }) 220 | } 221 | res 222 | }) 223 | }, simplify = 'array') 224 | 225 | # AGG WCRPS 226 | wcrps_agg_byhour <- sapply(seq(5), function(iweight){ 227 | sapply(seq(n_agg), function(iagg){ 228 | sapply(seq_along(agg_methods), function(imethod){ 229 | res <- apply(matrix(wcrps_agg[iweight, iagg, , imethod], ncol = 48, byrow = T), 2, mean) 230 | if(do.twentyfour){ 231 | res <- sapply(seq(1, 48, by = 2), function(i){ 232 | mean(res[seq(i, i+1)]) 233 | }) 234 | } 235 | res 236 | }) 237 | }, simplify = 'array') 238 | }, simplify = 'array') 239 | 240 | wcrps_agg_byhour <- aperm(wcrps_agg_byhour, c(4, 1, 2, 3)) 241 | 242 | # BOT WCRPS 243 | wcrps_bot_byhour <- sapply(seq(5), function(iweight){ 244 | 
sapply(seq(n_bottom), function(ibot){ 245 | sapply(seq_along(bot_methods), function(imethod){ 246 | res <- apply(matrix(wcrps_bottom[iweight, ibot, , imethod], ncol = 48, byrow = T), 2, mean) 247 | if(do.twentyfour){ 248 | res <- sapply(seq(1, 48, by = 2), function(i){ 249 | mean(res[seq(i, i+1)]) 250 | }) 251 | } 252 | res 253 | }) 254 | }, simplify = 'array') 255 | }, simplify = 'array') 256 | 257 | wcrps_bot_byhour <- aperm(wcrps_bot_byhour, c(4, 1, 2, 3)) 258 | 259 | 260 | res_info <- getInfoNode("nb_nodes") 261 | #res_info <- getInfoNode("kwh") 262 | agg_nodes_order <- sort(res_info$info_nodes_agg, index = T, decreasing = T)$ix 263 | bot_nodes_order <- sort(res_info$info_nodes_bottom, index = T, decreasing = T)$ix 264 | 265 | 266 | -------------------------------------------------------------------------------- /smart-meters/code/basef.R: -------------------------------------------------------------------------------- 1 | # This script produces the base forecasts for both the bottom-level and aggregate-level series. 2 | # KDE is used for the bottom-level series, and double exponential smoothing is used for the aggregate-level series. 3 | # The arguments include 4 | # "do.agg": forecasts are produced for the aggregate-level series? (True or False) 5 | # "algo": the forecasting algorithm ("KD-IC-NML", "DETS" or "Uncond") 6 | # "alliseries": the index of all series for which we want to produce the forecasts 7 | rm(list = ls()) 8 | print(base::date()) 9 | args = (commandArgs(TRUE)) 10 | if(length(args) == 0){ 11 | do.agg <- F 12 | algo <- c("KD-IC-NML") 13 | #alliseries <- c(1, 20, 100) 14 | alliseries <- 320 15 | }else{ 16 | 17 | for(i in 1:length(args)){ 18 | print(args[[i]]) 19 | } 20 | 21 | algo <- args[[1]] 22 | do.agg <- as.logical(args[[2]]) 23 | alliseries <- NULL 24 | for(i in seq(3, length(args))){ 25 | alliseries <- c(alliseries, as.numeric(args[[i]])) 26 | } 27 | } 28 | 29 | print(algo) 30 | 31 | source("config_paths.R") 32 | source("config_general.R") 33 | source("config_splitting.R") 34 | source("utils.R") 35 | 36 | library(parallel) 37 | library(fBasics) 38 | library(msm) 39 | library(gtools) 40 | library(forecast) 41 | library(abind) 42 | library(glmnet) 43 | 44 | load(file.path(work.folder, "myinfo.Rdata")) 45 | 46 | algos_allowed <- c("Uncond", "KD-IC-NML", "DETS") 47 | stopifnot(algo %in% algos_allowed) 48 | 49 | for(iseries in alliseries){ 50 | 51 | print(base::date()) 52 | print(iseries) 53 | if(do.agg){ 54 | idseries <- aggSeries[iseries] 55 | load(file.path(aggseries.folder, paste("series-", idseries, ".Rdata", sep = ""))) 56 | }else{ 57 | idseries <- bottomSeries[iseries] 58 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 59 | } 60 | 61 | print(algo) 62 | res_file <- file.path(basef.folder, algo, paste("results_", idseries, "_", algo, ".Rdata", sep = "")) 63 | dir.create(file.path(basef.folder, algo), showWarnings = FALSE) 64 | 65 | if(algo == "DETS"){ 66 | 67 | do.logtrans <- FALSE 68 | if(do.logtrans){ 69 | my_ts <- log(demand) 70 | }else{ 71 | my_ts <- demand 72 | } 73 | 74 | ids_future <- test$id 75 | nb_futuredays <- length(seq_testing_interval)/48 76 | 77 | all_qf <- all_mf <- all_sd <- all_mfsample <- vector("list", nb_futuredays) 78 | mydays <- seq(1, nb_futuredays) 79 | 80 | for(id_future_day in mydays){ 81 | print(id_future_day) 82 | print(base::date()) 83 | 84 | if(id_future_day == 1){ 85 | ids_past <- learn$id 86 | n_past_obs <- length(ids_past) 87 | }else{ 88 | n_past_obs <- n_past_obs_tbats 89 | ids_past <- 
tail(learn$id, n_past_obs) 90 | } 91 | 92 | offset_nhours <- (id_future_day - 1) * 48 93 | ids_future_hours <- ids_future[offset_nhours + seq(1, 48)] 94 | 95 | if(offset_nhours > 0){ 96 | ids_past_actual <- c(ids_past, ids_future)[offset_nhours + seq(n_past_obs)] 97 | }else{ 98 | ids_past_actual <- ids_past 99 | } 100 | 101 | ypast <- as.numeric(my_ts[ids_past_actual]) 102 | 103 | do.optimize <- (id_future_day - 1) %% 7 == 0 104 | 105 | 106 | # initialization 107 | a <- 1/336 * (mean(ypast[seq(336)]) - mean(ypast[336 + seq(336)])) 108 | b <- mean(diff(ypast[seq(336)])) 109 | T_0 <- (a+b)/2 110 | l_start <- mean(ypast[seq(2 * 336)]) - 336.5 * T_0 111 | 112 | # days 113 | nb_obs <- 7 * m_1 114 | indices <- seq(nb_obs) 115 | smoothed_line <- ma(ypast[indices], m_1) 116 | #indices <- seq(m_1/2 + 1, nb_obs - m_1/2) 117 | x <- ypast[indices] - smoothed_line[indices] 118 | mat <- matrix(x, ncol = 48, byrow = T) 119 | D <- apply(mat, 2, mean, na.rm = T) 120 | D <- D - mean(D) 121 | 122 | # weeks 123 | nb_weeks <- 4 124 | indices <- seq(nb_weeks * m_2) 125 | smoothed_line <- ma(ypast[indices], m_2) 126 | x <- ypast[indices] - smoothed_line[indices] - rep(D, nb_weeks * 7) 127 | mat <- matrix(x, ncol = 336, byrow = T) 128 | W <- apply(mat, 2, mean, na.rm = T) 129 | W <- W - mean(W) 130 | 131 | e_0 <- rep(0, m_2) 132 | l_0 <- rep(l_start, m_2) 133 | d_0 <- rep(D, 7) 134 | w_0 <- W 135 | 136 | ### 137 | if(do.optimize){ 138 | N <- 100 139 | THETA <- matrix(runif(N * 4), ncol = 4) 140 | E <- sapply(seq(nrow(THETA)), function(i){ 141 | #print(i) 142 | func_to_optimize(THETA[i, ], y = ypast, e_0 = e_0, l_0 = l_0, d_0 = d_0, w_0 = w_0, do.forecast = FALSE) 143 | }) 144 | id <- sort(E, index = T)$ix[1] 145 | res_optim <- optim(THETA[id, ], fn = func_to_optimize, y = ypast, e_0 = e_0, l_0 = l_0, d_0 = d_0, w_0 = w_0, do.forecast = F, 146 | method = "L-BFGS-B", lower = 0, upper = 1) 147 | if(id_future_day == 1){ 148 | param_file <- file.path(basef.folder, algo, paste("parameters_", idseries, "_", algo, ".Rdata", sep = "")) 149 | save(file = param_file, list = c("res_optim")) 150 | #stop("done") 151 | } 152 | } 153 | 154 | obj_forecast <- iterate(res_optim$par, ypast, e_0, l_0, d_0, w_0, do.forecast = T) 155 | 156 | 157 | if(id_future_day == 1){ 158 | dir.create(file.path(insample.folder, algo), recursive = TRUE, showWarnings = FALSE) 159 | # insample mean 160 | all_mu <- obj_forecast$yhat 161 | insample_condmean_file <- file.path(insample.folder, algo, paste("condmean_", idseries, "_", algo, "_", id_future_day, ".Rdata", sep = "")) 162 | save(file = insample_condmean_file, list = c("all_mu")) 163 | 164 | # residuals COPULA 165 | e_residuals <- obj_forecast$residuals 166 | resid_file <- file.path(insample.folder, algo, paste("residuals_", idseries, "_", algo, "_", id_future_day, ".Rdata", sep = "")) 167 | save(file = resid_file, list = c("e_residuals")) 168 | 169 | # residuals MINT 170 | residuals_MINT <- obj_forecast$residuals 171 | resid_MINT_file <- file.path(insample.folder, algo, paste("residuals_MINT_", idseries, "_", algo, "_", id_future_day, ".Rdata", sep = "")) 172 | save(file = resid_MINT_file, list = c("residuals_MINT")) 173 | } 174 | 175 | all_mf[[id_future_day]] <- obj_forecast$mf 176 | all_qf[[id_future_day]] <- obj_forecast$qf 177 | 178 | all_mfsample[[id_future_day]] <- obj_forecast$mfsample 179 | 180 | } 181 | list_save <- c("all_qf", "all_mf", "all_mfsample") 182 | save(file = res_file, list = list_save) 183 | 184 | }else if(algo == "Uncond"){ 185 | qFlearn <- quantile(demand[learn$id], taus) 
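    # Added note: "Uncond" is the unconditional benchmark; the learning-set
    # quantiles (qFlearn) and mean are replicated unchanged for every
    # half-hour of the test period.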
186 | qFtest <- matrix(rep(qFlearn, length(test$id)), ncol = length(test$id))
187 | 
188 | mFlearn <- mean(demand[learn$id])
189 | mFtest <- rep(mFlearn, length(test$id))
190 | save(file = res_file, list = c("qFtest", "mFtest"))
191 | 
192 | }else if(grepl("KD-D", algo) || grepl("KD-IC", algo)){
193 | 
194 | 
195 | if(grepl("TRC", algo)){
196 | mykernel <- "truncated"
197 | }else if(grepl("NML", algo)){
198 | mykernel <- "normal"
199 | }else if(grepl("LNL", algo)){
200 | mykernel <- "lognormal"
201 | }else{
202 | mykernel <- "normal"
203 | }
204 | 
205 | ### LEARNING
206 | res_learning <- predictkde("learning")
207 | 
208 | results_crps <- sapply(res_learning$results, function(list_vectors){ # collect the nested per-day results into a numeric array
209 | sapply(list_vectors, function(list_two){
210 | sapply(list_two, function(vector){
211 | identity(vector)
212 | }, simplify = "array")
213 | 
214 | }, simplify = "array")
215 | }, simplify = "array")
216 | 
217 | ic_days <- res_learning$ic_days
218 | 
219 | idbest_bandwiths <- idbest_lambda <- NULL
220 | for(ic in seq(3)){
221 | err <- apply(results_crps[, , , which(ic_days == ic)], c(1, 3), mean)
222 | idbest <- which(err == min(err), arr.ind = T)
223 | 
224 | idbest_bandwiths <- c(idbest_bandwiths, idbest[1, 1])
225 | idbest_lambda <- c(idbest_lambda, idbest[1, 2])
226 | }
227 | selected_bandwiths_ic <- res_learning$bandwiths[idbest_bandwiths]
228 | selected_lambdas_ic <- res_learning$lambdas[idbest_lambda]
229 | 
230 | param_file <- file.path(basef.folder, algo, paste("parameters_", idseries, "_", algo, ".Rdata", sep = ""))
231 | bandwiths <- res_learning$bandwiths # grid of candidate bandwidths considered during learning
232 | save(file = param_file, 
233 | list = c("selected_bandwiths_ic", "selected_lambdas_ic", "bandwiths"))
234 | 
235 | ### TESTING
236 | res_testing <- predictkde("testing", selected_bandwiths = selected_bandwiths_ic, selected_lambdas = selected_lambdas_ic)
237 | 
238 | # all_crps <- getItem(res_testing$results, "crps")
239 | all_qf <- getfromlist(res_testing$results, "q_hat")
240 | all_mf <- getfromlist(res_testing$results, "mu_hat")
241 | all_varf <- getfromlist(res_testing$results, "var_hat")
242 | 
243 | save(file = res_file, list = c("all_qf", "all_mf", "all_varf"))
244 | 
245 | ### IN SAMPLE INFO
246 | res_insample_info <- predictkde("insample_info", selected_bandwiths = selected_bandwiths_ic, selected_lambdas = selected_lambdas_ic)
247 | 
248 | # residuals
249 | all_residuals <- getfromlist(res_insample_info$results, "residuals")
250 | e_residuals_unscaled <- unlist(all_residuals)
251 | all_var <- getfromlist(res_insample_info$results, "var_hat")
252 | all_mu <- getfromlist(res_insample_info$results, "mu_hat")
253 | 
254 | all_varhat <- unlist(all_var)
255 | e_residuals <- e_residuals_unscaled/sqrt(all_varhat)
256 | 
257 | # save residuals COPULA
258 | dir.create(file.path(insample.folder, algo), recursive = TRUE, showWarnings = FALSE)
259 | resid_file <- file.path(insample.folder, algo, paste("residuals_", idseries, "_", algo, ".Rdata", sep = ""))
260 | save(file = resid_file, list = c("e_residuals"))
261 | 
262 | # save residuals MINT
263 | residuals_MINT <- e_residuals_unscaled
264 | resid_MINT_file <- file.path(insample.folder, algo, paste("residuals_MINT_", idseries, "_", algo, ".Rdata", sep = ""))
265 | save(file = resid_MINT_file, list = c("residuals_MINT"))
266 | 
267 | # extract insample quantiles, standardized by the conditional mean and sd
268 | all_qf_insample <- getfromlist(res_insample_info$results, "q_hat")
269 | 
270 | all_qfe_insample <- lapply(seq_along(all_qf_insample), function(iday){
271 | t((t(all_qf_insample[[iday]]) - all_mu[[iday]])/sqrt(all_var[[iday]]))
272
| }) 273 | 274 | insample_condmean_file <- file.path(insample.folder, algo, paste("condmean_", idseries, "_", algo, ".Rdata", sep = "")) 275 | save(file = insample_condmean_file, list = c("all_mu")) 276 | } 277 | } 278 | -------------------------------------------------------------------------------- /smart-meters/code/config_general.R: -------------------------------------------------------------------------------- 1 | source("nicefigs.R") 2 | 3 | hour <- rep(c("00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", 4 | "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"), each = 2) 5 | minutes <- rep(c("00", "30"), 24) 6 | tday <- paste(hour, minutes, sep=":") 7 | 8 | abbr.dweek <- c("Mon","Tue","Wed","Thu","Fri", "Sat","Sun") 9 | 10 | taus <- rev(seq(1, 99)/100) 11 | 12 | hours_night <- night_hours <- c(seq(1, 12), 46, 47, 48) 13 | hours_day <- day_hours <- setdiff(seq(1, 48), hours_night) 14 | hours_all <- c(hours_night, hours_day) 15 | index_hours <- sort(hours_all, index = T)$ix 16 | 17 | iday_withmodels <- c(1, seq(10, 90, 10)) 18 | 19 | #n_past_obs_kd <- 90 *48 20 | n_past_obs_kd <- 60 *48 21 | n_past_obs_tbats <- (31 + 28 + 31 + 30)*48 22 | 23 | M <- 120 * 48 # 5760 24 | 25 | q_probs <- seq(M)/(M + 1) 26 | 27 | m_1 <- 48 28 | m_2 <- 336 29 | 30 | algo.agg <- "DETS" 31 | algo.bottom <- "KD-IC-NML" 32 | 33 | min_bandwith <- 10^-6 34 | 35 | print("----") 36 | print(algo.agg) 37 | print(algo.bottom) 38 | print("----") 39 | 40 | -------------------------------------------------------------------------------- /smart-meters/code/config_paths.R: -------------------------------------------------------------------------------- 1 | main.folder <- "../" 2 | 3 | rawdata.folder <- file.path(main.folder, "rawdata") 4 | procdata.folder <- file.path(main.folder, "procdata") 5 | work.folder <- file.path(main.folder, "work") 6 | 7 | initmeters.folder <- file.path(procdata.folder, "initmeters") 8 | mymeters.folder <- file.path(procdata.folder , "mymeters") 9 | aggseries.folder <- file.path(procdata.folder , "aggseries") 10 | 11 | basef.folder <- file.path(work.folder , "basef") 12 | results.folder <- file.path(work.folder , "pdfs") 13 | loss.folder <- file.path(work.folder , "loss") 14 | insample.folder <- file.path(work.folder , "insample") 15 | permutations.folder <- file.path(work.folder , "permutations") 16 | coverage.folder <- file.path(work.folder , "coverage") 17 | 18 | #list.folders <- c(rawdata.folder, procdata.folder, work.folder, initmeters.folder, mymeters.folder, aggseries.folder, 19 | # basef.folder, results.folder, loss.folder, insample.folder, permutations.folder, coverage.folder) 20 | #sapply(list.folders, dir.create) -------------------------------------------------------------------------------- /smart-meters/code/config_splitting.R: -------------------------------------------------------------------------------- 1 | # This script defines the data split into training, validation and test data. 2 | # It also defines some calendar information for the data. 
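# The split below works by generating a 30-minute POSIXct sequence for each
# period and matching it against the sequence for the complete observation
# window, so train$id, validation$id, learn$id and test$id are integer
# positions into the full series. A small sketch of the mechanism (toy dates,
# illustrative *_demo names):
if(FALSE){
  library(lubridate)
  full_demo <- seq(ymd_hms("2010-01-01 00:00:00"), ymd_hms("2010-01-02 23:30:00"), by = "30 min")
  test_demo <- seq(ymd_hms("2010-01-02 00:00:00"), ymd_hms("2010-01-02 23:30:00"), by = "30 min")
  id_demo <- match(test_demo, full_demo)   # positions 49..96 of the full sequence
  stopifnot(identical(id_demo, 49:96))
}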
3 | library(lubridate) 4 | #load(file.path(working.folder, "myinfo.Rdata")) 5 | 6 | startTrain <- ymd_hms("2009-4-20 00:00:00"); endTrain <- ymd_hms("2010-3-31 23:30:00") 7 | startVal <- ymd_hms("2010-4-01 00:00:00"); endVal <- ymd_hms("2010-4-30 23:30:00") 8 | startLearn <- startTrain; endLearn <- endVal 9 | startTest <- ymd_hms("2010-5-01 00:00:00"); endTest <- ymd_hms("2010-7-31 23:30:00") 10 | startObs <- startTrain; endObs <- endTest 11 | 12 | 13 | complete_interval <- lubridate::interval(startObs, endObs) 14 | seq_complete_interval <- seq(int_start(complete_interval), int_end(complete_interval), by = "30 min") 15 | 16 | training_interval <- lubridate::interval(startTrain, endTrain) # 11 months of training 17 | seq_training_interval <- seq(lubridate::int_start(training_interval), lubridate::int_end(training_interval), by = "30 min") 18 | 19 | validation_interval <- lubridate::interval(startVal, endVal) # 1 month of validation 20 | seq_validation_interval <- seq(lubridate::int_start(validation_interval), lubridate::int_end(validation_interval), by = "30 min") 21 | 22 | learning_interval <- lubridate::union(training_interval, validation_interval) 23 | seq_learning_interval <- seq(lubridate::int_start(learning_interval), lubridate::int_end(learning_interval), by = "30 min") 24 | 25 | testing_interval <- lubridate::interval(startTest, endTest) # 3 months of testing 26 | seq_testing_interval <- seq(lubridate::int_start(testing_interval), lubridate::int_end(testing_interval), by = "30 min") 27 | 28 | 29 | stopifnot(lubridate::union(learning_interval, testing_interval) == complete_interval) 30 | 31 | train <- validation <- learn <- test <- NULL 32 | train$id <- match(seq_training_interval, seq_complete_interval) 33 | validation$id <- match(seq_validation_interval, seq_complete_interval) 34 | learn$id <- match(seq_learning_interval, seq_complete_interval) 35 | test$id <- match(seq_testing_interval, seq_complete_interval) 36 | 37 | # Calendar variables for the period 38 | calendar <- NULL 39 | calendar$dweek <- lubridate::wday(seq_complete_interval) 40 | calendar$dweek <- (calendar$dweek - 1) + ((calendar$dweek - 1) == 0)*7 41 | 42 | calendar$periodOfDay <- 2*(lubridate::hour(seq_complete_interval) + 1) - (lubridate::minute(seq_complete_interval) == 0) 43 | calendar$tyear <- lubridate::yday(seq_complete_interval) 44 | calendar$periodOfWeek <- (calendar$dweek - 1)*48 + calendar$periodOfDay 45 | calendar$year <- lubridate::year(seq_complete_interval) 46 | 47 | periodOfCycle <- calendar$periodOfWeek 48 | periodOfCycle[which(periodOfCycle %in% seq(1, 5*48))] <- 1 49 | periodOfCycle[which(periodOfCycle %in% seq(5*48 + 1, 6*48))] <- 2 50 | periodOfCycle[which(periodOfCycle %in% seq(6*48 + 1, 7*48))] <- 3 51 | calendar$periodOfCycle <- periodOfCycle 52 | 53 | hol2009England <- c("2009-1-01", "2009-4-10", "2009-4-13", "2009-5-04", "2009-5-25", "2009-8-31", "2009-12-25", "2009-12-28") 54 | hol2010England <- c("2010-1-01", "2010-4-2", "2010-4-5", "2010-5-3", "2010-5-31", "2010-8-30", "2010-12-27", "2010-12-28") 55 | holEngland <- ymd(c(hol2009England, hol2010England)) 56 | 57 | hol2009Scotland <- c("2009-1-01", "2009-1-02", "2009-4-10", "2009-5-04", "2009-5-25", "2009-8-3", "2009-12-25", "2009-12-28") 58 | hol2010Scotland <- c("2010-1-01", "2010-1-02", "2010-1-03", "2010-1-04", "2010-4-2", "2010-5-3", "2010-5-31", "2010-8-2", "2010-12-27", "2010-12-28") 59 | holScotland <- ymd(c(hol2009Scotland, hol2010Scotland)) 60 | 61 | makeHol <- function(holset){ 62 | daysInPeriod <- 
seq(int_start(complete_interval), int_end(complete_interval), by = "day") 63 | typeday <- rep("WD", length(daysInPeriod)) 64 | 65 | id <- na.omit(match(holset, daysInPeriod)) 66 | typeday[id] <- "NWD" 67 | 68 | dayOfWeek <- lubridate::wday(daysInPeriod) 69 | typeday[which(dayOfWeek == 7 | dayOfWeek == 1)] <- "NWD" 70 | 71 | hol <- as.numeric(typeday !="WD" & dayOfWeek != 7 & dayOfWeek != 1) 72 | hols <- numeric(length(hol)) 73 | 74 | for(i in 2:(length(hol)-1)) 75 | { 76 | if(hol[i-1] == 1 & hol[i] == 0) 77 | hols[i] <- 1 78 | else if(hol[i+1] == 1 & hol[i] == 0) 79 | hols[i] <- -1 80 | } 81 | hols <- hols + 2*hol 82 | hols <- factor(hols, levels = c(0,-1,2,1), labels = c("Normal","Day before","Holiday","Day after")) 83 | return(hols) 84 | } 85 | 86 | #calendar$holidayEngland <- rep(makeHol(holEngland), each = 48) 87 | #calendar$holidayScotland <- rep(makeHol(holScotland), each = 48) 88 | myhol <- ymd(intersect(c(hol2009England, hol2010England), c(hol2009Scotland, hol2010Scotland))) 89 | calendar$holiday <- rep(makeHol(myhol), each = 48) 90 | -------------------------------------------------------------------------------- /smart-meters/code/init_1_rawmeters.R: -------------------------------------------------------------------------------- 1 | # This script loads the raw data and generates multiple (raw) Rdata files 2 | # (metadata, geographical information, smart meter observations, etc) 3 | 4 | rm(list = ls()) 5 | source("config_paths.R") 6 | 7 | library(data.table) 8 | library(dplyr) 9 | library(lubridate) 10 | library(gdata) 11 | library(parallel) 12 | 13 | 14 | # Create Rdata files and save them in S3 15 | DT <- fread(file.path(rawdata.folder, "edrp_elec.csv")) 16 | DT <- DT %>% rename(IDMETER = ANON_ID) 17 | 18 | ######### GEO DATA and META DATA ######### 19 | metaDT <- tbl_df(read.xls(file.path(rawdata.folder, "edrp_metadata.xlsx"), nrows = 14319, skip = 1)) %>% 20 | mutate_each(funs(ymd_hm), firstAdvance) %>% 21 | mutate_each(funs(ymd_hm), lastAdvance) %>% 22 | rename(IDMETER = Hhold_ID) 23 | 24 | geodemoDT <- tbl_df(read.xls(file.path(rawdata.folder, "edrp_geography_data.xlsx"))) %>% 25 | rename(IDMETER = anonID) %>% # replace "--" entries in NUTS1 by "---" and in NUTS4 by "-------" 26 | mutate(NUTS1 = ifelse(NUTS1 == "--", "---", as.character(NUTS1))) %>% 27 | mutate(NUTS4 = ifelse(NUTS4 == "--" | NUTS4 == "", paste(NUTS1, "----", sep = ""), as.character(NUTS4))) %>% 28 | mutate(NUTS2 = substr(NUTS4, 1, 4)) %>% 29 | mutate(NUTS3 = substr(NUTS4, 1, 5)) 30 | 31 | geodemoDT <- geodemoDT %>% 32 | mutate(ACORN_Category = ifelse(ACORN_Category == "" | is.na(ACORN_Category), "-", ACORN_Category)) %>% 33 | mutate(ACORN_Group = as.numeric(ifelse(ACORN_Group == "" | is.na(ACORN_Group), "99", ACORN_Group))) %>% 34 | mutate(ACORN_Type = as.numeric(ifelse(ACORN_Type == "" | is.na(ACORN_Type), "99", ACORN_Type))) %>% 35 | mutate(DEMO1 = paste("D", ACORN_Category, sep = ""), 36 | DEMO2 = paste(DEMO1, sprintf("%02d", ACORN_Group), sep = ""), 37 | DEMO3 = paste(DEMO2, sprintf("%02d", ACORN_Type), sep = "")) 38 | 39 | # infoDT <- inner_join(metaDT, geodemoDT, by = "IDMETER") # 14319 meters (not 14621 meters = 16249 - 1628) 40 | infoDT <- inner_join(metaDT, geodemoDT, by = "IDMETER") 41 | 42 | 43 | allmeters <- infoDT %>% dplyr::select(IDMETER) %>% .$IDMETER 44 | 45 | print("Making info file") 46 | # Create the info file 47 | save(file = file.path(work.folder, "info.Rdata") , list = c("infoDT", "allmeters")) 48 | 49 | # Create a file for each meter 50 | do.it <- TRUE 51 | if(do.it){ 52 | 
print("Making files for each meter") 53 | setmeters <- allmeters 54 | 55 | setmeters <- allmeters[6685:length(allmeters)] 56 | 57 | res <- lapply(setmeters, function(idmeter){ 58 | 59 | if(idmeter%%100 == 0) 60 | print(idmeter) 61 | 62 | infoMeter <- infoDT %>% filter(IDMETER == idmeter) %>% select(firstAdvance, lastAdvance) 63 | firstAdvance <- infoMeter %>% .$firstAdvance 64 | lastAdvance <- infoMeter %>% .$lastAdvance 65 | alldates <- seq(firstAdvance, lastAdvance, by = "30 min") 66 | 67 | navec <- rep(NA, length(alldates)) 68 | dataset <- tbl_df(data.frame(ELECKWH = navec)) # ELECKWH 69 | 70 | #dataset <- tbl_df(data.frame(TIME = alldates, HH = 2*(hour(alldates) + 1) - (minute(alldates) == 0), ELECKWH = navec)) # TIME HH ELECKWH 71 | 72 | meterdata <- filter(DT, IDMETER == idmeter) %>% 73 | mutate_each(funs(dmy_hms), ADVANCEDATETIME) %>% 74 | arrange(ADVANCEDATETIME) # IDMETER ADVANCEDATETIME HH ELECKWH 75 | 76 | datetime <- select(meterdata, ADVANCEDATETIME) %>% .$ADVANCEDATETIME 77 | index <- match(datetime, alldates) 78 | stopifnot(all(!is.na(index))) 79 | 80 | dataset[index, c("ELECKWH")] <- select(meterdata, ELECKWH) 81 | 82 | #dataset[index, c("TIME", "ELECKWH")] <- select(meterdata, ADVANCEDATETIME, ELECKWH) 83 | #dataset <- dataset %>% 84 | # mutate(year = year(TIME)) %>% 85 | # mutate(month = month(TIME)) %>% 86 | # mutate(day = day(TIME)) %>% 87 | # mutate(dayOfWeek = lubridate::wday(TIME, label = T)) %>% 88 | # mutate(dayOfYear = yday(TIME)) 89 | 90 | stopifnot(nrow(dataset) > 0) 91 | save(file = file.path(initmeters.folder, paste("meter-", idmeter, ".Rdata", sep = "")) , list = c("dataset")) 92 | }) 93 | } 94 | 95 | 96 | -------------------------------------------------------------------------------- /smart-meters/code/init_2_MyHierarchy.R: -------------------------------------------------------------------------------- 1 | # This script builds the hierarchy (based on geographical information). 2 | # It also cleans the data and select the meters with the lowest number of 3 | # missing values. 
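# The selection below keeps a meter only if its advance interval covers the
# whole observation window and less than 1% of its readings are missing
# (pctFound > 0.99). A hedged sketch of that filter on toy counts
# (*_demo names are illustrative):
if(FALSE){
  n_demo <- c(20000, 20000, 20000)        # readings recorded per toy meter
  n_na_demo <- c(10, 500, 0)              # missing readings per toy meter
  pctFound_demo <- 1 - n_na_demo/n_demo
  which(pctFound_demo > 0.99)             # meters 1 and 3 survive the cut
}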
4 | 5 | rm(list = ls()) 6 | library(lubridate) 7 | library(gdata) 8 | library(dplyr) 9 | library(igraph) 10 | source("config_paths.R") 11 | source("config_splitting.R") 12 | 13 | makelist <- function(vecintervals){ 14 | sol <- lapply(seq(length(vecintervals)), function(i){list(vecintervals[i])}) 15 | mylist <- lapply(sol, "[[", 1) 16 | } 17 | 18 | load(file.path(work.folder, "info.Rdata")) 19 | 20 | # "UKG" "UKL" "UKJ" "UKI" "UKM" "UKF" "---" "UKK" "UKD" 21 | myregion <- "UKF" 22 | 23 | subInfoDT <- infoDT %>% 24 | filter(NUTS1 %in% myregion) 25 | 26 | allintervals <- subInfoDT %>% 27 | transmute(interval = lubridate::interval(firstAdvance, lastAdvance)) %>% 28 | .$interval 29 | 30 | listintervals <- makelist(allintervals) 31 | 32 | myinterval <- interval(startObs, endObs) 33 | seq_myinterval <- seq(startObs, endObs, by = "30 min") 34 | 35 | matches <- lapply(listintervals, function(oneinterval){ lubridate::intersect(oneinterval, myinterval) == myinterval }) 36 | metersInInterval <- subInfoDT[which(unlist(matches)), ] %>% .$IDMETER 37 | print(length(metersInInterval)) 38 | 39 | 40 | pctFound <- n <- n_na <- n_expected <- n_avail <- numeric(length(metersInInterval)) + NA 41 | listmissing <- vector("list", length(metersInInterval)) 42 | for(i in seq_along(metersInInterval)){ 43 | print(i) 44 | meter <- metersInInterval[i] 45 | 46 | infoMeter <- subInfoDT %>% filter(IDMETER == meter) %>% select(firstAdvance, lastAdvance) 47 | firstAdvance <- infoMeter %>% .$firstAdvance 48 | lastAdvance <- infoMeter %>% .$lastAdvance 49 | alldates <- seq(firstAdvance, lastAdvance, by = "30 min") 50 | ids <- match(seq_myinterval, alldates) 51 | stopifnot(all(!is.na(ids))) 52 | 53 | load(file.path(initmeters.folder, paste("meter-", meter, ".Rdata", sep = ""))) 54 | n[i] <- nrow(dataset) 55 | n_expected[i] <- length(alldates) 56 | n_na[i] <- length(which(is.na(dataset[ids, 1]))) 57 | n_avail[i] <- n_expected[i] - n_na[i] 58 | pctFound[i] <- 1 - (n_na[i]/n[i]) 59 | } 60 | 61 | id <- which(pctFound > 0.99) 62 | finalmeters <- metersInInterval[id] 63 | 64 | # Keep meters with few missing values 65 | x <- subInfoDT %>% filter(IDMETER %in% finalmeters) 66 | 67 | # Keep meters with complete NUTS information 68 | x <- x %>% filter(!grepl("-", NUTS4)) 69 | 70 | # Remove few weird meters 71 | x <- x %>% filter(!IDMETER %in% c(6228, 13154, 9503)) 72 | 73 | # Some meter with high consumption during the night 74 | x <- x %>% filter(!IDMETER %in% c(12874L, 6951L, 14738L, 925L, 8255L)) 75 | 76 | # Each node must have at least two children nodes (NUTS HIERARCHY) 77 | idset <- which(x[, "NUTS4"] == "UKF2100") 78 | res <- split(idset, c(1,2)) 79 | x[res[[1]], "NUTS4"] <- "UKF2100" 80 | x[res[[2]], "NUTS4"] <- "UKF2101" 81 | 82 | idset <- which(x[, "NUTS4"] == "UKF2202") 83 | res <- split(idset, c(1,2)) 84 | x[res[[1]], "NUTS4"] <- "UKF2202" 85 | x[res[[2]], "NUTS4"] <- "UKF2209" 86 | 87 | idset <- which(x[, "NUTS4"] == "UKF1100") 88 | res <- split(idset, c(1,2)) 89 | x[res[[1]], "NUTS4"] <- "UKF1100" 90 | x[res[[2]], "NUTS4"] <- "UKF1101" 91 | 92 | idset <- which(x[, "NUTS4"] == "UKF1400") 93 | res <- split(idset, c(1,2)) 94 | x[res[[1]], "NUTS4"] <- "UKF1400" 95 | x[res[[2]], "NUTS4"] <- "UKF1401" 96 | 97 | for(mynuts4 in c("UKF3004", "UKF3006")){ 98 | idset <- which(x[, "NUTS4"] == mynuts4) 99 | x[idset, "NUTS4"] <- paste("UKF31", substr(mynuts4, 6,7), sep = "") 100 | x[idset, "NUTS3"] <- "UKF31" 101 | } 102 | 103 | # Remove some branches in DEMO HIERARCHY 104 | x <- x %>% filter(DEMO2 != "D517", DEMO1 != "D2") 105 | 106 | # 
Save myinfo.Rdata 107 | myinfoDT <- x 108 | bottomSeries <- myinfoDT %>% .$IDMETER 109 | n_bottom <- length(bottomSeries) 110 | 111 | myedges <- data.frame(rbind(cbind(myinfoDT$NUTS1,myinfoDT$NUTS2), 112 | cbind(myinfoDT$NUTS2, myinfoDT$NUTS3), 113 | cbind(myinfoDT$NUTS3, myinfoDT$NUTS4), 114 | cbind(myinfoDT$NUTS4, myinfoDT$IDMETER))) 115 | itree <- graph.data.frame(myedges) 116 | itree <- simplify(itree, remove.loops = F) 117 | # plot(itree, layout = layout.reingold.tilford(itree, root=1, circular=T), vertex.label.cex = 0.4, vertex.size = 1, vertex.label.dist = .2) 118 | # MUCH BETTER: plot(itree, layout = layout.reingold.tilford(itree, root=1, circular=T), vertex.size=0, vertex.label=NA, edge.arrow.size=0) 119 | #browser() 120 | 121 | # Compute Sagg - for each agg. node, compute the associated leafs 122 | all.nodes.names <- V(itree)$name 123 | agg.nodes.names <- aggSeries <- all.nodes.names[which(degree(itree, V(itree), "out")!=0)] 124 | n_agg <- length(agg.nodes.names) 125 | Sagg <- matrix(0, nrow = n_agg, ncol = n_bottom) 126 | 127 | for(i in seq_along(agg.nodes.names)){ 128 | agg.node.name <- agg.nodes.names[i] 129 | reachable <- which(shortest.paths(itree, agg.node.name, mode="out") != Inf) 130 | terminal.nodes <- reachable[which(degree(itree, reachable, mode="out") == 0)] 131 | #print(terminal.nodes) 132 | terminal.nodes.names <- all.nodes.names[terminal.nodes] 133 | #myinfoDT %>% filter(IDMETER %in% all.nodes.names[terminal.nodes]) %>% select(NUTS4) 134 | ids <- match(terminal.nodes.names, bottomSeries) 135 | stopifnot(all(!is.na(ids))) 136 | Sagg[i, ids] <- 1 137 | } 138 | 139 | save(file = file.path(work.folder, "myinfo.Rdata") , list = c("myinfoDT", "bottomSeries", "itree", "Sagg", "aggSeries", "n_agg", "n_bottom")) 140 | 141 | 142 | -------------------------------------------------------------------------------- /smart-meters/code/init_3_bottomlevel_meters.R: -------------------------------------------------------------------------------- 1 | # This script will load the Rdata files for the smart meters and preprocess it. 2 | # A new Rdata file will be saved with the "cleaned" data (one per smart meter). 3 | 4 | rm(list = ls()) 5 | library(lubridate) 6 | library(gdata) 7 | library(dplyr) 8 | source("config_paths.R") 9 | source("config_splitting.R") 10 | 11 | load(file.path(work.folder, "myinfo.Rdata")) 12 | 13 | ################ Computing the replecements id for special days ################ 14 | holidays_toprocess <- c("2009-5-04", "2009-5-25", "2009-8-31", "2009-12-25", 15 | "2009-12-28", "2010-1-01", "2010-4-2", "2010-4-5", "2010-5-3", "2010-5-31") 16 | 17 | index_final <- seq(length(seq_complete_interval)) 18 | 19 | for(i in seq_along(holidays_toprocess)){ 20 | 21 | target <- ymd(holidays_toprocess[i]) 22 | 23 | idhours <- which(year(seq_complete_interval) == year(target) & 24 | month(seq_complete_interval) == month(target) & 25 | day(seq_complete_interval) == day(target)) 26 | 27 | stopifnot(length(idhours) == 48) 28 | 29 | if(i %in% c(1, 2, 3, 8, 9, 10)){ 30 | id <- 48 31 | }else if(i %in% c(4, 6, 7)){ 32 | id <- -48 33 | }else if(i == 5){ 34 | id <- -48*2 35 | } 36 | #1. Replace mon by tue \\ +48 37 | #2. Replace mon by tue \\ +48 38 | #3. Replace mon by tue \\ +48 39 | #4. 24/12 replaced by 23nd \\ -48 40 | #5. 25/12 replaced by 23nd \\ -48 *2 41 | #6. 1/1 replaced by 31/12 \\ -48 42 | #7. 24/4 Fri by Thu \\ -48 43 | #8. 5/4 Mon by Tue \\ +48 44 | #9. 3/5 same \\ +48 45 | #10. 
31/5 same \\ +48 46 | index_final[idhours] <- index_final[idhours + id] 47 | } 48 | ################################################################################ 49 | 50 | for(idseries in bottomSeries){ 51 | print(idseries) 52 | load(file.path(initmeters.folder, paste("meter-", idseries, ".Rdata", sep = ""))) 53 | 54 | ### 55 | infoMeter <- myinfoDT %>% filter(IDMETER == idseries) %>% select(firstAdvance, lastAdvance) 56 | firstAdvance <- infoMeter %>% .$firstAdvance 57 | lastAdvance <- infoMeter %>% .$lastAdvance 58 | alldates <- seq(firstAdvance, lastAdvance, by = "30 min") 59 | ids <- match(seq_complete_interval, alldates) 60 | stopifnot(all(!is.na(ids))) 61 | 62 | load(file.path(initmeters.folder, paste("meter-", idseries, ".Rdata", sep = ""))) 63 | obs <- dataset[ids,] %>% .$ELECKWH 64 | npoints <- length(obs) 65 | ### 66 | 67 | ################ Filling missing observations ############### 68 | id_na <- which(is.na(obs)) 69 | if(length(id_na) > 0){ 70 | 71 | for(id in id_na){ 72 | 73 | idrepl <- id + c(-48 *2, - 48, -2, -1, 1, 2, 48, 48 * 2) 74 | 75 | idok <- idrepl[which(idrepl <= npoints & idrepl > 0)] 76 | stopifnot(length(idok) > 0) 77 | 78 | repl <- obs[idok] 79 | repl <- repl[which(!is.na(repl))] 80 | stopifnot(length(repl) > 0) 81 | 82 | val <- median(repl) 83 | obs[id] <- val 84 | } 85 | } 86 | 87 | # Replacing special days 88 | obs <- obs[index_final] 89 | demand <- obs 90 | 91 | #if(idseries == bottomSeries[272]){ 92 | # stop("done") 93 | #} 94 | 95 | #if(FALSE){ 96 | # reverting the discretaziation process 97 | breaks <- sort(unique(demand)) 98 | sds <- numeric(length(breaks)) 99 | for(i in seq_along(breaks)){ 100 | if(i == 1){ # some have a lot of repetitions of the min 101 | sds[i] <- mean(c((breaks[i] - 0)/3, (breaks[i+1] - breaks[i])/3)) 102 | 103 | }else if(i == length(breaks)){ # maximum 3 reptitions of the max (so, not important) 104 | sds[i] <- (breaks[i] - breaks[i-1])/10 105 | }else{ 106 | #(i != 1 && i != length(breaks)){ 107 | 108 | sds[i] <- mean(c((breaks[i] - breaks[i -1])/3, (breaks[i+1] - breaks[i])/3)) 109 | } 110 | } 111 | demand_perturbed <- rnorm(length(demand), mean = demand, sd = sds[match(demand, breaks)]) 112 | demand_perturbed[which(demand_perturbed < 0)] <- 0 # very few will be negative 113 | 114 | demand <- demand_perturbed 115 | #} 116 | 117 | save(file = file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = "")) , list = c("demand")) 118 | } 119 | 120 | 121 | if(FALSE){ 122 | for(idseries in bottomSeries){ 123 | print(idseries) 124 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 125 | load(file.path(initmeters.folder, paste("meter-", idseries, ".Rdata", sep = ""))) 126 | 127 | infoMeter <- myinfoDT %>% filter(IDMETER == idseries) %>% select(firstAdvance, lastAdvance) 128 | firstAdvance <- infoMeter %>% .$firstAdvance 129 | lastAdvance <- infoMeter %>% .$lastAdvance 130 | alldates <- seq(firstAdvance, lastAdvance, by = "30 min") 131 | ids <- match(seq_complete_interval, alldates) 132 | 133 | myid <- 48 * 7 * 20 + seq(48 * 7 * 2) 134 | 135 | 136 | 137 | x <- dataset[ids[myid], 1] %>% .$ELECKWH 138 | plot.ts(demand[myid]) 139 | lines(x, col = "red") 140 | } 141 | dev.off() 142 | } 143 | -------------------------------------------------------------------------------- /smart-meters/code/init_4_aggregated_meters.R: -------------------------------------------------------------------------------- 1 | # This script will load the cleaned data computed using the script "init_3_bottomlevel_meters.R". 
2 | # It will produce the aggregated series based on the hierarchy defined in "init_2_MyHierarchy.R". 3 | # These series will be saved in seperate Rdata files. 4 | 5 | rm(list = ls()) 6 | library(dplyr) 7 | source("config_paths.R") 8 | 9 | 10 | load(file.path(work.folder, "myinfo.Rdata")) 11 | # "myinfoDT", "bottomSeries", "itree", "Sagg", "aggSeries", "n_agg", "n_bottom" 12 | 13 | #hierarchy <- "NUTS" 14 | 15 | nobs <- 22464 16 | 17 | bottom_series <- matrix(NA, nrow = nobs, ncol = n_bottom) 18 | for(j in seq_along(bottomSeries)){ 19 | if(j %% 100 == 0) 20 | print(j) 21 | 22 | idseries <- bottomSeries[j] 23 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 24 | bottom_series[, j] <- demand 25 | } 26 | stopifnot(all(!apply(apply(bottom_series, 2, is.na), 2, any))) 27 | 28 | 29 | agg_series <- tcrossprod(Sagg, bottom_series) 30 | 31 | for(iagg in seq(n_agg)){ 32 | demand <- agg_series[iagg, ] 33 | code <- aggSeries[iagg] 34 | save(file = file.path(aggseries.folder, paste("series-", code, ".Rdata", sep = "")) , list = c("demand")) 35 | } 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /smart-meters/code/makebf_byhalfour.R: -------------------------------------------------------------------------------- 1 | # This script gives a new representation of the base forecasts to simplify the computational load 2 | # when applying the permutations. It creates new Rdata files where each file gives the forecasts by half-hour. 3 | rm(list = ls()) 4 | 5 | source("config_paths.R") 6 | source("config_general.R") 7 | source("config_splitting.R") 8 | source("utils.R") 9 | 10 | library(ff) 11 | load(file.path(work.folder, "myinfo.Rdata")) 12 | 13 | ntest <- length(test$id) 14 | 15 | QF_bottom <- vector("list", length(bottomSeries)) 16 | QF_agg <- vector("list", length(aggSeries)) 17 | 18 | obs_agg <- revisedmean_agg <- mean_agg <- matrix(NA, nrow = length(aggSeries), ncol = ntest) 19 | obs_bottom <- revisedmean_bottom <- mean_bottom <- matrix(NA, nrow = length(bottomSeries), ncol = ntest) 20 | 21 | for(do.agg in c(TRUE, FALSE)){ 22 | 23 | if(do.agg){ 24 | set_series <- aggSeries 25 | algo <- algo.agg 26 | }else{ 27 | set_series <- bottomSeries 28 | algo <- algo.bottom 29 | } 30 | 31 | for(j in seq_along(set_series)){ 32 | # if(j%%100 == 0) 33 | print(j) 34 | 35 | if(do.agg){ 36 | QF_agg[[j]] <- matrix(NA, nrow = length(taus), ncol = ntest) 37 | }else{ 38 | QF_bottom[[j]] <- matrix(NA, nrow = length(taus), ncol = ntest) 39 | } 40 | 41 | idseries <- set_series[j] 42 | 43 | load(file.path(basef.folder, algo, paste("results_", idseries, "_", algo, ".Rdata", sep = ""))) 44 | 45 | load(file.path(work.folder, "revisedf", paste("revised_meanf_", algo.agg, "_", algo.bottom, "_", idseries, ".Rdata", sep = "")) ) 46 | # mu_revised_alltest 47 | 48 | if(do.agg){ 49 | load(file.path(aggseries.folder, paste("series-", idseries, ".Rdata", sep = ""))) 50 | obs_agg[j, ] <- demand[test$id] 51 | revisedmean_agg[j, ] <- mu_revised_alltest 52 | mean_agg[j, ] <- unlist(all_mf) 53 | }else{ 54 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 55 | obs_bottom[j, ] <- demand[test$id] 56 | revisedmean_bottom[j, ] <- mu_revised_alltest 57 | mean_bottom[j, ] <- unlist(all_mf) 58 | } 59 | 60 | 61 | for(idtest in seq(ntest)){ 62 | iday <- getInfo(idtest)$iday 63 | hour <- getInfo(idtest)$hour 64 | 65 | if(do.agg){ 66 | QF_agg[[j]][, idtest] <- all_qf[[iday]][, hour] 67 | }else{ 68 | QF_bottom[[j]][, idtest] <- 
all_qf[[iday]][, hour] 69 | } 70 | }# idtest 71 | }# series 72 | }# AGG and BOTTOM 73 | 74 | #stop("done") 75 | dir.create(file.path(work.folder, "byidtest"), recursive = TRUE, showWarnings = FALSE) 76 | 77 | for(idtest in seq(ntest)){ 78 | print(idtest) 79 | res_byidtest_file <- file.path(work.folder, "byidtest", paste("results_byidtest_", algo.agg, "_", algo.bottom, "_", idtest, ".Rdata", sep = "")) 80 | 81 | QF_agg_idtest <- sapply(seq(length(aggSeries)), function(j){ 82 | QF_agg[[j]][, idtest] 83 | }) 84 | 85 | QF_bottom_idtest <- sapply(seq(length(bottomSeries)), function(j){ 86 | QF_bottom[[j]][, idtest] 87 | }) 88 | 89 | obs_agg_idtest <- obs_agg[, idtest] 90 | obs_bottom_idtest <- obs_bottom[, idtest] 91 | 92 | revisedmean_bottom_idtest <- revisedmean_bottom[, idtest] 93 | revisedmean_agg_idtest <- revisedmean_agg[, idtest] 94 | 95 | mean_bottom_idtest <- mean_bottom[, idtest] 96 | mean_agg_idtest <- mean_agg[, idtest] 97 | 98 | save(file = res_byidtest_file, list = c("QF_agg_idtest", "QF_bottom_idtest", 99 | "obs_agg_idtest", "obs_bottom_idtest", 100 | "revisedmean_bottom_idtest", 'revisedmean_agg_idtest', 101 | "mean_agg_idtest", "mean_bottom_idtest")) 102 | } 103 | -------------------------------------------------------------------------------- /smart-meters/code/multiplot.R: -------------------------------------------------------------------------------- 1 | # Multiple plot function 2 | # 3 | # ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects) 4 | # - cols: Number of columns in layout 5 | # - layout: A matrix specifying the layout. If present, 'cols' is ignored. 6 | # 7 | # If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE), 8 | # then plot 1 will go in the upper left, 2 will go in the upper right, and 9 | # 3 will go all the way across the bottom. 10 | # 11 | multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { 12 | library(grid) 13 | 14 | # Make a list from the ... 
arguments and plotlist 15 | plots <- c(list(...), plotlist) 16 | 17 | numPlots = length(plots) 18 | 19 | # If layout is NULL, then use 'cols' to determine layout 20 | if (is.null(layout)) { 21 | # Make the panel 22 | # ncol: Number of columns of plots 23 | # nrow: Number of rows needed, calculated from # of cols 24 | layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), 25 | ncol = cols, nrow = ceiling(numPlots/cols)) 26 | } 27 | 28 | if (numPlots==1) { 29 | print(plots[[1]]) 30 | 31 | } else { 32 | # Set up the page 33 | grid.newpage() 34 | pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) 35 | 36 | # Make each plot, in the correct location 37 | for (i in 1:numPlots) { 38 | # Get the i,j matrix positions of the regions that contain this subplot 39 | matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) 40 | 41 | print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, 42 | layout.pos.col = matchidx$col)) 43 | } 44 | } 45 | } -------------------------------------------------------------------------------- /smart-meters/code/nicefigs.R: -------------------------------------------------------------------------------- 1 | # Create nice R figures 2 | 3 | savepdf <- function(file, width=16, height=10) 4 | { 5 | .._fname <- paste(file,".pdf",sep="") 6 | pdf(.._fname, width=width/2.54, height=height/2.54, 7 | pointsize=10) 8 | par(mgp=c(2.2,0.45,0), tcl=-0.4, mar=c(3.3,3.6,1.1,1.1)) 9 | assign(".._fname",.._fname,envir=.GlobalEnv) 10 | } 11 | endpdf <- function() 12 | { 13 | dev.off() 14 | system(paste("pdfcrop",.._fname,.._fname)) 15 | } 16 | -------------------------------------------------------------------------------- /smart-meters/code/permutations.R: -------------------------------------------------------------------------------- 1 | # This script computes the permutations to apply to the samples from the marginals 2 | # in order to restore the dependences among the variables. 
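# Key idea: samples drawn independently from each child's marginal forecast are
# re-ordered with the ranks of that child's historical in-sample residuals.
# Re-ordering changes no marginal, but it restores the empirical rank
# dependence between sibling series. A toy sketch (illustrative *_demo names):
if(FALSE){
  set.seed(1)
  res_a <- rnorm(100); res_b <- res_a + rnorm(100, sd = 0.1)   # dependent residuals
  perm_demo <- apply(cbind(res_a, res_b), 2, rank, ties.method = "random")
  samp_a <- sort(rnorm(100)); samp_b <- sort(rgamma(100, 2))   # independent marginal samples
  shuffled <- cbind(samp_a[perm_demo[, 1]], samp_b[perm_demo[, 2]])
  cor(shuffled)[1, 2]   # close to cor(res_a, res_b); the marginals are untouched
}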
3 | rm(list = ls()) 4 | source("config_paths.R") 5 | source("config_general.R") 6 | source("config_splitting.R") 7 | source("utils.R") 8 | #library(parallel) 9 | library(igraph) 10 | 11 | load(file.path(work.folder, "myinfo.Rdata")) 12 | 13 | # compute the parsing order of the aggregate nodes 14 | leaves <- V(itree)[degree(itree, mode="out") == 0] 15 | agg_nodes <- V(itree)[degree(itree, mode="out") != 0] 16 | 17 | list_matpermutations <- list_vecties <- vector("list", length(agg_nodes)) 18 | 19 | for(inode in seq_along(agg_nodes)){ 20 | agg_node <- agg_nodes[inode] 21 | idseries_agg <- names(agg_node) 22 | children_nodes <- ego(itree, order = 1, nodes = agg_node, mode = "out")[[1]][-1] 23 | print(children_nodes) 24 | 25 | mat_residuals <- sapply(seq_along(children_nodes), function(inode){ 26 | child_node <- children_nodes[inode] 27 | isbottom <- (child_node %in% leaves) 28 | idseries <- names(child_node) 29 | if(isbottom){ 30 | resid_file <- file.path(insample.folder, algo.bottom, paste("residuals_", idseries, "_", algo.bottom, ".Rdata", sep = "")) 31 | load(resid_file) 32 | e_residuals <- c(rep(NA, n_past_obs_kd), e_residuals) 33 | }else{ 34 | resid_file <- file.path(insample.folder, algo.agg, paste("residuals_", idseries, "_", algo.agg, "_", 1, ".Rdata", sep = "")) 35 | load(resid_file) 36 | #e_residuals 37 | } 38 | e_residuals 39 | }) 40 | 41 | # remove the first few rows (not available for KD) 42 | mat_residuals <- tail(mat_residuals, -n_past_obs_kd) 43 | 44 | #hour_alli <- getInfo(tail(learn$id, -n_past_obs_kd))$hour 45 | id_all <- tail(learn$id, -n_past_obs_kd) 46 | 47 | # compute ranks by time of day 48 | n_resid <- nrow(mat_residuals) 49 | stopifnot(n_resid %% 48 == 0) 50 | 51 | vec_ties <- sapply(seq(ncol(mat_residuals)), function(j){ 52 | (nrow(mat_residuals) - length(unique(mat_residuals[, j]))) / nrow(mat_residuals) 53 | }) * 100 54 | 55 | mat_residuals <- tail(mat_residuals, M) # 5760 instead of 15168 56 | mat_permutations <- apply(mat_residuals, 2, rank, ties.method = "random") 57 | colnames(mat_permutations) <- names(children_nodes) 58 | 59 | list_matpermutations[[inode]] <- mat_permutations 60 | list_vecties[[inode]] <- vec_ties 61 | #perm_file <- file.path(permutations.folder, paste("perm_", algo.agg, "_", algo.bottom, "_", idseries_agg, ".Rdata", sep = "")) 62 | #save(file = perm_file, list = c("mat_permutations", "vec_ties")) 63 | } 64 | 65 | list_matpermutations <- setNames(list_matpermutations, names(agg_nodes)) 66 | list_vecties <- setNames(list_vecties, names(agg_nodes)) 67 | perm_file <- file.path(permutations.folder, paste("perm_", algo.agg, "_", algo.bottom, ".Rdata", sep = "")) 68 | save(file = perm_file, list = c("list_matpermutations", "list_vecties")) 69 | -------------------------------------------------------------------------------- /smart-meters/code/plot_calendar_effects.R: -------------------------------------------------------------------------------- 1 | # This script produces the figure showing the electricity demand for each day of the week, and various aggregations. 
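# The weekday profiles below come from folding a half-hourly series into a
# (days x 48) matrix and averaging the rows that share a day of week. Sketch
# on toy data (*_demo names are illustrative):
if(FALSE){
  demand_demo <- rnorm(48 * 14, mean = 1)              # two toy weeks
  demand_day_demo <- matrix(demand_demo, ncol = 48, byrow = TRUE)
  dweek_demo <- rep(1:7, 2)                            # day of week of each row
  prof_demo <- sapply(1:7, function(d) colMeans(demand_day_demo[dweek_demo == d, , drop = FALSE]))
  dim(prof_demo)                                       # 48 x 7: one daily profile per weekday
}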
2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("config_general.R") 5 | source("config_splitting.R") 6 | source("utils.R") 7 | 8 | load(file.path(work.folder, "myinfo.Rdata")) 9 | 10 | node_nbkids <- apply(Sagg, 1, sum) 11 | node_order <- sort(node_nbkids, index = T, decreasing = T)$ix 12 | 13 | day_dweek <- matrix(calendar$dweek, ncol = 48, byrow = T)[, 1] 14 | dweeks <- unique(day_dweek) 15 | 16 | if(FALSE){ 17 | # all meters 18 | savepdf(file.path(results.folder, paste("cycle_all_meters", sep = ""))) 19 | par(mfrow = c(2, 2)) 20 | set_ids <- seq(n_bottom) 21 | for(i in set_ids){ 22 | idseries <- bottomSeries[i] 23 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 24 | demand <- demand/max(demand) 25 | demand_day <- matrix(demand, ncol = 48, byrow = T) 26 | res <- sapply(dweeks, function(dweek){ 27 | apply(demand_day[which(day_dweek == dweek), ], 2, mean) 28 | }) 29 | matplot(res, type = 'l', lty = 1) 30 | } 31 | endpdf() 32 | 33 | 34 | # average over all meters 35 | MAT <- array(NA, c(n_bottom, 48, 7)) 36 | #matrix(NA, nrow = n_bottom, ncol = 48) 37 | 38 | for(iseries in seq(n_bottom)){ 39 | idseries <- bottomSeries[iseries] 40 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 41 | demand <- demand/max(demand) 42 | demand_day <- matrix(demand, ncol = 48, byrow = T) 43 | 44 | res <- sapply(dweeks, function(dweek){ 45 | apply(demand_day[which(day_dweek == dweek), ], 2, mean) 46 | }) 47 | MAT[iseries, , ] <- res 48 | } 49 | 50 | savepdf(file.path(results.folder, paste("cycle_meters", sep = "")), height = 27 * 0.25, width = 21) 51 | par(mfrow = c(1, 3)) 52 | 53 | # average over meters 54 | v <- apply(MAT, c(2, 3), mean) 55 | matplot(v, type = 'l', lty = 1, xlab = "", ylab = "Electricity demand (scaled)") 56 | 57 | set_ids <- c(1223, 1233) 58 | for(i in set_ids){ 59 | idseries <- bottomSeries[i] 60 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 61 | demand <- demand/max(demand) 62 | demand_day <- matrix(demand, ncol = 48, byrow = T) 63 | res <- sapply(dweeks, function(dweek){ 64 | apply(demand_day[which(day_dweek == dweek), ], 2, mean) 65 | }) 66 | matplot(res, type = 'l', lty = 1, ylab = "Electricity demand (scaled)") 67 | } 68 | endpdf() 69 | }#FALSE 70 | 71 | 72 | #### 73 | MAT_bottom <- array(NA, c(n_bottom, 48, 7)) 74 | MAT_agg <- array(NA, c(n_agg, 48, 7)) 75 | #matrix(NA, nrow = n_bottom, ncol = 48) 76 | 77 | for(iseries in seq(n_bottom)){ 78 | idseries <- bottomSeries[iseries] 79 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 80 | demand_day <- matrix(demand, ncol = 48, byrow = T) 81 | 82 | res <- sapply(dweeks, function(dweek){ 83 | apply(demand_day[which(day_dweek == dweek), ], 2, mean) 84 | }) 85 | MAT_bottom[iseries, , ] <- res 86 | } 87 | for(iseries in seq(n_agg)){ 88 | idseries <- aggSeries[iseries] 89 | load(file.path(aggseries.folder, paste("series-", idseries, ".Rdata", sep = ""))) 90 | demand_day <- matrix(demand, ncol = 48, byrow = T) 91 | 92 | res <- sapply(dweeks, function(dweek){ 93 | apply(demand_day[which(day_dweek == dweek), ], 2, mean) 94 | }) 95 | MAT_agg[iseries, , ] <- res 96 | } 97 | 98 | #pdf(file.path(results.folder, paste("IC.pdf", sep = "")), width = 21 * 0.9, height = 27 * 0.5) 99 | #par(mfrow = c(2, 3)) 100 | 101 | do.color <- TRUE 102 | if(do.color){ 103 | savepdf(file.path(results.folder, paste("IC_COLOR", sep = "")), height = 27 * 0.46) 104 | par(mfrow = c(3, 2)) 105 | my_cex <- 1 106 | my_pch <- c(3, 
4, 5, 6, 7, 1, 2) 107 | mycol <- c("brown", "grey", "green", "yellow", "black", "blue", "red") 108 | }else{ 109 | savepdf(file.path(results.folder, paste("IC_BLACK", sep = "")), height = 27 * 0.5) 110 | par(mfrow = c(3, 2)) 111 | my_cex <- .7 112 | my_pch <- c(3, 4, 5, 6, 7, 1, 2) 113 | mycol <- "black" 114 | } 115 | 116 | 117 | itday <- c(1, seq(8, 48, by = 8)) 118 | 119 | # id <- node_order 120 | #id <- c(1, 4, 7, 39, 52) 121 | id <- c(1, 7, 39, 50) 122 | 123 | 124 | for(iagg in node_order[id]){ 125 | ind <- which(Sagg[iagg, ] == 1) 126 | v <- MAT_agg[iagg, , ] 127 | 128 | matplot(v, 129 | ylab = "Consumption (kWh)", main = paste(length(ind), " aggregated meters" , sep = ""), 130 | xaxt = "n", xlab = "Time of Day", 131 | type = 'o', pch = my_pch, col = mycol, cex = my_cex) 132 | 133 | 134 | #axis(1, labels = tday[seq(1, 48, by = 8)], at = seq(1, 48, by = 8)) 135 | axis(1, labels = tday[itday], at = itday) 136 | if(iagg == 1){ 137 | 138 | legend("topleft", abbr.dweek[c(6, 7, 1, 2, 3, 4, 5)], 139 | pch = my_pch[c(6, 7, 1, 2, 3, 4, 5)], col = mycol[c(6, 7, 1, 2, 3, 4, 5)], 140 | lty = 1, cex = my_cex, bty = "n") 141 | 142 | } 143 | } 144 | 145 | #matplot(MAT[50, , ], type = 'l', lty = my_lty, ylab = "Consumption (kWh)", main = 1, xaxt = "n", xlab = "Time of Day", col = "black") 146 | matplot(MAT_bottom[50, , ], ylab = "Consumption (kWh)", main = "Individual smart meter", xaxt = "n", xlab = "Time of Day", col = "black", type = 'l') 147 | if(is.null(my_pch)){ 148 | matlines(MAT_bottom[50, , ], lty = 1, col = mycol, cex = my_cex) 149 | }else{ 150 | matpoints(MAT_bottom[50, , ], pch = my_pch, col = mycol, cex = my_cex) 151 | } 152 | axis(1, labels = tday[itday], at = itday) 153 | #axis(1, labels = tday[seq(1, 48, by = 8)], at = seq(1, 48, by = 8)) 154 | 155 | dev.off() 156 | 157 | 158 | -------------------------------------------------------------------------------- /smart-meters/code/plot_coherency.R: -------------------------------------------------------------------------------- 1 | # This script produces the figure which shows the incoherency errors. 
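# "Incoherency" here is the gap between a forecast issued directly at an
# aggregate node and the aggregate of its children's forecasts; for the means
# this is r = mu_agg - Sagg %*% mu_bottom, as computed further down. A tiny
# numeric sketch (*_demo values are illustrative):
if(FALSE){
  Sagg_demo <- matrix(c(1, 1, 1,    # total over three meters
                        1, 1, 0),   # sub-aggregate over the first two
                      nrow = 2, byrow = TRUE)
  mu_bottom_demo <- c(1.0, 2.0, 0.5)
  mu_agg_demo <- c(3.6, 3.1)        # base forecasts made at the aggregate level
  mu_agg_demo - as.numeric(Sagg_demo %*% mu_bottom_demo)   # coherency errors: 0.1 0.1
}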
2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("config_general.R") 5 | source("config_splitting.R") 6 | source("utils.R") 7 | source("multiplot.R") 8 | 9 | library(reshape2) 10 | library(ggplot2) 11 | 12 | nhours <- 48 13 | 14 | gplot_matrix <- function(X, my_title = ""){ 15 | nbrow <- nrow(X) 16 | Z <- melt(t(X)) 17 | Zp <- Z 18 | Zp$Var2 <- nbrow + 1 - Zp$Var2 19 | 20 | #mybreaks <- seq(1, nhours, by = 6) 21 | mybreaks <- c(1, seq(8, nhours, by = 8)) 22 | #mybreaks <- c(1, seq(33, nhours, by = 48)) 23 | mylabels <- rep(tday, nhours/48)[mybreaks] 24 | 25 | p <- ggplot(data = Zp, aes(x = Var1, y = Var2)) + 26 | geom_tile(aes(fill = value), colour = "white") + 27 | #scale_fill_gradient2(name = expression(log(abs(tilde(epsilon)))), low="darkblue", high="darkgreen", guide="colorbar")+ 28 | scale_fill_gradient2(name = "", low="white", high="black", guide="colorbar")+ 29 | scale_x_continuous(breaks = mybreaks, labels = mylabels) + 30 | xlab("Hour of the day") + 31 | ylab("Aggregate series") + 32 | theme_bw() + 33 | ggtitle(my_title) 34 | 35 | # geom_vline(xintercept = seq(1, nhours, by = 48), linetype=3) + 36 | 37 | p 38 | } 39 | 40 | load(file.path(work.folder, "myinfo.Rdata")) 41 | 42 | rank <- sort(apply(Sagg, 1, sum), index = T, decreasing = T)$ix 43 | 44 | node_nbkids <- apply(Sagg, 1, sum) 45 | node_order <- sort(node_nbkids, index = T, decreasing = T)$ix 46 | 47 | algo.agg <- "DETS" 48 | algo.bottom <- "KD-IC-NML" 49 | 50 | ntotal <- n_agg + n_bottom 51 | 52 | meanf <- matrix(NA, nrow = ntotal, ncol = 4416) 53 | 54 | prob_coherency <- prob_coherency_10 <- prob_coherency_50 <- prob_coherency_90 <- matrix(NA, nrow = n_agg, ncol = 48) 55 | allres <- vector("list", 48) 56 | allres[1:48] <- 0 57 | 58 | for(idtest in seq(4416)){ 59 | print(idtest) 60 | 61 | res_byidtest_file <- file.path(work.folder, "byidtest", paste("results_byidtest_", algo.agg, "_", algo.bottom, "_", idtest, ".Rdata", sep = "")) 62 | load(res_byidtest_file) 63 | # "QF_agg_idtest", "QF_bottom_idtest", "obs_agg_idtest", "obs_bottom_idtest", 64 | # "revisedmean_bottom_idtest", 'revisedmean_agg_idtest', "mean_agg_idtest", "mean_bottom_idtest" 65 | 66 | meanf[seq(1, n_agg), idtest] <- mean_agg_idtest 67 | meanf[seq(n_agg + 1, n_agg + n_bottom), idtest] <- mean_bottom_idtest 68 | 69 | if(FALSE){ 70 | base_samples_agg <- matrix(NA, nrow = M, ncol = n_agg) 71 | base_samples_bottom <- matrix(NA, nrow = M, ncol = n_bottom) 72 | 73 | for(j in seq(n_agg)){ 74 | invcdf <- approxfun(taus, QF_agg_idtest[, j], rule = 2) 75 | base_samples_agg[, j] <- invcdf(q_probs) 76 | } 77 | 78 | for(j in seq(n_bottom)){ 79 | invcdf <- approxfun(taus, QF_bottom_idtest[, j], rule = 2) 80 | base_samples_bottom[, j] <- invcdf(q_probs) 81 | } 82 | 83 | sum_agg <- t(Sagg %*% t(base_samples_bottom)) 84 | q_sum_agg <- apply(sum_agg, 2, sort) 85 | q_agg <- apply(base_samples_agg, 2, sort) 86 | 87 | res <- (q_sum_agg - q_agg)^2 88 | 89 | hour <- idtest%%48 90 | if(hour == 0) 91 | hour <- 48 92 | 93 | allres[[hour]] <- allres[[hour]] + res 94 | 95 | #prob_coherency[, idtest] <- apply(res, 2, mean) 96 | #prob_coherency_10[, idtest] <- res[which.min(abs(q_probs - 0.1)), ] 97 | #prob_coherency_50[, idtest] <- res[which.min(abs(q_probs - 0.5)), ] 98 | #prob_coherency_90[, idtest] <- res[which.min(abs(q_probs - 0.9)), ] 99 | 100 | } 101 | 102 | } 103 | 104 | meanres <- lapply(allres, function(mat){ 105 | mat/92 106 | }) 107 | 108 | stop("done") 109 | 110 | 111 | 112 | 113 | listp <- vector("list", 8) 114 | 115 | for(p in seq(8)){ 116 | weights <- 
numeric(M) 117 | if(p == 1){ 118 | weights[which.min(abs(q_probs - 0.05))] <- 1 119 | my_title <- "0.05" 120 | }else if(p == 2){ 121 | weights[which.min(abs(q_probs - 0.25))] <- 1 122 | my_title <- "0.25" 123 | }else if(p == 3){ 124 | weights[which.min(abs(q_probs - 0.5))] <- 1 125 | my_title <- "0.5" 126 | }else if(p == 4){ 127 | weights[which.min(abs(q_probs - 0.75))] <- 1 128 | my_title <- "0.75" 129 | }else if(p == 5){ 130 | weights[which.min(abs(q_probs - 0.95))] <- 1 131 | my_title <- "0.95" 132 | }else if(p == 6){ 133 | #weights[which(q_probs <= 0.05 | q_probs >= 0.95)] <- 1 134 | #my_title <- "both tails" 135 | 136 | weights[which(q_probs <= 0.05 | q_probs >= 0)] <- 1 137 | my_title <- "lower tails" 138 | 139 | }else if(p == 7){ 140 | weights[which(q_probs > 0.05 | q_probs < 0.9)] <- 1 141 | my_title <- "center" 142 | }else if(p == 8){ 143 | weights[seq(M)] <- 1 144 | my_title <- "all" 145 | } 146 | weights <- weights/sum(weights) 147 | 148 | res <- sapply(meanres, function(mat){ 149 | t(weights) %*% mat 150 | }) 151 | 152 | listp[[p]] <- gplot_matrix(log(res[rank, ]), my_title) 153 | } 154 | multiplot(listp[[1]], listp[[2]], listp[[3]], listp[[4]], listp[[5]], listp[[6]] , listp[[7]], listp[[8]], cols=3) 155 | 156 | ##### 157 | savepdf(file.path(results.folder, paste("prob-coherency", sep = "") )) 158 | print(listp[[8]]) 159 | endpdf() 160 | ##### 161 | 162 | do.relative <- FALSE 163 | ##### 164 | if(do.relative){ 165 | r <- (meanf[seq(1, n_agg), ] - Sagg %*% meanf[seq(n_agg + 1, n_agg + n_bottom), ])/meanf[seq(1, n_agg), ] 166 | #r <- (meanf[seq(1, n_agg), ] - Sagg %*% meanf[seq(n_agg + 1, n_agg + n_bottom), ])/Sagg %*% meanf[seq(n_agg + 1, n_agg + n_bottom), ] 167 | }else{ 168 | r <- meanf[seq(1, n_agg), ] - Sagg %*% meanf[seq(n_agg + 1, n_agg + n_bottom), ] 169 | } 170 | 171 | #r <- r[rank, ] 172 | #r <- r[node_order, ] 173 | #X <- log(abs(r[, seq(48)])) 174 | 175 | v <- sapply(node_order, function(iagg){ 176 | #log(abs(matrix(r[iagg, ], ncol = 48, byrow = T))) 177 | #abs(matrix(r[iagg, ], ncol = 48, byrow = T)) 178 | matrix(r[iagg, ], ncol = 48, byrow = T) 179 | }, simplify = "array") 180 | 181 | 182 | ######### 183 | itday <- c(1, seq(12, 48, by = 12)) 184 | id <- c(1, 7, 39, 50) 185 | id <- c(1, 7) 186 | 187 | savepdf(file.path(results.folder, paste("mean-coherency", sep = "") )) 188 | #par(mfrow = c(2, 2)) 189 | par(mfrow = c(1, 2)) 190 | for(iagg in node_order[id]){ 191 | ind <- which(Sagg[iagg, ] == 1) 192 | boxplot(v[, , iagg], xaxt = "n", main = paste(length(ind), " aggregated meters" , sep = ""), ylab = "Coherency errors", xlab = "Time of day") 193 | axis(1, labels = tday[itday], at = itday, cex = .8) 194 | abline(h = 0) 195 | } 196 | endpdf() 197 | ######### 198 | 199 | v <- aperm(v, c(1, 3, 2)) 200 | X <- apply(v, c(2, 3), mean) 201 | 202 | 203 | if(do.relative){ 204 | #X <- abs(X) 205 | }else{ 206 | X <- log(abs(X)) 207 | } 208 | 209 | 210 | 211 | savepdf(file.path(results.folder, paste("mean-coherency", sep = "") )) 212 | print(gplot_matrix(X)) 213 | endpdf() 214 | 215 | 216 | MAT <- log(1 + abs(X)) 217 | savepdf(file.path(results.folder, paste("mean-coherency", sep = "") )) 218 | print(gplot_matrix(MAT)) 219 | endpdf() 220 | ##### 221 | 222 | 223 | p1 <- gplot_matrix(log(prob_coherency[rank, ])) 224 | p2 <- gplot_matrix(log(prob_coherency_10[rank, ])) 225 | p3 <- gplot_matrix(log(prob_coherency_50[rank, ])) 226 | p4 <- gplot_matrix(log(prob_coherency_90[rank, ])) 227 | multiplot(p1, p2, p3, p4, cols=2) 228 | 229 | 230 | 231 | 232 | 233 | 
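# For the probabilistic checks above, bottom-level samples are summed with Sagg
# and their order statistics (empirical quantiles) are compared with those of
# the direct aggregate forecast; the weight vectors then emphasise single
# quantile levels, the tails, the centre, or the whole distribution. A sketch
# of the weighted discrepancy (*_demo names are illustrative):
if(FALSE){
  M_demo <- 1000
  q_sum_demo <- sort(rnorm(M_demo, sd = 1.1))   # quantiles of the summed bottom samples
  q_agg_demo <- sort(rnorm(M_demo, sd = 1.0))   # quantiles of the direct aggregate forecast
  res_demo <- (q_sum_demo - q_agg_demo)^2
  w_demo <- rep(1/M_demo, M_demo)               # the "all" weighting used in the loop above
  drop(t(w_demo) %*% res_demo)                  # weighted mean squared discrepancy
}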
-------------------------------------------------------------------------------- /smart-meters/code/plot_coverage.R: --------------------------------------------------------------------------------
1 | # This script produces the figure showing the coverage of the 50% and 90% prediction intervals.
2 | # The coverage rates were computed in aggregation.R.
3 | rm(list = ls())
4 | source("config_paths.R")
5 | source("config_general.R")
6 | source("config_splitting.R")
7 | source("utils.R")
8 | 
9 | load(file.path(work.folder, "myinfo.Rdata"))
10 | 
11 | algo.agg <- "DETS"
12 | algo.bottom <- "KD-IC-NML"
13 | 
14 | all_coverage_bot <- all_coverage_agg <- 0
15 | 
16 | all_idtest <- seq(4416)
17 | # all_idtest <- c(seq(1, 3000), seq(3400, 4416))
18 | 
19 | for(idtest in all_idtest){
20 | if(idtest%%100==0)
21 | print(idtest)
22 | coverage_idtest <- file.path(coverage.folder, paste("coverage_", algo.agg, "_", algo.bottom, "_", idtest, ".Rdata", sep = ""))
23 | load(coverage_idtest)
24 | # c("coverage_bot", "coverage_agg") # 2 1578 4
25 | all_coverage_bot <- all_coverage_bot + coverage_bot
26 | all_coverage_agg <- all_coverage_agg + coverage_agg
27 | }
28 | all_coverage_bot <- all_coverage_bot / length(all_idtest)
29 | all_coverage_agg <- all_coverage_agg / length(all_idtest)
30 | 
31 | 
32 | bot_methods <- c("BASE", "BASE-MINT", "MINTdiag", "MINTshrink")
33 | bot_colors <- c("grey", "purple", "red", "green")
34 | 
35 | agg_methods <- c("BASE", "NAIVEBU", "PERMBU", "PERMBU-MINT", "NAIVEBU-MINT", "MINTdiag", "MINTshrink")
36 | agg_colors <- c("grey", "orange", "purple", "cyan", "pink", "red", "green")
37 | agg_colors[match(c("PERMBU-MINT", "NAIVEBU-MINT"), agg_methods)] <- agg_colors[match(c("PERMBU", "NAIVEBU"), agg_methods)]
38 | # The BASE color was changed from black to grey
39 | 
40 | 
41 | #agg_newnames <- c("BASE", "IndepBU", "DepBU", "DepBU-MinTShrink", "IndepBU-MinTShrink", "LogN-MinTDiag", "LogN-MinTShrink")
42 | agg_newnames <- c("BASE", "IndepBU- \n NoMinT", "DepBU- \n NoMinT", 
43 | "DepBU- \n MinTShrink", "IndepBU- \n MinTShrink", "LogN- \n MinTDiag", "LogN- \n MinTShrink")
44 | 
45 | #c("BASE", "BASE-MINT", "MINTdiag", "MINTshrink")
46 | #c("BASE", "BASE", "BASE", "BASE-MINT", "BASE-MINT", "MINTdiag", "MINTshrink")
47 | id_matching <- c(1, 1, 1, 2, 2, 3, 4)
48 | 
49 | myorder <- c(6, 7, 1, 3, 4, 2, 5)
50 | 
51 | savepdf(file.path(results.folder, "coverage"), height = 6)
52 | par(mfrow = c(1, 2))
53 | for(i in 1:2){
54 | if(i == 1){
55 | cov90 <- all_coverage_agg[2, , ]
56 | cov50 <- all_coverage_agg[1, , ]
57 | mycolors <- agg_colors
58 | }else{
59 | cov90 <- all_coverage_bot[2, , id_matching]
60 | cov50 <- all_coverage_bot[1, , id_matching]
61 | mycolors <- bot_colors[id_matching]
62 | }
63 | colnames(cov90) <- agg_newnames
64 | colnames(cov50) <- agg_newnames
65 | 
66 | # change the display order
67 | cov90 <- cov90[, myorder]
68 | cov50 <- cov50[, myorder]
69 | mycolors <- mycolors[myorder]
70 | 
71 | boxplot(cov50, outline = F, ylim = c(0.2, 1), las = 2, cex.axis = .6, yaxt = 'n', col = mycolors, 
72 | ylab = "Coverage rate")
73 | axis(2, at = c(0.5, 0.9), cex.axis = 0.6, labels = c("50%", "90%"))
74 | 
75 | boxplot(cov90, outline = F, ylim = c(0.2, 1), las = 2, cex.axis = .6, add = T, yaxt = 'n', col = mycolors)
76 | abline(h = c(0.5, 0.9), lwd = 0.1)
77 | 
78 | }
79 | dev.off()
80 | -------------------------------------------------------------------------------- /smart-meters/code/plot_forecasts.R: --------------------------------------------------------------------------------
1 | # This script produces 
a plot for one-day-ahead probabilistic forecasts (with 50% and 95% prediction intervals). 2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("config_general.R") 5 | source("config_splitting.R") 6 | source("utils.R") 7 | 8 | load(file.path(work.folder, "myinfo.Rdata")) 9 | 10 | plot.permsamples <- FALSE 11 | do.jasa <- TRUE 12 | 13 | if(plot.permsamples){ 14 | 15 | do.agg <- T 16 | algo.agg <- "DETS" 17 | algo.bottom <- "KD-IC-NML" 18 | 19 | alliseries <- c(1) 20 | idays <- seq(1, 2, by = 1) 21 | idays <- 1 22 | algorithms <- c("INDEPBU", "PERMBU", "MINTshrink", "INDEPBU-MINTshrink", "PERMBU-MINTshrink", "BASE") 23 | agg_methods <- c("BASE", "INDEPBU", "PERMBU", "PERMBU-MINTshrink", "INDEPBU-MINTshrink", "MINTdiag", "MINTshrink") 24 | 25 | nbperjob <- 368 26 | 27 | QF_agg <- array(NA, c(M, length(algorithms), 48*2)) 28 | allidtest <- seq(1, 48*2) 29 | idjob <- 1 30 | samples_job <- file.path(work.folder, "samples_agg", paste("samples_agg_", algo.agg, "_", algo.bottom, "_", idjob, ".Rdata", sep = "")) 31 | load(samples_job) 32 | 33 | list_samples_agg_nonull <- list_samples_agg[-which(sapply(list_samples_agg, is.null))] 34 | BIGARRAY <- sapply(seq_along(list_samples_agg_nonull), function(i){list_samples_agg_nonull[[i]]}, simplify = 'array') 35 | QF_agg[, , allidtest] <- BIGARRAY[, alliseries, match(algorithms, agg_methods), ] 36 | 37 | mf_agg <- apply(QF_agg, c(2, 3), mean) 38 | qf_agg <- apply(QF_agg, c(2, 3), quantile, prob = taus) 39 | all_qf <- lapply(idays, function(iday){ 40 | qf_agg[, , (iday - 1) * 48 + seq(48) ] 41 | }) 42 | all_mf <- lapply(idays, function(iday){ 43 | mf_agg[, (iday - 1) * 48 + seq(48) ] 44 | }) 45 | 46 | 47 | }else{ 48 | do.agg <- F 49 | alliseries <- seq(200) #c(1267) 50 | algorithms <- c("KD-IC-NML") 51 | 52 | idays <- seq(1, 92, by = 1) 53 | idays <- 1 54 | 55 | if(do.jasa){ 56 | series_isagg <- c(TRUE, FALSE) # actual 57 | alliseries <- c(1, 34) # actual 58 | idays <- c(11) # bettter coverage 59 | } 60 | } 61 | 62 | only.future <- FALSE 63 | 64 | if(do.jasa){ 65 | tag <- "example" 66 | savepdf(file.path(results.folder, paste("PLOT_forecasts_", tag, sep = "")), height = 27 * 0.25, width = 21) 67 | par(mfrow = c(1, 2)) 68 | }else{ 69 | tag <- "allmethods" 70 | savepdf(file.path(results.folder, paste("PLOT_forecasts_", tag, sep = "") )) 71 | } 72 | 73 | for(iseries in alliseries){ 74 | 75 | if(do.jasa){ 76 | do.agg <- series_isagg[which(iseries == alliseries)] 77 | if(do.agg){ 78 | algorithms <- "DETS" 79 | }else{ 80 | algorithms <- "KD-IC-NML" 81 | } 82 | } 83 | 84 | print(iseries) 85 | if(do.agg){ 86 | idseries <- aggSeries[iseries] 87 | load(file.path(aggseries.folder, paste("series-", idseries, ".Rdata", sep = ""))) 88 | }else{ 89 | idseries <- bottomSeries[iseries] 90 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 91 | } 92 | 93 | 94 | if(!do.jasa){ 95 | #par(mfrow = c(2, 2)) 96 | par(mfrow = c(2, 3)) 97 | } 98 | 99 | list_load <- vector("list", length(algorithms)) 100 | for(ialgo in seq_along(algorithms)){ 101 | algo <- algorithms[ialgo] 102 | algo_load <- algo 103 | 104 | if(plot.permsamples){ 105 | qf <- lapply(idays, function(iday){ 106 | all_qf[[iday]][, ialgo, ] 107 | }) 108 | mf <- lapply(idays, function(iday){ 109 | all_mf[[iday]][ialgo, ] 110 | }) 111 | 112 | list_load[[ialgo]] <- list(all_qf = qf, all_mf = mf) 113 | }else{ 114 | res_file <- file.path(basef.folder, algo, paste("results_", idseries, "_", algo, ".Rdata", sep = "")) 115 | load(res_file) 116 | 117 | if(algo_load == "KD-IC-NML"){ 118 | 
list_load[[ialgo]] <- list(all_qf = all_qf, all_mf = all_mf) #res_testing 119 | }else if(algo_load == "TBATS" || algo_load == "DYNREG" || algo_load == "DETS"){ 120 | list_load[[ialgo]] <- list(all_qf = all_qf, all_mf = all_mf) 121 | #list_load[[ialgo]] <- list(all_qf = all_qf, all_mf = all_mf, all_mfsample = all_mfsample) 122 | }else if(algo_load == "Uncond"){ 123 | list_load[[ialgo]] <- list(qFtest = qFtest, mFtest = mFtest) 124 | } 125 | } 126 | }#algo 127 | 128 | 129 | for(iday in idays){ 130 | 131 | day_min <- Inf 132 | day_max <- -Inf 133 | for(ialgo in seq_along(algorithms)){ 134 | day_min <- pmin(day_min, min(list_load[[ialgo]]$all_qf[[iday]])) 135 | day_max <- pmax(day_max, max(list_load[[ialgo]]$all_qf[[iday]])) 136 | } 137 | 138 | print(iday) 139 | for(ialgo in seq_along(algorithms)){ 140 | 141 | algo <- algorithms[ialgo] 142 | algo_load <- algo 143 | 144 | if(algo_load == "KD-IC-NML" || algo_load == "TBATS" || algo_load == "DYNREG" || algo_load == "DETS" || plot.permsamples){ 145 | 146 | all_qf <- list_load[[ialgo]]$all_qf 147 | all_mf <- list_load[[ialgo]]$all_mf 148 | mu_hat <- matrix(unlist(all_mf), ncol = 48, byrow = T) 149 | 150 | qf_allhours <- all_qf[[iday]] 151 | 152 | }else if(algo_load == "Uncond"){ 153 | qFtest <- list_load[[ialgo]]$qFtest 154 | mFtest <- list_load[[ialgo]]$mFtest 155 | 156 | qf_allhours <- qFtest 157 | mu_hat <- matrix(mFtest, ncol = 48, byrow = T) 158 | } 159 | 160 | rownames(qf_allhours) <- paste(taus*100, "%", sep = "") 161 | 162 | future <- demand[test$id[(iday - 1) * 48 + seq(1, 48)]] 163 | subtaus <- c("5%", "25%", "75%", "95%") 164 | #subtaus <- c("1%", "25%", "75%", "99%") 165 | 166 | mymain <- ifelse(algo == "KD-IC-NML", "Individual smart meter", ifelse(algo == "DETS", "Top aggregated series", algo)) 167 | 168 | #myYLIM <- c(0, max(c(future, qf_allhours[subtaus, ]), na.rm = T)) 169 | myYLIM <- c(day_min, day_max) 170 | 171 | plotQF(qf_allhours, future, subtaus, id = seq(48), only.future = only.future, 172 | main = mymain, xlab = "Time of day", ylab = "Consumption (kWh)", xaxt='n', cex.lab = 1.2, ylim = myYLIM) 173 | 174 | #axis(1, labels = tday, at = seq(1, 48)) 175 | itday <- c(1, seq(8, 48, by = 8)) 176 | axis(1, labels = tday[itday], at = itday, cex.axis=0.9) 177 | 178 | lines(mu_hat[iday, ], col = "red") 179 | 180 | }# ALGO 181 | 182 | }# DAY 183 | 184 | #dev.off() 185 | } # iseries 186 | dev.off() 187 | -------------------------------------------------------------------------------- /smart-meters/code/plot_parameters.R: -------------------------------------------------------------------------------- 1 | # This script produces the table showing the values for the different parameters of the exponential smoothing methods. 
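# Presumably (following a Taylor-style double seasonal exponential smoothing
# formulation, as suggested by the Greek labels below): alpha smooths the
# level, delta the intra-day cycle, omega the intra-week cycle, and phi is an
# adjustment parameter; each aggregate series contributes one row of four
# estimates. A sketch of the collection step with toy values (*_demo names
# are illustrative):
if(FALSE){
  pars_demo <- list(c(0.9, 0.1, 0.2, 0.3), c(0.8, 0.2, 0.1, 0.4))
  MAT_demo <- do.call(rbind, pars_demo)
  colnames(MAT_demo) <- c("phi", "alpha", "delta", "omega")
  MAT_demo
}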
2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("config_general.R") 5 | source("config_splitting.R") 6 | source("utils.R") 7 | library(Hmisc) 8 | 9 | load(file.path(work.folder, "myinfo.Rdata")) 10 | 11 | 12 | node_nbkids <- apply(Sagg, 1, sum) 13 | node_order <- sort(node_nbkids, index = T, decreasing = T)$ix 14 | 15 | algo <- "DETS" 16 | 17 | MAT <- matrix(NA, nrow = n_agg, ncol = 4) 18 | for(i in seq_along(aggSeries)){ 19 | idseries <- aggSeries[i] 20 | param_file <- file.path(basef.folder, algo, paste("parameters_", idseries, "_", algo, ".Rdata", sep = "")) 21 | if(file.exists(param_file)){ 22 | load(param_file) 23 | MAT[i, ] <- res_optim$par 24 | }else{ 25 | print(paste("file does not exist for ", i, sep = "")) 26 | } 27 | } 28 | MAT <- MAT[node_order, ] 29 | 30 | savepdf(file.path(results.folder, paste("DETS", sep = ""))) 31 | par(mfrow = c(2, 2)) 32 | xlab <- "aggregate series" 33 | plot(MAT[, 1], xlab = xlab, ylab = expression(phi)) 34 | plot(MAT[, 2], xlab = xlab, ylab = expression(alpha)) 35 | plot(MAT[, 3], xlab = xlab, ylab = expression(delta)) 36 | plot(MAT[, 4], xlab = xlab, ylab = expression(omega)) 37 | endpdf() 38 | 39 | 40 | savepdf(file.path(results.folder, paste("DETS", sep = ""))) 41 | par(mfrow = c(2, 2)) 42 | xlab <- "number of aggregated meters (log scale) " 43 | for(j in seq(4)){ 44 | if(j == 1){ 45 | my_ylab <- expression(phi) 46 | }else if(j == 2){ 47 | my_ylab <- expression(alpha) 48 | }else if(j == 3){ 49 | my_ylab <- expression(delta) 50 | }else if(j == 4){ 51 | my_ylab <- expression(omega) 52 | } 53 | plot(log(node_nbkids), MAT[, j], xlab = xlab, ylab = my_ylab) 54 | } 55 | endpdf() 56 | 57 | id <- c(1, 7, 39, 50) 58 | #id <- c(1, 3, 5, 7, 11, 25, 31, 37, 46, 50) 59 | nb <- apply(Sagg, 1, sum)[node_order[id]] 60 | 61 | #MAT_toprint <- data.frame(t(MAT)[, id]) 62 | #greeks <- c(phi = "$\\phi$", alpha="$\\alpha$", delta = "$\\delta$", omega = "$\\omega$") 63 | 64 | MAT_toprint <- data.frame(t(MAT)[c(2, 3, 4, 1), id]) 65 | greeks <- c(alpha="$\\alpha$", delta = "$\\delta$", omega = "$\\omega$", phi = "$\\phi$") 66 | 67 | row.names(MAT_toprint) <- greeks 68 | colnames(MAT_toprint) <- nb 69 | MAT_toprint <- format.df(MAT_toprint, dec = 3) 70 | 71 | latex(MAT_toprint, file= file.path(results.folder, paste("TABLE_DETS.tex")), 72 | title = "", cgroup = "Number of aggregated meters", label = "tab:param_dets", 73 | caption = "Parameters of the exponential smoothing method at different levels of aggregation.") 74 | 75 | 76 | MAT_bandwiths <- MAT_decay <- matrix(NA, nrow = n_bottom, ncol = 3) 77 | 78 | algo <- "KD-IC-NML" 79 | for(i in seq_along(bottomSeries)){ 80 | print(i) 81 | idseries <- bottomSeries[i] 82 | param_file <- file.path(basef.folder, algo, paste("parameters_", idseries, "_", algo, ".Rdata", sep = "")) 83 | 84 | if(file.exists(param_file)){ 85 | load(param_file) 86 | MAT_bandwiths[i, ] <- selected_bandwiths_ic 87 | MAT_decay[i, ] <- selected_lambdas_ic 88 | }else{ 89 | print(paste("file does not exist for ", i, sep = "")) 90 | } 91 | } 92 | 93 | -------------------------------------------------------------------------------- /smart-meters/code/plot_series.R: -------------------------------------------------------------------------------- 1 | # This script produces the figure which shows the electricity demand for different levels of aggregation. 
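# Sagg is the aggregation matrix of the hierarchy: entry [i, j] is 1 when
# bottom-level meter j belongs to aggregate node i, so apply(Sagg, 1, sum)
# counts the meters under each node and node_order below ranks the nodes from
# most to least aggregated. A toy illustration (hypothetical two-node hierarchy
# over three meters, not the paper's data; harmless here since rm(list = ls())
# follows):
Sagg_toy <- rbind(c(1, 1, 1),  # total: all three meters
                  c(1, 1, 0))  # sub-aggregate: meters 1 and 2
apply(Sagg_toy, 1, sum)  # -> 3 2
sort(apply(Sagg_toy, 1, sum), index.return = TRUE, decreasing = TRUE)$ix  # -> 1 2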
2 | rm(list = ls()) 3 | 4 | source("config_paths.R") 5 | source("config_general.R") 6 | source("config_splitting.R") 7 | source("utils.R") 8 | 9 | library(parallel) 10 | library(fBasics) 11 | library(msm) 12 | library(gtools) 13 | library(forecast) 14 | library(abind) 15 | library(glmnet) 16 | 17 | load(file.path(work.folder, "myinfo.Rdata")) 18 | 19 | node_nbkids <- apply(Sagg, 1, sum) 20 | node_order <- sort(node_nbkids, index = T, decreasing = T)$ix 21 | 22 | offset <- 48 * 7 * 4 23 | 24 | #nb_points <- 48 * 7 * 3 25 | nb_points <- 48 * 7 * 2 26 | nb_points <- 48 * 7 27 | 28 | alldemand <- NULL 29 | 30 | for(iseries in node_order){ 31 | idseries <- aggSeries[iseries] 32 | load(file.path(aggseries.folder, paste("series-", idseries, ".Rdata", sep = ""))) 33 | alldemand <- cbind(alldemand, demand[offset + seq(nb_points)]) 34 | } 35 | #id <- seq(1, 55, by = 10) 36 | #id <- c(1, 4, 7, 39, 52) 37 | id <- c(1, 7, 39, 50) 38 | 39 | 40 | alldemand <- alldemand[, node_order[id]] 41 | load(file.path(mymeters.folder, paste("mymeter-", bottomSeries[511], ".Rdata", sep = ""))) 42 | alldemand <- cbind(alldemand, demand[seq(nb_points)]) 43 | #alldemand <- scale(alldemand) 44 | colnames(alldemand) <- c(node_nbkids[node_order[id]], 1) 45 | 46 | 47 | 48 | savepdf(file.path(results.folder, "demand-bylevel")) 49 | my.ts.panel <- function(x, col = col, bg = bg, pch = pch, type = type, vpos=48*7, ...){ 50 | lines(x, col = col, bg = bg, pch = pch, type = type, ...) 51 | #abline(v=vpos) 52 | } 53 | plot.ts(alldemand, panel=my.ts.panel, nc = 1, axes = F, xlab = "", xaxt = "n", main = "") 54 | abline(v = seq(1, 48 * 7, by = 48), lty = 3) 55 | 56 | v <- seq(24, 48 * 7, by = 48) 57 | axis(1, labels = abbr.dweek, at=v , tick = F) 58 | 59 | endpdf() 60 | # main = paste(node_nbkids[node_order[id]], " aggregated meters", sep = ""), -------------------------------------------------------------------------------- /smart-meters/code/plot_tree.R: -------------------------------------------------------------------------------- 1 | # This script produces the figure which shows the hierarchy in the form of a circular graph. 
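# The hierarchy is drawn with igraph using a circular Reingold-Tilford layout.
# A self-contained toy sketch of the same plotting idea, on a synthetic
# three-level tree (13 nodes, 3 children each) rather than the paper's
# hierarchy:
#
#   library(igraph)
#   g_toy <- make_tree(13, children = 3, mode = "out")
#   plot(g_toy, layout = layout.reingold.tilford(g_toy, root = 1, circular = TRUE),
#        vertex.size = 3, vertex.label = NA, edge.arrow.size = 0)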
2 | rm(list = ls()) 3 | source("config_paths.R") 4 | source("config_general.R") 5 | source("config_splitting.R") 6 | source("utils.R") 7 | library(igraph) 8 | library(Matrix) 9 | 10 | load(file.path(work.folder, "myinfo.Rdata")) 11 | 12 | #savepdf(file.path(results.folder, paste("hierarchy", sep = "") )) 13 | #plot(itree, layout = 14 | # layout.reingold.tilford(itree, root=1, circular=T), vertex.size=0, vertex.label=NA, edge.arrow.size=0) 15 | #dev.off() 16 | 17 | g <- itree 18 | v_names <- names(V(g)) 19 | v_names[seq(2, length(v_names))] <- substr(v_names[seq(2, length(v_names))], 4, 7) 20 | v_names[seq(56, length(v_names))] <- "" 21 | g <- set.vertex.attribute(g, "name", value=v_names) 22 | 23 | savepdf(file.path(results.folder, paste("hierarchy-plot", sep = "") )) 24 | 25 | #plot(g, layout = layout.reingold.tilford(g, root=1, circular=T), vertex.size=0, edge.arrow.size=0, vertex.label.cex = .7, 26 | # vertex.label.dist=.3, vertex.label.degree = .30) 27 | #myvsize <- c(apply(Sagg, 1, sum), rep(0, ncol(Sagg))) 28 | #plot(g, layout = layout.reingold.tilford(g, root=1, circular=T), vertex.size=myvsize/90, edge.arrow.size=0, vertex.label.cex = .7) 29 | 30 | #myvsize <- log(c(apply(Sagg, 1, sum), rep(1, ncol(Sagg)))) 31 | myvsize <- c(apply(Sagg, 1, sum), rep(1, ncol(Sagg)))/56 32 | 33 | plot(g, layout = layout.reingold.tilford(g, root=1, circular=T), 34 | vertex.size=myvsize, edge.arrow.size=0, vertex.label= NA, vertex.color = "white") 35 | 36 | endpdf() 37 | 38 | #myvsize <- apply(Sagg, 1, sum)/60 39 | #res <- getInfoNode("kwh") 40 | res <- getInfoNode("nb_nodes") 41 | newg <- delete_vertices(g, seq(56, length(V(g)))) 42 | V(newg)$color <- "white" 43 | 44 | myvsize <- res$info_nodes_agg/15 45 | savepdf(file.path(results.folder, paste("hierarchy-plot-size", sep = "") )) 46 | plot(newg, layout = layout.reingold.tilford(newg, root=1), vertex.size = myvsize, vertex.label= NA, edge.arrow.size=0) 47 | endpdf() 48 | 49 | myvsize <- sqrt(res$info_nodes_agg) 50 | savepdf(file.path(results.folder, paste("hierarchy-plot-sqrtsize", sep = "") )) 51 | plot(newg, layout = layout.reingold.tilford(newg, root=1), vertex.size = myvsize, vertex.label= NA, edge.arrow.size=0) 52 | endpdf() 53 | 54 | 55 | myvsize <- res$info_nodes_agg/20 56 | savepdf(file.path(results.folder, paste("hierarchy-plot-size", sep = "") ) ) 57 | plot(newg, layout = layout_as_tree(newg, root=1), vertex.size = myvsize, vertex.label= NA, edge.arrow.size=0) 58 | endpdf() 59 | 60 | #myvsize <- res$info_nodes_agg/20 61 | #plot(newg, layout = layout.reingold.tilford(newg, root=1, circular=T), vertex.size = myvsize, vertex.label= NA, edge.arrow.size=0) 62 | 63 | -------------------------------------------------------------------------------- /smart-meters/code/results.R: -------------------------------------------------------------------------------- 1 | # This script produces the figures with all the forecast accuracy measures. 2 | 3 | # Note: You need to run the script "aggregation_merge.R" first to generate all the results files. 
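# When do.skill is set to TRUE below, every accuracy measure is reported as a
# skill score, i.e. the percentage improvement over the BASE forecasts:
#   skill = 100 * (score_BASE - score_method) / score_BASE
# For example, a method with CRPS 0.40 against a BASE CRPS of 0.50
# (hypothetical numbers) has a skill of 100 * (0.50 - 0.40) / 0.50 = 20%.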
4 | 5 | source("results_utils.R") 6 | shifter <- function(x, n = 1) { 7 | #if (n == 0) x else c(tail(x, -n), head(x, n)) 8 | if (n == 0) x else c(tail(x, n), head(x, -n)) 9 | } 10 | 11 | color.agg[match(c("PERMBU-MINT", "NAIVEBU-MINT"), agg_methods)] <- color.agg[match(c("PERMBU", "NAIVEBU"), agg_methods)] 12 | 13 | do.skill <- FALSE 14 | do.colors <- TRUE 15 | 16 | 17 | better_names <- function(x){ 18 | if(measure == "MSE" || measure == "RMSE"){ 19 | x[which(x == "NAIVEBU")] <- "NoMinT" #"IndepBU and DepBU" 20 | x[which(x == "MINTdiag")] <- "MinTDiag" 21 | x[which(x == "MINTshrink")] <- "MinTShrink" 22 | }else if(measure == "CRPS" || measure == "CRPS Tails"){ 23 | x[which(x == "NAIVEBU")] <- "IndepBU" 24 | x[which(x == "PERMBU")] <- "DepBU" 25 | x[which(x == "NAIVEBU-MINT")] <- "IndepBU-MinTShrink" 26 | x[which(x == "PERMBU-MINT")] <- "DepBU-MinTShrink" 27 | 28 | x[which(x == "MINTdiag")] <- "LogN-MinTDiag" 29 | x[which(x == "MINTshrink")] <- "LogN-MinTShrink" 30 | } 31 | 32 | return(x) 33 | } 34 | 35 | #grouping_hours <- c(10, 8, 8, 8, 8, 6) 36 | #grouping_hours <- c(10, 20, 18) 37 | #grouping_hours <- c(16, 22 ,10) 38 | #grouping_hours <- rep(2, 24) 39 | #myfactor_hours <- rep(seq(length( grouping_hours )), times = grouping_hours) 40 | 41 | 42 | myfactor_hours <- numeric(48) 43 | myfactor_hours[which(seq(1, 48) %in% c(seq(45, 48), seq(1, 12)))] <- 3 44 | myfactor_hours[which(seq(48) %in% seq(13, 28))] <- 1 45 | myfactor_hours[which(seq(48) %in% seq(29, 44))] <- 2 46 | myfactor_hours <- as.factor(myfactor_hours) 47 | grouping_hours <- table(myfactor_hours) 48 | 49 | grouping_series_agg <- c(rep(1, 6), 8, 5, 7, 10, 9, 5, 5) 50 | myfactor_series_agg <- rep(seq(length( grouping_series_agg )), times = grouping_series_agg) 51 | 52 | grouping_series_bot <- 526*3 53 | myfactor_series_bot <- rep(seq(length( grouping_series_bot )), times = grouping_series_bot) 54 | 55 | measures <- c("CRPS", "CRPS Tails", "MSE", "RMSE") 56 | for(measure in measures){ 57 | print(measure) 58 | list_mat <- get_mat(measure, do.skill = FALSE) 59 | 60 | #mysteps <- 1:2 61 | mysteps <- 2 62 | for(mystep in mysteps){ 63 | if(measure == "MSE" || measure == "RMSE"){ 64 | if(mystep == 1){ 65 | #algos <- c("BASE", "NAIVEBU") 66 | algos <- c("NAIVEBU") 67 | }else{ 68 | #algos <- c("BASE", "MINTdiag", "MINTshrink") 69 | algos <- c("NAIVEBU", "MINTdiag", "MINTshrink") 70 | } 71 | }else{ 72 | if(mystep == 1){ 73 | #algos <- c("BASE", "NAIVEBU", "PERMBU") 74 | algos <- c("NAIVEBU", "PERMBU") 75 | }else{ 76 | #algos <- c("BASE", "PERMBU-MINT", "NAIVEBU-MINT", "MINTdiag", "MINTshrink") 77 | algos <- c("PERMBU-MINT", "NAIVEBU-MINT", "MINTdiag", "MINTshrink") 78 | } 79 | } 80 | id_wanted_agg <- match(algos, agg_methods) 81 | id_wanted_bot <- match(sapply(algos, to_aggname), bot_methods) 82 | if(do.colors){ 83 | mycolors <- color.agg[id_wanted_agg] 84 | }else{ 85 | mycolors <- "black" 86 | } 87 | 88 | filename <- paste("_REVIEW_RESULTS_JASA_", gsub(" ", "", measure, fixed = TRUE), "_", mystep, "_", 89 | ifelse(do.colors, "COLOR", "BLACK"), "_", ifelse(do.skill, "SKILL", "ABSOLUTE"), sep = "") 90 | 91 | savepdf(file.path(results.folder, filename), height = 5) 92 | par(mfrow = c(1, 3)) 93 | 94 | results_bot <- list_mat$res_bot 95 | results_bot_avg <- apply(results_bot, c(2, 3), function(x){ tapply(x, myfactor_hours, mean) }) 96 | results_bot_avg <- apply(results_bot_avg, c(1, 2), function(x){ tapply(x, myfactor_series_bot, mean) }) 97 | x_nbbot <- 1 98 | 99 | results_agg <- list_mat$res_agg 100 | results_agg <- results_agg[, , 
node_order] 101 | results_agg_avg <- apply(results_agg, c(2, 3), function(x){ tapply(x, myfactor_hours, mean) }) 102 | results_agg_avg <- apply(results_agg_avg, c(1, 2), function(x){ tapply(x, myfactor_series_agg, mean) }) 103 | results_agg_avg <- aperm(results_agg_avg, c(2, 3, 1)) 104 | x_nbagg <- tapply(res_info$info_nodes_agg[node_order], myfactor_series_agg, mean) 105 | 106 | mse_ylim <- vector("list", 3) 107 | mse_ylim[[1]] <- NULL #c(-10, 7) 108 | mse_ylim[[2]] <- c(-15, 5) #c(-45, 10) 109 | mse_ylim[[3]] <- c(-80, 20) #c(-60, 15) 110 | 111 | 112 | for(k in seq( length(grouping_hours) ) ){ 113 | my_ylim <- NULL 114 | if( (measure == "MSE" || measure == "RMSE") && k <= 3){ 115 | my_ylim <- mse_ylim[[k]] 116 | } 117 | 118 | #maink <- paste(tday[range(which(k == myfactor_hours))], collapse = " - ") 119 | id <- which(myfactor_hours == k) 120 | if(k == 3){ 121 | id <- shifter(id, 4) 122 | } 123 | maink <- paste(tday[c(head(id, 1), tail(id, 1))], collapse = " - ") 124 | 125 | i_base <- match("BASE", bot_methods) 126 | if(do.skill){ 127 | u_bot <- t( (results_bot_avg[k, i_base] - t(results_bot_avg[k, id_wanted_bot]))/results_bot_avg[k, i_base]) 128 | u_bot <- t(u_bot) * 100 129 | }else{ 130 | u_bot <- t(results_bot_avg[k, id_wanted_bot]) 131 | } 132 | 133 | 134 | i_base <- match("BASE", agg_methods) 135 | if(do.skill){ 136 | u_agg <- t( (results_agg_avg[k, i_base, ] - t(results_agg_avg[k, id_wanted_agg, ]))/results_agg_avg[k, i_base, ]) 137 | u_agg <- t(u_agg) * 100 138 | 139 | }else{ 140 | u_agg <- t(results_agg_avg[k, id_wanted_agg, ]) 141 | } 142 | 143 | if(length(id_wanted_agg) == 1) 144 | u_agg <- t(u_agg) 145 | 146 | u_all <- rbind(u_agg, u_bot) 147 | x_all <- c(x_nbagg, x_nbbot) 148 | 149 | if(do.skill){ 150 | matplot(log10(x_all), u_all, 151 | type = 'o', pch = pch.agg[id_wanted_agg], lty = 1, col = mycolors, 152 | xlab = "Number of aggregated meters", 153 | ylab = paste(measure, " skill (%)", sep = ""), main = maink, 154 | ylim = my_ylim, xaxt = "n", 155 | cex = 1.2) 156 | abline(h = 0) 157 | }else{ 158 | matplot(log10(x_all), u_all, 159 | type = 'o', pch = pch.agg[id_wanted_agg], lty = 1, col = mycolors, 160 | xlab = "Number of aggregated meters", 161 | ylab = paste(measure, ifelse(measure == "RMSE", " (kWh)", "") , sep = ""), main = maink, xaxt = "n", 162 | cex = 1.2) 163 | } 164 | 165 | #axis(1, at = log10(x_all), labels = log(x_all)) 166 | x <- c(0, 1, 2, 3) 167 | axis(1, at = x, labels = 10^x) 168 | 169 | if(k == 1){ 170 | myplace <- ifelse(measure == "RMSE" & !do.skill, "topleft", "bottomleft") 171 | myinset <- c(0, 0) 172 | if(measure == "CRPS"){ 173 | myinset <- c(0.1,0) 174 | } 175 | legend(myplace, better_names(agg_methods[id_wanted_agg]), 176 | lty = 1, pch = pch.agg[id_wanted_agg], col = mycolors, cex = 1.1, bty = "n", 177 | inset = myinset) 178 | } 179 | 180 | 181 | 182 | } # k 183 | dev.off() 184 | }# id 185 | } # measures 186 | 187 | -------------------------------------------------------------------------------- /smart-meters/code/results_utils.R: -------------------------------------------------------------------------------- 1 | # Various functions used in the "results.R" script. 
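# get_mat(measure, do.skill) returns list(res_agg, res_bot), score arrays that
# results.R indexes as [half-hour of day, method, series]. to_aggname() maps an
# aggregate-level method label to the bottom-level method whose forecasts it
# builds on, e.g.
#   to_aggname("PERMBU")       # -> "BASE"
#   to_aggname("PERMBU-MINT")  # -> "BASE-MINT"
#   to_aggname("MINTshrink")   # -> "MINTshrink" (unchanged)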
2 | 3 | get_mat <- function(measure, do.skill){ 4 | if(grepl("CRPS", measure)){ 5 | if(measure == "CRPS"){ 6 | iweight <- 1 7 | }else if(measure == "CRPS Tails"){ 8 | iweight <- 2 9 | }else if(measure == "CRPS Right tail"){ 10 | iweight <- 4 11 | }else if(measure == "CRPS Left tail"){ 12 | iweight <- 5 13 | } 14 | res_agg <- wcrps_agg_byhour[iweight, , , ] 15 | res_bot <- wcrps_bot_byhour[iweight, , , ] 16 | }else if(measure == "MSE"){ 17 | res_agg <- mse_agg_byhour 18 | res_bot <- mse_bot_byhour 19 | }else if(measure == "RMSE"){ 20 | res_agg <- sqrt(mse_agg_byhour) 21 | res_bot <- sqrt(mse_bot_byhour) 22 | }else if(measure == "QS"){ 23 | res_agg <- total_qscores_agg 24 | res_bot <- total_qscores_bot 25 | }else{ 26 | stop("error") 27 | } 28 | 29 | if(do.skill){ 30 | mat_agg_skill <- sapply(seq_along(agg_methods), function(iaggmethod){ 31 | (res_agg[, match("BASE", agg_methods), ] - res_agg[, iaggmethod, ])/res_agg[, match("BASE", agg_methods), ] 32 | }, simplify = 'array') 33 | 34 | mat_bot_skill <- sapply(seq_along(bot_methods), function(ibotmethod){ 35 | (res_bot[, match("BASE", bot_methods), ] - res_bot[, ibotmethod, ])/res_bot[, match("BASE", bot_methods), ] 36 | }, simplify = 'array') 37 | 38 | #browser() 39 | res_agg <- aperm(mat_agg_skill, c(1, 3, 2)) 40 | res_bot <- aperm(mat_bot_skill, c(1, 3, 2)) 41 | } 42 | 43 | list(res_agg = res_agg, res_bot = res_bot) 44 | } 45 | 46 | to_aggname <- function(algo){ 47 | if(algo %in% c("NAIVEBU", "PERMBU")) 48 | { 49 | res <- "BASE" 50 | }else if(algo %in% c("PERMBU-MINT", "NAIVEBU-MINT")){ 51 | # res <- "MINTshrink" 52 | res <- "BASE-MINT" 53 | }else{ 54 | res <- algo 55 | } 56 | res 57 | } 58 | 59 | -------------------------------------------------------------------------------- /smart-meters/code/run_aggregation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | full_path=$(realpath $0) 4 | dir_path=$(dirname $full_path) 5 | mypath=$(dirname $dir_path ) 6 | 7 | rscript="aggregation.R" 8 | 9 | njobs=12 10 | nperjobs=368 11 | 12 | allijobs=$(seq 1 $njobs ) 13 | 14 | for ijob in ${allijobs[@]} 15 | do 16 | start=$(( 0 + ($ijob - 1) * $nperjobs + 1 )) 17 | end=$(( 0 + ($ijob - 1) * $nperjobs + $nperjobs )) 18 | allidtest=( $(seq $start $end ) ) 19 | 20 | echo "${allidtest[@]}" 21 | Rscript --vanilla $rscript $ijob ${allidtest[@]} > "$mypath/work/rout/aggregation-$ijob.Rout" 2> "$mypath/work/rout/aggregation-$ijob.err" & 22 | done 23 | 24 | 25 | -------------------------------------------------------------------------------- /smart-meters/code/run_basef.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | full_path=$(realpath $0) 4 | dir_path=$(dirname $full_path) 5 | mypath=$(dirname $dir_path ) 6 | 7 | rscript="basef.R" 8 | 9 | doagg=FALSE && tag="bottom" && algo="KD-IC-NML" && njobs=4 && nperjobs=395 # (bottom-level forecasts) 10 | # doagg=TRUE && tag="agg" && algo="DETS" && njobs=28 && nperjobs=2 # (aggregate-level forecasts) 11 | 12 | allijobs=$(seq 1 $njobs ) 13 | 14 | for ijob in ${allijobs[@]} 15 | do 16 | start=$(( 0 + ($ijob - 1)* ($nperjobs) + 1 )) 17 | end=$(( 0 + ($ijob - 1)* ($nperjobs) + ($nperjobs) )) 18 | alliseries=( $(seq $start $end ) ) 19 | 20 | echo "${alliseries[@]}" 21 | 22 | Rscript --vanilla $rscript $algo $doagg ${alliseries[@]} > "$mypath/work/rout/basef-$tag-$algo-$ijob.Rout" 2> "$mypath/work/rout/basef-$tag-$algo-$ijob.err" & 23 | 24 | done 25 | 26 | 27 | 28 |
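# Job arithmetic (same pattern as run_aggregation.sh): job i covers the
# contiguous series ids (i - 1) * nperjobs + 1 .. i * nperjobs, so the active
# bottom-level setting (njobs=4, nperjobs=395) spans series 1..1580, and the
# commented aggregate-level setting (njobs=28, nperjobs=2) spans series 1..56.
# Each Rscript call is backgrounded with "&", so all jobs run in parallel.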
-------------------------------------------------------------------------------- /smart-meters/code/utils.R: -------------------------------------------------------------------------------- 1 | # Various useful functions used by many scripts. 2 | source("utils_ets.R") 3 | source("utils_kde.R") 4 | library(scoringRules) 5 | 6 | getInfoNode <- function(typeinfo) 7 | { 8 | if(typeinfo == "nb_nodes"){ 9 | info_nodes_agg <- apply(Sagg, 1, sum) 10 | info_nodes_bottom <- rep(1, n_bottom) 11 | }else if(typeinfo == "kwh"){ 12 | for(do.agg in c(TRUE, FALSE)){ 13 | nseries <- ifelse(do.agg, n_agg, n_bottom) 14 | x <- numeric(nseries) 15 | for(iseries in seq(nseries)){ 16 | if(do.agg){ 17 | idseries <- aggSeries[iseries] 18 | load(file.path(aggseries.folder, paste("series-", idseries, ".Rdata", sep = ""))) 19 | }else{ 20 | idseries <- bottomSeries[iseries] 21 | load(file.path(mymeters.folder, paste("mymeter-", idseries, ".Rdata", sep = ""))) 22 | } 23 | #x[iseries] <- mean(demand) 24 | x[iseries] <- mean(apply(matrix(demand, nrow = 2), 2, sum)) 25 | } 26 | if(do.agg){ 27 | info_nodes_agg <- x 28 | }else{ 29 | info_nodes_bottom <- x 30 | } 31 | } 32 | } 33 | list(info_nodes_agg = info_nodes_agg, info_nodes_bottom = info_nodes_bottom) 34 | } 35 | 36 | gtop <- function(ycheck, R, nagg, ntotal, Sagg, P_bu){ 37 | Rinv <- solve(R) #Rinv <- diag(1 / sqrt(weights_GTOP)) 38 | A <- R 39 | A2 <- t(A) %*% A # A2 = R^T %*% R 40 | 41 | dvec <- A2 %*% ycheck 42 | Dmat <- Rinv 43 | Amat <- diag(ntotal)[seq(nagg), ] - Sagg %*% P_bu 44 | meq <- nagg 45 | Amat <- t(Amat) 46 | res <- solve.QP(Dmat, dvec, Amat, bvec = rep(0, meq), meq = meq, factorized = TRUE) 47 | #print(res) 48 | yproj <- res$solution 49 | } 50 | 51 | fourier.series = function(t,terms,period) 52 | { 53 | n = length(t) 54 | X = matrix(NA, nrow=n, ncol=2*terms) 55 | for(i in 1:terms) 56 | { 57 | X[,2*i-1] = sin(2*pi*i*t/period) 58 | X[,2*i] = cos(2*pi*i*t/period) 59 | } 60 | colnames(X) = paste(c("S","C"),rep(1:terms,rep(2,terms)),sep="") 61 | return(X) 62 | } 63 | 64 | backtransform_log <- function(x, fvar){ 65 | exp(x) * (1 + 0.5 * fvar) 66 | } 67 | 68 | 69 | mint_betastar <- function(W, y_hat){ 70 | MAT1 <- W %*% U 71 | MAT2 <- crossprod(U,MAT1) 72 | MAT3 <- tcrossprod(solve(MAT2), U) 73 | C1 <- J %*% MAT1 74 | C2 <- MAT3 %*% y_hat 75 | adj <- C1 %*% C2 76 | -adj 77 | } 78 | 79 | mint_pmatrix <- function(W){ 80 | MAT1 <- W %*% U 81 | MAT2 <- crossprod(U,MAT1) 82 | MAT3 <- tcrossprod(solve(MAT2), U) 83 | C1 <- J %*% MAT1 84 | J - C1 %*% MAT3 85 | } 86 | 87 | hasnoBottom <- function(algo){ 88 | grepl("BU", algo) & !grepl("NNLS", algo) 89 | } 90 | 91 | getInfo <- function(idtest){ 92 | iday <- floor((idtest-1)/48) + 1 93 | hour <- (idtest -1)%%48 + 1 94 | list(iday = iday, hour = hour) 95 | } 96 | 97 | crps_mixture <- function(mus, vars, weights, x_query){ 98 | M <- length(mus) 99 | 100 | sigmas <- sqrt(vars); x_centered <- (x_query - mus) 101 | 102 | #comp1 <- sum(2 * sigmas * dnorm(x_centered / sigmas) + x_centered * (2 * pnorm(x_centered / sigmas) - 1))/M 103 | comp1_part1 <- 2 * sigmas * dnorm(x_centered / sigmas) + x_centered * (2 * pnorm(x_centered / sigmas) - 1) 104 | comp1 <- sum(weights * comp1_part1) 105 | 106 | ids <- permutations(n = M, r = 2, repeats.allowed=T) 107 | 108 | mudiffs <- mus[ids[, 1]] - mus[ids[, 2]] 109 | varsums <- vars[ids[, 1]] + vars[ids[, 2]] 110 | wproducts <- weights[ids[, 1]] * weights[ids[, 2]] 111 | 112 | sigmasums <- sqrt(varsums) 113 | comp2_part1 <- 2 * sigmasums * dnorm(mudiffs / sigmasums) + mudiffs * (2 * 
pnorm(mudiffs / sigmasums) - 1) 114 | comp2 <- sum(wproducts * comp2_part1) 115 | 116 | #comp2 <- (1/M^2) * sum(2 * sigmasums * dnorm(mudiffs / sigmasums) + 117 | # mudiffs * (2 * pnorm(mudiffs / sigmasums) - 1)) 118 | 119 | comp1 - 0.5 * comp2 120 | } 121 | 122 | crps_sampling <- function(X, obs){ 123 | X <- sample(X) 124 | Xprime <- diff(X) 125 | mean(abs(X - obs)) - 0.5 * mean(abs(Xprime)) 126 | } 127 | 128 | 129 | 130 | getfromlist <- function(mylist, item = c("crps", "residuals", "squared_error", "q_hat", "tauhat", "mu_hat", "var_hat")){ 131 | lapply(mylist, function(daylist){ 132 | sapply(daylist, function(hourlist){ 133 | hourlist[[item]] 134 | }, simplify = "array") 135 | }) 136 | } 137 | 138 | 139 | getItem <- function(mylist, item, order_hours){ 140 | item_night <- getfromlist(mylist$res_nighthours, item) 141 | item_day <- getfromlist(mylist$res_dayhours, item) 142 | if(length(dim(item_night)) == 3){ 143 | item_all <- abind(item_night, item_day, along = 2) 144 | item_all <- item_all[, order_hours, ] 145 | res <- lapply(seq(dim(item_all)[3]), function(iday){ 146 | item_all[, , iday] 147 | }) 148 | }else if(length(dim(item_night)) == 2){ 149 | item_all <- rbind(item_night, item_day) 150 | item_all <- item_all[order_hours, ] 151 | res <- lapply(seq(ncol(item_all)), function(iday){ 152 | item_all[, iday] 153 | }) 154 | } 155 | res 156 | } 157 | quantile_loss <- function(y, f, tau){ 158 | mean(tau*(y - f)*((y - f) >= 0) - (1-tau)*(y - f)*((y - f) < 0)) 159 | } 160 | 161 | check_function <- function(y, f, tau){ 162 | tau*(y - f)*((y - f) >= 0) - (1-tau)*(y - f)*((y - f) < 0) 163 | } 164 | 165 | 166 | makeSuperposed<-function(datatable,maxy,colorvar,maintitle=NULL,xlab,ylab,do.plot=TRUE, densities = NULL) 167 | { 168 | if(length(colorvar)!=ncol(datatable)-1) 169 | stop("Error color !") 170 | 171 | if(!is.null(densities) && length(densities) != ncol(datatable)-1 ){ 172 | stop("Error densities ! ") 173 | } 174 | 175 | #ylim<-c(0,1.1*max(maxy,apply(datatable[,-1],1,sum))) 176 | ylim<-c(0,maxy) 177 | xx <- c(datatable[,1], rev(datatable[,1])) 178 | 179 | if(do.plot){ 180 | plot(x=datatable[,1], y=datatable[,2], ylim=ylim,type='l',ylab=ylab, xlab=xlab, main=maintitle) 181 | # plot(x=datatable[,1], y=datatable[,2], ylim=ylim,type='l',ylab=ylab, xlab=xlab, main=maintitle) 182 | 183 | } 184 | 185 | a<-datatable[,2] 186 | yysrc2 <- c(rep(0, nrow(datatable)), rev(a)) 187 | 188 | allvar <- seq(2,ncol(datatable)) 189 | for(variable in allvar) 190 | { 191 | id <- which(variable==allvar) 192 | 193 | mydensity <- NULL 194 | if(!is.null(densities)){ 195 | mydensity <- densities[id] 196 | if(is.na(mydensity)) 197 | mydensity <- NULL 198 | } 199 | 200 | polygon(xx, yysrc2, col=colorvar[variable-1],border=NA, density = mydensity) 201 | if(variable != ncol(datatable)) 202 | { 203 | b<-rev(apply(datatable[,2:(variable+1),drop=F],1,sum)) 204 | yysrc2 <- c(a,b) 205 | a<-rev(b) 206 | } 207 | } 208 | 209 | } 210 | 211 | ######## 212 | plotQF <- function(quantilef, obs, taus, id, only.future = FALSE, ...) 213 | { 214 | qf <- quantilef[taus, id] 215 | medianf <- quantilef["50%", id] 216 | future <- obs[id] 217 | 218 | # Plotting 219 | matplot(id, cbind(future, t(qf)), type = "n", ...) 
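# type = "n" only sets up the axes, with limits wide enough for both the
# observations and all quantile rows; the interval polygons, observed points
# and median line are layered onto this empty canvas below.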
220 | if(!only.future){ 221 | x <- seq(ncol(qf)) 222 | xx <- c(x, rev(x)) 223 | 224 | nbrow <- nrow(qf) 225 | colorvar <- c("grey", "lightblue", "grey") 226 | idcol <- 1 227 | for(row in seq(nbrow, 2, by =-1)){ 228 | yy <- c(qf[row, ], rev(qf[row - 1,])) 229 | polygon(xx, yy, col=colorvar[idcol],border=NA) 230 | idcol <- idcol +1 231 | } 232 | } 233 | points(id, future, pch = 20) 234 | if(!only.future){ 235 | lines(medianf) 236 | } 237 | } 238 | 239 | 240 | allqs <- function(qf, obs, taus){ 241 | allqs <- NULL 242 | for(id in seq_along(taus)){ 243 | alpha <- taus[id] 244 | #print(alpha) 245 | qs <- quantileScore(obs, qf[id, ], alpha, breaks = c(-10, union(quantile(qf[id, ]), quantile(qf[id, ]))) )$qs.orig 246 | allqs <- c(allqs, qs) 247 | 248 | } 249 | allqs 250 | } 251 | 252 | -------------------------------------------------------------------------------- /smart-meters/code/utils_ets.R: -------------------------------------------------------------------------------- 1 | # Functions needed to compute exponential smoothing forecasts. 2 | iterate <- function(theta, y, e_0, l_0, d_0, w_0, do.forecast = FALSE){ 3 | phi <- theta[1] 4 | alpha <- theta[2] 5 | delta <- theta[3] 6 | omega <- theta[4] 7 | 8 | n <- length(y) 9 | yhat <- e <- l <- d <- w <- numeric(n) + NA 10 | if(do.forecast){ 11 | H <- 48 12 | stopifnot(H <= 48) 13 | nf <- n + H 14 | ysim <- yhat <- e <- l <- d <- w <- numeric(nf) + NA 15 | } 16 | 17 | 18 | yhat_local_0 <- e_local_0 <- l_local_0 <- d_local_0 <- w_local_0 <- z <- numeric(m_2 * 2) + NA 19 | e_local_0[seq(1, m_2)] <- e_0 20 | l_local_0[seq(1, m_2)] <- l_0 21 | d_local_0[seq(1, m_2)] <- d_0 22 | w_local_0[seq(1, m_2)] <- w_0 23 | z[seq(m_2 + 1, 2 * m_2)] <- y[seq(m_2)] 24 | 25 | for(i in seq(m_2 + 1, 2 * m_2)){ 26 | 27 | yhat_local_0[i] <- l_local_0[i - 1] + d_local_0[i - m_1] + w_local_0[i - m_2] + phi * e_local_0[i - 1] 28 | e_local_0[i] <- z[i] - (l_local_0[i - 1] + d_local_0[i - m_1] + w_local_0[i - m_2]) # added - m_2 for y 29 | l_local_0[i] <- l_local_0[i - 1] + alpha * e_local_0[i] 30 | d_local_0[i] <- d_local_0[i - m_1] + delta * e_local_0[i] 31 | w_local_0[i] <- w_local_0[i - m_2] + omega * e_local_0[i] 32 | } 33 | yhat[seq(m_2)] <- yhat_local_0[m_2 + seq(1, m_2)] 34 | e[seq(m_2)] <- e_local_0[m_2 + seq(1, m_2)] 35 | l[seq(m_2)] <- l_local_0[m_2 + seq(1, m_2)] 36 | d[seq(m_2)] <- d_local_0[m_2 + seq(1, m_2)] 37 | w[seq(m_2)] <- w_local_0[m_2 + seq(1, m_2)] 38 | 39 | for(i in seq(m_2 + 1, n)){ 40 | yhat[i] <- l[i - 1] + d[i - m_1] + w[i - m_2] + phi * e[i - 1] 41 | e[i] <- y[i] - (l[i - 1] + d[i - m_1] + w[i - m_2]) 42 | l[i] <- l[i - 1] + alpha * e[i] 43 | d[i] <- d[i - m_1] + delta * e[i] 44 | w[i] <- w[i - m_2] + omega * e[i] 45 | } 46 | 47 | # mean forecast 48 | if(do.forecast){ 49 | 50 | for(h in seq(H)){ 51 | i <- n 52 | stopifnot(h <= m_1) 53 | # yhat[i + h] <- l[i] + (alpha * phi * (1 - phi^(h - 1))) / ((1 - phi) * e[i]) + d[i - m_1 + h] + w[i - m_2 + h] + phi^h * e[i] 54 | yhat[i + h] <- l[i] + (alpha * phi * (1 - phi^(h - 1)) / (1 - phi)) * e[i] + d[i - m_1 + h] + w[i - m_2 + h] + phi^h * e[i] 55 | } 56 | mf <- yhat[n + seq(H)] 57 | } 58 | 59 | # sample paths + quantile forecast 60 | if(do.forecast){ 61 | K <- 5000 62 | samples <- matrix(NA, nrow = K, ncol = H) 63 | 64 | residuals <- y - head(yhat, -H) 65 | mat_residuals <- matrix(residuals, ncol = 48, byrow = T) 66 | # bootstrap vectors of size 48 from the residuals 67 | ind <- sample(seq(nrow(mat_residuals)), K, replace = TRUE) 68 | 69 | for(k in seq(K)){ 70 | varepsilon <- mat_residuals[ind[k], ] 
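# varepsilon holds one bootstrapped day (48 half-hours) of in-sample residuals;
# pushing it through the state recursions below yields a simulated sample path
# that preserves the intraday dependence of the forecast errors.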
71 | for(h in seq(H)){ 72 | i <- n + h 73 | ysim[i] <- l[i - 1] + d[i - m_1] + w[i - m_2] + phi * e[i - 1] + varepsilon[h] 74 | e[i] <- ysim[i] - (l[i - 1] + d[i - m_1] + w[i - m_2]) 75 | l[i] <- l[i - 1] + alpha * e[i] 76 | d[i] <- d[i - m_1] + delta * e[i] 77 | w[i] <- w[i - m_2] + omega * e[i] 78 | } 79 | samples[k, ] <- ysim[n + seq(H)] 80 | } 81 | qf <- apply(samples, 2, quantile, prob = taus) 82 | # matplot(t(qf), type = 'l', lty = 1) 83 | mfsample <- apply(samples, 2, mean) 84 | 85 | yhat <- head(yhat, -H) 86 | e <- head(e, -H) 87 | l <- head(l, -H) 88 | d <- head(d, -H) 89 | w <- head(w, -H) 90 | } 91 | 92 | if(do.forecast){ 93 | RET <- list(yhat = yhat, e = e, l = l, d = d, w = w, mf = mf, qf = qf, residuals = residuals, mfsample = mfsample) 94 | }else{ 95 | RET <- list(yhat = yhat) 96 | } 97 | RET 98 | } 99 | 100 | func_to_optimize <- function(theta, y, e_0, l_0, d_0, w_0, do.forecast){ 101 | obj <- iterate(theta, y, e_0, l_0, d_0, w_0, do.forecast) 102 | mean((y - obj$yhat)^2) 103 | } 104 | 105 | -------------------------------------------------------------------------------- /smart-meters/code/utils_hts.R: -------------------------------------------------------------------------------- 1 | # Functions needed to compute MinT forecasts. 2 | 3 | shrink.estim <- function(x, tar) 4 | { 5 | if (is.matrix(x) == TRUE && is.numeric(x) == FALSE) 6 | stop("The data matrix must be numeric!") 7 | p <- ncol(x) 8 | n <- nrow(x) 9 | 10 | covm <- crossprod(x) / n 11 | corm <- cov2cor(covm) 12 | xs <- scale(x, center = FALSE, scale = sqrt(diag(covm))) 13 | v <- (1/(n * (n - 1))) * (crossprod(xs^2) - 1/n * (crossprod(xs))^2) 14 | diag(v) <- 0 15 | corapn <- cov2cor(tar) 16 | d <- (corm - corapn)^2 17 | lambda <- sum(v)/sum(d) 18 | lambda <- max(min(lambda, 1), 0) 19 | shrink.cov <- lambda * tar + (1 - lambda) * covm 20 | return(list(shrink.cov = shrink.cov, lambda = lambda )) 21 | } 22 | 23 | vec_w <- function(x){ 24 | n <- nrow(x) 25 | apply(x, 2, crossprod) / n 26 | } 27 | lowerD <- function(x) 28 | { 29 | n <- nrow(x) 30 | return(diag(vec_w(x))) 31 | } 32 | -------------------------------------------------------------------------------- /smart-meters/code/utils_kde.R: -------------------------------------------------------------------------------- 1 | # Functions needed to compute forecasts based on kernel density estimates. 2 | 3 | predictkde <- function(task = c("learning", "testing", "insample_info"), selected_bandwiths = NULL, selected_lambdas = NULL){ 4 | 5 | n_past_obs <- n_past_obs_kd 6 | 7 | if(task == "learning"){ 8 | 9 | ids_past <- tail(train$id, n_past_obs) 10 | ids_future <- validation$id 11 | 12 | ##### Bandwith interval ##### 13 | #n_base <- length(train$id) 14 | n_base <- n_past_obs 15 | if(grepl("KD-D", algo)){ 16 | n_approx <- n_base/48 17 | }else if(grepl("KD-IC", algo)){ 18 | #n_approx <- n_base/144 # 144 = 48 * 3 19 | n_approx <- (n_base/336)*4 #(should be 5 for weekdays and 3 for weekends. 
So (3+5)/2 = 4) 20 | }else{ 21 | stop("error in algo") 22 | } 23 | 24 | n_approx <- 8 25 | 26 | stopifnot(!is.null(n_approx)) 27 | 28 | if(mykernel == "normal" || mykernel == "truncated"){ 29 | x_samples <- sample(demand[ids_past], n_approx) 30 | }else if(mykernel == "lognormal"){ 31 | x_samples <- log(sample(demand[ids_past], n_approx)) 32 | } 33 | 34 | res <- sapply(seq(10), function(l){ 35 | sapply(seq(48), function(h){bw.nrd(sample(demand[ids_past][seq(h, length(ids_past), by = 48)], n_approx))}) 36 | }) 37 | bw_normal <- max(apply(res, 1, mean)) 38 | 39 | bandwiths_vec <- seq(min_bandwith, bw_normal, length.out = 5) 40 | bandwiths_subvec1 <- seq(bandwiths_vec[1], bandwiths_vec[2], length.out = 5) 41 | bandwiths_subvec2 <- seq(bandwiths_vec[2], bandwiths_vec[3], length.out = 5) 42 | bandwiths <- c(bandwiths_subvec1, bandwiths_subvec2, bandwiths_vec[-seq(3)]) 43 | #bandwiths <- seq(10^-4, bw_normal, length.out = 15) 44 | 45 | stopifnot(all(bandwiths>0)) 46 | 47 | nb_futuredays <- length(seq_validation_interval)/48 48 | 49 | #lambdas <- c(0.8, 0.90, 0.95, 1) 50 | lambdas <- c(0.2, 0.4, 0.6, 0.8, 0.90, 0.95, 1) 51 | 52 | }else if(task == "testing"){ 53 | stopifnot(length(selected_bandwiths) == 3) 54 | 55 | ids_past <- tail(learn$id, n_past_obs) 56 | ids_future <- test$id 57 | 58 | nb_futuredays <- length(seq_testing_interval)/48 59 | }else if(task == "insample_info"){ 60 | stopifnot(length(selected_bandwiths) == 3) 61 | 62 | ids_past <- head(learn$id, n_past_obs) 63 | ids_future <- tail(learn$id, -n_past_obs) 64 | nb_futuredays <- length(ids_future)/48 65 | } 66 | 67 | results <- vector("list", nb_futuredays) 68 | 69 | ic_days <- calendar$periodOfCycle[ids_future][seq(1, length(ids_future), by = 48)] 70 | 71 | 72 | for(id_future_day in seq(1, nb_futuredays)){ 73 | 74 | offset_nhours <- (id_future_day - 1) * 48 75 | 76 | ids_future_hours <- ids_future[offset_nhours + seq(1, 48)] 77 | 78 | 79 | if(offset_nhours > 0){ 80 | #ids_past_actual <- c(tail(ids_past, -offset_nhours), head(ids_future, offset_nhours)) 81 | ids_past_actual <- c(ids_past, ids_future)[offset_nhours + seq(n_past_obs)] 82 | }else{ 83 | ids_past_actual <- ids_past 84 | } 85 | 86 | # if the day is of IC type 1, 2 or 3, use different bandwidths 87 | # bandwiths is either a vector (learning) or a single number (testing) 88 | if(task == "testing" || task == "insample_info"){ 89 | ic_day <- ic_days[id_future_day] 90 | bandwiths <- selected_bandwiths[ic_day] 91 | lambdas <- selected_lambdas[ic_day] 92 | } 93 | 94 | # results[[id_future_day]] <- lapply(ids_future_hours, function(id){kde(id, ids_past_actual, bandwiths, lambda, task)}) 95 | if(length(lambdas) == 1){ 96 | results[[id_future_day]] <- lapply(ids_future_hours, function(id){kde(id, ids_past_actual, bandwiths, lambdas, task)}) 97 | }else{ 98 | results[[id_future_day]] <- lapply(lambdas, function(lambda){ 99 | lapply(ids_future_hours, function(id){kde(id, ids_past_actual, bandwiths, lambda, task)}) 100 | }) 101 | } 102 | 103 | } 104 | list(results = results, bandwiths = bandwiths, lambdas = lambdas, ic_days = ic_days) 105 | } 106 | 107 | ###################################################### 108 | kde <- function(id_query, ids_data, bandwiths, lambda, task){ 109 | #print(id_query) 110 | #### 111 | if(algo == "KD-U"){ 112 | ids_data_kept <- ids_data 113 | }else if(grepl("KD-D", algo)){ 114 | ids_data_kept <- ids_data[which(calendar$periodOfDay[ids_data] == calendar$periodOfDay[id_query])] 115 | }else if(grepl("KD-IC", algo)){ 116 | 117 | if(calendar$periodOfCycle[id_query] == 1){ # MONDAY TO FRIDAY 118 |
is_selected <- calendar$periodOfCycle[ids_data] == calendar$periodOfCycle[id_query] & 119 | calendar$periodOfDay[ids_data] == calendar$periodOfDay[id_query] 120 | }else{ # SATURDAY AND SUNDAY 121 | if(calendar$periodOfDay[id_query] == 1){ 122 | is_selected <- (calendar$periodOfCycle[ids_data] == calendar$periodOfCycle[id_query] & 123 | calendar$periodOfDay[ids_data] %in% (calendar$periodOfDay[id_query] + seq(0, 1))) | 124 | (calendar$dweek[ids_data] == (calendar$dweek[id_query] - 1) & calendar$periodOfDay[ids_data] == 48) 125 | }else if(calendar$periodOfDay[id_query] == 48){ 126 | is_selected <- (calendar$periodOfCycle[ids_data] == calendar$periodOfCycle[id_query] & 127 | calendar$periodOfDay[ids_data] %in% (calendar$periodOfDay[id_query] + seq(-1, 0))) | 128 | (calendar$dweek[ids_data] == (calendar$dweek[id_query]%%7 + 1) & calendar$periodOfDay[ids_data] == 1) 129 | }else{ 130 | is_selected <- calendar$periodOfCycle[ids_data] == calendar$periodOfCycle[id_query] & 131 | calendar$periodOfDay[ids_data] %in% (calendar$periodOfDay[id_query] + seq(-1, 1)) 132 | } 133 | } 134 | 135 | is_selected <- calendar$periodOfWeek[ids_data] == calendar$periodOfWeek[id_query] 136 | 137 | ids_data_kept <- ids_data[which(is_selected)] 138 | n <- length(ids_data_kept) 139 | normalized_weights <- rep(1, n)/n 140 | 141 | do.weighting <- TRUE 142 | if(do.weighting){ 143 | weights_all <- lambda^floor((tail(ids_data) - ids_data)/336) 144 | weights_selected <- weights_all[which(is_selected)] 145 | normalized_weights <- weights_selected/sum(weights_selected) 146 | } 147 | } 148 | 149 | 150 | x <- demand[ids_data_kept] 151 | #### 152 | 153 | n <- length(x) 154 | minx <- min(x) 155 | maxx <- max(x) 156 | #logx <- log(x) 157 | 158 | lower_bound <- 0 #minx 159 | r <- 1 # 3 160 | 161 | #xgrid <- c(seq(from = minx , to = quantile(x, .9), length = 90), seq(from = quantile(x, .91), to = maxx , length = 10)) 162 | xgrid <- c(0, 163 | seq(from = minx , to = quantile(x, .9), length = 190), 164 | seq(from = quantile(x, .91), to = maxx, length = 10), 165 | seq(from = maxx, to = maxx + r* max(bandwiths), length = 3)) 166 | xgrid <- sort(unique(xgrid)) 167 | 168 | vec_crps <- residuals <- squared_error <- mu_hat <- var_hat <- numeric(length(bandwiths)) 169 | for(i in seq_along(bandwiths)){ 170 | h <- bandwiths[i] 171 | vech <- rep(h, length(x)) 172 | 173 | if(mykernel == "normal"){ 174 | 175 | ids_boundary <- which(x <= (lower_bound + r * h)) 176 | vech[ids_boundary] <- pmax((x[ids_boundary] - lower_bound) / r, min_bandwith) 177 | 178 | } 179 | 180 | cdfs <- sapply(seq(length(x)), function(i){ 181 | xi <- x[i] 182 | if(mykernel == "normal"){ 183 | #pnorm((xgrid - obs)/h)/(n) 184 | #pnorm((xgrid - xi)/vech[i])/(n) 185 | pnorm((xgrid - xi)/vech[i]) * normalized_weights[i] 186 | }else if(mykernel == "lognormal"){ 187 | plnorm(xgrid, meanlog = log(xi), sdlog = vech[i], lower.tail = TRUE, log.p = FALSE)/n 188 | }else if(mykernel == "truncated"){ 189 | ptnorm(xgrid, mean = xi, sd = vech[i], lower = lowerx, upper = upperx, lower.tail = TRUE, log.p = FALSE)/n 190 | } 191 | }) 192 | cdf <- rowSums(cdfs) 193 | 194 | if(task == "insample_info"){ 195 | 196 | mycdf <- function(xq){ 197 | sum(sapply(seq(length(x)), function(i){ 198 | xi <- x[i] 199 | pnorm((xq - xi)/vech[i]) * normalized_weights[i] 200 | })) 201 | } 202 | } 203 | 204 | 205 | # Mean forecasts 206 | if(mykernel == "normal"){ 207 | all_mus <- x 208 | }else if(mykernel == "lognormal"){ 209 | all_mus <- sapply(x, function(xi){ exp(log(xi) + (h^2)/2) }) 210 | }else if(mykernel == 
"truncated"){ 211 | all_mus <- sapply(x, function(xi){ 212 | alpha <- (lowerx - xi)/h 213 | beta <- (upperx - xi)/h 214 | xi + ((dnorm(alpha) - dnorm(beta))*h) / (pnorm(beta) - pnorm(alpha)) 215 | }) 216 | } 217 | #mu_hat[i] <- sum(all_mus)/n 218 | mu_hat[i] <- sum(all_mus * normalized_weights) 219 | 220 | if(mykernel == "normal"){ 221 | #var_hat[i] <- sum(x^2)/n + sum(vech^2)/n - (mu_hat[i])^2 222 | var_hat[i] <- sum(normalized_weights * ((x - mu_hat[i])^2 + vech^2)) 223 | } 224 | 225 | obs <- demand[id_query] 226 | 227 | 228 | if(task == "insample_info"){ 229 | upit <- mycdf(obs) 230 | #upit <- ecdf(x)(obs) 231 | } 232 | 233 | residuals[i] <- obs - mu_hat[i] 234 | squared_error[i] <- (residuals[i])^2 235 | 236 | if(task != "insample_info"){ 237 | if(mykernel == "normal"){ 238 | 239 | vec_crps[i] <- crps_mixture(x, vech, normalized_weights, obs) 240 | 241 | }else{ 242 | invkcdf <- approxfun(cdf, xgrid, rule = 2) 243 | X1 <- invkcdf(runif(1000)) 244 | vec_crps[i] <- crps_sampling(X1, obs) 245 | #vec_crps[i] <- mean(abs(X1 - obs)) - 0.5 * mean(abs(X1 - invkcdf(u2))) 246 | } 247 | } 248 | 249 | if(task != "learning"){ 250 | invcdf <- approxfun(cdf, xgrid, rule = 2) 251 | q_hat <- invcdf(taus) 252 | } 253 | 254 | }# bandwiths 255 | 256 | 257 | if(task == "learning"){ 258 | ret <- vec_crps #list(crps = crps) 259 | }else if(task == "testing"){ 260 | ret <- list(crps = vec_crps, squared_error = squared_error, q_hat = q_hat, mu_hat = mu_hat, var_hat = var_hat) 261 | }else if(task == "insample_info"){ 262 | ret <- list(residuals = residuals, q_hat = q_hat, mu_hat = mu_hat, var_hat = var_hat, upit = upit) 263 | }else{ 264 | stop("ERROR ...") 265 | } 266 | ret 267 | } --------------------------------------------------------------------------------