├── 01-intro.R ├── 02-graphics.R ├── 03-decomposition.R ├── 04-features.R ├── 05-toolbox.R ├── 06-judgmental.R ├── 07-regression.R ├── 08-exponential-smoothing.R ├── 09-arima.R ├── 10-dynamic-regression.R ├── 11-hierarchical-forecasting.R ├── 12-advanced.R ├── 13-practical.R ├── README.md └── before-each-chapter.R /01-intro.R: -------------------------------------------------------------------------------- 1 | source("before-each-chapter.R") 2 | 3 | ## ----beer, fig.cap='Australian quarterly beer production: 2000Q1–2010Q2, with two years of forecasts.', echo=FALSE, message=FALSE---- 4 | ausbeer2 <- aus_production %>% 5 | filter(year(Quarter) >= 2000) 6 | ausbeer2 %>% 7 | model(ETS(Beer)) %>% 8 | forecast() %>% 9 | autoplot(ausbeer2) + 10 | labs(y = "Megalitres", title = "Australian beer production") 11 | 12 | ## ----austa1, echo=FALSE, message=FALSE, warning=FALSE, fig.cap="Total international visitors to Australia (1980-2015) along with ten possible futures."---- 13 | austa <- readr::read_csv("https://OTexts.com/fpp3/extrafiles/austa.csv") %>% 14 | as_tsibble(index = Year) 15 | sim <- austa %>% 16 | model(ETS()) %>% 17 | generate(h = 10, times = 10) %>% 18 | mutate(replicate = factor(.rep, levels = 1:10, labels = paste("Future", 1:10))) 19 | p1 <- ggplot(austa, aes(x = Year)) + 20 | geom_line(aes(y = Visitors, colour = "Data")) + 21 | geom_line(aes(y = .sim, colour = replicate), data = sim) + 22 | labs(y = "Visitors (millions)", 23 | title = "Total international arrivals to Australia") + 24 | scale_colour_manual( 25 | values = c("#000000", rainbow(10)), 26 | breaks = c("Data", paste("Future", 1:10)), 27 | name = " " 28 | ) 29 | p2 <- austa %>% 30 | model(ETS = ETS(Visitors)) %>% 31 | forecast(h = "10 years") %>% 32 | autoplot(austa) + 33 | labs(y = "Visitors (millions)", 34 | title = "Forecasts of total international arrivals to Australia") 35 | aligned_plots <- patchwork::align_plots(p1,p2) 36 | aligned_plots[[1]] 37 | 38 | ## ----austa2, dependson="austa1", echo=FALSE, message=FALSE, warning=FALSE, fig.cap="(ref:figcapausta2)"---- 39 | aligned_plots[[2]] 40 | 41 | -------------------------------------------------------------------------------- /02-graphics.R: -------------------------------------------------------------------------------- 1 | source("before-each-chapter.R") 2 | 3 | ## ----tstable, echo=FALSE---------------------------------------------------------------------------------- 4 | x <- c(123, 39, 78, 52, 110) 5 | yr <- 2015:2019 6 | knitr::kable(tibble(Year = yr, Observation = x), booktabs = TRUE) 7 | 8 | ## ----first-tsibble---------------------------------------------------------------------------------------- 9 | y <- tsibble( 10 | Year = 2015:2019, 11 | Observation = c(123, 39, 78, 52, 110), 12 | index = Year 13 | ) 14 | 15 | ## ----tstablemonth, echo=FALSE----------------------------------------------------------------------------- 16 | z <- tibble(Month = paste(2019, month.abb[1:5]), Observation = c(50, 23, 34, 30, 25)) 17 | # knitr::kable(z, booktabs=TRUE) 18 | 19 | ## ----tstablemonth2---------------------------------------------------------------------------------------- 20 | z 21 | 22 | ## ----month-tsibble---------------------------------------------------------------------------------------- 23 | z %>% 24 | mutate(Month = yearmonth(Month)) %>% 25 | as_tsibble(index = Month) 26 | 27 | ## ----tstable2, echo=FALSE, results=ifelse(html, 'markup', 'asis')----------------------------------------- 28 | tab <- tribble( 29 | ~`Frequency`, ~Function, 30 | "Annual", "`start:end`", 31 | "Quarterly", "`yearquarter()`", 32 | "Monthly", "`yearmonth()`", 33 | "Weekly", "`yearweek()`", 34 | "Daily", "`as_date()`, `ymd()`", 35 | "Sub-daily", "`as_datetime()`, `ymd_hms()`" 36 | ) 37 | if(!html) { 38 | tab <- tab %>% 39 | mutate( 40 | Function = stringr::str_replace(Function, "`","\\\\texttt{"), 41 | Function = stringr::str_replace(Function, "`, `","}, \\\\texttt{"), 42 | Function = stringr::str_replace(Function, "`","}"), 43 | Function = stringr::str_replace_all(Function,"_","\\\\_") 44 | ) 45 | } 46 | tab %>% knitr::kable(booktabs = TRUE, escape=html) 47 | 48 | ## ----tstablekey------------------------------------------------------------------------------------------- 49 | olympic_running 50 | 51 | ## ----distinctfn------------------------------------------------------------------------------------------- 52 | olympic_running %>% distinct(Sex) 53 | 54 | ## ----pbs1------------------------------------------------------------------------------------------------- 55 | PBS 56 | 57 | ## ----pbs2------------------------------------------------------------------------------------------------- 58 | PBS %>% 59 | filter(ATC2 == "A10") 60 | 61 | ## ----pbs3------------------------------------------------------------------------------------------------- 62 | PBS %>% 63 | filter(ATC2 == "A10") %>% 64 | select(Month, Concession, Type, Cost) 65 | 66 | ## ----pbs4------------------------------------------------------------------------------------------------- 67 | PBS %>% 68 | filter(ATC2 == "A10") %>% 69 | select(Month, Concession, Type, Cost) %>% 70 | summarise(TotalC = sum(Cost)) 71 | 72 | ## ----pbs5------------------------------------------------------------------------------------------------- 73 | PBS %>% 74 | filter(ATC2 == "A10") %>% 75 | select(Month, Concession, Type, Cost) %>% 76 | summarise(TotalC = sum(Cost)) %>% 77 | mutate(Cost = TotalC/1e6) 78 | 79 | ## ----a10-------------------------------------------------------------------------------------------------- 80 | PBS %>% 81 | filter(ATC2 == "A10") %>% 82 | select(Month, Concession, Type, Cost) %>% 83 | summarise(TotalC = sum(Cost)) %>% 84 | mutate(Cost = TotalC / 1e6) -> a10 85 | 86 | ## ----prison, echo=FALSE, warning=FALSE, message=FALSE, eval=TRUE------------------------------------------ 87 | prison <- readr::read_csv("https://OTexts.com/fpp3/extrafiles/prison_population.csv") 88 | prison %>% 89 | head(10) %>% 90 | knitr::kable(booktabs = TRUE) 91 | 92 | ## ----prison2, dependson='prison'-------------------------------------------------------------------------- 93 | prison <- prison %>% 94 | mutate(Quarter = yearquarter(Date)) %>% 95 | select(-Date) %>% 96 | as_tsibble(key = c(State, Gender, Legal, Indigenous), 97 | index = Quarter) 98 | 99 | prison 100 | 101 | ## ----freqtable, echo=FALSE, message=FALSE----------------------------------------------------------------- 102 | intervals <- list( 103 | Quarters = tsibble::new_interval(quarter = 1), 104 | Months = tsibble::new_interval(month = 1), 105 | Weeks = tsibble::new_interval(week = 1), 106 | Days = tsibble::new_interval(day = 1), 107 | Hours = tsibble::new_interval(hour = 1), 108 | Minutes = tsibble::new_interval(minute = 1), 109 | Seconds = tsibble::new_interval(second = 1) 110 | ) 111 | 112 | intervals %>% 113 | purrr::map(common_periods) %>% 114 | purrr::map(as.list) %>% 115 | purrr::map_dfr(as_tibble, .id = "Data") %>% 116 | purrr::set_names(., stringr::str_to_sentence(colnames(.))) %>% 117 | select(Data, Minute, Hour, Day, Week, Year) %>% 118 | mutate_all(format, scientific = FALSE, nsmall = 2) %>% 119 | mutate_all(~ gsub(".00", "", ., fixed = TRUE)) %>% 120 | mutate_all(~ gsub(" NA", "", ., fixed = TRUE)) %>% 121 | knitr::kable(booktabs = TRUE) 122 | 123 | ## ----ansett, fig.cap="Weekly economy passenger load on Ansett Airlines."---------------------------------- 124 | melsyd_economy <- ansett %>% 125 | filter(Airports == "MEL-SYD", Class == "Economy") %>% 126 | mutate(Passengers = Passengers/1000) 127 | autoplot(melsyd_economy, Passengers) + 128 | labs(title = "Ansett airlines economy class", 129 | subtitle = "Melbourne-Sydney", 130 | y = "Passengers ('000)") 131 | 132 | ## ----a10plot, fig.cap="Monthly sales of antidiabetic drugs in Australia.", dependson='a10'---------------- 133 | autoplot(a10, Cost) + 134 | labs(y = "$ (millions)", 135 | title = "Australian antidiabetic drug sales") 136 | 137 | ## ----fourexamples, echo=FALSE, fig.cap="Four examples of time series showing different patterns.", fig.env="figure*", warning = FALSE, message=FALSE---- 138 | smallfonts <- theme( 139 | text = element_text(size = 9), 140 | axis.text = element_text(size = 8) 141 | ) 142 | p1 <- fma::hsales %>% 143 | as_tsibble() %>% 144 | autoplot(value) + smallfonts + 145 | labs(y = "Houses (millions)", title = "Sales of new one-family houses, USA") 146 | p2 <- fma::ustreas %>% 147 | as_tsibble() %>% 148 | autoplot(value) + smallfonts + 149 | labs(x = "Day", y = "Number", title = "US treasury bill contracts") 150 | p3 <- aus_production %>% 151 | autoplot(Electricity) + smallfonts + 152 | labs(y = "kWh (billion) ", title = "Australian quarterly electricity production") 153 | p4 <- gafa_stock %>% 154 | filter(Symbol == "GOOG") %>% 155 | autoplot(difference(Close)) + smallfonts + 156 | labs(y = "$US", title = "Daily changes in Google closing stock price") 157 | 158 | (p1 | p2) / (p3 | p4) 159 | 160 | ## ----seasonplot1, fig.cap="Seasonal plot of monthly antidiabetic drug sales in Australia.", dependson='a10', warning=FALSE, echo=FALSE---- 161 | a10 %>% 162 | gg_season(Cost, labels = "both") + 163 | labs(y = "$ (millions)", 164 | title = "Seasonal plot: Antidiabetic drug sales") + 165 | expand_limits(x = ymd(c("1972-12-28", "1973-12-04"))) 166 | 167 | ## ----multipleseasonplots1, warning=FALSE, fig.cap="Seasonal plot showing daily seasonal patterns for Victorian electricity demand.", fig.asp=0.6---- 168 | vic_elec %>% gg_season(Demand, period = "day") + 169 | theme(legend.position = "none") + 170 | labs(y="MWh", title="Electricity demand: Victoria") 171 | 172 | ## ----multipleseasonplots2, warning=FALSE, fig.cap="Seasonal plot showing weekly seasonal patterns for Victorian electricity demand.", fig.asp=0.6---- 173 | vic_elec %>% gg_season(Demand, period = "week") + 174 | theme(legend.position = "none") + 175 | labs(y="MWh", title="Electricity demand: Victoria") 176 | 177 | ## ----multipleseasonplots3, warning=FALSE, fig.cap="Seasonal plot showing yearly seasonal patterns for Victorian electricity demand.", fig.asp=0.6---- 178 | vic_elec %>% gg_season(Demand, period = "year") + 179 | labs(y="MWh", title="Electricity demand: Victoria") 180 | 181 | ## ----subseriesplot, fig.cap="Seasonal subseries plot of monthly antidiabetic drug sales in Australia.", dependson='a10', fig.height=3, fig.width=8, fig.asp=0.375, warning=FALSE---- 182 | a10 %>% 183 | gg_subseries(Cost) + 184 | labs( 185 | y = "$ (millions)", 186 | title = "Australian antidiabetic drug sales" 187 | ) 188 | 189 | ## ----holidays--------------------------------------------------------------------------------------------- 190 | holidays <- tourism %>% 191 | filter(Purpose == "Holiday") %>% 192 | group_by(State) %>% 193 | summarise(Trips = sum(Trips)) 194 | 195 | ## ----holidaysprint---------------------------------------------------------------------------------------- 196 | holidays 197 | 198 | ## ----holidays-plot, echo=TRUE, dependson="holidays", fig.height=3.9, fig.asp=0.5, fig.cap="Time plots of Australian domestic holidays by state."---- 199 | autoplot(holidays, Trips) + 200 | labs(y = "Overnight trips ('000)", 201 | title = "Australian domestic holidays") 202 | 203 | ## ----holidaysseason, fig.height=9, fig.asp=1.3, fig.cap="Season plots of Australian domestic holidays by state.", warning=FALSE---- 204 | gg_season(holidays, Trips) + 205 | labs(y = "Overnight trips ('000)", 206 | title = "Australian domestic holidays") 207 | 208 | ## ----holidayssubseries, fig.height=10, fig.width=8, fig.asp=1.3, fig.cap="Subseries plots of Australian domestic holidays by state.", warning=FALSE---- 209 | holidays %>% 210 | gg_subseries(Trips) + 211 | labs(y = "Overnight trips ('000)", 212 | title = "Australian domestic holidays") 213 | 214 | ## ----edemand, fig.cap="Half hourly electricity demand in Victoria, Australia, for 2014.", fig.height=2.5, fig.asp=0.45---- 215 | vic_elec %>% 216 | filter(year(Time) == 2014) %>% 217 | autoplot(Demand) + 218 | labs(y = "GW", 219 | title = "Half-hourly electricity demand: Victoria") 220 | 221 | ## ----victemp, fig.cap="Half hourly temperature in Melbourne, Australia, for 2014.", fig.height=2.5, fig.asp=0.45---- 222 | vic_elec %>% 223 | filter(year(Time) == 2014) %>% 224 | autoplot(Temperature) + 225 | labs( 226 | y = "Degrees Celsius", 227 | title = "Half-hourly temperatures: Melbourne, Australia" 228 | ) 229 | 230 | ## ----edemand2, fig.cap="Half-hourly electricity demand plotted against temperature for 2014 in Victoria, Australia."---- 231 | vic_elec %>% 232 | filter(year(Time) == 2014) %>% 233 | ggplot(aes(x = Temperature, y = Demand)) + 234 | geom_point() + 235 | labs(x = "Temperature (degrees Celsius)", 236 | y = "Electricity demand (GW)") 237 | 238 | ## ----corr, fig.cap="Examples of data sets with different levels of correlation.", echo=FALSE, warning=FALSE, message=FALSE, fig.width=10, fig.height=4.5, fig.asp=0.55---- 239 | corplot <- function(rho) { 240 | library(mvtnorm) 241 | x <- rmvnorm(100, sigma = matrix(c(1, rho, rho, 1), 2, 2)) 242 | ggplot(as.data.frame(x), aes(x = V1, y = V2)) + 243 | geom_point() + 244 | labs( 245 | x = "", 246 | y = "", 247 | title = paste("Correlation =", sprintf("%.2f", rho)) 248 | ) + 249 | xlim(-3.5, 3.5) + 250 | ylim(-3.5, 3.5) 251 | } 252 | set.seed(12345) 253 | p1 <- corplot(-0.99) 254 | p2 <- corplot(-0.75) 255 | p3 <- corplot(-0.5) 256 | p4 <- corplot(-0.25) 257 | p5 <- corplot(0.99) 258 | p6 <- corplot(0.75) 259 | p7 <- corplot(0.5) 260 | p8 <- corplot(0.25) 261 | 262 | (p1 | p2 | p3 | p4) / (p5 | p6 | p7 | p8) 263 | 264 | ## ----eleccorrelation, include=FALSE----------------------------------------------------------------------- 265 | x <- vic_elec %>% filter(year(Time) == 2014) 266 | eleccor <- cor(x$Temperature, x$Demand) 267 | 268 | ## ----anscombe, fig.cap="(ref:anscombe)", echo=FALSE, fig.asp=1, out.width="55%", fig.height=4,fig.width=4---- 269 | p1 <- ggplot(anscombe, aes(x = x1, y = y1)) + 270 | geom_point() + 271 | labs(x = "x", y = "y") 272 | p2 <- ggplot(anscombe, aes(x = x2, y = y2)) + 273 | geom_point() + 274 | labs(x = "x", y = "y") 275 | p3 <- ggplot(anscombe, aes(x = x3, y = y3)) + 276 | geom_point() + 277 | labs(x = "x", y = "y") 278 | p4 <- ggplot(anscombe, aes(x = x4, y = y4)) + 279 | geom_point() + 280 | labs(x = "x", y = "y") 281 | (p1 | p2) / (p3 | p4) 282 | 283 | ## ----vntimeplots, fig.cap="Quarterly visitor nights for the states and territories of Australia.", fig.asp=1.3, out.width="100%"---- 284 | visitors <- tourism %>% 285 | group_by(State) %>% 286 | summarise(Trips = sum(Trips)) 287 | visitors %>% 288 | ggplot(aes(x = Quarter, y = Trips)) + 289 | geom_line() + 290 | facet_grid(vars(State), scales = "free_y") + 291 | labs(title = "Australian domestic tourism", 292 | y= "Overnight trips ('000)") 293 | 294 | ## ----ScatterMatrixch2, fig.cap="A scatterplot matrix of the quarterly visitor nights in the states and territories of Australia.", fig.asp=1, fig.height=10, fig.width=10, out.width="100%", message=FALSE, fig.env="figure*"---- 295 | visitors %>% 296 | pivot_wider(values_from=Trips, names_from=State) %>% 297 | GGally::ggpairs(columns = 2:9) 298 | 299 | ## ----beerlagplot, fig.cap="Lagged scatterplots for quarterly beer production.", fig.asp=1----------------- 300 | recent_production <- aus_production %>% 301 | filter(year(Quarter) >= 2000) 302 | recent_production %>% 303 | gg_lag(Beer, geom = "point") + 304 | labs(x = "lag(Beer, k)") 305 | 306 | ## ----beeracfraw, dependson='beerlagplot'------------------------------------------------------------------ 307 | recent_production %>% ACF(Beer, lag_max = 9) 308 | 309 | ## ----beeracf, fig.cap="Autocorrelation function of quarterly beer production.", fig.asp=0.3, dependson="beerlagplot"---- 310 | recent_production %>% 311 | ACF(Beer) %>% 312 | autoplot() + labs(title="Australian beer production") 313 | 314 | ## ----acfa10, echo=TRUE, fig.cap="ACF of monthly Australian antidiabetic drug sales.", fig.asp=0.3, dependson="aelec"---- 315 | a10 %>% 316 | ACF(Cost, lag_max = 48) %>% 317 | autoplot() + 318 | labs(title="Australian antidiabetic drug sales") 319 | 320 | ## ----wnoise, fig.cap="A white noise time series.", fig.asp=0.5-------------------------------------------- 321 | set.seed(30) 322 | y <- tsibble(sample = 1:50, wn = rnorm(50), index = sample) 323 | y %>% autoplot(wn) + labs(title = "White noise", y = "") 324 | 325 | ## ----wnoiseacf, fig.cap="Autocorrelation function for the white noise series.", fig.asp=0.3, dependson="wnoise"---- 326 | y %>% 327 | ACF(wn) %>% 328 | autoplot() + labs(title = "White noise") 329 | 330 | ## ----acfguess, fig.asp=0.45, fig.width=10, echo=FALSE, message=FALSE, warning=FALSE, out.width="135%"----- 331 | cowtemp <- as_tsibble(fma::cowtemp) 332 | USAccDeaths <- as_tsibble(USAccDeaths) 333 | AirPassengers <- as_tsibble(AirPassengers) 334 | mink <- as_tsibble(fma::mink) 335 | tp1 <- autoplot(cowtemp, value) + 336 | labs(x = "", y = "chirps per minute", title = "1. Daily temperature of cow") 337 | tp2 <- autoplot(USAccDeaths, value) + 338 | labs(x = "", y = "thousands", title = "2. Monthly accidental deaths") 339 | tp3 <- autoplot(AirPassengers, value) + 340 | labs(x = "", y = "thousands", title = "3. Monthly air passengers") 341 | tp4 <- autoplot(mink, value) + 342 | labs(x = "", y = "thousands", title = "4. Annual mink trappings") 343 | acfb <- ACF(cowtemp, value) %>% 344 | autoplot() + 345 | labs(x = "", title = "B") + 346 | ylim(-0.45, 1) 347 | acfa <- ACF(USAccDeaths, value) %>% 348 | autoplot() + 349 | labs(x = "", title = "A") + 350 | ylim(-0.45, 1) 351 | acfd <- ACF(AirPassengers, value) %>% 352 | autoplot() + 353 | labs(x = "", title = "D") + 354 | ylim(-0.45, 1) 355 | acfc <- ACF(mink, value) %>% 356 | autoplot() + 357 | labs(x = "", title = "C") + 358 | ylim(-0.45, 1) 359 | (tp1 / acfa) | (tp2 / acfb) | (tp3 / acfc) | (tp4 / acfd) 360 | 361 | -------------------------------------------------------------------------------- /03-decomposition.R: -------------------------------------------------------------------------------- 1 | source("before-each-chapter.R") 2 | 3 | ## ----gdp-per-capita, fig.cap = "Australian GDP per-capita.", fig.asp=0.45--------------------------------- 4 | global_economy %>% 5 | filter(Country == "Australia") %>% 6 | autoplot(GDP/Population) + 7 | labs(title= "GDP per capita", y = "$US") 8 | 9 | ## ----printretail0, message=FALSE, warning=FALSE----------------------------------------------------------- 10 | print_retail <- aus_retail %>% 11 | filter(Industry == "Newspaper and book retailing") %>% 12 | group_by(Industry) %>% 13 | index_by(Year = year(Month)) %>% 14 | summarise(Turnover = sum(Turnover)) 15 | aus_economy <- global_economy %>% 16 | filter(Code == "AUS") 17 | 18 | ## ----printretail, message=FALSE, warning=FALSE, fig.cap='Turnover for the Australian print media industry in Australian dollars. The "Adjusted" turnover has been adjusted for inflation using the CPI.'---- 19 | print_retail %>% 20 | left_join(aus_economy, by = "Year") %>% 21 | mutate(Adjusted_turnover = Turnover / CPI * 100) %>% 22 | pivot_longer(c(Turnover, Adjusted_turnover), 23 | values_to = "Turnover") %>% 24 | mutate(name = factor(name, 25 | levels=c("Turnover","Adjusted_turnover"))) %>% 26 | ggplot(aes(x = Year, y = Turnover)) + 27 | geom_line() + 28 | facet_grid(name ~ ., scales = "free_y") + 29 | labs(title = "Turnover: Australian print media industry", 30 | y = "$AU") 31 | 32 | ## ----BoxCoxlambda, echo=TRUE, fig.asp=0.5, fig.cap="Transformed Australian quarterly gas production with the $\\lambda$ parameter chosen using the Guerrero method."---- 33 | lambda <- aus_production %>% 34 | features(Gas, features = guerrero) %>% 35 | pull(lambda_guerrero) 36 | aus_production %>% 37 | autoplot(box_cox(Gas, lambda)) + 38 | labs(y = "", 39 | title = latex2exp::TeX(paste0( 40 | "Transformed gas production with $\\lambda$ = ", 41 | round(lambda,2)))) 42 | 43 | ## ----usretailemployment, fig.cap="Total number of persons employed in US retail.", fig.asp=0.5------------ 44 | us_retail_employment <- us_employment %>% 45 | filter(year(Month) >= 1990, Title == "Retail Trade") %>% 46 | select(-Series_ID) 47 | autoplot(us_retail_employment, Employed) + 48 | labs(y = "Persons (thousands)", 49 | title = "Total employment in US retail") 50 | 51 | ## ----empl-stl1, echo=TRUE, dependson='us_retail_employment'----------------------------------------------- 52 | dcmp <- us_retail_employment %>% 53 | model(stl = STL(Employed)) 54 | components(dcmp) 55 | 56 | ## ----empltrend, fig.cap="Total number of persons employed in US retail: the trend-cycle component (orange) and the raw data (grey).", fig.asp=0.5---- 57 | components(dcmp) %>% 58 | as_tsibble() %>% 59 | autoplot(Employed, colour="gray") + 60 | geom_line(aes(y=trend), colour = "#D55E00") + 61 | labs( 62 | y = "Persons (thousands)", 63 | title = "Total employment in US retail" 64 | ) 65 | 66 | ## ----emplstl, fig.cap="The total number of persons employed in US retail (top) and its three additive components.", fig.asp=0.9---- 67 | components(dcmp) %>% autoplot() 68 | 69 | ## ----empl-retail-sa, fig.cap="Seasonally adjusted retail employment data (blue) and the original data (grey).", fig.asp=0.5---- 70 | components(dcmp) %>% 71 | as_tsibble() %>% 72 | autoplot(Employed, colour = "gray") + 73 | geom_line(aes(y=season_adjust), colour = "#0072B2") + 74 | labs(y = "Persons (thousands)", 75 | title = "Total employment in US retail") 76 | 77 | ## ----aus-exports, fig.cap="Australian exports of goods and services: 1960--2017.", echo=TRUE, fig.asp=0.5---- 78 | global_economy %>% 79 | filter(Country == "Australia") %>% 80 | autoplot(Exports) + 81 | labs(y = "% of GDP", title = "Total Australian exports") 82 | 83 | ## ----aus-exports-tbl, echo=FALSE-------------------------------------------------------------------------- 84 | options(knitr.kable.NA = "") 85 | 86 | aus_exports <- global_economy %>% 87 | filter(Country == "Australia") %>% 88 | select(Exports) %>% 89 | mutate( 90 | `5-MA` = slider::slide_dbl(Exports, mean, .before = 2L, .after = 2L, .complete = TRUE) 91 | ) %>% 92 | as_tibble() %>% 93 | select(Year, Exports, `5-MA`) 94 | out <- bind_rows( 95 | head(aus_exports, 8), 96 | tail(aus_exports, 8) 97 | ) %>% 98 | mutate( 99 | Year = as.integer(Year), 100 | Exports = format(Exports, digits = 4) %>% stringr::str_replace(" NA", ""), 101 | `5-MA` = format(`5-MA`, digits = 4) %>% stringr::str_replace(" NA", "") 102 | ) 103 | out <- rbind( 104 | out[1:8, ], 105 | rep("...", 3), 106 | out[9:16, ] 107 | ) %>% knitr::kable( 108 | booktabs = TRUE, format.args = list(digits = 6, trim = FALSE), 109 | caption = "Annual Australian exports of goods and services: 1960--2017." 110 | ) 111 | if (!html) { 112 | out <- kableExtra::kable_styling(out, latex_options = "hold_position") 113 | } 114 | out 115 | 116 | ## ----aus-exports-code------------------------------------------------------------------------------------- 117 | aus_exports <- global_economy %>% 118 | filter(Country == "Australia") %>% 119 | mutate( 120 | `5-MA` = slider::slide_dbl(Exports, mean, 121 | .before = 2, .after = 2, .complete = TRUE) 122 | ) 123 | 124 | ## ----aus-exports-plot, fig.cap="Australian exports (black) along with the 5-MA estimate of the trend-cycle (orange).", echo=TRUE, warning=FALSE,message=FALSE, fig.asp=0.5---- 125 | aus_exports %>% 126 | autoplot(Exports) + 127 | geom_line(aes(y = `5-MA`), colour = "#D55E00") + 128 | labs(y = "% of GDP", 129 | title = "Total Australian exports") + 130 | guides(colour = guide_legend(title = "series")) 131 | 132 | ## ----aus-exports-compare, fig.cap="Different moving averages applied to the Australian exports data.", echo=FALSE, warning=FALSE,message=FALSE---- 133 | global_economy %>% 134 | filter(Country == "Australia") %>% 135 | transmute( 136 | Exports = Exports, 137 | `3-MA` = slider::slide_dbl(Exports, mean, .before = 1, .after = 1, .complete = TRUE), 138 | `5-MA` = slider::slide_dbl(Exports, mean, .before = 2, .after = 2, .complete = TRUE), 139 | `7-MA` = slider::slide_dbl(Exports, mean, .before = 3, .after = 3, .complete = TRUE), 140 | `9-MA` = slider::slide_dbl(Exports, mean, .before = 4, .after = 4, .complete = TRUE), 141 | ) %>% 142 | pivot_longer(-c(Year, Exports)) %>% 143 | ggplot(aes(x = Year, y = Exports)) + 144 | geom_line() + 145 | geom_line(aes(y = value), colour = "#D55E00") + 146 | facet_wrap(name ~ .) + 147 | labs(y="% of GDP", 148 | title = "Total Australian exports") 149 | 150 | ## ----beerma, echo=TRUE------------------------------------------------------------------------------------ 151 | beer <- aus_production %>% 152 | filter(year(Quarter) >= 1992) %>% 153 | select(Quarter, Beer) 154 | beer_ma <- beer %>% 155 | mutate( 156 | `4-MA` = slider::slide_dbl(Beer, mean, 157 | .before = 1, .after = 2, .complete = TRUE), 158 | `2x4-MA` = slider::slide_dbl(`4-MA`, mean, 159 | .before = 1, .after = 0, .complete = TRUE) 160 | ) 161 | 162 | ## ----matable, results='asis', echo=FALSE------------------------------------------------------------------ 163 | format_num <- function(x) ifelse(is.na(x), "", format(x, nsmall = 2)) 164 | options(knitr.kable.NA = "") 165 | out <- bind_rows( 166 | head(beer_ma, 6), 167 | tail(beer_ma, 6) 168 | ) %>% 169 | mutate_if(is.numeric, format_num) %>% 170 | as_tibble() %>% 171 | mutate(Quarter = as.character(Quarter)) 172 | out <- rbind( 173 | out[1:6, ], 174 | rep("...", 4), 175 | out[7:12, ] 176 | ) %>% knitr::kable( 177 | booktabs = TRUE, 178 | caption = "A moving average of order 4 applied to the quarterly beer data, followed by a moving average of order 2." 179 | ) 180 | if (!html) { 181 | out <- kableExtra::kable_styling(out, latex_options = "hold_position") 182 | } 183 | out 184 | 185 | ## ----empl-MA, fig.cap="A 2x12-MA applied to the US retail employment series.", echo=TRUE, warning=FALSE, dependson='us_retail_employment', fig.asp=0.5---- 186 | us_retail_employment_ma <- us_retail_employment %>% 187 | mutate( 188 | `12-MA` = slider::slide_dbl(Employed, mean, 189 | .before = 5, .after = 6, .complete = TRUE), 190 | `2x12-MA` = slider::slide_dbl(`12-MA`, mean, 191 | .before = 1, .after = 0, .complete = TRUE) 192 | ) 193 | us_retail_employment_ma %>% 194 | autoplot(Employed, colour = "gray") + 195 | geom_line(aes(y = `2x12-MA`), colour = "#D55E00") + 196 | labs(y = "Persons (thousands)", 197 | title = "Total employment in US retail") 198 | 199 | ## ----classical-empl, warning=FALSE, echo=FALSE, fig.asp=0.9, fig.cap="A classical additive decomposition of US retail employment.", dependson='us_retail_employment'---- 200 | us_retail_employment %>% 201 | model(classical_decomposition(Employed, type = "additive")) %>% 202 | components() %>% 203 | autoplot() + 204 | labs(title = "Classical additive decomposition of total US retail employment") 205 | 206 | ## ----x11, echo=TRUE, warning=FALSE, fig.asp=0.85, fig.cap="A multiplicative decomposition of US retail employment using X-11.", dependson='us_retail_employment'---- 207 | x11_dcmp <- us_retail_employment %>% 208 | model(x11 = X_13ARIMA_SEATS(Employed ~ x11())) %>% 209 | components() 210 | autoplot(x11_dcmp) + 211 | labs(title = 212 | "Decomposition of total US retail employment using X-11.") 213 | 214 | ## ----x11-seasadj, echo=TRUE, dependson='x11',warning=FALSE, fig.asp=0.45, fig.cap="US retail employment: the original data (grey), the trend-cycle component (orange) and the seasonally adjusted data (barely visible in blue)."---- 215 | x11_dcmp %>% 216 | ggplot(aes(x = Month)) + 217 | geom_line(aes(y = Employed, colour = "Data")) + 218 | geom_line(aes(y = season_adjust, 219 | colour = "Seasonally Adjusted")) + 220 | geom_line(aes(y = trend, colour = "Trend")) + 221 | labs(y = "Persons (thousands)", 222 | title = "Total employment in US retail") + 223 | scale_colour_manual( 224 | values = c("gray", "#0072B2", "#D55E00"), 225 | breaks = c("Data", "Seasonally Adjusted", "Trend") 226 | ) 227 | 228 | ## ----print-media3, dependson='x11',fig.cap="Seasonal sub-series plot of the seasonal component from the X-11 method applied to total US retail employment.", echo=TRUE, fig.asp=0.5---- 229 | x11_dcmp %>% 230 | gg_subseries(seasonal) 231 | 232 | ## ----seasonal-dep, include=FALSE-------------------------------------------------------------------------- 233 | # Declare seasonal suggest as renv dependency 234 | library(seasonal) 235 | 236 | ## ----seats, eval=TRUE, echo=TRUE, warning=FALSE, fig.asp=0.9, fig.cap="A decomposition of US retail employment obtained using SEATS.", dependson='us_retail_employment'---- 237 | seats_dcmp <- us_retail_employment %>% 238 | model(seats = X_13ARIMA_SEATS(Employed ~ seats())) %>% 239 | components() 240 | autoplot(seats_dcmp) + 241 | labs(title = 242 | "Decomposition of total US retail employment using SEATS") 243 | 244 | ## ----empl-stl2, fig.cap="Total US retail employment (top) and its three additive components obtained from a robust STL decomposition with flexible trend-cycle and fixed seasonality.",fig.asp=0.85, echo=TRUE---- 245 | us_retail_employment %>% 246 | model( 247 | STL(Employed ~ trend(window = 7) + 248 | season(window = "periodic"), 249 | robust = TRUE)) %>% 250 | components() %>% 251 | autoplot() 252 | 253 | ## ----labour, echo=FALSE, fig.cap="Decomposition of the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.", fig.asp=0.9, message=FALSE---- 254 | dcmp <- as_tsibble(fma::labour) %>% 255 | model(stl = STL(value ~ season(window = 11), robust = TRUE)) 256 | components(dcmp) %>% autoplot() 257 | 258 | ## ----labour2, echo=FALSE, fig.cap="Seasonal component from the decomposition shown in the previous figure.", dependson="labour"---- 259 | components(dcmp) %>% 260 | gg_subseries(season_year) 261 | 262 | -------------------------------------------------------------------------------- /04-features.R: -------------------------------------------------------------------------------- 1 | source("before-each-chapter.R") 2 | 3 | ## ----feature_mean2---------------------------------------------------------------------------------------- 4 | tourism %>% 5 | features(Trips, list(mean = mean)) %>% 6 | arrange(mean) 7 | 8 | ## ----feature_fivenum-------------------------------------------------------------------------------------- 9 | tourism %>% features(Trips, quantile) 10 | 11 | ## ----feature_acf------------------------------------------------------------------------------------------ 12 | tourism %>% features(Trips, feat_acf) 13 | 14 | ## ----stl-features, echo = TRUE---------------------------------------------------------------------------- 15 | tourism %>% 16 | features(Trips, feat_stl) 17 | 18 | ## ----featuresplot, fig.height=4.6, fig.cap="Seasonal strength vs trend strength for all tourism series.", fig.env="figure*"---- 19 | tourism %>% 20 | features(Trips, feat_stl) %>% 21 | ggplot(aes(x = trend_strength, y = seasonal_strength_year, 22 | col = Purpose)) + 23 | geom_point() + 24 | facet_wrap(vars(State)) 25 | 26 | ## ----extreme, fig.height=3, fig.asp=0.45, fig.cap="The most seasonal series in the Australian tourism data."---- 27 | tourism %>% 28 | features(Trips, feat_stl) %>% 29 | filter( 30 | seasonal_strength_year == max(seasonal_strength_year) 31 | ) %>% 32 | left_join(tourism, by = c("State", "Region", "Purpose")) %>% 33 | ggplot(aes(x = Quarter, y = Trips)) + 34 | geom_line() + 35 | facet_grid(vars(State, Region, Purpose)) 36 | 37 | ## ----all_features, warning=FALSE-------------------------------------------------------------------------- 38 | tourism_features <- tourism %>% 39 | features(Trips, feature_set(pkgs = "feasts")) 40 | tourism_features 41 | 42 | ## ----seasonalfeatures, fig.cap="Pairwise plots of all the seasonal features for the Australian tourism data", message=FALSE, fig.width=12, fig.height=12, fig.asp=1, fig.env = 'figure*'---- 43 | library(glue) 44 | tourism_features %>% 45 | select_at(vars(contains("season"), Purpose)) %>% 46 | mutate( 47 | seasonal_peak_year = seasonal_peak_year + 48 | 4*(seasonal_peak_year==0), 49 | seasonal_trough_year = seasonal_trough_year + 50 | 4*(seasonal_trough_year==0), 51 | seasonal_peak_year = glue("Q{seasonal_peak_year}"), 52 | seasonal_trough_year = glue("Q{seasonal_trough_year}"), 53 | ) %>% 54 | GGally::ggpairs(mapping = aes(colour = Purpose)) 55 | 56 | ## ----pca, fig.cap="(ref:pca)", out.width="70%", fig.width=4, fig.height=4, fig.asp=1---------------------- 57 | library(broom) 58 | pcs <- tourism_features %>% 59 | select(-State, -Region, -Purpose) %>% 60 | prcomp(scale = TRUE) %>% 61 | augment(tourism_features) 62 | pcs %>% 63 | ggplot(aes(x = .fittedPC1, y = .fittedPC2, col = Purpose)) + 64 | geom_point() + 65 | theme(aspect.ratio = 1) 66 | 67 | ## ----pcaoutliers, dependson='pca', fig.asp=1, fig.cap="Four anomalous time series from the Australian tourism data."---- 68 | outliers <- pcs %>% 69 | filter(.fittedPC1 > 10) %>% 70 | select(Region, State, Purpose, .fittedPC1, .fittedPC2) 71 | outliers 72 | outliers %>% 73 | left_join(tourism, by = c("State", "Region", "Purpose")) %>% 74 | mutate( 75 | Series = glue("{State}", "{Region}", "{Purpose}", 76 | .sep = "\n\n") 77 | ) %>% 78 | ggplot(aes(x = Quarter, y = Trips)) + 79 | geom_line() + 80 | facet_grid(Series ~ ., scales = "free") + 81 | labs(title = "Outlying time series in PC space") 82 | 83 | -------------------------------------------------------------------------------- /05-toolbox.R: -------------------------------------------------------------------------------- 1 | source("before-each-chapter.R") 2 | 3 | ## ----workflow, echo = FALSE, fig.height=2.5, fig.asp=0.4-------------------------------------------------- 4 | line_curve <- function(x, y, xend, yend, ...) { 5 | geom_curve( 6 | aes(x = x, y = y, xend = xend, yend = yend), 7 | arrow = arrow(type = "closed", length = unit(0.03, "npc")), 8 | ... 9 | ) 10 | } 11 | 12 | ggplot() + 13 | geom_text( 14 | aes(x = x, y = y, label = label), 15 | data = tribble( 16 | ~x, ~y, ~label, 17 | 1, 0, "Tidy", 18 | 7/3, 0, "Visualise", 19 | 3, 0.5, "Specify", 20 | 11/3, 0, "Estimate", 21 | 3, -0.5, "Evaluate", 22 | 5, 0, "Forecast" 23 | ), 24 | size = 5 25 | ) + 26 | geom_segment( 27 | aes(x = x, y = y, xend = xend, yend = yend), 28 | data = tribble( 29 | ~x, ~y, ~xend, ~yend, 30 | 1.3, 0, 1.9, 0, 31 | 4.1, 0, 4.6, 0 32 | ), 33 | arrow = arrow(type = "closed", length = unit(0.03, "npc")) 34 | ) + 35 | line_curve(7/3, 0.1, 8/3, 0.5, angle = 250, curvature = -0.3) + 36 | line_curve(10/3, 0.5, 11/3, 0.1, angle = 250, curvature = -0.3) + 37 | line_curve(8/3, -0.5, 7/3, -0.1, angle = 250, curvature = -0.3) + 38 | line_curve(11/3, -0.1, 10/3, -0.5, angle = 250, curvature = -0.3) + 39 | theme_void() + 40 | xlim(0.8, 5.2) + 41 | ylim(-0.6, 0.6) 42 | 43 | ## ----gdppc------------------------------------------------------------------------------------------------ 44 | gdppc <- global_economy %>% 45 | mutate(GDP_per_capita = GDP / Population) 46 | 47 | ## ----swedengdp, fig.cap="GDP per capita data for Sweden from 1960 to 2017.", dependson='gdppc'------------ 48 | gdppc %>% 49 | filter(Country == "Sweden") %>% 50 | autoplot(GDP_per_capita) + 51 | labs(y = "$US", title = "GDP per capita for Sweden") 52 | 53 | ## ----gdp_models, warning=FALSE, message=FALSE, dependson='gdppc'------------------------------------------ 54 | fit <- gdppc %>% 55 | model(trend_model = TSLM(GDP_per_capita ~ trend())) 56 | 57 | ## ----gdp_models2, dependson='gdp_models'------------------------------------------------------------------ 58 | fit 59 | 60 | ## ----gdp_forecasts, dependson='gdp_models', warning=FALSE, message=FALSE---------------------------------- 61 | fit %>% forecast(h = "3 years") 62 | 63 | ## ----gdpforecastplot, fig.asp=0.55, dependson='gdp_models', warning=FALSE, message=FALSE, fig.cap="Forecasts of GDP per capita for Sweden using a simple trend model."---- 64 | fit %>% 65 | forecast(h = "3 years") %>% 66 | filter(Country == "Sweden") %>% 67 | autoplot(gdppc) + 68 | labs(y = "$US", title = "GDP per capita for Sweden") 69 | 70 | ## ----bricks----------------------------------------------------------------------------------------------- 71 | bricks <- aus_production %>% 72 | filter_index("1970 Q1" ~ "2004 Q4") %>% 73 | select(Bricks) 74 | 75 | ## ----mean-method-explained, fig.asp=0.55, echo=FALSE, message=FALSE, warning=FALSE, fig.cap="Mean (or average) forecasts applied to clay brick production in Australia."---- 76 | bricks <- bricks %>% 77 | mutate(average = mean(Bricks)) 78 | 79 | fc <- as_tibble(bricks) %>% 80 | filter(row_number() == n()) %>% 81 | mutate(Quarter = list(as_date(Quarter) + months(c(0, 12*5)))) %>% 82 | unnest(Quarter) 83 | 84 | bricks %>% 85 | ggplot(aes(x = Quarter, y = Bricks)) + 86 | geom_line() + 87 | geom_line(aes(y = average), colour = "blue", linetype = "dashed") + 88 | geom_line(aes(y = average), data = fc, colour = "blue") + 89 | labs(title = "Clay brick production in Australia") 90 | 91 | ## ----naive-method-explained, fig.asp=0.55, echo = FALSE, warning = FALSE, fig.cap="Naïve forecasts applied to clay brick production in Australia."---- 92 | bricks %>% 93 | model(NAIVE(Bricks)) %>% 94 | forecast(h = "5 years") %>% 95 | autoplot(bricks, level = NULL) + 96 | geom_point(aes(y = Bricks), data = slice(bricks, n()), colour = "blue") + 97 | labs(title = "Clay brick production in Australia") 98 | 99 | ## ----snaive-method-explained, fig.asp=0.55, echo = FALSE, warning = FALSE, fig.cap="Seasonal naïve forecasts applied to clay brick production in Australia."---- 100 | bricks %>% 101 | model(SNAIVE(Bricks ~ lag("year"))) %>% 102 | forecast(h = "5 years") %>% 103 | autoplot(bricks, level = NULL) + 104 | geom_point(aes(y = Bricks), data = slice(bricks, (n() - 3):n()), colour = "blue") + 105 | labs(title = "Clay brick production in Australia") 106 | 107 | ## ----drift-method-explained, fig.asp=0.55, echo = FALSE, warning = FALSE, fig.cap="Drift forecasts applied to clay brick production in Australia."---- 108 | bricks %>% 109 | model(RW(Bricks ~ drift())) %>% 110 | forecast(h = "5 years") %>% 111 | autoplot(bricks, level = NULL) + 112 | geom_line(aes(y = Bricks), 113 | data = slice(bricks, range(cumsum(!is.na(Bricks)))), 114 | linetype = "dashed", colour = "blue" 115 | ) + 116 | labs(title = "Clay brick production in Australia") 117 | 118 | ## ----beerf, fig.cap="Forecasts of Australian quarterly beer production.", warning=FALSE, message=FALSE, fig.asp=0.5---- 119 | # Set training data from 1992 to 2006 120 | train <- aus_production %>% 121 | filter_index("1992 Q1" ~ "2006 Q4") 122 | # Fit the models 123 | beer_fit <- train %>% 124 | model( 125 | Mean = MEAN(Beer), 126 | `Naïve` = NAIVE(Beer), 127 | `Seasonal naïve` = SNAIVE(Beer) 128 | ) 129 | # Generate forecasts for 14 quarters 130 | beer_fc <- beer_fit %>% forecast(h = 14) 131 | # Plot forecasts against actual values 132 | beer_fc %>% 133 | autoplot(train, level = NULL) + 134 | autolayer( 135 | filter_index(aus_production, "2007 Q1" ~ .), 136 | colour = "black" 137 | ) + 138 | labs( 139 | y = "Megalitres", 140 | title = "Forecasts for quarterly beer production" 141 | ) + 142 | guides(colour = guide_legend(title = "Forecast")) 143 | 144 | ## ----google2015, fig.cap="Forecasts based on Google's daily closing stock price in 2015.", message=FALSE, warning=FALSE, fig.asp=0.5---- 145 | # Re-index based on trading days 146 | google_stock <- gafa_stock %>% 147 | filter(Symbol == "GOOG", year(Date) >= 2015) %>% 148 | mutate(day = row_number()) %>% 149 | update_tsibble(index = day, regular = TRUE) 150 | # Filter the year of interest 151 | google_2015 <- google_stock %>% filter(year(Date) == 2015) 152 | # Fit the models 153 | google_fit <- google_2015 %>% 154 | model( 155 | Mean = MEAN(Close), 156 | `Naïve` = NAIVE(Close), 157 | Drift = NAIVE(Close ~ drift()) 158 | ) 159 | # Produce forecasts for the trading days in January 2016 160 | google_jan_2016 <- google_stock %>% 161 | filter(yearmonth(Date) == yearmonth("2016 Jan")) 162 | google_fc <- google_fit %>% 163 | forecast(new_data = google_jan_2016) 164 | # Plot the forecasts 165 | google_fc %>% 166 | autoplot(google_2015, level = NULL) + 167 | autolayer(google_jan_2016, Close, colour = "black") + 168 | labs(y = "$US", 169 | title = "Google daily closing stock prices", 170 | subtitle = "(Jan 2015 - Jan 2016)") + 171 | guides(colour = guide_legend(title = "Forecast")) 172 | 173 | ## ----augment, dependson='beerf'--------------------------------------------------------------------------- 174 | augment(beer_fit) 175 | 176 | ## ----GSPautoplot, fig.cap="Daily Google stock prices in 2015.", dependson='google_2015'------------------- 177 | autoplot(google_2015, Close) + 178 | labs(y = "$US", 179 | title = "Google daily closing stock prices in 2015") 180 | 181 | ## ----GSPresid, fig.cap="Residuals from forecasting the Google stock price using the naïve method.", warning=FALSE, fig.asp=0.4, dependson='google_2015'---- 182 | aug <- google_2015 %>% 183 | model(NAIVE(Close)) %>% 184 | augment() 185 | autoplot(aug, .innov) + 186 | labs(y = "$US", 187 | title = "Residuals from the naïve method") 188 | 189 | ## ----GSPhist, fig.cap="Histogram of the residuals from the naïve method applied to the Google stock price. The right tail seems a little too long for a normal distribution.", warning=FALSE, message=FALSE, dependson="GSPresid", fig.asp=0.5---- 190 | aug %>% 191 | ggplot(aes(x = .innov)) + 192 | geom_histogram() + 193 | labs(title = "Histogram of residuals") 194 | 195 | ## ----GSPacf, fig.cap="ACF of the residuals from the naïve method applied to the Google stock price. The lack of correlation suggesting the forecasts are good.", fig.asp=0.3, dependson="GSPresid"---- 196 | aug %>% 197 | ACF(.innov) %>% 198 | autoplot() + 199 | labs(title = "Residuals from the naïve method") 200 | 201 | ## ----tsresiduals, warning=FALSE, dependson='google_2015', fig.cap="Residual diagnostic graphs for the naïve method applied to the Google stock price."---- 202 | google_2015 %>% 203 | model(NAIVE(Close)) %>% 204 | gg_tsresiduals() 205 | 206 | ## ----Boxtest, dependson="GSPresid"------------------------------------------------------------------------ 207 | aug %>% features(.innov, box_pierce, lag = 10, dof = 0) 208 | 209 | aug %>% features(.innov, ljung_box, lag = 10, dof = 0) 210 | 211 | ## ----goog_drift, warning=FALSE, dependson='google_2015'--------------------------------------------------- 212 | fit <- google_2015 %>% model(RW(Close ~ drift())) 213 | tidy(fit) 214 | 215 | ## ----tsresid_drift, warning=FALSE, dependson='googl_drift'------------------------------------------------ 216 | augment(fit) %>% features(.innov, ljung_box, lag=10, dof=1) 217 | 218 | ## ----pcmultipliers, echo=FALSE---------------------------------------------------------------------------- 219 | tab <- tibble(Percentage = c(seq(50, 95, by = 5), 96:99)) %>% 220 | mutate(Multiplier = qnorm(0.5 + Percentage / 200)) %>% 221 | knitr::kable( 222 | digits = 2, 223 | booktabs = TRUE, 224 | caption = "Multipliers to be used for prediction intervals." 225 | ) 226 | if(!html) { 227 | tab <- gsub("\\\\centering","\\\\vspace*{-0.4cm}\\\\centering",tab) 228 | tab <- gsub("\\\\end\\{tabular\\}","\\\\end\\{tabular\\}\\\\vspace*{0.3cm}",tab) 229 | } 230 | tab 231 | 232 | ## ----GSPpi, echo=FALSE, dependson="google2015"------------------------------------------------------------ 233 | aug <- google_2015 %>% 234 | model(NAIVE(Close)) %>% 235 | augment() 236 | googsd <- sqrt(mean(aug$.resid^2, na.rm = TRUE)) 237 | googf <- round(last(google_2015$Close), 2) 238 | mult <- -qnorm(.5 - c(80, 95) / 200) 239 | upper <- c(googf) + mult * googsd 240 | lower <- c(googf) - mult * googsd 241 | 242 | ## ----sigmatable, echo=FALSE------------------------------------------------------------------------------- 243 | tab <- rbind( 244 | c("Mean", "$\\hat\\sigma_h = \\hat\\sigma\\sqrt{1 + 1/T}$"), 245 | c("Naïve", "$\\hat\\sigma_h = \\hat\\sigma\\sqrt{h}$"), 246 | c("Seasonal naïve", "$\\hat\\sigma_h = \\hat\\sigma\\sqrt{k+1}$"), 247 | c("Drift", "$\\hat\\sigma_h = \\hat\\sigma\\sqrt{h(1+h/T)}$") 248 | ) 249 | colnames(tab) <- c("Benchmark method", "$h$-step forecast standard deviation") 250 | caption <- "Multi-step forecast standard deviation for the four benchmark methods, where $\\sigma$ is the residual standard deviation, $m$ is the seasonal period, and $k$ is the integer part of $(h-1) /m$ (i.e., the number of complete years in the forecast period prior to time $T+h$)." 251 | tab <- knitr::kable(tab, format=if_else(html, 'html', 'latex'), booktabs = TRUE, escape = FALSE, caption = caption) 252 | if(!html) { 253 | tab <- gsub("\\\\centering","\\\\vspace*{-0.4cm}\\\\centering",tab) 254 | tab <- gsub("\\\\end\\{tabular\\}","\\\\end\\{tabular\\}\\\\vspace*{0.3cm}",tab) 255 | } 256 | tab 257 | 258 | ## ----googforecasts, dependson="GSPpi"--------------------------------------------------------------------- 259 | google_2015 %>% 260 | model(NAIVE(Close)) %>% 261 | forecast(h = 10) %>% 262 | hilo() 263 | 264 | ## ----googforecasts2, echo=TRUE, dependson="GSPpi", fig.asp=0.55, fig.cap="(ref:googforecasts2)"----------- 265 | google_2015 %>% 266 | model(NAIVE(Close)) %>% 267 | forecast(h = 10) %>% 268 | autoplot(google_2015) + 269 | labs(title="Google daily closing stock price", y="$US" ) 270 | 271 | ## ----generate, dependson="google2015"--------------------------------------------------------------------- 272 | fit <- google_2015 %>% 273 | model(NAIVE(Close)) 274 | sim <- fit %>% generate(h = 30, times = 5, bootstrap = TRUE) 275 | sim 276 | 277 | ## ----showsim, fig.cap="Five simulated future sample paths of the Google closing stock price based on a naïve method with bootstrapped residuals.", dependson="generate"---- 278 | google_2015 %>% 279 | ggplot(aes(x = day)) + 280 | geom_line(aes(y = Close)) + 281 | geom_line(aes(y = .sim, colour = as.factor(.rep)), 282 | data = sim) + 283 | labs(title="Google daily closing stock price", y="$US" ) + 284 | guides(colour = "none") 285 | 286 | ## ----fcbootstrap, dependson="generate"-------------------------------------------------------------------- 287 | fc <- fit %>% forecast(h = 30, bootstrap = TRUE) 288 | fc 289 | 290 | ## ----fcbootstrapplot, fig.cap="Forecasts of the Google closing stock price based on a naïve method with bootstrapped residuals.", dependson="fcbootstrap"---- 291 | autoplot(fc, google_2015) + 292 | labs(title="Google daily closing stock price", y="$US" ) 293 | 294 | ## ----googforecastsboot, echo=TRUE, dependson="google2015"------------------------------------------------- 295 | google_2015 %>% 296 | model(NAIVE(Close)) %>% 297 | forecast(h = 10, bootstrap = TRUE, times = 1000) %>% 298 | hilo() 299 | 300 | ## ----biasadjust, message=FALSE, warning=FALSE, echo=TRUE, fig.cap="Forecasts of egg prices using the drift method applied to the logged data. The bias-adjusted mean forecasts are shown with a solid line, while the median forecasts are dashed."---- 301 | prices %>% 302 | filter(!is.na(eggs)) %>% 303 | model(RW(log(eggs) ~ drift())) %>% 304 | forecast(h = 50) %>% 305 | autoplot(prices %>% filter(!is.na(eggs)), 306 | level = 80, point_forecast = lst(mean, median) 307 | ) + 308 | labs(title = "Annual egg prices", 309 | y = "$US (in cents adjusted for inflation) ") 310 | 311 | ## ----print-media4, fig.cap="Naïve forecasts of the seasonally adjusted data obtained from an STL decomposition of the total US retail employment.", echo=TRUE---- 312 | us_retail_employment <- us_employment %>% 313 | filter(year(Month) >= 1990, Title == "Retail Trade") 314 | dcmp <- us_retail_employment %>% 315 | model(STL(Employed ~ trend(window = 7), robust = TRUE)) %>% 316 | components() %>% 317 | select(-.model) 318 | dcmp %>% 319 | model(NAIVE(season_adjust)) %>% 320 | forecast() %>% 321 | autoplot(dcmp) + 322 | labs(y = "Number of people", 323 | title = "US retail employment") 324 | 325 | ## ----print-media5, fig.cap="Forecasts of the total US retail employment data based on a naïve forecast of the seasonally adjusted data and a seasonal naïve forecast of the seasonal component, after an STL decomposition of the data.", echo=TRUE---- 326 | fit_dcmp <- us_retail_employment %>% 327 | model(stlf = decomposition_model( 328 | STL(Employed ~ trend(window = 7), robust = TRUE), 329 | NAIVE(season_adjust) 330 | )) 331 | fit_dcmp %>% 332 | forecast() %>% 333 | autoplot(us_retail_employment)+ 334 | labs(y = "Number of people", 335 | title = "US retail employment") 336 | 337 | ## ----print-media5-resids, fig.cap="Checking the residuals.", echo=TRUE, warning=FALSE, dependson="print-media5"---- 338 | fit_dcmp %>% gg_tsresiduals() 339 | 340 | ## ----traintest, fig.asp=0.1, echo=FALSE------------------------------------------------------------------- 341 | train <- 1:18 342 | test <- 19:24 343 | par(mar = c(0, 0, 0, 0)) 344 | plot(0, 0, xlim = c(0, 26), ylim = c(0, 2), xaxt = "n", yaxt = "n", bty = "n", xlab = "", ylab = "", type = "n") 345 | arrows(0, 0.5, 25, 0.5, 0.05) 346 | points(train, train * 0 + 0.5, pch = 19, col = "#0072B2") 347 | points(test, test * 0 + 0.5, pch = 19, col = "#D55E00") 348 | text(26, 0.5, "time") 349 | text(10, 1, "Training data", col = "#0072B2") 350 | text(21, 1, "Test data", col = "#D55E00") 351 | 352 | ## ----beeraccuracy, fig.cap="Forecasts of Australian quarterly beer production using data up to the end of 2007.", message=FALSE,warning=FALSE, fig.asp=0.5---- 353 | recent_production <- aus_production %>% 354 | filter(year(Quarter) >= 1992) 355 | beer_train <- recent_production %>% 356 | filter(year(Quarter) <= 2007) 357 | 358 | beer_fit <- beer_train %>% 359 | model( 360 | Mean = MEAN(Beer), 361 | `Naïve` = NAIVE(Beer), 362 | `Seasonal naïve` = SNAIVE(Beer), 363 | Drift = RW(Beer ~ drift()) 364 | ) 365 | 366 | beer_fc <- beer_fit %>% 367 | forecast(h = 10) 368 | 369 | beer_fc %>% 370 | autoplot( 371 | aus_production %>% filter(year(Quarter) >= 1992), 372 | level = NULL 373 | ) + 374 | labs( 375 | y = "Megalitres", 376 | title = "Forecasts for quarterly beer production" 377 | ) + 378 | guides(colour = guide_legend(title = "Forecast")) 379 | 380 | ## ----beeraccuracytable, echo=FALSE, dependson="beeraccuracy"---------------------------------------------- 381 | accuracy(beer_fc, recent_production) %>% 382 | mutate(Method = paste(.model, "method")) %>% 383 | select(Method, RMSE, MAE, MAPE, MASE) %>% 384 | knitr::kable(digits = 2, booktabs = TRUE) 385 | 386 | ## ----GSPfc0, fig.cap="Forecasts of the Google stock price for Jan 2016.", warning=FALSE, fig.asp=0.55, dependson="google2015"---- 387 | google_fit <- google_2015 %>% 388 | model( 389 | Mean = MEAN(Close), 390 | `Naïve` = NAIVE(Close), 391 | Drift = RW(Close ~ drift()) 392 | ) 393 | 394 | google_fc <- google_fit %>% 395 | forecast(google_jan_2016) 396 | 397 | ## ----GSPfc, fig.cap="Forecasts of the Google stock price for Jan 2016.", warning=FALSE, fig.asp=0.55, dependson="GSPfc0"---- 398 | google_fc %>% 399 | autoplot(bind_rows(google_2015, google_jan_2016), 400 | level = NULL) + 401 | labs(y = "$US", 402 | title = "Google closing stock prices from Jan 2015") + 403 | guides(colour = guide_legend(title = "Forecast")) 404 | 405 | ## ----GSPaccuracytable, echo=FALSE, dependson="GSPfc", warning=FALSE--------------------------------------- 406 | accuracy(google_fc, google_stock) %>% 407 | mutate(Method = paste(.model, "method")) %>% 408 | select(Method, RMSE, MAE, MAPE, MASE) %>% 409 | knitr::kable(digits = 2, booktabs = TRUE) 410 | 411 | ## ----googlepi, fig.cap="(ref:googlepi)", warning=FALSE, fig.asp=0.55, dependson='GSPfc'------------------- 412 | google_fc %>% 413 | filter(.model == "Naïve") %>% 414 | autoplot(bind_rows(google_2015, google_jan_2016), level=80)+ 415 | labs(y = "$US", 416 | title = "Google closing stock prices") 417 | 418 | ## ----qp, dependson='GSPfc', echo=FALSE-------------------------------------------------------------------- 419 | lo80 <- google_fc %>% 420 | select(Date, .model, Close) %>% 421 | hilo(Close, level = 80) %>% 422 | unpack_hilo(`80%`) %>% 423 | filter(.model == "Naïve", Date == "2016-01-04") %>% 424 | pull(`80%_lower`) 425 | actual <- google_stock %>% 426 | filter(Date == ymd("2016-01-04")) %>% 427 | pull(Close) 428 | pi80 <- google_fc %>% 429 | select(Date, .model, Close) %>% 430 | hilo(Close, level = 80) %>% 431 | unpack_hilo(`80%`) %>% 432 | filter(.model == "Naïve", Date == "2016-01-04") %>% 433 | select(`80%_lower`, `80%_upper`) %>% 434 | rename(lo = `80%_lower`, hi = `80%_upper`) 435 | 436 | ## ----googlepcscore, dependson='GSPfc'--------------------------------------------------------------------- 437 | google_fc %>% 438 | filter(.model == "Naïve", Date == "2016-01-04") %>% 439 | accuracy(google_stock, list(qs=quantile_score), probs=0.10) 440 | 441 | ## ----googlewinklerscore, dependson='GSPfc'---------------------------------------------------------------- 442 | google_fc %>% 443 | filter(.model == "Naïve", Date == "2016-01-04") %>% 444 | accuracy(google_stock, 445 | list(winkler = winkler_score), level = 80) 446 | 447 | ## ----crps, dependson='GSPfc'------------------------------------------------------------------------------ 448 | google_fc %>% 449 | accuracy(google_stock, list(crps = CRPS)) 450 | 451 | ## ----skillscore, dependson='GSPfc'------------------------------------------------------------------------ 452 | google_fc %>% 453 | accuracy(google_stock, list(skill = skill_score(CRPS))) 454 | 455 | ## ----cairo-dep, include=FALSE----------------------------------------------------------------------------- 456 | # Declare Cairo suggest as renv dependency 457 | library(Cairo) 458 | 459 | ## ----cv1, echo=FALSE, fig.asp=0.47, dev=if_else(html,'CairoSVG','pdf')------------------------------------ 460 | tscv_plot <- function(.init, .step, h = 1) { 461 | expand.grid( 462 | time = seq(26), 463 | .id = seq(trunc(20 / .step)) 464 | ) %>% 465 | group_by(.id) %>% 466 | mutate( 467 | observation = case_when( 468 | time <= ((.id - 1) * .step + .init) ~ "train", 469 | time %in% c((.id - 1) * .step + .init + h) ~ "test", 470 | TRUE ~ "unused" 471 | ) 472 | ) %>% 473 | ungroup() %>% 474 | filter(.id <= 26 - .init) %>% 475 | ggplot(aes(x = time, y = .id)) + 476 | geom_segment( 477 | aes(x = 0, xend = 27, y = .id, yend = .id), 478 | arrow = arrow(length = unit(0.015, "npc")), 479 | col = "black", size = .25 480 | ) + 481 | geom_point(aes(col = observation), size = 2) + 482 | scale_y_reverse() + 483 | scale_colour_manual(values = c(train = "#0072B2", test = "#D55E00", unused = "gray")) + 484 | #theme_void() + 485 | #geom_label(aes(x = 28.5, y = 1, label = "time")) + 486 | guides(colour = "none") + 487 | labs(x="time", y="") + 488 | theme_void() + 489 | theme(axis.title = element_text()) 490 | } 491 | tscv_plot(.init = 6, .step = 1, h = 1) 492 | 493 | ## ----cv4, echo=FALSE, fig.asp=0.47, dependson='cv1', dev=if_else(html,'CairoSVG','pdf')------------------- 494 | tscv_plot(.init = 6, .step = 1, h = 4) 495 | 496 | ## ----googtscv, dependson="google2015", warning=FALSE------------------------------------------------------ 497 | # Time series cross-validation accuracy 498 | google_2015_tr <- google_2015 %>% 499 | stretch_tsibble(.init = 3, .step = 1) %>% 500 | relocate(Date, Symbol, .id) 501 | google_2015_tr 502 | 503 | ## ----googtscv2, results='hide', warning=FALSE, dependson="googtscv"-------------------------------------- 504 | # TSCV accuracy 505 | google_2015_tr %>% 506 | model(RW(Close ~ drift())) %>% 507 | forecast(h = 1) %>% 508 | accuracy(google_2015) 509 | # Training set accuracy 510 | google_2015 %>% 511 | model(RW(Close ~ drift())) %>% 512 | accuracy() 513 | 514 | ## ----googtscveval, warning=FALSE, echo = FALSE, dependson='googtscv'-------------------------------------- 515 | google_2015_tr %>% 516 | model(RW(Close ~ drift())) %>% 517 | forecast(h = 1) %>% 518 | accuracy(google_2015) %>% 519 | mutate(.type = "Cross-validation") %>% 520 | bind_rows(google_2015 %>% model(RW(Close ~ drift())) %>% accuracy()) %>% 521 | rename(`Evaluation method` = .type) %>% 522 | select(`Evaluation method`, RMSE, MAE, MAPE, MASE) %>% 523 | knitr::kable(digits = 2, booktabs = TRUE) 524 | 525 | ## ----CV-accuracy-plot, echo=TRUE, warning=FALSE, fig.cap="RMSE as a function of forecast horizon for the drift method applied to Google closing stock prices."---- 526 | google_2015_tr <- google_2015 %>% 527 | stretch_tsibble(.init = 3, .step = 1) 528 | fc <- google_2015_tr %>% 529 | model(RW(Close ~ drift())) %>% 530 | forecast(h = 8) %>% 531 | group_by(.id) %>% 532 | mutate(h = row_number()) %>% 533 | ungroup() 534 | fc %>% 535 | accuracy(google_2015, by = c("h", ".model")) %>% 536 | ggplot(aes(x = h, y = RMSE)) + 537 | geom_point() 538 | 539 | -------------------------------------------------------------------------------- /06-judgmental.R: -------------------------------------------------------------------------------- 1 | source("before-each-chapter.R") 2 | 3 | ## ----tfc, fig.cap="(ref:tfc)", message=FALSE, warning=FALSE, echo=FALSE, fig.asp=0.5---------------------- 4 | tfcvn <- readr::read_csv("https://OTexts.com/fpp3/extrafiles/dat_3_TFC.csv") %>% 5 | as_tsibble(index = Year) %>% 6 | pivot_longer(c(TFC,Regression,ETS), names_to="Method", values_to="Forecast") 7 | tfcvn %>% 8 | ggplot(aes(x = Year, y = Data/1000)) + 9 | geom_line() + 10 | geom_line(aes(y = Forecast/1000, colour = Method)) + 11 | labs(title= "Australian domestic tourism: Total ", 12 | y = "Visitor nights (millions)") + 13 | scale_colour_manual( 14 | name = "", 15 | values = c("#E69F00", "#0072B2", "#CC79A7"), 16 | breaks = c("TFC", "Regression", "ETS") 17 | ) 18 | 19 | -------------------------------------------------------------------------------- /07-regression.R: -------------------------------------------------------------------------------- 1 | source("before-each-chapter.R") 2 | 3 | ## ----SLRpop1, fig.cap="An example of data from a simple linear regression model.", echo=FALSE, warning=FALSE, message=FALSE, fig.pos="t"---- 4 | set.seed(2) 5 | x <- runif(50, 0, 4) 6 | df <- data.frame( 7 | x = x, 8 | y = 3 + 10 * x + rnorm(50, 0, 10) 9 | ) 10 | ggplot(df, aes(x, y)) + 11 | geom_point() + 12 | geom_abline( 13 | slope = 10, intercept = 3, 14 | col = "#D55E00", size = 0.3 15 | ) + 16 | geom_label( 17 | x = .3, y = 40, parse = TRUE, col = "#D55E00", 18 | label = " beta[0] + beta[1] * x" 19 | ) + 20 | geom_segment( 21 | x = .3, y = 36, xend = 0, yend = 4, 22 | arrow = arrow(length = unit(0.02, "npc")), 23 | size = 0.2, col = "#D55E00" 24 | ) + 25 | geom_label( 26 | x = 1.5, y = 55, parse = TRUE, col = "#0072B2", 27 | label = "y[t] == beta[0] + beta[1] * x[t] + epsilon[t]" 28 | ) + 29 | geom_segment( 30 | x = 1.5, y = 52, xend = df$x[19] - 0.03, yend = df$y[19] + 1.5, 31 | arrow = arrow(length = unit(0.02, "npc")), 32 | size = 0.2, col = "#0072B2" 33 | ) + 34 | geom_segment( 35 | x = df$x[19], y = df$y[19], 36 | xend = df$x[19], yend = 3 + 10 * df$x[19], 37 | col = "#009E73", size = 0.2, 38 | arrow = arrow(length = unit(0.02, "npc"), ends = "both") 39 | ) + 40 | geom_label( 41 | x = df$x[19] - 0.07, 42 | y = (df$y[19] + 3 + 10 * df$x[19]) / 2, 43 | col = "#009E73", label = "epsilon[t]", 44 | parse = TRUE 45 | ) 46 | 47 | ## ----ConsInc, echo=TRUE, fig.cap="Percentage changes in personal consumption expenditure and personal income for the US.", fig.asp=0.45---- 48 | us_change %>% 49 | pivot_longer(c(Consumption, Income), names_to="Series") %>% 50 | autoplot(value) + 51 | labs(y = "% change") 52 | 53 | ## ----fitcons, include=FALSE------------------------------------------------------------------------------- 54 | fit_cons <- us_change %>% 55 | model(TSLM(Consumption ~ Income)) 56 | 57 | ## ----ConsInc2, echo=TRUE, fig.cap="Scatterplot of quarterly changes in consumption expenditure versus quarterly changes in personal income and the fitted regression line.", message=FALSE---- 58 | us_change %>% 59 | ggplot(aes(x = Income, y = Consumption)) + 60 | labs(y = "Consumption (quarterly % change)", 61 | x = "Income (quarterly % change)") + 62 | geom_point() + 63 | geom_smooth(method = "lm", se = FALSE) 64 | 65 | ## ----tslmcons, echo=TRUE---------------------------------------------------------------------------------- 66 | us_change %>% 67 | model(TSLM(Consumption ~ Income)) %>% 68 | report() 69 | 70 | ## ----MultiPredictors, echo=TRUE, fig.cap="Quarterly percentage changes in industrial production and personal savings and quarterly changes in the unemployment rate for the US over the period 1970Q1-2019Q2."---- 71 | us_change %>% 72 | select(-Consumption, -Income) %>% 73 | pivot_longer(-Quarter) %>% 74 | ggplot(aes(Quarter, value, color = name)) + 75 | geom_line() + 76 | facet_grid(name ~ ., scales = "free_y") + 77 | guides(colour = "none") + 78 | labs(y="% change") 79 | 80 | ## ----ScatterMatrix, message=FALSE, fig.cap="A scatterplot matrix of US consumption expenditure and the four predictors.", fig.asp=1, message=FALSE, fig.env="figure*"---- 81 | us_change %>% 82 | GGally::ggpairs(columns = 2:6) 83 | 84 | ## ----usestim, echo=TRUE, fig.cap="Multiple regression output from least squares estimation."-------------- 85 | fit_consMR <- us_change %>% 86 | model(tslm = TSLM(Consumption ~ Income + Production + 87 | Unemployment + Savings)) 88 | report(fit_consMR) 89 | 90 | ## ----usfitted1, echo=TRUE, fig.cap="Time plot of actual US consumption expenditure and predicted US consumption expenditure.", dependson="usestim"---- 91 | augment(fit_consMR) %>% 92 | ggplot(aes(x = Quarter)) + 93 | geom_line(aes(y = Consumption, colour = "Data")) + 94 | geom_line(aes(y = .fitted, colour = "Fitted")) + 95 | labs(y = NULL, 96 | title = "Percent change in US consumption expenditure" 97 | ) + 98 | scale_colour_manual(values=c(Data="black",Fitted="#D55E00")) + 99 | guides(colour = guide_legend(title = NULL)) 100 | 101 | ## ----usfitted2, echo=TRUE, fig.cap="Actual US consumption expenditure plotted against predicted US consumption expenditure.", message=FALSE, warning=FALSE, dependson="usestim"---- 102 | augment(fit_consMR) %>% 103 | ggplot(aes(x = Consumption, y = .fitted)) + 104 | geom_point() + 105 | labs( 106 | y = "Fitted (predicted values)", 107 | x = "Data (actual values)", 108 | title = "Percent change in US consumption expenditure" 109 | ) + 110 | geom_abline(intercept = 0, slope = 1) 111 | 112 | ## ----corfitconsMR, echo=FALSE, dependson="usestim"-------------------------------------------------------- 113 | r <- with(augment(fit_consMR), cor(.fitted, Consumption)) 114 | 115 | ## ----uschangeresidcheck, fig.cap="Analysing the residuals from a regression model for US quarterly consumption.", message=FALSE, warning=FALSE, dependson="usestim", fig.asp=0.6---- 116 | fit_consMR %>% gg_tsresiduals() 117 | 118 | ## ----uschangeresidcheck2, message=FALSE, warning=FALSE, class.output='r', dependson='usestim'------------- 119 | augment(fit_consMR) %>% 120 | features(.innov, ljung_box, lag = 10, dof = 5) 121 | 122 | ## ----resids, echo=TRUE, fig.cap="Scatterplots of residuals versus each predictor.", dependson='usestim'---- 123 | us_change %>% 124 | left_join(residuals(fit_consMR), by = "Quarter") %>% 125 | pivot_longer(Income:Unemployment, 126 | names_to = "regressor", values_to = "x") %>% 127 | ggplot(aes(x = x, y = .resid)) + 128 | geom_point() + 129 | facet_wrap(. ~ regressor, scales = "free_x") + 130 | labs(y = "Residuals", x = "") 131 | 132 | ## ----resids2, echo=TRUE, fig.cap="Scatterplots of residuals versus fitted values.", fig.asp=0.55, dependson='usestim'---- 133 | augment(fit_consMR) %>% 134 | ggplot(aes(x = .fitted, y = .resid)) + 135 | geom_point() + labs(x = "Fitted", y = "Residuals") 136 | 137 | ## ----outlier, fig.cap="The effect of outliers and influential observations on regression", fig.asp=0.45, echo=FALSE, message=FALSE,warning=FALSE---- 138 | fit1 <- lm(Consumption ~ Income, data = us_change) 139 | 140 | us_change_outliers <- us_change 141 | us_change_outliers$Consumption[1] <- -4 142 | us_change_outliers$Income[1] <- 1 143 | fit2 <- lm(Consumption ~ Income, data = us_change_outliers) 144 | p1 <- ggplot(us_change_outliers, aes(x = Income, y = Consumption)) + 145 | labs(y = "% change in consumption", x = "% change in income") + 146 | geom_point() + 147 | geom_abline(intercept = fit1$coefficients[1], slope = fit1$coefficients[2]) + 148 | geom_abline(intercept = fit2$coefficients[1], slope = fit2$coefficients[2], colour = "#D55E00") + 149 | geom_point(x = 1, y = -4, shape = 1, size = 7, col = "#0072B2") 150 | 151 | us_change_outliers$Income[1] <- 6 152 | fit2 <- lm(Consumption ~ Income, data = us_change_outliers) 153 | p2 <- ggplot(us_change_outliers, aes(x = Income, y = Consumption)) + 154 | labs(y = "% change in consumption", x = "% change in income") + 155 | geom_point() + 156 | geom_abline(intercept = fit1$coefficients[1], slope = fit1$coefficients[2]) + 157 | geom_abline(intercept = fit2$coefficients[1], slope = fit2$coefficients[2], colour = "#D55E00") + 158 | geom_point(x = 6, y = -4, shape = 1, size = 7, col = "#0072B2") 159 | 160 | p1 | p2 161 | 162 | ## ----spurious, echo=FALSE, fig.asp=0.5, warning=FALSE, fig.cap="Trending time series data can appear to be related, as shown in this example where air passengers in Australia are regressed against rice production in Guinea.", message=FALSE---- 163 | p1 <- aus_airpassengers %>% 164 | filter(Year <= 2011) %>% 165 | autoplot(Passengers) + 166 | labs(title = "Air transport: Australia", 167 | y="Passengers (millions)") 168 | p2 <- guinea_rice %>% 169 | autoplot(Production) + 170 | labs(title = "Rice production: Guinea", 171 | y = "Metric tons (millions)") 172 | p3 <- aus_airpassengers %>% 173 | left_join(guinea_rice, by = "Year") %>% 174 | ggplot(aes(x = Production, y = Passengers)) + 175 | geom_point() + 176 | labs(y = "Air passengers in Australia (millions)", 177 | x = "Rice production in Guinea (million tons)") 178 | 179 | (p1 / p2) | p3 180 | 181 | ## ----tslmspurious, echo=TRUE, dependson="spurious"-------------------------------------------------------- 182 | fit <- aus_airpassengers %>% 183 | filter(Year <= 2011) %>% 184 | left_join(guinea_rice, by = "Year") %>% 185 | model(TSLM(Passengers ~ Production)) 186 | report(fit) 187 | 188 | ## ----tslmspurious2, fig.cap="Residuals from a spurious regression.", dependson='tslmspurious', class.output='r', dependson='tslmspurious'---- 189 | fit %>% gg_tsresiduals() 190 | 191 | ## ----dowdummy, echo=FALSE--------------------------------------------------------------------------------- 192 | df <- matrix("0", nrow = 13, ncol = 6) 193 | df[1:6, ] <- paste(diag(6)) 194 | df[8:12, ] <- paste(diag(6)[1:5, ]) 195 | rownames(df) <- rep(c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"), 2)[1:13] 196 | colnames(df) <- paste("$d_{", 1:6, ",t}$", sep = "") 197 | 198 | dfrows <- 9 199 | df <- df[seq(dfrows), ] 200 | if (html) { 201 | rownames(df)[dfrows] <- df[dfrows, ] <- "⋮" 202 | knitr::kable(df) 203 | } else { 204 | rownames(df)[dfrows] <- df[dfrows, ] <- "$\\vdots$" 205 | tab <- knitr::kable(df, booktabs = TRUE, format = "latex", escape = FALSE) 206 | tab <- gsub("\\\\begin", "\\\\begingroup\\\\fontsize{10}{12}\\\\selectfont\\\\begin", tab) 207 | gsub("\\\\end\\{tabular\\}", "\\\\end{tabular}\\\\endgroup", tab) 208 | } 209 | 210 | ## ----beeragain, echo=TRUE, fig.cap="Australian quarterly beer production.", fig.asp=0.5------------------- 211 | recent_production <- aus_production %>% 212 | filter(year(Quarter) >= 1992) 213 | recent_production %>% 214 | autoplot(Beer) + 215 | labs(y = "Megalitres", 216 | title = "Australian quarterly beer production") 217 | 218 | ## ----fig.beerfit, echo=TRUE, dependson="beeragain"-------------------------------------------------------- 219 | fit_beer <- recent_production %>% 220 | model(TSLM(Beer ~ trend() + season())) 221 | report(fit_beer) 222 | 223 | ## ----beerlm2, echo=TRUE, fig.cap="Time plot of beer production and predicted beer production.", dependson="fig.beerfit"---- 224 | augment(fit_beer) %>% 225 | ggplot(aes(x = Quarter)) + 226 | geom_line(aes(y = Beer, colour = "Data")) + 227 | geom_line(aes(y = .fitted, colour = "Fitted")) + 228 | scale_colour_manual( 229 | values = c(Data = "black", Fitted = "#D55E00") 230 | ) + 231 | labs(y = "Megalitres", 232 | title = "Australian quarterly beer production") + 233 | guides(colour = guide_legend(title = "Series")) 234 | 235 | ## ----beerlm3, echo=TRUE, fig.cap="Actual beer production plotted against predicted beer production.", dependson="fig.beerfit"---- 236 | augment(fit_beer) %>% 237 | ggplot(aes(x = Beer, y = .fitted, 238 | colour = factor(quarter(Quarter)))) + 239 | geom_point() + 240 | labs(y = "Fitted", x = "Actual values", 241 | title = "Australian quarterly beer production") + 242 | geom_abline(intercept = 0, slope = 1) + 243 | guides(colour = guide_legend(title = "Quarter")) 244 | 245 | ## ----fourierbeer, echo=TRUE, dependson="beeragain"-------------------------------------------------------- 246 | fourier_beer <- recent_production %>% 247 | model(TSLM(Beer ~ trend() + fourier(K = 2))) 248 | report(fourier_beer) 249 | 250 | ## ----CVfitconsMR, echo=TRUE, dependson='usestim'---------------------------------------------------------- 251 | glance(fit_consMR) %>% 252 | select(adj_r_squared, CV, AIC, AICc, BIC) 253 | 254 | ## ----tblusMR, echo=FALSE, message = FALSE----------------------------------------------------------------- 255 | library(rlang) 256 | library(purrr) 257 | 258 | opts <- expand.grid(Income = 0:1, Production = 0:1, Savings = 0:1, Unemployment = 0:1) %>% 259 | mutate( 260 | formulae = 261 | pmap( 262 | list(Income = Income, Production = Production, Savings = Savings, Unemployment = Unemployment), 263 | function(...) { 264 | spec <- list(...) 265 | new_formula( 266 | sym("Consumption"), 267 | reduce(syms(names(spec)[spec == 1]), call2, .fn = "+", .init = 1) 268 | ) 269 | } 270 | ), 271 | models = set_names(map(formulae, TSLM), map_chr(formulae, deparse)) 272 | ) 273 | 274 | tab <- us_change %>% 275 | model( 276 | !!!opts$models 277 | ) %>% 278 | glance() %>% 279 | bind_cols(opts) %>% 280 | transmute(Income, Production, Savings, Unemployment, AdjR2 = adj_r_squared, CV, AIC, AICc, BIC) %>% 281 | arrange(AICc) %>% 282 | mutate(across(where(is.integer), function(x){if_else(x==1, ifelse(html,"⬤","$\\bullet$"), "")})) %>% 283 | knitr::kable( 284 | format = if_else(html, "html", "latex"), 285 | digits = 3, align = c(rep("c", 4)), booktabs = TRUE, escape=FALSE, 286 | caption = "All 16 possible models for forecasting US consumption with 4 predictors." 287 | ) 288 | if (html) { 289 | tab 290 | } else { 291 | tab <- kableExtra::kable_styling(tab, latex_options = c("scale_down", "hold_position")) 292 | tab <- gsub("\\\\centering", "\\\\centering\\\\vspace*{-0.3cm}", tab) 293 | gsub("\\\\end\\{tabular\\}\\}", "\\\\end{tabular}}\\\\vspace{0.3cm}", tab) 294 | } 295 | 296 | ## ----beerlm1, echo=TRUE, fig.cap="(ref:figcapbeerlm1)"---------------------------------------------------- 297 | recent_production <- aus_production %>% 298 | filter(year(Quarter) >= 1992) 299 | fit_beer <- recent_production %>% 300 | model(TSLM(Beer ~ trend() + season())) 301 | fc_beer <- forecast(fit_beer) 302 | fc_beer %>% 303 | autoplot(recent_production) + 304 | labs( 305 | title = "Forecasts of beer production using regression", 306 | y = "megalitres" 307 | ) 308 | 309 | ## ----ConsInc4a, echo=TRUE--------------------------------------------------------------------------------- 310 | fit_consBest <- us_change %>% 311 | model( 312 | lm = TSLM(Consumption ~ Income + Savings + Unemployment) 313 | ) 314 | future_scenarios <- scenarios( 315 | Increase = new_data(us_change, 4) %>% 316 | mutate(Income=1, Savings=0.5, Unemployment=0), 317 | Decrease = new_data(us_change, 4) %>% 318 | mutate(Income=-1, Savings=-0.5, Unemployment=0), 319 | names_to = "Scenario") 320 | 321 | fc <- forecast(fit_consBest, new_data = future_scenarios) 322 | 323 | ## ----ConsInc4, echo=TRUE, fig.cap="Forecasting percentage changes in personal consumption expenditure for the US under scenario based forecasting."---- 324 | us_change %>% 325 | autoplot(Consumption) + 326 | autolayer(fc) + 327 | labs(title = "US consumption", y = "% change") 328 | 329 | ## ----fitconsupdown, include=FALSE------------------------------------------------------------------------- 330 | fit_cons <- us_change %>% 331 | model(TSLM(Consumption ~ Income)) 332 | fcast_ave <- forecast(fit_cons, new_data(us_change, 4) %>% mutate(Income = mean(us_change$Income))) 333 | fcast_up <- forecast(fit_cons, new_data(us_change, 4) %>% mutate(Income = 5)) 334 | 335 | ## ----savePI, echo=FALSE----------------------------------------------------------------------------------- 336 | PI80 <- as.data.frame(hilo(fcast_ave$Consumption[1], 80))[1, ][[1]] 337 | PI95 <- as.data.frame(hilo(fcast_ave$Consumption[1], 95))[1, ][[1]] 338 | 339 | ## ----conSimplePI, fig.cap="(ref:figcapconSimplePI)", echo=TRUE-------------------------------------------- 340 | fit_cons <- us_change %>% 341 | model(TSLM(Consumption ~ Income)) 342 | new_cons <- scenarios( 343 | "Average increase" = new_data(us_change, 4) %>% 344 | mutate(Income = mean(us_change$Income)), 345 | "Extreme increase" = new_data(us_change, 4) %>% 346 | mutate(Income = 12), 347 | names_to = "Scenario" 348 | ) 349 | fcast <- forecast(fit_cons, new_cons) 350 | 351 | us_change %>% 352 | autoplot(Consumption) + 353 | autolayer(fcast) + 354 | labs(title = "US consumption", y = "% change") 355 | 356 | ## ----boston, echo=TRUE------------------------------------------------------------------------------------ 357 | boston_men <- boston_marathon %>% 358 | filter(Year >= 1924) %>% 359 | filter(Event == "Men's open division") %>% 360 | mutate(Minutes = as.numeric(Time)/60) 361 | 362 | ## ----marathonLinear, echo=FALSE, fig.cap="Fitting a linear trend to the Boston marathon winning times is inadequate", message=FALSE, warning=FALSE, dependson='boston'---- 363 | fit_lin <- boston_men %>% 364 | model(TSLM(Minutes ~ trend())) 365 | p1 <- boston_men %>% 366 | ggplot(aes(x = Year, y = Minutes)) + 367 | geom_line() + 368 | geom_smooth(method = "lm", se = FALSE) + 369 | labs(title = "Boston marathon winning times", 370 | y = "Minutes") 371 | p2 <- augment(fit_lin) %>% 372 | autoplot(.resid) + 373 | labs(title = "Residuals from a linear trend", 374 | y="Minutes") 375 | p1 / p2 376 | 377 | ## ----marathonNonlinear, echo=TRUE, message=TRUE, warning=FALSE, fig.cap="Projecting forecasts from linear, exponential and piecewise linear trends for the Boston marathon winning times.", dependson='boston'---- 378 | fit_trends <- boston_men %>% 379 | model( 380 | linear = TSLM(Minutes ~ trend()), 381 | exponential = TSLM(log(Minutes) ~ trend()), 382 | piecewise = TSLM(Minutes ~ trend(knots = c(1950, 1980))) 383 | ) 384 | fc_trends <- fit_trends %>% forecast(h = 10) 385 | 386 | boston_men %>% 387 | autoplot(Minutes) + 388 | geom_line(data = fitted(fit_trends), 389 | aes(y = .fitted, colour = .model)) + 390 | autolayer(fc_trends, alpha = 0.5, level = 95) + 391 | labs(y = "Minutes", 392 | title = "Boston marathon winning times") 393 | 394 | ## ----ex7.1------------------------------------------------------------------------------------------------ 395 | jan14_vic_elec <- vic_elec %>% 396 | filter(yearmonth(Time) == yearmonth("2014 Jan")) %>% 397 | index_by(Date = as_date(Time)) %>% 398 | summarise( 399 | Demand = sum(Demand), 400 | Temperature = max(Temperature) 401 | ) 402 | 403 | ## ----ex7.1c, results = "hide", fig.show = "hide", message=FALSE, warning=FALSE, dependson='ex7.1'--------- 404 | jan14_vic_elec %>% 405 | model(TSLM(Demand ~ Temperature)) %>% 406 | forecast( 407 | new_data(jan14_vic_elec, 1) %>% 408 | mutate(Temperature = 15) 409 | ) %>% 410 | autoplot(jan14_vic_elec) 411 | 412 | -------------------------------------------------------------------------------- /08-exponential-smoothing.R: -------------------------------------------------------------------------------- 1 | source("before-each-chapter.R") 2 | 3 | ## ----7-oil, fig.cap="Exports of goods and services from Algeria from 1960 to 2017.", echo=TRUE------------ 4 | algeria_economy <- global_economy %>% 5 | filter(Country == "Algeria") 6 | algeria_economy %>% 7 | autoplot(Exports) + 8 | labs(y = "% of GDP", title = "Exports: Algeria") 9 | 10 | ## ----alpha, echo=FALSE------------------------------------------------------------------------------------ 11 | tab <- as.data.frame(matrix(NA, nrow = 6, ncol = 4)) 12 | rownames(tab) <- c("$y_{T}$", paste("$y_{T-", 1:5, "}$", sep = "")) 13 | alpha <- c(0.2, 0.4, 0.6, 0.8) 14 | colnames(tab) <- paste("$\\alpha=", alpha, "$", sep = "") 15 | for (i in 1:6) { 16 | tab[i, ] <- alpha * (1 - alpha)^(i - 1) 17 | } 18 | knitr::kable(tab, digits = 4, booktabs = TRUE, escape = FALSE) 19 | 20 | ## ----sesfit, echo=TRUE------------------------------------------------------------------------------------ 21 | # Estimate parameters 22 | fit <- algeria_economy %>% 23 | model(ETS(Exports ~ error("A") + trend("N") + season("N"))) 24 | fc <- fit %>% 25 | forecast(h = 5) 26 | 27 | ## ----sesparam, echo=FALSE, dependson='sesfit'------------------------------------------------------------- 28 | alpha <- tidy(fit)$estimate[1] 29 | l0 <- tidy(fit)$estimate[2] 30 | 31 | ## ----export-ses, echo=FALSE, dependson='sesfit'----------------------------------------------------------- 32 | format_num <- function(x, decplaces = 2) { 33 | fmt <- paste0("%.", decplaces, "f") 34 | ifelse(is.na(x), "", sprintf(fmt, x)) 35 | } 36 | 37 | # Data set for table 38 | fc_tbl <- fc %>% 39 | as_tibble() %>% 40 | transmute(Year, .fitted = .mean) %>% 41 | mutate(Time = as.character(row_number()), .fitted = format_num(.fitted)) %>% 42 | bind_rows(tibble(Time = "$h$", .fitted = "$\\hat{y}_{T+h\\vert T}$"), .) 43 | 44 | options(knitr.kable.NA = "") 45 | as_tsibble(components(fit)) %>% 46 | left_join(augment(fit), by = c("Country", ".model", "Exports", "Year")) %>% 47 | mutate( 48 | Time = as.character(row_number() - 1), 49 | .fitted = format_num(.fitted), 50 | Exports = format_num(Exports), 51 | level = format_num(level) 52 | ) %>% 53 | select(Year, Time, Exports, level, .fitted) %>% 54 | as_tibble() -> tmp 55 | 56 | tab <- tmp %>% 57 | slice(1:9) %>% 58 | bind_rows(summarise_at(tmp, vars(-Year), function(...) if (html) "⋮" else "\\vdots")) %>% 59 | bind_rows(slice(tmp, (n() - 3):n())) %>% 60 | bind_rows(fc_tbl) %>% 61 | bind_rows(tibble(Time = "$t$", Exports = "$y_t$", level = "$\\ell_t$", .fitted = "$\\hat{y}_{t\\vert t-1}$"), .) %>% 62 | transmute(Year, Time, Observation = Exports, Level = level, Forecast = .fitted) %>% 63 | knitr::kable(caption = "Forecasting goods and services exports from Algeria using simple exponential smoothing.", booktabs = TRUE, escape = FALSE) 64 | 65 | if (html) { 66 | tab 67 | } else { 68 | tab <- gsub("\\\\centering", "\\\\centering\\\\vspace*{-0.3cm}", tab) 69 | gsub("\\\\end\\{tabular\\}", "\\\\end{tabular}\\\\vspace*{0.3cm}", tab) 70 | } 71 | 72 | ## ----ses, fig.cap="Simple exponential smoothing applied to exports from Algeria (1960--2017). The orange curve shows the one-step-ahead fitted values.", echo=TRUE, dependson='sesfit'---- 73 | fc %>% 74 | autoplot(algeria_economy) + 75 | geom_line(aes(y = .fitted), col="#D55E00", 76 | data = augment(fit)) + 77 | labs(y="% of GDP", title="Exports: Algeria") + 78 | guides(colour = "none") 79 | 80 | ## ----auspop, echo=TRUE, fig.cap="Australia's population, 1960-2017."-------------------------------------- 81 | aus_economy <- global_economy %>% 82 | filter(Code == "AUS") %>% 83 | mutate(Pop = Population / 1e6) 84 | autoplot(aus_economy, Pop) + 85 | labs(y = "Millions", title = "Australian population") 86 | 87 | ## ----popholt0, echo=TRUE---------------------------------------------------------------------------------- 88 | fit <- aus_economy %>% 89 | model( 90 | AAN = ETS(Pop ~ error("A") + trend("A") + season("N")) 91 | ) 92 | fc <- fit %>% forecast(h = 10) 93 | 94 | ## ----popcoeff, echo=FALSE--------------------------------------------------------------------------------- 95 | est <- tidy(fit) 96 | alpha <- est %>% filter(term=="alpha") %>% pull(estimate) 97 | beta <- est %>% filter(term=="beta") %>% pull(estimate) 98 | betastar <- beta/alpha 99 | 100 | ## ----popholt, echo=FALSE---------------------------------------------------------------------------------- 101 | # Data set for table 102 | fc_tbl <- fc %>% 103 | as_tibble() %>% 104 | transmute(Year, .fitted = .mean) %>% 105 | mutate(Time = as.character(row_number()), .fitted = format_num(.fitted)) %>% 106 | bind_rows(tibble(Time = "$h$", .fitted = "$\\hat{y}_{T+h\\mid T}$"), .) 107 | 108 | options(knitr.kable.NA = "") 109 | tmp <- as_tsibble(components(fit)) %>% 110 | left_join(augment(fit), by = c("Country", ".model", "Pop", "Year")) %>% 111 | mutate( 112 | Time = as.character(row_number() - 1), 113 | .fitted = format_num(.fitted), 114 | Pop = format_num(Pop), 115 | level = format_num(level), 116 | slope = format_num(slope) 117 | ) %>% 118 | select(Year, Time, Pop, level, slope, .fitted) %>% 119 | as_tibble() %>% 120 | bind_rows(tibble(Time = "$t$", Pop = "$y_t$", level = "$\\ell_t$", .fitted = "$\\hat{y}_{t+1\\mid t}$"), .) 121 | 122 | tmp <- tmp %>% 123 | slice(1:9) %>% 124 | bind_rows(summarise_at(tmp, vars(-Year), function(...) if (html) "⋮" else "\\vdots")) %>% 125 | bind_rows(slice(tmp, (n() - 3):n())) %>% 126 | bind_rows(fc_tbl) %>% 127 | transmute(Year, Time, Observation = Pop, Level = level, Slope = slope, Forecast = .fitted) 128 | 129 | caption <- "Forecasting Australian annual population using Holt's linear trend method." 130 | 131 | if (html) { 132 | knitr::kable(tmp, booktabs = TRUE, escape = FALSE, caption = caption) 133 | } else { 134 | knitr::kable(tmp, booktabs = TRUE, escape = FALSE, caption = caption) %>% 135 | kableExtra::kable_styling(font_size = 9) 136 | } 137 | 138 | ## ----checkholt, echo=FALSE-------------------------------------------------------------------------------- 139 | if (sum(abs(tidy(fit)$estimate[1:2] - c(0.9999, 0.3266))) > 1e-4) { 140 | stop("Parameter error") 141 | } 142 | 143 | ## ----dampedtrend, fig.cap="Forecasting annual Australian population (millions) over 2018-2032. For the damped trend method, $\\phi=0.90$.", echo=TRUE, fig.asp=0.55---- 144 | aus_economy %>% 145 | model( 146 | `Holt's method` = ETS(Pop ~ error("A") + 147 | trend("A") + season("N")), 148 | `Damped Holt's method` = ETS(Pop ~ error("A") + 149 | trend("Ad", phi = 0.9) + season("N")) 150 | ) %>% 151 | forecast(h = 15) %>% 152 | autoplot(aus_economy, level = NULL) + 153 | labs(title = "Australian population", 154 | y = "Millions") + 155 | guides(colour = guide_legend(title = "Forecast")) 156 | 157 | ## ----www-usage, fig.cap="Users connected to the internet through a server", fig.asp=0.5, echo=TRUE-------- 158 | www_usage <- as_tsibble(WWWusage) 159 | www_usage %>% autoplot(value) + 160 | labs(x="Minute", y="Number of users", 161 | title = "Internet usage per minute") 162 | 163 | ## ----expsmoothcv, echo=TRUE, warning=FALSE---------------------------------------------------------------- 164 | www_usage %>% 165 | stretch_tsibble(.init = 10) %>% 166 | model( 167 | SES = ETS(value ~ error("A") + trend("N") + season("N")), 168 | Holt = ETS(value ~ error("A") + trend("A") + season("N")), 169 | Damped = ETS(value ~ error("A") + trend("Ad") + 170 | season("N")) 171 | ) %>% 172 | forecast(h = 1) %>% 173 | accuracy(www_usage) 174 | 175 | ## ----holtmodel, echo=TRUE--------------------------------------------------------------------------------- 176 | fit <- www_usage %>% 177 | model( 178 | Damped = ETS(value ~ error("A") + trend("Ad") + 179 | season("N")) 180 | ) 181 | # Estimated parameters: 182 | tidy(fit) 183 | 184 | ## ----fig-7-comp, fig.cap="Forecasting internet usage: comparing forecasting performance of non-seasonal methods.", echo=TRUE, dependson='holtmodel', fig.asp=0.5---- 185 | fit %>% 186 | forecast(h = 10) %>% 187 | autoplot(www_usage) + 188 | labs(x="Minute", y="Number of users", 189 | title = "Internet usage per minute") 190 | 191 | ## ----wwwcoef, echo=FALSE---------------------------------------------------------------------------------- 192 | phi <- fit %>% tidy() %>% filter(term=="phi") %>% pull(estimate) %>% format_num(3) 193 | 194 | ## ----7-HW, fig.cap="Forecasting domestic overnight trips in Australia using the Holt-Winters method with both additive and multiplicative seasonality.", echo=TRUE---- 195 | aus_holidays <- tourism %>% 196 | filter(Purpose == "Holiday") %>% 197 | summarise(Trips = sum(Trips)/1e3) 198 | fit <- aus_holidays %>% 199 | model( 200 | additive = ETS(Trips ~ error("A") + trend("A") + 201 | season("A")), 202 | multiplicative = ETS(Trips ~ error("M") + trend("A") + 203 | season("M")) 204 | ) 205 | fc <- fit %>% forecast(h = "3 years") 206 | fc %>% 207 | autoplot(aus_holidays, level = NULL) + 208 | labs(title="Australian domestic tourism", 209 | y="Overnight trips (millions)") + 210 | guides(colour = guide_legend(title = "Forecast")) 211 | 212 | ## ----tab75, echo=FALSE, dependson="7-HW"------------------------------------------------------------------ 213 | fit_add <- fit %>% select(additive) 214 | 215 | # Data set for table 216 | fc_tbl <- fc %>% 217 | filter(.model == "additive") %>% 218 | as_tibble() %>% 219 | transmute(Quarter = as.character(Quarter), .fitted = .mean) %>% 220 | mutate(Time = as.character(row_number()), .fitted = format_num(.fitted, 1)) %>% 221 | bind_rows(tibble(Time = "$h$", .fitted = "$\\hat{y}_{T+h\\vert T}$"), .) 222 | 223 | options(knitr.kable.NA = "") 224 | cmp_tbl <- as_tsibble(components(fit_add)) %>% 225 | left_join(augment(fit_add), by = c(".model", "Trips", "Quarter")) %>% 226 | as_tibble() %>% 227 | mutate( 228 | Quarter = as.character(Quarter), 229 | Time = as.character(row_number() - 1), 230 | .fitted = format_num(.fitted, 1), 231 | Trips = format_num(Trips, 1), 232 | level = format_num(level, 1), 233 | slope = format_num(slope, 1), 234 | season = format_num(season, 1) 235 | ) %>% 236 | select(Quarter, Time, Trips, level, slope, season, .fitted) %>% 237 | bind_rows(tibble( 238 | Quarter = "", 239 | Time = "$t$", 240 | Trips = "$y_t$", 241 | level = "$\\ell_t$", 242 | slope = "$b_t$", 243 | season = "$s_t$", 244 | .fitted = "$\\hat{y}_{t+1\\vert t}$" 245 | ), .) 246 | 247 | ctab <- cmp_tbl %>% 248 | slice(1:9) %>% 249 | bind_rows(summarise_at(cmp_tbl, vars(-Quarter), function(...) if (html) "⋮" else "\\vdots")) %>% 250 | bind_rows(slice(cmp_tbl, (n() - 3):n())) %>% 251 | bind_rows(fc_tbl) %>% 252 | transmute(Quarter, Time, Observation = Trips, Level = level, Slope = slope, Season = season, Forecast = .fitted) 253 | 254 | fit_add_coef <- tidy(fit_add) 255 | alpha <- fit_add_coef %>% filter(term=="alpha") %>% pull(estimate) 256 | beta <- fit_add_coef %>% filter(term=="beta") %>% pull(estimate) 257 | betastar <- format_num(beta / alpha, 4) 258 | alpha <- format_num(alpha, 4) 259 | gamma <- fit_add_coef %>% filter(term=="gamma") %>% pull(estimate) %>% format_num(4) 260 | rmse <- accuracy(fit_add) %>% pull(RMSE) %>% format_num(4) 261 | 262 | caption <- paste0("Applying Holt-Winters' method with additive seasonality for forecasting domestic tourism in Australia. Notice that the additive seasonal component sums to approximately zero. The smoothing parameters and initial estimates for the components are $\\alpha = ", alpha, "$, $\\beta^* = ", betastar, "$, $\\gamma = ", gamma, "$ and RMSE $=", rmse,"$.") 263 | if (html) { 264 | knitr::kable(ctab, digits = 2, escape = FALSE, caption = caption, align = "r") 265 | } else { 266 | knitr::kable(ctab, 267 | digits = 2, escape = FALSE, 268 | booktabs = TRUE, caption = caption, align = "r" 269 | ) %>% 270 | kableExtra::kable_styling(font_size = 9) 271 | } 272 | 273 | ## ----tab76, echo=FALSE, dependson="tab75"----------------------------------------------------------------- 274 | fit_mult <- fit %>% select(multiplicative) 275 | 276 | # Data set for table 277 | fc_tbl <- fc %>% 278 | filter(.model == "multiplicative") %>% 279 | as_tibble() %>% 280 | transmute(Quarter = as.character(Quarter), .fitted = .mean) %>% 281 | mutate(Time = as.character(row_number()), .fitted = format_num(.fitted, 1)) %>% 282 | bind_rows(tibble(Time = "$h$", .fitted = "$\\hat{y}_{T+h\\vert T}$"), .) 283 | 284 | options(knitr.kable.NA = "") 285 | cmp_tbl <- as_tsibble(components(fit_mult)) %>% 286 | left_join(augment(fit_mult), by = c(".model", "Trips", "Quarter")) %>% 287 | as_tibble() %>% 288 | mutate( 289 | Quarter = as.character(Quarter), 290 | Time = as.character(row_number() - 1), 291 | .fitted = format_num(.fitted, 1), 292 | Trips = format_num(Trips, 1), 293 | level = format_num(level, 1), 294 | slope = format_num(slope, 1), 295 | season = format_num(season, 1) 296 | ) %>% 297 | select(Quarter, Time, Trips, level, slope, season, .fitted) %>% 298 | bind_rows(tibble( 299 | Quarter = "", 300 | Time = "$t$", 301 | Trips = "$y_t$", 302 | level = "$\\ell_t$", 303 | slope = "$b_t$", 304 | season = "$s_t$", 305 | .fitted = "$\\hat{y}_{t+1\\vert t}$" 306 | ), .) 307 | 308 | ctab <- cmp_tbl %>% 309 | slice(1:9) %>% 310 | bind_rows(summarise_at(cmp_tbl, vars(-Quarter), function(...) if (html) "⋮" else "\\vdots")) %>% 311 | bind_rows(slice(cmp_tbl, (n() - 3):n())) %>% 312 | bind_rows(fc_tbl) %>% 313 | transmute(Quarter, Time, Observation = Trips, Level = level, Slope = slope, Season = season, Forecast = .fitted) 314 | 315 | fit_mult_coef <- tidy(fit_mult) 316 | alpha <- fit_mult_coef %>% filter(term=="alpha") %>% pull(estimate) 317 | beta <- fit_mult_coef %>% filter(term=="beta") %>% pull(estimate) 318 | betastar <- format_num(beta / alpha, 4) 319 | alpha <- format_num(alpha, 4) 320 | gamma <- fit_mult_coef %>% filter(term=="gamma") %>% pull(estimate) %>% format_num(4) 321 | rmse <- accuracy(fit_mult) %>% pull(RMSE) %>% format_num(4) 322 | 323 | caption <- paste0("Applying Holt-Winters' method with multiplicative seasonality for forecasting domestic tourism in Australia. Notice that the multiplicative seasonal component sums to approximately $m=4$. The smoothing parameters and initial estimates for the components are $\\alpha = ", alpha, "$, $\\beta^* = ", betastar, "$, $\\gamma = ", gamma, "$ and RMSE $=", rmse,"$.") 324 | # Output 325 | if (html) { 326 | knitr::kable(ctab, digits = 2, escape = FALSE, caption = caption, align = "r") 327 | } else { 328 | knitr::kable(ctab, 329 | digits = 2, escape = FALSE, 330 | booktabs = TRUE, caption = caption, align = "r" 331 | ) %>% 332 | kableExtra::kable_styling(font_size = 9) 333 | } 334 | 335 | ## ----fig-7-LevelTrendSeas, fig.cap="Estimated components for the Holt-Winters method with additive and multiplicative seasonal components.", echo=FALSE, dependson="7-HW", warning = FALSE, fig.width=8, fig.height=8, fig.asp=0.9, fig.env = 'figure*'---- 336 | p1 <- fit %>% select(additive) %>% components() %>% autoplot() + labs(subtitle = "Additive seasonality") 337 | p2 <- fit %>% select(multiplicative) %>% components() %>% autoplot() + labs(subtitle = "Multiplicative seasonality") 338 | p1 | p2 339 | 340 | ## ----hyndsight, echo=TRUE, fig.cap="Forecasts of daily pedestrian traffic at the Southern Cross railway station, Melbourne.", fig.asp=0.45---- 341 | sth_cross_ped <- pedestrian %>% 342 | filter(Date >= "2016-07-01", 343 | Sensor == "Southern Cross Station") %>% 344 | index_by(Date) %>% 345 | summarise(Count = sum(Count)/1000) 346 | sth_cross_ped %>% 347 | filter(Date <= "2016-07-31") %>% 348 | model( 349 | hw = ETS(Count ~ error("M") + trend("Ad") + season("M")) 350 | ) %>% 351 | forecast(h = "2 weeks") %>% 352 | autoplot(sth_cross_ped %>% filter(Date <= "2016-08-14")) + 353 | labs(title = "Daily traffic: Southern Cross", 354 | y="Pedestrians ('000)") 355 | 356 | ## ----taxonomy, echo=FALSE--------------------------------------------------------------------------------- 357 | tab <- data.frame( 358 | `X` = c("", "N (None)", "A (Additive)", "A$_d$ (Additive damped)"), 359 | N = c("(None)", "(N,N)", "(A,N)", "(A$_d$,N)"), 360 | A = c("(Additive)", "(N,A)", "(A,A)", "(A$_d$,A)"), 361 | M = c("(Multiplicative)", "(N,M)", "(A,M)", "(A$_d$,M)") 362 | ) 363 | colnames(tab) <- c("", "N", "A", "M") 364 | caption <- "A two-way classification of exponential smoothing methods." 365 | if (html) { 366 | tab %>% 367 | knitr::kable(format = "html", escape = FALSE, caption = caption) %>% 368 | kableExtra::row_spec(0, background = "#e6e6e6") %>% 369 | kableExtra::row_spec(1, bold = TRUE, background = "#e6e6e6") %>% 370 | kableExtra::row_spec(2, background = "#f6f6f6") %>% 371 | kableExtra::row_spec(3, background = "#f6f6f6") %>% 372 | kableExtra::row_spec(4, background = "#f6f6f6") %>% 373 | kableExtra::column_spec(1, bold = TRUE, background = "#e6e6e6") %>% 374 | kableExtra::add_header_above( 375 | c("Trend Component" = 1, "Seasonal Component" = 3), 376 | align = "l" 377 | ) 378 | } else { 379 | colnames(tab) <- c("", "(None)", "(Additive)", "(Multiplicative)") 380 | tab <- tab[-1, ] 381 | tab <- knitr::kable(tab, 382 | booktabs = TRUE, format = "latex", 383 | row.names = FALSE, escape = FALSE, caption = caption 384 | ) %>% 385 | kableExtra::add_header_above( 386 | c(" " = 1, "N" = 1, "A" = 1, "M" = 1), 387 | align = "l", line = FALSE 388 | ) %>% 389 | kableExtra::add_header_above( 390 | c("Trend Component" = 1, "Seasonal Component" = 3), 391 | align = "l", bold = TRUE, line = FALSE 392 | ) %>% 393 | kableExtra::kable_styling(latex_options = "hold_position") 394 | tab <- gsub( 395 | "\\\\midrule", 396 | "\\\\cmidrule(l{2pt}r{2pt}){1-1} \\\\cmidrule(l{2pt}r{2pt}){2-4}", 397 | tab 398 | ) 399 | tab <- gsub("\\\\centering", "\\\\centering\\\\vspace*{-0.3cm}", tab) 400 | gsub("\\\\end\\{tabular\\}", "\\\\end{tabular}\\\\vspace*{0.3cm}", tab) 401 | } 402 | 403 | ## ----shorthand, echo=FALSE-------------------------------------------------------------------------------- 404 | tab <- tibble::tibble( 405 | `Short hand` = c( 406 | "(N,N)", "(A,N)", "(A$_d$,N)", "(A,A)", "(A,M)", "(A$_d$,M)" 407 | ), 408 | "Method" = c( 409 | "Simple exponential smoothing", 410 | "Holt's linear method", 411 | "Additive damped trend method", 412 | "Additive Holt-Winters' method", 413 | "Multiplicative Holt-Winters' method", 414 | "Holt-Winters' damped method" 415 | ) 416 | ) 417 | knitr::kable(tab, booktabs = TRUE, longtable = FALSE, escape = FALSE) 418 | 419 | ## ----austouristsets, echo=TRUE---------------------------------------------------------------------------- 420 | aus_holidays <- tourism %>% 421 | filter(Purpose == "Holiday") %>% 422 | summarise(Trips = sum(Trips)/1e3) 423 | fit <- aus_holidays %>% 424 | model(ETS(Trips)) 425 | report(fit) 426 | 427 | ## ----checkmna, include=FALSE------------------------------------------------------------------------------ 428 | stopifnot(as.character(fit[[1]]) == "") 429 | alpha <- tidy(fit) %>% filter(term=="alpha") %>% pull(estimate) %>% format_num(4) 430 | gamma <- tidy(fit) %>% filter(term=="gamma") %>% pull(estimate) %>% format_num(4) 431 | 432 | ## ----MNAstates, fig.cap="Graphical representation of the estimated states over time.", echo=TRUE, dependson="austouristets", warning=FALSE---- 433 | components(fit) %>% 434 | autoplot() + 435 | labs(title = "ETS(M,N,A) components") 436 | 437 | ## ----MNAresiduals, echo=FALSE, fig.cap="Residuals and one-step forecast errors from the ETS(M,N,A) model.", dependson="austouristets"---- 438 | augment(fit) %>% 439 | as_tibble() %>% 440 | select(-Trips, -.fitted) %>% 441 | pivot_longer(c(.resid,.innov), values_to = "Residual") %>% 442 | mutate(name = recode(name, .resid = "Regular residuals", .innov = "Innovation residuals")) %>% 443 | ggplot(aes(x = Quarter, y = Residual)) + 444 | geom_line() + 445 | facet_grid(name ~ ., scales = "free_y") + 446 | labs(y=NULL) 447 | 448 | ## ----MNAforecasts, fig.cap="Forecasting Australian domestic overnight trips using an ETS(M,N,A) model.", echo=TRUE, dependson="austouristets"---- 449 | fit %>% 450 | forecast(h = 8) %>% 451 | autoplot(aus_holidays)+ 452 | labs(title="Australian domestic tourism", 453 | y="Overnight trips (millions)") 454 | 455 | ## ----pitable, echo=FALSE, escape=FALSE-------------------------------------------------------------------- 456 | tab <- rbind( 457 | c("(A,N,N)", "$\\sigma_h^2 = \\sigma^2\\big[1 + \\alpha^2(h-1)\\big]$"), 458 | c("(A,A,N)", "$\\sigma_h^2 = \\sigma^2\\Big[1 + (h-1)\\big\\{\\alpha^2 + \\alpha\\beta h + \\frac16\\beta^2h(2h-1)\\big\\}\\Big]$"), 459 | c("(A,A$_d$,N)", "$\\sigma_h^2 = \\sigma^2\\biggl[1 + \\alpha^2(h-1) + \\frac{\\beta\\phi h}{(1-\\phi)^2} \\left\\{2\\alpha(1-\\phi) +\\beta\\phi\\right\\}$"), 460 | c("", "$\\hspace*{1cm}\\mbox{} - \\frac{\\beta\\phi(1-\\phi^h)}{(1-\\phi)^2(1-\\phi^2)} \\left\\{ 2\\alpha(1-\\phi^2)+ \\beta\\phi(1+2\\phi-\\phi^h)\\right\\}\\biggr]$"), 461 | c("(A,N,A)", "$\\sigma_h^2 = \\sigma^2\\Big[1 + \\alpha^2(h-1) + \\gamma k(2\\alpha+\\gamma)\\Big]$"), 462 | c("(A,A,A)", "$\\sigma_h^2 = \\sigma^2\\Big[1 + (h-1)\\big\\{\\alpha^2 + \\alpha\\beta h + \\frac16\\beta^2h(2h-1)\\big\\}$"), 463 | c("", "$\\hspace*{1cm}\\mbox{} + \\gamma k \\big\\{2\\alpha+ \\gamma + \\beta m (k+1)\\big\\} \\Big]$"), 464 | c("(A,A$_d$,A)", "$\\sigma_h^2 = \\sigma^2\\biggl[1 + \\alpha^2(h-1) + \\gamma k(2\\alpha+\\gamma)$"), 465 | c("", "$\\hspace*{1cm}\\mbox{} +\\frac{\\beta\\phi h}{(1-\\phi)^2} \\left\\{2\\alpha(1-\\phi) + \\beta\\phi \\right\\}$"), 466 | c("", "$\\hspace*{1cm}\\mbox{} - \\frac{\\beta\\phi(1-\\phi^h)}{(1-\\phi)^2(1-\\phi^2)} \\left\\{ 2\\alpha(1-\\phi^2)+ \\beta\\phi(1+2\\phi-\\phi^h)\\right\\}$"), 467 | c("", "$\\hspace*{1cm}\\mbox{} + \\frac{2\\beta\\gamma\\phi}{(1-\\phi)(1-\\phi^m)}\\left\\{k(1-\\phi^m) - \\phi^m(1-\\phi^{mk})\\right\\}\\biggr]$") 468 | ) 469 | colnames(tab) <- c("Model", "Forecast variance: $\\sigma_h^2$") 470 | caption <- "Forecast variance expressions for each additive state space model, where $\\sigma^2$ is the residual variance, $m$ is the seasonal period, and $k$ is the integer part of $(h-1) /m$ (i.e., the number of complete years in the forecast period prior to time $T+h$)." 471 | if (html) { 472 | tab <- gsub("\\$\\\\hspace\\*\\{1cm\\}", paste0(c(rep(" ", 20), "$"), collapse = ""), tab) 473 | tab <- knitr::kable(tab, format = "html", escape = FALSE, caption = caption) %>% 474 | kableExtra::row_spec(c(1, 3, 4, 6, 7), background = "#f7f7f7") %>% 475 | kableExtra::row_spec(c(2, 5, 8:11), background = "#ececec") 476 | tab <- gsub("", "", tab) 477 | tab 478 | } else { 479 | tab[2, 1] <- paste0("\\midrule", tab[2, 1]) 480 | tab[3, 1] <- paste0("\\midrule", tab[3, 1]) 481 | tab[5, 1] <- paste0("\\midrule", tab[5, 1]) 482 | tab[6, 1] <- paste0("\\midrule", tab[6, 1]) 483 | tab[8, 1] <- paste0("\\midrule", tab[8, 1]) 484 | tab <- knitr::kable(tab, 485 | format = "latex", booktabs = TRUE, escape = FALSE, 486 | caption = caption) 487 | tab <- gsub("\\\\centering", "\\\\centering\\\\vspace*{-0.3cm}", tab) 488 | gsub("\\\\end\\{tabular\\}", "\\\\end{tabular}\\\\vspace*{0.3cm}", tab) 489 | } 490 | 491 | -------------------------------------------------------------------------------- /09-arima.R: -------------------------------------------------------------------------------- 1 | source("before-each-chapter.R") 2 | 3 | ## ----stationary, fig.cap="Which of these series are stationary? (a) Google closing stock price in 2015; (b) Daily change in the Google stock price in 2015; (c) Annual number of strikes in the US; (d) Monthly sales of new one-family houses sold in the US; (e) Annual price of a dozen eggs in the US (constant dollars); (f) Monthly total of pigs slaughtered in Victoria, Australia; (g) Annual total of Canadian Lynx furs traded by the Hudson Bay Company; (h) Quarterly Australian beer production; (i) Monthly Australian gas production.", echo=FALSE, fig.width=10,fig.asp=0.75, fig.env="figure*", fig.pos="t", warning = FALSE, message=FALSE---- 4 | p1 <- gafa_stock %>% 5 | filter(Symbol == "GOOG", year(Date) == 2015) %>% 6 | autoplot(Close) + 7 | labs(subtitle = "(a) Google closing price", x = "Day", y = " $US") 8 | 9 | p2 <- gafa_stock %>% 10 | filter(Symbol == "GOOG", year(Date) == 2015) %>% 11 | autoplot(difference(Close)) + 12 | labs(subtitle = "(b) Change in google price", x = "Day", y = "$US") 13 | 14 | p3 <- as_tsibble(fma::strikes) %>% 15 | autoplot(value) + 16 | labs(subtitle = "(c) Strikes: US", 17 | y = "Number of strikes", 18 | x = "Year") 19 | 20 | p4 <- as_tsibble(fma::hsales) %>% 21 | autoplot(value) + 22 | labs(subtitle = "(d) House sales: US", 23 | y = "Number of houses", 24 | x = "Month") 25 | 26 | p5 <- as_tsibble(fma::eggs) %>% 27 | autoplot(value) + 28 | labs(subtitle = "(e) Egg prices: US", 29 | y = "$US (contant prices)", 30 | x = "Year") 31 | 32 | p6 <- aus_livestock %>% 33 | filter(State == "Victoria", Animal == "Pigs") %>% 34 | autoplot(Count) + 35 | labs(subtitle = "(f) Pigs slaughtered: Victoria, Australia", 36 | y = "Number of pigs", 37 | x="Month") 38 | 39 | p7 <- pelt %>% 40 | autoplot(Lynx) + 41 | labs(subtitle = "(g) Lynx trapped: Canada", 42 | y = "Number of lynx", 43 | x = "Year") 44 | 45 | p8 <- aus_production %>% 46 | filter(year(Quarter) %in% 1991:1995) %>% 47 | autoplot(Beer) + 48 | labs(subtitle = "(h) Beer production: Australia", 49 | y = "Megalitres", 50 | x = "Quarter") 51 | 52 | p9 <- aus_production %>% 53 | autoplot(Gas) + 54 | labs(subtitle = "(i) Gas production: Australia", 55 | y = "Petajoules", 56 | x = "Quarter") 57 | 58 | (p1 | p2 | p3) / (p4 | p5 | p6) / (p7 | p8 | p9) 59 | 60 | ## ----acfstationary, echo=FALSE, fig.asp=0.35, fig.cap="The ACF of the Google closing stock price in 2015 (left) and of the daily changes in Google closing stock price in 2015 (right).", warning=FALSE---- 61 | google_2015 <- gafa_stock %>% 62 | filter(Symbol == "GOOG", year(Date) == 2015) %>% 63 | mutate(trading_day = row_number()) %>% 64 | update_tsibble(index = trading_day, regular = TRUE) 65 | p1 <- google_2015 %>% ACF(Close) %>% autoplot() + labs(subtitle = "Google closing stock price") 66 | p2 <- google_2015 %>% ACF(difference(Close)) %>% autoplot() + labs(subtitle = "Changes in Google closing stock price") 67 | p1 | p2 68 | 69 | ## ----googlb, echo=TRUE------------------------------------------------------------------------------------ 70 | google_2015 %>% 71 | mutate(diff_close = difference(Close)) %>% 72 | features(diff_close, ljung_box, lag = 10) 73 | 74 | ## ----googlb2, echo=FALSE---------------------------------------------------------------------------------- 75 | pv <- google_2015 %>% 76 | mutate(diff_close = difference(Close)) %>% 77 | features(diff_close, ljung_box, lag = 10) %>% 78 | pull(lb_pvalue) 79 | 80 | ## ----a10diff, fig.cap="Logs and seasonal differences of the A10 (antidiabetic) sales data. The logarithms stabilise the variance, while the seasonal differences remove the seasonality and trend.", fig.asp=0.95, echo = FALSE---- 81 | PBS %>% 82 | filter(ATC2 == "A10") %>% 83 | summarise(Cost = sum(Cost)/1e6) %>% 84 | transmute( 85 | `Sales ($million)` = Cost, 86 | `Log sales` = log(Cost), 87 | `Annual change in log sales` = difference(log(Cost), 12) 88 | ) %>% 89 | pivot_longer(-Month, names_to="Type", values_to="Sales") %>% 90 | mutate( 91 | Type = factor(Type, levels = c( 92 | "Sales ($million)", 93 | "Log sales", 94 | "Annual change in log sales")) 95 | ) %>% 96 | ggplot(aes(x = Month, y = Sales)) + 97 | geom_line() + 98 | facet_grid(vars(Type), scales = "free_y") + 99 | labs(title = "Antidiabetic drug sales", y = NULL) 100 | 101 | ## ----h02diff, fig.asp=1.2, fig.cap="(ref:h02diff)"-------------------------------------------------------- 102 | PBS %>% 103 | filter(ATC2 == "H02") %>% 104 | summarise(Cost = sum(Cost)/1e6) %>% 105 | transmute( 106 | `Sales ($million)` = Cost, 107 | `Log sales` = log(Cost), 108 | `Annual change in log sales` = difference(log(Cost), 12), 109 | `Doubly differenced log sales` = 110 | difference(difference(log(Cost), 12), 1) 111 | ) %>% 112 | pivot_longer(-Month, names_to="Type", values_to="Sales") %>% 113 | mutate( 114 | Type = factor(Type, levels = c( 115 | "Sales ($million)", 116 | "Log sales", 117 | "Annual change in log sales", 118 | "Doubly differenced log sales")) 119 | ) %>% 120 | ggplot(aes(x = Month, y = Sales)) + 121 | geom_line() + 122 | facet_grid(vars(Type), scales = "free_y") + 123 | labs(title = "Corticosteroid drug sales", y = NULL) 124 | 125 | ## ----googkpss--------------------------------------------------------------------------------------------- 126 | google_2015 %>% 127 | features(Close, unitroot_kpss) 128 | 129 | ## ----googkpss_store, include=FALSE------------------------------------------------------------------------ 130 | goog_kpss <- google_2015 %>% 131 | features(Close, unitroot_kpss) 132 | 133 | ## ----googkpss2, dependson="googkpss"---------------------------------------------------------------------- 134 | google_2015 %>% 135 | mutate(diff_close = difference(Close)) %>% 136 | features(diff_close, unitroot_kpss) 137 | 138 | ## ----googndiffs------------------------------------------------------------------------------------------- 139 | google_2015 %>% 140 | features(Close, unitroot_ndiffs) 141 | 142 | ## ----ausretaildiff---------------------------------------------------------------------------------------- 143 | aus_total_retail <- aus_retail %>% 144 | summarise(Turnover = sum(Turnover)) 145 | aus_total_retail %>% 146 | mutate(log_turnover = log(Turnover)) %>% 147 | features(log_turnover, unitroot_nsdiffs) 148 | 149 | aus_total_retail %>% 150 | mutate(log_turnover = difference(log(Turnover), 12)) %>% 151 | features(log_turnover, unitroot_ndiffs) 152 | 153 | ## ----arp, fig.cap="Two examples of data from autoregressive models with different parameters. Left: AR(1) with $y_t = 18 -0.8y_{t-1} + \\varepsilon_t$. Right: AR(2) with $y_t = 8 + 1.3y_{t-1}-0.7y_{t-2}+\\varepsilon_t$. In both cases, $\\varepsilon_t$ is normally distributed white noise with mean zero and variance one.", echo=FALSE, fig.asp=0.45---- 154 | set.seed(1) 155 | p1 <- autoplot(as_tsibble(10 + arima.sim(list(ar = -0.8), n = 100)),value) + 156 | labs(y="", x="Time", title="AR(1)") 157 | p2 <- autoplot(as_tsibble(20 + arima.sim(list(ar = c(1.3, -0.7)), n = 100)),value) + 158 | labs(y="", x="Time", title="AR(2)") 159 | p1 | p2 160 | 161 | ## ----maq, fig.cap="Two examples of data from moving average models with different parameters. Left: MA(1) with $y_t = 20 + \\varepsilon_t + 0.8\\varepsilon_{t-1}$. Right: MA(2) with $y_t = \\varepsilon_t- \\varepsilon_{t-1}+0.8\\varepsilon_{t-2}$. In both cases, $\\varepsilon_t$ is normally distributed white noise with mean zero and variance one.", echo=FALSE, fig.asp=0.45,fig.pos="t"---- 162 | set.seed(2) 163 | p1 <- autoplot(as_tsibble(20 + arima.sim(list(ma = 0.8), n = 100)),value) + 164 | labs(y="", x="Time", title="MA(1)") 165 | p2 <- autoplot(as_tsibble(arima.sim(list(ma = c(-1, +0.8)), n = 100)),value) + 166 | labs(y="", x="Time", title="MA(2)") 167 | p1 | p2 168 | 169 | ## ----pdqtable, echo=FALSE, warning=FALSE------------------------------------------------------------------ 170 | cbind( 171 | paste0("$",c("p","d","q"),"=$"), 172 | c( 173 | "order of the autoregressive part;", 174 | "degree of first differencing involved;", 175 | "order of the moving average part.") 176 | ) %>% 177 | knitr::kable(escape=FALSE, booktabs=TRUE, longtable=FALSE, align="rl") 178 | 179 | ## ----arimaspecialcases, echo=FALSE, warning=FALSE--------------------------------------------------------- 180 | df <- rbind( 181 | c("White noise", "ARIMA(0,0,0) with no constant"), 182 | c("Random walk", "ARIMA(0,1,0) with no constant"), 183 | c("Random walk with drift","ARIMA(0,1,0) with a constant"), 184 | c("Autoregression", "ARIMA($p$,0,0)"), 185 | c("Moving average", "ARIMA(0,0,$q$)")) 186 | out <- knitr::kable(df, escape=FALSE, booktabs=TRUE, caption="Special cases of ARIMA models.") 187 | if(!html) { 188 | out <- kableExtra::kable_styling(out, latex_options="hold_position") 189 | out <- gsub("\\\\centering", "\\\\centering\\\\vspace*{-0.3cm}", out) 190 | } 191 | out 192 | 193 | ## ----egyptexports, fig.cap="Annual Egyptian exports as a percentage of GDP since 1960.", fig.asp=0.5------ 194 | global_economy %>% 195 | filter(Code == "EGY") %>% 196 | autoplot(Exports) + 197 | labs(y = "% of GDP", title = "Egyptian exports") 198 | 199 | ## ----egyptexportsauto------------------------------------------------------------------------------------- 200 | fit <- global_economy %>% 201 | filter(Code == "EGY") %>% 202 | model(ARIMA(Exports)) 203 | report(fit) 204 | 205 | ## ----egyptexportsmodel, include=FALSE, warning=FALSE, dependson="egyptexportsauto"------------------------ 206 | stopifnot(identical( 207 | unlist(fit[1,2][[1]][[1]]$fit$spec), 208 | c(p=2L, d=0L, q=1L, P=0, D=0, Q=0, constant=TRUE, period.year=1) 209 | )) 210 | coef <- rlang::set_names(tidy(fit)$estimate, tidy(fit)$term) 211 | 212 | ## ----egyptexportsf, fig.cap="Forecasts of Egyptian exports.", fig.asp=0.5, dependson="egyptexportsauto"---- 213 | fit %>% forecast(h=10) %>% 214 | autoplot(global_economy) + 215 | labs(y = "% of GDP", title = "Egyptian exports") 216 | 217 | ## ----egyptacf, fig.cap="ACF of Egyptian exports.", fig.asp=0.3-------------------------------------------- 218 | global_economy %>% 219 | filter(Code == "EGY") %>% 220 | ACF(Exports) %>% 221 | autoplot() 222 | 223 | ## ----egyptpacf, fig.cap="PACF of Egyptian exports.", fig.asp=0.3------------------------------------------ 224 | global_economy %>% 225 | filter(Code == "EGY") %>% 226 | PACF(Exports) %>% 227 | autoplot() 228 | 229 | ## ----egyptar---------------------------------------------------------------------------------------------- 230 | fit2 <- global_economy %>% 231 | filter(Code == "EGY") %>% 232 | model(ARIMA(Exports ~ pdq(4,0,0))) 233 | report(fit2) 234 | 235 | ## ----ARMAgridsearch, echo=FALSE, fig.cap = "An illustrative example of the Hyndman-Khandakar stepwise search process", message=FALSE, warning=FALSE, fig.asp=1, out.width="60%", fig.width=4, fig.height=4, fig.pos="!h"---- 236 | start <- tribble( 237 | ~p, ~q, 238 | 0, 0, 239 | 1, 0, 240 | 0, 1, 241 | 2, 2 242 | ) 243 | selected <- tribble( 244 | ~p, ~q, 245 | 2, 2, 246 | 3, 3, 247 | 4, 2 248 | ) 249 | griddf <- expand.grid(p = 0:6, q = 0:6) %>% 250 | as_tibble() %>% 251 | left_join(start %>% mutate(start = TRUE)) %>% 252 | left_join(selected %>% mutate(chosen = TRUE)) %>% 253 | replace_na(list(start = FALSE, chosen = FALSE)) %>% 254 | mutate( 255 | step = case_when( 256 | start ~ 1, 257 | (p - selected$p[1])^2 + (q - selected$q[1])^2 <= 2 ~ 2, 258 | (p - selected$p[2])^2 + (q - selected$q[2])^2 <= 2 ~ 3, 259 | (p - selected$p[3])^2 + (q - selected$q[3])^2 <= 2 ~ 4, 260 | TRUE ~ NA_real_ 261 | ) 262 | ) %>% 263 | left_join(selected %>% 264 | mutate(step = row_number() + 1) %>% 265 | rename(fromp = p, fromq = q) 266 | ) %>% 267 | mutate(step = as.character(step)) 268 | griddf %>% 269 | ggplot(aes(x = q, y = p)) + 270 | geom_point(aes(alpha = 0.2), colour = "gray", size = 5, shape = 19) + 271 | geom_segment(aes(x = fromq, y = fromp, xend = q, yend = p, col=step), 272 | arrow = arrow(length = unit(0.15, "inches"), type='open'), 273 | size = 1, lineend = "butt") + 274 | geom_point(aes(col = step), size = 5, shape = 19) + 275 | geom_point(data = filter(griddf, chosen), size = 12, shape = 21, stroke = 1.4) + 276 | scale_y_reverse(breaks = 0:6) + 277 | scale_x_continuous(position = "top", breaks = 0:6) + 278 | theme_minimal() + 279 | theme( 280 | panel.grid.minor = element_blank(), 281 | axis.text = element_text(size = 10), 282 | axis.title.x = element_text(size = 12, hjust = 0), 283 | axis.title.y = element_text( 284 | size = 12, hjust = 1, 285 | angle = 0, margin = margin(t = 0, r = 10, b = 0, l = 0) 286 | ) 287 | ) + 288 | scale_colour_manual( 289 | breaks = paste(1:4), 290 | values = c("#D55E00", "#0072B2","#009E73", "#CC79A7") 291 | ) + 292 | guides(alpha = "none") 293 | 294 | ## ----caf, fig.cap="Exports of the Central African Republic as a percentage of GDP.", fig.asp=0.55--------- 295 | global_economy %>% 296 | filter(Code == "CAF") %>% 297 | autoplot(Exports) + 298 | labs(title="Central African Republic exports", 299 | y="% of GDP") 300 | 301 | ## ----caf2, fig.cap="Time plot and ACF and PACF plots for the differenced Central African Republic Exports.", warning=FALSE---- 302 | global_economy %>% 303 | filter(Code == "CAF") %>% 304 | gg_tsdisplay(difference(Exports), plot_type='partial') 305 | 306 | ## ----caf_fit---------------------------------------------------------------------------------------------- 307 | caf_fit <- global_economy %>% 308 | filter(Code == "CAF") %>% 309 | model(arima210 = ARIMA(Exports ~ pdq(2,1,0)), 310 | arima013 = ARIMA(Exports ~ pdq(0,1,3)), 311 | stepwise = ARIMA(Exports), 312 | search = ARIMA(Exports, stepwise=FALSE)) 313 | 314 | caf_fit %>% pivot_longer(!Country, names_to = "Model name", 315 | values_to = "Orders") 316 | glance(caf_fit) %>% arrange(AICc) %>% select(.model:BIC) 317 | 318 | ## ----cafhidden, dependson='caf_fit', include=FALSE-------------------------------------------------------- 319 | best <- format((caf_fit %>% pull(search))[[1]]) 320 | stepwise <- format((caf_fit %>% pull(stepwise))[[1]]) 321 | stopifnot(best == "ARIMA(3,1,0)") 322 | 323 | ## ----cafres, dependson='caf_fit', fig.cap="Residual plots for the ARIMA(3,1,0) model."-------------------- 324 | caf_fit %>% 325 | select(search) %>% 326 | gg_tsresiduals() 327 | 328 | ## ----caf_lb, dependson='caf_fit'-------------------------------------------------------------------------- 329 | augment(caf_fit) %>% 330 | filter(.model=='search') %>% 331 | features(.innov, ljung_box, lag = 10, dof = 3) 332 | 333 | ## ----caffc, fig.cap="Forecasts for the Central African Republic Exports.", fig.asp=0.65, dependson="caf_fit"---- 334 | caf_fit %>% 335 | forecast(h=5) %>% 336 | filter(.model=='search') %>% 337 | autoplot(global_economy) 338 | 339 | ## ----armaroots, fig.cap="Inverse characteristic roots for the ARIMA(3,1,0) model fitted to the Central African Republic Exports.", fig.width=3.8, fig.asp=1, out.width="65%"---- 340 | gg_arma(caf_fit %>% select(Country, search)) 341 | 342 | ## ----usemployment1, fig.cap="Monthly US leisure and hospitality employment, 2000-2019.", fig.asp=0.45----- 343 | leisure <- us_employment %>% 344 | filter(Title == "Leisure and Hospitality", 345 | year(Month) > 2000) %>% 346 | mutate(Employed = Employed/1000) %>% 347 | select(Month, Employed) 348 | autoplot(leisure, Employed) + 349 | labs(title = "US employment: leisure and hospitality", 350 | y="Number of people (millions)") 351 | 352 | ## ----usemployment2, fig.cap="Seasonally differenced Monthly US leisure and hospitality employment.", fig.asp=0.55, warning=FALSE---- 353 | leisure %>% 354 | gg_tsdisplay(difference(Employed, 12), 355 | plot_type='partial', lag=36) + 356 | labs(title="Seasonally differenced", y="") 357 | 358 | ## ----usemployment3, fig.cap="Double differenced Monthly US leisure and hospitality employment.", fig.asp=0.55, warning=FALSE---- 359 | leisure %>% 360 | gg_tsdisplay(difference(Employed, 12) %>% difference(), 361 | plot_type='partial', lag=36) + 362 | labs(title = "Double differenced", y="") 363 | 364 | ## ----usemployment4---------------------------------------------------------------------------------------- 365 | fit <- leisure %>% 366 | model( 367 | arima012011 = ARIMA(Employed ~ pdq(0,1,2) + PDQ(0,1,1)), 368 | arima210011 = ARIMA(Employed ~ pdq(2,1,0) + PDQ(0,1,1)), 369 | auto = ARIMA(Employed, stepwise = FALSE, approx = FALSE) 370 | ) 371 | fit %>% pivot_longer(everything(), names_to = "Model name", 372 | values_to = "Orders") 373 | glance(fit) %>% arrange(AICc) %>% select(.model:BIC) 374 | 375 | ## ----conshidden, include=FALSE, dependson="usemployment4"------------------------------------------------- 376 | leisure_best <- format((fit %>% pull(auto))[[1]]) %>% 377 | stringr::str_replace("\\[12\\]","$_{12}$") 378 | 379 | ## ----usemployment5, fig.cap="(ref:usemployment5)", fig.asp=0.6, dependson="usemployment4"----------------- 380 | fit %>% select(auto) %>% gg_tsresiduals(lag=36) 381 | 382 | ## ----usemployment6, dependson="usemployment4"------------------------------------------------------------- 383 | augment(fit) %>% 384 | filter(.model == "auto") %>% 385 | features(.innov, ljung_box, lag=24, dof=4) 386 | 387 | ## ----usemployment7, fig.cap="(ref:usemployment7)", fig.asp=0.5, dependson="usemployment4"----------------- 388 | forecast(fit, h=36) %>% 389 | filter(.model=='auto') %>% 390 | autoplot(leisure) + 391 | labs(title = "US employment: leisure and hospitality", 392 | y="Number of people (millions)") 393 | 394 | ## ----h02, fig.cap="Corticosteroid drug sales in Australia (in millions of scripts per month). Logged data shown in bottom panel."---- 395 | h02 <- PBS %>% 396 | filter(ATC2 == "H02") %>% 397 | summarise(Cost = sum(Cost)/1e6) 398 | h02 %>% 399 | mutate(log(Cost)) %>% 400 | pivot_longer(-Month) %>% 401 | ggplot(aes(x = Month, y = value)) + 402 | geom_line() + 403 | facet_grid(name ~ ., scales = "free_y") + 404 | labs(y="", title="Corticosteroid drug scripts (H02)") 405 | 406 | ## ----h02b, fig.cap="Seasonally differenced corticosteroid drug sales in Australia (in millions of scripts per month).", warning=FALSE---- 407 | h02 %>% gg_tsdisplay(difference(log(Cost), 12), 408 | plot_type='partial', lag_max = 24) 409 | 410 | ## ----h02aicc, echo=FALSE---------------------------------------------------------------------------------- 411 | models <- list( 412 | c(3,0,0,2,1,0), 413 | c(3,0,1,2,1,0), 414 | c(3,0,2,2,1,0), 415 | c(3,0,1,1,1,0), 416 | c(3,0,1,0,1,1), 417 | c(3,0,1,0,1,2), 418 | c(3,0,1,1,1,1) 419 | ) 420 | library(purrr) 421 | model_defs <- map(models, ~ ARIMA(log(Cost) ~ 0 + pdq(!!.[1], !!.[2], !!.[3]) + PDQ(!!.[4], !!.[5], !!.[6]))) 422 | model_defs <- set_names(model_defs, map_chr(models, 423 | ~ sprintf("ARIMA(%i,%i,%i)(%i,%i,%i)$_{12}$", .[1], .[2], .[3], .[4], .[5], .[6]))) 424 | 425 | fit <- h02 %>% 426 | model(!!!model_defs) 427 | 428 | tab <- fit %>% 429 | glance %>% 430 | arrange(AICc) %>% 431 | select(.model, AICc) %>% 432 | knitr::kable(digits=2, row.names=FALSE, align='cc', booktabs=TRUE, escape = FALSE, 433 | col.names=c("Model","AICc"), 434 | caption="AICc values for various ARIMA models applied for H02 monthly script sales data.") 435 | 436 | if (html) { 437 | tab 438 | } else { 439 | tab <- gsub("\\\\centering", "\\\\centering\\\\vspace*{-0.3cm}", tab) 440 | gsub("\\\\end\\{tabular\\}", "\\\\end{tabular}\\\\vspace*{0.0cm}", tab) 441 | } 442 | 443 | ## ----checkclaimh02, echo=FALSE, warning=FALSE------------------------------------------------------------- 444 | if(glance(fit)$.model[which.min(glance(fit)$AICc)] != "ARIMA(3,0,1)(0,1,2)$_{12}$") 445 | stop("Not best model") 446 | 447 | ## ----h02res, fig.cap="Innovation residuals from the ARIMA(3,0,1)(0,1,2)$_{12}$ model applied to the H02 monthly script sales data."---- 448 | fit <- h02 %>% 449 | model(ARIMA(log(Cost) ~ 0 + pdq(3,0,1) + PDQ(0,1,2))) 450 | fit %>% gg_tsresiduals(lag_max=36) 451 | 452 | ## ----h02resb---------------------------------------------------------------------------------------------- 453 | augment(fit) %>% 454 | features(.innov, ljung_box, lag = 36, dof = 6) 455 | 456 | ## ----autoh02, echo=FALSE, warning=FALSE------------------------------------------------------------------- 457 | manual <- gsub("[<>]","",format(fit[[1]])[[1]]) 458 | manual <- gsub("[","$_{",manual, fixed=TRUE) 459 | manual <- gsub("]","}$",manual, fixed=TRUE) 460 | autoh2 <- gsub("[<>]","",format(model(h02, ARIMA(log(Cost)))[[1]])[[1]]) 461 | autoh2 <- gsub("[","$_{",autoh2, fixed=TRUE) 462 | autoh2 <- gsub("]","}$",autoh2, fixed=TRUE) 463 | besth2 <- gsub("[<>]","",format(model(h02, ARIMA(log(Cost), stepwise=FALSE, approximation=FALSE))[[1]])[[1]]) 464 | besth2 <- gsub("[","$_{",besth2, fixed=TRUE) 465 | besth2 <- gsub("]","}$",besth2, fixed=TRUE) 466 | 467 | ## ----h02search, echo=FALSE, warning=FALSE----------------------------------------------------------------- 468 | models <- list( 469 | c(3,0,0,2,1,0), 470 | c(3,0,1,2,1,0), 471 | c(3,0,2,2,1,0), 472 | c(3,0,1,1,1,0), 473 | c(3,0,1,0,1,1), 474 | c(3,0,1,0,1,2), 475 | c(3,0,1,0,1,3), 476 | c(3,0,1,1,1,1), 477 | c(3,0,3,0,1,1), 478 | c(3,0,2,0,1,1), 479 | #c(2,1,0,0,1,0), 480 | c(2,1,0,0,1,1), 481 | c(2,1,0,1,1,0), 482 | c(2,1,1,0,1,1), 483 | c(2,1,2,0,1,1), 484 | c(2,1,3,0,1,1), 485 | c(2,1,4,0,1,1)) 486 | 487 | model_defs <- map(models, ~ ARIMA(log(Cost) ~ 0 + pdq(!!.[1], !!.[2], !!.[3]) + PDQ(!!.[4], !!.[5], !!.[6]))) 488 | model_defs <- set_names(model_defs, map_chr(models, 489 | ~ sprintf("ARIMA(%i,%i,%i)(%i,%i,%i)$_{12}$", .[1], .[2], .[3], .[4], .[5], .[6]))) 490 | 491 | # WARNING generated by following code indicating convergence issue. 492 | fit <- h02 %>% 493 | filter_index(~ "2006 Jun") %>% 494 | model(!!!model_defs) 495 | 496 | rmse_table <- fit %>% 497 | forecast(h = "2 years") %>% 498 | accuracy(h02 %>% filter_index("2006 Jul" ~ .)) %>% 499 | arrange(RMSE) %>% 500 | select(.model, RMSE) 501 | # Check manual model is still second 502 | stopifnot(rmse_table$.model[2] == manual) 503 | 504 | tab <- rmse_table %>% 505 | knitr::kable(escape=FALSE, 506 | digits=4, row.names=FALSE, align='cc', booktabs=TRUE, 507 | caption="RMSE values for various ARIMA models applied for H02 monthly script sales data over test set July 2006 -- June 2008 .") 508 | 509 | if (html) { 510 | tab 511 | } else { 512 | tab <- gsub("\\\\centering", "\\\\centering\\\\vspace*{-0.3cm}\\\\small", tab) 513 | gsub("\\\\end\\{tabular\\}", "\\\\end{tabular}\\\\vspace*{0.3cm}", tab) 514 | } 515 | 516 | ## ----h02f, fig.cap="Forecasts from the ARIMA(3,0,1)(0,1,2)$_{12}$ model applied to the H02 monthly script sales data.", fig.asp=0.5---- 517 | h02 %>% 518 | model(ARIMA(log(Cost) ~ 0 + pdq(3,0,1) + PDQ(0,1,2))) %>% 519 | forecast() %>% 520 | autoplot(h02) + 521 | labs(y=" $AU (millions)", 522 | title="Corticosteroid drug scripts (H02) sales") 523 | 524 | ## ----venn, fig.cap="The ETS and ARIMA model classes overlap with the additive ETS models having equivalent ARIMA forms.", echo=FALSE, message=FALSE, warning=FALSE, out.width="90%"---- 525 | library(latex2exp) 526 | cols = c(ets = "#D55E00", arima = "#0072b2") 527 | tibble( 528 | x = c(-0.866, 0.866), 529 | y = c(-0.5, -0.5), 530 | labels = c("ets", "arima"), 531 | ) %>% 532 | ggplot(aes(colour = labels, fill=labels)) + 533 | ggforce::geom_circle(aes(x0 = x, y0 = y, r = 1.5), alpha = 0.3, size = 1) + 534 | scale_colour_manual(values=cols) + scale_fill_manual(values=cols) + 535 | coord_fixed() + guides(fill = "none") + 536 | geom_text(aes(label = "ETS models", x = -1.5, y = 1.15), col = cols["ets"], fontface = "bold", size=6) + 537 | geom_text(aes(label = "Combination\n of components", x = -1.5, y = 0.3), col = cols["ets"], size=4) + 538 | geom_text(aes(label = "9 non-additive\n ETS models", x = -1.5, y = -0.6), col = cols["ets"], size=4) + 539 | geom_text(aes(label = "All ETS models\n with M components", x = -.95, y = -1.6), col = cols["ets"], size=4) + 540 | geom_text(aes(label = "ARIMA models", x = 1.5, y = 1.15), col = cols["arima"], fontface = "bold", size=6) + 541 | geom_text(aes(label = "Modelling\n autocorrelations", x = 1.5, y = 0.3), col = cols["arima"], size=4) + 542 | annotate("text", label = TeX("Potentially $\\infty$ models"), x = 1.5, y = -0.6, col = cols["arima"], size=4) + 543 | geom_text(aes(label = "All stationary models\n Many large models", x = 1.01, y = -1.6), col = cols["arima"], size=4) + 544 | geom_text(aes(label = "6 additive\n ETS models", x = 0, y = -0.6), col = "#6b6859", size=4) + 545 | guides(colour = "none", fill = "none") + theme_void() 546 | 547 | ## ----etsarima, echo=FALSE--------------------------------------------------------------------------------- 548 | tab <- data.frame( 549 | etsmodel = c( 550 | "ETS(A,N,N)", 551 | "ETS(A,A,N)","", 552 | "ETS(A,A$_d$,N)","","", 553 | "ETS(A,N,A)", 554 | "ETS(A,A,A)", 555 | "ETS(A,A$_d$,A)"), 556 | arimamodel = c( 557 | "ARIMA(0,1,1)", 558 | "ARIMA(0,2,2)","", 559 | "ARIMA(1,1,2)","","", 560 | "ARIMA(0,1,$m$)(0,1,0)$_m$", 561 | "ARIMA(0,1,$m+1$)(0,1,0)$_m$", 562 | "ARIMA(1,0,$m+1$)(0,1,0)$_m$"), 563 | parameters = c( 564 | "$\\theta_1=\\alpha-1$", 565 | "$\\theta_1=\\alpha+\\beta-2$", 566 | "$\\theta_2=1-\\alpha$", 567 | "$\\phi_1=\\phi$", 568 | "$\\theta_1=\\alpha+\\phi\\beta-1-\\phi$", 569 | "$\\theta_2=(1-\\alpha)\\phi$", 570 | "","","")) 571 | if(html) { 572 | out <- knitr::kable(tab, format='html', col.names=c("ETS model","ARIMA model", "Parameters"), booktabs=TRUE, escape=FALSE, caption="Equivalence relationships between ETS and ARIMA models.") %>% 573 | kableExtra::row_spec(c(1,4:6,8), background="#f6f6f6") %>% 574 | kableExtra::row_spec(c(2:3,7,9), background="#e6e6e6") 575 | out <- gsub("","",out) 576 | out 577 | } else { 578 | tab <- knitr::kable(tab, 579 | format='latex', 580 | col.names=c("ETS model","ARIMA model", "Parameters"), 581 | booktabs=TRUE, 582 | escape=FALSE, 583 | caption="Equivalence relationships between ETS and ARIMA models.") 584 | tab <- gsub("\\\\centering", "\\\\centering\\\\vspace*{-0.5cm}", tab) 585 | gsub("\\\\end\\{tabular\\}", "\\\\end{tabular}\\\\vspace*{0.3cm}", tab) 586 | } 587 | 588 | ## ----tscvpop, echo=TRUE, warning=FALSE-------------------------------------------------------------------- 589 | aus_economy <- global_economy %>% 590 | filter(Code == "AUS") %>% 591 | mutate(Population = Population/1e6) 592 | 593 | aus_economy %>% 594 | slice(-n()) %>% 595 | stretch_tsibble(.init = 10) %>% 596 | model( 597 | ETS(Population), 598 | ARIMA(Population) 599 | ) %>% 600 | forecast(h = 1) %>% 601 | accuracy(aus_economy) %>% 602 | select(.model, RMSE:MAPE) 603 | 604 | ## ----popetsplot, echo=TRUE, fig.cap="Forecasts from an ETS model fitted to the Australian population.", fig.asp=0.55---- 605 | aus_economy %>% 606 | model(ETS(Population)) %>% 607 | forecast(h = "5 years") %>% 608 | autoplot(aus_economy %>% filter(Year >= 2000)) + 609 | labs(title = "Australian population", 610 | y = "People (millions)") 611 | 612 | ## ----qcement1, echo=TRUE---------------------------------------------------------------------------------- 613 | cement <- aus_production %>% 614 | select(Cement) %>% 615 | filter_index("1988 Q1" ~ .) 616 | train <- cement %>% filter_index(. ~ "2007 Q4") 617 | 618 | ## ----qcement2, echo=TRUE, fig.cap="Residual diagnostic plots for the ARIMA model fitted to the quarterly cement production training data.", dependson="qcement1"---- 619 | fit_arima <- train %>% model(ARIMA(Cement)) 620 | report(fit_arima) 621 | fit_arima %>% gg_tsresiduals(lag_max = 16) 622 | 623 | ## ----qcement2b, echo=TRUE, dependson="qcement1"----------------------------------------------------------- 624 | augment(fit_arima) %>% 625 | features(.innov, ljung_box, lag = 16, dof = 6) 626 | 627 | ## ----qcement3, echo=TRUE, fig.cap="Residual diagnostic plots for the ETS model fitted to the quarterly cement production training data.", dependson="qcement1"---- 628 | fit_ets <- train %>% model(ETS(Cement)) 629 | report(fit_ets) 630 | fit_ets %>% 631 | gg_tsresiduals(lag_max = 16) 632 | 633 | ## ----qcement3b, echo=TRUE, dependson="qcement1"----------------------------------------------------------- 634 | augment(fit_ets) %>% 635 | features(.innov, ljung_box, lag = 16, dof = 6) 636 | 637 | ## ----qcement4, echo=TRUE, dependson=c("qcement2","qcement3")---------------------------------------------- 638 | # Generate forecasts and compare accuracy over the test set 639 | bind_rows( 640 | fit_arima %>% accuracy(), 641 | fit_ets %>% accuracy(), 642 | fit_arima %>% forecast(h = 10) %>% accuracy(cement), 643 | fit_ets %>% forecast(h = 10) %>% accuracy(cement) 644 | ) %>% 645 | select(-ME, -MPE, -ACF1) 646 | 647 | ## ----qcement5, echo=TRUE, fig.cap="Forecasts from an ARIMA model fitted to all of the available quarterly cement production data since 1988.", dependson="qcement1", fig.asp=0.45---- 648 | cement %>% 649 | model(ARIMA(Cement)) %>% 650 | forecast(h="3 years") %>% 651 | autoplot(cement) + 652 | labs(title = "Cement production in Australia", 653 | y = "Tonnes ('000)") 654 | 655 | ## ----wnacfplus, fig.asp=0.3, echo=FALSE, fig.cap="Left: ACF for a white noise series of 36 numbers. Middle: ACF for a white noise series of 360 numbers. Right: ACF for a white noise series of 1,000 numbers.", fig.env="figure*"---- 656 | set.seed(1) 657 | x1 <- tsibble(idx = seq_len(36), y = rnorm(36), index = idx) 658 | x2 <- tsibble(idx = seq_len(360), y = rnorm(360), index = idx) 659 | x3 <- tsibble(idx = seq_len(1000), y = rnorm(1000), index = idx) 660 | p1 <- x1 %>% ACF(y, lag_max = 20) %>% autoplot() + ylim(c(-1, 1)) + labs(x="lag") 661 | p2 <- x2 %>% ACF(y, lag_max = 20) %>% autoplot() + ylim(c(-1, 1)) + labs(x="lag") 662 | p3 <- x3 %>% ACF(y, lag_max = 20) %>% autoplot() + ylim(c(-1, 1)) + labs(x="lag") 663 | p1 | p2 | p3 664 | 665 | ## ----ex9.6------------------------------------------------------------------------------------------------ 666 | y <- numeric(100) 667 | e <- rnorm(100) 668 | for(i in 2:100) 669 | y[i] <- 0.6*y[i-1] + e[i] 670 | sim <- tsibble(idx = seq_len(100), y = y, index = idx) 671 | 672 | ## ----hares, echo=FALSE, warning=FALSE, message=FALSE------------------------------------------------------ 673 | pelt_table <- pelt %>% 674 | tail(5) %>% 675 | select(Year, Hare) 676 | tab <- as.data.frame(matrix(c(NA,pelt_table$Hare), nrow=1)) 677 | colnames(tab) <- c("Year",pelt_table$Year) 678 | tab[1,1] <- "Number of hare pelts" 679 | tab %>% 680 | knitr::kable(booktabs=TRUE) 681 | 682 | fit <- pelt %>% model(ARIMA(Hare ~ pdq(4,0,0))) 683 | coef <- rlang::set_names(tidy(fit)$estimate, tidy(fit)$term) 684 | constant <- coef['constant'] 685 | phi1 <- coef['ar1'] 686 | phi2 <- coef['ar2'] 687 | phi3 <- coef['ar3'] 688 | phi4 <- coef['ar4'] 689 | 690 | ## ----swisspop, echo=FALSE, warning=FALSE, message=FALSE--------------------------------------------------- 691 | swiss_pop <- global_economy %>% 692 | filter(Country=="Switzerland") %>% 693 | tail(5) %>% 694 | select(Year, Population) %>% 695 | mutate(Population = Population/1e6) 696 | tab <- as.data.frame(matrix(c(NA,swiss_pop$Population), nrow=1)) 697 | colnames(tab) <- c("Year",swiss_pop$Year) 698 | tab[1,1] <- "Population (millions)" 699 | tab %>% 700 | knitr::kable(digits=2, booktabs=TRUE) 701 | fit <- global_economy %>% 702 | filter(Country=="Switzerland") %>% 703 | model(ARIMA(Population/1e6 ~ 1 + pdq(3,1,0))) 704 | coef <- rlang::set_names(tidy(fit)$estimate, tidy(fit)$term) 705 | phi1 <- coef['ar1'] 706 | phi2 <- coef['ar2'] 707 | phi3 <- coef['ar3'] 708 | intercept <- coef['constant'] 709 | 710 | -------------------------------------------------------------------------------- /10-dynamic-regression.R: -------------------------------------------------------------------------------- 1 | source("before-each-chapter.R") 2 | 3 | ## ----usconsump, fig.cap="Percentage changes in quarterly personal consumption expenditure and personal disposable income for the USA, 1970 Q1 to 2019 Q2.", fig.asp=0.55---- 4 | us_change %>% 5 | pivot_longer(c(Consumption, Income), 6 | names_to = "var", values_to = "value") %>% 7 | ggplot(aes(x = Quarter, y = value)) + 8 | geom_line() + 9 | facet_grid(vars(var), scales = "free_y") + 10 | labs(title = "US consumption and personal income", 11 | y = "Quarterly % change") 12 | 13 | ## ----usconsump2, fig.cap="Residuals ($e_t$) obtained from a regression of change in consumption expenditure on change in disposable income, assuming an ARIMA(1,0,2) error model."---- 14 | fit <- us_change %>% 15 | model(ARIMA(Consumption ~ Income)) 16 | report(fit) 17 | 18 | ## ----usconsumpparam, echo=FALSE, dependson="usconsump2"--------------------------------------------------- 19 | coef <- rlang::set_names(tidy(fit)$estimate, tidy(fit)$term) 20 | phi1 <- coef["ar1"] 21 | theta1 <- coef["ma1"] 22 | theta2 <- coef["ma2"] 23 | intercept <- coef["intercept"] 24 | slope <- coef["Income"] 25 | sigma2 <- glance(fit)$sigma2 26 | 27 | ## ----usconsumpres, fig.cap="Regression residuals ($\\eta_t$) and ARIMA residuals ($\\varepsilon_t$) from the fitted model.", fig.asp=0.55, dependson="usconsump2"---- 28 | bind_rows( 29 | `Regression residuals` = 30 | as_tibble(residuals(fit, type = "regression")), 31 | `ARIMA residuals` = 32 | as_tibble(residuals(fit, type = "innovation")), 33 | .id = "type" 34 | ) %>% 35 | mutate( 36 | type = factor(type, levels=c( 37 | "Regression residuals", "ARIMA residuals")) 38 | ) %>% 39 | ggplot(aes(x = Quarter, y = .resid)) + 40 | geom_line() + 41 | facet_grid(vars(type)) 42 | 43 | ## ----digits, echo=FALSE----------------------------------------------------------------------------------- 44 | options(digits = 5) 45 | 46 | ## ----usconsumpres2, fig.cap="The innovation residuals (i.e., the estimated ARIMA errors) are not significantly different from white noise.", dependson=c('usconsump2','digits'), class.output='r', dependson="usconsump2"---- 47 | fit %>% gg_tsresiduals() 48 | 49 | ## ----usconsumpres3, dependson="usconsump2"---------------------------------------------------------------- 50 | augment(fit) %>% 51 | features(.innov, ljung_box, dof = 5, lag = 8) 52 | 53 | ## ----usconsump3, fig.cap="Forecasts obtained from regressing the percentage change in consumption expenditure on the percentage change in disposable income, with an ARIMA(1,0,2) error model.", fig.asp=0.5, dependson='usconsump2'---- 54 | us_change_future <- new_data(us_change, 8) %>% 55 | mutate(Income = mean(us_change$Income)) 56 | forecast(fit, new_data = us_change_future) %>% 57 | autoplot(us_change) + 58 | labs(y = "Percentage change") 59 | 60 | ## ----elecscatter, echo=TRUE, fig.cap="Daily electricity demand versus maximum daily temperature for the state of Victoria in Australia for 2014.", fig.asp=0.75---- 61 | vic_elec_daily <- vic_elec %>% 62 | filter(year(Time) == 2014) %>% 63 | index_by(Date = date(Time)) %>% 64 | summarise( 65 | Demand = sum(Demand) / 1e3, 66 | Temperature = max(Temperature), 67 | Holiday = any(Holiday) 68 | ) %>% 69 | mutate(Day_Type = case_when( 70 | Holiday ~ "Holiday", 71 | wday(Date) %in% 2:6 ~ "Weekday", 72 | TRUE ~ "Weekend" 73 | )) 74 | 75 | vic_elec_daily %>% 76 | ggplot(aes(x = Temperature, y = Demand, colour = Day_Type)) + 77 | geom_point() + 78 | labs(y = "Electricity demand (GW)", 79 | x = "Maximum daily temperature") 80 | 81 | ## ----electime, fig.cap="Daily electricity demand and maximum daily temperature for the state of Victoria in Australia for 2014."---- 82 | vic_elec_daily %>% 83 | pivot_longer(c(Demand, Temperature)) %>% 84 | ggplot(aes(x = Date, y = value)) + 85 | geom_line() + 86 | facet_grid(name ~ ., scales = "free_y") + ylab("") 87 | 88 | ## ----elecdailyfit, fig.cap="Residuals diagnostics for a dynamic regression model for daily electricity demand with workday and quadratic temperature effects."---- 89 | fit <- vic_elec_daily %>% 90 | model(ARIMA(Demand ~ Temperature + I(Temperature^2) + 91 | (Day_Type == "Weekday"))) 92 | fit %>% gg_tsresiduals() 93 | augment(fit) %>% 94 | features(.innov, ljung_box, dof = 9, lag = 14) 95 | 96 | ## ----checkelecfit, include=FALSE, dependson="elecdailyfit"------------------------------------------------ 97 | ncoef <- fit %>% tidy() %>% NROW() 98 | if(ncoef != 9L) 99 | stop("dof incorrect") 100 | 101 | ## ----elecdailyfc, fig.cap="Forecasts from the dynamic regression model for daily electricity demand. All future temperatures have been set to 26 degrees, and the working day dummy variable has been set to known future values.", dependson='elecdailyfit'---- 102 | vic_elec_future <- new_data(vic_elec_daily, 14) %>% 103 | mutate( 104 | Temperature = 26, 105 | Holiday = c(TRUE, rep(FALSE, 13)), 106 | Day_Type = case_when( 107 | Holiday ~ "Holiday", 108 | wday(Date) %in% 2:6 ~ "Weekday", 109 | TRUE ~ "Weekend" 110 | ) 111 | ) 112 | forecast(fit, vic_elec_future) %>% 113 | autoplot(vic_elec_daily) + 114 | labs(title="Daily electricity demand: Victoria", 115 | y="GW") 116 | 117 | ## ----austa, fig.cap="Total annual passengers (in millions) for Australian air carriers, 1970--2016.", fig.asp=0.5---- 118 | aus_airpassengers %>% 119 | autoplot(Passengers) + 120 | labs(y = "Passengers (millions)", 121 | title = "Total annual air passengers") 122 | 123 | ## ----deterministictrend----------------------------------------------------------------------------------- 124 | fit_deterministic <- aus_airpassengers %>% 125 | model(deterministic = ARIMA(Passengers ~ 1 + trend() + 126 | pdq(d = 0))) 127 | report(fit_deterministic) 128 | 129 | ## ----austaparams, echo=FALSE, dependson='deterministictrend'---------------------------------------------- 130 | coef <- rlang::set_names(tidy(fit_deterministic)$estimate, tidy(fit_deterministic)$term) 131 | phi1 <- coef["ar1"] 132 | intercept <- coef["intercept"] 133 | slope <- coef["trend()"] 134 | sigma2 <- glance(fit_deterministic)$sigma2 135 | 136 | ## ----stochastictrend-------------------------------------------------------------------------------------- 137 | fit_stochastic <- aus_airpassengers %>% 138 | model(stochastic = ARIMA(Passengers ~ pdq(d = 1))) 139 | report(fit_stochastic) 140 | 141 | ## ----austaparams2, echo=FALSE, dependson='stochastictrend'------------------------------------------------ 142 | coef <- rlang::set_names(tidy(fit_stochastic)$estimate, tidy(fit_stochastic)$term) 143 | drift <- coef["constant"] 144 | sigma2 <- glance(fit_stochastic)$sigma2 145 | 146 | ## ----austaf, fig.cap="Forecasts of annual passengers for Australian air carriers using a deterministic trend model (orange) and a stochastic trend model (blue).", message=FALSE,dependson=c("deterministictrend","stochastictrend")---- 147 | aus_airpassengers %>% 148 | autoplot(Passengers) + 149 | autolayer(fit_stochastic %>% forecast(h = 20), 150 | colour = "#0072B2", level = 95) + 151 | autolayer(fit_deterministic %>% forecast(h = 20), 152 | colour = "#D55E00", alpha = 0.65, level = 95) + 153 | labs(y = "Air passengers (millions)", 154 | title = "Forecasts from trend models") 155 | 156 | ## ----eatout, fig.width=10, fig.asp=0.8,fig.cap="Using Fourier terms and ARIMA errors for forecasting monthly expenditure on eating out in Australia.", fig.env="figure*"---- 157 | aus_cafe <- aus_retail %>% 158 | filter( 159 | Industry == "Cafes, restaurants and takeaway food services", 160 | year(Month) %in% 2004:2018 161 | ) %>% 162 | summarise(Turnover = sum(Turnover)) 163 | 164 | fit <- model(aus_cafe, 165 | `K = 1` = ARIMA(log(Turnover) ~ fourier(K=1) + PDQ(0,0,0)), 166 | `K = 2` = ARIMA(log(Turnover) ~ fourier(K=2) + PDQ(0,0,0)), 167 | `K = 3` = ARIMA(log(Turnover) ~ fourier(K=3) + PDQ(0,0,0)), 168 | `K = 4` = ARIMA(log(Turnover) ~ fourier(K=4) + PDQ(0,0,0)), 169 | `K = 5` = ARIMA(log(Turnover) ~ fourier(K=5) + PDQ(0,0,0)), 170 | `K = 6` = ARIMA(log(Turnover) ~ fourier(K=6) + PDQ(0,0,0)) 171 | ) 172 | 173 | fit %>% 174 | forecast(h = "2 years") %>% 175 | autoplot(aus_cafe, level = 95) + 176 | facet_wrap(vars(.model), ncol = 2) + 177 | guides(colour = "none", fill = "none", level = "none") + 178 | geom_label( 179 | aes(x = yearmonth("2007 Jan"), y = 4250, 180 | label = paste0("AICc = ", format(AICc))), 181 | data = glance(fit) 182 | ) + 183 | labs(title= "Total monthly eating-out expenditure", 184 | y="$ billions") 185 | 186 | ## ----tvadvert, fig.cap="Numbers of insurance quotations provided per month and the expenditure on advertising per month."---- 187 | insurance %>% 188 | pivot_longer(Quotes:TVadverts) %>% 189 | ggplot(aes(x = Month, y = value)) + 190 | geom_line() + 191 | facet_grid(vars(name), scales = "free_y") + 192 | labs(y = "", title = "Insurance advertising and quotations") 193 | 194 | ## ----tvadvert2, dependson='tdadvert'---------------------------------------------------------------------- 195 | fit <- insurance %>% 196 | # Restrict data so models use same fitting period 197 | mutate(Quotes = c(NA, NA, NA, Quotes[4:40])) %>% 198 | # Estimate models 199 | model( 200 | lag0 = ARIMA(Quotes ~ pdq(d = 0) + TVadverts), 201 | lag1 = ARIMA(Quotes ~ pdq(d = 0) + 202 | TVadverts + lag(TVadverts)), 203 | lag2 = ARIMA(Quotes ~ pdq(d = 0) + 204 | TVadverts + lag(TVadverts) + 205 | lag(TVadverts, 2)), 206 | lag3 = ARIMA(Quotes ~ pdq(d = 0) + 207 | TVadverts + lag(TVadverts) + 208 | lag(TVadverts, 2) + lag(TVadverts, 3)) 209 | ) 210 | 211 | ## ----tvadvertaicc, dependson="tvadvert2"------------------------------------------------------------------ 212 | glance(fit) 213 | 214 | ## ----tvfitcheck, echo = FALSE, dependson='tdadvert2'------------------------------------------------------ 215 | # Check AICc order 216 | if (sort.int(glance(fit)$AICc, index.return = TRUE)$ix[1] != 2) { 217 | stop("TV model not correct") 218 | } 219 | 220 | ## ----tvadvert3, dependson="tvadvert"---------------------------------------------------------------------- 221 | fit_best <- insurance %>% 222 | model(ARIMA(Quotes ~ pdq(d = 0) + 223 | TVadverts + lag(TVadverts))) 224 | report(fit_best) 225 | 226 | ## ----tvadvertparam, echo=FALSE, dependson='tdadvert3'----------------------------------------------------- 227 | # Check model 228 | tidy_fit <- tidy(fit_best) 229 | if (!identical(tidy_fit$term[1:3], c("ar1", "ma1", "ma2")) | NROW(tidy_fit) != 6L) { 230 | stop("Not an ARMA(1,2) model") 231 | } 232 | # Store coefficients 233 | coef <- rlang::set_names(tidy_fit$estimate, tidy_fit$term) 234 | phi1 <- coef["ar1"] 235 | ma1 <- coef["ma1"] 236 | ma2 <- coef["ma2"] 237 | intercept <- coef["intercept"] 238 | gamma0 <- coef["TVadverts"] 239 | gamma1 <- coef["lag(TVadverts)"] 240 | 241 | ## ----tvadvertf8, fig.cap="Forecasts of monthly insurance quotes, assuming that the future advertising expenditure is 8 units in each future month.", dependson='tvadvert3', fig.asp=0.55---- 242 | insurance_future <- new_data(insurance, 20) %>% 243 | mutate(TVadverts = 8) 244 | fit_best %>% 245 | forecast(insurance_future) %>% 246 | autoplot(insurance) + 247 | labs( 248 | y = "Quotes", 249 | title = "Forecast quotes with future advertising set to 8" 250 | ) 251 | 252 | -------------------------------------------------------------------------------- /11-hierarchical-forecasting.R: -------------------------------------------------------------------------------- 1 | source("before-each-chapter.R") 2 | 3 | ## ----HierTree, echo=FALSE, fig.cap="A two level hierarchical tree diagram.", message=FALSE, warning=FALSE, fig.show = "hold",fig.height=6.2,fig.width=11, out.width="40%"---- 4 | if (html) { 5 | knitr::include_graphics("figs/hts.png") 6 | } else { 7 | g <- igraph::graph_from_literal(Total - -A:B, A - -AA:AB:AC, B - -BA:BB) 8 | layout <- igraph::layout_as_tree(g, root = "Total") 9 | igraph::V(g)$color <- c( 10 | "Thistle", "GreenYellow", "LightBlue", 11 | rep("GreenYellow", 3), rep("LightBlue", 2) 12 | ) 13 | igraph::V(g)$label.cex <- 2 14 | plot(g, layout = layout, vertex.size = 40) 15 | } 16 | 17 | ## ----aus-states-tab, echo=FALSE--------------------------------------------------------------------------- 18 | tab <- data.frame( 19 | State = c("Australian Capital Territory", "New South Wales", "Northern Territory", "Queensland", "South Australia", "Tasmania", "Victoria", "Western Australia"), 20 | Region = c("Canberra", "Blue Mountains, Capital Country, Central Coast, Central NSW, Hunter, New England North West, North Coast NSW, Outback NSW, Riverina, Snowy Mountains, South Coast, Sydney, The Murray.", "Alice Springs, Barkly, Darwin, Kakadu Arnhem, Katherine Daly, Lasseter, MacDonnell.", "Brisbane, Bundaberg, Central Queensland, Darling Downs, Fraser Coast, Gold Coast, Mackay, Northern Outback, Sunshine Coast, Tropical North Queensland, Whitsundays.", "Adelaide, Adelaide Hills, Barossa, Clare Valley, Eyre Peninsula, Fleurieu Peninsula, Flinders Ranges and Outback, Kangaroo Island, Limestone Coast, Murraylands, Riverland, Yorke Peninsula.", "East Coast, Hobart and the South, Launceston Tamar and the North, North West, Wilderness West.", "Ballarat, Bendigo Loddon, Central Highlands, Central Murray, Geelong and the Bellarine, Gippsland, Goulburn, Great Ocean Road, High Country, Lakes, Macedon, Mallee, Melbourne, Melbourne East, Murray East, Peninsula, Phillip Island, Spa Country, Upper Yarra, Western Grampians, Wimmera.", "Australia's Coral Coast, Australia's Golden Outback, Australia's North West, Australia's South West, Experience Perth.") 21 | ) %>% 22 | knitr::kable(caption = "Australian tourism regions.", 23 | booktabs = TRUE, 24 | longtable = FALSE, 25 | align = if(html){c("l","l")}else{c("l","p{8.5cm}")}, 26 | format = if_else(html,"html","latex") 27 | ) 28 | if (!html) { 29 | tab <- gsub("\\\\centering","\\\\vspace*{0.8cm}\\\\centering\\\\small",tab) 30 | tab <- gsub("\\[t\\]","\\[!ht\\]",tab) 31 | } 32 | tab 33 | 34 | ## ----recode----------------------------------------------------------------------------------------------- 35 | tourism <- tsibble::tourism %>% 36 | mutate(State = recode(State, 37 | `New South Wales` = "NSW", 38 | `Northern Territory` = "NT", 39 | `Queensland` = "QLD", 40 | `South Australia` = "SA", 41 | `Tasmania` = "TAS", 42 | `Victoria` = "VIC", 43 | `Western Australia` = "WA" 44 | )) 45 | 46 | ## ----nested, echo=TRUE------------------------------------------------------------------------------------ 47 | tourism_hts <- tourism %>% 48 | aggregate_key(State / Region, Trips = sum(Trips)) 49 | tourism_hts 50 | 51 | ## ----tourismStates, fig.width=9, fig.asp=0.7, fig.cap="Domestic overnight trips from 1998 Q1 to 2017 Q4 aggregated by state.", warning=FALSE, message=FALSE, echo=TRUE---- 52 | tourism_hts %>% 53 | filter(is_aggregated(Region)) %>% 54 | autoplot(Trips) + 55 | labs(y = "Trips ('000)", 56 | title = "Australian tourism: national and states") + 57 | facet_wrap(vars(State), scales = "free_y", ncol = 3) + 58 | theme(legend.position = "none") 59 | 60 | ## ----seasonStates, echo=FALSE, fig.cap="Seasonal plots for overnight trips for Queensland and the Northern Territory, and Victoria and Tasmania highlighting the contrast in seasonal patterns between northern and southern states in Australia.", fig.asp=0.5, fig.width=7, out.width="80%", message=FALSE, warning=FALSE---- 61 | tourism_hts %>% 62 | filter(State == "NT" | State == "QLD" | 63 | State == "TAS" | State == "VIC", is_aggregated(Region)) %>% 64 | select(-Region) %>% 65 | mutate(State = factor(State, levels=c("QLD","VIC","NT","TAS"))) %>% 66 | gg_season(Trips) + 67 | facet_wrap(vars(State), nrow = 2, scales = "free_y")+ 68 | labs(y = "Trips ('000)") 69 | 70 | ## ----tourismRegions, echo=FALSE, fig.asp=0.6, fig.cap="Domestic overnight trips from 1998 Q1 to 2017 Q4 for some selected regions.", fig.width=9, message=FALSE, warning=FALSE---- 71 | tourism_hts %>% 72 | filter(vctrs::vec_in(Region, c("North Coast NSW", "Snowy Mountains", "Hunter", "New England North West", "Alice Springs", "Darwin", "Kakadu Arnhem", "MacDonnell", "Brisbane", "Gold Coast", "Northern Outback", "Sunshine Coast", "Tropical North Queensland", "Adelaide Hills", "Murraylands", "Yorke Peninsula", "Kangaroo Island", "Ballarat", "Great Ocean Road", "High Country", "Goulburn", "Australia's Coral Coast", "Australia's Golden Outback", "Australia's North West", "Australia's North West"))) %>% 73 | autoplot() + 74 | facet_wrap(State ~ ., scales = "free_y", ncol = 3) + 75 | labs(y = "Trips ('000)", 76 | title = "Australian tourism: by regions nested within states") + 77 | theme(legend.position = "none") 78 | 79 | ## ----GroupTree, echo=FALSE, fig.cap="Alternative representations of a two level grouped structure.", out.width="60%", fig.show = "hold",fig.height=8,fig.width=14---- 80 | par(mfrow = c(1, 2)) 81 | g <- igraph::graph_from_literal(Total - -A:B, A - -AX:AY, B - -BX:BY) 82 | layout <- igraph::layout_as_tree(g, root = "Total") 83 | igraph::V(g)$color <- c( 84 | "Thistle", "GreenYellow", "LightBlue", 85 | rep("GreenYellow", 2), rep("LightBlue", 2) 86 | ) 87 | igraph::V(g)$label.cex <- 3 88 | plot(g, layout = layout, vertex.size = 54) 89 | 90 | g2 <- igraph::graph_from_literal(Total - -X:Y, X - -AX:BX, Y - -AY:BY) 91 | layout2 <- igraph::layout_as_tree(g2, root = "Total") 92 | igraph::V(g2)$color <- c( 93 | "Thistle", "GreenYellow", "LightBlue", 94 | rep("GreenYellow", 2), rep("LightBlue", 2) 95 | ) 96 | igraph::V(g2)$label.cex <- 3 97 | plot(g2, layout = layout2, vertex.size = 54) 98 | par(mfrow = c(1, 1)) 99 | 100 | ## ----prisongts, fig.width=9, fig.asp = .7, echo=FALSE, fig.cap="Total Australian quarterly adult prison population, disaggregated by state, by legal status, and by gender.", warning=FALSE, message=FALSE, fig.pos="b", fig.env="figure*"---- 101 | prison <- readr::read_csv("https://OTexts.com/fpp3/extrafiles/prison_population.csv") %>% 102 | mutate(Quarter = yearquarter(Date)) %>% 103 | select(-Date) %>% 104 | as_tsibble(key = c(Gender, Legal, State, Indigenous), 105 | index = Quarter) %>% 106 | relocate(Quarter) 107 | 108 | prison_gts <- prison %>% 109 | aggregate_key(Gender * Legal * State, Count = sum(Count) / 1e3) 110 | 111 | p1 <- prison_gts %>% 112 | filter( 113 | is_aggregated(Gender), 114 | is_aggregated(Legal), 115 | is_aggregated(State) 116 | ) %>% 117 | autoplot(Count) + 118 | labs(y = "Number of prisoners ('000)", 119 | title = "Prison population: Total") 120 | 121 | p2 <- prison_gts %>% 122 | filter( 123 | (!is_aggregated(Gender)) + 124 | (!is_aggregated(Legal)) + 125 | (!is_aggregated(State)) == 1) %>% 126 | mutate( 127 | disaggregator = case_when( 128 | !is_aggregated(Gender) ~ "Gender", 129 | !is_aggregated(Legal) ~ "Legal", 130 | !is_aggregated(State) ~ "State" 131 | ), 132 | value = case_when( 133 | !is_aggregated(Gender) ~ as.character(Gender), 134 | !is_aggregated(Legal) ~ as.character(Legal), 135 | !is_aggregated(State) ~ as.character(State) 136 | ), 137 | series = paste(disaggregator, value, sep = "/") 138 | ) %>% 139 | ggplot(aes(x = Quarter, y = Count, colour = series)) + 140 | geom_line() + 141 | labs(y = "Number of prisoners ('000)") + 142 | facet_wrap(vars(disaggregator), scales = "free_y") 143 | 144 | p1 / p2 145 | 146 | ## ----prison1, fig.width = 9, fig.asp = 0.8, echo=FALSE, fig.cap="Australian adult prison population disaggregated by pairs of attributes.", message=FALSE, warning=FALSE, dependson="prisongts", fig.env="figure*"---- 147 | p5 <- prison_gts %>% 148 | filter( 149 | !is_aggregated(Gender), 150 | !is_aggregated(Legal), 151 | !is_aggregated(State) 152 | ) %>% 153 | mutate(Gender = factor(Gender, levels=c("Male","Female"))) %>% 154 | ggplot(aes(x = Quarter, y = Count, group = Gender, colour = Gender)) + 155 | stat_summary(fun = sum, geom = "line") + 156 | labs(title = "Prison population by state and gender", y = "") + 157 | theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 158 | facet_wrap(~ as.character(State), nrow = 1, scales = "free_y") + 159 | scale_colour_manual(values=c(Male = "#0072B2", Female="#D55E00")) + 160 | guides(colour = guide_legend("Gender")) 161 | 162 | p6 <- prison_gts %>% 163 | filter( 164 | !is_aggregated(Gender), 165 | !is_aggregated(Legal), 166 | !is_aggregated(State) 167 | ) %>% 168 | mutate(Legal = factor(Legal, levels=c("Sentenced","Remanded"))) %>% 169 | ggplot(aes( 170 | x = Quarter, y = Count, 171 | group = Legal, colour = Legal 172 | )) + 173 | stat_summary(fun = sum, geom = "line") + 174 | labs(title = "Prison population by state and legal status", 175 | y = "Number of prisoners ('000)") + 176 | theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 177 | guides(colour = guide_legend("Legal status")) + 178 | facet_wrap(~ as.character(State), nrow = 1, scales = "free_y") + 179 | scale_colour_manual(values=c(Remanded = "#cc79a7", Sentenced="#e69f00")) 180 | 181 | p7 <- prison_gts %>% 182 | filter( 183 | !is_aggregated(Gender), 184 | !is_aggregated(Legal), 185 | !is_aggregated(State) 186 | ) %>% 187 | mutate(Gender = factor(Gender, levels=c("Male","Female"))) %>% 188 | ggplot(aes( 189 | x = Quarter, y = Count, 190 | group = Gender, colour=Gender, 191 | )) + 192 | stat_summary(fun = sum, geom = "line") + 193 | labs(title = "Prison population by legal status and gender", y = "") + 194 | facet_wrap(~ as.character(Legal), nrow = 1, scales = "free_y") + 195 | scale_colour_manual(values=c(Male = "#0072B2", Female="#D55E00")) + 196 | guides(colour = "none") 197 | 198 | (p5 / p6 / p7) 199 | 200 | ## ----prisonBTS, fig.width=9, fig.asp = 0.3, echo=FALSE, fig.cap="Bottom-level time series for the Australian adult prison population, grouped by state, legal status and gender.", message=FALSE, warning=FALSE, dependson="prisongts",fig.env="figure*"---- 201 | prison_gts %>% 202 | filter( 203 | !is_aggregated(State), !is_aggregated(Gender), 204 | !is_aggregated(Legal) 205 | ) %>% 206 | mutate( 207 | faceter = factor(paste(Legal, Gender, sep = " / "), 208 | levels = c("Sentenced / Male", "Remanded / Male", 209 | "Sentenced / Female", "Remanded / Female") 210 | ) 211 | ) %>% 212 | ggplot(aes(x = Quarter, y = Count, colour = faceter)) + 213 | geom_line() + 214 | labs(title = "Australian prison population: bottom-level series", 215 | y = "Number of prisoners ('000)") + 216 | guides(colour = guide_legend("Legal status & Gender")) + 217 | facet_wrap(vars(as.character(State)), nrow = 1, scales = "free_y") + 218 | theme(axis.text.x = element_text(angle = 90, hjust = 1)) 219 | 220 | ## ----mixed, echo=TRUE------------------------------------------------------------------------------------- 221 | tourism_full <- tourism %>% 222 | aggregate_key((State/Region) * Purpose, Trips = sum(Trips)) 223 | 224 | ## ----mixed-purpose, fig.width=10, fig.asp = 0.6, echo=FALSE, fig.cap="Australian domestic overnight trips from 1998 Q1 to 2017 Q4 disaggregated by purpose of travel.", message=FALSE, warning=FALSE, dependson="mixed",fig.env="figure*"---- 225 | tourism_full %>% 226 | filter(is_aggregated(State), is_aggregated(Region), !is_aggregated(Purpose)) %>% 227 | ggplot(aes(x = Quarter, y = Trips, 228 | group = as.character(Purpose), colour = as.character(Purpose)) 229 | ) + 230 | stat_summary(fun = sum, geom = "line") + 231 | facet_wrap(~ as.character(Purpose), scales = "free_y", nrow = 2) + 232 | labs(title = "Australian tourism: by purpose of travel", 233 | y = "Trips ('000)") + 234 | # theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 235 | guides(colour = guide_legend("Purpose")) 236 | 237 | ## ----mixed-state-purpose, fig.width=10, fig.asp = 0.6, echo=FALSE, fig.cap="Australian domestic overnight trips over the period 1998 Q1 to 2017 Q4 disaggregated by purpose of travel and by state.", message=FALSE, warning=FALSE, dependson="mixed",fig.env="figure*"---- 238 | tourism_full %>% 239 | filter(!is_aggregated(State), is_aggregated(Region), !is_aggregated(Purpose)) %>% 240 | ggplot(aes(x = Quarter, y = Trips, 241 | group = as.character(Purpose), colour = as.character(Purpose)) 242 | ) + 243 | stat_summary(fun = sum, geom = "line") + 244 | facet_wrap(~ as.character(State), scales = "free_y", nrow = 2) + 245 | labs(title = "Australian tourism: by purpose of travel and state", 246 | y = "Trips ('000)") + 247 | theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 248 | guides(colour = guide_legend("Purpose")) 249 | 250 | ## ----tourism_states, message=FALSE------------------------------------------------------------------------ 251 | tourism_states <- tourism %>% 252 | aggregate_key(State, Trips = sum(Trips)) 253 | 254 | ## ----bu_by_hand, message=FALSE---------------------------------------------------------------------------- 255 | fcasts_state <- tourism_states %>% 256 | filter(!is_aggregated(State)) %>% 257 | model(ets = ETS(Trips)) %>% 258 | forecast() 259 | 260 | # Sum bottom-level forecasts to get top-level forecasts 261 | fcasts_national <- fcasts_state %>% 262 | summarise(value = sum(Trips), .mean = mean(value)) 263 | 264 | ## ----bottom_up, message=FALSE----------------------------------------------------------------------------- 265 | tourism_states %>% 266 | model(ets = ETS(Trips)) %>% 267 | reconcile(bu = bottom_up(ets)) %>% 268 | forecast() 269 | 270 | ## ----tourismfit, echo=TRUE-------------------------------------------------------------------------------- 271 | tourism_full <- tourism %>% 272 | aggregate_key((State/Region) * Purpose, Trips = sum(Trips)) 273 | 274 | fit <- tourism_full %>% 275 | filter(year(Quarter) <= 2015) %>% 276 | model(base = ETS(Trips)) %>% 277 | reconcile( 278 | bu = bottom_up(base), 279 | ols = min_trace(base, method = "ols"), 280 | mint = min_trace(base, method = "mint_shrink"), 281 | ) 282 | 283 | ## ----tourismfc, echo=TRUE, message=FALSE, warning=FALSE, dependson="tourismfit"--------------------------- 284 | fc <- fit %>% forecast(h = "2 years") 285 | 286 | ## ----tourism-states, fig.width=10, fig.asp = .55, echo=TRUE, fig.cap="Forecasts of overnight trips for Australia and its states over the test period 2016Q1--2017Q4.", warning=FALSE, message=FALSE, fig.pos="!htb", fig.env="figure*", dependson="tourismfc"---- 287 | fc %>% 288 | filter(is_aggregated(Region), is_aggregated(Purpose)) %>% 289 | autoplot( 290 | tourism_full %>% filter(year(Quarter) >= 2011), 291 | level = NULL 292 | ) + 293 | labs(y = "Trips ('000)") + 294 | facet_wrap(vars(State), scales = "free_y") 295 | 296 | ## ----tourism-purpose, echo=FALSE, fig.asp=.45, echo=TRUE, fig.cap="Forecasts of overnight trips by purpose of travel over the test period 2016Q1--2017Q4.", fig.env="figure*", fig.pos="!htb", fig.width=10, message=FALSE, warning=FALSE, dependson="tourismfc"---- 297 | fc %>% 298 | filter(is_aggregated(State), !is_aggregated(Purpose)) %>% 299 | autoplot( 300 | tourism_full %>% filter(year(Quarter) >= 2011), 301 | level = NULL 302 | ) + 303 | labs(y = "Trips ('000)") + 304 | facet_wrap(vars(Purpose), scales = "free_y") 305 | 306 | ## ----tourism-evaluation, echo=FALSE, message=FALSE, warning=FALSE, dependson="tourismfc"------------------ 307 | tab <- matrix(NA, ncol = 8, nrow = 6) 308 | rownames(tab) <- c("Total", "Purpose", "State", "Regions", "Bottom", "All series") 309 | colnames(tab) <- c("Base", "Bottom-up", "MinT", "OLS", "Base", "Bottom-up", "MinT", "OLS") 310 | 311 | filter_tab <- matrix(NA, ncol = 1, nrow = 6) 312 | 313 | filter_tab[1] <- "fc %>% filter(is_aggregated(State),is_aggregated(Region),is_aggregated(Purpose))" 314 | filter_tab[2] <- "fc %>% 315 | filter(is_aggregated(State),is_aggregated(Region),!is_aggregated(Purpose))" 316 | filter_tab[3] <- "fc %>% filter(!is_aggregated(State),is_aggregated(Region),is_aggregated(Purpose))" 317 | filter_tab[4] <- "fc %>% 318 | filter(!is_aggregated(State),!is_aggregated(Region),is_aggregated(Purpose))" 319 | filter_tab[5] <- "fc %>% filter(!is_aggregated(State),!is_aggregated(Region),!is_aggregated(Purpose))" 320 | filter_tab[6] <- "fc" 321 | 322 | for (i in 1:6) { 323 | eval(parse(text = filter_tab[i])) %>% 324 | accuracy(data = tourism_full, measures = list(rmse = RMSE, mase = MASE)) %>% 325 | group_by(.model) %>% 326 | summarise(rmse = mean(rmse), mase = mean(mase)) -> err 327 | tab[i, ] <- cbind(t(err[, 2]), t(err[, 3])) 328 | } 329 | 330 | out <- knitr::kable(tab, 331 | digits = 2, booktabs = TRUE, 332 | format = ifelse(html, "html", "latex"), 333 | caption = "Accuracy of forecasts for Australian overnight trips over the test set 2016Q1--2017Q4.", 334 | table.envir="table*" 335 | ) %>% 336 | kableExtra::add_header_above(c(" " = 1, "RMSE" = 4, "MASE" = 4)) 337 | if (!html) { 338 | out <- gsub("\\[t\\]", "\\[b\\]", out) 339 | } 340 | out 341 | 342 | ## ----fcaccuracy2, message=FALSE, dependson="tourismfc"---------------------------------------------------- 343 | fc %>% 344 | filter(is_aggregated(State), is_aggregated(Purpose)) %>% 345 | accuracy( 346 | data = tourism_full, 347 | measures = list(rmse = RMSE, mase = MASE) 348 | ) %>% 349 | group_by(.model) %>% 350 | summarise(rmse = mean(rmse), mase = mean(mase)) 351 | 352 | ## ----prisonfc--------------------------------------------------------------------------------------------- 353 | fit <- prison_gts %>% 354 | filter(year(Quarter) <= 2014) %>% 355 | model(base = ETS(Count)) %>% 356 | reconcile( 357 | bottom_up = bottom_up(base), 358 | MinT = min_trace(base, method = "mint_shrink") 359 | ) 360 | fc <- fit %>% forecast(h = 8) 361 | 362 | ## ----prisonforecasts-aggregate, fig.width=8, fig.asp = .5, echo=TRUE, fig.cap="Forecasts for the total Australian quarterly adult prison population for the period 2015Q1--2016Q4.", warning=FALSE, message=FALSE, fig.pos="b", fig.env="figure*", dependson="prisonfc"---- 363 | fc %>% 364 | filter(is_aggregated(State), is_aggregated(Gender), 365 | is_aggregated(Legal)) %>% 366 | autoplot(prison_gts, alpha = 0.7, level = 90) + 367 | labs(y = "Number of prisoners ('000)", 368 | title = "Australian prison population (total)") 369 | 370 | ## ----prisonforecasts-State, fig.width=8, fig.asp = 0.45, echo=TRUE, fig.cap="Forecasts for the Australian quarterly adult prison population, disaggregated by state.", warning=FALSE, message=FALSE, fig.pos="!htb", fig.env="figure*"---- 371 | fc %>% 372 | filter( 373 | .model %in% c("base", "MinT"), 374 | !is_aggregated(State), is_aggregated(Legal), 375 | is_aggregated(Gender) 376 | ) %>% 377 | autoplot( 378 | prison_gts %>% filter(year(Quarter) >= 2010), 379 | alpha = 0.7, level = 90 380 | ) + 381 | labs(title = "Prison population (by state)", 382 | y = "Number of prisoners ('000)") + 383 | facet_wrap(vars(State), scales = "free_y", ncol = 4) + 384 | theme(axis.text.x = element_text(angle = 90, hjust = 1)) 385 | 386 | ## ----prisonforecasts-LegalGender, fig.width=9, fig.asp = 0.35, echo=FALSE, fig.cap="Forecasts for the Australian quarterly adult prison population, disaggregated by legal status and by gender.", warning=FALSE, message=FALSE, fig.pos="!htb", fig.env="figure*"---- 387 | p1 <- fc %>% 388 | filter( 389 | .model %in% c("MinT", "base"), !is_aggregated(Legal), 390 | is_aggregated(State), is_aggregated(Gender) 391 | ) %>% 392 | autoplot( 393 | prison_gts %>% filter(year(Quarter) >= 2010), 394 | alpha = 0.7, level = 90 395 | ) + 396 | labs(title = "Prison population (by legal status)", 397 | y = "Number of prisoners ('000)") + 398 | facet_wrap(vars(Legal), scales = "free_y", nrow = 1) + 399 | theme(axis.text.x = element_text(angle = 90, hjust = 1)) 400 | 401 | p2 <- fc %>% 402 | filter( 403 | .model %in% c("MinT", "base"), 404 | is_aggregated(State), is_aggregated(Legal), 405 | !is_aggregated(Gender) 406 | ) %>% 407 | autoplot( 408 | prison_gts %>% filter(year(Quarter) >= 2010), 409 | alpha = 0.7, level = 90 410 | ) + 411 | labs(title = "Prison population (by gender)", 412 | y = "Number of prisoners ('000)") + 413 | facet_wrap(vars(Gender), scales = "free_y", nrow = 1) + 414 | theme(axis.text.x = element_text(angle = 90, hjust = 1)) 415 | 416 | (p1 | p2) + plot_layout(guides = "collect") 417 | 418 | ## ----prisonforecasts-bottom, fig.width=8, fig.asp = 0.8, echo=FALSE, fig.cap="Forecasts for bottom-level series the Australian quarterly adult prison population, disaggregated by state, by legal status and by gender.", warning=FALSE, message=FALSE, fig.pos="!htb", fig.env="figure*"---- 419 | fc %>% 420 | mutate(Sex_Legal = paste(Gender, "+", Legal)) %>% 421 | as_fable(index = Quarter, key = c(Sex_Legal, State, .model)) %>% 422 | filter( 423 | .model %in% c("base", "MinT"), 424 | vctrs::vec_in(State, c("NSW", "QLD", "VIC", "WA")), !is_aggregated(Gender), 425 | !is_aggregated(Legal) 426 | ) %>% 427 | autoplot( 428 | prison_gts %>% 429 | filter(year(Quarter) >= 2010) %>% 430 | mutate(Sex_Legal = paste(Gender, "+", Legal)) %>% 431 | as_tsibble(index = Quarter, key = c(Sex_Legal, State)), 432 | alpha = 0.7, level = 90 433 | ) + 434 | labs(y = "Number of prisoners ('000)") + 435 | facet_wrap(Sex_Legal ~ State, scales = "free_y") + 436 | theme(axis.text.x = element_text(angle = 90, hjust = 1)) 437 | 438 | ## ----prisonaccuracy, dependson="prisonfc", message=FALSE-------------------------------------------------- 439 | fc %>% 440 | filter(is_aggregated(State), is_aggregated(Gender), 441 | is_aggregated(Legal)) %>% 442 | accuracy(data = prison_gts, 443 | measures = list(mase = MASE, 444 | ss = skill_score(CRPS) 445 | ) 446 | ) %>% 447 | group_by(.model) %>% 448 | summarise(mase = mean(mase), sspc = mean(ss) * 100) 449 | 450 | ## ----tab-crime-evaluation,echo=FALSE, message=FALSE------------------------------------------------------- 451 | tab <- matrix(NA, ncol = 6, nrow = 6) 452 | rownames(tab) <- c("Total", "State", "Legal status", "Gender", "Bottom", "All series") 453 | colnames(tab) <- c("Base", "Bottom-up", "MinT", "Base", "Bottom-up", "MinT") 454 | 455 | filter_tab <- matrix(NA, ncol = 1, nrow = 6) 456 | 457 | filter_tab[1] <- "fc %>% filter(is_aggregated(State),is_aggregated(Gender),is_aggregated(Legal))" 458 | filter_tab[2] <- "fc %>% filter(!is_aggregated(State),is_aggregated(Gender),is_aggregated(Legal))" 459 | filter_tab[3] <- "fc %>% filter(is_aggregated(State),is_aggregated(Gender),!is_aggregated(Legal))" 460 | filter_tab[4] <- "fc %>% filter(is_aggregated(State),!is_aggregated(Gender),is_aggregated(Legal))" 461 | filter_tab[5] <- "fc %>% filter(!is_aggregated(State),!is_aggregated(Gender),!is_aggregated(Legal))" 462 | filter_tab[6] <- "fc " 463 | 464 | i <- 1 465 | for (i in 1:6) { 466 | eval(parse(text = filter_tab[i])) %>% 467 | accuracy( 468 | data = prison_gts, 469 | measures = list(mase = MASE, ss = skill_score(CRPS)) 470 | ) %>% 471 | group_by(.model) %>% 472 | summarise(mase = mean(mase), sspc = mean(ss) * 100) -> err 473 | tab[i, ] <- cbind(t(err[, 2]), t(err[, 3])) 474 | } 475 | 476 | out <- knitr::kable(tab, 477 | digits = 2, booktabs = TRUE, 478 | format = ifelse(html, "html", "latex"), 479 | caption = "Accuracy of Australian prison population forecasts for different groups of series." 480 | ) %>% 481 | kableExtra::add_header_above(c(" " = 1, "MASE" = 3, "Skill Score (CRPS)" = 3)) 482 | if (!html) { 483 | out <- gsub("\\[t\\]", "\\[b\\]", out) 484 | } 485 | out 486 | 487 | -------------------------------------------------------------------------------- /12-advanced.R: -------------------------------------------------------------------------------- 1 | source("before-each-chapter.R") 2 | 3 | ## ----calls, echo=FALSE, fig.cap="Five-minute call volume handled on weekdays between 7:00am and 9:05pm in a large North American commercial bank. Top panel: data from 3 March -- 24 October 2003. Bottom panel: first four weeks of data.", fig.asp=0.7, fig.pos="htb", warning=FALSE, message=FALSE---- 4 | p1 <- bank_calls %>% 5 | fill_gaps() %>% 6 | autoplot(Calls) + 7 | labs(y = "Calls", 8 | title = "Five-minute call volume to bank") 9 | p2 <- bank_calls %>% 10 | fill_gaps() %>% 11 | filter(as_date(DateTime) <= "2003-03-28") %>% 12 | autoplot(Calls) + 13 | labs(y = "Calls", 14 | title = "Five-minute call volume over 4 weeks") 15 | p1 / p2 16 | 17 | ## ----callsmstl0------------------------------------------------------------------------------------------- 18 | calls <- bank_calls %>% 19 | mutate(t = row_number()) %>% 20 | update_tsibble(index = t, regular = TRUE) 21 | 22 | ## ----callsmstl, fig.cap="Multiple STL for the call volume data.", fig.asp=.95, fig.height=6--------------- 23 | calls %>% 24 | model( 25 | STL(sqrt(Calls) ~ season(period = 169) + 26 | season(period = 5*169), 27 | robust = TRUE) 28 | ) %>% 29 | components() %>% 30 | autoplot() + labs(x = "Observation") 31 | 32 | ## ----callsmstlf, fig.cap="Multiple STL for the call volume data.", fig.asp=0.55--------------------------- 33 | # Forecasts from STL+ETS decomposition 34 | my_dcmp_spec <- decomposition_model( 35 | STL(sqrt(Calls) ~ season(period = 169) + 36 | season(period = 5*169), 37 | robust = TRUE), 38 | ETS(season_adjust ~ season("N")) 39 | ) 40 | fc <- calls %>% 41 | model(my_dcmp_spec) %>% 42 | forecast(h = 5 * 169) 43 | 44 | # Add correct time stamps to fable 45 | fc_with_times <- bank_calls %>% 46 | new_data(n = 7 * 24 * 60 / 5) %>% 47 | mutate(time = format(DateTime, format = "%H:%M:%S")) %>% 48 | filter( 49 | time %in% format(bank_calls$DateTime, format = "%H:%M:%S"), 50 | wday(DateTime, week_start = 1) <= 5 51 | ) %>% 52 | mutate(t = row_number() + max(calls$t)) %>% 53 | left_join(fc, by = "t") %>% 54 | as_fable(response = "Calls", distribution = Calls) 55 | 56 | # Plot results with last 3 weeks of data 57 | fc_with_times %>% 58 | fill_gaps() %>% 59 | autoplot(bank_calls %>% tail(14 * 169) %>% fill_gaps()) + 60 | labs(y = "Calls", 61 | title = "Five-minute call volume to bank") 62 | 63 | ## ----callsharmonics0, echo=TRUE, dependson='callsmstl',warning=FALSE-------------------------------------- 64 | fit <- calls %>% 65 | model( 66 | dhr = ARIMA(sqrt(Calls) ~ PDQ(0, 0, 0) + pdq(d = 0) + 67 | fourier(period = 169, K = 10) + 68 | fourier(period = 5*169, K = 5))) 69 | 70 | fc <- fit %>% forecast(h = 5 * 169) 71 | 72 | # Add correct time stamps to fable 73 | fc_with_times <- bank_calls %>% 74 | new_data(n = 7 * 24 * 60 / 5) %>% 75 | mutate(time = format(DateTime, format = "%H:%M:%S")) %>% 76 | filter( 77 | time %in% format(bank_calls$DateTime, format = "%H:%M:%S"), 78 | wday(DateTime, week_start = 1) <= 5 79 | ) %>% 80 | mutate(t = row_number() + max(calls$t)) %>% 81 | left_join(fc, by = "t") %>% 82 | as_fable(response = "Calls", distribution = Calls) 83 | 84 | ## ----callsharmonics, echo=TRUE, fig.cap="Forecasts from a dynamic harmonic regression applied to the call volume data.",fig.asp=0.55,dependson='callsharmonics0',warning=FALSE---- 85 | # Plot results with last 3 weeks of data 86 | fc_with_times %>% 87 | fill_gaps() %>% 88 | autoplot(bank_calls %>% tail(14 * 169) %>% fill_gaps()) + 89 | labs(y = "Calls", 90 | title = "Five-minute call volume to bank") 91 | 92 | ## ----callsorder, echo=FALSE, dependson='callsharmonics'--------------------------------------------------- 93 | ncoef <- fit %>% 94 | broom::tidy() %>% 95 | NROW() 96 | if (ncoef == 0L) stop(paste("Model error", broom::tidy(fit))) 97 | arma <- fit %>% 98 | tidy() %>% 99 | filter(stringr::str_detect(term, "[mar][mar][0-9]")) %>% 100 | NROW() 101 | nf169 <- fit %>% 102 | tidy() %>% 103 | filter(stringr::str_detect(term, "fourier\\(period = 169")) %>% 104 | NROW() 105 | nf845 <- fit %>% 106 | tidy() %>% 107 | filter(stringr::str_detect(term, "fourier\\(period = 5")) %>% 108 | NROW() 109 | if (ncoef != (arma + nf169 + nf845 + 1L)) { 110 | stop(paste( 111 | "Coefficients don't add up", 112 | ncoef, arma, nf169, nf845 113 | )) 114 | } 115 | 116 | ## ----elecdemand, echo=TRUE, fig.cap="Half-hourly electricity demand and corresponding temperatures in 2012--2014, Victoria, Australia."---- 117 | vic_elec %>% 118 | pivot_longer(Demand:Temperature, names_to = "Series") %>% 119 | ggplot(aes(x = Time, y = value)) + 120 | geom_line() + 121 | facet_grid(rows = vars(Series), scales = "free_y") + 122 | labs(y = "") 123 | 124 | ## ----elecdemand2, echo=TRUE, fig.cap="Half-hourly electricity demand for Victoria, plotted against temperatures for the same times in Melbourne, the largest city in Victoria."---- 125 | elec <- vic_elec %>% 126 | mutate( 127 | DOW = wday(Date, label = TRUE), 128 | Working_Day = !Holiday & !(DOW %in% c("Sat", "Sun")), 129 | Cooling = pmax(Temperature, 18) 130 | ) 131 | elec %>% 132 | ggplot(aes(x=Temperature, y=Demand, col=Working_Day)) + 133 | geom_point(alpha = 0.6) + 134 | labs(x="Temperature (degrees Celsius)", y="Demand (MWh)") 135 | 136 | ## ----elecdemand3, echo=TRUE, dependson='elecdemand2', warning=FALSE--------------------------------------- 137 | fit <- elec %>% 138 | model( 139 | ARIMA(Demand ~ PDQ(0, 0, 0) + pdq(d = 0) + 140 | Temperature + Cooling + Working_Day + 141 | fourier(period = "day", K = 10) + 142 | fourier(period = "week", K = 5) + 143 | fourier(period = "year", K = 3)) 144 | ) 145 | 146 | ## ----elecdemand4, echo=TRUE, fig.cap="Forecasts from a dynamic harmonic regression model applied to half-hourly electricity demand data.", dependson='elecdemand3', fig.asp=0.55---- 147 | elec_newdata <- new_data(elec, 2*48) %>% 148 | mutate( 149 | Temperature = tail(elec$Temperature, 2 * 48), 150 | Date = lubridate::as_date(Time), 151 | DOW = wday(Date, label = TRUE), 152 | Working_Day = (Date != "2015-01-01") & 153 | !(DOW %in% c("Sat", "Sun")), 154 | Cooling = pmax(Temperature, 18) 155 | ) 156 | fc <- fit %>% 157 | forecast(new_data = elec_newdata) 158 | 159 | fc %>% 160 | autoplot(elec %>% tail(10 * 48)) + 161 | labs(title="Half hourly electricity demand: Victoria", 162 | y = "Demand (MWh)", x = "Time [30m]") 163 | 164 | ## ----elecdemand5, echo=TRUE, fig.cap="Residual diagnostics for the dynamic harmonic regression model.", dependson='elecdemand4'---- 165 | fit %>% gg_tsresiduals() 166 | 167 | ## ----propheteg, message=FALSE----------------------------------------------------------------------------- 168 | library(fable.prophet) 169 | cement <- aus_production %>% 170 | filter(year(Quarter) >= 1988) 171 | train <- cement %>% 172 | filter(year(Quarter) <= 2007) 173 | fit <- train %>% 174 | model( 175 | arima = ARIMA(Cement), 176 | ets = ETS(Cement), 177 | prophet = prophet(Cement ~ season(period = 4, order = 2, 178 | type = "multiplicative")) 179 | ) 180 | 181 | ## ----prophetegfc, fig.cap="Prophet compared to ETS and ARIMA on the Cement production data, with a 10-quarter test set."---- 182 | fc <- fit %>% forecast(h = "2 years 6 months") 183 | fc %>% autoplot(cement) 184 | 185 | ## ----prophetegaccuracy------------------------------------------------------------------------------------ 186 | fc %>% accuracy(cement) 187 | 188 | ## ----prophetelec, fig.cap="Components of a Prophet model fitted to the Victorian electricity demand data.", fig.height=6, fig.asp=0.9---- 189 | fit <- elec %>% 190 | model( 191 | prophet(Demand ~ Temperature + Cooling + Working_Day + 192 | season(period = "day", order = 10) + 193 | season(period = "week", order = 5) + 194 | season(period = "year", order = 3)) 195 | ) 196 | fit %>% 197 | components() %>% 198 | autoplot() 199 | 200 | ## ----prophetelecres, fig.asp=0.6, fig.cap="Residuals from the Prophet model for Victorian electricity demand.", dependson="prophetelec"---- 201 | fit %>% gg_tsresiduals() 202 | 203 | ## ----prophetfc0, dependson="prophetelec"------------------------------------------------------------------ 204 | fc <- fit %>% 205 | forecast(new_data = elec_newdata) 206 | 207 | ## ----prophetfc, fig.cap="Two day forecasts from the Prophet model for Victorian electricity demand.", dependson="prophetfc0"---- 208 | fc %>% 209 | autoplot(elec %>% tail(10 * 48)) + 210 | labs(x = "Date", y = "Demand (MWh)") 211 | 212 | ## ----varselect-------------------------------------------------------------------------------------------- 213 | fit <- us_change %>% 214 | model( 215 | aicc = VAR(vars(Consumption, Income)), 216 | bic = VAR(vars(Consumption, Income), ic = "bic") 217 | ) 218 | fit 219 | 220 | ## ----varglance, dependson='varselect'--------------------------------------------------------------------- 221 | glance(fit) 222 | 223 | ## ----varplots, fig.height=6, fig.asp=0.7, fig.cap="ACF of the residuals from the two VAR models. A VAR(5) model is selected by the AICc, while a VAR(1) model is selected using the BIC.", dependson='varselect',out.width="100%"---- 224 | fit %>% 225 | augment() %>% 226 | ACF(.innov) %>% 227 | autoplot() 228 | 229 | ## ----VAR5, fig.cap="Forecasts for US consumption and income generated from a VAR(5) model.", fig.asp=0.4, dependson='varselect'---- 230 | fit %>% 231 | select(aicc) %>% 232 | forecast() %>% 233 | autoplot(us_change %>% filter(year(Quarter) > 2010)) 234 | 235 | ## ----sunspot1, echo=FALSE, message=FALSE------------------------------------------------------------------ 236 | fit <- sunspot.year %>% 237 | as_tsibble() %>% 238 | model(NNETAR(sqrt(value))) 239 | 240 | ## ----sunspotname, echo=FALSE------------------------------------------------------------------------------ 241 | sunspot_model <- fit[[1]] %>% 242 | as.character() %>% 243 | stringr::str_remove_all("[<>]*") 244 | sunspot_p <- sunspot_model %>% 245 | stringr::str_extract("[0-9]*,") %>% 246 | stringr::str_remove(",") %>% 247 | as.numeric() 248 | sunspot_k <- sunspot_model %>% 249 | stringr::str_extract(",[0-9]*") %>% 250 | stringr::str_remove(",") %>% 251 | as.numeric() 252 | stopifnot(sunspot_p == 9L & sunspot_k == 5L) 253 | 254 | ## ----sunspotnnetar, fig.cap="Forecasts from a neural network with ten lagged inputs and one hidden layer containing six neurons."---- 255 | sunspots <- sunspot.year %>% as_tsibble() 256 | sunspots %>% 257 | model(NNETAR(sqrt(value))) %>% 258 | forecast(h = 30) %>% 259 | autoplot(sunspots) + 260 | labs(x = "Year", y = "Counts", 261 | title = "Yearly sunspots") 262 | 263 | ## ----nnetarsim, message=FALSE, fig.cap="Future sample paths for the annual sunspot data."---------------- 264 | fit %>% 265 | generate(times = 9, h = 30) %>% 266 | autoplot(.sim) + 267 | autolayer(sunspots, value) + 268 | theme(legend.position = "none") 269 | 270 | ## ----cementstl, message=FALSE, warning=FALSE, fig.cap="STL decomposition of quarterly Australian cement production.", fig.asp=0.9---- 271 | cement <- aus_production %>% 272 | filter(year(Quarter) >= 1988) %>% 273 | select(Quarter, Cement) 274 | cement_stl <- cement %>% 275 | model(stl = STL(Cement)) 276 | cement_stl %>% 277 | components() %>% 278 | autoplot() 279 | 280 | ## ----cementbootstrapped, fig.cap="Ten bootstrapped versions of quarterly Australian cement production (coloured), along with the original data (black).",dependson="cementstl"---- 281 | cement_stl %>% 282 | generate(new_data = cement, times = 10, 283 | bootstrap_block_size = 8) %>% 284 | autoplot(.sim) + 285 | autolayer(cement, Cement) + 286 | guides(colour = "none") + 287 | labs(title = "Cement production: Bootstrapped series", 288 | y="Tonnes ('000)") 289 | 290 | ## ----cementsim, dependson="cementstl"--------------------------------------------------------------------- 291 | sim <- cement_stl %>% 292 | generate(new_data = cement, times = 100, 293 | bootstrap_block_size = 8) %>% 294 | select(-.model, -Cement) 295 | 296 | ## ----cementnboot, dependson="cementsim", fig.cap="Forecasts of 100 bootstrapped series obtained using ETS models.", message=FALSE, warning=FALSE, fig.asp=0.5---- 297 | ets_forecasts <- sim %>% 298 | model(ets = ETS(.sim)) %>% 299 | forecast(h = 12) 300 | ets_forecasts %>% 301 | update_tsibble(key = .rep) %>% 302 | autoplot(.mean) + 303 | autolayer(cement, Cement) + 304 | guides(colour = "none") + 305 | labs(title = "Cement production: bootstrapped forecasts", 306 | y="Tonnes ('000)") 307 | 308 | ## ----baggedf, dependson="cementnboot", fig.cap="Comparing bagged ETS forecasts (the average of 100 bootstrapped forecasts in orange) and ETS applied directly to the data (in blue)."---- 309 | bagged <- ets_forecasts %>% 310 | summarise(bagged_mean = mean(.mean)) 311 | cement %>% 312 | model(ets = ETS(Cement)) %>% 313 | forecast(h = 12) %>% 314 | autoplot(cement) + 315 | autolayer(bagged, bagged_mean, col = "#D55E00") + 316 | labs(title = "Cement production in Australia", 317 | y="Tonnes ('000)") 318 | 319 | -------------------------------------------------------------------------------- /13-practical.R: -------------------------------------------------------------------------------- 1 | source("before-each-chapter.R") 2 | 3 | ## ----gasstl, fig.cap="Forecasts for weekly US gasoline production using an STL decomposition with an ETS model for the seasonally adjusted data.", fig.asp=0.58---- 4 | my_dcmp_spec <- decomposition_model( 5 | STL(Barrels), 6 | ETS(season_adjust ~ season("N")) 7 | ) 8 | us_gasoline %>% 9 | model(stl_ets = my_dcmp_spec) %>% 10 | forecast(h = "2 years") %>% 11 | autoplot(us_gasoline) + 12 | labs(y = "Millions of barrels per day", 13 | title = "Weekly US gasoline production") 14 | 15 | ## ----gasweekly_choosemodel, message=FALSE, include=FALSE-------------------------------------------------- 16 | library(purrr) 17 | model_defs <- map( 18 | as.list(seq(25)), 19 | ~ ARIMA(Barrels ~ PDQ(0, 0, 0) + fourier(K = !!.[1])) 20 | ) 21 | model_defs <- set_names(model_defs, sprintf("ARIMA + fourier(%i)", seq(25))) 22 | gas_aicc <- numeric(length(model_defs)) 23 | for (k in seq_along(model_defs)) { 24 | fit <- us_gasoline %>% 25 | model(model_defs[[k]]) %>% 26 | glance() 27 | if (!is.null(fit$AICc)) { 28 | gas_aicc[k] <- fit$AICc 29 | } 30 | } 31 | bestK <- which.min(gas_aicc) 32 | if (bestK != 6L) { 33 | stop("Gas DHR model changed") 34 | } 35 | 36 | ## ----gasweekly, message=FALSE----------------------------------------------------------------------------- 37 | gas_dhr <- us_gasoline %>% 38 | model(dhr = ARIMA(Barrels ~ PDQ(0, 0, 0) + fourier(K = 6))) 39 | 40 | ## ----gasarima, message=FALSE, warning=FALSE, include=FALSE, dependson="gasweekly"------------------------- 41 | arimaorder <- as.numeric(gas_dhr$dhr[[1]]$fit$spec[1, 1:3]) 42 | modelname <- paste0("ARIMA(", arimaorder[1], ",", arimaorder[2], ",", arimaorder[3], ")") 43 | if (!identical(arimaorder, c(0, 1, 1))) { 44 | stop("Gas DHR ARIMA error changed") 45 | } 46 | 47 | ## ----gasforecast, fig.cap="Forecasts for weekly US gasoline production using a dynamic harmonic regression model.", fig.asp=0.58, dependson="gasweekly"---- 48 | gas_dhr %>% 49 | forecast(h = "2 years") %>% 50 | autoplot(us_gasoline) + 51 | labs(y = "Millions of barrels per day", 52 | title = "Weekly US gasoline production") 53 | 54 | ## ----j06, fig.cap="Numbers of scripts sold for Immune sera and immunoglobulins on the Australian Pharmaceutical Benefits Scheme.", fig.height=3, fig.asp=0.5---- 55 | j06 <- PBS %>% 56 | filter(ATC2 == "J06") %>% 57 | summarise(Scripts = sum(Scripts)) 58 | 59 | j06 %>% autoplot(Scripts) + 60 | labs(y="Number of scripts", 61 | title = "Sales for immune sera and immunoglobulins") 62 | 63 | ## ----j06table, echo=FALSE--------------------------------------------------------------------------------- 64 | firstten <- j06 %>% 65 | mutate( 66 | nonzero = Scripts > 0, 67 | cumdemand = cumsum(nonzero) 68 | ) %>% 69 | filter(cumdemand <= 10) %>% 70 | select(Month, Scripts) 71 | tab <- firstten %>% 72 | knitr::kable( 73 | booktabs = TRUE, 74 | caption = "The first 10 non-zero demand values." 75 | ) 76 | if (!html) { 77 | tab <- gsub("\\\\centering","\\\\vspace*{-0.2cm}\\\\centering\\\\small",tab) 78 | tab <- gsub("\\[t\\]","\\[!ht\\]",tab) 79 | } 80 | tab 81 | 82 | ## ----j06table2, echo=FALSE, warning=FALSE----------------------------------------------------------------- 83 | options(knitr.kable.NA = "") 84 | q <- firstten$Scripts[firstten$Scripts > 0] 85 | a <- c(NA, diff(which(firstten$Scripts > 0))) 86 | out <- rbind(i = seq(10), q, a) 87 | rownames(out) <- c("$i$", "$q_i$", "$a_i$") 88 | out <- out %>% 89 | knitr::kable( 90 | booktabs = TRUE, 91 | escape = FALSE, 92 | caption = "The first 10 non-zero demand values shown as demand and inter-arrival series." 93 | ) 94 | if (!html) { 95 | out <- gsub("\\\\centering","\\\\vspace*{-0.cm}\\\\centering\\\\small",out) 96 | out <- gsub("\\[t\\]","\\[!ht\\]",out) 97 | out <- gsub("\\\\end\\{tabular\\}","\\\\end{tabular}\\\\vspace*{0.2cm}",out) 98 | } 99 | out 100 | lastq <- tail(j06$Scripts[j06$Scripts > 0], 1) 101 | if (tail(j06$Scripts, 1) > 0) { 102 | lasta <- tail(diff(which(j06$Scripts > 0)), 1) 103 | } else { 104 | lasta <- NROW(j06) - max(which(j06$Scripts > 0)) 105 | } 106 | 107 | ## ----crostonfit, include=FALSE---------------------------------------------------------------------------- 108 | fit <- j06 %>% 109 | model(CROSTON(Scripts)) 110 | 111 | ## ----crostoncheck, echo=FALSE----------------------------------------------------------------------------- 112 | q0 <- fit[[1]][[1]]$fit$par$estimate[1] 113 | alphaq <- fit[[1]][[1]]$fit$par$estimate[2] 114 | a0 <- fit[[1]][[1]]$fit$par$estimate[3] 115 | alphaa <- fit[[1]][[1]]$fit$par$estimate[4] 116 | q <- j06$Scripts[j06$Scripts > 0] 117 | a <- c(NA, diff(which(j06$Scripts > 0))) 118 | qhat <- alphaq * sum(rev((1 - alphaq)^(seq_along(q) - 1)) * q) 119 | ahat <- alphaa * sum(rev((1 - alphaa)^(seq_along(a) - 1)) * a, na.rm = TRUE) 120 | 121 | ## ----crostonfc-------------------------------------------------------------------------------------------- 122 | j06 %>% 123 | model(CROSTON(Scripts)) %>% 124 | forecast(h = 6) 125 | 126 | ## ----positiveeggs, fig.cap="Forecasts for the price of a dozen eggs, constrained to be positive using a Box-Cox transformation."---- 127 | egg_prices <- prices %>% filter(!is.na(eggs)) 128 | egg_prices %>% 129 | model(ETS(log(eggs) ~ trend("A"))) %>% 130 | forecast(h = 50) %>% 131 | autoplot(egg_prices) + 132 | labs(title = "Annual egg prices", 133 | y = "$US (in cents adjusted for inflation) ") 134 | 135 | ## ----constrained, fig.cap="Forecasts for the price of a dozen eggs, constrained to be lie between 50 and 400 cents US."---- 136 | scaled_logit <- function(x, lower = 0, upper = 1) { 137 | log((x - lower) / (upper - x)) 138 | } 139 | inv_scaled_logit <- function(x, lower = 0, upper = 1) { 140 | (upper - lower) * exp(x) / (1 + exp(x)) + lower 141 | } 142 | my_scaled_logit <- new_transformation( 143 | scaled_logit, inv_scaled_logit) 144 | egg_prices %>% 145 | model( 146 | ETS(my_scaled_logit(eggs, lower = 50, upper = 400) 147 | ~ trend("A")) 148 | ) %>% 149 | forecast(h = 50) %>% 150 | autoplot(egg_prices) + 151 | labs(title = "Annual egg prices", 152 | y = "$US (in cents adjusted for inflation) ") 153 | 154 | ## ----auscafe, message=FALSE, warning=FALSE---------------------------------------------------------------- 155 | auscafe <- aus_retail %>% 156 | filter(stringr::str_detect(Industry, "Takeaway")) %>% 157 | summarise(Turnover = sum(Turnover)) 158 | train <- auscafe %>% 159 | filter(year(Month) <= 2013) 160 | STLF <- decomposition_model( 161 | STL(log(Turnover) ~ season(window = Inf)), 162 | ETS(season_adjust ~ season("N")) 163 | ) 164 | cafe_models <- train %>% 165 | model( 166 | ets = ETS(Turnover), 167 | stlf = STLF, 168 | arima = ARIMA(log(Turnover)) 169 | ) %>% 170 | mutate(combination = (ets + stlf + arima) / 3) 171 | cafe_fc <- cafe_models %>% 172 | forecast(h = "5 years") 173 | 174 | ## ----combineplot, dependson="auscafe", fig.cap="Point forecasts from various methods applied to Australian monthly expenditure on eating out."---- 175 | cafe_fc %>% 176 | autoplot(auscafe %>% filter(year(Month) > 2008), 177 | level = NULL) + 178 | labs(y = "$ billion", 179 | title = "Australian monthly expenditure on eating out") 180 | 181 | ## ----combineaccuracy, dependson="auscafe"----------------------------------------------------------------- 182 | cafe_fc %>% 183 | accuracy(auscafe) %>% 184 | arrange(RMSE) 185 | 186 | ## ----cafe_fc_dist----------------------------------------------------------------------------------------- 187 | cafe_fc %>% filter(Month == min(Month)) 188 | 189 | ## ----cafe_fc_gen, warning=FALSE, message=FALSE------------------------------------------------------------ 190 | cafe_futures <- cafe_models %>% 191 | # Generate 1000 future sample paths 192 | generate(h = "5 years", times = 1000) %>% 193 | # Compute forecast distributions from future sample paths 194 | as_tibble() %>% 195 | group_by(Month, .model) %>% 196 | summarise( 197 | dist = distributional::dist_sample(list(.sim)) 198 | ) %>% 199 | ungroup() %>% 200 | # Create fable object 201 | as_fable(index = Month, key = .model, 202 | distribution = dist, response = "Turnover") 203 | 204 | ## ----cafe_fc_gen_futures, warning=FALSE, message=FALSE, dependson="cafe_fc_gen"--------------------------- 205 | # Forecast distributions for h=1 206 | cafe_futures %>% filter(Month == min(Month)) 207 | 208 | ## ----auscafecombPI, fig.cap="Prediction intervals for the combination forecast of Australian monthly expenditure on eating out.", fig.asp=0.5---- 209 | cafe_futures %>% 210 | filter(.model == "combination") %>% 211 | autoplot(auscafe %>% filter(year(Month) > 2008)) + 212 | labs(y = "$ billion", 213 | title = "Australian monthly expenditure on eating out") 214 | 215 | ## ----auscafe_winkler-------------------------------------------------------------------------------------- 216 | cafe_futures %>% 217 | accuracy(auscafe, measures = interval_accuracy_measures, 218 | level = 95) %>% 219 | arrange(winkler) 220 | 221 | ## ----aggregates, message=FALSE, dependson="auscafe"------------------------------------------------------- 222 | fit <- auscafe %>% 223 | # Fit a model to the data 224 | model(ETS(Turnover)) 225 | futures <- fit %>% 226 | # Simulate 10000 future sample paths, each of length 12 227 | generate(times = 10000, h = 12) %>% 228 | # Sum the results for each sample path 229 | as_tibble() %>% 230 | group_by(.rep) %>% 231 | summarise(.sim = sum(.sim)) %>% 232 | # Store as a distribution 233 | summarise(total = distributional::dist_sample(list(.sim))) 234 | 235 | ## ----aggregates2, dependson="aggregates"------------------------------------------------------------------ 236 | futures %>% 237 | mutate( 238 | mean = mean(total), 239 | pi80 = hilo(total, 80), 240 | pi95 = hilo(total, 95) 241 | ) 242 | 243 | ## ----aggregates3, dependson="aggregates"------------------------------------------------------------------ 244 | forecast(fit, h = 12) %>% 245 | as_tibble() %>% 246 | summarise(total = sum(.mean)) 247 | 248 | ## ----backcasting, fig.cap="Backcasts for Australian monthly expenditure on cafés, restaurants and takeaway food services using an ETS model.", dependson='auscafe'---- 249 | backcasts <- auscafe %>% 250 | mutate(reverse_time = rev(row_number())) %>% 251 | update_tsibble(index = reverse_time) %>% 252 | model(ets = ETS(Turnover ~ season(period = 12))) %>% 253 | forecast(h = 15) %>% 254 | mutate(Month = auscafe$Month[1] - (1:15)) %>% 255 | as_fable(index = Month, response = "Turnover", 256 | distribution = "Turnover") 257 | backcasts %>% 258 | autoplot(auscafe %>% filter(year(Month) < 1990)) + 259 | labs(title = "Backcasts of Australian food expenditure", 260 | y = "$ (billions)") 261 | 262 | ## ----shortseries, message=FALSE--------------------------------------------------------------------------- 263 | m3totsibble <- function(z) { 264 | bind_rows( 265 | as_tsibble(z$x) %>% mutate(Type = "Training"), 266 | as_tsibble(z$xx) %>% mutate(Type = "Test") 267 | ) %>% 268 | mutate( 269 | st = z$st, 270 | type = z$type, 271 | period = z$period, 272 | description = z$description, 273 | sn = z$sn, 274 | ) %>% 275 | as_tibble() 276 | } 277 | short <- Mcomp::M3 %>% 278 | subset("yearly") %>% 279 | purrr::map_dfr(m3totsibble) %>% 280 | group_by(sn) %>% 281 | mutate(n = max(row_number())) %>% 282 | filter(n <= 20) %>% 283 | ungroup() %>% 284 | as_tsibble(index = index, key = c(sn, period, st)) 285 | 286 | ## ----shortfit, dependson="shortseries"-------------------------------------------------------------------- 287 | short_fit <- short %>% 288 | model(arima = ARIMA(value)) 289 | 290 | ## ----shortfit_results, dependson="shortfit", include=FALSE------------------------------------------------ 291 | nptable <- tidy(short_fit) %>% 292 | group_by(sn) %>% 293 | summarise(n = n()) %>% 294 | right_join(short_fit) %>% 295 | replace_na(list(n = 0)) %>% 296 | group_by(n) %>% 297 | summarise(count = n()) 298 | 299 | ## ----isms, warning=FALSE, fig.cap="Forecasts from an ARIMA model fitted to the Australian monthly expenditure on cafés, restaurants and takeaway food services.", dependson='auscafe', fig.asp=0.58---- 300 | training <- auscafe %>% filter(year(Month) <= 2013) 301 | test <- auscafe %>% filter(year(Month) > 2013) 302 | cafe_fit <- training %>% 303 | model(ARIMA(log(Turnover))) 304 | cafe_fit %>% 305 | forecast(h = 60) %>% 306 | autoplot(auscafe) + 307 | labs(title = "Australian food expenditure", 308 | y = "$ (billions)") 309 | 310 | ## ----isms2, dependson="isms", warning=FALSE, fig.cap="Twelve-step fitted values from an ARIMA model fitted to the Australian café training data.", fig.asp=0.58---- 311 | fits12 <- fitted(cafe_fit, h = 12) 312 | training %>% 313 | autoplot(Turnover) + 314 | autolayer(fits12, .fitted, col = "#D55E00") + 315 | labs(title = "Australian food expenditure", 316 | y = "$ (billions)") 317 | 318 | ## ----oosos2, dependson='isms'----------------------------------------------------------------------------- 319 | cafe_fit %>% 320 | refit(test) %>% 321 | accuracy() 322 | 323 | ## ----ahoutlier, fig.cap="Number of overnight trips to the Adelaide Hills region of South Australia.", fig.asp=0.4---- 324 | tourism %>% 325 | filter( 326 | Region == "Adelaide Hills", Purpose == "Visiting" 327 | ) %>% 328 | autoplot(Trips) + 329 | labs(title = "Quarterly overnight trips to Adelaide Hills", 330 | y = "Number of trips") 331 | 332 | ## ----stlahdecomp, fig.cap="STL decomposition of visitors to the Adelaide Hills region of South Australia, with no seasonal component.", fig.asp=0.7---- 333 | ah_decomp <- tourism %>% 334 | filter( 335 | Region == "Adelaide Hills", Purpose == "Visiting" 336 | ) %>% 337 | # Fit a non-seasonal STL decomposition 338 | model( 339 | stl = STL(Trips ~ season(period = 1), robust = TRUE) 340 | ) %>% 341 | components() 342 | ah_decomp %>% autoplot() 343 | 344 | ## ----stl_outliers, dependson="stlahdecomp"---------------------------------------------------------------- 345 | outliers <- ah_decomp %>% 346 | filter( 347 | remainder < quantile(remainder, 0.25) - 3*IQR(remainder) | 348 | remainder > quantile(remainder, 0.75) + 3*IQR(remainder) 349 | ) 350 | outliers 351 | 352 | ## ----ah_miss, message=FALSE, dependson="stl_outliers"----------------------------------------------------- 353 | ah_miss <- tourism %>% 354 | filter( 355 | Region == "Adelaide Hills", 356 | Purpose == "Visiting" 357 | ) %>% 358 | # Remove outlying observations 359 | anti_join(outliers) %>% 360 | # Replace with missing values 361 | fill_gaps() 362 | ah_fill <- ah_miss %>% 363 | # Fit ARIMA model to the data containing missing values 364 | model(ARIMA(Trips)) %>% 365 | # Estimate Trips for all periods 366 | interpolate(ah_miss) 367 | ah_fill %>% 368 | # Only show outlying periods 369 | right_join(outliers %>% select(-Trips)) 370 | 371 | ## ----replacment, include=FALSE, dependson="ah_miss"------------------------------------------------------- 372 | outlier <- outliers$Trips 373 | replacement <- ah_miss %>% 374 | model(ARIMA(Trips)) %>% 375 | interpolate(ah_miss) %>% 376 | right_join(outliers %>% select(-Trips)) %>% 377 | pull(Trips) 378 | 379 | ## ----replacement-plot, fig.cap="Number of overnight trips to the Adelaide Hills region of South Australia with the 2002Q4 outlier being replaced using an ARIMA model for interpolation.", dependson="ah_miss"---- 380 | ah_fill %>% 381 | autoplot(Trips) + 382 | autolayer(ah_fill %>% filter_index("2002 Q3"~"2003 Q1"), 383 | Trips, colour="#D55E00") + 384 | labs(title = "Quarterly overnight trips to Adelaide Hills", 385 | y = "Number of trips") 386 | 387 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fpp3 scripts 2 | 3 | This repository contains the R code used to produce all the tables, figures and examples in [*Forecasting: Principles and Practice* (3rd edition)](https://OTexts.com/fpp3). 4 | -------------------------------------------------------------------------------- /before-each-chapter.R: -------------------------------------------------------------------------------- 1 | set.seed(1967) 2 | 3 | knitr::opts_chunk$set( 4 | comment = "#>", 5 | collapse = TRUE, 6 | echo = TRUE, 7 | cache = TRUE, 8 | out.width = "100%", 9 | fig.align = 'center', 10 | fig.width = 7, 11 | fig.asp = 0.618 # 1 / phi 12 | ) 13 | 14 | library(fpp3, quietly=TRUE) 15 | library(patchwork) 16 | 17 | html <- knitr::is_html_output() 18 | 19 | # Set some defaults 20 | # Colours to be viridis for continuous scales and Okabe for discrete scales 21 | options( 22 | digits = 4, 23 | width=58 + html*20, 24 | ggplot2.continuous.colour="viridis", 25 | ggplot2.continuous.fill = "viridis", 26 | ggplot2.discrete.colour = c("#D55E00", "#0072B2","#009E73", "#CC79A7", "#E69F00", "#56B4E9", "#F0E442"), 27 | ggplot2.discrete.fill = c("#D55E00", "#0072B2","#009E73", "#CC79A7", "#E69F00", "#56B4E9", "#F0E442") 28 | ) 29 | 30 | # Avoid some conflicts 31 | conflicted::conflict_prefer("VAR","fable") 32 | conflicted::conflict_prefer("select","dplyr") 33 | conflicted::conflict_prefer("filter","dplyr") 34 | conflicted::conflict_prefer("vars","dplyr") 35 | conflicted::conflict_prefer("invoke","purrr") 36 | 37 | --------------------------------------------------------------------------------