├── src
    ├── Makevars
    ├── Makevars.win
    ├── stan
    │   ├── .Rapp.history
    │   ├── static_btd.stan
    │   ├── dynamic_btd.stan
    │   ├── student_t_dynamic.stan
    │   ├── student_t.stan
    │   ├── double_pois.stan
    │   ├── neg_bin.stan
    │   ├── skellam.stan
    │   ├── biv_pois.stan
    │   ├── zero_infl_skellam.stan
    │   ├── diag_infl_biv_pois.stan
    │   └── neg_bin_dynamic.stan
    └── install.libs.R
├── .github
    ├── .gitignore
    └── workflows
    │   ├── test-coverage.yaml
    │   └── R-CMD-check.yaml
├── .DS_Store
├── data
    ├── italy.rda
    └── england.rda
├── Meta
    └── vignette.rds
├── man
    ├── figures
    │   ├── logo.png
    │   ├── readme_foot_prob.png
    │   ├── readme_foot_rank.png
    │   ├── readme_pp_foot.png
    │   └── readme_foot_abilities.png
    ├── italy.Rd
    ├── print.compareFoot.Rd
    ├── england.Rd
    ├── print.stanFoot.Rd
    ├── print.btdFoot.Rd
    ├── plot_logStrength.Rd
    ├── foot_rank.Rd
    ├── foot_round_robin.Rd
    ├── foot_prob.Rd
    ├── pp_foot.Rd
    ├── foot_abilities.Rd
    ├── plot_btdPosterior.Rd
    ├── priors.Rd
    ├── compare_foot.Rd
    ├── mle_foot.Rd
    └── btd_foot.Rd
├── vignettes
    ├── pp_foot-1.png
    ├── pp_foot-2.png
    ├── abilities-1.png
    ├── abilities-2.png
    ├── pp_checks-1.png
    ├── rank_pred1-1.png
    ├── rank_pred1-2.png
    ├── rank_pred2-1.png
    ├── rank_pred2-2.png
    ├── rank_pred2-3.png
    ├── abilities_dyn-1.png
    ├── comparing_priors-1.png
    ├── foot_roundrobin-1.png
    ├── static_fit_corr-1.png
    ├── plot_btdPosterior_dyn-1.png
    ├── plot_btdPosterior_stat-1.png
    ├── foot_prob_weekly_predict-1.png
    ├── plot_logStrength_teams_dyn-1.png
    ├── plot_btdPosterior_teams_dyn-1.png
    ├── plot_btdPosterior_teams_stat-1.png
    ├── plot_btdPosterior_teams_dyn_dens-1.png
    └── plot_btdPosterior_teams_stat_dens-1.png
├── .gitignore
├── CRAN-SUBMISSION
├── CRAN-RELEASE
├── R
    ├── startup.R
    ├── italy.R
    ├── globals.R
    ├── england.R
    ├── plot_logStrength.R
    ├── foot_priors.R
    ├── utils_foot.R
    └── pp_foot.R
├── rebuild-long-running-vignette.R
├── .Rbuildignore
├── footBayes.Rproj
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test_plot_logStrength.R
    │   ├── test_pp_foot.R
    │   ├── test_compare_foot.R
    │   ├── test_mle_foot.R
    │   └── test_foot_prob.R
├── cran-comments.md
├── NEWS.md
├── DESCRIPTION
├── NAMESPACE
├── README.Rmd
└── README.md


/src/Makevars:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/stan/.Rapp.history:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/.DS_Store


--------------------------------------------------------------------------------
/data/italy.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/data/italy.rda


--------------------------------------------------------------------------------
/Meta/vignette.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/Meta/vignette.rds


--------------------------------------------------------------------------------
/data/england.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/data/england.rda


--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/man/figures/logo.png


--------------------------------------------------------------------------------
/vignettes/pp_foot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/pp_foot-1.png


--------------------------------------------------------------------------------
/vignettes/pp_foot-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/pp_foot-2.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | .DS_Store
6 | 
7 | /doc/
8 | /Meta/
9 | 


--------------------------------------------------------------------------------
/vignettes/abilities-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/abilities-1.png


--------------------------------------------------------------------------------
/vignettes/abilities-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/abilities-2.png


--------------------------------------------------------------------------------
/vignettes/pp_checks-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/pp_checks-1.png


--------------------------------------------------------------------------------
/vignettes/rank_pred1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/rank_pred1-1.png


--------------------------------------------------------------------------------
/vignettes/rank_pred1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/rank_pred1-2.png


--------------------------------------------------------------------------------
/vignettes/rank_pred2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/rank_pred2-1.png


--------------------------------------------------------------------------------
/vignettes/rank_pred2-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/rank_pred2-2.png


--------------------------------------------------------------------------------
/vignettes/rank_pred2-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/rank_pred2-3.png


--------------------------------------------------------------------------------
/vignettes/abilities_dyn-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/abilities_dyn-1.png


--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 2.0.0
2 | Date: 2025-05-15 16:28:11 UTC
3 | SHA: 2bd20fe9e07798b4f1b023cfac3ef858ae227da4
4 | 


--------------------------------------------------------------------------------
/man/figures/readme_foot_prob.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/man/figures/readme_foot_prob.png


--------------------------------------------------------------------------------
/man/figures/readme_foot_rank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/man/figures/readme_foot_rank.png


--------------------------------------------------------------------------------
/man/figures/readme_pp_foot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/man/figures/readme_pp_foot.png


--------------------------------------------------------------------------------
/vignettes/comparing_priors-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/comparing_priors-1.png


--------------------------------------------------------------------------------
/vignettes/foot_roundrobin-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/foot_roundrobin-1.png


--------------------------------------------------------------------------------
/vignettes/static_fit_corr-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/static_fit_corr-1.png


--------------------------------------------------------------------------------
/man/figures/readme_foot_abilities.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/man/figures/readme_foot_abilities.png


--------------------------------------------------------------------------------
/vignettes/plot_btdPosterior_dyn-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/plot_btdPosterior_dyn-1.png


--------------------------------------------------------------------------------
/vignettes/plot_btdPosterior_stat-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/plot_btdPosterior_stat-1.png


--------------------------------------------------------------------------------
/vignettes/foot_prob_weekly_predict-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/foot_prob_weekly_predict-1.png


--------------------------------------------------------------------------------
/vignettes/plot_logStrength_teams_dyn-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/plot_logStrength_teams_dyn-1.png


--------------------------------------------------------------------------------
/vignettes/plot_btdPosterior_teams_dyn-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/plot_btdPosterior_teams_dyn-1.png


--------------------------------------------------------------------------------
/vignettes/plot_btdPosterior_teams_stat-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/plot_btdPosterior_teams_stat-1.png


--------------------------------------------------------------------------------
/CRAN-RELEASE:
--------------------------------------------------------------------------------
1 | This package was submitted to CRAN on 2022-02-08.
2 | Once it is accepted, delete this file and tag the release (commit c259aeb).
3 | 


--------------------------------------------------------------------------------
/vignettes/plot_btdPosterior_teams_dyn_dens-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/plot_btdPosterior_teams_dyn_dens-1.png


--------------------------------------------------------------------------------
/vignettes/plot_btdPosterior_teams_stat_dens-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/plot_btdPosterior_teams_stat_dens-1.png


--------------------------------------------------------------------------------
/R/startup.R:
--------------------------------------------------------------------------------
1 | .onAttach <- function(lib, pkg) {
2 |   packageStartupMessage(
3 |     "NOTE: footBayes depends on the R package cmdstanr, which is not available on CRAN.
4 |       The cmdstanr package may be installed at https://mc-stan.org/cmdstanr/."
5 |   )
6 | }
7 | 


--------------------------------------------------------------------------------
/rebuild-long-running-vignette.R:
--------------------------------------------------------------------------------
1 | old_wd <- getwd()
2 | 
3 | setwd("vignettes/")
4 | knitr::knit("footBayes_a_rapid_guide.Rmd.orig", output = "footBayes_a_rapid_guide.Rmd")
5 | knitr::purl("footBayes_a_rapid_guide.Rmd.orig", output = "footBayes_a_rapid_guide.R")
6 | 
7 | setwd(old_wd)
8 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^cran-comments\.md$
 4 | 
 5 | ^\.travis\.yml$
 6 | ^CRAN-RELEASE$
 7 | ^CRAN-SUBMISSION$
 8 | 
 9 | ^docs$
10 | ^Meta$
11 | ^doc$
12 | 
13 | ^rebuild-long-running-vignette.R
14 | 
15 | ^README\.Rmd$
16 | ^README-.*\.png$
17 | ^\README_cache$
18 | ^logo\.png$
19 | 
20 | ^\.github$
21 | ^\.DS_Store
22 | 


--------------------------------------------------------------------------------
/footBayes.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | ProjectId: 912f92ac-40bc-4d09-8a67-203ee124cf14
 3 | 
 4 | RestoreWorkspace: Default
 5 | SaveWorkspace: Default
 6 | AlwaysSaveHistory: Default
 7 | 
 8 | EnableCodeIndexing: Yes
 9 | UseSpacesForTab: Yes
10 | NumSpacesForTab: 2
11 | Encoding: UTF-8
12 | 
13 | RnwWeave: Sweave
14 | LaTeX: pdfLaTeX
15 | 
16 | AutoAppendNewline: Yes
17 | StripTrailingWhitespace: Yes
18 | 
19 | BuildType: Package
20 | PackageUseDevtools: Yes
21 | PackageInstallArgs: --no-multiarch --with-keep.source
22 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
 1 | # This file is part of the standard setup for testthat.
 2 | # It is recommended that you do not modify it.
 3 | #
 4 | # Where should you do additional test configuration?
 5 | # Learn more about the roles of various files in:
 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
 7 | # * https://testthat.r-lib.org/articles/special-files.html
 8 | 
 9 | library(testthat)
10 | library(footBayes)
11 | library(ggplot2)
12 | library(dplyr)
13 | library(instantiate)
14 | 
15 | test_check("footBayes")
16 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Test environments
 2 | * win-builder (devel and release)
 3 | * local OS X install, R 4.4.3
 4 | * local Linux, R 4.4.3
 5 | 
 6 | ## R CMD check results
 7 | 0 errors | 0 warnings | 1 notes
 8 | 
 9 | The note is related to the installed package size. This is primarily due to the inclusion of the `bin` sub-directory, which contains precompiled `CmdStan` models obtained using the `instantiate` package.
10 | 
11 | ## Downstream dependencies
12 | There are currently no downstream dependencies for this package.
13 | 
14 | 
15 | Thank you
16 | 
17 | 


--------------------------------------------------------------------------------
/R/italy.R:
--------------------------------------------------------------------------------
 1 | #' Italy league results 1934-2022
 2 | #'
 3 | #' All results for Italian soccer games in the top tier
 4 | #' from 1934/35 season to 2021/22 season.
 5 | #'
 6 | #' @format A data frame with 27684 rows and 8 variables:
 7 | #' \describe{
 8 | #'   \item{Date}{Date of match}
 9 | #'   \item{Season}{Season of match - refers to starting year}
10 | #'   \item{home}{Home team}
11 | #'   \item{visitor}{Visiting team}
12 | #'   \item{FT}{Full-time result}
13 | #'   \item{hgoal}{Goals scored by home team}
14 | #'   \item{vgoal}{Goals scored by visiting team}
15 | #'   \item{tier}{Tier of football pyramid: 1}
16 | #' }
17 | "italy"
18 | 


--------------------------------------------------------------------------------
/R/globals.R:
--------------------------------------------------------------------------------
 1 | #' Global variables
 2 | #'
 3 | #' Global variables
 4 | #'
 5 | #' @export
 6 | 
 7 | utils::globalVariables(c(
 8 |   "median", "quantile", "group_by", "summarise",
 9 |   "first", "times", "lo", "hi", "mid", "sd", "par",
10 |   "Home", "Away", "Prob", "rpois", "lo2", "hi2",
11 |   "q_025", "q_975", "q_25", "q_75", "q_50",
12 |   "filter", "Home_prob", ".", "dpois", "optim", "pchisq",
13 |   "as_nlist", "valori", "mutate", "times", "%>%",
14 |   "day", "distinct", "matches", "favorite", "underdog",
15 |   "periods", "rank_points", "team", "period", "log_strength"
16 | ))
17 | 
18 | # LinkingTo: StanHeaders (>= 2.18.0), rstan (>= 2.18.1), BH (>= 1.66.0),
19 | # Rcpp (>= 0.12.0), RcppEigen (>= 0.3.3.3.0)
20 | 


--------------------------------------------------------------------------------
/man/italy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/italy.R
 3 | \docType{data}
 4 | \name{italy}
 5 | \alias{italy}
 6 | \title{Italy league results 1934-2022}
 7 | \format{
 8 | A data frame with 27684 rows and 8 variables:
 9 | \describe{
10 |   \item{Date}{Date of match}
11 |   \item{Season}{Season of match - refers to starting year}
12 |   \item{home}{Home team}
13 |   \item{visitor}{Visiting team}
14 |   \item{FT}{Full-time result}
15 |   \item{hgoal}{Goals scored by home team}
16 |   \item{vgoal}{Goals scored by visiting team}
17 |   \item{tier}{Tier of football pyramid: 1}
18 | }
19 | }
20 | \usage{
21 | italy
22 | }
23 | \description{
24 | All results for Italian soccer games in the top tier
25 | from 1934/35 season to 2021/22 season.
26 | }
27 | \keyword{datasets}
28 | 


--------------------------------------------------------------------------------
/R/england.R:
--------------------------------------------------------------------------------
 1 | #' English league results 1888-2022
 2 | #'
 3 | #' All results for English soccer games in the top 4 tiers
 4 | #' from 1888/89 season to 2021/22 season.
 5 | #'
 6 | #' @format A data frame with 203956 rows and 12 variables:
 7 | #' \describe{
 8 | #'   \item{Date}{Date of match}
 9 | #'   \item{Season}{Season of match - refers to starting year}
10 | #'   \item{home}{Home team}
11 | #'   \item{visitor}{Visiting team}
12 | #'   \item{FT}{Full-time result}
13 | #'   \item{hgoal}{Goals scored by home team}
14 | #'   \item{vgoal}{Goals scored by visiting team}
15 | #'   \item{division}{Division: 1,2,3,4 or 3N (Old 3-North) or 3S (Old 3-South)}
16 | #'   \item{tier}{Tier of football pyramid: 1,2,3,4}
17 | #'   \item{totgoal}{Total goals in game}
18 | #'   \item{goaldif}{Goal difference in game home goals - visitor goals}
19 | #'   \item{result}{Result: H-Home Win, A-Away Win, D-Draw}
20 | #' }
21 | "england"
22 | 


--------------------------------------------------------------------------------
/man/print.compareFoot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/print_fun_foot.R
 3 | \name{print.compareFoot}
 4 | \alias{print.compareFoot}
 5 | \title{Print method for compareFoot objects}
 6 | \usage{
 7 | \method{print}{compareFoot}(x, digits = 3, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{An object of class \code{compareFoot} returned by \code{\link{compare_foot}}.}
11 | 
12 | \item{digits}{Number of digits to use when printing numeric values for the metrics. Default is \code{3}.}
13 | 
14 | \item{...}{Additional arguments passed to \code{print}.}
15 | }
16 | \description{
17 | Provides a formatted output when printing objects of class \code{compareFoot}, displaying the predictive performance metrics and, if available, the confusion matrices for each model or probability matrix.
18 | }
19 | \author{
20 | Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}
21 | }
22 | 


--------------------------------------------------------------------------------
/man/england.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/england.R
 3 | \docType{data}
 4 | \name{england}
 5 | \alias{england}
 6 | \title{English league results 1888-2022}
 7 | \format{
 8 | A data frame with 203956 rows and 12 variables:
 9 | \describe{
10 |   \item{Date}{Date of match}
11 |   \item{Season}{Season of match - refers to starting year}
12 |   \item{home}{Home team}
13 |   \item{visitor}{Visiting team}
14 |   \item{FT}{Full-time result}
15 |   \item{hgoal}{Goals scored by home team}
16 |   \item{vgoal}{Goals scored by visiting team}
17 |   \item{division}{Division: 1,2,3,4 or 3N (Old 3-North) or 3S (Old 3-South)}
18 |   \item{tier}{Tier of football pyramid: 1,2,3,4}
19 |   \item{totgoal}{Total goals in game}
20 |   \item{goaldif}{Goal difference in game home goals - visitor goals}
21 |   \item{result}{Result: H-Home Win, A-Away Win, D-Draw}
22 | }
23 | }
24 | \usage{
25 | england
26 | }
27 | \description{
28 | All results for English soccer games in the top 4 tiers
29 | from 1888/89 season to 2021/22 season.
30 | }
31 | \keyword{datasets}
32 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # footBayes 2.0.1
 2 | * Updated vignette.
 3 | * Correct typo in foot_compare() description.
 4 | * Correct typo in pp_foot() description.
 5 | 
 6 | 
 7 | # footBayes 2.0.0
 8 | 
 9 | * Migration from `rstan` interface to `CmdStanR` interface.
10 | * Add support for the `VI`, `pathfinder` and `laplace` algorithms.
11 | * Add pre-compiled CmdStan models using the package `instantiate`.
12 | * Add AIC and BIC output elements in `mle_foot()`.
13 | * Add Bayesian static/dynamic Negative Binomial model in `stan_foot()`.
14 | * Minor `ggplot2` edits on `foot_rank()`, `foot_abilities()` and `pp_foot()`.
15 | * Updated vignette.
16 | 
17 | 
18 | # footBayes 1.0.0
19 | 
20 | * Bradley-Terry model for abilities.
21 | * Dynamic ranking in the models.
22 | * Updated vignette.
23 | * Probabilistic predictive performance (pseudo-R squared, Brier score, etc.).
24 | 
25 | # footBayes 0.2.0
26 | 
27 | * Inclusion of diagonal-inflated bivariate Poisson and zero-inflated Skellam models.
28 | * Minor edits and drop engsoccerdata dependence.
29 | * Updated vignette.
30 | 
31 | # footBayes 0.1.0
32 | 
33 | * First submission to CRAN.
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/src/install.libs.R:
--------------------------------------------------------------------------------
 1 | libs <- file.path(R_PACKAGE_DIR, "libs", R_ARCH)
 2 | dir.create(libs, recursive = TRUE, showWarnings = FALSE)
 3 | for (file in c("symbols.rds", Sys.glob(paste0("*", SHLIB_EXT)))) {
 4 |   if (file.exists(file)) {
 5 |     file.copy(file, file.path(libs, file))
 6 |   }
 7 | }
 8 | inst_stan <- file.path("..", "inst", "stan")
 9 | if (dir.exists(inst_stan)) {
10 |   warning(
11 |     "Stan models in inst/stan/ are deprecated in {instantiate} ",
12 |     ">= 0.0.4.9001 (2024-01-03). Please put them in src/stan/ instead."
13 |   )
14 |   if (file.exists("stan")) {
15 |     warning("src/stan/ already exists. Not copying models from inst/stan/.")
16 |   } else {
17 |     message("Copying inst/stan/ to src/stan/.")
18 |     fs::dir_copy(path = inst_stan, new_path = "stan")
19 |   }
20 | }
21 | bin <- file.path(R_PACKAGE_DIR, "bin")
22 | if (!file.exists(bin)) {
23 |   dir.create(bin, recursive = TRUE, showWarnings = FALSE)
24 | }
25 | bin_stan <- file.path(bin, "stan")
26 | fs::dir_copy(path = "stan", new_path = bin_stan)
27 | callr::r(
28 |   func = function(bin_stan) {
29 |     instantiate::stan_package_compile(
30 |       models = instantiate::stan_package_model_files(path = bin_stan))
31 |   },
32 |   args = list(bin_stan = bin_stan)
33 | )
34 | 


--------------------------------------------------------------------------------
/man/print.stanFoot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/print_fun_foot.R
 3 | \name{print.stanFoot}
 4 | \alias{print.stanFoot}
 5 | \title{Print Method for stanFoot Objects}
 6 | \usage{
 7 | \method{print}{stanFoot}(x, pars = NULL, teams = NULL, digits = 3, true_names = TRUE, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{An object of class \code{stanFoot}.}
11 | 
12 | \item{pars}{Optional character vector specifying parameters to include in the summary. This can be specific parameter names (e.g., \code{"att"}, \code{"def"}, \code{"att_raw"}, \code{"def_raw"}, \code{"home"}, \code{"sigma_att"}, \code{"sigma_def"}, \code{"rho"}, and \code{"beta"}). If \code{NULL}, all parameters are included.}
13 | 
14 | \item{teams}{Optional character vector specifying team names whose \code{"att"}, \code{"def"}, \code{"att_raw"}, \code{"def_raw"} parameters should be displayed.}
15 | 
16 | \item{digits}{Number of digits to use when printing numeric values. Default is \code{3}.}
17 | 
18 | \item{true_names}{Logical value indicating whether to display team names in parameter summaries. Default is \code{TRUE}.}
19 | 
20 | \item{...}{Additional arguments passed.}
21 | }
22 | \description{
23 | Provides detailed posterior summaries for the Stan football model parameters.
24 | }
25 | \author{
26 | Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}
27 | }
28 | 


--------------------------------------------------------------------------------
/man/print.btdFoot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/print_fun_foot.R
 3 | \name{print.btdFoot}
 4 | \alias{print.btdFoot}
 5 | \title{Print Method for btdFoot Objects}
 6 | \usage{
 7 | \method{print}{btdFoot}(
 8 |   x,
 9 |   pars = NULL,
10 |   teams = NULL,
11 |   digits = 3,
12 |   true_names = TRUE,
13 |   display = "both",
14 |   ...
15 | )
16 | }
17 | \arguments{
18 | \item{x}{An object of class \code{btdFoot}.}
19 | 
20 | \item{pars}{Optional character vector specifying parameters to include in the summary (e.g., \code{"logStrength"}, \code{"logTie"}, \code{"home"}, \code{"log_lik"}, and \code{"y_rep"}).}
21 | 
22 | \item{teams}{Optional character vector specifying team names whose \code{logStrength} parameters should be displayed.}
23 | 
24 | \item{digits}{Number of digits to use when printing numeric values. Default is \code{3}.}
25 | 
26 | \item{true_names}{Logical value indicating whether to display team names in parameter summaries. Default is \code{TRUE}.}
27 | 
28 | \item{display}{Character string specifying which parts of the output to display. Options are \code{"both"}, \code{"rankings"}, or \code{"parameters"}. Default is \code{"both"}.}
29 | 
30 | \item{...}{Additional arguments passed.}
31 | }
32 | \description{
33 | Provides detailed posterior summaries for the Bayesian Bradley-Terry-Davidson model parameters.
34 | }
35 | \author{
36 | Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}
37 | }
38 | 


--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 | 
 8 | name: test-coverage.yaml
 9 | 
10 | permissions: read-all
11 | 
12 | jobs:
13 |   test-coverage:
14 |     runs-on: ubuntu-latest
15 |     env:
16 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
17 | 
18 |     steps:
19 |       - uses: actions/checkout@v4
20 | 
21 |       - uses: r-lib/actions/setup-r@v2
22 |         with:
23 |           use-public-rspm: true
24 | 
25 |       - uses: r-lib/actions/setup-r-dependencies@v2
26 |         with:
27 |           dependencies: NA
28 |           extra-packages: |
29 |             R.rsp
30 |             knitr
31 |             rcmdcheck
32 |             rmarkdown
33 |             lme4
34 |             mice
35 |             BH
36 |             MCMCglmm
37 |             ape
38 |             stan-dev/cmdstanr
39 |             RcppEigen
40 |             testthat
41 |             emmeans
42 |             projpred
43 |             priorsense
44 |             RWiener
45 |             rtdists
46 |             extraDistr
47 |             spdep
48 |             mnormt
49 |             splines2
50 |             arm
51 |             statmod
52 |             diffobj
53 |             betareg
54 |             usethis
55 |             testthat
56 |             covr
57 |             xml2
58 | 
59 |       - name: Build Cmdstan
60 |         run: |
61 |           cmdstanr::check_cmdstan_toolchain(fix = TRUE)
62 |           cmdstanr::install_cmdstan(cores = parallel::detectCores())
63 |         shell: Rscript {0}
64 | 
65 |       - name: Test coverage
66 |         run: covr::codecov()
67 |         shell: Rscript {0}
68 | 


--------------------------------------------------------------------------------
/man/plot_logStrength.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_logStrength.R
 3 | \name{plot_logStrength}
 4 | \alias{plot_logStrength}
 5 | \title{Plot Rankings for btdFoot Objects}
 6 | \usage{
 7 | plot_logStrength(x, teams = NULL)
 8 | }
 9 | \arguments{
10 | \item{x}{An object of class \code{btdFoot}.}
11 | 
12 | \item{teams}{An optional character vector specifying team names to include in the rankings plot. If \code{NULL}, all teams are included.}
13 | }
14 | \value{
15 | A \code{ggplot} object:
16 | \itemize{
17 |   \item Dynamic BTD: A lineplot for the \code{log_strengths} over each period, colored by team.
18 |   \item Static BTD: An horizontal barplot for each team.
19 | }
20 | }
21 | \description{
22 | Visualizes team rankings based on whether the ranking is dynamic or static.
23 | }
24 | \details{
25 | \itemize{
26 | \item Dynamic Ranking: Plots Rank Points over Periods for each team with lines and points.
27 | \item Static Ranking: Plots Rank Points on the x-axis against Team Names on the y-axis with horizontal lines and points.
28 | }
29 | }
30 | \examples{
31 | \dontrun{
32 | if (instantiate::stan_cmdstan_exists()) {
33 |   library(dplyr)
34 | 
35 |   data("italy")
36 | 
37 |   italy_2020_2021_rank <- italy \%>\%
38 |     select(Season, home, visitor, hgoal, vgoal) \%>\%
39 |     filter(Season == "2020" | Season == "2021") \%>\%
40 |     mutate(match_outcome = case_when(
41 |       hgoal > vgoal ~ 1, # Home team wins
42 |       hgoal == vgoal ~ 2, # Draw
43 |       hgoal < vgoal ~ 3 # Away team wins
44 |     )) \%>\%
45 |     mutate(periods = case_when(
46 |       row_number() <= 190 ~ 1,
47 |       row_number() <= 380 ~ 2,
48 |       row_number() <= 570 ~ 3,
49 |       TRUE ~ 4
50 |     )) \%>\% # Assign periods based on match number
51 |     select(periods,
52 |       home_team = home,
53 |       away_team = visitor, match_outcome
54 |     )
55 | 
56 |   fit_rank_dyn <- btd_foot(
57 |     data = italy_2020_2021_rank,
58 |     dynamic_rank = TRUE,
59 |     rank_measure = "median",
60 |     iter_sampling = 1000,
61 |     parallel_chains = 2,
62 |     chains = 2
63 |   )
64 | 
65 |   plot_logStrength(fit_rank_dyn)
66 | 
67 |   plot_logStrength(fit_rank_dyn, teams = c("AC Milan", "AS Roma", "Juventus", "Inter"))
68 | }
69 | }
70 | 
71 | }
72 | \author{
73 | Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}.
74 | }
75 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 | 
 8 | name: R-CMD-check.yaml
 9 | 
10 | permissions: read-all
11 | 
12 | jobs:
13 |   R-CMD-check:
14 |     runs-on: ${{ matrix.config.os }}
15 | 
16 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
17 | 
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         config:
22 |           - {os: macos-latest,   r: 'release'}
23 |           - {os: windows-latest, r: 'release'}
24 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
25 |           - {os: ubuntu-latest,   r: 'release'}
26 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
27 | 
28 |     env:
29 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
30 |       R_KEEP_PKG_SOURCE: yes
31 | 
32 |     steps:
33 |       - uses: actions/checkout@v4
34 | 
35 |       - uses: r-lib/actions/setup-pandoc@v2
36 | 
37 |       - uses: r-lib/actions/setup-r@v2
38 |         with:
39 |           r-version: ${{ matrix.config.r }}
40 |           http-user-agent: ${{ matrix.config.http-user-agent }}
41 |           use-public-rspm: true
42 | 
43 |       - uses: r-lib/actions/setup-r-dependencies@v2
44 |         with:
45 |           dependencies: NA
46 |           extra-packages: |
47 |             R.rsp
48 |             knitr
49 |             rcmdcheck
50 |             rmarkdown
51 |             lme4
52 |             mice
53 |             BH
54 |             MCMCglmm
55 |             ape
56 |             stan-dev/cmdstanr
57 |             RcppEigen
58 |             testthat
59 |             emmeans
60 |             projpred
61 |             priorsense
62 |             RWiener
63 |             rtdists
64 |             extraDistr
65 |             spdep
66 |             mnormt
67 |             splines2
68 |             arm
69 |             statmod
70 |             diffobj
71 |             betareg
72 | 
73 |       - name: Build Cmdstan
74 |         run: |
75 |           cmdstanr::check_cmdstan_toolchain(fix = TRUE)
76 |           cmdstanr::install_cmdstan(cores = parallel::detectCores())
77 |         shell: Rscript {0}
78 | 
79 |       - uses: r-lib/actions/check-r-package@v2
80 |         with:
81 |           upload-snapshots: true
82 |           build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
83 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: footBayes
 2 | Type: Package
 3 | Title: Fitting Bayesian and MLE Football Models
 4 | Version: 2.1.0
 5 | Date: 2025-08-07
 6 | Authors@R: c(person(given = "Leonardo",
 7 |                       family = "Egidi",
 8 |                       role = c("aut", "cre"),
 9 |                       email = "legidi@units.it"),
10 |                person(given = "Roberto",
11 |                       family = "Macrì Demartino",
12 |                       role = "aut"),
13 |                person(given = "Vasilis",
14 |                       family = "Palaskas.",
15 |                       role = "aut"))
16 | Maintainer: Leonardo Egidi <legidi@units.it>
17 | License: GPL-2
18 | Description: This is the first package allowing for the estimation,
19 |              visualization and prediction of the most well-known 
20 |              football models: double Poisson, bivariate Poisson,
21 |              Skellam, student_t, diagonal-inflated bivariate Poisson, and
22 |              zero-inflated Skellam. It supports both maximum likelihood estimation (MLE, for 
23 |              'static' models only) and Bayesian inference.
24 |              For Bayesian methods, it incorporates several techniques:
25 |              MCMC sampling with Hamiltonian Monte Carlo, variational inference using
26 |              either the Pathfinder algorithm or Automatic Differentiation Variational
27 |              Inference (ADVI), and the Laplace approximation.
28 |              The package compiles all the 'CmdStan' models once during installation
29 |              using the 'instantiate' package.
30 |              The model construction relies on the most well-known football references, such as 
31 |              Dixon and Coles (1997) <doi:10.1111/1467-9876.00065>,
32 |              Karlis and Ntzoufras (2003) <doi:10.1111/1467-9884.00366> and
33 |              Egidi, Pauli and Torelli (2018) <doi:10.1177/1471082X18798414>.
34 | URL: https://github.com/leoegidi/footbayes
35 | BugReports: https://github.com/leoegidi/footbayes/issues
36 | Encoding: UTF-8
37 | SystemRequirements: CmdStan (https://mc-stan.org/users/interfaces/cmdstan), pandoc (>= 1.12.3), pandoc-citeproc
38 | Depends: R (>= 4.2.0)
39 | Imports: rstan (>= 2.18.1), instantiate, reshape2, ggplot2, ggridges, matrixStats, 
40 |          extraDistr, metRology, dplyr, tidyr, numDeriv, magrittr, rlang, posterior
41 | Suggests: 
42 |     testthat (>= 3.0.0),
43 |     knitr (>= 1.37),
44 |     rmarkdown (>= 2.10),
45 |     loo,
46 |     bayesplot,
47 |     cmdstanr (>= 0.6.0)
48 | Additional_repositories: https://stan-dev.r-universe.dev/
49 | RoxygenNote: 7.3.2
50 | VignetteBuilder: knitr
51 | LazyData: true
52 | BuildManual: yes
53 | Config/testthat/edition: 3
54 | 


--------------------------------------------------------------------------------
/man/foot_rank.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/foot_rank.R
 3 | \name{foot_rank}
 4 | \alias{foot_rank}
 5 | \title{Rank and points predictions}
 6 | \usage{
 7 | foot_rank(object, data, teams = NULL, visualize = "individual")
 8 | }
 9 | \arguments{
10 | \item{object}{An object either of class \code{stanFoot}, \code{CmdStanFit}, or \code{stanfit}.}
11 | 
12 | \item{data}{A data frame containing match data with columns:
13 | \itemize{
14 |   \item \code{periods}:  Time point of each observation (integer >= 1).
15 |   \item \code{home_team}: Home team's name (character string).
16 |   \item \code{away_team}: Away team's name (character string).
17 |   \item \code{home_goals}: Goals scored by the home team (integer >= 0).
18 |   \item \code{away_goals}: Goals scored by the away team (integer >= 0).
19 | }}
20 | 
21 | \item{teams}{An optional character vector specifying team names to include. If \code{NULL}, all teams are included.}
22 | 
23 | \item{visualize}{Type of plots, one among \code{"aggregated"} or \code{"individual"}. Default is \code{"individual"}.}
24 | }
25 | \value{
26 | If \code{visualize = "aggregated"}: a list with
27 | \itemize{
28 |   \item{\code{rank_table}}: A data frame of observed and simulated final points (median, 25\%/75\% quantiles).
29 |   \item{\code{rank_plot}}: A \code{ggplot} comparing observed vs simulated final points for each team.
30 | }
31 | 
32 | If \code{visualize = "individual"}: A \code{ggplot} showing, for each selected team, the observed and
33 | simulated cumulative points over match‑days.
34 | }
35 | \description{
36 | Posterior predictive plots and final rank table for football seasons.
37 | }
38 | \details{
39 | For Bayesian models fitted via \code{stan_foot} the final rank tables are computed according to the
40 | simulation from the posterior predictive distribution of future (out-of-sample) matches.
41 | The dataset should refer to one or more seasons from a given national football league (Premier League, Serie A, La Liga, etc.).
42 | }
43 | \examples{
44 | \dontrun{
45 | if (instantiate::stan_cmdstan_exists()) {
46 |   library(dplyr)
47 | 
48 |   data("italy")
49 |   italy_1999_2000 <- italy \%>\%
50 |     dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\%
51 |     dplyr::filter(Season == "1999" | Season == "2000")
52 | 
53 |   colnames(italy_1999_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals")
54 | 
55 |   fit <- stan_foot(italy_1999_2000, "double_pois", iter_sampling = 200)
56 |   foot_rank(fit, italy_1999_2000)
57 |   foot_rank(fit, italy_1999_2000, visualize = "individual")
58 | }
59 | }
60 | 
61 | }
62 | \author{
63 | Leonardo Egidi \email{legidi@units.it} and Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}
64 | }
65 | 


--------------------------------------------------------------------------------
/man/foot_round_robin.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/foot_round_robin.R
 3 | \name{foot_round_robin}
 4 | \alias{foot_round_robin}
 5 | \title{Round-robin for football leagues}
 6 | \usage{
 7 | foot_round_robin(object, data, teams = NULL, output = "both")
 8 | }
 9 | \arguments{
10 | \item{object}{An object either of class \code{stanFoot}, \code{CmdStanFit}, \code{stanfit}.}
11 | 
12 | \item{data}{A data frame containing match data with columns:
13 | \itemize{
14 |   \item \code{periods}:  Time point of each observation (integer >= 1).
15 |   \item \code{home_team}: Home team's name (character string).
16 |   \item \code{away_team}: Away team's name (character string).
17 |   \item \code{home_goals}: Goals scored by the home team (integer >= 0).
18 |   \item \code{away_goals}: Goals scored by the away team (integer >= 0).
19 | }}
20 | 
21 | \item{teams}{An optional character vector specifying team names to include. If \code{NULL}, all teams are included.}
22 | 
23 | \item{output}{An optional character string specifying the type of output to return. One of \code{"both"}, \code{"table"},
24 | or \code{"plot"}. Default is \code{"both"}.}
25 | }
26 | \value{
27 | If \code{output = "both"} a list with:
28 | \itemize{
29 |   \item{\code{round_table}}: A data frame of matchups (\code{Home}, \code{Away}), observed scores, and \code{Home_prob} (median posterior probability of a home win).
30 |   \item{\code{round_plot}}: A \code{ggplot} heatmap of home‑win probabilities with observed scores overlaid.
31 | }
32 | If \code{output = "table"} or \code{"plot"}, returns only that component.
33 | }
34 | \description{
35 | Posterior predictive probabilities for a football season in a round-robin format
36 | }
37 | \details{
38 | For Bayesian models fitted via \code{stan_foot} the round-robin table is computed according to the
39 | simulation from the posterior predictive distribution of future (out-of-sample) matches.
40 | The dataset should refer to one or more seasons from a given national football league (Premier League, Serie A, La Liga, etc.).
41 | }
42 | \examples{
43 | \dontrun{
44 | if (instantiate::stan_cmdstan_exists()) {
45 |   library(dplyr)
46 | 
47 |   data("italy")
48 |   italy_1999_2000 <- italy \%>\%
49 |     dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\%
50 |     dplyr::filter(Season == "1999" | Season == "2000")
51 | 
52 |   colnames(italy_1999_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals")
53 | 
54 |   fit <- stan_foot(italy_1999_2000, "double_pois", predict = 45, iter_sampling = 200)
55 | 
56 |   foot_round_robin(fit, italy_1999_2000)
57 |   foot_round_robin(fit, italy_1999_2000, c("Parma AC", "AS Roma"))
58 | }
59 | }
60 | }
61 | \author{
62 | Leonardo Egidi \email{legidi@units.it} and Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}
63 | }
64 | 


--------------------------------------------------------------------------------
/man/foot_prob.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/foot_prob.R
 3 | \name{foot_prob}
 4 | \alias{foot_prob}
 5 | \title{Plot football matches probabilities for out-of-sample football matches.}
 6 | \usage{
 7 | foot_prob(object, data, home_team, away_team)
 8 | }
 9 | \arguments{
10 | \item{object}{An object either of class \code{stanFoot}, \code{CmdStanFit}, \code{\link[rstan]{stanfit}}, or class
11 | \code{\link{list}} containing the Maximum Likelihood Estimates (MLE) for the model parameters fitted
12 |  with \code{mle_foot}.}
13 | 
14 | \item{data}{A data frame containing match data with columns:
15 | \itemize{
16 |   \item \code{periods}:  Time point of each observation (integer >= 1).
17 |   \item \code{home_team}: Home team's name (character string).
18 |   \item \code{away_team}: Away team's name (character string).
19 |   \item \code{home_goals}: Goals scored by the home team (integer >= 0).
20 |   \item \code{away_goals}: Goals scored by the away team (integer >= 0).
21 | }}
22 | 
23 | \item{home_team}{The home team(s) for the predicted matches.}
24 | 
25 | \item{away_team}{The away team(s) for the predicted matches.}
26 | }
27 | \value{
28 | A list with components:
29 |  \itemize{
30 |  \item \code{prob_table}: A data frame containing the results probabilities of the out-of-sample matches.
31 |  \item \code{prob_plot}: A \code{ggplot} object for Bayesian models only showing the posterior predictive heatmap
32 |   of exact score probabilities, with the true result highlighted.
33 | }
34 | }
35 | \description{
36 | The function provides a table containing the home win, draw and away win probabilities for a bunch of
37 | out-of-sample matches as specified by \code{stan_foot} or \code{mle_foot}.
38 | }
39 | \details{
40 | For Bayesian models the results probabilities are computed according to the
41 | simulation from the posterior predictive distribution of future (out-of-sample) matches.
42 | Specifically, matches are ordered from those in which the favorite team has the highest posterior probability
43 | of winning to those where the underdog is more likely to win. For MLE models
44 | fitted via the \code{mle_foot} the probabilities are computed by simulating from the MLE estimates.
45 | }
46 | \examples{
47 | \dontrun{
48 | if (instantiate::stan_cmdstan_exists()) {
49 |   library(dplyr)
50 | 
51 |   data("italy")
52 |   italy_2000 <- italy \%>\%
53 |     dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\%
54 |     dplyr::filter(Season == "2000")
55 | 
56 |   colnames(italy_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals")
57 | 
58 | 
59 | 
60 |   fit <- stan_foot(
61 |     data = italy_2000,
62 |     model = "double_pois",
63 |     predict = 18
64 |   ) # double pois
65 | 
66 |   foot_prob(
67 |     fit, italy_2000, "Inter",
68 |     "Bologna FC"
69 |   )
70 | 
71 |   foot_prob(fit, italy_2000) # all the out-of-sample matches
72 | }
73 | }
74 | 
75 | }
76 | \author{
77 | Leonardo Egidi \email{legidi@units.it} and Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}.
78 | }
79 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(print,btdFoot)
 4 | S3method(print,compareFoot)
 5 | S3method(print,stanFoot)
 6 | export(btd_foot)
 7 | export(cauchy)
 8 | export(compare_foot)
 9 | export(foot_abilities)
10 | export(foot_prob)
11 | export(foot_rank)
12 | export(foot_round_robin)
13 | export(laplace)
14 | export(mle_foot)
15 | export(normal)
16 | export(plot_btdPosterior)
17 | export(plot_logStrength)
18 | export(pp_foot)
19 | export(stan_foot)
20 | export(student_t)
21 | import(matrixStats)
22 | importFrom(dplyr,across)
23 | importFrom(dplyr,arrange)
24 | importFrom(dplyr,distinct)
25 | importFrom(dplyr,group_by)
26 | importFrom(dplyr,mutate)
27 | importFrom(dplyr,select)
28 | importFrom(dplyr,summarise)
29 | importFrom(dplyr,ungroup)
30 | importFrom(dplyr,where)
31 | importFrom(extraDistr,dbvpois)
32 | importFrom(extraDistr,dskellam)
33 | importFrom(extraDistr,rbvpois)
34 | importFrom(extraDistr,rskellam)
35 | importFrom(ggplot2,aes)
36 | importFrom(ggplot2,after_stat)
37 | importFrom(ggplot2,annotate)
38 | importFrom(ggplot2,dup_axis)
39 | importFrom(ggplot2,element_blank)
40 | importFrom(ggplot2,element_text)
41 | importFrom(ggplot2,expansion)
42 | importFrom(ggplot2,facet_wrap)
43 | importFrom(ggplot2,geom_boxplot)
44 | importFrom(ggplot2,geom_errorbarh)
45 | importFrom(ggplot2,geom_line)
46 | importFrom(ggplot2,geom_point)
47 | importFrom(ggplot2,geom_rect)
48 | importFrom(ggplot2,geom_ribbon)
49 | importFrom(ggplot2,geom_segment)
50 | importFrom(ggplot2,geom_text)
51 | importFrom(ggplot2,geom_tile)
52 | importFrom(ggplot2,geom_vline)
53 | importFrom(ggplot2,ggplot)
54 | importFrom(ggplot2,ggtitle)
55 | importFrom(ggplot2,guide_legend)
56 | importFrom(ggplot2,guides)
57 | importFrom(ggplot2,labs)
58 | importFrom(ggplot2,lims)
59 | importFrom(ggplot2,position_dodge)
60 | importFrom(ggplot2,rel)
61 | importFrom(ggplot2,scale_color_manual)
62 | importFrom(ggplot2,scale_colour_manual)
63 | importFrom(ggplot2,scale_fill_gradient)
64 | importFrom(ggplot2,scale_fill_manual)
65 | importFrom(ggplot2,scale_x_continuous)
66 | importFrom(ggplot2,scale_x_discrete)
67 | importFrom(ggplot2,scale_y_continuous)
68 | importFrom(ggplot2,scale_y_discrete)
69 | importFrom(ggplot2,theme)
70 | importFrom(ggplot2,theme_bw)
71 | importFrom(ggplot2,unit)
72 | importFrom(ggplot2,xlab)
73 | importFrom(ggplot2,ylab)
74 | importFrom(ggplot2,ylim)
75 | importFrom(ggridges,stat_density_ridges)
76 | importFrom(instantiate,stan_package_model)
77 | importFrom(magrittr,"%>%")
78 | importFrom(matrixStats,colMedians)
79 | importFrom(matrixStats,colQuantiles)
80 | importFrom(matrixStats,colVars)
81 | importFrom(metRology,dt.scaled)
82 | importFrom(metRology,rt.scaled)
83 | importFrom(numDeriv,hessian)
84 | importFrom(posterior,as_draws_rvars)
85 | importFrom(posterior,draws_of)
86 | importFrom(reshape2,melt)
87 | importFrom(rlang,.data)
88 | importFrom(rstan,extract)
89 | importFrom(stats,approx)
90 | importFrom(stats,density)
91 | importFrom(stats,median)
92 | importFrom(stats,quantile)
93 | importFrom(stats,setNames)
94 | importFrom(tidyr,pivot_longer)
95 | importFrom(tidyr,pivot_wider)
96 | 


--------------------------------------------------------------------------------
/man/pp_foot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pp_foot.R
 3 | \name{pp_foot}
 4 | \alias{pp_foot}
 5 | \title{Posterior predictive checks for football models}
 6 | \usage{
 7 | pp_foot(object, data, type = "aggregated", coverage = 0.95)
 8 | }
 9 | \arguments{
10 | \item{object}{An object either of class \code{stanFoot}, \code{CmdStanFit}, \code{stanfit}.}
11 | 
12 | \item{data}{A data frame containing match data with columns:
13 | \itemize{
14 |   \item \code{periods}:  Time point of each observation (integer >= 1).
15 |   \item \code{home_team}: Home team's name (character string).
16 |   \item \code{away_team}: Away team's name (character string).
17 |   \item \code{home_goals}: Goals scored by the home team (integer >= 0).
18 |   \item \code{away_goals}: Goals scored by the away team (integer >= 0).
19 | }}
20 | 
21 | \item{type}{Type of plots, one among \code{"aggregated"} or \code{"matches"}. Default is \code{"aggregated"}.}
22 | 
23 | \item{coverage}{Argument to specify the width \eqn{1-\alpha} of posterior probability intervals. Default is 0.95.}
24 | }
25 | \value{
26 | A list with elements:
27 | \itemize{
28 |   \item{\code{pp_plot}}: A \code{ggplot} object for the selected \code{type} of plot.
29 |   \item{\code{pp_table}}: A data frame of summary statistics:
30 |     \itemize{
31 |       \item For \code{"aggregated"}: Goal differences and their Bayesian p‑values.
32 |       \item For \code{"matches"}: Nominal \code{1-alpha} and observed empirical Bayesian coverage.
33 |     }
34 | }
35 | }
36 | \description{
37 | The function provides posterior predictive plots to check the adequacy of the Bayesian models as
38 | returned by the \code{stan_foot} function.
39 | }
40 | \details{
41 | Posterior predictive plots: when \code{"aggregated"} (default) is selected, the function
42 | returns a frequency plot for some pre-selected goal-difference values,
43 | along with their correspondent Bayesian p-values, computed as
44 | \eqn{Pr(y_{rep} \ge y)|y)}, where \eqn{y_{rep}} is a data replication from the
45 | posterior predictive distribution (more details in Gelman et al., 2013).
46 | Bayesian p-values very close to 0 or 1 could exhibit
47 | possible model misfits.
48 | 
49 | When \code{"matches"} is selected an ordered-frequency plot for all the
50 | goal-differences in the considered matches is provided, along with the
51 | empirical Bayesian coverage at level \eqn{1-\alpha}.
52 | }
53 | \examples{
54 | \dontrun{
55 | if (instantiate::stan_cmdstan_exists()) {
56 |   library(dplyr)
57 | 
58 |   data("italy")
59 |   italy_2000 <- italy \%>\%
60 |     dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\%
61 |     dplyr::filter(Season == "2000")
62 | 
63 |   colnames(italy_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals")
64 | 
65 |   fit <- stan_foot(italy_2000, "double_pois", iter_sampling = 200)
66 | 
67 |   pp_foot(fit, italy_2000)
68 | }
69 | }
70 | 
71 | }
72 | \references{
73 | Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari, A., & Rubin, D. B. (2013). Bayesian data analysis. CRC press.
74 | }
75 | \author{
76 | Leonardo Egidi \email{legidi@units.it} and Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}
77 | }
78 | 


--------------------------------------------------------------------------------
/man/foot_abilities.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/foot_abilities.R
 3 | \name{foot_abilities}
 4 | \alias{foot_abilities}
 5 | \title{Plot football abilities from Stan and MLE models}
 6 | \usage{
 7 | foot_abilities(object, data, type = "both", teams = NULL)
 8 | }
 9 | \arguments{
10 | \item{object}{An object either of class \code{stanFoot}, \code{CmdStanFit}, \code{stanfit}, or class
11 | \code{\link{list}} containing the Maximum Likelihood Estimates (MLE) for the model parameters fitted
12 |  with \code{mle_foot}.}
13 | 
14 | \item{data}{A data frame containing match data with columns:
15 | \itemize{
16 |   \item \code{periods}:  Time point of each observation (integer >= 1).
17 |   \item \code{home_team}: Home team's name (character string).
18 |   \item \code{away_team}: Away team's name (character string).
19 |   \item \code{home_goals}: Goals scored by the home team (integer >= 0).
20 |   \item \code{away_goals}: Goals scored by the away team (integer >= 0).
21 | }}
22 | 
23 | \item{type}{Type of ability in Poisson models: one among \code{"defense"}, \code{"attack"} or \code{"both"}. Default is \code{"both"}.}
24 | 
25 | \item{teams}{An optional character vector specifying team names to include. If \code{NULL}, all teams are included.}
26 | }
27 | \value{
28 | A \code{ggplot} object showing each selected team’s ability estimates:
29 | \itemize{
30 |   \item For static Bayesian or MLE fits, horizontal error bars
31 |    (95\% intervals) and point estimates.
32 |   \item For dynamic Bayesian fits, ribbon and line plots over \code{periods}.
33 | }
34 | }
35 | \description{
36 | Depicts teams' abilities either from the Stan models fitted via the \code{stan_foot} function
37 | or from MLE models fitted via the \code{mle_foot} function.
38 | }
39 | \examples{
40 | \dontrun{
41 | if (instantiate::stan_cmdstan_exists()) {
42 |   library(dplyr)
43 | 
44 |   data("italy")
45 |   italy <- as_tibble(italy)
46 | 
47 |   ### no dynamics, no prediction
48 | 
49 |   italy_2000_2002 <- italy \%>\%
50 |     dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\%
51 |     dplyr::filter(Season == "2000" | Season == "2001" | Season == "2002")
52 | 
53 |   colnames(italy_2000_2002) <- c("periods", "home_team", "away_team", "home_goals", "away_goals")
54 | 
55 |   fit1 <- stan_foot(
56 |     data = italy_2000_2002,
57 |     model = "double_pois"
58 |   ) # double poisson
59 | 
60 |   fit2 <- stan_foot(
61 |     data = italy_2000_2002,
62 |     model = "biv_pois"
63 |   ) # bivariate poisson
64 | 
65 |   fit3 <- stan_foot(
66 |     data = italy_2000_2002,
67 |     model = "skellam"
68 |   ) # skellam
69 | 
70 |   fit4 <- stan_foot(
71 |     data = italy_2000_2002,
72 |     model = "student_t"
73 |   ) # student_t
74 | 
75 |   foot_abilities(fit1, italy_2000_2002)
76 |   foot_abilities(fit2, italy_2000_2002)
77 |   foot_abilities(fit3, italy_2000_2002)
78 |   foot_abilities(fit4, italy_2000_2002)
79 | 
80 |   ### seasonal dynamics, predict the last season
81 | 
82 |   fit5 <- stan_foot(
83 |     data = italy_2000_2002,
84 |     model = "biv_pois",
85 |     predict = 180,
86 |     dynamic_type = "seasonal"
87 |   ) # bivariate poisson
88 |   foot_abilities(fit5, italy_2000_2002)
89 | }
90 | }
91 | }
92 | \author{
93 | Leonardo Egidi \email{legidi@units.it} and Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}.
94 | }
95 | 


--------------------------------------------------------------------------------
/src/stan/static_btd.stan:
--------------------------------------------------------------------------------
 1 | data {
 2 |       int<lower=1> N;          // Number of observations
 3 |       int<lower=1> nteams;          // Number of teams
 4 |       array[N] int<lower=1, upper=nteams> team1;  // Index of team1 in each observation
 5 |       array[N] int<lower=1, upper=nteams> team2;  // Index of team2 in each observation
 6 |       real mean_logStrength;                // Initial mean for logStrength
 7 |       real<lower=1e-8> sd_logStrength;         // Standard deviation for logStrength
 8 |       real mean_logTie;
 9 |       real<lower=1e-8> sd_logTie;
10 |       array[N] int<lower=1, upper=3> y;      // Outcome: 1 if team1 beats team2, 3 if team2 beats team1, 2 for tie
11 |       int<lower=0, upper=1> ind_home;        // Home effect indicator
12 |       real mean_home;              // Mean for home effect
13 |       real<lower=1e-8> sd_home;      // Standard deviation for home effect
14 |     }
15 |     parameters {
16 |       vector[nteams] logStrength_raw;          // Log strength parameters for each team (static)
17 |       real logTie;             // Log tie parameter
18 |       real home;                  // Home team effect parameter
19 |     }
20 | 
21 |     transformed parameters {
22 |       real adj_h_eff;
23 |       vector[nteams] logStrength;
24 | 
25 |       adj_h_eff = home * ind_home;
26 | 
27 |       // Sum-to-zero constraint for log-strength parameters
28 |       logStrength = logStrength_raw - mean(logStrength_raw);
29 |     }
30 | 
31 |     model {
32 |       // Priors for strengths
33 |       logStrength_raw ~ normal(mean_logStrength, sd_logStrength);
34 | 
35 |       // Prior for tie parameter
36 |       logTie ~ normal(mean_logTie, sd_logTie);
37 | 
38 |       // Prior for the home effect
39 | 
40 |       home ~ normal(mean_home, sd_home);
41 | 
42 |       // Likelihood
43 |       for (n in 1:N) {
44 |         real delta_team1 = exp(logStrength[team1[n]] + adj_h_eff);
45 |         real delta_team2 = exp(logStrength[team2[n]]);
46 |         real nu = exp(logTie);
47 |         real denom = delta_team1 + delta_team2 + (nu * sqrt(delta_team1 * delta_team2));
48 |         real p_i_win = delta_team1 / denom;
49 |         real p_j_win = delta_team2 / denom;
50 |         real p_tie = (nu * sqrt(delta_team1 * delta_team2)) / denom;
51 |         if (y[n] == 1) {
52 |           target += log(p_i_win);
53 |         } else if (y[n] == 3) {
54 |           target += log(p_j_win);
55 |         } else if (y[n] == 2) {
56 |           target += log(p_tie);
57 |         }
58 |       }
59 |     }
60 | 
61 | generated quantities {
62 |     // Log-likelihood vector
63 |     vector[N] log_lik;
64 | 
65 |     // Posterior predictive vector
66 |     array[N] int y_rep;
67 | 
68 |     for (n in 1:N) {
69 |         // Delta values
70 |         real delta_team1 = exp(logStrength[team1[n]] + adj_h_eff);
71 |         real delta_team2 = exp(logStrength[team2[n]]);
72 |         real nu = exp(logTie);
73 |         real denom = delta_team1 + delta_team2 + (nu * sqrt(delta_team1 * delta_team2));
74 | 
75 |         // Probabilities
76 |         real p_i_win = delta_team1 / denom;
77 |         real p_j_win = delta_team2 / denom;
78 |         real p_tie = (nu * sqrt(delta_team1 * delta_team2)) / denom;
79 | 
80 |         // Log-likelihood
81 |         if (y[n] == 1) {
82 |             log_lik[n] = log(p_i_win);
83 |         } else if (y[n] == 3) {
84 |             log_lik[n] = log(p_j_win);
85 |         } else if (y[n] == 2) {
86 |             log_lik[n] = log(p_tie);
87 |         }
88 | 
89 |         // Posterior predictive samples
90 |         vector[3] probs;
91 |         probs[1] = p_i_win;
92 |         probs[2] = p_tie;
93 |         probs[3] = p_j_win;
94 | 
95 |         y_rep[n] = categorical_rng(probs);
96 |     }
97 | }
98 | 


--------------------------------------------------------------------------------
/man/plot_btdPosterior.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/plot_btdPosterior.R
  3 | \name{plot_btdPosterior}
  4 | \alias{plot_btdPosterior}
  5 | \title{Plot Posterior Distributions for \code{btdFoot} Objects}
  6 | \usage{
  7 | plot_btdPosterior(
  8 |   x,
  9 |   pars = "logStrength",
 10 |   plot_type = "boxplot",
 11 |   teams = NULL,
 12 |   ncol = NULL,
 13 |   scales = NULL
 14 | )
 15 | }
 16 | \arguments{
 17 | \item{x}{An object of class \code{btdFoot}.}
 18 | 
 19 | \item{pars}{A character string specifying the parameter to plot.
 20 | Choices are \code{"logStrength"}, \code{"logTie"}, and \code{"home"}.
 21 | Default is \code{"logStrength"}.}
 22 | 
 23 | \item{plot_type}{A character string specifying the type of plot.
 24 | Choices are \code{"boxplot"} and \code{"density"}.
 25 | Default is \code{"boxplot"}.}
 26 | 
 27 | \item{teams}{An optional character vector specifying team names to include in the posterior
 28 | boxplots or density plots. If \code{NULL}, all teams are included.}
 29 | 
 30 | \item{ncol}{An optional integer specifying the number of columns in the facet wrap
 31 | when using a dynamic Bayesian Bradley-Terry-Davidson model.
 32 | Default is \code{8}.}
 33 | 
 34 | \item{scales}{An optional character string specifying the scales for the facets
 35 | when using a dynamic Bayesian Bradley-Terry-Davidson model.
 36 | Options include \code{"free"}, \code{"fixed"}, \code{"free_x"}, and \code{"free_y"}.
 37 | Default is \code{"free_x"}.}
 38 | }
 39 | \value{
 40 | A \code{ggplot} object displaying:
 41 | \itemize{
 42 |   \item For \code{pars="logStrength"}:
 43 |     \itemize{
 44 |       \item Dynamic BTD: Faceted boxplots or density plots (including the 95\% credible interval) of posterior log-strengths by team and period.
 45 |       \item Static BTD: Boxplots or density plots (including the 95\% credible interval) of posterior log-strengths for each team.
 46 |     }
 47 |   \item For \code{pars="logTie"} or \code{pars="home"}: A single boxplot or density plot with 95\% credible interval.
 48 | }
 49 | }
 50 | \description{
 51 | Plots for the posterior distributions of team log-strengths and other parameters with customizable plot types and facets.
 52 | }
 53 | \examples{
 54 | \dontrun{
 55 | if (instantiate::stan_cmdstan_exists()) {
 56 |   library(dplyr)
 57 | 
 58 |   # Load example data
 59 |   data("italy")
 60 | 
 61 |   # Prepare the data
 62 |   italy_2020_2021_rank <- italy \%>\%
 63 |     select(Season, home, visitor, hgoal, vgoal) \%>\%
 64 |     filter(Season \%in\% c("2020", "2021")) \%>\%
 65 |     mutate(match_outcome = case_when(
 66 |       hgoal > vgoal ~ 1, # Home team wins
 67 |       hgoal == vgoal ~ 2, # Draw
 68 |       hgoal < vgoal ~ 3 # Away team wins
 69 |     )) \%>\%
 70 |     mutate(periods = case_when(
 71 |       row_number() <= 190 ~ 1,
 72 |       row_number() <= 380 ~ 2,
 73 |       row_number() <= 570 ~ 3,
 74 |       TRUE ~ 4
 75 |     )) \%>\% # Assign periods based on match number
 76 |     select(periods,
 77 |       home_team = home,
 78 |       away_team = visitor, match_outcome
 79 |     )
 80 | 
 81 |   # Fit the Bayesian Bradley-Terry-Davidson model with dynamic ranking
 82 |   fit_rank_dyn <- btd_foot(
 83 |     data = italy_2020_2021_rank,
 84 |     dynamic_rank = TRUE,
 85 |     rank_measure = "median",
 86 |     iter_sampling = 1000,
 87 |     parallel_chains = 2,
 88 |     chains = 2
 89 |   )
 90 | 
 91 |   # Plot posterior distributions with default settings
 92 |   plot_btdPosterior(fit_rank_dyn)
 93 | 
 94 |   # Plot posterior distributions for specific teams with customized facets
 95 |   plot_btdPosterior(
 96 |     fit_rank_dyn,
 97 |     teams = c("AC Milan", "AS Roma", "Juventus", "Inter"),
 98 |     ncol = 2
 99 |   )
100 | 
101 |   plot_btdPosterior(
102 |     fit_rank_dyn,
103 |     plot_type = "density",
104 |     teams = c("AC Milan", "AS Roma", "Juventus", "Inter"),
105 |     ncol = 2
106 |   )
107 | }
108 | }
109 | }
110 | \author{
111 | Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}.
112 | }
113 | 


--------------------------------------------------------------------------------
/src/stan/dynamic_btd.stan:
--------------------------------------------------------------------------------
  1 | data {
  2 |       int<lower=1> N;          // Number of observations
  3 |       int<lower=1> nteams;          // Number of teams
  4 |       int<lower=1> ntimes_rank;      // Number of time points
  5 |       array[N] int<lower=1, upper=ntimes_rank> instants_rank;  // Time point of each observation
  6 |       array[N] int<lower=1, upper=nteams> team1;  // Index of team1 in each observation
  7 |       array[N] int<lower=1, upper=nteams> team2;  // Index of team2 in each observation
  8 |       real mean_logStrength;                // Initial mean for logStrength
  9 |       real<lower=1e-8> sd_logStrength;         // Standard deviation of the AR(1) process
 10 |       real mean_logTie;
 11 |       real<lower=1e-8> sd_logTie;
 12 |       array[N] int<lower=1, upper=3> y;      // Outcome: 1 if team1 beats team2, 3 if team2 beats team1, 2 for tie
 13 |       int<lower=0, upper=1> ind_home;        // Home effect indicator
 14 |       real mean_home;              // Mean for home effect
 15 |       real<lower=1e-8> sd_home;      // Standard deviation for home effect
 16 |   }
 17 | 
 18 |   parameters {
 19 |       matrix[ntimes_rank, nteams] logStrength_raw;     // Log strength parameters for each team over time
 20 |       real logTie;               // Log tie parameter
 21 |       real home;                  // Home team effect parameter
 22 |   }
 23 | 
 24 |   transformed parameters {
 25 |       real adj_h_eff;
 26 |       matrix[ntimes_rank, nteams] logStrength;
 27 | 
 28 | 
 29 |       // Sum-to-zero constraint for log-strength parameters
 30 |       logStrength[1]=logStrength_raw[1]-mean(logStrength_raw[1]);
 31 |       for (t in 2:ntimes_rank){
 32 |         logStrength[t]=logStrength_raw[t]-mean(logStrength_raw[t]);
 33 |       }
 34 | 
 35 |       adj_h_eff = home * ind_home;
 36 |   }
 37 | 
 38 |   model {
 39 |       // Priors for initial strengths
 40 |       for (k in 1:nteams) {
 41 |           logStrength[1, k] ~ normal(mean_logStrength, sd_logStrength);
 42 |       }
 43 | 
 44 |       // Prior for tie parameter
 45 |       logTie ~ normal(mean_logTie, sd_logTie);
 46 | 
 47 |       // AR(1) process for strength parameters
 48 |       for (t_idx in 2:ntimes_rank) {
 49 |           for (k in 1:nteams) {
 50 |               logStrength_raw[t_idx, k] ~ normal(logStrength_raw[t_idx - 1, k], sd_logStrength);
 51 |           }
 52 |       }
 53 | 
 54 |       // Prior for the home effect
 55 |       home ~ normal(mean_home, sd_home);
 56 | 
 57 |       // Likelihood
 58 |       for (n in 1:N) {
 59 |           real delta_team1 = exp(logStrength[instants_rank[n], team1[n]] + adj_h_eff);
 60 |           real delta_team2 = exp(logStrength[instants_rank[n], team2[n]]);
 61 |           real nu = exp(logTie);
 62 |           real denom = delta_team1 + delta_team2 + (nu * sqrt(delta_team1 * delta_team2));
 63 |           real p_i_win = delta_team1 / denom;
 64 |           real p_j_win = delta_team2 / denom;
 65 |           real p_tie = (nu * sqrt(delta_team1 * delta_team2)) / denom;
 66 |           if (y[n] == 1) {
 67 |               target += log(p_i_win);
 68 |           } else if (y[n] == 3) {
 69 |               target += log(p_j_win);
 70 |           } else if (y[n] == 2) {
 71 |               target += log(p_tie);
 72 |           }
 73 |       }
 74 |   }
 75 | 
 76 |   generated quantities {
 77 |     // Log-likelihood vector
 78 |     vector[N] log_lik;
 79 | 
 80 |     // Posterior predictive vector
 81 |     array[N] int y_rep;
 82 | 
 83 |     for (n in 1:N) {
 84 |         // Delta values
 85 |         real delta_team1 = exp(logStrength[instants_rank[n], team1[n]] + adj_h_eff);
 86 |         real delta_team2 = exp(logStrength[instants_rank[n], team2[n]]);
 87 |         real nu = exp(logTie);
 88 |         real denom = delta_team1 + delta_team2 + (nu * sqrt(delta_team1 * delta_team2));
 89 | 
 90 |         // Probabilities
 91 |         real p_i_win = delta_team1 / denom;
 92 |         real p_j_win = delta_team2 / denom;
 93 |         real p_tie = (nu * sqrt(delta_team1 * delta_team2)) / denom;
 94 | 
 95 |         // Log-likelihood
 96 |         if (y[n] == 1) {
 97 |             log_lik[n] = log(p_i_win);
 98 |         } else if (y[n] == 3) {
 99 |             log_lik[n] = log(p_j_win);
100 |         } else if (y[n] == 2) {
101 |             log_lik[n] = log(p_tie);
102 |         }
103 | 
104 |         // Posterior predictive samples
105 |         vector[3] probs;
106 |         probs[1] = p_i_win;
107 |         probs[2] = p_tie;
108 |         probs[3] = p_j_win;
109 | 
110 |         y_rep[n] = categorical_rng(probs);
111 |     }
112 | }
113 | 


--------------------------------------------------------------------------------
/man/priors.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/foot_priors.R
  3 | \name{priors}
  4 | \alias{priors}
  5 | \alias{normal}
  6 | \alias{student_t}
  7 | \alias{cauchy}
  8 | \alias{laplace}
  9 | \title{Football priors distributions and options}
 10 | \usage{
 11 | normal(location = 0, scale = NULL, autoscale = TRUE)
 12 | 
 13 | student_t(df = 1, location = 0, scale = NULL, autoscale = TRUE)
 14 | 
 15 | cauchy(location = 0, scale = NULL, autoscale = TRUE)
 16 | 
 17 | laplace(location = 0, scale = NULL, autoscale = TRUE)
 18 | }
 19 | \arguments{
 20 | \item{location}{Prior location. In most cases, this is the prior mean, but
 21 | for \code{cauchy} (which is equivalent to \code{student_t} with
 22 | \code{df=1}), the mean does not exist and \code{location} is the prior
 23 | median. The default value is \eqn{0}.}
 24 | 
 25 | \item{scale}{Prior scale. The default depends on the family (see
 26 | \strong{Details}).}
 27 | 
 28 | \item{autoscale}{A logical scalar, defaulting to \code{TRUE}.}
 29 | 
 30 | \item{df}{Prior degrees of freedom. The default is \eqn{1} for
 31 | \code{student_t}, in which case it is equivalent to \code{cauchy}.}
 32 | }
 33 | \value{
 34 | A named list to be used internally by the
 35 |  \code{stan_foot} model fitting function.
 36 | }
 37 | \description{
 38 | This prior specification is just a duplicate
 39 | of some of the priors used by the \pkg{rstanarm} package.
 40 | 
 41 | These prior distributions can be passed to the
 42 | \code{stan_foot} function, through the arguments \code{prior} and \code{prior_sd}.
 43 | See  the vignette \href{http://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior
 44 |   Distributions for rstanarm Models}} for further details (to view the priors used for an existing model see
 45 |   \href{https://mc-stan.org/rstanarm/reference/prior_summary.stanreg.html}{prior_summary}).
 46 |   The default priors used in the \pkg{stan_foot} modeling function
 47 |   are intended to be \emph{weakly informative} in that they provide moderate
 48 |   regularlization and help stabilize computation.
 49 | 
 50 | You can choose between: \code{normal}, \code{cauchy}, \code{laplace}, \code{student_t}.
 51 | }
 52 | \details{
 53 | The details depend on the family of the prior being used:
 54 | \subsection{Student t family}{
 55 |   Family members:
 56 |   \itemize{
 57 |   \item \code{normal(location, scale)}
 58 |   \item \code{student_t(df, location, scale)}
 59 |   \item \code{cauchy(location, scale)}
 60 |   }
 61 |   Each of these functions also takes an argument \code{autoscale}.
 62 | 
 63 |   For the prior distribution for the intercept, \code{location},
 64 |   \code{scale}, and \code{df} should be scalars. For the prior for the other
 65 |   coefficients they can either be vectors of length equal to the number of
 66 |   coefficients (not including the intercept), or they can be scalars, in
 67 |   which case they will be recycled to the appropriate length. As the
 68 |   degrees of freedom approaches infinity, the Student t distribution
 69 |   approaches the normal distribution and if the degrees of freedom are one,
 70 |   then the Student t distribution is the Cauchy distribution.
 71 | 
 72 |   If \code{scale} is not specified it will default to \eqn{10} for the
 73 |   intercept and \eqn{2.5} for the other coefficients.
 74 | 
 75 |   If the \code{autoscale} argument is \code{TRUE} (the default), then the
 76 |   scales will be further adjusted as described above in the documentation of
 77 |   the \code{autoscale} argument in the \strong{Arguments} section.
 78 | }
 79 | 
 80 | \subsection{Laplace family}{
 81 |   Family members:
 82 |   \itemize{
 83 |   \item \code{laplace(location, scale)}
 84 |   }
 85 |   Each of these functions also takes an argument \code{autoscale}.
 86 | 
 87 |   The Laplace distribution is also known as the double-exponential
 88 |   distribution. It is a symmetric distribution with a sharp peak at its mean
 89 |   / median / mode and fairly long tails. This distribution can be motivated
 90 |   as a scale mixture of normal distributions and the remarks above about the
 91 |   normal distribution apply here as well.
 92 | 
 93 | }
 94 | }
 95 | \references{
 96 | Gelman, A., Jakulin, A., Pittau, M. G., and Su, Y. (2008). A weakly
 97 | informative default prior distribution for logistic and other regression
 98 | models. \emph{Annals of Applied Statistics}. 2(4), 1360--1383.
 99 | }
100 | \seealso{
101 | The various vignettes for the \pkg{rstanarm} package also discuss
102 |   and demonstrate the use of some of the supported prior distributions.
103 | }
104 | \author{
105 | Leonardo Egidi \email{legidi@units.it}
106 | }
107 | 


--------------------------------------------------------------------------------
/R/plot_logStrength.R:
--------------------------------------------------------------------------------
  1 | #' Plot Rankings for btdFoot Objects
  2 | #'
  3 | #' Visualizes team rankings based on whether the ranking is dynamic or static.
  4 | #'
  5 | #' \itemize{
  6 | #' \item Dynamic Ranking: Plots Rank Points over Periods for each team with lines and points.
  7 | #' \item Static Ranking: Plots Rank Points on the x-axis against Team Names on the y-axis with horizontal lines and points.
  8 | #' }
  9 | #'
 10 | #' @param x An object of class \code{btdFoot}.
 11 | #' @param teams An optional character vector specifying team names to include in the rankings plot. If \code{NULL}, all teams are included.
 12 | #'
 13 | #' @return A \code{ggplot} object:
 14 | #' \itemize{
 15 | #'   \item Dynamic BTD: A lineplot for the \code{log_strengths} over each period, colored by team.
 16 | #'   \item Static BTD: An horizontal barplot for each team.
 17 | #' }
 18 | #'
 19 | #' @author Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}.
 20 | #'
 21 | #'
 22 | #' @examples
 23 | #' \dontrun{
 24 | #' if (instantiate::stan_cmdstan_exists()) {
 25 | #'   library(dplyr)
 26 | #'
 27 | #'   data("italy")
 28 | #'
 29 | #'   italy_2020_2021_rank <- italy %>%
 30 | #'     select(Season, home, visitor, hgoal, vgoal) %>%
 31 | #'     filter(Season == "2020" | Season == "2021") %>%
 32 | #'     mutate(match_outcome = case_when(
 33 | #'       hgoal > vgoal ~ 1, # Home team wins
 34 | #'       hgoal == vgoal ~ 2, # Draw
 35 | #'       hgoal < vgoal ~ 3 # Away team wins
 36 | #'     )) %>%
 37 | #'     mutate(periods = case_when(
 38 | #'       row_number() <= 190 ~ 1,
 39 | #'       row_number() <= 380 ~ 2,
 40 | #'       row_number() <= 570 ~ 3,
 41 | #'       TRUE ~ 4
 42 | #'     )) %>% # Assign periods based on match number
 43 | #'     select(periods,
 44 | #'       home_team = home,
 45 | #'       away_team = visitor, match_outcome
 46 | #'     )
 47 | #'
 48 | #'   fit_rank_dyn <- btd_foot(
 49 | #'     data = italy_2020_2021_rank,
 50 | #'     dynamic_rank = TRUE,
 51 | #'     rank_measure = "median",
 52 | #'     iter_sampling = 1000,
 53 | #'     parallel_chains = 2,
 54 | #'     chains = 2
 55 | #'   )
 56 | #'
 57 | #'   plot_logStrength(fit_rank_dyn)
 58 | #'
 59 | #'   plot_logStrength(fit_rank_dyn, teams = c("AC Milan", "AS Roma", "Juventus", "Inter"))
 60 | #' }
 61 | #' }
 62 | #'
 63 | #' @importFrom ggplot2 ggplot aes labs geom_line geom_point geom_segment theme_bw
 64 | #' theme element_text guides guide_legend
 65 | #' @importFrom rlang .data
 66 | #' @export
 67 | 
 68 | plot_logStrength <- function(x, teams = NULL) {
 69 |   # Check if the object is of class 'btdFoot'
 70 |   if (!inherits(x, "btdFoot")) {
 71 |     stop("Object must be of class 'btdFoot'.")
 72 |   }
 73 | 
 74 | 
 75 |   # Determine if the ranking is dynamic or static
 76 |   is_dynamic <- ifelse(length(unique(x$rank$periods, na.rm = TRUE)) > 1, TRUE, FALSE)
 77 | 
 78 |   # Handle teams
 79 |   if (!is.null(teams)) {
 80 |     # Check that specified teams exist
 81 |     available_teams <- unique(c(x$data$home_team, x$data$away_team))
 82 |     missing_teams <- setdiff(teams, available_teams)
 83 |     if (length(missing_teams) > 0) {
 84 |       stop(paste0("The following teams are not present in the data: ", paste(missing_teams, collapse = ", ")))
 85 |     }
 86 | 
 87 |     x$rank <- x$rank[x$rank$team %in% teams, ]
 88 |   }
 89 | 
 90 |   if (is_dynamic) {
 91 |     # Dynamic Ranking Plot
 92 |     p <- ggplot(x$rank, aes(x = .data$periods, y = .data$log_strengths, color = .data$team)) +
 93 |       geom_line() +
 94 |       geom_point() +
 95 |       labs(
 96 |         x = "Periods",
 97 |         y = "Log-Strength Values"
 98 |       ) +
 99 |       theme_bw() +
100 |       theme(
101 |         legend.position = "right",
102 |         legend.title = element_text(size = 10),
103 |         plot.title = element_text(hjust = 0.5)
104 |       ) +
105 |       scale_x_continuous(breaks = sort(unique(x$rank$periods))) +
106 |       guides(
107 |         color = guide_legend(title = "Team")
108 |       )
109 |   } else {
110 |     # Static Ranking Plot
111 |     p <- ggplot(x$rank, aes(y = stats::reorder(.data$team, .data$log_strengths), x = .data$log_strengths)) +
112 |       geom_segment(
113 |         aes(x = 0, xend = .data$log_strengths, yend = .data$team),
114 |         color = "deepskyblue4",
115 |         linewidth = 1
116 |       ) +
117 |       geom_point(
118 |         color = "firebrick4",
119 |         size = 3
120 |       ) +
121 |       labs(
122 |         x = "Log-Strength Values",
123 |         y = "Teams"
124 |       ) +
125 |       theme_bw() +
126 |       theme(
127 |         legend.position = "none",
128 |         plot.title = element_text(hjust = 0.5)
129 |       )
130 |   }
131 | 
132 |   return(p)
133 | }
134 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | ```{r}
  8 | #| echo: false
  9 | knitr::opts_chunk$set(
 10 |   collapse = TRUE,
 11 |   comment = "#>",
 12 |   fig.path = "man/figures/README-"
 13 | )
 14 | ```
 15 | 
 16 | # footBayes <img src="man/figures/logo.png" width = 180 align="right" />
 17 | 
 18 | [![CRAN Version](https://www.r-pkg.org/badges/version/footBayes)](https://cran.r-project.org/package=footBayes)
 19 | ![Dev Version](https://img.shields.io/badge/build-2.1.0-blue?style=flat&logo=devdotto&label=Dev.%20Vers.)
 20 | [![R-CMD-check.yaml](https://github.com/LeoEgidi/footBayes/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/LeoEgidi/footBayes/actions/workflows/R-CMD-check.yaml)
 21 | [![Codecov test coverage](https://codecov.io/gh/LeoEgidi/footBayes/graph/badge.svg)](https://app.codecov.io/gh/LeoEgidi/footBayes)
 22 | [![Downloads](https://cranlogs.r-pkg.org/badges/footBayes?color=brightgreen)](https://CRAN.R-project.org/package=footBayes)
 23 | 
 24 | The goal of `footBayes` is to propose a complete workflow to:
 25 | 
 26 | -   Fit the most well-known football models, including the double Poisson, bivariate Poisson, Skellam, and Student‑t distributions. It supports both maximum likelihood estimation (MLE) and Bayesian inference. For Bayesian methods, it incorporates several techniques: MCMC sampling with Hamiltonian Monte Carlo, variational inference using either the Pathfinder algorithm or Automatic Differentiation Variational Inference (ADVI), and the Laplace approximation.
 27 | 
 28 | -   Visualize the teams' abilities, the model checks, the rank-league reconstruction;
 29 | 
 30 | -   Predict out-of-sample matches.
 31 | 
 32 | ## Installation
 33 | 
 34 | Starting with version **2.0.0**, `footBayes` package requires installing the R package [`cmdstanr`](https://mc-stan.org/cmdstanr/) (not available on CRAN) and the command-line interface to Stan: [`CmdStan`](https://mc-stan.org/users/interfaces/cmdstan.html). 
 35 | For a step-by-step installation, please follow the instructions provided in [Getting started with CmdStanR](https://mc-stan.org/cmdstanr/articles/cmdstanr.html).
 36 | 
 37 | You can install the released version of `footBayes` from CRAN with:
 38 | 
 39 | ``` r
 40 | install.packages("footBayes", type = "source")
 41 | ```
 42 | 
 43 | Please note that it is important to set `type = "source"`. Otherwise, the 'CmdStan' models in the package may not be compiled during installation.
 44 | 
 45 | Alternatively to CRAN, you can install the development version from GitHub with:
 46 | 
 47 | ```{r gh-installation, eval = FALSE}
 48 | # install.packages("devtools")
 49 | devtools::install_github("leoegidi/footBayes")
 50 | ```
 51 | 
 52 | ## Example
 53 | 
 54 | In what follows, a quick example to fit a Bayesian double Poisson model for the Italian Serie A (seasons 2000-2001, 2001-2002, 2002-2003), visualize the estimated teams' abilities, and predict the last four match days for the season 2002-2003:
 55 | 
 56 | ```{r load, message=FALSE}
 57 | library(footBayes)
 58 | library(dplyr)
 59 | ```
 60 | 
 61 | 
 62 | 
 63 | ```{r fit1, results='hide', message=FALSE}
 64 | # Dataset for Italian Serie A
 65 | data("italy")
 66 | italy <- as_tibble(italy)
 67 | italy_2000_2002 <- italy %>%
 68 |   dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
 69 |   filter(Season == "2000" | Season == "2001" | Season == "2002")
 70 | 
 71 | colnames(italy_2000_2002) <- c("periods",
 72 |                                "home_team",
 73 |                                "away_team",
 74 |                                "home_goals",
 75 |                                "away_goals")
 76 | 
 77 | # Double poisson fit (predict last 4 match-days)
 78 | fit1 <- stan_foot(data = italy_2000_2002,
 79 |                   model = "double_pois",
 80 |                   predict = 36,
 81 |                   iter_sampling = 200,
 82 |                   chains = 2) 
 83 | ```
 84 | 
 85 | 
 86 | The results (i.e., attack and defense effects) can be investigated using
 87 | ```{r summary}
 88 | print(fit1, pars = c("att", "def"))
 89 | ```
 90 | 
 91 | To visually investigate the attack and defense effects, we
 92 | can use the `foot_abilities` function
 93 | ```{r abilities}
 94 | foot_abilities(fit1, italy_2000_2002) # teams abilities
 95 | ```
 96 | 
 97 | To check the adequacy of the Bayesian model the function `pp_foot` provides posterior predictive plots
 98 | ```{r pp_foot}
 99 | pp_foot(fit1, italy_2000_2002) # pp checks
100 | ```
101 | 
102 | Furthermore, the function `foot_rank` shows the final rank table and the plot with the predicted points
103 | ```{r pp_foot}
104 | foot_rank(fit1, italy_2000_2002) # rank league reconstruction
105 | ```
106 | 
107 | In order to analyze the possible outcomes of the predicted matches, the function `foot_prob` provides a table containing the home win, draw and away win probabilities for the out-of-sample matches
108 | ```{r pp_foot}
109 | foot_prob(fit1, italy_2000_2002) # out-of-sample posterior pred. probabilities
110 | ```
111 | 
112 | 
113 | 
114 | For more and more technical details and references, see the vignette!
115 | 


--------------------------------------------------------------------------------
/src/stan/student_t_dynamic.stan:
--------------------------------------------------------------------------------
  1 | data {
  2 |   int N;                       // number of matches
  3 |   int<lower=0> N_prev;
  4 |   int nteams;                  // number of teams
  5 |   int ntimes;                  // number of dynamic periods for abilities
  6 |   int ntimes_rank;             // number of dynamic periods for rankings
  7 |   array[N] int team1;
  8 |   array[N] int team2;
  9 |   array[N_prev] int team1_prev;
 10 |   array[N_prev] int team2_prev;
 11 |   matrix[N, 2] y;
 12 |   real nu;
 13 |   array[ntimes] int time;
 14 |   array[N] int instants;             // time indices for abilities
 15 |   array[N_prev] int instants_prev;
 16 |   array[N] int instants_rank;        // time indices for rankings
 17 |   matrix[ntimes_rank, nteams] ranking; // rankings over time
 18 |   int<lower=0, upper=1> ind_home;
 19 |   real mean_home;              // Mean for home effect
 20 |   real<lower=1e-8> sd_home;      // Standard deviation for home effect
 21 | 
 22 | 
 23 |   // priors part
 24 |   int<lower=1, upper=4> prior_dist_num;
 25 |   int<lower=1, upper=4> prior_dist_sd_num;
 26 | 
 27 |   real<lower=0> hyper_df;
 28 |   real hyper_location;
 29 | 
 30 |   real<lower=0> hyper_sd_df;
 31 |   real hyper_sd_location;
 32 |   real<lower=1e-8> hyper_sd_scale;
 33 | 
 34 |   // commensurate prior
 35 |   int<lower=0, upper=1> ind_comm_prior;
 36 |   real mu_spike;
 37 |   real<lower=0> sd_spike;
 38 |   real mu_slab;
 39 |   real<lower=0> sd_slab;
 40 |   real<lower=0, upper=1> p_spike;
 41 | }
 42 | transformed data {
 43 |   vector[N] diff_y = y[,1] - y[,2];  // modeled data
 44 | }
 45 | parameters {
 46 |   real beta;                        // common intercept
 47 |   matrix[ntimes, nteams] alpha;     // per-team weights over time
 48 |   real<lower=1e-8> sigma_a;            // common variance
 49 |   real<lower=1e-8> sigma_y;            // noise term
 50 |   real<lower=1e-8> sigma_alpha;
 51 |   real home;                        // home effect
 52 | }
 53 | transformed parameters {
 54 |   array[ntimes, ntimes_rank, nteams] real ability;
 55 |   matrix[ntimes, nteams] mu_alpha;
 56 |   real adj_h_eff;                   // Adjusted home effect
 57 | 
 58 |   adj_h_eff = home * ind_home;
 59 | 
 60 |   for (t in 1:ntimes) {
 61 |     for(tr in 1:ntimes_rank) {
 62 |       for (h in 1:nteams) {
 63 |         ability[t, tr, h] = beta * ranking[tr, h] + alpha[t, h] * sigma_a;
 64 |       }
 65 |     }
 66 |   }
 67 | 
 68 |   mu_alpha[1] = rep_row_vector(0, nteams);
 69 |   for (t in 2:ntimes) {
 70 |     mu_alpha[t] = alpha[t - 1];
 71 |   }
 72 | }
 73 | model {
 74 |   // Priors for team-specific abilities
 75 |   for (h in 1:nteams) {
 76 |     if (prior_dist_num == 1) {
 77 |       alpha[, h] ~ multi_normal(mu_alpha[, h], diag_matrix(rep_vector(square(sigma_alpha), ntimes)));
 78 |     } else if (prior_dist_num == 2) {
 79 |       alpha[, h] ~ multi_student_t(hyper_df, mu_alpha[, h], diag_matrix(rep_vector(square(sigma_alpha), ntimes)));
 80 |     } else if (prior_dist_num == 3) {
 81 |       alpha[, h] ~ multi_student_t(1, mu_alpha[, h], diag_matrix(rep_vector(square(sigma_alpha), ntimes)));
 82 |     }
 83 |   }
 84 | 
 85 |   // Hyperpriors for standard deviations
 86 |   if (prior_dist_sd_num == 1) {
 87 |     sigma_a ~ normal(hyper_sd_location, hyper_sd_scale);
 88 |     sigma_alpha ~ normal(hyper_sd_location, hyper_sd_scale);
 89 |   } else if (prior_dist_sd_num == 2) {
 90 |     sigma_a ~ student_t(hyper_sd_df, hyper_sd_location, hyper_sd_scale);
 91 |     sigma_alpha ~ student_t(hyper_sd_df, hyper_sd_location, hyper_sd_scale);
 92 |   } else if (prior_dist_sd_num == 3) {
 93 |     sigma_a ~ cauchy(hyper_sd_location, hyper_sd_scale);
 94 |     sigma_alpha ~ cauchy(hyper_sd_location, hyper_sd_scale);
 95 |   } else if (prior_dist_sd_num == 4) {
 96 |     sigma_a ~ double_exponential(hyper_sd_location, hyper_sd_scale);
 97 |     sigma_alpha ~ double_exponential(hyper_sd_location, hyper_sd_scale);
 98 |   }
 99 | 
100 |   // Priors for other parameters
101 |   beta ~ normal(0, 2.5);
102 |   sigma_y ~ normal(0, 2.5);
103 |   target+=normal_lpdf(home|mean_home,sd_home);
104 | 
105 |   // Likelihood
106 |   for (n in 1:N) {
107 |     diff_y[n] ~ student_t(
108 |     nu,
109 |     adj_h_eff+
110 |     ability[instants[n], instants_rank[n], team1[n]] - ability[instants[n], instants_rank[n], team2[n]],
111 |     sigma_y);
112 | 
113 |   }
114 | }
115 | generated quantities {
116 |   vector[N] diff_y_rep;
117 |   vector[N] log_lik;
118 |   vector[N_prev] diff_y_prev;
119 | 
120 |   for (n in 1:N) {
121 |     diff_y_rep[n] = student_t_rng(nu, adj_h_eff+ability[instants[n], instants_rank[n], team1[n]] - ability[instants[n], instants_rank[n], team2[n]],
122 |     sigma_y);
123 |     log_lik[n] = student_t_lpdf(diff_y[n] | nu, adj_h_eff+ability[instants[n], instants_rank[n], team1[n]] - ability[instants[n], instants_rank[n], team2[n]],
124 |     sigma_y);
125 |   }
126 |   //out-of-sample predictions
127 |   if (N_prev > 0) {
128 |     for (n in 1:N_prev) {
129 |       diff_y_prev[n] = student_t_rng( nu, adj_h_eff+ability[instants_prev[n], instants_rank[N], team1_prev[n]] - ability[instants_prev[n], instants_rank[N], team2_prev[n]],
130 |       sigma_y);
131 |     }
132 |   }
133 | }
134 | 
135 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  3 | 
  4 | # footBayes <img src="man/figures/logo.png" width = 180 align="right" />
  5 | 
  6 | [![CRAN
  7 | Version](https://www.r-pkg.org/badges/version/footBayes)](https://cran.r-project.org/package=footBayes)
  8 | ![Dev
  9 | Version](https://img.shields.io/badge/build-2.1.0-blue?style=flat&logo=devdotto&label=Dev.%20Vers.)
 10 | [![R-CMD-check.yaml](https://github.com/LeoEgidi/footBayes/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/LeoEgidi/footBayes/actions/workflows/R-CMD-check.yaml)
 11 | [![Codecov test
 12 | coverage](https://codecov.io/gh/LeoEgidi/footBayes/graph/badge.svg)](https://app.codecov.io/gh/LeoEgidi/footBayes)
 13 | [![Downloads](https://cranlogs.r-pkg.org/badges/footBayes?color=brightgreen)](https://CRAN.R-project.org/package=footBayes)
 14 | 
 15 | The goal of `footBayes` is to propose a complete workflow to:
 16 | 
 17 | - Fit the most well-known football models, including the double Poisson,
 18 |   bivariate Poisson, Skellam, and Student‑t distributions. It supports
 19 |   both maximum likelihood estimation (MLE) and Bayesian inference. For
 20 |   Bayesian methods, it incorporates several techniques: MCMC sampling
 21 |   with Hamiltonian Monte Carlo, variational inference using either the
 22 |   Pathfinder algorithm or Automatic Differentiation Variational
 23 |   Inference (ADVI), and the Laplace approximation.
 24 | 
 25 | - Visualize the teams’ abilities, the model checks, the rank-league
 26 |   reconstruction;
 27 | 
 28 | - Predict out-of-sample matches.
 29 | 
 30 | ## Installation
 31 | 
 32 | Starting with version **2.0.0**, `footBayes` package requires installing
 33 | the R package [`cmdstanr`](https://mc-stan.org/cmdstanr/) (not available
 34 | on CRAN) and the command-line interface to Stan:
 35 | [`CmdStan`](https://mc-stan.org/users/interfaces/cmdstan.html). For a
 36 | step-by-step installation, please follow the instructions provided in
 37 | [Getting started with
 38 | CmdStanR](https://mc-stan.org/cmdstanr/articles/cmdstanr.html).
 39 | 
 40 | You can install the released version of `footBayes` from CRAN with:
 41 | 
 42 | ``` r
 43 | install.packages("footBayes", type = "source")
 44 | ```
 45 | 
 46 | Please note that it is important to set `type = "source"`. Otherwise,
 47 | the ‘CmdStan’ models in the package may not be compiled during
 48 | installation.
 49 | 
 50 | Alternatively to CRAN, you can install the development version from
 51 | GitHub with:
 52 | 
 53 | ``` r
 54 | # install.packages("devtools")
 55 | devtools::install_github("leoegidi/footBayes")
 56 | ```
 57 | 
 58 | ## Example
 59 | 
 60 | In what follows, a quick example to fit a Bayesian double Poisson model
 61 | for the Italian Serie A (seasons 2000-2001, 2001-2002, 2002-2003),
 62 | visualize the estimated teams’ abilities, and predict the last four
 63 | match days for the season 2002-2003:
 64 | 
 65 | ``` r
 66 | library(footBayes)
 67 | library(dplyr)
 68 | ```
 69 | 
 70 | ``` r
 71 | # Dataset for Italian Serie A
 72 | data("italy")
 73 | italy <- as_tibble(italy)
 74 | italy_2000_2002 <- italy %>%
 75 |   dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
 76 |   filter(Season == "2000" | Season == "2001" | Season == "2002")
 77 | 
 78 | colnames(italy_2000_2002) <- c("periods",
 79 |                                "home_team",
 80 |                                "away_team",
 81 |                                "home_goals",
 82 |                                "away_goals")
 83 | 
 84 | # Double poisson fit (predict last 4 match-days)
 85 | fit1 <- stan_foot(data = italy_2000_2002,
 86 |                   model = "double_pois",
 87 |                   predict = 36,
 88 |                   iter_sampling = 200,
 89 |                   chains = 2) 
 90 | ```
 91 | 
 92 | The results (i.e., attack and defense effects) can be investigated using
 93 | 
 94 | ``` r
 95 | print(fit1, pars = c("att", "def"))
 96 | ```
 97 | 
 98 | To visually investigate the attack and defense effects, we can use the
 99 | `foot_abilities` function
100 | 
101 | ``` r
102 | foot_abilities(fit1, italy_2000_2002) # teams abilities
103 | ```
104 | 
105 | <img src="man/figures/readme_foot_abilities.png" width="80%" style="display: block; margin: auto;" />
106 | 
107 | To check the adequacy of the Bayesian model the function `pp_foot`
108 | provides posterior predictive plots
109 | 
110 | ``` r
111 | pp_foot(fit1, italy_2000_2002) # pp checks
112 | #> $pp_plot
113 | ```
114 | 
115 | <img src="man/figures/readme_pp_foot.png" width="80%" style="display: block; margin: auto;" />
116 | 
117 | 
118 | Furthermore, the function `foot_rank` shows the final rank table and the
119 | plot with the predicted points
120 | 
121 | ``` r
122 | foot_rank(fit1, italy_2000_2002) # rank league reconstruction
123 | ```
124 | 
125 | <img src="man/figures/readme_foot_rank.png" width="80%" style="display: block; margin: auto;" />
126 | 
127 | 
128 | In order to analyze the possible outcomes of the predicted matches, the
129 | function `foot_prob` provides a table containing the home win, draw and
130 | away win probabilities for the out-of-sample matches
131 | 
132 | ``` r
133 | foot_prob(fit1, italy_2000_2002) # out-of-sample posterior pred. probabilities
134 | ```
135 | 
136 | <img src="man/figures/readme_foot_prob.png" width="80%" style="display: block; margin: auto;" />
137 | 
138 | For more and more technical details and references, see the vignette!
139 | 


--------------------------------------------------------------------------------
/tests/testthat/test_plot_logStrength.R:
--------------------------------------------------------------------------------
  1 | # all the test PASSED (also the skipped ones!)
  2 | 
  3 | #   ____________________________________________________________________________
  4 | #   Data tests                                                             ####
  5 | 
  6 | test_that("Error thrown if input object is not of class 'btdFoot'", {
  7 |   not_btdFoot <- list(a = 1)
  8 |   expect_error(
  9 |     plot_logStrength(not_btdFoot),
 10 |     "Object must be of class 'btdFoot'."
 11 |   )
 12 | })
 13 | 
 14 | test_that("Dynamic ranking plot returns a ggplot object with correct labels", {
 15 |   skip_on_cran()
 16 |   skip_if_not(stan_cmdstan_exists())
 17 | 
 18 |   ##  ............................................................................
 19 |   ##  Data                                                                    ####
 20 | 
 21 |   data("england")
 22 |   england <- as.data.frame(england)
 23 | 
 24 |   # One season only
 25 |   england_04_05 <- england %>%
 26 |     dplyr::filter(division == 1) %>%
 27 |     dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
 28 |     dplyr::filter(Season %in% c("2004", "2005")) %>%
 29 |     dplyr::mutate(match_outcome = dplyr::case_when(
 30 |       hgoal > vgoal ~ 1, # Home team wins
 31 |       hgoal == vgoal ~ 2, # Draw
 32 |       hgoal < vgoal ~ 3 # Away team wins
 33 |     )) %>% # Assign periods based on match number
 34 |     dplyr::select(
 35 |       periods = Season,
 36 |       home_team = home,
 37 |       away_team = visitor, match_outcome
 38 |     )
 39 |   ##  ............................................................................
 40 |   ##  Tests                                                                   ####
 41 | 
 42 |   # Correct model
 43 |   dyn_btd <- btd_foot(
 44 |     data = england_04_05, dynamic_rank = TRUE,
 45 |     method = "MCMC", iter_sampling = 200, rank_measure = "mean",
 46 |     chains = 2,
 47 |     seed = 433
 48 |   )
 49 | 
 50 |   p <- plot_logStrength(dyn_btd)
 51 |   expect_s3_class(p, "ggplot")
 52 |   # Check that the x- and y-axis labels match the dynamic branch
 53 |   expect_equal(p$labels$x, "Periods")
 54 |   expect_equal(p$labels$y, "Log-Strength Values")
 55 |   # Check that the data has multiple periods (thus dynamic)
 56 |   expect_true(max(dyn_btd$rank$periods, na.rm = TRUE) > 1)
 57 | })
 58 | 
 59 | test_that("Static ranking plot returns a ggplot object with correct labels", {
 60 |   skip_on_cran()
 61 |   skip_if_not(stan_cmdstan_exists())
 62 | 
 63 |   data("england")
 64 |   england <- as.data.frame(england)
 65 | 
 66 |   # One season only
 67 |   england_04 <- england %>%
 68 |     dplyr::filter(division == 1) %>%
 69 |     dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
 70 |     dplyr::filter(Season %in% c("2004")) %>%
 71 |     dplyr::mutate(match_outcome = dplyr::case_when(
 72 |       hgoal > vgoal ~ 1, # Home team wins
 73 |       hgoal == vgoal ~ 2, # Draw
 74 |       hgoal < vgoal ~ 3 # Away team wins
 75 |     )) %>% # Assign periods based on match number
 76 |     dplyr::select(
 77 |       periods = Season,
 78 |       home_team = home,
 79 |       away_team = visitor, match_outcome
 80 |     )
 81 |   ##  ............................................................................
 82 |   ##  Tests                                                                   ####
 83 | 
 84 |   # Correct model
 85 |   stat_btd <- btd_foot(
 86 |     data = england_04, dynamic_rank = FALSE,
 87 |     method = "MCMC", iter_sampling = 200, rank_measure = "mean",
 88 |     chains = 2,
 89 |     seed = 433
 90 |   )
 91 | 
 92 |   p <- plot_logStrength(stat_btd)
 93 |   expect_s3_class(p, "ggplot")
 94 |   # Check that the x- and y-axis labels match the static branch
 95 |   expect_equal(p$labels$x, "Log-Strength Values")
 96 |   expect_equal(p$labels$y, "Teams")
 97 | })
 98 | 
 99 | test_that("Filtering by teams works as expected and error is thrown if specified teams are not present in the data", {
100 |   skip_if_not(stan_cmdstan_exists())
101 | 
102 |   data("england")
103 |   england <- as.data.frame(england)
104 | 
105 |   # One season only
106 |   england_04 <- england %>%
107 |     dplyr::filter(division == 1) %>%
108 |     dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
109 |     dplyr::filter(Season %in% c("2004")) %>%
110 |     dplyr::mutate(match_outcome = dplyr::case_when(
111 |       hgoal > vgoal ~ 1, # Home team wins
112 |       hgoal == vgoal ~ 2, # Draw
113 |       hgoal < vgoal ~ 3 # Away team wins
114 |     )) %>% # Assign periods based on match number
115 |     dplyr::select(
116 |       periods = Season,
117 |       home_team = home,
118 |       away_team = visitor, match_outcome
119 |     )
120 |   ##  ............................................................................
121 |   ##  Tests                                                                   ####
122 | 
123 |   # Correct model
124 |   stat_btd <- btd_foot(
125 |     data = england_04, dynamic_rank = FALSE,
126 |     method = "MCMC", iter_sampling = 200, rank_measure = "mean",
127 |     chains = 2,
128 |     seed = 433
129 |   )
130 | 
131 |   p <- plot_logStrength(stat_btd, teams = c("Arsenal", "Chelsea"))
132 |   # Check that the plot's data only contains the specified teams
133 |   expect_setequal(unique(p$data$team), c("Arsenal", "Chelsea"))
134 | 
135 |   # Wrong plot
136 |   expect_error(
137 |     plot_logStrength(stat_btd, teams = c("Team X")),
138 |     "The following teams are not present in the data"
139 |   )
140 | })
141 | 


--------------------------------------------------------------------------------
/src/stan/student_t.stan:
--------------------------------------------------------------------------------
  1 | data {
  2 |     int N;                      // Number of observed matches
  3 |     int<lower=0> N_prev;                 // Number of predicted matches
  4 |     int nteams;                 // Number of teams
  5 |     int ntimes_rank;            // Number of dynamic periods for rankings
  6 |     matrix[ntimes_rank, nteams] ranking; // Rankings over time
  7 |     array[N] int instants_rank;       // Time indices for rankings for observed matches
  8 |     array[N] int team1;               // Team 1 indices for observed matches
  9 |     array[N] int team2;               // Team 2 indices for observed matches
 10 |     array[N_prev] int team1_prev;     // Team 1 indices for predicted matches
 11 |     array[N_prev] int team2_prev;     // Team 2 indices for predicted matches
 12 |     matrix[N, 2] y;             // Scores: column 1 is team1, column 2 is team2
 13 |     real nu;                    // Degrees of freedom for the Student's t-distribution
 14 |     int<lower=0, upper=1> ind_home;
 15 |     real mean_home;              // Mean for home effect
 16 |     real<lower=1e-8> sd_home;      // Standard deviation for home effect
 17 | 
 18 | 
 19 |     // Priors part
 20 |     int<lower=1,upper=4> prior_dist_num;    // 1: Gaussian, 2: t, 3: Cauchy, 4: Laplace
 21 |     int<lower=1,upper=4> prior_dist_sd_num; // 1: Gaussian, 2: t, 3: Cauchy, 4: Laplace
 22 | 
 23 |     real<lower=0> hyper_df;
 24 |     real hyper_location;
 25 | 
 26 |     real<lower=0> hyper_sd_df;
 27 |     real hyper_sd_location;
 28 |     real<lower=1e-8> hyper_sd_scale;
 29 | }
 30 | transformed data {
 31 |     vector[N] diff_y = y[,1] - y[,2];  // Modeled data: score differences
 32 | }
 33 | parameters {
 34 |     real beta;                      // Common coefficient for ranking
 35 |     vector[nteams] alpha;           // Per-team random effects
 36 |     real<lower=1e-8> sigma_a;          // Standard deviation for random effects
 37 |     real<lower=1e-8> sigma_y;          // Noise term in our estimate
 38 |     real<lower=1e-8> sigma_alpha;      // Standard deviation for alpha prior
 39 |     real home;                      // home effect
 40 | }
 41 | transformed parameters {
 42 |     matrix[ntimes_rank, nteams] ability;
 43 |      real adj_h_eff;                   // Adjusted home effect
 44 | 
 45 |      adj_h_eff = home * ind_home;
 46 | 
 47 |     for (t in 1:ntimes_rank) {
 48 |         // Compute abilities for all teams at time t
 49 |         ability[t] = beta * ranking[t] + (alpha * sigma_a)';  // Transpose to get a row vector
 50 |     }
 51 | }
 52 | model {
 53 |     // Priors for team-specific random effects (alpha)
 54 |     if (prior_dist_num == 1) {
 55 |         alpha ~ normal(hyper_location, sigma_alpha);
 56 |     } else if (prior_dist_num == 2) {
 57 |         alpha ~ student_t(hyper_df, hyper_location, sigma_alpha);
 58 |     } else if (prior_dist_num == 3) {
 59 |         alpha ~ cauchy(hyper_location, sigma_alpha);
 60 |     } else if (prior_dist_num == 4) {
 61 |         alpha ~ double_exponential(hyper_location, sigma_alpha);
 62 |     }
 63 | 
 64 |     // Priors for standard deviations
 65 |     if (prior_dist_sd_num == 1) {
 66 |         sigma_a ~ normal(hyper_sd_location, hyper_sd_scale);
 67 |         sigma_alpha ~ normal(hyper_sd_location, hyper_sd_scale);
 68 |     } else if (prior_dist_sd_num == 2) {
 69 |         sigma_a ~ student_t(hyper_sd_df, hyper_sd_location, hyper_sd_scale);
 70 |         sigma_alpha ~ student_t(hyper_sd_df, hyper_sd_location, hyper_sd_scale);
 71 |     } else if (prior_dist_sd_num == 3) {
 72 |         sigma_a ~ cauchy(hyper_sd_location, hyper_sd_scale);
 73 |         sigma_alpha ~ cauchy(hyper_sd_location, hyper_sd_scale);
 74 |     } else if (prior_dist_sd_num == 4) {
 75 |         sigma_a ~ double_exponential(hyper_sd_location, hyper_sd_scale);
 76 |         sigma_alpha ~ double_exponential(hyper_sd_location, hyper_sd_scale);
 77 |     }
 78 | 
 79 |     beta ~ normal(0, 2.5);
 80 |     sigma_y ~ normal(0, 2.5);
 81 |     target+=normal_lpdf(home|mean_home,sd_home);
 82 | 
 83 |     // Likelihood
 84 |     for (n in 1:N) {
 85 |         int rank_time = instants_rank[n];  // Time index for the current match
 86 |         diff_y[n] ~ student_t(
 87 |             nu,
 88 |             adj_h_eff+
 89 |             ability[rank_time, team1[n]] - ability[rank_time, team2[n]],
 90 |             sigma_y
 91 |         );
 92 |     }
 93 | }
 94 | generated quantities {
 95 |     vector[N] diff_y_rep;        // Replicated differences for posterior predictive checks
 96 |     vector[N] log_lik;           // Log-likelihood for model comparison
 97 |     vector[N_prev] diff_y_prev;  // Predicted differences for future matches
 98 | 
 99 |     for (n in 1:N) {
100 |         int rank_time = instants_rank[n];
101 |         diff_y_rep[n] = student_t_rng(
102 |             nu,
103 |             adj_h_eff+
104 |             ability[rank_time, team1[n]] - ability[rank_time, team2[n]],
105 |             sigma_y
106 |         );
107 |         log_lik[n] = student_t_lpdf(
108 |             diff_y[n] | nu,
109 |             adj_h_eff+
110 |             ability[rank_time, team1[n]] - ability[rank_time, team2[n]],
111 |             sigma_y
112 |         );
113 |     }
114 | 
115 |     if (N_prev > 0) {
116 |         for (n in 1:N_prev) {
117 |             diff_y_prev[n] = student_t_rng(
118 |                 nu,
119 |                 adj_h_eff+
120 |                 ability[instants_rank[N], team1_prev[n]] - ability[instants_rank[N], team2_prev[n]],
121 |                 sigma_y
122 |             );
123 |         }
124 |     }
125 | }
126 | 


--------------------------------------------------------------------------------
/man/compare_foot.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/compare_foot.R
  3 | \name{compare_foot}
  4 | \alias{compare_foot}
  5 | \title{Compare Football Models using Various Metrics}
  6 | \usage{
  7 | compare_foot(
  8 |   source,
  9 |   test_data,
 10 |   metric = c("accuracy", "brier", "ACP", "pseudoR2", "RPS"),
 11 |   conf_matrix = FALSE
 12 | )
 13 | }
 14 | \arguments{
 15 | \item{source}{A named list containing either:
 16 | \itemize{
 17 |   \item Fitted model objects (of class  \code{stanFoot}, \code{CmdStanFit}, \code{stanfit}), each representing a football model.
 18 |   \item Matrices where each matrix contains the estimated probabilities for "Home Win," "Draw," and "Away Win" in its columns.
 19 | }}
 20 | 
 21 | \item{test_data}{A data frame containing the test dataset, with columns:
 22 | \itemize{
 23 |   \item \code{home_team}: Home team's name (character string).
 24 |   \item \code{away_team}: Away team's name (character string).
 25 |   \item \code{home_goals}: Goals scored by the home team (integer >= 0).
 26 |   \item \code{away_goals}: Goals scored by the away team (integer >= 0).
 27 | }}
 28 | 
 29 | \item{metric}{A character vector specifying the metrics to use for comparison. Options are:
 30 | \itemize{
 31 |   \item \code{"accuracy"}: Computes the accuracy of each model.
 32 |   \item \code{"brier"}: Computes the Brier score of each model.
 33 |   \item \code{"RPS"}: Computes the ranked probability score (RPS) for each model.
 34 |   \item \code{"ACP"}: Computes the average of correct probabilities (ACP) for each model.
 35 |   \item \code{"pseudoR2"}: Computes the Pseudo \eqn{R^2}, defined as the geometric mean of the probabilities assigned to the actual results.
 36 | }
 37 | Default is \code{c("accuracy", "brier", "ACP", "pseudoR2", "RPS")}, computing the specified metrics.}
 38 | 
 39 | \item{conf_matrix}{A logical value indicating whether to generate a confusion matrix comparing predicted outcomes against actual outcomes for each model or probability matrix. Default is \code{FALSE}.}
 40 | }
 41 | \value{
 42 | An object of class \code{compare_foot_output}, which is a list containing:
 43 |   \itemize{
 44 |     \item \code{metrics}: A data frame containing the metric values for each model or probability matrix.
 45 |     \item \code{confusion_matrix}: Confusion matrices for each model or probability matrix.
 46 |   }
 47 | }
 48 | \description{
 49 | Compares multiple football models or directly provided probability matrices based on specified metrics (accuracy, Brier score, ranked probability score, Pseudo \eqn{R^2}, average coverage probability), using a test dataset. Additionally, computes the confusion matrices. The function returns an object of class \code{compareFoot}.
 50 | }
 51 | \details{
 52 | The function extracts predictions from each model or directly uses the provided probability matrices and computes the chosen metrics on the test dataset. It also possible to compute confusion matrices.
 53 | }
 54 | \examples{
 55 | \dontrun{
 56 | if (instantiate::stan_cmdstan_exists()) {
 57 |   library(dplyr)
 58 | 
 59 |   data("italy")
 60 |   italy_2000 <- italy \%>\%
 61 |     dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\%
 62 |     dplyr::filter(Season == "2000")
 63 | 
 64 |   colnames(italy_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals")
 65 | 
 66 |   # Example with fitted models
 67 |   fit_1 <- stan_foot(
 68 |     data = italy_2000,
 69 |     model = "double_pois", predict = 18
 70 |   ) # Double Poisson model
 71 |   fit_2 <- stan_foot(
 72 |     data = italy_2000,
 73 |     model = "biv_pois", predict = 18
 74 |   ) # Bivariate Poisson model
 75 | 
 76 |   italy_2000_test <- italy_2000[289:306, ]
 77 | 
 78 | 
 79 |   compare_results_models <- compare_foot(
 80 |     source = list(
 81 |       double_poisson = fit_1,
 82 |       bivariate_poisson = fit_2
 83 |     ),
 84 |     test_data = italy_2000_test,
 85 |     metric = c("accuracy", "brier", "ACP", "pseudoR2", "RPS"),
 86 |     conf_matrix = TRUE
 87 |   )
 88 | 
 89 |   print(compare_results_models)
 90 | 
 91 | 
 92 |   # Example with probability matrices
 93 | 
 94 |   home_team <- c(
 95 |     "AC Milan", "Inter", "Juventus", "AS Roma", "Napoli",
 96 |     "Lazio", "Atalanta", "Fiorentina", "Torino", "Sassuolo", "Udinese"
 97 |   )
 98 | 
 99 |   away_team <- c(
100 |     "Juventus", "Napoli", "Inter", "Atalanta", "Lazio",
101 |     "AC Milan", "Sassuolo", "Torino", "Fiorentina", "Udinese", "AS Roma"
102 |   )
103 | 
104 |   # Home and Away goals based on given data
105 |   home_goals <- c(2, 0, 2, 2, 3, 1, 4, 2, 1, 1, 2)
106 |   away_goals <- c(1, 0, 1, 3, 2, 1, 1, 2, 1, 1, 2)
107 | 
108 |   # Combine into a data frame
109 |   test_data <- data.frame(home_team, away_team, home_goals, away_goals)
110 | 
111 |   # Define the data for each column
112 |   pW <- c(0.51, 0.45, 0.48, 0.53, 0.56, 0.39, 0.52, 0.55, 0.61, 0.37, 0.35)
113 |   pD <- c(0.27, 0.25, 0.31, 0.18, 0.23, 0.30, 0.24, 0.26, 0.18, 0.19, 0.22)
114 |   pL <- c(0.22, 0.30, 0.21, 0.29, 0.21, 0.31, 0.24, 0.19, 0.21, 0.44, 0.43)
115 | 
116 |   # Create the data frame table_prob
117 |   table_prob <- data.frame(pW, pD, pL)
118 |   matrix_prob <- as.matrix(table_prob)
119 | 
120 |   # Use compare_foot function
121 |   compare_results_matrices <- compare_foot(
122 |     source = list(matrix_1 = matrix_prob),
123 |     test_data = test_data,
124 |     metric = c("accuracy", "brier", "pseudoR2", "ACP", "RPS")
125 |   )
126 |   # Print the results
127 |   print(compare_results_matrices)
128 | }
129 | }
130 | }
131 | \author{
132 | Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}
133 | }
134 | 


--------------------------------------------------------------------------------
/src/stan/double_pois.stan:
--------------------------------------------------------------------------------
  1 | data{
  2 |       int N;                      // number of games
  3 |       int<lower=0> N_prev;                 // number of predicted games
  4 |       array[N,2] int y;                 // scores
  5 |       int nteams;                 // number of teams
  6 |       array[N] int team1;               // home team index
  7 |       array[N] int team2;               // away team index
  8 |       array[N_prev] int team1_prev;     // home team for pred.
  9 |       array[N_prev] int team2_prev;     // away team for pred.
 10 |       array[N] int instants_rank;
 11 |       int ntimes_rank;                 // dynamic periods for ranking
 12 |       matrix[ntimes_rank,nteams] ranking;      // eventual fifa/uefa ranking
 13 |       int<lower=0, upper=1> ind_home;
 14 |       real mean_home;              // Mean for home effect
 15 |       real<lower=1e-8> sd_home;      // Standard deviation for home effect
 16 | 
 17 |       // priors part
 18 |       int<lower=1,upper=4> prior_dist_num;    // 1 gaussian, 2 t, 3 cauchy, 4 laplace
 19 |       int<lower=1,upper=4> prior_dist_sd_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace
 20 | 
 21 |       real<lower=0> hyper_df;
 22 |       real hyper_location;
 23 | 
 24 |       real<lower=0> hyper_sd_df;
 25 |       real hyper_sd_location;
 26 |       real<lower=1e-8> hyper_sd_scale;
 27 |     }
 28 |     parameters{
 29 |       vector[nteams] att_raw;
 30 |       vector[nteams] def_raw;
 31 |       real<lower=1e-8> sigma_att;
 32 |       real<lower=1e-8> sigma_def;
 33 |       real home;
 34 |       real gamma;
 35 |     }
 36 |     transformed parameters{
 37 |       real adj_h_eff;                   // Adjusted home effect
 38 |       vector[nteams] att;        // attack parameters
 39 |       vector[nteams] def;        // defence parameters
 40 |       array[N] vector[2] theta;        // exponentiated linear pred.
 41 | 
 42 |       for (t in 1:nteams){
 43 |         att[t] = att_raw[t]-mean(att_raw);
 44 |         def[t] = def_raw[t]-mean(def_raw);
 45 |       }
 46 | 
 47 |       adj_h_eff = home * ind_home;
 48 | 
 49 |       for (n in 1:N){
 50 |         theta[n,1] = exp(adj_h_eff+att[team1[n]]+def[team2[n]]+
 51 |                          (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]]));
 52 |         theta[n,2] = exp(att[team2[n]]+def[team1[n]]-
 53 |                          (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]]));
 54 |       }
 55 |     }
 56 |     model{
 57 |       // log-priors for team-specific abilities
 58 |       for (t in 1:(nteams)){
 59 |         if (prior_dist_num == 1){
 60 |           target+= normal_lpdf(att_raw[t]|hyper_location, sigma_att);
 61 |           target+= normal_lpdf(def_raw[t]|hyper_location, sigma_def);
 62 |         }
 63 |         else if (prior_dist_num == 2){
 64 |           target+= student_t_lpdf(att_raw[t]|hyper_df, hyper_location, sigma_att);
 65 |           target+= student_t_lpdf(def_raw[t]|hyper_df, hyper_location, sigma_def);
 66 |         }
 67 |         else if (prior_dist_num == 3){
 68 |           target+= cauchy_lpdf(att_raw[t]|hyper_location, sigma_att);
 69 |           target+= cauchy_lpdf(def_raw[t]|hyper_location, sigma_def);
 70 |         }
 71 |         else if (prior_dist_num == 4){
 72 |           target+= double_exponential_lpdf(att_raw[t]|hyper_location, sigma_att);
 73 |           target+= double_exponential_lpdf(def_raw[t]|hyper_location, sigma_def);
 74 |         }
 75 |       }
 76 | 
 77 | 
 78 |       // log-hyperpriors for sd parameters
 79 |       if (prior_dist_sd_num == 1 ){
 80 |         target+=normal_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
 81 |         target+=normal_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
 82 |       }
 83 |       else if (prior_dist_sd_num == 2){
 84 |         target+=student_t_lpdf(sigma_att|hyper_sd_df, hyper_sd_location, hyper_sd_scale);
 85 |         target+=student_t_lpdf(sigma_def|hyper_sd_df, hyper_sd_location, hyper_sd_scale);
 86 |       }
 87 |       else if (prior_dist_sd_num == 3){
 88 |         target+=cauchy_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
 89 |         target+=cauchy_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
 90 |       }
 91 |       else if (prior_dist_sd_num == 4){
 92 |         target+=double_exponential_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
 93 |         target+=double_exponential_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
 94 |       }
 95 | 
 96 |       // log-priors fixed effects
 97 |       target+=normal_lpdf(home|mean_home,sd_home);
 98 |       target+=normal_lpdf(gamma|0,1);
 99 | 
100 |       // likelihood
101 | 
102 |       target+=poisson_lpmf(y[,1]| theta[,1]);
103 |       target+=poisson_lpmf(y[,2]| theta[,2]);
104 | 
105 |     }
106 |     generated quantities{
107 |       array[N,2] int y_rep;
108 |       array[N_prev,2] int y_prev;
109 |       array[N_prev] vector[2] theta_prev;
110 |       vector[N] log_lik;
111 |       array[N] int diff_y_rep;
112 | 
113 |       //in-sample replications
114 |       for (n in 1:N){
115 |         y_rep[n,1] = poisson_rng(theta[n,1]);
116 |         y_rep[n,2] = poisson_rng(theta[n,2]);
117 |         diff_y_rep[n] = y_rep[n,1] - y_rep[n,2];
118 |         log_lik[n] =poisson_lpmf(y[n,1]| theta[n,1])+
119 |           poisson_lpmf(y[n,2]| theta[n,2]);
120 |       }
121 |       //out-of-sample predictions
122 |       if (N_prev > 0) {
123 |         for (n in 1:N_prev){
124 |           theta_prev[n,1] = exp(adj_h_eff+att[team1_prev[n]]+def[team2_prev[n]]+
125 |                            (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]]));
126 |           theta_prev[n,2] = exp(att[team2_prev[n]]+def[team1_prev[n]]-
127 |                            (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]]));
128 |           y_prev[n,1] = poisson_rng(theta_prev[n,1]);
129 |           y_prev[n,2] = poisson_rng(theta_prev[n,2]);
130 |         }
131 |       }
132 |     }
133 | 


--------------------------------------------------------------------------------
/src/stan/neg_bin.stan:
--------------------------------------------------------------------------------
  1 | data {
  2 |   int<lower=1> N;                          // number of games
  3 |   int<lower=0> N_prev;
  4 |   array[N,2] int y;                        // observed scores
  5 |   int<lower=1> nteams;
  6 |   array[N] int instants_rank;
  7 |   int<lower=1> ntimes_rank;                // number of ranking periods
  8 |   array[N] int team1;
  9 |   array[N] int team2;
 10 |   array[N_prev]int team1_prev;
 11 |   array[N_prev] int team2_prev;
 12 |   matrix[ntimes_rank, nteams] ranking;
 13 |   int<lower=0, upper=1> ind_home;
 14 |   real mean_home;                          // prior mean for home effect
 15 |   real<lower=1e-8> sd_home;                  // prior sd for home effect
 16 | 
 17 |   // choice of prior distributions for att/def
 18 |   int<lower=1,upper=4> prior_dist_num;     // 1=normal,2=t,3=cauchy,4=laplace
 19 |   int<lower=1,upper=4> prior_dist_sd_num;  // same for sd parameters
 20 | 
 21 |   real<lower=0> hyper_df;                  // d.f. for t‑prior on abilities
 22 |   real hyper_location;                     // location for ability priors
 23 | 
 24 |   real<lower=0> hyper_sd_df;               // d.f. for t‑prior on sds
 25 |   real hyper_sd_location;                  // location for sd priors
 26 |   real<lower=1e-8> hyper_sd_scale;            // scale for sd priors
 27 | }
 28 | 
 29 | parameters {
 30 |   vector[nteams] att_raw;
 31 |   vector[nteams] def_raw;
 32 |   real<lower=1e-8> sigma_att;
 33 |   real<lower=1e-8> sigma_def;
 34 |   real home;
 35 |   real gamma;
 36 | 
 37 |   // Negative Binomial dispersion parameters
 38 |   real<lower=0> phi1;
 39 |   real<lower=0> phi2;
 40 | }
 41 | 
 42 | transformed parameters {
 43 |   vector[nteams] att = att_raw - mean(att_raw);
 44 |   vector[nteams] def = def_raw - mean(def_raw);
 45 |   real adj_h_eff;
 46 |   array[N] vector[2] theta;
 47 | 
 48 |   adj_h_eff = home * ind_home;
 49 | 
 50 |   for (n in 1:N) {
 51 |     theta[n,1] = exp(
 52 |       adj_h_eff
 53 |       + att[team1[n]] + def[team2[n]]
 54 |       + (gamma/2) * (ranking[instants_rank[n], team1[n]]
 55 |                      - ranking[instants_rank[n], team2[n]])
 56 |     );
 57 |     theta[n,2] = exp(
 58 |       att[team2[n]] + def[team1[n]]
 59 |       - (gamma/2) * (ranking[instants_rank[n], team1[n]]
 60 |                      - ranking[instants_rank[n], team2[n]])
 61 |     );
 62 |   }
 63 | }
 64 | 
 65 | model {
 66 |   // 1) Priors on team abilities (via target+=)
 67 |   for (t in 1:nteams) {
 68 |     if (prior_dist_num == 1) {
 69 |       target += normal_lpdf(att_raw[t]   | hyper_location, sigma_att);
 70 |       target += normal_lpdf(def_raw[t]   | hyper_location, sigma_def);
 71 |     } else if (prior_dist_num == 2) {
 72 |       target += student_t_lpdf(att_raw[t] | hyper_df, hyper_location, sigma_att);
 73 |       target += student_t_lpdf(def_raw[t] | hyper_df, hyper_location, sigma_def);
 74 |     } else if (prior_dist_num == 3) {
 75 |       target += cauchy_lpdf(att_raw[t]   | hyper_location, sigma_att);
 76 |       target += cauchy_lpdf(def_raw[t]   | hyper_location, sigma_def);
 77 |     } else {
 78 |       target += double_exponential_lpdf(att_raw[t] | hyper_location, sigma_att);
 79 |       target += double_exponential_lpdf(def_raw[t] | hyper_location, sigma_def);
 80 |     }
 81 |   }
 82 | 
 83 |   // 2) Priors on sigma_att, sigma_def
 84 |   if (prior_dist_sd_num == 1) {
 85 |     target += normal_lpdf(sigma_att | hyper_sd_location, hyper_sd_scale);
 86 |     target += normal_lpdf(sigma_def | hyper_sd_location, hyper_sd_scale);
 87 |   } else if (prior_dist_sd_num == 2) {
 88 |     target += student_t_lpdf(sigma_att | hyper_sd_df, hyper_sd_location, hyper_sd_scale);
 89 |     target += student_t_lpdf(sigma_def | hyper_sd_df, hyper_sd_location, hyper_sd_scale);
 90 |   } else if (prior_dist_sd_num == 3) {
 91 |     target += cauchy_lpdf(sigma_att | hyper_sd_location, hyper_sd_scale);
 92 |     target += cauchy_lpdf(sigma_def | hyper_sd_location, hyper_sd_scale);
 93 |   } else {
 94 |     target += double_exponential_lpdf(sigma_att | hyper_sd_location, hyper_sd_scale);
 95 |     target += double_exponential_lpdf(sigma_def | hyper_sd_location, hyper_sd_scale);
 96 |   }
 97 | 
 98 |   // 3) Priors on fixed effects
 99 |   target += normal_lpdf(home  | mean_home, sd_home);
100 |   target += normal_lpdf(gamma | 0, 1);
101 | 
102 |   // 4) Priors on NB dispersion
103 |   target += normal_lpdf(phi1 | 0, 5);
104 |   target += normal_lpdf(phi2 | 0, 5);
105 | 
106 |   // 5) Likelihood: marginal NB2 for each margin + shared latent component
107 |   for (n in 1:N) {
108 |     target += neg_binomial_2_lpmf(y[n,1] | theta[n,1], phi1);
109 |     target += neg_binomial_2_lpmf(y[n,2] | theta[n,2], phi2);
110 |   }
111 | }
112 | 
113 | generated quantities {
114 |   array[N,2] int y_rep;
115 |   array[N_prev,2] int y_prev;
116 |   array[N_prev] vector[2] theta_prev;
117 |   vector[N] log_lik;
118 |   array[N] int diff_y_rep;
119 | 
120 |   for (n in 1:N) {
121 |     y_rep[n,1]    = neg_binomial_2_rng(theta[n,1], phi1);
122 |     y_rep[n,2]    = neg_binomial_2_rng(theta[n,2], phi2);
123 |     diff_y_rep[n] = y_rep[n,1] - y_rep[n,2];
124 | 
125 |     log_lik[n] = neg_binomial_2_lpmf(y[n,1] | theta[n,1], phi1)
126 |                + neg_binomial_2_lpmf(y[n,2] | theta[n,2], phi2);
127 |   }
128 | 
129 |   //out-of-sample predictions
130 |   if (N_prev > 0) {
131 |     for (n in 1:N_prev){
132 |       theta_prev[n,1] = exp(adj_h_eff+att[team1_prev[n]]+
133 |                             def[team2_prev[n]]+
134 |                            (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]]));
135 |       theta_prev[n,2] = exp(att[team2_prev[n]]+
136 |                             def[team1_prev[n]]-
137 |                            (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]]));
138 |       y_prev[n,1] = neg_binomial_2_rng(theta_prev[n,1], phi1);
139 |       y_prev[n,2] = neg_binomial_2_rng(theta_prev[n,2], phi2);
140 |     }
141 |   }
142 | }
143 | 


--------------------------------------------------------------------------------
/man/mle_foot.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/mle_foot.R
  3 | \name{mle_foot}
  4 | \alias{mle_foot}
  5 | \title{Fit football models with Maximum Likelihood}
  6 | \usage{
  7 | mle_foot(
  8 |   data,
  9 |   model,
 10 |   predict = 0,
 11 |   maxit = 1000,
 12 |   method = "BFGS",
 13 |   interval = "profile",
 14 |   hessian = FALSE,
 15 |   sigma_y = 1
 16 | )
 17 | }
 18 | \arguments{
 19 | \item{data}{A data frame containing match data with columns:
 20 | \itemize{
 21 |   \item \code{periods}:  Time point of each observation (integer >= 1).
 22 |   \item \code{home_team}: Home team's name (character string).
 23 |   \item \code{away_team}: Away team's name (character string).
 24 |   \item \code{home_goals}: Goals scored by the home team (integer >= 0).
 25 |   \item \code{away_goals}: Goals scored by the away team (integer >= 0).
 26 | }}
 27 | 
 28 | \item{model}{A character specifying the model to fit. Options are:
 29 | \itemize{
 30 |   \item \code{"double_pois"}: Double Poisson model.
 31 |   \item \code{"biv_pois"}: Bivariate Poisson model.
 32 |   \item \code{"skellam"}: Skellam model.
 33 |   \item \code{"student_t"}: Student's t model.
 34 |   }}
 35 | 
 36 | \item{predict}{An integer specifying the number of out-of-sample matches for prediction. If missing, the function fits the model to the entire dataset without making predictions.}
 37 | 
 38 | \item{maxit}{An integer specifying the maximum number of optimizer iterations  default is 1000).}
 39 | 
 40 | \item{method}{A character specifying the optimization method. Options are
 41 |   \itemize{
 42 |     \item \code{"Nelder-Mead"}.
 43 |     \item \code{"BFGS"} (default).
 44 |     \item \code{"CG"}.
 45 |     \item \code{"L-BFGS-B"}.
 46 |     \item \code{"SANN"}.
 47 |     \item \code{"Brent"}.
 48 |   }
 49 | For further details see \code{{optim}} function in \code{\link[stats]{stats}} package.}
 50 | 
 51 | \item{interval}{A character specifying the interval type for confidence intervals. Options are
 52 | \itemize{
 53 |   \item \code{"profile"} (default).
 54 |   \item \code{"Wald"}.
 55 |   }}
 56 | 
 57 | \item{hessian}{A logical value indicating to include the computation of the Hessian (default FALSE).}
 58 | 
 59 | \item{sigma_y}{A positive numeric value indicating the scale parameter for Student t likelihood (default 1).}
 60 | }
 61 | \value{
 62 | A named list containing:
 63 | \itemize{
 64 |   \item{\code{att}}: A matrix of attack ratings, with MLE and 95\% confidence intervals (for \code{"double_pois"}, \code{"biv_pois"} and \code{"skellam"} models).
 65 |   \item{\code{def}}: A matrix of defence ratings, with MLE and 95\% confidence intervals (for \code{"double_pois"}, \code{"biv_pois"} and \code{"skellam"} models).
 66 |   \item{\code{abilities}}: A matrix of combined ability, with MLE and 95\% confidence intervals (for \code{"student_t"} only).
 67 |   \item{\code{home_effect}}: A matrix with with MLE and 95\% confidence intervals for the home effect estimate.
 68 |   \item{\code{corr}}: A matrix with MLE and 95\% confidence intervals for the bivariate Poisson correlation parameter (for \code{"biv_pois"} only).
 69 |   \item{\code{model}}: The name of the fitted model (character).
 70 |   \item{\code{predict}}: The number of out-of-sample matches used for prediction (integer).
 71 |   \item{\code{sigma_y}}: The scale parameter used in the Student t likelihood (for \code{"student_t"} only).
 72 |   \item{\code{team1_prev}}: Integer indices of home teams in the out-of-sample matches (if \code{predict > 0}).
 73 |   \item{\code{team2_prev}}: Integer indices of away teams in the out-of-sample matches (if \code{predict > 0}).
 74 |   \item{\code{logLik}}: The maximized log likelihood (numeric).
 75 |   \item{\code{aic}}: Akaike Information Criterion (numeric).
 76 |   \item{\code{bic}}: Bayesian Information Criterion (numeric).
 77 | }
 78 | }
 79 | \description{
 80 | Fits football goal-based models using maximum likelihood estimation.
 81 | Supported models include: double Poisson, bivariate Poisson, Skellam, and Student's t.
 82 | }
 83 | \details{
 84 | MLE can be obtained only for static models, with no time-dependence.
 85 | Likelihood optimization is performed via the \code{BFGS} method
 86 | of the \code{{optim}} function in \code{\link[stats]{stats}} package.
 87 | }
 88 | \examples{
 89 | \dontrun{
 90 | library(dplyr)
 91 | 
 92 | data("italy")
 93 | italy <- as_tibble(italy)
 94 | italy_2000_2002 <- italy \%>\%
 95 |   dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\%
 96 |   dplyr::filter(Season == "2000" | Season == "2001" | Season == "2002")
 97 | 
 98 | colnames(italy_2000_2002) <- c("periods", "home_team", "away_team", "home_goals", "away_goals")
 99 | 
100 | mle_fit <- mle_foot(
101 |   data = italy_2000_2002,
102 |   model = "double_pois"
103 | )
104 | }
105 | 
106 | }
107 | \references{
108 | Baio, G. and Blangiardo, M. (2010). Bayesian hierarchical model for the prediction of football
109 | results. Journal of Applied Statistics 37(2), 253-264.
110 | 
111 | Egidi, L., Pauli, F., and Torelli, N. (2018). Combining historical data
112 | and bookmakers' odds in modelling football scores. Statistical Modelling, 18(5-6), 436-459.
113 | 
114 | Gelman, A. (2014). Stan goes to the World Cup. From
115 | "Statistical Modeling, Causal Inference, and Social Science" blog.
116 | 
117 | Karlis, D. and Ntzoufras, I. (2003). Analysis of sports data by using bivariate poisson models.
118 | Journal of the Royal Statistical Society: Series D (The Statistician) 52(3), 381-393.
119 | 
120 | Karlis, D. and Ntzoufras,I. (2009).  Bayesian modelling of football outcomes: Using
121 | the Skellam's distribution for the goal difference. IMA Journal of Management Mathematics 20(2), 133-145.
122 | 
123 | Owen, A. (2011). Dynamic Bayesian forecasting models
124 | of football match outcomes with estimation of the
125 | evolution variance parameter. IMA Journal of Management Mathematics, 22(2), 99-113.
126 | }
127 | \author{
128 | Leonardo Egidi \email{legidi@units.it} and Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}
129 | }
130 | 


--------------------------------------------------------------------------------
/src/stan/skellam.stan:
--------------------------------------------------------------------------------
  1 | functions{
  2 |       real skellam_lpmf(int k, real lambda1, real lambda2) {
  3 |         //real r = k;
  4 |         return -(lambda1 + lambda2) + (k/2) * log(lambda1/lambda2) +
  5 |           log_modified_bessel_first_kind(abs(k), 2 * sqrt(lambda1 * lambda2));
  6 |       }
  7 |     }
  8 |     data{
  9 |       int N;
 10 |       int<lower=0> N_prev;
 11 |       array[N] int diff_y;
 12 |       int nteams;
 13 |       array[N] int team1;
 14 |       array[N] int team2;
 15 |       array[N_prev] int team1_prev;
 16 |       array[N_prev] int team2_prev;
 17 |       array[N] int instants_rank;
 18 |       int ntimes_rank;                 // dynamic periods for ranking
 19 |       matrix[ntimes_rank,nteams] ranking;      // eventual fifa/uefa ranking
 20 |       int<lower=0, upper=1> ind_home;
 21 |       real mean_home;              // Mean for home effect
 22 |       real<lower=1e-8> sd_home;      // Standard deviation for home effect
 23 | 
 24 |       // priors part
 25 |       int<lower=1,upper=4> prior_dist_num;    // 1 gaussian, 2 t, 3 cauchy, 4 laplace
 26 |       int<lower=1,upper=4> prior_dist_sd_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace
 27 | 
 28 |       real<lower=0> hyper_df;
 29 |       real hyper_location;
 30 | 
 31 |       real<lower=0> hyper_sd_df;
 32 |       real hyper_sd_location;
 33 |       real<lower=1e-8> hyper_sd_scale;
 34 |     }
 35 |     parameters{
 36 |       vector[nteams] att_raw;
 37 |       vector[nteams] def_raw;
 38 |       real<lower=1e-8> sigma_att;
 39 |       real<lower=1e-8> sigma_def;
 40 |       real home;
 41 |       real gamma;
 42 |     }
 43 |     transformed parameters{
 44 |       real adj_h_eff;                   // Adjusted home effect
 45 |       vector[nteams] att;
 46 |       vector[nteams] def;
 47 |       array[N,2] real theta;
 48 | 
 49 |       for (t in 1:nteams){
 50 |         att[t] = att_raw[t]-mean(att_raw);
 51 |         def[t] = def_raw[t]-mean(def_raw);
 52 |       }
 53 | 
 54 |       adj_h_eff = home * ind_home;
 55 | 
 56 |       for (n in 1:N){
 57 |         theta[n,1] = exp(adj_h_eff+att[team1[n]]+def[team2[n]]+
 58 |                          (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]]));
 59 |         theta[n,2] = exp(att[team2[n]]+def[team1[n]]-
 60 |                          (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]]));
 61 |       }
 62 |     }
 63 |     model{
 64 |       // log-priors for team-specific abilities
 65 |       for (t in 1:(nteams)){
 66 |         if (prior_dist_num == 1){
 67 |           target+= normal_lpdf(att_raw[t]|hyper_location, sigma_att);
 68 |           target+= normal_lpdf(def_raw[t]|hyper_location, sigma_def);
 69 |         }
 70 |         else if (prior_dist_num == 2){
 71 |           target+= student_t_lpdf(att_raw[t]|hyper_df, hyper_location, sigma_att);
 72 |           target+= student_t_lpdf(def_raw[t]|hyper_df, hyper_location, sigma_def);
 73 |         }
 74 |         else if (prior_dist_num == 3){
 75 |           target+= cauchy_lpdf(att_raw[t]|hyper_location, sigma_att);
 76 |           target+= cauchy_lpdf(def_raw[t]|hyper_location, sigma_def);
 77 |         }
 78 |         else if (prior_dist_num == 4){
 79 |           target+= double_exponential_lpdf(att_raw[t]|hyper_location, sigma_att);
 80 |           target+= double_exponential_lpdf(def_raw[t]|hyper_location, sigma_def);
 81 |         }
 82 |       }
 83 | 
 84 | 
 85 |       // log-hyperpriors for sd parameters
 86 |       if (prior_dist_sd_num == 1 ){
 87 |         target+=normal_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
 88 |         target+=normal_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
 89 |       }
 90 |       else if (prior_dist_sd_num == 2){
 91 |         target+=student_t_lpdf(sigma_att|hyper_sd_df, hyper_sd_location, hyper_sd_scale);
 92 |         target+=student_t_lpdf(sigma_def|hyper_sd_df, hyper_sd_location, hyper_sd_scale);
 93 |       }
 94 |       else if (prior_dist_sd_num == 3){
 95 |         target+=cauchy_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
 96 |         target+=cauchy_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
 97 |       }
 98 |       else if (prior_dist_sd_num == 4){
 99 |         target+=double_exponential_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
100 |         target+=double_exponential_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
101 |       }
102 | 
103 |       // log-priors fixed effects
104 |       target+=normal_lpdf(home|mean_home,sd_home);
105 |       target+=normal_lpdf(gamma|0,1);
106 | 
107 | 
108 |       // likelihood
109 |       for (n in 1:N){
110 |         target+=skellam_lpmf(diff_y[n]| theta[n,1],theta[n,2]);
111 |       }
112 |     }
113 |     generated quantities{
114 |       array[N,2] int y_rep;
115 |       array[N] int diff_y_rep;
116 |       vector[N] log_lik;
117 |       array[N_prev,2] int y_prev;
118 |       array[N_prev] vector[2] theta_prev;
119 |       array[N_prev] int diff_y_prev;
120 | 
121 |       //in-sample replications
122 |       for (n in 1:N){
123 |         y_rep[n,1] = poisson_rng(theta[n,1]);
124 |         y_rep[n,2] = poisson_rng(theta[n,2]);
125 |         diff_y_rep[n] = y_rep[n,1] - y_rep[n,2];
126 |         log_lik[n] =skellam_lpmf(diff_y[n]| theta[n,1], theta[n,2]);
127 |       }
128 |       //out-of-sample predictions
129 |       if (N_prev > 0) {
130 |       for (n in 1:N_prev){
131 |           theta_prev[n,1] = exp(adj_h_eff+att[team1_prev[n]]+
132 |                                   def[team2_prev[n]]+
133 |                            (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]]));
134 |           theta_prev[n,2] = exp(att[team2_prev[n]]+
135 |                                   def[team1_prev[n]]-
136 |                            (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]]));
137 |           y_prev[n,1] = poisson_rng(theta_prev[n,1]);
138 |           y_prev[n,2] = poisson_rng(theta_prev[n,2]);
139 |           diff_y_prev[n] = y_prev[n,1] - y_prev[n,2];
140 |         }
141 |       }
142 |     }
143 | 


--------------------------------------------------------------------------------
/man/btd_foot.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/btd_foot.R
  3 | \name{btd_foot}
  4 | \alias{btd_foot}
  5 | \title{Bayesian Bradley-Terry-Davidson Model}
  6 | \usage{
  7 | btd_foot(
  8 |   data,
  9 |   dynamic_rank = FALSE,
 10 |   home_effect = FALSE,
 11 |   prior_par = list(logStrength = normal(0, 3), logTie = normal(0, 0.3), home = normal(0,
 12 |     5)),
 13 |   rank_measure = "median",
 14 |   method = "MCMC",
 15 |   ...
 16 | )
 17 | }
 18 | \arguments{
 19 | \item{data}{A data frame containing the observations with columns:
 20 | \itemize{
 21 |   \item \code{periods}: Time point of each observation (integer >= 1).
 22 |   \item \code{home_team}: Home team's name (character string).
 23 |   \item \code{away_team}: Away team's name (character string).
 24 |   \item \code{match_outcome}: Outcome (1 if home team beats away team, 2 for tie, and 3 if away team beats home team).
 25 | }
 26 | The data frame must not contain missing values.}
 27 | 
 28 | \item{dynamic_rank}{A logical value indicating whether a dynamic ranking model is used (default is \code{FALSE}).}
 29 | 
 30 | \item{home_effect}{A logical value indicating the inclusion of a home effect in the model. (default is \code{FALSE}).}
 31 | 
 32 | \item{prior_par}{A list specifying the prior distributions for the parameters of interest, using the \code{normal} function:
 33 | \itemize{
 34 |   \item \code{logStrength}: Prior for the team log-strengths. Default is \code{normal(0, 3)}.
 35 |   \item \code{logTie}: Prior for the tie parameter. Default is \code{normal(0, 0.3)}.
 36 |   \item \code{home}: Prior for the home effect (\code{home}). Applicable only if \code{home_effect = TRUE}. Default is \code{normal(0, 5)}.
 37 | }
 38 | Only normal priors are allowed for this model.}
 39 | 
 40 | \item{rank_measure}{A character string specifying the method used to summarize the posterior distributions of the team strengths. Options are:
 41 | \itemize{
 42 |   \item \code{"median"}: Uses the median of the posterior samples (default).
 43 |   \item \code{"mean"}: Uses the mean of the posterior samples.
 44 |   \item \code{"map"}: Uses the Maximum A Posteriori estimate, calculated as the mode of the posterior distribution.
 45 | }}
 46 | 
 47 | \item{method}{A character string specifying the method used to obtain the Bayesian estimates. Options are:
 48 | \itemize{
 49 |   \item \code{"MCMC"}: Markov chain Monte Carlo algorithm (default).
 50 |   \item \code{"VI"}: Automatic differentiation variational inference algorithms.
 51 |   \item \code{"pathfinder"}: Pathfinder variational inference algorithm.
 52 |   \item \code{"laplace"}: Laplace algorithm.
 53 | }}
 54 | 
 55 | \item{...}{Additional arguments passed to \code{\link[cmdstanr]{cmdstanr}} (e.g., \code{iter_sampling}, \code{chains}, \code{parallel_chains}).}
 56 | }
 57 | \value{
 58 | An object of class \code{"btdFoot"}, which is a list containing:
 59 |   \itemize{
 60 |     \item \code{fit}: The fitted \code{CmdStanFit} object returned by \code{\link[cmdstanr]{cmdstanr}}.
 61 |     \item \code{rank}: A data frame with the rankings, including columns:
 62 |       \itemize{
 63 |         \item \code{periods}: The time period.
 64 |         \item \code{team}: The team name.
 65 |         \item \code{rank_points}: The estimated strength of the team based on the chosen \code{rank_measure}.
 66 |       }
 67 |     \item \code{data}: The input data.
 68 |     \item \code{stan_data}: The data list passed to Stan.
 69 |     \item \code{stan_code}: The Stan code of the underline model.
 70 |     \item \code{stan_args}: The optional \code{\link[cmdstanr]{cmdstanr}} parameters passed to (\code{...}).
 71 |     \item \code{rank_measure}: The summary statistic used to compute the rankings.
 72 |     \item \code{alg_method}: The inference algorithm used to obtain the Bayesian estimates.
 73 |   }
 74 | }
 75 | \description{
 76 | Fits a Bayesian Bradley-Terry-Davidson model using Stan. Supports both static and dynamic ranking models, allowing for the estimation of team strengths over time.
 77 | }
 78 | \examples{
 79 | \dontrun{
 80 | if (instantiate::stan_cmdstan_exists()) {
 81 |   library(dplyr)
 82 | 
 83 |   data("italy")
 84 | 
 85 |   italy_2020_2021 <- italy \%>\%
 86 |     dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\%
 87 |     dplyr::filter(Season == "2020" | Season == "2021") \%>\%
 88 |     dplyr::mutate(match_outcome = dplyr::case_when(
 89 |       hgoal > vgoal ~ 1, # Home team wins
 90 |       hgoal == vgoal ~ 2, # Draw
 91 |       hgoal < vgoal ~ 3 # Away team wins
 92 |     )) \%>\%
 93 |     dplyr::mutate(periods = dplyr::case_when(
 94 |       dplyr::row_number() <= 190 ~ 1,
 95 |       dplyr::row_number() <= 380 ~ 2,
 96 |       dplyr::row_number() <= 570 ~ 3,
 97 |       TRUE ~ 4
 98 |     )) \%>\% # Assign periods based on match number
 99 |     dplyr::select(periods,
100 |       home_team = home,
101 |       away_team = visitor, match_outcome
102 |     )
103 | 
104 |   # Dynamic Ranking Example with Median Rank Measure
105 |   fit_result_dyn <- btd_foot(
106 |     data = italy_2020_2021,
107 |     dynamic_rank = TRUE,
108 |     home_effect = TRUE,
109 |     prior_par = list(
110 |       logStrength = normal(0, 10),
111 |       logTie = normal(0, 5),
112 |       home = normal(0, 5)
113 |     ),
114 |     rank_measure = "median",
115 |     iter_sampling = 1000,
116 |     parallel_chains = 2,
117 |     chains = 2
118 |   )
119 | 
120 |   print(fit_result_dyn)
121 | 
122 |   print(fit_result_dyn, pars = c("logStrength", "home"), teams = c("AC Milan", "AS Roma"))
123 | 
124 |   # Static Ranking Example with MAP Rank Measure
125 |   fit_result_stat <- btd_foot(
126 |     data = italy_2020_2021,
127 |     dynamic_rank = FALSE,
128 |     prior_par = list(
129 |       logStrength = normal(0, 10),
130 |       logTie = normal(0, 5),
131 |       home = normal(0, 5)
132 |     ),
133 |     rank_measure = "map",
134 |     iter_sampling = 1000,
135 |     parallel_chains = 2,
136 |     chains = 2
137 |   )
138 | 
139 |   print(fit_result_stat)
140 | }
141 | }
142 | }
143 | \author{
144 | Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}.
145 | }
146 | 


--------------------------------------------------------------------------------
/R/foot_priors.R:
--------------------------------------------------------------------------------
  1 | #' Football priors distributions and options
  2 | #'
  3 | #' @name priors
  4 | #' @description This prior specification is just a duplicate
  5 | #' of some of the priors used by the \pkg{rstanarm} package.
  6 | #'
  7 | #' These prior distributions can be passed to the
  8 | #' \code{stan_foot} function, through the arguments \code{prior} and \code{prior_sd}.
  9 | #' See  the vignette \href{http://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior
 10 | #'   Distributions for rstanarm Models}} for further details (to view the priors used for an existing model see
 11 | #'   \href{https://mc-stan.org/rstanarm/reference/prior_summary.stanreg.html}{prior_summary}).
 12 | #'   The default priors used in the \pkg{stan_foot} modeling function
 13 | #'   are intended to be \emph{weakly informative} in that they provide moderate
 14 | #'   regularlization and help stabilize computation.
 15 | #'
 16 | #' You can choose between: \code{normal}, \code{cauchy}, \code{laplace}, \code{student_t}.
 17 | #'
 18 | #' @param location Prior location. In most cases, this is the prior mean, but
 19 | #'   for \code{cauchy} (which is equivalent to \code{student_t} with
 20 | #'   \code{df=1}), the mean does not exist and \code{location} is the prior
 21 | #'   median. The default value is \eqn{0}.
 22 | #' @param scale Prior scale. The default depends on the family (see
 23 | #'   \strong{Details}).
 24 | #' @param df Prior degrees of freedom. The default is \eqn{1} for
 25 | #'   \code{student_t}, in which case it is equivalent to \code{cauchy}.
 26 | #' @param autoscale A logical scalar, defaulting to \code{TRUE}.
 27 | #'
 28 | #' @details The details depend on the family of the prior being used:
 29 | #' \subsection{Student t family}{
 30 | #'   Family members:
 31 | #'   \itemize{
 32 | #'   \item \code{normal(location, scale)}
 33 | #'   \item \code{student_t(df, location, scale)}
 34 | #'   \item \code{cauchy(location, scale)}
 35 | #'   }
 36 | #'   Each of these functions also takes an argument \code{autoscale}.
 37 | #'
 38 | #'   For the prior distribution for the intercept, \code{location},
 39 | #'   \code{scale}, and \code{df} should be scalars. For the prior for the other
 40 | #'   coefficients they can either be vectors of length equal to the number of
 41 | #'   coefficients (not including the intercept), or they can be scalars, in
 42 | #'   which case they will be recycled to the appropriate length. As the
 43 | #'   degrees of freedom approaches infinity, the Student t distribution
 44 | #'   approaches the normal distribution and if the degrees of freedom are one,
 45 | #'   then the Student t distribution is the Cauchy distribution.
 46 | #'
 47 | #'   If \code{scale} is not specified it will default to \eqn{10} for the
 48 | #'   intercept and \eqn{2.5} for the other coefficients.
 49 | #'
 50 | #'   If the \code{autoscale} argument is \code{TRUE} (the default), then the
 51 | #'   scales will be further adjusted as described above in the documentation of
 52 | #'   the \code{autoscale} argument in the \strong{Arguments} section.
 53 | #' }
 54 | #'
 55 | #' \subsection{Laplace family}{
 56 | #'   Family members:
 57 | #'   \itemize{
 58 | #'   \item \code{laplace(location, scale)}
 59 | #'   }
 60 | #'   Each of these functions also takes an argument \code{autoscale}.
 61 | #'
 62 | #'   The Laplace distribution is also known as the double-exponential
 63 | #'   distribution. It is a symmetric distribution with a sharp peak at its mean
 64 | #'   / median / mode and fairly long tails. This distribution can be motivated
 65 | #'   as a scale mixture of normal distributions and the remarks above about the
 66 | #'   normal distribution apply here as well.
 67 | #'
 68 | #' }
 69 | #'
 70 | #' @return A named list to be used internally by the
 71 | #'  \code{stan_foot} model fitting function.
 72 | #'
 73 | #' @seealso The various vignettes for the \pkg{rstanarm} package also discuss
 74 | #'   and demonstrate the use of some of the supported prior distributions.
 75 | #'
 76 | #' @author Leonardo Egidi \email{legidi@units.it}
 77 | #'
 78 | #' @references
 79 | #' Gelman, A., Jakulin, A., Pittau, M. G., and Su, Y. (2008). A weakly
 80 | #' informative default prior distribution for logistic and other regression
 81 | #' models. \emph{Annals of Applied Statistics}. 2(4), 1360--1383.
 82 | #'
 83 | #'
 84 | #' @rdname priors
 85 | #' @export
 86 | #'
 87 | #'
 88 | 
 89 | normal <- function(location = 0, scale = NULL, autoscale = TRUE) {
 90 |   validate_parameter_value(scale)
 91 |   list(
 92 |     dist = "normal", df = NA,
 93 |     location = location,
 94 |     scale = scale,
 95 |     autoscale = autoscale
 96 |   )
 97 | }
 98 | 
 99 | #' @rdname priors
100 | #' @export
101 | student_t <- function(df = 1, location = 0, scale = NULL, autoscale = TRUE) {
102 |   validate_parameter_value(scale)
103 |   validate_parameter_value(df)
104 |   list(
105 |     dist = "t", df = df,
106 |     location = location,
107 |     scale = scale,
108 |     autoscale = autoscale
109 |   )
110 | }
111 | 
112 | #' @rdname priors
113 | #' @export
114 | cauchy <- function(location = 0, scale = NULL, autoscale = TRUE) {
115 |   student_t(
116 |     df = 1, location = location,
117 |     scale = scale,
118 |     autoscale = autoscale
119 |   )
120 | }
121 | 
122 | #' @rdname priors
123 | #' @export
124 | laplace <- function(location = 0, scale = NULL, autoscale = TRUE) {
125 |   list(
126 |     dist = "laplace", df = NA,
127 |     location = location,
128 |     scale = scale,
129 |     autoscale = autoscale
130 |   )
131 | }
132 | 
133 | 
134 | # internal ----------------------------------------------------------------
135 | 
136 | # Check for positive scale or df parameter (NULL ok)
137 | #
138 | # @param x The value to check.
139 | # @return Either an error is thrown or \code{TRUE} is returned invisibly.
140 | validate_parameter_value <- function(x) {
141 |   nm <- deparse(substitute(x))
142 |   if (!is.null(x)) {
143 |     if (!is.numeric(x)) {
144 |       stop(nm, " should be NULL or numeric", call. = FALSE)
145 |     }
146 |     if (any(x <= 0)) {
147 |       stop(nm, " should be positive", call. = FALSE)
148 |     }
149 |   }
150 |   invisible(TRUE)
151 | }
152 | 
153 | # An nlist object is an S3 class list of uniquely
154 | # named  numeric elements.
155 | 
156 | nlist <- function(...) {
157 |   args <- list(...)
158 |   if (length(args)) {
159 |     return(as_nlist(args))
160 |   }
161 |   structure(list(), .Names = character(0), class = "nlist")
162 | }
163 | 


--------------------------------------------------------------------------------
/tests/testthat/test_pp_foot.R:
--------------------------------------------------------------------------------
  1 | ## all the test PASSED (also the skipped ones!)
  2 | 
  3 | #   ____________________________________________________________________________
  4 | #   Test for aggregated type                                                ####
  5 | 
  6 | test_that("pp_foot returns proper output for aggregated type", {
  7 |   skip_on_cran()
  8 |   skip_if_not(stan_cmdstan_exists())
  9 | 
 10 |   ##  ............................................................................
 11 |   ##  Data                                                                    ####
 12 | 
 13 |   data("england")
 14 |   england <- as.data.frame(england)
 15 | 
 16 |   # One season only
 17 |   england_2004 <- england %>%
 18 |     dplyr::filter(division == 1) %>%
 19 |     dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
 20 |     dplyr::filter(Season == "2004")
 21 | 
 22 |   colnames(england_2004) <- c(
 23 |     "periods", "home_team", "away_team",
 24 |     "home_goals", "away_goals"
 25 |   )
 26 | 
 27 |   ##  ............................................................................
 28 |   ##  Tests                                                                   ####
 29 | 
 30 |   model <- stan_foot(
 31 |     data = england_2004,
 32 |     model = "double_pois",
 33 |     iter_sampling = 200,
 34 |     chains = 2,
 35 |     seed = 433
 36 |   )
 37 | 
 38 |   result <- pp_foot(object = model, data = england_2004, type = "aggregated")
 39 | 
 40 |   # Expect output to be a list with components "pp_plot" and "pp_table"
 41 |   expect_type(result, "list")
 42 |   expect_true("pp_plot" %in% names(result))
 43 |   expect_true("pp_table" %in% names(result))
 44 | 
 45 |   # Check that the plot is a ggplot object and table is a data frame
 46 |   expect_s3_class(result$pp_plot, "ggplot")
 47 |   expect_true(is.data.frame(result$pp_table))
 48 | 
 49 |   # Check that the table has the expected column names
 50 |   expect_equal(names(result$pp_table), c("goal diff.", "Bayesian p-value"))
 51 | })
 52 | 
 53 | 
 54 | 
 55 | #   ____________________________________________________________________________
 56 | #   Test for matches type                                                   ####
 57 | 
 58 | test_that("pp_foot returns proper output for matches type", {
 59 |   skip_on_cran()
 60 |   skip_if_not(stan_cmdstan_exists())
 61 | 
 62 |   ##  ............................................................................
 63 |   ##  Data                                                                    ####
 64 | 
 65 |   data("england")
 66 |   england <- as.data.frame(england)
 67 | 
 68 |   # One season only
 69 |   england_2004 <- england %>%
 70 |     dplyr::filter(division == 1) %>%
 71 |     dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
 72 |     dplyr::filter(Season == "2004")
 73 | 
 74 |   colnames(england_2004) <- c(
 75 |     "periods", "home_team", "away_team",
 76 |     "home_goals", "away_goals"
 77 |   )
 78 | 
 79 |   ##  ............................................................................
 80 |   ##  Tests                                                                   ####
 81 | 
 82 |   model <- stan_foot(
 83 |     data = england_2004,
 84 |     model = "double_pois",
 85 |     iter_sampling = 200,
 86 |     chains = 2,
 87 |     seed = 433
 88 |   )
 89 | 
 90 |   result <- pp_foot(object = model, data = england_2004, type = "matches", coverage = 0.95)
 91 | 
 92 |   expect_type(result, "list")
 93 |   expect_true("pp_plot" %in% names(result))
 94 |   expect_true("pp_table" %in% names(result))
 95 |   expect_s3_class(result$pp_plot, "ggplot")
 96 |   expect_true(is.data.frame(result$pp_table))
 97 | 
 98 |   # The table for matches should have columns "1-alpha" and "emp. coverage"
 99 |   expect_equal(names(result$pp_table), c("1-alpha", "emp. coverage"))
100 | })
101 | 
102 | 
103 | 
104 | #   ____________________________________________________________________________
105 | #   Test for missing required columns in data                               ####
106 | 
107 | 
108 | test_that("pp_foot errors if required columns are missing", {
109 |   skip_on_cran()
110 |   skip_if_not(stan_cmdstan_exists())
111 | 
112 |   ##  ............................................................................
113 |   ##  Data                                                                    ####
114 | 
115 |   data("england")
116 |   england <- as.data.frame(england)
117 | 
118 |   # One season only
119 |   england_2004 <- england %>%
120 |     dplyr::filter(division == 1) %>%
121 |     dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
122 |     dplyr::filter(Season == "2004")
123 | 
124 |   colnames(england_2004) <- c(
125 |     "periods", "home_team", "away_team",
126 |     "home_goals", "away_goals"
127 |   )
128 | 
129 |   ##  ............................................................................
130 |   ##  Tests                                                                   ####
131 | 
132 |   model <- stan_foot(
133 |     data = england_2004,
134 |     model = "double_pois",
135 |     iter_sampling = 200,
136 |     chains = 2,
137 |     seed = 433
138 |   )
139 |   # Remove the 'periods' column
140 |   data_missing <- england_2004[, -1]
141 |   expect_error(pp_foot(object = model, data = data_missing),
142 |                "data is missing required columns:")
143 | })
144 | 
145 | 
146 | #   ____________________________________________________________________________
147 | #   Tests on input and different models                                     ####
148 | 
149 | 
150 | test_that("expect error in input", {
151 |   skip_on_cran()
152 |   skip_if_not(stan_cmdstan_exists())
153 | 
154 |   ##  ............................................................................
155 |   ##  Data                                                                    ####
156 | 
157 |   data("england")
158 |   england <- as.data.frame(england)
159 | 
160 |   # One season only
161 |   england_2004 <- england %>%
162 |     dplyr::filter(division == 1) %>%
163 |     dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
164 |     dplyr::filter(Season == "2004")
165 | 
166 |   colnames(england_2004) <- c(
167 |     "periods", "home_team", "away_team",
168 |     "home_goals", "away_goals"
169 |   )
170 | 
171 |   ##  ............................................................................
172 |   ##  Tests                                                                   ####
173 | 
174 |   # Wrong model class
175 |   model <- mle_foot(england_2004, "double_pois")
176 |   expect_error(pp_foot(object = model, data = england_2004))
177 | 
178 |   # CmdStan model class
179 |   model <- stan_foot(
180 |     data = england_2004,
181 |     model = "student_t",
182 |     iter_sampling = 200,
183 |     chains = 2,
184 |     seed = 433
185 |   )
186 |   expect_error(pp_foot(object = model$fit, data = england_2004), NA)
187 | })
188 | 
189 | 


--------------------------------------------------------------------------------
/src/stan/biv_pois.stan:
--------------------------------------------------------------------------------
  1 | functions{
  2 | 
  3 |       real bipois_lpmf(array[] int r , real mu1,real mu2,real mu3) {
  4 |         real ss;
  5 |         real log_s;
  6 |         real mus;
  7 |         int  miny;
  8 | 
  9 |         miny = min(r[1], r[2]);
 10 | 
 11 |         ss = poisson_lpmf(r[1] | mu1) + poisson_lpmf(r[2] | mu2) -
 12 |           exp(mu3);
 13 |         if(miny > 0) {
 14 |           mus = -mu1-mu2+mu3;
 15 |           log_s = ss;
 16 | 
 17 |           for(k in 1:miny) {
 18 |             log_s = log_s + log(r[1] - k + 1) + mus
 19 |             + log(r[2] - k + 1)
 20 |             - log(k);
 21 |             ss = log_sum_exp(ss, log_s);
 22 |           }
 23 |         }
 24 |         return(ss);
 25 |       }
 26 | 
 27 |     }
 28 |     data{
 29 |       int N;   // number of games
 30 |       int<lower=0> N_prev;
 31 |       array[N,2] int y;
 32 |       int nteams;
 33 |       array[N] int instants_rank;
 34 |       int ntimes_rank;                 // dynamic periods for ranking
 35 |       array[N] int team1;
 36 |       array[N] int team2;
 37 |       array[N_prev]int team1_prev;
 38 |       array[N_prev] int team2_prev;
 39 |       matrix[ntimes_rank,nteams] ranking;
 40 |       int<lower=0, upper=1> ind_home;
 41 |       real mean_home;              // Mean for home effect
 42 |       real<lower=1e-8> sd_home;      // Standard deviation for home effect
 43 | 
 44 |       // priors part
 45 |       int<lower=1,upper=4> prior_dist_num;    // 1 gaussian, 2 t, 3 cauchy, 4 laplace
 46 |       int<lower=1,upper=4> prior_dist_sd_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace
 47 | 
 48 |       real<lower=0> hyper_df;
 49 |       real hyper_location;
 50 | 
 51 |       real<lower=0> hyper_sd_df;
 52 |       real hyper_sd_location;
 53 |       real<lower=1e-8> hyper_sd_scale;
 54 |     }
 55 |     parameters{
 56 |       vector[nteams] att_raw;
 57 |       vector[nteams] def_raw;
 58 |       real<lower=1e-8> sigma_att;
 59 |       real<lower=1e-8> sigma_def;
 60 |       real home;
 61 |       real rho;
 62 |       real gamma;
 63 |     }
 64 |     transformed parameters{
 65 |       real adj_h_eff;                   // Adjusted home effect
 66 |       vector[nteams] att;
 67 |       vector[nteams] def;
 68 |       array[N] vector[3] theta;
 69 | 
 70 |       for (t in 1:nteams){
 71 |         att[t] = att_raw[t]-mean(att_raw);
 72 |         def[t] = def_raw[t]-mean(def_raw);
 73 |       }
 74 | 
 75 |       adj_h_eff = home * ind_home;
 76 | 
 77 |       for (n in 1:N){
 78 |         theta[n,1] = exp(adj_h_eff+att[team1[n]]+def[team2[n]]+
 79 |                          (gamma/2)*(ranking[instants_rank[n], team1[n]]-ranking[instants_rank[n], team2[n]]));
 80 |         theta[n,2] = exp(att[team2[n]]+def[team1[n]]-
 81 |                          (gamma/2)*(ranking[instants_rank[n], team1[n]]-ranking[instants_rank[n], team2[n]]));
 82 |         theta[n,3] = exp(rho);
 83 |       }
 84 |     }
 85 |     model{
 86 |       // log-priors for team-specific abilities
 87 |       for (t in 1:(nteams)){
 88 |         if (prior_dist_num == 1){
 89 |           target+= normal_lpdf(att_raw[t]|hyper_location, sigma_att);
 90 |           target+= normal_lpdf(def_raw[t]|hyper_location, sigma_def);
 91 |         }
 92 |         else if (prior_dist_num == 2){
 93 |           target+= student_t_lpdf(att_raw[t]|hyper_df, hyper_location, sigma_att);
 94 |           target+= student_t_lpdf(def_raw[t]|hyper_df, hyper_location, sigma_def);
 95 |         }
 96 |         else if (prior_dist_num == 3){
 97 |           target+= cauchy_lpdf(att_raw[t]|hyper_location, sigma_att);
 98 |           target+= cauchy_lpdf(def_raw[t]|hyper_location, sigma_def);
 99 |         }
100 |         else if (prior_dist_num == 4){
101 |           target+= double_exponential_lpdf(att_raw[t]|hyper_location, sigma_att);
102 |           target+= double_exponential_lpdf(def_raw[t]|hyper_location, sigma_def);
103 |         }
104 |       }
105 | 
106 | 
107 |       // log-hyperpriors for sd parameters
108 |       if (prior_dist_sd_num == 1 ){
109 |         target+=normal_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
110 |         target+=normal_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
111 |       }
112 |       else if (prior_dist_sd_num == 2){
113 |         target+=student_t_lpdf(sigma_att|hyper_sd_df, hyper_sd_location, hyper_sd_scale);
114 |         target+=student_t_lpdf(sigma_def|hyper_sd_df, hyper_sd_location, hyper_sd_scale);
115 |       }
116 |       else if (prior_dist_sd_num == 3){
117 |         target+=cauchy_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
118 |         target+=cauchy_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
119 |       }
120 |       else if (prior_dist_sd_num == 4){
121 |         target+=double_exponential_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
122 |         target+=double_exponential_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
123 |       }
124 | 
125 |       // log-priors fixed effects
126 |       target+=normal_lpdf(home|mean_home,sd_home);
127 |       target+=normal_lpdf(rho|0,1);
128 |       target+=normal_lpdf(gamma|0,1);
129 | 
130 |       // likelihood
131 |       for (n in 1:N){
132 |         //target+=bipois_lpmf(y[n,]| theta[n,1],
133 |         //                    theta[n,2], theta[n,3]);
134 |         target+=poisson_lpmf(y[n,1]| theta[n,1]+theta[n,3]);
135 |         target+=poisson_lpmf(y[n,2]| theta[n,2]+theta[n,3]);
136 |       }
137 |     }
138 |     generated quantities{
139 |       array[N,2]int y_rep;
140 |       array[N_prev,2] int y_prev;
141 |       array[N_prev] vector[3] theta_prev;
142 |       vector[N] log_lik;
143 |       array[N] int diff_y_rep;
144 | 
145 |       //in-sample replications
146 |       for (n in 1:N){
147 |         y_rep[n,1] = poisson_rng(theta[n,1]+theta[n,3]);
148 |         y_rep[n,2] = poisson_rng(theta[n,2]+theta[n,3]);
149 |         diff_y_rep[n] = y_rep[n,1] - y_rep[n,2];
150 |         log_lik[n] = poisson_lpmf(y[n,1]| theta[n,1]+theta[n,3])+
151 |                      poisson_lpmf(y[n,2]| theta[n,2]+theta[n,3]);
152 |         //bipois_lpmf(y[n,]| theta[n,1],
153 |         //                        theta[n,2], theta[n,3]);
154 |       }
155 | 
156 |       //out-of-sample predictions
157 |       if (N_prev > 0) {
158 |         for (n in 1:N_prev){
159 |           theta_prev[n,1] = exp(adj_h_eff+att[team1_prev[n]]+
160 |                                   def[team2_prev[n]]+
161 |                            (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]]));
162 |           theta_prev[n,2] = exp(att[team2_prev[n]]+
163 |                                   def[team1_prev[n]]-
164 |                            (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]]));
165 |           theta_prev[n,3] = exp(rho);
166 |           y_prev[n,1] = poisson_rng(theta_prev[n,1]+theta_prev[n,3]);
167 |           y_prev[n,2] = poisson_rng(theta_prev[n,2]+theta_prev[n,3]);
168 |         }
169 |       }
170 |     }
171 | 


--------------------------------------------------------------------------------
/src/stan/zero_infl_skellam.stan:
--------------------------------------------------------------------------------
  1 | functions{
  2 |       real skellam_lpmf(int k, real lambda1, real lambda2) {
  3 |         //real r = k;
  4 |         return -(lambda1 + lambda2) + (k/2) * log(lambda1/lambda2) +
  5 |           log_modified_bessel_first_kind(abs(k), 2 * sqrt(lambda1 * lambda2));
  6 |       }
  7 | 
  8 |        real zero_infl_skellam_lpmf(int k, real lambda1, real lambda2, real p) {
  9 |     // This way is the easiest and proposed by https://github.com/Torvaney/karlis-ntzoufras-reproduction.
 10 |     // However, within model block, we propose in a comment the alternative way that Stan proposes in their documentation for zero inflated models
 11 |       real base_prob;
 12 |       real prob;
 13 |       real log_prob;
 14 | 
 15 |       base_prob = exp(skellam_lpmf(k| lambda1,lambda2));
 16 | 
 17 |       if (k== 0)
 18 |         prob = p + (1 - p) * base_prob;
 19 |       else
 20 |         prob = (1 - p) * base_prob;
 21 | 
 22 |       log_prob = log(prob);
 23 | 
 24 |       return log_prob;
 25 |     }
 26 | 
 27 | }
 28 |     data{
 29 |       int N;
 30 |       int<lower=0> N_prev;
 31 |       array[N] int diff_y;
 32 |       int nteams;
 33 |       array[N] int team1;
 34 |       array[N] int team2;
 35 |       array[N_prev] int team1_prev;
 36 |       array[N_prev] int team2_prev;
 37 |       array[N] int instants_rank;
 38 |       int ntimes_rank;                 // dynamic periods for ranking
 39 |       matrix[ntimes_rank,nteams] ranking;      // eventual fifa/uefa ranking
 40 |       int<lower=0, upper=1> ind_home;
 41 |       real mean_home;              // Mean for home effect
 42 |       real<lower=1e-8> sd_home;      // Standard deviation for home effect
 43 | 
 44 |       // priors part
 45 |       int<lower=1,upper=4> prior_dist_num;    // 1 gaussian, 2 t, 3 cauchy, 4 laplace
 46 |       int<lower=1,upper=4> prior_dist_sd_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace
 47 | 
 48 |       real<lower=0> hyper_df;
 49 |       real hyper_location;
 50 | 
 51 |       real<lower=0> hyper_sd_df;
 52 |       real hyper_sd_location;
 53 |       real<lower=1e-8> hyper_sd_scale;
 54 |     }
 55 |     parameters{
 56 |       vector[nteams] att_raw;
 57 |       vector[nteams] def_raw;
 58 |       real<lower=1e-8> sigma_att;
 59 |       real<lower=1e-8> sigma_def;
 60 |       real home;
 61 |       real gamma;
 62 |       real <lower=0,upper=1> prob_of_draws;// excessive probability of draws
 63 | 
 64 |     }
 65 |     transformed parameters{
 66 |       real adj_h_eff;                   // Adjusted home effect
 67 |       vector[nteams] att;
 68 |       vector[nteams] def;
 69 |       array[N,2] real theta;
 70 | 
 71 |       for (t in 1:nteams){
 72 |         att[t] = att_raw[t]-mean(att_raw);
 73 |         def[t] = def_raw[t]-mean(def_raw);
 74 |       }
 75 | 
 76 |       adj_h_eff = home * ind_home;
 77 | 
 78 |       for (n in 1:N){
 79 |         theta[n,1] = exp(adj_h_eff+att[team1[n]]+def[team2[n]]+
 80 |                          (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]]));
 81 |         theta[n,2] = exp(att[team2[n]]+def[team1[n]]-
 82 |                          (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]]));
 83 |       }
 84 |     }
 85 | model{
 86 |       // log-priors for team-specific abilities
 87 |       for (t in 1:(nteams)){
 88 |         if (prior_dist_num == 1){
 89 |           target+= normal_lpdf(att_raw[t]|hyper_location, sigma_att);
 90 |           target+= normal_lpdf(def_raw[t]|hyper_location, sigma_def);
 91 |         }
 92 |         else if (prior_dist_num == 2){
 93 |           target+= student_t_lpdf(att_raw[t]|hyper_df, hyper_location, sigma_att);
 94 |           target+= student_t_lpdf(def_raw[t]|hyper_df, hyper_location, sigma_def);
 95 |         }
 96 |         else if (prior_dist_num == 3){
 97 |           target+= cauchy_lpdf(att_raw[t]|hyper_location, sigma_att);
 98 |           target+= cauchy_lpdf(def_raw[t]|hyper_location, sigma_def);
 99 |         }
100 |         else if (prior_dist_num == 4){
101 |           target+= double_exponential_lpdf(att_raw[t]|hyper_location, sigma_att);
102 |           target+= double_exponential_lpdf(def_raw[t]|hyper_location, sigma_def);
103 |         }
104 |       }
105 | 
106 | 
107 |       // log-hyperpriors for sd parameters
108 |       if (prior_dist_sd_num == 1 ){
109 |         target+=normal_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
110 |         target+=normal_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
111 |       }
112 |       else if (prior_dist_sd_num == 2){
113 |         target+=student_t_lpdf(sigma_att|hyper_sd_df, hyper_sd_location, hyper_sd_scale);
114 |         target+=student_t_lpdf(sigma_def|hyper_sd_df, hyper_sd_location, hyper_sd_scale);
115 |       }
116 |       else if (prior_dist_sd_num == 3){
117 |         target+=cauchy_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
118 |         target+=cauchy_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
119 |       }
120 |       else if (prior_dist_sd_num == 4){
121 |         target+=double_exponential_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
122 |         target+=double_exponential_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
123 |       }
124 | 
125 |       // log-priors fixed effects
126 |       target+=normal_lpdf(home|mean_home,sd_home);
127 |       target+=normal_lpdf(gamma|0,1);
128 |       target+=uniform_lpdf(prob_of_draws|0,1);
129 | 
130 | 
131 |       // likelihood
132 |       for (n in 1:N){
133 |         target+=zero_infl_skellam_lpmf(diff_y[n]| theta[n,1],theta[n,2],
134 |         prob_of_draws);
135 |       }
136 | }
137 | 
138 | generated quantities{
139 |       array[N,2] int y_rep;
140 |       array[N] int diff_y_rep;
141 |       vector[N] log_lik;
142 |       array[N_prev,2] int y_prev;
143 |       array[N_prev] vector[2] theta_prev;
144 |       array[N_prev] int diff_y_prev;
145 | 
146 |       //in-sample replications
147 |       for (n in 1:N){
148 |         y_rep[n,1] = poisson_rng(theta[n,1]);
149 |         y_rep[n,2] = poisson_rng(theta[n,2]);
150 |         diff_y_rep[n] = y_rep[n,1] - y_rep[n,2];
151 |         log_lik[n] =zero_infl_skellam_lpmf(diff_y[n]| theta[n,1],theta[n,2],
152 |         prob_of_draws);
153 |       }
154 |       //out-of-sample predictions
155 |       if (N_prev > 0) {
156 |         for (n in 1:N_prev){
157 |           theta_prev[n,1] = exp(adj_h_eff+att[team1_prev[n]]+
158 |                                   def[team2_prev[n]]+
159 |                            (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]]));
160 |           theta_prev[n,2] = exp(att[team2_prev[n]]+
161 |                                   def[team1_prev[n]]-
162 |                            (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]]));
163 |           y_prev[n,1] = poisson_rng(theta_prev[n,1]);
164 |           y_prev[n,2] = poisson_rng(theta_prev[n,2]);
165 |           diff_y_prev[n] = y_prev[n,1] - y_prev[n,2];
166 |         }
167 |       }
168 | }
169 | 


--------------------------------------------------------------------------------
/tests/testthat/test_compare_foot.R:
--------------------------------------------------------------------------------
  1 | # all the test PASSED (also the skipped ones!)
  2 | 
  3 | 
  4 | #   ____________________________________________________________________________
  5 | #   Common matrix data                                                      ####
  6 | 
  7 | home_team <- c(
  8 |   "AC Milan", "Inter", "Juventus", "AS Roma", "Napoli",
  9 |   "Lazio", "Atalanta", "Fiorentina", "Torino", "Sassuolo", "Udinese"
 10 | )
 11 | 
 12 | away_team <- c(
 13 |   "Juventus", "Napoli", "Inter", "Atalanta", "Lazio",
 14 |   "AC Milan", "Sassuolo", "Torino", "Fiorentina", "Udinese", "AS Roma"
 15 | )
 16 | 
 17 | # Home and Away goals based on given data
 18 | home_goals <- c(2, 0, 2, 2, 3, 1, 4, 2, 1, 1, 2)
 19 | away_goals <- c(1, 0, 1, 3, 2, 1, 1, 2, 1, 1, 2)
 20 | 
 21 | # Combine into a data frame
 22 | test_data <- data.frame(home_team, away_team, home_goals, away_goals)
 23 | 
 24 | # Define the data for each column
 25 | pW <- c(0.51, 0.45, 0.48, 0.53, 0.56, 0.39, 0.52, 0.55, 0.61, 0.37, 0.35)
 26 | pD <- c(0.27, 0.25, 0.31, 0.18, 0.23, 0.30, 0.24, 0.26, 0.18, 0.19, 0.22)
 27 | pL <- c(0.22, 0.30, 0.21, 0.29, 0.21, 0.31, 0.24, 0.19, 0.21, 0.44, 0.43)
 28 | 
 29 | # Create the data frame table_prob
 30 | table_prob <- data.frame(pW, pD, pL)
 31 | matrix_prob <- as.matrix(table_prob)
 32 | 
 33 | 
 34 | 
 35 | #   ____________________________________________________________________________
 36 | #   Data tests                                                              ####
 37 | 
 38 | test_that("Error when test_data is missing required columns", {
 39 |   bad_test_data <- test_data[, -1] # Remove 'home_team'
 40 |   expect_error(
 41 |     compare_foot(source = list(prob1 = matrix_prob), test_data = bad_test_data),
 42 |     "test_data is missing required columns"
 43 |   )
 44 | })
 45 | 
 46 | test_that("Error when probability matrix has wrong number of rows", {
 47 |   # Create a matrix with fewer rows than test_data
 48 |   bad_matrix <- matrix_prob[1:4, ]
 49 |   expect_warning(
 50 |     expect_error(
 51 |       compare_foot(source = list(bad = bad_matrix), test_data = test_data),
 52 |       "No valid models or probability matrices were provided"
 53 |     )
 54 |   )
 55 | })
 56 | 
 57 | test_that("Error when probability matrix has wrong number of columns", {
 58 |   # Create a matrix with only 2 columns
 59 |   bad_matrix2 <- matrix_prob[, 1:2]
 60 |   expect_warning(
 61 |     compare_foot(source = list(good = matrix_prob, bad2 = bad_matrix2), test_data = test_data),
 62 |     "does not have exactly 3 columns"
 63 |   )
 64 | })
 65 | 
 66 | test_that("compare_foot removes NA rows from probability matrix", {
 67 |   # Introduce NA in row 2 (one of the entries)
 68 |   matrix_prob_na <- matrix_prob
 69 |   matrix_prob_na[2, 2] <- NA
 70 | 
 71 |   expect_warning(
 72 |     result <- compare_foot(
 73 |       source = list(prob_with_na = matrix_prob_na),
 74 |       test_data = test_data
 75 |     ),
 76 |     "contains 1 rows with NAs"
 77 |   )
 78 | })
 79 | 
 80 | 
 81 | test_that("compare_foot normalizes probability matrix rows not summing to 1", {
 82 |   # Create a probability matrix that does NOT sum to 1 by scaling each row by 2.
 83 |   matrix_prob_scaled <- matrix_prob * 2
 84 | 
 85 |   expect_warning(
 86 |     result <- compare_foot(source = list(prob_scaled = matrix_prob_scaled), test_data = test_data),
 87 |     "Probabilities in matrix 'prob_scaled' do not sum to 1. Normalizing"
 88 |   )
 89 | })
 90 | 
 91 | # test_that("compare_foot warns and errors when probability matrix becomes empty after removing NA rows", {
 92 | #   # Create a probability matrix where every row contains NA values.
 93 | #   matrix_all_na <- matrix(NA, nrow = 11, ncol = 3)
 94 | #
 95 | #   expect_warning(
 96 | #     expect_error(
 97 | #       compare_foot(source = list(all_na = matrix_all_na), test_data = test_data),
 98 | #       "No valid models or probability matrices were provided"
 99 | #     ),
100 | #     "After removing NA rows, no data remains for matrix 'all_na'"
101 | #   )
102 | # })
103 | 
104 | #   ____________________________________________________________________________
105 | #   Models and arguments tests                                              ####
106 | 
107 | test_that("compare_foot warns and skips stan_foot object without y_prev or diff_y_prev", {
108 |   skip_on_cran()
109 |   skip_if_not(stan_cmdstan_exists())
110 | 
111 |   ##  ............................................................................
112 |   ##  Data                                                                    ####
113 | 
114 |   data("italy")
115 |   italy_2000 <- italy %>%
116 |     dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
117 |     dplyr::filter(Season == "2000")
118 | 
119 |   colnames(italy_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals")
120 | 
121 |   ##  ............................................................................
122 |   ##  Tests                                                                   ####
123 | 
124 |   fit_1 <- stan_foot(
125 |     data = italy_2000,
126 |     model = "double_pois", predict = 18,
127 |     iter_sampling = 200,
128 |     chains = 2,
129 |     seed = 433
130 |   ) # Double Poisson model
131 |   fit_2 <- stan_foot(
132 |     data = italy_2000,
133 |     model = "biv_pois",
134 |     iter_sampling = 200,
135 |     chains = 2,
136 |     seed = 433
137 |   ) # Bivariate Poisson model
138 | 
139 |   fit_3 <- stan_foot(
140 |     data = italy_2000,
141 |     model = "student_t", predict = 18,
142 |     iter_sampling = 200,
143 |     chains = 2,
144 |     seed = 433
145 |   ) # Skellam model
146 | 
147 |   italy_2000_test <- italy_2000[289:306, ]
148 | 
149 | 
150 |   expect_warning(
151 |     expect_error(
152 |       compare_foot(
153 |         source = list(bivariate_poisson = fit_2),
154 |         test_data = italy_2000_test,
155 |         metric = c("accuracy", "brier", "ACP", "pseudoR2", "RPS"),
156 |         conf_matrix = TRUE
157 |       ),
158 |       "No valid models or probability matrices were provided"
159 |     ),
160 |     "does not contain 'y_prev' or 'diff_y_prev'"
161 |   )
162 | 
163 | 
164 |   expect_warning(
165 |     compare_foot(
166 |       source = list(double_poisson = fit_1$fit, bivariate_poisson = fit_2, student = fit_3),
167 |       test_data = italy_2000_test,
168 |       metric = c("accuracy", "brier", "ACP", "pseudoR2", "RPS"),
169 |       conf_matrix = TRUE
170 |     ),
171 |     "does not contain 'y_prev' or 'diff_y_prev'"
172 |   )
173 | })
174 | 
175 | 
176 | test_that("compare_foot warns and skips source object with different predicted matches and/or not allowed type", {
177 |   skip_on_cran()
178 |   skip_if_not(stan_cmdstan_exists())
179 | 
180 |   ##  ............................................................................
181 |   ##  Data                                                                    ####
182 | 
183 |   data("italy")
184 |   italy_2000 <- italy %>%
185 |     dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
186 |     dplyr::filter(Season == "2000")
187 | 
188 |   colnames(italy_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals")
189 | 
190 |   ##  ............................................................................
191 |   ##  Tests                                                                   ####
192 | 
193 |   fit_1 <- stan_foot(
194 |     data = italy_2000,
195 |     model = "double_pois", predict = 18,
196 |     iter_sampling = 200,
197 |     chains = 2,
198 |     seed = 433
199 |   ) # Double Poisson model
200 | 
201 |   fit_2 <- c(1,2,3,1)
202 | 
203 |   fit_3 <- stan_foot(
204 |     data = italy_2000,
205 |     model = "student_t", predict = 15,
206 |     iter_sampling = 200,
207 |     chains = 2,
208 |     seed = 433
209 |   ) # Skellam model
210 | 
211 |   italy_2000_test <- italy_2000[289:306, ]
212 | 
213 | 
214 |   expect_warning(
215 |     compare_foot(
216 |       source = list(double_poisson = fit_1$fit, student = fit_3),
217 |       test_data = italy_2000_test,
218 |       metric = c("accuracy", "brier", "ACP", "pseudoR2", "RPS"),
219 |       conf_matrix = TRUE
220 |     ))
221 | 
222 |   expect_warning(
223 |     compare_foot(
224 |       source = list(double_poisson = fit_1$fit, bivariate_poisson = fit_2),
225 |       test_data = italy_2000_test,
226 |       metric = c("accuracy", "brier", "ACP", "pseudoR2", "RPS"),
227 |       conf_matrix = TRUE
228 |     ))
229 | })
230 | 


--------------------------------------------------------------------------------
/R/utils_foot.R:
--------------------------------------------------------------------------------
  1 | #' Check Prior Value (Internal function)
  2 | #'
  3 | #' Validates the prior value to ensure it meets the necessary requirements.
  4 | #'
  5 | #' This function checks if the provided prior value is numeric and of length one.
  6 | #' If `positive` is set to \code{TRUE}, it will also ensure that the value is positive.
  7 | #'
  8 | #' @param prior_value The prior value to validate. Must be a numeric value of length one.
  9 | #' @param prior_name A string representing the name of the prior, used for error messages.
 10 | #' @param positive Logical value indicating whether the prior must be positive. Defaults to \code{FALSE}.
 11 | #' @return No return value. The function will throw an error if validation fails.
 12 | #' @examples
 13 | #' check_prior(5, "alpha", positive = TRUE)
 14 | #' check_prior(-1, "beta", positive = FALSE)
 15 | #' @noRd
 16 | 
 17 | check_prior <- function(prior_value, prior_name, positive = FALSE) {
 18 |   # Check if the prior is numeric and of length 1
 19 |   if (!is.numeric(prior_value) || length(prior_value) != 1) {
 20 |     stop(sprintf("The prior '%s' must be a single numeric value.", prior_name))
 21 |   }
 22 |   # Check if the prior is positive if required
 23 |   if (positive && prior_value <= 0) {
 24 |     stop(sprintf("The prior '%s' must be a positive numeric value.", prior_name))
 25 |   }
 26 | }
 27 | 
 28 | 
 29 | #' Compute the Maximum A Posteriori (MAP) Estimate (Internal function)
 30 | #'
 31 | #' Computes the Maximum A Posteriori (MAP) estimate from a set of samples using kernel density estimation.
 32 | #'
 33 | #' This function calculates the MAP estimate, which is the mode of the estimated density, for a given vector of samples.
 34 | #'
 35 | #' @param samples A numeric vector of samples from which to compute the MAP estimate.
 36 | #' @return A numeric value representing the MAP estimate.
 37 | #' @examples
 38 | #' samples <- rnorm(1000, mean = 5, sd = 2)
 39 | #' compute_MAP(samples)
 40 | #' @importFrom stats density
 41 | #' @noRd
 42 | 
 43 | compute_MAP <- function(samples) {
 44 |   dens <- stats::density(samples)
 45 |   MAP <- dens$x[which.max(dens$y)]
 46 |   return(MAP)
 47 | }
 48 | 
 49 | 
 50 | 
 51 | #' Normalize Rank Points (Internal Function)
 52 | #'
 53 | #' Normalizes a vector of rank points using the specified method.
 54 | #'
 55 | #' This function applies one of several normalization methods to a numeric vector of rank points.
 56 | #' Available methods are:
 57 | #' \itemize{
 58 | #'   \item \code{"none"}: No normalization is applied.
 59 | #'   \item \code{"standard"}: Standardizes the data to have mean 0 and standard deviation 0.5.
 60 | #'   \item \code{"mad"}: Normalizes using the median absolute deviation.
 61 | #'   \item \code{"min_max"}: Scales the data to be between 0 and 1.
 62 | #' }
 63 | #'
 64 | #' @param rank_points A numeric vector of rank points to normalize.
 65 | #' @param method A string specifying the normalization method. Options are \code{"none"}, \code{"standard"}, \code{"mad"}, or \code{"min_max"}.
 66 | #'
 67 | #' @return A numeric vector of normalized rank points.
 68 | #'
 69 | #' @noRd
 70 | normalize_rank_points <- function(rank_points, method) {
 71 |   if (method == "none") {
 72 |     rank_points
 73 |   } else if (method == "standard") {
 74 |     s <- stats::sd(rank_points, na.rm = TRUE)
 75 |     m <- mean(rank_points, na.rm = TRUE)
 76 |     if (s == 0) {
 77 |       rep(0, length(rank_points))
 78 |     } else {
 79 |       (rank_points - m) / (2 * s)
 80 |     }
 81 |   } else if (method == "mad") {
 82 |     md <- stats::mad(rank_points, na.rm = TRUE)
 83 |     med <- stats::median(rank_points, na.rm = TRUE)
 84 |     if (md == 0) {
 85 |       rep(0, length(rank_points))
 86 |     } else {
 87 |       (rank_points - med) / md
 88 |     }
 89 |   } else if (method == "min_max") {
 90 |     min_rp <- min(rank_points, na.rm = TRUE)
 91 |     max_rp <- max(rank_points, na.rm = TRUE)
 92 |     if (max_rp == min_rp) {
 93 |       rep(0, length(rank_points))
 94 |     } else {
 95 |       (rank_points - min_rp) / (max_rp - min_rp)
 96 |     }
 97 |   } else {
 98 |     stop("Invalid normalization method specified.")
 99 |   }
100 | }
101 | 
102 | 
103 | 
104 | #' Replace Indices with Team Names (Internal Function)
105 | #'
106 | #' Substitutes numeric indices in parameter names with corresponding team names based on a predefined mapping.
107 | #'
108 | #'
109 | #' @param param_names A character vector of parameter names that may contain numeric indices to be replaced.
110 | #' @param exclude_params A character vector of parameter name prefixes that should be excluded from renaming. Parameters starting with any of these prefixes will remain unchanged.
111 | #' @param index_map A named character vector where names correspond to numeric indices and values correspond to the team names they should be replaced with.
112 | #'
113 | #' @return A character vector with numeric indices replaced by their corresponding team names where applicable.
114 | #'
115 | #' @noRd
116 | team_names <- function(param_names, exclude_params, team_map_rev) {
117 |   sapply(param_names, function(name) {
118 |     # Skip parameters that should not have their names changed
119 |     if (any(startsWith(name, exclude_params))) {
120 |       return(name)
121 |     }
122 | 
123 |     # Attempt to match two indices: [time, team]
124 |     pattern_two <- "\\[(\\d+),(\\d+)\\]"
125 |     if (grepl(pattern_two, name)) {
126 |       matches <- regmatches(name, regexec(pattern_two, name))
127 | 
128 | 
129 |       if (length(matches[[1]]) >= 3) {
130 |         time_index <- matches[[1]][2] # First index (time)
131 |         team_index <- matches[[1]][3] # Second index (team)
132 | 
133 |         # Check if the second index corresponds to a team
134 |         if (team_index %in% names(team_map_rev)) {
135 |           team_name <- team_map_rev[[team_index]]
136 |           # Replace the second index with the team name
137 | 
138 |           new_name <- sub(paste0(",", team_index, "\\]"), paste0(", ", team_name, "]"), name)
139 |           return(new_name)
140 |         }
141 |       }
142 |     }
143 | 
144 |     # If not matched with two indices, attempt to match single index: [team]
145 |     pattern_one <- "\\[(\\d+)\\]"
146 |     if (grepl(pattern_one, name)) {
147 |       matches <- regmatches(name, regexec(pattern_one, name))
148 |       # Ensure that the single index is captured
149 |       if (length(matches[[1]]) >= 2) {
150 |         team_index <- matches[[1]][2] # Single index (team)
151 | 
152 |         # Check if the index corresponds to a team
153 |         if (team_index %in% names(team_map_rev)) {
154 |           team_name <- team_map_rev[[team_index]]
155 |           # Replace the index with the team name
156 |           new_name <- sub(paste0("\\[", team_index, "\\]"), paste0("[", team_name, "]"), name)
157 |           return(new_name)
158 |         }
159 |       }
160 |     }
161 | 
162 |     return(name)
163 |   }, USE.NAMES = FALSE)
164 | }
165 | 
166 | #' Compute the Ranked Probability Score (RPS) (Internal Function)
167 | #'
168 | #' Computes the Ranked Probability Score (RPS) from cumulative predicted probabilities
169 | #' and actual outcomes. The RPS is calculated as the mean of the squared differences
170 | #' between the cumulative predicted probabilities (for the first two outcome categories)
171 | #' and the corresponding cumulative observed probabilities.
172 | #'
173 | #' @param cum_pred A numeric matrix of cumulative predicted probabilities for each observation.
174 | #'   It should have at least two columns; the probability for the final outcome is assumed to be 1.
175 | #' @param actual A character vector of actual outcomes. Each element should be one of
176 | #'   \code{"Home Win"}, \code{"Draw"}, or \code{"Away Win"}.
177 | #'
178 | #' @return A numeric value representing the mean Ranked Probability Score.
179 | #'
180 | #' @noRd
181 | compute_RPS <- function(cum_pred, actual) {
182 |   # Create a matrix of cumulative observed probabilities
183 |   acum <- matrix(0, nrow = length(actual), ncol = 3)
184 |   acum[actual == "Home Win", ] <- matrix(rep(c(1, 1, 1), sum(actual == "Home Win")),
185 |     ncol = 3, byrow = TRUE
186 |   )
187 |   acum[actual == "Draw", ] <- matrix(rep(c(0, 1, 1), sum(actual == "Draw")),
188 |     ncol = 3, byrow = TRUE
189 |   )
190 |   acum[actual == "Away Win", ] <- matrix(rep(c(0, 0, 1), sum(actual == "Away Win")),
191 |     ncol = 3, byrow = TRUE
192 |   )
193 |   squared_diff <- (cum_pred[, 1:2] - acum[, 1:2])^2
194 |   rps_per_obs <- rowSums(squared_diff) / 2 # for 3 outcome categories: (3 - 1)
195 |   mean(rps_per_obs)
196 | }
197 | 


--------------------------------------------------------------------------------
/src/stan/diag_infl_biv_pois.stan:
--------------------------------------------------------------------------------
  1 | functions{
  2 | 
  3 |     real bipois_lpmf(array[] int r , real mu1,real mu2,real mu3) {
  4 |       real ss;
  5 |       real log_s;
  6 |       real mus;
  7 |       int  miny;
  8 | 
  9 |       miny = min(r[1], r[2]);
 10 | 
 11 |       ss = poisson_lpmf(r[1] | mu1) + poisson_lpmf(r[2] | mu2) -
 12 |         exp(mu3);
 13 |       if(miny > 0) {
 14 |         mus = -mu1-mu2+mu3;
 15 |         log_s = ss;
 16 | 
 17 |         for(k in 1:miny) {
 18 |           log_s = log_s + log(r[1] - k + 1) + mus
 19 |           + log(r[2] - k + 1)
 20 |           - log(k);
 21 |           ss = log_sum_exp(ss, log_s);
 22 |         }
 23 |       }
 24 |       return(ss);
 25 |     }
 26 |     real diag_infl_bipois_lpmf(array[] int r , real mu1,real mu2,real mu3, real p) {
 27 |     // This way is the easiest and proposed by https://github.com/Torvaney/karlis-ntzoufras-reproduction.
 28 |     // However, within model block, we propose in a comment the alternative way that Stan proposes in their documentation for zero inflated models
 29 |       real base_prob;
 30 |       real prob;
 31 |       real log_prob;
 32 | 
 33 |       base_prob = exp(bipois_lpmf(r| mu1, mu2,mu3));
 34 | 
 35 |       if (r[1] == r[2])
 36 |         prob = p + (1 - p) * base_prob;
 37 |       else
 38 |         prob = (1 - p) * base_prob;
 39 | 
 40 |       log_prob = log(prob);
 41 | 
 42 |       return log_prob;
 43 |     }
 44 | 
 45 |   }
 46 | data{
 47 |       int N;   // number of games
 48 |       int<lower=0> N_prev;
 49 |       array[N,2] int y;
 50 |       int nteams;
 51 |       array[N] int team1;
 52 |       array[N] int team2;
 53 |       array[N_prev] int team1_prev;
 54 |       array[N_prev] int team2_prev;
 55 |       array[N] int instants_rank;
 56 |       int ntimes_rank;                 // dynamic periods for ranking
 57 |       matrix[ntimes_rank,nteams] ranking;
 58 |       int<lower=0, upper=1> ind_home;
 59 |       real mean_home;              // Mean for home effect
 60 |       real<lower=1e-8> sd_home;      // Standard deviation for home effect
 61 | 
 62 |       // priors part
 63 |       int<lower=1,upper=4> prior_dist_num;    // 1 gaussian, 2 t, 3 cauchy, 4 laplace
 64 |       int<lower=1,upper=4> prior_dist_sd_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace
 65 | 
 66 |       real<lower=0> hyper_df;
 67 |       real hyper_location;
 68 | 
 69 |       real<lower=0> hyper_sd_df;
 70 |       real hyper_sd_location;
 71 |       real<lower=1e-8> hyper_sd_scale;
 72 |     }
 73 |     parameters{
 74 |       vector[nteams] att_raw;
 75 |       vector[nteams] def_raw;
 76 |       real<lower=1e-8> sigma_att;
 77 |       real<lower=1e-8> sigma_def;
 78 |       real home;
 79 |       real rho;
 80 |       real gamma;
 81 |       real <lower=0,upper=1> prob_of_draws;// excessive probability of draws
 82 | 
 83 |     }
 84 |     transformed parameters{
 85 |       real adj_h_eff;                   // Adjusted home effect
 86 |       vector[nteams] att;
 87 |       vector[nteams] def;
 88 |       array[N] vector[3] theta;
 89 | 
 90 |       for (t in 1:nteams){
 91 |         att[t] = att_raw[t]-mean(att_raw);
 92 |         def[t] = def_raw[t]-mean(def_raw);
 93 |       }
 94 | 
 95 |       adj_h_eff = home * ind_home;
 96 | 
 97 |       for (n in 1:N){
 98 |         theta[n,1] = exp(adj_h_eff+att[team1[n]]+def[team2[n]]+
 99 |                          (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]]));
100 |         theta[n,2] = exp(att[team2[n]]+def[team1[n]]-
101 |                          (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]]));
102 |         theta[n,3] = exp(rho);
103 |       }
104 | }
105 | model{
106 |       // log-priors for team-specific abilities
107 |       for (t in 1:(nteams)){
108 |         if (prior_dist_num == 1){
109 |           target+= normal_lpdf(att_raw[t]|hyper_location, sigma_att);
110 |           target+= normal_lpdf(def_raw[t]|hyper_location, sigma_def);
111 |         }
112 |         else if (prior_dist_num == 2){
113 |           target+= student_t_lpdf(att_raw[t]|hyper_df, hyper_location, sigma_att);
114 |           target+= student_t_lpdf(def_raw[t]|hyper_df, hyper_location, sigma_def);
115 |         }
116 |         else if (prior_dist_num == 3){
117 |           target+= cauchy_lpdf(att_raw[t]|hyper_location, sigma_att);
118 |           target+= cauchy_lpdf(def_raw[t]|hyper_location, sigma_def);
119 |         }
120 |         else if (prior_dist_num == 4){
121 |           target+= double_exponential_lpdf(att_raw[t]|hyper_location, sigma_att);
122 |           target+= double_exponential_lpdf(def_raw[t]|hyper_location, sigma_def);
123 |         }
124 |       }
125 | 
126 | 
127 |       // log-hyperpriors for sd parameters
128 |       if (prior_dist_sd_num == 1 ){
129 |         target+=normal_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
130 |         target+=normal_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
131 |       }
132 |       else if (prior_dist_sd_num == 2){
133 |         target+=student_t_lpdf(sigma_att|hyper_sd_df, hyper_sd_location, hyper_sd_scale);
134 |         target+=student_t_lpdf(sigma_def|hyper_sd_df, hyper_sd_location, hyper_sd_scale);
135 |       }
136 |       else if (prior_dist_sd_num == 3){
137 |         target+=cauchy_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
138 |         target+=cauchy_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
139 |       }
140 |       else if (prior_dist_sd_num == 4){
141 |         target+=double_exponential_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale);
142 |         target+=double_exponential_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale);
143 |       }
144 | 
145 |       // log-priors fixed effects
146 |       target+=normal_lpdf(home|mean_home,sd_home);
147 |       target+=normal_lpdf(rho|0,1);
148 |       target+=normal_lpdf(gamma|0,1);
149 |       target+=uniform_lpdf(prob_of_draws|0,1);
150 | 
151 |       // likelihood
152 | 
153 |       for (n in 1:N){
154 |          target+=diag_infl_bipois_lpmf(y[n,]| theta[n,1],
155 |                    theta[n,2], theta[n,3],prob_of_draws);
156 | 
157 |    // if (y[n,1] == y[n,2]){// Alternative way as proposed by Stan manual
158 |   //      target += log_sum_exp(bernoulli_lpmf(1 |prob_of_draws),
159 |    //                         bernoulli_lpmf(0 | prob_of_draws)
160 |      //                    + bipois_lpmf(y[n,] | theta[n,1],
161 |                  //  theta[n,2], theta[n,3]) );
162 |      // } else {
163 |        //  target += bernoulli_lpmf(0 |prob_of_draws)
164 |          //            + bipois_lpmf(y[n,] |theta[n,1],
165 |                  //  theta[n,2], theta[n,3]);
166 |     // }
167 |  }
168 | }
169 | 
170 | generated quantities{
171 |       array[N,2] int y_rep;
172 |       array[N_prev,2] int y_prev;
173 |       vector[N] log_lik;
174 |       array[N] int diff_y_rep;
175 |       vector[N_prev] theta_home_prev;                    // exponentiated linear pred.
176 |       vector[N_prev] theta_away_prev;
177 |       vector[N_prev] theta_corr_prev;
178 | 
179 |       //in-sample replications
180 |       for (n in 1:N){
181 |         y_rep[n,1] = poisson_rng(theta[n,1]+theta[n,3]);
182 |         y_rep[n,2] = poisson_rng(theta[n,2]+theta[n,3]);
183 |         diff_y_rep[n] = y_rep[n,1] - y_rep[n,2];
184 |         log_lik[n] =diag_infl_bipois_lpmf(y[n,]| theta[n,1],
185 |                    theta[n,2], theta[n,3],prob_of_draws);
186 |            //    if (y[n,1] == y[n,2]){// Alternative way proposed by Stan documentation
187 |     //  log_lik[n] = log_sum_exp(bernoulli_lpmf(1 |prob_of_draws),
188 |           //                  bernoulli_lpmf(0 |prob_of_draws)
189 |            //                   + bipois_lpmf(y[n,] | theta[n,1],
190 |                  //  theta[n,2], theta[n,3]));
191 |    //} else {
192 |    //   log_lik[n] = bernoulli_lpmf(0 |prob_of_draws)
193 |               //    + bipois_lpmf(y[n,] | theta[n,1],
194 |                  //  theta[n,2], theta[n,3]);
195 |   //}
196 | }
197 | 
198 |     //out-of-sample predictions
199 |       if (N_prev > 0) {
200 |         for (n in 1:N_prev){
201 |           theta_home_prev[n] = exp(adj_h_eff+att[team1_prev[n]]+
202 |                                   def[team2_prev[n]]+
203 |                            (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]]));
204 |           theta_away_prev[n] = exp(att[team2_prev[n]]+
205 |                                   def[team1_prev[n]]-
206 |                            (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]]));
207 |           theta_corr_prev[n] = exp(rho);
208 |           y_prev[n,1] = poisson_rng(theta_home_prev[n]+theta_corr_prev[n]);
209 |           y_prev[n,2] = poisson_rng(theta_away_prev[n]+theta_corr_prev[n]);
210 |         }
211 |       }
212 | }
213 | 


--------------------------------------------------------------------------------
/tests/testthat/test_mle_foot.R:
--------------------------------------------------------------------------------
  1 | ## all the test PASSED (also the skipped ones!)
  2 | 
  3 | #   ____________________________________________________________________________
  4 | #   Data tests                                                              ####
  5 | 
  6 | test_that("errors if data is not a matrix or data frame", {
  7 |   expect_error(
  8 |     mle_foot(123, model = "double_pois"),
  9 |     "Input data must be a data.frame with columns: periods, home_team, away_team, home_goals, away_goals."
 10 |   )
 11 | })
 12 | 
 13 | test_that("errors if required columns are missing", {
 14 |   ##  ............................................................................
 15 |   ##  Data                                                                    ####
 16 | 
 17 |   df_missing <- data.frame(
 18 |     periods   = 1:5,
 19 |     home_team = rep("TeamA", 5),
 20 |     away_team = rep("TeamB", 5),
 21 |     home_goals = rep(1, 5) # missing away_goals
 22 |   )
 23 | 
 24 |   ##  ............................................................................
 25 |   ##  Tests                                                                   ####
 26 | 
 27 |   expect_error(
 28 |     mle_foot(df_missing, model = "double_pois"),
 29 |     "data is missing required columns"
 30 |   )
 31 | })
 32 | 
 33 | 
 34 | test_that("errors if goals are not numeric", {
 35 |   ##  ............................................................................
 36 |   ##  Data                                                                    ####
 37 | 
 38 |   df_non_numeric <- data.frame(
 39 |     periods    = 1:5,
 40 |     home_team  = rep("TeamA", 5),
 41 |     away_team  = rep("TeamB", 5),
 42 |     home_goals = as.character(rep(1, 5)),
 43 |     away_goals = as.character(rep(0, 5))
 44 |   )
 45 | 
 46 |   ##  ............................................................................
 47 |   ##  Tests                                                                   ####
 48 | 
 49 |   expect_error(
 50 |     mle_foot(df_non_numeric, model = "double_pois"),
 51 |     "Goals are not numeric"
 52 |   )
 53 | })
 54 | 
 55 | 
 56 | test_that("warns if data has more than 5 columns", {
 57 |   skip_on_cran()
 58 | 
 59 |   ##  ............................................................................
 60 |   ##  Data                                                                    ####
 61 |   valid_data <- data.frame(
 62 |     periods    = 1:10,
 63 |     home_team  = rep(c("TeamA", "TeamB"), 5),
 64 |     away_team  = rep(c("TeamB", "TeamA"), 5),
 65 |     home_goals = sample(0:3, 10, replace = TRUE),
 66 |     away_goals = sample(0:3, 10, replace = TRUE)
 67 |   )
 68 |   df_extra <- valid_data
 69 |   df_extra$extra_col <- rnorm(nrow(valid_data))
 70 | 
 71 |   ##  ............................................................................
 72 |   ##  Tests                                                                   ####
 73 |   expect_warning(
 74 |     mle_foot(df_extra, model = "double_pois"),
 75 |     "Your dataset seems too large"
 76 |   )
 77 | })
 78 | 
 79 | #   ____________________________________________________________________________
 80 | #   Arguments tests                                                         ####
 81 | 
 82 | test_that("errors if predict is not a non-negative integer", {
 83 |   ##  ............................................................................
 84 |   ##  Data                                                                    ####
 85 |   valid_data <- data.frame(
 86 |     periods    = 1:10,
 87 |     home_team  = rep(c("TeamA", "TeamB"), 5),
 88 |     away_team  = rep(c("TeamB", "TeamA"), 5),
 89 |     home_goals = sample(0:3, 10, replace = TRUE),
 90 |     away_goals = sample(0:3, 10, replace = TRUE)
 91 |   )
 92 |   df_extra <- valid_data
 93 |   df_extra$extra_col <- rnorm(nrow(valid_data))
 94 | 
 95 |   ##  ............................................................................
 96 |   ##  Tests                                                                   ####
 97 |   expect_error(
 98 |     mle_foot(valid_data, model = "double_pois", predict = -1),
 99 |     "The argument 'predict' must be a non-negative integer"
100 |   )
101 |   expect_error(
102 |     mle_foot(valid_data, model = "double_pois", predict = 1.5),
103 |     "The argument 'predict' must be a non-negative integer"
104 |   )
105 |   expect_error(
106 |     mle_foot(valid_data, model = "double_pois", predict = "2"),
107 |     "The argument 'predict' must be a non-negative integer"
108 |   )
109 | })
110 | 
111 | 
112 | 
113 | test_that("errors if training set size is zero or negative", {
114 |   ##  ............................................................................
115 |   ##  Data                                                                    ####
116 |   valid_data <- data.frame(
117 |     periods    = 1:10,
118 |     home_team  = rep(c("TeamA", "TeamB"), 5),
119 |     away_team  = rep(c("TeamB", "TeamA"), 5),
120 |     home_goals = sample(0:3, 10, replace = TRUE),
121 |     away_goals = sample(0:3, 10, replace = TRUE)
122 |   )
123 |   df_extra <- valid_data
124 |   df_extra$extra_col <- rnorm(nrow(valid_data))
125 | 
126 |   ##  ............................................................................
127 |   ##  Tests                                                                   ####
128 | 
129 |   # If predict equals the number of rows, training set size becomes zero.
130 |   expect_error(
131 |     mle_foot(valid_data, model = "double_pois", predict = nrow(valid_data)),
132 |     "The training set size is zero"
133 |   )
134 |   # If predict is larger than number of rows
135 |   expect_error(
136 |     mle_foot(valid_data, model = "double_pois", predict = nrow(valid_data) + 1),
137 |     "The training set size is zero"
138 |   )
139 | })
140 | 
141 | 
142 | test_that("model argument must be one of the allowed values", {
143 |   ##  ............................................................................
144 |   ##  Data                                                                    ####
145 |   valid_data <- data.frame(
146 |     periods    = 1:10,
147 |     home_team  = rep(c("TeamA", "TeamB"), 5),
148 |     away_team  = rep(c("TeamB", "TeamA"), 5),
149 |     home_goals = sample(0:3, 10, replace = TRUE),
150 |     away_goals = sample(0:3, 10, replace = TRUE)
151 |   )
152 |   df_extra <- valid_data
153 |   df_extra$extra_col <- rnorm(nrow(valid_data))
154 | 
155 |   ##  ............................................................................
156 |   ##  Tests                                                                   ####
157 |   expect_error(
158 |     mle_foot(valid_data, model = "invalid_model"))
159 | })
160 | 
161 | 
162 | test_that("output structure for skellam with out-of-sample prediction", {
163 |   skip_on_cran()
164 | 
165 |   ##  ............................................................................
166 |   ##  Data                                                                    ####
167 |   valid_data <- data.frame(
168 |     periods    = 1:10,
169 |     home_team  = rep(c("TeamA", "TeamB"), 5),
170 |     away_team  = rep(c("TeamB", "TeamA"), 5),
171 |     home_goals = sample(0:3, 10, replace = TRUE),
172 |     away_goals = sample(0:3, 10, replace = TRUE)
173 |   )
174 |   df_extra <- valid_data
175 |   df_extra$extra_col <- rnorm(nrow(valid_data))
176 | 
177 |   ##  ............................................................................
178 |   ##  Tests                                                                   ####
179 |   result <- mle_foot(valid_data, model = "skellam", predict = 2)
180 |   expect_true(all(c("att", "def", "home_effect", "model", "predict", "team1_prev", "team2_prev",
181 |                     "logLik", "aic", "bic") %in% names(result)))
182 |   expect_equal(result$predict, 2)
183 |   expect_equal(length(result$team1_prev), 2)
184 |   expect_equal(length(result$team2_prev), 2)
185 | })
186 | 
187 | # require(dplyr)
188 | #
189 | # # data
190 | # data("italy")
191 | # italy <- as_tibble(italy)
192 | # italy_2008<- italy %>%
193 | #   dplyr::select(Season, home, visitor, hgoal,vgoal) %>%
194 | #   filter( Season=="2008")
195 | #
196 | # spain <- as_tibble(spain)
197 | # spain_2008<- spain %>%
198 | #   dplyr::select(Season, home, visitor, hgoal,vgoal) %>%
199 | #   filter( Season=="2008")
200 | #
201 | # germany <- as_tibble(germany)
202 | # germany_2008<- germany %>%
203 | #   dplyr::select(Season, home, visitor, hgoal,vgoal) %>%
204 | #   filter( Season=="2008")
205 | #
206 | #
207 | # context("mle plain models")
208 | # # These tests regard the performance of the mle_foot functions,
209 | # # for methods double_pois, biv_pois, skellam and student_t.
210 | #
211 | # test_that("some mle models get errors/warnings", {
212 | #
213 | #
214 | #   expect_warning(mle_foot(data = italy_2008,
215 | #            model ="skellam"))
216 | #   # warnings: "precision lost in result"
217 | #   # ---> corrected if using "L-BFGS-B" as method
218 | #
219 | #   expect_warning(mle_foot(data = italy_2008,
220 | #            model ="student_t"))
221 | #   # warnings: log(sd)
222 | #   # ---> corrected if using "Nelder-Mead" as method
223 | #
224 | #   # datasets with less than 4 columns
225 | #   expect_error(mle_foot(data = italy_2008[,1:3],
226 | #                         model = "biv_pois"))
227 | #
228 | #   # datasets different than matrix and dataframe
229 | #   expect_error(mle_foot(data = rnorm(20),
230 | #                         model = "biv_pois"))
231 | #
232 | #   # wrong model names
233 | #   expect_error(mle_foot(data = italy_2008,
234 | #                         model = "neg_binomial"))
235 | #
236 | #
237 | # })
238 | #
239 | #
240 | # test_that("prediction causes  warnings/errors", {
241 | #
242 | #   ## predict > N
243 | #   expect_error(mle_foot(italy_2008,
244 | #                          model ="student_t",
245 | #                          predict = dim(italy_2008)[1]+1))
246 | #
247 | #   ## predict not a number
248 | #   expect_error(mle_foot(italy_2008,
249 | #                          model ="student_t",
250 | #                          predict = "a"))
251 | #
252 | #   ## predict decimal number
253 | #   # expect_warning(stan_foot(england_2004,
254 | #   #           model ="student_t",
255 | #   #           predict = 30.5))
256 | # })
257 | #
258 | #
259 | #
260 | #
261 | #
262 | #
263 | #
264 | #
265 | #
266 | 


--------------------------------------------------------------------------------
/tests/testthat/test_foot_prob.R:
--------------------------------------------------------------------------------
  1 | ## all the test PASSED (also the skipped ones!)
  2 | 
  3 | 
  4 | #   ____________________________________________________________________________
  5 | #   Data checks                                                             ####
  6 | 
  7 | test_that("foot_prob errors when data is missing required columns and when it returns a valid output", {
  8 |   skip_on_cran()
  9 |   skip_if_not(stan_cmdstan_exists())
 10 | 
 11 |   ##  ............................................................................
 12 |   ##  Data                                                                    ####
 13 | 
 14 |   data("england")
 15 |   england <- as.data.frame(england)
 16 | 
 17 |   # One season only
 18 |   england_2004 <- england %>%
 19 |     dplyr::filter(division == 1) %>%
 20 |     dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
 21 |     dplyr::filter(Season == "2004")
 22 | 
 23 |   colnames(england_2004) <- c(
 24 |     "periods", "home_team", "away_team",
 25 |     "home_goals", "away_goals"
 26 |   )
 27 | 
 28 |   england_2004_wrg <- england_2004[, 1:3]
 29 | 
 30 |   ##  ............................................................................
 31 |   ##  Tests                                                                   ####
 32 | 
 33 |   model_pois <- stan_foot(
 34 |     data = england_2004,
 35 |     model = "double_pois",
 36 |     predict = 30,
 37 |     iter_sampling = 200,
 38 |     chains = 2,
 39 |     seed = 433
 40 |   )
 41 | 
 42 |   # Missing a column in the data
 43 |   expect_error(
 44 |     foot_prob(object = model_pois, data = england_2004_wrg)
 45 |   )
 46 | 
 47 | 
 48 |   # Correct model
 49 |   result <- expect_error(
 50 |     foot_prob(object = model_pois, data = england_2004),
 51 |     NA
 52 |   )
 53 | 
 54 |   expect_type(result, "list")
 55 |   expect_true("prob_table" %in% names(result))
 56 |   expect_true("prob_plot" %in% names(result))
 57 |   expect_s3_class(result$prob_table, "data.frame")
 58 | 
 59 |   # Check that the expected columns are in the table.
 60 |   expected_cols <- c("home_team", "away_team", "prob_h", "prob_d", "prob_a", "mlo")
 61 |   expect_true(all(expected_cols %in% names(result$prob_table)))
 62 | 
 63 |   # Verify that the plot is a ggplot object.
 64 |   expect_s3_class(result$prob_plot, "ggplot")
 65 | 
 66 | 
 67 |   # T-student model
 68 |   model_stud <- stan_foot(
 69 |     data = england_2004,
 70 |     model = "student_t",
 71 |     predict = 30,
 72 |     iter_sampling = 200,
 73 |     chains = 2,
 74 |     seed = 433
 75 |   )
 76 | 
 77 |   # Correct model CmdStan
 78 |   expect_error(
 79 |     foot_prob(object = model_stud$fit, data = england_2004),
 80 |     NA
 81 |   )
 82 | })
 83 | 
 84 | test_that("foot_prob errors if object draws lack required elements", {
 85 |   ##  ............................................................................
 86 |   ##  Data                                                                    ####
 87 | 
 88 |   dummy_data <- data.frame(
 89 |     periods = rep(1, 6),
 90 |     home_team = c("TeamA", "TeamB", "TeamC", "TeamD", "TeamE", "TeamF"),
 91 |     away_team = c("TeamG", "TeamH", "TeamI", "TeamJ", "TeamK", "TeamL"),
 92 |     home_goals = c(1, 2, 0, 3, 2, 1),
 93 |     away_goals = c(0, 1, 2, 1, 1, 2),
 94 |     stringsAsFactors = FALSE
 95 |   )
 96 | 
 97 |   ##  ............................................................................
 98 |   ##  Tests                                                                   ####
 99 | 
100 |   # Fake draws object without y_prev or diff_y_prev.
101 |   fake_draws_empty <- list(a = 1)
102 |   fake_fit_empty <- list(
103 |     draws = function() {
104 |       return(fake_draws_empty)
105 |     }
106 |   )
107 |   class(fake_fit_empty) <- "CmdStanFit"
108 |   fake_stanfoot_empty <- list(fit = fake_fit_empty)
109 |   class(fake_stanfoot_empty) <- "stanFoot"
110 | 
111 |   expect_error(
112 |     foot_prob(fake_fit_empty, dummy_data),
113 |     "does not contain 'y_prev' or 'diff_y_prev'"
114 |   )
115 | 
116 |   expect_error(
117 |     foot_prob(fake_stanfoot_empty, dummy_data),
118 |     "does not contain 'y_prev' or 'diff_y_prev'"
119 |   )
120 | 
121 |   # No 'stanfit', 'CmdStanFit', 'stanFoot' or 'list'.
122 |   class(fake_fit_empty) <- "ggplot2"
123 | 
124 |   expect_error(
125 |     foot_prob(fake_fit_empty, dummy_data),
126 |     "Provide one among these four model fit classes: 'stanfit', 'CmdStanFit', 'stanFoot' or 'list'."
127 |   )
128 | })
129 | 
130 | 
131 | #   ____________________________________________________________________________
132 | #   Arguments checks                                                        ####
133 | 
134 | 
135 | test_that("foot_prob errors when home_team and away_team lengths mismatch", {
136 |   skip_on_cran()
137 |   skip_if_not(stan_cmdstan_exists())
138 | 
139 |   ##  ............................................................................
140 |   ##  Data                                                                    ####
141 | 
142 |   data("england")
143 |   england <- as.data.frame(england)
144 | 
145 |   # One season only
146 |   england_2004 <- england %>%
147 |     dplyr::filter(division == 1) %>%
148 |     dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
149 |     dplyr::filter(Season == "2004")
150 | 
151 |   colnames(england_2004) <- c(
152 |     "periods", "home_team", "away_team",
153 |     "home_goals", "away_goals"
154 |   )
155 | 
156 |   ##  ............................................................................
157 |   ##  Tests                                                                   ####
158 | 
159 |   model_pois <- stan_foot(
160 |     data = england_2004,
161 |     model = "double_pois",
162 |     predict = 30,
163 |     iter_sampling = 200,
164 |     chains = 2,
165 |     seed = 433
166 |   )
167 | 
168 |   # Correct
169 |   expect_error(
170 |     foot_prob(
171 |       object = model_pois, data = england_2004,
172 |       home_team = "Arsenal",
173 |       away_team = "Everton"
174 |     ), NA
175 |   )
176 | 
177 | 
178 |   # There is not any out-of-sample match:Arsenal-Chelsea
179 |   expect_error(
180 |     foot_prob(
181 |       object = model_pois, data = england_2004,
182 |       home_team = "Arsenal",
183 |       away_team = "Chelsea"
184 |     )
185 |   )
186 | 
187 |   # home_team and away_team lengths mismatch
188 |   expect_error(
189 |     foot_prob(
190 |       object = model_pois, data = england_2004,
191 |       home_team = "Arsenal",
192 |       away_team = c("Chelsea", "Liverpool")
193 |     ),
194 |     "Please, include the same number for home and away teams."
195 |   )
196 | 
197 |   # No predictions
198 |   model_pois_no_pred <- stan_foot(
199 |     data = england_2004,
200 |     model = "double_pois",
201 |     iter_sampling = 200,
202 |     chains = 2,
203 |     seed = 433
204 |   )
205 | 
206 |   expect_error(
207 |     foot_prob(
208 |       object = model_pois_no_pred, data = england_2004),
209 |   )
210 | })
211 | 
212 | 
213 | test_that("foot_prob errors when predict is zero for list objects", {
214 |   skip_on_cran()
215 |   skip_if_not(stan_cmdstan_exists())
216 | 
217 |   ##  ............................................................................
218 |   ##  Data                                                                    ####
219 | 
220 |   data("england")
221 |   england <- as.data.frame(england)
222 | 
223 |   # One season only
224 |   england_2004 <- england %>%
225 |     dplyr::filter(division == 1) %>%
226 |     dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
227 |     dplyr::filter(Season == "2004")
228 | 
229 |   colnames(england_2004) <- c(
230 |     "periods", "home_team", "away_team",
231 |     "home_goals", "away_goals"
232 |   )
233 | 
234 |   ##  ............................................................................
235 |   ##  Tests                                                                   ####
236 | 
237 |   model_pois <- stan_foot(
238 |     data = england_2004,
239 |     model = "double_pois",
240 |     iter_sampling = 200,
241 |     chains = 2,
242 |     seed = 433
243 |   )
244 | 
245 |   expect_error(
246 |     foot_prob(model_pois, england_2004),
247 |   )
248 | })
249 | 
250 | 
251 | #   ____________________________________________________________________________
252 | #   MLE models                                                              ####
253 | 
254 | test_that("MLE models", {
255 |   skip_on_cran()
256 | 
257 |   ##  ............................................................................
258 |   ##  Data                                                                    ####
259 | 
260 |   data("england")
261 |   england <- as.data.frame(england)
262 | 
263 |   # One season only
264 |   england_2004 <- england %>%
265 |     dplyr::filter(division == 1) %>%
266 |     dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
267 |     dplyr::filter(Season == "2004")
268 | 
269 |   colnames(england_2004) <- c(
270 |     "periods", "home_team", "away_team",
271 |     "home_goals", "away_goals"
272 |   )
273 | 
274 |   ##  ............................................................................
275 |   ##  Tests                                                                   ####
276 | 
277 |   model <- mle_foot(england_2004, "double_pois", predict = 180, interval = "Wald")
278 |   expect_error(foot_prob(object = model, data = england_2004), NA)
279 | 
280 |   model <- mle_foot(england_2004, "biv_pois", predict = 180)
281 |   expect_error(foot_prob(object = model, data = england_2004), NA)
282 | 
283 |   model <- mle_foot(england_2004, "skellam", predict = 180)
284 |   expect_error(foot_prob(object = model, data = england_2004), NA)
285 | })
286 | # require(dplyr)
287 | #
288 | # data("italy")
289 | # italy_2000<- italy %>%
290 | #   dplyr::select(Season, home, visitor, hgoal,vgoal) %>%
291 | #   filter(Season=="2000")
292 | #
293 | # test_that("foot_prob gets errors", {
294 | #
295 | #   # different numbers of home_team and away team
296 | #   fit2 <- stan_foot(italy_2000, "biv_pois", predict = 36, iter = 200)
297 | #   expect_error(foot_prob(fit2, italy_2000, home_team = c("Hellas Verona", "US Lecce"),
298 | #             away_team = c("AC Perugia")))
299 | #
300 | #   # no predict
301 | #   ## in stan_foot
302 | #   fit3 <- stan_foot(italy_2000, "biv_pois", iter = 200)
303 | #   expect_error(foot_prob(fit3, italy_2000))
304 | #
305 | #
306 | #   ## mle_foot
307 | #   fit4 <- mle_foot(italy_2000, "biv_pois")
308 | #   expect_error(foot_prob(fit4, italy_2000))
309 | #
310 | #   # wrong class
311 | #   y <- italy_2000$hgoal
312 | #   fit5 <- lm(y~ rnorm(length(y)))
313 | #   expect_error(foot_prob(fit5, italy_2000))
314 | #
315 | # })
316 | 


--------------------------------------------------------------------------------
/src/stan/neg_bin_dynamic.stan:
--------------------------------------------------------------------------------
  1 | data{
  2 |       int N;   // number of games
  3 |       int<lower=0> N_prev;
  4 |       array[N,2] int y;
  5 |       int nteams;
  6 |       array[N] int team1;
  7 |       array[N] int team2;
  8 |       array[N_prev] int team1_prev;
  9 |       array[N_prev] int team2_prev;
 10 |       int ntimes;                 // dynamic periods
 11 |       array[ntimes] int time;
 12 |       array[N] int instants;
 13 |       array[N_prev] int instants_prev;
 14 |       array[N] int instants_rank;
 15 |       int ntimes_rank;                 // dynamic periods for ranking
 16 |       matrix[ntimes_rank,nteams] ranking;      // eventual fifa/uefa ranking
 17 |       int<lower=0, upper=1> ind_home;
 18 |       int<lower=0, upper=1> ind_common_sigma;
 19 |       real mean_home;              // Mean for home effect
 20 |       real<lower=1e-8> sd_home;      // Standard deviation for home effect
 21 | 
 22 | 
 23 |       // priors part
 24 |       int<lower=1,upper=4> prior_dist_num;    // 1 gaussian, 2 t, 3 cauchy, 4 laplace
 25 |       int<lower=1,upper=4> prior_dist_sd_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace
 26 | 
 27 |       real<lower=0> hyper_df;
 28 |       real hyper_location;
 29 | 
 30 |       real<lower=0> hyper_sd_df;
 31 |       real hyper_sd_location;
 32 |       real<lower=1e-8> hyper_sd_scale;
 33 | 
 34 |       // commensurate prior
 35 |       int<lower=0, upper=1> ind_comm_prior;
 36 |       real mu_spike;
 37 |       real<lower=0> sd_spike;
 38 |       real mu_slab;
 39 |       real<lower=0> sd_slab;
 40 |       real<lower=0, upper=1> p_spike;
 41 |     }
 42 | 
 43 |     transformed data {
 44 |       real lognc_spike = normal_lccdf(0 | mu_spike, sd_spike); // \Phi(mu_spike / sd_spike)
 45 |       real lognc_slab  = normal_lccdf(0 | mu_slab, sd_slab); // \Phi(mu_slab / sd_slab)
 46 |     }
 47 | 
 48 |     parameters{
 49 |       matrix[ntimes, nteams] att_raw;        // Raw attack ability
 50 |       matrix[ntimes, nteams] def_raw;        // Raw defense ability
 51 |       vector[nteams] home;
 52 |       real gamma;
 53 |       array[(ind_comm_prior|| ind_common_sigma) ? 0 : 1] real<lower=1e-8> sigma_att; // Evolution variance Egidi (2018)
 54 |       array[(ind_comm_prior|| ind_common_sigma) ? 0 : 1] real<lower=1e-8> sigma_def; // Evolution variance Egidi (2018)
 55 |       array[(ind_comm_prior|| !ind_common_sigma) ? 0 : 1] real<lower=1e-8> sigma_common; // Evolution variance Owen (2011)
 56 |       vector<lower=0>[ind_comm_prior? ntimes : 0] comm_prec_att;  // Commensurability parameter
 57 |       vector<lower=0>[ind_comm_prior? ntimes : 0] comm_prec_def;  // Commensurability parameter
 58 | 
 59 |       // Negative Binomial dispersion parameters
 60 |       real<lower=0> phi1;
 61 |       real<lower=0> phi2;
 62 |     }
 63 | 
 64 | transformed parameters {
 65 |   matrix[ntimes, nteams] att;            // attack abilities
 66 |   matrix[ntimes, nteams] def;            // defense abilities
 67 |   matrix[ntimes, nteams] mu_att;         // attack hyperparameter
 68 |   matrix[ntimes, nteams] mu_def;         // defense hyperparameter
 69 |   vector[nteams] adj_h_eff;
 70 |   array[N] vector[2] theta;
 71 | 
 72 |   // Sum-to-zero constraint for attack/defense parameters
 73 |   att[1]=att_raw[1]-mean(att_raw[1]);
 74 |   def[1]=def_raw[1]-mean(def_raw[1]);
 75 |   for (t in 2:ntimes){
 76 |     att[t]=att_raw[t]-mean(att_raw[t]);
 77 |     def[t]=def_raw[t]-mean(def_raw[t]);
 78 |   }
 79 |   // Lagged prior mean for attack/defense parameters
 80 |   for (t in 2:(ntimes)){
 81 |     mu_att[1]=rep_row_vector(hyper_location,nteams);
 82 |     mu_att[t]=att[t-1];
 83 |     //rep_row_vector(0,nteams);
 84 | 
 85 |     mu_def[1]=rep_row_vector(hyper_location,nteams);
 86 |     mu_def[t]=def[t-1];
 87 |     //rep_row_vector(0,nteams);
 88 |     }
 89 | 
 90 |   adj_h_eff = home * ind_home;
 91 | 
 92 |   for (n in 1:N){
 93 |     theta[n,1] = exp(adj_h_eff[team1[n]]+att[instants[n], team1[n]]+def[instants[n], team2[n]]+
 94 |                      (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]]));
 95 |     theta[n,2] = exp(att[instants[n], team2[n]]+def[instants[n], team1[n]]-
 96 |                      (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]]));
 97 |   }
 98 | 
 99 |   vector[ind_comm_prior? ntimes : 0] comm_sd_att = inv_sqrt(comm_prec_att);   // SD for commensurate prior
100 |   vector[ind_comm_prior? ntimes : 0] comm_sd_def = inv_sqrt(comm_prec_def);   // SD for commensurate prior
101 | 
102 | }
103 | 
104 | model {
105 | if(ind_comm_prior == 1){
106 |         for (i in 1:ntimes) {
107 |           // Att
108 |           target += log_mix(
109 |             p_spike
110 |             , normal_lpdf(comm_prec_att[i] | mu_spike, sd_spike) - lognc_spike
111 |             , normal_lpdf(comm_prec_att[i] | mu_slab, sd_slab) - lognc_slab
112 |           );
113 |           // Def
114 |           target += log_mix(
115 |             p_spike
116 |             , normal_lpdf(comm_prec_def[i] | mu_spike, sd_spike) - lognc_spike
117 |             , normal_lpdf(comm_prec_def[i] | mu_slab, sd_slab) - lognc_slab
118 |          );
119 |         }
120 | 
121 |         // log-priors for team-specific abilities
122 |         for (h in 1:(nteams)){
123 |           for (i in 1:ntimes){
124 |             if (prior_dist_num == 1 ){
125 |               target+= normal_lpdf(att_raw[i,h]| mu_att[i,h], comm_sd_att[i]);
126 |               target+= normal_lpdf(def_raw[i,h]| mu_def[i,h], comm_sd_def[i]);
127 |             }
128 |             else if (prior_dist_num == 2 ){
129 |               target+= student_t_lpdf(att_raw[i,h]| hyper_df, mu_att[i,h], comm_sd_att[i]);
130 |               target+= student_t_lpdf(def_raw[i,h]| hyper_df, mu_def[i,h], comm_sd_def[i]);
131 |             }
132 |             else if (prior_dist_num == 3 ){
133 |               target+= student_t_lpdf(att_raw[i,h]| 1, mu_att[i,h], comm_sd_att[i]);
134 |               target+= student_t_lpdf(def_raw[i,h]| 1, mu_def[i,h], comm_sd_def[i]);
135 |             }
136 |           }
137 |         }
138 | 
139 |       }
140 |       else{
141 |         // First order autoregressive model Owen (2011)
142 |         // log-priors for team-specific abilities
143 |         for (h in 1:(nteams)){
144 |           for (i in 1:ntimes){
145 |             if (prior_dist_num == 1 ){
146 |               if(ind_common_sigma == 0) {
147 |               target+= normal_lpdf(att_raw[i,h]| mu_att[i,h], sigma_att[1]);
148 |               target+= normal_lpdf(def_raw[i,h]| mu_def[i,h], sigma_def[1]);
149 |               } else {
150 |               target+= normal_lpdf(att_raw[i,h]| mu_att[i,h], sigma_common[1]);
151 |               target+= normal_lpdf(def_raw[i,h]| mu_def[i,h], sigma_common[1]);
152 |               }
153 |             }
154 |             else if (prior_dist_num == 2 ){
155 |               if(ind_common_sigma == 0) {
156 |               target+= student_t_lpdf(att_raw[i,h]| hyper_df, mu_att[i,h], sigma_att[1]);
157 |               target+= student_t_lpdf(def_raw[i,h]| hyper_df, mu_def[i,h], sigma_def[1]);
158 |               } else{
159 |               target+= student_t_lpdf(att_raw[i,h]| hyper_df, mu_att[i,h], sigma_common[1]);
160 |               target+= student_t_lpdf(def_raw[i,h]| hyper_df, mu_def[i,h], sigma_common[1]);
161 |               }
162 |             }
163 |             else if (prior_dist_num == 3 ){
164 |               if(ind_common_sigma == 0) {
165 |               target+= student_t_lpdf(att_raw[i,h]| 1, mu_att[i,h], sigma_att[1]);
166 |               target+= student_t_lpdf(def_raw[i,h]| 1, mu_def[i,h], sigma_def[1]);
167 |               } else{
168 |               target+= student_t_lpdf(att_raw[i,h]| 1, mu_att[i,h], sigma_common[1]);
169 |               target+= student_t_lpdf(def_raw[i,h]| 1, mu_def[i,h], sigma_common[1]);
170 |               }
171 |             }
172 |           }
173 |         }
174 |         // log-hyperpriors for sd parameters
175 |           if (prior_dist_sd_num == 1 ){
176 |             if(ind_common_sigma == 0) {
177 |               target+=normal_lpdf(sigma_att[1]|hyper_sd_location, hyper_sd_scale);
178 |               target+=normal_lpdf(sigma_def[1]|hyper_sd_location, hyper_sd_scale);
179 |             } else {
180 |                 target+=normal_lpdf(sigma_common[1]|hyper_sd_location, hyper_sd_scale);
181 |               }
182 |           }
183 |           else if (prior_dist_sd_num == 2){
184 |             if(ind_common_sigma == 0) {
185 |               target+=student_t_lpdf(sigma_att[1]|hyper_sd_df, hyper_sd_location, hyper_sd_scale);
186 |               target+=student_t_lpdf(sigma_def[1]|hyper_sd_df, hyper_sd_location, hyper_sd_scale);
187 |             } else {
188 |                 target+=student_t_lpdf(sigma_common[1]|hyper_sd_df, hyper_sd_location, hyper_sd_scale);
189 |               }
190 |           }
191 |           else if (prior_dist_sd_num == 3){
192 |             if(ind_common_sigma == 0) {
193 |               target+=cauchy_lpdf(sigma_att[1]|hyper_sd_location, hyper_sd_scale);
194 |               target+=cauchy_lpdf(sigma_def[1]|hyper_sd_location, hyper_sd_scale);
195 |             } else {
196 |               target+=cauchy_lpdf(sigma_common[1]|hyper_sd_location, hyper_sd_scale);
197 |               }
198 |           }
199 |           else if (prior_dist_sd_num == 4){
200 |             if(ind_common_sigma == 0) {
201 |               target+=double_exponential_lpdf(sigma_att[1]|hyper_sd_location, hyper_sd_scale);
202 |               target+=double_exponential_lpdf(sigma_def[1]|hyper_sd_location, hyper_sd_scale);
203 |             }
204 |             else {
205 |               target+=double_exponential_lpdf(sigma_common[1]|hyper_sd_location, hyper_sd_scale);
206 |             }
207 |           }
208 |       }
209 | 
210 |   // log-priors fixed effects
211 |   target+=normal_lpdf(home|mean_home,sd_home);
212 |   target+=normal_lpdf(gamma|0,1);
213 | 
214 |   // 4) Priors on NB dispersion
215 |   target += normal_lpdf(phi1 | 0, 5);
216 |   target += normal_lpdf(phi2 | 0, 5);
217 | 
218 |   // 5) Likelihood: marginal NB2 for each margin + shared latent component
219 |   for (n in 1:N) {
220 |     target += neg_binomial_2_lpmf(y[n,1] | theta[n,1], phi1);
221 |     target += neg_binomial_2_lpmf(y[n,2] | theta[n,2], phi2);
222 |   }
223 | }
224 | 
225 | generated quantities {
226 |   array[N,2] int y_rep;
227 |   array[N_prev,2] int y_prev;
228 |   array[N_prev] vector[2] theta_prev;
229 |   vector[N] log_lik;
230 |   array[N] int diff_y_rep;
231 | 
232 |   for (n in 1:N) {
233 |     y_rep[n,1]    = neg_binomial_2_rng(theta[n,1], phi1);
234 |     y_rep[n,2]    = neg_binomial_2_rng(theta[n,2], phi2);
235 |     diff_y_rep[n] = y_rep[n,1] - y_rep[n,2];
236 | 
237 |     log_lik[n] = neg_binomial_2_lpmf(y[n,1] | theta[n,1], phi1)
238 |                + neg_binomial_2_lpmf(y[n,2] | theta[n,2], phi2);
239 |   }
240 | 
241 |   //out-of-sample predictions
242 |   if (N_prev > 0) {
243 |     for (n in 1:N_prev){
244 |       theta_prev[n,1] = exp(adj_h_eff[team1_prev[n]]+att[instants_prev[n],team1_prev[n]]+
245 |                             def[instants_prev[n], team2_prev[n]]+
246 |                            (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]]));
247 |       theta_prev[n,2] = exp(att[instants_prev[n],team2_prev[n]]+
248 |                             def[instants_prev[n], team1_prev[n]]-
249 |                            (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]]));
250 |       y_prev[n,1] = neg_binomial_2_rng(theta_prev[n,1], phi1);
251 |       y_prev[n,2] = neg_binomial_2_rng(theta_prev[n,2], phi2);
252 |     }
253 |   }
254 | }
255 | 


--------------------------------------------------------------------------------
/R/pp_foot.R:
--------------------------------------------------------------------------------
  1 | #' Posterior predictive checks for football models
  2 | #'
  3 | #' The function provides posterior predictive plots to check the adequacy of the Bayesian models as
  4 | #' returned by the \code{stan_foot} function.
  5 | #'
  6 | #' @param object An object either of class \code{stanFoot}, \code{CmdStanFit}, \code{stanfit}.
  7 | #' @param data A data frame containing match data with columns:
  8 | #'   \itemize{
  9 | #'     \item \code{periods}:  Time point of each observation (integer >= 1).
 10 | #'     \item \code{home_team}: Home team's name (character string).
 11 | #'     \item \code{away_team}: Away team's name (character string).
 12 | #'     \item \code{home_goals}: Goals scored by the home team (integer >= 0).
 13 | #'     \item \code{away_goals}: Goals scored by the away team (integer >= 0).
 14 | #'   }
 15 | #' @param type  Type of plots, one among \code{"aggregated"} or \code{"matches"}. Default is \code{"aggregated"}.
 16 | #' @param coverage Argument to specify the width \eqn{1-\alpha} of posterior probability intervals. Default is 0.95.
 17 | #'
 18 | #' @return A list with elements:
 19 | #' \itemize{
 20 | #'   \item{\code{pp_plot}}: A \code{ggplot} object for the selected \code{type} of plot.
 21 | #'   \item{\code{pp_table}}: A data frame of summary statistics:
 22 | #'     \itemize{
 23 | #'       \item For \code{"aggregated"}: Goal differences and their Bayesian p‑values.
 24 | #'       \item For \code{"matches"}: Nominal \code{1-alpha} and observed empirical Bayesian coverage.
 25 | #'     }
 26 | #' }
 27 | #'
 28 | #' @details
 29 | #'
 30 | #' Posterior predictive plots: when \code{"aggregated"} (default) is selected, the function
 31 | #' returns a frequency plot for some pre-selected goal-difference values,
 32 | #' along with their correspondent Bayesian p-values, computed as
 33 | #' \eqn{Pr(y_{rep} \ge y)|y)}, where \eqn{y_{rep}} is a data replication from the
 34 | #' posterior predictive distribution (more details in Gelman et al., 2013).
 35 | #' Bayesian p-values very close to 0 or 1 could exhibit
 36 | #' possible model misfits.
 37 | #'
 38 | #' When \code{"matches"} is selected an ordered-frequency plot for all the
 39 | #' goal-differences in the considered matches is provided, along with the
 40 | #' empirical Bayesian coverage at level \eqn{1-\alpha}.
 41 | #'
 42 | #' @author Leonardo Egidi \email{legidi@units.it} and Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}
 43 | #'
 44 | #' @references
 45 | #'
 46 | #' Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari, A., & Rubin, D. B. (2013). Bayesian data analysis. CRC press.
 47 | #'
 48 | #'
 49 | #' @examples
 50 | #' \dontrun{
 51 | #' if (instantiate::stan_cmdstan_exists()) {
 52 | #'   library(dplyr)
 53 | #'
 54 | #'   data("italy")
 55 | #'   italy_2000 <- italy %>%
 56 | #'     dplyr::select(Season, home, visitor, hgoal, vgoal) %>%
 57 | #'     dplyr::filter(Season == "2000")
 58 | #'
 59 | #'   colnames(italy_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals")
 60 | #'
 61 | #'   fit <- stan_foot(italy_2000, "double_pois", iter_sampling = 200)
 62 | #'
 63 | #'   pp_foot(fit, italy_2000)
 64 | #' }
 65 | #' }
 66 | #'
 67 | #' @importFrom ggplot2 ggplot aes geom_point labs scale_colour_manual annotate
 68 | #'   scale_x_discrete scale_x_continuous scale_y_continuous dup_axis geom_ribbon geom_line
 69 | #'   theme_bw theme rel
 70 | #' @importFrom matrixStats colMedians colVars colQuantiles
 71 | #' @importFrom rstan extract
 72 | #' @importFrom posterior as_draws_rvars draws_of
 73 | #' @importFrom rlang .data
 74 | #'
 75 | #' @export
 76 | 
 77 | 
 78 | pp_foot <- function(object, data,
 79 |                     type = "aggregated",
 80 |                     coverage = 0.95) {
 81 |   #   ____________________________________________________________________________
 82 |   #   Data and argument checks                                                ####
 83 | 
 84 |   type <- match.arg(type, c("aggregated", "matches"))
 85 | 
 86 |   required_cols <- c("periods", "home_team", "away_team", "home_goals", "away_goals")
 87 |   missing_cols <- setdiff(required_cols, names(data))
 88 |   if (length(missing_cols) > 0) {
 89 |     stop(paste("data is missing required columns:", paste(missing_cols, collapse = ", ")))
 90 |   }
 91 | 
 92 |   # Check if object is of class "stanFoot" or "stanfit"
 93 |   if (inherits(object, c("stanFoot", "CmdStanFit"))) {
 94 |     draws <- if (inherits(object, "stanFoot")) {
 95 |       object$fit$draws()
 96 |     } else {
 97 |       object$draws() # for CmdStanFit objects
 98 |     }
 99 |     draws <- posterior::as_draws_rvars(draws)
100 |     if (!("diff_y_rep" %in% names(draws))) {
101 |       stop("Model does not contain 'diff_y_rep' in its samples.")
102 |     }
103 |     sims <- list()
104 |     sims$diff_y_rep <- posterior::draws_of(draws[["diff_y_rep"]])
105 |   } else if (inherits(object, "stanfit")) {
106 |     sims <- rstan::extract(object)
107 | 
108 |     if (!("diff_y_rep" %in% names(sims))) {
109 |       stop("Model does not contain 'diff_y_rep' in its samples.")
110 |     }
111 |   } else {
112 |     stop("Provide one among these three model fit classes: 'stanfit', 'CmdStanFit', 'stanFoot'.")
113 |   }
114 | 
115 | 
116 |   #   ____________________________________________________________________________
117 |   #   Dataframes and plots                                                    ####
118 | 
119 |   y <- as.matrix(data[, 4:5])
120 |   goal_diff <- as.vector(y[, 1] - y[, 2])
121 |   goal_diff_rep <- sims$diff_y_rep
122 |   esiti_short <- seq(-3, 3, 1)
123 |   M <- dim(goal_diff_rep)[1]
124 |   freq_rel_matrix <- matrix(NA, M, length(esiti_short))
125 |   ngames_train <- dim(goal_diff_rep)[2]
126 | 
127 | 
128 |   if (type == "aggregated") {
129 |     check.integer <- function(x) {
130 |       x == round(x)
131 |     }
132 | 
133 |     if (check.integer(median(goal_diff_rep)) == FALSE) { # student_t models adjustment
134 |       goal_diff_rep <- round(goal_diff_rep, 0)
135 |     }
136 | 
137 |     freq_rel_matrix <- t(apply(goal_diff_rep, 1, function(row_j) {
138 |       counts <- table(factor(row_j, levels = esiti_short))
139 |       as.vector(counts) / ngames_train
140 |     }))
141 | 
142 | 
143 |     freq_rel_frame_add <- do.call(rbind, lapply(1:M, function(j) {
144 |       data.frame(valori = esiti_short, rel = freq_rel_matrix[j, ])
145 |     }))
146 | 
147 | 
148 |     freq_rel_obs <- sapply(esiti_short, function(x) {
149 |       sum(goal_diff == x) / ngames_train
150 |     })
151 | 
152 |     frame <- data.frame(valori = esiti_short, rel = freq_rel_frame_add[, 2])
153 | 
154 |     p <- ggplot(frame, aes(x = valori, y = rel)) +
155 |       # Simulated points
156 |       geom_point(position = "jitter", alpha = 0.2, aes(colour = "simulated")) +
157 |       # Observed segments
158 |       annotate("segment",
159 |         x = -3 - 0.5, y = freq_rel_obs[1],
160 |         xend = -3 + 0.5, yend = freq_rel_obs[1],
161 |         colour = "#1E90FF", linewidth = 2
162 |       ) +
163 |       annotate("segment",
164 |         x = -2 - 0.5, y = freq_rel_obs[2],
165 |         xend = -2 + 0.5, yend = freq_rel_obs[2],
166 |         colour = "#1E90FF", linewidth = 2
167 |       ) +
168 |       annotate("segment",
169 |         x = -1 - 0.5, y = freq_rel_obs[3],
170 |         xend = -1 + 0.5, yend = freq_rel_obs[3],
171 |         colour = "#1E90FF", linewidth = 2
172 |       ) +
173 |       annotate("segment",
174 |         x = 0 - 0.5, y = freq_rel_obs[4],
175 |         xend = 0 + 0.5, yend = freq_rel_obs[4],
176 |         colour = "#1E90FF", linewidth = 2
177 |       ) +
178 |       annotate("segment",
179 |         x = 1 - 0.5, y = freq_rel_obs[5],
180 |         xend = 1 + 0.5, yend = freq_rel_obs[5],
181 |         colour = "#1E90FF", linewidth = 2
182 |       ) +
183 |       annotate("segment",
184 |         x = 2 - 0.5, y = freq_rel_obs[6],
185 |         xend = 2 + 0.5, yend = freq_rel_obs[6],
186 |         colour = "#1E90FF", linewidth = 2
187 |       ) +
188 |       annotate("segment",
189 |         x = 3 - 0.5, y = freq_rel_obs[7],
190 |         xend = 3 + 0.5, yend = freq_rel_obs[7],
191 |         colour = "#1E90FF", linewidth = 2
192 |       ) +
193 |       # Dummy layer to add legend for observed data:
194 |       geom_point(
195 |         data = data.frame(x = Inf, y = Inf),
196 |         aes(x = .data$x, y = .data$y, colour = "observed"),
197 |         size = 2
198 |       ) +
199 |       labs(x = "Goal difference", y = "Posterior predictive distribution") +
200 |       scale_colour_manual(
201 |         name = "",
202 |         values = c(observed = "#1E90FF", simulated = "#FFA500"),
203 |         labels = c("Observed", "Simulated")
204 |       ) +
205 |       scale_x_continuous(breaks = -3:3, limits = c(-4, 4)) +
206 |       theme_bw() +
207 |       theme(
208 |         axis.title = element_text(size = 19),
209 |         axis.text.x = element_text(size = 15),
210 |         axis.text.y = element_text(size = 15),
211 |         legend.position = "top",
212 |         legend.text = element_text(size = 15)
213 |       )
214 | 
215 | 
216 |     p_value <- sapply(seq_along(esiti_short), function(j) {
217 |       round(sum(frame$rel[frame$valori == esiti_short[j]] >= freq_rel_obs[j]) / M, 3)
218 |     })
219 | 
220 |     tbl <- data.frame(
221 |       `goal diff.` = esiti_short,
222 |       `Bayesian p-value` = p_value,
223 |       check.names = FALSE
224 |     )
225 | 
226 |     return(list(pp_plot = p, pp_table = tbl))
227 |   } else if (type == "matches") {
228 |     scd <- as.numeric(as.vector(goal_diff))[1:ngames_train]
229 |     scd_sims <- goal_diff_rep
230 |     scd_hat <- matrixStats::colMedians(scd_sims)
231 |     scd_se <- sqrt(matrixStats::colVars(scd_sims))
232 |     alpha <- coverage
233 |     scd_ub <- matrixStats::colQuantiles(scd_sims, probs = 1 - (1 - alpha) / 2)
234 |     scd_lb <- matrixStats::colQuantiles(scd_sims, probs = (1 - alpha) / 2)
235 |     ci_alpha <- sum(scd < scd_ub & scd_lb < scd) / ngames_train
236 |     ngames_train_draw <- sum(scd == 0)
237 |     scd_draw <- scd[scd == 0]
238 |     ci95_draw <- sum(scd_draw < scd_ub[scd == 0] & scd_lb[scd == 0] < scd_draw) / ngames_train_draw
239 | 
240 | 
241 |     sort_scd <- scd[order(scd)]
242 |     sort_scd_hat <- scd_hat[order(scd)]
243 |     sort_scd_se <- scd_se[order(scd)]
244 |     sort_scd_ub <- scd_ub[order(scd)]
245 |     sort_scd_lb <- scd_lb[order(scd)]
246 | 
247 |     df <- data.frame(list(
248 |       scd = sort_scd, scd_hat = sort_scd_hat, scd_se = sort_scd_se,
249 |       scd_ub = sort_scd_ub, scd_lb = sort_scd_lb
250 |     ))
251 | 
252 |     p <- ggplot(df, aes(x = c(1:ngames_train))) +
253 |       geom_ribbon(aes(ymin = scd_lb, ymax = scd_ub),
254 |         fill = "#FFA500"
255 |       ) +
256 |       geom_line(aes(y = scd_hat, colour = "simulated")) +
257 |       geom_point(aes(y = scd, colour = "observed"), fill = "#1E90FF", size = 0.5) +
258 |       scale_x_continuous(name = "games") +
259 |       scale_y_continuous(
260 |         name = "Goal difference",
261 |         breaks = c(-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8),
262 |         sec.axis = dup_axis()
263 |       ) +
264 |       scale_colour_manual(
265 |         name = "",
266 |         values = c(observed = "#1E90FF", simulated = "#FFA500"),
267 |         labels = c("Observed", "Simulated")
268 |       ) +
269 |       theme_bw() +
270 |       theme(
271 |         axis.title = element_text(size = 19),
272 |         axis.text.x = element_text(size = 15),
273 |         axis.text.y = element_text(size = 15),
274 |         legend.position = "top",
275 |         legend.text = element_text(size = 15)
276 |       )
277 | 
278 |     tbl <- data.frame(alpha = coverage, coverage = round(ci_alpha, 3))
279 |     colnames(tbl) <- c("1-alpha", "emp. coverage")
280 |     return(list(pp_plot = p, pp_table = tbl))
281 |   }
282 | }
283 | 


--------------------------------------------------------------------------------