├── src ├── Makevars ├── Makevars.win ├── stan │ ├── .Rapp.history │ ├── static_btd.stan │ ├── dynamic_btd.stan │ ├── student_t_dynamic.stan │ ├── student_t.stan │ ├── double_pois.stan │ ├── neg_bin.stan │ ├── skellam.stan │ ├── biv_pois.stan │ ├── zero_infl_skellam.stan │ ├── diag_infl_biv_pois.stan │ └── neg_bin_dynamic.stan └── install.libs.R ├── .github ├── .gitignore └── workflows │ ├── test-coverage.yaml │ └── R-CMD-check.yaml ├── .DS_Store ├── data ├── italy.rda └── england.rda ├── Meta └── vignette.rds ├── man ├── figures │ ├── logo.png │ ├── readme_foot_prob.png │ ├── readme_foot_rank.png │ ├── readme_pp_foot.png │ └── readme_foot_abilities.png ├── italy.Rd ├── print.compareFoot.Rd ├── england.Rd ├── print.stanFoot.Rd ├── print.btdFoot.Rd ├── plot_logStrength.Rd ├── foot_rank.Rd ├── foot_round_robin.Rd ├── foot_prob.Rd ├── pp_foot.Rd ├── foot_abilities.Rd ├── plot_btdPosterior.Rd ├── priors.Rd ├── compare_foot.Rd ├── mle_foot.Rd └── btd_foot.Rd ├── vignettes ├── pp_foot-1.png ├── pp_foot-2.png ├── abilities-1.png ├── abilities-2.png ├── pp_checks-1.png ├── rank_pred1-1.png ├── rank_pred1-2.png ├── rank_pred2-1.png ├── rank_pred2-2.png ├── rank_pred2-3.png ├── abilities_dyn-1.png ├── comparing_priors-1.png ├── foot_roundrobin-1.png ├── static_fit_corr-1.png ├── plot_btdPosterior_dyn-1.png ├── plot_btdPosterior_stat-1.png ├── foot_prob_weekly_predict-1.png ├── plot_logStrength_teams_dyn-1.png ├── plot_btdPosterior_teams_dyn-1.png ├── plot_btdPosterior_teams_stat-1.png ├── plot_btdPosterior_teams_dyn_dens-1.png └── plot_btdPosterior_teams_stat_dens-1.png ├── .gitignore ├── CRAN-SUBMISSION ├── CRAN-RELEASE ├── R ├── startup.R ├── italy.R ├── globals.R ├── england.R ├── plot_logStrength.R ├── foot_priors.R ├── utils_foot.R └── pp_foot.R ├── rebuild-long-running-vignette.R ├── .Rbuildignore ├── footBayes.Rproj ├── tests ├── testthat.R └── testthat │ ├── test_plot_logStrength.R │ ├── test_pp_foot.R │ ├── test_compare_foot.R │ ├── test_mle_foot.R │ └── test_foot_prob.R ├── cran-comments.md ├── NEWS.md ├── DESCRIPTION ├── NAMESPACE ├── README.Rmd └── README.md /src/Makevars: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/stan/.Rapp.history: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/.DS_Store -------------------------------------------------------------------------------- /data/italy.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/data/italy.rda -------------------------------------------------------------------------------- /Meta/vignette.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/Meta/vignette.rds -------------------------------------------------------------------------------- /data/england.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/data/england.rda -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/man/figures/logo.png -------------------------------------------------------------------------------- /vignettes/pp_foot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/pp_foot-1.png -------------------------------------------------------------------------------- /vignettes/pp_foot-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/pp_foot-2.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .DS_Store 6 | 7 | /doc/ 8 | /Meta/ 9 | -------------------------------------------------------------------------------- /vignettes/abilities-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/abilities-1.png -------------------------------------------------------------------------------- /vignettes/abilities-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/abilities-2.png -------------------------------------------------------------------------------- /vignettes/pp_checks-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/pp_checks-1.png -------------------------------------------------------------------------------- /vignettes/rank_pred1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/rank_pred1-1.png -------------------------------------------------------------------------------- /vignettes/rank_pred1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/rank_pred1-2.png -------------------------------------------------------------------------------- /vignettes/rank_pred2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/rank_pred2-1.png -------------------------------------------------------------------------------- /vignettes/rank_pred2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/rank_pred2-2.png -------------------------------------------------------------------------------- /vignettes/rank_pred2-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/rank_pred2-3.png -------------------------------------------------------------------------------- /vignettes/abilities_dyn-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/abilities_dyn-1.png -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 2.0.0 2 | Date: 2025-05-15 16:28:11 UTC 3 | SHA: 2bd20fe9e07798b4f1b023cfac3ef858ae227da4 4 | -------------------------------------------------------------------------------- /man/figures/readme_foot_prob.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/man/figures/readme_foot_prob.png -------------------------------------------------------------------------------- /man/figures/readme_foot_rank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/man/figures/readme_foot_rank.png -------------------------------------------------------------------------------- /man/figures/readme_pp_foot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/man/figures/readme_pp_foot.png -------------------------------------------------------------------------------- /vignettes/comparing_priors-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/comparing_priors-1.png -------------------------------------------------------------------------------- /vignettes/foot_roundrobin-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/foot_roundrobin-1.png -------------------------------------------------------------------------------- /vignettes/static_fit_corr-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/static_fit_corr-1.png -------------------------------------------------------------------------------- /man/figures/readme_foot_abilities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/man/figures/readme_foot_abilities.png -------------------------------------------------------------------------------- /vignettes/plot_btdPosterior_dyn-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/plot_btdPosterior_dyn-1.png -------------------------------------------------------------------------------- /vignettes/plot_btdPosterior_stat-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/plot_btdPosterior_stat-1.png -------------------------------------------------------------------------------- /vignettes/foot_prob_weekly_predict-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/foot_prob_weekly_predict-1.png -------------------------------------------------------------------------------- /vignettes/plot_logStrength_teams_dyn-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/plot_logStrength_teams_dyn-1.png -------------------------------------------------------------------------------- /vignettes/plot_btdPosterior_teams_dyn-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/plot_btdPosterior_teams_dyn-1.png -------------------------------------------------------------------------------- /vignettes/plot_btdPosterior_teams_stat-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/plot_btdPosterior_teams_stat-1.png -------------------------------------------------------------------------------- /CRAN-RELEASE: -------------------------------------------------------------------------------- 1 | This package was submitted to CRAN on 2022-02-08. 2 | Once it is accepted, delete this file and tag the release (commit c259aeb). 3 | -------------------------------------------------------------------------------- /vignettes/plot_btdPosterior_teams_dyn_dens-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/plot_btdPosterior_teams_dyn_dens-1.png -------------------------------------------------------------------------------- /vignettes/plot_btdPosterior_teams_stat_dens-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeoEgidi/footBayes/HEAD/vignettes/plot_btdPosterior_teams_stat_dens-1.png -------------------------------------------------------------------------------- /R/startup.R: -------------------------------------------------------------------------------- 1 | .onAttach <- function(lib, pkg) { 2 | packageStartupMessage( 3 | "NOTE: footBayes depends on the R package cmdstanr, which is not available on CRAN. 4 | The cmdstanr package may be installed at https://mc-stan.org/cmdstanr/." 5 | ) 6 | } 7 | -------------------------------------------------------------------------------- /rebuild-long-running-vignette.R: -------------------------------------------------------------------------------- 1 | old_wd <- getwd() 2 | 3 | setwd("vignettes/") 4 | knitr::knit("footBayes_a_rapid_guide.Rmd.orig", output = "footBayes_a_rapid_guide.Rmd") 5 | knitr::purl("footBayes_a_rapid_guide.Rmd.orig", output = "footBayes_a_rapid_guide.R") 6 | 7 | setwd(old_wd) 8 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^cran-comments\.md$ 4 | 5 | ^\.travis\.yml$ 6 | ^CRAN-RELEASE$ 7 | ^CRAN-SUBMISSION$ 8 | 9 | ^docs$ 10 | ^Meta$ 11 | ^doc$ 12 | 13 | ^rebuild-long-running-vignette.R 14 | 15 | ^README\.Rmd$ 16 | ^README-.*\.png$ 17 | ^\README_cache$ 18 | ^logo\.png$ 19 | 20 | ^\.github$ 21 | ^\.DS_Store 22 | -------------------------------------------------------------------------------- /footBayes.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: 912f92ac-40bc-4d09-8a67-203ee124cf14 3 | 4 | RestoreWorkspace: Default 5 | SaveWorkspace: Default 6 | AlwaysSaveHistory: Default 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: Sweave 14 | LaTeX: pdfLaTeX 15 | 16 | AutoAppendNewline: Yes 17 | StripTrailingWhitespace: Yes 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview 7 | # * https://testthat.r-lib.org/articles/special-files.html 8 | 9 | library(testthat) 10 | library(footBayes) 11 | library(ggplot2) 12 | library(dplyr) 13 | library(instantiate) 14 | 15 | test_check("footBayes") 16 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Test environments 2 | * win-builder (devel and release) 3 | * local OS X install, R 4.4.3 4 | * local Linux, R 4.4.3 5 | 6 | ## R CMD check results 7 | 0 errors | 0 warnings | 1 notes 8 | 9 | The note is related to the installed package size. This is primarily due to the inclusion of the `bin` sub-directory, which contains precompiled `CmdStan` models obtained using the `instantiate` package. 10 | 11 | ## Downstream dependencies 12 | There are currently no downstream dependencies for this package. 13 | 14 | 15 | Thank you 16 | 17 | -------------------------------------------------------------------------------- /R/italy.R: -------------------------------------------------------------------------------- 1 | #' Italy league results 1934-2022 2 | #' 3 | #' All results for Italian soccer games in the top tier 4 | #' from 1934/35 season to 2021/22 season. 5 | #' 6 | #' @format A data frame with 27684 rows and 8 variables: 7 | #' \describe{ 8 | #' \item{Date}{Date of match} 9 | #' \item{Season}{Season of match - refers to starting year} 10 | #' \item{home}{Home team} 11 | #' \item{visitor}{Visiting team} 12 | #' \item{FT}{Full-time result} 13 | #' \item{hgoal}{Goals scored by home team} 14 | #' \item{vgoal}{Goals scored by visiting team} 15 | #' \item{tier}{Tier of football pyramid: 1} 16 | #' } 17 | "italy" 18 | -------------------------------------------------------------------------------- /R/globals.R: -------------------------------------------------------------------------------- 1 | #' Global variables 2 | #' 3 | #' Global variables 4 | #' 5 | #' @export 6 | 7 | utils::globalVariables(c( 8 | "median", "quantile", "group_by", "summarise", 9 | "first", "times", "lo", "hi", "mid", "sd", "par", 10 | "Home", "Away", "Prob", "rpois", "lo2", "hi2", 11 | "q_025", "q_975", "q_25", "q_75", "q_50", 12 | "filter", "Home_prob", ".", "dpois", "optim", "pchisq", 13 | "as_nlist", "valori", "mutate", "times", "%>%", 14 | "day", "distinct", "matches", "favorite", "underdog", 15 | "periods", "rank_points", "team", "period", "log_strength" 16 | )) 17 | 18 | # LinkingTo: StanHeaders (>= 2.18.0), rstan (>= 2.18.1), BH (>= 1.66.0), 19 | # Rcpp (>= 0.12.0), RcppEigen (>= 0.3.3.3.0) 20 | -------------------------------------------------------------------------------- /man/italy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/italy.R 3 | \docType{data} 4 | \name{italy} 5 | \alias{italy} 6 | \title{Italy league results 1934-2022} 7 | \format{ 8 | A data frame with 27684 rows and 8 variables: 9 | \describe{ 10 | \item{Date}{Date of match} 11 | \item{Season}{Season of match - refers to starting year} 12 | \item{home}{Home team} 13 | \item{visitor}{Visiting team} 14 | \item{FT}{Full-time result} 15 | \item{hgoal}{Goals scored by home team} 16 | \item{vgoal}{Goals scored by visiting team} 17 | \item{tier}{Tier of football pyramid: 1} 18 | } 19 | } 20 | \usage{ 21 | italy 22 | } 23 | \description{ 24 | All results for Italian soccer games in the top tier 25 | from 1934/35 season to 2021/22 season. 26 | } 27 | \keyword{datasets} 28 | -------------------------------------------------------------------------------- /R/england.R: -------------------------------------------------------------------------------- 1 | #' English league results 1888-2022 2 | #' 3 | #' All results for English soccer games in the top 4 tiers 4 | #' from 1888/89 season to 2021/22 season. 5 | #' 6 | #' @format A data frame with 203956 rows and 12 variables: 7 | #' \describe{ 8 | #' \item{Date}{Date of match} 9 | #' \item{Season}{Season of match - refers to starting year} 10 | #' \item{home}{Home team} 11 | #' \item{visitor}{Visiting team} 12 | #' \item{FT}{Full-time result} 13 | #' \item{hgoal}{Goals scored by home team} 14 | #' \item{vgoal}{Goals scored by visiting team} 15 | #' \item{division}{Division: 1,2,3,4 or 3N (Old 3-North) or 3S (Old 3-South)} 16 | #' \item{tier}{Tier of football pyramid: 1,2,3,4} 17 | #' \item{totgoal}{Total goals in game} 18 | #' \item{goaldif}{Goal difference in game home goals - visitor goals} 19 | #' \item{result}{Result: H-Home Win, A-Away Win, D-Draw} 20 | #' } 21 | "england" 22 | -------------------------------------------------------------------------------- /man/print.compareFoot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/print_fun_foot.R 3 | \name{print.compareFoot} 4 | \alias{print.compareFoot} 5 | \title{Print method for compareFoot objects} 6 | \usage{ 7 | \method{print}{compareFoot}(x, digits = 3, ...) 8 | } 9 | \arguments{ 10 | \item{x}{An object of class \code{compareFoot} returned by \code{\link{compare_foot}}.} 11 | 12 | \item{digits}{Number of digits to use when printing numeric values for the metrics. Default is \code{3}.} 13 | 14 | \item{...}{Additional arguments passed to \code{print}.} 15 | } 16 | \description{ 17 | Provides a formatted output when printing objects of class \code{compareFoot}, displaying the predictive performance metrics and, if available, the confusion matrices for each model or probability matrix. 18 | } 19 | \author{ 20 | Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it} 21 | } 22 | -------------------------------------------------------------------------------- /man/england.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/england.R 3 | \docType{data} 4 | \name{england} 5 | \alias{england} 6 | \title{English league results 1888-2022} 7 | \format{ 8 | A data frame with 203956 rows and 12 variables: 9 | \describe{ 10 | \item{Date}{Date of match} 11 | \item{Season}{Season of match - refers to starting year} 12 | \item{home}{Home team} 13 | \item{visitor}{Visiting team} 14 | \item{FT}{Full-time result} 15 | \item{hgoal}{Goals scored by home team} 16 | \item{vgoal}{Goals scored by visiting team} 17 | \item{division}{Division: 1,2,3,4 or 3N (Old 3-North) or 3S (Old 3-South)} 18 | \item{tier}{Tier of football pyramid: 1,2,3,4} 19 | \item{totgoal}{Total goals in game} 20 | \item{goaldif}{Goal difference in game home goals - visitor goals} 21 | \item{result}{Result: H-Home Win, A-Away Win, D-Draw} 22 | } 23 | } 24 | \usage{ 25 | england 26 | } 27 | \description{ 28 | All results for English soccer games in the top 4 tiers 29 | from 1888/89 season to 2021/22 season. 30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # footBayes 2.0.1 2 | * Updated vignette. 3 | * Correct typo in foot_compare() description. 4 | * Correct typo in pp_foot() description. 5 | 6 | 7 | # footBayes 2.0.0 8 | 9 | * Migration from `rstan` interface to `CmdStanR` interface. 10 | * Add support for the `VI`, `pathfinder` and `laplace` algorithms. 11 | * Add pre-compiled CmdStan models using the package `instantiate`. 12 | * Add AIC and BIC output elements in `mle_foot()`. 13 | * Add Bayesian static/dynamic Negative Binomial model in `stan_foot()`. 14 | * Minor `ggplot2` edits on `foot_rank()`, `foot_abilities()` and `pp_foot()`. 15 | * Updated vignette. 16 | 17 | 18 | # footBayes 1.0.0 19 | 20 | * Bradley-Terry model for abilities. 21 | * Dynamic ranking in the models. 22 | * Updated vignette. 23 | * Probabilistic predictive performance (pseudo-R squared, Brier score, etc.). 24 | 25 | # footBayes 0.2.0 26 | 27 | * Inclusion of diagonal-inflated bivariate Poisson and zero-inflated Skellam models. 28 | * Minor edits and drop engsoccerdata dependence. 29 | * Updated vignette. 30 | 31 | # footBayes 0.1.0 32 | 33 | * First submission to CRAN. 34 | 35 | 36 | -------------------------------------------------------------------------------- /src/install.libs.R: -------------------------------------------------------------------------------- 1 | libs <- file.path(R_PACKAGE_DIR, "libs", R_ARCH) 2 | dir.create(libs, recursive = TRUE, showWarnings = FALSE) 3 | for (file in c("symbols.rds", Sys.glob(paste0("*", SHLIB_EXT)))) { 4 | if (file.exists(file)) { 5 | file.copy(file, file.path(libs, file)) 6 | } 7 | } 8 | inst_stan <- file.path("..", "inst", "stan") 9 | if (dir.exists(inst_stan)) { 10 | warning( 11 | "Stan models in inst/stan/ are deprecated in {instantiate} ", 12 | ">= 0.0.4.9001 (2024-01-03). Please put them in src/stan/ instead." 13 | ) 14 | if (file.exists("stan")) { 15 | warning("src/stan/ already exists. Not copying models from inst/stan/.") 16 | } else { 17 | message("Copying inst/stan/ to src/stan/.") 18 | fs::dir_copy(path = inst_stan, new_path = "stan") 19 | } 20 | } 21 | bin <- file.path(R_PACKAGE_DIR, "bin") 22 | if (!file.exists(bin)) { 23 | dir.create(bin, recursive = TRUE, showWarnings = FALSE) 24 | } 25 | bin_stan <- file.path(bin, "stan") 26 | fs::dir_copy(path = "stan", new_path = bin_stan) 27 | callr::r( 28 | func = function(bin_stan) { 29 | instantiate::stan_package_compile( 30 | models = instantiate::stan_package_model_files(path = bin_stan)) 31 | }, 32 | args = list(bin_stan = bin_stan) 33 | ) 34 | -------------------------------------------------------------------------------- /man/print.stanFoot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/print_fun_foot.R 3 | \name{print.stanFoot} 4 | \alias{print.stanFoot} 5 | \title{Print Method for stanFoot Objects} 6 | \usage{ 7 | \method{print}{stanFoot}(x, pars = NULL, teams = NULL, digits = 3, true_names = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{An object of class \code{stanFoot}.} 11 | 12 | \item{pars}{Optional character vector specifying parameters to include in the summary. This can be specific parameter names (e.g., \code{"att"}, \code{"def"}, \code{"att_raw"}, \code{"def_raw"}, \code{"home"}, \code{"sigma_att"}, \code{"sigma_def"}, \code{"rho"}, and \code{"beta"}). If \code{NULL}, all parameters are included.} 13 | 14 | \item{teams}{Optional character vector specifying team names whose \code{"att"}, \code{"def"}, \code{"att_raw"}, \code{"def_raw"} parameters should be displayed.} 15 | 16 | \item{digits}{Number of digits to use when printing numeric values. Default is \code{3}.} 17 | 18 | \item{true_names}{Logical value indicating whether to display team names in parameter summaries. Default is \code{TRUE}.} 19 | 20 | \item{...}{Additional arguments passed.} 21 | } 22 | \description{ 23 | Provides detailed posterior summaries for the Stan football model parameters. 24 | } 25 | \author{ 26 | Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it} 27 | } 28 | -------------------------------------------------------------------------------- /man/print.btdFoot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/print_fun_foot.R 3 | \name{print.btdFoot} 4 | \alias{print.btdFoot} 5 | \title{Print Method for btdFoot Objects} 6 | \usage{ 7 | \method{print}{btdFoot}( 8 | x, 9 | pars = NULL, 10 | teams = NULL, 11 | digits = 3, 12 | true_names = TRUE, 13 | display = "both", 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{x}{An object of class \code{btdFoot}.} 19 | 20 | \item{pars}{Optional character vector specifying parameters to include in the summary (e.g., \code{"logStrength"}, \code{"logTie"}, \code{"home"}, \code{"log_lik"}, and \code{"y_rep"}).} 21 | 22 | \item{teams}{Optional character vector specifying team names whose \code{logStrength} parameters should be displayed.} 23 | 24 | \item{digits}{Number of digits to use when printing numeric values. Default is \code{3}.} 25 | 26 | \item{true_names}{Logical value indicating whether to display team names in parameter summaries. Default is \code{TRUE}.} 27 | 28 | \item{display}{Character string specifying which parts of the output to display. Options are \code{"both"}, \code{"rankings"}, or \code{"parameters"}. Default is \code{"both"}.} 29 | 30 | \item{...}{Additional arguments passed.} 31 | } 32 | \description{ 33 | Provides detailed posterior summaries for the Bayesian Bradley-Terry-Davidson model parameters. 34 | } 35 | \author{ 36 | Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it} 37 | } 38 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | 8 | name: test-coverage.yaml 9 | 10 | permissions: read-all 11 | 12 | jobs: 13 | test-coverage: 14 | runs-on: ubuntu-latest 15 | env: 16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | with: 23 | use-public-rspm: true 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | dependencies: NA 28 | extra-packages: | 29 | R.rsp 30 | knitr 31 | rcmdcheck 32 | rmarkdown 33 | lme4 34 | mice 35 | BH 36 | MCMCglmm 37 | ape 38 | stan-dev/cmdstanr 39 | RcppEigen 40 | testthat 41 | emmeans 42 | projpred 43 | priorsense 44 | RWiener 45 | rtdists 46 | extraDistr 47 | spdep 48 | mnormt 49 | splines2 50 | arm 51 | statmod 52 | diffobj 53 | betareg 54 | usethis 55 | testthat 56 | covr 57 | xml2 58 | 59 | - name: Build Cmdstan 60 | run: | 61 | cmdstanr::check_cmdstan_toolchain(fix = TRUE) 62 | cmdstanr::install_cmdstan(cores = parallel::detectCores()) 63 | shell: Rscript {0} 64 | 65 | - name: Test coverage 66 | run: covr::codecov() 67 | shell: Rscript {0} 68 | -------------------------------------------------------------------------------- /man/plot_logStrength.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_logStrength.R 3 | \name{plot_logStrength} 4 | \alias{plot_logStrength} 5 | \title{Plot Rankings for btdFoot Objects} 6 | \usage{ 7 | plot_logStrength(x, teams = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{An object of class \code{btdFoot}.} 11 | 12 | \item{teams}{An optional character vector specifying team names to include in the rankings plot. If \code{NULL}, all teams are included.} 13 | } 14 | \value{ 15 | A \code{ggplot} object: 16 | \itemize{ 17 | \item Dynamic BTD: A lineplot for the \code{log_strengths} over each period, colored by team. 18 | \item Static BTD: An horizontal barplot for each team. 19 | } 20 | } 21 | \description{ 22 | Visualizes team rankings based on whether the ranking is dynamic or static. 23 | } 24 | \details{ 25 | \itemize{ 26 | \item Dynamic Ranking: Plots Rank Points over Periods for each team with lines and points. 27 | \item Static Ranking: Plots Rank Points on the x-axis against Team Names on the y-axis with horizontal lines and points. 28 | } 29 | } 30 | \examples{ 31 | \dontrun{ 32 | if (instantiate::stan_cmdstan_exists()) { 33 | library(dplyr) 34 | 35 | data("italy") 36 | 37 | italy_2020_2021_rank <- italy \%>\% 38 | select(Season, home, visitor, hgoal, vgoal) \%>\% 39 | filter(Season == "2020" | Season == "2021") \%>\% 40 | mutate(match_outcome = case_when( 41 | hgoal > vgoal ~ 1, # Home team wins 42 | hgoal == vgoal ~ 2, # Draw 43 | hgoal < vgoal ~ 3 # Away team wins 44 | )) \%>\% 45 | mutate(periods = case_when( 46 | row_number() <= 190 ~ 1, 47 | row_number() <= 380 ~ 2, 48 | row_number() <= 570 ~ 3, 49 | TRUE ~ 4 50 | )) \%>\% # Assign periods based on match number 51 | select(periods, 52 | home_team = home, 53 | away_team = visitor, match_outcome 54 | ) 55 | 56 | fit_rank_dyn <- btd_foot( 57 | data = italy_2020_2021_rank, 58 | dynamic_rank = TRUE, 59 | rank_measure = "median", 60 | iter_sampling = 1000, 61 | parallel_chains = 2, 62 | chains = 2 63 | ) 64 | 65 | plot_logStrength(fit_rank_dyn) 66 | 67 | plot_logStrength(fit_rank_dyn, teams = c("AC Milan", "AS Roma", "Juventus", "Inter")) 68 | } 69 | } 70 | 71 | } 72 | \author{ 73 | Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}. 74 | } 75 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | 8 | name: R-CMD-check.yaml 9 | 10 | permissions: read-all 11 | 12 | jobs: 13 | R-CMD-check: 14 | runs-on: ${{ matrix.config.os }} 15 | 16 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 17 | 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | config: 22 | - {os: macos-latest, r: 'release'} 23 | - {os: windows-latest, r: 'release'} 24 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 25 | - {os: ubuntu-latest, r: 'release'} 26 | - {os: ubuntu-latest, r: 'oldrel-1'} 27 | 28 | env: 29 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 30 | R_KEEP_PKG_SOURCE: yes 31 | 32 | steps: 33 | - uses: actions/checkout@v4 34 | 35 | - uses: r-lib/actions/setup-pandoc@v2 36 | 37 | - uses: r-lib/actions/setup-r@v2 38 | with: 39 | r-version: ${{ matrix.config.r }} 40 | http-user-agent: ${{ matrix.config.http-user-agent }} 41 | use-public-rspm: true 42 | 43 | - uses: r-lib/actions/setup-r-dependencies@v2 44 | with: 45 | dependencies: NA 46 | extra-packages: | 47 | R.rsp 48 | knitr 49 | rcmdcheck 50 | rmarkdown 51 | lme4 52 | mice 53 | BH 54 | MCMCglmm 55 | ape 56 | stan-dev/cmdstanr 57 | RcppEigen 58 | testthat 59 | emmeans 60 | projpred 61 | priorsense 62 | RWiener 63 | rtdists 64 | extraDistr 65 | spdep 66 | mnormt 67 | splines2 68 | arm 69 | statmod 70 | diffobj 71 | betareg 72 | 73 | - name: Build Cmdstan 74 | run: | 75 | cmdstanr::check_cmdstan_toolchain(fix = TRUE) 76 | cmdstanr::install_cmdstan(cores = parallel::detectCores()) 77 | shell: Rscript {0} 78 | 79 | - uses: r-lib/actions/check-r-package@v2 80 | with: 81 | upload-snapshots: true 82 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 83 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: footBayes 2 | Type: Package 3 | Title: Fitting Bayesian and MLE Football Models 4 | Version: 2.1.0 5 | Date: 2025-08-07 6 | Authors@R: c(person(given = "Leonardo", 7 | family = "Egidi", 8 | role = c("aut", "cre"), 9 | email = "legidi@units.it"), 10 | person(given = "Roberto", 11 | family = "Macrì Demartino", 12 | role = "aut"), 13 | person(given = "Vasilis", 14 | family = "Palaskas.", 15 | role = "aut")) 16 | Maintainer: Leonardo Egidi 17 | License: GPL-2 18 | Description: This is the first package allowing for the estimation, 19 | visualization and prediction of the most well-known 20 | football models: double Poisson, bivariate Poisson, 21 | Skellam, student_t, diagonal-inflated bivariate Poisson, and 22 | zero-inflated Skellam. It supports both maximum likelihood estimation (MLE, for 23 | 'static' models only) and Bayesian inference. 24 | For Bayesian methods, it incorporates several techniques: 25 | MCMC sampling with Hamiltonian Monte Carlo, variational inference using 26 | either the Pathfinder algorithm or Automatic Differentiation Variational 27 | Inference (ADVI), and the Laplace approximation. 28 | The package compiles all the 'CmdStan' models once during installation 29 | using the 'instantiate' package. 30 | The model construction relies on the most well-known football references, such as 31 | Dixon and Coles (1997) , 32 | Karlis and Ntzoufras (2003) and 33 | Egidi, Pauli and Torelli (2018) . 34 | URL: https://github.com/leoegidi/footbayes 35 | BugReports: https://github.com/leoegidi/footbayes/issues 36 | Encoding: UTF-8 37 | SystemRequirements: CmdStan (https://mc-stan.org/users/interfaces/cmdstan), pandoc (>= 1.12.3), pandoc-citeproc 38 | Depends: R (>= 4.2.0) 39 | Imports: rstan (>= 2.18.1), instantiate, reshape2, ggplot2, ggridges, matrixStats, 40 | extraDistr, metRology, dplyr, tidyr, numDeriv, magrittr, rlang, posterior 41 | Suggests: 42 | testthat (>= 3.0.0), 43 | knitr (>= 1.37), 44 | rmarkdown (>= 2.10), 45 | loo, 46 | bayesplot, 47 | cmdstanr (>= 0.6.0) 48 | Additional_repositories: https://stan-dev.r-universe.dev/ 49 | RoxygenNote: 7.3.2 50 | VignetteBuilder: knitr 51 | LazyData: true 52 | BuildManual: yes 53 | Config/testthat/edition: 3 54 | -------------------------------------------------------------------------------- /man/foot_rank.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/foot_rank.R 3 | \name{foot_rank} 4 | \alias{foot_rank} 5 | \title{Rank and points predictions} 6 | \usage{ 7 | foot_rank(object, data, teams = NULL, visualize = "individual") 8 | } 9 | \arguments{ 10 | \item{object}{An object either of class \code{stanFoot}, \code{CmdStanFit}, or \code{stanfit}.} 11 | 12 | \item{data}{A data frame containing match data with columns: 13 | \itemize{ 14 | \item \code{periods}: Time point of each observation (integer >= 1). 15 | \item \code{home_team}: Home team's name (character string). 16 | \item \code{away_team}: Away team's name (character string). 17 | \item \code{home_goals}: Goals scored by the home team (integer >= 0). 18 | \item \code{away_goals}: Goals scored by the away team (integer >= 0). 19 | }} 20 | 21 | \item{teams}{An optional character vector specifying team names to include. If \code{NULL}, all teams are included.} 22 | 23 | \item{visualize}{Type of plots, one among \code{"aggregated"} or \code{"individual"}. Default is \code{"individual"}.} 24 | } 25 | \value{ 26 | If \code{visualize = "aggregated"}: a list with 27 | \itemize{ 28 | \item{\code{rank_table}}: A data frame of observed and simulated final points (median, 25\%/75\% quantiles). 29 | \item{\code{rank_plot}}: A \code{ggplot} comparing observed vs simulated final points for each team. 30 | } 31 | 32 | If \code{visualize = "individual"}: A \code{ggplot} showing, for each selected team, the observed and 33 | simulated cumulative points over match‑days. 34 | } 35 | \description{ 36 | Posterior predictive plots and final rank table for football seasons. 37 | } 38 | \details{ 39 | For Bayesian models fitted via \code{stan_foot} the final rank tables are computed according to the 40 | simulation from the posterior predictive distribution of future (out-of-sample) matches. 41 | The dataset should refer to one or more seasons from a given national football league (Premier League, Serie A, La Liga, etc.). 42 | } 43 | \examples{ 44 | \dontrun{ 45 | if (instantiate::stan_cmdstan_exists()) { 46 | library(dplyr) 47 | 48 | data("italy") 49 | italy_1999_2000 <- italy \%>\% 50 | dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\% 51 | dplyr::filter(Season == "1999" | Season == "2000") 52 | 53 | colnames(italy_1999_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals") 54 | 55 | fit <- stan_foot(italy_1999_2000, "double_pois", iter_sampling = 200) 56 | foot_rank(fit, italy_1999_2000) 57 | foot_rank(fit, italy_1999_2000, visualize = "individual") 58 | } 59 | } 60 | 61 | } 62 | \author{ 63 | Leonardo Egidi \email{legidi@units.it} and Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it} 64 | } 65 | -------------------------------------------------------------------------------- /man/foot_round_robin.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/foot_round_robin.R 3 | \name{foot_round_robin} 4 | \alias{foot_round_robin} 5 | \title{Round-robin for football leagues} 6 | \usage{ 7 | foot_round_robin(object, data, teams = NULL, output = "both") 8 | } 9 | \arguments{ 10 | \item{object}{An object either of class \code{stanFoot}, \code{CmdStanFit}, \code{stanfit}.} 11 | 12 | \item{data}{A data frame containing match data with columns: 13 | \itemize{ 14 | \item \code{periods}: Time point of each observation (integer >= 1). 15 | \item \code{home_team}: Home team's name (character string). 16 | \item \code{away_team}: Away team's name (character string). 17 | \item \code{home_goals}: Goals scored by the home team (integer >= 0). 18 | \item \code{away_goals}: Goals scored by the away team (integer >= 0). 19 | }} 20 | 21 | \item{teams}{An optional character vector specifying team names to include. If \code{NULL}, all teams are included.} 22 | 23 | \item{output}{An optional character string specifying the type of output to return. One of \code{"both"}, \code{"table"}, 24 | or \code{"plot"}. Default is \code{"both"}.} 25 | } 26 | \value{ 27 | If \code{output = "both"} a list with: 28 | \itemize{ 29 | \item{\code{round_table}}: A data frame of matchups (\code{Home}, \code{Away}), observed scores, and \code{Home_prob} (median posterior probability of a home win). 30 | \item{\code{round_plot}}: A \code{ggplot} heatmap of home‑win probabilities with observed scores overlaid. 31 | } 32 | If \code{output = "table"} or \code{"plot"}, returns only that component. 33 | } 34 | \description{ 35 | Posterior predictive probabilities for a football season in a round-robin format 36 | } 37 | \details{ 38 | For Bayesian models fitted via \code{stan_foot} the round-robin table is computed according to the 39 | simulation from the posterior predictive distribution of future (out-of-sample) matches. 40 | The dataset should refer to one or more seasons from a given national football league (Premier League, Serie A, La Liga, etc.). 41 | } 42 | \examples{ 43 | \dontrun{ 44 | if (instantiate::stan_cmdstan_exists()) { 45 | library(dplyr) 46 | 47 | data("italy") 48 | italy_1999_2000 <- italy \%>\% 49 | dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\% 50 | dplyr::filter(Season == "1999" | Season == "2000") 51 | 52 | colnames(italy_1999_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals") 53 | 54 | fit <- stan_foot(italy_1999_2000, "double_pois", predict = 45, iter_sampling = 200) 55 | 56 | foot_round_robin(fit, italy_1999_2000) 57 | foot_round_robin(fit, italy_1999_2000, c("Parma AC", "AS Roma")) 58 | } 59 | } 60 | } 61 | \author{ 62 | Leonardo Egidi \email{legidi@units.it} and Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it} 63 | } 64 | -------------------------------------------------------------------------------- /man/foot_prob.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/foot_prob.R 3 | \name{foot_prob} 4 | \alias{foot_prob} 5 | \title{Plot football matches probabilities for out-of-sample football matches.} 6 | \usage{ 7 | foot_prob(object, data, home_team, away_team) 8 | } 9 | \arguments{ 10 | \item{object}{An object either of class \code{stanFoot}, \code{CmdStanFit}, \code{\link[rstan]{stanfit}}, or class 11 | \code{\link{list}} containing the Maximum Likelihood Estimates (MLE) for the model parameters fitted 12 | with \code{mle_foot}.} 13 | 14 | \item{data}{A data frame containing match data with columns: 15 | \itemize{ 16 | \item \code{periods}: Time point of each observation (integer >= 1). 17 | \item \code{home_team}: Home team's name (character string). 18 | \item \code{away_team}: Away team's name (character string). 19 | \item \code{home_goals}: Goals scored by the home team (integer >= 0). 20 | \item \code{away_goals}: Goals scored by the away team (integer >= 0). 21 | }} 22 | 23 | \item{home_team}{The home team(s) for the predicted matches.} 24 | 25 | \item{away_team}{The away team(s) for the predicted matches.} 26 | } 27 | \value{ 28 | A list with components: 29 | \itemize{ 30 | \item \code{prob_table}: A data frame containing the results probabilities of the out-of-sample matches. 31 | \item \code{prob_plot}: A \code{ggplot} object for Bayesian models only showing the posterior predictive heatmap 32 | of exact score probabilities, with the true result highlighted. 33 | } 34 | } 35 | \description{ 36 | The function provides a table containing the home win, draw and away win probabilities for a bunch of 37 | out-of-sample matches as specified by \code{stan_foot} or \code{mle_foot}. 38 | } 39 | \details{ 40 | For Bayesian models the results probabilities are computed according to the 41 | simulation from the posterior predictive distribution of future (out-of-sample) matches. 42 | Specifically, matches are ordered from those in which the favorite team has the highest posterior probability 43 | of winning to those where the underdog is more likely to win. For MLE models 44 | fitted via the \code{mle_foot} the probabilities are computed by simulating from the MLE estimates. 45 | } 46 | \examples{ 47 | \dontrun{ 48 | if (instantiate::stan_cmdstan_exists()) { 49 | library(dplyr) 50 | 51 | data("italy") 52 | italy_2000 <- italy \%>\% 53 | dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\% 54 | dplyr::filter(Season == "2000") 55 | 56 | colnames(italy_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals") 57 | 58 | 59 | 60 | fit <- stan_foot( 61 | data = italy_2000, 62 | model = "double_pois", 63 | predict = 18 64 | ) # double pois 65 | 66 | foot_prob( 67 | fit, italy_2000, "Inter", 68 | "Bologna FC" 69 | ) 70 | 71 | foot_prob(fit, italy_2000) # all the out-of-sample matches 72 | } 73 | } 74 | 75 | } 76 | \author{ 77 | Leonardo Egidi \email{legidi@units.it} and Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}. 78 | } 79 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,btdFoot) 4 | S3method(print,compareFoot) 5 | S3method(print,stanFoot) 6 | export(btd_foot) 7 | export(cauchy) 8 | export(compare_foot) 9 | export(foot_abilities) 10 | export(foot_prob) 11 | export(foot_rank) 12 | export(foot_round_robin) 13 | export(laplace) 14 | export(mle_foot) 15 | export(normal) 16 | export(plot_btdPosterior) 17 | export(plot_logStrength) 18 | export(pp_foot) 19 | export(stan_foot) 20 | export(student_t) 21 | import(matrixStats) 22 | importFrom(dplyr,across) 23 | importFrom(dplyr,arrange) 24 | importFrom(dplyr,distinct) 25 | importFrom(dplyr,group_by) 26 | importFrom(dplyr,mutate) 27 | importFrom(dplyr,select) 28 | importFrom(dplyr,summarise) 29 | importFrom(dplyr,ungroup) 30 | importFrom(dplyr,where) 31 | importFrom(extraDistr,dbvpois) 32 | importFrom(extraDistr,dskellam) 33 | importFrom(extraDistr,rbvpois) 34 | importFrom(extraDistr,rskellam) 35 | importFrom(ggplot2,aes) 36 | importFrom(ggplot2,after_stat) 37 | importFrom(ggplot2,annotate) 38 | importFrom(ggplot2,dup_axis) 39 | importFrom(ggplot2,element_blank) 40 | importFrom(ggplot2,element_text) 41 | importFrom(ggplot2,expansion) 42 | importFrom(ggplot2,facet_wrap) 43 | importFrom(ggplot2,geom_boxplot) 44 | importFrom(ggplot2,geom_errorbarh) 45 | importFrom(ggplot2,geom_line) 46 | importFrom(ggplot2,geom_point) 47 | importFrom(ggplot2,geom_rect) 48 | importFrom(ggplot2,geom_ribbon) 49 | importFrom(ggplot2,geom_segment) 50 | importFrom(ggplot2,geom_text) 51 | importFrom(ggplot2,geom_tile) 52 | importFrom(ggplot2,geom_vline) 53 | importFrom(ggplot2,ggplot) 54 | importFrom(ggplot2,ggtitle) 55 | importFrom(ggplot2,guide_legend) 56 | importFrom(ggplot2,guides) 57 | importFrom(ggplot2,labs) 58 | importFrom(ggplot2,lims) 59 | importFrom(ggplot2,position_dodge) 60 | importFrom(ggplot2,rel) 61 | importFrom(ggplot2,scale_color_manual) 62 | importFrom(ggplot2,scale_colour_manual) 63 | importFrom(ggplot2,scale_fill_gradient) 64 | importFrom(ggplot2,scale_fill_manual) 65 | importFrom(ggplot2,scale_x_continuous) 66 | importFrom(ggplot2,scale_x_discrete) 67 | importFrom(ggplot2,scale_y_continuous) 68 | importFrom(ggplot2,scale_y_discrete) 69 | importFrom(ggplot2,theme) 70 | importFrom(ggplot2,theme_bw) 71 | importFrom(ggplot2,unit) 72 | importFrom(ggplot2,xlab) 73 | importFrom(ggplot2,ylab) 74 | importFrom(ggplot2,ylim) 75 | importFrom(ggridges,stat_density_ridges) 76 | importFrom(instantiate,stan_package_model) 77 | importFrom(magrittr,"%>%") 78 | importFrom(matrixStats,colMedians) 79 | importFrom(matrixStats,colQuantiles) 80 | importFrom(matrixStats,colVars) 81 | importFrom(metRology,dt.scaled) 82 | importFrom(metRology,rt.scaled) 83 | importFrom(numDeriv,hessian) 84 | importFrom(posterior,as_draws_rvars) 85 | importFrom(posterior,draws_of) 86 | importFrom(reshape2,melt) 87 | importFrom(rlang,.data) 88 | importFrom(rstan,extract) 89 | importFrom(stats,approx) 90 | importFrom(stats,density) 91 | importFrom(stats,median) 92 | importFrom(stats,quantile) 93 | importFrom(stats,setNames) 94 | importFrom(tidyr,pivot_longer) 95 | importFrom(tidyr,pivot_wider) 96 | -------------------------------------------------------------------------------- /man/pp_foot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pp_foot.R 3 | \name{pp_foot} 4 | \alias{pp_foot} 5 | \title{Posterior predictive checks for football models} 6 | \usage{ 7 | pp_foot(object, data, type = "aggregated", coverage = 0.95) 8 | } 9 | \arguments{ 10 | \item{object}{An object either of class \code{stanFoot}, \code{CmdStanFit}, \code{stanfit}.} 11 | 12 | \item{data}{A data frame containing match data with columns: 13 | \itemize{ 14 | \item \code{periods}: Time point of each observation (integer >= 1). 15 | \item \code{home_team}: Home team's name (character string). 16 | \item \code{away_team}: Away team's name (character string). 17 | \item \code{home_goals}: Goals scored by the home team (integer >= 0). 18 | \item \code{away_goals}: Goals scored by the away team (integer >= 0). 19 | }} 20 | 21 | \item{type}{Type of plots, one among \code{"aggregated"} or \code{"matches"}. Default is \code{"aggregated"}.} 22 | 23 | \item{coverage}{Argument to specify the width \eqn{1-\alpha} of posterior probability intervals. Default is 0.95.} 24 | } 25 | \value{ 26 | A list with elements: 27 | \itemize{ 28 | \item{\code{pp_plot}}: A \code{ggplot} object for the selected \code{type} of plot. 29 | \item{\code{pp_table}}: A data frame of summary statistics: 30 | \itemize{ 31 | \item For \code{"aggregated"}: Goal differences and their Bayesian p‑values. 32 | \item For \code{"matches"}: Nominal \code{1-alpha} and observed empirical Bayesian coverage. 33 | } 34 | } 35 | } 36 | \description{ 37 | The function provides posterior predictive plots to check the adequacy of the Bayesian models as 38 | returned by the \code{stan_foot} function. 39 | } 40 | \details{ 41 | Posterior predictive plots: when \code{"aggregated"} (default) is selected, the function 42 | returns a frequency plot for some pre-selected goal-difference values, 43 | along with their correspondent Bayesian p-values, computed as 44 | \eqn{Pr(y_{rep} \ge y)|y)}, where \eqn{y_{rep}} is a data replication from the 45 | posterior predictive distribution (more details in Gelman et al., 2013). 46 | Bayesian p-values very close to 0 or 1 could exhibit 47 | possible model misfits. 48 | 49 | When \code{"matches"} is selected an ordered-frequency plot for all the 50 | goal-differences in the considered matches is provided, along with the 51 | empirical Bayesian coverage at level \eqn{1-\alpha}. 52 | } 53 | \examples{ 54 | \dontrun{ 55 | if (instantiate::stan_cmdstan_exists()) { 56 | library(dplyr) 57 | 58 | data("italy") 59 | italy_2000 <- italy \%>\% 60 | dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\% 61 | dplyr::filter(Season == "2000") 62 | 63 | colnames(italy_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals") 64 | 65 | fit <- stan_foot(italy_2000, "double_pois", iter_sampling = 200) 66 | 67 | pp_foot(fit, italy_2000) 68 | } 69 | } 70 | 71 | } 72 | \references{ 73 | Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari, A., & Rubin, D. B. (2013). Bayesian data analysis. CRC press. 74 | } 75 | \author{ 76 | Leonardo Egidi \email{legidi@units.it} and Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it} 77 | } 78 | -------------------------------------------------------------------------------- /man/foot_abilities.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/foot_abilities.R 3 | \name{foot_abilities} 4 | \alias{foot_abilities} 5 | \title{Plot football abilities from Stan and MLE models} 6 | \usage{ 7 | foot_abilities(object, data, type = "both", teams = NULL) 8 | } 9 | \arguments{ 10 | \item{object}{An object either of class \code{stanFoot}, \code{CmdStanFit}, \code{stanfit}, or class 11 | \code{\link{list}} containing the Maximum Likelihood Estimates (MLE) for the model parameters fitted 12 | with \code{mle_foot}.} 13 | 14 | \item{data}{A data frame containing match data with columns: 15 | \itemize{ 16 | \item \code{periods}: Time point of each observation (integer >= 1). 17 | \item \code{home_team}: Home team's name (character string). 18 | \item \code{away_team}: Away team's name (character string). 19 | \item \code{home_goals}: Goals scored by the home team (integer >= 0). 20 | \item \code{away_goals}: Goals scored by the away team (integer >= 0). 21 | }} 22 | 23 | \item{type}{Type of ability in Poisson models: one among \code{"defense"}, \code{"attack"} or \code{"both"}. Default is \code{"both"}.} 24 | 25 | \item{teams}{An optional character vector specifying team names to include. If \code{NULL}, all teams are included.} 26 | } 27 | \value{ 28 | A \code{ggplot} object showing each selected team’s ability estimates: 29 | \itemize{ 30 | \item For static Bayesian or MLE fits, horizontal error bars 31 | (95\% intervals) and point estimates. 32 | \item For dynamic Bayesian fits, ribbon and line plots over \code{periods}. 33 | } 34 | } 35 | \description{ 36 | Depicts teams' abilities either from the Stan models fitted via the \code{stan_foot} function 37 | or from MLE models fitted via the \code{mle_foot} function. 38 | } 39 | \examples{ 40 | \dontrun{ 41 | if (instantiate::stan_cmdstan_exists()) { 42 | library(dplyr) 43 | 44 | data("italy") 45 | italy <- as_tibble(italy) 46 | 47 | ### no dynamics, no prediction 48 | 49 | italy_2000_2002 <- italy \%>\% 50 | dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\% 51 | dplyr::filter(Season == "2000" | Season == "2001" | Season == "2002") 52 | 53 | colnames(italy_2000_2002) <- c("periods", "home_team", "away_team", "home_goals", "away_goals") 54 | 55 | fit1 <- stan_foot( 56 | data = italy_2000_2002, 57 | model = "double_pois" 58 | ) # double poisson 59 | 60 | fit2 <- stan_foot( 61 | data = italy_2000_2002, 62 | model = "biv_pois" 63 | ) # bivariate poisson 64 | 65 | fit3 <- stan_foot( 66 | data = italy_2000_2002, 67 | model = "skellam" 68 | ) # skellam 69 | 70 | fit4 <- stan_foot( 71 | data = italy_2000_2002, 72 | model = "student_t" 73 | ) # student_t 74 | 75 | foot_abilities(fit1, italy_2000_2002) 76 | foot_abilities(fit2, italy_2000_2002) 77 | foot_abilities(fit3, italy_2000_2002) 78 | foot_abilities(fit4, italy_2000_2002) 79 | 80 | ### seasonal dynamics, predict the last season 81 | 82 | fit5 <- stan_foot( 83 | data = italy_2000_2002, 84 | model = "biv_pois", 85 | predict = 180, 86 | dynamic_type = "seasonal" 87 | ) # bivariate poisson 88 | foot_abilities(fit5, italy_2000_2002) 89 | } 90 | } 91 | } 92 | \author{ 93 | Leonardo Egidi \email{legidi@units.it} and Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}. 94 | } 95 | -------------------------------------------------------------------------------- /src/stan/static_btd.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; // Number of observations 3 | int nteams; // Number of teams 4 | array[N] int team1; // Index of team1 in each observation 5 | array[N] int team2; // Index of team2 in each observation 6 | real mean_logStrength; // Initial mean for logStrength 7 | real sd_logStrength; // Standard deviation for logStrength 8 | real mean_logTie; 9 | real sd_logTie; 10 | array[N] int y; // Outcome: 1 if team1 beats team2, 3 if team2 beats team1, 2 for tie 11 | int ind_home; // Home effect indicator 12 | real mean_home; // Mean for home effect 13 | real sd_home; // Standard deviation for home effect 14 | } 15 | parameters { 16 | vector[nteams] logStrength_raw; // Log strength parameters for each team (static) 17 | real logTie; // Log tie parameter 18 | real home; // Home team effect parameter 19 | } 20 | 21 | transformed parameters { 22 | real adj_h_eff; 23 | vector[nteams] logStrength; 24 | 25 | adj_h_eff = home * ind_home; 26 | 27 | // Sum-to-zero constraint for log-strength parameters 28 | logStrength = logStrength_raw - mean(logStrength_raw); 29 | } 30 | 31 | model { 32 | // Priors for strengths 33 | logStrength_raw ~ normal(mean_logStrength, sd_logStrength); 34 | 35 | // Prior for tie parameter 36 | logTie ~ normal(mean_logTie, sd_logTie); 37 | 38 | // Prior for the home effect 39 | 40 | home ~ normal(mean_home, sd_home); 41 | 42 | // Likelihood 43 | for (n in 1:N) { 44 | real delta_team1 = exp(logStrength[team1[n]] + adj_h_eff); 45 | real delta_team2 = exp(logStrength[team2[n]]); 46 | real nu = exp(logTie); 47 | real denom = delta_team1 + delta_team2 + (nu * sqrt(delta_team1 * delta_team2)); 48 | real p_i_win = delta_team1 / denom; 49 | real p_j_win = delta_team2 / denom; 50 | real p_tie = (nu * sqrt(delta_team1 * delta_team2)) / denom; 51 | if (y[n] == 1) { 52 | target += log(p_i_win); 53 | } else if (y[n] == 3) { 54 | target += log(p_j_win); 55 | } else if (y[n] == 2) { 56 | target += log(p_tie); 57 | } 58 | } 59 | } 60 | 61 | generated quantities { 62 | // Log-likelihood vector 63 | vector[N] log_lik; 64 | 65 | // Posterior predictive vector 66 | array[N] int y_rep; 67 | 68 | for (n in 1:N) { 69 | // Delta values 70 | real delta_team1 = exp(logStrength[team1[n]] + adj_h_eff); 71 | real delta_team2 = exp(logStrength[team2[n]]); 72 | real nu = exp(logTie); 73 | real denom = delta_team1 + delta_team2 + (nu * sqrt(delta_team1 * delta_team2)); 74 | 75 | // Probabilities 76 | real p_i_win = delta_team1 / denom; 77 | real p_j_win = delta_team2 / denom; 78 | real p_tie = (nu * sqrt(delta_team1 * delta_team2)) / denom; 79 | 80 | // Log-likelihood 81 | if (y[n] == 1) { 82 | log_lik[n] = log(p_i_win); 83 | } else if (y[n] == 3) { 84 | log_lik[n] = log(p_j_win); 85 | } else if (y[n] == 2) { 86 | log_lik[n] = log(p_tie); 87 | } 88 | 89 | // Posterior predictive samples 90 | vector[3] probs; 91 | probs[1] = p_i_win; 92 | probs[2] = p_tie; 93 | probs[3] = p_j_win; 94 | 95 | y_rep[n] = categorical_rng(probs); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /man/plot_btdPosterior.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_btdPosterior.R 3 | \name{plot_btdPosterior} 4 | \alias{plot_btdPosterior} 5 | \title{Plot Posterior Distributions for \code{btdFoot} Objects} 6 | \usage{ 7 | plot_btdPosterior( 8 | x, 9 | pars = "logStrength", 10 | plot_type = "boxplot", 11 | teams = NULL, 12 | ncol = NULL, 13 | scales = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{x}{An object of class \code{btdFoot}.} 18 | 19 | \item{pars}{A character string specifying the parameter to plot. 20 | Choices are \code{"logStrength"}, \code{"logTie"}, and \code{"home"}. 21 | Default is \code{"logStrength"}.} 22 | 23 | \item{plot_type}{A character string specifying the type of plot. 24 | Choices are \code{"boxplot"} and \code{"density"}. 25 | Default is \code{"boxplot"}.} 26 | 27 | \item{teams}{An optional character vector specifying team names to include in the posterior 28 | boxplots or density plots. If \code{NULL}, all teams are included.} 29 | 30 | \item{ncol}{An optional integer specifying the number of columns in the facet wrap 31 | when using a dynamic Bayesian Bradley-Terry-Davidson model. 32 | Default is \code{8}.} 33 | 34 | \item{scales}{An optional character string specifying the scales for the facets 35 | when using a dynamic Bayesian Bradley-Terry-Davidson model. 36 | Options include \code{"free"}, \code{"fixed"}, \code{"free_x"}, and \code{"free_y"}. 37 | Default is \code{"free_x"}.} 38 | } 39 | \value{ 40 | A \code{ggplot} object displaying: 41 | \itemize{ 42 | \item For \code{pars="logStrength"}: 43 | \itemize{ 44 | \item Dynamic BTD: Faceted boxplots or density plots (including the 95\% credible interval) of posterior log-strengths by team and period. 45 | \item Static BTD: Boxplots or density plots (including the 95\% credible interval) of posterior log-strengths for each team. 46 | } 47 | \item For \code{pars="logTie"} or \code{pars="home"}: A single boxplot or density plot with 95\% credible interval. 48 | } 49 | } 50 | \description{ 51 | Plots for the posterior distributions of team log-strengths and other parameters with customizable plot types and facets. 52 | } 53 | \examples{ 54 | \dontrun{ 55 | if (instantiate::stan_cmdstan_exists()) { 56 | library(dplyr) 57 | 58 | # Load example data 59 | data("italy") 60 | 61 | # Prepare the data 62 | italy_2020_2021_rank <- italy \%>\% 63 | select(Season, home, visitor, hgoal, vgoal) \%>\% 64 | filter(Season \%in\% c("2020", "2021")) \%>\% 65 | mutate(match_outcome = case_when( 66 | hgoal > vgoal ~ 1, # Home team wins 67 | hgoal == vgoal ~ 2, # Draw 68 | hgoal < vgoal ~ 3 # Away team wins 69 | )) \%>\% 70 | mutate(periods = case_when( 71 | row_number() <= 190 ~ 1, 72 | row_number() <= 380 ~ 2, 73 | row_number() <= 570 ~ 3, 74 | TRUE ~ 4 75 | )) \%>\% # Assign periods based on match number 76 | select(periods, 77 | home_team = home, 78 | away_team = visitor, match_outcome 79 | ) 80 | 81 | # Fit the Bayesian Bradley-Terry-Davidson model with dynamic ranking 82 | fit_rank_dyn <- btd_foot( 83 | data = italy_2020_2021_rank, 84 | dynamic_rank = TRUE, 85 | rank_measure = "median", 86 | iter_sampling = 1000, 87 | parallel_chains = 2, 88 | chains = 2 89 | ) 90 | 91 | # Plot posterior distributions with default settings 92 | plot_btdPosterior(fit_rank_dyn) 93 | 94 | # Plot posterior distributions for specific teams with customized facets 95 | plot_btdPosterior( 96 | fit_rank_dyn, 97 | teams = c("AC Milan", "AS Roma", "Juventus", "Inter"), 98 | ncol = 2 99 | ) 100 | 101 | plot_btdPosterior( 102 | fit_rank_dyn, 103 | plot_type = "density", 104 | teams = c("AC Milan", "AS Roma", "Juventus", "Inter"), 105 | ncol = 2 106 | ) 107 | } 108 | } 109 | } 110 | \author{ 111 | Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}. 112 | } 113 | -------------------------------------------------------------------------------- /src/stan/dynamic_btd.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; // Number of observations 3 | int nteams; // Number of teams 4 | int ntimes_rank; // Number of time points 5 | array[N] int instants_rank; // Time point of each observation 6 | array[N] int team1; // Index of team1 in each observation 7 | array[N] int team2; // Index of team2 in each observation 8 | real mean_logStrength; // Initial mean for logStrength 9 | real sd_logStrength; // Standard deviation of the AR(1) process 10 | real mean_logTie; 11 | real sd_logTie; 12 | array[N] int y; // Outcome: 1 if team1 beats team2, 3 if team2 beats team1, 2 for tie 13 | int ind_home; // Home effect indicator 14 | real mean_home; // Mean for home effect 15 | real sd_home; // Standard deviation for home effect 16 | } 17 | 18 | parameters { 19 | matrix[ntimes_rank, nteams] logStrength_raw; // Log strength parameters for each team over time 20 | real logTie; // Log tie parameter 21 | real home; // Home team effect parameter 22 | } 23 | 24 | transformed parameters { 25 | real adj_h_eff; 26 | matrix[ntimes_rank, nteams] logStrength; 27 | 28 | 29 | // Sum-to-zero constraint for log-strength parameters 30 | logStrength[1]=logStrength_raw[1]-mean(logStrength_raw[1]); 31 | for (t in 2:ntimes_rank){ 32 | logStrength[t]=logStrength_raw[t]-mean(logStrength_raw[t]); 33 | } 34 | 35 | adj_h_eff = home * ind_home; 36 | } 37 | 38 | model { 39 | // Priors for initial strengths 40 | for (k in 1:nteams) { 41 | logStrength[1, k] ~ normal(mean_logStrength, sd_logStrength); 42 | } 43 | 44 | // Prior for tie parameter 45 | logTie ~ normal(mean_logTie, sd_logTie); 46 | 47 | // AR(1) process for strength parameters 48 | for (t_idx in 2:ntimes_rank) { 49 | for (k in 1:nteams) { 50 | logStrength_raw[t_idx, k] ~ normal(logStrength_raw[t_idx - 1, k], sd_logStrength); 51 | } 52 | } 53 | 54 | // Prior for the home effect 55 | home ~ normal(mean_home, sd_home); 56 | 57 | // Likelihood 58 | for (n in 1:N) { 59 | real delta_team1 = exp(logStrength[instants_rank[n], team1[n]] + adj_h_eff); 60 | real delta_team2 = exp(logStrength[instants_rank[n], team2[n]]); 61 | real nu = exp(logTie); 62 | real denom = delta_team1 + delta_team2 + (nu * sqrt(delta_team1 * delta_team2)); 63 | real p_i_win = delta_team1 / denom; 64 | real p_j_win = delta_team2 / denom; 65 | real p_tie = (nu * sqrt(delta_team1 * delta_team2)) / denom; 66 | if (y[n] == 1) { 67 | target += log(p_i_win); 68 | } else if (y[n] == 3) { 69 | target += log(p_j_win); 70 | } else if (y[n] == 2) { 71 | target += log(p_tie); 72 | } 73 | } 74 | } 75 | 76 | generated quantities { 77 | // Log-likelihood vector 78 | vector[N] log_lik; 79 | 80 | // Posterior predictive vector 81 | array[N] int y_rep; 82 | 83 | for (n in 1:N) { 84 | // Delta values 85 | real delta_team1 = exp(logStrength[instants_rank[n], team1[n]] + adj_h_eff); 86 | real delta_team2 = exp(logStrength[instants_rank[n], team2[n]]); 87 | real nu = exp(logTie); 88 | real denom = delta_team1 + delta_team2 + (nu * sqrt(delta_team1 * delta_team2)); 89 | 90 | // Probabilities 91 | real p_i_win = delta_team1 / denom; 92 | real p_j_win = delta_team2 / denom; 93 | real p_tie = (nu * sqrt(delta_team1 * delta_team2)) / denom; 94 | 95 | // Log-likelihood 96 | if (y[n] == 1) { 97 | log_lik[n] = log(p_i_win); 98 | } else if (y[n] == 3) { 99 | log_lik[n] = log(p_j_win); 100 | } else if (y[n] == 2) { 101 | log_lik[n] = log(p_tie); 102 | } 103 | 104 | // Posterior predictive samples 105 | vector[3] probs; 106 | probs[1] = p_i_win; 107 | probs[2] = p_tie; 108 | probs[3] = p_j_win; 109 | 110 | y_rep[n] = categorical_rng(probs); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /man/priors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/foot_priors.R 3 | \name{priors} 4 | \alias{priors} 5 | \alias{normal} 6 | \alias{student_t} 7 | \alias{cauchy} 8 | \alias{laplace} 9 | \title{Football priors distributions and options} 10 | \usage{ 11 | normal(location = 0, scale = NULL, autoscale = TRUE) 12 | 13 | student_t(df = 1, location = 0, scale = NULL, autoscale = TRUE) 14 | 15 | cauchy(location = 0, scale = NULL, autoscale = TRUE) 16 | 17 | laplace(location = 0, scale = NULL, autoscale = TRUE) 18 | } 19 | \arguments{ 20 | \item{location}{Prior location. In most cases, this is the prior mean, but 21 | for \code{cauchy} (which is equivalent to \code{student_t} with 22 | \code{df=1}), the mean does not exist and \code{location} is the prior 23 | median. The default value is \eqn{0}.} 24 | 25 | \item{scale}{Prior scale. The default depends on the family (see 26 | \strong{Details}).} 27 | 28 | \item{autoscale}{A logical scalar, defaulting to \code{TRUE}.} 29 | 30 | \item{df}{Prior degrees of freedom. The default is \eqn{1} for 31 | \code{student_t}, in which case it is equivalent to \code{cauchy}.} 32 | } 33 | \value{ 34 | A named list to be used internally by the 35 | \code{stan_foot} model fitting function. 36 | } 37 | \description{ 38 | This prior specification is just a duplicate 39 | of some of the priors used by the \pkg{rstanarm} package. 40 | 41 | These prior distributions can be passed to the 42 | \code{stan_foot} function, through the arguments \code{prior} and \code{prior_sd}. 43 | See the vignette \href{http://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior 44 | Distributions for rstanarm Models}} for further details (to view the priors used for an existing model see 45 | \href{https://mc-stan.org/rstanarm/reference/prior_summary.stanreg.html}{prior_summary}). 46 | The default priors used in the \pkg{stan_foot} modeling function 47 | are intended to be \emph{weakly informative} in that they provide moderate 48 | regularlization and help stabilize computation. 49 | 50 | You can choose between: \code{normal}, \code{cauchy}, \code{laplace}, \code{student_t}. 51 | } 52 | \details{ 53 | The details depend on the family of the prior being used: 54 | \subsection{Student t family}{ 55 | Family members: 56 | \itemize{ 57 | \item \code{normal(location, scale)} 58 | \item \code{student_t(df, location, scale)} 59 | \item \code{cauchy(location, scale)} 60 | } 61 | Each of these functions also takes an argument \code{autoscale}. 62 | 63 | For the prior distribution for the intercept, \code{location}, 64 | \code{scale}, and \code{df} should be scalars. For the prior for the other 65 | coefficients they can either be vectors of length equal to the number of 66 | coefficients (not including the intercept), or they can be scalars, in 67 | which case they will be recycled to the appropriate length. As the 68 | degrees of freedom approaches infinity, the Student t distribution 69 | approaches the normal distribution and if the degrees of freedom are one, 70 | then the Student t distribution is the Cauchy distribution. 71 | 72 | If \code{scale} is not specified it will default to \eqn{10} for the 73 | intercept and \eqn{2.5} for the other coefficients. 74 | 75 | If the \code{autoscale} argument is \code{TRUE} (the default), then the 76 | scales will be further adjusted as described above in the documentation of 77 | the \code{autoscale} argument in the \strong{Arguments} section. 78 | } 79 | 80 | \subsection{Laplace family}{ 81 | Family members: 82 | \itemize{ 83 | \item \code{laplace(location, scale)} 84 | } 85 | Each of these functions also takes an argument \code{autoscale}. 86 | 87 | The Laplace distribution is also known as the double-exponential 88 | distribution. It is a symmetric distribution with a sharp peak at its mean 89 | / median / mode and fairly long tails. This distribution can be motivated 90 | as a scale mixture of normal distributions and the remarks above about the 91 | normal distribution apply here as well. 92 | 93 | } 94 | } 95 | \references{ 96 | Gelman, A., Jakulin, A., Pittau, M. G., and Su, Y. (2008). A weakly 97 | informative default prior distribution for logistic and other regression 98 | models. \emph{Annals of Applied Statistics}. 2(4), 1360--1383. 99 | } 100 | \seealso{ 101 | The various vignettes for the \pkg{rstanarm} package also discuss 102 | and demonstrate the use of some of the supported prior distributions. 103 | } 104 | \author{ 105 | Leonardo Egidi \email{legidi@units.it} 106 | } 107 | -------------------------------------------------------------------------------- /R/plot_logStrength.R: -------------------------------------------------------------------------------- 1 | #' Plot Rankings for btdFoot Objects 2 | #' 3 | #' Visualizes team rankings based on whether the ranking is dynamic or static. 4 | #' 5 | #' \itemize{ 6 | #' \item Dynamic Ranking: Plots Rank Points over Periods for each team with lines and points. 7 | #' \item Static Ranking: Plots Rank Points on the x-axis against Team Names on the y-axis with horizontal lines and points. 8 | #' } 9 | #' 10 | #' @param x An object of class \code{btdFoot}. 11 | #' @param teams An optional character vector specifying team names to include in the rankings plot. If \code{NULL}, all teams are included. 12 | #' 13 | #' @return A \code{ggplot} object: 14 | #' \itemize{ 15 | #' \item Dynamic BTD: A lineplot for the \code{log_strengths} over each period, colored by team. 16 | #' \item Static BTD: An horizontal barplot for each team. 17 | #' } 18 | #' 19 | #' @author Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}. 20 | #' 21 | #' 22 | #' @examples 23 | #' \dontrun{ 24 | #' if (instantiate::stan_cmdstan_exists()) { 25 | #' library(dplyr) 26 | #' 27 | #' data("italy") 28 | #' 29 | #' italy_2020_2021_rank <- italy %>% 30 | #' select(Season, home, visitor, hgoal, vgoal) %>% 31 | #' filter(Season == "2020" | Season == "2021") %>% 32 | #' mutate(match_outcome = case_when( 33 | #' hgoal > vgoal ~ 1, # Home team wins 34 | #' hgoal == vgoal ~ 2, # Draw 35 | #' hgoal < vgoal ~ 3 # Away team wins 36 | #' )) %>% 37 | #' mutate(periods = case_when( 38 | #' row_number() <= 190 ~ 1, 39 | #' row_number() <= 380 ~ 2, 40 | #' row_number() <= 570 ~ 3, 41 | #' TRUE ~ 4 42 | #' )) %>% # Assign periods based on match number 43 | #' select(periods, 44 | #' home_team = home, 45 | #' away_team = visitor, match_outcome 46 | #' ) 47 | #' 48 | #' fit_rank_dyn <- btd_foot( 49 | #' data = italy_2020_2021_rank, 50 | #' dynamic_rank = TRUE, 51 | #' rank_measure = "median", 52 | #' iter_sampling = 1000, 53 | #' parallel_chains = 2, 54 | #' chains = 2 55 | #' ) 56 | #' 57 | #' plot_logStrength(fit_rank_dyn) 58 | #' 59 | #' plot_logStrength(fit_rank_dyn, teams = c("AC Milan", "AS Roma", "Juventus", "Inter")) 60 | #' } 61 | #' } 62 | #' 63 | #' @importFrom ggplot2 ggplot aes labs geom_line geom_point geom_segment theme_bw 64 | #' theme element_text guides guide_legend 65 | #' @importFrom rlang .data 66 | #' @export 67 | 68 | plot_logStrength <- function(x, teams = NULL) { 69 | # Check if the object is of class 'btdFoot' 70 | if (!inherits(x, "btdFoot")) { 71 | stop("Object must be of class 'btdFoot'.") 72 | } 73 | 74 | 75 | # Determine if the ranking is dynamic or static 76 | is_dynamic <- ifelse(length(unique(x$rank$periods, na.rm = TRUE)) > 1, TRUE, FALSE) 77 | 78 | # Handle teams 79 | if (!is.null(teams)) { 80 | # Check that specified teams exist 81 | available_teams <- unique(c(x$data$home_team, x$data$away_team)) 82 | missing_teams <- setdiff(teams, available_teams) 83 | if (length(missing_teams) > 0) { 84 | stop(paste0("The following teams are not present in the data: ", paste(missing_teams, collapse = ", "))) 85 | } 86 | 87 | x$rank <- x$rank[x$rank$team %in% teams, ] 88 | } 89 | 90 | if (is_dynamic) { 91 | # Dynamic Ranking Plot 92 | p <- ggplot(x$rank, aes(x = .data$periods, y = .data$log_strengths, color = .data$team)) + 93 | geom_line() + 94 | geom_point() + 95 | labs( 96 | x = "Periods", 97 | y = "Log-Strength Values" 98 | ) + 99 | theme_bw() + 100 | theme( 101 | legend.position = "right", 102 | legend.title = element_text(size = 10), 103 | plot.title = element_text(hjust = 0.5) 104 | ) + 105 | scale_x_continuous(breaks = sort(unique(x$rank$periods))) + 106 | guides( 107 | color = guide_legend(title = "Team") 108 | ) 109 | } else { 110 | # Static Ranking Plot 111 | p <- ggplot(x$rank, aes(y = stats::reorder(.data$team, .data$log_strengths), x = .data$log_strengths)) + 112 | geom_segment( 113 | aes(x = 0, xend = .data$log_strengths, yend = .data$team), 114 | color = "deepskyblue4", 115 | linewidth = 1 116 | ) + 117 | geom_point( 118 | color = "firebrick4", 119 | size = 3 120 | ) + 121 | labs( 122 | x = "Log-Strength Values", 123 | y = "Teams" 124 | ) + 125 | theme_bw() + 126 | theme( 127 | legend.position = "none", 128 | plot.title = element_text(hjust = 0.5) 129 | ) 130 | } 131 | 132 | return(p) 133 | } 134 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r} 8 | #| echo: false 9 | knitr::opts_chunk$set( 10 | collapse = TRUE, 11 | comment = "#>", 12 | fig.path = "man/figures/README-" 13 | ) 14 | ``` 15 | 16 | # footBayes 17 | 18 | [![CRAN Version](https://www.r-pkg.org/badges/version/footBayes)](https://cran.r-project.org/package=footBayes) 19 | ![Dev Version](https://img.shields.io/badge/build-2.1.0-blue?style=flat&logo=devdotto&label=Dev.%20Vers.) 20 | [![R-CMD-check.yaml](https://github.com/LeoEgidi/footBayes/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/LeoEgidi/footBayes/actions/workflows/R-CMD-check.yaml) 21 | [![Codecov test coverage](https://codecov.io/gh/LeoEgidi/footBayes/graph/badge.svg)](https://app.codecov.io/gh/LeoEgidi/footBayes) 22 | [![Downloads](https://cranlogs.r-pkg.org/badges/footBayes?color=brightgreen)](https://CRAN.R-project.org/package=footBayes) 23 | 24 | The goal of `footBayes` is to propose a complete workflow to: 25 | 26 | - Fit the most well-known football models, including the double Poisson, bivariate Poisson, Skellam, and Student‑t distributions. It supports both maximum likelihood estimation (MLE) and Bayesian inference. For Bayesian methods, it incorporates several techniques: MCMC sampling with Hamiltonian Monte Carlo, variational inference using either the Pathfinder algorithm or Automatic Differentiation Variational Inference (ADVI), and the Laplace approximation. 27 | 28 | - Visualize the teams' abilities, the model checks, the rank-league reconstruction; 29 | 30 | - Predict out-of-sample matches. 31 | 32 | ## Installation 33 | 34 | Starting with version **2.0.0**, `footBayes` package requires installing the R package [`cmdstanr`](https://mc-stan.org/cmdstanr/) (not available on CRAN) and the command-line interface to Stan: [`CmdStan`](https://mc-stan.org/users/interfaces/cmdstan.html). 35 | For a step-by-step installation, please follow the instructions provided in [Getting started with CmdStanR](https://mc-stan.org/cmdstanr/articles/cmdstanr.html). 36 | 37 | You can install the released version of `footBayes` from CRAN with: 38 | 39 | ``` r 40 | install.packages("footBayes", type = "source") 41 | ``` 42 | 43 | Please note that it is important to set `type = "source"`. Otherwise, the 'CmdStan' models in the package may not be compiled during installation. 44 | 45 | Alternatively to CRAN, you can install the development version from GitHub with: 46 | 47 | ```{r gh-installation, eval = FALSE} 48 | # install.packages("devtools") 49 | devtools::install_github("leoegidi/footBayes") 50 | ``` 51 | 52 | ## Example 53 | 54 | In what follows, a quick example to fit a Bayesian double Poisson model for the Italian Serie A (seasons 2000-2001, 2001-2002, 2002-2003), visualize the estimated teams' abilities, and predict the last four match days for the season 2002-2003: 55 | 56 | ```{r load, message=FALSE} 57 | library(footBayes) 58 | library(dplyr) 59 | ``` 60 | 61 | 62 | 63 | ```{r fit1, results='hide', message=FALSE} 64 | # Dataset for Italian Serie A 65 | data("italy") 66 | italy <- as_tibble(italy) 67 | italy_2000_2002 <- italy %>% 68 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 69 | filter(Season == "2000" | Season == "2001" | Season == "2002") 70 | 71 | colnames(italy_2000_2002) <- c("periods", 72 | "home_team", 73 | "away_team", 74 | "home_goals", 75 | "away_goals") 76 | 77 | # Double poisson fit (predict last 4 match-days) 78 | fit1 <- stan_foot(data = italy_2000_2002, 79 | model = "double_pois", 80 | predict = 36, 81 | iter_sampling = 200, 82 | chains = 2) 83 | ``` 84 | 85 | 86 | The results (i.e., attack and defense effects) can be investigated using 87 | ```{r summary} 88 | print(fit1, pars = c("att", "def")) 89 | ``` 90 | 91 | To visually investigate the attack and defense effects, we 92 | can use the `foot_abilities` function 93 | ```{r abilities} 94 | foot_abilities(fit1, italy_2000_2002) # teams abilities 95 | ``` 96 | 97 | To check the adequacy of the Bayesian model the function `pp_foot` provides posterior predictive plots 98 | ```{r pp_foot} 99 | pp_foot(fit1, italy_2000_2002) # pp checks 100 | ``` 101 | 102 | Furthermore, the function `foot_rank` shows the final rank table and the plot with the predicted points 103 | ```{r pp_foot} 104 | foot_rank(fit1, italy_2000_2002) # rank league reconstruction 105 | ``` 106 | 107 | In order to analyze the possible outcomes of the predicted matches, the function `foot_prob` provides a table containing the home win, draw and away win probabilities for the out-of-sample matches 108 | ```{r pp_foot} 109 | foot_prob(fit1, italy_2000_2002) # out-of-sample posterior pred. probabilities 110 | ``` 111 | 112 | 113 | 114 | For more and more technical details and references, see the vignette! 115 | -------------------------------------------------------------------------------- /src/stan/student_t_dynamic.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; // number of matches 3 | int N_prev; 4 | int nteams; // number of teams 5 | int ntimes; // number of dynamic periods for abilities 6 | int ntimes_rank; // number of dynamic periods for rankings 7 | array[N] int team1; 8 | array[N] int team2; 9 | array[N_prev] int team1_prev; 10 | array[N_prev] int team2_prev; 11 | matrix[N, 2] y; 12 | real nu; 13 | array[ntimes] int time; 14 | array[N] int instants; // time indices for abilities 15 | array[N_prev] int instants_prev; 16 | array[N] int instants_rank; // time indices for rankings 17 | matrix[ntimes_rank, nteams] ranking; // rankings over time 18 | int ind_home; 19 | real mean_home; // Mean for home effect 20 | real sd_home; // Standard deviation for home effect 21 | 22 | 23 | // priors part 24 | int prior_dist_num; 25 | int prior_dist_sd_num; 26 | 27 | real hyper_df; 28 | real hyper_location; 29 | 30 | real hyper_sd_df; 31 | real hyper_sd_location; 32 | real hyper_sd_scale; 33 | 34 | // commensurate prior 35 | int ind_comm_prior; 36 | real mu_spike; 37 | real sd_spike; 38 | real mu_slab; 39 | real sd_slab; 40 | real p_spike; 41 | } 42 | transformed data { 43 | vector[N] diff_y = y[,1] - y[,2]; // modeled data 44 | } 45 | parameters { 46 | real beta; // common intercept 47 | matrix[ntimes, nteams] alpha; // per-team weights over time 48 | real sigma_a; // common variance 49 | real sigma_y; // noise term 50 | real sigma_alpha; 51 | real home; // home effect 52 | } 53 | transformed parameters { 54 | array[ntimes, ntimes_rank, nteams] real ability; 55 | matrix[ntimes, nteams] mu_alpha; 56 | real adj_h_eff; // Adjusted home effect 57 | 58 | adj_h_eff = home * ind_home; 59 | 60 | for (t in 1:ntimes) { 61 | for(tr in 1:ntimes_rank) { 62 | for (h in 1:nteams) { 63 | ability[t, tr, h] = beta * ranking[tr, h] + alpha[t, h] * sigma_a; 64 | } 65 | } 66 | } 67 | 68 | mu_alpha[1] = rep_row_vector(0, nteams); 69 | for (t in 2:ntimes) { 70 | mu_alpha[t] = alpha[t - 1]; 71 | } 72 | } 73 | model { 74 | // Priors for team-specific abilities 75 | for (h in 1:nteams) { 76 | if (prior_dist_num == 1) { 77 | alpha[, h] ~ multi_normal(mu_alpha[, h], diag_matrix(rep_vector(square(sigma_alpha), ntimes))); 78 | } else if (prior_dist_num == 2) { 79 | alpha[, h] ~ multi_student_t(hyper_df, mu_alpha[, h], diag_matrix(rep_vector(square(sigma_alpha), ntimes))); 80 | } else if (prior_dist_num == 3) { 81 | alpha[, h] ~ multi_student_t(1, mu_alpha[, h], diag_matrix(rep_vector(square(sigma_alpha), ntimes))); 82 | } 83 | } 84 | 85 | // Hyperpriors for standard deviations 86 | if (prior_dist_sd_num == 1) { 87 | sigma_a ~ normal(hyper_sd_location, hyper_sd_scale); 88 | sigma_alpha ~ normal(hyper_sd_location, hyper_sd_scale); 89 | } else if (prior_dist_sd_num == 2) { 90 | sigma_a ~ student_t(hyper_sd_df, hyper_sd_location, hyper_sd_scale); 91 | sigma_alpha ~ student_t(hyper_sd_df, hyper_sd_location, hyper_sd_scale); 92 | } else if (prior_dist_sd_num == 3) { 93 | sigma_a ~ cauchy(hyper_sd_location, hyper_sd_scale); 94 | sigma_alpha ~ cauchy(hyper_sd_location, hyper_sd_scale); 95 | } else if (prior_dist_sd_num == 4) { 96 | sigma_a ~ double_exponential(hyper_sd_location, hyper_sd_scale); 97 | sigma_alpha ~ double_exponential(hyper_sd_location, hyper_sd_scale); 98 | } 99 | 100 | // Priors for other parameters 101 | beta ~ normal(0, 2.5); 102 | sigma_y ~ normal(0, 2.5); 103 | target+=normal_lpdf(home|mean_home,sd_home); 104 | 105 | // Likelihood 106 | for (n in 1:N) { 107 | diff_y[n] ~ student_t( 108 | nu, 109 | adj_h_eff+ 110 | ability[instants[n], instants_rank[n], team1[n]] - ability[instants[n], instants_rank[n], team2[n]], 111 | sigma_y); 112 | 113 | } 114 | } 115 | generated quantities { 116 | vector[N] diff_y_rep; 117 | vector[N] log_lik; 118 | vector[N_prev] diff_y_prev; 119 | 120 | for (n in 1:N) { 121 | diff_y_rep[n] = student_t_rng(nu, adj_h_eff+ability[instants[n], instants_rank[n], team1[n]] - ability[instants[n], instants_rank[n], team2[n]], 122 | sigma_y); 123 | log_lik[n] = student_t_lpdf(diff_y[n] | nu, adj_h_eff+ability[instants[n], instants_rank[n], team1[n]] - ability[instants[n], instants_rank[n], team2[n]], 124 | sigma_y); 125 | } 126 | //out-of-sample predictions 127 | if (N_prev > 0) { 128 | for (n in 1:N_prev) { 129 | diff_y_prev[n] = student_t_rng( nu, adj_h_eff+ability[instants_prev[n], instants_rank[N], team1_prev[n]] - ability[instants_prev[n], instants_rank[N], team2_prev[n]], 130 | sigma_y); 131 | } 132 | } 133 | } 134 | 135 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # footBayes 5 | 6 | [![CRAN 7 | Version](https://www.r-pkg.org/badges/version/footBayes)](https://cran.r-project.org/package=footBayes) 8 | ![Dev 9 | Version](https://img.shields.io/badge/build-2.1.0-blue?style=flat&logo=devdotto&label=Dev.%20Vers.) 10 | [![R-CMD-check.yaml](https://github.com/LeoEgidi/footBayes/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/LeoEgidi/footBayes/actions/workflows/R-CMD-check.yaml) 11 | [![Codecov test 12 | coverage](https://codecov.io/gh/LeoEgidi/footBayes/graph/badge.svg)](https://app.codecov.io/gh/LeoEgidi/footBayes) 13 | [![Downloads](https://cranlogs.r-pkg.org/badges/footBayes?color=brightgreen)](https://CRAN.R-project.org/package=footBayes) 14 | 15 | The goal of `footBayes` is to propose a complete workflow to: 16 | 17 | - Fit the most well-known football models, including the double Poisson, 18 | bivariate Poisson, Skellam, and Student‑t distributions. It supports 19 | both maximum likelihood estimation (MLE) and Bayesian inference. For 20 | Bayesian methods, it incorporates several techniques: MCMC sampling 21 | with Hamiltonian Monte Carlo, variational inference using either the 22 | Pathfinder algorithm or Automatic Differentiation Variational 23 | Inference (ADVI), and the Laplace approximation. 24 | 25 | - Visualize the teams’ abilities, the model checks, the rank-league 26 | reconstruction; 27 | 28 | - Predict out-of-sample matches. 29 | 30 | ## Installation 31 | 32 | Starting with version **2.0.0**, `footBayes` package requires installing 33 | the R package [`cmdstanr`](https://mc-stan.org/cmdstanr/) (not available 34 | on CRAN) and the command-line interface to Stan: 35 | [`CmdStan`](https://mc-stan.org/users/interfaces/cmdstan.html). For a 36 | step-by-step installation, please follow the instructions provided in 37 | [Getting started with 38 | CmdStanR](https://mc-stan.org/cmdstanr/articles/cmdstanr.html). 39 | 40 | You can install the released version of `footBayes` from CRAN with: 41 | 42 | ``` r 43 | install.packages("footBayes", type = "source") 44 | ``` 45 | 46 | Please note that it is important to set `type = "source"`. Otherwise, 47 | the ‘CmdStan’ models in the package may not be compiled during 48 | installation. 49 | 50 | Alternatively to CRAN, you can install the development version from 51 | GitHub with: 52 | 53 | ``` r 54 | # install.packages("devtools") 55 | devtools::install_github("leoegidi/footBayes") 56 | ``` 57 | 58 | ## Example 59 | 60 | In what follows, a quick example to fit a Bayesian double Poisson model 61 | for the Italian Serie A (seasons 2000-2001, 2001-2002, 2002-2003), 62 | visualize the estimated teams’ abilities, and predict the last four 63 | match days for the season 2002-2003: 64 | 65 | ``` r 66 | library(footBayes) 67 | library(dplyr) 68 | ``` 69 | 70 | ``` r 71 | # Dataset for Italian Serie A 72 | data("italy") 73 | italy <- as_tibble(italy) 74 | italy_2000_2002 <- italy %>% 75 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 76 | filter(Season == "2000" | Season == "2001" | Season == "2002") 77 | 78 | colnames(italy_2000_2002) <- c("periods", 79 | "home_team", 80 | "away_team", 81 | "home_goals", 82 | "away_goals") 83 | 84 | # Double poisson fit (predict last 4 match-days) 85 | fit1 <- stan_foot(data = italy_2000_2002, 86 | model = "double_pois", 87 | predict = 36, 88 | iter_sampling = 200, 89 | chains = 2) 90 | ``` 91 | 92 | The results (i.e., attack and defense effects) can be investigated using 93 | 94 | ``` r 95 | print(fit1, pars = c("att", "def")) 96 | ``` 97 | 98 | To visually investigate the attack and defense effects, we can use the 99 | `foot_abilities` function 100 | 101 | ``` r 102 | foot_abilities(fit1, italy_2000_2002) # teams abilities 103 | ``` 104 | 105 | 106 | 107 | To check the adequacy of the Bayesian model the function `pp_foot` 108 | provides posterior predictive plots 109 | 110 | ``` r 111 | pp_foot(fit1, italy_2000_2002) # pp checks 112 | #> $pp_plot 113 | ``` 114 | 115 | 116 | 117 | 118 | Furthermore, the function `foot_rank` shows the final rank table and the 119 | plot with the predicted points 120 | 121 | ``` r 122 | foot_rank(fit1, italy_2000_2002) # rank league reconstruction 123 | ``` 124 | 125 | 126 | 127 | 128 | In order to analyze the possible outcomes of the predicted matches, the 129 | function `foot_prob` provides a table containing the home win, draw and 130 | away win probabilities for the out-of-sample matches 131 | 132 | ``` r 133 | foot_prob(fit1, italy_2000_2002) # out-of-sample posterior pred. probabilities 134 | ``` 135 | 136 | 137 | 138 | For more and more technical details and references, see the vignette! 139 | -------------------------------------------------------------------------------- /tests/testthat/test_plot_logStrength.R: -------------------------------------------------------------------------------- 1 | # all the test PASSED (also the skipped ones!) 2 | 3 | # ____________________________________________________________________________ 4 | # Data tests #### 5 | 6 | test_that("Error thrown if input object is not of class 'btdFoot'", { 7 | not_btdFoot <- list(a = 1) 8 | expect_error( 9 | plot_logStrength(not_btdFoot), 10 | "Object must be of class 'btdFoot'." 11 | ) 12 | }) 13 | 14 | test_that("Dynamic ranking plot returns a ggplot object with correct labels", { 15 | skip_on_cran() 16 | skip_if_not(stan_cmdstan_exists()) 17 | 18 | ## ............................................................................ 19 | ## Data #### 20 | 21 | data("england") 22 | england <- as.data.frame(england) 23 | 24 | # One season only 25 | england_04_05 <- england %>% 26 | dplyr::filter(division == 1) %>% 27 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 28 | dplyr::filter(Season %in% c("2004", "2005")) %>% 29 | dplyr::mutate(match_outcome = dplyr::case_when( 30 | hgoal > vgoal ~ 1, # Home team wins 31 | hgoal == vgoal ~ 2, # Draw 32 | hgoal < vgoal ~ 3 # Away team wins 33 | )) %>% # Assign periods based on match number 34 | dplyr::select( 35 | periods = Season, 36 | home_team = home, 37 | away_team = visitor, match_outcome 38 | ) 39 | ## ............................................................................ 40 | ## Tests #### 41 | 42 | # Correct model 43 | dyn_btd <- btd_foot( 44 | data = england_04_05, dynamic_rank = TRUE, 45 | method = "MCMC", iter_sampling = 200, rank_measure = "mean", 46 | chains = 2, 47 | seed = 433 48 | ) 49 | 50 | p <- plot_logStrength(dyn_btd) 51 | expect_s3_class(p, "ggplot") 52 | # Check that the x- and y-axis labels match the dynamic branch 53 | expect_equal(p$labels$x, "Periods") 54 | expect_equal(p$labels$y, "Log-Strength Values") 55 | # Check that the data has multiple periods (thus dynamic) 56 | expect_true(max(dyn_btd$rank$periods, na.rm = TRUE) > 1) 57 | }) 58 | 59 | test_that("Static ranking plot returns a ggplot object with correct labels", { 60 | skip_on_cran() 61 | skip_if_not(stan_cmdstan_exists()) 62 | 63 | data("england") 64 | england <- as.data.frame(england) 65 | 66 | # One season only 67 | england_04 <- england %>% 68 | dplyr::filter(division == 1) %>% 69 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 70 | dplyr::filter(Season %in% c("2004")) %>% 71 | dplyr::mutate(match_outcome = dplyr::case_when( 72 | hgoal > vgoal ~ 1, # Home team wins 73 | hgoal == vgoal ~ 2, # Draw 74 | hgoal < vgoal ~ 3 # Away team wins 75 | )) %>% # Assign periods based on match number 76 | dplyr::select( 77 | periods = Season, 78 | home_team = home, 79 | away_team = visitor, match_outcome 80 | ) 81 | ## ............................................................................ 82 | ## Tests #### 83 | 84 | # Correct model 85 | stat_btd <- btd_foot( 86 | data = england_04, dynamic_rank = FALSE, 87 | method = "MCMC", iter_sampling = 200, rank_measure = "mean", 88 | chains = 2, 89 | seed = 433 90 | ) 91 | 92 | p <- plot_logStrength(stat_btd) 93 | expect_s3_class(p, "ggplot") 94 | # Check that the x- and y-axis labels match the static branch 95 | expect_equal(p$labels$x, "Log-Strength Values") 96 | expect_equal(p$labels$y, "Teams") 97 | }) 98 | 99 | test_that("Filtering by teams works as expected and error is thrown if specified teams are not present in the data", { 100 | skip_if_not(stan_cmdstan_exists()) 101 | 102 | data("england") 103 | england <- as.data.frame(england) 104 | 105 | # One season only 106 | england_04 <- england %>% 107 | dplyr::filter(division == 1) %>% 108 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 109 | dplyr::filter(Season %in% c("2004")) %>% 110 | dplyr::mutate(match_outcome = dplyr::case_when( 111 | hgoal > vgoal ~ 1, # Home team wins 112 | hgoal == vgoal ~ 2, # Draw 113 | hgoal < vgoal ~ 3 # Away team wins 114 | )) %>% # Assign periods based on match number 115 | dplyr::select( 116 | periods = Season, 117 | home_team = home, 118 | away_team = visitor, match_outcome 119 | ) 120 | ## ............................................................................ 121 | ## Tests #### 122 | 123 | # Correct model 124 | stat_btd <- btd_foot( 125 | data = england_04, dynamic_rank = FALSE, 126 | method = "MCMC", iter_sampling = 200, rank_measure = "mean", 127 | chains = 2, 128 | seed = 433 129 | ) 130 | 131 | p <- plot_logStrength(stat_btd, teams = c("Arsenal", "Chelsea")) 132 | # Check that the plot's data only contains the specified teams 133 | expect_setequal(unique(p$data$team), c("Arsenal", "Chelsea")) 134 | 135 | # Wrong plot 136 | expect_error( 137 | plot_logStrength(stat_btd, teams = c("Team X")), 138 | "The following teams are not present in the data" 139 | ) 140 | }) 141 | -------------------------------------------------------------------------------- /src/stan/student_t.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; // Number of observed matches 3 | int N_prev; // Number of predicted matches 4 | int nteams; // Number of teams 5 | int ntimes_rank; // Number of dynamic periods for rankings 6 | matrix[ntimes_rank, nteams] ranking; // Rankings over time 7 | array[N] int instants_rank; // Time indices for rankings for observed matches 8 | array[N] int team1; // Team 1 indices for observed matches 9 | array[N] int team2; // Team 2 indices for observed matches 10 | array[N_prev] int team1_prev; // Team 1 indices for predicted matches 11 | array[N_prev] int team2_prev; // Team 2 indices for predicted matches 12 | matrix[N, 2] y; // Scores: column 1 is team1, column 2 is team2 13 | real nu; // Degrees of freedom for the Student's t-distribution 14 | int ind_home; 15 | real mean_home; // Mean for home effect 16 | real sd_home; // Standard deviation for home effect 17 | 18 | 19 | // Priors part 20 | int prior_dist_num; // 1: Gaussian, 2: t, 3: Cauchy, 4: Laplace 21 | int prior_dist_sd_num; // 1: Gaussian, 2: t, 3: Cauchy, 4: Laplace 22 | 23 | real hyper_df; 24 | real hyper_location; 25 | 26 | real hyper_sd_df; 27 | real hyper_sd_location; 28 | real hyper_sd_scale; 29 | } 30 | transformed data { 31 | vector[N] diff_y = y[,1] - y[,2]; // Modeled data: score differences 32 | } 33 | parameters { 34 | real beta; // Common coefficient for ranking 35 | vector[nteams] alpha; // Per-team random effects 36 | real sigma_a; // Standard deviation for random effects 37 | real sigma_y; // Noise term in our estimate 38 | real sigma_alpha; // Standard deviation for alpha prior 39 | real home; // home effect 40 | } 41 | transformed parameters { 42 | matrix[ntimes_rank, nteams] ability; 43 | real adj_h_eff; // Adjusted home effect 44 | 45 | adj_h_eff = home * ind_home; 46 | 47 | for (t in 1:ntimes_rank) { 48 | // Compute abilities for all teams at time t 49 | ability[t] = beta * ranking[t] + (alpha * sigma_a)'; // Transpose to get a row vector 50 | } 51 | } 52 | model { 53 | // Priors for team-specific random effects (alpha) 54 | if (prior_dist_num == 1) { 55 | alpha ~ normal(hyper_location, sigma_alpha); 56 | } else if (prior_dist_num == 2) { 57 | alpha ~ student_t(hyper_df, hyper_location, sigma_alpha); 58 | } else if (prior_dist_num == 3) { 59 | alpha ~ cauchy(hyper_location, sigma_alpha); 60 | } else if (prior_dist_num == 4) { 61 | alpha ~ double_exponential(hyper_location, sigma_alpha); 62 | } 63 | 64 | // Priors for standard deviations 65 | if (prior_dist_sd_num == 1) { 66 | sigma_a ~ normal(hyper_sd_location, hyper_sd_scale); 67 | sigma_alpha ~ normal(hyper_sd_location, hyper_sd_scale); 68 | } else if (prior_dist_sd_num == 2) { 69 | sigma_a ~ student_t(hyper_sd_df, hyper_sd_location, hyper_sd_scale); 70 | sigma_alpha ~ student_t(hyper_sd_df, hyper_sd_location, hyper_sd_scale); 71 | } else if (prior_dist_sd_num == 3) { 72 | sigma_a ~ cauchy(hyper_sd_location, hyper_sd_scale); 73 | sigma_alpha ~ cauchy(hyper_sd_location, hyper_sd_scale); 74 | } else if (prior_dist_sd_num == 4) { 75 | sigma_a ~ double_exponential(hyper_sd_location, hyper_sd_scale); 76 | sigma_alpha ~ double_exponential(hyper_sd_location, hyper_sd_scale); 77 | } 78 | 79 | beta ~ normal(0, 2.5); 80 | sigma_y ~ normal(0, 2.5); 81 | target+=normal_lpdf(home|mean_home,sd_home); 82 | 83 | // Likelihood 84 | for (n in 1:N) { 85 | int rank_time = instants_rank[n]; // Time index for the current match 86 | diff_y[n] ~ student_t( 87 | nu, 88 | adj_h_eff+ 89 | ability[rank_time, team1[n]] - ability[rank_time, team2[n]], 90 | sigma_y 91 | ); 92 | } 93 | } 94 | generated quantities { 95 | vector[N] diff_y_rep; // Replicated differences for posterior predictive checks 96 | vector[N] log_lik; // Log-likelihood for model comparison 97 | vector[N_prev] diff_y_prev; // Predicted differences for future matches 98 | 99 | for (n in 1:N) { 100 | int rank_time = instants_rank[n]; 101 | diff_y_rep[n] = student_t_rng( 102 | nu, 103 | adj_h_eff+ 104 | ability[rank_time, team1[n]] - ability[rank_time, team2[n]], 105 | sigma_y 106 | ); 107 | log_lik[n] = student_t_lpdf( 108 | diff_y[n] | nu, 109 | adj_h_eff+ 110 | ability[rank_time, team1[n]] - ability[rank_time, team2[n]], 111 | sigma_y 112 | ); 113 | } 114 | 115 | if (N_prev > 0) { 116 | for (n in 1:N_prev) { 117 | diff_y_prev[n] = student_t_rng( 118 | nu, 119 | adj_h_eff+ 120 | ability[instants_rank[N], team1_prev[n]] - ability[instants_rank[N], team2_prev[n]], 121 | sigma_y 122 | ); 123 | } 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /man/compare_foot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compare_foot.R 3 | \name{compare_foot} 4 | \alias{compare_foot} 5 | \title{Compare Football Models using Various Metrics} 6 | \usage{ 7 | compare_foot( 8 | source, 9 | test_data, 10 | metric = c("accuracy", "brier", "ACP", "pseudoR2", "RPS"), 11 | conf_matrix = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{source}{A named list containing either: 16 | \itemize{ 17 | \item Fitted model objects (of class \code{stanFoot}, \code{CmdStanFit}, \code{stanfit}), each representing a football model. 18 | \item Matrices where each matrix contains the estimated probabilities for "Home Win," "Draw," and "Away Win" in its columns. 19 | }} 20 | 21 | \item{test_data}{A data frame containing the test dataset, with columns: 22 | \itemize{ 23 | \item \code{home_team}: Home team's name (character string). 24 | \item \code{away_team}: Away team's name (character string). 25 | \item \code{home_goals}: Goals scored by the home team (integer >= 0). 26 | \item \code{away_goals}: Goals scored by the away team (integer >= 0). 27 | }} 28 | 29 | \item{metric}{A character vector specifying the metrics to use for comparison. Options are: 30 | \itemize{ 31 | \item \code{"accuracy"}: Computes the accuracy of each model. 32 | \item \code{"brier"}: Computes the Brier score of each model. 33 | \item \code{"RPS"}: Computes the ranked probability score (RPS) for each model. 34 | \item \code{"ACP"}: Computes the average of correct probabilities (ACP) for each model. 35 | \item \code{"pseudoR2"}: Computes the Pseudo \eqn{R^2}, defined as the geometric mean of the probabilities assigned to the actual results. 36 | } 37 | Default is \code{c("accuracy", "brier", "ACP", "pseudoR2", "RPS")}, computing the specified metrics.} 38 | 39 | \item{conf_matrix}{A logical value indicating whether to generate a confusion matrix comparing predicted outcomes against actual outcomes for each model or probability matrix. Default is \code{FALSE}.} 40 | } 41 | \value{ 42 | An object of class \code{compare_foot_output}, which is a list containing: 43 | \itemize{ 44 | \item \code{metrics}: A data frame containing the metric values for each model or probability matrix. 45 | \item \code{confusion_matrix}: Confusion matrices for each model or probability matrix. 46 | } 47 | } 48 | \description{ 49 | Compares multiple football models or directly provided probability matrices based on specified metrics (accuracy, Brier score, ranked probability score, Pseudo \eqn{R^2}, average coverage probability), using a test dataset. Additionally, computes the confusion matrices. The function returns an object of class \code{compareFoot}. 50 | } 51 | \details{ 52 | The function extracts predictions from each model or directly uses the provided probability matrices and computes the chosen metrics on the test dataset. It also possible to compute confusion matrices. 53 | } 54 | \examples{ 55 | \dontrun{ 56 | if (instantiate::stan_cmdstan_exists()) { 57 | library(dplyr) 58 | 59 | data("italy") 60 | italy_2000 <- italy \%>\% 61 | dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\% 62 | dplyr::filter(Season == "2000") 63 | 64 | colnames(italy_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals") 65 | 66 | # Example with fitted models 67 | fit_1 <- stan_foot( 68 | data = italy_2000, 69 | model = "double_pois", predict = 18 70 | ) # Double Poisson model 71 | fit_2 <- stan_foot( 72 | data = italy_2000, 73 | model = "biv_pois", predict = 18 74 | ) # Bivariate Poisson model 75 | 76 | italy_2000_test <- italy_2000[289:306, ] 77 | 78 | 79 | compare_results_models <- compare_foot( 80 | source = list( 81 | double_poisson = fit_1, 82 | bivariate_poisson = fit_2 83 | ), 84 | test_data = italy_2000_test, 85 | metric = c("accuracy", "brier", "ACP", "pseudoR2", "RPS"), 86 | conf_matrix = TRUE 87 | ) 88 | 89 | print(compare_results_models) 90 | 91 | 92 | # Example with probability matrices 93 | 94 | home_team <- c( 95 | "AC Milan", "Inter", "Juventus", "AS Roma", "Napoli", 96 | "Lazio", "Atalanta", "Fiorentina", "Torino", "Sassuolo", "Udinese" 97 | ) 98 | 99 | away_team <- c( 100 | "Juventus", "Napoli", "Inter", "Atalanta", "Lazio", 101 | "AC Milan", "Sassuolo", "Torino", "Fiorentina", "Udinese", "AS Roma" 102 | ) 103 | 104 | # Home and Away goals based on given data 105 | home_goals <- c(2, 0, 2, 2, 3, 1, 4, 2, 1, 1, 2) 106 | away_goals <- c(1, 0, 1, 3, 2, 1, 1, 2, 1, 1, 2) 107 | 108 | # Combine into a data frame 109 | test_data <- data.frame(home_team, away_team, home_goals, away_goals) 110 | 111 | # Define the data for each column 112 | pW <- c(0.51, 0.45, 0.48, 0.53, 0.56, 0.39, 0.52, 0.55, 0.61, 0.37, 0.35) 113 | pD <- c(0.27, 0.25, 0.31, 0.18, 0.23, 0.30, 0.24, 0.26, 0.18, 0.19, 0.22) 114 | pL <- c(0.22, 0.30, 0.21, 0.29, 0.21, 0.31, 0.24, 0.19, 0.21, 0.44, 0.43) 115 | 116 | # Create the data frame table_prob 117 | table_prob <- data.frame(pW, pD, pL) 118 | matrix_prob <- as.matrix(table_prob) 119 | 120 | # Use compare_foot function 121 | compare_results_matrices <- compare_foot( 122 | source = list(matrix_1 = matrix_prob), 123 | test_data = test_data, 124 | metric = c("accuracy", "brier", "pseudoR2", "ACP", "RPS") 125 | ) 126 | # Print the results 127 | print(compare_results_matrices) 128 | } 129 | } 130 | } 131 | \author{ 132 | Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it} 133 | } 134 | -------------------------------------------------------------------------------- /src/stan/double_pois.stan: -------------------------------------------------------------------------------- 1 | data{ 2 | int N; // number of games 3 | int N_prev; // number of predicted games 4 | array[N,2] int y; // scores 5 | int nteams; // number of teams 6 | array[N] int team1; // home team index 7 | array[N] int team2; // away team index 8 | array[N_prev] int team1_prev; // home team for pred. 9 | array[N_prev] int team2_prev; // away team for pred. 10 | array[N] int instants_rank; 11 | int ntimes_rank; // dynamic periods for ranking 12 | matrix[ntimes_rank,nteams] ranking; // eventual fifa/uefa ranking 13 | int ind_home; 14 | real mean_home; // Mean for home effect 15 | real sd_home; // Standard deviation for home effect 16 | 17 | // priors part 18 | int prior_dist_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace 19 | int prior_dist_sd_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace 20 | 21 | real hyper_df; 22 | real hyper_location; 23 | 24 | real hyper_sd_df; 25 | real hyper_sd_location; 26 | real hyper_sd_scale; 27 | } 28 | parameters{ 29 | vector[nteams] att_raw; 30 | vector[nteams] def_raw; 31 | real sigma_att; 32 | real sigma_def; 33 | real home; 34 | real gamma; 35 | } 36 | transformed parameters{ 37 | real adj_h_eff; // Adjusted home effect 38 | vector[nteams] att; // attack parameters 39 | vector[nteams] def; // defence parameters 40 | array[N] vector[2] theta; // exponentiated linear pred. 41 | 42 | for (t in 1:nteams){ 43 | att[t] = att_raw[t]-mean(att_raw); 44 | def[t] = def_raw[t]-mean(def_raw); 45 | } 46 | 47 | adj_h_eff = home * ind_home; 48 | 49 | for (n in 1:N){ 50 | theta[n,1] = exp(adj_h_eff+att[team1[n]]+def[team2[n]]+ 51 | (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]])); 52 | theta[n,2] = exp(att[team2[n]]+def[team1[n]]- 53 | (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]])); 54 | } 55 | } 56 | model{ 57 | // log-priors for team-specific abilities 58 | for (t in 1:(nteams)){ 59 | if (prior_dist_num == 1){ 60 | target+= normal_lpdf(att_raw[t]|hyper_location, sigma_att); 61 | target+= normal_lpdf(def_raw[t]|hyper_location, sigma_def); 62 | } 63 | else if (prior_dist_num == 2){ 64 | target+= student_t_lpdf(att_raw[t]|hyper_df, hyper_location, sigma_att); 65 | target+= student_t_lpdf(def_raw[t]|hyper_df, hyper_location, sigma_def); 66 | } 67 | else if (prior_dist_num == 3){ 68 | target+= cauchy_lpdf(att_raw[t]|hyper_location, sigma_att); 69 | target+= cauchy_lpdf(def_raw[t]|hyper_location, sigma_def); 70 | } 71 | else if (prior_dist_num == 4){ 72 | target+= double_exponential_lpdf(att_raw[t]|hyper_location, sigma_att); 73 | target+= double_exponential_lpdf(def_raw[t]|hyper_location, sigma_def); 74 | } 75 | } 76 | 77 | 78 | // log-hyperpriors for sd parameters 79 | if (prior_dist_sd_num == 1 ){ 80 | target+=normal_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 81 | target+=normal_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 82 | } 83 | else if (prior_dist_sd_num == 2){ 84 | target+=student_t_lpdf(sigma_att|hyper_sd_df, hyper_sd_location, hyper_sd_scale); 85 | target+=student_t_lpdf(sigma_def|hyper_sd_df, hyper_sd_location, hyper_sd_scale); 86 | } 87 | else if (prior_dist_sd_num == 3){ 88 | target+=cauchy_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 89 | target+=cauchy_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 90 | } 91 | else if (prior_dist_sd_num == 4){ 92 | target+=double_exponential_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 93 | target+=double_exponential_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 94 | } 95 | 96 | // log-priors fixed effects 97 | target+=normal_lpdf(home|mean_home,sd_home); 98 | target+=normal_lpdf(gamma|0,1); 99 | 100 | // likelihood 101 | 102 | target+=poisson_lpmf(y[,1]| theta[,1]); 103 | target+=poisson_lpmf(y[,2]| theta[,2]); 104 | 105 | } 106 | generated quantities{ 107 | array[N,2] int y_rep; 108 | array[N_prev,2] int y_prev; 109 | array[N_prev] vector[2] theta_prev; 110 | vector[N] log_lik; 111 | array[N] int diff_y_rep; 112 | 113 | //in-sample replications 114 | for (n in 1:N){ 115 | y_rep[n,1] = poisson_rng(theta[n,1]); 116 | y_rep[n,2] = poisson_rng(theta[n,2]); 117 | diff_y_rep[n] = y_rep[n,1] - y_rep[n,2]; 118 | log_lik[n] =poisson_lpmf(y[n,1]| theta[n,1])+ 119 | poisson_lpmf(y[n,2]| theta[n,2]); 120 | } 121 | //out-of-sample predictions 122 | if (N_prev > 0) { 123 | for (n in 1:N_prev){ 124 | theta_prev[n,1] = exp(adj_h_eff+att[team1_prev[n]]+def[team2_prev[n]]+ 125 | (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]])); 126 | theta_prev[n,2] = exp(att[team2_prev[n]]+def[team1_prev[n]]- 127 | (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]])); 128 | y_prev[n,1] = poisson_rng(theta_prev[n,1]); 129 | y_prev[n,2] = poisson_rng(theta_prev[n,2]); 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /src/stan/neg_bin.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; // number of games 3 | int N_prev; 4 | array[N,2] int y; // observed scores 5 | int nteams; 6 | array[N] int instants_rank; 7 | int ntimes_rank; // number of ranking periods 8 | array[N] int team1; 9 | array[N] int team2; 10 | array[N_prev]int team1_prev; 11 | array[N_prev] int team2_prev; 12 | matrix[ntimes_rank, nteams] ranking; 13 | int ind_home; 14 | real mean_home; // prior mean for home effect 15 | real sd_home; // prior sd for home effect 16 | 17 | // choice of prior distributions for att/def 18 | int prior_dist_num; // 1=normal,2=t,3=cauchy,4=laplace 19 | int prior_dist_sd_num; // same for sd parameters 20 | 21 | real hyper_df; // d.f. for t‑prior on abilities 22 | real hyper_location; // location for ability priors 23 | 24 | real hyper_sd_df; // d.f. for t‑prior on sds 25 | real hyper_sd_location; // location for sd priors 26 | real hyper_sd_scale; // scale for sd priors 27 | } 28 | 29 | parameters { 30 | vector[nteams] att_raw; 31 | vector[nteams] def_raw; 32 | real sigma_att; 33 | real sigma_def; 34 | real home; 35 | real gamma; 36 | 37 | // Negative Binomial dispersion parameters 38 | real phi1; 39 | real phi2; 40 | } 41 | 42 | transformed parameters { 43 | vector[nteams] att = att_raw - mean(att_raw); 44 | vector[nteams] def = def_raw - mean(def_raw); 45 | real adj_h_eff; 46 | array[N] vector[2] theta; 47 | 48 | adj_h_eff = home * ind_home; 49 | 50 | for (n in 1:N) { 51 | theta[n,1] = exp( 52 | adj_h_eff 53 | + att[team1[n]] + def[team2[n]] 54 | + (gamma/2) * (ranking[instants_rank[n], team1[n]] 55 | - ranking[instants_rank[n], team2[n]]) 56 | ); 57 | theta[n,2] = exp( 58 | att[team2[n]] + def[team1[n]] 59 | - (gamma/2) * (ranking[instants_rank[n], team1[n]] 60 | - ranking[instants_rank[n], team2[n]]) 61 | ); 62 | } 63 | } 64 | 65 | model { 66 | // 1) Priors on team abilities (via target+=) 67 | for (t in 1:nteams) { 68 | if (prior_dist_num == 1) { 69 | target += normal_lpdf(att_raw[t] | hyper_location, sigma_att); 70 | target += normal_lpdf(def_raw[t] | hyper_location, sigma_def); 71 | } else if (prior_dist_num == 2) { 72 | target += student_t_lpdf(att_raw[t] | hyper_df, hyper_location, sigma_att); 73 | target += student_t_lpdf(def_raw[t] | hyper_df, hyper_location, sigma_def); 74 | } else if (prior_dist_num == 3) { 75 | target += cauchy_lpdf(att_raw[t] | hyper_location, sigma_att); 76 | target += cauchy_lpdf(def_raw[t] | hyper_location, sigma_def); 77 | } else { 78 | target += double_exponential_lpdf(att_raw[t] | hyper_location, sigma_att); 79 | target += double_exponential_lpdf(def_raw[t] | hyper_location, sigma_def); 80 | } 81 | } 82 | 83 | // 2) Priors on sigma_att, sigma_def 84 | if (prior_dist_sd_num == 1) { 85 | target += normal_lpdf(sigma_att | hyper_sd_location, hyper_sd_scale); 86 | target += normal_lpdf(sigma_def | hyper_sd_location, hyper_sd_scale); 87 | } else if (prior_dist_sd_num == 2) { 88 | target += student_t_lpdf(sigma_att | hyper_sd_df, hyper_sd_location, hyper_sd_scale); 89 | target += student_t_lpdf(sigma_def | hyper_sd_df, hyper_sd_location, hyper_sd_scale); 90 | } else if (prior_dist_sd_num == 3) { 91 | target += cauchy_lpdf(sigma_att | hyper_sd_location, hyper_sd_scale); 92 | target += cauchy_lpdf(sigma_def | hyper_sd_location, hyper_sd_scale); 93 | } else { 94 | target += double_exponential_lpdf(sigma_att | hyper_sd_location, hyper_sd_scale); 95 | target += double_exponential_lpdf(sigma_def | hyper_sd_location, hyper_sd_scale); 96 | } 97 | 98 | // 3) Priors on fixed effects 99 | target += normal_lpdf(home | mean_home, sd_home); 100 | target += normal_lpdf(gamma | 0, 1); 101 | 102 | // 4) Priors on NB dispersion 103 | target += normal_lpdf(phi1 | 0, 5); 104 | target += normal_lpdf(phi2 | 0, 5); 105 | 106 | // 5) Likelihood: marginal NB2 for each margin + shared latent component 107 | for (n in 1:N) { 108 | target += neg_binomial_2_lpmf(y[n,1] | theta[n,1], phi1); 109 | target += neg_binomial_2_lpmf(y[n,2] | theta[n,2], phi2); 110 | } 111 | } 112 | 113 | generated quantities { 114 | array[N,2] int y_rep; 115 | array[N_prev,2] int y_prev; 116 | array[N_prev] vector[2] theta_prev; 117 | vector[N] log_lik; 118 | array[N] int diff_y_rep; 119 | 120 | for (n in 1:N) { 121 | y_rep[n,1] = neg_binomial_2_rng(theta[n,1], phi1); 122 | y_rep[n,2] = neg_binomial_2_rng(theta[n,2], phi2); 123 | diff_y_rep[n] = y_rep[n,1] - y_rep[n,2]; 124 | 125 | log_lik[n] = neg_binomial_2_lpmf(y[n,1] | theta[n,1], phi1) 126 | + neg_binomial_2_lpmf(y[n,2] | theta[n,2], phi2); 127 | } 128 | 129 | //out-of-sample predictions 130 | if (N_prev > 0) { 131 | for (n in 1:N_prev){ 132 | theta_prev[n,1] = exp(adj_h_eff+att[team1_prev[n]]+ 133 | def[team2_prev[n]]+ 134 | (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]])); 135 | theta_prev[n,2] = exp(att[team2_prev[n]]+ 136 | def[team1_prev[n]]- 137 | (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]])); 138 | y_prev[n,1] = neg_binomial_2_rng(theta_prev[n,1], phi1); 139 | y_prev[n,2] = neg_binomial_2_rng(theta_prev[n,2], phi2); 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /man/mle_foot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mle_foot.R 3 | \name{mle_foot} 4 | \alias{mle_foot} 5 | \title{Fit football models with Maximum Likelihood} 6 | \usage{ 7 | mle_foot( 8 | data, 9 | model, 10 | predict = 0, 11 | maxit = 1000, 12 | method = "BFGS", 13 | interval = "profile", 14 | hessian = FALSE, 15 | sigma_y = 1 16 | ) 17 | } 18 | \arguments{ 19 | \item{data}{A data frame containing match data with columns: 20 | \itemize{ 21 | \item \code{periods}: Time point of each observation (integer >= 1). 22 | \item \code{home_team}: Home team's name (character string). 23 | \item \code{away_team}: Away team's name (character string). 24 | \item \code{home_goals}: Goals scored by the home team (integer >= 0). 25 | \item \code{away_goals}: Goals scored by the away team (integer >= 0). 26 | }} 27 | 28 | \item{model}{A character specifying the model to fit. Options are: 29 | \itemize{ 30 | \item \code{"double_pois"}: Double Poisson model. 31 | \item \code{"biv_pois"}: Bivariate Poisson model. 32 | \item \code{"skellam"}: Skellam model. 33 | \item \code{"student_t"}: Student's t model. 34 | }} 35 | 36 | \item{predict}{An integer specifying the number of out-of-sample matches for prediction. If missing, the function fits the model to the entire dataset without making predictions.} 37 | 38 | \item{maxit}{An integer specifying the maximum number of optimizer iterations default is 1000).} 39 | 40 | \item{method}{A character specifying the optimization method. Options are 41 | \itemize{ 42 | \item \code{"Nelder-Mead"}. 43 | \item \code{"BFGS"} (default). 44 | \item \code{"CG"}. 45 | \item \code{"L-BFGS-B"}. 46 | \item \code{"SANN"}. 47 | \item \code{"Brent"}. 48 | } 49 | For further details see \code{{optim}} function in \code{\link[stats]{stats}} package.} 50 | 51 | \item{interval}{A character specifying the interval type for confidence intervals. Options are 52 | \itemize{ 53 | \item \code{"profile"} (default). 54 | \item \code{"Wald"}. 55 | }} 56 | 57 | \item{hessian}{A logical value indicating to include the computation of the Hessian (default FALSE).} 58 | 59 | \item{sigma_y}{A positive numeric value indicating the scale parameter for Student t likelihood (default 1).} 60 | } 61 | \value{ 62 | A named list containing: 63 | \itemize{ 64 | \item{\code{att}}: A matrix of attack ratings, with MLE and 95\% confidence intervals (for \code{"double_pois"}, \code{"biv_pois"} and \code{"skellam"} models). 65 | \item{\code{def}}: A matrix of defence ratings, with MLE and 95\% confidence intervals (for \code{"double_pois"}, \code{"biv_pois"} and \code{"skellam"} models). 66 | \item{\code{abilities}}: A matrix of combined ability, with MLE and 95\% confidence intervals (for \code{"student_t"} only). 67 | \item{\code{home_effect}}: A matrix with with MLE and 95\% confidence intervals for the home effect estimate. 68 | \item{\code{corr}}: A matrix with MLE and 95\% confidence intervals for the bivariate Poisson correlation parameter (for \code{"biv_pois"} only). 69 | \item{\code{model}}: The name of the fitted model (character). 70 | \item{\code{predict}}: The number of out-of-sample matches used for prediction (integer). 71 | \item{\code{sigma_y}}: The scale parameter used in the Student t likelihood (for \code{"student_t"} only). 72 | \item{\code{team1_prev}}: Integer indices of home teams in the out-of-sample matches (if \code{predict > 0}). 73 | \item{\code{team2_prev}}: Integer indices of away teams in the out-of-sample matches (if \code{predict > 0}). 74 | \item{\code{logLik}}: The maximized log likelihood (numeric). 75 | \item{\code{aic}}: Akaike Information Criterion (numeric). 76 | \item{\code{bic}}: Bayesian Information Criterion (numeric). 77 | } 78 | } 79 | \description{ 80 | Fits football goal-based models using maximum likelihood estimation. 81 | Supported models include: double Poisson, bivariate Poisson, Skellam, and Student's t. 82 | } 83 | \details{ 84 | MLE can be obtained only for static models, with no time-dependence. 85 | Likelihood optimization is performed via the \code{BFGS} method 86 | of the \code{{optim}} function in \code{\link[stats]{stats}} package. 87 | } 88 | \examples{ 89 | \dontrun{ 90 | library(dplyr) 91 | 92 | data("italy") 93 | italy <- as_tibble(italy) 94 | italy_2000_2002 <- italy \%>\% 95 | dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\% 96 | dplyr::filter(Season == "2000" | Season == "2001" | Season == "2002") 97 | 98 | colnames(italy_2000_2002) <- c("periods", "home_team", "away_team", "home_goals", "away_goals") 99 | 100 | mle_fit <- mle_foot( 101 | data = italy_2000_2002, 102 | model = "double_pois" 103 | ) 104 | } 105 | 106 | } 107 | \references{ 108 | Baio, G. and Blangiardo, M. (2010). Bayesian hierarchical model for the prediction of football 109 | results. Journal of Applied Statistics 37(2), 253-264. 110 | 111 | Egidi, L., Pauli, F., and Torelli, N. (2018). Combining historical data 112 | and bookmakers' odds in modelling football scores. Statistical Modelling, 18(5-6), 436-459. 113 | 114 | Gelman, A. (2014). Stan goes to the World Cup. From 115 | "Statistical Modeling, Causal Inference, and Social Science" blog. 116 | 117 | Karlis, D. and Ntzoufras, I. (2003). Analysis of sports data by using bivariate poisson models. 118 | Journal of the Royal Statistical Society: Series D (The Statistician) 52(3), 381-393. 119 | 120 | Karlis, D. and Ntzoufras,I. (2009). Bayesian modelling of football outcomes: Using 121 | the Skellam's distribution for the goal difference. IMA Journal of Management Mathematics 20(2), 133-145. 122 | 123 | Owen, A. (2011). Dynamic Bayesian forecasting models 124 | of football match outcomes with estimation of the 125 | evolution variance parameter. IMA Journal of Management Mathematics, 22(2), 99-113. 126 | } 127 | \author{ 128 | Leonardo Egidi \email{legidi@units.it} and Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it} 129 | } 130 | -------------------------------------------------------------------------------- /src/stan/skellam.stan: -------------------------------------------------------------------------------- 1 | functions{ 2 | real skellam_lpmf(int k, real lambda1, real lambda2) { 3 | //real r = k; 4 | return -(lambda1 + lambda2) + (k/2) * log(lambda1/lambda2) + 5 | log_modified_bessel_first_kind(abs(k), 2 * sqrt(lambda1 * lambda2)); 6 | } 7 | } 8 | data{ 9 | int N; 10 | int N_prev; 11 | array[N] int diff_y; 12 | int nteams; 13 | array[N] int team1; 14 | array[N] int team2; 15 | array[N_prev] int team1_prev; 16 | array[N_prev] int team2_prev; 17 | array[N] int instants_rank; 18 | int ntimes_rank; // dynamic periods for ranking 19 | matrix[ntimes_rank,nteams] ranking; // eventual fifa/uefa ranking 20 | int ind_home; 21 | real mean_home; // Mean for home effect 22 | real sd_home; // Standard deviation for home effect 23 | 24 | // priors part 25 | int prior_dist_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace 26 | int prior_dist_sd_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace 27 | 28 | real hyper_df; 29 | real hyper_location; 30 | 31 | real hyper_sd_df; 32 | real hyper_sd_location; 33 | real hyper_sd_scale; 34 | } 35 | parameters{ 36 | vector[nteams] att_raw; 37 | vector[nteams] def_raw; 38 | real sigma_att; 39 | real sigma_def; 40 | real home; 41 | real gamma; 42 | } 43 | transformed parameters{ 44 | real adj_h_eff; // Adjusted home effect 45 | vector[nteams] att; 46 | vector[nteams] def; 47 | array[N,2] real theta; 48 | 49 | for (t in 1:nteams){ 50 | att[t] = att_raw[t]-mean(att_raw); 51 | def[t] = def_raw[t]-mean(def_raw); 52 | } 53 | 54 | adj_h_eff = home * ind_home; 55 | 56 | for (n in 1:N){ 57 | theta[n,1] = exp(adj_h_eff+att[team1[n]]+def[team2[n]]+ 58 | (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]])); 59 | theta[n,2] = exp(att[team2[n]]+def[team1[n]]- 60 | (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]])); 61 | } 62 | } 63 | model{ 64 | // log-priors for team-specific abilities 65 | for (t in 1:(nteams)){ 66 | if (prior_dist_num == 1){ 67 | target+= normal_lpdf(att_raw[t]|hyper_location, sigma_att); 68 | target+= normal_lpdf(def_raw[t]|hyper_location, sigma_def); 69 | } 70 | else if (prior_dist_num == 2){ 71 | target+= student_t_lpdf(att_raw[t]|hyper_df, hyper_location, sigma_att); 72 | target+= student_t_lpdf(def_raw[t]|hyper_df, hyper_location, sigma_def); 73 | } 74 | else if (prior_dist_num == 3){ 75 | target+= cauchy_lpdf(att_raw[t]|hyper_location, sigma_att); 76 | target+= cauchy_lpdf(def_raw[t]|hyper_location, sigma_def); 77 | } 78 | else if (prior_dist_num == 4){ 79 | target+= double_exponential_lpdf(att_raw[t]|hyper_location, sigma_att); 80 | target+= double_exponential_lpdf(def_raw[t]|hyper_location, sigma_def); 81 | } 82 | } 83 | 84 | 85 | // log-hyperpriors for sd parameters 86 | if (prior_dist_sd_num == 1 ){ 87 | target+=normal_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 88 | target+=normal_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 89 | } 90 | else if (prior_dist_sd_num == 2){ 91 | target+=student_t_lpdf(sigma_att|hyper_sd_df, hyper_sd_location, hyper_sd_scale); 92 | target+=student_t_lpdf(sigma_def|hyper_sd_df, hyper_sd_location, hyper_sd_scale); 93 | } 94 | else if (prior_dist_sd_num == 3){ 95 | target+=cauchy_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 96 | target+=cauchy_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 97 | } 98 | else if (prior_dist_sd_num == 4){ 99 | target+=double_exponential_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 100 | target+=double_exponential_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 101 | } 102 | 103 | // log-priors fixed effects 104 | target+=normal_lpdf(home|mean_home,sd_home); 105 | target+=normal_lpdf(gamma|0,1); 106 | 107 | 108 | // likelihood 109 | for (n in 1:N){ 110 | target+=skellam_lpmf(diff_y[n]| theta[n,1],theta[n,2]); 111 | } 112 | } 113 | generated quantities{ 114 | array[N,2] int y_rep; 115 | array[N] int diff_y_rep; 116 | vector[N] log_lik; 117 | array[N_prev,2] int y_prev; 118 | array[N_prev] vector[2] theta_prev; 119 | array[N_prev] int diff_y_prev; 120 | 121 | //in-sample replications 122 | for (n in 1:N){ 123 | y_rep[n,1] = poisson_rng(theta[n,1]); 124 | y_rep[n,2] = poisson_rng(theta[n,2]); 125 | diff_y_rep[n] = y_rep[n,1] - y_rep[n,2]; 126 | log_lik[n] =skellam_lpmf(diff_y[n]| theta[n,1], theta[n,2]); 127 | } 128 | //out-of-sample predictions 129 | if (N_prev > 0) { 130 | for (n in 1:N_prev){ 131 | theta_prev[n,1] = exp(adj_h_eff+att[team1_prev[n]]+ 132 | def[team2_prev[n]]+ 133 | (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]])); 134 | theta_prev[n,2] = exp(att[team2_prev[n]]+ 135 | def[team1_prev[n]]- 136 | (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]])); 137 | y_prev[n,1] = poisson_rng(theta_prev[n,1]); 138 | y_prev[n,2] = poisson_rng(theta_prev[n,2]); 139 | diff_y_prev[n] = y_prev[n,1] - y_prev[n,2]; 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /man/btd_foot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/btd_foot.R 3 | \name{btd_foot} 4 | \alias{btd_foot} 5 | \title{Bayesian Bradley-Terry-Davidson Model} 6 | \usage{ 7 | btd_foot( 8 | data, 9 | dynamic_rank = FALSE, 10 | home_effect = FALSE, 11 | prior_par = list(logStrength = normal(0, 3), logTie = normal(0, 0.3), home = normal(0, 12 | 5)), 13 | rank_measure = "median", 14 | method = "MCMC", 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{data}{A data frame containing the observations with columns: 20 | \itemize{ 21 | \item \code{periods}: Time point of each observation (integer >= 1). 22 | \item \code{home_team}: Home team's name (character string). 23 | \item \code{away_team}: Away team's name (character string). 24 | \item \code{match_outcome}: Outcome (1 if home team beats away team, 2 for tie, and 3 if away team beats home team). 25 | } 26 | The data frame must not contain missing values.} 27 | 28 | \item{dynamic_rank}{A logical value indicating whether a dynamic ranking model is used (default is \code{FALSE}).} 29 | 30 | \item{home_effect}{A logical value indicating the inclusion of a home effect in the model. (default is \code{FALSE}).} 31 | 32 | \item{prior_par}{A list specifying the prior distributions for the parameters of interest, using the \code{normal} function: 33 | \itemize{ 34 | \item \code{logStrength}: Prior for the team log-strengths. Default is \code{normal(0, 3)}. 35 | \item \code{logTie}: Prior for the tie parameter. Default is \code{normal(0, 0.3)}. 36 | \item \code{home}: Prior for the home effect (\code{home}). Applicable only if \code{home_effect = TRUE}. Default is \code{normal(0, 5)}. 37 | } 38 | Only normal priors are allowed for this model.} 39 | 40 | \item{rank_measure}{A character string specifying the method used to summarize the posterior distributions of the team strengths. Options are: 41 | \itemize{ 42 | \item \code{"median"}: Uses the median of the posterior samples (default). 43 | \item \code{"mean"}: Uses the mean of the posterior samples. 44 | \item \code{"map"}: Uses the Maximum A Posteriori estimate, calculated as the mode of the posterior distribution. 45 | }} 46 | 47 | \item{method}{A character string specifying the method used to obtain the Bayesian estimates. Options are: 48 | \itemize{ 49 | \item \code{"MCMC"}: Markov chain Monte Carlo algorithm (default). 50 | \item \code{"VI"}: Automatic differentiation variational inference algorithms. 51 | \item \code{"pathfinder"}: Pathfinder variational inference algorithm. 52 | \item \code{"laplace"}: Laplace algorithm. 53 | }} 54 | 55 | \item{...}{Additional arguments passed to \code{\link[cmdstanr]{cmdstanr}} (e.g., \code{iter_sampling}, \code{chains}, \code{parallel_chains}).} 56 | } 57 | \value{ 58 | An object of class \code{"btdFoot"}, which is a list containing: 59 | \itemize{ 60 | \item \code{fit}: The fitted \code{CmdStanFit} object returned by \code{\link[cmdstanr]{cmdstanr}}. 61 | \item \code{rank}: A data frame with the rankings, including columns: 62 | \itemize{ 63 | \item \code{periods}: The time period. 64 | \item \code{team}: The team name. 65 | \item \code{rank_points}: The estimated strength of the team based on the chosen \code{rank_measure}. 66 | } 67 | \item \code{data}: The input data. 68 | \item \code{stan_data}: The data list passed to Stan. 69 | \item \code{stan_code}: The Stan code of the underline model. 70 | \item \code{stan_args}: The optional \code{\link[cmdstanr]{cmdstanr}} parameters passed to (\code{...}). 71 | \item \code{rank_measure}: The summary statistic used to compute the rankings. 72 | \item \code{alg_method}: The inference algorithm used to obtain the Bayesian estimates. 73 | } 74 | } 75 | \description{ 76 | Fits a Bayesian Bradley-Terry-Davidson model using Stan. Supports both static and dynamic ranking models, allowing for the estimation of team strengths over time. 77 | } 78 | \examples{ 79 | \dontrun{ 80 | if (instantiate::stan_cmdstan_exists()) { 81 | library(dplyr) 82 | 83 | data("italy") 84 | 85 | italy_2020_2021 <- italy \%>\% 86 | dplyr::select(Season, home, visitor, hgoal, vgoal) \%>\% 87 | dplyr::filter(Season == "2020" | Season == "2021") \%>\% 88 | dplyr::mutate(match_outcome = dplyr::case_when( 89 | hgoal > vgoal ~ 1, # Home team wins 90 | hgoal == vgoal ~ 2, # Draw 91 | hgoal < vgoal ~ 3 # Away team wins 92 | )) \%>\% 93 | dplyr::mutate(periods = dplyr::case_when( 94 | dplyr::row_number() <= 190 ~ 1, 95 | dplyr::row_number() <= 380 ~ 2, 96 | dplyr::row_number() <= 570 ~ 3, 97 | TRUE ~ 4 98 | )) \%>\% # Assign periods based on match number 99 | dplyr::select(periods, 100 | home_team = home, 101 | away_team = visitor, match_outcome 102 | ) 103 | 104 | # Dynamic Ranking Example with Median Rank Measure 105 | fit_result_dyn <- btd_foot( 106 | data = italy_2020_2021, 107 | dynamic_rank = TRUE, 108 | home_effect = TRUE, 109 | prior_par = list( 110 | logStrength = normal(0, 10), 111 | logTie = normal(0, 5), 112 | home = normal(0, 5) 113 | ), 114 | rank_measure = "median", 115 | iter_sampling = 1000, 116 | parallel_chains = 2, 117 | chains = 2 118 | ) 119 | 120 | print(fit_result_dyn) 121 | 122 | print(fit_result_dyn, pars = c("logStrength", "home"), teams = c("AC Milan", "AS Roma")) 123 | 124 | # Static Ranking Example with MAP Rank Measure 125 | fit_result_stat <- btd_foot( 126 | data = italy_2020_2021, 127 | dynamic_rank = FALSE, 128 | prior_par = list( 129 | logStrength = normal(0, 10), 130 | logTie = normal(0, 5), 131 | home = normal(0, 5) 132 | ), 133 | rank_measure = "map", 134 | iter_sampling = 1000, 135 | parallel_chains = 2, 136 | chains = 2 137 | ) 138 | 139 | print(fit_result_stat) 140 | } 141 | } 142 | } 143 | \author{ 144 | Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it}. 145 | } 146 | -------------------------------------------------------------------------------- /R/foot_priors.R: -------------------------------------------------------------------------------- 1 | #' Football priors distributions and options 2 | #' 3 | #' @name priors 4 | #' @description This prior specification is just a duplicate 5 | #' of some of the priors used by the \pkg{rstanarm} package. 6 | #' 7 | #' These prior distributions can be passed to the 8 | #' \code{stan_foot} function, through the arguments \code{prior} and \code{prior_sd}. 9 | #' See the vignette \href{http://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior 10 | #' Distributions for rstanarm Models}} for further details (to view the priors used for an existing model see 11 | #' \href{https://mc-stan.org/rstanarm/reference/prior_summary.stanreg.html}{prior_summary}). 12 | #' The default priors used in the \pkg{stan_foot} modeling function 13 | #' are intended to be \emph{weakly informative} in that they provide moderate 14 | #' regularlization and help stabilize computation. 15 | #' 16 | #' You can choose between: \code{normal}, \code{cauchy}, \code{laplace}, \code{student_t}. 17 | #' 18 | #' @param location Prior location. In most cases, this is the prior mean, but 19 | #' for \code{cauchy} (which is equivalent to \code{student_t} with 20 | #' \code{df=1}), the mean does not exist and \code{location} is the prior 21 | #' median. The default value is \eqn{0}. 22 | #' @param scale Prior scale. The default depends on the family (see 23 | #' \strong{Details}). 24 | #' @param df Prior degrees of freedom. The default is \eqn{1} for 25 | #' \code{student_t}, in which case it is equivalent to \code{cauchy}. 26 | #' @param autoscale A logical scalar, defaulting to \code{TRUE}. 27 | #' 28 | #' @details The details depend on the family of the prior being used: 29 | #' \subsection{Student t family}{ 30 | #' Family members: 31 | #' \itemize{ 32 | #' \item \code{normal(location, scale)} 33 | #' \item \code{student_t(df, location, scale)} 34 | #' \item \code{cauchy(location, scale)} 35 | #' } 36 | #' Each of these functions also takes an argument \code{autoscale}. 37 | #' 38 | #' For the prior distribution for the intercept, \code{location}, 39 | #' \code{scale}, and \code{df} should be scalars. For the prior for the other 40 | #' coefficients they can either be vectors of length equal to the number of 41 | #' coefficients (not including the intercept), or they can be scalars, in 42 | #' which case they will be recycled to the appropriate length. As the 43 | #' degrees of freedom approaches infinity, the Student t distribution 44 | #' approaches the normal distribution and if the degrees of freedom are one, 45 | #' then the Student t distribution is the Cauchy distribution. 46 | #' 47 | #' If \code{scale} is not specified it will default to \eqn{10} for the 48 | #' intercept and \eqn{2.5} for the other coefficients. 49 | #' 50 | #' If the \code{autoscale} argument is \code{TRUE} (the default), then the 51 | #' scales will be further adjusted as described above in the documentation of 52 | #' the \code{autoscale} argument in the \strong{Arguments} section. 53 | #' } 54 | #' 55 | #' \subsection{Laplace family}{ 56 | #' Family members: 57 | #' \itemize{ 58 | #' \item \code{laplace(location, scale)} 59 | #' } 60 | #' Each of these functions also takes an argument \code{autoscale}. 61 | #' 62 | #' The Laplace distribution is also known as the double-exponential 63 | #' distribution. It is a symmetric distribution with a sharp peak at its mean 64 | #' / median / mode and fairly long tails. This distribution can be motivated 65 | #' as a scale mixture of normal distributions and the remarks above about the 66 | #' normal distribution apply here as well. 67 | #' 68 | #' } 69 | #' 70 | #' @return A named list to be used internally by the 71 | #' \code{stan_foot} model fitting function. 72 | #' 73 | #' @seealso The various vignettes for the \pkg{rstanarm} package also discuss 74 | #' and demonstrate the use of some of the supported prior distributions. 75 | #' 76 | #' @author Leonardo Egidi \email{legidi@units.it} 77 | #' 78 | #' @references 79 | #' Gelman, A., Jakulin, A., Pittau, M. G., and Su, Y. (2008). A weakly 80 | #' informative default prior distribution for logistic and other regression 81 | #' models. \emph{Annals of Applied Statistics}. 2(4), 1360--1383. 82 | #' 83 | #' 84 | #' @rdname priors 85 | #' @export 86 | #' 87 | #' 88 | 89 | normal <- function(location = 0, scale = NULL, autoscale = TRUE) { 90 | validate_parameter_value(scale) 91 | list( 92 | dist = "normal", df = NA, 93 | location = location, 94 | scale = scale, 95 | autoscale = autoscale 96 | ) 97 | } 98 | 99 | #' @rdname priors 100 | #' @export 101 | student_t <- function(df = 1, location = 0, scale = NULL, autoscale = TRUE) { 102 | validate_parameter_value(scale) 103 | validate_parameter_value(df) 104 | list( 105 | dist = "t", df = df, 106 | location = location, 107 | scale = scale, 108 | autoscale = autoscale 109 | ) 110 | } 111 | 112 | #' @rdname priors 113 | #' @export 114 | cauchy <- function(location = 0, scale = NULL, autoscale = TRUE) { 115 | student_t( 116 | df = 1, location = location, 117 | scale = scale, 118 | autoscale = autoscale 119 | ) 120 | } 121 | 122 | #' @rdname priors 123 | #' @export 124 | laplace <- function(location = 0, scale = NULL, autoscale = TRUE) { 125 | list( 126 | dist = "laplace", df = NA, 127 | location = location, 128 | scale = scale, 129 | autoscale = autoscale 130 | ) 131 | } 132 | 133 | 134 | # internal ---------------------------------------------------------------- 135 | 136 | # Check for positive scale or df parameter (NULL ok) 137 | # 138 | # @param x The value to check. 139 | # @return Either an error is thrown or \code{TRUE} is returned invisibly. 140 | validate_parameter_value <- function(x) { 141 | nm <- deparse(substitute(x)) 142 | if (!is.null(x)) { 143 | if (!is.numeric(x)) { 144 | stop(nm, " should be NULL or numeric", call. = FALSE) 145 | } 146 | if (any(x <= 0)) { 147 | stop(nm, " should be positive", call. = FALSE) 148 | } 149 | } 150 | invisible(TRUE) 151 | } 152 | 153 | # An nlist object is an S3 class list of uniquely 154 | # named numeric elements. 155 | 156 | nlist <- function(...) { 157 | args <- list(...) 158 | if (length(args)) { 159 | return(as_nlist(args)) 160 | } 161 | structure(list(), .Names = character(0), class = "nlist") 162 | } 163 | -------------------------------------------------------------------------------- /tests/testthat/test_pp_foot.R: -------------------------------------------------------------------------------- 1 | ## all the test PASSED (also the skipped ones!) 2 | 3 | # ____________________________________________________________________________ 4 | # Test for aggregated type #### 5 | 6 | test_that("pp_foot returns proper output for aggregated type", { 7 | skip_on_cran() 8 | skip_if_not(stan_cmdstan_exists()) 9 | 10 | ## ............................................................................ 11 | ## Data #### 12 | 13 | data("england") 14 | england <- as.data.frame(england) 15 | 16 | # One season only 17 | england_2004 <- england %>% 18 | dplyr::filter(division == 1) %>% 19 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 20 | dplyr::filter(Season == "2004") 21 | 22 | colnames(england_2004) <- c( 23 | "periods", "home_team", "away_team", 24 | "home_goals", "away_goals" 25 | ) 26 | 27 | ## ............................................................................ 28 | ## Tests #### 29 | 30 | model <- stan_foot( 31 | data = england_2004, 32 | model = "double_pois", 33 | iter_sampling = 200, 34 | chains = 2, 35 | seed = 433 36 | ) 37 | 38 | result <- pp_foot(object = model, data = england_2004, type = "aggregated") 39 | 40 | # Expect output to be a list with components "pp_plot" and "pp_table" 41 | expect_type(result, "list") 42 | expect_true("pp_plot" %in% names(result)) 43 | expect_true("pp_table" %in% names(result)) 44 | 45 | # Check that the plot is a ggplot object and table is a data frame 46 | expect_s3_class(result$pp_plot, "ggplot") 47 | expect_true(is.data.frame(result$pp_table)) 48 | 49 | # Check that the table has the expected column names 50 | expect_equal(names(result$pp_table), c("goal diff.", "Bayesian p-value")) 51 | }) 52 | 53 | 54 | 55 | # ____________________________________________________________________________ 56 | # Test for matches type #### 57 | 58 | test_that("pp_foot returns proper output for matches type", { 59 | skip_on_cran() 60 | skip_if_not(stan_cmdstan_exists()) 61 | 62 | ## ............................................................................ 63 | ## Data #### 64 | 65 | data("england") 66 | england <- as.data.frame(england) 67 | 68 | # One season only 69 | england_2004 <- england %>% 70 | dplyr::filter(division == 1) %>% 71 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 72 | dplyr::filter(Season == "2004") 73 | 74 | colnames(england_2004) <- c( 75 | "periods", "home_team", "away_team", 76 | "home_goals", "away_goals" 77 | ) 78 | 79 | ## ............................................................................ 80 | ## Tests #### 81 | 82 | model <- stan_foot( 83 | data = england_2004, 84 | model = "double_pois", 85 | iter_sampling = 200, 86 | chains = 2, 87 | seed = 433 88 | ) 89 | 90 | result <- pp_foot(object = model, data = england_2004, type = "matches", coverage = 0.95) 91 | 92 | expect_type(result, "list") 93 | expect_true("pp_plot" %in% names(result)) 94 | expect_true("pp_table" %in% names(result)) 95 | expect_s3_class(result$pp_plot, "ggplot") 96 | expect_true(is.data.frame(result$pp_table)) 97 | 98 | # The table for matches should have columns "1-alpha" and "emp. coverage" 99 | expect_equal(names(result$pp_table), c("1-alpha", "emp. coverage")) 100 | }) 101 | 102 | 103 | 104 | # ____________________________________________________________________________ 105 | # Test for missing required columns in data #### 106 | 107 | 108 | test_that("pp_foot errors if required columns are missing", { 109 | skip_on_cran() 110 | skip_if_not(stan_cmdstan_exists()) 111 | 112 | ## ............................................................................ 113 | ## Data #### 114 | 115 | data("england") 116 | england <- as.data.frame(england) 117 | 118 | # One season only 119 | england_2004 <- england %>% 120 | dplyr::filter(division == 1) %>% 121 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 122 | dplyr::filter(Season == "2004") 123 | 124 | colnames(england_2004) <- c( 125 | "periods", "home_team", "away_team", 126 | "home_goals", "away_goals" 127 | ) 128 | 129 | ## ............................................................................ 130 | ## Tests #### 131 | 132 | model <- stan_foot( 133 | data = england_2004, 134 | model = "double_pois", 135 | iter_sampling = 200, 136 | chains = 2, 137 | seed = 433 138 | ) 139 | # Remove the 'periods' column 140 | data_missing <- england_2004[, -1] 141 | expect_error(pp_foot(object = model, data = data_missing), 142 | "data is missing required columns:") 143 | }) 144 | 145 | 146 | # ____________________________________________________________________________ 147 | # Tests on input and different models #### 148 | 149 | 150 | test_that("expect error in input", { 151 | skip_on_cran() 152 | skip_if_not(stan_cmdstan_exists()) 153 | 154 | ## ............................................................................ 155 | ## Data #### 156 | 157 | data("england") 158 | england <- as.data.frame(england) 159 | 160 | # One season only 161 | england_2004 <- england %>% 162 | dplyr::filter(division == 1) %>% 163 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 164 | dplyr::filter(Season == "2004") 165 | 166 | colnames(england_2004) <- c( 167 | "periods", "home_team", "away_team", 168 | "home_goals", "away_goals" 169 | ) 170 | 171 | ## ............................................................................ 172 | ## Tests #### 173 | 174 | # Wrong model class 175 | model <- mle_foot(england_2004, "double_pois") 176 | expect_error(pp_foot(object = model, data = england_2004)) 177 | 178 | # CmdStan model class 179 | model <- stan_foot( 180 | data = england_2004, 181 | model = "student_t", 182 | iter_sampling = 200, 183 | chains = 2, 184 | seed = 433 185 | ) 186 | expect_error(pp_foot(object = model$fit, data = england_2004), NA) 187 | }) 188 | 189 | -------------------------------------------------------------------------------- /src/stan/biv_pois.stan: -------------------------------------------------------------------------------- 1 | functions{ 2 | 3 | real bipois_lpmf(array[] int r , real mu1,real mu2,real mu3) { 4 | real ss; 5 | real log_s; 6 | real mus; 7 | int miny; 8 | 9 | miny = min(r[1], r[2]); 10 | 11 | ss = poisson_lpmf(r[1] | mu1) + poisson_lpmf(r[2] | mu2) - 12 | exp(mu3); 13 | if(miny > 0) { 14 | mus = -mu1-mu2+mu3; 15 | log_s = ss; 16 | 17 | for(k in 1:miny) { 18 | log_s = log_s + log(r[1] - k + 1) + mus 19 | + log(r[2] - k + 1) 20 | - log(k); 21 | ss = log_sum_exp(ss, log_s); 22 | } 23 | } 24 | return(ss); 25 | } 26 | 27 | } 28 | data{ 29 | int N; // number of games 30 | int N_prev; 31 | array[N,2] int y; 32 | int nteams; 33 | array[N] int instants_rank; 34 | int ntimes_rank; // dynamic periods for ranking 35 | array[N] int team1; 36 | array[N] int team2; 37 | array[N_prev]int team1_prev; 38 | array[N_prev] int team2_prev; 39 | matrix[ntimes_rank,nteams] ranking; 40 | int ind_home; 41 | real mean_home; // Mean for home effect 42 | real sd_home; // Standard deviation for home effect 43 | 44 | // priors part 45 | int prior_dist_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace 46 | int prior_dist_sd_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace 47 | 48 | real hyper_df; 49 | real hyper_location; 50 | 51 | real hyper_sd_df; 52 | real hyper_sd_location; 53 | real hyper_sd_scale; 54 | } 55 | parameters{ 56 | vector[nteams] att_raw; 57 | vector[nteams] def_raw; 58 | real sigma_att; 59 | real sigma_def; 60 | real home; 61 | real rho; 62 | real gamma; 63 | } 64 | transformed parameters{ 65 | real adj_h_eff; // Adjusted home effect 66 | vector[nteams] att; 67 | vector[nteams] def; 68 | array[N] vector[3] theta; 69 | 70 | for (t in 1:nteams){ 71 | att[t] = att_raw[t]-mean(att_raw); 72 | def[t] = def_raw[t]-mean(def_raw); 73 | } 74 | 75 | adj_h_eff = home * ind_home; 76 | 77 | for (n in 1:N){ 78 | theta[n,1] = exp(adj_h_eff+att[team1[n]]+def[team2[n]]+ 79 | (gamma/2)*(ranking[instants_rank[n], team1[n]]-ranking[instants_rank[n], team2[n]])); 80 | theta[n,2] = exp(att[team2[n]]+def[team1[n]]- 81 | (gamma/2)*(ranking[instants_rank[n], team1[n]]-ranking[instants_rank[n], team2[n]])); 82 | theta[n,3] = exp(rho); 83 | } 84 | } 85 | model{ 86 | // log-priors for team-specific abilities 87 | for (t in 1:(nteams)){ 88 | if (prior_dist_num == 1){ 89 | target+= normal_lpdf(att_raw[t]|hyper_location, sigma_att); 90 | target+= normal_lpdf(def_raw[t]|hyper_location, sigma_def); 91 | } 92 | else if (prior_dist_num == 2){ 93 | target+= student_t_lpdf(att_raw[t]|hyper_df, hyper_location, sigma_att); 94 | target+= student_t_lpdf(def_raw[t]|hyper_df, hyper_location, sigma_def); 95 | } 96 | else if (prior_dist_num == 3){ 97 | target+= cauchy_lpdf(att_raw[t]|hyper_location, sigma_att); 98 | target+= cauchy_lpdf(def_raw[t]|hyper_location, sigma_def); 99 | } 100 | else if (prior_dist_num == 4){ 101 | target+= double_exponential_lpdf(att_raw[t]|hyper_location, sigma_att); 102 | target+= double_exponential_lpdf(def_raw[t]|hyper_location, sigma_def); 103 | } 104 | } 105 | 106 | 107 | // log-hyperpriors for sd parameters 108 | if (prior_dist_sd_num == 1 ){ 109 | target+=normal_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 110 | target+=normal_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 111 | } 112 | else if (prior_dist_sd_num == 2){ 113 | target+=student_t_lpdf(sigma_att|hyper_sd_df, hyper_sd_location, hyper_sd_scale); 114 | target+=student_t_lpdf(sigma_def|hyper_sd_df, hyper_sd_location, hyper_sd_scale); 115 | } 116 | else if (prior_dist_sd_num == 3){ 117 | target+=cauchy_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 118 | target+=cauchy_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 119 | } 120 | else if (prior_dist_sd_num == 4){ 121 | target+=double_exponential_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 122 | target+=double_exponential_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 123 | } 124 | 125 | // log-priors fixed effects 126 | target+=normal_lpdf(home|mean_home,sd_home); 127 | target+=normal_lpdf(rho|0,1); 128 | target+=normal_lpdf(gamma|0,1); 129 | 130 | // likelihood 131 | for (n in 1:N){ 132 | //target+=bipois_lpmf(y[n,]| theta[n,1], 133 | // theta[n,2], theta[n,3]); 134 | target+=poisson_lpmf(y[n,1]| theta[n,1]+theta[n,3]); 135 | target+=poisson_lpmf(y[n,2]| theta[n,2]+theta[n,3]); 136 | } 137 | } 138 | generated quantities{ 139 | array[N,2]int y_rep; 140 | array[N_prev,2] int y_prev; 141 | array[N_prev] vector[3] theta_prev; 142 | vector[N] log_lik; 143 | array[N] int diff_y_rep; 144 | 145 | //in-sample replications 146 | for (n in 1:N){ 147 | y_rep[n,1] = poisson_rng(theta[n,1]+theta[n,3]); 148 | y_rep[n,2] = poisson_rng(theta[n,2]+theta[n,3]); 149 | diff_y_rep[n] = y_rep[n,1] - y_rep[n,2]; 150 | log_lik[n] = poisson_lpmf(y[n,1]| theta[n,1]+theta[n,3])+ 151 | poisson_lpmf(y[n,2]| theta[n,2]+theta[n,3]); 152 | //bipois_lpmf(y[n,]| theta[n,1], 153 | // theta[n,2], theta[n,3]); 154 | } 155 | 156 | //out-of-sample predictions 157 | if (N_prev > 0) { 158 | for (n in 1:N_prev){ 159 | theta_prev[n,1] = exp(adj_h_eff+att[team1_prev[n]]+ 160 | def[team2_prev[n]]+ 161 | (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]])); 162 | theta_prev[n,2] = exp(att[team2_prev[n]]+ 163 | def[team1_prev[n]]- 164 | (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]])); 165 | theta_prev[n,3] = exp(rho); 166 | y_prev[n,1] = poisson_rng(theta_prev[n,1]+theta_prev[n,3]); 167 | y_prev[n,2] = poisson_rng(theta_prev[n,2]+theta_prev[n,3]); 168 | } 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /src/stan/zero_infl_skellam.stan: -------------------------------------------------------------------------------- 1 | functions{ 2 | real skellam_lpmf(int k, real lambda1, real lambda2) { 3 | //real r = k; 4 | return -(lambda1 + lambda2) + (k/2) * log(lambda1/lambda2) + 5 | log_modified_bessel_first_kind(abs(k), 2 * sqrt(lambda1 * lambda2)); 6 | } 7 | 8 | real zero_infl_skellam_lpmf(int k, real lambda1, real lambda2, real p) { 9 | // This way is the easiest and proposed by https://github.com/Torvaney/karlis-ntzoufras-reproduction. 10 | // However, within model block, we propose in a comment the alternative way that Stan proposes in their documentation for zero inflated models 11 | real base_prob; 12 | real prob; 13 | real log_prob; 14 | 15 | base_prob = exp(skellam_lpmf(k| lambda1,lambda2)); 16 | 17 | if (k== 0) 18 | prob = p + (1 - p) * base_prob; 19 | else 20 | prob = (1 - p) * base_prob; 21 | 22 | log_prob = log(prob); 23 | 24 | return log_prob; 25 | } 26 | 27 | } 28 | data{ 29 | int N; 30 | int N_prev; 31 | array[N] int diff_y; 32 | int nteams; 33 | array[N] int team1; 34 | array[N] int team2; 35 | array[N_prev] int team1_prev; 36 | array[N_prev] int team2_prev; 37 | array[N] int instants_rank; 38 | int ntimes_rank; // dynamic periods for ranking 39 | matrix[ntimes_rank,nteams] ranking; // eventual fifa/uefa ranking 40 | int ind_home; 41 | real mean_home; // Mean for home effect 42 | real sd_home; // Standard deviation for home effect 43 | 44 | // priors part 45 | int prior_dist_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace 46 | int prior_dist_sd_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace 47 | 48 | real hyper_df; 49 | real hyper_location; 50 | 51 | real hyper_sd_df; 52 | real hyper_sd_location; 53 | real hyper_sd_scale; 54 | } 55 | parameters{ 56 | vector[nteams] att_raw; 57 | vector[nteams] def_raw; 58 | real sigma_att; 59 | real sigma_def; 60 | real home; 61 | real gamma; 62 | real prob_of_draws;// excessive probability of draws 63 | 64 | } 65 | transformed parameters{ 66 | real adj_h_eff; // Adjusted home effect 67 | vector[nteams] att; 68 | vector[nteams] def; 69 | array[N,2] real theta; 70 | 71 | for (t in 1:nteams){ 72 | att[t] = att_raw[t]-mean(att_raw); 73 | def[t] = def_raw[t]-mean(def_raw); 74 | } 75 | 76 | adj_h_eff = home * ind_home; 77 | 78 | for (n in 1:N){ 79 | theta[n,1] = exp(adj_h_eff+att[team1[n]]+def[team2[n]]+ 80 | (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]])); 81 | theta[n,2] = exp(att[team2[n]]+def[team1[n]]- 82 | (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]])); 83 | } 84 | } 85 | model{ 86 | // log-priors for team-specific abilities 87 | for (t in 1:(nteams)){ 88 | if (prior_dist_num == 1){ 89 | target+= normal_lpdf(att_raw[t]|hyper_location, sigma_att); 90 | target+= normal_lpdf(def_raw[t]|hyper_location, sigma_def); 91 | } 92 | else if (prior_dist_num == 2){ 93 | target+= student_t_lpdf(att_raw[t]|hyper_df, hyper_location, sigma_att); 94 | target+= student_t_lpdf(def_raw[t]|hyper_df, hyper_location, sigma_def); 95 | } 96 | else if (prior_dist_num == 3){ 97 | target+= cauchy_lpdf(att_raw[t]|hyper_location, sigma_att); 98 | target+= cauchy_lpdf(def_raw[t]|hyper_location, sigma_def); 99 | } 100 | else if (prior_dist_num == 4){ 101 | target+= double_exponential_lpdf(att_raw[t]|hyper_location, sigma_att); 102 | target+= double_exponential_lpdf(def_raw[t]|hyper_location, sigma_def); 103 | } 104 | } 105 | 106 | 107 | // log-hyperpriors for sd parameters 108 | if (prior_dist_sd_num == 1 ){ 109 | target+=normal_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 110 | target+=normal_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 111 | } 112 | else if (prior_dist_sd_num == 2){ 113 | target+=student_t_lpdf(sigma_att|hyper_sd_df, hyper_sd_location, hyper_sd_scale); 114 | target+=student_t_lpdf(sigma_def|hyper_sd_df, hyper_sd_location, hyper_sd_scale); 115 | } 116 | else if (prior_dist_sd_num == 3){ 117 | target+=cauchy_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 118 | target+=cauchy_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 119 | } 120 | else if (prior_dist_sd_num == 4){ 121 | target+=double_exponential_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 122 | target+=double_exponential_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 123 | } 124 | 125 | // log-priors fixed effects 126 | target+=normal_lpdf(home|mean_home,sd_home); 127 | target+=normal_lpdf(gamma|0,1); 128 | target+=uniform_lpdf(prob_of_draws|0,1); 129 | 130 | 131 | // likelihood 132 | for (n in 1:N){ 133 | target+=zero_infl_skellam_lpmf(diff_y[n]| theta[n,1],theta[n,2], 134 | prob_of_draws); 135 | } 136 | } 137 | 138 | generated quantities{ 139 | array[N,2] int y_rep; 140 | array[N] int diff_y_rep; 141 | vector[N] log_lik; 142 | array[N_prev,2] int y_prev; 143 | array[N_prev] vector[2] theta_prev; 144 | array[N_prev] int diff_y_prev; 145 | 146 | //in-sample replications 147 | for (n in 1:N){ 148 | y_rep[n,1] = poisson_rng(theta[n,1]); 149 | y_rep[n,2] = poisson_rng(theta[n,2]); 150 | diff_y_rep[n] = y_rep[n,1] - y_rep[n,2]; 151 | log_lik[n] =zero_infl_skellam_lpmf(diff_y[n]| theta[n,1],theta[n,2], 152 | prob_of_draws); 153 | } 154 | //out-of-sample predictions 155 | if (N_prev > 0) { 156 | for (n in 1:N_prev){ 157 | theta_prev[n,1] = exp(adj_h_eff+att[team1_prev[n]]+ 158 | def[team2_prev[n]]+ 159 | (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]])); 160 | theta_prev[n,2] = exp(att[team2_prev[n]]+ 161 | def[team1_prev[n]]- 162 | (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]])); 163 | y_prev[n,1] = poisson_rng(theta_prev[n,1]); 164 | y_prev[n,2] = poisson_rng(theta_prev[n,2]); 165 | diff_y_prev[n] = y_prev[n,1] - y_prev[n,2]; 166 | } 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /tests/testthat/test_compare_foot.R: -------------------------------------------------------------------------------- 1 | # all the test PASSED (also the skipped ones!) 2 | 3 | 4 | # ____________________________________________________________________________ 5 | # Common matrix data #### 6 | 7 | home_team <- c( 8 | "AC Milan", "Inter", "Juventus", "AS Roma", "Napoli", 9 | "Lazio", "Atalanta", "Fiorentina", "Torino", "Sassuolo", "Udinese" 10 | ) 11 | 12 | away_team <- c( 13 | "Juventus", "Napoli", "Inter", "Atalanta", "Lazio", 14 | "AC Milan", "Sassuolo", "Torino", "Fiorentina", "Udinese", "AS Roma" 15 | ) 16 | 17 | # Home and Away goals based on given data 18 | home_goals <- c(2, 0, 2, 2, 3, 1, 4, 2, 1, 1, 2) 19 | away_goals <- c(1, 0, 1, 3, 2, 1, 1, 2, 1, 1, 2) 20 | 21 | # Combine into a data frame 22 | test_data <- data.frame(home_team, away_team, home_goals, away_goals) 23 | 24 | # Define the data for each column 25 | pW <- c(0.51, 0.45, 0.48, 0.53, 0.56, 0.39, 0.52, 0.55, 0.61, 0.37, 0.35) 26 | pD <- c(0.27, 0.25, 0.31, 0.18, 0.23, 0.30, 0.24, 0.26, 0.18, 0.19, 0.22) 27 | pL <- c(0.22, 0.30, 0.21, 0.29, 0.21, 0.31, 0.24, 0.19, 0.21, 0.44, 0.43) 28 | 29 | # Create the data frame table_prob 30 | table_prob <- data.frame(pW, pD, pL) 31 | matrix_prob <- as.matrix(table_prob) 32 | 33 | 34 | 35 | # ____________________________________________________________________________ 36 | # Data tests #### 37 | 38 | test_that("Error when test_data is missing required columns", { 39 | bad_test_data <- test_data[, -1] # Remove 'home_team' 40 | expect_error( 41 | compare_foot(source = list(prob1 = matrix_prob), test_data = bad_test_data), 42 | "test_data is missing required columns" 43 | ) 44 | }) 45 | 46 | test_that("Error when probability matrix has wrong number of rows", { 47 | # Create a matrix with fewer rows than test_data 48 | bad_matrix <- matrix_prob[1:4, ] 49 | expect_warning( 50 | expect_error( 51 | compare_foot(source = list(bad = bad_matrix), test_data = test_data), 52 | "No valid models or probability matrices were provided" 53 | ) 54 | ) 55 | }) 56 | 57 | test_that("Error when probability matrix has wrong number of columns", { 58 | # Create a matrix with only 2 columns 59 | bad_matrix2 <- matrix_prob[, 1:2] 60 | expect_warning( 61 | compare_foot(source = list(good = matrix_prob, bad2 = bad_matrix2), test_data = test_data), 62 | "does not have exactly 3 columns" 63 | ) 64 | }) 65 | 66 | test_that("compare_foot removes NA rows from probability matrix", { 67 | # Introduce NA in row 2 (one of the entries) 68 | matrix_prob_na <- matrix_prob 69 | matrix_prob_na[2, 2] <- NA 70 | 71 | expect_warning( 72 | result <- compare_foot( 73 | source = list(prob_with_na = matrix_prob_na), 74 | test_data = test_data 75 | ), 76 | "contains 1 rows with NAs" 77 | ) 78 | }) 79 | 80 | 81 | test_that("compare_foot normalizes probability matrix rows not summing to 1", { 82 | # Create a probability matrix that does NOT sum to 1 by scaling each row by 2. 83 | matrix_prob_scaled <- matrix_prob * 2 84 | 85 | expect_warning( 86 | result <- compare_foot(source = list(prob_scaled = matrix_prob_scaled), test_data = test_data), 87 | "Probabilities in matrix 'prob_scaled' do not sum to 1. Normalizing" 88 | ) 89 | }) 90 | 91 | # test_that("compare_foot warns and errors when probability matrix becomes empty after removing NA rows", { 92 | # # Create a probability matrix where every row contains NA values. 93 | # matrix_all_na <- matrix(NA, nrow = 11, ncol = 3) 94 | # 95 | # expect_warning( 96 | # expect_error( 97 | # compare_foot(source = list(all_na = matrix_all_na), test_data = test_data), 98 | # "No valid models or probability matrices were provided" 99 | # ), 100 | # "After removing NA rows, no data remains for matrix 'all_na'" 101 | # ) 102 | # }) 103 | 104 | # ____________________________________________________________________________ 105 | # Models and arguments tests #### 106 | 107 | test_that("compare_foot warns and skips stan_foot object without y_prev or diff_y_prev", { 108 | skip_on_cran() 109 | skip_if_not(stan_cmdstan_exists()) 110 | 111 | ## ............................................................................ 112 | ## Data #### 113 | 114 | data("italy") 115 | italy_2000 <- italy %>% 116 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 117 | dplyr::filter(Season == "2000") 118 | 119 | colnames(italy_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals") 120 | 121 | ## ............................................................................ 122 | ## Tests #### 123 | 124 | fit_1 <- stan_foot( 125 | data = italy_2000, 126 | model = "double_pois", predict = 18, 127 | iter_sampling = 200, 128 | chains = 2, 129 | seed = 433 130 | ) # Double Poisson model 131 | fit_2 <- stan_foot( 132 | data = italy_2000, 133 | model = "biv_pois", 134 | iter_sampling = 200, 135 | chains = 2, 136 | seed = 433 137 | ) # Bivariate Poisson model 138 | 139 | fit_3 <- stan_foot( 140 | data = italy_2000, 141 | model = "student_t", predict = 18, 142 | iter_sampling = 200, 143 | chains = 2, 144 | seed = 433 145 | ) # Skellam model 146 | 147 | italy_2000_test <- italy_2000[289:306, ] 148 | 149 | 150 | expect_warning( 151 | expect_error( 152 | compare_foot( 153 | source = list(bivariate_poisson = fit_2), 154 | test_data = italy_2000_test, 155 | metric = c("accuracy", "brier", "ACP", "pseudoR2", "RPS"), 156 | conf_matrix = TRUE 157 | ), 158 | "No valid models or probability matrices were provided" 159 | ), 160 | "does not contain 'y_prev' or 'diff_y_prev'" 161 | ) 162 | 163 | 164 | expect_warning( 165 | compare_foot( 166 | source = list(double_poisson = fit_1$fit, bivariate_poisson = fit_2, student = fit_3), 167 | test_data = italy_2000_test, 168 | metric = c("accuracy", "brier", "ACP", "pseudoR2", "RPS"), 169 | conf_matrix = TRUE 170 | ), 171 | "does not contain 'y_prev' or 'diff_y_prev'" 172 | ) 173 | }) 174 | 175 | 176 | test_that("compare_foot warns and skips source object with different predicted matches and/or not allowed type", { 177 | skip_on_cran() 178 | skip_if_not(stan_cmdstan_exists()) 179 | 180 | ## ............................................................................ 181 | ## Data #### 182 | 183 | data("italy") 184 | italy_2000 <- italy %>% 185 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 186 | dplyr::filter(Season == "2000") 187 | 188 | colnames(italy_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals") 189 | 190 | ## ............................................................................ 191 | ## Tests #### 192 | 193 | fit_1 <- stan_foot( 194 | data = italy_2000, 195 | model = "double_pois", predict = 18, 196 | iter_sampling = 200, 197 | chains = 2, 198 | seed = 433 199 | ) # Double Poisson model 200 | 201 | fit_2 <- c(1,2,3,1) 202 | 203 | fit_3 <- stan_foot( 204 | data = italy_2000, 205 | model = "student_t", predict = 15, 206 | iter_sampling = 200, 207 | chains = 2, 208 | seed = 433 209 | ) # Skellam model 210 | 211 | italy_2000_test <- italy_2000[289:306, ] 212 | 213 | 214 | expect_warning( 215 | compare_foot( 216 | source = list(double_poisson = fit_1$fit, student = fit_3), 217 | test_data = italy_2000_test, 218 | metric = c("accuracy", "brier", "ACP", "pseudoR2", "RPS"), 219 | conf_matrix = TRUE 220 | )) 221 | 222 | expect_warning( 223 | compare_foot( 224 | source = list(double_poisson = fit_1$fit, bivariate_poisson = fit_2), 225 | test_data = italy_2000_test, 226 | metric = c("accuracy", "brier", "ACP", "pseudoR2", "RPS"), 227 | conf_matrix = TRUE 228 | )) 229 | }) 230 | -------------------------------------------------------------------------------- /R/utils_foot.R: -------------------------------------------------------------------------------- 1 | #' Check Prior Value (Internal function) 2 | #' 3 | #' Validates the prior value to ensure it meets the necessary requirements. 4 | #' 5 | #' This function checks if the provided prior value is numeric and of length one. 6 | #' If `positive` is set to \code{TRUE}, it will also ensure that the value is positive. 7 | #' 8 | #' @param prior_value The prior value to validate. Must be a numeric value of length one. 9 | #' @param prior_name A string representing the name of the prior, used for error messages. 10 | #' @param positive Logical value indicating whether the prior must be positive. Defaults to \code{FALSE}. 11 | #' @return No return value. The function will throw an error if validation fails. 12 | #' @examples 13 | #' check_prior(5, "alpha", positive = TRUE) 14 | #' check_prior(-1, "beta", positive = FALSE) 15 | #' @noRd 16 | 17 | check_prior <- function(prior_value, prior_name, positive = FALSE) { 18 | # Check if the prior is numeric and of length 1 19 | if (!is.numeric(prior_value) || length(prior_value) != 1) { 20 | stop(sprintf("The prior '%s' must be a single numeric value.", prior_name)) 21 | } 22 | # Check if the prior is positive if required 23 | if (positive && prior_value <= 0) { 24 | stop(sprintf("The prior '%s' must be a positive numeric value.", prior_name)) 25 | } 26 | } 27 | 28 | 29 | #' Compute the Maximum A Posteriori (MAP) Estimate (Internal function) 30 | #' 31 | #' Computes the Maximum A Posteriori (MAP) estimate from a set of samples using kernel density estimation. 32 | #' 33 | #' This function calculates the MAP estimate, which is the mode of the estimated density, for a given vector of samples. 34 | #' 35 | #' @param samples A numeric vector of samples from which to compute the MAP estimate. 36 | #' @return A numeric value representing the MAP estimate. 37 | #' @examples 38 | #' samples <- rnorm(1000, mean = 5, sd = 2) 39 | #' compute_MAP(samples) 40 | #' @importFrom stats density 41 | #' @noRd 42 | 43 | compute_MAP <- function(samples) { 44 | dens <- stats::density(samples) 45 | MAP <- dens$x[which.max(dens$y)] 46 | return(MAP) 47 | } 48 | 49 | 50 | 51 | #' Normalize Rank Points (Internal Function) 52 | #' 53 | #' Normalizes a vector of rank points using the specified method. 54 | #' 55 | #' This function applies one of several normalization methods to a numeric vector of rank points. 56 | #' Available methods are: 57 | #' \itemize{ 58 | #' \item \code{"none"}: No normalization is applied. 59 | #' \item \code{"standard"}: Standardizes the data to have mean 0 and standard deviation 0.5. 60 | #' \item \code{"mad"}: Normalizes using the median absolute deviation. 61 | #' \item \code{"min_max"}: Scales the data to be between 0 and 1. 62 | #' } 63 | #' 64 | #' @param rank_points A numeric vector of rank points to normalize. 65 | #' @param method A string specifying the normalization method. Options are \code{"none"}, \code{"standard"}, \code{"mad"}, or \code{"min_max"}. 66 | #' 67 | #' @return A numeric vector of normalized rank points. 68 | #' 69 | #' @noRd 70 | normalize_rank_points <- function(rank_points, method) { 71 | if (method == "none") { 72 | rank_points 73 | } else if (method == "standard") { 74 | s <- stats::sd(rank_points, na.rm = TRUE) 75 | m <- mean(rank_points, na.rm = TRUE) 76 | if (s == 0) { 77 | rep(0, length(rank_points)) 78 | } else { 79 | (rank_points - m) / (2 * s) 80 | } 81 | } else if (method == "mad") { 82 | md <- stats::mad(rank_points, na.rm = TRUE) 83 | med <- stats::median(rank_points, na.rm = TRUE) 84 | if (md == 0) { 85 | rep(0, length(rank_points)) 86 | } else { 87 | (rank_points - med) / md 88 | } 89 | } else if (method == "min_max") { 90 | min_rp <- min(rank_points, na.rm = TRUE) 91 | max_rp <- max(rank_points, na.rm = TRUE) 92 | if (max_rp == min_rp) { 93 | rep(0, length(rank_points)) 94 | } else { 95 | (rank_points - min_rp) / (max_rp - min_rp) 96 | } 97 | } else { 98 | stop("Invalid normalization method specified.") 99 | } 100 | } 101 | 102 | 103 | 104 | #' Replace Indices with Team Names (Internal Function) 105 | #' 106 | #' Substitutes numeric indices in parameter names with corresponding team names based on a predefined mapping. 107 | #' 108 | #' 109 | #' @param param_names A character vector of parameter names that may contain numeric indices to be replaced. 110 | #' @param exclude_params A character vector of parameter name prefixes that should be excluded from renaming. Parameters starting with any of these prefixes will remain unchanged. 111 | #' @param index_map A named character vector where names correspond to numeric indices and values correspond to the team names they should be replaced with. 112 | #' 113 | #' @return A character vector with numeric indices replaced by their corresponding team names where applicable. 114 | #' 115 | #' @noRd 116 | team_names <- function(param_names, exclude_params, team_map_rev) { 117 | sapply(param_names, function(name) { 118 | # Skip parameters that should not have their names changed 119 | if (any(startsWith(name, exclude_params))) { 120 | return(name) 121 | } 122 | 123 | # Attempt to match two indices: [time, team] 124 | pattern_two <- "\\[(\\d+),(\\d+)\\]" 125 | if (grepl(pattern_two, name)) { 126 | matches <- regmatches(name, regexec(pattern_two, name)) 127 | 128 | 129 | if (length(matches[[1]]) >= 3) { 130 | time_index <- matches[[1]][2] # First index (time) 131 | team_index <- matches[[1]][3] # Second index (team) 132 | 133 | # Check if the second index corresponds to a team 134 | if (team_index %in% names(team_map_rev)) { 135 | team_name <- team_map_rev[[team_index]] 136 | # Replace the second index with the team name 137 | 138 | new_name <- sub(paste0(",", team_index, "\\]"), paste0(", ", team_name, "]"), name) 139 | return(new_name) 140 | } 141 | } 142 | } 143 | 144 | # If not matched with two indices, attempt to match single index: [team] 145 | pattern_one <- "\\[(\\d+)\\]" 146 | if (grepl(pattern_one, name)) { 147 | matches <- regmatches(name, regexec(pattern_one, name)) 148 | # Ensure that the single index is captured 149 | if (length(matches[[1]]) >= 2) { 150 | team_index <- matches[[1]][2] # Single index (team) 151 | 152 | # Check if the index corresponds to a team 153 | if (team_index %in% names(team_map_rev)) { 154 | team_name <- team_map_rev[[team_index]] 155 | # Replace the index with the team name 156 | new_name <- sub(paste0("\\[", team_index, "\\]"), paste0("[", team_name, "]"), name) 157 | return(new_name) 158 | } 159 | } 160 | } 161 | 162 | return(name) 163 | }, USE.NAMES = FALSE) 164 | } 165 | 166 | #' Compute the Ranked Probability Score (RPS) (Internal Function) 167 | #' 168 | #' Computes the Ranked Probability Score (RPS) from cumulative predicted probabilities 169 | #' and actual outcomes. The RPS is calculated as the mean of the squared differences 170 | #' between the cumulative predicted probabilities (for the first two outcome categories) 171 | #' and the corresponding cumulative observed probabilities. 172 | #' 173 | #' @param cum_pred A numeric matrix of cumulative predicted probabilities for each observation. 174 | #' It should have at least two columns; the probability for the final outcome is assumed to be 1. 175 | #' @param actual A character vector of actual outcomes. Each element should be one of 176 | #' \code{"Home Win"}, \code{"Draw"}, or \code{"Away Win"}. 177 | #' 178 | #' @return A numeric value representing the mean Ranked Probability Score. 179 | #' 180 | #' @noRd 181 | compute_RPS <- function(cum_pred, actual) { 182 | # Create a matrix of cumulative observed probabilities 183 | acum <- matrix(0, nrow = length(actual), ncol = 3) 184 | acum[actual == "Home Win", ] <- matrix(rep(c(1, 1, 1), sum(actual == "Home Win")), 185 | ncol = 3, byrow = TRUE 186 | ) 187 | acum[actual == "Draw", ] <- matrix(rep(c(0, 1, 1), sum(actual == "Draw")), 188 | ncol = 3, byrow = TRUE 189 | ) 190 | acum[actual == "Away Win", ] <- matrix(rep(c(0, 0, 1), sum(actual == "Away Win")), 191 | ncol = 3, byrow = TRUE 192 | ) 193 | squared_diff <- (cum_pred[, 1:2] - acum[, 1:2])^2 194 | rps_per_obs <- rowSums(squared_diff) / 2 # for 3 outcome categories: (3 - 1) 195 | mean(rps_per_obs) 196 | } 197 | -------------------------------------------------------------------------------- /src/stan/diag_infl_biv_pois.stan: -------------------------------------------------------------------------------- 1 | functions{ 2 | 3 | real bipois_lpmf(array[] int r , real mu1,real mu2,real mu3) { 4 | real ss; 5 | real log_s; 6 | real mus; 7 | int miny; 8 | 9 | miny = min(r[1], r[2]); 10 | 11 | ss = poisson_lpmf(r[1] | mu1) + poisson_lpmf(r[2] | mu2) - 12 | exp(mu3); 13 | if(miny > 0) { 14 | mus = -mu1-mu2+mu3; 15 | log_s = ss; 16 | 17 | for(k in 1:miny) { 18 | log_s = log_s + log(r[1] - k + 1) + mus 19 | + log(r[2] - k + 1) 20 | - log(k); 21 | ss = log_sum_exp(ss, log_s); 22 | } 23 | } 24 | return(ss); 25 | } 26 | real diag_infl_bipois_lpmf(array[] int r , real mu1,real mu2,real mu3, real p) { 27 | // This way is the easiest and proposed by https://github.com/Torvaney/karlis-ntzoufras-reproduction. 28 | // However, within model block, we propose in a comment the alternative way that Stan proposes in their documentation for zero inflated models 29 | real base_prob; 30 | real prob; 31 | real log_prob; 32 | 33 | base_prob = exp(bipois_lpmf(r| mu1, mu2,mu3)); 34 | 35 | if (r[1] == r[2]) 36 | prob = p + (1 - p) * base_prob; 37 | else 38 | prob = (1 - p) * base_prob; 39 | 40 | log_prob = log(prob); 41 | 42 | return log_prob; 43 | } 44 | 45 | } 46 | data{ 47 | int N; // number of games 48 | int N_prev; 49 | array[N,2] int y; 50 | int nteams; 51 | array[N] int team1; 52 | array[N] int team2; 53 | array[N_prev] int team1_prev; 54 | array[N_prev] int team2_prev; 55 | array[N] int instants_rank; 56 | int ntimes_rank; // dynamic periods for ranking 57 | matrix[ntimes_rank,nteams] ranking; 58 | int ind_home; 59 | real mean_home; // Mean for home effect 60 | real sd_home; // Standard deviation for home effect 61 | 62 | // priors part 63 | int prior_dist_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace 64 | int prior_dist_sd_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace 65 | 66 | real hyper_df; 67 | real hyper_location; 68 | 69 | real hyper_sd_df; 70 | real hyper_sd_location; 71 | real hyper_sd_scale; 72 | } 73 | parameters{ 74 | vector[nteams] att_raw; 75 | vector[nteams] def_raw; 76 | real sigma_att; 77 | real sigma_def; 78 | real home; 79 | real rho; 80 | real gamma; 81 | real prob_of_draws;// excessive probability of draws 82 | 83 | } 84 | transformed parameters{ 85 | real adj_h_eff; // Adjusted home effect 86 | vector[nteams] att; 87 | vector[nteams] def; 88 | array[N] vector[3] theta; 89 | 90 | for (t in 1:nteams){ 91 | att[t] = att_raw[t]-mean(att_raw); 92 | def[t] = def_raw[t]-mean(def_raw); 93 | } 94 | 95 | adj_h_eff = home * ind_home; 96 | 97 | for (n in 1:N){ 98 | theta[n,1] = exp(adj_h_eff+att[team1[n]]+def[team2[n]]+ 99 | (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]])); 100 | theta[n,2] = exp(att[team2[n]]+def[team1[n]]- 101 | (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]])); 102 | theta[n,3] = exp(rho); 103 | } 104 | } 105 | model{ 106 | // log-priors for team-specific abilities 107 | for (t in 1:(nteams)){ 108 | if (prior_dist_num == 1){ 109 | target+= normal_lpdf(att_raw[t]|hyper_location, sigma_att); 110 | target+= normal_lpdf(def_raw[t]|hyper_location, sigma_def); 111 | } 112 | else if (prior_dist_num == 2){ 113 | target+= student_t_lpdf(att_raw[t]|hyper_df, hyper_location, sigma_att); 114 | target+= student_t_lpdf(def_raw[t]|hyper_df, hyper_location, sigma_def); 115 | } 116 | else if (prior_dist_num == 3){ 117 | target+= cauchy_lpdf(att_raw[t]|hyper_location, sigma_att); 118 | target+= cauchy_lpdf(def_raw[t]|hyper_location, sigma_def); 119 | } 120 | else if (prior_dist_num == 4){ 121 | target+= double_exponential_lpdf(att_raw[t]|hyper_location, sigma_att); 122 | target+= double_exponential_lpdf(def_raw[t]|hyper_location, sigma_def); 123 | } 124 | } 125 | 126 | 127 | // log-hyperpriors for sd parameters 128 | if (prior_dist_sd_num == 1 ){ 129 | target+=normal_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 130 | target+=normal_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 131 | } 132 | else if (prior_dist_sd_num == 2){ 133 | target+=student_t_lpdf(sigma_att|hyper_sd_df, hyper_sd_location, hyper_sd_scale); 134 | target+=student_t_lpdf(sigma_def|hyper_sd_df, hyper_sd_location, hyper_sd_scale); 135 | } 136 | else if (prior_dist_sd_num == 3){ 137 | target+=cauchy_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 138 | target+=cauchy_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 139 | } 140 | else if (prior_dist_sd_num == 4){ 141 | target+=double_exponential_lpdf(sigma_att|hyper_sd_location, hyper_sd_scale); 142 | target+=double_exponential_lpdf(sigma_def|hyper_sd_location, hyper_sd_scale); 143 | } 144 | 145 | // log-priors fixed effects 146 | target+=normal_lpdf(home|mean_home,sd_home); 147 | target+=normal_lpdf(rho|0,1); 148 | target+=normal_lpdf(gamma|0,1); 149 | target+=uniform_lpdf(prob_of_draws|0,1); 150 | 151 | // likelihood 152 | 153 | for (n in 1:N){ 154 | target+=diag_infl_bipois_lpmf(y[n,]| theta[n,1], 155 | theta[n,2], theta[n,3],prob_of_draws); 156 | 157 | // if (y[n,1] == y[n,2]){// Alternative way as proposed by Stan manual 158 | // target += log_sum_exp(bernoulli_lpmf(1 |prob_of_draws), 159 | // bernoulli_lpmf(0 | prob_of_draws) 160 | // + bipois_lpmf(y[n,] | theta[n,1], 161 | // theta[n,2], theta[n,3]) ); 162 | // } else { 163 | // target += bernoulli_lpmf(0 |prob_of_draws) 164 | // + bipois_lpmf(y[n,] |theta[n,1], 165 | // theta[n,2], theta[n,3]); 166 | // } 167 | } 168 | } 169 | 170 | generated quantities{ 171 | array[N,2] int y_rep; 172 | array[N_prev,2] int y_prev; 173 | vector[N] log_lik; 174 | array[N] int diff_y_rep; 175 | vector[N_prev] theta_home_prev; // exponentiated linear pred. 176 | vector[N_prev] theta_away_prev; 177 | vector[N_prev] theta_corr_prev; 178 | 179 | //in-sample replications 180 | for (n in 1:N){ 181 | y_rep[n,1] = poisson_rng(theta[n,1]+theta[n,3]); 182 | y_rep[n,2] = poisson_rng(theta[n,2]+theta[n,3]); 183 | diff_y_rep[n] = y_rep[n,1] - y_rep[n,2]; 184 | log_lik[n] =diag_infl_bipois_lpmf(y[n,]| theta[n,1], 185 | theta[n,2], theta[n,3],prob_of_draws); 186 | // if (y[n,1] == y[n,2]){// Alternative way proposed by Stan documentation 187 | // log_lik[n] = log_sum_exp(bernoulli_lpmf(1 |prob_of_draws), 188 | // bernoulli_lpmf(0 |prob_of_draws) 189 | // + bipois_lpmf(y[n,] | theta[n,1], 190 | // theta[n,2], theta[n,3])); 191 | //} else { 192 | // log_lik[n] = bernoulli_lpmf(0 |prob_of_draws) 193 | // + bipois_lpmf(y[n,] | theta[n,1], 194 | // theta[n,2], theta[n,3]); 195 | //} 196 | } 197 | 198 | //out-of-sample predictions 199 | if (N_prev > 0) { 200 | for (n in 1:N_prev){ 201 | theta_home_prev[n] = exp(adj_h_eff+att[team1_prev[n]]+ 202 | def[team2_prev[n]]+ 203 | (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]])); 204 | theta_away_prev[n] = exp(att[team2_prev[n]]+ 205 | def[team1_prev[n]]- 206 | (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]])); 207 | theta_corr_prev[n] = exp(rho); 208 | y_prev[n,1] = poisson_rng(theta_home_prev[n]+theta_corr_prev[n]); 209 | y_prev[n,2] = poisson_rng(theta_away_prev[n]+theta_corr_prev[n]); 210 | } 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /tests/testthat/test_mle_foot.R: -------------------------------------------------------------------------------- 1 | ## all the test PASSED (also the skipped ones!) 2 | 3 | # ____________________________________________________________________________ 4 | # Data tests #### 5 | 6 | test_that("errors if data is not a matrix or data frame", { 7 | expect_error( 8 | mle_foot(123, model = "double_pois"), 9 | "Input data must be a data.frame with columns: periods, home_team, away_team, home_goals, away_goals." 10 | ) 11 | }) 12 | 13 | test_that("errors if required columns are missing", { 14 | ## ............................................................................ 15 | ## Data #### 16 | 17 | df_missing <- data.frame( 18 | periods = 1:5, 19 | home_team = rep("TeamA", 5), 20 | away_team = rep("TeamB", 5), 21 | home_goals = rep(1, 5) # missing away_goals 22 | ) 23 | 24 | ## ............................................................................ 25 | ## Tests #### 26 | 27 | expect_error( 28 | mle_foot(df_missing, model = "double_pois"), 29 | "data is missing required columns" 30 | ) 31 | }) 32 | 33 | 34 | test_that("errors if goals are not numeric", { 35 | ## ............................................................................ 36 | ## Data #### 37 | 38 | df_non_numeric <- data.frame( 39 | periods = 1:5, 40 | home_team = rep("TeamA", 5), 41 | away_team = rep("TeamB", 5), 42 | home_goals = as.character(rep(1, 5)), 43 | away_goals = as.character(rep(0, 5)) 44 | ) 45 | 46 | ## ............................................................................ 47 | ## Tests #### 48 | 49 | expect_error( 50 | mle_foot(df_non_numeric, model = "double_pois"), 51 | "Goals are not numeric" 52 | ) 53 | }) 54 | 55 | 56 | test_that("warns if data has more than 5 columns", { 57 | skip_on_cran() 58 | 59 | ## ............................................................................ 60 | ## Data #### 61 | valid_data <- data.frame( 62 | periods = 1:10, 63 | home_team = rep(c("TeamA", "TeamB"), 5), 64 | away_team = rep(c("TeamB", "TeamA"), 5), 65 | home_goals = sample(0:3, 10, replace = TRUE), 66 | away_goals = sample(0:3, 10, replace = TRUE) 67 | ) 68 | df_extra <- valid_data 69 | df_extra$extra_col <- rnorm(nrow(valid_data)) 70 | 71 | ## ............................................................................ 72 | ## Tests #### 73 | expect_warning( 74 | mle_foot(df_extra, model = "double_pois"), 75 | "Your dataset seems too large" 76 | ) 77 | }) 78 | 79 | # ____________________________________________________________________________ 80 | # Arguments tests #### 81 | 82 | test_that("errors if predict is not a non-negative integer", { 83 | ## ............................................................................ 84 | ## Data #### 85 | valid_data <- data.frame( 86 | periods = 1:10, 87 | home_team = rep(c("TeamA", "TeamB"), 5), 88 | away_team = rep(c("TeamB", "TeamA"), 5), 89 | home_goals = sample(0:3, 10, replace = TRUE), 90 | away_goals = sample(0:3, 10, replace = TRUE) 91 | ) 92 | df_extra <- valid_data 93 | df_extra$extra_col <- rnorm(nrow(valid_data)) 94 | 95 | ## ............................................................................ 96 | ## Tests #### 97 | expect_error( 98 | mle_foot(valid_data, model = "double_pois", predict = -1), 99 | "The argument 'predict' must be a non-negative integer" 100 | ) 101 | expect_error( 102 | mle_foot(valid_data, model = "double_pois", predict = 1.5), 103 | "The argument 'predict' must be a non-negative integer" 104 | ) 105 | expect_error( 106 | mle_foot(valid_data, model = "double_pois", predict = "2"), 107 | "The argument 'predict' must be a non-negative integer" 108 | ) 109 | }) 110 | 111 | 112 | 113 | test_that("errors if training set size is zero or negative", { 114 | ## ............................................................................ 115 | ## Data #### 116 | valid_data <- data.frame( 117 | periods = 1:10, 118 | home_team = rep(c("TeamA", "TeamB"), 5), 119 | away_team = rep(c("TeamB", "TeamA"), 5), 120 | home_goals = sample(0:3, 10, replace = TRUE), 121 | away_goals = sample(0:3, 10, replace = TRUE) 122 | ) 123 | df_extra <- valid_data 124 | df_extra$extra_col <- rnorm(nrow(valid_data)) 125 | 126 | ## ............................................................................ 127 | ## Tests #### 128 | 129 | # If predict equals the number of rows, training set size becomes zero. 130 | expect_error( 131 | mle_foot(valid_data, model = "double_pois", predict = nrow(valid_data)), 132 | "The training set size is zero" 133 | ) 134 | # If predict is larger than number of rows 135 | expect_error( 136 | mle_foot(valid_data, model = "double_pois", predict = nrow(valid_data) + 1), 137 | "The training set size is zero" 138 | ) 139 | }) 140 | 141 | 142 | test_that("model argument must be one of the allowed values", { 143 | ## ............................................................................ 144 | ## Data #### 145 | valid_data <- data.frame( 146 | periods = 1:10, 147 | home_team = rep(c("TeamA", "TeamB"), 5), 148 | away_team = rep(c("TeamB", "TeamA"), 5), 149 | home_goals = sample(0:3, 10, replace = TRUE), 150 | away_goals = sample(0:3, 10, replace = TRUE) 151 | ) 152 | df_extra <- valid_data 153 | df_extra$extra_col <- rnorm(nrow(valid_data)) 154 | 155 | ## ............................................................................ 156 | ## Tests #### 157 | expect_error( 158 | mle_foot(valid_data, model = "invalid_model")) 159 | }) 160 | 161 | 162 | test_that("output structure for skellam with out-of-sample prediction", { 163 | skip_on_cran() 164 | 165 | ## ............................................................................ 166 | ## Data #### 167 | valid_data <- data.frame( 168 | periods = 1:10, 169 | home_team = rep(c("TeamA", "TeamB"), 5), 170 | away_team = rep(c("TeamB", "TeamA"), 5), 171 | home_goals = sample(0:3, 10, replace = TRUE), 172 | away_goals = sample(0:3, 10, replace = TRUE) 173 | ) 174 | df_extra <- valid_data 175 | df_extra$extra_col <- rnorm(nrow(valid_data)) 176 | 177 | ## ............................................................................ 178 | ## Tests #### 179 | result <- mle_foot(valid_data, model = "skellam", predict = 2) 180 | expect_true(all(c("att", "def", "home_effect", "model", "predict", "team1_prev", "team2_prev", 181 | "logLik", "aic", "bic") %in% names(result))) 182 | expect_equal(result$predict, 2) 183 | expect_equal(length(result$team1_prev), 2) 184 | expect_equal(length(result$team2_prev), 2) 185 | }) 186 | 187 | # require(dplyr) 188 | # 189 | # # data 190 | # data("italy") 191 | # italy <- as_tibble(italy) 192 | # italy_2008<- italy %>% 193 | # dplyr::select(Season, home, visitor, hgoal,vgoal) %>% 194 | # filter( Season=="2008") 195 | # 196 | # spain <- as_tibble(spain) 197 | # spain_2008<- spain %>% 198 | # dplyr::select(Season, home, visitor, hgoal,vgoal) %>% 199 | # filter( Season=="2008") 200 | # 201 | # germany <- as_tibble(germany) 202 | # germany_2008<- germany %>% 203 | # dplyr::select(Season, home, visitor, hgoal,vgoal) %>% 204 | # filter( Season=="2008") 205 | # 206 | # 207 | # context("mle plain models") 208 | # # These tests regard the performance of the mle_foot functions, 209 | # # for methods double_pois, biv_pois, skellam and student_t. 210 | # 211 | # test_that("some mle models get errors/warnings", { 212 | # 213 | # 214 | # expect_warning(mle_foot(data = italy_2008, 215 | # model ="skellam")) 216 | # # warnings: "precision lost in result" 217 | # # ---> corrected if using "L-BFGS-B" as method 218 | # 219 | # expect_warning(mle_foot(data = italy_2008, 220 | # model ="student_t")) 221 | # # warnings: log(sd) 222 | # # ---> corrected if using "Nelder-Mead" as method 223 | # 224 | # # datasets with less than 4 columns 225 | # expect_error(mle_foot(data = italy_2008[,1:3], 226 | # model = "biv_pois")) 227 | # 228 | # # datasets different than matrix and dataframe 229 | # expect_error(mle_foot(data = rnorm(20), 230 | # model = "biv_pois")) 231 | # 232 | # # wrong model names 233 | # expect_error(mle_foot(data = italy_2008, 234 | # model = "neg_binomial")) 235 | # 236 | # 237 | # }) 238 | # 239 | # 240 | # test_that("prediction causes warnings/errors", { 241 | # 242 | # ## predict > N 243 | # expect_error(mle_foot(italy_2008, 244 | # model ="student_t", 245 | # predict = dim(italy_2008)[1]+1)) 246 | # 247 | # ## predict not a number 248 | # expect_error(mle_foot(italy_2008, 249 | # model ="student_t", 250 | # predict = "a")) 251 | # 252 | # ## predict decimal number 253 | # # expect_warning(stan_foot(england_2004, 254 | # # model ="student_t", 255 | # # predict = 30.5)) 256 | # }) 257 | # 258 | # 259 | # 260 | # 261 | # 262 | # 263 | # 264 | # 265 | # 266 | -------------------------------------------------------------------------------- /tests/testthat/test_foot_prob.R: -------------------------------------------------------------------------------- 1 | ## all the test PASSED (also the skipped ones!) 2 | 3 | 4 | # ____________________________________________________________________________ 5 | # Data checks #### 6 | 7 | test_that("foot_prob errors when data is missing required columns and when it returns a valid output", { 8 | skip_on_cran() 9 | skip_if_not(stan_cmdstan_exists()) 10 | 11 | ## ............................................................................ 12 | ## Data #### 13 | 14 | data("england") 15 | england <- as.data.frame(england) 16 | 17 | # One season only 18 | england_2004 <- england %>% 19 | dplyr::filter(division == 1) %>% 20 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 21 | dplyr::filter(Season == "2004") 22 | 23 | colnames(england_2004) <- c( 24 | "periods", "home_team", "away_team", 25 | "home_goals", "away_goals" 26 | ) 27 | 28 | england_2004_wrg <- england_2004[, 1:3] 29 | 30 | ## ............................................................................ 31 | ## Tests #### 32 | 33 | model_pois <- stan_foot( 34 | data = england_2004, 35 | model = "double_pois", 36 | predict = 30, 37 | iter_sampling = 200, 38 | chains = 2, 39 | seed = 433 40 | ) 41 | 42 | # Missing a column in the data 43 | expect_error( 44 | foot_prob(object = model_pois, data = england_2004_wrg) 45 | ) 46 | 47 | 48 | # Correct model 49 | result <- expect_error( 50 | foot_prob(object = model_pois, data = england_2004), 51 | NA 52 | ) 53 | 54 | expect_type(result, "list") 55 | expect_true("prob_table" %in% names(result)) 56 | expect_true("prob_plot" %in% names(result)) 57 | expect_s3_class(result$prob_table, "data.frame") 58 | 59 | # Check that the expected columns are in the table. 60 | expected_cols <- c("home_team", "away_team", "prob_h", "prob_d", "prob_a", "mlo") 61 | expect_true(all(expected_cols %in% names(result$prob_table))) 62 | 63 | # Verify that the plot is a ggplot object. 64 | expect_s3_class(result$prob_plot, "ggplot") 65 | 66 | 67 | # T-student model 68 | model_stud <- stan_foot( 69 | data = england_2004, 70 | model = "student_t", 71 | predict = 30, 72 | iter_sampling = 200, 73 | chains = 2, 74 | seed = 433 75 | ) 76 | 77 | # Correct model CmdStan 78 | expect_error( 79 | foot_prob(object = model_stud$fit, data = england_2004), 80 | NA 81 | ) 82 | }) 83 | 84 | test_that("foot_prob errors if object draws lack required elements", { 85 | ## ............................................................................ 86 | ## Data #### 87 | 88 | dummy_data <- data.frame( 89 | periods = rep(1, 6), 90 | home_team = c("TeamA", "TeamB", "TeamC", "TeamD", "TeamE", "TeamF"), 91 | away_team = c("TeamG", "TeamH", "TeamI", "TeamJ", "TeamK", "TeamL"), 92 | home_goals = c(1, 2, 0, 3, 2, 1), 93 | away_goals = c(0, 1, 2, 1, 1, 2), 94 | stringsAsFactors = FALSE 95 | ) 96 | 97 | ## ............................................................................ 98 | ## Tests #### 99 | 100 | # Fake draws object without y_prev or diff_y_prev. 101 | fake_draws_empty <- list(a = 1) 102 | fake_fit_empty <- list( 103 | draws = function() { 104 | return(fake_draws_empty) 105 | } 106 | ) 107 | class(fake_fit_empty) <- "CmdStanFit" 108 | fake_stanfoot_empty <- list(fit = fake_fit_empty) 109 | class(fake_stanfoot_empty) <- "stanFoot" 110 | 111 | expect_error( 112 | foot_prob(fake_fit_empty, dummy_data), 113 | "does not contain 'y_prev' or 'diff_y_prev'" 114 | ) 115 | 116 | expect_error( 117 | foot_prob(fake_stanfoot_empty, dummy_data), 118 | "does not contain 'y_prev' or 'diff_y_prev'" 119 | ) 120 | 121 | # No 'stanfit', 'CmdStanFit', 'stanFoot' or 'list'. 122 | class(fake_fit_empty) <- "ggplot2" 123 | 124 | expect_error( 125 | foot_prob(fake_fit_empty, dummy_data), 126 | "Provide one among these four model fit classes: 'stanfit', 'CmdStanFit', 'stanFoot' or 'list'." 127 | ) 128 | }) 129 | 130 | 131 | # ____________________________________________________________________________ 132 | # Arguments checks #### 133 | 134 | 135 | test_that("foot_prob errors when home_team and away_team lengths mismatch", { 136 | skip_on_cran() 137 | skip_if_not(stan_cmdstan_exists()) 138 | 139 | ## ............................................................................ 140 | ## Data #### 141 | 142 | data("england") 143 | england <- as.data.frame(england) 144 | 145 | # One season only 146 | england_2004 <- england %>% 147 | dplyr::filter(division == 1) %>% 148 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 149 | dplyr::filter(Season == "2004") 150 | 151 | colnames(england_2004) <- c( 152 | "periods", "home_team", "away_team", 153 | "home_goals", "away_goals" 154 | ) 155 | 156 | ## ............................................................................ 157 | ## Tests #### 158 | 159 | model_pois <- stan_foot( 160 | data = england_2004, 161 | model = "double_pois", 162 | predict = 30, 163 | iter_sampling = 200, 164 | chains = 2, 165 | seed = 433 166 | ) 167 | 168 | # Correct 169 | expect_error( 170 | foot_prob( 171 | object = model_pois, data = england_2004, 172 | home_team = "Arsenal", 173 | away_team = "Everton" 174 | ), NA 175 | ) 176 | 177 | 178 | # There is not any out-of-sample match:Arsenal-Chelsea 179 | expect_error( 180 | foot_prob( 181 | object = model_pois, data = england_2004, 182 | home_team = "Arsenal", 183 | away_team = "Chelsea" 184 | ) 185 | ) 186 | 187 | # home_team and away_team lengths mismatch 188 | expect_error( 189 | foot_prob( 190 | object = model_pois, data = england_2004, 191 | home_team = "Arsenal", 192 | away_team = c("Chelsea", "Liverpool") 193 | ), 194 | "Please, include the same number for home and away teams." 195 | ) 196 | 197 | # No predictions 198 | model_pois_no_pred <- stan_foot( 199 | data = england_2004, 200 | model = "double_pois", 201 | iter_sampling = 200, 202 | chains = 2, 203 | seed = 433 204 | ) 205 | 206 | expect_error( 207 | foot_prob( 208 | object = model_pois_no_pred, data = england_2004), 209 | ) 210 | }) 211 | 212 | 213 | test_that("foot_prob errors when predict is zero for list objects", { 214 | skip_on_cran() 215 | skip_if_not(stan_cmdstan_exists()) 216 | 217 | ## ............................................................................ 218 | ## Data #### 219 | 220 | data("england") 221 | england <- as.data.frame(england) 222 | 223 | # One season only 224 | england_2004 <- england %>% 225 | dplyr::filter(division == 1) %>% 226 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 227 | dplyr::filter(Season == "2004") 228 | 229 | colnames(england_2004) <- c( 230 | "periods", "home_team", "away_team", 231 | "home_goals", "away_goals" 232 | ) 233 | 234 | ## ............................................................................ 235 | ## Tests #### 236 | 237 | model_pois <- stan_foot( 238 | data = england_2004, 239 | model = "double_pois", 240 | iter_sampling = 200, 241 | chains = 2, 242 | seed = 433 243 | ) 244 | 245 | expect_error( 246 | foot_prob(model_pois, england_2004), 247 | ) 248 | }) 249 | 250 | 251 | # ____________________________________________________________________________ 252 | # MLE models #### 253 | 254 | test_that("MLE models", { 255 | skip_on_cran() 256 | 257 | ## ............................................................................ 258 | ## Data #### 259 | 260 | data("england") 261 | england <- as.data.frame(england) 262 | 263 | # One season only 264 | england_2004 <- england %>% 265 | dplyr::filter(division == 1) %>% 266 | dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 267 | dplyr::filter(Season == "2004") 268 | 269 | colnames(england_2004) <- c( 270 | "periods", "home_team", "away_team", 271 | "home_goals", "away_goals" 272 | ) 273 | 274 | ## ............................................................................ 275 | ## Tests #### 276 | 277 | model <- mle_foot(england_2004, "double_pois", predict = 180, interval = "Wald") 278 | expect_error(foot_prob(object = model, data = england_2004), NA) 279 | 280 | model <- mle_foot(england_2004, "biv_pois", predict = 180) 281 | expect_error(foot_prob(object = model, data = england_2004), NA) 282 | 283 | model <- mle_foot(england_2004, "skellam", predict = 180) 284 | expect_error(foot_prob(object = model, data = england_2004), NA) 285 | }) 286 | # require(dplyr) 287 | # 288 | # data("italy") 289 | # italy_2000<- italy %>% 290 | # dplyr::select(Season, home, visitor, hgoal,vgoal) %>% 291 | # filter(Season=="2000") 292 | # 293 | # test_that("foot_prob gets errors", { 294 | # 295 | # # different numbers of home_team and away team 296 | # fit2 <- stan_foot(italy_2000, "biv_pois", predict = 36, iter = 200) 297 | # expect_error(foot_prob(fit2, italy_2000, home_team = c("Hellas Verona", "US Lecce"), 298 | # away_team = c("AC Perugia"))) 299 | # 300 | # # no predict 301 | # ## in stan_foot 302 | # fit3 <- stan_foot(italy_2000, "biv_pois", iter = 200) 303 | # expect_error(foot_prob(fit3, italy_2000)) 304 | # 305 | # 306 | # ## mle_foot 307 | # fit4 <- mle_foot(italy_2000, "biv_pois") 308 | # expect_error(foot_prob(fit4, italy_2000)) 309 | # 310 | # # wrong class 311 | # y <- italy_2000$hgoal 312 | # fit5 <- lm(y~ rnorm(length(y))) 313 | # expect_error(foot_prob(fit5, italy_2000)) 314 | # 315 | # }) 316 | -------------------------------------------------------------------------------- /src/stan/neg_bin_dynamic.stan: -------------------------------------------------------------------------------- 1 | data{ 2 | int N; // number of games 3 | int N_prev; 4 | array[N,2] int y; 5 | int nteams; 6 | array[N] int team1; 7 | array[N] int team2; 8 | array[N_prev] int team1_prev; 9 | array[N_prev] int team2_prev; 10 | int ntimes; // dynamic periods 11 | array[ntimes] int time; 12 | array[N] int instants; 13 | array[N_prev] int instants_prev; 14 | array[N] int instants_rank; 15 | int ntimes_rank; // dynamic periods for ranking 16 | matrix[ntimes_rank,nteams] ranking; // eventual fifa/uefa ranking 17 | int ind_home; 18 | int ind_common_sigma; 19 | real mean_home; // Mean for home effect 20 | real sd_home; // Standard deviation for home effect 21 | 22 | 23 | // priors part 24 | int prior_dist_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace 25 | int prior_dist_sd_num; // 1 gaussian, 2 t, 3 cauchy, 4 laplace 26 | 27 | real hyper_df; 28 | real hyper_location; 29 | 30 | real hyper_sd_df; 31 | real hyper_sd_location; 32 | real hyper_sd_scale; 33 | 34 | // commensurate prior 35 | int ind_comm_prior; 36 | real mu_spike; 37 | real sd_spike; 38 | real mu_slab; 39 | real sd_slab; 40 | real p_spike; 41 | } 42 | 43 | transformed data { 44 | real lognc_spike = normal_lccdf(0 | mu_spike, sd_spike); // \Phi(mu_spike / sd_spike) 45 | real lognc_slab = normal_lccdf(0 | mu_slab, sd_slab); // \Phi(mu_slab / sd_slab) 46 | } 47 | 48 | parameters{ 49 | matrix[ntimes, nteams] att_raw; // Raw attack ability 50 | matrix[ntimes, nteams] def_raw; // Raw defense ability 51 | vector[nteams] home; 52 | real gamma; 53 | array[(ind_comm_prior|| ind_common_sigma) ? 0 : 1] real sigma_att; // Evolution variance Egidi (2018) 54 | array[(ind_comm_prior|| ind_common_sigma) ? 0 : 1] real sigma_def; // Evolution variance Egidi (2018) 55 | array[(ind_comm_prior|| !ind_common_sigma) ? 0 : 1] real sigma_common; // Evolution variance Owen (2011) 56 | vector[ind_comm_prior? ntimes : 0] comm_prec_att; // Commensurability parameter 57 | vector[ind_comm_prior? ntimes : 0] comm_prec_def; // Commensurability parameter 58 | 59 | // Negative Binomial dispersion parameters 60 | real phi1; 61 | real phi2; 62 | } 63 | 64 | transformed parameters { 65 | matrix[ntimes, nteams] att; // attack abilities 66 | matrix[ntimes, nteams] def; // defense abilities 67 | matrix[ntimes, nteams] mu_att; // attack hyperparameter 68 | matrix[ntimes, nteams] mu_def; // defense hyperparameter 69 | vector[nteams] adj_h_eff; 70 | array[N] vector[2] theta; 71 | 72 | // Sum-to-zero constraint for attack/defense parameters 73 | att[1]=att_raw[1]-mean(att_raw[1]); 74 | def[1]=def_raw[1]-mean(def_raw[1]); 75 | for (t in 2:ntimes){ 76 | att[t]=att_raw[t]-mean(att_raw[t]); 77 | def[t]=def_raw[t]-mean(def_raw[t]); 78 | } 79 | // Lagged prior mean for attack/defense parameters 80 | for (t in 2:(ntimes)){ 81 | mu_att[1]=rep_row_vector(hyper_location,nteams); 82 | mu_att[t]=att[t-1]; 83 | //rep_row_vector(0,nteams); 84 | 85 | mu_def[1]=rep_row_vector(hyper_location,nteams); 86 | mu_def[t]=def[t-1]; 87 | //rep_row_vector(0,nteams); 88 | } 89 | 90 | adj_h_eff = home * ind_home; 91 | 92 | for (n in 1:N){ 93 | theta[n,1] = exp(adj_h_eff[team1[n]]+att[instants[n], team1[n]]+def[instants[n], team2[n]]+ 94 | (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]])); 95 | theta[n,2] = exp(att[instants[n], team2[n]]+def[instants[n], team1[n]]- 96 | (gamma/2)*(ranking[instants_rank[n],team1[n]]-ranking[instants_rank[n],team2[n]])); 97 | } 98 | 99 | vector[ind_comm_prior? ntimes : 0] comm_sd_att = inv_sqrt(comm_prec_att); // SD for commensurate prior 100 | vector[ind_comm_prior? ntimes : 0] comm_sd_def = inv_sqrt(comm_prec_def); // SD for commensurate prior 101 | 102 | } 103 | 104 | model { 105 | if(ind_comm_prior == 1){ 106 | for (i in 1:ntimes) { 107 | // Att 108 | target += log_mix( 109 | p_spike 110 | , normal_lpdf(comm_prec_att[i] | mu_spike, sd_spike) - lognc_spike 111 | , normal_lpdf(comm_prec_att[i] | mu_slab, sd_slab) - lognc_slab 112 | ); 113 | // Def 114 | target += log_mix( 115 | p_spike 116 | , normal_lpdf(comm_prec_def[i] | mu_spike, sd_spike) - lognc_spike 117 | , normal_lpdf(comm_prec_def[i] | mu_slab, sd_slab) - lognc_slab 118 | ); 119 | } 120 | 121 | // log-priors for team-specific abilities 122 | for (h in 1:(nteams)){ 123 | for (i in 1:ntimes){ 124 | if (prior_dist_num == 1 ){ 125 | target+= normal_lpdf(att_raw[i,h]| mu_att[i,h], comm_sd_att[i]); 126 | target+= normal_lpdf(def_raw[i,h]| mu_def[i,h], comm_sd_def[i]); 127 | } 128 | else if (prior_dist_num == 2 ){ 129 | target+= student_t_lpdf(att_raw[i,h]| hyper_df, mu_att[i,h], comm_sd_att[i]); 130 | target+= student_t_lpdf(def_raw[i,h]| hyper_df, mu_def[i,h], comm_sd_def[i]); 131 | } 132 | else if (prior_dist_num == 3 ){ 133 | target+= student_t_lpdf(att_raw[i,h]| 1, mu_att[i,h], comm_sd_att[i]); 134 | target+= student_t_lpdf(def_raw[i,h]| 1, mu_def[i,h], comm_sd_def[i]); 135 | } 136 | } 137 | } 138 | 139 | } 140 | else{ 141 | // First order autoregressive model Owen (2011) 142 | // log-priors for team-specific abilities 143 | for (h in 1:(nteams)){ 144 | for (i in 1:ntimes){ 145 | if (prior_dist_num == 1 ){ 146 | if(ind_common_sigma == 0) { 147 | target+= normal_lpdf(att_raw[i,h]| mu_att[i,h], sigma_att[1]); 148 | target+= normal_lpdf(def_raw[i,h]| mu_def[i,h], sigma_def[1]); 149 | } else { 150 | target+= normal_lpdf(att_raw[i,h]| mu_att[i,h], sigma_common[1]); 151 | target+= normal_lpdf(def_raw[i,h]| mu_def[i,h], sigma_common[1]); 152 | } 153 | } 154 | else if (prior_dist_num == 2 ){ 155 | if(ind_common_sigma == 0) { 156 | target+= student_t_lpdf(att_raw[i,h]| hyper_df, mu_att[i,h], sigma_att[1]); 157 | target+= student_t_lpdf(def_raw[i,h]| hyper_df, mu_def[i,h], sigma_def[1]); 158 | } else{ 159 | target+= student_t_lpdf(att_raw[i,h]| hyper_df, mu_att[i,h], sigma_common[1]); 160 | target+= student_t_lpdf(def_raw[i,h]| hyper_df, mu_def[i,h], sigma_common[1]); 161 | } 162 | } 163 | else if (prior_dist_num == 3 ){ 164 | if(ind_common_sigma == 0) { 165 | target+= student_t_lpdf(att_raw[i,h]| 1, mu_att[i,h], sigma_att[1]); 166 | target+= student_t_lpdf(def_raw[i,h]| 1, mu_def[i,h], sigma_def[1]); 167 | } else{ 168 | target+= student_t_lpdf(att_raw[i,h]| 1, mu_att[i,h], sigma_common[1]); 169 | target+= student_t_lpdf(def_raw[i,h]| 1, mu_def[i,h], sigma_common[1]); 170 | } 171 | } 172 | } 173 | } 174 | // log-hyperpriors for sd parameters 175 | if (prior_dist_sd_num == 1 ){ 176 | if(ind_common_sigma == 0) { 177 | target+=normal_lpdf(sigma_att[1]|hyper_sd_location, hyper_sd_scale); 178 | target+=normal_lpdf(sigma_def[1]|hyper_sd_location, hyper_sd_scale); 179 | } else { 180 | target+=normal_lpdf(sigma_common[1]|hyper_sd_location, hyper_sd_scale); 181 | } 182 | } 183 | else if (prior_dist_sd_num == 2){ 184 | if(ind_common_sigma == 0) { 185 | target+=student_t_lpdf(sigma_att[1]|hyper_sd_df, hyper_sd_location, hyper_sd_scale); 186 | target+=student_t_lpdf(sigma_def[1]|hyper_sd_df, hyper_sd_location, hyper_sd_scale); 187 | } else { 188 | target+=student_t_lpdf(sigma_common[1]|hyper_sd_df, hyper_sd_location, hyper_sd_scale); 189 | } 190 | } 191 | else if (prior_dist_sd_num == 3){ 192 | if(ind_common_sigma == 0) { 193 | target+=cauchy_lpdf(sigma_att[1]|hyper_sd_location, hyper_sd_scale); 194 | target+=cauchy_lpdf(sigma_def[1]|hyper_sd_location, hyper_sd_scale); 195 | } else { 196 | target+=cauchy_lpdf(sigma_common[1]|hyper_sd_location, hyper_sd_scale); 197 | } 198 | } 199 | else if (prior_dist_sd_num == 4){ 200 | if(ind_common_sigma == 0) { 201 | target+=double_exponential_lpdf(sigma_att[1]|hyper_sd_location, hyper_sd_scale); 202 | target+=double_exponential_lpdf(sigma_def[1]|hyper_sd_location, hyper_sd_scale); 203 | } 204 | else { 205 | target+=double_exponential_lpdf(sigma_common[1]|hyper_sd_location, hyper_sd_scale); 206 | } 207 | } 208 | } 209 | 210 | // log-priors fixed effects 211 | target+=normal_lpdf(home|mean_home,sd_home); 212 | target+=normal_lpdf(gamma|0,1); 213 | 214 | // 4) Priors on NB dispersion 215 | target += normal_lpdf(phi1 | 0, 5); 216 | target += normal_lpdf(phi2 | 0, 5); 217 | 218 | // 5) Likelihood: marginal NB2 for each margin + shared latent component 219 | for (n in 1:N) { 220 | target += neg_binomial_2_lpmf(y[n,1] | theta[n,1], phi1); 221 | target += neg_binomial_2_lpmf(y[n,2] | theta[n,2], phi2); 222 | } 223 | } 224 | 225 | generated quantities { 226 | array[N,2] int y_rep; 227 | array[N_prev,2] int y_prev; 228 | array[N_prev] vector[2] theta_prev; 229 | vector[N] log_lik; 230 | array[N] int diff_y_rep; 231 | 232 | for (n in 1:N) { 233 | y_rep[n,1] = neg_binomial_2_rng(theta[n,1], phi1); 234 | y_rep[n,2] = neg_binomial_2_rng(theta[n,2], phi2); 235 | diff_y_rep[n] = y_rep[n,1] - y_rep[n,2]; 236 | 237 | log_lik[n] = neg_binomial_2_lpmf(y[n,1] | theta[n,1], phi1) 238 | + neg_binomial_2_lpmf(y[n,2] | theta[n,2], phi2); 239 | } 240 | 241 | //out-of-sample predictions 242 | if (N_prev > 0) { 243 | for (n in 1:N_prev){ 244 | theta_prev[n,1] = exp(adj_h_eff[team1_prev[n]]+att[instants_prev[n],team1_prev[n]]+ 245 | def[instants_prev[n], team2_prev[n]]+ 246 | (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]])); 247 | theta_prev[n,2] = exp(att[instants_prev[n],team2_prev[n]]+ 248 | def[instants_prev[n], team1_prev[n]]- 249 | (gamma/2)*(ranking[instants_rank[N],team1_prev[n]]-ranking[instants_rank[N],team2_prev[n]])); 250 | y_prev[n,1] = neg_binomial_2_rng(theta_prev[n,1], phi1); 251 | y_prev[n,2] = neg_binomial_2_rng(theta_prev[n,2], phi2); 252 | } 253 | } 254 | } 255 | -------------------------------------------------------------------------------- /R/pp_foot.R: -------------------------------------------------------------------------------- 1 | #' Posterior predictive checks for football models 2 | #' 3 | #' The function provides posterior predictive plots to check the adequacy of the Bayesian models as 4 | #' returned by the \code{stan_foot} function. 5 | #' 6 | #' @param object An object either of class \code{stanFoot}, \code{CmdStanFit}, \code{stanfit}. 7 | #' @param data A data frame containing match data with columns: 8 | #' \itemize{ 9 | #' \item \code{periods}: Time point of each observation (integer >= 1). 10 | #' \item \code{home_team}: Home team's name (character string). 11 | #' \item \code{away_team}: Away team's name (character string). 12 | #' \item \code{home_goals}: Goals scored by the home team (integer >= 0). 13 | #' \item \code{away_goals}: Goals scored by the away team (integer >= 0). 14 | #' } 15 | #' @param type Type of plots, one among \code{"aggregated"} or \code{"matches"}. Default is \code{"aggregated"}. 16 | #' @param coverage Argument to specify the width \eqn{1-\alpha} of posterior probability intervals. Default is 0.95. 17 | #' 18 | #' @return A list with elements: 19 | #' \itemize{ 20 | #' \item{\code{pp_plot}}: A \code{ggplot} object for the selected \code{type} of plot. 21 | #' \item{\code{pp_table}}: A data frame of summary statistics: 22 | #' \itemize{ 23 | #' \item For \code{"aggregated"}: Goal differences and their Bayesian p‑values. 24 | #' \item For \code{"matches"}: Nominal \code{1-alpha} and observed empirical Bayesian coverage. 25 | #' } 26 | #' } 27 | #' 28 | #' @details 29 | #' 30 | #' Posterior predictive plots: when \code{"aggregated"} (default) is selected, the function 31 | #' returns a frequency plot for some pre-selected goal-difference values, 32 | #' along with their correspondent Bayesian p-values, computed as 33 | #' \eqn{Pr(y_{rep} \ge y)|y)}, where \eqn{y_{rep}} is a data replication from the 34 | #' posterior predictive distribution (more details in Gelman et al., 2013). 35 | #' Bayesian p-values very close to 0 or 1 could exhibit 36 | #' possible model misfits. 37 | #' 38 | #' When \code{"matches"} is selected an ordered-frequency plot for all the 39 | #' goal-differences in the considered matches is provided, along with the 40 | #' empirical Bayesian coverage at level \eqn{1-\alpha}. 41 | #' 42 | #' @author Leonardo Egidi \email{legidi@units.it} and Roberto Macrì Demartino \email{roberto.macridemartino@deams.units.it} 43 | #' 44 | #' @references 45 | #' 46 | #' Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari, A., & Rubin, D. B. (2013). Bayesian data analysis. CRC press. 47 | #' 48 | #' 49 | #' @examples 50 | #' \dontrun{ 51 | #' if (instantiate::stan_cmdstan_exists()) { 52 | #' library(dplyr) 53 | #' 54 | #' data("italy") 55 | #' italy_2000 <- italy %>% 56 | #' dplyr::select(Season, home, visitor, hgoal, vgoal) %>% 57 | #' dplyr::filter(Season == "2000") 58 | #' 59 | #' colnames(italy_2000) <- c("periods", "home_team", "away_team", "home_goals", "away_goals") 60 | #' 61 | #' fit <- stan_foot(italy_2000, "double_pois", iter_sampling = 200) 62 | #' 63 | #' pp_foot(fit, italy_2000) 64 | #' } 65 | #' } 66 | #' 67 | #' @importFrom ggplot2 ggplot aes geom_point labs scale_colour_manual annotate 68 | #' scale_x_discrete scale_x_continuous scale_y_continuous dup_axis geom_ribbon geom_line 69 | #' theme_bw theme rel 70 | #' @importFrom matrixStats colMedians colVars colQuantiles 71 | #' @importFrom rstan extract 72 | #' @importFrom posterior as_draws_rvars draws_of 73 | #' @importFrom rlang .data 74 | #' 75 | #' @export 76 | 77 | 78 | pp_foot <- function(object, data, 79 | type = "aggregated", 80 | coverage = 0.95) { 81 | # ____________________________________________________________________________ 82 | # Data and argument checks #### 83 | 84 | type <- match.arg(type, c("aggregated", "matches")) 85 | 86 | required_cols <- c("periods", "home_team", "away_team", "home_goals", "away_goals") 87 | missing_cols <- setdiff(required_cols, names(data)) 88 | if (length(missing_cols) > 0) { 89 | stop(paste("data is missing required columns:", paste(missing_cols, collapse = ", "))) 90 | } 91 | 92 | # Check if object is of class "stanFoot" or "stanfit" 93 | if (inherits(object, c("stanFoot", "CmdStanFit"))) { 94 | draws <- if (inherits(object, "stanFoot")) { 95 | object$fit$draws() 96 | } else { 97 | object$draws() # for CmdStanFit objects 98 | } 99 | draws <- posterior::as_draws_rvars(draws) 100 | if (!("diff_y_rep" %in% names(draws))) { 101 | stop("Model does not contain 'diff_y_rep' in its samples.") 102 | } 103 | sims <- list() 104 | sims$diff_y_rep <- posterior::draws_of(draws[["diff_y_rep"]]) 105 | } else if (inherits(object, "stanfit")) { 106 | sims <- rstan::extract(object) 107 | 108 | if (!("diff_y_rep" %in% names(sims))) { 109 | stop("Model does not contain 'diff_y_rep' in its samples.") 110 | } 111 | } else { 112 | stop("Provide one among these three model fit classes: 'stanfit', 'CmdStanFit', 'stanFoot'.") 113 | } 114 | 115 | 116 | # ____________________________________________________________________________ 117 | # Dataframes and plots #### 118 | 119 | y <- as.matrix(data[, 4:5]) 120 | goal_diff <- as.vector(y[, 1] - y[, 2]) 121 | goal_diff_rep <- sims$diff_y_rep 122 | esiti_short <- seq(-3, 3, 1) 123 | M <- dim(goal_diff_rep)[1] 124 | freq_rel_matrix <- matrix(NA, M, length(esiti_short)) 125 | ngames_train <- dim(goal_diff_rep)[2] 126 | 127 | 128 | if (type == "aggregated") { 129 | check.integer <- function(x) { 130 | x == round(x) 131 | } 132 | 133 | if (check.integer(median(goal_diff_rep)) == FALSE) { # student_t models adjustment 134 | goal_diff_rep <- round(goal_diff_rep, 0) 135 | } 136 | 137 | freq_rel_matrix <- t(apply(goal_diff_rep, 1, function(row_j) { 138 | counts <- table(factor(row_j, levels = esiti_short)) 139 | as.vector(counts) / ngames_train 140 | })) 141 | 142 | 143 | freq_rel_frame_add <- do.call(rbind, lapply(1:M, function(j) { 144 | data.frame(valori = esiti_short, rel = freq_rel_matrix[j, ]) 145 | })) 146 | 147 | 148 | freq_rel_obs <- sapply(esiti_short, function(x) { 149 | sum(goal_diff == x) / ngames_train 150 | }) 151 | 152 | frame <- data.frame(valori = esiti_short, rel = freq_rel_frame_add[, 2]) 153 | 154 | p <- ggplot(frame, aes(x = valori, y = rel)) + 155 | # Simulated points 156 | geom_point(position = "jitter", alpha = 0.2, aes(colour = "simulated")) + 157 | # Observed segments 158 | annotate("segment", 159 | x = -3 - 0.5, y = freq_rel_obs[1], 160 | xend = -3 + 0.5, yend = freq_rel_obs[1], 161 | colour = "#1E90FF", linewidth = 2 162 | ) + 163 | annotate("segment", 164 | x = -2 - 0.5, y = freq_rel_obs[2], 165 | xend = -2 + 0.5, yend = freq_rel_obs[2], 166 | colour = "#1E90FF", linewidth = 2 167 | ) + 168 | annotate("segment", 169 | x = -1 - 0.5, y = freq_rel_obs[3], 170 | xend = -1 + 0.5, yend = freq_rel_obs[3], 171 | colour = "#1E90FF", linewidth = 2 172 | ) + 173 | annotate("segment", 174 | x = 0 - 0.5, y = freq_rel_obs[4], 175 | xend = 0 + 0.5, yend = freq_rel_obs[4], 176 | colour = "#1E90FF", linewidth = 2 177 | ) + 178 | annotate("segment", 179 | x = 1 - 0.5, y = freq_rel_obs[5], 180 | xend = 1 + 0.5, yend = freq_rel_obs[5], 181 | colour = "#1E90FF", linewidth = 2 182 | ) + 183 | annotate("segment", 184 | x = 2 - 0.5, y = freq_rel_obs[6], 185 | xend = 2 + 0.5, yend = freq_rel_obs[6], 186 | colour = "#1E90FF", linewidth = 2 187 | ) + 188 | annotate("segment", 189 | x = 3 - 0.5, y = freq_rel_obs[7], 190 | xend = 3 + 0.5, yend = freq_rel_obs[7], 191 | colour = "#1E90FF", linewidth = 2 192 | ) + 193 | # Dummy layer to add legend for observed data: 194 | geom_point( 195 | data = data.frame(x = Inf, y = Inf), 196 | aes(x = .data$x, y = .data$y, colour = "observed"), 197 | size = 2 198 | ) + 199 | labs(x = "Goal difference", y = "Posterior predictive distribution") + 200 | scale_colour_manual( 201 | name = "", 202 | values = c(observed = "#1E90FF", simulated = "#FFA500"), 203 | labels = c("Observed", "Simulated") 204 | ) + 205 | scale_x_continuous(breaks = -3:3, limits = c(-4, 4)) + 206 | theme_bw() + 207 | theme( 208 | axis.title = element_text(size = 19), 209 | axis.text.x = element_text(size = 15), 210 | axis.text.y = element_text(size = 15), 211 | legend.position = "top", 212 | legend.text = element_text(size = 15) 213 | ) 214 | 215 | 216 | p_value <- sapply(seq_along(esiti_short), function(j) { 217 | round(sum(frame$rel[frame$valori == esiti_short[j]] >= freq_rel_obs[j]) / M, 3) 218 | }) 219 | 220 | tbl <- data.frame( 221 | `goal diff.` = esiti_short, 222 | `Bayesian p-value` = p_value, 223 | check.names = FALSE 224 | ) 225 | 226 | return(list(pp_plot = p, pp_table = tbl)) 227 | } else if (type == "matches") { 228 | scd <- as.numeric(as.vector(goal_diff))[1:ngames_train] 229 | scd_sims <- goal_diff_rep 230 | scd_hat <- matrixStats::colMedians(scd_sims) 231 | scd_se <- sqrt(matrixStats::colVars(scd_sims)) 232 | alpha <- coverage 233 | scd_ub <- matrixStats::colQuantiles(scd_sims, probs = 1 - (1 - alpha) / 2) 234 | scd_lb <- matrixStats::colQuantiles(scd_sims, probs = (1 - alpha) / 2) 235 | ci_alpha <- sum(scd < scd_ub & scd_lb < scd) / ngames_train 236 | ngames_train_draw <- sum(scd == 0) 237 | scd_draw <- scd[scd == 0] 238 | ci95_draw <- sum(scd_draw < scd_ub[scd == 0] & scd_lb[scd == 0] < scd_draw) / ngames_train_draw 239 | 240 | 241 | sort_scd <- scd[order(scd)] 242 | sort_scd_hat <- scd_hat[order(scd)] 243 | sort_scd_se <- scd_se[order(scd)] 244 | sort_scd_ub <- scd_ub[order(scd)] 245 | sort_scd_lb <- scd_lb[order(scd)] 246 | 247 | df <- data.frame(list( 248 | scd = sort_scd, scd_hat = sort_scd_hat, scd_se = sort_scd_se, 249 | scd_ub = sort_scd_ub, scd_lb = sort_scd_lb 250 | )) 251 | 252 | p <- ggplot(df, aes(x = c(1:ngames_train))) + 253 | geom_ribbon(aes(ymin = scd_lb, ymax = scd_ub), 254 | fill = "#FFA500" 255 | ) + 256 | geom_line(aes(y = scd_hat, colour = "simulated")) + 257 | geom_point(aes(y = scd, colour = "observed"), fill = "#1E90FF", size = 0.5) + 258 | scale_x_continuous(name = "games") + 259 | scale_y_continuous( 260 | name = "Goal difference", 261 | breaks = c(-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8), 262 | sec.axis = dup_axis() 263 | ) + 264 | scale_colour_manual( 265 | name = "", 266 | values = c(observed = "#1E90FF", simulated = "#FFA500"), 267 | labels = c("Observed", "Simulated") 268 | ) + 269 | theme_bw() + 270 | theme( 271 | axis.title = element_text(size = 19), 272 | axis.text.x = element_text(size = 15), 273 | axis.text.y = element_text(size = 15), 274 | legend.position = "top", 275 | legend.text = element_text(size = 15) 276 | ) 277 | 278 | tbl <- data.frame(alpha = coverage, coverage = round(ci_alpha, 3)) 279 | colnames(tbl) <- c("1-alpha", "emp. coverage") 280 | return(list(pp_plot = p, pp_table = tbl)) 281 | } 282 | } 283 | --------------------------------------------------------------------------------