├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ └── pkgdown.yaml ├── .gitignore ├── CRAN-RELEASE ├── CRAN-SUBMISSION ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── bayesdfa-package.R ├── converge_rhat.R ├── dfa_cv.R ├── dfa_fitted.R ├── dfa_loadings.R ├── dfa_trends.R ├── find_dfa_trends.R ├── find_regimes.R ├── find_swans.R ├── fit_dfa.R ├── fit_regimes.R ├── hmm_init.R ├── invert_chains.R ├── loo.R ├── plot_fitted.R ├── plot_loadings.R ├── plot_regime_model.R ├── plot_trends.R ├── predicted.R ├── print.R ├── rotate_trends.R ├── sim.R ├── stanmodels.R └── trend_cor.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── bayesdfa.Rproj ├── configure ├── configure.win ├── cran-comments.md ├── docs ├── 404.html ├── articles │ ├── a1_bayesdfa.html │ ├── a1_bayesdfa_files │ │ └── figure-html │ │ │ ├── fit-extreme-dfa-1.png │ │ │ ├── plot-1-fitted-example-1.png │ │ │ ├── plot-1-trend-1.png │ │ │ ├── plot-2-fitted-example-1.png │ │ │ ├── plot-extreme-loadings-1.png │ │ │ ├── plot-loadings-1.png │ │ │ ├── simulate-data-plot-1.png │ │ │ ├── simulate-data-plot2-1.png │ │ │ └── simulate-data-plot3-1.png │ ├── a2_combining_data.html │ ├── a2_combining_data_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-3-1.png │ │ │ ├── unnamed-chunk-4-1.png │ │ │ ├── unnamed-chunk-8-1.png │ │ │ └── unnamed-chunk-9-1.png │ ├── a3_covariates.html │ ├── a3_covariates_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-4-1.png │ │ │ └── unnamed-chunk-5-1.png │ ├── a4_smooth.html │ ├── a4_smooth_files │ │ └── figure-html │ │ │ └── unnamed-chunk-3-1.png │ ├── a5_estimate_process_sigma.html │ ├── a5_estimate_process_sigma_files │ │ └── figure-html │ │ │ └── simulate-data-plot-1.png │ ├── a6_compositional.html │ ├── a6_compositional_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-12-1.png │ │ │ └── unnamed-chunk-7-1.png │ ├── a7_bigdata.html │ ├── a7_bigdata_files │ │ └── figure-html │ │ │ └── unnamed-chunk-3-1.png │ └── index.html ├── authors.html ├── 
bootstrap-toc.css ├── bootstrap-toc.js ├── docsearch.css ├── docsearch.js ├── extra.css ├── extra.ss ├── index.html ├── link.svg ├── news │ └── index.html ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml ├── reference │ ├── Rplot001.png │ ├── Rplot002.png │ ├── Rplot003.png │ ├── Rplot004.png │ ├── Rplot005.png │ ├── Rplot006.png │ ├── bayesdfa-package.html │ ├── dfa_cv.html │ ├── dfa_fitted.html │ ├── dfa_loadings.html │ ├── dfa_trends.html │ ├── find_dfa_trends.html │ ├── find_inverted_chains-1.png │ ├── find_inverted_chains.html │ ├── find_regimes.html │ ├── find_swans-1.png │ ├── find_swans-2.png │ ├── find_swans-3.png │ ├── find_swans.html │ ├── fit_dfa.html │ ├── fit_regimes.html │ ├── hmm_init.html │ ├── index.html │ ├── invert_chains.html │ ├── is_converged.html │ ├── loo.html │ ├── plot_fitted-1.png │ ├── plot_fitted-2.png │ ├── plot_fitted.html │ ├── plot_loadings-1.png │ ├── plot_loadings-2.png │ ├── plot_loadings-3.png │ ├── plot_loadings-4.png │ ├── plot_loadings.html │ ├── plot_regime_model-1.png │ ├── plot_regime_model-2.png │ ├── plot_regime_model-3.png │ ├── plot_regime_model.html │ ├── plot_trends-1.png │ ├── plot_trends.html │ ├── predicted.html │ ├── rotate_trends-1.png │ ├── rotate_trends.html │ ├── sim_dfa-1.png │ ├── sim_dfa-2.png │ ├── sim_dfa-3.png │ ├── sim_dfa-4.png │ ├── sim_dfa-5.png │ ├── sim_dfa-6.png │ ├── sim_dfa.html │ ├── trend_cor-1.png │ ├── trend_cor-2.png │ └── trend_cor.html └── sitemap.xml ├── inst ├── include │ └── stan_meta_header.hpp └── stan │ ├── corr.stan │ ├── dfa.stan │ ├── hmm_gaussian.stan │ ├── include │ └── license.stan │ └── regime_1.stan ├── man ├── bayesdfa-package.Rd ├── dfa_cv.Rd ├── dfa_fitted.Rd ├── dfa_loadings.Rd ├── dfa_trends.Rd ├── find_dfa_trends.Rd ├── find_inverted_chains.Rd ├── find_regimes.Rd ├── find_swans.Rd ├── fit_dfa.Rd ├── fit_regimes.Rd ├── hmm_init.Rd ├── invert_chains.Rd ├── is_converged.Rd ├── loo.Rd ├── plot_fitted.Rd ├── plot_loadings.Rd ├── plot_regime_model.Rd ├── plot_trends.Rd ├── 
predicted.Rd ├── rotate_trends.Rd ├── sim_dfa.Rd └── trend_cor.Rd ├── pkgdown └── extra.css ├── src ├── Makevars ├── Makevars.win ├── RcppExports-507982db.o.tmp ├── RcppExports.cpp ├── stanExports_corr.cc ├── stanExports_corr.h ├── stanExports_dfa-ec272765.o.tmp ├── stanExports_dfa.cc ├── stanExports_dfa.h ├── stanExports_hmm_gaussian.cc ├── stanExports_hmm_gaussian.h ├── stanExports_regime_1.cc └── stanExports_regime_1.h ├── tests ├── testthat.R └── testthat │ ├── test-chain-flipping.R │ └── test-fit.R └── vignettes ├── a1_bayesdfa.Rmd ├── a2_combining_data.Rmd ├── a3_covariates.Rmd ├── a4_smooth.Rmd ├── a5_estimate_process_sigma.Rmd ├── a6_compositional.Rmd └── a7_bigdata.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^CRAN-RELEASE$ 2 | ^cran-comments\.md$ 3 | cleanup* 4 | ^.*\.Rproj$ 5 | ^\.Rproj\.user$ 6 | ^README-* 7 | ^README\.Rmd 8 | ^appveyor\.yml$ 9 | ^Session.vim$ 10 | ^doc$ 11 | ^Meta$ 12 | ^\.github$ 13 | ^_pkgdown\.yml$ 14 | ^docs$ 15 | ^pkgdown$ 16 | ^vignettes\combining_data.Rmd 17 | ^vignettes\compositional.Rmd 18 | ^vignettes\covariates.Rmd 19 | ^vignettes\estimate_process_sigma.Rmd 20 | ^CRAN-SUBMISSION$ 21 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 
2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | pull_request: 9 | branches: 10 | - main 11 | - master 12 | 13 | name: R-CMD-check 14 | 15 | jobs: 16 | R-CMD-check: 17 | if: "!contains(github.event.head_commit.message, 'ci skip')" 18 | runs-on: ${{ matrix.config.os }} 19 | 20 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 21 | 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | config: 26 | #- {os: windows-latest, r: 'release'} 27 | #- {os: macOS-latest, r: 'release'} 28 | - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 29 | #- {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 30 | 31 | env: 32 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 33 | RSPM: ${{ matrix.config.rspm }} 34 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 35 | 36 | steps: 37 | - uses: actions/checkout@v2 38 | 39 | - uses: r-lib/actions/setup-r@v1 40 | with: 41 | r-version: ${{ matrix.config.r }} 42 | 43 | - uses: r-lib/actions/setup-pandoc@v1 44 | 45 | - name: Query dependencies 46 | run: | 47 | install.packages('remotes') 48 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 49 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 50 | shell: Rscript {0} 51 | 52 | - name: Cache R packages 53 | if: runner.os != 'Windows' 54 | uses: actions/cache@v2 55 | with: 56 | path: ${{ env.R_LIBS_USER }} 57 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 58 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 59 | 60 | - name: Install system dependencies 61 | if: runner.os == 'Linux' 62 | run: | 63 | while read -r cmd 64 | do 65 | eval sudo $cmd 66 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", 
"20.04"))') 67 | 68 | - name: Install dependencies 69 | run: | 70 | remotes::install_deps(dependencies = TRUE) 71 | remotes::install_cran("rcmdcheck") 72 | shell: Rscript {0} 73 | 74 | - name: Check 75 | env: 76 | _R_CHECK_CRAN_INCOMING_REMOTE_: false 77 | run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") 78 | #run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--ignore-vignettes", "--no-build-vignettes"), build_args = c("--no-manual", "--ignore-vignettes", "--no-build-vignettes"), error_on = "warning", check_dir = "check") 79 | shell: Rscript {0} 80 | 81 | - name: Upload check results 82 | if: failure() 83 | uses: actions/upload-artifact@main 84 | with: 85 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 86 | path: check 87 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: master 4 | 5 | name: pkgdown 6 | 7 | jobs: 8 | pkgdown: 9 | runs-on: macOS-latest 10 | env: 11 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 12 | steps: 13 | - uses: actions/checkout@v2 14 | 15 | - uses: r-lib/actions/setup-r@master 16 | 17 | - uses: r-lib/actions/setup-pandoc@master 18 | 19 | - name: Query dependencies 20 | run: | 21 | install.packages('remotes') 22 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 23 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 24 | shell: Rscript {0} 25 | 26 | - name: Cache R packages 27 | uses: actions/cache@v2 28 | with: 29 | path: ${{ env.R_LIBS_USER }} 30 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 31 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 32 | 33 | - name: Install dependencies 34 | run: | 35 | remotes::install_deps(dependencies = 
TRUE) 36 | install.packages("pkgdown") 37 | shell: Rscript {0} 38 | 39 | - name: Install package 40 | run: R CMD INSTALL . 41 | 42 | - name: Deploy package 43 | run: | 44 | git config --local user.email "actions@github.com" 45 | git config --local user.name "GitHub Actions" 46 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | *.html 6 | vignettes/*.R 7 | vignettes/*.html 8 | .Rapp.history 9 | *.o 10 | *.so 11 | *.bak 12 | .DS_Store 13 | *.html 14 | .DS_Store 15 | doc 16 | Meta 17 | docs 18 | *.pdf 19 | -------------------------------------------------------------------------------- /CRAN-RELEASE: -------------------------------------------------------------------------------- 1 | This package was submitted to CRAN on 2021-09-28. 2 | Once it is accepted, delete this file and tag the release (commit 30f4717). 
3 | -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 1.3.3 2 | Date: 2024-02-26 18:50:27 UTC 3 | SHA: 4c684c61d6d299a4ff73b8b960e640ca353b1fe9 4 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bayesdfa 2 | Type: Package 3 | Title: Bayesian Dynamic Factor Analysis (DFA) with 'Stan' 4 | Version: 1.3.4 5 | Authors@R: c( 6 | person(c("Eric", "J."), "Ward", role = c("aut", "cre"), 7 | email = "eric.ward@noaa.gov"), 8 | person(c("Sean", "C."), "Anderson", role = "aut"), 9 | person(c("Luis", "A."), "Damiano", role = "aut"), 10 | person(c("Michael", "J."), "Malick", role="aut"), 11 | person(c("Philina", "A."), "English", role="aut"), 12 | person(c("Mary", "E."), "Hunsicker,", role = "ctb"), 13 | person(c("Mike", "A."), "Litzow", role = "ctb"), 14 | person(c("Mark", "D."), "Scheuerell", role = "ctb"), 15 | person(c("Elizabeth", "E."), "Holmes", role = "ctb"), 16 | person(c("Nick",""), "Tolimieri", role = "ctb"), 17 | person("Trustees of", "Columbia University", role = "cph")) 18 | Description: Implements Bayesian dynamic factor analysis with 'Stan'. Dynamic 19 | factor analysis is a dimension reduction tool for multivariate time series. 20 | 'bayesdfa' extends conventional dynamic factor models in several ways. 21 | First, extreme events may be estimated in the latent trend by modeling 22 | process error with a student-t distribution. Second, alternative constraints 23 | (including proportions are allowed). Third, the estimated 24 | dynamic factors can be analyzed with hidden Markov models to evaluate 25 | support for latent regimes. 
26 | License: GPL (>=3) 27 | Encoding: UTF-8 28 | Depends: 29 | R (>= 3.5.0) 30 | Imports: 31 | dplyr, 32 | ggplot2, 33 | loo (>= 2.7.0), 34 | methods, 35 | mgcv (>= 1.8.13), 36 | Rcpp (>= 0.12.0), 37 | reshape2, 38 | rlang, 39 | rstan (>= 2.26.0), 40 | splines, 41 | viridisLite 42 | LinkingTo: 43 | BH (>= 1.66.0), 44 | Rcpp (>= 0.12.0), 45 | RcppEigen (>= 0.3.3.3.0), 46 | RcppParallel (>= 5.0.1), 47 | rstan (>= 2.26.0), 48 | StanHeaders (>= 2.26.0) 49 | Suggests: 50 | testthat, 51 | parallel, 52 | knitr, 53 | rmarkdown, 54 | rstantools 55 | URL: https://fate-ewi.github.io/bayesdfa/ 56 | BugReports: https://github.com/fate-ewi/bayesdfa/issues 57 | RoxygenNote: 7.3.2 58 | VignetteBuilder: knitr 59 | Roxygen: list(markdown = TRUE) 60 | SystemRequirements: GNU make 61 | Biarch: true 62 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(loo,bayesdfa) 4 | S3method(print,bayesdfa) 5 | export(dfa_cv) 6 | export(dfa_fitted) 7 | export(dfa_loadings) 8 | export(dfa_trends) 9 | export(find_dfa_trends) 10 | export(find_inverted_chains) 11 | export(find_regimes) 12 | export(find_swans) 13 | export(fit_dfa) 14 | export(fit_regimes) 15 | export(invert_chains) 16 | export(is_converged) 17 | export(loo) 18 | export(plot_fitted) 19 | export(plot_loadings) 20 | export(plot_regime_model) 21 | export(plot_trends) 22 | export(predicted) 23 | export(rotate_trends) 24 | export(sim_dfa) 25 | export(trend_cor) 26 | import(Rcpp) 27 | import(methods) 28 | importFrom(dplyr,"%>%") 29 | importFrom(ggplot2,aes_string) 30 | importFrom(ggplot2,coord_flip) 31 | importFrom(ggplot2,element_blank) 32 | importFrom(ggplot2,element_line) 33 | importFrom(ggplot2,element_text) 34 | importFrom(ggplot2,facet_wrap) 35 | importFrom(ggplot2,geom_errorbar) 36 | importFrom(ggplot2,geom_hline) 37 | importFrom(ggplot2,geom_line) 
38 | importFrom(ggplot2,geom_point) 39 | importFrom(ggplot2,geom_ribbon) 40 | importFrom(ggplot2,geom_violin) 41 | importFrom(ggplot2,ggplot) 42 | importFrom(ggplot2,ggtitle) 43 | importFrom(ggplot2,position_dodge) 44 | importFrom(ggplot2,scale_color_manual) 45 | importFrom(ggplot2,theme) 46 | importFrom(ggplot2,theme_bw) 47 | importFrom(ggplot2,xlab) 48 | importFrom(ggplot2,ylab) 49 | importFrom(graphics,lines) 50 | importFrom(graphics,par) 51 | importFrom(graphics,plot) 52 | importFrom(graphics,points) 53 | importFrom(graphics,polygon) 54 | importFrom(graphics,segments) 55 | importFrom(loo,extract_log_lik) 56 | importFrom(loo,loo) 57 | importFrom(mgcv,s) 58 | importFrom(mgcv,smooth2random) 59 | importFrom(mgcv,smoothCon) 60 | importFrom(rlang,.data) 61 | importFrom(rstan,extract) 62 | importFrom(rstan,optimizing) 63 | importFrom(rstan,sampling) 64 | importFrom(rstan,vb) 65 | importFrom(splines,bs) 66 | importFrom(splines,splineDesign) 67 | importFrom(stats,dist) 68 | importFrom(stats,dnorm) 69 | importFrom(stats,gaussian) 70 | importFrom(stats,kmeans) 71 | importFrom(stats,median) 72 | importFrom(stats,na.omit) 73 | importFrom(stats,pnorm) 74 | importFrom(stats,quantile) 75 | importFrom(stats,rlnorm) 76 | importFrom(stats,rnorm) 77 | importFrom(stats,rt) 78 | importFrom(stats,runif) 79 | importFrom(stats,sd) 80 | importFrom(stats,time) 81 | importFrom(stats,var) 82 | importFrom(stats,varimax) 83 | importFrom(utils,combn) 84 | importFrom(viridisLite,viridis) 85 | useDynLib(bayesdfa, .registration = TRUE) 86 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # bayesdfa 0.1.0 2 | 3 | * Initial submission to CRAN. 4 | 5 | # bayesdfa 0.1.1 6 | 7 | * Changed Makevars per exchange with Stan developers. 8 | 9 | # bayesdfa 0.1.2 10 | 11 | * Changed find_inverted_chains() and invert_chains() to be compatible with dplyr 0.8 release. 
Specifically, removed deprecated group_by_() and summarise_() functions and changed code to remove unused factor levels. 12 | 13 | # bayesdfa 0.1.3 14 | 15 | * Changed Makevars and Makevars.win to rely on CXX_STD = CXX14 from CXX11. Also added vignette related to inclusion of covariates 16 | 17 | # bayesdfa 0.1.5 18 | 19 | * Added additional functionality to relax limits on AR(1) parameter (phi), MA(1) parameter (theta), and flexibility in estimated the standard deviation of latent trends. Also modified the data object passed in to be either a wide matrix (as previously done) or a long format data frame. The latter allows for multiple observations / time step. Finally, an additional and alternative constraint was introduced for Z, allowing elements to be modeled as a Dirchlet process, rather than conventional DFA. 20 | 21 | # bayesdfa 0.1.6 22 | 23 | * Removed warning related to vignette and noLD test 24 | 25 | # bayesdfa 0.1.7 26 | 27 | * Added non-gaussian families (poisson, negative binomial, bernoulli, Gamma, lognormal). Also included a function for doing cross validation and calculating the expected log posterior density. Another new feature included smooth models (Gaussian process, B-splines) as alternative models for trends conventionally modeled as random walks. Added functions dfa_trends(), dfa_loadings() and dfa_fitted() for extracting trends, loadings, and fitted values. 28 | 29 | # bayesdfa 1.0.0 30 | 31 | * Added constraint on diagonal of Z matrix to keep parameter estimates from 'flipping' within MCMC chains. Ensures convergence for problematic cases. This was present in 0.1.1, but later removed. 32 | 33 | # bayesdfa 1.1.0 34 | 35 | * Following 1.0.0, included a new argument to fit_dfa() function 'expansion_prior' that allows user to toggle on / off the constraint. If not included (default=FALSE), there is no constraint on the Z diagonal, and post-hoc MCMC chain inverting resolves identifiability. 
If 'expansion_prior' = TRUE, then the positive constraint is applied, in combination with the expansion prior for trends and loadings. 36 | 37 | # bayesdfa 1.2.0 38 | 39 | Add penalized spline models, so that the 'trend_model' argument may take on 40 | "rw" for conventional random walks, "bs" for B-splines, "ps" for "P-splines", 41 | or "gp" for Gaussian processes 42 | 43 | # bayesdfa 1.3.0 44 | 45 | Change to new Stan syntax 46 | 47 | # bayesdfa 1.3.1 48 | 49 | Versioning 50 | 51 | # bayesdfa 1.3.2 52 | 53 | - Add compatibility with new rstan 54 | - Changed weights argument to 'inv_var_weights' and 'likelihood_weights' for the glmmTMB/sdmTMB/brms style 55 | 56 | # bayesdfa 1.3.3 57 | 58 | - Add compatibility with new loo 2.7 59 | 60 | # bayesdfa 1.3.4 61 | 62 | - Update Stan linking 63 | -------------------------------------------------------------------------------- /R/bayesdfa-package.R: -------------------------------------------------------------------------------- 1 | #' The 'bayesdfa' package. 2 | #' 3 | #' @description A DESCRIPTION OF THE PACKAGE 4 | #' 5 | #' @name bayesdfa-package 6 | #' @aliases bayesdfa 7 | #' @useDynLib bayesdfa, .registration = TRUE 8 | #' @import methods 9 | #' @import Rcpp 10 | #' @importFrom rstan sampling 11 | #' 12 | #' @references 13 | #' Stan Development Team (2020). RStan: the R interface to Stan. R package version 2.21.2. https://mc-stan.org 14 | #' 15 | NULL 16 | -------------------------------------------------------------------------------- /R/converge_rhat.R: -------------------------------------------------------------------------------- 1 | #' Summarize Rhat convergence statistics across parameters 2 | #' 3 | #' Pass in `rstanfit` model object, and a threshold Rhat value for 4 | #' convergence. Returns boolean. 5 | #' 6 | #' @param fitted_model Samples extracted (with `permuted = FALSE`) from a Stan 7 | #' model. E.g. output from [invert_chains()]. 8 | #' @param threshold Threshold for maximum Rhat. 
9 | #' @param parameters Vector of parameters to be included in convergence determination. Defaults = c("sigma","x","Z"). Other elements can be added including "pred", "log_lik", or "lp__" 10 | #' @export 11 | #' 12 | is_converged <- function(fitted_model, 13 | threshold = 1.05, 14 | parameters = c("sigma", "x", "Z")) { 15 | Rhats <- 16 | fitted_model$monitor[which(grepl( 17 | paste(parameters, collapse = "|"), 18 | rownames(fitted_model$monitor) 19 | ) == TRUE), "Rhat"] 20 | 21 | max(Rhats, na.rm = TRUE) < threshold 22 | } 23 | -------------------------------------------------------------------------------- /R/dfa_fitted.R: -------------------------------------------------------------------------------- 1 | #' Get the fitted values from a DFA as a data frame 2 | #' 3 | #' @param modelfit Output from \code{\link{fit_dfa}}. 4 | #' @param conf_level Probability level for CI. 5 | #' @param names Optional vector of names for time series labels. Should be same length as the number of time series. 6 | #' 7 | #' @export 8 | #' @return A data frame with the following columns: `ID` is an identifier for each time series, `time` is the time step, `y` is the observed values standardized to mean 0 and unit variance, `estimate` is the mean fitted value, `lower` is the lower CI, and `upper` is the upper CI. 
9 | #' 10 | #' @seealso predicted plot_fitted fit_dfa 11 | #' 12 | #' @examples 13 | #' \donttest{ 14 | #' y <- sim_dfa(num_trends = 2, num_years = 20, num_ts = 4) 15 | #' m <- fit_dfa(y = y$y_sim, num_trends = 2, iter = 50, chains = 1) 16 | #' fitted <- dfa_fitted(m) 17 | #' } 18 | dfa_fitted <- function(modelfit, conf_level = 0.95, names = NULL) { 19 | 20 | # pred and Y have same dimensions if data is wide 21 | pred <- predicted(modelfit) 22 | n_mcmc <- dim(pred)[1] 23 | n_chains <- dim(pred)[2] 24 | n_years <- dim(pred)[3] 25 | n_ts <- dim(pred)[4] 26 | 27 | # this is the same for both data types 28 | df_pred <- data.frame( 29 | "ID" = rep(seq_len(n_ts), n_years), 30 | "time" = sort(rep(seq_len(n_years), n_ts)), 31 | "estimate" = c(t(apply(pred, c(3, 4), mean))), 32 | "lower" = c(t(apply(pred, c(3, 4), quantile, 1 - (1 - conf_level) / 2))), 33 | "upper" = c(t(apply(pred, c(3, 4), quantile, (1 - conf_level) / 2))) 34 | ) 35 | 36 | if (modelfit$shape == "wide") { 37 | df_obs <- data.frame( 38 | "ID" = rep(seq_len(n_ts), n_years), 39 | "time" = sort(rep(seq_len(n_years), n_ts)), 40 | "y" = c(modelfit$orig_data) 41 | ) 42 | } else { 43 | df_obs <- data.frame( 44 | "ID" = modelfit$orig_data[["ts"]], 45 | "time" = modelfit$orig_data[["time"]], 46 | "y" = modelfit$orig_data[["obs"]] 47 | ) 48 | } 49 | df_obs$time <- df_obs$time - min(df_obs$time) + 1 50 | 51 | # standardize 52 | for (i in seq_len(n_ts)) { 53 | indx <- which(df_obs[["ID"]] == i) 54 | df_obs[indx, "y"] <- scale(df_obs[indx, "y"], center = TRUE, scale = TRUE) 55 | } 56 | 57 | df_obs <- df_obs[order(df_obs$ID, df_obs$time), ] 58 | df_pred <- df_pred[order(df_pred$ID, df_pred$time), ] 59 | 60 | if (!is.null(names)) { 61 | if (length(names) != n_ts) { 62 | warning("bayesdfa: Length of 'names' should match number of time series. 
Ignoring 'names'.") 63 | } else { 64 | df_pred$ID <- names[df_pred$ID] 65 | df_obs$ID <- names[df_obs$ID] 66 | } 67 | } 68 | 69 | df <- merge(df_obs, df_pred, by = c("ID", "time"), sort = FALSE) 70 | return(df) 71 | } 72 | -------------------------------------------------------------------------------- /R/dfa_loadings.R: -------------------------------------------------------------------------------- 1 | #' Get the loadings from a DFA as a data frame 2 | #' 3 | #' @param rotated_modelfit Output from \code{\link{rotate_trends}}. 4 | #' @param names An optional vector of names for plotting the loadings. 5 | #' @param summary Logical. Should the full posterior densities be returned? Defaults to `TRUE`. 6 | #' @param conf_level Confidence level for credible intervals. Defaults to 0.95. 7 | #' 8 | #' @seealso plot_loadings fit_dfa rotate_trends 9 | #' 10 | #' @export 11 | #' @return A data frame with the following columns: 12 | #' `name` is an identifier for each loading, `trend` is the trend for the 13 | #' loading, `median` is the posterior median loading, `lower` is the lower CI, 14 | #' `upper` is the upper CI, and `prob_diff0` is the probability the loading is 15 | #' different than 0. When `summary = FALSE`, there is no `lower` or `upper` 16 | #' columns and instead there are columns `chain` and `draw`. 
17 | #' 18 | #' @examples 19 | #' set.seed(42) 20 | #' s <- sim_dfa(num_trends = 2, num_ts = 4, num_years = 10) 21 | #' # only 1 chain and 180 iterations used so example runs quickly: 22 | #' m <- fit_dfa(y = s$y_sim, num_trends = 2, iter = 50, chains = 1) 23 | #' r <- rotate_trends(m) 24 | #' loadings <- dfa_loadings(r, summary = TRUE) 25 | #' loadings <- dfa_loadings(r, summary = FALSE) 26 | dfa_loadings <- function(rotated_modelfit, 27 | names = NULL, 28 | summary = TRUE, 29 | conf_level = 0.95) { 30 | v <- reshape2::melt(rotated_modelfit$Z_rot, 31 | varnames = c("iter", "name", "trend"), value.name = "loading" 32 | ) 33 | v$draw <- as.numeric(gsub("_chain.*$", "", v$iter)) 34 | v$chain <- as.numeric(gsub("^[0-9]+_chain:", "", v$iter)) 35 | v$iter <- NULL 36 | v <- v[, c("chain", "draw", "name", "trend", "loading")] 37 | 38 | v$trend <- paste0("Trend ", v$trend) 39 | v$trend <- as.factor(v$trend) 40 | if (!is.null(names)) v$name <- names[v$name] 41 | v$name <- as.factor(v$name) 42 | 43 | ## q_lower = proportion of draws less than zero 44 | ## q_upper = proportion of draws greater than zero 45 | v <- dplyr::group_by(v, .data$name, .data$trend) 46 | v <- dplyr::mutate(v, 47 | q_lower = sum(.data$loading < 0) / length(.data$loading), 48 | q_upper = 1 - .data$q_lower, 49 | prob_diff0 = max(.data$q_lower, .data$q_upper) 50 | ) 51 | v <- as.data.frame(dplyr::ungroup(v)) 52 | out <- v 53 | 54 | if (summary) { 55 | vsum <- dplyr::group_by(v, .data$name, .data$trend) 56 | vsum <- dplyr::summarize(vsum, 57 | median = median(.data$loading), 58 | lower = quantile(.data$loading, probs = (1 - conf_level) / 2), 59 | upper = quantile(.data$loading, probs = 1 - (1 - conf_level) / 2), 60 | q_lower = sum(.data$loading < 0) / length(.data$loading), 61 | q_upper = 1 - .data$q_lower, 62 | prob_diff0 = max(.data$q_lower, .data$q_upper) 63 | ) 64 | df <- as.data.frame(dplyr::ungroup(vsum)) 65 | out <- df 66 | } 67 | 68 | out$q_lower <- NULL 69 | out$q_upper <- NULL 70 | 71 | 
return(out) 72 | } 73 | -------------------------------------------------------------------------------- /R/dfa_trends.R: -------------------------------------------------------------------------------- 1 | #' Get the trends from a DFA as a data frame 2 | #' 3 | #' @param rotated_modelfit Output from \code{\link{rotate_trends}}. 4 | #' @param years Optional numeric vector of years. 5 | #' 6 | #' @export 7 | #' @return A data frame with the following columns: `time` is the time step, `trend_number` is an identifier for each trend, `estimate` is the trend mean, `lower` is the lower CI, and `upper` is the upper CI. 8 | #' 9 | #' @seealso plot_trends fit_dfa rotate_trends 10 | #' 11 | #' @examples 12 | #' set.seed(1) 13 | #' s <- sim_dfa(num_trends = 1) 14 | #' m <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 50, chains = 1) 15 | #' r <- rotate_trends(m) 16 | #' trends <- dfa_trends(r) 17 | dfa_trends <- function(rotated_modelfit, years = NULL) { 18 | rotated <- rotated_modelfit 19 | n_ts <- dim(rotated$Z_rot)[2] 20 | n_trends <- dim(rotated$Z_rot)[3] 21 | 22 | n_years <- dim(rotated$trends_mean)[2] 23 | if (is.null(years)) years <- seq_len(n_years) 24 | 25 | df <- data.frame( 26 | time = rep(years, n_trends), 27 | trend_number = paste0("Trend ", sort(rep(seq_len(n_trends), n_years))), 28 | estimate = c(t(rotated$trends_mean)), 29 | lower = c(t(rotated$trends_lower)), 30 | upper = c(t(rotated$trends_upper)) 31 | ) 32 | 33 | return(df) 34 | } 35 | -------------------------------------------------------------------------------- /R/find_dfa_trends.R: -------------------------------------------------------------------------------- 1 | #' Find the best number of trends according to LOOIC 2 | #' 3 | #' Fit a DFA with different number of trends and return the leave one out (LOO) 4 | #' value as calculated by the [loo][loo::loo-package()] package. 5 | #' 6 | #' @param y A matrix of data to fit. Columns represent time element. 
#' @param kmin Minimum number of trends, defaults to 1.
#' @param kmax Maximum number of trends, defaults to 5.
#' @param iter Iterations when sampling from each Stan model, defaults to 2000.
#' @param thin Thinning rate when sampling from each Stan model, defaults to 1.
#' @param compare_normal If `TRUE`, does model selection comparison of Normal vs.
#'   Student-t errors
#' @param convergence_threshold The maximum allowed value of Rhat to determine
#'   convergence of parameters
#' @param variance Vector of variance arguments for searching over large groups
#'   of models. Can be either or both of ("equal","unequal")
#' @param ... Other arguments to pass to `fit_dfa()`
#' @export
#' @examples
#' \donttest{
#' set.seed(42)
#' s <- sim_dfa(num_trends = 2, num_years = 20, num_ts = 3)
#' # only 1 chain and 180 iterations used so example runs quickly:
#' m <- find_dfa_trends(
#'   y = s$y_sim, iter = 50,
#'   kmin = 1, kmax = 2, chains = 1, compare_normal = FALSE,
#'   variance = "equal", convergence_threshold = 1.1,
#'   control = list(adapt_delta = 0.95, max_treedepth = 20)
#' )
#' m$summary
#' m$best_model
#' }
#' @importFrom loo loo extract_log_lik
#' @importFrom stats quantile time varimax
#' @importFrom rlang .data

find_dfa_trends <- function(y = y,
                            kmin = 1,
                            kmax = 5,
                            iter = 2000,
                            thin = 1,
                            compare_normal = FALSE,
                            convergence_threshold = 1.05,
                            variance = c("equal", "unequal"),
                            ...) {
  trend_seq <- seq(kmin, kmax)
  # error distributions to try; Student-t always, Normal only when requested
  error_types <- if (compare_normal) c("student-t", "normal") else "student-t"
  # intersect() preserves the historical fitting order: "equal" before "unequal"
  var_types <- intersect(c("equal", "unequal"), variance)
  if (length(var_types) == 0) {
    stop("`variance` must contain \"equal\" and/or \"unequal\".", call. = FALSE)
  }

  # One row per candidate model, enumerated in the order the models are fitted
  # (num_trends varies fastest, then variance structure, then error type) --
  # this matches the ordering of the previous hand-unrolled loops.
  configs <- expand.grid(
    num_trends = trend_seq, var_type = var_types, error = error_types,
    stringsAsFactors = FALSE
  )

  df <- data.frame(
    model = seq_len(nrow(configs)),
    num_trends = configs$num_trends,
    looic = NA,
    # historical label: "unequal" variance is reported as "independent"
    cor = ifelse(configs$var_type == "equal", "equal", "independent"),
    error = configs$error,
    converge = FALSE,
    stringsAsFactors = FALSE
  )

  best_model <- NULL
  best_loo <- Inf

  for (i in seq_len(nrow(configs))) {
    # assemble the arguments for this candidate and fit it
    fit_args <- list(
      y = y, num_trends = configs$num_trends[i], iter = iter, thin = thin, ...
    )
    if (configs$var_type[i] == "unequal") {
      # one observation-variance parameter per time series
      fit_args$varIndx <- seq_len(nrow(y))
    }
    if (configs$error[i] == "student-t") {
      fit_args$estimate_nu <- TRUE
    } else {
      # Normal errors are approximated by fixing nu at a large value
      fit_args$estimate_nu <- FALSE
      fit_args$nu_fixed <- 100
    }
    model <- do.call(fit_dfa, fit_args)

    df$converge[i] <- is_converged(model, convergence_threshold)

    # LOOIC with chain-aware relative effective sample sizes
    log_lik <- loo::extract_log_lik(model$model, merge_chains = FALSE)
    rel_eff <- loo::relative_eff(exp(log_lik))
    df$looic[i] <- loo::loo(log_lik, r_eff = rel_eff)$estimates["looic", 1]

    # keep the best *converged* model seen so far
    if (df$converge[i] && df$looic[i] < best_loo) {
      best_model <- model
      best_loo <- df$looic[i]
    }
  }

  df <- dplyr::arrange(df, .data$looic)

  # return best model = one that converges
  list(summary = df, best_model = best_model)
}

#' Fit multiple models with differing numbers of regimes to trend data
#'
#' @param y Data, time series or trend from fitted DFA model.
#' @param sds Optional time series of standard deviations of estimates. If
#'   passed in, residual variance not estimated.
#' @param min_regimes Smallest of regimes to evaluate, defaults to 1.
#' @param max_regimes Biggest of regimes to evaluate, defaults to 3.
#' @param ... Other parameters to pass to [rstan::sampling()].
#' @param iter MCMC iterations, defaults to 2000.
#' @param thin MCMC thinning rate, defaults to 1.
11 | #' @param chains MCMC chains; defaults to 1 (note that running multiple chains 12 | #' may result in a "label switching" problem where the regimes are identified 13 | #' with different IDs across chains). 14 | #' @export 15 | #' 16 | #' @examples 17 | #' data(Nile) 18 | #' find_regimes(log(Nile), iter = 50, chains = 1, max_regimes = 2) 19 | find_regimes <- function(y, 20 | sds = NULL, 21 | min_regimes = 1, 22 | max_regimes = 3, 23 | iter = 2000, 24 | thin = 1, 25 | chains = 1, 26 | ...) { 27 | df <- data.frame(regimes = seq(min_regimes, max_regimes), looic = NA) 28 | best_loo <- 1.0e10 29 | best_model <- NA 30 | for (regime in seq(min_regimes, max_regimes)) { 31 | fit <- fit_regimes( 32 | y = y, sds = sds, n_regimes = regime, iter = iter, thin = thin, 33 | chains = chains, ... 34 | ) 35 | looic <- loo.bayesdfa(fit) 36 | k_table <- loo::pareto_k_table(looic) 37 | if (nrow(k_table) == 4) { 38 | loo_bad <- k_table["(0.7, 1]", "Count"] 39 | loo_very_bad <- k_table["(1, Inf)", "Count"] 40 | } else { 41 | loo_bad <- k_table[2, "Count"] 42 | loo_very_bad <- k_table[3, "Count"] 43 | } 44 | df$looic[which(df$regimes == regime)] <- looic$estimates["looic", "Estimate"] 45 | 46 | if (fit$looic < best_loo) { 47 | best_loo <- fit$looic 48 | best_model <- fit 49 | n_loo_bad <- loo_bad 50 | n_loo_very_bad <- loo_very_bad 51 | } 52 | } 53 | 54 | list( 55 | table = df, best_model = best_model, n_loo_bad = n_loo_bad, 56 | n_loo_very_bad = n_loo_very_bad 57 | ) 58 | } 59 | -------------------------------------------------------------------------------- /R/find_swans.R: -------------------------------------------------------------------------------- 1 | #' Find outlying "black swan" jumps in trends 2 | #' 3 | #' @param rotated_modelfit Output from [rotate_trends()]. 4 | #' @param threshold A probability threshold below which to flag trend events as 5 | #' extreme 6 | #' @param plot Logical: should a plot be made? 
#'
#' @return
#' Prints a ggplot2 plot if `plot = TRUE`; returns a data frame indicating the
#' probability that any given point in time represents a "black swan" event
#' invisibly.
#'
#' @examples
#' set.seed(1)
#' s <- sim_dfa(num_trends = 1, num_ts = 3, num_years = 30)
#' s$y_sim[1, 15] <- s$y_sim[1, 15] - 6
#' plot(s$y_sim[1, ], type = "o")
#' abline(v = 15, col = "red")
#' # only 1 chain and 250 iterations used so example runs quickly:
#' m <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 50, chains = 1, nu_fixed = 2)
#' r <- rotate_trends(m)
#' p <- plot_trends(r) #+ geom_vline(xintercept = 15, colour = "red")
#' print(p)
#' # a 1 in 1000 probability if was from a normal distribution:
#' find_swans(r, plot = TRUE, threshold = 0.001)
#' @references
#' Anderson, S.C., Branch, T.A., Cooper, A.B., and Dulvy, N.K. 2017.
#' Black-swan events in animal populations. Proceedings of the National Academy
#' of Sciences 114(12): 3252–3257. https://doi.org/10.1073/pnas.1611525114
#'
#' @export
#' @importFrom stats pnorm

find_swans <- function(rotated_modelfit,
                       threshold = 0.01,
                       plot = FALSE) {
  trend_means <- rotated_modelfit$trends_mean
  # first differences of each trend; after apply(), time runs down the rows
  diffs <- apply(trend_means, 1, function(row) c(NA, diff(row)))
  diff_sds <- apply(diffs, 2, sd, na.rm = TRUE) # sds != 1

  # tail probability of each jump under a normal distribution with the
  # trend-specific SD of first differences
  prob <- matrix(NA, nrow(diffs), ncol(diffs))
  for (j in seq_len(ncol(diffs))) {
    prob[, j] <- 1 - pnorm(abs(diffs[, j]), 0, diff_sds[j])
  }

  # reshape trends and probabilities to long format and join on time/trend
  prob <- as.data.frame(prob)
  trends <- as.data.frame(t(trend_means))
  trends$time <- seq_len(nrow(trends))
  prob$time <- seq_len(nrow(prob))
  trends <- reshape2::melt(trends, id.vars = c("time"))
  prob <- reshape2::melt(prob, id.vars = c("time"))
  names(trends) <- c("time", "trend_number", "trend_value")
  names(prob) <- c("time", "trend_number", "probability")

  # strip the default "V" column-name prefix to get plain trend ids
  trends$trend_number <- as.character(sub("V", "", trends$trend_number))
  prob$trend_number <- as.character(sub("V", "", prob$trend_number))
  trends <- dplyr::inner_join(trends, prob, c("time", "trend_number"))
  trends$below_threshold <- trends$probability < threshold

  if (plot) {
    g <- ggplot(trends, aes_string(
      x = "time", y = "trend_value",
      color = "below_threshold"
    )) +
      geom_point() +
      facet_wrap(~trend_number)
    print(g)
  }
  invisible(trends)
}

# Some of the following code copied (and modified) from
# https://github.com/luisdamiano/stancon18
# under CC-BY 4.0

#' Fit models with differing numbers of regimes to trend data
#'
#' @param y Data, time series or trend from fitted DFA model.
#' @param sds Optional time series of standard deviations of estimates.
#' If passed in, residual variance not estimated. Defaults to `NULL`.
#' @param n_regimes Number of regimes to evaluate, defaults 2
#' @param ... Other parameters to pass to [rstan::sampling()].
#' @param iter MCMC iterations, defaults to 2000.
#' @param thin MCMC thinning rate, defaults to 1.
#' @param chains MCMC chains, defaults to 1 (note that running multiple chains
#'   may result in a label switching problem where the regimes are identified
#'   with different IDs across chains).
#' @export
#'
#' @importFrom rstan sampling
#' @importFrom loo extract_log_lik loo
#' @import Rcpp
#'
#' @examples
#' data(Nile)
#' fit_regimes(log(Nile), iter = 50, n_regimes = 1)
fit_regimes <- function(y,
                        sds = NULL,
                        n_regimes = 2,
                        iter = 2000,
                        thin = 1,
                        chains = 1,
                        ...) {
  est_sigma <- 0
  if (is.null(sds)) {
    # estimate sigma, instead of using fixed values
    sds <- rep(0, length(y))
    est_sigma <- 1
  }

  if (n_regimes < 1) stop("`n_regimes` must be an integer >= 1.", call. = FALSE)

  if (identical(as.integer(n_regimes), 1L)) {
    # single-regime model: simpler Stan program with only mean/SD parameters
    stan_data <- list(
      T = length(y),
      K = 1,
      x_t = y,
      sigma_t = sds,
      est_sigma = est_sigma,
      pars = c("mu_k", "sigma_k", "log_lik")
    )

    m <- rstan::sampling(
      object = stanmodels$regime_1,
      data = stan_data,
      iter = iter,
      thin = thin, # bug fix: `thin` was silently ignored in the 1-regime case
      chains = chains,
      init = function() {
        hmm_init(n_regimes, y)
      },
      ...
    )
  } else {
    # HMM with K > 1 regimes; previously a separate `if (n_regimes > 1)` so
    # a non-integer n_regimes (e.g. 1.5) could run *both* branches
    stan_data <- list(
      T = length(y),
      K = n_regimes,
      x_t = y,
      sigma_t = sds,
      est_sigma = est_sigma,
      pars = c(
        "p_1k", "A_ij", "mu_k", "sigma_k", "log_lik", "unalpha_tk", "gamma_tk",
        "unbeta_tk", "ungamma_tk", "alpha_tk", "beta_tk", "zstar_t",
        "logp_zstar_t"
      )
    )

    m <- rstan::sampling(
      object = stanmodels$hmm_gaussian,
      data = stan_data,
      iter = iter,
      thin = thin,
      chains = chains,
      init = function() {
        hmm_init(n_regimes, y)
      },
      ...
    )
  }

  # LOOIC with chain-aware relative effective sample sizes
  log_lik <- loo::extract_log_lik(m, merge_chains = FALSE)
  rel_eff <- loo::relative_eff(exp(log_lik))
  looic <- loo::loo(log_lik, r_eff = rel_eff)$estimates["looic", 1]

  list(model = m, y = y, looic = looic)
}

#' Create initial values for the HMM model.
#'
#' @param K The number of regimes or clusters to fit. Called by [rstan::sampling()].
#' @param x_t A matrix of values. Called by [rstan::sampling()].
#' @importFrom stats kmeans
#'
#' @return list of initial values (mu, sigma)
hmm_init <- function(K, x_t) {
  # k-means clustering to seed one (mean, SD) pair per regime
  clasif <- kmeans(x_t, K)
  init.mu <- by(x_t, clasif$cluster, mean)
  init.sigma <- by(x_t, clasif$cluster, sd)
  # order regimes by increasing mean so labels are stable across inits
  init.order <- order(init.mu)
  # NOTE(review): if a cluster has a single member, sd() returns NA — verify
  # Stan accepts such inits for small K/short series
  list(mu_k = init.mu[init.order], sigma_k = init.sigma[init.order])
}

#' Find which chains to invert
#'
#' Find which chains to invert by checking the sum of the squared
#' deviations between the first chain and each other chain.
#'
#' @param model A Stan model, `rstanfit` object
#' @param trend Which trend to check
#' @param plot Logical: should a plot of the trend for each chain be made?
#' Defaults to `FALSE`
#'
#' @importFrom ggplot2 geom_line
#' @importFrom utils combn
#' @seealso invert_chains
#' @export
#'
#' @examples
#' set.seed(2)
#' s <- sim_dfa(num_trends = 2)
#' set.seed(1)
#' m <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 30, chains = 2)
#' # chains were already inverted, but we can redo that, as an example, with:
#' find_inverted_chains(m$model, plot = TRUE)
find_inverted_chains <- function(model, trend = 1, plot = FALSE) {
  chains <- NULL # required for dplyr 0.8 update
  parameters <- NULL
  value <- NULL

  # raw draws kept per-chain so each chain's trend/loadings can be compared
  e <- rstan::extract(model, permuted = FALSE)
  v <- reshape2::melt(e)
  # per-chain posterior medians of the trend states x[time, trend]
  vv <- v[grepl(paste0("x\\[", trend), v$parameters), ]
  vv$parameters <- as.factor(as.character(vv$parameters)) # needed with dplyr 0.8, all levels returned otherwise
  vv <- dplyr::group_by(vv, chains, parameters)
  vv <- dplyr::summarise(vv, estimate = stats::median(value))
  # per-chain posterior medians of the loadings Z[series, trend]
  zz <- v[grepl(paste0("Z\\["), v$parameters), ]
  zz$parameters <- as.factor(as.character(zz$parameters)) # needed with dplyr 0.8, all levels returned otherwise
  zz <- zz[grepl(paste0(trend, "]"), zz$parameters), ]
  zz <- dplyr::group_by(zz, chains, parameters)
  zz <- dplyr::summarise(zz, estimate = stats::median(value))
  ## vv is dimensioned nchains * nyears (x[1:nyears,trend=i])
  ## zz is dimensioned n_time series (Z[1:time series,trend=i])

  if (plot) {
    p <- ggplot(vv, aes_string("as.numeric(parameters)", "estimate",
      color = "chains"
    )) +
      geom_line()
    print(p)
  }

  # cast parameters to df (one column per chain)
  vvv <- reshape2::dcast(vv, parameters ~ chains, value.var = "estimate")
  vvv$parameters <- NULL
  zzz <- reshape2::dcast(zz, parameters ~ chains, value.var = "estimate")
  zzz$parameters <- NULL

  nchains <- ncol(vvv)

  # n_ts x n_years prediction matrix of product of trends and loadings
  # NOTE(review): returns 0 (not an empty vector) when no chain needs flipping;
  # callers iterate over the result, and indexing with 0 is a no-op in R
  flipped_chains <- 0
  pred0_loadings <- zzz[, 1] # chain 1 is the sign reference
  pred0_trend <- vvv[, 1]
  if (nchains > 1) {
    for (i in seq(2, nchains)) {
      pred1_loadings <- zzz[, i]
      pred1_trend <- vvv[, i]
      # calculate square error of flipped chain versus chain 1 as reference
      flipped_sq_err <- sum((-1 * zzz[, i] - pred0_loadings)^2) + sum((-1 * vvv[, i] - pred0_trend)^2)
      sq_err <- sum((zzz[, i] - pred0_loadings)^2) + sum((vvv[, i] - pred0_trend)^2)
      # if flipped trends/loadings more similar to chain 1, flip
      if (flipped_sq_err < sq_err) {
        if (flipped_chains == 0) {
          flipped_chains <- i
        }
        else {
          flipped_chains <- c(flipped_chains, i)
        }
      }
    }
  }
  flipped_chains
}

#' Invert chains
#'
#' @param model A Stan model, rstanfit object
#' @param trends The number of trends in the DFA, defaults to 1
#' @param print Logical indicating whether the summary should be printed.
#' Defaults to `FALSE`.
#' @param ... Other arguments to pass to [find_inverted_chains()].
90 | #' 91 | #' @export 92 | #' @seealso find_inverted_chains 93 | invert_chains <- function(model, trends = 1, print = FALSE, ...) { 94 | e <- rstan::extract(model, permuted = FALSE) 95 | ep <- rstan::extract(model, permuted = TRUE) 96 | pars <- colnames(e[1, , ]) 97 | n_mcmc <- dim(ep$Z)[1] 98 | n_chains <- dim(e)[2] 99 | 100 | for (k in seq_len(trends)) { 101 | f <- find_inverted_chains(model, trend = k) 102 | message(paste("Inverting chains", paste(f, collapse = " & "), "for trend", k)) 103 | 104 | for (f_ in f) { 105 | for (i in grep(paste0("x\\[", k), pars)) { 106 | e[, f_, i] <- -1 * e[, f_, i] 107 | } 108 | for (i in grep(paste0("Z\\[[0-9]+,", k, "\\]"), pars)) { 109 | e[, f_, i] <- -1 * e[, f_, i] 110 | } 111 | } 112 | } 113 | 114 | mon <- rstan::monitor(e, print = print, warmup = 0) 115 | invisible(list(model = model, samples_permuted = ep, samples = e, monitor = mon)) 116 | } 117 | -------------------------------------------------------------------------------- /R/loo.R: -------------------------------------------------------------------------------- 1 | #' LOO information criteria 2 | #' 3 | #' Extract the LOOIC (leave-one-out information criterion) using 4 | #' [loo::loo()]. Note that we've implemented slightly different variants 5 | #' of loo, based on whether the DFA observation model includes correlation 6 | #' between time series or not (default is no correlation). Importantly, 7 | #' these different versions are not directly comparable to evaluate data support 8 | #' for including correlation or not in a DFA. If time series are not correlated, 9 | #' the point-wise log-likelihood for each observation is calculated and used 10 | #' in the loo calculations. However if time series are correlated, then each 11 | #' time slice is assumed to be a joint observation of 12 | #' all variables, and the point-wise log-likelihood is calculated as the 13 | #' joint likelihood of all variables under the multivariate normal distribution. 
14 | #' 15 | #' @param x Output from [fit_dfa()]. 16 | #' @param ... Arguments for [loo::relative_eff()] and [loo::loo.array()]. 17 | #' 18 | #' @export 19 | #' @examples 20 | #' \donttest{ 21 | #' set.seed(1) 22 | #' s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) 23 | #' m <- fit_dfa(y = s$y_sim, iter = 50, chains = 1, num_trends = 1) 24 | #' loo(m) 25 | #' } 26 | #' @rdname loo 27 | loo.bayesdfa <- function(x, ...) { 28 | log_lik <- loo::extract_log_lik(x$model, merge_chains = FALSE) 29 | rel_eff <- loo::relative_eff(exp(log_lik), ...) 30 | loo::loo.array(log_lik, 31 | r_eff = rel_eff, 32 | save_psis = FALSE, 33 | ... 34 | ) 35 | } 36 | 37 | #' @name loo 38 | #' @rdname loo 39 | #' @export 40 | #' @importFrom loo loo 41 | NULL 42 | -------------------------------------------------------------------------------- /R/plot_fitted.R: -------------------------------------------------------------------------------- 1 | #' Plot the fitted values from a DFA 2 | #' 3 | #' @param modelfit Output from \code{\link{fit_dfa}}, a rstanfit object 4 | #' @param conf_level Probability level for CI. 5 | #' @param names Optional vector of names for plotting labels TODO. 
Should be same length as the number of time series 6 | #' @param spaghetti Defaults to FALSE, but if TRUE puts all raw time series (grey) and fitted values on a single plot 7 | #' @param time_labels Optional vector of time labels for plotting, same length as number of time steps 8 | #' @export 9 | #' @seealso plot_loadings fit_dfa rotate_trends dfa_fitted 10 | #' 11 | #' @importFrom ggplot2 geom_ribbon facet_wrap scale_color_manual 12 | #' @importFrom viridisLite viridis 13 | #' 14 | #' @examples 15 | #' \donttest{ 16 | #' y <- sim_dfa(num_trends = 2, num_years = 20, num_ts = 4) 17 | #' m <- fit_dfa(y = y$y_sim, num_trends = 2, iter = 50, chains = 1) 18 | #' p <- plot_fitted(m) 19 | #' print(p) 20 | #' 21 | #' p <- plot_fitted(m, spaghetti = TRUE) 22 | #' print(p) 23 | #' } 24 | plot_fitted <- function(modelfit, conf_level = 0.95, names = NULL, spaghetti = FALSE, time_labels = NULL) { 25 | df <- dfa_fitted(modelfit, conf_level = conf_level, names = names) 26 | df$ID <- as.factor(df$ID) 27 | 28 | # relabel time if entered 29 | if(!is.null(time_labels)) { 30 | df$new_time = time_labels[df$time] 31 | df$time = df$new_time 32 | } 33 | 34 | if (spaghetti == TRUE) { 35 | cols <- viridis(length(unique((df$ID))), end = 0.8) 36 | p1 <- ggplot(df) + 37 | geom_line(aes_string(x = "time", y = "y", group = "ID"), 38 | color = "grey50", size = 0.5 39 | ) + 40 | geom_line(aes_string(x = "time", y = "estimate", group = "ID", color = "ID"), 41 | size = 1.2 42 | ) + 43 | scale_color_manual(values = cols) + 44 | xlab("Time") + 45 | theme(legend.position = "none") 46 | } else { 47 | p1 <- ggplot(df) + 48 | geom_ribbon(aes_string(x = "time", ymin = "lower", ymax = "upper"), alpha = 0.4) + 49 | geom_line(aes_string(x = "time", y = "estimate")) + 50 | geom_point(aes_string(x = "time", y = "y"), 51 | col = "red", 52 | size = 0.5, 53 | alpha = 0.4 54 | ) + 55 | facet_wrap("ID", scales = "free_y") + 56 | xlab("Time") + 57 | ylab("") 58 | } 59 | p1 60 | } 61 | 
#' Plot the loadings from a DFA
#'
#' @param rotated_modelfit Output from [rotate_trends()].
#' @param names An optional vector of names for plotting the loadings.
#' @param facet Logical. Should there be a separate facet for each trend?
#'   Defaults to `TRUE`.
#' @param violin Logical. Should the full posterior densities be shown as a
#'   violin plot? Defaults to `TRUE`.
#' @param conf_level Confidence level for credible intervals. Defaults to 0.95.
#' @param threshold Numeric (0-1). Optional for plots, if included, only plot
#'   loadings who have Pr(<0) or Pr(>0) > threshold. For example `threshold = 0.8`
#'   would only display estimates where 80% of posterior density was above/below
#'   zero. Defaults to `NULL` (not used).
#'
#' @seealso plot_trends fit_dfa rotate_trends
#'
#' @export
#'
#' @importFrom ggplot2 ggplot geom_point xlab ylab theme_bw theme aes_string
#'   element_blank position_dodge ggtitle geom_errorbar
#'   element_line element_text geom_line geom_violin coord_flip geom_hline
#'
#' @examples
#' set.seed(42)
#' s <- sim_dfa(num_trends = 2, num_ts = 4, num_years = 10)
#' # only 1 chain and 180 iterations used so example runs quickly:
#' m <- fit_dfa(y = s$y_sim, num_trends = 2, iter = 50, chains = 1)
#' r <- rotate_trends(m)
#' plot_loadings(r, violin = FALSE, facet = TRUE)
#' plot_loadings(r, violin = FALSE, facet = FALSE)
#' plot_loadings(r, violin = TRUE, facet = FALSE)
#' plot_loadings(r, violin = TRUE, facet = TRUE)
plot_loadings <- function(rotated_modelfit,
                          names = NULL,
                          facet = TRUE,
                          violin = TRUE,
                          conf_level = 0.95,
                          threshold = NULL) {
  # full posterior draws (for violins) and summarized loadings (for points)
  draws <- dfa_loadings(rotated_modelfit,
    summary = FALSE,
    names = names,
    conf_level = conf_level
  )
  summarized <- dfa_loadings(rotated_modelfit,
    summary = TRUE,
    names = names,
    conf_level = conf_level
  )

  # drop loadings whose probability of differing from zero is below threshold
  if (!is.null(threshold)) {
    summarized <- summarized[summarized$prob_diff0 >= threshold, ]
    draws <- draws[draws$prob_diff0 >= threshold, ]
  }

  if (violin) {
    # full posterior densities per time series, shaded by Pr(!= 0)
    p <- ggplot(draws, aes_string(
      x = "name", y = "loading", fill = "trend",
      alpha = "prob_diff0"
    )) +
      geom_violin(color = NA) +
      geom_hline(yintercept = 0, lty = 2) +
      coord_flip() +
      xlab("Time Series") +
      ylab("Loading")
  } else {
    # point estimates with credible-interval error bars
    p <- ggplot(summarized, aes_string(
      x = "name", y = "median", col = "trend",
      alpha = "prob_diff0"
    )) +
      geom_point(size = 3, position = position_dodge(0.3)) +
      geom_errorbar(aes_string(ymin = "lower", ymax = "upper"),
        position = position_dodge(0.3), width = 0
      ) +
      geom_hline(yintercept = 0, lty = 2) +
      coord_flip() +
      xlab("Time Series") +
      ylab("Loading")
  }

  if (facet) {
    p <- p + facet_wrap(~trend, scales = "free_x")
  }

  p
}

#' Plot the state probabilities from [find_regimes()]
#'
#' @param model A model returned by [find_regimes()].
#' @param probs A numeric vector of quantiles to plot the credible intervals at.
#' Defaults to `c(0.05, 0.95)`.
#' @param type Whether to plot the probabilities (default) or means.
#' @param regime_prob_threshold The probability density that must be above 0.5.
#' Defaults to 0.9 before we classify a regime (only affects `"means"` plot).
#' @param plot_prob_indices Optional indices of probability plots to plot.
#' Defaults to showing all.
#' @param flip_regimes Optional whether to flip regimes in plots, defaults to FALSE
#' @details Note that the original timeseries data (dots) are shown scaled
#' between 0 and 1.
#' @importFrom dplyr "%>%"
#' @importFrom rlang .data
#' @export
#' @examples
#' \donttest{
#' data(Nile)
#' m <- fit_regimes(log(Nile), n_regimes = 2, chains = 1, iter = 50)
#' plot_regime_model(m)
#' plot_regime_model(m, plot_prob_indices = c(2))
#' plot_regime_model(m, type = "means")
#' }
#'
plot_regime_model <- function(model, probs = c(0.05, 0.95),
                              type = c("probability", "means"),
                              regime_prob_threshold = 0.9,
                              plot_prob_indices = NULL,
                              flip_regimes = FALSE) {
  type <- match.arg(type)
  # posterior draws of state probabilities (gamma_tk) and regime means (mu_k)
  gamma_tk <- rstan::extract(model$model, pars = "gamma_tk")[[1]]
  mu_k <- rstan::extract(model$model, pars = "mu_k")[[1]]

  # lower/upper/median posterior quantiles per (time, state)
  l <- apply(gamma_tk, 2:3, quantile, probs = probs[[1]])
  u <- apply(gamma_tk, 2:3, quantile, probs = probs[[2]])
  med <- apply(gamma_tk, 2:3, quantile, probs = 0.5)
  # rescale the raw data to [0, 1] so it can be overlaid on probability panels
  range01 <- function(x) (x - min(x)) / (max(x) - min(x))
  mu_k_low <- apply(mu_k, 2, quantile, probs = probs[[1]])
  mu_k_high <- apply(mu_k, 2, quantile, probs = probs[[2]])
  mu_k <- apply(mu_k, 2, median)
  # a regime is "confident" at a time step when Pr(gamma > 0.5) exceeds
  # regime_prob_threshold
  confident_regimes <- apply(gamma_tk, 2:3, function(x) {
    mean(x > 0.5) > regime_prob_threshold
  })
  # for each time step: the confident regime index, or NA if none qualifies
  regime_indexes <- apply(confident_regimes, 1, function(x) {
    w <- which(x)
    if (length(w) == 0) NA else w
  })

  if (flip_regimes) {
    # NOTE(review): flips via 1 - value; natural for probabilities in [0, 1],
    # but this also transforms mu_k used by the "means" panel — confirm intended
    mu_k <- 1 - mu_k
    u <- 1 - u
    l <- 1 - l
    med <- 1 - med
  }

  if (is.null(plot_prob_indices)) {
    # then plot all panels
    plot_prob_indices <- seq_len(ncol(med))
  }

  if (type == "probability") {
    # long-format quantiles joined by (Time, State) for a faceted ribbon plot
    df_l <- reshape2::melt(l, varnames = c("Time", "State"), value.name = "lwr")
    df_u <- reshape2::melt(u, varnames = c("Time", "State"), value.name = "upr")
    df_m <- reshape2::melt(med, varnames = c("Time", "State"), value.name = "median")
    df_y <- data.frame(y = range01(model$y), Time = seq_along(model$y))

    dplyr::inner_join(df_l, df_u, by = c("Time", "State")) %>%
      dplyr::inner_join(df_m, by = c("Time", "State")) %>%
      dplyr::filter(.data$State %in% plot_prob_indices) %>%
      dplyr::mutate(State = paste("State", .data$State)) %>%
      ggplot2::ggplot(
        ggplot2::aes_string("Time", y = "median", ymin = "lwr", ymax = "upr")
      ) +
      ggplot2::geom_ribbon(fill = "grey60") +
      ggplot2::geom_line(colour = "grey10", lwd = 0.8) +
      ggplot2::facet_wrap(~State) +
      ggplot2::coord_cartesian(expand = FALSE, ylim = c(0, 1)) +
      ggplot2::geom_point(
        data = df_y,
        ggplot2::aes_string(x = "Time", y = "y"), inherit.aes = FALSE
      ) +
      ggplot2::ylab("Probability")
  } else {
    # base-graphics "means" plot: raw data plus per-time-step regime mean
    # segments with shaded credible-interval boxes
    plot(as.numeric(model$y),
      col = "#FF000070", pch = 3, ylab = "Time series value",
      xlab = "Time"
    )
    if (!all(is.na(regime_indexes))) {
      for (i in seq_along(regime_indexes)) {
        segments(
          x0 = i - 0.5, x1 = i + 0.5, y0 = mu_k[regime_indexes[i]],
          y1 = mu_k[regime_indexes[i]]
        )
        polygon(c(i - 0.5, i - 0.5, i + 0.5, i + 0.5),
          c(
            mu_k_low[regime_indexes[i]], mu_k_high[regime_indexes[i]],
            mu_k_high[regime_indexes[i]], mu_k_low[regime_indexes[i]]
          ),
          border = NA, col = "#00000050"
        )
      }
    }
  }
}

#' Plot the trends from a DFA
#'
#' @param rotated_modelfit Output from \code{\link{rotate_trends}}
#' @param years Optional numeric vector of years for the plot
#' @param highlight_outliers Logical.
Should trend events 6 | #' that exceed the probability of occurring with a normal distribution as 7 | #' defined by \code{threshold} be highlighted? Defaults to FALSE 8 | #' @param threshold A probability threshold below which to 9 | #' flag trend events as extreme. Defaults to 0.01 10 | #' 11 | #' @export 12 | #' @seealso dfa_trends plot_loadings fit_dfa rotate_trends 13 | #' 14 | #' @importFrom ggplot2 geom_ribbon facet_wrap geom_point 15 | #' 16 | #' @examples 17 | #' set.seed(1) 18 | #' s <- sim_dfa(num_trends = 1) 19 | #' m <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 50, chains = 1) 20 | #' r <- rotate_trends(m) 21 | #' p <- plot_trends(r) 22 | #' print(p) 23 | plot_trends <- function(rotated_modelfit, 24 | years = NULL, 25 | highlight_outliers = FALSE, 26 | threshold = 0.01) { 27 | rotated <- rotated_modelfit 28 | df <- dfa_trends(rotated, years = years) 29 | 30 | # make faceted ribbon plot of trends 31 | p1 <- ggplot(df, aes_string(x = "time", y = "estimate")) + 32 | geom_ribbon(aes_string(ymin = "lower", ymax = "upper"), alpha = 0.4) + 33 | geom_line() + 34 | facet_wrap("trend_number") + 35 | xlab("Time") + 36 | ylab("") 37 | 38 | if (highlight_outliers) { 39 | swans <- find_swans(rotated, threshold = threshold) 40 | df$outliers <- swans$below_threshold 41 | p1 <- p1 + geom_point(data = df[which(df$outliers), ], color = "red") 42 | } 43 | 44 | p1 45 | } 46 | -------------------------------------------------------------------------------- /R/predicted.R: -------------------------------------------------------------------------------- 1 | #' Calculate predicted value from DFA object 2 | #' 3 | #' Pass in `rstanfit` model object. Returns array of predictions, dimensioned 4 | #' number of MCMC draws x number of MCMC chains x time series length x number of time series 5 | #' 6 | #' @param fitted_model Samples extracted (with `permuted = FALSE`) from a Stan 7 | #' model. E.g. output from [invert_chains()]. 
8 | #' @export 9 | #' @examples 10 | #' \dontrun{ 11 | #' set.seed(42) 12 | #' s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) 13 | #' # only 1 chain and 1000 iterations used so example runs quickly: 14 | #' m <- fit_dfa(y = s$y_sim, iter = 2000, chains = 3, num_trends = 1) 15 | #' pred <- predicted(m) 16 | #' } 17 | predicted <- function(fitted_model) { 18 | Z <- rstan::extract(fitted_model$model, "Z", permuted = FALSE) 19 | x <- rstan::extract(fitted_model$model, "x", permuted = FALSE) 20 | Zperm <- rstan::extract(fitted_model$model, "Z", permuted = TRUE) 21 | xperm <- rstan::extract(fitted_model$model, "x", permuted = TRUE) 22 | 23 | n_ts <- dim(Zperm$Z)[2] 24 | n_y <- dim(xperm$x)[3] 25 | n_chains <- dim(Z)[2] 26 | n_trends <- dim(xperm$x)[2] 27 | n_mcmc <- dim(x)[1] 28 | 29 | pred <- array(0, c(n_mcmc, n_chains, n_y, n_ts)) 30 | for (i in 1:n_mcmc) { 31 | for (chain in 1:n_chains) { 32 | # for each MCMC draw / chain 33 | x_i <- t(matrix(x[i, chain, ], nrow = n_trends, ncol = n_y)) 34 | Z_i <- t(matrix(Z[i, chain, ], nrow = n_ts, ncol = n_trends)) 35 | pred[i, chain, , ] <- x_i %*% Z_i 36 | } 37 | } 38 | 39 | return(pred) 40 | } 41 | -------------------------------------------------------------------------------- /R/print.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | #' @import methods 3 | print.bayesdfa <- function(x, ...) { 4 | base::print(x$monitor, digits = 2) 5 | } 6 | 7 | # test 8 | -------------------------------------------------------------------------------- /R/rotate_trends.R: -------------------------------------------------------------------------------- 1 | #' Rotate the trends from a DFA 2 | #' 3 | #' @param fitted_model Output from [fit_dfa()]. 4 | #' @param conf_level Probability level for CI. 
5 | #' @param invert Whether to invert the trends and loadings for plotting purposes 6 | #' @importFrom stats median quantile sd 7 | #' 8 | #' @export 8 | #' @return A list with rotated loadings (`Z_rot`) and trends (`trends`), plus their means, medians, and `conf_level` credible intervals. 9 | #' 10 | #' @importFrom rstan extract 11 | #' @examples 12 | #' set.seed(42) 13 | #' s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) 14 | #' # only 1 chain and 50 iterations used so example runs quickly: 15 | #' m <- fit_dfa(y = s$y_sim, iter = 50, chains = 1) 16 | #' r <- rotate_trends(m) 17 | #' plot_trends(r) 18 | rotate_trends <- function(fitted_model, conf_level = 0.95, invert = FALSE) { 19 | 20 | # total number of MCMC draws = iterations kept per chain x number of chains 21 | n_mcmc <- dim(fitted_model$samples)[2] * dim(fitted_model$samples)[1] 22 | 23 | flip <- ifelse(invert == FALSE, 1, -1) # -1 mirrors trends and loadings for plotting 24 | 25 | temp <- reshape_samples(fitted_model$samples) 26 | Z <- temp$Z 27 | x <- temp$x 28 | n_ts <- dim(Z)[2] 29 | n_trends <- dim(x)[2] 30 | n_years <- dim(x)[3] 31 | 32 | # do varimax rotation separately for each MCMC draw (slow) 33 | mcmc_trends_rot <- array(0, dim = c(n_mcmc, n_trends, n_years)) 34 | mcmc_Z_rot <- array(0, dim = c(n_mcmc, n_ts, n_trends)) 35 | if (n_trends > 1) { 36 | for (i in seq_len(n_mcmc)) { 37 | Zest <- Z[i, , ] 38 | H.inv <- varimax(Zest)$rotmat # orthogonal rotation matrix from stats::varimax 39 | # rotate factor loadings 40 | Z.rot <- Zest %*% H.inv 41 | mcmc_Z_rot[i, , ] <- Z.rot 42 | # rotate trends with the inverse so the product Z %*% x is unchanged 43 | states <- x[i, , ] 44 | trends.rot <- solve(H.inv) %*% states 45 | mcmc_trends_rot[i, , ] <- trends.rot 46 | } 47 | } 48 | if (n_trends == 1) { # nothing to rotate with a single trend 49 | mcmc_Z_rot <- Z 50 | mcmc_trends_rot <- x 51 | # for (i in seq_len(n_mcmc)) { 52 | # # sometimes sampling may get stuck and oscillate within a 53 | # # chain -- catch those cases with post-hoc flipping 54 | # mcmc_Z_rot[i, , ] <- Z[i, , ] 55 | # mcmc_trends_rot[i, , ] <- x[i, , ] 56 | # # use first draw as arbitrary reference 57 | # if(sum((-mcmc_trends_rot[i, , ] - mcmc_trends_rot[1, , ])^2) < sum((mcmc_trends_rot[i, , ] - mcmc_trends_rot[1, , ])^2)) { 58 | # # then flip 59 | # mcmc_Z_rot[i, , ] <- -mcmc_Z_rot[i, , ] 60 | # mcmc_trends_rot[i, , ] <- -mcmc_trends_rot[i, , ] 61 | # } 62 | # } 63 | } 64 | 65 | list( 66 | Z_rot = flip * mcmc_Z_rot, 67 | trends = flip * mcmc_trends_rot, 68 | Z_rot_mean = apply(flip * mcmc_Z_rot, c(2, 3), mean), 69 | Z_rot_median = apply(flip * mcmc_Z_rot, c(2, 3), median), 70 | trends_mean = apply(flip * mcmc_trends_rot, c(2, 3), mean), 71 | trends_median = apply(flip * mcmc_trends_rot, c(2, 3), median), 72 | trends_lower = apply( 73 | flip * mcmc_trends_rot, c(2, 3), 74 | quantile, (1 - conf_level) / 2 75 | ), 76 | trends_upper = apply( 77 | flip * mcmc_trends_rot, c(2, 3), 78 | quantile, 1 - (1 - conf_level) / 2 79 | ) 80 | ) 81 | } 82 | 83 | # Reshape samples: melt the (iterations x chains x parameters) array and rebuild Z as a (draw x timeseries x trend) array and x as a (draw x trend x time) array, parsing indices out of the Stan parameter names. 84 | reshape_samples <- function(samp) { 85 | s <- reshape2::melt(samp) 86 | z <- dplyr::filter(s, grepl("Z\\[", .data$parameters)) 87 | z$trend <- as.numeric(gsub("Z\\[[0-9]+,([0-9]+)\\]", "\\1", z$parameters)) # trend = second index of "Z[ts,trend]" 88 | z$ts <- as.numeric(gsub("Z\\[([0-9]+),([0-9]+)\\]", "\\1", z$parameters)) # ts = first index 89 | Z <- reshape2::acast(z, iterations + chains ~ ts ~ trend, value.var = "value") 90 | x <- dplyr::filter(s, grepl("x\\[", .data$parameters)) 91 | x$trend <- as.numeric(gsub("x\\[([0-9]+),([0-9]+)\\]", "\\1", x$parameters)) # trend = first index of "x[trend,time]" 92 | x$time <- as.numeric(gsub("x\\[([0-9]+),([0-9]+)\\]", "\\2", x$parameters)) # time = second index 93 | x <- reshape2::acast(x, iterations + chains ~ trend ~ time, value.var = "value") 94 | list(Z = Z, x = x) 95 | } 96 |
Note that this loadings matrix will be internally manipulated by
#' setting some elements to 0 and constraining the diagonal elements to be
#' positive so that the model can be fitted. See [fit_dfa()]. See the output
#' element `Z` in the returned list for the manipulated loadings matrix. If not
#' specified, a random matrix `~ N(0, 1)` is used.
#' @param sigma A vector of standard deviations on the observation error. Should
#' be of the same length as the number of unique values in `varIndx` (one
#' standard deviation per observation variance index). If not specified, random
#' numbers are used `rlnorm(1, meanlog = log(0.2), 0.1)`.
#' @param varIndx Indices of unique observation variances. Defaults to `c(1, 1,
#' 1, 1)`. Unique observation error variances would be specified as `c(1, 2, 3,
#' 4)` in the case of 4 time series.
#' @param trend_model The type of trend model. Random walk (`"rw"`) or basis
#' spline (`"bs"`)
#' @param spline_weights A matrix of basis function weights that is used
#' if `trend_model = "bs"`. The number of columns should correspond to
#' the number of knots and the number of rows should correspond to the
#' number of trends.
#' @param extreme_value Value added to the random walk in the extreme time step.
#' Defaults to not included.
#' @param extreme_loc Location of single extreme event in the process. The same
#' for all processes, and defaults to `round(n_t/2)` where `n_t` is the time
#' series length
#' @param nu_fixed Nu is the degrees of freedom parameter for the
#' t-distribution, defaults to 100, which is effectively normal.
#' @param user_supplied_deviations An optional matrix of deviations for the trend
#' random walks. Columns are for trends and rows are for each time step.
#' @return A list with the following elements: `y_sim` is the simulated data,
#' `pred` is the true underlying data without observation error added, `x` is
#' the underlying trends, `Z` is the manipulated loadings matrix that is fed
#' to the model.
#' @importFrom stats rlnorm rnorm rt
#' @importFrom splines splineDesign
#' @examples
#' x <- sim_dfa(num_trends = 2)
#' names(x)
#' matplot(t(x$y_sim), type = "l")
#' matplot(t(x$x), type = "l")
#'
#' set.seed(42)
#' x <- sim_dfa(extreme_value = -4, extreme_loc = 10)
#' matplot(t(x$x), type = "l")
#' abline(v = 10)
#' matplot(t(x$pred), type = "l")
#' abline(v = 10)
#'
#' set.seed(42)
#' x <- sim_dfa()
#' matplot(t(x$x), type = "l")
#' abline(v = 10)
#' matplot(t(x$pred), type = "l")
#' abline(v = 10)
#' @export

sim_dfa <- function(num_trends = 1,
                    num_years = 20,
                    num_ts = 4,
                    loadings_matrix = matrix(
                      nrow = num_ts, ncol = num_trends,
                      rnorm(num_ts * num_trends, 0, 1)
                    ),
                    sigma = rlnorm(1, meanlog = log(0.2), 0.1),
                    varIndx = rep(1, num_ts),
                    trend_model = c("rw", "bs"),
                    spline_weights = matrix(ncol = 6, nrow = num_trends,
                                            data = rnorm(6 * num_trends)),
                    extreme_value = NULL,
                    extreme_loc = NULL,
                    nu_fixed = 100,
                    user_supplied_deviations = NULL) {
  # dummy response: only used so that fit_dfa(estimation = "none") assembles
  # the same data/index structures (N, P, K, row/col indices) the model uses
  y_ignore <- matrix(rnorm(num_ts * num_years), nrow = num_ts, ncol = num_years)

  trend_model <- match.arg(trend_model)
  d <- fit_dfa(y_ignore,
    num_trends = num_trends, estimation = "none", scale = "center",
    varIndx = varIndx, nu_fixed = nu_fixed, trend_model = "rw"
  )

  Z <- loadings_matrix
  y <- vector(mode = "numeric", length = d$sampling_args$data$N)

  # constrain the loadings the same way the fitted model does:
  # positive diagonal, zeros above the diagonal
  for (k in seq_len(d$sampling_args$data$K)) {
    Z[k, k] <- abs(Z[k, k])
  }
  for (k in seq_len(d$sampling_args$data$K)) {
    for (p in seq_len(d$sampling_args$data$P)) {
      if (p < k) Z[p, k] <- 0
    }
  }

  # underlying trends, one row per trend (K), one column per time step (N)
  x <- matrix(nrow = d$sampling_args$data$K, ncol = d$sampling_args$data$N)

  if (trend_model == "rw") {
    n_t <- d$sampling_args$data$N
    for (k in seq_len(d$sampling_args$data$K)) {
      if (!is.null(user_supplied_deviations)) {
        devs <- user_supplied_deviations[, k]
      } else {
        # t-distributed deviations; nu_fixed = 100 is effectively normal
        devs <- rt(n_t, df = d$sampling_args$data$nu_fixed)
      }

      x[k, 1] <- rnorm(1, 0, 1) # initial state for each trend
      if (is.null(extreme_value)) {
        for (t in seq(2, n_t)) {
          x[k, t] <- x[k, t - 1] + devs[t] # random walk
        }
      } else {
        if (is.null(extreme_loc)) extreme_loc <- round(num_years / 2)
        # random walk up to the step before the extreme event; the guard
        # prevents 2:(extreme_loc - 1) from counting downward when
        # extreme_loc <= 2
        if (extreme_loc > 2) {
          for (t in 2:(extreme_loc - 1)) {
            x[k, t] <- x[k, t - 1] + devs[t] # random walk
          }
        }
        # only include extreme in first trend
        if (k == 1) {
          x[1, extreme_loc] <- x[1, extreme_loc - 1] + extreme_value
        } else {
          # bug fix: use the deviation for *this* time step; previously the
          # stale loop index `t` (= extreme_loc - 1) was reused here
          x[k, extreme_loc] <- x[k, extreme_loc - 1] + devs[extreme_loc]
        }
        # guard prevents seq() from counting downward when extreme_loc == n_t
        if (extreme_loc < n_t) {
          for (t in seq(extreme_loc + 1, n_t)) {
            x[k, t] <- x[k, t - 1] + devs[t] # random walk
          }
        }
      }
    }
  } else if (trend_model == "bs") {
    # build a B-spline design matrix (adapted from splines::bs) and project
    # the user-supplied weights onto it to form smooth trends
    df <- ncol(spline_weights)
    degree <- 3
    intercept <- 1L
    ord <- 1 + degree
    Boundary.knots <- c(1, num_years)
    nIknots <- df - ord + (1L - intercept)
    knots <- seq(0, 1, length.out = nIknots + 2L)[-c(1L, nIknots + 2L)]
    knots <- quantile(seq(1, num_years), knots)
    Aknots <- sort(c(rep(Boundary.knots, ord), knots))
    X_spline <- t(splineDesign(Aknots, x = seq(1, num_years), ord))
    x <- spline_weights %*% X_spline
  } else {
    stop("Trend model not defined", call. = FALSE)
  }

  pred <- Z %*% x
  # add observation error; sigma is indexed through varIndx so time series
  # can share observation variances
  for (i in seq_len(d$sampling_args$data$n_pos)) {
    y[i] <- rnorm(
      1, pred[d$sampling_args$data$row_indx_pos[i], d$sampling_args$data$col_indx_pos[i]],
      sigma[d$sampling_args$data$varIndx[d$sampling_args$data$row_indx_pos[i]]]
    )
  }
  y_sim <- matrix(y, nrow = d$sampling_args$data$P)
  list(y_sim = y_sim, pred = pred, x = x, Z = Z, sigma = sigma)
}
-------------------------------------------------------------------------------- /R/stanmodels.R: -------------------------------------------------------------------------------- 1 | # Generated by rstantools. Do not edit by hand. 2 | 3 | # names of stan models 4 | stanmodels <- c("corr", "dfa", "hmm_gaussian", "regime_1") 5 | 6 | # load each stan module 7 | Rcpp::loadModule("stan_fit4corr_mod", what = TRUE) 8 | Rcpp::loadModule("stan_fit4dfa_mod", what = TRUE) 9 | Rcpp::loadModule("stan_fit4hmm_gaussian_mod", what = TRUE) 10 | Rcpp::loadModule("stan_fit4regime_1_mod", what = TRUE) 11 | 12 | # instantiate each stanmodel object 13 | stanmodels <- sapply(stanmodels, function(model_name) { 14 | # create C++ code for stan model 15 | stan_file <- if(dir.exists("stan")) "stan" else file.path("inst", "stan") 16 | stan_file <- file.path(stan_file, paste0(model_name, ".stan")) 17 | stanfit <- rstan::stanc_builder(stan_file, 18 | allow_undefined = TRUE, 19 | obfuscate_model_name = FALSE) 20 | stanfit$model_cpp <- list(model_cppname = stanfit$model_name, 21 | model_cppcode = stanfit$cppcode) 22 | # create stanmodel object 23 | methods::new(Class = "stanmodel", 24 | model_name = stanfit$model_name, 25 | model_code = stanfit$model_code, 26 | model_cpp = stanfit$model_cpp, 27 | mk_cppmodule = function(x) get(paste0("rstantools_model_", model_name))) 28 | }) 29 | -------------------------------------------------------------------------------- /R/trend_cor.R:
-------------------------------------------------------------------------------- 1 | #' Estimate the correlation between a DFA trend and some other timeseries 2 | #' 3 | #' Fully incorporates the uncertainty from the posterior of the DFA trend 4 | #' 5 | #' @param rotated_modelfit Output from [rotate_trends()]. 6 | #' @param y A numeric vector to correlate with the DFA trend. Must be the same 7 | #' length as the DFA trend. 8 | #' @param trend A number corresponding to which trend to use, defaults to 1. 9 | #' @param time_window Indices indicating a time window slice to use in the 10 | #' correlation. Defaults to using the entire time window. Can be used to walk 11 | #' through the timeseries and test the cross correlations. 12 | #' @param trend_samples The number of samples from the trend posterior to use. A 13 | #' model will be run for each trend sample so this value shouldn't be too 14 | #' large. Defaults to 100. 15 | #' @param stan_iter The number of samples from the posterior with each Stan 16 | #' model run, defaults to 300. 17 | #' @param stan_chains The number of chains for each Stan model run, defaults to 18 | #' 1. 19 | #' @param ... Other arguments to pass to \code{\link[rstan]{sampling}} 20 | #' 21 | #' @details Uses a `sigma ~ half_t(3, 0, 2)` prior on the residual standard 22 | #' deviation and a `uniform(-1, 1)` prior on the correlation coefficient. 23 | #' Fitted as a linear regression of `y ~ x`, where y represents the `y` 24 | #' argument to [trend_cor()] and `x` represents the DFA trend, and both `y` 25 | #' and `x` have been scaled by subtracting their means and dividing by their 26 | #' standard deviations. Samples are drawn from the posterior of the trend and 27 | #' repeatedly fed through the Stan regression to come up with a combined 28 | #' posterior of the correlation. 29 | #' 30 | #' @return A numeric vector of samples from the correlation coefficient 31 | #' posterior. 
#'
#' @examples
#' set.seed(1)
#' s <- sim_dfa(num_trends = 1, num_years = 15)
#' m <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 50, chains = 1)
#' r <- rotate_trends(m)
#' n_years <- ncol(r$trends[, 1, ])
#' fake_dat <- rnorm(n_years, 0, 1)
#' correlation <- trend_cor(r, fake_dat, trend_samples = 25)
#' hist(correlation)
#' correlation <- trend_cor(r,
#'   y = fake_dat, time_window = 5:15,
#'   trend_samples = 25
#' )
#' hist(correlation)
#' @export

trend_cor <- function(rotated_modelfit,
                      y,
                      trend = 1,
                      time_window = seq_len(length(y)),
                      trend_samples = 100,
                      stan_iter = 300,
                      stan_chains = 1,
                      ...) {
  # stan_iter must be even so that warmup = stan_iter / 2 divides cleanly
  if (!stan_iter %% 2 == 0) stan_iter <- stan_iter + 1
  if (max(time_window) > length(y)) stop("Maximum time window value is too large")

  # standardize the covariate over the requested window
  y <- as.numeric(scale(y[time_window]))
  x <- rotated_modelfit$trends[, trend, time_window] # samples x trend x time

  if (!identical(ncol(x), length(y))) stop("DFA trend and y must be same length")

  # fail early with a clear message rather than letting sample() error
  # opaquely when more draws are requested than exist in the posterior
  if (trend_samples > nrow(x)) {
    stop(
      "`trend_samples` (", trend_samples,
      ") must not exceed the number of posterior draws (", nrow(x), ")",
      call. = FALSE
    )
  }
  samples <- sample(seq_len(nrow(x)), size = trend_samples)

  # feed each sampled trend draw through the Stan correlation model and
  # pool the posteriors of the correlation coefficient `beta`
  out <- vapply(seq_along(samples), FUN = function(i) {
    xi <- as.numeric(scale(as.numeric(x[samples[i], ])))
    m <- rstan::sampling(
      object = stanmodels$corr,
      data = list(x = xi, y = y, N = length(y)),
      iter = stan_iter, chains = stan_chains, warmup = stan_iter / 2, ...
    )
    rstan::extract(m, pars = "beta")[["beta"]]
  }, FUN.VALUE = numeric(length = stan_iter / 2))
  as.numeric(out)
}
-------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | md_document: 4 | variant: markdown_github 5 | --- 6 | 7 | 8 | 9 | ```{r, echo = FALSE} 10 | knitr::opts_chunk$set( 11 | collapse = TRUE, 12 | comment = "#>", 13 | fig.path = "README-figs/", 14 | cache.path = "README-cache/" 15 | ) 16 | ``` 17 | 18 | # bayesdfa 19 | 20 | [![R build status](https://github.com/fate-ewi/bayesdfa/workflows/R-CMD-check/badge.svg)](https://github.com/fate-ewi/bayesdfa/actions) 21 | 22 | bayesdfa implements Bayesian Dynamic Factor Analysis (DFA) with Stan. 23 | 24 | You can install the development version of the package with: 25 | 26 | ```{r, eval=FALSE} 27 | # install.packages("devtools") 28 | devtools::install_github("fate-ewi/bayesdfa") 29 | ``` 30 | 31 | ## Overview 32 | 33 | A brief video overview of the package is here, 34 | 35 |
36 | 37 |
38 | 39 | ## Vignettes 40 | We've put together several vignettes for using the `bayesdfa` package. 41 | [Overview](https://fate-ewi.github.io/bayesdfa/articles/a1_bayesdfa.html) 42 | [Combining data](https://fate-ewi.github.io/bayesdfa/articles/a2_combining_data.html) 43 | [Including covariates](https://fate-ewi.github.io/bayesdfa/articles/a3_covariates.html) 44 | [Smooth trend models](https://fate-ewi.github.io/bayesdfa/articles/a4_smooth.html) 45 | [Estimating process variance](https://fate-ewi.github.io/bayesdfa/articles/a5_estimate_process_sigma.html) 46 | [Compositional models](https://fate-ewi.github.io/bayesdfa/articles/a6_compositional.html) 47 | [DFA for big data](https://fate-ewi.github.io/bayesdfa/articles/a7_bigdata.html). 48 | 49 | Additional examples can be found in the course that Eli Holmes, Mark Scheuerell, and Eric Ward teach at the University of Washington: 50 | [Course webpage](https://nwfsc-timeseries.github.io/atsa/) 51 | [Lab book](https://nwfsc-timeseries.github.io/atsa/) 52 | 53 | ## Citing 54 | 55 | For DFA models in general, we recommend citing the MARSS package or user guide. 56 | 57 | ``` 58 | @article{marss_package, 59 | title = {{MARSS}: multivariate autoregressive state-space models for analyzing time-series data}, 60 | volume = {4}, 61 | url = {https://pdfs.semanticscholar.org/5d41/b86dff5f977a0eac426a924cf7917220fc9a.pdf}, 62 | number = {1}, 63 | journal = {R Journal}, 64 | author = {Holmes, E.E. and Ward, Eric J. and Wills, K.}, 65 | year = {2012}, 66 | pages = {11--19} 67 | } 68 | ``` 69 | 70 | ``` 71 | @article{marss_user_guide, 72 | title = {{MARSS}: Analysis of multivariate timeseries using the MARSS package}, 73 | url = {https://cran.r-project.org/web/packages/MARSS/vignettes/UserGuide.pdf}, 74 | author = {Holmes, E.E. and Scheurell, M.D. 
and Ward, Eric J.}, 75 | year = {2020}, 76 | } 77 | ``` 78 | 79 | For citing the `bayesdfa` package using Bayesian estimation, or models with 80 | extra features (such as extremes), cite 81 | 82 | 83 | 84 | ``` 85 | @article{ward_etal_2019, 86 | author = {Eric J. Ward and Sean C. Anderson and Luis A. Damiano and 87 | Mary E. Hunsicker and Michael A. Litzow}, 88 | title = {{Modeling regimes with extremes: the bayesdfa package for 89 | identifying and forecasting common trends and anomalies in 90 | multivariate time-series data}}, 91 | year = {2019}, 92 | journal = {{The R Journal}}, 93 | doi = {10.32614/RJ-2019-007}, 94 | url = {https://journal.r-project.org/archive/2019/RJ-2019-007/index.html} 95 | } 96 | ``` 97 | 98 | ### Applications 99 | 100 | The 'bayesdfa' models were presented to the PFMC's SSC in November 2017 and have been included in the 2018 California Current Integrated Ecosystem Report, https://www.integratedecosystemassessment.noaa.gov/Assets/iea/california/Report/pdf/CCIEA-status-report-2018.pdf 101 | 102 | ### Funding 103 | The 'bayesdfa' package was funded by a NOAA Fisheries and the Environment (FATE) grant on early warning indicators, led by Mary Hunsicker and Mike Litzow. 104 | 105 | ### NOAA Disclaimer 106 | 107 | This repository is a scientific product and is not official communication of the National Oceanic and Atmospheric Administration, or the United States Department of Commerce. All NOAA GitHub project code is provided on an ‘as is’ basis and the user assumes responsibility for its use. Any claims against the Department of Commerce or Department of Commerce bureaus stemming from the use of this GitHub project will be governed by all applicable Federal law. Any reference to specific commercial products, processes, or services by service mark, trademark, manufacturer, or otherwise, does not constitute or imply their endorsement, recommendation or favoring by the Department of Commerce. 
The Department of Commerce seal and logo, or the seal and logo of a DOC bureau, shall not be used in any manner to imply endorsement of any commercial product or activity by DOC or the United States Government. 108 | 109 | NOAA Fisheries 110 | 111 | [U.S. Department of Commerce](https://www.commerce.gov/) | [National Oceanographic and Atmospheric Administration](https://www.noaa.gov) | [NOAA Fisheries](https://www.fisheries.noaa.gov/) 112 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # bayesdfa 4 | 5 | [![R build 6 | status](https://github.com/fate-ewi/bayesdfa/workflows/R-CMD-check/badge.svg)](https://github.com/fate-ewi/bayesdfa/actions) 7 | 8 | bayesdfa implements Bayesian Dynamic Factor Analysis (DFA) with Stan. 9 | 10 | You can install the development version of the package with: 11 | 12 | ``` r 13 | # install.packages("devtools") 14 | devtools::install_github("fate-ewi/bayesdfa") 15 | ``` 16 | 17 | ## Overview 18 | 19 | A brief video overview of the package is here, 20 | 21 |
22 | 24 |
25 | 26 | ## Vignettes 27 | 28 | We’ve put together several vignettes for using the `bayesdfa` package. 29 | [Overview](https://fate-ewi.github.io/bayesdfa/articles/a1_bayesdfa.html) 30 | [Combining 31 | data](https://fate-ewi.github.io/bayesdfa/articles/a2_combining_data.html) 32 | [Including 33 | covariates](https://fate-ewi.github.io/bayesdfa/articles/a3_covariates.html) 34 | [Smooth trend 35 | models](https://fate-ewi.github.io/bayesdfa/articles/a4_smooth.html) 36 | [Estimating process 37 | variance](https://fate-ewi.github.io/bayesdfa/articles/a5_estimate_process_sigma.html) 38 | [Compositional 39 | models](https://fate-ewi.github.io/bayesdfa/articles/a6_compositional.html) 40 | [DFA for big 41 | data](https://fate-ewi.github.io/bayesdfa/articles/a7_bigdata.html). 42 | 43 | Additional examples can be found in the course that Eli Holmes, Mark 44 | Scheuerell, and Eric Ward teach at the University of Washington: 45 | [Course webpage](https://nwfsc-timeseries.github.io/atsa/) 46 | [Lab book](https://nwfsc-timeseries.github.io/atsa/) 47 | 48 | ## Citing 49 | 50 | For DFA models in general, we recommend citing the MARSS package or user 51 | guide. 52 | 53 | @article{marss_package, 54 | title = {{MARSS}: multivariate autoregressive state-space models for analyzing time-series data}, 55 | volume = {4}, 56 | url = {https://pdfs.semanticscholar.org/5d41/b86dff5f977a0eac426a924cf7917220fc9a.pdf}, 57 | number = {1}, 58 | journal = {R Journal}, 59 | author = {Holmes, E.E. and Ward, Eric J. and Wills, K.}, 60 | year = {2012}, 61 | pages = {11--19} 62 | } 63 | 64 | @article{marss_user_guide, 65 | title = {{MARSS}: Analysis of multivariate timeseries using the MARSS package}, 66 | url = {https://cran.r-project.org/web/packages/MARSS/vignettes/UserGuide.pdf}, 67 | author = {Holmes, E.E. and Scheurell, M.D. 
and Ward, Eric J.}, 68 | year = {2020}, 69 | } 70 | 71 | For citing the `bayesdfa` package using Bayesian estimation, or models 72 | with extra features (such as extremes), cite 73 | 74 | 75 | 76 | @article{ward_etal_2019, 77 | author = {Eric J. Ward and Sean C. Anderson and Luis A. Damiano and 78 | Mary E. Hunsicker and Michael A. Litzow}, 79 | title = {{Modeling regimes with extremes: the bayesdfa package for 80 | identifying and forecasting common trends and anomalies in 81 | multivariate time-series data}}, 82 | year = {2019}, 83 | journal = {{The R Journal}}, 84 | doi = {10.32614/RJ-2019-007}, 85 | url = {https://journal.r-project.org/archive/2019/RJ-2019-007/index.html} 86 | } 87 | 88 | ### Applications 89 | 90 | The ‘bayesdfa’ models were presented to the PFMC’s SSC in November 2017 91 | and have been included in the 2018 California Current Integrated 92 | Ecosystem Report, 93 | 94 | 95 | ### Funding 96 | 97 | The ‘bayesdfa’ package was funded by a NOAA Fisheries and the 98 | Environment (FATE) grant on early warning indicators, led by Mary 99 | Hunsicker and Mike Litzow. 100 | 101 | ### NOAA Disclaimer 102 | 103 | This repository is a scientific product and is not official 104 | communication of the National Oceanic and Atmospheric Administration, or 105 | the United States Department of Commerce. All NOAA GitHub project code 106 | is provided on an ‘as is’ basis and the user assumes responsibility for 107 | its use. Any claims against the Department of Commerce or Department of 108 | Commerce bureaus stemming from the use of this GitHub project will be 109 | governed by all applicable Federal law. Any reference to specific 110 | commercial products, processes, or services by service mark, trademark, 111 | manufacturer, or otherwise, does not constitute or imply their 112 | endorsement, recommendation or favoring by the Department of Commerce. 
113 | The Department of Commerce seal and logo, or the seal and logo of a DOC 114 | bureau, shall not be used in any manner to imply endorsement of any 115 | commercial product or activity by DOC or the United States Government. 116 | 117 | NOAA Fisheries 118 | 119 | [U.S. Department of Commerce](https://www.commerce.gov/) \| [National 120 | Oceanographic and Atmospheric Administration](https://www.noaa.gov) \| 121 | [NOAA Fisheries](https://www.fisheries.noaa.gov/) 122 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | reference: 2 | - title: "Using the bayesdfa package" 3 | desc: > 4 | Functions for fitting Bayesian dynamic factor analyses 5 | - contents: 6 | - fit_dfa 7 | - sim_dfa 8 | - find_dfa_trends 9 | - dfa_cv 10 | - title: "Diagnostics for fitted models" 11 | desc: > 12 | Functions for evaluating convergence of DFA moddels 13 | - contents: 14 | - find_swans 15 | - is_converged 16 | - loo 17 | - predicted 18 | - rotate_trends 19 | - trend_cor 20 | - title: "Extracting DFA output" 21 | desc: > 22 | Functions for extracting common outputs 23 | - contents: 24 | - dfa_fitted 25 | - dfa_loadings 26 | - dfa_trends 27 | - title: "Univariate Hidden Markov Models" 28 | desc: > 29 | Functions for evaluating regimes with univariate HMMs 30 | - contents: 31 | - hmm_init 32 | - find_regimes 33 | - fit_regimes 34 | - title: "Plotting" 35 | desc: > 36 | Functions for plotting DFA and HMM models 37 | - contents: 38 | - plot_fitted 39 | - plot_loadings 40 | - plot_regime_model 41 | - plot_trends 42 | - title: internal 43 | contents: 44 | - bayesdfa-package 45 | - find_inverted_chains 46 | - invert_chains 47 | -------------------------------------------------------------------------------- /bayesdfa.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | 
SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageBuildBinaryArgs: --staged-install 22 | PackageRoxygenize: rd,collate,namespace,vignette 23 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | # Generated by rstantools. Do not edit by hand. 2 | 3 | #! /bin/sh 4 | "${R_HOME}/bin/Rscript" -e "rstantools::rstan_config()" 5 | -------------------------------------------------------------------------------- /configure.win: -------------------------------------------------------------------------------- 1 | # Generated by rstantools. Do not edit by hand. 2 | 3 | #! /bin/sh 4 | "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "rstantools::rstan_config()" 5 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Test environments 2 | 3 | * local OS X install, R 3.6.0 (and more recent) 4 | * ubuntu 14.04.5 LTSR (on travis-ci), R Under development (unstable) 5 | * win-builder (devel and release) 6 | 7 | ## R CMD check results 8 | 9 | 0 errors | 1 warnings | 2 notes 10 | 11 | * installed size is 7.3Mb; sub-directories of 1Mb or more: libs 5.8Mb 12 | * GNU make is a SystemRequirements 13 | * Warning A complete check needs the 'checkbashisms' script 14 | 15 | Explanation: this is from the compiled 'Stan' model and associated libraries, and is necessary https://mc-stan.org/rstantools/articles/minimal-rstan-package.html. 
16 | 17 | Note that we have an associated manuscript accepted in The R Journal. Citation: 18 | Eric J. Ward, Sean C. Anderson, Luis A. Damiano, Mary E. Hunsicker and Michael A. Litzow , The R Journal (2019) 11:2, pages 46-55, https://journal.r-project.org/archive/2019/RJ-2019-007/RJ-2019-007.pdf 19 | -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Page not found (404) • bayesdfa 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 20 | 21 | 22 | 23 | 24 |
25 |
92 | 93 | 94 | 95 | 96 |
97 |
98 | 101 | 102 | Content not found. Please use links in the navbar. 103 | 104 |
105 | 106 | 110 | 111 |
112 | 113 | 114 | 115 |
119 | 120 |
121 |

122 |

Site built with pkgdown 2.0.9.

123 |
124 | 125 |
126 |
127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /docs/articles/a1_bayesdfa_files/figure-html/fit-extreme-dfa-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a1_bayesdfa_files/figure-html/fit-extreme-dfa-1.png -------------------------------------------------------------------------------- /docs/articles/a1_bayesdfa_files/figure-html/plot-1-fitted-example-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a1_bayesdfa_files/figure-html/plot-1-fitted-example-1.png -------------------------------------------------------------------------------- /docs/articles/a1_bayesdfa_files/figure-html/plot-1-trend-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a1_bayesdfa_files/figure-html/plot-1-trend-1.png -------------------------------------------------------------------------------- /docs/articles/a1_bayesdfa_files/figure-html/plot-2-fitted-example-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a1_bayesdfa_files/figure-html/plot-2-fitted-example-1.png -------------------------------------------------------------------------------- /docs/articles/a1_bayesdfa_files/figure-html/plot-extreme-loadings-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a1_bayesdfa_files/figure-html/plot-extreme-loadings-1.png -------------------------------------------------------------------------------- /docs/articles/a1_bayesdfa_files/figure-html/plot-loadings-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a1_bayesdfa_files/figure-html/plot-loadings-1.png -------------------------------------------------------------------------------- /docs/articles/a1_bayesdfa_files/figure-html/simulate-data-plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a1_bayesdfa_files/figure-html/simulate-data-plot-1.png -------------------------------------------------------------------------------- /docs/articles/a1_bayesdfa_files/figure-html/simulate-data-plot2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a1_bayesdfa_files/figure-html/simulate-data-plot2-1.png -------------------------------------------------------------------------------- /docs/articles/a1_bayesdfa_files/figure-html/simulate-data-plot3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a1_bayesdfa_files/figure-html/simulate-data-plot3-1.png -------------------------------------------------------------------------------- /docs/articles/a2_combining_data_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a2_combining_data_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /docs/articles/a2_combining_data_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a2_combining_data_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/a2_combining_data_files/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a2_combining_data_files/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /docs/articles/a2_combining_data_files/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a2_combining_data_files/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /docs/articles/a3_covariates_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a3_covariates_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/a3_covariates_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a3_covariates_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/articles/a4_smooth_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a4_smooth_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /docs/articles/a5_estimate_process_sigma_files/figure-html/simulate-data-plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a5_estimate_process_sigma_files/figure-html/simulate-data-plot-1.png -------------------------------------------------------------------------------- /docs/articles/a6_compositional_files/figure-html/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a6_compositional_files/figure-html/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /docs/articles/a6_compositional_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a6_compositional_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /docs/articles/a7_bigdata_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/articles/a7_bigdata_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | Articles • bayesdfa 6 | 7 | 8 |
9 |
68 | 69 | 70 | 71 |
98 | 99 | 100 |
103 | 104 |
105 |

Site built with pkgdown 2.0.9.

106 |
107 | 108 |
109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | 
nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | (function() { 6 | 'use strict'; 7 | 8 | window.Toc = { 9 | helpers: { 10 | // return all matching elements in the set, or their descendants 11 | findOrFilter: function($el, selector) { 12 | // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ 13 | // http://stackoverflow.com/a/12731439/358804 14 | var $descendants = $el.find(selector); 15 | return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); 16 | }, 17 | 18 | generateUniqueIdBase: function(el) { 19 | var text = $(el).text(); 20 | var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); 21 | return anchor || el.tagName.toLowerCase(); 22 | }, 23 | 24 | generateUniqueId: function(el) { 25 | var anchorBase = this.generateUniqueIdBase(el); 26 | for (var i = 0; ; i++) { 27 | var anchor = anchorBase; 28 | if (i > 0) { 29 | // add suffix 30 | anchor += '-' + i; 31 | } 32 | // check if ID already exists 33 | if (!document.getElementById(anchor)) { 34 | return anchor; 35 | } 36 | } 37 | }, 38 | 39 | generateAnchor: function(el) { 40 | if (el.id) { 41 | return el.id; 42 | } else { 43 | var anchor = this.generateUniqueId(el); 44 | el.id = anchor; 45 | return anchor; 46 | } 47 | }, 48 | 49 | createNavList: function() { 50 | 
return $(''); 51 | }, 52 | 53 | createChildNavList: function($parent) { 54 | var $childList = this.createNavList(); 55 | $parent.append($childList); 56 | return $childList; 57 | }, 58 | 59 | generateNavEl: function(anchor, text) { 60 | var $a = $(''); 61 | $a.attr('href', '#' + anchor); 62 | $a.text(text); 63 | var $li = $('
  • '); 64 | $li.append($a); 65 | return $li; 66 | }, 67 | 68 | generateNavItem: function(headingEl) { 69 | var anchor = this.generateAnchor(headingEl); 70 | var $heading = $(headingEl); 71 | var text = $heading.data('toc-text') || $heading.text(); 72 | return this.generateNavEl(anchor, text); 73 | }, 74 | 75 | // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). 76 | getTopLevel: function($scope) { 77 | for (var i = 1; i <= 6; i++) { 78 | var $headings = this.findOrFilter($scope, 'h' + i); 79 | if ($headings.length > 1) { 80 | return i; 81 | } 82 | } 83 | 84 | return 1; 85 | }, 86 | 87 | // returns the elements for the top level, and the next below it 88 | getHeadings: function($scope, topLevel) { 89 | var topSelector = 'h' + topLevel; 90 | 91 | var secondaryLevel = topLevel + 1; 92 | var secondarySelector = 'h' + secondaryLevel; 93 | 94 | return this.findOrFilter($scope, topSelector + ',' + secondarySelector); 95 | }, 96 | 97 | getNavLevel: function(el) { 98 | return parseInt(el.tagName.charAt(1), 10); 99 | }, 100 | 101 | populateNav: function($topContext, topLevel, $headings) { 102 | var $context = $topContext; 103 | var $prevNav; 104 | 105 | var helpers = this; 106 | $headings.each(function(i, el) { 107 | var $newNav = helpers.generateNavItem(el); 108 | var navLevel = helpers.getNavLevel(el); 109 | 110 | // determine the proper $context 111 | if (navLevel === topLevel) { 112 | // use top level 113 | $context = $topContext; 114 | } else if ($prevNav && $context === $topContext) { 115 | // create a new level of the tree and switch to it 116 | $context = helpers.createChildNavList($prevNav); 117 | } // else use the current $context 118 | 119 | $context.append($newNav); 120 | 121 | $prevNav = $newNav; 122 | }); 123 | }, 124 | 125 | parseOps: function(arg) { 126 | var opts; 127 | if (arg.jquery) { 128 | opts = { 129 | $nav: arg 130 | }; 131 | } else { 132 | opts = arg; 133 | } 134 | opts.$scope = opts.$scope || $(document.body); 135 | return opts; 136 | } 137 | }, 138 | 139 | // accepts a jQuery object, or an options object 140 | init: function(opts) { 141 | opts = this.helpers.parseOps(opts); 142 | 143 | // ensure that the data attribute is in place for styling 144 | opts.$nav.attr('data-toggle', 'toc'); 145 | 146 | var $topContext = this.helpers.createChildNavList(opts.$nav); 147 | var topLevel = 
this.helpers.getTopLevel(opts.$scope); 148 | var $headings = this.helpers.getHeadings(opts.$scope, topLevel); 149 | this.helpers.populateNav($topContext, topLevel, $headings); 150 | } 151 | }; 152 | 153 | $(function() { 154 | $('nav[data-toggle="toc"]').each(function(i, el) { 155 | var $nav = $(el); 156 | Toc.init($nav); 157 | }); 158 | }); 159 | })(); 160 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 
59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/extra.css: -------------------------------------------------------------------------------- 1 | @import url("https://nmfs-general-modeling-tools.github.io/nmfspalette/extra.css"); 2 | -------------------------------------------------------------------------------- /docs/extra.ss: -------------------------------------------------------------------------------- 1 | @import url("https://nmfs-general-modeling-tools.github.io/nmfspalette/extra.css"); 2 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('[data-toggle="tooltip"]').tooltip(); 13 | 14 | var cur_path = paths(location.pathname); 
15 | var links = $("#navbar ul li a"); 16 | var max_length = -1; 17 | var pos = -1; 18 | for (var i = 0; i < links.length; i++) { 19 | if (links[i].getAttribute("href") === "#") 20 | continue; 21 | // Ignore external links 22 | if (links[i].host !== location.host) 23 | continue; 24 | 25 | var nav_path = paths(links[i].pathname); 26 | 27 | var length = prefix_length(nav_path, cur_path); 28 | if (length > max_length) { 29 | max_length = length; 30 | pos = i; 31 | } 32 | } 33 | 34 | // Add class to parent
  • , and enclosing
  • if in dropdown 35 | if (pos >= 0) { 36 | var menu_anchor = $(links[pos]); 37 | menu_anchor.parent().addClass("active"); 38 | menu_anchor.closest("li.dropdown").addClass("active"); 39 | } 40 | }); 41 | 42 | function paths(pathname) { 43 | var pieces = pathname.split("/"); 44 | pieces.shift(); // always starts with / 45 | 46 | var end = pieces[pieces.length - 1]; 47 | if (end === "index.html" || end === "") 48 | pieces.pop(); 49 | return(pieces); 50 | } 51 | 52 | // Returns -1 if not found 53 | function prefix_length(needle, haystack) { 54 | if (needle.length > haystack.length) 55 | return(-1); 56 | 57 | // Special case for length-0 haystack, since for loop won't run 58 | if (haystack.length === 0) { 59 | return(needle.length === 0 ? 0 : -1); 60 | } 61 | 62 | for (var i = 0; i < haystack.length; i++) { 63 | if (needle[i] != haystack[i]) 64 | return(i); 65 | } 66 | 67 | return(haystack.length); 68 | } 69 | 70 | /* Clipboard --------------------------*/ 71 | 72 | function changeTooltipMessage(element, msg) { 73 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 74 | element.setAttribute('data-original-title', msg); 75 | $(element).tooltip('show'); 76 | element.setAttribute('data-original-title', tooltipOriginalTitle); 77 | } 78 | 79 | if(ClipboardJS.isSupported()) { 80 | $(document).ready(function() { 81 | var copyButton = ""; 82 | 83 | $("div.sourceCode").addClass("hasCopyButton"); 84 | 85 | // Insert copy buttons: 86 | $(copyButton).prependTo(".hasCopyButton"); 87 | 88 | // Initialize tooltips: 89 | $('.btn-copy-ex').tooltip({container: 'body'}); 90 | 91 | // Initialize clipboard: 92 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 93 | text: function(trigger) { 94 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 95 | } 96 | }); 97 | 98 | clipboardBtnCopies.on('success', function(e) { 99 | changeTooltipMessage(e.trigger, 'Copied!'); 100 | e.clearSelection(); 101 | }); 102 | 103 | 
clipboardBtnCopies.on('error', function() { 104 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 105 | }); 106 | }); 107 | } 108 | })(window.jQuery || window.$) 109 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 3.1.11 2 | pkgdown: 2.0.9 3 | pkgdown_sha: ~ 4 | articles: 5 | a1_bayesdfa: a1_bayesdfa.html 6 | a2_combining_data: a2_combining_data.html 7 | a3_covariates: a3_covariates.html 8 | a4_smooth: a4_smooth.html 9 | a5_estimate_process_sigma: a5_estimate_process_sigma.html 10 | a6_compositional: a6_compositional.html 11 | a7_bigdata: a7_bigdata.html 12 | last_built: 2024-10-20T20:49Z 13 | 14 | -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/reference/Rplot002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/Rplot002.png -------------------------------------------------------------------------------- /docs/reference/Rplot003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/Rplot003.png -------------------------------------------------------------------------------- /docs/reference/Rplot004.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/Rplot004.png -------------------------------------------------------------------------------- /docs/reference/Rplot005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/Rplot005.png -------------------------------------------------------------------------------- /docs/reference/Rplot006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/Rplot006.png -------------------------------------------------------------------------------- /docs/reference/bayesdfa-package.html: -------------------------------------------------------------------------------- 1 | 2 | The 'bayesdfa' package. — bayesdfa-package • bayesdfa 6 | 7 | 8 |
    9 |
    68 | 69 | 70 | 71 |
    72 |
    73 | 78 | 79 |
    80 |

    A DESCRIPTION OF THE PACKAGE

    81 |
    82 | 83 | 84 |
    85 |

    References

    86 |

    Stan Development Team (2020). RStan: the R interface to Stan. R package version 2.21.2. https://mc-stan.org

    87 |
    88 | 89 |
    90 | 93 |
    94 | 95 | 96 |
    99 | 100 |
    101 |

    Site built with pkgdown 2.0.9.

    102 |
    103 | 104 |
    105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /docs/reference/find_inverted_chains-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/find_inverted_chains-1.png -------------------------------------------------------------------------------- /docs/reference/find_swans-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/find_swans-1.png -------------------------------------------------------------------------------- /docs/reference/find_swans-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/find_swans-2.png -------------------------------------------------------------------------------- /docs/reference/find_swans-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/find_swans-3.png -------------------------------------------------------------------------------- /docs/reference/plot_fitted-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/plot_fitted-1.png -------------------------------------------------------------------------------- /docs/reference/plot_fitted-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/plot_fitted-2.png -------------------------------------------------------------------------------- /docs/reference/plot_loadings-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/plot_loadings-1.png -------------------------------------------------------------------------------- /docs/reference/plot_loadings-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/plot_loadings-2.png -------------------------------------------------------------------------------- /docs/reference/plot_loadings-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/plot_loadings-3.png -------------------------------------------------------------------------------- /docs/reference/plot_loadings-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/plot_loadings-4.png -------------------------------------------------------------------------------- /docs/reference/plot_regime_model-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/plot_regime_model-1.png -------------------------------------------------------------------------------- /docs/reference/plot_regime_model-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/plot_regime_model-2.png -------------------------------------------------------------------------------- /docs/reference/plot_regime_model-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/plot_regime_model-3.png -------------------------------------------------------------------------------- /docs/reference/plot_trends-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/plot_trends-1.png -------------------------------------------------------------------------------- /docs/reference/rotate_trends-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/rotate_trends-1.png -------------------------------------------------------------------------------- /docs/reference/sim_dfa-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/sim_dfa-1.png -------------------------------------------------------------------------------- /docs/reference/sim_dfa-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/sim_dfa-2.png -------------------------------------------------------------------------------- /docs/reference/sim_dfa-3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/sim_dfa-3.png -------------------------------------------------------------------------------- /docs/reference/sim_dfa-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/sim_dfa-4.png -------------------------------------------------------------------------------- /docs/reference/sim_dfa-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/sim_dfa-5.png -------------------------------------------------------------------------------- /docs/reference/sim_dfa-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/sim_dfa-6.png -------------------------------------------------------------------------------- /docs/reference/trend_cor-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/trend_cor-1.png -------------------------------------------------------------------------------- /docs/reference/trend_cor-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/docs/reference/trend_cor-2.png -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | /404.html 5 | 6 | 7 | /articles/a1_bayesdfa.html 8 | 9 | 10 | /articles/a2_combining_data.html 11 
| 12 | 13 | /articles/a3_covariates.html 14 | 15 | 16 | /articles/a4_smooth.html 17 | 18 | 19 | /articles/a5_estimate_process_sigma.html 20 | 21 | 22 | /articles/a6_compositional.html 23 | 24 | 25 | /articles/a7_bigdata.html 26 | 27 | 28 | /articles/index.html 29 | 30 | 31 | /authors.html 32 | 33 | 34 | /index.html 35 | 36 | 37 | /news/index.html 38 | 39 | 40 | /reference/bayesdfa-package.html 41 | 42 | 43 | /reference/dfa_cv.html 44 | 45 | 46 | /reference/dfa_fitted.html 47 | 48 | 49 | /reference/dfa_loadings.html 50 | 51 | 52 | /reference/dfa_trends.html 53 | 54 | 55 | /reference/find_dfa_trends.html 56 | 57 | 58 | /reference/find_inverted_chains.html 59 | 60 | 61 | /reference/find_regimes.html 62 | 63 | 64 | /reference/find_swans.html 65 | 66 | 67 | /reference/fit_dfa.html 68 | 69 | 70 | /reference/fit_regimes.html 71 | 72 | 73 | /reference/hmm_init.html 74 | 75 | 76 | /reference/index.html 77 | 78 | 79 | /reference/invert_chains.html 80 | 81 | 82 | /reference/is_converged.html 83 | 84 | 85 | /reference/loo.html 86 | 87 | 88 | /reference/plot_fitted.html 89 | 90 | 91 | /reference/plot_loadings.html 92 | 93 | 94 | /reference/plot_regime_model.html 95 | 96 | 97 | /reference/plot_trends.html 98 | 99 | 100 | /reference/predicted.html 101 | 102 | 103 | /reference/rotate_trends.html 104 | 105 | 106 | /reference/sim_dfa.html 107 | 108 | 109 | /reference/trend_cor.html 110 | 111 | 112 | -------------------------------------------------------------------------------- /inst/include/stan_meta_header.hpp: -------------------------------------------------------------------------------- 1 | // Insert all #include statements here 2 | -------------------------------------------------------------------------------- /inst/stan/corr.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; 3 | vector[N] y; 4 | vector[N] x; 5 | } 6 | parameters { 7 | real beta; 8 | real sigma; 9 | } 10 | model { 11 | // priors 12 | sigma ~ 
student_t(3, 0, 2); 13 | y ~ normal(beta * x, sigma); 14 | } 15 | -------------------------------------------------------------------------------- /inst/stan/hmm_gaussian.stan: -------------------------------------------------------------------------------- 1 | // copied with minor modifications from https://github.com/luisdamiano/stancon18 2 | // CC-BY 4.0 3 | 4 | functions { 5 | vector normalize(vector x) { 6 | return x / sum(x); 7 | } 8 | } 9 | 10 | data { 11 | int T; // number of observations (length) 12 | int K; // number of hidden states 13 | array[T] real x_t; // observations 14 | int est_sigma; // flag, whether to estimate sigma (1) or use values passed in (0) 15 | array[T] real sigma_t; // estimated sigma for each observation 16 | } 17 | 18 | parameters { 19 | // Discrete state model 20 | simplex[K] p_1k; // initial state probabilities 21 | array[K] simplex[K] A_ij; // transition probabilities 22 | // A_ij[i][j] = p(z_t = j | z_{t-1} = i) 23 | // Continuous observation model 24 | ordered[K] mu_k; // observation means 25 | array[K] real sigma_k; // observation standard deviations, optionally estimated if est_sigma == 1. Can the quantity K * est_sigma be used to dimension sigma_k? 26 | } 27 | 28 | transformed parameters { 29 | array[T] vector[K] unalpha_tk; 30 | 31 | { // Forward algorithm log p(z_t = j | x_{1:t}) 32 | array[K] real accumulator; 33 | 34 | if(est_sigma == 1) { 35 | // use estimated sigma values 36 | unalpha_tk[1] = log(p_1k) + normal_lpdf(x_t[1] | mu_k, sigma_k); 37 | } else { 38 | // otherwise use values passed in by user, fixed 39 | unalpha_tk[1] = log(p_1k) + normal_lpdf(x_t[1] | mu_k, sigma_t[1]); 40 | } 41 | 42 | for (t in 2:T) { 43 | for (j in 1:K) { // j = current (t) 44 | for (i in 1:K) { // i = previous (t-1) 45 | // Murphy (2012) Eq. 
17.48 46 | // belief state + transition prob + local evidence at t 47 | if(est_sigma == 1) { 48 | // use estimated sigma values 49 | accumulator[i] = unalpha_tk[t-1, i] + log(A_ij[i, j]) + normal_lpdf(x_t[t] | mu_k[j], sigma_k[j]); 50 | } else { 51 | // otherwise use values passed in by user, fixed 52 | accumulator[i] = unalpha_tk[t-1, i] + log(A_ij[i, j]) + normal_lpdf(x_t[t] | mu_k[j], sigma_t[t]); 53 | } 54 | 55 | } 56 | unalpha_tk[t, j] = log_sum_exp(accumulator); 57 | } 58 | } 59 | } // Forward 60 | } 61 | 62 | model { 63 | sigma_k ~ student_t(3, 0, 1); 64 | mu_k ~ student_t(3, 0, 3); 65 | target += log_sum_exp(unalpha_tk[T]); // Note: update based only on last unalpha_tk 66 | } 67 | 68 | generated quantities { 69 | array[T] vector[K] unbeta_tk; 70 | array[T] vector[K] ungamma_tk; 71 | array[T] vector[K] alpha_tk; 72 | array[T] vector[K] beta_tk; 73 | array[T] vector[K] gamma_tk; 74 | vector[T] log_lik; // added to store log-likelihood for calculation of LOOIC 75 | array[T] int zstar_t; 76 | real logp_zstar_t; 77 | 78 | { // Forward algortihm 79 | for (t in 1:T) 80 | alpha_tk[t] = softmax(unalpha_tk[t]); 81 | } // Forward 82 | 83 | { // Backward algorithm log p(x_{t+1:T} | z_t = j) 84 | array[K] real accumulator; 85 | 86 | for (j in 1:K) 87 | unbeta_tk[T, j] = 1; 88 | 89 | for (tforward in 0:(T-2)) { 90 | int t; 91 | t = T - tforward; 92 | 93 | for (j in 1:K) { // j = previous (t-1) 94 | for (i in 1:K) { // i = next (t) 95 | // Murphy (2012) Eq. 
17.58 96 | // backwards t + transition prob + local evidence at t 97 | if(est_sigma == 1) { 98 | accumulator[i] = unbeta_tk[t, i] + log(A_ij[j, i]) + normal_lpdf(x_t[t] | mu_k[i], sigma_k[i]); 99 | } else { 100 | accumulator[i] = unbeta_tk[t, i] + log(A_ij[j, i]) + normal_lpdf(x_t[t] | mu_k[i], sigma_t[t]); 101 | } 102 | 103 | } 104 | unbeta_tk[t-1, j] = log_sum_exp(accumulator); 105 | } 106 | } 107 | 108 | for (t in 1:T) 109 | beta_tk[t] = softmax(unbeta_tk[t]); 110 | } // Backward 111 | 112 | { // Forwards-backwards algorithm log p(z_t = j | x_{1:T}) 113 | for(t in 1:T) { 114 | ungamma_tk[t] = alpha_tk[t] .* beta_tk[t]; 115 | gamma_tk[t] = normalize(ungamma_tk[t]); 116 | } 117 | 118 | for(t in 1:T) { 119 | // gamma_tk is vector of normalized probability of state given all data, p(z_t = j | x_{1:T}) 120 | 121 | log_lik[t] = 0; // initialize 122 | // log_lik accumulator. need to sum to integrate over states, 123 | // p(x_t) = p(x_t | z_t = 1) * p(z_t = 1)... 124 | // gamma_tk is p(x_t | z_t = k), alpha_tk is p(z_t = k | x[1:T]) 125 | //if(est_sigma == 1) { 126 | for (j in 1:K) { 127 | log_lik[t] = log_lik[t] + gamma_tk[t,j]*alpha_tk[t,j]; 128 | } 129 | //} else { 130 | // for (j in 1:K) 131 | // log_lik[t] = log_lik[t] + gamma_tk[t,j]*alpha_tk[t,j]; 132 | //} 133 | log_lik[t] = log(log_lik[t]); 134 | } 135 | 136 | } // Forwards-backwards 137 | 138 | { // Viterbi algorithm 139 | array[T, K] int a_tk; // backpointer to the most likely previous state on the most probable path 140 | array[T, K] real delta_tk; // max prob for the seq up to t 141 | // with final output from state k for time t 142 | if(est_sigma == 1) { 143 | for (j in 1:K) 144 | delta_tk[1, j] = normal_lpdf(x_t[1] | mu_k[j], sigma_k[j]); // bugfix: index j (was K), so every initial state gets a value 145 | } else { 146 | for (j in 1:K) 147 | delta_tk[1, j] = normal_lpdf(x_t[1] | mu_k[j], sigma_t[1]); // bugfix: index j (was K), so every initial state gets a value 148 | } 149 | 150 | for (t in 2:T) { 151 | for (j in 1:K) { // j = current (t) 152 | delta_tk[t, j] = negative_infinity(); 153 | for (i in 1:K) { // i = previous (t-1) 
154 | real logp; 155 | if(est_sigma == 1) { 156 | logp = delta_tk[t-1, i] + log(A_ij[i, j]) + normal_lpdf(x_t[t] | mu_k[j], sigma_k[j]); 157 | } else { 158 | logp = delta_tk[t-1, i] + log(A_ij[i, j]) + normal_lpdf(x_t[t] | mu_k[j], sigma_t[t]); 159 | } 160 | if (logp > delta_tk[t, j]) { 161 | a_tk[t, j] = i; 162 | delta_tk[t, j] = logp; 163 | } 164 | } 165 | } 166 | } 167 | 168 | logp_zstar_t = max(delta_tk[T]); 169 | 170 | for (j in 1:K) 171 | if (delta_tk[T, j] == logp_zstar_t) 172 | zstar_t[T] = j; 173 | 174 | for (t in 1:(T - 1)) { 175 | zstar_t[T - t] = a_tk[T - t + 1, zstar_t[T - t + 1]]; 176 | } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /inst/stan/include/license.stan: -------------------------------------------------------------------------------- 1 | /* 2 | path is free software: you can redistribute it and/or modify 3 | it under the terms of the GNU General Public License as published by 4 | the Free Software Foundation, either version 3 of the License, or 5 | (at your option) any later version. 6 | 7 | path is distributed in the hope that it will be useful, 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | GNU General Public License for more details. 11 | 12 | You should have received a copy of the GNU General Public License 13 | along with path. If not, see . 
14 | */ 15 | -------------------------------------------------------------------------------- /inst/stan/regime_1.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int T; // number of observations (length) 3 | int K; // number of hidden states 4 | array[T] real x_t; // observations 5 | int est_sigma; // flag, whether to estimate sigma (1) or use values passed in (0) 6 | array[T] real sigma_t; // estimated sigma for each observation 7 | } 8 | parameters { 9 | real mu_k; // observation means 10 | real sigma_k; // observation standard deviations, optionally estimated if est_sigma == 1. Can the quantity K * est_sigma be used to dimension sigma_k? 11 | } 12 | transformed parameters { 13 | array[T] real sigmas; 14 | if(est_sigma == 1) { 15 | for(i in 1:T) sigmas[i] = sigma_k; 16 | } else { 17 | for(i in 1:T) sigmas[i] = sigma_t[i]; 18 | } 19 | } 20 | model { 21 | mu_k ~ student_t(3, 0, 3); 22 | sigma_k ~ student_t(3, 0, 1); 23 | 24 | x_t ~ normal(mu_k, sigmas); 25 | } 26 | generated quantities { 27 | vector[T] log_lik; 28 | //regresssion example in loo() package 29 | for (n in 1:T) { 30 | log_lik[n] = normal_lpdf(x_t[n] | mu_k, sigmas[n]); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /man/bayesdfa-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bayesdfa-package.R 3 | \name{bayesdfa-package} 4 | \alias{bayesdfa-package} 5 | \alias{bayesdfa} 6 | \title{The 'bayesdfa' package.} 7 | \description{ 8 | A DESCRIPTION OF THE PACKAGE 9 | } 10 | \references{ 11 | Stan Development Team (2020). RStan: the R interface to Stan. R package version 2.21.2. 
https://mc-stan.org 12 | } 13 | -------------------------------------------------------------------------------- /man/dfa_cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dfa_cv.R 3 | \name{dfa_cv} 4 | \alias{dfa_cv} 5 | \title{Apply cross validation to DFA model} 6 | \usage{ 7 | dfa_cv( 8 | stanfit, 9 | cv_method = c("loocv", "lfocv"), 10 | fold_ids = NULL, 11 | n_folds = 10, 12 | estimation = c("sampling", "optimizing", "vb"), 13 | iter = 2000, 14 | chains = 4, 15 | thin = 1, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{stanfit}{A stanfit object, to preserve the model structure from a call to fit_dfa()} 21 | 22 | \item{cv_method}{The method used for cross validation. The options are 'loocv', where time is ignored and each data point is 23 | assigned randomly to a fold. The method 'ltocv' is leave time out cross validation, and time slices are iteratively held out 24 | out. Finally the method 'lfocv' implements leave future out cross validation to do one-step ahead predictions.} 25 | 26 | \item{fold_ids}{A vector whose length is the same as the number of total data points. Elements are the fold id of each data point. If not all data points are 27 | used (e.g. the lfocv or ltocv approach might only use 10 time steps) the value can be something other than a numbber, 28 | e.g. NA} 29 | 30 | \item{n_folds}{Number of folds, defaults to 10} 31 | 32 | \item{estimation}{Character string. 
Should the model be sampled using \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}} ("sampling",default), 33 | \code{\link[rstan:stanmodel-method-optimizing]{rstan::optimizing()}} ("optimizing"), variational inference \code{\link[rstan:stanmodel-method-vb]{rstan::vb()}} ("vb").} 34 | 35 | \item{iter}{Number of iterations in Stan sampling, defaults to 2000.} 36 | 37 | \item{chains}{Number of chains in Stan sampling, defaults to 4.} 38 | 39 | \item{thin}{Thinning rate in Stan sampling, defaults to 1.} 40 | 41 | \item{...}{Any other arguments to pass to \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}}.} 42 | } 43 | \description{ 44 | Apply cross validation to DFA model 45 | } 46 | \examples{ 47 | \dontrun{ 48 | set.seed(42) 49 | s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) 50 | obs <- c(s$y_sim[1, ], s$y_sim[2, ], s$y_sim[3, ]) 51 | long <- data.frame("obs" = obs, "ts" = sort(rep(1:3, 20)), 52 | "time" = rep(1:20, 3)) 53 | m <- fit_dfa(y = long, data_shape = "long", estimation="none") 54 | # random folds 55 | fit_cv <- dfa_cv(m, cv_method = "loocv", n_folds = 5, iter = 50, 56 | chains = 1, estimation="sampling") 57 | 58 | # folds can also be passed in 59 | fold_ids <- sample(1:5, size = nrow(long), replace = TRUE) 60 | m <- fit_dfa(y = long, data_shape = "long", estimation="none") 61 | fit_cv <- dfa_cv(m, cv_method = "loocv", n_folds = 5, iter = 50, chains = 1, 62 | fold_ids = fold_ids, estimation="sampling") 63 | 64 | # do an example of leave-time-out cross validation where years are dropped 65 | fold_ids <- long$time 66 | m <- fit_dfa(y = long, data_shape = "long", estimation="none") 67 | fit_cv <- dfa_cv(m, cv_method = "loocv", iter = 100, chains = 1, 68 | fold_ids = fold_ids) 69 | 70 | # example with covariates and long format data 71 | obs_covar <- expand.grid("time" = 1:20, "timeseries" = 1:3, 72 | "covariate" = 1:2) 73 | obs_covar$value <- rnorm(nrow(obs_covar), 0, 0.1) 74 | obs <- c(s$y_sim[1, ], s$y_sim[2, ], 
s$y_sim[3, ]) 75 | m <- fit_dfa(y = long, obs_covar = obs_covar, 76 | data_shape = "long", estimation="none") 77 | fit_cv <- dfa_cv(m, cv_method = "loocv", n_folds = 5, 78 | iter = 50, chains = 1, estimation="sampling") 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /man/dfa_fitted.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dfa_fitted.R 3 | \name{dfa_fitted} 4 | \alias{dfa_fitted} 5 | \title{Get the fitted values from a DFA as a data frame} 6 | \usage{ 7 | dfa_fitted(modelfit, conf_level = 0.95, names = NULL) 8 | } 9 | \arguments{ 10 | \item{modelfit}{Output from \code{\link{fit_dfa}}.} 11 | 12 | \item{conf_level}{Probability level for CI.} 13 | 14 | \item{names}{Optional vector of names for time series labels. Should be same length as the number of time series.} 15 | } 16 | \value{ 17 | A data frame with the following columns: \code{ID} is an identifier for each time series, \code{time} is the time step, \code{y} is the observed values standardized to mean 0 and unit variance, \code{estimate} is the mean fitted value, \code{lower} is the lower CI, and \code{upper} is the upper CI. 
18 | } 19 | \description{ 20 | Get the fitted values from a DFA as a data frame 21 | } 22 | \examples{ 23 | \donttest{ 24 | y <- sim_dfa(num_trends = 2, num_years = 20, num_ts = 4) 25 | m <- fit_dfa(y = y$y_sim, num_trends = 2, iter = 50, chains = 1) 26 | fitted <- dfa_fitted(m) 27 | } 28 | } 29 | \seealso{ 30 | predicted plot_fitted fit_dfa 31 | } 32 | -------------------------------------------------------------------------------- /man/dfa_loadings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dfa_loadings.R 3 | \name{dfa_loadings} 4 | \alias{dfa_loadings} 5 | \title{Get the loadings from a DFA as a data frame} 6 | \usage{ 7 | dfa_loadings(rotated_modelfit, names = NULL, summary = TRUE, conf_level = 0.95) 8 | } 9 | \arguments{ 10 | \item{rotated_modelfit}{Output from \code{\link{rotate_trends}}.} 11 | 12 | \item{names}{An optional vector of names for plotting the loadings.} 13 | 14 | \item{summary}{Logical. Should the full posterior densities be returned? Defaults to \code{TRUE}.} 15 | 16 | \item{conf_level}{Confidence level for credible intervals. Defaults to 0.95.} 17 | } 18 | \value{ 19 | A data frame with the following columns: 20 | \code{name} is an identifier for each loading, \code{trend} is the trend for the 21 | loading, \code{median} is the posterior median loading, \code{lower} is the lower CI, 22 | \code{upper} is the upper CI, and \code{prob_diff0} is the probability the loading is 23 | different than 0. When \code{summary = FALSE}, there is no \code{lower} or \code{upper} 24 | columns and instead there are columns \code{chain} and \code{draw}. 
25 | } 26 | \description{ 27 | Get the loadings from a DFA as a data frame 28 | } 29 | \examples{ 30 | set.seed(42) 31 | s <- sim_dfa(num_trends = 2, num_ts = 4, num_years = 10) 32 | # only 1 chain and 180 iterations used so example runs quickly: 33 | m <- fit_dfa(y = s$y_sim, num_trends = 2, iter = 50, chains = 1) 34 | r <- rotate_trends(m) 35 | loadings <- dfa_loadings(r, summary = TRUE) 36 | loadings <- dfa_loadings(r, summary = FALSE) 37 | } 38 | \seealso{ 39 | plot_loadings fit_dfa rotate_trends 40 | } 41 | -------------------------------------------------------------------------------- /man/dfa_trends.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dfa_trends.R 3 | \name{dfa_trends} 4 | \alias{dfa_trends} 5 | \title{Get the trends from a DFA as a data frame} 6 | \usage{ 7 | dfa_trends(rotated_modelfit, years = NULL) 8 | } 9 | \arguments{ 10 | \item{rotated_modelfit}{Output from \code{\link{rotate_trends}}.} 11 | 12 | \item{years}{Optional numeric vector of years.} 13 | } 14 | \value{ 15 | A data frame with the following columns: \code{time} is the time step, \code{trend_number} is an identifier for each trend, \code{estimate} is the trend mean, \code{lower} is the lower CI, and \code{upper} is the upper CI. 
16 | } 17 | \description{ 18 | Get the trends from a DFA as a data frame 19 | } 20 | \examples{ 21 | set.seed(1) 22 | s <- sim_dfa(num_trends = 1) 23 | m <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 50, chains = 1) 24 | r <- rotate_trends(m) 25 | trends <- dfa_trends(r) 26 | } 27 | \seealso{ 28 | plot_trends fit_dfa rotate_trends 29 | } 30 | -------------------------------------------------------------------------------- /man/find_dfa_trends.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/find_dfa_trends.R 3 | \name{find_dfa_trends} 4 | \alias{find_dfa_trends} 5 | \title{Find the best number of trends according to LOOIC} 6 | \usage{ 7 | find_dfa_trends( 8 | y = y, 9 | kmin = 1, 10 | kmax = 5, 11 | iter = 2000, 12 | thin = 1, 13 | compare_normal = FALSE, 14 | convergence_threshold = 1.05, 15 | variance = c("equal", "unequal"), 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{y}{A matrix of data to fit. Columns represent time element.} 21 | 22 | \item{kmin}{Minimum number of trends, defaults to 1.} 23 | 24 | \item{kmax}{Maximum number of trends, defaults to 5.} 25 | 26 | \item{iter}{Iterations when sampling from each Stan model, defaults to 2000.} 27 | 28 | \item{thin}{Thinning rate when sampling from each Stan model, defaults to 1.} 29 | 30 | \item{compare_normal}{If \code{TRUE}, does model selection comparison of Normal vs. 31 | Student-t errors} 32 | 33 | \item{convergence_threshold}{The maximum allowed value of Rhat to determine 34 | convergence of parameters} 35 | 36 | \item{variance}{Vector of variance arguments for searching over large groups 37 | of models. 
Can be either or both of ("equal","unequal")} 38 | 39 | \item{...}{Other arguments to pass to \code{fit_dfa()}} 40 | } 41 | \description{ 42 | Fit a DFA with different number of trends and return the leave one out (LOO) 43 | value as calculated by the \link[loo:loo-package]{loo} package. 44 | } 45 | \examples{ 46 | \donttest{ 47 | set.seed(42) 48 | s <- sim_dfa(num_trends = 2, num_years = 20, num_ts = 3) 49 | # only 1 chain and 180 iterations used so example runs quickly: 50 | m <- find_dfa_trends( 51 | y = s$y_sim, iter = 50, 52 | kmin = 1, kmax = 2, chains = 1, compare_normal = FALSE, 53 | variance = "equal", convergence_threshold = 1.1, 54 | control = list(adapt_delta = 0.95, max_treedepth = 20) 55 | ) 56 | m$summary 57 | m$best_model 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /man/find_inverted_chains.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/invert_chains.R 3 | \name{find_inverted_chains} 4 | \alias{find_inverted_chains} 5 | \title{Find which chains to invert} 6 | \usage{ 7 | find_inverted_chains(model, trend = 1, plot = FALSE) 8 | } 9 | \arguments{ 10 | \item{model}{A Stan model, \code{rstanfit} object} 11 | 12 | \item{trend}{Which trend to check} 13 | 14 | \item{plot}{Logical: should a plot of the trend for each chain be made? 15 | Defaults to \code{FALSE}} 16 | } 17 | \description{ 18 | Find which chains to invert by checking the sum of the squared 19 | deviations between the first chain and each other chain. 
20 | } 21 | \examples{ 22 | set.seed(2) 23 | s <- sim_dfa(num_trends = 2) 24 | set.seed(1) 25 | m <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 30, chains = 2) 26 | # chains were already inverted, but we can redo that, as an example, with: 27 | find_inverted_chains(m$model, plot = TRUE) 28 | } 29 | \seealso{ 30 | invert_chains 31 | } 32 | -------------------------------------------------------------------------------- /man/find_regimes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/find_regimes.R 3 | \name{find_regimes} 4 | \alias{find_regimes} 5 | \title{Fit multiple models with differing numbers of regimes to trend data} 6 | \usage{ 7 | find_regimes( 8 | y, 9 | sds = NULL, 10 | min_regimes = 1, 11 | max_regimes = 3, 12 | iter = 2000, 13 | thin = 1, 14 | chains = 1, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{y}{Data, time series or trend from fitted DFA model.} 20 | 21 | \item{sds}{Optional time series of standard deviations of estimates. 
If 22 | passed in, residual variance not estimated.} 23 | 24 | \item{min_regimes}{Smallest of regimes to evaluate, defaults to 1.} 25 | 26 | \item{max_regimes}{Biggest of regimes to evaluate, defaults to 3.} 27 | 28 | \item{iter}{MCMC iterations, defaults to 2000.} 29 | 30 | \item{thin}{MCMC thinning rate, defaults to 1.} 31 | 32 | \item{chains}{MCMC chains; defaults to 1 (note that running multiple chains 33 | may result in a "label switching" problem where the regimes are identified 34 | with different IDs across chains).} 35 | 36 | \item{...}{Other parameters to pass to \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}}.} 37 | } 38 | \description{ 39 | Fit multiple models with differing numbers of regimes to trend data 40 | } 41 | \examples{ 42 | data(Nile) 43 | find_regimes(log(Nile), iter = 50, chains = 1, max_regimes = 2) 44 | } 45 | -------------------------------------------------------------------------------- /man/find_swans.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/find_swans.R 3 | \name{find_swans} 4 | \alias{find_swans} 5 | \title{Find outlying "black swan" jumps in trends} 6 | \usage{ 7 | find_swans(rotated_modelfit, threshold = 0.01, plot = FALSE) 8 | } 9 | \arguments{ 10 | \item{rotated_modelfit}{Output from \code{\link[=rotate_trends]{rotate_trends()}}.} 11 | 12 | \item{threshold}{A probability threshold below which to flag trend events as 13 | extreme} 14 | 15 | \item{plot}{Logical: should a plot be made?} 16 | } 17 | \value{ 18 | Prints a ggplot2 plot if \code{plot = TRUE}; returns a data frame indicating the 19 | probability that any given point in time represents a "black swan" event 20 | invisibly. 
21 | } 22 | \description{ 23 | Find outlying "black swan" jumps in trends 24 | } 25 | \examples{ 26 | set.seed(1) 27 | s <- sim_dfa(num_trends = 1, num_ts = 3, num_years = 30) 28 | s$y_sim[1, 15] <- s$y_sim[1, 15] - 6 29 | plot(s$y_sim[1, ], type = "o") 30 | abline(v = 15, col = "red") 31 | # only 1 chain and 250 iterations used so example runs quickly: 32 | m <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 50, chains = 1, nu_fixed = 2) 33 | r <- rotate_trends(m) 34 | p <- plot_trends(r) #+ geom_vline(xintercept = 15, colour = "red") 35 | print(p) 36 | # a 1 in 1000 probability if was from a normal distribution: 37 | find_swans(r, plot = TRUE, threshold = 0.001) 38 | } 39 | \references{ 40 | Anderson, S.C., Branch, T.A., Cooper, A.B., and Dulvy, N.K. 2017. 41 | Black-swan events in animal populations. Proceedings of the National Academy 42 | of Sciences 114(12): 3252–3257. https://doi.org/10.1073/pnas.1611525114 43 | } 44 | -------------------------------------------------------------------------------- /man/fit_regimes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fit_regimes.R 3 | \name{fit_regimes} 4 | \alias{fit_regimes} 5 | \title{Fit models with differing numbers of regimes to trend data} 6 | \usage{ 7 | fit_regimes( 8 | y, 9 | sds = NULL, 10 | n_regimes = 2, 11 | iter = 2000, 12 | thin = 1, 13 | chains = 1, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{y}{Data, time series or trend from fitted DFA model.} 19 | 20 | \item{sds}{Optional time series of standard deviations of estimates. 21 | If passed in, residual variance not estimated. 
Defaults to \code{NULL}.} 22 | 23 | \item{n_regimes}{Number of regimes to evaluate, defaults 2} 24 | 25 | \item{iter}{MCMC iterations, defaults to 2000.} 26 | 27 | \item{thin}{MCMC thinning rate, defaults to 1.} 28 | 29 | \item{chains}{MCMC chains, defaults to 1 (note that running multiple chains 30 | may result in a label switching problem where the regimes are identified 31 | with different IDs across chains).} 32 | 33 | \item{...}{Other parameters to pass to \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}}.} 34 | } 35 | \description{ 36 | Fit models with differing numbers of regimes to trend data 37 | } 38 | \examples{ 39 | data(Nile) 40 | fit_regimes(log(Nile), iter = 50, n_regimes = 1) 41 | } 42 | -------------------------------------------------------------------------------- /man/hmm_init.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hmm_init.R 3 | \name{hmm_init} 4 | \alias{hmm_init} 5 | \title{Create initial values for the HMM model.} 6 | \usage{ 7 | hmm_init(K, x_t) 8 | } 9 | \arguments{ 10 | \item{K}{The number of regimes or clusters to fit. Called by \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}}.} 11 | 12 | \item{x_t}{A matrix of values. Called by \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}}.} 13 | } 14 | \value{ 15 | list of initial values (mu, sigma) 16 | } 17 | \description{ 18 | Create initial values for the HMM model. 19 | } 20 | -------------------------------------------------------------------------------- /man/invert_chains.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/invert_chains.R 3 | \name{invert_chains} 4 | \alias{invert_chains} 5 | \title{Invert chains} 6 | \usage{ 7 | invert_chains(model, trends = 1, print = FALSE, ...) 
8 | } 9 | \arguments{ 10 | \item{model}{A Stan model, rstanfit object} 11 | 12 | \item{trends}{The number of trends in the DFA, defaults to 1} 13 | 14 | \item{print}{Logical indicating whether the summary should be printed. 15 | Defaults to \code{FALSE}.} 16 | 17 | \item{...}{Other arguments to pass to \code{\link[=find_inverted_chains]{find_inverted_chains()}}.} 18 | } 19 | \description{ 20 | Invert chains 21 | } 22 | \seealso{ 23 | find_inverted_chains 24 | } 25 | -------------------------------------------------------------------------------- /man/is_converged.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/converge_rhat.R 3 | \name{is_converged} 4 | \alias{is_converged} 5 | \title{Summarize Rhat convergence statistics across parameters} 6 | \usage{ 7 | is_converged(fitted_model, threshold = 1.05, parameters = c("sigma", "x", "Z")) 8 | } 9 | \arguments{ 10 | \item{fitted_model}{Samples extracted (with \code{permuted = FALSE}) from a Stan 11 | model. E.g. output from \code{\link[=invert_chains]{invert_chains()}}.} 12 | 13 | \item{threshold}{Threshold for maximum Rhat.} 14 | 15 | \item{parameters}{Vector of parameters to be included in convergence determination. Defaults = c("sigma","x","Z"). Other elements can be added including "pred", "log_lik", or "lp__"} 16 | } 17 | \description{ 18 | Pass in \code{rstanfit} model object, and a threshold Rhat value for 19 | convergence. Returns boolean. 20 | } 21 | -------------------------------------------------------------------------------- /man/loo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loo.R 3 | \name{loo.bayesdfa} 4 | \alias{loo.bayesdfa} 5 | \alias{loo} 6 | \title{LOO information criteria} 7 | \usage{ 8 | \method{loo}{bayesdfa}(x, ...) 
9 | } 10 | \arguments{ 11 | \item{x}{Output from \code{\link[=fit_dfa]{fit_dfa()}}.} 12 | 13 | \item{...}{Arguments for \code{\link[loo:relative_eff]{loo::relative_eff()}} and \code{\link[loo:loo]{loo::loo.array()}}.} 14 | } 15 | \description{ 16 | Extract the LOOIC (leave-one-out information criterion) using 17 | \code{\link[loo:loo]{loo::loo()}}. Note that we've implemented slightly different variants 18 | of loo, based on whether the DFA observation model includes correlation 19 | between time series or not (default is no correlation). Importantly, 20 | these different versions are not directly comparable to evaluate data support 21 | for including correlation or not in a DFA. If time series are not correlated, 22 | the point-wise log-likelihood for each observation is calculated and used 23 | in the loo calculations. However if time series are correlated, then each 24 | time slice is assumed to be a joint observation of 25 | all variables, and the point-wise log-likelihood is calculated as the 26 | joint likelihood of all variables under the multivariate normal distribution. 
27 | } 28 | \examples{ 29 | \donttest{ 30 | set.seed(1) 31 | s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) 32 | m <- fit_dfa(y = s$y_sim, iter = 50, chains = 1, num_trends = 1) 33 | loo(m) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /man/plot_fitted.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_fitted.R 3 | \name{plot_fitted} 4 | \alias{plot_fitted} 5 | \title{Plot the fitted values from a DFA} 6 | \usage{ 7 | plot_fitted( 8 | modelfit, 9 | conf_level = 0.95, 10 | names = NULL, 11 | spaghetti = FALSE, 12 | time_labels = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{modelfit}{Output from \code{\link{fit_dfa}}, a rstanfit object} 17 | 18 | \item{conf_level}{Probability level for CI.} 19 | 20 | \item{names}{Optional vector of names for plotting labels TODO. Should be same length as the number of time series} 21 | 22 | \item{spaghetti}{Defaults to FALSE, but if TRUE puts all raw time series (grey) and fitted values on a single plot} 23 | 24 | \item{time_labels}{Optional vector of time labels for plotting, same length as number of time steps} 25 | } 26 | \description{ 27 | Plot the fitted values from a DFA 28 | } 29 | \examples{ 30 | \donttest{ 31 | y <- sim_dfa(num_trends = 2, num_years = 20, num_ts = 4) 32 | m <- fit_dfa(y = y$y_sim, num_trends = 2, iter = 50, chains = 1) 33 | p <- plot_fitted(m) 34 | print(p) 35 | 36 | p <- plot_fitted(m, spaghetti = TRUE) 37 | print(p) 38 | } 39 | } 40 | \seealso{ 41 | plot_loadings fit_dfa rotate_trends dfa_fitted 42 | } 43 | -------------------------------------------------------------------------------- /man/plot_loadings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_loadings.R 3 | \name{plot_loadings} 4 | 
\alias{plot_loadings} 5 | \title{Plot the loadings from a DFA} 6 | \usage{ 7 | plot_loadings( 8 | rotated_modelfit, 9 | names = NULL, 10 | facet = TRUE, 11 | violin = TRUE, 12 | conf_level = 0.95, 13 | threshold = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{rotated_modelfit}{Output from \code{\link[=rotate_trends]{rotate_trends()}}.} 18 | 19 | \item{names}{An optional vector of names for plotting the loadings.} 20 | 21 | \item{facet}{Logical. Should there be a separate facet for each trend? 22 | Defaults to \code{TRUE}.} 23 | 24 | \item{violin}{Logical. Should the full posterior densities be shown as a 25 | violin plot? Defaults to \code{TRUE}.} 26 | 27 | \item{conf_level}{Confidence level for credible intervals. Defaults to 0.95.} 28 | 29 | \item{threshold}{Numeric (0-1). Optional for plots, if included, only plot 30 | loadings who have Pr(<0) or Pr(>0) > threshold. For example \code{threshold = 0.8} 31 | would only display estimates where 80\% of posterior density was above/below 32 | zero. 
Defaults to \code{NULL} (not used).} 33 | } 34 | \description{ 35 | Plot the loadings from a DFA 36 | } 37 | \examples{ 38 | set.seed(42) 39 | s <- sim_dfa(num_trends = 2, num_ts = 4, num_years = 10) 40 | # only 1 chain and 180 iterations used so example runs quickly: 41 | m <- fit_dfa(y = s$y_sim, num_trends = 2, iter = 50, chains = 1) 42 | r <- rotate_trends(m) 43 | plot_loadings(r, violin = FALSE, facet = TRUE) 44 | plot_loadings(r, violin = FALSE, facet = FALSE) 45 | plot_loadings(r, violin = TRUE, facet = FALSE) 46 | plot_loadings(r, violin = TRUE, facet = TRUE) 47 | } 48 | \seealso{ 49 | plot_trends fit_dfa rotate_trends 50 | } 51 | -------------------------------------------------------------------------------- /man/plot_regime_model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_regime_model.R 3 | \name{plot_regime_model} 4 | \alias{plot_regime_model} 5 | \title{Plot the state probabilities from \code{\link[=find_regimes]{find_regimes()}}} 6 | \usage{ 7 | plot_regime_model( 8 | model, 9 | probs = c(0.05, 0.95), 10 | type = c("probability", "means"), 11 | regime_prob_threshold = 0.9, 12 | plot_prob_indices = NULL, 13 | flip_regimes = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{model}{A model returned by \code{\link[=find_regimes]{find_regimes()}}.} 18 | 19 | \item{probs}{A numeric vector of quantiles to plot the credible intervals at. 20 | Defaults to \code{c(0.05, 0.95)}.} 21 | 22 | \item{type}{Whether to plot the probabilities (default) or means.} 23 | 24 | \item{regime_prob_threshold}{The probability density that must be above 0.5. 25 | Defaults to 0.9 before we classify a regime (only affects \code{"means"} plot).} 26 | 27 | \item{plot_prob_indices}{Optional indices of probability plots to plot. 
28 | Defaults to showing all.} 29 | 30 | \item{flip_regimes}{Optional whether to flip regimes in plots, defaults to FALSE} 31 | } 32 | \description{ 33 | Plot the state probabilities from \code{\link[=find_regimes]{find_regimes()}} 34 | } 35 | \details{ 36 | Note that the original timeseries data (dots) are shown scaled 37 | between 0 and 1. 38 | } 39 | \examples{ 40 | \donttest{ 41 | data(Nile) 42 | m <- fit_regimes(log(Nile), n_regimes = 2, chains = 1, iter = 50) 43 | plot_regime_model(m) 44 | plot_regime_model(m, plot_prob_indices = c(2)) 45 | plot_regime_model(m, type = "means") 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /man/plot_trends.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_trends.R 3 | \name{plot_trends} 4 | \alias{plot_trends} 5 | \title{Plot the trends from a DFA} 6 | \usage{ 7 | plot_trends( 8 | rotated_modelfit, 9 | years = NULL, 10 | highlight_outliers = FALSE, 11 | threshold = 0.01 12 | ) 13 | } 14 | \arguments{ 15 | \item{rotated_modelfit}{Output from \code{\link{rotate_trends}}} 16 | 17 | \item{years}{Optional numeric vector of years for the plot} 18 | 19 | \item{highlight_outliers}{Logical. Should trend events 20 | that exceed the probability of occurring with a normal distribution as 21 | defined by \code{threshold} be highlighted? Defaults to FALSE} 22 | 23 | \item{threshold}{A probability threshold below which to 24 | flag trend events as extreme. 
Defaults to 0.01} 25 | } 26 | \description{ 27 | Plot the trends from a DFA 28 | } 29 | \examples{ 30 | set.seed(1) 31 | s <- sim_dfa(num_trends = 1) 32 | m <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 50, chains = 1) 33 | r <- rotate_trends(m) 34 | p <- plot_trends(r) 35 | print(p) 36 | } 37 | \seealso{ 38 | dfa_trends plot_loadings fit_dfa rotate_trends 39 | } 40 | -------------------------------------------------------------------------------- /man/predicted.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predicted.R 3 | \name{predicted} 4 | \alias{predicted} 5 | \title{Calculate predicted value from DFA object} 6 | \usage{ 7 | predicted(fitted_model) 8 | } 9 | \arguments{ 10 | \item{fitted_model}{Samples extracted (with \code{permuted = FALSE}) from a Stan 11 | model. E.g. output from \code{\link[=invert_chains]{invert_chains()}}.} 12 | } 13 | \description{ 14 | Pass in \code{rstanfit} model object. 
Returns array of predictions, dimensioned 15 | number of MCMC draws x number of MCMC chains x time series length x number of time series 16 | } 17 | \examples{ 18 | \dontrun{ 19 | set.seed(42) 20 | s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) 21 | # only 1 chain and 1000 iterations used so example runs quickly: 22 | m <- fit_dfa(y = s$y_sim, iter = 2000, chains = 3, num_trends = 1) 23 | pred <- predicted(m) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /man/rotate_trends.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rotate_trends.R 3 | \name{rotate_trends} 4 | \alias{rotate_trends} 5 | \title{Rotate the trends from a DFA} 6 | \usage{ 7 | rotate_trends(fitted_model, conf_level = 0.95, invert = FALSE) 8 | } 9 | \arguments{ 10 | \item{fitted_model}{Output from \code{\link[=fit_dfa]{fit_dfa()}}.} 11 | 12 | \item{conf_level}{Probability level for CI.} 13 | 14 | \item{invert}{Whether to invert the trends and loadings for plotting purposes} 15 | } 16 | \description{ 17 | Rotate the trends from a DFA 18 | } 19 | \examples{ 20 | set.seed(42) 21 | s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) 22 | # only 1 chain and 800 iterations used so example runs quickly: 23 | m <- fit_dfa(y = s$y_sim, iter = 50, chains = 1) 24 | r <- rotate_trends(m) 25 | plot_trends(r) 26 | } 27 | -------------------------------------------------------------------------------- /man/sim_dfa.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sim.R 3 | \name{sim_dfa} 4 | \alias{sim_dfa} 5 | \title{Simulate from a DFA} 6 | \usage{ 7 | sim_dfa( 8 | num_trends = 1, 9 | num_years = 20, 10 | num_ts = 4, 11 | loadings_matrix = matrix(nrow = num_ts, ncol = num_trends, rnorm(num_ts * num_trends, 
12 | 0, 1)), 13 | sigma = rlnorm(1, meanlog = log(0.2), 0.1), 14 | varIndx = rep(1, num_ts), 15 | trend_model = c("rw", "bs"), 16 | spline_weights = matrix(ncol = 6, nrow = num_trends, data = rnorm(6 * num_trends)), 17 | extreme_value = NULL, 18 | extreme_loc = NULL, 19 | nu_fixed = 100, 20 | user_supplied_deviations = NULL 21 | ) 22 | } 23 | \arguments{ 24 | \item{num_trends}{The number of trends.} 25 | 26 | \item{num_years}{The number of years.} 27 | 28 | \item{num_ts}{The number of timeseries.} 29 | 30 | \item{loadings_matrix}{A loadings matrix. The number of rows should match the 31 | number of timeseries and the number of columns should match the number of 32 | trends. Note that this loadings matrix will be internally manipulated by 33 | setting some elements to 0 and constraining some elements to 1 so that the 34 | model can be fitted. See \code{\link[=fit_dfa]{fit_dfa()}}. See the outfit element \code{Z} in 35 | the returned list is to see the manipulated loadings matrix. If not 36 | specified, a random matrix \code{~ N(0, 1)} is used.} 37 | 38 | \item{sigma}{A vector of standard deviations on the observation error. Should 39 | be of the same length as the number of trends. If not specified, random 40 | numbers are used \code{rlnorm(1, meanlog = log(0.2), 0.1)}.} 41 | 42 | \item{varIndx}{Indices of unique observation variances. Defaults to \code{c(1, 1, 1, 1)}. Unique observation error variances would be specified as \code{c(1, 2, 3, 4)} in the case of 4 time series.} 43 | 44 | \item{trend_model}{The type of trend model. Random walk (\code{"rw"}) or basis 45 | spline (\code{"bs"})} 46 | 47 | \item{spline_weights}{A matrix of basis function weights that is used 48 | if \code{trend_model = "bs"}. The number of columns should correspond to 49 | the number of knots and the number of rows should correspond to the 50 | number of trends.} 51 | 52 | \item{extreme_value}{Value added to the random walk in the extreme time step. 
53 | Defaults to not included.} 54 | 55 | \item{extreme_loc}{Location of single extreme event in the process. The same 56 | for all processes, and defaults to \code{round(n_t/2)} where \code{n_t} is the time 57 | series length} 58 | 59 | \item{nu_fixed}{Nu is the degrees of freedom parameter for the 60 | t-distribution, defaults to 100, which is effectively normal.} 61 | 62 | \item{user_supplied_deviations}{An optional matrix of deviations for the trend 63 | random walks. Columns are for trends and rows are for each time step.} 64 | } 65 | \value{ 66 | A list with the following elements: \code{y_sim} is the simulated data, 67 | pred is the true underlying data without observation error added, \code{x} is 68 | the underlying trends, \code{Z} is the manipulated loadings matrix that is fed 69 | to the model. 70 | } 71 | \description{ 72 | Simulate from a DFA 73 | } 74 | \examples{ 75 | x <- sim_dfa(num_trends = 2) 76 | names(x) 77 | matplot(t(x$y_sim), type = "l") 78 | matplot(t(x$x), type = "l") 79 | 80 | set.seed(42) 81 | x <- sim_dfa(extreme_value = -4, extreme_loc = 10) 82 | matplot(t(x$x), type = "l") 83 | abline(v = 10) 84 | matplot(t(x$pred), type = "l") 85 | abline(v = 10) 86 | 87 | set.seed(42) 88 | x <- sim_dfa() 89 | matplot(t(x$x), type = "l") 90 | abline(v = 10) 91 | matplot(t(x$pred), type = "l") 92 | abline(v = 10) 93 | } 94 | -------------------------------------------------------------------------------- /man/trend_cor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/trend_cor.R 3 | \name{trend_cor} 4 | \alias{trend_cor} 5 | \title{Estimate the correlation between a DFA trend and some other timeseries} 6 | \usage{ 7 | trend_cor( 8 | rotated_modelfit, 9 | y, 10 | trend = 1, 11 | time_window = seq_len(length(y)), 12 | trend_samples = 100, 13 | stan_iter = 300, 14 | stan_chains = 1, 15 | ... 
16 | ) 17 | } 18 | \arguments{ 19 | \item{rotated_modelfit}{Output from \code{\link[=rotate_trends]{rotate_trends()}}.} 20 | 21 | \item{y}{A numeric vector to correlate with the DFA trend. Must be the same 22 | length as the DFA trend.} 23 | 24 | \item{trend}{A number corresponding to which trend to use, defaults to 1.} 25 | 26 | \item{time_window}{Indices indicating a time window slice to use in the 27 | correlation. Defaults to using the entire time window. Can be used to walk 28 | through the timeseries and test the cross correlations.} 29 | 30 | \item{trend_samples}{The number of samples from the trend posterior to use. A 31 | model will be run for each trend sample so this value shouldn't be too 32 | large. Defaults to 100.} 33 | 34 | \item{stan_iter}{The number of samples from the posterior with each Stan 35 | model run, defaults to 300.} 36 | 37 | \item{stan_chains}{The number of chains for each Stan model run, defaults to 38 | 1.} 39 | 40 | \item{...}{Other arguments to pass to \code{\link[rstan]{sampling}}} 41 | } 42 | \value{ 43 | A numeric vector of samples from the correlation coefficient 44 | posterior. 45 | } 46 | \description{ 47 | Fully incorporates the uncertainty from the posterior of the DFA trend 48 | } 49 | \details{ 50 | Uses a \code{sigma ~ half_t(3, 0, 2)} prior on the residual standard 51 | deviation and a \code{uniform(-1, 1)} prior on the correlation coefficient. 52 | Fitted as a linear regression of \code{y ~ x}, where y represents the \code{y} 53 | argument to \code{\link[=trend_cor]{trend_cor()}} and \code{x} represents the DFA trend, and both \code{y} 54 | and \code{x} have been scaled by subtracting their means and dividing by their 55 | standard deviations. Samples are drawn from the posterior of the trend and 56 | repeatedly fed through the Stan regression to come up with a combined 57 | posterior of the correlation. 
58 | } 59 | \examples{ 60 | set.seed(1) 61 | s <- sim_dfa(num_trends = 1, num_years = 15) 62 | m <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 50, chains = 1) 63 | r <- rotate_trends(m) 64 | n_years <- ncol(r$trends[, 1, ]) 65 | fake_dat <- rnorm(n_years, 0, 1) 66 | correlation <- trend_cor(r, fake_dat, trend_samples = 25) 67 | hist(correlation) 68 | correlation <- trend_cor(r, 69 | y = fake_dat, time_window = 5:15, 70 | trend_samples = 25 71 | ) 72 | hist(correlation) 73 | } 74 | -------------------------------------------------------------------------------- /pkgdown/extra.css: -------------------------------------------------------------------------------- 1 | @import url("https://nmfs-ost.github.io/nmfspalette/extra.css"); 2 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | # Generated by rstantools. Do not edit by hand. 2 | 3 | STANHEADERS_SRC = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "message()" -e "cat(system.file('include', 'src', package = 'StanHeaders', mustWork = TRUE))" -e "message()" | grep "StanHeaders") 4 | 5 | STANC_FLAGS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "cat(ifelse(utils::packageVersion('rstan') >= '2.26', '-DUSE_STANC3',''))") 6 | PKG_CPPFLAGS = -I"../inst/include" -I"$(STANHEADERS_SRC)" -DBOOST_DISABLE_ASSERTS -DEIGEN_NO_DEBUG -DBOOST_MATH_OVERFLOW_ERROR_POLICY=errno_on_error $(STANC_FLAGS) -D_HAS_AUTO_PTR_ETC=0 7 | PKG_CXXFLAGS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "RcppParallel::CxxFlags()") $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "StanHeaders:::CxxFlags()") 8 | PKG_LIBS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "RcppParallel::RcppParallelLibs()") $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "StanHeaders:::LdFlags()") 9 | 10 | CXX_STD = CXX17 11 | -------------------------------------------------------------------------------- /src/Makevars.win: 
-------------------------------------------------------------------------------- 1 | # Generated by rstantools. Do not edit by hand. 2 | 3 | STANHEADERS_SRC = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "message()" -e "cat(system.file('include', 'src', package = 'StanHeaders', mustWork = TRUE))" -e "message()" | grep "StanHeaders") 4 | 5 | PKG_CPPFLAGS = -I"../inst/include" -I"$(STANHEADERS_SRC)" -DBOOST_DISABLE_ASSERTS -DEIGEN_NO_DEBUG -DRCPP_PARALLEL_USE_TBB=1 6 | PKG_CXXFLAGS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "RcppParallel::CxxFlags()") $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "StanHeaders:::CxxFlags()") 7 | PKG_LIBS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "RcppParallel::RcppParallelLibs()") $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "StanHeaders:::LdFlags()") 8 | 9 | CXX_STD = CXX14 10 | -------------------------------------------------------------------------------- /src/RcppExports-507982db.o.tmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/src/RcppExports-507982db.o.tmp -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | #include 6 | 7 | using namespace Rcpp; 8 | 9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 10 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 11 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 12 | #endif 13 | 14 | 15 | RcppExport SEXP _rcpp_module_boot_stan_fit4corr_mod(); 16 | RcppExport SEXP _rcpp_module_boot_stan_fit4dfa_mod(); 17 | RcppExport SEXP _rcpp_module_boot_stan_fit4hmm_gaussian_mod(); 18 | RcppExport SEXP _rcpp_module_boot_stan_fit4regime_1_mod(); 19 | 20 | static const 
R_CallMethodDef CallEntries[] = { 21 | {"_rcpp_module_boot_stan_fit4corr_mod", (DL_FUNC) &_rcpp_module_boot_stan_fit4corr_mod, 0}, 22 | {"_rcpp_module_boot_stan_fit4dfa_mod", (DL_FUNC) &_rcpp_module_boot_stan_fit4dfa_mod, 0}, 23 | {"_rcpp_module_boot_stan_fit4hmm_gaussian_mod", (DL_FUNC) &_rcpp_module_boot_stan_fit4hmm_gaussian_mod, 0}, 24 | {"_rcpp_module_boot_stan_fit4regime_1_mod", (DL_FUNC) &_rcpp_module_boot_stan_fit4regime_1_mod, 0}, 25 | {NULL, NULL, 0} 26 | }; 27 | 28 | RcppExport void R_init_bayesdfa(DllInfo *dll) { 29 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 30 | R_useDynamicSymbols(dll, FALSE); 31 | } 32 | -------------------------------------------------------------------------------- /src/stanExports_corr.cc: -------------------------------------------------------------------------------- 1 | // Generated by rstantools. Do not edit by hand. 2 | 3 | #include 4 | using namespace Rcpp ; 5 | #include "stanExports_corr.h" 6 | 7 | RCPP_MODULE(stan_fit4corr_mod) { 8 | 9 | 10 | class_ >("rstantools_model_corr") 11 | 12 | .constructor() 13 | 14 | 15 | .method("call_sampler", &rstan::stan_fit ::call_sampler) 16 | .method("param_names", &rstan::stan_fit ::param_names) 17 | .method("param_names_oi", &rstan::stan_fit ::param_names_oi) 18 | .method("param_fnames_oi", &rstan::stan_fit ::param_fnames_oi) 19 | .method("param_dims", &rstan::stan_fit ::param_dims) 20 | .method("param_dims_oi", &rstan::stan_fit ::param_dims_oi) 21 | .method("update_param_oi", &rstan::stan_fit ::update_param_oi) 22 | .method("param_oi_tidx", &rstan::stan_fit ::param_oi_tidx) 23 | .method("grad_log_prob", &rstan::stan_fit ::grad_log_prob) 24 | .method("log_prob", &rstan::stan_fit ::log_prob) 25 | .method("unconstrain_pars", &rstan::stan_fit ::unconstrain_pars) 26 | .method("constrain_pars", &rstan::stan_fit ::constrain_pars) 27 | .method("num_pars_unconstrained", &rstan::stan_fit ::num_pars_unconstrained) 28 | .method("unconstrained_param_names", &rstan::stan_fit 
::unconstrained_param_names) 29 | .method("constrained_param_names", &rstan::stan_fit ::constrained_param_names) 30 | .method("standalone_gqs", &rstan::stan_fit ::standalone_gqs) 31 | ; 32 | } 33 | -------------------------------------------------------------------------------- /src/stanExports_dfa-ec272765.o.tmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fate-ewi/bayesdfa/f43eb7e57b00aca083abb690697449bc57a51982/src/stanExports_dfa-ec272765.o.tmp -------------------------------------------------------------------------------- /src/stanExports_dfa.cc: -------------------------------------------------------------------------------- 1 | // Generated by rstantools. Do not edit by hand. 2 | 3 | #include 4 | using namespace Rcpp ; 5 | #include "stanExports_dfa.h" 6 | 7 | RCPP_MODULE(stan_fit4dfa_mod) { 8 | 9 | 10 | class_ >("rstantools_model_dfa") 11 | 12 | .constructor() 13 | 14 | 15 | .method("call_sampler", &rstan::stan_fit ::call_sampler) 16 | .method("param_names", &rstan::stan_fit ::param_names) 17 | .method("param_names_oi", &rstan::stan_fit ::param_names_oi) 18 | .method("param_fnames_oi", &rstan::stan_fit ::param_fnames_oi) 19 | .method("param_dims", &rstan::stan_fit ::param_dims) 20 | .method("param_dims_oi", &rstan::stan_fit ::param_dims_oi) 21 | .method("update_param_oi", &rstan::stan_fit ::update_param_oi) 22 | .method("param_oi_tidx", &rstan::stan_fit ::param_oi_tidx) 23 | .method("grad_log_prob", &rstan::stan_fit ::grad_log_prob) 24 | .method("log_prob", &rstan::stan_fit ::log_prob) 25 | .method("unconstrain_pars", &rstan::stan_fit ::unconstrain_pars) 26 | .method("constrain_pars", &rstan::stan_fit ::constrain_pars) 27 | .method("num_pars_unconstrained", &rstan::stan_fit ::num_pars_unconstrained) 28 | .method("unconstrained_param_names", &rstan::stan_fit ::unconstrained_param_names) 29 | .method("constrained_param_names", &rstan::stan_fit ::constrained_param_names) 30 | 
.method("standalone_gqs", &rstan::stan_fit ::standalone_gqs) 31 | ; 32 | } 33 | -------------------------------------------------------------------------------- /src/stanExports_hmm_gaussian.cc: -------------------------------------------------------------------------------- 1 | // Generated by rstantools. Do not edit by hand. 2 | 3 | #include 4 | using namespace Rcpp ; 5 | #include "stanExports_hmm_gaussian.h" 6 | 7 | RCPP_MODULE(stan_fit4hmm_gaussian_mod) { 8 | 9 | 10 | class_ >("rstantools_model_hmm_gaussian") 11 | 12 | .constructor() 13 | 14 | 15 | .method("call_sampler", &rstan::stan_fit ::call_sampler) 16 | .method("param_names", &rstan::stan_fit ::param_names) 17 | .method("param_names_oi", &rstan::stan_fit ::param_names_oi) 18 | .method("param_fnames_oi", &rstan::stan_fit ::param_fnames_oi) 19 | .method("param_dims", &rstan::stan_fit ::param_dims) 20 | .method("param_dims_oi", &rstan::stan_fit ::param_dims_oi) 21 | .method("update_param_oi", &rstan::stan_fit ::update_param_oi) 22 | .method("param_oi_tidx", &rstan::stan_fit ::param_oi_tidx) 23 | .method("grad_log_prob", &rstan::stan_fit ::grad_log_prob) 24 | .method("log_prob", &rstan::stan_fit ::log_prob) 25 | .method("unconstrain_pars", &rstan::stan_fit ::unconstrain_pars) 26 | .method("constrain_pars", &rstan::stan_fit ::constrain_pars) 27 | .method("num_pars_unconstrained", &rstan::stan_fit ::num_pars_unconstrained) 28 | .method("unconstrained_param_names", &rstan::stan_fit ::unconstrained_param_names) 29 | .method("constrained_param_names", &rstan::stan_fit ::constrained_param_names) 30 | .method("standalone_gqs", &rstan::stan_fit ::standalone_gqs) 31 | ; 32 | } 33 | -------------------------------------------------------------------------------- /src/stanExports_regime_1.cc: -------------------------------------------------------------------------------- 1 | // Generated by rstantools. Do not edit by hand. 
2 | 3 | #include 4 | using namespace Rcpp ; 5 | #include "stanExports_regime_1.h" 6 | 7 | RCPP_MODULE(stan_fit4regime_1_mod) { 8 | 9 | 10 | class_ >("rstantools_model_regime_1") 11 | 12 | .constructor() 13 | 14 | 15 | .method("call_sampler", &rstan::stan_fit ::call_sampler) 16 | .method("param_names", &rstan::stan_fit ::param_names) 17 | .method("param_names_oi", &rstan::stan_fit ::param_names_oi) 18 | .method("param_fnames_oi", &rstan::stan_fit ::param_fnames_oi) 19 | .method("param_dims", &rstan::stan_fit ::param_dims) 20 | .method("param_dims_oi", &rstan::stan_fit ::param_dims_oi) 21 | .method("update_param_oi", &rstan::stan_fit ::update_param_oi) 22 | .method("param_oi_tidx", &rstan::stan_fit ::param_oi_tidx) 23 | .method("grad_log_prob", &rstan::stan_fit ::grad_log_prob) 24 | .method("log_prob", &rstan::stan_fit ::log_prob) 25 | .method("unconstrain_pars", &rstan::stan_fit ::unconstrain_pars) 26 | .method("constrain_pars", &rstan::stan_fit ::constrain_pars) 27 | .method("num_pars_unconstrained", &rstan::stan_fit ::num_pars_unconstrained) 28 | .method("unconstrained_param_names", &rstan::stan_fit ::unconstrained_param_names) 29 | .method("constrained_param_names", &rstan::stan_fit ::constrained_param_names) 30 | .method("standalone_gqs", &rstan::stan_fit ::standalone_gqs) 31 | ; 32 | } 33 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(bayesdfa) 3 | 4 | test_check("bayesdfa") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-chain-flipping.R: -------------------------------------------------------------------------------- 1 | if (interactive()) options(mc.cores = parallel::detectCores()) 2 | 3 | # set.seed(1) 4 | # num_trends <- 2 5 | # num_ts <- 3 6 | # num_years <- 30 7 | # 8 | # 9 | # dat <- sim_dfa( 10 | # num_trends = num_trends, 11 | 
# num_years = num_years, 12 | # num_ts = num_ts, 13 | # loadings_matrix = loadings_matrix, 14 | # sigma = 0.2, nu_fixed = 200) 15 | # 16 | # m2 <- fit_dfa(dat$y_sim, num_trends = num_trends, zscore = TRUE, 17 | # iter = 1000, chains = 4, seed = 1) 18 | # 19 | # x <- rotate_trends(m2) 20 | # plot_trends(x) 21 | # plot_loadings(x) 22 | -------------------------------------------------------------------------------- /vignettes/a2_combining_data.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Combining data with bayesdfa" 3 | author: "Eric J. Ward, Sean C. Anderson, Mary E. Hunsicker, Mike A. Litzow, Luis A. Damiano, Mark D. Scheuerell, Elizabeth E. Holmes, Nick Tolimieri" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Combining data with bayesdfa} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | \usepackage[utf8]{inputenc} 10 | --- 11 | 12 | For some applications using DFA, datasets may need to be combined from several data sources, and they may differ in time series length (or precision). Here we'll use some simple examples using `fit_dfa()` and to illustrate some cautionary points. 13 | 14 | ```{r set-knitr-options, cache=FALSE, echo=FALSE, warning=FALSE, message=FALSE} 15 | library("knitr") 16 | opts_chunk$set(message = FALSE, fig.width = 5.5) 17 | ``` 18 | 19 | Let's load the necessary packages: 20 | 21 | ```{r, message=FALSE, warning=FALSE} 22 | library(bayesdfa) 23 | library(ggplot2) 24 | library(dplyr) 25 | library(rstan) 26 | chains = 1 27 | iter = 10 28 | #library(viridis) 29 | ``` 30 | 31 | ## Example 32 | 33 | First, let's simulate some simple data with `sim_dfa()`. First, using just a 1 - trend model. This example has 3 time series, and is simulated from 1 underlying trend. The first time series doesn't load heavily on the trend (it's mostly generated via white noise) but time series 2-3 have stronger loadings on the trend. 
34 | 35 | ```{r} 36 | set.seed(123) 37 | loadings = matrix(0, 3, 1) 38 | loadings[1,1] = c(0.1) 39 | loadings[2:3,1] = runif(2, 0.4,1) 40 | round(loadings,3) 41 | sim = sim_dfa(num_trends = 1, num_years = 100, 42 | num_ts = 3, loadings_matrix = loadings, 43 | sigma=0.6) 44 | ``` 45 | 46 | Here we can see that time series 1 is more variable because the random component is playing a relatively larger role. 47 | 48 | ```{r echo=FALSE, warning=FALSE, message=FALSE} 49 | #id variable for position in matrix 50 | Y = as.data.frame(sim$y_sim) 51 | #as.data.frame(t(scale(t(Y)))) 52 | Y$ts <- as.factor(1:nrow(Y)) 53 | plot_data <- reshape2::melt(Y,id.var="ts") 54 | plot_data$x = as.numeric(substr(plot_data$variable, 2, length(plot_data$variable))) 55 | g1 = ggplot(plot_data, aes(x=x,y=value,group=ts,colour=ts)) + 56 | geom_point()+ 57 | geom_line() + xlab("Time") + 58 | theme_bw()#+ scale_color_viridis(end=0.8, discrete = TRUE) 59 | 60 | 61 | g1 62 | #grid.arrange(g1,g2,nrow=2) 63 | ``` 64 | 65 | ```{r echo=FALSE, warning=FALSE, message=FALSE} 66 | Y = as.data.frame(t(scale(t(sim$y_sim)))) 67 | Y$ts <- as.factor(1:nrow(Y)) 68 | plot_data <- reshape2::melt(Y,id.var="ts") 69 | plot_data$x = as.numeric(substr(plot_data$variable, 2, length(plot_data$variable))) 70 | g2 = ggplot(plot_data, aes(x=x,y=value,group=ts,colour=ts)) + 71 | geom_point()+ 72 | geom_line() + xlab("Time") + 73 | ylab("Standardized time series") + 74 | theme_bw() #+ 75 | #scale_color_viridis(end=0.8, discrete = TRUE) 76 | g2 77 | ``` 78 | 79 | Let's initially treat the first ~ 50 time points as a burn in, and fit a DFA model to the latter half of the time series, using all data. We don't really have to rotate trends (because there's just 1). The loadings are estimated ok for trend 1, but underestimated for the others. 
80 | 81 | ```{r results='hide'} 82 | fit_1 = fit_dfa(y = sim$y_sim[,51:100], num_trends = 1, chains=chains, iter=iter) 83 | r = rotate_trends(fit_1) 84 | ``` 85 | ```{r} 86 | round(r$Z_rot_mean,3) 87 | ``` 88 | 89 | Now, we'll pretend that in time steps 1:50 we have observations from time series 1 (but not the others). We'll fit several additional models, adding in back data points in steps of 10, and going backwards in time. All these runs would use time points 51:100 for time series 2 and 3, but they would include time steps 51:100, then 41:100, 31:100, etc. for time series 1. 90 | 91 | *Note for comparison purposes, we'll also standardize all time series 1 time before passing them in as an argument. Time series # 1 won't be re-scaled, but will be re-centered for each iteration. This is important because the time-series are non-stationary.* 92 | 93 | ```{r results='hide', warning=FALSE, message=FALSE} 94 | 95 | output = expand.grid("ts_start"=c(0,25,50), 96 | "x"=1:100, "estimated_trend"=NA, "obs"=NA) 97 | 98 | l = matrix(0, 3, 3) 99 | 100 | for(i in 1:nrow(l)) { 101 | idx = c(1,26,51) # seq(1,60,10)[nrow(l)+1-i] 102 | Y = sim$y_sim 103 | Y = t(scale(t(Y))) 104 | Y[1,1:(idx-1)] = NA 105 | Y[2:3,1:50] = NA 106 | fit_2 = fit_dfa(y = Y, num_trends = 1, chains=1, iter=10, scale="center") 107 | r = rotate_trends(fit_2) 108 | l[i,] = c(r$Z_rot_mean) 109 | output$estimated_trend[which(output$ts_start==(idx-1))] = scale((r$Z_rot_mean %*% r$trends_mean)[2,]) 110 | output$obs[which(output$ts_start==(idx-1))] = Y[2,51:100] 111 | } 112 | ``` 113 | 114 | Now we can look at the effects of adding in the extra data from time series 2. Here are the predictions for time series 2 over time steps 51:100, adding more data in from time series 1. What this shows is that in general the trends are the same -- though there are nuanced differences between them. 
115 | 116 | ```{r echo=FALSE} 117 | #output$estimated_trend[which(output$ts_start==41)] = -1 * output$estimated_trend[which(output$ts_start==41)] 118 | output$ts_start=as.factor(output$ts_start) 119 | Y = sim$y_sim 120 | Y = t(scale(t(Y))) 121 | ts2 = data.frame(x = 1:100, y = Y[2,]) 122 | ggplot(output, aes(x,y=estimated_trend,group=ts_start,col=ts_start)) + geom_line(linewidth=2, alpha=0.7) + 123 | #scale_color_viridis(end=0.8,discrete = TRUE) + xlim(51,100) + 124 | xlab("Time") + 125 | ylab("Estimated time series (# 2)") +theme_bw() 126 | ``` 127 | 128 | As a cautionary note, any time time series of different lenghts are combined using similar approaches, simulations should be done to try to estimate the influence of adding new data to shared trends or other quantities of interest. 129 | 130 | ```{r echo=FALSE, results='hide', include=FALSE} 131 | L = as.data.frame(t(l)) 132 | L$trend = as.factor(seq(1:nrow(L))) 133 | plot_data <- reshape2::melt(L,id.var="trend") 134 | plot_data$x = as.numeric(substr(plot_data$variable, 2, length(plot_data$variable))) 135 | plot_data$x = seq(1,50,5)[11-plot_data$x] 136 | ggplot(plot_data, aes(x=x,y=value,group=trend,colour=trend)) + 137 | geom_point()+ 138 | geom_line() + xlab("Time") #+ scale_color_viridis(end=0.8, discrete = TRUE) 139 | ``` 140 | 141 | -------------------------------------------------------------------------------- /vignettes/a3_covariates.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Examples of including covariates with bayesdfa" 3 | author: "Eric J. Ward, Sean C. Anderson, Mary E. Hunsicker, Mike A. Litzow, Luis A. Damiano, Mark D. Scheuerell, Elizabeth E. 
Holmes, Nick Tolimieri" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Examples of including covariates with bayesdfa} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | \usepackage[utf8]{inputenc} 10 | --- 11 | 12 | Here we will walk through how to use the bayesdfa package to fit dynamic factor analysis (DFA) models with covariates. 13 | 14 | ```{r set-knitr-options, cache=FALSE, echo=FALSE, warning=FALSE, message=FALSE} 15 | library("knitr") 16 | opts_chunk$set(message = FALSE, fig.width = 5.5) 17 | ``` 18 | 19 | Let's load the necessary packages: 20 | 21 | ```{r, message=FALSE, warning=FALSE} 22 | library(bayesdfa) 23 | library(ggplot2) 24 | library(dplyr) 25 | library(rstan) 26 | chains = 1 27 | iter = 10 28 | ``` 29 | 30 | ## Notation review for DFA models 31 | 32 | Covariates in dynamic factor analysis are generally included in the observation model, rather than the process model. Without covariates, the model can be expressed as 33 | 34 | $${x}_{t}={x}_{t-1}+{e}_t\\ { e }_{ t }\sim MVN(0,\textbf{Q})\\ { y }_{ t }=\textbf{Z}{ x }_{ t }+{ v }_{ t }\\ { v }_{ t }\sim MVN(0,\textbf{R})$$ 35 | 36 | where the matrix $\textbf{Z}$ is dimensioned as the number of time series by number of trends, and maps the observed data $y_{t}$ to the latent trends $x_{t}$. 37 | 38 | ### Observation covariates 39 | 40 | Observation covariates can be 41 | $${x}_{t}={x}_{t-1}+{e}_t\\ { e }_{ t }\sim MVN(0,\textbf{Q})\\ { y }_{ t }=\textbf{Z}{ x }_{ t }+\textbf{D}{ d }_{ t }+{ v }_{ t }\\ { v }_{ t }\sim MVN(0,\textbf{R})$$ 42 | where the matrix $\textbf{D}$ represents time series by number of covariates at time $t$. For a single covariate, such as temperature, this would mean estimating $P$ parameters, where $P$ is the number of time series. For a model including 2 covariates, the number of estimated coefficients would be $2P$ and so forth. 
43 | 44 | ### Process covariates 45 | 46 | Process covariates on the trends are less common but can be written as 47 | $${x}_{t}={x}_{t-1}+\textbf{C}{ c }_{ t }+{e}_t\\ { e }_{ t }\sim MVN(0,\textbf{Q})\\ { y }_{ t }=\textbf{Z}{ x }_{ t }+{ v }_{ t }\\ { v }_{ t }\sim MVN(0,\textbf{R})$$ 48 | where the matrix $\textbf{C}$ represents the number of trends by number of covariates at time $t$. For a single trend, this would mean estimating $K$ parameters, where $K$ is the number of trends. For a model including 2 covariates, the number of estimated coefficients would be $2K$ and so forth. 49 | 50 | 51 | ## Examples -- observation covariates 52 | 53 | We'll start by simulating some random trends using the `sim_dat` function, 54 | ```{r simulate-data-obs} 55 | set.seed(1) 56 | sim_dat <- sim_dfa( 57 | num_trends = 1, 58 | num_years = 20, 59 | num_ts = 4 60 | ) 61 | ``` 62 | 63 | Next, we can add a covariate effect to the trend estimate, `x`. For example, 64 | 65 | ```{r} 66 | cov = expand.grid("time"=1:20, "timeseries"=1:4, "covariate"=1) 67 | cov$value = rnorm(nrow(cov),0,0.1) 68 | 69 | for(i in 1:nrow(cov)) { 70 | sim_dat$y[cov$timeseries[i],cov$time[i]] = sim_dat$pred[cov$timeseries[i],cov$time[i]] + 71 | c(0.1,0.2,0.3,0.4)[cov$timeseries[i]]*cov$value[i] 72 | } 73 | ``` 74 | 75 | And now fit the model with `fit_dfa` 76 | 77 | ```{r results='hide', warning=FALSE, message=FALSE} 78 | mod = fit_dfa(y = sim_dat$y, obs_covar = cov, num_trends = 1, 79 | chains=chains, iter=iter) 80 | ``` 81 | 82 | We can then make plots of the true and estimated trend, 83 | 84 | ```{r} 85 | plot(c(sim_dat$x), xlab="Time", ylab="True trend") 86 | ``` 87 | 88 | ```{r} 89 | plot_trends(rotate_trends(mod)) + ylab("Estimated trend") + theme_bw() 90 | ``` 91 | 92 | This approach could be modified to have covariates not affecting some time series. 
For example, if we didn't want the covariate to affect the last time series, we could say 93 | 94 | ```{r} 95 | cov = cov[which(cov$timeseries!=4),] 96 | ``` 97 | 98 | And then again fit the model 99 | ```{r eval=FALSE, results='hide', warning=FALSE, message=FALSE} 100 | mod = fit_dfa(y = sim_dat$y, obs_covar = cov, num_trends = trends, 101 | chains=chains) 102 | ``` 103 | 104 | ## Examples -- process covariates 105 | 106 | As a cautionary note, there's some identifiability issues with including covariates in the process model. Covariates need to be standardized or centered prior to being included. Future versions of this vignette will include more clear examples and recommendations. 107 | 108 | We'll start by simulating some random trends using the `sim_dat` function, 109 | 110 | ```{r simulate-data} 111 | set.seed(1) 112 | sim_dat <- sim_dfa( 113 | num_trends = 2, 114 | num_years = 20, 115 | num_ts = 3 116 | ) 117 | ``` 118 | 119 | Next, we can add a covariate effect to the trend estimate, `x`. 
For example, 120 | 121 | ```{r} 122 | cov = rnorm(20, 0, 1) 123 | b_pro = c(1,0.3) 124 | x = matrix(0,2,20) 125 | 126 | for(i in 1:2) { 127 | x[i,1] = cov[1]*b_pro[i] 128 | } 129 | 130 | for(i in 2:length(cov)) { 131 | x[1,i] = x[1,i-1] + cov[i]*b_pro[1] + rnorm(1,0,1) 132 | x[2,i] = x[2,i-1] + cov[i]*b_pro[2] + rnorm(1,0,1) 133 | } 134 | 135 | y = sim_dat$Z %*% x 136 | ``` 137 | 138 | And now fit the model with `fit_dfa` 139 | 140 | ```{r, eval=FALSE, results='hide', warning=FALSE, message=FALSE} 141 | pro_cov = expand.grid("trend"=1:2, "time"=1:20, "covariate"=1) 142 | pro_cov$value = cov[pro_cov$time] 143 | 144 | mod = fit_dfa(y = sim_dat$y, pro_covar = pro_cov, num_trends = 2, 145 | chains=chains, iter=iter) 146 | 147 | ``` 148 | -------------------------------------------------------------------------------- /vignettes/a4_smooth.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Examples of fitting smooth trend DFA models" 3 | author: "Eric J. Ward, Sean C. Anderson, Mary E. Hunsicker, Mike A. Litzow, Luis A. Damiano, Mark D. Scheuerell, Elizabeth E. Holmes, Nick Tolimieri" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Examples of fitting smooth trend DFA models} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | \usepackage[utf8]{inputenc} 10 | --- 11 | 12 | In addition to fitting conventional DFA models with trends modeled as random walks (or ARMA processes), we can also construct models where underlying trends are treated as smooth trends (B-splines, P-splines, or Gaussian processes). 
13 | 14 | ```{r set-knitr-options, cache=FALSE, echo=FALSE, warning=FALSE, message=FALSE} 15 | library("knitr") 16 | opts_chunk$set(message = FALSE, fig.width = 5.5) 17 | ``` 18 | 19 | Let's load the necessary packages: 20 | 21 | ```{r, message=FALSE, warning=FALSE} 22 | library(bayesdfa) 23 | library(ggplot2) 24 | library(dplyr) 25 | library(rstan) 26 | chains = 1 27 | iter = 10 28 | ``` 29 | 30 | ## Data simulation 31 | 32 | The `sim_dfa` function normally simulates loadings $\sim N(0,1)$, but here we will simulate time series that are more similar with loadings $\sim N(1,0.1)$ 33 | 34 | ```{r} 35 | set.seed(1) 36 | s = sim_dfa(num_trends = 1, num_years = 1000, num_ts = 4, 37 | loadings_matrix = matrix(nrow = 4, ncol = 1, rnorm(4 * 1, 38 | 1, 0.1)), sigma=0.05) 39 | ``` 40 | 41 | ```{r} 42 | matplot(t(s$y_sim), type="l") 43 | ``` 44 | 45 | ## Estimating trends as B-splines 46 | 47 | As a first approach, we can fit models where trends are estimated as B-splines. To do this, we change the `trend_model` argument, and specify the number of knots. More knots translates to smoother functions. For example, 48 | 49 | ```{r eval = FALSE} 50 | set.seed(1) 51 | fit = fit_dfa(y = s$y_sim, num_trends = 1, 52 | trend_model = "bs", n_knots = 7) 53 | ``` 54 | 55 | Or for a model with more knots, 56 | 57 | ```{r eval = FALSE} 58 | set.seed(1) 59 | fit = fit_dfa(y = s$y_sim, num_trends = 1, 60 | trend_model = "bs", n_knots = 14) 61 | ``` 62 | 63 | 64 | ## Estimating trends as P-splines 65 | 66 | Obviously, trends from the B-spline model are sensitive to the number of knots. As an alternative, we also allow trends to be modeled as penalized regression splines ("P-splines"). These methods are less sensitive to the numbers of knots, and only require the knots to be enough to adequately describe the wiggliness of the function. 
67 | 68 | We can fit these kinds of models by changing the `trend_model` argument 69 | ```{r eval = FALSE} 70 | set.seed(1) 71 | fit = fit_dfa(y = s$y_sim, num_trends = 1, 72 | trend_model = "ps", n_knots = 7) 73 | ``` 74 | 75 | ## Estimating trends as Gaussian processes 76 | 77 | Finally, another type of smoothing that can be done is treating the trends as Gaussian processes. Both full rank models (knots = time points) or predictive process models may be fit (fewer knots results in smoother functions). These types of models may be specified by again changing the `trend_model` argument, 78 | 79 | ```{r eval = FALSE} 80 | set.seed(1) 81 | fit = fit_dfa(y = s$y_sim, num_trends = 1, 82 | trend_model = "gp", n_knots = 7) 83 | ``` 84 | 85 | ## Comparing approaches 86 | 87 | All of the smooth trend methods are flexible and able to capture the wiggliness of latent trends. Based on our experience, the B-spline and P-spline models will generally fit faster than the Gaussian predicitve process models (because they omit a critical matrix inversion step). The full rank Gaussian process models tend to be faster than the predictive process models. All of these approaches can be compared using cross validation, or similar predictive performance criterion. 88 | -------------------------------------------------------------------------------- /vignettes/a6_compositional.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Fitting compositional dynamic factor models with bayesdfa" 3 | author: "Eric J. Ward, Sean C. Anderson, Mary E. Hunsicker, Mike A. Litzow, Luis A. Damiano, Mark D. Scheuerell, Elizabeth E. 
Holmes, Nick Tolimieri" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Fitting compositional dynamic factor models with bayesdfa} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | \usepackage[utf8]{inputenc} 10 | --- 11 | 12 | An extension of conventional DFA is to change the constraints on the estimated loadings, $Z$. DFA models generally impose constraints for identifiability, where individual time series are allowed to map onto dynamic factors with loadings ranging from large negative to large positive ones. Instead, if we viewed a collection of multivariate time series as originating from a compositional model - such that the loadings $Z$ were forced to be positive, and each of the time series arises as a mixture of the estimated loadings. 13 | 14 | ```{r set-knitr-options, cache=FALSE, echo=FALSE, warning=FALSE, message=FALSE} 15 | library("knitr") 16 | opts_chunk$set(message = FALSE, fig.width = 5.5) 17 | ``` 18 | 19 | Let's load the necessary packages: 20 | 21 | ```{r, message=FALSE, warning=FALSE} 22 | library(bayesdfa) 23 | library(ggplot2) 24 | library(dplyr) 25 | library(rstan) 26 | chains = 1 27 | iter = 10 28 | ``` 29 | 30 | ## 2 - trend model 31 | 32 | First, let's simulate some data. The compositional DFA model is not interesting with a single trend, so we'll initially start with 2 trends. We'll use the sim_dfa function to simulate the random walks (assuming 20 time steps, and 2 latent trends). 33 | 34 | ```{r} 35 | set.seed(1) 36 | s = sim_dfa(num_trends = 2, num_years = 20, 37 | num_ts = 5) 38 | ``` 39 | 40 | Next, we can create some compositional loadings for the mixture. These could random (e.g. using 'rdirichlet'), but we'll pass in known values. 
41 | 42 | ```{r} 43 | m = matrix(0, nrow=5,ncol=2) 44 | m[1,] = c(0.8, 0.2) # time series # 1 is 80% trend 1 45 | m[2,] = c(0.9, 0.1) # time series # 2 is 90% trend 1 46 | m[3,] = c(0.3, 0.7) # time series # 3 is 30% trend 1 47 | m[4,] = c(0.35, 0.65) # time series # 4 is 35% trend 1 48 | m[5,] = c(0.7, 0.2) # time series # 5 is 70% trend 1 49 | ``` 50 | 51 | Using the simulated trends and compositional data $m$, we can now create the simulated data, 52 | 53 | ```{r} 54 | pred = m%*%s$x 55 | y = pred + matrix(rnorm(nrow(pred)*ncol(pred),0,0.1), nrow=nrow(pred), ncol = ncol(pred)) 56 | ``` 57 | 58 | Next, we can pass this data to the fit_dfa() function and try to recover our known loadings. Note: the compositional model has a nasty label-switching issue that our 'chain flipping' function doesn't handle well. Multiple chains may be run, but need to be examined to evaluate whether they need to be flipped. To avoid that situation, we'll just use a single chain. 59 | 60 | ```{r, message=FALSE, warning=FALSE, results='hide'} 61 | fit <- fit_dfa(y = y, iter = iter, chains = chains, num_trends = 2, seed = 42, 62 | z_model = "proportion",scale="center") 63 | ``` 64 | 65 | We can't rotate the Z matrix -- because the compositional constraint would be destroyed. 
Other than the loadings on time series 3 and 4, these loadings are close to those in the simualting model, 66 | 67 | ```{r} 68 | pars = rstan::extract(fit$model,permuted=TRUE) 69 | rounded_Z = round(apply(pars$Z,c(2,3),mean),2) 70 | print(rounded_Z[,c(2,1)]) 71 | ``` 72 | 73 | Combining the estimated trends and true trends in the simulation shows that the trends are offset by an intercept, but track the overall simulated values very well (time series 1 represents the estimated trend trying to recover the true trend indicated with time series 3, time series 2 represents the estimated trend trying to recover the true trend indicated with time series 4) 74 | 75 | ```{r} 76 | x = apply(pars$x, c(2,3), mean)[c(2,1),] 77 | matplot(t(rbind(x,s$x))) 78 | ``` 79 | 80 | 81 | ## 3 - trend model 82 | 83 | We can extend this approach to also include a model with 3 trends, 84 | 85 | ```{r} 86 | set.seed(1) 87 | s = sim_dfa(num_trends = 3, num_years = 20, 88 | num_ts = 5) 89 | ``` 90 | 91 | Next, we can create some compositional loadings for the mixture. These could random (e.g. using 'rdirichlet'), but we'll pass in known values. 
92 | 93 | ```{r} 94 | m = matrix(0, nrow=5,ncol=3) 95 | m[1,] = c(0.31, 0.48,0.21) # time series # 1 96 | m[2,] = c(0.25, 0.04, 0.71) # time series # 2 97 | m[3,] = c(0.21, 0.28, 0.51) # time series # 3 98 | m[4,] = c(0.6, 0.02, 0.38) # time series # 4 99 | m[5,] = c(0.15, 0.21, 0.64) # time series # 5 100 | ``` 101 | 102 | Add observation error 103 | ```{r} 104 | pred = m%*%s$x 105 | y = pred + matrix(rnorm(nrow(pred)*ncol(pred),0,0.01), nrow=nrow(pred), ncol = ncol(pred)) 106 | ``` 107 | 108 | Next we can fit the compositional DFA model, 109 | 110 | ```{r message=FALSE, warning=FALSE, results='hide'} 111 | fit <- fit_dfa(y = y, iter = iter, chains = chains, num_trends = 3, seed = 42, 112 | z_model = "proportion",scale="center") 113 | ``` 114 | 115 | We can look at the true values of the loadings (open triangles) and estimated values (solid circles) to see that for some of the loadings, the model estimates the parameters well. 116 | 117 | ```{r echo=FALSE} 118 | pars = rstan::extract(fit$model,permuted=TRUE) 119 | rounded_Z = round(apply(pars$Z,c(2,3),mean),2) 120 | 121 | df = data.frame("value"=c(rounded_Z), "id" = "estimated", 122 | "trend"=as.factor(sort(rep(1:3,5))), "ts" = as.factor(rep(1:5,3))) 123 | 124 | df2 = data.frame("value"=c(m[,c(3,2,1)]), "id" = "true", 125 | "trend"=as.factor(sort(rep(1:3,5))), "ts" = as.factor(rep(1:5,3))) 126 | 127 | ggplot(data=rbind(df,df2), aes(ts,value,group=trend,col=trend, 128 | fill=trend,shape=id)) + 129 | geom_point(size=4) + 130 | xlab("Time series") + ylab("Value") 131 | ``` 132 | 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /vignettes/a7_bigdata.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Examples of fitting DFA models with lots of data" 3 | author: "Eric J. Ward, Sean C. Anderson, Mary E. Hunsicker, Mike A. Litzow, Luis A. Damiano, Mark D. Scheuerell, Elizabeth E. 
Holmes, Nick Tolimieri" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Examples of fitting DFA models with lots of data} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | \usepackage[utf8]{inputenc} 10 | --- 11 | 12 | For some applications, there may be a huge number of observations (e.g. daily stream flow measurements, bird counts) making estimation with MCMC slow. While estimation (and uncertainty) for final models should be done with MCMC, there are a few much faster alternatives that we can use for these cases. They may be generally useful for other DFA problems -- both in diagnosing convergence problems, and doing preliminary model selection. 13 | 14 | ```{r set-knitr-options, cache=FALSE, echo=FALSE, warning=FALSE, message=FALSE} 15 | library("knitr") 16 | opts_chunk$set(message = FALSE, fig.width = 5.5) 17 | ``` 18 | 19 | Let's load the necessary packages: 20 | 21 | ```{r, message=FALSE, warning=FALSE} 22 | library(bayesdfa) 23 | library(ggplot2) 24 | library(dplyr) 25 | library(rstan) 26 | chains = 1 27 | iter = 10 28 | ``` 29 | 30 | ## Data simulation 31 | 32 | The `sim_dfa` function normally simulates loadings $\sim N(0,1)$, but here we will simulate time series that are more similar with loadings $\sim N(1,0.1)$ 33 | 34 | ```{r} 35 | set.seed(1) 36 | s = sim_dfa(num_trends = 1, num_years = 1000, num_ts = 4, 37 | loadings_matrix = matrix(nrow = 4, ncol = 1, rnorm(4 * 1, 38 | 1, 0.1)), sigma=0.05) 39 | ``` 40 | 41 | ```{r} 42 | matplot(t(s$y_sim), type="l") 43 | ``` 44 | 45 | ## Sampling argument 46 | 47 | In the examples below, we'll take advantage of the `estimation` argument. By default, this defaults to MCMC ("sampling") but can be a few other options described below. If you want to construct a model object, but do no sampling, you can also set this to "none". 
48 | 49 | ```{r eval = FALSE} 50 | fit <- fit_dfa(..., estimation = "sampling") 51 | ``` 52 | 53 | 54 | ## Posterior optimization 55 | 56 | The fastest estimation approach is to do optimze the posterior (this is similar to maximum likelihood but also involves the prior distribution). We can implement this with by setting the estimation argument to "optimizing" 57 | 58 | Note -- because this model has a lot of parameters, estimation can be finicky, and can get stuck in local minima. You may have to start this from several seeds to get the model to converge successfully -- or if there is a mismatch between the model and data, it may not converge at all. 59 | 60 | For example, this model does not converge 61 | 62 | ```{r} 63 | set.seed(123) 64 | m <- fit_dfa(y = s$y_sim, estimation = "optimizing") 65 | ``` 66 | 67 | The optimizing output is saved here (`value` = log posterior, `par` = estimated parameters) 68 | 69 | ```{r} 70 | names(m$model) 71 | ``` 72 | 73 | And if convergence is successful, the optimizer code will be 0 (this model isn't converging) 74 | 75 | ```{r} 76 | m$model$return_code 77 | ``` 78 | 79 | But if we change the seed, the model will converge ok: 80 | 81 | ```{r} 82 | set.seed(124) 83 | m <- fit_dfa(y = s$y_sim, estimation = "optimizing") 84 | ``` 85 | 86 | ```{r} 87 | m$model$return_code 88 | ``` 89 | 90 | ## Posterior approximation 91 | 92 | A second approach to quickly estimating parameters is to use Variational Bayes, which is also implemented in Stan. This is implemented by changing the `estimation` to "vb", as shown below. Note: this gives a helpful message that the maximum number of iterations has been reached, so these results should not be trusted. 93 | 94 | ```{r, message=FALSE, warning=FALSE, eval=FALSE} 95 | m <- fit_dfa(y = s$y_sim, estimation = "vb", seed=123) 96 | ``` 97 | 98 | There are a number of other arguments that can be passed into `rstan::vb()`. 
These include `iter` (maximum iterations, defaults to 10000), `tol_rel_obj` (convergence tolerance, defaults to 0.01), and `output_samples` (posterior samples to save, defaults to 1000). To use these, a function call would be 99 | 100 | ```{r, message=FALSE, warning=FALSE, eval=FALSE} 101 | m <- fit_dfa(y = s$y_sim, estimation = "vb", seed=123, iter=20000, 102 | tol_rel_obj = 0.005, output_samples = 2000) 103 | ``` 104 | 105 | 106 | --------------------------------------------------------------------------------