├── .Rbuildignore ├── .gitattributes ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ └── rhub.yaml ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── family.R ├── fitting.R ├── format_data.R ├── glmmfields-package.R ├── methods.R ├── plot.R ├── predict.R ├── priors.R ├── sim.R └── stan_pars.R ├── README-figs ├── grid-predictions-1.png ├── plot-predictions-1.png ├── plot-predictions-2.png └── plot-sim-1.png ├── README.Rmd ├── README.md ├── configure ├── configure.win ├── cran-comments.md ├── glmmfields.Rproj ├── inst ├── .gitignore ├── CITATION ├── include │ └── stan_meta_header.hpp ├── logo.png ├── logo.svg └── stan │ ├── glmmfields.stan │ └── include │ └── license.stan ├── man ├── format_data.Rd ├── glmmfields-package.Rd ├── glmmfields.Rd ├── lognormal.Rd ├── loo.Rd ├── nbinom2.Rd ├── plot.glmmfields.Rd ├── predict.Rd ├── priors.Rd ├── sim_glmmfields.Rd ├── stan_pars.Rd └── tidy.Rd ├── tests ├── testthat.R └── testthat │ ├── test-families.R │ ├── test-fit-ar-processes.R │ ├── test-fit-basic.R │ ├── test-fit-observation-models.R │ ├── test-fit-with-predictors.R │ ├── test-format-data.R │ ├── test-predict.R │ ├── test-priors.R │ └── test-stations.R └── vignettes └── spatial-glms.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | cleanup* 2 | ^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | ^README-* 5 | ^README\.Rmd 6 | ^appveyor\.yml$ 7 | ^Session.vim$ 8 | ^cran-comments\.md$ 9 | ^inst/logo.png$ 10 | ^inst/logo.svg$ 11 | ^CRAN-RELEASE$ 12 | ^_pkgdown\.yml$ 13 | ^docs$ 14 | ^\.github$ 15 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | data/* binary 3 | src/* text=lf 4 | R/* text=lf 5 | man/* text=lf 6 | DESCRIPTION text=lf 7 | NAMESPACE text=lf 8 | tests/* text=lf 9 | .Rbuildignore text=lf 10 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | pull_request: 9 | branches: 10 | - main 11 | - master 12 | 13 | name: R-CMD-check 14 | 15 | jobs: 16 | R-CMD-check: 17 | if: "! 
contains(github.event.head_commit.message, '[skip ci]')" 18 | runs-on: ${{ matrix.config.os }} 19 | 20 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 21 | 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | config: 26 | - {os: macOS-latest, r: 'release'} 27 | - {os: windows-latest, r: 'release'} 28 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 29 | # - {os: ubuntu-latest, r: 'release'} 30 | # - {os: ubuntu-latest, r: 'oldrel-1'} 31 | 32 | env: 33 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 34 | RSPM: ${{ matrix.config.rspm }} 35 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 36 | 37 | steps: 38 | - uses: actions/checkout@v2 39 | 40 | - uses: r-lib/actions/setup-r@v2 41 | with: 42 | r-version: ${{ matrix.config.r }} 43 | 44 | - uses: r-lib/actions/setup-pandoc@v2 45 | 46 | - name: Query dependencies 47 | run: | 48 | install.packages('remotes') 49 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 50 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 51 | shell: Rscript {0} 52 | 53 | - name: Cache R packages 54 | if: runner.os != 'Windows' 55 | uses: actions/cache@v2 56 | with: 57 | path: ${{ env.R_LIBS_USER }} 58 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 59 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 60 | 61 | - name: Install system dependencies 62 | if: runner.os == 'Linux' 63 | run: | 64 | while read -r cmd 65 | do 66 | eval sudo $cmd 67 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "latest"))') 68 | 69 | - uses: r-lib/actions/setup-r-dependencies@v2 70 | with: 71 | extra-packages: any::rcmdcheck 72 | needs: check 73 | 74 | - name: Check 75 | env: 76 | _R_CHECK_CRAN_INCOMING_REMOTE_: false 77 | run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") 78 | shell: Rscript {0} 79 | 80 | - name: Upload check results 81 | if: failure() 82 | uses: actions/upload-artifact@main 83 | with: 84 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 85 | path: check 86 | -------------------------------------------------------------------------------- /.github/workflows/rhub.yaml: -------------------------------------------------------------------------------- 1 | # R-hub's genetic GitHub Actions workflow file. It's canonical location is at 2 | # https://github.com/r-hub/rhub2/blob/v1/inst/workflow/rhub.yaml 3 | # You can update this file to a newer version using the rhub2 package: 4 | # 5 | # rhub2::rhub_setup() 6 | # 7 | # It is unlikely that you need to modify this file manually. 8 | 9 | name: R-hub 10 | run-name: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }} (${{ github.event.inputs.id }}) 11 | 12 | on: 13 | workflow_dispatch: 14 | inputs: 15 | config: 16 | description: 'A comma separated list of R-hub platforms to use.' 17 | type: string 18 | default: 'linux,windows,macos' 19 | name: 20 | description: 'Run name. You can leave this empty now.' 21 | type: string 22 | id: 23 | description: 'Unique ID. You can leave this empty now.' 
24 | type: string 25 | 26 | jobs: 27 | 28 | setup: 29 | runs-on: ubuntu-latest 30 | outputs: 31 | containers: ${{ steps.rhub-setup.outputs.containers }} 32 | platforms: ${{ steps.rhub-setup.outputs.platforms }} 33 | 34 | steps: 35 | # NO NEED TO CHECKOUT HERE 36 | - uses: r-hub/rhub2/actions/rhub-setup@v1 37 | with: 38 | config: ${{ github.event.inputs.config }} 39 | id: rhub-setup 40 | 41 | linux-containers: 42 | needs: setup 43 | if: ${{ needs.setup.outputs.containers != '[]' }} 44 | runs-on: ubuntu-latest 45 | name: ${{ matrix.config.label }} 46 | strategy: 47 | fail-fast: false 48 | matrix: 49 | config: ${{ fromJson(needs.setup.outputs.containers) }} 50 | container: 51 | image: ${{ matrix.config.container }} 52 | 53 | steps: 54 | - uses: actions/checkout@v3 55 | - uses: r-hub/rhub2/actions/rhub-check@v1 56 | with: 57 | token: ${{ secrets.RHUB_TOKEN }} 58 | job-config: ${{ matrix.config.job-config }} 59 | 60 | other-platforms: 61 | needs: setup 62 | if: ${{ needs.setup.outputs.platforms != '[]' }} 63 | runs-on: ${{ matrix.config.os }} 64 | name: ${{ matrix.config.label }} 65 | strategy: 66 | fail-fast: false 67 | matrix: 68 | config: ${{ fromJson(needs.setup.outputs.platforms) }} 69 | 70 | steps: 71 | - uses: actions/checkout@v3 72 | - uses: r-hub/rhub2/actions/rhub-setup-r@v1 73 | with: 74 | job-config: ${{ matrix.config.job-config }} 75 | token: ${{ secrets.RHUB_TOKEN }} 76 | - uses: r-hub/rhub2/actions/rhub-check@v1 77 | with: 78 | job-config: ${{ matrix.config.job-config }} 79 | token: ${{ secrets.RHUB_TOKEN }} 80 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | *.o 3 | *.so 4 | *.bak 5 | .Rhistory 6 | .DS_Store 7 | .Rapp.history 8 | README-cache/ 9 | inst/doc 10 | vignettes/*.R 11 | vignettes/*.html 12 | inst/tests/mvt-mvn-difference_* 13 | *.html 14 | vignettes/*cache* 15 | vignettes/*files* 16 | src/ 17 | R/stanmodels.R 18 | 19 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Type: Package 2 | Package: glmmfields 3 | Title: Generalized Linear Mixed Models with Robust Random Fields 4 | for Spatiotemporal Modeling 5 | Version: 0.1.8 6 | Authors@R: 7 | c(person(given = c("Sean", "C."), 8 | family = "Anderson", 9 | role = c("aut", "cre"), 10 | email = "sean@seananderson.ca"), 11 | person(given = c("Eric", "J."), 12 | family = "Ward", 13 | role = "aut"), 14 | person(given = "Trustees of", 15 | family = "Columbia University", 16 | role = "cph")) 17 | Description: Implements Bayesian spatial and spatiotemporal 18 | models that optionally allow for extreme spatial deviations through 19 | time. 'glmmfields' uses a predictive process approach with random 20 | fields implemented through a multivariate-t distribution instead of 21 | the usual multivariate normal. Sampling is conducted with 'Stan'. 22 | References: Anderson and Ward (2019) . 
23 | License: GPL (>=3) 24 | URL: https://github.com/seananderson/glmmfields 25 | BugReports: https://github.com/seananderson/glmmfields/issues 26 | Depends: 27 | methods, 28 | R (>= 3.4.0), 29 | Rcpp (>= 0.12.18) 30 | Imports: 31 | assertthat, 32 | broom, 33 | broom.mixed, 34 | cluster, 35 | dplyr (>= 0.8.0), 36 | forcats, 37 | ggplot2 (>= 2.2.0), 38 | loo (>= 2.0.0), 39 | mvtnorm, 40 | nlme, 41 | RcppParallel (>= 5.0.1), 42 | reshape2, 43 | rstan (>= 2.26.0), 44 | rstantools (>= 2.1.1), 45 | tibble 46 | Suggests: 47 | bayesplot, 48 | coda, 49 | knitr, 50 | parallel, 51 | rmarkdown, 52 | testthat, 53 | viridis 54 | LinkingTo: 55 | BH (>= 1.66.0), 56 | Rcpp (>= 0.12.8), 57 | RcppEigen (>= 0.3.3.3.0), 58 | RcppParallel (>= 5.0.1), 59 | rstan (>= 2.26.0), 60 | StanHeaders (>= 2.26.0) 61 | VignetteBuilder: 62 | knitr 63 | Encoding: UTF-8 64 | Roxygen: list(markdown = TRUE) 65 | RoxygenNote: 7.2.3 66 | SystemRequirements: GNU make 67 | NeedsCompilation: yes 68 | Biarch: true 69 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(loo,glmmfields) 4 | S3method(plot,glmmfields) 5 | S3method(posterior_linpred,glmmfields) 6 | S3method(posterior_predict,glmmfields) 7 | S3method(predict,glmmfields) 8 | S3method(predictive_interval,glmmfields) 9 | S3method(print,glmmfields) 10 | S3method(tidy,glmmfields) 11 | export(glmmfields) 12 | export(half_t) 13 | export(lognormal) 14 | export(loo) 15 | export(nbinom2) 16 | export(posterior_linpred) 17 | export(posterior_predict) 18 | export(predictive_interval) 19 | export(sim_glmmfields) 20 | export(student_t) 21 | export(tidy) 22 | import(Rcpp) 23 | import(methods) 24 | import(rstantools) 25 | importFrom(RcppParallel,RcppParallelLibs) 26 | importFrom(assertthat,assert_that) 27 | importFrom(assertthat,is.count) 28 | importFrom(assertthat,is.number) 29 | importFrom(broom,tidy) 30 | importFrom(ggplot2,.data) 31 | importFrom(ggplot2,aes) 32 | importFrom(ggplot2,facet_wrap) 33 | importFrom(ggplot2,geom_hline) 34 | importFrom(ggplot2,geom_point) 35 | importFrom(ggplot2,geom_smooth) 36 | importFrom(ggplot2,ggplot) 37 | importFrom(ggplot2,scale_color_gradient2) 38 | importFrom(loo,loo) 39 | importFrom(rstan,sampling) 40 | importFrom(rstan,vb) 41 | importFrom(rstantools,posterior_linpred) 42 | importFrom(rstantools,posterior_predict) 43 | importFrom(rstantools,predictive_interval) 44 | importFrom(stats,dist) 45 | importFrom(stats,gaussian) 46 | importFrom(stats,median) 47 | importFrom(stats,model.frame) 48 | importFrom(stats,model.matrix) 49 | importFrom(stats,model.offset) 50 | importFrom(stats,model.response) 51 | importFrom(stats,na.omit) 52 | importFrom(stats,predict) 53 | importFrom(stats,quantile) 54 | importFrom(stats,rgamma) 55 | importFrom(stats,rnbinom) 56 | importFrom(stats,rnorm) 57 | importFrom(stats,runif) 58 | useDynLib(glmmfields, .registration = TRUE) 59 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # glmmfields 0.1.8 2 | 3 | * Update for rstan 2.26 (#16) 4 | 5 | # glmmfields 0.1.7 6 | 7 | * Rebuild with newest rstantools to avoid CRAN NOTE about C++ version 8 | 9 | # glmmfields 0.1.6 10 | 11 | * Add back RcppParallel as an import 12 | 13 | # glmmfields 0.1.5 14 | 15 | * Fully delegate installation to rstantools (#15) 16 | 17 | * Other 
minor fixes to pass R CMD check on R devel 18 | 19 | * Add offset functionality 20 | 21 | # glmmfields 0.1.4 22 | 23 | * Make compatible with updated broom.mixed 24 | 25 | # glmmfields 0.1.3 26 | 27 | * Make compatible with C++14. 28 | 29 | # glmmfields 0.1.2 30 | 31 | * Make compatible with R 3.6.0 staged installation and latest rstantools. 32 | 33 | # glmmfields 0.1.1 34 | 35 | * Changed how Stan finds directories / files 36 | 37 | # glmmfields 0.1.0.9000 38 | 39 | * Add support for random walk year effects with covariates. There are a few 40 | specific cases where covariates or the random year effect are not estimable. 41 | Examples are: 42 | 43 | 1. estimating an intercept and AR year effects (intercept confounded with 44 | 1st year effect) 45 | 2. estimating AR year effects and estimating 'phi' — the AR parameter in 46 | spatial field (also confounded) 47 | 48 | * Import S3 methods from rstantools instead of rstanarm (#5) 49 | 50 | * Adjust calculation of year index values to better allow for missing time slices 51 | 52 | # glmmfields 0.1.0 53 | 54 | * Initial submission to CRAN. 55 | -------------------------------------------------------------------------------- /R/family.R: -------------------------------------------------------------------------------- 1 | #' Lognormal family 2 | #' 3 | #' @param link The link (must be log) 4 | #' @export 5 | #' @examples 6 | #' lognormal() 7 | lognormal <- function(link = "log") { 8 | assert_that(identical("log", as.character(substitute(link)))) 9 | list(family = "lognormal", link = "log") 10 | } 11 | 12 | #' Negative binomial family 13 | #' 14 | #' This is the NB2 parameterization where the variance scales quadratically 15 | #' with the mean. 16 | #' 17 | #' @param link The link (must be log) 18 | #' @export 19 | #' @examples 20 | #' nbinom2() 21 | nbinom2 <- function(link = "log") { 22 | assert_that(identical("log", as.character(substitute(link)))) 23 | list(family = "nbinom2", link = "log") 24 | } 25 | 26 | # Check a family object 27 | # 28 | # @param family The family 29 | check_family <- function(family) { 30 | assert_that(identical(class(family), "family") | 31 | identical(lognormal(link = "log"), family) | 32 | identical(nbinom2(link = "log"), family)) 33 | 34 | assert_that(family$family %in% 35 | c("gaussian", "lognormal", "Gamma", "nbinom2", "poisson", "binomial")) 36 | 37 | if (family$family == "gaussian") assert_that(identical(family$link, "identity")) 38 | if (family$family %in% c("lognormal", "Gamma", "nbinom2", "poisson")) { 39 | assert_that(identical(family$link, "log")) 40 | } 41 | if (family$family == "binomial") assert_that(identical(family$link, "logit")) 42 | 43 | list(family = family$family, link = family$link) 44 | } 45 | 46 | logit <- function(x) stats::qlogis(x) 47 | -------------------------------------------------------------------------------- /R/fitting.R: -------------------------------------------------------------------------------- 1 | #' Fit a spatiotemporal random fields GLMM 2 | #' 3 | #' Fit a spatiotemporal random fields model that optionally uses the MVT 4 | #' distribution instead of a MVN distribution to allow for spatial extremes 5 | #' through time. It is also possible to fit a spatial random fields model 6 | #' without a time component. 7 | #' 8 | #' @param formula The model formula. 9 | #' @param data A data frame. 10 | #' @param time A character object giving the name of the time column. Leave 11 | #' as `NULL` to fit a spatial GLMM without a time element. 
12 | #' @param lon A character object giving the name of the longitude column. 13 | #' @param lat A character object giving the name of the latitude column. 14 | #' @param nknots The number of knots to use in the predictive process model. 15 | #' Smaller values will be faster but may not adequately represent the shape 16 | #' of the spatial pattern. 17 | #' @param prior_gp_theta The prior on the Gaussian Process scale parameter. Must 18 | #' be declared with [half_t()]. Here, and throughout, priors that 19 | #' are normal or half-normal can be implemented by setting the first 20 | #' parameter in the half-t or student-t distribution to a large value. 21 | #' E.g. something greater than 100. 22 | #' @param prior_gp_sigma The prior on the Gaussian Process eta parameter. Must 23 | #' be declared with [half_t()]. 24 | #' @param prior_sigma The prior on the observation process scale parameter. Must 25 | #' be declared with [half_t()]. This acts as a substitute for the 26 | #' scale parameter in whatever observation distribution is being used. E.g. 27 | #' the CV for the Gamma or the dispersion parameter for the negative 28 | #' binomial. 29 | #' @param prior_rw_sigma The prior on the standard deviation parameter of the 30 | #' random walk process (if specified). Must be declared with 31 | #' [half_t()]. 32 | #' @param prior_intercept The prior on the intercept parameter. Must be declared 33 | #' with [student_t()]. 34 | #' @param prior_beta The prior on the slope parameters (if any). Must be 35 | #' declared with [student_t()]. 36 | #' @param prior_phi The prior on the AR parameter. Must be 37 | #' declared with [student_t()]. 38 | #' @param fixed_df_value The fixed value for the student-t degrees of freedom 39 | #' parameter if the degrees of freedom parameter is fixed in the MVT. If the 40 | #' degrees of freedom parameter is estimated then this argument is ignored. 41 | #' Must be 1 or greater. Very large values (e.g. the default value) 42 | #' approximate the normal distribution. If the value is >=1000 then a true 43 | #' MVN distribution will be fit. 44 | #' @param estimate_df Logical: should the degrees of freedom parameter be 45 | #' estimated? 46 | #' @param estimate_ar Logical: should the AR (autoregressive) parameter be 47 | #' estimated? Here, this refers to a autoregressive process in the evolution 48 | #' of the spatial field through time. 49 | #' @param fixed_phi_value The fixed value for temporal autoregressive parameter, 50 | #' between random fields at time(t) and time(t-1). If the phi parameter 51 | #' is estimated then this argument is ignored. 52 | #' @param family Family object describing the observation model. Note that only 53 | #' one link is implemented for each distribution. Gamma, negative binomial 54 | #' (specified via [nbinom2()] as `nbinom2(link = "log")`, and Poisson must 55 | #' have a log link. Binomial must have a logit link. Also implemented is the 56 | #' lognormal (specified via [lognormal()] as `lognormal(link = "log")`. 57 | #' Besides the negative binomial and lognormal, other families are specified 58 | #' as shown in \code{\link[stats]{family}}. 59 | #' @param binomial_N A character object giving the optional name of the column containing 60 | #' Binomial sample size. Leave as `NULL` to fit a spatial GLMM with sample sizes (N) = 1, 61 | #' equivalent to bernoulli model. 62 | #' @param covariance The covariance function of the Gaussian Process. 63 | #' One of "squared-exponential", "exponential", or "matern". 
64 | #' @param matern_kappa Optional parameter for the Matern covariance function. 65 | #' Optional values are 1.5 or 2.5. Values of 0.5 are equivalent to exponential. 66 | #' @param algorithm Character object describing whether the model should be fit 67 | #' with full NUTS MCMC or via the variational inference mean-field approach. 68 | #' See [rstan::vb()]. Note that the variational inference approach 69 | #' should not be trusted for final inference and is much more likely to give 70 | #' incorrect inference than MCMC. 71 | #' @param year_re Logical: estimate a random walk for the time variable? If 72 | #' \code{TRUE}, then no fixed effects (B coefficients) will be estimated. 73 | #' In this case, \code{prior_intercept} will be used as the prior for 74 | #' the initial value in time. 75 | #' @param nb_lower_truncation For NB2 only: lower truncation value. E.g. 0 for 76 | #' no truncation, 1 for 1 and all values above. Note that estimation is 77 | #' likely to be considerably slower with lower truncation because the 78 | #' sampling is not vectorized. Also note that the log likelihood values 79 | #' returned for estimating quantities like LOOIC will not be correct if 80 | #' lower truncation is implemented. 81 | #' @param control List to pass to [rstan::sampling()]. For example, 82 | #' increase \code{adapt_delta} if there are warnings about divergent 83 | #' transitions: \code{control = list(adapt_delta = 0.99)}. By default, 84 | #' \pkg{glmmfields} sets \code{adapt_delta = 0.9}. 85 | #' @param save_log_lik Logical: should the log likelihood for each data point be 86 | #' saved so that information criteria such as LOOIC or WAIC can be calculated? 87 | #' Defaults to \code{FALSE} so that the size of model objects is smaller. 88 | #' @param df_lower_bound The lower bound on the degrees of freedom parameter. 89 | #' Values that are too low, e.g. below 2 or 3, it might affect chain 90 | #' convergence. Defaults to 2. 91 | #' @param cluster The type of clustering algorithm used to determine the knot 92 | #' locations. `"pam"` = [cluster::pam()]. The `"kmeans"` 93 | #' algorithm will be faster on larger datasets. 94 | #' @param offset An optional offset vector. 95 | #' @param ... Any other arguments to pass to [rstan::sampling()]. 96 | #' 97 | #' @details 98 | #' Note that there is no guarantee that the default priors are reasonable for 99 | #' your data. Also, there is no guarantee the default priors will remain the 100 | #' same in future versions. Therefore it is important that you specify any 101 | #' priors that are used in your model, even if they replicate the defaults in 102 | #' the package. It is particularly important that you consider that prior on 103 | #' `gp_theta` since it depends on the distance between your location points. You 104 | #' may need to scale your coordinate units so they are on a ballpark range of 105 | #' 1-10 by, say, dividing the coordinates (say in UTMs) by several order of 106 | #' magnitude. 
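#' For instance, coordinates recorded in UTM metres could be rescaled before
#' fitting with something like `dat$lon_scaled <- dat$utm_x / 1e5` and
#' `dat$lat_scaled <- dat$utm_y / 1e5` (a hypothetical sketch: `dat`, `utm_x`,
#' and `utm_y` are illustrative names, not arguments to this package).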
107 | #' 108 | #' @export 109 | #' @importFrom rstan sampling vb 110 | #' @import Rcpp 111 | #' @importFrom stats dist model.frame model.matrix model.response rnorm runif model.offset 112 | #' @importFrom assertthat assert_that is.count is.number 113 | #' @importFrom stats gaussian na.omit 114 | #' @importFrom RcppParallel RcppParallelLibs 115 | #' 116 | #' @examples 117 | #' \donttest{ 118 | #' # Spatiotemporal example: 119 | #' set.seed(1) 120 | #' s <- sim_glmmfields(n_draws = 12, n_knots = 12, gp_theta = 1.5, 121 | #' gp_sigma = 0.2, sd_obs = 0.2) 122 | #' print(s$plot) 123 | #' # options(mc.cores = parallel::detectCores()) # for parallel processing 124 | #' # should use 4 or more chains for real model fits 125 | #' m <- glmmfields(y ~ 0, time = "time", 126 | #' lat = "lat", lon = "lon", data = s$dat, 127 | #' nknots = 12, iter = 1000, chains = 2, seed = 1) 128 | #' 129 | #' # Spatial example (with covariates) from the vignette and customizing 130 | #' # some priors: 131 | #' set.seed(1) 132 | #' N <- 100 # number of data points 133 | #' temperature <- rnorm(N, 0, 1) # simulated temperature data 134 | #' X <- cbind(1, temperature) # design matrix 135 | #' s <- sim_glmmfields(n_draws = 1, gp_theta = 1.2, n_data_points = N, 136 | #' gp_sigma = 0.3, sd_obs = 0.1, n_knots = 12, obs_error = "gamma", 137 | #' covariance = "squared-exponential", X = X, 138 | #' B = c(0.5, 0.2)) # B represents our intercept and slope 139 | #' d <- s$dat 140 | #' d$temperature <- temperature 141 | #' library(ggplot2) 142 | #' ggplot(s$dat, aes(lon, lat, colour = y)) + 143 | #' viridis::scale_colour_viridis() + 144 | #' geom_point(size = 3) 145 | #' m_spatial <- glmmfields(y ~ temperature, data = d, family = Gamma(link = "log"), 146 | #' lat = "lat", lon = "lon", nknots = 12, iter = 2000, chains = 2, 147 | #' prior_beta = student_t(100, 0, 1), prior_intercept = student_t(100, 0, 5), 148 | #' control = list(adapt_delta = 0.95)) 149 | #' } 150 | 151 | glmmfields <- function(formula, data, lon, lat, 152 | time = NULL, 153 | nknots = 15L, 154 | prior_gp_theta = half_t(3, 0, 5), 155 | prior_gp_sigma = half_t(3, 0, 5), 156 | prior_sigma = half_t(3, 0, 5), 157 | prior_rw_sigma = half_t(3, 0, 5), 158 | prior_intercept = student_t(3, 0, 10), 159 | prior_beta = student_t(3, 0, 3), 160 | prior_phi = student_t(1000, 0, 0.5), 161 | fixed_df_value = 1000, 162 | fixed_phi_value = 0, 163 | estimate_df = FALSE, 164 | estimate_ar = FALSE, 165 | family = gaussian(link = "identity"), 166 | binomial_N = NULL, 167 | covariance = c("squared-exponential", "exponential", "matern"), 168 | matern_kappa = 0.5, 169 | algorithm = c("sampling", "meanfield"), 170 | year_re = FALSE, 171 | nb_lower_truncation = 0, 172 | control = list(adapt_delta = 0.9), 173 | save_log_lik = FALSE, 174 | df_lower_bound = 2, 175 | cluster = c("pam", "kmeans"), 176 | offset = NULL, 177 | ...) 
{ 178 | 179 | # argument checks: 180 | covariance <- match.arg(covariance) 181 | algorithm <- match.arg(algorithm) 182 | cluster <- match.arg(cluster) 183 | 184 | gp_sigma_scaling_factor <- 1 # removed option above 185 | 186 | is.count(nb_lower_truncation) 187 | assert_that(nb_lower_truncation >= 0) 188 | assert_that(fixed_df_value >= 1) 189 | is.number(fixed_phi_value) 190 | assert_that(matern_kappa %in% c(0.5, 1.5, 2.5)) 191 | assert_that(is.logical(save_log_lik)) 192 | assert_that(is.logical(estimate_df)) 193 | assert_that(is.logical(estimate_ar)) 194 | assert_that(is.logical(year_re)) 195 | assert_that(is.list(control)) 196 | 197 | family <- check_family(family) 198 | obs_error <- tolower(family$family) 199 | assert_that(obs_error[[1]] %in% c( 200 | "gaussian", "gamma", "poisson", "nbinom2", 201 | "binomial", "lognormal" 202 | )) 203 | 204 | if (covariance == "matern" && !matern_kappa %in% c(1.5, 2.5)) { 205 | warning( 206 | "Matern covariance specified, but Matern kappa not 1.5 or 2.5", 207 | ": defaulting to 0.5, or exponential" 208 | ) 209 | covariance <- "exponential" 210 | } 211 | 212 | if (nb_lower_truncation > 0) { 213 | warning( 214 | "Lower truncation with negative binomial has not been ", 215 | "extensively tested and calculation of log likelihood for information ", 216 | "criteria purposes is likely to be incorrect." 217 | ) 218 | } 219 | 220 | mf <- model.frame(formula, data) 221 | X <- model.matrix(formula, mf) 222 | y <- model.response(mf, "numeric") 223 | fixed_intercept <- ncol(X) == 0 224 | 225 | if (is.null(offset)) offset <- rep(0, length(y)) 226 | 227 | if (is.null(time)) { 228 | data$null_time_ <- 1 229 | time <- "null_time_" 230 | } 231 | 232 | if (is.null(binomial_N)) { 233 | data$null_N_ <- 1 234 | binomial_N <- "null_N_" 235 | } 236 | 237 | if ("station" %in% names(list(...))) { 238 | stop( 239 | "The 'station' argument is no longer needed when calling glmmfields().", 240 | "Please remove it." 241 | ) 242 | } 243 | data$station_ <- paste(data[[lon]], data[[lat]]) 244 | 245 | # user inputs raw data. 
this function formats it for STAN 246 | data_list <- format_data( 247 | data = data, y = y, X = X, time = time, 248 | lon = lon, lat = lat, station = "station_", nknots = nknots, 249 | covariance = covariance, 250 | fixed_intercept = fixed_intercept, cluster = cluster 251 | ) 252 | stan_data <- data_list$spatglm_data 253 | data_knots <- data_list$knots 254 | 255 | obs_model <- switch(obs_error[[1]], gaussian = 1L, gamma = 0L, nbinom2 = 2L, 256 | binomial = 4L, poisson = 5L, lognormal = 6L, 257 | stop("observation model ", obs_error[[1]], " is not defined.") 258 | ) 259 | 260 | est_temporalRE <- if (year_re) 1L else 0L 261 | 262 | stan_data <- c( 263 | stan_data, 264 | list( 265 | prior_gp_theta = parse_t_prior(prior_gp_theta), 266 | prior_gp_sigma = parse_t_prior(prior_gp_sigma), 267 | prior_sigma = parse_t_prior(prior_sigma), 268 | prior_intercept = parse_t_prior(prior_intercept), 269 | prior_rw_sigma = parse_t_prior(prior_rw_sigma), 270 | prior_beta = parse_t_prior(prior_beta), 271 | prior_phi = parse_t_prior(prior_phi), 272 | input_offset = offset, 273 | cov_func = switch(covariance, 274 | exponential = 0L, 275 | `squared-exponential` = 1L, 276 | matern = 2L, 277 | stop("covariance function ", covariance, " is not defined.") 278 | ), 279 | obs_model = obs_model, 280 | est_df = as.integer(estimate_df), 281 | est_phi = as.integer(estimate_ar), 282 | gamma_params = if (obs_error[[1]] == "gamma") 1L else 0L, 283 | norm_params = if (obs_error[[1]] %in% c("gaussian", "lognormal")) 1L else 0L, 284 | nb2_params = if (obs_error[[1]] == "nbinom2") 1L else 0L, 285 | fixed_df_value = fixed_df_value[[1]], 286 | fixed_phi_value = fixed_phi_value, 287 | est_temporalRE = est_temporalRE, 288 | n_year_effects = if (year_re) stan_data$nT else 0L, 289 | lower_truncation = nb_lower_truncation, 290 | fixed_intercept = as.integer(fixed_intercept), 291 | matern_kappa = matern_kappa, 292 | gp_sigma_scaling_factor = gp_sigma_scaling_factor, 293 | nW = if (fixed_df_value[[1]] > 999 && !estimate_df) 0L else stan_data$nT, 294 | df_lower_bound = df_lower_bound, 295 | binomialN = data[,binomial_N] 296 | ) 297 | ) 298 | 299 | if (obs_model %in% c(2L, 4L, 5L)) { # integers: NB2 or binomial or poisson obs model 300 | stan_data <- c(stan_data, list(y_int = stan_data$y)) 301 | } else { 302 | stan_data <- c(stan_data, list(y_int = rep(0L, stan_data$N))) 303 | } 304 | 305 | sampling_args <- list( 306 | object = stanmodels$glmmfields, 307 | data = stan_data, 308 | pars = stan_pars( 309 | obs_error = obs_error, estimate_df = estimate_df, 310 | est_temporalRE = est_temporalRE, estimate_ar = estimate_ar, 311 | fixed_intercept = fixed_intercept, save_log_lik = save_log_lik 312 | ), 313 | control = control, ... 
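    # Note: any extra arguments supplied through ... (e.g., iter, chains,
    # seed) are collected here and forwarded to rstan::sampling() or
    # rstan::vb() via do.call() below.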
314 | ) 315 | 316 | if (algorithm == "meanfield") { 317 | sampling_args$chains <- NULL 318 | sampling_args$control <- NULL 319 | m <- do.call(vb, sampling_args) 320 | } else { 321 | m <- do.call(sampling, sampling_args) 322 | } 323 | 324 | out <- list( 325 | model = m, 326 | knots = tibble::as_tibble(as.data.frame(data_knots)), 327 | y = y, X = X, 328 | data = tibble::as_tibble(data), formula = formula, 329 | covariance = covariance, matern_kappa = matern_kappa, 330 | lon = lon, lat = lat, 331 | time = time, year_re = year_re, 332 | station = data_list$stationID, obs_model = obs_model, 333 | offset = offset, 334 | fixed_intercept = fixed_intercept, family = family 335 | ) 336 | out <- structure(out, class = "glmmfields") 337 | } 338 | -------------------------------------------------------------------------------- /R/format_data.R: -------------------------------------------------------------------------------- 1 | #' Format data for fitting a glmmfields model 2 | #' 3 | #' @param data A data frame to be formatted 4 | #' @param y A numeric vector of the response 5 | #' @param X A matrix of the predictors 6 | #' @param time A character object giving the name of the time column 7 | #' @param lon A character object giving the name of the longitude column 8 | #' @param lat A character object giving the name of the latitude column 9 | #' @param station A numeric vector giving the integer ID of the station 10 | #' @param nknots The number of knots 11 | #' @param covariance The type of covariance function 12 | #' @param fixed_intercept Should the intercept be fixed? 13 | #' @param cluster The type of clustering algorithm used to determine the not locations. 14 | #' \code{"pam"} = \code{\link[cluster]{pam}}. \code{kmeans} is faster for large datasets. 15 | format_data <- function(data, y, X, time, 16 | lon = "lon", lat = "lat", 17 | station = NULL, nknots = 25L, 18 | covariance = c("squared-exponential", 19 | "exponential", "matern"), 20 | fixed_intercept = FALSE, 21 | cluster = c("pam", "kmeans")) { 22 | data <- as.data.frame(data) 23 | cluster <- match.arg(cluster) 24 | covariance <- match.arg(covariance) 25 | 26 | if (is.null(time)) { 27 | data$time <- 1 28 | time <- "time" 29 | } 30 | yearID <- as.numeric(data[, time, drop = TRUE]) 31 | yearID <- yearID - min(yearID) + 1 # convert to 1, ..., nT 32 | if (is.null(station)) { 33 | stationID <- seq_len(nrow(data)) 34 | } else { 35 | stationID <- as.numeric(forcats::as_factor(data[, station, drop = TRUE])) 36 | } 37 | data$stationID <- stationID 38 | 39 | # if stationID is duplicated, perform clustering on the subset of data 40 | if (length(unique(stationID)) < length(stationID)) { 41 | first_instance <- which(!duplicated(stationID)) 42 | 43 | if (cluster == "pam") { 44 | knots <- cluster::pam(data[first_instance, c(lon, lat), drop = FALSE], nknots)$medoids 45 | } else { 46 | if (cluster == "kmeans") { 47 | knots <- stats::kmeans(data[first_instance, c(lon, lat), drop = FALSE], nknots)$centers 48 | } else { 49 | # each point is unique, predictive process not used 50 | knots <- data[first_instance, c(lon, lat)] 51 | } 52 | } 53 | 54 | distKnots <- as.matrix(dist(knots)) 55 | ix <- sort(data[first_instance, "stationID"], index.return = TRUE)$ix 56 | 57 | # Calculate distance from knots to grid 58 | distAll <- as.matrix(stats::dist(rbind(data[first_instance, c(lon, lat)][ix, ], knots))) 59 | nLocs <- length(first_instance) 60 | } else { 61 | if (cluster == "pam") { 62 | knots <- cluster::pam(data[, c(lon, lat), drop = FALSE], nknots)$medoids 63 | } 
else { 64 | if (cluster == "kmeans") { 65 | knots <- stats::kmeans(data[, c(lon, lat), drop = FALSE], nknots)$centers 66 | } else { 67 | # each point is unique, predictive process not used 68 | knots <- data[, c(lon, lat), drop = FALSE] 69 | } 70 | } 71 | distKnots <- as.matrix(dist(knots)) 72 | 73 | # Calculate distance from knots to grid 74 | distAll <- as.matrix(stats::dist(rbind(data[, c(lon, lat), drop = FALSE], knots))) 75 | nLocs <- nrow(data) 76 | } 77 | 78 | if (covariance == "squared-exponential") { 79 | distKnots <- distKnots^2 # squared distances 80 | distAll <- distAll^2 # squared distances 81 | } 82 | 83 | # this is the transpose of the lower left corner 84 | distKnots21 <- t(distAll[-seq_len(nLocs), -seq(nLocs + 1, ncol(distAll))]) 85 | 86 | # create list for Stan 87 | spatglm_data <- list( 88 | nKnots = nknots, 89 | nLocs = nLocs, 90 | nT = max(yearID), 91 | N = length(y), 92 | stationID = stationID, 93 | yearID = yearID, 94 | y = y, 95 | distKnots = distKnots, 96 | distKnots21 = distKnots21, 97 | X = X, 98 | nCov = if(fixed_intercept) 0 else ncol(X) 99 | ) 100 | list(spatglm_data = spatglm_data, knots = knots) 101 | } 102 | -------------------------------------------------------------------------------- /R/glmmfields-package.R: -------------------------------------------------------------------------------- 1 | #' The 'glmmfields' package. 2 | #' 3 | #' @description Implements Bayesian spatial and spatiotemporal models that 4 | #' optionally allow for extreme spatial deviations through time. 'glmmfields' 5 | #' uses a predictive process approach with random fields implemented through a 6 | #' multivariate-t distribution instead of the usual multivariate normal. 7 | #' Sampling is conducted with 'Stan'. 8 | #' 9 | #' @docType package 10 | #' @name glmmfields-package 11 | #' @useDynLib glmmfields, .registration = TRUE 12 | #' @import methods 13 | #' @import Rcpp 14 | #' @import rstantools 15 | #' @importFrom rstan sampling 16 | #' 17 | #' @references 18 | #' Stan Development Team (2018). RStan: the R interface to Stan. R package 19 | #' version 2.18.2. http://mc-stan.org 20 | #' 21 | NULL 22 | 23 | if (getRversion() >= "2.15.1") { 24 | utils::globalVariables(c("pt", "time")) 25 | } 26 | -------------------------------------------------------------------------------- /R/methods.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | #' @import methods 3 | print.glmmfields <- function(x, pars = c("spatialEffectsKnots", "log_lik"), 4 | include = FALSE, ...) { 5 | print(x$model, pars = pars, include = include, ...) 6 | } 7 | 8 | #' Tidy model output 9 | #' 10 | #' @param x Output from [glmmfields()] 11 | #' @param ... Other arguments 12 | #' @export 13 | #' @rdname tidy 14 | tidy <- function(x, ...) { 15 | UseMethod("tidy") 16 | } 17 | 18 | #' @importFrom broom tidy 19 | #' @export 20 | #' @rdname tidy 21 | tidy.glmmfields <- function(x, ...) { 22 | broom.mixed::tidyMCMC(x$model, ...) 23 | } 24 | 25 | #' Return LOO information criteria 26 | #' 27 | #' Extract the LOOIC (leave-one-out information criterion) using 28 | #' [loo::loo()]. 29 | #' 30 | #' @param x Output from [glmmfields()]. 31 | #' Must be fit with `save_log_lik = TRUE`, which is *not* the default. 32 | #' @param ... Arguments for [loo::relative_eff()] and [loo::loo.array()]. 
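#' @return The object returned by [loo::loo.array()].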
33 | #' 34 | #' @export 35 | #' @examples 36 | #' \donttest{ 37 | #' set.seed(1) 38 | #' s <- sim_glmmfields(n_draws = 12, n_knots = 12, gp_theta = 1.5, 39 | #' gp_sigma = 0.2, sd_obs = 0.2) 40 | #' # options(mc.cores = parallel::detectCores()) # for parallel processing 41 | #' 42 | #' # save_log_lik defaults to FALSE to save space but is needed for loo(): 43 | #' m <- glmmfields(y ~ 0, time = "time", 44 | #' lat = "lat", lon = "lon", data = s$dat, 45 | #' nknots = 12, iter = 1000, chains = 4, seed = 1, 46 | #' save_log_lik = TRUE) 47 | #' loo(m) 48 | #' } 49 | #' @rdname loo 50 | loo.glmmfields <- function(x, ...) { 51 | log_lik <- loo::extract_log_lik(x$model, merge_chains = FALSE) 52 | rel_eff <- loo::relative_eff(exp(log_lik), ...) 53 | loo::loo.array(log_lik, 54 | r_eff = rel_eff, 55 | save_psis = FALSE, ...) 56 | } 57 | 58 | #' @name loo 59 | #' @rdname loo 60 | #' @export 61 | #' @importFrom loo loo 62 | NULL 63 | -------------------------------------------------------------------------------- /R/plot.R: -------------------------------------------------------------------------------- 1 | #' Plot predictions from an glmmfields model 2 | #' 3 | #' @param x An object returned by \code{\link{glmmfields}} 4 | #' @param type Type of plot 5 | #' @param link Logical: should the plots be made on the link scale 6 | #' or on the natural scale? 7 | #' @param ... Other arguments passed to \code{\link{predict.glmmfields}} 8 | #' 9 | #' @importFrom ggplot2 aes ggplot facet_wrap geom_point .data 10 | #' scale_color_gradient2 geom_smooth geom_hline facet_wrap 11 | #' @export 12 | #' @examples 13 | #' \donttest{ 14 | #' # Spatiotemporal example: 15 | #' set.seed(1) 16 | #' s <- sim_glmmfields(n_draws = 12, n_knots = 12, gp_theta = 1.5, 17 | #' gp_sigma = 0.2, sd_obs = 0.1) 18 | #' # options(mc.cores = parallel::detectCores()) # for parallel processing 19 | #' m <- glmmfields(y ~ 0, time = "time", 20 | #' lat = "lat", lon = "lon", data = s$dat, 21 | #' nknots = 12, iter = 600, chains = 1) 22 | #' x <- plot(m, type = "prediction") 23 | #' x 24 | #' x + ggplot2::scale_color_gradient2() 25 | #' plot(m, type = "spatial-residual") 26 | #' plot(m, type = "residual-vs-fitted") 27 | #' } 28 | 29 | plot.glmmfields <- function(x, 30 | type = c("prediction", "spatial-residual", "residual-vs-fitted"), 31 | link = TRUE, ...) { 32 | type <- match.arg(type) 33 | 34 | p <- predict(x, type = ifelse(link, "link", "response"), ...) 
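  # When link = TRUE, the predictions above and the observations (transformed
  # below with the family link) are both on the link scale, so the residuals
  # computed here are link-scale residuals.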
35 | d <- data.frame(x$data, p) 36 | y <- x$y 37 | if (link) y <- do.call(x$family$link, list(y)) 38 | d$residual <- y - p$estimate 39 | 40 | g <- NULL 41 | 42 | if (type == "prediction") { 43 | g <- ggplot(d, aes(.data[[x$lon]], .data[[x$lat]], colour = .data[["estimate"]])) + 44 | geom_point(size = 2) + 45 | facet_wrap(x$time) 46 | } 47 | 48 | if (type == "spatial-residual") { 49 | g <- ggplot(d, aes(.data[[x$lon]], .data[[x$lat]], colour = .data[["residual"]])) + 50 | geom_point(size = 2) + 51 | scale_color_gradient2() + 52 | facet_wrap(x$time) 53 | } 54 | 55 | if (type == "residual-vs-fitted") { 56 | g <- ggplot(d, aes(.data[["estimate"]], .data[["residual"]])) + 57 | geom_point() + 58 | facet_wrap(x$time) + 59 | geom_hline(yintercept = 0, lty = 2) + 60 | geom_smooth(method = "loess", se = FALSE, colour = "red") 61 | } 62 | 63 | g 64 | } 65 | -------------------------------------------------------------------------------- /R/predict.R: -------------------------------------------------------------------------------- 1 | #' Predict from a glmmfields model 2 | #' 3 | #' These functions extract posterior draws or credible intervals. The helper 4 | #' functions are named to match those in the \pkg{rstanarm} package and call the 5 | #' function `predict()` with appropriate argument values. 6 | #' 7 | #' @param object An object returned by [glmmfields()]. 8 | #' @param newdata Optionally, a data frame to predict on 9 | #' @param interval Type of interval calculation. Same as for 10 | #' [stats::predict.lm()]. 11 | #' @param estimate_method Method for computing point estimate ("mean" or 12 | #' "median") 13 | #' @param conf_level Probability level for the credible intervals. 14 | #' @param type Whether the predictions are returned on "link" scale or 15 | #' "response" scale (Same as for [stats::predict.glm()]). 16 | #' @param return_mcmc Logical. Should the full MCMC draws be returned for the 17 | #' predictions? 18 | #' @param offset Optional offset vector to be used in prediction. 19 | #' @param iter Number of MCMC iterations to draw. Defaults to all. 20 | #' @param ... 
Ignored currently 21 | #' 22 | #' @importFrom stats median quantile rgamma rnbinom 23 | #' @importFrom assertthat assert_that 24 | #' 25 | #' @examples 26 | #' \donttest{ 27 | #' library(ggplot2) 28 | #' 29 | #' # simulate: 30 | #' set.seed(1) 31 | #' s <- sim_glmmfields( 32 | #' n_draws = 12, n_knots = 12, gp_theta = 2.5, 33 | #' gp_sigma = 0.2, sd_obs = 0.1 34 | #' ) 35 | #' 36 | #' # fit: 37 | #' # options(mc.cores = parallel::detectCores()) # for parallel processing 38 | #' m <- glmmfields(y ~ 0, 39 | #' data = s$dat, time = "time", 40 | #' lat = "lat", lon = "lon", 41 | #' nknots = 12, iter = 800, chains = 1 42 | #' ) 43 | #' 44 | #' # Predictions: 45 | #' # Link scale credible intervals: 46 | #' p <- predict(m, type = "link", interval = "confidence") 47 | #' head(p) 48 | #' 49 | #' # Prediction intervals on new observations (include observation error): 50 | #' p <- predictive_interval(m) 51 | #' head(p) 52 | #' 53 | #' # Posterior prediction draws: 54 | #' p <- posterior_predict(m, iter = 100) 55 | #' dim(p) # rows are iterations and columns are data elements 56 | #' 57 | #' # Draws from the linear predictor (not in link space): 58 | #' p <- posterior_linpred(m, iter = 100) 59 | #' dim(p) # rows are iterations and columns are data elements 60 | #' 61 | #' # Use the `tidy` method to extract parameter estimates as a data frame: 62 | #' head(tidy(m, conf.int = TRUE, conf.method = "HPDinterval")) 63 | #' 64 | #' # Make predictions on a fine-scale spatial grid: 65 | #' pred_grid <- expand.grid( 66 | #' lat = seq(min(s$dat$lat), max(s$dat$lat), length.out = 25), 67 | #' lon = seq(min(s$dat$lon), max(s$dat$lon), length.out = 25), 68 | #' time = unique(s$dat$time) 69 | #' ) 70 | #' pred_grid$prediction <- predict(m, 71 | #' newdata = pred_grid, type = "response", iter = 100, 72 | #' estimate_method = "median", offset = rep(0, nrow(pred_grid)) 73 | #' )$estimate 74 | #' 75 | #' ggplot(pred_grid, aes(lon, lat, fill = prediction)) + 76 | #' facet_wrap(~time) + 77 | #' geom_raster() + 78 | #' scale_fill_gradient2() 79 | #' } 80 | #' @name predict 81 | NULL 82 | 83 | #' @name predictive_interval 84 | #' @rdname predict 85 | #' @export 86 | #' @importFrom rstantools predictive_interval 87 | NULL 88 | 89 | #' @name posterior_linpred 90 | #' @rdname predict 91 | #' @export 92 | #' @importFrom rstantools posterior_linpred 93 | NULL 94 | 95 | #' @name posterior_predict 96 | #' @rdname predict 97 | #' @export 98 | #' @importFrom rstantools posterior_predict 99 | NULL 100 | 101 | #' @rdname predict 102 | #' @export 103 | predictive_interval.glmmfields <- function(object, ...) { 104 | predict.glmmfields(object, interval = "prediction", type = "response", 105 | return_mcmc = FALSE, ...) 106 | } 107 | 108 | #' @rdname predict 109 | #' @export 110 | posterior_linpred.glmmfields <- function(object, ...) { 111 | predict.glmmfields(object, interval = "confidence", type = "response", 112 | return_mcmc = TRUE, ...) 113 | } 114 | 115 | #' @rdname predict 116 | #' @export 117 | posterior_predict.glmmfields <- function(object, ...) { 118 | predict.glmmfields(object, interval = "prediction", type = "response", 119 | return_mcmc = TRUE, ...) 120 | } 121 | 122 | #' @importFrom stats predict 123 | #' @rdname predict 124 | #' @export 125 | predict.glmmfields <- function(object, newdata = NULL, 126 | estimate_method = c("median", "mean"), 127 | conf_level = 0.95, 128 | interval = c("confidence", "prediction"), 129 | type = c("link", "response"), 130 | return_mcmc = FALSE, 131 | offset = NULL, 132 | iter = "all", ...) 
{ 133 | estimate_method <- match.arg(estimate_method) 134 | interval <- match.arg(interval) 135 | type <- match.arg(type) 136 | 137 | assert_that(is.numeric(conf_level)) 138 | assert_that(identical(length(conf_level), 1L)) 139 | assert_that(conf_level > 0 && conf_level < 1) 140 | assert_that(identical(class(object), "glmmfields")) 141 | 142 | if (interval == "prediction" && type != "response") { 143 | stop("type must be 'response' if interval is 'prediction") 144 | } 145 | 146 | obs_model <- object$obs_model 147 | 148 | if (is.null(newdata)) offset <- object$offset 149 | if (is.null(offset) && !is.null(newdata)) offset <- rep(0, nrow(newdata)) 150 | if (!is.null(newdata) && !is.null(object$offset)) { 151 | if (is.null(offset)) stop("Missing `offset` argument.", call. = FALSE) 152 | } 153 | 154 | # newdata is df with time, y, lon, lat 155 | # if null, defaults to data used to fit model 156 | if (is.null(newdata)) newdata <- object$data 157 | 158 | response <- all.vars(nlme::getResponseFormula(object$formula)) 159 | newdata[[response]] <- 1.0 160 | 161 | newdata <- tibble::as_tibble(newdata) 162 | # create model.matrix() as in fitting function, only with newdata 163 | X <- model.matrix(object$formula, 164 | model.frame(object$formula, newdata, na.action = na.omit)) 165 | 166 | .missing <- colnames(object$X)[!colnames(object$X) %in% colnames(X)] 167 | if (length(.missing) > 0L) 168 | stop(paste(.missing, collapse = ", "), " are missing in `newdata`.") 169 | 170 | if (nrow(X) < nrow(newdata)) 171 | stop("Some predictors in newdata had NA values.") 172 | 173 | time <- object$time 174 | knots <- object$knots 175 | 176 | dist_knots <- as.matrix(dist(knots)) 177 | 178 | # Calculate distance from knots to grid 179 | dist_all <- as.matrix(stats::dist(rbind( 180 | newdata[, c(object$lon, object$lat)], 181 | knots 182 | ))) 183 | n_locs <- nrow(newdata) 184 | 185 | # this is the transpose of the lower left corner 186 | dist_knots21 <- t( 187 | dist_all[-seq_len(n_locs), -seq(n_locs + 1, ncol(dist_all))] 188 | ) 189 | 190 | # extract mcmc pars 191 | pars <- rstan::extract(object$model, permuted = TRUE) 192 | ## 193 | if (iter == "all") { 194 | mcmc.i <- seq_len(length(pars$lp__)) 195 | } else { 196 | mcmc.i <- base::sample(seq_len(length(pars$lp__)), size = iter) 197 | } 198 | mcmc_draws <- length(mcmc.i) 199 | pred_values <- matrix(NA, n_locs, mcmc_draws) 200 | for (i in seq_len(mcmc_draws)) { 201 | # create cov matrix at knots 202 | if (object$covariance == "exponential") { 203 | covmat <- pars$gp_sigma[mcmc.i[i]] * 204 | exp(-dist_knots / pars$gp_theta[mcmc.i[i]]) 205 | covmat21 <- pars$gp_sigma[mcmc.i[i]] * 206 | exp(-dist_knots21 / pars$gp_theta[mcmc.i[i]]) 207 | } 208 | if (object$covariance == "squared-exponential") { 209 | covmat <- pars$gp_sigma[mcmc.i[i]] * 210 | exp(-(dist_knots^2) / (2 * pars$gp_theta[mcmc.i[i]]^2)) 211 | covmat21 <- pars$gp_sigma[mcmc.i[i]] * 212 | exp(-(dist_knots21^2) / (2 * pars$gp_theta[mcmc.i[i]]^2)) 213 | } 214 | if (object$covariance == "matern") { 215 | if (object$matern_kappa == 1.5) { 216 | transformed_dist <- sqrt(3) * dist_knots / pars$gp_theta[mcmc.i[i]] 217 | covmat <- 218 | pars$gp_sigma[mcmc.i[i]] * 219 | (1 + transformed_dist) * exp(-transformed_dist) 220 | 221 | transformed_dist <- sqrt(3) * dist_knots21 / pars$gp_theta[mcmc.i[i]] 222 | covmat21 <- 223 | pars$gp_sigma[mcmc.i[i]] * 224 | (1 + transformed_dist) * exp(-transformed_dist) 225 | } 226 | if (object$matern_kappa == 2.5) { 227 | transformed_dist <- sqrt(5) * dist_knots / pars$gp_theta[mcmc.i[i]] 
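          # Matern (kappa = 2.5) correlation: (1 + d + d^2 / 3) * exp(-d),
          # with d = sqrt(5) * distance / theta, scaled by the gp_sigma draw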
228 | covmat <- 229 | pars$gp_sigma[mcmc.i[i]] * 230 | (1 + transformed_dist + (transformed_dist^2) / 3) * exp(-transformed_dist) 231 | 232 | transformed_dist <- sqrt(5) * dist_knots21 / pars$gp_theta[mcmc.i[i]] 233 | covmat21 <- 234 | pars$gp_sigma[mcmc.i[i]] * 235 | (1 + transformed_dist + (transformed_dist^2) / 3) * exp(-transformed_dist) 236 | } 237 | } 238 | # these are projected spatial effects, dim = new data points x time 239 | spat_eff_knots_i <- pars$spatialEffectsKnots[mcmc.i[i], , ] 240 | if (is.matrix(spat_eff_knots_i)) { 241 | spat_eff_knots_i <- t(spat_eff_knots_i) 242 | } else { # these are for one time slice and are a vector 243 | spat_eff_knots_i <- t(t(spat_eff_knots_i)) 244 | } 245 | spat_effects <- covmat21 %*% solve(covmat) %*% spat_eff_knots_i 246 | 247 | rows <- seq_len(n_locs) 248 | 249 | if (identical(time, "null_time_")) { 250 | newdata$null_time_ <- 1 251 | time <- "null_time_" 252 | } 253 | cols <- as.numeric(as.factor(newdata[, time][[1]])) 254 | # check this for > 1 year. B will also have to be modified 255 | if (!object$year_re) { 256 | if (!object$fixed_intercept) { 257 | pred_values[, i] <- X %*% matrix(pars$B[mcmc.i[i], ], ncol = 1) + 258 | spat_effects[cbind(rows, cols)] 259 | } else { 260 | pred_values[, i] <- spat_effects[cbind(rows, cols)] 261 | } 262 | } else { 263 | pred_values[, i] <- spat_effects[cbind(rows, cols)] + 264 | pars$yearEffects[mcmc.i[i], ][cols] 265 | } 266 | } 267 | 268 | mcmc_draws <- ncol(pred_values) 269 | 270 | # if type == link, don't include observation/data model. 271 | 272 | # If predictions other than on link scale, use observation model and link to 273 | # generate (1) confidence intervals on mean or (2) prediction intervals 274 | # including obs error 275 | 276 | pred_values <- pred_values + offset 277 | 278 | if (type == "response") { 279 | # gamma or NB2 or poisson: 280 | if (obs_model %in% c(0, 2, 5, 6)) pred_values <- exp(pred_values) 281 | 282 | if (obs_model == 1) { 283 | # normal, sigma is returned 284 | pp <- t(apply(pred_values, 1, function(x) 285 | stats::rnorm(mcmc_draws, mean = x, sd = pars$sigma[, 1]))) 286 | } 287 | 288 | # binomial (plogis = inverse logit): 289 | if (obs_model == 4) pred_values <- stats::plogis(pred_values) 290 | 291 | if (obs_model == 0) { 292 | # gamma, CV is returned; gammaA = 1/(CV*CV) 293 | pp <- t(apply(pred_values, 1, function(x) 294 | stats::rgamma(mcmc_draws, 295 | shape = 1 / (pars$CV[, 1]^2), 296 | rate = 1 / (pars$CV[, 1]^2) / x 297 | ))) 298 | } 299 | if (obs_model == 2) { 300 | # negative binomial, phi returned 301 | pp <- t(apply(pred_values, 1, function(x) 302 | stats::rnbinom(mcmc_draws, mu = x, size = pars$nb2_phi[, 1]))) 303 | } 304 | if (obs_model == 4) { 305 | # binomial 306 | pp <- t(apply(pred_values, 1, function(x) 307 | stats::rbinom(mcmc_draws, size = 1, prob = x))) 308 | } 309 | if (obs_model == 5) { 310 | # poisson 311 | pp <- t(apply(pred_values, 1, function(x) 312 | stats::rpois(mcmc_draws, lambda = x))) 313 | } 314 | if (obs_model == 6) { 315 | # lognormal, sigma is returned 316 | pp <- t(apply(pred_values, 1, function(x) 317 | stats::rlnorm(mcmc_draws, 318 | meanlog = log(x), 319 | sdlog = pars$sigma[, 1] 320 | ))) 321 | } 322 | } 323 | 324 | est_method <- switch(estimate_method[[1]], median = median, mean = mean) 325 | out <- data.frame(estimate = apply(pred_values, 1, est_method)) 326 | 327 | if (interval == "confidence") { 328 | out$conf_low <- apply(pred_values, 1, quantile, 329 | probs = (1 - conf_level) / 2 330 | ) 331 | out$conf_high <- 
apply(pred_values, 1, quantile, 332 | probs = 1 - (1 - conf_level) / 2 333 | ) 334 | } 335 | if (interval == "prediction" && type == "response") { 336 | out$conf_low <- apply(pp, 1, quantile, probs = (1 - conf_level) / 2) 337 | out$conf_high <- apply(pp, 1, quantile, probs = 1 - (1 - conf_level) / 2) 338 | } 339 | 340 | out <- tibble::as_tibble(out) 341 | if (return_mcmc) out <- t(pred_values) # to match rstanarm generic methods 342 | out 343 | } 344 | -------------------------------------------------------------------------------- /R/priors.R: -------------------------------------------------------------------------------- 1 | #' Student-t and half-t priors 2 | #' 3 | #' Student-t and half-t priors. Note that this can be used to represent an 4 | #' effectively normal distribution prior by setting the first argument (the 5 | #' degrees of freedom parameter) to a large value (roughly 50 or above). 6 | #' 7 | #' @param df Degrees of freedom parameter 8 | #' @param location Location parameter 9 | #' @param scale Scale parameter 10 | #' @export 11 | #' @rdname priors 12 | #' @examples 13 | #' student_t(3, 0, 1) 14 | student_t <- function(df = 3, location = 0, scale = 1) { 15 | stopifnot( 16 | is.numeric(df), is.numeric(location), is.numeric(scale), 17 | df >= 1, scale > 0 18 | ) 19 | list(dist = "t", df = df, location = location, scale = scale) 20 | } 21 | 22 | #' @export 23 | #' @rdname priors 24 | #' @examples 25 | #' half_t(3, 0, 1) 26 | half_t <- function(df = 3, location = 0, scale = 1) { 27 | if (location != 0) warning("half-t location != 0") 28 | ht <- student_t(df, location, scale) 29 | ht[[1]] <- "half-t" 30 | ht 31 | } 32 | 33 | parse_t_prior <- function(x) { 34 | as.vector(unlist(x)[-1], mode = "numeric") 35 | } 36 | -------------------------------------------------------------------------------- /R/sim.R: -------------------------------------------------------------------------------- 1 | #' Simulate a random field with a MVT distribution 2 | #' 3 | #' @param n_knots The number of knots 4 | #' @param n_draws The number of draws (for example, the number of years) 5 | #' @param gp_theta The Gaussian Process scale parameter 6 | #' @param gp_sigma The Gaussian Process variance parameter 7 | #' @param mvt Logical: MVT? (vs. MVN) 8 | #' @param df The degrees of freedom parameter for the MVT distribution 9 | #' @param seed The random seed value 10 | #' @param n_data_points The number of data points per draw 11 | #' @param sd_obs The observation process scale parameter 12 | #' @param covariance The covariance function of the Gaussian process 13 | #' ("squared-exponential", "exponential", "matern") 14 | #' @param matern_kappa The optional matern parameter. Can be 1.5 or 2.5. Values 15 | #' of 0.5 equivalent to exponential model. 16 | #' @param obs_error The observation error distribution 17 | #' @param B A vector of parameters. 
The first element is the intercept 18 | #' @param phi The auto regressive parameter on the mean of the random field knots 19 | #' @param X The model matrix 20 | #' @param g Grid of points 21 | #' @export 22 | #' @importFrom ggplot2 ggplot facet_wrap geom_point scale_color_gradient2 23 | #' @examples 24 | #' s <- sim_glmmfields(n_draws = 12, n_knots = 12, gp_theta = 1.5, 25 | #' gp_sigma = 0.2, sd_obs = 0.2) 26 | #' names(s) 27 | sim_glmmfields <- function(n_knots = 15, n_draws = 10, gp_theta = 0.5, 28 | gp_sigma = 0.2, mvt = TRUE, df = 1e6, 29 | seed = NULL, n_data_points = 100, 30 | sd_obs = 0.1, 31 | covariance = c("squared-exponential", "exponential", "matern"), 32 | matern_kappa = 0.5, 33 | obs_error = c("normal", "gamma", "poisson", "nb2", "binomial", "lognormal"), 34 | B = c(0), phi = 0, X = rep(1, n_draws * n_data_points), 35 | g = data.frame( 36 | lon = runif(n_data_points, 0, 10), 37 | lat = runif(n_data_points, 0, 10) 38 | )) { 39 | obs_error <- match.arg(obs_error) 40 | covariance <- match.arg(covariance) 41 | 42 | station_id <- seq_len(n_data_points) 43 | n_pts <- nrow(g) 44 | 45 | if (!is.null(seed)) { 46 | set.seed(seed) 47 | } 48 | 49 | # cluster analysis to determine knot locations 50 | knots <- jitter(cluster::pam(g, n_knots)$medoids) 51 | distKnots <- as.matrix(dist(knots)) 52 | 53 | if (covariance == "matern") { 54 | if (matern_kappa %in% c(1.5, 2.5) == FALSE) { 55 | matern_kappa <- 0.5 56 | covariance[[1]] <- "exponential" 57 | } 58 | else { 59 | if (matern_kappa == 1.5) { 60 | dist_knots_sq <- distKnots # NOT squared distances despite name 61 | transformed_dist <- sqrt(3) * dist_knots_sq / gp_theta 62 | cor_knots <- (1 + transformed_dist) * exp(-transformed_dist) 63 | } 64 | if (matern_kappa == 2.5) { 65 | dist_knots_sq <- distKnots # NOT squared distances despite name 66 | transformed_dist <- sqrt(5) * dist_knots_sq / gp_theta 67 | cor_knots <- (1 + transformed_dist + (transformed_dist^2) / 3) * 68 | exp(-transformed_dist) 69 | } 70 | } 71 | } 72 | if (covariance == "squared-exponential") { 73 | dist_knots_sq <- distKnots^2 # squared distances 74 | cor_knots <- exp(-dist_knots_sq / (2 * gp_theta^2)) 75 | } 76 | if (covariance == "exponential") { 77 | dist_knots_sq <- distKnots # NOT squared distances despite name 78 | cor_knots <- exp(-dist_knots_sq / (gp_theta)) 79 | } 80 | 81 | sigma_knots <- gp_sigma^2 * cor_knots 82 | invsigma_knots <- base::solve(sigma_knots) 83 | 84 | # this is the transpose of the lower left corner 85 | if (covariance == "squared-exponential") { 86 | # calculate distance from knots to grid 87 | dist_all <- as.matrix(dist(rbind(g, knots)))^2 88 | dist_knots21_sq <- t( 89 | dist_all[-c(seq_len(n_pts)), -c((n_pts + 1):ncol(dist_all))] 90 | ) 91 | sigma21 <- gp_sigma^2 * exp(-dist_knots21_sq / (2 * gp_theta^2)) 92 | } 93 | if (covariance == "exponential") { 94 | # calculate distance from knots to grid 95 | dist_all <- as.matrix(dist(rbind(g, knots))) 96 | dist_knots21_sq <- t( # NOT squared distances despite name 97 | dist_all[-c(seq_len(n_pts)), -c((n_pts + 1):ncol(dist_all))] 98 | ) 99 | sigma21 <- gp_sigma^2 * exp(-dist_knots21_sq / (gp_theta)) 100 | } 101 | if (covariance[[1]] == "matern") { 102 | # calculate distance from knots to grid 103 | dist_all <- as.matrix(dist(rbind(g, knots))) 104 | dist_knots21_sq <- t( # NOT squared distances despite name 105 | dist_all[-c(seq_len(n_pts)), -c((n_pts + 1):ncol(dist_all))] 106 | ) 107 | if (matern_kappa == 1.5) { 108 | transformed_dist <- sqrt(3) * dist_knots21_sq / gp_theta 109 | sigma21 <- 
gp_sigma^2 * (1 + transformed_dist) * exp(-transformed_dist) 110 | } 111 | if (matern_kappa == 2.5) { 112 | transformed_dist <- sqrt(5) * dist_knots21_sq / gp_theta 113 | sigma21 <- gp_sigma^2 * (1 + transformed_dist + (transformed_dist^2) / 3) * 114 | exp(-transformed_dist) 115 | } 116 | } 117 | 118 | # generate vector of random effects 119 | # each 'draw' here is hypothetical draw from posterior 120 | # initialize: 121 | re_knots <- matrix(ncol = n_knots, nrow = n_draws) 122 | if (mvt) { 123 | re_knots[1, ] <- mvtnorm::rmvt(1, sigma = sigma_knots, df = df) 124 | } 125 | if (!mvt) { 126 | re_knots[1, ] <- mvtnorm::rmvnorm(1, sigma = sigma_knots) 127 | } 128 | # potentially with AR process: 129 | if (n_draws > 1) { 130 | for (i in seq(2, n_draws)) { 131 | if (mvt) { 132 | re_knots[i, ] <- mvtnorm::rmvt(1, 133 | # delta = ar * (re_knots[i - 1, ] - mean(re_knots[i - 1, ])), 134 | delta = phi * (re_knots[i - 1, ]), 135 | sigma = sigma_knots, df = df 136 | ) 137 | } 138 | if (!mvt) { 139 | re_knots[i, ] <- mvtnorm::rmvnorm(1, 140 | # mean = ar * (re_knots[i - 1, ] - mean(re_knots[i - 1, ])), 141 | mean = phi * (re_knots[i - 1, ]), 142 | sigma = sigma_knots 143 | ) 144 | } 145 | } 146 | } 147 | 148 | # project random effects to locations of the data 149 | proj <- t((sigma21 %*% invsigma_knots) %*% t(re_knots)) 150 | 151 | # multiply coefficients by the design matrix 152 | eta <- as.vector(B %*% t(X), mode = "double") 153 | eta_mat <- matrix(eta, nrow = nrow(proj), byrow = TRUE) 154 | 155 | # add the observation process: 156 | N <- ncol(proj) * nrow(proj) 157 | if (obs_error == "normal") { 158 | y <- proj + eta_mat + matrix( 159 | data = stats::rnorm(N, 0, sd_obs), 160 | ncol = ncol(proj), nrow = nrow(proj) 161 | ) 162 | } 163 | if (obs_error == "nb2") { 164 | y <- matrix( 165 | data = stats::rnbinom(N, mu = exp(proj + eta_mat), size = sd_obs), 166 | ncol = ncol(proj), nrow = nrow(proj) 167 | ) 168 | } 169 | if (obs_error == "gamma") { 170 | gamma_a <- 1 / (sd_obs^2) # sd_obs means CV here 171 | gamma_b <- gamma_a / exp(proj + eta_mat) 172 | y <- matrix( 173 | data = stats::rgamma(N, shape = gamma_a, rate = gamma_b), 174 | ncol = ncol(proj), nrow = nrow(proj) 175 | ) 176 | } 177 | if (obs_error == "binomial") { # plogis = inverse_logit 178 | y <- matrix(data = stats::rbinom(N, 179 | size = 1, 180 | prob = stats::plogis(proj + eta_mat) 181 | ), ncol = ncol(proj), nrow = nrow(proj)) 182 | } 183 | if (obs_error == "poisson") { 184 | y <- matrix( 185 | data = stats::rpois(N, lambda = exp(proj + eta_mat)), 186 | ncol = ncol(proj), nrow = nrow(proj) 187 | ) 188 | } 189 | if (obs_error == "lognormal") { 190 | y <- matrix( 191 | data = stats::rlnorm(N, meanlog = proj + eta_mat, sdlog = sd_obs), 192 | ncol = ncol(proj), nrow = nrow(proj) 193 | ) 194 | } 195 | # Reshape for output 196 | out <- reshape2::melt(y) 197 | names(out) <- c("time", "pt", "y") 198 | out <- dplyr::arrange(out, time, pt) 199 | out$lon <- rep(g$lon, n_draws) 200 | out$lat <- rep(g$lat, n_draws) 201 | out$station_id <- rep(station_id, n_draws) 202 | 203 | plot <- ggplot(out, aes(x = .data[["lon"]], y = .data[["lat"]], colour = .data[["y"]])) + 204 | facet_wrap(~time) + 205 | geom_point(size = 2) + 206 | scale_color_gradient2() 207 | 208 | list( 209 | knots = knots, 210 | re_knots = re_knots, 211 | proj = proj, 212 | dist_knots_sq = dist_knots_sq, 213 | dist_knots21_sq = dist_knots21_sq, 214 | sigma_knots = sigma_knots, 215 | g = g, 216 | plot = plot, 217 | dat = out 218 | ) 219 | } 220 | 
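The three covariance options above differ only in the correlation kernel applied to the pairwise distances; in every case the simulated knot effects are then projected to the data locations with `sigma21 %*% invsigma_knots`, the usual predictive-process interpolation. As a quick standalone reference, the kernels can be written as plain functions (the helper names `cor_sqexp`, `cor_exp`, and `cor_matern` below are illustrative only and not part of the package):

``` r
# Correlation kernels matching the simulation code above (illustrative sketch):
cor_sqexp <- function(d, gp_theta) exp(-d^2 / (2 * gp_theta^2))
cor_exp <- function(d, gp_theta) exp(-d / gp_theta)
cor_matern <- function(d, gp_theta, kappa = 1.5) {
  if (kappa == 1.5) {
    td <- sqrt(3) * d / gp_theta
    (1 + td) * exp(-td)
  } else if (kappa == 2.5) {
    td <- sqrt(5) * d / gp_theta
    (1 + td + td^2 / 3) * exp(-td)
  } else {
    stop("kappa must be 1.5 or 2.5")
  }
}

# Compare their shapes over distance:
d <- seq(0, 5, length.out = 200)
plot(d, cor_sqexp(d, gp_theta = 1), type = "l", xlab = "distance", ylab = "correlation")
lines(d, cor_exp(d, gp_theta = 1), lty = 2)
lines(d, cor_matern(d, gp_theta = 1, kappa = 1.5), lty = 3)
legend("topright", c("squared-exponential", "exponential", "matern 1.5"), lty = 1:3)
```

A Matern kernel with kappa = 0.5 reduces to the exponential kernel, which is why `sim_glmmfields()` falls back to `covariance = "exponential"` for any `matern_kappa` other than 1.5 or 2.5.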
-------------------------------------------------------------------------------- /R/stan_pars.R: -------------------------------------------------------------------------------- 1 | #' Return a vector of parameters 2 | #' 3 | #' @param obs_error The observation error distribution 4 | #' @param estimate_df Logical indicating whether the degrees of freedom 5 | #' parameter should be estimated 6 | #' @param est_temporalRE Logical: estimate a random walk for the time variable? 7 | #' @param estimate_ar Logical indicating whether the ar 8 | #' parameter should be estimated 9 | #' @param fixed_intercept Should the intercept be fixed? 10 | #' @param save_log_lik Logical: should the log likelihood for each data point be 11 | #' saved so that information criteria such as LOOIC or WAIC can be calculated? 12 | #' Defaults to \code{FALSE} so that the size of model objects is smaller. 13 | stan_pars <- function(obs_error, estimate_df = TRUE, est_temporalRE = FALSE, 14 | estimate_ar = FALSE, fixed_intercept = FALSE, 15 | save_log_lik = FALSE) { 16 | p <- c( 17 | #"y_new", 18 | "gp_sigma", 19 | "gp_theta", 20 | "B", 21 | switch(obs_error[[1]], lognormal = "sigma", gaussian = "sigma", 22 | gamma = "CV", nbinom2 = "nb2_phi" 23 | ), 24 | "spatialEffectsKnots" 25 | ) 26 | if (estimate_df) p <- c("df", p) 27 | if (estimate_ar) p <- c("phi", p) 28 | if (est_temporalRE) { 29 | p <- c("year_sigma", "yearEffects", p) 30 | } 31 | if (fixed_intercept) p <- p[p != "B"] 32 | if (save_log_lik) p <- c(p, "log_lik") 33 | p 34 | } 35 | -------------------------------------------------------------------------------- /README-figs/grid-predictions-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/glmmfields/ddf3b77ca8bc1f45a370f365842c868b7c25dd84/README-figs/grid-predictions-1.png -------------------------------------------------------------------------------- /README-figs/plot-predictions-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/glmmfields/ddf3b77ca8bc1f45a370f365842c868b7c25dd84/README-figs/plot-predictions-1.png -------------------------------------------------------------------------------- /README-figs/plot-predictions-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/glmmfields/ddf3b77ca8bc1f45a370f365842c868b7c25dd84/README-figs/plot-predictions-2.png -------------------------------------------------------------------------------- /README-figs/plot-sim-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/glmmfields/ddf3b77ca8bc1f45a370f365842c868b7c25dd84/README-figs/plot-sim-1.png -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | md_document: 4 | variant: gfm 5 | --- 6 | 7 | 8 | 9 | ```{r, echo = FALSE} 10 | knitr::opts_chunk$set( 11 | collapse = TRUE, 12 | comment = "#>", 13 | fig.path = "README-figs/", 14 | cache.path = "README-cache/" 15 | ) 16 | options(width = 120) # for printing Stan output 17 | ``` 18 | 19 | # glmmfields 20 | 21 | [![R build status](https://github.com/seananderson/glmmfields/workflows/R-CMD-check/badge.svg)](https://github.com/seananderson/glmmfields/actions) 22 | [![CRAN 
status](https://www.r-pkg.org/badges/version/glmmfields)](https://cran.r-project.org/package=glmmfields) 23 | 24 | 25 | The glmmfields R package implements Bayesian spatiotemporal models that allow for extreme 26 | spatial deviations through time. It uses a predictive process approach with 27 | random fields implemented through a multivariate-t distribution instead of a 28 | multivariate normal. The models are fit with [Stan](http://mc-stan.org/). 29 | 30 | We published a paper describing the model and package in *Ecology*: 31 | 32 | Anderson, S. C., Ward, E. J. 2019. Black swans in space: modelling 33 | spatiotemporal processes with extremes. 100(1):e02403. 34 | 35 | 36 | You can install the [CRAN version](https://cran.r-project.org/package=glmmfields) of the package with: 37 | 38 | ```{r, eval=FALSE} 39 | install.packages("glmmfields") 40 | ``` 41 | 42 | If you have a C++ compiler installed, you can install the development version of the package with: 43 | 44 | ```{r, eval=FALSE} 45 | # install.packages("remotes") 46 | remotes::install_github("seananderson/glmmfields", build_vignettes = TRUE) 47 | ``` 48 | 49 | glmmfields can also fit spatial GLMs with Stan. See the vignette: 50 | 51 | ```{r, eval=FALSE} 52 | vignette("spatial-glms", package = "glmmfields") 53 | ``` 54 | 55 | ## An example spatiotemporal model 56 | 57 | ```{r libraries, cache=FALSE} 58 | library(glmmfields) 59 | library(ggplot2) 60 | ``` 61 | 62 | Simulate data: 63 | 64 | ```{r simulate, cache=TRUE} 65 | set.seed(42) 66 | s <- sim_glmmfields( 67 | df = 2.8, n_draws = 12, n_knots = 12, gp_theta = 2.5, 68 | gp_sigma = 0.2, sd_obs = 0.1 69 | ) 70 | head(s$dat) 71 | ``` 72 | 73 | ```{r plot-sim, cache=TRUE, dependson="simulate"} 74 | print(s$plot) 75 | ``` 76 | 77 | Fit the model: 78 | 79 | ```{r fit, cache=TRUE, warning=FALSE, message=FALSE, results='hide', dependson="simulate", cache.comments=FALSE} 80 | options(mc.cores = parallel::detectCores()) # for parallel processing 81 | m <- glmmfields(y ~ 0, 82 | data = s$dat, time = "time", 83 | lat = "lat", lon = "lon", 84 | nknots = 12, estimate_df = TRUE, iter = 800, seed = 1 85 | ) 86 | ``` 87 | 88 | ```{r print, cache=FALSE, dependson="fit"} 89 | print(m) 90 | ``` 91 | 92 | Plot: 93 | 94 | ```{r plot-predictions, dependson="fit"} 95 | plot(m, type = "prediction") + scale_color_gradient2() 96 | plot(m, type = "spatial-residual") 97 | ``` 98 | 99 | Predictions: 100 | 101 | ```{r get-predictions, dependson="fit"} 102 | # link scale: 103 | p <- predict(m) 104 | head(p) 105 | 106 | # posterior predictive intervals on new observations (include observation error): 107 | p <- predictive_interval(m) 108 | head(p) 109 | ``` 110 | 111 | Use the `tidy` method to extract parameter estimates as a data frame: 112 | 113 | ```{r tidy, dependson="fit"} 114 | x <- tidy(m, conf.int = TRUE) 115 | head(x) 116 | ``` 117 | 118 | Make predictions on a fine-scale spatial grid: 119 | 120 | ```{r grid-predictions, dependson="fit"} 121 | pred_grid <- expand.grid( 122 | lat = seq(min(s$dat$lat), max(s$dat$lat), length.out = 25), 123 | lon = seq(min(s$dat$lon), max(s$dat$lon), length.out = 25), 124 | time = unique(s$dat$time) 125 | ) 126 | 127 | pred_grid$prediction <- predict(m, 128 | newdata = pred_grid, type = "response", iter = 100, estimate_method = "median" 129 | )$estimate 130 | 131 | ggplot(pred_grid, aes(lon, lat, fill = prediction)) + 132 | facet_wrap(~time) + 133 | geom_raster() + 134 | scale_fill_gradient2() 135 | ``` 136 | 137 | # References 138 | 139 | Anderson, S. C., Ward, E. J. 2019. 
Black swans in space: modelling 140 | spatiotemporal processes with extremes. 100(1):e02403. 141 | 142 | 143 | Latimer, A. M., S. Banerjee, H. Sang Jr, E. S. Mosher, and J. A. Silander Jr. 2009. Hierarchical models facilitate spatial analysis of large data sets: a case study on invasive plant species in the northeastern United States. Ecology Letters 12:144–154. 144 | 145 | Shelton, A. O., J. T. Thorson, E. J. Ward, and B. E. Feist. 2014. Spatial semiparametric models improve estimates of species abundance and distribution. Canadian Journal of Fisheries and Aquatic Sciences 71:1655–1666. 146 | 147 | ### NOAA Disclaimer 148 | 149 | This repository is a scientific product and is not official communication of the National Oceanic and 150 | Atmospheric Administration, or the United States Department of Commerce. All NOAA GitHub project code is 151 | provided on an ‘as is’ basis and the user assumes responsibility for its use. Any claims against the Department of 152 | Commerce or Department of Commerce bureaus stemming from the use of this GitHub project will be governed 153 | by all applicable Federal law. Any reference to specific commercial products, processes, or services by service 154 | mark, trademark, manufacturer, or otherwise, does not constitute or imply their endorsement, recommendation or 155 | favoring by the Department of Commerce. The Department of Commerce seal and logo, or the seal and logo of a 156 | DOC bureau, shall not be used in any manner to imply endorsement of any commercial product or activity by 157 | DOC or the United States Government. 158 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # glmmfields 4 | 5 | [![R build 6 | status](https://github.com/seananderson/glmmfields/workflows/R-CMD-check/badge.svg)](https://github.com/seananderson/glmmfields/actions) 7 | [![CRAN 8 | status](https://www.r-pkg.org/badges/version/glmmfields)](https://cran.r-project.org/package=glmmfields) 9 | 10 | 11 | The glmmfields R package implements Bayesian spatiotemporal models that 12 | allow for extreme spatial deviations through time. It uses a predictive 13 | process approach with random fields implemented through a multivariate-t 14 | distribution instead of a multivariate normal. The models are fit with 15 | [Stan](http://mc-stan.org/). 16 | 17 | We published a paper describing the model and package in *Ecology*: 18 | 19 | Anderson, S. C., Ward, E. J. 2019. Black swans in space: modelling 20 | spatiotemporal processes with extremes. 100(1):e02403. 21 | 22 | 23 | You can install the [CRAN 24 | version](https://cran.r-project.org/package=glmmfields) of the package 25 | with: 26 | 27 | ``` r 28 | install.packages("glmmfields") 29 | ``` 30 | 31 | If you have a C++ compiler installed, you can install the development 32 | version of the package with: 33 | 34 | ``` r 35 | # install.packages("remotes") 36 | remotes::install_github("seananderson/glmmfields", build_vignettes = TRUE) 37 | ``` 38 | 39 | glmmfields can also fit spatial GLMs with Stan. 
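For example, a purely spatial model (no time element) can be fit by leaving out the `time` argument. A minimal sketch, assuming a data frame `d` with a response `y`, a covariate `temperature`, and coordinate columns `lon` and `lat` (placeholder names, not data shipped with the package):

``` r
library(glmmfields)
# spatial-only GLM: note there is no `time` argument
m_spatial <- glmmfields(
  y ~ temperature,
  data = d, family = Gamma(link = "log"),
  lat = "lat", lon = "lon",
  nknots = 12, iter = 2000, chains = 4
)
```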
See the vignette: 40 | 41 | ``` r 42 | vignette("spatial-glms", package = "glmmfields") 43 | ``` 44 | 45 | ## An example spatiotemporal model 46 | 47 | ``` r 48 | library(glmmfields) 49 | #> Loading required package: Rcpp 50 | library(ggplot2) 51 | ``` 52 | 53 | Simulate data: 54 | 55 | ``` r 56 | set.seed(42) 57 | s <- sim_glmmfields( 58 | df = 2.8, n_draws = 12, n_knots = 12, gp_theta = 2.5, 59 | gp_sigma = 0.2, sd_obs = 0.1 60 | ) 61 | head(s$dat) 62 | #> time pt y lon lat station_id 63 | #> 1 1 1 0.02818963 9.148060 6.262453 1 64 | #> 2 1 2 -0.21924739 9.370754 2.171577 2 65 | #> 3 1 3 -0.34719485 2.861395 2.165673 3 66 | #> 4 1 4 -0.15785483 8.304476 3.889450 4 67 | #> 5 1 5 -0.04703617 6.417455 9.424557 5 68 | #> 6 1 6 -0.23904924 5.190959 9.626080 6 69 | ``` 70 | 71 | ``` r 72 | print(s$plot) 73 | ``` 74 | 75 | ![](README-figs/plot-sim-1.png) 76 | 77 | Fit the model: 78 | 79 | ``` r 80 | options(mc.cores = parallel::detectCores()) # for parallel processing 81 | m <- glmmfields(y ~ 0, 82 | data = s$dat, time = "time", 83 | lat = "lat", lon = "lon", 84 | nknots = 12, estimate_df = TRUE, iter = 800, seed = 1 85 | ) 86 | ``` 87 | 88 | ``` r 89 | print(m) 90 | #> Inference for Stan model: glmmfields. 91 | #> 4 chains, each with iter=800; warmup=400; thin=1; 92 | #> post-warmup draws per chain=400, total post-warmup draws=1600. 93 | #> 94 | #> mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat 95 | #> df[1] 3.72 0.04 1.47 2.08 2.67 3.37 4.28 7.48 1331 1 96 | #> gp_sigma 0.30 0.00 0.04 0.22 0.27 0.30 0.32 0.39 525 1 97 | #> gp_theta 2.58 0.00 0.07 2.46 2.54 2.58 2.63 2.71 1434 1 98 | #> sigma[1] 0.10 0.00 0.00 0.09 0.10 0.10 0.10 0.10 2207 1 99 | #> lp__ 2291.28 0.42 9.59 2270.34 2285.12 2291.59 2297.73 2308.74 521 1 100 | #> 101 | #> Samples were drawn using NUTS(diag_e) at Mon Feb 13 12:45:42 2023. 102 | #> For each parameter, n_eff is a crude measure of effective sample size, 103 | #> and Rhat is the potential scale reduction factor on split chains (at 104 | #> convergence, Rhat=1). 
105 | ``` 106 | 107 | Plot: 108 | 109 | ``` r 110 | plot(m, type = "prediction") + scale_color_gradient2() 111 | ``` 112 | 113 | ![](README-figs/plot-predictions-1.png) 114 | 115 | ``` r 116 | plot(m, type = "spatial-residual") 117 | ``` 118 | 119 | ![](README-figs/plot-predictions-2.png) 120 | 121 | Predictions: 122 | 123 | ``` r 124 | # link scale: 125 | p <- predict(m) 126 | head(p) 127 | #> # A tibble: 6 × 3 128 | #> estimate conf_low conf_high 129 | #> 130 | #> 1 -0.0283 -0.0868 0.0273 131 | #> 2 -0.291 -0.365 -0.220 132 | #> 3 -0.397 -0.448 -0.346 133 | #> 4 -0.196 -0.266 -0.123 134 | #> 5 -0.0370 -0.110 0.0360 135 | #> 6 -0.214 -0.294 -0.140 136 | 137 | # posterior predictive intervals on new observations (include observation error): 138 | p <- predictive_interval(m) 139 | head(p) 140 | #> # A tibble: 6 × 3 141 | #> estimate conf_low conf_high 142 | #> 143 | #> 1 -0.0283 -0.236 0.181 144 | #> 2 -0.291 -0.507 -0.0904 145 | #> 3 -0.397 -0.596 -0.206 146 | #> 4 -0.196 -0.392 0.00154 147 | #> 5 -0.0370 -0.239 0.172 148 | #> 6 -0.214 -0.423 -0.00659 149 | ``` 150 | 151 | Use the `tidy` method to extract parameter estimates as a data frame: 152 | 153 | ``` r 154 | x <- tidy(m, conf.int = TRUE) 155 | head(x) 156 | #> # A tibble: 6 × 5 157 | #> term estimate std.error conf.low conf.high 158 | #> 159 | #> 1 df[1] 3.37 1.47 2.08 7.48 160 | #> 2 gp_sigma 0.295 0.0432 0.216 0.388 161 | #> 3 gp_theta 2.58 0.0662 2.46 2.71 162 | #> 4 sigma[1] 0.0979 0.00214 0.0939 0.102 163 | #> 5 spatialEffectsKnots[1,1] -0.110 0.0341 -0.175 -0.0442 164 | #> 6 spatialEffectsKnots[2,1] -0.230 0.0386 -0.305 -0.155 165 | ``` 166 | 167 | Make predictions on a fine-scale spatial grid: 168 | 169 | ``` r 170 | pred_grid <- expand.grid( 171 | lat = seq(min(s$dat$lat), max(s$dat$lat), length.out = 25), 172 | lon = seq(min(s$dat$lon), max(s$dat$lon), length.out = 25), 173 | time = unique(s$dat$time) 174 | ) 175 | 176 | pred_grid$prediction <- predict(m, 177 | newdata = pred_grid, type = "response", iter = 100, estimate_method = "median" 178 | )$estimate 179 | 180 | ggplot(pred_grid, aes(lon, lat, fill = prediction)) + 181 | facet_wrap(~time) + 182 | geom_raster() + 183 | scale_fill_gradient2() 184 | ``` 185 | 186 | ![](README-figs/grid-predictions-1.png) 187 | 188 | # References 189 | 190 | Anderson, S. C., Ward, E. J. 2019. Black swans in space: modelling 191 | spatiotemporal processes with extremes. 100(1):e02403. 192 | 193 | 194 | Latimer, A. M., S. Banerjee, H. Sang Jr, E. S. Mosher, and J. A. 195 | Silander Jr. 2009. Hierarchical models facilitate spatial analysis of 196 | large data sets: a case study on invasive plant species in the 197 | northeastern United States. Ecology Letters 12:144–154. 198 | 199 | Shelton, A. O., J. T. Thorson, E. J. Ward, and B. E. Feist. 2014. 200 | Spatial semiparametric models improve estimates of species abundance and 201 | distribution. Canadian Journal of Fisheries and Aquatic Sciences 202 | 71:1655–1666. 203 | 204 | ### NOAA Disclaimer 205 | 206 | This repository is a scientific product and is not official 207 | communication of the National Oceanic and Atmospheric Administration, or 208 | the United States Department of Commerce. All NOAA GitHub project code 209 | is provided on an ‘as is’ basis and the user assumes responsibility for 210 | its use. Any claims against the Department of Commerce or Department of 211 | Commerce bureaus stemming from the use of this GitHub project will be 212 | governed by all applicable Federal law. 
Any reference to specific 213 | commercial products, processes, or services by service mark, trademark, 214 | manufacturer, or otherwise, does not constitute or imply their 215 | endorsement, recommendation or favoring by the Department of Commerce. 216 | The Department of Commerce seal and logo, or the seal and logo of a DOC 217 | bureau, shall not be used in any manner to imply endorsement of any 218 | commercial product or activity by DOC or the United States Government. 219 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | # Generated by rstantools. Do not edit by hand. 2 | 3 | #! /bin/sh 4 | "${R_HOME}/bin/Rscript" -e "rstantools::rstan_config()" 5 | -------------------------------------------------------------------------------- /configure.win: -------------------------------------------------------------------------------- 1 | # Generated by rstantools. Do not edit by hand. 2 | 3 | #! /bin/sh 4 | "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "rstantools::rstan_config()" 5 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | This version has minor updates on the request of the 'rstan' team. It removes 2 | deprecated syntax for future 'rstan' compatibility. 3 | 4 | At the same time, we have made minor updates to avoid R CMD check NOTEs with 5 | R devel. 6 | 7 | ## Test environments 8 | 9 | * local macOS M2 install, R 4.3.1 10 | * Windows (on github-actions), R 4.3.1 11 | * Ubuntu 22.04.3 (on github-actions), R-devel 12 | * Windows (winbuilder), R-devel 13 | 14 | With memory management checks: 15 | 16 | * Ubuntu 22.04.3 (on github-actions), R-devel with valgrind 17 | 18 | ## R CMD check results 19 | 20 | 0 errors | 0 warnings | 1 notes 21 | 22 | * checking for GNU extensions in Makefiles ... NOTE 23 | GNU make is a SystemRequirements. 24 | 25 | This is correct. 
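As a sketch, a comparable check can be run locally from the package root with the rcmdcheck package (illustrative only; not the exact commands used for this submission):

``` r
# assumes the rcmdcheck package is installed
rcmdcheck::rcmdcheck(".", args = "--as-cran")
```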
26 |
-------------------------------------------------------------------------------- /glmmfields.Rproj: --------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: No
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 |
-------------------------------------------------------------------------------- /inst/.gitignore: --------------------------------------------------------------------------------
1 | test.R
2 |
-------------------------------------------------------------------------------- /inst/CITATION: --------------------------------------------------------------------------------
1 | bibentry(
2 |   bibtype = "Article",
3 |   title = "Black swans in space: modelling spatiotemporal processes with extremes",
4 |   author = c(
5 |     person(c("Sean", "C."), "Anderson"),
6 |     person(c("Eric", "J."), "Ward")
7 |   ),
8 |   year = "2019",
9 |   journal = "Ecology",
10 |   volume = "100",
11 |   number = "1",
12 |   pages = "e02403",
13 |   doi = "10.1002/ecy.2403"
14 | )
15 |
-------------------------------------------------------------------------------- /inst/include/stan_meta_header.hpp: --------------------------------------------------------------------------------
1 | // Insert all #include statements here
2 |
-------------------------------------------------------------------------------- /inst/logo.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/glmmfields/ddf3b77ca8bc1f45a370f365842c868b7c25dd84/inst/logo.png
-------------------------------------------------------------------------------- /inst/logo.svg: --------------------------------------------------------------------------------
[SVG markup not reproduced: vector version of the package logo; the only text element is "glmmfields". See inst/logo.png above for the rendered image.]
-------------------------------------------------------------------------------- /inst/stan/glmmfields.stan: --------------------------------------------------------------------------------
1 | data {
2 |   int nKnots;
3 |   int nLocs;
4 |   int nT;
5 |   int N;
6 |   array[N] int stationID;
7 |   array[N] int yearID;
8 |   array[N] int binomialN;
9 |
array[N] real y; // y for normal and gamma obs. model 10 | array[N] int y_int; // y for NB2 or poisson or binomial obs. model 11 | array[N] real input_offset; // optional offset, is 0 if not included 12 | array[3] real prior_gp_theta; 13 | array[3] real prior_gp_sigma; 14 | array[3] real prior_sigma; 15 | array[3] real prior_rw_sigma; 16 | array[3] real prior_intercept; 17 | array[3] real prior_beta; 18 | array[3] real prior_phi; 19 | matrix[nKnots, nKnots] distKnots; 20 | matrix[nLocs, nKnots] distKnots21; 21 | int nCov; 22 | matrix[N, nCov] X; 23 | int cov_func; // 0 = exp, 1 = sq_exp, 2 = matern 24 | int est_df; 25 | int est_phi; 26 | int norm_params; 27 | int gamma_params; 28 | int nb2_params; 29 | int obs_model; 30 | real fixed_df_value; 31 | real fixed_phi_value; 32 | int est_temporalRE; 33 | int n_year_effects; 34 | int lower_truncation; 35 | int fixed_intercept; 36 | real matern_kappa; 37 | int nW; // if fixed nu is large, use MVN by setting nW = 0 38 | real gp_sigma_scaling_factor; // a scaling factor to help sampling if gp_sigma is too small 39 | real df_lower_bound; 40 | } 41 | parameters { 42 | real gp_theta; 43 | real gp_sigma; 44 | array[est_df] real df; 45 | array[norm_params] real sigma; 46 | array[gamma_params] real CV; 47 | array[nb2_params] real nb2_phi; 48 | array[n_year_effects] real yearEffects; 49 | array[est_temporalRE] real year_sigma; 50 | array[nT] vector[nKnots] spatialEffectsKnots; 51 | vector[nCov] B; 52 | array[est_phi] real phi; 53 | array[nW] real W; 54 | } 55 | transformed parameters { 56 | vector[nKnots] muZeros; 57 | array[nT] vector[nLocs] spatialEffects; 58 | matrix[nKnots, nKnots] SigmaKnots; 59 | matrix[nKnots, nKnots] transformed_dist; 60 | matrix[nLocs, nKnots] transformed_dist21; 61 | matrix[nLocs, nKnots] SigmaOffDiag; 62 | matrix[nLocs, nKnots] SigmaOffDiagTemp; 63 | matrix[nLocs, nKnots] invSigmaKnots; 64 | vector[N] y_hat; 65 | array[gamma_params] real gammaA; 66 | real gp_sigma_sq; 67 | gp_sigma_sq = pow(gp_sigma*gp_sigma_scaling_factor, 2.0); 68 | 69 | // allow user to switch between covariance functions 70 | if (cov_func == 0) { 71 | // cov matrix between knots 72 | SigmaKnots = gp_sigma_sq * exp(-distKnots / gp_theta); 73 | // cov matrix between knots and projected locs 74 | SigmaOffDiagTemp = gp_sigma_sq * exp(-distKnots21 / gp_theta); 75 | } 76 | if (cov_func == 1) { 77 | // cov matrix between knots: 78 | SigmaKnots = gp_sigma_sq * 79 | exp(-inv(2.0 * pow(gp_theta, 2.0)) * distKnots); // dist^2 as data 80 | // cov matrix between knots and projected locs: 81 | SigmaOffDiagTemp = gp_sigma_sq * 82 | exp(-inv(2.0 * pow(gp_theta, 2.0)) * distKnots21); // dist^2 as data 83 | } 84 | if (cov_func == 2) { 85 | if (matern_kappa == 1.5) { 86 | // cov matrix between knots 87 | transformed_dist = sqrt(3.0) * distKnots / gp_theta; 88 | SigmaKnots = gp_sigma_sq * (1.0 + transformed_dist) .* exp (-transformed_dist); 89 | // cov matrix between knots and projected locs 90 | transformed_dist21 = sqrt(3.0) * distKnots21 / gp_theta; 91 | SigmaOffDiagTemp = gp_sigma_sq * (1.0 + transformed_dist21) .* exp (-transformed_dist21); 92 | } 93 | if (matern_kappa == 2.5) { 94 | // cov matrix between knots 95 | transformed_dist = sqrt(5.0) * distKnots / gp_theta; 96 | SigmaKnots = gp_sigma_sq * (1.0 + transformed_dist + 97 | (transformed_dist .* transformed_dist)/3.0) .* exp (-transformed_dist); 98 | // cov matrix between knots and projected locs 99 | transformed_dist21 = sqrt(5.0) * distKnots21 / gp_theta; 100 | SigmaOffDiagTemp = gp_sigma_sq * (1.0 + 
transformed_dist21 + 101 | (transformed_dist21 .* transformed_dist21)/3.0) .* exp (-transformed_dist21); 102 | } 103 | } 104 | 105 | for (k in 1:nKnots) { 106 | muZeros[k] = 0; 107 | } 108 | // multiply and invert once, used below: 109 | SigmaOffDiag = SigmaOffDiagTemp * inverse_spd(SigmaKnots); 110 | for (t in 1:nT) { 111 | spatialEffects[t] = SigmaOffDiag * spatialEffectsKnots[t]; 112 | } 113 | 114 | // calculate predicted value of each observation 115 | for (i in 1:N) { 116 | if (est_temporalRE == 0) { 117 | if (fixed_intercept == 0) { 118 | y_hat[i] = X[i] * B + spatialEffects[yearID[i], stationID[i]]; 119 | } else { 120 | y_hat[i] = spatialEffects[yearID[i], stationID[i]]; 121 | } 122 | } else { 123 | if(nCov == 0) { 124 | y_hat[i] = spatialEffects[yearID[i], stationID[i]] + yearEffects[yearID[i]]; 125 | } 126 | if(nCov > 0) { 127 | y_hat[i] = X[i] * B + spatialEffects[yearID[i], stationID[i]] + yearEffects[yearID[i]]; 128 | } 129 | } 130 | y_hat[i] = y_hat[i] + input_offset[i]; // optional offset, additive in link space 131 | } 132 | 133 | if (obs_model==0) { 134 | gammaA[1] = inv(pow(CV[1], 2.0)); 135 | } 136 | } 137 | model { 138 | // priors: 139 | gp_theta ~ student_t(prior_gp_theta[1], prior_gp_theta[2], prior_gp_theta[3]); 140 | gp_sigma ~ student_t(prior_gp_sigma[1], prior_gp_sigma[2], prior_gp_sigma[3]); 141 | 142 | if (est_phi == 1) { 143 | phi ~ student_t(prior_phi[1], prior_phi[2], prior_phi[3]); 144 | } 145 | 146 | if (nCov >= 1) { 147 | // global intercept, absorbed into year re [1] if those estimated 148 | B[1] ~ student_t(prior_intercept[1], prior_intercept[2], prior_intercept[3]); 149 | } 150 | if (nCov >= 2) { 151 | for (i in 2:nCov) { 152 | // coefficients associated with non-intercept covariates 153 | B[i] ~ student_t(prior_beta[1], prior_beta[2], prior_beta[3]); 154 | } 155 | } 156 | 157 | // temporal random effects, if estimated global intercept = effect in first year 158 | if (est_temporalRE == 1) { 159 | year_sigma ~ student_t(prior_rw_sigma[1], prior_rw_sigma[2], prior_rw_sigma[3]); 160 | // random walk in year terms 161 | yearEffects[1] ~ student_t(prior_intercept[1], prior_intercept[2], prior_intercept[3]); 162 | for (t in 2:nT) { 163 | yearEffects[t] ~ normal(yearEffects[t-1], year_sigma); 164 | } 165 | } 166 | 167 | // if est_df == 1 estimate MVT degrees of freedom, otherwise use fixed df 168 | if (est_df == 1) { 169 | W ~ scaled_inv_chi_square(df[1], 1); 170 | df ~ gamma(2, 0.1); 171 | } else { 172 | if (nW > 0) { // if nW == 0, we are using MVN 173 | W ~ scaled_inv_chi_square(fixed_df_value, 1); 174 | } 175 | } 176 | 177 | if (nW > 0) { // if nW == 0, we are using MVN 178 | // spatial deviates in first time slice 179 | spatialEffectsKnots[1] ~ multi_normal(muZeros, W[1] * SigmaKnots); 180 | 181 | // spatial deviates in remaining time slices 182 | for (t in 2:nT) { 183 | if (est_phi == 1) { 184 | spatialEffectsKnots[t] ~ multi_normal(phi[1] * spatialEffectsKnots[t-1], 185 | W[t] * SigmaKnots); 186 | } else { 187 | spatialEffectsKnots[t] ~ multi_normal(fixed_phi_value * spatialEffectsKnots[t-1], 188 | W[t] * SigmaKnots); 189 | } 190 | } 191 | } else { // use MVN instead of MVT 192 | spatialEffectsKnots[1] ~ multi_normal(muZeros, SigmaKnots); 193 | for (t in 2:nT) { 194 | if (est_phi == 1) { 195 | spatialEffectsKnots[t] ~ multi_normal(phi[1] * spatialEffectsKnots[t-1], 196 | SigmaKnots); 197 | } else { 198 | spatialEffectsKnots[t] ~ multi_normal(fixed_phi_value * spatialEffectsKnots[t-1], 199 | SigmaKnots); 200 | } 201 | } 202 | } 203 | 204 | // switch 
between observation error models:
205 |   // gamma (0), normal (1), NB2 (2), binomial (4), poisson (5), lognormal (6)
206 |   // where is 3? tweedie (3) is in a branch and is too slow to be practical
207 |   if (obs_model == 0) {
208 |     // prior on CV of gamma obs error, gamma shape 'a' is derived parameter
209 |     CV[1] ~ student_t(prior_sigma[1], prior_sigma[2], prior_sigma[3]);
210 |     y ~ gamma(gammaA[1], gammaA[1] ./ exp(y_hat));
211 |   }
212 |   if (obs_model == 1) {
213 |     sigma[1] ~ student_t(prior_sigma[1], prior_sigma[2], prior_sigma[3]);
214 |     y ~ normal(y_hat, sigma[1]);
215 |   }
216 |   if (obs_model == 2) {
217 |     nb2_phi[1] ~ student_t(prior_sigma[1], prior_sigma[2], prior_sigma[3]);
218 |     if (lower_truncation == 0) {
219 |       y_int ~ neg_binomial_2_log(y_hat, nb2_phi[1]);
220 |     } else {
221 |       for (i in 1:N) {
222 |         y_int[i] ~ neg_binomial_2(exp(y_hat[i]), nb2_phi[1]) T[lower_truncation, ];
223 |       }
224 |     }
225 |   }
226 |   if (obs_model == 4) {
227 |     y_int ~ binomial_logit(binomialN, y_hat);
228 |   }
229 |   if (obs_model == 5) {
230 |     y_int ~ poisson_log(y_hat);
231 |   }
232 |   if (obs_model == 6) {
233 |     sigma[1] ~ student_t(prior_sigma[1], prior_sigma[2], prior_sigma[3]);
234 |     y ~ lognormal(y_hat, sigma[1]);
235 |   }
236 | }
237 | generated quantities {
238 |   // log_lik is for use with the loo package
239 |   vector[N] log_lik;
240 |   //int y_new[N];
241 |
242 |   for (i in 1:N) {
243 |     if (obs_model == 0) {
244 |       log_lik[i] = gamma_lpdf(y[i] | gammaA[1], gammaA[1] ./ exp(y_hat[i]));
245 |     }
246 |     if (obs_model == 1) {
247 |       log_lik[i] = normal_lpdf(y[i] | y_hat[i], sigma[1]);
248 |     }
249 |     if (obs_model == 2) {
250 |       if (lower_truncation == 0) {
251 |         log_lik[i] = neg_binomial_2_log_lpmf(y_int[i] | y_hat[i], nb2_phi[1]);
252 |       } else {
253 |         // Note: the T[lower_truncation, ] truncation term is omitted from the
254 |         // following line, so this log_lik calculation is not correct for the
255 |         // truncated negative binomial;
256 |         // the package will issue a warning in that case
257 |         log_lik[i] = neg_binomial_2_lpmf(y_int[i] | exp(y_hat[i]), nb2_phi[1]);
258 |       }
259 |     }
260 |     if (obs_model == 4) {
261 |       log_lik[i] = binomial_logit_lpmf(y_int[i] | binomialN[i], y_hat[i]);
262 |     }
263 |     if (obs_model == 5) {
264 |       log_lik[i] = poisson_log_lpmf(y_int[i] | y_hat[i]);
265 |       //y_new[i] = poisson_log_rng(y_hat[i]);
266 |     }
267 |     if (obs_model == 6) {
268 |       log_lik[i] = lognormal_lpdf(y[i] | y_hat[i], sigma[1]);
269 |     }
270 |   }
271 | }
272 |
-------------------------------------------------------------------------------- /inst/stan/include/license.stan: --------------------------------------------------------------------------------
1 | /*
2 | glmmfields is free software: you can redistribute it and/or modify
3 | it under the terms of the GNU General Public License as published by
4 | the Free Software Foundation, either version 3 of the License, or
5 | (at your option) any later version.
6 |
7 | glmmfields is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 |
12 | You should have received a copy of the GNU General Public License
13 | along with glmmfields. If not, see <http://www.gnu.org/licenses/>.
14 | */ 15 | -------------------------------------------------------------------------------- /man/format_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/format_data.R 3 | \name{format_data} 4 | \alias{format_data} 5 | \title{Format data for fitting a glmmfields model} 6 | \usage{ 7 | format_data( 8 | data, 9 | y, 10 | X, 11 | time, 12 | lon = "lon", 13 | lat = "lat", 14 | station = NULL, 15 | nknots = 25L, 16 | covariance = c("squared-exponential", "exponential", "matern"), 17 | fixed_intercept = FALSE, 18 | cluster = c("pam", "kmeans") 19 | ) 20 | } 21 | \arguments{ 22 | \item{data}{A data frame to be formatted} 23 | 24 | \item{y}{A numeric vector of the response} 25 | 26 | \item{X}{A matrix of the predictors} 27 | 28 | \item{time}{A character object giving the name of the time column} 29 | 30 | \item{lon}{A character object giving the name of the longitude column} 31 | 32 | \item{lat}{A character object giving the name of the latitude column} 33 | 34 | \item{station}{A numeric vector giving the integer ID of the station} 35 | 36 | \item{nknots}{The number of knots} 37 | 38 | \item{covariance}{The type of covariance function} 39 | 40 | \item{fixed_intercept}{Should the intercept be fixed?} 41 | 42 | \item{cluster}{The type of clustering algorithm used to determine the not locations. 43 | \code{"pam"} = \code{\link[cluster]{pam}}. \code{kmeans} is faster for large datasets.} 44 | } 45 | \description{ 46 | Format data for fitting a glmmfields model 47 | } 48 | -------------------------------------------------------------------------------- /man/glmmfields-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/glmmfields-package.R 3 | \docType{package} 4 | \name{glmmfields-package} 5 | \alias{glmmfields-package} 6 | \title{The 'glmmfields' package.} 7 | \description{ 8 | Implements Bayesian spatial and spatiotemporal models that 9 | optionally allow for extreme spatial deviations through time. 'glmmfields' 10 | uses a predictive process approach with random fields implemented through a 11 | multivariate-t distribution instead of the usual multivariate normal. 12 | Sampling is conducted with 'Stan'. 13 | } 14 | \references{ 15 | Stan Development Team (2018). RStan: the R interface to Stan. R package 16 | version 2.18.2. 
http://mc-stan.org 17 | } 18 | -------------------------------------------------------------------------------- /man/glmmfields.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fitting.R 3 | \name{glmmfields} 4 | \alias{glmmfields} 5 | \title{Fit a spatiotemporal random fields GLMM} 6 | \usage{ 7 | glmmfields( 8 | formula, 9 | data, 10 | lon, 11 | lat, 12 | time = NULL, 13 | nknots = 15L, 14 | prior_gp_theta = half_t(3, 0, 5), 15 | prior_gp_sigma = half_t(3, 0, 5), 16 | prior_sigma = half_t(3, 0, 5), 17 | prior_rw_sigma = half_t(3, 0, 5), 18 | prior_intercept = student_t(3, 0, 10), 19 | prior_beta = student_t(3, 0, 3), 20 | prior_phi = student_t(1000, 0, 0.5), 21 | fixed_df_value = 1000, 22 | fixed_phi_value = 0, 23 | estimate_df = FALSE, 24 | estimate_ar = FALSE, 25 | family = gaussian(link = "identity"), 26 | binomial_N = NULL, 27 | covariance = c("squared-exponential", "exponential", "matern"), 28 | matern_kappa = 0.5, 29 | algorithm = c("sampling", "meanfield"), 30 | year_re = FALSE, 31 | nb_lower_truncation = 0, 32 | control = list(adapt_delta = 0.9), 33 | save_log_lik = FALSE, 34 | df_lower_bound = 2, 35 | cluster = c("pam", "kmeans"), 36 | offset = NULL, 37 | ... 38 | ) 39 | } 40 | \arguments{ 41 | \item{formula}{The model formula.} 42 | 43 | \item{data}{A data frame.} 44 | 45 | \item{lon}{A character object giving the name of the longitude column.} 46 | 47 | \item{lat}{A character object giving the name of the latitude column.} 48 | 49 | \item{time}{A character object giving the name of the time column. Leave 50 | as \code{NULL} to fit a spatial GLMM without a time element.} 51 | 52 | \item{nknots}{The number of knots to use in the predictive process model. 53 | Smaller values will be faster but may not adequately represent the shape 54 | of the spatial pattern.} 55 | 56 | \item{prior_gp_theta}{The prior on the Gaussian Process scale parameter. Must 57 | be declared with \code{\link[=half_t]{half_t()}}. Here, and throughout, priors that 58 | are normal or half-normal can be implemented by setting the first 59 | parameter in the half-t or student-t distribution to a large value. 60 | E.g. something greater than 100.} 61 | 62 | \item{prior_gp_sigma}{The prior on the Gaussian Process eta parameter. Must 63 | be declared with \code{\link[=half_t]{half_t()}}.} 64 | 65 | \item{prior_sigma}{The prior on the observation process scale parameter. Must 66 | be declared with \code{\link[=half_t]{half_t()}}. This acts as a substitute for the 67 | scale parameter in whatever observation distribution is being used. E.g. 68 | the CV for the Gamma or the dispersion parameter for the negative 69 | binomial.} 70 | 71 | \item{prior_rw_sigma}{The prior on the standard deviation parameter of the 72 | random walk process (if specified). Must be declared with 73 | \code{\link[=half_t]{half_t()}}.} 74 | 75 | \item{prior_intercept}{The prior on the intercept parameter. Must be declared 76 | with \code{\link[=student_t]{student_t()}}.} 77 | 78 | \item{prior_beta}{The prior on the slope parameters (if any). Must be 79 | declared with \code{\link[=student_t]{student_t()}}.} 80 | 81 | \item{prior_phi}{The prior on the AR parameter. Must be 82 | declared with \code{\link[=student_t]{student_t()}}.} 83 | 84 | \item{fixed_df_value}{The fixed value for the student-t degrees of freedom 85 | parameter if the degrees of freedom parameter is fixed in the MVT. 
If the 86 | degrees of freedom parameter is estimated then this argument is ignored. 87 | Must be 1 or greater. Very large values (e.g. the default value) 88 | approximate the normal distribution. If the value is >=1000 then a true 89 | MVN distribution will be fit.} 90 | 91 | \item{fixed_phi_value}{The fixed value for temporal autoregressive parameter, 92 | between random fields at time(t) and time(t-1). If the phi parameter 93 | is estimated then this argument is ignored.} 94 | 95 | \item{estimate_df}{Logical: should the degrees of freedom parameter be 96 | estimated?} 97 | 98 | \item{estimate_ar}{Logical: should the AR (autoregressive) parameter be 99 | estimated? Here, this refers to a autoregressive process in the evolution 100 | of the spatial field through time.} 101 | 102 | \item{family}{Family object describing the observation model. Note that only 103 | one link is implemented for each distribution. Gamma, negative binomial 104 | (specified via \code{\link[=nbinom2]{nbinom2()}} as \code{nbinom2(link = "log")}, and Poisson must 105 | have a log link. Binomial must have a logit link. Also implemented is the 106 | lognormal (specified via \code{\link[=lognormal]{lognormal()}} as \code{lognormal(link = "log")}. 107 | Besides the negative binomial and lognormal, other families are specified 108 | as shown in \code{\link[stats]{family}}.} 109 | 110 | \item{binomial_N}{A character object giving the optional name of the column containing 111 | Binomial sample size. Leave as \code{NULL} to fit a spatial GLMM with sample sizes (N) = 1, 112 | equivalent to bernoulli model.} 113 | 114 | \item{covariance}{The covariance function of the Gaussian Process. 115 | One of "squared-exponential", "exponential", or "matern".} 116 | 117 | \item{matern_kappa}{Optional parameter for the Matern covariance function. 118 | Optional values are 1.5 or 2.5. Values of 0.5 are equivalent to exponential.} 119 | 120 | \item{algorithm}{Character object describing whether the model should be fit 121 | with full NUTS MCMC or via the variational inference mean-field approach. 122 | See \code{\link[rstan:stanmodel-method-vb]{rstan::vb()}}. Note that the variational inference approach 123 | should not be trusted for final inference and is much more likely to give 124 | incorrect inference than MCMC.} 125 | 126 | \item{year_re}{Logical: estimate a random walk for the time variable? If 127 | \code{TRUE}, then no fixed effects (B coefficients) will be estimated. 128 | In this case, \code{prior_intercept} will be used as the prior for 129 | the initial value in time.} 130 | 131 | \item{nb_lower_truncation}{For NB2 only: lower truncation value. E.g. 0 for 132 | no truncation, 1 for 1 and all values above. Note that estimation is 133 | likely to be considerably slower with lower truncation because the 134 | sampling is not vectorized. Also note that the log likelihood values 135 | returned for estimating quantities like LOOIC will not be correct if 136 | lower truncation is implemented.} 137 | 138 | \item{control}{List to pass to \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}}. For example, 139 | increase \code{adapt_delta} if there are warnings about divergent 140 | transitions: \code{control = list(adapt_delta = 0.99)}. By default, 141 | \pkg{glmmfields} sets \code{adapt_delta = 0.9}.} 142 | 143 | \item{save_log_lik}{Logical: should the log likelihood for each data point be 144 | saved so that information criteria such as LOOIC or WAIC can be calculated? 
145 | Defaults to \code{FALSE} so that the size of model objects is smaller.} 146 | 147 | \item{df_lower_bound}{The lower bound on the degrees of freedom parameter. 148 | Values that are too low, e.g. below 2 or 3, it might affect chain 149 | convergence. Defaults to 2.} 150 | 151 | \item{cluster}{The type of clustering algorithm used to determine the knot 152 | locations. \code{"pam"} = \code{\link[cluster:pam]{cluster::pam()}}. The \code{"kmeans"} 153 | algorithm will be faster on larger datasets.} 154 | 155 | \item{offset}{An optional offset vector.} 156 | 157 | \item{...}{Any other arguments to pass to \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}}.} 158 | } 159 | \description{ 160 | Fit a spatiotemporal random fields model that optionally uses the MVT 161 | distribution instead of a MVN distribution to allow for spatial extremes 162 | through time. It is also possible to fit a spatial random fields model 163 | without a time component. 164 | } 165 | \details{ 166 | Note that there is no guarantee that the default priors are reasonable for 167 | your data. Also, there is no guarantee the default priors will remain the 168 | same in future versions. Therefore it is important that you specify any 169 | priors that are used in your model, even if they replicate the defaults in 170 | the package. It is particularly important that you consider that prior on 171 | \code{gp_theta} since it depends on the distance between your location points. You 172 | may need to scale your coordinate units so they are on a ballpark range of 173 | 1-10 by, say, dividing the coordinates (say in UTMs) by several order of 174 | magnitude. 175 | } 176 | \examples{ 177 | \donttest{ 178 | # Spatiotemporal example: 179 | set.seed(1) 180 | s <- sim_glmmfields(n_draws = 12, n_knots = 12, gp_theta = 1.5, 181 | gp_sigma = 0.2, sd_obs = 0.2) 182 | print(s$plot) 183 | # options(mc.cores = parallel::detectCores()) # for parallel processing 184 | # should use 4 or more chains for real model fits 185 | m <- glmmfields(y ~ 0, time = "time", 186 | lat = "lat", lon = "lon", data = s$dat, 187 | nknots = 12, iter = 1000, chains = 2, seed = 1) 188 | 189 | # Spatial example (with covariates) from the vignette and customizing 190 | # some priors: 191 | set.seed(1) 192 | N <- 100 # number of data points 193 | temperature <- rnorm(N, 0, 1) # simulated temperature data 194 | X <- cbind(1, temperature) # design matrix 195 | s <- sim_glmmfields(n_draws = 1, gp_theta = 1.2, n_data_points = N, 196 | gp_sigma = 0.3, sd_obs = 0.1, n_knots = 12, obs_error = "gamma", 197 | covariance = "squared-exponential", X = X, 198 | B = c(0.5, 0.2)) # B represents our intercept and slope 199 | d <- s$dat 200 | d$temperature <- temperature 201 | library(ggplot2) 202 | ggplot(s$dat, aes(lon, lat, colour = y)) + 203 | viridis::scale_colour_viridis() + 204 | geom_point(size = 3) 205 | m_spatial <- glmmfields(y ~ temperature, data = d, family = Gamma(link = "log"), 206 | lat = "lat", lon = "lon", nknots = 12, iter = 2000, chains = 2, 207 | prior_beta = student_t(100, 0, 1), prior_intercept = student_t(100, 0, 5), 208 | control = list(adapt_delta = 0.95)) 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /man/lognormal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/family.R 3 | \name{lognormal} 4 | \alias{lognormal} 5 | \title{Lognormal family} 6 | \usage{ 7 | 
lognormal(link = "log") 8 | } 9 | \arguments{ 10 | \item{link}{The link (must be log)} 11 | } 12 | \description{ 13 | Lognormal family 14 | } 15 | \examples{ 16 | lognormal() 17 | } 18 | -------------------------------------------------------------------------------- /man/loo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.R 3 | \name{loo.glmmfields} 4 | \alias{loo.glmmfields} 5 | \alias{loo} 6 | \title{Return LOO information criteria} 7 | \usage{ 8 | \method{loo}{glmmfields}(x, ...) 9 | } 10 | \arguments{ 11 | \item{x}{Output from \code{\link[=glmmfields]{glmmfields()}}. 12 | Must be fit with \code{save_log_lik = TRUE}, which is \emph{not} the default.} 13 | 14 | \item{...}{Arguments for \code{\link[loo:relative_eff]{loo::relative_eff()}} and \code{\link[loo:loo]{loo::loo.array()}}.} 15 | } 16 | \description{ 17 | Extract the LOOIC (leave-one-out information criterion) using 18 | \code{\link[loo:loo]{loo::loo()}}. 19 | } 20 | \examples{ 21 | \donttest{ 22 | set.seed(1) 23 | s <- sim_glmmfields(n_draws = 12, n_knots = 12, gp_theta = 1.5, 24 | gp_sigma = 0.2, sd_obs = 0.2) 25 | # options(mc.cores = parallel::detectCores()) # for parallel processing 26 | 27 | # save_log_lik defaults to FALSE to save space but is needed for loo(): 28 | m <- glmmfields(y ~ 0, time = "time", 29 | lat = "lat", lon = "lon", data = s$dat, 30 | nknots = 12, iter = 1000, chains = 4, seed = 1, 31 | save_log_lik = TRUE) 32 | loo(m) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /man/nbinom2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/family.R 3 | \name{nbinom2} 4 | \alias{nbinom2} 5 | \title{Negative binomial family} 6 | \usage{ 7 | nbinom2(link = "log") 8 | } 9 | \arguments{ 10 | \item{link}{The link (must be log)} 11 | } 12 | \description{ 13 | This is the NB2 parameterization where the variance scales quadratically 14 | with the mean. 15 | } 16 | \examples{ 17 | nbinom2() 18 | } 19 | -------------------------------------------------------------------------------- /man/plot.glmmfields.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.R 3 | \name{plot.glmmfields} 4 | \alias{plot.glmmfields} 5 | \title{Plot predictions from an glmmfields model} 6 | \usage{ 7 | \method{plot}{glmmfields}( 8 | x, 9 | type = c("prediction", "spatial-residual", "residual-vs-fitted"), 10 | link = TRUE, 11 | ... 
12 | ) 13 | } 14 | \arguments{ 15 | \item{x}{An object returned by \code{\link{glmmfields}}} 16 | 17 | \item{type}{Type of plot} 18 | 19 | \item{link}{Logical: should the plots be made on the link scale 20 | or on the natural scale?} 21 | 22 | \item{...}{Other arguments passed to \code{\link{predict.glmmfields}}} 23 | } 24 | \description{ 25 | Plot predictions from an glmmfields model 26 | } 27 | \examples{ 28 | \donttest{ 29 | # Spatiotemporal example: 30 | set.seed(1) 31 | s <- sim_glmmfields(n_draws = 12, n_knots = 12, gp_theta = 1.5, 32 | gp_sigma = 0.2, sd_obs = 0.1) 33 | # options(mc.cores = parallel::detectCores()) # for parallel processing 34 | m <- glmmfields(y ~ 0, time = "time", 35 | lat = "lat", lon = "lon", data = s$dat, 36 | nknots = 12, iter = 600, chains = 1) 37 | x <- plot(m, type = "prediction") 38 | x 39 | x + ggplot2::scale_color_gradient2() 40 | plot(m, type = "spatial-residual") 41 | plot(m, type = "residual-vs-fitted") 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /man/predict.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.R 3 | \name{predict} 4 | \alias{predict} 5 | \alias{predictive_interval} 6 | \alias{posterior_linpred} 7 | \alias{posterior_predict} 8 | \alias{predictive_interval.glmmfields} 9 | \alias{posterior_linpred.glmmfields} 10 | \alias{posterior_predict.glmmfields} 11 | \alias{predict.glmmfields} 12 | \title{Predict from a glmmfields model} 13 | \usage{ 14 | \method{predictive_interval}{glmmfields}(object, ...) 15 | 16 | \method{posterior_linpred}{glmmfields}(object, ...) 17 | 18 | \method{posterior_predict}{glmmfields}(object, ...) 19 | 20 | \method{predict}{glmmfields}( 21 | object, 22 | newdata = NULL, 23 | estimate_method = c("median", "mean"), 24 | conf_level = 0.95, 25 | interval = c("confidence", "prediction"), 26 | type = c("link", "response"), 27 | return_mcmc = FALSE, 28 | offset = NULL, 29 | iter = "all", 30 | ... 31 | ) 32 | } 33 | \arguments{ 34 | \item{object}{An object returned by \code{\link[=glmmfields]{glmmfields()}}.} 35 | 36 | \item{...}{Ignored currently} 37 | 38 | \item{newdata}{Optionally, a data frame to predict on} 39 | 40 | \item{estimate_method}{Method for computing point estimate ("mean" or 41 | "median")} 42 | 43 | \item{conf_level}{Probability level for the credible intervals.} 44 | 45 | \item{interval}{Type of interval calculation. Same as for 46 | \code{\link[stats:predict.lm]{stats::predict.lm()}}.} 47 | 48 | \item{type}{Whether the predictions are returned on "link" scale or 49 | "response" scale (Same as for \code{\link[stats:predict.glm]{stats::predict.glm()}}).} 50 | 51 | \item{return_mcmc}{Logical. Should the full MCMC draws be returned for the 52 | predictions?} 53 | 54 | \item{offset}{Optional offset vector to be used in prediction.} 55 | 56 | \item{iter}{Number of MCMC iterations to draw. Defaults to all.} 57 | } 58 | \description{ 59 | These functions extract posterior draws or credible intervals. The helper 60 | functions are named to match those in the \pkg{rstanarm} package and call the 61 | function \code{predict()} with appropriate argument values. 
62 | } 63 | \examples{ 64 | \donttest{ 65 | library(ggplot2) 66 | 67 | # simulate: 68 | set.seed(1) 69 | s <- sim_glmmfields( 70 | n_draws = 12, n_knots = 12, gp_theta = 2.5, 71 | gp_sigma = 0.2, sd_obs = 0.1 72 | ) 73 | 74 | # fit: 75 | # options(mc.cores = parallel::detectCores()) # for parallel processing 76 | m <- glmmfields(y ~ 0, 77 | data = s$dat, time = "time", 78 | lat = "lat", lon = "lon", 79 | nknots = 12, iter = 800, chains = 1 80 | ) 81 | 82 | # Predictions: 83 | # Link scale credible intervals: 84 | p <- predict(m, type = "link", interval = "confidence") 85 | head(p) 86 | 87 | # Prediction intervals on new observations (include observation error): 88 | p <- predictive_interval(m) 89 | head(p) 90 | 91 | # Posterior prediction draws: 92 | p <- posterior_predict(m, iter = 100) 93 | dim(p) # rows are iterations and columns are data elements 94 | 95 | # Draws from the linear predictor (not in link space): 96 | p <- posterior_linpred(m, iter = 100) 97 | dim(p) # rows are iterations and columns are data elements 98 | 99 | # Use the `tidy` method to extract parameter estimates as a data frame: 100 | head(tidy(m, conf.int = TRUE, conf.method = "HPDinterval")) 101 | 102 | # Make predictions on a fine-scale spatial grid: 103 | pred_grid <- expand.grid( 104 | lat = seq(min(s$dat$lat), max(s$dat$lat), length.out = 25), 105 | lon = seq(min(s$dat$lon), max(s$dat$lon), length.out = 25), 106 | time = unique(s$dat$time) 107 | ) 108 | pred_grid$prediction <- predict(m, 109 | newdata = pred_grid, type = "response", iter = 100, 110 | estimate_method = "median", offset = rep(0, nrow(pred_grid)) 111 | )$estimate 112 | 113 | ggplot(pred_grid, aes(lon, lat, fill = prediction)) + 114 | facet_wrap(~time) + 115 | geom_raster() + 116 | scale_fill_gradient2() 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /man/priors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/priors.R 3 | \name{student_t} 4 | \alias{student_t} 5 | \alias{half_t} 6 | \title{Student-t and half-t priors} 7 | \usage{ 8 | student_t(df = 3, location = 0, scale = 1) 9 | 10 | half_t(df = 3, location = 0, scale = 1) 11 | } 12 | \arguments{ 13 | \item{df}{Degrees of freedom parameter} 14 | 15 | \item{location}{Location parameter} 16 | 17 | \item{scale}{Scale parameter} 18 | } 19 | \description{ 20 | Student-t and half-t priors. Note that this can be used to represent an 21 | effectively normal distribution prior by setting the first argument (the 22 | degrees of freedom parameter) to a large value (roughly 50 or above). 
23 | } 24 | \examples{ 25 | student_t(3, 0, 1) 26 | half_t(3, 0, 1) 27 | } 28 | -------------------------------------------------------------------------------- /man/sim_glmmfields.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sim.R 3 | \name{sim_glmmfields} 4 | \alias{sim_glmmfields} 5 | \title{Simulate a random field with a MVT distribution} 6 | \usage{ 7 | sim_glmmfields( 8 | n_knots = 15, 9 | n_draws = 10, 10 | gp_theta = 0.5, 11 | gp_sigma = 0.2, 12 | mvt = TRUE, 13 | df = 1e+06, 14 | seed = NULL, 15 | n_data_points = 100, 16 | sd_obs = 0.1, 17 | covariance = c("squared-exponential", "exponential", "matern"), 18 | matern_kappa = 0.5, 19 | obs_error = c("normal", "gamma", "poisson", "nb2", "binomial", "lognormal"), 20 | B = c(0), 21 | phi = 0, 22 | X = rep(1, n_draws * n_data_points), 23 | g = data.frame(lon = runif(n_data_points, 0, 10), lat = runif(n_data_points, 0, 10)) 24 | ) 25 | } 26 | \arguments{ 27 | \item{n_knots}{The number of knots} 28 | 29 | \item{n_draws}{The number of draws (for example, the number of years)} 30 | 31 | \item{gp_theta}{The Gaussian Process scale parameter} 32 | 33 | \item{gp_sigma}{The Gaussian Process variance parameter} 34 | 35 | \item{mvt}{Logical: MVT? (vs. MVN)} 36 | 37 | \item{df}{The degrees of freedom parameter for the MVT distribution} 38 | 39 | \item{seed}{The random seed value} 40 | 41 | \item{n_data_points}{The number of data points per draw} 42 | 43 | \item{sd_obs}{The observation process scale parameter} 44 | 45 | \item{covariance}{The covariance function of the Gaussian process 46 | ("squared-exponential", "exponential", "matern")} 47 | 48 | \item{matern_kappa}{The optional matern parameter. Can be 1.5 or 2.5. Values 49 | of 0.5 are equivalent to the exponential model.} 50 | 51 | \item{obs_error}{The observation error distribution} 52 | 53 | \item{B}{A vector of parameters. 
The first element is the intercept} 54 | 55 | \item{phi}{The auto regressive parameter on the mean of the random field knots} 56 | 57 | \item{X}{The model matrix} 58 | 59 | \item{g}{Grid of points} 60 | } 61 | \description{ 62 | Simulate a random field with a MVT distribution 63 | } 64 | \examples{ 65 | s <- sim_glmmfields(n_draws = 12, n_knots = 12, gp_theta = 1.5, 66 | gp_sigma = 0.2, sd_obs = 0.2) 67 | names(s) 68 | } 69 | -------------------------------------------------------------------------------- /man/stan_pars.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/stan_pars.R 3 | \name{stan_pars} 4 | \alias{stan_pars} 5 | \title{Return a vector of parameters} 6 | \usage{ 7 | stan_pars( 8 | obs_error, 9 | estimate_df = TRUE, 10 | est_temporalRE = FALSE, 11 | estimate_ar = FALSE, 12 | fixed_intercept = FALSE, 13 | save_log_lik = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{obs_error}{The observation error distribution} 18 | 19 | \item{estimate_df}{Logical indicating whether the degrees of freedom 20 | parameter should be estimated} 21 | 22 | \item{est_temporalRE}{Logical: estimate a random walk for the time variable?} 23 | 24 | \item{estimate_ar}{Logical indicating whether the ar 25 | parameter should be estimated} 26 | 27 | \item{fixed_intercept}{Should the intercept be fixed?} 28 | 29 | \item{save_log_lik}{Logical: should the log likelihood for each data point be 30 | saved so that information criteria such as LOOIC or WAIC can be calculated? 31 | Defaults to \code{FALSE} so that the size of model objects is smaller.} 32 | } 33 | \description{ 34 | Return a vector of parameters 35 | } 36 | -------------------------------------------------------------------------------- /man/tidy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.R 3 | \name{tidy} 4 | \alias{tidy} 5 | \alias{tidy.glmmfields} 6 | \title{Tidy model output} 7 | \usage{ 8 | tidy(x, ...) 9 | 10 | \method{tidy}{glmmfields}(x, ...) 
11 | } 12 | \arguments{ 13 | \item{x}{Output from \code{\link[=glmmfields]{glmmfields()}}} 14 | 15 | \item{...}{Other arguments} 16 | } 17 | \description{ 18 | Tidy model output 19 | } 20 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(glmmfields) 3 | 4 | test_check("glmmfields") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-families.R: -------------------------------------------------------------------------------- 1 | test_that("families are parsed", { 2 | expect_equal(check_family(nbinom2(link = "log"))$family, "nbinom2") 3 | expect_equal(check_family(nbinom2(link = "log"))$link, "log") 4 | expect_equal(check_family(lognormal(link = "log"))$family, "lognormal") 5 | expect_equal(check_family(nbinom2(link = "log"))$link, "log") 6 | 7 | expect_error(check_family(gaussian(link = "aaa"))) 8 | expect_error(check_family(nbinom2(link = "aaa"))) 9 | expect_error(check_family(lognormal(link = "aaa"))) 10 | 11 | expect_error(check_family(quasibinomial(link = "logit"))) 12 | 13 | check_family(binomial(link = logit)) 14 | check_family(nbinom2(link = log)) 15 | check_family(lognormal(link = log)) 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/test-fit-ar-processes.R: -------------------------------------------------------------------------------- 1 | if (interactive()) options(mc.cores = parallel::detectCores()) 2 | 3 | ITER <- 600 4 | CHAINS <- 2 5 | SEED <- 9999 6 | TOL <- 0.1 # % 7 | TOL_df <- .25 # % 8 | n_data_points <- 50 9 | 10 | # ------------------------------------------------------ 11 | # a Gaussian observation model with random walk year effects 12 | 13 | test_that("mvt-norm estimates random walk year effects", { 14 | skip_on_cran() 15 | skip_on_travis() 16 | skip_on_appveyor() 17 | 18 | set.seed(SEED * 2) 19 | 20 | gp_sigma <- 0.2 21 | sigma <- 0.1 22 | df <- 1000 23 | gp_theta <- 1.8 24 | n_draws <- 12 25 | nknots <- 5 26 | year_sigma <- 0.5 27 | B <- vector(mode = "double", length = n_draws) 28 | B[1] <- 0 29 | for (i in 2:length(B)) { 30 | B[i] <- B[i - 1] + rnorm(1, 0, year_sigma) # random walk 31 | } 32 | 33 | s <- sim_glmmfields( 34 | df = df, n_draws = n_draws, gp_theta = gp_theta, 35 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, B = B, 36 | X = model.matrix(~a - 1, data.frame(a = gl(n_draws, 100))) 37 | ) 38 | # print(s$plot) 39 | # library(ggplot2); ggplot(s$dat, aes(time, y)) + geom_point() 40 | 41 | suppressWarnings({ 42 | m <- glmmfields(y ~ 0, 43 | data = s$dat, time = "time", 44 | lat = "lat", lon = "lon", nknots = nknots, 45 | iter = ITER, chains = CHAINS, seed = SEED, 46 | estimate_df = FALSE, fixed_df_value = df, year_re = TRUE, 47 | prior_intercept = student_t(999, 0, 5), control = list(adapt_delta = 0.9), 48 | prior_rw_sigma = half_t(1e6, 0, 1) 49 | ) 50 | }) 51 | m 52 | 53 | b <- tidy(m, estimate.method = "median") 54 | expect_equal(as.numeric(b[b$term == "sigma[1]", "estimate", drop = TRUE]), sigma, tol = sigma * TOL) 55 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL) 56 | expect_equal(as.numeric(b[b$term == "gp_theta", "estimate", drop = TRUE]), gp_theta, tol = gp_theta * TOL) 57 | expect_equal(as.numeric(b[grep("yearEffects\\[*", b$term), "estimate", drop = TRUE]), B, tol = 0.1) 58 | 
expect_equal(as.numeric(b[grep("year_sigma", b$term), "estimate", drop = TRUE]), year_sigma, tol = 0.1) 59 | }) 60 | 61 | # ------------------------------------------------------ 62 | # a Gaussian observation model with random walk year effects with covariate 63 | 64 | test_that("mvt-norm estimates random walk year effects with covariate", { 65 | skip_on_cran() 66 | skip_on_travis() 67 | skip_on_appveyor() 68 | 69 | set.seed(SEED * 2) 70 | 71 | gp_sigma <- 0.2 72 | sigma <- 0.1 73 | df <- 1000 74 | gp_theta <- 1.8 75 | n_draws <- 12 76 | nknots <- 5 77 | year_sigma <- 0.5 78 | B <- vector(mode = "double", length = n_draws) 79 | B[1] <- 0 80 | for (i in 2:length(B)) { 81 | B[i] <- B[i - 1] + rnorm(1, 0, year_sigma) # random walk 82 | } 83 | 84 | cov_vec = rnorm(n_draws*100,0,1) 85 | model_matrix = model.matrix(~a - 1 + cov + cov2, 86 | data.frame(a = gl(n_draws, 100), cov=cov_vec, cov2=cov_vec^2)) 87 | 88 | s <- sim_glmmfields( 89 | df = df, n_draws = n_draws, gp_theta = gp_theta, 90 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, 91 | B = c(B, 3, -0.1), 92 | X = model_matrix) 93 | s$dat$cov = model_matrix[,"cov"] 94 | s$dat$cov2 = model_matrix[,"cov2"] 95 | # print(s$plot) 96 | # library(ggplot2); ggplot(s$dat, aes(time, y)) + geom_point() 97 | 98 | # include formula with the covariate and transformation 99 | suppressWarnings({ 100 | m <- glmmfields(y ~ -1+ cov + cov2, 101 | data = s$dat, time = "time", 102 | lat = "lat", lon = "lon", nknots = nknots, 103 | iter = ITER, chains = CHAINS, seed = SEED, 104 | estimate_df = FALSE, fixed_df_value = df, year_re = TRUE, 105 | prior_intercept = student_t(999, 0, 5), control = list(adapt_delta = 0.9), 106 | prior_rw_sigma = half_t(1e6, 0, 1) 107 | ) 108 | }) 109 | m 110 | 111 | b <- tidy(m, estimate.method = "median") 112 | expect_equal(as.numeric(b[b$term == "sigma[1]", "estimate", drop = TRUE]), sigma, tol = sigma * TOL) 113 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL) 114 | expect_equal(as.numeric(b[b$term == "gp_theta", "estimate", drop = TRUE]), gp_theta, tol = gp_theta * TOL) 115 | expect_equal(as.numeric(b[grep("yearEffects\\[*", b$term), "estimate", drop = TRUE]), B, tol = 0.1) 116 | expect_equal(as.numeric(b[grep("year_sigma", b$term), "estimate", drop = TRUE]), year_sigma, tol = 0.1) 117 | }) 118 | 119 | # --------------- 120 | # AR process 121 | 122 | test_that("mvt-norm estimates ar process", { 123 | skip_on_cran() 124 | skip_on_travis() 125 | skip_on_appveyor() 126 | 127 | set.seed(SEED) 128 | 129 | gp_sigma <- 0.2 130 | sigma <- 0.1 131 | df <- 1000 132 | gp_theta <- 1.8 133 | n_draws <- 20 134 | nknots <- 7 135 | phi <- 0.5 136 | 137 | s <- sim_glmmfields( 138 | df = df, n_draws = n_draws, gp_theta = gp_theta, 139 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, phi = phi, 140 | n_data_points = 100 141 | ) 142 | # print(s$plot) 143 | # library(ggplot2); ggplot(s$dat, aes(time, y)) + 144 | # geom_point(alpha = 0.5, position = position_jitter(width = 0.2)) 145 | 146 | suppressWarnings({ 147 | m <- glmmfields(y ~ 0, 148 | data = s$dat, time = "time", 149 | lat = "lat", lon = "lon", nknots = nknots, 150 | iter = ITER, chains = CHAINS, seed = SEED, 151 | estimate_ar = TRUE 152 | ) 153 | }) 154 | m 155 | 156 | b <- tidy(m, estimate.method = "median") 157 | expect_equal(as.numeric(b[b$term == "sigma[1]", "estimate", drop = TRUE]), sigma, tol = sigma * TOL) 158 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * 
TOL) 159 | expect_equal(as.numeric(b[b$term == "phi[1]", "estimate", drop = TRUE]), phi, tol = phi * TOL) 160 | }) 161 | 162 | # ------------------- 163 | # AR + year random effects 164 | 165 | test_that("mvt-norm estimates ar process *with* year random walk effects", { 166 | skip_on_cran() 167 | skip_on_travis() 168 | skip_on_appveyor() 169 | 170 | set.seed(SEED) 171 | 172 | gp_sigma <- 0.2 173 | sigma <- 0.1 174 | df <- 1000 175 | gp_theta <- 1.8 176 | n_draws <- 20 177 | nknots <- 7 178 | phi <- 0.3 179 | B <- vector(mode = "double", length = n_draws) 180 | B[1] <- 0 181 | year_sigma <- 0.3 182 | for (i in 2:length(B)) { 183 | B[i] <- B[i - 1] + rnorm(1, 0, year_sigma) # random walk 184 | } 185 | 186 | s <- sim_glmmfields( 187 | df = df, n_draws = n_draws, gp_theta = gp_theta, 188 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, phi = phi, 189 | B = B, X = model.matrix(~a - 1, data.frame(a = gl(n_draws, n_data_points))), 190 | n_data_points = n_data_points 191 | ) 192 | # print(s$plot) 193 | # library(ggplot2); ggplot(s$dat, aes(time, y)) + 194 | # geom_point(alpha = 0.5, position = position_jitter(width = 0.2)) 195 | 196 | suppressWarnings({ 197 | m <- glmmfields(y ~ 0, 198 | data = s$dat, time = "time", 199 | lat = "lat", lon = "lon", nknots = nknots, 200 | iter = ITER, chains = CHAINS, seed = SEED, 201 | year_re = TRUE, 202 | estimate_ar = TRUE 203 | ) 204 | }) 205 | m 206 | 207 | TOL <- 0.15 208 | b <- tidy(m, estimate.method = "median") 209 | expect_equal(as.numeric(b[b$term == "sigma[1]", "estimate", drop = TRUE]), sigma, tol = sigma * TOL) 210 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL) 211 | expect_equal(as.numeric(b[b$term == "year_sigma[1]", "estimate", drop = TRUE]), year_sigma, tol = year_sigma * 0.2) 212 | expect_equal(as.numeric(b[b$term == "phi[1]", "estimate", drop = TRUE]), phi, tol = phi * TOL) 213 | expect_equal(as.numeric(b[b$term == "phi[1]", "estimate", drop = TRUE]), phi, tol = phi * TOL) 214 | }) 215 | 216 | # ------------------- 217 | # AR fixed + year random effects + covariate. These checks fail when phi 218 | # is estimated, because the phi parameter in the spatial field is 219 | # confounded with the AR(1) year random effects. Here phi is fixed. 
220 | 221 | test_that("mvt-norm estimates ar process *with* year random walk effects and covariate", { 222 | skip_on_cran() 223 | skip_on_travis() 224 | skip_on_appveyor() 225 | 226 | set.seed(SEED) 227 | 228 | gp_sigma <- 0.2 229 | sigma <- 0.1 230 | df <- 1000 231 | gp_theta <- 1.8 232 | n_draws <- 20 233 | nknots <- 7 234 | phi <- 0.7 235 | B <- vector(mode = "double", length = n_draws) 236 | B[1] <- 0 237 | year_sigma <- 0.3 238 | for (i in 2:length(B)) { 239 | B[i] <- B[i - 1] + rnorm(1, 0, year_sigma) # random walk 240 | } 241 | 242 | cov_vec = rnorm(n_draws*n_data_points,0,1) 243 | model_matrix = model.matrix(~a - 1 + cov + cov2, 244 | data.frame(a = gl(n_draws, n_data_points), cov=cov_vec, cov2=cov_vec^2)) 245 | 246 | s <- sim_glmmfields( 247 | df = df, n_draws = n_draws, gp_theta = gp_theta, 248 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, phi = phi, 249 | B = c(B, 3, -0.1), X = model_matrix, n_data_points = n_data_points 250 | ) 251 | # print(s$plot) 252 | # library(ggplot2); ggplot(s$dat, aes(time, y)) + 253 | # geom_point(alpha = 0.5, position = position_jitter(width = 0.2)) 254 | 255 | # create dummy covariate 256 | s$dat$cov = model_matrix[,"cov"] 257 | s$dat$cov2 = model_matrix[,"cov2"] 258 | 259 | # include formula with the covariate and transformation 260 | suppressWarnings({ 261 | m <- glmmfields(y ~ -1 + cov + cov2, 262 | data = s$dat, time = "time", 263 | lat = "lat", lon = "lon", nknots = nknots, 264 | iter = ITER, chains = CHAINS, seed = SEED, 265 | year_re = TRUE, fixed_phi_value = phi, 266 | estimate_ar = FALSE 267 | ) 268 | }) 269 | m 270 | 271 | TOL <- 0.15 272 | b <- tidy(m, estimate.method = "median") 273 | expect_equal(as.numeric(b[b$term == "gp_theta", "estimate", drop = TRUE]), gp_theta, tol = gp_theta * TOL) 274 | expect_equal(as.numeric(b[b$term == "sigma[1]", "estimate", drop = TRUE]), sigma, tol = sigma * TOL) 275 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL) 276 | expect_equal(as.numeric(b[grep("yearEffects\\[*", b$term), "estimate", drop = TRUE]), B, tol = 0.1) 277 | #expect_equal(b[b$term == "year_sigma[1]", "estimate"], year_sigma, tol = 0.1) 278 | #expect_equal(b[b$term == "phi[1]", "estimate"], phi, tol = phi * TOL) 279 | }) 280 | 281 | # -------------------------- 282 | # global int + AR RF 283 | 284 | test_that("mvt-norm estimates global int + AR RF", { 285 | skip_on_cran() 286 | skip_on_travis() 287 | skip_on_appveyor() 288 | 289 | set.seed(SEED * 2) 290 | 291 | gp_sigma <- 0.2 292 | sigma <- 0.2 293 | df <- 1000 294 | gp_theta <- 1.8 295 | n_draws <- 20 296 | nknots <- 10 297 | phi <- 0.75 298 | B <- vector(mode = "double", length = n_draws) 299 | B[1] <- 6 300 | year_sigma <- 0.0001 301 | for (i in 2:length(B)) { 302 | B[i] <- B[i - 1] + rnorm(1, 0, year_sigma) # random walk 303 | } 304 | 305 | s <- sim_glmmfields( 306 | df = df, n_draws = n_draws, gp_theta = gp_theta, 307 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, phi = phi, 308 | B = B, X = model.matrix(~a - 1, data.frame(a = gl(n_draws, n_data_points))), 309 | n_data_points = n_data_points 310 | ) 311 | # print(s$plot) 312 | # library(ggplot2); ggplot(s$dat, aes(time, y)) + 313 | # geom_point(alpha = 0.5, position = position_jitter(width = 0.2)) 314 | 315 | suppressWarnings({ 316 | m <- glmmfields(y ~ 1, 317 | data = s$dat, time = "time", 318 | lat = "lat", lon = "lon", nknots = nknots, 319 | iter = ITER, chains = CHAINS, seed = SEED, 320 | year_re = FALSE, 321 | control = list(adapt_delta = 0.95), 322 | 
estimate_ar = TRUE, prior_intercept = student_t(99, 0, 30) 323 | ) 324 | }) 325 | m 326 | 327 | b <- tidy(m, estimate.method = "median") 328 | expect_equal(as.numeric(b[b$term == "sigma[1]", "estimate", drop = TRUE]), sigma, tol = sigma * TOL) 329 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL) 330 | expect_equal(as.numeric(b[b$term == "phi[1]", "estimate", drop = TRUE]), phi, tol = phi * TOL) 331 | }) 332 | 333 | # -------------------------- 334 | # many ints + fixed AR 335 | 336 | test_that("mvt-norm estimates many ints + fixed AR", { 337 | skip_on_cran() 338 | skip_on_travis() 339 | skip_on_appveyor() 340 | 341 | set.seed(SEED * 5) 342 | 343 | gp_sigma <- 0.2 344 | sigma <- 0.2 345 | df <- 6 346 | gp_theta <- 1.8 347 | n_draws <- 20 348 | nknots <- 10 349 | phi <- 1 350 | B <- vector(mode = "double", length = n_draws) 351 | B[1] <- 6 352 | year_sigma <- 0.4 353 | for (i in 2:length(B)) { 354 | B[i] <- B[i - 1] + rnorm(1, 0, year_sigma) # random walk 355 | } 356 | 357 | # plot(B) 358 | s <- sim_glmmfields( 359 | df = df, n_draws = n_draws, gp_theta = gp_theta, 360 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, phi = phi, 361 | B = B, X = model.matrix(~a - 1, data.frame(a = gl(n_draws, n_data_points))), 362 | n_data_points = n_data_points 363 | ) 364 | # print(s$plot) 365 | 366 | library(dplyr) 367 | means <- group_by(s$dat, time) %>% summarise(m = mean(y)) 368 | plot(B, pch = 19) 369 | points(means$m, col = "red") 370 | 371 | library(ggplot2) 372 | ggplot(s$dat, aes(time, y)) + 373 | geom_point(alpha = 0.5, position = position_jitter(width = 0.2)) + 374 | geom_point( 375 | data = data.frame(B = B, time = seq_len(n_draws)), 376 | aes(x = time, y = B), inherit.aes = FALSE, col = "red" 377 | ) 378 | 379 | suppressWarnings({ 380 | m <- glmmfields(y ~ 0 + as.factor(time), 381 | data = s$dat, 382 | time = "time", 383 | lat = "lat", lon = "lon", nknots = nknots, 384 | iter = ITER, chains = CHAINS, seed = SEED, 385 | fixed_df_value = 6, estimate_df = FALSE, 386 | estimate_ar = FALSE, fixed_phi_value = 1, prior_intercept = student_t(99, 0, 30) 387 | ) 388 | }) 389 | m 390 | 391 | suppressWarnings({ 392 | m2 <- glmmfields(y ~ -1, 393 | data = s$dat, 394 | time = "time", 395 | lat = "lat", lon = "lon", nknots = nknots, 396 | iter = ITER, chains = CHAINS, seed = SEED, 397 | fixed_df_value = 6, estimate_df = FALSE, 398 | estimate_ar = FALSE, fixed_phi_value = 1, prior_intercept = student_t(99, 0, 30) 399 | ) 400 | }) 401 | m2 402 | 403 | b <- tidy(m, estimate.method = "median") 404 | expect_equal(as.numeric(b[b$term == "sigma[1]", "estimate", drop = TRUE]), sigma, tol = sigma * TOL) 405 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL) 406 | 407 | B_hat <- subset(b, grepl("B", term)) 408 | expect_equal(B, as.numeric(B_hat$estimate), tol = TOL) 409 | 410 | library(dplyr) 411 | q <- subset(b, grepl("B", term)) 412 | qq <- group_by(s$dat, time) %>% summarise(m = mean(y)) 413 | plot(q$estimate, col = "blue") 414 | points(seq_len(length(B)), B, pch = 19) 415 | points(qq$m, col = "red") 416 | }) 417 | -------------------------------------------------------------------------------- /tests/testthat/test-fit-basic.R: -------------------------------------------------------------------------------- 1 | if (interactive()) options(mc.cores = parallel::detectCores()) 2 | 3 | # expect_parameter <- function(x, stan_term, true_value, tolerance) { 4 | # expect_equal(x[x[,term] == stan_term, 
"estimate"], true_value, tol = tolerance) 5 | # } 6 | 7 | ITER <- 600 8 | CHAINS <- 2 9 | SEED <- 9999 10 | TOL <- 0.2 # % 11 | TOL_df <- .25 # % 12 | 13 | # ------------------------------------------------------ 14 | # a basic fit 15 | 16 | gp_sigma <- 0.2 17 | sigma <- 0.1 18 | df <- 4 19 | gp_theta <- 1.2 20 | n_draws <- 15 21 | nknots <- 7 22 | n_data_points <- 50 23 | 24 | # ------------------------------------------------------ 25 | # with repeat stations 26 | 27 | test_that("mvt-norm model fits with repeat stations (plus other main functions)", { 28 | skip_on_cran() 29 | set.seed(SEED) 30 | 31 | s <- sim_glmmfields( 32 | df = df, n_draws = n_draws, gp_theta = gp_theta, 33 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, n_data_points = n_data_points 34 | ) 35 | # s$plot 36 | 37 | suppressWarnings({ 38 | m <- glmmfields(y ~ 0, 39 | data = s$dat, time = "time", 40 | lat = "lat", lon = "lon", nknots = nknots, 41 | iter = ITER, chains = CHAINS, seed = SEED, 42 | estimate_df = FALSE, fixed_df_value = df 43 | ) 44 | }) 45 | 46 | expect_output(print(m), "Inference for Stan model") 47 | 48 | p <- predict(m) 49 | pp <- predict(m, type = "response", interval = "prediction") 50 | # plot(s$dat$y, p$estimate) 51 | # segments(s$dat$y, pp$conf_low, s$dat$y, pp$conf_high, lwd = 0.5, col = "#00000020") 52 | # segments(s$dat$y, p$conf_low, s$dat$y, p$conf_high, lwd = 1, col = "#00000060") 53 | # abline(a = 0, b = 1) 54 | 55 | expect_equal(mean((p$estimate - s$dat$y)^2), 0, tol = 0.01) 56 | 57 | plot(m) 58 | plot(m, type = "spatial-residual") 59 | plot(m, type = "residual-vs-fitted") 60 | 61 | coverage <- mean(s$dat$y > pp$conf_low & s$dat$y < pp$conf_high) 62 | expect_equal(coverage, 0.95, tol = 0.025) 63 | 64 | b <- tidy(m, estimate.method = "median") 65 | expect_equal(as.numeric(b[b$term == "sigma[1]", "estimate", drop = TRUE]), sigma, tol = sigma * TOL) 66 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL) 67 | expect_equal(as.numeric(b[b$term == "gp_theta", "estimate", drop = TRUE]), gp_theta, tol = gp_theta * TOL) 68 | }) 69 | 70 | # ------------------------------------------------------ 71 | # a Gaussian observation model exponential covariance function 72 | 73 | test_that("mvt-norm model fits with an exponential covariance function", { 74 | skip_on_cran() 75 | skip_on_travis() 76 | skip_on_appveyor() 77 | 78 | gp_sigma <- 0.2 79 | sigma <- 0.1 80 | df <- 10 81 | gp_theta <- 1.2 82 | n_draws <- 4 83 | nknots <- 9 84 | 85 | set.seed(SEED) 86 | s <- sim_glmmfields( 87 | df = df, n_draws = n_draws, gp_theta = gp_theta, 88 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, n_data_points = n_data_points, 89 | covariance = "exponential" 90 | ) 91 | # print(s$plot) 92 | 93 | suppressWarnings({ 94 | m <- glmmfields(y ~ 1, 95 | data = s$dat, time = "time", 96 | lat = "lat", lon = "lon", nknots = nknots, 97 | iter = ITER, chains = CHAINS, seed = SEED, 98 | estimate_df = FALSE, fixed_df_value = df, 99 | covariance = "exponential" 100 | ) 101 | }) 102 | 103 | b <- tidy(m, estimate.method = "median") 104 | expect_equal(as.numeric(b[b$term == "sigma[1]", "estimate", drop = TRUE]), sigma, tol = sigma * TOL) 105 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL) 106 | expect_equal(as.numeric(b[b$term == "gp_theta", "estimate", drop = TRUE]), gp_theta, tol = gp_theta * TOL) 107 | }) 108 | 109 | # ------------------------------------------------------ 110 | # a Gaussian observation 
model matern covariance function 111 | 112 | test_that("mvn-norm model fits with an matern covariance function", { 113 | skip_on_cran() 114 | skip_on_travis() 115 | skip_on_appveyor() 116 | 117 | gp_sigma <- 0.2 118 | sigma <- 0.1 119 | df <- 10 120 | gp_theta <- 1.2 121 | n_draws <- 4 122 | nknots <- 9 123 | matern_kappa <- 1.5 124 | 125 | set.seed(SEED) 126 | s <- sim_glmmfields( 127 | df = df, n_draws = n_draws, gp_theta = gp_theta, 128 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, n_data_points = n_data_points, 129 | covariance = "matern", matern_kappa = matern_kappa 130 | ) 131 | # print(s$plot) 132 | 133 | 134 | suppressWarnings({ 135 | m <- glmmfields(y ~ 1, 136 | data = s$dat, time = "time", 137 | lat = "lat", lon = "lon", nknots = nknots, 138 | iter = ITER, chains = CHAINS, seed = SEED, 139 | estimate_df = FALSE, fixed_df_value = df, 140 | covariance = "matern", matern_kappa = matern_kappa 141 | ) 142 | }) 143 | 144 | b <- tidy(m, estimate.method = "median") 145 | expect_equal(as.numeric(b[b$term == "sigma[1]", "estimate", drop = TRUE]), sigma, tol = sigma * TOL) 146 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL) 147 | expect_equal(as.numeric(b[b$term == "gp_theta", "estimate", drop = TRUE]), gp_theta, tol = gp_theta * TOL) 148 | }) 149 | 150 | test_that("predictions work with one time slice", { 151 | skip_on_cran() 152 | skip_on_travis() 153 | skip_on_appveyor() 154 | set.seed(SEED) 155 | 156 | s <- sim_glmmfields( 157 | df = df, n_draws = 1, gp_theta = gp_theta, 158 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, n_data_points = n_data_points 159 | ) 160 | 161 | suppressWarnings({ 162 | m <- glmmfields(y ~ 0, 163 | data = s$dat, time = "time", 164 | lat = "lat", lon = "lon", nknots = nknots, 165 | iter = ITER, chains = CHAINS, seed = SEED, 166 | estimate_df = FALSE, fixed_df_value = df 167 | ) 168 | }) 169 | 170 | p <- predict(m) 171 | }) 172 | 173 | # ------------------------------------------------- 174 | # make sure large degrees of freedom values 175 | # return values very close to the true MVN distribution 176 | 177 | test_that("true MVN model closely resembles MVT model with a large fixed df", { 178 | skip_on_cran() 179 | skip_on_travis() 180 | skip_on_appveyor() 181 | 182 | gp_sigma <- 0.2 183 | sigma <- 0.1 184 | gp_theta <- 1.2 185 | n_draws <- 4 186 | nknots <- 9 187 | 188 | set.seed(SEED) 189 | s <- sim_glmmfields( 190 | n_draws = n_draws, gp_theta = gp_theta, 191 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, 192 | df = 800, n_data_points = n_data_points 193 | ) 194 | 195 | suppressWarnings({ 196 | m_mvt <- glmmfields(y ~ 1, 197 | data = s$dat, time = "time", 198 | lat = "lat", lon = "lon", nknots = nknots, 199 | iter = ITER, chains = CHAINS, seed = SEED, 200 | estimate_df = FALSE, fixed_df_value = 800 201 | ) 202 | }) 203 | 204 | suppressWarnings({ 205 | m_mvn <- glmmfields(y ~ 1, 206 | data = s$dat, time = "time", 207 | lat = "lat", lon = "lon", nknots = nknots, 208 | iter = ITER, chains = CHAINS, seed = SEED, 209 | estimate_df = FALSE, fixed_df_value = 1e9 210 | ) # internally switched to true MVN 211 | }) 212 | 213 | b_mvt <- tidy(m_mvt, estimate.method = "median") 214 | b_mvn <- tidy(m_mvn, estimate.method = "median") 215 | expect_equal(as.numeric(b_mvn$estimate), as.numeric(b_mvt$estimate), tol = 0.02) 216 | }) 217 | 218 | # ------------------------------------------------------------------- 219 | 220 | test_that("A basic model fits with missing time elements", { 221 | 
skip_on_cran() 222 | skip_on_travis() 223 | skip_on_appveyor() 224 | 225 | gp_sigma <- 0.2 226 | sigma <- 0.1 227 | df <- 10 228 | gp_theta <- 1.2 229 | n_draws <- 10 230 | nknots <- 9 231 | 232 | set.seed(SEED * 2) 233 | s <- sim_glmmfields( 234 | df = df, n_draws = n_draws, gp_theta = gp_theta, 235 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, n_data_points = n_data_points, 236 | covariance = "squared-exponential" 237 | ) 238 | # print(s$plot) 239 | 240 | s$dat <- s$dat[s$dat$time != 4, , drop = FALSE] 241 | 242 | suppressWarnings({ 243 | m <- glmmfields(y ~ 1, 244 | data = s$dat, time = "time", 245 | lat = "lat", lon = "lon", nknots = nknots, 246 | iter = ITER, chains = CHAINS, seed = SEED, 247 | estimate_df = FALSE, fixed_df_value = df, 248 | covariance = "squared-exponential" 249 | ) 250 | }) 251 | 252 | b <- tidy(m, estimate.method = "median") 253 | expect_equal(as.numeric(b[b$term == "sigma[1]", "estimate", drop = TRUE]), sigma, tol = sigma * TOL) 254 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL * 1.5) 255 | expect_equal(as.numeric(b[b$term == "gp_theta", "estimate", drop = TRUE]), gp_theta, tol = gp_theta * TOL) 256 | }) 257 | 258 | 259 | test_that("A offset in formula works", { 260 | skip_on_cran() 261 | skip_on_travis() 262 | skip_on_appveyor() 263 | 264 | gp_sigma <- 0.2 265 | sigma <- 0.1 266 | df <- 10 267 | gp_theta <- 1.2 268 | n_draws <- 2 269 | nknots <- 9 270 | 271 | set.seed(SEED * 2) 272 | s <- sim_glmmfields( 273 | df = df, n_draws = n_draws, gp_theta = gp_theta, 274 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, n_data_points = n_data_points, 275 | covariance = "squared-exponential" 276 | ) 277 | # print(s$plot) 278 | s$dat$offset <- rnorm(nrow(s$dat), mean = 0, sd = 0.1) 279 | 280 | suppressWarnings({ 281 | m <- glmmfields(y ~ 1, 282 | data = s$dat, time = "time", 283 | lat = "lat", lon = "lon", nknots = nknots, 284 | iter = ITER, chains = CHAINS, seed = SEED, 285 | estimate_df = FALSE, fixed_df_value = df, 286 | covariance = "squared-exponential" 287 | ) 288 | }) 289 | suppressWarnings({ 290 | m_offset <- glmmfields(y ~ 1, offset = s$dat$offset, 291 | data = s$dat, time = "time", 292 | lat = "lat", lon = "lon", nknots = nknots, 293 | iter = ITER, chains = CHAINS, seed = SEED, 294 | estimate_df = FALSE, fixed_df_value = df, 295 | covariance = "squared-exponential" 296 | ) 297 | }) 298 | b <- tidy(m, estimate.method = "median") 299 | b_offset <- tidy(m_offset, estimate.method = "median") 300 | 301 | p1 <- predict(m_offset) 302 | p2 <- predict(m_offset, newdata = s$dat, offset = s$dat$offset) 303 | expect_identical(p1, p2) 304 | # expect_error(p3 <- predict(m_offset, newdata = s$dat), regexp = "offset") 305 | }) 306 | -------------------------------------------------------------------------------- /tests/testthat/test-fit-observation-models.R: -------------------------------------------------------------------------------- 1 | if (interactive()) options(mc.cores = parallel::detectCores()) 2 | 3 | ITER <- 600 4 | CHAINS <- 2 5 | SEED <- 9999 6 | TOL <- 0.25 # % 7 | TOL_df <- .25 # % 8 | 9 | nknots <- 10 10 | gp_sigma <- 0.3 11 | 12 | # ------------------------------------------------------ 13 | # a negative binomial model 14 | 15 | test_that("mvt-nb2 model fits", { 16 | skip_on_cran() 17 | skip_on_travis() 18 | skip_on_appveyor() 19 | set.seed(SEED) 20 | 21 | sigma <- 8 22 | df <- 5 23 | b0 <- 7 24 | n_draws <- 8 25 | gp_theta <- 1.6 26 | 27 | s <- sim_glmmfields( 28 | df = df, n_draws = 
n_draws, gp_theta = gp_theta, 29 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, 30 | obs_error = "nb2", B = b0 31 | ) 32 | # print(s$plot) 33 | 34 | suppressWarnings({ 35 | m <- glmmfields(y ~ 1, 36 | data = s$dat, time = "time", 37 | lat = "lat", lon = "lon", nknots = nknots, 38 | iter = ITER, chains = CHAINS, family = nbinom2(link = "log"), 39 | estimate_df = FALSE, fixed_df_value = df, 40 | control = list(adapt_delta = 0.9), seed = SEED 41 | ) 42 | }) 43 | 44 | p <- predict(m) 45 | 46 | b <- tidy(m, estimate.method = "median") 47 | # expect_equal(b[b$term == "nb2_phi[1]", "estimate", drop = TRUE], sigma, tol = sigma * TOL) 48 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL) 49 | expect_equal(as.numeric(b[b$term == "gp_theta", "estimate", drop = TRUE]), gp_theta, tol = gp_theta * TOL) 50 | expect_equal(as.numeric(b[b$term == "B[1]", "estimate", drop = TRUE]), b0, tol = gp_theta * TOL) 51 | }) 52 | 53 | # ------------------------------------------------------ 54 | # a Gamma observation model 55 | 56 | test_that("mvt-gamma model fits", { 57 | skip_on_cran() 58 | skip_on_travis() 59 | skip_on_appveyor() 60 | 61 | set.seed(SEED) 62 | 63 | sigma <- 0.3 64 | df <- 10 65 | b0 <- 2 66 | n_draws <- 15 67 | gp_theta <- 1.6 68 | 69 | s <- sim_glmmfields( 70 | df = df, n_draws = n_draws, gp_theta = gp_theta, 71 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, 72 | obs_error = "gamma", B = b0 73 | ) 74 | # print(s$plot) 75 | 76 | suppressWarnings({ 77 | m <- glmmfields(y ~ 1, 78 | data = s$dat, time = "time", 79 | lat = "lat", lon = "lon", nknots = nknots, 80 | iter = ITER, chains = CHAINS, family = Gamma(link = "log"), 81 | estimate_df = FALSE, fixed_df_value = df, seed = SEED 82 | ) 83 | }) 84 | 85 | p <- predict(m) 86 | 87 | b <- tidy(m, estimate.method = "median") 88 | expect_equal(as.numeric(b[b$term == "CV[1]", "estimate", drop = TRUE]), sigma, tol = sigma * TOL) 89 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL) 90 | expect_equal(as.numeric(b[b$term == "gp_theta", "estimate", drop = TRUE]), gp_theta, tol = gp_theta * TOL) 91 | expect_equal(as.numeric(b[b$term == "B[1]", "estimate", drop = TRUE]), b0, tol = gp_theta * TOL) 92 | }) 93 | 94 | # ------------------------------------------------------ 95 | # a binomial observation model 96 | 97 | test_that("mvt-binomial model fits", { 98 | skip_on_cran() 99 | skip_on_travis() 100 | skip_on_appveyor() 101 | 102 | set.seed(SEED) 103 | 104 | nknots <- 12 105 | gp_sigma <- 1.2 106 | df <- 10 107 | b0 <- 0 108 | n_draws <- 15 109 | gp_theta <- 2.1 110 | 111 | s <- sim_glmmfields( 112 | df = df, n_draws = n_draws, gp_theta = gp_theta, 113 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, 114 | obs_error = "binomial", B = b0 115 | ) 116 | # print(s$plot) 117 | # out <- reshape2::melt(s$proj) 118 | # names(out) <- c("time", "pt", "y") 119 | # out <- dplyr::arrange_(out, "time", "pt") 120 | # out$lon <- s$dat$lon 121 | # out$lat <- s$dat$lat 122 | # ggplot2::ggplot(out, ggplot2::aes(lon, lat, colour = plogis(y))) + ggplot2::geom_point() + 123 | # ggplot2::facet_wrap(~time) 124 | 125 | suppressWarnings({ 126 | m <- glmmfields(y ~ 0, 127 | data = s$dat, time = "time", 128 | lat = "lat", lon = "lon", nknots = nknots, 129 | iter = ITER, chains = CHAINS, family = binomial(link = "logit"), 130 | estimate_df = FALSE, fixed_df_value = df, seed = SEED 131 | ) 132 | }) 133 | # m 134 | # 135 | # p <- predict(m) 136 | # p <- 
predict(m, interval = "prediction") 137 | 138 | b <- tidy(m, estimate.method = "median") 139 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL) 140 | expect_equal(as.numeric(b[b$term == "gp_theta", "estimate", drop = TRUE]), gp_theta, tol = gp_theta * TOL) 141 | }) 142 | 143 | # ------------------------------------------------------ 144 | # a poisson observation model 145 | 146 | test_that("mvt-poisson model fits", { 147 | skip_on_cran() 148 | skip_on_travis() 149 | skip_on_appveyor() 150 | 151 | set.seed(SEED) 152 | 153 | nknots <- 12 154 | gp_sigma <- 0.8 155 | df <- 10 156 | b0 <- 3 157 | n_draws <- 15 158 | gp_theta <- 2.1 159 | 160 | s <- sim_glmmfields( 161 | df = df, n_draws = n_draws, gp_theta = gp_theta, 162 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, 163 | obs_error = "poisson", B = b0 164 | ) 165 | # print(s$plot) 166 | # hist(s$dat$y) 167 | 168 | suppressWarnings({ 169 | m <- glmmfields(y ~ 1, 170 | data = s$dat, time = "time", 171 | lat = "lat", lon = "lon", nknots = nknots, 172 | iter = ITER, chains = CHAINS, family = poisson(link = "log"), 173 | estimate_df = FALSE, fixed_df_value = df, seed = SEED 174 | ) 175 | }) 176 | m 177 | 178 | b <- tidy(m, estimate.method = "median") 179 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL) 180 | expect_equal(as.numeric(b[b$term == "gp_theta", "estimate", drop = TRUE]), gp_theta, tol = gp_theta * TOL) 181 | expect_equal(as.numeric(b[b$term == "B[1]", "estimate", drop = TRUE]), b0, tol = gp_theta * TOL) 182 | }) 183 | -------------------------------------------------------------------------------- /tests/testthat/test-fit-with-predictors.R: -------------------------------------------------------------------------------- 1 | if (interactive()) options(mc.cores = parallel::detectCores()) 2 | 3 | ITER <- 600 4 | CHAINS <- 2 5 | SEED <- 9999 6 | TOL <- 0.2 # % 7 | TOL_df <- .25 # % 8 | 9 | # ------------------------------------------------------ 10 | # a Gaussian observation model with factor-level predictors for years 11 | 12 | test_that("mvt-norm estimates betas", { 13 | skip_on_cran() 14 | skip_on_travis() 15 | skip_on_appveyor() 16 | 17 | gp_sigma <- 0.2 18 | sigma <- 0.1 19 | df <- 10 20 | gp_theta <- 1.2 21 | n_draws <- 15 22 | nknots <- 9 23 | set.seed(SEED) 24 | B <- rnorm(n_draws, 0, 1) 25 | 26 | s <- sim_glmmfields( 27 | df = df, n_draws = n_draws, gp_theta = gp_theta, 28 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, B = B, 29 | X = model.matrix(~a - 1, data.frame(a = gl(n_draws, 100))) 30 | ) 31 | # print(s$plot) 32 | # library(ggplot2); ggplot(s$dat, aes(time, y)) + geom_point() 33 | 34 | suppressWarnings({ 35 | m <- glmmfields(y ~ as.factor(time) - 1, 36 | data = s$dat, time = "time", 37 | lat = "lat", lon = "lon", nknots = nknots, 38 | iter = ITER, chains = CHAINS, seed = SEED, 39 | estimate_df = FALSE, fixed_df_value = df, 40 | prior_beta = student_t(50, 0, 2) 41 | ) 42 | }) 43 | 44 | b <- tidy(m, estimate.method = "median") 45 | expect_equal(as.numeric(b[b$term == "sigma[1]", "estimate", drop = TRUE]), sigma, tol = sigma * TOL) 46 | expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), gp_sigma, tol = gp_sigma * TOL) 47 | expect_equal(as.numeric(b[b$term == "gp_theta", "estimate", drop = TRUE]), gp_theta, tol = gp_theta * TOL) 48 | expect_equal(as.numeric(b[grep("B\\[*", b$term), "estimate", drop = TRUE]), B, tol = 0.05) 49 | }) 50 | 
-------------------------------------------------------------------------------- /tests/testthat/test-format-data.R: -------------------------------------------------------------------------------- 1 | test_that("format_data() formats some data", { 2 | s <- sim_glmmfields(n_data_points = 50, n_knots = 5, n_draws = 2) 3 | mf <- model.frame(y ~ 1, s$dat) 4 | X <- model.matrix(y ~ 1, mf) 5 | y <- model.response(mf, "numeric") 6 | f <- format_data(s$dat, y = y, X = X, time = "time", nknots = 5) 7 | 8 | expect_equal(nrow(f$knots), 5L) 9 | expect_equal(dim(f$spatglm_data$distKnots), c(5L, 5L)) 10 | }) 11 | -------------------------------------------------------------------------------- /tests/testthat/test-predict.R: -------------------------------------------------------------------------------- 1 | if (interactive()) options(mc.cores = parallel::detectCores()) 2 | 3 | ITER <- 600 4 | CHAINS <- 2 5 | SEED <- 9999 6 | TOL <- 0.2 # % 7 | TOL_df <- .25 # % 8 | 9 | gp_sigma <- 0.2 10 | sigma <- 0.1 11 | df <- 1000 12 | gp_theta <- 1.2 13 | n_draws <- 15 14 | nknots <- 8 15 | n_data_points <- 50 16 | 17 | test_that("predict.glmmfields works", { 18 | skip_on_cran() 19 | skip_on_travis() 20 | skip_on_appveyor() 21 | set.seed(SEED) 22 | 23 | s <- sim_glmmfields( 24 | df = df, n_draws = n_draws, gp_theta = gp_theta, 25 | gp_sigma = gp_sigma, sd_obs = sigma, n_knots = nknots, n_data_points = n_data_points 26 | ) 27 | 28 | suppressWarnings({ 29 | m <- glmmfields(y ~ 0, 30 | data = s$dat, time = "time", 31 | lat = "lat", lon = "lon", nknots = nknots, 32 | iter = ITER, chains = CHAINS, seed = SEED, 33 | estimate_df = FALSE, fixed_df_value = df 34 | ) 35 | }) 36 | 37 | p <- predict(m) 38 | p_newdata <- predict(m, newdata = s$dat, offset = rep(0, nrow(s$dat))) 39 | p_newdata2 <- predict(m, newdata = m$data, offset = rep(0, nrow(s$dat))) 40 | 41 | plot(s$dat$y, p$estimate) 42 | plot(s$dat$y, p_newdata$estimate) 43 | plot(s$dat$y, p_newdata2$estimate) 44 | 45 | expect_identical(p, p_newdata) 46 | 47 | expect_gte(cor(s$dat$y, p$estimate), 0.75) 48 | expect_gte(cor(s$dat$y, p_newdata$estimate), 0.75) 49 | expect_gte(cor(s$dat$y, p_newdata2$estimate), 0.75) 50 | 51 | # with a subset of data 52 | random_subset <- sample(seq_len(nrow(s$dat)), size = 200) 53 | p_newdata <- predict(m, newdata = s$dat[random_subset, ], 54 | offset = rep(0, nrow(s$dat[random_subset, ]))) 55 | plot(s$dat$y[random_subset], p_newdata$estimate) 56 | expect_gte(cor(s$dat$y[random_subset], p_newdata$estimate), 0.75) 57 | 58 | nd <- s$dat 59 | nd$y <- NULL 60 | p <- predict(m, newdata = nd, offset = rep(0, nrow(nd))) 61 | }) 62 | -------------------------------------------------------------------------------- /tests/testthat/test-priors.R: -------------------------------------------------------------------------------- 1 | test_that("priors parse", { 2 | expect_equal(parse_t_prior(student_t(3, 0, 1)), c(3, 0, 1)) 3 | expect_error(student_t(-99, 0, 1)) 4 | expect_error(student_t(3, 0, -99)) 5 | expect_warning(half_t(3, -99, 1)) 6 | }) 7 | -------------------------------------------------------------------------------- /tests/testthat/test-stations.R: -------------------------------------------------------------------------------- 1 | set.seed(42) 2 | 3 | s <- sim_glmmfields( 4 | df = 1000, n_draws = 2, gp_theta = 1.5, 5 | gp_sigma = 0.3, sd_obs = 0.1, n_knots = 8, n_data_points = 30 6 | ) 7 | 8 | test_that("Stations in second time slice can be in different order from first time slice", { 9 | skip_on_cran() 10 | skip_on_travis() 11 | 
skip_on_appveyor() 12 | 13 | d <- s$dat 14 | d$ID <- seq_len(nrow(d)) 15 | suppressWarnings({ 16 | m <- glmmfields(y ~ 0, 17 | data = d, time = "time", 18 | lat = "lat", lon = "lon", nknots = 8, 19 | iter = 400, chains = 2, seed = 1 20 | ) 21 | }) 22 | d$pred <- predict(m)$estimate 23 | 24 | d2 <- d 25 | d2[d2$time == 2, ] <- d2[d2$time == 2, ][sample(seq_len(30), size = 30), ] # scramble time 2 26 | suppressWarnings({ 27 | m2 <- glmmfields(y ~ 0, 28 | data = d2, time = "time", 29 | lat = "lat", lon = "lon", nknots = 8, 30 | iter = 400, chains = 2, seed = 1 31 | ) 32 | }) 33 | d2$pred <- predict(m2)$estimate 34 | d2 <- dplyr::arrange(d2, ID) 35 | 36 | plot(d2$pred, d$pred) 37 | expect_equal(d2$pred, d$pred, tolerance = 0.000001) 38 | }) 39 | 40 | test_that("Stations in second time slice introduce new stations", { 41 | skip_on_cran() 42 | skip_on_travis() 43 | skip_on_appveyor() 44 | 45 | d <- s$dat 46 | d$ID <- seq_len(nrow(d)) 47 | suppressWarnings({ 48 | m <- glmmfields(y ~ 0, 49 | data = d, time = "time", 50 | lat = "lat", lon = "lon", nknots = 8, 51 | iter = 800, chains = 2, seed = 1 52 | ) 53 | }) 54 | 55 | d2 <- d[-c(2, 10), ] 56 | suppressWarnings({ 57 | m2 <- glmmfields(y ~ 0, 58 | data = d2, time = "time", 59 | lat = "lat", lon = "lon", nknots = 8, 60 | iter = 800, chains = 2, seed = 1 61 | ) 62 | }) 63 | d2$pred <- predict(m2)$estimate 64 | d$pred <- predict(m)$estimate 65 | d <- dplyr::filter(d, ID %in% d2$ID) 66 | 67 | plot(d2$pred, d$pred) 68 | expect_equal(d2$pred, d$pred, tolerance = .02) 69 | }) 70 | 71 | test_that("Ordering of time slices doesn't matter if stations aren't always present", { 72 | skip_on_cran() 73 | skip_on_travis() 74 | skip_on_appveyor() 75 | 76 | d <- s$dat 77 | d$ID <- seq_len(nrow(d)) 78 | d <- d[-c(2, 10), ] 79 | 80 | suppressWarnings({ 81 | m <- glmmfields(y ~ 0, 82 | data = d, time = "time", 83 | lat = "lat", lon = "lon", nknots = 8, 84 | iter = 800, chains = 2, seed = 1, cores = 1 85 | ) 86 | }) 87 | sd <- m$stan_data 88 | 89 | d2 <- rbind(d[d$time == 2, ], d[d$time == 1, ]) 90 | suppressWarnings({ 91 | m2 <- glmmfields(y ~ 0, 92 | data = d2, time = "time", 93 | lat = "lat", lon = "lon", nknots = 8, 94 | iter = 800, chains = 2, seed = 1, cores = 1 95 | ) 96 | }) 97 | sd2 <- m2$stan_data 98 | d2$pred <- predict(m2)$estimate 99 | d$pred <- predict(m)$estimate 100 | d2 <- dplyr::arrange(d2, ID) 101 | 102 | plot(d2$pred, d$pred) 103 | expect_equal(d2$pred, d$pred, tolerance = .01) 104 | }) 105 | -------------------------------------------------------------------------------- /vignettes/spatial-glms.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Spatial GLMs with glmmfields" 3 | author: "Sean C. Anderson and Eric J. Ward" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Spatial GLMs with glmmfields} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | \usepackage[utf8]{inputenc} 10 | --- 11 | 12 | Here we will use the glmmfields package to fit a spatial GLM with a predictor. While glmmfields was designed to fit spatiotemporal GLMs with the possibility of extreme events, it can also be used to fit regular spatial GLMs without a time element and without extreme events. Currently it can fit Gaussian (link = identity), Gamma (link = log), Poisson (link = log), negative binomial (link = log), and binomial (link = logit) models. The package can also fit lognormal (link = log) models. 
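As a quick sketch of the syntax (not evaluated here), the observation family is passed through the `family` argument, using either a standard family from stats or one of the helpers provided by glmmfields such as `nbinom2()` or `lognormal()`. In this sketch, `dat` is only a placeholder for a data frame with columns `y`, `lat`, and `lon`:

```{r, eval=FALSE}
# Not run: illustrative calls only; `dat` is a placeholder data frame
m_gamma <- glmmfields(y ~ 1, data = dat, lat = "lat", lon = "lon",
  nknots = 12, family = Gamma(link = "log"))
m_nb <- glmmfields(y ~ 1, data = dat, lat = "lat", lon = "lon",
  nknots = 12, family = nbinom2(link = "log"))
```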
13 | 14 | ```{r set-knitr-options, cache=FALSE, echo=FALSE} 15 | library("knitr") 16 | opts_chunk$set(message = FALSE, fig.width = 5.5) 17 | ``` 18 | 19 | Let's load the necessary packages: 20 | 21 | ```{r, message=FALSE, warning=FALSE} 22 | library(glmmfields) 23 | library(ggplot2) 24 | library(dplyr) 25 | ``` 26 | 27 | Set up parallel processing (not used in this example): 28 | 29 | ```{r, echo=TRUE, eval=FALSE} 30 | options(mc.cores = parallel::detectCores()) 31 | ``` 32 | 33 | First, let's simulate some data. We will use the built-in function `sim_glmmfields()`, but normally you would start with your own data. We will simulate 200 data points, some (fake) temperature data, an underlying random field spatial pattern, and add some observation error. In this example we will fit a Gamma GLM with a log link. 34 | 35 | The underlying intercept is 0.5 and the slope between temperature and our observed variable (say biomass or density of individuals in a quadrat) is 0.2. 36 | 37 | ```{r simulate-data} 38 | set.seed(1) 39 | N <- 200 # number of data points 40 | temperature <- rnorm(N, 0, 1) # simulated temperature data 41 | X <- cbind(1, temperature) # our design matrix 42 | s <- sim_glmmfields( 43 | n_draws = 1, gp_theta = 1.5, n_data_points = N, 44 | gp_sigma = 0.2, sd_obs = 0.2, n_knots = 12, obs_error = "gamma", 45 | covariance = "squared-exponential", X = X, 46 | B = c(0.5, 0.2) # B represents our intercept and slope 47 | ) 48 | d <- s$dat 49 | d$temperature <- temperature 50 | ggplot(s$dat, aes(lon, lat, colour = y)) + 51 | viridis::scale_colour_viridis() + 52 | geom_point(size = 3) 53 | ``` 54 | 55 | If we fit a regular GLM we can see that there is spatial autocorrelation in the residuals: 56 | 57 | ```{r} 58 | m_glm <- glm(y ~ temperature, data = d, family = Gamma(link = "log")) 59 | m_glm 60 | confint(m_glm) 61 | d$m_glm_residuals <- residuals(m_glm) 62 | ggplot(d, aes(lon, lat, colour = m_glm_residuals)) + 63 | scale_color_gradient2() + 64 | geom_point(size = 3) 65 | ``` 66 | 67 | Let's instead fit a spatial GLM with random fields. Note that we are only using 1 chain and 500 iterations here so the vignette builds quickly on CRAN. For final inference, you should likely use 4 or more chains and 2000 or more iterations. 68 | 69 | ```{r, results='hide'} 70 | m_spatial <- glmmfields(y ~ temperature, 71 | data = d, family = Gamma(link = "log"), 72 | lat = "lat", lon = "lon", nknots = 12, iter = 500, chains = 1, 73 | prior_intercept = student_t(3, 0, 10), 74 | prior_beta = student_t(3, 0, 3), 75 | prior_sigma = half_t(3, 0, 3), 76 | prior_gp_theta = half_t(3, 0, 10), 77 | prior_gp_sigma = half_t(3, 0, 3), 78 | seed = 123 # passed to rstan::sampling() 79 | ) 80 | ``` 81 | 82 | Let's look at the model output: 83 | 84 | ```{r} 85 | m_spatial 86 | ``` 87 | 88 | We can see that the 95% credible intervals are considerably wider on the intercept term and narrower on the slope coefficient in the spatial GLM vs. the model that ignored the spatial autocorrelation. 89 | 90 | Let's look at the residuals in space this time: 91 | 92 | ```{r} 93 | plot(m_spatial, type = "spatial-residual", link = TRUE) + 94 | geom_point(size = 3) 95 | ``` 96 | 97 | That looks better. 
98 | 99 | We can inspect the residuals versus fitted values: 100 | 101 | ```{r} 102 | plot(m_spatial, type = "residual-vs-fitted") 103 | ``` 104 | 105 | And the predictions from our model itself: 106 | 107 | ```{r} 108 | plot(m_spatial, type = "prediction", link = FALSE) + 109 | viridis::scale_colour_viridis() + 110 | geom_point(size = 3) 111 | ``` 112 | 113 | Compare that to our data at the top. Note that the original data also includes observation error with a CV of 0.2. 114 | 115 | We can also extract the predictions themselves with credible intervals: 116 | 117 | ```{r} 118 | # link scale: 119 | p <- predict(m_spatial) 120 | head(p) 121 | 122 | # response scale: 123 | p <- predict(m_spatial, type = "response") 124 | head(p) 125 | 126 | # prediction intervals on new observations (include observation error): 127 | p <- predict(m_spatial, type = "response", interval = "prediction") 128 | head(p) 129 | ``` 130 | 131 | Or use the `tidy` method to get our parameter estimates as a data frame: 132 | 133 | ```{r} 134 | head(tidy(m_spatial, conf.int = TRUE, conf.method = "HPDinterval")) 135 | ``` 136 | 137 | Or make the predictions on a fine-scale spatial grid for a constant value of the predictor: 138 | 139 | ```{r} 140 | pred_grid <- expand.grid( 141 | lat = seq(min(d$lat), max(d$lat), length.out = 30), 142 | lon = seq(min(d$lon), max(d$lon), length.out = 30) 143 | ) 144 | pred_grid$temperature <- mean(d$temperature) 145 | pred_grid$prediction <- predict( 146 | m_spatial, 147 | newdata = pred_grid, 148 | type = "response" 149 | )$estimate 150 | ggplot(pred_grid, aes(lon, lat, fill = prediction)) + 151 | geom_raster() + 152 | viridis::scale_fill_viridis() 153 | ``` 154 | --------------------------------------------------------------------------------