├── .Rbuildignore
├── .github
    ├── .gitignore
    └── workflows
    │   ├── R-CMD-check.yaml
    │   ├── pkgdown.yaml
    │   └── test-coverage.yaml
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS.md
├── R
    ├── data.R
    ├── data_format.R
    ├── inference.R
    ├── mcmc_diagnostics.R
    ├── model_comparison.R
    ├── models.R
    ├── plotting.R
    ├── point_estimate.R
    ├── posterior_interval.R
    ├── posterior_predict.R
    ├── posterior_samples.R
    ├── rater-package.R
    ├── rater_fit_class.R
    ├── rater_model_class.R
    ├── simulate.R
    ├── stanmodels.R
    ├── utils.R
    └── zzz.R
├── README.Rmd
├── README.md
├── codecov.yml
├── configure
├── configure.win
├── cran-comments.md
├── data
    ├── anesthesia.rda
    └── caries.rda
├── inst
    ├── include
    │   └── stan_meta_header.hpp
    └── stan
    │   ├── class_conditional_dawid_skene.stan
    │   ├── dawid_skene.stan
    │   ├── grouped_data.stan
    │   ├── hierarchical_dawid_skene.stan
    │   └── include
    │       └── license.stan
├── man
    ├── anesthesia.Rd
    ├── as_mcmc.list.Rd
    ├── caries.Rd
    ├── class_probabilities.Rd
    ├── figures
    │   ├── README-plot-demo-1.png
    │   └── rater.png
    ├── get_stanfit.Rd
    ├── loo.rater_fit.Rd
    ├── make_complete_rating_design_sim_data.Rd
    ├── make_theta.Rd
    ├── mcmc_diagnostics.Rd
    ├── models.Rd
    ├── plot.rater_fit.Rd
    ├── point_estimate.Rd
    ├── posterior_interval.mcmc_fit.Rd
    ├── posterior_interval.optim_fit.Rd
    ├── posterior_predict.rater_fit.Rd
    ├── posterior_samples.Rd
    ├── print.mcmc_fit.Rd
    ├── print.optim_fit.Rd
    ├── print.rater_model.Rd
    ├── prior_summary.rater_fit.Rd
    ├── rater-package.Rd
    ├── rater.Rd
    ├── reexports.Rd
    ├── simulate_dawid_skene_model.Rd
    ├── simulate_hier_dawid_skene_model.Rd
    ├── summary.mcmc_fit.Rd
    ├── summary.optim_fit.Rd
    ├── summary.rater_model.Rd
    ├── waic.rater_fit.Rd
    └── wide_to_long.Rd
├── pkgdown
    ├── _pkgdown.yml
    └── extra.css
├── rater.Rproj
├── src
    ├── Makevars
    ├── Makevars.win
    └── RcppExports.cpp
├── tests
    ├── testthat.R
    └── testthat
    │   ├── _snaps
    │       ├── model_class.md
    │       └── rater.md
    │   ├── helper.R
    │   ├── setup.R
    │   ├── test-data_format.R
    │   ├── test-mcmc_diagnostics.R
    │   ├── test-model_comparison.R
    │   ├── test-point_estimate.R
    │   ├── test-posterior_interval.R
    │   ├── test-posterior_predict.R
    │   ├── test-posterior_samples.R
    │   ├── test-simulate.R
    │   ├── test_fit_class.R
    │   ├── test_model_class.R
    │   ├── test_models.R
    │   ├── test_plotting.R
    │   ├── test_rater.R
    │   └── test_utils.R
└── vignettes
    ├── .gitignore
    ├── data-formats.Rmd
    └── workflow.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^codecov\.yml$
 2 | ^rateR\.Rproj$
 3 | ^\.Rproj\.user$
 4 | ^\.travis\.yml$
 5 | ^docs$
 6 | ^LICENSE$
 7 | ^pkgdown$
 8 | ^README\.Rmd$
 9 | ^\.github$
10 | ^cran-comments\.md$
11 | ^CRAN-RELEASE$
12 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: R-CMD-check
10 | 
11 | jobs:
12 |   R-CMD-check:
13 |     runs-on: ${{ matrix.config.os }}
14 | 
15 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 | 
17 |     strategy:
18 |       fail-fast: false
19 |       matrix:
20 |         config:
21 |           - {os: macos-latest,   r: 'release'}
22 |           - {os: windows-latest, r: 'release'}
23 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
24 |           - {os: ubuntu-latest,   r: 'release'}
25 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
26 | 
27 |     env:
28 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29 |       R_KEEP_PKG_SOURCE: yes
30 | 
31 |     steps:
32 |       - uses: actions/checkout@v3
33 | 
34 |       - uses: r-lib/actions/setup-pandoc@v2
35 | 
36 |       - uses: r-lib/actions/setup-r@v2
37 |         with:
38 |           r-version: ${{ matrix.config.r }}
39 |           http-user-agent: ${{ matrix.config.http-user-agent }}
40 |           use-public-rspm: true
41 | 
42 |       - uses: r-lib/actions/setup-r-dependencies@v2
43 |         with:
44 |           extra-packages: any::rcmdcheck
45 |           needs: check
46 | 
47 |       - uses: r-lib/actions/check-r-package@v2
48 |         with:
49 |           upload-snapshots: true
50 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 |   release:
 9 |     types: [published]
10 |   workflow_dispatch:
11 | 
12 | name: pkgdown
13 | 
14 | jobs:
15 |   pkgdown:
16 |     runs-on: ubuntu-latest
17 |     # Only restrict concurrency for non-PR jobs
18 |     concurrency:
19 |       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
20 |     env:
21 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
22 |     steps:
23 |       - uses: actions/checkout@v3
24 | 
25 |       - uses: r-lib/actions/setup-pandoc@v2
26 | 
27 |       - uses: r-lib/actions/setup-r@v2
28 |         with:
29 |           use-public-rspm: true
30 | 
31 |       - uses: r-lib/actions/setup-r-dependencies@v2
32 |         with:
33 |           extra-packages: any::pkgdown, local::.
34 |           needs: website
35 | 
36 |       - name: Build site
37 |         run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
38 |         shell: Rscript {0}
39 | 
40 |       - name: Deploy to GitHub pages 🚀
41 |         if: github.event_name != 'pull_request'
42 |         uses: JamesIves/github-pages-deploy-action@v4.4.1
43 |         with:
44 |           clean: false
45 |           branch: gh-pages
46 |           folder: docs
47 | 


--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: test-coverage
10 | 
11 | jobs:
12 |   test-coverage:
13 |     runs-on: ubuntu-latest
14 |     env:
15 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
16 | 
17 |     steps:
18 |       - uses: actions/checkout@v3
19 | 
20 |       - uses: r-lib/actions/setup-r@v2
21 |         with:
22 |           use-public-rspm: true
23 | 
24 |       - uses: r-lib/actions/setup-r-dependencies@v2
25 |         with:
26 |           extra-packages: any::covr
27 |           needs: coverage
28 | 
29 |       - name: Test coverage
30 |         run: |
31 |           covr::codecov(
32 |             quiet = FALSE,
33 |             clean = FALSE,
34 |             install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
35 |           )
36 |         shell: Rscript {0}
37 | 
38 |       - name: Show testthat output
39 |         if: always()
40 |         run: |
41 |           ## --------------------------------------------------------------------
42 |           find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
43 |         shell: bash
44 | 
45 |       - name: Upload test results
46 |         if: failure()
47 |         uses: actions/upload-artifact@v3
48 |         with:
49 |           name: coverage-test-failures
50 |           path: ${{ runner.temp }}/package
51 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | *.o
3 | *.so
4 | inst/doc
5 | .DS_Store
6 | src/stanExports_*
7 | check/
8 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: rater
 2 | Title: Statistical Models of Repeated Categorical Rating Data
 3 | Version: 1.3.1.9000
 4 | Authors@R: 
 5 |     c(person(given = "Jeffrey",
 6 |              family = "Pullin",
 7 |              role = c("aut", "cre", "cph"),
 8 |              email = "jeffrey.pullin@gmail.com",
 9 |              comment = c(ORCID = "0000-0003-3651-5471")),
10 |       person(given = "Damjan",
11 |              family = "Vukcevic",
12 |              role = "aut",
13 |              comment = c(ORCID = "0000-0001-7780-9586")),
14 |       person(given = "Lars Mølgaard",
15 |              family = "Saxhaug",
16 |              role = "ctb",
17 |              comment = c(ORCID = "0000-0001-5084-1578")))
18 | Description: Fit statistical models based on the Dawid-Skene model - Dawid
19 |     and Skene (1979) <doi:10.2307/2346806> - to repeated categorical
20 |     rating data.  Full Bayesian inference for these models is supported
21 |     through the Stan modelling language. 'rater' also allows the user to
22 |     extract and plot key parameters of these models.
23 | License: GPL-2
24 | URL: https://jeffreypullin.github.io/rater/,
25 |     https://github.com/jeffreypullin/rater
26 | BugReports: https://github.com/jeffreypullin/rater/issues
27 | Depends:
28 |     R (>= 3.4.0)
29 | Imports:
30 |     ggplot2 (>= 2.2.1),
31 |     loo (> 2.0.0),
32 |     methods,
33 |     Rcpp (>= 0.12.0),
34 |     RcppParallel (>= 5.0.1),
35 |     rlang (> 0.2.0),
36 |     rstan (>= 2.26.0),
37 |     rstantools (>= 2.0.0)
38 | Suggests:
39 |     coda,
40 |     covr,
41 |     knitr,
42 |     rmarkdown,
43 |     testthat
44 | LinkingTo:
45 |     BH (>= 1.66.0),
46 |     Rcpp (>= 0.12.0),
47 |     RcppEigen (>= 0.3.3.3.0),
48 |     RcppParallel (>= 5.0.1),
49 |     rstan (>= 2.26.0),
50 |     StanHeaders (>= 2.26.0)
51 | VignetteBuilder: 
52 |     knitr
53 | Biarch: true
54 | Config/testthat/edition: 3
55 | Encoding: UTF-8
56 | LazyData: true
57 | Roxygen: list(markdown = TRUE)
58 | RoxygenNote: 7.2.3
59 | SystemRequirements: GNU make
60 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(class_probabilities,mcmc_fit)
 4 | S3method(class_probabilities,optim_fit)
 5 | S3method(loo,rater_fit)
 6 | S3method(plot,rater_fit)
 7 | S3method(posterior_interval,mcmc_fit)
 8 | S3method(posterior_interval,optim_fit)
 9 | S3method(posterior_predict,rater_fit)
10 | S3method(print,mcmc_fit)
11 | S3method(print,optim_fit)
12 | S3method(print,rater_model)
13 | S3method(prior_summary,rater_fit)
14 | S3method(summary,mcmc_fit)
15 | S3method(summary,optim_fit)
16 | S3method(summary,rater_model)
17 | S3method(waic,rater_fit)
18 | export(as_mcmc.list)
19 | export(class_conditional_dawid_skene)
20 | export(class_probabilities)
21 | export(dawid_skene)
22 | export(get_stanfit)
23 | export(hier_dawid_skene)
24 | export(loo)
25 | export(loo_compare)
26 | export(make_complete_rating_design_sim_data)
27 | export(make_theta)
28 | export(mcmc_diagnostics)
29 | export(point_estimate)
30 | export(posterior_interval)
31 | export(posterior_predict)
32 | export(posterior_samples)
33 | export(prior_summary)
34 | export(rater)
35 | export(simulate_dawid_skene_model)
36 | export(simulate_hier_dawid_skene_model)
37 | export(waic)
38 | export(wide_to_long)
39 | import(Rcpp)
40 | import(methods)
41 | importFrom(RcppParallel,CxxFlags)
42 | importFrom(RcppParallel,RcppParallelLibs)
43 | importFrom(ggplot2,aes)
44 | importFrom(ggplot2,coord_cartesian)
45 | importFrom(ggplot2,element_blank)
46 | importFrom(ggplot2,element_rect)
47 | importFrom(ggplot2,facet_wrap)
48 | importFrom(ggplot2,geom_bar)
49 | importFrom(ggplot2,geom_text)
50 | importFrom(ggplot2,geom_tile)
51 | importFrom(ggplot2,ggplot)
52 | importFrom(ggplot2,guides)
53 | importFrom(ggplot2,labs)
54 | importFrom(ggplot2,scale_fill_gradient)
55 | importFrom(ggplot2,theme)
56 | importFrom(ggplot2,theme_bw)
57 | importFrom(loo,extract_log_lik)
58 | importFrom(loo,loo)
59 | importFrom(loo,loo_compare)
60 | importFrom(loo,relative_eff)
61 | importFrom(loo,waic)
62 | importFrom(rlang,.data)
63 | importFrom(rstan,As.mcmc.list)
64 | importFrom(rstan,Rhat)
65 | importFrom(rstan,ess_bulk)
66 | importFrom(rstan,extract)
67 | importFrom(rstan,optimizing)
68 | importFrom(rstan,sampling)
69 | importFrom(rstantools,posterior_interval)
70 | importFrom(rstantools,posterior_predict)
71 | importFrom(rstantools,prior_summary)
72 | importFrom(stats,rnorm)
73 | importFrom(utils,capture.output)
74 | importFrom(utils,head)
75 | useDynLib(rater, .registration = TRUE)
76 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # rater (development version)
 2 | 
 3 | # rater 1.3.1
 4 | 
 5 | * Fix the description of the number of raters in the anesthesia data. The documentation had erroneously stated there were four anesthetists, not five.
 6 | 
 7 | * Update the stan code for compatibility with rstan v2.26.0 (@andrjohns)
 8 | 
 9 | # rater 1.3.0
10 | 
11 | * Updated the Stan implementation, priors, and initialisation points of the hierarchical Dawid-Skene model, leading to much more reliable convergence. 
12 | 
13 | * Added the ability to visualise the theta parameter with uncertainty.
14 | 
15 | * Added row names to the output of `class_probabilities()`.
16 | 
17 | * Added the ability to specify the column names of long format data passed to `rater()`.
18 | 
19 | * Added `simulate_dawid_skene_model()` and `simulate_hier_dawid_skene_model()` to simulate data from the Dawid-Skene and hierarchical Dawid-Skene models.
20 | 
21 | * Re-export `loo_compare()`.
22 | 
23 | * Allowed the theta parameter to be extracted from the hierarchical Dawid-Skene model.
24 | 
25 | # rater 1.2.0
26 | 
27 | * Add `waic()` function for model comparison
28 | 
29 | * Silence warnings with the latest ggplot2 version 
30 | 
31 | * Fix validation bug in `posterior_predict()`
32 | 
33 | # rater 1.1.0
34 | 
35 | * `summary()` now works with the class conditional and hierarchical Dawid-Skene models.
36 | 
37 | * All functions applied to fitted class conditional Dawid-Skene models will automatically convert the relevant parameters of the model into a full theta parameter equivalent to the Dawid-Skene model. This is designed to allow easier comparison of the class conditional model with the full Dawid-Skene model.
38 | 
39 | * Plotting via `plot()` of the `rater_fit` object has been changed in several ways. `plot.rater_fit` now:
40 | 
41 |   - Only returns one plot 
42 |   - Only returns the theta plot by default
43 |   - Exposes the `prob`, `which` (called `rater_index`) and new `item_index` 
44 |     arguments in the plot generic.
45 |     
46 | * Add the ability to only plot a subset of items when plotting the class probabilities. This can be controlled by the new `item_index` argument to `plot()`
47 | 
48 | * Added the function `wide_to_long()` to convert wide data to long data.
49 | 
50 | * Add the option `data_format = "wide"` to `rater()` to allow wide data to be passed into `rater()` directly.
51 | 
52 | * Added the `get_stanfit()` function to extract the underlying stanfit object from a rater fit object.
53 | 
54 | * Added an implementation of the `posterior_predict` generic from {rstantools} allowing simulation from the posterior predictive distribution of fitted standard, and class conditional, Dawid-Skene models. (The hierarchical Dawid-Skene model is not yet supported).
55 | 
56 | * Added an implementation of the `prior_summary` generic from {rstantools} for `rater_fit` objects.
57 | 
58 | * Add the `loo.rater_fit` method to allow the calculation of loo, a modern Bayesian model comparison metric, for rater models. loo values can be compared using the excellent {loo} package.
59 | 
60 | * Added the `loo.rater_fit` method to allow the calculation of loo, a modern Bayesian model comparison metric, for rater models. loo values can be compared using the excellent {loo} package.
61 | 
62 | * Rater specific prior parameters can now be used in the Dawid-Skene model for both grouped and long data. In practice this means that it is now possible to pass a J * K * K array for `beta` into `dawid_skene()` which encodes a K * K prior parameter for each of the J raters' error matrices. For backwards compatibility and ease of use it is still possible to pass a single matrix for `beta` which will still be interpreted as the prior parameter for all the of the raters' error matrices.
63 | 
64 | * The plot produced for the pi parameter has been changed. The new plot represents the uncertainty in the point estimates when MCMC has been used to fit the model.
65 | 
66 | * Prior parameters for the Dawid-Skene and class conditional Dawid-Skene models have been altered slightly to improve convergence of optimization when the number of classes is small. 
67 | 
68 | * `summary.mcmc_fit` now displays the number of remaining parameters correctly.
69 | 
70 | * Added the `as_mcmc.list()` function to convert MCMC fits to {coda} `mcmc.list` objects.
71 | 
72 | # rater 1.0.0
73 | 
74 | * Initial release
75 | 


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
 1 | #' Anaesthetist ratings for patient suitability for surgery
 2 | #'
 3 | #' The data consist of ratings, on a 4-point scale, made by five anaesthetists
 4 | #' of patients' pre-operative health. The ratings were based on the
 5 | #' anaesthetists assessments of a standard form completed for all of the
 6 | #' patients. There are 45 patients (items) and five anaesthetists (raters) in
 7 | #' total. The first anaesthetist assessed the forms a total of three times,
 8 | #' spaced several weeks apart. The other anaesthetists each assessed the forms
 9 | #' once. The data is in 'long' format.
10 | #'
11 | #' @format A `data.frame` with 315 rows and 3 columns:
12 | #' \describe{
13 | #'   \item{item}{The item index - which item is being rated}
14 | #'   \item{rater}{The rater index - which rater is doing the rating}
15 | #'   \item{rating}{The rating given}
16 | #' }
17 | #'
18 | #' @references
19 | #' Dawid, A. P., and A. M. Skene. "Maximum Likelihood Estimation of Observer
20 | #' Error-Rates Using the EM Algorithm." Applied Statistics 28, no. 1 (1979): 20.
21 | #'
22 | "anesthesia"
23 | 
24 | #' Dentist ratings of whether caries are healthy or not based on X-rays
25 | #'
26 | #' It consists of binary ratings, made by 5 dentists, of whether a given tooth
27 | #' was healthy (sound) or had caries, also known as cavities. The ratings were
28 | #' performed using X-ray only, which was thought to be more error-prone than
29 | #' visual/tactile assessment of each tooth. In total 3,689 ratings were made.
30 | #' This data is in 'grouped' format. Each row is one of the 'pattern' with
31 | #' the final columns being a tally of how many times that pattern occurs in
32 | #' the dataset.
33 | #'
34 | #' @format A `data.frame` with 6 columns and 32 rows.
35 | #' \describe{
36 | #'   \item{rater_1}{The rating of the dentist 1}
37 | #'   \item{rater_2}{The rating of the dentist 2}
38 | #'   \item{rater_3}{The rating of the dentist 3}
39 | #'   \item{rater_4}{The rating of the dentist 4}
40 | #'   \item{rater_5}{The rating of the dentist 5}
41 | #'   \item{n}{The number of times the rating pattern appears in the dataset}
42 | #' }
43 | #'
44 | #' @references
45 | #' Espeland, Mark A., and Stanley L. Handelman. “Using Latent Class Models to
46 | #' Characterize and Assess Relative Error in Discrete Measurements.”
47 | #' Biometrics 45, no. 2 (1989): 587–99.
48 | #'
49 | "caries"
50 | 


--------------------------------------------------------------------------------
/R/data_format.R:
--------------------------------------------------------------------------------
 1 | #' Convert wide data to the long format
 2 | #'
 3 | #' @param data Data in a wide format. Must be 2D data object which can be
 4 | #'   converted to a data.frame
 5 | #'
 6 | #' @return The data converted into long format. A data.frame with three columns
 7 | #'   item, rater and rating.
 8 | #'
 9 | #' @details Wide data refers to a way of laying out categorical rating data
10 | #'   where each item is one row and each column represents the ratings of each
11 | #'   rater. Elements of the data can be `NA`, indicating that an item wasn't
12 | #'   rated by a rater. Wide data cannot represent the same rater rating an item
13 | #'   multiple times.
14 | #'
15 | #'   Currently any column names of the data are ignored and the raters are
16 | #'   labelled by their column position (1 indexed, left to right). Only numeric
17 | #'   ratings are currently supported.
18 | #'
19 | #' @examples
20 | #' wide_data <- data.frame(dater_1 = c(3, 2, 2), rater_2 = c(4, 2, 2))
21 | #' wide_data
22 | #'
23 | #' long_data <- wide_to_long(wide_data)
24 | #' long_data
25 | #'
26 | #'
27 | #' @export
28 | #'
29 | wide_to_long <- function(data) {
30 | 
31 |   if (!inherits(data, "data.frame") &&  !inherits(data, "matrix")) {
32 |     stop("`data` must be a data.frame or matrix.", call = FALSE)
33 |   }
34 |   data <- as.data.frame(data)
35 | 
36 |   # FIXME We should accept non-numeric data (GitHub issue: #81) but for
37 |   # now we explicitly check that is all columns contain numeric values.
38 |   if (!all(vapply(data, is.numeric, FUN.VALUE = logical(1)))) {
39 |     stop("All columns in `data` must contain only numeric values.",
40 |          call. = FALSE)
41 |   }
42 | 
43 |   values <- unlist(unclass(data))
44 |   non_na_values <- values[!is.na(values)]
45 |   if (any(non_na_values == 0)) {
46 |     stop("Some ratings are 0. All ratings must be in 1:K",
47 |          " where K is the number of classes.",
48 |          call. = FALSE)
49 |   }
50 | 
51 |   len <- length(non_na_values)
52 | 
53 |   rating <- numeric(len)
54 |   rater <- numeric(len)
55 |   item <- numeric(len)
56 | 
57 |   n <- 1
58 |   for (i in 1:nrow(data)) {
59 |     for (j in 1:ncol(data)) {
60 |       if (!is.na(data[[i, j]])) {
61 | 
62 |        rating[[n]] <- data[i, j]
63 |        item[[n]] <- i
64 |        rater[[n]] <- j
65 | 
66 |        n <- n + 1
67 |       }
68 |     }
69 |   }
70 | 
71 |   data.frame(item = item, rater = rater, rating = rating)
72 | }
73 | 
74 | 
75 | 


--------------------------------------------------------------------------------
/R/mcmc_diagnostics.R:
--------------------------------------------------------------------------------
  1 | #' Retrieve MCMC convergence diagnostics for a rater fit
  2 | #'
  3 | #' @param fit An rater `mcmc_fit` object.
  4 | #' @param pars A character vector of parameter names to return. By default
  5 | #'   `c("pi", "theta")`.
  6 | #'
  7 | #' @return A matrix where the columns represent different diagnostics and the
  8 | #'   rows are different parameters. Currently the first column contains
  9 | #'   the Rhat statistic and the second bulk effective samples size. The
 10 | #'   rownames contain the parameter names.
 11 | #'
 12 | #' @details MCMC diagnostics cannot be calculate for the z due to the
 13 | #'   marginalisation used to fit the models.
 14 | #'
 15 | #'   These MCMC diagnostics are intended as basic sanity check of the quality
 16 | #'   of the MCMC samples returned. Users who want more in depth diagnostics
 17 | #'   should consider using [as_mcmc.list()] to convert the samples to a
 18 | #'   [coda::mcmc.list()] object, or [get_stanfit()] to extract the underlying
 19 | #'   stanfit object.
 20 | #'
 21 | #' @seealso [rstan::Rhat()], [rstan::ess_bulk()] [as_mcmc.list()],
 22 | #'   [get_stanfit()].
 23 | #'
 24 | #' @importFrom rstan extract Rhat ess_bulk
 25 | #'
 26 | #' @references
 27 | #' Aki Vehtari, Andrew Gelman, Daniel Simpson, Bob Carpenter, and
 28 | #' Paul-Christian Bürkner (2019). Rank-normalization, folding, and
 29 | #' localization: An improved R-hat for assessing convergence of
 30 | #' MCMC. \emph{arXiv preprint} \code{arXiv:1903.08008}.
 31 | #'
 32 | #' @examples
 33 | #' \donttest{
 34 | #'
 35 | #' fit <- rater(anesthesia, "dawid_skene")
 36 | #'
 37 | #' # Calculate the diagnostics for all parameters.
 38 | #' mcmc_diagnostics(fit)
 39 | #'
 40 | #' # Calculate the diagnostics just for the pi parameter.
 41 | #' mcmc_diagnostics(fit, pars = "pi")
 42 | #'
 43 | #' }
 44 | #'
 45 | #' @export
 46 | #'
 47 | mcmc_diagnostics <- function(fit, pars = c("pi", "theta")) {
 48 | 
 49 |   if (inherits(fit, "optim_fit")) {
 50 |     stop("Cannot extract MCMC diagnositcs from a optimisation fit.",
 51 |          call. = FALSE)
 52 |   }
 53 | 
 54 |   if ("z" %in% pars) {
 55 |     stop("Cannot extract MCMC diagnostics for the latent class.",
 56 |          call. = FALSE)
 57 |   }
 58 | 
 59 |   diagnostics <- matrix(nrow = 0, ncol = 2)
 60 | 
 61 |   if ("pi" %in% pars) {
 62 |     K <- fit$stan_data$K
 63 |     pi_diagnostics <- matrix(nrow = K, ncol = 2)
 64 |     row_names <- character(K)
 65 |     draws <- rstan::extract(get_samples(fit), pars = "pi", permuted = FALSE)
 66 |     for (i in 1:K) {
 67 |       name <- paste0("pi[", i, "]")
 68 |       pi_diagnostics[i, 1] <- rstan::Rhat(draws[, , name])
 69 |       pi_diagnostics[i, 2] <- rstan::ess_bulk(draws[, , name])
 70 |       row_names[[i]] <- name
 71 |     }
 72 |     colnames(pi_diagnostics) <- c("Rhat", "ess_bulk")
 73 |     rownames(pi_diagnostics) <- row_names
 74 | 
 75 |     diagnostics <- rbind(diagnostics, pi_diagnostics)
 76 |   }
 77 | 
 78 |   if ("theta" %in% pars) {
 79 |     K <- fit$stan_data$K
 80 |     J <- fit$stan_data$J
 81 | 
 82 |     if (inherits(get_model(fit), "hier_dawid_skene")) {
 83 |       draws <- rstan::extract(get_samples(fit), pars = "beta",
 84 |                               permuted = FALSE)
 85 |     } else {
 86 |       draws <- rstan::extract(get_samples(fit), pars = "theta",
 87 |                               permuted = FALSE)
 88 |     }
 89 | 
 90 |     theta_diagnostics <- matrix(nrow = J * K * K, ncol = 2)
 91 |     row_names <- character(J * K * K)
 92 | 
 93 |     if (inherits(get_model(fit), "hier_dawid_skene")) {
 94 |       # Here we assume that the MCMC diagnostics apply to the derived
 95 |       # theta parameter.
 96 |       n <- 1
 97 |       for (j in 1:J) {
 98 |         for (k in 1:K) {
 99 |           for (i in 1:K) {
100 |             stan_name <- sprintf("beta[%s,%s,%s]", j, k, i)
101 |             theta_diagnostics[n, 1] <- rstan::Rhat(draws[, , stan_name])
102 |             theta_diagnostics[n, 2] <- rstan::ess_bulk(draws[, , stan_name])
103 |             row_names[[n]] <- sprintf("theta[%s, %s, %s]", j, k, i)
104 |             n <- n + 1
105 |           }
106 |         }
107 |       }
108 |     } else if (inherits(get_model(fit), "class_conditional_dawid_skene")) {
109 |       n <- 1
110 |       for (j in 1:J) {
111 |         for (k in 1:K) {
112 |           par_draws <- draws[, , sprintf("theta[%s,%s]", j, k)]
113 |           for (i in 1:K) {
114 |             theta_diagnostics[n, 1] <- rstan::Rhat(par_draws)
115 |             theta_diagnostics[n, 2] <- rstan::ess_bulk(par_draws)
116 |             row_names[[n]] <- sprintf("theta[%s, %s, %i]", j, k, i)
117 |             n <- n + 1
118 |           }
119 |         }
120 |       }
121 |     } else {
122 |       n <- 1
123 |       for (j in 1:J) {
124 |         for (k in 1:K) {
125 |           for (i in 1:K) {
126 |             stan_name <- sprintf("theta[%s,%s,%s]", j, k, i)
127 |             theta_diagnostics[n, 1] <- rstan::Rhat(draws[, , stan_name])
128 |             theta_diagnostics[n, 2] <- rstan::ess_bulk(draws[, , stan_name])
129 |             row_names[[n]] <- sprintf("theta[%s, %s, %s]", j, k, i)
130 |             n <- n + 1
131 |           }
132 |         }
133 |       }
134 |     }
135 |     colnames(theta_diagnostics) <- c("Rhat", "ess_bulk")
136 |     rownames(theta_diagnostics) <- row_names
137 | 
138 |     diagnostics <- rbind(diagnostics, theta_diagnostics)
139 |   }
140 | 
141 |   diagnostics
142 | }
143 | 


--------------------------------------------------------------------------------
/R/model_comparison.R:
--------------------------------------------------------------------------------
  1 | #' Compute the PSIS LOO CV - a measure of model fit - of a rater fit object.
  2 | #'
  3 | #' @param x A `rater_fit` object. All model types are currently supported
  4 | #'   except the basic Dawid-Skene model fit with grouped data.
  5 | #' @param ... Other arguments passed.
  6 | #' @param cores The number of cores to use when calling the underlying
  7 | #'   functions. By default the value of the `mc.cores` option.
  8 | #'
  9 | #' @return A loo object.
 10 | #'
 11 | #' @details This function is somewhat experimental; model comparison is always
 12 | #'   difficult and choosing between variants of the Dawid-Skene model should
 13 | #'   be largely guided by considerations of data size and what is known about
 14 | #'   the characteristics of the raters. loo is, however, one of the leading
 15 | #'   methods for Bayesian model comparison and should provide a helpful guide
 16 | #'   in many situations.
 17 | #'
 18 | #'   When calculating loo we always use the relative effective
 19 | #'   sample size, calculated using `loo::relaive_eff` to improve the estimates
 20 | #'   of the PSIS effective sample sizes and Monte Carlo error.
 21 | #'
 22 | #'   For further information about the details of loo and PSIS please consult
 23 | #'   the provided references.
 24 | #'
 25 | #' @examples
 26 | #'
 27 | #' \donttest{
 28 | #' fit_ds <- rater(anesthesia, "dawid_skene", verbose = FALSE, chains = 1)
 29 | #' fit_ccds <- rater(anesthesia, "class_conditional_dawid_skene",
 30 | #'                   verbose = FALSE, chains = 1)
 31 | #'
 32 | #' loo_ds <- loo(fit_ds)
 33 | #' loo_ccds <- loo(fit_ccds)
 34 | #'
 35 | #' # To compare the loos easily we can use the loo_compare function from the
 36 | #' # loo package:
 37 | #' library(loo)
 38 | #'
 39 | #' loo_compare(loo_ds, loo_ccds)
 40 | #'
 41 | #' # The documentation of the loo package contains more information about how
 42 | #' # the output should be interpreted.
 43 | #' }
 44 | #'
 45 | #' @references
 46 | #' Vehtari, A., Gelman, A., and Gabry, J. (2017a). Practical Bayesian model
 47 | #' evaluation using leave-one-out cross-validation and WAIC.
 48 | #' *Statistics and Computing*. 27(5), 1413--1432. doi:10.1007/s11222-016-9696-4
 49 | #' ([journal version](https://link.springer.com/article/10.1007/s11222-016-9696-4),
 50 | #'  [preprint arXiv:1507.04544](https://arxiv.org/abs/1507.04544)).
 51 | #'
 52 | #' Vehtari, A., Simpson, D., Gelman, A., Yao, Y., and Gabry, J. (2019).
 53 | #' Pareto smoothed importance sampling.
 54 | #' [preprint arXiv:1507.02646](https://arxiv.org/abs/1507.02646)
 55 | #'
 56 | #' @importFrom loo extract_log_lik relative_eff loo
 57 | #'
 58 | #' @aliases loo
 59 | #' @method loo rater_fit
 60 | #' @importFrom loo loo
 61 | #' @export
 62 | #' @export loo
 63 | #'
 64 | loo.rater_fit <- function(x,
 65 |                           ...,
 66 |                           cores = getOption("mc.cores", 1)) {
 67 | 
 68 |   if (x$data_format == "grouped") {
 69 |     stop("loo is not supported for models fit using grouped data.",
 70 |          call. = FALSE)
 71 |   }
 72 | 
 73 |   if (inherits(x, "optim_fit")) {
 74 |     stop("loo cannot be calculated for models fit using optimisation.",
 75 |          acall. = FALSE)
 76 |   }
 77 | 
 78 |   log_lik <- loo::extract_log_lik(x$samples, merge_chains = FALSE)
 79 |   r_eff <- loo::relative_eff(exp(log_lik), cores = cores)
 80 |   loo <- loo::loo(log_lik, r_eff = r_eff, cores = cores)
 81 | 
 82 |   loo
 83 | }
 84 | 
 85 | #' Compute the WAIC - a measure of model fit - of a rater fit object.
 86 | #'
 87 | #' @param x A `rater_fit` object. All model types are currently supported
 88 | #'   except the basic Dawid-Skene model fit with grouped data.
 89 | #' @param ... Other arguments passed.
 90 | #'
 91 | #' @return A waic/loo object.
 92 | #'
 93 | #' @details This function provides provides an additional method for model
 94 | #' comparison, on top of the `loo()` function. In general we recommend that
 95 | #' `loo()` is preferred: see the documentation of the loo package for details.
 96 | #' Also, note the comments regarding model selection the the details section
 97 | #' of `loo()`.
 98 | #'
 99 | #' @examples
100 | #'
101 | #' \donttest{
102 | #' fit_ds <- rater(anesthesia, "dawid_skene", verbose = FALSE, chains = 1)
103 | #' fit_ccds <- rater(anesthesia, "class_conditional_dawid_skene",
104 | #'                   verbose = FALSE, chains = 1)
105 | #'
106 | #' waic(fit_ds)
107 | #' waic(fit_ccds)
108 | #' }
109 | #'
110 | #' @references
111 | #'
112 | #' Watanabe, S. (2010). Asymptotic equivalence of Bayes cross validation and
113 | #' widely application information criterion in singular learning theory.
114 | #' *Journal of Machine Learning Research* 11, 3571-3594.
115 | #'
116 | #' Vehtari, A., Gelman, A., and Gabry, J. (2017a). Practical Bayesian model
117 | #' evaluation using leave-one-out cross-validation and WAIC.
118 | #' *Statistics and Computing*. 27(5), 1413--1432. doi:10.1007/s11222-016-9696-4
119 | #' ([journal version](https://link.springer.com/article/10.1007/s11222-016-9696-4),
120 | #'  [preprint arXiv:1507.04544](https://arxiv.org/abs/1507.04544)).
121 | #'
122 | #' @importFrom loo extract_log_lik waic
123 | #'
124 | #' @aliases waic
125 | #' @method waic rater_fit
126 | #' @importFrom loo waic
127 | #' @export
128 | #' @export waic
129 | #'
130 | waic.rater_fit <- function(x, ...) {
131 | 
132 |   if (x$data_format == "grouped") {
133 |     stop("waic is not supported for models fit using grouped data.",
134 |          call. = FALSE)
135 |   }
136 | 
137 |   if (inherits(x, "optim_fit")) {
138 |     stop("waic cannot be calculated for models fit using optimisation.",
139 |          call. = FALSE)
140 |   }
141 | 
142 |   log_lik <- loo::extract_log_lik(x$samples, merge_chains = FALSE)
143 |   waic <- loo::waic(log_lik)
144 | 
145 |   waic
146 | }
147 | 
148 | # Re-export the loo_compare function.
149 | #' @importFrom loo loo_compare
150 | #' @export
151 | loo::loo_compare
152 | 


--------------------------------------------------------------------------------
/R/models.R:
--------------------------------------------------------------------------------
  1 | #' @name models
  2 | #'
  3 | #' @title Probabilistic models of repeated categorical rating
  4 | #' @description Functions to set up models and change their prior
  5 | #'   parameters for use in [rater()].
  6 | #'
  7 | #' @return a rater model object that can be passed to [rater()].
  8 | #'
  9 | NULL
 10 | 
 11 | #' @rdname models
 12 | #'
 13 | #' @param alpha prior parameter for pi
 14 | #' @param beta prior parameter for theta. This can either be a K * K matrix, in
 15 | #'   which case it is interpreted as the prior parameter of all of the J
 16 | #'   raters, or a J by K by K array in which case it is the fully specified
 17 | #'   prior parameter for all raters. (Here K is the number of categories in the
 18 | #'   data and J is the number of raters in the data.)
 19 | #'
 20 | #' @examples
 21 | #' # Model with default prior parameters:
 22 | #' default_m <- dawid_skene()
 23 | #'
 24 | #' # Changing alpha:
 25 | #' set_alpha_m <- dawid_skene(alpha = c(2, 2, 2))
 26 | #'
 27 | #' # Changing beta, single matrix:
 28 | #' # (See details for how this is interpreted.)
 29 | #' beta_mat <- matrix(1, nrow = 4, ncol = 4)
 30 | #' diag(beta_mat) <- 4
 31 | #' beta_mat_m <- dawid_skene()
 32 | #'
 33 | #' # The above is equivalent (when the model is fit - see details) to:
 34 | #' beta_array <- array(NA, dim = c(2, 4, 4))
 35 | #' for (i in 1:2) {
 36 | #'   beta_array[i, , ] <- beta_mat
 37 | #' }
 38 | #' beta_array_m <- dawid_skene(beta = beta_array)
 39 | #'
 40 | #' # But you can also specify an array where each slice is different.
 41 | #' # (Again, see details for how this is interpreted.)
 42 | #' beta_array[1, , ] <- matrix(1, nrow = 4, ncol = 4)
 43 | #' beta_array_m <- dawid_skene(beta = beta_array)
 44 | #'
 45 | #' @export
 46 | #'
 47 | dawid_skene <- function(alpha = NULL, beta = NULL) {
 48 |   validate_alpha(alpha)
 49 | 
 50 |   # `beta` can either be a K * K matrix which we interpret as the prior on the
 51 |   # error matrix on each of the raters as in {1.0.0} or a J * K * K array, the
 52 |   # set of priors on the error matrices of the J raters.
 53 | 
 54 |   if (!is.null(beta) && !is.matrix(beta) && !is.array(beta)) {
 55 |     stop("beta must be a numeric matrix or array", call. = FALSE)
 56 |   }
 57 | 
 58 |   alpha_k <- NULL
 59 |   beta_k <- NULL
 60 | 
 61 |   # Beta as matrix case.
 62 |   if (is.matrix(beta)) {
 63 | 
 64 |     # Test if the matrix is square.
 65 |     if (nrow(beta) != ncol(beta)) {
 66 |       stop("beta must a square matrix", call. = FALSE)
 67 |     }
 68 | 
 69 |     beta_k <- unique(dim(beta))
 70 |   }
 71 | 
 72 |   # Beta as array case.
 73 |   if (is.array(beta) && length(dim(beta)) > 2) {
 74 | 
 75 |     if (length(dim(beta)) != 3) {
 76 |       stop("`beta` must be a 3 dimensional array", call. = FALSE)
 77 |     }
 78 | 
 79 |     if (length(unique(dim(beta)[2:3])) != 1) {
 80 |       stop("Subslices of `beta` must be square matrices.", call. = FALSE)
 81 |     }
 82 | 
 83 |     beta_k <- unique(dim(beta)[2:3])
 84 |   }
 85 | 
 86 |   if (is.numeric(alpha)) {
 87 |     alpha_k <- length(alpha)
 88 |   }
 89 | 
 90 |   ks <- c(alpha_k, beta_k)
 91 | 
 92 |   # ks will be NULL if both alpha and beta are not specified.
 93 |   if (is.null(ks)) {
 94 |     K <- NULL
 95 |   } else {
 96 |     if (length(unique(ks)) > 1) {
 97 |       # FIXME: Make this error more informative.
 98 |       stop("`alpha` and `beta` are not compatible.", call. = FALSE)
 99 |     } else {
100 |       K <- unique(ks)
101 |     }
102 |   }
103 | 
104 |   m <- list(parameters = list(alpha = alpha, beta = beta),
105 |             name = "Bayesian Dawid and Skene Model",
106 |             file = "dawid_skene",
107 |             K = K)
108 |   class(m) <- c("dawid_skene", "rater_model")
109 |   m
110 | }
111 | 
112 | #' @rdname models
113 | #'
114 | #' @param alpha prior parameter for pi
115 | #'
116 | #' @examples
117 | #' # Default:
118 | #' hier_dawid_skene()
119 | #'
120 | #' # Changing alpha
121 | #' hier_dawid_skene(alpha = c(2, 2))
122 | #'
123 | #' @export
124 | #'
125 | hier_dawid_skene <- function(alpha = NULL) {
126 |   # Note: this does not allow the user to change the N(0, 1) hyperpriors.
127 |   validate_alpha(alpha)
128 | 
129 |   K <- if (!is.null(alpha)) length(alpha) else NULL
130 | 
131 |   m <- list(parameters = list(alpha = alpha),
132 |             name = "Bayesian Hierarchical Dawid and Skene Model",
133 |             file = "hierarchical_dawid_skene",
134 |             K = K)
135 |   class(m) <- c("hier_dawid_skene", "rater_model")
136 |   m
137 | }
138 | 
139 | #' @rdname models
140 | #'
141 | #' @param beta_1 First on diagonal prior probability parameter
142 | #' @param beta_2 Second on diagonal prior probability parameter for theta
143 | #'
144 | #' @examples
145 | #' # Default:
146 | #' class_conditional_dawid_skene()
147 | #'
148 | #' # Not default:
149 | #' class_conditional_dawid_skene(
150 | #'   alpha = c(2, 2),
151 | #'   beta_1 = c(4, 4),
152 | #'   beta_2 = c(2, 2)
153 | #' )
154 | #'
155 | #' @export
156 | #'
157 | class_conditional_dawid_skene <- function(alpha = NULL,
158 |                                           beta_1 = NULL,
159 |                                           beta_2 = NULL) {
160 |   validate_alpha(alpha)
161 | 
162 |   # length(NULL) = 0.
163 |   ks <- c(length(alpha), length(beta_1), length(beta_2))
164 |   ks <- ks[ks > 0]
165 | 
166 |   if (length(unique(ks)) > 1) {
167 |     stop("Prior parameters are not compatible.", call. = FALSE)
168 |   }
169 |   K <- if (length(ks) > 0) unique(ks) else NULL
170 | 
171 |   m <- list(parameters = list(alpha = alpha, beta_1 = beta_1, beta_2 = beta_2),
172 |             name = "Bayesian Class conditional Dawid and Skene Model",
173 |             file = "class_conditional_dawid_skene",
174 |             K = K)
175 |   class(m) <- c("class_conditional_dawid_skene", "rater_model")
176 |   m
177 | }
178 | 
179 | validate_alpha <- function(alpha) {
180 |   if (!is.null(alpha) && !is.numeric(alpha)) {
181 |       stop("alpha must be a numeric vector", call. = FALSE)
182 |   }
183 | }
184 | 


--------------------------------------------------------------------------------
/R/plotting.R:
--------------------------------------------------------------------------------
  1 | #' Plot the prevalence estimates
  2 | #'
  3 | #' @param fit A rater fit object.
  4 | #' @param prob A single probability. The size of the credible interval
  5 | #'   returned, if the fit is an `mcmc_fit`. Silently ignored if a the fit is
  6 | #'   an `optim_fit` object. By default 0.9.
  7 | #' @return A plot of the prevalence estimates extracted from the fit. If the
  8 | #'   fit is a `mcmc_fit` this will include credible intervals, if it is an
  9 | #'   `optim_fit` it will not.
 10 | #'
 11 | #' @importFrom ggplot2 ggplot aes geom_bar geom_text coord_cartesian labs
 12 | #'     theme_bw
 13 | #' @importFrom rlang .data
 14 | #'
 15 | #' @noRd
 16 | #'
 17 | plot_pi <- function(fit, prob = 0.9) {
 18 |   UseMethod("plot_pi")
 19 | }
 20 | 
 21 | #' @rdname plot_pi
 22 | #' @noRd
 23 | plot_pi.mcmc_fit <- function(fit, prob = 0.9) {
 24 |   pi <- point_estimate(fit, pars = "pi")[[1]]
 25 | 
 26 |   # Here we know that the fit is an `mcmc_fit` so this will work.
 27 |   pi_cred_int <- posterior_interval(fit, prob = prob, pars = "pi")
 28 | 
 29 |   plot_data <- data.frame(
 30 |     cat = factor(paste0("Class ", 1:length(pi)),
 31 |                  levels = paste0("Class ", length(pi):1)),
 32 |     pi = pi,
 33 |     pi_lower = pi_cred_int[, 1],
 34 |     pi_upper = pi_cred_int[, 2]
 35 |   )
 36 | 
 37 |   percent <- paste0(prob * 100, "%")
 38 |   plot <- ggplot2::ggplot(plot_data, ggplot2::aes(x = .data$cat, y = .data$pi)) +
 39 |     ggplot2::geom_point(size = 2, colour = "steelblue") +
 40 |     ggplot2::geom_errorbar(ggplot2::aes(ymin = .data$pi_lower,
 41 |                                         ymax = .data$pi_upper),
 42 |                            width = 0.15, colour = "steelblue") +
 43 |     ggplot2::coord_flip(ylim = c(0, 1)) +
 44 |     ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.2)) +
 45 |     ggplot2::labs(x = "",
 46 |                   y = "Prevalence probability",
 47 |                   caption = paste0(percent, " credible intervals")) +
 48 |     ggplot2::theme_bw()
 49 | 
 50 |   plot
 51 | }
 52 | 
 53 | #' @rdname plot_pi
 54 | #' @noRd
 55 | plot_pi.optim_fit <- function(fit, prob = 0.9) {
 56 |    pi <- point_estimate(fit, pars = "pi")[[1]]
 57 | 
 58 |   plot_data <- data.frame(
 59 |     cat = factor(paste0("Class ", 1:length(pi)),
 60 |                  levels = paste0("Class ", length(pi):1)),
 61 |     pi = pi
 62 |   )
 63 | 
 64 |   plot <- ggplot2::ggplot(plot_data, ggplot2::aes(x = .data$cat, y = .data$pi)) +
 65 |     ggplot2::geom_point(size = 2, colour = "steelblue") +
 66 |     ggplot2::coord_flip(ylim = c(0, 1)) +
 67 |     ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.2)) +
 68 |     ggplot2::labs(x = "",
 69 |                   y = "Prevalence probability") +
 70 |     ggplot2::theme_bw()
 71 | 
 72 |   plot
 73 | }
 74 | 
 75 | #' Plot the rater accuracy estimates
 76 | #'
 77 | #' @param fit rater fit object
 78 | #' @param which which raters to plot
 79 | #'
 80 | #' @return Plot of the rate accuracy estimates
 81 | #'
 82 | #' @importFrom ggplot2 ggplot aes geom_tile geom_text facet_wrap labs guides
 83 | #'      scale_fill_gradient theme_bw theme element_rect element_blank
 84 | #' @importFrom rlang .data
 85 | #'
 86 | #' @noRd
 87 | #'
 88 | plot_theta <- function(fit, which = NULL) {
 89 |   theta <- theta_point_estimate(fit, which = which)
 90 | 
 91 |   # theta will always have dim[[2]] and it will always be == K
 92 |   K <- dim(theta)[[2]]
 93 | 
 94 |   # would be great if we could treat in arrays and matrices the 'same'
 95 |   if (length(dim(theta)) > 2) {
 96 |     J <- dim(theta)[[1]]
 97 |     value <- unlist(lapply(1:J, function(x) as.vector(theta[x, , ])))
 98 |   } else {
 99 |     J <- 1
100 |     value <- as.vector(theta)
101 |   }
102 |   which <- if (is.null(which)) 1:J else which
103 | 
104 |   plot_data <- data.frame(
105 |                   x = factor(rep(rep(1:K, each = K), J), levels = 1:K),
106 |                   y = factor(rep(rep(1:K, K), J), levels = K:1),
107 |                   rater = rep(which, each = K^2),
108 |                   value = value,
109 |                   round_value = round(value, 2))
110 |   rownames(plot_data) <- NULL
111 | 
112 |   plot <- ggplot2::ggplot(plot_data, ggplot2::aes(x = .data$x, y = .data$y)) +
113 |    ggplot2::geom_tile(ggplot2::aes(fill = .data$value), col = "black") +
114 |    ggplot2::geom_text(ggplot2::aes(label = .data$round_value)) +
115 |    ggplot2::facet_wrap(~ rater) +
116 |    # TODO add way to change defaults
117 |    ggplot2::scale_fill_gradient(low = "white", high = "steelblue") +
118 |    ggplot2::labs(y = "True label",
119 |                  x = "Assigned label") +
120 |    ggplot2::guides(fill = "none") +
121 |    ggplot2::theme_bw() +
122 |    ggplot2::theme(strip.background = ggplot2::element_rect(fill = "white"),
123 |                   panel.grid.major = ggplot2::element_blank(),
124 |                   panel.grid.minor = ggplot2::element_blank(),
125 |                   panel.border     = ggplot2::element_blank()) +
126 |    NULL
127 | 
128 |   plot
129 | }
130 | 
131 | #' Plot the rater accuracy estimates with uncertainty
132 | #'
133 | #' @param fit rater fit object
134 | #' @param which which raters to plot
135 | #'
136 | #' @return Plot of the rater accuracy estimates with uncertainty visualised
137 | #'
138 | #' @importFrom ggplot2 ggplot aes geom_tile geom_text facet_wrap labs guides
139 | #'      scale_fill_gradient theme_bw theme element_rect element_blank
140 | #' @importFrom rlang .data
141 | #'
142 | #' @noRd
143 | #'
144 | plot_theta_points <- function(fit, prob = 0.9, which = NULL) {
145 | 
146 |   theta_point_est <- point_estimate(fit, pars = "theta")$theta
147 |   theta_cred_int <- posterior_interval(fit, pars = "theta", prob = prob)
148 |   theta_point_est_long <- theta_to_long_format(theta_point_est)
149 |   J <- fit$stan_data$J
150 |   K <- fit$stan_data$K
151 | 
152 |   if (is.null(which)) {
153 |     which <- 1:J
154 |   }
155 | 
156 |   plot_data <- data.frame(cbind(theta_point_est_long, theta_cred_int))
157 |   plot_data$theta_name <- rownames(plot_data)
158 |   rownames(plot_data) <- NULL
159 |   colnames(plot_data) <- c("theta", "theta_lower", "theta_upper", "theta_name")
160 | 
161 |   ind <- rep(1:J, each = K * K)
162 |   which_ind <- which(ind %in% which)
163 |   plot_data <- plot_data[which_ind, ]
164 | 
165 |   plot_data$theta_name <- factor(plot_data$theta_name,
166 |                                  levels = rev(plot_data$theta_name))
167 | 
168 |   percent <- paste0(prob * 100, "%")
169 |   plot <- ggplot2::ggplot(plot_data, ggplot2::aes(x = .data$theta_name,
170 |                                                   y = .data$theta)) +
171 |     ggplot2::geom_point(size = 2, colour = "steelblue") +
172 |     ggplot2::geom_errorbar(ggplot2::aes(ymin = .data$theta_lower,
173 |                                         ymax = .data$theta_upper),
174 |                            width = 0.15, colour = "steelblue") +
175 |     ggplot2::coord_flip(ylim = c(0, 1)) +
176 |     ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.2)) +
177 |     ggplot2::labs(x = "",
178 |                   y = "Rater probability",
179 |                   caption = paste0(percent, " credible intervals")) +
180 |     ggplot2::theme_bw()
181 | 
182 |   plot
183 | }
184 | 
185 | #' Plot the latent class estimates of a rater fit.
186 | #'
187 | #' @param fit A `rater_fit` object.
188 | #' @param ... Other arguments
189 | #'
190 | #' @return Plot of the rate accuracy estimates
191 | #'
192 | #' @importFrom ggplot2 ggplot aes geom_tile geom_text labs theme_bw theme
193 | #'     scale_fill_gradient guides element_blank
194 | #' @importFrom rlang .data
195 | #'
196 | #' @noRd
197 | #'
198 | plot_class_probabilities <- function(fit, item_index = NULL) {
199 | 
200 |   x <- class_probabilities(fit)
201 |   I <- nrow(x)
202 |   K <- ncol(x)
203 | 
204 |   if (is.null(item_index)) {
205 |     plot_data <- data.frame(
206 |       x = factor(rep(1:K, each = I), levels = 1:K),
207 |       y = factor(rep(1:I, K), levels = I:1),
208 |       prob = as.vector(x),
209 |       round_prob = round(as.vector(x), 2)
210 |     )
211 |   } else {
212 | 
213 |     if (!is.numeric(item_index) || !all(item_index %in% 1:I)) {
214 |       stop("`item_index` must be a numeric vector with elements in 1:I",
215 |            call. = FALSE)
216 |     }
217 | 
218 |     x <- x[item_index, ]
219 |     plot_data <- data.frame(
220 |       x = factor(rep(1:K, each = length(item_index)), levels = 1:K),
221 |       y = factor(rep(item_index, K), levels = rev(item_index)),
222 |       prob = as.vector(x),
223 |       round_prob = round(as.vector(x), 2)
224 |     )
225 |   }
226 | 
227 |   plot <- ggplot2::ggplot(plot_data, ggplot2::aes(x = .data$x, y = .data$y)) +
228 |     ggplot2::geom_tile(ggplot2::aes(fill = .data$prob), colour = "black") +
229 |     ggplot2::geom_text(ggplot2::aes(label = .data$round_prob)) +
230 |     ggplot2::labs(x = "Latent Class",
231 |                   y = "Item") +
232 |     ggplot2::scale_fill_gradient(low = "white", high = "steelblue") +
233 |     ggplot2::guides(fill = "none") +
234 |     ggplot2::theme_bw() +
235 |     ggplot2::theme(panel.grid.major = ggplot2::element_blank(),
236 |                    panel.grid.minor = ggplot2::element_blank(),
237 |                    panel.border     = ggplot2::element_blank()) +
238 |     NULL
239 | 
240 |   plot
241 | }
242 | 


--------------------------------------------------------------------------------
/R/point_estimate.R:
--------------------------------------------------------------------------------
  1 | #' Extract point estimates of parameters from a fit object
  2 | #'
  3 | #' @param fit A rater fit object
  4 | #' @param pars A character vector of parameter names to return. By default
  5 | #'   `c("pi", "theta", "z")`.
  6 | #' @param ... Extra arguments
  7 | #'
  8 | #' @details If the passed fit object was fit using MCMC then the posterior
  9 | #'   means are returned. If it was fit through optimisation the maximum a
 10 | #'   priori (MAP) estimates are returned. The z parameter returned is the
 11 | #'   value of class probabilities which is largest. To return the full
 12 | #'   posterior distributions of the latent class use `class_probabilities()`.
 13 | #'
 14 | #'   For the class conditional model the 'full' theta parameterisation (i.e.
 15 | #'   appearing to have the same number of parameters as the standard
 16 | #'   Dawid-Skene model) is calculated and returned. This is designed to allow
 17 | #'   easier comparison with the full Dawid-Skene model.
 18 | #'
 19 | #' @return A named list of the parameter estimates.
 20 | #'
 21 | #' @seealso `class_probabilities()`
 22 | #'
 23 | #' @examples
 24 | #'
 25 | #' \donttest{
 26 | #' # A model fit using MCMC.
 27 | #' mcmc_fit <- rater(anesthesia, "dawid_skene")
 28 | #'
 29 | #' # This will return the posterior mean (except for z)
 30 | #' post_mean_estimate <- point_estimate(mcmc_fit)
 31 | #'
 32 | #' # A model fit using optimisation.
 33 | #' optim_fit <- rater(anesthesia, dawid_skene(), method = "optim")
 34 | #'
 35 | #' # This will output MAP estimates of the parameters.
 36 | #' map_estimate <- point_estimate(optim_fit)
 37 | #'
 38 | #' }
 39 | #'
 40 | #' @export
 41 | #'
 42 | point_estimate <- function(fit,
 43 |                            pars = c("pi", "theta", "z"),
 44 |                            ...) {
 45 |   out <- list()
 46 |   for (par in pars) {
 47 |     out <- switch(par,
 48 |       "pi" = c(out, pi = list(pi_point_estimate(fit, ...))),
 49 |       "theta" = c(out, theta = list(theta_point_estimate(fit, ...))),
 50 |       "z" = c(out, z = list(z_point_estimate(fit, ...))),
 51 |       stop("Unknown parameter passed", call. = FALSE)
 52 |     )
 53 |   }
 54 | 
 55 |   out
 56 | }
 57 | 
 58 | #' Extract a point estimate of the pi parameter from an MCMC fit
 59 | #'
 60 | #' @param fit A rater object.
 61 | #' @param ... Other arguments.
 62 | #'
 63 | #' @return A vector of length K containing the posterior mean (`mcmc_fit`)
 64 | #'   or MAP estimate (`optim_fit`) of pi.
 65 | #'
 66 | #' @noRd
 67 | pi_point_estimate <- function(fit, ...) {
 68 |   UseMethod("pi_point_estimate")
 69 | }
 70 | 
 71 | #' @rdname pi_point_estimate
 72 | #' @noRd
 73 | pi_point_estimate.mcmc_fit <- function(fit, ...) {
 74 |   pi_draws <- posterior_samples(fit, pars = "pi")[[1]]
 75 |   apply(pi_draws, 2, mean)
 76 | }
 77 | 
 78 | #' @rdname pi_point_estimate
 79 | #' @noRd
 80 | pi_point_estimate.optim_fit <- function(fit, ...) {
 81 |   par <- fit$estimates$par
 82 |   # ^ is an anchor for the start of the line - needed due to the log_pi in the
 83 |   # HDS model.
 84 |   out <- par[grep("^pi", names(par))]
 85 |   names(out) <- NULL
 86 |   out
 87 | }
 88 | 
 89 | #' Extract latent class estimates from a fit
 90 | #'
 91 | #' @param fit A rater fit object.
 92 | #' @param ... Extra arguments.
 93 | #'
 94 | #' @details This function returns actual estimates of the latent class i.e.
 95 | #'   whole numbers from 1 to K. This is taken to be the latent class with the
 96 | #'   highest probability. (This can be thought of a kind of post-hoc MAP
 97 | #'   estimate.)
 98 | #'
 99 | #' @return Latent class estimates: A vector length I consisting of whole
100 | #'   numbers from 1 to K.
101 | #'
102 | #' @noRd
103 | #'
104 | z_point_estimate <- function(fit, ...) {
105 |   p_z <- class_probabilities(fit, ...)
106 |   # which.max only takes the first maximum if multiple are found but this
107 |   # is not a problem as we are dealing with floats.
108 |   apply(p_z, 1, which.max)
109 | }
110 | 
111 | #' Extract latent class probabilities from a rater fit object
112 | #'
113 | #' @param fit A rater fit object.
114 | #' @param ... Extra arguments.
115 | #'
116 | #' @return A I * K matrix where each element is the probably of item i being
117 | #'   of class k. (I is the number of items and K the number of classes).
118 | #'
119 | #' @details The latent class probabilities are obtained by marginalising out
120 | #'   the latent class and then calculating, for each draw of pi and theta, the
121 | #'   conditional probability of the latent class given the other parameters
122 | #'   and the data. Averaging these conditional probabilities gives the
123 | #'   (unconditional) latent class probabilities retuned by this function.
124 | #'
125 | #' @examples
126 | #' \donttest{
127 | #'
128 | #' fit <- rater(anesthesia, "dawid_skene")
129 | #' class_probabilities(fit)
130 | #'
131 | #' }
132 | #'
133 | #' @export
134 | #'
135 | class_probabilities <- function(fit, ...) {
136 |   UseMethod("class_probabilities")
137 | }
138 | 
139 | #' @rdname class_probabilities
140 | #' @export
141 | class_probabilities.mcmc_fit <- function(fit, ...) {
142 |   # We can't use posterior_samples here because these are not technically
143 |   # draws.
144 |   log_p_z_samps <- rstan::extract(get_samples(fit))$log_p_z
145 |   p_z_samps <- apply(log_p_z_samps, c(1, 2), softmax)
146 |   p_z_samps <- aperm(p_z_samps, c(2, 3, 1))
147 |   p_z <- apply(p_z_samps, c(2, 3), mean)
148 |   if (fit$data_format == "grouped") {
149 |     p_z <- enlarge_z(p_z, fit)
150 |   }
151 |   rownames(p_z) <- as.character(seq_len(nrow(p_z)))
152 |   p_z
153 | }
154 | 
155 | #' @rdname class_probabilities
156 | #' @export
157 | class_probabilities.optim_fit <- function(fit, ...) {
158 |   par <- fit$estimates$par
159 |   K <- fit$stan_data$K
160 |   if (fit$data_format == "grouped") {
161 |     I <- length(fit$stan_data$tally)
162 |   } else {
163 |     I <- fit$stan_data$I
164 |   }
165 |   log_p_z_values <- par[grep("log_p_z", names(par))]
166 |   log_p_z <- matrix(log_p_z_values, nrow = I, ncol = K)
167 |   p_z <- t(apply(log_p_z, 1, softmax))
168 |   if (fit$data_format == "grouped") {
169 |     p_z <- enlarge_z(p_z, fit)
170 |   }
171 |   rownames(p_z) <- as.character(seq_len(nrow(p_z)))
172 |   p_z
173 | }
174 | 
175 | #' Extract rater accuracy estimates for the Dawid-Skene models
176 | #'
177 | #' Extract rater accuracy/theta estimates from a Dawid Skene fit object
178 | #'
179 | #' @param fit A rater fit object.
180 | #' @param which Which rater's error matrices should be returned.
181 | #' @param ... Extra arguments.
182 | #'
183 | #' @return An array of K * K matrices each a rater's accuracy matrix.
184 | #'
185 | #' @noRd
186 | #'
187 | theta_point_estimate <- function(fit, which = NULL, ...) {
188 |   UseMethod("theta_point_estimate")
189 | }
190 | 
191 | #' @rdname theta_point_estimate
192 | #' @noRd
193 | theta_point_estimate.mcmc_fit <- function(fit, which = NULL, ...) {
194 | 
195 |   # We now 'unspool' the theta parameter for the class conditional model by
196 |   # default so this works for both the standard and class conditional models.
197 |   theta_samps <- posterior_samples(fit, pars = "theta")[[1]]
198 | 
199 |   J <- dim(theta_samps)[[2]]
200 |   if (is.null(which)) {
201 |     which <- 1:J
202 |   }
203 |   validate_which(which, J)
204 | 
205 |   theta <- apply(theta_samps, c(2, 3, 4), mean)
206 |   theta[which, , ]
207 | }
208 | 
209 | #' @rdname theta_point_estimate
210 | #' @noRd
211 | theta_point_estimate.optim_fit <- function(fit, which = NULL, ...) {
212 |   switch(fit$model$file,
213 |     "hierarchical_dawid_skene" = theta_point_estimate_hds_optim(fit, which, ...),
214 |     "dawid_skene" = theta_point_estimate_ds_optim(fit, which, ...),
215 |     "class_conditional_dawid_skene" =
216 |       theta_point_estimate_ccds_optim(fit, which, ...),
217 |     stop("Model type not supported", call. = FALSE))
218 | }
219 | 
220 | theta_point_estimate_hds_optim <- function(fit, which, ...) {
221 |   par <- fit$estimates$par
222 |   beta_values <- par[grep("\\bbeta\\b", names(par))]
223 |   K <- fit$stan_data$K
224 |   J <- fit$stan_data$J
225 |   if (is.null(which)) {
226 |     which <- 1:J
227 |   }
228 |   beta <- array(beta_values, dim = c(J, K, K))
229 |   theta <- array(dim = c(J, K, K))
230 |   for (j in seq_len(J)) {
231 |     for (k in seq_len(K)) {
232 |       theta[j, k, ] <- softmax(beta[j, k, ])
233 |     }
234 |   }
235 |   theta
236 | }
237 | 
238 | theta_point_estimate_ds_optim <- function(fit, which, ...) {
239 |   par <- fit$estimates$par
240 |   theta_values <- par[grep("\\btheta\\b", names(par))]
241 |   K <- fit$stan_data$K
242 |   J <- fit$stan_data$J
243 |   if (is.null(which)) {
244 |     which <- 1:J
245 |   }
246 |   theta <- array(theta_values, dim = c(J, K, K))
247 |   theta[which, , ]
248 | }
249 | 
250 | theta_point_estimate_ccds_optim <- function(fit, which, ...) {
251 |   par <- fit$estimates$par
252 |   cc_theta_values <- par[grep("\\btheta\\b", names(par))]
253 |   K <- fit$stan_data$K
254 |   J <- fit$stan_data$J
255 |   if (is.null(which)) {
256 |     which <- 1:J
257 |   }
258 |   cc_theta <- matrix(cc_theta_values, nrow = J, ncol = K)
259 |   theta <- unspool_cc_theta(cc_theta)
260 |   theta[which, , ]
261 | }
262 | 
263 | # Helper functions
264 | 
265 | validate_which <- function(which, J) {
266 |   if (!(length(which) > 0) || !is.numeric(which)) {
267 |     stop("which must be a positive length numeric vector", call. = FALSE)
268 |   }
269 |   # TODO Make this error more informative.
270 |   if (length(which(which %in% 1:J)) != length(which)) {
271 |     stop("All numbers in `which` must be drawn from 1:", J, call. = FALSE)
272 |   }
273 | }
274 | 
275 | enlarge_z <- function(p_z, fit) {
276 |   stopifnot(fit$data_format == "grouped")
277 |   p_z[rep(1:nrow(p_z), fit$stan_data$tally), ]
278 | }
279 | 
280 | unspool_cc_theta <- function(cc_theta) {
281 |   J <- nrow(cc_theta)
282 |   K <- ncol(cc_theta)
283 |   theta_out <- array(0, dim = c(J, K, K))
284 |   for (j in 1:J) {
285 |     for (k in 1:K) {
286 |       theta_out[j, k, ] <- (1 - cc_theta[j, k]) / (K - 1)
287 |       theta_out[j, k, k] <- cc_theta[j, k]
288 |     }
289 |   }
290 |   theta_out
291 | }
292 | 


--------------------------------------------------------------------------------
/R/posterior_interval.R:
--------------------------------------------------------------------------------
  1 | #' Extract posterior intervals for parameters of the model
  2 | #'
  3 | #' @param object A rater `mcmc_fit` object.
  4 | #' @param prob A single probability. The size of the credible interval
  5 | #'   returned. By default `0.9`.
  6 | #' @param pars The parameters to calculate the intervals for
  7 | #' @param ... Other arguments.
  8 | #'
  9 | #' @return A matrix with 2 columns. The first column is the lower bound of
 10 | #'   of the credible interval and the second is the upper bound. Each row
 11 | #'   corresponds to one individuals parameters. The rownames are the parameter
 12 | #'   names.
 13 | #'
 14 | #' @details Posterior intervals can only be calculated for models fit with
 15 | #'   MCMC. In addition, posterior intervals are not meaningful for the latent
 16 | #'   class (and indeed cannot be calculated). The *full* posterior distribution
 17 | #'   of the latent class can be extracted using [class_probabilities]
 18 | #'
 19 | #'   For the class conditional model the 'full' theta parameterisation (i.e.
 20 | #'   appearing to have the same number of parameters as the standard
 21 | #'   Dawid-Skene model) is calculated and returned. This is designed to allow
 22 | #'   easier comparison with the full Dawid-Skene model.
 23 | #'
 24 | #' @examples
 25 | #'
 26 | #' \donttest{
 27 | #' fit <- rater(anesthesia, "dawid_skene", verbose = FALSE, chains = 1)
 28 | #'
 29 | #' intervals <- posterior_interval(fit)
 30 | #' head(intervals)
 31 | #'
 32 | #' }
 33 | #'
 34 | #' @aliases posterior_interval
 35 | #' @method posterior_interval mcmc_fit
 36 | #' @importFrom rstantools posterior_interval
 37 | #' @export
 38 | #' @export posterior_interval
 39 | #'
 40 | posterior_interval.mcmc_fit <- function(object,
 41 |                                         prob = 0.9,
 42 |                                         pars = c("pi", "theta"),
 43 |                                         ...) {
 44 | 
 45 |   fit <- object
 46 |   # We could keep the stan data after fitting, but it doesn't seem worth
 47 |   # the added complexity.
 48 |   K <- fit$stan_data$K
 49 |   J <- fit$stan_data$J
 50 | 
 51 |   intervals <- list()
 52 |   for (i in 1:length(pars)) {
 53 |     par <- match.arg(pars[[i]], c("pi", "theta", "z"))
 54 | 
 55 |     if (par == "pi") {
 56 |       pi_draws <- posterior_samples(fit, pars = "pi")[[1]]
 57 |       colnames(pi_draws) <- sprintf("pi[%s]", 1:K)
 58 |       pi_interval <- rstantools::posterior_interval(pi_draws, prob, ...)
 59 |       intervals[[i]] <- pi_interval
 60 | 
 61 |     } else if (par == "theta") {
 62 |       theta_draws_raw <- posterior_samples(fit, pars = "theta")[[1]]
 63 |       n_draws <- dim(theta_draws_raw)[[1]]
 64 | 
 65 |       theta_draws_mat <- matrix(0, nrow = n_draws, ncol = J * K * K)
 66 |       col_names <- character(J * K * K)
 67 |       n <- 1
 68 |       for (j in 1:J) {
 69 |         for (k in 1:K) {
 70 |           for (i in 1:K) {
 71 |             theta_draws_mat[, n] <- theta_draws_raw[, j, k, i]
 72 |             col_names[[n]] <- sprintf("theta[%s, %s, %s]", j, k, i)
 73 |             n <- n + 1
 74 |           }
 75 |         }
 76 |       }
 77 |       colnames(theta_draws_mat) <- col_names
 78 |       intervals[[i]] <- rstantools::posterior_interval(theta_draws_mat,
 79 |                                                        prob, ...)
 80 |     } else if (par == "z") {
 81 |       stop("Cannot calculate quantiles for z", call. = FALSE)
 82 |     }
 83 |   }
 84 | 
 85 |   do.call(rbind, intervals)
 86 | }
 87 | 
 88 | #' Extract posterior intervals for parameters of the model
 89 | #'
 90 | #' @param object A rater optim_fit object
 91 | #' @param prob A probability
 92 | #' @param pars The parameters to calculate the intervals for
 93 | #' @param ... Other arguments
 94 | #'
 95 | #' @method posterior_interval optim_fit
 96 | #' @importFrom rstantools posterior_interval
 97 | #' @export
 98 | #' @export posterior_interval
 99 | #'
100 | posterior_interval.optim_fit <- function(object,
101 |                                          prob = 0.9,
102 |                                          pars = c("pi", "theta"),
103 |                                          ...) {
104 |   stop("Can't calculate posterior intervals for a model fit using",
105 |        " optimisation.",
106 |        call. = FALSE)
107 | }
108 | 


--------------------------------------------------------------------------------
/R/posterior_predict.R:
--------------------------------------------------------------------------------
 1 | #' Draw from the posterior predictive distribution
 2 | #'
 3 | #' @param object A `rater_fit` object.
 4 | #' @param new_data New data for the model to be fit to. The must be in the form
 5 | #'   used in `rater()` except without the 'rating' column.
 6 | #' @param seed An optional random seed to use.
 7 | #' @param ... Other arguments.
 8 | #'
 9 | #' @return The passed `new_data` augmented with a column 'z' containing the
10 | #'   latent class of each item and 'rating' containing the simulated rating.
11 | #'
12 | #' @details The number of raters implied by the entries in the rater column
13 | #'   must match the number of raters in the fitted model.
14 | #'
15 | #' @examples
16 | #'
17 | #' \donttest{
18 | #'
19 | #' fit <- rater(anesthesia, "dawid_skene", verbose = FALSE)
20 | #' new_data <- data.frame(item = rep(1:2, each = 5), rater = rep(1:5, 2))
21 | #'
22 | #' predictions <- posterior_predict(fit, new_data)
23 | #' predictions
24 | #'
25 | #' }
26 | #'
27 | #' @aliases posterior_predict
28 | #' @method posterior_predict rater_fit
29 | #' @importFrom rstantools posterior_predict
30 | #' @export
31 | #' @export posterior_predict
32 | #'
33 | posterior_predict.rater_fit <- function(object, new_data, seed = NULL, ...) {
34 | 
35 |   if (!is.null(seed)) {
36 |     set.seed(seed)
37 |   }
38 | 
39 |   fit <- object
40 | 
41 |   new_data <- as.data.frame(new_data)
42 |   col_names <- colnames(new_data)
43 |   if (ncol(new_data) != 2 || !all(c("item", "rater") %in% col_names)) {
44 |     stop("`new_data` must have two columns 'item' and 'rater'", call. = FALSE)
45 |   }
46 | 
47 |   if (!all(new_data$rater %in% 1:fit$stan_data$J)) {
48 |     stop("The number of raters in the fitted and new data must match",
49 |          call. = FALSE)
50 |   }
51 | 
52 |   pi <- point_estimate(fit, pars = "pi")$pi
53 |   theta <- point_estimate(fit, pars = "theta")$theta
54 |   n <- nrow(new_data)
55 |   K <- length(pi)
56 |   I <- max(new_data$item)
57 | 
58 |   item_z <- sample(1:K, size = I, replace = TRUE, prob = pi)
59 |   z <- item_z[new_data$item]
60 | 
61 |   ratings <- numeric(n)
62 |   for (i in seq_len(n)) {
63 |     j <- new_data$rater[[i]]
64 |     ratings[[i]] <- sample(1:K, 1, prob = theta[j, z[[i]], ])
65 |   }
66 | 
67 |   pred <- cbind(new_data, z = z, ratings = ratings)
68 |   pred
69 | }
70 | 


--------------------------------------------------------------------------------
/R/posterior_samples.R:
--------------------------------------------------------------------------------
 1 | #' Extract posterior samples from a rater fit object
 2 | #'
 3 | #' @param fit A rater fit object.
 4 | #' @param pars A character vector of parameter names to return. By default
 5 | #'   `c("pi", "theta")`.
 6 | #'
 7 | #' @return A named list of the posterior samples for each parameters. For each
 8 | #'   parameter the samples are in the form returned by [rstan::extract()].
 9 | #'
10 | #' @details Posterior samples can only be returned for models fitting using
11 | #'   MCMC not optimisation. In addition, posterior samples cannot be returned
12 | #'   for the latent class due to the marginalisation technique used internally.
13 | #'
14 | #'   For the class conditional model the 'full' theta parameterisation (i.e.
15 | #'   appearing to have the same number of parameters as the standard
16 | #'   Dawid-Skene model) is calculated and returned. This is designed to allow
17 | #'   easier comparison with the full Dawid-Skene model.
18 | #'
19 | #' @importFrom rstan extract
20 | #'
21 | #' @examples
22 | #'
23 | #' \donttest{
24 | #' fit <- rater(anesthesia, "dawid_skene")
25 | #'
26 | #' samples <- posterior_samples(fit)
27 | #'
28 | #' # Look at first 6 samples for each of the pi parameters
29 | #' head(samples$pi)
30 | #'
31 | #' # Look at the first 6 samples for the theta[1, 1, 1] parameter
32 | #' head(samples$theta[, 1, 1, 1])
33 | #'
34 | #' # Only get the samples for the pi parameter:
35 | #' pi_samples <- posterior_samples(fit, pars = "pi")
36 | #'
37 | #' }
38 | #'
39 | #' @export
40 | #'
41 | posterior_samples <- function(fit, pars = c("pi", "theta")) {
42 |   if (inherits(fit, "optim_fit")) {
43 |     stop("Cannot return draws from an optimisaton fit", call. = FALSE)
44 |   }
45 | 
46 |   samples <- list()
47 |   for (par in pars) {
48 |     par <- match.arg(par, c("pi", "theta", "z"))
49 |     samples <- switch(par,
50 |       "pi"    = c(samples, pi = list(rstan::extract(get_samples(fit))$pi)),
51 |       "theta" = {
52 |         raw_theta <- rstan::extract(get_samples(fit))$theta
53 |         if (inherits(fit$model, "hier_dawid_skene")) {
54 |           beta <- rstan::extract(get_samples(fit))$beta
55 |           N <- dim(beta)[[1]]
56 |           J <- fit$stan_data$J
57 |           K <- fit$stan_data$K
58 |           full_theta <- array(dim = c(N, J, K, K))
59 |           for (i in seq_len(N)) {
60 |             for (j in seq_len(J)) {
61 |               for (k in seq_len(K))
62 |                 full_theta[i, j, k, ] <- softmax(beta[i, j, k, ])
63 |             }
64 |           }
65 |         } else if (inherits(fit$model, "class_conditional_dawid_skene")) {
66 |           N <- dim(raw_theta)[[1]]
67 |           J <- fit$stan_data$J
68 |           K <- fit$stan_data$K
69 |           full_theta <- array(dim = c(N, J, K, K))
70 |           for (i in seq_len(N)) {
71 |             full_theta[i, , , ] <- unspool_cc_theta(raw_theta[i, , ])
72 |           }
73 |         } else {
74 |           # Standard Dawid-Skene model.
75 |           full_theta <- raw_theta
76 |         }
77 |         c(samples, theta = list(full_theta))
78 |       },
79 |       "z"     = stop("Cannot return draws for marginalised discrete parameter",
80 |                      call. = FALSE),
81 |       stop("Invalid pars argument", call. = FALSE)
82 |     )
83 |   }
84 | 
85 |   samples
86 | }
87 | 


--------------------------------------------------------------------------------
/R/rater-package.R:
--------------------------------------------------------------------------------
 1 | #' The 'rater' package.
 2 | #'
 3 | #' @description  Fit statistical models based on the Dawid-Skene model to repeated
 4 | #'  categorical rating data. Full Bayesian inference for these models is
 5 | #'  supported through the Stan modelling language. rater also allows the user to
 6 | #'  extract and plot key parameters of these models.
 7 | #'
 8 | #' @docType package
 9 | #' @name rater-package
10 | #' @useDynLib rater, .registration = TRUE
11 | #' @import methods
12 | #' @import Rcpp
13 | #' @importFrom RcppParallel CxxFlags RcppParallelLibs
14 | #' @importFrom rstan sampling
15 | #' @aliases rater-package
16 | #' @references
17 | #' Stan Development Team (2018). RStan: the R interface to Stan. R package version 2.18.2. http://mc-stan.org
18 | #'
19 | NULL
20 | 


--------------------------------------------------------------------------------
/R/rater_fit_class.R:
--------------------------------------------------------------------------------
  1 | #' Make an MCMC rater fit object
  2 | #'
  3 | #' @param model A rater model; an object of class `rater_model`.
  4 | #' @param samples A stanfit object containing posterior samples.
  5 | #' @param stan_data The data used to fit the model in the form passed to Stan.
  6 | #' @param data_format The format of the data used to fit the model.
  7 | #'
  8 | #' @return An object of class `c("mcmc_fit", "rater_fit")`
  9 | #'
 10 | #' @noRd
 11 | #'
 12 | new_mcmc_fit <- function(model, samples, stan_data, data_format) {
 13 |   new <- list(model = model,
 14 |               samples = samples,
 15 |               stan_data = stan_data,
 16 |               data_format = data_format)
 17 |   class(new) <- c("mcmc_fit", "rater_fit")
 18 |   new
 19 | }
 20 | 
 21 | #' Make an optimisation rater fit object
 22 | #'
 23 | #' @param model A rater model; an object of class `rater_model`.
 24 | #' @param estimates A stanfit object containing parameter estimates.
 25 | #' @param stan_data The data used to fit the model in the form passed to Stan.
 26 | #' @param data_format The format of the data used to fit the model.
 27 | #'
 28 | #' @return An object of class `c("optim_fit", "rater_fit")`
 29 | #'
 30 | #' @noRd
 31 | #'
 32 | new_optim_fit <- function(model, estimates, stan_data, data_format) {
 33 |   new <- list(model = model,
 34 |               estimates = estimates,
 35 |               stan_data = stan_data,
 36 |               data_format = data_format)
 37 |   class(new) <- c("optim_fit", "rater_fit")
 38 |   new
 39 | }
 40 | 
 41 | #' Print a `mcmc_fit` object
 42 | #'
 43 | #' @param x An object of class `mcmc_fit`.
 44 | #' @param ... Other arguments.
 45 | #'
 46 | #' @examples
 47 | #' \donttest{
 48 | #'
 49 | #' # Suppress sampling output.
 50 | #' mcmc_fit <- rater(anesthesia, "dawid_skene", verbose = FALSE)
 51 | #' print(mcmc_fit)
 52 | #'
 53 | #' }
 54 | #'
 55 | #' @export
 56 | #'
 57 | # nocov start
 58 | print.mcmc_fit <- function(x, ...) {
 59 |   cat(get_name(get_model(x)), "with MCMC draws.\n")
 60 | }
 61 | # nocov end
 62 | 
 63 | #' Print a `optim_fit` object
 64 | #'
 65 | #' @param x An object of class `optim_fit`.
 66 | #' @param ... Other arguments.
 67 | #'
 68 | #' @examples
 69 | #' \donttest{
 70 | #'
 71 | #' optim_fit <- rater(anesthesia, "dawid_skene", method = "optim")
 72 | #' print(optim_fit)
 73 | #'
 74 | #' }
 75 | #'
 76 | #' @export
 77 | #'
 78 | # nocov start
 79 | print.optim_fit <- function(x, ...) {
 80 |   cat(get_name(get_model(x)), "with MAP estimates.\n")
 81 | }
 82 | # nocov end
 83 | 
 84 | #' Plot a `rater_fit` object
 85 | #'
 86 | #' @param x An object of class `rater_fit`.
 87 | #' @param pars A length one character vector specifying the parameter to plot.
 88 | #'   By default `"theta"`.
 89 | #' @param prob The coverage of the credible intervals shown in the `"pi"` plot.
 90 | #'   If not plotting pi this argument will be ignored. By default `0.9`.
 91 | #' @param rater_index The indexes of the raters shown in the `"theta` plot.
 92 | #'   If not plotting theta this argument will be ignored. By default `NULL`
 93 | #'   which means that all raters will be plotted.
 94 | #' @param item_index The indexes of the items shown in the class probabilities
 95 | #'   plot. If not plotting the class probabilities this argument will be
 96 | #'   ignored. By default `NULL` which means that all items will be plotted.
 97 | #'   This argument is particularly useful to focus the subset of items with
 98 | #'   substantial uncertainty in their class assignments.
 99 | #' @param theta_plot_type The type of plot of the "theta" parameter. Can be
100 | #'   either `"matrix"` or `"points"`. If `"matrix"` (the default) the plot
101 | #'   will show the point estimates of the individual rater error matrices,
102 | #'   visualised as tile plots. If `"points"`, the elements of the theta
103 | #'   parameter will be displayed as points, with associated credible intervals.
104 | #'   Overall, the `"matrix"` type is likely more intuitive, but the `"points"`
105 | #'   type can also visualise the uncertainty in the parameter estimates.
106 | #' @param ... Other arguments.
107 | #'
108 | #' @return A ggplot2 object.
109 | #'
110 | #' @details The use of `pars` to refer to only one parameter is for backwards
111 | #'  compatibility and consistency with the rest of the interface.
112 | #'
113 | #' @examples
114 | #'
115 | #' \donttest{
116 | #' fit <- rater(anesthesia, "dawid_skene")
117 | #'
118 | #' # By default will just plot the theta plot
119 | #' plot(fit)
120 | #'
121 | #' # Select which parameter to plot.
122 | #' plot(fit, pars = "pi")
123 | #'
124 | #' # Plot the theta parameter for rater 1, showing uncertainty.
125 | #' plot(fit, pars = "theta", theta_plot_type = "points", rater_index = 1)
126 | #'
127 | #' }
128 | #'
129 | #' @export
130 | #'
131 | plot.rater_fit <- function(x,
132 |                            pars = "theta",
133 |                            prob = 0.9,
134 |                            rater_index = NULL,
135 |                            item_index = NULL,
136 |                            theta_plot_type = "matrix",
137 |                            ...) {
138 | 
139 |   if (length(pars) > 1 || !is.character(pars)) {
140 |     stop("`pars` must be a length 1 character vector.", call. = FALSE)
141 |   }
142 | 
143 |   if (inherits(x, "optim_fit") && theta_plot_type == "points") {
144 |     stop("`'points'` plot type is not supported for models fit using",
145 |          "optimisation, use `theta_plot_type = 'matrix' instead.",
146 |          call. = FALSE)
147 |   }
148 | 
149 |   which <- rater_index
150 |   plot_names <- c("theta", "raters",
151 |                   "pi", "prevalence",
152 |                   "class_probabilities", "latent_class")
153 | 
154 |   par <- match.arg(pars, plot_names)
155 |   theta_type <- match.arg(theta_plot_type, c("matrix", "points"))
156 | 
157 |   if (par %in% c("theta", "raters")) {
158 |     par <- paste0(par, "_", theta_type)
159 |   }
160 | 
161 |   plot <- switch(par,
162 |     "theta_matrix" = plot_theta(x, which = which),
163 |     "raters_matrix" = plot_theta(x, which = which),
164 |     "theta_points" = plot_theta_points(x, prob = prob, which = which),
165 |     "raters_points" = plot_theta_points(x, prob = prob, which = which),
166 |     "class_probabilities" = plot_class_probabilities(x,
167 |                                                      item_index = item_index),
168 |     "latent_class" = plot_class_probabilities(x, item_index = item_index),
169 |     # Luckily "p" will fall through correctly.
170 |     "pi" = plot_pi(x, prob = prob),
171 |     "prevalence" = plot_pi(x, prob = prob),
172 |     "z" = stop("Cannot plot z directly.", call. = FALSE),
173 |     stop("Invalid pars argument", call. = FALSE)
174 |   )
175 | 
176 |   plot
177 | }
178 | 
179 | #' Summarise a `mcmc_fit` object
180 | #'
181 | #' @param object An object of class `mcmc_fit`.
182 | #' @param n_pars The number of pi/theta parameters and z 'items' to display.
183 | #' @param ... Other arguments passed to function.
184 | #'
185 | #' @details For the class conditional model the 'full' theta parameterisation
186 | #'   (i.e. appearing to have the same number of parameters as the standard
187 | #'   Dawid-Skene model) is calculated and returned. This is designed to allow
188 | #'   easier comparison with the full Dawid-Skene model.
189 | #'
190 | #' @examples
191 | #' \donttest{
192 | #'
193 | #' fit <- rater(anesthesia, "dawid_skene", verbose = FALSE)
194 | #'
195 | #' summary(fit)
196 | #'
197 | #' }
198 | #'
199 | #' @method summary mcmc_fit
200 | #'
201 | #' @importFrom utils head
202 | #'
203 | #' @export
204 | #'
205 | summary.mcmc_fit <- function(object, n_pars = 8, ...) {
206 |   fit <- object
207 | 
208 |   # Prepare pi.
209 |   pi_est <- pi_to_long_format(pi_point_estimate(fit))
210 |   colnames(pi_est) <- "mean"
211 |   pi_interval <- posterior_interval(fit, pars = "pi")
212 |   pi_mcmc_diagnostics <- mcmc_diagnostics(fit, pars = "pi")
213 |   pi <- cbind(pi_est, pi_interval, pi_mcmc_diagnostics)
214 | 
215 |   pars <- pi
216 | 
217 |   # Prepare theta.
218 |   theta_est <- theta_to_long_format(theta_point_estimate(fit))
219 |   colnames(theta_est) <- "mean"
220 |   theta_interval <- posterior_interval(fit, pars = "theta")
221 |   theta_mcmc_diagnostics <- mcmc_diagnostics(fit, pars = "theta")
222 |   theta <- cbind(theta_est, theta_interval, theta_mcmc_diagnostics)
223 | 
224 |   pars <- rbind(pi, theta)
225 | 
226 |   # Prepare z.
227 |   class_probs <- class_probabilities(fit)
228 |   colnames(class_probs) <- sprintf("Pr(z = %s)", 1:ncol(class_probs))
229 |   z <- z_to_long_format(apply(class_probs, 1, which.max))
230 |   colnames(z) <- "MAP"
231 |   z_out <- cbind(z, class_probs)
232 | 
233 |   # Do the actual printing:
234 | 
235 |   cat("Model:\n")
236 |   print(get_model(fit))
237 | 
238 |   cat("\nFitting method: MCMC\n")
239 | 
240 |   cat("\npi/theta samples:\n")
241 |   print(round(utils::head(pars, n_pars), 2))
242 |   # pars is a matrix where each row is a parametery thing.
243 |   if (nrow(pars) > n_pars) {
244 |     n_remaining <- nrow(pars) - n_pars
245 |     cat("# ... with", n_remaining, "more rows\n")
246 |   }
247 | 
248 |   cat("\nz:\n")
249 |   print(round(head(z_out, n_pars), 2))
250 |   n_remaining_z <- nrow(z) - n_pars
251 |   cat("# ... with", n_remaining_z, "more items\n")
252 | 
253 | }
254 | 
255 | #' Summarise an `optim_fit` object
256 | #'
257 | #' @param object An object of class `optim_fit`.
258 | #' @param n_pars The number of pi/theta parameters and z 'items' to display.
259 | #' @param ... Other arguments passed to function.
260 | #'
261 | #' @details For the class conditional model the 'full' theta parameterisation
262 | #'   (i.e. appearing to have the same number of parameters as the standard
263 | #'   Dawid-Skene model) is calculated and returned. This is designed to allow
264 | #'   easier comparison with the full Dawid-Skene model.
265 | #'
266 | #' @examples
267 | #' \donttest{
268 | #'
269 | #' fit <- rater(anesthesia, "dawid_skene", method = "optim")
270 | #'
271 | #' summary(fit)
272 | #'
273 | #' }
274 | #'
275 | #' @method summary optim_fit
276 | #'
277 | #' @importFrom utils head
278 | #'
279 | #' @export
280 | #'
281 | summary.optim_fit <- function(object, n_pars = 8, ...) {
282 |   x <- object
283 |   fit <- object
284 | 
285 |   # Prepare pi.
286 |   pi <- pi_to_long_format(pi_point_estimate(fit))
287 |   colnames(pi) <- "mean"
288 | 
289 |   pars <- pi
290 | 
291 |   # Prepare theta.
292 |   theta <- theta_to_long_format(theta_point_estimate(fit))
293 |   colnames(theta) <- "mean"
294 |   pars <- rbind(pi, theta)
295 | 
296 |   # Prepare z.
297 |   class_probs <- class_probabilities(fit)
298 |   colnames(class_probs) <- sprintf("Pr(z = %s)", 1:ncol(class_probs))
299 |   z <- z_to_long_format(apply(class_probs, 1, which.max))
300 |   colnames(z) <- "MAP"
301 |   z_out <- cbind(z, class_probs)
302 | 
303 |   # Do the actual printing:
304 | 
305 |   cat("Model:\n")
306 |   print(get_model(fit))
307 | 
308 |   cat("\nFitting method: Optimisation\n")
309 | 
310 |   cat("\npi/theta estimates:\n")
311 | 
312 |   print(round(head(pars, n_pars), 2))
313 |   # pars is a *list*
314 |   if (length(pars) > n_pars) {
315 |     n_remaining <- length(pars) - n_pars
316 |     cat("# ... with", n_remaining, "more rows\n")
317 |   }
318 | 
319 |   cat("\nz:\n")
320 |   print(round(utils::head(z_out, n_pars), 2))
321 |   n_remaining_z <- nrow(z) - n_pars
322 |   cat("# ... with", n_remaining_z, "more items\n")
323 | 
324 |   cat("\n")
325 |   cat(paste0("Log probability: ", round(x$estimates$value, 4), "\n"))
326 |   cat(paste0("Fit converged: ", as.logical(x$estimates$return_code - 1), "\n"))
327 | }
328 | 
329 | #' Convert a rater_fit object to a {coda} `mcmc.list` object.
330 | #'
331 | #' @param fit A rater_fit object.
332 | #'
333 | #' @return A {coda} mcmc.list object.
334 | #'
335 | #' @importFrom rstan As.mcmc.list
336 | #'
337 | #' @examples
338 | #' \donttest{
339 | #'
340 | #' # Fit a model using MCMC (the default).
341 | #' mcmc_fit <- rater(anesthesia, "dawid_skene")
342 | #'
343 | #' # Convert it to an mcmc.list
344 | #' rater_mcmc_list <- as_mcmc.list(mcmc_fit)
345 | #'
346 | #' }
347 | #'
348 | #' @export
349 | #'
350 | as_mcmc.list <- function(fit) {
351 | 
352 |   if (!inherits(fit, "rater_fit")) {
353 |     stop("`as_mcmc.list` must be passed a rater fit object.", call. = FALSE)
354 |   }
355 | 
356 |   if (inherits(fit, "optim_fit")) {
357 |     stop("Cannot convert a optimisation fit to a mcmc.list object",
358 |          call. = FALSE)
359 |   }
360 | 
361 |   # We must have a mcmc_fit, rater_fit!
362 |   rstan::As.mcmc.list(fit$samples)
363 | }
364 | 
365 | #' Provide a summary of the priors specified in a `rater_fit` object.
366 | #'
367 | #' @param object A `rater_fit` object.
368 | #' @param ... Other arguments.
369 | #'
370 | #' @examples
371 | #' \donttest{
372 | #' # Fit a model using MCMC (the default).
373 | #' fit <- rater(anesthesia, "dawid_skene", verbose = FALSE)
374 | #'
375 | #' # Summarise the priors (and model) specified in the fit.
376 | #' prior_summary(fit)
377 | #'
378 | #' }
379 | #'
380 | #' @aliases prior_summary
381 | #' @method prior_summary rater_fit
382 | #' @importFrom rstantools prior_summary
383 | #' @export
384 | #' @export prior_summary
385 | #'
386 | prior_summary.rater_fit <- function(object, ...) {
387 |   get_model(object)
388 | }
389 | 
390 | #' Get the underlying `stanfit` object from a `rater_fit` object.
391 | #'
392 | #' @param fit A `rater_fit` object.
393 | #'
394 | #' @return A `stanfit` object from rstan.
395 | #'
396 | #' @examples
397 | #'
398 | #' \donttest{
399 | #' fit <- rater(anesthesia, "dawid_skene", verbose = FALSE)
400 | #'
401 | #' stan_fit <- get_stanfit(fit)
402 | #' stan_fit
403 | #'
404 | #' }
405 | #'
406 | #' @export
407 | #'
408 | get_stanfit <- function(fit) {
409 | 
410 |   if (!inherits(fit, "rater_fit")) {
411 |     stop("`fit` must be rater_fit object.", call. = FALSE)
412 |   }
413 | 
414 |   if (inherits(fit, "optim_fit")) {
415 |     stan_fit <- fit$estimates
416 |   } else {
417 |     stan_fit <- fit$samples
418 |   }
419 | 
420 |   stan_fit
421 | }
422 | 
423 | is.mcmc_fit <- function(x) {
424 |   inherits(x, "mcmc_fit")
425 | }
426 | 
427 | is.optim_fit <- function(x) {
428 |   inherits(x, "optim_fit")
429 | }
430 | 
431 | is.rater_fit <- function(x) {
432 |   inherits(x, "rater_fit")
433 | }
434 | 
435 | get_model <- function(f) {
436 |   f$model
437 | }
438 | 
439 | #' Get the posterior samples from a rater mcmc fit object
440 | #'
441 | #' @param fit A rater `mcmc_fit` object.
442 | #'
443 | #' @noRd
444 | #'
445 | get_samples <- function(fit) {
446 |   fit$samples
447 | }
448 | 
449 | get_estimates <- function(f) {
450 |   f$estimates
451 | }
452 | 
453 | 
454 | # bit of a hack reusing get_data - should it be generic?
455 | 


--------------------------------------------------------------------------------
/R/rater_model_class.R:
--------------------------------------------------------------------------------
 1 | #' Print a `rater_model` object.
 2 | #'
 3 | #' @param x A `rater_model` object.
 4 | #' @param ... Other arguments
 5 | #'
 6 | #' @examples
 7 | #' mod <- dawid_skene()
 8 | #' print(mod)
 9 | #'
10 | #' @export
11 | #'
12 | print.rater_model <- function(x, ...) {
13 |   cat(get_name(x), "\n\n")
14 |   pars <- get_parameters(x)
15 |   cat("Prior parameters:\n\n")
16 |   for (i in 1:length(pars)) {
17 |     cat(paste0(names(pars)[[i]], ":"))
18 |     if (!is.null(pars[[i]])) {
19 |       cat("\n \n")
20 |       print(pars[[i]])
21 |       cat("\n")
22 |     } else {
23 |       cat(" default\n")
24 |     }
25 |   }
26 | }
27 | 
28 | #' Summarise a `rater_model`.
29 | #'
30 | #' @param object A `rater_model` object.
31 | #' @param ... Other arguments.
32 | #'
33 | #' @examples
34 | #' mod <- dawid_skene()
35 | #' summary(mod)
36 | #'
37 | #' @method summary rater_model
38 | #'
39 | #' @export
40 | #'
41 | summary.rater_model <- function(object, ...) {
42 |   cat(get_name(object))
43 | }
44 | 
45 | is.dawid_skene <- function(model) {
46 |   inherits(model, "dawid_skene")
47 | }
48 | 
49 | is.hier_dawid_skene <- function(m) {
50 |   inherits(m, "hier_dawid_skene")
51 | }
52 | 
53 | is.rater_model <- function(m) {
54 |   inherits(m, "rater_model")
55 | }
56 | 
57 | is.class_conditional_dawid_skene <- function(m) {
58 |   inherits(m, "class_conditional_dawid_skene")
59 | }
60 | 
61 | #' Gets the long name of a model
62 | #'
63 | #' @param m Object of type `rater_model`.
64 | #'
65 | #' @noRd
66 | #'
67 | get_name <- function(m) {
68 |   m$name
69 | }
70 | 
71 | #' Gets the stan file name of a model
72 | #'
73 | #' @param m object of type `rater_model`.
74 | #'
75 | #' @noRd
76 | #'
77 | get_file <- function(m) {
78 |   m$file
79 | }
80 | 
81 | get_K <- function(m) {
82 |   m$K
83 | }
84 | 
85 | get_parameters <- function(m) {
86 |   m$parameters
87 | }
88 | 


--------------------------------------------------------------------------------
/R/simulate.R:
--------------------------------------------------------------------------------
  1 | #' Simulate data from the Dawid-Skene model
  2 | #'
  3 | #' @param pi The pi parameter of the Dawid-Skene model.
  4 | #' @param theta The theta parameter of the Dawid-Skene model.
  5 | #' @param sim_data Data to guide the simulation. The data must be in the long
  6 | #'   data format used in `rater()` except without the 'rating' column. The data
  7 | #'   specifies:
  8 | #'   * the number of items in the data, and
  9 | #'   * which raters rate each item and how many times they do so.
 10 | #' @param seed An optional random seed to use.
 11 | #'
 12 | #' @return The passed `sim_data` augmented with columns:
 13 | #'   * `"z"` containing the latent class of each item,
 14 | #'   * `"rating"` containing the simulated ratings.
 15 | #'
 16 | #' @details The number of raters implied by the entries in the rater column
 17 | #'   must match the number of raters implied by the passed theta parameter.
 18 | #'
 19 | #'   This function can also be used to simulate from the class-conditional
 20 | #'   Dawid-Skene model by specifying theta in the required form (i.e where
 21 | #'   all off-diagonal entries of the error matrices are equal.)
 22 | #'
 23 | #' @examples
 24 | #'
 25 | #' \donttest{
 26 | #'
 27 | #' J <- 5
 28 | #' K <- 4
 29 | #' pi <- rep(1 / K, K)
 30 | #' theta <- make_theta(0.7, J, K)
 31 | #' sim_data <- data.frame(item = rep(1:2, each = 5), rater = rep(1:5, 2))
 32 | #'
 33 | #' simulations <- simulate_dawid_skene_model(pi, theta, sim_data)
 34 | #' simulations
 35 | #'
 36 | #' }
 37 | #'
 38 | #' @export
 39 | #'
 40 | simulate_dawid_skene_model <- function(pi, theta, sim_data, seed = NULL) {
 41 | 
 42 |   if (!is.null(seed)) {
 43 |     set.seed(seed)
 44 |   }
 45 | 
 46 |   # Check pi.
 47 |   if (!(is.numeric(pi) && isTRUE(all.equal(sum(pi), 1)))) {
 48 |     stop("`pi` must be a numeric vector that sums to 1.", call. = FALSE)
 49 |   }
 50 | 
 51 |   pi_K <- length(pi)
 52 | 
 53 |   theta_dim <- dim(theta)
 54 |   if (!length(theta_dim) == 3) {
 55 |     stop("`theta` must be a three-dimensional array.", call. = FALSE)
 56 |   }
 57 | 
 58 |   # Check theta.
 59 |   if (length(unique(theta_dim[2:3])) != 1) {
 60 |     stop("The last two dimensions of `theta` must be the same.", call. = FALSE)
 61 |   }
 62 | 
 63 |   theta_J <- theta_dim[[1]]
 64 |   theta_K <- theta_dim[[2]]
 65 | 
 66 |   for (j in seq_len(theta_J)) {
 67 |     for (k in seq_len(theta_K)) {
 68 |       if (!isTRUE(all.equal(sum(theta[j, k, ]), 1))) {
 69 |         stop("theta[", j, ", ", k, ", ] must sum to 1.", call. = FALSE)
 70 |       }
 71 |     }
 72 |   }
 73 | 
 74 |   # Check consistency of pi and theta.
 75 |   if (pi_K != theta_K) {
 76 |     stop("The number of ratings implied by pi and theta is not the same.",
 77 |          call. = FALSE)
 78 |   }
 79 | 
 80 |   # Check the simulation data.
 81 |   sim_data <- as.data.frame(sim_data)
 82 |   col_names <- colnames(sim_data)
 83 |   if (ncol(sim_data) != 2 || !all(c("item", "rater") %in% col_names)) {
 84 |     stop("`sim_data` must have two columns 'item' and 'rater'", call. = FALSE)
 85 |   }
 86 | 
 87 |   if (!all(sim_data$rater %in% seq_len(theta_J))) {
 88 |     stop("The number of raters implied by theta and implied by the simulation ",
 89 |          "data must match.", call. = FALSE)
 90 |   }
 91 | 
 92 |   # Perform the simulation.
 93 |   n <- nrow(sim_data)
 94 |   K <- pi_K
 95 |   I <- max(sim_data$item)
 96 | 
 97 |   item_z <- sample(1:K, size = I, replace = TRUE, prob = pi)
 98 |   z <- item_z[sim_data$item]
 99 | 
100 |   ratings <- numeric(n)
101 |   for (i in seq_len(n)) {
102 |     j <- sim_data$rater[[i]]
103 |     ratings[[i]] <- sample(1:K, 1, prob = theta[j, z[[i]], ])
104 |   }
105 | 
106 |   sim <- cbind(sim_data, z = z, ratings = ratings)
107 |   sim
108 | }
109 | 
110 | #' Simulate data from the hierarchical Dawid-Skene model
111 | #'
112 | #' @param pi The pi parameter of the hierarchical Dawid-Skene model.
113 | #' @param mu The mu parameter of the hierarchical Dawid-Skene model.
114 | #' @param sigma The sigma parameter of the hierarchical Dawid-Skene model.
115 | #' @param sim_data Data to guide the simulation. The data must be in the long
116 | #'   data format used in `rater()` except without the 'rating' column. The data
117 | #'   specifies:
118 | #'   * the number of items in the data, and
119 | #'   * which raters rate each item and how many times they do so.
120 | #' @param seed An optional random seed to use.
121 | #'
122 | #' @return The passed `sim_data` augmented with columns:
123 | #'   * `"z"` containing the latent class of each item,
124 | #'   * `"rating"` containing the simulated rating.
125 | #'
126 | #' @details The number of raters implied by the entries in the rater column
127 | #'   must match the number of raters implied by the passed theta parameter.
128 | #'
129 | #' @examples
130 | #'
131 | #' \donttest{
132 | #'
133 | #' J <- 5
134 | #' K <- 4
135 | #'
136 | #' pi <- rep(1 / K, K)
137 | #'
138 | #' mu <- matrix(0, nrow = K, ncol = K)
139 | #' diag(mu) <- 5
140 | #'
141 | #' sigma <- matrix(sqrt(2) / sqrt(pi), nrow = K, ncol = K)
142 | #'
143 | #' sim_data <- data.frame(item = rep(1:2, each = 5), rater = rep(1:5, 2))
144 | #'
145 | #' sim_result <- simulate_hier_dawid_skene_model(pi, mu, sigma, sim_data)
146 | #'
147 | #' sim_result$sim
148 | #' sim_result$theta
149 | #'
150 | #' }
151 | #'
152 | #' @importFrom stats rnorm
153 | #'
154 | #' @export
155 | #'
156 | simulate_hier_dawid_skene_model <- function(pi, mu, sigma, sim_data, seed = NULL) {
157 | 
158 |   if (!is.null(seed)) {
159 |     set.seed(seed)
160 |   }
161 | 
162 |   # Check pi.
163 |   if (!(is.numeric(pi) && isTRUE(all.equal(sum(pi), 1)))) {
164 |     stop("`pi` must be a numeric vector that sums to 1.", call. = FALSE)
165 |   }
166 | 
167 |   pi_K <- length(pi)
168 | 
169 |   # Check mu.
170 |   if (!(is.matrix(mu) && length(unique(dim(mu))) == 1)) {
171 |     stop("`mu` must be a square matrix.")
172 |   }
173 | 
174 |   mu_K <- nrow(mu)
175 | 
176 |   # Check sigma.
177 |   if (!(is.matrix(sigma) && length(unique(dim(sigma))) == 1 && all(sigma > 0))) {
178 |     stop("`sigma` must be a square matrix with all elements be greater then 0.")
179 |   }
180 | 
181 |   sigma_K <- nrow(sigma)
182 | 
183 |   # Check consistency of parameters.
184 |   if (length(unique(c(pi_K, mu_K, sigma_K))) != 1) {
185 |     stop("`pi`, `mu` and `sigma` imply different numbers of categories.",
186 |          call. = FALSE)
187 |   }
188 | 
189 |   # Check the simulation data.
190 |   sim_data <- as.data.frame(sim_data)
191 |   col_names <- colnames(sim_data)
192 |   if (ncol(sim_data) != 2 || !all(c("item", "rater") %in% col_names)) {
193 |     stop("`sim_data` must have two columns 'item' and 'rater'", call. = FALSE)
194 |   }
195 | 
196 |   # Perform the simulation.
197 |   n <- nrow(sim_data)
198 |   K <- pi_K
199 |   J <- max(sim_data$rater)
200 |   I <- max(sim_data$item)
201 | 
202 |   item_z <- sample(1:K, size = I, replace = TRUE, prob = pi)
203 |   z <- item_z[sim_data$item]
204 | 
205 |   gamma <- array(0, c(J, K, K))
206 |   for (j in seq_len(J)) {
207 |     for (k in seq_len(K)) {
208 |       for (i in seq_len(K)) {
209 |         gamma[j, k, k] <- stats::rnorm(1, mu[k, k], sigma[k, k])
210 |       }
211 |     }
212 |   }
213 | 
214 |   theta <- array(0, c(J, K, K))
215 |   for (j in seq_len(J)) {
216 |     for (k in seq_len(K)) {
217 |       theta[j, k, ] <- softmax(gamma[j, k, ])
218 |     }
219 |   }
220 | 
221 |   ratings <- numeric(n)
222 |   for (i in seq_len(n)) {
223 |     j <- sim_data$rater[[i]]
224 |     ratings[[i]] <- sample(1:K, 1, prob = theta[j, z[[i]], ])
225 |   }
226 | 
227 |   sim <- cbind(sim_data, z = z, ratings = ratings)
228 | 
229 |   out <- list(theta = theta, sim = sim)
230 |   out
231 | }
232 | 
233 | #' Make a theta parameter
234 | #'
235 | #' @param diag_values The diagonal entries of each error matrix.
236 | #' @param J The number of raters (The umber matrices in 3D array).
237 | #' @param K The number of latent classes.
238 | #'
239 | #' @return A c(J, K, K) array; the theta parameter
240 | #'
241 | #' @details The `diag_values` argument can either be a numeric vector of length
242 | #'   1 or J. If it is length J, the jth element is the diagonal values of the
243 | #'   error matrix for the jth rater. If it is length 1 all raters have the same
244 | #'   diagonal values.
245 | #'
246 | #' @examples
247 | #'
248 | #' theta <- make_theta(0.7, 5, 4)
249 | #' theta[1, , ]
250 | #'
251 | #' @export
252 | #'
253 | make_theta <- function(diag_values, J, K) {
254 | 
255 |   if (!(is.numeric(diag_values) && all(diag_values > 0 & diag_values < 1))) {
256 |     stop("`diag_values` must be a probability.", call. = FALSE)
257 |   }
258 | 
259 |   if (length(diag_values) != 1 & length(diag_values) != J) {
260 |     stop("`diag_values` must be length 1 or length `J`.", call. = FALSE)
261 |   }
262 | 
263 |   if (length(diag_values) == 1) {
264 |     diag_values <- rep(diag_values, J)
265 |   }
266 | 
267 |   theta <- array(0, dim = c(J, K, K))
268 |   for (j in 1:J) {
269 |     theta[j, ,] <- array((1 - diag_values[[j]]) / (K - 1))
270 |     for (k in 1:K) {
271 |       theta[j, k, k] <- diag_values[[j]]
272 |     }
273 |   }
274 | 
275 |   theta
276 | }
277 | 
278 | #' Produce simulation data from a 'complete' rating design
279 | #'
280 | #' @param I The number of items.
281 | #' @param J The number of raters.
282 | #' @param N The number of times each rater rates each item.
283 | #'
284 | #' @return Simulation data in the format required by
285 | #'   [simulate_dawid_skene_model()] or [simulate_hier_dawid_skene_model()].
286 | #'
287 | #' @details A 'complete' rating design is situation where every rater rates
288 | #'   each item the same number of times. In this function the number of times
289 | #'   each rater rates each item is `N`.
290 | #'
291 | #' @examples
292 | #'
293 | #' make_complete_rating_design_sim_data(100, 5, 2)
294 | #'
295 | #' @export
296 | #'
297 | make_complete_rating_design_sim_data <- function(I, J, N) {
298 |   data.frame(item = rep(1:I, each = J * N), rater = rep(1:J, I * N))
299 | }
300 | 
301 | 


--------------------------------------------------------------------------------
/R/stanmodels.R:
--------------------------------------------------------------------------------
 1 | # Generated by rstantools.  Do not edit by hand.
 2 | 
 3 | # names of stan models
 4 | stanmodels <- c("class_conditional_dawid_skene", "dawid_skene", "grouped_data", "hierarchical_dawid_skene")
 5 | 
 6 | # load each stan module
 7 | Rcpp::loadModule("stan_fit4class_conditional_dawid_skene_mod", what = TRUE)
 8 | Rcpp::loadModule("stan_fit4dawid_skene_mod", what = TRUE)
 9 | Rcpp::loadModule("stan_fit4grouped_data_mod", what = TRUE)
10 | Rcpp::loadModule("stan_fit4hierarchical_dawid_skene_mod", what = TRUE)
11 | 
12 | # instantiate each stanmodel object
13 | stanmodels <- sapply(stanmodels, function(model_name) {
14 |   # create C++ code for stan model
15 |   stan_file <- if(dir.exists("stan")) "stan" else file.path("inst", "stan")
16 |   stan_file <- file.path(stan_file, paste0(model_name, ".stan"))
17 |   stanfit <- rstan::stanc_builder(stan_file,
18 |                                   allow_undefined = TRUE,
19 |                                   obfuscate_model_name = FALSE)
20 |   stanfit$model_cpp <- list(model_cppname = stanfit$model_name,
21 |                             model_cppcode = stanfit$cppcode)
22 |   # create stanmodel object
23 |   methods::new(Class = "stanmodel",
24 |                model_name = stanfit$model_name,
25 |                model_code = stanfit$model_code,
26 |                model_cpp = stanfit$model_cpp,
27 |                mk_cppmodule = function(x) get(paste0("rstantools_model_", model_name)))
28 | })
29 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | #' Numerically stable log_sum_exp function
 2 | #'
 3 | #' @param x vector of real numbers
 4 | #'
 5 | #' @noRd
 6 | #'
 7 | logsumexp <- function(x) {
 8 |     y <- max(x)
 9 |     y + log(sum(exp(x - y)))
10 |   }
11 | 
12 | #' Softmax function
13 | #'
14 | #' @param x vector of real numbers
15 | #'
16 | #' @noRd
17 | #'
18 | softmax <- function(x) {
19 |   exp(x - logsumexp(x))
20 | }
21 | 
22 | pi_to_long_format <- function(par) {
23 |   K <- length(par)
24 |   out <- matrix(par, ncol = 1, nrow = K)
25 |   rownames(out) <- sprintf("pi[%s]", 1:K)
26 |   out
27 | }
28 | 
29 | theta_to_long_format <- function(par) {
30 |   J <- dim(par)[[1]]
31 |   K <- dim(par)[[2]]
32 |   n <- 1
33 |   values <- numeric(J * K * K)
34 |   names <- character(J * K * K)
35 |   for (j in 1:J) {
36 |     for (k in 1:K) {
37 |       for (i in 1:K) {
38 |         values[[n]] <- par[j, k, i]
39 |         names[[n]] <- sprintf("theta[%s, %s, %s]", j, k, i)
40 |         n <- n + 1
41 |       }
42 |     }
43 |   }
44 |   out <- matrix(values, nrow = J * K * K, ncol = 1)
45 |   rownames(out) <- names
46 |   out
47 | }
48 | 
49 | z_to_long_format <- function(par) {
50 |   I <- length(par)
51 |   out <- matrix(par, ncol = 1, nrow = I)
52 |   rownames(out) <- sprintf("z[%s]", 1:I)
53 |   out
54 | }
55 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
 1 | # The machine information is taken from the rstan startup message.
 2 | .onAttach <- function(...) {
 3 |   packageStartupMessage(
 4 |     "* The rater package uses `Stan` to fit bayesian models.\n",
 5 |     "* If you are working on a local, multicore CPU with excess RAM please call:\n",
 6 |     "* options(mc.cores = parallel::detectCores())\n",
 7 |     "* This will allow Stan to run inference on multiple cores in parallel."
 8 |   )
 9 | }
10 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | output: github_document
 3 | ---
 4 | 
 5 | ```{r setup, include=FALSE}
 6 | knitr::opts_chunk$set(echo = TRUE, fig.path = "man/figures/README-")
 7 | set.seed(1)
 8 | ```
 9 | 
10 | # rater <img src="man/figures/rater.png" align="right" width="160" />
11 | 
12 | <!-- badges: start -->
13 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/rater)](https://cran.r-project.org/package=rater)
14 | [![R-CMD-check](https://github.com/jeffreypullin/rater/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/jeffreypullin/rater/actions/workflows/R-CMD-check.yaml)
15 | [![Coverage status](https://codecov.io/gh/jeffreypullin/rater/branch/master/graph/badge.svg)](https://app.codecov.io/gh/jeffreypullin/rater?branch=master)
16 | ![pkgdown](https://github.com/jeffreypullin/rater/workflows/pkgdown/badge.svg)
17 | 
18 | <!-- badges: end -->
19 | 
20 | **rater** provides tools for fitting and interrogating statistical models of repeated categorical rating data. The package provides a simple interface to fit a selection of these models, with arbitrary prior parameters, using MCMC and optimisation provided by [Stan](https://mc-stan.org/). A selection of functions are also provided to plot parts of these models and extract key parameters. 
21 | 
22 | ## Example usage:
23 | 
24 | ```{r fit-demo, message = FALSE, warning = FALSE, results = "hide"}
25 | library(rater)
26 | 
27 | fit <- rater(anesthesia, "dawid_skene") # Sampling output suppressed.
28 | ```
29 | 
30 | Get the posterior mean of the "pi" parameter.
31 | 
32 | ```{r extract-demo}
33 | point_estimate(fit, "pi")
34 | ```
35 | 
36 | Plot the accuracy matrices of the raters.
37 | 
38 | ```{r plot-demo}
39 | plot(fit, "raters")
40 | ```
41 | 
42 | ## Installation
43 | 
44 | **rater** requires the **rstan** package to fit models. Detailed instructions to install **rstan** can be found [here](https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started)
45 | 
46 | ### CRAN 
47 | 
48 | Install **rater** from CRAN with: 
49 | 
50 | ```{r cran-installation, eval = FALSE}
51 | install.packages("rater")
52 | ```
53 | 
54 | ### Development
55 | 
56 | To install the development version of **rater** from GitHub run:
57 | 
58 | ```{r dev-installation, eval = FALSE}
59 | # install.packages("remotes")
60 | remotes::install_github("jeffreypullin/rater")
61 | ```
62 | 
63 | #### Installation notes:
64 | 
65 | - When installing from source, i.e. when installing the development version or installing from CRAN on Linux, the **Stan** models in the package will be compiled - this will lead to an install time of few minutes. Please be patient - this compilation means that **no** compilation is required when using the package
66 | 
67 | - During compilation many warnings may be displayed in the terminal; these are harmless but impossible to suppress.
68 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # rater <img src="man/figures/rater.png" align="right" width="160" />
 3 | 
 4 | <!-- badges: start -->
 5 | 
 6 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/rater)](https://cran.r-project.org/package=rater)
 7 | [![R-CMD-check](https://github.com/jeffreypullin/rater/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/jeffreypullin/rater/actions/workflows/R-CMD-check.yaml)
 8 | [![Coverage
 9 | status](https://codecov.io/gh/jeffreypullin/rater/branch/master/graph/badge.svg)](https://app.codecov.io/gh/jeffreypullin/rater?branch=master)
10 | ![pkgdown](https://github.com/jeffreypullin/rater/workflows/pkgdown/badge.svg)
11 | 
12 | <!-- badges: end -->
13 | 
14 | **rater** provides tools for fitting and interrogating statistical
15 | models of repeated categorical rating data. The package provides a
16 | simple interface to fit a selection of these models, with arbitrary
17 | prior parameters, using MCMC and optimisation provided by
18 | [Stan](https://mc-stan.org/). A selection of functions are also provided
19 | to plot parts of these models and extract key parameters.
20 | 
21 | ## Example usage:
22 | 
23 | ``` r
24 | library(rater)
25 | 
26 | fit <- rater(anesthesia, "dawid_skene") # Sampling output suppressed.
27 | ```
28 | 
29 | Get the posterior mean of the “pi” parameter.
30 | 
31 | ``` r
32 | point_estimate(fit, "pi")
33 | ```
34 | 
35 |     ## $pi
36 |     ## [1] 0.37559632 0.40734481 0.14321934 0.07383953
37 | 
38 | Plot the accuracy matrices of the raters.
39 | 
40 | ``` r
41 | plot(fit, "raters")
42 | ```
43 | 
44 | ![](man/figures/README-plot-demo-1.png)<!-- -->
45 | 
46 | ## Installation
47 | 
48 | **rater** requires the **rstan** package to fit models. Detailed
49 | instructions to install **rstan** can be found
50 | [here](https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started)
51 | 
52 | ### CRAN
53 | 
54 | Install **rater** from CRAN with:
55 | 
56 | ``` r
57 | install.packages("rater")
58 | ```
59 | 
60 | ### Development
61 | 
62 | To install the development version of **rater** from GitHub run:
63 | 
64 | ``` r
65 | # install.packages("remotes")
66 | remotes::install_github("jeffreypullin/rater")
67 | ```
68 | 
69 | #### Installation notes:
70 | 
71 | - When installing from source, i.e. when installing the development
72 |   version or installing from CRAN on Linux, the **Stan** models in the
73 |   package will be compiled - this will lead to an install time of few
74 |   minutes. Please be patient - this compilation means that **no**
75 |   compilation is required when using the package
76 | 
77 | - During compilation many warnings may be displayed in the terminal;
78 |   these are harmless but impossible to suppress.
79 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | comment: false
 2 | 
 3 | coverage:
 4 |   status:
 5 |     project:
 6 |       default:
 7 |         target: auto
 8 |         threshold: 1%
 9 |     patch:
10 |       default:
11 |         target: auto
12 |         threshold: 1%
13 | 


--------------------------------------------------------------------------------
/configure:
--------------------------------------------------------------------------------
1 | # Generated by rstantools.  Do not edit by hand.
2 | 
3 | #! /bin/sh
4 | "${R_HOME}/bin/Rscript" -e "rstantools::rstan_config()"
5 | 


--------------------------------------------------------------------------------
/configure.win:
--------------------------------------------------------------------------------
1 | # Generated by rstantools.  Do not edit by hand.
2 | 
3 | #! /bin/sh
4 | "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "rstantools::rstan_config()"
5 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## rater 1.3.1
 2 | 
 3 | This is a new patch release of the package that updates the stan code to be compatible with a future release of the rstan package. 
 4 | 
 5 | ## Test environments
 6 | 
 7 | * local mac OS install, R 4.2.2
 8 | * ubuntu 22.04 (on github actions), R 4.3.1
 9 | * mac OS Monterey 12 (on github actions) R 4.3.1
10 | * Microsoft Windows Server 2022 (on github actions) R 4.3.1
11 | * win-builder (devel)
12 | 
13 | ## R CMD check results
14 | 
15 | Dawid and Skene are names and not misspelled.
16 | 
17 | rater uses the rstan package which causes the two additional notes.  
18 | 
19 | 0 errors | 0 warnings | 3 notes
20 | 
21 | * checking CRAN incoming feasibility ... NOTE
22 |   Maintainer: 'Jeffrey Pullin <jeffrey.pullin@gmail.com>'
23 |   
24 | * checking installed package size ... NOTE
25 |     installed size is  5.8Mb
26 |     sub-directories of 1Mb or more:
27 |     libs   4.6Mb
28 | 
29 | * checking for GNU extensions in Makefiles ... NOTE
30 |     GNU make is a SystemRequirements.
31 | 


--------------------------------------------------------------------------------
/data/anesthesia.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jeffreypullin/rater/60fbe70c02cdec6864c892ae819ae40c357e8ad0/data/anesthesia.rda


--------------------------------------------------------------------------------
/data/caries.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jeffreypullin/rater/60fbe70c02cdec6864c892ae819ae40c357e8ad0/data/caries.rda


--------------------------------------------------------------------------------
/inst/include/stan_meta_header.hpp:
--------------------------------------------------------------------------------
1 | // Insert all #include<foo.hpp> statements here
2 | 


--------------------------------------------------------------------------------
/inst/stan/class_conditional_dawid_skene.stan:
--------------------------------------------------------------------------------
 1 | data {
 2 |   int<lower=1> N;               // total number of annotations
 3 |   int<lower=1> J;               // number of annotators
 4 |   int<lower=1> K;               // number of annotation categories
 5 |   int<lower=1> I;               // number of items
 6 |   array[N] int<lower=1, upper=I> ii;  // item index for annotation n
 7 |   array[N] int<lower=1, upper=J> jj;  // annotator for annotation n
 8 |   array[N] int<lower=0, upper=K> y;   // annotation for observation n
 9 |   vector<lower=0>[K] alpha;     // prior for pi
10 |   // Assume the same across raters
11 |   vector<lower=0>[K] beta_1;    // prior for theta
12 |   vector<lower=0>[K] beta_2;
13 | }
14 | 
15 | parameters {
16 |   simplex[K] pi;
17 |   matrix<lower=0, upper=1>[J, K] theta;
18 | }
19 | 
20 | transformed parameters {
21 |   array[I] vector[K] log_p_z;
22 | 
23 |   for (i in 1:I) {
24 |     log_p_z[i] = log(pi);
25 |   }
26 | 
27 |   for (n in 1:N) {
28 |     for (k in 1:K) {
29 |       if (k == y[n]) {
30 |         log_p_z[ii[n], k] = log_p_z[ii[n], k] + log(theta[jj[n], k]);
31 |       } else {
32 |         log_p_z[ii[n], k] = log_p_z[ii[n], k] + log1m(theta[jj[n], k]) - log(K - 1);
33 |       }
34 |     }
35 |   }
36 | 
37 | }
38 | 
39 | model {
40 |   // prior on pi
41 |   pi ~ dirichlet(alpha);
42 | 
43 |   for (j in 1:J) {
44 |     for (k in 1:K) {
45 |       theta[j, k] ~ beta(beta_1[k], beta_2[k]);
46 |     }
47 |   }
48 | 
49 |   for (i in 1:I) {
50 |     // log_sum_exp used for numerical stability
51 |     target += log_sum_exp(log_p_z[i]);
52 |   }
53 | }
54 | 
55 | generated quantities {
56 |   vector[I] log_lik;
57 |   for (i in 1:I) {
58 |     log_lik[i] = log_sum_exp(log_p_z[i]);
59 |   }
60 | }
61 | 


--------------------------------------------------------------------------------
/inst/stan/dawid_skene.stan:
--------------------------------------------------------------------------------
 1 | /* Bayesian implementation of Dawid and Skene's noisy categorical rating model.
 2 |  * This implementation requires data in a 'long' format in order to allow
 3 |  * incomplete designs. This implementation is heavily based on the implementation
 4 |  * of this model for complete designs given in the Stan Manual section TODO.
 5 |  * as well as publicly avaliable code for fitting MAP estimates via EM
 6 |  * for the same model. That code can be veiwed here:
 7 |  * This code was written before the author was aware of an implemtation of this
 8 |  * model in Paun et. al. 2018.
 9 |  */
10 | 
11 | data {
12 |   int<lower=1> N;               // total number of annotations
13 |   int<lower=1> J;               // number of annotators
14 |   int<lower=1> K;               // number of annotation categories
15 |   int<lower=1> I;               // number of items
16 |   array[N] int<lower=1, upper=I> ii;  // item index for annotation n
17 |   array[N] int<lower=1, upper=J> jj;  // annotator for annotation n
18 |   array[N] int<lower=0, upper=K> y;   // annotation for observation n
19 |   vector<lower=0>[K] alpha;     // prior for pi
20 |   array[J, K] vector<lower=0>[K] beta;   // prior for theta
21 | }
22 | 
23 | parameters {
24 |   simplex[K] pi;
25 |   array[J, K] simplex[K] theta;
26 | }
27 | 
28 | transformed parameters {
29 |   array[I] vector[K] log_p_z;
30 |   for (i in 1:I) {
31 |     log_p_z[i] = log(pi);
32 |   }
33 |   for (n in 1:N) {
34 |     for (k in 1:K) {
35 |       // Here we marginalise over the latent discrete paramter
36 |       log_p_z[ii[n], k] = log_p_z[ii[n], k] + log(theta[jj[n], k, y[n]]);
37 |     }
38 |   }
39 | }
40 | 
41 | model {
42 |   // prior on pi
43 |   pi ~ dirichlet(alpha);
44 | 
45 |   for (j in 1:J) {
46 |     for (k in 1:K) {
47 |        //prior on theta
48 |        theta[j, k] ~ dirichlet(beta[j, k]);
49 |     }
50 |   }
51 | 
52 |   for (i in 1:I) {
53 |     // log_sum_exp used for numerical stability
54 |     target += log_sum_exp(log_p_z[i]);
55 |   }
56 | }
57 | 
58 | generated quantities {
59 |   vector[I] log_lik;
60 |   for (i in 1:I) {
61 |     log_lik[i] = log_sum_exp(log_p_z[i]);
62 |   }
63 | }
64 | 


--------------------------------------------------------------------------------
/inst/stan/grouped_data.stan:
--------------------------------------------------------------------------------
 1 | data {
 2 |   int<lower=1> N;                  // number of different amounts
 3 |   int<lower=1> K;                  // number of annotation categories
 4 |   int<lower=1> J;                  // number of annotators
 5 |   array[N] real tally;                   // total of each different combination
 6 |   array[N, J] int<lower=1,upper=K> key;  //
 7 |   vector<lower=0>[K] alpha;        // prior for pi
 8 |   array[J, K] vector<lower=0>[K] beta;   // prior for theta
 9 | }
10 | 
11 | parameters {
12 |   simplex[K] pi;
13 |   array[J, K] simplex[K] theta;
14 | }
15 | 
16 | transformed parameters {
17 |   array[N] vector[K] log_p_z;
18 |   // use the prior for the prevalence
19 |   for (i in 1:N) {
20 |     log_p_z[i] = log(pi);
21 |     for (j in 1:J)
22 |       for (k in 1:K)
23 |          log_p_z[i, k] = log_p_z[i, k] + log(theta[j, k, key[i, j]]);
24 |   }
25 | }
26 | 
27 | model {
28 |   // prior on pi
29 |   pi ~ dirichlet(alpha);
30 | 
31 |   for (j in 1:J) {
32 |     for (k in 1:K) {
33 |       theta[j, k] ~ dirichlet(beta[j, k]);
34 |     }
35 |   }
36 | 
37 |   // adding repeatedly on log scale - multiplication
38 |   for (i in 1:N) {
39 |     target += tally[i] * log_sum_exp(log_p_z[i]);
40 |   }
41 | }
42 | 


--------------------------------------------------------------------------------
/inst/stan/hierarchical_dawid_skene.stan:
--------------------------------------------------------------------------------
 1 | data {
 2 |   int<lower=1> N;               // Total number of ratings.
 3 |   int<lower=1> J;               // Number of raters.
 4 |   int<lower=1> K;               // Number of rating categories.
 5 |   int<lower=1> I;               // Number of items.
 6 |   array[N] int<lower=1, upper=I> ii;  // Item index for rating n.
 7 |   array[N] int<lower=1, upper=J> jj;  // Rater for annotation n.
 8 |   array[N] int<lower=0, upper=K> y;   // Rating for observation n.
 9 |   vector<lower=0>[K] alpha;     // Prior on pi.
10 | }
11 | 
12 | parameters {
13 |   simplex[K] pi;
14 |   array[J] matrix[K, K] beta_raw;
15 |   matrix[K, K] mu;
16 |   matrix<lower=0>[K, K] sigma;
17 | }
18 | 
19 | transformed parameters {
20 |   array[J] matrix[K,K] beta;
21 |   array[I] vector[K] log_p_z;
22 |   vector[K] log_pi;
23 | 
24 |   for(j in 1:J) {
25 |     // Non centered parameterization.
26 |     beta[j] = mu + sigma .* beta_raw[j];
27 |     for(k in 1:K) {
28 |       // Log softmax
29 |       beta[j,k] = beta[j,k] - log_sum_exp(beta[j,k]);
30 |     }
31 |   }
32 | 
33 |   // Compute outside the loop for efficency.
34 |   log_pi = log(pi);
35 |   for (i in 1:I) {
36 |     log_p_z[i] = log_pi;
37 |   }
38 | 
39 |   for (n in 1:N) {
40 |     for (k in 1:K) {
41 |       log_p_z[ii[n], k] = log_p_z[ii[n], k] + beta[jj[n], k, y[n]];
42 |     }
43 |   }
44 | 
45 | }
46 | 
47 | model {
48 | 
49 |   pi ~ dirichlet(alpha);
50 |   for (k in 1:K) {
51 |     for (i in 1:K) {
52 |       if (k == i) {
53 |         mu[k, i] ~ normal(2, 1);
54 |       } else {
55 |         mu[k, i] ~ normal(0, 1);
56 |       }
57 |     }
58 |   }
59 |   to_vector(sigma) ~ normal(0, 1);
60 | 
61 |   for(j in 1:J) {
62 |     // part of the non centered parameterization
63 |     to_vector(beta_raw[j]) ~ normal(0, 1);
64 |   }
65 | 
66 |   for (i in 1:I) {
67 |     target += log_sum_exp(log_p_z[i]);
68 |   }
69 | 
70 | }
71 | 
72 | generated quantities {
73 |   vector[I] log_lik;
74 |   for (i in 1:I) {
75 |     log_lik[i] = log_sum_exp(log_p_z[i]);
76 |   }
77 | }
78 | 


--------------------------------------------------------------------------------
/inst/stan/include/license.stan:
--------------------------------------------------------------------------------
 1 | /*
 2 |     rater is free software: you can redistribute it and/or modify
 3 |     it under the terms of the GNU General Public License as published by
 4 |     the Free Software Foundation, either version 3 of the License, or
 5 |     (at your option) any later version.
 6 | 
 7 |     rater is distributed in the hope that it will be useful,
 8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 |     GNU General Public License for more details.
11 | 
12 |     You should have received a copy of the GNU General Public License
13 |     along with rater.  If not, see <http://www.gnu.org/licenses/>.
14 | */
15 | 


--------------------------------------------------------------------------------
/man/anesthesia.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{anesthesia}
 5 | \alias{anesthesia}
 6 | \title{Anaesthetist ratings for patient suitability for surgery}
 7 | \format{
 8 | A \code{data.frame} with 315 rows and 3 columns:
 9 | \describe{
10 | \item{item}{The item index - which item is being rated}
11 | \item{rater}{The rater index - which rater is doing the rating}
12 | \item{rating}{The rating given}
13 | }
14 | }
15 | \usage{
16 | anesthesia
17 | }
18 | \description{
19 | The data consist of ratings, on a 4-point scale, made by five anaesthetists
20 | of patients' pre-operative health. The ratings were based on the
21 | anaesthetists assessments of a standard form completed for all of the
22 | patients. There are 45 patients (items) and five anaesthetists (raters) in
23 | total. The first anaesthetist assessed the forms a total of three times,
24 | spaced several weeks apart. The other anaesthetists each assessed the forms
25 | once. The data is in 'long' format.
26 | }
27 | \references{
28 | Dawid, A. P., and A. M. Skene. "Maximum Likelihood Estimation of Observer
29 | Error-Rates Using the EM Algorithm." Applied Statistics 28, no. 1 (1979): 20.
30 | }
31 | \keyword{datasets}
32 | 


--------------------------------------------------------------------------------
/man/as_mcmc.list.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rater_fit_class.R
 3 | \name{as_mcmc.list}
 4 | \alias{as_mcmc.list}
 5 | \title{Convert a rater_fit object to a {coda} \code{mcmc.list} object.}
 6 | \usage{
 7 | as_mcmc.list(fit)
 8 | }
 9 | \arguments{
10 | \item{fit}{A rater_fit object.}
11 | }
12 | \value{
13 | A {coda} mcmc.list object.
14 | }
15 | \description{
16 | Convert a rater_fit object to a {coda} \code{mcmc.list} object.
17 | }
18 | \examples{
19 | \donttest{
20 | 
21 | # Fit a model using MCMC (the default).
22 | mcmc_fit <- rater(anesthesia, "dawid_skene")
23 | 
24 | # Convert it to an mcmc.list
25 | rater_mcmc_list <- as_mcmc.list(mcmc_fit)
26 | 
27 | }
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/man/caries.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{caries}
 5 | \alias{caries}
 6 | \title{Dentist ratings of whether caries are healthy or not based on X-rays}
 7 | \format{
 8 | A \code{data.frame} with 6 columns and 32 rows.
 9 | \describe{
10 | \item{rater_1}{The rating of the dentist 1}
11 | \item{rater_2}{The rating of the dentist 2}
12 | \item{rater_3}{The rating of the dentist 3}
13 | \item{rater_4}{The rating of the dentist 4}
14 | \item{rater_5}{The rating of the dentist 5}
15 | \item{n}{The number of times the rating pattern appears in the dataset}
16 | }
17 | }
18 | \usage{
19 | caries
20 | }
21 | \description{
22 | It consists of binary ratings, made by 5 dentists, of whether a given tooth
23 | was healthy (sound) or had caries, also known as cavities. The ratings were
24 | performed using X-ray only, which was thought to be more error-prone than
25 | visual/tactile assessment of each tooth. In total 3,689 ratings were made.
26 | This data is in 'grouped' format. Each row is one of the 'pattern' with
27 | the final columns being a tally of how many times that pattern occurs in
28 | the dataset.
29 | }
30 | \references{
31 | Espeland, Mark A., and Stanley L. Handelman. “Using Latent Class Models to
32 | Characterize and Assess Relative Error in Discrete Measurements.”
33 | Biometrics 45, no. 2 (1989): 587–99.
34 | }
35 | \keyword{datasets}
36 | 


--------------------------------------------------------------------------------
/man/class_probabilities.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/point_estimate.R
 3 | \name{class_probabilities}
 4 | \alias{class_probabilities}
 5 | \alias{class_probabilities.mcmc_fit}
 6 | \alias{class_probabilities.optim_fit}
 7 | \title{Extract latent class probabilities from a rater fit object}
 8 | \usage{
 9 | class_probabilities(fit, ...)
10 | 
11 | \method{class_probabilities}{mcmc_fit}(fit, ...)
12 | 
13 | \method{class_probabilities}{optim_fit}(fit, ...)
14 | }
15 | \arguments{
16 | \item{fit}{A rater fit object.}
17 | 
18 | \item{...}{Extra arguments.}
19 | }
20 | \value{
21 | A I * K matrix where each element is the probably of item i being
22 | of class k. (I is the number of items and K the number of classes).
23 | }
24 | \description{
25 | Extract latent class probabilities from a rater fit object
26 | }
27 | \details{
28 | The latent class probabilities are obtained by marginalising out
29 | the latent class and then calculating, for each draw of pi and theta, the
30 | conditional probability of the latent class given the other parameters
31 | and the data. Averaging these conditional probabilities gives the
32 | (unconditional) latent class probabilities retuned by this function.
33 | }
34 | \examples{
35 | \donttest{
36 | 
37 | fit <- rater(anesthesia, "dawid_skene")
38 | class_probabilities(fit)
39 | 
40 | }
41 | 
42 | }
43 | 


--------------------------------------------------------------------------------
/man/figures/README-plot-demo-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jeffreypullin/rater/60fbe70c02cdec6864c892ae819ae40c357e8ad0/man/figures/README-plot-demo-1.png


--------------------------------------------------------------------------------
/man/figures/rater.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jeffreypullin/rater/60fbe70c02cdec6864c892ae819ae40c357e8ad0/man/figures/rater.png


--------------------------------------------------------------------------------
/man/get_stanfit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rater_fit_class.R
 3 | \name{get_stanfit}
 4 | \alias{get_stanfit}
 5 | \title{Get the underlying \code{stanfit} object from a \code{rater_fit} object.}
 6 | \usage{
 7 | get_stanfit(fit)
 8 | }
 9 | \arguments{
10 | \item{fit}{A \code{rater_fit} object.}
11 | }
12 | \value{
13 | A \code{stanfit} object from rstan.
14 | }
15 | \description{
16 | Get the underlying \code{stanfit} object from a \code{rater_fit} object.
17 | }
18 | \examples{
19 | 
20 | \donttest{
21 | fit <- rater(anesthesia, "dawid_skene", verbose = FALSE)
22 | 
23 | stan_fit <- get_stanfit(fit)
24 | stan_fit
25 | 
26 | }
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/man/loo.rater_fit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/model_comparison.R
 3 | \name{loo.rater_fit}
 4 | \alias{loo.rater_fit}
 5 | \alias{loo}
 6 | \title{Compute the PSIS LOO CV - a measure of model fit - of a rater fit object.}
 7 | \usage{
 8 | \method{loo}{rater_fit}(x, ..., cores = getOption("mc.cores", 1))
 9 | }
10 | \arguments{
11 | \item{x}{A \code{rater_fit} object. All model types are currently supported
12 | except the basic Dawid-Skene model fit with grouped data.}
13 | 
14 | \item{...}{Other arguments passed.}
15 | 
16 | \item{cores}{The number of cores to use when calling the underlying
17 | functions. By default the value of the \code{mc.cores} option.}
18 | }
19 | \value{
20 | A loo object.
21 | }
22 | \description{
23 | Compute the PSIS LOO CV - a measure of model fit - of a rater fit object.
24 | }
25 | \details{
26 | This function is somewhat experimental; model comparison is always
27 | difficult and choosing between variants of the Dawid-Skene model should
28 | be largely guided by considerations of data size and what is known about
29 | the characteristics of the raters. loo is, however, one of the leading
30 | methods for Bayesian model comparison and should provide a helpful guide
31 | in many situations.
32 | 
33 | When calculating loo we always use the relative effective
34 | sample size, calculated using \code{loo::relaive_eff} to improve the estimates
35 | of the PSIS effective sample sizes and Monte Carlo error.
36 | 
37 | For further information about the details of loo and PSIS please consult
38 | the provided references.
39 | }
40 | \examples{
41 | 
42 | \donttest{
43 | fit_ds <- rater(anesthesia, "dawid_skene", verbose = FALSE, chains = 1)
44 | fit_ccds <- rater(anesthesia, "class_conditional_dawid_skene",
45 |                   verbose = FALSE, chains = 1)
46 | 
47 | loo_ds <- loo(fit_ds)
48 | loo_ccds <- loo(fit_ccds)
49 | 
50 | # To compare the loos easily we can use the loo_compare function from the
51 | # loo package:
52 | library(loo)
53 | 
54 | loo_compare(loo_ds, loo_ccds)
55 | 
56 | # The documentation of the loo package contains more information about how
57 | # the output should be interpreted.
58 | }
59 | 
60 | }
61 | \references{
62 | Vehtari, A., Gelman, A., and Gabry, J. (2017a). Practical Bayesian model
63 | evaluation using leave-one-out cross-validation and WAIC.
64 | \emph{Statistics and Computing}. 27(5), 1413--1432. doi:10.1007/s11222-016-9696-4
65 | (\href{https://link.springer.com/article/10.1007/s11222-016-9696-4}{journal version},
66 | \href{https://arxiv.org/abs/1507.04544}{preprint arXiv:1507.04544}).
67 | 
68 | Vehtari, A., Simpson, D., Gelman, A., Yao, Y., and Gabry, J. (2019).
69 | Pareto smoothed importance sampling.
70 | \href{https://arxiv.org/abs/1507.02646}{preprint arXiv:1507.02646}
71 | }
72 | 


--------------------------------------------------------------------------------
/man/make_complete_rating_design_sim_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate.R
 3 | \name{make_complete_rating_design_sim_data}
 4 | \alias{make_complete_rating_design_sim_data}
 5 | \title{Produce simulation data from a 'complete' rating design}
 6 | \usage{
 7 | make_complete_rating_design_sim_data(I, J, N)
 8 | }
 9 | \arguments{
10 | \item{I}{The number of items.}
11 | 
12 | \item{J}{The number of raters.}
13 | 
14 | \item{N}{The number of times each rater rates each item.}
15 | }
16 | \value{
17 | Simulation data in the format required by
18 | \code{\link[=simulate_dawid_skene_model]{simulate_dawid_skene_model()}} or \code{\link[=simulate_hier_dawid_skene_model]{simulate_hier_dawid_skene_model()}}.
19 | }
20 | \description{
21 | Produce simulation data from a 'complete' rating design
22 | }
23 | \details{
24 | A 'complete' rating design is situation where every rater rates
25 | each item the same number of times. In this function the number of times
26 | each rater rates each item is \code{N}.
27 | }
28 | \examples{
29 | 
30 | make_complete_rating_design_sim_data(100, 5, 2)
31 | 
32 | }
33 | 


--------------------------------------------------------------------------------
/man/make_theta.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate.R
 3 | \name{make_theta}
 4 | \alias{make_theta}
 5 | \title{Make a theta parameter}
 6 | \usage{
 7 | make_theta(diag_values, J, K)
 8 | }
 9 | \arguments{
10 | \item{diag_values}{The diagonal entries of each error matrix.}
11 | 
12 | \item{J}{The number of raters (The umber matrices in 3D array).}
13 | 
14 | \item{K}{The number of latent classes.}
15 | }
16 | \value{
17 | A c(J, K, K) array; the theta parameter
18 | }
19 | \description{
20 | Make a theta parameter
21 | }
22 | \details{
23 | The \code{diag_values} argument can either be a numeric vector of length
24 | 1 or J. If it is length J, the jth element is the diagonal values of the
25 | error matrix for the jth rater. If it is length 1 all raters have the same
26 | diagonal values.
27 | }
28 | \examples{
29 | 
30 | theta <- make_theta(0.7, 5, 4)
31 | theta[1, , ]
32 | 
33 | }
34 | 


--------------------------------------------------------------------------------
/man/mcmc_diagnostics.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mcmc_diagnostics.R
 3 | \name{mcmc_diagnostics}
 4 | \alias{mcmc_diagnostics}
 5 | \title{Retrieve MCMC convergence diagnostics for a rater fit}
 6 | \usage{
 7 | mcmc_diagnostics(fit, pars = c("pi", "theta"))
 8 | }
 9 | \arguments{
10 | \item{fit}{An rater \code{mcmc_fit} object.}
11 | 
12 | \item{pars}{A character vector of parameter names to return. By default
13 | \code{c("pi", "theta")}.}
14 | }
15 | \value{
16 | A matrix where the columns represent different diagnostics and the
17 | rows are different parameters. Currently the first column contains
18 | the Rhat statistic and the second bulk effective samples size. The
19 | rownames contain the parameter names.
20 | }
21 | \description{
22 | Retrieve MCMC convergence diagnostics for a rater fit
23 | }
24 | \details{
25 | MCMC diagnostics cannot be calculate for the z due to the
26 | marginalisation used to fit the models.
27 | 
28 | These MCMC diagnostics are intended as basic sanity check of the quality
29 | of the MCMC samples returned. Users who want more in depth diagnostics
30 | should consider using \code{\link[=as_mcmc.list]{as_mcmc.list()}} to convert the samples to a
31 | \code{\link[coda:mcmc.list]{coda::mcmc.list()}} object, or \code{\link[=get_stanfit]{get_stanfit()}} to extract the underlying
32 | stanfit object.
33 | }
34 | \examples{
35 | \donttest{
36 | 
37 | fit <- rater(anesthesia, "dawid_skene")
38 | 
39 | # Calculate the diagnostics for all parameters.
40 | mcmc_diagnostics(fit)
41 | 
42 | # Calculate the diagnostics just for the pi parameter.
43 | mcmc_diagnostics(fit, pars = "pi")
44 | 
45 | }
46 | 
47 | }
48 | \references{
49 | Aki Vehtari, Andrew Gelman, Daniel Simpson, Bob Carpenter, and
50 | Paul-Christian Bürkner (2019). Rank-normalization, folding, and
51 | localization: An improved R-hat for assessing convergence of
52 | MCMC. \emph{arXiv preprint} \code{arXiv:1903.08008}.
53 | }
54 | \seealso{
55 | \code{\link[rstan:Rhat]{rstan::Rhat()}}, \code{\link[rstan:Rhat]{rstan::ess_bulk()}} \code{\link[=as_mcmc.list]{as_mcmc.list()}},
56 | \code{\link[=get_stanfit]{get_stanfit()}}.
57 | }
58 | 


--------------------------------------------------------------------------------
/man/models.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/models.R
 3 | \name{models}
 4 | \alias{models}
 5 | \alias{dawid_skene}
 6 | \alias{hier_dawid_skene}
 7 | \alias{class_conditional_dawid_skene}
 8 | \title{Probabilistic models of repeated categorical rating}
 9 | \usage{
10 | dawid_skene(alpha = NULL, beta = NULL)
11 | 
12 | hier_dawid_skene(alpha = NULL)
13 | 
14 | class_conditional_dawid_skene(alpha = NULL, beta_1 = NULL, beta_2 = NULL)
15 | }
16 | \arguments{
17 | \item{alpha}{prior parameter for pi}
18 | 
19 | \item{beta}{prior parameter for theta. This can either be a K * K matrix, in
20 | which case it is interpreted as the prior parameter of all of the J
21 | raters, or a J by K by K array in which case it is the fully specified
22 | prior parameter for all raters. (Here K is the number of categories in the
23 | data and J is the number of raters in the data.)}
24 | 
25 | \item{beta_1}{First on diagonal prior probability parameter}
26 | 
27 | \item{beta_2}{Second on diagonal prior probability parameter for theta}
28 | }
29 | \value{
30 | a rater model object that can be passed to \code{\link[=rater]{rater()}}.
31 | }
32 | \description{
33 | Functions to set up models and change their prior
34 | parameters for use in \code{\link[=rater]{rater()}}.
35 | }
36 | \examples{
37 | # Model with default prior parameters:
38 | default_m <- dawid_skene()
39 | 
40 | # Changing alpha:
41 | set_alpha_m <- dawid_skene(alpha = c(2, 2, 2))
42 | 
43 | # Changing beta, single matrix:
44 | # (See details for how this is interpreted.)
45 | beta_mat <- matrix(1, nrow = 4, ncol = 4)
46 | diag(beta_mat) <- 4
47 | beta_mat_m <- dawid_skene()
48 | 
49 | # The above is equivalent (when the model is fit - see details) to:
50 | beta_array <- array(NA, dim = c(2, 4, 4))
51 | for (i in 1:2) {
52 |   beta_array[i, , ] <- beta_mat
53 | }
54 | beta_array_m <- dawid_skene(beta = beta_array)
55 | 
56 | # But you can also specify an array where each slice is different.
57 | # (Again, see details for how this is interpreted.)
58 | beta_array[1, , ] <- matrix(1, nrow = 4, ncol = 4)
59 | beta_array_m <- dawid_skene(beta = beta_array)
60 | 
61 | # Default:
62 | hier_dawid_skene()
63 | 
64 | # Changing alpha
65 | hier_dawid_skene(alpha = c(2, 2))
66 | 
67 | # Default:
68 | class_conditional_dawid_skene()
69 | 
70 | # Not default:
71 | class_conditional_dawid_skene(
72 |   alpha = c(2, 2),
73 |   beta_1 = c(4, 4),
74 |   beta_2 = c(2, 2)
75 | )
76 | 
77 | }
78 | 


--------------------------------------------------------------------------------
/man/plot.rater_fit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rater_fit_class.R
 3 | \name{plot.rater_fit}
 4 | \alias{plot.rater_fit}
 5 | \title{Plot a \code{rater_fit} object}
 6 | \usage{
 7 | \method{plot}{rater_fit}(
 8 |   x,
 9 |   pars = "theta",
10 |   prob = 0.9,
11 |   rater_index = NULL,
12 |   item_index = NULL,
13 |   theta_plot_type = "matrix",
14 |   ...
15 | )
16 | }
17 | \arguments{
18 | \item{x}{An object of class \code{rater_fit}.}
19 | 
20 | \item{pars}{A length one character vector specifying the parameter to plot.
21 | By default \code{"theta"}.}
22 | 
23 | \item{prob}{The coverage of the credible intervals shown in the \code{"pi"} plot.
24 | If not plotting pi this argument will be ignored. By default \code{0.9}.}
25 | 
26 | \item{rater_index}{The indexes of the raters shown in the \verb{"theta} plot.
27 | If not plotting theta this argument will be ignored. By default \code{NULL}
28 | which means that all raters will be plotted.}
29 | 
30 | \item{item_index}{The indexes of the items shown in the class probabilities
31 | plot. If not plotting the class probabilities this argument will be
32 | ignored. By default \code{NULL} which means that all items will be plotted.
33 | This argument is particularly useful to focus the subset of items with
34 | substantial uncertainty in their class assignments.}
35 | 
36 | \item{theta_plot_type}{The type of plot of the "theta" parameter. Can be
37 | either \code{"matrix"} or \code{"points"}. If \code{"matrix"} (the default) the plot
38 | will show the point estimates of the individual rater error matrices,
39 | visualised as tile plots. If \code{"points"}, the elements of the theta
40 | parameter will be displayed as points, with associated credible intervals.
41 | Overall, the \code{"matrix"} type is likely more intuitive, but the \code{"points"}
42 | type can also visualise the uncertainty in the parameter estimates.}
43 | 
44 | \item{...}{Other arguments.}
45 | }
46 | \value{
47 | A ggplot2 object.
48 | }
49 | \description{
50 | Plot a \code{rater_fit} object
51 | }
52 | \details{
53 | The use of \code{pars} to refer to only one parameter is for backwards
54 | compatibility and consistency with the rest of the interface.
55 | }
56 | \examples{
57 | 
58 | \donttest{
59 | fit <- rater(anesthesia, "dawid_skene")
60 | 
61 | # By default will just plot the theta plot
62 | plot(fit)
63 | 
64 | # Select which parameter to plot.
65 | plot(fit, pars = "pi")
66 | 
67 | # Plot the theta parameter for rater 1, showing uncertainty.
68 | plot(fit, pars = "theta", theta_plot_type = "points", rater_index = 1)
69 | 
70 | }
71 | 
72 | }
73 | 


--------------------------------------------------------------------------------
/man/point_estimate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/point_estimate.R
 3 | \name{point_estimate}
 4 | \alias{point_estimate}
 5 | \title{Extract point estimates of parameters from a fit object}
 6 | \usage{
 7 | point_estimate(fit, pars = c("pi", "theta", "z"), ...)
 8 | }
 9 | \arguments{
10 | \item{fit}{A rater fit object}
11 | 
12 | \item{pars}{A character vector of parameter names to return. By default
13 | \code{c("pi", "theta", "z")}.}
14 | 
15 | \item{...}{Extra arguments}
16 | }
17 | \value{
18 | A named list of the parameter estimates.
19 | }
20 | \description{
21 | Extract point estimates of parameters from a fit object
22 | }
23 | \details{
24 | If the passed fit object was fit using MCMC then the posterior
25 | means are returned. If it was fit through optimisation the maximum a
26 | priori (MAP) estimates are returned. The z parameter returned is the
27 | value of class probabilities which is largest. To return the full
28 | posterior distributions of the latent class use \code{class_probabilities()}.
29 | 
30 | For the class conditional model the 'full' theta parameterisation (i.e.
31 | appearing to have the same number of parameters as the standard
32 | Dawid-Skene model) is calculated and returned. This is designed to allow
33 | easier comparison with the full Dawid-Skene model.
34 | }
35 | \examples{
36 | 
37 | \donttest{
38 | # A model fit using MCMC.
39 | mcmc_fit <- rater(anesthesia, "dawid_skene")
40 | 
41 | # This will return the posterior mean (except for z)
42 | post_mean_estimate <- point_estimate(mcmc_fit)
43 | 
44 | # A model fit using optimisation.
45 | optim_fit <- rater(anesthesia, dawid_skene(), method = "optim")
46 | 
47 | # This will output MAP estimates of the parameters.
48 | map_estimate <- point_estimate(optim_fit)
49 | 
50 | }
51 | 
52 | }
53 | \seealso{
54 | \code{class_probabilities()}
55 | }
56 | 


--------------------------------------------------------------------------------
/man/posterior_interval.mcmc_fit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/posterior_interval.R
 3 | \name{posterior_interval.mcmc_fit}
 4 | \alias{posterior_interval.mcmc_fit}
 5 | \alias{posterior_interval}
 6 | \title{Extract posterior intervals for parameters of the model}
 7 | \usage{
 8 | \method{posterior_interval}{mcmc_fit}(object, prob = 0.9, pars = c("pi", "theta"), ...)
 9 | }
10 | \arguments{
11 | \item{object}{A rater \code{mcmc_fit} object.}
12 | 
13 | \item{prob}{A single probability. The size of the credible interval
14 | returned. By default \code{0.9}.}
15 | 
16 | \item{pars}{The parameters to calculate the intervals for}
17 | 
18 | \item{...}{Other arguments.}
19 | }
20 | \value{
21 | A matrix with 2 columns. The first column is the lower bound of
22 | of the credible interval and the second is the upper bound. Each row
23 | corresponds to one individuals parameters. The rownames are the parameter
24 | names.
25 | }
26 | \description{
27 | Extract posterior intervals for parameters of the model
28 | }
29 | \details{
30 | Posterior intervals can only be calculated for models fit with
31 | MCMC. In addition, posterior intervals are not meaningful for the latent
32 | class (and indeed cannot be calculated). The \emph{full} posterior distribution
33 | of the latent class can be extracted using \link{class_probabilities}
34 | 
35 | For the class conditional model the 'full' theta parameterisation (i.e.
36 | appearing to have the same number of parameters as the standard
37 | Dawid-Skene model) is calculated and returned. This is designed to allow
38 | easier comparison with the full Dawid-Skene model.
39 | }
40 | \examples{
41 | 
42 | \donttest{
43 | fit <- rater(anesthesia, "dawid_skene", verbose = FALSE, chains = 1)
44 | 
45 | intervals <- posterior_interval(fit)
46 | head(intervals)
47 | 
48 | }
49 | 
50 | }
51 | 


--------------------------------------------------------------------------------
/man/posterior_interval.optim_fit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/posterior_interval.R
 3 | \name{posterior_interval.optim_fit}
 4 | \alias{posterior_interval.optim_fit}
 5 | \title{Extract posterior intervals for parameters of the model}
 6 | \usage{
 7 | \method{posterior_interval}{optim_fit}(object, prob = 0.9, pars = c("pi", "theta"), ...)
 8 | }
 9 | \arguments{
10 | \item{object}{A rater optim_fit object}
11 | 
12 | \item{prob}{A probability}
13 | 
14 | \item{pars}{The parameters to calculate the intervals for}
15 | 
16 | \item{...}{Other arguments}
17 | }
18 | \description{
19 | Extract posterior intervals for parameters of the model
20 | }
21 | 


--------------------------------------------------------------------------------
/man/posterior_predict.rater_fit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/posterior_predict.R
 3 | \name{posterior_predict.rater_fit}
 4 | \alias{posterior_predict.rater_fit}
 5 | \alias{posterior_predict}
 6 | \title{Draw from the posterior predictive distribution}
 7 | \usage{
 8 | \method{posterior_predict}{rater_fit}(object, new_data, seed = NULL, ...)
 9 | }
10 | \arguments{
11 | \item{object}{A \code{rater_fit} object.}
12 | 
13 | \item{new_data}{New data for the model to be fit to. The must be in the form
14 | used in \code{rater()} except without the 'rating' column.}
15 | 
16 | \item{seed}{An optional random seed to use.}
17 | 
18 | \item{...}{Other arguments.}
19 | }
20 | \value{
21 | The passed \code{new_data} augmented with a column 'z' containing the
22 | latent class of each item and 'rating' containing the simulated rating.
23 | }
24 | \description{
25 | Draw from the posterior predictive distribution
26 | }
27 | \details{
28 | The number of raters implied by the entries in the rater column
29 | must match the number of raters in the fitted model.
30 | }
31 | \examples{
32 | 
33 | \donttest{
34 | 
35 | fit <- rater(anesthesia, "dawid_skene", verbose = FALSE)
36 | new_data <- data.frame(item = rep(1:2, each = 5), rater = rep(1:5, 2))
37 | 
38 | predictions <- posterior_predict(fit, new_data)
39 | predictions
40 | 
41 | }
42 | 
43 | }
44 | 


--------------------------------------------------------------------------------
/man/posterior_samples.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/posterior_samples.R
 3 | \name{posterior_samples}
 4 | \alias{posterior_samples}
 5 | \title{Extract posterior samples from a rater fit object}
 6 | \usage{
 7 | posterior_samples(fit, pars = c("pi", "theta"))
 8 | }
 9 | \arguments{
10 | \item{fit}{A rater fit object.}
11 | 
12 | \item{pars}{A character vector of parameter names to return. By default
13 | \code{c("pi", "theta")}.}
14 | }
15 | \value{
16 | A named list of the posterior samples for each parameters. For each
17 | parameter the samples are in the form returned by \code{\link[rstan:stanfit-method-extract]{rstan::extract()}}.
18 | }
19 | \description{
20 | Extract posterior samples from a rater fit object
21 | }
22 | \details{
23 | Posterior samples can only be returned for models fitting using
24 | MCMC not optimisation. In addition, posterior samples cannot be returned
25 | for the latent class due to the marginalisation technique used internally.
26 | 
27 | For the class conditional model the 'full' theta parameterisation (i.e.
28 | appearing to have the same number of parameters as the standard
29 | Dawid-Skene model) is calculated and returned. This is designed to allow
30 | easier comparison with the full Dawid-Skene model.
31 | }
32 | \examples{
33 | 
34 | \donttest{
35 | fit <- rater(anesthesia, "dawid_skene")
36 | 
37 | samples <- posterior_samples(fit)
38 | 
39 | # Look at first 6 samples for each of the pi parameters
40 | head(samples$pi)
41 | 
42 | # Look at the first 6 samples for the theta[1, 1, 1] parameter
43 | head(samples$theta[, 1, 1, 1])
44 | 
45 | # Only get the samples for the pi parameter:
46 | pi_samples <- posterior_samples(fit, pars = "pi")
47 | 
48 | }
49 | 
50 | }
51 | 


--------------------------------------------------------------------------------
/man/print.mcmc_fit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rater_fit_class.R
 3 | \name{print.mcmc_fit}
 4 | \alias{print.mcmc_fit}
 5 | \title{Print a \code{mcmc_fit} object}
 6 | \usage{
 7 | \method{print}{mcmc_fit}(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{An object of class \code{mcmc_fit}.}
11 | 
12 | \item{...}{Other arguments.}
13 | }
14 | \description{
15 | Print a \code{mcmc_fit} object
16 | }
17 | \examples{
18 | \donttest{
19 | 
20 | # Suppress sampling output.
21 | mcmc_fit <- rater(anesthesia, "dawid_skene", verbose = FALSE)
22 | print(mcmc_fit)
23 | 
24 | }
25 | 
26 | }
27 | 


--------------------------------------------------------------------------------
/man/print.optim_fit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rater_fit_class.R
 3 | \name{print.optim_fit}
 4 | \alias{print.optim_fit}
 5 | \title{Print a \code{optim_fit} object}
 6 | \usage{
 7 | \method{print}{optim_fit}(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{An object of class \code{optim_fit}.}
11 | 
12 | \item{...}{Other arguments.}
13 | }
14 | \description{
15 | Print a \code{optim_fit} object
16 | }
17 | \examples{
18 | \donttest{
19 | 
20 | optim_fit <- rater(anesthesia, "dawid_skene", method = "optim")
21 | print(optim_fit)
22 | 
23 | }
24 | 
25 | }
26 | 


--------------------------------------------------------------------------------
/man/print.rater_model.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rater_model_class.R
 3 | \name{print.rater_model}
 4 | \alias{print.rater_model}
 5 | \title{Print a \code{rater_model} object.}
 6 | \usage{
 7 | \method{print}{rater_model}(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{A \code{rater_model} object.}
11 | 
12 | \item{...}{Other arguments}
13 | }
14 | \description{
15 | Print a \code{rater_model} object.
16 | }
17 | \examples{
18 | mod <- dawid_skene()
19 | print(mod)
20 | 
21 | }
22 | 


--------------------------------------------------------------------------------
/man/prior_summary.rater_fit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rater_fit_class.R
 3 | \name{prior_summary.rater_fit}
 4 | \alias{prior_summary.rater_fit}
 5 | \alias{prior_summary}
 6 | \title{Provide a summary of the priors specified in a \code{rater_fit} object.}
 7 | \usage{
 8 | \method{prior_summary}{rater_fit}(object, ...)
 9 | }
10 | \arguments{
11 | \item{object}{A \code{rater_fit} object.}
12 | 
13 | \item{...}{Other arguments.}
14 | }
15 | \description{
16 | Provide a summary of the priors specified in a \code{rater_fit} object.
17 | }
18 | \examples{
19 | \donttest{
20 | # Fit a model using MCMC (the default).
21 | fit <- rater(anesthesia, "dawid_skene", verbose = FALSE)
22 | 
23 | # Summarise the priors (and model) specified in the fit.
24 | prior_summary(fit)
25 | 
26 | }
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/man/rater-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rater-package.R
 3 | \docType{package}
 4 | \name{rater-package}
 5 | \alias{rater-package}
 6 | \title{The 'rater' package.}
 7 | \description{
 8 | Fit statistical models based on the Dawid-Skene model to repeated
 9 | categorical rating data. Full Bayesian inference for these models is
10 | supported through the Stan modelling language. rater also allows the user to
11 | extract and plot key parameters of these models.
12 | }
13 | \references{
14 | Stan Development Team (2018). RStan: the R interface to Stan. R package version 2.18.2. http://mc-stan.org
15 | }
16 | 


--------------------------------------------------------------------------------
/man/rater.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/inference.R
 3 | \name{rater}
 4 | \alias{rater}
 5 | \title{Fit statistical models to repeated categorical rating data using Stan}
 6 | \usage{
 7 | rater(
 8 |   data,
 9 |   model,
10 |   method = "mcmc",
11 |   data_format = "long",
12 |   long_data_colnames = c(item = "item", rater = "rater", rating = "rating"),
13 |   inits = NULL,
14 |   verbose = TRUE,
15 |   ...
16 | )
17 | }
18 | \arguments{
19 | \item{data}{A 2D data object: data.frame, matrix, tibble etc. with data in
20 | either long or grouped format.}
21 | 
22 | \item{model}{Model to fit to data - must be rater_model or a character
23 | string - the name of the model. If the character string is used, the
24 | prior parameters will be set to their default values.}
25 | 
26 | \item{method}{A length 1 character vector, either \code{"mcmc"} or \code{"optim"}.
27 | This will be fitting method used by Stan. By default \code{"mcmc"}}
28 | 
29 | \item{data_format}{A length 1 character vector, \code{"long"}, \code{"wide"} and
30 | \code{"grouped"}. The format that the passed data is in. Defaults to \code{"long"}.
31 | See \verb{vignette("data-formats)} for details.}
32 | 
33 | \item{long_data_colnames}{A 3-element named character vector that specifies
34 | the names of the three required columns in the long data format. The vector
35 | must have the required names:
36 | * item: the name of the column containing the item indexes,
37 | * rater: the name of the column containing the rater indexes,
38 | * rating: the name of the column containing the ratings.
39 | By default, the names of the columns are the same as the names of the
40 | vector: \code{"item"}, \code{"rater"}, and \code{"rating"} respectively. This argument is
41 | ignored when the \code{data_format} argument is either \code{"wide"} or \code{"grouped"}.}
42 | 
43 | \item{inits}{The initialization points of the fitting algorithm}
44 | 
45 | \item{verbose}{Should \code{rater()} produce information about the progress
46 | of the chains while using the MCMC algorithm. Defaults to \code{TRUE}}
47 | 
48 | \item{...}{Extra parameters which are passed to the Stan fitting interface.}
49 | }
50 | \value{
51 | An object of class rater_fit containing the fitted parameters.
52 | }
53 | \description{
54 | This functions allows the user to fit statistical models of noisy
55 | categorical rating, based on the Dawid-Skene model, using Bayesian
56 | inference. A variety of data formats and models are supported. Inference
57 | is done using Stan, allowing models to be fit efficiently, using both
58 | optimisation and Markov Chain Monte Carlo (MCMC).
59 | }
60 | \details{
61 | The default MCMC algorithm used by Stan is No U Turn Sampling
62 | (NUTS) and the default optimisation method is LGFGS. For MCMC 4 chains
63 | are run be default with 2000 iterations in total each.
64 | }
65 | \examples{
66 | \donttest{
67 | 
68 | # Fit a model using MCMC (the default).
69 | mcmc_fit <- rater(anesthesia, "dawid_skene")
70 | 
71 | # Fit a model using optimisation.
72 | optim_fit <- rater(anesthesia, dawid_skene(), method = "optim")
73 | 
74 | # Fit a model using passing data grouped data.
75 | grouped_fit <- rater(caries, dawid_skene(), data_format = "grouped")
76 | 
77 | }
78 | 
79 | }
80 | \seealso{
81 | \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}}, \code{\link[rstan:stanmodel-method-optimizing]{rstan::optimizing()}}
82 | }
83 | 


--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/model_comparison.R
 3 | \docType{import}
 4 | \name{reexports}
 5 | \alias{reexports}
 6 | \alias{loo_compare}
 7 | \title{Objects exported from other packages}
 8 | \keyword{internal}
 9 | \description{
10 | These objects are imported from other packages. Follow the links
11 | below to see their documentation.
12 | 
13 | \describe{
14 |   \item{loo}{\code{\link[loo]{loo_compare}}}
15 | }}
16 | 
17 | 


--------------------------------------------------------------------------------
/man/simulate_dawid_skene_model.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate.R
 3 | \name{simulate_dawid_skene_model}
 4 | \alias{simulate_dawid_skene_model}
 5 | \title{Simulate data from the Dawid-Skene model}
 6 | \usage{
 7 | simulate_dawid_skene_model(pi, theta, sim_data, seed = NULL)
 8 | }
 9 | \arguments{
10 | \item{pi}{The pi parameter of the Dawid-Skene model.}
11 | 
12 | \item{theta}{The theta parameter of the Dawid-Skene model.}
13 | 
14 | \item{sim_data}{Data to guide the simulation. The data must be in the long
15 | data format used in \code{rater()} except without the 'rating' column. The data
16 | specifies:
17 | \itemize{
18 | \item the number of items in the data, and
19 | \item which raters rate each item and how many times they do so.
20 | }}
21 | 
22 | \item{seed}{An optional random seed to use.}
23 | }
24 | \value{
25 | The passed \code{sim_data} augmented with columns:
26 | \itemize{
27 | \item \code{"z"} containing the latent class of each item,
28 | \item \code{"rating"} containing the simulated ratings.
29 | }
30 | }
31 | \description{
32 | Simulate data from the Dawid-Skene model
33 | }
34 | \details{
35 | The number of raters implied by the entries in the rater column
36 | must match the number of raters implied by the passed theta parameter.
37 | 
38 | This function can also be used to simulate from the class-conditional
39 | Dawid-Skene model by specifying theta in the required form (i.e where
40 | all off-diagonal entries of the error matrices are equal.)
41 | }
42 | \examples{
43 | 
44 | \donttest{
45 | 
46 | J <- 5
47 | K <- 4
48 | pi <- rep(1 / K, K)
49 | theta <- make_theta(0.7, J, K)
50 | sim_data <- data.frame(item = rep(1:2, each = 5), rater = rep(1:5, 2))
51 | 
52 | simulations <- simulate_dawid_skene_model(pi, theta, sim_data)
53 | simulations
54 | 
55 | }
56 | 
57 | }
58 | 


--------------------------------------------------------------------------------
/man/simulate_hier_dawid_skene_model.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate.R
 3 | \name{simulate_hier_dawid_skene_model}
 4 | \alias{simulate_hier_dawid_skene_model}
 5 | \title{Simulate data from the hierarchical Dawid-Skene model}
 6 | \usage{
 7 | simulate_hier_dawid_skene_model(pi, mu, sigma, sim_data, seed = NULL)
 8 | }
 9 | \arguments{
10 | \item{pi}{The pi parameter of the hierarchical Dawid-Skene model.}
11 | 
12 | \item{mu}{The mu parameter of the hierarchical Dawid-Skene model.}
13 | 
14 | \item{sigma}{The sigma parameter of the hierarchical Dawid-Skene model.}
15 | 
16 | \item{sim_data}{Data to guide the simulation. The data must be in the long
17 | data format used in \code{rater()} except without the 'rating' column. The data
18 | specifies:
19 | \itemize{
20 | \item the number of items in the data, and
21 | \item which raters rate each item and how many times they do so.
22 | }}
23 | 
24 | \item{seed}{An optional random seed to use.}
25 | }
26 | \value{
27 | The passed \code{sim_data} augmented with columns:
28 | \itemize{
29 | \item \code{"z"} containing the latent class of each item,
30 | \item \code{"rating"} containing the simulated rating.
31 | }
32 | }
33 | \description{
34 | Simulate data from the hierarchical Dawid-Skene model
35 | }
36 | \details{
37 | The number of raters implied by the entries in the rater column
38 | must match the number of raters implied by the passed theta parameter.
39 | }
40 | \examples{
41 | 
42 | \donttest{
43 | 
44 | J <- 5
45 | K <- 4
46 | 
47 | pi <- rep(1 / K, K)
48 | 
49 | mu <- matrix(0, nrow = K, ncol = K)
50 | diag(mu) <- 5
51 | 
52 | sigma <- matrix(sqrt(2) / sqrt(pi), nrow = K, ncol = K)
53 | 
54 | sim_data <- data.frame(item = rep(1:2, each = 5), rater = rep(1:5, 2))
55 | 
56 | sim_result <- simulate_hier_dawid_skene_model(pi, mu, sigma, sim_data)
57 | 
58 | sim_result$sim
59 | sim_result$theta
60 | 
61 | }
62 | 
63 | }
64 | 


--------------------------------------------------------------------------------
/man/summary.mcmc_fit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rater_fit_class.R
 3 | \name{summary.mcmc_fit}
 4 | \alias{summary.mcmc_fit}
 5 | \title{Summarise a \code{mcmc_fit} object}
 6 | \usage{
 7 | \method{summary}{mcmc_fit}(object, n_pars = 8, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{An object of class \code{mcmc_fit}.}
11 | 
12 | \item{n_pars}{The number of pi/theta parameters and z 'items' to display.}
13 | 
14 | \item{...}{Other arguments passed to function.}
15 | }
16 | \description{
17 | Summarise a \code{mcmc_fit} object
18 | }
19 | \details{
20 | For the class conditional model the 'full' theta parameterisation
21 | (i.e. appearing to have the same number of parameters as the standard
22 | Dawid-Skene model) is calculated and returned. This is designed to allow
23 | easier comparison with the full Dawid-Skene model.
24 | }
25 | \examples{
26 | \donttest{
27 | 
28 | fit <- rater(anesthesia, "dawid_skene", verbose = FALSE)
29 | 
30 | summary(fit)
31 | 
32 | }
33 | 
34 | }
35 | 


--------------------------------------------------------------------------------
/man/summary.optim_fit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rater_fit_class.R
 3 | \name{summary.optim_fit}
 4 | \alias{summary.optim_fit}
 5 | \title{Summarise an \code{optim_fit} object}
 6 | \usage{
 7 | \method{summary}{optim_fit}(object, n_pars = 8, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{An object of class \code{optim_fit}.}
11 | 
12 | \item{n_pars}{The number of pi/theta parameters and z 'items' to display.}
13 | 
14 | \item{...}{Other arguments passed to function.}
15 | }
16 | \description{
17 | Summarise an \code{optim_fit} object
18 | }
19 | \details{
20 | For the class conditional model the 'full' theta parameterisation
21 | (i.e. appearing to have the same number of parameters as the standard
22 | Dawid-Skene model) is calculated and returned. This is designed to allow
23 | easier comparison with the full Dawid-Skene model.
24 | }
25 | \examples{
26 | \donttest{
27 | 
28 | fit <- rater(anesthesia, "dawid_skene", method = "optim")
29 | 
30 | summary(fit)
31 | 
32 | }
33 | 
34 | }
35 | 


--------------------------------------------------------------------------------
/man/summary.rater_model.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rater_model_class.R
 3 | \name{summary.rater_model}
 4 | \alias{summary.rater_model}
 5 | \title{Summarise a \code{rater_model}.}
 6 | \usage{
 7 | \method{summary}{rater_model}(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{A \code{rater_model} object.}
11 | 
12 | \item{...}{Other arguments.}
13 | }
14 | \description{
15 | Summarise a \code{rater_model}.
16 | }
17 | \examples{
18 | mod <- dawid_skene()
19 | summary(mod)
20 | 
21 | }
22 | 


--------------------------------------------------------------------------------
/man/waic.rater_fit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/model_comparison.R
 3 | \name{waic.rater_fit}
 4 | \alias{waic.rater_fit}
 5 | \alias{waic}
 6 | \title{Compute the WAIC - a measure of model fit - of a rater fit object.}
 7 | \usage{
 8 | \method{waic}{rater_fit}(x, ...)
 9 | }
10 | \arguments{
11 | \item{x}{A \code{rater_fit} object. All model types are currently supported
12 | except the basic Dawid-Skene model fit with grouped data.}
13 | 
14 | \item{...}{Other arguments passed.}
15 | }
16 | \value{
17 | A waic/loo object.
18 | }
19 | \description{
20 | Compute the WAIC - a measure of model fit - of a rater fit object.
21 | }
22 | \details{
23 | This function provides provides an additional method for model
24 | comparison, on top of the \code{loo()} function. In general we recommend that
25 | \code{loo()} is preferred: see the documentation of the loo package for details.
26 | Also, note the comments regarding model selection the the details section
27 | of \code{loo()}.
28 | }
29 | \examples{
30 | 
31 | \donttest{
32 | fit_ds <- rater(anesthesia, "dawid_skene", verbose = FALSE, chains = 1)
33 | fit_ccds <- rater(anesthesia, "class_conditional_dawid_skene",
34 |                   verbose = FALSE, chains = 1)
35 | 
36 | waic(fit_ds)
37 | waic(fit_ccds)
38 | }
39 | 
40 | }
41 | \references{
42 | Watanabe, S. (2010). Asymptotic equivalence of Bayes cross validation and
43 | widely application information criterion in singular learning theory.
44 | \emph{Journal of Machine Learning Research} 11, 3571-3594.
45 | 
46 | Vehtari, A., Gelman, A., and Gabry, J. (2017a). Practical Bayesian model
47 | evaluation using leave-one-out cross-validation and WAIC.
48 | \emph{Statistics and Computing}. 27(5), 1413--1432. doi:10.1007/s11222-016-9696-4
49 | (\href{https://link.springer.com/article/10.1007/s11222-016-9696-4}{journal version},
50 | \href{https://arxiv.org/abs/1507.04544}{preprint arXiv:1507.04544}).
51 | }
52 | 


--------------------------------------------------------------------------------
/man/wide_to_long.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data_format.R
 3 | \name{wide_to_long}
 4 | \alias{wide_to_long}
 5 | \title{Convert wide data to the long format}
 6 | \usage{
 7 | wide_to_long(data)
 8 | }
 9 | \arguments{
10 | \item{data}{Data in a wide format. Must be 2D data object which can be
11 | converted to a data.frame}
12 | }
13 | \value{
14 | The data converted into long format. A data.frame with three columns
15 | item, rater and rating.
16 | }
17 | \description{
18 | Convert wide data to the long format
19 | }
20 | \details{
21 | Wide data refers to a way of laying out categorical rating data
22 | where each item is one row and each column represents the ratings of each
23 | rater. Elements of the data can be \code{NA}, indicating that an item wasn't
24 | rated by a rater. Wide data cannot represent the same rater rating an item
25 | multiple times.
26 | 
27 | Currently any column names of the data are ignored and the raters are
28 | labelled by their column position (1 indexed, left to right). Only numeric
29 | ratings are currently supported.
30 | }
31 | \examples{
32 | wide_data <- data.frame(dater_1 = c(3, 2, 2), rater_2 = c(4, 2, 2))
33 | wide_data
34 | 
35 | long_data <- wide_to_long(wide_data)
36 | long_data
37 | 
38 | 
39 | }
40 | 


--------------------------------------------------------------------------------
/pkgdown/_pkgdown.yml:
--------------------------------------------------------------------------------
 1 | template:
 2 |   params:
 3 |     bootswatch: flatly
 4 | 
 5 | navbar:
 6 |   structure:
 7 |     left:
 8 |     - home
 9 |     - intro
10 |     - reference
11 |     - articles
12 |     - tutorials
13 |     - news
14 |     right: github
15 |   components:
16 |     home:
17 |       icon: fa-home fa-lg
18 |       href: index.html
19 |     reference:
20 |       text: Reference
21 |       href: reference/index.html
22 |     articles:
23 |       text: Articles
24 |       menu:
25 |       - text: rater workflow
26 |         href: articles/workflow.html
27 |       - text: Data formats
28 |         href: articles/data-formats.html
29 |     github:
30 |       icon: fa-lg fa-github
31 |       href: https://github.com/Voltemand/rater
32 | 


--------------------------------------------------------------------------------
/pkgdown/extra.css:
--------------------------------------------------------------------------------
 1 | /* Taken from: https://stackoverflow.com/questions/18529274/change-navbar-color-in-twitter-bootstrap */
 2 | 
 3 | .navbar-default {
 4 |     background-color: #4682b4;
 5 |     border-color: #4682b4;
 6 | }
 7 | 
 8 | .navbar-default .navbar-nav > .active > a,
 9 | .navbar-default .navbar-nav > .active > a:hover,
10 | .navbar-default .navbar-nav > .active > a:focus {
11 |     color: #4682b4;
12 |     background-color: #FFFFFF;
13 | }
14 | 
15 | .navbar-default .navbar-nav > li > a {
16 |     color: #FFFFFF;
17 | }
18 | 


--------------------------------------------------------------------------------
/rater.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
 1 | # Generated by rstantools.  Do not edit by hand.
 2 | 
 3 | STANHEADERS_SRC = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "message()" -e "cat(system.file('include', 'src', package = 'StanHeaders', mustWork = TRUE))" -e "message()" | grep "StanHeaders")
 4 | 
 5 | STANC_FLAGS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "cat(ifelse(utils::packageVersion('rstan') >= 2.26, '-DUSE_STANC3',''))")
 6 | PKG_CPPFLAGS = -I"../inst/include" -I"$(STANHEADERS_SRC)" -DBOOST_DISABLE_ASSERTS -DEIGEN_NO_DEBUG -DBOOST_MATH_OVERFLOW_ERROR_POLICY=errno_on_error $(STANC_FLAGS) -D_HAS_AUTO_PTR_ETC=0
 7 | PKG_CXXFLAGS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "RcppParallel::CxxFlags()") $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "StanHeaders:::CxxFlags()")
 8 | PKG_LIBS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "RcppParallel::RcppParallelLibs()") $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "StanHeaders:::LdFlags()")
 9 | 
10 | CXX_STD = CXX17
11 | 


--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
 1 | # Generated by rstantools.  Do not edit by hand.
 2 | 
 3 | STANHEADERS_SRC = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "message()" -e "cat(system.file('include', 'src', package = 'StanHeaders', mustWork = TRUE))" -e "message()" | grep "StanHeaders")
 4 | 
 5 | STANC_FLAGS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "cat(ifelse(utils::packageVersion('rstan') >= 2.26, '-DUSE_STANC3',''))")
 6 | PKG_CPPFLAGS = -I"../inst/include" -I"$(STANHEADERS_SRC)" -DBOOST_DISABLE_ASSERTS -DEIGEN_NO_DEBUG -DRCPP_PARALLEL_USE_TBB=1 $(STANC_FLAGS) -D_HAS_AUTO_PTR_ETC=0
 7 | PKG_CXXFLAGS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "RcppParallel::CxxFlags()") $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "StanHeaders:::CxxFlags()")
 8 | PKG_LIBS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "RcppParallel::RcppParallelLibs()") $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "StanHeaders:::LdFlags()")
 9 | 
10 | CXX_STD = CXX17
11 | 


--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | #include <RcppEigen.h>
 5 | #include <Rcpp.h>
 6 | 
 7 | using namespace Rcpp;
 8 | 
 9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM
10 | Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
11 | Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
12 | #endif
13 | 
14 | 
15 | RcppExport SEXP _rcpp_module_boot_stan_fit4class_conditional_dawid_skene_mod();
16 | RcppExport SEXP _rcpp_module_boot_stan_fit4dawid_skene_mod();
17 | RcppExport SEXP _rcpp_module_boot_stan_fit4grouped_data_mod();
18 | RcppExport SEXP _rcpp_module_boot_stan_fit4hierarchical_dawid_skene_mod();
19 | 
20 | static const R_CallMethodDef CallEntries[] = {
21 |     {"_rcpp_module_boot_stan_fit4class_conditional_dawid_skene_mod", (DL_FUNC) &_rcpp_module_boot_stan_fit4class_conditional_dawid_skene_mod, 0},
22 |     {"_rcpp_module_boot_stan_fit4dawid_skene_mod", (DL_FUNC) &_rcpp_module_boot_stan_fit4dawid_skene_mod, 0},
23 |     {"_rcpp_module_boot_stan_fit4grouped_data_mod", (DL_FUNC) &_rcpp_module_boot_stan_fit4grouped_data_mod, 0},
24 |     {"_rcpp_module_boot_stan_fit4hierarchical_dawid_skene_mod", (DL_FUNC) &_rcpp_module_boot_stan_fit4hierarchical_dawid_skene_mod, 0},
25 |     {NULL, NULL, 0}
26 | };
27 | 
28 | RcppExport void R_init_rater(DllInfo *dll) {
29 |     R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
30 |     R_useDynamicSymbols(dll, FALSE);
31 | }
32 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(rater)
3 | 
4 | test_check("rater")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/model_class.md:
--------------------------------------------------------------------------------
 1 | # print works for models with default parameters
 2 | 
 3 |     Code
 4 |       print(dawid_skene())
 5 |     Output
 6 |       Bayesian Dawid and Skene Model 
 7 |       
 8 |       Prior parameters:
 9 |       
10 |       alpha: default
11 |       beta: default
12 | 
13 | ---
14 | 
15 |     Code
16 |       print(class_conditional_dawid_skene())
17 |     Output
18 |       Bayesian Class conditional Dawid and Skene Model 
19 |       
20 |       Prior parameters:
21 |       
22 |       alpha: default
23 |       beta_1: default
24 |       beta_2: default
25 | 
26 | ---
27 | 
28 |     Code
29 |       print(hier_dawid_skene())
30 |     Output
31 |       Bayesian Hierarchical Dawid and Skene Model 
32 |       
33 |       Prior parameters:
34 |       
35 |       alpha: default
36 | 
37 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/rater.md:
--------------------------------------------------------------------------------
 1 | # rater errors correctly
 2 | 
 3 |     Code
 4 |       rater(data.frame(item = 0, rater = 0, rating = 0), dawid_skene())
 5 |     Condition
 6 |       Error:
 7 |       ! 
 8 |       * Some item indexes are 0. All indexes must be in 1:I where I is the number of items.
 9 |       * Some rater indexes are 0. All indexes must be in 1:J where J is the number of raters.
10 |       * Some ratings are 0. All ratings must be in 1:K where K is the number of classes.
11 | 
12 | ---
13 | 
14 |     Code
15 |       rater(data.frame(thing = 0, n = 0), dawid_skene(), data_format = "grouped")
16 |     Condition
17 |       Error:
18 |       ! 
19 |       * All elements of the column `n` must be > 0.
20 |       * Some ratings are 0. All ratings must be in 1:K where K is the number of classes.
21 | 
22 | 


--------------------------------------------------------------------------------
/tests/testthat/helper.R:
--------------------------------------------------------------------------------
 1 | #' Returns a character vector of the geoms of a ggplot2 plot
 2 | #'
 3 | #' Taken from: https://bit.ly/2B7TzEw (shortened)
 4 | #'
 5 | #' @param p a ggplot2 plot
 6 | get_geoms <- function(p) {
 7 |   sapply(p$layers, function(x) class(x$geom)[1])
 8 | }
 9 | 
10 | #' Returns the number of facets of a ggplot2 plot
11 | #'
12 | #' Taken from: https://bit.ly/2UriIB4 (shortened)
13 | #'
14 | #' @param p a ggplot2 plot
15 | get_facet_dim <- function(p) {
16 |   length(unique(ggplot2::ggplot_build(p)$data[[1]]$PANEL))
17 | }
18 | 
19 | # Taken from greta.
20 | expect_ok <- function(expr) {
21 |   expect_error(expr, NA)
22 | }
23 | 
24 | 
25 | 
26 | 


--------------------------------------------------------------------------------
/tests/testthat/setup.R:
--------------------------------------------------------------------------------
 1 | data("anesthesia")
 2 | 
 3 | ds_fit <- rater(anesthesia, dawid_skene(), iter = 200, chains = 1,
 4 |                 verbose = FALSE)
 5 | hds_fit <- rater(anesthesia, hier_dawid_skene(), iter = 200, chains = 1,
 6 |                  verbose = FALSE)
 7 | ccds_fit <- rater(anesthesia, class_conditional_dawid_skene(), iter = 200,
 8 |                   chains = 1, verbose = FALSE, seed = 42)
 9 | 
10 | ds_fit_optim <- rater(anesthesia, dawid_skene(), method = "optim")
11 | hds_fit_optim <- rater(anesthesia, hier_dawid_skene(), method = "optim")
12 | ccds_fit_optim <- rater(anesthesia, class_conditional_dawid_skene(),
13 |                         method = "optim")
14 | 
15 | ds_fit_grouped <- rater(caries, dawid_skene(), data_format = "grouped",
16 |                         iter = 200, chains = 1, verbose = FALSE)
17 | 
18 | ds_fit_grouped_optim <- rater(caries, dawid_skene(), method = "optim",
19 |                               data_format = "grouped")
20 | 
21 | ds_model <- dawid_skene()
22 | hds_model <- hier_dawid_skene()
23 | ccds_model <- class_conditional_dawid_skene()
24 | 
25 | default_colnames <- c(
26 |   item = "item",
27 |   rater = "rater",
28 |   rating = "rating"
29 | )
30 | 
31 | J <- 5
32 | I <- 45
33 | K <- 4
34 | 
35 | J_caries <- 5
36 | I_caries <- 3859
37 | K_caries <- 2
38 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data_format.R:
--------------------------------------------------------------------------------
 1 | test_that("wide_to_long error appropriately", {
 2 |   expect_error(
 3 |     wide_to_long(2),
 4 |     "`data` must be a data.frame or matrix."
 5 |   )
 6 |   expect_error(
 7 |     wide_to_long(data.frame("a")),
 8 |     "All columns in `data` must contain only numeric values."
 9 |   )
10 |   expect_error(
11 |     wide_to_long(data.frame(0)),
12 |     "Some ratings are 0. All ratings must be in 1:K where K is the number of classes."
13 |   )
14 | })
15 | 
16 | test_that("wide_to_long converts complete data correctly", {
17 |   wide_data <- data.frame(c(3, 2, 2), c(4, 2, 2))
18 |   long_data <- data.frame(item = c(1, 1, 2, 2, 3, 3),
19 |                           rater = c(1, 2, 1, 2, 1, 2),
20 |                           rating = c(3, 4, 2, 2, 2, 2))
21 | 
22 |   expect_equal(wide_to_long(wide_data), long_data)
23 | })
24 | 
25 | test_that("wide_to_long converts incomplete data correctly", {
26 |   wide_data <- data.frame(c(3, 2, 2, 3, 3, NA), c(4, 2, 2, NA, NA, 4))
27 |   long_data <- data.frame(item = c(1, 1, 2, 2, 3, 3, 4, 5, 6),
28 |                           rater = c(1, 2, 1, 2, 1, 2, 1, 1, 2),
29 |                           rating = c(3, 4, 2, 2, 2, 2, 3, 3, 4))
30 | 
31 |   expect_equal(wide_to_long(wide_data), long_data)
32 | })
33 | 
34 | 


--------------------------------------------------------------------------------
/tests/testthat/test-mcmc_diagnostics.R:
--------------------------------------------------------------------------------
 1 | test_that("mcmc_diagnostics error appropriatly", {
 2 |   expect_error(
 3 |     mcmc_diagnostics(ds_fit_optim),
 4 |     "Cannot extract MCMC diagnositcs from a optimisation fit."
 5 |   )
 6 |   expect_error(
 7 |     mcmc_diagnostics(ds_fit, pars = "z"),
 8 |     "Cannot extract MCMC diagnostics for the latent class."
 9 |   )
10 | })
11 | 
12 | test_that("MCMC diagnostics for pi have the correct form", {
13 |   K <- 4
14 |   ds_mcmc_diags_pi <- mcmc_diagnostics(ds_fit, pars = "pi")
15 |   expect_equal(nrow(ds_mcmc_diags_pi), K)
16 |   expect_type(ds_mcmc_diags_pi, "double")
17 |   expect_equal(colnames(ds_mcmc_diags_pi), c("Rhat", "ess_bulk"))
18 | 
19 |   ccds_mcmc_diags_pi <- mcmc_diagnostics(ccds_fit, pars = "pi")
20 |   expect_equal(nrow(ccds_mcmc_diags_pi), K)
21 |   expect_type(ccds_mcmc_diags_pi, "double")
22 |   expect_equal(colnames(ccds_mcmc_diags_pi), c("Rhat", "ess_bulk"))
23 | 
24 |   hds_mcmc_diags_pi <- mcmc_diagnostics(hds_fit, pars = "pi")
25 |   expect_equal(nrow(hds_mcmc_diags_pi), K)
26 |   expect_type(hds_mcmc_diags_pi, "double")
27 |   expect_equal(colnames(hds_mcmc_diags_pi), c("Rhat", "ess_bulk"))
28 | })
29 | 
30 | test_that("MCMC diagnostics for theta have the correct form", {
31 |   K <- 4
32 |   J <- 5
33 |   ds_mcmc_diags_theta <- mcmc_diagnostics(ds_fit, pars = "theta")
34 |   expect_equal(nrow(ds_mcmc_diags_theta), J * K * K)
35 |   expect_type(ds_mcmc_diags_theta, "double")
36 |   expect_equal(colnames(ds_mcmc_diags_theta), c("Rhat", "ess_bulk"))
37 | 
38 |   ccds_mcmc_diags_theta <- mcmc_diagnostics(ccds_fit, pars = "theta")
39 |   expect_equal(nrow(ccds_mcmc_diags_theta), J * K * K)
40 |   expect_type(ccds_mcmc_diags_theta, "double")
41 |   expect_equal(colnames(ccds_mcmc_diags_theta), c("Rhat", "ess_bulk"))
42 | 
43 |   hds_mcmc_diags_theta <- mcmc_diagnostics(ccds_fit, pars = "theta")
44 |   expect_equal(nrow(hds_mcmc_diags_theta), J * K * K)
45 |   expect_type(hds_mcmc_diags_theta, "double")
46 |   expect_equal(colnames(hds_mcmc_diags_theta), c("Rhat", "ess_bulk"))
47 | })
48 | 
49 | 


--------------------------------------------------------------------------------
/tests/testthat/test-model_comparison.R:
--------------------------------------------------------------------------------
 1 | test_that("loo errors appropriately", {
 2 |   expect_error(
 3 |     loo(ds_fit_grouped),
 4 |     "loo is not supported for models fit using grouped data."
 5 |   )
 6 |   expect_error(
 7 |     loo(ds_fit_optim),
 8 |     "loo cannot be calculated for models fit using optimisation."
 9 |   )
10 | })
11 | 
12 | test_that("loo works (smoke test)", {
13 |   # Sensitive to sampling.
14 |   skip_on_cran()
15 | 
16 |   expect_warning(loo(ds_fit))
17 |   expect_warning(loo(ccds_fit))
18 |   expect_warning(loo(hds_fit))
19 | })
20 | 
21 | test_that("loo output is the right format", {
22 |   loo_res <- suppressWarnings(loo(ds_fit))
23 |   expect_s3_class(loo_res, "loo")
24 | })
25 | 
26 | test_that("loo_compare works (smoke test)", {
27 |   loo_ds <- suppressWarnings(loo(ds_fit))
28 |   loo_ccds <- suppressWarnings(loo(ccds_fit))
29 |   expect_ok(loo_compare(loo_ds, loo_ccds))
30 | })
31 | 
32 | test_that("waic errors appropriately", {
33 |   expect_error(
34 |     waic(ds_fit_grouped),
35 |     "waic is not supported for models fit using grouped data."
36 |   )
37 |   expect_error(
38 |     waic(ds_fit_optim),
39 |     "waic cannot be calculated for models fit using optimisation."
40 |   )
41 | })
42 | 
43 | test_that("waic works (smoke test)", {
44 |   # Sensitive to sampling.
45 |   skip_on_cran()
46 | 
47 |   expect_warning(waic(ds_fit))
48 |   expect_warning(waic(ccds_fit))
49 |   expect_warning(waic(hds_fit))
50 | })
51 | 
52 | test_that("waic output is the right format", {
53 |   waic_res <- suppressWarnings(waic(ds_fit))
54 |   expect_s3_class(waic_res, c("waic", "loo"))
55 | })
56 | 


--------------------------------------------------------------------------------
/tests/testthat/test-point_estimate.R:
--------------------------------------------------------------------------------
  1 | test_that("point estiamte output is named", {
  2 |   all_pars <- point_estimate(ds_fit)
  3 |   expect_named(all_pars, c("pi", "theta", "z"))
  4 |   just_pi <- point_estimate(ds_fit, pars = "pi")
  5 |   expect_named(just_pi, "pi")
  6 | })
  7 | 
  8 | test_that("validate_which error appropriatly", {
  9 |   expect_error(
 10 |     validate_which("which", 2),
 11 |     "which must be a positive length numeric vector"
 12 |   )
 13 |   expect_error(
 14 |     validate_which(numeric(0), 2),
 15 |     "which must be a positive length numeric vector"
 16 |   )
 17 |   expect_error(
 18 |     validate_which(1:9, 6),
 19 |     "All numbers in `which` must be drawn from 1:6"
 20 |   )
 21 | })
 22 | 
 23 | test_that("point_estimate output for pi has correct form", {
 24 |   K <- 4
 25 |   K_caries <- 2
 26 | 
 27 |   out <- point_estimate(ds_fit, pars = "pi")$pi
 28 |   expect_equal(length(out), K)
 29 |   expect_equal(sum(out), 1)
 30 | 
 31 |   out <- point_estimate(ds_fit_optim, pars = "pi")$pi
 32 |   expect_equal(length(out), K)
 33 |   expect_equal(sum(out), 1)
 34 | 
 35 |   out <- point_estimate(ds_fit_grouped, pars = "pi")$pi
 36 |   expect_equal(length(out), K_caries)
 37 |   expect_equal(sum(out), 1)
 38 | 
 39 |   out <- point_estimate(ds_fit_grouped_optim, pars = "pi")$pi
 40 |   expect_equal(length(out), K_caries)
 41 |   expect_equal(sum(out), 1)
 42 | 
 43 |   out <- point_estimate(ccds_fit, pars = "pi")$pi
 44 |   expect_equal(length(out), K)
 45 |   expect_equal(sum(out), 1)
 46 | 
 47 |   out <- point_estimate(ccds_fit_optim, pars = "pi")$pi
 48 |   expect_equal(length(out), K)
 49 |   expect_equal(sum(out), 1)
 50 | 
 51 |   out <- point_estimate(hds_fit, pars = "pi")$pi
 52 |   expect_equal(length(out), K)
 53 |   expect_equal(sum(out), 1)
 54 | 
 55 |   out <- point_estimate(hds_fit_optim, pars = "pi")$pi
 56 |   expect_equal(length(out), K)
 57 |   expect_equal(sum(out), 1)
 58 | })
 59 | 
 60 | test_that("point estimate output for z has the correct form", {
 61 |   K <- 4
 62 |   K_caries <- 2
 63 | 
 64 |   I <- 45
 65 |   I_caries <- 3859
 66 | 
 67 |   out <- point_estimate(ds_fit, pars = "z")$z
 68 |   expect_equal(length(out), I)
 69 |   expect_true(all(out %in% 1:K))
 70 | 
 71 |   out <- point_estimate(ds_fit_optim, pars = "z")$z
 72 |   expect_equal(length(out), I)
 73 |   expect_true(all(out %in% 1:K))
 74 | 
 75 |   out <- point_estimate(ds_fit_grouped, pars = "z")$z
 76 |   expect_equal(length(out), I_caries)
 77 |   expect_true(all(out %in%  1:K_caries))
 78 | 
 79 |   out <- point_estimate(ds_fit_grouped_optim, pars = "z")$z
 80 |   expect_equal(length(out), I_caries)
 81 |   expect_true(all(out %in%  1:K_caries))
 82 | 
 83 |   out <- point_estimate(ccds_fit, pars = "z")$z
 84 |   expect_equal(length(out), I)
 85 |   expect_true(all(out %in%  1:K))
 86 | 
 87 |   out <- point_estimate(ccds_fit_optim, pars = "z")$z
 88 |   expect_equal(length(out), I)
 89 |   expect_true(all(out %in%  1:K))
 90 | 
 91 |   out <- point_estimate(hds_fit, pars = "z")$z
 92 |   expect_equal(length(out), I)
 93 |   expect_true(all(out %in%  1:K))
 94 | 
 95 |   out <- point_estimate(hds_fit_optim, pars = "z")$z
 96 |   expect_equal(length(out), I)
 97 |   expect_true(all(out %in%  1:K))
 98 | })
 99 | 
100 | test_that("class_probabilites output has correct form", {
101 |   K <- 4
102 |   K_caries <- 2
103 | 
104 |   I <- 45
105 |   I_caries <- 3859
106 | 
107 |   out <- class_probabilities(ds_fit)
108 |   expect_equal(dim(out), c(I, K))
109 |   expect_equal(unname(rowSums(out)), rep(1, I))
110 |   expect_equal(rownames(out), as.character(1:I))
111 | 
112 |   out <- class_probabilities(ds_fit_optim)
113 |   expect_equal(dim(out), c(I, K))
114 |   expect_equal(unname(rowSums(out)), rep(1, I))
115 |   expect_equal(rownames(out), as.character(1:I))
116 | 
117 |   out <- class_probabilities(ds_fit_grouped)
118 |   expect_equal(dim(out), c(I_caries, K_caries))
119 |   expect_equal(unname(rowSums(out)), rep(1, I_caries))
120 |   expect_equal(rownames(out), as.character(1:I_caries))
121 | 
122 |   out <- class_probabilities(ds_fit_grouped_optim)
123 |   expect_equal(dim(out), c(I_caries, K_caries))
124 |   expect_equal(unname(rowSums(out)), rep(1, I_caries))
125 |   expect_equal(rownames(out), as.character(1:I_caries))
126 | 
127 |   out <- class_probabilities(ccds_fit)
128 |   expect_equal(dim(out), c(I, K))
129 |   expect_equal(unname(rowSums(out)), rep(1, I))
130 |   expect_equal(rownames(out), as.character(1:I))
131 | 
132 |   out <- class_probabilities(ccds_fit_optim)
133 |   expect_equal(dim(out), c(I, K))
134 |   expect_equal(unname(rowSums(out)), rep(1, I))
135 |   expect_equal(rownames(out), as.character(1:I))
136 | 
137 |   out <- class_probabilities(hds_fit)
138 |   expect_equal(dim(out), c(I, K))
139 |   expect_equal(unname(rowSums(out)), rep(1, I))
140 |   expect_equal(rownames(out), as.character(1:I))
141 | 
142 |   out <- class_probabilities(hds_fit_optim)
143 |   expect_equal(dim(out), c(I, K))
144 |   expect_equal(unname(rowSums(out)), rep(1, I))
145 |   expect_equal(rownames(out), as.character(1:I))
146 | })
147 | 
148 | test_that("theta point_estimate for long DS (MCMC + optimisation) is correct", {
149 |   K <- 4
150 | 
151 |   ds_mcmc_out <- point_estimate(ds_fit, pars = "theta")$theta
152 |   expect_true(is.array(ds_mcmc_out))
153 |   apply(ds_mcmc_out, 1, function(x) expect_equal(rowSums(x), rep(1, K)))
154 | 
155 |   ds_optim_out <- point_estimate(ds_fit_optim, pars = "theta")$theta
156 |   expect_equal(is.array(ds_optim_out), TRUE)
157 |   apply(ds_optim_out, 1, function(x) expect_equal(rowSums(x), rep(1, K)))
158 | })
159 | 
160 | test_that("theta point_estimate for grouped DS (MCMC + optimisation) is correct", {
161 |   K <- 2
162 | 
163 |   ds_mcmc_grouped_out <- point_estimate(ds_fit_grouped, pars = "theta")$theta
164 |   expect_true(is.array(ds_mcmc_grouped_out))
165 |   apply(ds_mcmc_grouped_out, 1, function(x) expect_equal(rowSums(x), rep(1, K)))
166 | 
167 |   ds_optim_grouped_out <- point_estimate(ds_fit_grouped_optim, pars = "theta")$theta
168 |   expect_equal(is.array(ds_optim_grouped_out), TRUE)
169 |   apply(ds_optim_grouped_out, 1, function(x) expect_equal(rowSums(x), rep(1, K)))
170 | })
171 | 
172 | test_that("theta point_estimate for CCDS(MCMC + optimsation) has correct form", {
173 |   J <- 5
174 |   K <- 4
175 | 
176 |   ccds_mcmc_out <- point_estimate(ccds_fit, pars = "theta")[[1]]
177 |   expect_true(is.array(ccds_mcmc_out))
178 |   expect_equal(dim(ccds_mcmc_out), c(J, K, K))
179 |   # Test that all the off diagonal elements are equal.
180 |   expect_equal(var(ccds_mcmc_out[1, 1, -1]), 0)
181 |   apply(ccds_mcmc_out, 1, function(x) expect_equal(rowSums(x), rep(1, K)))
182 | 
183 |   ccds_optim_out <- point_estimate(ccds_fit_optim, pars = "theta")[[1]]
184 |   expect_true(is.array(ccds_optim_out))
185 |   expect_equal(dim(ccds_optim_out), c(J, K, K))
186 |   expect_equal(var(ccds_optim_out[1, 1, -1]), 0)
187 |   apply(ccds_optim_out, 1, function(x) expect_equal(rowSums(x), rep(1, K)))
188 | })
189 | 
190 | test_that("theta point_estimate for HDS (MCMC + optimsation) has correct form", {
191 |   J <- 5
192 |   K <- 4
193 | 
194 |   hds_mcmc_out <- point_estimate(hds_fit, pars = "theta")[[1]]
195 |   expect_true(is.array(hds_mcmc_out))
196 |   expect_equal(dim(hds_mcmc_out), c(J, K, K))
197 |   apply(hds_mcmc_out, 1, function(x) expect_equal(rowSums(x), rep(1, K)))
198 | 
199 |   hds_optim_out <- point_estimate(hds_fit_optim, pars = "theta")[[1]]
200 |   expect_true(is.array(hds_optim_out))
201 |   expect_equal(dim(hds_optim_out), c(J, K, K))
202 |   apply(hds_optim_out, 1, function(x) expect_equal(rowSums(x), rep(1, K)))
203 | })
204 | 


--------------------------------------------------------------------------------
/tests/testthat/test-posterior_interval.R:
--------------------------------------------------------------------------------
 1 | test_that("posterior_interval pi has the correct form", {
 2 |   ds_pi_interval <- posterior_interval(ds_fit, pars = "pi")
 3 |   expect_equal(dim(ds_pi_interval), c(4, 2))
 4 |   expect_equal(colnames(ds_pi_interval), c("5%", "95%"))
 5 | 
 6 |   ccds_pi_interval <- posterior_interval(ccds_fit, pars = "pi")
 7 |   expect_equal(dim(ccds_pi_interval), c(4, 2))
 8 |   expect_equal(colnames(ccds_pi_interval), c("5%", "95%"))
 9 | 
10 |   hds_pi_interval <- posterior_interval(hds_fit, pars = "pi")
11 |   expect_equal(dim(hds_pi_interval), c(4, 2))
12 |   expect_equal(colnames(hds_pi_interval), c("5%", "95%"))
13 | })
14 | 
15 | test_that("Can change interval probability", {
16 |   default <- posterior_interval(ds_fit, pars = "pi")
17 | 
18 |   smaller <- posterior_interval(ds_fit, pars = "pi", prob = 0.5)
19 |   expect_lte(default[1, 1], smaller[1, 1])
20 |   expect_gte(default[1, 2], smaller[1, 2])
21 | 
22 |   larger <- posterior_interval(ds_fit, pars = "pi", prob = 0.99)
23 |   expect_gte(default[1, 1], larger[1, 1])
24 |   expect_lte(default[1, 2], larger[1, 2])
25 | })
26 | 
27 | test_that("posterior_interval for theta has the correct form", {
28 |   J <- 5
29 |   K <- 4
30 | 
31 |   ds_theta_interval <- posterior_interval(ds_fit, pars = "theta")
32 |   expect_equal(dim(ds_theta_interval), c(J * K * K , 2))
33 |   expect_equal(colnames(ds_theta_interval), c("5%", "95%"))
34 | 
35 |   ccds_theta_interval <- posterior_interval(ccds_fit, pars = "theta")
36 |   expect_equal(dim(ccds_theta_interval), c(J * K * K , 2))
37 |   expect_equal(colnames(ccds_theta_interval), c("5%", "95%"))
38 | 
39 |   hds_theta_interval <- posterior_interval(hds_fit, pars = "theta")
40 |   expect_equal(dim(hds_theta_interval), c(J * K * K , 2))
41 |   expect_equal(colnames(hds_theta_interval), c("5%", "95%"))
42 | })
43 | 
44 | test_that("DS and CCDS posterior_interval for theta have the same rownames", {
45 |   ds_theta_interval <- posterior_interval(ds_fit, pars = "theta")
46 |   ccds_theta_interval <- posterior_interval(ds_fit, pars = "theta")
47 |   expect_equal(rownames(ds_theta_interval), rownames(ccds_theta_interval))
48 | })
49 | 
50 | test_that("posterior_interval errors correctly", {
51 |   expect_error(
52 |     posterior_interval(ds_fit_optim, pars = "z"),
53 |     "Can't calculate posterior intervals for a model fit using optimisation."
54 |   )
55 |   expect_error(
56 |     posterior_interval(ds_fit, pars = "z"),
57 |     "Cannot calculate quantiles for z"
58 |   )
59 | })
60 | 
61 | test_that("posterior_interval orders parameters correctly", {
62 |   correct_rownames <- sprintf("theta[1, 1, %s]", 1:K)
63 |   expect_equal(rownames(posterior_interval(ds_fit, pars = "theta"))[1:K],
64 |                correct_rownames)
65 | })
66 | 


--------------------------------------------------------------------------------
/tests/testthat/test-posterior_predict.R:
--------------------------------------------------------------------------------
 1 | test_that("posterior_predict works for all other model types (smoke test)", {
 2 |   J <- 5
 3 |   new_data <- data.frame(item = rep(1:2, each = J), rater = rep(1:J, 2))
 4 | 
 5 |   expect_ok(posterior_predict(ds_fit, new_data))
 6 |   expect_ok(posterior_predict(ccds_fit, new_data))
 7 | 
 8 |   expect_ok(posterior_predict(ds_fit_optim, new_data))
 9 |   expect_ok(posterior_predict(ccds_fit_optim, new_data))
10 | 
11 |   expect_ok(posterior_predict(ds_fit_grouped, new_data))
12 |   expect_ok(posterior_predict(ds_fit_grouped_optim, new_data))
13 | 
14 |   expect_ok(posterior_predict(hds_fit, new_data))
15 |   expect_ok(posterior_predict(hds_fit_optim, new_data))
16 | })
17 | 
18 | test_that("posterior_predict respects the seed", {
19 |   new_data <- data.frame(item = rep(1:2, each = J), rater = rep(1:J, 2))
20 |   expect_equal(posterior_predict(ds_fit, new_data, seed = 430),
21 |                posterior_predict(ds_fit, new_data, seed = 430))
22 | })
23 | 
24 | test_that("posterior_predict validates new_data correctly", {
25 | 
26 |   expect_error(
27 |     posterior_predict(ds_fit, new_data = data.frame(a = 1, b = 2, c = 3)),
28 |     "`new_data` must have two columns 'item' and 'rater'"
29 |   )
30 |   expect_error(
31 |     posterior_predict(ds_fit, new_data = data.frame(raters = 1, items = 3)),
32 |     "`new_data` must have two columns 'item' and 'rater'"
33 |   )
34 | 
35 |   wrong_new_data <- data.frame(item = rep(1:2, each = 6), rater = rep(1:6, 2))
36 |   expect_error(
37 |     posterior_predict(ds_fit, wrong_new_data),
38 |     "The number of raters in the fitted and new data must match"
39 |   )
40 | })
41 | 
42 | test_that("posterior_predict works for single rater new data", {
43 |   expect_ok(posterior_predict(ds_fit, data.frame(item = 1:2, rater = 1)))
44 |   expect_ok(posterior_predict(ds_fit, data.frame(item = 1:2, rater = 5)))
45 | })
46 | 


--------------------------------------------------------------------------------
/tests/testthat/test-posterior_samples.R:
--------------------------------------------------------------------------------
 1 | test_that("posterior_samples works (smoke test)", {
 2 |   expect_ok(posterior_samples(ds_fit))
 3 |   expect_ok(posterior_samples(ccds_fit))
 4 |   expect_ok(posterior_samples(hds_fit))
 5 |   expect_ok(posterior_samples(ds_fit_grouped))
 6 | })
 7 | 
 8 | test_that("posterior_samples output has the correct form", {
 9 |   expect_type(posterior_samples(ds_fit, pars = c("pi", "theta")), "list")
10 |   expect_length(posterior_samples(ds_fit, pars = "pi"), 1)
11 |   expect_named(
12 |     posterior_samples(ds_fit, pars = c("pi", "theta")),
13 |     c("pi", "theta")
14 |   )
15 | })
16 | 
17 | test_that("posterior_samples errors correctly", {
18 |   expect_error(
19 |     posterior_samples(ds_fit, pars = c("z")),
20 |     "Cannot return draws for marginalised discrete parameter"
21 |   )
22 | 
23 |   expect_error(posterior_samples(ds_fit, pars = c("nonsense")))
24 | })
25 | 
26 | test_that("posterior_samples returns full theta for class conditional model", {
27 |   # 100 is the number of post-warmup samples in the tests.
28 |   expect_equal(dim(posterior_samples(ccds_fit)$theta), c(100, J, K, K))
29 | })
30 | 


--------------------------------------------------------------------------------
/tests/testthat/test-simulate.R:
--------------------------------------------------------------------------------
  1 | test_that("`simulate_dawid_skene_model()` errors appropriately", {
  2 | 
  3 |   J <- 5
  4 |   K <- 4
  5 |   pi <- rep(1 / K, K)
  6 |   theta <- make_theta(0.7, J, K)
  7 |   sim_data <- data.frame(item = rep(1:2, each = 5), rater = rep(1:5, 2))
  8 | 
  9 |   expect_error(
 10 |     simulate_dawid_skene_model("a", theta, sim_data),
 11 |     "`pi` must be a numeric vector that sums to 1."
 12 |   )
 13 | 
 14 |   expect_error(
 15 |     simulate_dawid_skene_model(c(1, 1, 1), theta, sim_data),
 16 |     "`pi` must be a numeric vector that sums to 1."
 17 |   )
 18 | 
 19 |   expect_error(
 20 |     simulate_dawid_skene_model(pi, matrix(0, nrow = 2, ncol = 2), sim_data),
 21 |     "`theta` must be a three-dimensional array."
 22 |   )
 23 | 
 24 |   expect_error(
 25 |     simulate_dawid_skene_model(pi, array(0, dim = c(5, 4, 5)), sim_data),
 26 |     "The last two dimensions of `theta` must be the same."
 27 |   )
 28 | 
 29 |   bad_theta <- theta
 30 |   bad_theta[1, 1, 1] <- 2
 31 | 
 32 |   expect_error(
 33 |     simulate_dawid_skene_model(pi, bad_theta, sim_data),
 34 |   )
 35 | 
 36 |   expect_error(
 37 |     simulate_dawid_skene_model(rep(1 / 6, 6), theta, sim_data),
 38 |     "The number of ratings implied by pi and theta is not the same."
 39 |   )
 40 | 
 41 |   expect_error(
 42 |     simulate_dawid_skene_model(pi, theta, data.frame(a = 1, b = 2, c = 3)),
 43 |     "`sim_data` must have two columns 'item' and 'rater'"
 44 |   )
 45 | 
 46 |   expect_error(
 47 |     simulate_dawid_skene_model(pi, theta, data.frame(item = 1, raterr = 1)),
 48 |     "`sim_data` must have two columns 'item' and 'rater'"
 49 |   )
 50 | 
 51 |   expect_error(
 52 |     simulate_dawid_skene_model(pi, theta, data.frame(item = 1, rater = 6)),
 53 |     "The number of raters implied by theta and implied by the simulation data must match."
 54 |   )
 55 | })
 56 | 
 57 | test_that("`simulate_dawid_skene_model()` has sensible output", {
 58 | 
 59 |   J <- 5
 60 |   K <- 4
 61 |   pi <- rep(1 / K, K)
 62 |   theta <- make_theta(0.7, J, K)
 63 |   sim_data <- data.frame(item = rep(1:2, each = 5), rater = rep(1:5, 2))
 64 |   sim <- simulate_dawid_skene_model(pi, theta, sim_data)
 65 | 
 66 |   expect_lte(max(sim$ratings), length(pi))
 67 |   expect_gte(min(sim$ratings), 1)
 68 | 
 69 |   expect_lte(max(sim$z), length(pi))
 70 |   expect_gte(min(sim$z), 1)
 71 | 
 72 |   expect_equal(length(unique(sim[sim$item == 1, "z"])), 1)
 73 |   expect_equal(length(unique(sim[sim$item == 2, "z"])), 1)
 74 | 
 75 |   expect_equal(sim[, c("item", "rater")], sim_data)
 76 | })
 77 | 
 78 | test_that("`simulate_hier_dawid_skene_model()` errors appropriately", {
 79 | 
 80 |   J <- 5
 81 |   K <- 4
 82 |   pi <- rep(1 / K, K)
 83 |   mu <- matrix(0, nrow = K, ncol = K)
 84 |   diag(mu) <- 5
 85 |   sigma <- matrix(sqrt(2) / sqrt(pi), nrow = K, ncol = K)
 86 |   sim_data <- data.frame(item = rep(1:2, each = 5), rater = rep(1:5, 2))
 87 | 
 88 |   expect_error(
 89 |     simulate_hier_dawid_skene_model("a", mu, sigma, sim_data),
 90 |     "`pi` must be a numeric vector that sums to 1."
 91 |   )
 92 | 
 93 |   expect_error(
 94 |     simulate_hier_dawid_skene_model(c(1, 1, 1), mu, sigma, sim_data),
 95 |     "`pi` must be a numeric vector that sums to 1."
 96 |   )
 97 | 
 98 |   expect_error(
 99 |     simulate_hier_dawid_skene_model(pi, c(1, 1, 1), sigma, sim_data),
100 |     "`mu` must be a square matrix."
101 |   )
102 | 
103 |   expect_error(
104 |     simulate_hier_dawid_skene_model(pi, mu, c(1, 1, 1), sim_data),
105 |     "`sigma` must be a square matrix with all elements be greater then 0."
106 |   )
107 | 
108 |   expect_error(
109 |     simulate_hier_dawid_skene_model(rep(1 / 5, 5), mu, sigma, sim_data),
110 |     "`pi`, `mu` and `sigma` imply different numbers of categories."
111 |   )
112 | 
113 |   expect_error(
114 |     simulate_hier_dawid_skene_model(pi, mu, sigma, data.frame(a = 1, b = 2, c = 3)),
115 |     "`sim_data` must have two columns 'item' and 'rater'"
116 |   )
117 | 
118 |   expect_error(
119 |     simulate_hier_dawid_skene_model(pi, mu, sigma, data.frame(item = 1, raterr = 1)),
120 |     "`sim_data` must have two columns 'item' and 'rater'"
121 |   )
122 | })
123 | 
124 | test_that("`simulate_hier_dawid_skene_model()` has sensible output", {
125 | 
126 |   J <- 5
127 |   K <- 4
128 |   pi <- rep(1 / K, K)
129 |   mu <- matrix(0, nrow = K, ncol = K)
130 |   diag(mu) <- 5
131 |   sigma <- matrix(sqrt(2) / sqrt(pi), nrow = K, ncol = K)
132 |   sim_data <- data.frame(item = rep(1:2, each = 5), rater = rep(1:5, 2))
133 |   sim_out <- simulate_hier_dawid_skene_model(pi, mu, sigma, sim_data)
134 |   sim <- sim_out$sim
135 |   theta <- sim_out$theta
136 | 
137 |   expect_lte(max(sim$ratings), length(pi))
138 |   expect_gte(min(sim$ratings), 1)
139 | 
140 |   expect_lte(max(sim$z), length(pi))
141 |   expect_gte(min(sim$z), 1)
142 | 
143 |   expect_equal(length(unique(sim[sim$item == 1, "z"])), 1)
144 |   expect_equal(length(unique(sim[sim$item == 2, "z"])), 1)
145 | 
146 |   expect_equal(sim[, c("item", "rater")], sim_data)
147 | 
148 |   expect_equal(dim(theta), c(J, K, K))
149 |   expect_equal(sum(theta[1, 1, ]), 1)
150 | })
151 | 
152 | test_that("`make_theta` works", {
153 | 
154 |   J <- 5
155 |   K <- 4
156 | 
157 |   expect_error(
158 |     make_theta("a", J, K),
159 |     "`diag_values` must be a probability."
160 |   )
161 |   expect_error(
162 |     make_theta(2, J, K),
163 |     "`diag_values` must be a probability."
164 |   )
165 | 
166 |   expect_error(
167 |     make_theta(rep(0.7, 6), J, K),
168 |     "`diag_values` must be length 1 or length `J`."
169 |   )
170 | 
171 |   expect_equal(make_theta(0.7, J, K), make_theta(rep(0.7, J), J, K))
172 | 
173 |   theta <- make_theta(0.7, J, K)
174 |   expect_equal(theta[1, 1, 1], 0.7)
175 |   expect_equal(dim(theta), c(J, K, K))
176 |   expect_equal(theta[1, 1, 2], (1 - 0.7) / (K - 1))
177 | })
178 | 
179 | test_that("`make_complete_rating_design_sim_data()`", {
180 | 
181 |   I <- 10
182 |   J <- 5
183 |   N <- 2
184 |   sim_data <- make_complete_rating_design_sim_data(I, J, N)
185 | 
186 |   expect_equal(colnames(sim_data), c("item", "rater"))
187 |   expect_equal(ncol(sim_data), 2)
188 |   expect_s3_class(sim_data, "data.frame")
189 | 
190 |   expect_equal(nrow(sim_data), I * J * N)
191 |   expect_equal(max(sim_data$item), I)
192 |   expect_equal(max(sim_data$rater), J)
193 | })
194 | 


--------------------------------------------------------------------------------
/tests/testthat/test_fit_class.R:
--------------------------------------------------------------------------------
 1 | test_that("new_mcmc_fit and new_optim_fit work", {
 2 |   expect_s3_class(new_mcmc_fit(2, 2, 2, 2), c("mcmc_fit", "rater_fit"))
 3 |   expect_s3_class(new_optim_fit(2, 2, 2, 2), c("optim_fit", "rater_fit"))
 4 | })
 5 | 
 6 | test_that("print works for rater_fit objects", {
 7 |   expect_output(
 8 |     print(ds_fit),
 9 |     "Bayesian Dawid and Skene Model with MCMC draws"
10 |   )
11 | 
12 |   expect_output(
13 |     print(hds_fit),
14 |     "Bayesian Hierarchical Dawid and Skene Model with MCMC draws"
15 |   )
16 | })
17 | 
18 | test_that("is.mcmc_fit works", {
19 | 
20 |   test_fit <- 2
21 |   expect_false(is.rater_fit(test_fit))
22 |   expect_false(is.mcmc_fit(test_fit))
23 | 
24 |   class(test_fit) <- c("mcmc_fit", "rater_fit")
25 |   expect_true(is.mcmc_fit(test_fit))
26 |   expect_true(is.rater_fit(test_fit))
27 | })
28 | 
29 | test_that("plot.fit dispatches correctly", {
30 | 
31 |   # Why does this test not compare the plots directly?
32 |   # Because of the internals of {rater} the plots are created in different
33 |   # environments causing testthat 3e and R 4.1 to fail. In any case testing
34 |   # the geoms is probably sufficient here as we are really testing the sanity
35 |   # of the switch statement controlling plot dispatch.
36 | 
37 |   plot_theta_p <- plot_theta(ds_fit)
38 |   plot_p <- plot(ds_fit, pars = "theta")
39 |   expect_equal(get_geoms(plot_p), get_geoms(plot_theta_p))
40 | 
41 |   plot_class_probs_p <- plot_theta(ds_fit)
42 |   plot_p <- plot(ds_fit, pars = "latent_class")
43 |   expect_equal(get_geoms(plot_p), get_geoms(plot_class_probs_p))
44 | 
45 |   plot_pi_p <- plot_pi(ds_fit)
46 |   plot_p <- plot(ds_fit, pars = "pi")
47 |   expect_equal(get_geoms(plot_p), get_geoms(plot_pi_p))
48 | })
49 | 
50 | test_that("as_mcmc.list works", {
51 |   expect_error(as_mcmc.list(ds_fit_optim))
52 |   expect_error(as_mcmc.list(2))
53 |   expect_true(coda::is.mcmc.list(as_mcmc.list(ds_fit)))
54 | })
55 | 
56 | test_that("prior_summary works", {
57 |   expect_equal(prior_summary(ds_fit), ds_fit$model)
58 | })
59 | 
60 | test_that("get_stanfit works", {
61 |   expect_equal(get_stanfit(ds_fit), ds_fit$samples)
62 |   expect_equal(get_stanfit(ds_fit_optim), ds_fit_optim$estimates)
63 |   expect_error(get_stanfit(2))
64 | })
65 | 
66 | test_that("summary works", {
67 |   expect_output(summary(ds_fit))
68 |   expect_output(summary(ds_fit_optim))
69 |   expect_output(summary(ds_fit_grouped))
70 | 
71 |   expect_output(summary(ccds_fit))
72 |   expect_output(summary(hds_fit))
73 | })
74 | 


--------------------------------------------------------------------------------
/tests/testthat/test_model_class.R:
--------------------------------------------------------------------------------
 1 | test_that("print works for models with default parameters", {
 2 |   expect_snapshot(print(dawid_skene()))
 3 |   expect_snapshot(print(class_conditional_dawid_skene()))
 4 |   expect_snapshot(print(hier_dawid_skene()))
 5 | })
 6 | 
 7 | test_that("summary works for models", {
 8 |   expect_output(summary(dawid_skene()), "Bayesian Dawid and Skene Model")
 9 |   expect_output(
10 |     summary(dawid_skene(alpha = c(3, 3))),
11 |     "Bayesian Dawid and Skene Model"
12 |   )
13 |   expect_output(
14 |     summary(hier_dawid_skene()),
15 |     "Bayesian Hierarchical Dawid and Skene Model"
16 |   )
17 | })
18 | 
19 | test_that("is.* functions work for models", {
20 |   expect_false(is.rater_model(2))
21 |   expect_true(is.rater_model(dawid_skene()))
22 | 
23 |   expect_false(is.dawid_skene(2))
24 |   expect_true(is.dawid_skene(dawid_skene()))
25 | 
26 |   expect_false(is.hier_dawid_skene(2))
27 |   expect_true(is.hier_dawid_skene(hier_dawid_skene()))
28 | 
29 |   expect_false(is.class_conditional_dawid_skene(2))
30 |   expect_true(is.class_conditional_dawid_skene(class_conditional_dawid_skene()))
31 | })
32 | 
33 | test_that("get_name and get_file work", {
34 |   expect_equal(get_name(dawid_skene()), dawid_skene()$name)
35 |   expect_equal(get_file(dawid_skene()), dawid_skene()$file)
36 | })
37 | 


--------------------------------------------------------------------------------
/tests/testthat/test_models.R:
--------------------------------------------------------------------------------
  1 | test_that("dawid_skene() constructor works", {
  2 |   model <- dawid_skene()
  3 | 
  4 |   expect_s3_class(model, "rater_model")
  5 |   expect_s3_class(model, "dawid_skene")
  6 | 
  7 |   expect_length(model, 4)
  8 | 
  9 |   pars <- get_parameters(model)
 10 | 
 11 |   expect_length(pars, 2)
 12 |   expect_named(pars, c("alpha", "beta"))
 13 |   expect_equal(pars, list(NULL, NULL), ignore_attr = "names")
 14 | })
 15 | 
 16 | test_that("dawid_skene() errors correctly", {
 17 |   expect_error(
 18 |     dawid_skene(beta = "a string"),
 19 |     "beta must be a numeric matrix or array"
 20 |   )
 21 |   expect_error(
 22 |     dawid_skene(beta = list()),
 23 |     "beta must be a numeric matrix or array"
 24 |   )
 25 | 
 26 |   expect_error(
 27 |     dawid_skene(beta = matrix(1, nrow = 2, ncol = 3)),
 28 |     "beta must a square matrix"
 29 |   )
 30 | 
 31 |   expect_error(
 32 |     dawid_skene(beta = array(1, dim = rep(1, 4))),
 33 |     "`beta` must be a 3 dimensional array"
 34 |   )
 35 | 
 36 |   expect_error(
 37 |     dawid_skene(beta = array(1, c(3, 2, 3))),
 38 |     "Subslices of `beta` must be square matrices."
 39 |   )
 40 | 
 41 |   expect_error(
 42 |     dawid_skene(alpha = c(3, 3), beta = matrix(1, nrow = 3, ncol = 3)),
 43 |     "`alpha` and `beta` are not compatible.",
 44 |   )
 45 | })
 46 | 
 47 | test_that("Computation of K is correct in dawid_skene()", {
 48 |   expect_equal(get_K(dawid_skene(alpha = 1)), 1)
 49 |   expect_equal(get_K(dawid_skene(alpha = rep(1, 100))), 100)
 50 | 
 51 |   expect_equal(get_K(dawid_skene(beta = matrix(1:4, nrow = 2))), 2)
 52 |   expect_equal(get_K(dawid_skene(beta = matrix(1:16, nrow = 4))), 4)
 53 | 
 54 |   expect_equal(get_K(dawid_skene(beta = array(1, c(3, 2, 2)))), 2)
 55 | })
 56 | 
 57 | test_that("hier_dawid_skene() constructor works", {
 58 |   model <- hier_dawid_skene()
 59 | 
 60 |   expect_s3_class(model, "rater_model")
 61 |   expect_s3_class(model, "hier_dawid_skene")
 62 | 
 63 |   expect_length(model, 4)
 64 | 
 65 |   pars <- get_parameters(model)
 66 | 
 67 |   expect_named(pars, "alpha")
 68 |   expect_length(pars, 1)
 69 |   expect_equal(pars, list(NULL), ignore_attr = "names")
 70 | })
 71 | 
 72 | test_that("class_conditional_dawid_skene() constructor works", {
 73 |   model <- class_conditional_dawid_skene()
 74 | 
 75 |   expect_s3_class(model, "rater_model")
 76 |   expect_s3_class(model, "class_conditional_dawid_skene")
 77 | 
 78 |   expect_length(model, 4)
 79 | 
 80 |   pars <- get_parameters(model)
 81 | 
 82 |   expect_length(pars, 3)
 83 |   expect_named(pars, c("alpha", "beta_1", "beta_2"))
 84 |   expect_equal(pars, list(NULL, NULL, NULL), ignore_attr = "names")
 85 | })
 86 | 
 87 | test_that("class_conditional_dawid_skene() errors correctly", {
 88 |   expect_error(
 89 |     class_conditional_dawid_skene(alpha = rep(1, 2), beta_1 = rep(1, 3)),
 90 |     "Prior parameters are not compatible."
 91 |   )
 92 | })
 93 | 
 94 | test_that("validate alpha errors appropriately", {
 95 |   expect_error(validate_alpha("a string"))
 96 |   expect_error(validate_alpha(list()))
 97 | 
 98 |   expect_error(validate_alpha(c(3, 3)), NA)
 99 |   expect_error(validate_alpha(NULL), NA)
100 | })
101 | 


--------------------------------------------------------------------------------
/tests/testthat/test_plotting.R:
--------------------------------------------------------------------------------
 1 | test_that("Plotting works for long data models fit with MCMC (smoke test)", {
 2 |   expect_ok(plot(ds_fit, "pi"))
 3 |   expect_ok(plot(ds_fit, "theta"))
 4 |   expect_ok(plot(ds_fit, "theta", theta_plot_type = "points"))
 5 |   expect_ok(plot(ds_fit, "class_probabilities"))
 6 | 
 7 |   expect_ok(plot(ccds_fit, "pi"))
 8 |   expect_ok(plot(ccds_fit, "theta"))
 9 |   expect_ok(plot(ccds_fit, "theta", theta_plot_type = "points"))
10 |   expect_ok(plot(ccds_fit, "class_probabilities"))
11 | 
12 |   expect_ok(plot(hds_fit, "pi"))
13 |   expect_ok(plot(hds_fit, "theta"))
14 |   expect_ok(plot(hds_fit, "theta", theta_plot_type = "points"))
15 |   expect_ok(plot(hds_fit, "class_probabilities"))
16 | })
17 | 
18 | test_that("Plotting works for long data models fit with optimisation (smoke test)", {
19 |   expect_ok(plot(ds_fit_optim, "pi"))
20 |   expect_ok(plot(ds_fit_optim, "theta"))
21 |   expect_error(plot(ds_fit_optim, "theta", theta_plot_type = "points"))
22 |   expect_ok(plot(ds_fit_optim, "class_probabilities"))
23 | 
24 |   expect_ok(plot(ccds_fit_optim, "pi"))
25 |   expect_ok(plot(ccds_fit_optim, "theta"))
26 |   expect_error(plot(ccs_fit_optim, "theta", theta_plot_type = "points"))
27 |   expect_ok(plot(ccds_fit_optim, "class_probabilities"))
28 | 
29 |   expect_ok(plot(hds_fit_optim, "pi"))
30 |   expect_ok(plot(hds_fit_optim, "theta"))
31 |   expect_error(plot(hds_fit_optim, "theta", theta_plot_type = "points"))
32 |   expect_ok(plot(hds_fit_optim, "class_probabilities"))
33 | })
34 | 
35 | test_that("Plotting works for grouped data Dawid-Skene (MCMC + optimisation) (smoke test)", {
36 |   expect_ok(plot(ds_fit_grouped, "pi"))
37 |   expect_ok(plot(ds_fit_grouped, "theta"))
38 |   expect_ok(plot(ds_fit_grouped, "theta", theta_plot_type = "points"))
39 |   expect_ok(plot(ds_fit_grouped, "class_probabilities"))
40 | 
41 |   expect_ok(plot(ds_fit_grouped_optim, "pi"))
42 |   expect_ok(plot(ds_fit_grouped_optim, "theta"))
43 |   expect_error(plot(ds_fit_grouped_option, "theta", theta_plot_type = "points"))
44 |   expect_ok(plot(ds_fit_grouped_optim, "class_probabilities"))
45 | })
46 | 
47 | test_that("plot_prevalence output has correct type", {
48 |   ds_plot <- plot_pi(ds_fit)
49 |   expect_equal(get_geoms(ds_plot), c("GeomPoint", "GeomErrorbar"))
50 | })
51 | 
52 | test_that("plot_raters output has correct type", {
53 |   ds_plot <- plot_theta(ds_fit)
54 |   expect_equal(get_facet_dim(ds_plot), 5)
55 |   expect_equal(get_geoms(ds_plot), c("GeomTile", "GeomText"))
56 | })
57 | 
58 | test_that("plot_latent_class output has correct type", {
59 |   ds_plot <- plot_class_probabilities(ds_fit)
60 |   expect_equal(get_geoms(ds_plot), c("GeomTile", "GeomText"))
61 | })
62 | 
63 | test_that("plot_theta_points output has correct type", {
64 |   ds_plot <- plot_theta_points(ds_fit)
65 |   expect_equal(get_geoms(ds_plot), c("GeomPoint", "GeomErrorbar"))
66 | })
67 | 


--------------------------------------------------------------------------------
/tests/testthat/test_rater.R:
--------------------------------------------------------------------------------
  1 | test_that("verbose flag works", {
  2 |   expect_silent(
  3 |     suppressWarnings(
  4 |       rater(anesthesia, "dawid_skene",
  5 |             chains = 1, iter = 200, verbose = FALSE)
  6 |       )
  7 |   )
  8 | })
  9 | 
 10 | test_that("Passing model as string works", {
 11 | 
 12 |   # Unexplained warnings in the past - potentially flaky...
 13 |   skip_on_cran()
 14 | 
 15 |   # This was failing previously because the check of whether the model and
 16 |   # format are compatible requires an *actual* model, so we have to validate
 17 |   # and convert string -> model object before validating.
 18 |   expect_ok(
 19 |     rater(caries, "dawid_skene", method = "optim", data_format = "grouped")
 20 |   )
 21 | 
 22 |   fit_function <- rater(anesthesia, dawid_skene(), method = "optim")
 23 |   fit_string <- rater(anesthesia, "dawid_skene", method = "optim")
 24 |   expect_equal(fit_function, fit_string)
 25 | })
 26 | 
 27 | test_that("rater infernce is 'correct'", {
 28 |   # TODO This is a stopgap solution designed to detect large changes in
 29 |   # behaviour. In future, it would be great to have a full framework to assess
 30 |   # the the performance of the inference.
 31 |   pi_est <- point_estimate(ds_fit_optim, pars = "pi")[[1]]
 32 |   # Correct value is 0.41.
 33 |   expect_lt(pi_est[[2]], 0.45)
 34 |   expect_gt(pi_est[[2]], 0.35)
 35 | })
 36 | 
 37 | test_that("rater returns objects of the correct type", {
 38 |   expect_true(is.rater_fit(ds_fit))
 39 |   expect_true(is.mcmc_fit(ds_fit))
 40 |   expect_true(is.optim_fit(ds_fit_optim))
 41 | })
 42 | 
 43 | test_that("rater errors correctly", {
 44 |   expect_error(
 45 |     rater(anesthesia, "not_a_proper_model"),
 46 |     "Invalid model string specification."
 47 |   )
 48 |   expect_error(
 49 |     rater(caries, hier_dawid_skene(), data_format = "grouped"),
 50 |     "Grouped data can only be used with the Dawid and Skene model."
 51 |   )
 52 |   expect_error(
 53 |     rater(1:10, dawid_skene()),
 54 |     "`data` must be a data.frame or matrix."
 55 |   )
 56 |   expect_error(
 57 |     rater(data.frame(1, 2), dawid_skene()),
 58 |     "Long format `data` must have exactly three columns."
 59 |   )
 60 |   expect_error(
 61 |     rater(data.frame(item = 1, rater = 1, ratingg = 1), dawid_skene()),
 62 |     "Long format `data` must have three columns with names: item, rater, rating."
 63 |   )
 64 |   expect_error(
 65 |     rater(data.frame(anything = 1, not_n = 1), dawid_skene(), data_format = "grouped"),
 66 |     "The last column must be named `n`."
 67 |   )
 68 | 
 69 |   expect_snapshot(
 70 |     rater(data.frame(item = 0, rater = 0, rating = 0), dawid_skene()),
 71 |     error = TRUE
 72 |   )
 73 | 
 74 |   expect_snapshot(
 75 |     rater(data.frame(thing = 0, n = 0), dawid_skene(), data_format = "grouped"),
 76 |     error = TRUE
 77 |   )
 78 | })
 79 | 
 80 | test_that("rater provides useful messages for probably not long data", {
 81 | 
 82 |   expect_error(
 83 |     suppressMessages(
 84 |       expect_message(
 85 |         rater(data.frame(1, 2, 3, 3), "dawid_skene"),
 86 |         "Is your data in wide format? Consider using `data_format = wide`."
 87 |       )
 88 |     )
 89 |   )
 90 | 
 91 |   expect_error(
 92 |     suppressMessages(
 93 |       expect_message(
 94 |         rater(data.frame(1, 2, 3, 31), "dawid_skene"),
 95 |         "Is your data in grouped format? Consider using `data_format = grouped`."
 96 |       )
 97 |     )
 98 |   )
 99 | })
100 | 
101 | test_that("parse_priors is correct for the Dawid-Skene model", {
102 | 
103 |   anesthesia_list <- as_stan_data(anesthesia, "long", default_colnames)
104 | 
105 |   K <- anesthesia_list$K
106 |   J <- anesthesia_list$J
107 |   ds_priors <- parse_priors(dawid_skene(), K, J)
108 | 
109 |   # Construct the default priors.
110 |   default_alpha <- rep(3, K)
111 | 
112 |   N <- 8
113 |   p <- 0.6
114 |   on_diag <- N * p
115 |   off_diag <- N * (1 - p) / (K - 1)
116 |   beta_slice <- matrix(off_diag, nrow = K, ncol = K)
117 |   diag(beta_slice) <- on_diag
118 |   default_beta <- array(dim = c(J, K, K))
119 |   for (j in 1:J) {
120 |     default_beta[j, , ] <- beta_slice
121 |   }
122 | 
123 |   expect_equal(ds_priors$alpha, default_alpha)
124 |   expect_equal(ds_priors$beta, default_beta)
125 | 
126 |   test_alpha <- rep(9, K)
127 |   test_beta_mat <- matrix(17, nrow = K, ncol = K)
128 |   test_beta_array <- array(dim = c(J, K, K))
129 |   for (j in 1:J) {
130 |     test_beta_array[j, , ] <- test_beta_mat
131 |   }
132 | 
133 |   ds_priors_mat <- parse_priors(
134 |     dawid_skene(alpha = test_alpha, beta = test_beta_mat),
135 |     K,
136 |     J
137 |   )
138 | 
139 |   expect_equal(ds_priors_mat$alpha, test_alpha)
140 |   expect_equal(ds_priors_mat$beta, test_beta_array)
141 | 
142 |   ds_priors_array <- parse_priors(
143 |     dawid_skene(alpha = test_alpha, beta = test_beta_array),
144 |     K,
145 |     J
146 |   )
147 | 
148 |   expect_equal(ds_priors_array$beta, test_beta_array)
149 | })
150 | 
151 | test_that("parse_priors is correct for the Hierarchical Dawid-Skene model", {
152 |   default_alpha <- rep(3, K)
153 |   test_alpha <- rep(9, K)
154 | 
155 |   hds_priors <- parse_priors(hier_dawid_skene(), K, J)
156 |   expect_equal(hds_priors$alpha, default_alpha)
157 | 
158 |   hds_priors <- parse_priors(hier_dawid_skene(alpha = test_alpha), K, J)
159 |   expect_equal(hds_priors$alpha, test_alpha)
160 | })
161 | 
162 | test_that("parse_priors is correct for the Class conditional Dawid-Skene model", {
163 | 
164 |   test_beta_1 <- rep(1, K)
165 |   test_beta_2 <- rep(98, K)
166 |   test_alpha <- rep(9, K)
167 | 
168 |   ccds_priors <- parse_priors(
169 |     class_conditional_dawid_skene(
170 |       alpha = test_alpha,
171 |       beta_1 = test_beta_1,
172 |       beta_2 = test_beta_2
173 |     ),
174 |     K,
175 |     J
176 |   )
177 | 
178 |   expect_equal(ccds_priors$alpha, test_alpha)
179 |   expect_equal(ccds_priors$beta_1, test_beta_1)
180 |   expect_equal(ccds_priors$beta_2, test_beta_2)
181 | })
182 | 
183 | test_that("as_stan_data handles wide data correctly", {
184 | 
185 |   wide_data <- data.frame(c(3, 2, 2), c(4, 2, 2))
186 |   long_data <- data.frame(item = c(1, 1, 2, 2, 3, 3),
187 |                           rater = c(1, 2, 1, 2, 1, 2),
188 |                           rating = c(3, 4, 2, 2, 2, 2))
189 | 
190 |   expect_equal(as_stan_data(wide_data, "wide", default_colnames),
191 |                as_stan_data(long_data, "long", default_colnames))
192 | })
193 | 
194 | test_that("create_inits() works for the Dawid-Skene model", {
195 |   anesthesia_stan_data <- as_stan_data(anesthesia, "long", default_colnames)
196 |   K <- anesthesia_stan_data$K
197 |   J <- anesthesia_stan_data$J
198 | 
199 |   pi_init <- rep(1 / K, K)
200 |   theta_init <- array(0.2 / (K - 1), c(J, K, K))
201 |   for (j in 1:J) {
202 |       diag(theta_init[j, ,]) <- 0.8
203 |   }
204 | 
205 |   expect_equal(
206 |     create_inits(dawid_skene(), anesthesia_stan_data),
207 |     function(n) list(theta = theta_init, pi = pi_init),
208 |     ignore_function_env = TRUE
209 |   )
210 | })
211 | 
212 | test_that("create_inits() works for the class conditional Dawid-Skene model", {
213 |   anesthesia_stan_data <- as_stan_data(anesthesia, "long", default_colnames)
214 |   K <- anesthesia_stan_data$K
215 |   J <- anesthesia_stan_data$J
216 | 
217 |   pi_init <- rep(1 / K, K)
218 |   theta_init <- matrix(0.8, nrow = J, ncol = K)
219 | 
220 |   expect_equal(
221 |     create_inits(class_conditional_dawid_skene(), anesthesia_stan_data),
222 |     function(n) list(theta = theta_init, pi = pi_init),
223 |     ignore_function_env = TRUE
224 |   )
225 | })
226 | 
227 | test_that("create_inits() works for the hierarchical Dawid-Skene model", {
228 |   anesthesia_stan_data <- as_stan_data(anesthesia, "long", default_colnames)
229 | 
230 |   hds_init_func <- create_inits(hier_dawid_skene(), anesthesia_stan_data)
231 |   expect_named(hds_init_func(), c("pi", "mu", "sigma", "beta_raw"))
232 | })
233 | 
234 | test_that("Invalid `long_data_colnames` generates appropriate errors", {
235 | 
236 |   expect_error(
237 |     rater(anesthesia, "dawid_skene", long_data_colnames = lapply(1:4, identity)),
238 |     "`long_data_colnames` must be length three."
239 |   )
240 | 
241 |   expect_error(
242 |     rater(anesthesia, "dawid_skene", long_data_colnames = 1:3),
243 |     "`long_data_colnames` must be a character vector."
244 |   )
245 | 
246 |   expect_error(
247 |     rater(anesthesia, "dawid_skene", long_data_colnames = letters[1:3]),
248 |     "`long_data_colnames` must have names: `item`, `rater` and `rating`."
249 |   )
250 | 
251 |   expect_error(
252 |     rater(anesthesia, "dawid_skene",
253 |           long_data_colnames = c(item = "a", rater = "b", ratingg = "c")),
254 |     "`long_data_colnames` must have names: `item`, `rater` and `rating`."
255 |   )
256 | 
257 |   expect_warning(
258 |     rater(caries, "dawid_skene", data_format = "grouped", method = "optim",
259 |           long_data_colnames = c(item = "a", rater = "b", rating = "c")),
260 |     "Non-default `long_data_colnames` will be ignored as `data_format` is not `'long'`"
261 |   )
262 | })
263 | 
264 | test_that("Non-default `long_data_colnames` works", {
265 | 
266 |   skip_on_cran()
267 | 
268 |   new_anesthesia_1 <- anesthesia
269 |   colnames(new_anesthesia_1) <- c("a", "b", "c")
270 | 
271 |   expect_identical(
272 |     rater(new_anesthesia_1, "dawid_skene", method = "optim",
273 |           long_data_colnames = c(item = "a", rater = "b", rating = "c")
274 |           ),
275 |     rater(anesthesia, "dawid_skene", method = "optim")
276 |   )
277 | 
278 |   new_anesthesia_2 <- anesthesia
279 |   colnames(new_anesthesia_2) <- c("a", "b", "c")
280 |   new_anesthesia_2 <- new_anesthesia_2[, c(2, 1, 3)]
281 | 
282 |   expect_identical(
283 |     rater(new_anesthesia_2, "dawid_skene", method = "optim",
284 |           long_data_colnames = c(item = "a", rater = "b", rating = "c")
285 |           ),
286 |     rater(anesthesia, "dawid_skene", method = "optim")
287 |   )
288 | })
289 | 
290 | 


--------------------------------------------------------------------------------
/tests/testthat/test_utils.R:
--------------------------------------------------------------------------------
1 | test_that("logsumexp works", {
2 |   expect_equal(logsumexp(c(log(1))), 0)
3 |   expect_equal(logsumexp(1), 1)
4 | })
5 | 
6 | test_that("softmax works", {
7 |   expect_equal(softmax(c(1, 1)), c(1 / 2, 1 / 2))
8 | })
9 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/vignettes/data-formats.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Data formats"
 3 | output: rmarkdown::html_vignette
 4 | vignette: >
 5 |   %\VignetteIndexEntry{Data formats}
 6 |   %\VignetteEngine{knitr::rmarkdown}
 7 |   %\VignetteEncoding{UTF-8}
 8 | ---
 9 | 
10 | ```{r, include = FALSE}
11 | knitr::opts_chunk$set(
12 |   collapse = TRUE,
13 |   comment = "#>"
14 | )
15 | ```
16 | 
17 | ```{r setup}
18 | library(rater)
19 | ```
20 | 
21 | ## Data formats for rater
22 | 
23 | {rater} allows the user to fit statistical models to repeated categorical rating data. There are, however, different formats this rating data can be arranged in. {rater} supports three of the most common, which we call `"long"`, `"wide"` and `"grouped"`. This vignette explains: 
24 | 
25 | * What characterises each format,
26 | * What features of each format {rater} relies on, and, 
27 | * In what circumstances you may want to use each format. 
28 | 
29 | ## Long format
30 | 
31 | The default format for data passed to the `rater()` function is `"long"`. This is the default format because it is capable of representing rating data with incomplete designs (not every rater rates each item) and with repeated ratings (raters ratings items more than once). Long data is defined by having three columns: 
32 | 
33 | 1. The index of the rater
34 | 2. The index of the item 
35 | 3. The rating given by that rater for that item
36 | 
37 | in {rater}, for this data to be recognised, these columns must have names `"rater"`, `"item"` and `"rating"` respectively. An example of long data in the appropriate format for {rater} is given illustrated below: 
38 | 
39 | ```{r show-long-data, echo = FALSE}
40 | long_data <- data.frame(
41 |   item = c(1, 1, 2, 2, 3, 3), 
42 |   rater = c(1, 2, 1, 2, 1, 2), 
43 |   rating = c(3, 4, 2, 2, 2, 2)
44 | )
45 | knitr::kable(long_data)
46 | ```
47 | 
48 | We can read the first row, for example, of the dataset as saying that the item 1 was rated a 3 by rater 1. Repeated ratings are represented by rows with the same rater and item (and possibly rating) combination and missing data is represented simply by the absence of certain rater and item combinations. Long data is useful because it can represent *all* possible categorical rating data. The `anesthesia` data included with {rater} (which includes repeated ratings) is represented in this format.
49 | 
50 | To illustrate the differences between formats, the data used in the long data example will be used also presented in the wide and grouped formats below.
51 | 
52 | ## Wide data
53 | 
54 | The next data format which can be used in `rater()` is the wide format. In wide format data each column corresponds to the ratings of a particular rater, each row is an item and the entries of the corresponding table are the ratings themselves. For example the following table presents the previous long data example in wide format:
55 | 
56 | ```{r show-wide-data, echo = FALSE} 
57 | wide_data <- data.frame(
58 |   rater_1 = c(3, 2, 2), 
59 |   rater_2 = c(4, 2, 2)
60 | )
61 | knitr::kable(wide_data)
62 | ```
63 | 
64 | This format is natural if there are no repeated ratings, which cannot be represented in this format. Missing data can be represented by explicit `NA` entries in the data. In `rater()` this format can be used by setting `data_format = "wide"`. Internally this simply converts the data to long format; there is no computational advantage to using wide data. 
65 | 
66 | Note that when wide data is passed to `rater()` any column names (i.e. `rater_1` and `rater_2` above) will be ignored and the raters will be numbered as they appear in the data left to right. In future 'labelling' of the rater may be supported in which case the columns names will be interpreted as the labels.
67 | 
68 | ## Grouped data 
69 | 
70 | The final format of data supported by {rater} is the grouped format. This format can be thought of as an extension of the wide format where rating 'patterns' which occurs multiple times are collapsed together, while a new column is added to represent how many times each pattern occurred in the original data. For example in the running data example the pattern of both raters giving the rating 2 occurs twice, while the pattern of rater 1 giving the rating a 3 and rater 2 giving the rating 4 occurs once. This is illustrated in the grouped data representation of the example data below:
71 | 
72 | ```{r show-grouped-data, echo = FALSE} 
73 | grouped_data <- data.frame(
74 |   data.frame(
75 |     rater_1 = c(3, 2), 
76 |     rater_2 = c(4, 2), 
77 |     n = c(1, 2))
78 | )
79 | 
80 | knitr::kable(grouped_data)
81 | ```
82 | 
83 | Here the column `n` represents the number of times each pattern occurs. `rater()` requires that a column named `n` is the right most column in grouped data and will interpret the remaining columns as for wide data. Currently grouped data passed to `rater()` cannot contain any missing values. The `caries` data included in the package is in the {rater} grouped data format.
84 | 
85 | The grouped format can only represent the same data as wide format, but it is still useful. This is because using grouped data allows a different from of the likelihood of the statistical models implemented in rater to be used, which can greatly speed up model fitting. If the number of patterns in the data is much less than the number of item by rater combinations (i.e. the number of rows in the long format) then using grouped data can lead to large speed-ups. Currently this re-writing of the likelihood is only available for the Dawid-Skene model, not any of the extensions implemented in the package. 
86 | 
87 | --- 
88 | 


--------------------------------------------------------------------------------
/vignettes/workflow.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Workflow"
  3 | output: rmarkdown::html_vignette
  4 | vignette: >
  5 |   %\VignetteIndexEntry{Workflow}
  6 |   %\VignetteEngine{knitr::rmarkdown}
  7 |   %\VignetteEncoding{UTF-8}
  8 | ---
  9 | 
 10 | ```{r setup, include = FALSE}
 11 | knitr::opts_chunk$set(
 12 |   comment = "#>"
 13 | )
 14 | ```
 15 | 
 16 | ## What is rater?
 17 | 
 18 | The {rater} package is designed to allow easy fitting and analysis of Bayesian models of categorical data annotation using [Stan](https://mc-stan.org/). Here we demonstrate the basic workflow for using the package.
 19 | 
 20 | ## Data
 21 | 
 22 | We will use the *anesthesia* data set taken from the paper *Maximum Likelihood Estimation of Observer Error-Rates Using the EM Algorithm* by A. P. Dawid and A. M. Skene, the paper which introduced the original Dawid-Skene model the type of models used This dataset is included in **rater**. We can prepare the package and data with: 
 23 | 
 24 | ```{r package-load}
 25 | # Load the rater package.
 26 | library(rater)
 27 | 
 28 | # Access the 'anesthesia' data set.
 29 | data("anesthesia")
 30 | ```
 31 | 
 32 | The data comes in the form of a `data.frame` with three columns `item`, `rater` and `rating`. In the nomenclature of the package we would describe this as *long* data. *Long* data is the standard data format for passing data to inference functions in {rater}. The `item` column is the index of each item, the `rater` column is the index of the rater and `rating` is the actual rating. For example the twentieth row of the dataset: 
 33 | 
 34 | ```{r one-row}
 35 | anesthesia[20, ]
 36 | ```
 37 | 
 38 | means that item 3 was rated as being in category 2 by the fourth rater. {rater} also allows the use of *grouped* data for fitting some of the models but that feature is not covered in this vignette. 
 39 | 
 40 | ## Inference
 41 | 
 42 | The core function of the {rater} package is the `rater()` function which fits a specified categorical rating to model to given data. This function has two arguments: `data`, data in an appropriate format and `model`, a character string or functions specifying the model you would like to fit. By default `rater()` will fit the model using MCMC (specifically NUTS) provided by Stan. To fit the basic Dawid-Skene model[^1] to the anesthesia data we can run. 
 43 | 
 44 | ```{r, inference, warnings = FALSE, message = FALSE}
 45 | fit <- rater(anesthesia, "dawid_skene", chains = 1, verbose = FALSE)
 46 | ```
 47 | 
 48 | Note that here we have set `verbose = FALSE` to suppress the normal Stan sampling output. We have also specified that we should use only 1 chain, simply to speed up the creation of the vignette. Other fitting parameters can be passed directly to the underlying Stan functions through the `...` in `rater()`. 
 49 | 
 50 | We can also compute MAP estimates by specifying `method = "optim"`in `rater()`:
 51 | 
 52 | ```{r optim-fit}
 53 | optim_fit <- rater(anesthesia, "dawid_skene", method = "optim")
 54 | ```
 55 | 
 56 | ## Plotting
 57 | 
 58 | Having fit the Dawid and Skene model to the data we can now plot parameter estimates from the model. 
 59 | 
 60 | To plot the population prevalence estimates (the parameter $\pi$ in the model) we run:
 61 | 
 62 | ```{r plot-pi, fig.width = 6, fig.height = 4}
 63 | plot(fit, pars = "pi")
 64 | ```
 65 | 
 66 | To plot the rater's error's matrices of the (the parameter $\theta$ in the model) we run:
 67 | 
 68 | ```{r plot-theta, fig.width = 6, fig.height = 4}
 69 | plot(fit, pars = "theta")
 70 | ```
 71 | 
 72 | To plot the latent class estimates we run:
 73 | 
 74 | ```{r plot-z, fig.width = 6, fig.height = 8}
 75 | plot(fit, pars = "latent_class")
 76 | ```
 77 | 
 78 | ## Point estimates
 79 | 
 80 | In additions we can extract point estimates for all the parameters. These can be extracted using the `point_estimates()` function. Different parameters can be extracted using the `pars` argument i.e.
 81 | 
 82 | ```{r point-estimates}
 83 | # Extract all parameters.
 84 | all_parameters <- point_estimate(fit)
 85 | 
 86 | # Extract only the 'pi' parameter. 
 87 | point_estimate(fit, pars = "pi")
 88 | ```
 89 | 
 90 | Note that the interpretation of the point estimates returned will differ depending on whether the model has been fit using MCMC or optimisation. 
 91 | 
 92 | ## Other functions
 93 | 
 94 | {rater} also supports a variety of other functions to extract useful quantities from fit objects which are listed below:
 95 | 
 96 | * `posterior_samples()`
 97 | * `posterior_interval()`
 98 | * `class_probabilities()`
 99 | * `mcmc_diagnostics()`
100 | 
101 | Hopefully the uses of these functions are fairly self explanatory. 
102 | 
103 | [^1]: {rater} also supports the 'class conditional' and 'hierarchical' Dawid-Skene models as well as setting (some of) the prior parameters in all three models. 
104 | 


--------------------------------------------------------------------------------