├── .Rbuildignore ├── .gitattributes ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ ├── codecov.yaml │ └── rhub.yaml ├── .gitignore ├── CRAN-SUBMISSION ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── discretization.R ├── estimation.R ├── part_bfi.R ├── simulation.R └── utilities.R ├── README.Rmd ├── README.html ├── README.md ├── _pkgdown.yml ├── codemeta.json ├── cran-comments.md ├── data └── part_bfi.rda ├── dev ├── config_attachment.yaml ├── dev.R ├── doc.R └── prepare_data.R ├── docs ├── .nojekyll ├── 404.html ├── LICENSE-text.html ├── LICENSE.html ├── apple-touch-icon-120x120.png ├── apple-touch-icon-152x152.png ├── apple-touch-icon-180x180.png ├── apple-touch-icon-60x60.png ├── apple-touch-icon-76x76.png ├── apple-touch-icon.png ├── articles │ ├── index.html │ └── using_latent2likert.html ├── authors.html ├── deps │ ├── bootstrap-5.3.1 │ │ ├── bootstrap.bundle.min.js │ │ ├── bootstrap.bundle.min.js.map │ │ └── bootstrap.min.css │ ├── data-deps.txt │ └── jquery-3.6.0 │ │ ├── jquery-3.6.0.js │ │ ├── jquery-3.6.0.min.js │ │ └── jquery-3.6.0.min.map ├── favicon-16x16.png ├── favicon-32x32.png ├── favicon.ico ├── index.html ├── link.svg ├── logo.png ├── news │ └── index.html ├── pkgdown.js ├── pkgdown.yml ├── reference │ ├── Rplot001.png │ ├── Rplot002.png │ ├── Rplot003.png │ ├── Rplot004.png │ ├── discretize_density.html │ ├── estimate_mean_and_sd.html │ ├── estimate_params.html │ ├── figures │ │ ├── courses-grouped-min.png │ │ ├── courses-stacked-min.png │ │ ├── logo.png │ │ ├── overview-min.png │ │ ├── scales-min.png │ │ └── transformation-min.png │ ├── index.html │ ├── part_bfi.html │ ├── plot_likert_transform-1.png │ ├── plot_likert_transform-2.png │ ├── plot_likert_transform-3.png │ ├── plot_likert_transform-4.png │ ├── plot_likert_transform.html │ ├── response_prop.html │ ├── rlikert.html │ └── simulate_likert.html ├── search.json └── sitemap.xml ├── man ├── discretize_density.Rd ├── estimate_mean_and_sd.Rd 
├── estimate_params.Rd ├── figures │ ├── courses-grouped-min.png │ ├── courses-stacked-min.png │ ├── logo.png │ ├── overview-min.png │ ├── scales-min.png │ └── transformation-min.png ├── part_bfi.Rd ├── plot_likert_transform.Rd ├── rLikert.Rd ├── response_prop.Rd └── simulate_likert.Rd ├── pkgdown └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico ├── tests ├── testthat.R └── testthat │ ├── test_discretization.R │ ├── test_estimation.R │ ├── test_simulation.R │ └── test_utilities.R └── vignettes ├── .gitignore ├── references.bib └── using_latent2likert.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^.gitattributes.save 4 | ^.gitattributes 5 | ^.gitignore 6 | ^_pkgdown\.yml$ 7 | ^docs$ 8 | ^pkgdown$ 9 | ^responsesR\.Rproj$ 10 | ^README.Rmd 11 | ^README.html 12 | ^LICENSE.md 13 | ^NEWS.html 14 | ^\.github$ 15 | ^vignettes/articles$ 16 | ^man/figures/*\.png$ 17 | ^CODE_OF_CONDUCT.md 18 | ^fix_site.py 19 | ^tests/informal$ 20 | ^dev$ 21 | ^doc$ 22 | ^Meta$ 23 | ^revdep$ 24 | ^cran-comments\.md$ 25 | ^codemeta\.json$ 26 | ^CRAN-SUBMISSION$ 27 | ^articles$ 28 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.html linguist-vendored 2 | *.htm linguist-vendored 3 | *.py linguist-vendored 4 | *.tex linguist-vendored 5 | docs/** linguist-vendored 6 | man/** linguist-vendored -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | 
-------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v4 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: any::rcmdcheck 45 | needs: check 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | upload-snapshots: true 50 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 51 | -------------------------------------------------------------------------------- /.github/workflows/codecov.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: codecov 10 | 11 | jobs: 12 | codecov: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::covr 27 | needs: coverage 28 | 29 | - name: Test coverage 30 | run: | 31 | covr::codecov( 32 | quiet = FALSE, 33 | clean = FALSE, 34 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 35 | ) 36 | shell: Rscript {0} 37 | 38 | - name: Show testthat output 39 | if: always() 40 | run: | 41 | ## -------------------------------------------------------------------- 42 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 43 | shell: bash 44 | 45 | - name: Upload test results 46 | if: failure() 47 | uses: actions/upload-artifact@v4 48 | with: 49 | name: coverage-test-failures 50 | path: ${{ runner.temp }}/package 51 | -------------------------------------------------------------------------------- /.github/workflows/rhub.yaml: -------------------------------------------------------------------------------- 1 | # R-hub's generic GitHub Actions workflow file. It's canonical location is at 2 | # https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml 3 | # You can update this file to a newer version using the rhub2 package: 4 | # 5 | # rhub::rhub_setup() 6 | # 7 | # It is unlikely that you need to modify this file manually. 
8 | 9 | name: R-hub 10 | run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}" 11 | 12 | on: 13 | workflow_dispatch: 14 | inputs: 15 | config: 16 | description: 'A comma separated list of R-hub platforms to use.' 17 | type: string 18 | default: 'linux,windows,macos' 19 | name: 20 | description: 'Run name. You can leave this empty now.' 21 | type: string 22 | id: 23 | description: 'Unique ID. You can leave this empty now.' 24 | type: string 25 | 26 | jobs: 27 | 28 | setup: 29 | runs-on: ubuntu-latest 30 | outputs: 31 | containers: ${{ steps.rhub-setup.outputs.containers }} 32 | platforms: ${{ steps.rhub-setup.outputs.platforms }} 33 | 34 | steps: 35 | # NO NEED TO CHECKOUT HERE 36 | - uses: r-hub/actions/setup@v1 37 | with: 38 | config: ${{ github.event.inputs.config }} 39 | id: rhub-setup 40 | 41 | linux-containers: 42 | needs: setup 43 | if: ${{ needs.setup.outputs.containers != '[]' }} 44 | runs-on: ubuntu-latest 45 | name: ${{ matrix.config.label }} 46 | strategy: 47 | fail-fast: false 48 | matrix: 49 | config: ${{ fromJson(needs.setup.outputs.containers) }} 50 | container: 51 | image: ${{ matrix.config.container }} 52 | 53 | steps: 54 | - uses: r-hub/actions/checkout@v1 55 | - uses: r-hub/actions/platform-info@v1 56 | with: 57 | token: ${{ secrets.RHUB_TOKEN }} 58 | job-config: ${{ matrix.config.job-config }} 59 | - uses: r-hub/actions/setup-deps@v1 60 | with: 61 | token: ${{ secrets.RHUB_TOKEN }} 62 | job-config: ${{ matrix.config.job-config }} 63 | - uses: r-hub/actions/run-check@v1 64 | with: 65 | token: ${{ secrets.RHUB_TOKEN }} 66 | job-config: ${{ matrix.config.job-config }} 67 | 68 | other-platforms: 69 | needs: setup 70 | if: ${{ needs.setup.outputs.platforms != '[]' }} 71 | runs-on: ${{ matrix.config.os }} 72 | name: ${{ matrix.config.label }} 73 | strategy: 74 | fail-fast: false 75 | matrix: 76 | config: ${{ fromJson(needs.setup.outputs.platforms) }} 77 | 78 | steps: 79 | - 
uses: r-hub/actions/checkout@v1 80 | - uses: r-hub/actions/setup-r@v1 81 | with: 82 | job-config: ${{ matrix.config.job-config }} 83 | token: ${{ secrets.RHUB_TOKEN }} 84 | - uses: r-hub/actions/platform-info@v1 85 | with: 86 | token: ${{ secrets.RHUB_TOKEN }} 87 | job-config: ${{ matrix.config.job-config }} 88 | - uses: r-hub/actions/setup-deps@v1 89 | with: 90 | job-config: ${{ matrix.config.job-config }} 91 | token: ${{ secrets.RHUB_TOKEN }} 92 | - uses: r-hub/actions/run-check@v1 93 | with: 94 | job-config: ${{ matrix.config.job-config }} 95 | token: ${{ secrets.RHUB_TOKEN }} 96 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | 3 | # History files 4 | .Rhistory 5 | .Rapp.history 6 | 7 | # Session Data files 8 | .RData 9 | .RDataTmp 10 | 11 | # User-specific files 12 | .Ruserdata 13 | 14 | # Example code in package build process 15 | *-Ex.R 16 | 17 | # Output files from R CMD build 18 | /*.tar.gz 19 | 20 | # Output files from R CMD check 21 | /*.Rcheck/ 22 | 23 | # RStudio files 24 | .Rproj.user/ 25 | *.Rproj 26 | 27 | # produced vignettes 28 | vignettes/*.html 29 | vignettes/*.pdf 30 | 31 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 32 | .httr-oauth 33 | 34 | # knitr and R markdown default cache directories 35 | *_cache/ 36 | /cache/ 37 | 38 | # Temporary files created by R markdown 39 | *.utf8.md 40 | *.knit.md 41 | 42 | # R Environment Variables 43 | .Renviron 44 | 45 | # translation temp files 46 | po/*~ 47 | 48 | # RStudio Connect folder 49 | rsconnect/ 50 | .Rproj.user 51 | 52 | # Ignore Mac DS_Store files 53 | .DS_Store 54 | **/.DS_Store 55 | 56 | # revdep 57 | /doc/ 58 | /Meta/ 59 | revdep/ 60 | -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | 
Version: 1.2.1 2 | Date: 2024-06-22 15:11:33 UTC 3 | SHA: c297b1372dbb8ce47e787afe982acf94c44d5147 4 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Type: Package 2 | Package: latent2likert 3 | Title: Converting Latent Variables into Likert Scale Responses 4 | Version: 1.2.2 5 | Authors@R: 6 | person("Marko", "Lalovic", , "marko@lalovic.io", role = c("aut", "cre")) 7 | Description: Effectively simulates the discretization process inherent to 8 | Likert scales while minimizing distortion. It converts continuous 9 | latent variables into ordinal categories to generate Likert scale item 10 | responses. Particularly useful for accurately modeling and analyzing 11 | survey data that use Likert scales, especially when applying 12 | statistical techniques that require metric data. 13 | License: MIT + file LICENSE 14 | URL: https://latent2likert.lalovic.io/ 15 | BugReports: https://github.com/markolalovic/latent2likert/issues/ 16 | Depends: 17 | R (>= 3.5) 18 | Imports: 19 | graphics, 20 | mvtnorm, 21 | stats, 22 | utils 23 | Suggests: 24 | sn, 25 | knitr, 26 | rmarkdown, 27 | testthat (>= 3.0.0) 28 | VignetteBuilder: 29 | knitr 30 | Encoding: UTF-8 31 | Language: en-US 32 | LazyData: true 33 | Roxygen: list(markdown = TRUE) 34 | RoxygenNote: 7.3.1 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2024 2 | COPYRIGHT HOLDER: Marko Lalovic -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2024 Marko Lalovic 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files 
(the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(discretize_density) 4 | export(estimate_mean_and_sd) 5 | export(estimate_params) 6 | export(plot_likert_transform) 7 | export(response_prop) 8 | export(rlikert) 9 | export(simulate_likert) 10 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | ## Version 0.1.0 2 | - Release date: 2020-10-17 3 | - Initial release (development version). 4 | - Tested on platforms: x86_64-pc-linux-gnu (64-bit) and x86_64-w64-mingw32 (64-bit). 5 | 6 | ## Version 1.0.0 7 | - Release date: 2024-03-28 8 | - Added the option to generate correlated Likert scale items in the `rlikert()` function. 
9 | - Added the `estimate_params()` function for estimating parameters from existing survey data. 10 | - Resolved dependency issues. 11 | - Reduced dependency to only standard R packages: mvtnorm, stats, and graphics. 12 | - The sn package is now required only for generating correlated responses using a skew normal distribution. 13 | - Added user prompts to install the sn package when needed. 14 | 15 | ## Version 1.1.0 16 | - Release date: 2024-06-06 17 | - Minor updates to functions and vignettes. 18 | 19 | ## Version 1.1.1 20 | - Release date: 2024-06-06 21 | - Renamed the package from `responsesR` to `latent2likert` to better reflect its purpose of converting latent variables into Likert scale responses. 22 | - Improved the package website and documentation. 23 | - **Note:** The codebase is under development, and finer details may change. 24 | 25 | ## Version 1.2.1 26 | - Release date: 2024-06-12 27 | - Refactored code for enhanced modularity and maintainability. 28 | - Modularized core functions for improved readability. 29 | - Improved the structure and organization of the codebase. 30 | - Enhanced error handling for various correlation input scenarios. 31 | 32 | ## Version 1.2.2 33 | - Release date: 2024-07-26 34 | - Exported previously internal function `estimate_mean_and_sd` based on user feedback, now available for direct use. 35 | - Moved the package `sn` to Suggests. 36 | - Fixed vignette building errors. 37 | -------------------------------------------------------------------------------- /R/discretization.R: -------------------------------------------------------------------------------- 1 | #' Discretize Density 2 | #' 3 | #' Transforms the density function of a continuous random variable into a 4 | #' discrete probability distribution with minimal distortion 5 | #' using the Lloyd-Max algorithm. 6 | #' 7 | #' @param density_fn probability density function. 8 | #' @param n_levels cardinality of the set of all possible outcomes. 
9 | #' @param eps convergence threshold for the algorithm. 10 | #' 11 | #' @return A list containing: 12 | #' \describe{ 13 | #' \item{prob}{discrete probability distribution.} 14 | #' \item{endp}{endpoints of intervals that partition the continuous domain.} 15 | #' \item{repr}{representation points of the intervals.} 16 | #' \item{dist}{distortion measured as the mean-squared error (MSE).} 17 | #' } 18 | #' @examples 19 | #' discretize_density(density_fn = stats::dnorm, n_levels = 5) 20 | #' discretize_density(density_fn = function(x) { 21 | #' 2 * stats::dnorm(x) * stats::pnorm(0.5 * x) 22 | #' }, n_levels = 4) 23 | #' @details 24 | #' The function addresses the problem of transforming a continuous random 25 | #' variable \eqn{X} into a discrete random variable \eqn{Y} with minimal 26 | #' distortion. Distortion is measured as mean-squared error (MSE): 27 | #' \deqn{ 28 | #' \text{E}\left[ (X - Y)^2 \right] = 29 | #' \sum_{k=1}^{K} \int_{x_{k-1}}^{x_{k}} f_{X}(x) 30 | #' \left( x - r_{k} \right)^2 \, dx 31 | #' } 32 | #' where: 33 | #' \describe{ 34 | #' \item{\eqn{f_{X}}}{ is the probability density function of \eqn{X},} 35 | #' \item{\eqn{K}}{ is the number of possible outcomes of \eqn{Y},} 36 | #' \item{\eqn{x_{k}}}{ are endpoints of intervals that partition the domain 37 | #' of \eqn{X},} 38 | #' \item{\eqn{r_{k}}}{ are representation points of the intervals.} 39 | #' } 40 | #' This problem is solved using the following iterative procedure: 41 | #' \describe{ 42 | #' \item{\eqn{1.}}{Start with an arbitrary initial set of representation 43 | #' points: \eqn{r_{1} < r_{2} < \dots < r_{K}}.} 44 | #' \item{\eqn{2.}}{Repeat the following steps until the improvement in MSE 45 | #' falls below given \eqn{\varepsilon}.} 46 | #' \item{\eqn{3.}}{Calculate endpoints as \eqn{x_{k} = (r_{k+1} + r_{k})/2} 47 | #' for each \eqn{k = 1, \dots, K-1} and set \eqn{x_{0}} and \eqn{x_{K}} to 48 | #' \eqn{-\infty} and \eqn{\infty}, respectively.} 49 | #' \item{\eqn{4.}}{Update 
representation points by setting \eqn{r_{k}} 50 | #' equal to the conditional mean of \eqn{X} given \eqn{X \in (x_{k-1}, x_{k})} 51 | #' for each \eqn{k = 1, \dots, K}.} 52 | #' } 53 | #' 54 | #' With each execution of step \eqn{(3)} and step \eqn{(4)}, the MSE decreases 55 | #' or remains the same. As MSE is nonnegative, it approaches a limit. 56 | #' The algorithm terminates when the improvement in MSE is less than a given 57 | #' \eqn{\varepsilon > 0}, ensuring convergence after a finite number 58 | #' of iterations. 59 | #' 60 | #' This procedure is known as Lloyd-Max's algorithm, initially used for scalar 61 | #' quantization and closely related to the k-means algorithm. Local convergence 62 | #' has been proven for log-concave density functions by Kieffer. Many common 63 | #' probability distributions are log-concave including the normal and skew 64 | #' normal distribution, as shown by Azzalini. 65 | #' 66 | #' @references 67 | #' Azzalini, A. (1985). 68 | #' A class of distributions which includes the normal ones. 69 | #' \emph{Scandinavian Journal of Statistics} \bold{12(2)}, 171–178. 70 | #' 71 | #' Kieffer, J. (1983). 72 | #' Uniqueness of locally optimal quantizer for log-concave density and convex 73 | #' error function. 74 | #' \emph{IEEE Transactions on Information Theory} \bold{29}, 42–47. 75 | #' 76 | #' Lloyd, S. (1982). 77 | #' Least squares quantization in PCM. 78 | #' \emph{IEEE Transactions on Information Theory} \bold{28 (2)}, 129–137. 
#'
#' @export
discretize_density <- function(density_fn, n_levels, eps = 1e-6) {
  # Arbitrary initial representation points spread over [-5, 5], which
  # comfortably covers the effective support of the standardized latent
  # distributions used in this package.
  repr <- seq(-5, 5, length.out = n_levels)
  midp <- calc_midpoints(repr)
  dist <- compute_distortion(density_fn, midp, repr)

  # Lloyd-Max iteration: alternately update representation points and
  # interval midpoints. MSE is non-increasing at every step, so the loop
  # terminates once the improvement drops below eps.
  diff <- Inf
  while (diff > eps) {
    repr <- update_representatives(density_fn, midp)
    midp <- calc_midpoints(repr)
    newd <- compute_distortion(density_fn, midp, repr)

    diff <- dist - newd
    dist <- newd
  }

  endp <- c(-Inf, midp, Inf)
  prob <- calc_probs(density_fn, endp)
  return(list(prob = prob, endp = endp, repr = repr, dist = dist))
}

#' Calculate Midpoints
#'
#' Calculates the midpoints of the intervals given their representation points.
#'
#' @param repr representation points.
#' @return a vector of midpoints.
#' @noRd
calc_midpoints <- function(repr) {
  n_levels <- length(repr)
  midp <- (repr[2:n_levels] + repr[1:(n_levels - 1)]) / 2
  return(midp)
}

#' Update Representation Points
#'
#' Calculates the representation points for the intervals given the midpoints:
#' each new point is the conditional mean of the latent variable within
#' its interval.
#'
#' @param density_fn probability density function.
#' @param midp midpoints of the intervals.
#'
#' @return a vector of representation points.
#' @noRd
update_representatives <- function(density_fn, midp) {
  n_levels <- length(midp) + 1
  endp <- c(-Inf, midp, Inf)
  repr <- numeric(n_levels)
  for (k in seq_len(n_levels)) {
    # Conditional mean over the k-th interval:
    # E[X | x_{k-1} < X <= x_k] = int x f(x) dx / int f(x) dx.
    a <- stats::integrate(function(x) x * density_fn(x), endp[k], endp[k + 1])
    b <- stats::integrate(density_fn, endp[k], endp[k + 1])
    repr[k] <- a[[1]] / b[[1]]
  }
  return(repr)
}

# Backward-compatible alias: this helper was historically (mis)named
# `update_epresentatives`; keep the old name so any internal callers or
# tests referencing latent2likert:::update_epresentatives still work.
update_epresentatives <- update_representatives

#' Compute Distortion
#'
#' Computes the distortion (mean-squared error) given the midpoints and
#' representation points.
#'
#' @param density_fn probability density function.
#' @param midp midpoints of the intervals.
#' @param repr representation points of the intervals.
#'
#' @return distortion (mean-squared error).
#' @noRd
compute_distortion <- function(density_fn, midp, repr) {
  n_levels <- length(repr)
  endp <- c(-Inf, midp, Inf)
  # Sum of per-interval contributions E[(X - r_k)^2 ; x_{k-1} < X <= x_k].
  mse <- vapply(seq_len(n_levels), function(k) {
    stats::integrate(function(x) {
      density_fn(x) * (x - repr[k])^2
    }, endp[k], endp[k + 1])[[1]]
  }, numeric(1))
  return(sum(mse))
}

#' Calculate Probabilities
#'
#' Calculates the discrete probabilities for the given endpoints.
#'
#' @param density_fn probability density function.
#' @param endp endpoints of the intervals.
#'
#' @return a vector of probabilities for the intervals.
#' @noRd
calc_probs <- function(density_fn, endp) {
  n_levels <- length(endp) - 1
  prob <- vapply(seq_len(n_levels), function(k) {
    stats::integrate(density_fn, endp[k], endp[k + 1])[[1]]
  }, numeric(1))
  return(prob)
}

# --- R/estimation.R -----------------------------------------------------

#' Estimate Latent Parameters
#'
#' Estimates the location and scaling parameters of the latent variables from
#' existing survey data.
#'
#' @param data survey data with columns representing individual items.
#' Apart from this, \code{data} can be of almost any class such as
#' "data.frame" "matrix" or "array".
#' @param n_levels number of response categories, a vector or a number.
#' @param skew marginal skewness of latent variables, defaults to 0.
#' @return A table of estimated parameters for each latent variable.
#' @examples
#' data(part_bfi)
#' vars <- c("A1", "A2", "A3", "A4", "A5")
#' estimate_params(data = part_bfi[, vars], n_levels = 6)
#' @details
#' The relationship between the continuous random variable \eqn{X} and the
#' discrete probability distribution \eqn{p_k}, for \eqn{k = 1, \dots, K},
#' can be described by a system of non-linear equations:
#' \deqn{
#' p_{k} = F_{X}\left( \frac{x_{k} - \xi}{\omega} \right)
#' - F_{X}\left( \frac{x_{k - 1} - \xi}{\omega} \right)
#' \quad \text{for} \ k = 1, \dots, K
#' }
#' where:
#' \describe{
#'   \item{\eqn{F_{X}}}{ is the cumulative distribution function of \eqn{X},}
#'   \item{\eqn{K}}{ is the number of possible response categories,}
#'   \item{\eqn{x_{k}}}{ are the endpoints defining the boundaries of the
#'   response categories,}
#'   \item{\eqn{p_{k}}}{ is the probability of the \eqn{k}-th
#'   response category,}
#'   \item{\eqn{\xi}}{ is the location parameter of \eqn{X},}
#'   \item{\eqn{\omega}}{ is the scaling parameter of \eqn{X}.}
#' }
#' The endpoints \eqn{x_{k}} are calculated by discretizing a
#' random variable \eqn{Z}
#' with mean 0 and standard deviation 1 that follows the same
#' distribution as \eqn{X}.
#' By solving the above system of non-linear equations iteratively,
#' we can find the parameters that best fit the observed discrete
#' probability distribution \eqn{p_{k}}.
#'
#' The function \code{estimate_params}:
#' * Computes the proportion table of the responses for each item.
#' * Estimates the probabilities \eqn{p_{k}} for each item.
#' * Computes the estimates of \eqn{\xi} and \eqn{\omega} for each item.
#' * Combines the estimated parameters for all items into a table.
#'
#' @seealso \code{\link{discretize_density}} for details on calculating
#' the endpoints, and \code{\link{part_bfi}} for example of the survey data.
#' @export
estimate_params <- function(data, n_levels, skew = 0) {
  if (is.vector(data)) {
    # Single item given as a plain vector of responses.
    prob <- prop.table(table(data))
    estimates <- estimate_mean_and_sd(prob, n_levels, skew)
    tab <- as.table(estimates)
    rownames(tab) <- c("mean", "sd")
  } else { # multiple items
    nitems <- ncol(data)
    # Recycle scalar inputs so every item gets its own skew / n_levels.
    # length.out (rather than times) keeps a full-length vector unchanged.
    skew <- rep(skew, length.out = nitems)
    if (is.numeric(n_levels)) {
      n_levels <- rep(n_levels, length.out = nitems)
    }
    mat <- matrix(data = NA_real_, nrow = nitems, ncol = 2)
    for (i in seq_len(nitems)) {
      prob <- prop.table(table(data[, i]))
      mat[i, ] <- estimate_mean_and_sd(prob, n_levels[i], skew[i])
    }
    # Transpose so rows are the two estimates and columns are the items.
    tab <- as.table(t(mat))
    dimnames(tab) <- list(
      estimates = c("mean", "sd"),
      items = colnames(data)
    )
  }
  return(tab)
}

#' Estimate mean and standard deviation
#'
#' Estimates the mean and standard deviation of a latent variable given the
#' discrete probabilities of its observed Likert scale responses.
#'
#' @param prob named vector of probabilities for each response category.
#' @param n_levels number of response categories for the Likert scale item.
#' @param skew marginal skewness of the latent variable, defaults to 0.
#' @param eps tolerance for convergence, defaults to 1e-6.
#' @param maxit maximum number of iterations, defaults to 100.
#'
#' @return A numeric vector with two elements: the estimated mean and
#' standard deviation.
#'
#' @examples
#' prob <- c("1" = 0.313, "2" = 0.579, "3" = 0.105, "4" = 0.003)
#' # returns estimates that are close to the actual mean and sd: c(-1, 0.5)
#' estimate_mean_and_sd(prob, 5)
#'
#' @details
#' This function uses an iterative algorithm to solve the system of non-linear
#' equations that describe the relationship between the continuous latent
#' variable and the observed discrete probability distribution of Likert scale
#' responses. The algorithm ensures stability by reparameterizing the system
#' and applying constraints to prevent stepping into invalid regions.
#'
#' @export
estimate_mean_and_sd <- function(prob, n_levels, skew = 0,
                                 eps = 1e-6, maxit = 100) {
  # Pad missing categories with zero probability and get the fixed
  # endpoints of the standardized (mean 0, sd 1) latent distribution.
  prob <- as.vector(pad_levels(prob, n_levels))
  endp <- calc_endpoints(n_levels, skew)
  dist_funcs <- initialize_distributions(skew)

  # Solve for u = mean and v = 1 / sd with a damped Newton-type
  # iteration; the pseudo-inverse (via SVD) keeps the step stable when
  # the Jacobian is ill-conditioned.
  x <- matrix(c(0, 1)) # initial guess: mean 0, sd 1

  for (i in seq_len(maxit)) {
    b <- fn(x, endp, prob, dist_funcs$cdf_X)
    A <- jac(x, endp, dist_funcs$pdf_X)
    A_svd <- svd(A)
    dx <- A_svd$v %*% diag(1 / A_svd$d) %*% t(A_svd$u) %*% (-b)

    # Prevent stepping into negative values for v = 1 / sd.
    while ((x + dx)[2] < 0) {
      dx <- dx / 2
    }

    x <- x + 0.2 * dx # damping factor 0.2 for stability

    if (norm(dx, "2") < eps) {
      break
    }
  }

  mean <- x[1]
  sd <- 1 / x[2]

  return(c(mean, sd))
}

#' Initialize CDF and PDF Functions
#'
#' Initializes the cumulative distribution function (CDF) and probability
#' density function (PDF) based on the skewness parameter.
#'
#' @param skew numeric value representing the skewness of the distribution.
#'
#' @return A list containing the CDF and PDF functions appropriate for the
#' given skewness.
#'
#' @noRd
initialize_distributions <- function(skew) {
  if (abs(skew) > 0) {
    # The skew normal CDF/PDF come from the suggested package `sn`;
    # prompt the user to install it if it is missing.
    check_package("sn")
    cp <- c("mu" = 0, "sd" = 1, "skew" = skew)
    dp <- sn::cp2dp(cp, family = "SN")
    return(list(
      cdf_X = function(x) sn::psn(x, dp = dp),
      pdf_X = function(x) sn::dsn(x, dp = dp)
    ))
  } else {
    # Zero skewness: plain standard normal.
    return(list(cdf_X = stats::pnorm, pdf_X = stats::dnorm))
  }
}

#' Calculate Differences for Root Finding
#'
#' Computes the differences used in the iterative root-finding process.
#'
#' @param x numeric vector of current estimates for the location and scaling
#' parameters.
#' @param endp numeric vector of endpoints defining the boundaries of the
#' response categories.
#' @param prob numeric vector of probabilities for each response category.
#' @param cdf_X function representing the cumulative distribution function
#' (CDF) of the latent variable.
#'
#' @return A matrix of differences between the CDF evaluated at the endpoints
#' and the observed probabilities.
#'
#' @noRd
fn <- function(x, endp, prob, cdf_X) {
  u <- x[1] # location estimate (mean)
  v <- x[2] # inverse scale estimate (1 / sd)
  # CDF evaluated at the standardized endpoints (endp - u) * v.
  y <- cdf_X(v * endp - u * v)
  # Successive differences give the model category probabilities;
  # subtracting the observed probabilities yields the residual vector.
  return(matrix(utils::tail(y, -1) - utils::head(y, -1) - prob))
}

#' Compute Jacobian Matrix
#'
#' Computes the Jacobian matrix used in the iterative root-finding process.
#'
#' @param x numeric vector of current estimates for the location and scaling
#' parameters.
#' @param endp numeric vector of endpoints defining the boundaries of the
#' response categories.
#' @param pdf_X function representing the probability density function (PDF)
#' of the latent variable.
#'
#' @return A matrix representing the Jacobian of the system of equations with
#' respect to the parameters.
201 | #' 202 | #' @noRd 203 | jac <- function(x, endp, pdf_X) { 204 | u <- x[1] 205 | v <- x[2] 206 | midp <- utils::head(utils::tail(endp, -1), -1) 207 | 208 | du <- pdf_X(v * endp - u * v) * (-v) 209 | dv <- pdf_X(v * midp - u * v) * (midp - u) 210 | 211 | du <- utils::tail(du, -1) - utils::head(du, -1) 212 | dv <- c(utils::head(dv, 1), utils::tail(dv, -1) 213 | - utils::head(dv, -1), -utils::tail(dv, 1)) 214 | 215 | return(cbind(du, dv)) 216 | } 217 | -------------------------------------------------------------------------------- /R/part_bfi.R: -------------------------------------------------------------------------------- 1 | #' Agreeableness and Gender Data 2 | #' 3 | #' This dataset is a cleaned up version of a small part of \code{bfi} dataset 4 | #' from \code{psychTools} package. It contains responses to the first 5 items 5 | #' of the agreeableness scale from the International Personality Item Pool 6 | #' (IPIP) and the gender attribute. It includes responses from 2800 subjects. 7 | #' Each item was answered on a six point Likert scale ranging from 8 | #' 1 (very inaccurate), to 6 (very accurate). Gender was coded as 9 | #' 0 for male and 1 for female. Missing values were addressed using 10 | #' mode imputation. 11 | #' 12 | #' @docType data 13 | #' 14 | #' @usage data(part_bfi) 15 | #' 16 | #' @format An object of class \code{"data.frame"} with 2800 observations on 17 | #' the following 6 variables: 18 | #' \describe{ 19 | #'   \item{A1}{Am indifferent to the feelings of others.} 20 | #'   \item{A2}{Inquire about others' well-being.} 21 | #'   \item{A3}{Know how to comfort others.} 22 | #'   \item{A4}{Love children.} 23 | #'   \item{A5}{Make people feel at ease.} 24 | #'   \item{gender}{Gender of the respondent.} 25 | #' } 26 | #' @source International Personality Item Pool (\url{https://ipip.ori.org}) 27 | #' @source \url{https://search.r-project.org/CRAN/refmans/psychTools/html/bfi.html} 28 | #' @references 29 | #' Revelle, W. (2024). 
30 | #' Psych: Procedures for Psychological, Psychometric, and Personality Research. 31 | #' \emph{Evanston, Illinois: Northwestern University.} 32 | #' \url{https://CRAN.R-project.org/package=psych} 33 | #' @examples 34 | #' data(part_bfi) 35 | #' head(part_bfi) 36 | "part_bfi" 37 | -------------------------------------------------------------------------------- /R/simulation.R: -------------------------------------------------------------------------------- 1 | #' Simulate Likert Scale Item Responses 2 | #' 3 | #' Simulates Likert scale item responses based on a specified number 4 | #' of response categories and the centered parameters of the latent variable. 5 | #' 6 | #' @param n_levels number of response categories for the Likert scale item. 7 | #' @param cp centered parameters of the latent variable. 8 | #' Named vector including mean (`mu`), standard deviation (`sd`), 9 | #' and skewness (`skew`). Skewness must be between -0.95 and 0.95. 10 | #' @return A named vector of probabilities for each response category. 11 | #' @examples 12 | #' cp <- c(mu = 0, sd = 1, skew = 0.5) 13 | #' simulate_likert(n_levels = 5, cp = cp) 14 | #' cp2 <- c(mu = 1, sd = 2, skew = -0.3) 15 | #' simulate_likert(n_levels = 7, cp = cp2) 16 | #' @details 17 | #' The simulation process uses the following model detailed by 18 | #' Boari and Nai-Ruscone. Let \eqn{X} be the continuous variable of interest, 19 | #' measured using Likert scale questions with \eqn{K} response categories. The 20 | #' observed discrete variable \eqn{Y} is defined as follows: 21 | #' \deqn{ 22 | #' Y = k, \quad \text{ if } \ \ x_{k - 1} < X \leq x_{k} 23 | #' \quad \text{ for } \ \ k = 1, \dots, K 24 | #' } 25 | #' where \eqn{x_{k}}, \eqn{k = 0, \dots, K} are endpoints defined in the domain 26 | #' of \eqn{X} such that: 27 | #' \deqn{ 28 | #' -\infty = x_{0} < x_{1} < \dots < x_{K - 1} < x_{K} = \infty. 
29 | #' } 30 | #' The endpoints dictate the transformation of the density 31 | #' \eqn{f_{X}} of \eqn{X} into a discrete probability distribution: 32 | #' \deqn{ 33 | #' \text{Pr}(Y = k) = \int_{x_{k - 1}}^{x_{k}} f_{X}(x) \, dx 34 | #' \quad \text{ for } \ \ k = 1, \dots, K. 35 | #' } 36 | #' 37 | #' The continuous latent variable is modeled using a skew normal distribution. 38 | #' The function \code{simulate_likert} performs the following steps: 39 | #' * Ensures the centered parameters are within the acceptable range. 40 | #' * Converts the centered parameters to direct parameters. 41 | #' * Defines the density function for the skew normal distribution. 42 | #' * Computes the probabilities for each response category 43 | #' using optimal endpoints. 44 | #' 45 | #' @references 46 | #' Boari, G. and Nai Ruscone, M. (2015). 47 | #' A procedure simulating Likert scale item responses. 48 | #' \emph{Electronic Journal of Applied Statistical Analysis} \bold{8(3)}, 49 | #' 288–297. 50 | #' \doi{10.1285/i20705948v8n3p288} 51 | #' 52 | #' @seealso \code{\link{discretize_density}} for details on how to calculate 53 | #' the optimal endpoints. 54 | #' @export 55 | simulate_likert <- function(n_levels, cp) { 56 | validate_skewness(cp[["skew"]]) 57 | 58 | dp <- convert_params(cp) 59 | density_fn <- function(x) { 60 | density_sn(x, dp[["xi"]], dp[["omega"]], dp[["alpha"]]) 61 | } 62 | endp <- calc_endpoints(n_levels, cp[["skew"]]) 63 | prob <- calc_probs(density_fn, endp) 64 | names(prob) <- paste(seq_len(n_levels)) 65 | return(prob) 66 | } 67 | 68 | #' Generate Random Responses 69 | #' 70 | #' Generates an array of random responses to Likert-type questions based on 71 | #' specified latent variables. 72 | #' 73 | #' @param size number of observations. 74 | #' @param n_items number of Likert scale items (number of questions). 75 | #' @param n_levels number of response categories for each item. 76 | #' Integer or vector of integers. 
77 | #' @param mean means of the latent variables. 78 | #' Numeric or vector of numerics. Defaults to 0. 79 | #' @param sd standard deviations of the latent variables. 80 | #' Numeric or vector of numerics. Defaults to 1. 81 | #' @param skew marginal skewness of the latent variables. 82 | #' Numeric or vector of numerics. Defaults to 0. 83 | #' @param corr correlations between latent variables. 84 | #' Can be a single numeric value representing the same correlation for 85 | #' all pairs, or an actual correlation matrix. Defaults to 0. 86 | #' 87 | #' @return A matrix of random responses with dimensions \code{size} by 88 | #' \code{n_items}. The column names are \code{Y1, Y2, ..., Yn} where 89 | #' \code{n} is the number of items. Each entry in the matrix represents 90 | #' a Likert scale response, ranging from 1 to \code{n_levels}. 91 | #' @examples 92 | #' # Generate responses for a single item with 5 levels 93 | #' rlikert(size = 10, n_items = 1, n_levels = 5) 94 | #' 95 | #' # Generate responses for three items with different levels and parameters 96 | #' rlikert( 97 | #' size = 10, n_items = 3, n_levels = c(4, 5, 6), 98 | #' mean = c(0, -1, 0), sd = c(0.8, 1, 1), corr = 0.5 99 | #' ) 100 | #' 101 | #' # Generate responses with a correlation matrix 102 | #' corr <- matrix(c( 103 | #' 1.00, -0.63, -0.39, 104 | #' -0.63, 1.00, 0.41, 105 | #' -0.39, 0.41, 1.00 106 | #' ), nrow = 3) 107 | #' data <- rlikert( 108 | #' size = 1000, n_items = 3, n_levels = c(4, 5, 6), 109 | #' mean = c(0, -1, 0), sd = c(0.8, 1, 1), corr = corr 110 | #' ) 111 | #' 112 | #' @export 113 | rlikert <- function(size, n_items, n_levels, 114 | mean = 0, sd = 1, skew = 0, corr = 0) { 115 | # If there's only one item, generate responses directly 116 | if (n_items == 1) { 117 | return(generate_responses(size, n_levels, mean, sd, skew)) 118 | } 119 | 120 | # Ensure input values are vectors of length n_items 121 | mean <- rep(mean, length.out = n_items) 122 | sd <- rep(sd, length.out = n_items) 123 
| skew <- rep(skew, length.out = n_items) 124 | n_levels <- rep(n_levels, length.out = n_items) 125 | 126 | # Determine the correlation case 127 | corr_case <- handle_corr_case(corr) 128 | 129 | if (corr_case == 1) { 130 | # Generate multiple univariate item responses without correlations 131 | data <- matrix(nrow = size, ncol = n_items) 132 | for (i in seq_len(n_items)) { 133 | data[, i] <- generate_responses( 134 | size, n_levels[i], 135 | mean[i], sd[i], skew[i] 136 | ) 137 | } 138 | colnames(data) <- paste0("Y", seq_len(n_items)) 139 | return(data) 140 | } 141 | 142 | # Generate the correlation matrix 143 | corr_matrix <- generate_corr_matrix(corr, corr_case, n_items) 144 | sigma <- cor2cov(corr_matrix, sd) 145 | 146 | # Generate latent variables 147 | latent_variables <- generate_latent_variables(size, mean, sigma, skew) 148 | 149 | # Discretize latent variables to generate responses 150 | data <- matrix(nrow = size, ncol = n_items) 151 | for (i in seq_len(n_items)) { 152 | endp <- calc_endpoints(n_levels[i], skew[i]) 153 | data[, i] <- findInterval(latent_variables[, i], endp) 154 | } 155 | colnames(data) <- paste0("Y", seq_len(n_items)) 156 | return(data) 157 | } 158 | 159 | #' Calculate Optimal Endpoints 160 | #' 161 | #' Returns the optimal endpoints of intervals that transform 162 | #' a neutral density into discrete probability distribution. 163 | #' 164 | #' @param n_levels integer. Number of response categories. 165 | #' @param skew numeric. Skewness parameter of the latent variable. 166 | #' 167 | #' @return A numeric vector of optimal endpoints. 
168 | #' @noRd 169 | calc_endpoints <- function(n_levels, skew) { 170 | dp <- convert_params(c("mu" = 0, "sd" = 1, "skew" = skew)) 171 | 172 | density_fn <- function(x) { 173 | density_sn(x, dp[["xi"]], dp[["omega"]], dp[["alpha"]]) 174 | } 175 | 176 | endp <- discretize_density(density_fn, n_levels)[["endp"]] 177 | return(endp) 178 | } 179 | 180 | #' Generate Responses 181 | #' 182 | #' Generates random responses for a single Likert scale item. 183 | #' 184 | #' @param size integer. Number of observations. 185 | #' @param n_levels integer. Number of response categories. 186 | #' @param mean numeric. Mean of the latent variable. 187 | #' @param sd numeric. Standard deviation of the latent variable. 188 | #' @param skew numeric. Skewness parameter of the latent variable. 189 | #' 190 | #' @return A numeric vector of simulated responses. 191 | #' @noRd 192 | generate_responses <- function(size, n_levels, mean, sd, skew) { 193 | cp <- c("mu" = mean, "sd" = sd, "skew" = skew) 194 | prob <- simulate_likert(n_levels, cp) 195 | sample(seq_len(n_levels), size, replace = TRUE, prob = prob) 196 | } 197 | 198 | #' Handle Correlation Input Case 199 | #' 200 | #' Determines the type of correlation input provided. 201 | #' 202 | #' @param corr numeric or character or matrix. Correlation input. 203 | #' 204 | #' @return An integer representing the correlation case. 205 | #' @noRd 206 | handle_corr_case <- function(corr) { 207 | if (is.numeric(corr) && !is.matrix(corr)) { 208 | if (corr == 0) { 209 | return(1) 210 | } 211 | if (corr != 0) { 212 | return(3) 213 | } 214 | } 215 | if (is.character(corr) && corr == "random") { 216 | return(2) 217 | } 218 | if (is.matrix(corr)) { 219 | return(4) 220 | } 221 | stop("Invalid correlation input") 222 | } 223 | 224 | #' Generate Correlation Matrix 225 | #' 226 | #' Generates the correlation matrix based on the correlation case. 227 | #' 228 | #' @param corr numeric or matrix. Correlation input. 229 | #' @param corr_case integer. 
Correlation case identifier. 230 | #' @param n_items integer. Number of Likert scale items. 231 | #' 232 | #' @return A numeric matrix representing the correlation matrix. 233 | #' @noRd 234 | generate_corr_matrix <- function(corr, corr_case, n_items) { 235 | if (corr_case == 2) { 236 | return(generate_rand_corr_matrix(n_items)) 237 | } 238 | if (corr_case == 3) { 239 | # corr is a scalar here, so use the short-circuit operator 240 | if (corr > 1 || corr < -1) stop("Correlation must be between -1 and 1.") 241 | corr_matrix <- matrix(corr, n_items, n_items) 242 | diag(corr_matrix) <- 1 243 | return(corr_matrix) 244 | } 245 | if (corr_case == 4) { # corr is already a user-supplied correlation matrix 246 | return(corr) 247 | } 248 | } 249 | 250 | #' Generate Latent Variables 251 | #' 252 | #' Generates latent variables based on the specified parameters. 253 | #' 254 | #' @param size number of observations. 255 | #' @param mean means of the latent variables. Numeric vector. 256 | #' @param sigma covariance matrix of the latent variables. 257 | #' @param skew skewness parameters of the latent variables. Numeric vector. 258 | #' 259 | #' @return A matrix of generated latent variables. 260 | #' @noRd 261 | generate_latent_variables <- function(size, mean, sigma, skew) { 262 | if (any(skew != 0)) { # at least one latent variable is skewed 263 | check_package("sn") 264 | cp_list <- list(mean = mean, var.cov = sigma, skew = skew) 265 | dp_list <- sn::cp2dp(cp_list, family = "SN") 266 | return(sn::rmsn(size, dp = dp_list)) 267 | } else { 268 | return(mvtnorm::rmvnorm(size, mean, sigma)) 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /R/utilities.R: -------------------------------------------------------------------------------- 1 | #' Plot Transformation 2 | #' 3 | #' Plots the densities of latent variables and the corresponding 4 | #' transformed discrete probability distributions. 
5 | #' 6 | #' @param n_items number of Likert scale items (questions). 7 | #' @param n_levels number of response categories for each Likert item. 8 | #' Integer or vector of integers. 9 | #' @param mean means of the latent variables. 10 | #' Numeric or vector of numerics. Defaults to 0. 11 | #' @param sd standard deviations of the latent variables. 12 | #' Numeric or vector of numerics. Defaults to 1. 13 | #' @param skew marginal skewness of the latent variables. 14 | #' Numeric or vector of numerics. Defaults to 0. 15 | #' @return NULL. The function produces a plot. 16 | #' @examples 17 | #' plot_likert_transform(n_items = 3, n_levels = c(3, 4, 5)) 18 | #' plot_likert_transform(n_items = 3, n_levels = 5, mean = c(0, 1, 2)) 19 | #' plot_likert_transform(n_items = 3, n_levels = 5, sd = c(0.8, 1, 1.2)) 20 | #' plot_likert_transform(n_items = 3, n_levels = 5, skew = c(-0.5, 0, 0.5)) 21 | #' @export 22 | plot_likert_transform <- function( 23 | n_items, n_levels, 24 | mean = 0, sd = 1, skew = 0) { 25 | n_levels <- rep(n_levels, length.out = n_items) 26 | mean <- rep(mean, length.out = n_items) 27 | sd <- rep(sd, length.out = n_items) 28 | skew <- rep(skew, length.out = n_items) 29 | graphics::layout(matrix(seq_len(n_items * 2), nrow = 2, ncol = n_items)) 30 | x <- seq(-3, 3, length = 1000) 31 | for (i in seq_len(n_items)) { 32 | # Draw the densities of latent variables 33 | cp <- c("mu" = mean[i], "sd" = sd[i], "skew" = skew[i]) 34 | dp <- convert_params(cp) 35 | y <- density_sn(x, dp[["xi"]], dp[["omega"]], dp[["alpha"]]) 36 | graphics::plot(x, y, type = "l", lwd = 2, xlab = "", ylab = "", main = "") 37 | graphics::title(paste("X", i, sep = "")) 38 | 39 | # Draw the corresponding discrete probability distributions 40 | prob <- simulate_likert(n_levels[i], cp) 41 | graphics::barplot(prob) 42 | graphics::title(paste("Y", i, sep = "")) 43 | } 44 | invisible(NULL) 45 | } 46 | 47 | #' Plot Contour 48 | #' 49 | #' Plots the contour of the objective function values over a 
grid 50 | #' of parameter values. It visualizes the norm of the function \code{fn} 51 | #' for different values of \code{u} (mean) and \code{v} (1/standard deviation) 52 | #' and overlays the trace of parameter updates during the estimation process. 53 | #' 54 | #' @param fn objective function to be minimized. 55 | #' @param endp endpoints of intervals that partition the continuous domain. 56 | #' @param prob discrete probability distribution. 57 | #' @param cdf_X cumulative distribution function of the latent variable. 58 | #' @param trace matrix of parameter updates. 59 | #' @return NULL. The function produces a plot. 60 | #' @noRd 61 | plot_contour <- function(fn, endp, prob, cdf_X, trace) { 62 | xlen <- 50 63 | ylen <- 50 64 | xgrid <- seq(-3, 3, length.out = xlen) # Range for mean (mu) 65 | ygrid <- seq(0.1, 3, length.out = ylen) # Range for 1/sd 66 | zvals <- matrix(NA, ncol = xlen, nrow = ylen) 67 | for (i in seq_len(xlen)) { 68 | for (j in seq_len(ylen)) { 69 | zvals[i, j] <- norm(fn( 70 | matrix(c(xgrid[i], ygrid[j])), 71 | endp, prob, cdf_X 72 | ), "2") 73 | } 74 | } 75 | graphics::contour( 76 | x = xgrid, y = ygrid, z = zvals, 77 | col = "gray42", xlab = "u = mu", ylab = "v = 1/sd" 78 | ) 79 | graphics::grid(col = "lightgray", lty = "dotted") 80 | graphics::points(trace[1, ], trace[2, ], pch = 20, col = "blue") 81 | invisible(NULL) 82 | } 83 | 84 | #' Calculate Response Proportions 85 | #' 86 | #' Returns a table of proportions for each possible response category. 87 | #' 88 | #' @param data numeric vector or matrix of responses. 89 | #' @param n_levels number of response categories. 90 | #' @return A table of response category proportions. 
91 | #' @examples 92 | #' data <- c(1, 2, 2, 3, 3, 3) 93 | #' response_prop(data, n_levels = 3) 94 | #' 95 | #' data_matrix <- matrix(c(1, 2, 2, 3, 3, 3), ncol = 2) 96 | #' response_prop(data_matrix, n_levels = 3) 97 | #' @export 98 | response_prop <- function(data, n_levels) { 99 | if (is.vector(data)) { 100 | tab <- pad_levels(prop.table(table(data)), n_levels) 101 | } else { 102 | tab <- t(apply(data, 2, function(x_col) { 103 | pad_levels(prop.table(table(x_col)), n_levels) 104 | })) 105 | dimnames(tab) <- list(Item = rownames(tab), Response = colnames(tab)) 106 | } 107 | return(tab) 108 | } 109 | 110 | #' Pad Missing Levels 111 | #' 112 | #' Helper function that takes a vector of proportions or probabilities 113 | #' across possible responses and pads the missing levels with zeros up 114 | #' to the specified number of response categories. 115 | #' 116 | #' @param pr proportions or probabilities across possible responses. 117 | #' @param n_levels number of response categories. 118 | #' @return A named vector of proportions across all possible responses. 119 | #' @examples 120 | #' pr <- c("2" = 0.25, "3" = 0.25, "4" = 0.50) 121 | #' pad_levels(pr, 5) 122 | #' @noRd 123 | pad_levels <- function(pr, n_levels) { 124 | padded_pr <- vapply(seq_len(n_levels), function(k) { 125 | if (as.character(k) %in% names(pr)) pr[[as.character(k)]] else 0 126 | }, numeric(1)) 127 | names(padded_pr) <- as.character(seq_len(n_levels)) 128 | return(padded_pr) 129 | } 130 | 131 | #' Validate Skewness 132 | #' 133 | #' Checks if the skewness parameter is within the acceptable range. 134 | #' 135 | #' @param skew numeric. Skewness parameter. 
136 | #' @noRd 137 | validate_skewness <- function(skew) { 138 | if (skew > 0.95 || skew < -0.95) { 139 | stop("The value of skewness must be in the range -0.95 to 0.95: 140 | `skew >= -0.95` and `skew <= 0.95`.") 141 | } 142 | } 143 | 144 | #' Check Package Installation 145 | #' 146 | #' Checks if a package is installed, and stops with an error message if not. 147 | #' 148 | #' @param pkg character. The name of the package. 149 | #' @noRd 150 | check_package <- function(pkg) { 151 | if (!requireNamespace(pkg, quietly = TRUE)) { 152 | stop(sprintf( 153 | 'Package "%s" must be installed. 154 | Please run:\n\n\tinstall.packages("%s")\n\n', 155 | pkg, pkg 156 | ), call. = FALSE) 157 | } 158 | } 159 | 160 | #' Density of Skew Normal Distribution 161 | #' 162 | #' Computes the probability density function of a skew normal distribution. 163 | #' 164 | #' @param x numeric. Variable. 165 | #' @param xi numeric. Location parameter. 166 | #' @param omega numeric. Scale parameter. 167 | #' @param alpha numeric. Shape parameter. 168 | #' @return numeric. Density at x. 169 | #' @seealso [sn::dsn()] 170 | #' @noRd 171 | density_sn <- function(x, xi = 0, omega = 1, alpha = 0) { 172 | return(2 / omega * stats::dnorm((x - xi) / omega) * 173 | stats::pnorm(alpha * (x - xi) / omega)) 174 | } 175 | 176 | #' Convert Centered Parameters 177 | #' 178 | #' Converts centered parameters to direct parameters used in the 179 | #' skew normal density. 180 | #' 181 | #' @param cp numeric vector. Centered parameters c(mu, sd, skew). 182 | #' @return numeric vector. Direct parameters c(xi, omega, alpha). 
183 | #' @seealso [sn::cp2dp] 184 | #' @noRd 185 | convert_params <- function(cp) { 186 | mu <- cp[1] 187 | sd <- cp[2] 188 | skew <- cp[3] 189 | 190 | b <- sqrt(2 / pi) 191 | r <- sign(skew) * (2 * abs(skew) / (4 - pi))^(1 / 3) 192 | delta <- r / (b * sqrt(1 + r^2)) 193 | 194 | mu_z <- b * delta 195 | sd_z <- sqrt(1 - mu_z^2) 196 | 197 | omega <- sd / sd_z 198 | xi <- mu - omega * mu_z 199 | alpha <- delta / sqrt(1 - delta^2) 200 | dp <- as.numeric(c(xi, omega, alpha)) 201 | names(dp) <- c("xi", "omega", "alpha") 202 | return(dp) 203 | } 204 | 205 | #' Mean of Skew Normal Distribution 206 | #' 207 | #' Computes the mean of a skew normal distribution. 208 | #' 209 | #' @param alpha numeric. Shape parameter. 210 | #' @return numeric. Mean of the skew normal distribution. 211 | #' @noRd 212 | mean_skew_normal <- function(alpha) { 213 | return(delta_skew_normal(alpha) * sqrt(2 / pi)) 214 | } 215 | 216 | #' Delta Parameter of Skew Normal Distribution 217 | #' 218 | #' Computes the delta parameter of a skew normal distribution. 219 | #' 220 | #' @param alpha numeric. Shape parameter. 221 | #' @return numeric. Delta parameter. 222 | #' @noRd 223 | delta_skew_normal <- function(alpha) { 224 | return(alpha / (sqrt(1 + alpha^2))) 225 | } 226 | 227 | #' Variance of Skew Normal Distribution 228 | #' 229 | #' Computes the variance of a skew normal distribution. 230 | #' 231 | #' @param alpha numeric. Shape parameter. 232 | #' @return numeric. Variance of the skew normal distribution. 233 | #' @noRd 234 | var_skew_normal <- function(alpha) { 235 | return(1 - 2 * (delta_skew_normal(alpha)^2) / pi) 236 | } 237 | 238 | #' Scale and Shift 239 | #' 240 | #' Scales and shifts a variable based on direct parameters. 241 | #' 242 | #' @param x numeric. Variable. 243 | #' @param dp numeric vector. Direct parameters xi, omega, alpha. 244 | #' @return numeric. Shifted and scaled variable. 
245 | #' @noRd 246 | scale_and_shift <- function(x, dp) { 247 | xi <- dp[["xi"]] 248 | omega <- dp[["omega"]] 249 | alpha <- dp[["alpha"]] 250 | mean_sn <- mean_skew_normal(alpha) 251 | return((x - mean_sn) / omega + mean_sn - xi / omega) 252 | } 253 | 254 | #' Generate Random Correlation Matrix 255 | #' 256 | #' Generates a random p x p correlation matrix. 257 | #' 258 | #' @param p integer. The size of the correlation matrix. 259 | #' @return numeric matrix. A random p x p correlation matrix. 260 | #' @noRd 261 | generate_rand_corr_matrix <- function(p) { 262 | corr <- drop(stats::rWishart(1, p, diag(p))) 263 | corr <- stats::cov2cor(corr) 264 | return(corr) 265 | } 266 | 267 | #' Convert Correlation Matrix to Covariance Matrix 268 | #' 269 | #' Converts a correlation matrix to a covariance matrix. 270 | #' 271 | #' @param corr numeric matrix. Correlation matrix. 272 | #' @param s numeric vector. Standard deviations. 273 | #' @return numeric matrix. Covariance matrix. 274 | #' @noRd 275 | cor2cov <- function(corr, s) { 276 | return(diag(s) %*% corr %*% diag(s)) 277 | } 278 | 279 | #' Generate Random Centered Parameters 280 | #' 281 | #' Generates random centered parameters for mu, sd, and skewness. 282 | #' 283 | #' @return numeric vector. Random centered parameters c(mu, sd, gamma1). 284 | #' @noRd 285 | generate_random_cp <- function() { 286 | mu <- stats::rnorm(1, 0, 1) 287 | sd <- stats::runif(1, 0.1, 2) 288 | gamma1 <- stats::runif(1, -0.95, 0.95) 289 | cp <- c("mu" = mu, "sd" = sd, "gamma1" = gamma1) 290 | return(cp) 291 | } 292 | 293 | #' Percentify 294 | #' 295 | #' Converts a numeric value to a percentage string. 296 | #' 297 | #' @param x numeric. The value to convert. 298 | #' @param digits integer. Number of digits to round to. Defaults to 0. 299 | #' @return character. Percentage string. 
300 | #' @noRd 301 | percentify <- function(x, digits = 0) { 302 | percentage <- formatC(x * 100, format = "f", digits = digits) 303 | return(paste0(percentage, "%")) 304 | } 305 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r setup, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | dpi=250, 13 | out.width = "70%") 14 | ``` 15 | 16 | # latent2likert Package logo 17 | 18 | 19 | [![R-CMD-check](https://github.com/markolalovic/latent2likert/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/markolalovic/latent2likert/actions/workflows/R-CMD-check.yaml) 20 | [![codecov](https://codecov.io/gh/markolalovic/latent2likert/branch/main/graph/badge.svg?token=HZTG6RUB2J)](https://app.codecov.io/gh/markolalovic/latent2likert) 21 | [![CRAN status](https://www.r-pkg.org/badges/version/latent2likert)](https://CRAN.R-project.org/package=latent2likert) 22 | [![downloads](https://cranlogs.r-pkg.org/badges/latent2likert)](https://cranlogs.r-pkg.org/badges/latent2likert) 23 | 24 | 25 | ## Overview 26 | 27 | The **latent2likert** package is designed to effectively simulate the discretization process inherent to Likert scales while minimizing distortion. It converts continuous latent variables into ordinal categories to generate Likert scale item responses. This is particularly useful for accurately modeling and analyzing survey data that use Likert scales, especially when applying statistical techniques that require metric data. 
28 | 29 | 30 | ## Installation 31 | 32 | You can install the released version from CRAN: 33 | 34 | ```{r, eval = FALSE} 35 | install.packages("latent2likert") 36 | ``` 37 | 38 | Or the latest development version from GitHub: 39 | ``` r 40 | devtools::install_github("markolalovic/latent2likert") 41 | ``` 42 | 43 | ## Dependencies 44 | 45 | To keep the package lightweight, **latent2likert** only imports [mvtnorm](https://cran.r-project.org/package=mvtnorm), along with the standard R packages stats and graphics, which are typically included in R releases. An additional suggested dependency is the package [sn](https://cran.r-project.org/package=sn), which is required only for generating random responses from correlated Likert items based on a multivariate skew normal distribution. The package prompts the user to install this dependency during interactive sessions if needed. 46 | 47 | ## Features 48 | 49 | * `rlikert`: Generates random responses to Likert scale questions based on specified means and standard deviations of latent variables, with optional settings for skewness and correlations. 50 | * `estimate_params`: Estimates latent parameters from existing survey data. 51 | 52 | 53 | ## Structure 54 | 55 |
56 | Overview of inputs and outputs 57 |
Overview of inputs and outputs.
58 |
59 | 60 | 61 | ## Using `rlikert` 62 | 63 | You can use the `rlikert` function to simulate Likert item responses. 64 | 65 | To generate a sample of random responses to one item on a 5-point Likert scale, use: 66 | 67 | ```{r} 68 | library(latent2likert) 69 | rlikert(size = 10, n_items = 1, n_levels = 5) 70 | ``` 71 | 72 | To generate responses to multiple items with specified parameters: 73 | 74 | ```{r} 75 | rlikert(size = 10, 76 | n_items = 3, 77 | n_levels = c(4, 5, 6), 78 | mean = c(0, -1, 0), 79 | sd = c(0.8, 1, 1), 80 | corr = 0.5) 81 | ``` 82 | You can also provide a correlation matrix: 83 | 84 | ```{r} 85 | corr <- matrix(c(1.00, -0.63, -0.39, 86 | -0.63, 1.00, 0.41, 87 | -0.39, 0.41, 1.00), nrow=3) 88 | data <- rlikert(size = 10^3, 89 | n_items = 3, 90 | n_levels = c(4, 5, 6), 91 | mean = c(0, -1, 0), 92 | sd = c(0.8, 1, 1), 93 | corr = corr) 94 | ``` 95 | Note that the correlations among the Likert response variables are only estimates of the actual correlations between the latent variables, and these estimates are typically lower: 96 | 97 | ```{r} 98 | cor(data) 99 | ``` 100 | 101 | 102 | ## Using `estimate_params` 103 | 104 | Given the data, you can estimate the values of latent parameters using: 105 | 106 | ```{r} 107 | estimate_params(data, n_levels = c(4, 5, 6), skew = 0) 108 | ``` 109 | 110 | 111 | ## Transformation 112 | 113 | To visualize the transformation, you can use `plot_likert_transform()`. It plots the densities of latent variables in the first row and transformed discrete probability distributions below: 114 | 115 | ```{r, eval = FALSE} 116 | plot_likert_transform(n_items = 3, 117 | n_levels = 5, 118 | mean = c(0, -1, 0), 119 | sd = c(0.8, 1, 1), 120 | skew = c(0, 0, 0.5)) 121 | ``` 122 | 123 |
124 | Transformation of latent variables to Likert response variables 125 |
Transformation of latent variables to Likert response variables.
126 |
127 |
128 | 129 | 130 | Note that, depending on the value of the skewness parameter, the normal latent distribution is used if skew = 0, otherwise the skew normal distribution is used. The value of skewness is restricted to the range -0.95 to 0.95, that is 131 | 132 | > `skew >= -0.95` and `skew <= 0.95`. 133 | 134 | 135 | ## Further Reading 136 | 137 | * For more detailed information and practical examples, please refer to the package [vignette](https://lalovic.io/latent2likert/articles/using_latent2likert.html). 138 | * The implemented algorithms are described in the functions [reference](https://lalovic.io/latent2likert/reference/index.html). 139 | 140 | ## Related R Packages 141 | 142 | Alternatively, you can simulate Likert item responses using the `draw_likert` function from the [fabricatr](https://CRAN.R-project.org/package=fabricatr) package. This function recodes a latent variable into a Likert response variable by specifying intervals that subdivide the continuous range. The **latent2likert** package, however, offers an advantage by automatically calculating optimal intervals that minimize distortion between the latent variable and the Likert response variable for both normal and skew normal latent distributions, eliminating the need to manually specify the intervals. 143 | 144 | There are also several alternative approaches that do not rely on latent distributions. One method involves directly defining a discrete probability distribution and sampling from it using the `sample` function in R or the `likert` function from the [wakefield](https://CRAN.R-project.org/package=wakefield) package. Another approach is to specify the means, standard deviations, and correlations among Likert response variables. For this, you can use [LikertMakeR](https://CRAN.R-project.org/package=LikertMakeR) or [SimCorMultRes](https://CRAN.R-project.org/package=SimCorMultRes) to generate correlated multinomial responses. 
145 | 146 | Additionally, you can define a data generating process. For those familiar with item response theory, the [mirt](https://CRAN.R-project.org/package=mirt) package allows users to specify discrimination and difficulty parameters for each response category. 147 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # latent2likert Package logo 5 | 6 | 7 | 8 | [![R-CMD-check](https://github.com/markolalovic/latent2likert/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/markolalovic/latent2likert/actions/workflows/R-CMD-check.yaml) 9 | [![codecov](https://codecov.io/gh/markolalovic/latent2likert/branch/main/graph/badge.svg?token=HZTG6RUB2J)](https://app.codecov.io/gh/markolalovic/latent2likert) 10 | [![CRAN 11 | status](https://www.r-pkg.org/badges/version/latent2likert)](https://CRAN.R-project.org/package=latent2likert) 12 | [![downloads](https://cranlogs.r-pkg.org/badges/latent2likert)](https://cranlogs.r-pkg.org/badges/latent2likert) 13 | 14 | 15 | ## Overview 16 | 17 | The **latent2likert** package is designed to effectively simulate the 18 | discretization process inherent to Likert scales while minimizing 19 | distortion. It converts continuous latent variables into ordinal 20 | categories to generate Likert scale item responses. This is particularly 21 | useful for accurately modeling and analyzing survey data that use Likert 22 | scales, especially when applying statistical techniques that require 23 | metric data. 
24 | 25 | ## Installation 26 | 27 | You can install the released version from CRAN: 28 | 29 | ``` r 30 | install.packages("latent2likert") 31 | ``` 32 | 33 | Or the latest development version from GitHub: 34 | 35 | ``` r 36 | devtools::install_github("markolalovic/latent2likert") 37 | ``` 38 | 39 | ## Dependencies 40 | 41 | To keep the package lightweight, **latent2likert** only imports 42 | [mvtnorm](https://cran.r-project.org/package=mvtnorm), along with the 43 | standard R packages stats and graphics, which are typically included in 44 | R releases. An additional suggested dependency is the package 45 | [sn](https://cran.r-project.org/package=sn), which is required only for 46 | generating random responses from correlated Likert items based on a 47 | multivariate skew normal distribution. The package prompts the user to 48 | install this dependency during interactive sessions if needed. 49 | 50 | ## Features 51 | 52 | - `rlikert`: Generates random responses to Likert scale questions based 53 | on specified means and standard deviations of latent variables, with 54 | optional settings for skewness and correlations. 55 | - `estimate_params`: Estimates latent parameters from existing survey 56 | data. 57 | 58 | ## Structure 59 | 60 |
61 | 62 | Overview of inputs and outputs 63 |
64 | 65 | Overview of inputs and outputs. 66 |
67 | 68 |
69 | 70 | ## Using `rlikert` 71 | 72 | You can use the `rlikert` function to simulate Likert item responses. 73 | 74 | To generate a sample of random responses to one item on a 5-point Likert 75 | scale, use: 76 | 77 | ``` r 78 | library(latent2likert) 79 | rlikert(size = 10, n_items = 1, n_levels = 5) 80 | #> [1] 1 3 3 3 2 4 1 3 3 1 81 | ``` 82 | 83 | To generate responses to multiple items with specified parameters: 84 | 85 | ``` r 86 | rlikert(size = 10, 87 | n_items = 3, 88 | n_levels = c(4, 5, 6), 89 | mean = c(0, -1, 0), 90 | sd = c(0.8, 1, 1), 91 | corr = 0.5) 92 | #> Y1 Y2 Y3 93 | #> [1,] 2 1 3 94 | #> [2,] 2 2 2 95 | #> [3,] 4 3 2 96 | #> [4,] 3 3 4 97 | #> [5,] 4 5 6 98 | #> [6,] 2 1 4 99 | #> [7,] 1 2 3 100 | #> [8,] 3 1 6 101 | #> [9,] 3 3 4 102 | #> [10,] 3 2 6 103 | ``` 104 | 105 | You can also provide a correlation matrix: 106 | 107 | ``` r 108 | corr <- matrix(c(1.00, -0.63, -0.39, 109 | -0.63, 1.00, 0.41, 110 | -0.39, 0.41, 1.00), nrow=3) 111 | data <- rlikert(size = 10^3, 112 | n_items = 3, 113 | n_levels = c(4, 5, 6), 114 | mean = c(0, -1, 0), 115 | sd = c(0.8, 1, 1), 116 | corr = corr) 117 | ``` 118 | 119 | Note that the correlations among the Likert response variables are only 120 | estimates of the actual correlations between the latent variables, and 121 | these estimates are typically lower: 122 | 123 | ``` r 124 | cor(data) 125 | #> Y1 Y2 Y3 126 | #> Y1 1.0000000 -0.5774145 -0.3838274 127 | #> Y2 -0.5774145 1.0000000 0.3856997 128 | #> Y3 -0.3838274 0.3856997 1.0000000 129 | ``` 130 | 131 | ## Using `estimate_params` 132 | 133 | Given the data, you can estimate the values of latent parameters using: 134 | 135 | ``` r 136 | estimate_params(data, n_levels = c(4, 5, 6), skew = 0) 137 | #> items 138 | #> estimates Y1 Y2 Y3 139 | #> mean -0.0526979746 -0.9696916596 -0.0009229545 140 | #> sd 0.8163184862 1.0533629380 1.0381389630 141 | ``` 142 | 143 | ## Transformation 144 | 145 | To visualize the transformation, you can use 
`plot_likert_transform()`. 146 | It plots the densities of latent variables in the first row and 147 | transformed discrete probability distributions below: 148 | 149 | ``` r 150 | plot_likert_transform(n_items = 3, 151 | n_levels = 5, 152 | mean = c(0, -1, 0), 153 | sd = c(0.8, 1, 1), 154 | skew = c(0, 0, 0.5)) 155 | ``` 156 | 157 |
158 | 159 | Transformation of latent variables to Likert response variables 160 |
161 | 162 | Transformation of latent variables to Likert response 163 | variables. 164 |
165 | 166 |
167 | 168 |
169 | 170 | Note that, depending on the value of the skewness parameter, the normal 171 | latent distribution is used if skew = 0, otherwise the skew normal 172 | distribution is used. The value of skewness is restricted to the range 173 | -0.95 to 0.95, that is 174 | 175 | > `skew >= -0.95` and `skew <= 0.95`. 176 | 177 | ## Further Reading 178 | 179 | - For more detailed information and practical examples, please refer to 180 | the package 181 | [vignette](https://lalovic.io/latent2likert/articles/using_latent2likert.html). 182 | - The implemented algorithms are described in the functions 183 | [reference](https://lalovic.io/latent2likert/reference/index.html). 184 | 185 | ## Related R Packages 186 | 187 | Alternatively, you can simulate Likert item responses using the 188 | `draw_likert` function from the 189 | [fabricatr](https://CRAN.R-project.org/package=fabricatr) package. This 190 | function recodes a latent variable into a Likert response variable by 191 | specifying intervals that subdivide the continuous range. The 192 | **latent2likert** package, however, offers an advantage by automatically 193 | calculating optimal intervals that minimize distortion between the 194 | latent variable and the Likert response variable for both normal and 195 | skew normal latent distributions, eliminating the need to manually 196 | specify the intervals. 197 | 198 | There are also several alternative approaches that do not rely on latent 199 | distributions. One method involves directly defining a discrete 200 | probability distribution and sampling from it using the `sample` 201 | function in R or the `likert` function from the 202 | [wakefield](https://CRAN.R-project.org/package=wakefield) package. 203 | Another approach is to specify the means, standard deviations, and 204 | correlations among Likert response variables. 
For this, you can use 205 | [LikertMakeR](https://CRAN.R-project.org/package=LikertMakeR) or 206 | [SimCorMultRes](https://CRAN.R-project.org/package=SimCorMultRes) to 207 | generate correlated multinomial responses. 208 | 209 | Additionally, you can define a data generating process. For those 210 | familiar with item response theory, the 211 | [mirt](https://CRAN.R-project.org/package=mirt) package allows users to 212 | specify discrimination and difficulty parameters for each response 213 | category. 214 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://markolalovic.github.io/latent2likert/ 2 | template: 3 | bootstrap: 5 4 | -------------------------------------------------------------------------------- /codemeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", 3 | "@type": "SoftwareSourceCode", 4 | "identifier": "latent2likert", 5 | "description": "Effectively simulates the discretization process inherent to Likert scales while minimizing distortion. It converts continuous latent variables into ordinal categories to generate Likert scale item responses. 
Particularly useful for accurately modeling and analyzing survey data that use Likert scales, especially when applying statistical techniques that require metric data.", 6 | "name": "latent2likert: Converting Latent Variables Into Likert Scale Responses", 7 | "codeRepository": "https://github.com/markolalovic/latent2likert", 8 | "issueTracker": "https://github.com/markolalovic/latent2likert/issues/", 9 | "license": "https://spdx.org/licenses/MIT", 10 | "version": "1.2.1", 11 | "programmingLanguage": { 12 | "@type": "ComputerLanguage", 13 | "name": "R", 14 | "url": "https://r-project.org" 15 | }, 16 | "runtimePlatform": "R version 4.4.0 (2024-04-24)", 17 | "author": [ 18 | { 19 | "@type": "Person", 20 | "givenName": "Marko", 21 | "familyName": "Lalovic", 22 | "email": "marko@lalovic.io" 23 | } 24 | ], 25 | "maintainer": [ 26 | { 27 | "@type": "Person", 28 | "givenName": "Marko", 29 | "familyName": "Lalovic", 30 | "email": "marko@lalovic.io" 31 | } 32 | ], 33 | "softwareSuggestions": [ 34 | { 35 | "@type": "SoftwareApplication", 36 | "identifier": "sn", 37 | "name": "sn", 38 | "provider": { 39 | "@id": "https://cran.r-project.org", 40 | "@type": "Organization", 41 | "name": "Comprehensive R Archive Network (CRAN)", 42 | "url": "https://cran.r-project.org" 43 | }, 44 | "sameAs": "https://CRAN.R-project.org/package=sn" 45 | }, 46 | { 47 | "@type": "SoftwareApplication", 48 | "identifier": "cowplot", 49 | "name": "cowplot", 50 | "provider": { 51 | "@id": "https://cran.r-project.org", 52 | "@type": "Organization", 53 | "name": "Comprehensive R Archive Network (CRAN)", 54 | "url": "https://cran.r-project.org" 55 | }, 56 | "sameAs": "https://CRAN.R-project.org/package=cowplot" 57 | }, 58 | { 59 | "@type": "SoftwareApplication", 60 | "identifier": "GGally", 61 | "name": "GGally", 62 | "provider": { 63 | "@id": "https://cran.r-project.org", 64 | "@type": "Organization", 65 | "name": "Comprehensive R Archive Network (CRAN)", 66 | "url": "https://cran.r-project.org" 67 | }, 68 
| "sameAs": "https://CRAN.R-project.org/package=GGally" 69 | }, 70 | { 71 | "@type": "SoftwareApplication", 72 | "identifier": "ggh4x", 73 | "name": "ggh4x", 74 | "provider": { 75 | "@id": "https://cran.r-project.org", 76 | "@type": "Organization", 77 | "name": "Comprehensive R Archive Network (CRAN)", 78 | "url": "https://cran.r-project.org" 79 | }, 80 | "sameAs": "https://CRAN.R-project.org/package=ggh4x" 81 | }, 82 | { 83 | "@type": "SoftwareApplication", 84 | "identifier": "gridExtra", 85 | "name": "gridExtra", 86 | "provider": { 87 | "@id": "https://cran.r-project.org", 88 | "@type": "Organization", 89 | "name": "Comprehensive R Archive Network (CRAN)", 90 | "url": "https://cran.r-project.org" 91 | }, 92 | "sameAs": "https://CRAN.R-project.org/package=gridExtra" 93 | }, 94 | { 95 | "@type": "SoftwareApplication", 96 | "identifier": "knitr", 97 | "name": "knitr", 98 | "provider": { 99 | "@id": "https://cran.r-project.org", 100 | "@type": "Organization", 101 | "name": "Comprehensive R Archive Network (CRAN)", 102 | "url": "https://cran.r-project.org" 103 | }, 104 | "sameAs": "https://CRAN.R-project.org/package=knitr" 105 | }, 106 | { 107 | "@type": "SoftwareApplication", 108 | "identifier": "psych", 109 | "name": "psych", 110 | "provider": { 111 | "@id": "https://cran.r-project.org", 112 | "@type": "Organization", 113 | "name": "Comprehensive R Archive Network (CRAN)", 114 | "url": "https://cran.r-project.org" 115 | }, 116 | "sameAs": "https://CRAN.R-project.org/package=psych" 117 | }, 118 | { 119 | "@type": "SoftwareApplication", 120 | "identifier": "RColorBrewer", 121 | "name": "RColorBrewer", 122 | "provider": { 123 | "@id": "https://cran.r-project.org", 124 | "@type": "Organization", 125 | "name": "Comprehensive R Archive Network (CRAN)", 126 | "url": "https://cran.r-project.org" 127 | }, 128 | "sameAs": "https://CRAN.R-project.org/package=RColorBrewer" 129 | }, 130 | { 131 | "@type": "SoftwareApplication", 132 | "identifier": "rmarkdown", 133 | "name": 
"rmarkdown", 134 | "provider": { 135 | "@id": "https://cran.r-project.org", 136 | "@type": "Organization", 137 | "name": "Comprehensive R Archive Network (CRAN)", 138 | "url": "https://cran.r-project.org" 139 | }, 140 | "sameAs": "https://CRAN.R-project.org/package=rmarkdown" 141 | }, 142 | { 143 | "@type": "SoftwareApplication", 144 | "identifier": "testthat", 145 | "name": "testthat", 146 | "version": ">= 3.0.0", 147 | "provider": { 148 | "@id": "https://cran.r-project.org", 149 | "@type": "Organization", 150 | "name": "Comprehensive R Archive Network (CRAN)", 151 | "url": "https://cran.r-project.org" 152 | }, 153 | "sameAs": "https://CRAN.R-project.org/package=testthat" 154 | } 155 | ], 156 | "softwareRequirements": { 157 | "1": { 158 | "@type": "SoftwareApplication", 159 | "identifier": "R", 160 | "name": "R", 161 | "version": ">= 3.5" 162 | }, 163 | "2": { 164 | "@type": "SoftwareApplication", 165 | "identifier": "graphics", 166 | "name": "graphics" 167 | }, 168 | "3": { 169 | "@type": "SoftwareApplication", 170 | "identifier": "mvtnorm", 171 | "name": "mvtnorm", 172 | "provider": { 173 | "@id": "https://cran.r-project.org", 174 | "@type": "Organization", 175 | "name": "Comprehensive R Archive Network (CRAN)", 176 | "url": "https://cran.r-project.org" 177 | }, 178 | "sameAs": "https://CRAN.R-project.org/package=mvtnorm" 179 | }, 180 | "4": { 181 | "@type": "SoftwareApplication", 182 | "identifier": "stats", 183 | "name": "stats" 184 | }, 185 | "SystemRequirements": null 186 | }, 187 | "fileSize": "2130.159KB", 188 | "relatedLink": "https://lalovic.io/latent2likert/", 189 | "releaseNotes": "https://github.com/markolalovic/latent2likert/blob/master/NEWS.md", 190 | "readme": "https://github.com/markolalovic/latent2likert/blob/main/README.md", 191 | "contIntegration": ["https://github.com/markolalovic/latent2likert/actions/workflows/R-CMD-check.yaml", "https://app.codecov.io/gh/markolalovic/latent2likert?branch=main"], 192 | "keywords": ["simulation", "likert", 
"survey-data", "likert-data"] 193 | } 194 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 1 note 4 | 5 | * This is a new release. 6 | -------------------------------------------------------------------------------- /data/part_bfi.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/data/part_bfi.rda -------------------------------------------------------------------------------- /dev/config_attachment.yaml: -------------------------------------------------------------------------------- 1 | path.n: NAMESPACE 2 | path.d: DESCRIPTION 3 | dir.r: R 4 | dir.v: vignettes 5 | dir.t: tests 6 | extra.suggests: ~ 7 | pkg_ignore: ~ 8 | document: yes 9 | normalize: yes 10 | inside_rmd: no 11 | must.exist: yes 12 | check_if_suggests_is_installed: yes 13 | -------------------------------------------------------------------------------- /dev/dev.R: -------------------------------------------------------------------------------- 1 | # Development 2 | devtools::load_all("/Users/markolalovic/dev/latent2likert/") 3 | 4 | covr::codecov( 5 | quiet = FALSE, 6 | clean = FALSE, 7 | token = Sys.getenv("CODECOV_TOKEN") 8 | ) 9 | -------------------------------------------------------------------------------- /dev/doc.R: -------------------------------------------------------------------------------- 1 | # Documentation 2 | devtools::document() 3 | devtools::build_manual() 4 | -------------------------------------------------------------------------------- /dev/prepare_data.R: -------------------------------------------------------------------------------- 1 | # Prepare survey data 2 | library(psych) 3 | 4 | # Take only the first 5 items and gender 5 | vars <- c("A1", 
"A2", "A3", "A4", "A5") 6 | part_bfi <- bfi[, c(vars, "gender")] 7 | 8 | # Zero-one coding male = 0, female = 1 9 | part_bfi$gender <- as.integer(part_bfi$gender - 1) 10 | 11 | # Impute the missing values with modes 12 | for (var in vars) { 13 | mode_value <- names(sort(-table(part_bfi[, var])))[1] 14 | part_bfi[, var][is.na(part_bfi[, var])] <- as.integer(mode_value) 15 | 16 | stopifnot( 17 | class(part_bfi[, var]) == "integer", 18 | sum(is.na(part_bfi[, var])) == 0 19 | ) 20 | } 21 | str(part_bfi) 22 | 23 | # Save the data set 24 | usethis::use_data(part_bfi) 25 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/.nojekyll -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Page not found (404) • latent2likert 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 25 | 26 | 27 | Skip to contents 28 | 29 | 30 |
73 |
74 |
78 | 79 | Content not found. Please use links in the navbar. 80 | 81 |
82 |
83 | 84 | 85 | 94 |
95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /docs/LICENSE-text.html: -------------------------------------------------------------------------------- 1 | 2 | License • latent2likert 6 | Skip to contents 7 | 8 | 9 |
47 |
48 |
52 | 53 |
YEAR: 2024
54 | COPYRIGHT HOLDER: Marko Lalovic
55 | 
56 | 57 |
58 | 59 | 60 |
69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /docs/LICENSE.html: -------------------------------------------------------------------------------- 1 | 2 | MIT License • latent2likert 6 | Skip to contents 7 | 8 | 9 |
47 |
48 |
52 | 53 |
54 | 55 |

Copyright (c) 2024 Marko Lalovic

56 |

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

57 |

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

58 |

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

59 |
60 | 61 |
62 | 63 | 64 |
73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /docs/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /docs/apple-touch-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/apple-touch-icon.png -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | Articles • latent2likert 6 | Skip to contents 7 | 8 | 9 |
47 |
48 |
51 | 52 |
53 |

All vignettes

54 |
55 | 56 |
Using latent2likert
57 |
58 |
59 |
60 | 61 | 62 |
71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | Authors and Citation • latent2likert 6 | Skip to contents 7 | 8 | 9 |
47 |
48 |
51 | 52 |
53 |

Authors

54 | 55 |
  • 56 |

    Marko Lalovic. Author, maintainer. 57 |

    58 |
  • 59 |
60 | 61 |
62 |

Citation

63 |

Source: DESCRIPTION

64 | 65 |

Lalovic M (2024). 66 | latent2likert: Converting Latent Variables into Likert Scale Responses. 67 | R package version 1.2.2, https://latent2likert.lalovic.io/. 68 |

69 |
@Manual{,
70 |   title = {latent2likert: Converting Latent Variables into Likert Scale Responses},
71 |   author = {Marko Lalovic},
72 |   year = {2024},
73 |   note = {R package version 1.2.2},
74 |   url = {https://latent2likert.lalovic.io/},
75 | }
76 |
77 |
79 | 80 | 81 |
90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /docs/deps/data-deps.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /docs/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/favicon-16x16.png -------------------------------------------------------------------------------- /docs/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/favicon-32x32.png -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/favicon.ico -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/logo.png -------------------------------------------------------------------------------- /docs/news/index.html: -------------------------------------------------------------------------------- 1 | 2 | Changelog • latent2likert 6 | Skip to contents 7 | 8 | 9 |
47 |
48 |
52 | 53 |
54 |

Version 0.1.0

55 |
  • Release date: 2020-10-17
  • 56 |
  • Initial release (development version).
  • 57 |
  • Tested on platforms: x86_64-pc-linux-gnu (64-bit) and x86_64-w64-mingw32 (64-bit).
  • 58 |
59 |
60 |

Version 1.0.0

61 |
  • Release date: 2024-03-28
  • 62 |
  • Added the option to generate correlated Likert scale items in the rlikert() function.
  • 63 |
  • Added the estimate_params() function for estimating parameters from existing survey data.
  • 64 |
  • Resolved dependency issues.
  • 65 |
  • Reduced dependency to only standard R packages: mvtnorm, stats, and graphics.
  • 66 |
  • The sn package is now required only for generating correlated responses using a skew normal distribution.
  • 67 |
  • Added user prompts to install the sn package when needed.
  • 68 |
69 |
70 |

Version 1.1.0

71 |
  • Release date: 2024-06-06
  • 72 |
  • Minor updates to functions and vignettes.
  • 73 |
74 |
75 |

Version 1.1.1

76 |
  • Release date: 2024-06-06
  • 77 |
  • Renamed the package from responsesR to latent2likert to better reflect its purpose of converting latent variables into Likert scale responses.
  • 78 |
  • Improved the package website and documentation.
  • 79 |
  • 80 | Note: The codebase is under development, and finer details may change.
  • 81 |
82 |
83 |

Version 1.2.1

CRAN release: 2024-06-24

84 |
  • Release date: 2024-06-12
  • 85 |
  • Refactored code for enhanced modularity and maintainability.
  • 86 |
  • Modularized core functions for improved readability.
  • 87 |
  • Improved the structure and organization of the codebase.
  • 88 |
  • Enhanced error handling for various correlation input scenarios.
  • 89 |
90 |
91 |

Version 1.2.2

92 |
  • Release date: 2024-07-26
  • 93 |
  • Exported previously internal function estimate_mean_and_sd based on user feedback, now available for direct use.
  • 94 |
  • Moved the package sn to Suggests.
  • 95 |
  • Fixed vignette building errors.
  • 96 |
97 |
99 | 100 | 101 |
110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('nav.navbar').headroom(); 6 | 7 | Toc.init({ 8 | $nav: $("#toc"), 9 | $scope: $("main h2, main h3, main h4, main h5, main h6") 10 | }); 11 | 12 | if ($('#toc').length) { 13 | $('body').scrollspy({ 14 | target: '#toc', 15 | offset: $("nav.navbar").outerHeight() + 1 16 | }); 17 | } 18 | 19 | // Activate popovers 20 | $('[data-bs-toggle="popover"]').popover({ 21 | container: 'body', 22 | html: true, 23 | trigger: 'focus', 24 | placement: "top", 25 | sanitize: false, 26 | }); 27 | 28 | $('[data-bs-toggle="tooltip"]').tooltip(); 29 | 30 | /* Clipboard --------------------------*/ 31 | 32 | function changeTooltipMessage(element, msg) { 33 | var tooltipOriginalTitle=element.getAttribute('data-bs-original-title'); 34 | element.setAttribute('data-bs-original-title', msg); 35 | $(element).tooltip('show'); 36 | element.setAttribute('data-bs-original-title', tooltipOriginalTitle); 37 | } 38 | 39 | if(ClipboardJS.isSupported()) { 40 | $(document).ready(function() { 41 | var copyButton = ""; 42 | 43 | $("div.sourceCode").addClass("hasCopyButton"); 44 | 45 | // Insert copy buttons: 46 | $(copyButton).prependTo(".hasCopyButton"); 47 | 48 | // Initialize tooltips: 49 | $('.btn-copy-ex').tooltip({container: 'body'}); 50 | 51 | // Initialize clipboard: 52 | var clipboard = new ClipboardJS('[data-clipboard-copy]', { 53 | text: function(trigger) { 54 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 55 | } 56 | }); 57 | 58 | clipboard.on('success', function(e) { 59 | changeTooltipMessage(e.trigger, 'Copied!'); 60 | e.clearSelection(); 61 | }); 62 | 63 | clipboard.on('error', function(e) { 64 | changeTooltipMessage(e.trigger,'Press Ctrl+C 
or Command+C to copy'); 65 | }); 66 | 67 | }); 68 | } 69 | 70 | /* Search marking --------------------------*/ 71 | var url = new URL(window.location.href); 72 | var toMark = url.searchParams.get("q"); 73 | var mark = new Mark("main#main"); 74 | if (toMark) { 75 | mark.mark(toMark, { 76 | accuracy: { 77 | value: "complementary", 78 | limiters: [",", ".", ":", "/"], 79 | } 80 | }); 81 | } 82 | 83 | /* Search --------------------------*/ 84 | /* Adapted from https://github.com/rstudio/bookdown/blob/2d692ba4b61f1e466c92e78fd712b0ab08c11d31/inst/resources/bs4_book/bs4_book.js#L25 */ 85 | // Initialise search index on focus 86 | var fuse; 87 | $("#search-input").focus(async function(e) { 88 | if (fuse) { 89 | return; 90 | } 91 | 92 | $(e.target).addClass("loading"); 93 | var response = await fetch($("#search-input").data("search-index")); 94 | var data = await response.json(); 95 | 96 | var options = { 97 | keys: ["what", "text", "code"], 98 | ignoreLocation: true, 99 | threshold: 0.1, 100 | includeMatches: true, 101 | includeScore: true, 102 | }; 103 | fuse = new Fuse(data, options); 104 | 105 | $(e.target).removeClass("loading"); 106 | }); 107 | 108 | // Use algolia autocomplete 109 | var options = { 110 | autoselect: true, 111 | debug: true, 112 | hint: false, 113 | minLength: 2, 114 | }; 115 | var q; 116 | async function searchFuse(query, callback) { 117 | await fuse; 118 | 119 | var items; 120 | if (!fuse) { 121 | items = []; 122 | } else { 123 | q = query; 124 | var results = fuse.search(query, { limit: 20 }); 125 | items = results 126 | .filter((x) => x.score <= 0.75) 127 | .map((x) => x.item); 128 | if (items.length === 0) { 129 | items = [{dir:"Sorry 😿",previous_headings:"",title:"No results found.",what:"No results found.",path:window.location.href}]; 130 | } 131 | } 132 | callback(items); 133 | } 134 | $("#search-input").autocomplete(options, [ 135 | { 136 | name: "content", 137 | source: searchFuse, 138 | templates: { 139 | suggestion: (s) => { 140 | if 
(s.title == s.what) { 141 | return `${s.dir} >
${s.title}
`; 142 | } else if (s.previous_headings == "") { 143 | return `${s.dir} >
${s.title}
> ${s.what}`; 144 | } else { 145 | return `${s.dir} >
${s.title}
> ${s.previous_headings} > ${s.what}`; 146 | } 147 | }, 148 | }, 149 | }, 150 | ]).on('autocomplete:selected', function(event, s) { 151 | window.location.href = s.path + "?q=" + q + "#" + s.id; 152 | }); 153 | }); 154 | })(window.jQuery || window.$) 155 | 156 | 157 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 3.2.1 2 | pkgdown: 2.0.9 3 | pkgdown_sha: ~ 4 | articles: 5 | using_latent2likert: using_latent2likert.html 6 | last_built: 2024-10-09T21:14Z 7 | urls: 8 | reference: https://markolalovic.github.io/latent2likert/reference 9 | article: https://markolalovic.github.io/latent2likert/articles 10 | 11 | -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/reference/Rplot002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/reference/Rplot002.png -------------------------------------------------------------------------------- /docs/reference/Rplot003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/reference/Rplot003.png -------------------------------------------------------------------------------- /docs/reference/Rplot004.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/reference/Rplot004.png -------------------------------------------------------------------------------- /docs/reference/estimate_mean_and_sd.html: -------------------------------------------------------------------------------- 1 | 2 | Estimate mean and standard deviation — estimate_mean_and_sd • latent2likert 8 | Skip to contents 9 | 10 | 11 |
49 |
50 |
55 | 56 |
57 |

Estimates the mean and standard deviation of a latent variable given the 58 | discrete probabilities of its observed Likert scale responses.

59 |
60 | 61 |
62 |

Usage

63 |
estimate_mean_and_sd(prob, n_levels, skew = 0, eps = 1e-06, maxit = 100)
64 |
65 | 66 |
67 |

Arguments

68 |
prob
69 |

named vector of probabilities for each response category.

70 | 71 | 72 |
n_levels
73 |

number of response categories for the Likert scale item.

74 | 75 | 76 |
skew
77 |

marginal skewness of the latent variable, defaults to 0.

78 | 79 | 80 |
eps
81 |

tolerance for convergence, defaults to 1e-6.

82 | 83 | 84 |
maxit
85 |

maximum number of iterations, defaults to 100.

86 | 87 |
88 |
89 |

Value

90 | 91 | 92 |

A numeric vector with two elements: the estimated mean and 93 | standard deviation.

94 |
95 |
96 |

Details

97 |

This function uses an iterative algorithm to solve the system of non-linear 98 | equations that describe the relationship between the continuous latent 99 | variable and the observed discrete probability distribution of Likert scale 100 | responses. The algorithm ensures stability by reparameterizing the system 101 | and applying constraints to prevent stepping into invalid regions.

102 |
103 | 104 |
105 |

Examples

106 |
prob <- c("1" = 0.313, "2" = 0.579, "3" = 0.105, "4" = 0.003)
107 | # returns estimates that are close to the actual mean and sd: c(-1, 0.5)
108 | estimate_mean_and_sd(prob, 5)
109 | #> [1] -1.0025695  0.5004155
110 | 
111 | 
112 |
113 |
115 | 116 | 117 |
126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /docs/reference/figures/courses-grouped-min.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/reference/figures/courses-grouped-min.png -------------------------------------------------------------------------------- /docs/reference/figures/courses-stacked-min.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/reference/figures/courses-stacked-min.png -------------------------------------------------------------------------------- /docs/reference/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/reference/figures/logo.png -------------------------------------------------------------------------------- /docs/reference/figures/overview-min.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/reference/figures/overview-min.png -------------------------------------------------------------------------------- /docs/reference/figures/scales-min.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/reference/figures/scales-min.png -------------------------------------------------------------------------------- /docs/reference/figures/transformation-min.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/reference/figures/transformation-min.png -------------------------------------------------------------------------------- /docs/reference/index.html: -------------------------------------------------------------------------------- 1 | 2 | Function reference • latent2likert 6 | Skip to contents 7 | 8 | 9 |
47 |
48 |
51 | 52 |
53 |

All functions

54 | 55 | 56 | 57 | 58 |
59 | 60 | 61 | 62 | 63 |
64 | 65 | discretize_density() 66 |
67 |
Discretize Density
68 |
69 | 70 | estimate_mean_and_sd() 71 |
72 |
Estimate mean and standard deviation
73 |
74 | 75 | estimate_params() 76 |
77 |
Estimate Latent Parameters
78 |
79 | 80 | part_bfi 81 |
82 |
Agreeableness and Gender Data
83 |
84 | 85 | plot_likert_transform() 86 |
87 |
Plot Transformation
88 |
89 | 90 | response_prop() 91 |
92 |
Calculate Response Proportions
93 |
94 | 95 | rlikert() 96 |
97 |
Generate Random Responses
98 |
99 | 100 | simulate_likert() 101 |
102 |
Simulate Likert Scale Item Responses
103 |
104 |
105 | 106 | 107 |
116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /docs/reference/plot_likert_transform-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/reference/plot_likert_transform-1.png -------------------------------------------------------------------------------- /docs/reference/plot_likert_transform-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/reference/plot_likert_transform-2.png -------------------------------------------------------------------------------- /docs/reference/plot_likert_transform-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/reference/plot_likert_transform-3.png -------------------------------------------------------------------------------- /docs/reference/plot_likert_transform-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/reference/plot_likert_transform-4.png -------------------------------------------------------------------------------- /docs/reference/plot_likert_transform.html: -------------------------------------------------------------------------------- 1 | 2 | Plot Transformation — plot_likert_transform • latent2likert 8 | Skip to contents 9 | 10 | 11 |
49 |
50 |
55 | 56 |
57 |

Plots the densities of latent variables and the corresponding 58 | transformed discrete probability distributions.

59 |
60 | 61 |
62 |

Usage

63 |
plot_likert_transform(n_items, n_levels, mean = 0, sd = 1, skew = 0)
64 |
65 | 66 |
67 |

Arguments

68 |
n_items
69 |

number of Likert scale items (questions).

70 | 71 | 72 |
n_levels
73 |

number of response categories for each Likert item. 74 | Integer or vector of integers.

75 | 76 | 77 |
mean
78 |

means of the latent variables. 79 | Numeric or vector of numerics. Defaults to 0.

80 | 81 | 82 |
sd
83 |

standard deviations of the latent variables. 84 | Numeric or vector of numerics. Defaults to 1.

85 | 86 | 87 |
skew
88 |

marginal skewness of the latent variables. 89 | Numeric or vector of numerics. Defaults to 0.

90 | 91 |
92 |
93 |

Value

94 | 95 | 96 |

NULL. The function produces a plot.

97 |
98 | 99 |
100 |

Examples

101 |
plot_likert_transform(n_items = 3, n_levels = c(3, 4, 5))
102 | 
103 | plot_likert_transform(n_items = 3, n_levels = 5, mean = c(0, 1, 2))
104 | 
105 | plot_likert_transform(n_items = 3, n_levels = 5, sd = c(0.8, 1, 1.2))
106 | 
107 | plot_likert_transform(n_items = 3, n_levels = 5, skew = c(-0.5, 0, 0.5))
108 | 
109 | 
110 |
111 |
113 | 114 | 115 |
124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /docs/reference/response_prop.html: -------------------------------------------------------------------------------- 1 | 2 | Calculate Response Proportions — response_prop • latent2likert 6 | Skip to contents 7 | 8 | 9 |
47 |
48 |
53 | 54 |
55 |

Returns a table of proportions for each possible response category.

56 |
57 | 58 |
59 |

Usage

60 |
response_prop(data, n_levels)
61 |
62 | 63 |
64 |

Arguments

65 |
data
66 |

numeric vector or matrix of responses.

67 | 68 | 69 |
n_levels
70 |

number of response categories.

71 | 72 |
73 |
74 |

Value

75 | 76 | 77 |

A table of response category proportions.

78 |
79 | 80 |
81 |

Examples

82 |
data <- c(1, 2, 2, 3, 3, 3)
 83 | response_prop(data, n_levels = 3)
 84 | #>         1         2         3 
 85 | #> 0.1666667 0.3333333 0.5000000 
 86 | 
 87 | data_matrix <- matrix(c(1, 2, 2, 3, 3, 3), ncol = 2)
 88 | response_prop(data_matrix, n_levels = 3)
 89 | #>       Response
 90 | #> Item           1         2 3
 91 | #>   [1,] 0.3333333 0.6666667 0
 92 | #>   [2,] 0.0000000 0.0000000 1
 93 | 
94 |
95 |
97 | 98 | 99 |
108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | https://markolalovic.github.io/latent2likert/404.html 5 | 6 | 7 | https://markolalovic.github.io/latent2likert/LICENSE-text.html 8 | 9 | 10 | https://markolalovic.github.io/latent2likert/LICENSE.html 11 | 12 | 13 | https://markolalovic.github.io/latent2likert/articles/index.html 14 | 15 | 16 | https://markolalovic.github.io/latent2likert/articles/using_latent2likert.html 17 | 18 | 19 | https://markolalovic.github.io/latent2likert/authors.html 20 | 21 | 22 | https://markolalovic.github.io/latent2likert/index.html 23 | 24 | 25 | https://markolalovic.github.io/latent2likert/news/index.html 26 | 27 | 28 | https://markolalovic.github.io/latent2likert/reference/discretize_density.html 29 | 30 | 31 | https://markolalovic.github.io/latent2likert/reference/estimate_mean_and_sd.html 32 | 33 | 34 | https://markolalovic.github.io/latent2likert/reference/estimate_params.html 35 | 36 | 37 | https://markolalovic.github.io/latent2likert/reference/index.html 38 | 39 | 40 | https://markolalovic.github.io/latent2likert/reference/part_bfi.html 41 | 42 | 43 | https://markolalovic.github.io/latent2likert/reference/plot_likert_transform.html 44 | 45 | 46 | https://markolalovic.github.io/latent2likert/reference/response_prop.html 47 | 48 | 49 | https://markolalovic.github.io/latent2likert/reference/rlikert.html 50 | 51 | 52 | https://markolalovic.github.io/latent2likert/reference/simulate_likert.html 53 | 54 | 55 | -------------------------------------------------------------------------------- /man/discretize_density.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/discretization.R 3 | \name{discretize_density} 4 | 
\alias{discretize_density} 5 | \title{Discretize Density} 6 | \usage{ 7 | discretize_density(density_fn, n_levels, eps = 1e-06) 8 | } 9 | \arguments{ 10 | \item{density_fn}{probability density function.} 11 | 12 | \item{n_levels}{cardinality of the set of all possible outcomes.} 13 | 14 | \item{eps}{convergence threshold for the algorithm.} 15 | } 16 | \value{ 17 | A list containing: 18 | \describe{ 19 | \item{prob}{discrete probability distribution.} 20 | \item{endp}{endpoints of intervals that partition the continuous domain.} 21 | \item{repr}{representation points of the intervals.} 22 | \item{dist}{distortion measured as the mean-squared error (MSE).} 23 | } 24 | } 25 | \description{ 26 | Transforms the density function of a continuous random variable into a 27 | discrete probability distribution with minimal distortion 28 | using the Lloyd-Max algorithm. 29 | } 30 | \details{ 31 | The function addresses the problem of transforming a continuous random 32 | variable \eqn{X} into a discrete random variable \eqn{Y} with minimal 33 | distortion. 
Distortion is measured as mean-squared error (MSE): 34 | \deqn{ 35 | \text{E}\left[ (X - Y)^2 \right] = 36 | \sum_{k=1}^{K} \int_{x_{k-1}}^{x_{k}} f_{X}(x) 37 | \left( x - r_{k} \right)^2 \, dx 38 | } 39 | where: 40 | \describe{ 41 | \item{\eqn{f_{X}}}{ is the probability density function of \eqn{X},} 42 | \item{\eqn{K}}{ is the number of possible outcomes of \eqn{Y},} 43 | \item{\eqn{x_{k}}}{ are endpoints of intervals that partition the domain 44 | of \eqn{X},} 45 | \item{\eqn{r_{k}}}{ are representation points of the intervals.} 46 | } 47 | This problem is solved using the following iterative procedure: 48 | \describe{ 49 | \item{\eqn{1.}}{Start with an arbitrary initial set of representation 50 | points: \eqn{r_{1} < r_{2} < \dots < r_{K}}.} 51 | \item{\eqn{2.}}{Repeat the following steps until the improvement in MSE 52 | falls below given \eqn{\varepsilon}.} 53 | \item{\eqn{3.}}{Calculate endpoints as \eqn{x_{k} = (r_{k+1} + r_{k})/2} 54 | for each \eqn{k = 1, \dots, K-1} and set \eqn{x_{0}} and \eqn{x_{K}} to 55 | \eqn{-\infty} and \eqn{\infty}, respectively.} 56 | \item{\eqn{4.}}{Update representation points by setting \eqn{r_{k}} 57 | equal to the conditional mean of \eqn{X} given \eqn{X \in (x_{k-1}, x_{k})} 58 | for each \eqn{k = 1, \dots, K}.} 59 | } 60 | 61 | With each execution of step \eqn{(3)} and step \eqn{(4)}, the MSE decreases 62 | or remains the same. As MSE is nonnegative, it approaches a limit. 63 | The algorithm terminates when the improvement in MSE is less than a given 64 | \eqn{\varepsilon > 0}, ensuring convergence after a finite number 65 | of iterations. 66 | 67 | This procedure is known as Lloyd-Max's algorithm, initially used for scalar 68 | quantization and closely related to the k-means algorithm. Local convergence 69 | has been proven for log-concave density functions by Kieffer. Many common 70 | probability distributions are log-concave including the normal and skew 71 | normal distribution, as shown by Azzalini. 
72 | } 73 | \examples{ 74 | discretize_density(density_fn = stats::dnorm, n_levels = 5) 75 | discretize_density(density_fn = function(x) { 76 | 2 * stats::dnorm(x) * stats::pnorm(0.5 * x) 77 | }, n_levels = 4) 78 | } 79 | \references{ 80 | Azzalini, A. (1985). 81 | A class of distributions which includes the normal ones. 82 | \emph{Scandinavian Journal of Statistics} \bold{12(2)}, 171–178. 83 | 84 | Kieffer, J. (1983). 85 | Uniqueness of locally optimal quantizer for log-concave density and convex 86 | error function. 87 | \emph{IEEE Transactions on Information Theory} \bold{29}, 42–47. 88 | 89 | Lloyd, S. (1982). 90 | Least squares quantization in PCM. 91 | \emph{IEEE Transactions on Information Theory} \bold{28 (2)}, 129–137. 92 | } 93 | -------------------------------------------------------------------------------- /man/estimate_mean_and_sd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/estimation.R 3 | \name{estimate_mean_and_sd} 4 | \alias{estimate_mean_and_sd} 5 | \title{Estimate mean and standard deviation} 6 | \usage{ 7 | estimate_mean_and_sd(prob, n_levels, skew = 0, eps = 1e-06, maxit = 100) 8 | } 9 | \arguments{ 10 | \item{prob}{named vector of probabilities for each response category.} 11 | 12 | \item{n_levels}{number of response categories for the Likert scale item.} 13 | 14 | \item{skew}{marginal skewness of the latent variable, defaults to 0.} 15 | 16 | \item{eps}{tolerance for convergence, defaults to 1e-6.} 17 | 18 | \item{maxit}{maximum number of iterations, defaults to 100.} 19 | } 20 | \value{ 21 | A numeric vector with two elements: the estimated mean and 22 | standard deviation. 23 | } 24 | \description{ 25 | Estimates the mean and standard deviation of a latent variable given the 26 | discrete probabilities of its observed Likert scale responses. 
27 | } 28 | \details{ 29 | This function uses an iterative algorithm to solve the system of non-linear 30 | equations that describe the relationship between the continuous latent 31 | variable and the observed discrete probability distribution of Likert scale 32 | responses. The algorithm ensures stability by reparameterizing the system 33 | and applying constraints to prevent stepping into invalid regions. 34 | } 35 | \examples{ 36 | prob <- c("1" = 0.313, "2" = 0.579, "3" = 0.105, "4" = 0.003) 37 | # returns estimates that are close to the actual mean and sd: c(-1, 0.5) 38 | estimate_mean_and_sd(prob, 5) 39 | 40 | } 41 | -------------------------------------------------------------------------------- /man/estimate_params.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/estimation.R 3 | \name{estimate_params} 4 | \alias{estimate_params} 5 | \title{Estimate Latent Parameters} 6 | \usage{ 7 | estimate_params(data, n_levels, skew = 0) 8 | } 9 | \arguments{ 10 | \item{data}{survey data with columns representing individual items. 11 | Apart from this, \code{data} can be of almost any class such as 12 | "data.frame" "matrix" or "array".} 13 | 14 | \item{n_levels}{number of response categories, a vector or a number.} 15 | 16 | \item{skew}{marginal skewness of latent variables, defaults to 0.} 17 | } 18 | \value{ 19 | A table of estimated parameters for each latent variable. 20 | } 21 | \description{ 22 | Estimates the location and scaling parameters of the latent variables from 23 | existing survey data. 
24 | } 25 | \details{ 26 | The relationship between the continuous random variable \eqn{X} and the 27 | discrete probability distribution \eqn{p_k}, for \eqn{k = 1, \dots, K}, 28 | can be described by a system of non-linear equations: 29 | \deqn{ 30 | p_{k} = F_{X}\left( \frac{x_{k - 1} - \xi}{\omega} \right) 31 | - F_{X}\left( \frac{x_{k} - \xi}{\omega} \right) 32 | \quad \text{for} \ k = 1, \dots, K 33 | } 34 | where: 35 | \describe{ 36 | \item{\eqn{F_{X}}}{ is the cumulative distribution function of \eqn{X},} 37 | \item{\eqn{K}}{ is the number of possible response categories,} 38 | \item{\eqn{x_{k}}}{ are the endpoints defining the boundaries of the 39 | response categories,} 40 | \item{\eqn{p_{k}}}{ is the probability of the \eqn{k}-th 41 | response category,} 42 | \item{\eqn{\xi}}{ is the location parameter of \eqn{X},} 43 | \item{\eqn{\omega}}{ is the scaling parameter of \eqn{X}.} 44 | } 45 | The endpoints \eqn{x_{k}} are calculated by discretizing a 46 | random variable \eqn{Z} 47 | with mean 0 and standard deviation 1 that follows the same 48 | distribution as \eqn{X}. 49 | By solving the above system of non-linear equations iteratively, 50 | we can find the parameters that best fit the observed discrete 51 | probability distribution \eqn{p_{k}}. 52 | 53 | The function \code{estimate_params}: 54 | \itemize{ 55 | \item Computes the proportion table of the responses for each item. 56 | \item Estimates the probabilities \eqn{p_{k}} for each item. 57 | \item Computes the estimates of \eqn{\xi} and \eqn{\omega} for each item. 58 | \item Combines the estimated parameters for all items into a table. 59 | } 60 | } 61 | \examples{ 62 | data(part_bfi) 63 | vars <- c("A1", "A2", "A3", "A4", "A5") 64 | estimate_params(data = part_bfi[, vars], n_levels = 6) 65 | } 66 | \seealso{ 67 | \code{\link{discretize_density}} for details on calculating 68 | the endpoints, and \code{\link{part_bfi}} for example of the survey data. 
69 | } 70 | -------------------------------------------------------------------------------- /man/figures/courses-grouped-min.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/man/figures/courses-grouped-min.png -------------------------------------------------------------------------------- /man/figures/courses-stacked-min.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/man/figures/courses-stacked-min.png -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/man/figures/logo.png -------------------------------------------------------------------------------- /man/figures/overview-min.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/man/figures/overview-min.png -------------------------------------------------------------------------------- /man/figures/scales-min.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/man/figures/scales-min.png -------------------------------------------------------------------------------- /man/figures/transformation-min.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/man/figures/transformation-min.png 
-------------------------------------------------------------------------------- /man/part_bfi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/part_bfi.R 3 | \docType{data} 4 | \name{part_bfi} 5 | \alias{part_bfi} 6 | \title{Agreeableness and Gender Data} 7 | \format{ 8 | {An object of class \code{"data.frame"} with 2800 observations on 9 | the following 6 variables:} 10 | \describe{ 11 | \item{A1}{Am indifferent to the feelings of others.} 12 | \item{A2}{Inquire about others' well-being.} 13 | \item{A3}{Know how to comfort others.} 14 | \item{A4}{Love children.} 15 | \item{A5}{Make people feel at ease.} 16 | \item{gender}{Gender of the respondent.} 17 | } 18 | } 19 | \source{ 20 | {International Personality Item Pool ({https://ipip.ori.org})} 21 | 22 | {https://search.r-project.org/CRAN/refmans/psychTools/html/bfi.html} 23 | } 24 | \usage{ 25 | data(part_bfi) 26 | } 27 | \description{ 28 | This dataset is a cleaned up version of a small part of \code{bfi} dataset 29 | from \code{psychTools} package. It contains responses to the first 5 items 30 | of the agreeableness scale from the International Personality Item Pool 31 | (IPIP) and the gender attribute. It includes responses from 2800 subjects. 32 | Each item was answered on a six point Likert scale ranging from 33 | 1 (very inaccurate), to 6 (very accurate). Gender was coded as 34 | 0 for male and 1 for female. Missing values were addressed using 35 | mode imputation. 36 | } 37 | \examples{ 38 | data(part_bfi) 39 | head(part_bfi) 40 | } 41 | \references{ 42 | Revelle, W. (2024). 43 | Psych: Procedures for Psychological, Psychometric, and Personality Research. 
44 | \emph{Evanston, Illinois: Northwestern University.} 45 | \url{https://CRAN.R-project.org/package=psych} 46 | } 47 | \keyword{datasets} 48 | -------------------------------------------------------------------------------- /man/plot_likert_transform.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utilities.R 3 | \name{plot_likert_transform} 4 | \alias{plot_likert_transform} 5 | \title{Plot Transformation} 6 | \usage{ 7 | plot_likert_transform(n_items, n_levels, mean = 0, sd = 1, skew = 0) 8 | } 9 | \arguments{ 10 | \item{n_items}{number of Likert scale items (questions).} 11 | 12 | \item{n_levels}{number of response categories for each Likert item. 13 | Integer or vector of integers.} 14 | 15 | \item{mean}{means of the latent variables. 16 | Numeric or vector of numerics. Defaults to 0.} 17 | 18 | \item{sd}{standard deviations of the latent variables. 19 | Numeric or vector of numerics. Defaults to 1.} 20 | 21 | \item{skew}{marginal skewness of the latent variables. 22 | Numeric or vector of numerics. Defaults to 0.} 23 | } 24 | \value{ 25 | NULL. The function produces a plot. 26 | } 27 | \description{ 28 | Plots the densities of latent variables and the corresponding 29 | transformed discrete probability distributions. 
30 | } 31 | \examples{ 32 | plot_likert_transform(n_items = 3, n_levels = c(3, 4, 5)) 33 | plot_likert_transform(n_items = 3, n_levels = 5, mean = c(0, 1, 2)) 34 | plot_likert_transform(n_items = 3, n_levels = 5, sd = c(0.8, 1, 1.2)) 35 | plot_likert_transform(n_items = 3, n_levels = 5, skew = c(-0.5, 0, 0.5)) 36 | } 37 | -------------------------------------------------------------------------------- /man/rLikert.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulation.R 3 | \name{rlikert} 4 | \alias{rlikert} 5 | \title{Generate Random Responses} 6 | \usage{ 7 | rlikert(size, n_items, n_levels, mean = 0, sd = 1, skew = 0, corr = 0) 8 | } 9 | \arguments{ 10 | \item{size}{number of observations.} 11 | 12 | \item{n_items}{number of Likert scale items (number of questions).} 13 | 14 | \item{n_levels}{number of response categories for each item. 15 | Integer or vector of integers.} 16 | 17 | \item{mean}{means of the latent variables. 18 | Numeric or vector of numerics. Defaults to 0.} 19 | 20 | \item{sd}{standard deviations of the latent variables. 21 | Numeric or vector of numerics. Defaults to 1.} 22 | 23 | \item{skew}{marginal skewness of the latent variables. 24 | Numeric or vector of numerics. Defaults to 0.} 25 | 26 | \item{corr}{correlations between latent variables. 27 | Can be a single numeric value representing the same correlation for 28 | all pairs, or an actual correlation matrix. Defaults to 0.} 29 | } 30 | \value{ 31 | A matrix of random responses with dimensions \code{size} by 32 | \code{n_items}. The column names are \code{Y1, Y2, ..., Yn} where 33 | \code{n} is the number of items. Each entry in the matrix represents 34 | a Likert scale response, ranging from 1 to \code{n_levels}. 35 | } 36 | \description{ 37 | Generates an array of random responses to Likert-type questions based on 38 | specified latent variables. 
39 | } 40 | \examples{ 41 | # Generate responses for a single item with 5 levels 42 | rlikert(size = 10, n_items = 1, n_levels = 5) 43 | 44 | # Generate responses for three items with different levels and parameters 45 | rlikert( 46 | size = 10, n_items = 3, n_levels = c(4, 5, 6), 47 | mean = c(0, -1, 0), sd = c(0.8, 1, 1), corr = 0.5 48 | ) 49 | 50 | # Generate responses with a correlation matrix 51 | corr <- matrix(c( 52 | 1.00, -0.63, -0.39, 53 | -0.63, 1.00, 0.41, 54 | -0.39, 0.41, 1.00 55 | ), nrow = 3) 56 | data <- rlikert( 57 | size = 1000, n_items = 3, n_levels = c(4, 5, 6), 58 | mean = c(0, -1, 0), sd = c(0.8, 1, 1), corr = corr 59 | ) 60 | 61 | } 62 | -------------------------------------------------------------------------------- /man/response_prop.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utilities.R 3 | \name{response_prop} 4 | \alias{response_prop} 5 | \title{Calculate Response Proportions} 6 | \usage{ 7 | response_prop(data, n_levels) 8 | } 9 | \arguments{ 10 | \item{data}{numeric vector or matrix of responses.} 11 | 12 | \item{n_levels}{number of response categories.} 13 | } 14 | \value{ 15 | A table of response category proportions. 16 | } 17 | \description{ 18 | Returns a table of proportions for each possible response category. 
19 | } 20 | \examples{ 21 | data <- c(1, 2, 2, 3, 3, 3) 22 | response_prop(data, n_levels = 3) 23 | 24 | data_matrix <- matrix(c(1, 2, 2, 3, 3, 3), ncol = 2) 25 | response_prop(data_matrix, n_levels = 3) 26 | } 27 | -------------------------------------------------------------------------------- /man/simulate_likert.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulation.R 3 | \name{simulate_likert} 4 | \alias{simulate_likert} 5 | \title{Simulate Likert Scale Item Responses} 6 | \usage{ 7 | simulate_likert(n_levels, cp) 8 | } 9 | \arguments{ 10 | \item{n_levels}{number of response categories for the Likert scale item.} 11 | 12 | \item{cp}{centered parameters of the latent variable. 13 | Named vector including mean (\code{mu}), standard deviation (\code{sd}), 14 | and skewness (\code{skew}). Skewness must be between -0.95 and 0.95.} 15 | } 16 | \value{ 17 | A named vector of probabilities for each response category. 18 | } 19 | \description{ 20 | Simulates Likert scale item responses based on a specified number 21 | of response categories and the centered parameters of the latent variable. 22 | } 23 | \details{ 24 | The simulation process uses the following model detailed by 25 | Boari and Nai-Ruscone. Let \eqn{X} be the continuous variable of interest, 26 | measured using Likert scale questions with \eqn{K} response categories. The 27 | observed discrete variable \eqn{Y} is defined as follows: 28 | \deqn{ 29 | Y = k, \quad \text{ if } \ \ x_{k - 1} < X \leq x_{k} 30 | \quad \text{ for } \ \ k = 1, \dots, K 31 | } 32 | where \eqn{x_{k}}, \eqn{k = 0, \dots, K} are endpoints defined in the domain 33 | of \eqn{X} such that: 34 | \deqn{ 35 | -\infty = x_{0} < x_{1} < \dots < x_{K - 1} < x_{K} = \infty. 
36 | } 37 | The endpoints dictate the transformation of the density 38 | \eqn{f_{X}} of \eqn{X} into a discrete probability distribution: 39 | \deqn{ 40 | \text{Pr}(Y = k) = \int_{x_{k - 1}}^{x_{k}} f_{X}(x) \, dx 41 | \quad \text{ for } \ \ k = 1, \dots, K. 42 | } 43 | 44 | The continuous latent variable is modeled using a skew normal distribution. 45 | The function \code{simulate_likert} performs the following steps: 46 | \itemize{ 47 | \item Ensures the centered parameters are within the acceptable range. 48 | \item Converts the centered parameters to direct parameters. 49 | \item Defines the density function for the skew normal distribution. 50 | \item Computes the probabilities for each response category 51 | using optimal endpoints. 52 | } 53 | } 54 | \examples{ 55 | cp <- c(mu = 0, sd = 1, skew = 0.5) 56 | simulate_likert(n_levels = 5, cp = cp) 57 | cp2 <- c(mu = 1, sd = 2, skew = -0.3) 58 | simulate_likert(n_levels = 7, cp = cp2) 59 | } 60 | \references{ 61 | Boari, G. and Nai Ruscone, M. (2015). 62 | A procedure simulating Likert scale item responses. 63 | \emph{Electronic Journal of Applied Statistical Analysis} \bold{8(3)}, 64 | 288–297. 65 | \doi{10.1285/i20705948v8n3p288} 66 | } 67 | \seealso{ 68 | \code{\link{discretize_density}} for details on how to calculate 69 | the optimal endpoints. 
70 | } 71 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(latent2likert) 3 | 4 | test_check("latent2likert") -------------------------------------------------------------------------------- /tests/testthat/test_discretization.R: -------------------------------------------------------------------------------- 1 | context("Testing discretization") 2 | 3 | test_that("discretization of N(0,1) using 4 levels gives expected results", { 4 | res <- discretize_density(density_fn = dnorm, n_levels = 4) 5 | expected <- list( 6 | prob = c(0.16, 0.34, 0.34, 0.16), 7 | endp = c(-Inf, -0.98, 0, 0.98, Inf), 8 | repr = c(-1.51, -0.45, 0.45, 1.51), 9 | dist = 0.12 10 | ) 11 | expect_equal(res, expected, tolerance = 0.05) 
12 | }) 13 | 14 | test_that("discretization of N(0,1) using 5 levels gives expected results", { 15 | res <- discretize_density(density_fn = dnorm, n_levels = 5) 16 | expected <- list( 17 | prob = c(0.11, 0.24, 0.30, .24, 0.11), 18 | endp = c(-Inf, -1.25, -0.38, 0.38, 1.25, Inf), 19 | repr = c(-1.73, -0.77, 0, 0.77, 1.73), 20 | dist = 0.08 21 | ) 22 | expect_equal(res, expected, tolerance = 0.05) 23 | }) 24 | -------------------------------------------------------------------------------- /tests/testthat/test_estimation.R: -------------------------------------------------------------------------------- 1 | context("Testing estimation of parameters") 2 | 3 | test_that("`mean` and `sd` are well estimated given `prob` 4 | for normal case", { 5 | actual <- c(-1, 0.5) # actual mean and sd 6 | prob <- c("1" = 0.313, "2" = 0.579, "3" = 0.105, "4" = 0.003) 7 | estimates <- estimate_mean_and_sd(prob, 5) 8 | expect_equal(estimates, actual, tolerance = 0.1) 9 | }) 10 | 11 | test_that("`mean` and `sd` are well estimated given `prob` 12 | for skew case", { 13 | actual <- c(0.146858, 1.084341) # actual mean and sd 14 | skew <- -0.4565873 # skewness 15 | prob <- c( 16 | "1" = 0.036, "2" = 0.089, "3" = 0.142, 17 | "4" = 0.185, "5" = 0.21, "6" = 0.201, "7" = 0.137 18 | ) 19 | estimates <- estimate_mean_and_sd(prob, 7, skew) 20 | expect_equal(estimates, actual, tolerance = 0.05) 21 | }) 22 | 23 | test_that("estimate_params returns accurate estimates for 24 | latent parameters when data is a vector", { 25 | set.seed(12345) # for reproducibility 26 | 27 | # Generate test data for a single item (vector data) 28 | data_vector <- rlikert( 29 | size = 1000, 30 | n_items = 1, 31 | n_levels = 5, 32 | mean = 0.5, 33 | sd = 1.2, 34 | skew = 0 35 | ) 36 | 37 | # Estimate parameters 38 | estimates <- estimate_params(data_vector, n_levels = 5, skew = 0) 39 | 40 | # Extract estimated means and standard deviations 41 | estimated_mean <- as.numeric(estimates["mean"]) 42 | estimated_sd <- 
as.numeric(estimates["sd"]) 43 | 44 | # Actual means and standard deviations 45 | actual_mean <- 0.5 46 | actual_sd <- 1.2 47 | 48 | # Check that estimates are close to actual mean and sd 49 | expect_equal(estimated_mean, actual_mean, tolerance = 0.1) 50 | expect_equal(estimated_sd, actual_sd, tolerance = 0.1) 51 | }) 52 | 53 | test_that("estimate_params returns accurate estimates for 54 | latent parameters", { 55 | set.seed(12345) # for reproducibility 56 | 57 | # Generate test data 58 | corr <- matrix(c( 59 | 1.00, -0.63, -0.39, 60 | -0.63, 1.00, 0.41, 61 | -0.39, 0.41, 1.00 62 | ), nrow = 3) 63 | data <- rlikert( 64 | size = 1000, 65 | n_items = 3, 66 | n_levels = c(4, 5, 6), 67 | mean = c(0, -1, 0), 68 | sd = c(0.8, 1, 1), 69 | corr = corr 70 | ) 71 | 72 | # Estimate parameters 73 | estimates <- estimate_params(data, n_levels = c(4, 5, 6), skew = 0) 74 | 75 | # Extract estimated means and standard deviations 76 | estimated_means <- as.numeric(estimates["mean", ]) 77 | estimated_sds <- as.numeric(estimates["sd", ]) 78 | 79 | # Actual means and standard deviations 80 | actual_means <- c(0, -1, 0) 81 | actual_sds <- c(0.8, 1, 1) 82 | 83 | # Check if estimated means are close to actual means and sds 84 | expect_equal(estimated_means, actual_means, tolerance = 0.1) 85 | expect_equal(estimated_sds, actual_sds, tolerance = 0.1) 86 | }) 87 | 88 | test_that("plot_contour executes without errors", { 89 | # Define a simple objective function 90 | test_fn <- function(x, endp, prob, cdf_X) { 91 | u <- x[1] 92 | v <- x[2] 93 | y <- cdf_X(v * endp - u * v) 94 | return(matrix(utils::tail(y, -1) - utils::head(y, -1) - prob)) 95 | } 96 | 97 | # Example parameters 98 | endp <- c(-Inf, -1, 0, 1, Inf) 99 | prob <- c(0.1, 0.2, 0.4, 0.3) 100 | cdf_X <- stats::pnorm 101 | trace <- matrix(c(rep(0, 100), rep(1, 100)), nrow = 2) 102 | 103 | # Check if plotting function executes without errors 104 | expect_silent(plot_contour(test_fn, endp, prob, cdf_X, trace)) 105 | }) 106 | 
-------------------------------------------------------------------------------- /tests/testthat/test_simulation.R: -------------------------------------------------------------------------------- 1 | context("Testing simulation") 2 | 3 | test_that("simulate_likert using 4 levels and N(0,1) gives 4 | expected result", { 5 | n_levels <- 4 6 | cp <- c("mu" = 0, "sd" = 1, "skew" = 0) 7 | prob <- simulate_likert(n_levels, cp) 8 | prob <- as.numeric(prob) 9 | expected_prob <- c(0.163, 0.337, 0.337, 0.163) 10 | expect_equal(prob, expected_prob, tolerance = 0.05) 11 | }) 12 | 13 | test_that("simulate_likert using 5 levels and N(0,1) gives 14 | expected result", { 15 | n_levels <- 5 16 | cp <- c("mu" = 0, "sd" = 1, "skew" = 0) 17 | prob <- simulate_likert(n_levels, cp) 18 | prob <- as.numeric(prob) 19 | expected_prob <- c(0.106, 0.244, 0.298, 0.244, 0.106) 20 | expect_equal(prob, expected_prob, tolerance = 0.05) 21 | }) 22 | 23 | test_that("Correlation input is handled correctly", { 24 | corr_matrix <- generate_rand_corr_matrix(3) 25 | corr_inputs <- list(0, "random", 0.5, corr_matrix) 26 | 27 | for (i in seq_along(corr_inputs)) { 28 | corr <- corr_inputs[[i]] 29 | corr_case <- handle_corr_case(corr) 30 | expect_equal(i, corr_case) 31 | } 32 | }) 33 | 34 | test_that("Invalid corr input raises error", { 35 | res <- try(handle_corr_case("Invalid"), silent = TRUE) 36 | expect_equal(class(res), "try-error") 37 | }) 38 | 39 | test_that("generate_corr_matrix returns a matrix that 40 | resembles a correlation matrix", { 41 | n_items <- 3 42 | corr_matrix <- generate_rand_corr_matrix(n_items) 43 | corr_inputs <- list("random", 0.5, corr_matrix) 44 | 45 | for (i in seq_along(corr_inputs)) { 46 | corr <- corr_inputs[[i]] 47 | corr_case <- handle_corr_case(corr) 48 | res <- generate_corr_matrix(corr, corr_case, n_items) 49 | 50 | expect_true(all(diag(res) == 1), 51 | info = paste("Failed on input:", i) 52 | ) 53 | expect_true(isSymmetric(res), 54 | info = paste("Failed on input:", i) 
55 | ) 56 | } 57 | }) 58 | 59 | test_that("generated response proportions closely approximate the 60 | probabilities for a single item", { 61 | set.seed(12345) # for reproducibility 62 | size <- 1000 63 | n_items <- 1 64 | n_levels <- 5 65 | mean <- 0 66 | sd <- 1 67 | skew <- 0 68 | 69 | data <- rlikert(size, n_items, n_levels, mean, sd, skew) 70 | 71 | cp <- c("mu" = mean, "sd" = sd, "skew" = skew) 72 | prob <- simulate_likert(n_levels, cp) 73 | data_prop <- response_prop(data, n_levels) 74 | expect_equal(prob, data_prop, tolerance = 0.05) 75 | }) 76 | 77 | test_that("generated response proportions closely approximate the 78 | probabilities for multiple items without corr", { 79 | set.seed(12345) # for reproducibility 80 | size <- 1000 81 | n_levels <- 5 82 | n_items <- 3 83 | 84 | mean <- c(-1, 0, 1) 85 | sd <- c(0.5, 1, 0.5) 86 | skew <- c(0.5, 0.5, 0.5) 87 | corr <- 0 88 | 89 | data <- rlikert(size, n_items, n_levels, mean, sd, skew, corr) 90 | for (i in seq_len(n_items)) { 91 | cp <- c("mu" = mean[i], "sd" = sd[i], "skew" = skew[i]) 92 | prob <- simulate_likert(n_levels, cp) 93 | data_prop <- response_prop(data[, i], n_levels) 94 | expect_equal(prob, data_prop, tolerance = 0.05) 95 | } 96 | }) 97 | 98 | test_that("generated response proportions closely approximate the 99 | probabilities for multiple items with corr", { 100 | set.seed(12345) # for reproducibility 101 | size <- 1000 102 | n_levels <- 5 103 | n_items <- 3 104 | 105 | mean <- c(-1, 0, 1) 106 | sd <- c(1, 1, 0.5) 107 | skew <- c(0.5, 0.5, 0.5) 108 | corr <- 0.5 109 | 110 | data <- rlikert(size, n_items, n_levels, mean, sd, skew, corr) 111 | for (i in seq_len(n_items)) { 112 | cp <- c("mu" = mean[i], "sd" = sd[i], "skew" = skew[i]) 113 | prob <- simulate_likert(n_levels, cp) 114 | data_prop <- response_prop(data[, i], n_levels) 115 | expect_equal(prob, data_prop, tolerance = 0.05) 116 | } 117 | }) 118 | 119 | test_that("generated response proportions closely approximate the 120 | probabilities, 
when using a random corr matrix", { 121 | set.seed(12345) # for reproducibility 122 | size <- 1000 123 | n_levels <- 5 124 | n_items <- 3 125 | 126 | mean <- c(0, -1, -1) 127 | sd <- c(1, 1, 0.5) 128 | skew <- c(0, 0, 0) 129 | corr <- "random" 130 | 131 | data <- rlikert(size, n_items, n_levels, mean, sd, skew, corr) 132 | for (i in seq_len(n_items)) { 133 | cp <- c("mu" = mean[i], "sd" = sd[i], "skew" = skew[i]) 134 | prob <- simulate_likert(n_levels, cp) 135 | data_prop <- response_prop(data[, i], n_levels) 136 | expect_equal(prob, data_prop, tolerance = 0.05) 137 | } 138 | }) 139 | 140 | test_that("generated response proportions closely approximate the 141 | correlations between latent variables", { 142 | set.seed(12345) # for reproducibility 143 | size <- 1000 144 | n_levels <- 5 145 | n_items <- 3 146 | 147 | mean <- c(0, 0, 0) 148 | sd <- c(1, 1, 1) 149 | skew <- c(0, 0, 0) 150 | corr <- 0.5 151 | 152 | data <- rlikert(size, n_items, n_levels, mean, sd, skew, corr) 153 | data_corr_matrix <- cor(data) 154 | 155 | actual_corr_matrix <- generate_corr_matrix(corr, 3, n_items) 156 | dimnames(actual_corr_matrix) <- dimnames(data_corr_matrix) 157 | 158 | expect_equal(actual_corr_matrix, 159 | data_corr_matrix, 160 | tolerance = 0.1 161 | ) 162 | }) 163 | 164 | test_that("generated response proportions closely approximate the 165 | correlations between latent variables, harder case", { 166 | set.seed(12345) # for reproducibility 167 | size <- 1000 168 | n_levels <- 6 169 | n_items <- 3 170 | 171 | mean <- c(-0.5, 0, 0.5) 172 | sd <- c(0.5, 0.5, 0.5) 173 | skew <- c(-0.3, -0.4, -0.5) 174 | corr <- 0.7 175 | 176 | data <- rlikert(size, n_items, n_levels, mean, sd, skew, corr) 177 | data_corr_matrix <- cor(data) 178 | 179 | actual_corr_matrix <- generate_corr_matrix(corr, 3, n_items) 180 | dimnames(actual_corr_matrix) <- dimnames(data_corr_matrix) 181 | 182 | expect_equal(actual_corr_matrix, 183 | data_corr_matrix, 184 | tolerance = 0.2 185 | ) 186 | }) 187 | 
-------------------------------------------------------------------------------- /tests/testthat/test_utilities.R: -------------------------------------------------------------------------------- 1 | context("Testing helper functions") 2 | 3 | testthat::test_that("pad_levels gives the correct result", { 4 | pr <- c("2" = 0.25, "3" = 0.25, "4" = 0.50) 5 | n_levels <- 5 6 | actual_pr <- c("1" = 0, "2" = 0.25, "3" = 0.25, "4" = 0.50, "5" = 0) 7 | padded_pr <- pad_levels(pr, n_levels) 8 | testthat::expect_equal(padded_pr, actual_pr) 9 | }) 10 | 11 | testthat::test_that("response_prop gives the correct result, 12 | univariate case", { 13 | data <- rep(c(1, 2, 3, 4), each = 2) 14 | tab <- response_prop(data, n_levels = 4) 15 | correct_tab <- rep(0.25, 4) 16 | names(correct_tab) <- 1:4 17 | testthat::expect_true(identical(tab, correct_tab)) 18 | }) 19 | 20 | testthat::test_that("response_prop gives the correct result, 21 | multivariate case", { 22 | y <- rep(c(1, 2, 3, 4), each = 2) 23 | data <- cbind(y, y) 24 | tab <- response_prop(data, n_levels = 4) 25 | 26 | correct_tab <- rbind(rep(0.25, 4), rep(0.25, 4)) 27 | dimnames(correct_tab) <- dimnames(tab) 28 | 29 | testthat::expect_true(identical(tab, correct_tab)) 30 | }) 31 | 32 | testthat::test_that("pad_levels gives the correct result", { 33 | pk <- rep(0.25, 4) 34 | names(pk) <- 1:4 35 | padded_pk <- pad_levels(pk, 5) 36 | 37 | correct_pk <- c(rep(0.25, 4), 0) 38 | names(correct_pk) <- 1:5 39 | testthat::expect_true(identical(padded_pk, correct_pk)) 40 | }) 41 | 42 | testthat::test_that("percentify gives the correct result", { 43 | xbreaks <- seq(from = 0, to = 1, length.out = 6) 44 | xlabs <- vapply(xbreaks, percentify, character(1)) 45 | correct_xlabs <- c("0%", "20%", "40%", "60%", "80%", "100%") 46 | testthat::expect_true(identical(xlabs, correct_xlabs)) 47 | }) 48 | 49 | # parameter conversion test 50 | testthat::test_that("convert_params gives the same results as sn::cp2dp", { 51 | cp <- generate_random_cp() 52 | 
dp1 <- sn::cp2dp(cp, family = "SN") 53 | dp2 <- convert_params(cp) 54 | testthat::expect_true(all.equal(dp1, dp2)) 55 | }) 56 | 57 | # density expression test 58 | testthat::test_that("density_sn gives the same results as sn::dsn", { 59 | cp <- generate_random_cp() 60 | dp <- convert_params(cp) 61 | 62 | x <- seq(-5, 5, length = 100) 63 | y1 <- density_sn(x, dp[["xi"]], dp[["omega"]], dp[["alpha"]]) 64 | y2 <- sn::dsn(x, dp = dp) 65 | 66 | testthat::expect_equal(y1, y2) 67 | }) 68 | 69 | 70 | testthat::test_that("plot_likert_transform runs without errors", { 71 | testthat::expect_error( 72 | plot_likert_transform(n_items = 3, n_levels = c(3, 4, 5)), 73 | NA 74 | ) 75 | testthat::expect_error( 76 | plot_likert_transform(n_items = 3, n_levels = 5, mean = c(0, 1, 2)), 77 | NA 78 | ) 79 | testthat::expect_error( 80 | plot_likert_transform(n_items = 3, n_levels = 5, sd = c(0.8, 1, 1.2)), 81 | NA 82 | ) 83 | }) 84 | 85 | testthat::test_that("delta_skew_normal returns correct value", { 86 | alpha <- 1 87 | result <- delta_skew_normal(alpha) 88 | expected_result <- 0.71 89 | testthat::expect_equal(result, expected_result, tolerance = 0.05) 90 | }) 91 | 92 | testthat::test_that("mean_skew_normal returns correct value", { 93 | alpha <- 1 94 | result <- mean_skew_normal(alpha) 95 | expected_result <- 0.56 96 | testthat::expect_equal(result, expected_result, tolerance = 0.05) 97 | }) 98 | 99 | testthat::test_that("var_skew_normal returns correct value", { 100 | alpha <- 1 101 | result <- var_skew_normal(alpha) 102 | expected_result <- 0.68 103 | testthat::expect_equal(result, expected_result, tolerance = 0.05) 104 | }) 105 | 106 | testthat::test_that("scale_and_shift returns correct values", { 107 | dp <- c(xi = 1, omega = 2, alpha = 1) 108 | x <- c(-1, 0, 1) 109 | result <- scale_and_shift(x, dp) 110 | expected_result <- c(-0.7179052, -0.2179052, 0.2820948) 111 | testthat::expect_equal(result, expected_result, tolerance = 0.05) 112 | }) 113 | 
-------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R -------------------------------------------------------------------------------- /vignettes/references.bib: -------------------------------------------------------------------------------- 1 | @misc{mcnamara2024teaching, 2 | title={Teaching modeling in introductory statistics: A comparison of formula and tidyverse syntaxes}, 3 | author={Amelia McNamara}, 4 | year={2024}, 5 | eprint={2201.12960}, 6 | archivePrefix={arXiv}, 7 | primaryClass={stat.OT} 8 | } 9 | 10 | @misc{psych, 11 | title = {psych: Procedures for Psychological, Psychometric, and Personality Research}, 12 | author = {William Revelle}, 13 | organization = {Northwestern University}, 14 | address = {Evanston, Illinois}, 15 | year = {2024}, 16 | note = {R package version 2.4.3}, 17 | url = {https://CRAN.R-project.org/package=psych}, 18 | } 19 | -------------------------------------------------------------------------------- /vignettes/using_latent2likert.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using latent2likert" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Using latent2likert} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | bibliography: references.bib 9 | link-citations: true 10 | --- 11 | 12 | ```{r, include = FALSE} 13 | knitr::opts_chunk$set( 14 | collapse = TRUE, 15 | comment = "#>" 16 | ) 17 | ``` 18 | 19 | ## Introduction 20 | In social sciences, variables of interest are often conceptualized as latent variables — hidden continuous variables measured through Likert scale questions, typically categorized as Strongly disagree, Disagree, Neutral, Agree, and Strongly agree. Researchers frequently aim to uncover these latent variables using various statistical techniques. 
Accurate modeling of survey data is essential for comparative analysis through simulation. The **latent2likert** package addresses this need by providing an effective algorithm to simulate Likert response variables from hypothetical latent variables. This vignette provides two practical workflow examples demonstrating the use of the **latent2likert** package. 21 | 22 | 23 | ## Simulating Survey Data 24 | The following hypothetical survey simulation is loosely based on an actual comparative study on teaching and learning R in a pair of introductory statistics labs [@mcnamara2024teaching]. 25 | 26 | Imagine a situation where 10 participants from Course A and 20 participants from Course B have completed the survey. Suppose the initial question was: 27 | 28 | > "How would you rate your experience with the course?" 29 | 30 | with four possible answers: 31 | 32 | > Poor, Fair, Good, and Excellent. 33 | 34 | Let's assume that the participants in Course A were neutral regarding the question, while participants in Course B had a more positive experience on average. 
35 | 36 | By choosing appropriate parameters for the latent distributions and setting the number of categories n_levels = 4, we can generate hypothetical responses (standard deviation sd = 1 and skewness skew = 0, by default): 37 | 38 | ```{r} 39 | library(latent2likert) # Load the package 40 | set.seed(12345) # Ensure reproducible results 41 | 42 | # Generate responses for Course A and Course B 43 | responses_A <- rlikert(size = 10, n_items = 1, n_levels = 4, mean = 0, sd = 1) 44 | responses_B <- rlikert(size = 20, n_items = 1, n_levels = 4, mean = 1, sd = 1) 45 | ``` 46 | 47 | To summarize the results, create a data frame from all responses: 48 | 49 | ```{r} 50 | n_levels <- 4 51 | n_groups <- 2 52 | categories <- c("Poor", "Fair", "Good", "Excellent") 53 | 54 | # Create a data frame to summarize the responses 55 | response_data <- data.frame( 56 | Course = rep(c("A", "B"), each = n_levels), 57 | Response = factor(rep(categories, n_groups), levels = categories), 58 | Proportion = c( 59 | response_prop(responses_A, n_levels), 60 | response_prop(responses_B, n_levels) 61 | ) 62 | ) 63 | 64 | # Filter out rows with zero proportions 65 | response_data <- response_data[response_data$Proportion > 0, ] 66 | response_data 67 | ``` 68 | 69 | The results can then be visualized using a grouped bar chart: 70 | 71 |
72 | Grouped bar chart 73 |
74 | 75 | 76 | ## Pre and Post Comparison 77 | Now suppose that the survey also asked the participants to rate their skills on a 5-point Likert scale, ranging from 1 (very poor) to 5 (very good) in: 78 | 79 | * Programming, 80 | * Searching Online, 81 | * Solving Problems. 82 | 83 | The survey was completed by the participants both before and after taking the course for a pre and post-comparison. Suppose that participants' assessments of: 84 | 85 | * Programming skills on average increased, 86 | * Searching Online stayed about the same, 87 | * Solving Problems increased in Course A, but decreased for participants in Course B. 88 | 89 | Let's simulate the survey data for this scenario: 90 | 91 | ```{r} 92 | # Pre- and post-assessments of skills for Course A 93 | pre_A <- rlikert(size = 10, n_items = 3, n_levels = 5, mean = c(-1, 0, 1)) 94 | post_A <- rlikert(size = 10, n_items = 3, n_levels = 5, mean = c(0, 0, 2)) 95 | 96 | # Pre- and post-assessments of skills for Course B 97 | pre_B <- rlikert(size = 20, n_items = 3, n_levels = 5, mean = c(-1, 0, 1)) 98 | post_B <- rlikert(size = 20, n_items = 3, n_levels = 5, mean = c(0, 0, 0)) 99 | ``` 100 | 101 | Create a data frame from all responses to summarize the results: 102 | 103 | ```{r} 104 | # Combine pre and post responses into a list 105 | pre_post <- list(pre_A, post_A, pre_B, post_B) 106 | 107 | # Number of items and response levels 108 | n_items <- 3 109 | n_levels <- 5 110 | 111 | # Define skills assessed 112 | skills <- c("Programming", "Searching online", "Solving problems") 113 | 114 | # Generate repeated skill labels for questions 115 | questions <- rep(rep(skills, each = n_levels), 4) 116 | questions <- factor(questions, levels = skills) 117 | 118 | # Create a data frame to summarize the responses 119 | response_data <- data.frame( 120 | Course = rep(c("Course A", "Course B"), each = 2 * n_items * n_levels), 121 | Question = questions, 122 | Time = as.factor(rep(c( 123 | rep("before", n_items * n_levels), 
124 | rep("after", n_items * n_levels) 125 | ), 2)), 126 | Response = rep(seq_len(n_levels), 2 * n_items * 2), 127 | Proportion = as.vector(sapply(pre_post, function(d) { 128 | as.vector(t(response_prop(d, n_levels))) 129 | })) 130 | ) 131 | 132 | head(response_data) 133 | ``` 134 | 135 | And visualize the results with a stacked bar chart: 136 | 137 |
138 | Stacked bar chart 139 |
140 | 141 | 142 | ## Recreating Scale Scores 143 | We will use part of the [bfi](https://search.r-project.org/CRAN/refmans/psych/html/bfi.html) data set from [@psych]. Specifically, we'll focus on the first 5 items corresponding to agreeableness. To investigate the differences in agreeableness between men and women, we'll also use the gender attribute. 144 | 145 | Load the data: 146 | 147 | ```{r} 148 | data(part_bfi) 149 | head(part_bfi) 150 | ``` 151 | 152 | Separate the items into two groups according to their gender: 153 | 154 | ```{r} 155 | vars <- c("A1", "A2", "A3", "A4", "A5") 156 | items_male <- part_bfi[part_bfi$gender == 0, vars] 157 | items_female <- part_bfi[part_bfi$gender == 1, vars] 158 | ``` 159 | 160 | Estimate the parameters of the latent variables, assuming they are normal and providing the number of possible response categories `n_levels = 6`: 161 | 162 | ```{r} 163 | params_male <- estimate_params(data = items_male, n_levels = 6) 164 | params_female <- estimate_params(data = items_female, n_levels = 6) 165 | ``` 166 | 167 | Generate new responses to the items using the estimated parameters and estimated correlations: 168 | 169 | ```{r} 170 | set.seed(12345) # Ensure reproducible results 171 | 172 | new_items_male <- rlikert( 173 | size = nrow(items_male), 174 | n_items = 5, 175 | n_levels = 6, 176 | mean = params_male["mean", ], 177 | sd = params_male["sd", ], 178 | corr = cor(items_male) 179 | ) 180 | 181 | new_items_female <- rlikert( 182 | size = nrow(items_female), 183 | n_items = 5, 184 | n_levels = 6, 185 | mean = params_female["mean", ], 186 | sd = params_female["sd", ], 187 | corr = cor(items_female) 188 | ) 189 | ``` 190 | 191 | Create agreeableness scale scores for both groups of participants by taking the average of these 5 items: 192 | 193 | ```{r} 194 | # Combine new items and gender in new data frame 195 | new_data <- data.frame(rbind(new_items_male, new_items_female)) 196 | new_data$gender <- c(rep(0, nrow(items_male)), rep(1, 
nrow(items_female))) 197 | head(new_data) 198 | 199 | # Reverse the first item because it has negative correlations 200 | part_bfi$A1 <- (min(part_bfi$A1) + max(part_bfi$A1)) - part_bfi$A1 201 | new_data$Y1 <- (min(new_data$Y1) + max(new_data$Y1)) - new_data$Y1 202 | 203 | # Create agreeableness scale scores 204 | part_bfi$agreeable <- rowMeans(part_bfi[, vars]) 205 | new_data$agreeable <- rowMeans(new_data[, c("Y1", "Y2", "Y3", "Y4", "Y5")]) 206 | ``` 207 | 208 | The results can be visualized with a grouped boxplot: 209 | 210 |
211 | Scales 212 |
213 | 214 | 215 | ## References 216 | 217 | 218 | 219 | 220 | --------------------------------------------------------------------------------