
├── .Rbuildignore
├── .gitattributes
├── .github
│   ├── .gitignore
│   └── workflows
│       ├── R-CMD-check.yaml
│       ├── codecov.yaml
│       └── rhub.yaml
├── .gitignore
├── CRAN-SUBMISSION
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
│   ├── discretization.R
│   ├── estimation.R
│   ├── part_bfi.R
│   ├── simulation.R
│   └── utilities.R
├── README.Rmd
├── README.html
├── README.md
├── _pkgdown.yml
├── codemeta.json
├── cran-comments.md
├── data
│   └── part_bfi.rda
├── dev
│   ├── config_attachment.yaml
│   ├── dev.R
│   ├── doc.R
│   └── prepare_data.R
├── docs
│   ├── .nojekyll
│   ├── 404.html
│   ├── LICENSE-text.html
│   ├── LICENSE.html
│   ├── apple-touch-icon-120x120.png
│   ├── apple-touch-icon-152x152.png
│   ├── apple-touch-icon-180x180.png
│   ├── apple-touch-icon-60x60.png
│   ├── apple-touch-icon-76x76.png
│   ├── apple-touch-icon.png
│   ├── articles
│   │   ├── index.html
│   │   └── using_latent2likert.html
│   ├── authors.html
│   ├── deps
│   │   ├── bootstrap-5.3.1
│   │   │   ├── bootstrap.bundle.min.js
│   │   │   ├── bootstrap.bundle.min.js.map
│   │   │   └── bootstrap.min.css
│   │   ├── data-deps.txt
│   │   └── jquery-3.6.0
│   │       ├── jquery-3.6.0.js
│   │       ├── jquery-3.6.0.min.js
│   │       └── jquery-3.6.0.min.map
│   ├── favicon-16x16.png
│   ├── favicon-32x32.png
│   ├── favicon.ico
│   ├── index.html
│   ├── link.svg
│   ├── logo.png
│   ├── news
│   │   └── index.html
│   ├── pkgdown.js
│   ├── pkgdown.yml
│   ├── reference
│   │   ├── Rplot001.png
│   │   ├── Rplot002.png
│   │   ├── Rplot003.png
│   │   ├── Rplot004.png
│   │   ├── discretize_density.html
│   │   ├── estimate_mean_and_sd.html
│   │   ├── estimate_params.html
│   │   ├── figures
│   │   │   ├── courses-grouped-min.png
│   │   │   ├── courses-stacked-min.png
│   │   │   ├── logo.png
│   │   │   ├── overview-min.png
│   │   │   ├── scales-min.png
│   │   │   └── transformation-min.png
│   │   ├── index.html
│   │   ├── part_bfi.html
│   │   ├── plot_likert_transform-1.png
│   │   ├── plot_likert_transform-2.png
│   │   ├── plot_likert_transform-3.png
│   │   ├── plot_likert_transform-4.png
│   │   ├── plot_likert_transform.html
│   │   ├── response_prop.html
│   │   ├── rlikert.html
│   │   └── simulate_likert.html
│   ├── search.json
│   └── sitemap.xml
├── man
│   ├── discretize_density.Rd
│   ├── estimate_mean_and_sd.Rd
│   ├── estimate_params.Rd
│   ├── figures
│   │   ├── courses-grouped-min.png
│   │   ├── courses-stacked-min.png
│   │   ├── logo.png
│   │   ├── overview-min.png
│   │   ├── scales-min.png
│   │   └── transformation-min.png
│   ├── part_bfi.Rd
│   ├── plot_likert_transform.Rd
│   ├── rLikert.Rd
│   ├── response_prop.Rd
│   └── simulate_likert.Rd
├── pkgdown
│   └── favicon
│       ├── apple-touch-icon-120x120.png
│       ├── apple-touch-icon-152x152.png
│       ├── apple-touch-icon-180x180.png
│       ├── apple-touch-icon-60x60.png
│       ├── apple-touch-icon-76x76.png
│       ├── apple-touch-icon.png
│       ├── favicon-16x16.png
│       ├── favicon-32x32.png
│       └── favicon.ico
├── tests
│   ├── testthat.R
│   └── testthat
│       ├── test_discretization.R
│       ├── test_estimation.R
│       ├── test_simulation.R
│       └── test_utilities.R
└── vignettes
    ├── .gitignore
    ├── references.bib
    └── using_latent2likert.Rmd
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^.gitattributes.save
4 | ^.gitattributes
5 | ^.gitignore
6 | ^_pkgdown\.yml$
7 | ^docs$
8 | ^pkgdown$
9 | ^responsesR\.Rproj$
10 | ^README.Rmd
11 | ^README.html
12 | ^LICENSE.md
13 | ^NEWS.html
14 | ^\.github$
15 | ^vignettes/articles$
16 | ^man/figures/*\.png$
17 | ^CODE_OF_CONDUCT.md
18 | ^fix_site.py
19 | ^tests/informal$
20 | ^dev$
21 | ^doc$
22 | ^Meta$
23 | ^revdep$
24 | ^cran-comments\.md$
25 | ^codemeta\.json$
26 | ^CRAN-SUBMISSION$
27 | ^articles$
28 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.html linguist-vendored
2 | *.htm linguist-vendored
3 | *.py linguist-vendored
4 | *.tex linguist-vendored
5 | docs/** linguist-vendored
6 | man/** linguist-vendored
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 |   push:
5 |     branches: [main, master]
6 |   pull_request:
7 |     branches: [main, master]
8 |
9 | name: R-CMD-check
10 |
11 | jobs:
12 |   R-CMD-check:
13 |     runs-on: ${{ matrix.config.os }}
14 |
15 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 |
17 |     strategy:
18 |       fail-fast: false
19 |       matrix:
20 |         config:
21 |           - {os: macos-latest, r: 'release'}
22 |           - {os: windows-latest, r: 'release'}
23 |           - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
24 |           - {os: ubuntu-latest, r: 'release'}
25 |           - {os: ubuntu-latest, r: 'oldrel-1'}
26 |
27 |     env:
28 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29 |       R_KEEP_PKG_SOURCE: yes
30 |
31 |     steps:
32 |       - uses: actions/checkout@v4
33 |
34 |       - uses: r-lib/actions/setup-pandoc@v2
35 |
36 |       - uses: r-lib/actions/setup-r@v2
37 |         with:
38 |           r-version: ${{ matrix.config.r }}
39 |           http-user-agent: ${{ matrix.config.http-user-agent }}
40 |           use-public-rspm: true
41 |
42 |       - uses: r-lib/actions/setup-r-dependencies@v2
43 |         with:
44 |           extra-packages: any::rcmdcheck
45 |           needs: check
46 |
47 |       - uses: r-lib/actions/check-r-package@v2
48 |         with:
49 |           upload-snapshots: true
50 |           build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
51 |
--------------------------------------------------------------------------------
/.github/workflows/codecov.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 |   push:
5 |     branches: [main, master]
6 |   pull_request:
7 |     branches: [main, master]
8 |
9 | name: codecov
10 |
11 | jobs:
12 |   codecov:
13 |     runs-on: ubuntu-latest
14 |     env:
15 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
16 |
17 |     steps:
18 |       - uses: actions/checkout@v4
19 |
20 |       - uses: r-lib/actions/setup-r@v2
21 |         with:
22 |           use-public-rspm: true
23 |
24 |       - uses: r-lib/actions/setup-r-dependencies@v2
25 |         with:
26 |           extra-packages: any::covr
27 |           needs: coverage
28 |
29 |       - name: Test coverage
30 |         run: |
31 |           covr::codecov(
32 |             quiet = FALSE,
33 |             clean = FALSE,
34 |             install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
35 |           )
36 |         shell: Rscript {0}
37 |
38 |       - name: Show testthat output
39 |         if: always()
40 |         run: |
41 |           ## --------------------------------------------------------------------
42 |           find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
43 |         shell: bash
44 |
45 |       - name: Upload test results
46 |         if: failure()
47 |         uses: actions/upload-artifact@v4
48 |         with:
49 |           name: coverage-test-failures
50 |           path: ${{ runner.temp }}/package
51 |
--------------------------------------------------------------------------------
/.github/workflows/rhub.yaml:
--------------------------------------------------------------------------------
1 | # R-hub's generic GitHub Actions workflow file. Its canonical location is at
2 | # https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml
3 | # You can update this file to a newer version using the rhub2 package:
4 | #
5 | # rhub::rhub_setup()
6 | #
7 | # It is unlikely that you need to modify this file manually.
8 |
9 | name: R-hub
10 | run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}"
11 |
12 | on:
13 |   workflow_dispatch:
14 |     inputs:
15 |       config:
16 |         description: 'A comma separated list of R-hub platforms to use.'
17 |         type: string
18 |         default: 'linux,windows,macos'
19 |       name:
20 |         description: 'Run name. You can leave this empty now.'
21 |         type: string
22 |       id:
23 |         description: 'Unique ID. You can leave this empty now.'
24 |         type: string
25 |
26 | jobs:
27 |
28 |   setup:
29 |     runs-on: ubuntu-latest
30 |     outputs:
31 |       containers: ${{ steps.rhub-setup.outputs.containers }}
32 |       platforms: ${{ steps.rhub-setup.outputs.platforms }}
33 |
34 |     steps:
35 |       # NO NEED TO CHECKOUT HERE
36 |       - uses: r-hub/actions/setup@v1
37 |         with:
38 |           config: ${{ github.event.inputs.config }}
39 |         id: rhub-setup
40 |
41 |   linux-containers:
42 |     needs: setup
43 |     if: ${{ needs.setup.outputs.containers != '[]' }}
44 |     runs-on: ubuntu-latest
45 |     name: ${{ matrix.config.label }}
46 |     strategy:
47 |       fail-fast: false
48 |       matrix:
49 |         config: ${{ fromJson(needs.setup.outputs.containers) }}
50 |     container:
51 |       image: ${{ matrix.config.container }}
52 |
53 |     steps:
54 |       - uses: r-hub/actions/checkout@v1
55 |       - uses: r-hub/actions/platform-info@v1
56 |         with:
57 |           token: ${{ secrets.RHUB_TOKEN }}
58 |           job-config: ${{ matrix.config.job-config }}
59 |       - uses: r-hub/actions/setup-deps@v1
60 |         with:
61 |           token: ${{ secrets.RHUB_TOKEN }}
62 |           job-config: ${{ matrix.config.job-config }}
63 |       - uses: r-hub/actions/run-check@v1
64 |         with:
65 |           token: ${{ secrets.RHUB_TOKEN }}
66 |           job-config: ${{ matrix.config.job-config }}
67 |
68 |   other-platforms:
69 |     needs: setup
70 |     if: ${{ needs.setup.outputs.platforms != '[]' }}
71 |     runs-on: ${{ matrix.config.os }}
72 |     name: ${{ matrix.config.label }}
73 |     strategy:
74 |       fail-fast: false
75 |       matrix:
76 |         config: ${{ fromJson(needs.setup.outputs.platforms) }}
77 |
78 |     steps:
79 |       - uses: r-hub/actions/checkout@v1
80 |       - uses: r-hub/actions/setup-r@v1
81 |         with:
82 |           job-config: ${{ matrix.config.job-config }}
83 |           token: ${{ secrets.RHUB_TOKEN }}
84 |       - uses: r-hub/actions/platform-info@v1
85 |         with:
86 |           token: ${{ secrets.RHUB_TOKEN }}
87 |           job-config: ${{ matrix.config.job-config }}
88 |       - uses: r-hub/actions/setup-deps@v1
89 |         with:
90 |           job-config: ${{ matrix.config.job-config }}
91 |           token: ${{ secrets.RHUB_TOKEN }}
92 |       - uses: r-hub/actions/run-check@v1
93 |         with:
94 |           job-config: ${{ matrix.config.job-config }}
95 |           token: ${{ secrets.RHUB_TOKEN }}
96 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
2 |
3 | # History files
4 | .Rhistory
5 | .Rapp.history
6 |
7 | # Session Data files
8 | .RData
9 | .RDataTmp
10 |
11 | # User-specific files
12 | .Ruserdata
13 |
14 | # Example code in package build process
15 | *-Ex.R
16 |
17 | # Output files from R CMD build
18 | /*.tar.gz
19 |
20 | # Output files from R CMD check
21 | /*.Rcheck/
22 |
23 | # RStudio files
24 | .Rproj.user/
25 | *.Rproj
26 |
27 | # produced vignettes
28 | vignettes/*.html
29 | vignettes/*.pdf
30 |
31 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
32 | .httr-oauth
33 |
34 | # knitr and R markdown default cache directories
35 | *_cache/
36 | /cache/
37 |
38 | # Temporary files created by R markdown
39 | *.utf8.md
40 | *.knit.md
41 |
42 | # R Environment Variables
43 | .Renviron
44 |
45 | # translation temp files
46 | po/*~
47 |
48 | # RStudio Connect folder
49 | rsconnect/
50 | .Rproj.user
51 |
52 | # Ignore Mac DS_Store files
53 | .DS_Store
54 | **/.DS_Store
55 |
56 | # revdep
57 | /doc/
58 | /Meta/
59 | revdep/
60 |
--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 1.2.1
2 | Date: 2024-06-22 15:11:33 UTC
3 | SHA: c297b1372dbb8ce47e787afe982acf94c44d5147
4 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Type: Package
2 | Package: latent2likert
3 | Title: Converting Latent Variables into Likert Scale Responses
4 | Version: 1.2.2
5 | Authors@R:
6 | person("Marko", "Lalovic", , "marko@lalovic.io", role = c("aut", "cre"))
7 | Description: Effectively simulates the discretization process inherent to
8 | Likert scales while minimizing distortion. It converts continuous
9 | latent variables into ordinal categories to generate Likert scale item
10 | responses. Particularly useful for accurately modeling and analyzing
11 | survey data that use Likert scales, especially when applying
12 | statistical techniques that require metric data.
13 | License: MIT + file LICENSE
14 | URL: https://latent2likert.lalovic.io/
15 | BugReports: https://github.com/markolalovic/latent2likert/issues/
16 | Depends:
17 | R (>= 3.5)
18 | Imports:
19 | graphics,
20 | mvtnorm,
21 | stats,
22 | utils
23 | Suggests:
24 | sn,
25 | knitr,
26 | rmarkdown,
27 | testthat (>= 3.0.0)
28 | VignetteBuilder:
29 | knitr
30 | Encoding: UTF-8
31 | Language: en-US
32 | LazyData: true
33 | Roxygen: list(markdown = TRUE)
34 | RoxygenNote: 7.3.1
35 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2024
2 | COPYRIGHT HOLDER: Marko Lalovic
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | Copyright (c) 2024 Marko Lalovic
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(discretize_density)
4 | export(estimate_mean_and_sd)
5 | export(estimate_params)
6 | export(plot_likert_transform)
7 | export(response_prop)
8 | export(rlikert)
9 | export(simulate_likert)
10 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | ## Version 0.1.0
2 | - Release date: 2020-10-17
3 | - Initial release (development version).
4 | - Tested on platforms: x86_64-pc-linux-gnu (64-bit) and x86_64-w64-mingw32 (64-bit).
5 |
6 | ## Version 1.0.0
7 | - Release date: 2024-03-28
8 | - Added the option to generate correlated Likert scale items in the `rlikert()` function.
9 | - Added the `estimate_params()` function for estimating parameters from existing survey data.
10 | - Resolved dependency issues.
11 | - Reduced dependencies to mvtnorm plus the standard R packages stats and graphics.
12 | - The sn package is now required only for generating correlated responses using a skew normal distribution.
13 | - Added user prompts to install the sn package when needed.
14 |
15 | ## Version 1.1.0
16 | - Release date: 2024-06-06
17 | - Minor updates to functions and vignettes.
18 |
19 | ## Version 1.1.1
20 | - Release date: 2024-06-06
21 | - Renamed the package from `responsesR` to `latent2likert` to better reflect its purpose of converting latent variables into Likert scale responses.
22 | - Improved the package website and documentation.
23 | - **Note:** The codebase is under development, and finer details may change.
24 |
25 | ## Version 1.2.1
26 | - Release date: 2024-06-12
27 | - Refactored code for enhanced modularity and maintainability.
28 | - Modularized core functions for improved readability.
29 | - Improved the structure and organization of the codebase.
30 | - Enhanced error handling for various correlation input scenarios.
31 |
32 | ## Version 1.2.2
33 | - Release date: 2024-07-26
34 | - Exported previously internal function `estimate_mean_and_sd` based on user feedback, now available for direct use.
35 | - Moved the package `sn` to Suggests.
36 | - Fixed vignette building errors.
37 |
--------------------------------------------------------------------------------
/R/discretization.R:
--------------------------------------------------------------------------------
1 | #' Discretize Density
2 | #'
3 | #' Transforms the density function of a continuous random variable into a
4 | #' discrete probability distribution with minimal distortion
5 | #' using the Lloyd-Max algorithm.
6 | #'
7 | #' @param density_fn probability density function.
8 | #' @param n_levels cardinality of the set of all possible outcomes.
9 | #' @param eps convergence threshold for the algorithm.
10 | #'
11 | #' @return A list containing:
12 | #' \describe{
13 | #' \item{prob}{discrete probability distribution.}
14 | #' \item{endp}{endpoints of intervals that partition the continuous domain.}
15 | #' \item{repr}{representation points of the intervals.}
16 | #' \item{dist}{distortion measured as the mean-squared error (MSE).}
17 | #' }
18 | #' @examples
19 | #' discretize_density(density_fn = stats::dnorm, n_levels = 5)
20 | #' discretize_density(density_fn = function(x) {
21 | #' 2 * stats::dnorm(x) * stats::pnorm(0.5 * x)
22 | #' }, n_levels = 4)
23 | #' @details
24 | #' The function addresses the problem of transforming a continuous random
25 | #' variable \eqn{X} into a discrete random variable \eqn{Y} with minimal
26 | #' distortion. Distortion is measured as mean-squared error (MSE):
27 | #' \deqn{
28 | #' \text{E}\left[ (X - Y)^2 \right] =
29 | #' \sum_{k=1}^{K} \int_{x_{k-1}}^{x_{k}} f_{X}(x)
30 | #' \left( x - r_{k} \right)^2 \, dx
31 | #' }
32 | #' where:
33 | #' \describe{
34 | #' \item{\eqn{f_{X}}}{ is the probability density function of \eqn{X},}
35 | #' \item{\eqn{K}}{ is the number of possible outcomes of \eqn{Y},}
36 | #' \item{\eqn{x_{k}}}{ are endpoints of intervals that partition the domain
37 | #' of \eqn{X},}
38 | #' \item{\eqn{r_{k}}}{ are representation points of the intervals.}
39 | #' }
40 | #' This problem is solved using the following iterative procedure:
41 | #' \describe{
42 | #' \item{\eqn{1.}}{Start with an arbitrary initial set of representation
43 | #' points: \eqn{r_{1} < r_{2} < \dots < r_{K}}.}
44 | #' \item{\eqn{2.}}{Repeat the following steps until the improvement in MSE
45 | #' falls below given \eqn{\varepsilon}.}
46 | #' \item{\eqn{3.}}{Calculate endpoints as \eqn{x_{k} = (r_{k+1} + r_{k})/2}
47 | #' for each \eqn{k = 1, \dots, K-1} and set \eqn{x_{0}} and \eqn{x_{K}} to
48 | #' \eqn{-\infty} and \eqn{\infty}, respectively.}
49 | #' \item{\eqn{4.}}{Update representation points by setting \eqn{r_{k}}
50 | #' equal to the conditional mean of \eqn{X} given \eqn{X \in (x_{k-1}, x_{k})}
51 | #' for each \eqn{k = 1, \dots, K}.}
52 | #' }
53 | #'
54 | #' With each execution of step \eqn{(3)} and step \eqn{(4)}, the MSE decreases
55 | #' or remains the same. As MSE is nonnegative, it approaches a limit.
56 | #' The algorithm terminates when the improvement in MSE is less than a given
57 | #' \eqn{\varepsilon > 0}, ensuring convergence after a finite number
58 | #' of iterations.
59 | #'
60 | #' This procedure is known as Lloyd-Max's algorithm, initially used for scalar
61 | #' quantization and closely related to the k-means algorithm. Local convergence
62 | #' has been proven for log-concave density functions by Kieffer. Many common
63 | #' probability distributions are log-concave including the normal and skew
64 | #' normal distribution, as shown by Azzalini.
65 | #'
66 | #' @references
67 | #' Azzalini, A. (1985).
68 | #' A class of distributions which includes the normal ones.
69 | #' \emph{Scandinavian Journal of Statistics} \bold{12(2)}, 171–178.
70 | #'
71 | #' Kieffer, J. (1983).
72 | #' Uniqueness of locally optimal quantizer for log-concave density and convex
73 | #' error function.
74 | #' \emph{IEEE Transactions on Information Theory} \bold{29}, 42–47.
75 | #'
76 | #' Lloyd, S. (1982).
77 | #' Least squares quantization in PCM.
78 | #' \emph{IEEE Transactions on Information Theory} \bold{28 (2)}, 129–137.
79 | #'
80 | #' @export
81 | discretize_density <- function(density_fn, n_levels, eps = 1e-6) {
82 | # Initial set of representation points
83 | repr <- seq(-5, 5, length.out = n_levels)
84 | midp <- calc_midpoints(repr)
85 | dist <- compute_distortion(density_fn, midp, repr)
86 |
87 | diff <- Inf
88 | while (diff > eps) {
89 | repr <- update_representatives(density_fn, midp)
90 | midp <- calc_midpoints(repr)
91 | newd <- compute_distortion(density_fn, midp, repr)
92 |
93 | diff <- dist - newd
94 | dist <- newd
95 | }
96 |
97 | endp <- c(-Inf, midp, Inf)
98 | prob <- calc_probs(density_fn, endp)
99 | return(list(prob = prob, endp = endp, repr = repr, dist = dist))
100 | }
101 |
102 | #' Calculate Midpoints
103 | #'
104 | #' Calculates the midpoints of the intervals given their representation points.
105 | #'
106 | #' @param repr representation points.
107 | #' @return a vector of midpoints.
108 | #' @noRd
109 | calc_midpoints <- function(repr) {
110 | n_levels <- length(repr)
111 | midp <- (repr[2:n_levels] + repr[1:(n_levels - 1)]) / 2
112 | return(midp)
113 | }
114 |
115 | #' Update Representation Points
116 | #'
117 | #' Calculates the representation points for the intervals given the midpoints.
118 | #'
119 | #' @param density_fn probability density function.
120 | #' @param midp midpoints of the intervals.
121 | #'
122 | #' @return a vector of representation points.
123 | #' @noRd
124 | update_representatives <- function(density_fn, midp) {
125 | n_levels <- length(midp) + 1
126 | endp <- c(-Inf, midp, Inf)
127 | repr <- numeric(n_levels)
128 | for (k in seq_len(n_levels)) {
129 | a <- stats::integrate(function(x) x * density_fn(x), endp[k], endp[k + 1])
130 | b <- stats::integrate(density_fn, endp[k], endp[k + 1])
131 | repr[k] <- a[[1]] / b[[1]]
132 | }
133 | return(repr)
134 | }
135 |
136 | #' Compute Distortion
137 | #'
138 | #' Computes the distortion (mean-squared error) given the midpoints and
139 | #' representation points.
140 | #'
141 | #' @param density_fn probability density function.
142 | #' @param midp midpoints of the intervals.
143 | #' @param repr representation points of the intervals.
144 | #'
145 | #' @return distortion (mean-squared error).
146 | #' @noRd
147 | compute_distortion <- function(density_fn, midp, repr) {
148 | n_levels <- length(repr)
149 | endp <- c(-Inf, midp, Inf)
150 | mse <- vapply(seq_len(n_levels), function(k) {
151 | stats::integrate(function(x) {
152 | density_fn(x) * (x - repr[k])^2
153 | }, endp[k], endp[k + 1])[[1]]
154 | }, numeric(1))
155 | return(sum(mse))
156 | }
157 |
158 | #' Calculate Probabilities
159 | #'
160 | #' Calculates the discrete probabilities for the given endpoints.
161 | #'
162 | #' @param density_fn probability density function.
163 | #' @param endp endpoints of the intervals.
164 | #'
165 | #' @return a vector of probabilities for the intervals.
166 | #' @noRd
167 | calc_probs <- function(density_fn, endp) {
168 | n_levels <- length(endp) - 1
169 | prob <- vapply(seq_len(n_levels), function(k) {
170 | stats::integrate(function(x) {
171 | density_fn(x)
172 | }, endp[k], endp[k + 1])[[1]]
173 | }, numeric(1))
174 | return(prob)
175 | }
176 |
--------------------------------------------------------------------------------
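
The Lloyd-Max procedure documented above is easy to sanity-check interactively. Below is a minimal sketch (assuming the package is installed and attached) that exercises the exported `discretize_density()` and confirms two properties stated in the details: the returned probabilities form a distribution, and the distortion shrinks as the number of levels grows.

```r
library(latent2likert)

# Discretize a standard normal latent density into 5 response levels.
res <- discretize_density(density_fn = stats::dnorm, n_levels = 5)
res$prob        # discrete probability distribution
sum(res$prob)   # should be ~1
res$dist        # mean-squared distortion E[(X - Y)^2]

# Distortion decreases as the number of levels grows.
sapply(3:7, function(k) discretize_density(stats::dnorm, n_levels = k)$dist)
```
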
/R/estimation.R:
--------------------------------------------------------------------------------
1 | #' Estimate Latent Parameters
2 | #'
3 | #' Estimates the location and scaling parameters of the latent variables from
4 | #' existing survey data.
5 | #'
6 | #' @param data survey data with columns representing individual items.
7 | #' Apart from this, \code{data} can be of almost any class such as
8 | #' "data.frame", "matrix", or "array".
9 | #' @param n_levels number of response categories, a vector or a number.
10 | #' @param skew marginal skewness of latent variables, defaults to 0.
11 | #' @return A table of estimated parameters for each latent variable.
12 | #' @examples
13 | #' data(part_bfi)
14 | #' vars <- c("A1", "A2", "A3", "A4", "A5")
15 | #' estimate_params(data = part_bfi[, vars], n_levels = 6)
16 | #' @details
17 | #' The relationship between the continuous random variable \eqn{X} and the
18 | #' discrete probability distribution \eqn{p_k}, for \eqn{k = 1, \dots, K},
19 | #' can be described by a system of non-linear equations:
20 | #' \deqn{
21 | #' p_{k} = F_{X}\left( \frac{x_{k} - \xi}{\omega} \right)
22 | #' - F_{X}\left( \frac{x_{k - 1} - \xi}{\omega} \right)
23 | #' \quad \text{for} \ k = 1, \dots, K
24 | #' }
25 | #' where:
26 | #' \describe{
27 | #' \item{\eqn{F_{X}}}{ is the cumulative distribution function of \eqn{X},}
28 | #' \item{\eqn{K}}{ is the number of possible response categories,}
29 | #' \item{\eqn{x_{k}}}{ are the endpoints defining the boundaries of the
30 | #' response categories,}
31 | #' \item{\eqn{p_{k}}}{ is the probability of the \eqn{k}-th
32 | #' response category,}
33 | #' \item{\eqn{\xi}}{ is the location parameter of \eqn{X},}
34 | #' \item{\eqn{\omega}}{ is the scaling parameter of \eqn{X}.}
35 | #' }
36 | #' The endpoints \eqn{x_{k}} are calculated by discretizing a
37 | #' random variable \eqn{Z}
38 | #' with mean 0 and standard deviation 1 that follows the same
39 | #' distribution as \eqn{X}.
40 | #' By solving the above system of non-linear equations iteratively,
41 | #' we can find the parameters that best fit the observed discrete
42 | #' probability distribution \eqn{p_{k}}.
43 | #'
44 | #' The function \code{estimate_params}:
45 | #' * Computes the proportion table of the responses for each item.
46 | #' * Estimates the probabilities \eqn{p_{k}} for each item.
47 | #' * Computes the estimates of \eqn{\xi} and \eqn{\omega} for each item.
48 | #' * Combines the estimated parameters for all items into a table.
49 | #'
50 | #' @seealso \code{\link{discretize_density}} for details on calculating
51 | #' the endpoints, and \code{\link{part_bfi}} for example of the survey data.
52 | #' @export
53 | estimate_params <- function(data, n_levels, skew = 0) {
54 | if (is.vector(data)) {
55 | prob <- prop.table(table(data))
56 | estimates <- estimate_mean_and_sd(prob, n_levels, skew)
57 | tab <- as.table(estimates)
58 | rownames(tab) <- c("mean", "sd")
59 | } else { # multiple items
60 | nitems <- ncol(data)
61 | skew <- rep(skew, length.out = nitems)
62 | if (is.numeric(n_levels)) {
63 | n_levels <- rep(n_levels, nitems)
64 | }
65 | mat <- matrix(data = NA, nrow = nitems, ncol = 2)
66 | for (i in seq_len(ncol(data))) {
67 | prob <- prop.table(table(data[, i]))
68 | estimates <- estimate_mean_and_sd(prob, n_levels[i], skew[i])
69 | mat[i, ] <- estimates
70 | }
71 | tab <- as.table(t(mat))
72 | dimnames(tab) <- list(
73 | estimates = c("mean", "sd"),
74 | items = colnames(data)
75 | )
76 | }
77 | return(tab)
78 | }
79 |
80 | #' Estimate mean and standard deviation
81 | #'
82 | #' Estimates the mean and standard deviation of a latent variable given the
83 | #' discrete probabilities of its observed Likert scale responses.
84 | #'
85 | #' @param prob named vector of probabilities for each response category.
86 | #' @param n_levels number of response categories for the Likert scale item.
87 | #' @param skew marginal skewness of the latent variable, defaults to 0.
88 | #' @param eps tolerance for convergence, defaults to 1e-6.
89 | #' @param maxit maximum number of iterations, defaults to 100.
90 | #'
91 | #' @return A numeric vector with two elements: the estimated mean and
92 | #' standard deviation.
93 | #'
94 | #' @examples
95 | #' prob <- c("1" = 0.313, "2" = 0.579, "3" = 0.105, "4" = 0.003)
96 | #' # returns estimates that are close to the actual mean and sd: c(-1, 0.5)
97 | #' estimate_mean_and_sd(prob, 5)
98 | #'
99 | #' @details
100 | #' This function uses an iterative algorithm to solve the system of non-linear
101 | #' equations that describe the relationship between the continuous latent
102 | #' variable and the observed discrete probability distribution of Likert scale
103 | #' responses. The algorithm ensures stability by reparameterizing the system
104 | #' and applying constraints to prevent stepping into invalid regions.
105 | #'
106 | #' @export
107 | estimate_mean_and_sd <- function(prob, n_levels, skew = 0,
108 | eps = 1e-6, maxit = 100) {
109 | prob <- as.vector(pad_levels(prob, n_levels))
110 | endp <- calc_endpoints(n_levels, skew)
111 | dist_funcs <- initialize_distributions(skew)
112 | x <- matrix(c(0, 1)) # Initial guess
113 | trace <- matrix(rep(x, maxit), ncol = maxit)
114 |
115 | for (i in seq_len(maxit)) {
116 | b <- fn(x, endp, prob, dist_funcs$cdf_X)
117 | A <- jac(x, endp, dist_funcs$pdf_X)
118 | A_svd <- svd(A)
119 | dx <- A_svd$v %*% diag(1 / A_svd$d) %*% t(A_svd$u) %*% (-b)
120 |
121 | # Prevent stepping into negative values for v = 1 / sd
122 | while ((x + dx)[2] < 0) {
123 | dx <- dx / 2
124 | }
125 |
126 | x <- x + 0.2 * dx
127 | trace[, i] <- x
128 |
129 | if (norm(dx, "2") < eps) {
130 | break
131 | }
132 | }
133 |
134 | mean <- x[1]
135 | sd <- 1 / x[2]
136 |
137 | return(c(mean, sd))
138 | }
139 |
140 | #' Initialize CDF and PDF Functions
141 | #'
142 | #' Initializes the cumulative distribution function (CDF) and probability
143 | #' density function (PDF) based on the skewness parameter.
144 | #'
145 | #' @param skew numeric value representing the skewness of the distribution.
146 | #'
147 | #' @return A list containing the CDF and PDF functions appropriate for the
148 | #' given skewness.
149 | #'
150 | #' @noRd
151 | initialize_distributions <- function(skew) {
152 | if (abs(skew) > 0) {
153 | check_package("sn")
154 | cp <- c("mu" = 0, "sd" = 1, "skew" = skew)
155 | dp <- sn::cp2dp(cp, family = "SN")
156 | return(list(
157 | cdf_X = function(x) sn::psn(x, dp = dp),
158 | pdf_X = function(x) sn::dsn(x, dp = dp)
159 | ))
160 | } else {
161 | return(list(cdf_X = stats::pnorm, pdf_X = stats::dnorm))
162 | }
163 | }
164 |
165 | #' Calculate Differences for Root Finding
166 | #'
167 | #' Computes the differences used in the iterative root-finding process.
168 | #'
169 | #' @param x numeric vector of current estimates for the location and scaling
170 | #' parameters.
171 | #' @param endp numeric vector of endpoints defining the boundaries of the
172 | #' response categories.
173 | #' @param prob numeric vector of probabilities for each response category.
174 | #' @param cdf_X function representing the cumulative distribution function
175 | #' (CDF) of the latent variable.
176 | #'
177 | #' @return A matrix of differences between the CDF evaluated at the endpoints
178 | #' and the observed probabilities.
179 | #'
180 | #' @noRd
181 | fn <- function(x, endp, prob, cdf_X) {
182 | u <- x[1]
183 | v <- x[2]
184 | y <- cdf_X(v * endp - u * v)
185 | return(matrix(utils::tail(y, -1) - utils::head(y, -1) - prob))
186 | }
187 |
188 | #' Compute Jacobian Matrix
189 | #'
190 | #' Computes the Jacobian matrix used in the iterative root-finding process.
191 | #'
192 | #' @param x numeric vector of current estimates for the location and scaling
193 | #' parameters.
194 | #' @param endp numeric vector of endpoints defining the boundaries of the
195 | #' response categories.
196 | #' @param pdf_X function representing the probability density function (PDF)
197 | #' of the latent variable.
198 | #'
199 | #' @return A matrix representing the Jacobian of the system of equations with
200 | #' respect to the parameters.
201 | #'
202 | #' @noRd
203 | jac <- function(x, endp, pdf_X) {
204 | u <- x[1]
205 | v <- x[2]
206 | midp <- utils::head(utils::tail(endp, -1), -1)
207 |
208 | du <- pdf_X(v * endp - u * v) * (-v)
209 | dv <- pdf_X(v * midp - u * v) * (midp - u)
210 |
211 | du <- utils::tail(du, -1) - utils::head(du, -1)
212 | dv <- c(utils::head(dv, 1), utils::tail(dv, -1)
213 | - utils::head(dv, -1), -utils::tail(dv, 1))
214 |
215 | return(cbind(du, dv))
216 | }
217 |
--------------------------------------------------------------------------------
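
As a hedged round-trip sketch of the estimation routines above: simulate single-item responses from a known latent variable with `rlikert()` (defined in R/simulation.R) and recover its mean and standard deviation with `estimate_params()` and `estimate_mean_and_sd()`. The parameter values and seed below are arbitrary, chosen only for illustration.

```r
library(latent2likert)

set.seed(12345)  # illustrative seed
y <- rlikert(size = 1000, n_items = 1, n_levels = 5, mean = -1, sd = 0.5)

# Estimate the latent mean and sd from the raw responses.
estimate_params(data = y, n_levels = 5)

# Equivalently, start from the observed response proportions.
prob <- response_prop(y, n_levels = 5)
estimate_mean_and_sd(prob, n_levels = 5)
```
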
/R/part_bfi.R:
--------------------------------------------------------------------------------
1 | #' Agreeableness and Gender Data
2 | #'
3 | #' This dataset is a cleaned up version of a small part of \code{bfi} dataset
4 | #' from \code{psychTools} package. It contains responses to the first 5 items
5 | #' of the agreeableness scale from the International Personality Item Pool
6 | #' (IPIP) and the gender attribute. It includes responses from 2800 subjects.
7 | #' Each item was answered on a six-point Likert scale ranging from
8 | #' 1 (very inaccurate) to 6 (very accurate). Gender was coded as
9 | #' 0 for male and 1 for female. Missing values were addressed using
10 | #' mode imputation.
11 | #'
12 | #' @docType data
13 | #'
14 | #' @usage data(part_bfi)
15 | #'
16 | #' @format {An object of class \code{"data.frame"} with 2800 observations on
17 | #' the following 6 variables:}
18 | #' \describe{
19 | #' \item{A1}{Am indifferent to the feelings of others.}
20 | #' \item{A2}{Inquire about others' well-being.}
21 | #' \item{A3}{Know how to comfort others.}
22 | #' \item{A4}{Love children.}
23 | #' \item{A5}{Make people feel at ease.}
24 | #' \item{gender}{Gender of the respondent.}
25 | #' }
26 | #' @source {International Personality Item Pool ({https://ipip.ori.org})}
27 | #' @source {https://search.r-project.org/CRAN/refmans/psychTools/html/bfi.html}
28 | #' @references
29 | #' Revelle, W. (2024).
30 | #' Psych: Procedures for Psychological, Psychometric, and Personality Research.
31 | #' \emph{Evanston, Illinois: Northwestern University.}
32 | #' \url{https://CRAN.R-project.org/package=psych}
33 | #' @examples
34 | #' data(part_bfi)
35 | #' head(part_bfi)
36 | "part_bfi"
37 |
--------------------------------------------------------------------------------
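
A short usage sketch for the bundled dataset, mirroring the roxygen example above: compute the observed response proportions for the five agreeableness items (six response categories each) and estimate the latent parameters per item.

```r
library(latent2likert)

data(part_bfi)
vars <- c("A1", "A2", "A3", "A4", "A5")

# Observed proportion of each response category per item.
response_prop(part_bfi[, vars], n_levels = 6)

# Estimated latent location (mean) and scale (sd) for each item.
estimate_params(data = part_bfi[, vars], n_levels = 6)
```
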
/R/simulation.R:
--------------------------------------------------------------------------------
1 | #' Simulate Likert Scale Item Responses
2 | #'
3 | #' Simulates Likert scale item responses based on a specified number
4 | #' of response categories and the centered parameters of the latent variable.
5 | #'
6 | #' @param n_levels number of response categories for the Likert scale item.
7 | #' @param cp centered parameters of the latent variable.
8 | #' Named vector including mean (`mu`), standard deviation (`sd`),
9 | #' and skewness (`skew`). Skewness must be between -0.95 and 0.95.
10 | #' @return A named vector of probabilities for each response category.
11 | #' @examples
12 | #' cp <- c(mu = 0, sd = 1, skew = 0.5)
13 | #' simulate_likert(n_levels = 5, cp = cp)
14 | #' cp2 <- c(mu = 1, sd = 2, skew = -0.3)
15 | #' simulate_likert(n_levels = 7, cp = cp2)
16 | #' @details
17 | #' The simulation process uses the following model detailed by
18 | #' Boari and Nai-Ruscone. Let \eqn{X} be the continuous variable of interest,
19 | #' measured using Likert scale questions with \eqn{K} response categories. The
20 | #' observed discrete variable \eqn{Y} is defined as follows:
21 | #' \deqn{
22 | #' Y = k, \quad \text{ if } \ \ x_{k - 1} < X \leq x_{k}
23 | #' \quad \text{ for } \ \ k = 1, \dots, K
24 | #' }
25 | #' where \eqn{x_{k}}, \eqn{k = 0, \dots, K} are endpoints defined in the domain
26 | #' of \eqn{X} such that:
27 | #' \deqn{
28 | #' -\infty = x_{0} < x_{1} < \dots < x_{K - 1} < x_{K} = \infty.
29 | #' }
30 | #' The endpoints dictate the transformation of the density
31 | #' \eqn{f_{X}} of \eqn{X} into a discrete probability distribution:
32 | #' \deqn{
33 | #' \text{Pr}(Y = k) = \int_{x_{k - 1}}^{x_{k}} f_{X}(x) \, dx
34 | #' \quad \text{ for } \ \ k = 1, \dots, K.
35 | #' }
36 | #'
37 | #' The continuous latent variable is modeled using a skew normal distribution.
38 | #' The function \code{simulate_likert} performs the following steps:
39 | #' * Ensures the centered parameters are within the acceptable range.
40 | #' * Converts the centered parameters to direct parameters.
41 | #' * Defines the density function for the skew normal distribution.
42 | #' * Computes the probabilities for each response category
43 | #' using optimal endpoints.
44 | #'
45 | #' @references
46 | #' Boari, G. and Nai Ruscone, M. (2015).
47 | #' A procedure simulating Likert scale item responses.
48 | #' \emph{Electronic Journal of Applied Statistical Analysis} \bold{8(3)},
49 | #' 288–297.
50 | #' \doi{10.1285/i20705948v8n3p288}
51 | #'
52 | #' @seealso \code{\link{discretize_density}} for details on how to calculate
53 | #' the optimal endpoints.
54 | #' @export
55 | simulate_likert <- function(n_levels, cp) {
56 | validate_skewness(cp[["skew"]])
57 |
58 | dp <- convert_params(cp)
59 | density_fn <- function(x) {
60 | density_sn(x, dp[["xi"]], dp[["omega"]], dp[["alpha"]])
61 | }
62 | endp <- calc_endpoints(n_levels, cp[["skew"]])
63 | prob <- calc_probs(density_fn, endp)
64 | names(prob) <- paste(seq_len(n_levels))
65 | return(prob)
66 | }
67 |
68 | #' Generate Random Responses
69 | #'
70 | #' Generates an array of random responses to Likert-type questions based on
71 | #' specified latent variables.
72 | #'
73 | #' @param size number of observations.
74 | #' @param n_items number of Likert scale items (number of questions).
75 | #' @param n_levels number of response categories for each item.
76 | #' Integer or vector of integers.
77 | #' @param mean means of the latent variables.
78 | #' Numeric or vector of numerics. Defaults to 0.
79 | #' @param sd standard deviations of the latent variables.
80 | #' Numeric or vector of numerics. Defaults to 1.
81 | #' @param skew marginal skewness of the latent variables.
82 | #' Numeric or vector of numerics. Defaults to 0.
83 | #' @param corr correlations between latent variables.
84 | #' Can be a single numeric value representing the same correlation for
85 | #' all pairs, or an actual correlation matrix. Defaults to 0.
86 | #'
87 | #' @return A matrix of random responses with dimensions \code{size} by
88 | #' \code{n_items}. The column names are \code{Y1, Y2, ..., Yn} where
89 | #' \code{n} is the number of items. Each entry in the matrix represents
90 | #' a Likert scale response, ranging from 1 to \code{n_levels}.
91 | #' @examples
92 | #' # Generate responses for a single item with 5 levels
93 | #' rlikert(size = 10, n_items = 1, n_levels = 5)
94 | #'
95 | #' # Generate responses for three items with different levels and parameters
96 | #' rlikert(
97 | #' size = 10, n_items = 3, n_levels = c(4, 5, 6),
98 | #' mean = c(0, -1, 0), sd = c(0.8, 1, 1), corr = 0.5
99 | #' )
100 | #'
101 | #' # Generate responses with a correlation matrix
102 | #' corr <- matrix(c(
103 | #' 1.00, -0.63, -0.39,
104 | #' -0.63, 1.00, 0.41,
105 | #' -0.39, 0.41, 1.00
106 | #' ), nrow = 3)
107 | #' data <- rlikert(
108 | #' size = 1000, n_items = 3, n_levels = c(4, 5, 6),
109 | #' mean = c(0, -1, 0), sd = c(0.8, 1, 1), corr = corr
110 | #' )
111 | #'
112 | #' @export
113 | rlikert <- function(size, n_items, n_levels,
114 | mean = 0, sd = 1, skew = 0, corr = 0) {
115 | # If there's only one item, generate responses directly
116 | if (n_items == 1) {
117 | return(generate_responses(size, n_levels, mean, sd, skew))
118 | }
119 |
120 | # Ensure input values are vectors of length n_items
121 | mean <- rep(mean, length.out = n_items)
122 | sd <- rep(sd, length.out = n_items)
123 | skew <- rep(skew, length.out = n_items)
124 | n_levels <- rep(n_levels, length.out = n_items)
125 |
126 | # Determine the correlation case
127 | corr_case <- handle_corr_case(corr)
128 |
129 | if (corr_case == 1) {
130 | # Generate multiple univariate item responses without correlations
131 | data <- matrix(nrow = size, ncol = n_items)
132 | for (i in seq_len(n_items)) {
133 | data[, i] <- generate_responses(
134 | size, n_levels[i],
135 | mean[i], sd[i], skew[i]
136 | )
137 | }
138 | colnames(data) <- paste0("Y", seq_len(n_items))
139 | return(data)
140 | }
141 |
142 | # Generate the correlation matrix
143 | corr_matrix <- generate_corr_matrix(corr, corr_case, n_items)
144 | sigma <- cor2cov(corr_matrix, sd)
145 |
146 | # Generate latent variables
147 | latent_variables <- generate_latent_variables(size, mean, sigma, skew)
148 |
149 | # Discretize latent variables to generate responses
150 | data <- matrix(nrow = size, ncol = n_items)
151 | for (i in seq_len(n_items)) {
152 | endp <- calc_endpoints(n_levels[i], skew[i])
153 | data[, i] <- findInterval(latent_variables[, i], endp)
154 | }
155 | colnames(data) <- paste0("Y", seq_len(n_items))
156 | return(data)
157 | }
158 |
159 | #' Calculate Optimal Endpoints
160 | #'
161 | #' Returns the optimal endpoints of intervals that transform
162 | #' a neutral density into discrete probability distribution.
163 | #'
164 | #' @param n_levels integer. Number of response categories.
165 | #' @param skew numeric. Skewness parameter of the latent variable.
166 | #'
167 | #' @return A numeric vector of optimal endpoints.
168 | #' @noRd
169 | calc_endpoints <- function(n_levels, skew) {
170 | dp <- convert_params(c("mu" = 0, "sd" = 1, "skew" = skew))
171 |
172 | density_fn <- function(x) {
173 | density_sn(x, dp[["xi"]], dp[["omega"]], dp[["alpha"]])
174 | }
175 |
176 | endp <- discretize_density(density_fn, n_levels)[["endp"]]
177 | return(endp)
178 | }
179 |
180 | #' Generate Responses
181 | #'
182 | #' Generates random responses for a single Likert scale item.
183 | #'
184 | #' @param size integer. Number of observations.
185 | #' @param n_levels integer. Number of response categories.
186 | #' @param mean numeric. Mean of the latent variable.
187 | #' @param sd numeric. Standard deviation of the latent variable.
188 | #' @param skew numeric. Skewness parameter of the latent variable.
189 | #'
190 | #' @return A numeric vector of simulated responses.
191 | #' @noRd
192 | generate_responses <- function(size, n_levels, mean, sd, skew) {
193 | cp <- c("mu" = mean, "sd" = sd, "skew" = skew)
194 | prob <- simulate_likert(n_levels, cp)
195 | sample(seq_len(n_levels), size, replace = TRUE, prob = prob)
196 | }
197 |
198 | #' Handle Correlation Input Case
199 | #'
200 | #' Determines the type of correlation input provided.
201 | #'
202 | #' @param corr numeric or character or matrix. Correlation input.
203 | #'
204 | #' @return An integer representing the correlation case.
205 | #' @noRd
206 | handle_corr_case <- function(corr) {
207 | if (is.numeric(corr) && !is.matrix(corr)) {
208 | if (corr == 0) {
209 | return(1)
210 | }
211 | if (corr != 0) {
212 | return(3)
213 | }
214 | }
215 | if (is.character(corr) && corr == "random") {
216 | return(2)
217 | }
218 | if (is.matrix(corr)) {
219 | return(4)
220 | }
221 | stop("Invalid correlation input")
222 | }
223 |
224 | #' Generate Correlation Matrix
225 | #'
226 | #' Generates the correlation matrix based on the correlation case.
227 | #'
228 | #' @param corr numeric or matrix. Correlation input.
229 | #' @param corr_case integer. Correlation case identifier.
230 | #' @param n_items integer. Number of Likert scale items.
231 | #'
232 | #' @return A numeric matrix representing the correlation matrix.
233 | #' @noRd
234 | generate_corr_matrix <- function(corr, corr_case, n_items) {
235 | if (corr_case == 2) {
236 | return(generate_rand_corr_matrix(n_items))
237 | }
238 | if (corr_case == 3) {
239 | if (corr > 1 | corr < -1) stop("Correlation must be between -1 and 1.")
240 | corr_matrix <- matrix(corr, n_items, n_items)
241 | diag(corr_matrix) <- 1
242 | return(corr_matrix)
243 | }
244 | if (corr_case == 4) {
245 | return(corr)
246 | }
247 | }
248 |
249 | #' Generate Latent Variables
250 | #'
251 | #' Generates latent variables based on the specified parameters.
252 | #'
253 | #' @param size number of observations.
254 | #' @param mean means of the latent variables. Numeric vector.
255 | #' @param sigma covariance matrix of the latent variables.
256 | #' @param skew skewness parameters of the latent variables. Numeric vector.
257 | #'
258 | #' @return A matrix of generated latent variables.
259 | #' @noRd
260 | generate_latent_variables <- function(size, mean, sigma, skew) {
261 | if (any(skew != 0)) { # at least one latent variable is skewed
262 | if (!requireNamespace("sn", quietly = TRUE)) {
263 | stop("Package \"sn\" must be installed.
264 | Please run:\n\n\tinstall.packages(\"sn\")\n\n")
265 | }
266 | cp_list <- list(mean = mean, var.cov = sigma, skew = skew)
267 | dp_list <- sn::cp2dp(cp_list, family = "SN")
268 | return(sn::rmsn(size, dp = dp_list))
269 | } else {
270 | return(mvtnorm::rmvnorm(size, mean, sigma))
271 | }
272 | }
273 |
--------------------------------------------------------------------------------
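
A sketch exercising the simulation model described above, assuming the package is attached: `simulate_likert()` returns the discrete distribution implied by a (possibly skewed) latent variable, and `rlikert()` draws correlated responses. The correlation matrix is taken from the roxygen example; the observed response correlations should roughly track the latent ones.

```r
library(latent2likert)

# Discrete response distribution implied by a skewed latent variable;
# skewness must lie within [-0.95, 0.95].
simulate_likert(n_levels = 5, cp = c(mu = 0.5, sd = 1, skew = 0.3))

# Correlated items (normal latent variables, so only mvtnorm is needed).
corr <- matrix(c(
  1.00, -0.63, -0.39,
  -0.63, 1.00, 0.41,
  -0.39, 0.41, 1.00
), nrow = 3)
data <- rlikert(
  size = 1000, n_items = 3, n_levels = c(4, 5, 6),
  mean = c(0, -1, 0), sd = c(0.8, 1, 1), corr = corr
)
round(cor(data), 2)               # compare against the target matrix
response_prop(data, n_levels = 6) # responses padded to 6 categories
```
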
/R/utilities.R:
--------------------------------------------------------------------------------
1 | #' Plot Transformation
2 | #'
3 | #' Plots the densities of latent variables and the corresponding
4 | #' transformed discrete probability distributions.
5 | #'
6 | #' @param n_items number of Likert scale items (questions).
7 | #' @param n_levels number of response categories for each Likert item.
8 | #' Integer or vector of integers.
9 | #' @param mean means of the latent variables.
10 | #' Numeric or vector of numerics. Defaults to 0.
11 | #' @param sd standard deviations of the latent variables.
12 | #' Numeric or vector of numerics. Defaults to 1.
13 | #' @param skew marginal skewness of the latent variables.
14 | #' Numeric or vector of numerics. Defaults to 0.
15 | #' @return NULL. The function produces a plot.
16 | #' @examples
17 | #' plot_likert_transform(n_items = 3, n_levels = c(3, 4, 5))
18 | #' plot_likert_transform(n_items = 3, n_levels = 5, mean = c(0, 1, 2))
19 | #' plot_likert_transform(n_items = 3, n_levels = 5, sd = c(0.8, 1, 1.2))
20 | #' plot_likert_transform(n_items = 3, n_levels = 5, skew = c(-0.5, 0, 0.5))
21 | #' @export
22 | plot_likert_transform <- function(
23 | n_items, n_levels,
24 | mean = 0, sd = 1, skew = 0) {
25 | n_levels <- rep(n_levels, length.out = n_items)
26 | mean <- rep(mean, length.out = n_items)
27 | sd <- rep(sd, length.out = n_items)
28 | skew <- rep(skew, length.out = n_items)
29 | graphics::layout(matrix(seq_len(n_items * 2), nrow = 2, ncol = n_items))
30 | x <- seq(-3, 3, length = 1000)
31 | for (i in seq_len(n_items)) {
32 | # Draw the densities of latent variables
33 | cp <- c("mu" = mean[i], "sd" = sd[i], "skew" = skew[i])
34 | dp <- convert_params(cp)
35 | y <- density_sn(x, dp[["xi"]], dp[["omega"]], dp[["alpha"]])
36 | graphics::plot(x, y, type = "l", lwd = 2, xlab = "", ylab = "", main = "")
37 | graphics::title(paste("X", i, sep = ""))
38 |
39 | # Draw the corresponding discrete probability distributions
40 | prob <- simulate_likert(n_levels[i], cp)
41 | graphics::barplot(prob)
42 | graphics::title(paste("Y", i, sep = ""))
43 | }
44 | invisible(NULL)
45 | }
46 |
47 | #' Plot Contour
48 | #'
49 | #' Plots the contour of the objective function values over a grid
50 | #' of parameter values. It visualizes the norm of the function \code{fn}
51 | #' for different values of \code{u} (mean) and \code{v} (1/standard deviation)
52 | #' and overlays the trace of parameter updates during the estimation process.
53 | #'
54 | #' @param fn objective function to be minimized.
55 | #' @param endp endpoints of intervals that partition the continuous domain.
56 | #' @param prob discrete probability distribution.
57 | #' @param cdf_X cumulative distribution function of the latent variable.
58 | #' @param trace matrix of parameter updates.
59 | #' @return NULL. The function produces a plot.
60 | #' @noRd
61 | plot_contour <- function(fn, endp, prob, cdf_X, trace) {
62 | xlen <- 50
63 | ylen <- 50
64 | xgrid <- seq(-3, 3, length.out = xlen) # Range for mean (mu)
65 | ygrid <- seq(0.1, 3, length.out = ylen) # Range for 1/sd
66 | zvals <- matrix(NA, ncol = xlen, nrow = ylen)
67 | for (i in seq_len(xlen)) {
68 | for (j in seq_len(ylen)) {
69 | zvals[i, j] <- norm(fn(
70 | matrix(c(xgrid[i], ygrid[j])),
71 | endp, prob, cdf_X
72 | ), "2")
73 | }
74 | }
75 | graphics::contour(
76 | x = xgrid, y = ygrid, z = zvals,
77 | col = "gray42", xlab = "u = mu", ylab = "v = 1/sd"
78 | )
79 | graphics::grid(col = "lightgray", lty = "dotted")
80 | graphics::points(trace[1, ], trace[2, ], pch = 20, col = "blue")
81 | invisible(NULL)
82 | }
83 |
84 | #' Calculate Response Proportions
85 | #'
86 | #' Returns a table of proportions for each possible response category.
87 | #'
88 | #' @param data numeric vector or matrix of responses.
89 | #' @param n_levels number of response categories.
90 | #' @return A table of response category proportions.
91 | #' @examples
92 | #' data <- c(1, 2, 2, 3, 3, 3)
93 | #' response_prop(data, n_levels = 3)
94 | #'
95 | #' data_matrix <- matrix(c(1, 2, 2, 3, 3, 3), ncol = 2)
96 | #' response_prop(data_matrix, n_levels = 3)
97 | #' @export
98 | response_prop <- function(data, n_levels) {
99 | if (is.vector(data)) {
100 | tab <- pad_levels(prop.table(table(data)), n_levels)
101 | } else {
102 | tab <- t(apply(data, 2, function(x_col) {
103 | pad_levels(prop.table(table(x_col)), n_levels)
104 | }))
105 | dimnames(tab) <- list(Item = rownames(tab), Response = colnames(tab))
106 | }
107 | return(tab)
108 | }
109 |
110 | #' Pad Missing Levels
111 | #'
112 | #' Helper function that takes a vector of proportions or probabilities
113 | #' across possible responses and pads the missing levels with zeros up
114 | #' to the specified number of response categories.
115 | #'
116 | #' @param pr proportions or probabilities across possible responses.
117 | #' @param n_levels number of response categories.
118 | #' @return A named vector of proportions across all possible responses.
119 | #' @examples
120 | #' pr <- c("2" = 0.25, "3" = 0.25, "4" = 0.50)
121 | #' pad_levels(pr, 5)
122 | #' @noRd
123 | pad_levels <- function(pr, n_levels) {
124 | padded_pr <- vapply(seq_len(n_levels), function(k) {
125 | ifelse(as.character(k) %in% names(pr), pr[as.character(k)], 0)
126 | }, numeric(1))
127 | names(padded_pr) <- as.character(seq_len(n_levels))
128 | return(padded_pr)
129 | }
130 |
131 | #' Validate Skewness
132 | #'
133 | #' Checks if the skewness parameter is within the acceptable range.
134 | #'
135 | #' @param skew numeric. Skewness parameter.
136 | #' @noRd
137 | validate_skewness <- function(skew) {
138 | if (skew > 0.95 || skew < -0.95) {
139 | stop("The value of skewness must be in the range -0.95 to 0.95:
140 | `skew >= -0.95` and `skew <= 0.95`.")
141 | }
142 | }
143 |
144 | #' Check Package Installation
145 | #'
146 | #' Checks if a package is installed, and stops with an error message if not.
147 | #'
148 | #' @param pkg character. The name of the package.
149 | #' @noRd
150 | check_package <- function(pkg) {
151 | if (!requireNamespace(pkg, quietly = TRUE)) {
152 | stop(sprintf(
153 | 'Package "%s" must be installed.
154 | Please run:\n\n\tinstall.packages("%s")\n\n',
155 | pkg, pkg
156 | ), call. = FALSE)
157 | }
158 | }
159 |
160 | #' Density of Skew Normal Distribution
161 | #'
162 | #' Computes the probability density function of a skew normal distribution.
163 | #'
164 | #' @param x numeric. Variable.
165 | #' @param xi numeric. Location parameter.
166 | #' @param omega numeric. Scale parameter.
167 | #' @param alpha numeric. Shape parameter.
168 | #' @return numeric. Density at x.
169 | #' @seealso [sn::dsn()]
170 | #' @noRd
171 | density_sn <- function(x, xi = 0, omega = 1, alpha = 0) {
172 | return(2 / omega * stats::dnorm((x - xi) / omega) *
173 | stats::pnorm(alpha * (x - xi) / omega))
174 | }
175 |
176 | #' Convert Centered Parameters
177 | #'
178 | #' Converts centered parameters to direct parameters used in the
179 | #' skew normal density.
180 | #'
181 | #' @param cp numeric vector. Centered parameters c(mu, sd, skew).
182 | #' @return numeric vector. Direct parameters c(xi, omega, alpha).
183 | #' @seealso [sn::cp2dp]
184 | #' @noRd
185 | convert_params <- function(cp) {
186 | mu <- cp[1]
187 | sd <- cp[2]
188 | skew <- cp[3]
189 |
190 | b <- sqrt(2 / pi)
191 | r <- sign(skew) * (2 * abs(skew) / (4 - pi))^(1 / 3)
192 | delta <- r / (b * sqrt(1 + r^2))
193 |
194 | mu_z <- b * delta
195 | sd_z <- sqrt(1 - mu_z^2)
196 |
197 | omega <- sd / sd_z
198 | xi <- mu - omega * mu_z
199 | alpha <- delta / sqrt(1 - delta^2)
200 | dp <- as.numeric(c(xi, omega, alpha))
201 | names(dp) <- c("xi", "omega", "alpha")
202 | return(dp)
203 | }
204 |
205 | #' Mean of Skew Normal Distribution
206 | #'
207 | #' Computes the mean of a skew normal distribution.
208 | #'
209 | #' @param alpha numeric. Shape parameter.
210 | #' @return numeric. Mean of the skew normal distribution.
211 | #' @noRd
212 | mean_skew_normal <- function(alpha) {
213 | return(delta_skew_normal(alpha) * sqrt(2 / pi))
214 | }
215 |
216 | #' Delta Parameter of Skew Normal Distribution
217 | #'
218 | #' Computes the delta parameter of a skew normal distribution.
219 | #'
220 | #' @param alpha numeric. Shape parameter.
221 | #' @return numeric. Delta parameter.
222 | #' @noRd
223 | delta_skew_normal <- function(alpha) {
224 | return(alpha / (sqrt(1 + alpha^2)))
225 | }
226 |
227 | #' Variance of Skew Normal Distribution
228 | #'
229 | #' Computes the variance of a skew normal distribution.
230 | #'
231 | #' @param alpha numeric. Shape parameter.
232 | #' @return numeric. Variance of the skew normal distribution.
233 | #' @noRd
234 | var_skew_normal <- function(alpha) {
235 | return(1 - 2 * (delta_skew_normal(alpha)^2) / pi)
236 | }
237 |
238 | #' Scale and Shift
239 | #'
240 | #' Scales and shifts a variable based on direct parameters.
241 | #'
242 | #' @param x numeric. Variable.
243 | #' @param dp numeric vector. Direct parameters xi, omega, alpha.
244 | #' @return numeric. Shifted and scaled variable.
245 | #' @noRd
246 | scale_and_shift <- function(x, dp) {
247 | xi <- dp[["xi"]]
248 | omega <- dp[["omega"]]
249 | alpha <- dp[["alpha"]]
250 | mean_sn <- mean_skew_normal(alpha)
251 | return((x - mean_sn) / omega + mean_sn - xi / omega)
252 | }
253 |
254 | #' Generate Random Correlation Matrix
255 | #'
256 | #' Generates a random p x p correlation matrix.
257 | #'
258 | #' @param p integer. The size of the correlation matrix.
259 | #' @return numeric matrix. A random p x p correlation matrix.
260 | #' @noRd
261 | generate_rand_corr_matrix <- function(p) {
262 | corr <- drop(stats::rWishart(1, p, diag(p)))
263 | corr <- stats::cov2cor(corr)
264 | return(corr)
265 | }
266 |
267 | #' Convert Correlation Matrix to Covariance Matrix
268 | #'
269 | #' Converts a correlation matrix to a covariance matrix.
270 | #'
271 | #' @param corr numeric matrix. Correlation matrix.
272 | #' @param s numeric vector. Standard deviations.
273 | #' @return numeric matrix. Covariance matrix.
274 | #' @noRd
275 | cor2cov <- function(corr, s) {
276 | return(diag(s) %*% corr %*% diag(s))
277 | }
278 |
279 | #' Generate Random Centered Parameters
280 | #'
281 | #' Generates random centered parameters for mu, sd, and skewness.
282 | #'
283 | #' @return numeric vector. Random centered parameters c(mu, sd, gamma1).
284 | #' @noRd
285 | generate_random_cp <- function() {
286 | mu <- stats::rnorm(1, 0, 1)
287 | sd <- stats::runif(1, 0.1, 2)
288 | gamma1 <- stats::runif(1, -0.95, 0.95)
289 | cp <- c("mu" = mu, "sd" = sd, "gamma1" = gamma1)
290 | return(cp)
291 | }
292 |
293 | #' Percentify
294 | #'
295 | #' Converts a numeric value to a percentage string.
296 | #'
297 | #' @param x numeric. The value to convert.
298 | #' @param digits integer. Number of digits to round to. Defaults to 0.
299 | #' @return character. Percentage string.
300 | #' @noRd
301 | percentify <- function(x, digits = 0) {
302 | percentage <- formatC(x * 100, format = "f", digits = digits)
303 | return(paste0(percentage, "%"))
304 | }
305 |
--------------------------------------------------------------------------------
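
The centered-to-direct parameter mapping in `convert_params()` and the closed-form density in `density_sn()` carry most of the skew normal machinery above. Below is a small numeric check (a sketch, not part of the package tests); since both helpers are internal (`@noRd`), it assumes access via `latent2likert:::` or `devtools::load_all()` when working in the package source. Integrating the resulting density should recover the requested mean and standard deviation.

```r
# Internal helpers accessed via ':::' for illustration only.
cp <- c(mu = 1, sd = 2, skew = 0.5)
dp <- latent2likert:::convert_params(cp)
f <- function(x) {
  latent2likert:::density_sn(x, dp[["xi"]], dp[["omega"]], dp[["alpha"]])
}

stats::integrate(f, -Inf, Inf)$value                            # ~1
m1 <- stats::integrate(function(x) x * f(x), -Inf, Inf)$value   # ~1 (mu)
m2 <- stats::integrate(function(x) x^2 * f(x), -Inf, Inf)$value
sqrt(m2 - m1^2)                                                 # ~2 (sd)
```
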
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 |
6 |
7 | ```{r setup, include = FALSE}
8 | knitr::opts_chunk$set(
9 | collapse = TRUE,
10 | comment = "#>",
11 | fig.path = "man/figures/README-",
12 | dpi=250,
13 | out.width = "70%")
14 | ```
15 |
16 | # latent2likert
17 |
18 |
19 | [](https://github.com/markolalovic/latent2likert/actions/workflows/R-CMD-check.yaml)
20 | [](https://app.codecov.io/gh/markolalovic/latent2likert)
21 | [](https://CRAN.R-project.org/package=latent2likert)
22 | [](https://cranlogs.r-pkg.org/badges/latent2likert)
23 |
24 |
25 | ## Overview
26 |
27 | The **latent2likert** package is designed to effectively simulate the discretization process inherent to Likert scales while minimizing distortion. It converts continuous latent variables into ordinal categories to generate Likert scale item responses. This is particularly useful for accurately modeling and analyzing survey data that use Likert scales, especially when applying statistical techniques that require metric data.
28 |
29 |
30 | ## Installation
31 |
32 | You can install the released version from CRAN:
33 |
34 | ```{r, eval = FALSE}
35 | install.packages("latent2likert")
36 | ```
37 |
38 | Or the latest development version from GitHub:
39 | ``` r
40 | devtools::install_github("markolalovic/latent2likert")
41 | ```
42 |
43 | ## Dependencies
44 |
45 | To keep the package lightweight, **latent2likert** only imports [mvtnorm](https://cran.r-project.org/package=mvtnorm), along with the standard R packages stats and graphics, which are typically included in R releases. An additional suggested dependency is the package [sn](https://cran.r-project.org/package=sn), which is required only for generating random responses from correlated Likert items based on a multivariate skew normal distribution. The package prompts the user to install this dependency during interactive sessions if needed.
46 |
47 | ## Features
48 |
49 | * `rlikert`: Generates random responses to Likert scale questions based on specified means and standard deviations of latent variables, with optional settings for skewness and correlations.
50 | * `estimate_params`: Estimates latent parameters from existing survey data.
51 |
52 |
53 | ## Structure
54 |
55 |
57 |
125 |
128 |
129 |
130 | Note that the latent distribution depends on the skewness parameter: the normal latent distribution is used when skew = 0, and the skew normal distribution is used otherwise. The value of skewness is restricted to the range -0.95 to 0.95, that is
131 |
132 | > `skew >= -0.95` and `skew <= 0.95`.
133 |
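For example, a single left-skewed item can be simulated and its response proportions inspected as in the minimal sketch below; the sample-size argument name `size` is assumed here, and the remaining argument names follow the function descriptions above.

``` r
library(latent2likert)

# Simulate 1000 responses to one 5-point item from a left-skewed
# latent variable (argument names assumed; see the function reference).
responses <- rlikert(size = 1000, n_items = 1, n_levels = 5, skew = -0.5)

# Proportion of responses in each category.
response_prop(responses, n_levels = 5)
```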
134 |
135 | ## Further Reading
136 |
137 | * For more detailed information and practical examples, please refer to the package [vignette](https://lalovic.io/latent2likert/articles/using_latent2likert.html).
138 | * The implemented algorithms are described in the functions [reference](https://lalovic.io/latent2likert/reference/index.html).
139 |
140 | ## Related R Packages
141 |
142 | Alternatively, you can simulate Likert item responses using the `draw_likert` function from the [fabricatr](https://CRAN.R-project.org/package=fabricatr) package. This function recodes a latent variable into a Likert response variable by specifying intervals that subdivide the continuous range. The **latent2likert** package, however, offers an advantage by automatically calculating optimal intervals that minimize distortion between the latent variable and the Likert response variable for both normal and skew normal latent distributions, eliminating the need to manually specify the intervals.
143 |
144 | There are also several alternative approaches that do not rely on latent distributions. One method involves directly defining a discrete probability distribution and sampling from it using the `sample` function in R or the `likert` function from the [wakefield](https://CRAN.R-project.org/package=wakefield) package. Another approach is to specify the means, standard deviations, and correlations among Likert response variables. For this, you can use [LikertMakeR](https://CRAN.R-project.org/package=LikertMakeR) or [SimCorMultRes](https://CRAN.R-project.org/package=SimCorMultRes) to generate correlated multinomial responses.
145 |
146 | Additionally, you can define a data generating process. For those familiar with item response theory, the [mirt](https://CRAN.R-project.org/package=mirt) package allows users to specify discrimination and difficulty parameters for each response category.
147 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # latent2likert
5 |
6 |
7 |
8 | [](https://github.com/markolalovic/latent2likert/actions/workflows/R-CMD-check.yaml)
9 | [](https://app.codecov.io/gh/markolalovic/latent2likert)
10 | [](https://CRAN.R-project.org/package=latent2likert)
12 | [](https://cranlogs.r-pkg.org/badges/latent2likert)
13 |
14 |
15 | ## Overview
16 |
17 | The **latent2likert** package is designed to effectively simulate the
18 | discretization process inherent to Likert scales while minimizing
19 | distortion. It converts continuous latent variables into ordinal
20 | categories to generate Likert scale item responses. This is particularly
21 | useful for accurately modeling and analyzing survey data that use Likert
22 | scales, especially when applying statistical techniques that require
23 | metric data.
24 |
25 | ## Installation
26 |
27 | You can install the released version from CRAN:
28 |
29 | ``` r
30 | install.packages("latent2likert")
31 | ```
32 |
33 | Or the latest development version from GitHub:
34 |
35 | ``` r
36 | devtools::install_github("markolalovic/latent2likert")
37 | ```
38 |
39 | ## Dependencies
40 |
41 | To keep the package lightweight, **latent2likert** only imports
42 | [mvtnorm](https://cran.r-project.org/package=mvtnorm), along with the
43 | standard R packages stats and graphics, which are typically included in
44 | R releases. An additional suggested dependency is the package
45 | [sn](https://cran.r-project.org/package=sn), which is required only for
46 | generating random responses from correlated Likert items based on a
47 | multivariate skew normal distribution. The package prompts the user to
48 | install this dependency during interactive sessions if needed.
49 |
50 | ## Features
51 |
52 | - `rlikert`: Generates random responses to Likert scale questions based
53 | on specified means and standard deviations of latent variables, with
54 | optional settings for skewness and correlations.
55 | - `estimate_params`: Estimates latent parameters from existing survey
56 | data.
57 |
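For example, the two functions can be combined to simulate responses and then
recover the latent parameters from them. This is a minimal sketch; the
sample-size argument name `size` is assumed here, and the remaining argument
names follow the function descriptions above.

``` r
library(latent2likert)

# Simulate responses of 500 respondents to two 5-point items whose
# latent variables have different means and standard deviations
# (argument names assumed; see the function reference).
data <- rlikert(size = 500, n_items = 2, n_levels = 5,
                mean = c(0, -1), sd = c(1, 0.5))

# Estimate the latent means and standard deviations from the responses.
estimate_params(data, n_levels = 5)
```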
58 | ## Structure
59 |
60 |
63 |
160 |
169 |
170 | Note that the latent distribution depends on the skewness parameter: the
171 | normal latent distribution is used when skew = 0, and the skew normal
172 | distribution is used otherwise. The value of skewness is restricted to
173 | the range -0.95 to 0.95, that is
174 |
175 | > `skew >= -0.95` and `skew <= 0.95`.
176 |
177 | ## Further Reading
178 |
179 | - For more detailed information and practical examples, please refer to
180 | the package
181 | [vignette](https://lalovic.io/latent2likert/articles/using_latent2likert.html).
182 | - The implemented algorithms are described in the functions
183 | [reference](https://lalovic.io/latent2likert/reference/index.html).
184 |
185 | ## Related R Packages
186 |
187 | Alternatively, you can simulate Likert item responses using the
188 | `draw_likert` function from the
189 | [fabricatr](https://CRAN.R-project.org/package=fabricatr) package. This
190 | function recodes a latent variable into a Likert response variable by
191 | specifying intervals that subdivide the continuous range. The
192 | **latent2likert** package, however, offers an advantage by automatically
193 | calculating optimal intervals that minimize distortion between the
194 | latent variable and the Likert response variable for both normal and
195 | skew normal latent distributions, eliminating the need to manually
196 | specify the intervals.
197 |
198 | There are also several alternative approaches that do not rely on latent
199 | distributions. One method involves directly defining a discrete
200 | probability distribution and sampling from it using the `sample`
201 | function in R or the `likert` function from the
202 | [wakefield](https://CRAN.R-project.org/package=wakefield) package.
203 | Another approach is to specify the means, standard deviations, and
204 | correlations among Likert response variables. For this, you can use
205 | [LikertMakeR](https://CRAN.R-project.org/package=LikertMakeR) or
206 | [SimCorMultRes](https://CRAN.R-project.org/package=SimCorMultRes) to
207 | generate correlated multinomial responses.
208 |
209 | Additionally, you can define a data generating process. For those
210 | familiar with item response theory, the
211 | [mirt](https://CRAN.R-project.org/package=mirt) package allows users to
212 | specify discrimination and difficulty parameters for each response
213 | category.
214 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://markolalovic.github.io/latent2likert/
2 | template:
3 | bootstrap: 5
4 |
--------------------------------------------------------------------------------
/codemeta.json:
--------------------------------------------------------------------------------
1 | {
2 | "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
3 | "@type": "SoftwareSourceCode",
4 | "identifier": "latent2likert",
5 | "description": "Effectively simulates the discretization process inherent to Likert scales while minimizing distortion. It converts continuous latent variables into ordinal categories to generate Likert scale item responses. Particularly useful for accurately modeling and analyzing survey data that use Likert scales, especially when applying statistical techniques that require metric data.",
6 | "name": "latent2likert: Converting Latent Variables Into Likert Scale Responses",
7 | "codeRepository": "https://github.com/markolalovic/latent2likert",
8 | "issueTracker": "https://github.com/markolalovic/latent2likert/issues/",
9 | "license": "https://spdx.org/licenses/MIT",
10 | "version": "1.2.1",
11 | "programmingLanguage": {
12 | "@type": "ComputerLanguage",
13 | "name": "R",
14 | "url": "https://r-project.org"
15 | },
16 | "runtimePlatform": "R version 4.4.0 (2024-04-24)",
17 | "author": [
18 | {
19 | "@type": "Person",
20 | "givenName": "Marko",
21 | "familyName": "Lalovic",
22 | "email": "marko@lalovic.io"
23 | }
24 | ],
25 | "maintainer": [
26 | {
27 | "@type": "Person",
28 | "givenName": "Marko",
29 | "familyName": "Lalovic",
30 | "email": "marko@lalovic.io"
31 | }
32 | ],
33 | "softwareSuggestions": [
34 | {
35 | "@type": "SoftwareApplication",
36 | "identifier": "sn",
37 | "name": "sn",
38 | "provider": {
39 | "@id": "https://cran.r-project.org",
40 | "@type": "Organization",
41 | "name": "Comprehensive R Archive Network (CRAN)",
42 | "url": "https://cran.r-project.org"
43 | },
44 | "sameAs": "https://CRAN.R-project.org/package=sn"
45 | },
46 | {
47 | "@type": "SoftwareApplication",
48 | "identifier": "cowplot",
49 | "name": "cowplot",
50 | "provider": {
51 | "@id": "https://cran.r-project.org",
52 | "@type": "Organization",
53 | "name": "Comprehensive R Archive Network (CRAN)",
54 | "url": "https://cran.r-project.org"
55 | },
56 | "sameAs": "https://CRAN.R-project.org/package=cowplot"
57 | },
58 | {
59 | "@type": "SoftwareApplication",
60 | "identifier": "GGally",
61 | "name": "GGally",
62 | "provider": {
63 | "@id": "https://cran.r-project.org",
64 | "@type": "Organization",
65 | "name": "Comprehensive R Archive Network (CRAN)",
66 | "url": "https://cran.r-project.org"
67 | },
68 | "sameAs": "https://CRAN.R-project.org/package=GGally"
69 | },
70 | {
71 | "@type": "SoftwareApplication",
72 | "identifier": "ggh4x",
73 | "name": "ggh4x",
74 | "provider": {
75 | "@id": "https://cran.r-project.org",
76 | "@type": "Organization",
77 | "name": "Comprehensive R Archive Network (CRAN)",
78 | "url": "https://cran.r-project.org"
79 | },
80 | "sameAs": "https://CRAN.R-project.org/package=ggh4x"
81 | },
82 | {
83 | "@type": "SoftwareApplication",
84 | "identifier": "gridExtra",
85 | "name": "gridExtra",
86 | "provider": {
87 | "@id": "https://cran.r-project.org",
88 | "@type": "Organization",
89 | "name": "Comprehensive R Archive Network (CRAN)",
90 | "url": "https://cran.r-project.org"
91 | },
92 | "sameAs": "https://CRAN.R-project.org/package=gridExtra"
93 | },
94 | {
95 | "@type": "SoftwareApplication",
96 | "identifier": "knitr",
97 | "name": "knitr",
98 | "provider": {
99 | "@id": "https://cran.r-project.org",
100 | "@type": "Organization",
101 | "name": "Comprehensive R Archive Network (CRAN)",
102 | "url": "https://cran.r-project.org"
103 | },
104 | "sameAs": "https://CRAN.R-project.org/package=knitr"
105 | },
106 | {
107 | "@type": "SoftwareApplication",
108 | "identifier": "psych",
109 | "name": "psych",
110 | "provider": {
111 | "@id": "https://cran.r-project.org",
112 | "@type": "Organization",
113 | "name": "Comprehensive R Archive Network (CRAN)",
114 | "url": "https://cran.r-project.org"
115 | },
116 | "sameAs": "https://CRAN.R-project.org/package=psych"
117 | },
118 | {
119 | "@type": "SoftwareApplication",
120 | "identifier": "RColorBrewer",
121 | "name": "RColorBrewer",
122 | "provider": {
123 | "@id": "https://cran.r-project.org",
124 | "@type": "Organization",
125 | "name": "Comprehensive R Archive Network (CRAN)",
126 | "url": "https://cran.r-project.org"
127 | },
128 | "sameAs": "https://CRAN.R-project.org/package=RColorBrewer"
129 | },
130 | {
131 | "@type": "SoftwareApplication",
132 | "identifier": "rmarkdown",
133 | "name": "rmarkdown",
134 | "provider": {
135 | "@id": "https://cran.r-project.org",
136 | "@type": "Organization",
137 | "name": "Comprehensive R Archive Network (CRAN)",
138 | "url": "https://cran.r-project.org"
139 | },
140 | "sameAs": "https://CRAN.R-project.org/package=rmarkdown"
141 | },
142 | {
143 | "@type": "SoftwareApplication",
144 | "identifier": "testthat",
145 | "name": "testthat",
146 | "version": ">= 3.0.0",
147 | "provider": {
148 | "@id": "https://cran.r-project.org",
149 | "@type": "Organization",
150 | "name": "Comprehensive R Archive Network (CRAN)",
151 | "url": "https://cran.r-project.org"
152 | },
153 | "sameAs": "https://CRAN.R-project.org/package=testthat"
154 | }
155 | ],
156 | "softwareRequirements": {
157 | "1": {
158 | "@type": "SoftwareApplication",
159 | "identifier": "R",
160 | "name": "R",
161 | "version": ">= 3.5"
162 | },
163 | "2": {
164 | "@type": "SoftwareApplication",
165 | "identifier": "graphics",
166 | "name": "graphics"
167 | },
168 | "3": {
169 | "@type": "SoftwareApplication",
170 | "identifier": "mvtnorm",
171 | "name": "mvtnorm",
172 | "provider": {
173 | "@id": "https://cran.r-project.org",
174 | "@type": "Organization",
175 | "name": "Comprehensive R Archive Network (CRAN)",
176 | "url": "https://cran.r-project.org"
177 | },
178 | "sameAs": "https://CRAN.R-project.org/package=mvtnorm"
179 | },
180 | "4": {
181 | "@type": "SoftwareApplication",
182 | "identifier": "stats",
183 | "name": "stats"
184 | },
185 | "SystemRequirements": null
186 | },
187 | "fileSize": "2130.159KB",
188 | "relatedLink": "https://lalovic.io/latent2likert/",
189 | "releaseNotes": "https://github.com/markolalovic/latent2likert/blob/master/NEWS.md",
190 | "readme": "https://github.com/markolalovic/latent2likert/blob/main/README.md",
191 | "contIntegration": ["https://github.com/markolalovic/latent2likert/actions/workflows/R-CMD-check.yaml", "https://app.codecov.io/gh/markolalovic/latent2likert?branch=main"],
192 | "keywords": ["simulation", "likert", "survey-data", "likert-data"]
193 | }
194 |
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## R CMD check results
2 |
3 | 0 errors | 0 warnings | 1 note
4 |
5 | * This is a new release.
6 |
--------------------------------------------------------------------------------
/data/part_bfi.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/data/part_bfi.rda
--------------------------------------------------------------------------------
/dev/config_attachment.yaml:
--------------------------------------------------------------------------------
1 | path.n: NAMESPACE
2 | path.d: DESCRIPTION
3 | dir.r: R
4 | dir.v: vignettes
5 | dir.t: tests
6 | extra.suggests: ~
7 | pkg_ignore: ~
8 | document: yes
9 | normalize: yes
10 | inside_rmd: no
11 | must.exist: yes
12 | check_if_suggests_is_installed: yes
13 |
--------------------------------------------------------------------------------
/dev/dev.R:
--------------------------------------------------------------------------------
1 | # Development
2 | devtools::load_all("/Users/markolalovic/dev/latent2likert/")
3 |
4 | covr::codecov(
5 | quiet = FALSE,
6 | clean = FALSE,
7 | token = Sys.getenv("CODECOV_TOKEN")
8 | )
9 |
--------------------------------------------------------------------------------
/dev/doc.R:
--------------------------------------------------------------------------------
1 | # Documentation
2 | devtools::document()
3 | devtools::build_manual()
4 |
--------------------------------------------------------------------------------
/dev/prepare_data.R:
--------------------------------------------------------------------------------
1 | # Prepare survey data
2 | library(psych)
3 |
4 | # Take only the first 5 items and gender
5 | vars <- c("A1", "A2", "A3", "A4", "A5")
6 | part_bfi <- bfi[, c(vars, "gender")]
7 |
8 | # Zero-one coding male = 0, female = 1
9 | part_bfi$gender <- as.integer(part_bfi$gender - 1)
10 |
11 | # Impute the missing values with modes
12 | for (var in vars) {
13 | mode_value <- names(sort(-table(part_bfi[, var])))[1]
14 | part_bfi[, var][is.na(part_bfi[, var])] <- as.integer(mode_value)
15 |
16 | stopifnot(
17 | class(part_bfi[, var]) == "integer",
18 | sum(is.na(part_bfi[, var])) == 0
19 | )
20 | }
21 | str(part_bfi)
22 |
23 | # Save the data set
24 | usethis::use_data(part_bfi)
25 |
--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markolalovic/latent2likert/90c53cff3f6a722f6ee2c8ba5574edec50cb7428/docs/.nojekyll
--------------------------------------------------------------------------------
/docs/404.html:
--------------------------------------------------------------------------------
[pkgdown-generated 404 page; no recoverable text]
--------------------------------------------------------------------------------
/docs/LICENSE-text.html:
--------------------------------------------------------------------------------
YEAR: 2024
COPYRIGHT HOLDER: Marko Lalovic
--------------------------------------------------------------------------------
/docs/LICENSE.html:
--------------------------------------------------------------------------------
MIT License, Copyright (c) 2024 Marko Lalovic, followed by the standard MIT
permission and warranty text.
--------------------------------------------------------------------------------
/docs/news/index.html:
--------------------------------------------------------------------------------
Changelog highlights: the rlikert() function; the estimate_params() function
for estimating parameters from existing survey data; the package was renamed
from responsesR to latent2likert to better reflect its purpose of converting
latent variables into Likert scale responses. CRAN release: 2024-06-24.
--------------------------------------------------------------------------------
/docs/reference/estimate_mean_and_sd.html:
--------------------------------------------------------------------------------
Rendered from R/estimation.R (estimate_mean_and_sd.Rd). Estimates the mean and
standard deviation of a latent variable given the discrete probabilities of
its observed Likert scale responses. Arguments: a named vector of
probabilities for each response category, the number of response categories
for the Likert scale item, the marginal skewness of the latent variable
(defaults to 0), the tolerance for convergence (defaults to 1e-6), and the
maximum number of iterations (defaults to 100). Returns a numeric vector with
two elements: the estimated mean and standard deviation. Details: the function
uses an iterative algorithm to solve the system of non-linear equations that
describe the relationship between the continuous latent variable and the
observed discrete probability distribution of Likert scale responses; the
algorithm ensures stability by reparameterizing the system and applying
constraints to prevent stepping into invalid regions.

Example:
prob <- c("1" = 0.313, "2" = 0.579, "3" = 0.105, "4" = 0.003)
# returns estimates that are close to the actual mean and sd: c(-1, 0.5)
estimate_mean_and_sd(prob, 5)
#> [1] -1.0025695  0.5004155
--------------------------------------------------------------------------------
/docs/reference/index.html:
--------------------------------------------------------------------------------
Function reference index: discretize_density(), estimate_mean_and_sd(),
estimate_params(), part_bfi, plot_likert_transform(), response_prop(),
rlikert(), simulate_likert().
--------------------------------------------------------------------------------
/docs/reference/plot_likert_transform.html:
--------------------------------------------------------------------------------
Plots the densities of latent variables and the corresponding transformed
discrete probability distributions. Arguments: n_items (number of Likert scale
items), n_levels (number of response categories per item; integer or vector of
integers), mean (means of the latent variables; defaults to 0), sd (standard
deviations of the latent variables; defaults to 1), and skew (marginal
skewness of the latent variables; defaults to 0).

Examples:
plot_likert_transform(n_items = 3, n_levels = c(3, 4, 5))
plot_likert_transform(n_items = 3, n_levels = 5, mean = c(0, 1, 2))
plot_likert_transform(n_items = 3, n_levels = 5, sd = c(0.8, 1, 1.2))
plot_likert_transform(n_items = 3, n_levels = 5, skew = c(-0.5, 0, 0.5))
--------------------------------------------------------------------------------
/docs/reference/response_prop.html:
--------------------------------------------------------------------------------
Returns a table of proportions for each possible response category. Arguments:
a numeric vector or matrix of responses, and n_levels (the number of response
categories).

Examples:
data <- c(1, 2, 2, 3, 3, 3)
response_prop(data, n_levels = 3)
#>         1         2         3
#> 0.1666667 0.3333333 0.5000000

data_matrix <- matrix(c(1, 2, 2, 3, 3, 3), ncol = 2)
response_prop(data_matrix, n_levels = 3)
#>       Response
#> Item           1         2 3
#>   [1,] 0.3333333 0.6666667 0
#>   [2,] 0.0000000 0.0000000 1