├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
    ├── coef.R
    ├── data.R
    ├── hgrm.R
    ├── hgrm2.R
    ├── hgrmDIF.R
    ├── hltm.R
    ├── hltm2.R
    ├── latent_scores.R
    ├── print.R
    ├── summary.R
    ├── sysdata.rda
    ├── utils.R
    ├── utils_grm.R
    └── utils_ltm.R
├── README.Rmd
├── README.md
├── cran-comments.md
├── data
    └── nes_econ2008.rda
├── hIRT.Rproj
└── man
    ├── coef_item.Rd
    ├── figures
        └── README-pressure-1.png
    ├── hgrm.Rd
    ├── hgrm2.Rd
    ├── hgrmDIF.Rd
    ├── hltm.Rd
    ├── hltm2.Rd
    ├── latent_scores.Rd
    ├── nes_econ2008.Rd
    ├── print.hIRT.Rd
    └── summary.hIRT.Rd


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^data-raw$
4 | ^\.travis\.yml$
5 | ^README\.Rmd$
6 | ^README-.*\.png$
7 | ^cran-comments\.md$
8 | ^CRAN-RELEASE$
9 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | .Rproj.user/**
 3 | .Rhistory
 4 | .RData
 5 | .Ruserdata
 6 | data-raw
 7 | CRAN-RELEASE
 8 | cran-comments
 9 | inst/doc
10 | hIRT.Rproj
11 | .Rproj.user
12 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
2 | 
3 | language: R
4 | sudo: false
5 | cache: packages
6 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: hIRT
 2 | Type: Package
 3 | Title: Hierarchical Item Response Theory Models
 4 | Version: 0.4.0
 5 | Authors@R: person("Xiang", "Zhou", email = "xiang_zhou@fas.harvard.edu",
 6 |   role = c("aut", "cre"))
 7 | Description: Implementation of a class of hierarchical item response
 8 |   theory (IRT) models where both the mean and the variance of latent preferences
 9 |   (ability parameters) may depend on observed covariates. The current
10 |   implementation includes both the two-parameter latent trait model for binary data and the
11 |   graded response model for ordinal data. Both are fitted via the Expectation-Maximization (EM)
12 |   algorithm. Asymptotic standard errors are derived from the observed information
13 |   matrix.
14 | Depends:
15 |     R (>= 3.4.0),
16 |     stats
17 | Imports:
18 |     pryr (>= 0.1.2),
19 |     rms (>= 5.1-1),
20 |     ltm (>= 1.1-1),
21 |     Matrix (>= 1.2-10)
22 | Suggests:
23 |     ggplot2
24 | License: GPL (>= 3)
25 | Encoding: UTF-8
26 | LazyData: true
27 | RoxygenNote: 7.1.1
28 | URL: http://github.com/xiangzhou09/hIRT
29 | BugReports: http://github.com/xiangzhou09/hIRT
30 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(print,hIRT)
 4 | S3method(print,summary_hIRT)
 5 | S3method(summary,hIRT)
 6 | export(coef_item)
 7 | export(coef_mean)
 8 | export(coef_var)
 9 | export(hgrm)
10 | export(hgrm2)
11 | export(hgrmDIF)
12 | export(hltm)
13 | export(hltm2)
14 | export(latent_scores)
15 | import(stats)
16 | importFrom(ltm,grm)
17 | importFrom(ltm,ltm)
18 | importFrom(pryr,compose)
19 | importFrom(pryr,partial)
20 | importFrom(rms,lrm.fit)
21 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # hIRT 0.3.1
2 | 
3 | * fixed bug in README.md
4 | 
5 | 
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/R/coef.R:
--------------------------------------------------------------------------------
 1 | #' Parameter Estimates from Hierarchical IRT Models.
 2 | #'
 3 | #' Parameter estimates from either \code{hltm} or \code{hgrm} or \code{hgrmDIF} models. \code{code_item}
 4 | #' reports estimates of item parameters. \code{coef_mean} reports results for the mean equation.
 5 | #' \code{coef_var} reports results for the variance equation.
 6 | #'
 7 | #' @inheritParams print.hIRT
 8 | #' @param by_item Logical. Should item parameters be stored item by item
 9 | #'   (if \code{TRUE}) or put together in a data frame (if \code{FALSE})?
10 | #'
11 | #' @return Parameter estimates, standard errors, z values, and p values
12 | #'   organized as a data frame (if \code{by_item = TRUE}) or a list (if \code{
13 | #'   by_item = FALSE}).
14 | #'
15 | #' @export
16 | #' @examples
17 | #' y <- nes_econ2008[, -(1:3)]
18 | #' x <- model.matrix( ~ party * educ, nes_econ2008)
19 | #' z <- model.matrix( ~ party, nes_econ2008)
20 | #' nes_m1 <- hgrm(y, x, z)
21 | #' coef_item(nes_m1)
22 | coef_item <- function(x, by_item = TRUE, digits = 3) {
23 | 
24 |   if(inherits(x, "hgrm")){
25 |     H <- unname(x[["H"]])
26 |     xitem <- x[["coefficients"]][1:sum(H), , drop = FALSE]
27 |     if (by_item == FALSE) return(round(xitem, digits))
28 |     index <- findInterval(1:sum(H), c(1, cumsum(H)[-length(H)] + 1))
29 |     out <- split(xitem, index)
30 |     for (i in seq_along(out)) {
31 |       tmp <- strsplit(rownames(out[[i]]), "\\.")
32 |       rownames(out[[i]]) <- vapply(tmp, function(x) x[length(x)], FUN.VALUE = character(1L))
33 |       out[[i]] <- round(out[[i]], digits)
34 |     }
35 |   } else if (inherits(x, "hltm")){
36 |     J <- x[["J"]]
37 |     xitem <- x[["coefficients"]][1:(2 * J), , drop = FALSE]
38 |     if (by_item == FALSE)
39 |       return(round(xitem, digits))
40 |     out <- split(xitem, rep(1:J, each = 2))
41 |     for (i in 1:J) rownames(out[[i]]) <- c("Diff", "Dscrmn")
42 |   } else if (inherits(x, "hgrmDIF")){
43 |     H <- unname(x[["H"]])
44 |     p <- x[["p"]]
45 |     ncoefs <- vapply(x[["coef_item"]], length, integer(1L))
46 |     sH <- sum(ncoefs)
47 |     xitem <- x[["coefficients"]][1:sH, , drop = FALSE]
48 |     if (by_item == FALSE) return(round(xitem, digits))
49 |     index <- findInterval(1:sH, c(1, cumsum(ncoefs[-length(ncoefs)]) + 1))
50 |     out <- split(xitem, index)
51 |     for (i in seq_along(out)) {
52 |       tmp <- strsplit(rownames(out[[i]]), "\\.")
53 |       rownames(out[[i]]) <- vapply(tmp, function(x) x[length(x)], FUN.VALUE = character(1L))
54 |       out[[i]] <- round(out[[i]], digits)
55 |     }
56 |   } else stop("Use only with 'hgrm' or 'hltm' or `hgrmDIF` objects.\n")
57 | 
58 |   stats::setNames(out, names(x[["H"]]))
59 | }
60 | 
61 | #' @inheritParams print.hIRT
62 | #'
63 | #' @export
64 | #' @rdname coef_item
65 | #' @examples
66 | #' coef_mean(nes_m1)
67 | coef_mean <- function(x, digits = 3) {
68 | 
69 |   # if (x[["p"]] < 2) return(NULL)
70 |   sH <- if (inherits(x, "hltm"))
71 |     2 * x[["J"]] else if (inherits(x, "hgrm"))
72 |       sum(x[["H"]]) else if (inherits(x, "hgrmDIF"))
73 |         sum(vapply(x[["coef_item"]], length, integer(1L))) else{
74 |           stop("Use only with 'hgrm' or 'hltm' or `hgrmDIF` objects.\n")
75 |         }
76 |   gamma_indices <- (sH + 1):(sH + x[["p"]])
77 |   round(x[["coefficients"]][gamma_indices, , drop = FALSE], digits = digits)
78 | }
79 | 
80 | #' @inheritParams print.hIRT
81 | #'
82 | #' @export
83 | #' @rdname coef_item
84 | #' @examples
85 | #' coef_var(nes_m1)
86 | coef_var <- function(x, digits = 3) {
87 | 
88 |   # if (x[["q"]] < 2) return(NULL)
89 |   sH <- if (inherits(x, "hltm"))
90 |     2 * x[["J"]] else if (inherits(x, "hgrm"))
91 |       sum(x[["H"]]) else if (inherits(x, "hgrmDIF"))
92 |         sum(vapply(x[["coef_item"]], length, integer(1L))) else{
93 |           stop("Use only with 'hgrm' or 'hltm' or `hgrmDIF` objects.\n")
94 |         }
95 |   lambda_indices <- (sH + x[["p"]] + 1):(sH + x[["p"]] + x[["q"]])
96 |   round(x[["coefficients"]][lambda_indices, , drop = FALSE], digits = digits)
97 | }
98 | 


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
 1 | #' Public Attitudes on Economic Issues in ANES 2008
 2 | #'
 3 | #' A dataset containing gender, party ID, education, and responses to 10 survey items
 4 | #' on economic issues from the American National Election Studies, 2008.
 5 | #'
 6 | #' @format A data frame with 2268 rows and 13 variables: \describe{
 7 | #'   \item{gender}{gender. 1: male; 2: female}
 8 | #'   \item{party}{party identification: Democrat, independent, or Republican}
 9 | #'   \item{educ}{education. 1: high school or less; 2: some college or above}
10 | #'   \item{health_ins7}{Support for government or private health insurance, 7 categories}
11 | #'   \item{jobs_guar7}{Support for government guarantee jobs and income, 7 categories}
12 | #'   \item{gov_services7}{Should government reduce or increase spending on services?, 7 categories}
13 | #'   \item{FS_poor3}{Federal spending on the poor, 3 categories}
14 | #'   \item{FS_childcare3}{Federal spending on child care, 3 categories}
15 | #'   \item{FS_crime3}{Federal spending on crime, 3 categories}
16 | #'   \item{FS_publicschools3}{Federal spending on public schools, 3 categories}
17 | #'   \item{FS_welfare3}{Federal spending on welfare, 3 categories}
18 | #'   \item{FS_envir3}{Federal spending on environment, 3 categories}
19 | #'   \item{FS_socsec3}{Federal spending on Social Security, 3 categories}
20 | #'   }
21 | "nes_econ2008"
22 | 
23 | 


--------------------------------------------------------------------------------
/R/hgrm.R:
--------------------------------------------------------------------------------
  1 | #' Fitting Hierarchical Graded Response Models (for Ordinal Responses)
  2 | #'
  3 | #' \code{hgrm} fits a hierarchical graded response model in which both
  4 | #' the mean and the variance of the latent preference (ability parameter)
  5 | #' may depend on person-specific covariates (\code{x} and \code{z}).
  6 | #' Specifically, the mean is specified as a linear combination of \code{x}
  7 | #' and the log of the variance is specified as a linear combination of
  8 | #' \code{z}. Nonresponses are treated as missing at random.
  9 | #'
 10 | #' @param y A data frame or matrix of item responses.
 11 | #' @param x An optional model matrix, including the intercept term, that predicts the
 12 | #'   mean of the latent preference. If not supplied, only the intercept term is included.
 13 | #' @param z An optional model matrix, including the intercept term, that predicts the
 14 | #'   variance of the latent preference. If not supplied, only the intercept term is included.
 15 | #' @param constr The type of constraints used to identify the model: "latent_scale",
 16 | #'   or "items". The default, "latent_scale" constrains the mean of latent preferences
 17 | #'   to zero and the geometric mean of prior variance to one; "items" places constraints
 18 | #'   on item parameters instead and sets the mean of item difficulty parameters to zero
 19 | #'   and the geometric mean of the discrimination parameters to one.
 20 | #' @param beta_set The index of the item for which the discrimination parameter is
 21 | #'   restricted to be positive (or negative). It may take any integer value from
 22 | #'   1 to \code{ncol(y)}.
 23 | #' @param sign_set Logical. Should the discrimination parameter of
 24 | #'   the corresponding item (indexed by \code{beta_set}) be positive
 25 | #'   (if \code{TRUE}) or negative (if \code{FALSE})?
 26 | #' @param init A character string indicating how item parameters are initialized. It can be
 27 | #'   "naive", "glm", or "irt".
 28 | #' @param control A list of control values
 29 | #' \describe{
 30 | #'  \item{max_iter}{The maximum number of iterations of the EM algorithm.
 31 | #'    The default is 150.}
 32 | #'  \item{eps}{Tolerance parameter used to determine convergence of the
 33 | #'   EM algorithm. Specifically, iterations continue until the Euclidean
 34 | #'   distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps},
 35 | #'   where \eqn{\beta} is the vector of item discrimination parameters.
 36 | #'   \code{eps}=1e-4 by default.}
 37 | #'  \item{max_iter2}{The maximum number of iterations of the conditional
 38 | #'    maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}.
 39 | #'    The default is 15.}
 40 | #'  \item{eps2}{Tolerance parameter used to determine convergence of the
 41 | #'    conditional maximization procedures for updating \eqn{\gamma} and
 42 | #'    \eqn{\lambda}. Specifically, iterations continue until the Euclidean
 43 | #'   distance between two consecutive log likelihoods falls under \code{eps2}.
 44 | #'   \code{eps2}=1e-3 by default.}
 45 | #'  \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.}
 46 | #'  \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.}
 47 | #' }
 48 | #'
 49 | #' @return An object of class \code{hgrm}.
 50 | #'  \item{coefficients}{A data frame of parameter estimates, standard errors,
 51 | #'   z values and p values.}
 52 | #'  \item{scores}{A data frame of EAP estimates of latent preferences and
 53 | #'   their approximate standard errors.}
 54 | #'  \item{vcov}{Variance-covariance matrix of parameter estimates.}
 55 | #'  \item{log_Lik}{The log-likelihood value at convergence.}
 56 | #'  \item{N}{Number of units.}
 57 | #'  \item{J}{Number of items.}
 58 | #'  \item{H}{A vector denoting the number of response categories for each item.}
 59 | #'  \item{ylevels}{A list showing the levels of the factorized response categories.}
 60 | #'  \item{p}{The number of predictors for the mean equation.}
 61 | #'  \item{q}{The number of predictors for the variance equation.}
 62 | #'  \item{control}{List of control values.}
 63 | #'  \item{call}{The matched call.}
 64 | #' @references Zhou, Xiang. 2019. "\href{https://doi.org/10.1017/pan.2018.63}{Hierarchical Item Response Models for Analyzing Public Opinion.}" Political Analysis.
 65 | #' @importFrom rms lrm.fit
 66 | #' @importFrom pryr compose
 67 | #' @importFrom pryr partial
 68 | #' @importFrom ltm grm
 69 | #' @importFrom ltm ltm
 70 | #' @import stats
 71 | #' @export
 72 | #' @examples
 73 | #' y <- nes_econ2008[, -(1:3)]
 74 | #' x <- model.matrix( ~ party * educ, nes_econ2008)
 75 | #' z <- model.matrix( ~ party, nes_econ2008)
 76 | #' nes_m1 <- hgrm(y, x, z)
 77 | #' nes_m1
 78 | 
 79 | hgrm <- function(y, x = NULL, z = NULL, constr = c("latent_scale", "items"),
 80 |                  beta_set = 1L, sign_set = TRUE, init = c("naive", "glm", "irt"),
 81 |                  control = list()) {
 82 | 
 83 |   # match call
 84 |   cl <- match.call()
 85 | 
 86 |   # check y and convert y into data.frame if needed
 87 |   if(missing(y)) stop("`y` must be provided.")
 88 |   if ((!is.data.frame(y) && !is.matrix(y)) || ncol(y) == 1L)
 89 |     stop("'y' must be either a data.frame or a matrix with at least two columns.")
 90 |   if(is.matrix(y)) y <- as.data.frame(y)
 91 | 
 92 |   # number of units and items
 93 |   N <- nrow(y)
 94 |   J <- ncol(y)
 95 | 
 96 |   # convert each y_j into an integer vector
 97 |   y[] <- lapply(y, factor, exclude = c(NA, NaN))
 98 |   ylevels <- lapply(y, levels)
 99 |   y[] <- lapply(y, as.integer)
100 |   if (!is.na(invalid <- match(TRUE, vapply(y, invalid_grm, logical(1L)))))
101 |     stop(paste(names(y)[invalid], "does not have at least two valid responses"))
102 |   H <- vapply(y, max, integer(1L), na.rm = TRUE)
103 | 
104 |   # check x and z (x and z should contain an intercept column)
105 |   x <- x %||% as.matrix(rep(1, N))
106 |   z <- z %||% as.matrix(rep(1, N))
107 |   if (!is.matrix(x)) stop("`x` must be a matrix.")
108 |   if (!is.matrix(z)) stop("`z` must be a matrix.")
109 |   if (nrow(x) != N || nrow(z) != N) stop("both 'x' and 'z' must have the same number of rows as 'y'")
110 |   p <- ncol(x)
111 |   q <- ncol(z)
112 |   colnames(x) <- colnames(x) %||% paste0("x", 1:p)
113 |   colnames(z) <- colnames(z) %||% paste0("x", 1:q)
114 | 
115 |   # check beta_set and sign_set
116 |   stopifnot(beta_set %in% 1:J, is.logical(sign_set))
117 | 
118 |   # check constraint
119 |   constr <- match.arg(constr)
120 |   init <- match.arg(init)
121 | 
122 |   # control parameters
123 |   con <- list(max_iter = 150, max_iter2 = 15, eps = 1e-03, eps2 = 1e-03, K = 25, C = 4)
124 |   con[names(control)] <- control
125 | 
126 |   # set environments for utility functions
127 |   environment(loglik_grm) <- environment(theta_post_grm) <- environment(dummy_fun_grm) <- environment(tab2df_grm) <- environment()
128 | 
129 |   # GL points
130 |   K <- con[["K"]]
131 |   theta_ls <- con[["C"]] * GLpoints[[K]][["x"]]
132 |   qw_ls <- con[["C"]] * GLpoints[[K]][["w"]]
133 | 
134 |   # imputation
135 |   y_imp <- y
136 |   if(anyNA(y)) y_imp[] <- lapply(y, impute)
137 | 
138 |   # pca
139 |   theta_eap <- {
140 |     tmp <- princomp(y_imp, cor = TRUE)$scores[, 1]
141 |     (tmp - mean(tmp, na.rm = TRUE))/sd(tmp, na.rm = TRUE)
142 |   }
143 | 
144 |   # initialization of alpha and beta parameters
145 |   if (init == "naive"){
146 | 
147 |     alpha <- lapply(H, function(x) c(Inf, seq(1, -1, length.out = x - 1), -Inf))
148 |     beta <- vapply(y, function(y) cov(y, theta_eap, use = "complete.obs")/var(theta_eap), double(1L))
149 | 
150 |   } else if (init == "glm"){
151 | 
152 |     pseudo_lrm <- lapply(y_imp, function(y) lrm.fit(theta_eap, y)[["coefficients"]])
153 |     beta <- vapply(pseudo_lrm, function(x) x[[length(x)]], double(1L))
154 |     alpha <- lapply(pseudo_lrm, function(x) c(Inf, x[-length(x)], -Inf))
155 | 
156 |   } else {
157 | 
158 |     grm_coefs <- grm(y)[["coefficients"]]
159 |     beta <- vapply(grm_coefs, function(x) x[[length(x)]], double(1L))
160 |     alpha <- lapply(grm_coefs, function(x) c(Inf, rev(x[-length(x)]), -Inf))
161 | 
162 |   }
163 | 
164 |   # initial values of gamma and lambda
165 |   lm_opr <- tcrossprod(solve(crossprod(x)), x)
166 |   gamma <- lm_opr %*% theta_eap
167 |   lambda <- rep(0, q)
168 |   fitted_mean <- as.double(x %*% gamma)
169 |   fitted_var <- rep(1, N)
170 | 
171 |   # EM algorithm
172 |   for (iter in seq(1, con[["max_iter"]])) {
173 | 
174 |     # store previous parameters
175 |     alpha_prev <- alpha
176 |     beta_prev <- beta
177 |     gamma_prev <- gamma
178 |     lambda_prev <- lambda
179 | 
180 |     # construct w_ik
181 |     posterior <- Map(theta_post_grm, theta_ls, qw_ls)
182 |     w <- {
183 |       tmp <- matrix(unlist(posterior), N, K)
184 |       t(sweep(tmp, 1, rowSums(tmp), FUN = "/"))
185 |     }
186 | 
187 |     # maximization
188 |     pseudo_tab <- Map(dummy_fun_grm, y, H)
189 |     pseudo_y <- lapply(pseudo_tab, tab2df_grm, theta_ls = theta_ls)
190 |     pseudo_lrm <- lapply(pseudo_y, function(df) lrm_fit(df["x"], df[["y"]], weights = df[["wt"]])[["coefficients"]])
191 |     beta <- vapply(pseudo_lrm, function(x) x[[length(x)]], double(1L))
192 |     alpha <- lapply(pseudo_lrm, function(x) c(Inf, x[-length(x)], -Inf))
193 | 
194 |     # EAP and VAP estimates of latent preferences
195 |     theta_eap <- t(theta_ls %*% w)
196 |     theta_vap <- t(theta_ls^2 %*% w) - theta_eap^2
197 | 
198 |     # variance regression
199 |     gamma <- lm_opr %*% theta_eap
200 |     r2 <- (theta_eap - x %*% gamma)^2 + theta_vap
201 |     if (ncol(z)==1) lambda <- log(mean(r2)) else{
202 |       s2 <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))[["fitted.values"]]
203 |       loglik <- -0.5 * (log(s2) + r2/s2)
204 |       LL0 <- sum(loglik)
205 |       dLL <- 1
206 |       for (m in seq(1, con[["max_iter2"]])) {
207 |         gamma <- lm.wfit(x, theta_eap, w = 1/s2)[["coefficients"]]
208 |         r2 <- (theta_eap - x %*% gamma)^2 + theta_vap
209 |         var_reg <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))
210 |         s2 <- var_reg[["fitted.values"]]
211 |         loglik <- -0.5 * (log(s2) + r2/s2)
212 |         LL_temp <- sum(loglik)
213 |         dLL <- LL_temp - LL0
214 |         if (dLL < con[["eps2"]])
215 |           break
216 |         LL0 <- LL_temp
217 |       }
218 |       lambda <- var_reg[["coefficients"]]
219 |     }
220 | 
221 |     # location constraint
222 |     tmp <- mean(x %*% gamma)
223 |     alpha <- Map(function(x, y) x + tmp * y, alpha, beta)
224 |     gamma[[1L]] <- gamma[[1L]] - tmp
225 | 
226 |     # scale constraint
227 |     tmp <- mean(z %*% lambda)
228 |     gamma <- gamma/exp(tmp/2)
229 |     beta <- beta * exp(tmp/2)
230 |     lambda[[1L]] <- lambda[[1L]] - tmp
231 | 
232 |     # direction contraint
233 |     if (sign_set == (beta[[beta_set]] < 0)) {
234 |       gamma <- -gamma
235 |       beta <- -beta
236 |     }
237 | 
238 |     fitted_mean <- as.double(x %*% gamma)
239 |     fitted_var <- exp(as.double(z %*% lambda))
240 |     # cat(beta, "\n")
241 |     # cat(abs(beta - beta_prev), "\n")
242 | 
243 |     cat(".")
244 | 
245 |     # check convergence
246 |     if (sqrt(mean((beta - beta_prev)^2)) < con[["eps"]]) {
247 |       cat("\n converged at iteration", iter, "\n")
248 |       break
249 |     } else if (iter == con[["max_iter"]]) {
250 |       stop("algorithm did not converge; try increasing `max_iter` or decreasing `eps`")
251 |       break
252 |     } else next
253 |   }
254 | 
255 |   gamma <- setNames(as.double(gamma), paste("x", colnames(x), sep = ""))
256 |   lambda <- setNames(as.double(lambda), paste("z", colnames(z), sep = ""))
257 | 
258 |   # inference
259 |   pik <- matrix(unlist(Map(partial(dnorm, x = theta_ls), mean = fitted_mean, sd = sqrt(fitted_var))),
260 |                 N, K, byrow = TRUE) * matrix(qw_ls, N, K, byrow = TRUE)
261 |   Lijk <- lapply(theta_ls, function(theta_k) exp(loglik_grm(alpha = alpha, beta = beta, rep(theta_k, N))))  # K-list
262 |   Lik <- vapply(Lijk, compose(exp, partial(rowSums, na.rm = TRUE), log), double(N))
263 |   Li <- rowSums(Lik * pik)
264 | 
265 |   # log likelihood
266 |   log_Lik <- sum(log(Li))
267 | 
268 |   # outer product of gradients
269 |   environment(sj_ab_grm) <- environment(si_gamma) <- environment(si_lambda) <- environment()
270 |   s_ab <- unname(Reduce(rbind, lapply(1:J, sj_ab_grm)))
271 |   s_lambda <- s_gamma <- NULL
272 |   s_gamma <- vapply(1:N, si_gamma, double(p))
273 |   s_lambda <- vapply(1:N, si_lambda, double(q))
274 | 
275 |   # covariance matrix and standard errors
276 |   s_all <- rbind(s_ab[-c(1L, nrow(s_ab)), , drop = FALSE], s_gamma, s_lambda)
277 |   s_all[is.na(s_all)] <- 0
278 |   covmat <- tryCatch(solve(tcrossprod(s_all)),
279 |                      error = function(e) {warning("The information matrix is singular; SE calculation failed.");
280 |                        matrix(NA, nrow(s_all), nrow(s_all))})
281 |   se_all <- sqrt(diag(covmat))
282 | 
283 |   # reorganize se_all
284 |   sH <- sum(H)
285 |   gamma_indices <- (sH - 1):(sH + p - 2)
286 |   lambda_indices <- (sH + p - 1):(sH + p + q - 2)
287 |   se_all <- c(NA, se_all[1:(sH-2)], NA, se_all[gamma_indices], se_all[lambda_indices])
288 | 
289 |   # name se_all and covmat
290 |   names_ab <- unlist(lapply(names(alpha), function(x) {
291 |     tmp <- alpha[[x]]
292 |     paste(x, c(names(tmp)[-c(1L, length(tmp))], "Dscrmn"))
293 |   }))
294 |   names(se_all) <- c(names_ab, names(gamma), names(lambda))
295 |   rownames(covmat) <- colnames(covmat) <- c(names_ab[-c(1L, length(names_ab))], names(gamma), names(lambda))
296 | 
297 |   # item coefficients
298 |   coef_item <- Map(function(a, b) c(a[-c(1L, length(a))], Dscrmn = b), alpha, beta)
299 | 
300 |   # all coefficients
301 |   coef_all <- c(unlist(coef_item), gamma, lambda)
302 |   coefs <- data.frame(Estimate = coef_all, Std_Error = se_all, z_value = coef_all/se_all,
303 |                       p_value = 2 * (1 - pnorm(abs(coef_all/se_all))))
304 |   rownames(coefs) <- names(se_all)
305 | 
306 |   # item constraints
307 |   if (constr == "items"){
308 | 
309 |     gamma0_prev <- gamma[[1L]]
310 | 
311 |     # location constraint
312 |     alpha_sum <- sum(vapply(alpha, function(x) sum(x[-c(1L, length(x))]), double(1L)))
313 |     beta_sum <- sum((H-1) * beta)
314 |     c1 <- alpha_sum/beta_sum
315 |     gamma[[1L]] <- gamma[[1L]] + c1
316 |     alpha <- Map(function(x, y) x - c1 * y, alpha, beta)
317 | 
318 |     # scale constraint
319 |     c2 <- 2 * mean(log(abs(beta)))
320 |     gamma <- gamma * exp(c2/2)
321 |     lambda[[1L]] <- lambda[[1L]] + c2
322 |     beta <- beta / exp(c2/2)
323 | 
324 |     # fitted means and variances
325 |     fitted_mean <- as.double(x %*% gamma)
326 |     fitted_var <- exp(as.double(z %*% lambda))
327 | 
328 |     # theta_eap and theta_vap
329 |     theta_eap <- (theta_eap - gamma0_prev) * exp(c2/2) + gamma[[1L]]
330 |     theta_vap <- theta_vap * (exp(c2/2))^2
331 | 
332 |     # covmat for new parameterization
333 |     tmp_fun <- function(d) {
334 |       mat <- diag(d)
335 |       mat[d, d] <- exp(-c2/2)
336 |       mat[1:(d-1), d] <- rep(-c1, d-1)
337 |       mat
338 |     }
339 |     A <- Reduce(Matrix::bdiag, lapply(H, tmp_fun))
340 |     A2 <- A[seq(2, nrow(A)-1), seq(2, ncol(A)-1)]
341 |     B <- Matrix::bdiag(exp(c2/2) * diag(p), diag(q))
342 |     C <- Matrix::bdiag(A2, B)
343 |     covmat <- C %*% Matrix::tcrossprod(covmat, C)
344 | 
345 |     se_all <- sqrt(Matrix::diag(covmat))
346 | 
347 |     # reorganize se_all
348 |     sH <- sum(H)
349 |     gamma_indices <- (sH - 1):(sH + p - 2)
350 |     lambda_indices <- (sH + p - 1):(sH + p + q - 2)
351 |     se_all <- c(NA, se_all[1:(sH-2)], NA, se_all[gamma_indices], se_all[lambda_indices])
352 | 
353 |     # name se_all and covmat
354 |     names_ab <- unlist(lapply(names(alpha), function(x) {
355 |       tmp <- alpha[[x]]
356 |       paste(x, c(names(tmp)[-c(1L, length(tmp))], "Dscrmn"))
357 |     }))
358 |     names(se_all) <- c(names_ab, names(gamma), names(lambda))
359 |     rownames(covmat) <- colnames(covmat) <- c(names_ab[-c(1L, length(names_ab))], names(gamma), names(lambda))
360 | 
361 |     # item coefficients
362 |     coef_item <- Map(function(a, b) c(a[-c(1L, length(a))], Dscrmn = b), alpha, beta)
363 | 
364 |     # all coefficients
365 |     coef_all <- c(unlist(coef_item), gamma, lambda)
366 |     coefs <- data.frame(Estimate = coef_all, Std_Error = se_all, z_value = coef_all/se_all,
367 |                         p_value = 2 * (1 - pnorm(abs(coef_all/se_all))))
368 |     rownames(coefs) <- names(se_all)
369 |   }
370 | 
371 |   # ability parameter estimates
372 |   theta <- data.frame(post_mean = theta_eap, post_sd = sqrt(theta_vap),
373 |                       prior_mean = fitted_mean, prior_sd = sqrt(fitted_var))
374 | 
375 |   # output
376 |   out <- list(coefficients = coefs, scores = theta, vcov = covmat, log_Lik = log_Lik, constr = constr,
377 |               N = N, J = J, H = H, ylevels = ylevels, p = p, q = q, control = con, call = cl)
378 |   class(out) <- c("hgrm", "hIRT")
379 |   out
380 | }
381 | 
382 | 


--------------------------------------------------------------------------------
/R/hgrm2.R:
--------------------------------------------------------------------------------
  1 | #' Hierarchical Graded Response Models with Known Item Parameters
  2 | #'
  3 | #' \code{hgrm2} fits a hierarchical graded response model where the item parameters
  4 | #'   are known and supplied by the user.
  5 | #'
  6 | #' @param y A data frame or matrix of item responses.
  7 | #' @param x An optional model matrix, including the intercept term, that predicts the
  8 | #'   mean of the latent preference. If not supplied, only the intercept term is included.
  9 | #' @param z An optional model matrix, including the intercept term, that predicts the
 10 | #'   variance of the latent preference. If not supplied, only the intercept term is included.
 11 | #' @param item_coefs A list of known item parameters. The parameters of item \eqn{j} are given
 12 | #'   by the \eqn{j}th element, which should be a vector of length \eqn{H_j}, containing
 13 | #'   \eqn{H_j - 1} item difficulty parameters (in descending order) and one item discrimination
 14 | #'   parameter.
 15 | #' @param control A list of control values
 16 | #' \describe{
 17 | #'  \item{max_iter}{The maximum number of iterations of the EM algorithm.
 18 | #'    The default is 150.}
 19 | #'  \item{eps}{Tolerance parameter used to determine convergence of the
 20 | #'   EM algorithm. Specifically, iterations continue until the Euclidean
 21 | #'   distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps},
 22 | #'   where \eqn{\beta} is the vector of item discrimination parameters.
 23 | #'   \code{eps}=1e-4 by default.}
 24 | #'  \item{max_iter2}{The maximum number of iterations of the conditional
 25 | #'    maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}.
 26 | #'    The default is 15.}
 27 | #'  \item{eps2}{Tolerance parameter used to determine convergence of the
 28 | #'    conditional maximization procedures for updating \eqn{\gamma} and
 29 | #'    \eqn{\lambda}. Specifically, iterations continue until the Euclidean
 30 | #'   distance between two consecutive log likelihoods falls under \code{eps2}.
 31 | #'   \code{eps2}=1e-3 by default.}
 32 | #'  \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.}
 33 | #'  \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.}
 34 | #' }
 35 | #'
 36 | #' @return An object of class \code{hgrm}.
 37 | #'  \item{coefficients}{A data frame of parameter estimates, standard errors,
 38 | #'   z values and p values.}
 39 | #'  \item{scores}{A data frame of EAP estimates of latent preferences and
 40 | #'   their approximate standard errors.}
 41 | #'  \item{vcov}{Variance-covariance matrix of parameter estimates.}
 42 | #'  \item{log_Lik}{The log-likelihood value at convergence.}
 43 | #'  \item{N}{Number of units.}
 44 | #'  \item{J}{Number of items.}
 45 | #'  \item{H}{A vector denoting the number of response categories for each item.}
 46 | #'  \item{ylevels}{A list showing the levels of the factorized response categories.}
 47 | #'  \item{p}{The number of predictors for the mean equation.}
 48 | #'  \item{q}{The number of predictors for the variance equation.}
 49 | #'  \item{control}{List of control values.}
 50 | #'  \item{call}{The matched call.}
 51 | #' @importFrom rms lrm.fit
 52 | #' @importFrom pryr compose
 53 | #' @importFrom pryr partial
 54 | #' @import stats
 55 | #' @export
 56 | #' @examples
 57 | #'
 58 | #' y <- nes_econ2008[, -(1:3)]
 59 | #' x <- model.matrix( ~ party * educ, nes_econ2008)
 60 | #' z <- model.matrix( ~ party, nes_econ2008)
 61 | #'
 62 | #' n <- nrow(nes_econ2008)
 63 | #' id_train <- sample.int(n, n/4)
 64 | #' id_test <- setdiff(1:n, id_train)
 65 | #'
 66 | #' y_train <- y[id_train, ]
 67 | #' x_train <- x[id_train, ]
 68 | #' z_train <- z[id_train, ]
 69 | #'
 70 | #' mod_train <- hgrm(y_train, x_train, z_train)
 71 | #'
 72 | #' y_test <- y[id_test, ]
 73 | #' x_test <- x[id_test, ]
 74 | #' z_test <- z[id_test, ]
 75 | #'
 76 | #' item_coefs <- lapply(coef_item(mod_train), `[[`, "Estimate")
 77 | #'
 78 | #' model_test <- hgrm2(y_test, x_test, z_test, item_coefs = item_coefs)
 79 | 
 80 | hgrm2 <- function(y, x = NULL, z = NULL, item_coefs, control = list()) {
 81 | 
 82 |   # match call
 83 |   cl <- match.call()
 84 | 
 85 |   # check y and convert y into data.frame if needed
 86 |   if(missing(y)) stop("`y` must be provided.")
 87 |   if ((!is.data.frame(y) && !is.matrix(y)) || ncol(y) == 1L)
 88 |     stop("'y' must be either a data.frame or a matrix with at least two columns.")
 89 |   if(is.matrix(y)) y <- as.data.frame(y)
 90 | 
 91 |   # number of units and items
 92 |   N <- nrow(y)
 93 |   J <- ncol(y)
 94 | 
 95 |   # convert each y_j into an integer vector
 96 |   y[] <- lapply(y, factor, exclude = c(NA, NaN))
 97 |   ylevels <- lapply(y, levels)
 98 |   y[] <- lapply(y, as.integer)
 99 |   if (!is.na(invalid <- match(TRUE, vapply(y, invalid_grm, logical(1L)))))
100 |     stop(paste(names(y)[invalid], "does not have at least two valid responses"))
101 |   H <- vapply(y, max, integer(1L), na.rm = TRUE)
102 | 
103 |   # extract item parameters
104 |   if(missing(item_coefs))
105 |     stop("`item_coefs` must be supplied.")
106 |   if(!is.list(item_coefs) || length(item_coefs) != J)
107 |     stop("`item_coefs` must be a list of `ncol(y)` elements")
108 |   item_coefs_H <- vapply(item_coefs, length, integer(1L))
109 |   if(!all.equal(item_coefs_H, H))
110 |     stop("`item_coefs` do not match the number of response categories in `y`")
111 |   alpha <- lapply(item_coefs, function(x) c(Inf, x[-length(x)], -Inf))
112 |   beta <- vapply(item_coefs, function(x) x[[length(x)]], double(1L))
113 | 
114 |   # check x and z (x and z should contain an intercept column)
115 |   x <- x %||% as.matrix(rep(1, N))
116 |   z <- z %||% as.matrix(rep(1, N))
117 |   if (!is.matrix(x)) stop("`x` must be a matrix.")
118 |   if (!is.matrix(z)) stop("`z` must be a matrix.")
119 |   if (nrow(x) != N || nrow(z) != N) stop("both 'x' and 'z' must have the same number of rows as 'y'")
120 |   p <- ncol(x)
121 |   q <- ncol(z)
122 |   colnames(x) <- colnames(x) %||% paste0("x", 1:p)
123 |   colnames(z) <- colnames(z) %||% paste0("x", 1:q)
124 | 
125 |   # control parameters
126 |   con <- list(max_iter = 150, max_iter2 = 15, eps = 1e-03, eps2 = 1e-03, K = 25, C = 4)
127 |   con[names(control)] <- control
128 | 
129 |   # set environments for utility functions
130 |   environment(loglik_grm) <- environment(theta_post_grm) <- environment(dummy_fun_grm) <- environment(tab2df_grm) <- environment()
131 | 
132 |   # GL points
133 |   K <- con[["K"]]
134 |   theta_ls <- con[["C"]] * GLpoints[[K]][["x"]]
135 |   qw_ls <- con[["C"]] * GLpoints[[K]][["w"]]
136 | 
137 |   # imputation
138 |   y_imp <- y
139 |   if(anyNA(y)) y_imp[] <- lapply(y, impute)
140 | 
141 |   # pca for initial values of theta_eap
142 |   theta_eap <- {
143 |     tmp <- princomp(y_imp, cor = TRUE)$scores[, 1]
144 |     (tmp - mean(tmp, na.rm = TRUE))/sd(tmp, na.rm = TRUE)
145 |   }
146 | 
147 |   # initial values of gamma and lambda
148 |   lm_opr <- tcrossprod(solve(crossprod(x)), x)
149 |   gamma <- lm_opr %*% theta_eap
150 |   lambda <- rep(0, q)
151 |   fitted_mean <- as.double(x %*% gamma)
152 |   fitted_var <- rep(1, N)
153 | 
154 |   # EM algorithm
155 |   for (iter in seq(1, con[["max_iter"]])) {
156 | 
157 |     # store previous parameters
158 |     # alpha_prev <- alpha
159 |     # beta_prev <- beta
160 |     gamma_prev <- gamma
161 |     lambda_prev <- lambda
162 | 
163 |     # construct w_ik
164 |     posterior <- Map(theta_post_grm, theta_ls, qw_ls)
165 |     w <- {
166 |       tmp <- matrix(unlist(posterior), N, K)
167 |       t(sweep(tmp, 1, rowSums(tmp), FUN = "/"))
168 |     }
169 | 
170 |     # # maximization
171 |     # pseudo_tab <- Map(dummy_fun_grm, y, H)
172 |     # pseudo_y <- lapply(pseudo_tab, tab2df_grm, theta_ls = theta_ls)
173 |     # pseudo_lrm <- lapply(pseudo_y, function(df) lrm.fit(df[["x"]], df[["y"]], weights = df[["wt"]])[["coefficients"]])
174 |     # beta <- vapply(pseudo_lrm, function(x) x[[length(x)]], double(1L))
175 |     # alpha <- lapply(pseudo_lrm, function(x) c(Inf, x[-length(x)], -Inf))
176 | 
177 |     # EAP and VAP estimates of latent preferences
178 |     theta_eap <- t(theta_ls %*% w)
179 |     theta_vap <- t(theta_ls^2 %*% w) - theta_eap^2
180 | 
181 |     # variance regression
182 |     gamma <- lm_opr %*% theta_eap
183 |     r2 <- (theta_eap - x %*% gamma)^2 + theta_vap
184 |     if (ncol(z)==1) lambda <- log(mean(r2)) else{
185 |       s2 <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))[["fitted.values"]]
186 |       loglik <- -0.5 * (log(s2) + r2/s2)
187 |       LL0 <- sum(loglik)
188 |       dLL <- 1
189 |       for (m in seq(1, con[["max_iter2"]])) {
190 |         gamma <- lm.wfit(x, theta_eap, w = 1/s2)[["coefficients"]]
191 |         r2 <- (theta_eap - x %*% gamma)^2 + theta_vap
192 |         var_reg <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))
193 |         s2 <- var_reg[["fitted.values"]]
194 |         loglik <- -0.5 * (log(s2) + r2/s2)
195 |         LL_temp <- sum(loglik)
196 |         dLL <- LL_temp - LL0
197 |         if (dLL < con[["eps2"]])
198 |           break
199 |         LL0 <- LL_temp
200 |       }
201 |       lambda <- var_reg[["coefficients"]]
202 |     }
203 | 
204 |     fitted_mean <- as.double(x %*% gamma)
205 |     fitted_var <- exp(as.double(z %*% lambda))
206 |     cat(".")
207 | 
208 |     if (sqrt(mean((gamma/gamma_prev - 1)^2)) < con[["eps"]]) {
209 |       cat("\n converged at iteration", iter, "\n")
210 |       break
211 |     } else if (iter == con[["max_iter"]]) {
212 |       stop("algorithm did not converge; try increasing `max_iter` or decreasing `eps`")
213 |       break
214 |     } else next
215 |   }
216 | 
217 |   gamma <- setNames(as.double(gamma), paste("x", colnames(x), sep = ""))
218 |   lambda <- setNames(as.double(lambda), paste("z", colnames(z), sep = ""))
219 | 
220 |   # inference
221 |   pik <- matrix(unlist(Map(partial(dnorm, x = theta_ls), mean = fitted_mean, sd = sqrt(fitted_var))),
222 |                 N, K, byrow = TRUE) * matrix(qw_ls, N, K, byrow = TRUE)
223 |   Lijk <- lapply(theta_ls, function(theta_k) exp(loglik_grm(alpha = alpha, beta = beta, rep(theta_k, N))))  # K-list
224 |   Lik <- vapply(Lijk, compose(exp, partial(rowSums, na.rm = TRUE), log), double(N))
225 |   Li <- rowSums(Lik * pik)
226 | 
227 |   # log likelihood
228 |   log_Lik <- sum(log(Li))
229 | 
230 |   # outer product of gradients
231 |   environment(sj_ab_grm) <- environment(si_gamma) <- environment(si_lambda) <- environment()
232 |   # s_ab <- unname(Reduce(rbind, lapply(1:J, sj_ab_grm)))
233 |   s_gamma <- vapply(1:N, si_gamma, double(p))
234 |   s_lambda <- vapply(1:N, si_lambda, double(q))
235 | 
236 |   # covariance matrix
237 |   s_all <- rbind(s_gamma, s_lambda)
238 |   s_all[is.na(s_all)] <- 0
239 |   covmat <- tryCatch(solve(tcrossprod(s_all)),
240 |                      error = function(e) {warning("The information matrix is singular; SE calculation failed.");
241 |                        matrix(NA, nrow(s_all), nrow(s_all))})
242 | 
243 |   # reorganize se_all
244 |   sH <- sum(H)
245 |   gamma_indices <- (sH - 1):(sH + p - 2)
246 |   lambda_indices <- (sH + p - 1):(sH + p + q - 2)
247 |   se_all <- c(rep(0, sH), sqrt(diag(covmat)))
248 | 
249 |   # name se_all and covmat
250 |   names_ab <- unlist(lapply(names(alpha), function(x) {
251 |     tmp <- alpha[[x]]
252 |     paste(x, c(paste0("y>=", seq(2, length(tmp)-1)), "Dscrmn"))
253 |   }))
254 |   names(se_all) <- c(names_ab, names(gamma), names(lambda))
255 |   rownames(covmat) <- colnames(covmat) <- c(names(gamma), names(lambda))
256 | 
257 |   # item coefficients
258 |   coef_item <- Map(function(a, b) c(a[-c(1L, length(a))], Dscrmn = b), alpha, beta)
259 | 
260 |   # all coefficients
261 |   coef_all <- c(unlist(coef_item), gamma, lambda)
262 |   coefs <- data.frame(Estimate = coef_all, Std_Error = se_all, z_value = coef_all/se_all,
263 |                       p_value = 2 * (1 - pnorm(abs(coef_all/se_all))))
264 |   rownames(coefs) <- names(se_all)
265 | 
266 |   # ability parameter estimates
267 |   theta <- data.frame(post_mean = theta_eap, post_sd = sqrt(theta_vap),
268 |                       prior_mean = fitted_mean, prior_sd = sqrt(fitted_var))
269 | 
270 |   # output
271 |   out <- list(coefficients = coefs, scores = theta, vcov = covmat, log_Lik = log_Lik,
272 |               N = N, J = J, H = H, ylevels = ylevels, p = p, q = q, control = con, call = cl)
273 |   class(out) <- c("hgrm", "hIRT")
274 |   out
275 | }
276 | 


--------------------------------------------------------------------------------
/R/hgrmDIF.R:
--------------------------------------------------------------------------------
  1 | #' Hierarchical Graded Response Models with Differential Item Functioning
  2 | #'
  3 | #' \code{hgrmDIF} fits a hierarchical graded response model similar to hgrm(), but person-specific
  4 | #' covariates \code{x} are allowed to affect item responses directly (not via the latent preference).
  5 | #' This model can be used to test for the presence of differential item functioning.
  6 | #'
  7 | #' @param y A data frame or matrix of item responses.
  8 | #' @param x An optional model matrix, including the intercept term, that predicts the
  9 | #'   mean of the latent preference. If not supplied, only the intercept term is included.
 10 | #' @param z An optional model matrix, including the intercept term, that predicts the
 11 | #'   variance of the latent preference. If not supplied, only the intercept term is included.
 12 | #' @param x0 A matrix specifying the covariates by which differential item functioning operates. If not supplied,
 13 | #'   \code{x0} is taken to be a matrix containing all predictors in \code{x} except the intercept.
 14 | #' @param items_dif The indices of the items for which differential item functioning is tested.
 15 | #' @param form_dif Form of differential item functioning being tested. Either "uniform" or "non-uniform."
 16 | #' @param constr The type of constraints used to identify the model: "latent_scale",
 17 | #'   or "items". The default, "latent_scale" constrains the mean of latent preferences
 18 | #'   to zero and the geometric mean of prior variance to one; "items" places constraints
 19 | #'   on item parameters instead and sets the mean of item difficulty parameters to zero
 20 | #'   and the geometric mean of the discrimination parameters to one. Currently, only "latent_scale"
 21 | #'   is supported in hgrmDIF().
 22 | #' @param beta_set The index of the item for which the discrimination parameter is
 23 | #'   restricted to be positive (or negative). It may take any integer value from
 24 | #'   1 to \code{ncol(y)}.
 25 | #' @param sign_set Logical. Should the discrimination parameter of
 26 | #'   the corresponding item (indexed by \code{beta_set}) be positive
 27 | #'   (if \code{TRUE}) or negative (if \code{FALSE})?
 28 | #' @param init A character string indicating how item parameters are initialized. It can be
 29 | #'   "naive", "glm", or "irt".
 30 | #' @param control A list of control values
 31 | #' \describe{
 32 | #'  \item{max_iter}{The maximum number of iterations of the EM algorithm.
 33 | #'    The default is 150.}
 34 | #'  \item{eps}{Tolerance parameter used to determine convergence of the
 35 | #'   EM algorithm. Specifically, iterations continue until the Euclidean
 36 | #'   distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps},
 37 | #'   where \eqn{\beta} is the vector of item discrimination parameters.
 38 | #'   \code{eps}=1e-4 by default.}
 39 | #'  \item{max_iter2}{The maximum number of iterations of the conditional
 40 | #'    maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}.
 41 | #'    The default is 15.}
 42 | #'  \item{eps2}{Tolerance parameter used to determine convergence of the
 43 | #'    conditional maximization procedures for updating \eqn{\gamma} and
 44 | #'    \eqn{\lambda}. Specifically, iterations continue until the Euclidean
 45 | #'   distance between two consecutive log likelihoods falls under \code{eps2}.
 46 | #'   \code{eps2}=1e-3 by default.}
 47 | #'  \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.}
 48 | #'  \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.}
 49 | #' }
 50 | #'
 51 | #' @return An object of class \code{hgrm}.
 52 | #'  \item{coefficients}{A data frame of parameter estimates, standard errors,
 53 | #'   z values and p values.}
 54 | #'  \item{scores}{A data frame of EAP estimates of latent preferences and
 55 | #'   their approximate standard errors.}
 56 | #'  \item{vcov}{Variance-covariance matrix of parameter estimates.}
 57 | #'  \item{log_Lik}{The log-likelihood value at convergence.}
 58 | #'  \item{N}{Number of units.}
 59 | #'  \item{J}{Number of items.}
 60 | #'  \item{H}{A vector denoting the number of response categories for each item.}
 61 | #'  \item{ylevels}{A list showing the levels of the factorized response categories.}
 62 | #'  \item{p}{The number of predictors for the mean equation.}
 63 | #'  \item{q}{The number of predictors for the variance equation.}
 64 | #'  \item{p0}{The number of predictors for items with DIF.}
 65 | #'  \item{coef_item}{Item coefficient estimates.}
 66 | #'  \item{control}{List of control values.}
 67 | #'  \item{call}{The matched call.}
 68 | #' @importFrom rms lrm.fit
 69 | #' @importFrom pryr compose
 70 | #' @importFrom pryr partial
 71 | #' @importFrom ltm grm
 72 | #' @importFrom ltm ltm
 73 | #' @import stats
 74 | #' @export
 75 | #' @examples
 76 | #' y <- nes_econ2008[, -(1:3)]
 77 | #' x <- model.matrix( ~ party * educ, nes_econ2008)
 78 | #' nes_m2 <- hgrmDIF(y, x, items_dif = 1:2)
 79 | #' coef_item(nes_m2)
 80 | 
 81 | hgrmDIF <- function(y, x = NULL, z = NULL, x0 = x[, -1, drop = FALSE],
 82 |                     items_dif = 1L, form_dif = c("uniform", "non-uniform"),
 83 |                     constr = c("latent_scale"), beta_set = 1L, sign_set = TRUE,
 84 |                     init = c("naive", "glm", "irt"), control = list()) {
 85 | 
 86 |   # match call
 87 |   cl <- match.call()
 88 | 
 89 |   # check y and convert y into data.frame if needed
 90 |   if(missing(y)) stop("`y` must be provided.")
 91 |   if ((!is.data.frame(y) && !is.matrix(y)) || ncol(y) == 1L)
 92 |     stop("'y' must be either a data.frame or a matrix with at least two columns.")
 93 |   if(is.matrix(y)) y <- as.data.frame(y)
 94 | 
 95 |   # number of units and items
 96 |   N <- nrow(y)
 97 |   J <- ncol(y)
 98 | 
 99 |   # convert each y_j into an integer vector
100 |   y[] <- lapply(y, factor, exclude = c(NA, NaN))
101 |   ylevels <- lapply(y, levels)
102 |   y[] <- lapply(y, as.integer)
103 |   if (!is.na(invalid <- match(TRUE, vapply(y, invalid_grm, logical(1L)))))
104 |     stop(paste(names(y)[invalid], "does not have at least two valid responses"))
105 |   H <- vapply(y, max, integer(1L), na.rm = TRUE)
106 | 
107 |   # check x0
108 |   if(!is.matrix(x0)) x0 <- as.matrix(x0)
109 | 
110 |   # check x and z (x and z should contain an intercept column)
111 |   x <- x %||% as.matrix(rep(1, N))
112 |   z <- z %||% as.matrix(rep(1, N))
113 |   if (!is.matrix(x)) stop("`x` must be a matrix.")
114 |   if (!is.matrix(z)) stop("`z` must be a matrix.")
115 |   if (nrow(x) != N || nrow(z) != N) stop("both 'x' and 'z' must have the same number of rows as 'y'")
116 |   p <- ncol(x)
117 |   q <- ncol(z)
118 |   p0 <- ncol(x0)
119 |   colnames(x) <- colnames(x) %||% paste0("x", 1:p)
120 |   colnames(z) <- colnames(z) %||% paste0("z", 1:q)
121 |   colnames(x0) <- colnames(x0) %||% paste0("x0", 1:p0)
122 | 
123 |   # check item, beta_set and sign_set
124 |   stopifnot(beta_set %in% 1:J, is.logical(sign_set))
125 | 
126 |   # check constraint
127 |   constr <- match.arg(constr)
128 |   init <- match.arg(init)
129 |   form_dif <- match.arg(form_dif)
130 | 
131 |   # control parameters
132 |   con <- list(max_iter = 150, max_iter2 = 15, eps = 1e-03, eps2 = 1e-03, K = 25, C = 4)
133 |   con[names(control)] <- control
134 | 
135 |   # set environments for utility functions
136 |   environment(x0DIF) <- environment(loglik_grmDIF) <- environment(theta_post_grmDIF) <- environment(dummy_fun_grm) <- environment(tab2df_grm) <- environment()
137 | 
138 |   # GL points
139 |   K <- con[["K"]]
140 |   theta_ls <- con[["C"]] * GLpoints[[K]][["x"]]
141 |   qw_ls <- con[["C"]] * GLpoints[[K]][["w"]]
142 | 
143 |   # imputation
144 |   y_imp <- y
145 |   if(anyNA(y)) y_imp[] <- lapply(y, impute)
146 | 
147 |   # pca
148 |   theta_eap <- {
149 |     tmp <- princomp(y_imp, cor = TRUE)$scores[, 1]
150 |     (tmp - mean(tmp, na.rm = TRUE))/sd(tmp, na.rm = TRUE)
151 |   }
152 | 
153 |   # initialization of alpha and beta parameters
154 |   if (init == "naive"){
155 | 
156 |     alpha <- lapply(H, function(x) c(Inf, seq(1, -1, length.out = x - 1), -Inf))
157 |     beta <- vapply(y, function(y) cov(y, theta_eap, use = "complete.obs")/var(theta_eap), double(1L))
158 | 
159 |   } else if (init == "glm"){
160 | 
161 |     pseudo_lrm <- lapply(y_imp, function(y) lrm.fit(theta_eap, y)[["coefficients"]])
162 |     beta <- vapply(pseudo_lrm, function(x) x[[length(x)]], double(1L))
163 |     alpha <- lapply(pseudo_lrm, function(x) c(Inf, x[-length(x)], -Inf))
164 | 
165 |   } else {
166 | 
167 |     grm_coefs <- grm(y)[["coefficients"]]
168 |     beta <- vapply(grm_coefs, function(x) x[[length(x)]], double(1L))
169 |     alpha <- lapply(grm_coefs, function(x) c(Inf, rev(x[-length(x)]), -Inf))
170 | 
171 |   }
172 | 
173 |   # initialization of x0DIFnames, x0_lrm, and eta
174 |   tmp_theta <- rep(theta_ls, N)
175 |   tmp_x0 <- apply(x0, 2, rep, each = K)
176 |   if(form_dif == "uniform") {
177 |     x0DIFnames <- colnames(x0)
178 |     x0_lrm <- cbind(tmp_theta, tmp_x0)
179 |     eta <- lapply(1:J, function(j) if(j %in% items_dif) rep(0, p0) else double(0L))
180 |   } else{
181 |     x0DIFnames <- c(colnames(x0), paste0("Dscrmn * ", colnames(x0)))
182 |     x0_lrm <- cbind(tmp_theta, tmp_x0, tmp_theta * tmp_x0)
183 |     eta <- lapply(1:J, function(j) if(j %in% items_dif) rep(0, 2 * p0) else double(0L))
184 |   }
185 |   colnames(x0_lrm) <- c("theta", x0DIFnames)
186 |   names(eta) <- names(alpha) <- names(H)
187 | 
188 |   # initial values of gamma and lambda
189 |   lm_opr <- tcrossprod(solve(crossprod(x)), x)
190 |   gamma <- lm_opr %*% theta_eap
191 |   lambda <- rep(0, q)
192 |   fitted_mean <- as.double(x %*% gamma)
193 |   fitted_var <- rep(1, N)
194 |   pseudo_lrm <- vector(mode = "list", length = J)
195 | 
196 |   # EM algorithm
197 |   for (iter in seq(1, con[["max_iter"]])) {
198 | 
199 |     # store previous parameters
200 |     alpha_prev <- alpha
201 |     beta_prev <- beta
202 |     gamma_prev <- gamma
203 |     lambda_prev <- lambda
204 |     eta_prev <- eta
205 | 
206 |     # construct w_ik
207 |     # list of length K, each element an N-vector
208 |     posterior <- Map(theta_post_grmDIF, theta_ls, qw_ls)
209 |     # K-by-N matrix
210 |     w <- {
211 |       tmp <- matrix(unlist(posterior), N, K)
212 |       t(sweep(tmp, 1, rowSums(tmp), FUN = "/"))
213 |     }
214 | 
215 |     # maximization with DIF
216 |     w_lrm <- as.vector(w)
217 |     for (j in seq(1, J)){
218 |       y_lrm <- rep(y[[j]], each = K)
219 |       pseudo_lrm[[j]] <- if(j %in% items_dif) lrm_fit(x0_lrm, y_lrm, weights = w_lrm)[["coefficients"]] else
220 |         lrm_fit(cbind(theta = tmp_theta), y_lrm, weights = w_lrm)[["coefficients"]]
221 |     }
222 |     beta <- vapply(pseudo_lrm, function(xx) xx[["theta"]], double(1L))
223 |     eta <- lapply(1:J, function(j) if(j %in% items_dif) pseudo_lrm[[j]][-(1:H[[j]])] else double(0L))
224 |     alpha <- Map(function(xx, H_j) c(Inf, xx[1:(H_j-1)], -Inf), pseudo_lrm, H)
225 |     names(alpha) <- names(H)
226 | 
227 |     # # maximization in hgrm()
228 |     # # list of length J, each element a K-by-H_j matrix containing
229 |     # # the pseudo number of people with theta^k choosing h
230 |     # pseudo_tab <- Map(dummy_fun_grm, y, H)
231 |     # # list of length J, each element a data frame with K*H_j units and 3 columns: y, x(theta), and wt
232 |     # pseudo_y <- lapply(pseudo_tab, tab2df_grm, theta_ls = theta_ls)
233 |     # # weighted lrm on each element of pseudo_y
234 |     # pseudo_lrm <- lapply(pseudo_y, function(df) lrm_fit(df[["x"]], df[["y"]], weights = df[["wt"]])[["coefficients"]])
235 |     # beta <- vapply(pseudo_lrm, function(x) x[[length(x)]], double(1L))
236 |     # alpha <- lapply(pseudo_lrm, function(x) c(Inf, x[-length(x)], -Inf))
237 | 
238 |     # EAP and VAP estimates of latent preferences
239 |     theta_eap <- t(theta_ls %*% w)
240 |     theta_vap <- t(theta_ls^2 %*% w) - theta_eap^2
241 | 
242 |     # variance regression
243 |     gamma <- lm_opr %*% theta_eap
244 |     r2 <- (theta_eap - x %*% gamma)^2 + theta_vap
245 |     if (ncol(z)==1) lambda <- log(mean(r2)) else{
246 |       s2 <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))[["fitted.values"]]
247 |       loglik <- -0.5 * (log(s2) + r2/s2)
248 |       LL0 <- sum(loglik)
249 |       dLL <- 1
250 |       for (m in seq(1, con[["max_iter2"]])) {
251 |         gamma <- lm.wfit(x, theta_eap, w = 1/s2)[["coefficients"]]
252 |         r2 <- (theta_eap - x %*% gamma)^2 + theta_vap
253 |         var_reg <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))
254 |         s2 <- var_reg[["fitted.values"]]
255 |         loglik <- -0.5 * (log(s2) + r2/s2)
256 |         LL_temp <- sum(loglik)
257 |         dLL <- LL_temp - LL0
258 |         if (dLL < con[["eps2"]])
259 |           break
260 |         LL0 <- LL_temp
261 |       }
262 |       lambda <- var_reg[["coefficients"]]
263 |     }
264 | 
265 |     # location constraint
266 |     tmp <- mean(x %*% gamma)
267 |     alpha <- Map(function(x, y) x + tmp * y, alpha, beta)
268 |     gamma[[1L]] <- gamma[[1L]] - tmp
269 | 
270 |     # scale constraint
271 |     tmp <- mean(z %*% lambda)
272 |     gamma <- gamma/exp(tmp/2)
273 |     beta <- beta * exp(tmp/2)
274 |     lambda[[1L]] <- lambda[[1L]] - tmp
275 | 
276 |     # direction contraint
277 |     if (sign_set == (beta[[beta_set]] < 0)) {
278 |       gamma <- -gamma
279 |       beta <- -beta
280 |     }
281 | 
282 |     fitted_mean <- as.double(x %*% gamma)
283 |     fitted_var <- exp(as.double(z %*% lambda))
284 |     # cat(beta, "\n")
285 |     # cat(abs(beta - beta_prev), "\n")
286 | 
287 |     cat(".")
288 | 
289 |     # check convergence
290 |     if (sqrt(mean((beta - beta_prev)^2)) < con[["eps"]]) {
291 |       cat("\n converged at iteration", iter, "\n")
292 |       break
293 |     } else if (iter == con[["max_iter"]]) {
294 |       stop("algorithm did not converge; try increasing `max_iter` or decreasing `eps`")
295 |       break
296 |     } else next
297 |   }
298 | 
299 |   gamma <- setNames(as.double(gamma), paste("x", colnames(x), sep = ""))
300 |   lambda <- setNames(as.double(lambda), paste("z", colnames(z), sep = ""))
301 | 
302 |   # inference
303 |   # pik equals p_ik * w_k in Zhou2019supp
304 |   pik <- matrix(unlist(Map(partial(dnorm, x = theta_ls), mean = fitted_mean, sd = sqrt(fitted_var))),
305 |                 N, K, byrow = TRUE) * matrix(qw_ls, N, K, byrow = TRUE)
306 |   Lijk <- lapply(theta_ls, function(theta_k) exp(loglik_grmDIF(alpha = alpha, beta = beta, eta = eta, rep(theta_k, N))))  # K-list
307 |   Lik <- vapply(Lijk, compose(exp, partial(rowSums, na.rm = TRUE), log), double(N))
308 |   Li <- rowSums(Lik * pik)
309 | 
310 |   # log likelihood
311 |   log_Lik <- sum(log(Li))
312 | 
313 |   # outer product of gradients
314 |   environment(sj_ab_grmDIF) <- environment(sj_ab_grm) <- environment(si_gamma) <- environment(si_lambda) <- environment()
315 |   s_abe <- unname(Reduce(rbind, lapply(1:J, sj_ab_grmDIF)))
316 |   s_lambda <- s_gamma <- NULL
317 |   s_gamma <- vapply(1:N, si_gamma, double(p))
318 |   s_lambda <- vapply(1:N, si_lambda, double(q))
319 | 
320 |   # covariance matrix and standard errors
321 |   s_all <- rbind(s_abe[-c(1L, nrow(s_abe)), , drop = FALSE], s_gamma, s_lambda)
322 |   s_all[is.na(s_all)] <- 0
323 |   covmat <- tryCatch(solve(tcrossprod(s_all)),
324 |                      error = function(e) {warning("The information matrix is singular; SE calculation failed.");
325 |                        matrix(NA, nrow(s_all), nrow(s_all))})
326 |   se_all <- sqrt(diag(covmat))
327 | 
328 |   # reorganize se_all
329 |   sH <- sum(H) + p0 * length(items_dif) + p0 * as.double(form_dif == "non-uniform") * length(items_dif)
330 |   gamma_indices <- (sH - 1):(sH + p - 2)
331 |   lambda_indices <- (sH + p - 1):(sH + p + q - 2)
332 |   se_all <- c(NA, se_all[1:(sH-2)], NA, se_all[gamma_indices], se_all[lambda_indices])
333 | 
334 |   # name se_all and covmat
335 |   names(alpha) <- names(H)
336 |   names_abe <- unlist(lapply(1:J, function(j) {
337 |     itemname <- names(alpha)[[j]]
338 |     tmp <- alpha[[j]]
339 |     if(j %in% items_dif) paste(itemname, c(names(tmp)[-c(1L, length(tmp))], x0DIFnames, "Dscrmn")) else{
340 |       paste(itemname, c(names(tmp)[-c(1L, length(tmp))], "Dscrmn"))
341 |     }
342 |   }))
343 |   names(se_all) <- c(names_abe, names(gamma), names(lambda))
344 |   rownames(covmat) <- colnames(covmat) <- c(names_abe[-c(1L, length(names_abe))], names(gamma), names(lambda))
345 | 
346 |   # item coefficients
347 |   coef_item <- Map(function(a, b, c) c(a[-c(1L, length(a))], c, Dscrmn = b), alpha, beta, eta)
348 | 
349 |   # all coefficients
350 |   coef_all <- c(unlist(coef_item), gamma, lambda)
351 |   coefs <- data.frame(Estimate = coef_all, Std_Error = se_all, z_value = coef_all/se_all,
352 |                       p_value = 2 * (1 - pnorm(abs(coef_all/se_all))))
353 | 
354 |   # ability parameter estimates
355 |   theta <- data.frame(post_mean = theta_eap, post_sd = sqrt(theta_vap),
356 |                       prior_mean = fitted_mean, prior_sd = sqrt(fitted_var))
357 | 
358 |   # output
359 |   out <- list(coefficients = coefs, scores = theta, vcov = covmat, log_Lik = log_Lik,
360 |               items_dif = items_dif,  form_dif = form_dif, constr = constr,
361 |               N = N, J = J, H = H, ylevels = ylevels, p = p, q = q, p0 = p0, coef_item = coef_item,
362 |               control = con, call = cl)
363 |   class(out) <- c("hgrmDIF")
364 |   out
365 | }
366 | 
367 | 


--------------------------------------------------------------------------------
/R/hltm.R:
--------------------------------------------------------------------------------
  1 | #' Fitting Hierarchical Latent Trait Models (for Binary Responses).
  2 | #'
  3 | #' \code{hltm} fits a hierarchical latent trait model in which both
  4 | #' the mean and the variance of the latent preference (ability parameter)
  5 | #' may depend on person-specific covariates (\code{x} and \code{z}).
  6 | #' Specifically, the mean is specified as a linear combination of \code{x}
  7 | #' and the log of the variance is specified as a linear combination of
  8 | #' \code{z}.
  9 | #'
 10 | #' @inheritParams hgrm
 11 | #'
 12 | #' @return An object of class \code{hltm}.
 13 | #'  \item{coefficients}{A data frame of parameter estimates, standard errors,
 14 | #'   z values and p values.}
 15 | #'  \item{scores}{A data frame of EAP estimates of latent preferences and
 16 | #'   their approximate standard errors.}
 17 | #'  \item{vcov}{Variance-covariance matrix of parameter estimates.}
 18 | #'  \item{log_Lik}{The log-likelihood value at convergence.}
 19 | #'  \item{N}{Number of units.}
 20 | #'  \item{J}{Number of items.}
 21 | #'  \item{H}{A vector denoting the number of response categories for each item.}
 22 | #'  \item{ylevels}{A list showing the levels of the factorized response categories.}
 23 | #'  \item{p}{The number of predictors for the mean equation.}
 24 | #'  \item{q}{The number of predictors for the variance equation.}
 25 | #'  \item{control}{List of control values.}
 26 | #'  \item{call}{The matched call.}
 27 | #' @references Zhou, Xiang. 2019. "\href{https://doi.org/10.1017/pan.2018.63}{Hierarchical Item Response Models for Analyzing Public Opinion.}" Political Analysis.
 28 | #' @importFrom rms lrm.fit
 29 | #' @importFrom pryr compose
 30 | #' @importFrom pryr partial
 31 | #' @import stats
 32 | #' @export
 33 | #' @examples
 34 | #' y <- nes_econ2008[, -(1:3)]
 35 | #' x <- model.matrix( ~ party * educ, nes_econ2008)
 36 | #' z <- model.matrix( ~ party, nes_econ2008)
 37 | #'
 38 | #' dichotomize <- function(x) findInterval(x, c(mean(x, na.rm = TRUE)))
 39 | #' y[] <- lapply(y, dichotomize)
 40 | #' nes_m1 <- hltm(y, x, z)
 41 | #' nes_m1
 42 | 
 43 | hltm <- function(y, x = NULL, z = NULL, constr = c("latent_scale", "items"),
 44 |                  beta_set = 1L, sign_set = TRUE, init = c("naive", "glm", "irt"),
 45 |                  control = list()) {
 46 | 
 47 |   # match call
 48 |   cl <- match.call()
 49 | 
 50 |   # check y and convert y into data.frame if needed
 51 |   if(missing(y)) stop("`y` must be provided.")
 52 |   if ((!is.data.frame(y) && !is.matrix(y)) || ncol(y) == 1L)
 53 |     stop("'y' must be either a data.frame or a matrix with at least two columns.")
 54 |   if(is.matrix(y)) y <- as.data.frame(y)
 55 | 
 56 |   # number of units and items
 57 |   N <- nrow(y)
 58 |   J <- ncol(y)
 59 | 
 60 |   # convert each y_j into an integer vector
 61 |   y[] <- lapply(y, factor, exclude = c(NA, NaN))
 62 |   ylevels <- lapply(y, levels)
 63 |   y[] <- lapply(y, function(x) as.integer(x) - 1)
 64 |   if (!is.na(invalid <- match(TRUE, vapply(y, invalid_ltm, logical(1L)))))
 65 |     stop(paste(names(y)[invalid], "is not a dichotomous variable"))
 66 |   H <- vapply(y, max, double(1L), na.rm = TRUE) + 1
 67 | 
 68 |   # check x and z (x and z should contain an intercept column)
 69 |   x <- x %||% as.matrix(rep(1, N))
 70 |   z <- z %||% as.matrix(rep(1, N))
 71 |   if (!is.matrix(x)) stop("`x` must be a matrix.")
 72 |   if (!is.matrix(z)) stop("`z` must be a matrix.")
 73 |   if (nrow(x) != N || nrow(z) != N) stop("both 'x' and 'z' must have the same number of rows as 'y'")
 74 |   p <- ncol(x)
 75 |   q <- ncol(z)
 76 |   colnames(x) <- colnames(x) %||% paste0("x", 1:p)
 77 |   colnames(z) <- colnames(z) %||% paste0("x", 1:q)
 78 | 
 79 |   # check beta_set and sign_set
 80 |   stopifnot(beta_set %in% 1:J, is.logical(sign_set))
 81 | 
 82 |   # check constraint
 83 |   constr <- match.arg(constr)
 84 |   init <- match.arg(init)
 85 | 
 86 |   # control parameters
 87 |   con <- list(max_iter = 150, max_iter2 = 15, eps = 1e-03, eps2 = 1e-03, K = 25, C = 4)
 88 |   con[names(control)] <- control
 89 | 
 90 |   # set environments for utility functions
 91 |   environment(loglik_ltm) <- environment(theta_post_ltm) <- environment(dummy_fun_ltm) <- environment(tab2df_ltm) <- environment()
 92 | 
 93 |   # GL points
 94 |   K <- con[["K"]]
 95 |   theta_ls <- con[["C"]] * GLpoints[[K]][["x"]]
 96 |   qw_ls <- con[["C"]] * GLpoints[[K]][["w"]]
 97 | 
 98 |   # imputation
 99 |   y_imp <- y
100 |   if(anyNA(y)) y_imp[] <- lapply(y, impute)
101 | 
102 |   # pca for initial values of theta_eap
103 |   theta_eap <- {
104 |     tmp <- princomp(y_imp, cor = TRUE)$scores[, 1]
105 |     (tmp - mean(tmp, na.rm = TRUE))/sd(tmp, na.rm = TRUE)
106 |   }
107 | 
108 |   # initialization of alpha and beta parameters
109 |   if (init == "naive"){
110 |     alpha <- rep(0, J)
111 |     beta <- vapply(y, function(y) cov(y, theta_eap, use = "complete.obs")/var(theta_eap), double(1L))
112 |   } else if (init == "glm"){
113 |     pseudo_logit <- lapply(y_imp, function(y) glm.fit(cbind(1, theta_eap), y, family = binomial("logit"))[["coefficients"]])
114 |     beta <- vapply(pseudo_logit, function(x) x[2L], double(1L))
115 |     alpha <- vapply(pseudo_logit, function(x) x[1L], double(1L))
116 |   } else {
117 |     ltm_coefs <- ltm(y ~ z1)[["coefficients"]]
118 |     beta <- ltm_coefs[, 2, drop = TRUE]
119 |     alpha <- ltm_coefs[, 1, drop = TRUE]
120 |   }
121 | 
122 |   # initial values of gamma and lambda
123 |   lm_opr <- tcrossprod(solve(crossprod(x)), x)
124 |   gamma <- lm_opr %*% theta_eap
125 |   lambda <- rep(0, q)
126 |   fitted_mean <- as.double(x %*% gamma)
127 |   fitted_var <- rep(1, N)
128 | 
129 |   # EM algorithm
130 |   for (iter in seq(1, con[["max_iter"]])) {
131 | 
132 |       # store previous parameters
133 |       alpha_prev <- alpha
134 |       beta_prev <- beta
135 |       gamma_prev <- gamma
136 |       lambda_prev <- lambda
137 | 
138 |       # construct w_ik
139 |       posterior <- Map(theta_post_ltm, theta_ls, qw_ls)
140 |       w <- {
141 |           tmp <- matrix(unlist(posterior), N, K)
142 |           t(sweep(tmp, 1, rowSums(tmp), FUN = "/"))
143 |       }
144 | 
145 |       # maximization
146 |       pseudo_tab <- lapply(y, dummy_fun_ltm)
147 |       pseudo_y <- lapply(pseudo_tab, tab2df_ltm, theta_ls = theta_ls)
148 |       pseudo_logit <- lapply(pseudo_y, function(df) glm.fit(cbind(1, df[["x"]]),
149 |           df[["y"]], weights = df[["wt"]], family = quasibinomial("logit"))[["coefficients"]])
150 |       beta <- vapply(pseudo_logit, function(x) x[2L], double(1L))
151 |       alpha <- vapply(pseudo_logit, function(x) x[1L], double(1L))
152 | 
153 |       # EAP and VAP estimates of latent preferences
154 |       theta_eap <- t(theta_ls %*% w)
155 |       theta_vap <- t(theta_ls^2 %*% w) - theta_eap^2
156 | 
157 |       # variance regression
158 |       gamma <- lm_opr %*% theta_eap
159 |       r2 <- (theta_eap - x %*% gamma)^2 + theta_vap
160 | 
161 |       if (ncol(z)==1) lambda <- log(mean(r2)) else{
162 |         s2 <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))[["fitted.values"]]
163 |         loglik <- -0.5 * (log(s2) + r2/s2)
164 |         LL0 <- sum(loglik)
165 |         dLL <- 1
166 |         for (m in seq(1, con[["max_iter2"]])) {
167 |             gamma <- lm.wfit(x, theta_eap, w = 1/s2)[["coefficients"]]
168 |             r2 <- (theta_eap - x %*% gamma)^2 + theta_vap
169 |             var_reg <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))
170 |             s2 <- var_reg[["fitted.values"]]
171 |             loglik <- -0.5 * (log(s2) + r2/s2)
172 |             LL_temp <- sum(loglik)
173 |             dLL <- LL_temp - LL0
174 |             if (dLL < con[["eps2"]])
175 |                 break
176 |             LL0 <- LL_temp
177 |         }
178 |         lambda <- var_reg[["coefficients"]]
179 |       }
180 | 
181 |       # location constraint
182 |       tmp <- mean(x %*% gamma)
183 |       alpha <- unlist(Map(function(x, y) x + tmp * y, alpha, beta))
184 |       gamma[1L] <- gamma[1L] - tmp
185 | 
186 |       # scale constraint
187 |       tmp <- mean(z %*% lambda)
188 |       gamma <- gamma/exp(tmp/2)
189 |       beta <- beta * exp(tmp/2)
190 |       lambda[1L] <- lambda[1L] - tmp
191 | 
192 |       # direction contraint
193 |       if (sign_set == (beta[beta_set] < 0)) {
194 |         gamma <- -gamma
195 |         beta <- -beta
196 |       }
197 |       fitted_mean <- as.double(x %*% gamma)
198 |       fitted_var <- exp(as.double(z %*% lambda))
199 | 
200 |       cat(".")
201 | 
202 |       # check convergence
203 |       if (sqrt(mean((beta - beta_prev)^2)) < con[["eps"]]) {
204 |           cat("\n converged at iteration", iter, "\n")
205 |           break
206 |       } else if (iter == con[["max_iter"]]) {
207 |           stop("algorithm did not converge; try increasing max_iter.")
208 |           break
209 |       } else next
210 |   }
211 | 
212 |   gamma <- setNames(as.double(gamma), paste("x", colnames(x), sep = ""))
213 |   lambda <- setNames(as.double(lambda), paste("z", colnames(z), sep = ""))
214 | 
215 |   # inference
216 |   pik <- matrix(unlist(Map(partial(dnorm, x = theta_ls), mean = fitted_mean, sd = sqrt(fitted_var))),
217 |                 N, K, byrow = TRUE) * matrix(qw_ls, N, K, byrow = TRUE)
218 |   Lijk <- lapply(theta_ls, function(theta_k) exp(loglik_ltm(alpha = alpha, beta = beta, rep(theta_k, N))))  # K-list
219 |   Lik <- vapply(Lijk, compose(exp, partial(rowSums, na.rm = TRUE), log), double(N))
220 |   Li <- rowSums(Lik * pik)
221 | 
222 |   # log likelihood
223 |   log_Lik <- sum(log(Li))
224 | 
225 |   # outer product of gradients
226 |   environment(dalpha_ltm) <- environment(sj_ab_ltm) <- environment(si_gamma) <- environment(si_lambda) <- environment()
227 |   dalpha <- dalpha_ltm(alpha, beta)  # K*J matrix
228 |   s_ab <- unname(Reduce(cbind, lapply(1:J, sj_ab_ltm)))
229 |   s_gamma <- vapply(1:N, si_gamma, double(p))
230 |   s_lambda <- vapply(1:N, si_lambda, double(q))
231 | 
232 |   s_all <- rbind(t(s_ab)[-c(1L, ncol(s_ab)), , drop = FALSE], s_gamma, s_lambda)
233 |   s_all[is.na(s_all)] <- 0
234 |   covmat <- tryCatch(solve(tcrossprod(s_all)),
235 |                      error = function(e) {warning("The information matrix is singular; SE calculation failed.");
236 |                        matrix(NA, nrow(s_all), nrow(s_all))})
237 |   se_all <- sqrt(diag(covmat))
238 | 
239 |   # reorganize se_all
240 |   sH <- 2 * J
241 |   gamma_indices <- (sH - 1):(sH + p - 2)
242 |   lambda_indices <- (sH + p - 1):(sH + p + q - 2)
243 |   se_all <- c(NA, se_all[1:(sH-2)], NA, se_all[gamma_indices], se_all[lambda_indices])
244 | 
245 |   # name se_all and covmat
246 |   names_ab <- paste(rep(names(alpha), each = 2), c("Diff", "Dscrmn"))
247 |   names(se_all) <- c(names_ab, names(gamma), names(lambda))
248 |   rownames(covmat) <- colnames(covmat) <- c(names_ab[-c(1L, length(names_ab))], names(gamma), names(lambda))
249 | 
250 |   # item coefficients
251 |   coefs_item <- Map(function(a, b) c(Diff = a, Dscrmn = b), alpha, beta)
252 | 
253 |   # all coefficients
254 |   coef_all <- c(unlist(coefs_item), gamma, lambda)
255 |   coefs <- data.frame(Estimate = coef_all, Std_Error = se_all, z_value = coef_all/se_all,
256 |                       p_value = 2 * (1 - pnorm(abs(coef_all/se_all))))
257 |   rownames(coefs) <- names(se_all)
258 | 
259 |   # item constraints
260 |   if (constr == "items"){
261 | 
262 |     gamma0_prev <- gamma[1L]
263 | 
264 |     # location constraint
265 |     alpha_sum <- sum(alpha)
266 |     beta_sum <- sum(beta)
267 |     c1 <- alpha_sum/beta_sum
268 |     gamma[1L] <- gamma[1L] + c1  # adjust gamma0
269 |     alpha <- unlist(Map(function(x, y) x - c1 * y, alpha, beta))
270 | 
271 |     # scale constraint
272 |     c2 <- 2 * mean(log(abs(beta)))
273 |     gamma <- gamma * exp(c2/2)
274 |     lambda[1L] <- lambda[1L] + c2
275 |     beta <- beta / exp(c2/2)
276 | 
277 |     # fitted means and variances
278 |     fitted_mean <- as.double(x %*% gamma)
279 |     fitted_var <- exp(as.double(z %*% lambda))
280 | 
281 |     # theta_eap and theta_vap
282 |     theta_eap <- (theta_eap - gamma0_prev) * exp(c2/2) + gamma[1L]
283 |     theta_vap <- theta_vap * (exp(c2/2))^2
284 | 
285 |     # covmat for new parameterization
286 |     tmp_fun <- function(d) {
287 |       mat <- diag(d)
288 |       mat[d, d] <- exp(-c2/2)
289 |       mat[1:(d-1), d] <- rep(-c1, d-1)
290 |       mat
291 |     }
292 |     A <- Reduce(Matrix::bdiag, lapply(H, tmp_fun))
293 |     A2 <- A[seq(2, nrow(A)-1), seq(2, ncol(A)-1)]
294 |     B <- Matrix::bdiag(exp(c2/2) * diag(p), diag(q))
295 |     C <- Matrix::bdiag(A2, B)
296 |     covmat <- C %*% Matrix::tcrossprod(covmat, C)
297 | 
298 |     se_all <- sqrt(Matrix::diag(covmat))
299 | 
300 |     # reorganize se_all
301 |     sH <- 2 * J
302 |     lambda_indices <- gamma_indices <- NULL
303 |     gamma_indices <- (sH - 1):(sH + p - 2)
304 |     lambda_indices <- (sH + p - 1):(sH + p + q - 2)
305 |     se_all <- c(NA, se_all[1:(sH-2)], NA, se_all[gamma_indices], se_all[lambda_indices])
306 | 
307 |     # name se_all and covmat
308 |     names_ab <- paste(rep(names(alpha), each = 2), c("Diff", "Dscrmn"))
309 |     names(se_all) <- c(names_ab, names(gamma), names(lambda))
310 |     rownames(covmat) <- colnames(covmat) <- c(names_ab[-c(1L, length(names_ab))], names(gamma), names(lambda))
311 | 
312 |     # item coefficients
313 |     coefs_item <- Map(function(a, b) c(Diff = a, Dscrmn = b), alpha, beta)
314 | 
315 |     # all coefficients
316 |     coef_all <- c(unlist(coefs_item), gamma, lambda)
317 |     coefs <- data.frame(Estimate = coef_all, Std_Error = se_all, z_value = coef_all/se_all,
318 |                         p_value = 2 * (1 - pnorm(abs(coef_all/se_all))))
319 |     rownames(coefs) <- names(se_all)
320 |   }
321 | 
322 |   # ability parameter estimates
323 |   theta <- data.frame(post_mean = theta_eap, post_sd = sqrt(theta_vap),
324 |                       prior_mean = fitted_mean, prior_sd = sqrt(fitted_var))
325 | 
326 |   # output
327 |   out <- list(coefficients = coefs, scores = theta, vcov = covmat, log_Lik = log_Lik, constr = constr,
328 |               N = N, J = J, H = H, ylevels = ylevels, p = p, q = q, control = con, call = cl)
329 |   class(out) <- c("hltm", "hIRT")
330 |   out
331 | }
332 | 
333 | 


--------------------------------------------------------------------------------
/R/hltm2.R:
--------------------------------------------------------------------------------
  1 | #' Hierarchical Latent Trait Models with Known Item Parameters.
  2 | #'
  3 | #' \code{hltm2} fits a hierarchical latent trait model where the item parameters
  4 | #'   are known and supplied by the user.
  5 | #'
  6 | #' @inheritParams hgrm2
  7 | #' @param item_coefs A list of known item parameters. The parameters of item \eqn{j} are given
  8 | #'   by the \eqn{j}th element, which should be a vector of length 2, containing
  9 | #'   the item difficulty parameter and item discrimination parameter.
 10 | #'
 11 | #' @return An object of class \code{hltm}.
 12 | #'  \item{coefficients}{A data frame of parameter estimates, standard errors,
 13 | #'   z values and p values.}
 14 | #'  \item{scores}{A data frame of EAP estimates of latent preferences and
 15 | #'   their approximate standard errors.}
 16 | #'  \item{vcov}{Variance-covariance matrix of parameter estimates.}
 17 | #'  \item{log_Lik}{The log-likelihood value at convergence.}
 18 | #'  \item{N}{Number of units.}
 19 | #'  \item{J}{Number of items.}
 20 | #'  \item{H}{A vector denoting the number of response categories for each item.}
 21 | #'  \item{ylevels}{A list showing the levels of the factorized response categories.}
 22 | #'  \item{p}{The number of predictors for the mean equation.}
 23 | #'  \item{q}{The number of predictors for the variance equation.}
 24 | #'  \item{control}{List of control values.}
 25 | #'  \item{call}{The matched call.}
 26 | #' @importFrom rms lrm.fit
 27 | #' @importFrom pryr compose
 28 | #' @importFrom pryr partial
 29 | #' @import stats
 30 | #' @export
 31 | #' @examples
 32 | #' y <- nes_econ2008[, -(1:3)]
 33 | #' x <- model.matrix( ~ party * educ, nes_econ2008)
 34 | #' z <- model.matrix( ~ party, nes_econ2008)
 35 | #' dichotomize <- function(x) findInterval(x, c(mean(x, na.rm = TRUE)))
 36 | #' y_bin <- y
 37 | #' y_bin[] <- lapply(y, dichotomize)
 38 | #'
 39 | #' n <- nrow(nes_econ2008)
 40 | #' id_train <- sample.int(n, n/4)
 41 | #' id_test <- setdiff(1:n, id_train)
 42 | #'
 43 | #' y_bin_train <- y_bin[id_train, ]
 44 | #' x_train <- x[id_train, ]
 45 | #' z_train <- z[id_train, ]
 46 | #'
 47 | #' mod_train <- hltm(y_bin_train, x_train, z_train)
 48 | #'
 49 | #' y_bin_test <- y_bin[id_test, ]
 50 | #' x_test <- x[id_test, ]
 51 | #' z_test <- z[id_test, ]
 52 | #'
 53 | #' item_coefs <- lapply(coef_item(mod_train), `[[`, "Estimate")
 54 | #'
 55 | #' model_test <- hltm2(y_bin_test, x_test, z_test, item_coefs = item_coefs)
 56 | 
 57 | hltm2 <- function(y, x = NULL, z = NULL, item_coefs, control = list()) {
 58 | 
 59 |   # match call
 60 |   cl <- match.call()
 61 | 
 62 |   # check y and convert y into data.frame if needed
 63 |   if(missing(y)) stop("`y` must be provided.")
 64 |   if ((!is.data.frame(y) && !is.matrix(y)) || ncol(y) == 1L)
 65 |     stop("'y' must be either a data.frame or a matrix with at least two columns.")
 66 |   if(is.matrix(y)) y <- as.data.frame(y)
 67 | 
 68 |   # number of units and items
 69 |   N <- nrow(y)
 70 |   J <- ncol(y)
 71 | 
 72 |   # convert each y_j into an integer vector
 73 |   y[] <- lapply(y, factor, exclude = c(NA, NaN))
 74 |   ylevels <- lapply(y, levels)
 75 |   y[] <- lapply(y, function(x) as.integer(x) - 1)
 76 |   if (!is.na(invalid <- match(TRUE, vapply(y, invalid_ltm, logical(1L)))))
 77 |     stop(paste(names(y)[invalid], "is not a dichotomous variable"))
 78 |   H <- vapply(y, max, double(1L), na.rm = TRUE) + 1
 79 | 
 80 |   # extract item parameters
 81 |   if(missing(item_coefs))
 82 |     stop("`item_coefs` must be supplied.")
 83 |   if(!is.list(item_coefs) || length(item_coefs) != J)
 84 |     stop("`item_coefs` must be a list of `ncol(y)` elements")
 85 |   item_coefs_H <- vapply(item_coefs, length, integer(1L))
 86 |   if(!all.equal(item_coefs_H, H))
 87 |     stop("`item_coefs` do not match the number of response categories in `y`")
 88 |   alpha <- vapply(item_coefs, function(x) x[[1L]], double(1L))
 89 |   beta <- vapply(item_coefs, function(x) x[[2L]], double(1L))
 90 | 
 91 |   # check x and z (x and z should contain an intercept column)
 92 |   x <- x %||% as.matrix(rep(1, N))
 93 |   z <- z %||% as.matrix(rep(1, N))
 94 |   if (!is.matrix(x)) stop("`x` must be a matrix.")
 95 |   if (!is.matrix(z)) stop("`z` must be a matrix.")
 96 |   if (nrow(x) != N || nrow(z) != N) stop("both 'x' and 'z' must have the same number of rows as 'y'")
 97 |   p <- ncol(x)
 98 |   q <- ncol(z)
 99 |   colnames(x) <- colnames(x) %||% paste0("x", 1:p)
100 |   colnames(z) <- colnames(z) %||% paste0("x", 1:q)
101 | 
102 |   # control parameters
103 |   con <- list(max_iter = 150, max_iter2 = 15, eps = 1e-03, eps2 = 1e-03, K = 25, C = 4)
104 |   con[names(control)] <- control
105 | 
106 |   # set environments for utility functions
107 |   environment(loglik_ltm) <- environment(theta_post_ltm) <- environment(dummy_fun_ltm) <- environment(tab2df_ltm) <- environment()
108 | 
109 |   # GL points
110 |   K <- con[["K"]]
111 |   theta_ls <- con[["C"]] * GLpoints[[K]][["x"]]
112 |   qw_ls <- con[["C"]] * GLpoints[[K]][["w"]]
113 | 
114 |   # imputation
115 |   y_imp <- y
116 |   if(anyNA(y)) y_imp[] <- lapply(y, impute)
117 | 
118 |   # pca for initial values of theta_eap
119 |   theta_eap <- {
120 |     tmp <- princomp(y_imp, cor = TRUE)$scores[, 1]
121 |     (tmp - mean(tmp, na.rm = TRUE))/sd(tmp, na.rm = TRUE)
122 |   }
123 | 
124 |   # initial values of gamma and lambda
125 |   lm_opr <- tcrossprod(solve(crossprod(x)), x)
126 |   gamma <- lm_opr %*% theta_eap
127 |   lambda <- rep(0, q)
128 |   fitted_mean <- as.double(x %*% gamma)
129 |   fitted_var <- rep(1, N)
130 | 
131 |   # EM algorithm
132 |   for (iter in seq(1, con[["max_iter"]])) {
133 | 
134 |     # store previous parameters
135 |     # alpha_prev <- alpha
136 |     # beta_prev <- beta
137 |     gamma_prev <- gamma
138 |     lambda_prev <- lambda
139 | 
140 |     # construct w_ik
141 |     posterior <- Map(theta_post_ltm, theta_ls, qw_ls)
142 |     w <- {
143 |       tmp <- matrix(unlist(posterior), N, K)
144 |       t(sweep(tmp, 1, rowSums(tmp), FUN = "/"))
145 |     }
146 | 
147 |     # # maximization
148 |     # pseudo_tab <- lapply(y, dummy_fun_ltm)
149 |     # pseudo_y <- lapply(pseudo_tab, tab2df_ltm, theta_ls = theta_ls)
150 |     # pseudo_logit <- lapply(pseudo_y, function(df) glm.fit(cbind(1, df[["x"]]),
151 |     #                                                       df[["y"]], weights = df[["wt"]], family = quasibinomial("logit"))[["coefficients"]])
152 |     # beta <- vapply(pseudo_logit, function(x) x[2L], double(1L))
153 |     # alpha <- vapply(pseudo_logit, function(x) x[1L], double(1L))
154 | 
155 |     # EAP and VAP estimates of latent preferences
156 |     theta_eap <- t(theta_ls %*% w)
157 |     theta_vap <- t(theta_ls^2 %*% w) - theta_eap^2
158 | 
159 |     # variance regression
160 |     gamma <- lm_opr %*% theta_eap
161 |     r2 <- (theta_eap - x %*% gamma)^2 + theta_vap
162 | 
163 |     if (ncol(z)==1) lambda <- log(mean(r2)) else{
164 |       s2 <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))[["fitted.values"]]
165 |       loglik <- -0.5 * (log(s2) + r2/s2)
166 |       LL0 <- sum(loglik)
167 |       dLL <- 1
168 |       for (m in seq(1, con[["max_iter2"]])) {
169 |         gamma <- lm.wfit(x, theta_eap, w = 1/s2)[["coefficients"]]
170 |         r2 <- (theta_eap - x %*% gamma)^2 + theta_vap
171 |         var_reg <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))
172 |         s2 <- var_reg[["fitted.values"]]
173 |         loglik <- -0.5 * (log(s2) + r2/s2)
174 |         LL_temp <- sum(loglik)
175 |         dLL <- LL_temp - LL0
176 |         if (dLL < con[["eps2"]])
177 |           break
178 |         LL0 <- LL_temp
179 |       }
180 |       lambda <- var_reg[["coefficients"]]
181 |     }
182 | 
183 |     fitted_mean <- as.double(x %*% gamma)
184 |     fitted_var <- exp(as.double(z %*% lambda))
185 |     cat(".")
186 | 
187 |     # check convergence
188 |     if (sqrt(mean((gamma/gamma_prev - 1)^2)) < con[["eps"]]) {
189 |       cat("\n converged at iteration", iter, "\n")
190 |       break
191 |     } else if (iter == con[["max_iter"]]) {
192 |       stop("algorithm did not converge; try increasing max_iter.")
193 |       break
194 |     } else next
195 |   }
196 | 
197 |   gamma <- setNames(as.double(gamma), paste("x", colnames(x), sep = ""))
198 |   lambda <- setNames(as.double(lambda), paste("z", colnames(z), sep = ""))
199 | 
200 |   # inference
201 |   pik <- matrix(unlist(Map(partial(dnorm, x = theta_ls), mean = fitted_mean, sd = sqrt(fitted_var))),
202 |                 N, K, byrow = TRUE) * matrix(qw_ls, N, K, byrow = TRUE)
203 |   Lijk <- lapply(theta_ls, function(theta_k) exp(loglik_ltm(alpha = alpha, beta = beta, rep(theta_k, N))))  # K-list
204 |   Lik <- vapply(Lijk, compose(exp, partial(rowSums, na.rm = TRUE), log), double(N))
205 |   Li <- rowSums(Lik * pik)
206 | 
207 |   # log likelihood
208 |   log_Lik <- sum(log(Li))
209 | 
210 |   # outer product of gradients
211 |   environment(dalpha_ltm) <- environment(sj_ab_ltm) <- environment(si_gamma) <- environment(si_lambda) <- environment()
212 |   dalpha <- dalpha_ltm(alpha, beta)  # K*J matrix
213 |   # s_ab <- unname(Reduce(cbind, lapply(1:J, sj_ab_ltm)))
214 |   s_gamma <- vapply(1:N, si_gamma, double(p))
215 |   s_lambda <- vapply(1:N, si_lambda, double(q))
216 | 
217 |   s_all <- rbind(s_gamma, s_lambda)
218 |   s_all[is.na(s_all)] <- 0
219 |   covmat <- tryCatch(solve(tcrossprod(s_all)),
220 |                      error = function(e) {warning("The information matrix is singular; SE calculation failed.");
221 |                        matrix(NA, nrow(s_all), nrow(s_all))})
222 |   se_all <- sqrt(diag(covmat))
223 | 
224 |   # reorganize se_all
225 |   sH <- 2 * J
226 |   gamma_indices <- (sH - 1):(sH + p - 2)
227 |   lambda_indices <- (sH + p - 1):(sH + p + q - 2)
228 |   se_all <- c(rep(0, sH), sqrt(diag(covmat)))
229 | 
230 |   # name se_all and covmat
231 |   names_ab <- paste(rep(names(alpha), each = 2), c("Diff", "Dscrmn"))
232 |   names(se_all) <- c(names_ab, names(gamma), names(lambda))
233 |   rownames(covmat) <- colnames(covmat) <- c(names(gamma), names(lambda))
234 | 
235 |   # item coefficients
236 |   coefs_item <- Map(function(a, b) c(Diff = a, Dscrmn = b), alpha, beta)
237 | 
238 |   # all coefficients
239 |   coef_all <- c(unlist(coefs_item), gamma, lambda)
240 |   coefs <- data.frame(Estimate = coef_all, Std_Error = se_all, z_value = coef_all/se_all,
241 |                       p_value = 2 * (1 - pnorm(abs(coef_all/se_all))))
242 |   rownames(coefs) <- names(se_all)
243 | 
244 |   # ability parameter estimates
245 |   theta <- data.frame(post_mean = theta_eap, post_sd = sqrt(theta_vap),
246 |                       prior_mean = fitted_mean, prior_sd = sqrt(fitted_var))
247 | 
248 |   # output
249 |   out <- list(coefficients = coefs, scores = theta, vcov = covmat, log_Lik = log_Lik,
250 |               N = N, J = J, H = H, ylevels = ylevels, p = p, q = q, control = con, call = cl)
251 |   class(out) <- c("hltm", "hIRT")
252 |   out
253 | }
254 | 


--------------------------------------------------------------------------------
/R/latent_scores.R:
--------------------------------------------------------------------------------
 1 | #' Estimates of Latent Preferences/Abilities
 2 | #'
 3 | #' EAP estimates of latent preferences for either \code{hltm} or \code{hgrm} models.
 4 | #'
 5 | #' @inheritParams print.hIRT
 6 | #'
 7 | #' @return A data frame of EAP estimates of latent preferences and their approximate standard errors.
 8 | #' @export
 9 | #' @examples
10 | #' y <- nes_econ2008[, -(1:3)]
11 | #' x <- model.matrix( ~ party * educ, nes_econ2008)
12 | #' z <- model.matrix( ~ party, nes_econ2008)
13 | #' nes_m1 <- hgrm(y, x, z)
14 | #' pref <- latent_scores(nes_m1)
15 | #' require(ggplot2)
16 | #' ggplot(data = nes_econ2008) +
17 | #' geom_density(aes(x = pref$post_mean, col = party))
18 | latent_scores <- function(x, digits = 3) {
19 |     if (!inherits(x, "hIRT"))
20 |       stop("Use only with 'hIRT' objects.\n")
21 |     round(x[["scores"]], digits)
22 | }
23 | 
24 | 


--------------------------------------------------------------------------------
/R/print.R:
--------------------------------------------------------------------------------
 1 | #' Printing an object of class \code{hIRT}
 2 | #' @param x An object of class \code{hIRT}
 3 | #' @param digits The number of significant digits to use when printing
 4 | #' @param ... further arguments passed to \code{\link{print}}.
 5 | #' @export
 6 | print.hIRT <- function(x, digits = 3, ...) {
 7 |   cat("\nCall:\n", paste(deparse(x[["call"]]), sep = "\n", collapse = "\n"),
 8 |       "\n\n", sep = "")
 9 |   cat("Mean Regression:\n")
10 |   print(coef_mean(x, digits), ...)
11 |   cat("\n")
12 |   cat("Variance Regression:\n")
13 |   print(coef_var(x, digits), ...)
14 |   cat("\nLog Likelihood:", round(x[["log_Lik"]], digits))
15 |   cat("\n\n")
16 |   invisible(x)
17 | }
18 | 


--------------------------------------------------------------------------------
/R/summary.R:
--------------------------------------------------------------------------------
 1 | #' Summarizing Hierarchical Item Response Theory Models
 2 | #'
 3 | #' Summarizing the fit of either \code{hltm} or \code{hgrm}.
 4 | #'
 5 | #' @param object An object of class \code{hIRT}.
 6 | #' @param by_item Logical. Should item parameters be stored item by item
 7 | #'   (if \code{TRUE}) or put together in a data frame (if \code{FALSE})?
 8 | #' @param digits the number of significant digits to use when printing.
 9 | #'
10 | #' @return An object of class \code{summary_hIRT}.
11 | #'  \item{call}{The matched call.}
12 | #'  \item{model}{Model fit statistics: Log likelihood, AIC, and BIC.}
13 | #'  \item{item_coefs}{Item parameter estimates, standard errors,
14 | #'   z values, and p values.}
15 | #'  \item{mean_coefs}{Parameter estimates for the mean equation.}
16 | #'  \item{var_coefs}{Parameter estimates for the variance equation.}
17 | #'
18 | #' @export
19 | #' @examples
20 | #' y <- nes_econ2008[, -(1:3)]
21 | #' x <- model.matrix( ~ party * educ, nes_econ2008)
22 | #' z <- model.matrix( ~ party, nes_econ2008)
23 | #' nes_m1 <- hgrm(y, x, z)
24 | #' summary(nes_m1, by_item = TRUE)
25 | 
26 | summary.hIRT <- function(object, by_item = FALSE, digits = 3, ...) {
27 |   item_coefs <- coef_item(object, by_item = by_item, digits)
28 |   mean_coefs <- coef_mean(object, digits)
29 |   var_coefs <- coef_var(object, digits)
30 |   log_Lik = object[["log_Lik"]]
31 |   df <- sum(object[["H"]]) + sum(object[["p"]]) + sum(object[["q"]]) - 2
32 |   N <- length(object[["scores"]])
33 |   AIC <- -2 * log_Lik + 2 * df
34 |   BIC <- -2 * log_Lik + df * log(N)
35 |   model <- list(log_Lik = log_Lik, AIC = AIC, BIC = BIC)
36 |   out <- list(call = object[["call"]], model = model, item_coefs = item_coefs,
37 |               mean_coefs = mean_coefs, var_coefs = var_coefs)
38 |   class(out) <- c("summary_hIRT")
39 |   out
40 | }
41 | 
42 | #' @inheritParams print.hIRT
43 | #' @export
44 | #' @rdname summary.hIRT
45 | print.summary_hIRT <- function(x, digits = 3, ...) {
46 |   cat("\nCall:\n", paste(deparse(x[["call"]]), sep = "\n", collapse = "\n"),
47 |       "\n\n", sep = "")
48 |   cat("Model Summary:\n")
49 |   model_sum <- as.data.frame(x[["model"]], row.names = "")
50 |   print(model_sum)
51 |   cat("\n Item Coefficients: \n")
52 |   print(x[["item_coefs"]], ...)
53 |   cat("\n Mean Regression Coefficients:\n")
54 |   print(x[["mean_coefs"]], ...)
55 |   cat("\n Variance Regression Coefficients:\n")
56 |   print(x[["var_coefs"]], ...)
57 |   cat("\n\n")
58 |   invisible(x)
59 | }
60 | 


--------------------------------------------------------------------------------
/R/sysdata.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiangzhou09/hIRT/b33d90f13287f60f7feffd0c061858340dff40a6/R/sysdata.rda


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | utils::globalVariables(c("H", "J", "K", "Li", "Lijk", "Lik", "N",
 2 |                   "alpha", "dalpha", "lambda", "eta", "p", "p0", "q", "pik",
 3 |                   "theta_ls", "w", "x", "x0", "y", "z", "fitted_mean", "fitted_var",
 4 |                   "form_dif", "items_dif"))
 5 | 
 6 | # median impute
 7 | impute <- function(vec){
 8 |   vec[is.na(vec)] <- median(vec, na.rm = TRUE)
 9 |   vec
10 | }
11 | 
12 | # logical or infix function
13 | `%||%` <- function(a, b) if (!is.null(a)) a else b
14 | 
15 | # calculate gamma gradient for case i
16 | si_gamma <- function(i) {
17 |     sum(pik[i, ] * Lik[i, ] * (theta_ls - fitted_mean[[i]]))/
18 |     fitted_var[[i]]/Li[[i]] * x[i, 1:p]
19 | }
20 | 
21 | # calculate lambda gradient for case i
22 | si_lambda <- function(i) {
23 |     sum(0.5 * pik[i, ] * Lik[i, ] * ((theta_ls - fitted_mean[[i]])^2/fitted_var[[i]] - 1))/Li[[i]] * z[i, 1:q]
24 | }
25 | 


--------------------------------------------------------------------------------
/R/utils_grm.R:
--------------------------------------------------------------------------------
  1 | # check if a vector has at least two valid responses
  2 | invalid_grm <- function(x) max(x, na.rm = TRUE) < 2
  3 | 
  4 | lrm_fit <- function(x, y, weights, tol = 1e-16, ...){
  5 |   valid <- weights>tol & !is.na(y)
  6 |   lrm.fit(x[valid, , drop = FALSE], y[valid], weights = weights[valid], ...)
  7 | }
  8 | 
  9 | # log likelihood function (return N * J matrix) y: N*J data frame alpha:
 10 | # length J list beta: length J numeric vector theta: length N numeric
 11 | # vector
 12 | loglik_grm <- function(alpha, beta, theta) {
 13 |     util <- outer(theta, beta)
 14 |     alpha_l <- simplify2array(unname(Map(function(x, y) x[y], alpha, y)))
 15 |     alpha_h <- simplify2array(unname(Map(function(x, y) x[y + 1L], alpha, y)))
 16 |     log(plogis(util + alpha_l) - plogis(util + alpha_h))
 17 | }
 18 | 
 19 | # posterior of theta (unnormalized) (returns N-vector)
 20 | # y: N*J data frame
 21 | # x: N*p model matrix
 22 | # z: N*q model matrix
 23 | # alpha: length J list
 24 | # beta: length J numeric vector
 25 | # gamma: p-vector
 26 | # lambda: q-vector
 27 | # theta_k: numeric scalar
 28 | # qw_k numeric scalar
 29 | theta_post_grm <- function(theta_k, qw_k) {
 30 |     wt_k <- dnorm(theta_k - fitted_mean, sd = sqrt(fitted_var)) * qw_k  # prior density * quadrature weight
 31 |     loglik <- rowSums(loglik_grm(alpha, beta, rep(theta_k, N)), na.rm = TRUE)
 32 |     logPop <- log(wt_k)
 33 |     exp(loglik + logPop)
 34 | }
 35 | 
 36 | theta_prior_grm <- function(theta_k, qw_k) {
 37 |   wt_k <- dnorm(theta_k - fitted_mean, sd = sqrt(fitted_var)) * qw_k  # prior density * quadrature weight
 38 |   # loglik <- rowSums(loglik_grm(alpha, beta, rep(theta_k, N)), na.rm = TRUE)
 39 |   logPop <- log(wt_k)
 40 |   exp(logPop)
 41 | }
 42 | 
 43 | # pseudo tabulated data for item J (returns K*H_j matrix)
 44 | # y_j: N-vector
 45 | # H_j: number of response categories for item j
 46 | # w: K*N matrix
 47 | dummy_fun_grm <- function(y_j, H_j) {
 48 |     dummy_mat <- outer(y_j, 1:H_j, "==")  # N*H_j matrix
 49 |     dummy_mat[is.na(dummy_mat)] <- 0
 50 |     w %*% dummy_mat
 51 | }
 52 | 
 53 | # pseudo tabulated data to pseudo data frame
 54 | # tab: K*H_j matrix
 55 | # theta_ls: K-vector
 56 | tab2df_grm <- function(tab, theta_ls) {
 57 |     H_j <- ncol(tab)
 58 |     theta <- rep(theta_ls, H_j)
 59 |     y <- rep(1:H_j, each = K)
 60 |     data.frame(y = factor(y), x = theta, wt = as.double(tab))
 61 | }
 62 | 
 63 | # score function of alpha and beta (returns an H_j*N matrix) Lik: N*K
 64 | # matrix pik: N*K matrix alpha: J-list beta: J-vector theta_ls: K-vector
 65 | sj_ab_grm <- function(j) {
 66 |     temp2 <- array(0, c(N, K, H[[j]] + 1))
 67 |     h <- .subset2(y, j)
 68 |     drv_h <- vapply(theta_ls, function(theta_k) exp(alpha[[j]][h] + beta[[j]] *
 69 |         theta_k)/(1 + exp(alpha[[j]][h] + beta[[j]] * theta_k))^2, double(N))
 70 |     drv_h_plus_one <- -vapply(theta_ls, function(theta_k) exp(alpha[[j]][h +
 71 |         1L] + beta[[j]] * theta_k)/(1 + exp(alpha[[j]][h + 1L] + beta[[j]] *
 72 |         theta_k))^2, double(N))
 73 |     drv_h[h == 1, ] <- 0
 74 |     drv_h_plus_one[h == H[[j]], ] <- 0
 75 |     for (i in seq_len(N)) {
 76 |         if (is.na(h[[i]])) next
 77 |         temp2[i, , h[[i]]] <- drv_h[i, ]
 78 |         temp2[i, , h[[i]] + 1L] <- drv_h_plus_one[i, ]
 79 |     }
 80 |     comp_a <- pik * Lik/vapply(Lijk, `[`, 1:N, j, FUN.VALUE = double(N))  # N*K matrix
 81 |     comp_a[is.na(comp_a)] <- 0
 82 |     s_alpha <- vapply(1:N, function(i) comp_a[i, ] %*% temp2[i, , 2:H[[j]]],
 83 |         double(H[[j]] - 1L))  # (H[j]-1)*N matrix
 84 |     temp2_beta <- drv_h + drv_h_plus_one
 85 |     s_beta <- rowSums(comp_a * matrix(theta_ls, N, K, byrow = TRUE) * temp2_beta)  # N-vector
 86 |     s <- sweep(rbind(s_alpha, s_beta), 2, rowSums(Lik * pik), FUN = "/")
 87 | }
 88 | 
 89 | 
 90 | x0DIF <- function(theta_k){
 91 |   if(form_dif == "uniform") x0 else cbind(x0, theta_k * x0)
 92 | }
 93 | 
 94 | # log likelihood function (return N * J matrix) y for DIF, returns N*J data frame
 95 | # alpha: length J list
 96 | # beta: length J numeric vector
 97 | # theta: length N numeric vector
 98 | loglik_grmDIF <- function(alpha, beta, theta, eta) {
 99 |   util <- outer(theta, beta)
100 |   tmp_x <- x0DIF(theta)
101 |   util2 <- vapply(1:length(eta), function(j) if(j %in% items_dif) tmp_x %*% eta[[j]] else double(N), double(N))
102 |   alpha_l <- simplify2array(unname(Map(function(x, y) x[y], alpha, y)))
103 |   alpha_h <- simplify2array(unname(Map(function(x, y) x[y + 1L], alpha, y)))
104 |   log(plogis(util + util2 + alpha_l) - plogis(util + util2 + alpha_h))
105 | }
106 | 
107 | # posterior of theta (unnormalized) (returns N-vector)
108 | # y: N*J data frame
109 | # x: N*p model matrix
110 | # z: N*q model matrix
111 | # alpha: length J list
112 | # beta: length J numeric vector
113 | # gamma: p-vector
114 | # lambda: q-vector
115 | # theta_k: numeric scalar
116 | # qw_k numeric scalar
117 | theta_post_grmDIF <- function(theta_k, qw_k) {
118 |   wt_k <- dnorm(theta_k - fitted_mean, sd = sqrt(fitted_var)) * qw_k  # prior density * quadrature weight
119 |   loglik <- rowSums(loglik_grmDIF(alpha, beta, rep(theta_k, N), eta), na.rm = TRUE)
120 |   logPop <- log(wt_k)
121 |   exp(loglik + logPop)
122 | }
123 | 
124 | # score function of alpha and beta and eta (returns an ?*N matrix)
125 | # Lik: N*K matrix pik: N*K matrix alpha: J-list beta: J-vector theta_ls: K-vector
126 | sj_ab_grmDIF <- function(j) {
127 | 
128 |   temp2 <- array(0, c(N, K, H[[j]] + 1))
129 |   h <- .subset2(y, j)
130 | 
131 |   if(j %in% items_dif){
132 | 
133 |     drv_h <- vapply(theta_ls, function(theta_k) exp(alpha[[j]][h] + beta[[j]] * theta_k +
134 |                                                       x0DIF(theta_k) %*% eta[[j]])/
135 |                         (1 + exp(alpha[[j]][h] + beta[[j]] * theta_k + x0DIF(theta_k) %*% eta[[j]]))^2,
136 |                     double(N))
137 | 
138 |     drv_h_plus_one <- -vapply(theta_ls, function(theta_k) exp(alpha[[j]][h + 1L] + beta[[j]] * theta_k +
139 |                                                                 x0DIF(theta_k) %*% eta[[j]])/
140 |                                 (1 + exp(alpha[[j]][h + 1L] + beta[[j]] * theta_k + x0DIF(theta_k) %*% eta[[j]]))^2,
141 |                               double(N))
142 |     } else{
143 | 
144 |       drv_h <- vapply(theta_ls, function(theta_k) exp(alpha[[j]][h] + beta[[j]] * theta_k)/
145 |                         (1 + exp(alpha[[j]][h] + beta[[j]] * theta_k))^2,
146 |                       double(N))
147 | 
148 |       drv_h_plus_one <- -vapply(theta_ls, function(theta_k) exp(alpha[[j]][h + 1L] + beta[[j]] * theta_k)/
149 |                                   (1 + exp(alpha[[j]][h + 1L] + beta[[j]] * theta_k))^2,
150 |                                 double(N))
151 |     }
152 |   drv_h[h == 1, ] <- 0
153 |   drv_h_plus_one[h == H[[j]], ] <- 0
154 | 
155 |   for (i in seq_len(N)) {
156 |     if (is.na(h[[i]])) next
157 |     temp2[i, , h[[i]]] <- drv_h[i, ]
158 |     temp2[i, , h[[i]] + 1L] <- drv_h_plus_one[i, ]
159 |   }
160 | 
161 |   comp_a <- pik * Lik/vapply(Lijk, `[`, 1:N, j, FUN.VALUE = double(N))  # N*K matrix
162 |   comp_a[is.na(comp_a)] <- 0
163 | 
164 |   s_alpha <- vapply(1:N, function(i) comp_a[i, ] %*% temp2[i, , 2:H[[j]]],
165 |                     double(H[[j]] - 1L))  # (H[j]-1)*N matrix
166 | 
167 |   temp2_beta <- drv_h + drv_h_plus_one
168 |   s_beta <- rowSums(comp_a * matrix(theta_ls, N, K, byrow = TRUE) * temp2_beta)  # N-vector
169 | 
170 |   if(j %in% items_dif){
171 |     s_eta <- vapply(1:p0, function(i) rowSums(comp_a * matrix(x0[, i, drop = FALSE], N, K, byrow = FALSE) * temp2_beta), double(N))
172 |     if(form_dif == "non-uniform"){
173 |       s_eta <- cbind(s_eta, vapply(1:p0, function(i) rowSums(comp_a * outer(x0[, i], theta_ls) * temp2_beta), double(N)))
174 |     }
175 |     s <- sweep(rbind(s_alpha, t(s_eta), s_beta), 2, rowSums(Lik * pik), FUN = "/")
176 |   } else{
177 |     s <- sweep(rbind(s_alpha, s_beta), 2, rowSums(Lik * pik), FUN = "/")
178 |   }
179 | }
180 | 
181 | 


--------------------------------------------------------------------------------
/R/utils_ltm.R:
--------------------------------------------------------------------------------
 1 | # check if a vector is dichotomous
 2 | invalid_ltm <- function(x) max(x, na.rm = TRUE) != 1
 3 | 
 4 | glm_fit <- function(x, y, weights, tol = 1e-16, ...){
 5 |     glm.fit(x[weights>tol, , drop = FALSE], y[weights>tol], weights = weights[weights>tol], ...)
 6 | }
 7 | 
 8 | # log likelihood function (return N * J matrix) y: N*J data frame alpha:
 9 | # length J numeric vector beta: length J numeric vector theta: length N
10 | # numeric vector
11 | loglik_ltm <- function(alpha, beta, theta) {
12 |     util <- matrix(alpha, N, J, byrow = TRUE) + outer(theta, beta)
13 |     log(exp(as.matrix(y) * util)/(1 + exp(util)))
14 | }
15 | 
16 | # posterior of theta (unnormalized) (returns N-vector) y: N*J data frame
17 | # x: N*p model matrix z: N*q model matrix alpha: length J list beta:
18 | # length J numeric vector gamma: p-vector lambda: q-vector theta_k:
19 | # numeric scalar qw_k numeric scalar
20 | theta_post_ltm <- function(theta_k, qw_k) {
21 |     N <- nrow(y)
22 |     wt_k <- dnorm(theta_k - fitted_mean, sd = sqrt(fitted_var)) * qw_k  # prior density * quadrature weight
23 |     loglik <- rowSums(loglik_ltm(alpha, beta, rep(theta_k, N)), na.rm = TRUE)
24 |     logPop <- log(wt_k)
25 |     exp(loglik + logPop)
26 | }
27 | 
28 | # pseudo tabulated data for item J (returns K*2 matrix) y_j: N-vector w:
29 | # K*N matrix
30 | dummy_fun_ltm <- function(y_j) {
31 |     dummy_mat <- outer(y_j, c(0, 1), "==")  # N*H_j matrix
32 |     dummy_mat[is.na(dummy_mat)] <- 0
33 |     w %*% dummy_mat
34 | }
35 | 
36 | # pseudo tabulated data to pseudo data frame tab: K*2 matrix theta_ls:
37 | # K-vector
38 | tab2df_ltm <- function(tab, theta_ls) {
39 |     theta <- rep(theta_ls, 2)
40 |     y <- rep(c(0, 1), each = K)
41 |     data.frame(y = factor(y), x = theta, wt = as.double(tab))
42 | }
43 | 
44 | # derivative of likelihood wrt alpha, given theta_k
45 | dalpha_ltm <- function(alpha, beta) {
46 |     putil <- plogis(matrix(alpha, K, J, byrow = TRUE) + outer(theta_ls, beta))
47 |     putil * (1 - putil)
48 | }
49 | 
50 | # score function of alpha and beta (return a N*2 matrix) Lik: N*K matrix
51 | # pik: N*K matrix alpha: J-vector beta: J-vector theta_ls: K-vector
52 | sj_ab_ltm <- function(j) {
53 |     tmp_mat <- (pik * Lik/vapply(Lijk, `[`, 1:N, j, FUN.VALUE = double(N)))  # N*K matrix
54 |     dalpha_j <- dalpha[, j, drop = FALSE]  # K*1 matrix
55 |     dbeta_j <- dalpha_j * theta_ls  # K*1 matrix
56 |     sgn <- .subset2(y, j) * 2 - 1
57 |     sgn[is.na(sgn)] <- 0  # N-vector (1, -1, or 0)
58 |     drv_alpha <- sgn * (tmp_mat %*% dalpha_j)/Li
59 |     drv_beta <- sgn * (tmp_mat %*% dbeta_j)/Li
60 |     cbind(drv_alpha, drv_beta)
61 | }
62 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | ```{r, include = FALSE}
  8 | knitr::opts_chunk$set(
  9 |   collapse = TRUE,
 10 |   comment = "#>",
 11 |   fig.path = "man/figures/README-",
 12 |   out.width = "100%"
 13 | )
 14 | ```
 15 | 
 16 | # hIRT: hierarchical item response theory (IRT) models
 17 | 
 18 | hIRT is an R package that implements a class of hierarchical item response theory (IRT) models where both the mean and the variance of the latent "ability parameters" may depend on observed covariates. The current implementation includes both the two-parameter latent trait model for binary data (`hltm()` and `hltm2()`) and the graded response model for ordinal data (`hgrm()` and `hgrm2()`). Both are fitted via the Expectation-Maximization (EM) algorithm. Asymptotic standard errors are derived from the observed information matrix. 
 19 | 
 20 | **Main Reference**: Zhou, Xiang. 2019. "Hierarchical Item Response Models for Analyzing Public Opinion." Political Analysis, 27(4): 481-502. Available at: <https://doi.org/10.1017/pan.2018.63>
 21 | 
 22 | Full paper with technical appendix is available at: <https://scholar.harvard.edu/files/xzhou/files/Zhou2019_hIRT.pdf>
 23 | 
 24 | ## Installation
 25 | 
 26 | You can install the released version of hIRT from [CRAN](https://CRAN.R-project.org) with:
 27 | 
 28 | ``` r
 29 | install.packages("hIRT")
 30 | ```
 31 | 
 32 | And the development version from [GitHub](https://github.com/) with:
 33 | 
 34 | ``` r
 35 | # install.packages("devtools")
 36 | devtools::install_github("xiangzhou09/hIRT")
 37 | ```
 38 | ## Example
 39 | 
 40 | The following example illustrates how the `hgrm()` function can be used to examine the effects of education and party affiliation on economic ideology, a latent variable gauged by a number of survey items in the American National Election Studies (ANES), 2008. Documentation of the dataset `nes_econ2008` can be accessed by running `?nes_econ2008` in R after loading the `hIRT` package.
 41 | 
 42 | ```{r example}
 43 | library(hIRT)
 44 | 
 45 | # survey items used to measure economic ideology
 46 | y <- nes_econ2008[, -(1:3)]
 47 | 
 48 | # predictors for the mean of economic ideology
 49 | x <- model.matrix( ~ party * educ, nes_econ2008)
 50 | 
 51 | # predictors for the variance of economic ideology
 52 | z <- model.matrix( ~ party, nes_econ2008)
 53 | 
 54 | # fitting a hierarhical graded response model
 55 | nes_m1 <- hgrm(y, x, z)
 56 | 
 57 | nes_m1
 58 | ```
 59 | 
 60 | The output from `hgrm` is an object of class `hIRT`. The `print()` method for `hIRT` outputs the regression tables for the mean regression and the variance regression.
 61 | 
 62 | ## Extracting coefficients
 63 | 
 64 | The `coef_item()`, `coef_mean()`, and `coef_var()` functions can be used to extract coefficient tables for item parameters, the mean regression, and the variance regression respectively. 
 65 | 
 66 | 
 67 | ```{r coef}
 68 | coef_item(nes_m1)
 69 | 
 70 | coef_mean(nes_m1)
 71 | 
 72 | coef_var(nes_m1)
 73 | ```
 74 | 
 75 | ## Latent scores
 76 | 
 77 | The `latent_scores()` function can be used to extract the Expected A Posteriori (EAP) estimates of the latent ability parameters, along with their "prior" estimates (without the random effects). In this example, the latent ability estimates can be interpreted as the estimated ideological positions of ANES respondents on economic issues.
 78 | 
 79 | ```{r latent}
 80 | 
 81 | pref <- latent_scores(nes_m1)
 82 | 
 83 | summary(pref)
 84 | ```
 85 | 
 86 | ## Identification constraints.
 87 | 
 88 | The `constr` parameter in the `hgrm()` and `hltm()` function can be used to specify the type of constraints used to identify the model. The default option, `"latent_scale"`, constrains the mean of the latent ability parameters to zero and the geometric mean of their prior variance to one; Alternatively, `"items"` sets the mean of the item difficulty parameters to zero and the geometric mean of the discrimination parameters to one.
 89 | 
 90 | In practice, one may want to interpret the effects of the mean predictors (in the above example, education and party affiliation) on the standard deviation scale of the latent trait. This can be easily achieved through rescaling their point estimates and standard errors.
 91 | 
 92 | ```{r constr, message=FALSE}
 93 | 
 94 | library(dplyr)
 95 | 
 96 | total_sd <- sqrt(var(pref$post_mean) + mean(pref$post_sd^2))
 97 | 
 98 | coef_mean_sd_scale <- coef_mean(nes_m1) %>%
 99 |   tibble::rownames_to_column("term") %>%
100 |   mutate(`Estimate` = `Estimate`/total_sd,
101 |          `Std_Error` = `Std_Error`/total_sd)
102 | 
103 | coef_mean_sd_scale
104 | 
105 | ```
106 | 
107 | ## hIRT with fixed item parameters
108 | 
109 | Sometimes, the researcher might want to fit the hIRT models using a set of fixed item parameters, for example, to make results comparable across different studies. The `hgrm2()` and `hltm2()` functions can be used for this purpose. They are illustrated in more detail in the package documentation.
110 | 
111 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  3 | 
  4 | # hIRT: hierarchical item response theory (IRT) models
  5 | 
  6 | hIRT is an R package that implements a class of hierarchical item
  7 | response theory (IRT) models where both the mean and the variance of the
  8 | latent “ability parameters” may depend on observed covariates. The
  9 | current implementation includes both the two-parameter latent trait
 10 | model for binary data (`hltm()` and `hltm2()`) and the graded response
 11 | model for ordinal data (`hgrm()` and `hgrm2()`). Both are fitted via the
 12 | Expectation-Maximization (EM) algorithm. Asymptotic standard errors are
 13 | derived from the observed information matrix.
 14 | 
 15 | **Main Reference**: Zhou, Xiang. 2019. “Hierarchical Item Response
 16 | Models for Analyzing Public Opinion.” Political Analysis, 27(4):
 17 | 481-502. Available at: <https://doi.org/10.1017/pan.2018.63>
 18 | 
 19 | Full paper with technical appendix is available at:
 20 | <https://scholar.harvard.edu/files/xzhou/files/Zhou2019_hIRT.pdf>
 21 | 
 22 | ## Installation
 23 | 
 24 | You can install the released version of hIRT from
 25 | [CRAN](https://CRAN.R-project.org) with:
 26 | 
 27 | ``` r
 28 | install.packages("hIRT")
 29 | ```
 30 | 
 31 | And the development version from [GitHub](https://github.com/) with:
 32 | 
 33 | ``` r
 34 | # install.packages("devtools")
 35 | devtools::install_github("xiangzhou09/hIRT")
 36 | ```
 37 | 
 38 | ## Example
 39 | 
 40 | The following example illustrates how the `hgrm()` function can be used
 41 | to examine the effects of education and party affiliation on economic
 42 | ideology, a latent variable gauged by a number of survey items in the
 43 | American National Election Studies (ANES), 2008. Documentation of the
 44 | dataset `nes_econ2008` can be accessed by running `?nes_econ2008` in R
 45 | after loading the `hIRT` package.
 46 | 
 47 | ``` r
 48 | library(hIRT)
 49 | #> Registered S3 method overwritten by 'pryr':
 50 | #>   method      from
 51 | #>   print.bytes Rcpp
 52 | 
 53 | # survey items used to measure economic ideology
 54 | y <- nes_econ2008[, -(1:3)]
 55 | 
 56 | # predictors for the mean of economic ideology
 57 | x <- model.matrix( ~ party * educ, nes_econ2008)
 58 | 
 59 | # predictors for the variance of economic ideology
 60 | z <- model.matrix( ~ party, nes_econ2008)
 61 | 
 62 | # fitting a hierarhical graded response model
 63 | nes_m1 <- hgrm(y, x, z)
 64 | #> ............
 65 | #>  converged at iteration 12
 66 | 
 67 | nes_m1
 68 | #> 
 69 | #> Call:
 70 | #> hgrm(y = y, x = x, z = z)
 71 | #> 
 72 | #> Mean Regression:
 73 | #>                         Estimate Std_Error z_value p_value
 74 | #> x(Intercept)              -0.480     0.105  -4.570   0.000
 75 | #> xpartyindependent          0.386     0.086   4.473   0.000
 76 | #> xpartyRepublican           1.133     0.135   8.408   0.000
 77 | #> xeduc2                     0.037     0.079   0.467   0.641
 78 | #> xpartyindependent:educ2    0.235     0.117   2.007   0.045
 79 | #> xpartyRepublican:educ2     0.428     0.148   2.886   0.004
 80 | #> 
 81 | #> Variance Regression:
 82 | #>                   Estimate Std_Error z_value p_value
 83 | #> z(Intercept)        -0.097     0.139  -0.697   0.486
 84 | #> zpartyindependent    0.166     0.100   1.661   0.097
 85 | #> zpartyRepublican     0.172     0.126   1.373   0.170
 86 | #> 
 87 | #> Log Likelihood: -16259.16
 88 | ```
 89 | 
 90 | The output from `hgrm` is an object of class `hIRT`. The `print()`
 91 | method for `hIRT` outputs the regression tables for the mean regression
 92 | and the variance regression.
 93 | 
 94 | ## Extracting coefficients
 95 | 
 96 | The `coef_item()`, `coef_mean()`, and `coef_var()` functions can be used
 97 | to extract coefficient tables for item parameters, the mean regression,
 98 | and the variance regression respectively.
 99 | 
100 | ``` r
101 | coef_item(nes_m1)
102 | #> $health_ins7
103 | #>        Estimate Std_Error z_value p_value
104 | #> y>=2      1.279        NA      NA      NA
105 | #> y>=3      0.541     0.063   8.542   0.000
106 | #> y>=4     -0.075     0.083  -0.898   0.369
107 | #> y>=5     -1.047     0.107  -9.826   0.000
108 | #> y>=6     -1.852     0.124 -14.901   0.000
109 | #> y>=7     -2.684     0.149 -17.990   0.000
110 | #> Dscrmn    1.016     0.096  10.569   0.000
111 | #> 
112 | #> $jobs_guar7
113 | #>        Estimate Std_Error z_value p_value
114 | #> y>=2      2.136     0.173  12.377       0
115 | #> y>=3      1.352     0.153   8.860       0
116 | #> y>=4      0.607     0.141   4.299       0
117 | #> y>=5     -0.520     0.137  -3.797       0
118 | #> y>=6     -1.611     0.141 -11.429       0
119 | #> y>=7     -2.785     0.163 -17.043       0
120 | #> Dscrmn    1.305     0.114  11.448       0
121 | #> 
122 | #> $gov_services7
123 | #>        Estimate Std_Error z_value p_value
124 | #> y>=2      3.950     0.222  17.760   0.000
125 | #> y>=3      2.859     0.182  15.707   0.000
126 | #> y>=4      1.831     0.158  11.592   0.000
127 | #> y>=5      0.247     0.147   1.679   0.093
128 | #> y>=6     -1.001     0.154  -6.490   0.000
129 | #> y>=7     -2.020     0.169 -11.947   0.000
130 | #> Dscrmn   -1.363     0.116 -11.715   0.000
131 | #> 
132 | #> $FS_poor3
133 | #>        Estimate Std_Error z_value p_value
134 | #> y>=2     -1.180     0.179  -6.601       0
135 | #> y>=3     -4.459     0.243 -18.357       0
136 | #> Dscrmn    1.918     0.164  11.679       0
137 | #> 
138 | #> $FS_childcare3
139 | #>        Estimate Std_Error z_value p_value
140 | #> y>=2     -0.808     0.148  -5.474       0
141 | #> y>=3     -4.051     0.192 -21.132       0
142 | #> Dscrmn    1.608     0.128  12.535       0
143 | #> 
144 | #> $FS_crime3
145 | #>        Estimate Std_Error z_value p_value
146 | #> y>=2     -0.845     0.066 -12.866       0
147 | #> y>=3     -3.150     0.108 -29.048       0
148 | #> Dscrmn    0.516     0.059   8.823       0
149 | #> 
150 | #> $FS_publicschools3
151 | #>        Estimate Std_Error z_value p_value
152 | #> y>=2     -1.790     0.136 -13.197       0
153 | #> y>=3     -4.144     0.188 -22.022       0
154 | #> Dscrmn    1.302     0.111  11.751       0
155 | #> 
156 | #> $FS_welfare3
157 | #>        Estimate Std_Error z_value p_value
158 | #> y>=2      1.054     0.117   8.970       0
159 | #> y>=3     -1.355     0.116 -11.650       0
160 | #> Dscrmn    1.178     0.099  11.937       0
161 | #> 
162 | #> $FS_envir3
163 | #>        Estimate Std_Error z_value p_value
164 | #> y>=2     -0.855     0.106  -8.071       0
165 | #> y>=3     -3.499     0.159 -22.023       0
166 | #> Dscrmn    1.101     0.092  11.953       0
167 | #> 
168 | #> $FS_socsec3
169 | #>        Estimate Std_Error z_value p_value
170 | #> y>=2     -1.091     0.104 -10.535       0
171 | #> y>=3     -4.278     0.178 -24.033       0
172 | #> Dscrmn    1.028        NA      NA      NA
173 | 
174 | coef_mean(nes_m1)
175 | #>                         Estimate Std_Error z_value p_value
176 | #> x(Intercept)              -0.480     0.105  -4.570   0.000
177 | #> xpartyindependent          0.386     0.086   4.473   0.000
178 | #> xpartyRepublican           1.133     0.135   8.408   0.000
179 | #> xeduc2                     0.037     0.079   0.467   0.641
180 | #> xpartyindependent:educ2    0.235     0.117   2.007   0.045
181 | #> xpartyRepublican:educ2     0.428     0.148   2.886   0.004
182 | 
183 | coef_var(nes_m1)
184 | #>                   Estimate Std_Error z_value p_value
185 | #> z(Intercept)        -0.097     0.139  -0.697   0.486
186 | #> zpartyindependent    0.166     0.100   1.661   0.097
187 | #> zpartyRepublican     0.172     0.126   1.373   0.170
188 | ```
189 | 
190 | ## Latent scores
191 | 
192 | The `latent_scores()` function can be used to extract the Expected A
193 | Posteriori (EAP) estimates of the latent ability parameters, along with
194 | their “prior” estimates (without the random effects). In this example,
195 | the latent ability estimates can be interpreted as the estimated
196 | ideological positions of ANES respondents on economic issues.
197 | 
198 | ``` r
199 | 
200 | pref <- latent_scores(nes_m1)
201 | 
202 | summary(pref)
203 | #>    post_mean            post_sd         prior_mean            prior_sd    
204 | #>  Min.   :-2.082000   Min.   :0.3940   Min.   :-0.4800000   Min.   :0.953  
205 | #>  1st Qu.:-0.751000   1st Qu.:0.4788   1st Qu.:-0.4440000   1st Qu.:0.953  
206 | #>  Median :-0.104000   Median :0.5280   Median :-0.0950000   Median :1.035  
207 | #>  Mean   :-0.000147   Mean   :0.5469   Mean   :-0.0001561   Mean   :1.001  
208 | #>  3rd Qu.: 0.629500   3rd Qu.:0.6090   3rd Qu.: 0.1770000   3rd Qu.:1.035  
209 | #>  Max.   : 3.359000   Max.   :0.9780   Max.   : 1.1170000   Max.   :1.039
210 | ```
211 | 
212 | ## Identification constraints.
213 | 
214 | The `constr` parameter in the `hgrm()` and `hltm()` function can be used
215 | to specify the type of constraints used to identify the model. The
216 | default option, `"latent_scale"`, constrains the mean of the latent
217 | ability parameters to zero and the geometric mean of their prior
218 | variance to one; Alternatively, `"items"` sets the mean of the item
219 | difficulty parameters to zero and the geometric mean of the
220 | discrimination parameters to one.
221 | 
222 | In practice, one may want to interpret the effects of the mean
223 | predictors (in the above example, education and party affiliation) on
224 | the standard deviation scale of the latent trait. This can be easily
225 | achieved through rescaling their point estimates and standard errors.
226 | 
227 | ``` r
228 | 
229 | library(dplyr)
230 | 
231 | total_sd <- sqrt(var(pref$post_mean) + mean(pref$post_sd^2))
232 | 
233 | coef_mean_sd_scale <- coef_mean(nes_m1) %>%
234 |   tibble::rownames_to_column("term") %>%
235 |   mutate(`Estimate` = `Estimate`/total_sd,
236 |          `Std_Error` = `Std_Error`/total_sd)
237 | 
238 | coef_mean_sd_scale
239 | #>                      term    Estimate  Std_Error z_value p_value
240 | #> 1            x(Intercept) -0.42437486 0.09283200  -4.570   0.000
241 | #> 2       xpartyindependent  0.34126812 0.07603383   4.473   0.000
242 | #> 3        xpartyRepublican  1.00170150 0.11935543   8.408   0.000
243 | #> 4                  xeduc2  0.03271223 0.06984503   0.467   0.641
244 | #> 5 xpartyindependent:educ2  0.20776686 0.10344137   2.007   0.045
245 | #> 6  xpartyRepublican:educ2  0.37840092 0.13084892   2.886   0.004
246 | ```
247 | 
248 | ## hIRT with fixed item parameters
249 | 
250 | Sometimes, the researcher might want to fit the hIRT models using a set
251 | of fixed item parameters, for example, to make results comparable across
252 | different studies. The `hgrm2()` and `hltm2()` functions can be used for
253 | this purpose. They are illustrated in more detail in the package
254 | documentation.
255 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Test environments
 2 | * ubuntu 12.04 (on travis-ci), R 3.4.1
 3 | * win-builder (devel and release)
 4 | 
 5 | ## R CMD check results
 6 | 
 7 | 0 errors | 0 warnings | 0 notes
 8 | 
 9 | ## Downstream dependencies
10 | 
11 | There are currently no downstream dependencies for this package.
12 | 
13 | ---
14 | 


--------------------------------------------------------------------------------
/data/nes_econ2008.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiangzhou09/hIRT/b33d90f13287f60f7feffd0c061858340dff40a6/data/nes_econ2008.rda


--------------------------------------------------------------------------------
/hIRT.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/man/coef_item.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/coef.R
 3 | \name{coef_item}
 4 | \alias{coef_item}
 5 | \alias{coef_mean}
 6 | \alias{coef_var}
 7 | \title{Parameter Estimates from Hierarchical IRT Models.}
 8 | \usage{
 9 | coef_item(x, by_item = TRUE, digits = 3)
10 | 
11 | coef_mean(x, digits = 3)
12 | 
13 | coef_var(x, digits = 3)
14 | }
15 | \arguments{
16 | \item{x}{An object of class \code{hIRT}}
17 | 
18 | \item{by_item}{Logical. Should item parameters be stored item by item
19 | (if \code{TRUE}) or put together in a data frame (if \code{FALSE})?}
20 | 
21 | \item{digits}{The number of significant digits to use when printing}
22 | }
23 | \value{
24 | Parameter estimates, standard errors, z values, and p values
25 |   organized as a data frame (if \code{by_item = TRUE}) or a list (if \code{
26 |   by_item = FALSE}).
27 | }
28 | \description{
29 | Parameter estimates from either \code{hltm} or \code{hgrm} or \code{hgrmDIF} models. \code{code_item}
30 | reports estimates of item parameters. \code{coef_mean} reports results for the mean equation.
31 | \code{coef_var} reports results for the variance equation.
32 | }
33 | \examples{
34 | y <- nes_econ2008[, -(1:3)]
35 | x <- model.matrix( ~ party * educ, nes_econ2008)
36 | z <- model.matrix( ~ party, nes_econ2008)
37 | nes_m1 <- hgrm(y, x, z)
38 | coef_item(nes_m1)
39 | coef_mean(nes_m1)
40 | coef_var(nes_m1)
41 | }
42 | 


--------------------------------------------------------------------------------
/man/figures/README-pressure-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiangzhou09/hIRT/b33d90f13287f60f7feffd0c061858340dff40a6/man/figures/README-pressure-1.png


--------------------------------------------------------------------------------
/man/hgrm.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/hgrm.R
  3 | \name{hgrm}
  4 | \alias{hgrm}
  5 | \title{Fitting Hierarchical Graded Response Models (for Ordinal Responses)}
  6 | \usage{
  7 | hgrm(
  8 |   y,
  9 |   x = NULL,
 10 |   z = NULL,
 11 |   constr = c("latent_scale", "items"),
 12 |   beta_set = 1L,
 13 |   sign_set = TRUE,
 14 |   init = c("naive", "glm", "irt"),
 15 |   control = list()
 16 | )
 17 | }
 18 | \arguments{
 19 | \item{y}{A data frame or matrix of item responses.}
 20 | 
 21 | \item{x}{An optional model matrix, including the intercept term, that predicts the
 22 | mean of the latent preference. If not supplied, only the intercept term is included.}
 23 | 
 24 | \item{z}{An optional model matrix, including the intercept term, that predicts the
 25 | variance of the latent preference. If not supplied, only the intercept term is included.}
 26 | 
 27 | \item{constr}{The type of constraints used to identify the model: "latent_scale",
 28 | or "items". The default, "latent_scale" constrains the mean of latent preferences
 29 | to zero and the geometric mean of prior variance to one; "items" places constraints
 30 | on item parameters instead and sets the mean of item difficulty parameters to zero
 31 | and the geometric mean of the discrimination parameters to one.}
 32 | 
 33 | \item{beta_set}{The index of the item for which the discrimination parameter is
 34 | restricted to be positive (or negative). It may take any integer value from
 35 | 1 to \code{ncol(y)}.}
 36 | 
 37 | \item{sign_set}{Logical. Should the discrimination parameter of
 38 | the corresponding item (indexed by \code{beta_set}) be positive
 39 | (if \code{TRUE}) or negative (if \code{FALSE})?}
 40 | 
 41 | \item{init}{A character string indicating how item parameters are initialized. It can be
 42 | "naive", "glm", or "irt".}
 43 | 
 44 | \item{control}{A list of control values
 45 | \describe{
 46 |  \item{max_iter}{The maximum number of iterations of the EM algorithm.
 47 |    The default is 150.}
 48 |  \item{eps}{Tolerance parameter used to determine convergence of the
 49 |   EM algorithm. Specifically, iterations continue until the Euclidean
 50 |   distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps},
 51 |   where \eqn{\beta} is the vector of item discrimination parameters.
 52 |   \code{eps}=1e-4 by default.}
 53 |  \item{max_iter2}{The maximum number of iterations of the conditional
 54 |    maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}.
 55 |    The default is 15.}
 56 |  \item{eps2}{Tolerance parameter used to determine convergence of the
 57 |    conditional maximization procedures for updating \eqn{\gamma} and
 58 |    \eqn{\lambda}. Specifically, iterations continue until the Euclidean
 59 |   distance between two consecutive log likelihoods falls under \code{eps2}.
 60 |   \code{eps2}=1e-3 by default.}
 61 |  \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.}
 62 |  \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.}
 63 | }}
 64 | }
 65 | \value{
 66 | An object of class \code{hgrm}.
 67 |  \item{coefficients}{A data frame of parameter estimates, standard errors,
 68 |   z values and p values.}
 69 |  \item{scores}{A data frame of EAP estimates of latent preferences and
 70 |   their approximate standard errors.}
 71 |  \item{vcov}{Variance-covariance matrix of parameter estimates.}
 72 |  \item{log_Lik}{The log-likelihood value at convergence.}
 73 |  \item{N}{Number of units.}
 74 |  \item{J}{Number of items.}
 75 |  \item{H}{A vector denoting the number of response categories for each item.}
 76 |  \item{ylevels}{A list showing the levels of the factorized response categories.}
 77 |  \item{p}{The number of predictors for the mean equation.}
 78 |  \item{q}{The number of predictors for the variance equation.}
 79 |  \item{control}{List of control values.}
 80 |  \item{call}{The matched call.}
 81 | }
 82 | \description{
 83 | \code{hgrm} fits a hierarchical graded response model in which both
 84 | the mean and the variance of the latent preference (ability parameter)
 85 | may depend on person-specific covariates (\code{x} and \code{z}).
 86 | Specifically, the mean is specified as a linear combination of \code{x}
 87 | and the log of the variance is specified as a linear combination of
 88 | \code{z}. Nonresponses are treated as missing at random.
 89 | }
 90 | \examples{
 91 | y <- nes_econ2008[, -(1:3)]
 92 | x <- model.matrix( ~ party * educ, nes_econ2008)
 93 | z <- model.matrix( ~ party, nes_econ2008)
 94 | nes_m1 <- hgrm(y, x, z)
 95 | nes_m1
 96 | }
 97 | \references{
 98 | Zhou, Xiang. 2019. "\href{https://doi.org/10.1017/pan.2018.63}{Hierarchical Item Response Models for Analyzing Public Opinion.}" Political Analysis.
 99 | }
100 | 


--------------------------------------------------------------------------------
/man/hgrm2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hgrm2.R
 3 | \name{hgrm2}
 4 | \alias{hgrm2}
 5 | \title{Hierarchical Graded Response Models with Known Item Parameters}
 6 | \usage{
 7 | hgrm2(y, x = NULL, z = NULL, item_coefs, control = list())
 8 | }
 9 | \arguments{
10 | \item{y}{A data frame or matrix of item responses.}
11 | 
12 | \item{x}{An optional model matrix, including the intercept term, that predicts the
13 | mean of the latent preference. If not supplied, only the intercept term is included.}
14 | 
15 | \item{z}{An optional model matrix, including the intercept term, that predicts the
16 | variance of the latent preference. If not supplied, only the intercept term is included.}
17 | 
18 | \item{item_coefs}{A list of known item parameters. The parameters of item \eqn{j} are given
19 | by the \eqn{j}th element, which should be a vector of length \eqn{H_j}, containing
20 | \eqn{H_j - 1} item difficulty parameters (in descending order) and one item discrimination
21 | parameter.}
22 | 
23 | \item{control}{A list of control values
24 | \describe{
25 |  \item{max_iter}{The maximum number of iterations of the EM algorithm.
26 |    The default is 150.}
27 |  \item{eps}{Tolerance parameter used to determine convergence of the
28 |   EM algorithm. Specifically, iterations continue until the Euclidean
29 |   distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps},
30 |   where \eqn{\beta} is the vector of item discrimination parameters.
31 |   \code{eps}=1e-4 by default.}
32 |  \item{max_iter2}{The maximum number of iterations of the conditional
33 |    maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}.
34 |    The default is 15.}
35 |  \item{eps2}{Tolerance parameter used to determine convergence of the
36 |    conditional maximization procedures for updating \eqn{\gamma} and
37 |    \eqn{\lambda}. Specifically, iterations continue until the Euclidean
38 |   distance between two consecutive log likelihoods falls under \code{eps2}.
39 |   \code{eps2}=1e-3 by default.}
40 |  \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.}
41 |  \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.}
42 | }}
43 | }
44 | \value{
45 | An object of class \code{hgrm}.
46 |  \item{coefficients}{A data frame of parameter estimates, standard errors,
47 |   z values and p values.}
48 |  \item{scores}{A data frame of EAP estimates of latent preferences and
49 |   their approximate standard errors.}
50 |  \item{vcov}{Variance-covariance matrix of parameter estimates.}
51 |  \item{log_Lik}{The log-likelihood value at convergence.}
52 |  \item{N}{Number of units.}
53 |  \item{J}{Number of items.}
54 |  \item{H}{A vector denoting the number of response categories for each item.}
55 |  \item{ylevels}{A list showing the levels of the factorized response categories.}
56 |  \item{p}{The number of predictors for the mean equation.}
57 |  \item{q}{The number of predictors for the variance equation.}
58 |  \item{control}{List of control values.}
59 |  \item{call}{The matched call.}
60 | }
61 | \description{
62 | \code{hgrm2} fits a hierarchical graded response model where the item parameters
63 |   are known and supplied by the user.
64 | }
65 | \examples{
66 | 
67 | y <- nes_econ2008[, -(1:3)]
68 | x <- model.matrix( ~ party * educ, nes_econ2008)
69 | z <- model.matrix( ~ party, nes_econ2008)
70 | 
71 | n <- nrow(nes_econ2008)
72 | id_train <- sample.int(n, n/4)
73 | id_test <- setdiff(1:n, id_train)
74 | 
75 | y_train <- y[id_train, ]
76 | x_train <- x[id_train, ]
77 | z_train <- z[id_train, ]
78 | 
79 | mod_train <- hgrm(y_train, x_train, z_train)
80 | 
81 | y_test <- y[id_test, ]
82 | x_test <- x[id_test, ]
83 | z_test <- z[id_test, ]
84 | 
85 | item_coefs <- lapply(coef_item(mod_train), `[[`, "Estimate")
86 | 
87 | model_test <- hgrm2(y_test, x_test, z_test, item_coefs = item_coefs)
88 | }
89 | 


--------------------------------------------------------------------------------
/man/hgrmDIF.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/hgrmDIF.R
  3 | \name{hgrmDIF}
  4 | \alias{hgrmDIF}
  5 | \title{Hierarchical Graded Response Models with Differential Item Functioning}
  6 | \usage{
  7 | hgrmDIF(
  8 |   y,
  9 |   x = NULL,
 10 |   z = NULL,
 11 |   x0 = x[, -1, drop = FALSE],
 12 |   items_dif = 1L,
 13 |   form_dif = c("uniform", "non-uniform"),
 14 |   constr = c("latent_scale"),
 15 |   beta_set = 1L,
 16 |   sign_set = TRUE,
 17 |   init = c("naive", "glm", "irt"),
 18 |   control = list()
 19 | )
 20 | }
 21 | \arguments{
 22 | \item{y}{A data frame or matrix of item responses.}
 23 | 
 24 | \item{x}{An optional model matrix, including the intercept term, that predicts the
 25 | mean of the latent preference. If not supplied, only the intercept term is included.}
 26 | 
 27 | \item{z}{An optional model matrix, including the intercept term, that predicts the
 28 | variance of the latent preference. If not supplied, only the intercept term is included.}
 29 | 
 30 | \item{x0}{A matrix specifying the covariates by which differential item functioning operates. If not supplied,
 31 | \code{x0} is taken to be a matrix containing all predictors in \code{x} except the intercept.}
 32 | 
 33 | \item{items_dif}{The indices of the items for which differential item functioning is tested.}
 34 | 
 35 | \item{form_dif}{Form of differential item functioning being tested. Either "uniform" or "non-uniform."}
 36 | 
 37 | \item{constr}{The type of constraints used to identify the model: "latent_scale",
 38 | or "items". The default, "latent_scale" constrains the mean of latent preferences
 39 | to zero and the geometric mean of prior variance to one; "items" places constraints
 40 | on item parameters instead and sets the mean of item difficulty parameters to zero
 41 | and the geometric mean of the discrimination parameters to one. Currently, only "latent_scale"
 42 | is supported in hgrmDIF().}
 43 | 
 44 | \item{beta_set}{The index of the item for which the discrimination parameter is
 45 | restricted to be positive (or negative). It may take any integer value from
 46 | 1 to \code{ncol(y)}.}
 47 | 
 48 | \item{sign_set}{Logical. Should the discrimination parameter of
 49 | the corresponding item (indexed by \code{beta_set}) be positive
 50 | (if \code{TRUE}) or negative (if \code{FALSE})?}
 51 | 
 52 | \item{init}{A character string indicating how item parameters are initialized. It can be
 53 | "naive", "glm", or "irt".}
 54 | 
 55 | \item{control}{A list of control values
 56 | \describe{
 57 |  \item{max_iter}{The maximum number of iterations of the EM algorithm.
 58 |    The default is 150.}
 59 |  \item{eps}{Tolerance parameter used to determine convergence of the
 60 |   EM algorithm. Specifically, iterations continue until the Euclidean
 61 |   distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps},
 62 |   where \eqn{\beta} is the vector of item discrimination parameters.
 63 |   \code{eps}=1e-4 by default.}
 64 |  \item{max_iter2}{The maximum number of iterations of the conditional
 65 |    maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}.
 66 |    The default is 15.}
 67 |  \item{eps2}{Tolerance parameter used to determine convergence of the
 68 |    conditional maximization procedures for updating \eqn{\gamma} and
 69 |    \eqn{\lambda}. Specifically, iterations continue until the Euclidean
 70 |   distance between two consecutive log likelihoods falls under \code{eps2}.
 71 |   \code{eps2}=1e-3 by default.}
 72 |  \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.}
 73 |  \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.}
 74 | }}
 75 | }
 76 | \value{
 77 | An object of class \code{hgrm}.
 78 |  \item{coefficients}{A data frame of parameter estimates, standard errors,
 79 |   z values and p values.}
 80 |  \item{scores}{A data frame of EAP estimates of latent preferences and
 81 |   their approximate standard errors.}
 82 |  \item{vcov}{Variance-covariance matrix of parameter estimates.}
 83 |  \item{log_Lik}{The log-likelihood value at convergence.}
 84 |  \item{N}{Number of units.}
 85 |  \item{J}{Number of items.}
 86 |  \item{H}{A vector denoting the number of response categories for each item.}
 87 |  \item{ylevels}{A list showing the levels of the factorized response categories.}
 88 |  \item{p}{The number of predictors for the mean equation.}
 89 |  \item{q}{The number of predictors for the variance equation.}
 90 |  \item{p0}{The number of predictors for items with DIF.}
 91 |  \item{coef_item}{Item coefficient estimates.}
 92 |  \item{control}{List of control values.}
 93 |  \item{call}{The matched call.}
 94 | }
 95 | \description{
 96 | \code{hgrmDIF} fits a hierarchical graded response model similar to hgrm(), but person-specific
 97 | covariates \code{x} are allowed to affect item responses directly (not via the latent preference).
 98 | This model can be used to test for the presence of differential item functioning.
 99 | }
100 | \examples{
101 | y <- nes_econ2008[, -(1:3)]
102 | x <- model.matrix( ~ party * educ, nes_econ2008)
103 | nes_m2 <- hgrmDIF(y, x, items_dif = 1:2)
104 | coef_item(nes_m2)
105 | }
106 | 


--------------------------------------------------------------------------------
/man/hltm.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/hltm.R
  3 | \name{hltm}
  4 | \alias{hltm}
  5 | \title{Fitting Hierarchical Latent Trait Models (for Binary Responses).}
  6 | \usage{
  7 | hltm(
  8 |   y,
  9 |   x = NULL,
 10 |   z = NULL,
 11 |   constr = c("latent_scale", "items"),
 12 |   beta_set = 1L,
 13 |   sign_set = TRUE,
 14 |   init = c("naive", "glm", "irt"),
 15 |   control = list()
 16 | )
 17 | }
 18 | \arguments{
 19 | \item{y}{A data frame or matrix of item responses.}
 20 | 
 21 | \item{x}{An optional model matrix, including the intercept term, that predicts the
 22 | mean of the latent preference. If not supplied, only the intercept term is included.}
 23 | 
 24 | \item{z}{An optional model matrix, including the intercept term, that predicts the
 25 | variance of the latent preference. If not supplied, only the intercept term is included.}
 26 | 
 27 | \item{constr}{The type of constraints used to identify the model: "latent_scale",
 28 | or "items". The default, "latent_scale" constrains the mean of latent preferences
 29 | to zero and the geometric mean of prior variance to one; "items" places constraints
 30 | on item parameters instead and sets the mean of item difficulty parameters to zero
 31 | and the geometric mean of the discrimination parameters to one.}
 32 | 
 33 | \item{beta_set}{The index of the item for which the discrimination parameter is
 34 | restricted to be positive (or negative). It may take any integer value from
 35 | 1 to \code{ncol(y)}.}
 36 | 
 37 | \item{sign_set}{Logical. Should the discrimination parameter of
 38 | the corresponding item (indexed by \code{beta_set}) be positive
 39 | (if \code{TRUE}) or negative (if \code{FALSE})?}
 40 | 
 41 | \item{init}{A character string indicating how item parameters are initialized. It can be
 42 | "naive", "glm", or "irt".}
 43 | 
 44 | \item{control}{A list of control values
 45 | \describe{
 46 |  \item{max_iter}{The maximum number of iterations of the EM algorithm.
 47 |    The default is 150.}
 48 |  \item{eps}{Tolerance parameter used to determine convergence of the
 49 |   EM algorithm. Specifically, iterations continue until the Euclidean
 50 |   distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps},
 51 |   where \eqn{\beta} is the vector of item discrimination parameters.
 52 |   \code{eps}=1e-4 by default.}
 53 |  \item{max_iter2}{The maximum number of iterations of the conditional
 54 |    maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}.
 55 |    The default is 15.}
 56 |  \item{eps2}{Tolerance parameter used to determine convergence of the
 57 |    conditional maximization procedures for updating \eqn{\gamma} and
 58 |    \eqn{\lambda}. Specifically, iterations continue until the Euclidean
 59 |   distance between two consecutive log likelihoods falls under \code{eps2}.
 60 |   \code{eps2}=1e-3 by default.}
 61 |  \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.}
 62 |  \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.}
 63 | }}
 64 | }
 65 | \value{
 66 | An object of class \code{hltm}.
 67 |  \item{coefficients}{A data frame of parameter estimates, standard errors,
 68 |   z values and p values.}
 69 |  \item{scores}{A data frame of EAP estimates of latent preferences and
 70 |   their approximate standard errors.}
 71 |  \item{vcov}{Variance-covariance matrix of parameter estimates.}
 72 |  \item{log_Lik}{The log-likelihood value at convergence.}
 73 |  \item{N}{Number of units.}
 74 |  \item{J}{Number of items.}
 75 |  \item{H}{A vector denoting the number of response categories for each item.}
 76 |  \item{ylevels}{A list showing the levels of the factorized response categories.}
 77 |  \item{p}{The number of predictors for the mean equation.}
 78 |  \item{q}{The number of predictors for the variance equation.}
 79 |  \item{control}{List of control values.}
 80 |  \item{call}{The matched call.}
 81 | }
 82 | \description{
 83 | \code{hltm} fits a hierarchical latent trait model in which both
 84 | the mean and the variance of the latent preference (ability parameter)
 85 | may depend on person-specific covariates (\code{x} and \code{z}).
 86 | Specifically, the mean is specified as a linear combination of \code{x}
 87 | and the log of the variance is specified as a linear combination of
 88 | \code{z}.
 89 | }
 90 | \examples{
 91 | y <- nes_econ2008[, -(1:3)]
 92 | x <- model.matrix( ~ party * educ, nes_econ2008)
 93 | z <- model.matrix( ~ party, nes_econ2008)
 94 | 
 95 | dichotomize <- function(x) findInterval(x, c(mean(x, na.rm = TRUE)))
 96 | y[] <- lapply(y, dichotomize)
 97 | nes_m1 <- hltm(y, x, z)
 98 | nes_m1
 99 | }
100 | \references{
101 | Zhou, Xiang. 2019. "\href{https://doi.org/10.1017/pan.2018.63}{Hierarchical Item Response Models for Analyzing Public Opinion.}" Political Analysis.
102 | }
103 | 


--------------------------------------------------------------------------------
/man/hltm2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hltm2.R
 3 | \name{hltm2}
 4 | \alias{hltm2}
 5 | \title{Hierarchical Latent Trait Models with Known Item Parameters.}
 6 | \usage{
 7 | hltm2(y, x = NULL, z = NULL, item_coefs, control = list())
 8 | }
 9 | \arguments{
10 | \item{y}{A data frame or matrix of item responses.}
11 | 
12 | \item{x}{An optional model matrix, including the intercept term, that predicts the
13 | mean of the latent preference. If not supplied, only the intercept term is included.}
14 | 
15 | \item{z}{An optional model matrix, including the intercept term, that predicts the
16 | variance of the latent preference. If not supplied, only the intercept term is included.}
17 | 
18 | \item{item_coefs}{A list of known item parameters. The parameters of item \eqn{j} are given
19 | by the \eqn{j}th element, which should be a vector of length 2, containing
20 | the item difficulty parameter and item discrimination parameter.}
21 | 
22 | \item{control}{A list of control values
23 | \describe{
24 |  \item{max_iter}{The maximum number of iterations of the EM algorithm.
25 |    The default is 150.}
26 |  \item{eps}{Tolerance parameter used to determine convergence of the
27 |   EM algorithm. Specifically, iterations continue until the Euclidean
28 |   distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps},
29 |   where \eqn{\beta} is the vector of item discrimination parameters.
30 |   \code{eps}=1e-4 by default.}
31 |  \item{max_iter2}{The maximum number of iterations of the conditional
32 |    maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}.
33 |    The default is 15.}
34 |  \item{eps2}{Tolerance parameter used to determine convergence of the
35 |    conditional maximization procedures for updating \eqn{\gamma} and
36 |    \eqn{\lambda}. Specifically, iterations continue until the Euclidean
37 |   distance between two consecutive log likelihoods falls under \code{eps2}.
38 |   \code{eps2}=1e-3 by default.}
39 |  \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.}
40 |  \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.}
41 | }}
42 | }
43 | \value{
44 | An object of class \code{hltm}.
45 |  \item{coefficients}{A data frame of parameter estimates, standard errors,
46 |   z values and p values.}
47 |  \item{scores}{A data frame of EAP estimates of latent preferences and
48 |   their approximate standard errors.}
49 |  \item{vcov}{Variance-covariance matrix of parameter estimates.}
50 |  \item{log_Lik}{The log-likelihood value at convergence.}
51 |  \item{N}{Number of units.}
52 |  \item{J}{Number of items.}
53 |  \item{H}{A vector denoting the number of response categories for each item.}
54 |  \item{ylevels}{A list showing the levels of the factorized response categories.}
55 |  \item{p}{The number of predictors for the mean equation.}
56 |  \item{q}{The number of predictors for the variance equation.}
57 |  \item{control}{List of control values.}
58 |  \item{call}{The matched call.}
59 | }
60 | \description{
61 | \code{hltm2} fits a hierarchical latent trait model where the item parameters
62 |   are known and supplied by the user.
63 | }
64 | \examples{
65 | y <- nes_econ2008[, -(1:3)]
66 | x <- model.matrix( ~ party * educ, nes_econ2008)
67 | z <- model.matrix( ~ party, nes_econ2008)
68 | dichotomize <- function(x) findInterval(x, c(mean(x, na.rm = TRUE)))
69 | y_bin <- y
70 | y_bin[] <- lapply(y, dichotomize)
71 | 
72 | n <- nrow(nes_econ2008)
73 | id_train <- sample.int(n, n/4)
74 | id_test <- setdiff(1:n, id_train)
75 | 
76 | y_bin_train <- y_bin[id_train, ]
77 | x_train <- x[id_train, ]
78 | z_train <- z[id_train, ]
79 | 
80 | mod_train <- hltm(y_bin_train, x_train, z_train)
81 | 
82 | y_bin_test <- y_bin[id_test, ]
83 | x_test <- x[id_test, ]
84 | z_test <- z[id_test, ]
85 | 
86 | item_coefs <- lapply(coef_item(mod_train), `[[`, "Estimate")
87 | 
88 | model_test <- hltm2(y_bin_test, x_test, z_test, item_coefs = item_coefs)
89 | }
90 | 


--------------------------------------------------------------------------------
/man/latent_scores.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/latent_scores.R
 3 | \name{latent_scores}
 4 | \alias{latent_scores}
 5 | \title{Estimates of Latent Preferences/Abilities}
 6 | \usage{
 7 | latent_scores(x, digits = 3)
 8 | }
 9 | \arguments{
10 | \item{x}{An object of class \code{hIRT}}
11 | 
12 | \item{digits}{The number of significant digits to use when printing}
13 | }
14 | \value{
15 | A data frame of EAP estimates of latent preferences and their approximate standard errors.
16 | }
17 | \description{
18 | EAP estimates of latent preferences for either \code{hltm} or \code{hgrm} models.
19 | }
20 | \examples{
21 | y <- nes_econ2008[, -(1:3)]
22 | x <- model.matrix( ~ party * educ, nes_econ2008)
23 | z <- model.matrix( ~ party, nes_econ2008)
24 | nes_m1 <- hgrm(y, x, z)
25 | pref <- latent_scores(nes_m1)
26 | require(ggplot2)
27 | ggplot(data = nes_econ2008) +
28 | geom_density(aes(x = pref$post_mean, col = party))
29 | }
30 | 


--------------------------------------------------------------------------------
/man/nes_econ2008.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{nes_econ2008}
 5 | \alias{nes_econ2008}
 6 | \title{Public Attitudes on Economic Issues in ANES 2008}
 7 | \format{
 8 | A data frame with 2268 rows and 13 variables: \describe{
 9 |   \item{gender}{gender. 1: male; 2: female}
10 |   \item{party}{party identification: Democrat, independent, or Republican}
11 |   \item{educ}{education. 1: high school or less; 2: some college or above}
12 |   \item{health_ins7}{Support for government or private health insurance, 7 categories}
13 |   \item{jobs_guar7}{Support for government guarantee jobs and income, 7 categories}
14 |   \item{gov_services7}{Should government reduce or increase spending on services?, 7 categories}
15 |   \item{FS_poor3}{Federal spending on the poor, 3 categories}
16 |   \item{FS_childcare3}{Federal spending on child care, 3 categories}
17 |   \item{FS_crime3}{Federal spending on crime, 3 categories}
18 |   \item{FS_publicschools3}{Federal spending on public schools, 3 categories}
19 |   \item{FS_welfare3}{Federal spending on welfare, 3 categories}
20 |   \item{FS_envir3}{Federal spending on environment, 3 categories}
21 |   \item{FS_socsec3}{Federal spending on Social Security, 3 categories}
22 |   }
23 | }
24 | \usage{
25 | nes_econ2008
26 | }
27 | \description{
28 | A dataset containing gender, party ID, education, and responses to 10 survey items
29 | on economic issues from the American National Election Studies, 2008.
30 | }
31 | \keyword{datasets}
32 | 


--------------------------------------------------------------------------------
/man/print.hIRT.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/print.R
 3 | \name{print.hIRT}
 4 | \alias{print.hIRT}
 5 | \title{Printing an object of class \code{hIRT}}
 6 | \usage{
 7 | \method{print}{hIRT}(x, digits = 3, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{An object of class \code{hIRT}}
11 | 
12 | \item{digits}{The number of significant digits to use when printing}
13 | 
14 | \item{...}{further arguments passed to \code{\link{print}}.}
15 | }
16 | \description{
17 | Printing an object of class \code{hIRT}
18 | }
19 | 


--------------------------------------------------------------------------------
/man/summary.hIRT.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/summary.R
 3 | \name{summary.hIRT}
 4 | \alias{summary.hIRT}
 5 | \alias{print.summary_hIRT}
 6 | \title{Summarizing Hierarchical Item Response Theory Models}
 7 | \usage{
 8 | \method{summary}{hIRT}(object, by_item = FALSE, digits = 3, ...)
 9 | 
10 | \method{print}{summary_hIRT}(x, digits = 3, ...)
11 | }
12 | \arguments{
13 | \item{object}{An object of class \code{hIRT}.}
14 | 
15 | \item{by_item}{Logical. Should item parameters be stored item by item
16 | (if \code{TRUE}) or put together in a data frame (if \code{FALSE})?}
17 | 
18 | \item{digits}{the number of significant digits to use when printing.}
19 | 
20 | \item{...}{further arguments passed to \code{\link{print}}.}
21 | 
22 | \item{x}{An object of class \code{hIRT}}
23 | }
24 | \value{
25 | An object of class \code{summary_hIRT}.
26 |  \item{call}{The matched call.}
27 |  \item{model}{Model fit statistics: Log likelihood, AIC, and BIC.}
28 |  \item{item_coefs}{Item parameter estimates, standard errors,
29 |   z values, and p values.}
30 |  \item{mean_coefs}{Parameter estimates for the mean equation.}
31 |  \item{var_coefs}{Parameter estimates for the variance equation.}
32 | }
33 | \description{
34 | Summarizing the fit of either \code{hltm} or \code{hgrm}.
35 | }
36 | \examples{
37 | y <- nes_econ2008[, -(1:3)]
38 | x <- model.matrix( ~ party * educ, nes_econ2008)
39 | z <- model.matrix( ~ party, nes_econ2008)
40 | nes_m1 <- hgrm(y, x, z)
41 | summary(nes_m1, by_item = TRUE)
42 | }
43 | 


--------------------------------------------------------------------------------