├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── coef.R ├── data.R ├── hgrm.R ├── hgrm2.R ├── hgrmDIF.R ├── hltm.R ├── hltm2.R ├── latent_scores.R ├── print.R ├── summary.R ├── sysdata.rda ├── utils.R ├── utils_grm.R └── utils_ltm.R ├── README.Rmd ├── README.md ├── cran-comments.md ├── data └── nes_econ2008.rda ├── hIRT.Rproj └── man ├── coef_item.Rd ├── figures └── README-pressure-1.png ├── hgrm.Rd ├── hgrm2.Rd ├── hgrmDIF.Rd ├── hltm.Rd ├── hltm2.Rd ├── latent_scores.Rd ├── nes_econ2008.Rd ├── print.hIRT.Rd └── summary.hIRT.Rd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^data-raw$ 4 | ^\.travis\.yml$ 5 | ^README\.Rmd$ 6 | ^README-.*\.png$ 7 | ^cran-comments\.md$ 8 | ^CRAN-RELEASE$ 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | .Rproj.user/** 3 | .Rhistory 4 | .RData 5 | .Ruserdata 6 | data-raw 7 | CRAN-RELEASE 8 | cran-comments 9 | inst/doc 10 | hIRT.Rproj 11 | .Rproj.user 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | language: R 4 | sudo: false 5 | cache: packages 6 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: hIRT 2 | Type: Package 3 | Title: Hierarchical Item Response Theory Models 4 | Version: 0.4.0 5 | Authors@R: person("Xiang", "Zhou", email = "xiang_zhou@fas.harvard.edu", 6 | role = c("aut", "cre")) 7 | Description: Implementation of a class of hierarchical item response 8 | theory (IRT) models where both the mean and the variance of latent preferences 9 | (ability parameters) may depend on observed covariates. The current 10 | implementation includes both the two-parameter latent trait model for binary data and the 11 | graded response model for ordinal data. Both are fitted via the Expectation-Maximization (EM) 12 | algorithm. Asymptotic standard errors are derived from the observed information 13 | matrix. 14 | Depends: 15 | R (>= 3.4.0), 16 | stats 17 | Imports: 18 | pryr (>= 0.1.2), 19 | rms (>= 5.1-1), 20 | ltm (>= 1.1-1), 21 | Matrix (>= 1.2-10) 22 | Suggests: 23 | ggplot2 24 | License: GPL (>= 3) 25 | Encoding: UTF-8 26 | LazyData: true 27 | RoxygenNote: 7.1.1 28 | URL: http://github.com/xiangzhou09/hIRT 29 | BugReports: http://github.com/xiangzhou09/hIRT 30 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,hIRT) 4 | S3method(print,summary_hIRT) 5 | S3method(summary,hIRT) 6 | export(coef_item) 7 | export(coef_mean) 8 | export(coef_var) 9 | export(hgrm) 10 | export(hgrm2) 11 | export(hgrmDIF) 12 | export(hltm) 13 | export(hltm2) 14 | export(latent_scores) 15 | import(stats) 16 | importFrom(ltm,grm) 17 | importFrom(ltm,ltm) 18 | importFrom(pryr,compose) 19 | importFrom(pryr,partial) 20 | importFrom(rms,lrm.fit) 21 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # hIRT 0.3.1 2 | 3 | * fixed bug in README.md 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /R/coef.R: -------------------------------------------------------------------------------- 1 | #' Parameter Estimates from Hierarchical IRT Models. 2 | #' 3 | #' Parameter estimates from either \code{hltm} or \code{hgrm} or \code{hgrmDIF} models. \code{code_item} 4 | #' reports estimates of item parameters. \code{coef_mean} reports results for the mean equation. 5 | #' \code{coef_var} reports results for the variance equation. 6 | #' 7 | #' @inheritParams print.hIRT 8 | #' @param by_item Logical. Should item parameters be stored item by item 9 | #' (if \code{TRUE}) or put together in a data frame (if \code{FALSE})? 10 | #' 11 | #' @return Parameter estimates, standard errors, z values, and p values 12 | #' organized as a data frame (if \code{by_item = TRUE}) or a list (if \code{ 13 | #' by_item = FALSE}). 14 | #' 15 | #' @export 16 | #' @examples 17 | #' y <- nes_econ2008[, -(1:3)] 18 | #' x <- model.matrix( ~ party * educ, nes_econ2008) 19 | #' z <- model.matrix( ~ party, nes_econ2008) 20 | #' nes_m1 <- hgrm(y, x, z) 21 | #' coef_item(nes_m1) 22 | coef_item <- function(x, by_item = TRUE, digits = 3) { 23 | 24 | if(inherits(x, "hgrm")){ 25 | H <- unname(x[["H"]]) 26 | xitem <- x[["coefficients"]][1:sum(H), , drop = FALSE] 27 | if (by_item == FALSE) return(round(xitem, digits)) 28 | index <- findInterval(1:sum(H), c(1, cumsum(H)[-length(H)] + 1)) 29 | out <- split(xitem, index) 30 | for (i in seq_along(out)) { 31 | tmp <- strsplit(rownames(out[[i]]), "\\.") 32 | rownames(out[[i]]) <- vapply(tmp, function(x) x[length(x)], FUN.VALUE = character(1L)) 33 | out[[i]] <- round(out[[i]], digits) 34 | } 35 | } else if (inherits(x, "hltm")){ 36 | J <- x[["J"]] 37 | xitem <- x[["coefficients"]][1:(2 * J), , drop = FALSE] 38 | if (by_item == FALSE) 39 | return(round(xitem, digits)) 40 | out <- split(xitem, rep(1:J, each = 2)) 41 | for (i in 1:J) rownames(out[[i]]) <- c("Diff", "Dscrmn") 42 | } else if (inherits(x, "hgrmDIF")){ 43 | H <- unname(x[["H"]]) 44 | p <- x[["p"]] 45 | ncoefs <- vapply(x[["coef_item"]], length, integer(1L)) 46 | sH <- sum(ncoefs) 47 | xitem <- x[["coefficients"]][1:sH, , drop = FALSE] 48 | if (by_item == FALSE) return(round(xitem, digits)) 49 | index <- findInterval(1:sH, c(1, cumsum(ncoefs[-length(ncoefs)]) + 1)) 50 | out <- split(xitem, index) 51 | for (i in seq_along(out)) { 52 | tmp <- strsplit(rownames(out[[i]]), "\\.") 53 | rownames(out[[i]]) <- vapply(tmp, function(x) x[length(x)], FUN.VALUE = character(1L)) 54 | out[[i]] <- round(out[[i]], digits) 55 | } 56 | } else stop("Use only with 'hgrm' or 'hltm' or `hgrmDIF` objects.\n") 57 | 58 | stats::setNames(out, names(x[["H"]])) 59 | } 60 | 61 | #' @inheritParams print.hIRT 62 | #' 63 | #' @export 64 | #' @rdname coef_item 65 | #' @examples 66 | #' coef_mean(nes_m1) 67 | coef_mean <- function(x, digits = 3) { 68 | 69 | # if (x[["p"]] < 2) return(NULL) 70 | sH <- if (inherits(x, "hltm")) 71 | 2 * x[["J"]] else if (inherits(x, "hgrm")) 72 | sum(x[["H"]]) else if (inherits(x, "hgrmDIF")) 73 | sum(vapply(x[["coef_item"]], length, integer(1L))) else{ 74 | stop("Use only with 'hgrm' or 'hltm' or `hgrmDIF` objects.\n") 75 | } 76 | gamma_indices <- (sH + 1):(sH + x[["p"]]) 77 | round(x[["coefficients"]][gamma_indices, , drop = FALSE], digits = digits) 78 | } 79 | 80 | #' @inheritParams print.hIRT 81 | #' 82 | #' @export 83 | #' @rdname coef_item 84 | #' @examples 85 | #' coef_var(nes_m1) 86 | coef_var <- function(x, digits = 3) { 87 | 88 | # if (x[["q"]] < 2) return(NULL) 89 | sH <- if (inherits(x, "hltm")) 90 | 2 * x[["J"]] else if (inherits(x, "hgrm")) 91 | sum(x[["H"]]) else if (inherits(x, "hgrmDIF")) 92 | sum(vapply(x[["coef_item"]], length, integer(1L))) else{ 93 | stop("Use only with 'hgrm' or 'hltm' or `hgrmDIF` objects.\n") 94 | } 95 | lambda_indices <- (sH + x[["p"]] + 1):(sH + x[["p"]] + x[["q"]]) 96 | round(x[["coefficients"]][lambda_indices, , drop = FALSE], digits = digits) 97 | } 98 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Public Attitudes on Economic Issues in ANES 2008 2 | #' 3 | #' A dataset containing gender, party ID, education, and responses to 10 survey items 4 | #' on economic issues from the American National Election Studies, 2008. 5 | #' 6 | #' @format A data frame with 2268 rows and 13 variables: \describe{ 7 | #' \item{gender}{gender. 1: male; 2: female} 8 | #' \item{party}{party identification: Democrat, independent, or Republican} 9 | #' \item{educ}{education. 1: high school or less; 2: some college or above} 10 | #' \item{health_ins7}{Support for government or private health insurance, 7 categories} 11 | #' \item{jobs_guar7}{Support for government guarantee jobs and income, 7 categories} 12 | #' \item{gov_services7}{Should government reduce or increase spending on services?, 7 categories} 13 | #' \item{FS_poor3}{Federal spending on the poor, 3 categories} 14 | #' \item{FS_childcare3}{Federal spending on child care, 3 categories} 15 | #' \item{FS_crime3}{Federal spending on crime, 3 categories} 16 | #' \item{FS_publicschools3}{Federal spending on public schools, 3 categories} 17 | #' \item{FS_welfare3}{Federal spending on welfare, 3 categories} 18 | #' \item{FS_envir3}{Federal spending on environment, 3 categories} 19 | #' \item{FS_socsec3}{Federal spending on Social Security, 3 categories} 20 | #' } 21 | "nes_econ2008" 22 | 23 | -------------------------------------------------------------------------------- /R/hgrm.R: -------------------------------------------------------------------------------- 1 | #' Fitting Hierarchical Graded Response Models (for Ordinal Responses) 2 | #' 3 | #' \code{hgrm} fits a hierarchical graded response model in which both 4 | #' the mean and the variance of the latent preference (ability parameter) 5 | #' may depend on person-specific covariates (\code{x} and \code{z}). 6 | #' Specifically, the mean is specified as a linear combination of \code{x} 7 | #' and the log of the variance is specified as a linear combination of 8 | #' \code{z}. Nonresponses are treated as missing at random. 9 | #' 10 | #' @param y A data frame or matrix of item responses. 11 | #' @param x An optional model matrix, including the intercept term, that predicts the 12 | #' mean of the latent preference. If not supplied, only the intercept term is included. 13 | #' @param z An optional model matrix, including the intercept term, that predicts the 14 | #' variance of the latent preference. If not supplied, only the intercept term is included. 15 | #' @param constr The type of constraints used to identify the model: "latent_scale", 16 | #' or "items". The default, "latent_scale" constrains the mean of latent preferences 17 | #' to zero and the geometric mean of prior variance to one; "items" places constraints 18 | #' on item parameters instead and sets the mean of item difficulty parameters to zero 19 | #' and the geometric mean of the discrimination parameters to one. 20 | #' @param beta_set The index of the item for which the discrimination parameter is 21 | #' restricted to be positive (or negative). It may take any integer value from 22 | #' 1 to \code{ncol(y)}. 23 | #' @param sign_set Logical. Should the discrimination parameter of 24 | #' the corresponding item (indexed by \code{beta_set}) be positive 25 | #' (if \code{TRUE}) or negative (if \code{FALSE})? 26 | #' @param init A character string indicating how item parameters are initialized. It can be 27 | #' "naive", "glm", or "irt". 28 | #' @param control A list of control values 29 | #' \describe{ 30 | #' \item{max_iter}{The maximum number of iterations of the EM algorithm. 31 | #' The default is 150.} 32 | #' \item{eps}{Tolerance parameter used to determine convergence of the 33 | #' EM algorithm. Specifically, iterations continue until the Euclidean 34 | #' distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps}, 35 | #' where \eqn{\beta} is the vector of item discrimination parameters. 36 | #' \code{eps}=1e-4 by default.} 37 | #' \item{max_iter2}{The maximum number of iterations of the conditional 38 | #' maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}. 39 | #' The default is 15.} 40 | #' \item{eps2}{Tolerance parameter used to determine convergence of the 41 | #' conditional maximization procedures for updating \eqn{\gamma} and 42 | #' \eqn{\lambda}. Specifically, iterations continue until the Euclidean 43 | #' distance between two consecutive log likelihoods falls under \code{eps2}. 44 | #' \code{eps2}=1e-3 by default.} 45 | #' \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.} 46 | #' \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.} 47 | #' } 48 | #' 49 | #' @return An object of class \code{hgrm}. 50 | #' \item{coefficients}{A data frame of parameter estimates, standard errors, 51 | #' z values and p values.} 52 | #' \item{scores}{A data frame of EAP estimates of latent preferences and 53 | #' their approximate standard errors.} 54 | #' \item{vcov}{Variance-covariance matrix of parameter estimates.} 55 | #' \item{log_Lik}{The log-likelihood value at convergence.} 56 | #' \item{N}{Number of units.} 57 | #' \item{J}{Number of items.} 58 | #' \item{H}{A vector denoting the number of response categories for each item.} 59 | #' \item{ylevels}{A list showing the levels of the factorized response categories.} 60 | #' \item{p}{The number of predictors for the mean equation.} 61 | #' \item{q}{The number of predictors for the variance equation.} 62 | #' \item{control}{List of control values.} 63 | #' \item{call}{The matched call.} 64 | #' @references Zhou, Xiang. 2019. "\href{https://doi.org/10.1017/pan.2018.63}{Hierarchical Item Response Models for Analyzing Public Opinion.}" Political Analysis. 65 | #' @importFrom rms lrm.fit 66 | #' @importFrom pryr compose 67 | #' @importFrom pryr partial 68 | #' @importFrom ltm grm 69 | #' @importFrom ltm ltm 70 | #' @import stats 71 | #' @export 72 | #' @examples 73 | #' y <- nes_econ2008[, -(1:3)] 74 | #' x <- model.matrix( ~ party * educ, nes_econ2008) 75 | #' z <- model.matrix( ~ party, nes_econ2008) 76 | #' nes_m1 <- hgrm(y, x, z) 77 | #' nes_m1 78 | 79 | hgrm <- function(y, x = NULL, z = NULL, constr = c("latent_scale", "items"), 80 | beta_set = 1L, sign_set = TRUE, init = c("naive", "glm", "irt"), 81 | control = list()) { 82 | 83 | # match call 84 | cl <- match.call() 85 | 86 | # check y and convert y into data.frame if needed 87 | if(missing(y)) stop("`y` must be provided.") 88 | if ((!is.data.frame(y) && !is.matrix(y)) || ncol(y) == 1L) 89 | stop("'y' must be either a data.frame or a matrix with at least two columns.") 90 | if(is.matrix(y)) y <- as.data.frame(y) 91 | 92 | # number of units and items 93 | N <- nrow(y) 94 | J <- ncol(y) 95 | 96 | # convert each y_j into an integer vector 97 | y[] <- lapply(y, factor, exclude = c(NA, NaN)) 98 | ylevels <- lapply(y, levels) 99 | y[] <- lapply(y, as.integer) 100 | if (!is.na(invalid <- match(TRUE, vapply(y, invalid_grm, logical(1L))))) 101 | stop(paste(names(y)[invalid], "does not have at least two valid responses")) 102 | H <- vapply(y, max, integer(1L), na.rm = TRUE) 103 | 104 | # check x and z (x and z should contain an intercept column) 105 | x <- x %||% as.matrix(rep(1, N)) 106 | z <- z %||% as.matrix(rep(1, N)) 107 | if (!is.matrix(x)) stop("`x` must be a matrix.") 108 | if (!is.matrix(z)) stop("`z` must be a matrix.") 109 | if (nrow(x) != N || nrow(z) != N) stop("both 'x' and 'z' must have the same number of rows as 'y'") 110 | p <- ncol(x) 111 | q <- ncol(z) 112 | colnames(x) <- colnames(x) %||% paste0("x", 1:p) 113 | colnames(z) <- colnames(z) %||% paste0("x", 1:q) 114 | 115 | # check beta_set and sign_set 116 | stopifnot(beta_set %in% 1:J, is.logical(sign_set)) 117 | 118 | # check constraint 119 | constr <- match.arg(constr) 120 | init <- match.arg(init) 121 | 122 | # control parameters 123 | con <- list(max_iter = 150, max_iter2 = 15, eps = 1e-03, eps2 = 1e-03, K = 25, C = 4) 124 | con[names(control)] <- control 125 | 126 | # set environments for utility functions 127 | environment(loglik_grm) <- environment(theta_post_grm) <- environment(dummy_fun_grm) <- environment(tab2df_grm) <- environment() 128 | 129 | # GL points 130 | K <- con[["K"]] 131 | theta_ls <- con[["C"]] * GLpoints[[K]][["x"]] 132 | qw_ls <- con[["C"]] * GLpoints[[K]][["w"]] 133 | 134 | # imputation 135 | y_imp <- y 136 | if(anyNA(y)) y_imp[] <- lapply(y, impute) 137 | 138 | # pca 139 | theta_eap <- { 140 | tmp <- princomp(y_imp, cor = TRUE)$scores[, 1] 141 | (tmp - mean(tmp, na.rm = TRUE))/sd(tmp, na.rm = TRUE) 142 | } 143 | 144 | # initialization of alpha and beta parameters 145 | if (init == "naive"){ 146 | 147 | alpha <- lapply(H, function(x) c(Inf, seq(1, -1, length.out = x - 1), -Inf)) 148 | beta <- vapply(y, function(y) cov(y, theta_eap, use = "complete.obs")/var(theta_eap), double(1L)) 149 | 150 | } else if (init == "glm"){ 151 | 152 | pseudo_lrm <- lapply(y_imp, function(y) lrm.fit(theta_eap, y)[["coefficients"]]) 153 | beta <- vapply(pseudo_lrm, function(x) x[[length(x)]], double(1L)) 154 | alpha <- lapply(pseudo_lrm, function(x) c(Inf, x[-length(x)], -Inf)) 155 | 156 | } else { 157 | 158 | grm_coefs <- grm(y)[["coefficients"]] 159 | beta <- vapply(grm_coefs, function(x) x[[length(x)]], double(1L)) 160 | alpha <- lapply(grm_coefs, function(x) c(Inf, rev(x[-length(x)]), -Inf)) 161 | 162 | } 163 | 164 | # initial values of gamma and lambda 165 | lm_opr <- tcrossprod(solve(crossprod(x)), x) 166 | gamma <- lm_opr %*% theta_eap 167 | lambda <- rep(0, q) 168 | fitted_mean <- as.double(x %*% gamma) 169 | fitted_var <- rep(1, N) 170 | 171 | # EM algorithm 172 | for (iter in seq(1, con[["max_iter"]])) { 173 | 174 | # store previous parameters 175 | alpha_prev <- alpha 176 | beta_prev <- beta 177 | gamma_prev <- gamma 178 | lambda_prev <- lambda 179 | 180 | # construct w_ik 181 | posterior <- Map(theta_post_grm, theta_ls, qw_ls) 182 | w <- { 183 | tmp <- matrix(unlist(posterior), N, K) 184 | t(sweep(tmp, 1, rowSums(tmp), FUN = "/")) 185 | } 186 | 187 | # maximization 188 | pseudo_tab <- Map(dummy_fun_grm, y, H) 189 | pseudo_y <- lapply(pseudo_tab, tab2df_grm, theta_ls = theta_ls) 190 | pseudo_lrm <- lapply(pseudo_y, function(df) lrm_fit(df["x"], df[["y"]], weights = df[["wt"]])[["coefficients"]]) 191 | beta <- vapply(pseudo_lrm, function(x) x[[length(x)]], double(1L)) 192 | alpha <- lapply(pseudo_lrm, function(x) c(Inf, x[-length(x)], -Inf)) 193 | 194 | # EAP and VAP estimates of latent preferences 195 | theta_eap <- t(theta_ls %*% w) 196 | theta_vap <- t(theta_ls^2 %*% w) - theta_eap^2 197 | 198 | # variance regression 199 | gamma <- lm_opr %*% theta_eap 200 | r2 <- (theta_eap - x %*% gamma)^2 + theta_vap 201 | if (ncol(z)==1) lambda <- log(mean(r2)) else{ 202 | s2 <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))[["fitted.values"]] 203 | loglik <- -0.5 * (log(s2) + r2/s2) 204 | LL0 <- sum(loglik) 205 | dLL <- 1 206 | for (m in seq(1, con[["max_iter2"]])) { 207 | gamma <- lm.wfit(x, theta_eap, w = 1/s2)[["coefficients"]] 208 | r2 <- (theta_eap - x %*% gamma)^2 + theta_vap 209 | var_reg <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log")) 210 | s2 <- var_reg[["fitted.values"]] 211 | loglik <- -0.5 * (log(s2) + r2/s2) 212 | LL_temp <- sum(loglik) 213 | dLL <- LL_temp - LL0 214 | if (dLL < con[["eps2"]]) 215 | break 216 | LL0 <- LL_temp 217 | } 218 | lambda <- var_reg[["coefficients"]] 219 | } 220 | 221 | # location constraint 222 | tmp <- mean(x %*% gamma) 223 | alpha <- Map(function(x, y) x + tmp * y, alpha, beta) 224 | gamma[[1L]] <- gamma[[1L]] - tmp 225 | 226 | # scale constraint 227 | tmp <- mean(z %*% lambda) 228 | gamma <- gamma/exp(tmp/2) 229 | beta <- beta * exp(tmp/2) 230 | lambda[[1L]] <- lambda[[1L]] - tmp 231 | 232 | # direction contraint 233 | if (sign_set == (beta[[beta_set]] < 0)) { 234 | gamma <- -gamma 235 | beta <- -beta 236 | } 237 | 238 | fitted_mean <- as.double(x %*% gamma) 239 | fitted_var <- exp(as.double(z %*% lambda)) 240 | # cat(beta, "\n") 241 | # cat(abs(beta - beta_prev), "\n") 242 | 243 | cat(".") 244 | 245 | # check convergence 246 | if (sqrt(mean((beta - beta_prev)^2)) < con[["eps"]]) { 247 | cat("\n converged at iteration", iter, "\n") 248 | break 249 | } else if (iter == con[["max_iter"]]) { 250 | stop("algorithm did not converge; try increasing `max_iter` or decreasing `eps`") 251 | break 252 | } else next 253 | } 254 | 255 | gamma <- setNames(as.double(gamma), paste("x", colnames(x), sep = "")) 256 | lambda <- setNames(as.double(lambda), paste("z", colnames(z), sep = "")) 257 | 258 | # inference 259 | pik <- matrix(unlist(Map(partial(dnorm, x = theta_ls), mean = fitted_mean, sd = sqrt(fitted_var))), 260 | N, K, byrow = TRUE) * matrix(qw_ls, N, K, byrow = TRUE) 261 | Lijk <- lapply(theta_ls, function(theta_k) exp(loglik_grm(alpha = alpha, beta = beta, rep(theta_k, N)))) # K-list 262 | Lik <- vapply(Lijk, compose(exp, partial(rowSums, na.rm = TRUE), log), double(N)) 263 | Li <- rowSums(Lik * pik) 264 | 265 | # log likelihood 266 | log_Lik <- sum(log(Li)) 267 | 268 | # outer product of gradients 269 | environment(sj_ab_grm) <- environment(si_gamma) <- environment(si_lambda) <- environment() 270 | s_ab <- unname(Reduce(rbind, lapply(1:J, sj_ab_grm))) 271 | s_lambda <- s_gamma <- NULL 272 | s_gamma <- vapply(1:N, si_gamma, double(p)) 273 | s_lambda <- vapply(1:N, si_lambda, double(q)) 274 | 275 | # covariance matrix and standard errors 276 | s_all <- rbind(s_ab[-c(1L, nrow(s_ab)), , drop = FALSE], s_gamma, s_lambda) 277 | s_all[is.na(s_all)] <- 0 278 | covmat <- tryCatch(solve(tcrossprod(s_all)), 279 | error = function(e) {warning("The information matrix is singular; SE calculation failed."); 280 | matrix(NA, nrow(s_all), nrow(s_all))}) 281 | se_all <- sqrt(diag(covmat)) 282 | 283 | # reorganize se_all 284 | sH <- sum(H) 285 | gamma_indices <- (sH - 1):(sH + p - 2) 286 | lambda_indices <- (sH + p - 1):(sH + p + q - 2) 287 | se_all <- c(NA, se_all[1:(sH-2)], NA, se_all[gamma_indices], se_all[lambda_indices]) 288 | 289 | # name se_all and covmat 290 | names_ab <- unlist(lapply(names(alpha), function(x) { 291 | tmp <- alpha[[x]] 292 | paste(x, c(names(tmp)[-c(1L, length(tmp))], "Dscrmn")) 293 | })) 294 | names(se_all) <- c(names_ab, names(gamma), names(lambda)) 295 | rownames(covmat) <- colnames(covmat) <- c(names_ab[-c(1L, length(names_ab))], names(gamma), names(lambda)) 296 | 297 | # item coefficients 298 | coef_item <- Map(function(a, b) c(a[-c(1L, length(a))], Dscrmn = b), alpha, beta) 299 | 300 | # all coefficients 301 | coef_all <- c(unlist(coef_item), gamma, lambda) 302 | coefs <- data.frame(Estimate = coef_all, Std_Error = se_all, z_value = coef_all/se_all, 303 | p_value = 2 * (1 - pnorm(abs(coef_all/se_all)))) 304 | rownames(coefs) <- names(se_all) 305 | 306 | # item constraints 307 | if (constr == "items"){ 308 | 309 | gamma0_prev <- gamma[[1L]] 310 | 311 | # location constraint 312 | alpha_sum <- sum(vapply(alpha, function(x) sum(x[-c(1L, length(x))]), double(1L))) 313 | beta_sum <- sum((H-1) * beta) 314 | c1 <- alpha_sum/beta_sum 315 | gamma[[1L]] <- gamma[[1L]] + c1 316 | alpha <- Map(function(x, y) x - c1 * y, alpha, beta) 317 | 318 | # scale constraint 319 | c2 <- 2 * mean(log(abs(beta))) 320 | gamma <- gamma * exp(c2/2) 321 | lambda[[1L]] <- lambda[[1L]] + c2 322 | beta <- beta / exp(c2/2) 323 | 324 | # fitted means and variances 325 | fitted_mean <- as.double(x %*% gamma) 326 | fitted_var <- exp(as.double(z %*% lambda)) 327 | 328 | # theta_eap and theta_vap 329 | theta_eap <- (theta_eap - gamma0_prev) * exp(c2/2) + gamma[[1L]] 330 | theta_vap <- theta_vap * (exp(c2/2))^2 331 | 332 | # covmat for new parameterization 333 | tmp_fun <- function(d) { 334 | mat <- diag(d) 335 | mat[d, d] <- exp(-c2/2) 336 | mat[1:(d-1), d] <- rep(-c1, d-1) 337 | mat 338 | } 339 | A <- Reduce(Matrix::bdiag, lapply(H, tmp_fun)) 340 | A2 <- A[seq(2, nrow(A)-1), seq(2, ncol(A)-1)] 341 | B <- Matrix::bdiag(exp(c2/2) * diag(p), diag(q)) 342 | C <- Matrix::bdiag(A2, B) 343 | covmat <- C %*% Matrix::tcrossprod(covmat, C) 344 | 345 | se_all <- sqrt(Matrix::diag(covmat)) 346 | 347 | # reorganize se_all 348 | sH <- sum(H) 349 | gamma_indices <- (sH - 1):(sH + p - 2) 350 | lambda_indices <- (sH + p - 1):(sH + p + q - 2) 351 | se_all <- c(NA, se_all[1:(sH-2)], NA, se_all[gamma_indices], se_all[lambda_indices]) 352 | 353 | # name se_all and covmat 354 | names_ab <- unlist(lapply(names(alpha), function(x) { 355 | tmp <- alpha[[x]] 356 | paste(x, c(names(tmp)[-c(1L, length(tmp))], "Dscrmn")) 357 | })) 358 | names(se_all) <- c(names_ab, names(gamma), names(lambda)) 359 | rownames(covmat) <- colnames(covmat) <- c(names_ab[-c(1L, length(names_ab))], names(gamma), names(lambda)) 360 | 361 | # item coefficients 362 | coef_item <- Map(function(a, b) c(a[-c(1L, length(a))], Dscrmn = b), alpha, beta) 363 | 364 | # all coefficients 365 | coef_all <- c(unlist(coef_item), gamma, lambda) 366 | coefs <- data.frame(Estimate = coef_all, Std_Error = se_all, z_value = coef_all/se_all, 367 | p_value = 2 * (1 - pnorm(abs(coef_all/se_all)))) 368 | rownames(coefs) <- names(se_all) 369 | } 370 | 371 | # ability parameter estimates 372 | theta <- data.frame(post_mean = theta_eap, post_sd = sqrt(theta_vap), 373 | prior_mean = fitted_mean, prior_sd = sqrt(fitted_var)) 374 | 375 | # output 376 | out <- list(coefficients = coefs, scores = theta, vcov = covmat, log_Lik = log_Lik, constr = constr, 377 | N = N, J = J, H = H, ylevels = ylevels, p = p, q = q, control = con, call = cl) 378 | class(out) <- c("hgrm", "hIRT") 379 | out 380 | } 381 | 382 | -------------------------------------------------------------------------------- /R/hgrm2.R: -------------------------------------------------------------------------------- 1 | #' Hierarchical Graded Response Models with Known Item Parameters 2 | #' 3 | #' \code{hgrm2} fits a hierarchical graded response model where the item parameters 4 | #' are known and supplied by the user. 5 | #' 6 | #' @param y A data frame or matrix of item responses. 7 | #' @param x An optional model matrix, including the intercept term, that predicts the 8 | #' mean of the latent preference. If not supplied, only the intercept term is included. 9 | #' @param z An optional model matrix, including the intercept term, that predicts the 10 | #' variance of the latent preference. If not supplied, only the intercept term is included. 11 | #' @param item_coefs A list of known item parameters. The parameters of item \eqn{j} are given 12 | #' by the \eqn{j}th element, which should be a vector of length \eqn{H_j}, containing 13 | #' \eqn{H_j - 1} item difficulty parameters (in descending order) and one item discrimination 14 | #' parameter. 15 | #' @param control A list of control values 16 | #' \describe{ 17 | #' \item{max_iter}{The maximum number of iterations of the EM algorithm. 18 | #' The default is 150.} 19 | #' \item{eps}{Tolerance parameter used to determine convergence of the 20 | #' EM algorithm. Specifically, iterations continue until the Euclidean 21 | #' distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps}, 22 | #' where \eqn{\beta} is the vector of item discrimination parameters. 23 | #' \code{eps}=1e-4 by default.} 24 | #' \item{max_iter2}{The maximum number of iterations of the conditional 25 | #' maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}. 26 | #' The default is 15.} 27 | #' \item{eps2}{Tolerance parameter used to determine convergence of the 28 | #' conditional maximization procedures for updating \eqn{\gamma} and 29 | #' \eqn{\lambda}. Specifically, iterations continue until the Euclidean 30 | #' distance between two consecutive log likelihoods falls under \code{eps2}. 31 | #' \code{eps2}=1e-3 by default.} 32 | #' \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.} 33 | #' \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.} 34 | #' } 35 | #' 36 | #' @return An object of class \code{hgrm}. 37 | #' \item{coefficients}{A data frame of parameter estimates, standard errors, 38 | #' z values and p values.} 39 | #' \item{scores}{A data frame of EAP estimates of latent preferences and 40 | #' their approximate standard errors.} 41 | #' \item{vcov}{Variance-covariance matrix of parameter estimates.} 42 | #' \item{log_Lik}{The log-likelihood value at convergence.} 43 | #' \item{N}{Number of units.} 44 | #' \item{J}{Number of items.} 45 | #' \item{H}{A vector denoting the number of response categories for each item.} 46 | #' \item{ylevels}{A list showing the levels of the factorized response categories.} 47 | #' \item{p}{The number of predictors for the mean equation.} 48 | #' \item{q}{The number of predictors for the variance equation.} 49 | #' \item{control}{List of control values.} 50 | #' \item{call}{The matched call.} 51 | #' @importFrom rms lrm.fit 52 | #' @importFrom pryr compose 53 | #' @importFrom pryr partial 54 | #' @import stats 55 | #' @export 56 | #' @examples 57 | #' 58 | #' y <- nes_econ2008[, -(1:3)] 59 | #' x <- model.matrix( ~ party * educ, nes_econ2008) 60 | #' z <- model.matrix( ~ party, nes_econ2008) 61 | #' 62 | #' n <- nrow(nes_econ2008) 63 | #' id_train <- sample.int(n, n/4) 64 | #' id_test <- setdiff(1:n, id_train) 65 | #' 66 | #' y_train <- y[id_train, ] 67 | #' x_train <- x[id_train, ] 68 | #' z_train <- z[id_train, ] 69 | #' 70 | #' mod_train <- hgrm(y_train, x_train, z_train) 71 | #' 72 | #' y_test <- y[id_test, ] 73 | #' x_test <- x[id_test, ] 74 | #' z_test <- z[id_test, ] 75 | #' 76 | #' item_coefs <- lapply(coef_item(mod_train), `[[`, "Estimate") 77 | #' 78 | #' model_test <- hgrm2(y_test, x_test, z_test, item_coefs = item_coefs) 79 | 80 | hgrm2 <- function(y, x = NULL, z = NULL, item_coefs, control = list()) { 81 | 82 | # match call 83 | cl <- match.call() 84 | 85 | # check y and convert y into data.frame if needed 86 | if(missing(y)) stop("`y` must be provided.") 87 | if ((!is.data.frame(y) && !is.matrix(y)) || ncol(y) == 1L) 88 | stop("'y' must be either a data.frame or a matrix with at least two columns.") 89 | if(is.matrix(y)) y <- as.data.frame(y) 90 | 91 | # number of units and items 92 | N <- nrow(y) 93 | J <- ncol(y) 94 | 95 | # convert each y_j into an integer vector 96 | y[] <- lapply(y, factor, exclude = c(NA, NaN)) 97 | ylevels <- lapply(y, levels) 98 | y[] <- lapply(y, as.integer) 99 | if (!is.na(invalid <- match(TRUE, vapply(y, invalid_grm, logical(1L))))) 100 | stop(paste(names(y)[invalid], "does not have at least two valid responses")) 101 | H <- vapply(y, max, integer(1L), na.rm = TRUE) 102 | 103 | # extract item parameters 104 | if(missing(item_coefs)) 105 | stop("`item_coefs` must be supplied.") 106 | if(!is.list(item_coefs) || length(item_coefs) != J) 107 | stop("`item_coefs` must be a list of `ncol(y)` elements") 108 | item_coefs_H <- vapply(item_coefs, length, integer(1L)) 109 | if(!all.equal(item_coefs_H, H)) 110 | stop("`item_coefs` do not match the number of response categories in `y`") 111 | alpha <- lapply(item_coefs, function(x) c(Inf, x[-length(x)], -Inf)) 112 | beta <- vapply(item_coefs, function(x) x[[length(x)]], double(1L)) 113 | 114 | # check x and z (x and z should contain an intercept column) 115 | x <- x %||% as.matrix(rep(1, N)) 116 | z <- z %||% as.matrix(rep(1, N)) 117 | if (!is.matrix(x)) stop("`x` must be a matrix.") 118 | if (!is.matrix(z)) stop("`z` must be a matrix.") 119 | if (nrow(x) != N || nrow(z) != N) stop("both 'x' and 'z' must have the same number of rows as 'y'") 120 | p <- ncol(x) 121 | q <- ncol(z) 122 | colnames(x) <- colnames(x) %||% paste0("x", 1:p) 123 | colnames(z) <- colnames(z) %||% paste0("x", 1:q) 124 | 125 | # control parameters 126 | con <- list(max_iter = 150, max_iter2 = 15, eps = 1e-03, eps2 = 1e-03, K = 25, C = 4) 127 | con[names(control)] <- control 128 | 129 | # set environments for utility functions 130 | environment(loglik_grm) <- environment(theta_post_grm) <- environment(dummy_fun_grm) <- environment(tab2df_grm) <- environment() 131 | 132 | # GL points 133 | K <- con[["K"]] 134 | theta_ls <- con[["C"]] * GLpoints[[K]][["x"]] 135 | qw_ls <- con[["C"]] * GLpoints[[K]][["w"]] 136 | 137 | # imputation 138 | y_imp <- y 139 | if(anyNA(y)) y_imp[] <- lapply(y, impute) 140 | 141 | # pca for initial values of theta_eap 142 | theta_eap <- { 143 | tmp <- princomp(y_imp, cor = TRUE)$scores[, 1] 144 | (tmp - mean(tmp, na.rm = TRUE))/sd(tmp, na.rm = TRUE) 145 | } 146 | 147 | # initial values of gamma and lambda 148 | lm_opr <- tcrossprod(solve(crossprod(x)), x) 149 | gamma <- lm_opr %*% theta_eap 150 | lambda <- rep(0, q) 151 | fitted_mean <- as.double(x %*% gamma) 152 | fitted_var <- rep(1, N) 153 | 154 | # EM algorithm 155 | for (iter in seq(1, con[["max_iter"]])) { 156 | 157 | # store previous parameters 158 | # alpha_prev <- alpha 159 | # beta_prev <- beta 160 | gamma_prev <- gamma 161 | lambda_prev <- lambda 162 | 163 | # construct w_ik 164 | posterior <- Map(theta_post_grm, theta_ls, qw_ls) 165 | w <- { 166 | tmp <- matrix(unlist(posterior), N, K) 167 | t(sweep(tmp, 1, rowSums(tmp), FUN = "/")) 168 | } 169 | 170 | # # maximization 171 | # pseudo_tab <- Map(dummy_fun_grm, y, H) 172 | # pseudo_y <- lapply(pseudo_tab, tab2df_grm, theta_ls = theta_ls) 173 | # pseudo_lrm <- lapply(pseudo_y, function(df) lrm.fit(df[["x"]], df[["y"]], weights = df[["wt"]])[["coefficients"]]) 174 | # beta <- vapply(pseudo_lrm, function(x) x[[length(x)]], double(1L)) 175 | # alpha <- lapply(pseudo_lrm, function(x) c(Inf, x[-length(x)], -Inf)) 176 | 177 | # EAP and VAP estimates of latent preferences 178 | theta_eap <- t(theta_ls %*% w) 179 | theta_vap <- t(theta_ls^2 %*% w) - theta_eap^2 180 | 181 | # variance regression 182 | gamma <- lm_opr %*% theta_eap 183 | r2 <- (theta_eap - x %*% gamma)^2 + theta_vap 184 | if (ncol(z)==1) lambda <- log(mean(r2)) else{ 185 | s2 <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))[["fitted.values"]] 186 | loglik <- -0.5 * (log(s2) + r2/s2) 187 | LL0 <- sum(loglik) 188 | dLL <- 1 189 | for (m in seq(1, con[["max_iter2"]])) { 190 | gamma <- lm.wfit(x, theta_eap, w = 1/s2)[["coefficients"]] 191 | r2 <- (theta_eap - x %*% gamma)^2 + theta_vap 192 | var_reg <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log")) 193 | s2 <- var_reg[["fitted.values"]] 194 | loglik <- -0.5 * (log(s2) + r2/s2) 195 | LL_temp <- sum(loglik) 196 | dLL <- LL_temp - LL0 197 | if (dLL < con[["eps2"]]) 198 | break 199 | LL0 <- LL_temp 200 | } 201 | lambda <- var_reg[["coefficients"]] 202 | } 203 | 204 | fitted_mean <- as.double(x %*% gamma) 205 | fitted_var <- exp(as.double(z %*% lambda)) 206 | cat(".") 207 | 208 | if (sqrt(mean((gamma/gamma_prev - 1)^2)) < con[["eps"]]) { 209 | cat("\n converged at iteration", iter, "\n") 210 | break 211 | } else if (iter == con[["max_iter"]]) { 212 | stop("algorithm did not converge; try increasing `max_iter` or decreasing `eps`") 213 | break 214 | } else next 215 | } 216 | 217 | gamma <- setNames(as.double(gamma), paste("x", colnames(x), sep = "")) 218 | lambda <- setNames(as.double(lambda), paste("z", colnames(z), sep = "")) 219 | 220 | # inference 221 | pik <- matrix(unlist(Map(partial(dnorm, x = theta_ls), mean = fitted_mean, sd = sqrt(fitted_var))), 222 | N, K, byrow = TRUE) * matrix(qw_ls, N, K, byrow = TRUE) 223 | Lijk <- lapply(theta_ls, function(theta_k) exp(loglik_grm(alpha = alpha, beta = beta, rep(theta_k, N)))) # K-list 224 | Lik <- vapply(Lijk, compose(exp, partial(rowSums, na.rm = TRUE), log), double(N)) 225 | Li <- rowSums(Lik * pik) 226 | 227 | # log likelihood 228 | log_Lik <- sum(log(Li)) 229 | 230 | # outer product of gradients 231 | environment(sj_ab_grm) <- environment(si_gamma) <- environment(si_lambda) <- environment() 232 | # s_ab <- unname(Reduce(rbind, lapply(1:J, sj_ab_grm))) 233 | s_gamma <- vapply(1:N, si_gamma, double(p)) 234 | s_lambda <- vapply(1:N, si_lambda, double(q)) 235 | 236 | # covariance matrix 237 | s_all <- rbind(s_gamma, s_lambda) 238 | s_all[is.na(s_all)] <- 0 239 | covmat <- tryCatch(solve(tcrossprod(s_all)), 240 | error = function(e) {warning("The information matrix is singular; SE calculation failed."); 241 | matrix(NA, nrow(s_all), nrow(s_all))}) 242 | 243 | # reorganize se_all 244 | sH <- sum(H) 245 | gamma_indices <- (sH - 1):(sH + p - 2) 246 | lambda_indices <- (sH + p - 1):(sH + p + q - 2) 247 | se_all <- c(rep(0, sH), sqrt(diag(covmat))) 248 | 249 | # name se_all and covmat 250 | names_ab <- unlist(lapply(names(alpha), function(x) { 251 | tmp <- alpha[[x]] 252 | paste(x, c(paste0("y>=", seq(2, length(tmp)-1)), "Dscrmn")) 253 | })) 254 | names(se_all) <- c(names_ab, names(gamma), names(lambda)) 255 | rownames(covmat) <- colnames(covmat) <- c(names(gamma), names(lambda)) 256 | 257 | # item coefficients 258 | coef_item <- Map(function(a, b) c(a[-c(1L, length(a))], Dscrmn = b), alpha, beta) 259 | 260 | # all coefficients 261 | coef_all <- c(unlist(coef_item), gamma, lambda) 262 | coefs <- data.frame(Estimate = coef_all, Std_Error = se_all, z_value = coef_all/se_all, 263 | p_value = 2 * (1 - pnorm(abs(coef_all/se_all)))) 264 | rownames(coefs) <- names(se_all) 265 | 266 | # ability parameter estimates 267 | theta <- data.frame(post_mean = theta_eap, post_sd = sqrt(theta_vap), 268 | prior_mean = fitted_mean, prior_sd = sqrt(fitted_var)) 269 | 270 | # output 271 | out <- list(coefficients = coefs, scores = theta, vcov = covmat, log_Lik = log_Lik, 272 | N = N, J = J, H = H, ylevels = ylevels, p = p, q = q, control = con, call = cl) 273 | class(out) <- c("hgrm", "hIRT") 274 | out 275 | } 276 | -------------------------------------------------------------------------------- /R/hgrmDIF.R: -------------------------------------------------------------------------------- 1 | #' Hierarchical Graded Response Models with Differential Item Functioning 2 | #' 3 | #' \code{hgrmDIF} fits a hierarchical graded response model similar to hgrm(), but person-specific 4 | #' covariates \code{x} are allowed to affect item responses directly (not via the latent preference). 5 | #' This model can be used to test for the presence of differential item functioning. 6 | #' 7 | #' @param y A data frame or matrix of item responses. 8 | #' @param x An optional model matrix, including the intercept term, that predicts the 9 | #' mean of the latent preference. If not supplied, only the intercept term is included. 10 | #' @param z An optional model matrix, including the intercept term, that predicts the 11 | #' variance of the latent preference. If not supplied, only the intercept term is included. 12 | #' @param x0 A matrix specifying the covariates by which differential item functioning operates. If not supplied, 13 | #' \code{x0} is taken to be a matrix containing all predictors in \code{x} except the intercept. 14 | #' @param items_dif The indices of the items for which differential item functioning is tested. 15 | #' @param form_dif Form of differential item functioning being tested. Either "uniform" or "non-uniform." 16 | #' @param constr The type of constraints used to identify the model: "latent_scale", 17 | #' or "items". The default, "latent_scale" constrains the mean of latent preferences 18 | #' to zero and the geometric mean of prior variance to one; "items" places constraints 19 | #' on item parameters instead and sets the mean of item difficulty parameters to zero 20 | #' and the geometric mean of the discrimination parameters to one. Currently, only "latent_scale" 21 | #' is supported in hgrmDIF(). 22 | #' @param beta_set The index of the item for which the discrimination parameter is 23 | #' restricted to be positive (or negative). It may take any integer value from 24 | #' 1 to \code{ncol(y)}. 25 | #' @param sign_set Logical. Should the discrimination parameter of 26 | #' the corresponding item (indexed by \code{beta_set}) be positive 27 | #' (if \code{TRUE}) or negative (if \code{FALSE})? 28 | #' @param init A character string indicating how item parameters are initialized. It can be 29 | #' "naive", "glm", or "irt". 30 | #' @param control A list of control values 31 | #' \describe{ 32 | #' \item{max_iter}{The maximum number of iterations of the EM algorithm. 33 | #' The default is 150.} 34 | #' \item{eps}{Tolerance parameter used to determine convergence of the 35 | #' EM algorithm. Specifically, iterations continue until the Euclidean 36 | #' distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps}, 37 | #' where \eqn{\beta} is the vector of item discrimination parameters. 38 | #' \code{eps}=1e-4 by default.} 39 | #' \item{max_iter2}{The maximum number of iterations of the conditional 40 | #' maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}. 41 | #' The default is 15.} 42 | #' \item{eps2}{Tolerance parameter used to determine convergence of the 43 | #' conditional maximization procedures for updating \eqn{\gamma} and 44 | #' \eqn{\lambda}. Specifically, iterations continue until the Euclidean 45 | #' distance between two consecutive log likelihoods falls under \code{eps2}. 46 | #' \code{eps2}=1e-3 by default.} 47 | #' \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.} 48 | #' \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.} 49 | #' } 50 | #' 51 | #' @return An object of class \code{hgrm}. 52 | #' \item{coefficients}{A data frame of parameter estimates, standard errors, 53 | #' z values and p values.} 54 | #' \item{scores}{A data frame of EAP estimates of latent preferences and 55 | #' their approximate standard errors.} 56 | #' \item{vcov}{Variance-covariance matrix of parameter estimates.} 57 | #' \item{log_Lik}{The log-likelihood value at convergence.} 58 | #' \item{N}{Number of units.} 59 | #' \item{J}{Number of items.} 60 | #' \item{H}{A vector denoting the number of response categories for each item.} 61 | #' \item{ylevels}{A list showing the levels of the factorized response categories.} 62 | #' \item{p}{The number of predictors for the mean equation.} 63 | #' \item{q}{The number of predictors for the variance equation.} 64 | #' \item{p0}{The number of predictors for items with DIF.} 65 | #' \item{coef_item}{Item coefficient estimates.} 66 | #' \item{control}{List of control values.} 67 | #' \item{call}{The matched call.} 68 | #' @importFrom rms lrm.fit 69 | #' @importFrom pryr compose 70 | #' @importFrom pryr partial 71 | #' @importFrom ltm grm 72 | #' @importFrom ltm ltm 73 | #' @import stats 74 | #' @export 75 | #' @examples 76 | #' y <- nes_econ2008[, -(1:3)] 77 | #' x <- model.matrix( ~ party * educ, nes_econ2008) 78 | #' nes_m2 <- hgrmDIF(y, x, items_dif = 1:2) 79 | #' coef_item(nes_m2) 80 | 81 | hgrmDIF <- function(y, x = NULL, z = NULL, x0 = x[, -1, drop = FALSE], 82 | items_dif = 1L, form_dif = c("uniform", "non-uniform"), 83 | constr = c("latent_scale"), beta_set = 1L, sign_set = TRUE, 84 | init = c("naive", "glm", "irt"), control = list()) { 85 | 86 | # match call 87 | cl <- match.call() 88 | 89 | # check y and convert y into data.frame if needed 90 | if(missing(y)) stop("`y` must be provided.") 91 | if ((!is.data.frame(y) && !is.matrix(y)) || ncol(y) == 1L) 92 | stop("'y' must be either a data.frame or a matrix with at least two columns.") 93 | if(is.matrix(y)) y <- as.data.frame(y) 94 | 95 | # number of units and items 96 | N <- nrow(y) 97 | J <- ncol(y) 98 | 99 | # convert each y_j into an integer vector 100 | y[] <- lapply(y, factor, exclude = c(NA, NaN)) 101 | ylevels <- lapply(y, levels) 102 | y[] <- lapply(y, as.integer) 103 | if (!is.na(invalid <- match(TRUE, vapply(y, invalid_grm, logical(1L))))) 104 | stop(paste(names(y)[invalid], "does not have at least two valid responses")) 105 | H <- vapply(y, max, integer(1L), na.rm = TRUE) 106 | 107 | # check x0 108 | if(!is.matrix(x0)) x0 <- as.matrix(x0) 109 | 110 | # check x and z (x and z should contain an intercept column) 111 | x <- x %||% as.matrix(rep(1, N)) 112 | z <- z %||% as.matrix(rep(1, N)) 113 | if (!is.matrix(x)) stop("`x` must be a matrix.") 114 | if (!is.matrix(z)) stop("`z` must be a matrix.") 115 | if (nrow(x) != N || nrow(z) != N) stop("both 'x' and 'z' must have the same number of rows as 'y'") 116 | p <- ncol(x) 117 | q <- ncol(z) 118 | p0 <- ncol(x0) 119 | colnames(x) <- colnames(x) %||% paste0("x", 1:p) 120 | colnames(z) <- colnames(z) %||% paste0("z", 1:q) 121 | colnames(x0) <- colnames(x0) %||% paste0("x0", 1:p0) 122 | 123 | # check item, beta_set and sign_set 124 | stopifnot(beta_set %in% 1:J, is.logical(sign_set)) 125 | 126 | # check constraint 127 | constr <- match.arg(constr) 128 | init <- match.arg(init) 129 | form_dif <- match.arg(form_dif) 130 | 131 | # control parameters 132 | con <- list(max_iter = 150, max_iter2 = 15, eps = 1e-03, eps2 = 1e-03, K = 25, C = 4) 133 | con[names(control)] <- control 134 | 135 | # set environments for utility functions 136 | environment(x0DIF) <- environment(loglik_grmDIF) <- environment(theta_post_grmDIF) <- environment(dummy_fun_grm) <- environment(tab2df_grm) <- environment() 137 | 138 | # GL points 139 | K <- con[["K"]] 140 | theta_ls <- con[["C"]] * GLpoints[[K]][["x"]] 141 | qw_ls <- con[["C"]] * GLpoints[[K]][["w"]] 142 | 143 | # imputation 144 | y_imp <- y 145 | if(anyNA(y)) y_imp[] <- lapply(y, impute) 146 | 147 | # pca 148 | theta_eap <- { 149 | tmp <- princomp(y_imp, cor = TRUE)$scores[, 1] 150 | (tmp - mean(tmp, na.rm = TRUE))/sd(tmp, na.rm = TRUE) 151 | } 152 | 153 | # initialization of alpha and beta parameters 154 | if (init == "naive"){ 155 | 156 | alpha <- lapply(H, function(x) c(Inf, seq(1, -1, length.out = x - 1), -Inf)) 157 | beta <- vapply(y, function(y) cov(y, theta_eap, use = "complete.obs")/var(theta_eap), double(1L)) 158 | 159 | } else if (init == "glm"){ 160 | 161 | pseudo_lrm <- lapply(y_imp, function(y) lrm.fit(theta_eap, y)[["coefficients"]]) 162 | beta <- vapply(pseudo_lrm, function(x) x[[length(x)]], double(1L)) 163 | alpha <- lapply(pseudo_lrm, function(x) c(Inf, x[-length(x)], -Inf)) 164 | 165 | } else { 166 | 167 | grm_coefs <- grm(y)[["coefficients"]] 168 | beta <- vapply(grm_coefs, function(x) x[[length(x)]], double(1L)) 169 | alpha <- lapply(grm_coefs, function(x) c(Inf, rev(x[-length(x)]), -Inf)) 170 | 171 | } 172 | 173 | # initialization of x0DIFnames, x0_lrm, and eta 174 | tmp_theta <- rep(theta_ls, N) 175 | tmp_x0 <- apply(x0, 2, rep, each = K) 176 | if(form_dif == "uniform") { 177 | x0DIFnames <- colnames(x0) 178 | x0_lrm <- cbind(tmp_theta, tmp_x0) 179 | eta <- lapply(1:J, function(j) if(j %in% items_dif) rep(0, p0) else double(0L)) 180 | } else{ 181 | x0DIFnames <- c(colnames(x0), paste0("Dscrmn * ", colnames(x0))) 182 | x0_lrm <- cbind(tmp_theta, tmp_x0, tmp_theta * tmp_x0) 183 | eta <- lapply(1:J, function(j) if(j %in% items_dif) rep(0, 2 * p0) else double(0L)) 184 | } 185 | colnames(x0_lrm) <- c("theta", x0DIFnames) 186 | names(eta) <- names(alpha) <- names(H) 187 | 188 | # initial values of gamma and lambda 189 | lm_opr <- tcrossprod(solve(crossprod(x)), x) 190 | gamma <- lm_opr %*% theta_eap 191 | lambda <- rep(0, q) 192 | fitted_mean <- as.double(x %*% gamma) 193 | fitted_var <- rep(1, N) 194 | pseudo_lrm <- vector(mode = "list", length = J) 195 | 196 | # EM algorithm 197 | for (iter in seq(1, con[["max_iter"]])) { 198 | 199 | # store previous parameters 200 | alpha_prev <- alpha 201 | beta_prev <- beta 202 | gamma_prev <- gamma 203 | lambda_prev <- lambda 204 | eta_prev <- eta 205 | 206 | # construct w_ik 207 | # list of length K, each element an N-vector 208 | posterior <- Map(theta_post_grmDIF, theta_ls, qw_ls) 209 | # K-by-N matrix 210 | w <- { 211 | tmp <- matrix(unlist(posterior), N, K) 212 | t(sweep(tmp, 1, rowSums(tmp), FUN = "/")) 213 | } 214 | 215 | # maximization with DIF 216 | w_lrm <- as.vector(w) 217 | for (j in seq(1, J)){ 218 | y_lrm <- rep(y[[j]], each = K) 219 | pseudo_lrm[[j]] <- if(j %in% items_dif) lrm_fit(x0_lrm, y_lrm, weights = w_lrm)[["coefficients"]] else 220 | lrm_fit(cbind(theta = tmp_theta), y_lrm, weights = w_lrm)[["coefficients"]] 221 | } 222 | beta <- vapply(pseudo_lrm, function(xx) xx[["theta"]], double(1L)) 223 | eta <- lapply(1:J, function(j) if(j %in% items_dif) pseudo_lrm[[j]][-(1:H[[j]])] else double(0L)) 224 | alpha <- Map(function(xx, H_j) c(Inf, xx[1:(H_j-1)], -Inf), pseudo_lrm, H) 225 | names(alpha) <- names(H) 226 | 227 | # # maximization in hgrm() 228 | # # list of length J, each element a K-by-H_j matrix containing 229 | # # the pseudo number of people with theta^k choosing h 230 | # pseudo_tab <- Map(dummy_fun_grm, y, H) 231 | # # list of length J, each element a data frame with K*H_j units and 3 columns: y, x(theta), and wt 232 | # pseudo_y <- lapply(pseudo_tab, tab2df_grm, theta_ls = theta_ls) 233 | # # weighted lrm on each element of pseudo_y 234 | # pseudo_lrm <- lapply(pseudo_y, function(df) lrm_fit(df[["x"]], df[["y"]], weights = df[["wt"]])[["coefficients"]]) 235 | # beta <- vapply(pseudo_lrm, function(x) x[[length(x)]], double(1L)) 236 | # alpha <- lapply(pseudo_lrm, function(x) c(Inf, x[-length(x)], -Inf)) 237 | 238 | # EAP and VAP estimates of latent preferences 239 | theta_eap <- t(theta_ls %*% w) 240 | theta_vap <- t(theta_ls^2 %*% w) - theta_eap^2 241 | 242 | # variance regression 243 | gamma <- lm_opr %*% theta_eap 244 | r2 <- (theta_eap - x %*% gamma)^2 + theta_vap 245 | if (ncol(z)==1) lambda <- log(mean(r2)) else{ 246 | s2 <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))[["fitted.values"]] 247 | loglik <- -0.5 * (log(s2) + r2/s2) 248 | LL0 <- sum(loglik) 249 | dLL <- 1 250 | for (m in seq(1, con[["max_iter2"]])) { 251 | gamma <- lm.wfit(x, theta_eap, w = 1/s2)[["coefficients"]] 252 | r2 <- (theta_eap - x %*% gamma)^2 + theta_vap 253 | var_reg <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log")) 254 | s2 <- var_reg[["fitted.values"]] 255 | loglik <- -0.5 * (log(s2) + r2/s2) 256 | LL_temp <- sum(loglik) 257 | dLL <- LL_temp - LL0 258 | if (dLL < con[["eps2"]]) 259 | break 260 | LL0 <- LL_temp 261 | } 262 | lambda <- var_reg[["coefficients"]] 263 | } 264 | 265 | # location constraint 266 | tmp <- mean(x %*% gamma) 267 | alpha <- Map(function(x, y) x + tmp * y, alpha, beta) 268 | gamma[[1L]] <- gamma[[1L]] - tmp 269 | 270 | # scale constraint 271 | tmp <- mean(z %*% lambda) 272 | gamma <- gamma/exp(tmp/2) 273 | beta <- beta * exp(tmp/2) 274 | lambda[[1L]] <- lambda[[1L]] - tmp 275 | 276 | # direction contraint 277 | if (sign_set == (beta[[beta_set]] < 0)) { 278 | gamma <- -gamma 279 | beta <- -beta 280 | } 281 | 282 | fitted_mean <- as.double(x %*% gamma) 283 | fitted_var <- exp(as.double(z %*% lambda)) 284 | # cat(beta, "\n") 285 | # cat(abs(beta - beta_prev), "\n") 286 | 287 | cat(".") 288 | 289 | # check convergence 290 | if (sqrt(mean((beta - beta_prev)^2)) < con[["eps"]]) { 291 | cat("\n converged at iteration", iter, "\n") 292 | break 293 | } else if (iter == con[["max_iter"]]) { 294 | stop("algorithm did not converge; try increasing `max_iter` or decreasing `eps`") 295 | break 296 | } else next 297 | } 298 | 299 | gamma <- setNames(as.double(gamma), paste("x", colnames(x), sep = "")) 300 | lambda <- setNames(as.double(lambda), paste("z", colnames(z), sep = "")) 301 | 302 | # inference 303 | # pik equals p_ik * w_k in Zhou2019supp 304 | pik <- matrix(unlist(Map(partial(dnorm, x = theta_ls), mean = fitted_mean, sd = sqrt(fitted_var))), 305 | N, K, byrow = TRUE) * matrix(qw_ls, N, K, byrow = TRUE) 306 | Lijk <- lapply(theta_ls, function(theta_k) exp(loglik_grmDIF(alpha = alpha, beta = beta, eta = eta, rep(theta_k, N)))) # K-list 307 | Lik <- vapply(Lijk, compose(exp, partial(rowSums, na.rm = TRUE), log), double(N)) 308 | Li <- rowSums(Lik * pik) 309 | 310 | # log likelihood 311 | log_Lik <- sum(log(Li)) 312 | 313 | # outer product of gradients 314 | environment(sj_ab_grmDIF) <- environment(sj_ab_grm) <- environment(si_gamma) <- environment(si_lambda) <- environment() 315 | s_abe <- unname(Reduce(rbind, lapply(1:J, sj_ab_grmDIF))) 316 | s_lambda <- s_gamma <- NULL 317 | s_gamma <- vapply(1:N, si_gamma, double(p)) 318 | s_lambda <- vapply(1:N, si_lambda, double(q)) 319 | 320 | # covariance matrix and standard errors 321 | s_all <- rbind(s_abe[-c(1L, nrow(s_abe)), , drop = FALSE], s_gamma, s_lambda) 322 | s_all[is.na(s_all)] <- 0 323 | covmat <- tryCatch(solve(tcrossprod(s_all)), 324 | error = function(e) {warning("The information matrix is singular; SE calculation failed."); 325 | matrix(NA, nrow(s_all), nrow(s_all))}) 326 | se_all <- sqrt(diag(covmat)) 327 | 328 | # reorganize se_all 329 | sH <- sum(H) + p0 * length(items_dif) + p0 * as.double(form_dif == "non-uniform") * length(items_dif) 330 | gamma_indices <- (sH - 1):(sH + p - 2) 331 | lambda_indices <- (sH + p - 1):(sH + p + q - 2) 332 | se_all <- c(NA, se_all[1:(sH-2)], NA, se_all[gamma_indices], se_all[lambda_indices]) 333 | 334 | # name se_all and covmat 335 | names(alpha) <- names(H) 336 | names_abe <- unlist(lapply(1:J, function(j) { 337 | itemname <- names(alpha)[[j]] 338 | tmp <- alpha[[j]] 339 | if(j %in% items_dif) paste(itemname, c(names(tmp)[-c(1L, length(tmp))], x0DIFnames, "Dscrmn")) else{ 340 | paste(itemname, c(names(tmp)[-c(1L, length(tmp))], "Dscrmn")) 341 | } 342 | })) 343 | names(se_all) <- c(names_abe, names(gamma), names(lambda)) 344 | rownames(covmat) <- colnames(covmat) <- c(names_abe[-c(1L, length(names_abe))], names(gamma), names(lambda)) 345 | 346 | # item coefficients 347 | coef_item <- Map(function(a, b, c) c(a[-c(1L, length(a))], c, Dscrmn = b), alpha, beta, eta) 348 | 349 | # all coefficients 350 | coef_all <- c(unlist(coef_item), gamma, lambda) 351 | coefs <- data.frame(Estimate = coef_all, Std_Error = se_all, z_value = coef_all/se_all, 352 | p_value = 2 * (1 - pnorm(abs(coef_all/se_all)))) 353 | 354 | # ability parameter estimates 355 | theta <- data.frame(post_mean = theta_eap, post_sd = sqrt(theta_vap), 356 | prior_mean = fitted_mean, prior_sd = sqrt(fitted_var)) 357 | 358 | # output 359 | out <- list(coefficients = coefs, scores = theta, vcov = covmat, log_Lik = log_Lik, 360 | items_dif = items_dif, form_dif = form_dif, constr = constr, 361 | N = N, J = J, H = H, ylevels = ylevels, p = p, q = q, p0 = p0, coef_item = coef_item, 362 | control = con, call = cl) 363 | class(out) <- c("hgrmDIF") 364 | out 365 | } 366 | 367 | -------------------------------------------------------------------------------- /R/hltm.R: -------------------------------------------------------------------------------- 1 | #' Fitting Hierarchical Latent Trait Models (for Binary Responses). 2 | #' 3 | #' \code{hltm} fits a hierarchical latent trait model in which both 4 | #' the mean and the variance of the latent preference (ability parameter) 5 | #' may depend on person-specific covariates (\code{x} and \code{z}). 6 | #' Specifically, the mean is specified as a linear combination of \code{x} 7 | #' and the log of the variance is specified as a linear combination of 8 | #' \code{z}. 9 | #' 10 | #' @inheritParams hgrm 11 | #' 12 | #' @return An object of class \code{hltm}. 13 | #' \item{coefficients}{A data frame of parameter estimates, standard errors, 14 | #' z values and p values.} 15 | #' \item{scores}{A data frame of EAP estimates of latent preferences and 16 | #' their approximate standard errors.} 17 | #' \item{vcov}{Variance-covariance matrix of parameter estimates.} 18 | #' \item{log_Lik}{The log-likelihood value at convergence.} 19 | #' \item{N}{Number of units.} 20 | #' \item{J}{Number of items.} 21 | #' \item{H}{A vector denoting the number of response categories for each item.} 22 | #' \item{ylevels}{A list showing the levels of the factorized response categories.} 23 | #' \item{p}{The number of predictors for the mean equation.} 24 | #' \item{q}{The number of predictors for the variance equation.} 25 | #' \item{control}{List of control values.} 26 | #' \item{call}{The matched call.} 27 | #' @references Zhou, Xiang. 2019. "\href{https://doi.org/10.1017/pan.2018.63}{Hierarchical Item Response Models for Analyzing Public Opinion.}" Political Analysis. 28 | #' @importFrom rms lrm.fit 29 | #' @importFrom pryr compose 30 | #' @importFrom pryr partial 31 | #' @import stats 32 | #' @export 33 | #' @examples 34 | #' y <- nes_econ2008[, -(1:3)] 35 | #' x <- model.matrix( ~ party * educ, nes_econ2008) 36 | #' z <- model.matrix( ~ party, nes_econ2008) 37 | #' 38 | #' dichotomize <- function(x) findInterval(x, c(mean(x, na.rm = TRUE))) 39 | #' y[] <- lapply(y, dichotomize) 40 | #' nes_m1 <- hltm(y, x, z) 41 | #' nes_m1 42 | 43 | hltm <- function(y, x = NULL, z = NULL, constr = c("latent_scale", "items"), 44 | beta_set = 1L, sign_set = TRUE, init = c("naive", "glm", "irt"), 45 | control = list()) { 46 | 47 | # match call 48 | cl <- match.call() 49 | 50 | # check y and convert y into data.frame if needed 51 | if(missing(y)) stop("`y` must be provided.") 52 | if ((!is.data.frame(y) && !is.matrix(y)) || ncol(y) == 1L) 53 | stop("'y' must be either a data.frame or a matrix with at least two columns.") 54 | if(is.matrix(y)) y <- as.data.frame(y) 55 | 56 | # number of units and items 57 | N <- nrow(y) 58 | J <- ncol(y) 59 | 60 | # convert each y_j into an integer vector 61 | y[] <- lapply(y, factor, exclude = c(NA, NaN)) 62 | ylevels <- lapply(y, levels) 63 | y[] <- lapply(y, function(x) as.integer(x) - 1) 64 | if (!is.na(invalid <- match(TRUE, vapply(y, invalid_ltm, logical(1L))))) 65 | stop(paste(names(y)[invalid], "is not a dichotomous variable")) 66 | H <- vapply(y, max, double(1L), na.rm = TRUE) + 1 67 | 68 | # check x and z (x and z should contain an intercept column) 69 | x <- x %||% as.matrix(rep(1, N)) 70 | z <- z %||% as.matrix(rep(1, N)) 71 | if (!is.matrix(x)) stop("`x` must be a matrix.") 72 | if (!is.matrix(z)) stop("`z` must be a matrix.") 73 | if (nrow(x) != N || nrow(z) != N) stop("both 'x' and 'z' must have the same number of rows as 'y'") 74 | p <- ncol(x) 75 | q <- ncol(z) 76 | colnames(x) <- colnames(x) %||% paste0("x", 1:p) 77 | colnames(z) <- colnames(z) %||% paste0("x", 1:q) 78 | 79 | # check beta_set and sign_set 80 | stopifnot(beta_set %in% 1:J, is.logical(sign_set)) 81 | 82 | # check constraint 83 | constr <- match.arg(constr) 84 | init <- match.arg(init) 85 | 86 | # control parameters 87 | con <- list(max_iter = 150, max_iter2 = 15, eps = 1e-03, eps2 = 1e-03, K = 25, C = 4) 88 | con[names(control)] <- control 89 | 90 | # set environments for utility functions 91 | environment(loglik_ltm) <- environment(theta_post_ltm) <- environment(dummy_fun_ltm) <- environment(tab2df_ltm) <- environment() 92 | 93 | # GL points 94 | K <- con[["K"]] 95 | theta_ls <- con[["C"]] * GLpoints[[K]][["x"]] 96 | qw_ls <- con[["C"]] * GLpoints[[K]][["w"]] 97 | 98 | # imputation 99 | y_imp <- y 100 | if(anyNA(y)) y_imp[] <- lapply(y, impute) 101 | 102 | # pca for initial values of theta_eap 103 | theta_eap <- { 104 | tmp <- princomp(y_imp, cor = TRUE)$scores[, 1] 105 | (tmp - mean(tmp, na.rm = TRUE))/sd(tmp, na.rm = TRUE) 106 | } 107 | 108 | # initialization of alpha and beta parameters 109 | if (init == "naive"){ 110 | alpha <- rep(0, J) 111 | beta <- vapply(y, function(y) cov(y, theta_eap, use = "complete.obs")/var(theta_eap), double(1L)) 112 | } else if (init == "glm"){ 113 | pseudo_logit <- lapply(y_imp, function(y) glm.fit(cbind(1, theta_eap), y, family = binomial("logit"))[["coefficients"]]) 114 | beta <- vapply(pseudo_logit, function(x) x[2L], double(1L)) 115 | alpha <- vapply(pseudo_logit, function(x) x[1L], double(1L)) 116 | } else { 117 | ltm_coefs <- ltm(y ~ z1)[["coefficients"]] 118 | beta <- ltm_coefs[, 2, drop = TRUE] 119 | alpha <- ltm_coefs[, 1, drop = TRUE] 120 | } 121 | 122 | # initial values of gamma and lambda 123 | lm_opr <- tcrossprod(solve(crossprod(x)), x) 124 | gamma <- lm_opr %*% theta_eap 125 | lambda <- rep(0, q) 126 | fitted_mean <- as.double(x %*% gamma) 127 | fitted_var <- rep(1, N) 128 | 129 | # EM algorithm 130 | for (iter in seq(1, con[["max_iter"]])) { 131 | 132 | # store previous parameters 133 | alpha_prev <- alpha 134 | beta_prev <- beta 135 | gamma_prev <- gamma 136 | lambda_prev <- lambda 137 | 138 | # construct w_ik 139 | posterior <- Map(theta_post_ltm, theta_ls, qw_ls) 140 | w <- { 141 | tmp <- matrix(unlist(posterior), N, K) 142 | t(sweep(tmp, 1, rowSums(tmp), FUN = "/")) 143 | } 144 | 145 | # maximization 146 | pseudo_tab <- lapply(y, dummy_fun_ltm) 147 | pseudo_y <- lapply(pseudo_tab, tab2df_ltm, theta_ls = theta_ls) 148 | pseudo_logit <- lapply(pseudo_y, function(df) glm.fit(cbind(1, df[["x"]]), 149 | df[["y"]], weights = df[["wt"]], family = quasibinomial("logit"))[["coefficients"]]) 150 | beta <- vapply(pseudo_logit, function(x) x[2L], double(1L)) 151 | alpha <- vapply(pseudo_logit, function(x) x[1L], double(1L)) 152 | 153 | # EAP and VAP estimates of latent preferences 154 | theta_eap <- t(theta_ls %*% w) 155 | theta_vap <- t(theta_ls^2 %*% w) - theta_eap^2 156 | 157 | # variance regression 158 | gamma <- lm_opr %*% theta_eap 159 | r2 <- (theta_eap - x %*% gamma)^2 + theta_vap 160 | 161 | if (ncol(z)==1) lambda <- log(mean(r2)) else{ 162 | s2 <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))[["fitted.values"]] 163 | loglik <- -0.5 * (log(s2) + r2/s2) 164 | LL0 <- sum(loglik) 165 | dLL <- 1 166 | for (m in seq(1, con[["max_iter2"]])) { 167 | gamma <- lm.wfit(x, theta_eap, w = 1/s2)[["coefficients"]] 168 | r2 <- (theta_eap - x %*% gamma)^2 + theta_vap 169 | var_reg <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log")) 170 | s2 <- var_reg[["fitted.values"]] 171 | loglik <- -0.5 * (log(s2) + r2/s2) 172 | LL_temp <- sum(loglik) 173 | dLL <- LL_temp - LL0 174 | if (dLL < con[["eps2"]]) 175 | break 176 | LL0 <- LL_temp 177 | } 178 | lambda <- var_reg[["coefficients"]] 179 | } 180 | 181 | # location constraint 182 | tmp <- mean(x %*% gamma) 183 | alpha <- unlist(Map(function(x, y) x + tmp * y, alpha, beta)) 184 | gamma[1L] <- gamma[1L] - tmp 185 | 186 | # scale constraint 187 | tmp <- mean(z %*% lambda) 188 | gamma <- gamma/exp(tmp/2) 189 | beta <- beta * exp(tmp/2) 190 | lambda[1L] <- lambda[1L] - tmp 191 | 192 | # direction contraint 193 | if (sign_set == (beta[beta_set] < 0)) { 194 | gamma <- -gamma 195 | beta <- -beta 196 | } 197 | fitted_mean <- as.double(x %*% gamma) 198 | fitted_var <- exp(as.double(z %*% lambda)) 199 | 200 | cat(".") 201 | 202 | # check convergence 203 | if (sqrt(mean((beta - beta_prev)^2)) < con[["eps"]]) { 204 | cat("\n converged at iteration", iter, "\n") 205 | break 206 | } else if (iter == con[["max_iter"]]) { 207 | stop("algorithm did not converge; try increasing max_iter.") 208 | break 209 | } else next 210 | } 211 | 212 | gamma <- setNames(as.double(gamma), paste("x", colnames(x), sep = "")) 213 | lambda <- setNames(as.double(lambda), paste("z", colnames(z), sep = "")) 214 | 215 | # inference 216 | pik <- matrix(unlist(Map(partial(dnorm, x = theta_ls), mean = fitted_mean, sd = sqrt(fitted_var))), 217 | N, K, byrow = TRUE) * matrix(qw_ls, N, K, byrow = TRUE) 218 | Lijk <- lapply(theta_ls, function(theta_k) exp(loglik_ltm(alpha = alpha, beta = beta, rep(theta_k, N)))) # K-list 219 | Lik <- vapply(Lijk, compose(exp, partial(rowSums, na.rm = TRUE), log), double(N)) 220 | Li <- rowSums(Lik * pik) 221 | 222 | # log likelihood 223 | log_Lik <- sum(log(Li)) 224 | 225 | # outer product of gradients 226 | environment(dalpha_ltm) <- environment(sj_ab_ltm) <- environment(si_gamma) <- environment(si_lambda) <- environment() 227 | dalpha <- dalpha_ltm(alpha, beta) # K*J matrix 228 | s_ab <- unname(Reduce(cbind, lapply(1:J, sj_ab_ltm))) 229 | s_gamma <- vapply(1:N, si_gamma, double(p)) 230 | s_lambda <- vapply(1:N, si_lambda, double(q)) 231 | 232 | s_all <- rbind(t(s_ab)[-c(1L, ncol(s_ab)), , drop = FALSE], s_gamma, s_lambda) 233 | s_all[is.na(s_all)] <- 0 234 | covmat <- tryCatch(solve(tcrossprod(s_all)), 235 | error = function(e) {warning("The information matrix is singular; SE calculation failed."); 236 | matrix(NA, nrow(s_all), nrow(s_all))}) 237 | se_all <- sqrt(diag(covmat)) 238 | 239 | # reorganize se_all 240 | sH <- 2 * J 241 | gamma_indices <- (sH - 1):(sH + p - 2) 242 | lambda_indices <- (sH + p - 1):(sH + p + q - 2) 243 | se_all <- c(NA, se_all[1:(sH-2)], NA, se_all[gamma_indices], se_all[lambda_indices]) 244 | 245 | # name se_all and covmat 246 | names_ab <- paste(rep(names(alpha), each = 2), c("Diff", "Dscrmn")) 247 | names(se_all) <- c(names_ab, names(gamma), names(lambda)) 248 | rownames(covmat) <- colnames(covmat) <- c(names_ab[-c(1L, length(names_ab))], names(gamma), names(lambda)) 249 | 250 | # item coefficients 251 | coefs_item <- Map(function(a, b) c(Diff = a, Dscrmn = b), alpha, beta) 252 | 253 | # all coefficients 254 | coef_all <- c(unlist(coefs_item), gamma, lambda) 255 | coefs <- data.frame(Estimate = coef_all, Std_Error = se_all, z_value = coef_all/se_all, 256 | p_value = 2 * (1 - pnorm(abs(coef_all/se_all)))) 257 | rownames(coefs) <- names(se_all) 258 | 259 | # item constraints 260 | if (constr == "items"){ 261 | 262 | gamma0_prev <- gamma[1L] 263 | 264 | # location constraint 265 | alpha_sum <- sum(alpha) 266 | beta_sum <- sum(beta) 267 | c1 <- alpha_sum/beta_sum 268 | gamma[1L] <- gamma[1L] + c1 # adjust gamma0 269 | alpha <- unlist(Map(function(x, y) x - c1 * y, alpha, beta)) 270 | 271 | # scale constraint 272 | c2 <- 2 * mean(log(abs(beta))) 273 | gamma <- gamma * exp(c2/2) 274 | lambda[1L] <- lambda[1L] + c2 275 | beta <- beta / exp(c2/2) 276 | 277 | # fitted means and variances 278 | fitted_mean <- as.double(x %*% gamma) 279 | fitted_var <- exp(as.double(z %*% lambda)) 280 | 281 | # theta_eap and theta_vap 282 | theta_eap <- (theta_eap - gamma0_prev) * exp(c2/2) + gamma[1L] 283 | theta_vap <- theta_vap * (exp(c2/2))^2 284 | 285 | # covmat for new parameterization 286 | tmp_fun <- function(d) { 287 | mat <- diag(d) 288 | mat[d, d] <- exp(-c2/2) 289 | mat[1:(d-1), d] <- rep(-c1, d-1) 290 | mat 291 | } 292 | A <- Reduce(Matrix::bdiag, lapply(H, tmp_fun)) 293 | A2 <- A[seq(2, nrow(A)-1), seq(2, ncol(A)-1)] 294 | B <- Matrix::bdiag(exp(c2/2) * diag(p), diag(q)) 295 | C <- Matrix::bdiag(A2, B) 296 | covmat <- C %*% Matrix::tcrossprod(covmat, C) 297 | 298 | se_all <- sqrt(Matrix::diag(covmat)) 299 | 300 | # reorganize se_all 301 | sH <- 2 * J 302 | lambda_indices <- gamma_indices <- NULL 303 | gamma_indices <- (sH - 1):(sH + p - 2) 304 | lambda_indices <- (sH + p - 1):(sH + p + q - 2) 305 | se_all <- c(NA, se_all[1:(sH-2)], NA, se_all[gamma_indices], se_all[lambda_indices]) 306 | 307 | # name se_all and covmat 308 | names_ab <- paste(rep(names(alpha), each = 2), c("Diff", "Dscrmn")) 309 | names(se_all) <- c(names_ab, names(gamma), names(lambda)) 310 | rownames(covmat) <- colnames(covmat) <- c(names_ab[-c(1L, length(names_ab))], names(gamma), names(lambda)) 311 | 312 | # item coefficients 313 | coefs_item <- Map(function(a, b) c(Diff = a, Dscrmn = b), alpha, beta) 314 | 315 | # all coefficients 316 | coef_all <- c(unlist(coefs_item), gamma, lambda) 317 | coefs <- data.frame(Estimate = coef_all, Std_Error = se_all, z_value = coef_all/se_all, 318 | p_value = 2 * (1 - pnorm(abs(coef_all/se_all)))) 319 | rownames(coefs) <- names(se_all) 320 | } 321 | 322 | # ability parameter estimates 323 | theta <- data.frame(post_mean = theta_eap, post_sd = sqrt(theta_vap), 324 | prior_mean = fitted_mean, prior_sd = sqrt(fitted_var)) 325 | 326 | # output 327 | out <- list(coefficients = coefs, scores = theta, vcov = covmat, log_Lik = log_Lik, constr = constr, 328 | N = N, J = J, H = H, ylevels = ylevels, p = p, q = q, control = con, call = cl) 329 | class(out) <- c("hltm", "hIRT") 330 | out 331 | } 332 | 333 | -------------------------------------------------------------------------------- /R/hltm2.R: -------------------------------------------------------------------------------- 1 | #' Hierarchical Latent Trait Models with Known Item Parameters. 2 | #' 3 | #' \code{hltm2} fits a hierarchical latent trait model where the item parameters 4 | #' are known and supplied by the user. 5 | #' 6 | #' @inheritParams hgrm2 7 | #' @param item_coefs A list of known item parameters. The parameters of item \eqn{j} are given 8 | #' by the \eqn{j}th element, which should be a vector of length 2, containing 9 | #' the item difficulty parameter and item discrimination parameter. 10 | #' 11 | #' @return An object of class \code{hltm}. 12 | #' \item{coefficients}{A data frame of parameter estimates, standard errors, 13 | #' z values and p values.} 14 | #' \item{scores}{A data frame of EAP estimates of latent preferences and 15 | #' their approximate standard errors.} 16 | #' \item{vcov}{Variance-covariance matrix of parameter estimates.} 17 | #' \item{log_Lik}{The log-likelihood value at convergence.} 18 | #' \item{N}{Number of units.} 19 | #' \item{J}{Number of items.} 20 | #' \item{H}{A vector denoting the number of response categories for each item.} 21 | #' \item{ylevels}{A list showing the levels of the factorized response categories.} 22 | #' \item{p}{The number of predictors for the mean equation.} 23 | #' \item{q}{The number of predictors for the variance equation.} 24 | #' \item{control}{List of control values.} 25 | #' \item{call}{The matched call.} 26 | #' @importFrom rms lrm.fit 27 | #' @importFrom pryr compose 28 | #' @importFrom pryr partial 29 | #' @import stats 30 | #' @export 31 | #' @examples 32 | #' y <- nes_econ2008[, -(1:3)] 33 | #' x <- model.matrix( ~ party * educ, nes_econ2008) 34 | #' z <- model.matrix( ~ party, nes_econ2008) 35 | #' dichotomize <- function(x) findInterval(x, c(mean(x, na.rm = TRUE))) 36 | #' y_bin <- y 37 | #' y_bin[] <- lapply(y, dichotomize) 38 | #' 39 | #' n <- nrow(nes_econ2008) 40 | #' id_train <- sample.int(n, n/4) 41 | #' id_test <- setdiff(1:n, id_train) 42 | #' 43 | #' y_bin_train <- y_bin[id_train, ] 44 | #' x_train <- x[id_train, ] 45 | #' z_train <- z[id_train, ] 46 | #' 47 | #' mod_train <- hltm(y_bin_train, x_train, z_train) 48 | #' 49 | #' y_bin_test <- y_bin[id_test, ] 50 | #' x_test <- x[id_test, ] 51 | #' z_test <- z[id_test, ] 52 | #' 53 | #' item_coefs <- lapply(coef_item(mod_train), `[[`, "Estimate") 54 | #' 55 | #' model_test <- hltm2(y_bin_test, x_test, z_test, item_coefs = item_coefs) 56 | 57 | hltm2 <- function(y, x = NULL, z = NULL, item_coefs, control = list()) { 58 | 59 | # match call 60 | cl <- match.call() 61 | 62 | # check y and convert y into data.frame if needed 63 | if(missing(y)) stop("`y` must be provided.") 64 | if ((!is.data.frame(y) && !is.matrix(y)) || ncol(y) == 1L) 65 | stop("'y' must be either a data.frame or a matrix with at least two columns.") 66 | if(is.matrix(y)) y <- as.data.frame(y) 67 | 68 | # number of units and items 69 | N <- nrow(y) 70 | J <- ncol(y) 71 | 72 | # convert each y_j into an integer vector 73 | y[] <- lapply(y, factor, exclude = c(NA, NaN)) 74 | ylevels <- lapply(y, levels) 75 | y[] <- lapply(y, function(x) as.integer(x) - 1) 76 | if (!is.na(invalid <- match(TRUE, vapply(y, invalid_ltm, logical(1L))))) 77 | stop(paste(names(y)[invalid], "is not a dichotomous variable")) 78 | H <- vapply(y, max, double(1L), na.rm = TRUE) + 1 79 | 80 | # extract item parameters 81 | if(missing(item_coefs)) 82 | stop("`item_coefs` must be supplied.") 83 | if(!is.list(item_coefs) || length(item_coefs) != J) 84 | stop("`item_coefs` must be a list of `ncol(y)` elements") 85 | item_coefs_H <- vapply(item_coefs, length, integer(1L)) 86 | if(!all.equal(item_coefs_H, H)) 87 | stop("`item_coefs` do not match the number of response categories in `y`") 88 | alpha <- vapply(item_coefs, function(x) x[[1L]], double(1L)) 89 | beta <- vapply(item_coefs, function(x) x[[2L]], double(1L)) 90 | 91 | # check x and z (x and z should contain an intercept column) 92 | x <- x %||% as.matrix(rep(1, N)) 93 | z <- z %||% as.matrix(rep(1, N)) 94 | if (!is.matrix(x)) stop("`x` must be a matrix.") 95 | if (!is.matrix(z)) stop("`z` must be a matrix.") 96 | if (nrow(x) != N || nrow(z) != N) stop("both 'x' and 'z' must have the same number of rows as 'y'") 97 | p <- ncol(x) 98 | q <- ncol(z) 99 | colnames(x) <- colnames(x) %||% paste0("x", 1:p) 100 | colnames(z) <- colnames(z) %||% paste0("x", 1:q) 101 | 102 | # control parameters 103 | con <- list(max_iter = 150, max_iter2 = 15, eps = 1e-03, eps2 = 1e-03, K = 25, C = 4) 104 | con[names(control)] <- control 105 | 106 | # set environments for utility functions 107 | environment(loglik_ltm) <- environment(theta_post_ltm) <- environment(dummy_fun_ltm) <- environment(tab2df_ltm) <- environment() 108 | 109 | # GL points 110 | K <- con[["K"]] 111 | theta_ls <- con[["C"]] * GLpoints[[K]][["x"]] 112 | qw_ls <- con[["C"]] * GLpoints[[K]][["w"]] 113 | 114 | # imputation 115 | y_imp <- y 116 | if(anyNA(y)) y_imp[] <- lapply(y, impute) 117 | 118 | # pca for initial values of theta_eap 119 | theta_eap <- { 120 | tmp <- princomp(y_imp, cor = TRUE)$scores[, 1] 121 | (tmp - mean(tmp, na.rm = TRUE))/sd(tmp, na.rm = TRUE) 122 | } 123 | 124 | # initial values of gamma and lambda 125 | lm_opr <- tcrossprod(solve(crossprod(x)), x) 126 | gamma <- lm_opr %*% theta_eap 127 | lambda <- rep(0, q) 128 | fitted_mean <- as.double(x %*% gamma) 129 | fitted_var <- rep(1, N) 130 | 131 | # EM algorithm 132 | for (iter in seq(1, con[["max_iter"]])) { 133 | 134 | # store previous parameters 135 | # alpha_prev <- alpha 136 | # beta_prev <- beta 137 | gamma_prev <- gamma 138 | lambda_prev <- lambda 139 | 140 | # construct w_ik 141 | posterior <- Map(theta_post_ltm, theta_ls, qw_ls) 142 | w <- { 143 | tmp <- matrix(unlist(posterior), N, K) 144 | t(sweep(tmp, 1, rowSums(tmp), FUN = "/")) 145 | } 146 | 147 | # # maximization 148 | # pseudo_tab <- lapply(y, dummy_fun_ltm) 149 | # pseudo_y <- lapply(pseudo_tab, tab2df_ltm, theta_ls = theta_ls) 150 | # pseudo_logit <- lapply(pseudo_y, function(df) glm.fit(cbind(1, df[["x"]]), 151 | # df[["y"]], weights = df[["wt"]], family = quasibinomial("logit"))[["coefficients"]]) 152 | # beta <- vapply(pseudo_logit, function(x) x[2L], double(1L)) 153 | # alpha <- vapply(pseudo_logit, function(x) x[1L], double(1L)) 154 | 155 | # EAP and VAP estimates of latent preferences 156 | theta_eap <- t(theta_ls %*% w) 157 | theta_vap <- t(theta_ls^2 %*% w) - theta_eap^2 158 | 159 | # variance regression 160 | gamma <- lm_opr %*% theta_eap 161 | r2 <- (theta_eap - x %*% gamma)^2 + theta_vap 162 | 163 | if (ncol(z)==1) lambda <- log(mean(r2)) else{ 164 | s2 <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log"))[["fitted.values"]] 165 | loglik <- -0.5 * (log(s2) + r2/s2) 166 | LL0 <- sum(loglik) 167 | dLL <- 1 168 | for (m in seq(1, con[["max_iter2"]])) { 169 | gamma <- lm.wfit(x, theta_eap, w = 1/s2)[["coefficients"]] 170 | r2 <- (theta_eap - x %*% gamma)^2 + theta_vap 171 | var_reg <- glm.fit(x = z, y = r2, intercept = FALSE, family = Gamma(link = "log")) 172 | s2 <- var_reg[["fitted.values"]] 173 | loglik <- -0.5 * (log(s2) + r2/s2) 174 | LL_temp <- sum(loglik) 175 | dLL <- LL_temp - LL0 176 | if (dLL < con[["eps2"]]) 177 | break 178 | LL0 <- LL_temp 179 | } 180 | lambda <- var_reg[["coefficients"]] 181 | } 182 | 183 | fitted_mean <- as.double(x %*% gamma) 184 | fitted_var <- exp(as.double(z %*% lambda)) 185 | cat(".") 186 | 187 | # check convergence 188 | if (sqrt(mean((gamma/gamma_prev - 1)^2)) < con[["eps"]]) { 189 | cat("\n converged at iteration", iter, "\n") 190 | break 191 | } else if (iter == con[["max_iter"]]) { 192 | stop("algorithm did not converge; try increasing max_iter.") 193 | break 194 | } else next 195 | } 196 | 197 | gamma <- setNames(as.double(gamma), paste("x", colnames(x), sep = "")) 198 | lambda <- setNames(as.double(lambda), paste("z", colnames(z), sep = "")) 199 | 200 | # inference 201 | pik <- matrix(unlist(Map(partial(dnorm, x = theta_ls), mean = fitted_mean, sd = sqrt(fitted_var))), 202 | N, K, byrow = TRUE) * matrix(qw_ls, N, K, byrow = TRUE) 203 | Lijk <- lapply(theta_ls, function(theta_k) exp(loglik_ltm(alpha = alpha, beta = beta, rep(theta_k, N)))) # K-list 204 | Lik <- vapply(Lijk, compose(exp, partial(rowSums, na.rm = TRUE), log), double(N)) 205 | Li <- rowSums(Lik * pik) 206 | 207 | # log likelihood 208 | log_Lik <- sum(log(Li)) 209 | 210 | # outer product of gradients 211 | environment(dalpha_ltm) <- environment(sj_ab_ltm) <- environment(si_gamma) <- environment(si_lambda) <- environment() 212 | dalpha <- dalpha_ltm(alpha, beta) # K*J matrix 213 | # s_ab <- unname(Reduce(cbind, lapply(1:J, sj_ab_ltm))) 214 | s_gamma <- vapply(1:N, si_gamma, double(p)) 215 | s_lambda <- vapply(1:N, si_lambda, double(q)) 216 | 217 | s_all <- rbind(s_gamma, s_lambda) 218 | s_all[is.na(s_all)] <- 0 219 | covmat <- tryCatch(solve(tcrossprod(s_all)), 220 | error = function(e) {warning("The information matrix is singular; SE calculation failed."); 221 | matrix(NA, nrow(s_all), nrow(s_all))}) 222 | se_all <- sqrt(diag(covmat)) 223 | 224 | # reorganize se_all 225 | sH <- 2 * J 226 | gamma_indices <- (sH - 1):(sH + p - 2) 227 | lambda_indices <- (sH + p - 1):(sH + p + q - 2) 228 | se_all <- c(rep(0, sH), sqrt(diag(covmat))) 229 | 230 | # name se_all and covmat 231 | names_ab <- paste(rep(names(alpha), each = 2), c("Diff", "Dscrmn")) 232 | names(se_all) <- c(names_ab, names(gamma), names(lambda)) 233 | rownames(covmat) <- colnames(covmat) <- c(names(gamma), names(lambda)) 234 | 235 | # item coefficients 236 | coefs_item <- Map(function(a, b) c(Diff = a, Dscrmn = b), alpha, beta) 237 | 238 | # all coefficients 239 | coef_all <- c(unlist(coefs_item), gamma, lambda) 240 | coefs <- data.frame(Estimate = coef_all, Std_Error = se_all, z_value = coef_all/se_all, 241 | p_value = 2 * (1 - pnorm(abs(coef_all/se_all)))) 242 | rownames(coefs) <- names(se_all) 243 | 244 | # ability parameter estimates 245 | theta <- data.frame(post_mean = theta_eap, post_sd = sqrt(theta_vap), 246 | prior_mean = fitted_mean, prior_sd = sqrt(fitted_var)) 247 | 248 | # output 249 | out <- list(coefficients = coefs, scores = theta, vcov = covmat, log_Lik = log_Lik, 250 | N = N, J = J, H = H, ylevels = ylevels, p = p, q = q, control = con, call = cl) 251 | class(out) <- c("hltm", "hIRT") 252 | out 253 | } 254 | -------------------------------------------------------------------------------- /R/latent_scores.R: -------------------------------------------------------------------------------- 1 | #' Estimates of Latent Preferences/Abilities 2 | #' 3 | #' EAP estimates of latent preferences for either \code{hltm} or \code{hgrm} models. 4 | #' 5 | #' @inheritParams print.hIRT 6 | #' 7 | #' @return A data frame of EAP estimates of latent preferences and their approximate standard errors. 8 | #' @export 9 | #' @examples 10 | #' y <- nes_econ2008[, -(1:3)] 11 | #' x <- model.matrix( ~ party * educ, nes_econ2008) 12 | #' z <- model.matrix( ~ party, nes_econ2008) 13 | #' nes_m1 <- hgrm(y, x, z) 14 | #' pref <- latent_scores(nes_m1) 15 | #' require(ggplot2) 16 | #' ggplot(data = nes_econ2008) + 17 | #' geom_density(aes(x = pref$post_mean, col = party)) 18 | latent_scores <- function(x, digits = 3) { 19 | if (!inherits(x, "hIRT")) 20 | stop("Use only with 'hIRT' objects.\n") 21 | round(x[["scores"]], digits) 22 | } 23 | 24 | -------------------------------------------------------------------------------- /R/print.R: -------------------------------------------------------------------------------- 1 | #' Printing an object of class \code{hIRT} 2 | #' @param x An object of class \code{hIRT} 3 | #' @param digits The number of significant digits to use when printing 4 | #' @param ... further arguments passed to \code{\link{print}}. 5 | #' @export 6 | print.hIRT <- function(x, digits = 3, ...) { 7 | cat("\nCall:\n", paste(deparse(x[["call"]]), sep = "\n", collapse = "\n"), 8 | "\n\n", sep = "") 9 | cat("Mean Regression:\n") 10 | print(coef_mean(x, digits), ...) 11 | cat("\n") 12 | cat("Variance Regression:\n") 13 | print(coef_var(x, digits), ...) 14 | cat("\nLog Likelihood:", round(x[["log_Lik"]], digits)) 15 | cat("\n\n") 16 | invisible(x) 17 | } 18 | -------------------------------------------------------------------------------- /R/summary.R: -------------------------------------------------------------------------------- 1 | #' Summarizing Hierarchical Item Response Theory Models 2 | #' 3 | #' Summarizing the fit of either \code{hltm} or \code{hgrm}. 4 | #' 5 | #' @param object An object of class \code{hIRT}. 6 | #' @param by_item Logical. Should item parameters be stored item by item 7 | #' (if \code{TRUE}) or put together in a data frame (if \code{FALSE})? 8 | #' @param digits the number of significant digits to use when printing. 9 | #' 10 | #' @return An object of class \code{summary_hIRT}. 11 | #' \item{call}{The matched call.} 12 | #' \item{model}{Model fit statistics: Log likelihood, AIC, and BIC.} 13 | #' \item{item_coefs}{Item parameter estimates, standard errors, 14 | #' z values, and p values.} 15 | #' \item{mean_coefs}{Parameter estimates for the mean equation.} 16 | #' \item{var_coefs}{Parameter estimates for the variance equation.} 17 | #' 18 | #' @export 19 | #' @examples 20 | #' y <- nes_econ2008[, -(1:3)] 21 | #' x <- model.matrix( ~ party * educ, nes_econ2008) 22 | #' z <- model.matrix( ~ party, nes_econ2008) 23 | #' nes_m1 <- hgrm(y, x, z) 24 | #' summary(nes_m1, by_item = TRUE) 25 | 26 | summary.hIRT <- function(object, by_item = FALSE, digits = 3, ...) { 27 | item_coefs <- coef_item(object, by_item = by_item, digits) 28 | mean_coefs <- coef_mean(object, digits) 29 | var_coefs <- coef_var(object, digits) 30 | log_Lik = object[["log_Lik"]] 31 | df <- sum(object[["H"]]) + sum(object[["p"]]) + sum(object[["q"]]) - 2 32 | N <- length(object[["scores"]]) 33 | AIC <- -2 * log_Lik + 2 * df 34 | BIC <- -2 * log_Lik + df * log(N) 35 | model <- list(log_Lik = log_Lik, AIC = AIC, BIC = BIC) 36 | out <- list(call = object[["call"]], model = model, item_coefs = item_coefs, 37 | mean_coefs = mean_coefs, var_coefs = var_coefs) 38 | class(out) <- c("summary_hIRT") 39 | out 40 | } 41 | 42 | #' @inheritParams print.hIRT 43 | #' @export 44 | #' @rdname summary.hIRT 45 | print.summary_hIRT <- function(x, digits = 3, ...) { 46 | cat("\nCall:\n", paste(deparse(x[["call"]]), sep = "\n", collapse = "\n"), 47 | "\n\n", sep = "") 48 | cat("Model Summary:\n") 49 | model_sum <- as.data.frame(x[["model"]], row.names = "") 50 | print(model_sum) 51 | cat("\n Item Coefficients: \n") 52 | print(x[["item_coefs"]], ...) 53 | cat("\n Mean Regression Coefficients:\n") 54 | print(x[["mean_coefs"]], ...) 55 | cat("\n Variance Regression Coefficients:\n") 56 | print(x[["var_coefs"]], ...) 57 | cat("\n\n") 58 | invisible(x) 59 | } 60 | -------------------------------------------------------------------------------- /R/sysdata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiangzhou09/hIRT/b33d90f13287f60f7feffd0c061858340dff40a6/R/sysdata.rda -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("H", "J", "K", "Li", "Lijk", "Lik", "N", 2 | "alpha", "dalpha", "lambda", "eta", "p", "p0", "q", "pik", 3 | "theta_ls", "w", "x", "x0", "y", "z", "fitted_mean", "fitted_var", 4 | "form_dif", "items_dif")) 5 | 6 | # median impute 7 | impute <- function(vec){ 8 | vec[is.na(vec)] <- median(vec, na.rm = TRUE) 9 | vec 10 | } 11 | 12 | # logical or infix function 13 | `%||%` <- function(a, b) if (!is.null(a)) a else b 14 | 15 | # calculate gamma gradient for case i 16 | si_gamma <- function(i) { 17 | sum(pik[i, ] * Lik[i, ] * (theta_ls - fitted_mean[[i]]))/ 18 | fitted_var[[i]]/Li[[i]] * x[i, 1:p] 19 | } 20 | 21 | # calculate lambda gradient for case i 22 | si_lambda <- function(i) { 23 | sum(0.5 * pik[i, ] * Lik[i, ] * ((theta_ls - fitted_mean[[i]])^2/fitted_var[[i]] - 1))/Li[[i]] * z[i, 1:q] 24 | } 25 | -------------------------------------------------------------------------------- /R/utils_grm.R: -------------------------------------------------------------------------------- 1 | # check if a vector has at least two valid responses 2 | invalid_grm <- function(x) max(x, na.rm = TRUE) < 2 3 | 4 | lrm_fit <- function(x, y, weights, tol = 1e-16, ...){ 5 | valid <- weights>tol & !is.na(y) 6 | lrm.fit(x[valid, , drop = FALSE], y[valid], weights = weights[valid], ...) 7 | } 8 | 9 | # log likelihood function (return N * J matrix) y: N*J data frame alpha: 10 | # length J list beta: length J numeric vector theta: length N numeric 11 | # vector 12 | loglik_grm <- function(alpha, beta, theta) { 13 | util <- outer(theta, beta) 14 | alpha_l <- simplify2array(unname(Map(function(x, y) x[y], alpha, y))) 15 | alpha_h <- simplify2array(unname(Map(function(x, y) x[y + 1L], alpha, y))) 16 | log(plogis(util + alpha_l) - plogis(util + alpha_h)) 17 | } 18 | 19 | # posterior of theta (unnormalized) (returns N-vector) 20 | # y: N*J data frame 21 | # x: N*p model matrix 22 | # z: N*q model matrix 23 | # alpha: length J list 24 | # beta: length J numeric vector 25 | # gamma: p-vector 26 | # lambda: q-vector 27 | # theta_k: numeric scalar 28 | # qw_k numeric scalar 29 | theta_post_grm <- function(theta_k, qw_k) { 30 | wt_k <- dnorm(theta_k - fitted_mean, sd = sqrt(fitted_var)) * qw_k # prior density * quadrature weight 31 | loglik <- rowSums(loglik_grm(alpha, beta, rep(theta_k, N)), na.rm = TRUE) 32 | logPop <- log(wt_k) 33 | exp(loglik + logPop) 34 | } 35 | 36 | theta_prior_grm <- function(theta_k, qw_k) { 37 | wt_k <- dnorm(theta_k - fitted_mean, sd = sqrt(fitted_var)) * qw_k # prior density * quadrature weight 38 | # loglik <- rowSums(loglik_grm(alpha, beta, rep(theta_k, N)), na.rm = TRUE) 39 | logPop <- log(wt_k) 40 | exp(logPop) 41 | } 42 | 43 | # pseudo tabulated data for item J (returns K*H_j matrix) 44 | # y_j: N-vector 45 | # H_j: number of response categories for item j 46 | # w: K*N matrix 47 | dummy_fun_grm <- function(y_j, H_j) { 48 | dummy_mat <- outer(y_j, 1:H_j, "==") # N*H_j matrix 49 | dummy_mat[is.na(dummy_mat)] <- 0 50 | w %*% dummy_mat 51 | } 52 | 53 | # pseudo tabulated data to pseudo data frame 54 | # tab: K*H_j matrix 55 | # theta_ls: K-vector 56 | tab2df_grm <- function(tab, theta_ls) { 57 | H_j <- ncol(tab) 58 | theta <- rep(theta_ls, H_j) 59 | y <- rep(1:H_j, each = K) 60 | data.frame(y = factor(y), x = theta, wt = as.double(tab)) 61 | } 62 | 63 | # score function of alpha and beta (returns an H_j*N matrix) Lik: N*K 64 | # matrix pik: N*K matrix alpha: J-list beta: J-vector theta_ls: K-vector 65 | sj_ab_grm <- function(j) { 66 | temp2 <- array(0, c(N, K, H[[j]] + 1)) 67 | h <- .subset2(y, j) 68 | drv_h <- vapply(theta_ls, function(theta_k) exp(alpha[[j]][h] + beta[[j]] * 69 | theta_k)/(1 + exp(alpha[[j]][h] + beta[[j]] * theta_k))^2, double(N)) 70 | drv_h_plus_one <- -vapply(theta_ls, function(theta_k) exp(alpha[[j]][h + 71 | 1L] + beta[[j]] * theta_k)/(1 + exp(alpha[[j]][h + 1L] + beta[[j]] * 72 | theta_k))^2, double(N)) 73 | drv_h[h == 1, ] <- 0 74 | drv_h_plus_one[h == H[[j]], ] <- 0 75 | for (i in seq_len(N)) { 76 | if (is.na(h[[i]])) next 77 | temp2[i, , h[[i]]] <- drv_h[i, ] 78 | temp2[i, , h[[i]] + 1L] <- drv_h_plus_one[i, ] 79 | } 80 | comp_a <- pik * Lik/vapply(Lijk, `[`, 1:N, j, FUN.VALUE = double(N)) # N*K matrix 81 | comp_a[is.na(comp_a)] <- 0 82 | s_alpha <- vapply(1:N, function(i) comp_a[i, ] %*% temp2[i, , 2:H[[j]]], 83 | double(H[[j]] - 1L)) # (H[j]-1)*N matrix 84 | temp2_beta <- drv_h + drv_h_plus_one 85 | s_beta <- rowSums(comp_a * matrix(theta_ls, N, K, byrow = TRUE) * temp2_beta) # N-vector 86 | s <- sweep(rbind(s_alpha, s_beta), 2, rowSums(Lik * pik), FUN = "/") 87 | } 88 | 89 | 90 | x0DIF <- function(theta_k){ 91 | if(form_dif == "uniform") x0 else cbind(x0, theta_k * x0) 92 | } 93 | 94 | # log likelihood function (return N * J matrix) y for DIF, returns N*J data frame 95 | # alpha: length J list 96 | # beta: length J numeric vector 97 | # theta: length N numeric vector 98 | loglik_grmDIF <- function(alpha, beta, theta, eta) { 99 | util <- outer(theta, beta) 100 | tmp_x <- x0DIF(theta) 101 | util2 <- vapply(1:length(eta), function(j) if(j %in% items_dif) tmp_x %*% eta[[j]] else double(N), double(N)) 102 | alpha_l <- simplify2array(unname(Map(function(x, y) x[y], alpha, y))) 103 | alpha_h <- simplify2array(unname(Map(function(x, y) x[y + 1L], alpha, y))) 104 | log(plogis(util + util2 + alpha_l) - plogis(util + util2 + alpha_h)) 105 | } 106 | 107 | # posterior of theta (unnormalized) (returns N-vector) 108 | # y: N*J data frame 109 | # x: N*p model matrix 110 | # z: N*q model matrix 111 | # alpha: length J list 112 | # beta: length J numeric vector 113 | # gamma: p-vector 114 | # lambda: q-vector 115 | # theta_k: numeric scalar 116 | # qw_k numeric scalar 117 | theta_post_grmDIF <- function(theta_k, qw_k) { 118 | wt_k <- dnorm(theta_k - fitted_mean, sd = sqrt(fitted_var)) * qw_k # prior density * quadrature weight 119 | loglik <- rowSums(loglik_grmDIF(alpha, beta, rep(theta_k, N), eta), na.rm = TRUE) 120 | logPop <- log(wt_k) 121 | exp(loglik + logPop) 122 | } 123 | 124 | # score function of alpha and beta and eta (returns an ?*N matrix) 125 | # Lik: N*K matrix pik: N*K matrix alpha: J-list beta: J-vector theta_ls: K-vector 126 | sj_ab_grmDIF <- function(j) { 127 | 128 | temp2 <- array(0, c(N, K, H[[j]] + 1)) 129 | h <- .subset2(y, j) 130 | 131 | if(j %in% items_dif){ 132 | 133 | drv_h <- vapply(theta_ls, function(theta_k) exp(alpha[[j]][h] + beta[[j]] * theta_k + 134 | x0DIF(theta_k) %*% eta[[j]])/ 135 | (1 + exp(alpha[[j]][h] + beta[[j]] * theta_k + x0DIF(theta_k) %*% eta[[j]]))^2, 136 | double(N)) 137 | 138 | drv_h_plus_one <- -vapply(theta_ls, function(theta_k) exp(alpha[[j]][h + 1L] + beta[[j]] * theta_k + 139 | x0DIF(theta_k) %*% eta[[j]])/ 140 | (1 + exp(alpha[[j]][h + 1L] + beta[[j]] * theta_k + x0DIF(theta_k) %*% eta[[j]]))^2, 141 | double(N)) 142 | } else{ 143 | 144 | drv_h <- vapply(theta_ls, function(theta_k) exp(alpha[[j]][h] + beta[[j]] * theta_k)/ 145 | (1 + exp(alpha[[j]][h] + beta[[j]] * theta_k))^2, 146 | double(N)) 147 | 148 | drv_h_plus_one <- -vapply(theta_ls, function(theta_k) exp(alpha[[j]][h + 1L] + beta[[j]] * theta_k)/ 149 | (1 + exp(alpha[[j]][h + 1L] + beta[[j]] * theta_k))^2, 150 | double(N)) 151 | } 152 | drv_h[h == 1, ] <- 0 153 | drv_h_plus_one[h == H[[j]], ] <- 0 154 | 155 | for (i in seq_len(N)) { 156 | if (is.na(h[[i]])) next 157 | temp2[i, , h[[i]]] <- drv_h[i, ] 158 | temp2[i, , h[[i]] + 1L] <- drv_h_plus_one[i, ] 159 | } 160 | 161 | comp_a <- pik * Lik/vapply(Lijk, `[`, 1:N, j, FUN.VALUE = double(N)) # N*K matrix 162 | comp_a[is.na(comp_a)] <- 0 163 | 164 | s_alpha <- vapply(1:N, function(i) comp_a[i, ] %*% temp2[i, , 2:H[[j]]], 165 | double(H[[j]] - 1L)) # (H[j]-1)*N matrix 166 | 167 | temp2_beta <- drv_h + drv_h_plus_one 168 | s_beta <- rowSums(comp_a * matrix(theta_ls, N, K, byrow = TRUE) * temp2_beta) # N-vector 169 | 170 | if(j %in% items_dif){ 171 | s_eta <- vapply(1:p0, function(i) rowSums(comp_a * matrix(x0[, i, drop = FALSE], N, K, byrow = FALSE) * temp2_beta), double(N)) 172 | if(form_dif == "non-uniform"){ 173 | s_eta <- cbind(s_eta, vapply(1:p0, function(i) rowSums(comp_a * outer(x0[, i], theta_ls) * temp2_beta), double(N))) 174 | } 175 | s <- sweep(rbind(s_alpha, t(s_eta), s_beta), 2, rowSums(Lik * pik), FUN = "/") 176 | } else{ 177 | s <- sweep(rbind(s_alpha, s_beta), 2, rowSums(Lik * pik), FUN = "/") 178 | } 179 | } 180 | 181 | -------------------------------------------------------------------------------- /R/utils_ltm.R: -------------------------------------------------------------------------------- 1 | # check if a vector is dichotomous 2 | invalid_ltm <- function(x) max(x, na.rm = TRUE) != 1 3 | 4 | glm_fit <- function(x, y, weights, tol = 1e-16, ...){ 5 | glm.fit(x[weights>tol, , drop = FALSE], y[weights>tol], weights = weights[weights>tol], ...) 6 | } 7 | 8 | # log likelihood function (return N * J matrix) y: N*J data frame alpha: 9 | # length J numeric vector beta: length J numeric vector theta: length N 10 | # numeric vector 11 | loglik_ltm <- function(alpha, beta, theta) { 12 | util <- matrix(alpha, N, J, byrow = TRUE) + outer(theta, beta) 13 | log(exp(as.matrix(y) * util)/(1 + exp(util))) 14 | } 15 | 16 | # posterior of theta (unnormalized) (returns N-vector) y: N*J data frame 17 | # x: N*p model matrix z: N*q model matrix alpha: length J list beta: 18 | # length J numeric vector gamma: p-vector lambda: q-vector theta_k: 19 | # numeric scalar qw_k numeric scalar 20 | theta_post_ltm <- function(theta_k, qw_k) { 21 | N <- nrow(y) 22 | wt_k <- dnorm(theta_k - fitted_mean, sd = sqrt(fitted_var)) * qw_k # prior density * quadrature weight 23 | loglik <- rowSums(loglik_ltm(alpha, beta, rep(theta_k, N)), na.rm = TRUE) 24 | logPop <- log(wt_k) 25 | exp(loglik + logPop) 26 | } 27 | 28 | # pseudo tabulated data for item J (returns K*2 matrix) y_j: N-vector w: 29 | # K*N matrix 30 | dummy_fun_ltm <- function(y_j) { 31 | dummy_mat <- outer(y_j, c(0, 1), "==") # N*H_j matrix 32 | dummy_mat[is.na(dummy_mat)] <- 0 33 | w %*% dummy_mat 34 | } 35 | 36 | # pseudo tabulated data to pseudo data frame tab: K*2 matrix theta_ls: 37 | # K-vector 38 | tab2df_ltm <- function(tab, theta_ls) { 39 | theta <- rep(theta_ls, 2) 40 | y <- rep(c(0, 1), each = K) 41 | data.frame(y = factor(y), x = theta, wt = as.double(tab)) 42 | } 43 | 44 | # derivative of likelihood wrt alpha, given theta_k 45 | dalpha_ltm <- function(alpha, beta) { 46 | putil <- plogis(matrix(alpha, K, J, byrow = TRUE) + outer(theta_ls, beta)) 47 | putil * (1 - putil) 48 | } 49 | 50 | # score function of alpha and beta (return a N*2 matrix) Lik: N*K matrix 51 | # pik: N*K matrix alpha: J-vector beta: J-vector theta_ls: K-vector 52 | sj_ab_ltm <- function(j) { 53 | tmp_mat <- (pik * Lik/vapply(Lijk, `[`, 1:N, j, FUN.VALUE = double(N))) # N*K matrix 54 | dalpha_j <- dalpha[, j, drop = FALSE] # K*1 matrix 55 | dbeta_j <- dalpha_j * theta_ls # K*1 matrix 56 | sgn <- .subset2(y, j) * 2 - 1 57 | sgn[is.na(sgn)] <- 0 # N-vector (1, -1, or 0) 58 | drv_alpha <- sgn * (tmp_mat %*% dalpha_j)/Li 59 | drv_beta <- sgn * (tmp_mat %*% dbeta_j)/Li 60 | cbind(drv_alpha, drv_beta) 61 | } 62 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # hIRT: hierarchical item response theory (IRT) models 17 | 18 | hIRT is an R package that implements a class of hierarchical item response theory (IRT) models where both the mean and the variance of the latent "ability parameters" may depend on observed covariates. The current implementation includes both the two-parameter latent trait model for binary data (`hltm()` and `hltm2()`) and the graded response model for ordinal data (`hgrm()` and `hgrm2()`). Both are fitted via the Expectation-Maximization (EM) algorithm. Asymptotic standard errors are derived from the observed information matrix. 19 | 20 | **Main Reference**: Zhou, Xiang. 2019. "Hierarchical Item Response Models for Analyzing Public Opinion." Political Analysis, 27(4): 481-502. Available at: 21 | 22 | Full paper with technical appendix is available at: 23 | 24 | ## Installation 25 | 26 | You can install the released version of hIRT from [CRAN](https://CRAN.R-project.org) with: 27 | 28 | ``` r 29 | install.packages("hIRT") 30 | ``` 31 | 32 | And the development version from [GitHub](https://github.com/) with: 33 | 34 | ``` r 35 | # install.packages("devtools") 36 | devtools::install_github("xiangzhou09/hIRT") 37 | ``` 38 | ## Example 39 | 40 | The following example illustrates how the `hgrm()` function can be used to examine the effects of education and party affiliation on economic ideology, a latent variable gauged by a number of survey items in the American National Election Studies (ANES), 2008. Documentation of the dataset `nes_econ2008` can be accessed by running `?nes_econ2008` in R after loading the `hIRT` package. 41 | 42 | ```{r example} 43 | library(hIRT) 44 | 45 | # survey items used to measure economic ideology 46 | y <- nes_econ2008[, -(1:3)] 47 | 48 | # predictors for the mean of economic ideology 49 | x <- model.matrix( ~ party * educ, nes_econ2008) 50 | 51 | # predictors for the variance of economic ideology 52 | z <- model.matrix( ~ party, nes_econ2008) 53 | 54 | # fitting a hierarhical graded response model 55 | nes_m1 <- hgrm(y, x, z) 56 | 57 | nes_m1 58 | ``` 59 | 60 | The output from `hgrm` is an object of class `hIRT`. The `print()` method for `hIRT` outputs the regression tables for the mean regression and the variance regression. 61 | 62 | ## Extracting coefficients 63 | 64 | The `coef_item()`, `coef_mean()`, and `coef_var()` functions can be used to extract coefficient tables for item parameters, the mean regression, and the variance regression respectively. 65 | 66 | 67 | ```{r coef} 68 | coef_item(nes_m1) 69 | 70 | coef_mean(nes_m1) 71 | 72 | coef_var(nes_m1) 73 | ``` 74 | 75 | ## Latent scores 76 | 77 | The `latent_scores()` function can be used to extract the Expected A Posteriori (EAP) estimates of the latent ability parameters, along with their "prior" estimates (without the random effects). In this example, the latent ability estimates can be interpreted as the estimated ideological positions of ANES respondents on economic issues. 78 | 79 | ```{r latent} 80 | 81 | pref <- latent_scores(nes_m1) 82 | 83 | summary(pref) 84 | ``` 85 | 86 | ## Identification constraints. 87 | 88 | The `constr` parameter in the `hgrm()` and `hltm()` function can be used to specify the type of constraints used to identify the model. The default option, `"latent_scale"`, constrains the mean of the latent ability parameters to zero and the geometric mean of their prior variance to one; Alternatively, `"items"` sets the mean of the item difficulty parameters to zero and the geometric mean of the discrimination parameters to one. 89 | 90 | In practice, one may want to interpret the effects of the mean predictors (in the above example, education and party affiliation) on the standard deviation scale of the latent trait. This can be easily achieved through rescaling their point estimates and standard errors. 91 | 92 | ```{r constr, message=FALSE} 93 | 94 | library(dplyr) 95 | 96 | total_sd <- sqrt(var(pref$post_mean) + mean(pref$post_sd^2)) 97 | 98 | coef_mean_sd_scale <- coef_mean(nes_m1) %>% 99 | tibble::rownames_to_column("term") %>% 100 | mutate(`Estimate` = `Estimate`/total_sd, 101 | `Std_Error` = `Std_Error`/total_sd) 102 | 103 | coef_mean_sd_scale 104 | 105 | ``` 106 | 107 | ## hIRT with fixed item parameters 108 | 109 | Sometimes, the researcher might want to fit the hIRT models using a set of fixed item parameters, for example, to make results comparable across different studies. The `hgrm2()` and `hltm2()` functions can be used for this purpose. They are illustrated in more detail in the package documentation. 110 | 111 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # hIRT: hierarchical item response theory (IRT) models 5 | 6 | hIRT is an R package that implements a class of hierarchical item 7 | response theory (IRT) models where both the mean and the variance of the 8 | latent “ability parameters” may depend on observed covariates. The 9 | current implementation includes both the two-parameter latent trait 10 | model for binary data (`hltm()` and `hltm2()`) and the graded response 11 | model for ordinal data (`hgrm()` and `hgrm2()`). Both are fitted via the 12 | Expectation-Maximization (EM) algorithm. Asymptotic standard errors are 13 | derived from the observed information matrix. 14 | 15 | **Main Reference**: Zhou, Xiang. 2019. “Hierarchical Item Response 16 | Models for Analyzing Public Opinion.” Political Analysis, 27(4): 17 | 481-502. Available at: 18 | 19 | Full paper with technical appendix is available at: 20 | 21 | 22 | ## Installation 23 | 24 | You can install the released version of hIRT from 25 | [CRAN](https://CRAN.R-project.org) with: 26 | 27 | ``` r 28 | install.packages("hIRT") 29 | ``` 30 | 31 | And the development version from [GitHub](https://github.com/) with: 32 | 33 | ``` r 34 | # install.packages("devtools") 35 | devtools::install_github("xiangzhou09/hIRT") 36 | ``` 37 | 38 | ## Example 39 | 40 | The following example illustrates how the `hgrm()` function can be used 41 | to examine the effects of education and party affiliation on economic 42 | ideology, a latent variable gauged by a number of survey items in the 43 | American National Election Studies (ANES), 2008. Documentation of the 44 | dataset `nes_econ2008` can be accessed by running `?nes_econ2008` in R 45 | after loading the `hIRT` package. 46 | 47 | ``` r 48 | library(hIRT) 49 | #> Registered S3 method overwritten by 'pryr': 50 | #> method from 51 | #> print.bytes Rcpp 52 | 53 | # survey items used to measure economic ideology 54 | y <- nes_econ2008[, -(1:3)] 55 | 56 | # predictors for the mean of economic ideology 57 | x <- model.matrix( ~ party * educ, nes_econ2008) 58 | 59 | # predictors for the variance of economic ideology 60 | z <- model.matrix( ~ party, nes_econ2008) 61 | 62 | # fitting a hierarhical graded response model 63 | nes_m1 <- hgrm(y, x, z) 64 | #> ............ 65 | #> converged at iteration 12 66 | 67 | nes_m1 68 | #> 69 | #> Call: 70 | #> hgrm(y = y, x = x, z = z) 71 | #> 72 | #> Mean Regression: 73 | #> Estimate Std_Error z_value p_value 74 | #> x(Intercept) -0.480 0.105 -4.570 0.000 75 | #> xpartyindependent 0.386 0.086 4.473 0.000 76 | #> xpartyRepublican 1.133 0.135 8.408 0.000 77 | #> xeduc2 0.037 0.079 0.467 0.641 78 | #> xpartyindependent:educ2 0.235 0.117 2.007 0.045 79 | #> xpartyRepublican:educ2 0.428 0.148 2.886 0.004 80 | #> 81 | #> Variance Regression: 82 | #> Estimate Std_Error z_value p_value 83 | #> z(Intercept) -0.097 0.139 -0.697 0.486 84 | #> zpartyindependent 0.166 0.100 1.661 0.097 85 | #> zpartyRepublican 0.172 0.126 1.373 0.170 86 | #> 87 | #> Log Likelihood: -16259.16 88 | ``` 89 | 90 | The output from `hgrm` is an object of class `hIRT`. The `print()` 91 | method for `hIRT` outputs the regression tables for the mean regression 92 | and the variance regression. 93 | 94 | ## Extracting coefficients 95 | 96 | The `coef_item()`, `coef_mean()`, and `coef_var()` functions can be used 97 | to extract coefficient tables for item parameters, the mean regression, 98 | and the variance regression respectively. 99 | 100 | ``` r 101 | coef_item(nes_m1) 102 | #> $health_ins7 103 | #> Estimate Std_Error z_value p_value 104 | #> y>=2 1.279 NA NA NA 105 | #> y>=3 0.541 0.063 8.542 0.000 106 | #> y>=4 -0.075 0.083 -0.898 0.369 107 | #> y>=5 -1.047 0.107 -9.826 0.000 108 | #> y>=6 -1.852 0.124 -14.901 0.000 109 | #> y>=7 -2.684 0.149 -17.990 0.000 110 | #> Dscrmn 1.016 0.096 10.569 0.000 111 | #> 112 | #> $jobs_guar7 113 | #> Estimate Std_Error z_value p_value 114 | #> y>=2 2.136 0.173 12.377 0 115 | #> y>=3 1.352 0.153 8.860 0 116 | #> y>=4 0.607 0.141 4.299 0 117 | #> y>=5 -0.520 0.137 -3.797 0 118 | #> y>=6 -1.611 0.141 -11.429 0 119 | #> y>=7 -2.785 0.163 -17.043 0 120 | #> Dscrmn 1.305 0.114 11.448 0 121 | #> 122 | #> $gov_services7 123 | #> Estimate Std_Error z_value p_value 124 | #> y>=2 3.950 0.222 17.760 0.000 125 | #> y>=3 2.859 0.182 15.707 0.000 126 | #> y>=4 1.831 0.158 11.592 0.000 127 | #> y>=5 0.247 0.147 1.679 0.093 128 | #> y>=6 -1.001 0.154 -6.490 0.000 129 | #> y>=7 -2.020 0.169 -11.947 0.000 130 | #> Dscrmn -1.363 0.116 -11.715 0.000 131 | #> 132 | #> $FS_poor3 133 | #> Estimate Std_Error z_value p_value 134 | #> y>=2 -1.180 0.179 -6.601 0 135 | #> y>=3 -4.459 0.243 -18.357 0 136 | #> Dscrmn 1.918 0.164 11.679 0 137 | #> 138 | #> $FS_childcare3 139 | #> Estimate Std_Error z_value p_value 140 | #> y>=2 -0.808 0.148 -5.474 0 141 | #> y>=3 -4.051 0.192 -21.132 0 142 | #> Dscrmn 1.608 0.128 12.535 0 143 | #> 144 | #> $FS_crime3 145 | #> Estimate Std_Error z_value p_value 146 | #> y>=2 -0.845 0.066 -12.866 0 147 | #> y>=3 -3.150 0.108 -29.048 0 148 | #> Dscrmn 0.516 0.059 8.823 0 149 | #> 150 | #> $FS_publicschools3 151 | #> Estimate Std_Error z_value p_value 152 | #> y>=2 -1.790 0.136 -13.197 0 153 | #> y>=3 -4.144 0.188 -22.022 0 154 | #> Dscrmn 1.302 0.111 11.751 0 155 | #> 156 | #> $FS_welfare3 157 | #> Estimate Std_Error z_value p_value 158 | #> y>=2 1.054 0.117 8.970 0 159 | #> y>=3 -1.355 0.116 -11.650 0 160 | #> Dscrmn 1.178 0.099 11.937 0 161 | #> 162 | #> $FS_envir3 163 | #> Estimate Std_Error z_value p_value 164 | #> y>=2 -0.855 0.106 -8.071 0 165 | #> y>=3 -3.499 0.159 -22.023 0 166 | #> Dscrmn 1.101 0.092 11.953 0 167 | #> 168 | #> $FS_socsec3 169 | #> Estimate Std_Error z_value p_value 170 | #> y>=2 -1.091 0.104 -10.535 0 171 | #> y>=3 -4.278 0.178 -24.033 0 172 | #> Dscrmn 1.028 NA NA NA 173 | 174 | coef_mean(nes_m1) 175 | #> Estimate Std_Error z_value p_value 176 | #> x(Intercept) -0.480 0.105 -4.570 0.000 177 | #> xpartyindependent 0.386 0.086 4.473 0.000 178 | #> xpartyRepublican 1.133 0.135 8.408 0.000 179 | #> xeduc2 0.037 0.079 0.467 0.641 180 | #> xpartyindependent:educ2 0.235 0.117 2.007 0.045 181 | #> xpartyRepublican:educ2 0.428 0.148 2.886 0.004 182 | 183 | coef_var(nes_m1) 184 | #> Estimate Std_Error z_value p_value 185 | #> z(Intercept) -0.097 0.139 -0.697 0.486 186 | #> zpartyindependent 0.166 0.100 1.661 0.097 187 | #> zpartyRepublican 0.172 0.126 1.373 0.170 188 | ``` 189 | 190 | ## Latent scores 191 | 192 | The `latent_scores()` function can be used to extract the Expected A 193 | Posteriori (EAP) estimates of the latent ability parameters, along with 194 | their “prior” estimates (without the random effects). In this example, 195 | the latent ability estimates can be interpreted as the estimated 196 | ideological positions of ANES respondents on economic issues. 197 | 198 | ``` r 199 | 200 | pref <- latent_scores(nes_m1) 201 | 202 | summary(pref) 203 | #> post_mean post_sd prior_mean prior_sd 204 | #> Min. :-2.082000 Min. :0.3940 Min. :-0.4800000 Min. :0.953 205 | #> 1st Qu.:-0.751000 1st Qu.:0.4788 1st Qu.:-0.4440000 1st Qu.:0.953 206 | #> Median :-0.104000 Median :0.5280 Median :-0.0950000 Median :1.035 207 | #> Mean :-0.000147 Mean :0.5469 Mean :-0.0001561 Mean :1.001 208 | #> 3rd Qu.: 0.629500 3rd Qu.:0.6090 3rd Qu.: 0.1770000 3rd Qu.:1.035 209 | #> Max. : 3.359000 Max. :0.9780 Max. : 1.1170000 Max. :1.039 210 | ``` 211 | 212 | ## Identification constraints. 213 | 214 | The `constr` parameter in the `hgrm()` and `hltm()` function can be used 215 | to specify the type of constraints used to identify the model. The 216 | default option, `"latent_scale"`, constrains the mean of the latent 217 | ability parameters to zero and the geometric mean of their prior 218 | variance to one; Alternatively, `"items"` sets the mean of the item 219 | difficulty parameters to zero and the geometric mean of the 220 | discrimination parameters to one. 221 | 222 | In practice, one may want to interpret the effects of the mean 223 | predictors (in the above example, education and party affiliation) on 224 | the standard deviation scale of the latent trait. This can be easily 225 | achieved through rescaling their point estimates and standard errors. 226 | 227 | ``` r 228 | 229 | library(dplyr) 230 | 231 | total_sd <- sqrt(var(pref$post_mean) + mean(pref$post_sd^2)) 232 | 233 | coef_mean_sd_scale <- coef_mean(nes_m1) %>% 234 | tibble::rownames_to_column("term") %>% 235 | mutate(`Estimate` = `Estimate`/total_sd, 236 | `Std_Error` = `Std_Error`/total_sd) 237 | 238 | coef_mean_sd_scale 239 | #> term Estimate Std_Error z_value p_value 240 | #> 1 x(Intercept) -0.42437486 0.09283200 -4.570 0.000 241 | #> 2 xpartyindependent 0.34126812 0.07603383 4.473 0.000 242 | #> 3 xpartyRepublican 1.00170150 0.11935543 8.408 0.000 243 | #> 4 xeduc2 0.03271223 0.06984503 0.467 0.641 244 | #> 5 xpartyindependent:educ2 0.20776686 0.10344137 2.007 0.045 245 | #> 6 xpartyRepublican:educ2 0.37840092 0.13084892 2.886 0.004 246 | ``` 247 | 248 | ## hIRT with fixed item parameters 249 | 250 | Sometimes, the researcher might want to fit the hIRT models using a set 251 | of fixed item parameters, for example, to make results comparable across 252 | different studies. The `hgrm2()` and `hltm2()` functions can be used for 253 | this purpose. They are illustrated in more detail in the package 254 | documentation. 255 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Test environments 2 | * ubuntu 12.04 (on travis-ci), R 3.4.1 3 | * win-builder (devel and release) 4 | 5 | ## R CMD check results 6 | 7 | 0 errors | 0 warnings | 0 notes 8 | 9 | ## Downstream dependencies 10 | 11 | There are currently no downstream dependencies for this package. 12 | 13 | --- 14 | -------------------------------------------------------------------------------- /data/nes_econ2008.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiangzhou09/hIRT/b33d90f13287f60f7feffd0c061858340dff40a6/data/nes_econ2008.rda -------------------------------------------------------------------------------- /hIRT.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /man/coef_item.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/coef.R 3 | \name{coef_item} 4 | \alias{coef_item} 5 | \alias{coef_mean} 6 | \alias{coef_var} 7 | \title{Parameter Estimates from Hierarchical IRT Models.} 8 | \usage{ 9 | coef_item(x, by_item = TRUE, digits = 3) 10 | 11 | coef_mean(x, digits = 3) 12 | 13 | coef_var(x, digits = 3) 14 | } 15 | \arguments{ 16 | \item{x}{An object of class \code{hIRT}} 17 | 18 | \item{by_item}{Logical. Should item parameters be stored item by item 19 | (if \code{TRUE}) or put together in a data frame (if \code{FALSE})?} 20 | 21 | \item{digits}{The number of significant digits to use when printing} 22 | } 23 | \value{ 24 | Parameter estimates, standard errors, z values, and p values 25 | organized as a data frame (if \code{by_item = TRUE}) or a list (if \code{ 26 | by_item = FALSE}). 27 | } 28 | \description{ 29 | Parameter estimates from either \code{hltm} or \code{hgrm} or \code{hgrmDIF} models. \code{code_item} 30 | reports estimates of item parameters. \code{coef_mean} reports results for the mean equation. 31 | \code{coef_var} reports results for the variance equation. 32 | } 33 | \examples{ 34 | y <- nes_econ2008[, -(1:3)] 35 | x <- model.matrix( ~ party * educ, nes_econ2008) 36 | z <- model.matrix( ~ party, nes_econ2008) 37 | nes_m1 <- hgrm(y, x, z) 38 | coef_item(nes_m1) 39 | coef_mean(nes_m1) 40 | coef_var(nes_m1) 41 | } 42 | -------------------------------------------------------------------------------- /man/figures/README-pressure-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiangzhou09/hIRT/b33d90f13287f60f7feffd0c061858340dff40a6/man/figures/README-pressure-1.png -------------------------------------------------------------------------------- /man/hgrm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hgrm.R 3 | \name{hgrm} 4 | \alias{hgrm} 5 | \title{Fitting Hierarchical Graded Response Models (for Ordinal Responses)} 6 | \usage{ 7 | hgrm( 8 | y, 9 | x = NULL, 10 | z = NULL, 11 | constr = c("latent_scale", "items"), 12 | beta_set = 1L, 13 | sign_set = TRUE, 14 | init = c("naive", "glm", "irt"), 15 | control = list() 16 | ) 17 | } 18 | \arguments{ 19 | \item{y}{A data frame or matrix of item responses.} 20 | 21 | \item{x}{An optional model matrix, including the intercept term, that predicts the 22 | mean of the latent preference. If not supplied, only the intercept term is included.} 23 | 24 | \item{z}{An optional model matrix, including the intercept term, that predicts the 25 | variance of the latent preference. If not supplied, only the intercept term is included.} 26 | 27 | \item{constr}{The type of constraints used to identify the model: "latent_scale", 28 | or "items". The default, "latent_scale" constrains the mean of latent preferences 29 | to zero and the geometric mean of prior variance to one; "items" places constraints 30 | on item parameters instead and sets the mean of item difficulty parameters to zero 31 | and the geometric mean of the discrimination parameters to one.} 32 | 33 | \item{beta_set}{The index of the item for which the discrimination parameter is 34 | restricted to be positive (or negative). It may take any integer value from 35 | 1 to \code{ncol(y)}.} 36 | 37 | \item{sign_set}{Logical. Should the discrimination parameter of 38 | the corresponding item (indexed by \code{beta_set}) be positive 39 | (if \code{TRUE}) or negative (if \code{FALSE})?} 40 | 41 | \item{init}{A character string indicating how item parameters are initialized. It can be 42 | "naive", "glm", or "irt".} 43 | 44 | \item{control}{A list of control values 45 | \describe{ 46 | \item{max_iter}{The maximum number of iterations of the EM algorithm. 47 | The default is 150.} 48 | \item{eps}{Tolerance parameter used to determine convergence of the 49 | EM algorithm. Specifically, iterations continue until the Euclidean 50 | distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps}, 51 | where \eqn{\beta} is the vector of item discrimination parameters. 52 | \code{eps}=1e-4 by default.} 53 | \item{max_iter2}{The maximum number of iterations of the conditional 54 | maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}. 55 | The default is 15.} 56 | \item{eps2}{Tolerance parameter used to determine convergence of the 57 | conditional maximization procedures for updating \eqn{\gamma} and 58 | \eqn{\lambda}. Specifically, iterations continue until the Euclidean 59 | distance between two consecutive log likelihoods falls under \code{eps2}. 60 | \code{eps2}=1e-3 by default.} 61 | \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.} 62 | \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.} 63 | }} 64 | } 65 | \value{ 66 | An object of class \code{hgrm}. 67 | \item{coefficients}{A data frame of parameter estimates, standard errors, 68 | z values and p values.} 69 | \item{scores}{A data frame of EAP estimates of latent preferences and 70 | their approximate standard errors.} 71 | \item{vcov}{Variance-covariance matrix of parameter estimates.} 72 | \item{log_Lik}{The log-likelihood value at convergence.} 73 | \item{N}{Number of units.} 74 | \item{J}{Number of items.} 75 | \item{H}{A vector denoting the number of response categories for each item.} 76 | \item{ylevels}{A list showing the levels of the factorized response categories.} 77 | \item{p}{The number of predictors for the mean equation.} 78 | \item{q}{The number of predictors for the variance equation.} 79 | \item{control}{List of control values.} 80 | \item{call}{The matched call.} 81 | } 82 | \description{ 83 | \code{hgrm} fits a hierarchical graded response model in which both 84 | the mean and the variance of the latent preference (ability parameter) 85 | may depend on person-specific covariates (\code{x} and \code{z}). 86 | Specifically, the mean is specified as a linear combination of \code{x} 87 | and the log of the variance is specified as a linear combination of 88 | \code{z}. Nonresponses are treated as missing at random. 89 | } 90 | \examples{ 91 | y <- nes_econ2008[, -(1:3)] 92 | x <- model.matrix( ~ party * educ, nes_econ2008) 93 | z <- model.matrix( ~ party, nes_econ2008) 94 | nes_m1 <- hgrm(y, x, z) 95 | nes_m1 96 | } 97 | \references{ 98 | Zhou, Xiang. 2019. "\href{https://doi.org/10.1017/pan.2018.63}{Hierarchical Item Response Models for Analyzing Public Opinion.}" Political Analysis. 99 | } 100 | -------------------------------------------------------------------------------- /man/hgrm2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hgrm2.R 3 | \name{hgrm2} 4 | \alias{hgrm2} 5 | \title{Hierarchical Graded Response Models with Known Item Parameters} 6 | \usage{ 7 | hgrm2(y, x = NULL, z = NULL, item_coefs, control = list()) 8 | } 9 | \arguments{ 10 | \item{y}{A data frame or matrix of item responses.} 11 | 12 | \item{x}{An optional model matrix, including the intercept term, that predicts the 13 | mean of the latent preference. If not supplied, only the intercept term is included.} 14 | 15 | \item{z}{An optional model matrix, including the intercept term, that predicts the 16 | variance of the latent preference. If not supplied, only the intercept term is included.} 17 | 18 | \item{item_coefs}{A list of known item parameters. The parameters of item \eqn{j} are given 19 | by the \eqn{j}th element, which should be a vector of length \eqn{H_j}, containing 20 | \eqn{H_j - 1} item difficulty parameters (in descending order) and one item discrimination 21 | parameter.} 22 | 23 | \item{control}{A list of control values 24 | \describe{ 25 | \item{max_iter}{The maximum number of iterations of the EM algorithm. 26 | The default is 150.} 27 | \item{eps}{Tolerance parameter used to determine convergence of the 28 | EM algorithm. Specifically, iterations continue until the Euclidean 29 | distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps}, 30 | where \eqn{\beta} is the vector of item discrimination parameters. 31 | \code{eps}=1e-4 by default.} 32 | \item{max_iter2}{The maximum number of iterations of the conditional 33 | maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}. 34 | The default is 15.} 35 | \item{eps2}{Tolerance parameter used to determine convergence of the 36 | conditional maximization procedures for updating \eqn{\gamma} and 37 | \eqn{\lambda}. Specifically, iterations continue until the Euclidean 38 | distance between two consecutive log likelihoods falls under \code{eps2}. 39 | \code{eps2}=1e-3 by default.} 40 | \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.} 41 | \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.} 42 | }} 43 | } 44 | \value{ 45 | An object of class \code{hgrm}. 46 | \item{coefficients}{A data frame of parameter estimates, standard errors, 47 | z values and p values.} 48 | \item{scores}{A data frame of EAP estimates of latent preferences and 49 | their approximate standard errors.} 50 | \item{vcov}{Variance-covariance matrix of parameter estimates.} 51 | \item{log_Lik}{The log-likelihood value at convergence.} 52 | \item{N}{Number of units.} 53 | \item{J}{Number of items.} 54 | \item{H}{A vector denoting the number of response categories for each item.} 55 | \item{ylevels}{A list showing the levels of the factorized response categories.} 56 | \item{p}{The number of predictors for the mean equation.} 57 | \item{q}{The number of predictors for the variance equation.} 58 | \item{control}{List of control values.} 59 | \item{call}{The matched call.} 60 | } 61 | \description{ 62 | \code{hgrm2} fits a hierarchical graded response model where the item parameters 63 | are known and supplied by the user. 64 | } 65 | \examples{ 66 | 67 | y <- nes_econ2008[, -(1:3)] 68 | x <- model.matrix( ~ party * educ, nes_econ2008) 69 | z <- model.matrix( ~ party, nes_econ2008) 70 | 71 | n <- nrow(nes_econ2008) 72 | id_train <- sample.int(n, n/4) 73 | id_test <- setdiff(1:n, id_train) 74 | 75 | y_train <- y[id_train, ] 76 | x_train <- x[id_train, ] 77 | z_train <- z[id_train, ] 78 | 79 | mod_train <- hgrm(y_train, x_train, z_train) 80 | 81 | y_test <- y[id_test, ] 82 | x_test <- x[id_test, ] 83 | z_test <- z[id_test, ] 84 | 85 | item_coefs <- lapply(coef_item(mod_train), `[[`, "Estimate") 86 | 87 | model_test <- hgrm2(y_test, x_test, z_test, item_coefs = item_coefs) 88 | } 89 | -------------------------------------------------------------------------------- /man/hgrmDIF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hgrmDIF.R 3 | \name{hgrmDIF} 4 | \alias{hgrmDIF} 5 | \title{Hierarchical Graded Response Models with Differential Item Functioning} 6 | \usage{ 7 | hgrmDIF( 8 | y, 9 | x = NULL, 10 | z = NULL, 11 | x0 = x[, -1, drop = FALSE], 12 | items_dif = 1L, 13 | form_dif = c("uniform", "non-uniform"), 14 | constr = c("latent_scale"), 15 | beta_set = 1L, 16 | sign_set = TRUE, 17 | init = c("naive", "glm", "irt"), 18 | control = list() 19 | ) 20 | } 21 | \arguments{ 22 | \item{y}{A data frame or matrix of item responses.} 23 | 24 | \item{x}{An optional model matrix, including the intercept term, that predicts the 25 | mean of the latent preference. If not supplied, only the intercept term is included.} 26 | 27 | \item{z}{An optional model matrix, including the intercept term, that predicts the 28 | variance of the latent preference. If not supplied, only the intercept term is included.} 29 | 30 | \item{x0}{A matrix specifying the covariates by which differential item functioning operates. If not supplied, 31 | \code{x0} is taken to be a matrix containing all predictors in \code{x} except the intercept.} 32 | 33 | \item{items_dif}{The indices of the items for which differential item functioning is tested.} 34 | 35 | \item{form_dif}{Form of differential item functioning being tested. Either "uniform" or "non-uniform."} 36 | 37 | \item{constr}{The type of constraints used to identify the model: "latent_scale", 38 | or "items". The default, "latent_scale" constrains the mean of latent preferences 39 | to zero and the geometric mean of prior variance to one; "items" places constraints 40 | on item parameters instead and sets the mean of item difficulty parameters to zero 41 | and the geometric mean of the discrimination parameters to one. Currently, only "latent_scale" 42 | is supported in hgrmDIF().} 43 | 44 | \item{beta_set}{The index of the item for which the discrimination parameter is 45 | restricted to be positive (or negative). It may take any integer value from 46 | 1 to \code{ncol(y)}.} 47 | 48 | \item{sign_set}{Logical. Should the discrimination parameter of 49 | the corresponding item (indexed by \code{beta_set}) be positive 50 | (if \code{TRUE}) or negative (if \code{FALSE})?} 51 | 52 | \item{init}{A character string indicating how item parameters are initialized. It can be 53 | "naive", "glm", or "irt".} 54 | 55 | \item{control}{A list of control values 56 | \describe{ 57 | \item{max_iter}{The maximum number of iterations of the EM algorithm. 58 | The default is 150.} 59 | \item{eps}{Tolerance parameter used to determine convergence of the 60 | EM algorithm. Specifically, iterations continue until the Euclidean 61 | distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps}, 62 | where \eqn{\beta} is the vector of item discrimination parameters. 63 | \code{eps}=1e-4 by default.} 64 | \item{max_iter2}{The maximum number of iterations of the conditional 65 | maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}. 66 | The default is 15.} 67 | \item{eps2}{Tolerance parameter used to determine convergence of the 68 | conditional maximization procedures for updating \eqn{\gamma} and 69 | \eqn{\lambda}. Specifically, iterations continue until the Euclidean 70 | distance between two consecutive log likelihoods falls under \code{eps2}. 71 | \code{eps2}=1e-3 by default.} 72 | \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.} 73 | \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.} 74 | }} 75 | } 76 | \value{ 77 | An object of class \code{hgrm}. 78 | \item{coefficients}{A data frame of parameter estimates, standard errors, 79 | z values and p values.} 80 | \item{scores}{A data frame of EAP estimates of latent preferences and 81 | their approximate standard errors.} 82 | \item{vcov}{Variance-covariance matrix of parameter estimates.} 83 | \item{log_Lik}{The log-likelihood value at convergence.} 84 | \item{N}{Number of units.} 85 | \item{J}{Number of items.} 86 | \item{H}{A vector denoting the number of response categories for each item.} 87 | \item{ylevels}{A list showing the levels of the factorized response categories.} 88 | \item{p}{The number of predictors for the mean equation.} 89 | \item{q}{The number of predictors for the variance equation.} 90 | \item{p0}{The number of predictors for items with DIF.} 91 | \item{coef_item}{Item coefficient estimates.} 92 | \item{control}{List of control values.} 93 | \item{call}{The matched call.} 94 | } 95 | \description{ 96 | \code{hgrmDIF} fits a hierarchical graded response model similar to hgrm(), but person-specific 97 | covariates \code{x} are allowed to affect item responses directly (not via the latent preference). 98 | This model can be used to test for the presence of differential item functioning. 99 | } 100 | \examples{ 101 | y <- nes_econ2008[, -(1:3)] 102 | x <- model.matrix( ~ party * educ, nes_econ2008) 103 | nes_m2 <- hgrmDIF(y, x, items_dif = 1:2) 104 | coef_item(nes_m2) 105 | } 106 | -------------------------------------------------------------------------------- /man/hltm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hltm.R 3 | \name{hltm} 4 | \alias{hltm} 5 | \title{Fitting Hierarchical Latent Trait Models (for Binary Responses).} 6 | \usage{ 7 | hltm( 8 | y, 9 | x = NULL, 10 | z = NULL, 11 | constr = c("latent_scale", "items"), 12 | beta_set = 1L, 13 | sign_set = TRUE, 14 | init = c("naive", "glm", "irt"), 15 | control = list() 16 | ) 17 | } 18 | \arguments{ 19 | \item{y}{A data frame or matrix of item responses.} 20 | 21 | \item{x}{An optional model matrix, including the intercept term, that predicts the 22 | mean of the latent preference. If not supplied, only the intercept term is included.} 23 | 24 | \item{z}{An optional model matrix, including the intercept term, that predicts the 25 | variance of the latent preference. If not supplied, only the intercept term is included.} 26 | 27 | \item{constr}{The type of constraints used to identify the model: "latent_scale", 28 | or "items". The default, "latent_scale" constrains the mean of latent preferences 29 | to zero and the geometric mean of prior variance to one; "items" places constraints 30 | on item parameters instead and sets the mean of item difficulty parameters to zero 31 | and the geometric mean of the discrimination parameters to one.} 32 | 33 | \item{beta_set}{The index of the item for which the discrimination parameter is 34 | restricted to be positive (or negative). It may take any integer value from 35 | 1 to \code{ncol(y)}.} 36 | 37 | \item{sign_set}{Logical. Should the discrimination parameter of 38 | the corresponding item (indexed by \code{beta_set}) be positive 39 | (if \code{TRUE}) or negative (if \code{FALSE})?} 40 | 41 | \item{init}{A character string indicating how item parameters are initialized. It can be 42 | "naive", "glm", or "irt".} 43 | 44 | \item{control}{A list of control values 45 | \describe{ 46 | \item{max_iter}{The maximum number of iterations of the EM algorithm. 47 | The default is 150.} 48 | \item{eps}{Tolerance parameter used to determine convergence of the 49 | EM algorithm. Specifically, iterations continue until the Euclidean 50 | distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps}, 51 | where \eqn{\beta} is the vector of item discrimination parameters. 52 | \code{eps}=1e-4 by default.} 53 | \item{max_iter2}{The maximum number of iterations of the conditional 54 | maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}. 55 | The default is 15.} 56 | \item{eps2}{Tolerance parameter used to determine convergence of the 57 | conditional maximization procedures for updating \eqn{\gamma} and 58 | \eqn{\lambda}. Specifically, iterations continue until the Euclidean 59 | distance between two consecutive log likelihoods falls under \code{eps2}. 60 | \code{eps2}=1e-3 by default.} 61 | \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.} 62 | \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.} 63 | }} 64 | } 65 | \value{ 66 | An object of class \code{hltm}. 67 | \item{coefficients}{A data frame of parameter estimates, standard errors, 68 | z values and p values.} 69 | \item{scores}{A data frame of EAP estimates of latent preferences and 70 | their approximate standard errors.} 71 | \item{vcov}{Variance-covariance matrix of parameter estimates.} 72 | \item{log_Lik}{The log-likelihood value at convergence.} 73 | \item{N}{Number of units.} 74 | \item{J}{Number of items.} 75 | \item{H}{A vector denoting the number of response categories for each item.} 76 | \item{ylevels}{A list showing the levels of the factorized response categories.} 77 | \item{p}{The number of predictors for the mean equation.} 78 | \item{q}{The number of predictors for the variance equation.} 79 | \item{control}{List of control values.} 80 | \item{call}{The matched call.} 81 | } 82 | \description{ 83 | \code{hltm} fits a hierarchical latent trait model in which both 84 | the mean and the variance of the latent preference (ability parameter) 85 | may depend on person-specific covariates (\code{x} and \code{z}). 86 | Specifically, the mean is specified as a linear combination of \code{x} 87 | and the log of the variance is specified as a linear combination of 88 | \code{z}. 89 | } 90 | \examples{ 91 | y <- nes_econ2008[, -(1:3)] 92 | x <- model.matrix( ~ party * educ, nes_econ2008) 93 | z <- model.matrix( ~ party, nes_econ2008) 94 | 95 | dichotomize <- function(x) findInterval(x, c(mean(x, na.rm = TRUE))) 96 | y[] <- lapply(y, dichotomize) 97 | nes_m1 <- hltm(y, x, z) 98 | nes_m1 99 | } 100 | \references{ 101 | Zhou, Xiang. 2019. "\href{https://doi.org/10.1017/pan.2018.63}{Hierarchical Item Response Models for Analyzing Public Opinion.}" Political Analysis. 102 | } 103 | -------------------------------------------------------------------------------- /man/hltm2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hltm2.R 3 | \name{hltm2} 4 | \alias{hltm2} 5 | \title{Hierarchical Latent Trait Models with Known Item Parameters.} 6 | \usage{ 7 | hltm2(y, x = NULL, z = NULL, item_coefs, control = list()) 8 | } 9 | \arguments{ 10 | \item{y}{A data frame or matrix of item responses.} 11 | 12 | \item{x}{An optional model matrix, including the intercept term, that predicts the 13 | mean of the latent preference. If not supplied, only the intercept term is included.} 14 | 15 | \item{z}{An optional model matrix, including the intercept term, that predicts the 16 | variance of the latent preference. If not supplied, only the intercept term is included.} 17 | 18 | \item{item_coefs}{A list of known item parameters. The parameters of item \eqn{j} are given 19 | by the \eqn{j}th element, which should be a vector of length 2, containing 20 | the item difficulty parameter and item discrimination parameter.} 21 | 22 | \item{control}{A list of control values 23 | \describe{ 24 | \item{max_iter}{The maximum number of iterations of the EM algorithm. 25 | The default is 150.} 26 | \item{eps}{Tolerance parameter used to determine convergence of the 27 | EM algorithm. Specifically, iterations continue until the Euclidean 28 | distance between \eqn{\beta_{n}} and \eqn{\beta_{n-1}} falls under \code{eps}, 29 | where \eqn{\beta} is the vector of item discrimination parameters. 30 | \code{eps}=1e-4 by default.} 31 | \item{max_iter2}{The maximum number of iterations of the conditional 32 | maximization procedures for updating \eqn{\gamma} and \eqn{\lambda}. 33 | The default is 15.} 34 | \item{eps2}{Tolerance parameter used to determine convergence of the 35 | conditional maximization procedures for updating \eqn{\gamma} and 36 | \eqn{\lambda}. Specifically, iterations continue until the Euclidean 37 | distance between two consecutive log likelihoods falls under \code{eps2}. 38 | \code{eps2}=1e-3 by default.} 39 | \item{K}{Number of Gauss-Legendre quadrature points for the E-step. The default is 21.} 40 | \item{C}{[-C, C] sets the range of integral in the E-step. \code{C}=3 by default.} 41 | }} 42 | } 43 | \value{ 44 | An object of class \code{hltm}. 45 | \item{coefficients}{A data frame of parameter estimates, standard errors, 46 | z values and p values.} 47 | \item{scores}{A data frame of EAP estimates of latent preferences and 48 | their approximate standard errors.} 49 | \item{vcov}{Variance-covariance matrix of parameter estimates.} 50 | \item{log_Lik}{The log-likelihood value at convergence.} 51 | \item{N}{Number of units.} 52 | \item{J}{Number of items.} 53 | \item{H}{A vector denoting the number of response categories for each item.} 54 | \item{ylevels}{A list showing the levels of the factorized response categories.} 55 | \item{p}{The number of predictors for the mean equation.} 56 | \item{q}{The number of predictors for the variance equation.} 57 | \item{control}{List of control values.} 58 | \item{call}{The matched call.} 59 | } 60 | \description{ 61 | \code{hltm2} fits a hierarchical latent trait model where the item parameters 62 | are known and supplied by the user. 63 | } 64 | \examples{ 65 | y <- nes_econ2008[, -(1:3)] 66 | x <- model.matrix( ~ party * educ, nes_econ2008) 67 | z <- model.matrix( ~ party, nes_econ2008) 68 | dichotomize <- function(x) findInterval(x, c(mean(x, na.rm = TRUE))) 69 | y_bin <- y 70 | y_bin[] <- lapply(y, dichotomize) 71 | 72 | n <- nrow(nes_econ2008) 73 | id_train <- sample.int(n, n/4) 74 | id_test <- setdiff(1:n, id_train) 75 | 76 | y_bin_train <- y_bin[id_train, ] 77 | x_train <- x[id_train, ] 78 | z_train <- z[id_train, ] 79 | 80 | mod_train <- hltm(y_bin_train, x_train, z_train) 81 | 82 | y_bin_test <- y_bin[id_test, ] 83 | x_test <- x[id_test, ] 84 | z_test <- z[id_test, ] 85 | 86 | item_coefs <- lapply(coef_item(mod_train), `[[`, "Estimate") 87 | 88 | model_test <- hltm2(y_bin_test, x_test, z_test, item_coefs = item_coefs) 89 | } 90 | -------------------------------------------------------------------------------- /man/latent_scores.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/latent_scores.R 3 | \name{latent_scores} 4 | \alias{latent_scores} 5 | \title{Estimates of Latent Preferences/Abilities} 6 | \usage{ 7 | latent_scores(x, digits = 3) 8 | } 9 | \arguments{ 10 | \item{x}{An object of class \code{hIRT}} 11 | 12 | \item{digits}{The number of significant digits to use when printing} 13 | } 14 | \value{ 15 | A data frame of EAP estimates of latent preferences and their approximate standard errors. 16 | } 17 | \description{ 18 | EAP estimates of latent preferences for either \code{hltm} or \code{hgrm} models. 19 | } 20 | \examples{ 21 | y <- nes_econ2008[, -(1:3)] 22 | x <- model.matrix( ~ party * educ, nes_econ2008) 23 | z <- model.matrix( ~ party, nes_econ2008) 24 | nes_m1 <- hgrm(y, x, z) 25 | pref <- latent_scores(nes_m1) 26 | require(ggplot2) 27 | ggplot(data = nes_econ2008) + 28 | geom_density(aes(x = pref$post_mean, col = party)) 29 | } 30 | -------------------------------------------------------------------------------- /man/nes_econ2008.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{nes_econ2008} 5 | \alias{nes_econ2008} 6 | \title{Public Attitudes on Economic Issues in ANES 2008} 7 | \format{ 8 | A data frame with 2268 rows and 13 variables: \describe{ 9 | \item{gender}{gender. 1: male; 2: female} 10 | \item{party}{party identification: Democrat, independent, or Republican} 11 | \item{educ}{education. 1: high school or less; 2: some college or above} 12 | \item{health_ins7}{Support for government or private health insurance, 7 categories} 13 | \item{jobs_guar7}{Support for government guarantee jobs and income, 7 categories} 14 | \item{gov_services7}{Should government reduce or increase spending on services?, 7 categories} 15 | \item{FS_poor3}{Federal spending on the poor, 3 categories} 16 | \item{FS_childcare3}{Federal spending on child care, 3 categories} 17 | \item{FS_crime3}{Federal spending on crime, 3 categories} 18 | \item{FS_publicschools3}{Federal spending on public schools, 3 categories} 19 | \item{FS_welfare3}{Federal spending on welfare, 3 categories} 20 | \item{FS_envir3}{Federal spending on environment, 3 categories} 21 | \item{FS_socsec3}{Federal spending on Social Security, 3 categories} 22 | } 23 | } 24 | \usage{ 25 | nes_econ2008 26 | } 27 | \description{ 28 | A dataset containing gender, party ID, education, and responses to 10 survey items 29 | on economic issues from the American National Election Studies, 2008. 30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /man/print.hIRT.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/print.R 3 | \name{print.hIRT} 4 | \alias{print.hIRT} 5 | \title{Printing an object of class \code{hIRT}} 6 | \usage{ 7 | \method{print}{hIRT}(x, digits = 3, ...) 8 | } 9 | \arguments{ 10 | \item{x}{An object of class \code{hIRT}} 11 | 12 | \item{digits}{The number of significant digits to use when printing} 13 | 14 | \item{...}{further arguments passed to \code{\link{print}}.} 15 | } 16 | \description{ 17 | Printing an object of class \code{hIRT} 18 | } 19 | -------------------------------------------------------------------------------- /man/summary.hIRT.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary.R 3 | \name{summary.hIRT} 4 | \alias{summary.hIRT} 5 | \alias{print.summary_hIRT} 6 | \title{Summarizing Hierarchical Item Response Theory Models} 7 | \usage{ 8 | \method{summary}{hIRT}(object, by_item = FALSE, digits = 3, ...) 9 | 10 | \method{print}{summary_hIRT}(x, digits = 3, ...) 11 | } 12 | \arguments{ 13 | \item{object}{An object of class \code{hIRT}.} 14 | 15 | \item{by_item}{Logical. Should item parameters be stored item by item 16 | (if \code{TRUE}) or put together in a data frame (if \code{FALSE})?} 17 | 18 | \item{digits}{the number of significant digits to use when printing.} 19 | 20 | \item{...}{further arguments passed to \code{\link{print}}.} 21 | 22 | \item{x}{An object of class \code{hIRT}} 23 | } 24 | \value{ 25 | An object of class \code{summary_hIRT}. 26 | \item{call}{The matched call.} 27 | \item{model}{Model fit statistics: Log likelihood, AIC, and BIC.} 28 | \item{item_coefs}{Item parameter estimates, standard errors, 29 | z values, and p values.} 30 | \item{mean_coefs}{Parameter estimates for the mean equation.} 31 | \item{var_coefs}{Parameter estimates for the variance equation.} 32 | } 33 | \description{ 34 | Summarizing the fit of either \code{hltm} or \code{hgrm}. 35 | } 36 | \examples{ 37 | y <- nes_econ2008[, -(1:3)] 38 | x <- model.matrix( ~ party * educ, nes_econ2008) 39 | z <- model.matrix( ~ party, nes_econ2008) 40 | nes_m1 <- hgrm(y, x, z) 41 | summary(nes_m1, by_item = TRUE) 42 | } 43 | --------------------------------------------------------------------------------