├── data └── lalonde.rda ├── README.md ├── R ├── invlogit.R ├── fround.R ├── readColumns.R ├── load.first.R ├── coef.R ├── rescale.R ├── extractDIC.R ├── corrplot.R ├── residual.plot.R ├── fitted.R ├── AllGeneric.R ├── sigma.hat.R ├── go.R ├── contrasts.bayes.R ├── traceplot.R ├── AllClass.R ├── binnedplot.R ├── discrete.histogram.R ├── multicomp.plot.R ├── standardize.R ├── se.coef.R ├── matching.R ├── simmer.R ├── triangleplot.R ├── mcsamp.R ├── sim.R ├── model.matrixBayes.R ├── AllInternal.R ├── balance.R ├── bayespolr.R └── display.R ├── man ├── readColumns.Rd ├── fround.Rd ├── extractDIC.mer.Rd ├── go.Rd ├── invlogit.Rd ├── traceplot.Rd ├── rescale.Rd ├── discrete.histogram.Rd ├── matching.Rd ├── residual.plot.Rd ├── contrasts.bayes.Rd ├── lalonde.Rd ├── sigma.hat.Rd ├── triangleplot.Rd ├── se.coef.Rd ├── model.matrixBayes.Rd ├── standardize.Rd ├── multicomp.plot.Rd ├── corrplot.Rd ├── binnedplot.Rd ├── balance.Rd ├── mcsamp.Rd ├── sim.Rd ├── display.Rd ├── coefplot.Rd ├── bayespolr.Rd └── bayesglm.Rd ├── .gitignore ├── DESCRIPTION ├── MD5 └── NAMESPACE /data/lalonde.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suyusung/arm/HEAD/data/lalonde.rda -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # arm 2 | ARM: Data Analysis Using Regression and Multilevel/Hierarchical Models 3 | -------------------------------------------------------------------------------- /R/invlogit.R: -------------------------------------------------------------------------------- 1 | #R function for the logistic function 2 | logit <- function (x) { 3 | log(x/(1-x)) 4 | } 5 | 6 | invlogit <- function (x) { 7 | 1/(1+exp(-x)) 8 | } 9 | -------------------------------------------------------------------------------- /R/fround.R: -------------------------------------------------------------------------------- 1 | fround <- function (x, digits) { 2 | format (round (x, digits), nsmall=digits) 3 | } 4 | 5 | pfround <- function (x, digits) { 6 | print (fround (x, digits), quote=FALSE) 7 | } 8 | 9 | -------------------------------------------------------------------------------- /R/readColumns.R: -------------------------------------------------------------------------------- 1 | read.columns <- function (filename, columns){ 2 | start <- min(columns) 3 | length <- max(columns) - start + 1 4 | if (start == 1) { 5 | return(read.fwf(filename, widths = length)) 6 | } 7 | else { 8 | return(read.fwf(filename, widths = c(start - 1, length))[, 2]) 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /R/load.first.R: -------------------------------------------------------------------------------- 1 | .onAttach <- function(...) { 2 | mylib <- dirname(system.file(package = "arm")) 3 | ver <- packageDescription("arm", lib.loc = mylib)$Version 4 | builddate <- packageDescription("arm", lib.loc = mylib)$Date 5 | packageStartupMessage(paste("\narm (Version ", ver, ", built: ", builddate, ")\n", sep = "")) 6 | packageStartupMessage("Working directory is ", getwd(), "\n") 7 | } 8 | -------------------------------------------------------------------------------- /man/readColumns.Rd: -------------------------------------------------------------------------------- 1 | \name{readColumns} 2 | % functions 3 | \alias{read.columns} 4 | 5 | \title{Function to read data by columns} 6 | 7 | \description{ 8 | A function read data by columns 9 | } 10 | \usage{ 11 | read.columns(filename, columns) 12 | } 13 | \arguments{ 14 | \item{filename}{user specified file name including path of the file} 15 | \item{columns}{which columns of the data to be read} 16 | } 17 | 18 | 19 | 20 | \author{Andrew Gelman \email{gelman@stat.columbia.edu} 21 | } 22 | 23 | 24 | 25 | \keyword{methods} 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | .svn/entries 35 | *.svn-base 36 | .svn/wc.db 37 | *.db-journal 38 | .svn/format 39 | -------------------------------------------------------------------------------- /R/coef.R: -------------------------------------------------------------------------------- 1 | 2 | coef.sim <- function(object,...){ 3 | ans <- object@coef 4 | return(ans) 5 | } 6 | 7 | 8 | coef.sim.polr <- function(object, slot=c("ALL", "coef", "zeta"),...){ 9 | slot <- match.arg(slot) 10 | if(slot=="coef"){ 11 | ans <- object@coef 12 | } else if(slot=="zeta"){ 13 | ans <- object@zeta 14 | } else { 15 | ans <- cbind(object@zeta, object@coef) 16 | } 17 | return(ans) 18 | } 19 | 20 | coef.sim.merMod <- function(object,...){ 21 | fef <- object@fixef 22 | ref <- object@ranef 23 | ans <- list("fixef" = fef, "ranef" = ref) 24 | return(ans) 25 | } 26 | 27 | 28 | fixef.sim.merMod <- function(object,...){ 29 | ans <- object@fixef 30 | return(ans) 31 | } 32 | 33 | ranef.sim.merMod <- function(object,...){ 34 | ans <- object@ranef 35 | return(ans) 36 | } 37 | -------------------------------------------------------------------------------- /R/rescale.R: -------------------------------------------------------------------------------- 1 | # Function for standardizing regression predictors by dividing by 2 sd' 2 | rescale <- function (x, binary.inputs="center"){ 3 | # function to rescale by subtracting the mean and dividing by 2 sd's 4 | if (!is.numeric(x)){ 5 | x <- as.numeric(factor(x)) 6 | x.obs <- x[!is.na(x)] 7 | } 8 | x.obs <- x[!is.na(x)] 9 | # for binary cases 10 | if (length(unique(x.obs))==2){ 11 | if (binary.inputs=="0/1"){ 12 | x <- (x-min(x.obs))/(max(x.obs)-min(x.obs)) 13 | return (x) 14 | } 15 | else if (binary.inputs=="-0.5,0.5"){ 16 | return (x-0.5) 17 | } 18 | else if (binary.inputs=="center"){ 19 | return (x-mean(x.obs)) 20 | } 21 | else if (binary.inputs=="full"){ 22 | return ((x-mean(x.obs))/(2*sd(x.obs))) 23 | } 24 | } 25 | else { 26 | return ((x-mean(x.obs))/(2*sd(x.obs))) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /man/fround.Rd: -------------------------------------------------------------------------------- 1 | \name{fround} 2 | \alias{fround} 3 | \alias{pfround} 4 | 5 | \title{Formating the Rounding of Numbers} 6 | 7 | \description{ 8 | \code{fround} rounds the values in its first argument to the specified 9 | number of decimal places with surrounding quotes. 10 | 11 | \code{pfround} rounds the values in its first argument to the specified 12 | number of decimal places without surrounding quotes. 13 | 14 | } 15 | 16 | \usage{ 17 | fround(x, digits) 18 | pfround(x, digits) 19 | } 20 | 21 | \arguments{ 22 | \item{x}{a numeric vector.} 23 | \item{digits}{integer indicating the precision to be used.} 24 | } 25 | 26 | 27 | \author{Andrew Gelman \email{gelman@stat.columbia.edu}; 28 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn} 29 | } 30 | 31 | \seealso{ 32 | \code{\link{round}} 33 | } 34 | 35 | \examples{ 36 | x <- rnorm(1) 37 | fround(x, digits=2) 38 | pfround(x, digits=2) 39 | } 40 | 41 | \keyword{manip} 42 | \keyword{print} 43 | -------------------------------------------------------------------------------- /man/extractDIC.mer.Rd: -------------------------------------------------------------------------------- 1 | \name{extractDIC} 2 | %\docType{genericFunction} 3 | \alias{extractDIC} 4 | \alias{extractDIC.merMod} 5 | \alias{extractAIC.merMod} 6 | 7 | \title{ 8 | Extract AIC and DIC from a \sQuote{mer} model 9 | } 10 | 11 | \description{ 12 | Computes the (generalized) Akaike *A*n *I*nformation *C*riterion 13 | and *D*eviance *I*nformation *C*riterion for a mer model. 14 | } 15 | \usage{ 16 | extractDIC(fit,\dots) 17 | \method{extractDIC}{merMod}(fit,\dots) 18 | 19 | %\method{extractAIC}{merMod}(fit,\dots) 20 | } 21 | 22 | \arguments{ 23 | \item{fit}{fitted \code{merMod} mode, usually the result of a fiiter like \code{merMod}.} 24 | \item{\dots}{further arguments (currently unused).} 25 | } 26 | 27 | 28 | \author{ 29 | Andrew Gelman \email{gelman@stat.columbia.edu}; 30 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn} 31 | } 32 | 33 | 34 | \examples{ 35 | fm1 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy) 36 | extractAIC(fm1) 37 | extractDIC(fm1) 38 | } 39 | 40 | \keyword{manip} 41 | \keyword{methods} 42 | -------------------------------------------------------------------------------- /R/extractDIC.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | extractDIC <- function(fit,...){ 4 | UseMethod("extractDIC") 5 | } 6 | 7 | 8 | extractDIC.merMod <- function(fit,...){ 9 | #REML <- fit@dims["REML"] 10 | # llik <- logLik(fit, REML) 11 | # dev <- fit@deviance["ML"] 12 | # n <- fit@dims["n"] 13 | # Dhat <- -2 * (llik) 14 | # pD <- dev - Dhat 15 | # DIC <- dev + pD[[1]] 16 | # names(DIC) <- "DIC" 17 | # return(DIC) 18 | is_REML <- isREML(fit) 19 | llik <- logLik(fit, REML=is_REML) 20 | dev <- deviance(refitML(fit)) 21 | n <- getME(fit, "devcomp")$dims["n"] 22 | Dhat <- -2 * (llik) 23 | pD <- dev - Dhat 24 | DIC <- dev + pD[[1]] 25 | names(DIC) <- "DIC" 26 | return(DIC) 27 | } 28 | 29 | 30 | 31 | 32 | # 33 | #extractAIC.mer <- function(fit,...){ 34 | ## REML <- fit@dims["REML"] 35 | ## llik <- logLik(fit, REML) 36 | ## AIC <- AIC(llik) 37 | ## names(AIC) <- "AIC" 38 | ## return(AIC) 39 | # L <- logLik(refitML(fit)) 40 | # edf <- attr(L,"df") 41 | # out <- c(edf,-2*L + k*edf) 42 | # return(out) 43 | #} 44 | -------------------------------------------------------------------------------- /man/go.Rd: -------------------------------------------------------------------------------- 1 | \name{GO} 2 | \docType{class} 3 | % Classes 4 | \alias{GO-class} 5 | % functions 6 | \alias{go} 7 | \alias{G} 8 | 9 | \title{Function to Recall Last Source File} 10 | 11 | \description{ 12 | A function that like \code{source()} but recalls the last source file names by default. 13 | } 14 | \usage{ 15 | go(\dots, add=FALSE, timer=FALSE) 16 | } 17 | \arguments{ 18 | \item{\dots}{list of filenames as character strings.} 19 | \item{add}{add these names to the current list; if replace, then \code{FALSE}.} 20 | \item{timer}{time the execution time of \code{go()}.} 21 | } 22 | 23 | 24 | 25 | \author{ 26 | Jouni Kerman \email{jouni@kerman.com} \email{kerman@stat.columbia.edu} 27 | } 28 | 29 | 30 | \examples{ 31 | go('myprog') # will run source('myprog.r') 32 | go() # will run source('myprog.r') again 33 | go('somelib',add=TRUE) # will run source('myprog.r') and source('somelib.r') 34 | go('myprog','somelib') # same as above 35 | go('mytest') # will run source('mytest') only 36 | go() # runs source('mytest') again 37 | G # short cut to call go() 38 | } 39 | 40 | \keyword{methods} 41 | \keyword{manip} 42 | -------------------------------------------------------------------------------- /R/corrplot.R: -------------------------------------------------------------------------------- 1 | 2 | corrplot <- function(data, varnames=NULL, cutpts=NULL, abs=TRUE, details=TRUE, 3 | n.col.legend=5, cex.col=0.7, cex.var=0.9, digits=1, 4 | color=FALSE) 5 | { 6 | 7 | # some check! 8 | if (is.matrix(data)|is.data.frame(data)){ 9 | } 10 | else { 11 | stop ("Data must be a matrix or a data frame!") 12 | } 13 | if (sum(sapply(data, FUN=is.character))>0) 14 | stop ("Data contains non-numeric variables!") 15 | if (n.col.legend > 8) 16 | stop ("Suggestion: More than 8 levels of colors is difficult to read!") 17 | 18 | 19 | 20 | # prepare correlation matrix 21 | if (abs){ 22 | z.plot <- abs(cor(data, data, use="pairwise.complete.obs")) 23 | } 24 | else{ 25 | z.plot <- cor(data, data, use="pairwise.complete.obs") 26 | } 27 | 28 | if (is.null(varnames)){ 29 | z.names <- dimnames(data)[[2]] 30 | } 31 | else{ 32 | z.names <- varnames 33 | } 34 | 35 | triangleplot(x=z.plot, y=z.names, cutpts=cutpts, details=details, 36 | n.col.legend=n.col.legend, 37 | cex.col=cex.col, cex.var=cex.var, 38 | digits=digits, color=color) 39 | } 40 | -------------------------------------------------------------------------------- /R/residual.plot.R: -------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # residual plot for the observed values 3 | # ============================================================================== 4 | residual.plot <- function ( Expected, Residuals, sigma, 5 | main = deparse(substitute( Expected )), 6 | col.pts = "blue", col.ctr = "red", 7 | col.sgm = "black", cex = 0.5, gray.scale = FALSE, 8 | xlab="Predicted", ylab="Residuals", ... ) { 9 | if( gray.scale == TRUE ) { 10 | col.pts <- "black"; 11 | col.ctr <- "black"; 12 | col.sgm <- "gray60"; 13 | } 14 | plot( Expected[!is.na( Residuals )], Residuals[ !is.na( Residuals ) ], 15 | xlab = xlab, ylab = ylab, main = main, col = col.pts, 16 | pch = 19, cex = cex, ... ); 17 | #mtext( "Residuals vs Predicted", 3, cex= 0.6 ) #, adj=1 ); 18 | # add the zero line for clarity 19 | abline ( h = 0, lty = "dashed", col = col.ctr ); 20 | # residual s.e. 21 | resid.se <- sigma; 22 | # Add two-standard-error lines 23 | abline ( h = 2*resid.se, lty = "dashed", col = col.sgm ); 24 | abline ( h = -2*resid.se, lty = "dashed", col = col.sgm ); 25 | } 26 | -------------------------------------------------------------------------------- /man/invlogit.Rd: -------------------------------------------------------------------------------- 1 | \name{invlogit} 2 | \alias{invlogit} 3 | \alias{logit} 4 | 5 | \title{Logistic and Inverse logistic functions} 6 | 7 | \description{ 8 | Inverse-logit function, transforms continuous values to the range (0, 1) 9 | } 10 | \usage{ 11 | logit(x) 12 | invlogit(x) 13 | } 14 | 15 | \arguments{ 16 | \item{x}{A vector of continuous values} 17 | } 18 | 19 | \details{ 20 | The Inverse-logit function defined as: 21 | \eqn{logit^-1(x) = e^x/(1+e^x)} transforms continuous values to the range (0, 1), 22 | which is necessary, since probabilities must be between 0 and 1 and maps 23 | from the linear predictor to the probabilities 24 | } 25 | \value{ 26 | A vector of estimated probabilities 27 | } 28 | \references{Andrew Gelman and Jennifer Hill. (2006). 29 | \emph{Data Analysis Using Regression and Multilevel/Hierarchical Models}. 30 | Cambridge University Press.} 31 | 32 | \author{Andrew Gelman \email{gelman@stat.columbia.edu}, 33 | M.Grazia Pittau \email{grazia@stat.columbia.edu} 34 | } 35 | 36 | 37 | \examples{ 38 | data(frisk) 39 | n <- 100 40 | x1 <- rnorm (n) 41 | x2 <- rbinom (n, 1, .5) 42 | b0 <- 1 43 | b1 <- 1.5 44 | b2 <- 2 45 | Inv.logit <- invlogit(b0+b1*x1+b2*x2) 46 | plot(b0+b1*x1+b2*x2, Inv.logit) 47 | } 48 | 49 | \keyword{models} 50 | 51 | -------------------------------------------------------------------------------- /man/traceplot.Rd: -------------------------------------------------------------------------------- 1 | \name{traceplot} 2 | %\docType{genericFunction} 3 | \alias{traceplot} 4 | \alias{traceplot.default} 5 | \alias{traceplot,mcmc.list-method} 6 | \alias{traceplot,bugs-method} 7 | 8 | 9 | \title{Trace plot of \sQuote{bugs} object} 10 | 11 | \usage{ 12 | \S4method{traceplot}{bugs}( x, mfrow = c( 1, 1 ), varname = NULL, 13 | match.head = TRUE, ask = TRUE, 14 | col = rainbow( x$n.chains ), 15 | lty = 1, lwd = 1, \dots) 16 | } 17 | 18 | \arguments{ 19 | \item{x}{A bugs object} 20 | \item{mfrow}{graphical parameter (see \code{par})} 21 | \item{varname}{vector of variable names to plot} 22 | \item{match.head}{ matches the variable names by the beginning of the variable names in bugs object} 23 | \item{ask}{logical; if \code{TRUE}, the user is \emph{ask}ed before each plot, see 24 | \code{par(ask=.)}.} 25 | \item{col}{graphical parameter (see \code{par})} 26 | \item{lty}{graphical parameter (see \code{par})} 27 | \item{lwd}{graphical parameter (see \code{par})} 28 | \item{\dots}{further graphical parameters} 29 | } 30 | 31 | 32 | \description{ 33 | Displays a plot of iterations \emph{vs.} sampled values for each variable 34 | in the chain, with a separate plot per variable. 35 | } 36 | 37 | \author{ 38 | Masanao Yajima \email{yajima@stat.columbia.edu}. 39 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn} 40 | } 41 | 42 | \seealso{ 43 | \code{\link[coda]{densplot}}, \code{\link[coda]{plot.mcmc}}, 44 | \code{\link[coda]{traceplot}} 45 | } 46 | 47 | \keyword{hplot} 48 | -------------------------------------------------------------------------------- /man/rescale.Rd: -------------------------------------------------------------------------------- 1 | \name{rescale} 2 | \alias{rescale} 3 | 4 | \title{Function for Standardizing by Centering and Dividing by 2 sd's} 5 | \description{ 6 | This function standardizes a variable by centering and dividing by 2 sd's with 7 | exceptions for binary variables. 8 | } 9 | 10 | \usage{ 11 | rescale(x, binary.inputs="center") 12 | } 13 | 14 | \arguments{ 15 | \item{x}{a vector} 16 | \item{binary.inputs}{options for standardizing binary variables, default is \code{center}; \code{0/1} keeps original scale; 17 | \code{-0.5,0.5} rescales 0 as -0.5 and 1 as 0.5; \code{center} substracts the mean; and 18 | \code{full} substracts the mean and divids by 2 sd.} 19 | } 20 | 21 | \value{ 22 | the standardized vector 23 | } 24 | 25 | \references{Andrew Gelman. (2008). 26 | \dQuote{Scaling regression inputs by dividing by two standard deviations}. 27 | \emph{Statistics in Medicine} 27: 2865--2873. 28 | \url{http://www.stat.columbia.edu/~gelman/research/published/standardizing7.pdf} 29 | } 30 | 31 | \author{Andrew Gelman \email{gelman@stat.columbia.edu}; 32 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn} 33 | } 34 | 35 | \seealso{\code{\link{standardize}} 36 | } 37 | 38 | \examples{ 39 | # Set up the fake data 40 | n <- 100 41 | x <- rnorm (n, 2, 1) 42 | x1 <- rnorm (n) 43 | x1 <- (x1-mean(x1))/(2*sd(x1)) # standardization 44 | x2 <- rbinom (n, 1, .5) 45 | b0 <- 1 46 | b1 <- 1.5 47 | b2 <- 2 48 | y <- rbinom (n, 1, invlogit(b0+b1*x1+b2*x2)) 49 | rescale(x, "full") 50 | rescale(y, "center") 51 | } 52 | \keyword{manip} 53 | -------------------------------------------------------------------------------- /R/fitted.R: -------------------------------------------------------------------------------- 1 | 2 | # the plan here is to shuffle the ranefs back into the way a merMod object 3 | # stores them so that a simple X * beta + Z * theta op does the trick 4 | 5 | 6 | fitted.sim.merMod <- function(object, regression,...){ 7 | if (missing(regression) || is.null(regression)) stop("fitted for sim.mer requires original merPred object as well."); 8 | if (!inherits(regression, "merMod")) stop("regression argument for fitted on sim.mer does not inherit from class 'merMod'"); 9 | sims <- object; 10 | numSimulations <- dim(sims@fixef)[1]; 11 | devcomp <- getME(regression, "devcomp"); 12 | dims <- devcomp$dims; 13 | 14 | numRanef <- dims[["q"]]; 15 | numLevels <- dims[["reTrms"]]; 16 | 17 | simulatedRanef <- matrix(0, numRanef, numSimulations); 18 | 19 | index <- 0; 20 | for (i in 1:length(sims@ranef)) { 21 | levelSims <- sims@ranef[[i]]; 22 | numCoefficientsPerLevel <- dim(levelSims)[2]; 23 | numGroupsPerLevel <- dim(levelSims)[3]; 24 | for (j in 1:numCoefficientsPerLevel) { 25 | ranefRange <- index + 1:numGroupsPerLevel; 26 | index <- index + numGroupsPerLevel; 27 | 28 | simulatedRanef[ranefRange,] <- t(levelSims[,j,]); 29 | } 30 | } 31 | 32 | X <- getME(regression, "X"); 33 | Zt <- getME(regression, "Zt"); 34 | 35 | linearPredictor <- as.matrix(tcrossprod(X, sims@fixef) + crossprod(Zt, simulatedRanef)) + 36 | matrix(getME(regression, "offset"), dims[["n"]], numSimulations); 37 | 38 | if (dims[["GLMM"]] == 0L){ 39 | return(linearPredictor) 40 | }else{ 41 | return(regression@resp$family$linkinv(linearPredictor)) 42 | } 43 | }; 44 | -------------------------------------------------------------------------------- /man/discrete.histogram.Rd: -------------------------------------------------------------------------------- 1 | \name{discrete.histogram} 2 | \alias{discrete.histogram} 3 | \alias{discrete.hist} 4 | 5 | \title{Histogram for Discrete Distributions} 6 | 7 | \description{Creates a prettier histogram for discrete distributions} 8 | 9 | \usage{ 10 | discrete.histogram (x, prob, prob2=NULL, prob3=NULL, 11 | xlab="x", xaxs.label=NULL, yaxs.label=NULL, bar.width=NULL, 12 | freq=FALSE, prob.col="blue", prob2.col="red", prob3.col="gray", ...) 13 | } 14 | 15 | \arguments{ 16 | \item{x}{The vector of x's} 17 | \item{prob}{The probabilities for the x's} 18 | \item{prob2}{A second vector of probabilities of the x's} 19 | \item{prob3}{A third vector of probabilities of the x's} 20 | \item{xlab}{Label for the x axis} 21 | \item{xaxs.label}{Label for the x's} 22 | \item{yaxs.label}{Label for the y axis} 23 | \item{bar.width}{Width of the bars} 24 | \item{freq}{If TRUE, shows a frequency histogram as opposed to probability.} 25 | \item{prob.col}{The color of the first set of histogram bars.} 26 | \item{prob2.col}{The color of the second set of histogram bars.} 27 | \item{prob3.col}{The color of the third set of histogram bars.} 28 | \item{...}{Additional arguments passed to function \code{plot}} 29 | } 30 | 31 | \details{This function displays a histogram for discrete 32 | probability distributions. 33 | } 34 | 35 | 36 | \examples{ 37 | a <- c(3,4,0,0,5,1,1,1,1,0) 38 | discrete.histogram (a) 39 | 40 | x <- c(0,1,3,4,5) 41 | p <- c(.3,.4,.1,.1,.1) 42 | discrete.histogram (x,p) 43 | 44 | x <- c(0,1,3,4,5) 45 | y <- c(3,4,1,1,1) 46 | discrete.histogram (x,y) 47 | } 48 | 49 | 50 | \keyword{dplot} 51 | -------------------------------------------------------------------------------- /R/AllGeneric.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #setGeneric("coef") 4 | #setGeneric("print") 5 | #setGeneric("fitted") 6 | 7 | #setGeneric("extractAIC") 8 | 9 | if (!isGeneric("coefplot")) { 10 | setGeneric("coefplot", 11 | function(object, ...) 12 | standardGeneric("coefplot")) 13 | } 14 | 15 | 16 | if (!isGeneric("display")) { 17 | setGeneric("display", 18 | function(object, ...) 19 | standardGeneric("display")) 20 | } 21 | 22 | 23 | if (!isGeneric("sim")) { 24 | setGeneric("sim", 25 | function(object, ...) 26 | standardGeneric("sim")) 27 | } 28 | 29 | sigma.hat <- function(object,...){ 30 | UseMethod("sigma.hat") 31 | } 32 | 33 | 34 | if (!isGeneric("se.coef")) { 35 | setGeneric("se.coef", 36 | function(object, ...) 37 | standardGeneric("se.coef")) 38 | } 39 | 40 | 41 | if (!isGeneric("mcsamp")) { 42 | setGeneric("mcsamp", 43 | function(object, ...) 44 | standardGeneric("mcsamp")) 45 | } 46 | 47 | 48 | 49 | if (!isGeneric("standardize")) { 50 | setGeneric("standardize", 51 | function(object, ...) 52 | standardGeneric("standardize")) 53 | } 54 | 55 | 56 | 57 | #if (!isGeneric("terms.bayes")) { 58 | # setGeneric("terms.bayes", 59 | # function(x, ...) 60 | # standardGeneric("terms.bayes")) 61 | #} 62 | 63 | 64 | 65 | if (!isGeneric("traceplot")) { 66 | setGeneric("traceplot", 67 | function(x, ...) 68 | standardGeneric("traceplot"), 69 | useAsDefault = function(x, ...) coda::traceplot(x, ...)) 70 | } 71 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: arm 2 | Version: 1.14-4 3 | Date: 2024-4-1 4 | Title: Data Analysis Using Regression and Multilevel/Hierarchical 5 | Models 6 | Authors@R: c(person("Andrew", "Gelman", role = "aut", email = "gelman@stat.columbia.edu"), 7 | person("Yu-Sung", "Su", role = c("aut", "cre"), 8 | email = "suyusung@tsinghua.edu.cn", 9 | comment = c(ORCID = "0000-0001-5021-8209")), 10 | person("Masanao", "Yajima", role = "ctb", email = "yajima@bu.edu"), 11 | person("Jennifer", "Hill", role = "ctb", email = "jennifer.hill@nyu.edu"), 12 | person("Maria Grazia", "Pittau", role = "ctb", email = "grazia@stat.columbia.edu"), 13 | person("Jouni", "Kerman", role = "ctb", email = "jouni@kerman.com"), 14 | person("Tian", "Zheng", role = "ctb", email = "tzheng@stat.columbia.edu"), 15 | person("Vincent", "Dorie", role = "ctb", email = "vjd4@nyu.edu") 16 | ) 17 | Author: Andrew Gelman [aut], 18 | Yu-Sung Su [aut, cre] (), 19 | Masanao Yajima [ctb], 20 | Jennifer Hill [ctb], 21 | Maria Grazia Pittau [ctb], 22 | Jouni Kerman [ctb], 23 | Tian Zheng [ctb], 24 | Vincent Dorie [ctb] 25 | Maintainer: Yu-Sung Su 26 | BugReports: https://github.com/suyusung/arm/issues/ 27 | Depends: R (>= 3.1.0), MASS, Matrix (>= 1.6-1.1), stats, lme4 (>= 1.0) 28 | Imports: abind, coda, graphics, grDevices, methods, nlme, utils 29 | Description: Functions to accompany A. Gelman and J. Hill, Data Analysis Using Regression and Multilevel/Hierarchical Models, Cambridge University Press, 2007. 30 | License: GPL (>= 2) 31 | URL: https://CRAN.R-project.org/package=arm 32 | NeedsCompilation: no 33 | -------------------------------------------------------------------------------- /man/matching.Rd: -------------------------------------------------------------------------------- 1 | \name{matching} 2 | \alias{matching} 3 | 4 | \title{Single Nearest Neighborhood Matching} 5 | 6 | \description{ 7 | Function for processing matching with propensity score 8 | } 9 | 10 | \usage{ 11 | matching(z, score, replace=FALSE) 12 | } 13 | 14 | \arguments{ 15 | \item{z}{vector of indicators for treatment or control.} 16 | 17 | \item{score}{vector of the propensity scores in the same order as z.} 18 | 19 | \item{replace}{whether the control units could be reused for matching, 20 | default is \code{FALSE}.} 21 | } 22 | 23 | \details{Function for matching each treatment unit in turn 24 | the control unit (not previously chosen) with the 25 | closest propensity score } 26 | 27 | \value{ 28 | The function returns a vector of indices that the corresponding 29 | unit is matched to. 0 means matched to nothing.} 30 | 31 | \references{Andrew Gelman and Jennifer Hill. (2006). 32 | \emph{Data Analysis Using Regression and Multilevel/Hierarchical Models}. 33 | Cambridge University Press.} 34 | 35 | \author{Jeniffer Hill \email{jh1030@columbia.edu}; 36 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn} 37 | } 38 | 39 | \seealso{\code{\link{balance}} 40 | } 41 | 42 | \examples{ 43 | # matching first 44 | data(lalonde) 45 | attach(lalonde) 46 | fit <- glm(treat ~ re74 + re75 + age + factor(educ) + 47 | black + hisp + married + nodegr + u74 + u75, 48 | family=binomial(link="logit")) 49 | pscores <- predict(fit, type="response") 50 | matches <- matching(z=lalonde$treat, score=pscores) 51 | matched <- matches$cnts 52 | 53 | # balance check! 54 | b.stats <- balance(lalonde, treat, matched) 55 | print(b.stats) 56 | plot(b.stats) 57 | } 58 | 59 | \keyword{models} 60 | \keyword{methods} 61 | -------------------------------------------------------------------------------- /man/residual.plot.Rd: -------------------------------------------------------------------------------- 1 | \name{residual.plot} 2 | \alias{residual.plot} 3 | 4 | \title{residual plot for the observed values} 5 | \description{ 6 | Plots the residual of observed variable. 7 | } 8 | \usage{ 9 | residual.plot(Expected, Residuals, sigma, main = deparse(substitute(Expected)), 10 | col.pts = "blue", col.ctr = "red", col.sgm = "black", cex = 0.5, 11 | gray.scale = FALSE, xlab = "Predicted", ylab = "Residuals", ...) 12 | } 13 | 14 | \arguments{ 15 | \item{Expected}{ Expected value. } 16 | \item{Residuals}{ Residual value. } 17 | \item{sigma}{ Standard error. } 18 | \item{main}{ main for the plot. See \code{plot} for detail.} 19 | \item{col.pts}{ Color of the points. } 20 | \item{col.ctr}{ Color of the line at zero. } 21 | \item{col.sgm}{ Color of standard error line. } 22 | \item{cex}{ A numerical value giving the amount by which plotting text 23 | and symbols should be magnified relative to the default. See par for detail. } 24 | \item{gray.scale}{ If \code{TRUE}, makes the plot into black and white. This option overwrites the color specification. Default is FALSE. } 25 | \item{xlab}{ Label for x axis. } 26 | \item{ylab}{ Label for y axis. } 27 | \item{\dots}{ Additional parameters passed to \code{plot} function. } 28 | } 29 | 30 | \value{ 31 | Plot to visualize pattern of residulal value for the expected value. 32 | } 33 | 34 | \author{ 35 | Masanao Yajima \email{yajima@stat.columbia.edu}, 36 | M.Grazia Pittau \email{grazia@stat.columbia.edu} 37 | } 38 | 39 | \examples{ 40 | old.par <- par(no.readonly = TRUE) 41 | 42 | x <- rnorm(100) 43 | y <- rnorm(100) 44 | fit <- lm(y~x) 45 | y.hat <- fitted(fit) 46 | u <- resid(fit) 47 | sigma <- sigma.hat(fit) 48 | residual.plot(y.hat, u, sigma) 49 | 50 | par(old.par) 51 | } 52 | 53 | 54 | \keyword{hplot} 55 | -------------------------------------------------------------------------------- /man/contrasts.bayes.Rd: -------------------------------------------------------------------------------- 1 | \name{contrast.bayes} 2 | \alias{contr.bayes.ordered} 3 | \alias{contr.bayes.unordered} 4 | 5 | \title{Contrast Matrices} 6 | 7 | \description{ 8 | Return a matrix of contrasts used in \code{\link{bayesglm}}. 9 | } 10 | 11 | \usage{ 12 | contr.bayes.unordered(n, base = 1, contrasts = TRUE) 13 | contr.bayes.ordered (n, scores = 1:n, contrasts = TRUE) 14 | } 15 | 16 | \arguments{ 17 | \item{n}{a vector of levels for a factor, or the number of levels.} 18 | \item{base}{an integer specifying which group is considered the baseline 19 | group. Ignored if \code{contrasts} is \code{FALSE}.} 20 | \item{contrasts}{a logical indicating whether contrasts should be computed.} 21 | \item{scores}{the set of values over which orthogonal polynomials are to be 22 | computed.} 23 | } 24 | 25 | \details{ 26 | These functions are adapted from \code{contr.treatment} and \code{contr.poly} 27 | in \code{\link{stats}} package. The purpose for these functions are to keep 28 | the baseline levels of categorical variables and thus to suit the use of 29 | \code{\link{bayesglm}}. 30 | 31 | \code{contr.bayes.unordered} is equivalent to \code{contr.treatment} whereas 32 | \code{contr.bayes.ordered} is equivalent to \code{contr.poly}. 33 | 34 | } 35 | 36 | 37 | \author{Yu-Sung Su \email{suyusung@tsinghua.edu.cn}} 38 | 39 | \seealso{ 40 | \code{\link{C}}, 41 | \code{\link{contr.helmert}}, 42 | \code{\link{contr.poly}}, 43 | \code{\link{contr.sum}}, 44 | \code{\link{contr.treatment}}; 45 | \code{\link{glm}}, 46 | \code{\link{aov}}, 47 | \code{\link{lm}}, 48 | \code{\link{bayesglm}}. 49 | } 50 | 51 | \examples{ 52 | cat.var <- rep(1:3, 5) 53 | dim(contr.bayes.unordered(cat.var)) 54 | # 15*15 baseline level kept! 55 | dim(contr.treatment(cat.var)) 56 | # 15*14 57 | } 58 | \keyword{design} 59 | \keyword{regression} 60 | \keyword{array} 61 | \keyword{manip} 62 | -------------------------------------------------------------------------------- /R/sigma.hat.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | sigma.hat.lm <- function(object,...){ 5 | sigma <- summary(object)$sigma 6 | return (sigma) 7 | } 8 | 9 | sigma.hat.glm <- function(object,...){ 10 | dispersion <- if (is.null(object$dispersion)){ 11 | summary(object)$dispersion 12 | } 13 | else{ 14 | object$dispersion 15 | } 16 | if (object$family$family == "gaussian") { 17 | sigma <- sqrt(dispersion) 18 | } 19 | else { 20 | sigma <- summary(object, correlation = TRUE)$sigma 21 | #sigma <- sqrt(deviance(object)/df.residual(object)) 22 | } 23 | return(sigma) 24 | } 25 | 26 | 27 | sigma.hat.sim <- function(object,...){ 28 | sigma <- object@sigma 29 | return (sigma) 30 | } 31 | 32 | sigma.hat.merMod <- function(object,...){ 33 | #object <- summary (object) 34 | fcoef <- fixef(object) 35 | #useScale <- attr (VarCorr (object), "sc") # =sc? 36 | #useScale <- object@dims["useSc"] 37 | useScale <- getME(object, "devcomp")$dims["useSc"] 38 | #ngrps <- lapply(object@flist, function(x) length(levels(x))) 39 | #n.groupings <- length (ngrps) 40 | varc <- VarCorr (object) 41 | sc <- attr(varc, "sc") # =useScale 42 | recorr <- lapply(varc, function(el) attr(el, "correlation")) 43 | reStdDev <- c(lapply(varc, function(el) attr(el, "stddev")), list(Residual = sc)) 44 | n.groupings <- length(recorr) 45 | sigmas <- as.list (rep (NA, n.groupings+1)) 46 | sigmas[1] <- ifelse (useScale, sc, 1) #####if NA, sd=1 47 | cors <- as.list (rep (NA, n.groupings+1)) 48 | names (sigmas) <- names (cors) <- c ("data", names (varc)) 49 | for (k in 1:n.groupings){ 50 | sigmas[[k+1]] <- reStdDev[[k]] 51 | cors[[k+1]] <- as.matrix (recorr[[k]]) 52 | if (length (cors[[k+1]]) == 1) cors[[k+1]] <- NA 53 | } 54 | return (list (sigma=sigmas, cors=cors)) 55 | } 56 | 57 | 58 | sigma.hat.sim.merMod <- function(object,...) 59 | { 60 | sigma <- object@sigma 61 | return (sigma) 62 | } 63 | -------------------------------------------------------------------------------- /man/lalonde.Rd: -------------------------------------------------------------------------------- 1 | \name{lalonde} 2 | \alias{lalonde} 3 | \docType{data} 4 | \title{Lalonde Dataset} 5 | \description{ 6 | Dataset used by Dehejia and Wahba (1999) to evaluate propensity score matching. 7 | } 8 | \usage{data(lalonde)} 9 | \format{ 10 | A data frame with 445 observations on the following 12 variables. 11 | \describe{ 12 | \item{age}{age in years.} 13 | \item{educ}{years of schooling.} 14 | \item{black}{indicator variable for blacks.} 15 | \item{hisp}{indicator variable for Hispanics.} 16 | \item{married}{indicator variable for martial status.} 17 | \item{nodegr}{indicator variable for high school diploma.} 18 | \item{re74}{real earnings in 1974.} 19 | \item{re75}{real earnings in 1975.} 20 | \item{re78}{real earnings in 1978.} 21 | \item{u74}{indicator variable for earnings in 1974 being zero.} 22 | \item{u75}{indicator variable for earnings in 1975 being zero.} 23 | \item{treat}{an indicator variable for treatment status.} 24 | } 25 | } 26 | \details{ 27 | Two demos are provided which use this dataset. The first, 28 | \code{DehejiaWahba}, replicates one of the models from Dehejia and 29 | Wahba (1999). The second demo, \code{AbadieImbens}, replicates the 30 | models produced by Abadie and Imbens 31 | \url{https://scholar.harvard.edu/imbens/scholar_software/matching-estimators}. 32 | Many of these models are found to produce good balance for the Lalonde 33 | data. 34 | } 35 | \references{ 36 | Dehejia, Rajeev and Sadek Wahba. 1999.``Causal Effects in 37 | Non-Experimental Studies: Re-Evaluating the 38 | Evaluation of Training Programs.'' \emph{Journal of the American Statistical 39 | Association} 94 (448): 1053-1062. 40 | 41 | LaLonde, Robert. 1986. ``Evaluating the Econometric Evaluations of 42 | Training Programs.'' \emph{American Economic Review} 76:604-620. 43 | } 44 | 45 | \note{This documentation is adapted from \code{Matching} package.} 46 | 47 | \seealso{\code{\link{matching}}, 48 | \code{\link[Matching]{GenMatch}} 49 | \code{\link{balance}} 50 | } 51 | 52 | \examples{ 53 | data(lalonde) 54 | } 55 | \keyword{datasets} 56 | -------------------------------------------------------------------------------- /man/sigma.hat.Rd: -------------------------------------------------------------------------------- 1 | \name{sigma.hat} 2 | %\docType{genericFunction} 3 | \alias{sigma.hat} 4 | \alias{sigma.hat.lm} 5 | \alias{sigma.hat.glm} 6 | \alias{sigma.hat.merMod} 7 | \alias{sigma.hat.sim} 8 | \alias{sigma.hat.sim.merMod} 9 | 10 | 11 | 12 | \title{Extract Residual Errors} 13 | 14 | \description{This generic function extracts residual errors from a fitted model. 15 | } 16 | 17 | \usage{ 18 | sigma.hat(object,\dots) 19 | 20 | \method{sigma.hat}{lm}(object,\dots) 21 | \method{sigma.hat}{glm}(object,\dots) 22 | \method{sigma.hat}{merMod}(object,\dots) 23 | \method{sigma.hat}{sim}(object,\dots) 24 | \method{sigma.hat}{sim.merMod}(object,\dots) 25 | 26 | } 27 | 28 | \arguments{ 29 | \item{object}{any fitted model object of \code{lm}, \code{glm} and \code{merMod} class} 30 | \item{\dots}{other arguments} 31 | } 32 | 33 | \author{Andrew Gelman \email{gelman@stat.columbia.edu}; 34 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn} 35 | } 36 | 37 | \seealso{\code{\link{display}}, 38 | \code{\link{summary}}, 39 | \code{\link{lm}}, 40 | \code{\link{glm}}, 41 | \code{\link[lme4]{lmer}} 42 | } 43 | 44 | \examples{ 45 | group <- rep(1:10, rep(10,10)) 46 | mu.a <- 0 47 | sigma.a <- 2 48 | mu.b <- 3 49 | sigma.b <- 4 50 | rho <- 0 51 | Sigma.ab <- array (c(sigma.a^2, rho*sigma.a*sigma.b, 52 | rho*sigma.a*sigma.b, sigma.b^2), c(2,2)) 53 | sigma.y <- 1 54 | ab <- mvrnorm (10, c(mu.a,mu.b), Sigma.ab) 55 | a <- ab[,1] 56 | b <- ab[,2] 57 | 58 | x <- rnorm (100) 59 | y1 <- rnorm (100, a[group] + b[group]*x, sigma.y) 60 | y2 <- rbinom(100, 1, prob=invlogit(a[group] + b*x)) 61 | 62 | M1 <- lm (y1 ~ x) 63 | sigma.hat(M1) 64 | 65 | M2 <- bayesglm (y1 ~ x, prior.scale=Inf, prior.df=Inf) 66 | sigma.hat(M2) # should be same to sigma.hat(M1) 67 | 68 | M3 <- glm (y2 ~ x, family=binomial(link="logit")) 69 | sigma.hat(M3) 70 | 71 | M4 <- lmer (y1 ~ (1+x|group)) 72 | sigma.hat(M4) 73 | 74 | M5 <- glmer (y2 ~ (1+x|group), family=binomial(link="logit")) 75 | sigma.hat(M5) 76 | 77 | } 78 | \keyword{manip} 79 | \keyword{methods} 80 | -------------------------------------------------------------------------------- /R/go.R: -------------------------------------------------------------------------------- 1 | 2 | # Name: go(..., add=FALSE,timer=FALSE) 3 | # Description: Like source() but recalls the last source file names by default. Multiple source files can be specified. 4 | # Parameters: ... = list of filenames as character strings; 5 | # add = add these names to the current list? if replace, then FALSE 6 | # Note: does not pass parameters to source() 7 | # Example: go('myprog') # will run source('myprog.r') 8 | # go() # will run source('myprog.r') again 9 | # go('somelib',add=TRUE) # will run source('myprog.r') and source('somelib.r') 10 | # go('myprog','somelib') # same as above 11 | # go('mytest') # will run source('mytest') only 12 | # go() # runs source('mytest') again 13 | # Reference: jouni@kerman.com, kerman@stat.columbia.edu 14 | # Modified: 2004-06-22 15 | # 16 | 17 | go <- function(..., add=FALSE, timer=FALSE) 18 | { 19 | last.sources <- getOption(".Last.Source") 20 | sources <- unlist(list(...)) 21 | if (length(sources)<1) { 22 | sources <- last.sources 23 | } else if (add) { 24 | sources <- c(last.sources,sources) 25 | } 26 | if (length(sources)<1) { 27 | return(cat("Usage: go('sourcefile', 'sourcefile2', ..., add=?, timer=?)\n")) 28 | } 29 | options(".Last.Source"=sources) 30 | cat("Source file(s): ",sources,"\n") 31 | yy <- NULL 32 | for (src in sources) { 33 | if (is.na(src)) { 34 | next 35 | } 36 | if (!file.exists(src)) { 37 | src2 <- paste(src, ".R", sep="") 38 | if (file.exists(src2)) 39 | src <- src2 40 | else { 41 | cat("source('",src,"') : file does not exist.\n",sep='') 42 | next 43 | } 44 | } 45 | cat("source('",src,"')\n",sep="") 46 | if (timer) 47 | cat("source('",src,"') : ",max(na.omit(system.time(source(src)))), 48 | " seconds elapsed.\n", sep='') 49 | else 50 | yy[[src]] <- source(src) 51 | } 52 | invisible(yy) 53 | } 54 | 55 | 56 | # By entering "G" on the console, go() is run. This is faster than typing "go()"... 57 | print.GO <- function(x,...) {go()} 58 | G <- structure(NA, class="GO") 59 | #class(G) <- "GO" 60 | 61 | # end of go.R 62 | -------------------------------------------------------------------------------- /man/triangleplot.Rd: -------------------------------------------------------------------------------- 1 | \name{triangleplot} 2 | \alias{triangleplot} 3 | 4 | \title{Triangle Plot} 5 | 6 | \description{ 7 | Function for making a triangle plot from a square matrix 8 | } 9 | 10 | \usage{ 11 | triangleplot (x, y=NULL, cutpts=NULL, details=TRUE, 12 | n.col.legend=5, cex.col=0.7, 13 | cex.var=0.9, digits=1, color=FALSE) 14 | } 15 | 16 | \arguments{ 17 | \item{x}{a square matrix.} 18 | \item{y}{a vector of names that corresponds to each element of the square matrix x.} 19 | \item{cutpts}{a vector of cutting points for color legend, default is \code{NULL}. 20 | The function will decide the cutting points if cutpts is not assigned.} 21 | \item{details}{show more than one digits correlaton values. Default 22 | is \code{TRUE}. \code{FALSE} is suggested to get readable output.} 23 | \item{n.col.legend}{number of legend for the color thermometer} 24 | \item{cex.col}{font size of the color thermometer.} 25 | \item{cex.var}{font size of the variable names.} 26 | \item{digits}{number of digits shown in the text of the color theromoeter.} 27 | \item{color}{color of the plot, default is FALSE, which uses gray scale.} 28 | } 29 | 30 | \details{ 31 | The function makes a triangle plot from a square matrix, e.g., the correlation plot, see 32 | \code{\link{corrplot}}. If a square matrix contains missing values, the cells of missing values 33 | will be marked \code{x}. 34 | } 35 | 36 | 37 | \author{ 38 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn} 39 | } 40 | 41 | \seealso{\code{\link{corrplot}}, 42 | \code{\link[graphics]{par}} 43 | } 44 | 45 | \examples{ 46 | old.par <- par(no.readonly = TRUE) 47 | 48 | # create a square matrix 49 | x <- matrix(runif(1600, 0, 1), 40, 40) 50 | 51 | # fig 1 52 | triangleplot(x) 53 | 54 | # fig 2 assign cutting points 55 | triangleplot(x, cutpts=c(0,0.25,0.5,0.75,1), digits=2) 56 | 57 | # fig 3 if x contains missing value 58 | x[12,13] <- x[13,12] <- NA 59 | x[25,27] <- x[27,25] <- NA 60 | triangleplot(x) 61 | 62 | par(old.par) 63 | 64 | # 65 | #library(RColorBrewer) 66 | #cormat <- cor(iris[,-5]) 67 | #triangleplot2(cormat,color = brewer.pal( 5, "RdBu" ), 68 | # n.col.legend=5, cex.col=0.7, cex.var=0.5) 69 | 70 | } 71 | 72 | \keyword{dplot} 73 | -------------------------------------------------------------------------------- /R/contrasts.bayes.R: -------------------------------------------------------------------------------- 1 | contr.bayes.ordered <- function ( n, scores = 1:n, contrasts = TRUE ) 2 | { 3 | make.poly <- function( n, scores ) { 4 | y <- scores - mean( scores ) 5 | X <- outer( y, seq_len( n ) - 1, "^" ) 6 | QR <- qr( X ) 7 | z <- QR$qr 8 | z <- z *( row( z ) == col( z ) ) 9 | raw <- qr.qy( QR, z ) 10 | Z <- sweep( raw, 2, apply( raw, 2, function( x ) sqrt( sum( x^2 ) ) ), "/" ) 11 | colnames( Z ) <- paste( "^", 1:n - 1, sep="" ) 12 | Z 13 | } 14 | if ( is.numeric( n ) && length( n ) == 1 ) { levs <- 1:n } 15 | else { 16 | levs <- n 17 | n <- length( levs ) 18 | } 19 | if ( n < 2 ) { 20 | stop( gettextf( "contrasts not defined for %d degrees of freedom", n - 1 ), domain = NA ) 21 | } 22 | if ( n > 95 ) { 23 | stop( gettextf( "orthogonal polynomials cannot be represented accurately enough for %d degrees of freedom", n-1 ), domain = NA ) 24 | } 25 | if ( length( scores ) != n ) { 26 | stop( "'scores' argument is of the wrong length" ) 27 | } 28 | if ( !is.numeric( scores ) || any( duplicated( scores ) ) ) { 29 | stop("'scores' must all be different numbers") 30 | } 31 | contr <- make.poly( n, scores ) 32 | if ( contrasts ) { 33 | dn <- colnames( contr ) 34 | dn[2:min( 4, n )] <- c( ".L", ".Q", ".C" )[1:min( 3, n-1 )] 35 | colnames( contr ) <- dn 36 | contr[, , drop = FALSE] 37 | } 38 | else { 39 | contr[, 1] <- 1 40 | contr 41 | } 42 | } 43 | 44 | contr.bayes.unordered <- function(n, base = 1, contrasts = TRUE) { 45 | if( is.numeric( n ) && length( n ) == 1) { 46 | if( n > 1 ) { levs <- 1:n } 47 | else stop( "not enough degrees of freedom to define contrasts" ) 48 | } 49 | else { 50 | levs <- n 51 | n <- length( n ) 52 | } 53 | contr <- array( 0, c(n, n), list( levs, levs ) ) 54 | diag( contr ) <- 1 55 | if( contrasts ) { 56 | if( n < 2 ) { stop( gettextf( "contrasts not defined for %d degrees of freedom", n - 1 ), domain = NA ) } 57 | if( base < 1 | base > n ){ stop( "baseline group number out of range" ) } 58 | contr <- contr[, , drop = FALSE] 59 | } 60 | contr 61 | } 62 | -------------------------------------------------------------------------------- /R/traceplot.R: -------------------------------------------------------------------------------- 1 | #traceplot.default <- function(x, ...) coda::traceplot 2 | 3 | 4 | # ======================================================================== 5 | # function for trace plot 6 | # ======================================================================== 7 | 8 | 9 | #setMethod("traceplot", signature(x = "mcmc.list"), 10 | # function (x, smooth = TRUE, col = 1:6, type = "l", ylab = "", ...) 11 | #{ 12 | # args <- list(...) 13 | # for (j in 1:nvar(x)) { 14 | # xp <- as.vector(time(x)) 15 | # yp <- if (nvar(x) > 1) 16 | # x[, j, drop = TRUE] 17 | # else x 18 | # yp <- do.call("cbind", yp) 19 | # matplot(xp, yp, xlab = "Iterations", ylab = ylab, type = type, 20 | # col = col, ...) 21 | # if (!is.null(varnames(x)) && is.null(list(...)$main)) 22 | # title(paste("Trace of", varnames(x)[j])) 23 | # if (smooth) { 24 | # scol <- rep(col, length = nchain(x)) 25 | # for (k in 1:nchain(x)) lines(lowess(xp, yp[, k]), 26 | # col = scol[k]) 27 | # } 28 | # } 29 | #} 30 | #) 31 | # 32 | 33 | setMethod("traceplot", signature(x = "bugs"), 34 | function( x, mfrow = c( 1, 1 ), varname = NULL, 35 | match.head = TRUE, ask = TRUE, 36 | col = rainbow( x$n.chains ), 37 | lty = 1, lwd = 1, ... ) 38 | { 39 | par( mfrow = mfrow ) 40 | par( ask = ask ) 41 | n.chain <- x$n.chains 42 | n.keep <- x$n.keep 43 | bugs.array <- x$sims.array 44 | varnamelist <- gsub( "\\[.*\\]","", dimnames( bugs.array )[[3]], fixed = FALSE ) 45 | if( is.null( varname ) ){ varname <- ".*" } 46 | if( match.head ) { varname <- paste( "^", varname, sep="" ) } 47 | index <- unlist( sapply( varname, function( x ){ grep( x, varnamelist ) } ) ) 48 | n.var <- length( index ) 49 | for( j in index ) { 50 | range.x <- c( 1, n.keep ) 51 | range.y <- range( bugs.array[,,j] ) 52 | v.name <- dimnames( bugs.array )[[3]][j] 53 | plot( range.x, range.y, type = "n", main = v.name, 54 | xlab = "iteration", ylab = v.name, 55 | xaxt = "n", xaxs = "i", ... ) 56 | for( i in 1:n.chain ) { 57 | x.cord <- 1:n.keep 58 | y.cord <- bugs.array[,i,j] 59 | lines( x.cord , y.cord , col = col[i], lty = lty, lwd = lwd ) 60 | } 61 | axis( 1, at = seq(0, n.keep, n.keep*0.1), tick = TRUE ) 62 | } 63 | } 64 | ) 65 | -------------------------------------------------------------------------------- /R/AllClass.R: -------------------------------------------------------------------------------- 1 | setOldClass("family") 2 | setOldClass("mcmc.list") 3 | setOldClass("polr") 4 | setOldClass("bugs") 5 | setOldClass("svyglm") 6 | 7 | setClass("balance", 8 | representation( 9 | rawdata = "data.frame", 10 | matched = "data.frame", 11 | factor = "logical") 12 | ) 13 | 14 | 15 | 16 | 17 | setClass("bayesglm", 18 | representation( 19 | formula = "formula", 20 | family = "family", 21 | prior.mean = "numeric", 22 | prior.scale = "numeric", 23 | prior.df = "numeric"), 24 | contains = "glm" 25 | ) 26 | 27 | 28 | #setClass("bayesglm.h", 29 | # representation( 30 | # formula = "formula", 31 | # family = "family", 32 | # prior.mean = "numeric", 33 | # prior.scale = "numeric", 34 | # prior.df = "numeric", 35 | # batch = "numeric"), 36 | # contains = "bayesglm" 37 | #) 38 | 39 | #setClass("polr", 40 | # representation( 41 | # formula = "formula", 42 | # Hess = "logical", 43 | # method = "character" 44 | ## prior.mean = "numeric", 45 | ## prior.scale = "numeric", 46 | ## prior.df = "numeric", 47 | ## prior.mean.for.cutpoints = "numeric", 48 | ## prior.scale.for.cutpoints = "numeric", 49 | ## prior.df.for.cutpoints = "numeric" 50 | # ), 51 | # contains="oldClass" 52 | #) 53 | 54 | 55 | setClass("bayespolr", 56 | representation( 57 | formula = "formula", 58 | Hess = "logical", 59 | method = "character", 60 | prior.mean = "numeric", 61 | prior.scale = "numeric", 62 | prior.df = "numeric", 63 | prior.mean.for.cutpoints = "numeric", 64 | prior.scale.for.cutpoints = "numeric", 65 | prior.df.for.cutpoints = "numeric"), 66 | contains = "polr" 67 | ) 68 | 69 | 70 | setClass("sim", 71 | representation( 72 | coef = "matrix", 73 | sigma = "numeric") 74 | ) 75 | 76 | setClass("sim.polr", 77 | representation( 78 | coef = "matrix", 79 | zeta = "matrix") 80 | ) 81 | 82 | 83 | 84 | setClass("sim.merMod", 85 | representation( 86 | fixef = "matrix", 87 | ranef = "list", 88 | sigma = "ANY") 89 | ) 90 | 91 | 92 | setClass("GO") 93 | -------------------------------------------------------------------------------- /R/binnedplot.R: -------------------------------------------------------------------------------- 1 | # ==================================================================== 2 | # Functions for plotting the binned residuals 3 | # ==================================================================== 4 | 5 | binnedplot <- function(x, y, nclass=NULL, 6 | xlab="Expected Values", ylab="Average residual", 7 | main="Binned residual plot", 8 | cex.pts=0.8, col.pts=1, col.int="gray", ...) 9 | { 10 | 11 | n <- length(x) 12 | if (is.null(nclass)){ 13 | if (n >= 100){ 14 | nclass=floor(sqrt(length(x))) 15 | } 16 | if (n > 10 & n < 100){ 17 | nclass=10 18 | } 19 | if (n <=10){ 20 | nclass=floor(n/2) 21 | } 22 | } 23 | 24 | aa <- data.frame(binned.resids (x, y, nclass)$binned) 25 | plot(range(aa$xbar), range(aa$ybar, aa$X2se, -aa$X2se, na.rm=TRUE), 26 | xlab=xlab, ylab=ylab, type="n", main=main, ...) 27 | abline (0,0, lty=2) 28 | lines (aa$xbar, aa$X2se, col=col.int) 29 | lines (aa$xbar, -aa$X2se, col=col.int) 30 | points (aa$xbar, aa$ybar, pch=19, cex=cex.pts, col=col.pts) 31 | } 32 | 33 | binned.resids <- function (x, y, nclass=floor(sqrt(length(x)))){ 34 | 35 | breaks.index <- floor(length(x)*(1:(nclass-1))/nclass) 36 | if(any(breaks.index==0)) nclass <- 1 37 | x.sort <- sort(x) 38 | breaks <- -Inf 39 | if(nclass > 1){ 40 | for (i in 1:(nclass-1)){ 41 | x.lo <- x.sort[breaks.index[i]] 42 | x.hi <- x.sort[breaks.index[i]+1] 43 | if (x.lo==x.hi){ 44 | if (x.lo==min(x)){ 45 | x.lo <- -Inf 46 | } 47 | else { 48 | x.lo <- max (x[x 1) sd(y[items]) else 0 75 | output <- rbind (output, c(xbar, ybar, n, x.range, 2*sdev/sqrt(n))) 76 | 77 | } 78 | 79 | colnames (output) <- c("xbar", "ybar", "n", "x.lo", "x.hi", "2se") 80 | #output <- output[output[,"sdev"] != 0,] 81 | return (list (binned=output, xbreaks=xbreaks)) 82 | } 83 | -------------------------------------------------------------------------------- /man/se.coef.Rd: -------------------------------------------------------------------------------- 1 | \name{se.coef} 2 | %\docType{genericFunction} 3 | \alias{se.coef} 4 | \alias{se.coef,lm-method} 5 | \alias{se.coef,glm-method} 6 | \alias{se.coef,merMod-method} 7 | \alias{se.fixef} 8 | \alias{se.ranef} 9 | 10 | \title{Extract Standard Errors of Model Coefficients} 11 | 12 | \description{ 13 | These functions extract standard errors of model coefficients 14 | from objects returned by modeling functions. 15 | } 16 | \usage{ 17 | se.coef (object, \dots) 18 | se.fixef (object) 19 | se.ranef (object) 20 | 21 | \S4method{se.coef}{lm}(object) 22 | \S4method{se.coef}{glm}(object) 23 | \S4method{se.coef}{merMod}(object) 24 | } 25 | 26 | \arguments{ 27 | \item{object}{object of \code{lm}, \code{glm} and \code{merMod} fit} 28 | \item{\dots}{other arguments} 29 | } 30 | \value{ 31 | \code{se.coef} gives lists of standard errors for \code{coef}, 32 | \code{se.fixef} gives a vector of standard errors for \code{fixef} and 33 | \code{se.ranef} gives a list of standard errors for \code{ranef}. 34 | } 35 | \details{ 36 | \code{se.coef} extracts standard errors from objects 37 | returned by modeling functions. 38 | \code{se.fixef} extracts standard errors of the fixed effects 39 | from objects returned by lmer and glmer functions. 40 | \code{se.ranef} extracts standard errors of the random effects 41 | from objects returned by lmer and glmer functions. 42 | } 43 | 44 | \seealso{ 45 | \code{\link{display}}, 46 | \code{\link{coef}}, 47 | \code{\link{sigma.hat}}, 48 | } 49 | 50 | \references{Andrew Gelman and Jennifer Hill. (2006). 51 | \emph{Data Analysis Using Regression and Multilevel/Hierarchical Models}. 52 | Cambridge University Press.} 53 | 54 | \author{Andrew Gelman \email{gelman@stat.columbia.edu}; 55 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn} 56 | } 57 | 58 | \examples{ 59 | # Here's a simple example of a model of the form, y = a + bx + error, 60 | # with 10 observations in each of 10 groups, and with both the 61 | # intercept and the slope varying by group. First we set up the model and data. 62 | 63 | group <- rep(1:10, rep(10,10)) 64 | mu.a <- 0 65 | sigma.a <- 2 66 | mu.b <- 3 67 | sigma.b <- 4 68 | rho <- 0 69 | Sigma.ab <- array (c(sigma.a^2, rho*sigma.a*sigma.b, 70 | rho*sigma.a*sigma.b, sigma.b^2), c(2,2)) 71 | sigma.y <- 1 72 | ab <- mvrnorm (10, c(mu.a,mu.b), Sigma.ab) 73 | a <- ab[,1] 74 | b <- ab[,2] 75 | # 76 | x <- rnorm (100) 77 | y1 <- rnorm (100, a[group] + b[group]*x, sigma.y) 78 | y2 <- rbinom(100, 1, prob=invlogit(a[group] + b*x)) 79 | 80 | # lm fit 81 | M1 <- lm (y1 ~ x) 82 | se.coef (M1) 83 | 84 | # glm fit 85 | M2 <- glm (y2 ~ x) 86 | se.coef (M2) 87 | 88 | # lmer fit 89 | M3 <- lmer (y1 ~ x + (1 + x |group)) 90 | se.coef (M3) 91 | se.fixef (M3) 92 | se.ranef (M3) 93 | 94 | # glmer fit 95 | M4 <- glmer (y2 ~ 1 + (0 + x |group), family=binomial(link="logit")) 96 | se.coef (M4) 97 | se.fixef (M4) 98 | se.ranef (M4) 99 | } 100 | \keyword{manip} 101 | \keyword{methods} 102 | \keyword{models} 103 | -------------------------------------------------------------------------------- /man/model.matrixBayes.Rd: -------------------------------------------------------------------------------- 1 | \name{model.matrixBayes} 2 | %\docType{genericFunction} 3 | \alias{model.matrixBayes} 4 | 5 | \title{Construct Design Matrices} 6 | 7 | \description{ 8 | \code{model.matrixBayes} creates a design matrix. 9 | } 10 | 11 | \usage{ 12 | model.matrixBayes(object, data = environment(object), 13 | contrasts.arg = NULL, xlev = NULL, keep.order = FALSE, drop.baseline=FALSE,...) 14 | 15 | %model.matrix.bayes.h(object, data = environment(object), 16 | % contrasts.arg = NULL, xlev = NULL, keep.order = FALSE, batch = NULL, ...) 17 | } 18 | 19 | \arguments{ 20 | \item{object}{an object of an appropriate class. For the default 21 | method, a model formula or terms object.} 22 | \item{data}{a data frame created with \code{\link{model.frame}}. If 23 | another sort of object, \code{model.frame} is called first.} 24 | \item{contrasts.arg}{A list, whose entries are contrasts suitable for 25 | input to the \code{\link{contrasts}} replacement function and 26 | whose names are the names of columns of \code{data} containing 27 | \code{\link{factor}}s.} 28 | \item{xlev}{to be used as argument of \code{\link{model.frame}} if 29 | \code{data} has no \code{"terms"} attribute.} 30 | \item{keep.order}{a logical value indicating whether the terms should 31 | keep their positions. If \code{FALSE} the terms are reordered so 32 | that main effects come first, followed by the interactions, 33 | all second-order, all third-order and so on. Effects of a given 34 | order are kept in the order specified.} 35 | \item{drop.baseline}{Drop the base level of categorical Xs, default is TRUE.} 36 | % \item{batch}{Not implement yet!} 37 | \item{\dots}{further arguments passed to or from other methods.} 38 | } 39 | \details{ 40 | \code{model.matrixBayes} is adapted from \code{model.matrix} in the \code{stats} 41 | pacakge and is designed for the use of \code{\link{bayesglm}}.% and \code{bayesglm.hierachical} (not yet implemented!). 42 | It is designed to keep baseline levels of all categorical varaibles and keep the 43 | variable names unodered in the output. The design matrices created by 44 | \code{model.matrixBayes} are unidentifiable using classical regression methods, 45 | though; they can be identified using \code{\link{bayesglm}}.% and 46 | %\code{bayesglm.hierachical}. 47 | } 48 | 49 | \references{Andrew Gelman, Aleks Jakulin, Maria Grazia Pittau and Yu-Sung Su. (2009). 50 | \dQuote{A Weakly Informative Default Prior Distribution For 51 | Logistic And Other Regression Models.} 52 | \emph{The Annals of Applied Statistics} 2 (4): 1360--1383. 53 | \url{http://www.stat.columbia.edu/~gelman/research/published/priors11.pdf} 54 | } 55 | 56 | \seealso{ 57 | \code{\link[stats]{model.frame}}, \code{\link[stats]{model.extract}}, 58 | \code{\link[stats]{terms}}, \code{\link[stats]{terms.formula}}, 59 | \code{\link{bayesglm}}. 60 | } 61 | 62 | \author{Yu-Sung Su \email{suyusung@tsinghua.edu.cn}} 63 | 64 | \examples{ 65 | ff <- log(Volume) ~ log(Height) + log(Girth) 66 | str(m <- model.frame(ff, trees)) 67 | (model.matrix(ff, m)) 68 | class(ff) <- c("bayesglm", "terms", "formula") 69 | (model.matrixBayes(ff, m)) 70 | %class(ff) <- c("bayesglm.h", "terms", "formula") 71 | %(model.matrixBayes(ff, m)) 72 | } 73 | \keyword{models} 74 | \keyword{manip} 75 | -------------------------------------------------------------------------------- /man/standardize.Rd: -------------------------------------------------------------------------------- 1 | \name{standardize} 2 | %\docType{genericFunction} 3 | \alias{standardize} 4 | \alias{standardize,lm-method} 5 | \alias{standardize,glm-method} 6 | \alias{standardize,merMod-method} 7 | \alias{standardize,polr-method} 8 | 9 | 10 | \title{Function for Standardizing Regression Predictors by Centering and 11 | Dividing by 2 sd's} 12 | 13 | \description{Numeric variables that take on more than two values are each rescaled 14 | to have a mean of 0 and a sd of 0.5; 15 | Binary variables are rescaled to have a mean of 0 and a difference of 1 16 | between their two categories; 17 | Non-numeric variables that take on more than two values are unchanged; 18 | Variables that take on only one value are unchanged 19 | 20 | } 21 | \usage{ 22 | \S4method{standardize}{lm}(object, unchanged = NULL, 23 | standardize.y = FALSE, binary.inputs = "center") 24 | \S4method{standardize}{glm}(object, unchanged = NULL, 25 | standardize.y = FALSE, binary.inputs = "center") 26 | \S4method{standardize}{merMod}(object, unchanged = NULL, 27 | standardize.y = FALSE, binary.inputs = "center") 28 | \S4method{standardize}{polr}(object, unchanged = NULL, 29 | standardize.y = FALSE, binary.inputs = "center") 30 | } 31 | 32 | \arguments{ 33 | \item{object}{an object of class \code{lm} or \code{glm}} 34 | \item{unchanged}{vector of names of parameters to leave unstandardized} 35 | \item{standardize.y}{ if TRUE, the outcome variable is standardized also} 36 | \item{binary.inputs}{options for standardizing binary variables} 37 | } 38 | 39 | \details{ 40 | "0/1" (rescale so that the lower value is 0 and the upper is 1) 41 | "-0.5/0.5" (rescale so that the lower value is -0.5 and upper is 0.5) 42 | "center" (rescale so that the mean of the data is 0 and the difference 43 | between the two categories is 1) 44 | "full" (rescale by subtracting the mean and dividing by 2 sd's) 45 | "leave.alone" (do nothing) 46 | } 47 | 48 | \references{Andrew Gelman. (2008). 49 | \dQuote{Scaling regression inputs by dividing by two standard deviations.} 50 | \emph{Statistics in Medicine} 27: 2865--2873. 51 | \url{http://www.stat.columbia.edu/~gelman/research/published/standardizing7.pdf} 52 | } 53 | 54 | \author{Andrew Gelman \email{gelman@stat.columbia.edu} 55 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn} 56 | } 57 | 58 | \seealso{\code{\link{rescale}} 59 | } 60 | 61 | 62 | 63 | \examples{ 64 | # Set up the fake data 65 | n <- 100 66 | x <- rnorm (n, 2, 1) 67 | x1 <- rnorm (n) 68 | x1 <- (x1-mean(x1))/(2*sd(x1)) # standardization 69 | x2 <- rbinom (n, 1, .5) 70 | b0 <- 1 71 | b1 <- 1.5 72 | b2 <- 2 73 | y <- rbinom (n, 1, invlogit(b0+b1*x1+b2*x2)) 74 | y2 <- sample(1:5, n, replace=TRUE) 75 | M1 <- glm (y ~ x, family=binomial(link="logit")) 76 | display(M1) 77 | M1.1 <- glm (y ~ rescale(x), family=binomial(link="logit")) 78 | display(M1.1) 79 | M1.2 <- standardize(M1) 80 | display(M1.2) 81 | # M1.1 & M1.2 should be the same 82 | M2 <- polr(ordered(y2) ~ x) 83 | display(M2) 84 | M2.1 <- polr(ordered(y2) ~ rescale(x)) 85 | display(M2.1) 86 | M2.2 <- standardize(M2.1) 87 | display(M2.2) 88 | # M2.1 & M2.2 should be the same 89 | } 90 | \keyword{manip} 91 | \keyword{models} 92 | \keyword{methods} 93 | -------------------------------------------------------------------------------- /man/multicomp.plot.Rd: -------------------------------------------------------------------------------- 1 | \name{multicomp.plot} 2 | \alias{multicomp.plot} 3 | \alias{mcplot} 4 | 5 | \title{Multiple Comparison Plot} 6 | 7 | \description{ 8 | Plots significant difference of simulated array. 9 | } 10 | 11 | \usage{ 12 | multicomp.plot(object, alpha = 0.05, main = "Multiple Comparison Plot", 13 | label = NULL, shortlabel = NULL, show.pvalue = FALSE, 14 | label.as.shortlabel = FALSE, label.on.which.axis = 3, 15 | col.low = "lightsteelblue", col.same = "white", col.high = "lightslateblue", 16 | vertical.line = TRUE, horizontal.line = FALSE, 17 | vertical.line.lty = 1, horizontal.line.lty = 1, mar=c(3.5,3.5,3.5,3.5)) 18 | } 19 | 20 | \arguments{ 21 | \item{object}{Simulated array of coefficients, columns being different variables 22 | and rows being simulated result.} 23 | \item{alpha}{Level of significance to compare.} 24 | \item{main}{Main label.} 25 | \item{label}{Labels for simulated parameters.} 26 | \item{shortlabel}{Short labels to put into the plot.} 27 | \item{show.pvalue}{Default is FALSE, if set to TRUE replaces short label 28 | with Bayesian p value. } 29 | \item{label.as.shortlabel}{Default is FALSE, if set to TRUE takes first 2 character 30 | of label and use it as short label.} 31 | \item{label.on.which.axis}{default is the 3rd (top) axis.} 32 | \item{col.low}{Color of significantly low coefficients.} 33 | \item{col.same}{Color of not significant difference.} 34 | \item{col.high}{Color of significantly high coefficients.} 35 | \item{vertical.line}{Default is TRUE, if set to FALSE does not draw vertical line.} 36 | \item{horizontal.line}{Default is FALSE, if set to TRUE draws horizontal line.} 37 | \item{vertical.line.lty}{Line type of vertical line.} 38 | \item{horizontal.line.lty}{Line type of horizontal line.} 39 | \item{mar}{A numerical vector of the form \code{c(bottom, left, top, right)} 40 | which gives the number of lines of margin to be specified on 41 | the four sides of the plot. The default is \code{c(3.5,3.5,3.5,3.5)}.} 42 | 43 | } 44 | 45 | \value{ 46 | \item{pvalue}{Array of Bayesian p value.} 47 | \item{significant}{Array of significance.} 48 | } 49 | 50 | \references{Andrew Gelman and Jennifer Hill. (2006). 51 | \emph{Data Analysis Using Regression and Multilevel/Hierarchical Models}. 52 | Cambridge University Press. 53 | } 54 | 55 | 56 | \author{ 57 | Masanao Yajima \email{yajima@stat.columbia.edu}, 58 | Andrew Gelman \email{gelman@stat.columbia.edu} 59 | } 60 | 61 | \seealso{ 62 | \code{\link{coefplot}} 63 | } 64 | 65 | \examples{ 66 | old.par <- par(no.readonly = TRUE) 67 | 68 | # example 1 69 | simulation.array <- data.frame(coef1=rnorm(100,10,2), coef2=rnorm(100,5,2), 70 | coef3=rnorm(100,0,1), coef4=rnorm(100,-5,3), 71 | coef5=rnorm(100,-2,1)) 72 | short.lab <- c("c01", "c02", "c03", "c04", "c05") 73 | multicomp.plot(simulation.array[,1:4], label.as.shortlabel=TRUE) 74 | 75 | # wraper for multicomp.plot 76 | mcplot(simulation.array, shortlabel = short.lab) 77 | 78 | # example 2 79 | data(lalonde) 80 | M1 <- lm(re78 ~ treat + re74 + re75 + age + educ + u74 + u75, data=lalonde) 81 | M1.sim <- sim(M1) 82 | lm.sim <- coef(M1.sim)[,-1] 83 | multicomp.plot(lm.sim, label.as.shortlabel=TRUE, label.on.which.axis=2) 84 | 85 | par(old.par) 86 | } 87 | \keyword{hplot} 88 | -------------------------------------------------------------------------------- /man/corrplot.Rd: -------------------------------------------------------------------------------- 1 | \name{corrplot} 2 | \alias{corrplot} 3 | 4 | \title{Correlation Plot} 5 | \description{ 6 | Function for making a correlation plot starting from a data matrix 7 | } 8 | 9 | \usage{ 10 | corrplot (data, varnames=NULL, cutpts=NULL, 11 | abs=TRUE, details=TRUE, 12 | n.col.legend=5, cex.col=0.7, 13 | cex.var=0.9, digits=1, color=FALSE) 14 | } 15 | 16 | \arguments{ 17 | \item{data}{a data matrix} 18 | \item{varnames}{variable names of the data matrix, if not provided 19 | use default variable names} 20 | \item{abs}{if TRUE, transform all correlation values into positive values, 21 | default=TRUE.} 22 | \item{cutpts}{a vector of cutting points for color legend, default is NULL. 23 | The function will decide the cutting points if cutpts is not assigned.} 24 | \item{details}{show more than one digits correlaton values. Default 25 | is TRUE. FALSE is suggested to get readable output.} 26 | \item{n.col.legend}{number of legend for the color thermometer.} 27 | \item{cex.col}{font size of the color thermometer.} 28 | \item{cex.var}{font size of the variable names.} 29 | \item{digits}{number of digits shown in the text of the color theromoeter.} 30 | \item{color}{color of the plot, default is FALSE, which uses gray scale.} 31 | } 32 | 33 | \details{ 34 | The function adapts the R function for 35 | Figure 8 in Tian Zheng, Matthew Salganik, and Andrew Gelman, 2006, 36 | "How many people do you know in prison?: using overdispersion in count data to estimate social structure in networks", 37 | Journal of the American Statistical Association, Vol.101, N0. 474: p.409-23. 38 | } 39 | 40 | \value{ 41 | A correlation plot. 42 | } 43 | 44 | \references{ Tian Zheng, Matthew Salganik, and Andrew Gelman, 2006, 45 | "How many people do you know in prison?: using overdispersion in count data to estimate social structure in networks", 46 | Journal of the American Statistical Association, Vol.101, N0. 474: p.409-23} 47 | 48 | \author{Tian Zheng \email{tzheng@stat.columbia.edu}; 49 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn} 50 | } 51 | 52 | 53 | \seealso{\code{\link[stats]{cor}}, 54 | \code{\link[graphics]{par}} 55 | } 56 | 57 | \examples{ 58 | old.par <- par(no.readonly = TRUE) 59 | 60 | x1 <- rnorm(1000,50,2) 61 | x2 <- rbinom(1000,1,prob=0.63) 62 | x3 <- rpois(1000, 2) 63 | x4 <- runif(1000,40,100) 64 | x5 <- rnorm(1000,100,30) 65 | x6 <- rbeta(1000,2,2) 66 | x7 <- rpois(1000,10) 67 | x8 <- rbinom(1000,1,prob=0.4) 68 | x9 <- rbeta(1000,5,4) 69 | x10 <- runif(1000,-10,-1) 70 | 71 | test.data <- data.matrix(cbind(x1,x2,x3,x4,x5,x6,x7,x8,x9,x10)) 72 | test.names <- c("a short name01","a short name02","a short name03", 73 | "a short name04","a short name05","a short name06", 74 | "a short name07","a short name08","a short name09", 75 | "a short name10") 76 | 77 | # example 1 78 | corrplot(test.data) 79 | 80 | # example 2 81 | corrplot(test.data,test.names, abs=FALSE, n.col.legend=7) 82 | corrplot(test.data,test.names, abs=TRUE, n.col.legend=7) 83 | 84 | # example 3 85 | data(lalonde) 86 | corrplot(lalonde, details=FALSE, color=TRUE) 87 | corrplot(lalonde, cutpts=c(0,0.25,0.5,0.75), color=TRUE, digits=2) 88 | 89 | par(old.par) 90 | } 91 | 92 | \keyword{dplot} 93 | -------------------------------------------------------------------------------- /man/binnedplot.Rd: -------------------------------------------------------------------------------- 1 | \name{binnedplot} 2 | \alias{binnedplot} 3 | \alias{binned.resids} 4 | 5 | \title{Binned Residual Plot} 6 | \description{ 7 | A function that plots averages of y versus averages of x and can be 8 | useful to plot residuals for logistic regression. 9 | } 10 | \usage{ 11 | binnedplot(x ,y, nclass=NULL, 12 | xlab="Expected Values", ylab="Average residual", 13 | main="Binned residual plot", 14 | cex.pts=0.8, col.pts=1, col.int="gray", ...) 15 | } 16 | 17 | \arguments{ 18 | \item{x}{The expected values from the logistic regression.} 19 | \item{y}{The residuals values from logistic regression (observed values 20 | minus expected values).} 21 | \item{nclass}{Number of categories (bins) based on their fitted values in which 22 | the data are divided. Default=NULL and will take the value of nclass 23 | according to the $n$ such that if $n >=100$, 24 | nclass=floor(sqrt(length(x))); if $10 0 & length(prob3) == 0) { 70 | offset[1] <- -bar.width/2 71 | offset[2] <- bar.width/2 72 | offset[3] <- 0 73 | } 74 | for (i in 1:length(x)) { 75 | polygon(x[i] + c(-1, -1, 1, 1) * bar.width/2 + offset[1], 76 | c(0, prob[i], prob[i], 0), border = prob.col, col = prob.col) 77 | if (!is.null(prob2)) { 78 | polygon(x[i] + c(-1, -1, 1, 1) * bar.width/2 + offset[2], 79 | c(0, prob2[i], prob2[i], 0), border = prob2.col, 80 | col = prob2.col) 81 | } 82 | if (!is.null(prob3)) { 83 | polygon(x[i] + c(-1, -1, 1, 1) * bar.width/2 + offset[3], 84 | c(0, prob3[i], prob3[i], 0), border = prob3.col, 85 | col = prob3.col) 86 | } 87 | } 88 | } 89 | 90 | discrete.hist <- discrete.histogram 91 | -------------------------------------------------------------------------------- /R/multicomp.plot.R: -------------------------------------------------------------------------------- 1 | #============================================================================== 2 | # Multiple Comparison Plot 3 | #============================================================================== 4 | multicomp.plot <- function(object, alpha=0.05, main = "Multiple Comparison Plot", 5 | label = NULL, shortlabel = NULL, show.pvalue = FALSE, 6 | label.as.shortlabel = FALSE, label.on.which.axis = 3, 7 | col.low = "lightsteelblue", col.same = "white", col.high = "lightslateblue", 8 | vertical.line = TRUE, horizontal.line = FALSE, 9 | vertical.line.lty = 1, horizontal.line.lty = 1, mar=c(3.5,3.5,3.5,3.5)) 10 | { 11 | 12 | # object check: S4 methods instead?! 13 | if (!is.data.frame(object)){ 14 | if(is.matrix(object)){ 15 | object <- as.data.frame(object) 16 | } 17 | else stop ( message = "object must be a matrix or a data.frame" ) 18 | } 19 | ind <- dim( object ) [2] 20 | name <- dimnames( object ) [[2]] 21 | # label 22 | if( is.null( label ) ) { 23 | label <- name 24 | } else if( length( label ) != ind ) { 25 | stop( message = "you must specify all the label" ) 26 | } 27 | # short label 28 | if( !is.null( shortlabel ) && length( shortlabel ) != ind ){ 29 | stop( message = "you must specify all the short label" ) 30 | } 31 | else if( is.null( shortlabel ) && label.as.shortlabel ){ 32 | shortlabel <- abbreviate( label, minlength = 2) 33 | } 34 | ################################ 35 | # Calculate bayesian p-value 36 | ################################ 37 | bayes.pvalue <- matrix( 0, ind, ind ) 38 | bayes.signif <- matrix( 0, ind, ind ) 39 | for( i in 1:ind ) { 40 | for( j in 1:ind ) { 41 | bayes.pvalue[i, j] <- .pvalue( object[ , j], object[ , i] ) 42 | } 43 | } 44 | for( i in 1:ind ) { 45 | for( j in 1:ind ) { 46 | bayes.signif[i, j] <- .is.significant( bayes.pvalue[i, j], alpha = alpha ) 47 | } 48 | } 49 | dimnames( bayes.pvalue ) <- list( label, label ) 50 | diag( bayes.signif ) <- 0 51 | dimnames( bayes.signif ) <- list( label, label ) 52 | bayes.signif <- bayes.signif [ , ind:1] 53 | bayes.pvalue <- bayes.pvalue [ , ind:1] 54 | ################################ 55 | # Plot 56 | ################################ 57 | maxchar <- max(sapply(label, nchar)) 58 | mar.idx <- label.on.which.axis 59 | 60 | par(mar=mar) 61 | min.mar <- par('mar') 62 | if(mar.idx==3){ 63 | mar[mar.idx] <- min(min.mar[mar.idx], trunc(mar[mar.idx] + maxchar/3)) + mar[mar.idx] + 0.1 64 | } 65 | else { 66 | mar[mar.idx] <- min(min.mar[mar.idx], trunc(mar[mar.idx] + maxchar/2)) + 0.1 67 | } 68 | par(mar=mar) 69 | image( 1:nrow( bayes.signif ), 1:ncol( bayes.signif ), 70 | bayes.signif, ylab = "", xlab = "", yaxt = "n", xaxt = "n", 71 | col = c( col.low, col.same, col.high ) ) 72 | box( "plot" ) 73 | axis(2, at = 0, labels = "", las = 1, line = 0, tick = FALSE, 74 | xaxs = "i", yaxs = "i" ) 75 | axis(mar.idx, at = 1:nrow( bayes.signif ),line = -0.8, las = 2 , cex = 0.3, 76 | labels = label, tick = FALSE, xaxs = "i") 77 | title( main = main, line = mar[3] - 3 ) 78 | 79 | for( a in 1:ind ) { 80 | if( vertical.line ) { 81 | lines( c( a + 0.5, a + 0.5 ), c( 0, ind + 1 ), lty = vertical.line.lty ) 82 | } 83 | if( horizontal.line ) { 84 | lines( c( 0, ind + 1 ), c( a + 0.5, a + 0.5 ), lty = horizontal.line.lty ) 85 | } 86 | if( !is.null( shortlabel ) ) { 87 | for( b in 1:ind ) { 88 | if( show.pvalue ){ 89 | text( a, b, ( round( bayes.pvalue, 2 ) )[a,b], cex = 0.5 ) 90 | } else { 91 | text( a, b, shortlabel[ind+1-b], cex = 0.7 ) 92 | } 93 | } 94 | } 95 | } 96 | invisible( list( pvalue = bayes.pvalue, significant = bayes.signif ) ) 97 | } 98 | 99 | mcplot <- multicomp.plot 100 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | importFrom(graphics, 2 | "abline", 3 | "axis", 4 | "box", 5 | "image", 6 | "layout", 7 | "lines", 8 | "par", 9 | "plot", 10 | "points", 11 | "polygon", 12 | "rect", 13 | "segments", 14 | "text", 15 | "title") 16 | 17 | importFrom(grDevices, 18 | "gray", 19 | "heat.colors", 20 | "rainbow") 21 | 22 | importFrom(methods, 23 | "as", 24 | "getMethod", 25 | "new", 26 | "setClass", 27 | "setOldClass", 28 | "show", 29 | "signature") 30 | 31 | 32 | importFrom(utils, 33 | "packageDescription", 34 | "read.fwf") 35 | 36 | 37 | 38 | importFrom(Matrix, 39 | "t", 40 | "crossprod", 41 | "tcrossprod", 42 | "colMeans", 43 | "Diagonal", 44 | "solve" 45 | ) 46 | 47 | importFrom(stats, 48 | ".getXlevels", 49 | ".checkMFClasses", "AIC", "as.formula", "binomial", "coefficients", "coef", 50 | "contrasts<-", "cor", "dcauchy", "delete.response", "deviance", "dlogis", 51 | "dnorm", "dt", "family", "fitted", "formula", "gaussian", "glm.control", 52 | "is.empty.model", "lm.fit", "logLik", "model.extract", "model.frame", "model.matrix", 53 | "model.matrix.default", "model.offset", "model.response", "model.weights", 54 | "na.exclude", "na.omit", "na.pass", "napredict", "optim", "predict", "pcauchy", 55 | "plogis", "pnorm", 56 | "qt", "rchisq", "rgamma", "rnorm", "sd", "terms", "terms.formula", "var", "vcov", 57 | "weighted.mean") 58 | 59 | 60 | importFrom(coda, 61 | "nvar", 62 | "varnames", 63 | "nchain" 64 | ) 65 | 66 | importFrom(MASS, 67 | "polr", 68 | "mvrnorm" 69 | ) 70 | 71 | 72 | importFrom(nlme, 73 | "fixef", 74 | "ranef", 75 | "VarCorr" 76 | 77 | ) 78 | 79 | importFrom(lme4, 80 | "getME", 81 | "isREML", 82 | "refitML" 83 | ) 84 | 85 | importFrom(abind, "abind") 86 | 87 | 88 | 89 | exportClasses( 90 | "balance", 91 | "bayesglm", 92 | "bayespolr", 93 | "sim", 94 | "sim.merMod" 95 | ) 96 | 97 | exportMethods( 98 | "coefplot", 99 | "display", 100 | "mcsamp", 101 | "se.coef", 102 | "sim", 103 | "print", 104 | "show", 105 | "standardize", 106 | "traceplot" 107 | ) 108 | 109 | export( 110 | "extractDIC", 111 | "balance", 112 | "bayesglm", 113 | "bayesglm.fit", 114 | "bayespolr", 115 | "binnedplot", 116 | "binned.resids", 117 | "coefplot", 118 | "coefplot.default", 119 | "contr.bayes.ordered", 120 | "contr.bayes.unordered", 121 | "corrplot", 122 | "display", 123 | "discrete.histogram", 124 | "discrete.hist", 125 | "fround", 126 | "G", 127 | "go", 128 | "invlogit", 129 | "logit", 130 | "matching", 131 | "mcsamp", 132 | "model.matrixBayes", 133 | "multicomp.plot", 134 | "mcplot", 135 | "pfround", 136 | "read.columns", 137 | "rescale", 138 | "residual.plot", 139 | "se.coef", 140 | "se.fixef", 141 | "se.ranef", 142 | "sigma.hat", 143 | "sim", 144 | "traceplot", 145 | "triangleplot" 146 | ) 147 | 148 | 149 | S3method(extractDIC, merMod) 150 | S3method(print, GO) 151 | S3method(plot, balance) 152 | S3method(print, balance) 153 | S3method(predict, bayesglm) 154 | S3method(coef, sim) 155 | S3method(coef, sim.polr) 156 | S3method(coef, sim.merMod) 157 | S3method(fitted, sim.merMod) 158 | S3method(fixef, sim.merMod) 159 | S3method(ranef, sim.merMod) 160 | S3method(sigma.hat, lm) 161 | S3method(sigma.hat, glm) 162 | S3method(sigma.hat, merMod) 163 | S3method(sigma.hat, sim) 164 | S3method(sigma.hat, sim.merMod) 165 | -------------------------------------------------------------------------------- /R/standardize.R: -------------------------------------------------------------------------------- 1 | standardize.default <- function(call, unchanged=NULL, 2 | standardize.y=FALSE, binary.inputs="center"){ 3 | form <- call$formula 4 | varnames <- all.vars (form) 5 | n.vars <- length (varnames) 6 | # 7 | # Decide which variables will be unchanged 8 | # 9 | transform <- rep ("leave.alone", n.vars) 10 | if (standardize.y) { 11 | transform[1] <- "full" 12 | } 13 | for (i in 2:n.vars){ 14 | v <- varnames[i] 15 | if (is.null(call$data)) { 16 | thedata <- get(v) 17 | } 18 | else { 19 | thedata <- get(as.character(call$data))[[v]] 20 | } 21 | if (is.na(match(v,unchanged))){ 22 | num.categories <- length (unique(thedata[!is.na(thedata)])) 23 | if (num.categories==2){ 24 | transform[i] <- binary.inputs 25 | } 26 | else if (num.categories>2 & is.numeric(thedata)){ 27 | transform[i] <- "full" 28 | } 29 | } 30 | } 31 | # 32 | # New variable names: 33 | # prefix with "c." if centered or "z." if centered and scaled 34 | # 35 | varnames.new <- ifelse (transform=="leave.alone", varnames, 36 | ifelse (transform=="full", paste ("z", varnames, sep="."), 37 | paste ("c", varnames, sep="."))) 38 | transformed.variables <- (1:n.vars)[transform!="leave.alone"] 39 | 40 | 41 | #Define the new variables 42 | if (is.null(call$data)) { 43 | for (i in transformed.variables) { 44 | assign(varnames.new[i], rescale(get(varnames[i]), binary.inputs)) 45 | } 46 | } 47 | else { 48 | newvars <- NULL 49 | for (i in transformed.variables) { 50 | assign(varnames.new[i], rescale(get(as.character(call$data))[[varnames[i]]], 51 | binary.inputs)) 52 | newvars <- cbind(newvars, get(varnames.new[i])) 53 | } 54 | assign(as.character(call$data), cbind(get(as.character(call$data)), newvars)) 55 | } 56 | 57 | # Now call the regression with the new variables 58 | 59 | call.new <- call 60 | L <- sapply (as.list (varnames.new), as.name) 61 | names(L) <- varnames 62 | call.new$formula <- do.call (substitute, list (form, L)) 63 | formula <- as.character (call.new$formula) 64 | if (length(formula)!=3) stop ("formula does not have three components") 65 | formula <- paste (formula[2],formula[1],formula[3]) 66 | formula <- gsub ("factor(z.", "factor(", formula, fixed=TRUE) 67 | formula <- gsub ("factor(c.", "factor(", formula, fixed=TRUE) 68 | call.new$formula <- as.formula (formula) 69 | return (eval (call.new)) 70 | } 71 | 72 | 73 | 74 | 75 | setMethod("standardize", signature(object = "lm"), 76 | function(object, unchanged=NULL, 77 | standardize.y=FALSE, binary.inputs="center") 78 | { 79 | call <- object$call 80 | out <- standardize.default(call=call, unchanged=unchanged, 81 | standardize.y=standardize.y, binary.inputs=binary.inputs) 82 | return(out) 83 | } 84 | ) 85 | 86 | setMethod("standardize", signature(object = "glm"), 87 | function(object, unchanged=NULL, 88 | standardize.y=FALSE, binary.inputs="center") 89 | { 90 | call <- object$call 91 | out <- standardize.default(call=call, unchanged=unchanged, 92 | standardize.y=standardize.y, binary.inputs=binary.inputs) 93 | return(out) 94 | } 95 | ) 96 | 97 | setMethod("standardize", signature(object = "polr"), 98 | function(object, unchanged=NULL, 99 | standardize.y=FALSE, binary.inputs="center") 100 | { 101 | call <- object$call 102 | out <- standardize.default(call=call, unchanged=unchanged, 103 | standardize.y=standardize.y, binary.inputs=binary.inputs) 104 | return(out) 105 | } 106 | ) 107 | 108 | 109 | 110 | setMethod("standardize", signature(object = "merMod"), 111 | function(object, unchanged=NULL, 112 | standardize.y=FALSE, binary.inputs="center") 113 | { 114 | call <- object@call 115 | out <- standardize.default(call=call, unchanged=unchanged, 116 | standardize.y=standardize.y, binary.inputs=binary.inputs) 117 | return(out) 118 | } 119 | ) 120 | -------------------------------------------------------------------------------- /R/se.coef.R: -------------------------------------------------------------------------------- 1 | setMethod("se.coef", signature(object = "lm"), 2 | function(object) 3 | { 4 | object.class <- class(object)[[1]] 5 | sqrt (diag(vcov(object))) 6 | } 7 | ) 8 | 9 | 10 | setMethod("se.coef", signature(object = "glm"), 11 | function(object) 12 | { 13 | object.class <- class(object)[[1]] 14 | sqrt (diag(vcov(object))) 15 | } 16 | ) 17 | 18 | #setMethod("se.coef", signature(object = "mer"), 19 | # function(object) 20 | # { 21 | # # if (sum(unlist(lapply(object@bVar, is.na)))>0){ 22 | ## object@call$control <- list(usePQL=TRUE) 23 | ## object <- lmer(object@call$formula) 24 | ## } 25 | # #ngrps <- lapply(object@flist, function(x) length(levels(x))) 26 | # fcoef <- fixef(object) 27 | # #sc <- attr (VarCorr (object), "sc") 28 | # corF <- vcov(object)@factors$correlation 29 | # se.unmodeled <- NULL 30 | # se.unmodeled[[1]] <- corF@sd 31 | # names (se.unmodeled) <- "unmodeled" 32 | # 33 | # #coef <- ranef (object) 34 | # #estimate <- ranef(object, postVar=TRUE) 35 | # coef <- ranef(object, postVar=TRUE) 36 | # se.bygroup <- coef #ranef( object, postVar = TRUE ) 37 | # n.groupings <- length (coef) 38 | # 39 | # for (m in 1:n.groupings){ 40 | # vars.m <- attr (coef[[m]], "postVar") 41 | # K <- dim(vars.m)[1] 42 | # J <- dim(vars.m)[3] 43 | # se.bygroup[[m]] <- array (NA, c(J,K)) 44 | # for (j in 1:J){ 45 | # se.bygroup[[m]][j,] <- sqrt(diag(as.matrix(vars.m[,,j]))) 46 | # } 47 | ## se.bygroup[[m]] <- se.bygroup[[m]]*sc 48 | # names.full <- dimnames (ranef(object)[[m]]) 49 | # dimnames (se.bygroup[[m]]) <- list (names.full[[1]], 50 | # names.full[[2]]) 51 | # } 52 | # #names(se.bygroup) <- names(ngrps) 53 | # ses <- c (se.unmodeled, se.bygroup) 54 | # return (ses) 55 | # } 56 | #) 57 | 58 | setMethod("se.coef", signature(object = "merMod"), 59 | function(object) 60 | { 61 | #ngrps <- lapply(object@flist, function(x) length(levels(x))) 62 | fcoef <- fixef(object) 63 | #sc <- attr (VarCorr (object), "sc") 64 | corF <- vcov(object)@factors$correlation 65 | se.unmodeled <- NULL 66 | se.unmodeled[[1]] <- corF@sd 67 | names (se.unmodeled) <- "fixef"#"unmodeled" 68 | 69 | #coef <- ranef (object) 70 | #estimate <- ranef(object, postVar=TRUE) 71 | coef <- ranef(object, condVar=TRUE) 72 | se.bygroup <- coef #ranef( object, postVar = TRUE ) 73 | n.groupings <- length (coef) 74 | 75 | for (m in 1:n.groupings){ 76 | vars.m <- attr (coef[[m]], "postVar") 77 | K <- dim(vars.m)[1] 78 | J <- dim(vars.m)[3] 79 | se.bygroup[[m]] <- array (NA, c(J,K)) 80 | for (j in 1:J){ 81 | se.bygroup[[m]][j,] <- sqrt(diag(as.matrix(vars.m[,,j]))) 82 | } 83 | # se.bygroup[[m]] <- se.bygroup[[m]]*sc 84 | names.full <- dimnames (coef[[m]]) 85 | dimnames (se.bygroup[[m]]) <- list (names.full[[1]], 86 | names.full[[2]]) 87 | } 88 | #names(se.bygroup) <- names(ngrps) 89 | ses <- c (se.unmodeled, se.bygroup) 90 | return (ses) 91 | } 92 | ) 93 | 94 | 95 | 96 | se.fixef <- function (object){ 97 | #object <- summary (object) 98 | fcoef.name <- names(fixef(object)) 99 | corF <- vcov(object)@factors$correlation 100 | ses <- corF@sd 101 | names(ses) <- fcoef.name 102 | return (ses) 103 | } 104 | 105 | se.ranef <- function (object){ 106 | #ngrps <- lapply(object@flist, function(x) length(levels(x))) 107 | se.bygroup <- ranef( object, condVar = TRUE ) 108 | n.groupings<- length( se.bygroup ) 109 | for( m in 1:n.groupings ) { 110 | vars.m <- attr( se.bygroup[[m]], "postVar" ) 111 | K <- dim(vars.m)[1] 112 | J <- dim(vars.m)[3] 113 | names.full <- dimnames(se.bygroup[[m]]) 114 | se.bygroup[[m]] <- array(NA, c(J, K)) 115 | for (j in 1:J) { 116 | se.bygroup[[m]][j, ] <- sqrt(diag(as.matrix(vars.m[, , j]))) 117 | } 118 | dimnames(se.bygroup[[m]]) <- list(names.full[[1]], names.full[[2]]) 119 | } 120 | return(se.bygroup) 121 | } 122 | -------------------------------------------------------------------------------- /man/balance.Rd: -------------------------------------------------------------------------------- 1 | \name{balance} 2 | \docType{class} 3 | % Classes 4 | \alias{balance-class} 5 | % Function 6 | \alias{balance} 7 | % display methods 8 | \alias{print.balance} 9 | \alias{plot.balance} 10 | 11 | \title{Functions to compute the balance statistics} 12 | \description{ 13 | This function computes the balance statistics before and after matching. 14 | } 15 | \usage{ 16 | balance(rawdata, treat, matched, estimand="ATT") 17 | 18 | \method{print}{balance}(x, \dots, combined = FALSE, digits = 2) 19 | 20 | \method{plot}{balance}(x, longcovnames=NULL, which.covs="mixed", 21 | v.axis=TRUE, cex.main=1, cex.vars=1, cex.pts=1, 22 | mar=c(4, 3, 5.1, 2), plot=TRUE, x.max = NULL, \ldots) 23 | } 24 | 25 | \arguments{ 26 | \item{rawdata}{The full covariate dataset} 27 | \item{treat}{the vector of treatment assignments for the full dataset} 28 | \item{matched}{vector of weights to apply to the full dataset to create the 29 | restructured data: for matching without replacement these will all be 0's and 1's; 30 | for one-to-one matching with replacement these will all be non-negative 31 | integers; for IPTW or more complicated matching methods these could be any non-negative numbers} 32 | \item{estimand}{can either be \code{ATT}, \code{ATC}, or \code{ATE}, default is \code{ATT}} 33 | \item{x}{an object return by the balance function.} 34 | \item{combined}{default is \code{FALSE}} 35 | \item{digits}{minimal number of \emph{significant} digits, default is 2.} 36 | \item{longcovnames}{long covariate names. If not provided, plot will 37 | use covariate variable name by default} 38 | \item{which.covs}{\code{mixed} then it plots all as std diffs; 39 | \code{binary} it only plots binary and as abs unstd diffs; 40 | \code{cont} it only plots non-binary and as abs std diffs} 41 | \item{v.axis}{default is \code{TRUE}, which shows the top axis--axis(3).} 42 | \item{cex.main}{font size of main title} 43 | \item{cex.vars}{font size of variabel names} 44 | \item{cex.pts}{point size of the estimates} 45 | \item{mar}{A numerical vector of the form \code{c(bottom, left, top, right)} 46 | which gives the number of lines of margin to be specified on 47 | the four sides of the plot. The default is \code{c(0,3,5.1,2)}.} 48 | \item{plot}{default is \code{TRUE}, which will plot the plot.} 49 | \item{x.max}{set the max of the \code{xlim}, default is \code{NULL}} 50 | \item{\dots}{other plot options may be passed to this function} 51 | } 52 | \details{ 53 | This function plots the balance statistics before and after matching. 54 | The open circle dots represent the unmatched balance statistics. The 55 | solid dots represent the matched balance statistics. The closer the value 56 | of the estimates to the zero, the better the treated and control groups are 57 | balanced after matching. 58 | } 59 | 60 | \note{ 61 | The function does not work with predictors that contain factor(x), log(x) or all 62 | other data transformation. Create new objects for these variables. Attach 63 | them into the original dataset before doing the matching procedure. 64 | } 65 | 66 | \references{Andrew Gelman and Jennifer Hill. (2006). 67 | \emph{Data Analysis Using Regression and Multilevel/Hierarchical Models}. 68 | Cambridge University Press. (Chapter 10)} 69 | 70 | \author{Jennifer Hill \email{jennifer.hill@nyu.edu}; 71 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn} 72 | } 73 | 74 | \seealso{\code{\link{matching}}, 75 | \code{\link{par}} 76 | } 77 | 78 | \examples{ 79 | # matching first 80 | old.par <- par(no.readonly = TRUE) 81 | data(lalonde) 82 | attach(lalonde) 83 | fit <- glm(treat ~ re74 + re75 + age + factor(educ) + 84 | black + hisp + married + nodegr + u74 + u75, 85 | family=binomial(link="logit")) 86 | pscores <- predict(fit, type="link") 87 | matches <- matching(z=lalonde$treat, score=pscores) 88 | matched <- matches$cnts 89 | 90 | # balance check 91 | b.stats <- balance(lalonde, treat, matched, estimand = "ATT") 92 | print(b.stats) 93 | plot(b.stats) 94 | par(old.par) 95 | } 96 | 97 | 98 | \keyword{methods} 99 | \keyword{manip} 100 | \keyword{hplot} 101 | \keyword{dplot} 102 | -------------------------------------------------------------------------------- /R/matching.R: -------------------------------------------------------------------------------- 1 | ## 2019 version of matching function 2 | 3 | matching <- function(z, score, replace=FALSE){ 4 | # argument z is the vector of indicators for treatment or control # 5 | # argument score is the vector of the propensity scores in the # 6 | # same order as z # 7 | # THIS FUNCTION REQUIRES THE INFERENTIAL GROUP TO SATISFY Z=1 # 8 | # Group satisfying Z=1 will remain intact and matches for them will # 9 | # be found from among those satisfying Z=0 # 10 | # # 11 | # the function (potentially) returns several things # 12 | # 1) match.ind: a vector of indices that the corresponding unit is # 13 | # matched to. The length is equal to the number of unique IDs # 14 | # 2) cnts: shows the number of times each unit will be used in any # 15 | # subsequent analyses (1 for each treated unit and number of # 16 | # times used as a match for each control unit (equivalently the # 17 | # number of treated units it is matched to) # # # 18 | # 3a) pairs: indicator for each pair [only available for # 19 | # replace=TRUE] 20 | # OR 21 | # 3b) matches: a matrix capturing which treated observations 22 | # were matched to which controls [only for replace=FALSE] 23 | # # 24 | # Ties are broken through random sampling so set seed if you want # 25 | # to replicate results # 26 | ##################################################################### 27 | n <- length(score) 28 | nt <- sum(z) 29 | nc <- sum(1-z) 30 | ind.t <- c(1:n)[z==1] 31 | ind.c <- c(1:n)[z==0] 32 | cnts <- rep(0, n) 33 | cnts[z==1] = rep(1,nt) 34 | scorec <- score[z == 0] 35 | scoret <- score[z == 1] 36 | # matching with replacement 37 | if (replace){ 38 | # calculate distances between all pairs of units 39 | dist = abs(outer(scoret,scorec,FUN="-")) 40 | # find the identify the controls with the minimum distance from 41 | # each treated -- if there are ties, randomly pick one 42 | mins = apply(dist,1,min) 43 | # create a matrix with 1's for control columns matching the minimum 44 | # distance for the corresponding treatment rows 45 | matches = dist - mins 46 | matches[matches!=0] = 1 47 | matches = 1 - matches 48 | # if more than one control observation is chosen as a match for a given 49 | # treated we randomly chose which column to retain 50 | if(sum(matches)>nt){ 51 | # figure out which rows and then replace the multiple 1's with one 52 | # randomly chosen one 53 | for(i in c(1:nt)[apply(matches,1,sum)>1]){ 54 | matches_i <- c(1:nc)[matches[i,]==1] 55 | nmi <- length(matches_i) 56 | matches[i,matches_i] <- sample(c(1,rep(0,nmi-1)),nmi,replace=FALSE) 57 | } 58 | } 59 | # now fill in matched and ind.mt and pairs and counts 60 | ind.cm <- matches %*% ind.c 61 | # now record counts 62 | cnts[z==0] <- apply(matches,2,sum) 63 | # match indicators -- shouldn't be used for analysis 64 | match.ind <- c(ind.t, ind.cm) 65 | out <- list(match.ind = match.ind, cnts = cnts, matches = matches) 66 | } 67 | # matching *without* replacement 68 | if (!replace){ 69 | pairs = rep(NA,n) 70 | match.ind <- rep(0, n) 71 | tally <- 0 72 | for (i in ind.t) { 73 | ## DEAL WITH TIES IN A MORE PRINCIPLED WAY? -- can do by adding a second 74 | # argument to break ties that is random 75 | available <- (1:n)[(z == 0) & (match.ind == 0)] 76 | j <- available[order(abs(score[available] - score[i]))[1]] 77 | cnts[j] <- 1 78 | match.ind[i] <- j 79 | match.ind[j] <- i 80 | tally <- tally + 1 81 | pairs[c(i, j)] <- tally 82 | } 83 | #match.ind <- match.ind[match.ind!=0] 84 | out <- list(match.ind = match.ind, cnts = cnts, pairs = pairs) 85 | } 86 | return(out) 87 | } 88 | 89 | 90 | #pscores.fun <- function(treat=Z, outs=Y, covs=X){ 91 | # # 92 | # N <- nrow(covs) 93 | # nouts <- 1 94 | # ncovs <- ncol(covs) 95 | # # 96 | # # first set up places to store results 97 | # res <- matrix(0,nouts,2) 98 | # bal <- matrix(0,ncovs,2) 99 | # # 100 | # # estimate p-scores 101 | # dat <- cbind.data.frame(treat=treat,covs) 102 | # mod <- glm(dat,family=binomial(link="logit")) 103 | # qx <- predict(mod, type="response")#mod$linear 104 | # # 105 | # ### Now Matching With Replacement 106 | # matchout <- matching(z=treat, score=qx, replace=TRUE) 107 | # # 108 | # ### and treatment effect estimation with robust s.e.'s 109 | # wts <- rep(1, N) 110 | # wts[treat == 0] <- matchout$cnts 111 | # res <- .wls.all2(cbind(rep(1, sum(wts > 0)), treat[wts > 0],covs[wts > 0, ]), wts[wts > 0], outs[wts > 0], treat[wts > 0]) 112 | # c(res[3],sqrt(res[2])) 113 | #} 114 | -------------------------------------------------------------------------------- /man/mcsamp.Rd: -------------------------------------------------------------------------------- 1 | \name{mcsamp} 2 | %\docType{genericFunction} 3 | \alias{mcsamp} 4 | \alias{mcsamp.default} 5 | \alias{mcsamp,merMod-method} 6 | %\alias{mcsamp,glmer-method} 7 | 8 | \title{Generic Function to Run \sQuote{mcmcsamp()} in lme4} 9 | 10 | \description{ 11 | The quick function for MCMC sampling for 12 | lmer and glmer objects and convert to Bugs objects for easy display. 13 | } 14 | \usage{ 15 | \method{mcsamp}{default}(object, n.chains=3, n.iter=1000, n.burnin=floor(n.iter/2), 16 | n.thin=max(1, floor(n.chains * (n.iter - n.burnin)/1000)), 17 | saveb=TRUE, deviance=TRUE, make.bugs.object=TRUE) 18 | \S4method{mcsamp}{merMod} (object, ...) 19 | %\S4method{mcsamp}{glmer} (object, ...) 20 | } 21 | 22 | \arguments{ 23 | \item{object}{\code{mer} objects from \code{lme4}} 24 | \item{n.chains}{number of MCMC chains} 25 | \item{n.iter}{number of iteration for each MCMC chain} 26 | \item{n.burnin}{number of burnin for each MCMC chain, 27 | Default is \code{n.iter/2}, that is, discarding the 28 | first half of the simulations.} 29 | \item{n.thin}{keep every kth draw from each MCMC chain. Must be a positive integer. 30 | Default is \code{max(1, floor(n.chains * (n.iter-n.burnin) / 31 | 1000))} which will only thin if there are at least 2000 32 | simulations.} 33 | \item{saveb}{if 'TRUE', causes the values 34 | of the random effects in each sample to be saved.} 35 | \item{deviance}{compute deviance for \code{mer} objects. Only works 36 | for \code{\link[lme4]{lmer}} object} 37 | \item{make.bugs.object}{tranform the output into bugs object, default is TRUE} 38 | \item{\ldots}{further arguments passed to or from other methods.} 39 | } 40 | 41 | \details{ 42 | This function generates a sample from the posterior 43 | distribution of the parameters of a fitted model using Markov 44 | Chain Monte Carlo methods. It automatically simulates multiple 45 | sequences and allows convergence to be monitored. The function relies on 46 | \code{mcmcsamp} in \code{lme4}. 47 | } 48 | 49 | \value{ 50 | An object of (S3) class '"bugs"' suitable for use with the 51 | functions in the "R2WinBUGS" package. 52 | } 53 | 54 | \references{Andrew Gelman and Jennifer Hill, 55 | Data Analysis Using Regression and Multilevel/Hierarchical Models, 56 | Cambridge University Press, 2006. 57 | 58 | Douglas Bates and Deepayan Sarkar, 59 | lme4: Linear mixed-effects models using S4 classes. 60 | } 61 | 62 | \author{Andrew Gelman \email{gelman@stat.columbia.edu}; 63 | Yu-Sung Su \email{ys463@columbia.edu} 64 | } 65 | 66 | \seealso{\code{\link{display}}, 67 | \code{\link[lme4]{lmer}}, 68 | \code{\link{sim}} 69 | } 70 | 71 | \examples{ 72 | ## Here's a simple example of a model of the form, y = a + bx + error, 73 | ## with 10 observations in each of 10 groups, and with both the intercept 74 | ## and the slope varying by group. First we set up the model and data. 75 | ## 76 | # group <- rep(1:10, rep(10,10)) 77 | # group2 <- rep(1:10, 10) 78 | # mu.a <- 0 79 | # sigma.a <- 2 80 | # mu.b <- 3 81 | # sigma.b <- 4 82 | # rho <- 0.56 83 | # Sigma.ab <- array (c(sigma.a^2, rho*sigma.a*sigma.b, 84 | # rho*sigma.a*sigma.b, sigma.b^2), c(2,2)) 85 | # sigma.y <- 1 86 | # ab <- mvrnorm (10, c(mu.a,mu.b), Sigma.ab) 87 | # a <- ab[,1] 88 | # b <- ab[,2] 89 | # d <- rnorm(10) 90 | # 91 | # x <- rnorm (100) 92 | # y1 <- rnorm (100, a[group] + b*x, sigma.y) 93 | # y2 <- rbinom(100, 1, prob=invlogit(a[group] + b*x)) 94 | # y3 <- rnorm (100, a[group] + b[group]*x + d[group2], sigma.y) 95 | # y4 <- rbinom(100, 1, prob=invlogit(a[group] + b*x + d[group2])) 96 | # 97 | ## 98 | ## Then fit and display a simple varying-intercept model: 99 | # 100 | # M1 <- lmer (y1 ~ x + (1|group)) 101 | # display (M1) 102 | # M1.sim <- mcsamp (M1) 103 | # print (M1.sim) 104 | # plot (M1.sim) 105 | ## 106 | ## Then the full varying-intercept, varying-slope model: 107 | ## 108 | # M2 <- lmer (y1 ~ x + (1 + x |group)) 109 | # display (M2) 110 | # M2.sim <- mcsamp (M2) 111 | # print (M2.sim) 112 | # plot (M2.sim) 113 | ## 114 | ## Then the full varying-intercept, logit model: 115 | ## 116 | # M3 <- lmer (y2 ~ x + (1|group), family=binomial(link="logit")) 117 | # display (M3) 118 | # M3.sim <- mcsamp (M3) 119 | # print (M3.sim) 120 | # plot (M3.sim) 121 | ## 122 | ## Then the full varying-intercept, varying-slope logit model: 123 | ## 124 | # M4 <- lmer (y2 ~ x + (1|group) + (0+x |group), 125 | # family=binomial(link="logit")) 126 | # display (M4) 127 | # M4.sim <- mcsamp (M4) 128 | # print (M4.sim) 129 | # plot (M4.sim) 130 | # 131 | ## 132 | ## Then non-nested varying-intercept, varying-slop model: 133 | ## 134 | # M5 <- lmer (y3 ~ x + (1 + x |group) + (1|group2)) 135 | # display(M5) 136 | # M5.sim <- mcsamp (M5) 137 | # print (M5.sim) 138 | # plot (M5.sim) 139 | 140 | } 141 | 142 | \keyword{models} 143 | \keyword{methods} 144 | -------------------------------------------------------------------------------- /man/sim.Rd: -------------------------------------------------------------------------------- 1 | \name{sim} 2 | %\docType{genericFunction} 3 | \alias{sim} 4 | \alias{sim-class} 5 | \alias{sim.merMod-class} 6 | \alias{sim,lm-method} 7 | \alias{sim,glm-method} 8 | \alias{sim,polr-method} 9 | \alias{sim,merMod-method} 10 | \alias{coef.sim} 11 | \alias{coef.sim.polr} 12 | \alias{coef.sim.merMod} 13 | \alias{fixef.sim.merMod} 14 | \alias{ranef.sim.merMod} 15 | \alias{fitted.sim.merMod} 16 | 17 | 18 | 19 | 20 | \title{Functions to Get Posterior Distributions} 21 | 22 | \description{ 23 | This generic function gets posterior simulations of sigma and beta from a \code{lm} object, or 24 | simulations of beta from a \code{glm} object, or 25 | simulations of beta from a \code{merMod} object 26 | } 27 | 28 | \usage{ 29 | sim(object, ...) 30 | 31 | \S4method{sim}{lm}(object, n.sims = 100) 32 | \S4method{sim}{glm}(object, n.sims = 100) 33 | \S4method{sim}{polr}(object, n.sims = 100) 34 | \S4method{sim}{merMod}(object, n.sims = 100) 35 | 36 | \method{coef}{sim}(object,\dots) 37 | \method{coef}{sim.polr}(object, slot=c("ALL", "coef", "zeta"),\dots) 38 | \method{coef}{sim.merMod}(object,\dots) 39 | \method{fixef}{sim.merMod}(object,\dots) 40 | \method{ranef}{sim.merMod}(object,\dots) 41 | \method{fitted}{sim.merMod}(object, regression,\dots) 42 | 43 | 44 | } 45 | 46 | \arguments{ 47 | \item{object}{the output of a call to \code{lm} with n data points and k predictors.} 48 | \item{slot}{return which slot of \code{sim.polr}, available options are \code{coef, zeta, ALL}.} 49 | \item{...}{further arguments passed to or from other methods.} 50 | \item{n.sims}{number of independent simulation draws to create.} 51 | \item{regression}{the orginial mer model} 52 | } 53 | 54 | \value{ 55 | \item{coef}{matrix (dimensions n.sims x k) of n.sims random draws of coefficients.} 56 | \item{zeta}{matrix (dimensions n.sims x k) of n.sims random draws of zetas (cut points in polr).} 57 | \item{fixef}{matrix (dimensions n.sims x k) of n.sims random draws of coefficients of the fixed effects for the \code{merMod} objects. Previously, it is called \code{unmodeled}.} 58 | \item{sigma}{vector of n.sims random draws of sigma 59 | (for \code{glm}'s, this just returns a vector of 1's or else of the 60 | square root of the overdispersion parameter if that is in the model)} 61 | } 62 | 63 | \references{Andrew Gelman and Jennifer Hill. (2006). 64 | \emph{Data Analysis Using Regression and Multilevel/Hierarchical Models}. 65 | Cambridge University Press.} 66 | 67 | \author{Andrew Gelman \email{gelman@stat.columbia.edu}; 68 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn}; 69 | Vincent Dorie \email{vjd4@nyu.edu} 70 | } 71 | 72 | \seealso{\code{\link{display}}, 73 | \code{\link{lm}}, 74 | \code{\link{glm}}, 75 | \code{\link[lme4]{lmer}} 76 | } 77 | 78 | \examples{ 79 | #Examples of "sim" 80 | set.seed (1) 81 | J <- 15 82 | n <- J*(J+1)/2 83 | group <- rep (1:J, 1:J) 84 | mu.a <- 5 85 | sigma.a <- 2 86 | a <- rnorm (J, mu.a, sigma.a) 87 | b <- -3 88 | x <- rnorm (n, 2, 1) 89 | sigma.y <- 6 90 | y <- rnorm (n, a[group] + b*x, sigma.y) 91 | u <- runif (J, 0, 3) 92 | y123.dat <- cbind (y, x, group) 93 | # Linear regression 94 | x1 <- y123.dat[,2] 95 | y1 <- y123.dat[,1] 96 | M1 <- lm (y1 ~ x1) 97 | display(M1) 98 | M1.sim <- sim(M1) 99 | coef.M1.sim <- coef(M1.sim) 100 | sigma.M1.sim <- sigma.hat(M1.sim) 101 | ## to get the uncertainty for the simulated estimates 102 | apply(coef(M1.sim), 2, quantile) 103 | quantile(sigma.hat(M1.sim)) 104 | 105 | # Logistic regression 106 | u.data <- cbind (1:J, u) 107 | dimnames(u.data)[[2]] <- c("group", "u") 108 | u.dat <- as.data.frame (u.data) 109 | y <- rbinom (n, 1, invlogit (a[group] + b*x)) 110 | M2 <- glm (y ~ x, family=binomial(link="logit")) 111 | display(M2) 112 | M2.sim <- sim (M2) 113 | coef.M2.sim <- coef(M2.sim) 114 | sigma.M2.sim <- sigma.hat(M2.sim) 115 | 116 | # Ordered Logistic regression 117 | house.plr <- polr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing) 118 | display(house.plr) 119 | M.plr <- sim(house.plr) 120 | coef.sim <- coef(M.plr, slot="coef") 121 | zeta.sim <- coef(M.plr, slot="zeta") 122 | coefall.sim <- coef(M.plr) 123 | 124 | # Using lmer: 125 | # Example 1 126 | E1 <- lmer (y ~ x + (1 | group)) 127 | display(E1) 128 | E1.sim <- sim (E1) 129 | coef.E1.sim <- coef(E1.sim) 130 | fixef.E1.sim <- fixef(E1.sim) 131 | ranef.E1.sim <- ranef(E1.sim) 132 | sigma.E1.sim <- sigma.hat(E1.sim) 133 | yhat <- fitted(E1.sim, E1) 134 | 135 | # Example 2 136 | u.full <- u[group] 137 | E2 <- lmer (y ~ x + u.full + (1 | group)) 138 | display(E2) 139 | E2.sim <- sim (E2) 140 | coef.E2.sim <- coef(E2.sim) 141 | fixef.E2.sim <- fixef(E2.sim) 142 | ranef.E2.sim <- ranef(E2.sim) 143 | sigma.E2.sim <- sigma.hat(E2.sim) 144 | yhat <- fitted(E2.sim, E2) 145 | 146 | # Example 3 147 | y <- rbinom (n, 1, invlogit (a[group] + b*x)) 148 | E3 <- glmer (y ~ x + (1 | group), family=binomial(link="logit")) 149 | display(E3) 150 | E3.sim <- sim (E3) 151 | coef.E3.sim <- coef(E3.sim) 152 | fixef.E3.sim <- fixef(E3.sim) 153 | ranef.E3.sim <- ranef(E3.sim) 154 | sigma.E3.sim <- sigma.hat(E3.sim) 155 | yhat <- fitted(E3.sim, E3) 156 | } 157 | 158 | \keyword{models} 159 | \keyword{methods} 160 | -------------------------------------------------------------------------------- /R/simmer.R: -------------------------------------------------------------------------------- 1 | # simulations of sigma, fixef, and ranef drawn from a posterior 2 | # under a flat prior and conditioned on estimate of ranef covar 3 | setMethod("sim", signature(object = "merMod"), 4 | function(object, n.sims=100) 5 | { 6 | applyLeftFactor <- function(decomp, rhs) { 7 | c(as.vector(decomp$ul %*% rhs[ranefRange] + decomp$ur %*% rhs[fixefRange]), 8 | as.vector(decomp$lr %*% rhs[fixefRange])); 9 | } 10 | 11 | # information is conditional on hyperparameters 12 | # information is of [ranef, fixef] 13 | getInverseInformationLeftFactor <- function(regression) { 14 | Lz <- getME(regression, "L"); 15 | Rzx <- getME(regression, "RZX"); 16 | Rx <- getME(regression, "RX"); 17 | 18 | # upper left, lower right, and lower left blocks of left-factor of inverse 19 | #solveFunc <- getMethod("solve", signature(a = "CHMfactor", b = "diagonalMatrix")); 20 | #Rz.inv <- t(solveFunc(Lz, Diagonal(Lz@Dim[1]), "L")); 21 | Rz.inv <- t(solve(Lz, Diagonal(Lz@Dim[1]), system = "L")); 22 | Rx.inv <- solve(Rx); 23 | Rzx.inv <- -Rz.inv %*% Rzx %*% Rx.inv; 24 | 25 | # this is me figuring some stuff out. new lmer doesn't permute Zt apparently 26 | # 27 | #Lz.tmp <- as(Lz, "sparseMatrix"); 28 | #P.chol <- as(Lz@perm + 1, "pMatrix"); 29 | #Zt <- getME(regression, "Zt"); 30 | #W <- Diagonal(numObs, regression@resp$sqrtXwt); 31 | ## P.ranef <- getRanefPerm(regression); 32 | #Lambdat <- getME(regression, "Lambdat") # t(P.ranef) %*% getME(regression, "Lambdat") %*% P.ranef; 33 | #A <- Lambdat %*% Zt; 34 | #C <- A %*% W; 35 | #L.hyp <- Cholesky(tcrossprod(P.chol %*% C), Imult = 1, LDL = FALSE, perm = FALSE); 36 | #L.hyp@perm <- Lz@perm; 37 | #L.hyp@type[1] <- 2L; 38 | #browser(); 39 | 40 | #P.ranef <- getRanefPerm(model); 41 | #Lambda <- P.ranef %*% getRanefChol(model) %*% t(P.ranef); 42 | Lambda <- t(getME(regression, "Lambda")); 43 | 44 | return(list(ul = Lambda %*% Rz.inv, ur = Lambda %*% Rzx.inv, lr = Rx.inv)); 45 | } 46 | 47 | # assumes p(sigma^2) propto sigma^-2 48 | sampleCommonScale <- function(ignored) { 49 | return(sqrt(1 / rgamma(1, 0.5 * numDoF, 50 | 0.5 * devcomp$cmp[["pwrss"]]))); 51 | } 52 | 53 | regression <- object; 54 | devcomp <- getME(regression, "devcomp"); 55 | dims <- devcomp$dims; 56 | 57 | if (dims[["NLMM"]] != 0L) stop("sim not yet implemented for nlmms"); 58 | 59 | numObs <- dims[["n"]]; 60 | numRanef <- dims[["q"]]; 61 | numFixef <- dims[["p"]]; 62 | numLevels <- dims[["reTrms"]]; 63 | 64 | isLinearMixedModel <- dims[["GLMM"]] == 0L && dims[["NLMM"]] == 0L; 65 | numEffects <- numRanef + numFixef; 66 | numDoF <- numObs - numFixef; 67 | 68 | # pertain to simulations that we do all as a single vector 69 | ranefRange <- 1:numRanef; 70 | fixefRange <- numRanef + 1:numFixef; 71 | 72 | # stuff used to rearrange ranef into usable form 73 | groupsPerUniqueFactor <- lapply(regression@flist, levels); 74 | factorPerLevel <- attr(regression@flist, "assign"); 75 | 76 | coefficientNamesPerLevel <- regression@cnms; 77 | numCoefficientsPerLevel <- as.numeric(sapply(coefficientNamesPerLevel, length)); 78 | numGroupsPerLevel <- as.numeric(sapply(groupsPerUniqueFactor[factorPerLevel], length)); 79 | numRanefsPerLevel <- numCoefficientsPerLevel * numGroupsPerLevel; 80 | ranefLevelMap <- rep.int(seq_along(numRanefsPerLevel), numRanefsPerLevel); 81 | 82 | # storage for sims 83 | simulatedSD <- if (isLinearMixedModel) { rep(NA, n.sims); } else { NA }; 84 | simulatedRanef <- vector("list", numLevels); 85 | names(simulatedRanef) <- names(regression@cnms); 86 | for (i in 1:numLevels) { 87 | simulatedRanef[[i]] <- array(NA, c(n.sims, numGroupsPerLevel[i], numCoefficientsPerLevel[i]), 88 | list(NULL, groupsPerUniqueFactor[[factorPerLevel[i]]], coefficientNamesPerLevel[[i]])); 89 | } 90 | 91 | simulatedFixef <- matrix(NA, n.sims, numFixef, 92 | dimnames = list(NULL, names(fixef(regression)))); 93 | 94 | 95 | # "b" are the rotated random effects, i.e. what ranef() returns in 96 | # a rearranged format. 97 | effectsMean <- c(getME(regression, "b")@x, getME(regression, "beta")); 98 | effectsCovLeftFactor <- getInverseInformationLeftFactor(regression); 99 | 100 | for (i in 1:n.sims) { 101 | if (isLinearMixedModel) { 102 | simulatedSD[i] <- sampleCommonScale(regression); 103 | sphericalEffects <- rnorm(numEffects, 0, simulatedSD[i]); 104 | } else { 105 | sphericalEffects <- rnorm(numEffects); 106 | } 107 | simulatedEffects <- applyLeftFactor(effectsCovLeftFactor, sphericalEffects) + effectsMean; 108 | 109 | simulatedFixef[i,] <- simulatedEffects[fixefRange]; 110 | 111 | rawRanef <- simulatedEffects[ranefRange]; 112 | simulatedRanefPerLevel <- split(rawRanef, ranefLevelMap); 113 | for (k in 1:numLevels) { 114 | simulatedRanef[[k]][i,,] <- matrix(simulatedRanefPerLevel[[k]], ncol = numCoefficientsPerLevel[k], byrow = TRUE); 115 | } 116 | } 117 | 118 | ans <- new("sim.merMod", 119 | "fixef" = simulatedFixef, 120 | "ranef" = simulatedRanef, 121 | "sigma" = simulatedSD); 122 | return(ans); 123 | }); 124 | -------------------------------------------------------------------------------- /R/triangleplot.R: -------------------------------------------------------------------------------- 1 | 2 | triangleplot <- function (x, y = NULL, cutpts = NULL, details = TRUE, n.col.legend = 5, 3 | cex.col = 0.7, cex.var = 0.9, digits = 1, color = FALSE) 4 | { 5 | if (!is.matrix(x)) 6 | stop("x must be a matrix!") 7 | if (dim(x)[1] != dim(x)[2]) 8 | stop("x must be a square matrix!") 9 | x.na <- x 10 | x.na[is.na(x.na)] <- -999 11 | z.plot <- x 12 | if (is.null(y)) { 13 | z.names <- dimnames(x)[[2]] 14 | } 15 | else { 16 | z.names <- y 17 | } 18 | for (i in 1:dim(z.plot)[1]) for (j in i:dim(z.plot)[2]) z.plot[i, 19 | j] <- NA 20 | layout(matrix(c(2, 1), 1, 2, byrow = FALSE), c(10.5, 1.5)) 21 | layout(matrix(c(2, 1), 1, 2, byrow = FALSE), c(10.5, 1.5)) 22 | if (is.null(cutpts)) { 23 | if (details) { 24 | neg.check <- abs(sum(z.plot[z.plot < 0], na.rm = T)) 25 | if (neg.check > 0) { 26 | z.breaks <- sort(c(0, seq(min(z.plot, na.rm = T), 27 | max(z.plot, na.rm = T), length = n.col.legend))) 28 | } 29 | else { 30 | z.breaks <- seq(min(z.plot, na.rm = T), max(z.plot, 31 | na.rm = T), length = n.col.legend + 1) 32 | } 33 | for (i in 1:4) { 34 | n1 <- length(unique(round(z.breaks, digits = digits))) 35 | n2 <- length(z.breaks) 36 | ifelse((n1 != n2), digits <- digits + 1, digits <- digits) 37 | } 38 | if (digits > 3) { 39 | stop("Too many digits! Try to adjust n.col.legend to get better presentation!") 40 | } 41 | } 42 | else { 43 | postive.z <- na.exclude(unique(round(z.plot[z.plot > 44 | 0], digits = digits))) 45 | neg.check <- abs(sum(z.plot[z.plot < 0], na.rm = T)) 46 | ifelse(neg.check > 0, negative.z <- na.exclude(unique(round(z.plot[z.plot < 47 | 0], digits = digits))), negative.z <- 0) 48 | max.z <- max(z.plot, na.rm = T) 49 | min.z <- min(z.plot, na.rm = T) 50 | z.breaks <- sort(unique(c(postive.z, negative.z))) 51 | n.breaks <- length(z.breaks) 52 | l.legend <- ceiling(n.col.legend/2) 53 | if (n.breaks > 8) { 54 | if (neg.check > 0) { 55 | postive.z <- seq(0, max(postive.z), length = l.legend + 56 | 1) 57 | negative.z <- seq(min(negative.z), 0, length = l.legend) 58 | z.breaks <- sort(unique(c(postive.z, negative.z))) 59 | n.breaks <- length(z.breaks) 60 | z.breaks[1] <- min.z 61 | z.breaks[n.breaks] <- max.z 62 | n.col.legend <- length(z.breaks) - 1 63 | } 64 | else { 65 | postive.z <- seq(0, max(postive.z), length = n.col.legend + 66 | 1) 67 | z.breaks <- sort(unique(c(postive.z, negative.z))) 68 | n.breaks <- length(z.breaks) 69 | z.breaks[1] <- min.z 70 | z.breaks[n.breaks] <- max.z 71 | n.col.legend <- length(z.breaks) - 1 72 | } 73 | } 74 | else { 75 | if (neg.check > 0) { 76 | z.breaks <- sort(c(0, seq(min(z.plot, na.rm = T), 77 | max(z.plot, na.rm = T), length = n.col.legend))) 78 | } 79 | else { 80 | z.breaks <- seq(min(z.plot, na.rm = T), max(z.plot, 81 | na.rm = T), length = n.col.legend + 1) 82 | } 83 | } 84 | } 85 | } 86 | if (!is.null(cutpts)) { 87 | z.breaks = cutpts 88 | n.breaks <- length(z.breaks) 89 | n.col.legend <- length(z.breaks) - 1 90 | } 91 | if (color) { 92 | z.colors <- heat.colors(n.col.legend)[n.col.legend:1] 93 | } 94 | else { 95 | z.colors <- gray(n.col.legend:1/n.col.legend) 96 | } 97 | par(mar = c(0.5, 0.1, 2, 0.1), pty = "m") 98 | plot(c(0, 1), c(min(z.breaks), max(z.breaks)), type = "n", 99 | bty = "n", xlab = "", ylab = "", xaxt = "n", yaxt = "n") 100 | for (i in 2:(length(z.breaks))) { 101 | rect(xleft = 0.5, ybottom = z.breaks[i - 1], xright = 1, 102 | ytop = z.breaks[i], col = z.colors[i - 1]) 103 | text(x = 0.45, y = z.breaks[i - 1], labels = format(round(z.breaks[i - 104 | 1], digits)), cex = cex.col, adj = 1, xpd = TRUE) 105 | } 106 | rect(xleft = 0.5, ybottom = z.breaks[length(z.breaks)], xright = 1, 107 | ytop = z.breaks[length(z.breaks)], col = z.colors[length(z.colors)]) 108 | text(x = 0.45, y = z.breaks[length(z.breaks)], labels = format(round(z.breaks[length(z.breaks)], 109 | digits)), cex = cex.col, adj = 1, xpd = TRUE) 110 | par(mar = c(0.1, 0.1, 2, 0.1), pty = "m") 111 | image(x = 1:dim(z.plot)[1], y = 1:dim(z.plot)[2], z = z.plot, 112 | xaxt = "n", yaxt = "n", bty = "n", col = z.colors, breaks = z.breaks, 113 | xlim = c(-2, dim(z.plot)[1] + 0.5), ylim = c(-1, dim(z.plot)[2] + 114 | 0.5), xlab = "", ylab = "") 115 | text(x = 1:dim(z.plot)[1], y = 1:dim(z.plot)[2], labels = z.names, 116 | cex = cex.var, adj = 1, xpd = TRUE) 117 | for (i in 1:dim(z.plot)[1]) { 118 | for (j in i:dim(z.plot)[2]) { 119 | if (x.na[i, j] == -999 & i != j) 120 | points(x = j, y = i, pch = "x", cex = 0.9) 121 | } 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /man/display.Rd: -------------------------------------------------------------------------------- 1 | \name{display} 2 | %\docType{genericFunction} 3 | \alias{display} 4 | \alias{display,lm-method} 5 | \alias{display,bayesglm-method} 6 | %\alias{display,bayesglm.h-method} 7 | \alias{display,glm-method} 8 | \alias{display,merMod-method} 9 | \alias{display,polr-method} 10 | \alias{display,svyglm-method} 11 | 12 | \title{Functions for Processing lm, glm, mer, polr and svyglm Output} 13 | 14 | \description{This generic function gives a 15 | clean printout of lm, glm, mer, polr and svyglm objects.} 16 | 17 | \usage{ 18 | display (object, ...) 19 | 20 | \S4method{display}{lm}(object, digits=2, detail=FALSE) 21 | \S4method{display}{bayesglm}(object, digits=2, detail=FALSE) 22 | %\S4method{display}{bayesglm.h}(object, digits=2, detail=FALSE) 23 | \S4method{display}{glm}(object, digits=2, detail=FALSE) 24 | \S4method{display}{merMod}(object, digits=2, detail=FALSE) 25 | \S4method{display}{polr}(object, digits=2, detail=FALSE) 26 | \S4method{display}{svyglm}(object, digits=2, detail=FALSE) 27 | } 28 | 29 | \arguments{ 30 | \item{object}{The output of a call to lm, glm, mer, polr, svyglm or related regressions 31 | function with n data points and k predictors.} 32 | \item{...}{further arguments passed to or from other methods.} 33 | \item{digits}{number of significant digits to display.} 34 | \item{detail}{defaul is \code{FALSE}, if \code{TRUE}, display p-values or z-values} 35 | } 36 | 37 | \details{This generic function gives a 38 | clean printout of lm, glm, mer and polr objects, focusing 39 | on the most pertinent pieces of information: 40 | the coefficients and their standard errors, the sample size, 41 | number of predictors, residual standard deviation, and R-squared. 42 | Note: R-squared is automatically displayed to 2 digits, and deviances are 43 | automatically displayed to 1 digit, no matter what. 44 | } 45 | 46 | \value{Coefficients 47 | and their standard errors, the sample size, number of predictors, 48 | residual standard 49 | deviation, and R-squared} 50 | 51 | \references{Andrew Gelman and Jennifer Hill, 52 | Data Analysis Using Regression and Multilevel/Hierarchical Models, 53 | Cambridge University Press, 2006.} 54 | 55 | \author{Andrew Gelman \email{gelman@stat.columbia.edu}; 56 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn}; 57 | Maria Grazia Pittau \email{grazia@stat.columbia.edu} 58 | } 59 | 60 | \note{Output are the model, the regression coefficients and standard errors, 61 | and the residual sd and R-squared (for a linear model), 62 | or the null deviance and residual deviance (for a generalized linear model). 63 | } 64 | 65 | \seealso{\code{\link[base]{summary}}, 66 | \code{\link[stats]{lm}}, 67 | \code{\link[stats]{glm}}, 68 | \code{\link[lme4]{lmer}}, 69 | \code{\link[MASS]{polr}}, 70 | \code{\link[survey]{svyglm}} 71 | } 72 | 73 | \examples{ 74 | # Here's a simple example of a model of the form, y = a + bx + error, 75 | # with 10 observations in each of 10 groups, and with both the 76 | # intercept and the slope varying by group. First we set up the model and data. 77 | group <- rep(1:10, rep(10,10)) 78 | group2 <- rep(1:10, 10) 79 | mu.a <- 0 80 | sigma.a <- 2 81 | mu.b <- 3 82 | sigma.b <- 4 83 | rho <- 0.56 84 | Sigma.ab <- array (c(sigma.a^2, rho*sigma.a*sigma.b, 85 | rho*sigma.a*sigma.b, sigma.b^2), c(2,2)) 86 | sigma.y <- 1 87 | ab <- mvrnorm (10, c(mu.a,mu.b), Sigma.ab) 88 | a <- ab[,1] 89 | b <- ab[,2] 90 | d <- rnorm(10) 91 | 92 | x <- rnorm (100) 93 | y1 <- rnorm (100, a[group] + b*x, sigma.y) 94 | y2 <- rbinom(100, 1, prob=invlogit(a[group] + b*x)) 95 | y3 <- rnorm (100, a[group] + b[group]*x + d[group2], sigma.y) 96 | y4 <- rbinom(100, 1, prob=invlogit(a[group] + b*x + d[group2])) 97 | 98 | 99 | # display a simple linear model 100 | 101 | M1 <- lm (y1 ~ x) 102 | display (M1) 103 | M1.sim <- sim(M1, n.sims=2) 104 | 105 | # display a simple logit model 106 | 107 | M2 <- glm (y2 ~ x, family=binomial(link="logit")) 108 | display (M2) 109 | M2.sim <- sim(M2, n.sims=2) 110 | 111 | # Then fit and display a simple varying-intercept model: 112 | 113 | M3 <- lmer (y1 ~ x + (1|group)) 114 | display (M3) 115 | M3.sim <- sim(M3, n.sims=2) 116 | 117 | 118 | # Then the full varying-intercept, varying-slope model: 119 | 120 | M4 <- lmer (y1 ~ x + (1 + x |group)) 121 | display (M4) 122 | M4.sim <- sim(M4, n.sims=2) 123 | 124 | 125 | # Then the full varying-intercept, logit model: 126 | 127 | M5 <- glmer (y2 ~ x + (1|group), family=binomial(link="logit")) 128 | display (M5) 129 | M5.sim <- sim(M5, n.sims=2) 130 | 131 | 132 | # Then the full varying-intercept, varying-slope logit model: 133 | 134 | M6 <- glmer (y2 ~ x + (1|group) + (0 + x |group), 135 | family=binomial(link="logit")) 136 | display (M6) 137 | M6.sim <- sim(M6, n.sims=2) 138 | 139 | 140 | # Then non-nested varying-intercept, varying-slop model: 141 | 142 | M7 <- lmer (y3 ~ x + (1 + x |group) + (1|group2)) 143 | display(M7) 144 | M7.sim <- sim(M7, n.sims=2) 145 | 146 | 147 | # Then the ordered logit model from polr 148 | 149 | M8 <- polr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing) 150 | display(M8) 151 | 152 | M9 <- bayespolr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing) 153 | display(M9) 154 | 155 | 156 | } 157 | 158 | 159 | \keyword{manip} 160 | \keyword{methods} 161 | -------------------------------------------------------------------------------- /R/mcsamp.R: -------------------------------------------------------------------------------- 1 | # mcsamp function (wrapper for mcmcsamp in lmer()) 2 | # Quick function to run mcmcsamp() [the function for MCMC sampling for 3 | # lmer objects) and convert to Bugs objects for easy display 4 | 5 | mcsamp.default <- function (object, n.chains=3, n.iter=1000, n.burnin=floor(n.iter/2), 6 | n.thin=max(1, floor(n.chains * (n.iter - n.burnin)/1000)), 7 | saveb=TRUE, deviance=TRUE, make.bugs.object=TRUE) 8 | { 9 | cat("mcsamp() used to be a wrapper for mcmcsamp() in lme4.\nCurrently, mcmcsamp() is no longer available in lme4.\nSo in the meantime, we suggest that users use sim() to get\nsimulated estimates.\n") 10 | } 11 | 12 | 13 | 14 | #mcsamp.default <- function (object, n.chains=3, n.iter=1000, n.burnin=floor(n.iter/2), 15 | # n.thin=max(1, floor(n.chains * (n.iter - n.burnin)/1000)), 16 | # saveb=TRUE, deviance=TRUE, make.bugs.object=TRUE) 17 | #{ 18 | # 19 | # if (n.chains<2) stop ("n.chains must be at least 2") 20 | # n.keep <- n.iter - n.burnin 21 | # first.chain <- mcmcsamp (object, n.iter, saveb=saveb, trans=TRUE, deviance=deviance)[(n.burnin+1):n.iter,] 22 | # n.parameters <- ncol(first.chain) 23 | # 24 | # if (deviance) { 25 | # sims <- array (NA, c(n.keep, n.chains, n.parameters+1)) 26 | # } 27 | # if (!deviance){ 28 | # sims <- array (NA, c(n.keep, n.chains, n.parameters)) 29 | # } 30 | # 31 | # pred.names <- attr(terms(object), "term.labels") 32 | # par.names <- dimnames(first.chain)[[2]] 33 | # par.names <- gsub("b.", "b@", par.names, ignore.case = FALSE, # Su: rename "b.*" to "" 34 | # extended = TRUE, perl = FALSE, 35 | # fixed = TRUE, useBytes = FALSE) 36 | # par.names <- gsub("b@.*", "", par.names, ignore.case = FALSE, 37 | # extended = TRUE, perl = FALSE, 38 | # fixed = FALSE) 39 | # par.names <- par.names[is.na(match(par.names,""))] 40 | # name.chk.idx <- as.logical(match(par.names, pred.names, nomatch=0)) 41 | # par.names[name.chk.idx] <- paste("beta", par.names[name.chk.idx], sep=".") 42 | # 43 | # if (saveb){ 44 | # b.hat <- se.coef (object) # Su: use se.coef() 45 | # n.groupings <- length(b.hat) - 1 46 | # J <- NA 47 | # K <- NA 48 | # for (m in 1:n.groupings){ 49 | # J[m] <- dim(b.hat[[m+1]])[1] 50 | # K[m] <- dim(b.hat[[m+1]])[2] 51 | # var.names <- paste (abbreviate(names(b.hat)[m+1],4), ".", 52 | # unlist (dimnames(b.hat[[m+1]])[2]), sep="") ##sep="." 53 | # par.names <- c (par.names, 54 | # paste (rep(var.names,J[m]), "[", rep(1:J[m],each=K[m]), "]", sep="")) 55 | # } 56 | # } 57 | # sims[,1,1:n.parameters] <- first.chain 58 | # 59 | # for (k in 2:n.chains){ 60 | # sims[,k,1:n.parameters] <- mcmcsamp (object, n.iter, saveb=saveb, trans=TRUE, deviance=deviance)[(n.burnin+1):n.iter,] 61 | # } 62 | # 63 | # select <- c(rep(FALSE, n.thin-1),TRUE) 64 | # sims <- sims[select,,] 65 | # 66 | # for (j in 1:n.parameters){ 67 | # if (pmatch("log(sigma^2)", par.names[j], nomatch=0)){#=="log(sigma^2)"){ 68 | # par.names[j] <- "sigma.y" 69 | # sims[,,j] <- exp (sims[,,j]/2) 70 | # } 71 | # else if (pmatch("log(", par.names[j], nomatch=0)){#(substr(par.names[j],1,4)=="log("){ 72 | # par.names[j] <- paste ("sigma.", substr(par.names[j], 5, nchar(par.names[j])-1), sep="") 73 | # sims[,,j] <- exp (sims[,,j]/2) 74 | # } 75 | # else if (pmatch("atanh(", par.names[j], nomatch=0)){#(substr(par.names[j],1,6)=="atanh("){ 76 | # par.names[j] <- paste ("rho.", substr(par.names[j], 7, nchar(par.names[j])-1), sep="") 77 | # sims[,,j] <- tanh (sims[,,j]) 78 | # } 79 | # #else if (substr(par.names[j],1,4)=="eta."){#(pmatch("eta.", par.names[j], nomatch=0)){#(substr(par.names[j],1,4)=="eta."){ 80 | # # par.names[j] <- paste ("", substr(par.names[j], 5, nchar(par.names[j])), sep="") 81 | # # par.names[j] <- par.names[j] 82 | # #} 83 | # else if (pmatch("deviance", par.names[j], nomatch=0)){#(par.names[j]=="deviance"){ # Su: keep par.names for "deviance" 84 | # sims[,,n.parameters+1] <- sims[,,j] 85 | # sims <- sims[,,-j] # Su: delete deviance value from sims 86 | # } 87 | ## else { 88 | ## } 89 | # } 90 | # par.names <- gsub("(", "", par.names, ignore.case = FALSE, 91 | # extended = TRUE, perl = FALSE, 92 | # fixed = TRUE, useBytes = FALSE) 93 | # par.names <- gsub(")", "", par.names, ignore.case = FALSE, 94 | # extended = TRUE, perl = FALSE, 95 | # fixed = TRUE, useBytes = FALSE) 96 | # # par.names <- gsub(".Intercept", ".Int", par.names, ignore.case = FALSE, 97 | ## extended = TRUE, perl = FALSE, 98 | ## fixed = TRUE, useBytes = FALSE) 99 | # par.names <- gsub("rescale", "z.", par.names, ignore.case = FALSE, 100 | # extended = TRUE, perl = FALSE, 101 | # fixed = TRUE, useBytes = FALSE) 102 | # 103 | # par.names <- par.names[is.na(match(par.names,"deviance"))] # Su: delete par.names for "deviance" 104 | # 105 | # if (deviance){ 106 | # dimnames(sims) <- list (NULL, NULL, c(par.names,"deviance")) 107 | # } 108 | # if (!deviance){ 109 | # dimnames(sims) <- list (NULL, NULL, par.names) 110 | # } 111 | # if (make.bugs.object){ 112 | # return (as.bugs.array (sims, program="lmer", n.iter=n.iter, n.burnin=n.burnin, n.thin=n.thin, DIC=deviance)) 113 | # } 114 | # else { 115 | # return (sims) 116 | # } 117 | #} 118 | # 119 | # 120 | # 121 | setMethod("mcsamp", signature(object = "merMod"), 122 | function (object, ...) 123 | { 124 | mcsamp.default(object, deviance=TRUE, ...) 125 | } 126 | ) 127 | # 128 | #setMethod("mcsamp", signature(object = "glmer"), 129 | # function (object, ...) 130 | #{ 131 | # mcsamp.default(object, deviance=FALSE, ...) 132 | #} 133 | #) 134 | -------------------------------------------------------------------------------- /R/sim.R: -------------------------------------------------------------------------------- 1 | setMethod("sim", signature(object = "lm"), 2 | function(object, n.sims=100) 3 | { 4 | object.class <- class(object)[[1]] 5 | summ <- summary (object) 6 | coef <- summ$coef[,1:2,drop=FALSE] 7 | dimnames(coef)[[2]] <- c("coef.est","coef.sd") 8 | sigma.hat <- summ$sigma 9 | beta.hat <- coef[,1,drop = FALSE] 10 | V.beta <- summ$cov.unscaled 11 | n <- summ$df[1] + summ$df[2] 12 | k <- summ$df[1] 13 | sigma <- rep (NA, n.sims) 14 | beta <- array (NA, c(n.sims,k)) 15 | dimnames(beta) <- list (NULL, rownames(beta.hat)) 16 | for (s in 1:n.sims){ 17 | sigma[s] <- sigma.hat*sqrt((n-k)/rchisq(1,n-k)) 18 | beta[s,] <- MASS::mvrnorm (1, beta.hat, V.beta*sigma[s]^2) 19 | } 20 | 21 | ans <- new("sim", 22 | coef = beta, 23 | sigma = sigma) 24 | return (ans) 25 | } 26 | ) 27 | 28 | 29 | 30 | setMethod("sim", signature(object = "glm"), 31 | function(object, n.sims=100) 32 | { 33 | object.class <- class(object)[[1]] 34 | summ <- summary (object, correlation=TRUE, dispersion = object$dispersion) 35 | coef <- summ$coef[,1:2,drop=FALSE] 36 | dimnames(coef)[[2]] <- c("coef.est","coef.sd") 37 | beta.hat <- coef[,1,drop=FALSE] 38 | sd.beta <- coef[,2,drop=FALSE] 39 | corr.beta <- summ$corr 40 | n <- summ$df[1] + summ$df[2] 41 | k <- summ$df[1] 42 | V.beta <- corr.beta * array(sd.beta,c(k,k)) * t(array(sd.beta,c(k,k))) 43 | #beta <- array (NA, c(n.sims,k)) 44 | # dimnames(beta) <- list (NULL, dimnames(beta.hat)[[1]]) 45 | # for (s in 1:n.sims){ 46 | # beta[s,] <- MASS::mvrnorm (1, beta.hat, V.beta) 47 | # } 48 | beta <- MASS::mvrnorm (n.sims, beta.hat, V.beta) 49 | # Added by Masanao 50 | beta2 <- array (0, c(n.sims,length(coefficients(object)))) 51 | dimnames(beta2) <- list (NULL, names(coefficients(object))) 52 | beta2[,dimnames(beta2)[[2]]%in%dimnames(beta)[[2]]] <- beta 53 | # Added by Masanao 54 | sigma <- rep (sqrt(summ$dispersion), n.sims) 55 | 56 | ans <- new("sim", 57 | coef = beta2, 58 | sigma = sigma) 59 | return(ans) 60 | } 61 | ) 62 | 63 | 64 | 65 | 66 | 67 | setMethod("sim", signature(object = "polr"), 68 | function(object, n.sims=100){ 69 | x <- as.matrix(model.matrix(object)) 70 | coefs <- coef(object) 71 | k <- length(coefs) 72 | zeta <- object$zeta 73 | Sigma <- vcov(object) 74 | 75 | if(n.sims==1){ 76 | parameters <- t(MASS::mvrnorm(n.sims, c(coefs, zeta), Sigma)) 77 | }else{ 78 | parameters <- MASS::mvrnorm(n.sims, c(coefs, zeta), Sigma) 79 | } 80 | ans <- new("sim.polr", 81 | coef = parameters[,1:k,drop=FALSE], 82 | zeta = parameters[,-(1:k),drop=FALSE]) 83 | return(ans) 84 | }) 85 | 86 | 87 | 88 | #setMethod("sim", signature(object = "mer"), 89 | # function(object, n.sims=100) 90 | # { 91 | # #object <- summary(object) 92 | ## if (lapply(object@bVar,sum)<=0|sum(unlist(lapply(object@bVar, is.na)))>0){ 93 | ## object@call$control <- list(usePQL=TRUE) 94 | ## object <- lmer(object@call$formula) 95 | # #} 96 | # #sc <- attr (VarCorr (object), "sc") 97 | # # simulate unmodeled coefficients 98 | # 99 | # fcoef <- fixef(object) 100 | # corF <- vcov(object)@factors$correlation 101 | # se.unmodeled <- corF@sd 102 | # V.beta <- (se.unmodeled %o% se.unmodeled) * as.matrix(corF) 103 | # beta.unmodeled <- NULL 104 | # if (length (fcoef) > 0){ 105 | # beta.unmodeled[[1]] <- mvrnorm (n.sims, fcoef, V.beta) 106 | # names (beta.unmodeled) <- "unmodeled" 107 | # } 108 | # # simulate coefficients within groups 109 | # #coef <- ranef (object) 110 | # #estimate <- ranef(object, postVar=TRUE) 111 | # #vars <- object@bVar 112 | # #beta.bygroup <- vars 113 | # 114 | # sc <- attr (VarCorr (object), "sc") 115 | # coef <- ranef(object, postVar=TRUE) 116 | # beta.bygroup <- c(coef) 117 | # n.groupings <- length (coef) 118 | # for (m in 1:n.groupings){ 119 | # #vars.m <- vars[[m]] 120 | # vars.m <- attr (coef[[m]], "postVar") 121 | # K <- dim(vars.m)[1] 122 | # J <- dim(vars.m)[3] 123 | # beta.bygroup[[m]] <- array (NA, c(n.sims, J, K)) 124 | # bhat <- coef[[m]] 125 | # for (j in 1:J){ 126 | # V.beta <- untriangle(vars.m[,,j])#*sc^2 127 | # beta.bygroup[[m]][,j,] <- mvrnorm (n.sims, bhat[j,], V.beta) 128 | # } 129 | # dimnames (beta.bygroup[[m]]) <- c (list(NULL), dimnames(bhat)) 130 | # } 131 | # betas <- c (beta.unmodeled, beta.bygroup) 132 | # return (betas) 133 | # } 134 | #) 135 | 136 | #setMethod("sim", signature(object = "mer"), 137 | # function(object, n.sims=100, ranef=TRUE) 138 | # { 139 | # # simulate unmodeled coefficients 140 | # fcoef <- fixef(object) 141 | # corF <- vcov(object)@factors$correlation 142 | # se.unmodeled <- corF@sd 143 | # V.beta <- (se.unmodeled %o% se.unmodeled) * as.matrix(corF) 144 | # beta.unmodeled <- NULL 145 | # if (length (fcoef) > 0){ 146 | # beta.unmodeled[[1]] <- mvrnorm (n.sims, fcoef, V.beta) 147 | # names (beta.unmodeled) <- "fixef"#"unmodeled" 148 | # coef <- beta.unmodeled 149 | # } 150 | # if(ranef){ 151 | # # simulate coefficients within groups 152 | # sc <- attr (VarCorr (object), "sc") # scale 153 | # #coef <- ranef (object) 154 | # #estimate <- ranef(object, postVar=TRUE) 155 | # coef <- ranef(object, postVar=TRUE) 156 | # beta.bygroup <- coef 157 | # n.groupings <- length (coef) 158 | # for (m in 1:n.groupings){ 159 | # bhat <- as.matrix(coef[[m]]) # to suit the use of mvrnorm 160 | # vars.m <- attr (coef[[m]], "postVar") 161 | # K <- dim(vars.m)[1] 162 | # J <- dim(vars.m)[3] 163 | # beta.bygroup[[m]] <- array (NA, c(n.sims, J, K)) 164 | # for (j in 1:J){ 165 | # V.beta <- .untriangle(vars.m[,,j])#*sc^2 166 | # beta.bygroup[[m]][,j,] <- mvrnorm (n.sims, bhat[j,], V.beta) 167 | # } 168 | # dimnames (beta.bygroup[[m]]) <- c (list(NULL), dimnames(bhat)) 169 | # } 170 | # coef <- c (beta.unmodeled, beta.bygroup) 171 | # } 172 | # return (coef) 173 | # } 174 | #) 175 | -------------------------------------------------------------------------------- /R/model.matrixBayes.R: -------------------------------------------------------------------------------- 1 | #setMethod("model.matrix.bayes", signature(object = "bayesglm"), 2 | model.matrixBayes <- function(object, data = environment(object), 3 | contrasts.arg = NULL, xlev = NULL, keep.order=FALSE, drop.baseline=FALSE,...) 4 | { 5 | #class(object) <- c("terms", "formula") 6 | t <- if( missing( data ) ) { 7 | terms( object ) 8 | }else{ 9 | terms.formula(object, data = data, keep.order=keep.order) 10 | } 11 | attr(t, "intercept") <- attr(object, "intercept") 12 | if (is.null(attr(data, "terms"))){ 13 | data <- model.frame(object, data, xlev=xlev) 14 | }else { 15 | reorder <- match(sapply(attr(t,"variables"), deparse, width.cutoff=500)[-1], names(data)) 16 | if (anyNA(reorder)) { 17 | stop( "model frame and formula mismatch in model.matrix()" ) 18 | } 19 | if(!identical(reorder, seq_len(ncol(data)))) { 20 | data <- data[,reorder, drop = FALSE] 21 | } 22 | } 23 | int <- attr(t, "response") 24 | if(length(data)) { # otherwise no rhs terms, so skip all this 25 | 26 | if (drop.baseline){ 27 | contr.funs <- as.character(getOption("contrasts")) 28 | }else{ 29 | contr.funs <- as.character(list("contr.bayes.unordered", "contr.bayes.ordered")) 30 | } 31 | 32 | namD <- names(data) 33 | ## turn any character columns into factors 34 | for(i in namD) 35 | if(is.character( data[[i]] ) ) { 36 | data[[i]] <- factor(data[[i]]) 37 | warning( gettextf( "variable '%s' converted to a factor", i ), domain = NA) 38 | } 39 | isF <- vapply(data, function(x) is.factor(x) || is.logical(x), NA) 40 | isF[int] <- FALSE 41 | isOF <- vapply(data, is.ordered, NA) 42 | for( nn in namD[isF] ) # drop response 43 | if( is.null( attr( data[[nn]], "contrasts" ) ) ) { 44 | contrasts( data[[nn]] ) <- contr.funs[1 + isOF[nn]] 45 | } 46 | ## it might be safer to have numerical contrasts: 47 | ## get(contr.funs[1 + isOF[nn]])(nlevels(data[[nn]])) 48 | if ( !is.null( contrasts.arg ) && is.list( contrasts.arg ) ) { 49 | if ( is.null( namC <- names( contrasts.arg ) ) ) { 50 | stop( "invalid 'contrasts.arg' argument" ) 51 | } 52 | for (nn in namC) { 53 | if ( is.na( ni <- match( nn, namD ) ) ) { 54 | warning( gettextf( "variable '%s' is absent, its contrast will be ignored", nn ), domain = NA ) 55 | } 56 | else { 57 | ca <- contrasts.arg[[nn]] 58 | if( is.matrix( ca ) ) { 59 | contrasts( data[[ni]], ncol( ca ) ) <- ca 60 | } 61 | else { 62 | contrasts( data[[ni]] ) <- contrasts.arg[[nn]] 63 | } 64 | } 65 | } 66 | } 67 | } else { # internal model.matrix needs some variable 68 | isF <- FALSE 69 | data <- data.frame(x=rep(0, nrow(data))) 70 | } 71 | #ans <- .Internal( model.matrix( t, data ) ) 72 | ans <- model.matrix.default(object=t, data=data) 73 | cons <- if(any(isF)){ 74 | lapply( data[isF], function(x) attr( x, "contrasts") ) 75 | }else { NULL } 76 | attr(ans, "contrasts" ) <- cons 77 | ans 78 | } 79 | #) 80 | 81 | #setMethod("model.matrix.bayes", signature(object = "bayesglm.h"), 82 | #model.matrix.bayes.h <- function (object, data = environment(object), 83 | # contrasts.arg = NULL, 84 | # xlev = NULL, keep.order = FALSE, batch = NULL, ...) 85 | #{ 86 | # class(object) <- c("formula") 87 | # t <- if (missing(data)) { 88 | # terms(object) 89 | # } 90 | # else { 91 | # terms(object, data = data, keep.order = keep.order) 92 | # } 93 | # attr(t, "intercept") <- attr(object, "intercept") 94 | # if (is.null(attr(data, "terms"))) { 95 | # data <- model.frame(object, data, xlev = xlev) 96 | # } 97 | # else { 98 | # reorder <- match(sapply(attr(t, "variables"), deparse, 99 | # width.cutoff = 500)[-1], names(data)) 100 | # if (any(is.na(reorder))) { 101 | # stop("model frame and formula mismatch in model.matrix()") 102 | # } 103 | # if (!identical(reorder, seq_len(ncol(data)))) { 104 | # data <- data[, reorder, drop = FALSE] 105 | # } 106 | # } 107 | # int <- attr(t, "response") 108 | # if (length(data)) { 109 | # contr.funs <- as.character(getOption("contrasts")) 110 | # contr.bayes.funs <- as.character(list("contr.bayes.unordered", 111 | # "contr.bayes.ordered")) 112 | # namD <- names(data) 113 | # for (i in namD) if (is.character(data[[i]])) { 114 | # data[[i]] <- factor(data[[i]]) 115 | # warning(gettextf("variable '%s' converted to a factor", i), domain = NA) 116 | # } 117 | # isF <- sapply(data, function(x) is.factor(x) || is.logical(x)) 118 | # isF[int] <- FALSE 119 | # isOF <- sapply(data, is.ordered) 120 | # if (length(batch) > 1) { 121 | # ba <- batch[isF[-1]] 122 | # } 123 | # else if (length(batch) == 1) { 124 | # ba <- rep(batch, length(isF[-1])) 125 | # } 126 | # else { 127 | # ba <- rep(0, length(isF[-1])) 128 | # } 129 | # iin <- 1 130 | # for (nn in namD[isF]) if (is.null(attr(data[[nn]], "contrasts"))) { 131 | # if (ba[[iin]] > 0) { 132 | # contrasts(data[[nn]]) <- contr.bayes.funs 133 | # } 134 | # else { 135 | # contrasts(data[[nn]]) <- contr.funs 136 | # } 137 | # iin <- iin + 1 138 | # } 139 | # if (!is.null(contrasts.arg) && is.list(contrasts.arg)) { 140 | # if (is.null(namC <- names(contrasts.arg))) { 141 | # stop("invalid 'contrasts.arg' argument") 142 | # } 143 | # for (nn in namC) { 144 | # if (is.na(ni <- match(nn, namD))) { 145 | # warning(gettextf("variable '%s' is absent, its contrast will be ignored", 146 | # nn), domain = NA) 147 | # } 148 | # else { 149 | # ca <- contrasts.arg[[nn]] 150 | # if (is.matrix(ca)) { 151 | # contrasts(data[[ni]], ncol(ca)) <- ca 152 | # } 153 | # else { 154 | # contrasts(data[[ni]]) <- contrasts.arg[[nn]] 155 | # } 156 | # } 157 | # } 158 | # } 159 | # } 160 | # else { 161 | # isF <- FALSE 162 | # data <- list(x = rep(0, nrow(data))) 163 | # } 164 | # ans <- .Internal(model.matrix(t, data)) 165 | # cons <- if (any(isF)) { 166 | # lapply(data[isF], function(x) attr(x, "contrasts")) 167 | # } 168 | # else { 169 | # NULL 170 | # } 171 | # attr(ans, "contrasts") <- cons 172 | # ans 173 | #} 174 | ##) 175 | -------------------------------------------------------------------------------- /man/coefplot.Rd: -------------------------------------------------------------------------------- 1 | \name{coefplot} 2 | %\docType{genericFunction} 3 | \alias{coefplot} 4 | \alias{coefplot.default} 5 | \alias{coefplot,numeric-method} 6 | \alias{coefplot,lm-method} 7 | \alias{coefplot,glm-method} 8 | \alias{coefplot,bugs-method} 9 | \alias{coefplot,polr-method} 10 | 11 | 12 | \title{Generic Function for Making Coefficient Plot} 13 | \description{ 14 | Functions that plot the coefficients plus and minus 1 and 2 sd 15 | from a lm, glm, bugs, and polr fits. 16 | } 17 | \usage{ 18 | coefplot(object,\dots) 19 | 20 | \method{coefplot}{default}(coefs, sds, CI=2, 21 | lower.conf.bounds, upper.conf.bounds, 22 | varnames=NULL, vertical=TRUE, 23 | v.axis=TRUE, h.axis=TRUE, 24 | cex.var=0.8, cex.pts=0.9, 25 | col.pts=1, pch.pts=20, var.las=2, 26 | main=NULL, xlab=NULL, ylab=NULL, mar=c(1,3,5.1,2), 27 | plot=TRUE, add=FALSE, offset=.1, \dots) 28 | 29 | \S4method{coefplot}{bugs}(object, var.idx=NULL, varnames=NULL, 30 | CI=1, vertical=TRUE, 31 | v.axis=TRUE, h.axis=TRUE, 32 | cex.var=0.8, cex.pts=0.9, 33 | col.pts=1, pch.pts=20, var.las=2, 34 | main=NULL, xlab=NULL, ylab=NULL, 35 | plot=TRUE, add=FALSE, offset=.1, 36 | mar=c(1,3,5.1,2), \dots) 37 | 38 | \S4method{coefplot}{numeric}(object, \dots) 39 | \S4method{coefplot}{lm}(object, varnames=NULL, intercept=FALSE, \dots) 40 | \S4method{coefplot}{glm}(object, varnames=NULL, intercept=FALSE, \dots) 41 | \S4method{coefplot}{polr}(object, varnames=NULL, \dots) 42 | } 43 | 44 | \arguments{ 45 | \item{object}{fitted objects-lm, glm, bugs and polr, or a vector of coefficients.} 46 | \item{...}{further arguments passed to or from other methods.} 47 | \item{coefs}{a vector of coefficients.} 48 | \item{sds}{a vector of sds of coefficients.} 49 | \item{CI}{confidence interval, default is 2, which will plot plus and minus 2 sds or 50 | 95\% CI. If CI=1, plot plus and minus 1 sds or 50\% CI instead.} 51 | \item{lower.conf.bounds}{lower bounds of confidence intervals.} 52 | \item{upper.conf.bounds}{upper bounds of confidence intervals.} 53 | \item{varnames}{a vector of variable names, default is NULL, which will use 54 | the names of variables; if specified, the length of varnames must be equal to 55 | the length of predictors, including the intercept.} 56 | \item{vertical}{orientation of the plot, default is TRUE which will plot 57 | variable names in the 2nd axis. If FALSE, plot variable names in the first 58 | axis instead.} 59 | \item{v.axis}{default is TRUE, which shows the bottom axis--axis(1).} 60 | \item{h.axis}{default is TRUE, which shows the left axis--axis(2).} 61 | \item{cex.var}{The fontsize of the varible names, default=0.8.} 62 | \item{cex.pts}{The size of data points, default=0.9.} 63 | \item{col.pts}{color of points and segments, default is black.} 64 | \item{pch.pts}{symbol of points, default is solid dot.} 65 | \item{var.las}{the orientation of variable names against the axis, default is 2. 66 | see the usage of \code{las} in \code{\link{par}}.} 67 | \item{main}{The main title (on top) using font and size (character 68 | expansion) \code{par("font.main")} and color \code{par("col.main")}.} 69 | \item{xlab}{X axis label using font and character expansion 70 | \code{par("font.lab")} and color \code{par("col.lab")}.} 71 | \item{ylab}{Y axis label, same font attributes as \code{xlab}.} 72 | \item{mar}{A numerical vector of the form \code{c(bottom, left, top, right)} 73 | which gives the number of lines of margin to be specified on 74 | the four sides of the plot. The default is \code{c(1,3,5.1,2)}.} 75 | \item{plot}{default is TRUE, plot the estimates.} 76 | \item{add}{if add=TRUE, plot over the existing plot. default is FALSE.} 77 | \item{offset}{add extra spaces to separate from the existing dots. default is 0.1.} 78 | % \item{lower.bound}{default is -Inf.} 79 | \item{var.idx}{the index of the variables of a bugs object, default 80 | is NULL which will plot all the variables.} 81 | \item{intercept}{If TRUE will plot intercept, default=FALSE to get better presentation.} 82 | } 83 | 84 | \details{ 85 | This function plots coefficients from bugs, lm, glm and polr 86 | with 1 sd and 2 sd interval bars. 87 | } 88 | 89 | \value{ 90 | Plot of the coefficients from a bugs, lm or glm fit. You can add the intercept, 91 | the variable names and the display the result of the fitted model. 92 | } 93 | 94 | \references{Andrew Gelman and Jennifer Hill, 95 | Data Analysis Using Regression and Multilevel/Hierarchical Models, 96 | Cambridge University Press, 2006.} 97 | 98 | \author{Yu-Sung Su \email{suyusung@tsinghua.edu.cn} 99 | } 100 | 101 | \seealso{\code{\link{display}}, 102 | \code{\link[graphics]{par}}, 103 | \code{\link[stats]{lm}}, 104 | \code{\link[stats]{glm}}, 105 | \code{\link{bayesglm}}, 106 | \code{\link[graphics]{plot}} 107 | } 108 | \examples{ 109 | old.par <- par(no.readonly = TRUE) 110 | 111 | y1 <- rnorm(1000,50,23) 112 | y2 <- rbinom(1000,1,prob=0.72) 113 | x1 <- rnorm(1000,50,2) 114 | x2 <- rbinom(1000,1,prob=0.63) 115 | x3 <- rpois(1000, 2) 116 | x4 <- runif(1000,40,100) 117 | x5 <- rbeta(1000,2,2) 118 | 119 | longnames <- c("a long name01","a long name02","a long name03", 120 | "a long name04","a long name05") 121 | 122 | fit1 <- lm(y1 ~ x1 + x2 + x3 + x4 + x5) 123 | fit2 <- glm(y2 ~ x1 + x2 + x3 + x4 + x5, 124 | family=binomial(link="logit")) 125 | op <- par() 126 | # plot 1 127 | par (mfrow=c(2,2)) 128 | coefplot(fit1) 129 | coefplot(fit2, col.pts="blue") 130 | 131 | # plot 2 132 | longnames <- c("(Intercept)", longnames) 133 | coefplot(fit1, longnames, intercept=TRUE, CI=1) 134 | 135 | # plot 3 136 | coefplot(fit2, vertical=FALSE, var.las=1, frame.plot=TRUE) 137 | 138 | # plot 4: comparison to show bayesglm works better than glm 139 | n <- 100 140 | x1 <- rnorm (n) 141 | x2 <- rbinom (n, 1, .5) 142 | b0 <- 1 143 | b1 <- 1.5 144 | b2 <- 2 145 | y <- rbinom (n, 1, invlogit(b0+b1*x1+b2*x2)) 146 | y <- ifelse (x2==1, 1, y) 147 | x1 <- rescale(x1) 148 | x2 <- rescale(x2, "center") 149 | 150 | M1 <- glm (y ~ x1 + x2, family=binomial(link="logit")) 151 | display (M1) 152 | M2 <- bayesglm (y ~ x1 + x2, family=binomial(link="logit")) 153 | display (M2) 154 | 155 | #=================== 156 | # stacked plot 157 | #=================== 158 | coefplot(M2, xlim=c(-1,5), intercept=TRUE) 159 | coefplot(M1, add=TRUE, col.pts="red") 160 | 161 | #==================== 162 | # arrayed plot 163 | #==================== 164 | par(mfrow=c(1,2)) 165 | x.scale <- c(0, 7.5) # fix x.scale for comparison 166 | coefplot(M1, xlim=x.scale, main="glm", intercept=TRUE) 167 | coefplot(M2, xlim=x.scale, main="bayesglm", intercept=TRUE) 168 | 169 | # plot 5: the ordered logit model from polr 170 | M3 <- polr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing) 171 | coefplot(M3, main="polr") 172 | 173 | M4 <- bayespolr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing) 174 | coefplot(M4, main="bayespolr", add=TRUE, col.pts="red") 175 | 176 | ## plot 6: plot bugs & lmer 177 | # par <- op 178 | # M5 <- lmer(Reaction ~ Days + (1|Subject), sleepstudy) 179 | # M5.sim <- mcsamp(M5) 180 | # coefplot(M5.sim, var.idx=5:22, CI=1, ylim=c(18,1), main="lmer model") 181 | 182 | 183 | # plot 7: plot coefficients & sds vectors 184 | coef.vect <- c(0.2, 1.4, 2.3, 0.5) 185 | sd.vect <- c(0.12, 0.24, 0.23, 0.15) 186 | longnames <- c("var1", "var2", "var3", "var4") 187 | coefplot (coef.vect, sd.vect, varnames=longnames, main="Regression Estimates") 188 | coefplot (coef.vect, sd.vect, varnames=longnames, vertical=FALSE, 189 | var.las=1, main="Regression Estimates") 190 | 191 | par(old.par) 192 | } 193 | 194 | \keyword{hplot} 195 | \keyword{dplot} 196 | \keyword{methods} 197 | \keyword{manip} 198 | -------------------------------------------------------------------------------- /man/bayespolr.Rd: -------------------------------------------------------------------------------- 1 | \name{bayespolr} 2 | \docType{class} 3 | % Classes 4 | \alias{bayespolr-class} 5 | % functions 6 | \alias{bayespolr} 7 | % display methods 8 | \alias{print,bayespolr-method} 9 | \alias{show,bayespolr-method} 10 | 11 | \title{Bayesian Ordered Logistic or Probit Regression} 12 | 13 | \description{Bayesian functions for ordered logistic or probit modeling 14 | with independent normal, t, or Cauchy prior distribution 15 | for the coefficients. 16 | } 17 | \usage{ 18 | bayespolr(formula, data, weights, start, ..., 19 | subset, na.action, contrasts = NULL, 20 | Hess = TRUE, model = TRUE, 21 | method = c("logistic", "probit", "cloglog", "cauchit"), 22 | drop.unused.levels=TRUE, 23 | prior.mean = 0, 24 | prior.scale = 2.5, 25 | prior.df = 1, 26 | prior.counts.for.bins = NULL, 27 | min.prior.scale=1e-12, 28 | scaled = TRUE, 29 | maxit = 100, 30 | print.unnormalized.log.posterior = FALSE) 31 | } 32 | 33 | \arguments{ 34 | \item{formula}{a formula expression as for regression models, of the form 35 | \code{response ~ predictors}. The response should be a factor 36 | (preferably an ordered factor), which will be interpreted as 37 | an ordinal response, with levels ordered as in the factor. A 38 | proportional odds model will be fitted. The model must have 39 | an intercept: attempts to remove one will lead to a warning 40 | and be ignored. An offset may be used. See the documentation 41 | of \code{formula} for other details.} 42 | 43 | \item{data}{an optional data frame in which to interpret the variables 44 | occurring in \code{formula}.} 45 | 46 | \item{weights}{optional case weights in fitting. Default to 1.} 47 | 48 | \item{start}{initial values for the parameters. This is in the format 49 | \code{c(coefficients, zeta)}} 50 | 51 | \item{\dots}{additional arguments to be passed to \code{optim}, most often a 52 | \code{control} argument.} 53 | 54 | \item{subset}{expression saying which subset of the rows of the data should 55 | be used in the fit. All observations are included by 56 | default.} 57 | 58 | \item{na.action}{a function to filter missing data.} 59 | 60 | \item{contrasts}{a list of contrasts to be used for some or all of the 61 | factors appearing as variables in the model formula.} 62 | 63 | \item{Hess}{logical for whether the Hessian (the observed information 64 | matrix) should be returned.} 65 | 66 | \item{model}{logical for whether the model matrix should be returned.} 67 | 68 | \item{method}{logistic or probit or complementary log-log or cauchit 69 | (corresponding to a Cauchy latent variable and only available 70 | in R >= 2.1.0).} 71 | 72 | \item{drop.unused.levels}{default \code{TRUE}, if \code{FALSE}, it interpolates the 73 | intermediate values if the data have integer levels.} 74 | 75 | \item{prior.mean}{prior mean for the coefficients: default is 0. 76 | Can be a vector of length equal to the number of predictors 77 | (not counting the intercepts). If it is a scalar, it is 78 | expanded to the length of this vector.} 79 | 80 | \item{prior.scale}{prior scale for the coefficients: default is 2.5. 81 | Can be a vector of length equal to the number of predictors 82 | (not counting the intercepts). If it is a scalar, it is expanded 83 | to the length of this vector.} 84 | 85 | \item{prior.df}{for t distribution: default is 1 (Cauchy). 86 | Set to \code{Inf} to get normal prior distributions. Can 87 | be a vector of length equal to the number of predictors (not counting 88 | the intercepts). If it is a scalar, it is expanded to the length of this 89 | vector.} 90 | 91 | \item{prior.counts.for.bins}{default is \code{NULL}, which will augment the data by 92 | giving each cut point a \code{1/levels(y)}. To use a noninformative prior, assign 93 | prior.counts.for.bins = 0. If it is a scalar, it is expanded to the number 94 | of levels of y.} 95 | 96 | \item{min.prior.scale}{Minimum prior scale for the coefficients: default is 1e-12.} 97 | 98 | 99 | \item{scaled}{if \code{scaled = TRUE}, then the prior distribution is rescaled. 100 | Can be a vector of length equal to the number of cutpoints 101 | (intercepts). If it is a scalar, it is expanded to the length of 102 | this vector.} 103 | 104 | \item{maxit}{integer giving the maximal number of IWLS iterations, default is 100. This can also be controlled by \code{control}.} 105 | 106 | \item{print.unnormalized.log.posterior}{display the unnormalized log posterior 107 | likelihood for bayesglm fit, default=\code{FALSE}} 108 | } 109 | \details{ 110 | The program is a simple alteration of \code{\link[MASS]{polr}} in 111 | \code{VR} version 7.2-31 that augments the 112 | loglikelihood with the log of the t prior distributions for the 113 | coefficients. 114 | 115 | We use Student-t prior distributions for the coefficients. The prior 116 | distributions for the intercepts (the cutpoints) are set so they apply 117 | to the value when all predictors are set to their mean values. 118 | 119 | If scaled=TRUE, the scales for the prior distributions of the 120 | coefficients are determined as follows: For a predictor with only one 121 | value, we just use \code{prior.scale}. For a predictor with two 122 | values, we use prior.scale/range(x). 123 | For a predictor with more than two values, we use prior.scale/(2*sd(x)). 124 | } 125 | 126 | \value{ 127 | See \code{polr} for details. 128 | 129 | \item{prior.mean}{prior means for the cofficients.} 130 | \item{prior.scale}{prior scales for the cofficients.} 131 | \item{prior.df}{prior dfs for the cofficients.} 132 | \item{prior.counts.for.bins}{prior counts for the cutpoints.} 133 | } 134 | 135 | \author{Andrew Gelman \email{gelman@stat.columbia.edu}; 136 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn}; 137 | Maria Grazia Pittau \email{grazia@stat.columbia.edu} 138 | } 139 | \seealso{\code{\link{bayesglm}}, 140 | \code{\link[MASS]{polr}} 141 | } 142 | 143 | \examples{ 144 | M1 <- polr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing) 145 | display (M1) 146 | 147 | M2 <- bayespolr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing, 148 | prior.scale=Inf, prior.df=Inf) # Same as M1 149 | display (M2) 150 | 151 | M3 <- bayespolr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing) 152 | display (M3) 153 | 154 | M4 <- bayespolr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing, 155 | prior.scale=2.5, prior.df=1) # Same as M3 156 | display (M4) 157 | 158 | M5 <- bayespolr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing, 159 | prior.scale=2.5, prior.df=7) 160 | display (M5) 161 | 162 | M6 <- bayespolr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing, 163 | prior.scale=2.5, prior.df=Inf) 164 | display (M6) 165 | 166 | # Assign priors 167 | M7 <- bayespolr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing, 168 | prior.mean=rep(0,6), prior.scale=rep(2.5,6), prior.df=c(1,1,1,7,7,7)) 169 | display (M7) 170 | 171 | 172 | #### Another example 173 | y <- factor (rep (1:10,1:10)) 174 | x <- rnorm (length(y)) 175 | x <- x - mean(x) 176 | 177 | M8 <- polr (y ~ x) 178 | display (M8) 179 | 180 | M9 <- bayespolr (y ~ x, prior.scale=Inf, prior.df=Inf, prior.counts.for.bins=0) 181 | display (M9) # same as M1 182 | 183 | M10 <- bayespolr (y ~ x, prior.scale=Inf, prior.df=Inf, prior.counts.for.bins=10000) 184 | display (M10) 185 | 186 | 187 | #### Another example 188 | 189 | y <- factor (rep (1:3,1:3)) 190 | x <- rnorm (length(y)) 191 | x <- x - mean(x) 192 | 193 | M11 <- polr (y ~ x) 194 | display (M11) 195 | 196 | M12 <- bayespolr (y ~ x, prior.scale=Inf, prior.df=Inf, prior.counts.for.bins=0) 197 | display (M12) # same as M1 198 | 199 | M13 <- bayespolr (y ~ x, prior.scale=Inf, prior.df=Inf, prior.counts.for.bins=1) 200 | display (M13) 201 | 202 | M14 <- bayespolr (y ~ x, prior.scale=Inf, prior.df=Inf, prior.counts.for.bins=10) 203 | display (M14) 204 | 205 | } 206 | 207 | \keyword{models} 208 | \keyword{methods} 209 | \keyword{regression} 210 | -------------------------------------------------------------------------------- /R/AllInternal.R: -------------------------------------------------------------------------------- 1 | # some useful little functions 2 | #.round <- base:::round 3 | sd.scalar <- function (x, ...) {sqrt(var(as.vector(x), ...))} 4 | wmean <- function (x, w, ...) {mean(x*w, ...)/mean(w, ...)} 5 | logit <- function (x) {log(x/(1-x))} 6 | .untriangle <- function (x) {x + t(x) - x*diag(nrow(as.matrix(x)))} 7 | 8 | 9 | 10 | 11 | # new functions! 12 | 13 | as.matrix.VarCorr <- function (x, ..., useScale, digits){ 14 | # VarCorr function for lmer objects, altered as follows: 15 | # 1. specify rounding 16 | # 2. print statement at end is removed 17 | # 3. reMat is returned 18 | # 4. last line kept in reMat even when there's no error term 19 | sc <- attr(x, "sc")[[1]] 20 | if(is.na(sc)) sc <- 1 21 | # recorr <- lapply(varc, function(el) el@factors$correlation) 22 | recorr <- lapply(x, function(el) attr(el, "correlation")) 23 | #reStdDev <- c(lapply(recorr, slot, "sd"), list(Residual = sc)) 24 | reStdDev <- c(lapply(x, function(el) attr(el, "stddev")), list(Residual = sc)) 25 | reLens <- unlist(c(lapply(reStdDev, length))) 26 | reMat <- array('', c(sum(reLens), 4), 27 | list(rep('', sum(reLens)), 28 | c("Groups", "Name", "Variance", "Std.Dev."))) 29 | reMat[1+cumsum(reLens)-reLens, 1] <- names(reLens) 30 | reMat[,2] <- c(unlist(lapply(reStdDev, names)), "") 31 | # reMat[,3] <- format(unlist(reStdDev)^2, digits = digits) 32 | # reMat[,4] <- format(unlist(reStdDev), digits = digits) 33 | reMat[,3] <- fround(unlist(reStdDev)^2, digits) 34 | reMat[,4] <- fround(unlist(reStdDev), digits) 35 | if (any(reLens > 1)) { 36 | maxlen <- max(reLens) 37 | corr <- 38 | do.call("rbind", 39 | lapply(recorr, 40 | function(x, maxlen) { 41 | x <- as(x, "matrix") 42 | # cc <- format(round(x, 3), nsmall = 3) 43 | cc <- fround (x, digits) 44 | cc[!lower.tri(cc)] <- "" 45 | nr <- dim(cc)[1] 46 | if (nr >= maxlen) return(cc) 47 | cbind(cc, matrix("", nr, maxlen-nr)) 48 | }, maxlen)) 49 | colnames(corr) <- c("Corr", rep("", maxlen - 1)) 50 | reMat <- cbind(reMat, rbind(corr, rep("", ncol(corr)))) 51 | } 52 | # if (!useScale) reMat <- reMat[-nrow(reMat),] 53 | if (useScale<0) reMat[nrow(reMat),] <- c ("No residual sd", rep("",ncol(reMat)-1)) 54 | return (reMat) 55 | } 56 | 57 | 58 | # rwish and dwish functions stolen from Martin and Quinn's MCMCpack 59 | 60 | rwish <- function (v, S){ 61 | if (!is.matrix(S)) 62 | S <- matrix(S) 63 | if (nrow(S) != ncol(S)) { 64 | stop(message = "S not square in rwish().\n") 65 | } 66 | if (v < nrow(S)) { 67 | stop(message = "v is less than the dimension of S in rwish().\n") 68 | } 69 | p <- nrow(S) 70 | CC <- chol(S) 71 | Z <- matrix(0, p, p) 72 | diag(Z) <- sqrt(rchisq(p, v:(v - p + 1))) 73 | if (p > 1) { 74 | pseq <- 1:(p - 1) 75 | Z[rep(p * pseq, pseq) + unlist(lapply(pseq, seq))] <- rnorm(p * 76 | (p - 1)/2) 77 | } 78 | return(crossprod(Z %*% CC)) 79 | } 80 | 81 | dwish <- function (W, v, S) { 82 | if (!is.matrix(S)) 83 | S <- matrix(S) 84 | if (nrow(S) != ncol(S)) { 85 | stop(message = "W not square in dwish()\n\n") 86 | } 87 | if (!is.matrix(W)) 88 | S <- matrix(W) 89 | if (nrow(W) != ncol(W)) { 90 | stop(message = "W not square in dwish()\n\n") 91 | } 92 | if (nrow(S) != ncol(W)) { 93 | stop(message = "W and X of different dimensionality in dwish()\n\n") 94 | } 95 | if (v < nrow(S)) { 96 | stop(message = "v is less than the dimension of S in dwish()\n\n") 97 | } 98 | k <- nrow(S) 99 | gammapart <- 1 100 | for (i in 1:k) { 101 | gammapart <- gammapart * gamma((v + 1 - i)/2) 102 | } 103 | denom <- gammapart * 2^(v * k/2) * pi^(k * (k - 1)/4) 104 | detS <- det(S) 105 | detW <- det(W) 106 | hold <- solve(S) %*% W 107 | tracehold <- sum(hold[row(hold) == col(hold)]) 108 | num <- detS^(-v/2) * detW^((v - k - 1)/2) * exp(-1/2 * tracehold) 109 | return(num/denom) 110 | } 111 | 112 | # no visible binding~~~~~~~~~~~~~~~ 113 | # functions used to pass the check for bayespolr 114 | 115 | pgumbel <- function(q, loc = 0, scale = 1, lower.tail = TRUE) 116 | { 117 | q <- (q - loc)/scale 118 | p <- exp(-exp(-q)) 119 | if (!lower.tail) 1 - p else p 120 | } 121 | 122 | dgumbel <- function (x, loc = 0, scale = 1, log = FALSE) 123 | { 124 | d <- log(1/scale) - x - exp(-x) 125 | if (!log) exp(d) else d 126 | } 127 | 128 | # defin n to pass the bayesglm.fit and bayesglm.h.fit check 129 | n <- NULL 130 | 131 | # for mcplot 132 | .pvalue <- function ( v1, v2 ){ 133 | mean( ( sign( v1 - v2 ) + 1 ) / 2 ) 134 | } 135 | 136 | .is.significant <- function ( p, alpha = 0.05 ){ 137 | significant <- 0 + ( p > ( 1 - alpha ) ) - ( p < alpha ) 138 | return( significant ) 139 | } 140 | 141 | 142 | .weights.default <- function (object, ...) 143 | { 144 | wts <- object$weights 145 | if (is.null(wts)) 146 | wts 147 | else napredict(object$na.action, wts) 148 | } 149 | 150 | #.sweep.inv <- function(G){ 151 | # # sweeps a symmetric matrix on all positions 152 | # # (so inverts the matrix) 153 | # for(i in 1:nrow(G)) { 154 | # G <- .sweep.oper(G, i) 155 | # } 156 | # G 157 | #} 158 | # 159 | #.sweep.oper <- function(G = theta, k = 1.){ 160 | # # k is the sweep position 161 | # p <- dim(G)[1.] 162 | # H <- G 163 | # #first do generic elements (those that don't involve k) 164 | # H[] <- 0. 165 | # tmp <- matrix(G[, k], p, 1.) %*% matrix(G[, k], 1., p) 166 | # #now replace the row and col with index=k 167 | # H <- G - tmp/G[k, k] 168 | # H[, k] <- G[, k]/G[k, k] 169 | # #now replace the (k,k) diagonal element 170 | # H[k, ] <- G[, k]/G[k, k] 171 | # # and we're done 172 | # H[k, k] <- -1./G[k, k] 173 | # H 174 | #} 175 | # 176 | # 177 | #.wls.all2 <- function(X, w = wts, Y = y, treat = Trt) 178 | #{ 179 | # # 180 | # # This produces coefficient estimates and both standard and robust variances 181 | # # estimates for regression with weights 182 | # # the standard variance corresponds to a situation where an observation represents 183 | # # the mean of w observations 184 | # # the robust variance corresponds to a situation where weights represent 185 | # # probability or sampling weights 186 | # # 187 | # # first put together the necessary data inputs 188 | # # 189 | # nunits <- sum(w > 0) 190 | # k <- ncol(X) 191 | # ## now the weights, properly normed 192 | # wn <- w * (nunits/sum(w)) 193 | # W <- diag(wn * (nunits/sum(wn))) 194 | # # 195 | # # x prime x inverse (including weights) 196 | # vhat <- - .sweep.inv((t(X) %*% W %*% X)) 197 | # # 198 | # # estimated regression coefficients and variance for just the treatment coefficient 199 | # b <- vhat %*% t(X) %*% W %*% Y 200 | # MSE <- c(t(Y) %*% W %*% Y - t(b) %*% t(X) %*% W %*% Y)/(nunits - k) 201 | # var.std <- (vhat * MSE)[2, 2] 202 | # # 203 | # ###### now for the robust variance calculations 204 | # # now a matrix where each row represents the contribution to the score 205 | # # for each observation 206 | # U <- c((Y - X %*% b) * wn) * X 207 | # # finite sample adjustment 208 | # qc <- nunits/(nunits - 2) 209 | # # the sum of outer products of each of the above score contributions for 210 | # # each person is calculated here 211 | # prodU <- array(0, c(k, k, nunits)) 212 | # for(i in 1:nunits) { 213 | # prodU[, , i] <- outer(U[i, ], U[i, ]) 214 | # } 215 | # # putting it all together... 216 | # Vrob <- qc * vhat %*% apply(prodU, c(1, 2), sum) %*% vhat 217 | # # and we pull off the variance just for the treatment effect 218 | # var.rob <- Vrob[2, 2] 219 | # ############### 220 | # results <- c(var.std, var.rob, b[2]) 221 | # results 222 | #} 223 | -------------------------------------------------------------------------------- /R/balance.R: -------------------------------------------------------------------------------- 1 | # balance function after 2019 2 | 3 | balance <- function (rawdata, treat, matched, estimand="ATT") 4 | #factor = TRUE) 5 | { 6 | # rawdata: the full covariate dataset 7 | # treat: the vector of treatment assignments for the full dataset 8 | # matched: vector of weights to apply to the full dataset to create the 9 | # restructured data: 10 | # --for matching without replacement these will all be 0's and 1's 11 | # --for one-to-one matching with replacement these will all be non-negative 12 | # integers 13 | # --for IPTW or more complicated matching methods these could be any 14 | # non-negative numbers 15 | # estimand: can either be ATT, ATC, or ATE 16 | #require("Hmisc") 17 | if(missing(rawdata)) stop("rawdata is required") 18 | if(missing(matched)) stop("argument matched is required") 19 | if(missing(treat)) stop("treatment vector (treat) is required") 20 | cat("Balance diagnostics assume that the estimand is the",estimand,"\n") 21 | # 22 | #raw.dat <- data.frame(rawdata, treat = treat) 23 | covnames <- colnames(rawdata) 24 | if (is.null(covnames)){ 25 | cat("No covariate names provided. Generic names will be generated.") 26 | covnames = paste("v",c(1:ncol(rawdata)),sep="") 27 | } 28 | K <- length(covnames) 29 | diff.means <- matrix(NA, K, 5) 30 | var.t <- numeric(K) 31 | var.c <- numeric(K) 32 | std.denom <- numeric(K) 33 | binary <- rep(1,K) 34 | # 35 | # First we calculate balance on the RAW DATA 36 | # Columns are (1) treat mean, (2) control mean, (3) diff in means, (4) abs std diff, 37 | # (5) ratio of sds 38 | for (i in 1:K) { 39 | # separate means by group 40 | diff.means[i, 1] <- mean(rawdata[treat==1, i]) 41 | diff.means[i, 2] <- mean(rawdata[treat==0, i]) 42 | # separate variances by group == only used as input to calculations below 43 | var.t[i] <- var(rawdata[(treat == 1), i]) 44 | var.c[i] <- var(rawdata[(treat == 0), i]) 45 | # denominator in standardized difference calculations 46 | if(estimand=="ATE"){std.denom[i] <- sqrt((var.t[i]+var.c[i])/2)} 47 | else{ 48 | std.denom[i] <- ifelse(estimand=="ATT",sqrt(var.t[i]),sqrt(var.c[i])) 49 | } 50 | # difference in means 51 | diff.means[i, 3] <- diff.means[i, 1] - diff.means[i, 2] 52 | # standardized difference in means (sign intact) 53 | diff.means[i, 4] <- abs(diff.means[i, 3]/std.denom[i]) 54 | if(length(unique(rawdata[,covnames[i]]))>2){ 55 | binary[i] = 0 56 | } 57 | } 58 | #ifelse(estimand="ATT",sqrt(var.c[i]/var.t[i]),sqrt(var.t[i]/var.c[i])) 59 | # dimnames(diff.means) <- list(covnames[-(K + 1)], c("treat", "control", "unstd.diff", 60 | # "abs.std.diff", "ratio")) 61 | # diff.means[is.na(diff.means)] = "--" #maybe only worry about in print function 62 | dimnames(diff.means) <- list(covnames, c("treat", "control", "unstd.diff", 63 | "abs.std.diff", "ratio")) 64 | # Now we calculate balance on the restructured data 65 | diff.means.matched = matrix(NA, K, 5) 66 | # 67 | for (i in 1:K) { 68 | wts0 <- matched[treat==0] 69 | # separate means by group 70 | diff.means.matched[i, 1] <- mean(rawdata[treat == 1, i]) 71 | diff.means.matched[i, 2] <- weighted.mean(rawdata[treat==0, i],w=wts0) 72 | # separate variances by group == only used as input to calculations below 73 | # these overwrite the variance above 74 | var.t[i] <- var(rawdata[treat == 1, i]) 75 | var.c[i] <- as.numeric(stats::cov.wt(rawdata[treat == 0, i, drop = FALSE], wt = wts0)$cov) 76 | # difference in means 77 | diff.means.matched[i, 3] <- diff.means.matched[i, 1] - diff.means.matched[i, 2] 78 | # absolute standardized difference in means (denominator is stolen from 79 | # calculations on raw data above) 80 | diff.means.matched[i, 4] <- abs(diff.means.matched[i, 3])/std.denom[i] 81 | if(length(unique(rawdata[,covnames[i]]))>2){ 82 | # just for binary 83 | # ratio of sds (treat over control: should we change to comparison over inferential) 84 | diff.means.matched[i, 5] <- sqrt(var.c[i]/var.t[i]) 85 | } 86 | } 87 | #dimnames(diff.means.matched) <- list(covnames[-(K + 1)], c("treat", "control", "unstd.diff", 88 | # "abs.std.diff", "ratio")) 89 | dimnames(diff.means.matched) <- list(covnames, c("treat", "control", "unstd.diff", 90 | "abs.std.diff", "ratio")) 91 | # 92 | out <- list(diff.means.raw = diff.means, diff.means.matched = diff.means.matched, 93 | covnames = covnames, binary = binary) 94 | class(out) <- "balance" 95 | return(out) 96 | } 97 | 98 | 99 | print.balance <- function(x, ..., combined=FALSE, digits= 2) 100 | { 101 | if(combined==FALSE){ 102 | cat("Balance Statistics for Unmatched Data\n") 103 | cat("--\n") 104 | print(round(x$diff.means.raw, digits=digits)) 105 | cat("--\n") 106 | cat("\n") 107 | cat("Balance Statistics for Matched Data\n") 108 | cat("--\n") 109 | print(round(x$diff.means.matched, digits=digits), na.print="--") 110 | cat("--\n") 111 | cat("\n") 112 | } 113 | else{ 114 | cat("Balance Statistics\n") 115 | cat("--\n") 116 | print(round(cbind(x$diff.means.raw,x$diff.matched.raw)[,c(4,9,5,10)], 117 | digits=digits), na.print="--") 118 | } 119 | } 120 | 121 | ### NEXT NEED TO FIGURE OUT HOW TO REVERSE THE ORDER OF THE COVARIATES 122 | 123 | plot.balance <- function(x, longcovnames=NULL, which.covs="mixed", 124 | v.axis=TRUE, cex.main=1, cex.vars=1, cex.pts=1, 125 | mar=c(4, 3, 5.1, 2), plot=TRUE, x.max = NULL,...) 126 | { 127 | # if which.covs = mixed then it plots all as std diffs 128 | # if which.covs = binary it only plots binary and as abs unstd diffs 129 | # if which.covs = cont it only plots non-binary and as abs std diffs 130 | # 131 | 132 | covnames <- x$covnames 133 | if(!is.null(x.max)){ 134 | x.range = c(0,x.max) 135 | } 136 | # if(which.covs=="binary") { 137 | # cat("condition satisfied \n") 138 | # } 139 | 140 | # if plotting all, then use the standardized diff for all 141 | if(which.covs == "mixed"){ 142 | pts <- x$diff.means.raw[,4] # before matched.dat 143 | pts2 <- x$diff.means.matched[,4] # after matched 144 | K <- length(pts) 145 | idx <- 1:K 146 | main="Absolute Standardized Difference in Means" 147 | } 148 | #if plotting just binary use the unstandardized difference 149 | # for the plot make it the absolute value of 150 | if(which.covs == "binary"){ 151 | pts <- abs(x$diff.means.raw[x$binary==TRUE,3]) # before matched.dat 152 | pts2 <- abs(x$diff.means.matched[x$binary==TRUE,3]) # after matched 153 | K <- length(pts) 154 | idx <- 1:K 155 | main="Absolute Difference in Means" 156 | covnames = covnames[x$binary==TRUE] 157 | } 158 | #if plotting just continuous use the standardized difference 159 | if(which.covs == "cont"){ 160 | pts <- x$diff.means.raw[x$binary==FALSE,4] # before matched 161 | pts2 <- x$diff.means.matched[x$binary==FALSE,4] # after matched 162 | K <- length(pts) 163 | idx <- 1:K 164 | main="Absolute Standardized Difference in Means" 165 | covnames = covnames[x$binary==FALSE] 166 | } 167 | cat(pts,"\n") 168 | # tune the graphic console 169 | #par (mar=mar, mgp=mgp, oma=oma, tcl=tcl) 170 | 171 | par(mar = mar) 172 | if (is.null(longcovnames)) { 173 | longcovnames <- covnames 174 | maxchar <- max(sapply(longcovnames, nchar)) 175 | } 176 | else { 177 | maxchar <- max(sapply(longcovnames, nchar)) 178 | } 179 | min.mar <- par("mar") 180 | mar[2] <- max(min.mar[2], trunc(mar[2] + maxchar/10)) + mar[2] + 0.5 181 | par(mar = mar) 182 | 183 | ## now reverse the order of everything so the plot proceeds from 184 | ## to top to bottom with respect to original ordering of variables 185 | pts = rev(pts) 186 | pts2 = rev(pts2) 187 | longcovnames = rev(longcovnames) 188 | 189 | if(plot){ 190 | # plot the estimates 191 | if(is.null(x.max)){ 192 | plot(c(pts,pts2), c(idx,idx), 193 | #xlim=c(0, max(c(pts,pts2))), 194 | bty="n", xlab="", ylab="", 195 | xaxt="n", yaxt="n", type="n", 196 | main=main, cex.main=cex.main) 197 | } 198 | if(!is.null(x.max)){ 199 | plot(c(pts,pts2), c(idx,idx), 200 | bty="n", xlab="", ylab="", 201 | xaxt="n", yaxt="n", type="n", 202 | xlim=x.range, 203 | main=main, cex.main=cex.main) 204 | } 205 | abline(v=0, lty=2) 206 | points(pts, idx, cex=cex.pts) # before matched 207 | points(pts2, idx, pch=19, cex=cex.pts) # after matched 208 | if (v.axis){ 209 | axis(3) 210 | } 211 | if (is.null(longcovnames)){ 212 | axis(2, at=1:K, labels=covnames[1:K], 213 | las=2, hadj=1, lty=0, cex.axis=cex.vars) 214 | } 215 | else{ 216 | axis(2, at=1:K, labels=longcovnames[1:K], 217 | las=2, hadj=1, lty=0, cex.axis=cex.vars) 218 | } 219 | } 220 | else{ 221 | plot(c(pts,pts2), c(idx,idx), 222 | bty="n", xlab="", ylab="", 223 | xaxt="n", yaxt="n", #xaxs="i", 224 | #yaxs="i", 225 | type="n", axes=FALSE, 226 | #ylim=c(max(idx)+.25, min(idx)-.25), 227 | #xlim=x.range, 228 | main="", cex.main=cex.main,...) 229 | } 230 | return(list("raw"=pts, "matched"=pts2)) 231 | } 232 | -------------------------------------------------------------------------------- /R/bayespolr.R: -------------------------------------------------------------------------------- 1 | # New bayespolr() using Kenny's Dirichlet prior distribution 2 | 3 | bayespolr <- 4 | function (formula, data, weights, start, ..., subset, na.action, 5 | contrasts = NULL, Hess = TRUE, model = TRUE, method = c("logistic", 6 | "probit", "cloglog", "cauchit"), drop.unused.levels = TRUE, 7 | prior.mean = 0, prior.scale = 2.5, prior.df = 1, prior.counts.for.bins = NULL, 8 | min.prior.scale = 1e-12, 9 | scaled = TRUE, maxit = 100, print.unnormalized.log.posterior = FALSE) 10 | { 11 | logit <- function(p) log(p/(1 - p)) 12 | dt.deriv <- function(x, mean, scale, df, log = TRUE, delta = 0.001) { 13 | (dt((x + delta - mean)/scale, df, log = log) - dt((x - 14 | delta - mean)/scale, df, log = log))/(2 * delta) 15 | } 16 | 17 | fmin <- function(beta) { 18 | theta <- beta[pc + 1:q] 19 | gamm <- c(-100, cumsum(c(theta[1], exp(theta[-1]))), 20 | 100) 21 | eta <- offset 22 | if (pc > 0) 23 | eta <- eta + drop(x %*% beta[1:pc]) 24 | pr <- pfun(gamm[y + 1] - eta) - pfun(gamm[y] - eta) 25 | if (all(pr > 0)) 26 | f <- -sum(wt * log(pr)) 27 | else f <- Inf 28 | if (pc > 0) 29 | f <- f - sum(dt((beta[1:pc] - prior.mean)/prior.scale, 30 | prior.df, log = TRUE)) 31 | return(f) 32 | } 33 | 34 | gmin <- function(beta) { 35 | jacobian <- function(theta) { 36 | k <- length(theta) 37 | etheta <- exp(theta) 38 | mat <- matrix(0, k, k) 39 | mat[, 1] <- rep(1, k) 40 | for (i in 2:k) mat[i:k, i] <- etheta[i] 41 | mat 42 | } 43 | theta <- beta[pc + 1:q] 44 | gamm <- c(-100, cumsum(c(theta[1], exp(theta[-1]))), 45 | 100) 46 | eta <- offset 47 | if (pc > 0) 48 | eta <- eta + drop(x %*% beta[1:pc]) 49 | pr <- pfun(gamm[y + 1] - eta) - pfun(gamm[y] - eta) 50 | p1 <- dfun(gamm[y + 1] - eta) 51 | p2 <- dfun(gamm[y] - eta) 52 | g1 <- if (pc > 0) 53 | t(x) %*% (wt * (p1 - p2)/pr) 54 | else numeric(0) 55 | xx <- .polrY1 * p1 - .polrY2 * p2 56 | g2 <- -t(xx) %*% (wt/pr) 57 | g2 <- t(g2) %*% jacobian(theta) 58 | if (pc > 0) 59 | g1 <- g1 - dt.deriv(beta[1:pc], prior.mean, prior.scale, 60 | prior.df, log = TRUE) 61 | if (all(pr > 0)) 62 | c(g1, g2) 63 | else rep(NA, pc + q) 64 | } 65 | m <- match.call(expand.dots = FALSE) 66 | mf <- match(c("formula", "data", "subset", "weights", "na.action", 67 | "etastart", "mustart", "offset"), names(m), 0) 68 | m <- m[c(1, mf)] 69 | m$drop.unused.levels <- drop.unused.levels 70 | method <- match.arg(method) 71 | 72 | ##### adjust prior.scale for probit #### 73 | if (method == "probit"){ 74 | prior.scale <- prior.scale*1.6 75 | } 76 | ################ 77 | 78 | for(jj in 1:length(prior.scale)){ 79 | if (prior.scale[jj] < min.prior.scale){ 80 | prior.scale[jj] <- min.prior.scale 81 | warning ("prior scale for variable ", jj, " set to min.prior.scale = ", min.prior.scale,"\n") 82 | } 83 | } 84 | 85 | 86 | 87 | 88 | pfun <- switch(method, logistic = plogis, probit = pnorm, 89 | cloglog = pgumbel, cauchit = pcauchy) 90 | dfun <- switch(method, logistic = dlogis, probit = dnorm, 91 | cloglog = dgumbel, cauchit = dcauchy) 92 | if (is.matrix(eval.parent(m$data))) 93 | m$data <- as.data.frame(data) 94 | m$start <- m$Hess <- m$method <- m$... <- NULL 95 | m[[1]] <- as.name("model.frame") 96 | m <- eval.parent(m) 97 | Terms <- attr(m, "terms") 98 | x <- model.matrix(Terms, m, contrasts) 99 | xint <- match("(Intercept)", colnames(x), nomatch = 0) 100 | n <- nrow(x) 101 | pc <- ncol(x) 102 | cons <- attr(x, "contrasts") 103 | if (xint > 0) { 104 | x <- x[, -xint, drop = FALSE] 105 | pc <- pc - 1 106 | } 107 | else warning("an intercept is needed and assumed") 108 | wt <- model.weights(m) 109 | if (!length(wt)) 110 | wt <- rep(1, n) 111 | offset <- model.offset(m) 112 | if (length(offset) <= 1) 113 | offset <- rep(0, n) 114 | y <- model.response(m) 115 | if (!is.factor(y)) 116 | stop("response must be a factor") 117 | lev <- levels(y) 118 | if (length(lev) <= 2) 119 | stop("response must have 3 or more levels") 120 | y <- unclass(y) 121 | q <- length(lev) - 1 122 | Y <- matrix(0, n, q) 123 | .polrY1 <- col(Y) == y 124 | .polrY2 <- col(Y) == y - 1 125 | if (missing(start)) { 126 | q1 <- length(lev)%/%2 127 | y1 <- (y > q1) 128 | X <- cbind(Intercept = rep(1, n), x) 129 | fit <- switch(method, 130 | logistic = bayesglm.fit(X, y1, 131 | wt, family = binomial(), offset = offset, intercept = TRUE, 132 | prior.mean = prior.mean, prior.scale = prior.scale, 133 | prior.df = prior.df, prior.mean.for.intercept = 0, 134 | prior.scale.for.intercept = 10, prior.df.for.intercept = 1, 135 | min.prior.scale = min.prior.scale, 136 | scaled = scaled, control = glm.control(maxit=maxit), 137 | print.unnormalized.log.posterior = print.unnormalized.log.posterior), 138 | probit = bayesglm.fit(X, y1, wt, family = binomial("probit"), 139 | offset = offset, intercept = TRUE, prior.mean = prior.mean, 140 | prior.scale = prior.scale, prior.df = prior.df, 141 | prior.mean.for.intercept = 0, prior.scale.for.intercept = 10, 142 | prior.df.for.intercept = 1, 143 | min.prior.scale = min.prior.scale, 144 | scaled = scaled, 145 | control = glm.control(maxit=maxit), print.unnormalized.log.posterior = print.unnormalized.log.posterior), 146 | cloglog = bayesglm.fit(X, y1, wt, family = binomial("probit"), 147 | offset = offset, intercept = TRUE, prior.mean = prior.mean, 148 | prior.scale = prior.scale, prior.df = prior.df, 149 | prior.mean.for.intercept = 0, prior.scale.for.intercept = 10, 150 | prior.df.for.intercept = 1, 151 | min.prior.scale = min.prior.scale, 152 | scaled = scaled, 153 | control = glm.control(maxit=maxit), print.unnormalized.log.posterior = print.unnormalized.log.posterior), 154 | cauchit = bayesglm.fit(X, y1, wt, family = binomial("cauchit"), 155 | offset = offset, intercept = TRUE, prior.mean = prior.mean, 156 | prior.scale = prior.scale, prior.df = prior.df, 157 | prior.mean.for.intercept = 0, prior.scale.for.intercept = 10, 158 | prior.df.for.intercept = 1, 159 | min.prior.scale = min.prior.scale, 160 | scaled = scaled, 161 | control = glm.control(maxit=maxit), print.unnormalized.log.posterior = print.unnormalized.log.posterior)) 162 | if (!fit$converged) 163 | warning("attempt to find suitable starting values failed") 164 | coefs <- fit$coefficients 165 | if (any(is.na(coefs))) { 166 | warning("design appears to be rank-deficient, so dropping some coefs") 167 | keep <- names(coefs)[!is.na(coefs)] 168 | coefs <- coefs[keep] 169 | x <- x[, keep[-1], drop = FALSE] 170 | pc <- ncol(x) 171 | } 172 | spacing <- logit((1:q)/(q + 1)) 173 | if (method != "logistic") 174 | spacing <- spacing/1.7 175 | gammas <- -coefs[1] + spacing - spacing[q1] 176 | thetas <- c(gammas[1], log(diff(gammas))) 177 | start <- c(coefs[-1], thetas) 178 | } 179 | # rep start to have the same length of coef + zeta 180 | else if (length(start)==1){ 181 | start <- rep(start, (pc+q)) 182 | } 183 | else if (length(start) != pc + q) 184 | stop("'start' is not of the correct length") 185 | 186 | J <- NCOL(x) 187 | 188 | # SU: if no x's, no priors for coefs 2008.2.9 189 | if (xint>1) { 190 | if (length(prior.mean) == 1) 191 | prior.mean <- rep(prior.mean, J) 192 | if (length(prior.scale) == 1) { 193 | prior.scale <- rep(prior.scale, J) 194 | if (scaled == TRUE) { 195 | for (j in 1:J) { 196 | x.obs <- x[, j] 197 | x.obs <- x.obs[!is.na(x.obs)] 198 | num.categories <- length(unique(x.obs)) 199 | if (num.categories == 2) { 200 | prior.scale[j] <- prior.scale[j]/(max(x.obs) - min(x.obs)) 201 | } 202 | else if (num.categories > 2) { 203 | prior.scale[j] <- prior.scale[j]/(2 * sd(x.obs)) 204 | } 205 | } 206 | } 207 | } 208 | if (length(prior.df) == 1) { 209 | prior.df <- rep(prior.df, J) 210 | } 211 | } 212 | 213 | # prior for intercept sum(priors.intercpet)=1 214 | if (is.null(prior.counts.for.bins)) { 215 | prior.counts.for.bins <- 1/(q+1) 216 | } 217 | if (length(prior.counts.for.bins) == 1) { 218 | prior.counts.for.bins <- rep(prior.counts.for.bins, q+1) 219 | } 220 | # Augment the data to add prior information 221 | y.0 <- y 222 | Y.0 <- Y 223 | x.0 <- x 224 | wt.0 <- wt 225 | offset.0 <- offset 226 | .polrY1.0 <- .polrY1 227 | .polrY2.0 <- .polrY2 228 | y <- c (y.0, 1:(q+1)) 229 | Y <- matrix(0, n+q+1, q) 230 | .polrY1 <- col(Y) == y 231 | .polrY2 <- col(Y) == y - 1 232 | x <- rbind (x.0, matrix (colMeans(x.0), nrow=(q+1), ncol=J, byrow=TRUE)) 233 | wt <- c (wt.0, prior.counts.for.bins) 234 | offset <- c (offset, rep(0,q+1)) 235 | # Fit the model as before 236 | res <- optim(start, fmin, gmin, method = "BFGS", hessian = Hess, ...) 237 | # Restore the old variables 238 | y <- y.0 239 | Y <- Y.0 240 | x <- x.0 241 | wt <- wt.0 242 | offset <- offset.0 243 | .polrY1 <- .polrY1.0 244 | .polrY2 <- .polrY2.0 245 | # Continue on as before 246 | beta <- res$par[seq_len(pc)] 247 | theta <- res$par[pc + 1:q] 248 | zeta <- cumsum(c(theta[1], exp(theta[-1]))) 249 | deviance <- 2 * res$value 250 | niter <- c(f.evals = res$counts[1], g.evals = res$counts[2]) 251 | names(zeta) <- paste(lev[-length(lev)], lev[-1], sep = "|") 252 | if (pc > 0) { 253 | names(beta) <- colnames(x) 254 | eta <- drop(x %*% beta) 255 | } 256 | else { 257 | eta <- rep(0, n) 258 | } 259 | cumpr <- matrix(pfun(matrix(zeta, n, q, byrow = TRUE) - eta), , q) 260 | fitted <- t(apply(cumpr, 1, function(x) diff(c(0, x, 1)))) 261 | dimnames(fitted) <- list(row.names(m), lev) 262 | fit <- list(coefficients = beta, zeta = zeta, deviance = deviance, 263 | fitted.values = fitted, lev = lev, terms = Terms, df.residual = sum(wt) - 264 | pc - q, edf = pc + q, n = sum(wt), nobs = sum(wt), 265 | call = match.call(), method = method, convergence = res$convergence, 266 | prior.mean = prior.mean, prior.scale = prior.scale, prior.df = prior.df, 267 | prior.counts.for.bins = prior.counts.for.bins, niter = niter) 268 | if (Hess) { 269 | dn <- c(names(beta), names(zeta)) 270 | H <- res$hessian 271 | dimnames(H) <- list(dn, dn) 272 | fit$Hessian <- H 273 | } 274 | if (model){ 275 | fit$model <- m 276 | } 277 | fit$na.action <- attr(m, "na.action") 278 | fit$contrasts <- cons 279 | fit$xlevels <- .getXlevels(Terms, m) 280 | class(fit) <- c("bayespolr", "polr") 281 | fit 282 | } 283 | 284 | setMethod("print", signature(x = "bayespolr"), 285 | function(x, digits= 2) display(object=x, digits=digits)) 286 | setMethod("show", signature(object = "bayespolr"), 287 | function(object) display(object, digits=2)) 288 | -------------------------------------------------------------------------------- /man/bayesglm.Rd: -------------------------------------------------------------------------------- 1 | \name{bayesglm} 2 | \docType{class} 3 | % Classes 4 | \alias{bayesglm-class} 5 | %\alias{bayesglm.h-class} 6 | % functions 7 | \alias{bayesglm} 8 | \alias{bayesglm.fit} 9 | % display methods 10 | \alias{print,bayesglm-method} 11 | %\alias{print,bayesglm.h-method} 12 | \alias{show,bayesglm-method} 13 | %\alias{show,bayesglm.h-method} 14 | \alias{predict.bayesglm} 15 | 16 | \title{Bayesian generalized linear models.} 17 | 18 | \description{Bayesian functions for generalized linear modeling 19 | with independent normal, t, or Cauchy prior distribution 20 | for the coefficients.} 21 | 22 | \usage{ 23 | bayesglm (formula, family = gaussian, data, 24 | weights, subset, na.action, 25 | start = NULL, etastart, mustart, 26 | offset, control = list(...), 27 | model = TRUE, method = "glm.fit", 28 | x = FALSE, y = TRUE, contrasts = NULL, 29 | drop.unused.levels = TRUE, 30 | prior.mean = 0, 31 | prior.scale = NULL, 32 | prior.df = 1, 33 | prior.mean.for.intercept = 0, 34 | prior.scale.for.intercept = NULL, 35 | prior.df.for.intercept = 1, 36 | min.prior.scale=1e-12, 37 | scaled = TRUE, keep.order=TRUE, 38 | drop.baseline=TRUE, 39 | maxit=100, 40 | print.unnormalized.log.posterior=FALSE, 41 | Warning=TRUE,...) 42 | 43 | bayesglm.fit (x, y, weights = rep(1, nobs), 44 | start = NULL, etastart = NULL, 45 | mustart = NULL, offset = rep(0, nobs), family = gaussian(), 46 | control = list(), intercept = TRUE, 47 | prior.mean = 0, 48 | prior.scale = NULL, 49 | prior.df = 1, 50 | prior.mean.for.intercept = 0, 51 | prior.scale.for.intercept = NULL, 52 | prior.df.for.intercept = 1, 53 | min.prior.scale=1e-12, scaled = TRUE, 54 | print.unnormalized.log.posterior=FALSE, Warning=TRUE) 55 | } 56 | 57 | \arguments{ 58 | \item{formula}{a symbolic description of the model to be fit. 59 | The details of model specification are given below.} 60 | 61 | \item{family}{a description of the error distribution and link 62 | function to be used in the model. This can be a character string 63 | naming a family function, a family function or the result of a call 64 | to a family function. (See \code{\link{family}} for details of 65 | family functions.)} 66 | 67 | \item{data}{an optional data frame, list or environment (or object 68 | coercible by \code{\link{as.data.frame}} to a data frame) containing 69 | the variables in the model. If not found in \code{data}, the 70 | variables are taken from \code{environment(formula)}, 71 | typically the environment from which \code{glm} is called.} 72 | 73 | \item{weights}{an optional vector of weights to be used in the fitting 74 | process. Should be \code{NULL} or a numeric vector.} 75 | 76 | \item{subset}{an optional vector specifying a subset of observations 77 | to be used in the fitting process.} 78 | 79 | \item{na.action}{a function which indicates what should happen 80 | when the data contain \code{NA}s. The default is set by 81 | the \code{na.action} setting of \code{\link{options}}, and is 82 | \code{\link{na.fail}} if that is unset. The \dQuote{factory-fresh} 83 | default is \code{\link{na.omit}}. Another possible value is 84 | \code{NULL}, no action. Value \code{\link{na.exclude}} can be useful.} 85 | 86 | \item{start}{starting values for the parameters in the linear predictor.} 87 | 88 | \item{etastart}{starting values for the linear predictor.} 89 | 90 | \item{mustart}{starting values for the vector of means.} 91 | 92 | \item{offset}{this can be used to specify an \emph{a priori} 93 | known component to be included in the linear predictor 94 | during fitting. This should be \code{NULL} or a numeric vector of 95 | length either one or equal to the number of cases. 96 | One or more \code{\link{offset}} terms can be included in the 97 | formula instead or as well, and if both are specified their sum is 98 | used. See \code{\link{model.offset}}.} 99 | 100 | \item{control}{a list of parameters for controlling the fitting 101 | process. See the documentation for \code{\link{glm.control}} 102 | for details.} 103 | 104 | \item{model}{a logical value indicating whether \emph{model frame} 105 | should be included as a component of the returned value.} 106 | 107 | \item{method}{the method to be used in fitting the model. 108 | The default method \code{"glm.fit"} uses iteratively reweighted 109 | least squares (IWLS). The only current alternative is 110 | \code{"model.frame"} which returns the model frame and does no fitting.} 111 | 112 | \item{x, y}{For \code{glm}: 113 | logical values indicating whether the response vector and model 114 | matrix used in the fitting process should be returned as components 115 | of the returned value. 116 | 117 | For \code{glm.fit}: \code{x} is a design matrix of dimension \code{n 118 | * p}, and \code{y} is a vector of observations of length \code{n}. 119 | } 120 | 121 | \item{contrasts}{an optional list. See the \code{contrasts.arg} 122 | of \code{model.matrix.default}.} 123 | 124 | \item{drop.unused.levels}{default TRUE, if FALSE, it interpolates the 125 | intermediate values if the data have integer levels.} 126 | 127 | \item{intercept}{logical. Should an intercept be included in the 128 | \emph{null} model?} 129 | 130 | \item{prior.mean}{prior mean for the coefficients: default is 0. Can be a vector 131 | of length equal to the number of predictors 132 | (not counting the intercept, if any). If it is a scalar, it is 133 | expanded to the length of this vector.} 134 | 135 | \item{prior.scale}{prior scale for the coefficients: default is NULL; if is NULL, for 136 | a logit model, prior.scale is 2.5; for a probit model, prior scale is 2.5*1.6. 137 | Can be a vector of length equal to the number of predictors 138 | (not counting the intercept, if any). If it is a scalar, it is 139 | expanded to the length of this vector.} 140 | 141 | \item{prior.df}{prior degrees of freedom for the coefficients. 142 | For t distribution: default is 1 (Cauchy). Set to Inf to 143 | get normal prior distributions. Can be a vector of length equal to 144 | the number of predictors (not counting the intercept, if any). 145 | If it is a scalar, it is expanded to the length of this vector.} 146 | 147 | \item{prior.mean.for.intercept}{prior mean for the intercept: default 148 | is 0. See \sQuote{Details}.} 149 | 150 | \item{prior.scale.for.intercept}{prior scale for the intercept: default is NULL; for 151 | a logit model, prior scale for intercept is 10; 152 | for probit model, prior scale for intercept is rescaled as 10*1.6.} 153 | 154 | \item{prior.df.for.intercept}{prior degrees of freedom for the intercept: default is 1.} 155 | 156 | \item{min.prior.scale}{Minimum prior scale for the coefficients: default is 1e-12.} 157 | 158 | \item{scaled}{scaled=TRUE, the scales for the prior distributions of the coefficients 159 | are determined as follows: For a predictor with only one value, 160 | we just use prior.scale. For a predictor with two values, 161 | we use prior.scale/range(x). For a predictor with more than two values, 162 | we use prior.scale/(2*sd(x)). If the response is Gaussian, prior.scale is also 163 | multiplied by 2 * sd(y). Default is TRUE} 164 | 165 | \item{keep.order}{a logical value indicating whether the terms should 166 | keep their positions. If \code{FALSE} the terms are reordered so 167 | that main effects come first, followed by the interactions, 168 | all second-order, all third-order and so on. Effects of a given 169 | order are kept in the order specified. Default is TRUE.} 170 | 171 | \item{drop.baseline}{Drop the base level of categorical x's, default is TRUE.} 172 | 173 | \item{maxit}{integer giving the maximal number of IWLS iterations, default is 100. This can also be controlled by \code{control}.} 174 | \item{print.unnormalized.log.posterior}{display the unnormalized log posterior likelihood for bayesglm, default=FALSE} 175 | \item{Warning}{default is TRUE, which will show the error messages of not convergence and separation.} 176 | \item{\dots}{further arguments passed to or from other methods.} 177 | } 178 | \details{ 179 | The program is a simple alteration of \code{glm()} that uses an approximate EM 180 | algorithm to update the betas at each step using an augmented regression 181 | to represent the prior information. 182 | 183 | We use Student-t prior distributions for the coefficients. The prior 184 | distribution for the constant term is set so it applies to the value 185 | when all predictors are set to their mean values. 186 | 187 | If scaled=TRUE, the scales for the prior distributions of the 188 | coefficients are determined as follows: For a predictor with only one 189 | value, we just use prior.scale. For a predictor with two values, we use 190 | prior.scale/range(x). For a predictor with more than two values, we use 191 | prior.scale/(2*sd(x)). 192 | 193 | We include all the \code{glm()} arguments but we haven't tested that all the 194 | options (e.g., \code{offsets}, \code{contrasts}, 195 | \code{deviance} for the null model) all work. 196 | 197 | The new arguments here are: \code{prior.mean}, \code{prior.scale}, 198 | \code{prior.scale.for.intercept}, \code{prior.df}, \code{prior.df.for.intercept}and 199 | \code{scaled}. 200 | } 201 | 202 | \value{See \code{\link[stats]{glm}} for details. 203 | 204 | \item{prior.mean}{prior means for the coefficients and the intercept.} 205 | \item{prior.scale}{prior scales for the coefficients} 206 | \item{prior.df}{prior dfs for the coefficients.} 207 | \item{prior.scale.for.intercept}{prior scale for the intercept} 208 | \item{prior.df.for.intercept}{prior df for the intercept} 209 | 210 | } 211 | 212 | \references{Andrew Gelman, Aleks Jakulin, Maria Grazia Pittau and Yu-Sung Su. (2009). 213 | \dQuote{A Weakly Informative Default Prior Distribution For 214 | Logistic And Other Regression Models.} 215 | \emph{The Annals of Applied Statistics} 2 (4): 1360--1383. 216 | \url{http://www.stat.columbia.edu/~gelman/research/published/priors11.pdf} 217 | } 218 | 219 | \author{Andrew Gelman \email{gelman@stat.columbia.edu}; 220 | Yu-Sung Su \email{suyusung@tsinghua.edu.cn}; 221 | Daniel Lee \email{bearlee@alum.mit.edu}; 222 | Aleks Jakulin \email{Jakulin@stat.columbia.edu}} 223 | 224 | \seealso{ 225 | \code{\link[stats]{glm}}, 226 | \code{\link{bayespolr}} 227 | } 228 | 229 | \examples{ 230 | n <- 100 231 | x1 <- rnorm (n) 232 | x2 <- rbinom (n, 1, .5) 233 | b0 <- 1 234 | b1 <- 1.5 235 | b2 <- 2 236 | y <- rbinom (n, 1, invlogit(b0+b1*x1+b2*x2)) 237 | 238 | M1 <- glm (y ~ x1 + x2, family=binomial(link="logit")) 239 | display (M1) 240 | 241 | M2 <- bayesglm (y ~ x1 + x2, family=binomial(link="logit"), 242 | prior.scale=Inf, prior.df=Inf) 243 | display (M2) # just a test: this should be identical to classical logit 244 | 245 | M3 <- bayesglm (y ~ x1 + x2, family=binomial(link="logit")) 246 | # default Cauchy prior with scale 2.5 247 | display (M3) 248 | 249 | M4 <- bayesglm (y ~ x1 + x2, family=binomial(link="logit"), 250 | prior.scale=2.5, prior.df=1) 251 | # Same as M3, explicitly specifying Cauchy prior with scale 2.5 252 | display (M4) 253 | 254 | M5 <- bayesglm (y ~ x1 + x2, family=binomial(link="logit"), 255 | prior.scale=2.5, prior.df=7) # t_7 prior with scale 2.5 256 | display (M5) 257 | 258 | M6 <- bayesglm (y ~ x1 + x2, family=binomial(link="logit"), 259 | prior.scale=2.5, prior.df=Inf) # normal prior with scale 2.5 260 | display (M6) 261 | 262 | # Create separation: set y=1 whenever x2=1 263 | # Now it should blow up without the prior! 264 | 265 | y <- ifelse (x2==1, 1, y) 266 | 267 | M1 <- glm (y ~ x1 + x2, family=binomial(link="logit")) 268 | display (M1) 269 | 270 | M2 <- bayesglm (y ~ x1 + x2, family=binomial(link="logit"), 271 | prior.scale=Inf, prior.scale.for.intercept=Inf) # Same as M1 272 | display (M2) 273 | 274 | M3 <- bayesglm (y ~ x1 + x2, family=binomial(link="logit")) 275 | display (M3) 276 | 277 | M4 <- bayesglm (y ~ x1 + x2, family=binomial(link="logit"), 278 | prior.scale=2.5, prior.scale.for.intercept=10) # Same as M3 279 | display (M4) 280 | 281 | M5 <- bayesglm (y ~ x1 + x2, family=binomial(link="logit"), 282 | prior.scale=2.5, prior.df=7) 283 | display (M5) 284 | 285 | M6 <- bayesglm (y ~ x1 + x2, family=binomial(link="logit"), 286 | prior.scale=2.5, prior.df=Inf) 287 | display (M6) 288 | 289 | # bayesglm with gaussian family (bayes lm) 290 | sigma <- 5 291 | y2 <- rnorm (n, b0+b1*x1+b2*x2, sigma) 292 | M7 <- bayesglm (y2 ~ x1 + x2, prior.scale=Inf, prior.df=Inf) 293 | display (M7) 294 | 295 | 296 | # bayesglm with categorical variables 297 | z1 <- trunc(runif(n, 4, 9)) 298 | levels(factor(z1)) 299 | z2 <- trunc(runif(n, 15, 19)) 300 | levels(factor(z2)) 301 | 302 | ## drop the base level (R default) 303 | M8 <- bayesglm (y ~ x1 + factor(z1) + factor(z2), 304 | family=binomial(link="logit"), prior.scale=2.5, prior.df=Inf) 305 | display (M8) 306 | 307 | ## keep all levels with the intercept, keep the variable order 308 | M9 <- bayesglm (y ~ x1 + x1:x2 + factor(z1) + x2 + factor(z2), 309 | family=binomial(link="logit"), 310 | prior.mean=rep(0,12), 311 | prior.scale=rep(2.5,12), 312 | prior.df=rep(Inf,12), 313 | prior.mean.for.intercept=0, 314 | prior.scale.for.intercept=10, 315 | prior.df.for.intercept=1, 316 | drop.baseline=FALSE, keep.order=TRUE) 317 | display (M9) 318 | 319 | ## keep all levels without the intercept 320 | M10 <- bayesglm (y ~ x1 + factor(z1) + x1:x2 + factor(z2)-1, 321 | family=binomial(link="logit"), 322 | prior.mean=rep(0,11), 323 | prior.scale=rep(2.5,11), 324 | prior.df=rep(Inf,11), 325 | drop.baseline=FALSE) 326 | display (M10) 327 | 328 | } 329 | 330 | \keyword{models} 331 | \keyword{methods} 332 | \keyword{regression} 333 | -------------------------------------------------------------------------------- /R/display.R: -------------------------------------------------------------------------------- 1 | setMethod("display", signature(object = "lm"), 2 | function(object, digits=2, detail=FALSE) 3 | { 4 | out <- NULL 5 | out$call <- object$call 6 | summ <- summary (object) 7 | out$sigma.hat <- summ$sigma 8 | out$r.squared <- summ$r.squared 9 | if(detail){ 10 | coef <- summ$coef[,,drop=FALSE] 11 | } 12 | else{ 13 | coef <- summ$coef[,1:2,drop=FALSE] 14 | } 15 | dimnames(coef)[[2]][1:2] <- c("coef.est","coef.se") 16 | out$coef <- coef[,"coef.est"]#,drop=FALSE] 17 | out$se <- coef[,"coef.se"]#,drop=FALSE] 18 | out$t.value <- summ$coef[,3] 19 | out$p.value <- summ$coef[,4] 20 | out$n <- summ$df[1] + summ$df[2] 21 | out$k <- summ$df[1] 22 | print (out$call) 23 | pfround (coef, digits) 24 | cat("---\n") 25 | cat (paste ("n = ", out$n, ", k = ", out$k, 26 | "\nresidual sd = ", fround (out$sigma.hat, digits), 27 | ", R-Squared = ", fround (out$r.squared, 2), "\n", sep="")) 28 | return(invisible(out)) 29 | } 30 | ) 31 | 32 | 33 | 34 | setMethod("display", signature(object = "bayesglm"), 35 | function(object, digits=2, detail=FALSE) 36 | { 37 | out <- NULL 38 | out$call <- object$call 39 | summ <- summary(object, dispersion = object$dispersion) 40 | if(detail){ 41 | coef <- summ$coefficients 42 | coef[ rownames( coef ) %in% rownames( summ$coef[, , drop = FALSE]) , ] <- summ$coef[ , , drop = FALSE ] 43 | out$z.value <- coef[,3]#,drop=FALSE] 44 | out$p.value <- coef[,4]#,drop=FALSE] 45 | } 46 | else{ 47 | coef <- matrix( NA, length( object$coefficients ),2 ) 48 | rownames(coef) <- names( object$coefficients ) ## M 49 | coef[ rownames( coef ) %in% rownames( summ$coef[, 1:2, drop = FALSE]) , ] <- summ$coef[ , 1:2, drop = FALSE ] ## M 50 | } 51 | dimnames(coef)[[2]][1:2] <- c( "coef.est", "coef.se") 52 | out$coef <- coef[,"coef.est"]#,drop=FALSE] 53 | out$se <- coef[,"coef.se"]#,drop=FALSE] 54 | out$n <- summ$df[1] + summ$df[2] 55 | out$k <- summ$df[1] 56 | out$deviance <- summ$deviance 57 | out$null.deviance <- summ$null.deviance 58 | print(out$call) 59 | pfround(coef, digits) 60 | cat("---\n") 61 | cat(paste("n = ", out$n, ", k = ", out$k, "\nresidual deviance = ", 62 | fround(out$deviance, 1), ", null deviance = ", fround(out$null.deviance, 1), " (difference = ", fround(out$null.deviance - out$deviance, 1), ")", "\n", sep = "")) 63 | out$dispersion <- if (is.null(object$dispersion)){ 64 | summ$dispersion 65 | } else { 66 | object$dispersion 67 | } 68 | if (out$dispersion != 1) { 69 | out$overdispersion.parameter <- out$dispersion 70 | cat(paste("overdispersion parameter = ", fround(out$dispersion, 1), "\n", sep = "")) 71 | if (family(object)$family == "gaussian") { 72 | out$sigma.hat <- sqrt(out$dispersion) 73 | cat(paste("residual sd is sqrt(overdispersion) = ", fround(out$sigma.hat, digits), "\n", sep = "")) 74 | } 75 | } 76 | return(invisible(out)) 77 | } 78 | ) 79 | 80 | #setMethod("display", signature(object = "bayesglm.h"), 81 | # function (object, digits = 2, detail = FALSE) 82 | # { 83 | # call <- object$call 84 | # summ <- summary(object, dispersion = object$dispersion) 85 | # if(detail){ 86 | # coef <- summ$coefficients 87 | # coef[ rownames( coef ) %in% rownames( summ$coef[, , drop = FALSE]) , ] <- summ$coef[ , , drop = FALSE ] 88 | # } 89 | # else{ 90 | # coef <- matrix( NA, length( object$coefficients ),2 ) 91 | # rownames(coef) <- names( object$coefficients ) ## M 92 | # coef[ rownames( coef ) %in% rownames( summ$coef[, 1:2, drop = FALSE]) , ] <- summ$coef[ , 1:2, drop = FALSE ] ## M 93 | # } 94 | # dimnames(coef)[[2]][1:2] <- c( "coef.est", "coef.se") 95 | # #n <- summ$df[1] + summ$df[2] 96 | # n <- summ$df.residual 97 | # k <- summ$df[1] 98 | # print(call) 99 | # if(max(object$batch)>0){ 100 | # nn<- strsplit( rownames( coef )[seq( from= length( object$batch ) + 1 ,to = nrow( coef ))], "." , fixed=TRUE) 101 | # bb<- c( object$batch,unlist( lapply (nn , function( lst ) { lst[[3]] } ) ) ) 102 | # } 103 | # else {bb<- c( object$batch)} 104 | # cc<- cbind( fround( coef, digits ), bb ) 105 | # dimnames(cc)[[2]][3]<-"batch" 106 | # print( cc , quote = FALSE ) 107 | # cat("---\n") 108 | # cat(paste("n = ", n, ", k = ", k, "\nresidual deviance = ", 109 | # fround(summ$deviance, 1), ", null deviance = ", fround(summ$null.deviance, 110 | # 1), " (difference = ", fround(summ$null.deviance - 111 | # summ$deviance, 1), ")", "\n", sep = "")) 112 | # dispersion <- if (is.null(object$dispersion)) 113 | # summ$dispersion 114 | # else object$dispersion 115 | # if (dispersion != 1) { 116 | # cat(paste("overdispersion parameter = ", fround(dispersion, 117 | # 1), "\n", sep = "")) 118 | # if (family(object)$family == "gaussian") { 119 | # cat(paste("residual sd is sqrt(overdispersion) = ", 120 | # fround(sqrt(dispersion), digits), "\n", sep = "")) 121 | # cat(paste("group sd is sigma.batch = ", 122 | # fround(object$sigma.batch, digits), "\n", sep = "")) 123 | # } 124 | # } 125 | # } 126 | #) 127 | 128 | 129 | 130 | 131 | setMethod("display", signature(object = "glm"), 132 | function(object, digits=2, detail=FALSE) 133 | { 134 | out <- NULL 135 | out$call <- object$call 136 | summ <- summary(object, dispersion = object$dispersion) 137 | if(detail){ 138 | coef <- summ$coef[, , drop = FALSE] 139 | out$z.value <- coef[,3]#,drop=FALSE] 140 | out$p.value <- coef[,4]#,drop=FALSE] 141 | } 142 | else{ 143 | coef <- summ$coef[, 1:2, drop = FALSE] 144 | } 145 | dimnames(coef)[[2]][1:2] <- c("coef.est", "coef.se") 146 | out$n <- summ$df[1] + summ$df[2] 147 | out$k <- summ$df[1] 148 | out$coef <- coef[,"coef.est"] 149 | out$se <- coef[,"coef.se"] 150 | print(out$call) 151 | pfround(coef, digits) 152 | out$deviance <- summ$deviance 153 | out$null.deviance <- summ$null.deviance 154 | cat("---\n") 155 | cat(paste(" n = ", out$n, ", k = ", out$k, "\n residual deviance = ", 156 | fround(out$deviance, 1), ", null deviance = ", fround(out$null.deviance, 1), " (difference = ", fround(summ$null.deviance - summ$deviance, 1), ")", "\n", sep = "")) 157 | out$dispersion <- if (is.null(object$dispersion)){ 158 | summ$dispersion 159 | } else { 160 | object$dispersion 161 | } 162 | if (out$dispersion != 1) { 163 | cat(paste(" overdispersion parameter = ", fround(out$dispersion, 1), "\n", sep = "")) 164 | if (family(object)$family=="gaussian") { 165 | out$sigma.hat <- sqrt(out$dispersion) 166 | cat(paste(" residual sd is sqrt(overdispersion) = ", 167 | fround(out$sigma.hat, digits), "\n", sep = "")) 168 | } 169 | } 170 | return(invisible(out)) 171 | } 172 | ) 173 | 174 | 175 | 176 | 177 | #setMethod("display", signature(object = "mer"), 178 | # function(object, digits=2) 179 | # { 180 | # call <- object@call 181 | # print (call) 182 | # #object <- summary(object) 183 | # fcoef <- fixef(object) 184 | # useScale <- attr( VarCorr(object), "sc") 185 | # corF <- vcov(object)@factors$correlation 186 | # coefs <- cbind(fcoef, corF@sd) 187 | # if (length (fcoef) > 0){ 188 | # dimnames(coefs) <- list(names(fcoef), c("coef.est", "coef.se")) 189 | # pfround (coefs, digits) 190 | # } 191 | # cat("\nError terms:\n") 192 | # vc <- as.matrix.VarCorr (VarCorr (object), useScale=useScale, digits) 193 | # print (vc[,c(1:2,4:ncol(vc))], quote=FALSE) 194 | # ngrps <- lapply(object@flist, function(x) length(levels(x))) 195 | # REML <- object@status["REML"] 196 | # llik <- logLik(object, REML) 197 | # AIC <- AIC(llik) 198 | # dev <- object@deviance["ML"] # Dbar 199 | # n <- object@devComp["n"] 200 | # Dhat <- -2*(llik) # Dhat 201 | # pD <- dev - Dhat # pD 202 | # DIC <- dev + pD # DIC=Dbar+pD=Dhat+2pD 203 | # cat("---\n") 204 | # cat(sprintf("number of obs: %d, groups: ", n)) 205 | # cat(paste(paste(names(ngrps), ngrps, sep = ", "), collapse = "; ")) 206 | # cat(sprintf("\nAIC = %g, DIC = ", fround(AIC, 1))) 207 | # cat(fround(DIC, 1)) 208 | # cat("\ndeviance =", fround (dev, 1), "\n") 209 | # if (useScale < 0){ 210 | # cat("overdispersion parameter =", fround (.Call("mer_sigma", 211 | # object, FALSE, PACKAGE = "lme4"), 1), "\n") 212 | # } 213 | # } 214 | #) 215 | 216 | 217 | 218 | setMethod("display", signature(object = "merMod"), 219 | function(object, digits=2, detail=FALSE) 220 | { 221 | out <- NULL 222 | out$call <- object@call 223 | print (out$call) 224 | #object <- summary(object) 225 | #summ <- summary(object) 226 | fcoef <- fixef(object) 227 | #coefs <- attr(summ, "coefs") 228 | #useScale <- attr (VarCorr (object), "sc") 229 | useScale <- getME(object, "devcomp")$dims["useSc"] 230 | corF <- vcov(object)@factors$correlation 231 | coefs <- cbind(fcoef, corF@sd) 232 | if (length (fcoef) > 0){ 233 | if (!useScale) { 234 | coefs <- coefs[, 1:2, drop = FALSE] 235 | out$z.value <- coefs[, 1]/coefs[, 2] 236 | out$p.value <- 2 * pnorm(abs(out$z.value), lower.tail = FALSE) 237 | coefs <- cbind(coefs, `z value` = out$z.value, `Pr(>|z|)` = out$p.value) 238 | } 239 | else { 240 | out$t.value <- coefs[, 1]/coefs[, 2] 241 | coefs <- cbind(coefs, `t value` = out$t.value) 242 | } 243 | dimnames(coefs)[[2]][1:2] <- c("coef.est", "coef.se") 244 | if(detail){ 245 | pfround (coefs, digits) 246 | } 247 | else{ 248 | pfround(coefs[,1:2], digits) 249 | } 250 | } 251 | out$coef <- coefs[,"coef.est"] 252 | out$se <- coefs[,"coef.se"] 253 | cat("\nError terms:\n") 254 | vc <- as.matrix.VarCorr (VarCorr (object), useScale=useScale, digits=digits) 255 | print (vc[,c(1:2,4:ncol(vc))], quote=FALSE) 256 | out$ngrps <- lapply(object@flist, function(x) length(levels(x))) 257 | is_REML <- isREML(object) 258 | llik <- logLik(object, REML=is_REML) 259 | out$AIC <- AIC(llik) 260 | out$deviance <- deviance(refitML(object)) # Dbar 261 | out$n <- getME(object, "devcomp")$dims["n"] 262 | Dhat <- -2*(llik) # Dhat 263 | pD <- out$deviance - Dhat # pD 264 | out$DIC <- out$deviance + pD # DIC=Dbar+pD=Dhat+2pD 265 | cat("---\n") 266 | cat(sprintf("number of obs: %d, groups: ", out$n)) 267 | cat(paste(paste(names(out$ngrps), out$ngrps, sep = ", "), collapse = "; ")) 268 | cat(sprintf("\nAIC = %g, DIC = ", round(out$AIC,1))) 269 | cat(round(out$DIC, 1)) 270 | cat("\ndeviance =", fround (out$deviance, 1), "\n") 271 | if (useScale < 0){ 272 | out$sigma.hat <- .Call("mer_sigma", object, FALSE, PACKAGE = "lme4") 273 | cat("overdispersion parameter =", fround (out$sigma.hat, 1), "\n") 274 | } 275 | return(invisible(out)) 276 | } 277 | ) 278 | 279 | 280 | 281 | setMethod("display", signature(object = "polr"), 282 | function(object, digits=2, detail=FALSE) 283 | { 284 | out <- NULL 285 | out$call <- object$call 286 | summ <- summary(object) 287 | if(detail){ 288 | coef <- summ$coef[, , drop = FALSE] 289 | out$t.value <- coef[,"t value"] 290 | } 291 | else{ 292 | coef <- summ$coef[, 1:2, drop = FALSE] 293 | } 294 | dimnames(coef)[[2]][1:2] <- c("coef.est", "coef.se") 295 | out$coef <- coef[,"coef.est"] 296 | out$se <- coef[,"coef.se"] 297 | out$n <- summ$n 298 | out$k <- nrow (coef) 299 | out$k.intercepts <- length (summ$zeta) 300 | print(out$call) 301 | pfround(coef, digits) 302 | cat("---\n") 303 | cat(paste("n = ", out$n, ", k = ", out$k, " (including ", out$k.intercepts, 304 | " intercepts)\nresidual deviance = ", 305 | fround(deviance(object), 1), 306 | ", null deviance is not computed by polr", 307 | "\n", sep = "")) 308 | #cat("AIC:", fround(AIC(object), 1), "\n") 309 | return(invisible(out)) 310 | } 311 | ) 312 | 313 | 314 | setMethod("display", signature(object = "svyglm"), 315 | function(object, digits=2, detail=FALSE) 316 | { 317 | out <- NULL 318 | out$call <- object$call 319 | out$survey.design <- object$survey.design 320 | summ <- summary(object) 321 | if(detail){ 322 | coef <- summ$coef[, , drop = FALSE] 323 | out$z.value <- coef[,3]#,drop=FALSE] 324 | out$p.value <- coef[,4]#,drop=FALSE] 325 | } 326 | else{ 327 | coef <- summ$coef[, 1:2, drop = FALSE] 328 | } 329 | dimnames(coef)[[2]][1:2] <- c("coef.est", "coef.se") 330 | out$n <- summ$df[1] + summ$df[2] 331 | out$k <- summ$df[1] 332 | out$coef <- coef[,"coef.est"] 333 | out$se <- coef[,"coef.se"] 334 | print(out$call) 335 | cat("\n") 336 | print(out$survey.design) 337 | cat("\n") 338 | pfround(coef, digits) 339 | out$deviance <- summ$deviance 340 | out$null.deviance <- summ$null.deviance 341 | cat("---\n") 342 | cat(paste(" n = ", out$n, ", k = ", out$k, "\n residual deviance = ", 343 | fround(out$deviance, 1), ", null deviance = ", fround(out$null.deviance, 1), " (difference = ", fround(summ$null.deviance - summ$deviance, 1), ")", "\n", sep = "")) 344 | out$dispersion <- summ$dispersion[1] 345 | if (out$dispersion != 1) { 346 | cat(paste(" overdispersion parameter = ", fround(out$dispersion, 1), "\n", sep = "")) 347 | if (family(object)$family=="gaussian") { 348 | out$sigma.hat <- sqrt(out$dispersion) 349 | cat(paste(" residual sd is sqrt(overdispersion) = ", 350 | fround(out$sigma.hat, digits), "\n", sep = "")) 351 | } 352 | } 353 | return(invisible(out)) 354 | } 355 | ) 356 | 357 | 358 | #setMethod("display", signature(object = "bayespolr"), 359 | # function(object, digits=2) 360 | # { 361 | # call <- object$call 362 | # summ <- summary(object) 363 | # coef <- summ$coef[, 1:2, drop = FALSE] 364 | # dimnames(coef)[[2]] <- c("coef.est", "coef.se") 365 | # n <- summ$n # or maybe should be "nobs", I don't know for sure 366 | # k <- nrow (coef) 367 | # k.intercepts <- length (summ$zeta) 368 | # print(call) 369 | # pfround(coef, digits) 370 | # cat("---\n") 371 | # cat(paste("n = ", n, ", k = ", k, " (including ", k.intercepts, 372 | # " intercepts)\nresidual deviance = ", 373 | # fround(summ$deviance, 1), 374 | # ", null deviance is not computed by bayespolr", 375 | # "\n", sep = "")) 376 | # } 377 | #) 378 | --------------------------------------------------------------------------------