├── DESCRIPTION ├── NAMESPACE ├── R ├── boot2lme.R ├── svy2lme.R ├── svy2relmat.R └── svylmeNG.R ├── README.md ├── THANKS ├── data ├── milk_subset.rda ├── nzmaths.rda └── pisa.rda ├── inst ├── COPYRIGHTS └── scripts │ ├── README │ ├── milk-sampling.R │ ├── pairwise-milk.R │ ├── pisa-analysis.R │ ├── svy2lmesim2.R │ ├── twins-pairwise.R │ └── twins-sampling.R └── man ├── boot2lme.Rd ├── milk_subset.Rd ├── nzmaths.Rd ├── pisa.Rd ├── svy2lme.Rd └── svy2relmer.Rd /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: svylme 2 | Title: Linear Mixed Models for Complex Survey Data 3 | Version: 1.5-2 4 | Authors@R: person("Thomas", "Lumley", email = "t.lumley@auckland.ac.nz", 5 | role = c("aut", "cre")) 6 | Description: Linear mixed models for complex survey data, by pairwise composite likelihood, as described in Lumley & Huang (2023) . Supports nested and crossed random effects, and correlated random effects as in genetic models. Allows for multistage sampling and for other designs where pairwise sampling probabilities are specified or can be calculated. 7 | Imports: minqa, Matrix, lme4, methods, utils, stats 8 | Depends: survey, R (>= 3.5.0) 9 | License: GPL-3 10 | Maintainer: Thomas Lumley 11 | 12 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | import(Matrix) 2 | import(methods) 3 | import(minqa) 4 | import(survey) 5 | import(lme4) 6 | importFrom(utils,setTxtProgressBar) 7 | importFrom(utils,txtProgressBar) 8 | importFrom("stats", "ave", "model.frame", "pnorm", "weights") 9 | importFrom("stats", "printCoefmat") 10 | importFrom("stats", "vcov") 11 | export(svy2lme,svy2relmer) 12 | export(boot2lme) 13 | 14 | S3method(print,svy2lme) 15 | S3method(print, boot2lme) 16 | 17 | S3method(coef,svy2lme) 18 | 19 | S3method(vcov,svy2lme) 20 | S3method(vcov,boot2lme) 21 | 22 | 23 | -------------------------------------------------------------------------------- /R/boot2lme.R: -------------------------------------------------------------------------------- 1 | 2 | boot2lme<-function(model, rdesign, verbose=FALSE){ 3 | 4 | if(is.null(model$devfun)) stop("model must be fitted with return.devfun=TRUE") 5 | 6 | naa<-environment(model$devfun)$naa 7 | if (!is.null(naa)){ 8 | if (length(environment(model$devfun)$y)+length(naa) == NROW(rdesign)) 9 | rdesign<-rdesign[-naa,,drop=FALSE] 10 | if(verbose) warning(paste(length(naa),"observations dropped because of missing values")) 11 | } 12 | if (length(environment(model$devfun)$y) != NROW(rdesign)){ 13 | stop("number of rows of design does not match model") 14 | } 15 | 16 | basewts<-weights(rdesign, "sampling") 17 | replicates<-weights(rdesign, "analysis") 18 | scale<-rdesign$scale 19 | rscales<-rdesign$rscales 20 | 21 | 22 | nrep<-ncol(replicates) 23 | pwt0<-if (model$method=="nested") get("pwts",environment(model$devfun)) else get("pwt",environment(model$devfun)) 24 | if (is.null(rscales)) rscales<-rep(1,nrep) 25 | 26 | ii<-get("ii", environment(model$devfun)) 27 | jj<-get("jj", environment(model$devfun)) 28 | repwt<-(replicates/basewts)[ii,] 29 | repwtj<-(replicates/basewts)[jj,] 30 | if ((model$method=="nested") && (any(abs((repwt-repwtj)/(1+repwt+repwtj))>1e-5))) 31 | warning("replicate weights vary within cluster") 32 | else { 33 | repwt<-repwt*repwtj 34 | } 35 | 36 | theta0<-model$opt$par 37 | thetastar<-matrix(nrow=nrep,ncol=length(theta0)) 38 | betastar<-matrix(nrow=nrep,ncol=length(model$beta)) 39 | s2star<-numeric(nrep) 40 | 41 | D<-get("L",environment(model$devfun)) 42 | Dstar<-array(0,c(nrep,NROW(D),NCOL(D))) 43 | 44 | if (verbose) pb<-txtProgressBar(min = 0, max = nrep, style = 3) 45 | 46 | for(i in 1:nrep){ 47 | if (verbose) setTxtProgressBar(pb, i) 48 | if (model$method=="nested"){ 49 | thetastar[i,]<-bobyqa(theta0, model$devfun, 50 | lower = model$lower, 51 | upper = rep(Inf, length(theta0)), pwt=repwt[,i]*pwt0)$par 52 | } else { ## need to pass in univariate weights as well, for all.pairs. 53 | thetastar[i,]<-bobyqa(theta0, model$devfun, 54 | lower = model$lower, 55 | upper = rep(Inf, length(theta0)), 56 | pwt_new=repwt[,i]*pwt0, 57 | pw_uni_new=weights(rdesign,"analysis")[,i], 58 | subtract_margins=model$subtract.margins)$par 59 | } 60 | betastar[i,]<-get("beta",environment(model$devfun)) 61 | s2star[i]<-get("s2",environment(model$devfun)) 62 | Dstar[i,,]<-get("L",environment(model$devfun)) 63 | } 64 | 65 | if(verbose) close(pb) 66 | 67 | rval<-list(theta=thetastar, beta=betastar, s2=s2star, D=Dstar,scale=scale, rscales=rscales, formula=model$formula) 68 | 69 | class(rval)<-"boot2lme" 70 | rval 71 | } 72 | 73 | print.boot2lme<-function(x,...){ 74 | cat("boot2lme:",length(x$s2),"replicates from", deparse(x$formula)) 75 | invisible(x) 76 | } 77 | 78 | vcov.boot2lme<-function(object, parameter=c("beta","theta","s2","relSD","SD","relVar","fullVar"),...){ 79 | parameter<-match.arg(parameter) 80 | 81 | nthetas<-NCOL(object$theta) 82 | 83 | if (nthetas==1){ 84 | V<-switch(parameter, 85 | beta=svrVar(object$beta, object$scale,object$rscales), 86 | theta=svrVar(object$theta, object$scale,object$rscales), 87 | s2=svrVar(object$s2, object$scale, object$rscales), 88 | relSD=svrVar(sqrt((apply(object$D,1, diag))), object$scale, object$rscales), ##FIXME: dimension decay when there's just one random effect 89 | SD=svrVar(sqrt((apply(object$D,1, diag))*object$s2), object$scale, object$rscales), 90 | relVar=svrVar((apply(object$D,1,c)), object$scale, object$rscales), 91 | fullVar=svrVar((apply(object$D,1,c))*object$s2, object$scale, object$rscales) 92 | ) 93 | 94 | } else { 95 | V<-switch(parameter, 96 | beta=svrVar(object$beta, object$scale,object$rscales), 97 | theta=svrVar(object$theta, object$scale,object$rscales), 98 | s2=svrVar(object$s2, object$scale, object$rscales), 99 | relSD=svrVar(sqrt(t(apply(object$D,1, diag))), object$scale, object$rscales), ##FIXME: dimension decay when there's just one random effect 100 | SD=svrVar(sqrt(t(apply(object$D,1, diag))*object$s2), object$scale, object$rscales), 101 | relVar=svrVar(t(apply(object$D,1,c)), object$scale, object$rscales), 102 | fullVar=svrVar(t(apply(object$D,1,c))*object$s2, object$scale, object$rscales) 103 | ) 104 | } 105 | 106 | as.matrix(V) 107 | 108 | } 109 | -------------------------------------------------------------------------------- /R/svy2lme.R: -------------------------------------------------------------------------------- 1 | ## like all.equal only specialised to matrices and returns logical 2 | is_close<-function(a,b, tolerance=1e-5){ 3 | all(abs((as.matrix(a)-as.matrix(b))/(as.matrix(a)+as.matrix(b)))1) 40 | stop("you need weights/probabilities for each stage of sampling") 41 | 42 | if (NCOL(design$cluster)==1 && !any(duplicated(design$cluster))){ 43 | ## ok, element sampling 44 | if(is.null(design$fpc$popsize)) #with replacement 45 | return(list(full=design$prob[ii]*design$prob[jj], 46 | first=design$prob[ii], 47 | cond=rep(1,length(ii)))) 48 | else if(is_close(as.vector(design$allprob), 49 | as.vector(design$fpc$sampsize/design$fpc$popsize),tolerance=1e-4)){ 50 | # srs, possibly stratified 51 | n<-design$fpc$sampsize 52 | N<-design$fpc$popsize 53 | return(list(full= n[ii]*(n[jj]-1)%//%( N[ii]*(N[jj]-1)), 54 | first=n[ii]/N[ii], 55 | cond=rep(1,length(ii)))) 56 | } else { 57 | ## Hajek high entropy: based on Brewer p153, equation 9.14 58 | pi<-design$allprob 59 | denom<-ave(1-pi, design$strata,FUN=sum) 60 | samestrata<-(design$strata[ii]==design$strata[jj]) 61 | return(list(full=pi[ii]*pi[jj]*(1- ifelse(samestrata, (1-pi[ii])*(1-pi[jj])/denom, 0)), 62 | first=pi[ii], 63 | cond=rep(1,length(ii)))) 64 | } 65 | } else if (all(by(design$prob, design$cluster[,1], function(x) length(unique(x)))==1)) { 66 | ## possibly ok, sampling of whole clusters 67 | warning("assuming no subsampling within clusters because multi-stage weights were not given") 68 | if(is.null(design$fpc$popsize)) #with replacement 69 | return(list(full=design$prob[ii], 70 | first=design$prob[ii], 71 | cond=rep(1, length(ii)))) 72 | else if(is_close(as.vector(design$allprob[[1]]), 73 | as.vector(design$fpc$sampsize/design$fpc$popsize),tolerance=1e-4)){ 74 | # srs, possibly stratified 75 | n<-design$fpc$sampsize 76 | N<-design$fpc$popsize 77 | return(list(full= (n[ii]/N[ii]), 78 | first=n[ii]/N[ii], 79 | cond=rep(1,length(ii)))) 80 | } else { 81 | ## Hajek high entropy: based on Brewer p153, equation 9.14 82 | pi<-design$allprob 83 | denom<-ave(1-pi, design$strata,FUN=sum) 84 | samestrata<-(design$strata[ii,1]==design$strata[jj,1]) 85 | return(list(full=pi[ii,1], 86 | first=pi[ii,1], 87 | cond=rep(1,length(ii)))) 88 | } 89 | } else { 90 | ## not ok 91 | stop("you need weights/probabilities for each stage of sampling") 92 | } 93 | } 94 | 95 | ## If we're here, we have multistage weights 96 | if (ncol(design$allprob)!=ncol(design$cluster)){ 97 | ## ? can't happen 98 | stop("number of stages of sampling does not match number of stages of weights") 99 | } 100 | 101 | if(is.null(design$fpc$popsize)){ #with replacement 102 | last<-ncol(design$allprob) 103 | return(list(full=design$prob[ii]*design$allprob[jj,last], 104 | first=apply(design$allprob[ii,-last, drop=FALSE], 1, prod), 105 | cond=design$allprob[ii,last]*design$allprob[jj,last])) 106 | } 107 | if(all.equal(as.matrix(design$allprob), as.matrix(design$fpc$sampsize/design$fpc$popsize),tolerance=1e-4)){ 108 | ## multistage stratified random sampling 109 | last<-ncol(design$allprob) 110 | n<-design$fpc$sampsize 111 | N<-design$fpc$popsize 112 | samestrata<-(design$strata[ii, ]==design$strata[jj, ]) 113 | pstages <-(n[ii,]/N[ii,])*(samestrata*((n[jj,]-1)%//%(N[jj,]-1)) + (1-samestrata)*(n[jj,]/N[jj,])) ##FIXME divide by zero when N==1 114 | return(list(full=apply((n[ii,]/N[ii,])[,-last,drop=FALSE],1,prod)*pstages[,last], 115 | first=apply((n[ii,]/N[ii,])[,-last,drop=FALSE],1,prod), 116 | cond=pstages[,last])) 117 | } 118 | 119 | ## Hajek high entropy: Brewer p153 120 | first<-cpwt<-rep_len(1,length(ii)) 121 | for (i in 1:ncol(design$allprob)){ 122 | pi<-design$allprob[,i] 123 | denom<-ave(1-pi, design$strata[,i],FUN=sum) 124 | samestrata<-(design$strata[ii,i]==design$strata[jj,i]) 125 | if (i==ncol(design$allprob)) 126 | cpwt<-cpwt*pi[ii]*pi[jj]*(1- ifelse(samestrata, (1-pi[ii])*(1-pi[jj])/denom, 0)) 127 | else 128 | first<-first*pi[ii] 129 | } 130 | return(list(full=first*cpwt, first= first, cond=cpwt)) 131 | 132 | } 133 | 134 | getpairs<-function(gp, TOOBIG=1000){ 135 | n<-length(gp) 136 | if (n < TOOBIG){ 137 | ij<-outer(gp,gp,"==") 138 | ij<-ij & upper.tri(ij) 139 | return(which(ij,arr.ind=TRUE)) 140 | } 141 | 142 | ng<-ave(1:n, gp, FUN=length) 143 | j<-rep(1:n,ng) 144 | i<-numeric(length(j)) 145 | for (g in unique(gp)){ 146 | this<- which(gp[j]==g) 147 | i[this] <-which(gp==g) 148 | } 149 | data.frame(i=i[i1) 334 | stop("you need weights/probabilities for each stage of sampling") 335 | 336 | if (NCOL(design$cluster)==1 && !any(duplicated(design$cluster))){ 337 | ## ok, element sampling, can't be same PSU 338 | if(is.null(design$fpc$popsize)) #with replacement 339 | return(list(full=design$prob[ii]*design$prob[jj], 340 | first=design$prob[ii], 341 | cond=rep(1,length(ii)))) 342 | else if(is_close(as.vector(design$allprob), 343 | as.vector(design$fpc$sampsize/design$fpc$popsize),tolerance=1e-4)){ 344 | ## srs, possibly stratified 345 | n<-design$fpc$sampsize 346 | N<-design$fpc$popsize 347 | return(list(full= n[ii]*(n[jj]-1)%//%( N[ii]*(N[jj]-1)), 348 | first=n[ii]/N[ii], 349 | cond=rep(1,length(ii)))) 350 | } else { 351 | ## Hajek high entropy: based on Brewer p153, equation 9.14 352 | pi<-design$allprob 353 | denom<-ave(1-pi, design$strata,FUN=sum) 354 | samestrata<-(design$strata[ii]==design$strata[jj]) 355 | return(list(full=pi[ii]*pi[jj]*(1- ifelse(samestrata, (1-pi[ii])*(1-pi[jj])/denom, 0)), 356 | first=pi[ii], 357 | cond=rep(1,length(ii)))) 358 | } 359 | } else if (all(by(design$prob, design$cluster[,1], function(x) length(unique(x)))==1)) { 360 | ## possibly ok, sampling of whole PSUs 361 | warning("assuming no subsampling within PSUs because multi-stage weights were not given") 362 | 363 | samePSU<-design$cluster[ii,1]==design$cluster[jj,1] 364 | 365 | if(is.null(design$fpc$popsize)){ #with replacement 366 | return(list(full=ifelse(samePSU, design$prob[ii], design$prob[ii]*design$prob[jj]), 367 | first=design$prob[ii], 368 | cond=rep(1, length(ii)))) 369 | } else if(is_close(as.vector(design$allprob[[1]]), 370 | as.vector(design$fpc$sampsize/design$fpc$popsize),tolerance=1e-4)){ 371 | # srs, possibly stratified 372 | n<-design$fpc$sampsize 373 | N<-design$fpc$popsize 374 | return(list(full= ifelse(samePSU, (n[ii]/N[ii]),(n[ii]/N[ii])*(n[jj]/N[jj])), 375 | first=n[ii]/N[ii], 376 | cond=rep(1,length(ii)))) 377 | } else { 378 | ## Hajek high entropy: based on Brewer p153, equation 9.14 379 | pi<-design$allprob 380 | denom<-ave(1-pi, design$strata,FUN=sum) 381 | samestrata<-(design$strata[ii,1]==design$strata[jj,1]) 382 | return(list(full=ifelse(samePSU, pi[ii,1], pi[ii,1]*pi[jj,1]*(1- ifelse(samestrata, (1-pi[ii,1])*(1-pi[jj,1])/denom, 0))), 383 | first=pi[ii,1], 384 | cond=rep(1,length(ii)))) 385 | } 386 | } else { 387 | ## not ok 388 | stop("you need weights/probabilities for each stage of sampling") 389 | } 390 | } 391 | 392 | ## If we're here, we have multistage weights 393 | if (ncol(design$allprob)!=ncol(design$cluster)){ 394 | ## ? can't happen 395 | stop("number of stages of sampling does not match number of stages of weights") 396 | } 397 | samePSU<-design$cluster[ii,1]==design$cluster[jj,1] 398 | 399 | if(is.null(design$fpc$popsize)){ #with replacement 400 | last<-ncol(design$allprob) 401 | return(list(full=ifelse(samePSU, design$prob[ii]*design$allprob[jj,last],design$prob[ii]*design$prob[jj]), 402 | first=apply(design$allprob[ii,-last, drop=FALSE], 1, prod), 403 | cond=design$allprob[ii,last]*design$allprob[jj,last])) 404 | } 405 | if(all.equal(as.matrix(design$allprob), as.matrix(design$fpc$sampsize/design$fpc$popsize),tolerance=1e-4)){ 406 | ## multistage stratified random sampling 407 | last<-ncol(design$allprob) 408 | n<-design$fpc$sampsize 409 | N<-design$fpc$popsize 410 | samestrata<-(design$strata[ii, ]==design$strata[jj, ]) 411 | pstages <-(n[ii,]/N[ii,])*(samestrata*((n[jj,]-1)%//%(N[jj,]-1)) + (1-samestrata)*(n[jj,]/N[jj,])) ##FIXME divide by zero when N==1 412 | return(list(full=ifelse(samePSU, apply((n[ii,]/N[ii,])[,-last,drop=FALSE],1,prod)*pstages[,last],design$prob[ii]*design$prob[jj]), 413 | first=apply((n[ii,]/N[ii,])[,-last,drop=FALSE],1,prod), 414 | cond=pstages[,last])) 415 | } 416 | 417 | ## Hajek high entropy: Brewer p153 418 | first<-cpwt<-rep_len(1,length(ii)) 419 | for (i in 1:ncol(design$allprob)){ 420 | pi<-design$allprob[,i] 421 | denom<-ave(1-pi, design$strata[,i],FUN=sum) 422 | samestrata<-(design$strata[ii,i]==design$strata[jj,i]) 423 | if (i==ncol(design$allprob)) 424 | cpwt<-cpwt*pi[ii]*pi[jj]*(1- ifelse(samestrata, (1-pi[ii])*(1-pi[jj])/denom, 0)) 425 | else 426 | first<-first*pi[ii] 427 | } 428 | return(list(full=ifelse(samePSU, first*cpwt,design$prob[ii]*design$prob[jj]), first= first, cond=cpwt)) 429 | 430 | } 431 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # svylme 2 | Mixed models for complex surveys 3 | 4 | This package fits linear mixed models to data from complex surveys, by maximising a weighted pairwise likelihood 5 | 6 | ``` 7 | remotes::install_github("tslumley/svylme") 8 | ``` 9 | 10 | ## Advantages 11 | 12 | It works (gives consistent estimates of the regression coefficients and variance components) for **any** linear mixed model and **any** design, without any restrictions on the sampling units 13 | and model clusters being related. For example, you could sample on home address but fit a model clustering on school. 14 | 15 | The implementation allows for correlated random effects such as you get in quantiative genetics 16 | 17 | ## Disadvantages 18 | 19 | Linear models only 20 | 21 | Some loss of efficiency compared to just fitting a design-based linear model (if you don't care about the variance components) 22 | 23 | There isn't (yet) an analog of the BLUPs of random effects, eg for small-area estimation 24 | 25 | If your sampling units and model clusters are the same, and your design isn't too strongly informative, you can likely get more precise estimates of the variance components with 26 | stagewise pseudolikelihood as implemented in Stata or Mplus. 27 | -------------------------------------------------------------------------------- /THANKS: -------------------------------------------------------------------------------- 1 | This code is based on research supported by the Marsden Fund Council 2 | from Government funding, managed by Royal Society Te Apārangi 3 | -------------------------------------------------------------------------------- /data/milk_subset.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tslumley/svylme/426d6edd43c341e238e9a2e00079ea71a7501a2c/data/milk_subset.rda -------------------------------------------------------------------------------- /data/nzmaths.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tslumley/svylme/426d6edd43c341e238e9a2e00079ea71a7501a2c/data/nzmaths.rda -------------------------------------------------------------------------------- /data/pisa.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tslumley/svylme/426d6edd43c341e238e9a2e00079ea71a7501a2c/data/pisa.rda -------------------------------------------------------------------------------- /inst/COPYRIGHTS: -------------------------------------------------------------------------------- 1 | The file R/svy2relmat.R contains code modified from the lme4qtl package. 2 | This is copyright Andrey Ziyatdinov, but is in turn based in part on 3 | the lme4 package, https://github.com/lme4/lme4. All these are GPL-3 4 | -------------------------------------------------------------------------------- /inst/scripts/README: -------------------------------------------------------------------------------- 1 | Examples 2 | 3 | pairwise-milk: milk yields, from pedigreemm. Large gene/environment example with varying group sizes 4 | milk-sampling: subsampling from the milk data 5 | twins-pairwise: bmi of twins, from mets. Large data set of mostly pairs. 6 | twins-sampling: subsampling from the twins data 7 | svy2lmesim2: simulated data where PSU partially overlaps with model cluster 8 | 9 | -------------------------------------------------------------------------------- /inst/scripts/milk-sampling.R: -------------------------------------------------------------------------------- 1 | library(pedigreemm) 2 | 3 | data(milk) 4 | 5 | milk <- within(milk, { 6 | id <- as.character(id) 7 | sdMilk <- milk / sd(milk) 8 | }) 9 | system.time( 10 | m0<-pedigreemm(sdMilk~lact+log(dim)+(1|id)+(1|herd),data=milk, pedigree=list(id=pedCowsR), REML=FALSE) 11 | ) 12 | 13 | A_gen <- getA(pedCowsR) 14 | ind <- rownames(A_gen) %in% milk$id 15 | A_gen <- A_gen[ind, ind] 16 | 17 | 18 | library(lme4qtl) 19 | library(svylme) 20 | library(sampling) 21 | 22 | simMilk<-function(theta,model, n){ 23 | Lambda<- getME(model, "Lambda") 24 | Zt<-getME(model,"Zt") 25 | Lind<-getME(model, "Lind") 26 | Lambda@x<- theta[Lind] 27 | s2<-model@devcomp$cmp["sigmaML"] 28 | m<-nrow(Zt) 29 | u<-matrix(rnorm(m*n,0,1),ncol=n) 30 | U<-crossprod(Zt,Lambda)%*%u*sqrt(s2) 31 | Y<-drop(getME(model,"X")%*%model@beta)+U+matrix(rnorm(nrow(U)*n,0,s=sqrt(s2)),ncol=n) 32 | Y 33 | } 34 | 35 | 36 | set.seed(2023-6-29) 37 | sim_milk<-simMilk(m0@optinfo$val, m0,2) 38 | milk$simMilk<-sim_milk[,1] 39 | 40 | herds<-aggregate(milk$milk,list(milk$herd),sum) 41 | herds$p<-herds[,2]*10/sum(herds[,2]) 42 | 43 | cfsvy<-function(model) c(coef(model), unlist(coef(model,random=TRUE))[c(2,5,1)]) 44 | 45 | cflmer<-function(model){ 46 | a<-VarCorr(model) 47 | c(fixef(model), as.vector(unlist(a[1:2])), attr(a,"sc")^2) 48 | } 49 | 50 | Pi2<-UPtillepi2(herds$p) 51 | dimnames(Pi2)<-list(herds[,1], herds[,1]) 52 | 53 | 54 | 55 | one.sim<-function(){ 56 | sampled_herds<-as.logical(UPtille(herds$p)) 57 | submilk<-subset(milk, herd %in% herds[sampled_herds,1]) 58 | submilk$herd<-as.character(submilk$herd) 59 | 60 | p<-herds$p[sampled_herds] 61 | names(p)<-herds[sampled_herds,1] 62 | submilk$p<-p[submilk$herd] 63 | 64 | 65 | PI2_sub<-Pi2[sampled_herds,sampled_herds][submilk$herd,submilk$herd] 66 | 67 | 68 | 69 | sub_milk_des<-svydesign(id=~herd,data=submilk, prob=~p,pps=ppsmat(PI2_sub)) 70 | 71 | 72 | m1a<-relmatLmer(sdMilk~lact+log(dim)+(1|id)+(1|herd),data=submilk, relmat=list(id=A_gen),REML=FALSE) 73 | m2a<-svy2relmer(sdMilk~lact+log(dim)+(1|id)+(1|herd),design=sub_milk_des, relmat=list(id=A_gen),return.devfun=TRUE) 74 | m3a<-svy2relmer(sdMilk~lact+log(dim)+(1|id)+(1|herd),design=sub_milk_des, relmat=list(id=A_gen),all.pairs=TRUE, subtract.margins=TRUE) 75 | 76 | 77 | 78 | m1b<-relmatLmer(simMilk~lact+log(dim)+(1|id)+(1|herd),data=submilk, relmat=list(id=A_gen),REML=FALSE) 79 | m2b<-svy2relmer(simMilk~lact+log(dim)+(1|id)+(1|herd),design=sub_milk_des, relmat=list(id=A_gen),return.devfun=TRUE) 80 | m3b<-svy2relmer(simMilk~lact+log(dim)+(1|id)+(1|herd),design=sub_milk_des, relmat=list(id=A_gen),all.pairs=TRUE, subtract.margins=TRUE) 81 | 82 | rval<-c( cflmer(m1a), 83 | cfsvy(m2a), 84 | cfsvy(m3a), 85 | cflmer(m1b), 86 | cfsvy(m2b), 87 | cfsvy(m3b) 88 | ) 89 | } 90 | 91 | results<-replicate(1000, tryCatch(one.sim(), error=function(e) rep(NA,36))) 92 | save(results,file="~/milk-sampling.rda") 93 | 94 | 95 | milk_des<-svydesign(id=~1 ,data=milk) 96 | 97 | true1a<-m0 98 | true1b<-svy2relmer(sdMilk~lact+log(dim)+(1|id)+(1|herd),design=milk_des, relmat=list(id=A_gen)) 99 | trub1c<-svy2relmer(sdMilk~lact+log(dim)+(1|id)+(1|herd),design=milk_des, relmat=list(id=A_gen),all.pairs=TRUE, subtract.margins=TRUE) 100 | 101 | 102 | true2a<-pedigreemm(simMilk~lact+log(dim)+(1|id)+(1|herd),data=milk, pedigree=list(id=pedCowsR), REML=FALSE) 103 | true2b<-svy2relmer(simMilk~lact+log(dim)+(1|id)+(1|herd),design=milk_des, relmat=list(id=A_gen)) 104 | trub2c<-svy2relmer(simMilk~lact+log(dim)+(1|id)+(1|herd),design=milk_des, relmat=list(id=A_gen),all.pairs=TRUE, subtract.margins=TRUE) 105 | 106 | -------------------------------------------------------------------------------- /inst/scripts/pairwise-milk.R: -------------------------------------------------------------------------------- 1 | library(pedigreemm) 2 | 3 | data(milk) 4 | 5 | milk <- within(milk, { 6 | id <- as.character(id) 7 | sdMilk <- milk / sd(milk) 8 | }) 9 | system.time( 10 | m0<-pedigreemm(sdMilk~lact+log(dim)+(1|id)+(1|herd),data=milk, pedigree=list(id=pedCowsR), REML=FALSE) 11 | ) 12 | 13 | A_gen <- getA(pedCowsR) 14 | ind <- rownames(A_gen) %in% milk$id 15 | A_gen <- A_gen[ind, ind] 16 | 17 | 18 | library(lme4qtl) 19 | system.time( 20 | m1<-relmatLmer(sdMilk~lact+log(dim)+(1|id)+(1|herd),data=milk, relmat=list(id=A_gen)) 21 | ) 22 | 23 | 24 | 25 | library(svylme) 26 | milk_des<-svydesign(id=~1,data=milk) 27 | system.time( 28 | m2<-svy2relmer(sdMilk~lact+log(dim)+(1|id)+(1|herd),design=milk_des, relmat=list(id=A_gen)) 29 | ) 30 | 31 | system.time( 32 | m3<-svy2relmer(sdMilk~lact+log(dim)+(1|id)+(1|herd),design=milk_des, relmat=list(id=A_gen),all.pairs=TRUE, subtract.margins=TRUE) 33 | ) 34 | 35 | 36 | simMilk<-function(theta,model, n){ 37 | Lambda<- getME(model, "Lambda") 38 | Zt<-getME(model,"Zt") 39 | Lind<-getME(model, "Lind") 40 | Lambda@x<- theta[Lind] 41 | s2<-model@devcomp$cmp["sigmaML"] 42 | m<-nrow(Zt) 43 | u<-matrix(rnorm(m*n,0,1),ncol=n) 44 | U<-crossprod(Zt,Lambda)%*%u*sqrt(s2) 45 | Y<-drop(getME(model,"X")%*%model@beta)+U+matrix(rnorm(nrow(U)*n,0,s=sqrt(s2)),ncol=n) 46 | Y 47 | } 48 | 49 | 50 | set.seed(2023-6-7) 51 | sim_milk<-simMilk(m0@optinfo$val, m0,2) 52 | milk$simMilk<-sim_milk[,1] 53 | sim_milk_des<-svydesign(id=~1,data=milk) 54 | 55 | m1a<-relmatLmer(simMilk~lact+log(dim)+(1|id)+(1|herd),data=milk, relmat=list(id=A_gen),REML=FALSE) 56 | m2a<-svy2relmer(simMilk~lact+log(dim)+(1|id)+(1|herd),design=sim_milk_des, relmat=list(id=A_gen),return.devfun=TRUE) 57 | m3a<-svy2relmer(simMilk~lact+log(dim)+(1|id)+(1|herd),design=sim_milk_des, relmat=list(id=A_gen),all.pairs=TRUE, subtract.margins=TRUE) 58 | 59 | 60 | 61 | 62 | m0 63 | m1 64 | m2 65 | m3 66 | 67 | m1a 68 | m2a 69 | m3a -------------------------------------------------------------------------------- /inst/scripts/pisa-analysis.R: -------------------------------------------------------------------------------- 1 | 2 | data(nzmaths) 3 | 4 | nzmaths$cSTRATUM<- nzmaths$STRATUM 5 | nzmaths$cSTRATUM[nzmaths$cSTRATUM=="NZL0102"]<-"NZL0202" 6 | 7 | 8 | des<-svydesign(id=~SCHOOLID+STIDSTD, strata=~cSTRATUM, nest=TRUE, 9 | weights=~W_FSCHWT+condwt, data=nzmaths) 10 | 11 | des<-update(des, centPCGIRLS=PCGIRLS-0.5) 12 | jkdes<-as.svrepdesign(des) 13 | 14 | m1<-svy2lme(PV1MATH~ (1+ ST04Q01 |SCHOOLID)+ST04Q01*(centPCGIRLS+SMRATIO)+MATHEFF+OPENPS, design=des, return.devfun=TRUE) 15 | m2<-svy2lme(PV1MATH~ (1+ ST04Q01 |SCHOOLID)+ST04Q01*(centPCGIRLS+SMRATIO)+MATHEFF+OPENPS, design=des, return.devfun=TRUE,all.pairs=TRUE, subtract.margins=TRUE) 16 | 17 | m1var<-boot2lme(m1,jkdes,verbose=TRUE) 18 | m2var<-boot2lme(m2,jkdes,verbose=TRUE) 19 | -------------------------------------------------------------------------------- /inst/scripts/svy2lmesim2.R: -------------------------------------------------------------------------------- 1 | library(lme4) 2 | library(svylme) 3 | 4 | 5 | library(parallel) 6 | RNGkind("L'Ecuyer-CMRG") 7 | mcreplicate<-function(n, expr,...){ 8 | l<-mclapply(integer(n), eval.parent(substitute(function(...) expr)), mc.cores=6,mc.set.seed = TRUE, mc.preschedule=TRUE) 9 | simplify2array(l, higher = TRUE) 10 | } 11 | 12 | 13 | set.seed(2023-6-20) 14 | 15 | N1=400 16 | N2=400 17 | latitude<-1:N2 18 | longitude<-1:N1 19 | population<-expand.grid(lat=latitude,long=longitude) 20 | population$PSU<-population$long 21 | overlap=ceiling(N2*1/2) 22 | 23 | 24 | cflmer<-function(model){ 25 | a<-VarCorr(model) 26 | c(fixef(model), as.vector(unlist(a[1:2])), attr(a,"sc")^2, SE(model), c(0,0)) 27 | } 28 | cfsvy<-function(model){ 29 | a<-coef(model, random=TRUE) 30 | c(coef(model), diag(a$varb),a$s2,SE(model),c(0,0)) 31 | } 32 | 33 | cfglm<-function(model){c(coef(model),c(0,0), SE(model), c(0,0))} 34 | 35 | model_cluster<-function(population, overlap){ 36 | population$cluster<-numeric(nrow(population)) 37 | 38 | id<-ifelse(population$lat<=overlap, 39 | population$long, 40 | ((population$long+population$lat-overlap) %% N1)+1 41 | ) 42 | population$cluster<-id 43 | population 44 | } 45 | 46 | 47 | f<-function(overlap,REPS=1000){ 48 | 49 | population<-model_cluster(population,overlap) 50 | population$x<- population$long %% 40 51 | population$z<-rnorm(400*400) 52 | population$u<-sort(rnorm(400))[population$cluster] 53 | population$y<- with(population, x+z + u+rnorm(400*400)) 54 | 55 | population$strata<-(population$long-1) %/% 40 56 | population$uid<-1:nrow(population) 57 | 58 | true<-cflmer(lmer(y~x+z+(1|cluster), population)) 59 | 60 | rr<-mcreplicate(REPS, { 61 | 62 | stratsize<- c(20,5,4,3,2,2,3,4,5,20) 63 | names(stratsize)<-unique(population$strata) 64 | sstrat<-stratsample(population$strata[!duplicated(population$PSU)], stratsize) 65 | 66 | stage1psu<- population$PSU[!duplicated(population$PSU)][sstrat] 67 | stage1<- subset(population, PSU %in% stage1psu) 68 | 69 | 70 | stratsize2<-rep(c(20,8,20),c(1,66,1)) 71 | names(stratsize2)<-unique(stage1$PSU) 72 | stage2<-stage1[stratsample(stage1$PSU, stratsize2),] 73 | 74 | 75 | stage2$fpc1<-400/10 76 | stage2$fpc2<-400 77 | des<-svydesign(id=~PSU+uid, fpc=~fpc1+fpc2, strata=~strata,data=stage2) 78 | pair<-svy2lme(y~x+z+(1|cluster), design=des,return.devfun=TRUE) 79 | jkdes<-as.svrepdesign(des) 80 | jkvar<-boot2lme(pair,jkdes) 81 | 82 | c( 83 | cfsvy(pair), 84 | ##cflmer(lmer(y~x+z+(1|cluster), population)), 85 | cflmer(lmer(y~x+z+(1|cluster), stage2)), 86 | cfglm(svyglm(y~x+z+(1|cluster), design=des)), 87 | rep(0,5),SE(jkvar,"beta"), SE(jkvar,"fullVar"), sqrt(vcov(jkvar,"s2")) 88 | ) 89 | }) 90 | 91 | list( 92 | overlap=overlap/N2, 93 | true=true, 94 | median=matrix(apply(rr, 1, median),byrow=TRUE,nrow=4), 95 | mad=matrix(apply(rr, 1, mad),byrow=TRUE,nrow=4) 96 | ) 97 | } 98 | 99 | 100 | ##results<- lapply(c(0.1,0.25,0.5,0.75,0.9,1)*N2, f) 101 | 102 | 103 | results_0.25<-replicate(100, f(N2*1/4)) 104 | results_0.75<-replicate(100, f(N2*3/4)) 105 | save(results_0.25,results_0.75, file="~/svy2lmesim-crossed1.rda") 106 | 107 | 108 | ## summaries 109 | ## > round(rowMeans(sapply(results_0.75["median",],function(x) x[1,])),3) 110 | ## [1] -0.121 1.006 1.000 0.993 0.965 0.247 0.010 0.078 111 | ## > round(rowMeans(sapply(results_0.75["mad",],function(x) x[1,])),3) 112 | ## [1] 0.278 0.013 0.092 0.189 0.126 0.072 0.003 0.021 113 | -------------------------------------------------------------------------------- /inst/scripts/twins-pairwise.R: -------------------------------------------------------------------------------- 1 | data("twinbmi",package="mets") 2 | library(svylme) 3 | library(Matrix) 4 | I_twin<-with(twinbmi, Matrix(outer(1:nrow(twinbmi),1:nrow(twinbmi),function(i,j) (id[i]==id[j]) & (i!=j)))) 5 | I_mz<-with(twinbmi, Matrix(outer(1:nrow(twinbmi),1:nrow(twinbmi),function(i,j) (id[i]==id[j]) & (zyg[i]=="MZ") & (i!=j)))) 6 | 7 | n<-nrow(I_twin) 8 | Phi_env<-I_twin+Diagonal(n) 9 | Phi_add<-I_twin/2+I_mz/2+Diagonal(n) 10 | Phi_dom<-I_twin/4+I_mz*3/4+Diagonal(n) 11 | 12 | dimnames(Phi_env)<-list(twinbmi$id,twinbmi$id) 13 | dimnames(Phi_add)<-list(twinbmi$id,twinbmi$id) 14 | dimnames(Phi_dom)<-list(twinbmi$id,twinbmi$id) 15 | 16 | 17 | twinbmi$id2<-twinbmi$id 18 | twinbmi$id3<-twinbmi$id 19 | 20 | des<-svydesign(id=~id,data=twinbmi) 21 | 22 | ## environment 23 | svy2lme(bmi ~ age+gender+(1|id), design=des) 24 | svy2relmer(bmi ~ age+gender+(1|id), design=des,relmat=list(id=Phi_env)) 25 | lme4::lmer(bmi ~ age+gender+(1|id),data=twinbmi) 26 | lme4qtl::relmatLmer(bmi ~ age+gender+(1|id),data=twinbmi,relmat=list(id=Phi_env)) 27 | svy2lme(bmi ~ age+gender+(1|id), design=des,all.pairs=TRUE,subtract.margins=TRUE) 28 | 29 | ## environment plus additive genetic 30 | svy2relmer(bmi ~ age+gender+(1|id)+(1|id2), design=des,relmat=list(id=Phi_env,id2=Phi_add)) 31 | lme4qtl::relmatLmer(bmi ~ age+gender+(1|id)+(1|id2), data=twinbmi,relmat=list(id=Phi_env,id2=Phi_add)) 32 | 33 | 34 | ## environment plus additive and dominant genetic 35 | svy2relmer(bmi ~ age+gender+(1|id)+(1|id2)+(1|id3), design=des,relmat=list(id=Phi_env,id2=Phi_add,id3=Phi_dom)) 36 | lme4qtl::relmatLmer(bmi ~ age+gender+(1|id)+(1|id2)+(1|id3), data=twinbmi,relmat=list(id=Phi_env,id2=Phi_add,id3=Phi_dom)) 37 | -------------------------------------------------------------------------------- /inst/scripts/twins-sampling.R: -------------------------------------------------------------------------------- 1 | data("twinbmi",package="mets") 2 | library(svylme) 3 | library(lme4) 4 | library(Matrix) 5 | I_twin<-with(twinbmi, Matrix(outer(1:nrow(twinbmi),1:nrow(twinbmi),function(i,j) (id[i]==id[j]) & (i!=j)))) 6 | I_mz<-with(twinbmi, Matrix(outer(1:nrow(twinbmi),1:nrow(twinbmi),function(i,j) (id[i]==id[j]) & (zyg[i]=="MZ") & (i!=j)))) 7 | 8 | n<-nrow(I_twin) 9 | Phi_env<-I_twin+Diagonal(n) 10 | Phi_add<-I_twin/2+I_mz/2+Diagonal(n) 11 | Phi_dom<-I_twin/4+I_mz*3/4+Diagonal(n) 12 | 13 | dimnames(Phi_env)<-list(twinbmi$id,twinbmi$id) 14 | dimnames(Phi_add)<-list(twinbmi$id,twinbmi$id) 15 | dimnames(Phi_dom)<-list(twinbmi$id,twinbmi$id) 16 | 17 | 18 | twinbmi$id2<-twinbmi$id 19 | twinbmi$id3<-twinbmi$id 20 | 21 | ## sampling 22 | 23 | ## whole twins 24 | 25 | twinbmi$dbmi<-with(twinbmi, ave(bmi,id, FUN=function(v) if (length(v)>1) abs(diff(v)) else 0)) 26 | 27 | dup<-duplicated(twinbmi$id) 28 | uid<-twinbmi$id[!dup] 29 | udbmi<-twinbmi$dbmi[!dup] 30 | twinbmi$strata<-cut(twinbmi$dbmi, quantile(udbmi,(1:5)/5), include.lowest=TRUE) 31 | nsample<-c(50,50,150,400) 32 | names(nsample)<-levels(twinbmi$strata) 33 | 34 | results<-replicate(1000,{tryCatch({ 35 | 36 | insample<- twinbmi$id %in% uid[stratsample(twinbmi$strata[!dup], nsample)] 37 | twinbmi$fpc<-1383 38 | des<-svydesign(id=~id,data=twinbmi[insample,], strata=~strata, fpc=~fpc) 39 | 40 | 41 | 42 | ## environment 43 | a<-svy2lme(bmi ~ age+gender+(1|id), design=des) 44 | b<-lme4::lmer(bmi ~ age+gender+(1|id),data=twinbmi[insample,]) 45 | ##svy2lme(bmi ~ age+gender+(1|id), design=des,all.pairs=TRUE,subtract.margins=TRUE) 46 | 47 | ## environment plus additive genetic 48 | d<-svy2relmer(bmi ~ age+gender+(1|id)+(1|id2), design=des,relmat=list(id=Phi_env,id2=Phi_add)) 49 | e<-lme4qtl::relmatLmer(bmi ~ age+gender+(1|id)+(1|id2), data=twinbmi[insample,],relmat=list(id=Phi_env,id2=Phi_add)) 50 | 51 | r1<-list(a=c(coef(a),sqrt(unlist(coef(a, random=TRUE))[2:1])), 52 | b=c(fixef(b),c(sqrt(unlist(VarCorr(b)[1])),attr(VarCorr(b),"sc"))), 53 | d=c(coef(d),sqrt(unlist(coef(d, random=TRUE))[c(2,5,1)])), 54 | e=c(fixef(e),c(sqrt(unlist(VarCorr(e)[1:2])),attr(VarCorr(e),"sc"))) 55 | ) 56 | 57 | ## individuals 58 | 59 | twinbmi$keep<-rbinom(nrow(twinbmi), 1, .5) 60 | twinsubsample <- subset(twinbmi[insample,], keep==1) 61 | twinsubsample$fpc2<-2 62 | iid<-1:nrow(twinsubsample) 63 | des2<-svydesign(id=~id+iid,data=twinsubsample, strata=~strata, fpc=~fpc+fpc2) 64 | 65 | ## environment 66 | A<-svy2lme(bmi ~ age+gender+(1|id), design=des2) 67 | ##lme4::lmer(bmi ~ age+gender+(1|id),data=twinsubsample) 68 | B<-svy2lme(bmi ~ age+gender+(1|id), design=des2,all.pairs=TRUE,subtract.margins=TRUE) 69 | 70 | ## environment plus additive genetic 71 | D<-svy2relmer(bmi ~ age+gender+(1|id)+(1|id2), design=des2,relmat=list(id=Phi_env,id2=Phi_add)) 72 | ##lme4qtl::relmatLmer(bmi ~ age+gender+(1|id)+(1|id2), data=twinsubsample,relmat=list(id=Phi_env,id2=Phi_add)) 73 | E<-svy2relmer(bmi ~ age+gender+(1|id)+(1|id2), design=des2,relmat=list(id=Phi_env,id2=Phi_add),all.pairs=TRUE,subtract.margins=TRUE) 74 | 75 | 76 | r2<-list(A=c(coef(A),sqrt(unlist(coef(A, random=TRUE))[2:1])), 77 | B=c(coef(B),sqrt(unlist(coef(B, random=TRUE))[2:1])), 78 | D=c(coef(D),sqrt(unlist(coef(D, random=TRUE))[c(2,5,1)])), 79 | E=c(coef(E),sqrt(unlist(coef(E, random=TRUE))[c(2,5,1)])) 80 | ) 81 | 82 | c(r1,r2) 83 | }, error=function(e) NULL) 84 | }) 85 | 86 | save(results, file="twin-sampling.rda") 87 | -------------------------------------------------------------------------------- /man/boot2lme.Rd: -------------------------------------------------------------------------------- 1 | \name{boot2lme} 2 | \alias{boot2lme} 3 | \alias{vcov.boot2lme} 4 | %- Also NEED an '\alias' for EACH other topic documented here. 5 | \title{ 6 | Resampling variances for svy2lme 7 | } 8 | \description{ 9 | Computes variance estimates for the weighted-pairwise-likelihood 10 | linear mixed models fitted by \code{\link{svy2lme}} using replicate 11 | weights. The replicate weights for a pair are obtained by dividing by 12 | the sampling weight and then multiplying by the replicate 13 | weight. There will be a warning if the ratio of replicate weight to 14 | sampling weight differs for observations in the same pair. 15 | } 16 | \usage{ 17 | boot2lme(model, rdesign, verbose = FALSE) 18 | \method{vcov}{boot2lme}(object, 19 | parameter=c("beta", "theta","s2", "relSD" ,"SD","relVar","fullVar"), 20 | ...) 21 | } 22 | %- maybe also 'usage' for other objects documented here. 23 | \arguments{ 24 | \item{model}{ 25 | A model returned by \code{svy2lme} with the \code{devfun=TRUE} option 26 | } 27 | \item{rdesign}{ 28 | replicate-weights design corresponding to the design used to fit the model, see example 29 | } 30 | 31 | \item{verbose}{ 32 | print progess information? 33 | } 34 | \item{object}{returned by \code{boot2lme}} 35 | \item{\dots}{for method compatibility} 36 | \item{parameter}{Variance matrix for: regression parameters, relative variance 37 | parameters on Cholesky square root scale, residual variance, relative 38 | standard errors of random effects, standard errors of random effects, 39 | entire relative variance matrix, entire variance matrix} 40 | } 41 | \details{ 42 | The variance is estimated from the replicates \code{thetastar} and original point estimate \code{theta} as \code{scale*sum(rscales* (thetastar-theta)^2)}. 43 | } 44 | \value{ 45 | For \code{boot2lme}, an object of class \code{boot2lme} with components 46 | \item{theta}{replicates of variance parameters (on Cholesky square 47 | root scale)} 48 | \item{beta}{replicates of regression parameters} 49 | \item{D}{replicates of relative variance matrix} 50 | \item{scale,rscales}{from the input} 51 | \item{formula}{model formula from the input} 52 | 53 | For the \code{vcov} method, a variance matrix. 54 | } 55 | 56 | 57 | 58 | \examples{ 59 | 60 | data(api, package="survey") 61 | 62 | # two-stage cluster sample 63 | dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2) 64 | 65 | m0<-svy2lme(api00~(1|dnum)+ell+mobility, design=dclus2,return.devfun=TRUE) 66 | jkdes<-as.svrepdesign(dclus2, type="mrb") 67 | jkvar<-boot2lme(m0,jkdes) 68 | 69 | SE(jkvar, "beta") 70 | SE(jkvar, "SD") 71 | SE(jkvar,"s2") 72 | 73 | 74 | m1<-svy2lme(api00~(1|dnum)+ell+mobility, 75 | design=dclus2,return.devfun=TRUE, all.pairs=TRUE, subtract.margins=TRUE) 76 | jk1var<-boot2lme(m1,jkdes) 77 | 78 | SE(jk1var, "beta") 79 | SE(jk1var, "SD") 80 | 81 | 82 | \donttest{ 83 | ##takes a few minutes 84 | data(pisa) 85 | 86 | pisa$w_condstuwt<-with(pisa, w_fstuwt/wnrschbw) 87 | pisa$id_student<-1:nrow(pisa) 88 | 89 | dpisa<-survey::svydesign(id=~id_school+id_student, weight=~wnrschbw+w_condstuwt, data=pisa) 90 | 91 | m<-svy2lme(isei~(1+female|id_school)+female+high_school+college+one_for+both_for+test_lang, 92 | design=dpisa, return.devfun=TRUE,method="nested") 93 | 94 | bpisa<-as.svrepdesign(dpisa, type="bootstrap", replicates=100) 95 | 96 | b<-boot2lme(m, bpisa, verbose=TRUE) 97 | str(b) 98 | 99 | vcov(b,"beta") 100 | vcov(b,"s2") 101 | 102 | ## SE() inherits the parameter= argument 103 | SE(b,"beta") 104 | SE(b,"theta") 105 | SE(b,"SD") 106 | 107 | } 108 | } 109 | % Add one or more standard keywords, see file 'KEYWORDS' in the 110 | % R documentation directory. 111 | \keyword{regression}% use one of RShowDoc("KEYWORDS") 112 | \keyword{survey}% __ONLY ONE__ keyword per line 113 | -------------------------------------------------------------------------------- /man/milk_subset.Rd: -------------------------------------------------------------------------------- 1 | \name{milk_subset} 2 | \alias{milk_subset} 3 | \alias{A_gen} 4 | \docType{data} 5 | \title{ 6 | Milk production (subset) 7 | } 8 | \description{ 9 | A subset of a dataset from the \code{pedigreemm} package, created as an 10 | example for the \code{lme4qtl} package. The original data had records 11 | of the milk production of 3397 lactations from first through fifty 12 | parity Holsteins. These were 1,359 cows, daughters of 38 sires in 57 13 | herds. The data was downloaded from the USDA internet site. All 14 | lactation records represent cows with at least 100 days in milk, with an 15 | average of 347 days. Milk yield ranged from 4,065 to 19,345 kg estimated 16 | for 305 days, averaging 11,636 kg. There were 1,314, 1,006, 640, 334 and 17 | 103 records were from first thorough fifth lactation animals. The 18 | subset is of cows from 3 sires. 19 | 20 | 21 | } 22 | \usage{data("milk_subset")} 23 | \format{ 24 | A data frame with 316 observations on the following 13 variables. 25 | \describe{ 26 | \item{\code{id}}{numeric identifier of cow} 27 | \item{\code{lact}}{number of lactation for which production is measured} 28 | \item{\code{herd}}{a factor indicating the herd} 29 | \item{\code{sire}}{a factor indicating the sire} 30 | \item{\code{dim}}{number of days in milk for that lactation} 31 | \item{\code{milk}}{milk production estimated at 305 days} 32 | \item{\code{fat}}{fat production estimated at 305 days} 33 | \item{\code{prot}}{protein production estimated at 305 days} 34 | \item{\code{scs}}{the somatic cell score} 35 | \item{\code{sdMilk}}{\code{milk} scaled by cow-specific 36 | standard deviation} 37 | \item{\code{herd_id}}{a character vector indicating the herd} 38 | \item{\code{one}}{a vector of 1s for convenience in weighting} 39 | \item{\code{one2}}{another vector of 1s for convenience in weighting} 40 | } 41 | } 42 | \details{ 43 | This data example gives noticeably different results for full likelihood 44 | and pairwise likelihood because the model is misspecified. The best 45 | fitting linear model for the large herd 89 is different, and that herd 46 | gets relatively more weight in the pairwise analysis (because it has 47 | more pairs). 48 | 49 | } 50 | \source{ 51 | Constructed at \url{https://github.com/variani/lme4qtl/blob/master/vignettes/pedigreemm.Rmd} 52 | } 53 | \references{ 54 | 2010. A.I. Vazquez, D.M. Bates, G.J.M. Rosa, D. Gianola and K.A. Weigel. 55 | Technical Note: An R package for fitting generalized linear mixed models 56 | in animal breeding. Journal of Animal Science, 88:497-504. 57 | } 58 | \examples{ 59 | data(milk_subset) 60 | herd_des<- svydesign(id = ~herd + id, prob = ~one + one2, data = milk_subset) 61 | lm(sdMilk ~ lact + log(dim),data=milk_subset,subset=herd==89) 62 | lm(sdMilk ~ lact + log(dim),data=milk_subset,subset=herd!=89) 63 | svy2lme(sdMilk ~ lact + log(dim) + (1|herd), design=herd_des,method="nested") 64 | svy2lme(sdMilk ~ lact + log(dim) + (1|herd), design=herd_des,method="general") 65 | 66 | ## pairwise result is closer to herd 89 than to remainder 67 | lme4::lmer(sdMilk ~ lact + log(dim) + (1|herd), data=milk_subset) 68 | svy2relmer(sdMilk ~ lact + log(dim) + (1|id) + (1|herd), design=herd_des, 69 | relmat = list(id = A_gen)) 70 | 71 | ## compare to all pairs 72 | svy2lme(sdMilk ~ lact + log(dim) + (1|herd), 73 | design=herd_des,method="general", all.pairs=TRUE) 74 | svy2lme(sdMilk ~ lact + log(dim) + (1|herd), 75 | design=herd_des,method="general", all.pairs=TRUE, subtract.margins=TRUE) 76 | 77 | } 78 | \keyword{datasets} 79 | -------------------------------------------------------------------------------- /man/nzmaths.Rd: -------------------------------------------------------------------------------- 1 | \name{nzmaths} 2 | \alias{nzmaths} 3 | \docType{data} 4 | \title{ 5 | Maths Performance Data from the PISA 2012 survey in New Zealand 6 | } 7 | \description{ 8 | Data on maths performance, gender, some problem-solving variables and some school resource variables. 9 | } 10 | \usage{data("nzmaths")} 11 | \format{ 12 | A data frame with 4291 observations on the following 26 variables. 13 | \describe{ 14 | \item{\code{SCHOOLID}}{School ID} 15 | \item{\code{CNT}}{Country id: a factor with levels \code{New Zealand}} 16 | \item{\code{STRATUM}}{a factor with levels \code{NZL0101} \code{NZL0102} \code{NZL0202} \code{NZL0203}} 17 | \item{\code{OECD}}{Is the country in the OECD?} 18 | \item{\code{STIDSTD}}{Student ID} 19 | \item{\code{ST04Q01}}{Gender: a factor with levels \code{Female} \code{Male}} 20 | \item{\code{ST14Q02}}{Mother has university qualifications \code{No} \code{Yes}} 21 | \item{\code{ST18Q02}}{Father has university qualifications \code{No} \code{Yes}} 22 | \item{\code{MATHEFF}}{Mathematics Self-Efficacy: numeric vector} 23 | \item{\code{OPENPS}}{Mathematics Self-Efficacy: numeric vector} 24 | \item{\code{PV1MATH},\code{PV2MATH},\code{PV3MATH},\code{PV4MATH},\code{PV5MATH} }{'Plausible values' (multiple imputations) for maths performance} 25 | \item{\code{W_FSTUWT}}{Design weight for student} 26 | \item{\code{SC35Q02}}{Proportion of maths teachers with professional development in maths in past year} 27 | \item{\code{PCGIRLS}}{Proportion of girls at the school} 28 | \item{\code{PROPMA5A}}{Proportion of maths teachers with ISCED 5A (math major)} 29 | \item{\code{ABGMATH}}{Does the school group maths students: a factor with levels \code{No ability grouping between any classes} \code{One of these forms of ability grouping between classes for s} \code{One of these forms of ability grouping for all classes}} 30 | \item{\code{SMRATIO}}{Number of students per maths teacher} 31 | \item{\code{W_FSCHWT}}{Design weight for school} 32 | \item{\code{condwt}}{Design weight for student given school} 33 | } 34 | } 35 | 36 | \source{ 37 | A subset extracted from the \code{PISA2012lite} R package, \url{https://github.com/pbiecek/PISA2012lite} 38 | } 39 | \references{ 40 | OECD (2013) PISA 2012 Assessment and Analytical Framework: Mathematics, Reading, Science, Problem Solving and Financial Literacy. OECD Publishing. 41 | } 42 | \examples{ 43 | data(nzmaths) 44 | 45 | oo<-options(survey.lonely.psu="average") ## only one PSU in one of the strata 46 | 47 | des<-svydesign(id=~SCHOOLID+STIDSTD, strata=~STRATUM, nest=TRUE, 48 | weights=~W_FSCHWT+condwt, data=nzmaths) 49 | 50 | ## This example works, but it takes more than five seconds to run, so it 51 | ## has been commented out 52 | ## m1<-svy2lme(PV1MATH~ (1+ ST04Q01 |SCHOOLID)+ST04Q01*(PCGIRLS+SMRATIO)+MATHEFF+OPENPS, design=des) 53 | 54 | options(oo) 55 | 56 | } 57 | \keyword{datasets} 58 | -------------------------------------------------------------------------------- /man/pisa.Rd: -------------------------------------------------------------------------------- 1 | \name{pisa} 2 | \alias{pisa} 3 | \docType{data} 4 | \title{ 5 | Data from the PISA international school survey 6 | } 7 | \description{ 8 | Data from the PISA survey of schools, obtained from Stata, who obtained it from Rabe-Hesketh & Skrondal. 9 | } 10 | \usage{data("pisa")} 11 | \format{ 12 | A data frame with 2069 observations on the following 11 variables. 13 | \describe{ 14 | \item{\code{female}}{1 for female} 15 | \item{\code{isei}}{socioeconomic index} 16 | \item{\code{w_fstuwt}}{student sampling weight (total)} 17 | \item{\code{wnrschbw}}{school sampling weight} 18 | \item{\code{high_school}}{1 if highest level of parents' education is high school} 19 | \item{\code{college}}{1 if highest level of parents' education is college/uni} 20 | \item{\code{one_for}}{1 if one parent is foreign-born} 21 | \item{\code{both_for}}{1 if both parents are foreign-born} 22 | \item{\code{test_lang}}{1 if the test language is spoken at home} 23 | \item{\code{pass_read}}{1 if the student passed a reading proficiency test} 24 | \item{\code{id_school}}{school (sampling unit) identifier} 25 | } 26 | } 27 | 28 | \source{ 29 | Data downloaded from 30 | \url{https://www.stata-press.com/data/r15/pisa2000.dta} 31 | } 32 | \references{ 33 | Rabe-Hesketh, S., and A. Skrondal. 2006. Multilevel modelling of 34 | complex survey data.Journal of the Royal Statistical Society, Series A. 169: 805-827 35 | } 36 | \examples{ 37 | data(pisa) 38 | 39 | ## This model doesn't make a lot of sense, but it's the one in the 40 | ## Stata documentation because the outcome variable is numeric. 41 | 42 | pisa$w_condstuwt<-with(pisa, w_fstuwt/wnrschbw) 43 | pisa$id_student<-1:nrow(pisa) 44 | 45 | dpisa<-survey::svydesign(id=~id_school+id_student, weight=~wnrschbw+w_condstuwt, data=pisa) 46 | 47 | 48 | svy2lme(isei~(1|id_school)+female+high_school+college+one_for+both_for+test_lang, 49 | design=dpisa) 50 | 51 | 52 | 53 | 54 | } 55 | \keyword{datasets} 56 | -------------------------------------------------------------------------------- /man/svy2lme.Rd: -------------------------------------------------------------------------------- 1 | \name{svy2lme} 2 | \alias{svy2lme} 3 | \encoding{utf8} 4 | \alias{coef.svy2lme} 5 | %- Also NEED an '\alias' for EACH other topic documented here. 6 | \title{ 7 | Linear mixed models by pairwise likelihood 8 | } 9 | \description{ 10 | Fits linear mixed models to survey data by maximimising the profile pairwise composite 11 | likelihood. 12 | } 13 | \usage{ 14 | svy2lme(formula, design, sterr=TRUE, return.devfun=FALSE, 15 | method=c("general","nested"), all.pairs=FALSE, subtract.margins=FALSE) 16 | \method{coef}{svy2lme}(object,...,random=FALSE) 17 | } 18 | %- maybe also 'usage' for other objects documented here. 19 | \arguments{ 20 | \item{formula}{ 21 | Model formula as in the \code{lme4} package} 22 | \item{design}{ 23 | A survey design object produced by \code{survey::svydesign}. The 24 | pairwise weights will be computed from this design, which must have 25 | separate probabilities or weights for each stage of sampling. 26 | } 27 | \item{sterr}{ 28 | Estimate standard errors for fixed effects? Set to \code{FALSE} for 29 | greater speed when using resampling to get standard errors. Also, 30 | a PPS-without-replacement survey design can't get sandwich standard errors 31 | (because fourth-order sampling probabilities would be needed) 32 | } 33 | 34 | \item{return.devfun}{If \code{TRUE}, return the deviance function as a 35 | component of the object. This will increase the memory use 36 | substantially, but allows for bootstrapping.} 37 | \item{method}{\code{"nested"} requires the model clusters to have a 38 | single grouping variable that is the same as the primary sampling 39 | unit. It's faster.} 40 | \item{all.pairs}{Only with \code{method="general"}, use all pairs 41 | rather than just correlated pairs?} 42 | \item{subtract.margins}{If \code{TRUE} and \code{all.pairs=TRUE}, 43 | compute with all pairs by the faster algorithm involving subtraction 44 | from the marginal likelihood} 45 | \item{object}{\code{svy2lme} object} 46 | \item{\dots}{for method compatibility} 47 | \item{random}{if \code{TRUE}, the variance components rather than 48 | the fixed effects} 49 | } 50 | \details{ 51 | The population pairwise likelihood would be the sum of the 52 | loglikelihoods for a pair of observations, taken over all pairs of 53 | observations from the same cluster. This is estimated by taking a 54 | weighted sum over pairs in the sample, with the weights being the 55 | reciprocals of pairwise sampling probabilities. The advantage over 56 | standard weighted pseudolikelihoods is that there is no 57 | large-cluster assumption needed and no rescaling of weights. The 58 | disadvantage is some loss of efficiency and simpler point 59 | estimation. 60 | 61 | With \code{method="nested"} we have the method of Yi et al 62 | (2016). Using \code{method="general"} relaxes the conditions on the 63 | design and model. 64 | 65 | The code uses \code{lme4::lmer} to parse the formula and produce 66 | starting values, profiles out the fixed effects and residual 67 | variance, and then uses \code{minqa::bobyqa} to maximise the 68 | resulting profile deviance. 69 | 70 | As with \code{lme4::lmer}, the default is to estimate the 71 | correlations of the random effects, since there is typically no 72 | reason to assume these are zero. You can force two random effects to 73 | be independent by entering them in separate terms, eg 74 | \code{(1|g)+(-1+x|g)} in the model formula asks for a random intercept 75 | and a random slope with no random intercept, which will be uncorrelated. 76 | 77 | The internal parametrisation of the variance components uses the 78 | Cholesky decomposition of the relative variance matrix (the variance 79 | matrix divided by the residual variance), as in 80 | \code{lme4::lmer}. The component \code{object$s2} contains the 81 | estimated residual variance and the component \code{object$opt$par} 82 | contains the elements of the Cholesky factor in column-major order, 83 | omitting any elements that are forced to be zero by requiring 84 | indepedent random effects. 85 | 86 | Standard errors of the fixed effects are currently estimated using a 87 | "with replacement" approximation, valid when the sampling fraction 88 | is small and superpopulation (model, process) inference is 89 | intended. Tthe influence functions are added up within 90 | cluster, centered within strata, the residuals added up within strata, and then 91 | the crossproduct is taken within each stratum. The stratum variance 92 | is scaled by \code{ni/(ni-1)} where \code{ni} is the number of PSUs 93 | in the stratum, and then added up across strata. When the sampling 94 | and model structure are the same, this is the estimator of Yi et al, 95 | but it also allows for there to be sampling stages before the stages 96 | that are modelled, and for the model and sampling structures to be 97 | different. 98 | 99 | The \code{return.devfun=TRUE} option is useful if you want to 100 | examine objects that aren't returned as part of the output. For 101 | example, \code{get("ij", environment(object$devfun))} is the set of 102 | pairs used in computation. 103 | 104 | } 105 | \value{ 106 | \code{svy2lme} returns an object with \code{print}, \code{coef}, and 107 | \code{vcov} methods. 108 | 109 | The \code{coef} method with \code{random=TRUE} returns a two-element 110 | list: the first element is the estimated residual variance, the second 111 | is the matrix of estimated variances and covariances of the random effects. 112 | 113 | } 114 | \references{ 115 | J.N.K. Rao, François Verret and Mike A. Hidiroglou "A weighted composite likelihood approach to inference for two-level models from survey data" Survey Methodology, December 2013 Vol. 39, No. 2, pp. 263-282 116 | 117 | Grace Y. Yi , J. N. K. Rao and Haocheng Li "A WEIGHTED COMPOSITE LIKELIHOOD APPROACH FOR ANALYSIS OF SURVEY DATA UNDER TWO-LEVEL MODELS" Statistica Sinica 118 | Vol. 26, No. 2 (April 2016), pp. 569-587 119 | } 120 | \author{ 121 | Thomas Lumley 122 | } 123 | \examples{ 124 | 125 | data(api, package="survey") 126 | 127 | # one-stage cluster sample 128 | dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc) 129 | # two-stage cluster sample 130 | dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2) 131 | 132 | svy2lme(api00~(1|dnum)+ell+mobility+api99, design=dclus1) 133 | svy2lme(api00~(1|dnum)+ell+mobility+api99, design=dclus2) 134 | svy2lme(api00~(1|dnum)+ell+mobility+api99, design=dclus2,method="nested") 135 | 136 | lme4::lmer(api00~(1|dnum)+ell+mobility+api99, data=apipop) 137 | 138 | ## Simulated 139 | 140 | set.seed(2000-2-29) 141 | 142 | df<-data.frame(x=rnorm(1000*20),g=rep(1:1000,each=20), t=rep(1:20,1000), id=1:20000) 143 | df$u<-with(df, rnorm(1000)[g]) 144 | 145 | df$y<-with(df, x+u+rnorm(1000,s=2)) 146 | 147 | ## oversample extreme `u` to bias random-intercept variance 148 | pg<-exp(abs(df$u/2)-2.2)[df$t==1] 149 | 150 | in1<-rbinom(1000,1,pg)==1 151 | in2<-rep(1:5, length(in1)) 152 | 153 | sdf<-subset(df, (g \%in\% (1:1000)[in1]) & (t \%in\% in2)) 154 | 155 | p1<-rep(pg[in1],each=5) 156 | N2<-rep(20,nrow(sdf)) 157 | 158 | ## Population values 159 | lme4::lmer(y~x+(1|g), data=df) 160 | 161 | ## Naive estimator: higher intercept variance 162 | lme4::lmer(y~x+(1|g), data=sdf) 163 | 164 | ##pairwise estimator 165 | sdf$w1<-1/p1 166 | sdf$w2<-20/5 167 | 168 | design<-survey::svydesign(id=~g+id, data=sdf, weights=~w1+w2) 169 | pair<-svy2lme(y~x+(1|g),design=design,method="nested") 170 | pair 171 | 172 | pair_g<-svy2lme(y~x+(1|g),design=design,method="general") 173 | pair_g 174 | 175 | all.equal(coef(pair), coef(pair_g)) 176 | all.equal(SE(pair), SE(pair_g)) 177 | 178 | 179 | } 180 | % Add one or more standard keywords, see file 'KEYWORDS' in the 181 | % R documentation directory. 182 | \keyword{regression }% use one of RShowDoc("KEYWORDS") 183 | \keyword{survey }% __ONLY ONE__ keyword per line 184 | -------------------------------------------------------------------------------- /man/svy2relmer.Rd: -------------------------------------------------------------------------------- 1 | \name{svy2relmer} 2 | \alias{svy2relmer} 3 | \encoding{utf8} 4 | %- Also NEED an '\alias' for EACH other topic documented here. 5 | \title{ 6 | Linear mixed models with correlated random effects 7 | } 8 | \description{ 9 | Fits linear mixed models by maximising the profile pairwise composite 10 | likelihood. Allows for correlated random effects, eg, for genetic 11 | relatedness (QTL) models 12 | } 13 | \usage{ 14 | svy2relmer(formula, design, sterr=TRUE, return.devfun=FALSE, relmat=NULL, 15 | all.pairs=FALSE, subtract.margins=FALSE ) 16 | } 17 | %- maybe also 'usage' for other objects documented here. 18 | \arguments{ 19 | \item{formula}{ 20 | Model formula as in the \code{lme4} package, or with terms like 21 | \code{(1|id)} for correlated random effects together with the 22 | \code{relmat} argument. 23 | } 24 | \item{design}{ 25 | A survey design object produced by \code{survey::svydesign}. The 26 | pairwise weights will be computed from this design, which must have 27 | separate probabilities or weights for each stage of sampling. 28 | } 29 | \item{sterr}{ 30 | Estimate standard errors for fixed effects? Set to \code{FALSE} for 31 | greater speed when using resampling to get standard errors. 32 | } 33 | 34 | \item{return.devfun}{If \code{TRUE}, return the deviance function as a 35 | component of the object. This will increase the memory use 36 | substantially, but allows for bootstrapping.} 37 | \item{relmat}{ Specifies a list of relatedness matrices that corresponds to one or 38 | more random-effect groupings (eg \code{(1|id)} in the \code{formula} 39 | together with \code{relmat=list(id=Phi)} implies a covariance matrix 40 | of \code{Phi} for the random effects before scaling). See Details and 41 | the vignettes. } 42 | \item{all.pairs}{Use all pairs rather than just correlated pairs?} 43 | \item{subtract.margins}{If \code{TRUE} and \code{all.pairs=TRUE}, 44 | compute with all pairs by the faster algorithm involving subtraction 45 | from the marginal likelihood} 46 | } 47 | \details{ 48 | 49 | This function is very similar to \code{\link{svy2lme}} and only the 50 | differences are described here. 51 | 52 | Formula parsing and starting values use code based on the 53 | \code{lme4qtl} package. 54 | 55 | In \code{svy2lme} and \code{lme4::lmer}, the model is based on 56 | independent standard Normal random effects that are transformed to 57 | give random coefficients that might be correlated within observation 58 | but are either identical or independent between observations. In 59 | this function, the basic random effects in a term are multiplied by a square 60 | root of the \code{relmat} matrix for that term, giving basic random 61 | effects whose covariance between observations proportional to the 62 | \code{relmat} matrix. For example, in a quantitative trait locus 63 | model in genetics, the matrix would be a genetic relatedness matrix. 64 | 65 | The \code{relmat} matrices must have dimnames for matching to the 66 | id variable. It is permissible for the \code{relmat} matrices to 67 | be larger than necessary -- eg, containing related units that are 68 | not in the sample -- since the dimnames will be used to select the 69 | relevant submatrix. 70 | 71 | There can be only one random-effect term for each \code{relmat} term. If 72 | you need more, make a copy of the term with a different 73 | name. 74 | 75 | The \code{return.devfun=TRUE} option is useful if you want to 76 | examine objects that aren't returned as part of the output. For 77 | example, \code{get("ij", environment(object$devfun))} is the set of 78 | pairs used in computation. 79 | 80 | } 81 | \value{ 82 | \code{svy2relmer} returns an object with \code{print}, \code{coef}, and 83 | \code{vcov} methods. 84 | 85 | 86 | } 87 | \references{ 88 | Ziyatdinov, A., Vázquez-Santiago, M., Brunel, H. et al. lme4qtl: linear mixed models with flexible covariance structure for genetic studies of related individuals. BMC Bioinformatics 19, 68 (2018). \url{https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2057-x} 89 | } 90 | \author{ 91 | Thomas Lumley 92 | } 93 | \examples{ 94 | data(milk_subset) 95 | herd_des<- svydesign(id = ~herd + id, prob = ~one + one2, data = milk_subset) 96 | 97 | svy2lme(sdMilk ~ lact + log(dim) + (1|herd), design=herd_des, method="general") 98 | 99 | svy2relmer(sdMilk ~ lact + log(dim) + (1|id) + (1|herd), design=herd_des, 100 | relmat = list(id = A_gen)) 101 | 102 | 103 | } 104 | % Add one or more standard keywords, see file 'KEYWORDS' in the 105 | % R documentation directory. 106 | \keyword{regression }% use one of RShowDoc("KEYWORDS") 107 | \keyword{survey }% __ONLY ONE__ keyword per line 108 | --------------------------------------------------------------------------------