├── R ├── int.R ├── maxstat.R ├── pvalues.R ├── additive.R ├── dominant.R ├── recessive.R ├── codominant.R ├── is.snp.R ├── overdominant.R ├── z$.setupSNP.R ├── intervals.R ├── pvalues.WGassociation.R ├── labels.setupSNP.R ├── as.snp.R ├── labels.WGassociation.R ├── z[_-.setupSNP.R ├── dominant.WGassociation.R ├── print.SNPinteraction.R ├── additive.WGassociation.R ├── recessive.WGassociation.R ├── GenotypeRate.R ├── codominant.WGassociation.R ├── dominant.snp.R ├── overdominant.WGassociation.R ├── plot.setupSNP.R ├── print.maxstat.R ├── recessive.snp.R ├── additive.default.R ├── codominant.snp.R ├── trim.R ├── codominant.default.R ├── maxstat.table.R ├── qqpval.r ├── is.Monomorphic.R ├── pvalTest.R ├── summary.haplo.glm.R ├── togeno.R ├── dominant.default.R ├── print.summary.snp.R ├── z[.snp.r ├── orderChromosome.R ├── print.tableHWE.R ├── is.quantitative.R ├── overdominant.default.R ├── additive.snp.R ├── scanWGassociation.R ├── WGstats.R ├── make.geno.R ├── maxstat.setupSNP.R ├── maxstat.default.R ├── recessive.default.R ├── crea.lab.R ├── related.R ├── Table.mean.se.R ├── extractPval.R ├── overdominant.snp.R ├── z[.setupSNP.R ├── print.WGassociation.R ├── summary.setupSNP.R ├── Table.N.Per.R ├── sortSNPs.R ├── plot.permTest.R ├── print.haploOut.R ├── GenomicControl.R ├── plot.snp.R ├── expandsetupSNP.R ├── extractPval.i.R ├── assoc.R ├── z[[_-.setupSNP.R ├── modelTest.R ├── Bonferroni.sig.R ├── getNiceTable.R ├── interleave.R ├── reorder.snp.R ├── setupSNP.R ├── print.permTest.R ├── z[.WGassociation.R ├── c.WGassociation.r ├── intervals.dif.R ├── intervals.or.R ├── getSignificantSNPs.R ├── print.snp.r ├── plotWGassociation.R ├── tableHWE.R ├── permTest.R ├── summary.WGassociation.R ├── print.intervals.R ├── summary.snp.R ├── plotMissing.R ├── odds.r ├── print.snpOut.R ├── intervals.haplo.glm.R ├── SNPHWE.R ├── WGassociation.R ├── plot.SNPinteraction.R ├── print.LD.R ├── haplo.interaction.R ├── getGeneSymbol.R ├── interactionPval.R ├── snp.R ├── table.interaction.R ├── table.corner.R ├── association.R └── LD.R ├── data ├── HapMap.rda ├── SNPs.rda ├── asthma.rda ├── resHapMap.rda ├── SNPs.info.pos.rda └── HapMap.SNPs.pos.rda ├── memcheck.R ├── .Rbuildignore ├── man ├── getNiceTable.Rd ├── scanWGassociation.Rd ├── related.Rd ├── asthma.Rd ├── isMonomorphic.Rd ├── HapMap.Rd ├── resHapMap.Rd ├── HapMap.SNPs.pos.Rd ├── qqpval.Rd ├── int.Rd ├── SNPs.info.pos.Rd ├── makegeno.Rd ├── SNPs.Rd ├── getGeneSymbol.Rd ├── plotMissing.Rd ├── Tablemeanse.Rd ├── TableNPer.Rd ├── odds.Rd ├── GenomicControl.Rd ├── getSignificantSNPs.Rd ├── intervals.Rd ├── plotWGassociation.Rd ├── tableHWE.Rd ├── inheritance.Rd ├── sortSNPs.Rd ├── SNPassoc-internal.Rd ├── setupSNP.Rd ├── BonferroniSig.Rd ├── maxstat.Rd ├── LD.Rd ├── permTest.Rd ├── haplointeraction.Rd ├── interactionPval.Rd ├── snp.Rd ├── WGassociation.Rd └── association.Rd ├── README.md ├── .gitignore ├── DESCRIPTION ├── .github └── workflows │ └── rhub.yaml ├── NAMESPACE └── inst └── docs └── changelog.txt /R/int.R: -------------------------------------------------------------------------------- 1 | `int` <- 2 | function(x) 3 | { 4 | x 5 | } 6 | 7 | -------------------------------------------------------------------------------- /R/maxstat.R: -------------------------------------------------------------------------------- 1 | maxstat<-function(x,...) 2 | UseMethod("maxstat") 3 | -------------------------------------------------------------------------------- /R/pvalues.R: -------------------------------------------------------------------------------- 1 | `pvalues` <- 2 | function(x) UseMethod("pvalues") 3 | 4 | -------------------------------------------------------------------------------- /R/additive.R: -------------------------------------------------------------------------------- 1 | `additive` <- 2 | function (o) UseMethod("additive") 3 | 4 | -------------------------------------------------------------------------------- /R/dominant.R: -------------------------------------------------------------------------------- 1 | `dominant` <- 2 | function (o) UseMethod("dominant") 3 | 4 | -------------------------------------------------------------------------------- /R/recessive.R: -------------------------------------------------------------------------------- 1 | `recessive` <- 2 | function (o) UseMethod("recessive") 3 | 4 | -------------------------------------------------------------------------------- /R/codominant.R: -------------------------------------------------------------------------------- 1 | `codominant` <- 2 | function(o) UseMethod("codominant") 3 | 4 | -------------------------------------------------------------------------------- /R/is.snp.R: -------------------------------------------------------------------------------- 1 | `is.snp` <- 2 | function(x) 3 | { 4 | inherits(x, "snp") 5 | } 6 | 7 | -------------------------------------------------------------------------------- /R/overdominant.R: -------------------------------------------------------------------------------- 1 | `overdominant` <- 2 | function (o) UseMethod("overdominant") 3 | 4 | -------------------------------------------------------------------------------- /data/HapMap.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isglobal-brge/SNPassoc/HEAD/data/HapMap.rda -------------------------------------------------------------------------------- /data/SNPs.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isglobal-brge/SNPassoc/HEAD/data/SNPs.rda -------------------------------------------------------------------------------- /data/asthma.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isglobal-brge/SNPassoc/HEAD/data/asthma.rda -------------------------------------------------------------------------------- /R/z$.setupSNP.R: -------------------------------------------------------------------------------- 1 | "$<-.setupSNP"<- 2 | function (x, i, value) { 3 | x[[i]]<-value 4 | x 5 | } -------------------------------------------------------------------------------- /R/intervals.R: -------------------------------------------------------------------------------- 1 | `intervals` <- 2 | function(o, level = .95, ...) 3 | UseMethod("intervals") 4 | 5 | -------------------------------------------------------------------------------- /R/pvalues.WGassociation.R: -------------------------------------------------------------------------------- 1 | `pvalues.WGassociation` <- 2 | function(x) attr(x,"pvalues") 3 | 4 | -------------------------------------------------------------------------------- /data/resHapMap.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isglobal-brge/SNPassoc/HEAD/data/resHapMap.rda -------------------------------------------------------------------------------- /R/labels.setupSNP.R: -------------------------------------------------------------------------------- 1 | `labels.setupSNP` <- 2 | function(object, ...) attr(object,"label.SNPs") 3 | 4 | -------------------------------------------------------------------------------- /data/SNPs.info.pos.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isglobal-brge/SNPassoc/HEAD/data/SNPs.info.pos.rda -------------------------------------------------------------------------------- /data/HapMap.SNPs.pos.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isglobal-brge/SNPassoc/HEAD/data/HapMap.SNPs.pos.rda -------------------------------------------------------------------------------- /R/as.snp.R: -------------------------------------------------------------------------------- 1 | `as.snp` <- 2 | function (x, ...) 3 | { 4 | if (is.snp(x)) x else snp(x, ...) 5 | } 6 | 7 | -------------------------------------------------------------------------------- /R/labels.WGassociation.R: -------------------------------------------------------------------------------- 1 | `labels.WGassociation` <- 2 | function(object, ...) attr(object,"label.SNPs") 3 | 4 | -------------------------------------------------------------------------------- /R/z[_-.setupSNP.R: -------------------------------------------------------------------------------- 1 | `[<-.setupSNP` <- 2 | function(x,i,j,value){ 3 | out<-NextMethod("[<-") 4 | out } 5 | 6 | -------------------------------------------------------------------------------- /memcheck.R: -------------------------------------------------------------------------------- 1 | devtools::load_all() 2 | devtools::run_examples() 3 | devtools::build_vignettes() 4 | devtools::test() -------------------------------------------------------------------------------- /R/dominant.WGassociation.R: -------------------------------------------------------------------------------- 1 | `dominant.WGassociation` <- 2 | function(o) 3 | { 4 | attr(o,"pvalues")[,"dominant"] 5 | } 6 | 7 | -------------------------------------------------------------------------------- /R/print.SNPinteraction.R: -------------------------------------------------------------------------------- 1 | `print.SNPinteraction` <- 2 | function(x, ...) 3 | { 4 | print.table(x,na.print="-") 5 | } 6 | 7 | -------------------------------------------------------------------------------- /R/additive.WGassociation.R: -------------------------------------------------------------------------------- 1 | `additive.WGassociation` <- 2 | function(o) 3 | { 4 | attr(o,"pvalues")[,"log-additive"] 5 | } 6 | 7 | -------------------------------------------------------------------------------- /R/recessive.WGassociation.R: -------------------------------------------------------------------------------- 1 | `recessive.WGassociation` <- 2 | function(o) 3 | { 4 | attr(o,"pvalues")[,"recessive"] 5 | } 6 | 7 | -------------------------------------------------------------------------------- /R/GenotypeRate.R: -------------------------------------------------------------------------------- 1 | `GenotypeRate` <- 2 | function(x) 3 | { 4 | temp<-sum(!is.na(x))/length(x) 5 | ans<-temp*100 6 | ans 7 | } 8 | 9 | -------------------------------------------------------------------------------- /R/codominant.WGassociation.R: -------------------------------------------------------------------------------- 1 | `codominant.WGassociation` <- 2 | function(o) 3 | { 4 | attr(o,"pvalues")[,"codominant"] 5 | } 6 | 7 | -------------------------------------------------------------------------------- /R/dominant.snp.R: -------------------------------------------------------------------------------- 1 | `dominant.snp` <- 2 | function (o) 3 | { 4 | o<-dominant.default(o) 5 | class(o)<-c("snp","factor") 6 | o 7 | } -------------------------------------------------------------------------------- /R/overdominant.WGassociation.R: -------------------------------------------------------------------------------- 1 | `overdominant.WGassociation` <- 2 | function(o) 3 | { 4 | attr(o,"pvalues")[,"overdominant"] 5 | } 6 | 7 | -------------------------------------------------------------------------------- /R/plot.setupSNP.R: -------------------------------------------------------------------------------- 1 | `plot.setupSNP`<- 2 | function(x,which=1,...){ 3 | plot(x[,attr(x,"colSNPs")[which]], label=labels(x)[which], ...) 4 | } -------------------------------------------------------------------------------- /R/print.maxstat.R: -------------------------------------------------------------------------------- 1 | print.maxstat<-function(x,...) 2 | { 3 | printCoefmat(t(x), dig.tst=3, tst.ind=1:4, P.values = TRUE, na.print="-", ...) 4 | } -------------------------------------------------------------------------------- /R/recessive.snp.R: -------------------------------------------------------------------------------- 1 | `recessive.snp` <- 2 | function (o) 3 | { 4 | o<-recessive.default(o) 5 | class(o)<-c("snp","factor") 6 | o 7 | } 8 | 9 | -------------------------------------------------------------------------------- /R/additive.default.R: -------------------------------------------------------------------------------- 1 | `additive.default` <- 2 | function (o) 3 | { 4 | if(!inherits(o,"factor")) o<-codominant(o) 5 | as.numeric(o)-1 6 | } 7 | 8 | -------------------------------------------------------------------------------- /R/codominant.snp.R: -------------------------------------------------------------------------------- 1 | `codominant.snp` <- 2 | function(o) 3 | { 4 | o<-codominant.default(o) 5 | class(o)<-c("snp","factor") 6 | o 7 | } 8 | 9 | -------------------------------------------------------------------------------- /R/trim.R: -------------------------------------------------------------------------------- 1 | trim <- function(s) 2 | { 3 | s <- sub(pattern="^ +", replacement="", x=s) 4 | s <- sub(pattern=" +$", replacement="", x=s) 5 | s 6 | } -------------------------------------------------------------------------------- /R/codominant.default.R: -------------------------------------------------------------------------------- 1 | `codominant.default` <- 2 | function(o) 3 | { 4 | if (length(unique(o[!is.na(o)]))>3) 5 | stop("variable should have 3 levels max") 6 | else factor(o) 7 | } 8 | 9 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^.*\.old$ 3 | ^.*\.png$ 4 | ^.*\.complete$ 5 | ^\.Rproj\.user$ 6 | memcheck.R 7 | ^\SNPassoc.R$ 8 | ^_pkgdown\.yml$ 9 | ^docs$ 10 | ^pkgdown$ 11 | ^\.github$ 12 | -------------------------------------------------------------------------------- /R/maxstat.table.R: -------------------------------------------------------------------------------- 1 | maxstat.table<-function(x,...) { 2 | if (dim(x)[1]> 2) x<-t(x) 3 | if (dim(x)[1]> 2) stop("table should be 2x3") 4 | if (dim(x)[2]==1) warning("this SNP is monomorphic") 5 | maxstat.matrix(x, ...) 6 | } 7 | -------------------------------------------------------------------------------- /R/qqpval.r: -------------------------------------------------------------------------------- 1 | qqpval<-function(p, pch=16, col=4, ...){ 2 | p<-p[!is.na(p)] 3 | n<-length(p) 4 | pexp<-(1:n)/(n+1) 5 | plot(-log(pexp,10), -log(sort(p),10), xlab="-log(expected P value)", ylab="-log(observed P value)", pch=pch, col=col, ...) 6 | abline(0,1,col=2) 7 | } 8 | -------------------------------------------------------------------------------- /R/is.Monomorphic.R: -------------------------------------------------------------------------------- 1 | `is.Monomorphic` <- 2 | function (x) 3 | { 4 | ans<-FALSE 5 | if (length(x)==1) 6 | { 7 | if (x[1] == "Monomorphic") 8 | ans <- TRUE 9 | } 10 | else 11 | ans <- length(table(x)[table(x) > 0]) == 1 12 | return(ans) 13 | } 14 | 15 | -------------------------------------------------------------------------------- /R/pvalTest.R: -------------------------------------------------------------------------------- 1 | `pvalTest` <- 2 | function(dataX,Y,quantitative,type,genotypingRate) 3 | { 4 | pvalues<-t(data.frame(lapply(dataX,FUN=modelTest,Y=Y, 5 | quantitative=quantitative,type=type, 6 | genotypingRate = genotypingRate ))) 7 | pvalues 8 | } 9 | 10 | -------------------------------------------------------------------------------- /R/summary.haplo.glm.R: -------------------------------------------------------------------------------- 1 | `summary.haplo.glm` <- 2 | function(object, ...) 3 | { 4 | o <- object 5 | coe <- o$coe 6 | se <- sqrt(diag(o$var.mat)[1:length(coe)]) 7 | z <- coe/se 8 | p <- 2-2*pnorm(abs(z)) 9 | list( coeficients = cbind(coe,se,z,p) ) 10 | } 11 | 12 | -------------------------------------------------------------------------------- /R/togeno.R: -------------------------------------------------------------------------------- 1 | `togeno` <- 2 | function(f,sep=sep,lab=lab) 3 | { 4 | nam<-paste(lab,c("1","2"),sep=".") 5 | f<-as.character(factor(f)) 6 | f[is.na(f)]<-paste("0",sep,"0",sep="") 7 | g<-as.data.frame(t(matrix(unlist(strsplit(f,sep)),2,length(f)))) 8 | names(g)<-nam 9 | g 10 | } 11 | 12 | -------------------------------------------------------------------------------- /R/dominant.default.R: -------------------------------------------------------------------------------- 1 | `dominant.default` <- 2 | function (o) 3 | { 4 | if(!inherits(o,"factor")) o<-codominant(o) 5 | if(length(levels(o))==3) 6 | { 7 | o[o == levels(o)[3]] <- levels(o)[2] 8 | levels(o)[2] <- paste(levels(o)[2:3], collapse = "-") 9 | } 10 | factor(o) 11 | } 12 | 13 | -------------------------------------------------------------------------------- /R/print.summary.snp.R: -------------------------------------------------------------------------------- 1 | print.summary.snp<-function(x,...) 2 | { 3 | cat("Genotypes: \n") 4 | print(x$genotype.freq, na="",...) 5 | cat("\n") 6 | cat("Alleles: \n") 7 | print(x$allele.freq, na="", ...) 8 | cat("\n") 9 | cat("HWE (p value):", x$HWE, "\n") 10 | } 11 | -------------------------------------------------------------------------------- /R/z[.snp.r: -------------------------------------------------------------------------------- 1 | "[.snp"<- 2 | function (x, ..., drop = FALSE) 3 | { 4 | y <- NextMethod("[") 5 | # attr(y, "contrasts") <- attr(x, "contrasts") 6 | # attr(y, "levels") <- attr(x, "levels") 7 | # class(y) <- oldClass(x) 8 | attributes(y) <- attributes(x) 9 | if (drop) 10 | factor(y) 11 | else y 12 | } 13 | -------------------------------------------------------------------------------- /R/orderChromosome.R: -------------------------------------------------------------------------------- 1 | `orderChromosome` <-function(x) 2 | { 3 | temp<-rep(NA,13) 4 | for (i in 10:22) 5 | temp[i-9]<-grep(i,x) 6 | temp2<-rep(NA,9) 7 | for (i in 1:9){ 8 | aux<-grep(i,x) 9 | temp2[i]<-aux[!aux%in%temp] 10 | } 11 | temp3<-grep("X",x) 12 | res<-c(temp2,temp,temp3) 13 | res 14 | } 15 | 16 | -------------------------------------------------------------------------------- /R/print.tableHWE.R: -------------------------------------------------------------------------------- 1 | `print.tableHWE` <- 2 | function(x, digits=4, sig=0.05, na="-", ...) 3 | { 4 | x<-round(x,digits) 5 | x<-data.frame(x) 6 | if (ncol(x)<3) 7 | { 8 | names(x)[1]<-"HWE (p value)" 9 | x$flag<- apply(x,1,function(x)ifelse(any(x2 10 | ans 11 | } 12 | 13 | -------------------------------------------------------------------------------- /man/getNiceTable.Rd: -------------------------------------------------------------------------------- 1 | \name{getNiceTable} 2 | \alias{getNiceTable} 3 | \title{Get Latex output} 4 | \usage{ 5 | getNiceTable( x ) 6 | } 7 | \arguments{ 8 | \item{x}{WGassociation object.} 9 | } 10 | \value{ 11 | The R output of specific association analyses exported into LaTeX 12 | } 13 | \description{ 14 | Create Latex output from association analyses 15 | } 16 | -------------------------------------------------------------------------------- /man/scanWGassociation.Rd: -------------------------------------------------------------------------------- 1 | \name{scanWGassociation} 2 | \alias{scanWGassociation} 3 | \title{Whole genome association analysis} 4 | \description{ 5 | This function is obsolete due to some problems with gfotran compiler. 6 | Use 'WGassociation' function instead or send an e-mail to the maintainer 7 | for receiving a version including this function 8 | } 9 | \keyword{utilities} 10 | -------------------------------------------------------------------------------- /R/overdominant.default.R: -------------------------------------------------------------------------------- 1 | `overdominant.default` <- 2 | function (o) 3 | { 4 | if(!inherits(o,"factor")) o<-codominant(o) 5 | if(length(levels(o))==3) # collapses 1+3 vs 2 6 | { 7 | o[o == levels(o)[3]] <- levels(o)[1] 8 | levels(o)[1] <- paste(levels(o)[c(1, 3)], collapse = "-") 9 | o<-factor(o,levels=levels(o)[1:2]) 10 | } # if <3 levels, return factor 11 | o 12 | } 13 | 14 | -------------------------------------------------------------------------------- /R/additive.snp.R: -------------------------------------------------------------------------------- 1 | `additive.snp` <- 2 | function (o) 3 | { 4 | if(length(levels(o))==3) 5 | o<-as.numeric(o)-1 6 | else 7 | { 8 | allele<-attr(o,"allele.names") 9 | if(sum(levels(o)%in%paste(allele,collapse="/"))>0) 10 | { 11 | o<-as.numeric(o)-1 12 | } 13 | else 14 | { 15 | o<-as.numeric(o)-1 16 | o[o==1]<-2 17 | } 18 | } 19 | o 20 | } 21 | 22 | -------------------------------------------------------------------------------- /R/scanWGassociation.R: -------------------------------------------------------------------------------- 1 | 2 | `scanWGassociation` <- 3 | function (formula, data, model = c("all"), nperm, quantitative = is.quantitative(formula, 4 | data), genotypingRate = 80) 5 | { 6 | warning("This function is obsolete due to some problems with gfotran compiler. \n 7 | Use 'WGassociation' function instead or send an e-mail to the maintainer \n 8 | for receiving a version including this function") 9 | } 10 | 11 | 12 | -------------------------------------------------------------------------------- /R/WGstats.R: -------------------------------------------------------------------------------- 1 | `WGstats` <- 2 | function (object, ...) 3 | { 4 | if (!inherits(object, "WGassociation")) 5 | stop("object must be an object of class 'WGassociation'") 6 | 7 | if (!is.null(attr(object,"fast"))) 8 | stop("\n summary is implemented only for 'WGassociation' function") 9 | 10 | x <- attr(object,"tables") 11 | mostattributes(x)<-NULL 12 | 13 | print(x, na.print = "", ...) 14 | invisible(x) 15 | } 16 | 17 | -------------------------------------------------------------------------------- /man/related.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/related.R 3 | \name{related} 4 | \alias{related} 5 | \title{Get related samples} 6 | \usage{ 7 | related(x) 8 | } 9 | \arguments{ 10 | \item{x}{An object obtained from SNPrelate package.} 11 | } 12 | \value{ 13 | A matrix with related individuals. 14 | } 15 | \description{ 16 | Get related samples 17 | } 18 | \examples{ 19 | library(SNPassoc) 20 | data(SNPs) 21 | 22 | } 23 | -------------------------------------------------------------------------------- /R/make.geno.R: -------------------------------------------------------------------------------- 1 | `make.geno` <- 2 | function (data, SNPs.sel) 3 | { 4 | if (!inherits(data, "setupSNP")) 5 | stop("data must be an object of class 'setupSNP'") 6 | 7 | ans<-togeno(data[,SNPs.sel[1]],sep="/",lab=SNPs.sel[1]) 8 | for (i in 2:length(SNPs.sel)) 9 | { 10 | ans.i<-togeno(data[,SNPs.sel[i]],sep="/",lab=SNPs.sel[i]) 11 | ans<-cbind(ans,ans.i) 12 | } 13 | geno <- haplo.stats::setupGeno(ans) 14 | geno 15 | } 16 | 17 | -------------------------------------------------------------------------------- /R/maxstat.setupSNP.R: -------------------------------------------------------------------------------- 1 | maxstat.setupSNP<-function(x, y, colSNPs=attr(x,"colSNPs"), ...) 2 | { 3 | if(missing(y)) 4 | stop("a case-control variable must be indicated in the second argument") 5 | 6 | y<-deparse(substitute(y)) 7 | if (!exists(y)) y<-x[,y] else y<-get(y) 8 | if(length(table(y))>2) 9 | stop("case-control variable must have only 2 levels") 10 | 11 | ans<-sapply(x[,colSNPs, drop=FALSE], function(o) maxstat(y, o, ...)) 12 | class(ans)<-"maxstat" 13 | ans 14 | } 15 | -------------------------------------------------------------------------------- /R/maxstat.default.R: -------------------------------------------------------------------------------- 1 | maxstat.default<-function(x, y, ...) 2 | { 3 | # x: case/control 4 | # y: SNP 5 | 6 | name.caco<-deparse(substitute(x)) 7 | name.snp<-deparse(substitute(y)) 8 | ok <- complete.cases(x,y) 9 | x <- x[ok] 10 | y <- y[ok] 11 | 12 | if(length(unique(x))>2 ) stop(paste(name.caco, " has > 2 different values")) 13 | if(length(unique(y))>3 ) stop(paste(name.snp, " has > 3 different values")) 14 | xx<-table(x,y) 15 | maxstat.table(xx, ...) 16 | } 17 | -------------------------------------------------------------------------------- /R/recessive.default.R: -------------------------------------------------------------------------------- 1 | `recessive.default` <- 2 | function (o) 3 | { 4 | if(!inherits(o,"factor")) o<-codominant(o) 5 | if(length(levels(o))==3) 6 | { 7 | o[o == levels(o)[1]] <- levels(o)[2] 8 | levels(o)[2] <- paste(levels(o)[1:2], collapse = "-") 9 | } 10 | else 11 | { 12 | allele<-attr(o,"allele.names") 13 | if(sum(levels(o)%in%paste(allele,collapse="/"))>0) 14 | { 15 | o[o == levels(o)[2]] <- levels(o)[1] 16 | levels(o)[1] <- paste(levels(o)[1:2], collapse = "-") 17 | } 18 | } 19 | 20 | factor(o) 21 | } 22 | 23 | -------------------------------------------------------------------------------- /R/crea.lab.R: -------------------------------------------------------------------------------- 1 | `crea.lab` <- 2 | function (x, pos.ini, cex, dist) 3 | { 4 | n <- nrow(x) 5 | nn <- dimnames(x)[[1]] 6 | text(pos.ini + 0.1, 1.2, "frequency", cex = cex, adj = 0) 7 | text(pos.ini + 0.5, 1.2, "percentage", cex = cex, adj = 0) 8 | for (i in 1:n) { 9 | control <- (i - 1) * dist 10 | text(pos.ini, 1 - control, nn[i], cex = cex) 11 | text(pos.ini + 0.4, 1 - control, x[i, 1], adj = 1, cex = cex) 12 | text(pos.ini + 0.8, 1 - control, formatC(x[i, 2], 2,2,format="f"), adj = 1, 13 | cex = cex) 14 | } 15 | } 16 | 17 | -------------------------------------------------------------------------------- /man/asthma.Rd: -------------------------------------------------------------------------------- 1 | \docType{data} 2 | \name{asthma} 3 | \alias{asthma} 4 | \title{SNP data on asthma case-control study} 5 | \format{The \code{asthma} data frame has 1578 rows (individuals) and 57 columns (variables) of data from a genetic study on asthma.} 6 | \usage{ 7 | data("asthma") 8 | } 9 | \value{ 10 | An \code{data.frame} object. 11 | } 12 | \description{ 13 | \code{data.frame} with 51 SNPs and 6 epidemiological variables: country, gender, age, bmi, smoke ans case/control status. 14 | } 15 | \examples{ 16 | data(asthma) 17 | dim(asthma) 18 | str(asthma) 19 | } 20 | \keyword{datasets} 21 | -------------------------------------------------------------------------------- /man/isMonomorphic.Rd: -------------------------------------------------------------------------------- 1 | \name{is.Monomorphic} 2 | \alias{is.Monomorphic} 3 | \title{Check whether a SNP is Monomorphic} 4 | \description{ 5 | This function verifies when a SNP is Monomorphic 6 | } 7 | \usage{ 8 | is.Monomorphic(x) 9 | } 10 | \arguments{ 11 | \item{x}{any R object} 12 | } 13 | \value{ 14 | A logical value TRUE if the SNP is Monomorphic, otherwise a FALSE 15 | } 16 | 17 | \examples{ 18 | 19 | data(SNPs) 20 | is.Monomorphic(SNPs$snp10001) 21 | is.Monomorphic(SNPs$snp100020) 22 | apply(SNPs[,20:30],2,is.Monomorphic) 23 | } 24 | 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /R/related.R: -------------------------------------------------------------------------------- 1 | #' Get related samples 2 | #' 3 | #' @param x An object obtained from SNPrelate package. 4 | #' @return A matrix with related individuals. 5 | #' @examples 6 | #' library(SNPassoc) 7 | #' data(SNPs) 8 | #' 9 | #' @export 10 | related <- function(x) { 11 | ans <- NULL 12 | while(nrow(x)>0) { 13 | xx <- plyr::count(c(x$ID1, x$ID2)) 14 | o <- order(xx$freq, decreasing = TRUE) 15 | xx <- xx[o,] 16 | rm.xx <- xx$x[1] 17 | x <- subset(x, x$ID1 != rm.xx & x$ID2 != rm.xx) 18 | ans <- c(as.character(rm.xx), ans) 19 | ans 20 | } 21 | ans 22 | } 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SNPassoc 2 | This package carries out most common analysis when performing whole genome association studies. These analyses include descriptive statistics and exploratory analysis of missing values, calculation of Hardy-Weinberg equilibrium, analysis of association based on generalized linear models (either for quantitative or binary traits), and analysis of multiple SNPs (haplotype and epistasis analysis). Permutation test and related tests (sum statistic and truncated product) are also implemented. Max-statistic and genetic risk-allele score exact distributions are also possible to be estimated. 3 | 4 | -------------------------------------------------------------------------------- /R/Table.mean.se.R: -------------------------------------------------------------------------------- 1 | `Table.mean.se` <- 2 | function(var, dep, subset = !is.na(var)) 3 | { 4 | var <- as.factor(var) 5 | n <- ifelse(is.na(tapply(dep[subset],var[subset],FUN=length)),0,tapply(dep[subset],var[subset],FUN=length)) 6 | me <- ifelse(is.na(tapply(dep[subset],var[subset],FUN=mean)),0,tapply(dep[subset],var[subset],FUN=mean)) 7 | se <- ifelse(is.na(tapply(dep[subset],var[subset],FUN=function(x){sd(x)/sqrt(length(x))})),0,tapply(dep[subset],var[subset],FUN=function(x){sd(x)/sqrt(length(x))})) 8 | ta <- cbind(n=n,me=me,se=se) 9 | rownames(ta) <- names(n) 10 | list(tp = ta) 11 | } 12 | 13 | -------------------------------------------------------------------------------- /R/extractPval.R: -------------------------------------------------------------------------------- 1 | `extractPval` <- 2 | function(x) 3 | { 4 | 5 | models<-attr(x,"models") 6 | if(length(models)==6) 7 | models<-c(1:5) 8 | quantitative<-attr(x,"quantitative") 9 | pos<-ifelse(quantitative,7,8) 10 | 11 | ans<-t(data.frame(lapply(1:length(x),extractPval.i,x=x,pos=pos,models=models))) 12 | 13 | ans<-data.frame(ans) 14 | for (i in 2:ncol(ans)) 15 | ans[,i]<-as.numeric(as.character(ans[,i])) 16 | 17 | dimnames(ans)[[1]]<-attr(x,"label.SNPs") 18 | dimnames(ans)[[2]]<-c("comments",c("codominant","dominant","recessive","overdominant","log-additive")[models]) 19 | ans 20 | 21 | } 22 | 23 | -------------------------------------------------------------------------------- /R/overdominant.snp.R: -------------------------------------------------------------------------------- 1 | `overdominant.snp` <- 2 | function (o) 3 | { 4 | if(length(levels(o))==3) 5 | { 6 | o[o == levels(o)[3]] <- levels(o)[1] 7 | levels(o)[1] <- paste(levels(o)[c(1, 3)], collapse = "-") 8 | o<-factor(o,levels=levels(o)[1:2]) 9 | } 10 | else if(length(levels(o))==2) # 2 genotypes only 11 | { 12 | allele<-attr(o,"allele.names") 13 | if(sum(levels(o)%in%paste(allele,collapse="/"))==0) # no heterozygous 14 | { 15 | o[o == levels(o)[2]] <- levels(o)[1] 16 | levels(o)[1] <- paste(levels(o)[1:2], collapse = "-") 17 | o<-factor(o,levels=levels(o)[1]) 18 | } 19 | } 20 | class(o)<-c("snp","factor") 21 | o 22 | } 23 | 24 | -------------------------------------------------------------------------------- /R/z[.setupSNP.R: -------------------------------------------------------------------------------- 1 | `[.setupSNP` <- 2 | function(x,i,j,...){ 3 | 4 | out<-NextMethod("[") 5 | 6 | if (!is.null(dim(out))){ 7 | k<- match(attr(x, "label.SNPs"), names(out)) # nuevas columnas con snps 8 | k<-k[!is.na(k)] 9 | ik<- match(names(out), attr(x, "label.SNPs")) # nuevas columnas con snps 10 | ik<-ik[!is.na(ik)] 11 | 12 | snps<- attr(x, "label.SNPs")[ik] 13 | # for (l in snps) attr(out[,l],"allele.names")<- attr(x[,l],"allele.names") 14 | 15 | attr(out, "colSNPs") <- sort(k) 16 | attr(out, "label.SNPs") <- attr(x, "label.SNPs")[ik] 17 | attr(out, "gen.info") <- attr(x, "gen.info")[ik,] 18 | } 19 | out } 20 | 21 | -------------------------------------------------------------------------------- /R/print.WGassociation.R: -------------------------------------------------------------------------------- 1 | `print.WGassociation` <- 2 | function (x, digits = 5, ...) 3 | { 4 | if (!inherits(x, "WGassociation")) 5 | stop("x must be an object of class 'WGassociation'") 6 | ans <- attr(x, "pvalues") 7 | if(ncol(ans)>1){ 8 | if(!is.numeric(ans[,1])){ 9 | ans[, -1] <- round(ans[, -1,drop=FALSE], digits) 10 | out <- as.matrix(ans) 11 | out[,1]<-gsub("\\\\","",out[,1,drop=FALSE]) 12 | } else { 13 | 14 | out <- as.matrix(round(ans, digits)) 15 | } 16 | } else { 17 | out<-gsub("\\\\","",as.matrix(ans)) 18 | } 19 | 20 | print(out, quote = FALSE, na.print = "-", ...) 21 | invisible(ans) 22 | 23 | } 24 | 25 | -------------------------------------------------------------------------------- /R/summary.setupSNP.R: -------------------------------------------------------------------------------- 1 | `summary.setupSNP` <- 2 | function(object, ...) 3 | { 4 | if (!inherits(object, "setupSNP")) 5 | stop("object must be an object of class 'setupSNP'") 6 | 7 | colSNPs<-attr(object,"colSNPs") 8 | 9 | if(length(colSNPs)>0) { 10 | temp <- mclapply(object[,colSNPs, drop=FALSE], expandsetupSNP) 11 | 12 | ans <- do.call(rbind,temp) 13 | out<-as.matrix(ans) 14 | dimnames(out)[[2]][4] <- "missing (%)" 15 | print(out, quote=FALSE, na.print="-") 16 | 17 | } else { 18 | class(object)<-"data.frame" 19 | ans<-summary(object) 20 | print(ans) 21 | } 22 | invisible(ans) 23 | } 24 | 25 | -------------------------------------------------------------------------------- /R/Table.N.Per.R: -------------------------------------------------------------------------------- 1 | `Table.N.Per` <- 2 | function(var, dep, subset = !is.na(var)) 3 | { 4 | var <- as.factor(var) 5 | dep <- as.factor(dep) 6 | ta <- table(var[subset], dep[subset]) 7 | dimnames(ta) <- list(levels(var), levels(dep)) 8 | per <- matrix(nrow = dim(ta)[1], ncol = dim(ta)[2]) 9 | dimnames(per) <- list(levels(var[subset]), c("%", "%")) 10 | for(i in 1.:dim(ta)[1.]) { 11 | for(j in 1.:dim(ta)[2]) { 12 | per[i, j] <- cbind(as.matrix(round((ta[i, j] * 100)/sum(ta[, j]), 1))) 13 | } 14 | } 15 | tp <- cbind(ta, per) 16 | tp <- tp[, order(rep(1:2, 2))] 17 | list(tp = tp) 18 | } 19 | 20 | -------------------------------------------------------------------------------- /R/sortSNPs.R: -------------------------------------------------------------------------------- 1 | `sortSNPs` <- 2 | function (data, colSNPs, info) 3 | { 4 | o <- order(info[, 2], info[, 3]) 5 | label.SNPs.o <- info[o, 1] 6 | label.SNPs <- names(data[, colSNPs, drop=FALSE]) 7 | 8 | #control 9 | ans <- match(label.SNPs, label.SNPs.o) 10 | if (sum(is.na(ans)) > 0) { 11 | 12 | warning("The SNPs: ", as.character(label.SNPs[is.na(ans)]), 13 | "are not included in the file with the genomic positions and they are discarded" ) 14 | 15 | } 16 | 17 | ans <- match(label.SNPs.o, label.SNPs) 18 | 19 | 20 | out <- colSNPs[ans[!is.na(ans)]] 21 | out <- out[!is.na(out)] 22 | res <- list(pos=out, dataSorted=info[o,]) 23 | res 24 | } 25 | 26 | -------------------------------------------------------------------------------- /R/plot.permTest.R: -------------------------------------------------------------------------------- 1 | plot.permTest<-function(x,...) 2 | { 3 | 4 | if (!inherits(x, "permTest")) 5 | stop("x must be an object of class 'permTest'") 6 | 7 | param<-x$param 8 | pmin<-x$pmin 9 | psig<-x$psig 10 | 11 | o<-density(pmin) 12 | grid<-seq(0,max(o$x),length=1000) 13 | plot(grid,dbeta(grid,param[1],param[2]),xlab="minimum p value",ylab="density",type="n") 14 | hist(pmin,prob=TRUE,col="gray90",border="gray50",add=TRUE) 15 | lines(grid,dbeta(grid,param[1],param[2]),col="blue",lwd=2) 16 | 17 | segments(psig,0,psig,dbeta(psig,param[1],param[2]),col="red") 18 | legend("topright",c("empirical distribution","theoretical distribution",paste("adjusted p value:",round(psig,8))), 19 | lty=c(1,1,1),col=c("gray90","blue","red"),title="",cex=0.8,bty="n") 20 | 21 | } 22 | -------------------------------------------------------------------------------- /man/HapMap.Rd: -------------------------------------------------------------------------------- 1 | \name{HapMap} 2 | \alias{HapMap} 3 | \docType{data} 4 | 5 | \title{ SNPs from HapMap project} 6 | 7 | \description{ 8 | Information about 9307 SNPs from the HapMap project 9 | belonging to 22 chromosomes. Information about two different population is available: 10 | European population (CEU) and Yoruba (YRI). The genomic 11 | information (names of SNPs, chromosomes and genetic position) is 12 | also available in a data frame called 'HapMap.SNPs.pos'. 13 | 14 | } 15 | 16 | \usage{data(HapMap)} 17 | 18 | \format{ 19 | A data frame with 120 observations on the 9808 variables (SNPs) and one variable 20 | called 'group' indicating the population. 21 | 22 | } 23 | 24 | \source{ 25 | HapMap project (http://www.hapmap.org) 26 | } 27 | 28 | \examples{ 29 | data(HapMap) 30 | } 31 | 32 | \keyword{datasets} 33 | -------------------------------------------------------------------------------- /man/resHapMap.Rd: -------------------------------------------------------------------------------- 1 | \name{resHapMap} 2 | \alias{resHapMap} 3 | \docType{data} 4 | 5 | \title{ SNPs from HapMap project} 6 | 7 | \description{ 8 | Information about 9307 SNPs from the HapMap project 9 | belonging to 22 chromosomes. Information about two different population is available: 10 | European population (CEU) and Yoruba (YRI). The genomic 11 | information (names of SNPs, chromosomes and genetic position) is 12 | also available in a data frame called 'HapMap.SNPs.pos'. 13 | 14 | } 15 | 16 | \usage{data(resHapMap)} 17 | 18 | \format{ 19 | A data frame with 120 observations on the 9808 variables (SNPs) and one variable 20 | called 'group' indicating the population. 21 | 22 | } 23 | 24 | \source{ 25 | HapMap project (http://www.hapmap.org) 26 | } 27 | 28 | \examples{ 29 | data(resHapMap) 30 | } 31 | 32 | \keyword{datasets} 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | *Rproj 20 | 21 | # produced vignettes 22 | vignettes/*.html 23 | vignettes/*.pdf 24 | 25 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 26 | .httr-oauth 27 | 28 | # Old files 29 | *.old 30 | *.complete 31 | 32 | # knitr and R markdown default cache directories 33 | /*_cache/ 34 | /cache/ 35 | 36 | # Temporary files created by R markdown 37 | *.utf8.md 38 | *.knit.md 39 | 40 | # Compile code 41 | *.o 42 | *.dll 43 | 44 | 45 | # pkgdown files 46 | docs 47 | pkgdown 48 | ^_pkgdown\.yml$ 49 | logo.png 50 | _pkgdown.yml 51 | SNPassoc.R 52 | /vignettes/SNPassoc_cache 53 | -------------------------------------------------------------------------------- /man/HapMap.SNPs.pos.Rd: -------------------------------------------------------------------------------- 1 | \name{HapMap.SNPs.pos} 2 | \alias{HapMap.SNPs.pos} 3 | \docType{data} 4 | 5 | \title{ SNPs from HapMap project} 6 | 7 | \description{ 8 | Information about 9307 SNPs from the HapMap project 9 | belonging to 22 chromosomes. Information about two different population is available: 10 | European population (CEU) and Yoruba (YRI). The genomic 11 | information (names of SNPs, chromosomes and genetic position) is 12 | also available in a data frame called 'HapMap.SNPs.pos'. 13 | 14 | } 15 | 16 | \usage{data(HapMap.SNPs.pos)} 17 | 18 | \format{ 19 | A data frame with 120 observations on the 9808 variables (SNPs) and one variable 20 | called 'group' indicating the population. 21 | 22 | } 23 | 24 | \source{ 25 | HapMap project (http://www.hapmap.org) 26 | } 27 | 28 | \examples{ 29 | data(HapMap.SNPs.pos) 30 | } 31 | 32 | \keyword{datasets} 33 | -------------------------------------------------------------------------------- /man/qqpval.Rd: -------------------------------------------------------------------------------- 1 | \name{qqpval} 2 | \alias{qqpval} 3 | \title{Functions for inspecting population substructure} 4 | \description{ 5 | This function plots ranked observed p values against the corresponding expected p values in -log scale. 6 | } 7 | \usage{ 8 | qqpval(p, pch=16, col=4, \dots) 9 | } 10 | 11 | \arguments{ 12 | \item{p}{a vector of p values} 13 | \item{pch}{symbol to use for points} 14 | \item{col}{color for points} 15 | \item{\dots}{other plot arguments} 16 | } 17 | 18 | 19 | \value{ 20 | No return value, just the plot 21 | } 22 | 23 | 24 | \seealso{ \code{\link{GenomicControl}}, \code{\link{WGassociation}}} 25 | 26 | \examples{ 27 | data(SNPs) 28 | datSNP<-setupSNP(SNPs,6:40,sep="") 29 | res<-WGassociation(casco,datSNP,model=c("do","re","log-add")) 30 | 31 | # observed vs expected p values for recessive model 32 | qqpval(recessive(res)) 33 | 34 | } 35 | 36 | \keyword{utilities} 37 | -------------------------------------------------------------------------------- /man/int.Rd: -------------------------------------------------------------------------------- 1 | \name{int} 2 | \alias{int} 3 | \title{ 4 | Identify interaction term 5 | } 6 | \description{ 7 | This is a special function used for 'haplo.interaction' function. It 8 | identifies the variable that will interact with the haplotype estimates. 9 | Using \code{int()} in a formula implies that 10 | the interaction term between this variable and haplotypes is included in 'haplo.glm' function. 11 | } 12 | \usage{ 13 | int(x) 14 | } 15 | 16 | \arguments{ 17 | \item{x}{ 18 | A factor variable. 19 | }} 20 | 21 | \value{ 22 | \code{x} 23 | } 24 | 25 | \seealso{ 26 | \code{\link{haplo.interaction}} 27 | } 28 | 29 | \examples{ 30 | 31 | library(SNPassoc) 32 | library(haplo.stats) 33 | 34 | data(SNPs) 35 | datSNP<-setupSNP(SNPs, 6:40, sep = "") 36 | mod <- haplo.interaction(casco~int(sex)+blood.pre, data = datSNP, 37 | SNPs.sel = c("snp10001","snp10004","snp10005")) 38 | 39 | } 40 | 41 | \keyword{utilities} -------------------------------------------------------------------------------- /man/SNPs.info.pos.Rd: -------------------------------------------------------------------------------- 1 | \name{SNPs.info.pos} 2 | \alias{SNPs.info.pos} 3 | 4 | \title{SNPs in a case-control study} 5 | \usage{data(SNPs.info.pos)} 6 | \description{ 7 | SNPs data.frame contains selected SNPs and other clinical covariates for cases and controls 8 | in a case-control study 9 | 10 | SNPs.info.pos data.frame contains the names of the SNPs included in the data set 'SNPs' including 11 | their chromosome and their genomic position 12 | } 13 | \format{ 14 | 'SNPs.info.pos' data.frame contains the following columns: 15 | A data frame with 35 observations on the following 3 variables. 16 | \describe{ 17 | \item{\code{snp}}{name of SNP} 18 | \item{\code{chr}}{name of chromosome} 19 | \item{\code{pos}}{genomic position} 20 | } 21 | } 22 | 23 | \source{ 24 | Data obtained from our department. The reference and details will be supplied after being published. 25 | } 26 | 27 | \keyword{datasets} 28 | -------------------------------------------------------------------------------- /R/print.haploOut.R: -------------------------------------------------------------------------------- 1 | `print.haploOut` <- 2 | function(x, digits = max(3, getOption("digits") - 3), ...) 3 | { 4 | cat("\n") 5 | cat(" Haplotype using SNPs:", attr(x, "label.snp"), " adjusted by:", attr(x, 6 | "varAdj"), "\n") 7 | cat(" Interaction \n") 8 | cat("-------------------------\n") 9 | etiq<-dimnames(x[[1]])[[2]] 10 | 11 | if(attr(x,"quantitative")) 12 | { 13 | etiq[2]<-paste(etiq[2],"(dif)") 14 | etiq[5]<-paste(etiq[5],"(dif)") 15 | } 16 | else 17 | { 18 | etiq[2]<-paste(etiq[2],"(OR)") 19 | etiq[5]<-paste(etiq[5],"(OR)") 20 | } 21 | 22 | dimnames(x[[1]])[[2]]<-etiq 23 | print(x[[1]]) 24 | cat("\n") 25 | cat("p interaction:",x[[4]],"\n") 26 | cat("\n",paste(attr(x,"varInt"),"within haplotype"), "\n") 27 | cat("-------------------------\n") 28 | print(x[[2]]) 29 | cat(paste("haplotype within",attr(x,"varInt")), "\n") 30 | cat("-------------------------\n") 31 | print(x[[3]]) 32 | 33 | } 34 | 35 | -------------------------------------------------------------------------------- /R/GenomicControl.R: -------------------------------------------------------------------------------- 1 | GenomicControl<-function(x, snp.sel) 2 | { 3 | if(!inherits(x,"WGassociation")) 4 | stop("x must be an object of class 'WGassociation'") 5 | 6 | WGchisq<-function(x, model) { 7 | df<-ifelse(model=="codominant", 2, 1) 8 | qchisq(x,df, lower.tail=FALSE) 9 | } 10 | 11 | if (missing(snp.sel)) snp.sel<-rep(TRUE,nrow(x)) 12 | p<-pvalues(x)[snp.sel,-1] 13 | chisq.obs<-sapply(1:ncol(p) ,function(x) WGchisq(p[,x],names(p)[x])) 14 | 15 | lambda<-apply(chisq.obs,2,median,na.rm=TRUE) 16 | names(lambda)<-names(x)[-1] 17 | 18 | den<-rep(0.456,ncol(x)-1) 19 | den[names(lambda)=="codominant"]<-1.388 20 | lambda<- lambda/den 21 | 22 | lambdaOK<-ifelse(lambda<1,1,lambda) 23 | chisq.corrrected<-sweep(chisq.obs, 2, lambdaOK,FUN="/") 24 | pOK<-1-pchisq(chisq.corrrected,1) 25 | pOK[pOK==0]<-NA 26 | 27 | k<-length(names(x)) 28 | attr(x,"pvalues")[,2:k]<-pOK 29 | 30 | # cat("\nlambda:\n") 31 | message(lambda) 32 | print(lambda) 33 | x 34 | 35 | } 36 | -------------------------------------------------------------------------------- /R/plot.snp.R: -------------------------------------------------------------------------------- 1 | `plot.snp` <- 2 | function (x, type = barplot, label, ...) 3 | { 4 | if (!inherits(x, "snp")) 5 | stop("snp must be an object of class 'WGassociation'") 6 | if (missing(label)) { 7 | label <- deparse(substitute(x)) 8 | } 9 | 10 | # Reset par options on exit function 11 | old.mar <- par("mar") 12 | on.exit(par(old.mar)) 13 | 14 | old.mfrow <- par("mfrow") 15 | on.exit(par(mfrow = old.mfrow)) 16 | 17 | m <- m <- matrix(c(1, 2), nrow = 2, ncol = 1, byrow = TRUE) 18 | layout(m, heights = c(1, 5.5)) 19 | par(mar = c(0, 0, 0, 0)) 20 | xx <- summary(x) 21 | plot(c(1:5), rep(1, 5), ylim=c(0.1,1.6), type = "n", axes = FALSE, xlab = "", 22 | ylab = "") 23 | text(1, 1.5, label, font = 2, adj = 0) 24 | crea.lab(xx$allele.freq, 1.6, 0.8, 0.25) 25 | crea.lab(xx$genotype.freq, 2.8, 0.8, 0.25) 26 | text(4.5, 1, paste("HWE (pvalue):", round(xx$HWE, 6)), cex = 0.8) 27 | # par(mar = old.mar) 28 | type(xx$genotype.freq[, 1], ...) 29 | } 30 | 31 | -------------------------------------------------------------------------------- /man/makegeno.Rd: -------------------------------------------------------------------------------- 1 | \name{make.geno} 2 | \alias{make.geno} 3 | 4 | \title{Create a group of locus objects from some SNPs, assign to 'model.matrix' class. 5 | } 6 | 7 | \description{ 8 | This function prepares the CRITICAL element corresponding to matrix of genotypes necessary to be included in 'haplo.glm' function. 9 | } 10 | 11 | \usage{ 12 | make.geno(data, SNPs.sel) 13 | } 14 | 15 | \arguments{ 16 | \item{data}{an object of class 'setupSNP' containing the the SNPs that will be used to estimate the haplotypes.} 17 | \item{SNPs.sel}{ a vector indicating the names of SNPs that are used to estimate the haplotypes } 18 | } 19 | 20 | 21 | \value{ 22 | the same as 'setupGeno' function, from 'haplo.stats' library, returns 23 | } 24 | 25 | 26 | \seealso{\code{\link{snp}}} 27 | 28 | \examples{ 29 | ## Not run: 30 | data(SNPs) 31 | # first, we create an object of class 'setupSNP' 32 | datSNP<-setupSNP(SNPs,6:40,sep="") 33 | geno<-make.geno(datSNP,c("snp10001","snp10002","snp10003")) 34 | ## End(Not run) 35 | 36 | 37 | } 38 | 39 | 40 | \keyword{utilities} -------------------------------------------------------------------------------- /R/expandsetupSNP.R: -------------------------------------------------------------------------------- 1 | `expandsetupSNP` <- 2 | function (o) 3 | { 4 | if(!inherits(o,"logical")) { # all missings 5 | x <- summary(o) 6 | control <- !is.na(x$allele.freq[,2]) & x$allele.freq[,2] != 0 7 | o <- order( x$allele.freq[control,2], decreasing = TRUE) 8 | 9 | alleles <- rbind(x$allele.names)[o] 10 | 11 | if (length(alleles) > 1) { 12 | alleles <- paste(alleles, collapse = "/") 13 | } 14 | 15 | aux <- ifelse( any(!is.na(x$allele.freq[, 2])), 16 | round( max(x$allele.freq[, 2], na.rm = TRUE), 1), 17 | NA) 18 | 19 | out <- data.frame(alleles = alleles, 20 | major.allele.freq = aux, 21 | HWE = round(x$HWE, 6), 22 | missing = round(x$missing.allele * 100, 1)) 23 | 24 | } else { 25 | out <- data.frame(alleles = NA, major.allele.freq = NA, HWE = NA, missing = 100) 26 | } 27 | out 28 | } 29 | -------------------------------------------------------------------------------- /man/SNPs.Rd: -------------------------------------------------------------------------------- 1 | \name{SNPs} 2 | \alias{SNPs} 3 | 4 | \title{SNPs in a case-control study} 5 | \usage{data(SNPs)} 6 | \description{ 7 | SNPs data.frame contains selected SNPs and other clinical covariates for cases and controls 8 | in a case-control study 9 | 10 | SNPs.info.pos data.frame contains the names of the SNPs included in the data set 'SNPs' including 11 | their chromosome and their genomic position 12 | } 13 | \format{ 14 | 'SNPs' data.frame contains the following columns: 15 | \tabular{ll}{ 16 | id \tab identifier of each subject \cr 17 | casco \tab case or control status: 0-control, 1-case \cr 18 | sex \tab gender: Male and Female \cr 19 | blood.pre \tab arterial blood presure \cr 20 | protein \tab protein levels \cr 21 | snp10001 \tab SNP 1 \cr 22 | snp10002 \tab SNP 2 \cr 23 | ... \tab ... \cr 24 | snp100036 \tab SNP 36 \cr 25 | } 26 | } 27 | 28 | \source{ 29 | Data obtained from our department. The reference and details will be supplied after being published. 30 | } 31 | 32 | \keyword{datasets} 33 | -------------------------------------------------------------------------------- /R/extractPval.i.R: -------------------------------------------------------------------------------- 1 | `extractPval.i` <- 2 | function (i, x, pos, models) 3 | { 4 | tt<-x[[i]] 5 | if (is.null(nrow(tt))) 6 | control<-1000 7 | else 8 | control<-nrow(tt) 9 | 10 | if(length(models)*2>control) 11 | { 12 | tt <- tt[, pos] 13 | ans <- tt[!is.na(tt)][1] 14 | ans <- c(NA,ans, rep(NA, length(models) - 1)) 15 | } 16 | else 17 | { 18 | if (!is.null(dim(tt))) { 19 | if (length(models) == 1) { 20 | tt <- tt[, pos] 21 | ans <- c(NA, tt[!is.na(tt)][1]) 22 | } 23 | else { 24 | tt <- tt[, pos] 25 | ans <- c(NA, tt[!is.na(tt)]) 26 | if ((length(ans) - 1) < length(models)) 27 | # ans <- c(ans, rep(NA, length(models) - 1)) 28 | ans <- c(ans, rep(NA, length(models) - (length(ans) - 1))) 29 | } 30 | } 31 | else if (!is.na(charmatch("Geno", tt))) 32 | ans <- c(tt[1], rep(NA, length(models))) 33 | else ans <- c("Monomorphic", rep(NA, length(models))) 34 | } 35 | ans 36 | } 37 | 38 | -------------------------------------------------------------------------------- /R/assoc.R: -------------------------------------------------------------------------------- 1 | `assoc` <- 2 | function(y,x,test="lrt",quantitative) 3 | { 4 | lrt<-function(m) 5 | { 6 | if (m$family$family=="gaussian") { 7 | df1<-m$df.null 8 | df2<-m$df.residual 9 | df<-df1-df2 10 | ans<-1-pchisq(((m$null.deviance-m$deviance))/(m$deviance/df2),df) 11 | } 12 | else { 13 | ans<-1-pchisq(m$null.deviance-m$deviance,m$df.null-m$df.residual) 14 | } 15 | ans 16 | } 17 | 18 | G<-function(x,y) 19 | { 20 | tt<-table(y,x) 21 | df<-(dim(tt)[1]-1)*(dim(tt)[2]-1) 22 | tt.r<-apply(tt,1,sum) 23 | R<-tt.r[1] 24 | S<-tt.r[2] 25 | N<-sum(tt) 26 | n<-apply(tt,2,sum) 27 | a1<-(tt[1,]*N)/(R*n) 28 | a2<-(tt[2,]*N)/(S*n) 29 | ans <- 2*sum(tt*log(rbind(a1,a2))) 30 | pval <- 1-pchisq(ans,df=df) 31 | pval 32 | } 33 | 34 | 35 | if (length(levels(x))==1) { 36 | pval<-NA 37 | } 38 | else { 39 | if (test=="lrt") { 40 | if (quantitative) 41 | pval<-lrt(glm(y~x,family="gaussian")) 42 | else 43 | # pval<-lrt(glm(y~x,family="binomial")) 44 | pval<-G(x,y) 45 | } 46 | } 47 | pval 48 | } 49 | 50 | -------------------------------------------------------------------------------- /R/z[[_-.setupSNP.R: -------------------------------------------------------------------------------- 1 | `[[<-.setupSNP` <- 2 | function(x,i,j,info,value){ 3 | out<-NextMethod("[[") 4 | 5 | lab<- attr(x, "label.SNPs") 6 | val<- names(out)[!(names(out) %in% lab) & !(names(out) %in% names(x))] 7 | if(!is.null(val)){ # new var 8 | inf<- attr(x, "gen.info") 9 | if(length(val)>1) warning("More than 1 column added. May break consistency of colSNPs", call.=FALSE) 10 | if (is.snp(value)){ 11 | lab<-c(lab,val[1]) 12 | if(!is.null(inf)) { 13 | if (missing(info)){ 14 | info<-rep(NA,ncol(inf)) 15 | warning("info was filled with NA") 16 | } 17 | inf<-rbind(inf,info) 18 | } 19 | } 20 | } 21 | if (!is.null(dim(out))){ 22 | k<- match(lab, names(out)) # nuevas columnas con snps 23 | k<-k[!is.na(k)] 24 | ik<- match(names(out), lab) # nuevas columnas con snps 25 | ik<-ik[!is.na(ik)] 26 | 27 | attr(out, "colSNPs") <- sort(k) 28 | attr(out, "label.SNPs") <- lab[ik] 29 | attr(out, "gen.info") <- inf[ik,] 30 | } 31 | out } 32 | 33 | -------------------------------------------------------------------------------- /R/modelTest.R: -------------------------------------------------------------------------------- 1 | `modelTest` <- 2 | function(X,Y,quantitative,type,genotypingRate) 3 | { 4 | control<-ifelse(6%in%type,5,length(type)) 5 | controlGeno <- GenotypeRate(X) 6 | if (genotypingRate > controlGeno) 7 | { 8 | ans<-c("Genot error",rep(NA,control)) 9 | } 10 | else 11 | { 12 | if (is.Monomorphic(X)) 13 | ans<-c("Monomorphic",rep(NA,control)) 14 | else { 15 | ans<-NA 16 | if (1%in%type | 6%in%type) { 17 | mco<-assoc(Y,codominant(X),quantitative=quantitative) 18 | ans<-c(ans,mco) 19 | } 20 | if (2%in%type | 6%in%type) { 21 | mdo<-assoc(Y,dominant(X),quantitative=quantitative) 22 | ans<-c(ans,mdo) 23 | } 24 | if (3%in%type | 6%in%type) { 25 | mre<-assoc(Y,recessive(X),quantitative=quantitative) 26 | ans<-c(ans,mre) 27 | } 28 | if (4%in%type | 6%in%type) { 29 | mov<-assoc(Y,overdominant(X),quantitative=quantitative) 30 | ans<-c(ans,mov) 31 | } 32 | if (5%in%type | 6%in%type) { 33 | mad<-assoc(Y,additive(X),quantitative=quantitative) 34 | ans<-c(ans,mad) 35 | } 36 | } 37 | } 38 | ans 39 | } 40 | 41 | -------------------------------------------------------------------------------- /man/getGeneSymbol.Rd: -------------------------------------------------------------------------------- 1 | \name{getGeneSymbol} 2 | \alias{getGeneSymbol} 3 | \title{Get gene symbol from a list of SNPs} 4 | \usage{ 5 | getGeneSymbol( 6 | x, 7 | snpCol = 1, 8 | chrCol = 2, 9 | posCol = 3, 10 | db = TxDb.Hsapiens.UCSC.hg19.knownGene 11 | ) 12 | } 13 | \arguments{ 14 | \item{x}{data.frame containing: SNP name, chromosome and genomic position.} 15 | 16 | \item{snpCol}{column of x having the SNP name. Default is 1.} 17 | 18 | \item{chrCol}{column of x having the SNP chromosome. Default is 2.} 19 | 20 | \item{posCol}{column of x having the SNP position. Default is 3.} 21 | 22 | \item{db}{reference genome. Default is 'TxDb.Hsapiens.UCSC.hg19.knownGene'} 23 | } 24 | \value{ 25 | a data.frame having initial information and gene symbol 26 | } 27 | \description{ 28 | Get gene symbol from a list of SNPs 29 | } 30 | \examples{ 31 | \donttest{ 32 | snps = c('rs58108140','rs189107123','rs180734498','rs144762171') 33 | chr = c('chr1','chr1','chr1','chr1') 34 | pos = c(10583, 10611, 13302, 13327) 35 | 36 | x <- data.frame(snps, chr, pos ) 37 | 38 | getGeneSymbol(x) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /man/plotMissing.Rd: -------------------------------------------------------------------------------- 1 | \name{plotMissing} 2 | \alias{plotMissing} 3 | 4 | \title{ Plot of missing genotypes} 5 | 6 | \description{ 7 | Plot a grid showing which genotypes are missing 8 | } 9 | \usage{ 10 | plotMissing(x, print.labels.SNPs = TRUE, 11 | main = "Genotype missing data", ...) 12 | } 13 | 14 | \arguments{ 15 | \item{x}{an object of class 'setupSNP'} 16 | \item{print.labels.SNPs}{should labels of SNPs be printed?} 17 | \item{main}{title to place on plot} 18 | \item{...}{extra arguments of 'image' function} 19 | } 20 | 21 | \details{ 22 | This function uses 'image' function to plot a grid with black pixels where the genotypes 23 | are missing. 24 | } 25 | 26 | \seealso{ \code{\link{setupSNP}}} 27 | 28 | \value{ 29 | No return value, just the plot 30 | } 31 | 32 | \examples{ 33 | data(SNPs) 34 | data(SNPs.info.pos) 35 | ans<-setupSNP(SNPs,colSNPs=6:40,sep="") 36 | plotMissing(ans) 37 | 38 | # The same plot with the SNPs sorted by genomic position and 39 | # showing the information about chromosomes 40 | 41 | ans<-setupSNP(SNPs,colSNPs=6:40,sort=TRUE,SNPs.info.pos,sep="") 42 | plotMissing(ans) 43 | } 44 | \keyword{utilities} -------------------------------------------------------------------------------- /R/Bonferroni.sig.R: -------------------------------------------------------------------------------- 1 | `Bonferroni.sig` <- 2 | function (x, model = "codominant", alpha = 0.05, include.all.SNPs = FALSE) 3 | { 4 | if (!inherits(x, "WGassociation")) 5 | stop("x must be a 'WGassociation' object") 6 | x <- attr(x, "pvalues") 7 | model.type <- names(x) 8 | m <- charmatch(model, model.type, nomatch = 0) 9 | if (m == 0) 10 | stop("this model is was not fitted") 11 | temp1 <- grep("no", as.character(x[, m ])) 12 | temp2 <- c(1:nrow(x))[is.na(x[, m ])] 13 | temp <- c(temp1, temp2) 14 | if (!(include.all.SNPs) & length(temp)>=1) { 15 | x <- x[-temp, c(1, (m ))] 16 | cut.p <- alpha/nrow(x) 17 | } 18 | else cut.p <- alpha/nrow(x) 19 | message("number of tests: ", nrow(x)) 20 | message("alpha: ", alpha) 21 | message("corrected alpha: ", cut.p) 22 | significant <- x[as.numeric(x[, 2]) <= cut.p, ] 23 | if (all(is.na(significant))) { 24 | message(" No significant SNPs after Bonferroni correction ") 25 | ans <- NULL 26 | } 27 | else { 28 | ans <- significant 29 | print(as.matrix(ans), na.print = "-", quote = FALSE) 30 | } 31 | invisible(ans) 32 | } 33 | 34 | -------------------------------------------------------------------------------- /R/getNiceTable.R: -------------------------------------------------------------------------------- 1 | 2 | getNiceTable <- function(x) 3 | { 4 | if (!inherits(x, "WGassociation")) 5 | stop("object 'x' must be of class 'WGassociation'") 6 | out <- WGstats(x) 7 | temp <- lapply(out, getTableSNP) 8 | tt <- NULL 9 | nlines <- NULL 10 | for (i in 1:length(temp)) 11 | { 12 | tt.i <- temp[[i]] 13 | nlines <- c(nlines, nrow(temp[[i]])) 14 | aux <- rbind(c(names(temp)[i], rep(NA,7)), tt.i) 15 | tt <- rbind(tt, aux) 16 | } 17 | colnames(tt)[c(1,7,8)] <- c("SNP", "CI95%", "p-value") 18 | colnames(tt) <- gsub("%", "\\\\%", colnames(tt)) 19 | tt2 <- gsub("NA","", tt) 20 | ans <- gsub("\\( - \\)","", tt2) 21 | attr(ans, "nlines") <- nlines 22 | ans 23 | } 24 | 25 | 26 | 27 | getTableSNP <- function(x) 28 | { 29 | ff <- function(x) 30 | { 31 | ans <- apply(x,1, function(x) paste("(", paste(x, collapse="-"), ")", sep="")) 32 | ans[1] <- "NA" 33 | ans 34 | } 35 | 36 | part1 <- apply(x[,1:5], 2, format) 37 | part2 <- ff(format(x[,6:7])) 38 | part3 <- formatC(x[,8]) 39 | ans <- cbind(rownames(x),part1, part2, part3) 40 | rownames(ans) <- NULL 41 | ans 42 | } 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /man/Tablemeanse.Rd: -------------------------------------------------------------------------------- 1 | \name{Table.mean.se} 2 | \alias{Table.mean.se} 3 | 4 | \title{Descriptive sample size, mean, and standard error} 5 | \description{ 6 | This function computes sample size, mean and standard error of a quantitative trait for 7 | each genotype (or combination of genotypes) 8 | } 9 | 10 | \usage{ 11 | Table.mean.se(var, dep, subset = !is.na(var)) 12 | } 13 | 14 | \arguments{ 15 | \item{var}{quantitative trait} 16 | \item{dep}{variable with genotypes or any combination of them} 17 | \item{subset}{an optional vector specifying a subset of observations to be used in the descriptive analysis} 18 | } 19 | 20 | \value{ 21 | \item{tp}{A matrix giving sample size (n), median (me) and standard error (se) for each genotype} 22 | } 23 | 24 | \seealso{ \code{\link{Table.N.Per}} } 25 | \examples{ 26 | data(SNPs) 27 | # sample size, mean age and standard error for each genotype 28 | Table.mean.se(SNPs$snp10001,SNPs$protein) 29 | 30 | # The same table for a subset (males) 31 | Table.mean.se(SNPs$snp10001,SNPs$protein,SNPs$sex=="Male") 32 | 33 | # The same table assuming a dominant model 34 | Table.mean.se(dominant(snp(SNPs$snp10001,sep="")),SNPs$protein,SNPs$sex=="Male") 35 | 36 | 37 | } 38 | \keyword{utilities} 39 | 40 | -------------------------------------------------------------------------------- /R/interleave.R: -------------------------------------------------------------------------------- 1 | # $Id: interleave.R 789 2005-12-08 20:18:15Z warnes $ 2 | 3 | interleave <- function(..., append.source=TRUE, sep=": ", drop=FALSE) 4 | { 5 | sources <- list(...) 6 | 7 | sources[sapply(sources, is.null)] <- NULL 8 | 9 | sources <- mclapply(sources, function(x) 10 | if(is.matrix(x) || is.data.frame(x)) 11 | x else t(x) , ... ) 12 | 13 | nrows <- sapply( sources, nrow ) 14 | mrows <- max(nrows) 15 | if(any(nrows!=mrows & nrows!=1 )) 16 | stop("Arguments have differening numbers of rows.") 17 | 18 | sources <- mclapply(sources, function(x) 19 | if(nrow(x)==1) x[rep(1,mrows),,drop=drop] else x , ...) 20 | 21 | tmp <- do.call("rbind",sources) 22 | 23 | nsources <- length(sources) 24 | indexes <- outer( ( 0:(nsources-1) ) * mrows , 1:mrows, "+" ) 25 | 26 | retval <- tmp[indexes,,drop=drop] 27 | 28 | if(append.source && !is.null(names(sources) )) 29 | if(!is.null(row.names(tmp)) ) 30 | row.names(retval) <- paste(format(row.names(retval)), 31 | format(names(sources)), 32 | sep=sep) 33 | else 34 | row.names(retval) <- rep(names(sources), mrows) 35 | 36 | retval 37 | } 38 | -------------------------------------------------------------------------------- /R/reorder.snp.R: -------------------------------------------------------------------------------- 1 | `reorder.snp` <- 2 | function(x, ref="common", ...) 3 | { 4 | s <- x 5 | if(!inherits(s,"snp")) { 6 | stop("object must be of class 'snp'") 7 | } 8 | 9 | type <- charmatch(ref, c("common","minor")) 10 | 11 | if (is.na(type)) { 12 | stop("ref must be either 'common' or 'minor'") 13 | } 14 | 15 | class(s) <- "factor" 16 | tt <- table(s) 17 | 18 | if (type==1) { 19 | if (length(tt) == 3 & min(tt) > 0) { 20 | if (tt[1] < tt[3]) { 21 | s <- relevel(relevel(s, 2), 3) 22 | } 23 | } else { 24 | if (length(unique(unlist(strsplit(names(tt)[1], "/")))) == 2 25 | & length(tt)>1) { 26 | s <- relevel(s, 2) 27 | } 28 | } 29 | } else { 30 | if (length(tt) == 3 & min(tt) > 0) { 31 | if (tt[3] < tt[1]) { 32 | s <- relevel(relevel(s, 2), 3) 33 | } 34 | } else { 35 | if (length(unique(unlist(strsplit(names(tt)[1], "/")))) == 2) { 36 | s <- relevel(s, 2) 37 | } 38 | } 39 | } 40 | 41 | class(s) <- c("snp","factor") 42 | s 43 | } 44 | -------------------------------------------------------------------------------- /man/TableNPer.Rd: -------------------------------------------------------------------------------- 1 | \name{Table.N.Per} 2 | \alias{Table.N.Per} 3 | 4 | \title{ Descriptive sample size and percentage} 5 | \description{ 6 | This function computes sample size and percentage for each category of a categorical trait (e.g. 7 | case-control status) for each genotype (or combination of genotypes). 8 | } 9 | 10 | \usage{ 11 | Table.N.Per(var, dep, subset = !is.na(var)) 12 | } 13 | 14 | \arguments{ 15 | \item{var}{categorical trait.} 16 | \item{dep}{variable with genotypes or any combination of them} 17 | \item{subset}{an optional vector specifying a subset of observations to be used in the descriptive analysis. } 18 | } 19 | 20 | \value{ 21 | \item{tp}{A matrix giving sample size (n),and the percentage (\%) for each level of the categorical trait 22 | for each genotype} 23 | } 24 | 25 | 26 | \seealso{ \code{\link{Table.mean.se}} } 27 | \examples{ 28 | data(SNPs) 29 | #sample size and percentage of cases and controls for each genotype 30 | Table.N.Per(SNPs$snp10001,SNPs$casco) 31 | 32 | # The same table for a subset (males) 33 | Table.N.Per(SNPs$snp10001,SNPs$casco,SNPs$sex=="Male") 34 | 35 | # The same table assuming a dominant model 36 | Table.N.Per(dominant(snp(SNPs$snp10001,sep="")),SNPs$casco,SNPs$sex=="Male") 37 | 38 | 39 | } 40 | \keyword{utilities} 41 | 42 | -------------------------------------------------------------------------------- /man/odds.Rd: -------------------------------------------------------------------------------- 1 | \name{odds} 2 | \alias{odds} 3 | 4 | \title{Extract odds ratios, 95\% CI and pvalues} 5 | \description{ 6 | Extract odds ratios, 95% CI and pvalues from an WGassociation object fitted with WGassociation function. scanWGassociation doesn't fit models so doesn't estimate OR. 7 | 8 | } 9 | \usage{ 10 | odds(x, model=c("log-additive", "dominant", "recessive", "overdominant", "codominant"), 11 | sorted=c("no","p-value","or")) 12 | } 13 | 14 | \arguments{ 15 | \item{x}{an object of class 'WGassociation' output of WGassociation} 16 | \item{model}{model to be extracted. Only first one is used. The first letter is enough, low or upper case.} 17 | \item{sorted}{Sort the output by P value or OR.} 18 | } 19 | 20 | \value{ 21 | A matrix with OR 95\% CI (lower, upper) and P value for the selected model. For codominant model, the OR and 95\%CI are given for heterozygous and homozigous. 22 | } 23 | 24 | \references{ 25 | JR Gonzalez, L Armengol, X Sole, E Guino, JM Mercader, X Estivill, V Moreno. 26 | SNPassoc: an R package to perform whole genome association studies. Bioinformatics, 27 | 2007;23(5):654-5. 28 | } 29 | 30 | 31 | \examples{ 32 | data(SNPs) 33 | datSNP<-setupSNP(SNPs,6:40,sep="") 34 | ans<-WGassociation(casco~1,data=datSNP,model="all") 35 | odds(ans) 36 | } 37 | 38 | 39 | \keyword{utilities} 40 | 41 | -------------------------------------------------------------------------------- /R/setupSNP.R: -------------------------------------------------------------------------------- 1 | `setupSNP` <- 2 | function(data, colSNPs, sort=FALSE, info, sep="/", ...) 3 | 4 | { 5 | if (missing(data)) 6 | stop("Required argument data is missing") 7 | 8 | if (is.matrix(data)) 9 | data<-as.data.frame(data) 10 | 11 | if (!is.data.frame(data)) 12 | stop("Argument data is not a data.frame or matrix") 13 | 14 | if (sort) 15 | { 16 | temp <- sortSNPs(data, colSNPs, info) 17 | pos <- temp$pos 18 | info <- temp$dataSorted 19 | temp <- data[, pos, drop=FALSE] 20 | #..debug..# dataSNPs <- mclapply(temp, snp, sep = sep, ...) 21 | dataSNPs <- lapply(temp, snp, sep = sep, ...) 22 | } 23 | else 24 | { 25 | #..debug..# dataSNPs <- mclapply(data[, colSNPs, drop=FALSE], snp, sep = sep, ...) 26 | dataSNPs <- lapply(data[, colSNPs, drop=FALSE], snp, sep = sep, ...) 27 | } 28 | 29 | dataSNPs <- data.frame(dataSNPs) 30 | datPhen <- data[ , -colSNPs, drop=FALSE] 31 | 32 | ans<-cbind(datPhen,dataSNPs) 33 | 34 | label.SNPs <- names(dataSNPs) 35 | class(ans) <- c("setupSNP","data.frame") 36 | attr(ans,"row.names") <- 1:length(ans[[1]]) 37 | attr(ans,"label.SNPs") <- label.SNPs 38 | attr(ans,"colSNPs") <- c( (length(ans) - length(label.SNPs)+1) : length(ans)) 39 | if (sort) 40 | attr(ans,"gen.info") <- info 41 | ans 42 | 43 | } 44 | 45 | -------------------------------------------------------------------------------- /R/print.permTest.R: -------------------------------------------------------------------------------- 1 | print.permTest<-function(x, level=0.05, digits=8, ...) 2 | { 3 | if (!inherits(x, "permTest")) 4 | stop("x must be an object of class 'permTest'") 5 | 6 | cat("\n") 7 | cat(paste("Permutation test analysis ","(",(1-level)*100,"% confidence level)",sep=""), "\n") 8 | cat("------------------------------------------------ \n") 9 | cat("Number of SNPs analyzed: ",sum(x$nSNPs),"\n") 10 | cat("Number of valid SNPs (e.g., non-Monomorphic and passing calling rate): ",x$nSNPs[1],"\n") 11 | cat("P value after Bonferroni correction: ",round(level/x$nSNPs[1] ,digits),"\n") 12 | 13 | control<-attr(x,"method") 14 | if (control==1) 15 | { 16 | pos<-ceiling(length(x$pmin)*(1-level)) 17 | pPerm<-sort(x$pmin,decreasing=TRUE)[pos] 18 | cat("\n") 19 | cat("P values based on permutation procedure: \n") 20 | cat("P value from empirical distribution of minimum p values: ",round(pPerm ,digits),"\n") 21 | cat("P value assuming a Beta distribution for minimum p values: ",round(x$psig ,digits),"\n") 22 | } 23 | if (control==2) 24 | { 25 | cat("\n") 26 | cat(paste("Rank truncated product of the K=", x$K, " most significant p-values:",sep="")) 27 | cat("\n") 28 | cat("Product of K p-values (-log scale): ",x$rtp ,"\n") 29 | cat("Significance: ",ifelse(x$sig==0,"<0.001",x$sig) ,"\n") 30 | } 31 | 32 | } -------------------------------------------------------------------------------- /man/GenomicControl.Rd: -------------------------------------------------------------------------------- 1 | \name{GenomicControl} 2 | \alias{GenomicControl} 3 | 4 | \title{Population substructure} 5 | 6 | \description{ 7 | This function estimates an inflation (or deflation) factor, lambda, as indicated in the paper 8 | by Devlin et al. (2001) and corrects the p-values using this factor. 9 | } 10 | 11 | \usage{ 12 | GenomicControl(x, snp.sel) 13 | } 14 | 15 | \arguments{ 16 | \item{x}{an object of class 'WGassociation'. } 17 | \item{snp.sel}{SNPs used to compute lambda. Not required.} 18 | } 19 | 20 | \details{ 21 | This method is only valid for 2x2 tables. This means that the object of class 'WGassociation' might not 22 | have fitted the codominant model. 23 | 24 | See reference for further details. 25 | } 26 | 27 | \value{ 28 | The same object of class 'WGassociation' where the p-values have been corrected for genomic control. 29 | } 30 | 31 | \references{B Devlin, K Roeder, and S.A. Bacanu. Unbiased Methods for Population Based Association Studies. 32 | Genetic Epidemiology (2001) 21:273-84 33 | } 34 | 35 | \seealso{\code{\link{qqpval}}, \code{\link{WGassociation}}} 36 | 37 | \examples{ 38 | 39 | data(SNPs) 40 | datSNP<-setupSNP(SNPs,6:40,sep="") 41 | res<-WGassociation(casco,datSNP,model=c("do","re","log-add")) 42 | 43 | # Genomic Control 44 | resCorrected<-GenomicControl(res) 45 | } 46 | 47 | 48 | 49 | \keyword{utilities} 50 | -------------------------------------------------------------------------------- /R/z[.WGassociation.R: -------------------------------------------------------------------------------- 1 | `[.WGassociation` <- 2 | function(x,i,j, ...){ 3 | if (missing(i)) i<-1:nrow(x) 4 | if (missing(j)) j<-1:ncol(x) 5 | 6 | if (is.numeric(i)) i<-(1:nrow(x))[i] 7 | if (is.numeric(j)) j<-(1:ncol(x))[j] 8 | 9 | if (is.character(i)) i<-match(i,rownames(x)) 10 | if (is.character(j)) j<-match(j,colnames(x)) 11 | 12 | if (is.logical(i)) i<-(1:nrow(x))[i] 13 | if (is.logical(j)) j<-(1:ncol(x))[j] 14 | 15 | 16 | if (!(1 %in% j)) j<-c(1,j) 17 | if (length(j)==1) if(j==1) j<-1:2 18 | if (any(!(i %in% 1:nrow(x)))) stop("Undefined rows selected") 19 | if (any(!(j %in% 1:ncol(x)))) stop("Undefined cols selected") 20 | 21 | out<-attr(x,"pvalues")[i,j] # data.frame 22 | 23 | attr(out, "tables") <- attr(x, "tables")[i] 24 | attr(out, "label.SNPs") <- attr(x, "label.SNPs")[i] 25 | attr(out, "colSNPs") <- attr(x, "colSNPs")[i] 26 | attr(out, "gen.info") <- attr(x, "gen.info")[i,] 27 | attr(out, "whole") <- attr(x, "whole") 28 | attr(out, "pvalPerm") <- attr(x, "pvalPerm")[i,] 29 | attr(out, "pvalues") <- out 30 | attr(out, "models") <- c(0,attr(x, "models"))[j] 31 | attr(out, "quantitative") <- attr(x, "quantitative") 32 | attr(out, "fast") <- attr(x, "fast") 33 | class(out) <- c("WGassociation", "data.frame") 34 | 35 | out } 36 | 37 | -------------------------------------------------------------------------------- /man/getSignificantSNPs.Rd: -------------------------------------------------------------------------------- 1 | \name{getSignificantSNPs} 2 | \alias{getSignificantSNPs} 3 | 4 | \title{Extract significant SNPs from an object of class 'WGassociation' } 5 | 6 | \description{ 7 | Extract significant SNPs from an object of class 'WGassociation' when genomic 8 | information is available} 9 | 10 | \usage{ 11 | getSignificantSNPs(x, chromosome, model, sig = 1e-15) 12 | } 13 | 14 | \arguments{ 15 | \item{x}{an object of class 'WGassociation'} 16 | \item{chromosome}{ chromosome from which SNPs are extracted} 17 | \item{model}{ genetic model from which SNPs are extracted} 18 | \item{sig}{ statistical significance level. The default is 1e-15} 19 | } 20 | 21 | \value{ 22 | A list with the following components: 23 | \item{names}{the name of SNPs} 24 | \item{column}{the columns corresponding to the SNPs in the original data frame} 25 | ... 26 | } 27 | 28 | \seealso{ \code{\link{WGassociation}} } 29 | 30 | \examples{ 31 | data(resHapMap) 32 | # resHapMap contains the results for a log-additive genetic model 33 | 34 | # to get the significant SNPs for chromosome 12 35 | getSignificantSNPs(resHapMap,chromosome=12) 36 | # to get the significant SNPs for chromosome 5 37 | getSignificantSNPs(resHapMap,5) 38 | # to get the significant SNPs for chromosome X at level 1e-8 39 | getSignificantSNPs(resHapMap,5,sig=1e-8) 40 | 41 | } 42 | 43 | 44 | \keyword{ utilities } 45 | -------------------------------------------------------------------------------- /R/c.WGassociation.r: -------------------------------------------------------------------------------- 1 | c.WGassociation<- 2 | function (...) 3 | { 4 | allargs <- list(...) 5 | allargs <- allargs[sapply(allargs, length) > 0] 6 | n <- length(allargs) 7 | if (n == 0) 8 | return(structure(list(), class = "data.frame", row.names = integer())) 9 | lapply(1:n, function(i) if (!inherits(allargs[[i]], "WGassociation")) 10 | stop("Please supply 'WGassociation' objects")) 11 | x1 <- allargs[[1]] 12 | for (i in 2:n) { 13 | xi <- allargs[[i]] 14 | if (any(attr(x1, "models") != attr(xi, "models"))) 15 | stop("All objects should have identical structure") 16 | if (attr(x1, "quantitative") != attr(xi, "quantitative")) 17 | stop("All objects should have identical structure") 18 | } 19 | for (i in 2:n) { 20 | xi <- allargs[[i]] 21 | out <- rbind(attr(x1, "pvalues"), attr(xi, "pvalues")) 22 | attr(out, "tables") <- c(attr(x1, "tables"), attr(xi, 23 | "tables")) 24 | attr(out, "label.SNPs") <- c(attr(x1, "label.SNPs"), 25 | attr(xi, "label.SNPs")) 26 | attr(out, "colSNPs") <- c(attr(x1, "colSNPs"), attr(xi, 27 | "colSNPs")) 28 | attr(out, "gen.info") <- rbind(attr(x1, "gen.info"), 29 | attr(xi, "gen.info")) 30 | attr(out, "pvalues") <- out 31 | x1<-out 32 | } 33 | class(out) <- c("WGassociation", "data.frame") 34 | out 35 | } -------------------------------------------------------------------------------- /R/intervals.dif.R: -------------------------------------------------------------------------------- 1 | `intervals.dif` <- 2 | function(o, level, x.b, var, pval=TRUE, ...) 3 | { 4 | 5 | x<-o 6 | z<-abs(qnorm((1-level)/2)) 7 | 8 | mat.coef <- merge(x$coef, summary(x)$coef, by=0, all.x=TRUE, sort=FALSE) 9 | nom.pos <- data.frame(names(x$coef), ordre=1:length(x$coef)) 10 | mat.ordre <- merge(nom.pos, mat.coef, by.x=1, by.y=1, all.x=TRUE, sort=FALSE) 11 | mat.ordre <- mat.ordre[order(mat.ordre$ordre),] 12 | 13 | a <- as.matrix(mat.ordre[,c("Estimate")]) 14 | se <- as.matrix(mat.ordre[,c("Std. Error")]) 15 | 16 | li <- a - (z * se) 17 | ls <- a + (z * se) 18 | 19 | if (missing(var)) 20 | { 21 | focus <- nrow(a); 22 | m <- cbind(a[focus,],li[focus,],ls[focus,]) 23 | 24 | if (pval) 25 | { 26 | p.as <- anova(x, x.b, test = "F")$"Pr(>F)"[2] 27 | m <- cbind(m, p.as) 28 | colnames(m) <- c("dif","lo","up","pval") 29 | } 30 | else 31 | { 32 | colnames(m) <- c("dif","lower","uppper") 33 | } 34 | } 35 | else 36 | { 37 | focus <- nrow(a) - length(levels(var)) + 2:length(levels(var)) 38 | m <- cbind(a[focus,],li[focus,],ls[focus,]) 39 | m <- rbind(c(0,NA,NA),m) 40 | 41 | if (pval) 42 | { 43 | p.as <- anova(x, x.b, test = "F")$"Pr(>F)"[2] 44 | m <- cbind(m, c(p.as,rep(NA,times=length(levels(var))-1))) 45 | colnames(m) <- c("dif","lower","upper","pval") 46 | } 47 | else 48 | { 49 | colnames(m) <- c("dif","lower","upper") 50 | } 51 | } 52 | 53 | list(m=m); 54 | } 55 | 56 | -------------------------------------------------------------------------------- /man/intervals.Rd: -------------------------------------------------------------------------------- 1 | \name{intervals} 2 | \alias{intervals} 3 | \alias{intervals.haplo.glm} 4 | \alias{print.intervals} 5 | \alias{summary.haplo.glm} 6 | 7 | \title{Print ORs and 95\% confidence intervals for an object of class 'haplo.glm'} 8 | 9 | \description{Print ORs and confidence intervals for an object of class 'haplo.glm'} 10 | 11 | \usage{ 12 | intervals(o, level=.95, ...) 13 | } 14 | 15 | \arguments{ 16 | \item{o}{object of class 'haplo.glm'} 17 | \item{level}{significance level. Default is 95 percent} 18 | \item{...}{other arguments} 19 | 20 | } 21 | \value{ intervals object with ORs and 95\% confidence intervals for an object of class 'haplo.glm'} 22 | 23 | \examples{ 24 | # Not Run 25 | library(SNPassoc) 26 | library(haplo.stats) 27 | 28 | data(asthma, package = "SNPassoc") 29 | 30 | asthma.s <- setupSNP(data=asthma, colSNPs=7:ncol(asthma), sep="") 31 | trait <- asthma.s$casecontrol 32 | snpsH <- c("rs714588", "rs1023555", "rs898070") 33 | genoH <- make.geno(asthma.s, snpsH) 34 | 35 | mod <- haplo.stats:: haplo.glm( trait ~ genoH, 36 | family="binomial", 37 | locus.label=snpsH, 38 | allele.lev=attributes(genoH)$unique.alleles, 39 | control = haplo.glm.control(haplo.freq.min=0.05)) 40 | intervals(mod) 41 | summary(mod) 42 | 43 | } 44 | \keyword{utilities} -------------------------------------------------------------------------------- /R/intervals.or.R: -------------------------------------------------------------------------------- 1 | `intervals.or` <- 2 | function(o, level, x.b, var, ...) 3 | { 4 | x<-o 5 | z<-abs(qnorm((1-level)/2)) 6 | mat.coef <- merge(x$coef, summary(x)$coef, by=0, all.x=TRUE, sort=FALSE) 7 | nom.pos <- data.frame(names(x$coef), ordre=1:length(x$coef)) 8 | mat.ordre <- merge(nom.pos, mat.coef, by.x=1, by.y=1, all.x=TRUE, sort=FALSE) 9 | mat.ordre <- mat.ordre[order(mat.ordre$ordre),] 10 | 11 | a <- as.matrix(mat.ordre[,c("Estimate")]) 12 | se <- as.matrix(mat.ordre[,c("Std. Error")]) 13 | 14 | or <- exp(a) 15 | li <- exp(a - z * se) 16 | ls <- exp(a + z * se) 17 | if(missing(var)) 18 | { 19 | focus <- dim(a)[1.] 20 | or.ic <- round(cbind(or[focus, ], li[focus, ], ls[focus, ]), 2.) 21 | or.ic[or.ic > 999.] <- NA 22 | t1 <- anova(x, x.b, test = "Chi") 23 | p.as <- t1[2, grep("^P.*Chi",names(t1))] 24 | or.ic <- cbind(or.ic, p.as) 25 | dimnames(or.ic) <- NULL 26 | } 27 | else 28 | { 29 | focus <- dim(a)[1.] - length(levels(var)) + 2:length(levels(var)) 30 | or.ic <- round(cbind(or[focus, ], li[focus, ], ls[focus, ]), 2) 31 | or.ic[or.ic > 999.] <- NA 32 | or.ic <- round(rbind(c(1, NA, NA), or.ic), 2) 33 | t1 <- anova(x, x.b, test = "Chi") 34 | p.as <- t1[2, grep("^P.*Chi",names(t1))] 35 | or.ic <- cbind(or.ic, c(p.as, rep(NA, times = length(levels(var)) - 1))) 36 | dimnames(or.ic) <- list(levels(var), c(" OR ", "lower", "upper", "p-value")) 37 | } 38 | 39 | list(or.ic = or.ic) 40 | 41 | } 42 | 43 | -------------------------------------------------------------------------------- /R/getSignificantSNPs.R: -------------------------------------------------------------------------------- 1 | `getSignificantSNPs` <- 2 | function (x, chromosome, model, sig = 1e-15) 3 | { 4 | if (!inherits(x, "WGassociation")) 5 | stop("x must be an object of class 'WGassociation'") 6 | 7 | if(is.null(attr(x, "gen.info"))) 8 | { 9 | pvalues <- attr(x, "pvalues") 10 | mm<-charmatch(model,dimnames(pvalues)[[2]]) 11 | if (is.na(mm)) 12 | stop("model selected is not correct") 13 | sel2<-pvalues[,mm]<=sig 14 | SNPs.sel <- pvalues[sel2, ] 15 | pos.sel <- attr(x, "colSNPs")[sel2] 16 | out <- list(names = dimnames(SNPs.sel)[[1]], column = pos.sel) 17 | } 18 | 19 | else 20 | { 21 | if (!chromosome %in% c(1:22) & chromosome != "X") 22 | stop("chromosome should be either a number between 1 and 22 or X") 23 | if (chromosome == "X") 24 | chromosome <- 23 25 | gen.info <- attr(x, "gen.info") 26 | pvalues <- attr(x, "pvalues") 27 | chrs <- gen.info[, 2] 28 | chr.l <- unique(chrs) 29 | chr <- chr.l[orderChromosome(chr.l)] 30 | sel <- chr[chromosome] 31 | SNPs <- gen.info[gen.info[, 2] %in% sel, 1] 32 | sel2 <- dimnames(pvalues)[[1]] %in% SNPs & !is.na(pvalues[, 33 | 2]) & pvalues[, 2] <= sig 34 | SNPs.sel <- pvalues[sel2, ] 35 | pos.sel <- attr(x, "colSNPs")[sel2] 36 | out <- list(names = dimnames(SNPs.sel)[[1]], column = pos.sel) 37 | } 38 | out 39 | } 40 | 41 | -------------------------------------------------------------------------------- /R/print.snp.r: -------------------------------------------------------------------------------- 1 | print.snp<- 2 | function (x, quote = FALSE, max.levels = NULL, width = getOption("width"), 3 | ...) 4 | { 5 | ord <- is.ordered(x) 6 | if (length(x) <= 0) 7 | cat(if (ord) 8 | "ordered" 9 | else "factor", "(0)\n", sep = "") 10 | else { 11 | xx <- x 12 | # class(xx) <- NULL 13 | # levels(xx) <- NULL 14 | attributes(xx)<-NULL 15 | xx[] <- as.character(x) 16 | print(xx, quote = quote, ...) 17 | } 18 | maxl <- if (is.null(max.levels)) 19 | TRUE 20 | else max.levels 21 | if (maxl) { 22 | n <- length(lev <- encodeString(levels(x), quote = ifelse(quote, 23 | "\"", ""))) 24 | colsep <- if (ord) 25 | " < " 26 | else " " 27 | T0 <- "Genotypes: " 28 | if (is.logical(maxl)) 29 | maxl <- { 30 | width <- width - (nchar(T0, type = "w") + 3 + 31 | 1 + 3) 32 | lenl <- cumsum(nchar(lev, type = "w") + nchar(colsep, 33 | type = "w")) 34 | if (n <= 1 || lenl[n] <= width) 35 | n 36 | else max(1, which(lenl > width)[1] - 1) 37 | } 38 | drop <- n > maxl 39 | cat(if (drop) 40 | paste(format(n), ""), T0, paste(if (drop) 41 | c(lev[1:max(1, maxl - 1)], "...", if (maxl > 1) lev[n]) 42 | else lev, collapse = colsep), "\n", sep = "") 43 | cat("Alleles: ",attr(x,"allele.names"),"\n") 44 | } 45 | invisible(x) 46 | } 47 | -------------------------------------------------------------------------------- /man/plotWGassociation.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.WGassociation} 2 | \alias{plot.WGassociation} 3 | 4 | \title{Function to plot -log p values from an object of class 'WGassociation'} 5 | \description{ 6 | Function to plot -log p values from an object of class 'WGassociation' 7 | } 8 | 9 | \usage{ 10 | \method{plot}{WGassociation}(x, ...) 11 | } 12 | 13 | \arguments{ 14 | \item{x}{ an object of class 'WGassociation'} 15 | \item{\dots}{other graphical parameters } 16 | } 17 | \details{ 18 | A panel with different plots (one for each mode of inheritance) are plotted. Each of them represents 19 | the -log(p value) for each SNP. Two horizontal lines are also plotted. One one them indicates the nominal 20 | statistical significance level whereas the other one indicates the statistical 21 | significance level after Bonferroni correction. 22 | } 23 | 24 | \references{ 25 | JR Gonzalez, L Armengol, X Sole, E Guino, JM Mercader, X Estivill, V Moreno. 26 | SNPassoc: an R package to perform whole genome association studies. Bioinformatics, 27 | 2007;23(5):654-5. 28 | } 29 | 30 | 31 | \value{ 32 | No return value, just the plot 33 | } 34 | 35 | \examples{ 36 | 37 | library(SNPassoc) 38 | 39 | data(asthma, package = "SNPassoc") 40 | asthma.s <- setupSNP(data=asthma, colSNPs=7:ncol(asthma), sep="") 41 | 42 | ans <- WGassociation(casecontrol, data=asthma.s) 43 | 44 | plot(ans) 45 | 46 | } 47 | 48 | \seealso{ \code{\link{association}} \code{\link{setupSNP}} \code{\link{WGassociation}} } 49 | 50 | 51 | 52 | \keyword{utilities} 53 | -------------------------------------------------------------------------------- /R/plotWGassociation.R: -------------------------------------------------------------------------------- 1 | plot.WGassociation <- function(x, ...){ 2 | if (!inherits(x, "WGassociation")) 3 | stop("x must be an object of class 'WGassociation'") 4 | 5 | xx <- data.frame(SNP=rownames(x), data.frame(x)[,2:ncol(x)]) 6 | names(xx)[2:ncol(x)] <- gsub("log.", "", names(x)[2:ncol(x)] ) 7 | # names(xx)[6] <- "additive" 8 | dat <- tidyr::gather(xx, key="model", value="p.value", -"SNP") 9 | # dat$model <- factor(dat$model, 10 | # levels=c("codominant", "dominant", 11 | # "recessive", "overdominant", 12 | # "additive")) 13 | dat$model <- factor(dat$model, 14 | levels=unique(names(xx)[2:ncol(xx)])) 15 | 16 | plt <- ggplot(dat, aes(x=.data$SNP, y=-log10(.data$p.value))) + 17 | geom_point() + 18 | xlab("SNPs") + ylab(expression(-log[10]("p-value"))) + 19 | theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 20 | geom_hline(aes(yintercept = -log10(0.05), 21 | linetype = "Nominal"), 22 | colour="blue") + 23 | geom_hline(aes(yintercept = -log10(0.05/nrow(xx)), 24 | linetype = "Bonferroni"), 25 | colour="red") + 26 | 27 | scale_linetype_manual(name = "Significance", values = c(2, 2), 28 | guide = guide_legend(override.aes = list(color = c("red", "blue")))) 29 | 30 | plt + facet_wrap( ~ model, ncol=1) + 31 | theme(strip.text.x = element_text(face="bold")) + 32 | theme(legend.position="top", legend.direction="horizontal") 33 | } 34 | -------------------------------------------------------------------------------- /R/tableHWE.R: -------------------------------------------------------------------------------- 1 | `tableHWE` <- 2 | function (x, strata, ...) 3 | { 4 | if (!inherits(x, "setupSNP")) { 5 | stop("x must be an object of class 'setupSNP'") 6 | } 7 | 8 | colSNPs <- attr(x, "colSNPs") 9 | # VM seleccion incorrecta de datos!! data.SNPs <- x[colSNPs,,drop=FALSE] 10 | data.SNPs <- x[ , colSNPs, drop=FALSE] 11 | tt <- mclapply( data.SNPs, table, ...) 12 | ans <- cbind( "HWE (p value)" = unlist(mclapply(tt, SNPHWE, ...)) ) 13 | 14 | if (!missing(strata)) { 15 | 16 | # VM buscar en x si existe 17 | strata.name <- deparse(substitute(strata)) 18 | if(!exists(strata.name) & strata.name %in% names(x)) { 19 | strata<-x[,strata.name] 20 | } 21 | 22 | if (length(table(strata)) > 5) { 23 | stop("strata looks numeric") 24 | } 25 | strates <- names(table(strata) > 0) 26 | n.strata <- length(strates) 27 | i <- 1 28 | while (i <= n.strata) { 29 | data.SNPs.temp <- subset(data.SNPs, strata == strates[i]) 30 | # tt <- apply(data.SNPs.temp, 2, table) 31 | ## VM fallaba si todos los SNPs devuelven 3 genotipos porque el resultado es una matriz, no lista 32 | tt <- mclapply(data.SNPs.temp, table, ...) 33 | temp <- cbind("HWE (p value)" = unlist(mclapply(tt, SNPHWE, ...))) 34 | ans <- cbind(ans, temp) 35 | i <- i + 1 36 | } 37 | dimnames(ans)[[2]] <- c("all groups", strates) 38 | } 39 | class(ans) <- c("tableHWE", "matrix") 40 | ans 41 | } 42 | 43 | 44 | -------------------------------------------------------------------------------- /man/tableHWE.Rd: -------------------------------------------------------------------------------- 1 | \name{tableHWE} 2 | \alias{tableHWE} 3 | \alias{print.tableHWE} 4 | 5 | \title{Test for Hardy-Weinberg Equilibrium} 6 | \description{ 7 | Test the null hypothesis that Hardy-Weinberg equilibrium holds in cases, 8 | controls and both populations. 9 | 10 | \code{print} print the information. Number of digits, and significance 11 | level can be changed 12 | } 13 | \usage{ 14 | tableHWE(x, strata, ...) 15 | 16 | } 17 | 18 | \arguments{ 19 | \item{x}{an object of class 'setupSNP'} 20 | \item{strata}{a factor variable for a stratified analysis} 21 | \item{...}{optional arguments} 22 | 23 | } 24 | \details{ 25 | This function calculates the HWE test for those variables of class 'snp' in the object x of 26 | class 'setupSNP' 27 | } 28 | \value{ 29 | A matrix with p values for Hardy-Weinberg Equilibrium 30 | } 31 | 32 | \author{This function is based on an R function which computes an exact SNP test of Hardy-Weinberg Equilibrium 33 | written by Wigginton JE, Cutler DJ and Abecasis GR available at \url{http://csg.sph.umich.edu/abecasis/Exact/r_instruct.html}} 34 | 35 | \seealso{ \code{\link{setupSNP}}} 36 | 37 | \references{Wigginton JE, Cutler DJ and Abecasis GR (2005). A note on exact tests of Hardy-Weinberg equilibrium. Am J Hum Genet 76:887-93 38 | 39 | } 40 | 41 | \examples{ 42 | data(SNPs) 43 | ans<-setupSNP(SNPs,6:40,sep="") 44 | res<-tableHWE(ans) 45 | print(res) 46 | #change the significance level showed in the flag column 47 | print(res,sig=0.001) 48 | 49 | 50 | #stratified analysis 51 | res<-tableHWE(ans,ans$sex) 52 | print(res) 53 | 54 | 55 | } 56 | \keyword{utilities} 57 | 58 | -------------------------------------------------------------------------------- /man/inheritance.Rd: -------------------------------------------------------------------------------- 1 | \name{inheritance} 2 | \alias{inheritance} 3 | \alias{geneticModel} 4 | \alias{codominant} 5 | \alias{dominant} 6 | \alias{recessive} 7 | \alias{overdominant} 8 | \alias{additive} 9 | 10 | \title{Collapsing (or recoding) genotypes into different categories (generally two) depending on a given genetic mode of inheritance} 11 | \description{codominant function recodifies a variable having genotypes depending on the allelic frequency 12 | in descending order. \cr 13 | 14 | dominant, recessive, and overdominant functions collapse the three categories of a given SNP 15 | into two categories as follows: Let 'AA', 'Aa', and 'aa' be the three genotypes. After determining 16 | the most frequent allele (let's suppose that 'A' is the major allele) the functions return a vector 17 | with to categories as follows. dominant: 'AA' and 'Aa-aa'; recessive: 'AA-Aa' and 'aa'; 18 | overdominant: 'AA-aa' vs 'Aa'. \cr 19 | 20 | additive function creates a numerical variable, 1, 2, 3 corresponding to the three genotypes sorted 21 | out by descending allelic frequency (this model is referred as log-additive).} 22 | \usage{ 23 | codominant(o) 24 | dominant(o) 25 | recessive(o) 26 | overdominant(o) 27 | additive(o) 28 | } 29 | 30 | \arguments{ 31 | \item{o}{categorical covariate having genotypes} 32 | } 33 | \value{A snp object collapsing genotypes into different categories depending on a given genetic mode of inheritance} 34 | \examples{ 35 | data(SNPs) 36 | dominant(snp(SNPs$snp10001,sep="")) 37 | overdominant(snp(SNPs$snp10001,sep="")) 38 | } 39 | \keyword{utilities} -------------------------------------------------------------------------------- /man/sortSNPs.Rd: -------------------------------------------------------------------------------- 1 | \name{sortSNPs} 2 | \alias{sortSNPs} 3 | 4 | \title{Sort a vector of SNPs by genomic position} 5 | \description{ 6 | This function sorts a vector with the position of SNPs in a data frame 7 | using another data frame which contains information about SNPs, their 8 | chromosome, and their genomic position 9 | } 10 | \usage{ 11 | sortSNPs(data, colSNPs, info) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{data}{a required data frame with the SNPs} 16 | \item{colSNPs}{a required vector indicating which columns of 'data' contains genotype data} 17 | \item{info}{a required data frame with genomic information for the SNPs (chromosome and position). 18 | The first column must have the SNPs, the second one the chromosome and the third one the genomic 19 | position. } 20 | } 21 | \details{ 22 | First of all, the function obtains a vector with the SNPs sorted using the data frame with the genomic 23 | positions (see 'dataSNPs.pos' argument). 24 | Then, the columns which indicate where the information about the 25 | genotypes is in our data frame, are sorted using this vector. 26 | 27 | This information is useful when \code{\link{WGassociation}} function is called since it 28 | allow the user to summaryze the results with the SNPs sorted by genomic position 29 | } 30 | 31 | \value{ 32 | a vector indicating the colums where the SNPs are recorded in our data frame ('data' argument), sorted 33 | using the genomic positions listed in 'dataSNPs.pos' argument) 34 | } 35 | 36 | \examples{ 37 | # 38 | # data(SNPs) 39 | # data(SNPs.info.pos) 40 | # colSNPs.order<-sortSNPs(SNPs,c(6:40),SNPs.info.pos) 41 | # 42 | } 43 | \keyword{utilities} 44 | 45 | -------------------------------------------------------------------------------- /R/permTest.R: -------------------------------------------------------------------------------- 1 | permTest<-function(x,method="minimum",K) 2 | { 3 | if (!inherits(x, "WGassociation")) 4 | stop("x must be an object of class 'WGassociation'") 5 | 6 | if (is.null(attr(x,"pvalPerm"))) 7 | stop ("\n try again 'scanWGassociation' indicating the number of permutations") 8 | perms<-attr(x,"pvalPerm") 9 | 10 | type.method<-charmatch(method,c("minimum","rtp")) 11 | if (is.na(type.method)) 12 | stop ("\n method should be 'minimum' or 'rtp'") 13 | 14 | # w <- options(warn=-1) 15 | 16 | if (type.method==1) 17 | { 18 | pmin<-apply(perms,2,min,na.rm=TRUE) 19 | 20 | llhd2 <- function(x,p) { 21 | ans <- -sum(log(dbeta(x,p[1],p[2]))) 22 | ans 23 | } 24 | 25 | pIni<-c(1,100) 26 | param<-nlm(llhd2,x=pmin,p=pIni)$estimate 27 | psig<-qbeta(0.05,param[1],param[2]) 28 | valid.SNPs<-length(perms[!is.na(perms[,1]),1]) 29 | discarded.SNPs<-length(perms[is.na(perms[,1]),1]) 30 | ans<-list(pmin=pmin,param=param,psig=psig,nSNPs=c(valid.SNPs,discarded.SNPs)) 31 | } 32 | 33 | 34 | if (type.method==2) 35 | { 36 | rtpK<-function(x,K) 37 | { 38 | x<-sort(x) 39 | ans<-sum(-log(x[1:K])) 40 | ans 41 | } 42 | 43 | perms<-attr(x,"pvalPerm") 44 | stat<-rtpK(pvalues(x)[,2],K) 45 | pval<-apply(perms,2,rtpK,K) 46 | sig<-mean(pval>=stat) 47 | valid.SNPs<-length(perms[!is.na(perms[,1]),1]) 48 | discarded.SNPs<-length(perms[is.na(perms[,1]),1]) 49 | ans<-list(rtp=stat,sig=sig,nSNPs=c(valid.SNPs,discarded.SNPs),K=K) 50 | } 51 | 52 | # options(w) 53 | 54 | attr(ans,"method")<-type.method 55 | class(ans)<-"permTest" 56 | ans 57 | } 58 | 59 | -------------------------------------------------------------------------------- /man/SNPassoc-internal.Rd: -------------------------------------------------------------------------------- 1 | % --- SNPstat-internal.Rd --- 2 | 3 | \name{SNPassoc-internal} 4 | \alias{SNPassoc-internal} 5 | \alias{association.fit} 6 | \alias{extractPval} 7 | \alias{extractPval.i} 8 | \alias{SNPHWE} 9 | \alias{GenotypeRate} 10 | \alias{intervals.dif} 11 | \alias{intervals.or} 12 | \alias{is.quantitative} 13 | \alias{haplo.inter.fit} 14 | \alias{table.corner} 15 | \alias{table.interaction} 16 | \alias{crea.lab} 17 | \alias{togeno} 18 | \alias{orderChromosome} 19 | \alias{expandsetupSNP} 20 | \alias{pvalTest} 21 | \alias{modelTest} 22 | \alias{assoc} 23 | \alias{codominant.default} 24 | \alias{dominant.default} 25 | \alias{recessive.default} 26 | \alias{overdominant.default} 27 | \alias{additive.default} 28 | \alias{trim} 29 | \alias{interleave} 30 | 31 | 32 | \title{Internal SNPstat functions} 33 | \description{Internal SNPassoc functions} 34 | \value{No return value, internal calls } 35 | \usage{ 36 | association.fit(var, dep, adj, quantitative, type, level, 37 | nIndiv, genotypingRate = 0, ...) 38 | extractPval(x) 39 | extractPval.i(i,x,pos,models) 40 | SNPHWE(x) 41 | GenotypeRate(x) 42 | haplo.inter.fit(geno, var2, dep, adj = NULL, fam, 43 | haplo.freq.min, ...) 44 | crea.lab(x,pos.ini,cex,dist) 45 | orderChromosome(x) 46 | togeno(f,sep=sep,lab=lab) 47 | expandsetupSNP(o) 48 | pvalTest(dataX,Y,quantitative,type,genotypingRate) 49 | modelTest(X,Y,quantitative,type,genotypingRate) 50 | assoc(y,x,test="lrt",quantitative) 51 | trim(s) 52 | interleave(..., append.source=TRUE, sep=": ", drop=FALSE) 53 | \method{codominant}{default}(o) 54 | \method{dominant}{default}(o) 55 | \method{recessive}{default}(o) 56 | \method{overdominant}{default}(o) 57 | \method{additive}{default}(o) 58 | } 59 | 60 | \details{These are not to be called by the user} 61 | \keyword{internal} 62 | -------------------------------------------------------------------------------- /R/summary.WGassociation.R: -------------------------------------------------------------------------------- 1 | `summary.WGassociation` <- 2 | function(object,pSig=0.000001,...) 3 | { 4 | 5 | if (!inherits(object, "WGassociation")) 6 | stop("object must be of class 'WGassociation'") 7 | 8 | genes<-attr(object,"gen.info") 9 | 10 | if (is.null(genes)){ 11 | aux<-attr(object,"pvalues") 12 | info0<-nrow(aux) 13 | info1<-round((table(factor(aux[,1],levels=c("Genot error","Monomorphic")))/info0)*100,1) 14 | nSig<-sum(aux[,2]<=pSig,na.rm=TRUE) 15 | info2<-c(nSig,round((nSig/info0)*100,1)) 16 | info<-c(info0,info1,info2) 17 | ans<-rbind(info) 18 | rownames(ans)<-"" 19 | colnames(ans)<-c("SNPs (n)","Genot error (%)","Monomorphic (%)","Significant* (n)","(%)") 20 | } 21 | else { 22 | SNPs<-attr(object,"label.SNPs") 23 | pval<-attr(object,"pvalues") 24 | nSNPs<-table(genes[,2]) 25 | chr.l <- names(nSNPs) 26 | 27 | if(length(chr.l)==22) 28 | o<-orderChromosome(chr.l) 29 | else 30 | o<-c(1:length(chr.l)) 31 | chr <- chr.l[o] 32 | nSNPs.o<-nSNPs[o] 33 | 34 | info<-matrix(NA,nrow=length(chr),ncol=5) 35 | 36 | for (i in 1:length(chr)) 37 | { 38 | info0<-nSNPs.o[i] 39 | temp<-genes[genes[,2]==chr[i],] 40 | aux<-pval[dimnames(pval)[[1]]%in%temp[,1],] 41 | info1<-round((table(factor(aux[,1],levels=c("Genot error","Monomorphic")))/nrow(aux))*100,1) 42 | nSig<-sum(aux[,2]<=pSig,na.rm=TRUE) 43 | info2<-c(nSig,round((nSig/nrow(aux))*100,1)) 44 | info[i,]<-c(info0,info1,info2) 45 | } 46 | 47 | ans<-data.frame(info) 48 | names(ans)<-c("SNPs (n)","Genot error (%)","Monomorphic (%)", 49 | "Significant* (n)","(%)") 50 | dimnames(ans)[[1]]<-chr 51 | 52 | } 53 | print(ans) 54 | cat("\n *Number of statistically significant associations at level", pSig) 55 | cat("\n") 56 | invisible(ans) 57 | } 58 | 59 | -------------------------------------------------------------------------------- /R/print.intervals.R: -------------------------------------------------------------------------------- 1 | `print.intervals` <- 2 | function (x, len = 6, d = 2, exclude.intercept = TRUE, pval = TRUE, 3 | ...) 4 | { 5 | n <- x 6 | dd <- dim(n) 7 | mx <- 10^(len - (d + 1)) 8 | n[n > mx] <- Inf 9 | a <- formatC(n, d, len, format = "f") 10 | dim(a) <- dd 11 | if (length(dd) == 1) { 12 | dd <- c(1, dd) 13 | dim(a) <- dd 14 | lab <- " " 15 | } 16 | else lab <- dimnames(n)[[1]] 17 | if (!pval) { 18 | mx <- max(nchar(lab)) + 1 19 | cat(paste(rep(" ", mx), collapse = ""), paste(" ", dimnames(n)[[2]]), 20 | "\n") 21 | for (i in (1 + exclude.intercept):dd[1]) { 22 | lab[i] <- paste(c(rep(" ", mx - nchar(lab[i])), lab[i]), 23 | collapse = "") 24 | if (i == (1 + exclude.intercept)) 25 | cat(lab[i], formatC(n[i, 1], 4, 6, format = "f"), 26 | a[i, 2], "Reference haplotype", "\n") 27 | else cat(lab[i], ifelse(is.na(n[i, 1])," ",formatC(n[i, 1], 4, 6, format = "f")), 28 | a[i, 2], "(", a[i, 3], "-", a[i, 4], ") \n") 29 | } 30 | } 31 | else { 32 | mx <- max(nchar(lab)) + 1 33 | cat(paste(rep(" ", mx), collapse = ""), paste(" ", dimnames(n)[[2]]), 34 | "\n") 35 | for (i in (1 + exclude.intercept):dd[1]) { 36 | lab[i] <- paste(c(rep(" ", mx - nchar(lab[i])), lab[i]), 37 | collapse = "") 38 | if (i == (1 + exclude.intercept)) 39 | cat(lab[i], formatC(n[i, 1], 4, 6, format = "f"), 40 | a[i, 2], "Reference haplotype", "\n") 41 | else cat(lab[i], ifelse(is.na(n[i, 1])," ",formatC(n[i, 1], 4, 6, format = "f")), 42 | a[i, 2], "(", a[i, 3], "-", a[i, 4], ") ", formatC(n[i, 43 | 5], 4, 6, format = "f"), "\n") 44 | } 45 | } 46 | } 47 | 48 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: SNPassoc 2 | Version: 2.1-2 3 | Date: 2024-10-28 4 | Depends: R (>= 4.0.0) 5 | Imports: 6 | haplo.stats, 7 | mvtnorm, 8 | parallel, 9 | survival, 10 | tidyr, 11 | plyr, 12 | ggplot2, 13 | poisbinom 14 | Suggests: testthat, knitr, rmarkdown, biomaRt, 15 | VariantAnnotation, 16 | GenomicRanges, IRanges, S4Vectors, org.Hs.eg.db, 17 | TxDb.Hsapiens.UCSC.hg19.knownGene 18 | Title: SNPs-Based Whole Genome Association Studies 19 | Authors@R: c( person(given = "Victor", family = "Moreno", role = "aut"), 20 | person(given = "Juan R", family = "Gonzalez", 21 | email = "juanr.gonzalez@isglobal.org", role = c("aut"), 22 | comment = c(ORCID = "0000-0003-3267-2146")), 23 | person(given = "Dolors", family = "Pelegri", 24 | email = "dolors.pelegri@isglobal.org", role = c("aut", "cre"), 25 | comment = c(ORCID = "0000-0002-5993-3003"))) 26 | Description: Functions to perform most of the common analysis in genome 27 | association studies are implemented. These analyses include descriptive 28 | statistics and exploratory analysis of missing values, calculation of 29 | Hardy-Weinberg equilibrium, analysis of association based on generalized 30 | linear models (either for quantitative or binary traits), and analysis 31 | of multiple SNPs (haplotype and epistasis analysis). Permutation test 32 | and related tests (sum statistic and truncated product) are also 33 | implemented. Max-statistic and genetic risk-allele score exact 34 | distributions are also possible to be estimated. The methods are 35 | described in Gonzalez JR et al., 2007 . 36 | URL: https://github.com/isglobal-brge/SNPassoc 37 | License: GPL (>= 2) 38 | Encoding: UTF-8 39 | VignetteBuilder: knitr 40 | RoxygenNote: 7.2.2 41 | NeedsCompilation: no 42 | -------------------------------------------------------------------------------- /R/summary.snp.R: -------------------------------------------------------------------------------- 1 | `summary.snp` <- 2 | function (object, ...) 3 | { 4 | n <- length(object) 5 | nas <- is.na(object) 6 | n.typed <- n - sum(nas) 7 | ll <- levels(object) 8 | tbl <- table(object) 9 | tt <- c(tbl) 10 | names(tt) <- dimnames(tbl)[[1]] 11 | if (any(nas)) 12 | { 13 | tt.g <- c(tt, "NA's" = sum(nas)) 14 | missing.allele<-sum(nas)/(sum(tt)+sum(nas)) 15 | } 16 | else 17 | { 18 | tt.g <- tt 19 | missing.allele<-0 20 | } 21 | tt.g.prop <- prop.table(tbl) 22 | if (any(nas)) 23 | tt.g.prop <- c(tt.g.prop, NA) 24 | ans.g <- cbind(frequency = tt.g, percentage = tt.g.prop * 100) 25 | 26 | alle <- attr(object, "allele.names") 27 | alle1 <- length(grep(paste(alle[1], "/", sep = ""), as.character(object))) + 28 | length(grep(paste("/", alle[1], sep = ""), as.character(object))) 29 | if (length(alle) > 1) { 30 | 31 | alle2 <- length(grep(paste(alle[2], "/", sep = ""), as.character(object))) + 32 | length(grep(paste("/", alle[2], sep = ""), as.character(object))) 33 | tt.a <- c(alle1, alle2) 34 | tt.a.prop <- prop.table(tt.a) 35 | ans.a <- cbind(frequency = tt.a, percentage = tt.a.prop * 100) 36 | pvalueHWE <- SNPHWE(c(tbl,0,0)[1:3]) # VM make sure 3 genotypes are sent i 37 | dimnames(ans.a)[[1]] <- alle 38 | } 39 | else { 40 | tt.a <- alle1 41 | tt.a.prop <- prop.table(tt.a) 42 | ans.a <- t(c(frequency = tt.a, percentage = tt.a.prop * 100)) 43 | rownames(ans.a)<-alle 44 | pvalueHWE <- NA 45 | } 46 | if (any(nas)) 47 | ans.a <- rbind(ans.a, "NA's" = c(2 * sum(nas), NA)) 48 | 49 | ans <- list(allele.names = alle, allele.freq = ans.a, genotype.freq = ans.g, 50 | n = n, n.typed = n.typed, HWE = pvalueHWE, missing.allele=missing.allele) 51 | class(ans) <- "summary.snp" 52 | ans 53 | } 54 | 55 | -------------------------------------------------------------------------------- /R/plotMissing.R: -------------------------------------------------------------------------------- 1 | `plotMissing` <- 2 | function (x, print.labels.SNPs = TRUE, 3 | main = "Genotype missing data", ...) 4 | { 5 | if(!inherits(x,"setupSNP")) 6 | stop("x must be an object of class 'setupSNP'") 7 | 8 | colSNPs<-attr(x,"colSNPs") 9 | data.SNPs <- t(x[colSNPs, drop=FALSE]) 10 | label.SNPs<- attr(x,"label.SNPs") 11 | genInfo<-attr(x,"gen.info") 12 | 13 | data.Missing <- is.na(data.SNPs) 14 | 15 | # Reset par options on exit function 16 | old.xpd <- par("xpd") 17 | on.exit(par(old.xpd)) 18 | 19 | old.las <- par("las") 20 | on.exit(par(las = old.las)) 21 | 22 | par(xpd = TRUE) 23 | 24 | image(1:nrow(data.Missing), 1:ncol(data.Missing), data.Missing, 25 | col = c("white", "black"), ylab = "Individuals", xlab = ifelse(print.labels.SNPs, 26 | "", "SNPs"), axes = !print.labels.SNPs) 27 | if (print.labels.SNPs) { 28 | axis(1, at = c(1:length(label.SNPs)), labels = label.SNPs, 29 | las = 3, cex.axis = 0.7) 30 | axis(2) 31 | } 32 | title(main, line = 3) 33 | if (!is.null(genInfo)) 34 | n.snps <- table(genInfo[, 2]) 35 | else n.snps <- length(label.SNPs) 36 | a <- c(0.5, cumsum(n.snps) + 0.5) 37 | b <- par("usr") 38 | if (!is.null(genInfo)) 39 | col.ok <- c("black", rep("red", length(a) - 1)) 40 | else col.ok <- c("black", rep("black", length(a) - 1)) 41 | segments(a, b[3], a, b[4] + diff(b[3:4]) * 0.02, col = col.ok) 42 | abline(h = 0.5 + c(0, ncol(data.Missing)), xpd = FALSE) 43 | a <- par("usr") 44 | wh <- cumsum(c(0.5, n.snps)) 45 | if (!is.null(genInfo)) { 46 | segments(a, b[3], a, b[4] + diff(b[3:4]) * 0.02, col = c("black", 47 | rep("red", length(a) - 1))) 48 | names.geno <- unique(genInfo[, 2]) 49 | n.gen <- length(names.geno) 50 | for (i in 1:n.gen) text(mean(wh[i + c(0, 1)]), a[4] + 51 | (a[4] - a[3]) * 0.025, names.geno[i], srt = 45, cex = 0.8, 52 | adj = 0.2) 53 | } 54 | } 55 | 56 | -------------------------------------------------------------------------------- /man/setupSNP.Rd: -------------------------------------------------------------------------------- 1 | \name{setupSNP} 2 | \alias{setupSNP} 3 | \alias{summary.setupSNP} 4 | \alias{plot.setupSNP} 5 | \alias{[.setupSNP} 6 | \alias{[[<-.setupSNP} 7 | \alias{[<-.setupSNP} 8 | \alias{$<-.setupSNP} 9 | \alias{labels.setupSNP} 10 | 11 | \title{Convert columns in a dataframe to class 'snp'} 12 | 13 | \description{ 14 | \code{setupSNP} Convert columns in a dataframe to class 'snp' 15 | 16 | \code{summary.setupSNP} gives a summary for an object of class 'setupSNP' including 17 | allele names, major allele frequencie, an exact thest of Hardy-Weinberg 18 | equilibrium and percentage of missing genotypes 19 | 20 | } 21 | 22 | \usage{ 23 | setupSNP(data, colSNPs, sort = FALSE, info, sep = "/", ...) 24 | } 25 | 26 | \arguments{ 27 | \item{data}{dataframe containing columns with the SNPs to be converted} 28 | \item{colSNPs}{Vector specifying which columns contain SNPs data} 29 | \item{sort}{should SNPs be sorted. Default is FALSE} 30 | \item{info}{if sort is TRUE a dataframe containing information 31 | about the SNPs regarding their genomic position and the gene where 32 | they are located} 33 | \item{sep}{character separator used to divide alleles in the genotypes} 34 | \item{...}{optional arguments} 35 | } 36 | \value{ 37 | a dataframe of class 'setupSNP' containing converted SNP variables. 38 | All other variables will be unchanged. 39 | } 40 | 41 | \references{ 42 | JR Gonzalez, L Armengol, X Sole, E Guino, JM Mercader, X Estivill, V Moreno. 43 | SNPassoc: an R package to perform whole genome association studies. Bioinformatics, 44 | 2007;23(5):654-5. 45 | } 46 | 47 | 48 | \seealso{ \code{\link{snp}}} 49 | 50 | \examples{ 51 | 52 | data(SNPs) 53 | myDat<-setupSNP(SNPs,6:40,sep="") 54 | 55 | 56 | #sorted SNPs and having genomic information 57 | data(SNPs.info.pos) 58 | myDat.o<-setupSNP(SNPs,6:40,sep="",sort=TRUE, info=SNPs.info.pos) 59 | 60 | # summary 61 | summary(myDat.o) 62 | 63 | # plot one SNP 64 | plot(myDat,which=2) 65 | 66 | } 67 | \keyword{utilities} 68 | -------------------------------------------------------------------------------- /R/odds.r: -------------------------------------------------------------------------------- 1 | 2 | odds<-function(x, model=c("log-additive", "dominant", "recessive", "overdominant", "codominant"), sorted=c("no","p-value","or")){ 3 | tables<-attr(x,"tables") 4 | 5 | if(is.null(tables)) stop("An object fitted with WGassociation is needed. scanWGassociation doesn't estimate ORs.") 6 | 7 | models<-c("log-additive", "additive" , "dominant", "recessive", "overdominant", "codominant") 8 | mo<-pmatch(tolower(model[1]),models) 9 | if (is.na(mo)) stop("Incorrect model chosen") 10 | 11 | add<-lapply(tables, function(o){ 12 | if(is.null(dim(o))){ 13 | c(NA,NA,NA,NA) 14 | } else { 15 | tag<- c("0,1,2", "0,1,2", "Dominant", "Recessive", "Overdominant","Codominant") 16 | lag<- c(0,0,2,2,2,2) 17 | r<-match(tag[mo],rownames(o))+lag[mo] 18 | row<-o[r,5:8] 19 | if(mo>2) row[4]<-o[r-1,8] 20 | if (mo==6) { 21 | lab<-names(row) 22 | row<-c(row[1:3],o[r+1,5:7],row[4]) 23 | dim(row)<-c(7,1) 24 | rownames(row)<-c(paste(lab[1:3],rep("1",3),sep="."),paste(lab[1:3],rep("2",3),sep="."),lab[4]) 25 | } 26 | row 27 | } 28 | } 29 | ) 30 | add<-as.data.frame(add) 31 | add<-t(add) 32 | 33 | pvals<-attr(x,"pvalues") 34 | 35 | codo<- match("codominant", colnames(pvals)) 36 | 37 | if(!is.na(codo)){ 38 | pv<-match("p-value", colnames(add)) 39 | add[is.na(add[,pv]),pv]<-pvals[is.na(add[,pv]),codo] 40 | } 41 | 42 | so<-pmatch(tolower(sorted[1]),c("p-value","or")) 43 | if(!is.na(so)){ 44 | if(so==1) col<-match("p-value", colnames(add)) 45 | if(so==2 & mo< 6) col<-1 46 | if(so==2 & mo==6) col<-4 47 | add<-add[order(add[,col]),] 48 | } 49 | colnames(add)[length(colnames(add))]<-paste("p-value",models[mo],sep=".") 50 | as.data.frame(add) 51 | } 52 | -------------------------------------------------------------------------------- /man/BonferroniSig.Rd: -------------------------------------------------------------------------------- 1 | \name{Bonferroni.sig} 2 | \alias{Bonferroni.sig} 3 | 4 | \title{ Bonferroni correction of p values} 5 | \description{ 6 | This function shows the SNPs that are statistically significant after correcting 7 | for the number of tests performed (Bonferroni correction) for an object of class 8 | "WGassociation" 9 | } 10 | \usage{ 11 | Bonferroni.sig(x, model = "codominant", alpha = 0.05, 12 | include.all.SNPs=FALSE) 13 | } 14 | 15 | \arguments{ 16 | \item{x}{an object of class 'WGassociation'.} 17 | \item{model}{a character string specifying the type of genetic model (mode of inheritance). This 18 | indicantes how the genotypes should be collapsed when 'plot.summary' is TRUE. Possible 19 | values are "codominant", "dominant", "recessive", "overdominant", or "log-additive". 20 | The default is "codominant". Only the first words are required, e.g "co", "do", ... . } 21 | \item{alpha}{nominal level of significance. Default is 0.05} 22 | \item{include.all.SNPs}{logical value indicating whether all SNPs are considered in the Bonferroni 23 | correction. That is, the number of performed tests is equal to the number of SNPs or equal to the 24 | number of SNPs where a p value may be computed. The default value is FALSE indicating that the 25 | number of tests is equal to the number of SNPs that are non Monomorphic and the rate of genotyping 26 | is greater than the percentage indicated in the \code{GeneticModel.pval} function. } 27 | } 28 | \details{ 29 | After deciding the genetic model, the function shows the SNPs that are statistically significant at 30 | \code{alpha} level corrected by the number of performed tests. 31 | } 32 | \value{ 33 | A data frame with the SNPs and the p values for those SNPs that are statistically significant 34 | after Bonferroni correction 35 | } 36 | 37 | 38 | \seealso{ \code{\link{WGassociation}}} 39 | \examples{ 40 | data(SNPs) 41 | datSNP<-setupSNP(SNPs,6:40,sep="") 42 | ans<-WGassociation(protein~1,data=datSNP,model="all") 43 | Bonferroni.sig(ans, model="codominant", alpha=0.05, include.all.SNPs=FALSE) 44 | 45 | } 46 | \keyword{utilities} 47 | -------------------------------------------------------------------------------- /man/maxstat.Rd: -------------------------------------------------------------------------------- 1 | \name{maxstat} 2 | \alias{maxstat} 3 | \alias{maxstat.default} 4 | \alias{maxstat.table} 5 | \alias{maxstat.setupSNP} 6 | \alias{maxstat.matrix} 7 | \alias{print.maxstat} 8 | 9 | \title{max-statistic for a 2x3 table} 10 | \description{ 11 | Computes the asymptotic p-value for max-statistic for a 2x3 table 12 | } 13 | \usage{ 14 | maxstat(x, \dots) 15 | 16 | \method{maxstat}{default}(x, y, \dots) 17 | 18 | \method{maxstat}{table}(x, \dots) 19 | 20 | \method{maxstat}{setupSNP}(x, y, colSNPs=attr(x,"colSNPs"), ...) 21 | 22 | \method{maxstat}{matrix}(x, \dots) 23 | 24 | } 25 | \arguments{ 26 | \item{x}{a numeric matrix with 2 rows (cases/controls) and 3 colums (genotypes) or a vector with case/control status 27 | or an object of class 'setupSNP'.} 28 | \item{y}{an optional numeric vector containing the information for a given SNP. In this 29 | case 'x' argument must contain a vector indicarting case/control status. If 'x' argument is an object of 30 | class 'setupSNP' this argument migth be the name of the variable containing case/control information.} 31 | \item{colSNPs}{a vector indicating which columns contain those SNPs to compute max-statistic. By default 32 | max-statistic is computed for those SNPs specified when the object of class 'setupSNP' was 33 | created.} 34 | 35 | \item{\dots}{further arguments to be passed to or from methods.} 36 | } 37 | \references{ 38 | Gonzalez JR, Carrasco JL, Dudbridge F, Armengol L, Estivill X, Moreno V. Maximizing association statistics over 39 | genetic models (2007). Submitted 40 | 41 | Sladek R, Rocheleau G, Rung J et al. A genome-wide association study identifies novel risk loci for type 2 diabetes (2007). Nature 445, 881-885 42 | 43 | } 44 | \value{ 45 | A matrix with the chi-square statistic for dominant, recessive, log-additive and max-statistic and 46 | its asymptotic p-value. 47 | } 48 | \seealso{ 49 | \code{\link{setupSNP}} 50 | } 51 | \examples{ 52 | 53 | # example from Sladek et al. (2007) for the SNP rs1111875 54 | tt<-matrix(c(77,298,310,122,316,231),nrow=2,ncol=3,byrow=TRUE) 55 | maxstat(tt) 56 | 57 | data(SNPs) 58 | maxstat(SNPs$casco,SNPs$snp10001) 59 | myDat<-setupSNP(SNPs,6:40,sep="") 60 | maxstat(myDat,casco) 61 | 62 | 63 | 64 | } 65 | \keyword{utilities} -------------------------------------------------------------------------------- /man/LD.Rd: -------------------------------------------------------------------------------- 1 | \name{LD} 2 | \alias{LD} 3 | \alias{LD.snp} 4 | \alias{LD.setupSNP} 5 | \alias{LDplot} 6 | \alias{LDtable} 7 | 8 | \title{max-statistic for a 2x3 table} 9 | \description{ 10 | Compute pairwise linkage disequilibrium between genetic markers 11 | } 12 | \usage{ 13 | LD(g1, \dots) 14 | 15 | \method{LD}{snp}(g1, g2, \dots) 16 | 17 | \method{LD}{setupSNP}(g1, SNPs, \dots) 18 | 19 | 20 | LDplot(x, digits = 3, marker, distance, which = c("D", "D'", 21 | "r", "X^2", "P-value", "n", " "), ...) 22 | 23 | LDtable(x, colorcut = c(0, 0.01, 0.025, 0.05, 0.1, 1), 24 | colors = heat.colors(length(colorcut)), 25 | textcol = "black", digits = 3, show.all = FALSE, 26 | which = c("D", "D'", "r", "X^2", "P-value", "n"), 27 | colorize = "P-value", cex, \dots) 28 | 29 | 30 | } 31 | \arguments{ 32 | 33 | \item{g1}{genotype object or dataframe containing genotype objects} 34 | \item{g2}{genotype object (ignored if g1 is a dataframe)} 35 | \item{SNPs}{columns containing SNPs} 36 | \item{x}{LD or LD.data.frame object} 37 | \item{digits}{Number of significant digits to display} 38 | \item{which}{Name(s) of LD information items to be displayed} 39 | \item{colorcut}{P-value cutoffs points for colorizing LDtable} 40 | \item{colors}{Colors for each P-value cutoff given in 'colorcut' for LDtable} 41 | \item{textcol}{Color for text labels for LDtable} 42 | \item{marker}{Marker used as 'comparator' on LDplot. If omitted separate 43 | lines for each marker will be displayed} 44 | \item{distance}{Marker location, used for locating of markers on LDplot.} 45 | \item{show.all}{If TRUE, show all rows/columns of matrix. Otherwise omit 46 | completely blank rows/columns.} 47 | \item{colorize}{LD parameter used for determining table cell colors} 48 | \item{cex}{Scaling factor for table text. If absent, text will be scaled 49 | to fit within the table cells.} 50 | 51 | \item{\dots}{Optional arguments ('plot.LD.data.frame' passes these to 52 | 'LDtable' and 'LDplot').} 53 | } 54 | 55 | \references{ 56 | genetics R package by Gregory Warnes et al. (warnes@bst.rochester.edu) 57 | } 58 | 59 | \author{ 60 | functions adapted from LD, LDtable and LDplot in package genetics by Gregory Warnes et al. (warnes@bst.rochester.edu) 61 | } 62 | 63 | \value{ 64 | None 65 | } 66 | 67 | \seealso{ 68 | \code{\link{setupSNP}} \code{\link{snp}} 69 | } 70 | 71 | 72 | \keyword{utilities} 73 | -------------------------------------------------------------------------------- /R/print.snpOut.R: -------------------------------------------------------------------------------- 1 | `print.snpOut` <- 2 | function (x, digits = max(3, getOption("digits") - 3), ...) 3 | { 4 | temp <- attr(x, "label.snp") 5 | temp2 <- gsub("(snp\\()", "", temp) 6 | attr(x, "label.snp") <- gsub("\\)", "", temp2) 7 | cat("\n") 8 | if (!attr(x, "Interaction")) { 9 | if (is.null(attr(x, "strata"))) { 10 | cat("SNP:", attr(x, "label.snp"), " adjusted by:", 11 | attr(x, "varAdj"), "\n") 12 | class(x) <- NULL 13 | attr(x, "varAdj") <- attr(x, "label.snp") <- attr(x, 14 | "BigTable") <- attr(x, "Interaction") <- NULL 15 | if (!is.Monomorphic(x)) { 16 | print(x, na.print = "", digits = digits, quote = FALSE) 17 | } 18 | else cat("Monomorphic\n") 19 | } 20 | else { 21 | nstrat <- length(attr(x, "strata")) 22 | for (i in 1:nstrat) { 23 | cat(" strata:", attr(x, "strata")[i], "\n") 24 | cat("SNP:", attr(x, "label.snp"), " adjusted by:", 25 | attr(x, "varAdj"), "\n") 26 | class(x) <- NULL 27 | attr(x, "varAdj") <- attr(x, "label.snp") <- attr(x, 28 | "BigTable") <- attr(x, "Interaction") <- NULL 29 | if (!is.Monomorphic(x)) { 30 | print(x[[i]], na.print = "", digits = digits, 31 | quote = FALSE) 32 | } 33 | else cat("Monomorphic\n") 34 | cat("\n") 35 | } 36 | } 37 | } 38 | else { 39 | cat(" SNP:", attr(x, "label.snp"), " adjusted by:", 40 | attr(x, "varAdj"), "\n") 41 | cat(" Interaction \n") 42 | cat("---------------------\n") 43 | print(x[[1]], digits = digits) 44 | cat("\n") 45 | cat("p interaction:", x[[4]], "\n") 46 | cat("\n", paste(attr(x, "label.int"), "within",attr(x, "label.snp")), "\n") 47 | cat("---------------------\n") 48 | for (i in 1:length(x[[2]])) { 49 | cat(names(x[[2]])[i], "\n") 50 | print(x[[2]][[i]], digits = digits) 51 | cat("\n") 52 | } 53 | cat("p trend:", x[[5]], "\n") 54 | cat("\n", paste(attr(x, "label.snp"),"within",attr(x, "label.int")), "\n") 55 | cat("---------------------\n") 56 | for (i in 1:length(x[[3]])) { 57 | cat(names(x[[3]])[i], "\n") 58 | print(x[[3]][[i]], digits = digits) 59 | cat("\n") 60 | } 61 | cat("p trend:", x[[6]], "\n") 62 | } 63 | } 64 | 65 | -------------------------------------------------------------------------------- /R/intervals.haplo.glm.R: -------------------------------------------------------------------------------- 1 | intervals.haplo.glm<-function (o, level = 0.95, sign = 1, FUN = exp, ...) 2 | { 3 | if (o$family$family != "binomial") 4 | FUN = function(x) x 5 | z <- abs(qnorm((1 - level)/2)) 6 | co <- summary(o)$coef 7 | strsplit2 <- function(x, split) { 8 | ans <- unlist(strsplit(x, split)) 9 | return(ifelse(length(ans) == 2, ans[2], ans)) 10 | } 11 | control0 <- sapply(dimnames(summary(o)$coef)[[1]], FUN = strsplit2, 12 | split = "[.]") 13 | aux <- grep("(Intercept)", dimnames(summary(o)$coef)[[1]]) 14 | control.geno <- c(1:length(control0))[-aux] 15 | control <- control0[c(1, control.geno)] 16 | n.control <- length(control) 17 | nombres <- rep(NA, n.control) 18 | freqs <- rep(NA, n.control) 19 | for (i in 1:n.control) { 20 | if (control[i] != "rare" & control[i] != "(Intercept)") { 21 | nombres[i] <- paste(o$haplo.unique[as.numeric(control[i]), 22 | ], collapse = "") 23 | freqs[i] <- o$haplo.freq[as.numeric(control[i])] 24 | } 25 | else if (control[i] == "(Intercept)") { 26 | nombres[i] <- "(Intercept)" 27 | freqs[i] <- -1 28 | } 29 | else { 30 | nombres[i] <- "rare" 31 | freqs[i] <- sum(o$haplo.freq[o$haplo.rare]) 32 | } 33 | } 34 | or <- FUN(co[, 1] * sign) 35 | li <- FUN(co[, 1] * sign - z * co[, 2]) 36 | ls <- FUN(co[, 1] * sign + z * co[, 2]) 37 | if (o$family$family != "binomial") 38 | or <- c(or[1], or[1], or[-1]) 39 | else or <- c(or[1], 1, or[-1]) 40 | li <- c(li[1], NA, li[-1]) 41 | ls <- c(ls[1], NA, ls[-1]) 42 | pvals <- co[, 4] 43 | pvals <- c(pvals[1], NA, pvals[-1]) 44 | nombre.ref <- paste(o$haplo.unique[o$haplo.base, ], collapse = "") 45 | nombre.cov <- dimnames(summary(o)$coef)[[1]][-c(1:n.control)] 46 | nombres <- c(nombres[1], nombre.ref, nombres[-1], nombre.cov) 47 | 48 | n.control.2<-length(o$haplo.freq)-length(o$haplo.rare) 49 | if (n.control.2!=length(o$coef)) 50 | { 51 | nombres[(n.control.2+2):length(nombres)]<-names(o$coef)[-c(1:n.control.2)] 52 | } 53 | 54 | 55 | 56 | ncov <- length(nombre.cov) 57 | freqs <- c(freqs[1], o$haplo.freq[o$haplo.base], freqs[-1], 58 | rep(NA, ncov)) 59 | names(freqs) <- names(or) 60 | r <- cbind(freqs, or, li, ls, pvals) 61 | if (o$family$family != "binomial") 62 | dimnames(r) <- list(nombres, c("freq", "diff", paste(level * 63 | 100, "%", sep = ""), "C.I.", "P-val")) 64 | else dimnames(r) <- list(nombres, c("freq", "or", paste(level * 65 | 100, "%", sep = ""), "C.I.", "P-val")) 66 | class(r) <- "intervals" 67 | r 68 | } 69 | -------------------------------------------------------------------------------- /man/permTest.Rd: -------------------------------------------------------------------------------- 1 | \name{permTest} 2 | \alias{permTest} 3 | \alias{print.permTest} 4 | \alias{plot.permTest} 5 | 6 | \title{Permutation test analysis} 7 | 8 | \description{ 9 | This function extract the p values for permutation approach performed using scanWGassociation function 10 | } 11 | \usage{ 12 | permTest(x, method="minimum", K) 13 | } 14 | 15 | \arguments{ 16 | \item{x}{a required object of class 'WGassociation' with the attribute 'permTest'. See details} 17 | \item{method}{statistic used in the permutation test. The default is 'minimum' but 'rtp' (rank truncated product) is 18 | also available.} 19 | \item{K}{number of the K most significant p values from the total number of test performed (e.g number of SNPs) used 20 | to compute the rank truncated product. This argument is only required when method='rtp'. See references } 21 | 22 | } 23 | 24 | \details{ 25 | This function extract the p values from an object of class 'WGassociation'. This object migth be obtained using 26 | the funcion called 'scanWGassociation' indicating the number of permutations in the argument 'nperm'. 27 | } 28 | 29 | 30 | \value{ 31 | An object of class 'permTest'. 32 | 33 | 'print' returns a summary indicating the number of SNPs analyzed, the number of valid SNPs (those non-Monomorphic and 34 | that pass the calling rate), the p value after Bonferroni correction, and the p values based on permutation approach. 35 | One of them is based on considering the empirical percentil for the minimum p values, and the another one on assuming 36 | that the minimum p values follow a beta distribution. 37 | 38 | 39 | 'plot' produces a plot of the empirical distribution for the minimum p values (histogram) and the expected distribution 40 | assuming a beta distribution. The corrected p value is also showed in the plot. 41 | 42 | See examples for further illustration about all previous issues. 43 | 44 | } 45 | 46 | \references{ 47 | Dudbridge F, Gusnanto A and Koeleman BPC. Detecting multiple associations in genome-wide studies. 48 | Human Genomics, 2006;2:310-317. 49 | 50 | Dudbridge F and Koeleman BPC. Efficient computation of significance levels for multiple associations in large studies of 51 | correlated data, including genomewide association studies. Am J Hum Genet, 2004;75:424-435. 52 | 53 | JR Gonzalez, L Armengol, X Sole, E Guino, JM Mercader, X Estivill, V Moreno. 54 | SNPassoc: an R package to perform whole genome association studies. Bioinformatics, 55 | 2007;23(5):654-5. 56 | } 57 | 58 | \seealso{ \code{\link{scanWGassociation}}} 59 | 60 | \examples{ 61 | 62 | library(SNPassoc) 63 | 64 | data(asthma, package = "SNPassoc") 65 | asthma.s <- setupSNP(data=asthma, colSNPs=7:ncol(asthma), sep="") 66 | 67 | ans <- WGassociation(casecontrol, data=asthma.s) 68 | 69 | } 70 | 71 | \keyword{utilities} 72 | -------------------------------------------------------------------------------- /R/SNPHWE.R: -------------------------------------------------------------------------------- 1 | `SNPHWE` <- 2 | function(x) 3 | { 4 | if (length(x)<3) { 5 | p <- NA 6 | 7 | } else { 8 | 9 | obs_hom1 <- x[1] 10 | obs_hets <- x[2] 11 | obs_hom2 <- x[3] 12 | 13 | if (obs_hom1 < 0 || obs_hom2 < 0 || obs_hets < 0) { 14 | return(-1.0) 15 | } 16 | 17 | # total number of genotypes 18 | N <- obs_hom1 + obs_hom2 + obs_hets 19 | 20 | # rare homozygotes, common homozygotes 21 | obs_homr <- min(obs_hom1, obs_hom2) 22 | obs_homc <- max(obs_hom1, obs_hom2) 23 | 24 | # number of rare allele copies 25 | rare <- obs_homr * 2 + obs_hets 26 | 27 | # Initialize probability array 28 | probs <- rep(0, 1 + rare) 29 | 30 | # Find midpoint of the distribution 31 | mid <- floor(rare * ( 2 * N - rare) / (2 * N)) 32 | if ( (mid %% 2) != (rare %% 2) ){ 33 | mid <- mid + 1 34 | } 35 | 36 | probs[mid + 1] <- 1.0 37 | mysum <- 1.0 38 | 39 | # Calculate probablities from midpoint down 40 | curr_hets <- mid 41 | curr_homr <- (rare - mid) / 2 42 | curr_homc <- N - curr_hets - curr_homr 43 | 44 | while ( curr_hets >= 2) { 45 | probs[curr_hets - 1] <- probs[curr_hets + 1] * curr_hets * (curr_hets - 1.0) / (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0)) 46 | mysum <- mysum + probs[curr_hets - 1] 47 | 48 | # 2 fewer heterozygotes -> add 1 rare homozygote, 1 common homozygote 49 | curr_hets <- curr_hets - 2 50 | curr_homr <- curr_homr + 1 51 | curr_homc <- curr_homc + 1 52 | } 53 | 54 | # Calculate probabilities from midpoint up 55 | curr_hets <- mid 56 | curr_homr <- (rare - mid) / 2 57 | curr_homc <- N - curr_hets - curr_homr 58 | 59 | while ( curr_hets <= rare - 2) { 60 | probs[curr_hets + 3] <- probs[curr_hets + 1] * 4.0 * curr_homr * curr_homc / ((curr_hets + 2.0) * (curr_hets + 1.0)) 61 | mysum <- mysum + probs[curr_hets + 3] 62 | 63 | # add 2 heterozygotes -> subtract 1 rare homozygtote, 1 common homozygote 64 | curr_hets <- curr_hets + 2 65 | curr_homr <- curr_homr - 1 66 | curr_homc <- curr_homc - 1 67 | } 68 | 69 | # P-value calculation 70 | target <- probs[obs_hets + 1] 71 | p <- min(1.0, sum(probs[probs <= target])/ mysum) 72 | 73 | #plo <- min(1.0, sum(probs[1:obs_hets + 1]) / mysum) 74 | 75 | #phi <- min(1.0, sum(probs[obs_hets + 1: rare + 1]) / mysum) 76 | 77 | } 78 | 79 | return(p) 80 | } 81 | 82 | -------------------------------------------------------------------------------- /R/WGassociation.R: -------------------------------------------------------------------------------- 1 | `WGassociation` <- 2 | function (formula, data, model=c("all"), quantitative = is.quantitative(formula, 3 | data), genotypingRate = 80, level=0.95, ...) 4 | { 5 | 6 | if(!inherits(data,"setupSNP")) 7 | stop("data must be an object of class 'setupSNP'") 8 | 9 | if (length(attr(data,"colSNPs")) > 2000 & (length(model) > 1 | any(model%in%"all"))) 10 | stop("Select only one genetic model when more than 2000 SNPs are analyzed \n or use 'scanWGassociation' function") 11 | 12 | cl <- match.call() 13 | mf <- match.call(expand.dots = FALSE) 14 | m0 <- match(c("formula", "data"), names(mf), 0) 15 | mf <- mf[c(1, m0)] 16 | 17 | # 18 | # aceptar respuesta sin formula 19 | # 20 | if( length(grep("~",mf[[2]]))==0){ 21 | formula<-as.formula(paste(mf[[2]],"~1",sep="")) 22 | formula.1<- list(formula) 23 | mode(formula.1)<-"call" 24 | mf[2]<-formula.1 25 | } 26 | 27 | mf[[1]] <- as.name("model.frame") 28 | mf <- eval(mf, parent.frame()) 29 | mt <- attr(mf, "terms") 30 | temp0 <- as.character(mt) 31 | adj <- paste(temp0[2], temp0[1], temp0[3]) 32 | 33 | Terms <- if (missing(data)) 34 | terms(formula) 35 | else terms(formula, data = data) 36 | ord <- attr(Terms, "order") 37 | if (any(ord > 1)) 38 | stop("interaction term is not implemented") 39 | 40 | 41 | association.i<-function(snp.i,adj,data,model,quantitative,genotypingRate,level, ...) 42 | { 43 | association( as.formula(paste(adj,"+",snp.i)) , data=data, 44 | model=model, quantitative=quantitative, genotypingRate= 45 | genotypingRate, level=level, ...) 46 | } 47 | 48 | 49 | colSNPs<-attr(data,"colSNPs") 50 | if (! (is.vector(colSNPs) & length(colSNPs) > 0)) stop("data should have an attribute called 'colSNPs'. Try again 'setupSNP' function") 51 | 52 | # if (is.vector(colSNPs) & length(colSNPs) > 1) 53 | # dataSNPs <- data[, colSNPs] 54 | # else stop("data should have an attribute called 'colSNPs'. Try again 'setupSNP' function") 55 | 56 | 57 | 58 | type<-charmatch(model,c("codominant","dominant","recessive","overdominant","log-additive","all")) 59 | type<-sort(type) 60 | 61 | if (any(type%in%6)) 62 | type<-1:5 63 | 64 | if(any(is.na(type))) 65 | stop("model must be 'codominant','dominant','recessive','overdominant', 66 | 'log-additive', 'all' or any combination of them") 67 | 68 | SNPs<-attr(data,"label.SNPs") 69 | 70 | tab<-mclapply(SNPs, association.i, adj=adj, data=data, model=model, 71 | quantitative=quantitative, genotypingRate= 72 | genotypingRate, level=level, ...) 73 | 74 | names(tab)<-SNPs 75 | attr(tab,"label.SNPs")<-attr(data,"label.SNPs") 76 | attr(tab,"models")<-type 77 | attr(tab,"quantitative")<-quantitative 78 | out<-extractPval(tab) 79 | 80 | attr(out,"tables")<-tab 81 | attr(out,"label.SNPs")<-attr(data,"label.SNPs") 82 | attr(out,"models")<-type 83 | attr(out,"quantitative")<-quantitative 84 | attr(out,"pvalues")<-out 85 | attr(out,"gen.info")<-attr(data,"gen.info") 86 | attr(out,"whole")<-attr(data,"whole") 87 | attr(out,"colSNPs")<-attr(data,"colSNPs") 88 | 89 | class(out)<-c("WGassociation","data.frame") 90 | 91 | out 92 | } 93 | 94 | -------------------------------------------------------------------------------- /.github/workflows/rhub.yaml: -------------------------------------------------------------------------------- 1 | # R-hub's generic GitHub Actions workflow file. It's canonical location is at 2 | # https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml 3 | # You can update this file to a newer version using the rhub2 package: 4 | # 5 | # rhub::rhub_setup() 6 | # 7 | # It is unlikely that you need to modify this file manually. 8 | 9 | name: R-hub 10 | run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}" 11 | 12 | on: 13 | workflow_dispatch: 14 | inputs: 15 | config: 16 | description: 'A comma separated list of R-hub platforms to use.' 17 | type: string 18 | default: 'linux,windows,macos' 19 | name: 20 | description: 'Run name. You can leave this empty now.' 21 | type: string 22 | id: 23 | description: 'Unique ID. You can leave this empty now.' 24 | type: string 25 | 26 | jobs: 27 | 28 | setup: 29 | runs-on: ubuntu-latest 30 | outputs: 31 | containers: ${{ steps.rhub-setup.outputs.containers }} 32 | platforms: ${{ steps.rhub-setup.outputs.platforms }} 33 | 34 | steps: 35 | # NO NEED TO CHECKOUT HERE 36 | - uses: r-hub/actions/setup@v1 37 | with: 38 | config: ${{ github.event.inputs.config }} 39 | id: rhub-setup 40 | 41 | linux-containers: 42 | needs: setup 43 | if: ${{ needs.setup.outputs.containers != '[]' }} 44 | runs-on: ubuntu-latest 45 | name: ${{ matrix.config.label }} 46 | strategy: 47 | fail-fast: false 48 | matrix: 49 | config: ${{ fromJson(needs.setup.outputs.containers) }} 50 | container: 51 | image: ${{ matrix.config.container }} 52 | 53 | steps: 54 | - uses: r-hub/actions/checkout@v1 55 | - uses: r-hub/actions/platform-info@v1 56 | with: 57 | token: ${{ secrets.RHUB_TOKEN }} 58 | job-config: ${{ matrix.config.job-config }} 59 | - uses: r-hub/actions/setup-deps@v1 60 | with: 61 | token: ${{ secrets.RHUB_TOKEN }} 62 | job-config: ${{ matrix.config.job-config }} 63 | - uses: r-hub/actions/run-check@v1 64 | with: 65 | token: ${{ secrets.RHUB_TOKEN }} 66 | job-config: ${{ matrix.config.job-config }} 67 | 68 | other-platforms: 69 | needs: setup 70 | if: ${{ needs.setup.outputs.platforms != '[]' }} 71 | runs-on: ${{ matrix.config.os }} 72 | name: ${{ matrix.config.label }} 73 | strategy: 74 | fail-fast: false 75 | matrix: 76 | config: ${{ fromJson(needs.setup.outputs.platforms) }} 77 | 78 | steps: 79 | - uses: r-hub/actions/checkout@v1 80 | - uses: r-hub/actions/setup-r@v1 81 | with: 82 | job-config: ${{ matrix.config.job-config }} 83 | token: ${{ secrets.RHUB_TOKEN }} 84 | - uses: r-hub/actions/platform-info@v1 85 | with: 86 | token: ${{ secrets.RHUB_TOKEN }} 87 | job-config: ${{ matrix.config.job-config }} 88 | - uses: r-hub/actions/setup-deps@v1 89 | with: 90 | job-config: ${{ matrix.config.job-config }} 91 | token: ${{ secrets.RHUB_TOKEN }} 92 | - uses: r-hub/actions/run-check@v1 93 | with: 94 | job-config: ${{ matrix.config.job-config }} 95 | token: ${{ secrets.RHUB_TOKEN }} 96 | -------------------------------------------------------------------------------- /R/plot.SNPinteraction.R: -------------------------------------------------------------------------------- 1 | `plot.SNPinteraction` <- 2 | function(x, main.tit, ...) 3 | { 4 | control<-apply(x,1,function(x) sum(is.na(x))==length(x)) 5 | x.OK<-x[!control,!control] 6 | 7 | if (!is.null(attr(x,"gen.info"))){ 8 | genInfo<-attr(x,"gen.info") 9 | o <- order(genInfo[, 2], genInfo[, 3]) 10 | label.SNPs <- as.character(genInfo[o, 1]) 11 | label.SNPs <- label.SNPs[label.SNPs%in%dimnames(x.OK)[[1]]] 12 | orderSNPs.ok<-match(label.SNPs, dimnames(x.OK)[[1]]) 13 | x.OK <- x.OK[orderSNPs.ok,orderSNPs.ok ] 14 | genInfo <- genInfo[genInfo[,1]%in%label.SNPs,] 15 | } 16 | else { 17 | label.SNPs <- dimnames(x.OK)[[1]] 18 | } 19 | 20 | # Reset par options on exit function 21 | old.xpd <- par("xpd") 22 | on.exit(par(xpd = old.xpd)) 23 | 24 | old.las <- par("las") 25 | on.exit(par(las = old.las)) 26 | 27 | old.mfrow <- par("mfrow") 28 | on.exit(par(mfrow = old.mfrow)) 29 | 30 | par(xpd=NA) 31 | 32 | m <- matrix(1:2, 1, 2) 33 | layout(m, widths=c(4.5, 1)) 34 | 35 | 36 | # Other palettes: 37 | # mypaletteOld<-brewer.pal(9,"Greens") 38 | # mypaletteOld<-c("#F7FCF5", "#E5F5E0", "#C7E9C0","#A1D99B", "#74C476", "#41AB5D", "#238B45", "#006D2C", "#00441B") 39 | # mypaletteOld<-brewer.pal(9,"Reds") 40 | # mypaletteOld<- c("#FFF5F0", "#FEE0D2", "#FCBBA1", "#FC9272", "#FB6A4A", "#EF3B2C", "#CB181D", "#A50F15", "#67000D") 41 | # This is used: mypaletteOld<-brewer.pal(9,"YlGn") 42 | 43 | mypaletteOld <- c("#FFFFE5", "#F7FCB9", "#D9F0A3", "#ADDD8E", "#78C679", "#41AB5D", "#238443", "#006837", "#004529") 44 | 45 | mypalette<-mypaletteOld[c(9,6,4,3,3,2,2,1,1)] 46 | 47 | pvalCut<-c(0,0.001,0.01,0.05,0.1,0.2,0.3,0.5,0.7,1) 48 | 49 | image(1:nrow(x.OK),1:ncol(x.OK),x.OK,col=mypalette,breaks=pvalCut, 50 | axes=FALSE,xlab="",ylab="") 51 | 52 | axis(1,at=c(1:nrow(x.OK)),labels=label.SNPs,las=3,cex.axis=0.7,col="darkgreen") 53 | axis(2,at=c(1:nrow(x.OK)),labels=label.SNPs,las=1,cex.axis=0.7,col="darkgreen") 54 | 55 | 56 | if (missing(main.tit)) 57 | main.tit<-paste("SNPs interactions --",attr(x,"model"),"model") 58 | 59 | title(main.tit,line=3) 60 | 61 | if (!is.null(attr(x,"gen.info"))) 62 | n.snps <- table(genInfo[, 2]) 63 | else n.snps <- nrow(x.OK) 64 | 65 | 66 | a <- c(0.5, cumsum(n.snps) + 0.5) 67 | 68 | b <- par("usr") 69 | segments(a, b[3], a, b[4] + diff(b[3:4]) * 0.02, col="darkblue",lwd=2) 70 | segments(b[3], a, b[4]+diff(b[3:4]) * 0.02, a, col="darkblue",lwd=2) 71 | 72 | abline(coef=c(0,1),xpd=FALSE,col="yellow") 73 | 74 | if(!is.null(attr(x,"gen.info"))) 75 | { 76 | a <- par("usr") 77 | wh <- cumsum(c(0.5, n.snps)) 78 | names.geno<-unique(genInfo[,2]) 79 | n.gen<-length(names.geno) 80 | 81 | for (i in 1:n.gen) 82 | { 83 | text(mean(wh[i + c(0, 1)]), a[4] + (a[4] - a[3]) * 0.025, names.geno[i],srt=45,cex=0.8,adj=0.2) 84 | text(a[4] + (a[4] - a[3]) * 0.025, mean(wh[i + c(0, 1)]), names.geno[i],srt=45,cex=0.8,adj=0.5) 85 | } 86 | } 87 | 88 | 89 | image(0.5,1:10,matrix(pvalCut,nrow=1,ncol=10),col=rev(mypalette),breaks=pvalCut,axes=FALSE, 90 | xlab="",ylab="") 91 | marcas<-c(0.5,3.5,4.5,5.5,7.5,8.5,9.5,10.5) 92 | axis(2,marcas,rev(c(0,0.001,0.01,0.05,0.1,0.2,0.3,1)),pos=0.5) 93 | text(30,5.5,"pvalues",srt=90) 94 | 95 | } 96 | 97 | -------------------------------------------------------------------------------- /R/print.LD.R: -------------------------------------------------------------------------------- 1 | # $Id: print.LD.R 395 2005-10-04 23:43:31Z warnes $ 2 | 3 | print.LD <- function(x, digits=getOption("digits"), ...) 4 | { 5 | 6 | # Reset options 7 | saveopt <- options() 8 | on.exit(options(saveopt)) 9 | 10 | 11 | options(digits=digits) 12 | cat("\n") 13 | cat("Pairwise LD\n") 14 | cat("-----------\n") 15 | 16 | est <- t(as.matrix( c(D=x$"D","D'"=x$"D'","Corr"=x$"r"))) 17 | rownames(est) <- "Estimates:" 18 | print(est) 19 | cat("\n") 20 | 21 | test <- t(as.matrix( c("X^2"=x$"X^2", "P-value"=x$"P-value", 22 | "N"=x$"n") ) ) 23 | rownames(test) <- "LD Test:" 24 | print(test) 25 | cat("\n") 26 | 27 | # options(saveopt) 28 | invisible(x) 29 | } 30 | 31 | 32 | summary.LD.data.frame <- function(object, digits=getOption("digits"), 33 | which=c("D", "D'", "r", "X^2", 34 | "P-value", "n", " "), 35 | rowsep, show.all=FALSE, 36 | ...) 37 | { 38 | 39 | if(missing(rowsep)) 40 | if(length(which)==1) 41 | rowsep <- NULL 42 | else 43 | rowsep <- " " 44 | 45 | if(is.null(rowsep)) 46 | blank <- NULL 47 | else 48 | blank <- matrix(rowsep, ncol=ncol(object$"D"), nrow=nrow(object$"D")) 49 | 50 | 51 | 52 | # Reset options 53 | saveopt <- options() 54 | on.exit(options(saveopt)) 55 | 56 | options(digits=digits) 57 | 58 | 59 | pdat <- list() 60 | for(name in which) 61 | pdat[[name]] <- object[[name]] 62 | 63 | tab <- interleave( 64 | "D" = if('D' %in% names(pdat)) pdat$D else NULL, 65 | "D'" = pdat$"D'", 66 | "Corr." = pdat$"r", 67 | "X^2"= pdat$"X^2", 68 | "P-value" = pdat$"P-value", 69 | "n" = pdat$"n", 70 | " "=blank, 71 | sep=" " 72 | ) 73 | 74 | statlist <- which[ ! (which %in% c("P-value", "n", " ") ) ] 75 | statlist[statlist=="X^2"] <- "X\\^2" 76 | 77 | formatlist <- sapply( statlist, function(object) grep(object, rownames(tab) ) ) 78 | formatlist <- unique(sort(unlist(formatlist))) 79 | 80 | pvallist <- grep( "P-value", rownames(tab) ) 81 | 82 | tab[formatlist,] <- formatC(as.numeric(tab[formatlist,]), digits=digits, 83 | format="f") 84 | tab[pvallist,] <- apply(object$"P-value", c(1,2), 85 | function(object)trim(format.pval(object, digits=digits))) 86 | 87 | tab[trim(tab)=="NA"] <- NA 88 | 89 | if(!show.all) 90 | { 91 | # drop blank row/column 92 | entrylen <- nrow(tab)/nrow(object$n) 93 | tab <- tab[1:(nrow(tab) - entrylen),-1] 94 | } 95 | 96 | 97 | # options(saveopt) 98 | class(tab) <- "summary.LD.data.frame" 99 | tab 100 | } 101 | 102 | print.summary.LD.data.frame <- function(x, digits=getOption("digits"), ...) 103 | { 104 | cat("\n") 105 | cat("Pairwise LD\n") 106 | cat("-----------\n") 107 | 108 | print(as.matrix(unclass(x)), digits=digits, quote=FALSE, 109 | na.print=" ", right=TRUE) 110 | 111 | cat("\n") 112 | 113 | invisible(x) 114 | 115 | 116 | } 117 | 118 | 119 | print.LD.data.frame <- function(x, ...) 120 | print(summary(x)) 121 | -------------------------------------------------------------------------------- /man/haplointeraction.Rd: -------------------------------------------------------------------------------- 1 | \name{haplo.interaction} 2 | \alias{haplo.interaction} 3 | \alias{print.haploOut} 4 | 5 | \title{Haplotype interaction with a covariate} 6 | 7 | \description{ 8 | This function computes the ORs (or mean differences if a quantitative trait is analyzed) and their 95\% confidence intervals corresponding to 9 | an interaction between the haplotypes and a categorical covariate 10 | } 11 | 12 | \usage{ 13 | haplo.interaction(formula, data, SNPs.sel, quantitative = 14 | is.quantitative(formula, data), haplo.freq.min = 0.05, ...) 15 | } 16 | 17 | \arguments{ 18 | \item{formula}{ a symbolic description of the model to be fitted (a formula object). 19 | It might have either a continuous variable (quantitative traits) or a 20 | factor variable (case-control studies) as the response on the left of the \code{~} 21 | operator and a term corresponding to the interaction variable on the right indicated using 22 | 'interaction' function (e.g. \code{~}int(var), where var is a factor variable) and it is 23 | required. Terms with additional covariates on the the right of the ~ operator may be 24 | added to fit an adjusted model (e.g., \code{~}var1+var2+...+varN+int(var)). } 25 | \item{data}{ an object of class 'setupSNP' containing the variables in the model and the SNPs that will be used to estimate the 26 | haplotypes. } 27 | \item{SNPs.sel}{a vector indicating the names of SNPs that are used to estimate the haplotypes} 28 | \item{quantitative}{logical value indicating whether the phenotype (which is on the 29 | left of the operator \code{~} in 'formula' argument) is quantitative. The function 30 | 'is.quantitative' returns FALSE when the phenotype 31 | is a variable with two categories (i.e. indicating case-control status). Thus, 32 | it is not a required argument but it may be modified by the user. } 33 | \item{haplo.freq.min}{control parameter for haplo.glm included in 'haplo.glm.control'. This parameter corresponds to the minimum 34 | haplotype frequency for a haplotype to be included in the regression model as its own effect. The 35 | haplotype frequency is based on the EM algorithm that estimates haplotype frequencies independently 36 | of any trait. } 37 | \item{...}{additional parameters for 'haplo.glm.control'.} 38 | } 39 | 40 | \details{ 41 | The function estimates the haplotypes for the SNPs indicated in the 'SNPs.sel' argument. Then, usign 'haplo.glm' function (from 'haplo.stats' 42 | library) estimates the interaction between these haplotypes and the covariate indicated in the formula by means of 'interaction' function. 43 | } 44 | 45 | \value{ 46 | Three different tables are given. The first one corresponds to the full interaction matrix where the ORs (or mean differences if a quantitative 47 | trait is analyzed) are expressed with respect to the most frequent haplotype and the first category of the covariate. The other two tables 48 | show the ORs (or mean differences if a quantitative trait is analyzed) and their 95\% confidence intervals for both marginal models. 49 | P values for interaction are also showed in the output. 50 | 51 | } 52 | 53 | \examples{ 54 | # not Run 55 | library(SNPassoc) 56 | library(haplo.stats) 57 | 58 | data(SNPs) 59 | datSNP<-setupSNP(SNPs,6:40,sep="") 60 | res <- haplo.interaction(log(protein)~int(sex), data=datSNP, 61 | SNPs.sel=c("snp100019","snp10001","snp100029")) 62 | res 63 | } 64 | \keyword{utilities } -------------------------------------------------------------------------------- /R/haplo.interaction.R: -------------------------------------------------------------------------------- 1 | `haplo.interaction` <- 2 | function(formula, data, SNPs.sel, quantitative = is.quantitative(formula, data), 3 | haplo.freq.min=0.05,...) 4 | 5 | { 6 | if (!inherits(data, "setupSNP")) 7 | stop("data must be an object of class 'setupSNP'") 8 | 9 | control.SNPs<-sum(!is.na(match(names(data),SNPs.sel))) 10 | if (control.SNPs!=length(SNPs.sel)) 11 | stop("Some of the SNPs selected are not in the data set") 12 | 13 | cl <- match.call() 14 | mf <- match.call(expand.dots = FALSE) 15 | m0 <- match(c("formula", "data", "subset"), names(mf), 0) 16 | mf <- mf[c(1, m0)] 17 | mf[[1]] <- as.name("model.frame") 18 | mf <- eval(mf, parent.frame()) 19 | mt <- attr(mf, "terms") 20 | 21 | special <- c("int") 22 | Terms <- if (missing(data)) 23 | terms(formula, special) 24 | else terms(formula, special, data = data) 25 | posInt <- attr(Terms, "specials")$int 26 | 27 | if(length(posInt)) 28 | { 29 | var2<-mf[,posInt] 30 | if(!length(levels(var2))) 31 | { 32 | stop("interaction variable must be a factor") 33 | } 34 | } 35 | else 36 | stop("formula needs an 'interaction' term") 37 | 38 | control.missing<-dimnames(mf)[[1]] 39 | geno <- make.geno(data[dimnames(data)[[1]]%in%control.missing,], 40 | SNPs.sel) 41 | 42 | dep <- mf[, 1] 43 | if (ncol(mf) > 2) 44 | adj <- data.frame(mf[, -c(1,posInt)]) 45 | else 46 | adj <- NULL 47 | varAdj <- attr(mt, "term.labels")[-(posInt-1)] 48 | varInt <- attr(mt, "term.labels")[posInt-1] 49 | varInt <-gsub("int\\(","",varInt) 50 | varInt <-gsub("\\)","",varInt) 51 | 52 | out <- haplo.inter.fit(geno, var2, dep, adj , 53 | ifelse(quantitative,"gaussian","binomial"), haplo.freq.min, ...) 54 | 55 | res.corner<-out[[1]] 56 | xx<-dimnames(res.corner)[[2]] 57 | xx[xx=="li"]<-"lower" 58 | xx[xx=="ls"]<-"upper" 59 | dimnames(res.corner)[[2]]<-xx 60 | 61 | temp<-out[[2]] 62 | etiq1<-dimnames(temp)[[1]] 63 | aux0<-dimnames(temp)[[2]] 64 | etiq2<-aux0[seq(2,length(aux0),3)] 65 | 66 | ans<-list(NA) 67 | for (i in 1:length(etiq2)) 68 | { 69 | ans[[i]]<-temp[,c(1,(2+3*(i-1)):(4+3*(i-1)))] 70 | ans[[i]][1,2]<-ifelse(quantitative,0,1) 71 | ans[[i]]<-ans[[i]][,-1] 72 | if (!quantitative) 73 | dimnames(ans[[i]])[[2]]<-c("OR","lower","upper") 74 | else 75 | dimnames(ans[[i]])[[2]]<-c("diff","lower","upper") 76 | } 77 | names(ans)<-etiq2 78 | res.int1<-ans 79 | 80 | temp<-out[[3]] 81 | etiq1<-dimnames(temp)[[1]] 82 | aux0<-dimnames(temp)[[2]] 83 | etiq2<-aux0[seq(2,length(aux0),3)] 84 | 85 | ans2<-list(NA) 86 | for (i in 1:length(etiq1)) 87 | { 88 | ans.i<- matrix(temp[i,][-1],nrow=length(etiq2) ,ncol=3,byrow=TRUE) 89 | ans2[[i]]<-data.frame(ans.i) 90 | dimnames(ans2[[i]])[[1]]<-etiq2 91 | ans2[[i]][1,1]<-ifelse(quantitative,0,1) 92 | if (!quantitative) 93 | dimnames(ans2[[i]])[[2]]<-c("OR","lower","upper") 94 | else 95 | dimnames(ans2[[i]])[[2]]<-c("diff","lower","upper") 96 | } 97 | names(ans2)<-etiq1 98 | res.int2<-ans2 99 | 100 | res<-list(res.corner,res.int1,res.int2,out$pval) 101 | attr(res,"label.snp") <- SNPs.sel 102 | attr(res,"varAdj") <- varAdj 103 | attr(res,"varInt") <- varInt 104 | attr(res,"quantitative") <- quantitative 105 | class(res) <- "haploOut" 106 | res 107 | 108 | } 109 | 110 | -------------------------------------------------------------------------------- /R/getGeneSymbol.R: -------------------------------------------------------------------------------- 1 | #' Get gene symbol from a list of SNPs 2 | #' 3 | #' @description 4 | #' The getGeneSymbol function searches for the genes associated with the SNPs 5 | #' for a given chromosome at a given position. To perform the annotation this 6 | #' function needs to have the BioConductor libraries: S4Vectors, GenomicRanges, 7 | #' IRanges, VariantAnnotation and org.Hs.eg.db libraries. 8 | #' As well as the library with the reference genome database, by default needs 9 | #' TxDb.Hsapiens.ICSC.hg19.KnownGene library 10 | #' @param x data.frame containing: SNP name, chromosome and genomic position. 11 | #' @param snpCol column of x having the SNP name. Default is 1. 12 | #' @param chrCol column of x having the SNP chromosome. Default is 2. 13 | #' @param posCol column of x having the SNP position. Default is 3. 14 | #' @param db reference genome. Default is 'TxDb.Hsapiens.UCSC.hg19.knownGene' 15 | #' @return a data.frame having initial information and gene symbol 16 | 17 | 18 | getGeneSymbol <- function(x, snpCol=1, chrCol=2, posCol=3, 19 | db="TxDb.Hsapiens.UCSC.hg19.knownGene") { 20 | 21 | 22 | if (!requireNamespace("S4Vectors")) { 23 | stop( "Package \"S4Vectors\" must be installed to use this function.", 24 | call. = FALSE ) 25 | } 26 | if (!requireNamespace("GenomicRanges")) { 27 | stop( "Package \"db <- eval(parse(text = db))\" must be installed to use this function.", 28 | call. = FALSE ) 29 | } 30 | if (!requireNamespace("IRanges")) { 31 | stop( "Package \"IRanges\" must be installed to use this function.", 32 | call. = FALSE ) 33 | } 34 | if (!requireNamespace("VariantAnnotation")) { 35 | stop( "Package \"VariantAnnotation\" must be installed to use this function.", 36 | call. = FALSE ) 37 | } 38 | if (!requireNamespace("org.Hs.eg.db")) { 39 | stop( "Package \"org.Hs.eg.db\" must be installed to use this function.", 40 | call. = FALSE ) 41 | } 42 | 43 | if (!requireNamespace(db)) { 44 | stop( "Reference genome must be installed to use this function.", 45 | call. = FALSE ) 46 | } else { 47 | library(db, character.only=TRUE) 48 | } 49 | 50 | db <- eval(parse(text = db)) 51 | 52 | df <- x[,c(snpCol, chrCol, posCol)] 53 | names(df) <- c("rsid", "chr", "pos") 54 | 55 | target <- with(df, 56 | GenomicRanges::GRanges( seqnames = S4Vectors::Rle(chr), 57 | ranges = IRanges::IRanges(pos, end=pos, names=rsid), 58 | strand = S4Vectors::Rle(GenomicRanges::strand("*")))) 59 | 60 | loc <- VariantAnnotation::locateVariants(target, db, 61 | VariantAnnotation::AllVariants()) 62 | names(loc) <- NULL 63 | info <- as.data.frame(loc) 64 | info$names <- names(target)[ info$QUERYID ] 65 | info <- info[ , c("names", "seqnames", "start", "end", "LOCATION", "GENEID", "PRECEDEID", "FOLLOWID")] 66 | info <- unique(info) 67 | 68 | Symbol2id <- as.list( org.Hs.eg.db::org.Hs.egSYMBOL2EG) 69 | id2Symbol <- rep( names(Symbol2id), sapply(Symbol2id, length) ) 70 | names(id2Symbol) <- unlist(Symbol2id) 71 | 72 | x <- unique( with(info, c(levels(GENEID), levels(PRECEDEID), levels(FOLLOWID))) ) 73 | 74 | info$GENESYMBOL <- id2Symbol[ as.character(info$GENEID) ] 75 | info$PRECEDESYMBOL <- id2Symbol[ as.character(info$PRECEDEID) ] 76 | info$FOLLOWSYMBOL <- id2Symbol[ as.character(info$FOLLOWID) ] 77 | ans <- info[, c("names", "seqnames", "start", "LOCATION", "GENESYMBOL", "GENEID")] 78 | names(ans)[3] <- "position" 79 | temp <- which(names(df)%in%c("chr","pos")) 80 | out <- merge(ans, df[, -temp, drop=FALSE], by.x="names", by.y="rsid") 81 | out 82 | } 83 | 84 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | export( 2 | snp, 3 | setupSNP, 4 | plotMissing, 5 | tableHWE, 6 | WGstats, 7 | WGassociation, 8 | scanWGassociation, 9 | getSignificantSNPs, 10 | Bonferroni.sig, 11 | association, 12 | additive, 13 | codominant, 14 | dominant, 15 | overdominant, 16 | odds, 17 | recessive, 18 | pvalues, 19 | intervals, 20 | int, 21 | interactionPval, 22 | make.geno, 23 | haplo.interaction, 24 | permTest, 25 | qqpval, 26 | GenomicControl, 27 | maxstat, 28 | LD, 29 | LDplot, 30 | LDtable, 31 | getNiceTable, 32 | getGeneSymbol, 33 | Table.N.Per, 34 | Table.mean.se, 35 | is.Monomorphic, 36 | sortSNPs, 37 | related 38 | ) 39 | 40 | import("haplo.stats") 41 | import("survival") 42 | import("mvtnorm") 43 | import("ggplot2") 44 | importFrom("tidyr", "gather") 45 | importFrom("poisbinom", "dpoisbinom" ) 46 | importFrom("parallel", "mclapply") 47 | importFrom("plyr", "arrange", "count") 48 | importFrom("grDevices", "heat.colors") 49 | # importFrom("Biostrings", "getSeq", "DNA_BASES") 50 | importFrom("graphics", "abline", "axis", "barplot", "hist", "image", 51 | "layout", "legend", "lines", "matplot", "par", "segments", 52 | "strheight", "strwidth", "text", "title") 53 | importFrom("stats", ".getXlevels", "AIC", "anova", "as.formula", 54 | "binomial", "complete.cases", "dbeta", "density", 55 | "fisher.test", "gaussian", "glm", "glm.control", "glm.fit", 56 | "is.empty.model", "median", "model.extract", "model.matrix", 57 | "model.offset", "model.response", "model.weights", 58 | "na.omit", "nlm", "optimize", "pchisq", "pnorm", 59 | "printCoefmat", "qbeta", "qchisq", "qnorm", "relevel", 60 | "runif", "sd", "terms", "reorder") 61 | 62 | 63 | S3method(summary,haplo.glm) 64 | S3method(intervals,haplo.glm) 65 | S3method(intervals,dif) 66 | S3method(intervals,or) 67 | 68 | S3method(print,haploOut) 69 | S3method(print,intervals) 70 | S3method(print,snpOut) 71 | S3method(print,tableHWE) 72 | 73 | S3method(codominant,default) 74 | S3method(dominant,default) 75 | S3method(recessive,default) 76 | S3method(overdominant,default) 77 | S3method(additive,default) 78 | 79 | S3method(codominant,snp) 80 | S3method(dominant,snp) 81 | S3method(recessive,snp) 82 | S3method(overdominant,snp) 83 | S3method(additive,snp) 84 | S3method(plot,snp) 85 | S3method(reorder,snp) 86 | S3method(summary,snp) 87 | S3method(print,snp) 88 | S3method("[",snp) 89 | 90 | S3method(print,summary.snp) 91 | 92 | 93 | S3method(plot,SNPinteraction) 94 | S3method(print,SNPinteraction) 95 | 96 | S3method(summary,setupSNP) 97 | S3method("[",setupSNP) 98 | S3method("[[<-",setupSNP) 99 | S3method("[<-",setupSNP) 100 | S3method("$<-",setupSNP) 101 | S3method(labels,setupSNP) 102 | S3method(plot,setupSNP) 103 | 104 | 105 | S3method(summary,WGassociation) 106 | S3method(print,WGassociation) 107 | S3method(plot,WGassociation) 108 | S3method(pvalues,WGassociation) 109 | S3method(codominant,WGassociation) 110 | S3method(dominant,WGassociation) 111 | S3method(recessive,WGassociation) 112 | S3method(overdominant,WGassociation) 113 | S3method(additive,WGassociation) 114 | S3method("[",WGassociation) 115 | S3method(c,WGassociation) 116 | 117 | S3method(labels,WGassociation) 118 | 119 | S3method(print,permTest) 120 | S3method(plot,permTest) 121 | 122 | S3method(print,maxstat) 123 | S3method(maxstat,default) 124 | S3method(maxstat,table) 125 | S3method(maxstat,setupSNP) 126 | S3method(maxstat,matrix) 127 | 128 | S3method(LD,snp) 129 | S3method(LD,setupSNP) 130 | 131 | S3method(print,LD) 132 | S3method(print,LD.data.frame) 133 | S3method(summary,LD.data.frame) 134 | S3method(print,summary.LD.data.frame) 135 | 136 | ################## 137 | # hidden functions 138 | 139 | # as.snp 140 | # is.quantitative 141 | # is.snp 142 | # association.fit 143 | 144 | # crea.lab 145 | # expandsetupSNP 146 | # extractPval 147 | # extractPval.i 148 | # GenotypeRate 149 | # haplo.inter.fit 150 | 151 | # orderChromosome 152 | 153 | # SNPHWE 154 | # 155 | # table.corner 156 | # table.interaction 157 | # togeno 158 | # Table.N.Per 159 | # Table.mean.se 160 | 161 | # pvalTest 162 | # modelTest 163 | # assoc 164 | 165 | 166 | -------------------------------------------------------------------------------- /man/interactionPval.Rd: -------------------------------------------------------------------------------- 1 | \name{interactionPval} 2 | \alias{interactionPval} 3 | \alias{print.SNPinteraction} 4 | \alias{plot.SNPinteraction} 5 | 6 | \title{ Two-dimensional SNP analysis for association studies} 7 | \description{ 8 | Perform a two-dimensional SNP analysis (interaction) for association studies with 9 | possible allowance for covariate 10 | } 11 | \usage{ 12 | interactionPval(formula, data, quantitative = 13 | is.quantitative(formula, data), model = "codominant") 14 | } 15 | 16 | \arguments{ 17 | \item{formula}{a formula object. It might have either a continuous variable (quantitative traits) or a 18 | factor variable (case-control study) as the response on the left of the \code{~} 19 | operator and the terms corresponding to the covariates to be adjusted. A crude analysis 20 | is performed indicating \code{~}1 } 21 | \item{data}{ a required object of class 'setupSNP'. } 22 | \item{quantitative}{ logical value indicating whether the phenotype (those which is in the 23 | left of the operator ~ in 'formula' argument) is quantitative. The function 24 | 'is.quantitative' returns FALSE when the phenotype 25 | is a variable with two categories (i.e. indicating case-control status). Thus, 26 | it is not a required argument but it may be modified by the user.} 27 | \item{model}{a character string specifying the type of genetic model (mode of inheritance). This 28 | indicates how the genotypes should be collapsed. Possible 29 | value are "codominant", "dominant", "recessive", "overdominant" or "log-additive". The default 30 | is "codominant". Only the first words are required, e.g "co", "do", "re", "ov", "log"} 31 | } 32 | \details{ 33 | The 'interactionPval' function calculates, for each pair of SNPs (i,j), the likelihood underling the null model L0, 34 | the likelihood under each of the single-SNP, L(i) and L(j), the likelihood under an additive SNP 35 | model La(i,j), and the likelihood under a full SNP model (including SNP-SNP interaction), Lf(i,j). 36 | 37 | The upper triangle in matrix from this function contains the p values for the interaction (epistasis) 38 | log-likelihood ratio test, LRT, LRTij = -2 (log Lf(i,j) - log La(i,j)) 39 | 40 | The diagonal contains the p values from LRT for the crude effect of each SNP, LRTii = -2 (log L(i) - log L0) 41 | 42 | The lower triangle contains the p values from LRT comparing the two-SNP additive likelihood to the best 43 | of the single-SNP models, LRTji = -2 (log La(i,j) - log max(L(i),L(j))) 44 | 45 | In all cases the models including the SNPs are adjusted by the covariates indicated in the 'formula' 46 | argument. This method is used either for quantitative traits and dicotomous variables (case-control studies). 47 | 48 | } 49 | \value{ 50 | The 'interactionPval' function returns a matrix of class 'SNPinteraction' containing the p values corresponding to the 51 | different likelihood ratio tests above describe. 52 | 53 | Methods defined for 'SNPinteraction' objects are provided for print and plot. The plot method uses 'image' to plot a grid 54 | of p values. The upper triangle contains the interaction (epistasis) p values from LRT. The content in the lower triangle 55 | is the p values from the LRT comparing the additive model with the best single model. The diagonal contains the main effects 56 | pvalues from LRT. The 'plot.SNPinteraction' function also allows the user to plot the SNPs sorted by genomic position and 57 | with the information about chromosomes as in the 'plotMissing' function. 58 | 59 | } 60 | 61 | \references{ 62 | JR Gonzalez, L Armengol, X Sole, E Guino, JM Mercader, X Estivill, V Moreno. 63 | SNPassoc: an R package to perform whole genome association studies. Bioinformatics, 64 | 2007;23(5):654-5. 65 | } 66 | 67 | \note{ two-dimensional SNP analysis on a dense grid can take a great deal of computer 68 | time and memory. 69 | } 70 | 71 | \seealso{\code{\link{setupSNP}}} 72 | 73 | \examples{ 74 | 75 | data(SNPs) 76 | datSNP<-setupSNP(SNPs,6:40,sep="") 77 | 78 | ansCod<-interactionPval(log(protein)~sex,datSNP) 79 | print(ansCod) 80 | plot(ansCod) 81 | 82 | } 83 | 84 | 85 | \keyword{utilities} -------------------------------------------------------------------------------- /man/snp.Rd: -------------------------------------------------------------------------------- 1 | \name{snp} 2 | \alias{snp} 3 | \alias{is.snp} 4 | \alias{as.snp} 5 | \alias{reorder.snp} 6 | \alias{summary.snp} 7 | \alias{plot.snp} 8 | \alias{dominant.snp} 9 | \alias{codominant.snp} 10 | \alias{recessive.snp} 11 | \alias{additive.snp} 12 | \alias{print.snp} 13 | \alias{[.snp} 14 | \alias{print.summary.snp} 15 | 16 | \title{SNP object} 17 | \description{ 18 | 19 | \code{snp} creates an snp object 20 | 21 | \code{is} returns \code{TRUE} if \code{x} is of class 'snp' 22 | 23 | \code{as} attempts to coerce its argument into an object of class 'snp' 24 | 25 | \code{reorder} change the reference genotype 26 | 27 | \code{summary} gives a summary for an object of class 'snp' including genotype 28 | and allele frequencies and an exact thest of Hardy-Weinberg 29 | equilibrium 30 | 31 | \code{plot} gives a summary for an object of class 'snp' including genotype 32 | and allele frequencies and an exact thest of Hardy-Weinberg 33 | equilibrium in a plot. Barplot or pie are allowed 34 | 35 | \code{[.snp} is a copy of [.factor modified to preserve all attributes 36 | 37 | } 38 | \usage{ 39 | 40 | snp(x, sep = "/", name.genotypes, reorder="common", 41 | remove.spaces = TRUE, allow.partial.missing = FALSE) 42 | 43 | is.snp(x) 44 | 45 | as.snp(x, ...) 46 | 47 | \method{additive}{snp}(o) 48 | 49 | } 50 | 51 | \arguments{ 52 | \item{x}{either an object of class 'snp' or an object to be converted to class 'snp'} 53 | \item{sep}{character separator used to divide alleles when \code{x} is a vector of strings where 54 | each string holds both alleles. The default is "/". See below for details.} 55 | \item{name.genotypes}{the codes for the genotypes. This argument may be useful when genotypes 56 | are coded using three different codes (e.g., 0,1,2 or hom1, het, hom2)} 57 | \item{reorder}{how should genotypes within an individual be reordered. Possible values are 58 | 'common' or 'minor'. The default is 59 | \code{reorder="common"}. In that case, alleles are sorted within each individual by 60 | common homozygous.} 61 | \item{remove.spaces}{logical indicating whether spaces and tabs will be 62 | removed from the genotypes before processing} 63 | \item{allow.partial.missing}{logical indicating whether one allele is 64 | permitted to be missing. When set to 'FALSE' both alleles 65 | are set to 'NA' when either is missing.} 66 | \item{o}{an object of class 'snp' to be coded as a linear covariate: 0,1,2} 67 | \item{...}{optional arguments} 68 | } 69 | \details{ 70 | SNP objects hold information on which gene or marker alleles were 71 | observed for different individuals. For each individual, two alleles 72 | are recorded. 73 | 74 | The snp class considers the stored alleles to be unordered , i.e., "C/T" 75 | is equivalent to "T/C". It assumes that the order of the alleles is not important. 76 | 77 | When \code{snp} is called, \code{x} is a character vector, and it is 78 | assumed that each element encodes both alleles. In this case, if 79 | \code{sep} is a character string, \code{x} is assumed to be coded 80 | as "Allele1Allele2". If \code{sep} is a numeric value, it is 81 | assumed that character locations \code{1:sep} contain allele 1 and 82 | that remaining locations contain allele 2. 83 | 84 | \code{additive.snp} recodes the SNPs for being analyzed as a linear covariate (codes 0,1,2) 85 | 86 | 87 | } 88 | 89 | \references{ 90 | JR Gonzalez, L Armengol, X Sole, E Guino, JM Mercader, X Estivill, V Moreno. 91 | SNPassoc: an R package to perform whole genome association studies. Bioinformatics, 92 | 2007;23(5):654-5. 93 | } 94 | 95 | 96 | \value{ 97 | 98 | The snp class extends "factor" where the levels is a character vector of possible 99 | genotype values stored coded by \code{paste( allele1, "", allele2, sep="/")} 100 | } 101 | 102 | 103 | 104 | \seealso{ 105 | \code{\link{association}} 106 | } 107 | 108 | \examples{ 109 | # some examples of snp data in different formats 110 | 111 | dat1 <- c("21", "21", "11", "22", "21", 112 | "22", "22", "11", "11", NA) 113 | ans1 <- snp(dat1,sep="") 114 | ans1 115 | 116 | dat2 <- c("A/A","A/G","G/G","A/G","G/G", 117 | "A/A","A/A","G/G",NA) 118 | ans2 <- snp(dat2,sep="/") 119 | ans2 120 | 121 | dat3 <- c("C-C","C-T","C-C","T-T","C-C", 122 | "C-C","C-C","C-C","T-T",NA) 123 | ans3 <- snp(dat3,sep="-") 124 | ans3 125 | 126 | 127 | dat4 <- c("het","het","het","hom1","hom2", 128 | "het","het","hom1","hom1",NA) 129 | ans4 <- snp(dat4,name.genotypes=c("hom1","het","hom2")) 130 | ans4 131 | 132 | 133 | # summary 134 | summary(ans3) 135 | 136 | # plots 137 | 138 | plot(ans3) 139 | plot(ans3,type=pie) 140 | plot(ans3,type=pie,label="SNP 10045") 141 | 142 | } 143 | \keyword{utilities} 144 | 145 | -------------------------------------------------------------------------------- /R/interactionPval.R: -------------------------------------------------------------------------------- 1 | `interactionPval` <- 2 | function (formula, data, quantitative = is.quantitative(formula, 3 | data), model="codominant") 4 | { 5 | 6 | if(!inherits(data,"setupSNP")) 7 | stop("data must be an object of class 'setupSNP'") 8 | 9 | cl <- match.call() 10 | mf <- match.call(expand.dots = FALSE) 11 | m0 <- match(c("formula", "data"), names(mf), 0) 12 | mf <- mf[c(1, m0)] 13 | 14 | # 15 | # aceptar respuesta sin formula 16 | # 17 | if( length(grep("~",mf[[2]]))==0){ 18 | formula<-as.formula(paste(mf[[2]],"~1",sep="")) 19 | formula.1<- list(formula) 20 | mode(formula.1)<-"call" 21 | mf[2]<-formula.1 22 | } 23 | 24 | mf[[1]] <- as.name("model.frame") 25 | mf <- eval(mf, parent.frame()) 26 | mt <- attr(mf, "terms") 27 | temp0 <- as.character(mt) 28 | adj <- paste(temp0[2], temp0[1], temp0[3]) 29 | 30 | fam <- ifelse(quantitative,"gaussian","binomial") 31 | 32 | model.type <- c("codominant", "dominant", "recessive", 33 | "overdominant","log-additive") 34 | m <- charmatch(model, model.type, nomatch = 0) 35 | if (m == 0) 36 | stop("model must be codominant dominant recessive overdominant or log-additive") 37 | 38 | modelOK<-switch(m,codominant,dominant,recessive,overdominant,additive) 39 | 40 | 41 | colSNPs<-attr(data,"colSNPs") 42 | if (is.vector(colSNPs) & length(colSNPs) > 0) 43 | dataSNPs.sel <- data[, colSNPs] 44 | else stop("data should have an attribute called 'colSNPs'. Try again 'setupsNP' function") 45 | 46 | dataSNPs <- data.frame(lapply(dataSNPs.sel,function(x,model.sel) model.sel(x),model.sel=modelOK)) 47 | 48 | SNPs.label <- names(dataSNPs) 49 | 50 | dimnames(data)[[1]]<-1:nrow(data) 51 | 52 | i<-1 53 | n<-ncol(dataSNPs) 54 | 55 | pval<-matrix(NA,nrow=n,ncol=n) 56 | 57 | 58 | while (i<=n) 59 | { 60 | nas <- sum(!is.na(dataSNPs[, i])) 61 | n.nas <- length(dataSNPs[, i]) 62 | 63 | if (is.Monomorphic(dataSNPs[, i])) 64 | { 65 | pval[i,]<-rep(NA,n) 66 | } 67 | 68 | else if (nas/n.nas<0.80) 69 | { 70 | pval[i,]<-rep(NA,n) 71 | } 72 | 73 | else if (length(table(dataSNPs[, i]))==1) 74 | { 75 | pval[i,]<-rep(NA,n) 76 | } 77 | 78 | else 79 | { 80 | j<-i+1 81 | while (j<=n) 82 | { 83 | nas <- sum(!is.na(dataSNPs[, j])) 84 | n.nas <- length(dataSNPs[, j]) 85 | 86 | if (is.Monomorphic(dataSNPs[, j])) 87 | { 88 | pval[i,j]<-NA 89 | } 90 | else if (nas/n.nas<0.80) 91 | { 92 | pval[i,j]<-NA 93 | } 94 | 95 | else if (length(table(dataSNPs[, j]))==1) 96 | { 97 | pval[i,j]<-NA 98 | } 99 | 100 | 101 | else 102 | { 103 | mod.i <- glm(as.formula(paste(adj, "+ dataSNPs[, i]*dataSNPs[, j]")), 104 | data = data, family=fam) 105 | subset <- 1:nrow(data) %in% as.numeric(rownames(mod.i$model)) 106 | mod.a <- glm(as.formula(paste(adj, "+ dataSNPs[, i]+dataSNPs[, j]")), 107 | data = data, family=fam, subset=subset) 108 | 109 | mod.b1 <- glm(as.formula(paste(adj, "+ dataSNPs[, i]")), data = data, 110 | family=fam,subset=subset) 111 | mod.b2 <- glm(as.formula(paste(adj, "+ dataSNPs[, j]")), data = data, 112 | family=fam,subset=subset) 113 | 114 | if (quantitative) 115 | pval[i,j]<-anova(mod.a,mod.i,test="F")$"Pr(>F)"[2] 116 | else 117 | { 118 | t1 <- anova(mod.a, mod.i, test="Chisq") 119 | pval[i,j] <- t1[2, grep("^P.*Chi",names(t1))] 120 | } 121 | 122 | if(mod.b1$aic<=mod.b2$aic) 123 | { 124 | if (quantitative) 125 | pval[j,i] <- anova(mod.b1, mod.a, test="F")$"Pr(>F)"[2] 126 | else 127 | { 128 | t1 <- anova(mod.b1, mod.a, test="Chisq") 129 | pval[j,i] <- t1[2, grep("^P.*Chi",names(t1))] 130 | } 131 | } 132 | else 133 | { 134 | if (quantitative) 135 | pval[j,i] <- anova(mod.b2, mod.a, test="F")$"Pr(>F)"[2] 136 | else 137 | { 138 | t1 <- anova(mod.b2, mod.a, test="Chisq") 139 | pval[j,i] <- t1[2, grep("^P.*Chi",names(t1))] 140 | } 141 | } 142 | } 143 | j<-j+1 144 | } 145 | 146 | mod.0 <- glm(as.formula(paste(adj, "+ dataSNPs[, i]")), data = data, 147 | family="gaussian") 148 | subset <- 1:nrow(data) %in% as.numeric(rownames(mod.0$model)) 149 | mod.b <- glm(as.formula(paste(adj)), data = data, 150 | family="gaussian",subset=subset) 151 | pval[i,i] <- anova(mod.b,mod.0,test="F")$"Pr(>F)"[2] 152 | } 153 | 154 | i<-i+1 155 | } 156 | 157 | dimnames(pval)[[2]]<-SNPs.label 158 | dimnames(pval)[[1]]<-SNPs.label 159 | 160 | class(pval)<-"SNPinteraction" 161 | attr(pval,"model") <- model.type[m] 162 | attr(pval,"gen.info")<-attr(data,"gen.info") 163 | pval 164 | } 165 | 166 | -------------------------------------------------------------------------------- /R/snp.R: -------------------------------------------------------------------------------- 1 | `snp` <- 2 | function (x, sep = "/", name.genotypes, reorder="common", remove.spaces = TRUE, 3 | allow.partial.missing = FALSE) 4 | { 5 | 6 | if (is.snp(x)) { 7 | 8 | object <- x 9 | 10 | } else { 11 | 12 | if (sum(is.na(x)) == length(x)) { 13 | object <- rep(NA, length(x)) 14 | attr(object, "allele.names") <- NULL 15 | class(object) <- c("snp","logical") 16 | return(object) 17 | } 18 | 19 | if(missing(name.genotypes)) { 20 | alleles <- NULL 21 | x.d <- dim(x) 22 | x <- as.character(x) 23 | dim(x) <- x.d 24 | x[is.na(x)] <- "" 25 | 26 | if (remove.spaces) { 27 | xdim <- dim(x) 28 | x <- gsub("[ \t]", "", x) 29 | dim(x) <- xdim 30 | } 31 | 32 | if (!is.null(dim(x)) && ncol(x) > 1) { 33 | parts <- x[, 1:2] 34 | } else { 35 | if (sep == "") { 36 | sep <- 1 37 | } 38 | if (is.character(sep)) { 39 | part.list <- strsplit(x, sep) 40 | part.list[sapply(part.list, length) == 0] <- NA 41 | half.empties <- lapply(part.list, length) == 1 42 | part.list[half.empties] <- lapply(part.list[half.empties], c, NA) 43 | empties <- is.na(x) | lapply(part.list, length) == 0 44 | part.list[empties] <- list(c(NA, NA)) 45 | parts <- matrix(unlist(part.list), ncol = 2, byrow = TRUE) 46 | } else if (is.numeric(sep)) { 47 | # parts <- cbind(substring(x, 1, sep), substring(x, sep + 1, 9999)) 48 | #.19/08/2022.# parts <- cbind(substring(x, 1, sep), substring(x, sep + 1, nchar(x))) 49 | # Control sep length to avoid segfault (19/08/2022) 50 | allele_1 <- ifelse( sep > nchar(x), 51 | substring(x, 1, nchar(x)), 52 | substring(x, 1, sep) ) 53 | allele_2 <- ifelse( sep + 1 > nchar(x), 54 | "", 55 | substring(x, sep + 1, nchar(x)) ) 56 | 57 | if( length(allele_1) == length(allele_2) ) { 58 | parts <- cbind( allele_1, allele_2) 59 | } else { 60 | stop(paste("Error splitting alleles with sep=", sep)) 61 | } 62 | 63 | # parts <- cbind( ifelse( sep > nchar(x), 64 | # substring(x, 1, nchar(x)), 65 | # substring(x, 1, sep) ), 66 | # ifelse( sep + 1 > nchar(x), 67 | # "", 68 | # substring(x, sep + 1, nchar(x)) ) 69 | # ) 70 | } else { 71 | stop(paste("I don't know how to handle sep=", sep)) 72 | } 73 | } 74 | 75 | mode(parts) <- "character" 76 | temp <- grep("^[ \t]*$", parts) 77 | parts[temp] <- NA 78 | if (!allow.partial.missing) { 79 | parts[is.na(parts[, 1]) | is.na(parts[, 2]), ] <- c(NA, NA) 80 | } 81 | alleles <- unique(c(na.omit(parts))) 82 | 83 | if( length(alleles) > 2 ) { 84 | stop("SNP must have only two alleles") 85 | } 86 | 87 | 88 | tmp <- ifelse(is.na(parts[, 1]) & is.na(parts[, 2]), 89 | NA, 90 | apply(parts, 1, paste, collapse = "/") ) 91 | object <- factor(tmp) 92 | 93 | ll <- levels(object) <- na.omit(levels(object)) 94 | 95 | if ( length(ll)==4 ) { 96 | object[object==ll[3]] <- ll[2] 97 | object <- factor(object) 98 | } 99 | 100 | control <- paste( rep(alleles[1], 2), collapse="/") %in% ll 101 | 102 | if ( sum(control)==0 & length(ll)==3 ) { 103 | object[ object == ll[2] ] <- ll[1] 104 | object <- factor(object) 105 | } 106 | 107 | 108 | control <- paste( rep(alleles[2], 2), collapse="/") %in% ll 109 | 110 | if (sum(control) == 0 & length(ll) == 3) { 111 | object[ object == ll[3] ] <- ll[2] 112 | object <- factor(object) 113 | } 114 | 115 | if (length(object)==sum(is.na(object))) { 116 | stop("choose the correct character separator to divide alleles") 117 | } 118 | 119 | class(object) <- c("snp","factor") 120 | object <- reorder.snp(object, ref = reorder) 121 | attr(object, "allele.names") <- alleles 122 | 123 | } else { 124 | 125 | if (any(is.na(match(x[!is.na(x)],name.genotypes)))){ 126 | stop("'name.genotypes' must match with the observed genotypes") 127 | } 128 | 129 | x[ x==name.genotypes[1] ] <- "A/A" 130 | x[ x==name.genotypes[2] ] <- "A/B" 131 | x[ x==name.genotypes[3] ] <- "B/B" 132 | object <- as.factor(x) 133 | attr(object, "allele.names") <- c("A","B") 134 | class(object) <- c("snp","factor") 135 | } 136 | } 137 | 138 | object 139 | 140 | } 141 | 142 | -------------------------------------------------------------------------------- /man/WGassociation.Rd: -------------------------------------------------------------------------------- 1 | \name{WGassociation} 2 | \alias{WGassociation} 3 | \alias{WGstats} 4 | \alias{print.WGassociation} 5 | \alias{summary.WGassociation} 6 | \alias{labels.WGassociation} 7 | \alias{pvalues} 8 | \alias{pvalues.WGassociation} 9 | \alias{codominant.WGassociation} 10 | \alias{dominant.WGassociation} 11 | \alias{recessive.WGassociation} 12 | \alias{overdominant.WGassociation} 13 | \alias{additive.WGassociation} 14 | \alias{[.WGassociation} 15 | 16 | \title{Whole genome association analysis} 17 | 18 | \description{ 19 | This function carries out a whole genome association analysis between the SNPs and 20 | a dependent variable (phenotype) under five different genetic models (inheritance patterns): 21 | codominant, dominant, recessive, overdominant and log-additive. The phenotype may be quantitative 22 | or categorical. In the second case (e.g. case-control studies) this variable must be of class 'factor' 23 | with two levels. 24 | 25 | } 26 | \usage{ 27 | WGassociation(formula, data, model = c("all"), 28 | quantitative = is.quantitative(formula, data), 29 | genotypingRate = 80, level = 0.95, ...) 30 | } 31 | 32 | \arguments{ 33 | \item{formula}{either a symbolic description of the model to be fited (a formula object) without the SNP 34 | or the name of response variable in the case of fitting single models (e.g. unadjusted models). 35 | It might have either a continuous variable (quantitative traits) or a 36 | factor variable (case-control studies) as the response on the left of the \code{~} 37 | operator and terms with additional covariates on the right of the \code{~} operator may be 38 | added to fit an adjusted model (e.g., \code{~}var1+var2+...+varN+SNP). See details} 39 | \item{data}{a required dataframe of class 'setupSNP' containing the variables in the model and the SNPs} 40 | \item{model}{a character string specifying the type of genetic model (mode of inheritance) for the SNP. 41 | This indicates how the genotypes should be collapsed. Possible 42 | values are "codominant", "dominant", "recessive", "overdominant", "log-additive" or "all". The default 43 | is "all" that fits the 5 possible genetic models. Only the first words are required, e.g "co", "do", etc.} 44 | \item{quantitative}{logical value indicating whether the phenotype (that which is in the 45 | left of the operator ~ in 'formula' argument) is quantitative. The function 46 | 'is.quantitative' returns FALSE when the phenotype 47 | is a variable with two categories (i.e. indicating case-control status). Thus, 48 | it is not a required argument but it may be modified by the user.} 49 | \item{genotypingRate}{ minimum percentage of genotype rate for a given SNP to be included in the analysis. 50 | Default is 80\%.} 51 | \item{level}{signification level for confidence intervals. Defaul 95\%.} 52 | \item{...}{Other arguments to be passed through glm function} 53 | } 54 | 55 | \details{ 56 | This function assesses the association between the response variable included in the left side in 57 | the 'formula' and the SNPs included in the 'data' argument adjusted by those variables included 58 | in the right side of the 'formula'. Different genetic models may be analyzed using 'model' argument. 59 | } 60 | 61 | 62 | \value{ 63 | An object of class 'WGassociation'. 64 | 65 | 'summary' returns a summary table by groups defined in info (genes/chromosomes). 66 | 67 | 'WGstats' returns a detailed output, similar to the produced by \code{\link{association}}. 68 | 69 | 'pvalues' and 'print' return a table of p-values for each genetic model for each SNP. 70 | The first column indicates whether a problem with genotyping is present. 71 | 72 | 'plot' produces a plot of p values in the -log scale. See \code{\link{plot.WGassociation}} for 73 | further details. 74 | 75 | 'labels' returns the names of the SNPs analyzed. 76 | 77 | The functions 'codominat', 'dominant', 'recessive', 'overdominant' and 'additive' 78 | are used to obtain the p values under these genetic models. 79 | 80 | See examples for further illustration about all previous issues. 81 | 82 | } 83 | 84 | \references{ 85 | JR Gonzalez, L Armengol, X Sole, E Guino, JM Mercader, X Estivill, V Moreno. 86 | SNPassoc: an R package to perform whole genome association studies. Bioinformatics, 87 | 2007;23(5):654-5. 88 | } 89 | 90 | 91 | \seealso{ \code{\link{getSignificantSNPs}} \code{\link{association}} 92 | \code{\link{WGstats}} \code{\link{setupSNP}} 93 | \code{\link{plot.WGassociation}}} 94 | 95 | \examples{ 96 | data(SNPs) 97 | datSNP<-setupSNP(SNPs,6:40,sep="") 98 | ansAll<-WGassociation(protein~1,data=datSNP,model="all") 99 | 100 | # In that case the formula is not required. You can also write: 101 | # ansAll<-WGassociation(protein,data=datSNP,model="all") 102 | 103 | 104 | #only codominant and log-additive 105 | ansCoAd<-WGassociation(protein~1,data=datSNP,model=c("co","log-add")) 106 | 107 | #for printing p values 108 | print(ansAll) 109 | print(ansCoAd) 110 | 111 | #for obtaining a matrix with the p palues 112 | pvalAll<-pvalues(ansAll) 113 | pvalCoAd<-pvalues(ansCoAd) 114 | 115 | # when all models are fitted and we are interested in obtaining 116 | # p values for different genetic models 117 | 118 | # codominant model 119 | pvalCod<-codominant(ansAll) 120 | 121 | # recessive model 122 | pvalRec<-recessive(ansAll) 123 | 124 | # and the same for additive, dominant or overdominant 125 | 126 | 127 | #summary 128 | summary(ansAll) 129 | 130 | #for a detailed report 131 | WGstats(ansAll) 132 | 133 | #for plotting the p values 134 | plot(ansAll) 135 | 136 | } 137 | 138 | \keyword{utilities} 139 | -------------------------------------------------------------------------------- /R/table.interaction.R: -------------------------------------------------------------------------------- 1 | `table.interaction` <- 2 | function(var, dep, adj = NULL, int, num.status, level) 3 | { 4 | # taula.int(Datos$XRCC1.81, Datos$grupo, Datos[,c("sexo", "rcal.dia")], Datos$n.edad) 5 | # taula.int(Datos$XRCC1.81, Datos$grupo, NULL, Datos$n.edad) 6 | 7 | if (num.status==0) #Categorical response variable 8 | { 9 | var <- as.factor(var) 10 | dep <- as.factor(dep) 11 | 12 | if (is.null(adj)) 13 | { 14 | 15 | m.t <- glm(dep~ as.numeric(var) + int, family = binomial) 16 | 17 | subset <- 1:length(var)%in%as.numeric(rownames(m.t$model)); 18 | 19 | m.b <- glm(dep~ var + int, subset = subset, family = binomial) 20 | m.int <- glm(dep~ var/int, subset = subset, family = binomial) 21 | m.t.int <- glm(dep~ as.numeric(var) * int, subset = subset, family = binomial) 22 | 23 | } 24 | else 25 | { 26 | m.t <- glm(dep~. + as.numeric(var) + int, family = binomial, data=adj) 27 | 28 | subset <- 1:length(var)%in%as.numeric(rownames(m.t$model)); 29 | 30 | m.b <- glm(dep~. + var + int, subset = subset, family = binomial, data=adj) 31 | m.int <- glm(dep~. + var/int, subset = subset, family = binomial, data=adj) 32 | m.t.int <- glm(dep~. + as.numeric(var) * int, subset = subset, family = binomial, data=adj) 33 | 34 | } 35 | 36 | var.int <- factor(paste(levels(var)[var], levels(int)[int]), levels = outer(levels(var), levels(int), paste), 37 | exclude = c(paste(levels(var), ""), paste("", levels(int)), paste(" "))) 38 | 39 | ta <- table(var.int[subset], dep[subset]) 40 | 41 | # Matriu de coeficients i cov 42 | 43 | mat.coef <- merge(m.int$coef, summary(m.int)$coef, by=0, all.x=TRUE, sort=FALSE) 44 | nom.pos <- data.frame(names(m.int$coef), ordre=1:length(m.int$coef)) 45 | mat.ordre <- merge(nom.pos, mat.coef, by.x=1, by.y=1, all.x=TRUE, sort=FALSE) 46 | mat.ordre <- mat.ordre[order(mat.ordre$ordre),] 47 | 48 | a <- as.matrix(mat.ordre[,c("Estimate")]) 49 | se <- as.matrix(mat.ordre[,c("Std. Error")]) 50 | mat <- cbind(a, se) 51 | selec <- dim(mat)[1.] - (length(levels(int)) - 1.) * length(levels(var)) 52 | o <- (selec + 1.):dim(mat)[1.] 53 | k <- matrix(nrow = length(levels(var)), ncol = 3.) 54 | k[, 1.] <- 1. 55 | taula <- cbind(exp(mat[o, 1.]), exp(mat[o, 1.] - 1.96 * mat[o, 2.]), exp(mat[o, 1.] + 1.96 * mat[o, 2.])) 56 | taula[taula > 999.] <- NA 57 | ktaula <- rbind(k, round(taula, 2.)) 58 | 59 | ktaula <- cbind(ta, ktaula) 60 | 61 | i <- 1; 62 | j <- 1; 63 | step <- length(levels(var)); 64 | taula.int <- NULL; 65 | while (i <= nrow(ktaula)) 66 | { 67 | aux <- ktaula[i:(i+step-1),]; 68 | colnames(aux)[3] <- levels(int)[j]; 69 | taula.int <- cbind(taula.int, aux); 70 | i <- i + step; 71 | j <- j + 1; 72 | } 73 | 74 | #Check if interaction pvalues are NA 75 | t1 <- anova(m.b, m.int, test = "Chi") 76 | pval <- t1[2, grep("^P.*Chi",names(t1))] 77 | if (is.na(pval)) 78 | { 79 | pval <- "NA" 80 | } 81 | else 82 | { 83 | pval <- format.pval(pval) 84 | } 85 | 86 | t2 <- anova(m.t, m.t.int, test = "Chi") 87 | pval.trend <- t2[2, grep("^P.*Chi",names(t2))] 88 | if (is.na(pval.trend)) 89 | { 90 | pval.trend <- "NA" 91 | } 92 | else 93 | { 94 | pval.trend <- format.pval(pval.trend) 95 | } 96 | rownames(taula.int) <- levels(var); 97 | list(table=taula.int,pval=pval,trend=pval.trend) 98 | } 99 | else #Continuous response variable 100 | { 101 | var <- as.factor(var) 102 | 103 | if (is.null(adj)) 104 | { 105 | m.t <- glm(dep~as.numeric(var) + int, family = gaussian) 106 | 107 | subset <- 1:length(var)%in%as.numeric(rownames(m.t$model)); 108 | 109 | m.b <- glm(dep~ var + int, subset = subset, family = gaussian) 110 | m.int <- glm(dep~ var/int, subset = subset, family = gaussian) 111 | m.t.int <- glm(dep~ as.numeric(var) * int, subset = subset, family = gaussian) 112 | } 113 | else 114 | { 115 | m.t <- glm(dep~. + as.numeric(var) + int, family = gaussian, data=adj) 116 | 117 | subset <- 1:length(var)%in%as.numeric(rownames(m.t$model)); 118 | 119 | m.b <- glm(dep~. + var + int, subset = subset, family = gaussian, data=adj) 120 | m.int <- glm(dep~. + var/int, subset = subset, family = gaussian, data=adj) 121 | m.t.int <- glm(dep~. + as.numeric(var) * int, subset = subset, family = gaussian, data=adj) 122 | } 123 | var.int <- factor(paste(levels(var)[var], levels(int)[int]), levels = outer(levels(var), levels(int), paste), 124 | exclude = c(paste(levels(var), ""), paste("", levels(int)), paste(" "))) 125 | 126 | # Matriu de coeficients i cov 127 | 128 | mat.coef <- merge(m.int$coef, summary(m.int)$coef, by=0, all.x=TRUE, sort=FALSE) 129 | nom.pos <- data.frame(names(m.int$coef), ordre=1:length(m.int$coef)) 130 | mat.ordre <- merge(nom.pos, mat.coef, by.x=1, by.y=1, all.x=TRUE, sort=FALSE) 131 | mat.ordre <- mat.ordre[order(mat.ordre$ordre),] 132 | 133 | a <- as.matrix(mat.ordre[,c("Estimate")]) 134 | se <- as.matrix(mat.ordre[,c("Std. Error")]) 135 | mat <- cbind(dif=a, lo=a-(1.96*se), up=a+(1.96*se)) 136 | selec <- dim(mat)[1] - (length(levels(int)) - 1.) * length(levels(var)) 137 | o <- (selec + 1):dim(mat)[1] 138 | mat <- mat[o,]; 139 | 140 | i <- 1; 141 | while (i <= length(levels(var))) 142 | { 143 | mat <- rbind(c(0,NA,NA),mat); 144 | i <- i + 1; 145 | } 146 | 147 | res <- cbind(Table.mean.se(var.int, dep, subset)$tp, mat); 148 | 149 | i <- 1; 150 | j <- 1; 151 | step <- length(levels(var)); 152 | taula.int <- NULL; 153 | while (i <= nrow(res)) 154 | { 155 | aux <- res[i:(i+step-1),]; 156 | colnames(aux)[3] <- levels(int)[j]; 157 | taula.int <- cbind(taula.int, aux); 158 | i <- i + step; 159 | j <- j + 1; 160 | } 161 | 162 | pval <- anova(m.b, m.int, test = "F")$"Pr(>F)"[2]; 163 | if (is.na(pval)) 164 | { 165 | pval <- "NA"; 166 | } 167 | else 168 | { 169 | pval <- format.pval(pval); 170 | } 171 | 172 | pval.trend <- anova(m.t, m.t.int, test = "F")$"Pr(>F)"[2]; 173 | if (is.na(pval.trend)) 174 | { 175 | pval.trend <- "NA"; 176 | } 177 | else 178 | { 179 | pval.trend <- format.pval(pval.trend); 180 | } 181 | rownames(taula.int) <- levels(var); 182 | list(table=taula.int,pval=pval,trend=pval.trend); 183 | } 184 | } 185 | 186 | -------------------------------------------------------------------------------- /R/table.corner.R: -------------------------------------------------------------------------------- 1 | `table.corner` <- 2 | function (var, dep, adj = NULL, int = NULL, num.status, level) 3 | { 4 | 5 | 6 | glm2<- 7 | function (formula, family = gaussian, data, weights, subset, 8 | na.action, start = NULL, etastart, mustart, offset, control = glm.control(...), 9 | model = TRUE, method = "glm.fit", x = FALSE, y = TRUE, contrasts = NULL, 10 | ...) 11 | { 12 | # copia de glm 13 | # comentantdo mf$drop.unused.levels <- TRUE 14 | # para que no desaparezcan filas y columnas ver var-covar 15 | # 16 | call <- match.call() 17 | if (is.character(family)) 18 | family <- get(family, mode = "function", envir = parent.frame()) 19 | if (is.function(family)) 20 | family <- family() 21 | if (is.null(family$family)) { 22 | print(family) 23 | stop("'family' not recognized") 24 | } 25 | if (missing(data)) 26 | data <- environment(formula) 27 | mf <- match.call(expand.dots = FALSE) 28 | m <- match(c("formula", "data", "subset", "weights", "na.action", 29 | "etastart", "mustart", "offset"), names(mf), 0) 30 | mf <- mf[c(1, m)] 31 | # mf$drop.unused.levels <- TRUE 32 | mf[[1]] <- as.name("model.frame") 33 | mf <- eval(mf, parent.frame()) 34 | switch(method, model.frame = return(mf), glm.fit = 1, stop("invalid 'method': ", 35 | method)) 36 | mt <- attr(mf, "terms") 37 | Y <- model.response(mf, "any") 38 | if (length(dim(Y)) == 1) { 39 | nm <- rownames(Y) 40 | dim(Y) <- NULL 41 | if (!is.null(nm)) 42 | names(Y) <- nm 43 | } 44 | X <- if (!is.empty.model(mt)) 45 | model.matrix(mt, mf, contrasts) 46 | else matrix(, NROW(Y), 0) 47 | weights <- as.vector(model.weights(mf)) 48 | if (!is.null(weights) && !is.numeric(weights)) 49 | stop("'weights' must be a numeric vector") 50 | offset <- as.vector(model.offset(mf)) 51 | if (!is.null(weights) && any(weights < 0)) 52 | stop("negative weights not allowed") 53 | if (!is.null(offset)) { 54 | if (length(offset) == 1) 55 | offset <- rep(offset, NROW(Y)) 56 | else if (length(offset) != NROW(Y)) 57 | stop(gettextf("number of offsets is %d should equal %d (number of observations)", 58 | length(offset), NROW(Y)), domain = NA) 59 | } 60 | mustart <- model.extract(mf, "mustart") 61 | etastart <- model.extract(mf, "etastart") 62 | fit <- glm.fit(x = X, y = Y, weights = weights, start = start, 63 | etastart = etastart, mustart = mustart, offset = offset, 64 | family = family, control = control, intercept = attr(mt, 65 | "intercept") > 0) 66 | if (length(offset) && attr(mt, "intercept") > 0) { 67 | fit$null.deviance <- glm.fit(x = X[, "(Intercept)", drop = FALSE], 68 | y = Y, weights = weights, offset = offset, family = family, 69 | control = control, intercept = TRUE)$deviance 70 | } 71 | if (model) 72 | fit$model <- mf 73 | fit$na.action <- attr(mf, "na.action") 74 | if (x) 75 | fit$x <- X 76 | if (!y) 77 | fit$y <- NULL 78 | fit <- c(fit, list(call = call, formula = formula, terms = mt, 79 | data = data, offset = offset, control = control, method = method, 80 | contrasts = attr(X, "contrasts"), xlevels = .getXlevels(mt, 81 | mf))) 82 | class(fit) <- c("glm", "lm") 83 | fit 84 | } 85 | 86 | 87 | 88 | 89 | 90 | if (num.status == 0) { 91 | var <- as.factor(var) 92 | dep <- as.factor(dep) 93 | var.int <- factor(paste(levels(var)[var], levels(int)[int]), 94 | levels = outer(levels(var), levels(int), paste), 95 | exclude = c(paste(levels(var), ""), paste("", levels(int)), 96 | paste(" "))) 97 | if (is.null(adj)) { 98 | m.var.int <- glm2(dep ~ var.int, family = binomial) 99 | subset <- 1:length(var.int) %in% as.numeric(rownames(m.var.int$model)) 100 | m.b <- glm2(dep ~ NULL, subset = subset, family = binomial) 101 | } 102 | else { 103 | m.var.int <- glm2(dep ~ . + var.int, family = binomial, 104 | data = adj) 105 | subset <- 1:length(var.int) %in% as.numeric(rownames(m.var.int$model)) 106 | m.b <- glm2(dep ~ ., subset = subset, family = binomial, 107 | data = adj) 108 | } 109 | res <- cbind(table(var.int[subset], dep[subset]), intervals.or(m.var.int, 110 | level, m.b, var.int)$or.ic)[, 1:5] 111 | i <- 1 112 | j <- 1 113 | step <- length(levels(var)) 114 | taula.int <- NULL 115 | while (i <= nrow(res)) { 116 | aux <- res[i:(i + step - 1), ] 117 | colnames(aux)[3] <- levels(int)[j] 118 | taula.int <- cbind(taula.int, aux) 119 | i <- i + step 120 | j <- j + 1 121 | } 122 | rownames(taula.int) <- levels(var) 123 | colnames(taula.int)[1] <- colnames(taula.int)[3] 124 | colnames(taula.int)[2]<-c("") 125 | colnames(taula.int)[3]<-c("OR") 126 | colnames(taula.int)[6] <- colnames(taula.int)[8] 127 | colnames(taula.int)[7]<-c("") 128 | colnames(taula.int)[8]<-c("OR") 129 | 130 | taula.int 131 | } 132 | else { 133 | var <- as.factor(var) 134 | var.int <- factor(paste(levels(var)[var], levels(int)[int]), 135 | levels = outer(levels(var), levels(int), paste), 136 | exclude = c(paste(levels(var), ""), paste("", levels(int)), 137 | paste(" "))) 138 | if (is.null(adj)) { 139 | m.var.int <- glm2(dep ~ var.int, family = gaussian) 140 | subset <- 1:length(var.int) %in% as.numeric(rownames(m.var.int$model)) 141 | m.b <- glm2(dep ~ NULL, subset = subset, family = gaussian) 142 | } 143 | else { 144 | m.var.int <- glm2(dep ~ . + var.int, family = gaussian, 145 | data = adj) 146 | subset <- 1:length(var.int) %in% as.numeric(rownames(m.var.int$model)) 147 | m.b <- glm2(dep ~ ., subset = subset, family = gaussian, 148 | data = adj) 149 | } 150 | res <- cbind(Table.mean.se(var.int, dep, subset)$tp, 151 | intervals.dif(m.var.int, level, m.b, var.int, pval = FALSE)$m) 152 | i <- 1 153 | j <- 1 154 | step <- length(levels(var)) 155 | taula.int <- NULL 156 | while (i <= nrow(res)) { 157 | aux <- res[i:(i + step - 1), ] 158 | colnames(aux)[3] <- levels(int)[j] 159 | taula.int <- cbind(taula.int, aux) 160 | i <- i + step 161 | j <- j + 1 162 | } 163 | rownames(taula.int) <- levels(var) 164 | colnames(taula.int)[2] <- colnames(taula.int)[3] 165 | colnames(taula.int)[c(1,3)]<-c("","") 166 | colnames(taula.int)[8] <- colnames(taula.int)[9] 167 | colnames(taula.int)[c(7,9)]<-c("","") 168 | 169 | taula.int 170 | } 171 | } 172 | 173 | -------------------------------------------------------------------------------- /man/association.Rd: -------------------------------------------------------------------------------- 1 | \name{association} 2 | \alias{association} 3 | \alias{print.snpOut} 4 | 5 | \title{Association analysis between a single SNP and a given phenotype} 6 | 7 | \description{ 8 | This function carries out an association analysis between a single SNP and 9 | a dependent variable (phenotype) under five different genetic models (inheritance patterns): 10 | codominant, dominant, recessive, overdominant and log-additive. The phenotype may be quantitative 11 | or categorical. In the second case (e.g. case-control studies) this variable must be of class 'factor' 12 | with two levels. 13 | } 14 | 15 | \usage{ 16 | 17 | association(formula, data, model=c("all"), model.interaction= 18 | c("codominant"), subset, name.snp = NULL, quantitative = 19 | is.quantitative(formula,data), genotypingRate= 0, 20 | level = 0.95, ...) 21 | } 22 | 23 | \arguments{ 24 | \item{formula}{a symbolic description of the model to be fited (a formula object). 25 | It might have either a continuous variable (quantitative traits) or a 26 | factor variable (case-control studies) as the response on the left of the \code{~} 27 | operator and a term corresponding to the SNP on the right. This term must be 28 | of class \code{snp} (e.g. \code{~}snp(var), where var is a given SNP), and it is 29 | required. Terms with additional covariates on the right of the \code{~} operator may be 30 | added to fit an adjusted model (e.g., \code{~}var1+var2+...+varN+SNP). The formula 31 | allows to incorporate more than one object of class \code{snp}. In that case, 32 | the analysis is done for the first SNP which appears in the formula adjusted by the others 33 | covariates and other additional SNPs. } 34 | \item{data}{a required dataframe of class 'setupSNP' containing the variables in the model.} 35 | \item{model}{a character string specifying the type of genetic model (mode of inheritance) for the SNP. 36 | This indicates how the genotypes should be collapsed. Possible 37 | values are "codominant", "dominant", "recessive", "overdominant", "additive" or "all". The default 38 | is "all" that fits the 5 possible genetic models. Only the first words are required, e.g "co", "do", etc.} 39 | \item{model.interaction}{a character string specifying the type of genetic model (mode of inheritance) assumed for 40 | the SNP when it is included in a interaction term. Possible values are "codominant", 41 | "dominant", "recessive", "overdominant". The default is "codominant".} 42 | \item{subset}{an optional vector specifying a subset of observations to be used in the fitting process} 43 | \item{name.snp}{optional label of the SNP variable to be printed.} 44 | \item{quantitative}{logical value indicating whether the phenotype (that which is in the 45 | left of the operator ~ in 'formula' argument) is quantitative. The function 46 | 'is.quantitative' returns FALSE when the phenotype 47 | is a variable with two categories (i.e. indicating case-control status). Thus, 48 | it is not a required argument but it may be modified by the user.} 49 | \item{genotypingRate}{ minimum percentage of genotype rate for the SNP to be analyzed. 50 | Default is 0\% (e.g. all SNPs are analyzed). This parameter should not be 51 | changed. It is used in the function 'WGassociation'.} 52 | \item{level}{signification level for confidence intervals.} 53 | \item{...}{Other arguments to be passed through glm function} 54 | } 55 | 56 | \details{ 57 | This function should be called by the user when we are interested in analyzing an unique SNP. 58 | It is recommended to use \code{\link{WGassociation}} function when more than one SNP is studied. \cr 59 | 60 | } 61 | 62 | \value{ 63 | For each genetic model (codominant, dominant, recessive, overdominant, and log-additive) the function gives 64 | a matrix with sample size and percentages for each genotype, the Odds Ratio and its 95\% confidence interval 65 | (taking the most frequent homozygous genotype as the reference), the p-value corresponding to the likelihood ratio test obtained 66 | from a comparison with the null model, and the Akaike Information Criterion (AIC) of each genetic model. In the case 67 | of analyzing a quantitative trait, the function returns a matrix with sample size, mean and standard errors for each genotype, 68 | mean difference and its 95\% confidence interval with respect to the most frequent homozygous genotype, 69 | the p-value obtained from an overall gene effect and the Akaike Information Criterion (AIC) of each genetic model. 70 | 71 | When an interaction term (a categorical covariate with an SNP) is included in the model, three different tables are given. 72 | The first one correponds to the full interaction matrix where the ORs (or mean differences if a quantitative trait is analyzed) 73 | are expressed with respect to the non variant genotype and the first category of the covariate. The other two tables show the ORs 74 | and their 95\% confidence intervals for both marginal models. P values for interaction and trend are also showed in the output. 75 | 76 | } 77 | 78 | \references{ 79 | JR Gonzalez, L Armengol, X Sole, E Guino, JM Mercader, X Estivill, V Moreno. 80 | SNPassoc: an R package to perform whole genome association studies. Bioinformatics, 81 | 2007;23(5):654-5. 82 | 83 | Iniesta R, Guino E, Moreno V. Statistical analysis of genetic polymorphisms in epidemiological studies. 84 | Gac Sanit. 2005;19(4):333-41. 85 | 86 | Elston RC. Introduction and overview. Statistical methods in genetic epidemiology. Stat Methods Med Res. 87 | 2000;9:527-41. 88 | 89 | } 90 | 91 | 92 | \seealso{\code{\link{WGassociation}}} 93 | 94 | \examples{ 95 | data(SNPs) 96 | 97 | # first, we create an object of class 'setupSNP' 98 | datSNP<-setupSNP(SNPs,6:40,sep="") 99 | 100 | # case-control study, crude analysis 101 | association(casco~snp10001, data=datSNP) 102 | 103 | # case-control study, adjusted by sex and arterial blood pressure 104 | association(casco~sex+snp10001+blood.pre, data=datSNP) 105 | 106 | 107 | # quantitative trait, crude analysis 108 | association(log(protein)~snp10001,data=datSNP) 109 | # quantitative trait, adjusted by sex 110 | association(log(protein)~snp10001+sex,data=datSNP) 111 | 112 | 113 | # 114 | # Interaction analysis 115 | # 116 | 117 | # Interaction SNP and factor 118 | association(log(protein)~snp10001*sex+blood.pre, data=datSNP, 119 | model="codominant") 120 | 121 | # Interaction SNP and SNP (codominant and codominant) 122 | association(log(protein)~snp10001*factor(snp10002)+blood.pre, 123 | data=datSNP, model="codominant") 124 | 125 | # Interaction SNP and SNP (dominant and recessive) 126 | association(log(protein)~snp10001*factor(recessive(snp100019))+blood.pre, 127 | data=datSNP, model="dominant") 128 | 129 | 130 | } 131 | 132 | \keyword{utilities} 133 | 134 | -------------------------------------------------------------------------------- /inst/docs/changelog.txt: -------------------------------------------------------------------------------- 1 | v2.1-1 2 | 2024-10-25 3 | - Removed warnings and notes after checking the package using R-2.1.0 4 | 5 | v2.0-0 6 | 2020-08-23 7 | - Added dscore function 8 | - Added related function 9 | - Changed email address after receiving Brian's email 10 | 11 | v1.9-2 12 | 2014-04-22 13 | - fixed bug in tableHWE function (see e-mail from Martin Rijlaarsdam) 14 | 15 | v1.9-1 16 | 2013-09-05 (after being removed from CRAN) 17 | - mclapply with parallel 18 | - fixed problems with gfotran (scanWGassociation changes) 19 | 20 | v1.9-0 21 | 2013-09-05 22 | - mclapply 23 | 24 | v1.8-5 25 | 2012-12-17 26 | - bug fixed: spandSetupSNP function deals properly with 100% missing genotypes 27 | 28 | v1.8-4 29 | 2012-05-12 30 | - change: scanWGassociation (.Internal problems see e-mail from B. Ripley) 31 | - change: other Warnings after checking the package using R-1.5.1 32 | 33 | v1.8-3 34 | 2012-05-09 35 | - anova.glm labels (see Peter's e-mail) also changed in interactionPval function 36 | 37 | v1.8-2 38 | 2012-05-03 39 | - Dropped dependency on goat by adding trim and interleave 40 | 41 | v1.8-1 42 | 2011-11-22 (Changes to R-2.14.0) 43 | - require has been removed from firstlib function 44 | - HapMap.rda has been resaved using better compression tools::resaveRdaFiles() 45 | - anova.glm does not returns "P(>|Chi|)"[2] -> changed to t1[2, grep("^P.*Chi",names(t1))] (Peter's e-mail) 46 | - functions changed: intervals.dif, intervals.or, table.interaction, interactionPval 47 | 48 | v1.8-0 49 | 2011-09-14 50 | - bug fixed: problems when only 1 SNP is analyzed (see Victor's e-mail) 51 | - bug fixed: summary.snp does not compute the HWE when only two genotypes are available (see Victor's e-mail) 52 | 53 | v1.7-0 54 | 2011-02-28 55 | - bug fixed: problem with heterozygous codes (see Ross Whetten's e-mail) 56 | - added: a new function to compute the exact probability of a genetic score 57 | 58 | v1.6-1 59 | 2010-09-26 60 | - added: some extra-code to consider G-statistic equal to 0 after Jiexun's comment 61 | 62 | v1.6-0 63 | 2009-07-17 64 | - changed: labels of plots for p-values (-log[10]) 65 | 66 | v1.5-9 67 | 2009-07-04 68 | - changes in [.WGassociation 69 | - added: GenomicControl now deals with 2 d.f. chi-square test (e.g., codominant) 70 | - changes in plot.WGassociation 71 | 72 | v1.5-8 73 | 2009-03-09 74 | - changed: NAMESPACE includes methods for LD 75 | 76 | v1.5-7 77 | 2009-03-04 78 | - print.LD file added 79 | 80 | v1.5-6 81 | 2009-02-06 82 | - Changes in .Rd files to pass 'Rd files against version 2 parser' (R version 2.9.0) 83 | 84 | v1.5-5 85 | 2008-12-12 86 | - changed: 'association.fit' and 'association' to incorporate the possibility of using other arguments included in 'glm' function such as 'weigths'. Question posted by Ane Marie Closter 87 | 88 | -changed: 'plot.WGassociation' to show -log_10(p-values) instead of -log_10(p-values). Question posted by 89 | 90 | 91 | v1.5-4 92 | 2008-05-29 93 | - changed: 'association.fit' problems with factors and levels to determine when a snp is monomorphic [table(as.character(var)) instead of length(levels(var)) ] 94 | 95 | 96 | v1.5-3 97 | 2008-04-29 98 | - modified: intervals.haplo.glm.R 99 | - added: summary and print for LD objects 100 | - modified: Bonferroni.sig (number of tests when no monomorphic SNPs) 101 | - added: 'gdata' dependence 102 | 103 | v1.5-2 104 | 2008-04-22 105 | - added: LD functions adapted from LDplot in package genetics by Gregory Warnes et al. 106 | 107 | v1.5-1 108 | 2008-03-27 109 | - change in tableHWE to correct deal with strata option 110 | - expandsetupSNP.R has been modified 111 | - summary.snp has been modified and print.summary.snp has been created and added to the NAMESPACE 112 | 113 | v1.4-9 114 | 2007-10-16 115 | - modified: expandsetupSNP.R to show alleles sorted by frequency when summary of an object of class 'setupSNP' is executed 116 | - changed: e-mail address of the maintainer 117 | - removed: obsolete markup \non_function{} 118 | 119 | v1.4-8 120 | 2007-05-21 121 | - mvtnorm is required (change in DESCRIPTION, NAMESPACE and firstlib.R) 122 | - added: new generic function maxstat (to compute asymptotic p values for max-statistic) and some methods 123 | - added: function for inspecting overdispersion due to population substructure (qqpval) 124 | - added: function for correcting the p values using genomic control (GenomiControl) 125 | - added: example in the Rd file for odds function 126 | - added: argument nIndiv in association.fit (to deal with genotypeRate properly). association has also been changed (look for 'nSubject') 127 | - minor change in the function snp (when snp is applied to an object of class snp) 128 | - added: reference of SNPassoc to some .Rd files 129 | 130 | v1.4-7 131 | 2007-05-17 132 | - bug fixed: overdominant returned wrong levels (thanks to Nicholas Orr) 133 | 134 | 2007-05-02 135 | - bug fixed: [<-.setupSNP added any variable as new SNP, but could be a covariate 136 | - bug fixed: odds returns NA for monomorphic SNPs 137 | - bug fixed: association.fit detected monomorphic SNPs that were not so 138 | 139 | 2007-04-02 140 | -bug fixed: c.WGassociation when more than 2 files were combined 141 | -bug fixed: [.WGassociation when rows/columns were selected by name wrong selection was done 142 | -bug fixed: [.WGassociation now also subsets permuted SNPs if available 143 | -changed: [.setupSNP should be faster and preserve snp attributes 144 | -changed: glm2 in table.corner updated to avoid warnings if response was factor 145 | -new function odds to extract odds ratios, 95% CI and p values for a given model from an WGassociation object 146 | -new function [.snp, version of factor that preserves attributes 147 | -new function print.snp 148 | 149 | 150 | v1.4-6 151 | 2007-03-22 152 | - change in 'association' function to deal with SNP with 0% of genotype rate (e.g. all missing) when ajusted models are fitted 153 | 154 | v1.4-5 155 | 2007-03-20 156 | - bug fixed in the output labels for overdominant model 157 | 158 | v1.4-4 159 | 2007-03-02 160 | - bug fixed in the function 'intervals.haplo.glm' 161 | - added new functions: codominant.snp, dominant.snp, recessive.snp, overdominant.snp 162 | - [.WGassociation is able to select using logical statements 163 | - [.setupSNP recover 'allele.labels' from the SNPs 164 | - perm.Test changed options(warn=-1) to avoid waning messages to fit the beta distribution 165 | 166 | v1.4-3 167 | 2007-01-22 168 | - removed the file Makevars.win to ../src 169 | - changed SNPassoc.f to be compiled using g77 170 | - \usage sections for 'SNPassoc-internal' 'plot.WGassociation' and 'snp' have been fixed for S3 methods 171 | 172 | v1.4-2 173 | 2007-01-19 174 | - added the file Makevars.win to ../src 175 | 176 | v1.4-1 177 | 2007-01-17 178 | - minor changes in scanWGassociation 179 | - implemented rank truncated product in permTest function 180 | 181 | v1.4 182 | 2006-12-12 183 | - minor changes in print.WGassociation 184 | - zzz.R is replaced by firstlib.R 185 | - firstlib.R calls to a Fortran dll 186 | - created a Fortran program SNPassoc.f which is called from scanWGassociation 187 | - added a new argument to scanWGassociation function called "nperm" 188 | - added a new function called permTest to extract p values from permutation test obtained using scanWGassociation function 189 | - added methods for objects of class "permTest" 190 | 191 | v1.3 192 | 2006-11-27 193 | - snp, summary.snp and expandsetupSNP fixed to deal with SNP with 100% missing information 194 | 195 | 2006-11-26 196 | - manual for WGstats moved to WGassociation 197 | - summary.GWassociation (previous WGstats) fixed 198 | 199 | 2006-11-25 200 | - interactionPval now accepts only response and asumes formula response~1 201 | - summary.WGassociation and WGstats swapped 202 | - summary.WGassociation generates output even if no info is supplied (whole dataset) 203 | 204 | 2006-11-23 205 | - added methods for extraction and modification of setupSNP and WGassociation objects 206 | - genetic model functions generic 207 | - methods: default: recode (accepts snp, numeric, factor or character max 3 values) 208 | WGassociation: extracts pvalues for that model 209 | 210 | 211 | 212 | 213 | -------------------------------------------------------------------------------- /R/association.R: -------------------------------------------------------------------------------- 1 | `association` <- 2 | function (formula, data, model = c("all"), model.interaction = c("codominant"), 3 | subset, name.snp = NULL, quantitative = is.quantitative(formula, 4 | data), genotypingRate= 0, level = 0.95, ...) 5 | { 6 | cl <- match.call() 7 | mf <- match.call(expand.dots = FALSE) 8 | m0 <- match(c("formula", "data", "subset"), names(mf), 0) 9 | mf <- mf[c(1, m0)] 10 | mf[[1]] <- as.name("model.frame") 11 | mf <- eval(mf, parent.frame()) 12 | mt <- attr(mf, "terms") 13 | nSubject<-nrow(data) 14 | control <- unlist(lapply(mf, FUN = is.snp)) 15 | if (sum(control) == 0) { 16 | stop("a variable of class 'snp' should be included in the model") 17 | } 18 | special <- c("strata") 19 | Terms <- if (missing(data)) 20 | terms(formula, special) 21 | else terms(formula, special, data = data) 22 | strats <- attr(Terms, "specials")$strata 23 | ord <- attr(Terms, "order") 24 | if (any(ord > 1)) { 25 | varPos <- c(1:ncol(mf))[control][1] 26 | var <- mf[, varPos] 27 | dep <- mf[, 1] 28 | aux0 <- apply(attr(Terms, "factors"), 1, FUN = sum) 29 | aux <- names(aux0[aux0 == 2]) 30 | control2 <- aux[aux != names(control)[control]] 31 | intPos <- c(1:ncol(mf))[names(mf) == control2] 32 | int <- mf[, intPos] 33 | if (!length(levels(int))) 34 | stop("interaction term must be a factor") 35 | if (!is.null(strats)) 36 | stop("interaction analysis does not support 'strata'") 37 | if (ncol(mf) > 3 & is.null(strats)) { 38 | adj <- data.frame(mf[, -c(1, varPos, intPos)]) 39 | rownames(adj) <- 1:nrow(mf) 40 | variables <- attr(mt, "term.labels") 41 | varAdj <- variables[-c(varPos - 1, intPos - 1, length(variables))] 42 | } 43 | else { 44 | adj <- NULL 45 | varAdj <- NULL 46 | variables <- attr(mt, "term.labels") 47 | } 48 | int.nom <- variables[intPos - 1] 49 | if (is.null(name.snp)) 50 | var.nom <- variables[varPos - 1] 51 | else var.nom <- name.snp 52 | model.type <- c("codominant", "dominant", "recessive", 53 | "overdominant") 54 | m <- charmatch(model.interaction, model.type, nomatch = 0) 55 | if (m == 0) 56 | stop("interaction analysis need a pre-defined model: codominant, dominant, recessive, or overdominant") 57 | if (length(table(var)) == 1) 58 | stop("Monomorphic SNP") 59 | if (length(table(var)) == 2 & m > 1) 60 | stop("SNP with only two genotypes. Codominant model is the only model that can be fitted") 61 | mod.inher <- switch(m, codominant, dominant, recessive, 62 | overdominant) 63 | var <- mod.inher(var) 64 | res.corner <- table.corner(var, dep, adj, int, num.status = ifelse(quantitative, 65 | 1, 0), level) 66 | temp0 <- table.interaction(var, dep, adj, int, num.status = ifelse(quantitative, 67 | 1, 0), level) 68 | temp <- temp0$table 69 | p.interaction <- temp0$pval 70 | p.trend1 <- temp0$trend 71 | control.etiq <- ifelse(quantitative, 6, 5) 72 | etiq1 <- dimnames(temp)[[1]] 73 | aux0 <- dimnames(temp)[[2]] 74 | etiq2 <- aux0[seq(3, length(aux0), control.etiq)] 75 | ans <- list(NA) 76 | for (i in 1:nrow(temp)) { 77 | ans.i <- matrix(temp[i, ], nrow = length(etiq2), 78 | ncol = control.etiq, byrow = TRUE) 79 | ans[[i]] <- data.frame(ans.i) 80 | dimnames(ans[[i]])[[1]] <- etiq2 81 | if (!quantitative) 82 | names(ans[[i]]) <- c(aux0[1:2], "OR", "lower", 83 | "upper") 84 | else names(ans[[i]]) <- c(aux0[1:2], "se", "dif", 85 | "lower", "upper") 86 | } 87 | names(ans) <- etiq1 88 | res.int1 <- ans 89 | temp0 <- table.interaction(int, dep, adj, var, num.status = ifelse(quantitative, 90 | 1, 0), level) 91 | temp <- temp0$table 92 | p.trend2 <- temp0$trend 93 | etiq1 <- dimnames(temp)[[1]] 94 | aux0 <- dimnames(temp)[[2]] 95 | etiq2 <- aux0[seq(3, length(aux0), control.etiq)] 96 | ans2 <- list(NA) 97 | for (i in 1:nrow(temp)) { 98 | ans.i <- matrix(temp[i, ], nrow = length(etiq2), 99 | ncol = control.etiq, byrow = TRUE) 100 | ans2[[i]] <- data.frame(ans.i) 101 | dimnames(ans2[[i]])[[1]] <- etiq2 102 | if (!quantitative) 103 | names(ans2[[i]]) <- c(aux0[1:2], "OR", "lower", 104 | "upper") 105 | else names(ans2[[i]]) <- c(aux0[1:2], "se", "dif", 106 | "lower", "upper") 107 | } 108 | names(ans2) <- etiq1 109 | res.int2 <- ans2 110 | res <- list(res.corner, res.int1, res.int2, p.interaction, 111 | p.trend1, p.trend2) 112 | interaction <- TRUE 113 | } 114 | else { 115 | type <- charmatch(model, c("codominant", "dominant", 116 | "recessive", "overdominant", "log-additive", "all")) 117 | if (any(is.na(type))) 118 | stop("model must be 'codominant','dominant','recessive','overdominant', \n 'log-additive', 'all' or any combination of them") 119 | varPos <- c(1:ncol(mf))[control][1] 120 | dep <- mf[, 1] 121 | if (quantitative & !is.numeric(dep)) 122 | stop("dependent variable should be numeric. It has more than two categories") 123 | var <- mf[, varPos] 124 | if (ncol(mf) > 2 & is.null(strats) | ncol(mf) > 3 & !is.null(strats)) { 125 | adj <- data.frame(mf[, -c(1, varPos, strats)]) 126 | if (nrow(adj)>0) 127 | rownames(adj) <- 1:nrow(mf) 128 | variables <- attr(mt, "term.labels") 129 | varAdj <- variables[-c(varPos - 1, strats - 1)] 130 | } 131 | else { 132 | adj <- NULL 133 | varAdj <- NULL 134 | variables <- attr(mt, "term.labels") 135 | } 136 | if (is.null(name.snp)) 137 | var.nom <- variables[varPos - 1] 138 | else var.nom <- name.snp 139 | dropx <- NULL 140 | if (length(strats)) { 141 | temp <- untangle.specials(Terms, "strata", 1) 142 | dropx <- c(dropx, temp$terms) 143 | if (length(temp$vars) == 1) 144 | strata.keep <- mf[[temp$vars]] 145 | else strata.keep <- strata(mf[, temp$vars], shortlabel = TRUE) 146 | strats <- as.numeric(strata.keep) 147 | nstrats <- length(table(strats)) 148 | res <- list() 149 | if (is.null(adj)) { 150 | for (i in 1:nstrats) { 151 | res[[i]] <- association.fit(var[strats == i], 152 | dep[strats == i], adj, quantitative, type, 153 | level, nSubject, genotypingRate, ...) 154 | } 155 | } 156 | else { 157 | for (i in 1:nstrats) { 158 | res[[i]] <- association.fit(var[strats == i], 159 | dep[strats == i], data.frame(adj[strats == 160 | i, ]), quantitative, type, level, nSubject, 161 | genotypingRate, ...) 162 | } 163 | } 164 | attr(res, "strata") <- levels(strata.keep) 165 | } 166 | else { 167 | res <- association.fit(var, dep, adj, quantitative, 168 | type, level, nSubject, genotypingRate, ...) 169 | } 170 | interaction <- FALSE 171 | } 172 | class(res) <- "snpOut" 173 | attr(res, "varAdj") <- varAdj 174 | attr(res, "label.snp") <- var.nom 175 | if (interaction) 176 | attr(res, "label.int") <- int.nom 177 | attr(res, "BigTable") <- FALSE 178 | attr(res, "Interaction") <- interaction 179 | res 180 | } 181 | 182 | 183 | -------------------------------------------------------------------------------- /R/LD.R: -------------------------------------------------------------------------------- 1 | LD<-function(g1, ...) UseMethod("LD") 2 | 3 | LD.snp<- 4 | function (g1, g2, ...) 5 | { 6 | # adapted from LD.genotype in package genetics by Gregory Warnes et al 7 | if (!is.snp(g1) || !is.snp(g2)) 8 | stop("Please supply snp objects") 9 | prop.A <- summary(g1)$allele.freq[, 2]/100 10 | prop.B <- summary(g2)$allele.freq[, 2]/100 11 | major.A <- names(prop.A)[which.max(prop.A)] 12 | major.B <- names(prop.B)[which.max(prop.B)] 13 | pA <- max(prop.A, na.rm = TRUE) 14 | pB <- max(prop.B, na.rm = TRUE) 15 | if (pA<1 & pB<1){ 16 | pa <- 1 - pA 17 | pb <- 1 - pB 18 | Dmin <- max(-pA * pB, -pa * pb) 19 | pmin <- pA * pB + Dmin 20 | Dmax <- min(pA * pb, pB * pa) 21 | pmax <- pA * pB + Dmax 22 | 23 | # counts <- table(allele.count(g1, major.A), allele.count(g2, major.B)) 24 | 25 | counts <- table( as.numeric(reorder(g1, "common")), as.numeric(reorder(g2,"common")) ) 26 | 27 | n3x3 <- matrix(0, nrow = 3, ncol = 3) 28 | colnames(n3x3) <- rownames(n3x3) <- 0:2 29 | for (i in rownames(counts)) for (j in colnames(counts)) n3x3[3 - 30 | as.numeric(i), 3 - as.numeric(j)] <- counts[i, j] 31 | loglik <- function(pAB, ...) { 32 | (2 * n3x3[1, 1] + n3x3[1, 2] + n3x3[2, 1]) * log(pAB) + 33 | (2 * n3x3[1, 3] + n3x3[1, 2] + n3x3[2, 3]) * log(pA - 34 | pAB) + (2 * n3x3[3, 1] + n3x3[2, 1] + n3x3[3, 35 | 2]) * log(pB - pAB) + (2 * n3x3[3, 3] + n3x3[3, 2] + 36 | n3x3[2, 3]) * log(1 - pA - pB + pAB) + n3x3[2, 2] * 37 | log(pAB * (1 - pA - pB + pAB) + (pA - pAB) * (pB - 38 | pAB)) 39 | } 40 | solution <- optimize(loglik, lower = pmin + .Machine$double.eps, 41 | upper = pmax - .Machine$double.eps, maximum = TRUE) 42 | pAB <- solution$maximum 43 | estD <- pAB - pA * pB 44 | if (estD > 0) 45 | estDp <- estD/Dmax 46 | else estDp <- estD/Dmin 47 | n <- sum(n3x3) 48 | corr <- estD/sqrt(pA * pB * pa * pb) 49 | dchi <- (2 * n * estD^2)/(pA * pa * pB * pb) 50 | dpval <- 1 - pchisq(dchi, 1) 51 | retval <- list(call = match.call(), D = estD, "D'" = estDp, 52 | r = corr, "R^2" = corr^2, n = n, "X^2" = dchi, "P-value" = dpval) 53 | } else 54 | retval <- list(call = match.call(), D = NA, "D'" = NA, 55 | r = NA, "R^2" = NA, n = sum(table(g1,g2)) , "X^2" = NA, "P-value" = NA) 56 | 57 | class(retval) <- "LD" 58 | retval 59 | } 60 | 61 | LD.setupSNP<- 62 | function (g1, SNPs, ...) 63 | { 64 | # adapted from LD.data.frame in package genetics by Gregory Warnes et al 65 | if(missing(SNPs) & inherits(g1,"setupSNP")) SNPs<-labels(g1) 66 | g1 <- g1[, SNPs] 67 | 68 | P <- matrix(nrow = ncol(g1), ncol = ncol(g1)) 69 | rownames(P) <- colnames(g1) 70 | colnames(P) <- colnames(g1) 71 | P <- D <- Dprime <- nobs <- chisq <- p.value <- corr <- R.2 <- P 72 | for (i in 1:(ncol(g1) - 1)) for (j in (i + 1):ncol(g1)) { 73 | ld <- LD(g1[, i], g1[, j]) 74 | D[i, j] <- ld$D 75 | Dprime[i, j] <- ld$"D'" 76 | corr[i, j] <- ld$r 77 | R.2[i, j] <- ld$"R^2" 78 | nobs[i, j] <- ld$n 79 | chisq[i, j] <- ld$"X^2" 80 | p.value[i, j] <- ld$"P-value" 81 | } 82 | retval <- list(call = match.call(), D = D, "D'" = Dprime, 83 | r = corr, "R^2" = R.2, n = nobs, "X^2" = chisq, "P-value" = p.value) 84 | class(retval) <- "LD.data.frame" 85 | retval 86 | } 87 | 88 | LDtable<- 89 | function (x, colorcut = c(0, 0.01, 0.025, 0.05, 0.1, 1), colors = heat.colors(length(colorcut)), 90 | textcol = "black", digits = 3, show.all = FALSE, which = c("D", 91 | "D'", "r", "X^2", "P-value", "n"), colorize = "P-value", 92 | cex, ...) 93 | { 94 | # adapted from LDtable in package genetics by Gregory Warnes et al 95 | if(!inherits(x,"LD.data.frame")) 96 | stop("Object should be LD.data.frame, output of LD") 97 | if (!colorize %in% names(x)) 98 | stop(colorize, " not an element of ", deparse(substitute(x))) 99 | datatab <- summary(x) 100 | missmatch <- which[!(which %in% names(x))] 101 | if (length(missmatch) > 0) 102 | stop(missmatch, " not an element of ", deparse(substitute(x))) 103 | matform <- function(value, template) { 104 | dim(value) <- dim(template) 105 | dimnames(value) <- dimnames(template) 106 | value 107 | } 108 | tmp <- cut(x[[colorize]], colorcut, include.lowest = TRUE) 109 | colormat <- matform(as.numeric(tmp), x[[colorize]]) 110 | n <- matform(paste("(", x$n, ")", sep = ""), x$n) 111 | if (!show.all) { 112 | colormat <- colormat[-nrow(colormat), -1, drop = FALSE] 113 | n <- n[-nrow(n), -1, drop = FALSE] 114 | } 115 | image(x = 1:ncol(colormat), y = 1:ncol(colormat), z = t(colormat[nrow(colormat):1, 116 | ]), col = colors, xlab = "Marker 2\n\n", ylab = "Marker 1", 117 | xaxt = "n", yaxt = "n", ...) 118 | abline(v = -0.5 + 1:(ncol(colormat) + 1)) 119 | abline(h = -0.5 + 1:(nrow(colormat) + 1)) 120 | axis(3, 1:ncol(colormat), colnames(colormat)) 121 | axis(2, 1:nrow(colormat), rev(rownames(colormat))) 122 | 123 | # Reset par options on exit function 124 | cex.old <- par("cex") 125 | on.exit(par(cex.old)) 126 | 127 | if (missing(cex)) { 128 | cex <- min(c(1/10, 1/(length(which) + 1))/c(strwidth("W"), 129 | strheight("W") * 1.5)) 130 | } 131 | par(cex = cex) 132 | lineheight <- strheight("W") * 1.5 133 | center <- lineheight * length(which)/2 134 | for (i in 1:length(which)) { 135 | displaymat <- x[[which[i]]] 136 | if (!show.all){ 137 | displaymat <- displaymat[-nrow(displaymat), -1, drop = FALSE] 138 | } 139 | 140 | if (which[i] == "P-value"){ 141 | displaymat <- format.pval(displaymat, digits = digits) 142 | } else if (which[i] != "n"){ 143 | displaymat <- format(displaymat, digits = digits) 144 | } 145 | displaymat[] <- gsub("NA.*", "", as.character(displaymat)) 146 | text(x = col(colormat), y = nrow(colormat) - row(colormat) + 147 | 1 + center - lineheight * (i - 1), displaymat, col = textcol, 148 | adj = c(0.5, 1)) 149 | } 150 | text(x = 1, y = 1, paste(which, collapse = "\n"), adj = c(0.5, 0.5)) 151 | 152 | # par(cex = cex.old) 153 | 154 | title(main = "Linkage Disequilibrium\n") 155 | invisible(colormat) 156 | } 157 | 158 | 159 | LDplot<- 160 | function (x, digits = 3, marker, distance, which = c("D", "D'", 161 | "r", "X^2", "P-value", "n", " "), ...) 162 | { 163 | # adapted from LDplot in package genetics by Gregory Warnes et al 164 | if(!inherits(x,"LD.data.frame")) 165 | stop("Object should be LD.data.frame, output of LD") 166 | which = match.arg(which) 167 | if (missing(marker)) 168 | marker <- colnames(x[[which]]) 169 | else if (is.numeric(marker)) 170 | marker <- colnames(x[[which]])[marker] 171 | datamat <- ifelse(is.na(x[[which]]), t(x[[which]]), x[[which]]) 172 | if (which %in% c("D'", "r")) 173 | diag(datamat) <- 1 174 | else if (which == "P-value") 175 | diag(datamat) <- 0 176 | dimnames(datamat) <- dimnames(x[[which]]) 177 | if (missing(distance)) 178 | distance <- 1:ncol(datamat) 179 | distance <- matrix(distance, ncol = ncol(datamat), nrow = nrow(datamat), 180 | byrow = TRUE) 181 | dimnames(distance) <- dimnames(datamat) 182 | matplot(x = t(distance[marker, , drop = FALSE]), t(datamat[marker, 183 | , drop = FALSE]), type = "b", xlab = "Marker", ylab = paste("Linkage Disequilibrium: ", 184 | which, sep = ""), xaxt = "n", ...) 185 | axis(1, distance[1, ], paste(1:ncol(datamat), colnames(datamat), 186 | sep = ": ")) 187 | title("Pairwise Disequilibrium Plot") 188 | invisible() 189 | } 190 | 191 | --------------------------------------------------------------------------------