├── R ├── .Rapp.history ├── nobs.ridgeLinear.R ├── pvals.R ├── ridge.R ├── print.ridgeLinear.R ├── print.ridgeLogistic.R ├── objectiveFunction.R ├── coef.ridgeLinear.R ├── updateBeta.R ├── coef.ridgeLogistic.R ├── vcov.ridgeLinear.R ├── pvals.ridgeLinear.R ├── predict.ridgeLinear.R ├── plot.pvalsRidgeLogistic.R ├── plot.ridgeLogistic.R ├── plot.pvalsRidgeLinear.R ├── plot.ridgeLinear.R ├── pvals.ridgeLogistic.R ├── computeRidgeLogistic.R ├── print.pvalsRidgeLinear.R ├── print.pvalsRidgeLogistic.R ├── predict.ridgeLogistic.R ├── print.summary.ridgeLogistic.R ├── logisticRidgeGenotypesPredict.R ├── logisticRidgeGenotypesPredict.R.in ├── linearRidgeGenotypesPredict.R ├── linearRidgeGenotypesPredict.R.in ├── print.summary.ridgeLinear.R ├── summary.ridgeLinear.R ├── summary.ridgeLogistic.R ├── linearRidgeGenotypes.R ├── linearRidgeGenotypes.R.in ├── logisticRidgeGenotypes.R ├── logisticRidgeGenotypes.R.in ├── logisticRidge.R └── linearRidge.R ├── .github ├── .gitignore └── workflows │ ├── test-coverage.yaml │ ├── pkgdown.yaml │ └── R-CMD-check.yaml ├── .gitattributes ├── data ├── Hald.rda ├── GenBin.rda ├── GenCont.rda └── Gorman.rda ├── tests ├── testthat.R └── testthat │ └── test.R ├── man ├── figures │ └── ridge.png ├── ridge.Rd ├── Hald.Rd ├── ridge-package.Rd ├── Gorman.Rd ├── GenBin.Rd ├── GenCont.Rd ├── ridge-internal.Rd ├── pvals.Rd ├── linearRidgeGenotypesPredict.Rd ├── logisticRidgeGenotypesPredict.Rd ├── logisticRidge.Rd ├── linearRidge.Rd ├── linearRidgeGenotypes.Rd └── logisticRidgeGenotypes.Rd ├── cleanup ├── .Rbuildignore ├── src ├── Makevars.win ├── Makevars.in ├── logisticFunctions.h ├── linearFunctions.h ├── thin.h ├── registration.c ├── logistic.h ├── linear.h ├── commonFunctions.h ├── computeLinearRidge.h ├── computePvals.h ├── config.h.in ├── config.h ├── linearFunctions.c ├── depends.h ├── logisticFunctions.c ├── coordinateDescent.h ├── ridgeRegressionFunctions.h ├── ReadInData.h ├── computeLinearRidge.c ├── thin.c └── regression_wrapper_function.c ├── codecov.yml ├── .gitignore ├── ridge.Rproj ├── INSTALL ├── inst ├── CITATION └── extdata │ └── GenBin_phenotypes.txt ├── dev_hints.md ├── DESCRIPTION ├── NAMESPACE ├── README.md ├── configure.ac ├── NEWS.md └── autom4te.cache └── requests /R/.Rapp.history: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | src/* linguist-vendored 2 | 3 | -------------------------------------------------------------------------------- /data/Hald.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SteffenMoritz/ridge/HEAD/data/Hald.rda -------------------------------------------------------------------------------- /data/GenBin.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SteffenMoritz/ridge/HEAD/data/GenBin.rda -------------------------------------------------------------------------------- /data/GenCont.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SteffenMoritz/ridge/HEAD/data/GenCont.rda -------------------------------------------------------------------------------- /data/Gorman.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SteffenMoritz/ridge/HEAD/data/Gorman.rda -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(ridge) 3 | 4 | test_check("ridge") 5 | -------------------------------------------------------------------------------- /man/figures/ridge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SteffenMoritz/ridge/HEAD/man/figures/ridge.png -------------------------------------------------------------------------------- /cleanup: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | for f in ./config.*; do 3 | rm -f $f 4 | done 5 | 6 | rm ./src/Makevars 7 | 8 | exit 0 9 | 10 | -------------------------------------------------------------------------------- /R/nobs.ridgeLinear.R: -------------------------------------------------------------------------------- 1 | #' @rdname nobs 2 | #' @export 3 | #' @importFrom stats nobs 4 | 5 | nobs.ridgeLinear <- function(object, ...) { 6 | length(object$y) 7 | } 8 | -------------------------------------------------------------------------------- /R/pvals.R: -------------------------------------------------------------------------------- 1 | ## Generic method for computing pvalues on ridgeLinear or ridgeLogistic models 2 | 3 | #' @export 4 | pvals <- function(x, ...) 5 | UseMethod("pvals") 6 | 7 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^.*\.yml$ 4 | ^.*\.ini$ 5 | ^.*\.txt$ 6 | ^appveyor\.yml$ 7 | Icon 8 | ^cran-comments\.md$ 9 | ^dev_hints.md$ 10 | ^.*\.cache$ 11 | ^codecov\.yml$ 12 | ^\.github$ 13 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | ## These lines are the same as those of gsl version 1.9-9 2 | ## (although I didn't copy them from there, I had them before) 3 | 4 | PKG_CFLAGS = -I$(LIB_GSL)/include 5 | PKG_LIBS = -L$(LIB_GSL)/lib -lgsl -lm -lgslcblas 6 | 7 | -------------------------------------------------------------------------------- /src/Makevars.in: -------------------------------------------------------------------------------- 1 | # Kindly supplied by Dirk Eddelbuettel 2 | # set by configure 3 | GSL_CFLAGS = @GSL_CFLAGS@ 4 | GSL_LIBS = @GSL_LIBS@ 5 | 6 | # combine to standard arguments for R 7 | PKG_CPPFLAGS = $(GSL_CFLAGS) -I. 8 | PKG_LIBS = $(GSL_LIBS) 9 | 10 | 11 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # RStudio files 2 | .Rproj.user 3 | .Rhistory 4 | .RData 5 | .Ruserdata 6 | 7 | # Compile artifacts 8 | src/*.o 9 | src/*.so 10 | src/*.dll 11 | 12 | # Miscellaneous OS X files 13 | /.dropbox 14 | /desktop.ini 15 | /.RData 16 | Icon 17 | 18 | # configure-generated files 19 | config.log 20 | config.status 21 | src/Makevars 22 | ^.*\.cache$ 23 | -------------------------------------------------------------------------------- /man/ridge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ridge.R 3 | \docType{package} 4 | \name{ridge} 5 | \alias{ridge} 6 | \title{ridge: Linear and logistic ridge regression functions.} 7 | \description{ 8 | Additionally includes special functions for 9 | genome-wide single-nucleotide polymorphism (SNP) data. 10 | } 11 | -------------------------------------------------------------------------------- /ridge.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | -------------------------------------------------------------------------------- /R/ridge.R: -------------------------------------------------------------------------------- 1 | ## Package-level documentation 2 | ## See https://r-pkgs.org/man.html#man-packages 3 | 4 | #' ridge: Linear and logistic ridge regression functions. 5 | #' 6 | #' Additionally includes special functions for 7 | #' genome-wide single-nucleotide polymorphism (SNP) data. 8 | #' 9 | #' @docType package 10 | #' @name ridge 11 | #' @useDynLib ridge, .registration=TRUE 12 | NULL 13 | #> NULL 14 | -------------------------------------------------------------------------------- /R/print.ridgeLinear.R: -------------------------------------------------------------------------------- 1 | ## print function for ridgeLinear objects 2 | 3 | #' @rdname print 4 | #' @export 5 | #' @importFrom stats coef 6 | print.ridgeLinear <- function(x, all.coef = FALSE, ...) 7 | { 8 | cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), 9 | "\n\n", sep = "") 10 | print(coef(x, all.coef = all.coef), ...) 11 | cat("\n") 12 | invisible(x) 13 | } 14 | -------------------------------------------------------------------------------- /R/print.ridgeLogistic.R: -------------------------------------------------------------------------------- 1 | ## print function for ridgeLogistic objects 2 | 3 | #' @rdname print 4 | #' @export 5 | #' @importFrom stats coef 6 | print.ridgeLogistic <- function(x, all.coef = FALSE, ...) 7 | { 8 | cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), 9 | "\n\n", sep = "") 10 | print(coef(x, all.coef = all.coef), ...) 11 | cat("\n") 12 | invisible(x) 13 | } 14 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | The ridge package requires GNU Scientific Library (GSL) to be installed on your system. 2 | 3 | Installing gsl from source requires you to download and install GSL first. 4 | 5 | Linux : 6 | sudo apt-get install -y libgsl0-dev 7 | 8 | Mac: 9 | brew install gsl 10 | 11 | For Ubuntu it might also be possible to go to the Software Center and install 12 | "GNU Scientific Library (GSL) -- development package" 13 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | year <- sub("-.*", "", meta$Date) 2 | vers <- paste("R package version", meta$Version) 3 | 4 | citHeader("To cite the ridge package, use:") 5 | 6 | bibentry(bibtype = "Manual", 7 | title = "ridge: Ridge Regression with Automatic Selection of the Penalty Parameter", 8 | author = personList(as.person("Erika Cule"), as.person("Steffen Moritz"), as.person("Dan Frankowski")), 9 | year = year, 10 | note = vers, 11 | url = "https://CRAN.R-project.org/package=ridge") -------------------------------------------------------------------------------- /R/objectiveFunction.R: -------------------------------------------------------------------------------- 1 | objectiveFunction <- function(B, X, y, k, intercept = TRUE) 2 | { 3 | #Numeric bits for objective function 4 | #compute p 5 | XB <- X%*%B 6 | expXB <- exp(XB) 7 | p <- expXB/(1+expXB) 8 | p[is.infinite(expXB)] <- 1 9 | top <- ifelse(y,log(p),log(1-p)) 10 | if(intercept) 11 | { 12 | objBeta <- sum(top) - k*crossprod(B[-1],B[-1]) 13 | } else { 14 | objBeta <- sum(top) - k*crossprod(B,B) 15 | } 16 | return(objBeta) 17 | } 18 | -------------------------------------------------------------------------------- /dev_hints.md: -------------------------------------------------------------------------------- 1 | Install latest autoconf version before next steps (also run autoupdate) 2 | 3 | When preparing an update and changing things in configure.ac 4 | 5 | Run path/ridge/autoconf in console to bring changes to configure file 6 | Do changes to configure.ac - running path/ridge/autoconf then creates the configure file 7 | 8 | configure file is not updated automatically with build() 9 | 10 | To check: Build Source package and manualle install from source 11 | 12 | Autoconf files for R packages: 13 | https://unconj.ca/blog/an-autoconf-primer-for-r-package-authors.html -------------------------------------------------------------------------------- /R/coef.ridgeLinear.R: -------------------------------------------------------------------------------- 1 | ## coef method for ridgeLinear objects 2 | 3 | #' @rdname coef 4 | #' @export 5 | coef.ridgeLinear <- function (object, all.coef = FALSE, ...) 6 | { 7 | scaledcoef <- t(as.matrix(object$coef/object$scales)) 8 | if (object$Inter) { 9 | inter <- object$ym - scaledcoef %*% object$xm 10 | scaledcoef <- cbind(Intercept = inter, scaledcoef) 11 | colnames(scaledcoef)[1] <- "(Intercept)" 12 | } 13 | if(object$automatic && all.coef == FALSE) 14 | { 15 | scaledcoef <- scaledcoef[object$chosen.nPCs,] 16 | } 17 | drop(scaledcoef) 18 | } 19 | -------------------------------------------------------------------------------- /src/logisticFunctions.h: -------------------------------------------------------------------------------- 1 | #include "depends.h" 2 | #ifdef HAVE_GSL_HEADER 3 | 4 | /* Function prototype - read in phenotypes for logistic regression */ 5 | gsl_vector_int * readLogisticPhenotypes(char * phenotypefilename, 6 | int NINDIV); 7 | 8 | /* Function prototype - returning to original scale - component-wise computation of regression coefficients */ 9 | int returnToOriginalScaleLogistic(GSL_TYPE(vector) * betaOut, 10 | GSL_TYPE(vector) * Bridge, 11 | GSL_TYPE(vector) * means, 12 | GSL_TYPE(vector) * scales, 13 | int intercept_flag); 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /man/Hald.Rd: -------------------------------------------------------------------------------- 1 | \name{Hald} 2 | \alias{Hald} 3 | \docType{data} 4 | \title{ 5 | Hald data 6 | } 7 | \description{ 8 | The Hald data as used by Hoerl, Kennard and Baldwin (1975). 9 | These data are also in package \code{wle}. 10 | } 11 | \usage{data(Hald)} 12 | \format{ 13 | Numeric matrix. 14 | } 15 | \details{ 16 | The first column is the response and the remaining four columns are the predictors. 17 | } 18 | \references{ 19 | Ridge Regression: some simulations, Hoerl, A. E. \emph{et al}, 1975, Comm Stat Theor Method 4:105 20 | } 21 | \examples{ 22 | data(Hald) 23 | } 24 | \keyword{datasets} 25 | -------------------------------------------------------------------------------- /R/updateBeta.R: -------------------------------------------------------------------------------- 1 | updateBeta <- function(B, X, y, k, intercept = TRUE, doff = FALSE) 2 | { 3 | XB <- X%*%B 4 | expXB <- exp(XB) 5 | p <- expXB/(1+expXB) 6 | p[is.infinite(expXB)] <- 1 7 | W <- diag(as.numeric((p*(1-p))),length(y),length(y)) 8 | WZ <- p*(1-p)*XB+(y-p) 9 | kI <- diag(2*k,dim(X)[2],dim(X)[2]) 10 | if(intercept) 11 | kI[1,1] <- 0 ##Intercept 12 | updatedBeta <- (solve(t(X)%*%W%*%X+kI))%*%t(X)%*%WZ 13 | if(doff) 14 | { 15 | res = list(updatedBeta = updatedBeta, kI = kI, W = W) 16 | } else { 17 | res = updatedBeta 18 | } 19 | return(res) 20 | } 21 | -------------------------------------------------------------------------------- /man/ridge-package.Rd: -------------------------------------------------------------------------------- 1 | \docType{package} 2 | \name{ridge-package} 3 | \alias{ridge-package} 4 | \title{ridge-package description} 5 | \description{ 6 | R package for fitting linear and logistic ridge regression models. 7 | } 8 | \details{ 9 | This package contains functions for fitting linear and logistic ridge 10 | regression models, including functions for fitting linear and logistic 11 | ridge regression models for genome-wide SNP data supplied as file names 12 | when the data are too big to read into R. 13 | 14 | For a complete 15 | list of functions, use \code{help(package="ridge")}. 16 | } 17 | \author{ 18 | Steffen Moritz, Erika Cule 19 | } 20 | -------------------------------------------------------------------------------- /src/linearFunctions.h: -------------------------------------------------------------------------------- 1 | #include "depends.h" 2 | #ifdef HAVE_GSL_HEADER 3 | #include "ridgeRegressionFunctions.h" 4 | 5 | #if _CUDA_ 6 | #include "cudaOnlyFunctions.h" 7 | #endif 8 | 9 | /* Function prototype - read in phenotypes for linear regression */ 10 | GSL_TYPE(vector) * readLinearPhenotypes(char * phenotypefilename, 11 | int NINDIV); 12 | 13 | /* Function prototype - compute Kr based on a, Z, y and r */ 14 | int computeLinearKr(GSL_TYPE(vector) * a, 15 | GSL_TYPE(matrix) * Z, 16 | GSL_TYPE(vector) * y, 17 | GSL_TYPE(vector) * D2, 18 | int r, 19 | PREC * kr, 20 | PREC * DofF); 21 | 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /R/coef.ridgeLogistic.R: -------------------------------------------------------------------------------- 1 | ## coef method for ridgeLogistic objects 2 | 3 | #' @rdname coef 4 | #' @export 5 | coef.ridgeLogistic <- function (object, all.coef = FALSE, ...) 6 | { 7 | if (object$Inter) { 8 | Intercept <- apply(object$coef, 2, function(x) {x[1] - sum(object$xm * x[-1] / object$scales)}) 9 | scaledcoefs <- rbind(Intercept, apply(object$coef, 2, function(x){x[-1] / object$scales})) 10 | rownames(scaledcoefs)[1] <- "(Intercept)" 11 | } else { 12 | scaledcoefs <- apply(object$coef, 2, function(x){x/object$scales}) 13 | } 14 | if(object$automatic && all.coef == FALSE) 15 | { 16 | scaledcoefs <- scaledcoefs[,object$chosen.nPCs] 17 | } 18 | scaledcoefs <- t(scaledcoefs) 19 | drop(scaledcoefs) 20 | } 21 | -------------------------------------------------------------------------------- /src/thin.h: -------------------------------------------------------------------------------- 1 | #include "depends.h" 2 | #ifdef HAVE_GSL_HEADER 3 | 4 | #include "commonFunctions.h" 5 | #include "computeLinearRidge.h" 6 | #include "ReadInData.h" 7 | #include "ridgeRegressionFunctions.h" 8 | 9 | /* Header file for thin data functions in the regression package */ 10 | 11 | gsl_vector_int * readThinFile(char * thinfilename, 12 | char ** SNPNAMES, 13 | int thinning_distance, 14 | int NINDIV, 15 | int NSNPS, 16 | int * nThinnedSnps, 17 | int verbose); 18 | 19 | int readSNPsThinAndComputePCs(char * genofilename, 20 | gsl_vector_int * thin, 21 | GSL_TYPE(matrix) * Z, 22 | GSL_TYPE(matrix) * thinnedGenotypes, 23 | GSL_TYPE(vector) * D2, 24 | int * howManyK); 25 | 26 | #endif 27 | 28 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | 20 | - uses: r-lib/actions/setup-r@v1 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v1 25 | with: 26 | extra-packages: covr 27 | 28 | - name: Test coverage 29 | run: covr::codecov() 30 | shell: Rscript {0} 31 | -------------------------------------------------------------------------------- /src/registration.c: -------------------------------------------------------------------------------- 1 | #include // for NULL 2 | #include 3 | #include 4 | #include 5 | 6 | /* FIXME: 7 | Check these declarations against the C/Fortran source code. 8 | */ 9 | 10 | /* FIXME: 11 | Check these declarations against the C/Fortran source code. 12 | */ 13 | 14 | /* .C calls */ 15 | extern void regression_wrapper_function(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 16 | 17 | static const R_CMethodDef CEntries[] = { 18 | {"regression_wrapper_function", (DL_FUNC) ®ression_wrapper_function, 11}, 19 | {NULL, NULL, 0} 20 | }; 21 | 22 | void R_init_ridge(DllInfo *dll) 23 | { 24 | 25 | R_registerRoutines(dll, CEntries, NULL, NULL, NULL); 26 | 27 | R_useDynamicSymbols(dll, FALSE); 28 | 29 | R_forceSymbols(dll, TRUE); 30 | 31 | 32 | } 33 | 34 | -------------------------------------------------------------------------------- /man/Gorman.Rd: -------------------------------------------------------------------------------- 1 | \name{Gorman} 2 | \alias{Gorman} 3 | \docType{data} 4 | \title{ 5 | The Ten-Factor data first described by Gorman and Toman (1966). 6 | } 7 | \description{ 8 | A Ten-Factor data set first described by Gornam and Toman (1966) and used by Hoerl and Kennard (1970) (and others) to 9 | investigate regression problems. 10 | } 11 | \usage{data(Gorman)} 12 | \format{ 13 | Numeric matrix. 14 | } 15 | \details{ 16 | The first column is the response on the log scale, the remaining columns are the predictors. 17 | } 18 | \source{ 19 | Selection of variables for fitting equations to data. Gorman, J. W. and Toman, R. J. (1966) Technometrics, 8:27. 20 | } 21 | \references{ 22 | Selection of variables for fitting equations to data. Gorman, J. W. and Toman, R. J. (1966) Technometrics, 8:27. 23 | Ridge Regression: Biased estimators for nonorthogonal problems. Hoerl, A. E. and Kennard, R. W. (1970) Technometrics, 24 | 12:55. 25 | } 26 | \examples{ 27 | data(Gorman) 28 | } 29 | \keyword{datasets} 30 | -------------------------------------------------------------------------------- /man/GenBin.Rd: -------------------------------------------------------------------------------- 1 | \name{GenBin} 2 | \alias{GenBin} 3 | \alias{GenBin_genotypes} 4 | \alias{GenBin_phenotypes} 5 | \docType{data} 6 | \title{ 7 | Simulated genetic data with a binary phenotypes 8 | } 9 | \description{ 10 | Simulated genetic data at 15 SNPs, together with simulated binary phenotypes 11 | } 12 | \usage{data(GenBin)} 13 | \format{ 14 | GenBin is a saved R matrix with 500 rows and 15 columns. The first 15 | column is the pheotypes and columns 2-15 contain the genotypes. Each 16 | row represents an indiviaul. 17 | The same data are stored in flat text files in GenBin_genotypes and GenBin_phenotypes 18 | (in the directory extdata (in the installed package) or inst/extdata (in the source)). 19 | } 20 | \source{ 21 | Simulated using FREGENE 22 | } 23 | \references{ 24 | Fregene: Simulation of realistic sequence-level data in populations and ascertained samples Chadeau-Hyam, M. et al, 25 | 2008, BMC Bioinformatics 9:364 26 | } 27 | \examples{ 28 | data(GenBin) 29 | } 30 | \keyword{datasets} 31 | -------------------------------------------------------------------------------- /man/GenCont.Rd: -------------------------------------------------------------------------------- 1 | \name{GenCont} 2 | \alias{GenCont} 3 | \alias{GenCont_genotypes} 4 | \alias{GenCont_phenotypes} 5 | \docType{data} 6 | \title{ 7 | Simulated genetic data with continuous outcomes 8 | } 9 | \description{ 10 | Simulated genetic data with continuous outcomes. 11 | } 12 | \usage{data(GenCont)} 13 | \format{ 14 | GenCont is a saved R matrix with 500 rows and 13 columns. The first 15 | column is the pheotypes and columns 2-13 contain the genotypes. Each 16 | row represents an indiviaul. 17 | The same data are stored in flat text files in GenCont_genotypes and GenCont_phenotypes 18 | (in the directory extdata (in the installed package) or inst/extdata (in the source)). 19 | } 20 | 21 | \details{ 22 | Genotypes were simulated using FREGENE. 23 | } 24 | \references{ 25 | Fregene: Simulation of realistic sequence-level data in populations and ascertained samples Chadeau-Hyam, M. et al, 26 | 2008, BMC Bioinformatics 9:364 27 | } 28 | \examples{ 29 | data(GenCont) 30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /src/logistic.h: -------------------------------------------------------------------------------- 1 | #include "depends.h" 2 | #ifdef HAVE_GSL_HEADER 3 | 4 | #include "computePvals.h" 5 | #include "coordinateDescent.h" 6 | #include "commonFunctions.h" 7 | #include "logisticFunctions.h" 8 | #include "ReadInData.h" 9 | #include "thin.h" 10 | 11 | /* Prototype for logistic main function */ 12 | int logisticMain(char * genofilename, 13 | char * thinfilename, 14 | char * phenofilename, 15 | char * covarfilename, 16 | char * betafilename, 17 | char * lambdafilename, 18 | char * lambdacovarfilename, 19 | char * approxtestfilename, 20 | char * permtestfilename, 21 | PREC lambda, 22 | PREC lambda_c, 23 | unsigned long int seed, 24 | int howManyK, 25 | int individualK, 26 | int intercept_flag, 27 | int standardize_flag, 28 | int standardize_c_flag, 29 | int thinning_distance, 30 | int NINDIV, 31 | int NPRED, 32 | int NCOVAR, 33 | int NSNPS, 34 | char ** SNPnames, 35 | char ** COVARnames, 36 | int predict_flag, 37 | PREC convergence_threshold, 38 | int verbose); 39 | #endif 40 | 41 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | release: 7 | types: [published] 8 | workflow_dispatch: 9 | 10 | name: pkgdown 11 | 12 | jobs: 13 | pkgdown: 14 | runs-on: ubuntu-latest 15 | env: 16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 17 | steps: 18 | - uses: actions/checkout@v2 19 | 20 | - uses: r-lib/actions/setup-pandoc@v1 21 | 22 | - uses: r-lib/actions/setup-r@v1 23 | with: 24 | use-public-rspm: true 25 | 26 | - uses: r-lib/actions/setup-r-dependencies@v1 27 | with: 28 | extra-packages: pkgdown 29 | needs: website 30 | 31 | - name: Deploy package 32 | run: | 33 | git config --local user.name "$GITHUB_ACTOR" 34 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 35 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' 36 | -------------------------------------------------------------------------------- /src/linear.h: -------------------------------------------------------------------------------- 1 | #include "depends.h" 2 | #ifdef HAVE_GSL_HEADER 3 | 4 | #include "commonFunctions.h" 5 | #include "computePvals.h" 6 | #include "coordinateDescent.h" 7 | #include "linearFunctions.h" 8 | #include "ReadInData.h" 9 | #include "ridgeRegressionFunctions.h" 10 | #include "thin.h" 11 | 12 | /* Prototype for linear main function */ 13 | int linearMain(char * genofilename, 14 | char * thinfilename, 15 | char * phenofilename, 16 | char * covarfilename, 17 | char * betafilename, 18 | char * lambdafilename, 19 | char * lambdacovarfilename, 20 | char * approxtestfilename, 21 | char * permtestfilename, 22 | PREC lambda, 23 | PREC lambda_c, 24 | unsigned long int seed, 25 | int howManyK, 26 | int individualK, 27 | int intercept_flag, 28 | int standardize_flag, 29 | int standardize_c_flag, 30 | int thinning_distance, 31 | int NINDIV, 32 | int NPRED, 33 | int NCOVAR, 34 | int NSNPS, 35 | char ** SNPnames, 36 | char ** COVARnames, 37 | int predict_flag, 38 | PREC convergence_threshold, 39 | int verbose); 40 | #endif 41 | -------------------------------------------------------------------------------- /src/commonFunctions.h: -------------------------------------------------------------------------------- 1 | 2 | #include "depends.h" 3 | #ifdef HAVE_GSL_HEADER 4 | #include "ReadInData.h" // Required for getNROW in readCoefficients 5 | 6 | gsl_matrix_int * readShortGenotypes(char * genofilename, 7 | int NINDIV, 8 | int NSNPS); 9 | 10 | GSL_TYPE(matrix) * readGenotypes(char * genofilename, 11 | int NINDIV, 12 | int NSNPS); 13 | 14 | GSL_TYPE(vector) * readCoefficients(char * betafilename, 15 | int * intercept_flag, 16 | PREC * intercept_coefficient); 17 | 18 | int getGenotypeInfo(gsl_matrix_int * genotypes, 19 | int standardize_flag, 20 | int corr_form_flag, 21 | GSL_TYPE(vector) * means, 22 | GSL_TYPE(vector) * scales, 23 | char ** names); 24 | 25 | int convert_int_vector(const gsl_vector_int * src, GSL_TYPE(vector) 26 | * dest); 27 | 28 | int checkOperationType(PREC lambda, 29 | PREC lambda_c, 30 | char * lambdafilename, 31 | char * lambdacovarfilename, 32 | char * approxfilename, 33 | int howManyK, 34 | int individualK, 35 | int * automaticK, 36 | int * singleK, 37 | int predict_flag); 38 | 39 | int checkGenotypes(gsl_matrix_int * mat); 40 | 41 | int checkForInvariantPredictors(char * genofilename, 42 | int NINDIV); 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: ridge 2 | Title: Ridge Regression with Automatic Selection of the Penalty Parameter 3 | Description: Linear and logistic ridge regression functions. Additionally includes special functions for 4 | genome-wide single-nucleotide polymorphism (SNP) data. More details can be found in 5 | and . 6 | Version: 3.3 7 | Date: 2022-04-11 8 | Authors@R: 9 | c( 10 | person("Steffen", "Moritz", email="steffen.moritz10@gmail.com", role=c("aut", "cre"), comment = c(ORCID = "0000-0002-0085-1804")), 11 | person("Erika", "Cule", role=c("aut")), 12 | person("Dan", "Frankowski", role=c("aut")) 13 | ) 14 | Author: Steffen Moritz [aut, cre] (), Erika Cule [aut], Dan Frankowski [aut] 15 | Maintainer: Steffen Moritz 16 | Type: Package 17 | BugReports: https://github.com/SteffenMoritz/ridge/issues 18 | URL: https://github.com/SteffenMoritz/ridge 19 | Repository: CRAN 20 | Depends: 21 | R (>= 3.0.1) 22 | Imports: 23 | stats, 24 | graphics, 25 | grDevices, 26 | utils 27 | License: GPL-2 28 | SystemRequirements: Gnu Scientific Library version >= 1.14 29 | NeedsCompilation: yes 30 | RoxygenNote: 7.1.0 31 | Encoding: UTF-8 32 | Suggests: 33 | testthat, 34 | datasets, 35 | covr 36 | -------------------------------------------------------------------------------- /R/vcov.ridgeLinear.R: -------------------------------------------------------------------------------- 1 | ## vcov method for ridgeLinear objects 2 | 3 | #' @rdname vcov 4 | #' @export 5 | #' @importFrom stats coef model.frame model.matrix .checkMFClasses 6 | vcov.ridgeLinear <- function (object, ...) 7 | { 8 | # TODO(dan): check we didn't get any arguments we are unprepared for 9 | 10 | # get back the original data 11 | data <- model.frame(object) 12 | 13 | # For now, only works if we have an intercept 14 | stopifnot(object$Inter == 1) 15 | # drop "(Intercept)" from names 16 | coef_names <- attr(coef(object), "names")[-1] 17 | # get data, with intercept term 18 | X <- model.matrix(object, data=data) 19 | # make y the original data: 20 | y_orig <- with(object, y+ym) 21 | # See also equation 3.44 of "elements of statistical learning", hastie, Tibshirani, Friedman 22 | # https://web.stanford.edu/~hastie/ElemStatLearn/ 23 | # Inverting this matrix may not be the most numerically stable. 24 | inv_mat = solve(t(X) %*% X + object$lambda * diag(ncol(X))) 25 | betaHat <- inv_mat %*% t(X) %*% y_orig 26 | # sum squared residuals divided by degrees of freedom 27 | # degrees of freedom: # values minus coefficients and intercept term 28 | sigma2 <- sum( (y_orig - predict(object, data=data))^2 ) / 29 | (length(y_orig) - length(coef_names) - 1) 30 | var_betaHat <- sigma2 * inv_mat 31 | return(var_betaHat) 32 | } 33 | -------------------------------------------------------------------------------- /R/pvals.ridgeLinear.R: -------------------------------------------------------------------------------- 1 | ## computing pvals for ridgeLinear models 2 | 3 | #' @rdname pvals 4 | #' @export 5 | #' @importFrom stats pnorm 6 | pvals.ridgeLinear <- function(x, ...) 7 | { 8 | automatic <- x$automatic 9 | chosen.nPCs <- x$chosen.nPCs 10 | max.nPCs <- x$max.nPCs 11 | isScaled <- x$isScaled 12 | beta <- x$coef 13 | names(beta) <- colnames(x$x) 14 | y <- x$y 15 | n <- length(y) 16 | svdX <- svd(x$x) 17 | U <- svdX$u 18 | D <- svdX$d 19 | D2 <- svdX$d^2 20 | V <- svdX$v 21 | lambda <- x$lambda 22 | div <- lapply(lambda, function(x) {D2 + x}) 23 | sig2hat <- apply(rbind(lambda, do.call(cbind, div)), 2, function(x) {as.numeric(crossprod(y - U %*% diag((D2)/(x[-1])) %*% t(U) %*% y)) / (n - sum(D2 * (D2 + 2 * x[1]) / (x[-1]^2)))}) 24 | varmat <- lapply(div, function(x){V %*% diag(D2 / (x^2)) %*% t(V)}) 25 | varmat <- mapply(function(x, y){x * y}, sig2hat, varmat, SIMPLIFY = FALSE) 26 | se <- lapply(varmat, function(x){sqrt(diag(x))}) 27 | se <- do.call(cbind, se) 28 | rownames(se) <- rownames(beta) 29 | colnames(se) <- colnames(beta) 30 | tstat <- abs(beta / se) 31 | pval <- 2 * (1 - pnorm(tstat)) 32 | res <- list(coef = beta, se = se, tstat = tstat, pval = pval, isScaled = isScaled, automatic = automatic, lambda = lambda, chosen.nPCs = chosen.nPCs, max.nPCs = max.nPCs) 33 | class(res) <- "pvalsRidgeLinear" 34 | res 35 | } 36 | -------------------------------------------------------------------------------- /src/computeLinearRidge.h: -------------------------------------------------------------------------------- 1 | /* computeLinearRidge.h is the header file for the source file computeLinearRidge.c */ 2 | 3 | /* This file defines the function computeLinearRidge, computeLinearGeneralizedRidge, computeLinGenExpRidge */ 4 | 5 | /* includes */ 6 | #include "depends.h" 7 | #ifdef HAVE_GSL_HEADER 8 | 9 | /* SVD of any matrix */ 10 | int svdAnyMat(gsl_matrix * X, 11 | gsl_matrix * U, 12 | gsl_matrix * V, 13 | gsl_vector * D); 14 | 15 | /* Prepare the lambdas */ 16 | int prepareLambdas(gsl_vector * y, 17 | gsl_matrix * U, 18 | gsl_vector * D2, 19 | gsl_vector * lambdaVeckHKB, 20 | char * skhkbfilename, 21 | char * sklwfilename, 22 | gsl_vector * lambdaVeckLW, 23 | int randomized, 24 | int s); 25 | 26 | /* compute pvals linear ridge regression */ 27 | void computeLinearPvalsApprox(gsl_matrix * X, 28 | gsl_vector * B, 29 | gsl_vector * y, 30 | double lambda, 31 | char * pvalsfilename); 32 | 33 | /* compute pvals linear ridge regression - Malo's method */ 34 | void computeLinearPvaslMalo(gsl_matrix * X, 35 | gsl_vector * B, 36 | gsl_vector * y, 37 | double lambda, 38 | char * pvalsfilename); 39 | 40 | /* compute pvals linear ridge regression - our method */ 41 | void computeLinearPvalsOurs(gsl_matrix * X, 42 | gsl_vector * B, 43 | gsl_vector * y, 44 | double lambda, 45 | char * pvalsfilename); 46 | 47 | #endif 48 | 49 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(coef,ridgeLinear) 4 | S3method(coef,ridgeLogistic) 5 | S3method(nobs,ridgeLinear) 6 | S3method(plot,pvalsRidgeLinear) 7 | S3method(plot,pvalsRidgeLogistic) 8 | S3method(plot,ridgeLinear) 9 | S3method(plot,ridgeLogistic) 10 | S3method(predict,ridgeLinear) 11 | S3method(predict,ridgeLogistic) 12 | S3method(print,pvalsRidgeLinear) 13 | S3method(print,pvalsRidgeLogistic) 14 | S3method(print,ridgeLinear) 15 | S3method(print,ridgeLogistic) 16 | S3method(print,summary.ridgeLinear) 17 | S3method(print,summary.ridgeLogistic) 18 | S3method(pvals,ridgeLinear) 19 | S3method(pvals,ridgeLogistic) 20 | S3method(summary,ridgeLinear) 21 | S3method(summary,ridgeLogistic) 22 | S3method(vcov,ridgeLinear) 23 | export(linearRidge) 24 | export(linearRidgeGenotypes) 25 | export(linearRidgeGenotypesPredict) 26 | export(logisticRidge) 27 | export(logisticRidgeGenotypes) 28 | export(pvals) 29 | importFrom(grDevices,rainbow) 30 | importFrom(graphics,abline) 31 | importFrom(graphics,lines) 32 | importFrom(graphics,plot) 33 | importFrom(stats,.checkMFClasses) 34 | importFrom(stats,.getXlevels) 35 | importFrom(stats,coef) 36 | importFrom(stats,delete.response) 37 | importFrom(stats,model.frame) 38 | importFrom(stats,model.matrix) 39 | importFrom(stats,model.response) 40 | importFrom(stats,na.pass) 41 | importFrom(stats,nobs) 42 | importFrom(stats,pnorm) 43 | importFrom(stats,predict) 44 | importFrom(stats,printCoefmat) 45 | importFrom(stats,terms) 46 | importFrom(utils,read.table) 47 | useDynLib(ridge, .registration=TRUE) 48 | -------------------------------------------------------------------------------- /R/predict.ridgeLinear.R: -------------------------------------------------------------------------------- 1 | ## predict method for linear ridge regression models 2 | 3 | #' @rdname predict 4 | #' @export 5 | #' @importFrom stats na.pass terms model.matrix model.frame delete.response .checkMFClasses coef 6 | predict.ridgeLinear <- function(object, newdata, 7 | na.action = na.pass, all.coef = FALSE, ...) 8 | { 9 | tt <- terms(object) 10 | 11 | if (!inherits(object, "ridgeLinear")) { 12 | warning("calling predict.ridgeLinear() ...") 13 | } 14 | 15 | if (missing(newdata) || is.null(newdata)) { 16 | newdata <- object$model_frame 17 | } 18 | 19 | 20 | Terms <- delete.response(tt) 21 | m <- model.frame(Terms, newdata, na.action = na.action, 22 | xlev = object$xlevels) 23 | if (!is.null(cl <- attr(Terms, "dataClasses"))) 24 | .checkMFClasses(cl, m) 25 | mm <- X <- model.matrix(Terms, m, contrasts.arg = object$contrasts) 26 | offset <- rep(0, nrow(X)) 27 | if (!is.null(off.num <- attr(tt, "offset"))) 28 | for (i in off.num) offset <- offset + eval(attr(tt, 29 | "variables")[[i + 1]], newdata) 30 | if (!is.null(object$call$offset)) 31 | offset <- offset + eval(object$call$offset, newdata) 32 | 33 | beta <- coef(object, all.coef = all.coef) 34 | 35 | if(all.coef) 36 | res <- apply(beta, 1, function(x){drop(as.matrix(mm) %*% x)}) 37 | else 38 | res <- drop(as.matrix(mm) %*% beta) 39 | res 40 | } 41 | -------------------------------------------------------------------------------- /R/plot.pvalsRidgeLogistic.R: -------------------------------------------------------------------------------- 1 | ## Plot the pval trace 2 | ## For pvalsRidgeLogistic objects 3 | 4 | #' @rdname plot 5 | #' @export 6 | #' @importFrom graphics plot lines abline 7 | #' @importFrom grDevices rainbow 8 | plot.pvalsRidgeLogistic <- function(x, y = NULL, ...) 9 | { 10 | lambda <- x$lambda 11 | automatic <- x$automatic 12 | nPCs <- x$max.nPCs 13 | pval <- -1 * log10(x$pval) 14 | pval[is.infinite(pval)] <- NA 15 | ## x is a pvalsRidgeLinear object 16 | if(length(lambda) == 1) 17 | { 18 | col.vector <- rainbow(length(pval)) 19 | plot(x = rep(lambda, length(pval)), y = pval, xlab = "lambda", ylab = "-log(10) pvalue", col = col.vector, pch = 19) 20 | } else { 21 | col.vector <- rainbow(nrow(pval)) 22 | if (automatic) { 23 | chosen.nPCs <- x$chosen.nPCs 24 | plot(x = seq(nPCs), y = pval[1,], ylim = c(0, max(pval, na.rm = TRUE)), xlab = "nPCs", ylab = "-log(10) pvalue", col = col.vector[1], type = "l", main = "pvalue trace") 25 | for(i in 2:nrow(pval)) 26 | { 27 | lines(x = seq(nPCs), y = pval[i,], col = col.vector[i]) 28 | } 29 | abline(v = chosen.nPCs, lty = 2) 30 | } else { 31 | plot(x = lambda, y = pval[1,], xlim=range(lambda), ylim = c(0, max(pval, na.rm = TRUE)), xlab = "lambda", ylab = "-log(10) pvalue", col = col.vector[1], type = "l", main = "pvalue trace") 32 | for(i in 2:nrow(pval)) 33 | { 34 | lines(x = lambda, y = pval[i,], col = col.vector[i]) 35 | } 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /R/plot.ridgeLogistic.R: -------------------------------------------------------------------------------- 1 | ## plot the ridge trace 2 | ## for ridgeLogistic objects 3 | 4 | #' @rdname plot 5 | #' @export 6 | #' @importFrom stats coef 7 | #' @importFrom graphics plot lines abline 8 | #' @importFrom grDevices rainbow 9 | plot.ridgeLogistic <- function(x, y = NULL, ...) 10 | { 11 | Inter <- x$Inter 12 | lambda <- x$lambda 13 | automatic <- x$automatic 14 | nPCs <- x$max.nPCs 15 | coefs <- rbind(coef(x, all.coef = TRUE)) 16 | if(Inter) 17 | { 18 | coefs <- coefs[,-Inter] 19 | } 20 | ## x is a ridgeLinear object 21 | if(length(lambda) == 1) 22 | { 23 | col.vector <- rainbow(length(coefs)) 24 | plot(x = rep(lambda, length(coefs)), y = coefs, xlab = "lambda", ylab = "coefficient", col = col.vector, pch = 19) 25 | } else { 26 | col.vector <- rainbow(ncol(coefs)) 27 | if (automatic) { 28 | chosen.nPCs <- x$chosen.nPCs 29 | plot(x = seq(nPCs), y = coefs[,1], ylim = range(coefs), xlab = "nPCs", ylab = "coefficient", col = col.vector[1], type = "l", main = "ridge trace") 30 | for(i in 2:ncol(coefs)) 31 | { 32 | lines(x = seq(nPCs), y = coefs[,i], col = col.vector[i]) 33 | } 34 | abline(v = chosen.nPCs, lty = 2) 35 | } else { 36 | plot(x = lambda, y = coefs[,1], xlim=range(lambda), ylim = range(coefs), xlab = "lambda", ylab = "coefficient", col = col.vector[1], type = "l", main = "ridge trace") 37 | for(i in 2:ncol(coefs)) 38 | { 39 | lines(x = lambda, y = coefs[,i], col = col.vector[i]) 40 | } 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /R/plot.pvalsRidgeLinear.R: -------------------------------------------------------------------------------- 1 | ## Plot the pval trace 2 | ## For pvalsRidgeLinear objects 3 | 4 | #' @rdname plot 5 | #' @export 6 | #' @importFrom utils read.table 7 | #' @importFrom graphics plot lines abline 8 | #' @importFrom grDevices rainbow 9 | plot.pvalsRidgeLinear <- function(x, y = NULL, ...) 10 | { 11 | lambda <- x$lambda 12 | automatic <- x$automatic 13 | nPCs <- x$max.nPCs 14 | pval <- -1 * log10(x$pval) 15 | pval[is.infinite(pval)] <- NA 16 | ## x is a pvalsRidgeLinear object 17 | if(length(lambda) == 1) 18 | { 19 | col.vector <- rainbow(length(pval)) 20 | plot(x = rep(lambda, length(pval)), y = pval, xlab = "lambda", ylab = "-log(10) pvalue", col = col.vector, pch = 19, ...) 21 | } else { 22 | col.vector <- rainbow(nrow(pval)) 23 | if (automatic) { 24 | chosen.nPCs <- x$chosen.nPCs 25 | plot(x = seq(nPCs), y = pval[1,], ylim = c(0, max(pval, na.rm = TRUE)), xlab = "nPCs", ylab = "-log(10) pvalue", col = col.vector[1], type = "l", main = 26 | "pvalue trace", ...) 27 | for(i in 2:nrow(pval)) 28 | { 29 | lines(x = seq(nPCs), y = pval[i,], col = col.vector[i], ...) 30 | } 31 | abline(v = chosen.nPCs, lty = 2) 32 | } else { 33 | plot(x = lambda, y = pval[1,], xlim=range(lambda), ylim = c(0, max(pval, na.rm = TRUE)), xlab = "lambda", ylab = "-log(10) pvalue", col = col.vector[1], 34 | type = "l", main = "pvalue trace", ...) 35 | for(i in 2:nrow(pval)) 36 | { 37 | lines(x = lambda, y = pval[i,], col = col.vector[i], ...) 38 | } 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /R/plot.ridgeLinear.R: -------------------------------------------------------------------------------- 1 | ## plot the ridge trace 2 | ## for ridgeLinear objects 3 | 4 | #' @rdname plot 5 | #' @export 6 | #' @importFrom stats coef 7 | #' @importFrom graphics plot lines abline 8 | #' @importFrom grDevices rainbow 9 | plot.ridgeLinear <- function(x, y = NULL, ...) 10 | { 11 | Inter <- x$Inter 12 | lambda <- x$lambda 13 | automatic <- x$automatic 14 | nPCs <- x$max.nPCs 15 | coefs <- rbind(coef(x, all.coef = TRUE)) 16 | if(Inter) 17 | { 18 | coefs <- coefs[,-Inter] 19 | } 20 | ## x is a ridgeLinear object 21 | if(length(lambda) == 1) 22 | { 23 | col.vector <- rainbow(length(coefs)) 24 | plot(x = rep(lambda, length(coefs)), y = coefs, xlab = "lambda", ylab = "coefficient", col = col.vector, pch = 19, ...) 25 | } else { 26 | col.vector <- rainbow(ncol(coefs)) 27 | if (automatic) { 28 | chosen.nPCs <- x$chosen.nPCs 29 | plot(x = seq(nPCs), y = coefs[,1], ylim = range(coefs), xlab = "nPCs", ylab = "coefficient", col = col.vector[1], type = "l", main = "ridge trace", ...) 30 | for(i in 2:ncol(coefs)) 31 | { 32 | lines(x = seq(nPCs), y = coefs[,i], col = col.vector[i], ...) 33 | } 34 | abline(v = chosen.nPCs, lty = 2) 35 | } else { 36 | plot(x = lambda, y = coefs[,1], xlim=range(lambda), ylim = range(coefs), xlab = "lambda", ylab = "coefficient", col = col.vector[1], type = "l", main = 37 | "ridge trace", ...) 38 | for(i in 2:ncol(coefs)) 39 | { 40 | lines(x = lambda, y = coefs[,i], col = col.vector[i], ...) 41 | } 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ridge Logo 2 | 3 | 4 | [![Project Status: Active The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) 5 | [![R-CMD-check](https://github.com/SteffenMoritz/ridge/workflows/R-CMD-check/badge.svg)](https://github.com/SteffenMoritz/ridge/actions) 6 | [![CRAN Version](https://www.r-pkg.org/badges/version/ridge)](https://cran.r-project.org/package=ridge) 7 | [![CRAN Downloads](https://cranlogs.r-pkg.org/badges/ridge)](https://cran.r-project.org/package=ridge) 8 | 9 | 10 | 11 | # ridge: Ridge Regression with automatic selection of the penalty parameter 12 | 13 | ### About 14 | The ridge package offers linear and logistic ridge regression, especially for small 15 | data sets and genome-wide SNP data. 16 | 17 | ### Support 18 | If you found a bug or have suggestions, feel free to get in contact via 19 | steffen.moritz10 at gmail.com 20 | 21 | Since I only have taken over this package (it was orphaned) - 22 | it might be that I can't help with all details. 23 | 24 | Be aware, that therefore I also can not guarantee 25 | to 100% that everything works as expected. 26 | 27 | ### Further information 28 | Erika Cule published also a scientific paper about the method, which you can find here: https://onlinelibrary.wiley.com/doi/abs/10.1002/gepi.21750 29 | 30 | Cule, Erika, and Maria De Iorio. "Ridge regression in prediction problems: automatic choice of the ridge parameter." Genetic epidemiology 37.7 (2013): 704-714. 31 | 32 | 33 | ### Version 34 | **3.3** 35 | 36 | ### License 37 | GPL-2 38 | 39 | -------------------------------------------------------------------------------- /R/pvals.ridgeLogistic.R: -------------------------------------------------------------------------------- 1 | ## computing pvals for ridgeLogistic models 2 | 3 | #' @rdname pvals 4 | #' @export 5 | #' @importFrom stats pnorm 6 | pvals.ridgeLogistic <- function(x, ...) 7 | { 8 | automatic <- x$automatic 9 | chosen.nPCs <- x$chosen.nPCs 10 | max.nPCs <- x$max.nPCs 11 | isScaled <- x$isScaled 12 | B <- x$coef 13 | Inter <- x$Inter 14 | if(Inter) 15 | { 16 | X <- cbind(1,x$x) 17 | } else { 18 | X <- x$x 19 | } 20 | ## lambda may be a vector 21 | lambda <- x$lambda 22 | ## B may be a matrix 23 | xb <- apply(B, 2, function(x){X %*% x}) 24 | expXB <- exp(xb) 25 | p <- expXB / (1 + expXB) 26 | W <- vector("list", length = ncol(p)) 27 | for(i in seq(ncol(p))) 28 | { 29 | W[[i]] <- diag(p[,i]*(1-p[,i]),length(p[,i]),length(p[,i])) 30 | } 31 | KI <- lapply(lambda, function(x){diag(2 * x, dim(X)[2], dim(X)[2])}) 32 | if(Inter) 33 | { 34 | for(i in seq(length(lambda))) 35 | { 36 | KI[[i]][1,1] <- 0 37 | } 38 | } 39 | computeV <- function(W, KI) 40 | { 41 | V <- solve(t(X)%*%W%*%X+KI) %*% (t(X)%*%W%*%X) %*% solve(t(X)%*%W%*%X+KI) 42 | return(V) 43 | } 44 | V <- mapply("computeV", W, KI, SIMPLIFY = FALSE) 45 | se <- sapply(V, function(x){sqrt(diag(x))}) 46 | tstat <-B/se 47 | pval <- 2*(1 - pnorm(abs(tstat))) 48 | if(Inter) 49 | { 50 | B <- B[-1, ] 51 | se <- se[-1, ] 52 | tstat <- tstat[-1, ] 53 | pval <- pval[-1, ] 54 | } 55 | res <- list(coef = cbind(B), se = cbind(se), tstat = cbind(tstat), pval = cbind(pval), isScaled = isScaled, automatic = automatic, lambda = lambda, chosen.nPCs = chosen.nPCs, max.nPCs = max.nPCs) 56 | class(res) <- "pvalsRidgeLogistic" 57 | res 58 | } 59 | -------------------------------------------------------------------------------- /src/computePvals.h: -------------------------------------------------------------------------------- 1 | /* 2 | header file for computePvals.h 3 | Contains prototypes for all the computePvals functions we could need 4 | i.e. the approx ones and the permutation ones for 5 | different sorts of models 6 | */ 7 | 8 | /* includes */ 9 | #include "depends.h" 10 | #ifdef HAVE_GSL_HEADER 11 | 12 | #include "ReadInData.h" 13 | #include "ridgeRegressionFunctions.h" 14 | #include "coordinateDescent.h" 15 | 16 | 17 | /* float version of cumulative distribution function */ 18 | float my_ugaussian_function(float x); 19 | 20 | /* Compute Approx Ps - Linear */ 21 | int computeApproxPsLinear(GSL_TYPE(vector) * B, 22 | GSL_TYPE(vector) * y, 23 | GSL_TYPE(matrix) * U, 24 | GSL_TYPE(vector) * D, 25 | GSL_TYPE(vector) * D2, 26 | GSL_TYPE(matrix) * V, 27 | PREC k, 28 | GSL_TYPE(vector) * approxPs); 29 | 30 | /* Compute Approx Ps - Generalized Linear*/ 31 | int computeApproxPsGeneralizedLinear(GSL_TYPE(vector) * beta, 32 | GSL_TYPE(matrix) * predictors, 33 | GSL_TYPE(vector) * y, 34 | GSL_TYPE(vector) * shrinkage, 35 | int intercept_flag, 36 | GSL_TYPE(vector) * approxPs); 37 | 38 | /* Compute ApproxPs - Logistic */ 39 | int computeApproxPsLogistic(GSL_TYPE(vector) * B, 40 | GSL_TYPE(matrix) * X, 41 | GSL_TYPE(vector) * shrinkage, 42 | int intercept_flag, 43 | GSL_TYPE(vector) * approxPs); 44 | 45 | 46 | /* Compute PermPs - all models */ 47 | int computePermPs(GSL_TYPE(vector) * permPs, 48 | GSL_TYPE(matrix) * pred, 49 | GSL_TYPE(vector) * pheno_linear, 50 | gsl_vector_int * pheno_logistic, 51 | GSL_TYPE(vector) * Bridge, 52 | PREC lambda, 53 | GSL_TYPE(vector) * shrinkage, 54 | int NPERM, 55 | int SEED, 56 | int intercept_flag, 57 | char *model); 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /R/computeRidgeLogistic.R: -------------------------------------------------------------------------------- 1 | computeRidgeLogistic <- function(X, y, k, intercept = TRUE, doff = FALSE) 2 | { 3 | if(is.null(ncol(X))) 4 | { 5 | X <- cbind(X) 6 | } 7 | if(is.null(colnames(X))) 8 | { 9 | colnames(X) <- paste("pred", seq(ncol(X)), sep = "") 10 | } 11 | if(intercept) 12 | { 13 | X <- cbind(1, X) 14 | colnames(X)[1] <- "(Intercept)" 15 | } 16 | ##Initialize the estimate of B 17 | B <- numeric(dim(X)[2]) 18 | ##Initial objective function 19 | objOld <- objectiveFunction(B, X, y, k, intercept) 20 | ##Updated estimates of beta 21 | newB <- updateBeta(B, X, y, k, intercept, doff) 22 | if(doff) 23 | { 24 | objNew <- objectiveFunction(newB$updatedBeta, X, y, k, intercept) 25 | } else { 26 | objNew <- objectiveFunction(newB, X, y, k, intercept) 27 | } 28 | index <- 0 29 | while(abs(diff(c(objOld,objNew)))>10^-6) 30 | { 31 | index <- index+1 32 | objOld <- objNew 33 | if(doff) 34 | { 35 | B <- newB$updatedBeta 36 | } else { 37 | B <- newB 38 | } 39 | newB <- updateBeta(B, X, y, k, intercept, doff) 40 | if(doff){ 41 | objNew <- objectiveFunction(newB$updatedBeta, X, y, k, intercept) 42 | } else { 43 | objNew <- objectiveFunction(newB, X, y, k, intercept) 44 | } 45 | } 46 | if(doff) 47 | { 48 | B <- newB$updatedBeta 49 | W <- newB$W 50 | kI <- newB$kI 51 | } else { 52 | B <- newB 53 | } 54 | if(doff) 55 | { 56 | H <- W %*% X %*% solve(t(X) %*% W %*% X + kI) %*% t(X) 57 | doff <- c( sum(diag(H)) , sum(diag(H %*% t(H)))) 58 | return(list(B = B, doff = doff)) 59 | } 60 | else 61 | return(B) 62 | } 63 | -------------------------------------------------------------------------------- /src/config.h.in: -------------------------------------------------------------------------------- 1 | /* src/config.h.in. Generated from configure.ac by autoheader. */ 2 | 3 | /* Define to 1 if you have the header file. */ 4 | #undef HAVE_GSL_GSL_VERSION_H 5 | 6 | /* Description */ 7 | #undef HAVE_GSL_HEADER 8 | 9 | /* Define to 1 if you have the header file. */ 10 | #undef HAVE_INTTYPES_H 11 | 12 | /* Define to 1 if you have the header file. */ 13 | #undef HAVE_STDINT_H 14 | 15 | /* Define to 1 if you have the header file. */ 16 | #undef HAVE_STDIO_H 17 | 18 | /* Define to 1 if you have the header file. */ 19 | #undef HAVE_STDLIB_H 20 | 21 | /* Define to 1 if you have the header file. */ 22 | #undef HAVE_STRINGS_H 23 | 24 | /* Define to 1 if you have the header file. */ 25 | #undef HAVE_STRING_H 26 | 27 | /* Define to 1 if you have the header file. */ 28 | #undef HAVE_SYS_STAT_H 29 | 30 | /* Define to 1 if you have the header file. */ 31 | #undef HAVE_SYS_TYPES_H 32 | 33 | /* Define to 1 if you have the header file. */ 34 | #undef HAVE_UNISTD_H 35 | 36 | /* Define to the address where bug reports for this package should be sent. */ 37 | #undef PACKAGE_BUGREPORT 38 | 39 | /* Define to the full name of this package. */ 40 | #undef PACKAGE_NAME 41 | 42 | /* Define to the full name and version of this package. */ 43 | #undef PACKAGE_STRING 44 | 45 | /* Define to the one symbol short name of this package. */ 46 | #undef PACKAGE_TARNAME 47 | 48 | /* Define to the home page for this package. */ 49 | #undef PACKAGE_URL 50 | 51 | /* Define to the version of this package. */ 52 | #undef PACKAGE_VERSION 53 | 54 | /* Define to 1 if all of the C90 standard headers exist (not just the ones 55 | required in a freestanding environment). This macro is provided for 56 | backward compatibility; new code need not use it. */ 57 | #undef STDC_HEADERS 58 | -------------------------------------------------------------------------------- /man/ridge-internal.Rd: -------------------------------------------------------------------------------- 1 | \name{computeRidgeLogistic} 2 | \alias{computeRidgeLogistic} 3 | \alias{updateBeta} 4 | \alias{objectiveFunction} 5 | \title{ 6 | Internal functions for logistic ridge regression. 7 | } 8 | \description{ 9 | Internal functions for logisitc ridge regression. 10 | } 11 | \usage{ 12 | computeRidgeLogistic(X, y, k, intercept = TRUE, doff = FALSE) 13 | updateBeta(B, X, y, k, intercept = TRUE, doff = FALSE) 14 | objectiveFunction(B, X, y, k, intercept = TRUE) 15 | } 16 | \arguments{ 17 | \item{X}{ 18 | Matrix of predictors. 19 | } 20 | \item{y}{ 21 | vector of outcomes. 22 | } 23 | \item{k}{ 24 | ridge regression parameter. 25 | } 26 | \item{intercept}{ 27 | does the model have an intercept? 28 | } 29 | \item{doff}{ 30 | should degrees of freedom of the model be computed? 31 | } 32 | } 33 | \details{ 34 | These functions are called in the function \code{logisticRidge}. 35 | They are not for calling directly by the user. 36 | } 37 | \value{ 38 | \code{computeRidgeLogistic} returns the fitted logistic ridge regression coefficients. If \code{doff = TRUE} 39 | it also returns the degrees of freedom of the model and the degrees of freedom for variance. 40 | \code{updateBeta} returns the fitted coefficients after one iteration of the Newton-Raphson algorithm. If 41 | \code{doff = TRUE}, it also returns the penalty matrix and weights matrix used to compute the degrees of 42 | freedom. 43 | \code{objectiveFunction} returns the objective function for the current iteration of the Newton-Raphson 44 | algorithm. 45 | } 46 | \references{ 47 | A semi-automatic method to guide the choice of ridge parameter in ridge regression. Cule, E. and De Iorio, M. (2012) arXiv:1205.0686v1 [stat.AP] 48 | } 49 | \author{ 50 | Erika Cule 51 | } 52 | \note{ 53 | These functions are not to be called directly by the user. They should be called via \code{logisticRidge}. 54 | } 55 | 56 | \seealso{ 57 | \code{\link{logisticRidge}} 58 | } 59 | \keyword{internal} 60 | -------------------------------------------------------------------------------- /R/print.pvalsRidgeLinear.R: -------------------------------------------------------------------------------- 1 | ## print method for pvalsRidgeLinear objects 2 | 3 | #' @rdname print 4 | #' @export 5 | #' @importFrom stats printCoefmat 6 | print.pvalsRidgeLinear <- function(x, digits = max(3, getOption("digits") - 3), signif.stars = getOption("show.signif.stars"), all.coef = FALSE, ...) 7 | { 8 | if(x$automatic && all.coef == FALSE) 9 | { 10 | i <- x$chosen.nPCs 11 | coefs <- cbind(x$coef[,i], x$se[,i], x$tstat[,i], x$pval[,i]) 12 | rownames(coefs) <- rownames(x$coef) 13 | if(x$isScaled) 14 | { 15 | colnames(coefs) <- c("Estimate (scaled)", "Std. Error (scaled)", "t value (scaled)", "Pr(>|t|)") 16 | } else { 17 | colnames(coefs) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)") 18 | } 19 | cat(gettextf("\n\tlambda %f chosen automatically using %d PCs\n\n", x$lambda[i], x$chosen.nPCs)) 20 | printCoefmat(coefs, digits = digits, signif.stars = signif.stars, 21 | na.print = "NA", ...) 22 | } else { 23 | for(i in seq(length(x$lambda))) 24 | { 25 | coefs <- cbind(x$coef[,i], x$se[,i], x$tstat[,i], x$pval[,i]) 26 | rownames(coefs) <- rownames(x$coef) 27 | if(x$isScaled) 28 | { 29 | colnames(coefs) <- c("Estimate (scaled)", "Std. Error (scaled)", "t value (scaled)", "Pr(>|t|)") 30 | } else { 31 | colnames(coefs) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)") 32 | } 33 | cat(gettextf("\nlambda %f", x$lambda[i])) 34 | if(x$automatic && (x$chosen.nPCs == i)) 35 | cat(", chosen automatically") 36 | if(!is.null(x$max.nPCs)) 37 | cat(gettextf(", computed using %d PCs\n", i)) 38 | cat("\n") 39 | printCoefmat(coefs, digits = digits, signif.stars = signif.stars, 40 | na.print = "NA", ...) 41 | } 42 | } 43 | invisible(x) 44 | } 45 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | 2 | AC_INIT([ridge],[version-3.3],[steffen.moritz10@gmail.com],[ridge_3.3.tar.gz],[http://cran.r-project.org/web/packages/ridge]) 3 | 4 | AC_CONFIG_HEADERS([src/config.h]) 5 | 6 | 7 | : ${R_HOME=`R RHOME`} 8 | if test -z "${R_HOME}"; then 9 | echo "could not determine R_HOME" 10 | exit 1 11 | fi 12 | CC=`"${R_HOME}/bin/R" CMD config CC` 13 | CFLAGS=`"${R_HOME}/bin/R" CMD config CFLAGS` 14 | CPPFLAGS=`"${R_HOME}/bin/R" CMD config CPPFLAGS` 15 | LDFLAGS=`"${R_HOME}/bin/R" CMD config LDFLAGS` 16 | 17 | # Checks for common programs using default macros 18 | AC_PROG_CC 19 | 20 | ## Use gsl-config to find arguments for compiler and linker flags 21 | ## 22 | ## Check for non-standard programs: gsl-config(1) 23 | AC_PATH_PROG([GSL_CONFIG], [gsl-config]) 24 | ## If gsl-config was found, let's use it 25 | if test "${GSL_CONFIG}" != ""; then 26 | # Use gsl-config for header and linker arguments 27 | GSL_CFLAGS=`${GSL_CONFIG} --cflags` 28 | GSL_LIBS=`${GSL_CONFIG} --libs` 29 | HAVE_GSL=TRUE 30 | else 31 | AC_MSG_WARN([gsl-config not found, is GSL installed?]) 32 | AC_MSG_WARN([ridge will be installed but some functions will be unavailable]) 33 | HAVE_GSL=FALSE 34 | fi 35 | 36 | if test "${HAVE_GSL}" = TRUE; then 37 | dnl Define HAVE_GSL_H in src/config.h 38 | AC_CHECK_HEADERS([gsl/gsl_version.h], 39 | AC_DEFINE(HAVE_GSL_HEADER,[], [Description]), [HAVE_GSL=FALSE]) 40 | if test "${HAVE_GSL}" = FALSE; then 41 | AC_MSG_WARN([gsl headers not found, perhaps check the path?]) 42 | AC_MSG_WARN([ridge will be installed, but some functions will be unavailable]) 43 | fi 44 | fi 45 | 46 | 47 | dnl Substitute HAVE_GSL in the functions R/l*Genotypes*.R.in 48 | AC_SUBST(HAVE_GSL) 49 | AC_CONFIG_FILES([R/l*Genotypes*.R]) 50 | 51 | 52 | 53 | 54 | dnl Now substitute these variables in src/Makevars.in to create src/Makevars 55 | if test "${HAVE_GSL}" = TRUE; then 56 | AC_SUBST(GSL_CFLAGS) 57 | AC_SUBST(GSL_LIBS) 58 | AC_CONFIG_FILES([src/Makevars]) 59 | AC_OUTPUT 60 | fi -------------------------------------------------------------------------------- /R/print.pvalsRidgeLogistic.R: -------------------------------------------------------------------------------- 1 | ## print method for pvalsRidgeLogistic objects 2 | 3 | #' @rdname print 4 | #' @export 5 | #' @importFrom stats printCoefmat 6 | print.pvalsRidgeLogistic <- function(x, digits = max(3, getOption("digits") - 3), signif.stars = getOption("show.signif.stars"), all.coef = FALSE, ...) 7 | { 8 | if(x$automatic && all.coef == FALSE) 9 | { 10 | i <- x$chosen.nPCs 11 | coefs <- cbind(x$coef[,i], x$se[,i], x$tstat[,i], x$pval[,i]) 12 | rownames(coefs) <- rownames(x$coef) 13 | if(x$isScaled) 14 | { 15 | colnames(coefs) <- c("Estimate (scaled)", "Std. Error (scaled)", "t value (scaled)", "Pr(>|t|)") 16 | } else { 17 | colnames(coefs) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)") 18 | } 19 | cat(gettextf("\n\tlambda %f chosen automatically using %d PCs\n\n", x$lambda[i], x$chosen.nPCs)) 20 | printCoefmat(coefs, digits = digits, signif.stars = signif.stars, 21 | na.print = "NA", ...) 22 | } else { 23 | for(i in seq(length(x$lambda))) 24 | { 25 | coefs <- cbind(x$coef[,i], x$se[,i], x$tstat[,i], x$pval[,i]) 26 | rownames(coefs) <- rownames(x$coef) 27 | if(x$isScaled) 28 | { 29 | colnames(coefs) <- c("Estimate (scaled)", "Std. Error (scaled)", "t value (scaled)", "Pr(>|t|)") 30 | } else { 31 | colnames(coefs) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)") 32 | } 33 | cat(gettextf("\nlambda %f", x$lambda[i])) 34 | if(x$automatic && (x$chosen.nPCs == i)) 35 | cat(", chosen automatically") 36 | if(!is.null(x$max.nPCs)) 37 | cat(gettextf(", computed using %d PCs\n", i)) 38 | cat("\n") 39 | printCoefmat(coefs, digits = digits, signif.stars = signif.stars, 40 | na.print = "NA", ...) 41 | } 42 | } 43 | invisible(x) 44 | } 45 | 46 | -------------------------------------------------------------------------------- /R/predict.ridgeLogistic.R: -------------------------------------------------------------------------------- 1 | ## predict method for logistic ridge regression models 2 | 3 | ## fitted.values are the predicted probabilities 4 | ## i.e. exp(XB) / (1 + exp(XB)) 5 | ## linear.predictors are the scores (X %*% B) 6 | ## on the scale of the original data 7 | 8 | ## predict.glm code 9 | 10 | #' @rdname predict 11 | #' @export 12 | #' @importFrom stats na.pass terms model.frame delete.response .checkMFClasses model.matrix coef 13 | predict.ridgeLogistic <- function (object, newdata = NULL, type = c("link", "response"), 14 | na.action = na.pass, all.coef = FALSE, ...) 15 | { 16 | tt <- terms(object) 17 | type <- match.arg(type) ## Match the type argument 18 | na.act <- object$na.action ## Get the na.action statement 19 | object$na.action <- NULL ## Set object$na.action to NULL 20 | 21 | if (missing(newdata) || is.null(newdata)) { ## If there is no newdata 22 | newdata <- object$model_frame 23 | } 24 | 25 | Terms <- delete.response(tt) 26 | m <- model.frame(Terms, newdata, na.action = na.action, 27 | xlev = object$xlevels) 28 | if (!is.null(cl <- attr(Terms, "dataClasses"))) 29 | .checkMFClasses(cl, m) 30 | mm <- X <- model.matrix(Terms, m) 31 | offset <- rep(0, nrow(X)) 32 | if (!is.null(off.num <- attr(tt, "offset"))) 33 | for (i in off.num) offset <- offset + eval(attr(tt, 34 | "variables")[[i + 1]], newdata) 35 | if (!is.null(object$call$offset)) 36 | offset <- offset + eval(object$call$offset, newdata) 37 | 38 | hasintercept <- attr(tt, "intercept") 39 | 40 | ll <- attr(tt, "term.labels") 41 | 42 | if(hasintercept) 43 | mm <- cbind(1, X[,ll]) 44 | else 45 | mm <- X[,ll] 46 | B <- coef(object, all.coef = all.coef) 47 | if(all.coef) 48 | { 49 | XB <- apply(B, 1, function(x) {as.matrix(X) %*% x}) 50 | } else { 51 | XB <- as.matrix(X) %*% B 52 | } 53 | expXB <- exp(XB) 54 | p <- expXB / (1 + expXB) 55 | pred <- switch(type, link = XB, response = p) 56 | 57 | pred 58 | } 59 | -------------------------------------------------------------------------------- /src/config.h: -------------------------------------------------------------------------------- 1 | /* src/config.h. Generated from config.h.in by configure. */ 2 | /* src/config.h.in. Generated from configure.ac by autoheader. */ 3 | 4 | /* Define to 1 if you have the header file. */ 5 | #define HAVE_GSL_GSL_VERSION_H 1 6 | 7 | /* Description */ 8 | #define HAVE_GSL_HEADER /**/ 9 | 10 | /* Define to 1 if you have the header file. */ 11 | #define HAVE_INTTYPES_H 1 12 | 13 | /* Define to 1 if you have the header file. */ 14 | #define HAVE_STDINT_H 1 15 | 16 | /* Define to 1 if you have the header file. */ 17 | #define HAVE_STDIO_H 1 18 | 19 | /* Define to 1 if you have the header file. */ 20 | #define HAVE_STDLIB_H 1 21 | 22 | /* Define to 1 if you have the header file. */ 23 | #define HAVE_STRINGS_H 1 24 | 25 | /* Define to 1 if you have the header file. */ 26 | #define HAVE_STRING_H 1 27 | 28 | /* Define to 1 if you have the header file. */ 29 | #define HAVE_SYS_STAT_H 1 30 | 31 | /* Define to 1 if you have the header file. */ 32 | #define HAVE_SYS_TYPES_H 1 33 | 34 | /* Define to 1 if you have the header file. */ 35 | #define HAVE_UNISTD_H 1 36 | 37 | /* Define to the address where bug reports for this package should be sent. */ 38 | #define PACKAGE_BUGREPORT "steffen.moritz10@gmail.com" 39 | 40 | /* Define to the full name of this package. */ 41 | #define PACKAGE_NAME "ridge" 42 | 43 | /* Define to the full name and version of this package. */ 44 | #define PACKAGE_STRING "ridge version-3.2" 45 | 46 | /* Define to the one symbol short name of this package. */ 47 | #define PACKAGE_TARNAME "ridge_3.2.tar.gz" 48 | 49 | /* Define to the home page for this package. */ 50 | #define PACKAGE_URL "http://cran.r-project.org/web/packages/ridge" 51 | 52 | /* Define to the version of this package. */ 53 | #define PACKAGE_VERSION "version-3.2" 54 | 55 | /* Define to 1 if all of the C90 standard headers exist (not just the ones 56 | required in a freestanding environment). This macro is provided for 57 | backward compatibility; new code need not use it. */ 58 | #define STDC_HEADERS 1 59 | -------------------------------------------------------------------------------- /src/linearFunctions.c: -------------------------------------------------------------------------------- 1 | #include "linearFunctions.h" 2 | #ifdef HAVE_GSL_HEADER 3 | 4 | GSL_TYPE(vector) * readLinearPhenotypes(char * phenotypefilename, 5 | int NINDIV) 6 | { 7 | GSL_TYPE(vector) * phen = GSL_FUNCTION(vector,calloc)(NINDIV); 8 | /* Open a file for reading */ 9 | FILE * phenofile = fopen(phenotypefilename,"r"); 10 | /* Scan the phenotypes into the int vector */ 11 | GSL_FUNCTION(vector,fscanf)(phenofile, phen); 12 | /* Close the file for reading */ 13 | fclose(phenofile); 14 | return phen; 15 | } 16 | 17 | /* */ 18 | 19 | /* compute Kr based on a, Z, y and r */ 20 | 21 | int computeLinearKr(GSL_TYPE(vector) * a, 22 | GSL_TYPE(matrix) * Z, 23 | GSL_TYPE(vector) * y, 24 | GSL_TYPE(vector) * D2, 25 | int r, 26 | PREC * kr, 27 | PREC * DofF) 28 | { 29 | // n is the number of individuals 30 | int n = y->size; 31 | // A vector view of a 32 | GSL_FUNCTION(vector,view) a_view = GSL_FUNCTION(vector,subvector)(a, 0, r); 33 | // The denominator of Kr 34 | PREC denom = 0.0; 35 | // Compute it 36 | GSL_BLAS_FUNCTION(dot)(&a_view.vector, &a_view.vector, &denom); 37 | // A matrix view of W 38 | GSL_FUNCTION(matrix,view) Zview = GSL_FUNCTION(matrix,submatrix)(Z, 0, 0, n, r); 39 | // Make Wview * a_view 40 | // A vector for the residuals 41 | GSL_TYPE(vector) * resid = GSL_FUNCTION(vector,calloc)(n); 42 | // Make the fitted ys and put them in the resid vector 43 | BLAS_FUNCTION(gemv)(CblasNoTrans, 1.0, &Zview.matrix, &a_view.vector, 0.0, resid); 44 | // Make the residual vector 45 | GSL_FUNCTION(vector,scale)(resid, -1); 46 | GSL_FUNCTION(vector,add)(resid, y); 47 | // Make the crossproduct 48 | PREC crossprod = 0.0; 49 | GSL_BLAS_FUNCTION(dot)(resid, resid, &crossprod); 50 | /* Divide it by n - r */ 51 | crossprod = crossprod / ((PREC)n - (PREC) r); 52 | // times it by i 53 | PREC numerator = ((PREC) r) * crossprod; 54 | *kr = numerator / denom; 55 | GSL_FUNCTION(vector,free)(resid); 56 | /* Compute DofF */ 57 | computeDofF(D2, *kr, DofF); 58 | return 0; 59 | } 60 | 61 | #endif 62 | 63 | typedef int make_iso_compilers_happy; 64 | -------------------------------------------------------------------------------- /src/depends.h: -------------------------------------------------------------------------------- 1 | /* Pre-requisites for the package regression */ 2 | #include 3 | #include "config.h" 4 | 5 | #ifdef HAVE_GSL_HEADER 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include "R.h" 30 | 31 | #if _CUDA_ 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #endif 38 | 39 | #if _CUDA_ 40 | #define PREC float 41 | #define PREC_EPS FLT_EPSILON 42 | #define GSL_TYPE(type) gsl_ ## type ## _float 43 | #define GSL_FUNCTION(type,name) gsl_ ## type ## _float_ ## name 44 | #define GSL_STATS_FUNCTION(name) gsl_stats ## _float_ ## name 45 | #define MATHS_FUNCTION(name) name ## f 46 | #define SVD_FUNCTION svdAnyMatCuda 47 | #define BLAS_FUNCTION(name) my_cula_s ## name 48 | #define MY_FUNCTION(name) my_cula_ ## name 49 | #define UGAUSSIAN_FUNCTION my_ugaussian_function 50 | #define GSL_BLAS_FUNCTION(name) gsl_blas_s ## name 51 | #define PREPARE_FUNCTION(name) prepare ## name ## ForCoordinateDescentCuda 52 | #define PREC_DIFF -4 53 | #else 54 | #define PREC double 55 | #define PREC_EPS DBL_EPSILON 56 | #define GSL_TYPE(type) gsl_ ## type 57 | #define GSL_FUNCTION(type,name) gsl_ ## type ## _ ## name 58 | #define GSL_STATS_FUNCTION(name) gsl_stats ## _ ## name 59 | #define MATHS_FUNCTION(name) name 60 | #define SVD_FUNCTION svdAnyMat 61 | #define BLAS_FUNCTION(name) gsl_blas_d ## name 62 | #define MY_FUNCTION(name) my_gsl_ ## name 63 | #define UGAUSSIAN_FUNCTION gsl_cdf_ugaussian_P 64 | #define GSL_BLAS_FUNCTION(name) gsl_blas_d ## name 65 | #define PREPARE_FUNCTION(name) prepare ## name ## ForCoordinateDescent 66 | #define PREC_DIFF -6 67 | #endif 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /R/print.summary.ridgeLogistic.R: -------------------------------------------------------------------------------- 1 | ## S3 method for class 'summary.ridgeLogistic' 2 | 3 | #' @rdname print 4 | #' @export 5 | #' @importFrom stats printCoefmat 6 | print.summary.ridgeLogistic <- function(x, digits = max(3, getOption("digits") - 3), 7 | signif.stars = getOption("show.signif.stars"), ...) 8 | { 9 | summaries <- x$summaries 10 | cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), 11 | "\n\n", sep = "") 12 | if(x$automatic && !x$all.coef) 13 | { 14 | chosenSummary <- summaries[[x$chosen.nPCs]] 15 | cat("\nCoefficients:\n") 16 | coefs <- chosenSummary$coefficients 17 | printCoefmat(coefs, digits = digits, signif.stars = signif.stars, 18 | na.print = "NA", ...) 19 | cat("\nRidge paramter:", chosenSummary$lambda) 20 | if(x$automatic) 21 | cat(", chosen automatically") 22 | if(!is.null(x$chosen.nPCs)) 23 | cat(gettextf(", computed using %d PCs\n", x$chosen.nPCs)) 24 | else 25 | cat("\n") 26 | ## df: degrees of freedom: model, variance 27 | cat("\nDegrees of freedom: model", format(signif(chosenSummary$df[1], digits)), ", variance", format(signif(chosenSummary$df[2], digits)), "\n") 28 | cat("\n") 29 | } else { 30 | for(i in seq(length(x$summaries))) 31 | { 32 | chosenSummary <- summaries[[i]] 33 | cat("\nCoefficients:\n") 34 | coefs <- chosenSummary$coefficients 35 | printCoefmat(coefs, digits = digits, signif.stars = signif.stars, 36 | na.print = "NA", ...) 37 | cat("\nRidge paramter:", chosenSummary$lambda) 38 | if(x$automatic && x$chosen.nPCs == i) 39 | cat(", chosen automatically") 40 | if(!is.null(chosenSummary$nPCs)) 41 | cat(gettextf(", computed using %d PCs\n", chosenSummary$nPCs)) 42 | else 43 | cat("\n") 44 | ## df: degrees of freedom: model, variance 45 | cat("\nDegrees of freedom: model", format(signif(chosenSummary$df[1], digits)), ", variance", format(signif(chosenSummary$df[2], digits)), "\n") 46 | invisible(x) 47 | } ## Ends for i in seq(length(x$summaries)) 48 | } ## Ends else 49 | } ## Ends function 50 | -------------------------------------------------------------------------------- /R/logisticRidgeGenotypesPredict.R: -------------------------------------------------------------------------------- 1 | ## Logistic Ridge Big Predict function (calls C) 2 | 3 | logisticRidgeGenotypesPredict <- function(genotypesfilename, 4 | betafilename, 5 | phenotypesfilename = NULL, 6 | verbose = FALSE) 7 | { 8 | if(!TRUE) 9 | stop("GSL >=1.14 is not installed, you cannot use this function") 10 | ## Tilde expansion of genotypesfilename 11 | ## (Because the C code cannot cope with the tilde) 12 | genotypesfilename <- path.expand(genotypesfilename) 13 | ## Check phenotypes file for reading 14 | ## mode = 4 tests for read permission 15 | if(file.access(names = genotypesfilename, mode = 4)) 16 | stop(gettextf("Cannot open file %s for reading", genotypesfilename)) 17 | ## Tilde expansion of betafilename 18 | ## (Because the C code cannot cope with the tilde) 19 | betafilename <- path.expand(betafilename) 20 | ## Check genotypes file for reading 21 | ## mode = 4 tests for read permission 22 | if(file.access(names = betafilename, mode = 4)) 23 | stop(gettextf("Cannot open file %s for reading", betafilename)) 24 | ## Check beta file name is set 25 | ## If it is not set it to beta.dat (print a warning) 26 | if(is.null(phenotypesfilename)) 27 | { 28 | phenotypesFileExists <- FALSE 29 | phenotypesfilename <- tempfile(pattern = "beta", fileext = ".dat") 30 | } else { 31 | phenotypesFileExists <- TRUE 32 | ## Else do the tilde expansion on betafilename 33 | ## (Because the C code cannot cope with the tilde) 34 | phenotypesfilename <- path.expand(phenotypesfilename) 35 | } 36 | res <- .C(regression_wrapper_function, 37 | genofilename = as.character(genotypesfilename), 38 | phenofilename = as.character(phenotypesfilename), 39 | betafilename = as.character(betafilename), 40 | approxfilename = as.character("NULL"), 41 | permfilename = as.character("NULL"), 42 | thinfilename = as.character("NULL"), 43 | intercept = as.integer(1), 44 | lambda = as.double(-1), 45 | model = as.character("logistic"), 46 | predict = as.integer(1), 47 | verbose = as.integer(verbose)) 48 | y <- read.table(phenotypesfilename, colClasses = c("numeric"), col.names = c("PredictedPhenotypes")) 49 | if(!phenotypesFileExists) 50 | unlink(phenotypesfilename) 51 | return(y) 52 | } 53 | -------------------------------------------------------------------------------- /R/logisticRidgeGenotypesPredict.R.in: -------------------------------------------------------------------------------- 1 | ## Logistic Ridge Big Predict function (calls C) 2 | 3 | logisticRidgeGenotypesPredict <- function(genotypesfilename, 4 | betafilename, 5 | phenotypesfilename = NULL, 6 | verbose = FALSE) 7 | { 8 | if(!@HAVE_GSL@) 9 | stop("GSL >=1.14 is not installed, you cannot use this function") 10 | ## Tilde expansion of genotypesfilename 11 | ## (Because the C code cannot cope with the tilde) 12 | genotypesfilename <- path.expand(genotypesfilename) 13 | ## Check phenotypes file for reading 14 | ## mode = 4 tests for read permission 15 | if(file.access(names = genotypesfilename, mode = 4)) 16 | stop(gettextf("Cannot open file %s for reading", genotypesfilename)) 17 | ## Tilde expansion of betafilename 18 | ## (Because the C code cannot cope with the tilde) 19 | betafilename <- path.expand(betafilename) 20 | ## Check genotypes file for reading 21 | ## mode = 4 tests for read permission 22 | if(file.access(names = betafilename, mode = 4)) 23 | stop(gettextf("Cannot open file %s for reading", betafilename)) 24 | ## Check beta file name is set 25 | ## If it is not set it to beta.dat (print a warning) 26 | if(is.null(phenotypesfilename)) 27 | { 28 | phenotypesFileExists <- FALSE 29 | phenotypesfilename <- tempfile(pattern = "beta", fileext = ".dat") 30 | } else { 31 | phenotypesFileExists <- TRUE 32 | ## Else do the tilde expansion on betafilename 33 | ## (Because the C code cannot cope with the tilde) 34 | phenotypesfilename <- path.expand(phenotypesfilename) 35 | } 36 | res <- .C(regression_wrapper_function, 37 | genofilename = as.character(genotypesfilename), 38 | phenofilename = as.character(phenotypesfilename), 39 | betafilename = as.character(betafilename), 40 | approxfilename = as.character("NULL"), 41 | permfilename = as.character("NULL"), 42 | thinfilename = as.character("NULL"), 43 | intercept = as.integer(1), 44 | lambda = as.double(-1), 45 | model = as.character("logistic"), 46 | predict = as.integer(1), 47 | verbose = as.integer(verbose)) 48 | y <- read.table(phenotypesfilename, colClasses = c("numeric"), col.names = c("PredictedPhenotypes")) 49 | if(!phenotypesFileExists) 50 | unlink(phenotypesfilename) 51 | return(y) 52 | } 53 | -------------------------------------------------------------------------------- /R/linearRidgeGenotypesPredict.R: -------------------------------------------------------------------------------- 1 | ## Linear Ridge Big Predict function (calls C) 2 | 3 | #' @export 4 | #' @importFrom utils read.table 5 | linearRidgeGenotypesPredict <- function(genotypesfilename, 6 | betafilename, 7 | phenotypesfilename = NULL, 8 | verbose = FALSE) 9 | { 10 | if(!TRUE) 11 | stop("GSL >=1.14 is not installed, you cannot use this function") 12 | ## Tilde expansion of genotypesfilename 13 | ## (Because the C code cannot cope with the tilde) 14 | genotypesfilename <- path.expand(genotypesfilename) 15 | ## Check phenotypes file for reading 16 | ## mode = 4 tests for read permission 17 | if(file.access(names = genotypesfilename, mode = 4)) 18 | stop(gettextf("Cannot open file %s for reading", genotypesfilename)) 19 | ## Tilde expansion of betafilename 20 | ## (Because the C code cannot cope with the tilde) 21 | betafilename <- path.expand(betafilename) 22 | ## Check genotypes file for reading 23 | ## mode = 4 tests for read permission 24 | if(file.access(names = betafilename, mode = 4)) 25 | stop(gettextf("Cannot open file %s for reading", betafilename)) 26 | ## Check beta file name is set 27 | ## If it is not set it to beta.dat (print a warning) 28 | if(is.null(phenotypesfilename)) 29 | { 30 | phenotypesFileExists <- FALSE 31 | phenotypesfilename <- tempfile(pattern = "beta", fileext = ".dat") 32 | } else { 33 | phenotypesFileExists <- TRUE 34 | ## Else do the tilde expansion on betafilename 35 | ## (Because the C code cannot cope with the tilde) 36 | phenotypesfilename <- path.expand(phenotypesfilename) 37 | } 38 | res <- .C(regression_wrapper_function, 39 | genofilename = as.character(genotypesfilename), 40 | phenofilename = as.character(phenotypesfilename), 41 | betafilename = as.character(betafilename), 42 | approxfilename = as.character("NULL"), 43 | permfilename = as.character("NULL"), 44 | thinfilename = as.character("NULL"), 45 | intercept = as.integer(1), 46 | lambda = as.double(-1), 47 | model = as.character("linear"), 48 | predict = as.integer(1), 49 | verbose = as.integer(verbose)) 50 | y <- read.table(phenotypesfilename, colClasses = c("numeric"), col.names = c("PredictedPhenotypes")) 51 | if(!phenotypesFileExists) 52 | unlink(phenotypesfilename) 53 | return(y) 54 | } 55 | -------------------------------------------------------------------------------- /R/linearRidgeGenotypesPredict.R.in: -------------------------------------------------------------------------------- 1 | ## Linear Ridge Big Predict function (calls C) 2 | 3 | #' @export 4 | #' @importFrom utils read.table 5 | linearRidgeGenotypesPredict <- function(genotypesfilename, 6 | betafilename, 7 | phenotypesfilename = NULL, 8 | verbose = FALSE) 9 | { 10 | if(!@HAVE_GSL@) 11 | stop("GSL >=1.14 is not installed, you cannot use this function") 12 | ## Tilde expansion of genotypesfilename 13 | ## (Because the C code cannot cope with the tilde) 14 | genotypesfilename <- path.expand(genotypesfilename) 15 | ## Check phenotypes file for reading 16 | ## mode = 4 tests for read permission 17 | if(file.access(names = genotypesfilename, mode = 4)) 18 | stop(gettextf("Cannot open file %s for reading", genotypesfilename)) 19 | ## Tilde expansion of betafilename 20 | ## (Because the C code cannot cope with the tilde) 21 | betafilename <- path.expand(betafilename) 22 | ## Check genotypes file for reading 23 | ## mode = 4 tests for read permission 24 | if(file.access(names = betafilename, mode = 4)) 25 | stop(gettextf("Cannot open file %s for reading", betafilename)) 26 | ## Check beta file name is set 27 | ## If it is not set it to beta.dat (print a warning) 28 | if(is.null(phenotypesfilename)) 29 | { 30 | phenotypesFileExists <- FALSE 31 | phenotypesfilename <- tempfile(pattern = "beta", fileext = ".dat") 32 | } else { 33 | phenotypesFileExists <- TRUE 34 | ## Else do the tilde expansion on betafilename 35 | ## (Because the C code cannot cope with the tilde) 36 | phenotypesfilename <- path.expand(phenotypesfilename) 37 | } 38 | res <- .C(regression_wrapper_function, 39 | genofilename = as.character(genotypesfilename), 40 | phenofilename = as.character(phenotypesfilename), 41 | betafilename = as.character(betafilename), 42 | approxfilename = as.character("NULL"), 43 | permfilename = as.character("NULL"), 44 | thinfilename = as.character("NULL"), 45 | intercept = as.integer(1), 46 | lambda = as.double(-1), 47 | model = as.character("linear"), 48 | predict = as.integer(1), 49 | verbose = as.integer(verbose)) 50 | y <- read.table(phenotypesfilename, colClasses = c("numeric"), col.names = c("PredictedPhenotypes")) 51 | if(!phenotypesFileExists) 52 | unlink(phenotypesfilename) 53 | return(y) 54 | } 55 | -------------------------------------------------------------------------------- /R/print.summary.ridgeLinear.R: -------------------------------------------------------------------------------- 1 | ## S3 method for class 'summary.ridgeLinear' 2 | 3 | #' @rdname print 4 | #' @export 5 | #' @importFrom stats printCoefmat 6 | print.summary.ridgeLinear <- function(x, digits = max(3, getOption("digits") - 3), 7 | signif.stars = getOption("show.signif.stars"), ...) 8 | { 9 | summaries <- x$summaries 10 | cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), 11 | "\n\n", sep = "") 12 | if(x$automatic && !x$all.coef) 13 | { 14 | chosenSummary <- summaries[[x$chosen.nPCs]] 15 | cat("\nCoefficients:\n") 16 | coefs <- chosenSummary$coefficients 17 | printCoefmat(coefs, digits = digits, signif.stars = signif.stars, 18 | na.print = "NA", ...) 19 | cat("\nRidge parameter:", chosenSummary$lambda) 20 | if(x$automatic) 21 | cat(", chosen automatically") 22 | if(!is.null(x$chosen.nPCs)) 23 | cat(gettextf(", computed using %d PCs\n", x$chosen.nPCs)) 24 | else 25 | cat("\n") 26 | ## df: degrees of freedom: model, variance, residual 27 | cat("\nDegrees of freedom: model", format(signif(chosenSummary$df[1], digits)), ", variance", format(signif(chosenSummary$df[2], digits)), ", residual", format(signif(chosenSummary$df[3], digits)), "\n") 28 | cat("\n") 29 | } else { 30 | ## Want to mark out the best chosen lambda 31 | ## in this bit 32 | for(i in seq(length(x$summaries))) 33 | { 34 | chosenSummary <- summaries[[i]] 35 | cat("\nCoefficients:\n") 36 | coefs <- chosenSummary$coefficients 37 | printCoefmat(coefs, digits = digits, signif.stars = signif.stars, 38 | na.print = "NA", ...) 39 | cat("\nRidge parameter:", chosenSummary$lambda) 40 | if(x$automatic && (x$chosen.nPCs == i)) 41 | cat(", chosen automatically") 42 | if(!is.null(chosenSummary$nPCs)) 43 | cat(gettextf(", computed using %d PCs\n", chosenSummary$nPCs)) 44 | else 45 | cat("\n") 46 | ## df: degrees of freedom: model, variance, residual 47 | cat("\nDegrees of freedom: model", format(signif(chosenSummary$df[1], digits)), ", variance", format(signif(chosenSummary$df[2], digits)), ", residual", format(signif(chosenSummary$df[3], digits)), "\n") 48 | cat("\n") 49 | invisible(x) 50 | } ## Ends for i in seq(length(x$summaries)) 51 | } ## Ends else 52 | } 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow is overkill for most R packages and 5 | # check-standard.yaml is likely a better choice. 6 | # usethis::use_github_action("check-standard") will install it. 7 | on: 8 | push: 9 | branches: [main, master] 10 | pull_request: 11 | branches: [main, master] 12 | 13 | name: R-CMD-check 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: ${{ matrix.config.os }} 18 | 19 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | config: 25 | - {os: macOS-latest, r: 'release'} 26 | 27 | # Need to figure out how to install gsl for windows 28 | # - {os: windows-latest, r: 'release'} 29 | # Use 3.6 to trigger usage of RTools35 30 | # - {os: windows-latest, r: '3.6'} 31 | 32 | # Use older ubuntu to maximise backward compatibility 33 | - {os: ubuntu-18.04, r: 'devel', http-user-agent: 'release'} 34 | - {os: ubuntu-18.04, r: 'release'} 35 | - {os: ubuntu-18.04, r: 'oldrel-1'} 36 | - {os: ubuntu-18.04, r: 'oldrel-2'} 37 | - {os: ubuntu-18.04, r: 'oldrel-3'} 38 | - {os: ubuntu-18.04, r: 'oldrel-4'} 39 | 40 | env: 41 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 42 | R_KEEP_PKG_SOURCE: yes 43 | 44 | steps: 45 | - uses: actions/checkout@v2 46 | 47 | - uses: r-lib/actions/setup-pandoc@v1 48 | 49 | - uses: r-lib/actions/setup-r@v1 50 | with: 51 | r-version: ${{ matrix.config.r }} 52 | http-user-agent: ${{ matrix.config.http-user-agent }} 53 | use-public-rspm: true 54 | 55 | - name: Install system dependencies macOS 56 | if: runner.os == 'macOS' 57 | run: | 58 | brew update 59 | brew install gsl 60 | 61 | - name: Install system dependencies Linux 62 | if: runner.os == 'Linux' 63 | run: | 64 | sudo apt-get update -y 65 | sudo apt-get install -y libgsl-dev 66 | sudo apt-get install libopenblas-dev 67 | 68 | - uses: r-lib/actions/setup-r-dependencies@v1 69 | with: 70 | extra-packages: rcmdcheck 71 | 72 | - uses: r-lib/actions/check-r-package@v1 73 | 74 | - name: Show install_out 75 | run: | 76 | rcmdcheck::rcmdcheck()$install_out 77 | shell: Rscript {0} 78 | -------------------------------------------------------------------------------- /src/logisticFunctions.c: -------------------------------------------------------------------------------- 1 | #include "logisticFunctions.h" 2 | 3 | #ifdef HAVE_GSL_HEADER 4 | 5 | /* Logistic Regression Functions */ 6 | 7 | gsl_vector_int * readLogisticPhenotypes(char * phenotypefilename, int NINDIV) 8 | { 9 | gsl_vector_int * phen = gsl_vector_int_alloc(NINDIV); 10 | /* Open a file for reading */ 11 | FILE * phenofile = fopen(phenotypefilename,"r"); 12 | /* Turn off the error handler */ 13 | gsl_set_error_handler_off(); 14 | /* Scan the phenotypes into the int vector */ 15 | int scancheck = 0; 16 | scancheck = gsl_vector_int_fscanf(phenofile, phen); 17 | if(scancheck) 18 | { 19 | if(scancheck == GSL_EFAILED) 20 | { 21 | error("ERROR: phenotype file %s not formatted correctly\n", phenotypefilename); 22 | 23 | } else { 24 | error("failed, gsl_errno=%d\n", scancheck); 25 | } 26 | } 27 | /* Restore the error handler */ 28 | gsl_set_error_handler(NULL); 29 | /* Close the file for reading */ 30 | fclose(phenofile); 31 | /* 32 | Check that the phenotypes are either zero or one 33 | If they are then change them to -1, 1 (for coordinate descent) 34 | */ 35 | int i = 0; 36 | int tmp = 0; 37 | for(i = 0; i < NINDIV; i++) 38 | { 39 | tmp = gsl_vector_int_get(phen, i); 40 | if((tmp != 0) && (tmp != 1)) 41 | { 42 | error("ERROR: Phenotype value not permitted (must be 0 or 1)\n"); 43 | } else { 44 | gsl_vector_int_set(phen, i, 2 * tmp - 1); 45 | } 46 | } 47 | return phen; 48 | } 49 | 50 | /* return to original scale */ 51 | int returnToOriginalScaleLogistic(GSL_TYPE(vector) * betaOut, 52 | GSL_TYPE(vector) * Bridge, 53 | GSL_TYPE(vector) * means, 54 | GSL_TYPE(vector) * scales, 55 | int intercept_flag) 56 | { 57 | int i = 0; 58 | /* return to original scale */ 59 | int NPRED = scales->size + intercept_flag; 60 | /* get vector view of beta excluding intercept */ 61 | PREC beta = 0.0; 62 | // Use a vector view of Bridge 63 | GSL_FUNCTION(vector,view) Bridge1 = GSL_FUNCTION(vector,subvector)(Bridge, intercept_flag, scales->size); 64 | // Divide it by scales 65 | GSL_FUNCTION(vector,div)(&Bridge1.vector, scales); 66 | // Fill betaOut 67 | for(i = intercept_flag; i < NPRED; i++) 68 | { 69 | beta = GSL_FUNCTION(vector,get)(Bridge, i); 70 | GSL_FUNCTION(vector,set)(betaOut, i, beta); 71 | } 72 | // Intercept 73 | if(intercept_flag) 74 | { 75 | beta = GSL_FUNCTION(vector,get)(Bridge, 0); 76 | GSL_FUNCTION(vector,mul)(&Bridge1.vector, means); 77 | PREC tmp = 0.0; 78 | for(i = 0; i < scales->size; i++) 79 | { 80 | tmp = tmp + GSL_FUNCTION(vector, get)(&Bridge1.vector, i); 81 | } 82 | beta = beta - tmp; 83 | GSL_FUNCTION(vector,set)(betaOut, 0, beta); 84 | } 85 | return 0; 86 | } 87 | 88 | #endif 89 | 90 | typedef int make_iso_compilers_happy; 91 | 92 | -------------------------------------------------------------------------------- /R/summary.ridgeLinear.R: -------------------------------------------------------------------------------- 1 | ## summary function for ridgeLinear object 2 | 3 | #' @rdname summary 4 | #' @export 5 | #' @importFrom stats coef 6 | summary.ridgeLinear <- function(object, all.coef = FALSE, ...) 7 | { 8 | res <- vector("list") 9 | isScaled <- object$isScaled 10 | Inter <- object$Inter 11 | res$automatic <- object$automatic 12 | res$call <- object$call 13 | res$lambda <- object$lambda 14 | pvalues <- pvals(object) 15 | summaries <- vector("list", length(res$lambda)) 16 | res$all.coef = all.coef 17 | coefs <- rbind(coef(object, all.coef = TRUE)) 18 | if(res$automatic) 19 | { 20 | res$chosen.nPCs <- object$chosen.nPCs 21 | } 22 | for(i in seq(length(res$lambda))) 23 | { 24 | summary <- vector("list") 25 | if(Inter) 26 | { 27 | if(isScaled) 28 | { 29 | ## Intercept and scaled 30 | summary$coefficients <- cbind(coefs[i,], c(NA, object$coef[,i]), c(NA, pvalues$se[,i]), c(NA, pvalues$tstat[,i]), c(NA, pvalues$pval[,i])) 31 | dimnames(summary$coefficients) <- list(c("(Intercept)", colnames(object$x)), c("Estimate", "Scaled estimate", "Std. Error (scaled)", "t value (scaled)", "Pr(>|t|)")) 32 | } else { 33 | ## Intercept, no scaling 34 | summary$coefficients <- cbind(coefs[i,], c(NA, pvalues$se[,i]), c(NA, pvalues$tstat[,i]), c(NA, pvalues$pval[,i])) 35 | dimnames(summary$coefficients) <- list(c("(Intercept)", colnames(object$x)), c("Estimate", "Std. Error", "t value", "Pr(>|t|)")) 36 | } 37 | } else { 38 | if(isScaled) 39 | { 40 | summary$coefficients <- cbind(coefs[i,], object$coef[,i], pvalues$se[,i], pvalues$tstat[,i], pvalues$pval[,i]) 41 | dimnames(summary$coefficients) <- list(colnames(object$x), c("Estimate", "Scaled estimate", "Std. Error (scaled)", "t value (scaled)", "Pr(>|t|)")) 42 | } else { 43 | ## No intercept, no scaling 44 | summary$coefficients <- cbind(coefs[i,], pvalues$se[,i], pvalues$tstat[,i], pvalues$pval[,i]) 45 | dimnames(summary$coefficients) <- list(colnames(object$x), c("Estimate", "Std. Error", "t value", "Pr(>|t|)")) 46 | } 47 | } 48 | ## df: degrees of freedom: model, variance, residual 49 | summary$df <- object$df[i,] 50 | if(!is.null(object$max.nPCs)) 51 | { 52 | summary$nPCs <- i 53 | } 54 | summary$lambda <- object$lambda[i] 55 | summaries[[i]] <- summary 56 | names(summaries)[[i]] <- paste("summary", i, sep = "") 57 | rm(summary) 58 | } ## Ends for i in seq(length(res$lambda)) 59 | res$summaries <- summaries 60 | ## Make an object of class summary.ridgeLinear 61 | class(res) <- "summary.ridgeLinear" 62 | ## Call its print method (print.summary.ridgeLinear) 63 | res 64 | } 65 | -------------------------------------------------------------------------------- /R/summary.ridgeLogistic.R: -------------------------------------------------------------------------------- 1 | ## summary function for ridgeLogistic object 2 | 3 | #' @rdname summary 4 | #' @export 5 | #' @importFrom stats pnorm 6 | summary.ridgeLogistic <- function(object, all.coef = FALSE, ...) 7 | { 8 | res <- vector("list") 9 | isScaled <- object$isScaled 10 | Inter <- object$Inter 11 | res$automatic <- object$automatic 12 | res$call <- object$call 13 | res$lambda <- object$lambda 14 | pvalues <- pvals(object) 15 | summaries <- vector("list", length(res$lambda)) 16 | res$all.coef = all.coef 17 | coefs <- rbind(coef(object, all.coef = TRUE)) 18 | if(res$automatic) 19 | { 20 | res$chosen.nPCs <- object$chosen.nPCs 21 | } 22 | for(i in seq(length(res$lambda))) 23 | { 24 | summary <- vector("list") 25 | if(Inter) 26 | { 27 | if(isScaled) { 28 | ## Both intercept and scaled 29 | summary$coefficients <- cbind(coefs[i,], c(NA, pvalues$coef[,i]), c(NA, pvalues$se[,i]), c(NA, pvalues$tstat[,i]), c(NA, pvalues$pval[,i])) 30 | dimnames(summary$coefficients) <- list(c("(Intercept)", names(object$xm)), c("Estimate", "Scaled estimate", "Std. Error (scaled)", "t value (scaled)", "Pr(>|t|)")) 31 | } else { 32 | ## Intercept, not scaled 33 | summary$coefficients <- cbind(coefs[i,], c(NA, pvalues$se[,i]), c(NA, pvalues$tstat[,i]), c(NA, pvalues$pval[,i])) 34 | dimnames(summary$coefficients) <- list(c("(Intercept)", colnames(object$x)), c("Estimate", "Std. Error", "t value", "Pr(>|t|)")) 35 | } 36 | } else { 37 | if(isScaled) { 38 | summary$coefficients <- cbind(coefs[i,], pvalues$coef[,i], pvalues$se[,i], pvalues$tstat[,i], pvalues$pval[,i]) 39 | dimnames(summary$coefficients) <- list(colnames(object$x), c("Estimate", "Scaled estimate", 40 | "Std. Error (scaled)", "t value (scaled)", "Pr(>|t|)")) 41 | } else { 42 | summary$coefficients <- cbind(coefs[i,], pvalues$se[,i], pvalues$tstat[,i], pvalues$pval[,i]) 43 | dimnames(summary$coefficients) <- list(colnames(object$x), c("Estimate", "Std. Error", "t value", "Pr(>|t|)")) 44 | } 45 | } 46 | ## ## df: degrees of freedom: model, variance, residual (may as well return all three) 47 | summary$df <- object$df[i,] 48 | ## nPCs: number of principal components used to choose lambda 49 | if(!is.null(object$max.nPCs)) 50 | { 51 | summary$nPCs <- i 52 | } 53 | summary$lambda <- object$lambda[i] 54 | summaries[[i]] <- summary 55 | names(summaries)[[i]] <- paste("summary", i, sep = "") 56 | rm(summary) 57 | } ## ends for(i in seq(length(res$lambfa))) 58 | res$summaries <- summaries 59 | ## Make an object of class summary.ridgeLogistic 60 | class(res) <- "summary.ridgeLogistic" 61 | ## Call its print method (print.summary.ridgeLogistic) 62 | res 63 | } 64 | -------------------------------------------------------------------------------- /src/coordinateDescent.h: -------------------------------------------------------------------------------- 1 | #include "depends.h" 2 | #ifdef HAVE_GSL_HEADER 3 | 4 | #include "commonFunctions.h" 5 | #include "ReadInData.h" 6 | #include "ridgeRegressionFunctions.h" 7 | 8 | int coordinateDescentLogisticGenotypes(gsl_matrix_int * genotypes, 9 | gsl_vector_int * phenotypes, 10 | int intercept_flag, 11 | int standardize_flag, 12 | int unpenalized, 13 | GSL_TYPE(vector) * tau_vector, 14 | GSL_TYPE(vector) * means, 15 | GSL_TYPE(vector) * scales, 16 | GSL_TYPE(vector) * B, 17 | PREC epsilon); 18 | 19 | 20 | int coordinateDescentLogistic(GSL_TYPE(vector) * B, 21 | GSL_TYPE(matrix) * X, 22 | gsl_vector_int * y, 23 | GSL_TYPE(vector) * tau_vector, 24 | int intercept_flag, 25 | int unpenalized, 26 | PREC epsilon); 27 | 28 | PREC Fr(PREC r, PREC delta); 29 | 30 | PREC computeUpdate(GSL_TYPE(vector) * X_column, 31 | GSL_TYPE(vector) * y, 32 | GSL_TYPE(vector) * rvector, 33 | PREC B_element, 34 | PREC delta, 35 | PREC tau, 36 | int unpen_flag); 37 | 38 | int convergenceCheckLogistic(GSL_TYPE(vector) * deltar, 39 | GSL_TYPE(vector) * rvector, 40 | PREC epsilon); 41 | 42 | int preparePhenotypesForCoordinateDescent(GSL_TYPE(vector) * y_cd, const GSL_TYPE(vector) * y); 43 | 44 | int prepareShrinkageForCoordinateDescent(GSL_TYPE(vector) * shrinkage_cd, const GSL_TYPE(vector) * shrinkage); 45 | 46 | PREC computeDofFLogistic(GSL_TYPE(matrix) * X, 47 | GSL_TYPE(vector) * beta, 48 | PREC k); 49 | 50 | int coordinateDescentLinearFloat(GSL_TYPE(matrix) * Z, 51 | GSL_TYPE(vector) * y, 52 | GSL_TYPE(vector) * a, 53 | PREC epsilon); 54 | 55 | int updateYtilde(GSL_TYPE(vector) * ytilde, 56 | GSL_TYPE(matrix) * Z, 57 | GSL_TYPE(vector) * B, 58 | int j); 59 | 60 | int updateBetaLinear(GSL_TYPE(vector) * Bpen, 61 | GSL_TYPE(matrix) * Z, 62 | GSL_TYPE(vector) * y, 63 | GSL_TYPE(vector) * ytilde, 64 | int j, 65 | PREC penalty); 66 | 67 | int convergenceCheckLinear(GSL_TYPE(vector) * Bold, 68 | GSL_TYPE(vector) * Bpen, 69 | GSL_TYPE(vector) * B, 70 | PREC epsilon); 71 | 72 | int updateBetaLinearGenotypes(GSL_TYPE(vector) * Bpen, 73 | gsl_matrix_int * X, 74 | GSL_TYPE(vector) * means, 75 | GSL_TYPE(vector) * scales, 76 | GSL_TYPE(vector) * y, 77 | GSL_TYPE(vector) * ytilde, 78 | int j, 79 | PREC penalty); 80 | 81 | int updateYtildeGenotypes(GSL_TYPE(vector) * ytilde, 82 | gsl_matrix_int * X, 83 | GSL_TYPE(vector) * means, 84 | GSL_TYPE(vector) * scales, 85 | GSL_TYPE(vector) * B, 86 | int j); 87 | 88 | GSL_TYPE(vector) * getScaledColOfX(gsl_matrix_int * X, 89 | GSL_TYPE(vector) * means, 90 | GSL_TYPE(vector) * scales, 91 | int i); 92 | 93 | 94 | int coordinateDescentLinearGenotypes(gsl_matrix_int * X, 95 | GSL_TYPE(vector) * y, 96 | int intercept_flag, 97 | int standardize_flag, 98 | PREC lambda, 99 | GSL_TYPE(vector) * means, 100 | GSL_TYPE(vector) * scales, 101 | GSL_TYPE(vector) * Bout, 102 | PREC epsilon); 103 | 104 | int get_prev_variant_col(int current_pos, 105 | int number_of_columns); 106 | #endif 107 | 108 | -------------------------------------------------------------------------------- /man/pvals.Rd: -------------------------------------------------------------------------------- 1 | \name{pvals} 2 | \alias{pvals} 3 | \alias{pvals.ridgeLinear} 4 | \alias{pvals.ridgeLogistic} 5 | \alias{plot.pvalsRidgeLinear} 6 | \alias{plot.pvalsRidgeLogistic} 7 | \alias{print.pvalsRidgeLinear} 8 | \alias{print.pvalsRidgeLogistic} 9 | \title{ 10 | Compute p-values for ridgeLinear and ridgeLogistic models 11 | } 12 | \description{ 13 | Functions for computing, printing and plotting p-values for ridgeLinear 14 | and ridgeLogistic models. The p-values are computed using the 15 | significance test of Cule et al (2011). 16 | } 17 | \usage{ 18 | pvals(x, ...) 19 | 20 | \method{pvals}{ridgeLinear}(x, ...) 21 | 22 | \method{pvals}{ridgeLogistic}(x, ...) 23 | 24 | \method{print}{pvalsRidgeLinear}(x, digits = max(3, getOption("digits") - 3), 25 | signif.stars = getOption("show.signif.stars"), all.coef = FALSE, ...) 26 | 27 | \method{print}{pvalsRidgeLogistic}(x, digits = max(3, getOption("digits") - 3), 28 | signif.stars = getOption("show.signif.stars"), all.coef = FALSE, ...) 29 | 30 | \method{plot}{pvalsRidgeLinear}(x, y = NULL, ...) 31 | 32 | \method{plot}{pvalsRidgeLogistic}(x, y = NULL, ...) 33 | 34 | } 35 | \arguments{ 36 | 37 | \item{x}{For the pvals methods, an object of class "ridgeLinear" or "ridgeLogistic", typically from a call to "linearRidge" or 38 | "logisticRidge". For the print and plot methods, an object of class 39 | "pvalsRidgeLinear" or "pvalsRidgeLogistic", typically from a 40 | call to "pvals".} 41 | \item{digits}{minimum number of significant digits to be used for most numbers} 42 | \item{signif.stars}{logical; if \code{TRUE}, P-values are additionally encoded 43 | visually as \code{significance stars} in order to help scanning of 44 | long coefficient tables. It defaults to the 45 | \code{show.signif.stars} slot of \code{options}. 46 | } 47 | \item{all.coef}{Logical. Should p-values for all the ridge 48 | regression parameters be printed, or only the one from the ridge 49 | parameter chosen using the method of Cule et al (2012)} 50 | \item{y}{Dummy argument for compatibility with the default \code{plot} method. Ignored.} 51 | \item{\dots}{further arguments to be passed to or from other methods} 52 | 53 | } 54 | \details{ 55 | Standard errors, test statistics and p-values are computed using coefficients and data on the scale that was 56 | used to fit them. If the coefficients were standardized before the model was fitted, then the p-values 57 | relate to the scaled data. 58 | } 59 | \value{ 60 | For the pvals methods, an object of class "pvalsRidgeLinear" or "pvalsRidgeLogistic" 61 | which is a list with elements 62 | \item{coef}{The (scaled) regression coefficients} 63 | \item{se}{The standard errors of the regression coefficients} 64 | \item{tstat}{The test statistic of the regression coefficients} 65 | \item{pval}{The p-values of the regression coefficients} 66 | \item{isScaled}{Were the data scaled before the regression 67 | coefficients were fitted?} 68 | For the print methods, the argument \code{x} is returned invisibly. 69 | } 70 | \references{ 71 | Significance testing in ridge regression for genetic data. Cule, E. et al (2011) BMC Bioinformatics, 12:372 72 | } 73 | \author{ 74 | Erika Cule 75 | } 76 | \seealso{ 77 | \code{linearRidge}, \code{logisticRidge} 78 | } 79 | \examples{ 80 | data(GenBin) 81 | mod <- logisticRidge(Phenotypes ~ ., data = as.data.frame(GenBin)) 82 | pvalsMod <- pvals(mod) 83 | print(pvalsMod) 84 | print(pvalsMod, all.coef = TRUE) 85 | plot(pvalsMod) 86 | } 87 | -------------------------------------------------------------------------------- /inst/extdata/GenBin_phenotypes.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 0 3 | 1 4 | 0 5 | 0 6 | 0 7 | 0 8 | 0 9 | 0 10 | 0 11 | 0 12 | 0 13 | 0 14 | 0 15 | 0 16 | 0 17 | 0 18 | 1 19 | 0 20 | 0 21 | 0 22 | 0 23 | 0 24 | 0 25 | 0 26 | 0 27 | 0 28 | 0 29 | 0 30 | 0 31 | 0 32 | 0 33 | 0 34 | 0 35 | 0 36 | 0 37 | 0 38 | 0 39 | 0 40 | 0 41 | 0 42 | 0 43 | 0 44 | 1 45 | 0 46 | 0 47 | 0 48 | 0 49 | 0 50 | 0 51 | 0 52 | 0 53 | 0 54 | 0 55 | 0 56 | 0 57 | 0 58 | 0 59 | 0 60 | 0 61 | 0 62 | 0 63 | 0 64 | 0 65 | 0 66 | 0 67 | 0 68 | 0 69 | 0 70 | 0 71 | 0 72 | 0 73 | 0 74 | 0 75 | 0 76 | 0 77 | 1 78 | 0 79 | 0 80 | 0 81 | 1 82 | 0 83 | 0 84 | 0 85 | 1 86 | 0 87 | 0 88 | 0 89 | 0 90 | 1 91 | 0 92 | 0 93 | 0 94 | 0 95 | 0 96 | 0 97 | 1 98 | 0 99 | 1 100 | 0 101 | 0 102 | 0 103 | 0 104 | 0 105 | 0 106 | 0 107 | 0 108 | 0 109 | 0 110 | 0 111 | 0 112 | 0 113 | 0 114 | 0 115 | 0 116 | 0 117 | 1 118 | 0 119 | 0 120 | 1 121 | 0 122 | 1 123 | 0 124 | 0 125 | 0 126 | 0 127 | 0 128 | 0 129 | 0 130 | 0 131 | 0 132 | 0 133 | 0 134 | 0 135 | 0 136 | 1 137 | 0 138 | 0 139 | 0 140 | 0 141 | 0 142 | 0 143 | 0 144 | 0 145 | 0 146 | 0 147 | 0 148 | 0 149 | 0 150 | 0 151 | 0 152 | 0 153 | 1 154 | 1 155 | 0 156 | 0 157 | 0 158 | 1 159 | 0 160 | 0 161 | 0 162 | 0 163 | 0 164 | 0 165 | 0 166 | 0 167 | 0 168 | 0 169 | 0 170 | 0 171 | 0 172 | 0 173 | 0 174 | 0 175 | 0 176 | 0 177 | 1 178 | 1 179 | 0 180 | 0 181 | 0 182 | 0 183 | 0 184 | 1 185 | 0 186 | 0 187 | 0 188 | 0 189 | 0 190 | 0 191 | 0 192 | 0 193 | 0 194 | 0 195 | 0 196 | 0 197 | 0 198 | 0 199 | 0 200 | 0 201 | 0 202 | 0 203 | 0 204 | 0 205 | 0 206 | 0 207 | 0 208 | 0 209 | 0 210 | 0 211 | 0 212 | 0 213 | 0 214 | 0 215 | 1 216 | 0 217 | 0 218 | 0 219 | 0 220 | 0 221 | 0 222 | 0 223 | 0 224 | 0 225 | 0 226 | 0 227 | 0 228 | 0 229 | 0 230 | 1 231 | 0 232 | 0 233 | 0 234 | 0 235 | 1 236 | 0 237 | 0 238 | 0 239 | 1 240 | 1 241 | 0 242 | 0 243 | 1 244 | 0 245 | 0 246 | 0 247 | 0 248 | 0 249 | 0 250 | 0 251 | 0 252 | 0 253 | 0 254 | 0 255 | 0 256 | 0 257 | 0 258 | 0 259 | 0 260 | 0 261 | 0 262 | 0 263 | 1 264 | 0 265 | 0 266 | 0 267 | 1 268 | 0 269 | 0 270 | 0 271 | 0 272 | 0 273 | 0 274 | 0 275 | 1 276 | 0 277 | 0 278 | 0 279 | 1 280 | 1 281 | 1 282 | 1 283 | 1 284 | 1 285 | 1 286 | 1 287 | 1 288 | 1 289 | 1 290 | 1 291 | 1 292 | 1 293 | 1 294 | 1 295 | 1 296 | 1 297 | 1 298 | 1 299 | 1 300 | 1 301 | 1 302 | 1 303 | 1 304 | 1 305 | 1 306 | 1 307 | 1 308 | 1 309 | 1 310 | 1 311 | 1 312 | 1 313 | 1 314 | 1 315 | 1 316 | 1 317 | 1 318 | 1 319 | 1 320 | 1 321 | 1 322 | 1 323 | 1 324 | 1 325 | 1 326 | 1 327 | 1 328 | 1 329 | 1 330 | 1 331 | 1 332 | 1 333 | 1 334 | 1 335 | 1 336 | 1 337 | 1 338 | 1 339 | 1 340 | 1 341 | 1 342 | 1 343 | 1 344 | 1 345 | 1 346 | 1 347 | 1 348 | 1 349 | 1 350 | 1 351 | 1 352 | 1 353 | 1 354 | 1 355 | 1 356 | 1 357 | 1 358 | 1 359 | 1 360 | 1 361 | 1 362 | 1 363 | 1 364 | 1 365 | 1 366 | 1 367 | 1 368 | 1 369 | 1 370 | 1 371 | 1 372 | 1 373 | 1 374 | 1 375 | 1 376 | 1 377 | 1 378 | 1 379 | 1 380 | 1 381 | 1 382 | 1 383 | 1 384 | 1 385 | 1 386 | 1 387 | 1 388 | 1 389 | 1 390 | 1 391 | 1 392 | 1 393 | 1 394 | 1 395 | 1 396 | 1 397 | 1 398 | 1 399 | 1 400 | 1 401 | 1 402 | 1 403 | 1 404 | 1 405 | 1 406 | 1 407 | 1 408 | 1 409 | 1 410 | 1 411 | 1 412 | 1 413 | 1 414 | 1 415 | 1 416 | 1 417 | 1 418 | 1 419 | 1 420 | 1 421 | 1 422 | 1 423 | 1 424 | 1 425 | 1 426 | 1 427 | 1 428 | 1 429 | 1 430 | 1 431 | 1 432 | 1 433 | 1 434 | 1 435 | 1 436 | 1 437 | 1 438 | 1 439 | 1 440 | 1 441 | 1 442 | 1 443 | 1 444 | 1 445 | 1 446 | 1 447 | 1 448 | 1 449 | 1 450 | 1 451 | 1 452 | 1 453 | 1 454 | 1 455 | 1 456 | 1 457 | 1 458 | 1 459 | 1 460 | 1 461 | 1 462 | 1 463 | 1 464 | 1 465 | 1 466 | 1 467 | 1 468 | 1 469 | 1 470 | 1 471 | 1 472 | 1 473 | 1 474 | 1 475 | 1 476 | 1 477 | 1 478 | 1 479 | 1 480 | 1 481 | 1 482 | 1 483 | 1 484 | 1 485 | 1 486 | 1 487 | 1 488 | 1 489 | 1 490 | 1 491 | 1 492 | 1 493 | 1 494 | 1 495 | 1 496 | 1 497 | 1 498 | 1 499 | 1 500 | 1 501 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | ## Changes Version 3.3 - Steffen Moritz 2 | 3 | * Fixed small bug with a unwanted print statement in linearRidge 4 | 5 | ## Changes Version 3.2 - Steffen Moritz 6 | 7 | * Fixed issue with predict() function: 8 | Wrong results, when predicting the training data without newdata argument. 9 | When supplying them via newdata, everything seemed fine. 10 | (https://github.com/SteffenMoritz/ridge/issues/16) 11 | Thanks to Mekala Sundaram for reporting the issue. 12 | 13 | ## Changes Version 3.1 - Steffen Moritz 14 | 15 | * Fixes to remain on CRAN 16 | 17 | * GitHub Actions as CI Tool 18 | 19 | 20 | ## Changes Version 2.7 until 3.0 - Steffen Moritz 21 | 22 | * Fixes related to autoconf (needed for passing new CRAN checks) 23 | 24 | 25 | ## Changes Version 2.6 - Steffen Moritz 26 | 27 | * Readme update 28 | 29 | * Fixes to pass CRAN checks 30 | 31 | ## Changes Version 2.5 - Dan Frankowski 32 | 33 | * Fix predict bug with factor variables 34 | 35 | * Add vcov.ridgeLinear 36 | 37 | * Add testthat tests 38 | 39 | * Changed documentation partially to roxygen2 40 | 41 | ## Changes Version 2.4 - Steffen Moritz 42 | 43 | * bugfix in logisticRidge and linearRidge see: https://github.com/SteffenMoritz/ridge/issues/2 44 | 45 | * bugfix to pass cran checks 'warning: missing template: HAVE_GSL_HEADER autoheader' 46 | 47 | * Updated Readme 48 | 49 | 50 | ## Changes Version 2.3 - Steffen Moritz 51 | 52 | * Some fixed to remain on CRAN and comly with CRAN policy 53 | 54 | * Improved Description file 55 | 56 | * Changed NEWS file to markup document 57 | 58 | 59 | ## Changes Version 2.2 - Steffen Moritz 60 | 61 | * Made package CRAN ready again 62 | 63 | * Created github repository for the package 64 | 65 | * Fixed warning using Wpendantic gcc 66 | 67 | * Adapted the DESCRIPTION file to latest CRAN requirements 68 | 69 | * Renamed CHANGELOG to NEWS 70 | 71 | * NEWS template update 72 | 73 | * Changes to .C Method registration 74 | 75 | * NAMESPACE fixes 76 | 77 | 78 | ## Changes 2014-3-02 - Erika Cule 79 | 80 | * Fixed layout of .Rd files 81 | 82 | * Added deletion of Makevars to cleanup script 83 | 84 | 85 | ## Changes 2012-9-27 - Erika Cule 86 | 87 | * Flat text (.txt) data files were moved from ridge/data to ridge/inst/extdata 88 | (in the source, which becomes ridge/extdata in the installed package). The .txt files 89 | should be in inst/extdata because they files are used by the package examples 90 | (albeit in not run sections), as described in Writing R Extensions 1.1.5 Data in packages. 91 | 92 | * Some users were reporting problems installing the package on some Linux OS. configure 93 | has been modified to fix this 94 | problem. 95 | 96 | 97 | ## Changes 2012-8-21 - Erika Cule 98 | 99 | * Bug fix in src/commonFunctions.c 100 | 101 | 102 | ## Changes 2012-8-21 - Erika Cule 103 | 104 | * Added configure.ac so that package will install if GSL >= 1.14 is not available 105 | (with linearRidgeGenotypes, logisticRidgeGenotypes, linearRidgeGenotypesPredict and 106 | logisticRidgeGenotypesPredict disabled). 107 | 108 | * configure.ac detects whether openblas is available and if it is found, links to that. 109 | This speeds up computation. (http://xianyi.github.com/OpenBLAS/) 110 | 111 | ## Changes 2012-7-19 Erika Cule 112 | 113 | * fixed a bug in linearRidge when scaling = "none" 114 | 115 | * added functions linearRidgeGenotypes and logisticRidgeGenotypes and their predicting counterparts 116 | linearRidgeGenotypesPredict and logisticRidgeGenotypespredict. 117 | These functions fit linear and logistic ridge regression models for genome-wide SNP data, 118 | optionally automatically choosing the ridge parameter 119 | 120 | * minor bug fix: "Intercept" now prints as "(Intercept)" 121 | (as is the case for lm and glm models) 122 | 123 | -------------------------------------------------------------------------------- /src/ridgeRegressionFunctions.h: -------------------------------------------------------------------------------- 1 | 2 | /* Header file for ridgeRegressionFunctions.c */ 3 | #include "depends.h" 4 | #ifdef HAVE_GSL_HEADER 5 | 6 | /* includes */ 7 | #if _CUDA_ 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "cudaOnlyFunctions.h" 14 | #endif 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "computeLinearRidge.h" 24 | 25 | #include "ReadInData.h" 26 | 27 | /* Compute linear ridge coefficeints */ 28 | int computeLinearRidge(GSL_TYPE(vector) * ahat, 29 | GSL_TYPE(vector) * B, 30 | GSL_TYPE(vector) * D2, 31 | GSL_TYPE(matrix) * V, 32 | PREC lambda); 33 | 34 | /* linear generalized ridge regression */ 35 | GSL_TYPE(vector) * computeLinearGeneralizedRidge(GSL_TYPE(vector) * beta, 36 | GSL_TYPE(matrix) * pred, 37 | GSL_TYPE(vector) * pheno, 38 | GSL_TYPE(vector) * shrinkage, 39 | int intercept_flag); 40 | 41 | 42 | 43 | int computeLogisticRidge(GSL_TYPE(vector) * beta, 44 | GSL_TYPE(matrix) * pred, 45 | GSL_TYPE(vector) * pheno, 46 | GSL_TYPE(vector) * shrinkage, 47 | int intercept_flat, 48 | int Doff_flat, 49 | PREC * DofF); 50 | 51 | PREC objectiveFunction(GSL_TYPE(vector) * beta, 52 | GSL_TYPE(matrix) * X, 53 | GSL_TYPE(vector) * pheno, 54 | GSL_TYPE(vector) * shrinkage, 55 | int intercept_flat); 56 | 57 | int updateBeta(GSL_TYPE(vector) * beta, 58 | GSL_TYPE(matrix) * X, 59 | GSL_TYPE(vector) * pheno, 60 | GSL_TYPE(matrix) * kI, 61 | int intercept_flag, 62 | int DofF_flag, 63 | GSL_TYPE(matrix) * invtXWX_return, 64 | GSL_TYPE(matrix) * W_return); 65 | 66 | int getProb(GSL_TYPE(vector) * p, GSL_TYPE(vector) * XB); 67 | 68 | /* NB my_gsl_solve only works for matrices of type double 69 | due to the linalg functions only having been written for this type */ 70 | int my_gsl_solve(gsl_matrix * X, 71 | gsl_matrix * solvedX); 72 | 73 | int compute_XB_and_p(GSL_TYPE(matrix) * X, 74 | GSL_TYPE(vector) * B, 75 | GSL_TYPE(vector) * XB, 76 | GSL_TYPE(vector) * p); 77 | 78 | int chooseHowManyK(GSL_TYPE(vector) * D); 79 | 80 | int returnToOriginalScaleLinear(GSL_TYPE(vector) * betaOut, 81 | GSL_TYPE(vector) * Bridge, 82 | GSL_TYPE(vector) * means, 83 | GSL_TYPE(vector) * scales, 84 | PREC y_mean, 85 | int intercept_flag); 86 | 87 | /* return to original scale - generalized linear ridge regression */ 88 | int returnToOriginalScaleGenLinear(GSL_TYPE(vector) * Bridge, 89 | GSL_TYPE(vector) * betaOut, 90 | GSL_TYPE(matrix) * pred, 91 | GSL_TYPE(vector) * pheno, 92 | GSL_TYPE(vector) * scales, 93 | int intercept_flag); 94 | 95 | 96 | 97 | /* Convert trans */ 98 | char setTrans(CBLAS_TRANSPOSE_t Trans); 99 | 100 | /* prepare for linear ridge 101 | used when we are going to call linear rr on a range of different shrinkage parameters */ 102 | int prepareForLinearRidge(GSL_TYPE(matrix) * X, 103 | GSL_TYPE(vector) * y, 104 | GSL_TYPE(matrix) * U, 105 | GSL_TYPE(matrix) * V, 106 | GSL_TYPE(vector) * D, 107 | GSL_TYPE(vector) * D2, 108 | GSL_TYPE(matrix) * Z, 109 | GSL_TYPE(vector) * ahat); 110 | 111 | 112 | /* compute DofF for ridge model */ 113 | int computeDofF(GSL_TYPE(vector) * D2, 114 | PREC Kr, 115 | PREC * DofF); 116 | 117 | /* compute (A + BDC')^(-1) */ 118 | int invert_sum_of_matrices(const PREC Ainv, 119 | const GSL_TYPE(matrix) * B, 120 | const GSL_TYPE(vector) * Dinv, 121 | const GSL_TYPE(matrix) * tC, 122 | GSL_TYPE(matrix) * out); 123 | 124 | 125 | /* ordinary regression */ 126 | gsl_vector * my_gsl_linear_fit(gsl_matrix * X, 127 | gsl_vector * y, 128 | int NROW, 129 | int NCOL); 130 | 131 | #endif 132 | 133 | -------------------------------------------------------------------------------- /autom4te.cache/requests: -------------------------------------------------------------------------------- 1 | # This file was generated by Autom4te 2.71. 2 | # It contains the lists of macros which have been traced. 3 | # It can be safely removed. 4 | 5 | @request = ( 6 | bless( [ 7 | '0', 8 | 1, 9 | [ 10 | '/usr/local/Cellar/autoconf/2.71/share/autoconf' 11 | ], 12 | [ 13 | '/usr/local/Cellar/autoconf/2.71/share/autoconf/autoconf/autoconf.m4f', 14 | '/usr/local/Cellar/autoconf/2.71/share/autoconf/autoconf/trailer.m4', 15 | 'configure.ac' 16 | ], 17 | { 18 | 'AM_GNU_GETTEXT' => 1, 19 | 'AM_PROG_CXX_C_O' => 1, 20 | 'AM_PROG_CC_C_O' => 1, 21 | 'AM_NLS' => 1, 22 | 'AM_EXTRA_RECURSIVE_TARGETS' => 1, 23 | 'AM_AUTOMAKE_VERSION' => 1, 24 | 'AC_INIT' => 1, 25 | 'AM_CONDITIONAL' => 1, 26 | 'AM_XGETTEXT_OPTION' => 1, 27 | 'include' => 1, 28 | 'AC_CONFIG_LINKS' => 1, 29 | '_AM_SUBST_NOTMAKE' => 1, 30 | '_LT_AC_TAGCONFIG' => 1, 31 | 'LT_SUPPORTED_TAG' => 1, 32 | 'AC_PROG_LIBTOOL' => 1, 33 | 'LT_INIT' => 1, 34 | 'AM_PROG_AR' => 1, 35 | 'm4_pattern_allow' => 1, 36 | '_AM_MAKEFILE_INCLUDE' => 1, 37 | '_AM_COND_ENDIF' => 1, 38 | 'AC_SUBST_TRACE' => 1, 39 | '_m4_warn' => 1, 40 | 'AC_DEFINE_TRACE_LITERAL' => 1, 41 | 'm4_include' => 1, 42 | 'AM_INIT_AUTOMAKE' => 1, 43 | 'AC_FC_SRCEXT' => 1, 44 | 'AC_FC_PP_DEFINE' => 1, 45 | 'AM_ENABLE_MULTILIB' => 1, 46 | 'AC_CANONICAL_BUILD' => 1, 47 | '_AM_COND_ELSE' => 1, 48 | 'AC_CONFIG_FILES' => 1, 49 | 'AM_PROG_MKDIR_P' => 1, 50 | 'AC_REQUIRE_AUX_FILE' => 1, 51 | 'AC_LIBSOURCE' => 1, 52 | 'AM_PATH_GUILE' => 1, 53 | 'AM_MAKEFILE_INCLUDE' => 1, 54 | 'AC_CONFIG_AUX_DIR' => 1, 55 | 'AM_PROG_MOC' => 1, 56 | 'AH_OUTPUT' => 1, 57 | 'GTK_DOC_CHECK' => 1, 58 | 'sinclude' => 1, 59 | 'LT_CONFIG_LTDL_DIR' => 1, 60 | 'AM_SILENT_RULES' => 1, 61 | 'AC_CONFIG_HEADERS' => 1, 62 | 'AC_CANONICAL_HOST' => 1, 63 | 'AC_CONFIG_SUBDIRS' => 1, 64 | 'AM_PROG_F77_C_O' => 1, 65 | 'AC_CANONICAL_SYSTEM' => 1, 66 | 'AM_PROG_LIBTOOL' => 1, 67 | 'IT_PROG_INTLTOOL' => 1, 68 | 'AC_FC_FREEFORM' => 1, 69 | 'AM_POT_TOOLS' => 1, 70 | 'AC_CONFIG_MACRO_DIR_TRACE' => 1, 71 | 'AM_GNU_GETTEXT_INTL_SUBDIR' => 1, 72 | 'AM_MAINTAINER_MODE' => 1, 73 | 'AC_CANONICAL_TARGET' => 1, 74 | 'm4_sinclude' => 1, 75 | 'AC_CONFIG_LIBOBJ_DIR' => 1, 76 | 'AC_SUBST' => 1, 77 | '_AM_COND_IF' => 1, 78 | 'AC_FC_PP_SRCEXT' => 1, 79 | 'm4_pattern_forbid' => 1, 80 | 'AM_PROG_FC_C_O' => 1 81 | } 82 | ], 'Autom4te::Request' ) 83 | ); 84 | 85 | -------------------------------------------------------------------------------- /man/linearRidgeGenotypesPredict.Rd: -------------------------------------------------------------------------------- 1 | \name{linearRidgeGenotypesPredict} 2 | \alias{linearRidgeGenotypesPredict} 3 | 4 | \title{ 5 | Predict phenotypes from genome-wide SNP data based on a file of coefficients 6 | } 7 | \description{ 8 | Predict phenotypes from genome-wide SNP data based on a file of 9 | coefficients. Genotypes and fitted coefficients are provided as 10 | filenames, allowing the computation of fitted probabilities when SNP 11 | data are too large to be read into R. 12 | } 13 | \usage{ 14 | linearRidgeGenotypesPredict(genotypesfilename, betafilename, phenotypesfilename = NULL, 15 | verbose = FALSE) 16 | } 17 | 18 | \arguments{ 19 | \item{genotypesfilename}{ 20 | character string: path to file containing SNP genotypes coded 0, 1, 21 | 2. See \code{Input file formats}. 22 | } 23 | \item{betafilename}{ 24 | character string: path to file containing fitted coefficients. See \code{Input file formats}. 25 | } 26 | \item{phenotypesfilename}{ 27 | (optional) character string: path to file in which to write out the 28 | predicted phenotypes. See \code{Output file formats}. Whether or not this argument 29 | is supplied, the fitted coefficients are also returned by the function. 30 | } 31 | \item{verbose}{ 32 | Logical: If \code{TRUE}, additional information is printed to the R 33 | outupt as the code runs. Defaults to \code{FALSE}. 34 | } 35 | } 36 | 37 | \section{Input file formats}{ 38 | \describe{ 39 | \item{genotypesfilename:}{A header row, plus one row for each 40 | individual, one SNP per column. The header row contains SNP 41 | names. SNPs are coded as 0, 1, 2 for minor allele count. Missing 42 | values are not accommodated. } 43 | \item{betafilename:}{Two columns: First column is SNP names in same order as in \code{genotypesfilename}, second column is fitted coefficients. If the coefficients include an intercept then the first row of \code{betafilename} should contain it with the name Intercept in the first column. An Intercept thus labelled will be used appropriately in predicting the phenotypes. SNP names must match those in \code{genotypesfilename}. 44 | The format of \code{betafilename} is 45 | that of the output of \code{\link{linearRidgeGenotypes}}, meaning 46 | \code{linearRidgeGenotypesPredict} can be used to predict using 47 | coefficients fitted using \code{\link{linearRidgeGenotypes}} (see the example). 48 | } 49 | } 50 | } 51 | 52 | \section{Output file format}{ 53 | Whether or not \code{phenotypesfilename} is provided, predicted phenotypes are returned to the R workshpace. If \code{phenotypesfilename} is provided, predicted phenotypes are written to the file specified (in addition). 54 | \describe{ 55 | \item{phenotypesfilename:}{One column, containing predicted phenotypes, one individual per row.} 56 | } 57 | } 58 | 59 | \value{ 60 | A vector of fitted values, the same length as the number of 61 | individuals whose data are in \code{genotypesfilename}. If 62 | \code{phenotypesfilename} is supplied, the fitted values are also 63 | written there. 64 | } 65 | \references{ 66 | A semi-automatic method to guide the choice of ridge parameter in ridge regression. Cule, E. and De Iorio, M. (2012) arXiv:1205.0686v1 [stat.AP] 67 | } 68 | \author{ 69 | Erika Cule 70 | } 71 | \seealso{ 72 | \code{\link{linearRidgeGenotypes}} for model 73 | fitting. \code{\link{logisticRidgeGenotypes}} and 74 | \code{\link{logisticRidgeGenotypesPredict}} for corresponding functions 75 | to fit and predict on SNP data with binary outcomes. 76 | } 77 | \examples{ 78 | \dontrun{ 79 | genotypesfile <- system.file("extdata","GenCont_genotypes.txt",package = "ridge") 80 | phenotypesfile <- system.file("extdata","GenCont_phenotypes.txt",package = "ridge") 81 | betafile <- tempfile(pattern = "beta", fileext = ".dat") 82 | beta_linearRidgeGenotypes <- linearRidgeGenotypes(genotypesfilename = genotypesfile, 83 | phenotypesfilename = phenotypesfile, 84 | betafilename = betafile) 85 | pred_phen_geno <- linearRidgeGenotypesPredict(genotypesfilename = genotypesfile, 86 | betafilename = betafile) 87 | ## compare to output of linearRidge 88 | data(GenCont) ## Same data as in GenCont_genotypes.txt and GenCont_phenotypes.txt 89 | beta_linearRidge <- linearRidge(Phenotypes ~ ., data = as.data.frame(GenCont)) 90 | pred_phen <- predict(beta_linearRidge) 91 | print(cbind(pred_phen_geno, pred_phen)) 92 | ## Delete the temporary betafile 93 | unlink(betafile) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /man/logisticRidgeGenotypesPredict.Rd: -------------------------------------------------------------------------------- 1 | \name{logisticRidgeGenotypesPredict} 2 | \alias{logisticRidgeGenotypesPredict} 3 | 4 | \title{ 5 | Predict fitted probabilities from genome-wide SNP data based on a file of coefficients 6 | } 7 | \description{ 8 | Predict fitted probabilities from genome-wide SNP data based on a file of 9 | coefficients. Genotypes and fitted coefficients are provided as 10 | filenames, allowing the computation of fitted probabilities when SNP 11 | data are too large to be read into R. 12 | } 13 | \usage{ 14 | logisticRidgeGenotypesPredict(genotypesfilename, betafilename, 15 | phenotypesfilename = NULL, verbose = FALSE) 16 | } 17 | 18 | \arguments{ 19 | \item{genotypesfilename}{ 20 | character string: path to file containing SNP genotypes coded 0, 1, 21 | 2. See \code{Input file formats}. 22 | } 23 | \item{betafilename}{ 24 | character string: path to file containing fitted coefficients. See \code{Input file formats}. 25 | } 26 | \item{phenotypesfilename}{ 27 | (optional) character string: path to file in which to write out the 28 | fitted probabilities. See \code{Output file formats}. Whether or not this argument 29 | is supplied, the fitted coefficients are also returned by the function. 30 | } 31 | \item{verbose}{ 32 | Logical: If \code{TRUE}, additional information is printed to the R 33 | outupt as the code runs. Defaults to \code{FALSE}. 34 | } 35 | } 36 | \section{Input file formats}{ 37 | \describe{ 38 | \item{genotypesfilename:}{A header row, plus one row for each 39 | individual, one SNP per column. The header row contains SNP 40 | names. SNPs are coded as 0, 1, 2 for minor allele count. Missing 41 | values are not accommodated. } 42 | \item{betafilename:}{Two columns: First column is SNP names in same order as in \code{genotypesfilename}, second column is fitted coefficients. If the coefficients include an intercept then the first row of \code{betafilename} should contain it with the name Intercept in the first column. An Intercept thus labelled will be used appropriately in predicting the phenotypes. SNP names must match those in \code{genotypesfilename}. 43 | The format of \code{betafilename} is 44 | that of the output of \code{\link{linearRidgeGenotypes}}, meaning 45 | \code{linearRidgeGenotypesPredict} can be used to predict using 46 | coefficients fitted using \code{\link{linearRidgeGenotypes}} (see the example). 47 | } 48 | } 49 | } 50 | 51 | \section{Output file format}{ 52 | Whether or not \code{phenotypesfilename} is provided, fitted probabilities are returned to the R workshpace. If \code{phenotypesfilename} is provided, fitted probabilities are written to the file specified (in addition). 53 | \describe{ 54 | \item{phenotypesfilename:}{One column, containing fitted probabilities, one individual per row.} 55 | } 56 | } 57 | \value{ 58 | A vector of fitted probabilities, the same length as the number of 59 | individuals whose data are in \code{genotypesfilename}. If 60 | \code{phenotypesfilename} is supplied, the fitted probabilities are also 61 | written there. 62 | } 63 | \references{ 64 | A semi-automatic method to guide the choice of ridge parameter in ridge regression. Cule, E. and De Iorio, M. (2012) arXiv:1205.0686v1 [stat.AP] 65 | } 66 | \author{ 67 | Erika Cule 68 | } 69 | \seealso{ 70 | \code{\link{logisticRidgeGenotypes}} for model 71 | fitting. \code{\link{linearRidgeGenotypes}} and 72 | \code{\link{linearRidgeGenotypesPredict}} for corresponding functions 73 | to fit and predict on SNP data with continuous outcomes. 74 | } 75 | \examples{ 76 | \dontrun{ 77 | genotypesfile <- system.file("extdata","GenBin_genotypes.txt",package = "ridge") 78 | phenotypesfile <- system.file("extdata","GenBin_phenotypes.txt",package = "ridge") 79 | betafile <- tempfile(pattern = "beta", fileext = ".dat") 80 | beta_logisticRidgeGenotypes <- logisticRidgeGenotypes(genotypesfilename = genotypesfile, 81 | phenotypesfilename = phenotypesfile, 82 | betafilename = betafile) 83 | pred_phen_geno <- logisticRidgeGenotypesPredict(genotypesfilename = genotypesfile, 84 | betafilename = betafile) 85 | ## compare to output of logisticRidge 86 | data(GenBin) ## Same data as in GenBin_genotypes.txt and GenBin_phenotypes.txt 87 | beta_logisticRidge <- logisticRidge(Phenotypes ~ ., data = as.data.frame(GenBin)) 88 | pred_phen <- predict(beta_logisticRidge, type="response") 89 | print(cbind(pred_phen_geno, pred_phen)) 90 | ## Delete the temporary betafile 91 | unlink(betafile) 92 | } 93 | } 94 | 95 | -------------------------------------------------------------------------------- /R/linearRidgeGenotypes.R: -------------------------------------------------------------------------------- 1 | ## Linear Ridge Big function (calls C) 2 | 3 | #' @export 4 | #' @importFrom utils read.table 5 | linearRidgeGenotypes <- function(genotypesfilename, 6 | phenotypesfilename, 7 | lambda = -1, 8 | thinfilename = NULL, 9 | betafilename = NULL, 10 | approxfilename = NULL, 11 | permfilename = NULL, 12 | intercept = TRUE, 13 | verbose = FALSE) 14 | { 15 | if(!TRUE) 16 | stop("GSL >=1.14 is not installed, you cannot use this function") 17 | ## Tilde expansion of phenotypesfilename 18 | ## (Because the C code cannot cope with the tilde) 19 | phenotypesfilename <- path.expand(phenotypesfilename) 20 | ## Check phenotypes file for reading 21 | ## mode = 4 tests for read permission 22 | if(file.access(names = phenotypesfilename, mode = 4)) 23 | stop(gettextf("Cannot open file %s for reading", phenotypesfilename)) 24 | ## Tilde expansion of genotypesfilename 25 | ## (Because the C code cannot cope with the tilde) 26 | genotypesfilename <- path.expand(genotypesfilename) 27 | ## Check genotypes file for reading 28 | ## mode = 4 tests for read permission 29 | if(file.access(names = genotypesfilename, mode = 4)) 30 | stop(gettextf("Cannot open file %s for reading", genotypesfilename)) 31 | ## Check beta file name is set 32 | ## If it is not set it to beta.dat (print a warning) 33 | if(is.null(betafilename)) 34 | { 35 | betaFileExists <- FALSE 36 | betafilename <- tempfile(pattern = "beta", fileext = ".dat") 37 | } else { 38 | betaFileExists <- TRUE 39 | ## Else do the tilde expansion on betafilename 40 | ## (Because the C code cannot cope with the tilde) 41 | betafilename <- path.expand(betafilename) 42 | } 43 | ## Tilde expansion of approxfilename (if supplied) 44 | ## (Because the C code cannot cope with the tilde) 45 | if(!is.null(approxfilename)) 46 | { 47 | approxfilename <- path.expand(approxfilename) 48 | } else { 49 | ## Cannot pass NULL pointer to .C 50 | ## Therefore make it into a string 51 | approxfilename <- "NULL" 52 | } 53 | ## Tilde expansion of permfilename (if supplied) 54 | ## (Because the C code cannot cope with the tilde) 55 | if(!is.null(permfilename)) 56 | { 57 | permfilename <- path.expand(permfilename) 58 | } else { 59 | ## Cannot pass NULL pointer to .C 60 | ## Therefore make it into a string 61 | permfilename <- "NULL" 62 | } 63 | ## Tilde expansion of thinfilename (if supplied) 64 | ## (Because the C code cannot cope with the tilde) 65 | if(!is.null(thinfilename)) 66 | { 67 | ## Check if lambda has been supplied 68 | ## thinfilename is not needed if lambda has been supplied 69 | if(lambda == -1) 70 | { 71 | thinfilename <- path.expand(thinfilename) 72 | } else { 73 | stop(gettext("Cannot supply lambda and thinfilename. Please supply one or the other.")) 74 | } 75 | ## Check thinfile for read permission 76 | ## mode = 4 tests for read permission 77 | if(file.access(names = thinfilename, mode = 4)) 78 | stop(gettextf("Cannot open file %s for reading", permfilename)) 79 | } else { 80 | ## Cannot pass NULL pointer to .C 81 | ## Therefore make it into a string 82 | thinfilename <- "NULL" 83 | } 84 | res <- .C(regression_wrapper_function, 85 | genofilename = as.character(genotypesfilename), 86 | phenofilename = as.character(phenotypesfilename), 87 | betafilename = as.character(betafilename), 88 | approxfilename = as.character(approxfilename), 89 | permfilename = as.character(permfilename), 90 | thinfilename = as.character(thinfilename), 91 | intercept = as.integer(intercept), 92 | lambda = as.double(lambda), 93 | model = as.character("linear"), 94 | predict = as.integer(0), 95 | verbose = as.integer(verbose)) 96 | beta <- read.table(betafilename, row.names = 1, colClasses = c("character", "numeric"), col.names = c("", "B")) 97 | if(!betaFileExists) 98 | unlink(betafilename) 99 | return(beta) 100 | } 101 | -------------------------------------------------------------------------------- /R/linearRidgeGenotypes.R.in: -------------------------------------------------------------------------------- 1 | ## Linear Ridge Big function (calls C) 2 | 3 | #' @export 4 | #' @importFrom utils read.table 5 | linearRidgeGenotypes <- function(genotypesfilename, 6 | phenotypesfilename, 7 | lambda = -1, 8 | thinfilename = NULL, 9 | betafilename = NULL, 10 | approxfilename = NULL, 11 | permfilename = NULL, 12 | intercept = TRUE, 13 | verbose = FALSE) 14 | { 15 | if(!@HAVE_GSL@) 16 | stop("GSL >=1.14 is not installed, you cannot use this function") 17 | ## Tilde expansion of phenotypesfilename 18 | ## (Because the C code cannot cope with the tilde) 19 | phenotypesfilename <- path.expand(phenotypesfilename) 20 | ## Check phenotypes file for reading 21 | ## mode = 4 tests for read permission 22 | if(file.access(names = phenotypesfilename, mode = 4)) 23 | stop(gettextf("Cannot open file %s for reading", phenotypesfilename)) 24 | ## Tilde expansion of genotypesfilename 25 | ## (Because the C code cannot cope with the tilde) 26 | genotypesfilename <- path.expand(genotypesfilename) 27 | ## Check genotypes file for reading 28 | ## mode = 4 tests for read permission 29 | if(file.access(names = genotypesfilename, mode = 4)) 30 | stop(gettextf("Cannot open file %s for reading", genotypesfilename)) 31 | ## Check beta file name is set 32 | ## If it is not set it to beta.dat (print a warning) 33 | if(is.null(betafilename)) 34 | { 35 | betaFileExists <- FALSE 36 | betafilename <- tempfile(pattern = "beta", fileext = ".dat") 37 | } else { 38 | betaFileExists <- TRUE 39 | ## Else do the tilde expansion on betafilename 40 | ## (Because the C code cannot cope with the tilde) 41 | betafilename <- path.expand(betafilename) 42 | } 43 | ## Tilde expansion of approxfilename (if supplied) 44 | ## (Because the C code cannot cope with the tilde) 45 | if(!is.null(approxfilename)) 46 | { 47 | approxfilename <- path.expand(approxfilename) 48 | } else { 49 | ## Cannot pass NULL pointer to .C 50 | ## Therefore make it into a string 51 | approxfilename <- "NULL" 52 | } 53 | ## Tilde expansion of permfilename (if supplied) 54 | ## (Because the C code cannot cope with the tilde) 55 | if(!is.null(permfilename)) 56 | { 57 | permfilename <- path.expand(permfilename) 58 | } else { 59 | ## Cannot pass NULL pointer to .C 60 | ## Therefore make it into a string 61 | permfilename <- "NULL" 62 | } 63 | ## Tilde expansion of thinfilename (if supplied) 64 | ## (Because the C code cannot cope with the tilde) 65 | if(!is.null(thinfilename)) 66 | { 67 | ## Check if lambda has been supplied 68 | ## thinfilename is not needed if lambda has been supplied 69 | if(lambda == -1) 70 | { 71 | thinfilename <- path.expand(thinfilename) 72 | } else { 73 | stop(gettext("Cannot supply lambda and thinfilename. Please supply one or the other.")) 74 | } 75 | ## Check thinfile for read permission 76 | ## mode = 4 tests for read permission 77 | if(file.access(names = thinfilename, mode = 4)) 78 | stop(gettextf("Cannot open file %s for reading", permfilename)) 79 | } else { 80 | ## Cannot pass NULL pointer to .C 81 | ## Therefore make it into a string 82 | thinfilename <- "NULL" 83 | } 84 | res <- .C(regression_wrapper_function, 85 | genofilename = as.character(genotypesfilename), 86 | phenofilename = as.character(phenotypesfilename), 87 | betafilename = as.character(betafilename), 88 | approxfilename = as.character(approxfilename), 89 | permfilename = as.character(permfilename), 90 | thinfilename = as.character(thinfilename), 91 | intercept = as.integer(intercept), 92 | lambda = as.double(lambda), 93 | model = as.character("linear"), 94 | predict = as.integer(0), 95 | verbose = as.integer(verbose)) 96 | beta <- read.table(betafilename, row.names = 1, colClasses = c("character", "numeric"), col.names = c("", "B")) 97 | if(!betaFileExists) 98 | unlink(betafilename) 99 | return(beta) 100 | } 101 | -------------------------------------------------------------------------------- /R/logisticRidgeGenotypes.R: -------------------------------------------------------------------------------- 1 | ## Logistic Ridge Big function (calls C) 2 | 3 | #' @export 4 | #' @importFrom utils read.table 5 | logisticRidgeGenotypes <- function(genotypesfilename, 6 | phenotypesfilename, 7 | lambda = -1, 8 | thinfilename = NULL, 9 | betafilename = NULL, 10 | approxfilename = NULL, 11 | permfilename = NULL, 12 | intercept = TRUE, 13 | verbose = FALSE) 14 | { 15 | if(!TRUE) 16 | stop("GSL >=1.14 is not installed, you cannot use this function") 17 | ## Tilde expansion of phenotypesfilename 18 | ## (Because the C code cannot cope with the tilde) 19 | phenotypesfilename <- path.expand(phenotypesfilename) 20 | ## Check phenotypes file for reading 21 | ## mode = 4 tests for read permission 22 | if(file.access(names = phenotypesfilename, mode = 4)) 23 | stop(gettextf("Cannot open file %s for reading", phenotypesfilename)) 24 | ## Tilde expansion of genotypesfilename 25 | ## (Because the C code cannot cope with the tilde) 26 | genotypesfilename <- path.expand(genotypesfilename) 27 | ## Check genotypes file for reading 28 | ## mode = 4 tests for read permission 29 | if(file.access(names = genotypesfilename, mode = 4)) 30 | stop(gettextf("Cannot open file %s for reading", genotypesfilename)) 31 | ## Check beta file name is set 32 | ## If it is not set it to beta.dat (print a warning) 33 | if(is.null(betafilename)) 34 | { 35 | betaFileExists <- FALSE 36 | betafilename <- tempfile(pattern = "beta", fileext = ".dat") 37 | } else { 38 | betaFileExists <- TRUE 39 | ## Else do the tilde expansion on betafilename 40 | ## (Because the C code cannot cope with the tilde) 41 | betafilename <- path.expand(betafilename) 42 | } 43 | ## Tilde expansion of approxfilename (if supplied) 44 | ## (Because the C code cannot cope with the tilde) 45 | if(!is.null(approxfilename)) 46 | { 47 | approxfilename <- path.expand(approxfilename) 48 | } else { 49 | ## Cannot pass NULL pointer to .C 50 | ## Therefore make it into a string 51 | approxfilename <- "NULL" 52 | } 53 | ## Tilde expansion of permfilename (if supplied) 54 | ## (Because the C code cannot cope with the tilde) 55 | if(!is.null(permfilename)) 56 | { 57 | permfilename <- path.expand(permfilename) 58 | } else { 59 | ## Cannot pass NULL pointer to .C 60 | ## Therefore make it into a string 61 | permfilename <- "NULL" 62 | } 63 | ## Tilde expansion of thinfilename (if supplied) 64 | ## (Because the C code cannot cope with the tilde) 65 | if(!is.null(thinfilename)) 66 | { 67 | ## Check if lambda has been supplied 68 | ## thinfilename is not needed if lambda has been supplied 69 | if(lambda == -1) 70 | { 71 | thinfilename <- path.expand(thinfilename) 72 | } else { 73 | stop(gettext("Cannot supply lambda and thinfilename. Please supply one or the other.")) 74 | } 75 | ## Check thinfile for read permission 76 | ## mode = 4 tests for read permission 77 | if(file.access(names = thinfilename, mode = 4)) 78 | stop(gettextf("Cannot open file %s for reading", permfilename)) 79 | } else { 80 | ## Cannot pass NULL pointer to .C 81 | ## Therefore make it into a string 82 | thinfilename <- "NULL" 83 | } 84 | res <- .C(regression_wrapper_function, 85 | genofilename = as.character(genotypesfilename), 86 | phenofilename = as.character(phenotypesfilename), 87 | betafilename = as.character(betafilename), 88 | approxfilename = as.character(approxfilename), 89 | permfilename = as.character(permfilename), 90 | thinfilename = as.character(thinfilename), 91 | intercept = as.integer(intercept), 92 | lambda = as.double(lambda), 93 | model = as.character("logistic"), 94 | predict = as.integer(0), 95 | verbose = as.integer(verbose)) 96 | beta <- read.table(betafilename, row.names = 1, colClasses = c("character", "numeric"), col.names = c("", "B")) 97 | if(!betaFileExists) 98 | unlink(betafilename) 99 | return(beta) 100 | } 101 | -------------------------------------------------------------------------------- /R/logisticRidgeGenotypes.R.in: -------------------------------------------------------------------------------- 1 | ## Logistic Ridge Big function (calls C) 2 | 3 | #' @export 4 | #' @importFrom utils read.table 5 | logisticRidgeGenotypes <- function(genotypesfilename, 6 | phenotypesfilename, 7 | lambda = -1, 8 | thinfilename = NULL, 9 | betafilename = NULL, 10 | approxfilename = NULL, 11 | permfilename = NULL, 12 | intercept = TRUE, 13 | verbose = FALSE) 14 | { 15 | if(!@HAVE_GSL@) 16 | stop("GSL >=1.14 is not installed, you cannot use this function") 17 | ## Tilde expansion of phenotypesfilename 18 | ## (Because the C code cannot cope with the tilde) 19 | phenotypesfilename <- path.expand(phenotypesfilename) 20 | ## Check phenotypes file for reading 21 | ## mode = 4 tests for read permission 22 | if(file.access(names = phenotypesfilename, mode = 4)) 23 | stop(gettextf("Cannot open file %s for reading", phenotypesfilename)) 24 | ## Tilde expansion of genotypesfilename 25 | ## (Because the C code cannot cope with the tilde) 26 | genotypesfilename <- path.expand(genotypesfilename) 27 | ## Check genotypes file for reading 28 | ## mode = 4 tests for read permission 29 | if(file.access(names = genotypesfilename, mode = 4)) 30 | stop(gettextf("Cannot open file %s for reading", genotypesfilename)) 31 | ## Check beta file name is set 32 | ## If it is not set it to beta.dat (print a warning) 33 | if(is.null(betafilename)) 34 | { 35 | betaFileExists <- FALSE 36 | betafilename <- tempfile(pattern = "beta", fileext = ".dat") 37 | } else { 38 | betaFileExists <- TRUE 39 | ## Else do the tilde expansion on betafilename 40 | ## (Because the C code cannot cope with the tilde) 41 | betafilename <- path.expand(betafilename) 42 | } 43 | ## Tilde expansion of approxfilename (if supplied) 44 | ## (Because the C code cannot cope with the tilde) 45 | if(!is.null(approxfilename)) 46 | { 47 | approxfilename <- path.expand(approxfilename) 48 | } else { 49 | ## Cannot pass NULL pointer to .C 50 | ## Therefore make it into a string 51 | approxfilename <- "NULL" 52 | } 53 | ## Tilde expansion of permfilename (if supplied) 54 | ## (Because the C code cannot cope with the tilde) 55 | if(!is.null(permfilename)) 56 | { 57 | permfilename <- path.expand(permfilename) 58 | } else { 59 | ## Cannot pass NULL pointer to .C 60 | ## Therefore make it into a string 61 | permfilename <- "NULL" 62 | } 63 | ## Tilde expansion of thinfilename (if supplied) 64 | ## (Because the C code cannot cope with the tilde) 65 | if(!is.null(thinfilename)) 66 | { 67 | ## Check if lambda has been supplied 68 | ## thinfilename is not needed if lambda has been supplied 69 | if(lambda == -1) 70 | { 71 | thinfilename <- path.expand(thinfilename) 72 | } else { 73 | stop(gettext("Cannot supply lambda and thinfilename. Please supply one or the other.")) 74 | } 75 | ## Check thinfile for read permission 76 | ## mode = 4 tests for read permission 77 | if(file.access(names = thinfilename, mode = 4)) 78 | stop(gettextf("Cannot open file %s for reading", permfilename)) 79 | } else { 80 | ## Cannot pass NULL pointer to .C 81 | ## Therefore make it into a string 82 | thinfilename <- "NULL" 83 | } 84 | res <- .C(regression_wrapper_function, 85 | genofilename = as.character(genotypesfilename), 86 | phenofilename = as.character(phenotypesfilename), 87 | betafilename = as.character(betafilename), 88 | approxfilename = as.character(approxfilename), 89 | permfilename = as.character(permfilename), 90 | thinfilename = as.character(thinfilename), 91 | intercept = as.integer(intercept), 92 | lambda = as.double(lambda), 93 | model = as.character("logistic"), 94 | predict = as.integer(0), 95 | verbose = as.integer(verbose)) 96 | beta <- read.table(betafilename, row.names = 1, colClasses = c("character", "numeric"), col.names = c("", "B")) 97 | if(!betaFileExists) 98 | unlink(betafilename) 99 | return(beta) 100 | } 101 | -------------------------------------------------------------------------------- /src/ReadInData.h: -------------------------------------------------------------------------------- 1 | #include "depends.h" 2 | #ifdef HAVE_GSL_HEADER 3 | /* ReadInData.h is the header file for the source file ReadInData.c */ 4 | 5 | /*This file defines the functions writeGenotypes and writePhenotypes*/ 6 | 7 | /* scale the ys */ 8 | int scaley(GSL_TYPE(vector) * y, PREC * y_mean); 9 | 10 | /*get number of individuals by reading phenotypes file*/ 11 | int getNROW(FILE *fp); 12 | 13 | /*function prototypes - writing*/ 14 | void writePhenotypes(int NINDIV); 15 | 16 | /* Append char to string */ 17 | int appendToString(char * ptr, int *currentpos, char charToAppend); 18 | 19 | /* function prototype - prepareShrinkage */ 20 | /* The vector shrinkage is intercept_flag + NSNPS + NCOVAR long */ 21 | /* Not used in R package */ 22 | /* int prepareShrinkage(char * model, */ 23 | /* PREC * lambda, */ 24 | /* PREC * lambda_c, */ 25 | /* char * lambdafilename, */ 26 | /* char * lambdacovarfilename, */ 27 | /* int NSNPS, */ 28 | /* int NCOVAR, */ 29 | /* int intercept_flag, */ 30 | /* GSL_TYPE(vector) * shrinkage, */ 31 | /* int * automaticK, */ 32 | /* int * singleK); */ 33 | 34 | /* function prototype - concatenate two vectors */ 35 | gsl_vector * concatenateTwoVectors(gsl_vector * result, 36 | gsl_vector * vec1, 37 | gsl_vector * vec2); 38 | 39 | /* function prototype - print matrix */ 40 | void printMatrix(GSL_TYPE(matrix) * mat); 41 | 42 | /* function prototype - print matrix */ 43 | void printMatrixTen(GSL_TYPE(matrix) * mat); 44 | 45 | /* function prototype - print matrix */ 46 | void printIntMatrix(gsl_matrix_int * mat); 47 | 48 | /* function prototype - print matrix */ 49 | void printIntMatrixTen(gsl_matrix_int * mat); 50 | 51 | /* function prototype - print vector */ 52 | void printVector(GSL_TYPE(vector) * Vec); 53 | 54 | /* function prototype - print vector of ints */ 55 | void printIntVector(gsl_vector_int * Vec); 56 | 57 | /* function prototype - print vector */ 58 | void printVectorTen(GSL_TYPE(vector) * Vec); 59 | 60 | /* function prototype - print vector of ints - first ten elements */ 61 | void printIntVectorTen(gsl_vector_int * Vec); 62 | 63 | /* function prototype - compute sum of vector */ 64 | int sumVector(GSL_TYPE(vector) * vector, PREC * sum); 65 | 66 | /* function prototype - compute sum of vector - double precision - for coordinateDescent */ 67 | int sumVectorDouble(gsl_vector * vector, double * sum); 68 | 69 | /* check Model */ 70 | int checkModel(char * model); 71 | 72 | /* function prototype - prepare matrix of predictors */ 73 | GSL_TYPE(matrix) * preparePredictors(int NINDIV, 74 | int NSNPS, 75 | char ** SNPnames, 76 | int NCOVAR, 77 | char ** COVARnames, 78 | char * genofilename, 79 | char * covarfilename, 80 | int intercept_flag, 81 | int standardize_flag, 82 | int standardize_c_flag, 83 | GSL_TYPE(vector) * means, 84 | GSL_TYPE(vector) * scales, 85 | int automaticK); 86 | 87 | /* function prototype - prepare matrix of genotypes */ 88 | /* Not used in R package */ 89 | /* int prepareGenotypes(gsl_matrix * genotypes, int NINDIV, int NSNPS, char * genofilename); */ 90 | 91 | /* Get header row of file */ 92 | char ** getHeaderRow(char * filename, int *N); 93 | 94 | /* Check file exists and can be opened for reading */ 95 | void checkFileForReading(char * filename); 96 | 97 | /* Function prototype - get data from file into matrix 98 | excluding header row */ 99 | GSL_TYPE(matrix) * getDataWithoutHeaderRow(char * filename, int NROW, int NCOL); 100 | 101 | /* Check for invariant SNPs and covariates, standardize */ 102 | int checkForInvAndStandardize(GSL_TYPE(matrix) * mat, 103 | int START, 104 | int END, 105 | int standardize_flag, 106 | int corr_form_flag, 107 | GSL_TYPE(vector) * means, 108 | GSL_TYPE(vector) * sds, 109 | char ** names); 110 | 111 | int sumIntVec(gsl_vector_int * vec); 112 | 113 | int writeOut(int intercept_flag, 114 | int NSNPS, 115 | int NCOVAR, 116 | char ** SNPnames, 117 | char ** COVARnames, 118 | char * betafilename, 119 | GSL_TYPE(vector) * betaOut); 120 | 121 | int printBeta(char * name, PREC beta, FILE * file); 122 | 123 | /* Functions to safely free matrices and vectors */ 124 | 125 | int safelyFreeVector(GSL_TYPE(vector) * vec); 126 | 127 | int safelyFreeMatrix(GSL_TYPE(matrix) * mat); 128 | 129 | /* Print opening blurb */ 130 | /* Function not used in R package */ 131 | // void printOpening(void,); 132 | 133 | /* scaleX not used in R package */ 134 | /* int scaleX(gsl_matrix * X, int n2, int NSNPS); */ 135 | #endif 136 | -------------------------------------------------------------------------------- /man/logisticRidge.Rd: -------------------------------------------------------------------------------- 1 | \name{logisticRidge} 2 | \alias{logisticRidge} 3 | \alias{coef.ridgeLogistic} 4 | \alias{plot.ridgeLogistic} 5 | \alias{predict.ridgeLogistic} 6 | \alias{print.ridgeLogistic} 7 | \alias{summary.ridgeLogistic} 8 | \alias{print.summary.ridgeLogistic} 9 | \title{ 10 | Logistic ridge regression. 11 | } 12 | \description{ 13 | Fits a logistic ridge regression model. Optionally, the ridge regression parameter is chosen automatically using 14 | the method proposed by Cule et al (2012). 15 | } 16 | \usage{ 17 | logisticRidge(formula, data, lambda = "automatic", nPCs = NULL, 18 | scaling = c("corrForm", "scale", "none"), ...) 19 | 20 | \method{coef}{ridgeLogistic}(object, all.coef = FALSE, ...) 21 | 22 | \method{plot}{ridgeLogistic}(x, y = NULL, ...) 23 | 24 | \method{predict}{ridgeLogistic}(object, newdata = NULL, type = c("link", "response"), 25 | na.action = na.pass, all.coef = FALSE, ...) 26 | 27 | \method{print}{ridgeLogistic}(x, all.coef = FALSE, ...) 28 | 29 | \method{summary}{ridgeLogistic}(object, all.coef = FALSE, ...) 30 | 31 | \method{print}{summary.ridgeLogistic}(x, digits = max(3, getOption("digits") - 3), 32 | signif.stars = getOption("show.signif.stars"), ...) 33 | 34 | } 35 | \arguments{ 36 | \item{formula}{ 37 | a formula expression as for regression models, of the form \code{response ~ predictors}. See the 38 | documentation of \code{formula} for other details. 39 | } 40 | \item{data}{ 41 | an optional data frame in which to interpret the variables occuring in \code{formula}. 42 | } 43 | \item{lambda}{ 44 | A ridge regression parameter. If \code{lambda} is \code{"automatic"} (the default), then the ridge parameter 45 | is chosen automatically using the method of Cule et al (2012). 46 | } 47 | \item{nPCs}{ 48 | The number of principal components to use to choose the ridge regression parameter, following the method of 49 | Cule et al (2012). It is not possible to specify both \code{lambda} and \code{nPCs}. 50 | } 51 | \item{scaling}{ 52 | The method to be used to scale the predictors. One of 53 | \code{"corrform"}(the default) scales the predictors to correlation form, such that the correlation matrix 54 | has unit diagonal. 55 | \code{"scale"}Standardizes the predictors to have mean zero and unit variance. 56 | \code{"none"}No scaling. 57 | } 58 | \item{object}{ 59 | A ridgeLogistic object, typically generated by a call to \code{linearRidge}. 60 | } 61 | \item{newdata}{ 62 | An optional data frame in which to look for variables with 63 | which to predict. If omitted, the fitted values are used. 64 | } 65 | \item{type}{the type of prediction required. The default predictions are of log-odds 66 | (probabilities on logit scale) and \code{type = "response"} gives 67 | the predicted probabilities.} 68 | \item{na.action}{ 69 | function determining what should be done with missing values 70 | in \code{newdata}. The default is to predict \code{NA}. 71 | } 72 | \item{all.coef}{ 73 | Logical. Should results be returned for all ridge regression penalty 74 | parameters (\code{all.coef = TRUE}), or only for the ridge parameter chosen automatically using the method of Cule et al? 75 | } 76 | \item{x}{An object of class \code{ridgeLogistic} (for the 77 | \code{print.ridgeLogistic} and \code{plot.ridgeLogistic} functions) or an object of class 78 | \code{summary.ridgeLogistic} (for the \code{print.summary.ridgeLogistic} 79 | function)} 80 | \item{y}{Dummy argument for compatibility with the default \code{plot} method. Ignored.} 81 | \item{digits}{minimum number of significant digits to be used for most numbers} 82 | \item{signif.stars}{logical; if \code{TRUE}, P-values are additionally encoded 83 | visually as \code{significance stars} in order to help scanning of 84 | long coefficient tables. It defaults to the 85 | \code{show.signif.stars} slot of \code{options}. 86 | } 87 | \item{\dots}{ 88 | Additional arguments to be passed to or from other methods. 89 | } 90 | } 91 | \details{ 92 | If an intercept is present in the model, its coefficient is not penalised. If you want to penalise an 93 | intercept, put in your own constant term and remove the intercept. 94 | } 95 | \value{ 96 | An object of class \code{"ridgeLogistic"}, with components: 97 | \item{automatic}{Was \code{lambda} chosen automatically?} 98 | \item{call}{The matched call.} 99 | \item{coef}{A named vector of fitted coefficients.} 100 | \item{df}{A vector of degrees of freedom of the model fit and degrees of freedom for variance.} 101 | \item{Inter}{Was in antercept included?} 102 | \item{isScaled}{Were the predictors scaled before the model was fitted?} 103 | \item{lambda}{The ridge regression parameter.} 104 | \item{scales}{The scales used to standardize the predictors.} 105 | \item{terms}{The \code{\link{terms}} object used.} 106 | \item{x}{The scaled predictor matrix.} 107 | \item{xm}{A vector of means of the predictors.} 108 | \item{y}{The response.} 109 | And optionally the component 110 | \item{nPCs}{The number of principal components used to compute the ridge regression parameter.} 111 | } 112 | \references{ 113 | A semi-automatic method to guide the choice of ridge parameter in ridge regression. Cule, E. and De Iorio, M. (2012) arXiv:1205.0686v1 [stat.AP] 114 | } 115 | \author{ 116 | Erika Cule 117 | } 118 | \seealso{ 119 | \code{\link{linearRidge}} 120 | } 121 | \examples{ 122 | data(GenBin) 123 | mod <- logisticRidge(Phenotypes ~ ., data = as.data.frame(GenBin)) 124 | summary(mod) 125 | } 126 | -------------------------------------------------------------------------------- /man/linearRidge.Rd: -------------------------------------------------------------------------------- 1 | \name{linearRidge} 2 | \alias{linearRidge} 3 | \alias{coef.ridgeLinear} 4 | \alias{nobs.ridgeLinear} 5 | \alias{plot.ridgeLinear} 6 | \alias{predict.ridgeLinear} 7 | \alias{print.ridgeLinear} 8 | \alias{summary.ridgeLinear} 9 | \alias{print.summary.ridgeLinear} 10 | \title{ 11 | Linear ridge regression. 12 | } 13 | \description{ 14 | Fits a linear ridge regression model. Optionally, the ridge regression parameter is chosen automatically using 15 | the method proposed by Cule et al (2012). 16 | } 17 | \usage{ 18 | linearRidge(formula, data, lambda = "automatic", nPCs = NULL, 19 | scaling = c("corrForm", "scale", "none"), ...) 20 | 21 | \method{coef}{ridgeLinear}(object, all.coef = FALSE, ...) 22 | 23 | \method{plot}{ridgeLinear}(x, y = NULL, ...) 24 | 25 | \method{predict}{ridgeLinear}(object, newdata, na.action = na.pass, all.coef = FALSE, ...) 26 | 27 | \method{print}{ridgeLinear}(x, all.coef = FALSE, ...) 28 | 29 | \method{summary}{ridgeLinear}(object, all.coef = FALSE, ...) 30 | 31 | \method{print}{summary.ridgeLinear}(x, digits = max(3, 32 | getOption("digits") - 3), 33 | signif.stars = getOption("show.signif.stars"), ...) 34 | 35 | } 36 | \arguments{ 37 | \item{formula}{ 38 | a formula expression as for regression models, of the form \code{response ~ predictors}. See the 39 | documentation of \code{formula} for other details. 40 | } 41 | \item{data}{ 42 | an optional data frame in which to interpret the variables occuring in \code{formula}. 43 | } 44 | \item{lambda}{ 45 | A ridge regression parameter. May be a vector. If \code{lambda} is \code{"automatic"} (the default), then the ridge parameter 46 | is chosen automatically using the method of Cule et al (2012). 47 | } 48 | \item{nPCs}{ 49 | The number of principal components to use to choose the ridge regression parameter, following the method of 50 | Cule et al (2012). It is not possible to specify both \code{lambda} and \code{nPCs}. 51 | } 52 | \item{scaling}{ 53 | The method to be used to scale the predictors. One of 54 | \code{"corrform"}(the default) scales the predictors to correlation form, such that the correlation matrix 55 | has unit diagonal. 56 | \code{"scale"}Standardizes the predictors to have mean zero and unit variance. 57 | \code{"none"}No scaling. 58 | } 59 | \item{object}{ 60 | A ridgeLinear object, typically generated by a call to \code{linearRidge}. 61 | } 62 | \item{newdata}{ 63 | An optional data frame in which to look for variables with 64 | which to predict. If omitted, the fitted values are used. 65 | } 66 | \item{na.action}{ 67 | function determining what should be done with missing values 68 | in \code{newdata}. The default is to predict \code{NA}. 69 | } 70 | \item{all.coef}{ 71 | Logical. Should results be returned for all ridge regression penalty 72 | parameters (\code{all.coef = TRUE}), or only for the ridge parameter chosen automatically using the method of Cule et al? 73 | } 74 | \item{x}{An object of class \code{ridgeLinear} (for the 75 | \code{print.ridgeLinear} and \code{plot.ridgeLinear} functions) or an object of class 76 | \code{summary.ridgeLinear} (for the \code{print.summary.ridgeLinear} 77 | function)} 78 | \item{y}{Dummy argument for compatibility with the default \code{plot} 79 | method. Ignored.} 80 | \item{digits}{minimum number of significant digits to be used for most numbers} 81 | \item{signif.stars}{logical; if \code{TRUE}, P-values are additionally encoded 82 | visually as \code{significance stars} in order to help scanning of 83 | long coefficient tables. It defaults to the 84 | \code{show.signif.stars} slot of \code{options}. 85 | } 86 | \item{\dots}{ 87 | Additional arguments to be passed to or from other methods. 88 | } 89 | 90 | } 91 | \details{ 92 | If an intercept is present in the model, its coefficient is not penalised. If you want to penalise an 93 | intercept, put in your own constant term and remove the intercept. 94 | } 95 | \value{ 96 | An object of class \code{"ridgeLinear"}, with components: 97 | \item{automatic}{Logical. Was \code{lambda} chosen automatically?} 98 | \item{call}{The matched call.} 99 | \item{coef}{A named vector of fitted coefficients.} 100 | \item{df}{A vector of degrees of freedom of the model fit, degrees of freedom for variance, and residual 101 | degrees of freedom of the fitted model.} 102 | \item{Inter}{Was an intercept included?} 103 | \item{isScaled}{Were the predictors scaled before the model was fitted?} 104 | \item{lambda}{The ridge regression parameter(s).} 105 | \item{scales}{The scales used to standardize the predictors.} 106 | \item{terms}{The \code{\link{terms}} object used.} 107 | \item{x}{The scaled predictor matrix.} 108 | \item{xm}{A vector of means of the predictors.} 109 | \item{y}{The response.} 110 | \item{ym}{The mean of the response.} 111 | 112 | And optionally the components 113 | 114 | \item{max.nPCs}{The maximum number of principal components for which a ridge regression parameter was computed.} 115 | \item{chosen.nPCs}{The number of principal components used to compute the ridge parameter.} 116 | } 117 | \references{ 118 | A semi-automatic method to guide the choice of ridge parameter in ridge regression. Cule, E. and De Iorio, M. (2012) arXiv:1205.0686v1 [stat.AP] 119 | } 120 | \author{ 121 | Erika Cule 122 | } 123 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 124 | 125 | \seealso{ 126 | \code{\link{logisticRidge}} 127 | } 128 | \examples{ 129 | data(GenCont) 130 | mod <- linearRidge(Phenotypes ~ ., data = as.data.frame(GenCont)) 131 | summary(mod) 132 | } 133 | -------------------------------------------------------------------------------- /src/computeLinearRidge.c: -------------------------------------------------------------------------------- 1 | #include "computeLinearRidge.h" 2 | #ifdef HAVE_GSL_HEADER 3 | 4 | 5 | /* SVD of any matrix */ 6 | int svdAnyMat(gsl_matrix * X, 7 | gsl_matrix * U, 8 | gsl_matrix * V, 9 | gsl_vector * D) 10 | { 11 | // SVD part 12 | gsl_vector * work; 13 | int n = X->size1; 14 | int p = X->size2; 15 | if(p > n) 16 | { 17 | work = gsl_vector_alloc(n); 18 | gsl_matrix * tmpV = gsl_matrix_alloc(n, n); 19 | gsl_matrix * tmpU = gsl_matrix_alloc(p, n); 20 | // Transpose the matrix X 21 | gsl_matrix_transpose_memcpy(tmpU, X); 22 | // TmpU contians t(X) 23 | // There is linear dependence in X 24 | // Need to replace the NaNs with zeros in the matrix 25 | // Or something 26 | gsl_linalg_SV_decomp(tmpU, tmpV, D, work); 27 | gsl_vector_free(work); 28 | // Swap back 29 | gsl_matrix * tmp1 = gsl_matrix_alloc(tmpU->size1, tmpU->size2); 30 | gsl_matrix * tmp2 = gsl_matrix_alloc(tmpV->size1, tmpV->size2); 31 | gsl_matrix_memcpy(tmp1, tmpU); 32 | gsl_matrix_memcpy(tmp2, tmpV); 33 | gsl_matrix_free(tmpU); 34 | gsl_matrix_free(tmpV); 35 | gsl_matrix_memcpy(V, tmp1); 36 | gsl_matrix_memcpy(U, tmp2); 37 | gsl_matrix_free(tmp1); 38 | gsl_matrix_free(tmp2); 39 | } else { 40 | work = gsl_vector_alloc(p); 41 | gsl_matrix_memcpy(U, X); 42 | gsl_linalg_SV_decomp(U, V, D, work); 43 | gsl_vector_free(work); 44 | } 45 | return 0; 46 | } 47 | 48 | int prepareLambdas(gsl_vector * y, 49 | gsl_matrix * U, 50 | gsl_vector * D2, 51 | gsl_vector * lambdaVeckHKB, 52 | char * skhkbfilename, 53 | char * sklwfilename, 54 | gsl_vector * lambdaVeckLW, 55 | int randomized, 56 | int s) 57 | { 58 | double kHKB; 59 | double kLW; 60 | double crossprod; 61 | double numerator; 62 | double denominatorkHKB; 63 | double denominatorkLW; 64 | int lengthLambdaVec = lambdaVeckHKB->size; 65 | gsl_matrix_view Uview; // a matrix view 66 | int n = y->size; 67 | int i, j; 68 | gsl_vector * resid = gsl_vector_alloc(n); 69 | gsl_matrix * H = gsl_matrix_alloc(n, n); 70 | for(i = 0; i < lengthLambdaVec; i++) 71 | { 72 | gsl_matrix * diag = gsl_matrix_calloc((i+1), (i+1)); 73 | Uview = gsl_matrix_submatrix(U, 0, 0, n, (i + 1)); 74 | // Make the hat matrix 75 | gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, &Uview.matrix, &Uview.matrix, 0.0, H); 76 | // make the fitted ys - put in the resid vector 77 | gsl_blas_dgemv(CblasNoTrans, 1.0, H, y, 0.0, resid); 78 | // make the denominaotor for kLW 79 | if(sklwfilename != NULL) 80 | { 81 | gsl_blas_ddot(y, resid, &denominatorkLW); 82 | } 83 | // Make the residual vector 84 | gsl_vector_scale(resid, -1); 85 | gsl_vector_add(resid, y); 86 | // make the crossproduct 87 | gsl_blas_ddot(resid, resid, &crossprod); 88 | // times it by i 89 | numerator = crossprod * ((float) i + 1.0); 90 | // this gives the numerator 91 | // Make the denominator for kHKB 92 | // Make the diagonal matrix 93 | for(j = 0; j < diag->size1; j++) 94 | { 95 | gsl_matrix_set(diag, j, j, 1.0 / gsl_vector_get(D2, j)); 96 | } 97 | // 98 | // Make the matrix U diag D2 99 | gsl_matrix * UD2 = gsl_matrix_alloc(n, (i + 1)); 100 | gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Uview.matrix, diag, 0.0, UD2); 101 | // Make the matrix U diag D2 U' - put it into H 102 | gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, UD2, &Uview.matrix, 0.0, H); 103 | // Make the matrix U diag D2 U' y - put it into resid 104 | gsl_blas_dgemv(CblasNoTrans, 1.0, H, y, 0.0, resid); 105 | // Make the dot product 106 | gsl_blas_ddot(y, resid, &denominatorkHKB); 107 | // put in the matrix 108 | if(skhkbfilename != NULL) 109 | { 110 | gsl_blas_ddot(y, resid, &denominatorkHKB); 111 | denominatorkHKB = ((float) n - (float) i - 1.0) * denominatorkHKB; 112 | kHKB = numerator / denominatorkHKB; 113 | gsl_vector_set(lambdaVeckHKB, i, kHKB); 114 | } 115 | if(sklwfilename != NULL) 116 | { 117 | denominatorkLW = ((float) n - (float) i - 1.0) * denominatorkLW; 118 | kLW = numerator / denominatorkLW; 119 | gsl_vector_set(lambdaVeckLW, i, kLW); 120 | } 121 | gsl_matrix_free(UD2); 122 | gsl_matrix_free(diag); 123 | } 124 | if(randomized) 125 | { 126 | gsl_rng * rndm = gsl_rng_alloc(gsl_rng_mt19937); 127 | double weight; 128 | gsl_rng_set(rndm, s); 129 | for(i=0; isize; i++) 130 | { 131 | weight = gsl_ran_flat(rndm, 0.2, 1.0); 132 | gsl_vector_set(lambdaVeckHKB, i, weight * gsl_vector_get(lambdaVeckHKB, i)); 133 | weight = gsl_ran_flat(rndm, 0.2, 1.0); 134 | gsl_vector_set(lambdaVeckLW, i, weight * gsl_vector_get(lambdaVeckLW, i)); 135 | } 136 | gsl_rng_free(rndm); 137 | } 138 | gsl_vector_free(resid); 139 | gsl_matrix_free(H); 140 | return 0; 141 | } 142 | 143 | /* /\* and one function for which parts are different *\/ */ 144 | /* gsl_vector * computeLinearRidge(gsl_matrix * VoneOverS, gsl_vector * d2, gsl_vector * ahat, gsl_vector * Bridge, double shrinkage) */ 145 | /* { */ 146 | /* int i; */ 147 | /* gsl_vector * aridge = gsl_vector_alloc(ahat->size); */ 148 | /* gsl_vector_memcpy(aridge, ahat); */ 149 | /* gsl_vector * d2tmp = gsl_vector_alloc(d2->size); */ 150 | /* gsl_vector_memcpy(d2tmp, d2); */ 151 | /* gsl_vector_mul(aridge, d2); */ 152 | /* gsl_vector_add_constant(d2tmp, shrinkage); */ 153 | /* gsl_vector_div(aridge, d2tmp); */ 154 | /* gsl_blas_dgemv(CblasNoTrans, 1.0, VoneOverS, aridge, 0.0, Bridge); */ 155 | /* gsl_vector_free(aridge); */ 156 | /* gsl_vector_free(d2tmp); */ 157 | /* return Bridge; */ 158 | /* } */ 159 | 160 | #endif 161 | 162 | typedef int make_iso_compilers_happy; 163 | 164 | -------------------------------------------------------------------------------- /man/linearRidgeGenotypes.Rd: -------------------------------------------------------------------------------- 1 | \name{linearRidgeGenotypes} 2 | \alias{linearRidgeGenotypes} 3 | 4 | \title{ 5 | Fits linear ridge regression models for genome-wide SNP data. 6 | } 7 | \description{ 8 | Fits linear ridge regression models for genome-wide SNP data. The SNP 9 | genotypes are not read into R but file names are passed the code 10 | directly, enabling the analysis of genome-wide scale SNP data sets. 11 | } 12 | \usage{ 13 | linearRidgeGenotypes(genotypesfilename, phenotypesfilename, lambda = -1, 14 | thinfilename = NULL, betafilename = NULL, approxfilename = NULL, 15 | permfilename = NULL, intercept = 16 | TRUE, verbose = FALSE) 17 | } 18 | 19 | \arguments{ 20 | \item{genotypesfilename}{ 21 | character string: path to file containing SNP genotypes coded 0, 1, 22 | 2. See \code{Input file formats}. 23 | } 24 | \item{phenotypesfilename}{ 25 | character string: path to file containing phenotypes. See \code{Input file formats}. 26 | } 27 | \item{lambda}{ 28 | (optional) shrinkage parameter. If not provided, the default denotes 29 | automatic choice of the shrinkage parameter using the method of Cule & 30 | De Iorio (2012). 31 | } 32 | \item{thinfilename}{ 33 | (optional) character string: path to file containing three columns: SNP name, chromosme and SNP psotion. See \code{Input file formats}. (See \code{details}.) 34 | } 35 | \item{betafilename}{ 36 | (optional) character string: path to file where the output will be written. See \code{Output file formats}. 37 | } 38 | \item{approxfilename}{ 39 | (optional) character string: path to fine where the approximate test p-values will be written. 40 | Approximate p-values are not computed unless this argument is given. Approximate p-values 41 | are computed using the method of Cule et al (2011). See \code{Output file formats}. 42 | } 43 | \item{permfilename}{ 44 | (optional) character string: path to file where the permutation test 45 | p-values will be written. 46 | Permutation test p-values are not computed unless this argument is 47 | given. (See warning). See \code{Output file formats}. 48 | } 49 | \item{intercept}{ 50 | Logical: Should the ridge regression model be fitted with an 51 | intercept? (Defaults to \code{TRUE}) 52 | } 53 | \item{verbose}{ 54 | Logical: If \code{TRUE}, additional information is printed to the R 55 | output as the code runs. Defaults to \code{FALSE}. 56 | } 57 | } 58 | 59 | \section{Input file formats}{ 60 | \describe{ 61 | \item{genotypesfilename:}{A header row, plus one row for each 62 | individual, one SNP per column. The header row contains SNP 63 | names. SNPs are coded as 0, 1, 2 for minor allele count. Missing 64 | values are not accommodated. Invariant SNPs in the data cause an 65 | error, please remove these from the file before calling the function.} 66 | \item{phenofilename:}{A single column of phenotypes with the individuals in the same order as those in the file \code{genotypesfilename}.} 67 | \item{thin:}{(optional) Three columns and the same number of rows as there are SNPs in the file \code{genotypesfilename}, one row per SNP. First column: SNP names (must match names in \code{genotypesfilename}); second column: chromosome; third column: SNP position in BP.} 68 | } 69 | } 70 | 71 | \section{Output file formats}{ 72 | All output files are optional. Whether or not \code{betafilename} is provided, fitted coefficients are returned to the R workshpace. If \code{betafilename} is provided, fitted coefficients are written to the file specified (in addition). 73 | \describe{ 74 | \item{betafilename:}{Two columns: First column is SNP names in same order as in \code{genotypesfilename}, second column is fitted coefficients. If \code{intercept = TRUE} (the default) then the first row is the fitted intercept (with the name Intercept in the first column).} 75 | \item{approxfilename:}{Two columns: First column is SNP names in same order as in \code{genotypesfilename}, second column is approximate p-values.} 76 | \item{permfilename:}{Two columns: First column is SNP names in same order as in \code{genotypesfilename}, second column is permutation p-values.} 77 | } 78 | } 79 | 80 | \details{ 81 | If a file \code{thin} is supplied, and the shrinkage parameter 82 | \code{lambda} is being computed automatically based on the data, then 83 | this file is used to thin the SNP data by SNP position. If this file 84 | is not supplied, SNPs are thinned automatically based on number of SNPs. 85 | } 86 | 87 | \value{ 88 | The vector of fitted ridge regression coefficients. 89 | If \code{betafilename} is given, the fitted coefficients are written to this 90 | file as well as being returned. 91 | If \code{approxfilename} and/or \code{permfilename} are given, results of approximate 92 | test p-values and/or permutation test p-values are written to the files 93 | given in their arguments. 94 | } 95 | 96 | \section{Warning }{ 97 | When data are large, the permutation test p-values 98 | may take a very long time to compute. It is recommended not to request 99 | permutation test p-values (using the argument \code{permfilename}) 100 | when data are large.} 101 | 102 | \references{ 103 | Significance testing in ridge regression for genetic data. Cule, E. et 104 | al (2011) BMC Bioinformatics, 12:372 105 | A semi-automatic method to guide the choice of ridge parameter in 106 | ridge regression. Cule, E. and De Iorio, M. (2012) arXiv:1205.0686v1 [stat.AP] 107 | } 108 | 109 | \author{ 110 | Erika Cule 111 | } 112 | 113 | \seealso{ 114 | \code{\link{linearRidge}} for fitting linear ridge regression models 115 | when the data are small enough to be read into R. 116 | \code{\link{logisticRidge}} and \code{\link{logisticRidgeGenotypes}} for fitting logistic ridge 117 | regression models. 118 | } 119 | 120 | \examples{ 121 | \dontrun{ 122 | genotypesfile <- system.file("extdata","GenCont_genotypes.txt",package = "ridge") 123 | phenotypesfile <- system.file("extdata","GenCont_phenotypes.txt",package = "ridge") 124 | beta_linearRidgeGenotypes <- linearRidgeGenotypes(genotypesfilename = genotypesfile, 125 | phenotypesfilename = phenotypesfile) 126 | ## compare to output of linearRidge 127 | data(GenCont) ## Same data as in GenCont_genotypes.txt and GenCont_phenotypes.txt 128 | beta_linearRidge <- linearRidge(Phenotypes ~ ., data = as.data.frame(GenCont)) 129 | cbind(round(coef(beta_linearRidge), 6), beta_linearRidgeGenotypes) 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /man/logisticRidgeGenotypes.Rd: -------------------------------------------------------------------------------- 1 | \name{logisticRidgeGenotypes} 2 | \alias{logisticRidgeGenotypes} 3 | 4 | \title{ 5 | Fits logistic ridge regression models for genomoe-wide SNP data. 6 | } 7 | \description{ 8 | Fits logistic ridge regression models for genome-wide SNP data. The SNP 9 | genotypes are not read into R but file names are passed to the code 10 | directly, enabling the analysis of genome-wide SNP data sets which are 11 | too big to be read into R. 12 | } 13 | \usage{ 14 | logisticRidgeGenotypes(genotypesfilename, phenotypesfilename, lambda = -1, 15 | thinfilename = NULL, betafilename = NULL, approxfilename = NULL, 16 | permfilename = NULL, intercept = 17 | TRUE, verbose = FALSE) 18 | } 19 | 20 | \arguments{ 21 | \item{genotypesfilename}{ 22 | character string: path to file containing SNP genotypes coded 0, 1, 23 | 2. See \code{Input file formats}. 24 | } 25 | \item{phenotypesfilename}{ 26 | character string: path to file containing phenotypes. See \code{Input file formats}. 27 | } 28 | \item{lambda}{ 29 | (optional) shrinkage parameter. If not provided, the default denotes 30 | automatic choice of the shrinkage parameter using the method of Cule & 31 | De Iorio (2012). 32 | } 33 | \item{thinfilename}{ 34 | (optional) character string: path to file containing three columns: SNP name, chromosme and SNP psotion. See \code{Input file formats}. (See \code{details}.) 35 | } 36 | \item{betafilename}{ 37 | (optional) character string: path to file where the output will be written. See \code{Output file formats}. 38 | } 39 | \item{approxfilename}{ 40 | (optional) character string: path to fine where the approximate test p-values will be written. 41 | Approximate p-values are not computed unless this argument is given. Approximate p-values 42 | are computed using the method of Cule et al (2011). See \code{Output file formats}. 43 | } 44 | \item{permfilename}{ 45 | (optional) character string: path to file where the permutation test 46 | p-values will be written. 47 | Permutation test p-values are not computed unless this argument is 48 | given. (See warning). See \code{Output file formats}. 49 | } 50 | \item{intercept}{ 51 | Logical: Should the ridge regression model be fitted with an 52 | intercept? Defaults to \code{TRUE}. 53 | } 54 | \item{verbose}{ 55 | Logical: If \code{TRUE}, additional information is printed to the R 56 | output as the code runs. Defaults to \code{FALSE}. 57 | } 58 | } 59 | 60 | \section{Input file formats}{ 61 | \describe{ 62 | \item{genotypesfilename:}{A header row, plus one row for each 63 | individual, one SNP per column. The header row contains SNP 64 | names. SNPs are coded as 0, 1, 2 for minor allele count. Missing 65 | values are not accommodated. Invariant SNPs in the data cause an 66 | error, please remove these from the file before calling the function.} 67 | \item{phenofilename:}{A single column of phenotypes with the individuals in the same order as those in the file \code{genotypesfilename}. Phenotypes must be coded as 0 or 1.} 68 | \item{thin:}{(optional) Three columns and the same number of rows as there are SNPs in the file \code{genotypesfilename}, one row per SNP. First column: SNP names (must match names in \code{genotypesfilename}); second column: chromosome; third column: SNP position in BP.} 69 | } 70 | } 71 | 72 | \section{Output file formats}{ 73 | All output files are optional. Whether or not \code{betafilename} is provided, fitted coefficients are returned to the R workshpace. If \code{betafilename} is provided, fitted coefficients are written to the file specified (in addition). 74 | \describe{ 75 | \item{betafilename:}{Two columns: First column is SNP names in same order as in \code{genotypesfilename}, second column is fitted coefficients. If \code{intercept = TRUE} (the default) then the first row is the fitted intercept (with the name Intercept in the first column).} 76 | \item{approxfilename:}{Two columns: First column is SNP names in same order as in \code{genotypesfilename}, second column is approximate p-values.} 77 | \item{permfilename:}{Two columns: First column is SNP names in same order as in \code{genotypesfilename}, second column is permutation p-values.} 78 | } 79 | } 80 | 81 | \details{ 82 | If a file \code{thin} is supplied, and the shrinkage parameter 83 | \code{lambda} is being computed automatically based on the data, then 84 | this file is used to thin the SNP data by SNP position. If this file 85 | is not supplied, SNPs are thinned automatically based on number of SNPs. 86 | } 87 | 88 | \value{ 89 | The vector of fitted ridge regression coefficients. 90 | If \code{betafilename} is given, the fitted coefficients are written to this 91 | file as well as being returned. 92 | If \code{approxfilename} and/or \code{permfilename} are given, results of approximate 93 | test p-values and/or permutation test p-values are written to the files 94 | given in their arguments. 95 | } 96 | 97 | \section{Warning }{When data are large, the permutation test p-values 98 | may take a very long time to compute. It is recommended not to request 99 | permutation test p-values (using the argument \code{permfilename}) 100 | when data are large. 101 | } 102 | 103 | 104 | \references{ 105 | Significance testing in ridge regression for genetic data. Cule, E. et 106 | al (2011) BMC Bioinformatics, 12:372 107 | A semi-automatic method to guide the choice of ridge parameter in 108 | ridge regression. Cule, E. and De Iorio, M. (2012) arXiv:1205.0686v1 [stat.AP] 109 | } 110 | 111 | \author{ 112 | Erika Cule 113 | } 114 | 115 | \seealso{ 116 | \code{\link{logisticRidge}} for fitting logistic ridge regression models 117 | when the data are small enough to be read into R. 118 | \code{\link{linearRidge}} and \code{\link{linearRidgeGenotypes}} for fitting linear ridge 119 | regression models. 120 | } 121 | 122 | \examples{ 123 | \dontrun{ 124 | genotypesfile <- system.file("extdata","GenBin_genotypes.txt",package = "ridge") 125 | phenotypesfile <- system.file("extdata","GenBin_phenotypes.txt",package = "ridge") 126 | beta_logisticRidgeGenotypes <- 127 | logisticRidgeGenotypes(genotypesfilename = genotypesfile, phenotypesfilename = phenotypesfile) 128 | ## compare to output of logisticRidge 129 | data(GenBin) ## Same data as in GenBin_genotypes.txt and GenBin_phenotypes.txt 130 | beta_logisticRidge <- logisticRidge(Phenotypes ~ ., data = as.data.frame(GenBin)) 131 | cbind(round(coef(beta_logisticRidge), 6), beta_logisticRidgeGenotypes) 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /R/logisticRidge.R: -------------------------------------------------------------------------------- 1 | ## R function to fit the logistic ridge regression model 2 | 3 | #' @export 4 | #' @importFrom stats model.response model.matrix 5 | logisticRidge <- function(formula, data, lambda = "automatic", 6 | nPCs = NULL, scaling = c("corrForm", "scale", "none"), ...) 7 | { 8 | ## Check arguments 9 | if(lambda != "automatic" && !is.null(nPCs)) 10 | { 11 | stop(gettextf("you cannot specify both lambda and nPCs\n")) 12 | } else if(lambda == "automatic" && !is.null(nPCs)) { 13 | lambda <- NULL 14 | } 15 | automatic <- FALSE 16 | cl <- match.call() 17 | m <- match.call(expand.dots = FALSE) 18 | scaling <- match.arg(scaling) 19 | if((lambda == "automatic" && !is.null(lambda)) && scaling != "corrForm") 20 | { 21 | warning("lambda is chosen automatically so scaling is set to \"corrForm\" ") 22 | scaling <- "corrForm" 23 | cl$scaling <- scaling 24 | } 25 | if(is.null(lambda) && scaling != "corrForm") 26 | { 27 | warning("lambda is chosen based on number of components so scaling is set to \"corrForm\"") 28 | scaling <- "corrForm" 29 | cl$scaling <- scaling 30 | } 31 | if(scaling == "none") 32 | { 33 | isScaled <- FALSE 34 | corrForm <- FALSE 35 | standardize <- FALSE 36 | } else if (scaling == "corrForm") { 37 | isScaled <- TRUE 38 | corrForm <- TRUE 39 | standardize <- FALSE 40 | } else if (scaling == "scale") { 41 | isScaled <- TRUE 42 | corrForm <- FALSE 43 | standardize <- TRUE 44 | } 45 | m$model <- m$nPCs <- m$... <- m$lambda <- m$scaling <- NULL 46 | m[[1L]] <- as.name("model.frame") 47 | m <- eval.parent(m) 48 | Terms <- attr(m, "terms") 49 | 50 | ## Extract the response 51 | Y <- model.response(m) 52 | ## Construct the design matrix 53 | X <- model.matrix(Terms, m) 54 | ## get the dimensions of X in terms of n and p 55 | n <- nrow(X) 56 | p <- ncol(X) 57 | if(!is.null(nPCs) && nPCs > min(n, p)) 58 | stop(gettextf("You specified %d PCs which is greater than the maximum number of PCs in the data\n", nPCs)) 59 | ## Handle the intercept 60 | if (Inter <- attr(Terms, "intercept")) 61 | { 62 | Xm <- colMeans(X[, -Inter]) 63 | p <- p - 1 64 | ## Subtract the means from X 65 | X <- X[, -Inter] - rep(Xm, rep(n, p)) 66 | } 67 | else Ym <- Xm <- NA ## Else Ym and Xm are not needed 68 | ## Because an intercept does not have to be calculated 69 | if(corrForm) 70 | { 71 | Xscale <- drop(rep(1/(n - 1), n) %*% apply(X, 2, function(x){x - mean(x)})^2)^0.5 * sqrt(nrow(X) - 1) 72 | } else if (standardize) { 73 | Xscale <- drop(rep(1/(n - 1), n) %*% apply(X, 2, function(x){x - mean(x)})^2)^0.5 74 | } else { 75 | Xscale <- drop(rep(1, p)) 76 | names(Xscale) <- colnames(X) 77 | } 78 | X <- X/rep(Xscale, rep(n, p)) 79 | Xs <- svd(X) 80 | Q <- Xs$v 81 | ## Make the principal components 82 | Z <- X %*% Q 83 | Lambda <- Xs$d^2 84 | if(!is.null(lambda) && lambda == "automatic") 85 | { 86 | automatic <- TRUE 87 | if(is.null(nPCs)) 88 | { 89 | propVar <- cumsum(Lambda) / sum(Lambda) * 100 90 | 91 | # if there are values for propVar below 90 92 | if (length(which.max(propVar[propVar < 90])) > 0) 93 | { 94 | ifelse((length(propVar[propVar >= 90]) > 0), (max.nPCs <- which.max(propVar[propVar < 90]) + 1), (max.nPCs <- which.max(propVar[propVar < 90]))) 95 | } 96 | else 97 | { 98 | max.nPCs <- 1 99 | } 100 | } 101 | } 102 | if(lambda == "automatic" && is.null(nPCs)) 103 | { 104 | ks.vector <- vec.df <- numeric(max.nPCs) 105 | flag <- TRUE 106 | P <- 0 107 | while(P < max.nPCs && flag) 108 | { 109 | P <- P + 1 110 | tryCatch({ 111 | ks.vector[P] <- P / sum(computeRidgeLogistic(Z[,1:P], Y, 0, intercept = FALSE)^2) 112 | fittedB <- computeRidgeLogistic(X, Y, k = ks.vector[P], intercept = Inter, doff = TRUE) 113 | vec.df[P] <- fittedB$doff[2] 114 | }, error = function(e) { 115 | flag <- FALSE 116 | } 117 | ) 118 | } ## Ends while 119 | options("warn" = -1) 120 | nPCs <- ifelse(!is.infinite(min(which(ks.vector == 0))), 121 | min(which(ks.vector == 0)) - 1, 122 | max.nPCs) 123 | options("warn" = 0) 124 | ks.vector <- ks.vector[1:nPCs] 125 | vec.df <- vec.df[1:nPCs] 126 | lambda <- ks.vector 127 | ## Choose best lambda 128 | chosen.nPCs <- which.min(abs(vec.df - seq(nPCs) - Inter)) ## The -1 is for the intercept which was included in doff 129 | max.nPCs <- nPCs 130 | ## Ends if Lambda == automatic 131 | } else if (!is.null(nPCs)) { 132 | P <- nPCs 133 | tryCatch({ 134 | lambda <- P / sum(computeRidgeLogistic(Z[,1:P], Y, 0, intercept = FALSE)^2) 135 | }, error = function(e) { 136 | stop(gettextf("Unable to fit logistic ridge model using %d components\n", nPCs)) 137 | }) 138 | chosen.nPCs <- nPCs 139 | } 140 | ## Need to make a matrix of fitted B 141 | ## Don't scale X because X are already scaled and because B on scaled data is needed to compute p-values 142 | fittedB <- lapply(lambda, function(x){computeRidgeLogistic(X, Y, k = x, intercept = Inter, doff = TRUE)}) 143 | coef <- lapply(fittedB, function(x){x$B}) 144 | df <- lapply(fittedB, function(x){x$doff}) 145 | coef <- do.call(cbind, coef) 146 | ## Need to fix this line 147 | ifelse(Inter, 148 | rownames(coef) <- c("(Intercept)", colnames(X)), 149 | rownames <- colnames(X)) 150 | if(!is.null(nPCs)) 151 | { 152 | if(length(lambda) == 1) 153 | { 154 | colnames(coef) <- paste("nPCs", chosen.nPCs, sep = "") 155 | } else { 156 | colnames(coef) <- paste("nPCs", seq(max.nPCs), sep = "") 157 | } 158 | } else { 159 | colnames(coef) <- paste("lambda=", lambda, sep = "") 160 | } 161 | df <- do.call(rbind, df) 162 | if(!is.null(nPCs)) 163 | { 164 | if(length(lambda) == 1) 165 | { 166 | rownames(df) <- paste("nPCs", chosen.nPCs, sep = "") 167 | } else { 168 | rownames(df) <- paste("nPCs", seq(max.nPCs), sep = "") 169 | } 170 | } else { 171 | rownames(df) <- paste("lambda=", lambda, sep = "") 172 | } 173 | ## 174 | colnames(df) <- c("model", "variance") 175 | res <- list(automatic = automatic, call = cl, coef = cbind(drop(coef)), df = df, Inter = Inter, isScaled = isScaled, lambda = lambda, scales = Xscale, terms = Terms, x = X, xm = Xm, y = Y, model_frame = m) 176 | ## 177 | if(!is.null(nPCs)) 178 | { 179 | if(automatic) 180 | { 181 | res$max.nPCs <- max.nPCs 182 | } 183 | res$chosen.nPCs <- chosen.nPCs 184 | } 185 | class(res) <- "ridgeLogistic" 186 | res 187 | } 188 | -------------------------------------------------------------------------------- /tests/testthat/test.R: -------------------------------------------------------------------------------- 1 | library("ridge") 2 | library("datasets") 3 | 4 | tol <- 0.0001 5 | 6 | context("Basic tests") 7 | 8 | test_that("test linearRidge agrees with lm when lambda = 0", { 9 | model1 <- lm(mpg ~ wt + cyl, data = mtcars) 10 | model2 <- linearRidge(mpg ~ wt + cyl, data = mtcars, lambda = 0) 11 | 12 | expect_equal(coef(model1), coef(model2), tolerance = tol, label = "coefficients") 13 | expect_equal(predict(model1), predict(model2), tolerance = tol, label = "predictions") 14 | }) 15 | 16 | test_that("test linearRidge with formula variable that goes out of scope", { 17 | the_formula <- formula('mpg ~ wt + cyl') 18 | model1 <- lm(the_formula, data = mtcars) 19 | model2 <- linearRidge(the_formula, data = mtcars, lambda = 0) 20 | # suppose the_formula goes away: 21 | rm(the_formula) 22 | 23 | expect_equal(coef(model1), coef(model2), tolerance = tol, label = "coefficients") 24 | expect_equal(predict(model1), predict(model2), tolerance = tol, label = "predictions") 25 | }) 26 | 27 | test_that("test linearRidge near lm when lambda = 0.01", { 28 | model1 <- lm(mpg ~ wt + cyl, data = mtcars) 29 | model2 <- linearRidge(mpg ~ wt + cyl, data = mtcars, lambda = 0.01) 30 | 31 | # at least one coefficient is greater than 0.01 different: 32 | expect(any(abs(coef(model1) - coef(model2)) > 0.01), 33 | "coefficients agree too strongly") 34 | # all coefficients are less than 0.11 different: 35 | expect(all(abs(coef(model1) - coef(model2)) < 0.11), 36 | "coefficients too different") 37 | }) 38 | 39 | test_that("test linearRidge vcov method for lambda = 0, two terms", { 40 | model1 <- lm(mpg ~ wt + cyl, data = mtcars) 41 | model2 <- linearRidge(mpg ~ wt + cyl, data = mtcars, lambda = 0.0) 42 | 43 | vc1 <- vcov(model1) 44 | vc2 <- vcov(model2) 45 | # vcov agrees between model1 and model2 when lambda is 0 46 | expect_equal(as.vector(vc1), as.vector(vc2), tolerance = tol, label = "vcov lambda 0") 47 | }) 48 | 49 | test_that("test linearRidge vcov method for lambda = 0, three terms", { 50 | model1 <- lm(mpg ~ wt + cyl + disp, data = mtcars) 51 | model2 <- linearRidge(mpg ~ wt + cyl + disp, data = mtcars, lambda = 0.0) 52 | 53 | vc1 <- vcov(model1) 54 | vc2 <- vcov(model2) 55 | # vcov agrees between model1 and model2 when lambda is 0 56 | expect_equal(as.vector(vc1), as.vector(vc2), tolerance = tol, label = "vcov lambda 0") 57 | }) 58 | 59 | test_that("test linearRidge vcov method for lambda = 0.01", { 60 | model1 <- lm(mpg ~ wt + cyl, data = mtcars) 61 | model2 <- linearRidge(mpg ~ wt + cyl, data = mtcars, lambda = 0.01) 62 | 63 | vc1 <- vcov(model1) 64 | vc2 <- vcov(model2) 65 | # at least one coefficient is greater than 0.01 different: 66 | expect(any(abs(vc1 - vc2) > 0.01), "coefficients agree too strongly") 67 | # all coefficients are less than 0.013 different: 68 | expect(all(abs(vc1 - vc2) < 0.013), "coefficients too different") 69 | }) 70 | 71 | test_that("test linearRidge vcov method for lambda = 0, with a factor", { 72 | foo <- mtcars 73 | foo$cyl_factor <- as.factor(paste0("cyl", foo$cyl)) 74 | model1 <- lm(mpg ~ wt + cyl_factor, data = foo) 75 | model2 <- linearRidge(mpg ~ wt + cyl_factor, data = foo, lambda = 0.0) 76 | 77 | # coef agrees between model1 and model2 when lambda is 0, with factor 78 | expect_equal(coef(model1), coef(model2), tolerance = tol, label = "coefficients") 79 | # predict agrees between model1 and model2 when lambda is 0, with factor 80 | expect_equal(predict(model1), predict(model2), tolerance = tol, label = "predictions") 81 | 82 | # vcov agrees between model1 and model2 when lambda is 0, with factor 83 | expect_equal(as.vector(vcov(model1)), as.vector(vcov(model2)), 84 | tolerance = tol, label = "vcov lambda 0, factors") 85 | }) 86 | 87 | test_that("test linearRidge predict method for lambda = 0, with a factor and newdata", { 88 | foo <- mtcars 89 | foo$cyl_factor <- as.factor(paste0("cyl", foo$cyl)) 90 | model1 <- lm(mpg ~ wt + cyl_factor, data = foo) 91 | model2 <- linearRidge(mpg ~ wt + cyl_factor, data = foo, lambda = 0.0) 92 | 93 | # predict agrees between model1 and model2 when lambda is 0, with factor 94 | newdata <- data.frame(wt=c(1.0), cyl_factor=c("cyl4")) 95 | preds1 <- predict(model1, newdata) 96 | preds2 <- predict(model2, newdata) 97 | expect_equal(preds1, preds2, tolerance = tol, label = "predictions") 98 | }) 99 | 100 | 101 | context("Simple run tests for different datasets") 102 | 103 | test_that("Wrong argument - Hair is not numeric or logical", { 104 | data(HairEyeColor) 105 | expect_warning(linearRidge(Hair ~ ., data = as.data.frame(HairEyeColor))) 106 | }) 107 | 108 | test_that("Runs model + predict with HairEyeColor dataset", { 109 | 110 | data(HairEyeColor) 111 | model <- linearRidge(Freq ~ ., data = as.data.frame(HairEyeColor)) 112 | pred <- predict(model, as.data.frame(HairEyeColor)) 113 | expect_more_than(pred[1],18) 114 | }) 115 | 116 | test_that("Runs model + predict with HairEyeColor dataset - version with different formula", { 117 | 118 | data(HairEyeColor) 119 | model <- linearRidge(Freq ~ Eye, data = as.data.frame(HairEyeColor)) 120 | pred <- predict(model, as.data.frame(HairEyeColor)) 121 | expect_more_than(pred[1],22) 122 | }) 123 | 124 | 125 | test_that("Runs model + predict with GenBin dataset", { 126 | 127 | data(GenBin) 128 | model <- logisticRidge(Phenotypes ~ ., data = as.data.frame(GenBin)) 129 | pred <- predict(model, as.data.frame(GenBin)) 130 | expect_less_than(pred[1],0) 131 | }) 132 | 133 | 134 | test_that("Runs model + predict with Hald dataset", { 135 | 136 | data(Hald) 137 | model <- linearRidge(y ~ ., data = as.data.frame(Hald)) 138 | pred <- predict(model, as.data.frame(Hald)) 139 | expect_more_than(pred[1],70) 140 | }) 141 | 142 | test_that("Runs model + predict with Hald dataset", { 143 | 144 | data(Hald) 145 | model <- linearRidge(X1 ~ ., data = as.data.frame(Hald), scaling="none") 146 | pred <- predict(model, as.data.frame(Hald)) 147 | expect_more_than(pred[1],7) 148 | }) 149 | 150 | test_that("Runs model + predict with Hald dataset", { 151 | 152 | data(Hald) 153 | model <- linearRidge(y ~ X1 + X2 + X3, data = as.data.frame(Hald), scaling="none") 154 | pred <- predict(model, as.data.frame(Hald)) 155 | expect_more_than(pred[1],78) 156 | }) 157 | 158 | test_that("Runs model + predict with Hald dataset", { 159 | 160 | data(Hald) 161 | model <- linearRidge(y ~ X1 + X2 + X3, data = as.data.frame(Hald), lambda = 0.01, scaling="none") 162 | pred <- predict(model, as.data.frame(Hald)) 163 | expect_more_than(pred[1],78) 164 | }) 165 | 166 | 167 | test_that("Runs model + predict with Gorman dataset", { 168 | 169 | data(Gorman) 170 | model <- linearRidge(logY ~ ., data = as.data.frame(Gorman)) 171 | pred <- predict(model, as.data.frame(Gorman)) 172 | expect_more_than(pred[1],2) 173 | }) 174 | 175 | test_that("Runs model + predict with iris dataset", { 176 | 177 | data(iris) 178 | model <- linearRidge(Sepal.Length ~ ., data = as.data.frame(iris)) 179 | pred <- predict(model, as.data.frame(iris)) 180 | expect_more_than(pred[1],4) 181 | }) 182 | 183 | 184 | test_that("Runs model + predict with ToothGrowth dataset", { 185 | 186 | data(ToothGrowth) 187 | model <- linearRidge(len ~ ., data = as.data.frame(ToothGrowth)) 188 | pred <- predict(model, as.data.frame(ToothGrowth)) 189 | expect_more_than(pred[1],10) 190 | }) 191 | 192 | 193 | 194 | 195 | 196 | -------------------------------------------------------------------------------- /R/linearRidge.R: -------------------------------------------------------------------------------- 1 | ## R function to fit the linear ridge regression model 2 | 3 | #' @export 4 | #' @importFrom stats .getXlevels predict model.response model.matrix 5 | linearRidge <- function(formula, data, lambda = "automatic", 6 | nPCs = NULL, scaling = c("corrForm", "scale", "none"), ...) 7 | { 8 | ## Check arguments 9 | if(lambda != "automatic" && !is.null(nPCs)) 10 | { 11 | stop(gettextf("you cannot specify both lambda and nPCs\n")) 12 | } else if(lambda == "automatic" && !is.null(nPCs)) { 13 | lambda <- NULL 14 | } 15 | automatic <- FALSE 16 | cl <- match.call() 17 | m <- match.call(expand.dots = FALSE) 18 | scaling <- match.arg(scaling) 19 | if((lambda == "automatic" && !is.null(lambda)) && scaling != "corrForm") 20 | { 21 | warning("lambda is chosen automatically so scaling is set to \"corrForm\" ") 22 | scaling <- "corrForm" 23 | cl$scaling <- scaling 24 | } 25 | if(is.null(lambda) && scaling != "corrForm") 26 | { 27 | warning("lambda is chosen based on number of components so scaling is set to \"corrForm\"") 28 | scaling <- "corrForm" 29 | cl$scaling <- scaling 30 | } 31 | if(scaling == "none") 32 | { 33 | isScaled <- FALSE 34 | corrForm <- FALSE 35 | standardize <- FALSE 36 | } else if (scaling == "corrForm") { 37 | isScaled <- TRUE 38 | corrForm <- TRUE 39 | standardize <- FALSE 40 | } else if (scaling == "scale") { 41 | isScaled <- TRUE 42 | corrForm <- FALSE 43 | standardize <- TRUE 44 | } 45 | m$model <- m$allLambdas <- m$nPCs <- m$... <- m$lambda <- m$scaling <- NULL 46 | m[[1L]] <- as.name("model.frame") 47 | m <- eval.parent(m) 48 | Terms <- attr(m, "terms") 49 | 50 | ## Extract the response 51 | Y <- model.response(m) 52 | ## Construct the design matrix 53 | X <- model.matrix(Terms, m) 54 | contrasts <- attr(X, "contrasts") 55 | xlevels <- .getXlevels(Terms, m) 56 | ## get the dimensions of X in terms of n and p 57 | n <- nrow(X) 58 | p <- ncol(X) 59 | if(!is.null(nPCs) && nPCs > min(n, p)) 60 | stop(gettextf("You specified %d PCs which is greater than the maximum number of PCs in the data\n", nPCs)) 61 | ## Handle the intercept 62 | if (Inter <- attr(Terms, "intercept")) 63 | { 64 | Xm <- colMeans(X[, -Inter]) 65 | Ym <- mean(Y) 66 | p <- p - 1 67 | ## Subtract the means from X 68 | X <- X[, -Inter] - rep(Xm, rep(n, p)) 69 | ## Subtract the mean from Y 70 | Y <- Y - Ym 71 | } else { 72 | Xm <- colMeans(X) 73 | Ym <- mean(Y) 74 | ## Subtract the means from X 75 | X <- X - rep(Xm, rep(n, p)) 76 | ## Subtract the mean from y 77 | Y <- Y - Ym 78 | } 79 | ## Calculate the scales 80 | if(corrForm) 81 | { 82 | Xscale <- drop(rep(1/(n - 1), n) %*% apply(X, 2, function(x){x - mean(x)})^2)^0.5 * sqrt(nrow(X) - 1) 83 | } else if (standardize) { 84 | Xscale <- drop(rep(1/(n - 1), n) %*% apply(X, 2, function(x){x - mean(x)})^2)^0.5 85 | } else { 86 | Xscale <- drop(rep(1, p)) 87 | names(Xscale) <- colnames(X) 88 | } 89 | X <- X/rep(Xscale, rep(n, p)) 90 | Xs <- svd(X) 91 | Q <- Xs$v 92 | ## Make the principal components 93 | Z <- X %*% Q 94 | Lambda <- Xs$d^2 95 | if(!is.null(lambda) && lambda == "automatic") 96 | { 97 | automatic <- TRUE 98 | if(is.null(nPCs)) 99 | { 100 | propVar <- cumsum(Lambda) / sum(Lambda) * 100 101 | # if there are values for propVar below 90 102 | if (length(which.max(propVar[propVar < 90])) > 0) 103 | { 104 | ifelse((length(propVar[propVar >= 90]) > 0), (nPCs <- which.max(propVar[propVar < 90]) + 1), (nPCs <- which.max(propVar[propVar < 90]))) 105 | } 106 | else 107 | { 108 | nPCs <- 1 109 | } 110 | 111 | } 112 | } 113 | ## Compute ahat 114 | ahat <- diag(1 / Lambda) %*% t(Z) %*% Y 115 | if(!is.null(lambda) && lambda == "automatic" && !is.null(nPCs)) 116 | { 117 | ks.vector <- sig2hat.vector <- vec.df <- numeric(nPCs) 118 | flag <- TRUE 119 | P <- 0 120 | while((P < nPCs) && flag) 121 | { 122 | P <- P + 1 123 | ## compute sig2hatP 124 | sig2hat <- ifelse(P == 1, 125 | as.numeric(crossprod(Y - (Z[,1]) * ahat[1]) / (n - 1)), 126 | as.numeric(crossprod(Y - (Z[,1:P]) %*% ahat[1:P]) / (n - P)) 127 | ) 128 | ## compute ahatsum 129 | ahatsum <- ifelse(P == 1, 130 | ahat[1]^2, 131 | sum(ahat[1:P]^2) 132 | ) 133 | ## compute kHKB 134 | ks.vector[P] <- P * sig2hat / ahatsum 135 | if(is.finite(ks.vector[P])) 136 | { 137 | vec.df[ P ] <- sum(Lambda^2 / (Lambda + ks.vector[P])^2) 138 | } 139 | if(!is.finite(ks.vector[P])) 140 | { 141 | flag <- FALSE 142 | ## make everything the correct dimensions 143 | ks.vector <- ks.vector[1:(P - 1)] 144 | } 145 | } ## Ends while loop 146 | ## Choose best lambda 147 | nPCs.dof <- which.min(abs(vec.df - seq(nPCs))) 148 | ## Vector of lambdas 149 | lambda <- ks.vector 150 | ## The number of components 151 | chosen.nPCs <- nPCs.dof 152 | max.nPCs <- nPCs 153 | } else if (!is.null(nPCs)) 154 | { 155 | P <- nPCs 156 | sig2hat <- ifelse(P == 1, 157 | as.numeric(crossprod(Y - (Z[,1]) * ahat[1]) / (n - 1)), 158 | as.numeric(crossprod(Y - (Z[,1:P]) %*% ahat[1:P]) / (n - P)) 159 | ) 160 | ahatsum <- ifelse(P == 1, 161 | ahat[1]^2, 162 | sum(ahat[1:P]^2) 163 | ) 164 | ## compute lambda 165 | lambda <- P * sig2hat / ahatsum 166 | chosen.nPCs <- nPCs 167 | } 168 | ## compute coef as a matrix 169 | aridge <- lapply(lambda, function(x) {ahat * Lambda / (Lambda + x)}) 170 | coef <- lapply(aridge, function(x) {Q %*% x}) 171 | ## compute df as a matrix 172 | df <- lapply(lambda, function(x) {c(sum(Lambda / (Lambda + x)), sum(Lambda^2 / (Lambda + x)^2), sum(Lambda * (Lambda + 2* x) / (Lambda + x)^2))}) 173 | coef <- do.call(cbind, coef) 174 | rownames(coef) <- colnames(X) 175 | ## 176 | if(!is.null(nPCs)) 177 | { 178 | if(length(lambda) == 1) 179 | { 180 | colnames(coef) <- paste("nPCs", chosen.nPCs, sep = "") 181 | } else { 182 | colnames(coef) <- paste("nPCs", seq(max.nPCs), sep = "") 183 | } 184 | } else { 185 | colnames(coef) <- paste("lambda=", lambda, sep = "") 186 | } 187 | ## 188 | df <- do.call(rbind, df) 189 | ## This line needs fixing 190 | if(!is.null(nPCs)) 191 | { 192 | if(length(lambda) == 1) 193 | { 194 | rownames(df) <- paste("nPCs", chosen.nPCs, sep = "") 195 | } else { 196 | rownames(df) <- paste("nPCs", seq(max.nPCs), sep = "") 197 | } 198 | } else { 199 | rownames(df) <- paste("lambda=", lambda, sep = "") 200 | } 201 | ## 202 | colnames(df) <- c("model", "variance", "residual") 203 | res <- list(automatic = automatic, call = cl, coef = cbind(drop(coef)), df = df, 204 | Inter = Inter, isScaled = isScaled, lambda = lambda, scales = Xscale, 205 | terms = Terms, x = X, xm = Xm, y = Y, ym = Ym, model_frame = m, 206 | contrasts = contrasts, xlevels = xlevels) 207 | ## This line needs fixing 208 | if(!is.null(nPCs)) 209 | { 210 | if(automatic) 211 | { 212 | res$max.nPCs <- max.nPCs 213 | } 214 | res$chosen.nPCs <- chosen.nPCs 215 | } 216 | class(res) <- "ridgeLinear" 217 | res 218 | } 219 | -------------------------------------------------------------------------------- /src/thin.c: -------------------------------------------------------------------------------- 1 | #include "thin.h" 2 | #ifdef HAVE_GSL_HEADER 3 | 4 | gsl_vector_int * readThinFile(char * thinfilename, 5 | char ** SNPNAMES, 6 | int thinning_distance, 7 | int NINDIV, 8 | int NSNPS, 9 | int * nThinnedSnps, 10 | int verbose) 11 | { 12 | 13 | /* Allocate vector of int to indicate which SNPs to get for the thinned SNPs */ 14 | gsl_vector_int * thin = gsl_vector_int_calloc(NSNPS); 15 | 16 | int count = 0; 17 | 18 | /* if thinfilename has not been provided */ 19 | if(thinfilename == NULL) 20 | { 21 | if(thinning_distance == -1) 22 | { 23 | thinning_distance = GSL_MAX(1, NSNPS / NINDIV); 24 | } 25 | /* temporary int to store next pos */ 26 | int next_pos = 0; 27 | for(count = 0; count < NSNPS; count++) 28 | { 29 | if(count == next_pos) 30 | { 31 | gsl_vector_int_set(thin, count, 1); 32 | next_pos = count + thinning_distance; 33 | } else { 34 | // Do nothing 35 | } 36 | } 37 | /* else if thinfilename has been provided */ 38 | } else { 39 | /* Check thinning distance */ 40 | if(thinning_distance == -1) 41 | { 42 | if(verbose) 43 | { 44 | Rprintf("Thinning SNPs using default distance of 100000 bp\n"); 45 | } 46 | thinning_distance = 100000; 47 | } else { 48 | if(verbose) 49 | { 50 | Rprintf("Thinning SNPs using distance of %d bp\n", thinning_distance); 51 | } 52 | } 53 | 54 | /* Allocate vector of int for chromosomes */ 55 | gsl_vector_int * chromosomes = gsl_vector_int_alloc(NSNPS); 56 | /* Allocate vector of int for bp positions */ 57 | gsl_vector_int * positions = gsl_vector_int_alloc(NSNPS); 58 | 59 | /* Length of line */ 60 | int maxcharssnpname = 256; // Maximum number of characters of a SNP name 61 | int maxcharschrom = 2; // Maximum number of characters for a chromosome is 2 (i.e. for chromosomes 10..22) 62 | int maxcharspos = 11; // Maximum number of characters for chromosome position is 11 (i.e. 10,000,000,000 - nb human csome 1 has ~ 250,000,000 bp) 63 | char line[maxcharssnpname + maxcharschrom + maxcharspos + 3]; // The +3 are for the spaces and end of line character 64 | char * tmp; 65 | /* Make a file pointer for thinfile */ 66 | FILE * thinfile = fopen(thinfilename, "r"); 67 | count = -1; 68 | /* Read in thinfile */ 69 | if(thinfile != NULL) 70 | { 71 | while ( fgets ( line, sizeof line, thinfile ) != NULL ) /* read a line */ 72 | { 73 | count++; 74 | /* Extract SNP name */ 75 | tmp = strtok(line, " "); 76 | if(strcmp(SNPNAMES[count],tmp) != 0) 77 | { 78 | error("SNPnames in genotype file and thinfile do not match (%s vs %s)\n", SNPNAMES[count], tmp); 79 | } 80 | /* Extract chromosome */ 81 | tmp = strtok(NULL, " "); 82 | gsl_vector_int_set(chromosomes, count, atoi(tmp)); 83 | /* Extract SNP position */ 84 | tmp = strtok(NULL, "\n"); 85 | gsl_vector_int_set(positions, count, atoi(tmp)); 86 | } 87 | } else { 88 | error("could not open %s for reading\n", thinfilename); 89 | } 90 | /* Close thinfile */ 91 | fclose(thinfile); 92 | 93 | /* Choose which SNPs to thin by: follow method in plinkcomp */ 94 | int current_chr = -1; 95 | int start_of_interval = 0; 96 | int end_of_interval = 0; 97 | int first_snp = 0; // boolean 98 | int this_snp_chr = 0; 99 | 100 | for(count = 0; count < NSNPS; count++) 101 | { 102 | this_snp_chr = gsl_vector_int_get(chromosomes, count); 103 | if(current_chr != this_snp_chr) 104 | { 105 | current_chr = this_snp_chr; 106 | start_of_interval = 0; 107 | end_of_interval = start_of_interval + thinning_distance; 108 | first_snp = 1; // boolean 109 | } 110 | if(current_chr == 0) 111 | { 112 | // Do nothing 113 | } 114 | else 115 | { 116 | if(first_snp) 117 | { 118 | 119 | gsl_vector_int_set(thin, count, 1); 120 | start_of_interval = gsl_vector_int_get(positions, count); 121 | end_of_interval = start_of_interval + thinning_distance; 122 | first_snp = 0; 123 | } 124 | if((gsl_vector_int_get(positions, count) >= end_of_interval)) 125 | { 126 | gsl_vector_int_set(thin, count, 1); 127 | start_of_interval = gsl_vector_int_get(positions, count); 128 | end_of_interval = start_of_interval + thinning_distance; 129 | } else { 130 | // Do nothing 131 | } 132 | 133 | } // ends else 134 | } // ends for loop 135 | } // ends else (i.e. if thinfilename != NULL) 136 | * nThinnedSnps = sumIntVec(thin); 137 | return thin; 138 | } 139 | 140 | 141 | int readSNPsThinAndComputePCs(char * genofilename, 142 | gsl_vector_int * thin, 143 | GSL_TYPE(matrix) * Z, 144 | GSL_TYPE(matrix) * thinnedGenotypes, 145 | GSL_TYPE(vector) * D2, 146 | int * howManyK) 147 | { 148 | 149 | /* Allocate memory for the thinned genotypes */ 150 | int NSNPS = thin->size; 151 | int nThinnedSnps = sumIntVec(thin); 152 | int NINDIV = Z->size1; 153 | int i = 0; 154 | int j = 0; 155 | int count = 0; 156 | 157 | /* Allocate a bit integer matrix to read all of the SNPs in */ 158 | gsl_matrix_int * genotypes = gsl_matrix_int_calloc(NINDIV, NSNPS); 159 | 160 | GSL_TYPE(matrix) * U = GSL_FUNCTION(matrix, calloc)(Z->size1, Z->size2); 161 | GSL_TYPE(matrix) * V = GSL_FUNCTION(matrix, calloc)(nThinnedSnps, Z->size2); 162 | GSL_TYPE(vector) * D = GSL_FUNCTION(vector, calloc)(Z->size2); 163 | 164 | /* open genofile for reading */ 165 | FILE * genofile = NULL; 166 | genofile = fopen(genofilename, "r"); 167 | /* skip the header row */ 168 | char ch; 169 | ch = fgetc(genofile); 170 | while(ch != (int)'\n') 171 | { 172 | ch = fgetc(genofile); 173 | } 174 | /* Read in genotypes as int */ 175 | gsl_matrix_int_fscanf(genofile, genotypes); 176 | fclose(genofile); 177 | 178 | /* Temporary variables for counting */ 179 | int tmp = 0; 180 | for(i = 0; i < NSNPS; i++) 181 | /* Read the predictor into a vector of PREC */ 182 | { 183 | tmp = gsl_vector_int_get(thin, i); 184 | if(tmp == 1) 185 | { 186 | /* vector view of genotypes */ 187 | gsl_vector_int_view predictorsCol = gsl_matrix_int_column(genotypes, i); 188 | /* vector view of thinned genotypes */ 189 | GSL_FUNCTION(vector,view) thinnedGenotypesCol = GSL_FUNCTION(matrix,column)(thinnedGenotypes, count); 190 | /* Convert int to PREC and put in thinnedGenotypes matrix */ 191 | convert_int_vector(&predictorsCol.vector, &thinnedGenotypesCol.vector); 192 | count++; 193 | } 194 | } 195 | 196 | /* Free the genotypes matrix */ 197 | gsl_matrix_int_free(genotypes); 198 | 199 | /* scale the genotypes that have been read in */ 200 | /* some temporary variables for the mean and sd */ 201 | PREC mean = 0; 202 | PREC sd = 0; 203 | PREC sqrtn1 = MATHS_FUNCTION(sqrt)((PREC)NINDIV - 1.0); 204 | /* vector view for the column of genotypes matrix */ 205 | for(j = 0; j < nThinnedSnps; j++) 206 | { 207 | /* Get a vector view of the column of the genotypes */ 208 | GSL_FUNCTION(vector,view) genotype_vector = GSL_FUNCTION(matrix, column)(thinnedGenotypes, j); 209 | /* Compute the mean */ 210 | mean = GSL_STATS_FUNCTION(mean)(genotype_vector.vector.data, genotype_vector.vector.stride, genotype_vector.vector.size); 211 | /* Compute the standard deviation */ 212 | sd = GSL_STATS_FUNCTION(sd)(genotype_vector.vector.data, genotype_vector.vector.stride, genotype_vector.vector.size); 213 | /* Subtract the mean */ 214 | GSL_FUNCTION(vector, add_constant)(&genotype_vector.vector, -1*mean); 215 | /* Divide by the standard deviation corrected to correlation form */ 216 | GSL_FUNCTION(vector, scale)(&genotype_vector.vector, 1 / (sd * sqrtn1)); 217 | } 218 | /* SVD them */ 219 | SVD_FUNCTION(thinnedGenotypes, U, V, D); 220 | /* Compute D2 */ 221 | GSL_FUNCTION(vector,memcpy)(D2, D); 222 | GSL_FUNCTION(vector,mul)(D2, D); 223 | 224 | /* Compute Z */ 225 | BLAS_FUNCTION(gemm)(CblasNoTrans, CblasNoTrans, 1.0, thinnedGenotypes, V, 0.0, Z); 226 | if(*howManyK == 0) 227 | { 228 | *howManyK = chooseHowManyK(D); 229 | } 230 | 231 | /* Free U, V and D */ 232 | GSL_FUNCTION(matrix, free)(U); 233 | GSL_FUNCTION(matrix, free)(V); 234 | GSL_FUNCTION(vector, free)(D); 235 | 236 | return 0; 237 | } 238 | 239 | #endif 240 | 241 | typedef int make_iso_compilers_happy; 242 | 243 | -------------------------------------------------------------------------------- /src/regression_wrapper_function.c: -------------------------------------------------------------------------------- 1 | /* Wrapper function to the regression function 2 | to be called from within R 3 | in the linearRidgeBig or logisticRidgeBig functions*/ 4 | #include "depends.h" 5 | 6 | #ifdef HAVE_GSL_HEADER 7 | #include "linear.h" 8 | #include "logistic.h" 9 | #include "ReadInData.h" 10 | 11 | void regression_wrapper_function(char **g, 12 | char **p, 13 | char **b, 14 | char **a, 15 | char **perm, 16 | char ** thin, 17 | int * intercept, 18 | double * l, 19 | char **m, 20 | int * predict, 21 | int * v) { 22 | char * genofilename = *g; 23 | char * phenofilename = *p; 24 | char * betafilename = *b; 25 | 26 | double lambda = *l; 27 | char * model = *m; 28 | int predict_flag = *predict; 29 | 30 | int intercept_flag = *intercept; 31 | // int standardize_flag = *standardize; 32 | int standardize_flag = 1; 33 | int standardize_c_flag = -1; 34 | 35 | int verbose = *v; 36 | 37 | /* Check a, perm and thin files */ 38 | char * approxtestfilename = NULL; 39 | char * permtestfilename = NULL; 40 | char * thinfilename = NULL; 41 | if(strcmp(*a, "NULL") != 0) 42 | { 43 | approxtestfilename = *a; 44 | } 45 | if(strcmp(*perm, "NULL") != 0) 46 | { 47 | permtestfilename = *perm; 48 | } 49 | if(strcmp(*thin, "NULL") != 0) 50 | { 51 | thinfilename = *thin; 52 | } 53 | 54 | char * covarfilename = NULL; 55 | char * lambdafilename = NULL; 56 | char * lambdacovarfilename = NULL; 57 | 58 | PREC lambda_c = -1; 59 | PREC convergence_threshold = -1; 60 | unsigned long int seed = 0; 61 | int howManyK = 0; 62 | int individualK = 0; 63 | int thinning_distance = -1; 64 | 65 | /* Check for phenotypes file 66 | If it is supplied check it can be opened for reading */ 67 | /* if(!predict_flag) */ 68 | /* { */ 69 | /* if(phenofilename == NULL) */ 70 | /* { */ 71 | /* error("You didn't supply a vector of phenotypes (dependent variables), please do so (use -p)\n"); */ 72 | /* } else { */ 73 | /* checkFileForReading(phenofilename); */ 74 | /* } */ 75 | /* } else if (predict_flag) { */ 76 | /* if(betafilename == NULL) */ 77 | /* { */ 78 | /* error("You requested predict but didn't supply a file containing coefficients, please do so (use -b)\n"); */ 79 | /* } else { */ 80 | /* checkFileForReading(betafilename); */ 81 | /* } */ 82 | /* } */ 83 | 84 | /* if(howManyK != 0) */ 85 | /* { */ 86 | /* printf("howManyK set on command line: %d\n", howManyK); */ 87 | /* } */ 88 | 89 | /* /\* Check for genotypes and/or covariates file */ 90 | /* If one or both are supplied check they can be opened for reading *\/ */ 91 | /* if(genofilename == NULL && covarfilename == NULL) */ 92 | /* { */ 93 | /* printf("ERROR: You didn't supply a genotypes file or a covariates file, please do so (use -g and/or -c)\n"); */ 94 | /* exit(EXIT_FAILURE); */ 95 | /* } else { */ 96 | /* checkFileForReading(genofilename); */ 97 | /* checkFileForReading(covarfilename); */ 98 | /* } */ 99 | 100 | /* /\* Check beta file *\/ */ 101 | /* if(betafilename == NULL && !predict_flag) */ 102 | /* { */ 103 | /* printf("WARNING: You did not supply a name for your beta file. Defaulting to beta.dat\n"); */ 104 | /* betafilename = strdup("beta.dat"); */ 105 | /* } */ 106 | 107 | /* /\* Check prediction file *\/ */ 108 | /* if(predictionfilename == NULL && predict_flag) */ 109 | /* { */ 110 | /* printf("WARNING: You did not supply a name for your prediction results file. Defaulting to prediction.dat\n"); */ 111 | /* predictionfilename = "prediction.dat"; */ 112 | /* } */ 113 | 114 | /* // Check for a valid model */ 115 | /* checkModel(model); */ 116 | 117 | /* // handle the intercept */ 118 | /* if(intercept_flag == -1) */ 119 | /* { */ 120 | /* printf("WARNING: No intercept specified. Use --intercept (the default) or --no-intercept\n"); */ 121 | /* intercept_flag = 1; */ 122 | /* } */ 123 | 124 | /* // handle the standardization flag for genotypes */ 125 | /* if(genofilename != NULL && standardize_flag == -1) */ 126 | /* { */ 127 | /* printf("WARNING: No standardization flag specified for genotypes. Use --standardize (the default) or --no-standardize\n"); */ 128 | /* standardize_flag = 1; */ 129 | /* } */ 130 | 131 | /* // handle the standardization flag for covariates */ 132 | /* if(covarfilename != NULL && standardize_c_flag == -1 && !predict_flag) */ 133 | /* { */ 134 | /* printf("WARNING: No standardization flag specified for covariates. Use --standardize-c (the default) or --no-standardize-c\n"); */ 135 | /* standardize_c_flag == 1; */ 136 | /* } */ 137 | 138 | /* Get NINDIV: */ 139 | /* Declare an integer variable to hold the number of individuals */ 140 | int NINDIV = 0; 141 | /* If we are not predicting */ 142 | if(!predict_flag) 143 | { 144 | /* File pointer to open the file containing phenotypes */ 145 | FILE * phenofile; 146 | /* Open the fine */ 147 | phenofile = fopen(phenofilename,"r"); 148 | /* Get the number of individuals */ 149 | NINDIV = getNROW(phenofile); 150 | /* Close the file */ 151 | fclose(phenofile); 152 | /* If we are fitting and not predicting, we need to check for invariant predictors */ 153 | checkForInvariantPredictors(genofilename, 154 | NINDIV); 155 | /* Otherwise, if we are predicting */ 156 | } else if (predict_flag) { 157 | /* File pointer to open the file containing genotypes */ 158 | FILE * genofile; 159 | /* Open the file */ 160 | genofile = fopen(genofilename, "r"); 161 | /* Get the number of rows in the file */ 162 | NINDIV = getNROW(genofile); 163 | /* Close th efile */ 164 | fclose(genofile); 165 | /* Subtract 1 for the header row*/ 166 | NINDIV = NINDIV - 1; // For the header row 167 | } 168 | 169 | /* Get number of SNPs, number of covariates, and names of each */ 170 | 171 | int NSNPS = 0; 172 | char ** SNPnames = NULL; 173 | 174 | int NCOVAR = 0; 175 | char ** COVARnames = NULL; 176 | 177 | if(genofilename != NULL) 178 | { 179 | if(verbose) 180 | { 181 | Rprintf("Getting SNP names..."); 182 | } 183 | SNPnames = getHeaderRow(genofilename, &NSNPS); 184 | if(verbose) 185 | { 186 | Rprintf("done\n"); 187 | } 188 | } 189 | 190 | 191 | /* if(covarfilename != NULL) */ 192 | /* { */ 193 | /* printf("Getting covariate names..."); */ 194 | /* COVARnames = getHeaderRow(covarfilename, &NCOVAR); */ 195 | /* printf("done\n"); */ 196 | /* } */ 197 | 198 | /* /\* Check convergence threshold is set - if it was not set on the command line, set it to PREC_EPS *\/ */ 199 | /* if(convergence_threshold == -1) */ 200 | /* { */ 201 | /* convergence_threshold = PREC_EPS; */ 202 | /* } else { */ 203 | /* printf("Using %f as threshold for convergence in coordinate descent\n", convergence_threshold); */ 204 | /* } */ 205 | 206 | /* total number of predictors */ 207 | int NPRED = intercept_flag + NCOVAR + NSNPS; 208 | 209 | /* Call the function */ 210 | /* If model is linear: */ 211 | if(strcmp(model, "logistic") == 0) 212 | { 213 | /* We can use PREC_EPS as the convergence threshold */ 214 | convergence_threshold = PREC_EPS; 215 | /* Call logisticMain */ 216 | logisticMain(genofilename, 217 | thinfilename, 218 | phenofilename, 219 | covarfilename, 220 | betafilename, 221 | lambdafilename, 222 | lambdacovarfilename, 223 | approxtestfilename, 224 | permtestfilename, 225 | lambda, 226 | lambda_c, 227 | seed, 228 | howManyK, 229 | individualK, 230 | intercept_flag, 231 | standardize_flag, 232 | standardize_c_flag, 233 | thinning_distance, 234 | NINDIV, 235 | NPRED, 236 | NCOVAR, 237 | NSNPS, 238 | SNPnames, 239 | COVARnames, 240 | predict_flag, 241 | convergence_threshold, 242 | verbose); 243 | /* Else if model is logistic */ 244 | } else if (strcmp(model, "linear") == 0) 245 | { 246 | /* We must set the convergence threshold to 0.000001 */ 247 | convergence_threshold = 0.000001; 248 | /* Call linearMain */ 249 | linearMain(genofilename, 250 | thinfilename, 251 | phenofilename, 252 | covarfilename, 253 | betafilename, 254 | lambdafilename, 255 | lambdacovarfilename, 256 | approxtestfilename, 257 | permtestfilename, 258 | lambda, 259 | lambda_c, 260 | seed, 261 | howManyK, 262 | individualK, 263 | intercept_flag, 264 | standardize_flag, 265 | standardize_c_flag, 266 | thinning_distance, 267 | NINDIV, 268 | NPRED, 269 | NCOVAR, 270 | NSNPS, 271 | SNPnames, 272 | COVARnames, 273 | predict_flag, 274 | convergence_threshold, 275 | verbose); 276 | } 277 | /* iterator for freeing SNP and COVAR names */ 278 | int i = 0; 279 | /* Free SNP names */ 280 | if(NSNPS > 0) 281 | { 282 | for(i = 0; i < NSNPS; i++) 283 | { 284 | free(SNPnames[i]); 285 | } 286 | free(SNPnames); 287 | } 288 | /* Free covariate names */ 289 | if(NCOVAR > 0) 290 | { 291 | for(i = 0; i < NCOVAR; i++) 292 | { 293 | free(COVARnames[i]); 294 | } 295 | free(COVARnames); 296 | } 297 | } 298 | 299 | #endif 300 | 301 | typedef int make_iso_compilers_happy; 302 | 303 | --------------------------------------------------------------------------------