├── src ├── tess3.cpp ├── helpers.cpp └── RcppExports.cpp ├── tests ├── testthat │ ├── test-tess3.R │ ├── test-sampler.R │ ├── test-hypothesis-testing.R │ ├── test-helpers.R │ ├── test-lfmm.R │ ├── test-dat.R │ └── test-ridgeLFMM.R └── testthat.R ├── data ├── example.data.rda └── skin.exposure.rda ├── docs ├── reference │ ├── lfmm_test-1.png │ ├── lfmm_test-2.png │ ├── effect_size-1.png │ ├── effect_size-2.png │ ├── effect_size-3.png │ ├── lfmm_lasso-1.png │ ├── lfmm_lasso-2.png │ ├── lfmm_ridge-1.png │ ├── lfmm_ridge-2.png │ ├── forward_test-1.png │ ├── forward_test-2.png │ ├── lfmm_ridge_CV-1.png │ ├── lfmm_ridge_CV-2.png │ ├── predict_lfmm-1.png │ ├── predict_lfmm-2.png │ ├── lfmm.html │ ├── Dat.html │ ├── lfmm_fit.html │ ├── left.out.kfold.html │ ├── lfmm_impute.html │ ├── lfmm_residual_error2.html │ ├── lfmm_CV.html │ ├── LfmmDat.html │ ├── hypothesis_testing_lm.html │ ├── compute_P.html │ ├── SimulatedLfmmDat.html │ ├── lfmm_fit_knowing_loadings.html │ ├── lfmm_fit_knowing_loadings.ridgeLFMM.html │ ├── compute_pvalue_from_tscore.html │ ├── compute_pvalue_from_zscore.html │ ├── example.data.html │ ├── skin.exposure.html │ └── index.html ├── articles │ ├── lfmm_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-10-1.png │ │ │ ├── unnamed-chunk-13-1.png │ │ │ ├── unnamed-chunk-17-1.png │ │ │ ├── unnamed-chunk-18-1.png │ │ │ ├── unnamed-chunk-22-1.png │ │ │ ├── unnamed-chunk-23-1.png │ │ │ ├── unnamed-chunk-4-1.png │ │ │ ├── unnamed-chunk-5-1.png │ │ │ └── unnamed-chunk-9-1.png │ └── index.html ├── link.svg ├── pkgdown.js ├── jquery.sticky-kit.min.js ├── pkgdown.css ├── authors.html └── index.html ├── man ├── Dat.Rd ├── lfmm_fit.Rd ├── LfmmDat.Rd ├── lfmm_CV.Rd ├── SimulatedLfmmDat.Rd ├── lfmm_impute.Rd ├── lfmm.Rd ├── left.out.kfold.Rd ├── compute_P.Rd ├── lfmm_residual_error2.Rd ├── lfmm_fit_knowing_loadings.ridgeLFMM.Rd ├── hypothesis_testing_lm.Rd ├── compute_pvalue_from_zscore.Rd ├── lfmm_fit_knowing_loadings.Rd ├── compute_pvalue_from_tscore.Rd ├── example.data.Rd ├── skin.exposure.Rd ├── lfmm_sampler.Rd ├── lfmm_ridge_CV.Rd ├── effect_size.Rd ├── glm_test.Rd ├── predict_lfmm.Rd ├── lfmm_ridge.Rd ├── lfmm_lasso.Rd ├── forward_test.Rd └── lfmm_test.Rd ├── _pkgdown.yml ├── .gitignore ├── hooks └── post-receive.sh ├── R ├── example_data.R ├── RcppExports.R ├── Dat.R ├── skin_exposure.R ├── hypothesis-testing.R ├── io.R ├── matrix-factorizationR.R ├── cross-validation.R ├── helpers.R ├── sampler.R ├── LfmmDat.R ├── lassoLFMM.R └── ridgeLFMM.R ├── Makefile ├── DESCRIPTION ├── NAMESPACE ├── README.md └── inst └── doc └── lfmm.R /src/tess3.cpp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/testthat/test-tess3.R: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(lfmm) 3 | 4 | test_check("lfmm") 5 | -------------------------------------------------------------------------------- /data/example.data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/data/example.data.rda -------------------------------------------------------------------------------- /data/skin.exposure.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/data/skin.exposure.rda -------------------------------------------------------------------------------- /docs/reference/lfmm_test-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/lfmm_test-1.png -------------------------------------------------------------------------------- /docs/reference/lfmm_test-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/lfmm_test-2.png -------------------------------------------------------------------------------- /docs/reference/effect_size-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/effect_size-1.png -------------------------------------------------------------------------------- /docs/reference/effect_size-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/effect_size-2.png -------------------------------------------------------------------------------- /docs/reference/effect_size-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/effect_size-3.png -------------------------------------------------------------------------------- /docs/reference/lfmm_lasso-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/lfmm_lasso-1.png -------------------------------------------------------------------------------- /docs/reference/lfmm_lasso-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/lfmm_lasso-2.png -------------------------------------------------------------------------------- /docs/reference/lfmm_ridge-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/lfmm_ridge-1.png -------------------------------------------------------------------------------- /docs/reference/lfmm_ridge-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/lfmm_ridge-2.png -------------------------------------------------------------------------------- /docs/reference/forward_test-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/forward_test-1.png -------------------------------------------------------------------------------- /docs/reference/forward_test-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/forward_test-2.png -------------------------------------------------------------------------------- /docs/reference/lfmm_ridge_CV-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/lfmm_ridge_CV-1.png -------------------------------------------------------------------------------- /docs/reference/lfmm_ridge_CV-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/lfmm_ridge_CV-2.png -------------------------------------------------------------------------------- /docs/reference/predict_lfmm-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/predict_lfmm-1.png -------------------------------------------------------------------------------- /docs/reference/predict_lfmm-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/reference/predict_lfmm-2.png -------------------------------------------------------------------------------- /docs/articles/lfmm_files/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/articles/lfmm_files/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/articles/lfmm_files/figure-html/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/articles/lfmm_files/figure-html/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /docs/articles/lfmm_files/figure-html/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/articles/lfmm_files/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /docs/articles/lfmm_files/figure-html/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/articles/lfmm_files/figure-html/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /docs/articles/lfmm_files/figure-html/unnamed-chunk-22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/articles/lfmm_files/figure-html/unnamed-chunk-22-1.png -------------------------------------------------------------------------------- /docs/articles/lfmm_files/figure-html/unnamed-chunk-23-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/articles/lfmm_files/figure-html/unnamed-chunk-23-1.png -------------------------------------------------------------------------------- /docs/articles/lfmm_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/articles/lfmm_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/lfmm_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/articles/lfmm_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/articles/lfmm_files/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcm-uga/lfmm/HEAD/docs/articles/lfmm_files/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /man/Dat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Dat.R 3 | \name{Dat} 4 | \alias{Dat} 5 | \title{Class which store data} 6 | \usage{ 7 | Dat(Y) 8 | } 9 | \description{ 10 | Class which store data 11 | } 12 | -------------------------------------------------------------------------------- /man/lfmm_fit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/matrix-factorizationR.R 3 | \name{lfmm_fit} 4 | \alias{lfmm_fit} 5 | \title{Fit the model} 6 | \usage{ 7 | lfmm_fit(m, dat, ...) 8 | } 9 | \description{ 10 | Fit the model 11 | } 12 | -------------------------------------------------------------------------------- /man/LfmmDat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/LfmmDat.R 3 | \name{LfmmDat} 4 | \alias{LfmmDat} 5 | \title{Class which store data} 6 | \usage{ 7 | LfmmDat(Y, X, missing = TRUE) 8 | } 9 | \description{ 10 | Class which store data 11 | } 12 | -------------------------------------------------------------------------------- /man/lfmm_CV.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/matrix-factorizationR.R 3 | \name{lfmm_CV} 4 | \alias{lfmm_CV} 5 | \title{Cross validation} 6 | \usage{ 7 | lfmm_CV(m, dat, n.fold.row, n.fold.col, ...) 8 | } 9 | \description{ 10 | Cross validation 11 | } 12 | -------------------------------------------------------------------------------- /man/SimulatedLfmmDat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/LfmmDat.R 3 | \name{SimulatedLfmmDat} 4 | \alias{SimulatedLfmmDat} 5 | \title{Class which store data} 6 | \usage{ 7 | SimulatedLfmmDat(Y, X, outlier, U, V, B) 8 | } 9 | \description{ 10 | Class which store data 11 | } 12 | -------------------------------------------------------------------------------- /man/lfmm_impute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/matrix-factorizationR.R 3 | \name{lfmm_impute} 4 | \alias{lfmm_impute} 5 | \title{Impute Y with a fitted model.} 6 | \usage{ 7 | lfmm_impute(m, dat, ...) 8 | } 9 | \description{ 10 | Impute Y with a fitted model. 11 | } 12 | -------------------------------------------------------------------------------- /man/lfmm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/matrix-factorizationR.R 3 | \docType{package} 4 | \name{lfmm} 5 | \alias{lfmm} 6 | \alias{lfmm-package} 7 | \title{R package with matrix factorization algorithms} 8 | \description{ 9 | R package with matrix factorization algorithms 10 | } 11 | -------------------------------------------------------------------------------- /man/left.out.kfold.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cross-validation.R 3 | \name{left.out.kfold} 4 | \alias{left.out.kfold} 5 | \title{return a list of train/test indices} 6 | \usage{ 7 | left.out.kfold(kfold, J) 8 | } 9 | \description{ 10 | return a list of train/test indices 11 | } 12 | -------------------------------------------------------------------------------- /man/compute_P.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{compute_P} 4 | \alias{compute_P} 5 | \title{Compute the matrix used to reduce correlation with X} 6 | \usage{ 7 | compute_P(X, lambda) 8 | } 9 | \description{ 10 | see mon cahier 6/07/2017 11 | } 12 | \author{ 13 | cayek 14 | } 15 | -------------------------------------------------------------------------------- /man/lfmm_residual_error2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/matrix-factorizationR.R 3 | \name{lfmm_residual_error2} 4 | \alias{lfmm_residual_error2} 5 | \title{Compute the residual error} 6 | \usage{ 7 | lfmm_residual_error2(m, dat, ...) 8 | } 9 | \description{ 10 | Compute the residual error 11 | } 12 | -------------------------------------------------------------------------------- /man/lfmm_fit_knowing_loadings.ridgeLFMM.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ridgeLFMM.R 3 | \name{lfmm_fit_knowing_loadings.ridgeLFMM} 4 | \alias{lfmm_fit_knowing_loadings.ridgeLFMM} 5 | \title{Fit assuming V and B} 6 | \usage{ 7 | \method{lfmm_fit_knowing_loadings}{ridgeLFMM}(m, dat) 8 | } 9 | \description{ 10 | Fit assuming V and B 11 | } 12 | -------------------------------------------------------------------------------- /man/hypothesis_testing_lm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypothesis-testing.R 3 | \name{hypothesis_testing_lm} 4 | \alias{hypothesis_testing_lm} 5 | \title{Hypothesis testing with lm} 6 | \usage{ 7 | hypothesis_testing_lm(dat, X, lambda) 8 | } 9 | \description{ 10 | linear model: 11 | Y = X B^T + E 12 | } 13 | \author{ 14 | cayek, francoio 15 | } 16 | -------------------------------------------------------------------------------- /man/compute_pvalue_from_zscore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{compute_pvalue_from_zscore} 4 | \alias{compute_pvalue_from_zscore} 5 | \title{score are assume to follow normal distibution} 6 | \usage{ 7 | compute_pvalue_from_zscore(score, mean = 0, sd = 1) 8 | } 9 | \description{ 10 | score are assume to follow normal distibution 11 | } 12 | -------------------------------------------------------------------------------- /man/lfmm_fit_knowing_loadings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/matrix-factorizationR.R 3 | \name{lfmm_fit_knowing_loadings} 4 | \alias{lfmm_fit_knowing_loadings} 5 | \title{Fit the model when latent factor loadings are known} 6 | \usage{ 7 | lfmm_fit_knowing_loadings(m, dat, ...) 8 | } 9 | \description{ 10 | Fit the model when latent factor loadings are known 11 | } 12 | -------------------------------------------------------------------------------- /man/compute_pvalue_from_tscore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{compute_pvalue_from_tscore} 4 | \alias{compute_pvalue_from_tscore} 5 | \title{score are assume to follow student distibution with df degre of freedom} 6 | \usage{ 7 | compute_pvalue_from_tscore(score, df) 8 | } 9 | \description{ 10 | score are assume to follow student distibution with df degre of freedom 11 | } 12 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | templates: 2 | params: 3 | bootswatch: readable 4 | 5 | navbar: 6 | title: "lfmm" 7 | left: 8 | - text: "Manual" 9 | href: reference/index.html 10 | - text: "Tutorial" 11 | href: articles/lfmm 12 | right: 13 | - icon: fa-github 14 | href: https://github.com/bcm-uga/lfmm.html 15 | 16 | reference: 17 | - title: Main functions 18 | contents: 19 | - lfmm_ridge 20 | - lfmm_ridge_CV 21 | - lfmm_lasso 22 | - lfmm_test 23 | - effect_size 24 | - predict_lfmm 25 | - forward_test 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # R 2 | .Rhistory 3 | .Rbuildignore 4 | .RData 5 | .Rproj.user 6 | lfmm.Rproj 7 | 8 | # Prerequisites 9 | *.d 10 | 11 | # Compiled Object files 12 | *.slo 13 | *.lo 14 | *.o 15 | *.obj 16 | 17 | # Precompiled Headers 18 | *.gch 19 | *.pch 20 | 21 | # Compiled Dynamic libraries 22 | *.so 23 | *.dylib 24 | *.dll 25 | 26 | # Fortran module files 27 | *.mod 28 | *.smod 29 | 30 | # Compiled Static libraries 31 | *.lai 32 | *.la 33 | *.a 34 | *.lib 35 | 36 | # Executables 37 | *.exe 38 | *.out 39 | *.app 40 | 41 | # tag 42 | TAGS 43 | 44 | Rplots.pdf 45 | 46 | 47 | #trucs zarb 48 | .DS_Store 49 | -------------------------------------------------------------------------------- /hooks/post-receive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | dest=/home/cayek/Projects/Thesis/lfmm 3 | 4 | echo "post-receive hook" 5 | 6 | while read oldrev newrev ref 7 | do 8 | if [[ $ref =~ .*/master$ ]]; 9 | then 10 | echo "Master ref received. Deploying master branch to production..." 11 | git --work-tree=$dest --git-dir=/home/cayek/Gits/2017/lfmm.git checkout -f 12 | cd $dest 13 | make lfmm_install 14 | # source activate MaThese 15 | # make lfmm_install 16 | else 17 | echo "Ref $ref successfully received. Doing nothing: only the master branch may be deployed on this server." 18 | fi 19 | done 20 | -------------------------------------------------------------------------------- /tests/testthat/test-sampler.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | context("Sampler") 3 | 4 | 5 | test_that("lfmm_sampler", { 6 | 7 | dat <- lfmm_sampler(n = 100, p = 1000, K = 3, 8 | outlier.prop = 0.1, 9 | cs = c(0.8), 10 | sigma = 0.2, 11 | B.sd = 1.0, 12 | U.sd = 1.0, 13 | V.sd = 1.0) 14 | 15 | expect_equal(dim(dat$Y), c(100, 1000)) 16 | expect_equal(dim(dat$X), c(100, 1)) 17 | expect_equal(dim(dat$B), c(1000, 1)) 18 | expect_equal(dim(dat$U), c(100, 3)) 19 | expect_equal(dim(dat$V), c(1000, 3)) 20 | 21 | }) 22 | 23 | -------------------------------------------------------------------------------- /R/example_data.R: -------------------------------------------------------------------------------- 1 | #' Genetic and phenotypic data for Arabidopsis thaliana 2 | #' 3 | #' A dataset containing SNP frequency and simulated phenotypic data for 170 plant accessions. 4 | #' The variables are as follows: 5 | #' 6 | #' \itemize{ 7 | #' \item genotype: binary (0 or 1) SNP frequency for 170 individuals (26943 SNPs). 8 | #' \item phenotype: simulated phenotypic data for 170 individuals. 9 | #' \item causal.set: set of indices for causal SNPs. 10 | #' \item chrpos: genetic map including chromosome position of each SNP. 11 | #' } 12 | #' 13 | #' @docType data 14 | #' @keywords datasets 15 | #' @name example.data 16 | #' @usage data(example.data) 17 | #' @format A list with 4 arguments: genotype, phenotype, causal.set, chrpos 18 | NULL 19 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: lfmm_testonR_install lfmm_test lfmm_document lfmm_check 2 | 3 | ## krak 4 | krakenator_deploy: 5 | git status 6 | ## git commit --allow-empty -am "deploy on krakenator" 7 | git push krakenator master 8 | 9 | krakenator_push_hook: 10 | scp ./hooks/post-receive.sh cayek@krakenator:/home/cayek/Gits/2017/lfmm.git/hooks/post-receive 11 | 12 | ## Rpackage 13 | lfmm_install: 14 | R -e 'devtools::install(pkg = ".")' 15 | 16 | lfmm_test: 17 | R -e 'devtools::test(pkg = ".")' 18 | 19 | lfmm_document: 20 | R -e 'devtools::document(pkg = ".")' 21 | 22 | lfmm_check: 23 | R -e 'devtools::check(pkg = ".")' 24 | 25 | lfmm_build_site: 26 | R -e 'pkgdown::build_site()' 27 | 28 | lfmm_clean: 29 | rm -f R/RcppExports.R src/RcppExports.cpp src/*.o src/*.so 30 | 31 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | compute_eigen_svd <- function(X) { 5 | .Call('_lfmm_compute_eigen_svd', PACKAGE = 'lfmm', X) 6 | } 7 | 8 | impute_lfmm_cpp <- function(Y, X, U, V, B, missingId) { 9 | invisible(.Call('_lfmm_impute_lfmm_cpp', PACKAGE = 'lfmm', Y, X, U, V, B, missingId)) 10 | } 11 | 12 | err2_lfmm_cpp <- function(Y, X, U, V, B) { 13 | .Call('_lfmm_err2_lfmm_cpp', PACKAGE = 'lfmm', Y, X, U, V, B) 14 | } 15 | 16 | err2s_lfmm_cpp <- function(Y, X, U, V, B) { 17 | .Call('_lfmm_err2s_lfmm_cpp', PACKAGE = 'lfmm', Y, X, U, V, B) 18 | } 19 | 20 | sum2_lm_cpp <- function(Y, X, B) { 21 | .Call('_lfmm_sum2_lm_cpp', PACKAGE = 'lfmm', Y, X, B) 22 | } 23 | 24 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /man/example.data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/example_data.R 3 | \docType{data} 4 | \name{example.data} 5 | \alias{example.data} 6 | \title{Genetic and phenotypic data for Arabidopsis thaliana} 7 | \format{A list with 4 arguments: genotype, phenotype, causal.set, chrpos} 8 | \usage{ 9 | data(example.data) 10 | } 11 | \description{ 12 | A dataset containing SNP frequency and simulated phenotypic data for 170 plant accessions. 13 | The variables are as follows: 14 | } 15 | \details{ 16 | \itemize{ 17 | \item genotype: binary (0 or 1) SNP frequency for 170 individuals (26943 SNPs). 18 | \item phenotype: simulated phenotypic data for 170 individuals. 19 | \item causal.set: set of indices for causal SNPs. 20 | \item chrpos: genetic map including chromosome position of each SNP. 21 | } 22 | } 23 | \keyword{datasets} 24 | -------------------------------------------------------------------------------- /R/Dat.R: -------------------------------------------------------------------------------- 1 | Dat.builder <- setRefClass("Dat", fields = c("Y", "meta", "missing.ind"), 2 | methods = list( 3 | getY = function() { 4 | return(.self$Y) 5 | }, 6 | productY = function(x) { 7 | .self$Y %*% x 8 | }, 9 | productYt = function(x) { 10 | crossprod(.self$Y, x) 11 | } 12 | ) 13 | ) 14 | 15 | #' Class which store data 16 | #' 17 | #' 18 | #' @export 19 | Dat <- function(Y) { 20 | dat <- Dat.builder(Y = read_input(Y), 21 | meta = list(), 22 | missing.ind = NULL) 23 | dat$missing.ind <- which(is.na(dat$Y)) 24 | dat 25 | } 26 | 27 | -------------------------------------------------------------------------------- /R/skin_exposure.R: -------------------------------------------------------------------------------- 1 | #' Simulated (and real) methylation levels for sun exposed patient patients 2 | #' 3 | #' A data set containing normalized beta values, and sun exposure and simulated 4 | #' phenotypic data for 78 tissue samples. 5 | #' 6 | #' The variables are: 7 | #' 8 | #' \itemize{ 9 | #' \item beta.value: 1496 filtered normalized beta values (methyation probabilities) 10 | #' for 78 tissue samples. 11 | #' \item exposure: Sun exposure levels for 78 tissue samples. 12 | #' \item phenotype: Simulated binary phenotypic data for 78 tissue samples. 13 | #' \item age: age of patients. 14 | #' \item gender: sex of patients. 15 | #' \item tissue: category for tissue samples. 16 | #' } 17 | #' 18 | #' @docType data 19 | #' @keywords datasets 20 | #' @details Reference: to be filled 21 | #' @name skin.exposure 22 | #' @usage data("skin.exposure") 23 | #' @format A list with 6 arguments: beta.value, phenotype, causal.set, chrpos 24 | NULL 25 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: lfmm 2 | Type: Package 3 | Title: Latent Factor Mixed Models 4 | Version: 1.0 5 | Date: 2017-06-14 6 | Authors@R: c( 7 | person("Kevin", "Caye", 8 | email = "kevin.caye@gmail.com", 9 | role = c("aut", "cre")), 10 | person("Olivier", "François", 11 | email = "olivier.francois@univ-grenoble-alpes.fr", 12 | role = "aut")) 13 | Author: Kevin Caye 14 | Maintainer: Kevin Caye 15 | Description: Implements statistical methods for adjusting confounding factors 16 | in association studies. 17 | License: GPL-3 + file LICENSE 18 | LazyData: TRUE 19 | Encoding: UTF-8 20 | Depends: 21 | R (>= 3.2.3) 22 | Suggests: 23 | testthat 24 | Imports: 25 | Rcpp (>= 0.12.3), 26 | RcppEigen (>= 0.3.2.8.1) 27 | LinkingTo: Rcpp, RcppEigen 28 | VignetteBuilder: knitr 29 | RoxygenNote: 6.0.1 30 | Roxygen: list(markdown = TRUE) 31 | URL: 32 | BugReports: https://github.com/bcm-uga/lfmm/issues 33 | -------------------------------------------------------------------------------- /R/hypothesis-testing.R: -------------------------------------------------------------------------------- 1 | ##' Hypothesis testing with lm 2 | ##' 3 | ##' linear model: 4 | ##' Y = X B^T + E 5 | ##' 6 | ##' @author cayek, francoio 7 | ##' @export 8 | hypothesis_testing_lm <- function(dat, X, lambda) { 9 | 10 | d <- ncol(X) 11 | p <- ncol(dat$Y) 12 | effective.degree.freedom <- nrow(dat$Y) - ncol(X) 13 | 14 | res <- list() 15 | ## B 16 | Af <- function(x) { 17 | t(dat$productYt(x)) 18 | } 19 | res$B <- compute_B_ridge(Af, X, lambda) 20 | 21 | ## compute Var(E) 22 | res$epsilon.sigma2 <- dat$sigma2_lm(X, res$B, effective.degree.freedom) 23 | 24 | ## compute Var(B) 25 | aux <- solve(crossprod(X) + diag(lambda, ncol(X), ncol(X))) 26 | res$B.sigma2 <- t(matrix(diag(aux), d, 1) %*% matrix(res$epsilon.sigma2, 1, p)) 27 | 28 | ## compute zscore 29 | res$score <- res$B / sqrt(res$B.sigma2) 30 | 31 | ## compute pvalue 32 | res$pvalue <- compute_pvalue_from_tscore(res$score, df = effective.degree.freedom) 33 | 34 | res 35 | } 36 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(lfmm_CV,lassoLFMM) 4 | S3method(lfmm_CV,ridgeLFMM) 5 | S3method(lfmm_fit,lassoLFMM) 6 | S3method(lfmm_fit,ridgeLFMM) 7 | S3method(lfmm_fit_knowing_loadings,ridgeLFMM) 8 | export(Dat) 9 | export(LfmmDat) 10 | export(SimulatedLfmmDat) 11 | export(compute_P) 12 | export(effect_size) 13 | export(forward_test) 14 | export(glm_test) 15 | export(hypothesis_testing_lm) 16 | export(impute_median) 17 | export(lassoLFMM) 18 | export(left.out.kfold) 19 | export(lfmm_CV) 20 | export(lfmm_fit) 21 | export(lfmm_fit_knowing_loadings) 22 | export(lfmm_impute) 23 | export(lfmm_lasso) 24 | export(lfmm_residual_error2) 25 | export(lfmm_ridge) 26 | export(lfmm_ridge_CV) 27 | export(lfmm_sampler) 28 | export(lfmm_test) 29 | export(predict_lfmm) 30 | export(read_input) 31 | export(ridgeLFMM) 32 | import(RcppEigen) 33 | importFrom(Rcpp,evalCpp) 34 | importFrom(foreach,"%:%") 35 | importFrom(foreach,"%do%") 36 | importFrom(foreach,"%dopar%") 37 | importFrom(foreach,foreach) 38 | useDynLib(lfmm) 39 | -------------------------------------------------------------------------------- /man/skin.exposure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/skin_exposure.R 3 | \docType{data} 4 | \name{skin.exposure} 5 | \alias{skin.exposure} 6 | \title{Simulated (and real) methylation levels for sun exposed patient patients} 7 | \format{A list with 6 arguments: beta.value, phenotype, causal.set, chrpos} 8 | \usage{ 9 | data("skin.exposure") 10 | } 11 | \description{ 12 | A data set containing normalized beta values, and sun exposure and simulated 13 | phenotypic data for 78 tissue samples. 14 | } 15 | \details{ 16 | The variables are: 17 | 18 | \itemize{ 19 | \item beta.value: 1496 filtered normalized beta values (methyation probabilities) 20 | for 78 tissue samples. 21 | \item exposure: Sun exposure levels for 78 tissue samples. 22 | \item phenotype: Simulated binary phenotypic data for 78 tissue samples. 23 | \item age: age of patients. 24 | \item gender: sex of patients. 25 | \item tissue: category for tissue samples. 26 | } 27 | 28 | Reference: to be filled 29 | } 30 | \keyword{datasets} 31 | -------------------------------------------------------------------------------- /R/io.R: -------------------------------------------------------------------------------- 1 | ##' @author cayek 2 | ##' @export 3 | read_input <- function(X) { 4 | if (is.matrix(X)) { 5 | return(X) 6 | } 7 | 8 | if (is.data.frame(X)) { 9 | return(X) 10 | } 11 | 12 | if (is.numeric(X)) { 13 | return(X) 14 | } 15 | 16 | if (is.logical(X)) { 17 | return(X) 18 | } 19 | 20 | if (is.integer(X)) { 21 | return(X) 22 | } 23 | 24 | 25 | if (is.character(X)) { 26 | if (tools::file_ext(X) == "lfmm") { 27 | return(as.matrix(readr::read_delim(X, delim = " ", 28 | col_names = FALSE, 29 | col_types = readr::cols(.default = readr::col_integer())) 30 | ) 31 | ) 32 | } else if (tools::file_ext(X) == "RData") { 33 | stop("TODO") 34 | } else if (tools::file_ext(X) == "rds") { 35 | return(readRDS(X)) 36 | } else { 37 | stop("TODO") 38 | } 39 | } 40 | 41 | if (is.null(X)) { 42 | return(NULL) 43 | } 44 | 45 | stop("X not handle") 46 | } 47 | -------------------------------------------------------------------------------- /R/matrix-factorizationR.R: -------------------------------------------------------------------------------- 1 | #' R package with matrix factorization algorithms 2 | #' 3 | #' 4 | #' @docType package 5 | #' 6 | #' @name lfmm 7 | #' @importFrom Rcpp evalCpp 8 | #' @importFrom foreach foreach %:% %do% %dopar% 9 | #' @useDynLib lfmm 10 | #' @import RcppEigen 11 | NULL 12 | 13 | #' Fit the model 14 | #' 15 | #' @export 16 | lfmm_fit <- function(m, dat, ...) { 17 | UseMethod("lfmm_fit") 18 | } 19 | 20 | #' Fit the model when latent factor loadings are known 21 | #' 22 | #' @export 23 | lfmm_fit_knowing_loadings <- function(m, dat, ...) { 24 | UseMethod("lfmm_fit_knowing_loadings") 25 | } 26 | 27 | #' Cross validation 28 | #' 29 | #' @export 30 | lfmm_CV <- function(m, dat, n.fold.row, n.fold.col, ...) { 31 | UseMethod("lfmm_CV") 32 | } 33 | 34 | #' Impute Y with a fitted model. 35 | #' 36 | #' @export 37 | lfmm_impute <- function(m, dat, ...) { 38 | UseMethod("lfmm_impute") 39 | } 40 | 41 | #' Compute the residual error 42 | #' 43 | #' @export 44 | lfmm_residual_error2 <- function(m, dat, ...) { 45 | UseMethod("lfmm_residual_error2") 46 | } 47 | -------------------------------------------------------------------------------- /tests/testthat/test-hypothesis-testing.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | context("hypothesis testing") 3 | 4 | 5 | test_that("hypothesis_testing_lm", { 6 | 7 | dat <- lfmm_sampler(n = 100, p = 1000, K = 1, 8 | outlier.prop = 0.1, 9 | cs = c(0.8), 10 | sigma = 0.2, 11 | B.sd = 1.0, 12 | U.sd = 1.0, 13 | V.sd = 1.0) 14 | 15 | X <- cbind(dat$X, dat$U, rnorm(100)) 16 | hp <- hypothesis_testing_lm(dat, X = X, lambda = 0.0) 17 | 18 | lm.res <- lm(dat$Y ~ X - 1) 19 | B.lm <- lm.res$coefficients 20 | expect_lt(mean(abs(t(B.lm) - hp$B)), 1e-15) 21 | 22 | s <- summary(lm.res) 23 | 24 | ## E 25 | E.lm <- lm.res$residuals 26 | 27 | 28 | ## score 29 | score <- sapply(seq_along(s), function(i) s[[i]]$coefficients[,3]) 30 | dim(score) 31 | expect_lt(mean(abs(t(score) - hp$score)), 1e-10) 32 | 33 | ## pvalue 34 | pvalue <- sapply(seq_along(s), function(i) s[[i]]$coefficients[,4]) 35 | dim(pvalue) 36 | expect_lt(mean(abs(t(pvalue) - hp$pvalue)), 1e-14) 37 | hist(pvalue[3,]) ## ok 38 | hist(hp$pvalue[,3]) ## ok 39 | 40 | }) 41 | 42 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | $("#sidebar").stick_in_parent({offset_top: 40}); 3 | $('body').scrollspy({ 4 | target: '#sidebar', 5 | offset: 60 6 | }); 7 | 8 | var cur_path = paths(location.pathname); 9 | $("#navbar ul li a").each(function(index, value) { 10 | if (value.text == "Home") 11 | return; 12 | if (value.getAttribute("href") === "#") 13 | return; 14 | 15 | var path = paths(value.pathname); 16 | if (is_prefix(cur_path, path)) { 17 | // Add class to parent
  • , and enclosing
  • if in dropdown 18 | var menu_anchor = $(value); 19 | menu_anchor.parent().addClass("active"); 20 | menu_anchor.closest("li.dropdown").addClass("active"); 21 | } 22 | }); 23 | }); 24 | 25 | function paths(pathname) { 26 | var pieces = pathname.split("/"); 27 | pieces.shift(); // always starts with / 28 | 29 | var end = pieces[pieces.length - 1]; 30 | if (end === "index.html" || end === "") 31 | pieces.pop(); 32 | return(pieces); 33 | } 34 | 35 | function is_prefix(needle, haystack) { 36 | if (needle.length > haystack.lengh) 37 | return(false); 38 | 39 | for (var i = 0; i < haystack.length; i++) { 40 | if (needle[i] != haystack[i]) 41 | return(false); 42 | } 43 | 44 | return(true); 45 | } 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ## latent factor mixed models: lfmm 3 | 4 | Genome and epigenome-wide association studies are plagued with 5 | the problems of confounding and causality. The R package **lfmm** implements new 6 | algorithms for parameter estimation in latent factor mixed models (LFMM). The algorithms are designed for the correction of unobserved confounders. The new methods are computationally efficient, and provide statistically optimal corrections resulting in improved power and control for false discoveries. The package **lfmm** provides two main functions for estimating latent confounders (or factors): `lfmm_ridge` and `lfmm_lasso`. Those functions are based on optimal solutions of regularized least-squares problems. A short tutorial provides brief examples on how the R packages **lfmm** can be used for fitting latent factor mixed models and evaluating association between a response matrix (SNP genotype or methylation levels) and a variable of interest (phenotype or exposure levels) in genome-wide (GW), genome-environment (GE), epigenome-wide (EW) association studies. Corresponding software is available at the following url . 7 | ## Installation 8 | 9 | Installing the latest version from github requires [devtools](https://github.com/hadley/devtools): 10 | ```R 11 | # install.packages("devtools") 12 | devtools::install_github("bcm-uga/lfmm") 13 | ``` 14 | 15 | -------------------------------------------------------------------------------- /man/lfmm_sampler.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sampler.R 3 | \name{lfmm_sampler} 4 | \alias{lfmm_sampler} 5 | \title{LFMM generative data sampler} 6 | \usage{ 7 | lfmm_sampler(n, p, K, outlier.prop, cs, sigma = 0.2, B.sd = 1, B.mean = 0, 8 | U.sd = 1, V.sd = 1) 9 | } 10 | \arguments{ 11 | \item{n}{number of observations.} 12 | 13 | \item{p}{number of response variables.} 14 | 15 | \item{K}{number of latent variables (factors).} 16 | 17 | \item{outlier.prop}{proportion of outlier.} 18 | 19 | \item{cs}{correlation with between X and U.} 20 | 21 | \item{sigma}{standard deviation of residual errors.} 22 | 23 | \item{B.sd}{standard deviation for the effect size (B).} 24 | 25 | \item{B.mean}{mean of B.} 26 | 27 | \item{U.sd}{standard deviations for K factors.} 28 | 29 | \item{V.sd}{standard deviations for loadings.} 30 | } 31 | \value{ 32 | A list with simulated data. 33 | } 34 | \description{ 35 | Simulate data from the latent factor model. 36 | } 37 | \details{ 38 | \code{lfmm_sample()} sample a response matrix Y and a primary variable X such that 39 | 40 | Y = U t(V) + X t(B) + Epsilon. 41 | 42 | U,V, B and Epsilon are simulated according to normal multivariate distributions. 43 | Moreover U and X are such that \code{cor(U[,i], X) = cs[i]}. 44 | } 45 | \examples{ 46 | 47 | dat <- lfmm_sampler(n = 100, 48 | p = 1000, 49 | K = 3, 50 | outlier.prop = 0.1, 51 | cs = c(0.8), 52 | sigma = 0.2, 53 | B.sd = 1.0, 54 | B.mean = 0.0, 55 | U.sd = 1.0, 56 | V.sd = 1.0) 57 | } 58 | \author{ 59 | kevin caye, olivier francois 60 | } 61 | -------------------------------------------------------------------------------- /man/lfmm_ridge_CV.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lfmm.R 3 | \name{lfmm_ridge_CV} 4 | \alias{lfmm_ridge_CV} 5 | \title{Cross validation of LFMM estimates with ridge penalty} 6 | \usage{ 7 | lfmm_ridge_CV(Y, X, n.fold.row, n.fold.col, lambdas, Ks) 8 | } 9 | \arguments{ 10 | \item{Y}{a response variable matrix with n rows and p columns. 11 | Each column corresponds to a distinct response variable (e.g., SNP genotype, 12 | gene expression level, beta-normalized methylation profile, etc). 13 | Response variables must be encoded as numeric.} 14 | 15 | \item{X}{an explanatory variable matrix with n rows and d columns. 16 | Each column corresponds to a distinct explanatory variable (eg. phenotype). 17 | Explanatory variables must be encoded as numeric.} 18 | 19 | \item{n.fold.row}{number of cross-validation folds along rows.} 20 | 21 | \item{lambdas}{a list of numeric values for the regularization parameter.} 22 | 23 | \item{Ks}{a list of integer for the number of latent factors in the regression model.} 24 | 25 | \item{p.fold.col}{number of cross-validation folds along columns.} 26 | } 27 | \value{ 28 | a dataframe containing prediction errors for all values of lambda and K 29 | } 30 | \description{ 31 | This function splits the data set into a train set and a test set, and returns 32 | a prediction error. The function \code{\link{lfmm_ridge}} is run with the 33 | train set and the prediction error is evaluated from the test set. 34 | } 35 | \details{ 36 | The response variable matrix Y and the explanatory variables X are centered. 37 | } 38 | \examples{ 39 | library(ggplot2) 40 | library(lfmm) 41 | 42 | ## sample data 43 | K <- 3 44 | dat <- lfmm_sampler(n = 100, p = 1000, K = K, 45 | outlier.prop = 0.1, 46 | cs = c(0.8), 47 | sigma = 0.2, 48 | B.sd = 1.0, 49 | U.sd = 1.0, 50 | V.sd = 1.0) 51 | 52 | ## run cross validation 53 | errs <- lfmm_ridge_CV(Y = dat$Y, 54 | X = dat$X, 55 | n.fold.row = 5, 56 | n.fold.col = 5, 57 | lambdas = c(1e-10, 1, 1e20), 58 | Ks = c(1,2,3,4,5,6)) 59 | 60 | ## plot error 61 | ggplot(errs, aes(y = err, x = as.factor(K))) + 62 | geom_boxplot() + 63 | facet_grid(lambda ~ ., scale = "free") 64 | 65 | ggplot(errs, aes(y = err, x = as.factor(lambda))) + 66 | geom_boxplot() + 67 | facet_grid(K ~ ., scales = "free") 68 | 69 | } 70 | \seealso{ 71 | \code{\link{lfmm_ridge}} 72 | } 73 | \author{ 74 | cayek, francoio 75 | } 76 | -------------------------------------------------------------------------------- /man/effect_size.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lfmm.R 3 | \name{effect_size} 4 | \alias{effect_size} 5 | \title{Direct effect sizes estimated from latent factor models} 6 | \usage{ 7 | effect_size(Y, X, lfmm.object) 8 | } 9 | \arguments{ 10 | \item{Y}{a response variable matrix with n rows and p columns. 11 | Each column is a response variable (numeric).} 12 | 13 | \item{X}{an explanatory variable with n rows and d = 1 column (numeric).} 14 | 15 | \item{lfmm.object}{an object of class \code{lfmm} returned by the \link{lfmm_lasso} 16 | or \link{lfmm_ridge} function.} 17 | } 18 | \value{ 19 | a vector of length p containing all effect sizes for the regression 20 | of X on the matrix Y 21 | } 22 | \description{ 23 | This function returns 'direct' effect sizes for the regression of X (of dimension 1) on the matrix Y, 24 | as usually computed in genome-wide association studies. 25 | } 26 | \details{ 27 | The response variable matrix Y and the explanatory variable are centered. 28 | } 29 | \examples{ 30 | library(lfmm) 31 | 32 | ## Simulation of 1000 genotypes for 100 individuals (y) 33 | u <- matrix(rnorm(300, sd = 1), nrow = 100, ncol = 2) 34 | v <- matrix(rnorm(3000, sd = 2), nrow = 2, ncol = 1000) 35 | y <- matrix(rbinom(100000, size = 2, 36 | prob = 1/(1 + exp(-0.3*(u\%*\%v 37 | + rnorm(100000, sd = 2))))), 38 | nrow = 100, 39 | ncol = 1000) 40 | 41 | #PCA of genotypes, 3 main axes of variation (K = 2) 42 | plot(prcomp(y)) 43 | 44 | ## Simulation of 1000 phenotypes (x) 45 | ## Only the last 10 genotypes have significant effect sizes (b) 46 | b <- matrix(c(rep(0, 990), rep(6000, 10))) 47 | x <- y\%*\%b + rnorm(100, sd = 100) 48 | 49 | ## Compute effect sizes using lfmm_ridge 50 | ## Note that centering is important (scale = F). 51 | mod.lfmm <- lfmm_ridge(Y = y, 52 | X = x, 53 | K = 2) 54 | 55 | ## Compute direct effect sizes using lfmm_ridge estimates 56 | b.estimates <- effect_size(y, x, mod.lfmm) 57 | 58 | ## plot the last 30 effect sizes (true values are 0 and 6000) 59 | plot(b.estimates[971:1000]) 60 | abline(0, 0) 61 | abline(6000, 0, col = 2) 62 | 63 | ## Prediction of phenotypes 64 | candidates <- 991:1000 #set of causal loci 65 | x.pred <- scale(y[,candidates], scale = F) \%*\% matrix(b.estimates[candidates]) 66 | 67 | ## Check predictions 68 | plot(x - mean(x), x.pred, 69 | pch = 19, col = "grey", 70 | xlab = "Observed phenotypes (centered)", 71 | ylab = "Predicted from PRS") 72 | abline(0,1) 73 | abline(lm(x.pred ~ scale(x, scale = FALSE)), col = 2) 74 | } 75 | \author{ 76 | cayek, francoio 77 | } 78 | -------------------------------------------------------------------------------- /R/cross-validation.R: -------------------------------------------------------------------------------- 1 | #' return a list of train/test indices 2 | #' 3 | #' @export 4 | left.out.kfold <- function(kfold, J) { 5 | if (kfold == 1) { 6 | cuts <- rep(factor("[1,m]"), J) 7 | } else { 8 | cuts <- cut(sample.int(J), breaks = kfold) 9 | } 10 | folds <- list() 11 | for (l in seq_along(levels(cuts))) { 12 | folds[[l]] <- which(cuts == levels(cuts)[l]) 13 | } 14 | folds 15 | } 16 | 17 | CV <- function(m, dat, n.fold.row, n.fold.col, params, col.prop = 1.0, ...) { 18 | 19 | n <- nrow(dat$Y) 20 | p <- ncol(dat$Y) 21 | 22 | param.names <- names(params) 23 | 24 | m.train <- m 25 | ## main loops 26 | res <- foreach(i = 1:nrow(params), .combine = 'rbind') %dopar% 27 | { 28 | errs <- data.frame() 29 | 30 | ## param 31 | param <- params[i, , drop = FALSE] 32 | message("=== params") 33 | print.data.frame(param) 34 | 35 | ## copy dat object 36 | dat.train <- new(class(dat)) 37 | dat.test <- new(class(dat)) 38 | 39 | ## row folds 40 | row.folds <- left.out.kfold(n.fold.row, n) 41 | for (row.fold in row.folds) { 42 | 43 | ## init U 44 | m.train$U <- NULL 45 | 46 | ## train/test 47 | dat.train$Y <- dat$Y[-row.fold,,drop = FALSE] 48 | dat.train$X <- dat$X[-row.fold,,drop = FALSE] 49 | dat.test$Y <- dat$Y[row.fold,,drop = FALSE] 50 | dat.test$X <- dat$X[row.fold,,drop = FALSE] 51 | 52 | ## method 53 | m.train[param.names] <- param 54 | 55 | ## fit method 56 | m.train <- lfmm_fit(m.train, dat.train, ...) 57 | 58 | ## col with less error 59 | lfmm.err2s <- dat.train$err2s_lfmm(m.train$U, m.train$V, m.train$B) 60 | kept.col.ind <- order(lfmm.err2s)[1:(round(col.prop * p))] 61 | 62 | ## compute err 63 | col.folds <- left.out.kfold(n.fold.col, length(kept.col.ind)) 64 | err <- data.frame() 65 | for (col.fold in col.folds) { 66 | out.col.id <- kept.col.ind[col.fold] 67 | 68 | ## predict 69 | predicted.Y <- dat.test$predict_lfmm_knowing_loadings(V = m.train$V, 70 | B = m.train$B, 71 | unknown.j = out.col.id) 72 | ## compute error 73 | err <- rbind(err, 74 | data.frame(err = mean((predicted.Y - 75 | dat.test$Y[,out.col.id]) ^2), 76 | param, 77 | nozero.prop = mean(m.train$B != 0.0) 78 | )) 79 | } 80 | errs <- rbind(errs, err) 81 | } 82 | errs 83 | } 84 | res 85 | } 86 | -------------------------------------------------------------------------------- /docs/jquery.sticky-kit.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | Sticky-kit v1.1.2 | WTFPL | Leaf Corcoran 2015 | http://leafo.net 3 | */ 4 | (function(){var b,f;b=this.jQuery||window.jQuery;f=b(window);b.fn.stick_in_parent=function(d){var A,w,J,n,B,K,p,q,k,E,t;null==d&&(d={});t=d.sticky_class;B=d.inner_scrolling;E=d.recalc_every;k=d.parent;q=d.offset_top;p=d.spacer;w=d.bottoming;null==q&&(q=0);null==k&&(k=void 0);null==B&&(B=!0);null==t&&(t="is_stuck");A=b(document);null==w&&(w=!0);J=function(a,d,n,C,F,u,r,G){var v,H,m,D,I,c,g,x,y,z,h,l;if(!a.data("sticky_kit")){a.data("sticky_kit",!0);I=A.height();g=a.parent();null!=k&&(g=g.closest(k)); 5 | if(!g.length)throw"failed to find stick parent";v=m=!1;(h=null!=p?p&&a.closest(p):b("
    "))&&h.css("position",a.css("position"));x=function(){var c,f,e;if(!G&&(I=A.height(),c=parseInt(g.css("border-top-width"),10),f=parseInt(g.css("padding-top"),10),d=parseInt(g.css("padding-bottom"),10),n=g.offset().top+c+f,C=g.height(),m&&(v=m=!1,null==p&&(a.insertAfter(h),h.detach()),a.css({position:"",top:"",width:"",bottom:""}).removeClass(t),e=!0),F=a.offset().top-(parseInt(a.css("margin-top"),10)||0)-q, 6 | u=a.outerHeight(!0),r=a.css("float"),h&&h.css({width:a.outerWidth(!0),height:u,display:a.css("display"),"vertical-align":a.css("vertical-align"),"float":r}),e))return l()};x();if(u!==C)return D=void 0,c=q,z=E,l=function(){var b,l,e,k;if(!G&&(e=!1,null!=z&&(--z,0>=z&&(z=E,x(),e=!0)),e||A.height()===I||x(),e=f.scrollTop(),null!=D&&(l=e-D),D=e,m?(w&&(k=e+u+c>C+n,v&&!k&&(v=!1,a.css({position:"fixed",bottom:"",top:c}).trigger("sticky_kit:unbottom"))),eb&&!v&&(c-=l,c=Math.max(b-u,c),c=Math.min(q,c),m&&a.css({top:c+"px"})))):e>F&&(m=!0,b={position:"fixed",top:c},b.width="border-box"===a.css("box-sizing")?a.outerWidth()+"px":a.width()+"px",a.css(b).addClass(t),null==p&&(a.after(h),"left"!==r&&"right"!==r||h.append(a)),a.trigger("sticky_kit:stick")),m&&w&&(null==k&&(k=e+u+c>C+n),!v&&k)))return v=!0,"static"===g.css("position")&&g.css({position:"relative"}), 8 | a.css({position:"absolute",bottom:d,top:"auto"}).trigger("sticky_kit:bottom")},y=function(){x();return l()},H=function(){G=!0;f.off("touchmove",l);f.off("scroll",l);f.off("resize",y);b(document.body).off("sticky_kit:recalc",y);a.off("sticky_kit:detach",H);a.removeData("sticky_kit");a.css({position:"",bottom:"",top:"",width:""});g.position("position","");if(m)return null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),h.remove()),a.removeClass(t)},f.on("touchmove",l),f.on("scroll",l),f.on("resize", 9 | y),b(document.body).on("sticky_kit:recalc",y),a.on("sticky_kit:detach",H),setTimeout(l,0)}};n=0;for(K=this.length;n 0.0] 63 | K <- length(svd.res$d) 64 | if (K > k) { 65 | warning("K is increasing, now K = ", K) 66 | } 67 | if (K > (k + 2) || K <= 0.0) { 68 | stop("K too big or too small, OMG, call 911 !!") 69 | } 70 | svd.res$u <-svd.res$u[,1:K] 71 | svd.res$v <- svd.res$v[,1:K] 72 | svd.res 73 | } 74 | 75 | ##' Compute the matrix used to reduce correlation with X 76 | ##' 77 | ##' see mon cahier 6/07/2017 78 | ##' @author cayek 79 | ##' @export 80 | compute_P <- function(X, lambda) { 81 | 82 | ## param 83 | d <- ncol(X) 84 | n <- nrow(X) 85 | 86 | res <- list() 87 | svd.res <- compute_eigen_svd(X) 88 | 89 | D1 <- diag(1, n, n) 90 | 91 | diag(D1)[1:d] <- sqrt(lambda / (lambda + svd.res$sigma)) 92 | 93 | D1.inv <- D1 94 | diag(D1.inv) <- 1 / diag(D1.inv) 95 | 96 | res$sqrt.P <- tcrossprod(D1, svd.res$Q) 97 | res$sqrt.P.inv <- svd.res$Q %*% D1.inv 98 | 99 | res 100 | } 101 | -------------------------------------------------------------------------------- /tests/testthat/test-helpers.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | context("Helpers") 3 | 4 | 5 | test_that("compute_P", { 6 | 7 | set.seed(135435) 8 | dat <- lfmm_sampler(n = 100, p = 1000, K = 3, 9 | outlier.prop = 0.1, 10 | cs = c(0.8), 11 | sigma = 0.2, 12 | B.sd = 1.0, 13 | U.sd = 1.0, 14 | V.sd = 1.0) 15 | 16 | res <- compute_P(X = dat$X, 17 | lambda = 1e-5) 18 | 19 | expect_lte(mean(abs(diag(1, nrow(dat$X), nrow(dat$X))) - res$sqrt.P %*% res$sqrt.P.inv), 1e-16) 20 | ## mean(abs(res$P - res$sqrt.P %*% res$sqrt.P)) 21 | 22 | }) 23 | 24 | test_that("compute_B_ridge", { 25 | 26 | dat <- lfmm_sampler(n = 100, p = 1000, K = 3, 27 | outlier.prop = 0.1, 28 | cs = c(0.8), 29 | sigma = 0.2, 30 | B.sd = 1.0, 31 | U.sd = 1.0, 32 | V.sd = 1.0) 33 | 34 | X <- cbind(matrix(1,100,1), dat$X) 35 | B <- compute_B_ridge(A = dat$Y, 36 | X = X, 37 | lambda = 1e-3) 38 | expect_equal(dim(B), c(1000, 2)) 39 | ## hist(B[,1]) ## RMK when lambda -> infinity B -> 0 OK 40 | 41 | }) 42 | 43 | test_that("compute_B_lasso", { 44 | 45 | dat <- lfmm_sampler(n = 100, p = 1000, K = 3, 46 | outlier.prop = 0.1, 47 | cs = c(0.8), 48 | sigma = 0.2, 49 | B.sd = 1.0, 50 | U.sd = 1.0, 51 | V.sd = 1.0) 52 | 53 | B_lasso <- function(A, X, lambda) { 54 | B_hat <- solve((crossprod(X,X) ), crossprod(X, A)) 55 | sign(B_hat) * ((abs(B_hat) - lambda) %>% purrr::map_dbl(~ max(.x, 0))) 56 | } 57 | 58 | lambda <- 1.5e0 59 | 60 | X <- cbind(matrix(1,100,1), dat$X) 61 | X <- svd(X)$u ## to have othogonal value 62 | B.pkg <- compute_B_lasso(A = dat$Y, 63 | X = X, 64 | lambda = lambda) 65 | 66 | expect_equal(dim(B.pkg), c(1000, 2)) 67 | ## hist(B[,2]) 68 | ## mean(B[,2] == 0) 69 | 70 | ## with R 71 | B.r <- B_lasso(dat$Y, X, lambda) 72 | 73 | ## comp 74 | expect_lt(mean(abs(t(B.r) - B.pkg)), 1e-15) 75 | expect_equal(mean(B.r != 0), mean(B.pkg != 0)) 76 | }) 77 | 78 | test_that("X svd with and ridge estimator", { 79 | 80 | dat <- lfmm_sampler(n = 100, p = 1000, K = 3, 81 | outlier.prop = 0.1, 82 | cs = c(0.8), 83 | sigma = 0.2, 84 | B.sd = 1.0, 85 | U.sd = 1.0, 86 | V.sd = 1.0) 87 | dat$X <- cbind(dat$X, rnorm(100)) 88 | lambda <- 1 89 | n <- nrow(dat$X) 90 | d <- ncol(dat$X) 91 | 92 | res <- compute_eigen_svd(X = dat$X) 93 | 94 | B <- compute_B_ridge(dat$Y, dat$X, lambda) 95 | 96 | ## compute B as on my cahier 6/07/2017 97 | D2 <- diag(res$sigma, d, n) 98 | diag(D2) <- res$sigma / (res$sigma ^ 2 + lambda) 99 | B.hat <- t(res$R %*% D2 %*% t(res$Q) %*% dat$Y) 100 | 101 | expect_lt(mean(abs(B - B.hat)), 1e-15) 102 | 103 | }) 104 | 105 | -------------------------------------------------------------------------------- /R/sampler.R: -------------------------------------------------------------------------------- 1 | SimulatedLfmmDat.builder <- setRefClass("SimulatedLfmmDat", contains = "LfmmDat", 2 | fields = c("B", "Epsilon", "U", "V", "outlier")) 3 | 4 | 5 | 6 | ##' LFMM generative data sampler 7 | ##' 8 | ##' Simulate data from the latent factor model. 9 | ##' 10 | ##' `lfmm_sample()` sample a response matrix Y and a primary variable X such that 11 | ##' 12 | ##' Y = U t(V) + X t(B) + Epsilon. 13 | ##' 14 | ##' U,V, B and Epsilon are simulated according to normal multivariate distributions. 15 | ##' Moreover U and X are such that `cor(U[,i], X) = cs[i]`. 16 | ##' 17 | ##' @return A list with simulated data. 18 | ##' @author kevin caye, olivier francois 19 | ##' @param n number of observations. 20 | ##' @param p number of response variables. 21 | ##' @param K number of latent variables (factors). 22 | ##' @param outlier.prop proportion of outlier. 23 | ##' @param cs correlation with between X and U. 24 | ##' @param sigma standard deviation of residual errors. 25 | ##' @param B.sd standard deviation for the effect size (B). 26 | ##' @param B.mean mean of B. 27 | ##' @param U.sd standard deviations for K factors. 28 | ##' @param V.sd standard deviations for loadings. 29 | ##' @export 30 | ##' 31 | ##' @examples 32 | ##' 33 | ##' dat <- lfmm_sampler(n = 100, 34 | ##' p = 1000, 35 | ##' K = 3, 36 | ##' outlier.prop = 0.1, 37 | ##' cs = c(0.8), 38 | ##' sigma = 0.2, 39 | ##' B.sd = 1.0, 40 | ##' B.mean = 0.0, 41 | ##' U.sd = 1.0, 42 | ##' V.sd = 1.0) 43 | lfmm_sampler <- function(n, p, K, 44 | outlier.prop, 45 | cs, 46 | sigma = 0.2, 47 | B.sd = 1.0, 48 | B.mean = 0.0, 49 | U.sd = 1.0, 50 | V.sd = 1.0) 51 | { 52 | 53 | ## sample outlier 54 | outlier <- sample.int(p, outlier.prop * p) 55 | outlier.nb = length(outlier) 56 | 57 | ## test cs 58 | if (length(cs) < K) { 59 | message("length(cs) < K. Filling cs with zero") 60 | cs <- c(cs, rep(0, times = K - length(cs))) 61 | } 62 | 63 | ## simulate U and X 64 | theta2 <- sum((cs/U.sd)^2) + 0.01 65 | 66 | Sigma <- diag(x = U.sd^2, nrow = K, ncol = K) 67 | Sigma <- rbind(Sigma, matrix(cs, nrow = 1)) 68 | Sigma <- cbind(Sigma, matrix(c(cs, theta2), ncol = 1)) 69 | UX <- MASS::mvrnorm(n, mu = rep(0.0, K + 1), Sigma = Sigma) 70 | U <- UX[,1:K, drop = FALSE] 71 | X <- UX[,K + 1, drop = FALSE] 72 | 73 | ## simulate V 74 | V <- MASS::mvrnorm(p, mu = rep(0.0, K), Sigma = V.sd^2 * diag(K)) 75 | 76 | 77 | ## simulate B 78 | B <- matrix(0, p, 1) 79 | B[outlier, 1] <- rnorm(outlier.nb, B.mean, B.sd) 80 | 81 | ## sample error 82 | Epsilon = MASS::mvrnorm(n, mu = rep(0.0, p), Sigma = sigma^2 * diag(p)) 83 | 84 | ## syntheses 85 | Y = U %*% t(V) + X %*% t(B) + Epsilon 86 | 87 | SimulatedLfmmDat.builder(Y = Y, 88 | X = X, 89 | outlier = outlier, 90 | U = U, 91 | V = V, 92 | B = B, 93 | meta = list(), 94 | missing.ind = c()) 95 | } 96 | -------------------------------------------------------------------------------- /R/LfmmDat.R: -------------------------------------------------------------------------------- 1 | LfmmDat.builder <- setRefClass("LfmmDat", contains = "Dat", 2 | fields = c("X"), 3 | methods = list( 4 | predict_lfmm_knowing_loadings = 5 | function(V, B, unknown.j) { 6 | n <- nrow(.self$Y) 7 | ## Compute U 8 | U <- (.self$Y[,-unknown.j,drop = FALSE] - 9 | tcrossprod(.self$X, B[-unknown.j,, drop = FALSE])) %*% 10 | V[-unknown.j, ,drop = FALSE] 11 | ## predict Y 12 | tcrossprod(U, V[unknown.j,,drop = FALSE]) + 13 | tcrossprod(.self$X, B[unknown.j,,drop = FALSE]) 14 | }, 15 | impute_lfmm = function(U, V, B) { 16 | impute_lfmm_cpp(.self$Y, .self$X, U, V, B, .self$missing.ind) 17 | }, 18 | err2_lfmm = function(U, V, B) { 19 | err2_lfmm_cpp(.self$Y, .self$X, U, V, B) 20 | }, 21 | err2s_lfmm = function(U, V, B) { 22 | err2s_lfmm_cpp(.self$Y, .self$X, U, V, B) 23 | }, 24 | sigma2_lm = function(X, B, nb.df) { 25 | if (is.matrix(.self$Y) && is.double(.self$Y)) { 26 | res <- sum2_lm_cpp(.self$Y, X, B) / nb.df 27 | } else { 28 | res <- 1:ncol(.self$Y) 29 | aux.f <- function(j) { 30 | aux <- .self$Y[,j] - tcrossprod(X , B[j,,drop = FALSE]) 31 | sum(aux * aux) 32 | } 33 | res <- sapply(res,aux.f) 34 | res <- res / nb.df 35 | res 36 | } 37 | } 38 | ) 39 | ) 40 | 41 | #' Class which store data 42 | #' 43 | #' 44 | #' @export 45 | LfmmDat <- function(Y, X, missing = TRUE) { 46 | dat <- LfmmDat.builder(Y = read_input(Y), 47 | X = read_input(X), 48 | missing.ind = NULL, 49 | meta = list()) 50 | if (missing) { 51 | dat$missing.ind <- which(is.na(dat$Y)) 52 | } 53 | dat 54 | } 55 | 56 | #' Class which store data 57 | #' 58 | #' 59 | #' @export 60 | SimulatedLfmmDat <- function(Y, X, outlier, U, V, B) { 61 | dat <- SimulatedLfmmDat.builder(Y = read_input(Y), 62 | meta = list(), 63 | X = read_input(X), 64 | outlier = read_input(outlier), 65 | U = read_input(U), 66 | B = read_input(B), 67 | V = read_input(V)) 68 | dat$missing.ind <- which(is.na(dat$Y)) 69 | dat 70 | } 71 | 72 | -------------------------------------------------------------------------------- /src/helpers.cpp: -------------------------------------------------------------------------------- 1 | // -*- mode: poly-c++r -*- 2 | 3 | #include 4 | #include 5 | #include 6 | #ifdef _OPENMP 7 | #include 8 | #endif 9 | 10 | using namespace Rcpp; 11 | using namespace Eigen; 12 | 13 | // [[Rcpp::export]] 14 | Rcpp::List compute_eigen_svd(const Eigen::Map & X) { 15 | 16 | // compute svd of X 17 | JacobiSVD svd(X, ComputeFullU | ComputeFullV); 18 | return Rcpp::List::create(Named("Q") = svd.matrixU(), 19 | Named("R") = svd.matrixV(), 20 | Named("sigma") = svd.singularValues() 21 | ); 22 | } 23 | 24 | 25 | // [[Rcpp::export]] 26 | void impute_lfmm_cpp(Eigen::Map Y, 27 | const Eigen::Map X, 28 | const Eigen::Map U, 29 | const Eigen::Map V, 30 | const Eigen::Map B, 31 | NumericVector missingId 32 | ) { 33 | 34 | // constants 35 | const int n = Y.rows(); 36 | int k = 0; 37 | int i = 0; 38 | int j = 0; 39 | 40 | for (int t = 0; t < missingId.size(); t++) { 41 | k = missingId[t] - 1; 42 | i = k % n; 43 | j = k / n; 44 | Y(i, j) = U.row(i).dot(V.row(j)) + 45 | X.row(i).dot(B.row(j)); 46 | } 47 | } 48 | 49 | // [[Rcpp::export]] 50 | double err2_lfmm_cpp(const Eigen::Map Y, 51 | const Eigen::Map X, 52 | const Eigen::Map U, 53 | const Eigen::Map V, 54 | const Eigen::Map B) { 55 | // constants 56 | const int n = Y.rows(); 57 | const int p = Y.cols(); 58 | double err2 = 0.0; 59 | double aux = 0.0; 60 | 61 | for (int i = 0; i < n; i++) { 62 | for (int j = 0; j < p; j++) { 63 | aux = Y(i, j) - 64 | U.row(i).dot(V.row(j)) - 65 | X.row(i).dot(B.row(j)); 66 | err2 += aux * aux; 67 | } 68 | } 69 | return(err2 / n / p); 70 | } 71 | 72 | // [[Rcpp::export]] 73 | Eigen::VectorXd err2s_lfmm_cpp(const Eigen::Map Y, 74 | const Eigen::Map X, 75 | const Eigen::Map U, 76 | const Eigen::Map V, 77 | const Eigen::Map B) { 78 | // constants 79 | const int n = Y.rows(); 80 | const int p = Y.cols(); 81 | Eigen::VectorXd err2s = VectorXd(p); 82 | double aux = 0.0; 83 | 84 | for (int j = 0; j < p; j++) { 85 | err2s(j) = 0.0; 86 | for (int i = 0; i < n; i++) { 87 | aux = Y(i, j) - 88 | U.row(i).dot(V.row(j)) - 89 | X.row(i).dot(B.row(j)); 90 | err2s(j) += aux * aux; 91 | } 92 | } 93 | return(err2s); 94 | } 95 | 96 | // [[Rcpp::export]] 97 | Eigen::VectorXd sum2_lm_cpp(const Eigen::Map Y, 98 | const Eigen::Map X, 99 | const Eigen::Map B) { 100 | // constants 101 | const int n = Y.rows(); 102 | const int p = Y.cols(); 103 | VectorXd err2 = VectorXd::Zero(p); 104 | double aux = 0.0; 105 | 106 | for (int j = 0; j < p; j++) { 107 | err2(j) = 0.0; 108 | for (int i = 0; i < n; i++) { 109 | aux = Y(i, j) - 110 | X.row(i).dot(B.row(j)); 111 | err2(j) += aux * aux; 112 | } 113 | } 114 | return(err2); 115 | } 116 | -------------------------------------------------------------------------------- /docs/pkgdown.css: -------------------------------------------------------------------------------- 1 | /* Sticker footer */ 2 | body > .container { 3 | display: flex; 4 | padding-top: 60px; 5 | min-height: calc(100vh); 6 | flex-direction: column; 7 | } 8 | 9 | body > .container .row { 10 | flex: 1; 11 | } 12 | 13 | footer { 14 | margin-top: 45px; 15 | padding: 35px 0 36px; 16 | border-top: 1px solid #e5e5e5; 17 | color: #666; 18 | display: flex; 19 | } 20 | footer p { 21 | margin-bottom: 0; 22 | } 23 | footer div { 24 | flex: 1; 25 | } 26 | footer .pkgdown { 27 | text-align: right; 28 | } 29 | footer p { 30 | margin-bottom: 0; 31 | } 32 | 33 | img.icon { 34 | float: right; 35 | } 36 | 37 | img { 38 | max-width: 100%; 39 | } 40 | 41 | /* Section anchors ---------------------------------*/ 42 | 43 | a.anchor { 44 | margin-left: -30px; 45 | display:inline-block; 46 | width: 30px; 47 | height: 30px; 48 | visibility: hidden; 49 | 50 | background-image: url(./link.svg); 51 | background-repeat: no-repeat; 52 | background-size: 20px 20px; 53 | background-position: center center; 54 | } 55 | 56 | .hasAnchor:hover a.anchor { 57 | visibility: visible; 58 | } 59 | 60 | @media (max-width: 767px) { 61 | .hasAnchor:hover a.anchor { 62 | visibility: hidden; 63 | } 64 | } 65 | 66 | 67 | /* Fixes for fixed navbar --------------------------*/ 68 | 69 | .contents h1, .contents h2, .contents h3, .contents h4 { 70 | padding-top: 60px; 71 | margin-top: -60px; 72 | } 73 | 74 | /* Static header placement on mobile devices */ 75 | @media (max-width: 767px) { 76 | .navbar-fixed-top { 77 | position: absolute; 78 | } 79 | .navbar { 80 | padding: 0; 81 | } 82 | } 83 | 84 | 85 | /* Sidebar --------------------------*/ 86 | 87 | #sidebar { 88 | margin-top: 30px; 89 | } 90 | #sidebar h2 { 91 | font-size: 1.5em; 92 | margin-top: 1em; 93 | } 94 | 95 | #sidebar h2:first-child { 96 | margin-top: 0; 97 | } 98 | 99 | #sidebar .list-unstyled li { 100 | margin-bottom: 0.5em; 101 | } 102 | 103 | /* Reference index & topics ----------------------------------------------- */ 104 | 105 | .ref-index th {font-weight: normal;} 106 | .ref-index h2 {font-size: 20px;} 107 | 108 | .ref-index td {vertical-align: top;} 109 | .ref-index .alias {width: 40%;} 110 | .ref-index .title {width: 60%;} 111 | 112 | .ref-index .alias {width: 40%;} 113 | .ref-index .title {width: 60%;} 114 | 115 | .ref-arguments th {text-align: right; padding-right: 10px;} 116 | .ref-arguments th, .ref-arguments td {vertical-align: top;} 117 | .ref-arguments .name {width: 20%;} 118 | .ref-arguments .desc {width: 80%;} 119 | 120 | /* Nice scrolling for wide elements --------------------------------------- */ 121 | 122 | table { 123 | display: block; 124 | overflow: auto; 125 | } 126 | 127 | /* Syntax highlighting ---------------------------------------------------- */ 128 | 129 | pre { 130 | word-wrap: normal; 131 | word-break: normal; 132 | border: 1px solid #eee; 133 | } 134 | 135 | pre, code { 136 | background-color: #f8f8f8; 137 | color: #333; 138 | } 139 | 140 | pre .img { 141 | margin: 5px 0; 142 | } 143 | 144 | pre .img img { 145 | background-color: #fff; 146 | display: block; 147 | height: auto; 148 | } 149 | 150 | code a, pre a { 151 | color: #375f84; 152 | } 153 | 154 | .fl {color: #1514b5;} 155 | .fu {color: #000000;} /* function */ 156 | .ch,.st {color: #036a07;} /* string */ 157 | .kw {color: #264D66;} /* keyword */ 158 | .co {color: #888888;} /* comment */ 159 | 160 | .message { color: black; font-weight: bolder;} 161 | .error { color: orange; font-weight: bolder;} 162 | .warning { color: #6A0366; font-weight: bolder;} 163 | 164 | -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Articles • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 | 80 | 81 |
    82 |
    83 |
    84 |

    All vignettes

    85 |

    86 | 87 | 90 |
    91 |
    92 |
    93 | 94 |
    95 | 98 | 99 |
    100 |

    Site built with pkgdown.

    101 |
    102 | 103 |
    104 |
    105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Authors • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 |
      84 |
    • 85 |

      Kevin Caye. Author, maintainer. 86 |

      87 |
    • 88 |
    • 89 |

      Olivier François. Author. 90 |

      91 |
    • 92 |
    93 | 94 |
    95 | 96 |
    97 | 98 | 99 |
    100 | 103 | 104 |
    105 |

    Site built with pkgdown.

    106 |
    107 | 108 |
    109 |
    110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /docs/reference/lfmm.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | R package with matrix factorization algorithms — lfmm • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    R package with matrix factorization algorithms

    85 | 86 | 87 | 88 | 89 |
    90 | 96 |
    97 | 98 |
    99 | 102 | 103 |
    104 |

    Site built with pkgdown.

    105 |
    106 | 107 |
    108 |
    109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /docs/reference/Dat.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Class which store data — Dat • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    Class which store data

    85 | 86 | 87 |
    Dat(Y)
    88 | 89 | 90 |
    91 | 97 |
    98 | 99 |
    100 | 103 | 104 |
    105 |

    Site built with pkgdown.

    106 |
    107 | 108 |
    109 |
    110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /docs/reference/lfmm_fit.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Fit the model — lfmm_fit • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    Fit the model

    85 | 86 | 87 |
    lfmm_fit(m, dat, ...)
    88 | 89 | 90 |
    91 | 97 |
    98 | 99 |
    100 | 103 | 104 |
    105 |

    Site built with pkgdown.

    106 |
    107 | 108 |
    109 |
    110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /docs/reference/left.out.kfold.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | return a list of train/test indices — left.out.kfold • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    return a list of train/test indices

    85 | 86 | 87 |
    left.out.kfold(kfold, J)
    88 | 89 | 90 |
    91 | 97 |
    98 | 99 |
    100 | 103 | 104 |
    105 |

    Site built with pkgdown.

    106 |
    107 | 108 |
    109 |
    110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /docs/reference/lfmm_impute.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Impute Y with a fitted model. — lfmm_impute • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    Impute Y with a fitted model.

    85 | 86 | 87 |
    lfmm_impute(m, dat, ...)
    88 | 89 | 90 |
    91 | 97 |
    98 | 99 |
    100 | 103 | 104 |
    105 |

    Site built with pkgdown.

    106 |
    107 | 108 |
    109 |
    110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /docs/reference/lfmm_residual_error2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Compute the residual error — lfmm_residual_error2 • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    Compute the residual error

    85 | 86 | 87 |
    lfmm_residual_error2(m, dat, ...)
    88 | 89 | 90 |
    91 | 97 |
    98 | 99 |
    100 | 103 | 104 |
    105 |

    Site built with pkgdown.

    106 |
    107 | 108 |
    109 |
    110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /docs/reference/lfmm_CV.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Cross validation — lfmm_CV • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    Cross validation

    85 | 86 | 87 |
    lfmm_CV(m, dat, n.fold.row, n.fold.col, ...)
    88 | 89 | 90 |
    91 | 97 |
    98 | 99 |
    100 | 103 | 104 |
    105 |

    Site built with pkgdown.

    106 |
    107 | 108 |
    109 |
    110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /docs/reference/LfmmDat.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Class which store data — LfmmDat • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    Class which store data

    85 | 86 | 87 |
    LfmmDat(Y, X, missing = TRUE)
    88 | 89 | 90 |
    91 | 97 |
    98 | 99 |
    100 | 103 | 104 |
    105 |

    Site built with pkgdown.

    106 |
    107 | 108 |
    109 |
    110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /docs/reference/hypothesis_testing_lm.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Hypothesis testing with lm — hypothesis_testing_lm • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    linear model: 85 | Y = X B^T + E

    86 | 87 | 88 |
    hypothesis_testing_lm(dat, X)
    89 | 90 | 91 |
    92 | 102 |
    103 | 104 |
    105 | 108 | 109 |
    110 |

    Site built with pkgdown.

    111 |
    112 | 113 |
    114 |
    115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /docs/reference/compute_P.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Compute the matrix used to reduce correlation with X — compute_P • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    see mon cahier 6/07/2017

    85 | 86 | 87 |
    compute_P(X, lambda)
    88 | 89 | 90 |
    91 | 101 |
    102 | 103 |
    104 | 107 | 108 |
    109 |

    Site built with pkgdown.

    110 |
    111 | 112 |
    113 |
    114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /docs/reference/SimulatedLfmmDat.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Class which store data — SimulatedLfmmDat • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    Class which store data

    85 | 86 | 87 |
    SimulatedLfmmDat(Y, X, outlier, U, V, B)
    88 | 89 | 90 |
    91 | 97 |
    98 | 99 |
    100 | 103 | 104 |
    105 |

    Site built with pkgdown.

    106 |
    107 | 108 |
    109 |
    110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /docs/reference/lfmm_fit_knowing_loadings.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Fit the model when latent factor loadings are known — lfmm_fit_knowing_loadings • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    Fit the model when latent factor loadings are known

    85 | 86 | 87 |
    lfmm_fit_knowing_loadings(m, dat, ...)
    88 | 89 | 90 |
    91 | 97 |
    98 | 99 |
    100 | 103 | 104 |
    105 |

    Site built with pkgdown.

    106 |
    107 | 108 |
    109 |
    110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /docs/reference/lfmm_fit_knowing_loadings.ridgeLFMM.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Fit assuming V and B — lfmm_fit_knowing_loadings.ridgeLFMM • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    Fit assuming V and B

    85 | 86 | 87 |
    # S3 method for ridgeLFMM
     88 | lfmm_fit_knowing_loadings(m, dat)
    89 | 90 | 91 |
    92 | 98 |
    99 | 100 |
    101 | 104 | 105 |
    106 |

    Site built with pkgdown.

    107 |
    108 | 109 |
    110 |
    111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /docs/reference/compute_pvalue_from_tscore.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | score are assume to follow student distibution with df degre of freedom — compute_pvalue_from_tscore • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    score are assume to follow student distibution with df degre of freedom

    85 | 86 | 87 |
    compute_pvalue_from_tscore(score, df)
    88 | 89 | 90 |
    91 | 97 |
    98 | 99 |
    100 | 103 | 104 |
    105 |

    Site built with pkgdown.

    106 |
    107 | 108 |
    109 |
    110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /docs/reference/compute_pvalue_from_zscore.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | score are assume to follow normal distibution — compute_pvalue_from_zscore • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    score are assume to follow normal distibution

    85 | 86 | 87 |
    compute_pvalue_from_zscore(score, mean = 0, sd = 1)
    88 | 89 | 90 |
    91 | 97 |
    98 | 99 |
    100 | 103 | 104 |
    105 |

    Site built with pkgdown.

    106 |
    107 | 108 |
    109 |
    110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /man/lfmm_ridge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lfmm.R 3 | \name{lfmm_ridge} 4 | \alias{lfmm_ridge} 5 | \title{LFMM least-squares estimates with ridge penalty} 6 | \usage{ 7 | lfmm_ridge(Y, X, K, lambda = 1e-05, algorithm = "analytical", 8 | it.max = 100, relative.err.min = 1e-06) 9 | } 10 | \arguments{ 11 | \item{Y}{a response variable matrix with n rows and p columns. 12 | Each column corresponds to a distinct response variable (e.g., SNP genotype, 13 | gene expression level, beta-normalized methylation profile, etc). 14 | Response variables must be encoded as numeric.} 15 | 16 | \item{X}{an explanatory variable matrix with n rows and d columns. 17 | Each column corresponds to a distinct explanatory variable (eg. phenotype). 18 | Explanatory variables must be encoded as numeric variables.} 19 | 20 | \item{K}{an integer for the number of latent factors in the regression model.} 21 | 22 | \item{lambda}{a numeric value for the regularization parameter.} 23 | 24 | \item{algorithm}{exact (analytical) algorithm or numerical algorithm. 25 | The exact algorithm is based on the global minimum of the loss function and 26 | computation is very fast. The numerical algorithm converges toward a local 27 | minimum of the loss function. The exact method should preferred. The numerical method is 28 | for very large n.} 29 | 30 | \item{it.max}{an integer value for the number of iterations for the 31 | numerical algorithm.} 32 | 33 | \item{relative.err.epsilon}{a numeric value for a relative convergence error. Test 34 | whether the numerical algorithm converges or not (numerical algorithm only).} 35 | } 36 | \value{ 37 | an object of class \code{lfmm} with the following attributes: 38 | \itemize{ 39 | \item U the latent variable score matrix with dimensions n x K, 40 | \item V the latent variable axis matrix with dimensions p x K, 41 | \item B the effect size matrix with dimensions p x d. 42 | } 43 | } 44 | \description{ 45 | This function computes regularized least squares estimates 46 | for latent factor mixed models using a ridge penalty. 47 | } 48 | \details{ 49 | The algorithm minimizes the following penalized least-squares criterion 50 | \deqn{ L(U, V, B) = \frac{1}{2} ||Y - U V^{T} - X B^T||_{F}^2 51 | + \frac{\lambda}{2} ||B||^{2}_{2} ,} 52 | where Y is a response data matrix, X contains all explanatory variables, 53 | U denotes the score matrix, V is the loading matrix, B is the (direct) effect 54 | size matrix, and lambda is a regularization parameter. 55 | 56 | The response variable matrix Y and the explanatory variable are centered. 57 | } 58 | \examples{ 59 | 60 | library(lfmm) 61 | 62 | ## a GWAS example with Y = SNPs and X = phenotype 63 | data(example.data) 64 | Y <- example.data$genotype 65 | X <- example.data$phenotype 66 | 67 | ## Fit an LFMM with K = 6 factors 68 | mod.lfmm <- lfmm_ridge(Y = Y, 69 | X = X, 70 | K = 6) 71 | 72 | ## Perform association testing using the fitted model: 73 | pv <- lfmm_test(Y = Y, 74 | X = X, 75 | lfmm = mod.lfmm, 76 | calibrate = "gif") 77 | 78 | ## Manhattan plot with causal loci shown 79 | 80 | pvalues <- pv$calibrated.pvalue 81 | plot(-log10(pvalues), pch = 19, 82 | cex = .2, col = "grey", xlab = "SNP") 83 | points(example.data$causal.set, 84 | -log10(pvalues)[example.data$causal.set], 85 | type = "h", col = "blue") 86 | 87 | 88 | ## An EWAS example with Y = methylation data and X = exposure 89 | Y <- scale(skin.exposure$beta.value) 90 | X <- scale(as.numeric(skin.exposure$exposure)) 91 | 92 | ## Fit an LFMM with 2 latent factors 93 | mod.lfmm <- lfmm_ridge(Y = Y, 94 | X = X, 95 | K = 2) 96 | 97 | ## Perform association testing using the fitted model: 98 | pv <- lfmm_test(Y = Y, 99 | X = X, 100 | lfmm = mod.lfmm, 101 | calibrate = "gif") 102 | 103 | ## Manhattan plot with true associations shown 104 | pvalues <- pv$calibrated.pvalue 105 | plot(-log10(pvalues), 106 | pch = 19, 107 | cex = .3, 108 | xlab = "Probe", 109 | col = "grey") 110 | 111 | causal.set <- seq(11, 1496, by = 80) 112 | points(causal.set, 113 | -log10(pvalues)[causal.set], 114 | col = "blue") 115 | } 116 | \author{ 117 | cayek, francoio 118 | } 119 | -------------------------------------------------------------------------------- /man/lfmm_lasso.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lfmm.R 3 | \name{lfmm_lasso} 4 | \alias{lfmm_lasso} 5 | \title{LFMM least-squares estimates with lasso penalty} 6 | \usage{ 7 | lfmm_lasso(Y, X, K, nozero.prop = 0.01, lambda.num = 100, 8 | lambda.min.ratio = 0.01, lambda = NULL, it.max = 100, 9 | relative.err.epsilon = 1e-06) 10 | } 11 | \arguments{ 12 | \item{Y}{a response variable matrix with n rows and p columns. 13 | Each column is a response variable (e.g., SNP genotype, 14 | gene expression level, beta-normalized methylation profile, etc). 15 | Response variables must be encoded as numeric.} 16 | 17 | \item{X}{an explanatory variable matrix with n rows and d columns. 18 | Each column corresponds to a distinct explanatory variable (eg. phenotype). 19 | Explanatory variables must be encoded as numeric.} 20 | 21 | \item{K}{an integer for the number of latent factors in the regression model.} 22 | 23 | \item{nozero.prop}{a numeric value for the expected proportion of non-zero effect sizes.} 24 | 25 | \item{lambda.num}{a numeric value for the number of 'lambda' values (obscure).} 26 | 27 | \item{lambda.min.ratio}{(obscure parameter) a numeric value for the smallest \code{lambda} value, 28 | A fraction of \code{lambda.max}, the data derived entry value (i.e. the smallest value for 29 | which all coefficients are zero).} 30 | 31 | \item{lambda}{(obscure parameter) Smallest value of \code{lambda}. A fraction of 'lambda.max', 32 | the (data derived) entry value (i.e. the smallest value for which all 33 | coefficients are zero).} 34 | 35 | \item{it.max}{an integer value for the number of iterations of the algorithm.} 36 | 37 | \item{relative.err.epsilon}{a numeric value for a relative convergence error. Determine 38 | whether the algorithm converges or not.} 39 | } 40 | \value{ 41 | an object of class \code{lfmm} with the following attributes: 42 | \itemize{ 43 | \item U the latent variable score matrix with dimensions n x K, 44 | \item V the latent variable axes matrix with dimensions p x K, 45 | \item B the effect size matrix with dimensions p x d. 46 | } 47 | } 48 | \description{ 49 | This function computes regularized least squares estimates 50 | for latent factor mixed models using a lasso penalty. 51 | } 52 | \details{ 53 | The algorithm minimizes the following penalized least-squares criterion 54 | 55 | The response variable matrix Y and the explanatory variable are centered. 56 | } 57 | \examples{ 58 | 59 | library(lfmm) 60 | 61 | ## a GWAS example with Y = SNPs and X = phenotype 62 | data(example.data) 63 | Y <- example.data$genotype 64 | X <- example.data$phenotype 65 | 66 | ## Fit an LFMM with 6 factors 67 | mod.lfmm <- lfmm_lasso(Y = Y, 68 | X = X, 69 | K = 6, 70 | nozero.prop = 0.01) 71 | 72 | ## Perform association testing using the fitted model: 73 | pv <- lfmm_test(Y = Y, 74 | X = X, 75 | lfmm = mod.lfmm, 76 | calibrate = "gif") 77 | 78 | ## Manhattan plot with causal loci shown 79 | pvalues <- pv$calibrated.pvalue 80 | plot(-log10(pvalues), 81 | pch = 19, cex = .2, 82 | col = "grey", xlab = "SNP") 83 | 84 | points(example.data$causal.set, 85 | -log10(pvalues)[example.data$causal.set], 86 | type = "h", col = "blue") 87 | 88 | ## An EWAS example with Y = methylation data 89 | ## and X = exposure 90 | Y <- scale(skin.exposure$beta.value) 91 | X <- scale(as.numeric(skin.exposure$exposure)) 92 | 93 | ## Fit an LFMM with 2 latent factors 94 | mod.lfmm <- lfmm_lasso(Y = Y, 95 | X = X, 96 | K = 2, 97 | nozero.prop = 0.01) 98 | 99 | ## Perform association testing using the fitted model: 100 | pv <- lfmm_test(Y = Y, 101 | X = X, 102 | lfmm = mod.lfmm, 103 | calibrate = "gif") 104 | 105 | ## Manhattan plot with true associations shown 106 | pvalues <- pv$calibrated.pvalue 107 | plot(-log10(pvalues), 108 | pch = 19, 109 | cex = .3, 110 | xlab = "Probe", 111 | col = "grey") 112 | 113 | causal.set <- seq(11, 1496, by = 80) 114 | points(causal.set, 115 | -log10(pvalues)[causal.set], 116 | col = "blue") 117 | } 118 | \author{ 119 | cayek, francoio 120 | } 121 | -------------------------------------------------------------------------------- /man/forward_test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lfmm.R 3 | \name{forward_test} 4 | \alias{forward_test} 5 | \title{Forward inclusion tests with latent factor mixed models} 6 | \usage{ 7 | forward_test(Y, X, K, niter = 5, scale = FALSE, candidate.list = NULL, 8 | rev.confounder = TRUE, lambda = 1e-05) 9 | } 10 | \arguments{ 11 | \item{Y}{a response variable matrix with n rows and p columns. 12 | Each column is a response variable (numeric).} 13 | 14 | \item{X}{an explanatory variable matrix with n rows and d = 1 column (eg. phenotype).} 15 | 16 | \item{K}{an integer for the number of latent factors in the regression model.} 17 | 18 | \item{niter}{an integer value for the number of forward inclusion tests.} 19 | 20 | \item{scale}{a boolean value, \code{TRUE} if the explanatory variable, X, is scaled 21 | (recommended option).} 22 | 23 | \item{candidate.list}{a vector of integers corresponding to response variables (columns in Y), 24 | which are known candidates for association. If \code{NULL}, a list of candidates 25 | is built in during the algorithm run.} 26 | 27 | \item{rev.confounder}{a boolean value. If \code{TRUE} confounders are revaluated in each 28 | conditional test. May take some time (default = \code{TRUE}).} 29 | 30 | \item{lambda}{a numeric value for the regularization parameter.} 31 | } 32 | \value{ 33 | a list with the following attributes: 34 | \itemize{ 35 | \item candidates a vector of niter response variables (column labels in Y) detected as top hits in 36 | each conditional association analysis. 37 | \item log.p a vector of uncorrected log p-values for checking that the algorithm behaves well (but not trustable for testing). 38 | } 39 | } 40 | \description{ 41 | This function tests for association between each column of the response matrix, Y, 42 | and the explanatory variables, X, by recursively conditioning on the top hits in the set 43 | of explanatory variables. The conditional tests are based on LFMMs with ridge penalty. 44 | } 45 | \details{ 46 | The response variable matrix Y and the explanatory variable are centered. 47 | } 48 | \examples{ 49 | library(lfmm) 50 | data("example.data") 51 | Y <- example.data$genotype 52 | X <- example.data$phenotype #scaled variable 53 | 54 | ## fits an LFMM, i.e, computes B, U, V: 55 | mod.lfmm <- lfmm_ridge(Y = Y, 56 | X = X, 57 | K = 6) 58 | 59 | ## performs initial association testing using the fitted model: 60 | pv <- lfmm_test(Y = Y, 61 | X = X, 62 | lfmm = mod.lfmm, 63 | calibrate = "gif") 64 | ## Manhattan plot 65 | plot(-log10(pv$calibrated.pvalue), 66 | pch = 19, 67 | cex = .2, 68 | col = "grey") 69 | 70 | ## Start forward tests (3 iterations) 71 | obj <- forward_test(Y, 72 | X, 73 | K = 6, 74 | niter = 3, 75 | scale = TRUE) 76 | 77 | ## Record Log p.values for the 3 top hits 78 | log.p <- obj$log.p 79 | log.p 80 | 81 | ## Check perfect hits for each causal SNPs (labelled from 1 to 20) 82 | obj$candidate \%in\% example.data$causal.set 83 | 84 | ## Check for candidates at distance 20 SNPs (about 10kb) 85 | theta <- 20 86 | ## Number of hits for each causal SNPs (1-20) 87 | hit.3 <- as.numeric( 88 | apply(sapply(obj$candidate, 89 | function(x) abs(x - example.data$causal.set) < theta), 90 | 2, 91 | which)) 92 | ## Number of hits for each causal SNPs (1-20) 93 | table(hit.3) 94 | 95 | 96 | ## Continue forward tests (2 additional iterations) 97 | obj <- forward_test(Y, 98 | X, 99 | K = 6, 100 | niter = 2, 101 | candidate.list = obj$candidates, 102 | scale = TRUE) 103 | 104 | ## Record Log p.values for all 5 top hits 105 | log.p <- c(log.p, obj$log.p) 106 | log.p 107 | 108 | ## Check perfect hits for each causal SNPs (labelled from 1 to 20) 109 | obj$candidate \%in\% example.data$causal.set 110 | 111 | ## Check for candidates at distance 5 SNPs (about 2.5kb) 112 | theta <- 5 113 | ## Number of hits for each causal SNPs (1-20) 114 | hit.5 <- as.numeric( 115 | apply(sapply(obj$candidate, 116 | function(x) abs(x - example.data$causal.set) < theta), 117 | 2, 118 | which)) 119 | ## Number of hits for each causal SNPs (1-20) 120 | table(hit.5) 121 | 122 | ## Plot log P 123 | plot(log.p, xlab = "Conditional test iteration", ylab="Top hit log(p)") 124 | } 125 | \author{ 126 | cayek, francoio 127 | } 128 | -------------------------------------------------------------------------------- /docs/reference/example.data.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Genetic and phenotypic data for Arabidopsis thaliana — example.data • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    A dataset containing SNP frequency and simulated phenotypic data for 170 plant accessions. 85 | The variables are as follows:

    86 | 87 | 88 |
    data(example.data)
    89 | 90 |

    Format

    91 | 92 |

    A list with 4 arguments: genotype, phenotype, causal.set, chrpos

    93 | 94 |

    Details

    95 | 96 |
      97 |
    • genotype: binary (0 or 1) SNP frequency for 170 individuals (26943 SNPs).

    • 98 |
    • phenotype: simulated phenotypic data for 170 individuals.

    • 99 |
    • causal.set: set of indices for causal SNPs.

    • 100 |
    • chrpos: genetic map including chromosome position of each SNP.

    • 101 |
    102 | 103 | 104 |
    105 | 115 |
    116 | 117 |
    118 | 121 | 122 |
    123 |

    Site built with pkgdown.

    124 |
    125 | 126 |
    127 |
    128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | #include 6 | 7 | using namespace Rcpp; 8 | 9 | // compute_eigen_svd 10 | Rcpp::List compute_eigen_svd(const Eigen::Map& X); 11 | RcppExport SEXP _lfmm_compute_eigen_svd(SEXP XSEXP) { 12 | BEGIN_RCPP 13 | Rcpp::RObject rcpp_result_gen; 14 | Rcpp::RNGScope rcpp_rngScope_gen; 15 | Rcpp::traits::input_parameter< const Eigen::Map& >::type X(XSEXP); 16 | rcpp_result_gen = Rcpp::wrap(compute_eigen_svd(X)); 17 | return rcpp_result_gen; 18 | END_RCPP 19 | } 20 | // impute_lfmm_cpp 21 | void impute_lfmm_cpp(Eigen::Map Y, const Eigen::Map X, const Eigen::Map U, const Eigen::Map V, const Eigen::Map B, NumericVector missingId); 22 | RcppExport SEXP _lfmm_impute_lfmm_cpp(SEXP YSEXP, SEXP XSEXP, SEXP USEXP, SEXP VSEXP, SEXP BSEXP, SEXP missingIdSEXP) { 23 | BEGIN_RCPP 24 | Rcpp::RNGScope rcpp_rngScope_gen; 25 | Rcpp::traits::input_parameter< Eigen::Map >::type Y(YSEXP); 26 | Rcpp::traits::input_parameter< const Eigen::Map >::type X(XSEXP); 27 | Rcpp::traits::input_parameter< const Eigen::Map >::type U(USEXP); 28 | Rcpp::traits::input_parameter< const Eigen::Map >::type V(VSEXP); 29 | Rcpp::traits::input_parameter< const Eigen::Map >::type B(BSEXP); 30 | Rcpp::traits::input_parameter< NumericVector >::type missingId(missingIdSEXP); 31 | impute_lfmm_cpp(Y, X, U, V, B, missingId); 32 | return R_NilValue; 33 | END_RCPP 34 | } 35 | // err2_lfmm_cpp 36 | double err2_lfmm_cpp(const Eigen::Map Y, const Eigen::Map X, const Eigen::Map U, const Eigen::Map V, const Eigen::Map B); 37 | RcppExport SEXP _lfmm_err2_lfmm_cpp(SEXP YSEXP, SEXP XSEXP, SEXP USEXP, SEXP VSEXP, SEXP BSEXP) { 38 | BEGIN_RCPP 39 | Rcpp::RObject rcpp_result_gen; 40 | Rcpp::RNGScope rcpp_rngScope_gen; 41 | Rcpp::traits::input_parameter< const Eigen::Map >::type Y(YSEXP); 42 | Rcpp::traits::input_parameter< const Eigen::Map >::type X(XSEXP); 43 | Rcpp::traits::input_parameter< const Eigen::Map >::type U(USEXP); 44 | Rcpp::traits::input_parameter< const Eigen::Map >::type V(VSEXP); 45 | Rcpp::traits::input_parameter< const Eigen::Map >::type B(BSEXP); 46 | rcpp_result_gen = Rcpp::wrap(err2_lfmm_cpp(Y, X, U, V, B)); 47 | return rcpp_result_gen; 48 | END_RCPP 49 | } 50 | // err2s_lfmm_cpp 51 | Eigen::VectorXd err2s_lfmm_cpp(const Eigen::Map Y, const Eigen::Map X, const Eigen::Map U, const Eigen::Map V, const Eigen::Map B); 52 | RcppExport SEXP _lfmm_err2s_lfmm_cpp(SEXP YSEXP, SEXP XSEXP, SEXP USEXP, SEXP VSEXP, SEXP BSEXP) { 53 | BEGIN_RCPP 54 | Rcpp::RObject rcpp_result_gen; 55 | Rcpp::RNGScope rcpp_rngScope_gen; 56 | Rcpp::traits::input_parameter< const Eigen::Map >::type Y(YSEXP); 57 | Rcpp::traits::input_parameter< const Eigen::Map >::type X(XSEXP); 58 | Rcpp::traits::input_parameter< const Eigen::Map >::type U(USEXP); 59 | Rcpp::traits::input_parameter< const Eigen::Map >::type V(VSEXP); 60 | Rcpp::traits::input_parameter< const Eigen::Map >::type B(BSEXP); 61 | rcpp_result_gen = Rcpp::wrap(err2s_lfmm_cpp(Y, X, U, V, B)); 62 | return rcpp_result_gen; 63 | END_RCPP 64 | } 65 | // sum2_lm_cpp 66 | Eigen::VectorXd sum2_lm_cpp(const Eigen::Map Y, const Eigen::Map X, const Eigen::Map B); 67 | RcppExport SEXP _lfmm_sum2_lm_cpp(SEXP YSEXP, SEXP XSEXP, SEXP BSEXP) { 68 | BEGIN_RCPP 69 | Rcpp::RObject rcpp_result_gen; 70 | Rcpp::RNGScope rcpp_rngScope_gen; 71 | Rcpp::traits::input_parameter< const Eigen::Map >::type Y(YSEXP); 72 | Rcpp::traits::input_parameter< const Eigen::Map >::type X(XSEXP); 73 | Rcpp::traits::input_parameter< const Eigen::Map >::type B(BSEXP); 74 | rcpp_result_gen = Rcpp::wrap(sum2_lm_cpp(Y, X, B)); 75 | return rcpp_result_gen; 76 | END_RCPP 77 | } 78 | 79 | static const R_CallMethodDef CallEntries[] = { 80 | {"_lfmm_compute_eigen_svd", (DL_FUNC) &_lfmm_compute_eigen_svd, 1}, 81 | {"_lfmm_impute_lfmm_cpp", (DL_FUNC) &_lfmm_impute_lfmm_cpp, 6}, 82 | {"_lfmm_err2_lfmm_cpp", (DL_FUNC) &_lfmm_err2_lfmm_cpp, 5}, 83 | {"_lfmm_err2s_lfmm_cpp", (DL_FUNC) &_lfmm_err2s_lfmm_cpp, 5}, 84 | {"_lfmm_sum2_lm_cpp", (DL_FUNC) &_lfmm_sum2_lm_cpp, 3}, 85 | {NULL, NULL, 0} 86 | }; 87 | 88 | RcppExport void R_init_lfmm(DllInfo *dll) { 89 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 90 | R_useDynamicSymbols(dll, FALSE); 91 | } 92 | -------------------------------------------------------------------------------- /docs/reference/skin.exposure.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Simulated (and real) methylation levels for sun exposed patient patients — skin.exposure • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 82 | 83 | 84 |

    A data set containing normalized beta values, and sun exposure and simulated 85 | phenotypic data for 78 tissue samples.

    86 | 87 | 88 |
    data("skin.exposure")
    89 | 90 |

    Format

    91 | 92 |

    A list with 6 arguments: beta.value, phenotype, causal.set, chrpos

    93 | 94 |

    Details

    95 | 96 |

    The variables are:

      97 |
    • beta.value: 1496 filtered normalized beta values (methyation probabilities) 98 | for 78 tissue samples.

    • 99 |
    • exposure: Sun exposure levels for 78 tissue samples.

    • 100 |
    • phenotype: Simulated binary phenotypic data for 78 tissue samples.

    • 101 |
    • age: age of patients.

    • 102 |
    • gender: sex of patients.

    • 103 |
    • tissue: category for tissue samples.

    • 104 |
    105 |

    Reference: to be filled

    106 | 107 | 108 |
    109 | 119 |
    120 | 121 |
    122 | 125 | 126 |
    127 |

    Site built with pkgdown.

    128 |
    129 | 130 |
    131 |
    132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /man/lfmm_test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lfmm.R 3 | \name{lfmm_test} 4 | \alias{lfmm_test} 5 | \title{Statistical tests with latent factor mixed models (linear models)} 6 | \usage{ 7 | lfmm_test(Y, X, lfmm, calibrate = "gif") 8 | } 9 | \arguments{ 10 | \item{Y}{a response variable matrix with n rows and p columns. 11 | Each column is a response variable (numeric).} 12 | 13 | \item{X}{an explanatory variable matrix with n rows and d columns. 14 | Each column corresponds to an explanatory variable (numeric).} 15 | 16 | \item{lfmm}{an object of class \code{lfmm} returned by the \link{lfmm_lasso} 17 | or \link{lfmm_ridge} function} 18 | 19 | \item{calibrate}{a character string, "gif" or "median+MAD". If the "gif" option is set (default), 20 | significance values are calibrated by using the genomic control method. Genomic control 21 | uses a robust estimate of the variance of z-scores called "genomic inflation factor". 22 | If the "median+MAD" option is set, the pvalues are calibrated by computing the median and MAD of the zscores. If \code{NULL}, the 23 | pvalues are not calibrated.} 24 | } 25 | \value{ 26 | a list with the following attributes: 27 | \itemize{ 28 | \item B a p x d matrix of effect sizes for each locus and each explanatory variable. Note that the direction 29 | of association is "Y explained by X", 30 | \item calibrated.pvalue a p x d matrix which contains calibrated p-values for each explanatory variable, 31 | \item gif a numeric value for the genomic inflation factor, 32 | \item epsilon.sigma2 a vector of length p containing the residual variances for each locus, 33 | \item B.sigma2 a matrix of size n x (d+K) that contains the variance of effect sizes for the d explanatory variables 34 | and the K latent factors. It could be used to evaluate the proportion of the response variance (genetic variation) 35 | explained by the exposure (X) and latent factors (U) at each locus, 36 | \item score a p x d matrix which contains z-scores for each explanatory variable (columns of X), before calibration. 37 | This is equal to B\link{,j}/sqrt(B.sigma2\link{,j}) for variable j. 38 | \item pvalue a p x d matrix which contains uncalibrated p-values for each explanatory variable before calibration. 39 | This may be useful to users preferring alternative methods to the GIF, like the local FDR method. 40 | \item calibrated.score2 a p x d matrix which contains squared Z-score after calibration. 41 | This may be useful to expert users who may want to perform test recalibration with a different numeric value 42 | for the GIF. 43 | } 44 | } 45 | \description{ 46 | This function returns significance values for the association between each column of the 47 | response matrix, Y, and the explanatory variables, X, including correction for unobserved confounders 48 | (latent factors). The test is based on an LFMM fitted with a ridge or lasso penalty (linear model). 49 | } 50 | \details{ 51 | The response variable matrix Y and the explanatory variables X are centered. Note that 52 | scaling the Y and X matrices would convert the effect sizes into correlation coefficients. Calibrating 53 | p-values means that their distribution is uniform under the null-hypothesis. Additional corrections are 54 | required for multiple testing. For this, Benjamini-Hochberg or Bonferroni adjusted p-values could be obtained from 55 | the calibrated values by using one of several the packages that implements multiple testing corrections. 56 | } 57 | \examples{ 58 | 59 | library(lfmm) 60 | 61 | ## a GWAS example with Y = SNPs and X = phenotype 62 | data(example.data) 63 | Y <- example.data$genotype 64 | X <- example.data$phenotype 65 | 66 | ## Fit an LFMM with K = 6 factors 67 | mod.lfmm <- lfmm_ridge(Y = Y, 68 | X = X, 69 | K = 6) 70 | 71 | ## Perform association testing using the fitted model: 72 | pv <- lfmm_test(Y = Y, 73 | X = X, 74 | lfmm = mod.lfmm, 75 | calibrate = "gif") 76 | 77 | ## Manhattan plot with causal loci shown 78 | 79 | pvalues <- pv$calibrated.pvalue 80 | plot(-log10(pvalues), pch = 19, 81 | cex = .2, col = "grey", xlab = "SNP") 82 | points(example.data$causal.set, 83 | -log10(pvalues)[example.data$causal.set], 84 | type = "h", col = "blue") 85 | 86 | 87 | ## An EWAS example with Y = methylation data and X = exposure 88 | data("skin.exposure") 89 | Y <- scale(skin.exposure$beta.value) 90 | X <- scale(as.numeric(skin.exposure$exposure)) 91 | 92 | ## Fit an LFMM with 2 latent factors 93 | mod.lfmm <- lfmm_ridge(Y = Y, 94 | X = X, 95 | K = 2) 96 | 97 | ## Perform association testing using the fitted model: 98 | pv <- lfmm_test(Y = Y, 99 | X = X, 100 | lfmm = mod.lfmm, 101 | calibrate = "gif") 102 | 103 | ## Manhattan plot with true associations shown 104 | pvalues <- pv$calibrated.pvalue 105 | plot(-log10(pvalues), 106 | pch = 19, 107 | cex = .3, 108 | xlab = "Probe", 109 | col = "grey") 110 | 111 | causal.set <- seq(11, 1496, by = 80) 112 | points(causal.set, 113 | -log10(pvalues)[causal.set], 114 | col = "blue") 115 | } 116 | \seealso{ 117 | \link{glm_test} 118 | } 119 | \author{ 120 | cayek, francoio 121 | } 122 | -------------------------------------------------------------------------------- /R/lassoLFMM.R: -------------------------------------------------------------------------------- 1 | ##' @author cayek 2 | ##' @export 3 | lassoLFMM <- function(K, nozero.prop = 0.1, 4 | lambda.num = 100, lambda.min.ratio = 0.001, 5 | lambda = NULL) { 6 | m <- list(K = K, 7 | nozero.prop = nozero.prop, 8 | lambda.num = lambda.num, 9 | lambda.min.ratio = lambda.min.ratio, 10 | lambda = lambda) 11 | class(m) <- "lassoLFMM" 12 | m 13 | } 14 | 15 | 16 | lassoLFMM_heuristic_gamma_lambda_range<- function(m, dat) { 17 | 18 | K <- m$K 19 | 20 | ## compute gamma 21 | res <- list() 22 | Af <- function(x, args) { 23 | dat$productY(x) 24 | } 25 | Atransf <- function(x, args) { 26 | dat$productYt(x) 27 | } 28 | svd.res <- compute_svd(Af, Atransf, K + 1, K, K, dim = c(nrow(dat$Y), ncol(dat$Y))) 29 | res$gamma <- (svd.res$d[K] + svd.res$d[K + 1]) / 2 30 | U <-svd.res$u[,1:K] %*% diag(svd.res$d[1:K], K, K) 31 | V <- svd.res$v[,1:K] 32 | 33 | ## compute B 34 | Af <- function(x) { 35 | t(dat$productYt(x)) - tcrossprod(crossprod(x, U), V) 36 | } 37 | B <- compute_B_ridge(Af, dat$X, 0.0) 38 | 39 | ## lambda max and min 40 | lambda.max <- max(B) 41 | ## lambda.min = lambda.min.ratio * lambda.max like in Friedman et al. 2010 42 | lambda.min <- lambda.max * m$lambda.min.ratio 43 | 44 | ## strategie presented in Friedman et al. 2010 45 | ## log scaled sequence 46 | res$lambda.range <- exp(seq(log(lambda.max), log(lambda.min), length.out = m$lambda.num)) 47 | 48 | res 49 | } 50 | 51 | lassoLFMM_init <- function(m, dat) { 52 | 53 | ## compute gamma 54 | if (is.null(m$params)) { 55 | m$params <- lassoLFMM_heuristic_gamma_lambda_range(m, dat) 56 | } 57 | 58 | ## init B 59 | if (is.null(m$B)) { 60 | m$B <- matrix(0.0, ncol(dat$Y), ncol(dat$X)) 61 | } 62 | 63 | ## init U and V 64 | if (is.null(m$U)) { 65 | m$U <- matrix(0.0, nrow(dat$Y), m$K) 66 | } 67 | if (is.null(m$V)) { 68 | m$V <- matrix(0.0, ncol(dat$Y), m$K) 69 | } 70 | m 71 | } 72 | 73 | lassoLFMM_main <- function(m, dat, it.max = 100, relative.err.epsilon = 1e-6) { 74 | 75 | m <- lassoLFMM_init(m, dat) 76 | 77 | ## NA and input by median 78 | ## dat$missing.ind <- which(is.na(dat$Y)) 79 | ## dat$Y <- impute_median(dat$Y) 80 | 81 | ## main loop if lambda alone 82 | if (!is.null(m$lambda)) { 83 | m <- lassoLFMM_loop(m, dat, 84 | m$params$gamma, m$lambda, 85 | relative.err.epsilon, 86 | it.max) 87 | nozero.prop <- mean(m$B != 0.0) 88 | message("=== lambda = ", m$lambda, ", no zero B proportion = ", nozero.prop) 89 | } else { 90 | 91 | ## main loop if lambda range 92 | for (lambda in m$params$lambda.range) { 93 | 94 | m <- lassoLFMM_loop(m, dat, 95 | m$params$gamma, lambda, 96 | relative.err.epsilon, 97 | it.max) 98 | 99 | nozero.prop <- mean(m$B != 0.0) 100 | message("=== lambda = ", lambda, ", no zero B proportion = ", nozero.prop) 101 | if( nozero.prop > m$nozero.prop) { 102 | break 103 | } 104 | } 105 | } 106 | 107 | ## to avoid side effect 108 | ## dat$Y[dat$missing.ind] <- NA 109 | 110 | m 111 | } 112 | 113 | ##' @export 114 | lfmm_fit.lassoLFMM <- function(m, dat, it.max = 100, relative.err.epsilon = 1e-6) { 115 | 116 | if (anyNA(dat$Y)) { 117 | stop("TODO") 118 | } else { 119 | res <- lassoLFMM_main(m, dat, it.max, relative.err.epsilon) 120 | } 121 | res 122 | } 123 | 124 | lassoLFMM_loop <- function(m, dat, gamma, lambda, relative_err_epsilon, it_max) { 125 | 126 | 127 | ## constants 128 | n = nrow(dat$Y) 129 | p = ncol(dat$Y) 130 | 131 | ## variables 132 | err = 0.0 133 | err_new = dat$err2_lfmm(m$U, m$V, m$B) 134 | relative_err = .Machine$double.xmax 135 | it = 1 136 | 137 | ## main loop 138 | while ((it <= it_max) && (relative_err > relative_err_epsilon)) { 139 | err = err_new; 140 | message("It = ", it , "/", it_max, ", err2 = " ,err) 141 | 142 | ## step B 143 | Af <- function(x) { 144 | t(dat$productYt(x)) - tcrossprod(crossprod(x, m$U), m$V) 145 | } 146 | m$B <- compute_B_lasso(Af, dat$X, lambda) 147 | 148 | ## compute W = UV^T 149 | Af <- function(x, args) { 150 | dat$productY(x)- dat$X %*% crossprod(m$B, x) 151 | } 152 | Atransf <- function(x, args) { 153 | dat$productYt(x) - m$B %*% crossprod(dat$X, x) 154 | } 155 | res.rspectra <- compute_svd_soft(Af, Atransf, gamma, m$K, dim = c(nrow(dat$Y), ncol(dat$Y))) 156 | m$U <- res.rspectra$u %*% diag(res.rspectra$d, length(res.rspectra$d), length(res.rspectra$d)) 157 | m$V <- res.rspectra$v 158 | 159 | ## impute NA 160 | ## dat$impute_lfmm(m$U, m$V, m$B) 161 | 162 | ## err 163 | err_new = dat$err2_lfmm(m$U, m$V, m$B) 164 | relative_err = abs(err_new - err) / err 165 | it = it + 1 166 | } 167 | m 168 | } 169 | 170 | ##' @export 171 | lfmm_CV.lassoLFMM <- function(m, dat, n.fold.row, n.fold.col, 172 | col.prop = 1.0, 173 | it.max = 100, relative.err.epsilon = 1e-6) { 174 | 175 | m <- lassoLFMM_init(m, dat) 176 | 177 | params <- base::expand.grid(list(lambda = m$params$lambda.range)) 178 | CV(m = m, 179 | dat = dat, 180 | n.fold.row = n.fold.row, 181 | n.fold.col = n.fold.col, 182 | params = params, 183 | col.prop = col.prop, 184 | it.max = it.max, relative.err.epsilon = relative.err.epsilon 185 | ) 186 | 187 | } 188 | 189 | -------------------------------------------------------------------------------- /tests/testthat/test-lfmm.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | context("lfmm") 3 | 4 | test_that("lfmm_ridge", { 5 | 6 | K <- 3 7 | dat <- lfmm_sampler(n = 100, p = 1000, K = K, 8 | outlier.prop = 0.1, 9 | cs = c(0.8), 10 | sigma = 0.2, 11 | B.sd = 1.0, 12 | U.sd = 1.0, 13 | V.sd = 1.0) 14 | 15 | lfmm.res <- lfmm_ridge(Y = dat$Y, X = dat$X, K = 3, lambda = 1e-5) 16 | 17 | skip("plot") 18 | id <- seq_along(lfmm.res$B) 19 | cols <- c('red', 'green')[as.numeric(id %in% dat$outlier) + 1] 20 | plot(id, lfmm.res$B, col = cols) 21 | 22 | }) 23 | 24 | 25 | test_that("lfmm_ridge alternated", { 26 | 27 | ## rmk : the alternated algorithm is not guarenty to converge to the global minimum. 28 | K <- 3 29 | dat <- lfmm_sampler(n = 100, p = 1000, K = K, 30 | outlier.prop = 0.1, 31 | cs = c(0.8), 32 | sigma = 0.2, 33 | B.sd = 1.0, 34 | U.sd = 1.0, 35 | V.sd = 1.0) 36 | dat$X <- cbind(dat$X, matrix(rnorm(100), 100,1)) 37 | 38 | lfmm.res <- lfmm_ridge(Y = dat$Y, X = dat$X, K = 3, lambda = 1e-5) 39 | lfmm.alt.res <- lfmm_ridge(Y = dat$Y, X = dat$X, K = 3, 40 | lambda = 1e-5, 41 | algorithm = "alternated", 42 | it.max=100, 43 | relative.err.min=1e-8) 44 | 45 | expect_lte(max((lfmm.res$B - lfmm.alt.res$B)^2), 1e-1) 46 | ## axes not in same order. 47 | ## cor(lfmm.res$U, lfmm.alt.res$U) 48 | 49 | ## hypothesis testing 50 | test.res <- lfmm_test(Y = dat$Y, X = dat$X, lfmm = lfmm.res) 51 | test.altr.res <- lfmm_test(Y = dat$Y, X = dat$X, lfmm = lfmm.alt.res) 52 | 53 | expect_lte(mean((test.altr.res$B - test.res$B)^2), 1e-2) 54 | }) 55 | 56 | test_that("lfmm_ridge CV", { 57 | 58 | library(ggplot2) 59 | 60 | ## sample data 61 | K <- 3 62 | dat <- lfmm_sampler(n = 100, p = 1000, K = K, 63 | outlier.prop = 0.1, 64 | cs = c(0.8), 65 | sigma = 0.2, 66 | B.sd = 1.0, 67 | U.sd = 1.0, 68 | V.sd = 1.0) 69 | 70 | ## run cross validation 71 | errs <- lfmm_ridge_CV(Y = dat$Y, 72 | X = dat$X, 73 | n.fold.row = 5, 74 | n.fold.col = 5, 75 | lambdas = c(1e-10, 1, 1e20), 76 | Ks = c(1,2,3,4,5,6)) 77 | 78 | ## plot error 79 | skip("plot") 80 | ggplot(errs, aes(y = err, x = as.factor(K))) + 81 | geom_boxplot() + 82 | facet_grid(lambda ~ ., scale = "free") 83 | 84 | ggplot(errs, aes(y = err, x = as.factor(lambda))) + 85 | geom_boxplot() + 86 | facet_grid(K ~ ., scales = "free") 87 | 88 | }) 89 | 90 | 91 | test_that("ridge_lasso", { 92 | 93 | K <- 3 94 | dat <- lfmm_sampler(n = 100, p = 1000, K = K, 95 | outlier.prop = 0.1, 96 | cs = c(0.6), 97 | sigma = 0.2, 98 | B.sd = 1.0, 99 | U.sd = 1.0, 100 | V.sd = 1.0) 101 | 102 | lfmm.res <- lfmm_lasso(Y = dat$Y, X = dat$X, K = 3, nozero.prop= 0.2) 103 | 104 | skip("plot") 105 | id <- seq_along(lfmm.res$B) 106 | cols <- c('red', 'green')[as.numeric(id %in% dat$outlier) + 1] 107 | plot(id, lfmm.res$B, col = cols) 108 | 109 | }) 110 | 111 | test_that("lfmm_test", { 112 | 113 | K <- 3 114 | dat <- lfmm_sampler(n = 100, p = 1000, K = K, 115 | outlier.prop = 0.05, 116 | cs = c(0.8), 117 | sigma = 0.2, 118 | B.sd = 1.0, 119 | U.sd = 1.0, 120 | V.sd = 1.0) 121 | 122 | ## random co variate 123 | dat$X <- cbind(dat$X, matrix(rnorm(100, 100, 1))) 124 | 125 | ## lfmm 126 | lfmm.res <- lfmm_ridge(Y = dat$Y, X = dat$X, K = 3, lambda = 1e-5) 127 | 128 | ## hp with gif 129 | hp.res.gif <- lfmm_test(Y = dat$Y, X = dat$X, lfmm = lfmm.res, calibrate = "gif") 130 | expect_equal(length(hp.res.gif$gif), 2) 131 | 132 | ## hp with median+MAD 133 | hp.res.mad <- lfmm_test(Y = dat$Y, X = dat$X, lfmm = lfmm.res, calibrate = "median+MAD") 134 | expect_equal(length(hp.res.mad$mad), 2) 135 | expect_equal(length(hp.res.mad$median), 2) 136 | 137 | ## hp with median+MAD 138 | hp.res <- lfmm_test(Y = dat$Y, X = dat$X, lfmm = lfmm.res, calibrate = NULL) 139 | 140 | skip("plot") 141 | 142 | ## with gif 143 | ## plot score 144 | d <- 2 145 | id <- seq_along(hp.res.gif$calibrated.score[,d]) 146 | cols <- c('red', 'green')[as.numeric(id %in% dat$outlier) + 1] 147 | plot(id, hp.res.gif$calibrated.score2[,d], col = cols) 148 | ## plot pvalue 149 | cols <- c('red', 'green')[as.numeric(id %in% dat$outlier) + 1] 150 | plot(id, -log10(hp.res.gif$calibrated.pvalue[,d]), col = cols) 151 | hist(hp.res.gif$calibrated.pvalue[,d]) 152 | hist(hp.res.gif$pvalue[,d]) 153 | 154 | ## median+mad 155 | d <- 1 156 | id <- seq_along(hp.res.mad$calibrated.score[,d]) 157 | cols <- c('red', 'green')[as.numeric(id %in% dat$outlier) + 1] 158 | plot(id, hp.res.mad$calibrated.score[,d], col = cols) 159 | ## plot pvalue 160 | cols <- c('red', 'green')[as.numeric(id %in% dat$outlier) + 1] 161 | plot(id, -log10(hp.res.mad$calibrated.pvalue[,d]), col = cols) 162 | hist(hp.res.mad$calibrated.pvalue[,d]) 163 | hist(hp.res.mad$pvalue[,d]) 164 | 165 | }) 166 | -------------------------------------------------------------------------------- /tests/testthat/test-dat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | context("Dat wrapper") 3 | 4 | test_that("Dat", { 5 | Y <- "../Data/1000Genomes/Phase3/European_Chrm22.maf.05.sample.10000.rds" 6 | skip_if_not(file.exists(Y)) 7 | 8 | dat <- Dat(Y = Y) 9 | 10 | expect_equal(dim(dat$Y), c(503, 10000)) 11 | 12 | }) 13 | 14 | test_that("LfmmDat", { 15 | 16 | Y <- "../Data/1000Genomes/Phase3/European_Chrm22.maf.05.sample.10000.rds" 17 | skip_if_not(file.exists(Y)) 18 | 19 | X <- matrix(rnorm(503, 503, 1)) 20 | 21 | dat <- LfmmDat(Y = Y, X = X) 22 | 23 | expect_equal(dim(dat$Y), c(503, 10000)) 24 | expect_equal(dim(dat$X), c(503, 1)) 25 | }) 26 | 27 | test_that("Dat and Rspectra", { 28 | 29 | n <- 100 30 | m <- 1000 31 | k <- 3 32 | dat <- lfmm_sampler(n = 100, p = 1000, K = k, 33 | outlier.prop = 0.1, 34 | cs = c(0.8), 35 | sigma = 0.2, 36 | B.sd = 1.0, 37 | U.sd = 1.0, 38 | V.sd = 1.0) 39 | 40 | Af <- function(x, args) { 41 | args$dat$productY(x) 42 | } 43 | Atransf <- function(x, args) { 44 | args$dat$productYt(x) 45 | } 46 | res.rspectra <- RSpectra::svds(A = Af, 47 | k, nu = k, nv = k, 48 | Atrans = Atransf, dim = c(n, m), opts = list(tol = 1e-10), 49 | args = list(dat = dat)) 50 | res.svd <- svd(dat$Y, k, k) 51 | 52 | expect_lt(mean(abs(res.rspectra$u - res.svd$u)), 1) 53 | expect_lt(mean(abs(res.rspectra$d - res.svd$d[1:k])), 1e-10) 54 | expect_lt(mean(abs(res.rspectra$v - res.svd$v)), 1) 55 | W.svd <- tcrossprod(res.svd$u %*% diag(res.svd$d[1:k]), res.svd$v) 56 | W.rspectra <- tcrossprod(res.rspectra$u %*% diag(res.rspectra$d[1:k]), res.rspectra$v) 57 | expect_lt(mean(abs(W.rspectra - W.svd)), 1e-10) 58 | ## error because au PC get same variance 59 | 60 | }) 61 | 62 | test_that("LfmmDat impute and err2", { 63 | 64 | dat <- lfmm_sampler(n = 100, p = 1000, K = 3, 65 | outlier.prop = 0.1, 66 | cs = c(0.8), 67 | sigma = 0.2, 68 | B.sd = 1.0, 69 | U.sd = 1.0, 70 | V.sd = 1.0) 71 | dat$U <- NULL 72 | dat$V <- NULL 73 | dat$B <- NULL 74 | ## run lfmm ridge 75 | m <- ridgeLFMM(K = 3, 1e-5) 76 | m <- lfmm_fit(m, dat) 77 | 78 | ## NA 79 | prop <- 0.01 80 | n <- nrow(dat$Y) 81 | p <- ncol(dat$Y) 82 | dat$missing.ind <- sample(n * p, prop * n * p) 83 | dat$Y[dat$missing.ind] <- NA 84 | 85 | ## impute 86 | Y <- dat$Y 87 | Yux <- tcrossprod(dat$X, m$B) + tcrossprod(m$U, m$V) 88 | Y[dat$missing.ind] <- Yux[dat$missing.ind] 89 | dat$impute_lfmm(m$U, m$V, m$B) 90 | anyNA(dat$Y) 91 | expect_lte(mean(abs(dat$Y - Y)), 1e-18) 92 | 93 | ## err2 94 | Yux <- tcrossprod(dat$X, m$B) + tcrossprod(m$U, m$V) 95 | Yux <- Y - Yux 96 | err2.R <- mean(Yux ^ 2) 97 | err2.cpp <- dat$err2_lfmm(m$U, m$V, m$B) 98 | expect_lte(mean(abs(err2.cpp - err2.R)), 1e-10) 99 | 100 | ## sum2_lm 101 | E <- dat$Y - tcrossprod(dat$X, m$B) 102 | effective.degree.freedom <- 99 103 | epsilon.sigma2 <- apply(E, 2, function(x) sum(x ^ 2) / effective.degree.freedom) 104 | s2.cpp <- dat$sigma2_lm(dat$X, m$B, effective.degree.freedom) 105 | expect_lte(mean(abs(epsilon.sigma2 - s2.cpp)), 1e-10) 106 | 107 | ## err2s 108 | err2s.cpp <- dat$err2s_lfmm(m$U, m$V, m$B) 109 | expect_lte(mean(abs(err2.cpp - sum(err2s.cpp) / n / p)), 1e-10) 110 | 111 | 112 | }) 113 | 114 | test_that("Dat svd", { 115 | 116 | n <- 100 117 | m <- 1000 118 | k <- 3 119 | dat <- lfmm_sampler(n = 100, p = 1000, K = k, 120 | outlier.prop = 0.1, 121 | cs = c(0.8), 122 | sigma = 0.2, 123 | B.sd = 1.0, 124 | U.sd = 1.0, 125 | V.sd = 1.0) 126 | Af <- function(x, args) { 127 | dat$productY(x) 128 | } 129 | Atransf <- function(x, args) { 130 | dat$productYt(x) 131 | } 132 | res.rspectra <- compute_svd(Af, Atransf, k, k, k, dim = c(nrow(dat$Y), ncol(dat$Y))) 133 | res.svd <- svd(dat$Y, k, k) 134 | 135 | expect_lt(mean(abs(res.rspectra$u - res.svd$u)), 1) 136 | expect_lt(mean(abs(res.rspectra$d - res.svd$d[1:k])), 1e-10) 137 | expect_lt(mean(abs(res.rspectra$v - res.svd$v)), 1) 138 | W.svd <- tcrossprod(res.svd$u %*% diag(res.svd$d[1:k]), res.svd$v) 139 | W.rspectra <- tcrossprod(res.rspectra$u %*% diag(res.rspectra$d[1:k]), res.rspectra$v) 140 | ## error because au PC get same variance 141 | 142 | expect_lt(mean(abs(W.rspectra - W.svd)), 1e-10) 143 | 144 | }) 145 | 146 | test_that("lfmmDat predict_lfmm_knowing_loadings", { 147 | 148 | dat <- lfmm_sampler(n = 100, p = 1000, K = 3, 149 | outlier.prop = 0.1, 150 | cs = c(0.8), 151 | sigma = 0.2, 152 | B.sd = 1.0, 153 | U.sd = 1.0, 154 | V.sd = 1.0) 155 | 156 | ## run lfmm ridge 157 | m <- ridgeLFMM(K = 3, 1e-5) 158 | m <- lfmm_fit(m, dat) 159 | 160 | ## Predict 161 | unknown.j <- sample.int(1000, 500) 162 | predicted.Y <- dat$predict_lfmm_knowing_loadings(V = m$V, 163 | B = m$B, 164 | unknown.j = unknown.j) 165 | 166 | ## expect 167 | expect_equal(dim(predicted.Y), c(100, 500)) 168 | expect_lte(sd(predicted.Y - dat$Y[,unknown.j]) / sd(dat$Y), 0.6) ## 60 % d'erreur 169 | 170 | }) 171 | 172 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Latent Factor Mixed Models • lfmm 9 | 10 | 11 | 12 | 16 | 17 | 18 |
    19 |
    51 | 52 | 53 | 54 |
    55 |
    56 | 57 |
    58 |

    59 | latent factor mixed models: lfmm

    60 |

    Genome and epigenome-wide association studies are plagued with the problems of confounding and causality. The R package lfmm implements new algorithms for parameter estimation in latent factor mixed models (LFMM). The algorithms are designed for the correction of unobserved confounders. The new methods are computationally efficient, and provide statistically optimal corrections resulting in improved power and control for false discoveries. The package lfmm provides two main functions for estimating latent confounders (or factors): lfmm_ridge and lfmm_lasso. Those functions are based on optimal solutions of regularized least-squares problems. A short tutorial provides brief examples on how the R packages lfmm can be used for fitting latent factor mixed models and evaluating association between a response matrix (SNP genotype or methylation levels) and a variable of interest (phenotype or exposure levels) in genome-wide (GW), genome-environment (GE), epigenome-wide (EW) association studies. Corresponding software is available at the following url https://bcm-uga.github.io/lfmm/.

    61 |
    62 |
    63 |

    64 | Installation

    65 |

    Installing the latest version from github requires devtools:

    66 |
    # install.packages("devtools")
     67 | devtools::install_github("bcm-uga/lfmm")
    68 |
    69 | 70 |
    71 | 72 | 86 |
    87 | 88 | 89 |
    92 | 93 |
    94 |

    Site built with pkgdown.

    95 |
    96 | 97 |
    98 |
    99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /R/ridgeLFMM.R: -------------------------------------------------------------------------------- 1 | ##' @author cayek 2 | ##' @export 3 | ridgeLFMM <- function(K, lambda) { 4 | m <- list( K = K, 5 | lambda = lambda, 6 | algorithm = "analytical") 7 | class(m) <- "ridgeLFMM" 8 | m 9 | } 10 | 11 | ridgeLFMM_init <- function(m, dat) { 12 | 13 | ## init B 14 | if (is.null(m$B)) { 15 | m$B <- matrix(0.0, ncol(dat$Y), ncol(dat$X)) 16 | } 17 | 18 | ## init U and V 19 | if (is.null(m$U)) { 20 | m$U <- matrix(0.0, nrow(dat$Y), m$K) 21 | } 22 | if (is.null(m$V)) { 23 | m$V <- matrix(0.0, ncol(dat$Y), m$K) 24 | } 25 | m 26 | } 27 | 28 | 29 | ridgeLFMM_noNA<- function(m, dat) { 30 | ## compute of P 31 | P.list <- compute_P(X = dat$X, lambda = m$lambda) 32 | 33 | ## main algorithm 34 | m <- ridgeLFMM_main(m, dat, P.list) 35 | m 36 | } 37 | 38 | ridgeLFMM_noNA_alternated<- function(m, dat, relative.err.min = 1e-6, it.max = 100) { 39 | 40 | ## init 41 | m <- ridgeLFMM_init(m, dat) 42 | 43 | ## main loop 44 | err2 <- .Machine$double.xmax 45 | it <- 1 46 | repeat { 47 | ## main algorithm 48 | ## compute W = UV^T 49 | Af <- function(x, args) { 50 | dat$productY(x)- dat$X %*% crossprod(m$B, x) 51 | } 52 | Atransf <- function(x, args) { 53 | dat$productYt(x) - m$B %*% crossprod(dat$X, x) 54 | } 55 | res.rspectra <- compute_svd(Af, Atransf, k = m$K, nu = m$K, nv = m$K, 56 | dim = c(nrow(dat$Y), ncol(dat$Y))) 57 | m$U <- res.rspectra$u %*% diag(res.rspectra$d, length(res.rspectra$d), length(res.rspectra$d)) 58 | m$V <- res.rspectra$v 59 | 60 | ## step B 61 | Af <- function(x) { 62 | t(dat$productYt(x)) - tcrossprod(crossprod(x, m$U), m$V) 63 | } 64 | m$B <- compute_B_ridge(Af, dat$X, m$lambda) 65 | 66 | 67 | err2.new <- dat$err2_lfmm(m$U, m$V, m$B) 68 | message("It = ", it, "/", it.max, ", err2 = ", err2.new) 69 | if(it > it.max || (abs(err2 - err2.new) / err2) < relative.err.min) { 70 | break 71 | } 72 | err2 <- err2.new 73 | it <- it + 1 74 | } 75 | 76 | m 77 | } 78 | 79 | 80 | ridgeLFMM_main <- function(m, dat, P.list) { 81 | 82 | d <- ncol(dat$X) 83 | n <- nrow(dat$Y) 84 | p <- ncol(dat$Y) 85 | 86 | ## UV 87 | Af <- function(x, args) { 88 | args$P %*% args$dat$productY(x) 89 | } 90 | Atransf <- function(x, args) { 91 | args$dat$productYt(t(args$P) %*% x) 92 | } 93 | res.rspectra <- RSpectra::svds(A = Af, 94 | Atrans = Atransf, 95 | k = m$K, 96 | nu = m$K, nv = m$K, 97 | opts = list(tol = 10e-10), 98 | dim = c(n, p), 99 | args = list(P = P.list$sqrt.P, dat = dat)) 100 | ## res.rspectra <- svd(P.list$sqrt.P %*% dat$Y, nu = m$K, nv = m$K) ## debug 101 | m$U <- res.rspectra$u %*% diag(res.rspectra$d[1:m$K], m$K, m$K) 102 | m$U <- P.list$sqrt.P.inv %*% m$U 103 | m$V <- res.rspectra$v 104 | 105 | ## B 106 | Af <- function(x) { 107 | t(dat$productYt(x)) - tcrossprod(crossprod(x, m$U), m$V) 108 | } 109 | m$B <- compute_B_ridge(Af, dat$X, m$lambda) 110 | ## m$B <- compute_B_ridge(dat$Y - tcrossprod(m$U, m$V), dat$X, m$lambda) ## debug 111 | m 112 | } 113 | 114 | ridgeLFMM_withNA <- function(m, dat, relative.err.min = 1e-6, it.max = 100) { 115 | 116 | ## NA and input by median 117 | dat$missing.ind <- which(is.na(dat$Y)) 118 | dat$Y <- impute_median(dat$Y) 119 | 120 | ## compute of P 121 | P.list <- compute_P(X = dat$X, lambda = m$lambda) 122 | 123 | ## main loop 124 | err2 <- .Machine$double.xmax 125 | it <- 1 126 | repeat { 127 | ## main algorithm 128 | m <- ridgeLFMM_main(m, dat, P.list) 129 | 130 | dat$impute_lfmm(m$U, m$V, m$B) 131 | err2.new <- dat$err2_lfmm(m$U, m$V, m$B) 132 | if(it > it.max || (abs(err2 - err2.new) / err2) < relative.err.min) { 133 | break 134 | } 135 | err2 <- err2.new 136 | message("It = ", it, "/", it.max, ", err2 = ", err2) 137 | it <- it + 1 138 | } 139 | 140 | ## to avoid side effect 141 | dat$Y[dat$missing.ind] <- NA 142 | 143 | m 144 | } 145 | 146 | ##' @export 147 | lfmm_fit.ridgeLFMM <- function(m, dat, it.max = 100, relative.err.min = 1e-6){ 148 | 149 | if (!(m$algorithm %in% c("analytical", "alternated"))){ 150 | stop("algorithm must be analytical or alternated")} 151 | 152 | ## test if there missing value in Y 153 | if (anyNA(dat$Y)) { 154 | if (m$algorithm == "analytical"){ 155 | stop("Exact method doesn't allow missing data. 156 | Use an imputation method before running lfmm.") 157 | } else { 158 | res <- ridgeLFMM_withNA(m, dat, 159 | relative.err.min = relative.err.min, 160 | it.max = it.max) 161 | } 162 | } 163 | if (!anyNA(dat$Y)) { 164 | if (m$algorithm == "analytical") { 165 | res <- ridgeLFMM_noNA(m, dat) 166 | } else { 167 | res <- ridgeLFMM_noNA_alternated(m, dat, 168 | relative.err.min = relative.err.min, 169 | it.max = it.max) 170 | } 171 | } 172 | } 173 | 174 | ##' Fit assuming V and B 175 | ##' 176 | ##' @export 177 | lfmm_fit_knowing_loadings.ridgeLFMM <- function(m, dat) { 178 | m$U <- (dat$Y - tcrossprod(dat$X, m$B)) %*% m$V 179 | m 180 | } 181 | 182 | ##' @export 183 | lfmm_CV.ridgeLFMM <- function(m, dat, n.fold.row, n.fold.col, lambdas , Ks, 184 | col.prop = 1.0) { 185 | 186 | params <- base::expand.grid(list(lambda = lambdas, K = Ks)) 187 | CV(m = m, 188 | dat = dat, 189 | n.fold.row = n.fold.row, 190 | n.fold.col = n.fold.col, 191 | params = params, 192 | col.prop = col.prop) 193 | 194 | } 195 | 196 | -------------------------------------------------------------------------------- /docs/reference/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Function reference • lfmm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 73 | 74 | 75 |
    76 | 77 |
    78 |
    79 | 85 | 86 |
    87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 100 | 101 | 102 | 103 | 106 | 107 | 108 | 109 | 112 | 113 | 114 | 115 | 118 | 119 | 120 | 121 | 124 | 125 | 126 | 127 | 130 | 131 | 132 | 133 | 136 | 137 | 138 | 139 | 142 | 143 | 144 | 145 |
    97 |

    Main functions

    98 |

    99 |
    104 |

    lfmm_ridge

    105 |

    LFMM least-squares estimates with ridge penalty

    110 |

    lfmm_ridge_CV

    111 |

    Cross validation of LFMM estimates with ridge penalty

    116 |

    lfmm_lasso

    117 |

    LFMM least-squares estimates with lasso penalty

    122 |

    lfmm_test

    123 |

    Statistical tests with latent factor mixed models

    128 |

    effect_size

    129 |

    Direct effect sizes estimated from latent factor models

    134 |

    predict_lfmm

    135 |

    Predict polygenic scores from latent factor models

    140 |

    forward_test

    141 |

    Forward inclusion tests with latent factor mixed models

    146 |
    147 |
    148 | 149 | 155 |
    156 | 157 | 167 |
    168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /inst/doc/lfmm.R: -------------------------------------------------------------------------------- 1 | ## ------------------------------------------------------------------------ 2 | #devtools::install_github("bcm-uga/lfmm") 3 | 4 | ## ------------------------------------------------------------------------ 5 | library(lfmm) 6 | 7 | ## ------------------------------------------------------------------------ 8 | ## Simulated phenotypes for Arabidopsis thaliana SNP data 9 | data("example.data") 10 | ## Simulated (and real) methylation levels for sun-exposed tissue sampled 11 | data("skin.exposure") 12 | 13 | ## ------------------------------------------------------------------------ 14 | Y <- example.data$genotype 15 | pc <- prcomp(Y) 16 | plot(pc$sdev[1:20]^2, xlab = 'PC', ylab = "Variance explained") 17 | points(6,pc$sdev[6]^2, type = "h", lwd = 3, col = "blue") 18 | 19 | ## ------------------------------------------------------------------------ 20 | Y <- skin.exposure$beta.value 21 | pc <- prcomp(Y) 22 | plot(pc$sdev[1:20]^2, xlab = 'PC', ylab = "Variance explained") 23 | points(2,pc$sdev[2]^2, type = "h", lwd = 3, col = "blue") 24 | 25 | ## ------------------------------------------------------------------------ 26 | Y <- example.data$genotype 27 | X <- example.data$phenotype #scaled phenotype 28 | 29 | ## ------------------------------------------------------------------------ 30 | ## Fit an LFMM, i.e, compute B, U, V estimates 31 | mod.lfmm <- lfmm_ridge(Y = Y, 32 | X = X, 33 | K = 6) 34 | 35 | ## ------------------------------------------------------------------------ 36 | ## performs association testing using the fitted model: 37 | pv <- lfmm_test(Y = Y, 38 | X = X, 39 | lfmm = mod.lfmm, 40 | calibrate = "gif") 41 | 42 | ## ------------------------------------------------------------------------ 43 | pvalues <- pv$calibrated.pvalue 44 | qqplot(rexp(length(pvalues), rate = log(10)), 45 | -log10(pvalues), xlab = "Expected quantile", 46 | pch = 19, cex = .4) 47 | abline(0,1) 48 | 49 | ## ------------------------------------------------------------------------ 50 | ## Manhattan plot 51 | plot(-log10(pvalues), 52 | pch = 19, 53 | cex = .2, 54 | xlab = "SNP", ylab = "-Log P", 55 | col = "grey") 56 | points(example.data$causal.set, 57 | -log10(pvalues)[example.data$causal.set], 58 | type = "h", 59 | col = "blue") 60 | 61 | ## ------------------------------------------------------------------------ 62 | Y <- scale(skin.exposure$beta.value) 63 | X <- scale(as.numeric(skin.exposure$exposure)) 64 | 65 | ## ------------------------------------------------------------------------ 66 | ## Fit and LFMM, i.e, compute B, U, V estimates 67 | mod.lfmm <- lfmm_ridge(Y = Y, 68 | X = X, 69 | K = 2) 70 | 71 | ## Perform association testing using the fitted model: 72 | pv <- lfmm_test(Y = Y, 73 | X = X, 74 | lfmm = mod.lfmm, 75 | calibrate = "gif") 76 | 77 | ## ------------------------------------------------------------------------ 78 | ## Manhattan plot 79 | plot(-log10(pv$calibrated.pvalue), 80 | pch = 19, 81 | cex = .3, 82 | xlab = "Probe", ylab = "-Log P", 83 | col = "grey") 84 | causal.set <- seq(11, 1496, by = 80) 85 | points(causal.set, 86 | -log10(pv$calibrated.pvalue)[causal.set], 87 | col = "blue") 88 | 89 | ## ------------------------------------------------------------------------ 90 | Y <- example.data$genotype 91 | X <- example.data$phenotype #scaled phenotype 92 | 93 | ## ---- message = FALSE---------------------------------------------------- 94 | ## Fit an LFMM, i.e, compute B, U, V estimates 95 | mod.lfmm <- lfmm_lasso(Y = Y, 96 | X = X, 97 | K = 6, 98 | nozero.prop = 0.01) 99 | 100 | ## ------------------------------------------------------------------------ 101 | ## performs association testing using the fitted model: 102 | pv <- lfmm_test(Y = Y, 103 | X = X, 104 | lfmm = mod.lfmm, 105 | calibrate = "gif") 106 | 107 | ## ------------------------------------------------------------------------ 108 | pvalues <- pv$calibrated.pvalue 109 | qqplot(rexp(length(pvalues), rate = log(10)), 110 | -log10(pvalues), xlab = "Expected quantile", 111 | pch = 19, cex = .4) 112 | abline(0,1) 113 | 114 | ## ------------------------------------------------------------------------ 115 | ## Manhattan plot 116 | plot(-log10(pvalues), 117 | pch = 19, 118 | cex = .2, 119 | xlab = "SNP", ylab = "-Log P", 120 | col = "grey") 121 | points(example.data$causal.set, 122 | -log10(pvalues)[example.data$causal.set], 123 | type = "h", 124 | col = "blue") 125 | 126 | ## ------------------------------------------------------------------------ 127 | ## Simulation of 1000 genotypes for 100 individuals (y) 128 | u <- matrix(rnorm(300, sd = 1), nrow = 100, ncol = 2) 129 | v <- matrix(rnorm(3000, sd = 2), nrow = 2, ncol = 1000) 130 | y <- matrix(rbinom(100000, size = 2, 131 | prob = 1/(1 + exp(-0.3*(u%*%v 132 | + rnorm(100000, sd = 2))))), 133 | nrow = 100, 134 | ncol = 1000) 135 | 136 | ## Simulation of 1000 phenotypes (x) 137 | ## Only the last 10 genotypes have significant effect sizes (b) 138 | b <- matrix(c(rep(0, 990), rep(6000, 10))) 139 | x <- y%*%b + rnorm(100, sd = 100) 140 | 141 | ## ------------------------------------------------------------------------ 142 | mod <- lfmm_ridge(Y = y, 143 | X = x, 144 | K = 2) 145 | 146 | ## ------------------------------------------------------------------------ 147 | candidates <- 991:1000 #causal loci 148 | b.values <- effect_size(Y = y, X = x, lfmm.object = mod) 149 | x.pred <- scale(y[,candidates], scale = F)%*% matrix(b.values[candidates]) 150 | 151 | ## ------------------------------------------------------------------------ 152 | ##Compare simulated and predicted/fitted phenotypes 153 | plot(x - mean(x), x.pred, 154 | pch = 19, col = "grey", 155 | xlab = "Observed phenotypes (centered)", 156 | ylab = "Predicted from PRS") 157 | abline(0,1) 158 | abline(lm(x.pred ~ scale(x, scale = FALSE)), col = 2) 159 | 160 | ## ------------------------------------------------------------------------ 161 | pred <- predict_lfmm(Y = y, 162 | X = x, 163 | fdr.level = 0.25, 164 | mod) 165 | 166 | ##Compare simulated and predicted/fitted phenotypes 167 | plot(x - mean(x), pred$pred, 168 | pch = 19, col = "grey", 169 | xlab = "Observed phenotypes (centered)", 170 | ylab = "Predicted from PRS") 171 | abline(0,1) 172 | abline(lm(pred$pred ~ scale(x, scale = FALSE)), col = 2) 173 | 174 | -------------------------------------------------------------------------------- /tests/testthat/test-ridgeLFMM.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | context("RidgeLFMM") 3 | 4 | test_that("RidgeLFMM_main", { 5 | 6 | K <- 3 7 | dat <- lfmm_sampler(n = 100, p = 1000, K = K, 8 | outlier.prop = 0.1, 9 | cs = c(0.8), 10 | sigma = 0.2, 11 | B.sd = 1.0, 12 | U.sd = 1.0, 13 | V.sd = 1.0) 14 | 15 | lambda <- 1e-5 16 | P.list <- compute_P(X = dat$X, lambda = lambda) 17 | 18 | m <- ridgeLFMM(K = K, 19 | lambda = lambda) 20 | 21 | res <- ridgeLFMM_main(m, dat, P.list) 22 | 23 | svd.res <- svd(P.list$sqrt.P %*% dat$Y) 24 | expect_lte(mean(abs(svd.res$v[,1:K] - res$V)), 0.06) 25 | ## RMK: error very high, it is because for this data first K singular values 26 | ## are quite the same. 27 | ## svd.res$d 28 | 29 | }) 30 | 31 | 32 | test_that("comp with lfmmR", { 33 | 34 | skip_if_not_installed("ThesisRpackage") 35 | require(ThesisRpackage) 36 | dat <- lfmm_sampler(n = 100, p = 1000, K = 3, 37 | outlier.prop = 0.1, 38 | cs = c(0.8), 39 | sigma = 0.2, 40 | B.sd = 1.0, 41 | U.sd = 1.0, 42 | V.sd = 1.0) 43 | dat.list <- list(G = dat$Y, X = dat$X) 44 | 45 | K <- 3 46 | lambda <- 1e-5 47 | 48 | ## lfmm implemented in R 49 | lfmmR <- finalLfmmRdigeMethod(K = 3, lambda = lambda) 50 | lfmmR$center <- FALSE 51 | lfmmR <- fit(lfmmR, dat.list) 52 | 53 | ## lfmm implemented with rsvd 54 | lfmm <- ridgeLFMM(K = K, 55 | lambda = lambda) 56 | lfmm <- lfmm_fit(lfmm, dat) 57 | 58 | expect_equal(dim(lfmm$B), c(1000, 1)) 59 | expect_equal(dim(lfmm$U), c(100, K)) 60 | expect_equal(dim(lfmm$V), c(1000, K)) 61 | 62 | ## comparison 63 | W.lfmm <- tcrossprod(lfmm$U, lfmm$V) 64 | W.lfmmR <- tcrossprod(lfmmR$U, lfmmR$V) 65 | expect_lte(mean(abs(W.lfmmR - W.lfmm)), 1e-6) 66 | expect_lte(mean(abs(lfmm$B - t(lfmmR$B))), 1e-6) 67 | ## rmk: we do not use same P... but it is still close :D 68 | 69 | }) 70 | 71 | test_that("ridgeLFMM of ThesisRpackage with NA", { 72 | 73 | skip("a test only to debug") 74 | skip_if_not_installed("ThesisRpackage") 75 | futile.logger::flog.threshold(futile.logger::TRACE, name = "ThesisRpackage") 76 | require("ThesisRpackage") 77 | 78 | n <- 100 79 | p <- 1000 80 | dat <- lfmm_sampler(n = n, p = p, K = 3, 81 | outlier.prop = 0.1, 82 | cs = c(0.8), 83 | sigma = 0.2, 84 | B.sd = 1.0, 85 | U.sd = 1.0, 86 | V.sd = 1.0) 87 | dat <- as.list(dat) 88 | dat$G <- dat$Y 89 | dat$Y <- NULL 90 | 91 | ## no NA 92 | lfmm.noNA <- finalLfmmRdigeMethod(K = 3, lambda = 1e-5) 93 | lfmm.noNA <- fit(lfmm.noNA, dat) 94 | 95 | ## add na 96 | na.ind <- sample.int(n * p, 0.5 * n * p) 97 | dat$G[na.ind] <- NA 98 | 99 | ## lfmm with na 100 | lfmm.NA <- finalLfmmRdigeMethod(K = 3, lambda = 1e-5) 101 | lfmm.NA <- fit(lfmm.NA, dat) 102 | 103 | ## impute by median first 104 | dat$G <- impute_median(dat$G) 105 | lfmm.NA.impute <- finalLfmmRdigeMethod(K = 3, lambda = 1e-5) 106 | lfmm.NA.impute <- fit(lfmm.NA.impute, dat) 107 | 108 | ## comparison W 109 | W.NA <- tcrossprod(lfmm.NA$U, lfmm.NA$V) 110 | W.noNA <- tcrossprod(lfmm.noNA$U, lfmm.noNA$V) 111 | W.NA.impute <- tcrossprod(lfmm.NA.impute$U, lfmm.NA.impute$V) 112 | e1 <- sqrt(mean((W.NA - W.noNA) ^ 2)) 113 | e2 <- sqrt(mean((W.NA.impute - W.noNA) ^ 2)) 114 | expect_gt((e2 - e1) / e1, 5) 115 | 116 | ## comparison B 117 | e1 <- sqrt(mean((lfmm.noNA$B - lfmm.NA$B) ^ 2)) 118 | e2 <- sqrt(mean((lfmm.noNA$B - lfmm.NA.impute$B) ^ 2)) 119 | expect_gt((e2 - e1) / e1, 1) 120 | 121 | }) 122 | 123 | test_that("ridgeLFMM with NA", { 124 | 125 | set.seed(454) 126 | n <- 100 127 | p <- 1000 128 | dat <- lfmm_sampler(n = n, p = p, K = 3, 129 | outlier.prop = 0.1, 130 | cs = c(0.8), 131 | sigma = 0.2, 132 | B.sd = 1.0, 133 | U.sd = 1.0, 134 | V.sd = 1.0) 135 | dat.list <- list(G = dat$Y, X = dat$X) 136 | 137 | ## no NA 138 | lfmm.noNA<- ridgeLFMM(K = 3, lambda = 1e-5) 139 | lfmm.noNA<- lfmm_fit(lfmm.noNA, dat) 140 | 141 | ## add na 142 | na.ind <- sample.int(n * p, 0.1 * n * p) 143 | dat$Y[na.ind] <- NA 144 | dat.list$Y[na.ind] <- NA 145 | 146 | ## lfmm with na 147 | lfmm.NA <- ridgeLFMM(K = 3, lambda = 1e-5) 148 | lfmm.NA <- lfmm_fit(lfmm.NA, dat) 149 | 150 | ## impute by median first 151 | dat$Y <- impute_median(dat$Y) 152 | lfmm.NA.impute <- ridgeLFMM(K = 3, lambda = 1e-5) 153 | lfmm.NA.impute <- lfmm_fit(lfmm.NA.impute, dat) 154 | 155 | ## comparison W 156 | W.NA <- tcrossprod(lfmm.NA$U, lfmm.NA$V) 157 | W.noNA <- tcrossprod(lfmm.noNA$U, lfmm.noNA$V) 158 | W.NA.impute <- tcrossprod(lfmm.NA.impute$U, lfmm.NA.impute$V) 159 | e1 <- sqrt(mean((W.NA - W.noNA) ^ 2)) 160 | e2 <- sqrt(mean((W.NA.impute - W.noNA) ^ 2)) 161 | (e2 - e1) / e1 162 | expect_gt((e2 - e1) / e1, 5) 163 | 164 | ## comparison B 165 | e1 <- sqrt(mean((lfmm.noNA$B - lfmm.NA$B) ^ 2)) 166 | e2 <- sqrt(mean((lfmm.noNA$B - lfmm.NA.impute$B) ^ 2)) 167 | (e2 - e1) / e1 168 | expect_gt((e2 - e1) / e1, 2) 169 | 170 | }) 171 | 172 | test_that("ridgeLFMM CV", { 173 | 174 | n <- 100 175 | p <- 1000 176 | dat <- lfmm_sampler(n = n, p = p, K = 3, 177 | outlier.prop = 0.1, 178 | cs = c(0.6), 179 | sigma = 0.2, 180 | B.sd = 1.0, 181 | U.sd = 1.0, 182 | V.sd = 1.0) 183 | 184 | lfmm <- ridgeLFMM(K = 3, lambda = 1e-5) 185 | 186 | cv.err <- lfmm_CV(m = lfmm, 187 | dat = dat, 188 | n.fold.row = 5, 189 | n.fold.col = 5, 190 | lambdas = c(1e-10, 1 , 1e20), 191 | Ks = c(1, 2,3,4,5,6), 192 | col.prop = 1.0) 193 | expect_equal(dim(cv.err), c(6 * 3 * 5 * 5, 4)) 194 | 195 | skip("plots") 196 | ggplot(cv.err, aes(y = err, x = as.factor(K))) + 197 | geom_boxplot() + 198 | facet_grid(lambda ~ ., scale = "free") 199 | 200 | ggplot(cv.err, aes(y = err, x = as.factor(lambda))) + 201 | geom_boxplot() + 202 | facet_grid(K ~ ., scales = "free") 203 | 204 | }) 205 | 206 | test_that("lassoLFMM CV", { 207 | 208 | 209 | skip('cross validation ne marche pas ....') 210 | 211 | n <- 100 212 | p <- 1000 213 | dat <- lfmm_sampler(n = n, p = p, K = 3, 214 | outlier.prop = 0.1, 215 | cs = c(0.6), 216 | sigma = 0.2, 217 | B.sd = 1.0, 218 | U.sd = 1.0, 219 | V.sd = 1.0) 220 | 221 | lfmm <- lassoLFMM(K = 3, nozero.prop = NULL, lambda.num = 10, lambda.min.ratio = 0.001) 222 | 223 | cv.err <- lfmm_CV(m = lfmm, 224 | dat = dat, 225 | n.fold.row = 5, 226 | n.fold.col = 2, 227 | col.prop = 1.0, 228 | it.max = 100, relative.err.epsilon = 1e-4 229 | ) 230 | 231 | expect_equal(dim(cv.err), c(6 * 3 * 2 * 5, 3)) 232 | 233 | skip("plots") 234 | 235 | ggplot(cv.err, aes(y = err, x = as.factor(lambda))) + 236 | geom_boxplot() 237 | 238 | ggplot(cv.err, aes(y = err, x = nozero.prop)) + 239 | geom_smooth() 240 | 241 | }) 242 | --------------------------------------------------------------------------------