├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── R ├── F_GramSchmidt.R ├── F_JacCol_constr.R ├── F_JacCol_constr_noLab.R ├── F_LR_nb.R ├── F_LR_nb_Jac.R ├── F_NBalphaInfl.R ├── F_NBcolInfl.R ├── F_NBjacobianAbunds.R ├── F_NBjacobianCol.R ├── F_NBjacobianColNP.R ├── F_NBjacobianLibSizes.R ├── F_NBjacobianPsi.R ├── F_NBjacobianRow.R ├── F_NBpsiInfl.R ├── F_NBrowInfl.R ├── F_RCM.R ├── F_RCM_NB.R ├── F_addOrthProjection.R ├── F_arrayProd.R ├── F_buildCentMat.R ├── F_buildConfMat.R ├── F_buildCovMat.R ├── F_buildDesign.R ├── F_checkAlias.R ├── F_constrCorresp.R ├── F_correctXMissingness.R ├── F_dLR_nb.R ├── F_dNBabunds.R ├── F_dNBlibSizes.R ├── F_dNBllcol.R ├── F_dNBllcolNP.R ├── F_dNBllcol_constr.R ├── F_dNBllcol_constr_noLab.R ├── F_dNBllrow.R ├── F_dNBpsis.R ├── F_data.R ├── F_deviances.R ├── F_ellipseCoord.R ├── F_estDisp.R ├── F_estNBparams.R ├── F_estNBparamsNoLab.R ├── F_estNPresp.R ├── F_extractCoord.R ├── F_extractE.R ├── F_filterConfounders.R ├── F_getDevMat.R ├── F_getDevianceRes.R ├── F_getDistCoord.R ├── F_getInflCol.R ├── F_getInflRow.R ├── F_getInt.R ├── F_getLogLik.R ├── F_getModelMat.R ├── F_getRowMat.R ├── F_heq_nb.R ├── F_heq_nb_jac.R ├── F_indentPlot.R ├── F_inertia.R ├── F_liks.R ├── F_permanova.R ├── F_plot.RCM.R ├── F_plotRespFun.R ├── F_residualPlot.R ├── F_respFunJacMat.R ├── F_respFunScoreMat.R ├── F_rowMultiply.R ├── F_seq_k.R └── F_trimOnConfounders.R ├── README.Rmd ├── README.md ├── README_figs ├── README-plotCond-1.png ├── README-plotNPTriplot-1.png ├── README-plotRichness-1.png ├── README-plotUnconstrainedRCMall-1.png ├── README-plotUnconstrainedRCMallColour-1.png ├── README-plotlin2cor-1.png ├── README-plotlin3-1.png └── README-plotlin3Triplot-1.png ├── data └── Zeller.RData ├── inst ├── CITATION ├── NEWS.md └── fits │ └── zellerFits.RData ├── man ├── GramSchmidt.Rd ├── JacCol_constr.Rd ├── JacCol_constr_noLab.Rd ├── LR_nb.Rd ├── LR_nb_Jac.Rd ├── NBalphaInfl.Rd ├── NBcolInfl.Rd ├── NBjacobianAbundsOld.Rd ├── NBjacobianColNP.Rd ├── NBjacobianColOld.Rd ├── NBjacobianLibSizes.Rd ├── NBjacobianPsi.Rd ├── NBjacobianRow.Rd ├── NBpsiInfl.Rd ├── NBrowInfl.Rd ├── RCM.Rd ├── RCM_NB.Rd ├── Zeller.Rd ├── addOrthProjection.Rd ├── arrayprod.Rd ├── buildCentMat.Rd ├── buildConfMat.Rd ├── buildConfMat.character.Rd ├── buildConfMat.data.frame.Rd ├── buildCovMat.Rd ├── buildDesign.Rd ├── checkAlias.Rd ├── constrCorresp.Rd ├── correctXMissingness.Rd ├── dLR_nb.Rd ├── dNBabundsOld.Rd ├── dNBlibSizes.Rd ├── dNBllcolNP.Rd ├── dNBllcolOld.Rd ├── dNBllcol_constr.Rd ├── dNBllcol_constr_noLab.Rd ├── dNBllrow.Rd ├── dNBpsis.Rd ├── deviances.Rd ├── ellipseCoord.Rd ├── estDisp.Rd ├── estNBparams.Rd ├── estNBparamsNoLab.Rd ├── estNPresp.Rd ├── extractCoord.Rd ├── extractE.Rd ├── filterConfounders.Rd ├── getDevMat.Rd ├── getDevianceRes.Rd ├── getDistCoord.Rd ├── getInflCol.Rd ├── getInflRow.Rd ├── getInt.Rd ├── getLogLik.Rd ├── getModelMat.Rd ├── getRowMat.Rd ├── heq_nb.Rd ├── heq_nb_jac.Rd ├── indentPlot.Rd ├── inertia.Rd ├── liks.Rd ├── permanova.Rd ├── plot.RCM.Rd ├── plotRespFun.Rd ├── residualPlot.Rd ├── respFunJacMat.Rd ├── respFunScoreMat.Rd ├── rowMultiply.Rd ├── seq_k.Rd └── trimOnConfounders.Rd ├── tests ├── testthat.R └── testthat │ ├── test-ArrayProd.R │ ├── test-RCMinput.R │ ├── test-RCMoutput.R │ ├── test-ResidualPlot.R │ ├── test-checkAlias.R │ ├── test-permanova.R │ ├── test-plotRCM.R │ ├── test-removeNA.R │ ├── test-rowMultiply.R │ └── test-seq_k.R └── vignettes └── RCMvignette.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | README* 4 | results 5 | vignettes/RCMvignette_cache 6 | vignettes/RCMvignette_figures 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .Rproj 6 | inst/doc 7 | .html 8 | NEWS.Rmd 9 | /vignettes/RCMvignette_cache/* 10 | *.Rproj 11 | *.access^ 12 | README_cache/ 13 | vignettes/RCMvignette.R 14 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: RCM 2 | Type: Package 3 | Title: Fit row-column association models with the negative binomial distribution for the microbiome 4 | Version: 1.11.4 5 | Authors@R: 6 | c(person(given = "Stijn", family = "Hawinkel", 7 | email = "stijn.hawinkel@psb.ugent.be", 8 | role=c("cre", "aut"), comment = c(ORCID = "0000-0002-4501-5180"))) 9 | Description: Combine ideas of log-linear analysis of contingency table, flexible response function estimation and empirical Bayes dispersion estimation for explorative visualization of microbiome datasets. The package includes unconstrained as well as constrained analysis. In addition, diagnostic plot to detect lack of fit are available. 10 | License: GPL-2 11 | Encoding: UTF-8 12 | RoxygenNote: 7.2.3 13 | Imports: RColorBrewer, alabama, edgeR, reshape2, tseries, stats, VGAM, ggplot2 (>= 2.2.1.9000), nleqslv, phyloseq, tensor, MASS, grDevices, graphics, methods 14 | Depends: R (>= 4.0), DBI 15 | LazyData: true 16 | Suggests: 17 | knitr, 18 | rmarkdown, 19 | testthat 20 | VignetteBuilder: knitr 21 | biocViews: Metagenomics, DimensionReduction, Microbiome, Visualization 22 | BugReports: https://github.com/CenterForStatistics-UGent/RCM/issues 23 | URL: https://bioconductor.org/packages/release/bioc/vignettes/RCM/inst/doc/RCMvignette.html/ 24 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(plot,RCM) 4 | export(RCM) 5 | export(RCM_NB) 6 | export(addOrthProjection) 7 | export(checkAlias) 8 | export(extractCoord) 9 | export(getDevianceRes) 10 | export(inertia) 11 | export(liks) 12 | export(permanova) 13 | export(plotRespFun) 14 | export(residualPlot) 15 | exportMethods(RCM) 16 | import(ggplot2) 17 | import(methods) 18 | import(phyloseq) 19 | importFrom(MASS,negative.binomial) 20 | importFrom(RColorBrewer,brewer.pal) 21 | importFrom(VGAM,coef) 22 | importFrom(VGAM,negbinomial.size) 23 | importFrom(VGAM,predict) 24 | importFrom(VGAM,s) 25 | importFrom(VGAM,vgam) 26 | importFrom(alabama,constrOptim.nl) 27 | importFrom(grDevices,colorRampPalette) 28 | importFrom(grDevices,rainbow) 29 | importFrom(graphics,par) 30 | importFrom(graphics,text) 31 | importFrom(nleqslv,nleqslv) 32 | importFrom(phyloseq,get_variable) 33 | importFrom(stats,alias) 34 | importFrom(stats,as.formula) 35 | importFrom(stats,contrasts) 36 | importFrom(stats,dist) 37 | importFrom(stats,dnbinom) 38 | importFrom(stats,dpois) 39 | importFrom(stats,formula) 40 | importFrom(stats,glm) 41 | importFrom(stats,integrate) 42 | importFrom(stats,model.matrix) 43 | importFrom(stats,na.omit) 44 | importFrom(stats,quantile) 45 | importFrom(stats,runif) 46 | importFrom(stats,weighted.mean) 47 | importFrom(tensor,tensor) 48 | importFrom(tseries,runs.test) 49 | -------------------------------------------------------------------------------- /R/F_GramSchmidt.R: -------------------------------------------------------------------------------- 1 | #'Gram-Schmidt orthogonalization of vectors 2 | #' 3 | #' @param x The vector that is to be orthogonalized 4 | #' @param otherVecs a matrix; x is orthogonalized with respect to its rows 5 | #' @param weights The weights used in the orthogonalization 6 | #' @return The orthogonalized vector 7 | GramSchmidt = function(x, otherVecs, weights = rep(1, length(x))){ 8 | for(i in seq_len(nrow(otherVecs))){ 9 | x = x- sum(x*otherVecs[i,]*weights)/sum(otherVecs[i,]^2*weights)*otherVecs[i,] 10 | x = x/sqrt(sum(x^2*weights)) 11 | } 12 | return(x) 13 | } 14 | -------------------------------------------------------------------------------- /R/F_JacCol_constr.R: -------------------------------------------------------------------------------- 1 | #' Jacobian of the constrained analysis with linear response function. 2 | #' 3 | #' @param betas a vector of v parameters of the response function 4 | #' of a single taxon 5 | #' @param X the count vector of length n 6 | #' @param reg a n-by-v matrix of regressors 7 | #' @param theta The dispersion parameter of this taxon 8 | #' @param muMarg offset of length n 9 | #' @param psi a scalar, the importance parameter 10 | #' @param allowMissingness A boolean, are missing values present 11 | #' @param naId The numeric index of the missing values in X 12 | #' 13 | #' Even though this approach does not imply normalization over 14 | #' the parameters of all taxa, it is very fast 15 | #' and they can be normalized afterwards 16 | #' 17 | #' @return The jacobian, a square symmetric matrix of dimension v 18 | JacCol_constr = function(betas, X, reg, theta, 19 | muMarg, psi, allowMissingness, naId) { 20 | mu = exp(c(reg %*% betas) * psi) * muMarg 21 | X = correctXMissingness(X, mu, allowMissingness, naId) 22 | tmp = (1 + X/theta) * mu/(1 + mu/theta)^2 23 | -crossprod(tmp * reg, reg) * psi^2 #Don't forget to square psi! 24 | 25 | } 26 | -------------------------------------------------------------------------------- /R/F_JacCol_constr_noLab.R: -------------------------------------------------------------------------------- 1 | #' The jacobian of the response function without taxon labels 2 | #' 3 | #' @param betas a vector of regression parameters with length v 4 | #' @param X the nxp data matrix 5 | #' @param reg a matrix of regressors of dimension nxv 6 | #' @param thetasMat A matrix of dispersion parameters 7 | #' @param muMarg offset matrix of dimension nxp 8 | #' @param preFabMat a prefabricated matrix 9 | #' @param psi a scalar, the importance parameter 10 | #' @param n an integer, number of rows of X 11 | #' @param v an integer, the number of parameters of the response function 12 | #' @param allowMissingness A boolean, are missing values present 13 | #' @param naId The numeric index of the missing values in X 14 | #' 15 | #' @return The jacobian (a v-by-v matrix) 16 | JacCol_constr_noLab = function(betas, X, 17 | reg, thetasMat, muMarg, psi, n, v, preFabMat, allowMissingness, naId) { 18 | mu = c(exp(reg %*% betas * psi)) * muMarg 19 | if(allowMissingness){ 20 | preFabMat = 1 + correctXMissingness(X, mu, allowMissingness, naId)/thetasMat 21 | } 22 | tmp = preFabMat * mu/(1 + (mu/thetasMat))^2 * 23 | psi^2 #Don't forget to square psi! 24 | -crossprod(reg, vapply(seq_len(v), FUN.VALUE = vector("numeric", 25 | n), function(x) { 26 | rowSums(reg[, x] * tmp) 27 | })) 28 | } 29 | -------------------------------------------------------------------------------- /R/F_LR_nb.R: -------------------------------------------------------------------------------- 1 | #' Get the value of the log-likelihood ratio of alpha 2 | #' 3 | #' @param Alpha a vector of length d, the environmental gradient 4 | #' @param X the n-by-p count matrix 5 | #' @param CC the n-by-d covariate matrix 6 | #' @param responseFun a character string indicating 7 | #' the type of response function 8 | #' @param muMarg an n-by-p offset matrix 9 | #' @param psi a scalar, an importance parameter 10 | #' @param nleqslv.control the control list for the nleqslv() function 11 | #' @param n number of samples 12 | #' @param NB_params Starting values for the NB_params 13 | #' @param NB_params_noLab Starting values for the NB_params without label 14 | #' @param thetaMat a matrix of size n-by-p with estimated dispersion parameters 15 | #' @param ncols a scalar, the number of columns of X 16 | #' @param nonParamRespFun A list, the result of the estNPresp() function 17 | #' @param envGradEst a character string, 18 | #' indicating how the environmental gradient should be fitted. 19 | #' 'LR' using the likelihood-ratio criterion, 20 | #' or 'ML' a full maximum likelihood solution 21 | #' @param ... Further arguments passed on to other functions 22 | #' 23 | #' DON'T USE 'p' as variable name, 24 | #' partial matching in the grad-function in the numDeriv package 25 | #' 26 | #' @return: a scalar, the evaluation of the log-likelihood ratio 27 | #' at the given alpha 28 | LR_nb <- function(Alpha, X, CC, responseFun = c("linear", 29 | "quadratic", "nonparametric", "dynamic"), 30 | muMarg, psi, nleqslv.control = list(trace = FALSE), 31 | n, NB_params, NB_params_noLab, thetaMat, 32 | ncols, nonParamRespFun, envGradEst, ...) { 33 | 34 | sampleScore = CC %*% Alpha 35 | if (responseFun %in% c("linear", "quadratic", 36 | "dynamic")) { 37 | design = buildDesign(sampleScore, 38 | responseFun) 39 | muT = muMarg * c(exp(design %*% NB_params * 40 | psi)) 41 | if (envGradEst == "LR") 42 | mu0 = muMarg * c(exp(design %*% 43 | NB_params_noLab * psi)) 44 | } else { 45 | # Non-parametric response function 46 | muT = exp(nonParamRespFun$rowMat) * 47 | muMarg 48 | if (envGradEst == "LR") 49 | mu0 = c(exp(nonParamRespFun$rowVecOverall)) * 50 | muMarg 51 | } 52 | logDensj = dnbinom(X, mu = muT, size = thetaMat, 53 | log = TRUE) 54 | # Likelihoods under species specific 55 | # model Immediately return log 56 | # likelihoods 57 | 58 | if (envGradEst == "LR") 59 | logDens0 = dnbinom(X, mu = mu0, size = thetaMat, 60 | log = TRUE) #Likelihoods of null model 61 | 62 | lr <- sum(switch(envGradEst, "LR" = logDensj - 63 | logDens0, "ML" = logDensj), na.rm = TRUE) 64 | # The likelihood ratio 65 | return(-lr) # opposite sign for the minimization procedure 66 | } 67 | -------------------------------------------------------------------------------- /R/F_LR_nb_Jac.R: -------------------------------------------------------------------------------- 1 | #' A function that returns the Jacobian of the likelihood ratio 2 | #' 3 | #' @param Alpha a vector of length d + k*(2+(k-1)/2), 4 | #' the environmental gradient plus the lagrangian multipliers 5 | #' @param X the n-by-p count matrix 6 | #' @param CC a n-by-d covariate vector 7 | #' @param responseFun a character string indicating 8 | #' the type of response function 9 | #' @param psi a scalar, an importance parameter 10 | #' @param NB_params Starting values for the NB_params 11 | #' @param NB_params_noLab Starting values for the NB_params without label 12 | #' @param d an integer, the number of covariate parameters 13 | #' @param alphaK a matrix of environmental gradients of lower dimensions 14 | #' @param k an integer, the current dimension 15 | #' @param centMat a nLambda1s-by-d centering matrix 16 | #' @param nLambda an integer, number of lagrangian multipliers 17 | #' @param nLambda1s an integer, number of centering restrictions 18 | #' @param thetaMat a matrix of size n-by-p with estimated dispersion parameters 19 | #' @param muMarg an n-by-p offset matrix 20 | #' @param n an integer, the number of rows of X 21 | #' @param ncols a scalar, the number of columns of X 22 | #' @param preFabMat a prefabricated matrix 23 | #' @param envGradEst a character string, 24 | #' indicating how the environmental gradient should be fitted. 25 | #' 'LR' using the likelihood-ratio criterion, 26 | #' or 'ML' a full maximum likelihood solution 27 | #' @param allowMissingness A boolean, are missing values present 28 | #' @param naId The numeric index of the missing values in X 29 | #' @param ... Further arguments passed on to other functions 30 | #' 31 | #' @return A symmetric matrix, the evaluated Jacobian 32 | LR_nb_Jac = function(Alpha, X, CC, responseFun = c("linear", "quadratic", 33 | "nonparametric", "dynamic"), psi, NB_params, NB_params_noLab, d, alphaK, 34 | k, centMat, nLambda, nLambda1s, thetaMat, muMarg, n, ncols, preFabMat, 35 | envGradEst, allowMissingness, naId, ...) { 36 | did = seq_len(d) 37 | # Extract the parameters 38 | alpha = Alpha[did] 39 | lambda1s = Alpha[d + seq_len(nLambda1s)] 40 | # Multiple centering requirements now! 41 | lambda2 = Alpha[d + nLambda1s + 1] 42 | lambda3 = if (k == 1) { 43 | 0 44 | } else { 45 | Alpha[(d + nLambda1s + 2):(d + nLambda)] 46 | } 47 | sampleScore = CC %*% alpha 48 | # A linear combination of the environmental variables yields the 49 | # sampleScore 50 | design = buildDesign(sampleScore, responseFun) 51 | 52 | mu = muMarg * exp(design %*% NB_params * psi) 53 | if(allowMissingness){ 54 | X = correctXMissingness(X, mu, allowMissingness, naId) 55 | preFabMat = 1 + X/thetaMat 56 | } 57 | if (envGradEst == "LR") 58 | mu0 = muMarg * c(exp(design %*% NB_params_noLab * psi)) 59 | 60 | Jac = matrix(0, nrow = d + nLambda, ncol = d + nLambda) 61 | # dLag²/dalpha_{yk}dlambda_{1k} 62 | Jac[(d + seq_len(nLambda1s)), did] = centMat 63 | 64 | Jac[did, d + nLambda1s + 1] = 2 * alpha 65 | responseFun = switch(responseFun, dynamic = "quadratic", responseFun) 66 | 67 | if (responseFun == "linear") { 68 | tmp = rowMultiply(preFabMat * mu/(1 + mu/thetaMat)^2, NB_params[2, 69 | ]^2) 70 | if (envGradEst == "LR") { 71 | tmp0 = preFabMat * mu0/(1 + mu0/thetaMat)^2 * NB_params_noLab[2]^2 72 | } 73 | } else if (responseFun == "quadratic") { 74 | tmp = preFabMat * mu/(1 + mu/thetaMat)^2 75 | if (envGradEst == "LR") { 76 | tmp0 = preFabMat * mu0/(1 + mu0/thetaMat)^2 77 | } 78 | } 79 | # dLag²/ds_{ik}dlambda_{3kk'} 80 | if (k > 1) { 81 | Jac[(d + nLambda1s + 2):(d + nLambda), did] = alphaK 82 | } 83 | 84 | # Symmetrize 85 | Jac = Jac + t(Jac) 86 | cSam = c(sampleScore) 87 | cSam2 = cSam^2 88 | 89 | Jac[did, did] = switch(responseFun, linear = -psi^2 * 90 | (colSums(tensor(vapply(did, 91 | FUN.VALUE = tmp, function(x) { 92 | CC[, x] * switch(envGradEst, LR = (tmp - tmp0), ML = tmp) 93 | }), CC, 1, 1))), quadratic = switch(envGradEst, LR = 94 | colSums((tensor(vapply(did, 95 | FUN.VALUE = tmp, function(x) { 96 | CC[, x] * (-psi^2 * (tmp * (matrix(NB_params[2, ]^2, n, ncols, 97 | byrow = TRUE) + 4 * outer(cSam, NB_params[2, ] * NB_params[3, 98 | ]) + 4 * outer(cSam2, NB_params[3, ]^2)) - tmp0 * 99 | (NB_params_noLab[2] + 100 | NB_params_noLab[3] * 2 * cSam)^2) + 2 * psi * (rowMultiply((X - 101 | mu)/(1 + mu/thetaMat), NB_params[3, ]) - (X - mu0)/(1 + 102 | mu0/thetaMat) * NB_params_noLab[3])) 103 | }), CC, 1, 1))), ML = colSums(tensor(vapply(did, FUN.VALUE = tmp, 104 | function(x) { 105 | CC[, x] * (-psi^2 * (tmp * (matrix(NB_params[2, ]^2, n, ncols, 106 | byrow = TRUE) + 4 * outer(cSam, NB_params[2, ] * NB_params[3, 107 | ]) + 4 * outer(cSam2, NB_params[3, ]^2)) + 2 * psi * 108 | (rowMultiply((X - 109 | mu)/(1 + mu/thetaMat), NB_params[3, ])))) 110 | }), CC, 1, 1)))) 111 | 112 | diag(Jac)[did] = diag(Jac)[did] + 2 * lambda2 #Correct the diagonal 113 | Jac 114 | } 115 | -------------------------------------------------------------------------------- /R/F_NBalphaInfl.R: -------------------------------------------------------------------------------- 1 | #' Calculate the components of the influence functions 2 | #' 3 | #' @param rcm an rcm object 4 | #' @param Dim the required dimension 5 | #' 6 | #' @return An n-by-p-by-d array with the influence of every observation 7 | #' on every alpha parameter 8 | NBalphaInfl = function(rcm, Dim) { 9 | if (length(Dim) > 1) { 10 | stop("Influence of only one dimension at the time can be 11 | extratced! \n") 12 | } 13 | # Extract the parameters 14 | alpha = rcm$alpha[, Dim] 15 | centMat = buildCentMat(rcm) 16 | nLambda1s = NROW(centMat) 17 | lambdas = rcm$lambdasAlpha[seq_k(Dim, nLambda1s)] 18 | lambda1s = lambdas[seq_len(nLambda1s)] 19 | # Multiple centering requirements now! 20 | lambda2 = lambdas[nLambda1s + 1] 21 | lambda3 = if (Dim == 1) { 22 | 0 23 | } else { 24 | lambdas[-seq_len(nLambda1s)] 25 | } 26 | responseFun = rcm$responseFun 27 | CC = rcm$covariates 28 | X = rcm$X 29 | p = ncol(X) 30 | n = nrow(X) 31 | d = ncol(CC) 32 | envGradEst = if (is.null(rcm$envGradEst)) 33 | "LR" else rcm$envGradEst 34 | thetaMat = matrix(rcm$thetas[, paste0("Dim",Dim)], byrow = TRUE, n, p) 35 | NB_params = rcm$NB_params[, , Dim] 36 | NB_params_noLab = rcm$NB_params_noLab[, Dim] 37 | psi = rcm$psis[Dim] 38 | 39 | sampleScore = CC %*% alpha 40 | # A linear combination of the environmental variables yields the 41 | # sampleScore 42 | mu = extractE(rcm, seq_len(Dim)) 43 | X = correctXMissingness(X, mu, rcm$NApresent, is.na(X)) 44 | muMarg = extractE(rcm, seq_len(Dim - 1)) 45 | tmp = (X - mu)/(1 + mu/thetaMat) 46 | tmp2 = rowMultiply(tmp, NB_params[2, ]) 47 | 48 | if (envGradEst == "LR") { 49 | mu0 = muMarg * c(exp(buildDesign(sampleScore, responseFun) %*% 50 | NB_params_noLab * 51 | psi)) 52 | tmp0 = (X - mu0)/(1 + mu0/thetaMat) 53 | } 54 | # A n-by-p-by-d array 55 | score = switch(responseFun, linear = if (envGradEst == "LR") { 56 | psi * (vapply(seq_len(d), FUN.VALUE = tmp, function(i) { 57 | tmp2 * CC[, i] 58 | }) - NB_params_noLab[2] * vapply(seq_len(d), FUN.VALUE = tmp0, 59 | function(i) { 60 | tmp0 * CC[, i] 61 | })) 62 | } else { 63 | psi * (vapply(seq_len(d), FUN.VALUE = tmp, function(i) { 64 | tmp2 * CC[, i] 65 | })) 66 | }, quadratic = if (envGradEst == "LR") { 67 | psi * (vapply(seq_len(d), FUN.VALUE = tmp, function(i) { 68 | tmp2 * CC[, i] 69 | }) + 2 * vapply(seq_len(d), FUN.VALUE = tmp, function(i) { 70 | rowMultiply(tmp, NB_params[3, ]) * CC[, i] * c(sampleScore) 71 | }) - NB_params_noLab[2] * vapply(seq_len(d), FUN.VALUE = tmp0, 72 | function(i) { 73 | tmp0 * CC[, i] 74 | }) - 2 * NB_params_noLab[3] * vapply(seq_len(d), FUN.VALUE = tmp, 75 | function(i) { 76 | tmp0 * CC[, i] * c(sampleScore) 77 | })) 78 | } else { 79 | psi * (vapply(seq_len(d), FUN.VALUE = tmp, function(i) { 80 | tmp2 * CC[, i] 81 | }) - NB_params_noLab[2] * vapply(seq_len(d), FUN.VALUE = tmp0, 82 | function(i) { 83 | tmp0 * CC[, i] 84 | })) 85 | }, stop("Unknown response function provided! \n")) + 86 | rep(c(lambda1s %*% centMat) + lambda2 * 2 * alpha + 87 | if (Dim > 1) rowSums(rcm$alpha[, 88 | seq_len(Dim - 1), drop = FALSE] %*% lambda3) else 0, each = n *p) 89 | 90 | JacobianInv = -solve(LR_nb_Jac(Alpha = c(alpha, lambda1s, lambda2, lambda3), 91 | X = X, CC = CC, responseFun = responseFun, psi = psi, 92 | NB_params = NB_params, 93 | NB_params_noLab = NB_params_noLab, d = d, k = Dim, 94 | centMat = centMat, 95 | nLambda = nLambda1s + Dim, nLambda1s = nLambda1s, thetaMat = thetaMat, 96 | muMarg = extractE(rcm, seq_len(Dim - 1)), n = n, ncols = p, 97 | envGradEst = envGradEst, 98 | alphaK = rcm$alpha[, seq_len(Dim - 1), drop = FALSE], preFabMat = 1 + 99 | X/thetaMat, allowMissingness = rcm$NApresent, 100 | naId = is.na(rcm$X)))[seq_len(d), seq_len(d)] 101 | # Return only alpha indices, don't forget the minus sign! 102 | rownames(JacobianInv) = colnames(JacobianInv) = names(alpha) 103 | tensor(score, JacobianInv, 3, 1) 104 | } 105 | -------------------------------------------------------------------------------- /R/F_NBcolInfl.R: -------------------------------------------------------------------------------- 1 | #' The influence function for the column scores 2 | #' @param rcm an rcm object 3 | #' @param Dim the required dimension 4 | #' 5 | #' @return A list with components 6 | #' \item{score}{a matrix with components of the score function} 7 | #' \item{InvJac}{A square matrix of dimension p with the components of the 8 | #' Jacobian related to the column scores} 9 | NBcolInfl = function(rcm, Dim = 1) { 10 | reg = rcm$psis[Dim] * rcm$rMat[, Dim] 11 | mu = extractE(rcm, seq_len(Dim)) 12 | rcm$X = correctXMissingness(rcm$X, mu, rcm$NApresent) 13 | #Take also lower dimensions into account here 14 | thetaMat = matrix(byrow = TRUE, nrow = nrow(rcm$X), 15 | ncol = ncol(rcm$X), data = rcm$thetas[, 16 | switch(as.character(Dim), `0` = "Independence", 17 | `0.5` = "Filtered", paste0("Dim", 18 | Dim))]) 19 | lambdaCol = rcm$lambdaCol[seq_k(Dim)] 20 | cMatK = rcm$cMat[seq_len(Dim - 1), , 21 | drop = FALSE] 22 | tmp = if (Dim > 1) 23 | lambdaCol[-c(1, 2)] %*% cMatK else 0 24 | 25 | score = t(t((reg * (rcm$X - mu)/(1 + mu/thetaMat))) + 26 | rcm$colWeights * (lambdaCol[1] + 27 | lambdaCol[2] * 2 * rcm$cMat[Dim,] + 28 | tmp)) 29 | 30 | JacobianInv = solve(NBjacobianCol(beta = c(rcm$cMat[Dim, 31 | ], lambdaCol), X = rcm$X, reg = reg, 32 | thetas = thetaMat, muMarg = mu, k = Dim, 33 | p = nrow(rcm$X), n = ncol(rcm$X), 34 | colWeights = rcm$colWeights, nLambda = length(lambdaCol), 35 | cMatK = cMatK, allowMissingness = rcm$NApresent)) 36 | # Inverse Jacobian 37 | 38 | # After a long thought: The X's do not 39 | # affect the estimation of the lambda 40 | # parameters! Matrix becomes too large: 41 | # return score and inverse jacobian 42 | return(list(score = score, InvJac = JacobianInv[seq_len(ncol(rcm$X)), 43 | seq_len(ncol(rcm$X))])) 44 | } 45 | -------------------------------------------------------------------------------- /R/F_NBjacobianAbunds.R: -------------------------------------------------------------------------------- 1 | #'Jacobian for the column components of the independence model 2 | #' 3 | #'@param beta a vector of length p with current abundance estimates 4 | #'@param X a n-by-p count matrix 5 | #'@param reg a vector of length n with library sizes estimates 6 | #'@param thetas a n-by-p matrix with overdispersion estimates in the rows 7 | #' @param allowMissingness A boolean, are missing values present 8 | #' @param naId The numeric index of the missing values in X 9 | #' 10 | #'@return a diagonal matrix of dimension p with evaluations 11 | #'of the jacobian function 12 | NBjacobianAbundsOld = function(beta, X, reg, 13 | thetas, allowMissingness, naId) { 14 | mu = exp(outer(reg, beta, "+")) 15 | X = correctXMissingness(X, mu, allowMissingness, naId) 16 | -diag(colSums((1 + (X/thetas)) * mu/(1 + 17 | (mu/thetas))^2)) 18 | } 19 | NBjacobianAbunds = function(beta, X, reg, 20 | thetas, allowMissingness, naId) { 21 | mu = exp(reg+beta) 22 | X = correctXMissingness(X, mu, allowMissingness, naId) 23 | -as.matrix(sum((1 + (X/thetas)) * mu/(1 + 24 | (mu/thetas))^2)) 25 | } 26 | -------------------------------------------------------------------------------- /R/F_NBjacobianCol.R: -------------------------------------------------------------------------------- 1 | #' Jacobian for the estimation of the column scores 2 | #' 3 | #' @param beta vector of length p+1+1+(k-1): p row scores, 1 centering, 4 | #' one normalization 5 | #' and (k-1) orhtogonality lagrangian multipliers 6 | #' @param X the nxp data matrix 7 | #' @param reg a nx1 regressor matrix: outer product of rowScores and psis 8 | #' @param thetas nxp matrix with the dispersion parameters 9 | #' (converted to matrix for numeric reasons) 10 | #' @param muMarg the nxp offset 11 | #' @param k an integer, the dimension of the RC solution 12 | #' @param p an integer, the number of taxa 13 | #' @param n an integer, the number of samples 14 | #' @param nLambda an integer, the number of restrictions 15 | #' @param colWeights the weights used for the restrictions 16 | #' @param cMatK the lower dimensions of the colScores 17 | #' @param preFabMat a prefab matrix, (1+X/thetas) 18 | #' @param Jac an empty Jacobian matrix 19 | #' @param allowMissingness A boolean, are missing values present 20 | #' @param naId The numeric index of the missing values in X 21 | #' 22 | #' @return A matrix of dimension p+1+1+(k-1) with evaluations of the Jacobian 23 | NBjacobianColOld = function(beta, X, reg, thetas, 24 | muMarg, k, n, p, colWeights, nLambda, 25 | cMatK, preFabMat, Jac, allowMissingness, naId) { 26 | cMat = beta[seq_len(p)] 27 | 28 | # Calculate the mean 29 | mu = exp(reg %*% cMat) * muMarg 30 | 31 | # The symmetric jacobian matrix. The 32 | # upper part is filled first, then mirror 33 | # image is taken for lower triangle 34 | 35 | # dLag²/dr_{ik}dlambda_{1k} 36 | Jac[seq_len(p), p + 2] = Jac[p + 2, seq_len(p)] = colWeights * 37 | 2 * cMat 38 | 39 | if(allowMissingness){ 40 | preFabMat = 1 + correctXMissingness(X, mu, allowMissingness, naId)/thetas 41 | } 42 | 43 | # dLag²/dr_{ik}² 44 | diag(Jac)[seq_len(p)] = -crossprod(preFabMat * 45 | mu/(1 + mu/thetas)^2, reg^2) + 2 * 46 | beta[p + 2] * colWeights 47 | Jac 48 | } 49 | NBjacobianCol = function(beta, X, reg, thetas, 50 | muMarg, k, n, p, colWeights, nLambda, 51 | cMatK, preFabMat, Jac, allowMissingness, naId) { 52 | # Calculate the mean 53 | mu = exp(reg *beta) * muMarg 54 | if(allowMissingness){ 55 | preFabMat = 1 + correctXMissingness(X, mu, allowMissingness, naId)/thetas 56 | } 57 | as.matrix(-sum(preFabMat * mu/(1 + mu/thetas)^2 + reg^2)) 58 | } 59 | -------------------------------------------------------------------------------- /R/F_NBjacobianColNP.R: -------------------------------------------------------------------------------- 1 | #' Jacobian function for the estimation of a third degree GLM 2 | #' 3 | #' @param beta vector of any length 4 | #' @param X the data vector of length n 5 | #' @param reg a nxlength(beta) regressor matrix 6 | #' @param theta a scalar, the overdispersion 7 | #' @param muMarg the offset of length n 8 | #' 9 | #' @return A matrix of dimension 8-by-8 10 | NBjacobianColNP = function(beta, X, reg, 11 | theta, muMarg) { 12 | # Calculate the mean 13 | mu = exp(reg %*% beta) * muMarg 14 | # Return the Jacobian 15 | -crossprod(reg * c((1 + X/theta) * mu/(1 + 16 | mu/theta)^2), reg) 17 | } 18 | -------------------------------------------------------------------------------- /R/F_NBjacobianLibSizes.R: -------------------------------------------------------------------------------- 1 | #' Jacobian for the raw components of the independence model 2 | #' 3 | #'@param beta a vector of length n with current library size estimates 4 | #'@param X a n-by-p count matrix 5 | #'@param reg a vector of length p with relative abundance estimates 6 | #'@param thetas a n-by-p matrix with overdispersion estimates in the rows 7 | #' @param allowMissingness A boolean, are missing values present 8 | #' @param naId The numeric index of the missing values in X 9 | #' 10 | #'@return a diagonal matrix of dimension n: the Fisher information matrix 11 | NBjacobianLibSizes = function(beta, X, reg, 12 | thetas, allowMissingness, naId) { 13 | mu = exp(outer(beta, reg, "+")) 14 | X = correctXMissingness(X, mu, allowMissingness, naId) 15 | diag(-rowSums((1 + (X/thetas)) * mu/(1 + 16 | (mu/thetas))^2)) 17 | } 18 | -------------------------------------------------------------------------------- /R/F_NBjacobianPsi.R: -------------------------------------------------------------------------------- 1 | #' Jacobian for the psi of a given dimension 2 | #' 3 | #' @param beta a scalar, the current estimate 4 | #' @param X the n-by-p count matrix 5 | #' @param muMarg the nxp offset matrix 6 | #' @param reg the regressor matrix, 7 | #' the outer product of current row and column scores 8 | #' @param theta a n-by-p matrix with the dispersion parameters 9 | #' @param preFabMat a prefab matrix, (1+X/thetas) 10 | #' @param allowMissingness A boolean, are missing values present 11 | #' @param naId The numeric index of the missing values in X 12 | #' 13 | #' @return The evaluation of the jacobian function at beta, a 1-by-1 matrix 14 | NBjacobianPsi = function(beta, X, reg, muMarg, 15 | theta, preFabMat, allowMissingness, naId) { 16 | mu = muMarg * exp(reg * beta) 17 | if(allowMissingness){ 18 | preFabMat = 1 + correctXMissingness(X, mu, allowMissingness, naId)/theta 19 | } 20 | matrix(-sum(reg^2 * preFabMat * mu/(1 + 21 | mu/theta)^2), 1, 1) 22 | } 23 | -------------------------------------------------------------------------------- /R/F_NBjacobianRow.R: -------------------------------------------------------------------------------- 1 | #' A jacobian function of the NB for the row scores 2 | #' 3 | #' @param beta a vector of of length n + k +1 regression parameters to optimize 4 | #' @param X the data matrix of dimensions nxp 5 | #' @param reg a 1xp regressor matrix: outer product of column scores and psis 6 | #' @param thetas nxp matrix with the dispersion parameters 7 | #' (converted to matrix for numeric reasons) 8 | #' @param muMarg an nxp offset matrix 9 | #' @param k a scalar, the dimension of the RC solution 10 | #' @param p a scalar, the number of taxa 11 | #' @param n a scalar, the number of samples 12 | #' @param rowWeights a vector of length n, the weights used for the restrictions 13 | #' @param nLambda an integer, the number of lagrangian multipliers 14 | #' @param rMatK the lower dimension row scores 15 | #' @param preFabMat a prefab matrix, (1+X/thetas) 16 | #' @param Jac an empty Jacobian matrix 17 | #' @param allowMissingness A boolean, are missing values present 18 | #' @param naId The numeric index of the missing values in X 19 | #' 20 | #' @return a symmetric jacobian matrix of size n+k + 1 21 | NBjacobianRow = function(beta, X, reg, thetas, 22 | muMarg, k, n, p, rowWeights, nLambda, 23 | rMatK, preFabMat, Jac, allowMissingness, naId) { 24 | rMat = beta[seq_len(n)] 25 | mu = exp(rMat %*% reg) * muMarg 26 | 27 | if(allowMissingness){ 28 | preFabMat = 1 + correctXMissingness(X, mu, allowMissingness, naId)/thetas 29 | } 30 | 31 | # dLag²/dr_{ik}dlambda_{1k} already 32 | # happened dLag²/dr_{ik}dlambda_{2k} 33 | Jac[seq_len(n), n + 2] = Jac[n + 2, seq_len(n)] = 2 * 34 | rMat * rowWeights 35 | # dLag²/dr_{ik}dlambda_{3kk'} already 36 | # happened 37 | 38 | # dLag²/dr_{ik}² 39 | diag(Jac)[seq_len(n)] = -tcrossprod(reg^2, 40 | preFabMat * mu/(1 + mu/thetas)^2) + 41 | 2 * rowWeights * beta[n + 2] 42 | Jac 43 | } 44 | -------------------------------------------------------------------------------- /R/F_NBpsiInfl.R: -------------------------------------------------------------------------------- 1 | #' The influence function for the psis 2 | #' 3 | #' @param rcm an rcm object 4 | #' @param Dim the required dimensions 5 | #' 6 | #' @return The influence of every single observation 7 | #' on the psi value of this dimension 8 | NBpsiInfl = function(rcm, Dim = 1) { 9 | mu = extractE(rcm, seq_len(Dim)) 10 | # Take also lower dimensions into account 11 | # here 12 | thetaMat = matrix(byrow = TRUE, nrow = nrow(rcm$X), 13 | ncol = ncol(rcm$X), data = rcm$thetas[, 14 | switch(as.character(Dim), `0` = "Independence", 15 | `0.5` = "Filtered", paste0("Dim", 16 | Dim))]) 17 | reg = if (is.null(rcm$covariates)) { 18 | rcm$rMat[, Dim, drop = FALSE] %*% rcm$cMat[Dim,, drop = FALSE] 19 | } else {getRowMat(sampleScore = rcm$covariates %*% rcm$alpha[, Dim], 20 | responseFun = rcm$responseFun, 21 | NB_params = rcm$NB_params[, , Dim])} 22 | rcm$X = correctXMissingness(rcm$X, mu, rcm$NApresent, naId = is.na(rcm$X)) 23 | 24 | -((rcm$X - mu) * (thetaMat + mu))/(reg * 25 | (thetaMat + rcm$X) * mu) 26 | } 27 | -------------------------------------------------------------------------------- /R/F_NBrowInfl.R: -------------------------------------------------------------------------------- 1 | #' The influence function for the row scores 2 | #' 3 | #' @param rcm an rcm object 4 | #' @param Dim the required dimension 5 | #' 6 | #' @return A list with components 7 | #' \item{score}{a matrix with components of the score function} 8 | #' \item{InvJac}{A square matrix of dimension n with the components 9 | #' of the Jacobian related to the row scores} 10 | NBrowInfl = function(rcm, Dim = 1) { 11 | reg = rcm$psis[Dim] * rcm$cMat[Dim, ] 12 | mu = extractE(rcm, seq_len(Dim)) 13 | rcm$X = correctXMissingness(rcm$X, mu, rcm$NApresent) 14 | #Take also lower dimensions into account here 15 | thetaMat = matrix(byrow = TRUE, nrow = nrow(rcm$X), 16 | ncol = ncol(rcm$X), data = rcm$thetas[, 17 | switch(as.character(Dim), `0` = "Independence", 18 | `0.5` = "Filtered", paste0("Dim", 19 | Dim))]) 20 | lambdaRow = rcm$lambdaRow[seq_k(Dim)] 21 | rMatK = rcm$rMat[, seq_len(Dim - 1), 22 | drop = FALSE] 23 | tmp = if (Dim > 1) 24 | rcm$lambdaRow[-c(1, 2)] %*% rMatK else 0 25 | 26 | score = reg * (rcm$X - mu)/(1 + mu/thetaMat) + 27 | c(rcm$rowWeights * (lambdaRow[1] + 28 | lambdaRow[2] * 2 * rcm$rMat[, 29 | Dim] + tmp)) 30 | 31 | JacobianInv = solve(NBjacobianRow(beta = c(rcm$rMat[, 32 | Dim], lambdaRow), X = rcm$X, reg = reg, 33 | thetas = thetaMat, muMarg = mu, k = Dim, 34 | p = ncol(rcm$X), n = nrow(rcm$X), 35 | rowWeights = rcm$rowWeights, nLambda = Dim + 36 | 1, rMatK = rMatK, allowMissingness = rcm$NApresent, 37 | naId = is.na(rcm$X))) 38 | # Inverse Jacobian 39 | 40 | # After a long thought: The X's do not 41 | # affect the estimation of the lambda 42 | # parameters! Matrix of all influences 43 | # becomes too large: return score and 44 | # inverse jacobian 45 | return(list(score = score, InvJac = JacobianInv[seq_len(nrow(rcm$X)), 46 | seq_len(nrow(rcm$X))])) 47 | } 48 | -------------------------------------------------------------------------------- /R/F_addOrthProjection.R: -------------------------------------------------------------------------------- 1 | #' This function adds orthogonal projections to a given plot 2 | #' 3 | #' @param RCMplot the RCMplot object 4 | #' @param sample,species,variable names or approximate coordinates of sample, 5 | #' species or variable 6 | #' @param Dims The dimensions of the solutions that have been plotted 7 | #' @param addLabel a boolean, should the r-s-psi label be added? 8 | #' @param labPos the position of the label. Will be calculated if not provided 9 | #' 10 | #' @return a modified ggplot object that contains the geom_segment object 11 | #' that draws the projection 12 | #' @export 13 | #' @import ggplot2 14 | #' @import phyloseq 15 | #' @seealso \code{\link{plot.RCM}} 16 | #' @examples 17 | #' data(Zeller) 18 | #' require(phyloseq) 19 | #' tmpPhy = prune_taxa(taxa_names(Zeller)[seq_len(100)], 20 | #' prune_samples(sample_names(Zeller)[seq_len(50)], Zeller)) 21 | #' zellerRCM = RCM(tmpPhy, k = 2, round = TRUE) 22 | #' zellerPlot = plot(zellerRCM, returnCoords = TRUE) 23 | #' addOrthProjection(zellerPlot, species = c(-0.35,1.1), sample = c(1,1.2)) 24 | addOrthProjection = function(RCMplot, sample = NULL, species = NULL, 25 | variable = NULL, Dims = c(1,2), addLabel = FALSE, 26 | labPos = NULL) { 27 | nulls = is.null(sample) + is.null(species) + 28 | is.null(variable) 29 | if (nulls != 1) 30 | stop("Provide two variables categories for a projection! \n") 31 | if (is.null(species)) 32 | stop("Species should be provided, 33 | cannot project sample onto variable vector! \n") 34 | 35 | dimNames = paste0("Dim", Dims) 36 | if (is.numeric(sample)) { 37 | samp = which.min(colSums((t(RCMplot$samples[, 38 | dimNames]) - sample)^2)) 39 | # Closest to approximate coordinate 40 | sampName = rownames(RCMplot$samples)[samp] 41 | } else { 42 | sampName = sample 43 | } 44 | 45 | if (is.numeric(species)) { 46 | species = which.min(colSums((t(RCMplot$species[, 47 | paste0("end", Dims)]) - species)^2)) 48 | # Closest to approximate coordinate 49 | speciesName = rownames(RCMplot$species)[species] 50 | } else { 51 | speciesName = species 52 | } 53 | 54 | if (is.numeric(variable)) { 55 | variable = which.min(colSums((t(RCMplot$variables[, 56 | dimNames]) - species)^2)) 57 | # Closest to approximate coordinate 58 | varName = rownames(RCMplot$variables)[variable] 59 | } else { 60 | varName = variable 61 | } 62 | 63 | mat1 = unlist(if (is.null(variable)) { 64 | RCMplot$samples[sampName, dimNames] 65 | } else { 66 | RCMplot$variables[varName, dimNames] 67 | }) 68 | mat2 = unlist(RCMplot$species[speciesName, 69 | c(vapply(Dims, FUN.VALUE = character(2), 70 | function(x) { 71 | c(paste0("end", x), paste0("origin",x)) 72 | }))]) 73 | 74 | RCMplot$plot = RCMplot$plot + geom_segment(inherit.aes = FALSE, 75 | mapping = aes_string(x = 0, y = 0, 76 | xend = "Dim1", yend = "Dim2"), 77 | data = data.frame(t(mat1))) #The sample or variable vector 78 | IntCoordsXTip = (mat2["end1"] + mat2["end2"] * 79 | mat1[2]/mat1[1])/((mat1[2]/mat1[1])^2 + 80 | 1) 81 | IntCoordsYTip = IntCoordsXTip * mat1[2]/mat1[1] 82 | 83 | IntCoordsXStart = (mat2["origin1"] + 84 | mat2["origin2"] * mat1[2]/mat1[1])/((mat1[2]/mat1[1])^2 + 85 | 1) 86 | IntCoordsYStart = IntCoordsXStart * mat1[2]/mat1[1] 87 | 88 | dfTip = data.frame(x = mat2[grep(names(mat2), 89 | pattern = "end")][1], y = mat2[grep(names(mat2), 90 | pattern = "end")][2], xend = IntCoordsXTip, 91 | yend = IntCoordsYTip) 92 | dfStart = data.frame(x = mat2[grep(names(mat2), 93 | pattern = "origin")][1], y = mat2[grep(names(mat2), 94 | pattern = "origin")][2], xend = IntCoordsXStart, 95 | yend = IntCoordsYStart) 96 | 97 | RCMplot$plot = RCMplot$plot + geom_segment(inherit.aes = FALSE, 98 | mapping = aes_string(x = "x", y = "y", 99 | xend = "xend", yend = "yend"), 100 | data = dfTip, linetype = "dashed") 101 | RCMplot$plot = RCMplot$plot + geom_segment(inherit.aes = FALSE, 102 | mapping = aes_string(x = "x", y = "y", 103 | xend = "xend", yend = "yend"), 104 | data = dfStart, linetype = "dashed") 105 | 106 | # Add a red line for the projection 107 | dfRed = data.frame(xend = IntCoordsXTip, 108 | yend = IntCoordsYTip, x = IntCoordsXStart, 109 | y = IntCoordsYStart) 110 | RCMplot$plot = RCMplot$plot + geom_segment(inherit.aes = FALSE, 111 | col = "orange", mapping = aes_string(x = "x", 112 | y = "y", xend = "xend", yend = "yend"), 113 | data = dfRed) 114 | 115 | if (addLabel) { 116 | # Add some annotation 117 | labPos = if (is.null(labPos)) { 118 | apply(RCMplot$samples[, dimNames], 119 | 2, min) * 1.1 120 | } else { 121 | labPos 122 | } 123 | xLab = labPos[1] 124 | yLab = labPos[2] 125 | dfRed = within(dfRed, { 126 | xLab = xLab * 2 127 | yLab = yLab * 2 128 | }) 129 | RCMplot$plot = RCMplot$plot + geom_segment(inherit.aes = FALSE, 130 | mapping = aes_string(x = "xLab", 131 | y = "yLab", xend = "xend", 132 | yend = "yend"), data = dfRed/2, 133 | arrow = arrow(length = unit(0.2, 134 | "cm")), size = 0.25) + annotate("text", 135 | col = "orange", label = "r~psi~s", 136 | x = xLab, y = yLab, parse = TRUE, 137 | size = 7) 138 | } 139 | 140 | RCMplot$plot 141 | } 142 | -------------------------------------------------------------------------------- /R/F_arrayProd.R: -------------------------------------------------------------------------------- 1 | #' An auxiliary R function to 'array' multiply an array with a vector, 2 | #' kindly provided by Joris Meys 3 | #' 4 | #' @param x a axbxc array 5 | #' @param y a vector of length c 6 | #' 7 | #' @return a axb matrix. The ij-th element equals sum(x[i,j,]*y) 8 | arrayprod <- function(x, y) { 9 | 10 | xdim <- dim(x) 11 | outdim <- xdim[c(1, 2)] 12 | outn <- prod(outdim) 13 | 14 | yexpand <- rep(y, each = outn) 15 | 16 | tmp <- x * yexpand 17 | 18 | dim(tmp) <- c(outn, xdim[3]) 19 | out <- rowSums(tmp) 20 | 21 | dim(out) <- outdim 22 | 23 | out 24 | } 25 | -------------------------------------------------------------------------------- /R/F_buildCentMat.R: -------------------------------------------------------------------------------- 1 | #' A function to build a centering matrix based on a dataframe 2 | #' 3 | #' @param object an rcm object or dataframe 4 | #' 5 | #' @return a centering matrix consisting of ones and zeroes, 6 | #' or a list with components 7 | #' \item{centMat}{a centering matrix consisting of ones and zeroes} 8 | #' \item{datFrame}{The dataframe with factors with one level removed} 9 | buildCentMat = function(object) { 10 | if (is.data.frame(object)) { 11 | nFactorLevels = vapply(object, FUN.VALUE = double(1), 12 | function(x) { 13 | if (is.factor(x)) 14 | nlevels(x) else 1 15 | }) 16 | # Number of levels per factor 17 | oneLevelID = vapply(object, FUN.VALUE = TRUE, function(x) { 18 | length(unique(x)) == 1 19 | }) 20 | object[, oneLevelID] = NULL #Drop factors with one level 21 | if (any(oneLevelID)) { 22 | warning("The following variables were not included 23 | in the analyses because they have only one value: \n", 24 | paste(object[oneLevelID], sep = " \n"), immediate. = TRUE) 25 | } 26 | } else if (is(object, "RCM")) { 27 | nFactorLevels = vapply(FUN.VALUE = integer(1), unique(object$attribs), 28 | function(x) { 29 | sum(object$attribs == x) 30 | }) #Number of levels per factor 31 | 32 | } else { 33 | stop("Invalid object supplied! \n") 34 | } 35 | # Already prepare the matrix that defines the equations for 36 | # centering the coefficients of the dummy variables 37 | centMat = t(vapply(FUN.VALUE = numeric(sum(nFactorLevels)), 38 | seq_along(nFactorLevels), function(i) { 39 | c(rep.int(0L, sum(nFactorLevels[seq(0, i - 1)])), 40 | rep.int(if (nFactorLevels[i] == 1) 0L else 1L, 41 | nFactorLevels[i]), rep.int(0L, sum(nFactorLevels[-seq(1, 42 | i)]))) 43 | })) 44 | centMat = if (all(rowSums(centMat) == 0)) { 45 | matrix(0L, 1, sum(nFactorLevels)) 46 | } else { 47 | centMat[rowSums(centMat) > 0, , drop = FALSE] 48 | } 49 | if (is.data.frame(object)) 50 | return(list(centMat = centMat, datFrame = object)) else return(centMat) 51 | } 52 | -------------------------------------------------------------------------------- /R/F_buildConfMat.R: -------------------------------------------------------------------------------- 1 | #' A function to build the confounder matrices 2 | #' 3 | #' @param x a matrix, data frame or character string 4 | #' @param ... further arguments passed on to other methods 5 | #' 6 | #' For the preliminary trimming, we do not include an intercept, 7 | #' but we do include all the levels of the factors using contrasts=FALSE: 8 | #' we want to do the trimming in every subgroup, so no hidden reference levels 9 | #' For the filtering we just use a model with an intercept and 10 | #' treatment coding, here the interest is only in adjusting the offset 11 | #' 12 | #' @return a list with components 13 | #' \item{confModelMatTrim}{A confounder matrix without intercept, with all 14 | #' levels of factors present. This will be used to trim out taxa that have 15 | #' zero abundances in any subgroup defined by confounders} 16 | #' \item{confModelMat}{A confounder matrix with intercept, 17 | #' and with reference levels for factors absent. 18 | #' This will be used to fit the model to modify the independence model, 19 | #' and may include continuous variables} 20 | #' @importFrom stats model.matrix 21 | buildConfMat = function(x, ...) { 22 | UseMethod("buildConfMat", x) 23 | } 24 | 25 | #' buildConfMat.data.frame 26 | #' 27 | #' @param confounders a data frame of confounders 28 | #' @param n the number of rows of the count matrix 29 | #' 30 | #' @return see buidConfMat 31 | buildConfMat.data.frame = function(confounders, n) { 32 | if (n != NROW(confounders)) { 33 | # Check dimensions 34 | stop("Data and confounder matrix do not have the same number 35 | of samples! \n") 36 | } 37 | if (anyNA(confounders)) { 38 | stop("Confounders contain missing values!\n") 39 | } 40 | #Check alias structure 41 | checkAlias(confounders, names(confounders)) 42 | # No intercept or continuous variables for preliminary 43 | # trimming 44 | confModelMatTrim = model.matrix(object = as.formula(paste("~", 45 | paste(names(confounders)[vapply(FUN.VALUE = TRUE, confounders, 46 | is.factor)], collapse = "+"), "-1")), data = confounders, 47 | contrasts.arg = lapply(confounders[vapply(FUN.VALUE = TRUE, 48 | confounders, is.factor)], contrasts, contrasts = FALSE)) 49 | # With intercept for filtering 50 | confModelMat = model.matrix(object = as.formula(paste("~", 51 | paste(names(confounders), collapse = "+"))), data = confounders, 52 | contrasts.arg = lapply(confounders[vapply(FUN.VALUE = TRUE, 53 | confounders, is.factor)], contrasts, contrasts = TRUE)) 54 | list(confModelMatTrim = confModelMatTrim, confModelMat = confModelMat) 55 | } 56 | #' buildConfMat.character 57 | #' @param confounders a numeric matrix of confounders 58 | #' @param physeq a physeq object with a sample_data slot 59 | #' 60 | #' @return see buidConfMat.numeric 61 | buildConfMat.character = function(confounders, physeq) { 62 | if (!is(physeq, "phyloseq")) { 63 | stop("Providing confounders through variable names is only allowed 64 | if phyloseq object is provided! \n") 65 | } 66 | confounders = as(sample_data(physeq),"data.frame")[,make.names(confounders), 67 | drop = FALSE] 68 | # The dataframe with the confounders 69 | buildConfMat(confounders, n = nsamples(physeq)) 70 | } 71 | buildConfMat.default = function(...) { 72 | stop("Please provide the confounders either as 73 | dataframe, or character string! \n") 74 | } 75 | -------------------------------------------------------------------------------- /R/F_buildCovMat.R: -------------------------------------------------------------------------------- 1 | #' A function to build the covariate matrix of the constraints 2 | #' 3 | #' @param covariates the covariates, either as dataframe or as character string 4 | #' @param dat the phyloseq object 5 | #' 6 | #' In this case we will 1) Include dummy's for every level of the 7 | #' categorical variable, and force them to sum to zero. 8 | #' This is needed for plotting and required for 9 | #' reference level independent normalization. 10 | #' 2) Exclude an intercept. The density function f() 11 | #' will provide this already. 12 | #' 13 | #' @return a list with components 14 | #' \item{covModelMat}{The model matrix} 15 | #' \item{datFrame}{The dataframe used to construct the model matrix} 16 | #' @importFrom stats model.matrix formula 17 | buildCovMat = function(covariates, dat) { 18 | if (is.data.frame(covariates)) { 19 | datFrame = covariates 20 | covariatesNames = names(covariates) 21 | } else if (is.character(covariates)) { 22 | if (!is(dat, "phyloseq")) { 23 | stop("Providing covariates through variable names is only allowed 24 | if phyloseq object is provided! \n") 25 | } 26 | covariates = make.names(covariates) #Ensure valid names 27 | if (covariates[[1]] == "all") { 28 | covariates = as(sample_variables(dat), "data.frame") 29 | } 30 | # Enable the 'all' option if phyloseq object is provided 31 | datFrame = as(sample_data(dat), "data.frame")[, covariates, 32 | drop = FALSE] 33 | # The dataframe with the covariates 34 | covariatesNames = covariates 35 | } else { 36 | stop("Please provide the covariates either as dataframe 37 | or as character string! \n") 38 | } 39 | if (nsamples(dat) != NROW(datFrame)) { 40 | # Check dimensions 41 | stop("Data and covariate matrix do not have 42 | the same number of samples! \n") 43 | } 44 | logVec = vapply(FUN.VALUE = TRUE, datFrame, is.logical) 45 | intVec = vapply(FUN.VALUE = TRUE, datFrame, is.integer) 46 | charVec = vapply(FUN.VALUE = TRUE, datFrame, is.character) 47 | 48 | if (any(logVec)) { 49 | datFrame[, logVec] = lapply(datFrame[logVec], as.factor) 50 | # Convert logicals to factors warning('Logicals converted to 51 | # factors! \n', immediate. = TRUE). No warning needed 52 | } 53 | if (any(intVec)) { 54 | datFrame[, intVec] = lapply(datFrame[intVec], as.numeric) 55 | # Convert integers to numeric 56 | warning("Integer values treated as numeric! \n", immediate. = TRUE) 57 | } 58 | if (any(charVec)) { 59 | datFrame[, charVec] = lapply(datFrame[charVec], factor) 60 | # Convert characters to factor 61 | warning("Character vectors treated as factors! \n", immediate. = TRUE) 62 | } 63 | #Drop unused levels 64 | datFrame = droplevels(datFrame) 65 | nFactorLevels = vapply(FUN.VALUE = integer(1), datFrame, 66 | function(x) { 67 | if (is.factor(x)) 68 | nlevels(x) else 0L 69 | }) #Number of levels per factor 70 | singleFacID = vapply(FUN.VALUE = TRUE, datFrame, is.factor) & 71 | (nFactorLevels == 1L) 72 | if (any(singleFacID)) { 73 | warning("The following variables were not included in the analyses 74 | because they are factors with only one level: \n", 75 | paste(covariates[singleFacID], sep = " \n"), 76 | immediate. = TRUE, call. = FALSE) 77 | # Drop factors with one level 78 | covariatesNames = covariatesNames[!singleFacID] 79 | nFactorLevels = nFactorLevels[covariatesNames] 80 | datFrame = datFrame[, covariatesNames, drop = FALSE] 81 | } 82 | #Check for alias structures 83 | checkAlias(datFrame, covariatesNames) 84 | 85 | # Center and scale the continuous covariates 86 | datFrame[vapply(FUN.VALUE = TRUE, datFrame, is.numeric)] = 87 | scale(datFrame[vapply(FUN.VALUE = TRUE, datFrame, is.numeric)]) 88 | 89 | covModelMat = model.matrix( 90 | object = formula(paste("~", paste(covariatesNames, 91 | collapse = "+"), "-1")), data = datFrame, 92 | contrasts.arg = lapply(datFrame[vapply(datFrame, 93 | is.factor, FUN.VALUE = TRUE)], contrasts, contrasts = FALSE)) 94 | if (NCOL(covModelMat) == 1) 95 | stop("A constrained ordination with a variable 96 | with only one level is meaningless.\n 97 | Please provide more covariates or perform an unconstrained analysis.", 98 | call. = FALSE) 99 | list(covModelMat = covModelMat, datFrame = datFrame) 100 | } 101 | -------------------------------------------------------------------------------- /R/F_buildDesign.R: -------------------------------------------------------------------------------- 1 | #' A function to build the design matrix 2 | #' 3 | #' @param sampleScore a vector of environmental scores 4 | #' @param responseFun A character string, indicating the shape 5 | #' of the response function 6 | #' 7 | #' For dynamic response function estimation, the same desing matrix 8 | #' as for the quadratic one is returned. 9 | #' Will throw an error when an unknown repsonse function is provided 10 | #' 11 | #' @return A design matrix of dimension n-by-f 12 | #' @importFrom stats model.matrix 13 | buildDesign = function(sampleScore, responseFun) { 14 | # With intercept 15 | design = switch(responseFun, linear = model.matrix(~sampleScore), 16 | quadratic = model.matrix(~sampleScore + 17 | I(sampleScore^2)), dynamic = model.matrix(~sampleScore + 18 | I(sampleScore^2)), stop("Unknown response function")) 19 | } 20 | -------------------------------------------------------------------------------- /R/F_checkAlias.R: -------------------------------------------------------------------------------- 1 | #' Check for alias structures in a dataframe, and throw an error when one is found 2 | #' @param datFrame the data frame to be checked for alias structure 3 | #' @param covariatesNames The names of the variables to be considered 4 | #' 5 | #' @return Throws an error when an alias structure is detected, 6 | #' returns invisible otherwise 7 | #' 8 | #' @importFrom stats alias formula 9 | #' @export 10 | #' @examples 11 | #' #Make a dataframe with aliased variables 12 | #' df = data.frame(foo = rnorm(10), baa = rep(c(TRUE, FALSE), each = 5), 13 | #' foo2 = factor(rep(c("male", "female"), each = 5))) 14 | #' checkAlias(df, c("foo", "baa")) 15 | #' #Check test files for the error being thrown 16 | checkAlias = function(datFrame, covariatesNames){ 17 | mockDf = cbind("Mock" = 1, datFrame) 18 | #Fake dataframe for syntax purposes 19 | Alias = alias(object = formula(paste("Mock~", 20 | paste(covariatesNames, collapse = "+"), 21 | "-1")), mockDf) 22 | if(!is.null(Alias$Complete)){ 23 | stop("Sample variables\n'", paste(rownames(Alias$Complete), 24 | collapse ="' and '"), 25 | "'\nare aliased with other variables. 26 | Drop some sample-variables and try again.") 27 | } else { 28 | return(invisible()) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /R/F_constrCorresp.R: -------------------------------------------------------------------------------- 1 | #' Constrained correspondence analysis with adapted powers 2 | #' 3 | #' @param X outcome matrix 4 | #' @param Y constraining matrix 5 | #' @param rowExp,colExp see ?RCM_NB 6 | #' @param muMarg mean matrix under independence model 7 | #' 8 | #' @return a list with eigenvalues, aliased variables and environmentam gradients 9 | #' @details the vegan version, adapted for flexible powers rowExp and colExp 10 | #' @importFrom stats weighted.mean 11 | constrCorresp = function(X, Y, rowExp, colExp, 12 | muMarg = outer(rowSums(X), colSums(X))/sum(X)){ 13 | X = X/sum(X) 14 | RW = rowSums(X) 15 | CW = colSums(X) 16 | Xinit = diag(1/RW^rowExp) %*% (X - muMarg/sum(muMarg)) %*% diag(1/CW^colExp) 17 | ZERO <- sqrt(.Machine$double.eps) 18 | envcentre <- apply(Y, 2, weighted.mean, w = RW) 19 | Y <- scale(Y, center = envcentre, scale = FALSE) 20 | Y <- sweep(Y, 1, sqrt(RW), "*") 21 | Q <- qr(Y) 22 | rank <- sum(Q$pivot[seq_len(Q$rank)] > 0) 23 | if (length(Q$pivot) > Q$rank){ 24 | alias <- colnames(Q$qr)[-seq_len(Q$rank)] 25 | } else{ 26 | alias <- NULL 27 | } 28 | kept <- seq_along(Q$pivot) <= Q$rank & Q$pivot > 0 29 | Yfit <- qr.fitted(Q, Xinit) 30 | sol <- svd(Yfit) 31 | lambda <- sol$d^2 32 | u <- sol$u 33 | zeroev <- abs(lambda) < max(ZERO, ZERO * lambda[1L]) 34 | if (any(zeroev)) { 35 | lambda <- lambda[!zeroev] 36 | u <- u[, !zeroev, drop = FALSE] 37 | } 38 | posev <- lambda > 0 39 | xx <- Y[, Q$pivot[kept], drop = FALSE] 40 | bp <- (1/sqrt(colSums(xx^2))) * crossprod(xx, u[, posev, drop = FALSE]) 41 | list(eig = lambda, alias = alias, biplot = bp) 42 | } 43 | 44 | -------------------------------------------------------------------------------- /R/F_correctXMissingness.R: -------------------------------------------------------------------------------- 1 | #' Replace missing entries in X by their expectation to set their 2 | #' contribution to the estimating equations to zero 3 | #' @param X the matrix of counts 4 | #' @param mu the matrix of expectations 5 | #' @param allowMissingness A boolean, are missing values present 6 | #' @param naId The numeric index of the missing values in X 7 | #' 8 | #' @return The matrix X with the NA entries replaced by the 9 | #' corresponding entries in mu 10 | #' 11 | #' @note This may seem like a hacky approach, but it avoids having to deal 12 | #' with NAs in functions like crossprod(). 13 | correctXMissingness = function(X, mu, allowMissingness, naId){ 14 | if(allowMissingness){ 15 | X[naId] = mu[naId] 16 | } 17 | X 18 | } 19 | -------------------------------------------------------------------------------- /R/F_dLR_nb.R: -------------------------------------------------------------------------------- 1 | #' A function that returns the value of the partial derivative of the 2 | #' log-likelihood ratio to alpha, keeping the response functions fixed 3 | #' 4 | #' @param Alpha a vector of length d + k*(2+(k-1)/2), 5 | #' the environmental gradient plus the lagrangian multipliers 6 | #' @param X the n-by-p count matrix 7 | #' @param CC a n-by-d covariate vector 8 | #' @param responseFun a character string indicating 9 | #' the type of response function 10 | #' @param psi a scalar, an importance parameter 11 | #' @param NB_params Starting values for the NB_params 12 | #' @param NB_params_noLab Starting values for the NB_params without label 13 | #' @param d an integer, the number of covariate parameters 14 | #' @param alphaK a matrix of environmental gradients of lower dimensions 15 | #' @param k an integer, the current dimension 16 | #' @param centMat a nLambda1s-by-d centering matrix 17 | #' @param nLambda an integer, number of lagrangian multipliers 18 | #' @param nLambda1s an integer, number of centering restrictions 19 | #' @param thetaMat a matrix of size n-by-p with estimated dispersion parameters 20 | #' @param muMarg an n-by-p offset matrix 21 | #' @param ncols a scalar, the number of columns of X 22 | #' @param envGradEst a character string, indicating how the 23 | #' environmental gradient should be fitted. 24 | #' 'LR' using the likelihood-ratio criterion, 25 | #' or 'ML' a full maximum likelihood solution 26 | #' @param allowMissingness A boolean, are missing values present 27 | #' @param naId The numeric index of the missing values in X 28 | #' @param ... further arguments passed on to other methods 29 | #' 30 | #' @return: The value of the lagrangian and the constraining equations 31 | dLR_nb <- function(Alpha, X, CC, responseFun = c("linear", 32 | "quadratic", "nonparametric", "dynamic"), 33 | psi, NB_params, NB_params_noLab, d, alphaK, 34 | k, centMat, nLambda, nLambda1s, thetaMat, 35 | muMarg, ncols, envGradEst, allowMissingness, naId, ...) { 36 | 37 | # Extract the parameters 38 | alpha = Alpha[seq_len(d)] 39 | lambda1s = Alpha[d + seq_len(nLambda1s)] 40 | # Multiple centering requirements now! 41 | lambda2 = Alpha[d + nLambda1s + 1] 42 | lambda3 = if (k == 1) { 43 | 0 44 | } else { 45 | Alpha[(d + nLambda1s + 2):(d + nLambda)] 46 | } 47 | 48 | sampleScore = CC %*% alpha 49 | # A linear combination of the 50 | # environmental variables yields the 51 | # sampleScore 52 | design = buildDesign(sampleScore, responseFun) 53 | mu = muMarg * exp(design %*% NB_params * 54 | psi) 55 | X = correctXMissingness(X, mu, allowMissingness, naId) 56 | tmp = (X - mu)/(1 + mu/thetaMat) 57 | responseFun = switch(responseFun, dynamic = "quadratic", 58 | responseFun) 59 | 60 | if (envGradEst == "LR"){ 61 | mu0 = muMarg * c(exp(design %*% NB_params_noLab * 62 | psi)) 63 | tmp0 = (X - mu0)/(1 + mu0/thetaMat) 64 | } 65 | # The lagrangian depends on the shape of 66 | # the response function 67 | lag = switch(responseFun, linear = if (envGradEst == 68 | "LR") { 69 | psi * (crossprod(CC, tmp) %*% (NB_params[2, 70 | ]) - rowSums(crossprod(CC, tmp0 * 71 | NB_params_noLab[2]))) 72 | } else { 73 | psi * (crossprod(CC, tmp) %*% (NB_params[2, 74 | ])) 75 | }, quadratic = if (envGradEst == "LR") { 76 | psi * (c(crossprod(CC, tmp) %*% (NB_params[2, 77 | ])) + c(crossprod(CC * c(sampleScore), 78 | tmp) %*% (NB_params[3, ]) * 2) - 79 | rowSums(crossprod(CC, tmp0) * 80 | NB_params_noLab[2]) - rowSums(crossprod(CC * 81 | c(sampleScore), tmp0) * NB_params_noLab[3]) * 82 | 2) 83 | } else { 84 | psi * (c(crossprod(CC, tmp) %*% (NB_params[2, 85 | ])) + c(crossprod(CC * c(sampleScore), 86 | tmp) %*% (NB_params[3, ]) * 2)) 87 | }, stop("Unknown response function provided! \n")) + 88 | # Restrictions do not depend on response 89 | # function 90 | c(lambda1s %*% centMat) + lambda2 * 2 * 91 | alpha + if (k > 1) 92 | alphaK %*% lambda3 else 0 93 | 94 | centerFactors = centMat %*% alpha 95 | size = sum(alpha^2) - 1 96 | if(k == 1){ 97 | return(c(lag, centerFactors, size)) 98 | } 99 | ortho = crossprod(alphaK, alpha) 100 | c(lag, centerFactors, size, ortho) 101 | } 102 | -------------------------------------------------------------------------------- /R/F_dNBabunds.R: -------------------------------------------------------------------------------- 1 | #'A score function for the column components of the independence model 2 | #' (mean relative abundances) 3 | #' 4 | #'@param beta a vector of length p with current abundance estimates 5 | #'@param X a n-by-p count matrix 6 | #'@param reg a vector of length n with library sizes estimates 7 | #'@param thetas a n-by-p matrix with overdispersion estimates in the rows 8 | #' @param allowMissingness A boolean, are missing values present 9 | #' @param naId The numeric index of the missing values in X 10 | #' 11 | #'@return a vector of length p with evaluations of the score function 12 | dNBabundsOld = function(beta, X, reg, thetas, allowMissingness, naId) { 13 | mu = exp(outer(reg, beta, "+")) 14 | X = correctXMissingness(X, mu, allowMissingness, naId) 15 | score = colSums((X - mu)/(1 + (mu/thetas))) 16 | } 17 | dNBabunds = function(beta, X, reg, thetas, allowMissingness, naId) { 18 | mu = exp(reg + beta) 19 | X = correctXMissingness(X, mu, allowMissingness, naId) 20 | sum((X - mu)/(1 + (mu/thetas))) 21 | } 22 | -------------------------------------------------------------------------------- /R/F_dNBlibSizes.R: -------------------------------------------------------------------------------- 1 | #'A score function for the row components of the independence model 2 | #'(library sizes) 3 | #' 4 | #'@param beta a vector of length n with current library size estimates 5 | #'@param X a n-by-p count matrix 6 | #'@param reg a vector of length p with relative abundance estimates 7 | #'@param thetas a n-by-p matrix with overdispersion estimates in the rows 8 | #' @param allowMissingness A boolean, are missing values present 9 | #' @param naId The numeric index of the missing values in X 10 | #' 11 | #'@return a vector of length n with evaluations of the score function 12 | dNBlibSizes = function(beta, X, reg, thetas, allowMissingness, naId) { 13 | mu = exp(outer(beta, reg, "+")) 14 | X = correctXMissingness(X, mu, allowMissingness, naId) 15 | rowSums((X - mu)/(1 + (mu/thetas))) 16 | } 17 | -------------------------------------------------------------------------------- /R/F_dNBllcol.R: -------------------------------------------------------------------------------- 1 | #'A score function for the estimation of the column scores 2 | #' in an unconstrained RC(M) model 3 | #' 4 | #' @param beta vector of length p+1+1+(k-1): p row scores, 5 | #' 1 centering, one normalization and (k-1) orhtogonality lagrangian multipliers 6 | #' @param X the nxp data matrix 7 | #' @param reg a nx1 regressor matrix: outer product of rowScores and psis 8 | #' @param thetas nxp matrix with the dispersion parameters 9 | #' (converted to matrix for numeric reasons) 10 | #' @param muMarg the nxp offset 11 | #' @param k an integer, the dimension of the RC solution 12 | #' @param p an integer, the number of taxa 13 | #' @param n an integer, the number of samples 14 | #' @param nLambda an integer, the number of restrictions 15 | #' @param colWeights the weights used for the restrictions 16 | #' @param cMatK the lower dimensions of the colScores 17 | #' @param allowMissingness A boolean, are missing values present 18 | #' @param naId The numeric index of the missing values in X 19 | #' @param ... further arguments passed on to the jacobian 20 | 21 | #' @return A vector of length p+1+1+(k-1) with evaluations of the 22 | #' derivative of lagrangian 23 | dNBllcolOld = function(beta, X, reg, thetas, 24 | muMarg, k, p, n, colWeights, nLambda, 25 | cMatK, allowMissingness, naId, ...) { 26 | cMat = matrix(beta[seq_len(p)], byrow = TRUE, 27 | ncol = p, nrow = 1) 28 | mu = exp(reg %*% cMat) * muMarg 29 | X = correctXMissingness(X, mu, allowMissingness, naId) 30 | lambda1 = beta[p + 1] 31 | # Lagrangian multiplier for centering 32 | # restrictions sum(abunds*r_{ik}) = 0 33 | lambda2 = beta[p + 2] 34 | # Lagrangian multiplier for normalization 35 | # restrictions sum(abunds*r^2_{ik}) = 1 36 | lambda3 = if (k == 1) { 37 | 0 38 | } else { 39 | beta[(p + 3):length(beta)] 40 | } 41 | # Lagrangian multiplier for 42 | # orthogonalization restriction 43 | 44 | score = crossprod(reg, ((X - mu)/(1 + 45 | mu/thetas))) + colWeights * (lambda1 + 46 | lambda2 * 2 * cMat + (lambda3 %*% 47 | cMatK)) 48 | 49 | center = sum(colWeights * cMat) 50 | unitSum = sum(colWeights * cMat^2) - 51 | 1 52 | if (k == 1) { 53 | return(c(score, center, unitSum)) 54 | } 55 | orthogons = tcrossprod(cMatK, cMat * 56 | colWeights) 57 | return(c(score, center, unitSum, orthogons)) 58 | } 59 | 60 | dNBllcol = function(beta, X, reg, thetas, muMarg, allowMissingness, naId, ...) { 61 | mu = exp(reg * beta) * muMarg 62 | X = correctXMissingness(X, mu, allowMissingness, naId) 63 | sum(reg*((X - mu)/(1 + mu/thetas))) 64 | } 65 | -------------------------------------------------------------------------------- /R/F_dNBllcolNP.R: -------------------------------------------------------------------------------- 1 | #'Estimation of the parameters of a third degree GLM 2 | #' 3 | #' @param beta A vector of any length 4 | #' @param X the data vector of length n 5 | #' @param reg a nxlength(beta) regressor matrix 6 | #' @param theta a scalar, the overdispersion 7 | #' @param muMarg the offset of length n 8 | #' @param allowMissingness A boolean, are missing values present 9 | #' @param naId The numeric index of the missing values in X 10 | #' @param ... further arguments passed on to the jacobian 11 | 12 | #' @return A vector of the same length as beta with evaluations 13 | #' of the score function 14 | dNBllcolNP = function(beta, X, reg, theta, 15 | muMarg, allowMissingness, naId, ...) { 16 | mu = exp(reg %*% beta) * muMarg 17 | X = correctXMissingness(X, mu, allowMissingness, naId) 18 | crossprod(reg, ((X - mu)/(1 + mu/theta))) 19 | } 20 | -------------------------------------------------------------------------------- /R/F_dNBllcol_constr.R: -------------------------------------------------------------------------------- 1 | #' The score function of the response function for 1 taxon at the time 2 | #' @param betas a vector of v parameters of the 3 | #' response function of a single taxon 4 | #' @param X the count vector of length n 5 | #' @param reg a n-by-v matrix of regressors 6 | #' @param theta The dispersion parameter of this taxon 7 | #' @param muMarg offset of length n 8 | #' @param psi a scalar, the importance parameter 9 | #' @param allowMissingness A boolean, are missing values present 10 | #' @param naId The numeric index of the missing values in X 11 | #' 12 | #' Even though this approach does not imply normalization over the parameters 13 | #' of all taxa, it is very fast and they can be normalized afterwards 14 | #' 15 | #' @return A vector of length v with the evaluation of the score functions 16 | dNBllcol_constr = function(betas, X, reg, 17 | theta, muMarg, psi, allowMissingness, naId) { 18 | mu = exp(c(reg %*% betas) * psi) * muMarg 19 | X = correctXMissingness(X, mu, allowMissingness, naId) 20 | crossprod((X - mu)/(1 + mu/theta), reg) * 21 | psi 22 | } 23 | -------------------------------------------------------------------------------- /R/F_dNBllcol_constr_noLab.R: -------------------------------------------------------------------------------- 1 | #' The score function of the general response function 2 | #' 3 | #' @param betas a vector of regression parameters with length v 4 | #' @param X the nxp data matrix 5 | #' @param reg a matrix of regressors of dimension nxv 6 | #' @param thetasMat A matrix of dispersion parameters 7 | #' @param muMarg offset matrix of dimension nxp 8 | #' @param psi a scalar, the importance parameter 9 | #' @param allowMissingness A boolean, are missing values present 10 | #' @param naId The numeric index of the missing values in X 11 | #' @param ... further arguments passed on to the jacobian 12 | #' 13 | #' @return The evaluation of the score functions (a vector length v) 14 | dNBllcol_constr_noLab = function(betas, X, 15 | reg, thetasMat, muMarg, psi, allowMissingness, naId, ...) { 16 | mu = c(exp(reg %*% betas * psi)) * muMarg 17 | X = correctXMissingness(X, mu, allowMissingness, naId) 18 | colSums(crossprod((X - mu)/(1 + (mu/thetasMat)), 19 | reg) * psi) 20 | } 21 | -------------------------------------------------------------------------------- /R/F_dNBllrow.R: -------------------------------------------------------------------------------- 1 | #' A score function of the NB for the row scores 2 | #' 3 | #' @param beta a vector of of length n + k +1 regression parameters to optimize 4 | #' @param X the data matrix of dimensions nxp 5 | #' @param reg a 1xp regressor matrix: outer product of column scores and psis 6 | #' @param thetas nxp matrix with the dispersion parameters 7 | #' (converted to matrix for numeric reasons) 8 | #' @param muMarg an nxp offset matrix 9 | #' @param k a scalar, the dimension of the RC solution 10 | #' @param p a scalar, the number of taxa 11 | #' @param n a scalar, the number of samples 12 | #' @param rowWeights a vector of length n, the weights used for the restrictions 13 | #' @param nLambda an integer, the number of lagrangian multipliers 14 | #' @param rMatK the lower dimension row scores 15 | #' @param allowMissingness A boolean, are missing values present 16 | #' @param naId The numeric index of the missing values in X 17 | #' @param ... Other arguments passed on to the jacobian 18 | 19 | #' @return A vector of length n + k +1 with evaluations of the 20 | #' derivative of the lagrangian 21 | dNBllrow = function(beta, X, reg, thetas, 22 | muMarg, k, n, p, rowWeights, nLambda, 23 | rMatK, allowMissingness, naId, ...) { 24 | 25 | rMat = matrix(beta[seq_len(n)], byrow = FALSE, 26 | ncol = 1, nrow = n) 27 | mu = exp(rMat %*% reg) * muMarg 28 | X = correctXMissingness(X, mu, allowMissingness, naId) 29 | 30 | lambda1 = beta[n + 1] #Centering restrictions sum(abunds*r_{ik}) = 0 31 | lambda2 = beta[n + 2] #normalization restrictions sum(abunds*r^2_{ik}) = 1 32 | lambda3 = if (k == 1) { 33 | 0 34 | } else { 35 | beta[(n + 3):length(beta)] 36 | } 37 | 38 | score = c(tcrossprod(reg, (X - mu)/(1 + 39 | (mu/thetas)))) + rowWeights * (lambda1 + 40 | lambda2 * 2 * rMat + (rMatK %*% lambda3)) 41 | center = sum(rMat * rowWeights) 42 | unitSum = sum(rMat^2 * rowWeights) - 43 | 1 44 | if (k == 1) { 45 | return(c(score, center, unitSum)) 46 | } 47 | orthogons = crossprod(rMatK, rMat * rowWeights) 48 | return(c(score, center, unitSum, orthogons)) 49 | } 50 | -------------------------------------------------------------------------------- /R/F_dNBpsis.R: -------------------------------------------------------------------------------- 1 | #' A score function for the psi of a given dimension 2 | #' 3 | #' @param beta a scalar, the initial estimate 4 | #' @param X the n-by-p count matrix 5 | #' @param muMarg the nxp offset matrix 6 | #' @param reg the regressor matrix, the outer product of current row 7 | #' and column scores 8 | #' @param theta a n-by-p matrix with the dispersion parameters 9 | #' @param allowMissingness A boolean, are missing values present 10 | #' @param naId The numeric index of the missing values in X 11 | #' @param ... other arguments passed on to the jacobian 12 | 13 | #' @return The evaluation of the score function at beta, a scalar 14 | dNBpsis = function(beta, X, reg, theta, muMarg, allowMissingness, naId, 15 | ...) { 16 | mu = muMarg * exp(reg * beta) 17 | X = correctXMissingness(X, mu, allowMissingness, naId) 18 | sum(reg * (X - mu)/(1 + (mu/theta))) 19 | } 20 | -------------------------------------------------------------------------------- /R/F_data.R: -------------------------------------------------------------------------------- 1 | #' Microbiomes of colorectal cancer patients and healthy controls 2 | #' 3 | #' Microbiome sequencing data of colorectal cancer patients, 4 | #' patients with small adenoma and healthy controls, 5 | #' together with other baseline covariates 6 | #' 7 | #' @format A phyloseq object with an OTU-table and sample data 8 | #' \describe{ 9 | #' \item{otu_table}{Count data matrix of 709 taxa in 194 samples} 10 | #' \item{sample_data}{Data frame of patient covariates} 11 | #' } 12 | #' @source \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4299606/} 13 | "Zeller" 14 | -------------------------------------------------------------------------------- /R/F_deviances.R: -------------------------------------------------------------------------------- 1 | #' A function to extract deviances for all dimension, 2 | #' including after filtering on confounders 3 | #' 4 | #'@param rcm an object of the RCM class 5 | #'@param squaredSum a boolean, should total deviance be returned? 6 | #' 7 | #'Total deviances can be deceptive and not correspond to the differences in 8 | #' log-likelihood. As the dispersion is different for each model. 9 | #' To compare models it is better to compare likelihoods. 10 | #' 11 | #'@return If Sum is FALSE, a named array of deviance residuals of the 12 | #' independence model and all models with dimension 1 to k, including after 13 | #' filtering on confounders. Otherwise a table with total deviances (the sum of 14 | #' squared deviance residuals), deviance explained and cumulative deviance 15 | #' explained. 16 | deviances = function(rcm, squaredSum = FALSE) { 17 | vec = if(length(rcm$confModelMat)) 18 | c(0, 0.5, seq_len(rcm$k)) else c(0:rcm$k) 19 | outnames = c("independence", 20 | if (length(rcm$confModelMat)) "filtered" else NULL, 21 | paste0("Dim ", seq_len(rcm$k))) 22 | if (squaredSum) { 23 | tmp = vapply(FUN.VALUE = numeric(1), vec, FUN = function(i) { 24 | sum(getDevianceRes(rcm, i)^2) 25 | }) 26 | names(tmp) = outnames 27 | # Also make cumulative comparisons 28 | cumDevianceExplained = round((tmp - tmp[1])/(tmp[length(tmp)] - 29 | tmp[1]), 3) 30 | out = rbind(deviance = tmp, 31 | devianceExplained = c(0, diff(cumDevianceExplained)), 32 | cumDevianceExplained = cumDevianceExplained) 33 | } else { 34 | out = lapply(vec, function(i) { 35 | getDevianceRes(rcm, i) 36 | }) 37 | names(out) = outnames 38 | } 39 | return(out) 40 | } 41 | -------------------------------------------------------------------------------- /R/F_ellipseCoord.R: -------------------------------------------------------------------------------- 1 | #' A function that returns the coordinates of an ellipse 2 | #' 3 | #' @param a,b,c parameters of the quadratic function a^2x+bx+c 4 | #' @param quadDrop A scalar, fraction of peak height at which to draw 5 | #' the ellipse 6 | #' @param nPoints an integer, number of points to use to draw the ellipse 7 | #' 8 | #' @return a matrix with x and y coordinates of the ellipse 9 | ellipseCoord = function(a, b, c, quadDrop = 0.95, 10 | nPoints = 100) { 11 | center = -b/(2 * a) 12 | roots = mapply(a, b, c, FUN = function(A, 13 | B, C) { 14 | Re(polyroot(c(C - log(ifelse(A > 15 | 0, 1/quadDrop, quadDrop)) + (B^2 - 16 | 4 * A * C)/(4 * A), B, A))[1]) #Log to go to count scale. 17 | # Since function is symmetric, does not 18 | # matter which root we pick, only 19 | # distance to vertex matters so we just 20 | # take the first 21 | }) 22 | anglesEval = seq(0, 2 * pi, length.out = nPoints) 23 | ab = center - roots 24 | radii = prod(ab)/sqrt(sin(anglesEval)^2 * 25 | ab[1]^2 + ab[2]^2 * cos(anglesEval)^2) 26 | cbind(x = radii * cos(anglesEval) + center[1], 27 | y = radii * sin(anglesEval) + center[2]) 28 | } 29 | -------------------------------------------------------------------------------- /R/F_estDisp.R: -------------------------------------------------------------------------------- 1 | #' Estimate the overdispersion 2 | #' 3 | #' @details Information between taxa is shared with empirical Bayes 4 | #' using the edgeR pacakage, where the time-limiting steps are programmed in C. 5 | #' 6 | #' @param X the data matrix of dimensions nxp 7 | #' @param cMat a 1xp colum scores matrix 8 | #' @param rMat a nx1 rowscores matrix, if unconstrained 9 | #' @param muMarg an nxp offset matrix 10 | #' @param psis a scalar, the current psi estimate 11 | #' @param trended.dispersion a vector of length p with pre-calculated 12 | #' trended.dispersion estimates. They do not vary in function 13 | #' of the offset anyway 14 | #' @param prior.df an integer, number of degrees of freedom of the prior 15 | #' for the Bayesian shrinkage 16 | #' @param dispWeights Weights for estimating the dispersion 17 | #' in a zero-inflated model 18 | #' @param rowMat matrix of row scores in case of constrained ordination 19 | #' @param allowMissingness A boolean, are missing values present 20 | #' @param naId The numeric index of the missing values in X 21 | #' 22 | #' @return A vector of length p with dispersion estimates 23 | estDisp = function(X, cMat = NULL, rMat = NULL, 24 | muMarg, psis, trended.dispersion = NULL, 25 | prior.df = 10, dispWeights = NULL, rowMat = NULL, 26 | allowMissingness = FALSE, naId) { 27 | logMeansMat = if (!is.null(rMat)) { 28 | # Unconstrained 29 | t(rMat %*% (cMat * psis) + log(muMarg)) 30 | } else if (is.null(rowMat)) { 31 | t(log(muMarg)) #Non-parametric 32 | } else { 33 | # Constrained 34 | t(log(muMarg) + psis * rowMat) 35 | } 36 | if (any(is.infinite(logMeansMat))) 37 | stop("Overflow! Try trimming more lowly 38 | abundant taxa prior to model fitting. 39 | \n See prevCutOff argument in ?RCM.") 40 | X = correctXMissingness(X, exp(logMeansMat), allowMissingness, naId) 41 | 42 | trended.dispersion = if (is.null(trended.dispersion)) { 43 | edgeR::estimateGLMTrendedDisp(y = t(X), 44 | design = NULL, method = "bin.loess", 45 | offset = logMeansMat, weights = NULL) 46 | } else { 47 | trended.dispersion 48 | } 49 | trended.dispersion = if (is.list(trended.dispersion)) { 50 | trended.dispersion$dispersion 51 | } else trended.dispersion 52 | 53 | thetaEstsTmp <- edgeR::estimateGLMTagwiseDisp(y = t(X), 54 | design = NULL, prior.df = prior.df, 55 | offset = logMeansMat, dispersion = trended.dispersion, 56 | weights = dispWeights) 57 | 58 | thetaEsts = if (is.list(thetaEstsTmp)) { 59 | 1/thetaEstsTmp$tagwise.dispersion 60 | } else { 61 | 1/thetaEstsTmp 62 | } 63 | if (anyNA(thetaEsts)) { 64 | idNA = is.na(thetaEsts) 65 | thetaEsts[idNA] = mean(thetaEsts[!idNA]) 66 | warning(paste(sum(idNA), "dispersion estimations did not converge!")) 67 | } 68 | return(thetas = thetaEsts) 69 | } 70 | -------------------------------------------------------------------------------- /R/F_estNBparams.R: -------------------------------------------------------------------------------- 1 | #' A function to estimate the taxon-wise NB-params 2 | #' 3 | #' @param design an n-by-v design matrix 4 | #' @param thetas a vector of dispersion parameters of length p 5 | #' @param muMarg an offset matrix 6 | #' @param psi a scalar, the importance parameter 7 | #' @param X the data matrix 8 | #' @param nleqslv.control a list of control elements, passed on to nleqslv() 9 | #' @param ncols an integer, the number of columns of X 10 | #' @param initParam a v-by-p matrix of initial parameter estimates 11 | #' @param v an integer, the number of parameters per taxon 12 | #' @param dynamic a boolean, should response function be determined dynamically? 13 | #' See details 14 | #' @param envRange a vector of length 2, giving the range of observed 15 | #' environmental scores 16 | #' @param allowMissingness A boolean, are missing values present 17 | #' @param naId The numeric index of the missing values in X 18 | #' 19 | #' If dynamic is TRUE, quadratic response functions are fitted for every taxon. 20 | #' If the optimum falls outside of the observed range of environmental scores, 21 | #' a linear response function is fitted instead 22 | #' 23 | #' @return a v-by-p matrix of parameters of the response function 24 | estNBparams = function(design, thetas, muMarg, 25 | psi, X, nleqslv.control, ncols, initParam, 26 | v, dynamic = FALSE, envRange, allowMissingness, naId) { 27 | vapply(seq_len(ncols), FUN.VALUE = vector("numeric", v), function(i) { 28 | nleq = nleqslv(initParam[, i], reg = design, 29 | fn = dNBllcol_constr, theta = thetas[i], 30 | muMarg = muMarg[, i], psi = psi, 31 | X = X[, i], control = nleqslv.control, 32 | jac = JacCol_constr, allowMissingness = allowMissingness, 33 | naId = is.na(X[, i]))$x 34 | if (dynamic && ((-nleq[2]/(2 * nleq[3]) < 35 | envRange[1]) || (-nleq[2]/(2 * 36 | nleq[3]) > envRange[2]))) { 37 | # If out of observed range, fit a linear 38 | # model 39 | nleq = c( 40 | nleqslv( 41 | initParam[-3, i], 42 | reg = design[, -3], 43 | fn = dNBllcol_constr, 44 | theta = thetas[i], 45 | muMarg = muMarg[, i], 46 | psi = psi, 47 | X = X[, i], 48 | control = nleqslv.control, 49 | jac = JacCol_constr, 50 | allowMissingness = allowMissingness, 51 | naId = is.na(X[, i]) 52 | )$x,0) 53 | } 54 | return(nleq) 55 | }) 56 | } 57 | -------------------------------------------------------------------------------- /R/F_estNBparamsNoLab.R: -------------------------------------------------------------------------------- 1 | #' A function to estimate the NB-params ignoring the taxon labels 2 | #' 3 | #' @param design an n-by-v design matrix 4 | #' @param thetasMat A matrix of dispersion parameters 5 | #' @param muMarg an offset matrix 6 | #' @param psi a scalar, the importance parameter 7 | #' @param X the data matrix 8 | #' @param nleqslv.control a list of control elements, passed on to nleqslv() 9 | #' @param initParam a vector of length v of initial parameter estimates 10 | #' @param n an integer, the number of samples 11 | #' @param v an integer, the number of parameters per taxon 12 | #' @param dynamic a boolean, should response function be determined dynamically? 13 | #' See details 14 | #' @param envRange a vector of length 2, 15 | #' giving the range of observed environmental scores 16 | #' @param preFabMat a pre-fabricated auxiliary matrix 17 | #' @param allowMissingness A boolean, are missing values present 18 | #' @param naId The numeric index of the missing values in X 19 | #' 20 | #' If dynamic is TRUE, quadratic response functions are fitted for every taxon. 21 | #' If the optimum falls outside of the observed range of environmental scores, 22 | #' a linear response function is fitted instead 23 | #' 24 | #' @return a v-by-p matrix of parameters of the response function 25 | estNBparamsNoLab = function(design, thetasMat, 26 | muMarg, psi, X, nleqslv.control, initParam, 27 | n, v, dynamic, envRange, preFabMat, allowMissingness, naId) { 28 | # Without taxon Labels 29 | nleq = nleqslv(x = initParam, reg = design, 30 | fn = dNBllcol_constr_noLab, thetasMat = thetasMat, 31 | muMarg = muMarg, psi = psi, X = X, 32 | control = nleqslv.control, jac = JacCol_constr_noLab, 33 | n = n, v = v, preFabMat = preFabMat, 34 | allowMissingness = allowMissingness, naId = naId)$x 35 | if (dynamic && ((-nleq[2]/(2 * nleq[3]) < 36 | envRange[1]) || (-nleq[2]/(2 * nleq[3]) > 37 | envRange[2]))) { 38 | # If out of observed range, fit a linear 39 | # model 40 | nleq = c(nleqslv(initParam[-3], reg = design[, 41 | -3], fn = dNBllcol_constr_noLab, 42 | thetasMat = thetasMat, muMarg = muMarg, 43 | psi = psi, X = X, control = nleqslv.control, 44 | jac = JacCol_constr_noLab, preFabMat = preFabMat, 45 | n = n, v = v - 1, allowMissingness = allowMissingness, 46 | naId = naId)$x, 0) 47 | } 48 | return(nleq) 49 | } 50 | -------------------------------------------------------------------------------- /R/F_extractCoord.R: -------------------------------------------------------------------------------- 1 | #' A function to extract plotting coordinates, either for plot.RCM 2 | #' or to export to other plotting software 3 | #' 4 | #' @param RCM an RCm object 5 | #' @param Dim an integer vector of required dimensions 6 | #' 7 | #' The parameters for the ellipses of the quadratic response function come 8 | #' from the parametrization f(x) = a*x^2 + b*x + c 9 | #' For an unconstrained object the row and column coordinates are returned 10 | #' in separate matrices. The row names will correspond to the labels. 11 | #' For a constrained analysis also the variable points are returned. 12 | #' All variables still need to be scaled to optimally fill the available space 13 | #' 14 | #' @return A list with components 15 | #' \item{samples}{A dataframe of sample scores} 16 | #' \item{species}{A dataframe of column scores, with origin, slope, 17 | #' end and ellipse coordinates as needed} 18 | #' \item{variables}{A dataframe of variable scores, 19 | #' loadings of the environmental gradient} 20 | #' @export 21 | #' @examples 22 | #' data(Zeller) 23 | #' require(phyloseq) 24 | #' tmpPhy = prune_taxa(taxa_names(Zeller)[1:100], 25 | #' prune_samples(sample_names(Zeller)[1:50], Zeller)) 26 | #' zellerRCM = RCM(tmpPhy, k = 2, round = TRUE) 27 | #' coordsZeller = extractCoord(zellerRCM) 28 | extractCoord = function(RCM, Dim = c(1, 2)) { 29 | # Samples 30 | constrained = !is.null(RCM$covariates) 31 | dataSam <- if (constrained) { 32 | data.frame(RCM$covariates %*% RCM$alpha[, 33 | Dim, drop = FALSE] %*% diag(RCM$psis[Dim], nrow = length(Dim))) 34 | } else { 35 | data.frame(RCM$rMat[, Dim, drop = FALSE] %*% diag(RCM$psis[Dim])) 36 | } 37 | names(dataSam) = paste0("Dim", Dim) 38 | 39 | # Species 40 | if (constrained) { 41 | if (RCM$responseFun == "linear") { 42 | dataTax = data.frame(origin1 = -RCM$NB_params[1, 43 | , Dim[1]]/RCM$NB_params[2, 44 | , Dim[1]], origin2 = -RCM$NB_params[1, 45 | , Dim[2]]/RCM$NB_params[2, 46 | , Dim[2]], slope1 = RCM$NB_params[2, 47 | , Dim[1]], slope2 = RCM$NB_params[2, 48 | , Dim[2]]) 49 | dataTax$end1 = dataTax$origin1 + 50 | dataTax$slope1 51 | dataTax$end2 = dataTax$origin2 + 52 | dataTax$slope2 53 | rownames(dataTax) = colnames(RCM$X) 54 | } else if (RCM$responseFun == "quadratic") { 55 | dataTax = data.frame(apply(RCM$NB_params[c(2, 56 | 3), , Dim, drop = FALSE], c(2, 3), function(x) { 57 | a = x[2] 58 | b = x[1] 59 | -b/(2 * a) 60 | })) #The location of the extrema 61 | names(dataTax) = c("end1", "end2") 62 | 63 | peakHeights = apply(RCM$NB_params[,, Dim, drop = FALSE], 2, function(x) { 64 | A = x[3, ] 65 | B = x[2, ] 66 | C = x[1, ] 67 | vapply(FUN.VALUE = numeric(1),exp(B^2 - 4 * A * C)/(4 *A), 68 | function(y) {max(y, 1/y)}) 69 | }) 70 | # select largest relative departure 71 | rownames(peakHeights) = c("peak1","peak2") 72 | dataTax = cbind(dataTax, t(peakHeights)) 73 | 74 | # Get ellipse parameters 75 | dataEllipse = t(Reduce(x = lapply(Dim, 76 | function(x) {RCM$NB_params[, , x]}), f = rbind)) 77 | colnames(dataEllipse) = c(vapply(FUN.VALUE =character(length(Dim)), 78 | Dim, function(x) {paste0(c("c", "b", "a"),x)})) 79 | 80 | # Rescale peak heights for plotting 81 | dataTax[, c("peak1", "peak2")] = rowMultiply(dataTax[, 82 | c("peak1", "peak2")], apply(dataTax[, 83 | c("end1", "end2")], 2, function(x) { 84 | max(abs(x)) 85 | })/apply(dataTax[, c("peak1", 86 | "peak2")], 2, max)) 87 | dataTax = cbind(dataTax, dataEllipse) 88 | rownames(dataTax) = colnames(RCM$X) 89 | 90 | } else if (RCM$responseFun == "nonparametric") { 91 | # For non-parametric response function we 92 | # cannot plot the taxa 93 | dataTax = NULL 94 | } else { 95 | stop("No valid response function present in this RCM object!") 96 | } 97 | } else { 98 | # If not constrained 99 | dataTax = data.frame(cbind(t(RCM$cMat[Dim,, drop = FALSE]), 0, 0)) 100 | names(dataTax) = c("end1", "end2", 101 | "origin1", "origin2") 102 | rownames(dataTax) = colnames(RCM$X) 103 | } 104 | 105 | # Variables 106 | if (!constrained) { 107 | dataVar = NULL 108 | } else { 109 | dataVar = data.frame(RCM$alpha)[,Dim, drop = FALSE] 110 | } 111 | list(samples = dataSam, species = dataTax, 112 | variables = dataVar) 113 | } 114 | -------------------------------------------------------------------------------- /R/F_extractE.R: -------------------------------------------------------------------------------- 1 | #' A function to extract a matrix of expected values 2 | #' for any dimension of the fit 3 | #' 4 | #' @param rcm an object of class RCM 5 | #' @param Dim the desired dimension. Defaults to the maximum of the fit. 6 | #' Choose 0 for the independence model, 0.5 for the confounders filter model. 7 | #' 8 | #' @return The matrix of expected values 9 | extractE = function(rcm, Dim = rcm$k) { 10 | # Expectations 11 | Eind = outer(rcm$libSizes, rcm$abunds) #Expected values under independence 12 | if (Dim[1] %in% c(0, NA)) { 13 | Eind 14 | } else if (Dim[1] == 0.5) { 15 | Eind * exp(rcm$confModelMat %*% rcm$confParams) 16 | } else { 17 | if (!is.null(rcm$confModelMat)) 18 | Eind = Eind * exp(rcm$confModelMat %*% rcm$confParams) 19 | if (is.null(rcm$covariates)) { 20 | Eind * exp(rcm$rMat[, Dim, drop = FALSE] %*% (rcm$cMat[Dim, 21 | , drop = FALSE] * rcm$psis[Dim])) 22 | } else if (rcm$responseFun == "nonparametric") { 23 | Eind * exp(apply(vapply(Dim, FUN.VALUE = Eind, function(j) { 24 | rcm$nonParamRespFun[[j]]$rowMat 25 | }), c(1, 2), sum)) 26 | } else { 27 | if (!is.null(rcm$confModelMat)) 28 | Eind = Eind * exp(rcm$confModelMat %*% rcm$confParams) 29 | if (is.null(rcm$covariates)) { 30 | Eind * exp(rcm$rMat[, Dim, drop = FALSE] %*% (rcm$cMat[Dim, 31 | , drop = FALSE] * rcm$psis[Dim])) 32 | } else if (rcm$responseFun == "nonparametric") { 33 | Eind * exp(apply(vapply(Dim, FUN.VALUE = Eind, function(j) { 34 | rcm$nonParamRespFun[[j]]$rowMat 35 | }), c(1, 2), sum)) 36 | } else { 37 | Eind * exp(apply(vapply(Dim, FUN.VALUE = Eind, function(j) { 38 | rcm$psis[j] * getRowMat(sampleScore = rcm$covariates %*% 39 | rcm$alpha[, j], responseFun = rcm$responseFun, 40 | NB_params = rcm$NB_params[,, j]) 41 | }), c(1, 2), sum)) 42 | } 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /R/F_filterConfounders.R: -------------------------------------------------------------------------------- 1 | #'Filters out the effect of known confounders. This is done by fitting 2 | #'interactions of every taxon with the levels of the confounders. 3 | #'It returns a modified offset matrix for the remainder 4 | #' of the fitting procedure. 5 | #' 6 | #' @param muMarg a nxp matrix, the current offset 7 | #' @param confMat a nxt confounder matrix 8 | #' @param X the nxp data matrix 9 | #' @param thetas a vector of length p with the current dispersion estimates 10 | #' @param p an integer, the number of columns of X 11 | #' @param n an integer, the number of rows of X 12 | #' @param nleqslv.control see nleqslv() 13 | #' @param trended.dispersion a vector of length p 14 | #' with trended dispersion estimates 15 | #' @param tol a scalar, the convergence tolerance 16 | #' @param maxIt maximum number of iterations 17 | #' @param allowMissingness A boolean, are missing values present 18 | #' @param naId The numeric index of the missing values in X 19 | #' 20 | #' Fits the negative binomial mean parameters and overdispersion parameters 21 | #' iteratively. 22 | #' Convergence is determined based on the L2-norm 23 | #' of the absolute change of mean parameters 24 | #' 25 | #' @return a list with components: 26 | #' \item{thetas}{new theta estimates} 27 | #' \item{NB_params}{The estimated parameters of the interaction terms} 28 | 29 | filterConfounders = function(muMarg, confMat, X, thetas, p, n, nleqslv.control, 30 | trended.dispersion, tol = 0.001, maxIt = 20, allowMissingness, naId) { 31 | NB_params = matrix(0, ncol(confMat), p) 32 | 33 | iter = 1 34 | while ((iter == 1) || ((iter <= maxIt) && (!convergence))) { 35 | 36 | NB_params_old = NB_params 37 | 38 | NB_params = vapply(FUN.VALUE = numeric(nrow(NB_params)), seq_len(p), 39 | function(i) { 40 | nleq = try(nleqslv(NB_params[, i], reg = confMat, 41 | fn = dNBllcol_constr, 42 | theta = thetas[i], muMarg = muMarg[, i], X = X[, i], 43 | control = nleqslv.control, 44 | jac = JacCol_constr, psi = 1, 45 | allowMissingness = allowMissingness, 46 | naId = is.na(X[, i]))$x) 47 | # Fit the taxon-by taxon NB with given overdispersion parameters and 48 | # return predictions 49 | if (inherits(nleq, "try-error") | anyNA(nleq) | 50 | any(is.infinite(nleq))) { 51 | nleq = nleqslv(NB_params[, i], reg = confMat, 52 | fn = dNBllcol_constr, 53 | theta = thetas[i], muMarg = muMarg[, i], X = X[, i], 54 | control = nleqslv.control, psi = 1, 55 | allowMissingness = allowMissingness, 56 | naId = is.na(X[, i]))$x 57 | } 58 | # If fails try with numeric jacobian 59 | return(nleq) 60 | }) #Estimate response functions 61 | 62 | if (anyNA(NB_params)) { 63 | stop("Filtering on confounders failed because of 64 | failed fits. Consider more stringent filtering by 65 | increasing the prevCutOff parameter.\n") 66 | } 67 | 68 | thetas = estDisp(X = X, cMat = matrix(0, ncol = p), rMat = matrix(0, 69 | nrow = n), psis = 0, muMarg = muMarg * exp(confMat %*% NB_params), 70 | trended.dispersion = trended.dispersion, 71 | allowMissingness = allowMissingness) 72 | # Estimate overdispersion 73 | iter = iter + 1 74 | convergence = sqrt(mean((1 - NB_params/NB_params_old)^2)) < tol 75 | # Check for convergence, L2-norm 76 | } 77 | list(thetas = thetas, NB_params = NB_params) 78 | } 79 | -------------------------------------------------------------------------------- /R/F_getDevMat.R: -------------------------------------------------------------------------------- 1 | #' ACalculate the matrix of deviance residuals 2 | #' 3 | #'@param X the data matrix 4 | #'@param thetaMat the matrix of dispersions 5 | #'@param mu the matrix of means 6 | #' 7 | #'@return The matrix of deviance residuals 8 | getDevMat = function(X, thetaMat, mu) { 9 | tmpMat = suppressWarnings(sqrt(2 * (X * 10 | log(X/mu) - (X + thetaMat) * log((1 + 11 | X/thetaMat)/(1 + mu/thetaMat)))) * 12 | sign(X - mu)) 13 | tmpMat[X == 0] = -sqrt((2 * thetaMat * 14 | log(1 + mu/thetaMat))[X == 0]) 15 | # zero observations are always smaller 16 | # than the mean 17 | tmpMat[is.na(tmpMat)] = 0 #Correct for numerical/real NAs 18 | tmpMat 19 | } 20 | -------------------------------------------------------------------------------- /R/F_getDevianceRes.R: -------------------------------------------------------------------------------- 1 | #' A function to calculate the matrix of deviance residuals. 2 | #' 3 | #' @param RCM an RCM object 4 | #' @param Dim The dimensions to use 5 | #' 6 | #' For the deviance residuals we use the overdispersions from the reduced model. 7 | #' Standard dimensions used are only first and second, 8 | #' since these are also plotted 9 | #' 10 | #'@export 11 | #'@return A matrix with deviance residuals of the same size 12 | #' as the original data matrix 13 | #' 14 | #'@examples 15 | #'data(Zeller) 16 | #' require(phyloseq) 17 | #' tmpPhy = prune_taxa(taxa_names(Zeller)[1:120], 18 | #' prune_samples(sample_names(Zeller)[1:75], Zeller)) 19 | #' #Subset for a quick fit 20 | #' zellerRCM = RCM(tmpPhy, k = 2, round = TRUE, prevCutOff = 0.03) 21 | #' devRes = getDevianceRes(zellerRCM) 22 | getDevianceRes = function(RCM, Dim = RCM$k) { 23 | mu = extractE(RCM, Dim) 24 | thetaMat = matrix(byrow = TRUE, nrow = nrow(RCM$X), 25 | ncol = ncol(RCM$X), data = RCM$thetas[, 26 | switch(as.character(Dim), `0` = "Independence", 27 | `0.5` = "Filtered", paste0("Dim", 28 | Dim))]) 29 | getDevMat(X = correctXMissingness(RCM$X, mu, RCM$NApresent, is.na(RCM$X)), thetaMat = thetaMat, 30 | mu = mu) 31 | } 32 | -------------------------------------------------------------------------------- /R/F_getDistCoord.R: -------------------------------------------------------------------------------- 1 | #' Get coordinates of a distance object of n observations for the provided indices 2 | #' 3 | #' @param indices The row indices for which distance indices are wanted 4 | #' @param n The total number of objects in the distance matrix 5 | #' 6 | #' @return a vector of coordinates 7 | getDistCoord = function(indices, n){ 8 | indexI = n*(indices-1) - indices*(indices+1)/2# See details of ?dist 9 | indexMat = outer(indexI, indices, FUN = "+") 10 | indexMat[upper.tri(indexMat)] 11 | } -------------------------------------------------------------------------------- /R/F_getInflCol.R: -------------------------------------------------------------------------------- 1 | #' A function to extract the influence for a given parameter index 2 | #' 3 | #' @param score a score matrix 4 | #' @param InvJac The inverted jacobian 5 | #' @param taxon The taxon name or index 6 | #' 7 | #' @return A matrix with all observations' influence on the given taxon 8 | getInflCol = function(score, InvJac, taxon) { 9 | rowMultiply(score * InvJac[, taxon]) 10 | } 11 | -------------------------------------------------------------------------------- /R/F_getInflRow.R: -------------------------------------------------------------------------------- 1 | #' Extract the influence of all observations on a given row score 2 | #' 3 | #' @param score the score function evaluated for every observation 4 | #' @param InvJac The inverse jacobian 5 | #' @param sample the row score or sample index 6 | #' 7 | #' @return A matrix with all observations' influence on the row score 8 | getInflRow = function(score, InvJac, sample) { 9 | score * InvJac[, sample] 10 | } 11 | -------------------------------------------------------------------------------- /R/F_getInt.R: -------------------------------------------------------------------------------- 1 | #' Integrate the spline of an vgam object 2 | #' 3 | #' @param coef A vector of coefficients 4 | #' @param spline The cubic smoothing spline 5 | #' @param sampleScore the observed environmental scores 6 | #' @param stop.on.error see ?integrate 7 | #' @param ... additional arguments passed on to integrate() 8 | #' 9 | #' @return a scalar, the value of the integral 10 | getInt = function(coef, spline, sampleScore, 11 | stop.on.error = FALSE, ...) { 12 | # Absolute values assure positive 13 | # outcomes 14 | integrate(f = function(y, coef, spline) { 15 | if (!is.null(spline)) { 16 | abs(getRowMat(sampleScore = y, 17 | taxonCoef = coef, spline = spline, 18 | responseFun = "nonparametric")) 19 | } else { 20 | # If GAM fails, GLM fit (or independence 21 | # model) 22 | abs(getModelMat(y, degree = length(coef) - 23 | 1) %*% coef) 24 | } 25 | }, lower = min(sampleScore), upper = max(sampleScore), 26 | coef = coef, spline = spline, stop.on.error = stop.on.error, 27 | ...)$value 28 | } 29 | -------------------------------------------------------------------------------- /R/F_getLogLik.R: -------------------------------------------------------------------------------- 1 | #' Extract the logged likelihood of every count 2 | #' 3 | #' @param rcm an RCM object 4 | #' @param Dim A vector of integers indicating which dimensions to take along, 5 | #' or Inf for the saturated model, or 0 for the independence model 6 | #' 7 | #' @return A matrix with logged likelihood of the size of the data matrix 8 | getLogLik = function(rcm, Dim) { 9 | if (Dim == Inf) { 10 | return(dpois(x = rcm$X, lambda = rcm$X, 11 | log = TRUE)) 12 | } 13 | E = extractE(rcm, if (Dim >= 1) 14 | seq_len(Dim) else Dim) 15 | thetaMat = matrix(byrow = TRUE, nrow = nrow(rcm$X), 16 | ncol = ncol(rcm$X), data = rcm$thetas[, 17 | switch(as.character(Dim), `0` = "Independence", 18 | `0.5` = "Filtered", paste0("Dim", 19 | Dim))]) 20 | dnbinom(x = rcm$X, mu = E, size = thetaMat, 21 | log = TRUE) 22 | } 23 | -------------------------------------------------------------------------------- /R/F_getModelMat.R: -------------------------------------------------------------------------------- 1 | #' A function to construct a model matrix of a certain degree 2 | #' @param y the variable 3 | #' @param degree the degree 4 | #' 5 | #' @return A model matrix with degree+1 columns and as many rows as lenght(y) 6 | #' @importFrom stats model.matrix formula 7 | getModelMat = function(y, degree) { 8 | model.matrix(formula(paste("~", paste(paste("I(y^", 9 | seq_len(degree), ")"), collapse = "+")))) 10 | } 11 | -------------------------------------------------------------------------------- /R/F_getRowMat.R: -------------------------------------------------------------------------------- 1 | #' Return a matrix of row scores 2 | #' 3 | #' @param sampleScore a vector of length n with sample scores 4 | #' @param responseFun a character string, the type of response function, 5 | #' either 'linear' or 'quadratic' 6 | #' @param NB_params a v-by-p matrix of parameters of theresponse function 7 | #' @param taxonCoef A vector of coefficients 8 | #' @param spline The cubic smoothing spline 9 | #' 10 | #' Multiplying the old offset with the exponent matrix times 11 | #' the importance parameter obtains the new one based on lower dimension 12 | #' 13 | #' @return a n-by-p matrix of scores 14 | #' @importFrom VGAM predict 15 | getRowMat = function(sampleScore, responseFun, 16 | NB_params, taxonCoef, spline) { 17 | if (responseFun == "nonparametric") { 18 | cbind(1, sampleScore, predict(spline, 19 | x = sampleScore)$y) %*% c(taxonCoef, 20 | 1) 21 | } else { 22 | buildDesign(sampleScore, responseFun) %*% 23 | NB_params 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /R/F_heq_nb.R: -------------------------------------------------------------------------------- 1 | #' Define linear equality constraints for env. gradient 2 | #' 3 | #' @param Alpha the current estimate of the environmental gradient 4 | #' @param alphaK a matrix with the environmental gradients 5 | #' of the lower dimensions 6 | #' @param d an integer, the number of environmental variables, including dummies 7 | #' @param k an integer, the current dimension 8 | #' @param centMat a centering matrix 9 | #' @param ... further arguments for other methods, not needed in this one 10 | #' 11 | #' The centering matrix centMat ensures that the parameters of the dummies 12 | #' of the same categorical variable sum to zero 13 | #' 14 | #' @return a vector of with current values of the constraints, 15 | #' should evolve to zeroes only 16 | heq_nb = function(Alpha, alphaK, d, k, centMat, 17 | ...) { 18 | centerFactors = centMat %*% Alpha #Includes overal centering 19 | size = sum(Alpha^2) - 1 20 | if (k == 1) { 21 | return(c(centerFactors, size)) 22 | } 23 | ortho = crossprod(alphaK, Alpha) 24 | c(centerFactors, size, ortho) 25 | } 26 | -------------------------------------------------------------------------------- /R/F_heq_nb_jac.R: -------------------------------------------------------------------------------- 1 | #' The jacobian of the linear equality constraints 2 | #' 3 | #' @param Alpha the current estimate of the environmental gradient 4 | #' @param alphaK a matrix with the environmental gradients 5 | #' of the lower dimensions 6 | #' @param d an integer, the number of environmental variables, 7 | #' including dummies 8 | #' @param k an integer, the current dimension 9 | #' @param centMat a centering matrix 10 | #' @param ... further arguments for other methods, not needed in this one 11 | #' 12 | #' @return The jacobian matrix 13 | heq_nb_jac = function(Alpha, alphaK, d, k, 14 | centMat, ...) { 15 | if (k == 1) { 16 | return(rbind(centMat, 2 * Alpha)) 17 | } else { 18 | rbind(centMat, 2 * Alpha, t(alphaK)) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /R/F_indentPlot.R: -------------------------------------------------------------------------------- 1 | #' Functions to indent the plot to include the entire labels 2 | #' 3 | #' @param plt a ggplot object 4 | #' @param xInd a scalar or a vector of length 2, 5 | #' specifying the indentation left and right of the plot to allow for the labels 6 | #' to be printed entirely 7 | #' @param yInd a a scalar or a vector of length 2, 8 | #' specifying the indentation top and bottom of the plot 9 | #' to allow for the labels to be printed entirely 10 | #' @return a ggplot object, squared 11 | indentPlot <- function(plt, xInd = 0, yInd = 0) { 12 | return(plt + 13 | expand_limits( 14 | x = ggplot_build(plt)$layout$panel_params[[1]]$x.range + 15 | if (length(xInd) == 1) xInd * c(-1, 16 | 1) else xInd, 17 | y = ggplot_build(plt)$layout$panel_params[[1]]$y.range + 18 | if (length(yInd) == 1) yInd * c(-1, 19 | 1) else yInd)) 20 | } 21 | -------------------------------------------------------------------------------- /R/F_inertia.R: -------------------------------------------------------------------------------- 1 | #' Calculate the log-likelihoods of all possible models 2 | #' 3 | #'@param rcm an object of the RCM class 4 | #' 5 | #'@return A table with inertias, proportion inertia explained 6 | #' and cumulative proportion of inertia explained. 7 | #'@export 8 | #' @examples 9 | #' data(Zeller) 10 | #' require(phyloseq) 11 | #' tmpPhy = prune_taxa(taxa_names(Zeller)[1:100], 12 | #' prune_samples(sample_names(Zeller)[1:50], Zeller)) 13 | #' zellerRCM = RCM(tmpPhy, round = TRUE) 14 | #' inertia(zellerRCM) 15 | inertia = function(rcm) { 16 | vec = if (length(rcm$confModelMat)) 17 | c(0, 0.5, seq_len(rcm$k)) else c(0:rcm$k) 18 | outnames = c("independence", if (length(rcm$confModelMat)) 19 | "filtered" else NULL, 20 | paste0("Dim", seq_len(rcm$k)), "saturated") 21 | tmp = c(vapply(FUN.VALUE = numeric(1), vec, FUN = function(i) { 22 | eMat = extractE(rcm, i) 23 | round(sum(((eMat - rcm$X)^2/eMat), na.rm = TRUE)) 24 | }), 0) 25 | names(tmp) = outnames 26 | cumInertiaExplained = round((tmp - tmp[1])/(tmp[length(tmp)] - tmp[1]), 27 | 3) 28 | out = rbind(inertia = tmp, inertiaExplained = 29 | c(0, diff(cumInertiaExplained)), 30 | cumInertiaExplained = cumInertiaExplained) 31 | return(out) 32 | } 33 | -------------------------------------------------------------------------------- /R/F_liks.R: -------------------------------------------------------------------------------- 1 | #' Calculate the log-likelihoods of all possible models 2 | #' 3 | #'@param rcm an object of the RCM class 4 | #'@param Sum a boolean, should log-likelihoods be summed? 5 | #' 6 | #'@return If Sum is FALSE, a named array log-likelihoods 7 | #' of the independence model and all models with dimension 1 to k, 8 | #' including after filtering on confounders. 9 | #' Otherwise a table with log-likelihoods, 10 | #' deviance explained and cumulative deviance explained. 11 | #'@export 12 | #' @examples 13 | #' data(Zeller) 14 | #' require(phyloseq) 15 | #' tmpPhy = prune_taxa(taxa_names(Zeller)[1:100], 16 | #' prune_samples(sample_names(Zeller)[1:50], Zeller)) 17 | #' zellerRCM = RCM(tmpPhy, round = TRUE) 18 | #' liks(zellerRCM) 19 | liks = function(rcm, Sum = TRUE) { 20 | vec = if (length(rcm$confModelMat)) 21 | c(0, 0.5, seq_len(rcm$k), Inf) else c(0:rcm$k, Inf) 22 | outnames = c("independence", if (length(rcm$confModelMat)) 23 | "filtered" else NULL, 24 | paste0("Dim", seq_len(rcm$k)), "saturated") 25 | if (Sum) { 26 | tmp = vapply(FUN.VALUE = numeric(1), vec, FUN = function(i) { 27 | sum(getLogLik(rcm, i), na.rm = TRUE) 28 | }) 29 | names(tmp) = outnames 30 | } else { 31 | tmp = vapply(vec, FUN.VALUE = matrix(0, nrow(rcm$X), ncol(rcm$X)), 32 | FUN = function(i) { 33 | getLogLik(rcm, i) 34 | }) 35 | dimnames(tmp)[[3]] = outnames 36 | } 37 | if (Sum) { 38 | # Also make cumulative comparisons 39 | cumDevianceExplained = round((tmp - tmp[1])/(tmp[length(tmp)] - 40 | tmp[1]), 3) 41 | out = rbind(logLikelihood = tmp, logLikExplained = c(0, 42 | diff(cumDevianceExplained)), 43 | cumLogLikExplained = cumDevianceExplained) 44 | } 45 | return(out) 46 | } 47 | -------------------------------------------------------------------------------- /R/F_permanova.R: -------------------------------------------------------------------------------- 1 | #' Perform a PERMANOVA analysis for group differences of a predefined cofactor using the pseudo F-statistic 2 | #' 3 | #' @param rcmObj an RCM object 4 | #' @param groups a factor of length n with cluster memberships, or a name of a variable contained in the RCM object 5 | #' @param nPerm Number of permutations in the PERMANOVA 6 | #' @param Dim Dimensions on which the test should be performed. Defaults to all dimensions of the fitted RCM object. 7 | #' @param verbose a boolean, should output be printed? 8 | #' 9 | #' @return A list with components 10 | #' \item{statistic}{The pseudo F-statistic} 11 | #' \item{p.value}{The p-value of the PERMANOVA} 12 | #' 13 | #' @seealso \code{\link{RCM}} 14 | #' @importFrom phyloseq get_variable 15 | #' @importFrom stats dist 16 | #' @export 17 | #' @examples 18 | #' data(Zeller) 19 | #' require(phyloseq) 20 | #' tmpPhy = prune_taxa(taxa_names(Zeller)[1:100], 21 | #' prune_samples(sample_names(Zeller)[1:50], Zeller)) 22 | #' zellerRCM = RCM(tmpPhy, round = TRUE) 23 | #' zellerPermanova = permanova(zellerRCM, "Diagnosis") 24 | permanova = function(rcmObj, groups, nPerm = 1e4, Dim = seq_len(rcmObj$k), verbose = TRUE){ 25 | stopifnot(is(rcmObj, "RCM"), length(nPerm)==1) 26 | if(nPerm <= 100){ 27 | warning("Less than 100 permutations leads to low power of the permutation test!") 28 | } 29 | if(length(groups)==1){ 30 | groups = get_variable(rcmObj$physeq, groups) 31 | } 32 | coord = extractCoord(rcmObj, Dim)$samples 33 | N = nrow(coord) 34 | if(N != length(groups)){ 35 | stop("Length of grouping variable provided (", length(groups), 36 | "does not correspond to number of samples in RCM object (", N, ")") 37 | } 38 | a = length(unique(groups)) 39 | if(a <= 1){ 40 | stop("Provide more than one group in 'groups'.") 41 | } 42 | if(any(table(groups)==1)){ 43 | stop("Some groups contain only a single observation, no distances can be calculated.") 44 | } 45 | distSq = dist(coord)^2 46 | overalDist = sum(distSq)/N 47 | #Observed test statistic 48 | withinDistObs = sum(unlist(tapply(seq_len(nrow(coord)), groups, function(x){ 49 | distSq[getDistCoord(x, N)]/length(x) 50 | }))) 51 | FratioObs = (overalDist-withinDistObs)/withinDistObs * (a-1)/(N-a) 52 | #PERMANOVA 53 | withinDistPerm = vapply(seq_len(nPerm), FUN.VALUE = double(1), function(jj){ 54 | if(verbose && ((jj-1) %% (nPerm/10)) == 0){ 55 | cat("Permutation", jj, "out of", nPerm, "\n") 56 | } 57 | sum(unlist(tapply(seq_len(nrow(coord)), sample(groups), function(x){ 58 | distSq[getDistCoord(x, N)]/length(x) 59 | }))) 60 | }) 61 | FratioPerm = (overalDist-withinDistPerm)/withinDistPerm * (a-1)/(N-a) 62 | PvalPerm = mean(FratioObs < FratioPerm) 63 | return(list("statistic" = FratioObs, "p.value" = PvalPerm)) 64 | } 65 | -------------------------------------------------------------------------------- /R/F_residualPlot.R: -------------------------------------------------------------------------------- 1 | #' Make residual plots 2 | #' 3 | #' @param RCM an RCM object 4 | #' @param Dim an integer, which dimension? 5 | #' @param whichTaxa a character string or a character vector, 6 | #' for which taxa to plot the diagnostic plots 7 | #' @param resid the type of residuals to use, either 'Deviance' or 'Pearson' 8 | #' @param numTaxa an integer, the number of taxa to plot 9 | #' @param mfrow passed on to par(). 10 | #' If not supplied will be calculated based on numTaxa 11 | #' @param samColour,samShape Vectors or character strings denoting 12 | #' the sample colour and shape respectively. If character string is provided, 13 | #' the variables with this name is extracted from the phyloseq object in RCM 14 | #' @param legendLabSize size of the legend labels 15 | #' @param legendTitleSize size of the legend title 16 | #' @param axisLabSize size of the axis labels 17 | #' @param axisTitleSize size of the axis title 18 | #' @param taxTitle A boolean, should taxon title be printed 19 | #' @param h Position of reference line. Set to NA for no line 20 | #' 21 | #'@details If whichTaxa is 'run' or 'response' the taxa with the highest 22 | #' run statistics or steepest slopes of the response function are plotted, 23 | #' numTax indicates the number. If whichTaxa is a character vector, 24 | #' these are interpreted as taxon names to plot. 25 | #' This function is mainly meant for linear response functions, 26 | #' but can be used for others too. 27 | #' The runs test statistic from the tseries package is used. 28 | #'@return Plots a ggplot2-object to output 29 | #'@export 30 | #'@import ggplot2 31 | #'@import phyloseq 32 | #'@importFrom tseries runs.test 33 | #'@seealso \code{\link{RCM}} 34 | #'@examples 35 | #'data(Zeller) 36 | #' require(phyloseq) 37 | #' tmpPhy = prune_taxa(taxa_names(Zeller)[1:120], 38 | #' prune_samples(sample_names(Zeller)[1:75], Zeller)) 39 | #' #Subset for a quick fit 40 | #' zellerRCMlin = RCM(tmpPhy, k = 2, 41 | #' covariates = c('BMI','Age','Country','Diagnosis','Gender'), 42 | #' responseFun = 'linear', round = TRUE, prevCutOff = 0.03) 43 | #' residualPlot(zellerRCMlin) 44 | residualPlot = function(RCM, Dim = 1, whichTaxa = "response", 45 | resid = "Deviance", numTaxa = 9, mfrow = NULL, 46 | samColour = NULL, samShape = NULL, legendLabSize = 15, 47 | legendTitleSize = 16, axisLabSize = 14, axisTitleSize = 16, 48 | taxTitle = TRUE, h = 0) { 49 | if(is.null(RCM$covariates)){ 50 | stop("Residual plots only implemented for constrained ordinations!") 51 | } 52 | sampleScore = RCM$covariates %*% RCM$alpha[, Dim, 53 | drop = FALSE] 54 | if (resid == "Deviance") { 55 | resMat = getDevianceRes(RCM, Dim) 56 | } else if (resid == "Pearson") { 57 | mu = extractE(RCM, seq_len(Dim)) 58 | # Residuals are also based on lower dimensions 59 | thetaMat = matrix(byrow = TRUE, nrow = nrow(RCM$X), 60 | ncol = ncol(RCM$X), data = RCM$thetas[, 61 | switch(as.character(Dim), `0` = "Independence", 62 | `0.5` = "Filtered", 63 | paste0("Dim",Dim))]) 64 | resMat = (RCM$X - mu)/sqrt(mu + mu^2/thetaMat) 65 | } else { 66 | stop("Unknown residual type!") 67 | } 68 | if (!whichTaxa %in% c("runs", "response")) { 69 | numTaxa = length(whichTaxa) 70 | idTaxa = whichTaxa 71 | } else { 72 | sizes = if (whichTaxa == "runs") 73 | apply(resMat > 0, 2, function(x) { 74 | runs.test(factor(x))$statistic 75 | }) else RCM$NB_params[2, , Dim] 76 | # Select taxa with longest runs or strongest 77 | # responses 78 | idTaxa = which(sizes >= sort(sizes, decreasing = TRUE)[numTaxa]) 79 | } 80 | # Prepare the plotting facets 81 | mfrow = if (is.null(mfrow)) 82 | rep(ceiling(sqrt(numTaxa)), 2) else mfrow 83 | parTmp = par(no.readonly = TRUE) 84 | on.exit(par(parTmp)) 85 | par(mfrow = mfrow) 86 | resMat = resMat[, idTaxa, drop = FALSE] 87 | Colour = if (is.null(samColour)) 88 | "black" else get_variable(RCM$physeq, samColour) 89 | Shape = if (is.null(samShape)){ 90 | if(length(idTaxa)>1) 1 else "none" 91 | } else get_variable(RCM$physeq, samShape) 92 | if (length(idTaxa) > 1) { 93 | foo = lapply(colnames(resMat), function(tax) { 94 | plot(x = sampleScore, y = resMat[, tax], 95 | ylab = paste(resid, "residuals"), 96 | xlab = paste("Environmental score in dimension", Dim), 97 | main = tax, col = Colour, pch = Shape) 98 | }) 99 | return(invisible()) 100 | } else { 101 | Plot = ggplot(data.frame(x = c(sampleScore), 102 | y = c(resMat), samColour = Colour, samShape = Shape), 103 | mapping = aes_string(x = "x", y = "y", 104 | colour = "samColour", shape = "samShape")) + 105 | ylab(paste(resid, "residuals")) + 106 | xlab(paste("Environmental score in dimension", 107 | Dim)) + geom_point() + ggtitle(ifelse(taxTitle, 108 | colnames(resMat), "")) 109 | Plot = Plot + if (is.null(samShape)) 110 | guides(shape = FALSE) else scale_shape_discrete(name = samShape) 111 | Plot = Plot + if (is.null(samColour)) { 112 | guides(colour = FALSE) 113 | } else if (is.factor(get_variable(RCM$physeq, 114 | samColour))) { 115 | scale_colour_discrete(name = samColour) 116 | } else scale_colour_continuous(name = samColour) 117 | Plot = Plot + geom_hline(yintercept = h, linetype = "dashed", col = "black") 118 | Plot = Plot + theme_bw() + theme(axis.title = element_text(size = axisTitleSize), 119 | axis.text = element_text(size = axisLabSize), 120 | legend.title = element_text(size = legendTitleSize), 121 | legend.text = element_text(size = legendLabSize)) 122 | return(Plot) 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /R/F_respFunJacMat.R: -------------------------------------------------------------------------------- 1 | #' Calculates the Jacobian of the parametric response functions 2 | #' 3 | #' @param betas a vector of length (deg+1)*(p+1) with regression parameters 4 | #' with deg the degree of the response function and the lagrangian multipliers 5 | #' @param X the nxp data matrix 6 | #' @param reg a vector of regressors with the dimension n-by-v 7 | #' @param thetaMat The n-by-p matrix with dispersion parameters 8 | #' @param muMarg offset matrix of size nxp 9 | #' @param psi a scalar, the importance parameter 10 | #' @param v an integer, one plus the degree of the response function 11 | #' @param p an integer, the number of taxa 12 | #' @param IDmat an logical matrix with indices of non-zero elements 13 | #' @param IndVec a vector with indices with non-zero elements 14 | #' @param allowMissingness A boolean, are missing values present 15 | #' @param naId The numeric index of the missing values in X 16 | #' 17 | #' @return The jacobian, a square matrix of dimension (deg+1)*(p+1) 18 | respFunJacMat = function(betas, X, reg, thetaMat, 19 | muMarg, psi, v, p, IDmat, IndVec, allowMissingness, naId) { 20 | NBparams = matrix(betas[seq_len(p * v)], 21 | ncol = p) 22 | mu = exp(reg %*% NBparams * psi) * muMarg 23 | X = correctXMissingness(X, mu, allowMissingness, naId) 24 | Jac = matrix(0, (p + 1) * v, (p + 1) * 25 | v) 26 | did = seq_len(p * v) 27 | didv = seq_len(v) 28 | # d²Lag/dlambda dBeta 29 | Jac[didv + p * v, did][IndVec] = 2 * 30 | NBparams 31 | Jac = Jac + t(Jac) #symmetrize 32 | 33 | tmp = (1 + X/thetaMat) * mu/(1 + mu/thetaMat)^2 34 | tmp2 = vapply(didv, FUN.VALUE = tmp, 35 | function(x) { 36 | reg[, x] * tmp 37 | }) 38 | # d²Lag/dBeta² 39 | Jac[did, did][IDmat] = -aperm(tensor(reg, 40 | tmp2, 1, 1), c(3, 1, 2)) * psi^2 41 | # Permute the dimensions to assure 42 | # correct insertion 43 | 44 | diag(Jac)[did] = diag(Jac)[did] + 2 * 45 | betas[seq_len(v) + p * v] 46 | return(Jac) 47 | } 48 | -------------------------------------------------------------------------------- /R/F_respFunScoreMat.R: -------------------------------------------------------------------------------- 1 | #' Derivative of the Lagrangian of the parametric response function 2 | #' 3 | #' @param betas a vector of length (deg+1)*(p+1) with regression parameters with 4 | #' deg the degree of the response function and the lagrangian multipliers 5 | #' @param X the nxp data matrix 6 | #' @param reg a matrix of regressors with the dimension nx(deg+1) 7 | #' @param thetaMat The n-by-p matrix with dispersion parameters 8 | #' @param muMarg offset matrix of size nxp 9 | #' @param psi a scalar, the importance parameter 10 | #' @param v an integer, one plus the degree of the response function 11 | #' @param p an integer, the number of taxa 12 | #' @param allowMissingness A boolean, are missing values present 13 | #' @param naId The numeric index of the missing values in X 14 | #' @param ... further arguments passed on to the jacobian 15 | #' 16 | #' The parameters are restricted to be normalized, i.e. all squared intercepts, 17 | #' first order and second order parameters sum to 1 18 | #' 19 | #' @return The evaluation of the score functions, a vector of length (p+1)* 20 | #' (deg+1) 21 | #' 22 | respFunScoreMat = function(betas, X, reg, 23 | thetaMat, muMarg, psi, p, v, allowMissingness, naId,...) { 24 | NBparams = matrix(betas[seq_len(p * v)], 25 | ncol = p) 26 | mu = exp((reg %*% NBparams) * psi) * 27 | muMarg 28 | X = correctXMissingness(X, mu, allowMissingness, naId) 29 | score = crossprod(reg, (X - mu)/(1 + 30 | mu/thetaMat)) * psi + 2 * betas[seq_len(v) + 31 | p * v] * NBparams 32 | norm = rowSums(NBparams^2) - 1 33 | return(c(score, norm)) #Taxon per taxon 34 | } 35 | -------------------------------------------------------------------------------- /R/F_rowMultiply.R: -------------------------------------------------------------------------------- 1 | #' A function to efficiently row multiply a matrix and a vector 2 | #' 3 | #' @details Memory intensive but that does not matter with given matrix sizes 4 | #' 5 | #' @param matrix a numeric matrix of dimension a-by-b 6 | #' @param vector a numeric vector of length b 7 | #' 8 | #' t(t(matrix)*vector) but then faster 9 | #' 10 | #' @return a matrix, row multplied by the vector 11 | rowMultiply = function(matrix, vector) { 12 | matrix * rep(vector, rep(nrow(matrix), 13 | length(vector))) 14 | } 15 | -------------------------------------------------------------------------------- /R/F_seq_k.R: -------------------------------------------------------------------------------- 1 | #' A small auxiliary function for the length of the lambdas 2 | #' 3 | #' @param y an integer, the current dimension 4 | #' @param nLambda1s the number of centering restrictions 5 | #' 6 | #' @return a vector containing the ranks of the current lagrangian multipliers 7 | seq_k = function(y, nLambda1s = 1) { 8 | (y - 1) * (1 + nLambda1s + (y - 2)/2) + 9 | seq_len(y + nLambda1s) 10 | } 11 | -------------------------------------------------------------------------------- /R/F_trimOnConfounders.R: -------------------------------------------------------------------------------- 1 | #' Trim based on confounders to avoid taxa with only zero counts 2 | #' 3 | #' @param confounders a nxt confounder matrix 4 | #' @param X the nxp data matrix 5 | #' @param prevCutOff a scalar between 0 and 1, the prevalence cut off 6 | #' @param minFraction a scalar between 0 and 1, 7 | #' each taxon's total abundance should equal at least the number of samples n 8 | #' times minFraction, otherwise it is trimmed 9 | #' @param n the number of samples 10 | #' 11 | #' Should be called prior to fitting the independence model 12 | #' 13 | #' @return A trimmed data matrix nxp' 14 | trimOnConfounders = function(confounders, 15 | X, prevCutOff, minFraction, n) { 16 | trimmingID = apply(X, 2, function(x) { 17 | # Over taxa Over confounding variables 18 | any(apply(confounders, 2, function(conf) { 19 | tapply(X = x, INDEX = conf, FUN = function(y) { 20 | mean(!(y %in% c(0, NA))) <= prevCutOff | 21 | sum(y, na.rm = TRUE) < (n * minFraction) 22 | }) #Any all-zero subgroup? 23 | })) 24 | }) 25 | 26 | if (sum(!trimmingID) <= 1) { 27 | stop("All taxa would be trimmed, 28 | please provide a covariate with less levels, 29 | or reduce the prevalence cut-off! \n") 30 | } 31 | 32 | X[, !trimmingID] #Return trimmed X 33 | } 34 | -------------------------------------------------------------------------------- /README_figs/README-plotCond-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotCond-1.png -------------------------------------------------------------------------------- /README_figs/README-plotNPTriplot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotNPTriplot-1.png -------------------------------------------------------------------------------- /README_figs/README-plotRichness-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotRichness-1.png -------------------------------------------------------------------------------- /README_figs/README-plotUnconstrainedRCMall-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotUnconstrainedRCMall-1.png -------------------------------------------------------------------------------- /README_figs/README-plotUnconstrainedRCMallColour-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotUnconstrainedRCMallColour-1.png -------------------------------------------------------------------------------- /README_figs/README-plotlin2cor-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotlin2cor-1.png -------------------------------------------------------------------------------- /README_figs/README-plotlin3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotlin3-1.png -------------------------------------------------------------------------------- /README_figs/README-plotlin3Triplot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotlin3Triplot-1.png -------------------------------------------------------------------------------- /data/Zeller.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/data/Zeller.RData -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | year <- sub("-.*", "", meta$Date) 2 | note <- sprintf("R package version %s", meta$Version) 3 | 4 | bibentry(bibtype = "Manual", 5 | title = "{RCM}: A Unified Approach to Unconstrained and Constrained Visualization of Microbiome Read Count Data", 6 | author = c(person(family = "Hawinkel", "Stijn"), 7 | person(family = "Kerckhof", "Frederiek-Maarten"), 8 | person(family = "Bijnens", "Luc"), 9 | person(family = "Thas", "Olivier"), 10 | person("R Core Team")), 11 | year = year, 12 | note = note) 13 | -------------------------------------------------------------------------------- /inst/NEWS.md: -------------------------------------------------------------------------------- 1 | 2 | # 0.2.0 3 | 4 | - Importance parameters \(\psi\) are no longer calculated when 5 | non-parametric response functions are used. 6 | 7 | # 0.3.0 8 | 9 | - Importance parameters \(\psi\) are enabled again when non-parametric 10 | response functions are used, but not used for plotting. 11 | - 2D sample plots for constrained ordination with non-parametric 12 | response functions have been disabled, as they are not 13 | interpretable. Variable plots are the only 2D plots still allowed 14 | - Explained deviance and inertia can be plotted on the axes rahter 15 | than the \(\psi\)’s using the “plotPsi” argument to the plot.RCM() 16 | function. 17 | - Possibility to provide lower dimensional fits has been disabled. 18 | *RCM* is fast enough to fit the whole model. 19 | 20 | # 1.0.0 21 | 22 | - Release on BioConductor 23 | 24 | # 1.0.1 25 | 26 | - Bug fix in buildCovMat() to avoid false warning 27 | - Check for alias structure in confounder and covariate matrices 28 | 29 | # 1.2.0 30 | 31 | - Missing values in count matrix are now allowed. They simply do not 32 | contribute to the parameter estimation, but the rest of the row (or 33 | column) is still used. 34 | 35 | # 1.2.1 36 | 37 | - Vertical reference line in residual plot 38 | - Bug fix for problematic variable names 39 | 40 | # 1.2.2 41 | 42 | - Moving the online manual information to the vignette 43 | 44 | # 1.2.3 45 | 46 | - Rename *a* and *b* to *rowExp* and *colExp* to avoid partial 47 | matching 48 | - Allow *rowExp* and *colExp* to be adapted for constrained 49 | correspondence analysis starting values as well 50 | 51 | # 1.2.4 52 | 53 | - Adding a new inflVar variable to disambiguate in the influence 54 | plotting 55 | - More argument checking + tests for the plot.RCM function 56 | 57 | # 1.5.2 58 | 59 | - Avoid returning nulls for residualPlot 60 | 61 | # 1.5.4 62 | 63 | - A note in the vignette and in the help file of plot.RCM regarding 64 | limited number of combinations of constraining variables. Also a 65 | warning is now thrown 66 | 67 | # 1.5.5 68 | 69 | - Bug fix for higher dimension residualPlot function, and tests for 70 | this function 71 | 72 | # 1.5.6 73 | 74 | - Replace deprecated guides( =FALSE) by guides(=“none”) 75 | 76 | # 1.5.7 77 | 78 | - Update vignette to number table of contents 79 | 80 | # 1.11.0 81 | 82 | - Added FAQ section in vignette with first frequent question on number 83 | of samples not shown. 84 | - Fixed bugs for plots of data with missing values, and added tests. 85 | 86 | # 1.11.2 87 | 88 | - Explicitly import stats::model.matrix, and only load necessary VGAM 89 | functions 90 | 91 | # 1.11.3 92 | 93 | - For the unconstrained models: fit feature models one by one and 94 | Gram-Schmidt orthogonalize and center afterwards, rather than using 95 | Lagrange multipliers and huge Jacobian matrices. This will use less 96 | memory and speed up computations, but *may yield slightly different 97 | solutions*. Nothing changes for the constrained models. 98 | 99 | # 1.11.4 100 | 101 | - Introduction of permanova testing for user-supplied groups using the _permanova_ function. 102 | -------------------------------------------------------------------------------- /inst/fits/zellerFits.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/inst/fits/zellerFits.RData -------------------------------------------------------------------------------- /man/GramSchmidt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_GramSchmidt.R 3 | \name{GramSchmidt} 4 | \alias{GramSchmidt} 5 | \title{Gram-Schmidt orthogonalization of vectors} 6 | \usage{ 7 | GramSchmidt(x, otherVecs, weights = rep(1, length(x))) 8 | } 9 | \arguments{ 10 | \item{x}{The vector that is to be orthogonalized} 11 | 12 | \item{otherVecs}{a matrix; x is orthogonalized with respect to its rows} 13 | 14 | \item{weights}{The weights used in the orthogonalization} 15 | } 16 | \value{ 17 | The orthogonalized vector 18 | } 19 | \description{ 20 | Gram-Schmidt orthogonalization of vectors 21 | } 22 | -------------------------------------------------------------------------------- /man/JacCol_constr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_JacCol_constr.R 3 | \name{JacCol_constr} 4 | \alias{JacCol_constr} 5 | \title{Jacobian of the constrained analysis with linear response function.} 6 | \usage{ 7 | JacCol_constr(betas, X, reg, theta, muMarg, psi, allowMissingness, naId) 8 | } 9 | \arguments{ 10 | \item{betas}{a vector of v parameters of the response function 11 | of a single taxon} 12 | 13 | \item{X}{the count vector of length n} 14 | 15 | \item{reg}{a n-by-v matrix of regressors} 16 | 17 | \item{theta}{The dispersion parameter of this taxon} 18 | 19 | \item{muMarg}{offset of length n} 20 | 21 | \item{psi}{a scalar, the importance parameter} 22 | 23 | \item{allowMissingness}{A boolean, are missing values present} 24 | 25 | \item{naId}{The numeric index of the missing values in X 26 | 27 | Even though this approach does not imply normalization over 28 | the parameters of all taxa, it is very fast 29 | and they can be normalized afterwards} 30 | } 31 | \value{ 32 | The jacobian, a square symmetric matrix of dimension v 33 | } 34 | \description{ 35 | Jacobian of the constrained analysis with linear response function. 36 | } 37 | -------------------------------------------------------------------------------- /man/JacCol_constr_noLab.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_JacCol_constr_noLab.R 3 | \name{JacCol_constr_noLab} 4 | \alias{JacCol_constr_noLab} 5 | \title{The jacobian of the response function without taxon labels} 6 | \usage{ 7 | JacCol_constr_noLab( 8 | betas, 9 | X, 10 | reg, 11 | thetasMat, 12 | muMarg, 13 | psi, 14 | n, 15 | v, 16 | preFabMat, 17 | allowMissingness, 18 | naId 19 | ) 20 | } 21 | \arguments{ 22 | \item{betas}{a vector of regression parameters with length v} 23 | 24 | \item{X}{the nxp data matrix} 25 | 26 | \item{reg}{a matrix of regressors of dimension nxv} 27 | 28 | \item{thetasMat}{A matrix of dispersion parameters} 29 | 30 | \item{muMarg}{offset matrix of dimension nxp} 31 | 32 | \item{psi}{a scalar, the importance parameter} 33 | 34 | \item{n}{an integer, number of rows of X} 35 | 36 | \item{v}{an integer, the number of parameters of the response function} 37 | 38 | \item{preFabMat}{a prefabricated matrix} 39 | 40 | \item{allowMissingness}{A boolean, are missing values present} 41 | 42 | \item{naId}{The numeric index of the missing values in X} 43 | } 44 | \value{ 45 | The jacobian (a v-by-v matrix) 46 | } 47 | \description{ 48 | The jacobian of the response function without taxon labels 49 | } 50 | -------------------------------------------------------------------------------- /man/LR_nb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_LR_nb.R 3 | \name{LR_nb} 4 | \alias{LR_nb} 5 | \title{Get the value of the log-likelihood ratio of alpha} 6 | \usage{ 7 | LR_nb( 8 | Alpha, 9 | X, 10 | CC, 11 | responseFun = c("linear", "quadratic", "nonparametric", "dynamic"), 12 | muMarg, 13 | psi, 14 | nleqslv.control = list(trace = FALSE), 15 | n, 16 | NB_params, 17 | NB_params_noLab, 18 | thetaMat, 19 | ncols, 20 | nonParamRespFun, 21 | envGradEst, 22 | ... 23 | ) 24 | } 25 | \arguments{ 26 | \item{Alpha}{a vector of length d, the environmental gradient} 27 | 28 | \item{X}{the n-by-p count matrix} 29 | 30 | \item{CC}{the n-by-d covariate matrix} 31 | 32 | \item{responseFun}{a character string indicating 33 | the type of response function} 34 | 35 | \item{muMarg}{an n-by-p offset matrix} 36 | 37 | \item{psi}{a scalar, an importance parameter} 38 | 39 | \item{nleqslv.control}{the control list for the nleqslv() function} 40 | 41 | \item{n}{number of samples} 42 | 43 | \item{NB_params}{Starting values for the NB_params} 44 | 45 | \item{NB_params_noLab}{Starting values for the NB_params without label} 46 | 47 | \item{thetaMat}{a matrix of size n-by-p with estimated dispersion parameters} 48 | 49 | \item{ncols}{a scalar, the number of columns of X} 50 | 51 | \item{nonParamRespFun}{A list, the result of the estNPresp() function} 52 | 53 | \item{envGradEst}{a character string, 54 | indicating how the environmental gradient should be fitted. 55 | 'LR' using the likelihood-ratio criterion, 56 | or 'ML' a full maximum likelihood solution} 57 | 58 | \item{...}{Further arguments passed on to other functions 59 | 60 | DON'T USE 'p' as variable name, 61 | partial matching in the grad-function in the numDeriv package} 62 | } 63 | \value{ 64 | : a scalar, the evaluation of the log-likelihood ratio 65 | at the given alpha 66 | } 67 | \description{ 68 | Get the value of the log-likelihood ratio of alpha 69 | } 70 | -------------------------------------------------------------------------------- /man/LR_nb_Jac.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_LR_nb_Jac.R 3 | \name{LR_nb_Jac} 4 | \alias{LR_nb_Jac} 5 | \title{A function that returns the Jacobian of the likelihood ratio} 6 | \usage{ 7 | LR_nb_Jac( 8 | Alpha, 9 | X, 10 | CC, 11 | responseFun = c("linear", "quadratic", "nonparametric", "dynamic"), 12 | psi, 13 | NB_params, 14 | NB_params_noLab, 15 | d, 16 | alphaK, 17 | k, 18 | centMat, 19 | nLambda, 20 | nLambda1s, 21 | thetaMat, 22 | muMarg, 23 | n, 24 | ncols, 25 | preFabMat, 26 | envGradEst, 27 | allowMissingness, 28 | naId, 29 | ... 30 | ) 31 | } 32 | \arguments{ 33 | \item{Alpha}{a vector of length d + k*(2+(k-1)/2), 34 | the environmental gradient plus the lagrangian multipliers} 35 | 36 | \item{X}{the n-by-p count matrix} 37 | 38 | \item{CC}{a n-by-d covariate vector} 39 | 40 | \item{responseFun}{a character string indicating 41 | the type of response function} 42 | 43 | \item{psi}{a scalar, an importance parameter} 44 | 45 | \item{NB_params}{Starting values for the NB_params} 46 | 47 | \item{NB_params_noLab}{Starting values for the NB_params without label} 48 | 49 | \item{d}{an integer, the number of covariate parameters} 50 | 51 | \item{alphaK}{a matrix of environmental gradients of lower dimensions} 52 | 53 | \item{k}{an integer, the current dimension} 54 | 55 | \item{centMat}{a nLambda1s-by-d centering matrix} 56 | 57 | \item{nLambda}{an integer, number of lagrangian multipliers} 58 | 59 | \item{nLambda1s}{an integer, number of centering restrictions} 60 | 61 | \item{thetaMat}{a matrix of size n-by-p with estimated dispersion parameters} 62 | 63 | \item{muMarg}{an n-by-p offset matrix} 64 | 65 | \item{n}{an integer, the number of rows of X} 66 | 67 | \item{ncols}{a scalar, the number of columns of X} 68 | 69 | \item{preFabMat}{a prefabricated matrix} 70 | 71 | \item{envGradEst}{a character string, 72 | indicating how the environmental gradient should be fitted. 73 | 'LR' using the likelihood-ratio criterion, 74 | or 'ML' a full maximum likelihood solution} 75 | 76 | \item{allowMissingness}{A boolean, are missing values present} 77 | 78 | \item{naId}{The numeric index of the missing values in X} 79 | 80 | \item{...}{Further arguments passed on to other functions} 81 | } 82 | \value{ 83 | A symmetric matrix, the evaluated Jacobian 84 | } 85 | \description{ 86 | A function that returns the Jacobian of the likelihood ratio 87 | } 88 | -------------------------------------------------------------------------------- /man/NBalphaInfl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_NBalphaInfl.R 3 | \name{NBalphaInfl} 4 | \alias{NBalphaInfl} 5 | \title{Calculate the components of the influence functions} 6 | \usage{ 7 | NBalphaInfl(rcm, Dim) 8 | } 9 | \arguments{ 10 | \item{rcm}{an rcm object} 11 | 12 | \item{Dim}{the required dimension} 13 | } 14 | \value{ 15 | An n-by-p-by-d array with the influence of every observation 16 | on every alpha parameter 17 | } 18 | \description{ 19 | Calculate the components of the influence functions 20 | } 21 | -------------------------------------------------------------------------------- /man/NBcolInfl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_NBcolInfl.R 3 | \name{NBcolInfl} 4 | \alias{NBcolInfl} 5 | \title{The influence function for the column scores} 6 | \usage{ 7 | NBcolInfl(rcm, Dim = 1) 8 | } 9 | \arguments{ 10 | \item{rcm}{an rcm object} 11 | 12 | \item{Dim}{the required dimension} 13 | } 14 | \value{ 15 | A list with components 16 | \item{score}{a matrix with components of the score function} 17 | \item{InvJac}{A square matrix of dimension p with the components of the 18 | Jacobian related to the column scores} 19 | } 20 | \description{ 21 | The influence function for the column scores 22 | } 23 | -------------------------------------------------------------------------------- /man/NBjacobianAbundsOld.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_NBjacobianAbunds.R 3 | \name{NBjacobianAbundsOld} 4 | \alias{NBjacobianAbundsOld} 5 | \title{Jacobian for the column components of the independence model} 6 | \usage{ 7 | NBjacobianAbundsOld(beta, X, reg, thetas, allowMissingness, naId) 8 | } 9 | \arguments{ 10 | \item{beta}{a vector of length p with current abundance estimates} 11 | 12 | \item{X}{a n-by-p count matrix} 13 | 14 | \item{reg}{a vector of length n with library sizes estimates} 15 | 16 | \item{thetas}{a n-by-p matrix with overdispersion estimates in the rows} 17 | 18 | \item{allowMissingness}{A boolean, are missing values present} 19 | 20 | \item{naId}{The numeric index of the missing values in X} 21 | } 22 | \value{ 23 | a diagonal matrix of dimension p with evaluations 24 | of the jacobian function 25 | } 26 | \description{ 27 | Jacobian for the column components of the independence model 28 | } 29 | -------------------------------------------------------------------------------- /man/NBjacobianColNP.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_NBjacobianColNP.R 3 | \name{NBjacobianColNP} 4 | \alias{NBjacobianColNP} 5 | \title{Jacobian function for the estimation of a third degree GLM} 6 | \usage{ 7 | NBjacobianColNP(beta, X, reg, theta, muMarg) 8 | } 9 | \arguments{ 10 | \item{beta}{vector of any length} 11 | 12 | \item{X}{the data vector of length n} 13 | 14 | \item{reg}{a nxlength(beta) regressor matrix} 15 | 16 | \item{theta}{a scalar, the overdispersion} 17 | 18 | \item{muMarg}{the offset of length n} 19 | } 20 | \value{ 21 | A matrix of dimension 8-by-8 22 | } 23 | \description{ 24 | Jacobian function for the estimation of a third degree GLM 25 | } 26 | -------------------------------------------------------------------------------- /man/NBjacobianColOld.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_NBjacobianCol.R 3 | \name{NBjacobianColOld} 4 | \alias{NBjacobianColOld} 5 | \title{Jacobian for the estimation of the column scores} 6 | \usage{ 7 | NBjacobianColOld( 8 | beta, 9 | X, 10 | reg, 11 | thetas, 12 | muMarg, 13 | k, 14 | n, 15 | p, 16 | colWeights, 17 | nLambda, 18 | cMatK, 19 | preFabMat, 20 | Jac, 21 | allowMissingness, 22 | naId 23 | ) 24 | } 25 | \arguments{ 26 | \item{beta}{vector of length p+1+1+(k-1): p row scores, 1 centering, 27 | one normalization 28 | and (k-1) orhtogonality lagrangian multipliers} 29 | 30 | \item{X}{the nxp data matrix} 31 | 32 | \item{reg}{a nx1 regressor matrix: outer product of rowScores and psis} 33 | 34 | \item{thetas}{nxp matrix with the dispersion parameters 35 | (converted to matrix for numeric reasons)} 36 | 37 | \item{muMarg}{the nxp offset} 38 | 39 | \item{k}{an integer, the dimension of the RC solution} 40 | 41 | \item{n}{an integer, the number of samples} 42 | 43 | \item{p}{an integer, the number of taxa} 44 | 45 | \item{colWeights}{the weights used for the restrictions} 46 | 47 | \item{nLambda}{an integer, the number of restrictions} 48 | 49 | \item{cMatK}{the lower dimensions of the colScores} 50 | 51 | \item{preFabMat}{a prefab matrix, (1+X/thetas)} 52 | 53 | \item{Jac}{an empty Jacobian matrix} 54 | 55 | \item{allowMissingness}{A boolean, are missing values present} 56 | 57 | \item{naId}{The numeric index of the missing values in X} 58 | } 59 | \value{ 60 | A matrix of dimension p+1+1+(k-1) with evaluations of the Jacobian 61 | } 62 | \description{ 63 | Jacobian for the estimation of the column scores 64 | } 65 | -------------------------------------------------------------------------------- /man/NBjacobianLibSizes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_NBjacobianLibSizes.R 3 | \name{NBjacobianLibSizes} 4 | \alias{NBjacobianLibSizes} 5 | \title{Jacobian for the raw components of the independence model} 6 | \usage{ 7 | NBjacobianLibSizes(beta, X, reg, thetas, allowMissingness, naId) 8 | } 9 | \arguments{ 10 | \item{beta}{a vector of length n with current library size estimates} 11 | 12 | \item{X}{a n-by-p count matrix} 13 | 14 | \item{reg}{a vector of length p with relative abundance estimates} 15 | 16 | \item{thetas}{a n-by-p matrix with overdispersion estimates in the rows} 17 | 18 | \item{allowMissingness}{A boolean, are missing values present} 19 | 20 | \item{naId}{The numeric index of the missing values in X} 21 | } 22 | \value{ 23 | a diagonal matrix of dimension n: the Fisher information matrix 24 | } 25 | \description{ 26 | Jacobian for the raw components of the independence model 27 | } 28 | -------------------------------------------------------------------------------- /man/NBjacobianPsi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_NBjacobianPsi.R 3 | \name{NBjacobianPsi} 4 | \alias{NBjacobianPsi} 5 | \title{Jacobian for the psi of a given dimension} 6 | \usage{ 7 | NBjacobianPsi(beta, X, reg, muMarg, theta, preFabMat, allowMissingness, naId) 8 | } 9 | \arguments{ 10 | \item{beta}{a scalar, the current estimate} 11 | 12 | \item{X}{the n-by-p count matrix} 13 | 14 | \item{reg}{the regressor matrix, 15 | the outer product of current row and column scores} 16 | 17 | \item{muMarg}{the nxp offset matrix} 18 | 19 | \item{theta}{a n-by-p matrix with the dispersion parameters} 20 | 21 | \item{preFabMat}{a prefab matrix, (1+X/thetas)} 22 | 23 | \item{allowMissingness}{A boolean, are missing values present} 24 | 25 | \item{naId}{The numeric index of the missing values in X} 26 | } 27 | \value{ 28 | The evaluation of the jacobian function at beta, a 1-by-1 matrix 29 | } 30 | \description{ 31 | Jacobian for the psi of a given dimension 32 | } 33 | -------------------------------------------------------------------------------- /man/NBjacobianRow.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_NBjacobianRow.R 3 | \name{NBjacobianRow} 4 | \alias{NBjacobianRow} 5 | \title{A jacobian function of the NB for the row scores} 6 | \usage{ 7 | NBjacobianRow( 8 | beta, 9 | X, 10 | reg, 11 | thetas, 12 | muMarg, 13 | k, 14 | n, 15 | p, 16 | rowWeights, 17 | nLambda, 18 | rMatK, 19 | preFabMat, 20 | Jac, 21 | allowMissingness, 22 | naId 23 | ) 24 | } 25 | \arguments{ 26 | \item{beta}{a vector of of length n + k +1 regression parameters to optimize} 27 | 28 | \item{X}{the data matrix of dimensions nxp} 29 | 30 | \item{reg}{a 1xp regressor matrix: outer product of column scores and psis} 31 | 32 | \item{thetas}{nxp matrix with the dispersion parameters 33 | (converted to matrix for numeric reasons)} 34 | 35 | \item{muMarg}{an nxp offset matrix} 36 | 37 | \item{k}{a scalar, the dimension of the RC solution} 38 | 39 | \item{n}{a scalar, the number of samples} 40 | 41 | \item{p}{a scalar, the number of taxa} 42 | 43 | \item{rowWeights}{a vector of length n, the weights used for the restrictions} 44 | 45 | \item{nLambda}{an integer, the number of lagrangian multipliers} 46 | 47 | \item{rMatK}{the lower dimension row scores} 48 | 49 | \item{preFabMat}{a prefab matrix, (1+X/thetas)} 50 | 51 | \item{Jac}{an empty Jacobian matrix} 52 | 53 | \item{allowMissingness}{A boolean, are missing values present} 54 | 55 | \item{naId}{The numeric index of the missing values in X} 56 | } 57 | \value{ 58 | a symmetric jacobian matrix of size n+k + 1 59 | } 60 | \description{ 61 | A jacobian function of the NB for the row scores 62 | } 63 | -------------------------------------------------------------------------------- /man/NBpsiInfl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_NBpsiInfl.R 3 | \name{NBpsiInfl} 4 | \alias{NBpsiInfl} 5 | \title{The influence function for the psis} 6 | \usage{ 7 | NBpsiInfl(rcm, Dim = 1) 8 | } 9 | \arguments{ 10 | \item{rcm}{an rcm object} 11 | 12 | \item{Dim}{the required dimensions} 13 | } 14 | \value{ 15 | The influence of every single observation 16 | on the psi value of this dimension 17 | } 18 | \description{ 19 | The influence function for the psis 20 | } 21 | -------------------------------------------------------------------------------- /man/NBrowInfl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_NBrowInfl.R 3 | \name{NBrowInfl} 4 | \alias{NBrowInfl} 5 | \title{The influence function for the row scores} 6 | \usage{ 7 | NBrowInfl(rcm, Dim = 1) 8 | } 9 | \arguments{ 10 | \item{rcm}{an rcm object} 11 | 12 | \item{Dim}{the required dimension} 13 | } 14 | \value{ 15 | A list with components 16 | \item{score}{a matrix with components of the score function} 17 | \item{InvJac}{A square matrix of dimension n with the components 18 | of the Jacobian related to the row scores} 19 | } 20 | \description{ 21 | The influence function for the row scores 22 | } 23 | -------------------------------------------------------------------------------- /man/RCM.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_RCM.R 3 | \name{RCM} 4 | \alias{RCM} 5 | \alias{RCM,phyloseq-method} 6 | \alias{RCM,matrix-method} 7 | \title{Wrapper function for the RCM() function} 8 | \usage{ 9 | RCM(dat, ...) 10 | 11 | \S4method{RCM}{phyloseq}(dat, covariates = NULL, confounders = NULL, ...) 12 | 13 | \S4method{RCM}{matrix}( 14 | dat, 15 | k = 2, 16 | round = FALSE, 17 | prevCutOff = 0.05, 18 | minFraction = 0.1, 19 | rowWeights = "uniform", 20 | colWeights = "marginal", 21 | confModelMat = NULL, 22 | confTrimMat = NULL, 23 | covModelMat = NULL, 24 | centMat = NULL, 25 | allowMissingness = FALSE, 26 | ... 27 | ) 28 | } 29 | \arguments{ 30 | \item{dat}{an nxp count matrix or a phyloseq object with an otu_table slot} 31 | 32 | \item{...}{Further arguments passed on to the RCM.NB() function} 33 | 34 | \item{covariates}{In case 'dat' is a phyloseq object, 35 | the names of the sample 36 | variables to be used as covariates in the constrained analysis, 37 | or 'all' to 38 | indicate all variables to be used. 39 | In case 'dat' is a matrix, a nxf matrix 40 | or dataframe of covariates. 41 | Character variables will be converted to 42 | factors, with a warning. Defaults to NULL, 43 | in which case an unconstrained 44 | analysis is carried out.} 45 | 46 | \item{confounders}{In case 'dat' is a phyloseq object, 47 | the names of the sample variables to be used as confounders 48 | to be filtered 49 | out. In case 'dat' is a matrix, a nxf dataframe 50 | of confounders. 51 | Character variables will be converted to factors, with a warning. 52 | Defaults to NULL, in which case no filtering occurs.} 53 | 54 | \item{k}{an integer, the number of dimensions of the RCM solution} 55 | 56 | \item{round}{a boolean, whether to round to nearest integer. Defaults to 57 | FALSE.} 58 | 59 | \item{prevCutOff}{a scalar, the prevalance cutoff for the trimming. 60 | Defaults to 2.5e-2} 61 | 62 | \item{minFraction}{a scalar, each taxon's total abundance 63 | should equal 64 | at least the number of samples n times minFraction, 65 | otherwise it is trimmed. 66 | Defaults to 10\%} 67 | 68 | \item{rowWeights, colWeights}{character strings, 69 | the weighting procedures for the normalization of row and column scores. 70 | Defaults to 'uniform' and 'marginal' respectively} 71 | 72 | \item{confTrimMat, confModelMat, covModelMat, centMat}{Dedicated model matrices 73 | constructed based on phyloseq object.} 74 | 75 | \item{allowMissingness}{A boolean, should NA values be tolerated?} 76 | } 77 | \value{ 78 | see \code{\link{RCM_NB}} 79 | } 80 | \description{ 81 | This is a wrapper function, 82 | which currently only fits the negative binomial distribution, 83 | but which could easily be extended to other ones. 84 | } 85 | \details{ 86 | This function should be called on a raw count matrix, 87 | without rarefying or normalization to proportions. 88 | This functions trims on prevalence and total abundance to avoid instability 89 | of the algorithm. Covariate and confounder matrices are constructed, 90 | so that everything is passed on 91 | to the workhorse function RCM.NB() as matrices. 92 | } 93 | \examples{ 94 | data(Zeller) 95 | require(phyloseq) 96 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:100], 97 | prune_samples(sample_names(Zeller)[1:50], Zeller)) 98 | zellerRCM = RCM(tmpPhy, round = TRUE) 99 | 100 | } 101 | \seealso{ 102 | \code{\link{RCM_NB}},\code{\link{plot.RCM}}, 103 | \code{\link{residualPlot}},\code{\link{plotRespFun}} 104 | } 105 | -------------------------------------------------------------------------------- /man/Zeller.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_data.R 3 | \docType{data} 4 | \name{Zeller} 5 | \alias{Zeller} 6 | \title{Microbiomes of colorectal cancer patients and healthy controls} 7 | \format{ 8 | A phyloseq object with an OTU-table and sample data 9 | \describe{ 10 | \item{otu_table}{Count data matrix of 709 taxa in 194 samples} 11 | \item{sample_data}{Data frame of patient covariates} 12 | } 13 | } 14 | \source{ 15 | \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4299606/} 16 | } 17 | \usage{ 18 | Zeller 19 | } 20 | \description{ 21 | Microbiome sequencing data of colorectal cancer patients, 22 | patients with small adenoma and healthy controls, 23 | together with other baseline covariates 24 | } 25 | \keyword{datasets} 26 | -------------------------------------------------------------------------------- /man/addOrthProjection.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_addOrthProjection.R 3 | \name{addOrthProjection} 4 | \alias{addOrthProjection} 5 | \title{This function adds orthogonal projections to a given plot} 6 | \usage{ 7 | addOrthProjection( 8 | RCMplot, 9 | sample = NULL, 10 | species = NULL, 11 | variable = NULL, 12 | Dims = c(1, 2), 13 | addLabel = FALSE, 14 | labPos = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{RCMplot}{the RCMplot object} 19 | 20 | \item{sample, species, variable}{names or approximate coordinates of sample, 21 | species or variable} 22 | 23 | \item{Dims}{The dimensions of the solutions that have been plotted} 24 | 25 | \item{addLabel}{a boolean, should the r-s-psi label be added?} 26 | 27 | \item{labPos}{the position of the label. Will be calculated if not provided} 28 | } 29 | \value{ 30 | a modified ggplot object that contains the geom_segment object 31 | that draws the projection 32 | } 33 | \description{ 34 | This function adds orthogonal projections to a given plot 35 | } 36 | \examples{ 37 | data(Zeller) 38 | require(phyloseq) 39 | tmpPhy = prune_taxa(taxa_names(Zeller)[seq_len(100)], 40 | prune_samples(sample_names(Zeller)[seq_len(50)], Zeller)) 41 | zellerRCM = RCM(tmpPhy, k = 2, round = TRUE) 42 | zellerPlot = plot(zellerRCM, returnCoords = TRUE) 43 | addOrthProjection(zellerPlot, species = c(-0.35,1.1), sample = c(1,1.2)) 44 | } 45 | \seealso{ 46 | \code{\link{plot.RCM}} 47 | } 48 | -------------------------------------------------------------------------------- /man/arrayprod.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_arrayProd.R 3 | \name{arrayprod} 4 | \alias{arrayprod} 5 | \title{An auxiliary R function to 'array' multiply an array with a vector, 6 | kindly provided by Joris Meys} 7 | \usage{ 8 | arrayprod(x, y) 9 | } 10 | \arguments{ 11 | \item{x}{a axbxc array} 12 | 13 | \item{y}{a vector of length c} 14 | } 15 | \value{ 16 | a axb matrix. The ij-th element equals sum(x[i,j,]*y) 17 | } 18 | \description{ 19 | An auxiliary R function to 'array' multiply an array with a vector, 20 | kindly provided by Joris Meys 21 | } 22 | -------------------------------------------------------------------------------- /man/buildCentMat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_buildCentMat.R 3 | \name{buildCentMat} 4 | \alias{buildCentMat} 5 | \title{A function to build a centering matrix based on a dataframe} 6 | \usage{ 7 | buildCentMat(object) 8 | } 9 | \arguments{ 10 | \item{object}{an rcm object or dataframe} 11 | } 12 | \value{ 13 | a centering matrix consisting of ones and zeroes, 14 | or a list with components 15 | \item{centMat}{a centering matrix consisting of ones and zeroes} 16 | \item{datFrame}{The dataframe with factors with one level removed} 17 | } 18 | \description{ 19 | A function to build a centering matrix based on a dataframe 20 | } 21 | -------------------------------------------------------------------------------- /man/buildConfMat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_buildConfMat.R 3 | \name{buildConfMat} 4 | \alias{buildConfMat} 5 | \title{A function to build the confounder matrices} 6 | \usage{ 7 | buildConfMat(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a matrix, data frame or character string} 11 | 12 | \item{...}{further arguments passed on to other methods 13 | 14 | For the preliminary trimming, we do not include an intercept, 15 | but we do include all the levels of the factors using contrasts=FALSE: 16 | we want to do the trimming in every subgroup, so no hidden reference levels 17 | For the filtering we just use a model with an intercept and 18 | treatment coding, here the interest is only in adjusting the offset} 19 | } 20 | \value{ 21 | a list with components 22 | \item{confModelMatTrim}{A confounder matrix without intercept, with all 23 | levels of factors present. This will be used to trim out taxa that have 24 | zero abundances in any subgroup defined by confounders} 25 | \item{confModelMat}{A confounder matrix with intercept, 26 | and with reference levels for factors absent. 27 | This will be used to fit the model to modify the independence model, 28 | and may include continuous variables} 29 | } 30 | \description{ 31 | A function to build the confounder matrices 32 | } 33 | -------------------------------------------------------------------------------- /man/buildConfMat.character.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_buildConfMat.R 3 | \name{buildConfMat.character} 4 | \alias{buildConfMat.character} 5 | \title{buildConfMat.character} 6 | \usage{ 7 | \method{buildConfMat}{character}(confounders, physeq) 8 | } 9 | \arguments{ 10 | \item{confounders}{a numeric matrix of confounders} 11 | 12 | \item{physeq}{a physeq object with a sample_data slot} 13 | } 14 | \value{ 15 | see buidConfMat.numeric 16 | } 17 | \description{ 18 | buildConfMat.character 19 | } 20 | -------------------------------------------------------------------------------- /man/buildConfMat.data.frame.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_buildConfMat.R 3 | \name{buildConfMat.data.frame} 4 | \alias{buildConfMat.data.frame} 5 | \title{buildConfMat.data.frame} 6 | \usage{ 7 | \method{buildConfMat}{data.frame}(confounders, n) 8 | } 9 | \arguments{ 10 | \item{confounders}{a data frame of confounders} 11 | 12 | \item{n}{the number of rows of the count matrix} 13 | } 14 | \value{ 15 | see buidConfMat 16 | } 17 | \description{ 18 | buildConfMat.data.frame 19 | } 20 | -------------------------------------------------------------------------------- /man/buildCovMat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_buildCovMat.R 3 | \name{buildCovMat} 4 | \alias{buildCovMat} 5 | \title{A function to build the covariate matrix of the constraints} 6 | \usage{ 7 | buildCovMat(covariates, dat) 8 | } 9 | \arguments{ 10 | \item{covariates}{the covariates, either as dataframe or as character string} 11 | 12 | \item{dat}{the phyloseq object 13 | 14 | In this case we will 1) Include dummy's for every level of the 15 | categorical variable, and force them to sum to zero. 16 | This is needed for plotting and required for 17 | reference level independent normalization. 18 | 2) Exclude an intercept. The density function f() 19 | will provide this already.} 20 | } 21 | \value{ 22 | a list with components 23 | \item{covModelMat}{The model matrix} 24 | \item{datFrame}{The dataframe used to construct the model matrix} 25 | } 26 | \description{ 27 | A function to build the covariate matrix of the constraints 28 | } 29 | -------------------------------------------------------------------------------- /man/buildDesign.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_buildDesign.R 3 | \name{buildDesign} 4 | \alias{buildDesign} 5 | \title{A function to build the design matrix} 6 | \usage{ 7 | buildDesign(sampleScore, responseFun) 8 | } 9 | \arguments{ 10 | \item{sampleScore}{a vector of environmental scores} 11 | 12 | \item{responseFun}{A character string, indicating the shape 13 | of the response function 14 | 15 | For dynamic response function estimation, the same desing matrix 16 | as for the quadratic one is returned. 17 | Will throw an error when an unknown repsonse function is provided} 18 | } 19 | \value{ 20 | A design matrix of dimension n-by-f 21 | } 22 | \description{ 23 | A function to build the design matrix 24 | } 25 | -------------------------------------------------------------------------------- /man/checkAlias.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_checkAlias.R 3 | \name{checkAlias} 4 | \alias{checkAlias} 5 | \title{Check for alias structures in a dataframe, and throw an error when one is found} 6 | \usage{ 7 | checkAlias(datFrame, covariatesNames) 8 | } 9 | \arguments{ 10 | \item{datFrame}{the data frame to be checked for alias structure} 11 | 12 | \item{covariatesNames}{The names of the variables to be considered} 13 | } 14 | \value{ 15 | Throws an error when an alias structure is detected, 16 | returns invisible otherwise 17 | } 18 | \description{ 19 | Check for alias structures in a dataframe, and throw an error when one is found 20 | } 21 | \examples{ 22 | #Make a dataframe with aliased variables 23 | df = data.frame(foo = rnorm(10), baa = rep(c(TRUE, FALSE), each = 5), 24 | foo2 = factor(rep(c("male", "female"), each = 5))) 25 | checkAlias(df, c("foo", "baa")) 26 | #Check test files for the error being thrown 27 | } 28 | -------------------------------------------------------------------------------- /man/constrCorresp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_constrCorresp.R 3 | \name{constrCorresp} 4 | \alias{constrCorresp} 5 | \title{Constrained correspondence analysis with adapted powers} 6 | \usage{ 7 | constrCorresp( 8 | X, 9 | Y, 10 | rowExp, 11 | colExp, 12 | muMarg = outer(rowSums(X), colSums(X))/sum(X) 13 | ) 14 | } 15 | \arguments{ 16 | \item{X}{outcome matrix} 17 | 18 | \item{Y}{constraining matrix} 19 | 20 | \item{rowExp, colExp}{see ?RCM_NB} 21 | 22 | \item{muMarg}{mean matrix under independence model} 23 | } 24 | \value{ 25 | a list with eigenvalues, aliased variables and environmentam gradients 26 | } 27 | \description{ 28 | Constrained correspondence analysis with adapted powers 29 | } 30 | \details{ 31 | the vegan version, adapted for flexible powers rowExp and colExp 32 | } 33 | -------------------------------------------------------------------------------- /man/correctXMissingness.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_correctXMissingness.R 3 | \name{correctXMissingness} 4 | \alias{correctXMissingness} 5 | \title{Replace missing entries in X by their expectation to set their 6 | contribution to the estimating equations to zero} 7 | \usage{ 8 | correctXMissingness(X, mu, allowMissingness, naId) 9 | } 10 | \arguments{ 11 | \item{X}{the matrix of counts} 12 | 13 | \item{mu}{the matrix of expectations} 14 | 15 | \item{allowMissingness}{A boolean, are missing values present} 16 | 17 | \item{naId}{The numeric index of the missing values in X} 18 | } 19 | \value{ 20 | The matrix X with the NA entries replaced by the 21 | corresponding entries in mu 22 | } 23 | \description{ 24 | Replace missing entries in X by their expectation to set their 25 | contribution to the estimating equations to zero 26 | } 27 | \note{ 28 | This may seem like a hacky approach, but it avoids having to deal 29 | with NAs in functions like crossprod(). 30 | } 31 | -------------------------------------------------------------------------------- /man/dLR_nb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_dLR_nb.R 3 | \name{dLR_nb} 4 | \alias{dLR_nb} 5 | \title{A function that returns the value of the partial derivative of the 6 | log-likelihood ratio to alpha, keeping the response functions fixed} 7 | \usage{ 8 | dLR_nb( 9 | Alpha, 10 | X, 11 | CC, 12 | responseFun = c("linear", "quadratic", "nonparametric", "dynamic"), 13 | psi, 14 | NB_params, 15 | NB_params_noLab, 16 | d, 17 | alphaK, 18 | k, 19 | centMat, 20 | nLambda, 21 | nLambda1s, 22 | thetaMat, 23 | muMarg, 24 | ncols, 25 | envGradEst, 26 | allowMissingness, 27 | naId, 28 | ... 29 | ) 30 | } 31 | \arguments{ 32 | \item{Alpha}{a vector of length d + k*(2+(k-1)/2), 33 | the environmental gradient plus the lagrangian multipliers} 34 | 35 | \item{X}{the n-by-p count matrix} 36 | 37 | \item{CC}{a n-by-d covariate vector} 38 | 39 | \item{responseFun}{a character string indicating 40 | the type of response function} 41 | 42 | \item{psi}{a scalar, an importance parameter} 43 | 44 | \item{NB_params}{Starting values for the NB_params} 45 | 46 | \item{NB_params_noLab}{Starting values for the NB_params without label} 47 | 48 | \item{d}{an integer, the number of covariate parameters} 49 | 50 | \item{alphaK}{a matrix of environmental gradients of lower dimensions} 51 | 52 | \item{k}{an integer, the current dimension} 53 | 54 | \item{centMat}{a nLambda1s-by-d centering matrix} 55 | 56 | \item{nLambda}{an integer, number of lagrangian multipliers} 57 | 58 | \item{nLambda1s}{an integer, number of centering restrictions} 59 | 60 | \item{thetaMat}{a matrix of size n-by-p with estimated dispersion parameters} 61 | 62 | \item{muMarg}{an n-by-p offset matrix} 63 | 64 | \item{ncols}{a scalar, the number of columns of X} 65 | 66 | \item{envGradEst}{a character string, indicating how the 67 | environmental gradient should be fitted. 68 | 'LR' using the likelihood-ratio criterion, 69 | or 'ML' a full maximum likelihood solution} 70 | 71 | \item{allowMissingness}{A boolean, are missing values present} 72 | 73 | \item{naId}{The numeric index of the missing values in X} 74 | 75 | \item{...}{further arguments passed on to other methods} 76 | } 77 | \value{ 78 | : The value of the lagrangian and the constraining equations 79 | } 80 | \description{ 81 | A function that returns the value of the partial derivative of the 82 | log-likelihood ratio to alpha, keeping the response functions fixed 83 | } 84 | -------------------------------------------------------------------------------- /man/dNBabundsOld.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_dNBabunds.R 3 | \name{dNBabundsOld} 4 | \alias{dNBabundsOld} 5 | \title{A score function for the column components of the independence model 6 | (mean relative abundances)} 7 | \usage{ 8 | dNBabundsOld(beta, X, reg, thetas, allowMissingness, naId) 9 | } 10 | \arguments{ 11 | \item{beta}{a vector of length p with current abundance estimates} 12 | 13 | \item{X}{a n-by-p count matrix} 14 | 15 | \item{reg}{a vector of length n with library sizes estimates} 16 | 17 | \item{thetas}{a n-by-p matrix with overdispersion estimates in the rows} 18 | 19 | \item{allowMissingness}{A boolean, are missing values present} 20 | 21 | \item{naId}{The numeric index of the missing values in X} 22 | } 23 | \value{ 24 | a vector of length p with evaluations of the score function 25 | } 26 | \description{ 27 | A score function for the column components of the independence model 28 | (mean relative abundances) 29 | } 30 | -------------------------------------------------------------------------------- /man/dNBlibSizes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_dNBlibSizes.R 3 | \name{dNBlibSizes} 4 | \alias{dNBlibSizes} 5 | \title{A score function for the row components of the independence model 6 | (library sizes)} 7 | \usage{ 8 | dNBlibSizes(beta, X, reg, thetas, allowMissingness, naId) 9 | } 10 | \arguments{ 11 | \item{beta}{a vector of length n with current library size estimates} 12 | 13 | \item{X}{a n-by-p count matrix} 14 | 15 | \item{reg}{a vector of length p with relative abundance estimates} 16 | 17 | \item{thetas}{a n-by-p matrix with overdispersion estimates in the rows} 18 | 19 | \item{allowMissingness}{A boolean, are missing values present} 20 | 21 | \item{naId}{The numeric index of the missing values in X} 22 | } 23 | \value{ 24 | a vector of length n with evaluations of the score function 25 | } 26 | \description{ 27 | A score function for the row components of the independence model 28 | (library sizes) 29 | } 30 | -------------------------------------------------------------------------------- /man/dNBllcolNP.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_dNBllcolNP.R 3 | \name{dNBllcolNP} 4 | \alias{dNBllcolNP} 5 | \title{Estimation of the parameters of a third degree GLM} 6 | \usage{ 7 | dNBllcolNP(beta, X, reg, theta, muMarg, allowMissingness, naId, ...) 8 | } 9 | \arguments{ 10 | \item{beta}{A vector of any length} 11 | 12 | \item{X}{the data vector of length n} 13 | 14 | \item{reg}{a nxlength(beta) regressor matrix} 15 | 16 | \item{theta}{a scalar, the overdispersion} 17 | 18 | \item{muMarg}{the offset of length n} 19 | 20 | \item{allowMissingness}{A boolean, are missing values present} 21 | 22 | \item{naId}{The numeric index of the missing values in X} 23 | 24 | \item{...}{further arguments passed on to the jacobian} 25 | } 26 | \value{ 27 | A vector of the same length as beta with evaluations 28 | of the score function 29 | } 30 | \description{ 31 | Estimation of the parameters of a third degree GLM 32 | } 33 | -------------------------------------------------------------------------------- /man/dNBllcolOld.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_dNBllcol.R 3 | \name{dNBllcolOld} 4 | \alias{dNBllcolOld} 5 | \title{A score function for the estimation of the column scores 6 | in an unconstrained RC(M) model} 7 | \usage{ 8 | dNBllcolOld( 9 | beta, 10 | X, 11 | reg, 12 | thetas, 13 | muMarg, 14 | k, 15 | p, 16 | n, 17 | colWeights, 18 | nLambda, 19 | cMatK, 20 | allowMissingness, 21 | naId, 22 | ... 23 | ) 24 | } 25 | \arguments{ 26 | \item{beta}{vector of length p+1+1+(k-1): p row scores, 27 | 1 centering, one normalization and (k-1) orhtogonality lagrangian multipliers} 28 | 29 | \item{X}{the nxp data matrix} 30 | 31 | \item{reg}{a nx1 regressor matrix: outer product of rowScores and psis} 32 | 33 | \item{thetas}{nxp matrix with the dispersion parameters 34 | (converted to matrix for numeric reasons)} 35 | 36 | \item{muMarg}{the nxp offset} 37 | 38 | \item{k}{an integer, the dimension of the RC solution} 39 | 40 | \item{p}{an integer, the number of taxa} 41 | 42 | \item{n}{an integer, the number of samples} 43 | 44 | \item{colWeights}{the weights used for the restrictions} 45 | 46 | \item{nLambda}{an integer, the number of restrictions} 47 | 48 | \item{cMatK}{the lower dimensions of the colScores} 49 | 50 | \item{allowMissingness}{A boolean, are missing values present} 51 | 52 | \item{naId}{The numeric index of the missing values in X} 53 | 54 | \item{...}{further arguments passed on to the jacobian} 55 | } 56 | \value{ 57 | A vector of length p+1+1+(k-1) with evaluations of the 58 | derivative of lagrangian 59 | } 60 | \description{ 61 | A score function for the estimation of the column scores 62 | in an unconstrained RC(M) model 63 | } 64 | -------------------------------------------------------------------------------- /man/dNBllcol_constr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_dNBllcol_constr.R 3 | \name{dNBllcol_constr} 4 | \alias{dNBllcol_constr} 5 | \title{The score function of the response function for 1 taxon at the time} 6 | \usage{ 7 | dNBllcol_constr(betas, X, reg, theta, muMarg, psi, allowMissingness, naId) 8 | } 9 | \arguments{ 10 | \item{betas}{a vector of v parameters of the 11 | response function of a single taxon} 12 | 13 | \item{X}{the count vector of length n} 14 | 15 | \item{reg}{a n-by-v matrix of regressors} 16 | 17 | \item{theta}{The dispersion parameter of this taxon} 18 | 19 | \item{muMarg}{offset of length n} 20 | 21 | \item{psi}{a scalar, the importance parameter} 22 | 23 | \item{allowMissingness}{A boolean, are missing values present} 24 | 25 | \item{naId}{The numeric index of the missing values in X 26 | 27 | Even though this approach does not imply normalization over the parameters 28 | of all taxa, it is very fast and they can be normalized afterwards} 29 | } 30 | \value{ 31 | A vector of length v with the evaluation of the score functions 32 | } 33 | \description{ 34 | The score function of the response function for 1 taxon at the time 35 | } 36 | -------------------------------------------------------------------------------- /man/dNBllcol_constr_noLab.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_dNBllcol_constr_noLab.R 3 | \name{dNBllcol_constr_noLab} 4 | \alias{dNBllcol_constr_noLab} 5 | \title{The score function of the general response function} 6 | \usage{ 7 | dNBllcol_constr_noLab( 8 | betas, 9 | X, 10 | reg, 11 | thetasMat, 12 | muMarg, 13 | psi, 14 | allowMissingness, 15 | naId, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{betas}{a vector of regression parameters with length v} 21 | 22 | \item{X}{the nxp data matrix} 23 | 24 | \item{reg}{a matrix of regressors of dimension nxv} 25 | 26 | \item{thetasMat}{A matrix of dispersion parameters} 27 | 28 | \item{muMarg}{offset matrix of dimension nxp} 29 | 30 | \item{psi}{a scalar, the importance parameter} 31 | 32 | \item{allowMissingness}{A boolean, are missing values present} 33 | 34 | \item{naId}{The numeric index of the missing values in X} 35 | 36 | \item{...}{further arguments passed on to the jacobian} 37 | } 38 | \value{ 39 | The evaluation of the score functions (a vector length v) 40 | } 41 | \description{ 42 | The score function of the general response function 43 | } 44 | -------------------------------------------------------------------------------- /man/dNBllrow.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_dNBllrow.R 3 | \name{dNBllrow} 4 | \alias{dNBllrow} 5 | \title{A score function of the NB for the row scores} 6 | \usage{ 7 | dNBllrow( 8 | beta, 9 | X, 10 | reg, 11 | thetas, 12 | muMarg, 13 | k, 14 | n, 15 | p, 16 | rowWeights, 17 | nLambda, 18 | rMatK, 19 | allowMissingness, 20 | naId, 21 | ... 22 | ) 23 | } 24 | \arguments{ 25 | \item{beta}{a vector of of length n + k +1 regression parameters to optimize} 26 | 27 | \item{X}{the data matrix of dimensions nxp} 28 | 29 | \item{reg}{a 1xp regressor matrix: outer product of column scores and psis} 30 | 31 | \item{thetas}{nxp matrix with the dispersion parameters 32 | (converted to matrix for numeric reasons)} 33 | 34 | \item{muMarg}{an nxp offset matrix} 35 | 36 | \item{k}{a scalar, the dimension of the RC solution} 37 | 38 | \item{n}{a scalar, the number of samples} 39 | 40 | \item{p}{a scalar, the number of taxa} 41 | 42 | \item{rowWeights}{a vector of length n, the weights used for the restrictions} 43 | 44 | \item{nLambda}{an integer, the number of lagrangian multipliers} 45 | 46 | \item{rMatK}{the lower dimension row scores} 47 | 48 | \item{allowMissingness}{A boolean, are missing values present} 49 | 50 | \item{naId}{The numeric index of the missing values in X} 51 | 52 | \item{...}{Other arguments passed on to the jacobian} 53 | } 54 | \value{ 55 | A vector of length n + k +1 with evaluations of the 56 | derivative of the lagrangian 57 | } 58 | \description{ 59 | A score function of the NB for the row scores 60 | } 61 | -------------------------------------------------------------------------------- /man/dNBpsis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_dNBpsis.R 3 | \name{dNBpsis} 4 | \alias{dNBpsis} 5 | \title{A score function for the psi of a given dimension} 6 | \usage{ 7 | dNBpsis(beta, X, reg, theta, muMarg, allowMissingness, naId, ...) 8 | } 9 | \arguments{ 10 | \item{beta}{a scalar, the initial estimate} 11 | 12 | \item{X}{the n-by-p count matrix} 13 | 14 | \item{reg}{the regressor matrix, the outer product of current row 15 | and column scores} 16 | 17 | \item{theta}{a n-by-p matrix with the dispersion parameters} 18 | 19 | \item{muMarg}{the nxp offset matrix} 20 | 21 | \item{allowMissingness}{A boolean, are missing values present} 22 | 23 | \item{naId}{The numeric index of the missing values in X} 24 | 25 | \item{...}{other arguments passed on to the jacobian} 26 | } 27 | \value{ 28 | The evaluation of the score function at beta, a scalar 29 | } 30 | \description{ 31 | A score function for the psi of a given dimension 32 | } 33 | -------------------------------------------------------------------------------- /man/deviances.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_deviances.R 3 | \name{deviances} 4 | \alias{deviances} 5 | \title{A function to extract deviances for all dimension, 6 | including after filtering on confounders} 7 | \usage{ 8 | deviances(rcm, squaredSum = FALSE) 9 | } 10 | \arguments{ 11 | \item{rcm}{an object of the RCM class} 12 | 13 | \item{squaredSum}{a boolean, should total deviance be returned? 14 | 15 | Total deviances can be deceptive and not correspond to the differences in 16 | log-likelihood. As the dispersion is different for each model. 17 | To compare models it is better to compare likelihoods.} 18 | } 19 | \value{ 20 | If Sum is FALSE, a named array of deviance residuals of the 21 | independence model and all models with dimension 1 to k, including after 22 | filtering on confounders. Otherwise a table with total deviances (the sum of 23 | squared deviance residuals), deviance explained and cumulative deviance 24 | explained. 25 | } 26 | \description{ 27 | A function to extract deviances for all dimension, 28 | including after filtering on confounders 29 | } 30 | -------------------------------------------------------------------------------- /man/ellipseCoord.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_ellipseCoord.R 3 | \name{ellipseCoord} 4 | \alias{ellipseCoord} 5 | \title{A function that returns the coordinates of an ellipse} 6 | \usage{ 7 | ellipseCoord(a, b, c, quadDrop = 0.95, nPoints = 100) 8 | } 9 | \arguments{ 10 | \item{a, b, c}{parameters of the quadratic function a^2x+bx+c} 11 | 12 | \item{quadDrop}{A scalar, fraction of peak height at which to draw 13 | the ellipse} 14 | 15 | \item{nPoints}{an integer, number of points to use to draw the ellipse} 16 | } 17 | \value{ 18 | a matrix with x and y coordinates of the ellipse 19 | } 20 | \description{ 21 | A function that returns the coordinates of an ellipse 22 | } 23 | -------------------------------------------------------------------------------- /man/estDisp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_estDisp.R 3 | \name{estDisp} 4 | \alias{estDisp} 5 | \title{Estimate the overdispersion} 6 | \usage{ 7 | estDisp( 8 | X, 9 | cMat = NULL, 10 | rMat = NULL, 11 | muMarg, 12 | psis, 13 | trended.dispersion = NULL, 14 | prior.df = 10, 15 | dispWeights = NULL, 16 | rowMat = NULL, 17 | allowMissingness = FALSE, 18 | naId 19 | ) 20 | } 21 | \arguments{ 22 | \item{X}{the data matrix of dimensions nxp} 23 | 24 | \item{cMat}{a 1xp colum scores matrix} 25 | 26 | \item{rMat}{a nx1 rowscores matrix, if unconstrained} 27 | 28 | \item{muMarg}{an nxp offset matrix} 29 | 30 | \item{psis}{a scalar, the current psi estimate} 31 | 32 | \item{trended.dispersion}{a vector of length p with pre-calculated 33 | trended.dispersion estimates. They do not vary in function 34 | of the offset anyway} 35 | 36 | \item{prior.df}{an integer, number of degrees of freedom of the prior 37 | for the Bayesian shrinkage} 38 | 39 | \item{dispWeights}{Weights for estimating the dispersion 40 | in a zero-inflated model} 41 | 42 | \item{rowMat}{matrix of row scores in case of constrained ordination} 43 | 44 | \item{allowMissingness}{A boolean, are missing values present} 45 | 46 | \item{naId}{The numeric index of the missing values in X} 47 | } 48 | \value{ 49 | A vector of length p with dispersion estimates 50 | } 51 | \description{ 52 | Estimate the overdispersion 53 | } 54 | \details{ 55 | Information between taxa is shared with empirical Bayes 56 | using the edgeR pacakage, where the time-limiting steps are programmed in C. 57 | } 58 | -------------------------------------------------------------------------------- /man/estNBparams.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_estNBparams.R 3 | \name{estNBparams} 4 | \alias{estNBparams} 5 | \title{A function to estimate the taxon-wise NB-params} 6 | \usage{ 7 | estNBparams( 8 | design, 9 | thetas, 10 | muMarg, 11 | psi, 12 | X, 13 | nleqslv.control, 14 | ncols, 15 | initParam, 16 | v, 17 | dynamic = FALSE, 18 | envRange, 19 | allowMissingness, 20 | naId 21 | ) 22 | } 23 | \arguments{ 24 | \item{design}{an n-by-v design matrix} 25 | 26 | \item{thetas}{a vector of dispersion parameters of length p} 27 | 28 | \item{muMarg}{an offset matrix} 29 | 30 | \item{psi}{a scalar, the importance parameter} 31 | 32 | \item{X}{the data matrix} 33 | 34 | \item{nleqslv.control}{a list of control elements, passed on to nleqslv()} 35 | 36 | \item{ncols}{an integer, the number of columns of X} 37 | 38 | \item{initParam}{a v-by-p matrix of initial parameter estimates} 39 | 40 | \item{v}{an integer, the number of parameters per taxon} 41 | 42 | \item{dynamic}{a boolean, should response function be determined dynamically? 43 | See details} 44 | 45 | \item{envRange}{a vector of length 2, giving the range of observed 46 | environmental scores} 47 | 48 | \item{allowMissingness}{A boolean, are missing values present} 49 | 50 | \item{naId}{The numeric index of the missing values in X 51 | 52 | If dynamic is TRUE, quadratic response functions are fitted for every taxon. 53 | If the optimum falls outside of the observed range of environmental scores, 54 | a linear response function is fitted instead} 55 | } 56 | \value{ 57 | a v-by-p matrix of parameters of the response function 58 | } 59 | \description{ 60 | A function to estimate the taxon-wise NB-params 61 | } 62 | -------------------------------------------------------------------------------- /man/estNBparamsNoLab.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_estNBparamsNoLab.R 3 | \name{estNBparamsNoLab} 4 | \alias{estNBparamsNoLab} 5 | \title{A function to estimate the NB-params ignoring the taxon labels} 6 | \usage{ 7 | estNBparamsNoLab( 8 | design, 9 | thetasMat, 10 | muMarg, 11 | psi, 12 | X, 13 | nleqslv.control, 14 | initParam, 15 | n, 16 | v, 17 | dynamic, 18 | envRange, 19 | preFabMat, 20 | allowMissingness, 21 | naId 22 | ) 23 | } 24 | \arguments{ 25 | \item{design}{an n-by-v design matrix} 26 | 27 | \item{thetasMat}{A matrix of dispersion parameters} 28 | 29 | \item{muMarg}{an offset matrix} 30 | 31 | \item{psi}{a scalar, the importance parameter} 32 | 33 | \item{X}{the data matrix} 34 | 35 | \item{nleqslv.control}{a list of control elements, passed on to nleqslv()} 36 | 37 | \item{initParam}{a vector of length v of initial parameter estimates} 38 | 39 | \item{n}{an integer, the number of samples} 40 | 41 | \item{v}{an integer, the number of parameters per taxon} 42 | 43 | \item{dynamic}{a boolean, should response function be determined dynamically? 44 | See details} 45 | 46 | \item{envRange}{a vector of length 2, 47 | giving the range of observed environmental scores} 48 | 49 | \item{preFabMat}{a pre-fabricated auxiliary matrix} 50 | 51 | \item{allowMissingness}{A boolean, are missing values present} 52 | 53 | \item{naId}{The numeric index of the missing values in X 54 | 55 | If dynamic is TRUE, quadratic response functions are fitted for every taxon. 56 | If the optimum falls outside of the observed range of environmental scores, 57 | a linear response function is fitted instead} 58 | } 59 | \value{ 60 | a v-by-p matrix of parameters of the response function 61 | } 62 | \description{ 63 | A function to estimate the NB-params ignoring the taxon labels 64 | } 65 | -------------------------------------------------------------------------------- /man/estNPresp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_estNPresp.R 3 | \name{estNPresp} 4 | \alias{estNPresp} 5 | \title{Estimate the taxon-wise response functions non-parametrically} 6 | \usage{ 7 | estNPresp( 8 | sampleScore, 9 | muMarg, 10 | X, 11 | ncols, 12 | thetas, 13 | n, 14 | coefInit, 15 | coefInitOverall, 16 | dfSpline, 17 | vgamMaxit, 18 | degree, 19 | verbose, 20 | allowMissingness, 21 | naId, 22 | ... 23 | ) 24 | } 25 | \arguments{ 26 | \item{sampleScore}{a vector of length n with environmental scores} 27 | 28 | \item{muMarg}{the offset matrix} 29 | 30 | \item{X}{the n-by-p data matrix} 31 | 32 | \item{ncols}{an integer, the number of columns of X} 33 | 34 | \item{thetas}{a vector of length p with dispersion parameters} 35 | 36 | \item{n}{an integer, the number of samples} 37 | 38 | \item{coefInit}{a 2-by-p matrix with current taxon-wise parameter estimates} 39 | 40 | \item{coefInitOverall}{a vector of length 2 with current overall parameters} 41 | 42 | \item{dfSpline}{a scalar, the degrees of freedom for the smoothing spline.} 43 | 44 | \item{vgamMaxit}{Maximal number of iterations in the fitting of the GAM model} 45 | 46 | \item{degree}{The degree if the parametric fit if the VGAM fit fails} 47 | 48 | \item{verbose}{a boolean, should number of failed fits be reported} 49 | 50 | \item{allowMissingness}{A boolean, are missing values present} 51 | 52 | \item{naId}{The numeric index of the missing values in X} 53 | 54 | \item{...}{further arguments, passed on to the VGAM:::vgam() function 55 | 56 | The negative binomial likelihood is still maximized, 57 | but now the response function is a non-parametric one. 58 | To avoid a perfect fit and overly flexible functions, 59 | we enforce smoothness restrictions. In practice we use a 60 | generalized additive model (GAM), i.e. with splines. 61 | The same fitting procedure is carried out ignoring species labels. 62 | We do not normalize the parameters related to the splines: 63 | the psis can be calculated afterwards.} 64 | } 65 | \value{ 66 | A list with components 67 | \item{taxonCoef}{The fitted coefficients of the sample-wise response curves} 68 | \item{splinesList}{A list of all the B-spline objects} 69 | \item{rowMar}{The row matrix} 70 | \item{overall}{The overall fit ignoring taxon labels, 71 | as a list of coefficients and a spline} 72 | \item{rowVecOverall}{The overall row vector, ignoring taxon labels} 73 | } 74 | \description{ 75 | Estimate the taxon-wise response functions non-parametrically 76 | } 77 | -------------------------------------------------------------------------------- /man/extractCoord.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_extractCoord.R 3 | \name{extractCoord} 4 | \alias{extractCoord} 5 | \title{A function to extract plotting coordinates, either for plot.RCM 6 | or to export to other plotting software} 7 | \usage{ 8 | extractCoord(RCM, Dim = c(1, 2)) 9 | } 10 | \arguments{ 11 | \item{RCM}{an RCm object} 12 | 13 | \item{Dim}{an integer vector of required dimensions 14 | 15 | The parameters for the ellipses of the quadratic response function come 16 | from the parametrization f(x) = a*x^2 + b*x + c 17 | For an unconstrained object the row and column coordinates are returned 18 | in separate matrices. The row names will correspond to the labels. 19 | For a constrained analysis also the variable points are returned. 20 | All variables still need to be scaled to optimally fill the available space} 21 | } 22 | \value{ 23 | A list with components 24 | \item{samples}{A dataframe of sample scores} 25 | \item{species}{A dataframe of column scores, with origin, slope, 26 | end and ellipse coordinates as needed} 27 | \item{variables}{A dataframe of variable scores, 28 | loadings of the environmental gradient} 29 | } 30 | \description{ 31 | A function to extract plotting coordinates, either for plot.RCM 32 | or to export to other plotting software 33 | } 34 | \examples{ 35 | data(Zeller) 36 | require(phyloseq) 37 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:100], 38 | prune_samples(sample_names(Zeller)[1:50], Zeller)) 39 | zellerRCM = RCM(tmpPhy, k = 2, round = TRUE) 40 | coordsZeller = extractCoord(zellerRCM) 41 | } 42 | -------------------------------------------------------------------------------- /man/extractE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_extractE.R 3 | \name{extractE} 4 | \alias{extractE} 5 | \title{A function to extract a matrix of expected values 6 | for any dimension of the fit} 7 | \usage{ 8 | extractE(rcm, Dim = rcm$k) 9 | } 10 | \arguments{ 11 | \item{rcm}{an object of class RCM} 12 | 13 | \item{Dim}{the desired dimension. Defaults to the maximum of the fit. 14 | Choose 0 for the independence model, 0.5 for the confounders filter model.} 15 | } 16 | \value{ 17 | The matrix of expected values 18 | } 19 | \description{ 20 | A function to extract a matrix of expected values 21 | for any dimension of the fit 22 | } 23 | -------------------------------------------------------------------------------- /man/filterConfounders.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_filterConfounders.R 3 | \name{filterConfounders} 4 | \alias{filterConfounders} 5 | \title{Filters out the effect of known confounders. This is done by fitting 6 | interactions of every taxon with the levels of the confounders. 7 | It returns a modified offset matrix for the remainder 8 | of the fitting procedure.} 9 | \usage{ 10 | filterConfounders( 11 | muMarg, 12 | confMat, 13 | X, 14 | thetas, 15 | p, 16 | n, 17 | nleqslv.control, 18 | trended.dispersion, 19 | tol = 0.001, 20 | maxIt = 20, 21 | allowMissingness, 22 | naId 23 | ) 24 | } 25 | \arguments{ 26 | \item{muMarg}{a nxp matrix, the current offset} 27 | 28 | \item{confMat}{a nxt confounder matrix} 29 | 30 | \item{X}{the nxp data matrix} 31 | 32 | \item{thetas}{a vector of length p with the current dispersion estimates} 33 | 34 | \item{p}{an integer, the number of columns of X} 35 | 36 | \item{n}{an integer, the number of rows of X} 37 | 38 | \item{nleqslv.control}{see nleqslv()} 39 | 40 | \item{trended.dispersion}{a vector of length p 41 | with trended dispersion estimates} 42 | 43 | \item{tol}{a scalar, the convergence tolerance} 44 | 45 | \item{maxIt}{maximum number of iterations} 46 | 47 | \item{allowMissingness}{A boolean, are missing values present} 48 | 49 | \item{naId}{The numeric index of the missing values in X 50 | 51 | Fits the negative binomial mean parameters and overdispersion parameters 52 | iteratively. 53 | Convergence is determined based on the L2-norm 54 | of the absolute change of mean parameters} 55 | } 56 | \value{ 57 | a list with components: 58 | \item{thetas}{new theta estimates} 59 | \item{NB_params}{The estimated parameters of the interaction terms} 60 | } 61 | \description{ 62 | Filters out the effect of known confounders. This is done by fitting 63 | interactions of every taxon with the levels of the confounders. 64 | It returns a modified offset matrix for the remainder 65 | of the fitting procedure. 66 | } 67 | -------------------------------------------------------------------------------- /man/getDevMat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_getDevMat.R 3 | \name{getDevMat} 4 | \alias{getDevMat} 5 | \title{ACalculate the matrix of deviance residuals} 6 | \usage{ 7 | getDevMat(X, thetaMat, mu) 8 | } 9 | \arguments{ 10 | \item{X}{the data matrix} 11 | 12 | \item{thetaMat}{the matrix of dispersions} 13 | 14 | \item{mu}{the matrix of means} 15 | } 16 | \value{ 17 | The matrix of deviance residuals 18 | } 19 | \description{ 20 | ACalculate the matrix of deviance residuals 21 | } 22 | -------------------------------------------------------------------------------- /man/getDevianceRes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_getDevianceRes.R 3 | \name{getDevianceRes} 4 | \alias{getDevianceRes} 5 | \title{A function to calculate the matrix of deviance residuals.} 6 | \usage{ 7 | getDevianceRes(RCM, Dim = RCM$k) 8 | } 9 | \arguments{ 10 | \item{RCM}{an RCM object} 11 | 12 | \item{Dim}{The dimensions to use 13 | 14 | For the deviance residuals we use the overdispersions from the reduced model. 15 | Standard dimensions used are only first and second, 16 | since these are also plotted} 17 | } 18 | \value{ 19 | A matrix with deviance residuals of the same size 20 | as the original data matrix 21 | } 22 | \description{ 23 | A function to calculate the matrix of deviance residuals. 24 | } 25 | \examples{ 26 | data(Zeller) 27 | require(phyloseq) 28 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:120], 29 | prune_samples(sample_names(Zeller)[1:75], Zeller)) 30 | #Subset for a quick fit 31 | zellerRCM = RCM(tmpPhy, k = 2, round = TRUE, prevCutOff = 0.03) 32 | devRes = getDevianceRes(zellerRCM) 33 | } 34 | -------------------------------------------------------------------------------- /man/getDistCoord.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_getDistCoord.R 3 | \name{getDistCoord} 4 | \alias{getDistCoord} 5 | \title{Get coordinates of a distance object of n observations for the provided indices} 6 | \usage{ 7 | getDistCoord(indices, n) 8 | } 9 | \arguments{ 10 | \item{indices}{The row indices for which distance indices are wanted} 11 | 12 | \item{n}{The total number of objects in the distance matrix} 13 | } 14 | \value{ 15 | a vector of coordinates 16 | } 17 | \description{ 18 | Get coordinates of a distance object of n observations for the provided indices 19 | } 20 | -------------------------------------------------------------------------------- /man/getInflCol.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_getInflCol.R 3 | \name{getInflCol} 4 | \alias{getInflCol} 5 | \title{A function to extract the influence for a given parameter index} 6 | \usage{ 7 | getInflCol(score, InvJac, taxon) 8 | } 9 | \arguments{ 10 | \item{score}{a score matrix} 11 | 12 | \item{InvJac}{The inverted jacobian} 13 | 14 | \item{taxon}{The taxon name or index} 15 | } 16 | \value{ 17 | A matrix with all observations' influence on the given taxon 18 | } 19 | \description{ 20 | A function to extract the influence for a given parameter index 21 | } 22 | -------------------------------------------------------------------------------- /man/getInflRow.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_getInflRow.R 3 | \name{getInflRow} 4 | \alias{getInflRow} 5 | \title{Extract the influence of all observations on a given row score} 6 | \usage{ 7 | getInflRow(score, InvJac, sample) 8 | } 9 | \arguments{ 10 | \item{score}{the score function evaluated for every observation} 11 | 12 | \item{InvJac}{The inverse jacobian} 13 | 14 | \item{sample}{the row score or sample index} 15 | } 16 | \value{ 17 | A matrix with all observations' influence on the row score 18 | } 19 | \description{ 20 | Extract the influence of all observations on a given row score 21 | } 22 | -------------------------------------------------------------------------------- /man/getInt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_getInt.R 3 | \name{getInt} 4 | \alias{getInt} 5 | \title{Integrate the spline of an vgam object} 6 | \usage{ 7 | getInt(coef, spline, sampleScore, stop.on.error = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{coef}{A vector of coefficients} 11 | 12 | \item{spline}{The cubic smoothing spline} 13 | 14 | \item{sampleScore}{the observed environmental scores} 15 | 16 | \item{stop.on.error}{see ?integrate} 17 | 18 | \item{...}{additional arguments passed on to integrate()} 19 | } 20 | \value{ 21 | a scalar, the value of the integral 22 | } 23 | \description{ 24 | Integrate the spline of an vgam object 25 | } 26 | -------------------------------------------------------------------------------- /man/getLogLik.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_getLogLik.R 3 | \name{getLogLik} 4 | \alias{getLogLik} 5 | \title{Extract the logged likelihood of every count} 6 | \usage{ 7 | getLogLik(rcm, Dim) 8 | } 9 | \arguments{ 10 | \item{rcm}{an RCM object} 11 | 12 | \item{Dim}{A vector of integers indicating which dimensions to take along, 13 | or Inf for the saturated model, or 0 for the independence model} 14 | } 15 | \value{ 16 | A matrix with logged likelihood of the size of the data matrix 17 | } 18 | \description{ 19 | Extract the logged likelihood of every count 20 | } 21 | -------------------------------------------------------------------------------- /man/getModelMat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_getModelMat.R 3 | \name{getModelMat} 4 | \alias{getModelMat} 5 | \title{A function to construct a model matrix of a certain degree} 6 | \usage{ 7 | getModelMat(y, degree) 8 | } 9 | \arguments{ 10 | \item{y}{the variable} 11 | 12 | \item{degree}{the degree} 13 | } 14 | \value{ 15 | A model matrix with degree+1 columns and as many rows as lenght(y) 16 | } 17 | \description{ 18 | A function to construct a model matrix of a certain degree 19 | } 20 | -------------------------------------------------------------------------------- /man/getRowMat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_getRowMat.R 3 | \name{getRowMat} 4 | \alias{getRowMat} 5 | \title{Return a matrix of row scores} 6 | \usage{ 7 | getRowMat(sampleScore, responseFun, NB_params, taxonCoef, spline) 8 | } 9 | \arguments{ 10 | \item{sampleScore}{a vector of length n with sample scores} 11 | 12 | \item{responseFun}{a character string, the type of response function, 13 | either 'linear' or 'quadratic'} 14 | 15 | \item{NB_params}{a v-by-p matrix of parameters of theresponse function} 16 | 17 | \item{taxonCoef}{A vector of coefficients} 18 | 19 | \item{spline}{The cubic smoothing spline 20 | 21 | Multiplying the old offset with the exponent matrix times 22 | the importance parameter obtains the new one based on lower dimension} 23 | } 24 | \value{ 25 | a n-by-p matrix of scores 26 | } 27 | \description{ 28 | Return a matrix of row scores 29 | } 30 | -------------------------------------------------------------------------------- /man/heq_nb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_heq_nb.R 3 | \name{heq_nb} 4 | \alias{heq_nb} 5 | \title{Define linear equality constraints for env. gradient} 6 | \usage{ 7 | heq_nb(Alpha, alphaK, d, k, centMat, ...) 8 | } 9 | \arguments{ 10 | \item{Alpha}{the current estimate of the environmental gradient} 11 | 12 | \item{alphaK}{a matrix with the environmental gradients 13 | of the lower dimensions} 14 | 15 | \item{d}{an integer, the number of environmental variables, including dummies} 16 | 17 | \item{k}{an integer, the current dimension} 18 | 19 | \item{centMat}{a centering matrix} 20 | 21 | \item{...}{further arguments for other methods, not needed in this one 22 | 23 | The centering matrix centMat ensures that the parameters of the dummies 24 | of the same categorical variable sum to zero} 25 | } 26 | \value{ 27 | a vector of with current values of the constraints, 28 | should evolve to zeroes only 29 | } 30 | \description{ 31 | Define linear equality constraints for env. gradient 32 | } 33 | -------------------------------------------------------------------------------- /man/heq_nb_jac.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_heq_nb_jac.R 3 | \name{heq_nb_jac} 4 | \alias{heq_nb_jac} 5 | \title{The jacobian of the linear equality constraints} 6 | \usage{ 7 | heq_nb_jac(Alpha, alphaK, d, k, centMat, ...) 8 | } 9 | \arguments{ 10 | \item{Alpha}{the current estimate of the environmental gradient} 11 | 12 | \item{alphaK}{a matrix with the environmental gradients 13 | of the lower dimensions} 14 | 15 | \item{d}{an integer, the number of environmental variables, 16 | including dummies} 17 | 18 | \item{k}{an integer, the current dimension} 19 | 20 | \item{centMat}{a centering matrix} 21 | 22 | \item{...}{further arguments for other methods, not needed in this one} 23 | } 24 | \value{ 25 | The jacobian matrix 26 | } 27 | \description{ 28 | The jacobian of the linear equality constraints 29 | } 30 | -------------------------------------------------------------------------------- /man/indentPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_indentPlot.R 3 | \name{indentPlot} 4 | \alias{indentPlot} 5 | \title{Functions to indent the plot to include the entire labels} 6 | \usage{ 7 | indentPlot(plt, xInd = 0, yInd = 0) 8 | } 9 | \arguments{ 10 | \item{plt}{a ggplot object} 11 | 12 | \item{xInd}{a scalar or a vector of length 2, 13 | specifying the indentation left and right of the plot to allow for the labels 14 | to be printed entirely} 15 | 16 | \item{yInd}{a a scalar or a vector of length 2, 17 | specifying the indentation top and bottom of the plot 18 | to allow for the labels to be printed entirely} 19 | } 20 | \value{ 21 | a ggplot object, squared 22 | } 23 | \description{ 24 | Functions to indent the plot to include the entire labels 25 | } 26 | -------------------------------------------------------------------------------- /man/inertia.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_inertia.R 3 | \name{inertia} 4 | \alias{inertia} 5 | \title{Calculate the log-likelihoods of all possible models} 6 | \usage{ 7 | inertia(rcm) 8 | } 9 | \arguments{ 10 | \item{rcm}{an object of the RCM class} 11 | } 12 | \value{ 13 | A table with inertias, proportion inertia explained 14 | and cumulative proportion of inertia explained. 15 | } 16 | \description{ 17 | Calculate the log-likelihoods of all possible models 18 | } 19 | \examples{ 20 | data(Zeller) 21 | require(phyloseq) 22 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:100], 23 | prune_samples(sample_names(Zeller)[1:50], Zeller)) 24 | zellerRCM = RCM(tmpPhy, round = TRUE) 25 | inertia(zellerRCM) 26 | } 27 | -------------------------------------------------------------------------------- /man/liks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_liks.R 3 | \name{liks} 4 | \alias{liks} 5 | \title{Calculate the log-likelihoods of all possible models} 6 | \usage{ 7 | liks(rcm, Sum = TRUE) 8 | } 9 | \arguments{ 10 | \item{rcm}{an object of the RCM class} 11 | 12 | \item{Sum}{a boolean, should log-likelihoods be summed?} 13 | } 14 | \value{ 15 | If Sum is FALSE, a named array log-likelihoods 16 | of the independence model and all models with dimension 1 to k, 17 | including after filtering on confounders. 18 | Otherwise a table with log-likelihoods, 19 | deviance explained and cumulative deviance explained. 20 | } 21 | \description{ 22 | Calculate the log-likelihoods of all possible models 23 | } 24 | \examples{ 25 | data(Zeller) 26 | require(phyloseq) 27 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:100], 28 | prune_samples(sample_names(Zeller)[1:50], Zeller)) 29 | zellerRCM = RCM(tmpPhy, round = TRUE) 30 | liks(zellerRCM) 31 | } 32 | -------------------------------------------------------------------------------- /man/permanova.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_permanova.R 3 | \name{permanova} 4 | \alias{permanova} 5 | \title{Perform a PERMANOVA analysis for group differences of a predefined cofactor using the pseudo F-statistic} 6 | \usage{ 7 | permanova( 8 | rcmObj, 9 | groups, 10 | nPerm = 10000, 11 | Dim = seq_len(rcmObj$k), 12 | verbose = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{rcmObj}{an RCM object} 17 | 18 | \item{groups}{a factor of length n with cluster memberships, or a name of a variable contained in the RCM object} 19 | 20 | \item{nPerm}{Number of permutations in the PERMANOVA} 21 | 22 | \item{Dim}{Dimensions on which the test should be performed. Defaults to all dimensions of the fitted RCM object.} 23 | 24 | \item{verbose}{a boolean, should output be printed?} 25 | } 26 | \value{ 27 | A list with components 28 | \item{statistic}{The pseudo F-statistic} 29 | \item{p.value}{The p-value of the PERMANOVA} 30 | } 31 | \description{ 32 | Perform a PERMANOVA analysis for group differences of a predefined cofactor using the pseudo F-statistic 33 | } 34 | \examples{ 35 | data(Zeller) 36 | require(phyloseq) 37 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:100], 38 | prune_samples(sample_names(Zeller)[1:50], Zeller)) 39 | zellerRCM = RCM(tmpPhy, round = TRUE) 40 | zellerPermanova = permanova(zellerRCM, "Diagnosis") 41 | } 42 | \seealso{ 43 | \code{\link{RCM}} 44 | } 45 | -------------------------------------------------------------------------------- /man/plotRespFun.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_plotRespFun.R 3 | \name{plotRespFun} 4 | \alias{plotRespFun} 5 | \title{Plot the non-parametric response functions} 6 | \usage{ 7 | plotRespFun( 8 | RCM, 9 | taxa = NULL, 10 | type = "link", 11 | logTransformYAxis = FALSE, 12 | addSamples = TRUE, 13 | samSize = NULL, 14 | Dim = 1L, 15 | nPoints = 100L, 16 | labSize = 2.5, 17 | yLocVar = NULL, 18 | yLocSam = NULL, 19 | Palette = "Set3", 20 | addJitter = FALSE, 21 | nTaxa = 9L, 22 | angle = 90, 23 | legendLabSize = 15, 24 | legendTitleSize = 16, 25 | axisLabSize = 14, 26 | axisTitleSize = 16, 27 | lineSize = 0.75, 28 | ... 29 | ) 30 | } 31 | \arguments{ 32 | \item{RCM}{an RCM object} 33 | 34 | \item{taxa}{a character vector of taxa to be plotted} 35 | 36 | \item{type}{a character string, plot the response function on the log-scale 37 | ('link') or the abundance scale 'response', similar to predict.glm().} 38 | 39 | \item{logTransformYAxis}{a boolean, should y-axis be log transformed?} 40 | 41 | \item{addSamples}{a boolean, should sample points be shown?} 42 | 43 | \item{samSize}{a sample variable name or a vector of length equal to the 44 | number of samples, for the sample sizes} 45 | 46 | \item{Dim}{An integer, the dimension to be plotted} 47 | 48 | \item{nPoints}{the number of points to be used to plot the lines} 49 | 50 | \item{labSize}{the label size for the variables} 51 | 52 | \item{yLocVar}{the y-location of the variables, recycled if necessary} 53 | 54 | \item{yLocSam}{the y-location of the samples, recycled if necessary} 55 | 56 | \item{Palette}{which color palette to use} 57 | 58 | \item{addJitter}{A boolean, should variable names be jittered to make 59 | them more readable} 60 | 61 | \item{nTaxa}{an integer, number of taxa to plot} 62 | 63 | \item{angle}{angle at which variable labels should be turned} 64 | 65 | \item{legendLabSize}{size of the legend labels} 66 | 67 | \item{legendTitleSize}{size of the legend title} 68 | 69 | \item{axisLabSize}{size of the axis labels} 70 | 71 | \item{axisTitleSize}{size of the axis title} 72 | 73 | \item{lineSize}{size of the response function lines} 74 | 75 | \item{...}{Other argumens passed on to the ggplot() function} 76 | } 77 | \value{ 78 | Plots a ggplot2-object to output 79 | } 80 | \description{ 81 | Plots a number of response functions over the observed range of 82 | the environmental score. If no taxa are provided those who react most 83 | strongly to the environmental score are chosen. 84 | } 85 | \examples{ 86 | data(Zeller) 87 | require(phyloseq) 88 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:100], 89 | prune_samples(sample_names(Zeller)[1:50], Zeller)) 90 | #Subset for a quick fit 91 | zellerRCMnp = RCM(tmpPhy, k = 2, 92 | covariates = c('BMI','Age','Country','Diagnosis','Gender'), 93 | round = TRUE, responseFun = 'nonparametric') 94 | plotRespFun(zellerRCMnp) 95 | } 96 | \seealso{ 97 | \code{\link{RCM}}, \code{\link{plot.RCM}},\code{\link{residualPlot}} 98 | } 99 | -------------------------------------------------------------------------------- /man/residualPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_residualPlot.R 3 | \name{residualPlot} 4 | \alias{residualPlot} 5 | \title{Make residual plots} 6 | \usage{ 7 | residualPlot( 8 | RCM, 9 | Dim = 1, 10 | whichTaxa = "response", 11 | resid = "Deviance", 12 | numTaxa = 9, 13 | mfrow = NULL, 14 | samColour = NULL, 15 | samShape = NULL, 16 | legendLabSize = 15, 17 | legendTitleSize = 16, 18 | axisLabSize = 14, 19 | axisTitleSize = 16, 20 | taxTitle = TRUE, 21 | h = 0 22 | ) 23 | } 24 | \arguments{ 25 | \item{RCM}{an RCM object} 26 | 27 | \item{Dim}{an integer, which dimension?} 28 | 29 | \item{whichTaxa}{a character string or a character vector, 30 | for which taxa to plot the diagnostic plots} 31 | 32 | \item{resid}{the type of residuals to use, either 'Deviance' or 'Pearson'} 33 | 34 | \item{numTaxa}{an integer, the number of taxa to plot} 35 | 36 | \item{mfrow}{passed on to par(). 37 | If not supplied will be calculated based on numTaxa} 38 | 39 | \item{samColour, samShape}{Vectors or character strings denoting 40 | the sample colour and shape respectively. If character string is provided, 41 | the variables with this name is extracted from the phyloseq object in RCM} 42 | 43 | \item{legendLabSize}{size of the legend labels} 44 | 45 | \item{legendTitleSize}{size of the legend title} 46 | 47 | \item{axisLabSize}{size of the axis labels} 48 | 49 | \item{axisTitleSize}{size of the axis title} 50 | 51 | \item{taxTitle}{A boolean, should taxon title be printed} 52 | 53 | \item{h}{Position of reference line. Set to NA for no line} 54 | } 55 | \value{ 56 | Plots a ggplot2-object to output 57 | } 58 | \description{ 59 | Make residual plots 60 | } 61 | \details{ 62 | If whichTaxa is 'run' or 'response' the taxa with the highest 63 | run statistics or steepest slopes of the response function are plotted, 64 | numTax indicates the number. If whichTaxa is a character vector, 65 | these are interpreted as taxon names to plot. 66 | This function is mainly meant for linear response functions, 67 | but can be used for others too. 68 | The runs test statistic from the tseries package is used. 69 | } 70 | \examples{ 71 | data(Zeller) 72 | require(phyloseq) 73 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:120], 74 | prune_samples(sample_names(Zeller)[1:75], Zeller)) 75 | #Subset for a quick fit 76 | zellerRCMlin = RCM(tmpPhy, k = 2, 77 | covariates = c('BMI','Age','Country','Diagnosis','Gender'), 78 | responseFun = 'linear', round = TRUE, prevCutOff = 0.03) 79 | residualPlot(zellerRCMlin) 80 | } 81 | \seealso{ 82 | \code{\link{RCM}} 83 | } 84 | -------------------------------------------------------------------------------- /man/respFunJacMat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_respFunJacMat.R 3 | \name{respFunJacMat} 4 | \alias{respFunJacMat} 5 | \title{Calculates the Jacobian of the parametric response functions} 6 | \usage{ 7 | respFunJacMat( 8 | betas, 9 | X, 10 | reg, 11 | thetaMat, 12 | muMarg, 13 | psi, 14 | v, 15 | p, 16 | IDmat, 17 | IndVec, 18 | allowMissingness, 19 | naId 20 | ) 21 | } 22 | \arguments{ 23 | \item{betas}{a vector of length (deg+1)*(p+1) with regression parameters 24 | with deg the degree of the response function and the lagrangian multipliers} 25 | 26 | \item{X}{the nxp data matrix} 27 | 28 | \item{reg}{a vector of regressors with the dimension n-by-v} 29 | 30 | \item{thetaMat}{The n-by-p matrix with dispersion parameters} 31 | 32 | \item{muMarg}{offset matrix of size nxp} 33 | 34 | \item{psi}{a scalar, the importance parameter} 35 | 36 | \item{v}{an integer, one plus the degree of the response function} 37 | 38 | \item{p}{an integer, the number of taxa} 39 | 40 | \item{IDmat}{an logical matrix with indices of non-zero elements} 41 | 42 | \item{IndVec}{a vector with indices with non-zero elements} 43 | 44 | \item{allowMissingness}{A boolean, are missing values present} 45 | 46 | \item{naId}{The numeric index of the missing values in X} 47 | } 48 | \value{ 49 | The jacobian, a square matrix of dimension (deg+1)*(p+1) 50 | } 51 | \description{ 52 | Calculates the Jacobian of the parametric response functions 53 | } 54 | -------------------------------------------------------------------------------- /man/respFunScoreMat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_respFunScoreMat.R 3 | \name{respFunScoreMat} 4 | \alias{respFunScoreMat} 5 | \title{Derivative of the Lagrangian of the parametric response function} 6 | \usage{ 7 | respFunScoreMat( 8 | betas, 9 | X, 10 | reg, 11 | thetaMat, 12 | muMarg, 13 | psi, 14 | p, 15 | v, 16 | allowMissingness, 17 | naId, 18 | ... 19 | ) 20 | } 21 | \arguments{ 22 | \item{betas}{a vector of length (deg+1)*(p+1) with regression parameters with 23 | deg the degree of the response function and the lagrangian multipliers} 24 | 25 | \item{X}{the nxp data matrix} 26 | 27 | \item{reg}{a matrix of regressors with the dimension nx(deg+1)} 28 | 29 | \item{thetaMat}{The n-by-p matrix with dispersion parameters} 30 | 31 | \item{muMarg}{offset matrix of size nxp} 32 | 33 | \item{psi}{a scalar, the importance parameter} 34 | 35 | \item{p}{an integer, the number of taxa} 36 | 37 | \item{v}{an integer, one plus the degree of the response function} 38 | 39 | \item{allowMissingness}{A boolean, are missing values present} 40 | 41 | \item{naId}{The numeric index of the missing values in X} 42 | 43 | \item{...}{further arguments passed on to the jacobian 44 | 45 | The parameters are restricted to be normalized, i.e. all squared intercepts, 46 | first order and second order parameters sum to 1} 47 | } 48 | \value{ 49 | The evaluation of the score functions, a vector of length (p+1)* 50 | (deg+1) 51 | } 52 | \description{ 53 | Derivative of the Lagrangian of the parametric response function 54 | } 55 | -------------------------------------------------------------------------------- /man/rowMultiply.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_rowMultiply.R 3 | \name{rowMultiply} 4 | \alias{rowMultiply} 5 | \title{A function to efficiently row multiply a matrix and a vector} 6 | \usage{ 7 | rowMultiply(matrix, vector) 8 | } 9 | \arguments{ 10 | \item{matrix}{a numeric matrix of dimension a-by-b} 11 | 12 | \item{vector}{a numeric vector of length b 13 | 14 | t(t(matrix)*vector) but then faster} 15 | } 16 | \value{ 17 | a matrix, row multplied by the vector 18 | } 19 | \description{ 20 | A function to efficiently row multiply a matrix and a vector 21 | } 22 | \details{ 23 | Memory intensive but that does not matter with given matrix sizes 24 | } 25 | -------------------------------------------------------------------------------- /man/seq_k.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_seq_k.R 3 | \name{seq_k} 4 | \alias{seq_k} 5 | \title{A small auxiliary function for the length of the lambdas} 6 | \usage{ 7 | seq_k(y, nLambda1s = 1) 8 | } 9 | \arguments{ 10 | \item{y}{an integer, the current dimension} 11 | 12 | \item{nLambda1s}{the number of centering restrictions} 13 | } 14 | \value{ 15 | a vector containing the ranks of the current lagrangian multipliers 16 | } 17 | \description{ 18 | A small auxiliary function for the length of the lambdas 19 | } 20 | -------------------------------------------------------------------------------- /man/trimOnConfounders.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/F_trimOnConfounders.R 3 | \name{trimOnConfounders} 4 | \alias{trimOnConfounders} 5 | \title{Trim based on confounders to avoid taxa with only zero counts} 6 | \usage{ 7 | trimOnConfounders(confounders, X, prevCutOff, minFraction, n) 8 | } 9 | \arguments{ 10 | \item{confounders}{a nxt confounder matrix} 11 | 12 | \item{X}{the nxp data matrix} 13 | 14 | \item{prevCutOff}{a scalar between 0 and 1, the prevalence cut off} 15 | 16 | \item{minFraction}{a scalar between 0 and 1, 17 | each taxon's total abundance should equal at least the number of samples n 18 | times minFraction, otherwise it is trimmed} 19 | 20 | \item{n}{the number of samples 21 | 22 | Should be called prior to fitting the independence model} 23 | } 24 | \value{ 25 | A trimmed data matrix nxp' 26 | } 27 | \description{ 28 | Trim based on confounders to avoid taxa with only zero counts 29 | } 30 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(RCM) 3 | 4 | test_check("RCM") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-ArrayProd.R: -------------------------------------------------------------------------------- 1 | x = array(c(1,2,5,3,4,9,5,2,5,4,5,6,4,1,1,2,1,3,5,1,4,5,6,9), dim = c(2,3,4)) 2 | y = c(1,1,2,4) 3 | 4 | context("Array product") 5 | 6 | test_that("Fast array product yields correct outcome", { 7 | expect_equal( 8 | RCM:::arrayprod(x = x, y = y), 9 | apply(x, c(1,2), function(z, y){sum(z*y)}, y = y)) 10 | } 11 | ) 12 | -------------------------------------------------------------------------------- /tests/testthat/test-RCMinput.R: -------------------------------------------------------------------------------- 1 | context("RCM input") 2 | 3 | tmpPhy = prune_taxa(taxa_names(Zeller)[seq_len(100)], 4 | prune_samples(sample_names(Zeller)[seq_len(50)], Zeller)) 5 | 6 | test_that("RCM throws warnings for integer variables", { 7 | expect_warning(RCM(tmpPhy, covariates = c("Age","Diagnosis"), k = 1)) 8 | }) 9 | 10 | test_that("RCM throws errors for wrong input type", { 11 | expect_error(RCM("Zeller", covariates = c("Diagnosis", "Country", "Gender"), k = 1)) 12 | }) 13 | 14 | test_that("RCM throws errors when only one covariate with one level supplied", { 15 | expect_error(suppressWarnings(RCM(Zeller, covariates = "Age", k = 1))) 16 | }) 17 | 18 | test_that("RCM throws warning when less covariate combinations than samples supplied", { 19 | expect_warning(RCM(Zeller, covariates = c("Diagnosis", "Country"), k = 1)) 20 | }) 21 | 22 | test_that("RCM throws errors when NAs present in data matrix", { 23 | expect_error(RCM(matrix(c(1,2,3,NA),2,2), covariates = "Age", k = 1)) 24 | }) 25 | 26 | sample_data(Zeller)$bogusVariable = get_variable(Zeller, "Age") + 27 | as.integer(get_variable(Zeller,"Gender")) 28 | 29 | test_that("RCM throws errors when covariates are aliased", { 30 | expect_error(suppressWarnings(RCM(Zeller, covariates = c("Diagnosis", "Country", 31 | "Gender","Age", "bogusVariable"), 32 | k = 2))) 33 | }) 34 | 35 | test_that("RCM throws errors when confounders are aliased", { 36 | expect_error(RCM(Zeller, confounders = c("Diagnosis", "Country", 37 | "Gender","Age", "bogusVariable"), 38 | k = 2)) 39 | }) 40 | -------------------------------------------------------------------------------- /tests/testthat/test-RCMoutput.R: -------------------------------------------------------------------------------- 1 | context("RCM output") 2 | 3 | tmpPhy = prune_taxa(taxa_names(Zeller)[seq_len(150)], 4 | prune_samples(sample_names(Zeller)[seq_len(100)], Zeller)) 5 | 6 | test_that("RCM returns element of class RCM", { 7 | expect_is(RCM(tmpPhy, k = 1), "RCM") 8 | }) 9 | 10 | test_that("RCM returns phyloseq object", { 11 | expect_is(RCM(tmpPhy, k = 1)$physeq, "phyloseq") 12 | }) 13 | 14 | test_that("RCM throws warning when not converged", { 15 | expect_warning(RCM(tmpPhy, k = 1, maxItOut = 2L)) 16 | }) 17 | 18 | #Introduce some NAs 19 | tmpPhyNA = transform_sample_counts(tmpPhy, fun = function(x){ 20 | x[sample(length(x), size = 3)] = NA 21 | x 22 | }) 23 | misUnconstr <- suppressWarnings(RCM(tmpPhyNA, k = 2, allowMissingness = TRUE)) 24 | suppressWarnings(misConstrLin <- RCM(tmpPhyNA, k = 2, allowMissingness = TRUE, 25 | covariates = c("Diagnosis", "Country", "Gender", "BMI"), 26 | confounders = "Age")) 27 | suppressWarnings(misConstrNP <- RCM(tmpPhyNA, k = 2, allowMissingness = TRUE, 28 | covariates = c("Diagnosis", "Country", "Gender", "BMI"), 29 | confounders = "Age", responseFun = "nonparametric")) 30 | test_that("RCM allows for missingness", { 31 | expect_is(misUnconstr, "RCM") 32 | expect_is(misConstrLin, "RCM") 33 | expect_is(misConstrNP, "RCM") 34 | }) 35 | test_that("All plotting functions still work with missing data", { 36 | expect_silent(plot(misUnconstr)) 37 | expect_silent(plot(misConstrLin)) 38 | expect_silent(plot(misConstrNP)) 39 | expect_silent(plot(misUnconstr, samColour = "Deviance")) 40 | expect_silent(plot(misUnconstr, taxCol = "Deviance", plotType = "species")) 41 | expect_silent(plot(misUnconstr, inflVar = "psi")) 42 | expect_silent(plot(misConstrLin, inflVar = "BMI")) 43 | expect_warning(plotRespFun(misConstrNP)) 44 | }) 45 | -------------------------------------------------------------------------------- /tests/testthat/test-ResidualPlot.R: -------------------------------------------------------------------------------- 1 | context("residual plot") 2 | 3 | tmpPhy = prune_taxa(taxa_names(Zeller)[seq_len(150)], 4 | prune_samples(sample_names(Zeller)[seq_len(100)], Zeller)) 5 | unconstrRcm = RCM(tmpPhy, k = 2, allowMissingness = TRUE, confounders = "Age") 6 | constrRcm = RCM(tmpPhy, k = 2, allowMissingness = TRUE, 7 | covariates = c("Diagnosis", "Country", "Gender", "BMI"), 8 | confounders = "Age") 9 | test_that("Residual plotting works, also for higher dimensions", { 10 | expect_silent(residualPlot(constrRcm)) 11 | expect_silent(residualPlot(constrRcm, Dim = 2)) 12 | }) 13 | test_that("Residual plotting returns errors for uncosntrained ordination", { 14 | expect_error(residualPlot(unconstrRcm)) 15 | }) 16 | 17 | -------------------------------------------------------------------------------- /tests/testthat/test-checkAlias.R: -------------------------------------------------------------------------------- 1 | context("Alias structure") 2 | 3 | # A dataframe with alias structure 4 | df = data.frame(foo = rnorm(10), baa = rep(c(TRUE, FALSE), each = 5), 5 | foo2 = factor(rep(c("male", "female"), each = 5))) 6 | 7 | test_that("Alias structure is discovered", { 8 | expect_error(checkAlias(df, names(df))) 9 | }) 10 | test_that("Alias structure is not falsely discovered", { 11 | expect_silent(checkAlias(df, c("foo", "baa"))) 12 | }) 13 | -------------------------------------------------------------------------------- /tests/testthat/test-permanova.R: -------------------------------------------------------------------------------- 1 | context("permanova") 2 | 3 | tmpPhy = prune_taxa(taxa_names(Zeller)[seq_len(150)], 4 | prune_samples(sample_names(Zeller)[seq_len(100)], Zeller)) 5 | rcmFit = RCM(tmpPhy) 6 | test_that("permanova runs as expected", { 7 | expect_is(permanova(rcmFit, "Diagnosis"), "list") 8 | expect_is(permanova(rcmFit, # Bogus, user-supplied grouping factor 9 | sample(c("a", "b"), replace = TRUE, nsamples(tmpPhy))), 10 | "list") 11 | }) 12 | 13 | test_that("permanova throws errros for wrong input", { 14 | expect_error(permanova(tmpPhy, get_variable(tmpPy, "Diagnosis"))) 15 | expect_error(permanova(rcmFit, groups = c("a", "b"))) 16 | expect_error(permanova(rcmFit, groups = rep("a", nsamples(tmpPhy)))) 17 | expect_error(permanova(rcmFit, groups = c("b", rep("a", nsamples(tmpPhy)-1)))) 18 | }) 19 | 20 | test_that("permanova throws warning for low number of permutations", { 21 | expect_warning(permanova(rcmFit, "Diagnosis", nPerm = 20)) 22 | }) 23 | -------------------------------------------------------------------------------- /tests/testthat/test-plotRCM.R: -------------------------------------------------------------------------------- 1 | context("RCM plot") 2 | 3 | tmpPhy = prune_taxa(taxa_names(Zeller)[seq_len(150)], 4 | prune_samples(sample_names(Zeller)[seq_len(100)], Zeller)) 5 | unconstrRcm = RCM(tmpPhy, k = 2, allowMissingness = TRUE, confounders = "Age") 6 | constrRcm = RCM(tmpPhy, k = 2, allowMissingness = TRUE, 7 | covariates = c("Diagnosis", "Country", "Gender", "BMI"), 8 | confounders = "Age") 9 | # 10 | test_that("Inappropriate plotting commands throw errors", { 11 | expect_error(plot(unconstrRcm, samColour = "Influence")) 12 | expect_error(plot(constrRcm, inflVar = "Diagnosis", samColour = "Influence")) 13 | }) 14 | 15 | test_that("Correct plotting commands yield ggplot2 objects", { 16 | expect_s3_class(plot(unconstrRcm, samColour = "Influence", inflVar = "psi"), 17 | "ggplot") 18 | expect_s3_class(plot(constrRcm, inflVar = "DiagnosisNormal", samColour = "Influence", 19 | samShape = "Diagnosis"), 20 | "ggplot") 21 | }) 22 | -------------------------------------------------------------------------------- /tests/testthat/test-removeNA.R: -------------------------------------------------------------------------------- 1 | context("Correct for missingness") 2 | 3 | n = 50; p = 100 4 | X = matrix(rnbinom(n*p, size = 1, mu = 2), n,p) 5 | X[sample(seq_len(n*p), n)] = NA 6 | mu = outer(rowSums(X, na.rm = TRUE), colSums(X, na.rm = TRUE))/ 7 | sum(X, na.rm = TRUE) 8 | 9 | test_that("correctXMissingness() removes all NAs", { 10 | expect_true(anyNA(RCM:::correctXMissingness(X, mu, FALSE))) 11 | expect_false(anyNA(RCM:::correctXMissingness(X, mu, TRUE))) 12 | }) 13 | -------------------------------------------------------------------------------- /tests/testthat/test-rowMultiply.R: -------------------------------------------------------------------------------- 1 | context("rowMultiply") 2 | 3 | n = 50; p = 100 4 | mat = matrix(rnorm(n*p), n,p) 5 | vec = rnorm(p) 6 | 7 | test_that("Rowmultiply works", { 8 | expect_equal(rowMultiply(mat, vec), t(t(mat)*vec)) 9 | }) 10 | -------------------------------------------------------------------------------- /tests/testthat/test-seq_k.R: -------------------------------------------------------------------------------- 1 | context("Index vector of lagrange multipliers of dimension k") 2 | 3 | test_that("Number of lagrange multipliers are correct", { 4 | expect_equal(seq_k(1), seq_len(2)) 5 | expect_equal(seq_k(2), 3:5) 6 | expect_equal(seq_k(3), 6:9) 7 | }) 8 | --------------------------------------------------------------------------------