├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── R
    ├── F_GramSchmidt.R
    ├── F_JacCol_constr.R
    ├── F_JacCol_constr_noLab.R
    ├── F_LR_nb.R
    ├── F_LR_nb_Jac.R
    ├── F_NBalphaInfl.R
    ├── F_NBcolInfl.R
    ├── F_NBjacobianAbunds.R
    ├── F_NBjacobianCol.R
    ├── F_NBjacobianColNP.R
    ├── F_NBjacobianLibSizes.R
    ├── F_NBjacobianPsi.R
    ├── F_NBjacobianRow.R
    ├── F_NBpsiInfl.R
    ├── F_NBrowInfl.R
    ├── F_RCM.R
    ├── F_RCM_NB.R
    ├── F_addOrthProjection.R
    ├── F_arrayProd.R
    ├── F_buildCentMat.R
    ├── F_buildConfMat.R
    ├── F_buildCovMat.R
    ├── F_buildDesign.R
    ├── F_checkAlias.R
    ├── F_constrCorresp.R
    ├── F_correctXMissingness.R
    ├── F_dLR_nb.R
    ├── F_dNBabunds.R
    ├── F_dNBlibSizes.R
    ├── F_dNBllcol.R
    ├── F_dNBllcolNP.R
    ├── F_dNBllcol_constr.R
    ├── F_dNBllcol_constr_noLab.R
    ├── F_dNBllrow.R
    ├── F_dNBpsis.R
    ├── F_data.R
    ├── F_deviances.R
    ├── F_ellipseCoord.R
    ├── F_estDisp.R
    ├── F_estNBparams.R
    ├── F_estNBparamsNoLab.R
    ├── F_estNPresp.R
    ├── F_extractCoord.R
    ├── F_extractE.R
    ├── F_filterConfounders.R
    ├── F_getDevMat.R
    ├── F_getDevianceRes.R
    ├── F_getDistCoord.R
    ├── F_getInflCol.R
    ├── F_getInflRow.R
    ├── F_getInt.R
    ├── F_getLogLik.R
    ├── F_getModelMat.R
    ├── F_getRowMat.R
    ├── F_heq_nb.R
    ├── F_heq_nb_jac.R
    ├── F_indentPlot.R
    ├── F_inertia.R
    ├── F_liks.R
    ├── F_permanova.R
    ├── F_plot.RCM.R
    ├── F_plotRespFun.R
    ├── F_residualPlot.R
    ├── F_respFunJacMat.R
    ├── F_respFunScoreMat.R
    ├── F_rowMultiply.R
    ├── F_seq_k.R
    └── F_trimOnConfounders.R
├── README.Rmd
├── README.md
├── README_figs
    ├── README-plotCond-1.png
    ├── README-plotNPTriplot-1.png
    ├── README-plotRichness-1.png
    ├── README-plotUnconstrainedRCMall-1.png
    ├── README-plotUnconstrainedRCMallColour-1.png
    ├── README-plotlin2cor-1.png
    ├── README-plotlin3-1.png
    └── README-plotlin3Triplot-1.png
├── data
    └── Zeller.RData
├── inst
    ├── CITATION
    ├── NEWS.md
    └── fits
    │   └── zellerFits.RData
├── man
    ├── GramSchmidt.Rd
    ├── JacCol_constr.Rd
    ├── JacCol_constr_noLab.Rd
    ├── LR_nb.Rd
    ├── LR_nb_Jac.Rd
    ├── NBalphaInfl.Rd
    ├── NBcolInfl.Rd
    ├── NBjacobianAbundsOld.Rd
    ├── NBjacobianColNP.Rd
    ├── NBjacobianColOld.Rd
    ├── NBjacobianLibSizes.Rd
    ├── NBjacobianPsi.Rd
    ├── NBjacobianRow.Rd
    ├── NBpsiInfl.Rd
    ├── NBrowInfl.Rd
    ├── RCM.Rd
    ├── RCM_NB.Rd
    ├── Zeller.Rd
    ├── addOrthProjection.Rd
    ├── arrayprod.Rd
    ├── buildCentMat.Rd
    ├── buildConfMat.Rd
    ├── buildConfMat.character.Rd
    ├── buildConfMat.data.frame.Rd
    ├── buildCovMat.Rd
    ├── buildDesign.Rd
    ├── checkAlias.Rd
    ├── constrCorresp.Rd
    ├── correctXMissingness.Rd
    ├── dLR_nb.Rd
    ├── dNBabundsOld.Rd
    ├── dNBlibSizes.Rd
    ├── dNBllcolNP.Rd
    ├── dNBllcolOld.Rd
    ├── dNBllcol_constr.Rd
    ├── dNBllcol_constr_noLab.Rd
    ├── dNBllrow.Rd
    ├── dNBpsis.Rd
    ├── deviances.Rd
    ├── ellipseCoord.Rd
    ├── estDisp.Rd
    ├── estNBparams.Rd
    ├── estNBparamsNoLab.Rd
    ├── estNPresp.Rd
    ├── extractCoord.Rd
    ├── extractE.Rd
    ├── filterConfounders.Rd
    ├── getDevMat.Rd
    ├── getDevianceRes.Rd
    ├── getDistCoord.Rd
    ├── getInflCol.Rd
    ├── getInflRow.Rd
    ├── getInt.Rd
    ├── getLogLik.Rd
    ├── getModelMat.Rd
    ├── getRowMat.Rd
    ├── heq_nb.Rd
    ├── heq_nb_jac.Rd
    ├── indentPlot.Rd
    ├── inertia.Rd
    ├── liks.Rd
    ├── permanova.Rd
    ├── plot.RCM.Rd
    ├── plotRespFun.Rd
    ├── residualPlot.Rd
    ├── respFunJacMat.Rd
    ├── respFunScoreMat.Rd
    ├── rowMultiply.Rd
    ├── seq_k.Rd
    └── trimOnConfounders.Rd
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-ArrayProd.R
    │   ├── test-RCMinput.R
    │   ├── test-RCMoutput.R
    │   ├── test-ResidualPlot.R
    │   ├── test-checkAlias.R
    │   ├── test-permanova.R
    │   ├── test-plotRCM.R
    │   ├── test-removeNA.R
    │   ├── test-rowMultiply.R
    │   └── test-seq_k.R
└── vignettes
    └── RCMvignette.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | README*
4 | results
5 | vignettes/RCMvignette_cache
6 | vignettes/RCMvignette_figures
7 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
 5 | .Rproj
 6 | inst/doc
 7 | .html
 8 | NEWS.Rmd
 9 | /vignettes/RCMvignette_cache/*
10 | *.Rproj
11 | *.access^
12 | README_cache/
13 | vignettes/RCMvignette.R
14 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: RCM
 2 | Type: Package
 3 | Title: Fit row-column association models with the negative binomial distribution for the microbiome
 4 | Version: 1.11.4
 5 | Authors@R: 
 6 |     c(person(given = "Stijn", family = "Hawinkel", 
 7 |     email = "stijn.hawinkel@psb.ugent.be", 
 8 |     role=c("cre", "aut"), comment = c(ORCID = "0000-0002-4501-5180")))
 9 | Description: Combine ideas of log-linear analysis of contingency table, flexible response function estimation and empirical Bayes dispersion estimation for explorative visualization of microbiome datasets. The package includes unconstrained as well as constrained analysis. In addition, diagnostic plot to detect lack of fit are available.
10 | License: GPL-2
11 | Encoding: UTF-8
12 | RoxygenNote: 7.2.3
13 | Imports: RColorBrewer, alabama, edgeR, reshape2, tseries, stats, VGAM, ggplot2 (>= 2.2.1.9000), nleqslv, phyloseq, tensor, MASS, grDevices, graphics, methods
14 | Depends: R (>= 4.0), DBI
15 | LazyData: true
16 | Suggests: 
17 |     knitr,
18 |     rmarkdown,
19 |     testthat
20 | VignetteBuilder: knitr
21 | biocViews: Metagenomics, DimensionReduction, Microbiome, Visualization
22 | BugReports: https://github.com/CenterForStatistics-UGent/RCM/issues
23 | URL: https://bioconductor.org/packages/release/bioc/vignettes/RCM/inst/doc/RCMvignette.html/
24 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(plot,RCM)
 4 | export(RCM)
 5 | export(RCM_NB)
 6 | export(addOrthProjection)
 7 | export(checkAlias)
 8 | export(extractCoord)
 9 | export(getDevianceRes)
10 | export(inertia)
11 | export(liks)
12 | export(permanova)
13 | export(plotRespFun)
14 | export(residualPlot)
15 | exportMethods(RCM)
16 | import(ggplot2)
17 | import(methods)
18 | import(phyloseq)
19 | importFrom(MASS,negative.binomial)
20 | importFrom(RColorBrewer,brewer.pal)
21 | importFrom(VGAM,coef)
22 | importFrom(VGAM,negbinomial.size)
23 | importFrom(VGAM,predict)
24 | importFrom(VGAM,s)
25 | importFrom(VGAM,vgam)
26 | importFrom(alabama,constrOptim.nl)
27 | importFrom(grDevices,colorRampPalette)
28 | importFrom(grDevices,rainbow)
29 | importFrom(graphics,par)
30 | importFrom(graphics,text)
31 | importFrom(nleqslv,nleqslv)
32 | importFrom(phyloseq,get_variable)
33 | importFrom(stats,alias)
34 | importFrom(stats,as.formula)
35 | importFrom(stats,contrasts)
36 | importFrom(stats,dist)
37 | importFrom(stats,dnbinom)
38 | importFrom(stats,dpois)
39 | importFrom(stats,formula)
40 | importFrom(stats,glm)
41 | importFrom(stats,integrate)
42 | importFrom(stats,model.matrix)
43 | importFrom(stats,na.omit)
44 | importFrom(stats,quantile)
45 | importFrom(stats,runif)
46 | importFrom(stats,weighted.mean)
47 | importFrom(tensor,tensor)
48 | importFrom(tseries,runs.test)
49 | 


--------------------------------------------------------------------------------
/R/F_GramSchmidt.R:
--------------------------------------------------------------------------------
 1 | #'Gram-Schmidt orthogonalization of vectors
 2 | #'
 3 | #' @param x The vector that is to be orthogonalized
 4 | #' @param otherVecs a matrix; x is orthogonalized with respect to its rows
 5 | #' @param weights The weights used in the orthogonalization
 6 | #' @return The orthogonalized vector
 7 | GramSchmidt = function(x, otherVecs, weights = rep(1, length(x))){
 8 |   for(i in seq_len(nrow(otherVecs))){
 9 |     x = x- sum(x*otherVecs[i,]*weights)/sum(otherVecs[i,]^2*weights)*otherVecs[i,]
10 |     x = x/sqrt(sum(x^2*weights))
11 |   }
12 |   return(x)
13 | }
14 | 


--------------------------------------------------------------------------------
/R/F_JacCol_constr.R:
--------------------------------------------------------------------------------
 1 | #' Jacobian of the constrained analysis with linear response function.
 2 | #'
 3 | #' @param betas a vector of v parameters of the response function
 4 | #'  of a single taxon
 5 | #' @param X the count vector of length n
 6 | #' @param reg a n-by-v matrix of regressors
 7 | #' @param theta The dispersion parameter of this taxon
 8 | #' @param muMarg offset of length n
 9 | #' @param psi a scalar, the importance parameter
10 | #' @param allowMissingness A boolean, are missing values present
11 | #' @param naId The numeric index of the missing values in X
12 | #'
13 | #' Even though this approach does not imply normalization over
14 | #' the parameters of all taxa, it is very fast
15 | #'  and they can be normalized afterwards
16 | #'
17 | #' @return The jacobian, a square symmetric matrix of dimension v
18 | JacCol_constr = function(betas, X, reg, theta,
19 |     muMarg, psi, allowMissingness, naId) {
20 |     mu = exp(c(reg %*% betas) * psi) * muMarg
21 |     X = correctXMissingness(X, mu, allowMissingness, naId)
22 |     tmp = (1 + X/theta) * mu/(1 + mu/theta)^2
23 |     -crossprod(tmp * reg, reg) * psi^2  #Don't forget to square psi!
24 | 
25 | }
26 | 


--------------------------------------------------------------------------------
/R/F_JacCol_constr_noLab.R:
--------------------------------------------------------------------------------
 1 | #' The jacobian of the response function without taxon labels
 2 | #'
 3 | #' @param betas a vector of regression parameters with length v
 4 | #' @param X the nxp data matrix
 5 | #' @param reg a matrix of regressors of dimension nxv
 6 | #' @param thetasMat A matrix of dispersion parameters
 7 | #' @param muMarg offset matrix of dimension nxp
 8 | #' @param preFabMat a prefabricated matrix
 9 | #' @param psi a scalar, the importance parameter
10 | #' @param n an integer, number of rows of X
11 | #' @param v an integer, the number of parameters of the response function
12 | #' @param allowMissingness A boolean, are missing values present
13 | #' @param naId The numeric index of the missing values in X
14 | #'
15 | #' @return The jacobian (a v-by-v matrix)
16 | JacCol_constr_noLab = function(betas, X,
17 |     reg, thetasMat, muMarg, psi, n, v, preFabMat, allowMissingness, naId) {
18 |     mu = c(exp(reg %*% betas * psi)) * muMarg
19 |     if(allowMissingness){
20 |         preFabMat = 1 + correctXMissingness(X, mu, allowMissingness, naId)/thetasMat
21 |     }
22 |     tmp = preFabMat * mu/(1 + (mu/thetasMat))^2 *
23 |         psi^2  #Don't forget to square psi!
24 |     -crossprod(reg, vapply(seq_len(v), FUN.VALUE = vector("numeric",
25 |         n), function(x) {
26 |         rowSums(reg[, x] * tmp)
27 |     }))
28 | }
29 | 


--------------------------------------------------------------------------------
/R/F_LR_nb.R:
--------------------------------------------------------------------------------
 1 | #' Get the value of the log-likelihood ratio of alpha
 2 | #'
 3 | #' @param Alpha a vector of length d, the environmental gradient
 4 | #' @param X the n-by-p count matrix
 5 | #' @param CC the n-by-d covariate matrix
 6 | #' @param responseFun a character string indicating
 7 | #' the type of response function
 8 | #' @param muMarg an n-by-p offset matrix
 9 | #' @param psi a scalar, an importance parameter
10 | #' @param nleqslv.control the control list for the nleqslv() function
11 | #' @param n number of samples
12 | #' @param NB_params Starting values for the NB_params
13 | #' @param NB_params_noLab Starting values for the NB_params without label
14 | #' @param thetaMat a matrix of size n-by-p with estimated dispersion parameters
15 | #' @param ncols a scalar, the number of columns of X
16 | #' @param nonParamRespFun A list, the result of the estNPresp() function
17 | #' @param envGradEst a character string,
18 | #' indicating how the environmental gradient should be fitted.
19 | #' 'LR' using the likelihood-ratio criterion,
20 | #' or 'ML' a full maximum likelihood solution
21 | #' @param ... Further arguments passed on to other functions
22 | #'
23 | #' DON'T USE 'p' as variable name,
24 | #'  partial matching in the grad-function in the numDeriv package
25 | #'
26 | #' @return: a scalar, the evaluation of the log-likelihood ratio
27 | #'  at the given alpha
28 | LR_nb <- function(Alpha, X, CC, responseFun = c("linear",
29 |     "quadratic", "nonparametric", "dynamic"),
30 |     muMarg, psi, nleqslv.control = list(trace = FALSE),
31 |     n, NB_params, NB_params_noLab, thetaMat,
32 |     ncols, nonParamRespFun, envGradEst, ...) {
33 | 
34 |     sampleScore = CC %*% Alpha
35 |     if (responseFun %in% c("linear", "quadratic",
36 |         "dynamic")) {
37 |         design = buildDesign(sampleScore,
38 |             responseFun)
39 |         muT = muMarg * c(exp(design %*% NB_params *
40 |             psi))
41 |         if (envGradEst == "LR")
42 |             mu0 = muMarg * c(exp(design %*%
43 |                 NB_params_noLab * psi))
44 |     } else {
45 |         # Non-parametric response function
46 |         muT = exp(nonParamRespFun$rowMat) *
47 |             muMarg
48 |         if (envGradEst == "LR")
49 |             mu0 = c(exp(nonParamRespFun$rowVecOverall)) *
50 |                 muMarg
51 |     }
52 |     logDensj = dnbinom(X, mu = muT, size = thetaMat,
53 |         log = TRUE)
54 |     # Likelihoods under species specific
55 |     # model Immediately return log
56 |     # likelihoods
57 | 
58 |     if (envGradEst == "LR")
59 |         logDens0 = dnbinom(X, mu = mu0, size = thetaMat,
60 |             log = TRUE)  #Likelihoods of null model
61 | 
62 |     lr <- sum(switch(envGradEst, "LR" = logDensj -
63 |         logDens0, "ML" = logDensj), na.rm = TRUE)
64 |     # The likelihood ratio
65 |     return(-lr)  # opposite sign for the minimization procedure
66 | }
67 | 


--------------------------------------------------------------------------------
/R/F_LR_nb_Jac.R:
--------------------------------------------------------------------------------
  1 | #' A function that returns the Jacobian of the likelihood ratio
  2 | #'
  3 | #' @param Alpha a vector of length d + k*(2+(k-1)/2),
  4 | #' the environmental gradient plus the lagrangian multipliers
  5 | #' @param X the n-by-p count matrix
  6 | #' @param CC a n-by-d covariate vector
  7 | #' @param responseFun a character string indicating
  8 | #' the type of response function
  9 | #' @param psi a scalar, an importance parameter
 10 | #' @param NB_params Starting values for the NB_params
 11 | #' @param NB_params_noLab Starting values for the NB_params without label
 12 | #' @param d an integer, the number of covariate parameters
 13 | #' @param alphaK a matrix of environmental gradients of lower dimensions
 14 | #' @param k an integer, the current dimension
 15 | #' @param centMat a nLambda1s-by-d centering matrix
 16 | #' @param nLambda an integer, number of lagrangian multipliers
 17 | #' @param nLambda1s an integer, number of centering restrictions
 18 | #' @param thetaMat a matrix of size n-by-p with estimated dispersion parameters
 19 | #' @param muMarg an n-by-p offset matrix
 20 | #' @param n an integer, the number of rows of X
 21 | #' @param ncols a scalar, the number of columns of X
 22 | #' @param preFabMat a prefabricated matrix
 23 | #' @param envGradEst a character string,
 24 | #'  indicating how the environmental gradient should be fitted.
 25 | #'  'LR' using the likelihood-ratio criterion,
 26 | #'  or 'ML' a full maximum likelihood solution
 27 | #' @param allowMissingness A boolean, are missing values present
 28 | #' @param naId The numeric index of the missing values in X
 29 | #' @param ... Further arguments passed on to other functions
 30 | #'
 31 | #' @return A symmetric matrix, the evaluated Jacobian
 32 | LR_nb_Jac = function(Alpha, X, CC, responseFun = c("linear", "quadratic",
 33 |     "nonparametric", "dynamic"), psi, NB_params, NB_params_noLab, d, alphaK,
 34 |     k, centMat, nLambda, nLambda1s, thetaMat, muMarg, n, ncols, preFabMat,
 35 |     envGradEst, allowMissingness, naId, ...) {
 36 |     did = seq_len(d)
 37 |     # Extract the parameters
 38 |     alpha = Alpha[did]
 39 |     lambda1s = Alpha[d + seq_len(nLambda1s)]
 40 |     # Multiple centering requirements now!
 41 |     lambda2 = Alpha[d + nLambda1s + 1]
 42 |     lambda3 = if (k == 1) {
 43 |         0
 44 |     } else {
 45 |         Alpha[(d + nLambda1s + 2):(d + nLambda)]
 46 |     }
 47 |     sampleScore = CC %*% alpha
 48 |     # A linear combination of the environmental variables yields the
 49 |     # sampleScore
 50 |     design = buildDesign(sampleScore, responseFun)
 51 | 
 52 |     mu = muMarg * exp(design %*% NB_params * psi)
 53 |     if(allowMissingness){
 54 |         X = correctXMissingness(X, mu, allowMissingness, naId)
 55 |         preFabMat = 1 + X/thetaMat
 56 |     }
 57 |     if (envGradEst == "LR")
 58 |         mu0 = muMarg * c(exp(design %*% NB_params_noLab * psi))
 59 | 
 60 |     Jac = matrix(0, nrow = d + nLambda, ncol = d + nLambda)
 61 |     # dLag²/dalpha_{yk}dlambda_{1k}
 62 |     Jac[(d + seq_len(nLambda1s)), did] = centMat
 63 | 
 64 |     Jac[did, d + nLambda1s + 1] = 2 * alpha
 65 |     responseFun = switch(responseFun, dynamic = "quadratic", responseFun)
 66 | 
 67 |     if (responseFun == "linear") {
 68 |         tmp = rowMultiply(preFabMat * mu/(1 + mu/thetaMat)^2, NB_params[2,
 69 |             ]^2)
 70 |         if (envGradEst == "LR") {
 71 |             tmp0 = preFabMat * mu0/(1 + mu0/thetaMat)^2 * NB_params_noLab[2]^2
 72 |         }
 73 |     } else if (responseFun == "quadratic") {
 74 |         tmp = preFabMat * mu/(1 + mu/thetaMat)^2
 75 |         if (envGradEst == "LR") {
 76 |             tmp0 = preFabMat * mu0/(1 + mu0/thetaMat)^2
 77 |         }
 78 |     }
 79 |     # dLag²/ds_{ik}dlambda_{3kk'}
 80 |     if (k > 1) {
 81 |         Jac[(d + nLambda1s + 2):(d + nLambda), did] = alphaK
 82 |     }
 83 | 
 84 |     # Symmetrize
 85 |     Jac = Jac + t(Jac)
 86 |     cSam = c(sampleScore)
 87 |     cSam2 = cSam^2
 88 | 
 89 |     Jac[did, did] = switch(responseFun, linear = -psi^2 *
 90 |                     (colSums(tensor(vapply(did,
 91 |         FUN.VALUE = tmp, function(x) {
 92 |             CC[, x] * switch(envGradEst, LR = (tmp - tmp0), ML = tmp)
 93 |         }), CC, 1, 1))), quadratic = switch(envGradEst, LR =
 94 |         colSums((tensor(vapply(did,
 95 |         FUN.VALUE = tmp, function(x) {
 96 |             CC[, x] * (-psi^2 * (tmp * (matrix(NB_params[2, ]^2, n, ncols,
 97 |                 byrow = TRUE) + 4 * outer(cSam, NB_params[2, ] * NB_params[3,
 98 |                 ]) + 4 * outer(cSam2, NB_params[3, ]^2)) - tmp0 *
 99 |                 (NB_params_noLab[2] +
100 |                 NB_params_noLab[3] * 2 * cSam)^2) + 2 * psi * (rowMultiply((X -
101 |                 mu)/(1 + mu/thetaMat), NB_params[3, ]) - (X - mu0)/(1 +
102 |                 mu0/thetaMat) * NB_params_noLab[3]))
103 |         }), CC, 1, 1))), ML = colSums(tensor(vapply(did, FUN.VALUE = tmp,
104 |         function(x) {
105 |             CC[, x] * (-psi^2 * (tmp * (matrix(NB_params[2, ]^2, n, ncols,
106 |                 byrow = TRUE) + 4 * outer(cSam, NB_params[2, ] * NB_params[3,
107 |                 ]) + 4 * outer(cSam2, NB_params[3, ]^2)) + 2 * psi *
108 |                 (rowMultiply((X -
109 |                 mu)/(1 + mu/thetaMat), NB_params[3, ]))))
110 |         }), CC, 1, 1))))
111 | 
112 |     diag(Jac)[did] = diag(Jac)[did] + 2 * lambda2  #Correct the diagonal
113 |     Jac
114 | }
115 | 


--------------------------------------------------------------------------------
/R/F_NBalphaInfl.R:
--------------------------------------------------------------------------------
  1 | #' Calculate the components of the influence functions
  2 | #'
  3 | #' @param rcm an rcm object
  4 | #' @param Dim the required dimension
  5 | #'
  6 | #' @return An n-by-p-by-d array with the influence of every observation
  7 | #'  on every alpha parameter
  8 | NBalphaInfl = function(rcm, Dim) {
  9 |     if (length(Dim) > 1) {
 10 |         stop("Influence of only one dimension at the time can be
 11 |         extratced! \n")
 12 |     }
 13 |     # Extract the parameters
 14 |     alpha = rcm$alpha[, Dim]
 15 |     centMat = buildCentMat(rcm)
 16 |     nLambda1s = NROW(centMat)
 17 |     lambdas = rcm$lambdasAlpha[seq_k(Dim, nLambda1s)]
 18 |     lambda1s = lambdas[seq_len(nLambda1s)]
 19 |     # Multiple centering requirements now!
 20 |     lambda2 = lambdas[nLambda1s + 1]
 21 |     lambda3 = if (Dim == 1) {
 22 |         0
 23 |     } else {
 24 |         lambdas[-seq_len(nLambda1s)]
 25 |     }
 26 |     responseFun = rcm$responseFun
 27 |     CC = rcm$covariates
 28 |     X = rcm$X
 29 |     p = ncol(X)
 30 |     n = nrow(X)
 31 |     d = ncol(CC)
 32 |     envGradEst = if (is.null(rcm$envGradEst))
 33 |         "LR" else rcm$envGradEst
 34 |     thetaMat = matrix(rcm$thetas[, paste0("Dim",Dim)], byrow = TRUE, n, p)
 35 |     NB_params = rcm$NB_params[, , Dim]
 36 |     NB_params_noLab = rcm$NB_params_noLab[, Dim]
 37 |     psi = rcm$psis[Dim]
 38 | 
 39 |     sampleScore = CC %*% alpha
 40 |     # A linear combination of the environmental variables yields the
 41 |     # sampleScore
 42 |     mu = extractE(rcm, seq_len(Dim))
 43 |     X = correctXMissingness(X, mu, rcm$NApresent, is.na(X))
 44 |     muMarg = extractE(rcm, seq_len(Dim - 1))
 45 |     tmp = (X - mu)/(1 + mu/thetaMat)
 46 |     tmp2 = rowMultiply(tmp, NB_params[2, ])
 47 | 
 48 |     if (envGradEst == "LR") {
 49 |         mu0 = muMarg * c(exp(buildDesign(sampleScore, responseFun) %*%
 50 |         NB_params_noLab *
 51 |             psi))
 52 |         tmp0 = (X - mu0)/(1 + mu0/thetaMat)
 53 |     }
 54 |     # A n-by-p-by-d array
 55 |     score = switch(responseFun, linear = if (envGradEst == "LR") {
 56 |         psi * (vapply(seq_len(d), FUN.VALUE = tmp, function(i) {
 57 |             tmp2 * CC[, i]
 58 |         }) - NB_params_noLab[2] * vapply(seq_len(d), FUN.VALUE = tmp0,
 59 |         function(i) {
 60 |             tmp0 * CC[, i]
 61 |         }))
 62 |     } else {
 63 |         psi * (vapply(seq_len(d), FUN.VALUE = tmp, function(i) {
 64 |             tmp2 * CC[, i]
 65 |         }))
 66 |     }, quadratic = if (envGradEst == "LR") {
 67 |         psi * (vapply(seq_len(d), FUN.VALUE = tmp, function(i) {
 68 |             tmp2 * CC[, i]
 69 |         }) + 2 * vapply(seq_len(d), FUN.VALUE = tmp, function(i) {
 70 |             rowMultiply(tmp, NB_params[3, ]) * CC[, i] * c(sampleScore)
 71 |         }) - NB_params_noLab[2] * vapply(seq_len(d), FUN.VALUE = tmp0,
 72 |         function(i) {
 73 |             tmp0 * CC[, i]
 74 |         }) - 2 * NB_params_noLab[3] * vapply(seq_len(d), FUN.VALUE = tmp,
 75 |             function(i) {
 76 |                 tmp0 * CC[, i] * c(sampleScore)
 77 |             }))
 78 |     } else {
 79 |         psi * (vapply(seq_len(d), FUN.VALUE = tmp, function(i) {
 80 |             tmp2 * CC[, i]
 81 |         }) - NB_params_noLab[2] * vapply(seq_len(d), FUN.VALUE = tmp0,
 82 |         function(i) {
 83 |             tmp0 * CC[, i]
 84 |         }))
 85 |     }, stop("Unknown response function provided! \n")) +
 86 |     rep(c(lambda1s %*% centMat) + lambda2 * 2 * alpha +
 87 |     if (Dim > 1) rowSums(rcm$alpha[,
 88 |         seq_len(Dim - 1), drop = FALSE] %*% lambda3) else 0, each = n *p)
 89 | 
 90 |     JacobianInv = -solve(LR_nb_Jac(Alpha = c(alpha, lambda1s, lambda2, lambda3),
 91 |         X = X, CC = CC, responseFun = responseFun, psi = psi,
 92 |         NB_params = NB_params,
 93 |         NB_params_noLab = NB_params_noLab, d = d, k = Dim,
 94 |         centMat = centMat,
 95 |         nLambda = nLambda1s + Dim, nLambda1s = nLambda1s, thetaMat = thetaMat,
 96 |         muMarg = extractE(rcm, seq_len(Dim - 1)), n = n, ncols = p,
 97 |         envGradEst = envGradEst,
 98 |         alphaK = rcm$alpha[, seq_len(Dim - 1), drop = FALSE], preFabMat = 1 +
 99 |             X/thetaMat, allowMissingness = rcm$NApresent,
100 |         naId = is.na(rcm$X)))[seq_len(d), seq_len(d)]
101 |     # Return only alpha indices, don't forget the minus sign!
102 |     rownames(JacobianInv) = colnames(JacobianInv) = names(alpha)
103 |     tensor(score, JacobianInv, 3, 1)
104 | }
105 | 


--------------------------------------------------------------------------------
/R/F_NBcolInfl.R:
--------------------------------------------------------------------------------
 1 | #' The influence function for the column scores
 2 | #' @param rcm an rcm object
 3 | #' @param Dim the required dimension
 4 | #'
 5 | #' @return A list with components
 6 | #' \item{score}{a matrix with components of the score function}
 7 | #' \item{InvJac}{A square matrix of dimension p with the components of the
 8 | #'  Jacobian related to the column scores}
 9 | NBcolInfl = function(rcm, Dim = 1) {
10 |     reg = rcm$psis[Dim] * rcm$rMat[, Dim]
11 |     mu = extractE(rcm, seq_len(Dim))
12 |     rcm$X  = correctXMissingness(rcm$X, mu, rcm$NApresent)
13 |     #Take also lower dimensions into account here
14 |     thetaMat = matrix(byrow = TRUE, nrow = nrow(rcm$X),
15 |         ncol = ncol(rcm$X), data = rcm$thetas[,
16 |             switch(as.character(Dim), `0` = "Independence",
17 |                 `0.5` = "Filtered", paste0("Dim",
18 |                 Dim))])
19 |     lambdaCol = rcm$lambdaCol[seq_k(Dim)]
20 |     cMatK = rcm$cMat[seq_len(Dim - 1), ,
21 |         drop = FALSE]
22 |     tmp = if (Dim > 1)
23 |         lambdaCol[-c(1, 2)] %*% cMatK else 0
24 | 
25 |     score = t(t((reg * (rcm$X - mu)/(1 + mu/thetaMat))) +
26 |         rcm$colWeights * (lambdaCol[1] +
27 |             lambdaCol[2] * 2 * rcm$cMat[Dim,] +
28 |             tmp))
29 | 
30 |     JacobianInv = solve(NBjacobianCol(beta = c(rcm$cMat[Dim,
31 |         ], lambdaCol), X = rcm$X, reg = reg,
32 |         thetas = thetaMat, muMarg = mu, k = Dim,
33 |         p = nrow(rcm$X), n = ncol(rcm$X),
34 |         colWeights = rcm$colWeights, nLambda = length(lambdaCol),
35 |         cMatK = cMatK, allowMissingness = rcm$NApresent))
36 |     # Inverse Jacobian
37 | 
38 |     # After a long thought: The X's do not
39 |     # affect the estimation of the lambda
40 |     # parameters! Matrix becomes too large:
41 |     # return score and inverse jacobian
42 |     return(list(score = score, InvJac = JacobianInv[seq_len(ncol(rcm$X)),
43 |         seq_len(ncol(rcm$X))]))
44 | }
45 | 


--------------------------------------------------------------------------------
/R/F_NBjacobianAbunds.R:
--------------------------------------------------------------------------------
 1 | #'Jacobian for the column components of the independence model
 2 | #'
 3 | #'@param beta a vector of length p with current abundance estimates
 4 | #'@param X a n-by-p count matrix
 5 | #'@param reg a vector of length n with library sizes estimates
 6 | #'@param thetas a n-by-p matrix with overdispersion estimates in the rows
 7 | #' @param allowMissingness A boolean, are missing values present
 8 | #' @param naId The numeric index of the missing values in X
 9 | #'
10 | #'@return a diagonal matrix of dimension p with evaluations
11 | #'of the jacobian function
12 | NBjacobianAbundsOld = function(beta, X, reg,
13 |     thetas, allowMissingness, naId) {
14 |     mu = exp(outer(reg, beta, "+"))
15 |     X = correctXMissingness(X, mu, allowMissingness, naId)
16 |     -diag(colSums((1 + (X/thetas)) * mu/(1 +
17 |         (mu/thetas))^2))
18 | }
19 | NBjacobianAbunds = function(beta, X, reg,
20 |                             thetas, allowMissingness, naId) {
21 |     mu = exp(reg+beta)
22 |     X = correctXMissingness(X, mu, allowMissingness, naId)
23 |     -as.matrix(sum((1 + (X/thetas)) * mu/(1 +
24 |                                              (mu/thetas))^2))
25 | }
26 | 


--------------------------------------------------------------------------------
/R/F_NBjacobianCol.R:
--------------------------------------------------------------------------------
 1 | #' Jacobian for the estimation of the column scores
 2 | #'
 3 | #' @param beta vector of length p+1+1+(k-1): p row scores, 1 centering,
 4 | #'  one normalization
 5 | #'  and (k-1) orhtogonality lagrangian multipliers
 6 | #' @param X the nxp data matrix
 7 | #' @param reg a nx1 regressor matrix: outer product of rowScores and psis
 8 | #' @param thetas nxp matrix with the dispersion parameters
 9 | #' (converted to matrix for numeric reasons)
10 | #' @param muMarg the nxp offset
11 | #' @param k an integer, the dimension of the RC solution
12 | #' @param p an integer, the number of taxa
13 | #' @param n an integer, the number of samples
14 | #' @param nLambda an integer, the number of restrictions
15 | #' @param colWeights the weights used for the restrictions
16 | #' @param cMatK the lower dimensions of the colScores
17 | #' @param preFabMat a prefab matrix, (1+X/thetas)
18 | #' @param Jac an empty Jacobian matrix
19 | #' @param allowMissingness A boolean, are missing values present
20 | #' @param naId The numeric index of the missing values in X
21 | #'
22 | #' @return A matrix of dimension p+1+1+(k-1) with evaluations of the Jacobian
23 | NBjacobianColOld = function(beta, X, reg, thetas,
24 |     muMarg, k, n, p, colWeights, nLambda,
25 |     cMatK, preFabMat, Jac, allowMissingness, naId) {
26 |     cMat = beta[seq_len(p)]
27 | 
28 |     # Calculate the mean
29 |     mu = exp(reg %*% cMat) * muMarg
30 | 
31 |     # The symmetric jacobian matrix. The
32 |     # upper part is filled first, then mirror
33 |     # image is taken for lower triangle
34 | 
35 |     # dLag²/dr_{ik}dlambda_{1k}
36 |     Jac[seq_len(p), p + 2] = Jac[p + 2, seq_len(p)] = colWeights *
37 |         2 * cMat
38 | 
39 |     if(allowMissingness){
40 |         preFabMat = 1 + correctXMissingness(X, mu, allowMissingness, naId)/thetas
41 |     }
42 | 
43 |     # dLag²/dr_{ik}²
44 |     diag(Jac)[seq_len(p)] = -crossprod(preFabMat *
45 |         mu/(1 + mu/thetas)^2, reg^2) + 2 *
46 |         beta[p + 2] * colWeights
47 |     Jac
48 | }
49 | NBjacobianCol = function(beta, X, reg, thetas,
50 |                          muMarg, k, n, p, colWeights, nLambda,
51 |                          cMatK, preFabMat, Jac, allowMissingness, naId) {
52 |     # Calculate the mean
53 |     mu = exp(reg *beta) * muMarg
54 |     if(allowMissingness){
55 |         preFabMat = 1 + correctXMissingness(X, mu, allowMissingness, naId)/thetas
56 |     }
57 |      as.matrix(-sum(preFabMat * mu/(1 + mu/thetas)^2 +  reg^2))
58 | }
59 | 


--------------------------------------------------------------------------------
/R/F_NBjacobianColNP.R:
--------------------------------------------------------------------------------
 1 | #' Jacobian function for the estimation of a third degree GLM
 2 | #'
 3 | #' @param beta vector of any length
 4 | #' @param X the data vector of length n
 5 | #' @param reg a nxlength(beta) regressor matrix
 6 | #' @param theta a scalar, the overdispersion
 7 | #' @param muMarg the offset of length n
 8 | #'
 9 | #' @return A matrix of dimension 8-by-8
10 | NBjacobianColNP = function(beta, X, reg,
11 |     theta, muMarg) {
12 |     # Calculate the mean
13 |     mu = exp(reg %*% beta) * muMarg
14 |     # Return the Jacobian
15 |     -crossprod(reg * c((1 + X/theta) * mu/(1 +
16 |         mu/theta)^2), reg)
17 | }
18 | 


--------------------------------------------------------------------------------
/R/F_NBjacobianLibSizes.R:
--------------------------------------------------------------------------------
 1 | #' Jacobian for the raw components of the independence model
 2 | #'
 3 | #'@param beta a vector of length n with current library size estimates
 4 | #'@param X a n-by-p count matrix
 5 | #'@param reg a vector of length p with relative abundance estimates
 6 | #'@param thetas a n-by-p matrix with overdispersion estimates in the rows
 7 | #' @param allowMissingness A boolean, are missing values present
 8 | #' @param naId The numeric index of the missing values in X
 9 | #'
10 | #'@return a diagonal matrix of dimension n: the Fisher information matrix
11 | NBjacobianLibSizes = function(beta, X, reg,
12 |     thetas, allowMissingness, naId) {
13 |     mu = exp(outer(beta, reg, "+"))
14 |     X = correctXMissingness(X, mu, allowMissingness, naId)
15 |     diag(-rowSums((1 + (X/thetas)) * mu/(1 +
16 |         (mu/thetas))^2))
17 | }
18 | 


--------------------------------------------------------------------------------
/R/F_NBjacobianPsi.R:
--------------------------------------------------------------------------------
 1 | #' Jacobian for the psi of a given dimension
 2 | #'
 3 | #' @param beta a scalar, the current estimate
 4 | #' @param X the n-by-p count matrix
 5 | #' @param muMarg the nxp offset matrix
 6 | #' @param reg the regressor matrix,
 7 | #' the outer product of current row and column scores
 8 | #' @param theta a n-by-p matrix with the dispersion parameters
 9 | #' @param preFabMat a prefab matrix, (1+X/thetas)
10 | #' @param allowMissingness A boolean, are missing values present
11 | #' @param naId The numeric index of the missing values in X
12 | #'
13 | #' @return The evaluation of the jacobian function at beta, a 1-by-1 matrix
14 | NBjacobianPsi = function(beta, X, reg, muMarg,
15 |     theta, preFabMat, allowMissingness, naId) {
16 |     mu = muMarg * exp(reg * beta)
17 |     if(allowMissingness){
18 |         preFabMat = 1 + correctXMissingness(X, mu, allowMissingness, naId)/theta
19 |     }
20 |     matrix(-sum(reg^2 * preFabMat * mu/(1 +
21 |         mu/theta)^2), 1, 1)
22 | }
23 | 


--------------------------------------------------------------------------------
/R/F_NBjacobianRow.R:
--------------------------------------------------------------------------------
 1 | #' A jacobian function of the NB for the row scores
 2 | #'
 3 | #' @param beta a vector of of length n + k +1 regression parameters to optimize
 4 | #' @param X the data matrix of dimensions nxp
 5 | #' @param reg a 1xp regressor matrix: outer product of column scores and psis
 6 | #' @param thetas nxp matrix with the dispersion parameters
 7 | #' (converted to matrix for numeric reasons)
 8 | #' @param muMarg an nxp offset matrix
 9 | #' @param k a scalar, the dimension of the RC solution
10 | #' @param p a scalar, the number of taxa
11 | #' @param n a scalar, the number of samples
12 | #' @param rowWeights a vector of length n, the weights used for the restrictions
13 | #' @param nLambda an integer, the number of lagrangian multipliers
14 | #' @param rMatK the lower dimension row scores
15 | #' @param preFabMat a prefab matrix, (1+X/thetas)
16 | #' @param Jac an empty Jacobian matrix
17 | #' @param allowMissingness A boolean, are missing values present
18 | #' @param naId The numeric index of the missing values in X
19 | #'
20 | #' @return a symmetric jacobian matrix of size n+k + 1
21 | NBjacobianRow = function(beta, X, reg, thetas,
22 |     muMarg, k, n, p, rowWeights, nLambda,
23 |     rMatK, preFabMat, Jac, allowMissingness, naId) {
24 |     rMat = beta[seq_len(n)]
25 |     mu = exp(rMat %*% reg) * muMarg
26 | 
27 |     if(allowMissingness){
28 |       preFabMat = 1 + correctXMissingness(X, mu, allowMissingness, naId)/thetas
29 |     }
30 | 
31 |     # dLag²/dr_{ik}dlambda_{1k} already
32 |     # happened dLag²/dr_{ik}dlambda_{2k}
33 |     Jac[seq_len(n), n + 2] = Jac[n + 2, seq_len(n)] = 2 *
34 |         rMat * rowWeights
35 |     # dLag²/dr_{ik}dlambda_{3kk'} already
36 |     # happened
37 | 
38 |     # dLag²/dr_{ik}²
39 |     diag(Jac)[seq_len(n)] = -tcrossprod(reg^2,
40 |         preFabMat * mu/(1 + mu/thetas)^2) +
41 |         2 * rowWeights * beta[n + 2]
42 |     Jac
43 | }
44 | 


--------------------------------------------------------------------------------
/R/F_NBpsiInfl.R:
--------------------------------------------------------------------------------
 1 | #' The influence function for the psis
 2 | #'
 3 | #' @param rcm an rcm object
 4 | #' @param Dim the required dimensions
 5 | #'
 6 | #' @return The influence of every single observation
 7 | #'  on the psi value of this dimension
 8 | NBpsiInfl = function(rcm, Dim = 1) {
 9 |     mu = extractE(rcm, seq_len(Dim))
10 |     # Take also lower dimensions into account
11 |     # here
12 |     thetaMat = matrix(byrow = TRUE, nrow = nrow(rcm$X),
13 |         ncol = ncol(rcm$X), data = rcm$thetas[,
14 |             switch(as.character(Dim), `0` = "Independence",
15 |                 `0.5` = "Filtered", paste0("Dim",
16 |                 Dim))])
17 |     reg = if (is.null(rcm$covariates)) {
18 |         rcm$rMat[, Dim, drop = FALSE] %*% rcm$cMat[Dim,, drop = FALSE]
19 |     } else {getRowMat(sampleScore = rcm$covariates %*% rcm$alpha[, Dim],
20 |                       responseFun = rcm$responseFun,
21 |         NB_params = rcm$NB_params[, , Dim])}
22 |     rcm$X = correctXMissingness(rcm$X, mu, rcm$NApresent, naId = is.na(rcm$X))
23 | 
24 |     -((rcm$X - mu) * (thetaMat + mu))/(reg *
25 |         (thetaMat + rcm$X) * mu)
26 | }
27 | 


--------------------------------------------------------------------------------
/R/F_NBrowInfl.R:
--------------------------------------------------------------------------------
 1 | #' The influence function for the row scores
 2 | #'
 3 | #' @param rcm an rcm object
 4 | #' @param Dim the required dimension
 5 | #'
 6 | #' @return A list with components
 7 | #' \item{score}{a matrix with components of the score function}
 8 | #' \item{InvJac}{A square matrix of dimension n with the components
 9 | #'  of the Jacobian related to the row scores}
10 | NBrowInfl = function(rcm, Dim = 1) {
11 |     reg = rcm$psis[Dim] * rcm$cMat[Dim, ]
12 |     mu = extractE(rcm, seq_len(Dim))
13 |     rcm$X = correctXMissingness(rcm$X, mu, rcm$NApresent)
14 |     #Take also lower dimensions into account here
15 |     thetaMat = matrix(byrow = TRUE, nrow = nrow(rcm$X),
16 |         ncol = ncol(rcm$X), data = rcm$thetas[,
17 |             switch(as.character(Dim), `0` = "Independence",
18 |                 `0.5` = "Filtered", paste0("Dim",
19 |                 Dim))])
20 |     lambdaRow = rcm$lambdaRow[seq_k(Dim)]
21 |     rMatK = rcm$rMat[, seq_len(Dim - 1),
22 |         drop = FALSE]
23 |     tmp = if (Dim > 1)
24 |         rcm$lambdaRow[-c(1, 2)] %*% rMatK else 0
25 | 
26 |     score = reg * (rcm$X - mu)/(1 + mu/thetaMat) +
27 |         c(rcm$rowWeights * (lambdaRow[1] +
28 |             lambdaRow[2] * 2 * rcm$rMat[,
29 |                 Dim] + tmp))
30 | 
31 |     JacobianInv = solve(NBjacobianRow(beta = c(rcm$rMat[,
32 |         Dim], lambdaRow), X = rcm$X, reg = reg,
33 |         thetas = thetaMat, muMarg = mu, k = Dim,
34 |         p = ncol(rcm$X), n = nrow(rcm$X),
35 |         rowWeights = rcm$rowWeights, nLambda = Dim +
36 |             1, rMatK = rMatK, allowMissingness = rcm$NApresent,
37 |         naId = is.na(rcm$X)))
38 |     # Inverse Jacobian
39 | 
40 |     # After a long thought: The X's do not
41 |     # affect the estimation of the lambda
42 |     # parameters! Matrix of all influences
43 |     # becomes too large: return score and
44 |     # inverse jacobian
45 |     return(list(score = score, InvJac = JacobianInv[seq_len(nrow(rcm$X)),
46 |         seq_len(nrow(rcm$X))]))
47 | }
48 | 


--------------------------------------------------------------------------------
/R/F_addOrthProjection.R:
--------------------------------------------------------------------------------
  1 | #' This function adds orthogonal projections to a given plot
  2 | #'
  3 | #' @param RCMplot the RCMplot object
  4 | #' @param sample,species,variable names or approximate coordinates of sample,
  5 | #'  species or variable
  6 | #' @param Dims The dimensions of the solutions that have been plotted
  7 | #' @param addLabel a boolean, should the r-s-psi label be added?
  8 | #' @param labPos the position of the label. Will be calculated if not provided
  9 | #'
 10 | #' @return a modified ggplot object that contains the geom_segment object
 11 | #' that draws the projection
 12 | #' @export
 13 | #' @import ggplot2
 14 | #' @import phyloseq
 15 | #' @seealso \code{\link{plot.RCM}}
 16 | #' @examples
 17 | #' data(Zeller)
 18 | #' require(phyloseq)
 19 | #' tmpPhy = prune_taxa(taxa_names(Zeller)[seq_len(100)],
 20 | #' prune_samples(sample_names(Zeller)[seq_len(50)], Zeller))
 21 | #' zellerRCM = RCM(tmpPhy, k = 2, round = TRUE)
 22 | #' zellerPlot = plot(zellerRCM, returnCoords = TRUE)
 23 | #' addOrthProjection(zellerPlot, species = c(-0.35,1.1), sample = c(1,1.2))
 24 | addOrthProjection = function(RCMplot, sample = NULL, species = NULL,
 25 |                     variable = NULL, Dims = c(1,2), addLabel = FALSE,
 26 |                     labPos = NULL) {
 27 |     nulls = is.null(sample) + is.null(species) +
 28 |         is.null(variable)
 29 |     if (nulls != 1)
 30 |         stop("Provide two variables categories for a projection! \n")
 31 |     if (is.null(species))
 32 |         stop("Species should be provided,
 33 |                             cannot project sample onto variable vector! \n")
 34 | 
 35 |     dimNames = paste0("Dim", Dims)
 36 |     if (is.numeric(sample)) {
 37 |         samp = which.min(colSums((t(RCMplot$samples[,
 38 |             dimNames]) - sample)^2))
 39 |         # Closest to approximate coordinate
 40 |         sampName = rownames(RCMplot$samples)[samp]
 41 |     } else {
 42 |         sampName = sample
 43 |     }
 44 | 
 45 |     if (is.numeric(species)) {
 46 |         species = which.min(colSums((t(RCMplot$species[,
 47 |             paste0("end", Dims)]) - species)^2))
 48 |         # Closest to approximate coordinate
 49 |         speciesName = rownames(RCMplot$species)[species]
 50 |     } else {
 51 |         speciesName = species
 52 |     }
 53 | 
 54 |     if (is.numeric(variable)) {
 55 |         variable = which.min(colSums((t(RCMplot$variables[,
 56 |             dimNames]) - species)^2))
 57 |         # Closest to approximate coordinate
 58 |         varName = rownames(RCMplot$variables)[variable]
 59 |     } else {
 60 |         varName = variable
 61 |     }
 62 | 
 63 |     mat1 = unlist(if (is.null(variable)) {
 64 |         RCMplot$samples[sampName, dimNames]
 65 |     } else {
 66 |         RCMplot$variables[varName, dimNames]
 67 |     })
 68 |     mat2 = unlist(RCMplot$species[speciesName,
 69 |         c(vapply(Dims, FUN.VALUE = character(2),
 70 |             function(x) {
 71 |                 c(paste0("end", x), paste0("origin",x))
 72 |             }))])
 73 | 
 74 |     RCMplot$plot = RCMplot$plot + geom_segment(inherit.aes = FALSE,
 75 |         mapping = aes_string(x = 0, y = 0,
 76 |             xend = "Dim1", yend = "Dim2"),
 77 |         data = data.frame(t(mat1)))  #The sample or variable vector
 78 |     IntCoordsXTip = (mat2["end1"] + mat2["end2"] *
 79 |         mat1[2]/mat1[1])/((mat1[2]/mat1[1])^2 +
 80 |         1)
 81 |     IntCoordsYTip = IntCoordsXTip * mat1[2]/mat1[1]
 82 | 
 83 |     IntCoordsXStart = (mat2["origin1"] +
 84 |         mat2["origin2"] * mat1[2]/mat1[1])/((mat1[2]/mat1[1])^2 +
 85 |         1)
 86 |     IntCoordsYStart = IntCoordsXStart * mat1[2]/mat1[1]
 87 | 
 88 |     dfTip = data.frame(x = mat2[grep(names(mat2),
 89 |         pattern = "end")][1], y = mat2[grep(names(mat2),
 90 |         pattern = "end")][2], xend = IntCoordsXTip,
 91 |         yend = IntCoordsYTip)
 92 |     dfStart = data.frame(x = mat2[grep(names(mat2),
 93 |         pattern = "origin")][1], y = mat2[grep(names(mat2),
 94 |         pattern = "origin")][2], xend = IntCoordsXStart,
 95 |         yend = IntCoordsYStart)
 96 | 
 97 |     RCMplot$plot = RCMplot$plot + geom_segment(inherit.aes = FALSE,
 98 |         mapping = aes_string(x = "x", y = "y",
 99 |             xend = "xend", yend = "yend"),
100 |         data = dfTip, linetype = "dashed")
101 |     RCMplot$plot = RCMplot$plot + geom_segment(inherit.aes = FALSE,
102 |         mapping = aes_string(x = "x", y = "y",
103 |             xend = "xend", yend = "yend"),
104 |         data = dfStart, linetype = "dashed")
105 | 
106 |     # Add a red line for the projection
107 |     dfRed = data.frame(xend = IntCoordsXTip,
108 |         yend = IntCoordsYTip, x = IntCoordsXStart,
109 |         y = IntCoordsYStart)
110 |     RCMplot$plot = RCMplot$plot + geom_segment(inherit.aes = FALSE,
111 |         col = "orange", mapping = aes_string(x = "x",
112 |             y = "y", xend = "xend", yend = "yend"),
113 |         data = dfRed)
114 | 
115 |     if (addLabel) {
116 |         # Add some annotation
117 |         labPos = if (is.null(labPos)) {
118 |             apply(RCMplot$samples[, dimNames],
119 |                 2, min) * 1.1
120 |         } else {
121 |             labPos
122 |         }
123 |         xLab = labPos[1]
124 |         yLab = labPos[2]
125 |         dfRed = within(dfRed, {
126 |             xLab = xLab * 2
127 |             yLab = yLab * 2
128 |         })
129 |         RCMplot$plot = RCMplot$plot + geom_segment(inherit.aes = FALSE,
130 |             mapping = aes_string(x = "xLab",
131 |                 y = "yLab", xend = "xend",
132 |                 yend = "yend"), data = dfRed/2,
133 |             arrow = arrow(length = unit(0.2,
134 |                 "cm")), size = 0.25) + annotate("text",
135 |             col = "orange", label = "r~psi~s",
136 |             x = xLab, y = yLab, parse = TRUE,
137 |             size = 7)
138 |     }
139 | 
140 |     RCMplot$plot
141 | }
142 | 


--------------------------------------------------------------------------------
/R/F_arrayProd.R:
--------------------------------------------------------------------------------
 1 | #' An auxiliary R function to 'array' multiply an array with a vector,
 2 | #'  kindly provided by Joris Meys
 3 | #'
 4 | #' @param x a axbxc array
 5 | #' @param y a vector of length c
 6 | #'
 7 | #' @return a axb matrix. The ij-th element equals sum(x[i,j,]*y)
 8 | arrayprod <- function(x, y) {
 9 |     
10 |     xdim <- dim(x)
11 |     outdim <- xdim[c(1, 2)]
12 |     outn <- prod(outdim)
13 |     
14 |     yexpand <- rep(y, each = outn)
15 |     
16 |     tmp <- x * yexpand
17 |     
18 |     dim(tmp) <- c(outn, xdim[3])
19 |     out <- rowSums(tmp)
20 |     
21 |     dim(out) <- outdim
22 |     
23 |     out
24 | }
25 | 


--------------------------------------------------------------------------------
/R/F_buildCentMat.R:
--------------------------------------------------------------------------------
 1 | #' A function to build a centering matrix based on a dataframe
 2 | #'
 3 | #' @param object an rcm object or dataframe
 4 | #'
 5 | #' @return a centering matrix consisting of ones and zeroes,
 6 | #'  or a list with components
 7 | #' \item{centMat}{a centering matrix consisting of ones and zeroes}
 8 | #' \item{datFrame}{The dataframe with factors with one level removed}
 9 | buildCentMat = function(object) {
10 |     if (is.data.frame(object)) {
11 |         nFactorLevels = vapply(object, FUN.VALUE = double(1),
12 |             function(x) {
13 |                 if (is.factor(x))
14 |                     nlevels(x) else 1
15 |             })
16 |         # Number of levels per factor
17 |         oneLevelID = vapply(object, FUN.VALUE = TRUE, function(x) {
18 |             length(unique(x)) == 1
19 |         })
20 |         object[, oneLevelID] = NULL  #Drop factors with one level
21 |         if (any(oneLevelID)) {
22 |             warning("The following variables were not included
23 | in the analyses because they have only one value: \n",
24 |                 paste(object[oneLevelID], sep = " \n"), immediate. = TRUE)
25 |         }
26 |     } else if (is(object, "RCM")) {
27 |         nFactorLevels = vapply(FUN.VALUE = integer(1), unique(object$attribs),
28 |             function(x) {
29 |                 sum(object$attribs == x)
30 |             })  #Number of levels per factor
31 | 
32 |     } else {
33 |         stop("Invalid object supplied! \n")
34 |     }
35 |     # Already prepare the matrix that defines the equations for
36 |     # centering the coefficients of the dummy variables
37 |     centMat = t(vapply(FUN.VALUE = numeric(sum(nFactorLevels)),
38 |         seq_along(nFactorLevels), function(i) {
39 |             c(rep.int(0L, sum(nFactorLevels[seq(0, i - 1)])),
40 |                 rep.int(if (nFactorLevels[i] == 1) 0L else 1L,
41 |                 nFactorLevels[i]), rep.int(0L, sum(nFactorLevels[-seq(1,
42 |                     i)])))
43 |         }))
44 |     centMat = if (all(rowSums(centMat) == 0)) {
45 |         matrix(0L, 1, sum(nFactorLevels))
46 |     } else {
47 |         centMat[rowSums(centMat) > 0, , drop = FALSE]
48 |     }
49 |     if (is.data.frame(object))
50 |         return(list(centMat = centMat, datFrame = object)) else return(centMat)
51 | }
52 | 


--------------------------------------------------------------------------------
/R/F_buildConfMat.R:
--------------------------------------------------------------------------------
 1 | #' A function to build the confounder matrices
 2 | #'
 3 | #' @param x a matrix, data frame or character string
 4 | #' @param ... further arguments passed on to other methods
 5 | #'
 6 | #' For the preliminary trimming, we do not include an intercept,
 7 | #' but we do include all the levels of the factors using contrasts=FALSE:
 8 | #'  we want to do the trimming in every subgroup, so no hidden reference levels
 9 | #'   For the filtering we just use a model with an intercept and
10 | #'    treatment coding, here the interest is only in adjusting the offset
11 | #'
12 | #' @return a list with components
13 | #' \item{confModelMatTrim}{A confounder matrix without intercept, with all
14 | #'  levels of factors present. This will be used to trim out taxa that have
15 | #'   zero abundances in any subgroup defined by confounders}
16 | #' \item{confModelMat}{A confounder matrix with intercept,
17 | #' and with reference levels for factors absent.
18 | #' This will be used to fit the model to modify the independence model,
19 | #' and may include continuous variables}
20 | #' @importFrom stats model.matrix
21 | buildConfMat = function(x, ...) {
22 |     UseMethod("buildConfMat", x)
23 | }
24 | 
25 | #' buildConfMat.data.frame
26 | #'
27 | #' @param confounders a data frame of confounders
28 | #' @param n the number of rows of the count matrix
29 | #'
30 | #' @return see buidConfMat
31 | buildConfMat.data.frame = function(confounders, n) {
32 |     if (n != NROW(confounders)) {
33 |         # Check dimensions
34 |         stop("Data and confounder matrix do not have the same number
35 |         of samples! \n")
36 |     }
37 |     if (anyNA(confounders)) {
38 |         stop("Confounders contain missing values!\n")
39 |     }
40 |     #Check alias structure
41 |     checkAlias(confounders, names(confounders))
42 |     # No intercept or continuous variables for preliminary
43 |     # trimming
44 |     confModelMatTrim = model.matrix(object = as.formula(paste("~",
45 |         paste(names(confounders)[vapply(FUN.VALUE = TRUE, confounders,
46 |             is.factor)], collapse = "+"), "-1")), data = confounders,
47 |         contrasts.arg = lapply(confounders[vapply(FUN.VALUE = TRUE,
48 |             confounders, is.factor)], contrasts, contrasts = FALSE))
49 |     # With intercept for filtering
50 |     confModelMat = model.matrix(object = as.formula(paste("~",
51 |         paste(names(confounders), collapse = "+"))), data = confounders,
52 |         contrasts.arg = lapply(confounders[vapply(FUN.VALUE = TRUE,
53 |             confounders, is.factor)], contrasts, contrasts = TRUE))
54 |     list(confModelMatTrim = confModelMatTrim, confModelMat = confModelMat)
55 | }
56 | #' buildConfMat.character
57 | #' @param confounders a numeric matrix of confounders
58 | #' @param physeq a physeq object with a sample_data slot
59 | #'
60 | #' @return see buidConfMat.numeric
61 | buildConfMat.character = function(confounders, physeq) {
62 |     if (!is(physeq, "phyloseq")) {
63 |         stop("Providing confounders through variable names is only allowed
64 |         if phyloseq object is provided! \n")
65 |     }
66 |     confounders = as(sample_data(physeq),"data.frame")[,make.names(confounders),
67 |                                            drop = FALSE]
68 |     # The dataframe with the confounders
69 |     buildConfMat(confounders, n = nsamples(physeq))
70 | }
71 | buildConfMat.default = function(...) {
72 |     stop("Please provide the confounders either as
73 |     dataframe, or character string! \n")
74 | }
75 | 


--------------------------------------------------------------------------------
/R/F_buildCovMat.R:
--------------------------------------------------------------------------------
  1 | #' A function to build the covariate matrix of the constraints
  2 | #'
  3 | #' @param covariates the covariates, either as dataframe or as character string
  4 | #' @param dat the phyloseq object
  5 | #'
  6 | #' In this case we will 1) Include dummy's for every level of the
  7 | #'  categorical variable, and force them to sum to zero.
  8 | #'  This is needed for plotting and required for
  9 | #'   reference level independent normalization.
 10 | #'   2) Exclude an intercept. The density function f()
 11 | #'   will provide this already.
 12 | #'
 13 | #' @return a list with components
 14 | #' \item{covModelMat}{The model matrix}
 15 | #' \item{datFrame}{The dataframe used to construct the model matrix}
 16 | #' @importFrom stats model.matrix formula
 17 | buildCovMat = function(covariates, dat) {
 18 |     if (is.data.frame(covariates)) {
 19 |         datFrame = covariates
 20 |         covariatesNames = names(covariates)
 21 |     } else if (is.character(covariates)) {
 22 |         if (!is(dat, "phyloseq")) {
 23 |         stop("Providing covariates through variable names is only allowed
 24 |             if phyloseq object is provided! \n")
 25 |         }
 26 |       covariates = make.names(covariates) #Ensure valid names
 27 |         if (covariates[[1]] == "all") {
 28 |             covariates = as(sample_variables(dat), "data.frame")
 29 |         }
 30 |         # Enable the 'all' option if phyloseq object is provided
 31 |         datFrame = as(sample_data(dat), "data.frame")[, covariates,
 32 |             drop = FALSE]
 33 |         # The dataframe with the covariates
 34 |         covariatesNames = covariates
 35 |     } else {
 36 |         stop("Please provide the covariates either as dataframe
 37 |         or as character string! \n")
 38 |     }
 39 |     if (nsamples(dat) != NROW(datFrame)) {
 40 |         # Check dimensions
 41 |         stop("Data and covariate matrix do not have
 42 |         the same number of samples! \n")
 43 |     }
 44 |     logVec = vapply(FUN.VALUE = TRUE, datFrame, is.logical)
 45 |     intVec = vapply(FUN.VALUE = TRUE, datFrame, is.integer)
 46 |     charVec = vapply(FUN.VALUE = TRUE, datFrame, is.character)
 47 | 
 48 |     if (any(logVec)) {
 49 |         datFrame[, logVec] = lapply(datFrame[logVec], as.factor)
 50 |         # Convert logicals to factors warning('Logicals converted to
 51 |         # factors! \n', immediate. = TRUE). No warning needed
 52 |     }
 53 |     if (any(intVec)) {
 54 |         datFrame[, intVec] = lapply(datFrame[intVec], as.numeric)
 55 |         # Convert integers to numeric
 56 |         warning("Integer values treated as numeric! \n", immediate. = TRUE)
 57 |     }
 58 |     if (any(charVec)) {
 59 |         datFrame[, charVec] = lapply(datFrame[charVec], factor)
 60 |         # Convert characters to factor
 61 |         warning("Character vectors treated as factors! \n", immediate. = TRUE)
 62 |     }
 63 |     #Drop unused levels
 64 |     datFrame = droplevels(datFrame)
 65 |     nFactorLevels = vapply(FUN.VALUE = integer(1), datFrame,
 66 |         function(x) {
 67 |             if (is.factor(x))
 68 |                 nlevels(x) else 0L
 69 |         })  #Number of levels per factor
 70 |     singleFacID = vapply(FUN.VALUE = TRUE, datFrame, is.factor) &
 71 |         (nFactorLevels == 1L)
 72 |     if (any(singleFacID)) {
 73 |         warning("The following variables were not included in the analyses
 74 |             because they are factors with only one level: \n",
 75 |             paste(covariates[singleFacID], sep = " \n"),
 76 |             immediate. = TRUE, call. = FALSE)
 77 |         # Drop factors with one level
 78 |         covariatesNames = covariatesNames[!singleFacID]
 79 |         nFactorLevels = nFactorLevels[covariatesNames]
 80 |         datFrame = datFrame[, covariatesNames, drop = FALSE]
 81 |     }
 82 |     #Check for alias structures
 83 |     checkAlias(datFrame, covariatesNames)
 84 | 
 85 |     # Center and scale the continuous covariates
 86 |     datFrame[vapply(FUN.VALUE = TRUE, datFrame, is.numeric)] =
 87 |         scale(datFrame[vapply(FUN.VALUE = TRUE, datFrame, is.numeric)])
 88 | 
 89 |     covModelMat = model.matrix(
 90 |         object = formula(paste("~", paste(covariatesNames,
 91 |         collapse = "+"), "-1")), data = datFrame,
 92 |         contrasts.arg = lapply(datFrame[vapply(datFrame,
 93 |         is.factor, FUN.VALUE = TRUE)], contrasts, contrasts = FALSE))
 94 |     if (NCOL(covModelMat) == 1)
 95 |         stop("A constrained ordination with a variable
 96 | with only one level is meaningless.\n
 97 | Please provide more covariates or perform an unconstrained analysis.",
 98 |             call. = FALSE)
 99 |     list(covModelMat = covModelMat, datFrame = datFrame)
100 | }
101 | 


--------------------------------------------------------------------------------
/R/F_buildDesign.R:
--------------------------------------------------------------------------------
 1 | #' A function to build the design matrix
 2 | #'
 3 | #' @param sampleScore a vector of environmental scores
 4 | #' @param responseFun A character string, indicating the shape
 5 | #' of the response function
 6 | #'
 7 | #' For dynamic response function estimation, the same desing matrix
 8 | #' as for the quadratic one is returned.
 9 | #' Will throw an error when an unknown repsonse function is provided
10 | #'
11 | #' @return A design matrix of dimension n-by-f
12 | #' @importFrom stats model.matrix
13 | buildDesign = function(sampleScore, responseFun) {
14 |     # With intercept
15 |     design = switch(responseFun, linear = model.matrix(~sampleScore),
16 |         quadratic = model.matrix(~sampleScore +
17 |             I(sampleScore^2)), dynamic = model.matrix(~sampleScore +
18 |             I(sampleScore^2)), stop("Unknown response function"))
19 | }
20 | 


--------------------------------------------------------------------------------
/R/F_checkAlias.R:
--------------------------------------------------------------------------------
 1 | #' Check for alias structures in a dataframe, and throw an error when one is found
 2 | #' @param datFrame the data frame to be checked for alias structure
 3 | #' @param covariatesNames The names of the variables to be considered
 4 | #'
 5 | #' @return Throws an error when an alias structure is detected,
 6 | #' returns invisible otherwise
 7 | #'
 8 | #' @importFrom stats alias formula
 9 | #' @export
10 | #' @examples
11 | #' #Make a dataframe with aliased variables
12 | #' df = data.frame(foo = rnorm(10), baa = rep(c(TRUE, FALSE), each = 5),
13 | #' foo2 = factor(rep(c("male", "female"), each = 5)))
14 | #' checkAlias(df, c("foo", "baa"))
15 | #' #Check test files for the error being thrown
16 | checkAlias = function(datFrame, covariatesNames){
17 |     mockDf = cbind("Mock" = 1, datFrame)
18 |     #Fake dataframe for syntax purposes
19 |     Alias = alias(object = formula(paste("Mock~",
20 |                                 paste(covariatesNames, collapse = "+"),
21 |                                 "-1")), mockDf)
22 |     if(!is.null(Alias$Complete)){
23 |         stop("Sample variables\n'", paste(rownames(Alias$Complete),
24 |                                           collapse ="' and '"),
25 |             "'\nare aliased with other variables.
26 |             Drop some sample-variables and try again.")
27 |     } else {
28 |         return(invisible())
29 |     }
30 | }
31 | 


--------------------------------------------------------------------------------
/R/F_constrCorresp.R:
--------------------------------------------------------------------------------
 1 | #' Constrained correspondence analysis with adapted powers
 2 | #'
 3 | #' @param X outcome matrix
 4 | #' @param Y constraining matrix
 5 | #' @param rowExp,colExp see ?RCM_NB
 6 | #' @param muMarg mean matrix under independence model
 7 | #'
 8 | #' @return a list with eigenvalues, aliased variables and environmentam gradients
 9 | #' @details the vegan version, adapted for flexible powers rowExp and colExp
10 | #' @importFrom stats weighted.mean
11 | constrCorresp = function(X, Y, rowExp, colExp,
12 |                          muMarg = outer(rowSums(X), colSums(X))/sum(X)){
13 |   X = X/sum(X)
14 |   RW = rowSums(X)
15 |   CW = colSums(X)
16 |   Xinit = diag(1/RW^rowExp) %*% (X - muMarg/sum(muMarg)) %*% diag(1/CW^colExp)
17 |   ZERO <- sqrt(.Machine$double.eps)
18 |   envcentre <- apply(Y, 2, weighted.mean, w = RW)
19 |   Y <- scale(Y, center = envcentre, scale = FALSE)
20 |   Y <- sweep(Y, 1, sqrt(RW), "*")
21 |   Q <- qr(Y)
22 |   rank <- sum(Q$pivot[seq_len(Q$rank)] > 0)
23 |   if (length(Q$pivot) > Q$rank){
24 |     alias <- colnames(Q$qr)[-seq_len(Q$rank)]
25 |   } else{
26 |     alias <- NULL
27 |   }
28 |   kept <- seq_along(Q$pivot) <= Q$rank & Q$pivot > 0
29 |   Yfit <- qr.fitted(Q, Xinit)
30 |   sol <- svd(Yfit)
31 |   lambda <- sol$d^2
32 |   u <- sol$u
33 |   zeroev <- abs(lambda) < max(ZERO, ZERO * lambda[1L])
34 |   if (any(zeroev)) {
35 |     lambda <- lambda[!zeroev]
36 |     u <- u[, !zeroev, drop = FALSE]
37 |   }
38 |   posev <- lambda > 0
39 |   xx <- Y[, Q$pivot[kept], drop = FALSE]
40 |   bp <- (1/sqrt(colSums(xx^2))) * crossprod(xx, u[, posev, drop = FALSE])
41 |   list(eig = lambda, alias = alias, biplot = bp)
42 | }
43 | 
44 | 


--------------------------------------------------------------------------------
/R/F_correctXMissingness.R:
--------------------------------------------------------------------------------
 1 | #' Replace missing entries in X by their expectation to set their
 2 | #' contribution to the estimating equations to zero
 3 | #' @param X the matrix of counts
 4 | #' @param mu the matrix of expectations
 5 | #' @param allowMissingness A boolean, are missing values present
 6 | #' @param naId The numeric index of the missing values in X
 7 | #'
 8 | #' @return The matrix X with the NA entries replaced by the
 9 | #' corresponding entries in mu
10 | #'
11 | #' @note This may seem like a hacky approach, but it avoids having to deal
12 | #'  with NAs in functions like crossprod().
13 | correctXMissingness = function(X, mu, allowMissingness, naId){
14 |     if(allowMissingness){
15 |         X[naId] = mu[naId]
16 |     }
17 |     X
18 | }
19 | 


--------------------------------------------------------------------------------
/R/F_dLR_nb.R:
--------------------------------------------------------------------------------
  1 | #' A function that returns the value of the partial derivative of the
  2 | #'  log-likelihood ratio to alpha, keeping the response functions fixed
  3 | #'
  4 | #' @param Alpha a vector of length d + k*(2+(k-1)/2),
  5 | #'  the environmental gradient plus the lagrangian multipliers
  6 | #' @param X the n-by-p count matrix
  7 | #' @param CC a n-by-d covariate vector
  8 | #' @param responseFun a character string indicating
  9 | #' the type of response function
 10 | #' @param psi a scalar, an importance parameter
 11 | #' @param NB_params Starting values for the NB_params
 12 | #' @param NB_params_noLab Starting values for the NB_params without label
 13 | #' @param d an integer, the number of covariate parameters
 14 | #' @param alphaK a matrix of environmental gradients of lower dimensions
 15 | #' @param k an integer, the current dimension
 16 | #' @param centMat a nLambda1s-by-d centering matrix
 17 | #' @param nLambda an integer, number of lagrangian multipliers
 18 | #' @param nLambda1s an integer, number of centering restrictions
 19 | #' @param thetaMat a matrix of size n-by-p with estimated dispersion parameters
 20 | #' @param muMarg an n-by-p offset matrix
 21 | #' @param ncols a scalar, the number of columns of X
 22 | #' @param envGradEst a character string, indicating how the
 23 | #'  environmental gradient should be fitted.
 24 | #'  'LR' using the likelihood-ratio criterion,
 25 | #'  or 'ML' a full maximum likelihood solution
 26 | #' @param allowMissingness A boolean, are missing values present
 27 | #' @param naId The numeric index of the missing values in X
 28 | #' @param ... further arguments passed on to other methods
 29 | #'
 30 | #' @return: The value of the lagrangian and the constraining equations
 31 | dLR_nb <- function(Alpha, X, CC, responseFun = c("linear",
 32 |     "quadratic", "nonparametric", "dynamic"),
 33 |     psi, NB_params, NB_params_noLab, d, alphaK,
 34 |     k, centMat, nLambda, nLambda1s, thetaMat,
 35 |     muMarg, ncols, envGradEst, allowMissingness, naId, ...) {
 36 | 
 37 |     # Extract the parameters
 38 |     alpha = Alpha[seq_len(d)]
 39 |     lambda1s = Alpha[d + seq_len(nLambda1s)]
 40 |     # Multiple centering requirements now!
 41 |     lambda2 = Alpha[d + nLambda1s + 1]
 42 |     lambda3 = if (k == 1) {
 43 |         0
 44 |     } else {
 45 |         Alpha[(d + nLambda1s + 2):(d + nLambda)]
 46 |     }
 47 | 
 48 |     sampleScore = CC %*% alpha
 49 |     # A linear combination of the
 50 |     # environmental variables yields the
 51 |     # sampleScore
 52 |     design = buildDesign(sampleScore, responseFun)
 53 |     mu = muMarg * exp(design %*% NB_params *
 54 |         psi)
 55 |     X = correctXMissingness(X, mu, allowMissingness, naId)
 56 |     tmp = (X - mu)/(1 + mu/thetaMat)
 57 |     responseFun = switch(responseFun, dynamic = "quadratic",
 58 |         responseFun)
 59 | 
 60 |     if (envGradEst == "LR"){
 61 |         mu0 = muMarg * c(exp(design %*% NB_params_noLab *
 62 |             psi))
 63 |         tmp0 = (X - mu0)/(1 + mu0/thetaMat)
 64 |     }
 65 |     # The lagrangian depends on the shape of
 66 |     # the response function
 67 |     lag = switch(responseFun, linear = if (envGradEst ==
 68 |         "LR") {
 69 |         psi * (crossprod(CC, tmp) %*% (NB_params[2,
 70 |             ]) - rowSums(crossprod(CC, tmp0 *
 71 |             NB_params_noLab[2])))
 72 |     } else {
 73 |         psi * (crossprod(CC, tmp) %*% (NB_params[2,
 74 |             ]))
 75 |     }, quadratic = if (envGradEst == "LR") {
 76 |         psi * (c(crossprod(CC, tmp) %*% (NB_params[2,
 77 |             ])) + c(crossprod(CC * c(sampleScore),
 78 |             tmp) %*% (NB_params[3, ]) * 2) -
 79 |             rowSums(crossprod(CC, tmp0) *
 80 |                 NB_params_noLab[2]) - rowSums(crossprod(CC *
 81 |             c(sampleScore), tmp0) * NB_params_noLab[3]) *
 82 |             2)
 83 |     } else {
 84 |         psi * (c(crossprod(CC, tmp) %*% (NB_params[2,
 85 |             ])) + c(crossprod(CC * c(sampleScore),
 86 |             tmp) %*% (NB_params[3, ]) * 2))
 87 |     }, stop("Unknown response function provided! \n")) +
 88 |         # Restrictions do not depend on response
 89 |     # function
 90 |     c(lambda1s %*% centMat) + lambda2 * 2 *
 91 |         alpha + if (k > 1)
 92 |         alphaK %*% lambda3 else 0
 93 | 
 94 |     centerFactors = centMat %*% alpha
 95 |     size = sum(alpha^2) - 1
 96 |     if(k == 1){
 97 |         return(c(lag, centerFactors, size))
 98 |     }
 99 |     ortho = crossprod(alphaK, alpha)
100 |     c(lag, centerFactors, size, ortho)
101 | }
102 | 


--------------------------------------------------------------------------------
/R/F_dNBabunds.R:
--------------------------------------------------------------------------------
 1 | #'A score function for the column components of the independence model
 2 | #' (mean relative abundances)
 3 | #'
 4 | #'@param beta a vector of length p with current abundance estimates
 5 | #'@param X a n-by-p count matrix
 6 | #'@param reg a vector of length n with library sizes estimates
 7 | #'@param thetas a n-by-p matrix with overdispersion estimates in the rows
 8 | #' @param allowMissingness A boolean, are missing values present
 9 | #' @param naId The numeric index of the missing values in X
10 | #'
11 | #'@return a vector of length p with evaluations of the score function
12 | dNBabundsOld = function(beta, X, reg, thetas, allowMissingness, naId) {
13 |     mu = exp(outer(reg, beta, "+"))
14 |     X = correctXMissingness(X, mu, allowMissingness, naId)
15 |     score = colSums((X - mu)/(1 + (mu/thetas)))
16 | }
17 | dNBabunds = function(beta, X, reg, thetas, allowMissingness, naId) {
18 |   mu = exp(reg + beta)
19 |   X = correctXMissingness(X, mu, allowMissingness, naId)
20 |   sum((X - mu)/(1 + (mu/thetas)))
21 | }
22 | 


--------------------------------------------------------------------------------
/R/F_dNBlibSizes.R:
--------------------------------------------------------------------------------
 1 | #'A score function for the row components of the independence model
 2 | #'(library sizes)
 3 | #'
 4 | #'@param beta a vector of length n with current library size estimates
 5 | #'@param X a n-by-p count matrix
 6 | #'@param reg a vector of length p with relative abundance estimates
 7 | #'@param thetas a n-by-p matrix with overdispersion estimates in the rows
 8 | #' @param allowMissingness A boolean, are missing values present
 9 | #' @param naId The numeric index of the missing values in X
10 | #'
11 | #'@return a vector of length n with evaluations of the score function
12 | dNBlibSizes = function(beta, X, reg, thetas, allowMissingness, naId) {
13 |     mu = exp(outer(beta, reg, "+"))
14 |     X = correctXMissingness(X, mu, allowMissingness, naId)
15 |     rowSums((X - mu)/(1 + (mu/thetas)))
16 | }
17 | 


--------------------------------------------------------------------------------
/R/F_dNBllcol.R:
--------------------------------------------------------------------------------
 1 | #'A score function for the estimation of the column scores
 2 | #' in an unconstrained RC(M) model
 3 | #'
 4 | #' @param beta vector of length p+1+1+(k-1): p row scores,
 5 | #' 1 centering, one normalization and (k-1) orhtogonality lagrangian multipliers
 6 | #' @param X the nxp data matrix
 7 | #' @param reg a nx1 regressor matrix: outer product of rowScores and psis
 8 | #' @param thetas nxp matrix with the dispersion parameters
 9 | #' (converted to matrix for numeric reasons)
10 | #' @param muMarg the nxp offset
11 | #' @param k an integer, the dimension of the RC solution
12 | #' @param p an integer, the number of taxa
13 | #' @param n an integer, the number of samples
14 | #' @param nLambda an integer, the number of restrictions
15 | #' @param colWeights the weights used for the restrictions
16 | #' @param cMatK the lower dimensions of the colScores
17 | #' @param allowMissingness A boolean, are missing values present
18 | #' @param naId The numeric index of the missing values in X
19 | #' @param ... further arguments passed on to the jacobian
20 | 
21 | #' @return A vector of length p+1+1+(k-1) with evaluations of the
22 | #'  derivative of lagrangian
23 | dNBllcolOld = function(beta, X, reg, thetas,
24 |     muMarg, k, p, n, colWeights, nLambda,
25 |     cMatK, allowMissingness, naId, ...) {
26 |     cMat = matrix(beta[seq_len(p)], byrow = TRUE,
27 |         ncol = p, nrow = 1)
28 |     mu = exp(reg %*% cMat) * muMarg
29 |     X = correctXMissingness(X, mu, allowMissingness, naId)
30 |     lambda1 = beta[p + 1]
31 |     # Lagrangian multiplier for centering
32 |     # restrictions sum(abunds*r_{ik}) = 0
33 |     lambda2 = beta[p + 2]
34 |     # Lagrangian multiplier for normalization
35 |     # restrictions sum(abunds*r^2_{ik}) = 1
36 |     lambda3 = if (k == 1) {
37 |         0
38 |     } else {
39 |         beta[(p + 3):length(beta)]
40 |     }
41 |     # Lagrangian multiplier for
42 |     # orthogonalization restriction
43 | 
44 |     score = crossprod(reg, ((X - mu)/(1 +
45 |         mu/thetas))) + colWeights * (lambda1 +
46 |         lambda2 * 2 * cMat + (lambda3 %*%
47 |         cMatK))
48 | 
49 |     center = sum(colWeights * cMat)
50 |     unitSum = sum(colWeights * cMat^2) -
51 |         1
52 |     if (k == 1) {
53 |         return(c(score, center, unitSum))
54 |     }
55 |     orthogons = tcrossprod(cMatK, cMat *
56 |         colWeights)
57 |     return(c(score, center, unitSum, orthogons))
58 | }
59 | 
60 | dNBllcol = function(beta, X, reg, thetas, muMarg, allowMissingness, naId, ...) {
61 |     mu = exp(reg * beta) * muMarg
62 |     X = correctXMissingness(X, mu, allowMissingness, naId)
63 |     sum(reg*((X - mu)/(1 + mu/thetas)))
64 | }
65 | 


--------------------------------------------------------------------------------
/R/F_dNBllcolNP.R:
--------------------------------------------------------------------------------
 1 | #'Estimation of the parameters of a third degree GLM
 2 | #'
 3 | #' @param beta A vector of any length
 4 | #' @param X the data vector of length n
 5 | #' @param reg a nxlength(beta) regressor matrix
 6 | #' @param theta a scalar, the overdispersion
 7 | #' @param muMarg the offset of length n
 8 | #' @param allowMissingness A boolean, are missing values present
 9 | #' @param naId The numeric index of the missing values in X
10 | #' @param ... further arguments passed on to the jacobian
11 | 
12 | #' @return A vector of the same length as beta with evaluations
13 | #'  of the score function
14 | dNBllcolNP = function(beta, X, reg, theta,
15 |     muMarg, allowMissingness, naId, ...) {
16 |     mu = exp(reg %*% beta) * muMarg
17 |     X = correctXMissingness(X, mu, allowMissingness, naId)
18 |     crossprod(reg, ((X - mu)/(1 + mu/theta)))
19 | }
20 | 


--------------------------------------------------------------------------------
/R/F_dNBllcol_constr.R:
--------------------------------------------------------------------------------
 1 | #' The score function of the response function for 1 taxon at the time
 2 | #' @param betas a vector of v parameters of the
 3 | #'  response function of a single taxon
 4 | #' @param X the count vector of length n
 5 | #' @param reg a n-by-v matrix of regressors
 6 | #' @param theta The dispersion parameter of this taxon
 7 | #' @param muMarg offset of length n
 8 | #' @param psi a scalar, the importance parameter
 9 | #' @param allowMissingness A boolean, are missing values present
10 | #' @param naId The numeric index of the missing values in X
11 | #'
12 | #'  Even though this approach does not imply normalization over the parameters
13 | #'  of all taxa, it is very fast and they can be normalized afterwards
14 | #'
15 | #' @return A vector of length v with the evaluation of the score functions
16 | dNBllcol_constr = function(betas, X, reg,
17 |     theta, muMarg, psi, allowMissingness, naId) {
18 |     mu = exp(c(reg %*% betas) * psi) * muMarg
19 |     X = correctXMissingness(X, mu, allowMissingness, naId)
20 |     crossprod((X - mu)/(1 + mu/theta), reg) *
21 |         psi
22 | }
23 | 


--------------------------------------------------------------------------------
/R/F_dNBllcol_constr_noLab.R:
--------------------------------------------------------------------------------
 1 | #' The score function of the general response function
 2 | #'
 3 | #' @param betas a vector of regression parameters with length v
 4 | #' @param X the nxp data matrix
 5 | #' @param reg a matrix of regressors of dimension nxv
 6 | #' @param thetasMat A matrix of dispersion parameters
 7 | #' @param muMarg offset matrix of dimension nxp
 8 | #' @param psi a scalar, the importance parameter
 9 | #' @param allowMissingness A boolean, are missing values present
10 | #' @param naId The numeric index of the missing values in X
11 | #' @param ... further arguments passed on to the jacobian
12 | #'
13 | #' @return The evaluation of the score functions (a vector length v)
14 | dNBllcol_constr_noLab = function(betas, X,
15 |     reg, thetasMat, muMarg, psi, allowMissingness, naId, ...) {
16 |     mu = c(exp(reg %*% betas * psi)) * muMarg
17 |     X = correctXMissingness(X, mu, allowMissingness, naId)
18 |     colSums(crossprod((X - mu)/(1 + (mu/thetasMat)),
19 |         reg) * psi)
20 | }
21 | 


--------------------------------------------------------------------------------
/R/F_dNBllrow.R:
--------------------------------------------------------------------------------
 1 | #' A score function of the NB for the row scores
 2 | #'
 3 | #' @param beta a vector of of length n + k +1 regression parameters to optimize
 4 | #' @param X the data matrix of dimensions nxp
 5 | #' @param reg a 1xp regressor matrix: outer product of column scores and psis
 6 | #' @param thetas nxp matrix with the dispersion parameters
 7 | #' (converted to matrix for numeric reasons)
 8 | #' @param muMarg an nxp offset matrix
 9 | #' @param k a scalar, the dimension of the RC solution
10 | #' @param p a scalar, the number of taxa
11 | #' @param n a scalar, the number of samples
12 | #' @param rowWeights a vector of length n, the weights used for the restrictions
13 | #' @param nLambda an integer, the number of lagrangian multipliers
14 | #' @param rMatK the lower dimension row scores
15 | #' @param allowMissingness A boolean, are missing values present
16 | #' @param naId The numeric index of the missing values in X
17 | #' @param ... Other arguments passed on to the jacobian
18 | 
19 | #' @return A vector of length n + k +1 with evaluations of the
20 | #'  derivative of the lagrangian
21 | dNBllrow = function(beta, X, reg, thetas,
22 |     muMarg, k, n, p, rowWeights, nLambda,
23 |     rMatK, allowMissingness, naId, ...) {
24 | 
25 |     rMat = matrix(beta[seq_len(n)], byrow = FALSE,
26 |         ncol = 1, nrow = n)
27 |     mu = exp(rMat %*% reg) * muMarg
28 |     X = correctXMissingness(X, mu, allowMissingness, naId)
29 | 
30 |     lambda1 = beta[n + 1]  #Centering restrictions sum(abunds*r_{ik}) = 0
31 |     lambda2 = beta[n + 2]  #normalization restrictions sum(abunds*r^2_{ik}) = 1
32 |     lambda3 = if (k == 1) {
33 |         0
34 |     } else {
35 |         beta[(n + 3):length(beta)]
36 |     }
37 | 
38 |     score = c(tcrossprod(reg, (X - mu)/(1 +
39 |         (mu/thetas)))) + rowWeights * (lambda1 +
40 |         lambda2 * 2 * rMat + (rMatK %*% lambda3))
41 |     center = sum(rMat * rowWeights)
42 |     unitSum = sum(rMat^2 * rowWeights) -
43 |         1
44 |     if (k == 1) {
45 |         return(c(score, center, unitSum))
46 |     }
47 |     orthogons = crossprod(rMatK, rMat * rowWeights)
48 |     return(c(score, center, unitSum, orthogons))
49 | }
50 | 


--------------------------------------------------------------------------------
/R/F_dNBpsis.R:
--------------------------------------------------------------------------------
 1 | #' A score function for the psi of a given dimension
 2 | #'
 3 | #' @param beta a scalar, the initial estimate
 4 | #' @param X the n-by-p count matrix
 5 | #' @param muMarg the nxp offset matrix
 6 | #' @param reg the regressor matrix, the outer product of current row
 7 | #'  and column scores
 8 | #' @param theta a n-by-p matrix with the dispersion parameters
 9 | #' @param allowMissingness A boolean, are missing values present
10 | #' @param naId The numeric index of the missing values in X
11 | #' @param ... other arguments passed on to the jacobian
12 | 
13 | #' @return The evaluation of the score function at beta, a scalar
14 | dNBpsis = function(beta, X, reg, theta, muMarg, allowMissingness, naId,
15 |     ...) {
16 |     mu = muMarg * exp(reg * beta)
17 |     X = correctXMissingness(X, mu, allowMissingness, naId)
18 |     sum(reg * (X - mu)/(1 + (mu/theta)))
19 | }
20 | 


--------------------------------------------------------------------------------
/R/F_data.R:
--------------------------------------------------------------------------------
 1 | #' Microbiomes of colorectal cancer patients and healthy controls
 2 | #'
 3 | #' Microbiome sequencing data of colorectal cancer patients,
 4 | #' patients with small adenoma and healthy controls,
 5 | #'  together with other baseline covariates
 6 | #'
 7 | #' @format A phyloseq object with an OTU-table and sample data
 8 | #' \describe{
 9 | #'   \item{otu_table}{Count data matrix of 709 taxa in 194 samples}
10 | #'   \item{sample_data}{Data frame of patient covariates}
11 | #' }
12 | #' @source \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4299606/}
13 | "Zeller"
14 | 


--------------------------------------------------------------------------------
/R/F_deviances.R:
--------------------------------------------------------------------------------
 1 | #' A function to extract deviances for all dimension,
 2 | #' including after filtering on confounders
 3 | #'
 4 | #'@param rcm an object of the RCM class
 5 | #'@param squaredSum a boolean, should total deviance be returned?
 6 | #'
 7 | #'Total deviances can be deceptive and not correspond to the differences in
 8 | #' log-likelihood. As the dispersion is different for each model.
 9 | #'  To compare models it is better to compare likelihoods.
10 | #'
11 | #'@return If Sum is FALSE, a named array of deviance residuals of the
12 | #'  independence model and all models with dimension 1 to k, including after
13 | #'  filtering on confounders. Otherwise a table with total deviances (the sum of
14 | #'  squared deviance residuals), deviance explained and cumulative deviance
15 | #'  explained.
16 | deviances = function(rcm, squaredSum = FALSE) {
17 |     vec = if(length(rcm$confModelMat))
18 |         c(0, 0.5, seq_len(rcm$k)) else c(0:rcm$k)
19 |     outnames = c("independence",
20 |         if (length(rcm$confModelMat)) "filtered" else NULL,
21 |         paste0("Dim ", seq_len(rcm$k)))
22 |     if (squaredSum) {
23 |         tmp = vapply(FUN.VALUE = numeric(1), vec, FUN = function(i) {
24 |             sum(getDevianceRes(rcm, i)^2)
25 |         })
26 |         names(tmp) = outnames
27 |         # Also make cumulative comparisons
28 |         cumDevianceExplained = round((tmp - tmp[1])/(tmp[length(tmp)] -
29 |             tmp[1]), 3)
30 |         out = rbind(deviance = tmp,
31 |                     devianceExplained = c(0, diff(cumDevianceExplained)),
32 |             cumDevianceExplained = cumDevianceExplained)
33 |     } else {
34 |         out = lapply(vec, function(i) {
35 |             getDevianceRes(rcm, i)
36 |         })
37 |         names(out) = outnames
38 |     }
39 |     return(out)
40 | }
41 | 


--------------------------------------------------------------------------------
/R/F_ellipseCoord.R:
--------------------------------------------------------------------------------
 1 | #' A function that returns the coordinates of an ellipse
 2 | #'
 3 | #' @param a,b,c parameters of the quadratic function a^2x+bx+c
 4 | #' @param quadDrop A scalar, fraction of peak height at which to draw
 5 | #'  the ellipse
 6 | #' @param nPoints an integer, number of points to use to draw the ellipse
 7 | #'
 8 | #' @return a matrix with x and y coordinates of the ellipse
 9 | ellipseCoord = function(a, b, c, quadDrop = 0.95, 
10 |     nPoints = 100) {
11 |     center = -b/(2 * a)
12 |     roots = mapply(a, b, c, FUN = function(A, 
13 |         B, C) {
14 |         Re(polyroot(c(C - log(ifelse(A > 
15 |             0, 1/quadDrop, quadDrop)) + (B^2 - 
16 |             4 * A * C)/(4 * A), B, A))[1])  #Log to go to count scale.
17 |         # Since function is symmetric, does not
18 |         # matter which root we pick, only
19 |         # distance to vertex matters so we just
20 |         # take the first
21 |     })
22 |     anglesEval = seq(0, 2 * pi, length.out = nPoints)
23 |     ab = center - roots
24 |     radii = prod(ab)/sqrt(sin(anglesEval)^2 * 
25 |         ab[1]^2 + ab[2]^2 * cos(anglesEval)^2)
26 |     cbind(x = radii * cos(anglesEval) + center[1], 
27 |         y = radii * sin(anglesEval) + center[2])
28 | }
29 | 


--------------------------------------------------------------------------------
/R/F_estDisp.R:
--------------------------------------------------------------------------------
 1 | #' Estimate the overdispersion
 2 | #'
 3 | #' @details Information between taxa is shared with empirical Bayes
 4 | #'  using the edgeR pacakage, where the time-limiting steps are programmed in C.
 5 | #'
 6 | #' @param X the data matrix of dimensions nxp
 7 | #' @param cMat a 1xp colum scores matrix
 8 | #' @param rMat a nx1 rowscores matrix, if unconstrained
 9 | #' @param muMarg an nxp offset matrix
10 | #' @param psis a scalar, the current psi estimate
11 | #' @param trended.dispersion a vector of length p with pre-calculated
12 | #'  trended.dispersion estimates. They do not vary in function
13 | #'  of the offset anyway
14 | #' @param prior.df an integer, number of degrees of freedom of the prior
15 | #'  for the Bayesian shrinkage
16 | #' @param dispWeights Weights for estimating the dispersion
17 | #' in a zero-inflated model
18 | #' @param rowMat matrix of row scores in case of constrained ordination
19 | #' @param allowMissingness A boolean, are missing values present
20 | #' @param naId The numeric index of the missing values in X
21 | #'
22 | #' @return A vector of length p with dispersion estimates
23 | estDisp = function(X, cMat = NULL, rMat = NULL,
24 |     muMarg, psis, trended.dispersion = NULL,
25 |     prior.df = 10, dispWeights = NULL, rowMat = NULL,
26 |     allowMissingness = FALSE, naId) {
27 |     logMeansMat = if (!is.null(rMat)) {
28 |         # Unconstrained
29 |         t(rMat %*% (cMat * psis) + log(muMarg))
30 |     } else if (is.null(rowMat)) {
31 |         t(log(muMarg))  #Non-parametric
32 |     } else {
33 |         # Constrained
34 |         t(log(muMarg) + psis * rowMat)
35 |     }
36 |     if (any(is.infinite(logMeansMat)))
37 |         stop("Overflow! Try trimming more lowly
38 |         abundant taxa prior to model fitting.
39 |         \n See prevCutOff argument in ?RCM.")
40 |     X = correctXMissingness(X, exp(logMeansMat), allowMissingness, naId)
41 | 
42 |     trended.dispersion = if (is.null(trended.dispersion)) {
43 |         edgeR::estimateGLMTrendedDisp(y = t(X),
44 |             design = NULL, method = "bin.loess",
45 |             offset = logMeansMat, weights = NULL)
46 |     } else {
47 |         trended.dispersion
48 |     }
49 |     trended.dispersion = if (is.list(trended.dispersion)) {
50 |         trended.dispersion$dispersion
51 |     } else trended.dispersion
52 | 
53 |     thetaEstsTmp <- edgeR::estimateGLMTagwiseDisp(y = t(X),
54 |         design = NULL, prior.df = prior.df,
55 |         offset = logMeansMat, dispersion = trended.dispersion,
56 |         weights = dispWeights)
57 | 
58 |     thetaEsts = if (is.list(thetaEstsTmp)) {
59 |         1/thetaEstsTmp$tagwise.dispersion
60 |     } else {
61 |         1/thetaEstsTmp
62 |     }
63 |     if (anyNA(thetaEsts)) {
64 |         idNA = is.na(thetaEsts)
65 |         thetaEsts[idNA] = mean(thetaEsts[!idNA])
66 |         warning(paste(sum(idNA), "dispersion estimations did not converge!"))
67 |     }
68 |     return(thetas = thetaEsts)
69 | }
70 | 


--------------------------------------------------------------------------------
/R/F_estNBparams.R:
--------------------------------------------------------------------------------
 1 | #' A function to estimate the taxon-wise NB-params
 2 | #'
 3 | #' @param design an n-by-v design matrix
 4 | #' @param thetas a vector of dispersion parameters of length p
 5 | #' @param muMarg an offset matrix
 6 | #' @param psi a scalar, the importance parameter
 7 | #' @param X the data matrix
 8 | #' @param nleqslv.control a list of control elements, passed on to nleqslv()
 9 | #' @param ncols an integer, the number of columns of X
10 | #' @param initParam a v-by-p matrix of initial parameter estimates
11 | #' @param v an integer, the number of parameters per taxon
12 | #' @param dynamic a boolean, should response function be determined dynamically?
13 | #'  See details
14 | #' @param envRange a vector of length 2, giving the range of observed
15 | #' environmental scores
16 | #' @param allowMissingness A boolean, are missing values present
17 | #' @param naId The numeric index of the missing values in X
18 | #'
19 | #' If dynamic is TRUE, quadratic response functions are fitted for every taxon.
20 | #' If the optimum falls outside of the observed range of environmental scores,
21 | #' a linear response function is fitted instead
22 | #'
23 | #' @return a v-by-p matrix of parameters of the response function
24 | estNBparams = function(design, thetas, muMarg,
25 |     psi, X, nleqslv.control, ncols, initParam,
26 |     v, dynamic = FALSE, envRange, allowMissingness, naId) {
27 |     vapply(seq_len(ncols), FUN.VALUE = vector("numeric", v), function(i) {
28 |         nleq = nleqslv(initParam[, i], reg = design,
29 |             fn = dNBllcol_constr, theta = thetas[i],
30 |             muMarg = muMarg[, i], psi = psi,
31 |             X = X[, i], control = nleqslv.control,
32 |             jac = JacCol_constr, allowMissingness = allowMissingness,
33 |             naId = is.na(X[, i]))$x
34 |         if (dynamic && ((-nleq[2]/(2 * nleq[3]) <
35 |             envRange[1]) || (-nleq[2]/(2 *
36 |             nleq[3]) > envRange[2]))) {
37 |             # If out of observed range, fit a linear
38 |             # model
39 |             nleq = c(
40 |             nleqslv(
41 |                 initParam[-3, i],
42 |                 reg = design[, -3],
43 |                 fn = dNBllcol_constr,
44 |                 theta = thetas[i],
45 |                 muMarg = muMarg[, i],
46 |                 psi = psi,
47 |                 X = X[, i],
48 |                 control = nleqslv.control,
49 |                 jac = JacCol_constr,
50 |                 allowMissingness = allowMissingness,
51 |                 naId = is.na(X[, i])
52 |                 )$x,0)
53 |         }
54 |         return(nleq)
55 |     })
56 | }
57 | 


--------------------------------------------------------------------------------
/R/F_estNBparamsNoLab.R:
--------------------------------------------------------------------------------
 1 | #' A function to estimate the NB-params ignoring the taxon labels
 2 | #'
 3 | #' @param design an n-by-v design matrix
 4 | #' @param thetasMat A matrix of dispersion parameters
 5 | #' @param muMarg an offset matrix
 6 | #' @param psi a scalar, the importance parameter
 7 | #' @param X the data matrix
 8 | #' @param nleqslv.control a list of control elements, passed on to nleqslv()
 9 | #' @param initParam a vector of length v of initial parameter estimates
10 | #' @param n an integer, the number of samples
11 | #' @param v an integer, the number of parameters per taxon
12 | #' @param dynamic a boolean, should response function be determined dynamically?
13 | #'  See details
14 | #' @param envRange a vector of length 2,
15 | #'  giving the range of observed environmental scores
16 | #' @param preFabMat a pre-fabricated auxiliary matrix
17 | #' @param allowMissingness A boolean, are missing values present
18 | #' @param naId The numeric index of the missing values in X
19 | #'
20 | #' If dynamic is TRUE, quadratic response functions are fitted for every taxon.
21 | #'  If the optimum falls outside of the observed range of environmental scores,
22 | #'  a linear response function is fitted instead
23 | #'
24 | #' @return a v-by-p matrix of parameters of the response function
25 | estNBparamsNoLab = function(design, thetasMat,
26 |     muMarg, psi, X, nleqslv.control, initParam,
27 |     n, v, dynamic, envRange, preFabMat, allowMissingness, naId) {
28 |     # Without taxon Labels
29 |     nleq = nleqslv(x = initParam, reg = design,
30 |         fn = dNBllcol_constr_noLab, thetasMat = thetasMat,
31 |         muMarg = muMarg, psi = psi, X = X,
32 |         control = nleqslv.control, jac = JacCol_constr_noLab,
33 |         n = n, v = v, preFabMat = preFabMat,
34 |         allowMissingness = allowMissingness, naId = naId)$x
35 |     if (dynamic && ((-nleq[2]/(2 * nleq[3]) <
36 |         envRange[1]) || (-nleq[2]/(2 * nleq[3]) >
37 |         envRange[2]))) {
38 |         # If out of observed range, fit a linear
39 |         # model
40 |         nleq = c(nleqslv(initParam[-3], reg = design[,
41 |             -3], fn = dNBllcol_constr_noLab,
42 |             thetasMat = thetasMat, muMarg = muMarg,
43 |             psi = psi, X = X, control = nleqslv.control,
44 |             jac = JacCol_constr_noLab, preFabMat = preFabMat,
45 |             n = n, v = v - 1, allowMissingness = allowMissingness,
46 |             naId = naId)$x, 0)
47 |     }
48 |     return(nleq)
49 | }
50 | 


--------------------------------------------------------------------------------
/R/F_extractCoord.R:
--------------------------------------------------------------------------------
  1 | #' A function to extract plotting coordinates, either for plot.RCM
  2 | #'  or to export to other plotting software
  3 | #'
  4 | #' @param RCM an RCm object
  5 | #' @param Dim an integer vector of required dimensions
  6 | #'
  7 | #' The parameters for the ellipses of the quadratic response function come
  8 | #'  from the parametrization f(x) = a*x^2 + b*x + c
  9 | #' For an unconstrained object the row and column coordinates are returned
 10 | #'  in separate matrices. The row names will correspond to the labels.
 11 | #'  For a constrained analysis also the variable points are returned.
 12 | #'  All variables still need to be scaled to optimally fill the available space
 13 | #'
 14 | #' @return A list with components
 15 | #' \item{samples}{A dataframe of sample scores}
 16 | #' \item{species}{A dataframe of column scores, with origin, slope,
 17 | #' end and ellipse coordinates as needed}
 18 | #' \item{variables}{A dataframe of variable scores,
 19 | #' loadings of the environmental gradient}
 20 | #' @export
 21 | #' @examples
 22 | #' data(Zeller)
 23 | #' require(phyloseq)
 24 | #' tmpPhy = prune_taxa(taxa_names(Zeller)[1:100],
 25 | #' prune_samples(sample_names(Zeller)[1:50], Zeller))
 26 | #' zellerRCM = RCM(tmpPhy, k = 2, round = TRUE)
 27 | #' coordsZeller = extractCoord(zellerRCM)
 28 | extractCoord = function(RCM, Dim = c(1, 2)) {
 29 |     # Samples
 30 |     constrained = !is.null(RCM$covariates)
 31 |     dataSam <- if (constrained) {
 32 |         data.frame(RCM$covariates %*% RCM$alpha[,
 33 |             Dim, drop = FALSE] %*% diag(RCM$psis[Dim], nrow = length(Dim)))
 34 |     } else {
 35 |         data.frame(RCM$rMat[, Dim, drop = FALSE] %*% diag(RCM$psis[Dim]))
 36 |     }
 37 |     names(dataSam) = paste0("Dim", Dim)
 38 | 
 39 |     # Species
 40 |     if (constrained) {
 41 |         if (RCM$responseFun == "linear") {
 42 |             dataTax = data.frame(origin1 = -RCM$NB_params[1,
 43 |                 , Dim[1]]/RCM$NB_params[2,
 44 |                 , Dim[1]], origin2 = -RCM$NB_params[1,
 45 |                 , Dim[2]]/RCM$NB_params[2,
 46 |                 , Dim[2]], slope1 = RCM$NB_params[2,
 47 |                 , Dim[1]], slope2 = RCM$NB_params[2,
 48 |                 , Dim[2]])
 49 |             dataTax$end1 = dataTax$origin1 +
 50 |                 dataTax$slope1
 51 |             dataTax$end2 = dataTax$origin2 +
 52 |                 dataTax$slope2
 53 |             rownames(dataTax) = colnames(RCM$X)
 54 |         } else if (RCM$responseFun == "quadratic") {
 55 |             dataTax = data.frame(apply(RCM$NB_params[c(2,
 56 |                 3), , Dim, drop = FALSE], c(2, 3), function(x) {
 57 |                 a = x[2]
 58 |                 b = x[1]
 59 |                 -b/(2 * a)
 60 |             }))  #The location of the extrema
 61 |             names(dataTax) = c("end1", "end2")
 62 | 
 63 |             peakHeights = apply(RCM$NB_params[,, Dim, drop = FALSE], 2, function(x) {
 64 |                 A = x[3, ]
 65 |                 B = x[2, ]
 66 |                 C = x[1, ]
 67 |                 vapply(FUN.VALUE = numeric(1),exp(B^2 - 4 * A * C)/(4 *A),
 68 |                     function(y) {max(y, 1/y)})
 69 |             })
 70 |             # select largest relative departure
 71 |             rownames(peakHeights) = c("peak1","peak2")
 72 |             dataTax = cbind(dataTax, t(peakHeights))
 73 | 
 74 |             # Get ellipse parameters
 75 |             dataEllipse = t(Reduce(x = lapply(Dim,
 76 |                 function(x) {RCM$NB_params[, , x]}), f = rbind))
 77 |             colnames(dataEllipse) = c(vapply(FUN.VALUE =character(length(Dim)),
 78 |                 Dim, function(x) {paste0(c("c", "b", "a"),x)}))
 79 | 
 80 |             # Rescale peak heights for plotting
 81 |             dataTax[, c("peak1", "peak2")] = rowMultiply(dataTax[,
 82 |                 c("peak1", "peak2")], apply(dataTax[,
 83 |                 c("end1", "end2")], 2, function(x) {
 84 |                 max(abs(x))
 85 |             })/apply(dataTax[, c("peak1",
 86 |                 "peak2")], 2, max))
 87 |             dataTax = cbind(dataTax, dataEllipse)
 88 |             rownames(dataTax) = colnames(RCM$X)
 89 | 
 90 |         } else if (RCM$responseFun == "nonparametric") {
 91 |             # For non-parametric response function we
 92 |             # cannot plot the taxa
 93 |             dataTax = NULL
 94 |         } else {
 95 |             stop("No valid response function present in this RCM object!")
 96 |         }
 97 |     } else {
 98 |         # If not constrained
 99 |         dataTax = data.frame(cbind(t(RCM$cMat[Dim,, drop = FALSE]), 0, 0))
100 |         names(dataTax) = c("end1", "end2",
101 |             "origin1", "origin2")
102 |         rownames(dataTax) = colnames(RCM$X)
103 |     }
104 | 
105 |     # Variables
106 |     if (!constrained) {
107 |         dataVar = NULL
108 |     } else {
109 |         dataVar = data.frame(RCM$alpha)[,Dim, drop = FALSE]
110 |     }
111 |     list(samples = dataSam, species = dataTax,
112 |         variables = dataVar)
113 | }
114 | 


--------------------------------------------------------------------------------
/R/F_extractE.R:
--------------------------------------------------------------------------------
 1 | #' A function to extract a matrix of expected values
 2 | #'  for any dimension of the fit
 3 | #'
 4 | #' @param rcm an object of class RCM
 5 | #' @param Dim the desired dimension. Defaults to the maximum of the fit.
 6 | #' Choose 0 for the independence model, 0.5 for the confounders filter model.
 7 | #'
 8 | #' @return The matrix of expected values
 9 | extractE = function(rcm, Dim = rcm$k) {
10 |     # Expectations
11 |     Eind = outer(rcm$libSizes, rcm$abunds)  #Expected values under independence
12 |     if (Dim[1] %in% c(0, NA)) {
13 |         Eind
14 |     } else if (Dim[1] == 0.5) {
15 |         Eind * exp(rcm$confModelMat %*% rcm$confParams)
16 |     } else {
17 |         if (!is.null(rcm$confModelMat))
18 |             Eind = Eind * exp(rcm$confModelMat %*% rcm$confParams)
19 |         if (is.null(rcm$covariates)) {
20 |             Eind * exp(rcm$rMat[, Dim, drop = FALSE] %*% (rcm$cMat[Dim,
21 |                 , drop = FALSE] * rcm$psis[Dim]))
22 |         } else if (rcm$responseFun == "nonparametric") {
23 |             Eind * exp(apply(vapply(Dim, FUN.VALUE = Eind, function(j) {
24 |                 rcm$nonParamRespFun[[j]]$rowMat
25 |             }), c(1, 2), sum))
26 |         } else {
27 |             if (!is.null(rcm$confModelMat))
28 |                 Eind = Eind * exp(rcm$confModelMat %*% rcm$confParams)
29 |             if (is.null(rcm$covariates)) {
30 |                 Eind * exp(rcm$rMat[, Dim, drop = FALSE] %*% (rcm$cMat[Dim,
31 |                 , drop = FALSE] * rcm$psis[Dim]))
32 |             } else if (rcm$responseFun == "nonparametric") {
33 |                 Eind * exp(apply(vapply(Dim, FUN.VALUE = Eind, function(j) {
34 |                 rcm$nonParamRespFun[[j]]$rowMat
35 |                 }), c(1, 2), sum))
36 |             } else {
37 |                 Eind * exp(apply(vapply(Dim, FUN.VALUE = Eind, function(j) {
38 |                 rcm$psis[j] * getRowMat(sampleScore = rcm$covariates %*%
39 |                     rcm$alpha[, j], responseFun = rcm$responseFun,
40 |                     NB_params = rcm$NB_params[,, j])
41 |                 }), c(1, 2), sum))
42 |             }
43 |         }
44 |     }
45 | }
46 | 


--------------------------------------------------------------------------------
/R/F_filterConfounders.R:
--------------------------------------------------------------------------------
 1 | #'Filters out the effect of known confounders. This is done by fitting
 2 | #'interactions of every taxon with the levels of the confounders.
 3 | #'It returns a modified offset matrix for the remainder
 4 | #' of the fitting procedure.
 5 | #'
 6 | #' @param muMarg a nxp matrix, the current offset
 7 | #' @param confMat a nxt confounder matrix
 8 | #' @param X the nxp data matrix
 9 | #' @param thetas a vector of length p with the current dispersion estimates
10 | #' @param p an integer, the number of columns of X
11 | #' @param n an integer, the number of rows of X
12 | #' @param nleqslv.control see nleqslv()
13 | #' @param trended.dispersion a vector of length p
14 | #'  with trended dispersion estimates
15 | #' @param tol a scalar, the convergence tolerance
16 | #' @param maxIt maximum number of iterations
17 | #' @param allowMissingness A boolean, are missing values present
18 | #' @param naId The numeric index of the missing values in X
19 | #'
20 | #' Fits the negative binomial mean parameters and overdispersion parameters
21 | #'  iteratively.
22 | #'  Convergence is determined based on the L2-norm
23 | #'   of the absolute change of mean parameters
24 | #'
25 | #' @return a list with components:
26 | #' \item{thetas}{new theta estimates}
27 | #' \item{NB_params}{The estimated parameters of the interaction terms}
28 | 
29 | filterConfounders = function(muMarg, confMat, X, thetas, p, n, nleqslv.control,
30 |     trended.dispersion, tol = 0.001, maxIt = 20, allowMissingness, naId) {
31 |     NB_params = matrix(0, ncol(confMat), p)
32 | 
33 |     iter = 1
34 |     while ((iter == 1) || ((iter <= maxIt) && (!convergence))) {
35 | 
36 |         NB_params_old = NB_params
37 | 
38 |         NB_params = vapply(FUN.VALUE = numeric(nrow(NB_params)), seq_len(p),
39 |             function(i) {
40 |                 nleq = try(nleqslv(NB_params[, i], reg = confMat,
41 |                 fn = dNBllcol_constr,
42 |                 theta = thetas[i], muMarg = muMarg[, i], X = X[, i],
43 |                 control = nleqslv.control,
44 |                 jac = JacCol_constr, psi = 1,
45 |                 allowMissingness = allowMissingness,
46 |                 naId = is.na(X[, i]))$x)
47 |         # Fit the taxon-by taxon NB with given overdispersion parameters and
48 |                 # return predictions
49 |                 if (inherits(nleq, "try-error") | anyNA(nleq) |
50 |                     any(is.infinite(nleq))) {
51 |                 nleq = nleqslv(NB_params[, i], reg = confMat,
52 |                 fn = dNBllcol_constr,
53 |                 theta = thetas[i], muMarg = muMarg[, i], X = X[, i],
54 |                 control = nleqslv.control, psi = 1,
55 |                 allowMissingness = allowMissingness,
56 |                 naId = is.na(X[, i]))$x
57 |                 }
58 |                 # If fails try with numeric jacobian
59 |                 return(nleq)
60 |             })  #Estimate response functions
61 | 
62 |         if (anyNA(NB_params)) {
63 |             stop("Filtering on confounders failed because of
64 |             failed fits. Consider more stringent filtering by
65 |             increasing the prevCutOff parameter.\n")
66 |         }
67 | 
68 |         thetas = estDisp(X = X, cMat = matrix(0, ncol = p), rMat = matrix(0,
69 |             nrow = n), psis = 0, muMarg = muMarg * exp(confMat %*% NB_params),
70 |             trended.dispersion = trended.dispersion,
71 |             allowMissingness = allowMissingness)
72 |         # Estimate overdispersion
73 |         iter = iter + 1
74 |         convergence = sqrt(mean((1 - NB_params/NB_params_old)^2)) < tol
75 |         # Check for convergence, L2-norm
76 |     }
77 |     list(thetas = thetas, NB_params = NB_params)
78 | }
79 | 


--------------------------------------------------------------------------------
/R/F_getDevMat.R:
--------------------------------------------------------------------------------
 1 | #' ACalculate the matrix of deviance residuals
 2 | #'
 3 | #'@param X the data matrix
 4 | #'@param thetaMat the matrix of dispersions
 5 | #'@param mu the matrix of means
 6 | #'
 7 | #'@return The matrix of deviance residuals
 8 | getDevMat = function(X, thetaMat, mu) {
 9 |     tmpMat = suppressWarnings(sqrt(2 * (X *
10 |         log(X/mu) - (X + thetaMat) * log((1 +
11 |         X/thetaMat)/(1 + mu/thetaMat)))) *
12 |         sign(X - mu))
13 |     tmpMat[X == 0] = -sqrt((2 * thetaMat *
14 |         log(1 + mu/thetaMat))[X == 0])
15 |     # zero observations are always smaller
16 |     # than the mean
17 |     tmpMat[is.na(tmpMat)] = 0  #Correct for numerical/real NAs
18 |     tmpMat
19 | }
20 | 


--------------------------------------------------------------------------------
/R/F_getDevianceRes.R:
--------------------------------------------------------------------------------
 1 | #' A function to calculate the matrix of deviance residuals.
 2 | #'
 3 | #' @param RCM an RCM object
 4 | #' @param Dim The dimensions to use
 5 | #'
 6 | #' For the deviance residuals we use the overdispersions from the reduced model.
 7 | #' Standard dimensions used are only first and second,
 8 | #'  since these are also plotted
 9 | #'
10 | #'@export
11 | #'@return A matrix with deviance residuals of the same size
12 | #' as the original data matrix
13 | #'
14 | #'@examples
15 | #'data(Zeller)
16 | #' require(phyloseq)
17 | #' tmpPhy = prune_taxa(taxa_names(Zeller)[1:120],
18 | #' prune_samples(sample_names(Zeller)[1:75], Zeller))
19 | #' #Subset for a quick fit
20 | #' zellerRCM = RCM(tmpPhy, k = 2, round = TRUE, prevCutOff = 0.03)
21 | #' devRes = getDevianceRes(zellerRCM)
22 | getDevianceRes = function(RCM, Dim = RCM$k) {
23 |     mu = extractE(RCM, Dim)
24 |     thetaMat = matrix(byrow = TRUE, nrow = nrow(RCM$X),
25 |         ncol = ncol(RCM$X), data = RCM$thetas[,
26 |             switch(as.character(Dim), `0` = "Independence",
27 |                 `0.5` = "Filtered", paste0("Dim",
28 |                 Dim))])
29 |     getDevMat(X = correctXMissingness(RCM$X, mu, RCM$NApresent, is.na(RCM$X)), thetaMat = thetaMat,
30 |         mu = mu)
31 | }
32 | 


--------------------------------------------------------------------------------
/R/F_getDistCoord.R:
--------------------------------------------------------------------------------
 1 | #' Get coordinates of a distance object of n observations for the provided indices
 2 | #'
 3 | #' @param indices The row indices for which distance indices are wanted
 4 | #' @param n The total number of objects in the distance matrix
 5 | #'
 6 | #' @return a vector of coordinates
 7 | getDistCoord = function(indices, n){
 8 |     indexI = n*(indices-1) - indices*(indices+1)/2# See details of ?dist
 9 |     indexMat = outer(indexI, indices, FUN = "+")
10 |     indexMat[upper.tri(indexMat)]
11 | }


--------------------------------------------------------------------------------
/R/F_getInflCol.R:
--------------------------------------------------------------------------------
 1 | #' A function to extract the influence for a given parameter index
 2 | #'
 3 | #' @param score a score matrix
 4 | #' @param InvJac The inverted jacobian
 5 | #' @param taxon The taxon name or index
 6 | #'
 7 | #' @return A matrix with all observations' influence on the given taxon
 8 | getInflCol = function(score, InvJac, taxon) {
 9 |     rowMultiply(score * InvJac[, taxon])
10 | }
11 | 


--------------------------------------------------------------------------------
/R/F_getInflRow.R:
--------------------------------------------------------------------------------
 1 | #' Extract the influence of all observations on a given row score
 2 | #'
 3 | #' @param score the score function evaluated for every observation
 4 | #' @param InvJac The inverse jacobian
 5 | #' @param sample the row score or  sample index
 6 | #'
 7 | #' @return A matrix with all observations' influence on the row score
 8 | getInflRow = function(score, InvJac, sample) {
 9 |     score * InvJac[, sample]
10 | }
11 | 


--------------------------------------------------------------------------------
/R/F_getInt.R:
--------------------------------------------------------------------------------
 1 | #' Integrate the spline of an vgam object
 2 | #'
 3 | #' @param coef A vector of coefficients
 4 | #' @param spline The cubic smoothing spline
 5 | #' @param sampleScore the observed environmental scores
 6 | #' @param stop.on.error see ?integrate
 7 | #' @param ... additional arguments passed on to integrate()
 8 | #'
 9 | #' @return a scalar, the value of the integral
10 | getInt = function(coef, spline, sampleScore, 
11 |     stop.on.error = FALSE, ...) {
12 |     # Absolute values assure positive
13 |     # outcomes
14 |     integrate(f = function(y, coef, spline) {
15 |         if (!is.null(spline)) {
16 |             abs(getRowMat(sampleScore = y, 
17 |                 taxonCoef = coef, spline = spline, 
18 |                 responseFun = "nonparametric"))
19 |         } else {
20 |             # If GAM fails, GLM fit (or independence
21 |             # model)
22 |             abs(getModelMat(y, degree = length(coef) - 
23 |                 1) %*% coef)
24 |         }
25 |     }, lower = min(sampleScore), upper = max(sampleScore), 
26 |         coef = coef, spline = spline, stop.on.error = stop.on.error, 
27 |         ...)$value
28 | }
29 | 


--------------------------------------------------------------------------------
/R/F_getLogLik.R:
--------------------------------------------------------------------------------
 1 | #' Extract the logged likelihood of every count
 2 | #'
 3 | #' @param rcm an RCM object
 4 | #' @param Dim A vector of integers indicating which dimensions to take along,
 5 | #' or Inf for the saturated model, or 0 for the independence model
 6 | #'
 7 | #' @return A matrix with logged likelihood of the size of the data matrix
 8 | getLogLik = function(rcm, Dim) {
 9 |     if (Dim == Inf) {
10 |         return(dpois(x = rcm$X, lambda = rcm$X,
11 |             log = TRUE))
12 |     }
13 |     E = extractE(rcm, if (Dim >= 1)
14 |         seq_len(Dim) else Dim)
15 |     thetaMat = matrix(byrow = TRUE, nrow = nrow(rcm$X),
16 |         ncol = ncol(rcm$X), data = rcm$thetas[,
17 |             switch(as.character(Dim), `0` = "Independence",
18 |                 `0.5` = "Filtered", paste0("Dim",
19 |                 Dim))])
20 |     dnbinom(x = rcm$X, mu = E, size = thetaMat,
21 |         log = TRUE)
22 | }
23 | 


--------------------------------------------------------------------------------
/R/F_getModelMat.R:
--------------------------------------------------------------------------------
 1 | #' A function to construct a model matrix of a certain degree
 2 | #' @param y the variable
 3 | #' @param degree the degree
 4 | #'
 5 | #' @return A model matrix with degree+1 columns and as many rows as lenght(y)
 6 | #' @importFrom stats model.matrix formula
 7 | getModelMat = function(y, degree) {
 8 |     model.matrix(formula(paste("~", paste(paste("I(y^",
 9 |         seq_len(degree), ")"), collapse = "+"))))
10 | }
11 | 


--------------------------------------------------------------------------------
/R/F_getRowMat.R:
--------------------------------------------------------------------------------
 1 | #' Return a matrix of row scores
 2 | #'
 3 | #' @param sampleScore a vector of length n with sample scores
 4 | #' @param responseFun a character string, the type of response function,
 5 | #'  either 'linear' or 'quadratic'
 6 | #' @param NB_params a v-by-p matrix of parameters of theresponse function
 7 | #' @param taxonCoef A vector of coefficients
 8 | #' @param spline The cubic smoothing spline
 9 | #'
10 | #' Multiplying the old offset with the exponent matrix times
11 | #' the importance parameter obtains the new one based on lower dimension
12 | #'
13 | #' @return a n-by-p matrix of scores
14 | #' @importFrom VGAM predict
15 | getRowMat = function(sampleScore, responseFun, 
16 |     NB_params, taxonCoef, spline) {
17 |     if (responseFun == "nonparametric") {
18 |         cbind(1, sampleScore, predict(spline, 
19 |             x = sampleScore)$y) %*% c(taxonCoef, 
20 |             1)
21 |     } else {
22 |         buildDesign(sampleScore, responseFun) %*% 
23 |             NB_params
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/R/F_heq_nb.R:
--------------------------------------------------------------------------------
 1 | #' Define linear equality constraints for env. gradient
 2 | #'
 3 | #' @param Alpha the current estimate of the environmental gradient
 4 | #' @param alphaK a matrix with the environmental gradients
 5 | #'  of the lower dimensions
 6 | #' @param d an integer, the number of environmental variables, including dummies
 7 | #' @param k an integer, the current dimension
 8 | #' @param centMat a centering matrix
 9 | #' @param ... further arguments for other methods, not needed in this one
10 | #'
11 | #' The centering matrix centMat ensures that the parameters of the dummies
12 | #' of the same categorical variable sum to zero
13 | #'
14 | #' @return a vector of with current values of the constraints,
15 | #'  should evolve to zeroes only
16 | heq_nb = function(Alpha, alphaK, d, k, centMat, 
17 |     ...) {
18 |     centerFactors = centMat %*% Alpha  #Includes overal centering
19 |     size = sum(Alpha^2) - 1
20 |     if (k == 1) {
21 |         return(c(centerFactors, size))
22 |     }
23 |     ortho = crossprod(alphaK, Alpha)
24 |     c(centerFactors, size, ortho)
25 | }
26 | 


--------------------------------------------------------------------------------
/R/F_heq_nb_jac.R:
--------------------------------------------------------------------------------
 1 | #' The jacobian of the linear equality constraints
 2 | #'
 3 | #' @param Alpha the current estimate of the environmental gradient
 4 | #' @param alphaK a matrix with the environmental gradients
 5 | #' of the lower dimensions
 6 | #' @param d an integer, the number of environmental variables,
 7 | #'  including dummies
 8 | #' @param k an integer, the current dimension
 9 | #' @param centMat a centering matrix
10 | #' @param ... further arguments for other methods, not needed in this one
11 | #'
12 | #' @return The jacobian matrix
13 | heq_nb_jac = function(Alpha, alphaK, d, k, 
14 |     centMat, ...) {
15 |     if (k == 1) {
16 |         return(rbind(centMat, 2 * Alpha))
17 |     } else {
18 |         rbind(centMat, 2 * Alpha, t(alphaK))
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/R/F_indentPlot.R:
--------------------------------------------------------------------------------
 1 | #' Functions to indent the plot to include the entire labels
 2 | #'
 3 | #' @param plt a ggplot object
 4 | #' @param xInd a scalar or a vector of length 2,
 5 | #' specifying the indentation left and right of the plot to allow for the labels
 6 | #'  to be printed entirely
 7 | #' @param yInd a a scalar or a vector of length 2,
 8 | #' specifying the indentation top and bottom of the plot
 9 | #'  to allow for the labels to be printed entirely
10 | #' @return a ggplot object, squared
11 | indentPlot <- function(plt, xInd = 0, yInd = 0) {
12 |     return(plt +
13 |             expand_limits(
14 |             x = ggplot_build(plt)$layout$panel_params[[1]]$x.range +
15 |         if (length(xInd) == 1) xInd * c(-1,
16 |             1) else xInd,
17 |         y = ggplot_build(plt)$layout$panel_params[[1]]$y.range +
18 |         if (length(yInd) == 1) yInd * c(-1,
19 |             1) else yInd))
20 | }
21 | 


--------------------------------------------------------------------------------
/R/F_inertia.R:
--------------------------------------------------------------------------------
 1 | #' Calculate the log-likelihoods of all possible models
 2 | #'
 3 | #'@param rcm an object of the RCM class
 4 | #'
 5 | #'@return A table with inertias, proportion inertia explained
 6 | #' and cumulative proportion of inertia explained.
 7 | #'@export
 8 | #' @examples
 9 | #' data(Zeller)
10 | #' require(phyloseq)
11 | #' tmpPhy = prune_taxa(taxa_names(Zeller)[1:100],
12 | #' prune_samples(sample_names(Zeller)[1:50], Zeller))
13 | #' zellerRCM = RCM(tmpPhy, round = TRUE)
14 | #' inertia(zellerRCM)
15 | inertia = function(rcm) {
16 |     vec = if (length(rcm$confModelMat))
17 |         c(0, 0.5, seq_len(rcm$k)) else c(0:rcm$k)
18 |     outnames = c("independence", if (length(rcm$confModelMat))
19 |     "filtered" else NULL,
20 |         paste0("Dim", seq_len(rcm$k)), "saturated")
21 |     tmp = c(vapply(FUN.VALUE = numeric(1), vec, FUN = function(i) {
22 |         eMat = extractE(rcm, i)
23 |         round(sum(((eMat - rcm$X)^2/eMat), na.rm = TRUE))
24 |     }), 0)
25 |     names(tmp) = outnames
26 |     cumInertiaExplained = round((tmp - tmp[1])/(tmp[length(tmp)] - tmp[1]),
27 |         3)
28 |     out = rbind(inertia = tmp, inertiaExplained =
29 |     c(0, diff(cumInertiaExplained)),
30 |         cumInertiaExplained = cumInertiaExplained)
31 |     return(out)
32 | }
33 | 


--------------------------------------------------------------------------------
/R/F_liks.R:
--------------------------------------------------------------------------------
 1 | #' Calculate the log-likelihoods of all possible models
 2 | #'
 3 | #'@param rcm an object of the RCM class
 4 | #'@param Sum a boolean, should log-likelihoods be summed?
 5 | #'
 6 | #'@return If Sum is FALSE, a named array log-likelihoods
 7 | #' of the independence model and all models with dimension 1 to k,
 8 | #' including after filtering on confounders.
 9 | #' Otherwise a table with log-likelihoods,
10 | #' deviance explained and cumulative deviance explained.
11 | #'@export
12 | #' @examples
13 | #' data(Zeller)
14 | #' require(phyloseq)
15 | #' tmpPhy = prune_taxa(taxa_names(Zeller)[1:100],
16 | #' prune_samples(sample_names(Zeller)[1:50], Zeller))
17 | #' zellerRCM = RCM(tmpPhy, round = TRUE)
18 | #' liks(zellerRCM)
19 | liks = function(rcm, Sum = TRUE) {
20 |     vec = if (length(rcm$confModelMat))
21 |         c(0, 0.5, seq_len(rcm$k), Inf) else c(0:rcm$k, Inf)
22 |     outnames = c("independence", if (length(rcm$confModelMat))
23 |     "filtered" else NULL,
24 |         paste0("Dim", seq_len(rcm$k)), "saturated")
25 |     if (Sum) {
26 |         tmp = vapply(FUN.VALUE = numeric(1), vec, FUN = function(i) {
27 |             sum(getLogLik(rcm, i), na.rm = TRUE)
28 |         })
29 |         names(tmp) = outnames
30 |     } else {
31 |         tmp = vapply(vec, FUN.VALUE = matrix(0, nrow(rcm$X), ncol(rcm$X)),
32 |             FUN = function(i) {
33 |                 getLogLik(rcm, i)
34 |             })
35 |         dimnames(tmp)[[3]] = outnames
36 |     }
37 |     if (Sum) {
38 |         # Also make cumulative comparisons
39 |         cumDevianceExplained = round((tmp - tmp[1])/(tmp[length(tmp)] -
40 |             tmp[1]), 3)
41 |         out = rbind(logLikelihood = tmp, logLikExplained = c(0,
42 |         diff(cumDevianceExplained)),
43 |         cumLogLikExplained = cumDevianceExplained)
44 |     }
45 |     return(out)
46 | }
47 | 


--------------------------------------------------------------------------------
/R/F_permanova.R:
--------------------------------------------------------------------------------
 1 | #' Perform a PERMANOVA analysis for group differences of a predefined cofactor using the pseudo F-statistic
 2 | #'
 3 | #' @param rcmObj an RCM object
 4 | #' @param groups a factor of length n with cluster memberships, or a name of a variable contained in the RCM object
 5 | #' @param nPerm Number of permutations in the PERMANOVA
 6 | #' @param Dim Dimensions on which the test should be performed. Defaults to all dimensions of the fitted RCM object.
 7 | #' @param verbose a boolean, should output be printed?
 8 | #'
 9 | #' @return A list with components
10 | #' \item{statistic}{The pseudo F-statistic}
11 | #' \item{p.value}{The p-value of the PERMANOVA}
12 | #'
13 | #' @seealso \code{\link{RCM}}
14 | #' @importFrom phyloseq get_variable
15 | #' @importFrom stats dist
16 | #' @export
17 | #' @examples
18 | #' data(Zeller)
19 | #' require(phyloseq)
20 | #' tmpPhy = prune_taxa(taxa_names(Zeller)[1:100],
21 | #' prune_samples(sample_names(Zeller)[1:50], Zeller))
22 | #' zellerRCM = RCM(tmpPhy, round = TRUE)
23 | #' zellerPermanova = permanova(zellerRCM, "Diagnosis")
24 | permanova = function(rcmObj, groups, nPerm = 1e4, Dim = seq_len(rcmObj$k), verbose = TRUE){
25 |   stopifnot(is(rcmObj, "RCM"), length(nPerm)==1)
26 |     if(nPerm <= 100){
27 |         warning("Less than 100 permutations leads to low power of the permutation test!")
28 |     }
29 |   if(length(groups)==1){
30 |       groups = get_variable(rcmObj$physeq, groups)
31 |   }
32 |   coord = extractCoord(rcmObj, Dim)$samples
33 |   N = nrow(coord)
34 |   if(N != length(groups)){
35 |       stop("Length of grouping variable provided (", length(groups),
36 |            "does not correspond to number of samples in RCM object (", N, ")")
37 |   }
38 |   a = length(unique(groups))
39 |   if(a <= 1){
40 |       stop("Provide more than one group in 'groups'.")
41 |   }
42 |   if(any(table(groups)==1)){
43 |       stop("Some groups contain only a single observation, no distances can be calculated.")
44 |   }
45 |   distSq = dist(coord)^2
46 |   overalDist = sum(distSq)/N
47 |   #Observed test statistic
48 |   withinDistObs = sum(unlist(tapply(seq_len(nrow(coord)), groups, function(x){
49 |         distSq[getDistCoord(x, N)]/length(x)
50 |   })))
51 |   FratioObs = (overalDist-withinDistObs)/withinDistObs * (a-1)/(N-a)
52 |   #PERMANOVA
53 |   withinDistPerm = vapply(seq_len(nPerm), FUN.VALUE = double(1), function(jj){
54 |       if(verbose && ((jj-1) %% (nPerm/10)) == 0){
55 |           cat("Permutation", jj, "out of", nPerm, "\n")
56 |       }
57 |       sum(unlist(tapply(seq_len(nrow(coord)), sample(groups), function(x){
58 |         distSq[getDistCoord(x, N)]/length(x)
59 |         })))
60 |   })
61 |   FratioPerm = (overalDist-withinDistPerm)/withinDistPerm * (a-1)/(N-a)
62 |   PvalPerm = mean(FratioObs < FratioPerm)
63 |   return(list("statistic" = FratioObs, "p.value" = PvalPerm))
64 | }
65 | 


--------------------------------------------------------------------------------
/R/F_residualPlot.R:
--------------------------------------------------------------------------------
  1 | #' Make residual plots
  2 | #'
  3 | #' @param RCM an RCM object
  4 | #' @param Dim an integer, which dimension?
  5 | #' @param whichTaxa a character string or a character vector,
  6 | #'  for which taxa to plot the diagnostic plots
  7 | #' @param resid the type of residuals to use, either 'Deviance' or 'Pearson'
  8 | #' @param numTaxa an integer, the number of taxa to plot
  9 | #' @param mfrow passed on to par().
 10 | #' If not supplied will be calculated based on numTaxa
 11 | #' @param samColour,samShape Vectors or character strings denoting
 12 | #' the sample colour and shape respectively. If character string is provided,
 13 | #' the variables with this name is extracted from the phyloseq object in RCM
 14 | #' @param legendLabSize size of the legend labels
 15 | #' @param legendTitleSize size of the legend title
 16 | #' @param axisLabSize size of the axis labels
 17 | #' @param axisTitleSize size of the axis title
 18 | #' @param taxTitle A boolean, should taxon title be printed
 19 | #' @param h Position of reference line. Set to NA for no line
 20 | #'
 21 | #'@details If whichTaxa is 'run' or 'response' the taxa with the highest
 22 | #' run statistics or steepest slopes of the response function are plotted,
 23 | #'  numTax indicates the number. If whichTaxa is a character vector,
 24 | #'  these are interpreted as taxon names to plot.
 25 | #'  This function is mainly meant for linear response functions,
 26 | #'  but can be used for others too.
 27 | #'  The runs test statistic from the tseries package is used.
 28 | #'@return Plots a ggplot2-object to output
 29 | #'@export
 30 | #'@import ggplot2
 31 | #'@import phyloseq
 32 | #'@importFrom tseries runs.test
 33 | #'@seealso \code{\link{RCM}}
 34 | #'@examples
 35 | #'data(Zeller)
 36 | #' require(phyloseq)
 37 | #' tmpPhy = prune_taxa(taxa_names(Zeller)[1:120],
 38 | #' prune_samples(sample_names(Zeller)[1:75], Zeller))
 39 | #' #Subset for a quick fit
 40 | #' zellerRCMlin = RCM(tmpPhy, k = 2,
 41 | #' covariates = c('BMI','Age','Country','Diagnosis','Gender'),
 42 | #' responseFun = 'linear', round = TRUE, prevCutOff = 0.03)
 43 | #' residualPlot(zellerRCMlin)
 44 | residualPlot = function(RCM, Dim = 1, whichTaxa = "response",
 45 |     resid = "Deviance", numTaxa = 9, mfrow = NULL,
 46 |     samColour = NULL, samShape = NULL, legendLabSize = 15,
 47 |     legendTitleSize = 16, axisLabSize = 14, axisTitleSize = 16,
 48 |     taxTitle = TRUE, h = 0) {
 49 |     if(is.null(RCM$covariates)){
 50 |       stop("Residual plots only implemented for constrained ordinations!")
 51 |     }
 52 |     sampleScore = RCM$covariates %*% RCM$alpha[, Dim,
 53 |         drop = FALSE]
 54 |     if (resid == "Deviance") {
 55 |         resMat = getDevianceRes(RCM, Dim)
 56 |     } else if (resid == "Pearson") {
 57 |         mu = extractE(RCM, seq_len(Dim))
 58 |         # Residuals are also based on lower dimensions
 59 |         thetaMat = matrix(byrow = TRUE, nrow = nrow(RCM$X),
 60 |             ncol = ncol(RCM$X), data = RCM$thetas[,
 61 |                 switch(as.character(Dim), `0` = "Independence",
 62 |                     `0.5` = "Filtered",
 63 |                     paste0("Dim",Dim))])
 64 |         resMat = (RCM$X - mu)/sqrt(mu + mu^2/thetaMat)
 65 |     } else {
 66 |         stop("Unknown residual type!")
 67 |     }
 68 |     if (!whichTaxa %in% c("runs", "response")) {
 69 |         numTaxa = length(whichTaxa)
 70 |         idTaxa = whichTaxa
 71 |     } else {
 72 |         sizes = if (whichTaxa == "runs")
 73 |             apply(resMat > 0, 2, function(x) {
 74 |                 runs.test(factor(x))$statistic
 75 |             }) else RCM$NB_params[2, , Dim]
 76 |         # Select taxa with longest runs or strongest
 77 |         # responses
 78 |         idTaxa = which(sizes >= sort(sizes, decreasing = TRUE)[numTaxa])
 79 |     }
 80 |     # Prepare the plotting facets
 81 |     mfrow = if (is.null(mfrow))
 82 |         rep(ceiling(sqrt(numTaxa)), 2) else mfrow
 83 |     parTmp = par(no.readonly = TRUE)
 84 |     on.exit(par(parTmp))
 85 |     par(mfrow = mfrow)
 86 |     resMat = resMat[, idTaxa, drop = FALSE]
 87 |     Colour = if (is.null(samColour))
 88 |       "black" else get_variable(RCM$physeq, samColour)
 89 |     Shape = if (is.null(samShape)){
 90 |       if(length(idTaxa)>1) 1 else "none"
 91 |     } else get_variable(RCM$physeq, samShape)
 92 |     if (length(idTaxa) > 1) {
 93 |         foo = lapply(colnames(resMat), function(tax) {
 94 |             plot(x = sampleScore, y = resMat[, tax],
 95 |                 ylab = paste(resid, "residuals"),
 96 |               xlab = paste("Environmental score in dimension", Dim),
 97 |                 main = tax, col = Colour, pch = Shape)
 98 |         })
 99 |         return(invisible())
100 |     } else {
101 |         Plot = ggplot(data.frame(x = c(sampleScore),
102 |             y = c(resMat), samColour = Colour, samShape = Shape),
103 |             mapping = aes_string(x = "x", y = "y",
104 |                 colour = "samColour", shape = "samShape")) +
105 |             ylab(paste(resid, "residuals")) +
106 |             xlab(paste("Environmental score in dimension",
107 |             Dim)) + geom_point() + ggtitle(ifelse(taxTitle,
108 |             colnames(resMat), ""))
109 |         Plot = Plot + if (is.null(samShape))
110 |             guides(shape = FALSE) else scale_shape_discrete(name = samShape)
111 |         Plot = Plot + if (is.null(samColour)) {
112 |             guides(colour = FALSE)
113 |         } else if (is.factor(get_variable(RCM$physeq,
114 |             samColour))) {
115 |             scale_colour_discrete(name = samColour)
116 |         } else scale_colour_continuous(name = samColour)
117 |         Plot = Plot + geom_hline(yintercept = h, linetype = "dashed", col = "black")
118 |         Plot = Plot + theme_bw() + theme(axis.title = element_text(size = axisTitleSize),
119 |             axis.text = element_text(size = axisLabSize),
120 |             legend.title = element_text(size = legendTitleSize),
121 |             legend.text = element_text(size = legendLabSize))
122 |         return(Plot)
123 |     }
124 |   }
125 | 


--------------------------------------------------------------------------------
/R/F_respFunJacMat.R:
--------------------------------------------------------------------------------
 1 | #' Calculates the Jacobian of the parametric response functions
 2 | #'
 3 | #' @param betas a vector of length (deg+1)*(p+1) with regression parameters
 4 | #'  with deg the degree of the response function and the lagrangian multipliers
 5 | #' @param X the nxp data matrix
 6 | #' @param reg a vector of regressors with the dimension n-by-v
 7 | #' @param thetaMat The n-by-p matrix with dispersion parameters
 8 | #' @param muMarg offset matrix of size nxp
 9 | #' @param psi a scalar, the importance parameter
10 | #' @param v an integer, one plus the degree of the response function
11 | #' @param p an integer, the number of taxa
12 | #' @param IDmat an logical matrix with indices of non-zero elements
13 | #' @param IndVec  a vector with indices with non-zero elements
14 | #' @param allowMissingness A boolean, are missing values present
15 | #' @param naId The numeric index of the missing values in X
16 | #'
17 | #' @return The jacobian, a square matrix of dimension (deg+1)*(p+1)
18 | respFunJacMat = function(betas, X, reg, thetaMat,
19 |     muMarg, psi, v, p, IDmat, IndVec, allowMissingness, naId) {
20 |     NBparams = matrix(betas[seq_len(p * v)],
21 |         ncol = p)
22 |     mu = exp(reg %*% NBparams * psi) * muMarg
23 |     X = correctXMissingness(X, mu, allowMissingness, naId)
24 |     Jac = matrix(0, (p + 1) * v, (p + 1) *
25 |         v)
26 |     did = seq_len(p * v)
27 |     didv = seq_len(v)
28 |     # d²Lag/dlambda dBeta
29 |     Jac[didv + p * v, did][IndVec] = 2 *
30 |         NBparams
31 |     Jac = Jac + t(Jac)  #symmetrize
32 | 
33 |     tmp = (1 + X/thetaMat) * mu/(1 + mu/thetaMat)^2
34 |     tmp2 = vapply(didv, FUN.VALUE = tmp,
35 |         function(x) {
36 |             reg[, x] * tmp
37 |         })
38 |     # d²Lag/dBeta²
39 |     Jac[did, did][IDmat] = -aperm(tensor(reg,
40 |         tmp2, 1, 1), c(3, 1, 2)) * psi^2
41 |     # Permute the dimensions to assure
42 |     # correct insertion
43 | 
44 |     diag(Jac)[did] = diag(Jac)[did] + 2 *
45 |         betas[seq_len(v) + p * v]
46 |     return(Jac)
47 | }
48 | 


--------------------------------------------------------------------------------
/R/F_respFunScoreMat.R:
--------------------------------------------------------------------------------
 1 | #' Derivative of the Lagrangian of the parametric response function
 2 | #'
 3 | #' @param betas a vector of length (deg+1)*(p+1) with regression parameters with
 4 | #'  deg the degree of the response function and the lagrangian multipliers
 5 | #' @param X the nxp data matrix
 6 | #' @param reg a matrix of regressors with the dimension nx(deg+1)
 7 | #' @param thetaMat The n-by-p matrix with dispersion parameters
 8 | #' @param muMarg offset matrix of size nxp
 9 | #' @param psi a scalar, the importance parameter
10 | #' @param v an integer, one plus the degree of the response function
11 | #' @param p an integer, the number of taxa
12 | #' @param allowMissingness A boolean, are missing values present
13 | #' @param naId The numeric index of the missing values in X
14 | #' @param ... further arguments passed on to the jacobian
15 | #'
16 | #' The parameters are restricted to be normalized, i.e. all squared intercepts,
17 | #'  first order and second order parameters sum to 1
18 | #'
19 | #' @return The evaluation of the score functions, a vector of length (p+1)*
20 | #' (deg+1)
21 | #'
22 | respFunScoreMat = function(betas, X, reg,
23 |     thetaMat, muMarg, psi, p, v, allowMissingness, naId,...) {
24 |     NBparams = matrix(betas[seq_len(p * v)],
25 |         ncol = p)
26 |     mu = exp((reg %*% NBparams) * psi) *
27 |         muMarg
28 |     X = correctXMissingness(X, mu, allowMissingness, naId)
29 |     score = crossprod(reg, (X - mu)/(1 +
30 |         mu/thetaMat)) * psi + 2 * betas[seq_len(v) +
31 |         p * v] * NBparams
32 |     norm = rowSums(NBparams^2) - 1
33 |     return(c(score, norm))  #Taxon per taxon
34 | }
35 | 


--------------------------------------------------------------------------------
/R/F_rowMultiply.R:
--------------------------------------------------------------------------------
 1 | #' A function to efficiently row multiply a matrix and a vector
 2 | #'
 3 | #' @details Memory intensive but that does not matter with given matrix sizes
 4 | #'
 5 | #' @param matrix a numeric matrix of dimension a-by-b
 6 | #' @param vector a numeric vector of length b
 7 | #'
 8 | #' t(t(matrix)*vector) but then faster
 9 | #'
10 | #' @return a matrix, row multplied by the vector
11 | rowMultiply = function(matrix, vector) {
12 |     matrix * rep(vector, rep(nrow(matrix), 
13 |         length(vector)))
14 | }
15 | 


--------------------------------------------------------------------------------
/R/F_seq_k.R:
--------------------------------------------------------------------------------
 1 | #' A small auxiliary function for the length of the lambdas
 2 | #'
 3 | #' @param y an integer, the current dimension
 4 | #' @param nLambda1s the number of centering restrictions
 5 | #'
 6 | #' @return a vector containing the ranks of the current lagrangian multipliers
 7 | seq_k = function(y, nLambda1s = 1) {
 8 |     (y - 1) * (1 + nLambda1s + (y - 2)/2) + 
 9 |         seq_len(y + nLambda1s)
10 | }
11 | 


--------------------------------------------------------------------------------
/R/F_trimOnConfounders.R:
--------------------------------------------------------------------------------
 1 | #' Trim based on confounders to avoid taxa with only zero counts
 2 | #'
 3 | #' @param confounders a nxt confounder matrix
 4 | #' @param X the nxp data matrix
 5 | #' @param prevCutOff a scalar between 0 and 1, the prevalence cut off
 6 | #' @param minFraction a scalar between 0 and 1,
 7 | #'  each taxon's total abundance should equal at least the number of samples n
 8 | #'   times minFraction, otherwise it is trimmed
 9 | #' @param n the number of samples
10 | #'
11 | #' Should be called prior to fitting the independence model
12 | #'
13 | #' @return A trimmed data matrix nxp'
14 | trimOnConfounders = function(confounders,
15 |     X, prevCutOff, minFraction, n) {
16 |     trimmingID = apply(X, 2, function(x) {
17 |         # Over taxa Over confounding variables
18 |         any(apply(confounders, 2, function(conf) {
19 |             tapply(X = x, INDEX = conf, FUN = function(y) {
20 |                 mean(!(y %in% c(0, NA))) <= prevCutOff |
21 |                 sum(y, na.rm = TRUE) < (n * minFraction)
22 |             })  #Any all-zero subgroup?
23 |         }))
24 |     })
25 | 
26 |     if (sum(!trimmingID) <= 1) {
27 |         stop("All taxa would be trimmed,
28 |         please provide a covariate with less levels,
29 |         or reduce the prevalence cut-off! \n")
30 |     }
31 | 
32 |     X[, !trimmingID]  #Return trimmed X
33 | }
34 | 


--------------------------------------------------------------------------------
/README_figs/README-plotCond-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotCond-1.png


--------------------------------------------------------------------------------
/README_figs/README-plotNPTriplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotNPTriplot-1.png


--------------------------------------------------------------------------------
/README_figs/README-plotRichness-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotRichness-1.png


--------------------------------------------------------------------------------
/README_figs/README-plotUnconstrainedRCMall-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotUnconstrainedRCMall-1.png


--------------------------------------------------------------------------------
/README_figs/README-plotUnconstrainedRCMallColour-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotUnconstrainedRCMallColour-1.png


--------------------------------------------------------------------------------
/README_figs/README-plotlin2cor-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotlin2cor-1.png


--------------------------------------------------------------------------------
/README_figs/README-plotlin3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotlin3-1.png


--------------------------------------------------------------------------------
/README_figs/README-plotlin3Triplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/README_figs/README-plotlin3Triplot-1.png


--------------------------------------------------------------------------------
/data/Zeller.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/data/Zeller.RData


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | year <- sub("-.*", "", meta$Date)
 2 | note <- sprintf("R package version %s", meta$Version)
 3 | 
 4 | bibentry(bibtype = "Manual",
 5 |          title = "{RCM}: A Unified Approach to Unconstrained and Constrained Visualization of Microbiome Read Count Data",
 6 |          author = c(person(family = "Hawinkel", "Stijn"),
 7 |                     person(family = "Kerckhof", "Frederiek-Maarten"),
 8 |                     person(family = "Bijnens", "Luc"),
 9 |                     person(family = "Thas", "Olivier"),
10 |                     person("R Core Team")),
11 |          year = year,
12 |          note = note)
13 | 


--------------------------------------------------------------------------------
/inst/NEWS.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # 0.2.0
  3 | 
  4 |   - Importance parameters \(\psi\) are no longer calculated when
  5 |     non-parametric response functions are used.
  6 | 
  7 | # 0.3.0
  8 | 
  9 |   - Importance parameters \(\psi\) are enabled again when non-parametric
 10 |     response functions are used, but not used for plotting.
 11 |   - 2D sample plots for constrained ordination with non-parametric
 12 |     response functions have been disabled, as they are not
 13 |     interpretable. Variable plots are the only 2D plots still allowed
 14 |   - Explained deviance and inertia can be plotted on the axes rahter
 15 |     than the \(\psi\)’s using the “plotPsi” argument to the plot.RCM()
 16 |     function.
 17 |   - Possibility to provide lower dimensional fits has been disabled.
 18 |     *RCM* is fast enough to fit the whole model.
 19 | 
 20 | # 1.0.0
 21 | 
 22 |   - Release on BioConductor
 23 | 
 24 | # 1.0.1
 25 | 
 26 |   - Bug fix in buildCovMat() to avoid false warning
 27 |   - Check for alias structure in confounder and covariate matrices
 28 | 
 29 | # 1.2.0
 30 | 
 31 |   - Missing values in count matrix are now allowed. They simply do not
 32 |     contribute to the parameter estimation, but the rest of the row (or
 33 |     column) is still used.
 34 | 
 35 | # 1.2.1
 36 | 
 37 |   - Vertical reference line in residual plot
 38 |   - Bug fix for problematic variable names
 39 | 
 40 | # 1.2.2
 41 | 
 42 |   - Moving the online manual information to the vignette
 43 | 
 44 | # 1.2.3
 45 | 
 46 |   - Rename *a* and *b* to *rowExp* and *colExp* to avoid partial
 47 |     matching
 48 |   - Allow *rowExp* and *colExp* to be adapted for constrained
 49 |     correspondence analysis starting values as well
 50 | 
 51 | # 1.2.4
 52 | 
 53 |   - Adding a new inflVar variable to disambiguate in the influence
 54 |     plotting
 55 |   - More argument checking + tests for the plot.RCM function
 56 | 
 57 | # 1.5.2
 58 | 
 59 |   - Avoid returning nulls for residualPlot
 60 | 
 61 | # 1.5.4
 62 | 
 63 |   - A note in the vignette and in the help file of plot.RCM regarding
 64 |     limited number of combinations of constraining variables. Also a
 65 |     warning is now thrown
 66 | 
 67 | # 1.5.5
 68 | 
 69 |   - Bug fix for higher dimension residualPlot function, and tests for
 70 |     this function
 71 | 
 72 | # 1.5.6
 73 | 
 74 |   - Replace deprecated guides( =FALSE) by guides(=“none”)
 75 | 
 76 | # 1.5.7
 77 | 
 78 |   - Update vignette to number table of contents
 79 | 
 80 | # 1.11.0
 81 | 
 82 |   - Added FAQ section in vignette with first frequent question on number
 83 |     of samples not shown.
 84 |   - Fixed bugs for plots of data with missing values, and added tests.
 85 | 
 86 | # 1.11.2
 87 | 
 88 |   - Explicitly import stats::model.matrix, and only load necessary VGAM
 89 |     functions
 90 | 
 91 | # 1.11.3
 92 | 
 93 |   - For the unconstrained models: fit feature models one by one and
 94 |     Gram-Schmidt orthogonalize and center afterwards, rather than using
 95 |     Lagrange multipliers and huge Jacobian matrices. This will use less
 96 |     memory and speed up computations, but *may yield slightly different
 97 |     solutions*. Nothing changes for the constrained models.
 98 |     
 99 | # 1.11.4
100 | 
101 |  - Introduction of permanova testing for user-supplied groups using the _permanova_ function.
102 | 


--------------------------------------------------------------------------------
/inst/fits/zellerFits.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CenterForStatistics-UGent/RCM/b1f32241b8582f5dd896239fae68c86f60b56449/inst/fits/zellerFits.RData


--------------------------------------------------------------------------------
/man/GramSchmidt.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_GramSchmidt.R
 3 | \name{GramSchmidt}
 4 | \alias{GramSchmidt}
 5 | \title{Gram-Schmidt orthogonalization of vectors}
 6 | \usage{
 7 | GramSchmidt(x, otherVecs, weights = rep(1, length(x)))
 8 | }
 9 | \arguments{
10 | \item{x}{The vector that is to be orthogonalized}
11 | 
12 | \item{otherVecs}{a matrix; x is orthogonalized with respect to its rows}
13 | 
14 | \item{weights}{The weights used in the orthogonalization}
15 | }
16 | \value{
17 | The orthogonalized vector
18 | }
19 | \description{
20 | Gram-Schmidt orthogonalization of vectors
21 | }
22 | 


--------------------------------------------------------------------------------
/man/JacCol_constr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_JacCol_constr.R
 3 | \name{JacCol_constr}
 4 | \alias{JacCol_constr}
 5 | \title{Jacobian of the constrained analysis with linear response function.}
 6 | \usage{
 7 | JacCol_constr(betas, X, reg, theta, muMarg, psi, allowMissingness, naId)
 8 | }
 9 | \arguments{
10 | \item{betas}{a vector of v parameters of the response function
11 | of a single taxon}
12 | 
13 | \item{X}{the count vector of length n}
14 | 
15 | \item{reg}{a n-by-v matrix of regressors}
16 | 
17 | \item{theta}{The dispersion parameter of this taxon}
18 | 
19 | \item{muMarg}{offset of length n}
20 | 
21 | \item{psi}{a scalar, the importance parameter}
22 | 
23 | \item{allowMissingness}{A boolean, are missing values present}
24 | 
25 | \item{naId}{The numeric index of the missing values in X
26 | 
27 | Even though this approach does not imply normalization over
28 | the parameters of all taxa, it is very fast
29 |  and they can be normalized afterwards}
30 | }
31 | \value{
32 | The jacobian, a square symmetric matrix of dimension v
33 | }
34 | \description{
35 | Jacobian of the constrained analysis with linear response function.
36 | }
37 | 


--------------------------------------------------------------------------------
/man/JacCol_constr_noLab.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_JacCol_constr_noLab.R
 3 | \name{JacCol_constr_noLab}
 4 | \alias{JacCol_constr_noLab}
 5 | \title{The jacobian of the response function without taxon labels}
 6 | \usage{
 7 | JacCol_constr_noLab(
 8 |   betas,
 9 |   X,
10 |   reg,
11 |   thetasMat,
12 |   muMarg,
13 |   psi,
14 |   n,
15 |   v,
16 |   preFabMat,
17 |   allowMissingness,
18 |   naId
19 | )
20 | }
21 | \arguments{
22 | \item{betas}{a vector of regression parameters with length v}
23 | 
24 | \item{X}{the nxp data matrix}
25 | 
26 | \item{reg}{a matrix of regressors of dimension nxv}
27 | 
28 | \item{thetasMat}{A matrix of dispersion parameters}
29 | 
30 | \item{muMarg}{offset matrix of dimension nxp}
31 | 
32 | \item{psi}{a scalar, the importance parameter}
33 | 
34 | \item{n}{an integer, number of rows of X}
35 | 
36 | \item{v}{an integer, the number of parameters of the response function}
37 | 
38 | \item{preFabMat}{a prefabricated matrix}
39 | 
40 | \item{allowMissingness}{A boolean, are missing values present}
41 | 
42 | \item{naId}{The numeric index of the missing values in X}
43 | }
44 | \value{
45 | The jacobian (a v-by-v matrix)
46 | }
47 | \description{
48 | The jacobian of the response function without taxon labels
49 | }
50 | 


--------------------------------------------------------------------------------
/man/LR_nb.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_LR_nb.R
 3 | \name{LR_nb}
 4 | \alias{LR_nb}
 5 | \title{Get the value of the log-likelihood ratio of alpha}
 6 | \usage{
 7 | LR_nb(
 8 |   Alpha,
 9 |   X,
10 |   CC,
11 |   responseFun = c("linear", "quadratic", "nonparametric", "dynamic"),
12 |   muMarg,
13 |   psi,
14 |   nleqslv.control = list(trace = FALSE),
15 |   n,
16 |   NB_params,
17 |   NB_params_noLab,
18 |   thetaMat,
19 |   ncols,
20 |   nonParamRespFun,
21 |   envGradEst,
22 |   ...
23 | )
24 | }
25 | \arguments{
26 | \item{Alpha}{a vector of length d, the environmental gradient}
27 | 
28 | \item{X}{the n-by-p count matrix}
29 | 
30 | \item{CC}{the n-by-d covariate matrix}
31 | 
32 | \item{responseFun}{a character string indicating
33 | the type of response function}
34 | 
35 | \item{muMarg}{an n-by-p offset matrix}
36 | 
37 | \item{psi}{a scalar, an importance parameter}
38 | 
39 | \item{nleqslv.control}{the control list for the nleqslv() function}
40 | 
41 | \item{n}{number of samples}
42 | 
43 | \item{NB_params}{Starting values for the NB_params}
44 | 
45 | \item{NB_params_noLab}{Starting values for the NB_params without label}
46 | 
47 | \item{thetaMat}{a matrix of size n-by-p with estimated dispersion parameters}
48 | 
49 | \item{ncols}{a scalar, the number of columns of X}
50 | 
51 | \item{nonParamRespFun}{A list, the result of the estNPresp() function}
52 | 
53 | \item{envGradEst}{a character string,
54 | indicating how the environmental gradient should be fitted.
55 | 'LR' using the likelihood-ratio criterion,
56 | or 'ML' a full maximum likelihood solution}
57 | 
58 | \item{...}{Further arguments passed on to other functions
59 | 
60 | DON'T USE 'p' as variable name,
61 |  partial matching in the grad-function in the numDeriv package}
62 | }
63 | \value{
64 | : a scalar, the evaluation of the log-likelihood ratio
65 |  at the given alpha
66 | }
67 | \description{
68 | Get the value of the log-likelihood ratio of alpha
69 | }
70 | 


--------------------------------------------------------------------------------
/man/LR_nb_Jac.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_LR_nb_Jac.R
 3 | \name{LR_nb_Jac}
 4 | \alias{LR_nb_Jac}
 5 | \title{A function that returns the Jacobian of the likelihood ratio}
 6 | \usage{
 7 | LR_nb_Jac(
 8 |   Alpha,
 9 |   X,
10 |   CC,
11 |   responseFun = c("linear", "quadratic", "nonparametric", "dynamic"),
12 |   psi,
13 |   NB_params,
14 |   NB_params_noLab,
15 |   d,
16 |   alphaK,
17 |   k,
18 |   centMat,
19 |   nLambda,
20 |   nLambda1s,
21 |   thetaMat,
22 |   muMarg,
23 |   n,
24 |   ncols,
25 |   preFabMat,
26 |   envGradEst,
27 |   allowMissingness,
28 |   naId,
29 |   ...
30 | )
31 | }
32 | \arguments{
33 | \item{Alpha}{a vector of length d + k*(2+(k-1)/2),
34 | the environmental gradient plus the lagrangian multipliers}
35 | 
36 | \item{X}{the n-by-p count matrix}
37 | 
38 | \item{CC}{a n-by-d covariate vector}
39 | 
40 | \item{responseFun}{a character string indicating
41 | the type of response function}
42 | 
43 | \item{psi}{a scalar, an importance parameter}
44 | 
45 | \item{NB_params}{Starting values for the NB_params}
46 | 
47 | \item{NB_params_noLab}{Starting values for the NB_params without label}
48 | 
49 | \item{d}{an integer, the number of covariate parameters}
50 | 
51 | \item{alphaK}{a matrix of environmental gradients of lower dimensions}
52 | 
53 | \item{k}{an integer, the current dimension}
54 | 
55 | \item{centMat}{a nLambda1s-by-d centering matrix}
56 | 
57 | \item{nLambda}{an integer, number of lagrangian multipliers}
58 | 
59 | \item{nLambda1s}{an integer, number of centering restrictions}
60 | 
61 | \item{thetaMat}{a matrix of size n-by-p with estimated dispersion parameters}
62 | 
63 | \item{muMarg}{an n-by-p offset matrix}
64 | 
65 | \item{n}{an integer, the number of rows of X}
66 | 
67 | \item{ncols}{a scalar, the number of columns of X}
68 | 
69 | \item{preFabMat}{a prefabricated matrix}
70 | 
71 | \item{envGradEst}{a character string,
72 | indicating how the environmental gradient should be fitted.
73 | 'LR' using the likelihood-ratio criterion,
74 | or 'ML' a full maximum likelihood solution}
75 | 
76 | \item{allowMissingness}{A boolean, are missing values present}
77 | 
78 | \item{naId}{The numeric index of the missing values in X}
79 | 
80 | \item{...}{Further arguments passed on to other functions}
81 | }
82 | \value{
83 | A symmetric matrix, the evaluated Jacobian
84 | }
85 | \description{
86 | A function that returns the Jacobian of the likelihood ratio
87 | }
88 | 


--------------------------------------------------------------------------------
/man/NBalphaInfl.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_NBalphaInfl.R
 3 | \name{NBalphaInfl}
 4 | \alias{NBalphaInfl}
 5 | \title{Calculate the components of the influence functions}
 6 | \usage{
 7 | NBalphaInfl(rcm, Dim)
 8 | }
 9 | \arguments{
10 | \item{rcm}{an rcm object}
11 | 
12 | \item{Dim}{the required dimension}
13 | }
14 | \value{
15 | An n-by-p-by-d array with the influence of every observation
16 |  on every alpha parameter
17 | }
18 | \description{
19 | Calculate the components of the influence functions
20 | }
21 | 


--------------------------------------------------------------------------------
/man/NBcolInfl.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_NBcolInfl.R
 3 | \name{NBcolInfl}
 4 | \alias{NBcolInfl}
 5 | \title{The influence function for the column scores}
 6 | \usage{
 7 | NBcolInfl(rcm, Dim = 1)
 8 | }
 9 | \arguments{
10 | \item{rcm}{an rcm object}
11 | 
12 | \item{Dim}{the required dimension}
13 | }
14 | \value{
15 | A list with components
16 | \item{score}{a matrix with components of the score function}
17 | \item{InvJac}{A square matrix of dimension p with the components of the
18 |  Jacobian related to the column scores}
19 | }
20 | \description{
21 | The influence function for the column scores
22 | }
23 | 


--------------------------------------------------------------------------------
/man/NBjacobianAbundsOld.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_NBjacobianAbunds.R
 3 | \name{NBjacobianAbundsOld}
 4 | \alias{NBjacobianAbundsOld}
 5 | \title{Jacobian for the column components of the independence model}
 6 | \usage{
 7 | NBjacobianAbundsOld(beta, X, reg, thetas, allowMissingness, naId)
 8 | }
 9 | \arguments{
10 | \item{beta}{a vector of length p with current abundance estimates}
11 | 
12 | \item{X}{a n-by-p count matrix}
13 | 
14 | \item{reg}{a vector of length n with library sizes estimates}
15 | 
16 | \item{thetas}{a n-by-p matrix with overdispersion estimates in the rows}
17 | 
18 | \item{allowMissingness}{A boolean, are missing values present}
19 | 
20 | \item{naId}{The numeric index of the missing values in X}
21 | }
22 | \value{
23 | a diagonal matrix of dimension p with evaluations
24 | of the jacobian function
25 | }
26 | \description{
27 | Jacobian for the column components of the independence model
28 | }
29 | 


--------------------------------------------------------------------------------
/man/NBjacobianColNP.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_NBjacobianColNP.R
 3 | \name{NBjacobianColNP}
 4 | \alias{NBjacobianColNP}
 5 | \title{Jacobian function for the estimation of a third degree GLM}
 6 | \usage{
 7 | NBjacobianColNP(beta, X, reg, theta, muMarg)
 8 | }
 9 | \arguments{
10 | \item{beta}{vector of any length}
11 | 
12 | \item{X}{the data vector of length n}
13 | 
14 | \item{reg}{a nxlength(beta) regressor matrix}
15 | 
16 | \item{theta}{a scalar, the overdispersion}
17 | 
18 | \item{muMarg}{the offset of length n}
19 | }
20 | \value{
21 | A matrix of dimension 8-by-8
22 | }
23 | \description{
24 | Jacobian function for the estimation of a third degree GLM
25 | }
26 | 


--------------------------------------------------------------------------------
/man/NBjacobianColOld.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_NBjacobianCol.R
 3 | \name{NBjacobianColOld}
 4 | \alias{NBjacobianColOld}
 5 | \title{Jacobian for the estimation of the column scores}
 6 | \usage{
 7 | NBjacobianColOld(
 8 |   beta,
 9 |   X,
10 |   reg,
11 |   thetas,
12 |   muMarg,
13 |   k,
14 |   n,
15 |   p,
16 |   colWeights,
17 |   nLambda,
18 |   cMatK,
19 |   preFabMat,
20 |   Jac,
21 |   allowMissingness,
22 |   naId
23 | )
24 | }
25 | \arguments{
26 | \item{beta}{vector of length p+1+1+(k-1): p row scores, 1 centering,
27 | one normalization
28 | and (k-1) orhtogonality lagrangian multipliers}
29 | 
30 | \item{X}{the nxp data matrix}
31 | 
32 | \item{reg}{a nx1 regressor matrix: outer product of rowScores and psis}
33 | 
34 | \item{thetas}{nxp matrix with the dispersion parameters
35 | (converted to matrix for numeric reasons)}
36 | 
37 | \item{muMarg}{the nxp offset}
38 | 
39 | \item{k}{an integer, the dimension of the RC solution}
40 | 
41 | \item{n}{an integer, the number of samples}
42 | 
43 | \item{p}{an integer, the number of taxa}
44 | 
45 | \item{colWeights}{the weights used for the restrictions}
46 | 
47 | \item{nLambda}{an integer, the number of restrictions}
48 | 
49 | \item{cMatK}{the lower dimensions of the colScores}
50 | 
51 | \item{preFabMat}{a prefab matrix, (1+X/thetas)}
52 | 
53 | \item{Jac}{an empty Jacobian matrix}
54 | 
55 | \item{allowMissingness}{A boolean, are missing values present}
56 | 
57 | \item{naId}{The numeric index of the missing values in X}
58 | }
59 | \value{
60 | A matrix of dimension p+1+1+(k-1) with evaluations of the Jacobian
61 | }
62 | \description{
63 | Jacobian for the estimation of the column scores
64 | }
65 | 


--------------------------------------------------------------------------------
/man/NBjacobianLibSizes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_NBjacobianLibSizes.R
 3 | \name{NBjacobianLibSizes}
 4 | \alias{NBjacobianLibSizes}
 5 | \title{Jacobian for the raw components of the independence model}
 6 | \usage{
 7 | NBjacobianLibSizes(beta, X, reg, thetas, allowMissingness, naId)
 8 | }
 9 | \arguments{
10 | \item{beta}{a vector of length n with current library size estimates}
11 | 
12 | \item{X}{a n-by-p count matrix}
13 | 
14 | \item{reg}{a vector of length p with relative abundance estimates}
15 | 
16 | \item{thetas}{a n-by-p matrix with overdispersion estimates in the rows}
17 | 
18 | \item{allowMissingness}{A boolean, are missing values present}
19 | 
20 | \item{naId}{The numeric index of the missing values in X}
21 | }
22 | \value{
23 | a diagonal matrix of dimension n: the Fisher information matrix
24 | }
25 | \description{
26 | Jacobian for the raw components of the independence model
27 | }
28 | 


--------------------------------------------------------------------------------
/man/NBjacobianPsi.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_NBjacobianPsi.R
 3 | \name{NBjacobianPsi}
 4 | \alias{NBjacobianPsi}
 5 | \title{Jacobian for the psi of a given dimension}
 6 | \usage{
 7 | NBjacobianPsi(beta, X, reg, muMarg, theta, preFabMat, allowMissingness, naId)
 8 | }
 9 | \arguments{
10 | \item{beta}{a scalar, the current estimate}
11 | 
12 | \item{X}{the n-by-p count matrix}
13 | 
14 | \item{reg}{the regressor matrix,
15 | the outer product of current row and column scores}
16 | 
17 | \item{muMarg}{the nxp offset matrix}
18 | 
19 | \item{theta}{a n-by-p matrix with the dispersion parameters}
20 | 
21 | \item{preFabMat}{a prefab matrix, (1+X/thetas)}
22 | 
23 | \item{allowMissingness}{A boolean, are missing values present}
24 | 
25 | \item{naId}{The numeric index of the missing values in X}
26 | }
27 | \value{
28 | The evaluation of the jacobian function at beta, a 1-by-1 matrix
29 | }
30 | \description{
31 | Jacobian for the psi of a given dimension
32 | }
33 | 


--------------------------------------------------------------------------------
/man/NBjacobianRow.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_NBjacobianRow.R
 3 | \name{NBjacobianRow}
 4 | \alias{NBjacobianRow}
 5 | \title{A jacobian function of the NB for the row scores}
 6 | \usage{
 7 | NBjacobianRow(
 8 |   beta,
 9 |   X,
10 |   reg,
11 |   thetas,
12 |   muMarg,
13 |   k,
14 |   n,
15 |   p,
16 |   rowWeights,
17 |   nLambda,
18 |   rMatK,
19 |   preFabMat,
20 |   Jac,
21 |   allowMissingness,
22 |   naId
23 | )
24 | }
25 | \arguments{
26 | \item{beta}{a vector of of length n + k +1 regression parameters to optimize}
27 | 
28 | \item{X}{the data matrix of dimensions nxp}
29 | 
30 | \item{reg}{a 1xp regressor matrix: outer product of column scores and psis}
31 | 
32 | \item{thetas}{nxp matrix with the dispersion parameters
33 | (converted to matrix for numeric reasons)}
34 | 
35 | \item{muMarg}{an nxp offset matrix}
36 | 
37 | \item{k}{a scalar, the dimension of the RC solution}
38 | 
39 | \item{n}{a scalar, the number of samples}
40 | 
41 | \item{p}{a scalar, the number of taxa}
42 | 
43 | \item{rowWeights}{a vector of length n, the weights used for the restrictions}
44 | 
45 | \item{nLambda}{an integer, the number of lagrangian multipliers}
46 | 
47 | \item{rMatK}{the lower dimension row scores}
48 | 
49 | \item{preFabMat}{a prefab matrix, (1+X/thetas)}
50 | 
51 | \item{Jac}{an empty Jacobian matrix}
52 | 
53 | \item{allowMissingness}{A boolean, are missing values present}
54 | 
55 | \item{naId}{The numeric index of the missing values in X}
56 | }
57 | \value{
58 | a symmetric jacobian matrix of size n+k + 1
59 | }
60 | \description{
61 | A jacobian function of the NB for the row scores
62 | }
63 | 


--------------------------------------------------------------------------------
/man/NBpsiInfl.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_NBpsiInfl.R
 3 | \name{NBpsiInfl}
 4 | \alias{NBpsiInfl}
 5 | \title{The influence function for the psis}
 6 | \usage{
 7 | NBpsiInfl(rcm, Dim = 1)
 8 | }
 9 | \arguments{
10 | \item{rcm}{an rcm object}
11 | 
12 | \item{Dim}{the required dimensions}
13 | }
14 | \value{
15 | The influence of every single observation
16 |  on the psi value of this dimension
17 | }
18 | \description{
19 | The influence function for the psis
20 | }
21 | 


--------------------------------------------------------------------------------
/man/NBrowInfl.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_NBrowInfl.R
 3 | \name{NBrowInfl}
 4 | \alias{NBrowInfl}
 5 | \title{The influence function for the row scores}
 6 | \usage{
 7 | NBrowInfl(rcm, Dim = 1)
 8 | }
 9 | \arguments{
10 | \item{rcm}{an rcm object}
11 | 
12 | \item{Dim}{the required dimension}
13 | }
14 | \value{
15 | A list with components
16 | \item{score}{a matrix with components of the score function}
17 | \item{InvJac}{A square matrix of dimension n with the components
18 |  of the Jacobian related to the row scores}
19 | }
20 | \description{
21 | The influence function for the row scores
22 | }
23 | 


--------------------------------------------------------------------------------
/man/RCM.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/F_RCM.R
  3 | \name{RCM}
  4 | \alias{RCM}
  5 | \alias{RCM,phyloseq-method}
  6 | \alias{RCM,matrix-method}
  7 | \title{Wrapper function for the RCM() function}
  8 | \usage{
  9 | RCM(dat, ...)
 10 | 
 11 | \S4method{RCM}{phyloseq}(dat, covariates = NULL, confounders = NULL, ...)
 12 | 
 13 | \S4method{RCM}{matrix}(
 14 |   dat,
 15 |   k = 2,
 16 |   round = FALSE,
 17 |   prevCutOff = 0.05,
 18 |   minFraction = 0.1,
 19 |   rowWeights = "uniform",
 20 |   colWeights = "marginal",
 21 |   confModelMat = NULL,
 22 |   confTrimMat = NULL,
 23 |   covModelMat = NULL,
 24 |   centMat = NULL,
 25 |   allowMissingness = FALSE,
 26 |   ...
 27 | )
 28 | }
 29 | \arguments{
 30 | \item{dat}{an nxp count matrix or a phyloseq object with an otu_table slot}
 31 | 
 32 | \item{...}{Further arguments passed on to the RCM.NB() function}
 33 | 
 34 | \item{covariates}{In case 'dat' is a phyloseq object,
 35 | the names of the sample
 36 |   variables to be used as covariates in the constrained analysis,
 37 |    or 'all' to
 38 |   indicate all variables to be used.
 39 |   In case 'dat' is a matrix, a nxf matrix
 40 |   or dataframe of covariates.
 41 |    Character variables will be converted to
 42 |   factors, with a warning. Defaults to NULL,
 43 |   in which case an unconstrained
 44 |   analysis is carried out.}
 45 | 
 46 | \item{confounders}{In case 'dat' is a phyloseq object,
 47 | the names of the sample variables to be used as confounders
 48 |  to be filtered
 49 |  out. In case 'dat' is a matrix, a nxf dataframe
 50 |  of confounders.
 51 |   Character variables will be converted to factors, with a warning.
 52 |   Defaults to NULL, in which case no filtering occurs.}
 53 | 
 54 | \item{k}{an integer, the number of dimensions of the RCM solution}
 55 | 
 56 | \item{round}{a boolean, whether to round to nearest integer. Defaults to
 57 | FALSE.}
 58 | 
 59 | \item{prevCutOff}{a scalar, the prevalance cutoff for the trimming.
 60 | Defaults to 2.5e-2}
 61 | 
 62 | \item{minFraction}{a scalar, each taxon's total abundance
 63 | should equal
 64 |  at least the number of samples n times minFraction,
 65 |   otherwise it is trimmed.
 66 |   Defaults to 10\%}
 67 | 
 68 | \item{rowWeights, colWeights}{character strings,
 69 | the weighting procedures for the normalization of row and column scores.
 70 |  Defaults to 'uniform' and 'marginal' respectively}
 71 | 
 72 | \item{confTrimMat, confModelMat, covModelMat, centMat}{Dedicated model matrices
 73 | constructed based on phyloseq object.}
 74 | 
 75 | \item{allowMissingness}{A boolean, should NA values be tolerated?}
 76 | }
 77 | \value{
 78 | see \code{\link{RCM_NB}}
 79 | }
 80 | \description{
 81 | This is a wrapper function,
 82 | which currently only fits the negative binomial distribution,
 83 | but which could easily be extended to other ones.
 84 | }
 85 | \details{
 86 | This function should be called on a raw count matrix,
 87 | without rarefying or normalization to proportions.
 88 | This functions trims on prevalence and total abundance to avoid instability
 89 | of the algorithm. Covariate and confounder matrices are constructed,
 90 | so that everything is passed on
 91 |  to the workhorse function RCM.NB() as matrices.
 92 | }
 93 | \examples{
 94 | data(Zeller)
 95 | require(phyloseq)
 96 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:100],
 97 | prune_samples(sample_names(Zeller)[1:50], Zeller))
 98 | zellerRCM = RCM(tmpPhy, round = TRUE)
 99 | 
100 | }
101 | \seealso{
102 | \code{\link{RCM_NB}},\code{\link{plot.RCM}},
103 | \code{\link{residualPlot}},\code{\link{plotRespFun}}
104 | }
105 | 


--------------------------------------------------------------------------------
/man/Zeller.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_data.R
 3 | \docType{data}
 4 | \name{Zeller}
 5 | \alias{Zeller}
 6 | \title{Microbiomes of colorectal cancer patients and healthy controls}
 7 | \format{
 8 | A phyloseq object with an OTU-table and sample data
 9 | \describe{
10 |   \item{otu_table}{Count data matrix of 709 taxa in 194 samples}
11 |   \item{sample_data}{Data frame of patient covariates}
12 | }
13 | }
14 | \source{
15 | \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4299606/}
16 | }
17 | \usage{
18 | Zeller
19 | }
20 | \description{
21 | Microbiome sequencing data of colorectal cancer patients,
22 | patients with small adenoma and healthy controls,
23 |  together with other baseline covariates
24 | }
25 | \keyword{datasets}
26 | 


--------------------------------------------------------------------------------
/man/addOrthProjection.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_addOrthProjection.R
 3 | \name{addOrthProjection}
 4 | \alias{addOrthProjection}
 5 | \title{This function adds orthogonal projections to a given plot}
 6 | \usage{
 7 | addOrthProjection(
 8 |   RCMplot,
 9 |   sample = NULL,
10 |   species = NULL,
11 |   variable = NULL,
12 |   Dims = c(1, 2),
13 |   addLabel = FALSE,
14 |   labPos = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{RCMplot}{the RCMplot object}
19 | 
20 | \item{sample, species, variable}{names or approximate coordinates of sample,
21 | species or variable}
22 | 
23 | \item{Dims}{The dimensions of the solutions that have been plotted}
24 | 
25 | \item{addLabel}{a boolean, should the r-s-psi label be added?}
26 | 
27 | \item{labPos}{the position of the label. Will be calculated if not provided}
28 | }
29 | \value{
30 | a modified ggplot object that contains the geom_segment object
31 | that draws the projection
32 | }
33 | \description{
34 | This function adds orthogonal projections to a given plot
35 | }
36 | \examples{
37 | data(Zeller)
38 | require(phyloseq)
39 | tmpPhy = prune_taxa(taxa_names(Zeller)[seq_len(100)],
40 | prune_samples(sample_names(Zeller)[seq_len(50)], Zeller))
41 | zellerRCM = RCM(tmpPhy, k = 2, round = TRUE)
42 | zellerPlot = plot(zellerRCM, returnCoords = TRUE)
43 | addOrthProjection(zellerPlot, species = c(-0.35,1.1), sample = c(1,1.2))
44 | }
45 | \seealso{
46 | \code{\link{plot.RCM}}
47 | }
48 | 


--------------------------------------------------------------------------------
/man/arrayprod.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_arrayProd.R
 3 | \name{arrayprod}
 4 | \alias{arrayprod}
 5 | \title{An auxiliary R function to 'array' multiply an array with a vector,
 6 |  kindly provided by Joris Meys}
 7 | \usage{
 8 | arrayprod(x, y)
 9 | }
10 | \arguments{
11 | \item{x}{a axbxc array}
12 | 
13 | \item{y}{a vector of length c}
14 | }
15 | \value{
16 | a axb matrix. The ij-th element equals sum(x[i,j,]*y)
17 | }
18 | \description{
19 | An auxiliary R function to 'array' multiply an array with a vector,
20 |  kindly provided by Joris Meys
21 | }
22 | 


--------------------------------------------------------------------------------
/man/buildCentMat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_buildCentMat.R
 3 | \name{buildCentMat}
 4 | \alias{buildCentMat}
 5 | \title{A function to build a centering matrix based on a dataframe}
 6 | \usage{
 7 | buildCentMat(object)
 8 | }
 9 | \arguments{
10 | \item{object}{an rcm object or dataframe}
11 | }
12 | \value{
13 | a centering matrix consisting of ones and zeroes,
14 |  or a list with components
15 | \item{centMat}{a centering matrix consisting of ones and zeroes}
16 | \item{datFrame}{The dataframe with factors with one level removed}
17 | }
18 | \description{
19 | A function to build a centering matrix based on a dataframe
20 | }
21 | 


--------------------------------------------------------------------------------
/man/buildConfMat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_buildConfMat.R
 3 | \name{buildConfMat}
 4 | \alias{buildConfMat}
 5 | \title{A function to build the confounder matrices}
 6 | \usage{
 7 | buildConfMat(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{a matrix, data frame or character string}
11 | 
12 | \item{...}{further arguments passed on to other methods
13 | 
14 | For the preliminary trimming, we do not include an intercept,
15 | but we do include all the levels of the factors using contrasts=FALSE:
16 |  we want to do the trimming in every subgroup, so no hidden reference levels
17 |   For the filtering we just use a model with an intercept and
18 |    treatment coding, here the interest is only in adjusting the offset}
19 | }
20 | \value{
21 | a list with components
22 | \item{confModelMatTrim}{A confounder matrix without intercept, with all
23 |  levels of factors present. This will be used to trim out taxa that have
24 |   zero abundances in any subgroup defined by confounders}
25 | \item{confModelMat}{A confounder matrix with intercept,
26 | and with reference levels for factors absent.
27 | This will be used to fit the model to modify the independence model,
28 | and may include continuous variables}
29 | }
30 | \description{
31 | A function to build the confounder matrices
32 | }
33 | 


--------------------------------------------------------------------------------
/man/buildConfMat.character.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_buildConfMat.R
 3 | \name{buildConfMat.character}
 4 | \alias{buildConfMat.character}
 5 | \title{buildConfMat.character}
 6 | \usage{
 7 | \method{buildConfMat}{character}(confounders, physeq)
 8 | }
 9 | \arguments{
10 | \item{confounders}{a numeric matrix of confounders}
11 | 
12 | \item{physeq}{a physeq object with a sample_data slot}
13 | }
14 | \value{
15 | see buidConfMat.numeric
16 | }
17 | \description{
18 | buildConfMat.character
19 | }
20 | 


--------------------------------------------------------------------------------
/man/buildConfMat.data.frame.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_buildConfMat.R
 3 | \name{buildConfMat.data.frame}
 4 | \alias{buildConfMat.data.frame}
 5 | \title{buildConfMat.data.frame}
 6 | \usage{
 7 | \method{buildConfMat}{data.frame}(confounders, n)
 8 | }
 9 | \arguments{
10 | \item{confounders}{a data frame of confounders}
11 | 
12 | \item{n}{the number of rows of the count matrix}
13 | }
14 | \value{
15 | see buidConfMat
16 | }
17 | \description{
18 | buildConfMat.data.frame
19 | }
20 | 


--------------------------------------------------------------------------------
/man/buildCovMat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_buildCovMat.R
 3 | \name{buildCovMat}
 4 | \alias{buildCovMat}
 5 | \title{A function to build the covariate matrix of the constraints}
 6 | \usage{
 7 | buildCovMat(covariates, dat)
 8 | }
 9 | \arguments{
10 | \item{covariates}{the covariates, either as dataframe or as character string}
11 | 
12 | \item{dat}{the phyloseq object
13 | 
14 | In this case we will 1) Include dummy's for every level of the
15 |  categorical variable, and force them to sum to zero.
16 |  This is needed for plotting and required for
17 |   reference level independent normalization.
18 |   2) Exclude an intercept. The density function f()
19 |   will provide this already.}
20 | }
21 | \value{
22 | a list with components
23 | \item{covModelMat}{The model matrix}
24 | \item{datFrame}{The dataframe used to construct the model matrix}
25 | }
26 | \description{
27 | A function to build the covariate matrix of the constraints
28 | }
29 | 


--------------------------------------------------------------------------------
/man/buildDesign.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_buildDesign.R
 3 | \name{buildDesign}
 4 | \alias{buildDesign}
 5 | \title{A function to build the design matrix}
 6 | \usage{
 7 | buildDesign(sampleScore, responseFun)
 8 | }
 9 | \arguments{
10 | \item{sampleScore}{a vector of environmental scores}
11 | 
12 | \item{responseFun}{A character string, indicating the shape
13 | of the response function
14 | 
15 | For dynamic response function estimation, the same desing matrix
16 | as for the quadratic one is returned.
17 | Will throw an error when an unknown repsonse function is provided}
18 | }
19 | \value{
20 | A design matrix of dimension n-by-f
21 | }
22 | \description{
23 | A function to build the design matrix
24 | }
25 | 


--------------------------------------------------------------------------------
/man/checkAlias.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_checkAlias.R
 3 | \name{checkAlias}
 4 | \alias{checkAlias}
 5 | \title{Check for alias structures in a dataframe, and throw an error when one is found}
 6 | \usage{
 7 | checkAlias(datFrame, covariatesNames)
 8 | }
 9 | \arguments{
10 | \item{datFrame}{the data frame to be checked for alias structure}
11 | 
12 | \item{covariatesNames}{The names of the variables to be considered}
13 | }
14 | \value{
15 | Throws an error when an alias structure is detected,
16 | returns invisible otherwise
17 | }
18 | \description{
19 | Check for alias structures in a dataframe, and throw an error when one is found
20 | }
21 | \examples{
22 | #Make a dataframe with aliased variables
23 | df = data.frame(foo = rnorm(10), baa = rep(c(TRUE, FALSE), each = 5),
24 | foo2 = factor(rep(c("male", "female"), each = 5)))
25 | checkAlias(df, c("foo", "baa"))
26 | #Check test files for the error being thrown
27 | }
28 | 


--------------------------------------------------------------------------------
/man/constrCorresp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_constrCorresp.R
 3 | \name{constrCorresp}
 4 | \alias{constrCorresp}
 5 | \title{Constrained correspondence analysis with adapted powers}
 6 | \usage{
 7 | constrCorresp(
 8 |   X,
 9 |   Y,
10 |   rowExp,
11 |   colExp,
12 |   muMarg = outer(rowSums(X), colSums(X))/sum(X)
13 | )
14 | }
15 | \arguments{
16 | \item{X}{outcome matrix}
17 | 
18 | \item{Y}{constraining matrix}
19 | 
20 | \item{rowExp, colExp}{see ?RCM_NB}
21 | 
22 | \item{muMarg}{mean matrix under independence model}
23 | }
24 | \value{
25 | a list with eigenvalues, aliased variables and environmentam gradients
26 | }
27 | \description{
28 | Constrained correspondence analysis with adapted powers
29 | }
30 | \details{
31 | the vegan version, adapted for flexible powers rowExp and colExp
32 | }
33 | 


--------------------------------------------------------------------------------
/man/correctXMissingness.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_correctXMissingness.R
 3 | \name{correctXMissingness}
 4 | \alias{correctXMissingness}
 5 | \title{Replace missing entries in X by their expectation to set their
 6 | contribution to the estimating equations to zero}
 7 | \usage{
 8 | correctXMissingness(X, mu, allowMissingness, naId)
 9 | }
10 | \arguments{
11 | \item{X}{the matrix of counts}
12 | 
13 | \item{mu}{the matrix of expectations}
14 | 
15 | \item{allowMissingness}{A boolean, are missing values present}
16 | 
17 | \item{naId}{The numeric index of the missing values in X}
18 | }
19 | \value{
20 | The matrix X with the NA entries replaced by the
21 | corresponding entries in mu
22 | }
23 | \description{
24 | Replace missing entries in X by their expectation to set their
25 | contribution to the estimating equations to zero
26 | }
27 | \note{
28 | This may seem like a hacky approach, but it avoids having to deal
29 |  with NAs in functions like crossprod().
30 | }
31 | 


--------------------------------------------------------------------------------
/man/dLR_nb.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_dLR_nb.R
 3 | \name{dLR_nb}
 4 | \alias{dLR_nb}
 5 | \title{A function that returns the value of the partial derivative of the
 6 |  log-likelihood ratio to alpha, keeping the response functions fixed}
 7 | \usage{
 8 | dLR_nb(
 9 |   Alpha,
10 |   X,
11 |   CC,
12 |   responseFun = c("linear", "quadratic", "nonparametric", "dynamic"),
13 |   psi,
14 |   NB_params,
15 |   NB_params_noLab,
16 |   d,
17 |   alphaK,
18 |   k,
19 |   centMat,
20 |   nLambda,
21 |   nLambda1s,
22 |   thetaMat,
23 |   muMarg,
24 |   ncols,
25 |   envGradEst,
26 |   allowMissingness,
27 |   naId,
28 |   ...
29 | )
30 | }
31 | \arguments{
32 | \item{Alpha}{a vector of length d + k*(2+(k-1)/2),
33 | the environmental gradient plus the lagrangian multipliers}
34 | 
35 | \item{X}{the n-by-p count matrix}
36 | 
37 | \item{CC}{a n-by-d covariate vector}
38 | 
39 | \item{responseFun}{a character string indicating
40 | the type of response function}
41 | 
42 | \item{psi}{a scalar, an importance parameter}
43 | 
44 | \item{NB_params}{Starting values for the NB_params}
45 | 
46 | \item{NB_params_noLab}{Starting values for the NB_params without label}
47 | 
48 | \item{d}{an integer, the number of covariate parameters}
49 | 
50 | \item{alphaK}{a matrix of environmental gradients of lower dimensions}
51 | 
52 | \item{k}{an integer, the current dimension}
53 | 
54 | \item{centMat}{a nLambda1s-by-d centering matrix}
55 | 
56 | \item{nLambda}{an integer, number of lagrangian multipliers}
57 | 
58 | \item{nLambda1s}{an integer, number of centering restrictions}
59 | 
60 | \item{thetaMat}{a matrix of size n-by-p with estimated dispersion parameters}
61 | 
62 | \item{muMarg}{an n-by-p offset matrix}
63 | 
64 | \item{ncols}{a scalar, the number of columns of X}
65 | 
66 | \item{envGradEst}{a character string, indicating how the
67 | environmental gradient should be fitted.
68 | 'LR' using the likelihood-ratio criterion,
69 | or 'ML' a full maximum likelihood solution}
70 | 
71 | \item{allowMissingness}{A boolean, are missing values present}
72 | 
73 | \item{naId}{The numeric index of the missing values in X}
74 | 
75 | \item{...}{further arguments passed on to other methods}
76 | }
77 | \value{
78 | : The value of the lagrangian and the constraining equations
79 | }
80 | \description{
81 | A function that returns the value of the partial derivative of the
82 |  log-likelihood ratio to alpha, keeping the response functions fixed
83 | }
84 | 


--------------------------------------------------------------------------------
/man/dNBabundsOld.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_dNBabunds.R
 3 | \name{dNBabundsOld}
 4 | \alias{dNBabundsOld}
 5 | \title{A score function for the column components of the independence model
 6 | (mean relative abundances)}
 7 | \usage{
 8 | dNBabundsOld(beta, X, reg, thetas, allowMissingness, naId)
 9 | }
10 | \arguments{
11 | \item{beta}{a vector of length p with current abundance estimates}
12 | 
13 | \item{X}{a n-by-p count matrix}
14 | 
15 | \item{reg}{a vector of length n with library sizes estimates}
16 | 
17 | \item{thetas}{a n-by-p matrix with overdispersion estimates in the rows}
18 | 
19 | \item{allowMissingness}{A boolean, are missing values present}
20 | 
21 | \item{naId}{The numeric index of the missing values in X}
22 | }
23 | \value{
24 | a vector of length p with evaluations of the score function
25 | }
26 | \description{
27 | A score function for the column components of the independence model
28 | (mean relative abundances)
29 | }
30 | 


--------------------------------------------------------------------------------
/man/dNBlibSizes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_dNBlibSizes.R
 3 | \name{dNBlibSizes}
 4 | \alias{dNBlibSizes}
 5 | \title{A score function for the row components of the independence model
 6 | (library sizes)}
 7 | \usage{
 8 | dNBlibSizes(beta, X, reg, thetas, allowMissingness, naId)
 9 | }
10 | \arguments{
11 | \item{beta}{a vector of length n with current library size estimates}
12 | 
13 | \item{X}{a n-by-p count matrix}
14 | 
15 | \item{reg}{a vector of length p with relative abundance estimates}
16 | 
17 | \item{thetas}{a n-by-p matrix with overdispersion estimates in the rows}
18 | 
19 | \item{allowMissingness}{A boolean, are missing values present}
20 | 
21 | \item{naId}{The numeric index of the missing values in X}
22 | }
23 | \value{
24 | a vector of length n with evaluations of the score function
25 | }
26 | \description{
27 | A score function for the row components of the independence model
28 | (library sizes)
29 | }
30 | 


--------------------------------------------------------------------------------
/man/dNBllcolNP.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_dNBllcolNP.R
 3 | \name{dNBllcolNP}
 4 | \alias{dNBllcolNP}
 5 | \title{Estimation of the parameters of a third degree GLM}
 6 | \usage{
 7 | dNBllcolNP(beta, X, reg, theta, muMarg, allowMissingness, naId, ...)
 8 | }
 9 | \arguments{
10 | \item{beta}{A vector of any length}
11 | 
12 | \item{X}{the data vector of length n}
13 | 
14 | \item{reg}{a nxlength(beta) regressor matrix}
15 | 
16 | \item{theta}{a scalar, the overdispersion}
17 | 
18 | \item{muMarg}{the offset of length n}
19 | 
20 | \item{allowMissingness}{A boolean, are missing values present}
21 | 
22 | \item{naId}{The numeric index of the missing values in X}
23 | 
24 | \item{...}{further arguments passed on to the jacobian}
25 | }
26 | \value{
27 | A vector of the same length as beta with evaluations
28 |  of the score function
29 | }
30 | \description{
31 | Estimation of the parameters of a third degree GLM
32 | }
33 | 


--------------------------------------------------------------------------------
/man/dNBllcolOld.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_dNBllcol.R
 3 | \name{dNBllcolOld}
 4 | \alias{dNBllcolOld}
 5 | \title{A score function for the estimation of the column scores
 6 | in an unconstrained RC(M) model}
 7 | \usage{
 8 | dNBllcolOld(
 9 |   beta,
10 |   X,
11 |   reg,
12 |   thetas,
13 |   muMarg,
14 |   k,
15 |   p,
16 |   n,
17 |   colWeights,
18 |   nLambda,
19 |   cMatK,
20 |   allowMissingness,
21 |   naId,
22 |   ...
23 | )
24 | }
25 | \arguments{
26 | \item{beta}{vector of length p+1+1+(k-1): p row scores,
27 | 1 centering, one normalization and (k-1) orhtogonality lagrangian multipliers}
28 | 
29 | \item{X}{the nxp data matrix}
30 | 
31 | \item{reg}{a nx1 regressor matrix: outer product of rowScores and psis}
32 | 
33 | \item{thetas}{nxp matrix with the dispersion parameters
34 | (converted to matrix for numeric reasons)}
35 | 
36 | \item{muMarg}{the nxp offset}
37 | 
38 | \item{k}{an integer, the dimension of the RC solution}
39 | 
40 | \item{p}{an integer, the number of taxa}
41 | 
42 | \item{n}{an integer, the number of samples}
43 | 
44 | \item{colWeights}{the weights used for the restrictions}
45 | 
46 | \item{nLambda}{an integer, the number of restrictions}
47 | 
48 | \item{cMatK}{the lower dimensions of the colScores}
49 | 
50 | \item{allowMissingness}{A boolean, are missing values present}
51 | 
52 | \item{naId}{The numeric index of the missing values in X}
53 | 
54 | \item{...}{further arguments passed on to the jacobian}
55 | }
56 | \value{
57 | A vector of length p+1+1+(k-1) with evaluations of the
58 |  derivative of lagrangian
59 | }
60 | \description{
61 | A score function for the estimation of the column scores
62 | in an unconstrained RC(M) model
63 | }
64 | 


--------------------------------------------------------------------------------
/man/dNBllcol_constr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_dNBllcol_constr.R
 3 | \name{dNBllcol_constr}
 4 | \alias{dNBllcol_constr}
 5 | \title{The score function of the response function for 1 taxon at the time}
 6 | \usage{
 7 | dNBllcol_constr(betas, X, reg, theta, muMarg, psi, allowMissingness, naId)
 8 | }
 9 | \arguments{
10 | \item{betas}{a vector of v parameters of the
11 | response function of a single taxon}
12 | 
13 | \item{X}{the count vector of length n}
14 | 
15 | \item{reg}{a n-by-v matrix of regressors}
16 | 
17 | \item{theta}{The dispersion parameter of this taxon}
18 | 
19 | \item{muMarg}{offset of length n}
20 | 
21 | \item{psi}{a scalar, the importance parameter}
22 | 
23 | \item{allowMissingness}{A boolean, are missing values present}
24 | 
25 | \item{naId}{The numeric index of the missing values in X
26 | 
27 |  Even though this approach does not imply normalization over the parameters
28 |  of all taxa, it is very fast and they can be normalized afterwards}
29 | }
30 | \value{
31 | A vector of length v with the evaluation of the score functions
32 | }
33 | \description{
34 | The score function of the response function for 1 taxon at the time
35 | }
36 | 


--------------------------------------------------------------------------------
/man/dNBllcol_constr_noLab.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_dNBllcol_constr_noLab.R
 3 | \name{dNBllcol_constr_noLab}
 4 | \alias{dNBllcol_constr_noLab}
 5 | \title{The score function of the general response function}
 6 | \usage{
 7 | dNBllcol_constr_noLab(
 8 |   betas,
 9 |   X,
10 |   reg,
11 |   thetasMat,
12 |   muMarg,
13 |   psi,
14 |   allowMissingness,
15 |   naId,
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{betas}{a vector of regression parameters with length v}
21 | 
22 | \item{X}{the nxp data matrix}
23 | 
24 | \item{reg}{a matrix of regressors of dimension nxv}
25 | 
26 | \item{thetasMat}{A matrix of dispersion parameters}
27 | 
28 | \item{muMarg}{offset matrix of dimension nxp}
29 | 
30 | \item{psi}{a scalar, the importance parameter}
31 | 
32 | \item{allowMissingness}{A boolean, are missing values present}
33 | 
34 | \item{naId}{The numeric index of the missing values in X}
35 | 
36 | \item{...}{further arguments passed on to the jacobian}
37 | }
38 | \value{
39 | The evaluation of the score functions (a vector length v)
40 | }
41 | \description{
42 | The score function of the general response function
43 | }
44 | 


--------------------------------------------------------------------------------
/man/dNBllrow.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_dNBllrow.R
 3 | \name{dNBllrow}
 4 | \alias{dNBllrow}
 5 | \title{A score function of the NB for the row scores}
 6 | \usage{
 7 | dNBllrow(
 8 |   beta,
 9 |   X,
10 |   reg,
11 |   thetas,
12 |   muMarg,
13 |   k,
14 |   n,
15 |   p,
16 |   rowWeights,
17 |   nLambda,
18 |   rMatK,
19 |   allowMissingness,
20 |   naId,
21 |   ...
22 | )
23 | }
24 | \arguments{
25 | \item{beta}{a vector of of length n + k +1 regression parameters to optimize}
26 | 
27 | \item{X}{the data matrix of dimensions nxp}
28 | 
29 | \item{reg}{a 1xp regressor matrix: outer product of column scores and psis}
30 | 
31 | \item{thetas}{nxp matrix with the dispersion parameters
32 | (converted to matrix for numeric reasons)}
33 | 
34 | \item{muMarg}{an nxp offset matrix}
35 | 
36 | \item{k}{a scalar, the dimension of the RC solution}
37 | 
38 | \item{n}{a scalar, the number of samples}
39 | 
40 | \item{p}{a scalar, the number of taxa}
41 | 
42 | \item{rowWeights}{a vector of length n, the weights used for the restrictions}
43 | 
44 | \item{nLambda}{an integer, the number of lagrangian multipliers}
45 | 
46 | \item{rMatK}{the lower dimension row scores}
47 | 
48 | \item{allowMissingness}{A boolean, are missing values present}
49 | 
50 | \item{naId}{The numeric index of the missing values in X}
51 | 
52 | \item{...}{Other arguments passed on to the jacobian}
53 | }
54 | \value{
55 | A vector of length n + k +1 with evaluations of the
56 |  derivative of the lagrangian
57 | }
58 | \description{
59 | A score function of the NB for the row scores
60 | }
61 | 


--------------------------------------------------------------------------------
/man/dNBpsis.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_dNBpsis.R
 3 | \name{dNBpsis}
 4 | \alias{dNBpsis}
 5 | \title{A score function for the psi of a given dimension}
 6 | \usage{
 7 | dNBpsis(beta, X, reg, theta, muMarg, allowMissingness, naId, ...)
 8 | }
 9 | \arguments{
10 | \item{beta}{a scalar, the initial estimate}
11 | 
12 | \item{X}{the n-by-p count matrix}
13 | 
14 | \item{reg}{the regressor matrix, the outer product of current row
15 | and column scores}
16 | 
17 | \item{theta}{a n-by-p matrix with the dispersion parameters}
18 | 
19 | \item{muMarg}{the nxp offset matrix}
20 | 
21 | \item{allowMissingness}{A boolean, are missing values present}
22 | 
23 | \item{naId}{The numeric index of the missing values in X}
24 | 
25 | \item{...}{other arguments passed on to the jacobian}
26 | }
27 | \value{
28 | The evaluation of the score function at beta, a scalar
29 | }
30 | \description{
31 | A score function for the psi of a given dimension
32 | }
33 | 


--------------------------------------------------------------------------------
/man/deviances.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_deviances.R
 3 | \name{deviances}
 4 | \alias{deviances}
 5 | \title{A function to extract deviances for all dimension,
 6 | including after filtering on confounders}
 7 | \usage{
 8 | deviances(rcm, squaredSum = FALSE)
 9 | }
10 | \arguments{
11 | \item{rcm}{an object of the RCM class}
12 | 
13 | \item{squaredSum}{a boolean, should total deviance be returned?
14 | 
15 | Total deviances can be deceptive and not correspond to the differences in
16 | log-likelihood. As the dispersion is different for each model.
17 |  To compare models it is better to compare likelihoods.}
18 | }
19 | \value{
20 | If Sum is FALSE, a named array of deviance residuals of the
21 |  independence model and all models with dimension 1 to k, including after
22 |  filtering on confounders. Otherwise a table with total deviances (the sum of
23 |  squared deviance residuals), deviance explained and cumulative deviance
24 |  explained.
25 | }
26 | \description{
27 | A function to extract deviances for all dimension,
28 | including after filtering on confounders
29 | }
30 | 


--------------------------------------------------------------------------------
/man/ellipseCoord.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_ellipseCoord.R
 3 | \name{ellipseCoord}
 4 | \alias{ellipseCoord}
 5 | \title{A function that returns the coordinates of an ellipse}
 6 | \usage{
 7 | ellipseCoord(a, b, c, quadDrop = 0.95, nPoints = 100)
 8 | }
 9 | \arguments{
10 | \item{a, b, c}{parameters of the quadratic function a^2x+bx+c}
11 | 
12 | \item{quadDrop}{A scalar, fraction of peak height at which to draw
13 | the ellipse}
14 | 
15 | \item{nPoints}{an integer, number of points to use to draw the ellipse}
16 | }
17 | \value{
18 | a matrix with x and y coordinates of the ellipse
19 | }
20 | \description{
21 | A function that returns the coordinates of an ellipse
22 | }
23 | 


--------------------------------------------------------------------------------
/man/estDisp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_estDisp.R
 3 | \name{estDisp}
 4 | \alias{estDisp}
 5 | \title{Estimate the overdispersion}
 6 | \usage{
 7 | estDisp(
 8 |   X,
 9 |   cMat = NULL,
10 |   rMat = NULL,
11 |   muMarg,
12 |   psis,
13 |   trended.dispersion = NULL,
14 |   prior.df = 10,
15 |   dispWeights = NULL,
16 |   rowMat = NULL,
17 |   allowMissingness = FALSE,
18 |   naId
19 | )
20 | }
21 | \arguments{
22 | \item{X}{the data matrix of dimensions nxp}
23 | 
24 | \item{cMat}{a 1xp colum scores matrix}
25 | 
26 | \item{rMat}{a nx1 rowscores matrix, if unconstrained}
27 | 
28 | \item{muMarg}{an nxp offset matrix}
29 | 
30 | \item{psis}{a scalar, the current psi estimate}
31 | 
32 | \item{trended.dispersion}{a vector of length p with pre-calculated
33 | trended.dispersion estimates. They do not vary in function
34 | of the offset anyway}
35 | 
36 | \item{prior.df}{an integer, number of degrees of freedom of the prior
37 | for the Bayesian shrinkage}
38 | 
39 | \item{dispWeights}{Weights for estimating the dispersion
40 | in a zero-inflated model}
41 | 
42 | \item{rowMat}{matrix of row scores in case of constrained ordination}
43 | 
44 | \item{allowMissingness}{A boolean, are missing values present}
45 | 
46 | \item{naId}{The numeric index of the missing values in X}
47 | }
48 | \value{
49 | A vector of length p with dispersion estimates
50 | }
51 | \description{
52 | Estimate the overdispersion
53 | }
54 | \details{
55 | Information between taxa is shared with empirical Bayes
56 |  using the edgeR pacakage, where the time-limiting steps are programmed in C.
57 | }
58 | 


--------------------------------------------------------------------------------
/man/estNBparams.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_estNBparams.R
 3 | \name{estNBparams}
 4 | \alias{estNBparams}
 5 | \title{A function to estimate the taxon-wise NB-params}
 6 | \usage{
 7 | estNBparams(
 8 |   design,
 9 |   thetas,
10 |   muMarg,
11 |   psi,
12 |   X,
13 |   nleqslv.control,
14 |   ncols,
15 |   initParam,
16 |   v,
17 |   dynamic = FALSE,
18 |   envRange,
19 |   allowMissingness,
20 |   naId
21 | )
22 | }
23 | \arguments{
24 | \item{design}{an n-by-v design matrix}
25 | 
26 | \item{thetas}{a vector of dispersion parameters of length p}
27 | 
28 | \item{muMarg}{an offset matrix}
29 | 
30 | \item{psi}{a scalar, the importance parameter}
31 | 
32 | \item{X}{the data matrix}
33 | 
34 | \item{nleqslv.control}{a list of control elements, passed on to nleqslv()}
35 | 
36 | \item{ncols}{an integer, the number of columns of X}
37 | 
38 | \item{initParam}{a v-by-p matrix of initial parameter estimates}
39 | 
40 | \item{v}{an integer, the number of parameters per taxon}
41 | 
42 | \item{dynamic}{a boolean, should response function be determined dynamically?
43 | See details}
44 | 
45 | \item{envRange}{a vector of length 2, giving the range of observed
46 | environmental scores}
47 | 
48 | \item{allowMissingness}{A boolean, are missing values present}
49 | 
50 | \item{naId}{The numeric index of the missing values in X
51 | 
52 | If dynamic is TRUE, quadratic response functions are fitted for every taxon.
53 | If the optimum falls outside of the observed range of environmental scores,
54 | a linear response function is fitted instead}
55 | }
56 | \value{
57 | a v-by-p matrix of parameters of the response function
58 | }
59 | \description{
60 | A function to estimate the taxon-wise NB-params
61 | }
62 | 


--------------------------------------------------------------------------------
/man/estNBparamsNoLab.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_estNBparamsNoLab.R
 3 | \name{estNBparamsNoLab}
 4 | \alias{estNBparamsNoLab}
 5 | \title{A function to estimate the NB-params ignoring the taxon labels}
 6 | \usage{
 7 | estNBparamsNoLab(
 8 |   design,
 9 |   thetasMat,
10 |   muMarg,
11 |   psi,
12 |   X,
13 |   nleqslv.control,
14 |   initParam,
15 |   n,
16 |   v,
17 |   dynamic,
18 |   envRange,
19 |   preFabMat,
20 |   allowMissingness,
21 |   naId
22 | )
23 | }
24 | \arguments{
25 | \item{design}{an n-by-v design matrix}
26 | 
27 | \item{thetasMat}{A matrix of dispersion parameters}
28 | 
29 | \item{muMarg}{an offset matrix}
30 | 
31 | \item{psi}{a scalar, the importance parameter}
32 | 
33 | \item{X}{the data matrix}
34 | 
35 | \item{nleqslv.control}{a list of control elements, passed on to nleqslv()}
36 | 
37 | \item{initParam}{a vector of length v of initial parameter estimates}
38 | 
39 | \item{n}{an integer, the number of samples}
40 | 
41 | \item{v}{an integer, the number of parameters per taxon}
42 | 
43 | \item{dynamic}{a boolean, should response function be determined dynamically?
44 | See details}
45 | 
46 | \item{envRange}{a vector of length 2,
47 | giving the range of observed environmental scores}
48 | 
49 | \item{preFabMat}{a pre-fabricated auxiliary matrix}
50 | 
51 | \item{allowMissingness}{A boolean, are missing values present}
52 | 
53 | \item{naId}{The numeric index of the missing values in X
54 | 
55 | If dynamic is TRUE, quadratic response functions are fitted for every taxon.
56 |  If the optimum falls outside of the observed range of environmental scores,
57 |  a linear response function is fitted instead}
58 | }
59 | \value{
60 | a v-by-p matrix of parameters of the response function
61 | }
62 | \description{
63 | A function to estimate the NB-params ignoring the taxon labels
64 | }
65 | 


--------------------------------------------------------------------------------
/man/estNPresp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_estNPresp.R
 3 | \name{estNPresp}
 4 | \alias{estNPresp}
 5 | \title{Estimate the taxon-wise response functions non-parametrically}
 6 | \usage{
 7 | estNPresp(
 8 |   sampleScore,
 9 |   muMarg,
10 |   X,
11 |   ncols,
12 |   thetas,
13 |   n,
14 |   coefInit,
15 |   coefInitOverall,
16 |   dfSpline,
17 |   vgamMaxit,
18 |   degree,
19 |   verbose,
20 |   allowMissingness,
21 |   naId,
22 |   ...
23 | )
24 | }
25 | \arguments{
26 | \item{sampleScore}{a vector of length n with environmental scores}
27 | 
28 | \item{muMarg}{the offset matrix}
29 | 
30 | \item{X}{the n-by-p data matrix}
31 | 
32 | \item{ncols}{an integer, the number of columns of X}
33 | 
34 | \item{thetas}{a vector of length p with dispersion parameters}
35 | 
36 | \item{n}{an integer, the number of samples}
37 | 
38 | \item{coefInit}{a 2-by-p matrix with current taxon-wise parameter estimates}
39 | 
40 | \item{coefInitOverall}{a vector of length 2 with current overall parameters}
41 | 
42 | \item{dfSpline}{a scalar, the degrees of freedom for the smoothing spline.}
43 | 
44 | \item{vgamMaxit}{Maximal number of iterations in the fitting of the GAM model}
45 | 
46 | \item{degree}{The degree if the parametric fit if the VGAM fit fails}
47 | 
48 | \item{verbose}{a boolean, should number of failed fits be reported}
49 | 
50 | \item{allowMissingness}{A boolean, are missing values present}
51 | 
52 | \item{naId}{The numeric index of the missing values in X}
53 | 
54 | \item{...}{further arguments, passed on to the VGAM:::vgam() function
55 | 
56 | The negative binomial likelihood is still maximized,
57 |  but now the response function is a non-parametric one.
58 |  To avoid a perfect fit and overly flexible functions,
59 |  we enforce smoothness restrictions. In practice we use a
60 |  generalized additive model (GAM), i.e. with splines.
61 |  The same fitting procedure is carried out ignoring species labels.
62 |   We do not normalize the parameters related to the splines:
63 |   the psis can be calculated afterwards.}
64 | }
65 | \value{
66 | A list with components
67 | \item{taxonCoef}{The fitted coefficients of the sample-wise response curves}
68 | \item{splinesList}{A list of all the B-spline objects}
69 | \item{rowMar}{The row matrix}
70 | \item{overall}{The overall fit ignoring taxon labels,
71 |  as a list of coefficients and a spline}
72 | \item{rowVecOverall}{The overall row vector, ignoring taxon labels}
73 | }
74 | \description{
75 | Estimate the taxon-wise response functions non-parametrically
76 | }
77 | 


--------------------------------------------------------------------------------
/man/extractCoord.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_extractCoord.R
 3 | \name{extractCoord}
 4 | \alias{extractCoord}
 5 | \title{A function to extract plotting coordinates, either for plot.RCM
 6 |  or to export to other plotting software}
 7 | \usage{
 8 | extractCoord(RCM, Dim = c(1, 2))
 9 | }
10 | \arguments{
11 | \item{RCM}{an RCm object}
12 | 
13 | \item{Dim}{an integer vector of required dimensions
14 | 
15 | The parameters for the ellipses of the quadratic response function come
16 |  from the parametrization f(x) = a*x^2 + b*x + c
17 | For an unconstrained object the row and column coordinates are returned
18 |  in separate matrices. The row names will correspond to the labels.
19 |  For a constrained analysis also the variable points are returned.
20 |  All variables still need to be scaled to optimally fill the available space}
21 | }
22 | \value{
23 | A list with components
24 | \item{samples}{A dataframe of sample scores}
25 | \item{species}{A dataframe of column scores, with origin, slope,
26 | end and ellipse coordinates as needed}
27 | \item{variables}{A dataframe of variable scores,
28 | loadings of the environmental gradient}
29 | }
30 | \description{
31 | A function to extract plotting coordinates, either for plot.RCM
32 |  or to export to other plotting software
33 | }
34 | \examples{
35 | data(Zeller)
36 | require(phyloseq)
37 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:100],
38 | prune_samples(sample_names(Zeller)[1:50], Zeller))
39 | zellerRCM = RCM(tmpPhy, k = 2, round = TRUE)
40 | coordsZeller = extractCoord(zellerRCM)
41 | }
42 | 


--------------------------------------------------------------------------------
/man/extractE.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_extractE.R
 3 | \name{extractE}
 4 | \alias{extractE}
 5 | \title{A function to extract a matrix of expected values
 6 |  for any dimension of the fit}
 7 | \usage{
 8 | extractE(rcm, Dim = rcm$k)
 9 | }
10 | \arguments{
11 | \item{rcm}{an object of class RCM}
12 | 
13 | \item{Dim}{the desired dimension. Defaults to the maximum of the fit.
14 | Choose 0 for the independence model, 0.5 for the confounders filter model.}
15 | }
16 | \value{
17 | The matrix of expected values
18 | }
19 | \description{
20 | A function to extract a matrix of expected values
21 |  for any dimension of the fit
22 | }
23 | 


--------------------------------------------------------------------------------
/man/filterConfounders.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_filterConfounders.R
 3 | \name{filterConfounders}
 4 | \alias{filterConfounders}
 5 | \title{Filters out the effect of known confounders. This is done by fitting
 6 | interactions of every taxon with the levels of the confounders.
 7 | It returns a modified offset matrix for the remainder
 8 | of the fitting procedure.}
 9 | \usage{
10 | filterConfounders(
11 |   muMarg,
12 |   confMat,
13 |   X,
14 |   thetas,
15 |   p,
16 |   n,
17 |   nleqslv.control,
18 |   trended.dispersion,
19 |   tol = 0.001,
20 |   maxIt = 20,
21 |   allowMissingness,
22 |   naId
23 | )
24 | }
25 | \arguments{
26 | \item{muMarg}{a nxp matrix, the current offset}
27 | 
28 | \item{confMat}{a nxt confounder matrix}
29 | 
30 | \item{X}{the nxp data matrix}
31 | 
32 | \item{thetas}{a vector of length p with the current dispersion estimates}
33 | 
34 | \item{p}{an integer, the number of columns of X}
35 | 
36 | \item{n}{an integer, the number of rows of X}
37 | 
38 | \item{nleqslv.control}{see nleqslv()}
39 | 
40 | \item{trended.dispersion}{a vector of length p
41 | with trended dispersion estimates}
42 | 
43 | \item{tol}{a scalar, the convergence tolerance}
44 | 
45 | \item{maxIt}{maximum number of iterations}
46 | 
47 | \item{allowMissingness}{A boolean, are missing values present}
48 | 
49 | \item{naId}{The numeric index of the missing values in X
50 | 
51 | Fits the negative binomial mean parameters and overdispersion parameters
52 |  iteratively.
53 |  Convergence is determined based on the L2-norm
54 |   of the absolute change of mean parameters}
55 | }
56 | \value{
57 | a list with components:
58 | \item{thetas}{new theta estimates}
59 | \item{NB_params}{The estimated parameters of the interaction terms}
60 | }
61 | \description{
62 | Filters out the effect of known confounders. This is done by fitting
63 | interactions of every taxon with the levels of the confounders.
64 | It returns a modified offset matrix for the remainder
65 | of the fitting procedure.
66 | }
67 | 


--------------------------------------------------------------------------------
/man/getDevMat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_getDevMat.R
 3 | \name{getDevMat}
 4 | \alias{getDevMat}
 5 | \title{ACalculate the matrix of deviance residuals}
 6 | \usage{
 7 | getDevMat(X, thetaMat, mu)
 8 | }
 9 | \arguments{
10 | \item{X}{the data matrix}
11 | 
12 | \item{thetaMat}{the matrix of dispersions}
13 | 
14 | \item{mu}{the matrix of means}
15 | }
16 | \value{
17 | The matrix of deviance residuals
18 | }
19 | \description{
20 | ACalculate the matrix of deviance residuals
21 | }
22 | 


--------------------------------------------------------------------------------
/man/getDevianceRes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_getDevianceRes.R
 3 | \name{getDevianceRes}
 4 | \alias{getDevianceRes}
 5 | \title{A function to calculate the matrix of deviance residuals.}
 6 | \usage{
 7 | getDevianceRes(RCM, Dim = RCM$k)
 8 | }
 9 | \arguments{
10 | \item{RCM}{an RCM object}
11 | 
12 | \item{Dim}{The dimensions to use
13 | 
14 | For the deviance residuals we use the overdispersions from the reduced model.
15 | Standard dimensions used are only first and second,
16 |  since these are also plotted}
17 | }
18 | \value{
19 | A matrix with deviance residuals of the same size
20 | as the original data matrix
21 | }
22 | \description{
23 | A function to calculate the matrix of deviance residuals.
24 | }
25 | \examples{
26 | data(Zeller)
27 | require(phyloseq)
28 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:120],
29 | prune_samples(sample_names(Zeller)[1:75], Zeller))
30 | #Subset for a quick fit
31 | zellerRCM = RCM(tmpPhy, k = 2, round = TRUE, prevCutOff = 0.03)
32 | devRes = getDevianceRes(zellerRCM)
33 | }
34 | 


--------------------------------------------------------------------------------
/man/getDistCoord.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_getDistCoord.R
 3 | \name{getDistCoord}
 4 | \alias{getDistCoord}
 5 | \title{Get coordinates of a distance object of n observations for the provided indices}
 6 | \usage{
 7 | getDistCoord(indices, n)
 8 | }
 9 | \arguments{
10 | \item{indices}{The row indices for which distance indices are wanted}
11 | 
12 | \item{n}{The total number of objects in the distance matrix}
13 | }
14 | \value{
15 | a vector of coordinates
16 | }
17 | \description{
18 | Get coordinates of a distance object of n observations for the provided indices
19 | }
20 | 


--------------------------------------------------------------------------------
/man/getInflCol.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_getInflCol.R
 3 | \name{getInflCol}
 4 | \alias{getInflCol}
 5 | \title{A function to extract the influence for a given parameter index}
 6 | \usage{
 7 | getInflCol(score, InvJac, taxon)
 8 | }
 9 | \arguments{
10 | \item{score}{a score matrix}
11 | 
12 | \item{InvJac}{The inverted jacobian}
13 | 
14 | \item{taxon}{The taxon name or index}
15 | }
16 | \value{
17 | A matrix with all observations' influence on the given taxon
18 | }
19 | \description{
20 | A function to extract the influence for a given parameter index
21 | }
22 | 


--------------------------------------------------------------------------------
/man/getInflRow.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_getInflRow.R
 3 | \name{getInflRow}
 4 | \alias{getInflRow}
 5 | \title{Extract the influence of all observations on a given row score}
 6 | \usage{
 7 | getInflRow(score, InvJac, sample)
 8 | }
 9 | \arguments{
10 | \item{score}{the score function evaluated for every observation}
11 | 
12 | \item{InvJac}{The inverse jacobian}
13 | 
14 | \item{sample}{the row score or  sample index}
15 | }
16 | \value{
17 | A matrix with all observations' influence on the row score
18 | }
19 | \description{
20 | Extract the influence of all observations on a given row score
21 | }
22 | 


--------------------------------------------------------------------------------
/man/getInt.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_getInt.R
 3 | \name{getInt}
 4 | \alias{getInt}
 5 | \title{Integrate the spline of an vgam object}
 6 | \usage{
 7 | getInt(coef, spline, sampleScore, stop.on.error = FALSE, ...)
 8 | }
 9 | \arguments{
10 | \item{coef}{A vector of coefficients}
11 | 
12 | \item{spline}{The cubic smoothing spline}
13 | 
14 | \item{sampleScore}{the observed environmental scores}
15 | 
16 | \item{stop.on.error}{see ?integrate}
17 | 
18 | \item{...}{additional arguments passed on to integrate()}
19 | }
20 | \value{
21 | a scalar, the value of the integral
22 | }
23 | \description{
24 | Integrate the spline of an vgam object
25 | }
26 | 


--------------------------------------------------------------------------------
/man/getLogLik.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_getLogLik.R
 3 | \name{getLogLik}
 4 | \alias{getLogLik}
 5 | \title{Extract the logged likelihood of every count}
 6 | \usage{
 7 | getLogLik(rcm, Dim)
 8 | }
 9 | \arguments{
10 | \item{rcm}{an RCM object}
11 | 
12 | \item{Dim}{A vector of integers indicating which dimensions to take along,
13 | or Inf for the saturated model, or 0 for the independence model}
14 | }
15 | \value{
16 | A matrix with logged likelihood of the size of the data matrix
17 | }
18 | \description{
19 | Extract the logged likelihood of every count
20 | }
21 | 


--------------------------------------------------------------------------------
/man/getModelMat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_getModelMat.R
 3 | \name{getModelMat}
 4 | \alias{getModelMat}
 5 | \title{A function to construct a model matrix of a certain degree}
 6 | \usage{
 7 | getModelMat(y, degree)
 8 | }
 9 | \arguments{
10 | \item{y}{the variable}
11 | 
12 | \item{degree}{the degree}
13 | }
14 | \value{
15 | A model matrix with degree+1 columns and as many rows as lenght(y)
16 | }
17 | \description{
18 | A function to construct a model matrix of a certain degree
19 | }
20 | 


--------------------------------------------------------------------------------
/man/getRowMat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_getRowMat.R
 3 | \name{getRowMat}
 4 | \alias{getRowMat}
 5 | \title{Return a matrix of row scores}
 6 | \usage{
 7 | getRowMat(sampleScore, responseFun, NB_params, taxonCoef, spline)
 8 | }
 9 | \arguments{
10 | \item{sampleScore}{a vector of length n with sample scores}
11 | 
12 | \item{responseFun}{a character string, the type of response function,
13 | either 'linear' or 'quadratic'}
14 | 
15 | \item{NB_params}{a v-by-p matrix of parameters of theresponse function}
16 | 
17 | \item{taxonCoef}{A vector of coefficients}
18 | 
19 | \item{spline}{The cubic smoothing spline
20 | 
21 | Multiplying the old offset with the exponent matrix times
22 | the importance parameter obtains the new one based on lower dimension}
23 | }
24 | \value{
25 | a n-by-p matrix of scores
26 | }
27 | \description{
28 | Return a matrix of row scores
29 | }
30 | 


--------------------------------------------------------------------------------
/man/heq_nb.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_heq_nb.R
 3 | \name{heq_nb}
 4 | \alias{heq_nb}
 5 | \title{Define linear equality constraints for env. gradient}
 6 | \usage{
 7 | heq_nb(Alpha, alphaK, d, k, centMat, ...)
 8 | }
 9 | \arguments{
10 | \item{Alpha}{the current estimate of the environmental gradient}
11 | 
12 | \item{alphaK}{a matrix with the environmental gradients
13 | of the lower dimensions}
14 | 
15 | \item{d}{an integer, the number of environmental variables, including dummies}
16 | 
17 | \item{k}{an integer, the current dimension}
18 | 
19 | \item{centMat}{a centering matrix}
20 | 
21 | \item{...}{further arguments for other methods, not needed in this one
22 | 
23 | The centering matrix centMat ensures that the parameters of the dummies
24 | of the same categorical variable sum to zero}
25 | }
26 | \value{
27 | a vector of with current values of the constraints,
28 |  should evolve to zeroes only
29 | }
30 | \description{
31 | Define linear equality constraints for env. gradient
32 | }
33 | 


--------------------------------------------------------------------------------
/man/heq_nb_jac.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_heq_nb_jac.R
 3 | \name{heq_nb_jac}
 4 | \alias{heq_nb_jac}
 5 | \title{The jacobian of the linear equality constraints}
 6 | \usage{
 7 | heq_nb_jac(Alpha, alphaK, d, k, centMat, ...)
 8 | }
 9 | \arguments{
10 | \item{Alpha}{the current estimate of the environmental gradient}
11 | 
12 | \item{alphaK}{a matrix with the environmental gradients
13 | of the lower dimensions}
14 | 
15 | \item{d}{an integer, the number of environmental variables,
16 | including dummies}
17 | 
18 | \item{k}{an integer, the current dimension}
19 | 
20 | \item{centMat}{a centering matrix}
21 | 
22 | \item{...}{further arguments for other methods, not needed in this one}
23 | }
24 | \value{
25 | The jacobian matrix
26 | }
27 | \description{
28 | The jacobian of the linear equality constraints
29 | }
30 | 


--------------------------------------------------------------------------------
/man/indentPlot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_indentPlot.R
 3 | \name{indentPlot}
 4 | \alias{indentPlot}
 5 | \title{Functions to indent the plot to include the entire labels}
 6 | \usage{
 7 | indentPlot(plt, xInd = 0, yInd = 0)
 8 | }
 9 | \arguments{
10 | \item{plt}{a ggplot object}
11 | 
12 | \item{xInd}{a scalar or a vector of length 2,
13 | specifying the indentation left and right of the plot to allow for the labels
14 |  to be printed entirely}
15 | 
16 | \item{yInd}{a a scalar or a vector of length 2,
17 | specifying the indentation top and bottom of the plot
18 |  to allow for the labels to be printed entirely}
19 | }
20 | \value{
21 | a ggplot object, squared
22 | }
23 | \description{
24 | Functions to indent the plot to include the entire labels
25 | }
26 | 


--------------------------------------------------------------------------------
/man/inertia.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_inertia.R
 3 | \name{inertia}
 4 | \alias{inertia}
 5 | \title{Calculate the log-likelihoods of all possible models}
 6 | \usage{
 7 | inertia(rcm)
 8 | }
 9 | \arguments{
10 | \item{rcm}{an object of the RCM class}
11 | }
12 | \value{
13 | A table with inertias, proportion inertia explained
14 | and cumulative proportion of inertia explained.
15 | }
16 | \description{
17 | Calculate the log-likelihoods of all possible models
18 | }
19 | \examples{
20 | data(Zeller)
21 | require(phyloseq)
22 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:100],
23 | prune_samples(sample_names(Zeller)[1:50], Zeller))
24 | zellerRCM = RCM(tmpPhy, round = TRUE)
25 | inertia(zellerRCM)
26 | }
27 | 


--------------------------------------------------------------------------------
/man/liks.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_liks.R
 3 | \name{liks}
 4 | \alias{liks}
 5 | \title{Calculate the log-likelihoods of all possible models}
 6 | \usage{
 7 | liks(rcm, Sum = TRUE)
 8 | }
 9 | \arguments{
10 | \item{rcm}{an object of the RCM class}
11 | 
12 | \item{Sum}{a boolean, should log-likelihoods be summed?}
13 | }
14 | \value{
15 | If Sum is FALSE, a named array log-likelihoods
16 | of the independence model and all models with dimension 1 to k,
17 | including after filtering on confounders.
18 | Otherwise a table with log-likelihoods,
19 | deviance explained and cumulative deviance explained.
20 | }
21 | \description{
22 | Calculate the log-likelihoods of all possible models
23 | }
24 | \examples{
25 | data(Zeller)
26 | require(phyloseq)
27 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:100],
28 | prune_samples(sample_names(Zeller)[1:50], Zeller))
29 | zellerRCM = RCM(tmpPhy, round = TRUE)
30 | liks(zellerRCM)
31 | }
32 | 


--------------------------------------------------------------------------------
/man/permanova.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_permanova.R
 3 | \name{permanova}
 4 | \alias{permanova}
 5 | \title{Perform a PERMANOVA analysis for group differences of a predefined cofactor using the pseudo F-statistic}
 6 | \usage{
 7 | permanova(
 8 |   rcmObj,
 9 |   groups,
10 |   nPerm = 10000,
11 |   Dim = seq_len(rcmObj$k),
12 |   verbose = TRUE
13 | )
14 | }
15 | \arguments{
16 | \item{rcmObj}{an RCM object}
17 | 
18 | \item{groups}{a factor of length n with cluster memberships, or a name of a variable contained in the RCM object}
19 | 
20 | \item{nPerm}{Number of permutations in the PERMANOVA}
21 | 
22 | \item{Dim}{Dimensions on which the test should be performed. Defaults to all dimensions of the fitted RCM object.}
23 | 
24 | \item{verbose}{a boolean, should output be printed?}
25 | }
26 | \value{
27 | A list with components
28 | \item{statistic}{The pseudo F-statistic}
29 | \item{p.value}{The p-value of the PERMANOVA}
30 | }
31 | \description{
32 | Perform a PERMANOVA analysis for group differences of a predefined cofactor using the pseudo F-statistic
33 | }
34 | \examples{
35 | data(Zeller)
36 | require(phyloseq)
37 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:100],
38 | prune_samples(sample_names(Zeller)[1:50], Zeller))
39 | zellerRCM = RCM(tmpPhy, round = TRUE)
40 | zellerPermanova = permanova(zellerRCM, "Diagnosis")
41 | }
42 | \seealso{
43 | \code{\link{RCM}}
44 | }
45 | 


--------------------------------------------------------------------------------
/man/plotRespFun.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_plotRespFun.R
 3 | \name{plotRespFun}
 4 | \alias{plotRespFun}
 5 | \title{Plot the non-parametric response functions}
 6 | \usage{
 7 | plotRespFun(
 8 |   RCM,
 9 |   taxa = NULL,
10 |   type = "link",
11 |   logTransformYAxis = FALSE,
12 |   addSamples = TRUE,
13 |   samSize = NULL,
14 |   Dim = 1L,
15 |   nPoints = 100L,
16 |   labSize = 2.5,
17 |   yLocVar = NULL,
18 |   yLocSam = NULL,
19 |   Palette = "Set3",
20 |   addJitter = FALSE,
21 |   nTaxa = 9L,
22 |   angle = 90,
23 |   legendLabSize = 15,
24 |   legendTitleSize = 16,
25 |   axisLabSize = 14,
26 |   axisTitleSize = 16,
27 |   lineSize = 0.75,
28 |   ...
29 | )
30 | }
31 | \arguments{
32 | \item{RCM}{an RCM object}
33 | 
34 | \item{taxa}{a character vector of taxa to be plotted}
35 | 
36 | \item{type}{a character string, plot the response function on the log-scale
37 | ('link') or the abundance scale 'response', similar to predict.glm().}
38 | 
39 | \item{logTransformYAxis}{a boolean, should y-axis be log transformed?}
40 | 
41 | \item{addSamples}{a boolean, should sample points be shown?}
42 | 
43 | \item{samSize}{a sample variable name or a vector of length equal to the
44 | number of samples, for the sample sizes}
45 | 
46 | \item{Dim}{An integer, the dimension to be plotted}
47 | 
48 | \item{nPoints}{the number of points to be used to plot the lines}
49 | 
50 | \item{labSize}{the label size for the variables}
51 | 
52 | \item{yLocVar}{the y-location of the variables, recycled if necessary}
53 | 
54 | \item{yLocSam}{the y-location of the samples, recycled if necessary}
55 | 
56 | \item{Palette}{which color palette to use}
57 | 
58 | \item{addJitter}{A boolean, should variable names be jittered to make
59 | them more readable}
60 | 
61 | \item{nTaxa}{an integer, number of taxa to plot}
62 | 
63 | \item{angle}{angle at which variable labels should be turned}
64 | 
65 | \item{legendLabSize}{size of the legend labels}
66 | 
67 | \item{legendTitleSize}{size of the legend title}
68 | 
69 | \item{axisLabSize}{size of the axis labels}
70 | 
71 | \item{axisTitleSize}{size of the axis title}
72 | 
73 | \item{lineSize}{size of the response function lines}
74 | 
75 | \item{...}{Other argumens passed on to the ggplot() function}
76 | }
77 | \value{
78 | Plots a ggplot2-object to output
79 | }
80 | \description{
81 | Plots a number of response functions over the observed range of
82 |  the environmental score. If no taxa are provided those who react most
83 |  strongly to the environmental score are chosen.
84 | }
85 | \examples{
86 | data(Zeller)
87 | require(phyloseq)
88 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:100],
89 | prune_samples(sample_names(Zeller)[1:50], Zeller))
90 | #Subset for a quick fit
91 | zellerRCMnp = RCM(tmpPhy, k = 2,
92 | covariates = c('BMI','Age','Country','Diagnosis','Gender'),
93 | round = TRUE, responseFun = 'nonparametric')
94 | plotRespFun(zellerRCMnp)
95 | }
96 | \seealso{
97 | \code{\link{RCM}}, \code{\link{plot.RCM}},\code{\link{residualPlot}}
98 | }
99 | 


--------------------------------------------------------------------------------
/man/residualPlot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_residualPlot.R
 3 | \name{residualPlot}
 4 | \alias{residualPlot}
 5 | \title{Make residual plots}
 6 | \usage{
 7 | residualPlot(
 8 |   RCM,
 9 |   Dim = 1,
10 |   whichTaxa = "response",
11 |   resid = "Deviance",
12 |   numTaxa = 9,
13 |   mfrow = NULL,
14 |   samColour = NULL,
15 |   samShape = NULL,
16 |   legendLabSize = 15,
17 |   legendTitleSize = 16,
18 |   axisLabSize = 14,
19 |   axisTitleSize = 16,
20 |   taxTitle = TRUE,
21 |   h = 0
22 | )
23 | }
24 | \arguments{
25 | \item{RCM}{an RCM object}
26 | 
27 | \item{Dim}{an integer, which dimension?}
28 | 
29 | \item{whichTaxa}{a character string or a character vector,
30 | for which taxa to plot the diagnostic plots}
31 | 
32 | \item{resid}{the type of residuals to use, either 'Deviance' or 'Pearson'}
33 | 
34 | \item{numTaxa}{an integer, the number of taxa to plot}
35 | 
36 | \item{mfrow}{passed on to par().
37 | If not supplied will be calculated based on numTaxa}
38 | 
39 | \item{samColour, samShape}{Vectors or character strings denoting
40 | the sample colour and shape respectively. If character string is provided,
41 | the variables with this name is extracted from the phyloseq object in RCM}
42 | 
43 | \item{legendLabSize}{size of the legend labels}
44 | 
45 | \item{legendTitleSize}{size of the legend title}
46 | 
47 | \item{axisLabSize}{size of the axis labels}
48 | 
49 | \item{axisTitleSize}{size of the axis title}
50 | 
51 | \item{taxTitle}{A boolean, should taxon title be printed}
52 | 
53 | \item{h}{Position of reference line. Set to NA for no line}
54 | }
55 | \value{
56 | Plots a ggplot2-object to output
57 | }
58 | \description{
59 | Make residual plots
60 | }
61 | \details{
62 | If whichTaxa is 'run' or 'response' the taxa with the highest
63 | run statistics or steepest slopes of the response function are plotted,
64 |  numTax indicates the number. If whichTaxa is a character vector,
65 |  these are interpreted as taxon names to plot.
66 |  This function is mainly meant for linear response functions,
67 |  but can be used for others too.
68 |  The runs test statistic from the tseries package is used.
69 | }
70 | \examples{
71 | data(Zeller)
72 | require(phyloseq)
73 | tmpPhy = prune_taxa(taxa_names(Zeller)[1:120],
74 | prune_samples(sample_names(Zeller)[1:75], Zeller))
75 | #Subset for a quick fit
76 | zellerRCMlin = RCM(tmpPhy, k = 2,
77 | covariates = c('BMI','Age','Country','Diagnosis','Gender'),
78 | responseFun = 'linear', round = TRUE, prevCutOff = 0.03)
79 | residualPlot(zellerRCMlin)
80 | }
81 | \seealso{
82 | \code{\link{RCM}}
83 | }
84 | 


--------------------------------------------------------------------------------
/man/respFunJacMat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_respFunJacMat.R
 3 | \name{respFunJacMat}
 4 | \alias{respFunJacMat}
 5 | \title{Calculates the Jacobian of the parametric response functions}
 6 | \usage{
 7 | respFunJacMat(
 8 |   betas,
 9 |   X,
10 |   reg,
11 |   thetaMat,
12 |   muMarg,
13 |   psi,
14 |   v,
15 |   p,
16 |   IDmat,
17 |   IndVec,
18 |   allowMissingness,
19 |   naId
20 | )
21 | }
22 | \arguments{
23 | \item{betas}{a vector of length (deg+1)*(p+1) with regression parameters
24 | with deg the degree of the response function and the lagrangian multipliers}
25 | 
26 | \item{X}{the nxp data matrix}
27 | 
28 | \item{reg}{a vector of regressors with the dimension n-by-v}
29 | 
30 | \item{thetaMat}{The n-by-p matrix with dispersion parameters}
31 | 
32 | \item{muMarg}{offset matrix of size nxp}
33 | 
34 | \item{psi}{a scalar, the importance parameter}
35 | 
36 | \item{v}{an integer, one plus the degree of the response function}
37 | 
38 | \item{p}{an integer, the number of taxa}
39 | 
40 | \item{IDmat}{an logical matrix with indices of non-zero elements}
41 | 
42 | \item{IndVec}{a vector with indices with non-zero elements}
43 | 
44 | \item{allowMissingness}{A boolean, are missing values present}
45 | 
46 | \item{naId}{The numeric index of the missing values in X}
47 | }
48 | \value{
49 | The jacobian, a square matrix of dimension (deg+1)*(p+1)
50 | }
51 | \description{
52 | Calculates the Jacobian of the parametric response functions
53 | }
54 | 


--------------------------------------------------------------------------------
/man/respFunScoreMat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_respFunScoreMat.R
 3 | \name{respFunScoreMat}
 4 | \alias{respFunScoreMat}
 5 | \title{Derivative of the Lagrangian of the parametric response function}
 6 | \usage{
 7 | respFunScoreMat(
 8 |   betas,
 9 |   X,
10 |   reg,
11 |   thetaMat,
12 |   muMarg,
13 |   psi,
14 |   p,
15 |   v,
16 |   allowMissingness,
17 |   naId,
18 |   ...
19 | )
20 | }
21 | \arguments{
22 | \item{betas}{a vector of length (deg+1)*(p+1) with regression parameters with
23 | deg the degree of the response function and the lagrangian multipliers}
24 | 
25 | \item{X}{the nxp data matrix}
26 | 
27 | \item{reg}{a matrix of regressors with the dimension nx(deg+1)}
28 | 
29 | \item{thetaMat}{The n-by-p matrix with dispersion parameters}
30 | 
31 | \item{muMarg}{offset matrix of size nxp}
32 | 
33 | \item{psi}{a scalar, the importance parameter}
34 | 
35 | \item{p}{an integer, the number of taxa}
36 | 
37 | \item{v}{an integer, one plus the degree of the response function}
38 | 
39 | \item{allowMissingness}{A boolean, are missing values present}
40 | 
41 | \item{naId}{The numeric index of the missing values in X}
42 | 
43 | \item{...}{further arguments passed on to the jacobian
44 | 
45 | The parameters are restricted to be normalized, i.e. all squared intercepts,
46 |  first order and second order parameters sum to 1}
47 | }
48 | \value{
49 | The evaluation of the score functions, a vector of length (p+1)*
50 | (deg+1)
51 | }
52 | \description{
53 | Derivative of the Lagrangian of the parametric response function
54 | }
55 | 


--------------------------------------------------------------------------------
/man/rowMultiply.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_rowMultiply.R
 3 | \name{rowMultiply}
 4 | \alias{rowMultiply}
 5 | \title{A function to efficiently row multiply a matrix and a vector}
 6 | \usage{
 7 | rowMultiply(matrix, vector)
 8 | }
 9 | \arguments{
10 | \item{matrix}{a numeric matrix of dimension a-by-b}
11 | 
12 | \item{vector}{a numeric vector of length b
13 | 
14 | t(t(matrix)*vector) but then faster}
15 | }
16 | \value{
17 | a matrix, row multplied by the vector
18 | }
19 | \description{
20 | A function to efficiently row multiply a matrix and a vector
21 | }
22 | \details{
23 | Memory intensive but that does not matter with given matrix sizes
24 | }
25 | 


--------------------------------------------------------------------------------
/man/seq_k.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_seq_k.R
 3 | \name{seq_k}
 4 | \alias{seq_k}
 5 | \title{A small auxiliary function for the length of the lambdas}
 6 | \usage{
 7 | seq_k(y, nLambda1s = 1)
 8 | }
 9 | \arguments{
10 | \item{y}{an integer, the current dimension}
11 | 
12 | \item{nLambda1s}{the number of centering restrictions}
13 | }
14 | \value{
15 | a vector containing the ranks of the current lagrangian multipliers
16 | }
17 | \description{
18 | A small auxiliary function for the length of the lambdas
19 | }
20 | 


--------------------------------------------------------------------------------
/man/trimOnConfounders.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/F_trimOnConfounders.R
 3 | \name{trimOnConfounders}
 4 | \alias{trimOnConfounders}
 5 | \title{Trim based on confounders to avoid taxa with only zero counts}
 6 | \usage{
 7 | trimOnConfounders(confounders, X, prevCutOff, minFraction, n)
 8 | }
 9 | \arguments{
10 | \item{confounders}{a nxt confounder matrix}
11 | 
12 | \item{X}{the nxp data matrix}
13 | 
14 | \item{prevCutOff}{a scalar between 0 and 1, the prevalence cut off}
15 | 
16 | \item{minFraction}{a scalar between 0 and 1,
17 | each taxon's total abundance should equal at least the number of samples n
18 |  times minFraction, otherwise it is trimmed}
19 | 
20 | \item{n}{the number of samples
21 | 
22 | Should be called prior to fitting the independence model}
23 | }
24 | \value{
25 | A trimmed data matrix nxp'
26 | }
27 | \description{
28 | Trim based on confounders to avoid taxa with only zero counts
29 | }
30 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(RCM)
3 | 
4 | test_check("RCM")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test-ArrayProd.R:
--------------------------------------------------------------------------------
 1 | x = array(c(1,2,5,3,4,9,5,2,5,4,5,6,4,1,1,2,1,3,5,1,4,5,6,9), dim = c(2,3,4))
 2 | y = c(1,1,2,4)
 3 | 
 4 | context("Array product")
 5 | 
 6 | test_that("Fast array product yields correct outcome", {
 7 |   expect_equal(
 8 |   RCM:::arrayprod(x = x, y = y),
 9 | apply(x, c(1,2), function(z, y){sum(z*y)}, y = y))
10 | }
11 | )
12 | 


--------------------------------------------------------------------------------
/tests/testthat/test-RCMinput.R:
--------------------------------------------------------------------------------
 1 | context("RCM input")
 2 | 
 3 | tmpPhy = prune_taxa(taxa_names(Zeller)[seq_len(100)],
 4 |  prune_samples(sample_names(Zeller)[seq_len(50)], Zeller))
 5 | 
 6 | test_that("RCM throws warnings for integer variables", {
 7 |  expect_warning(RCM(tmpPhy, covariates = c("Age","Diagnosis"), k = 1))
 8 | })
 9 | 
10 | test_that("RCM throws errors for wrong input type", {
11 |   expect_error(RCM("Zeller", covariates = c("Diagnosis", "Country", "Gender"), k = 1))
12 | })
13 | 
14 | test_that("RCM throws errors when only one covariate with one level supplied", {
15 |   expect_error(suppressWarnings(RCM(Zeller, covariates = "Age", k = 1)))
16 | })
17 | 
18 | test_that("RCM throws warning when less covariate combinations than samples supplied", {
19 |   expect_warning(RCM(Zeller, covariates = c("Diagnosis", "Country"), k = 1))
20 | })
21 | 
22 | test_that("RCM throws errors when NAs present in data matrix", {
23 |   expect_error(RCM(matrix(c(1,2,3,NA),2,2), covariates = "Age", k = 1))
24 | })
25 | 
26 | sample_data(Zeller)$bogusVariable = get_variable(Zeller, "Age") +
27 |   as.integer(get_variable(Zeller,"Gender"))
28 | 
29 | test_that("RCM throws errors when covariates are aliased", {
30 |     expect_error(suppressWarnings(RCM(Zeller, covariates = c("Diagnosis", "Country",
31 |                                           "Gender","Age", "bogusVariable"),
32 |                    k = 2)))
33 | })
34 | 
35 | test_that("RCM throws errors when confounders are aliased", {
36 |   expect_error(RCM(Zeller, confounders = c("Diagnosis", "Country",
37 |                                           "Gender","Age", "bogusVariable"),
38 |                    k = 2))
39 | })
40 | 


--------------------------------------------------------------------------------
/tests/testthat/test-RCMoutput.R:
--------------------------------------------------------------------------------
 1 | context("RCM output")
 2 | 
 3 | tmpPhy = prune_taxa(taxa_names(Zeller)[seq_len(150)],
 4 |                     prune_samples(sample_names(Zeller)[seq_len(100)], Zeller))
 5 | 
 6 | test_that("RCM returns element of class RCM", {
 7 |   expect_is(RCM(tmpPhy, k = 1), "RCM")
 8 | })
 9 | 
10 | test_that("RCM returns phyloseq object", {
11 |   expect_is(RCM(tmpPhy, k = 1)$physeq, "phyloseq")
12 | })
13 | 
14 | test_that("RCM throws warning when not converged", {
15 |   expect_warning(RCM(tmpPhy, k = 1, maxItOut = 2L))
16 | })
17 | 
18 | #Introduce some NAs
19 | tmpPhyNA = transform_sample_counts(tmpPhy, fun = function(x){
20 |     x[sample(length(x), size = 3)] = NA
21 |     x
22 | })
23 | misUnconstr <- suppressWarnings(RCM(tmpPhyNA, k = 2, allowMissingness = TRUE))
24 | suppressWarnings(misConstrLin <- RCM(tmpPhyNA, k = 2, allowMissingness = TRUE,
25 |                     covariates = c("Diagnosis", "Country", "Gender", "BMI"),
26 |                     confounders = "Age"))
27 | suppressWarnings(misConstrNP <- RCM(tmpPhyNA, k = 2, allowMissingness = TRUE,
28 |                    covariates = c("Diagnosis", "Country", "Gender", "BMI"),
29 |                    confounders = "Age", responseFun = "nonparametric"))
30 | test_that("RCM allows for missingness", {
31 |   expect_is(misUnconstr, "RCM")
32 |   expect_is(misConstrLin, "RCM")
33 |   expect_is(misConstrNP, "RCM")
34 | })
35 | test_that("All plotting functions still work with missing data", {
36 |   expect_silent(plot(misUnconstr))
37 |   expect_silent(plot(misConstrLin))
38 |   expect_silent(plot(misConstrNP))
39 |   expect_silent(plot(misUnconstr, samColour = "Deviance"))
40 |   expect_silent(plot(misUnconstr, taxCol = "Deviance", plotType = "species"))
41 |   expect_silent(plot(misUnconstr, inflVar = "psi"))
42 |   expect_silent(plot(misConstrLin, inflVar = "BMI"))
43 |   expect_warning(plotRespFun(misConstrNP))
44 | })
45 | 


--------------------------------------------------------------------------------
/tests/testthat/test-ResidualPlot.R:
--------------------------------------------------------------------------------
 1 | context("residual plot")
 2 | 
 3 | tmpPhy = prune_taxa(taxa_names(Zeller)[seq_len(150)],
 4 |                     prune_samples(sample_names(Zeller)[seq_len(100)], Zeller))
 5 | unconstrRcm = RCM(tmpPhy, k = 2, allowMissingness = TRUE, confounders = "Age")
 6 | constrRcm = RCM(tmpPhy, k = 2, allowMissingness = TRUE,
 7 |                 covariates = c("Diagnosis", "Country", "Gender", "BMI"),
 8 |                 confounders = "Age")
 9 | test_that("Residual plotting works, also for higher dimensions", {
10 |   expect_silent(residualPlot(constrRcm))
11 |   expect_silent(residualPlot(constrRcm, Dim = 2))
12 | })
13 | test_that("Residual plotting returns errors for uncosntrained ordination", {
14 |   expect_error(residualPlot(unconstrRcm))
15 | })
16 | 
17 | 


--------------------------------------------------------------------------------
/tests/testthat/test-checkAlias.R:
--------------------------------------------------------------------------------
 1 | context("Alias structure")
 2 | 
 3 | # A dataframe with alias structure
 4 | df = data.frame(foo = rnorm(10), baa = rep(c(TRUE, FALSE), each = 5),
 5 |                 foo2 = factor(rep(c("male", "female"), each = 5)))
 6 | 
 7 | test_that("Alias structure is discovered", {
 8 |   expect_error(checkAlias(df, names(df)))
 9 | })
10 | test_that("Alias structure is not falsely discovered", {
11 |   expect_silent(checkAlias(df, c("foo", "baa")))
12 | })
13 | 


--------------------------------------------------------------------------------
/tests/testthat/test-permanova.R:
--------------------------------------------------------------------------------
 1 | context("permanova")
 2 | 
 3 | tmpPhy = prune_taxa(taxa_names(Zeller)[seq_len(150)],
 4 |                     prune_samples(sample_names(Zeller)[seq_len(100)], Zeller))
 5 | rcmFit = RCM(tmpPhy)
 6 | test_that("permanova runs as expected", {
 7 |   expect_is(permanova(rcmFit, "Diagnosis"), "list")
 8 |   expect_is(permanova(rcmFit, # Bogus, user-supplied grouping factor
 9 |                         sample(c("a", "b"), replace = TRUE, nsamples(tmpPhy))),
10 |               "list")
11 | })
12 | 
13 | test_that("permanova throws errros for wrong input", {
14 |   expect_error(permanova(tmpPhy, get_variable(tmpPy, "Diagnosis")))
15 |   expect_error(permanova(rcmFit, groups = c("a", "b")))
16 |   expect_error(permanova(rcmFit, groups = rep("a", nsamples(tmpPhy))))
17 |   expect_error(permanova(rcmFit, groups = c("b", rep("a", nsamples(tmpPhy)-1))))
18 | })
19 | 
20 | test_that("permanova throws warning for low number of permutations", {
21 |   expect_warning(permanova(rcmFit, "Diagnosis", nPerm = 20))
22 | })
23 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plotRCM.R:
--------------------------------------------------------------------------------
 1 | context("RCM plot")
 2 | 
 3 | tmpPhy = prune_taxa(taxa_names(Zeller)[seq_len(150)],
 4 |                     prune_samples(sample_names(Zeller)[seq_len(100)], Zeller))
 5 | unconstrRcm = RCM(tmpPhy, k = 2, allowMissingness = TRUE, confounders = "Age")
 6 | constrRcm = RCM(tmpPhy, k = 2, allowMissingness = TRUE,
 7 |     covariates = c("Diagnosis", "Country", "Gender", "BMI"),
 8 |     confounders = "Age")
 9 | #
10 | test_that("Inappropriate plotting commands throw errors", {
11 |   expect_error(plot(unconstrRcm, samColour = "Influence"))
12 |   expect_error(plot(constrRcm, inflVar = "Diagnosis", samColour = "Influence"))
13 | })
14 | 
15 | test_that("Correct plotting commands yield ggplot2 objects", {
16 |   expect_s3_class(plot(unconstrRcm, samColour = "Influence", inflVar = "psi"),
17 |                   "ggplot")
18 |   expect_s3_class(plot(constrRcm, inflVar = "DiagnosisNormal", samColour = "Influence",
19 |                        samShape = "Diagnosis"),
20 |                   "ggplot")
21 | })
22 | 


--------------------------------------------------------------------------------
/tests/testthat/test-removeNA.R:
--------------------------------------------------------------------------------
 1 | context("Correct for missingness")
 2 | 
 3 | n = 50; p = 100
 4 | X = matrix(rnbinom(n*p, size = 1, mu = 2), n,p)
 5 | X[sample(seq_len(n*p), n)] = NA
 6 | mu = outer(rowSums(X, na.rm = TRUE), colSums(X, na.rm = TRUE))/
 7 |   sum(X, na.rm = TRUE)
 8 | 
 9 | test_that("correctXMissingness() removes all NAs", {
10 |   expect_true(anyNA(RCM:::correctXMissingness(X, mu, FALSE)))
11 |   expect_false(anyNA(RCM:::correctXMissingness(X, mu, TRUE)))
12 | })
13 | 


--------------------------------------------------------------------------------
/tests/testthat/test-rowMultiply.R:
--------------------------------------------------------------------------------
 1 | context("rowMultiply")
 2 | 
 3 | n = 50; p = 100
 4 | mat = matrix(rnorm(n*p), n,p)
 5 | vec = rnorm(p)
 6 | 
 7 | test_that("Rowmultiply works", {
 8 |   expect_equal(rowMultiply(mat, vec), t(t(mat)*vec))
 9 | })
10 | 


--------------------------------------------------------------------------------
/tests/testthat/test-seq_k.R:
--------------------------------------------------------------------------------
1 | context("Index vector of lagrange multipliers of dimension k")
2 | 
3 | test_that("Number of lagrange multipliers are correct", {
4 |   expect_equal(seq_k(1), seq_len(2))
5 |   expect_equal(seq_k(2), 3:5)
6 |   expect_equal(seq_k(3), 6:9)
7 | })
8 | 


--------------------------------------------------------------------------------