├── .gitignore
├── AICc_PERMANOVA.R
├── AICc_compare.R
├── AICc_table_generation.R
├── PlotTaxaKD.R
├── README.md
├── anon_data.R
├── chisquared.R
├── eDNA_helpers.R
├── extract_taxa_TITAN.R
├── group_PERMANOVA.R
├── group_PERMANOVA_wP.R.R
├── group_lmPerm.R
├── group_lmer.R
├── repeat_multipatt.R
└── triplet_fixer.R


/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
 5 | 502_project_code_Wisconsin (2).R
 6 | MEE3_1_sm_Appendix_S1
 7 | Microbiome-master
 8 | R_Scripts.Rproj
 9 | fromSE.R
10 | *.zip
11 | test.csv
12 | 


--------------------------------------------------------------------------------
/AICc_PERMANOVA.R:
--------------------------------------------------------------------------------
  1 | # Function to calculate AICc for PERMANOVA. Requires input from adonis or adonis2 {vegan}
  2 | 
  3 | 
  4 | AICc.PERMANOVA <- function(adonis.model) {
  5 |     
  6 |     # check to see if object is an adonis model...
  7 |     
  8 |     if (!(adonis.model$aov.tab[1,1] >= 1))
  9 |         stop("object not output of adonis {vegan} ")
 10 |     
 11 |     # Ok, now extract appropriate terms from the adonis model
 12 |     # Calculating AICc using residual sum of squares (RSS) since I don't think that adonis returns something I can use as a liklihood function...
 13 |     
 14 |     RSS <- adonis.model$aov.tab[rownames(adonis.model$aov.tab) == "Residuals", "SumsOfSqs"]
 15 |     MSE <- adonis.model$aov.tab[rownames(adonis.model$aov.tab) == "Residuals", "MeanSqs"]
 16 |     
 17 |     k <- ncol(adonis.model$model.matrix)# + 1 # add one for error variance
 18 |     
 19 |     nn <- nrow(adonis.model$model.matrix)
 20 |     
 21 |     # AIC : 2*k + n*ln(RSS)
 22 |     # AICc: AIC + [2k(k+1)]/(n-k-1)
 23 | 
 24 |     # based on https://en.wikipedia.org/wiki/Akaike_information_criterion;
 25 |     # https://www.researchgate.net/post/What_is_the_AIC_formula;
 26 |     # http://avesbiodiv.mncn.csic.es/estadistica/ejemploaic.pdf
 27 |     
 28 |     # AIC.g is generalized version of AIC = 2k + n [Ln( 2(pi) RSS/n ) + 1]
 29 |     # AIC.pi = k + n [Ln( 2(pi) RSS/(n-k) ) +1],
 30 |     
 31 |     AIC <- 2*k + nn*log(RSS)
 32 |     AIC.g <- 2*k + nn * (1 + log( 2 * pi * RSS / nn))
 33 |     AIC.MSE <- 2*k + nn * log(MSE)
 34 |     AIC.pi <- k + nn*(1 + log( 2*pi*RSS/(nn-k) )   )
 35 |     AICc <- AIC + (2*k*(k + 1))/(nn - k - 1)
 36 |     AICc.MSE <- AIC.MSE + (2*k*(k + 1))/(nn - k - 1)
 37 |     AICc.pi <- AIC.pi + (2*k*(k + 1))/(nn - k - 1)
 38 |     
 39 |     output <- list("AIC" = AIC, "AIC.g" = AIC.g, "AICc" = AICc,
 40 |                    "AIC.MSE" = AIC.MSE, "AICc.MSE" = AICc.MSE,
 41 |                    "AIC.pi" = AIC.pi, "AICc.pi" = AICc.pi, "k" = k)
 42 |     
 43 |     return(output)   
 44 | 
 45 | }
 46 | 
 47 | AICc.PERMANOVA2 <- function(adonis2.model) {
 48 |     
 49 |     # check to see if object is an adonis2 model...
 50 |     
 51 |     if (is.na(adonis2.model$SumOfSqs[1]))
 52 |         stop("object not output of adonis2 {vegan} ")
 53 |     
 54 |     # Ok, now extract appropriate terms from the adonis model Calculating AICc
 55 |     # using residual sum of squares (RSS or SSE) since I don't think that adonis
 56 |     # returns something I can use as a likelihood function... maximum likelihood
 57 |     # and MSE estimates are the same when distribution is gaussian See e.g.
 58 |     # https://www.jessicayung.com/mse-as-maximum-likelihood/;
 59 |     # https://towardsdatascience.com/probability-concepts-explained-maximum-likelihood-estimation-c7b4342fdbb1
 60 |     # So using RSS or MSE estimates is fine as long as the residuals are
 61 |     # Gaussian https://robjhyndman.com/hyndsight/aic/ If models have different
 62 |     # conditional likelihoods then AIC is not valid. However, comparing models
 63 |     # with different error distributions is ok (above link).
 64 |     
 65 |     
 66 |     RSS <- adonis2.model$SumOfSqs[ length(adonis2.model$SumOfSqs) - 1 ]
 67 |     MSE <- RSS / adonis2.model$Df[ length(adonis2.model$Df) - 1 ]
 68 |     
 69 |     nn <- adonis2.model$Df[ length(adonis2.model$Df) ] + 1
 70 |     
 71 |     k <- nn - adonis2.model$Df[ length(adonis2.model$Df) - 1 ]
 72 |     
 73 |     
 74 |     # AIC : 2*k + n*ln(RSS/n)
 75 |     # AICc: AIC + [2k(k+1)]/(n-k-1)
 76 |     
 77 |     # based on https://en.wikipedia.org/wiki/Akaike_information_criterion;
 78 |     # https://www.statisticshowto.datasciencecentral.com/akaikes-information-criterion/ ;
 79 |     # https://www.researchgate.net/post/What_is_the_AIC_formula;
 80 |     # http://avesbiodiv.mncn.csic.es/estadistica/ejemploaic.pdf;
 81 |     # https://medium.com/better-programming/data-science-modeling-how-to-use-linear-regression-with-python-fdf6ca5481be 
 82 |     
 83 |     # AIC.g is generalized version of AIC = 2k + n [Ln( 2(pi) RSS/n ) + 1]
 84 |     # AIC.pi = k + n [Ln( 2(pi) RSS/(n-k) ) +1],
 85 |     
 86 |     AIC <- 2*k + nn*log(RSS/nn)
 87 |     AIC.g <- 2*k + nn * (1 + log( 2 * pi * RSS / nn))
 88 |     AIC.MSE <- 2*k + nn * log(MSE)
 89 |     AIC.pi <- k + nn*(1 + log( 2*pi*RSS/(nn-k) )   )
 90 |     AICc <- AIC + (2*k*(k + 1))/(nn - k - 1)
 91 |     AICc.MSE <- AIC.MSE + (2*k*(k + 1))/(nn - k - 1)
 92 |     AICc.pi <- AIC.pi + (2*k*(k + 1))/(nn - k - 1)
 93 |     
 94 |     output <- list("AIC" = AIC, "AICc" = AICc, "AIC.g" = AIC.g, 
 95 |                    "AIC.MSE" = AIC.MSE, "AICc.MSE" = AICc.MSE,
 96 |                    "AIC.pi" = AIC.pi, "AICc.pi" = AICc.pi, "k" = k, "N" = nn)
 97 |     
 98 |     return(output)   
 99 |     
100 | }
101 | 
102 | 
103 | 
104 | 


--------------------------------------------------------------------------------
/AICc_compare.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # Test script to automate AICc calculation after deciding which of your variables are significant. Like, you have 4 candidate variables and you want to compare 1, 2, 3, and 4 variable models based on AICc.
 3 | 
 4 | list.varcomb.AICc <- tibble(varibles = rep(0, 4*3*2),
 5 |                             AICc.values = rep(0))
 6 | 
 7 | for (i in 1:length(sig.incidence.vars.PERMANOVA)) {
 8 | 
 9 |     for (j in 1:length(as.vector(combn(sig.incidence.vars.PERMANOVA, m = i)))) {
10 | 
11 |         list.varcomb.AICc[i,variables] <- as.vector(combn(sig.incidence.vars.PERMANOVA, m = i))[2]
12 | 
13 |     }
14 | 
15 | 
16 | }
17 | 
18 | for (i in 1:4) {
19 |     print(combn(1:4, m = i))
20 | }
21 | 


--------------------------------------------------------------------------------
/AICc_table_generation.R:
--------------------------------------------------------------------------------
  1 | ## Create AICc tables for PERMANOVA output.
  2 | 
  3 | require(vegan)
  4 | require(tibble)
  5 | require(stringr)
  6 | 
  7 | 
  8 | ## -- For two variables: ------------------------------------
  9 | 
 10 | AICc.table.2var <- function(sig.vars, control.var.char = NULL, c.var = 0, matrix.char, perm, type = "AICc", method = "bray") {
 11 | 
 12 |     varcomb.2.AICc <- tibble(variables = rep("var.name", choose(length(sig.vars),2)),
 13 |                              AICc.values = rep(0),
 14 |                              `Pseudo-_F_` = rep(0),
 15 |                              `p-value` = rep(0),
 16 |                              `Var Explnd` = rep(0),
 17 |                              Model = rep("model"))
 18 | 
 19 |     if (is.character(control.var.char) == TRUE & c.var == 0) {c.var = length(control.var.char)} 
 20 |         
 21 |     combo.list <- combn(x = sig.vars, m = 2, simplify = FALSE)
 22 | 
 23 |     if (!is.null(control.var.char)) {
 24 |         control.var.char <- paste0(control.var.char, " +")
 25 |         }
 26 | 
 27 |     for (r in 1:choose(length(sig.vars),2)) {
 28 |         # label the row with variable names
 29 |         varcomb.2.AICc[r,1] <- paste(control.var.char, paste(combo.list[[r]], collapse = " + "))
 30 |         
 31 |         # create a temporary PERMANOVA to take info from
 32 |         
 33 |         temp <- adonis2(
 34 |             as.formula(paste0(
 35 |                 matrix.char,
 36 |                 " ~ ",
 37 |                 control.var.char,
 38 |                 paste0(combo.list[[r]], collapse =  "+")
 39 |             )),
 40 |             permutations = perm,
 41 |             method = method,
 42 |             by = NULL
 43 |         )
 44 |         
 45 |         
 46 |         varcomb.2.AICc[r, 2] <- AICc.PERMANOVA2(temp)[type]
 47 |         varcomb.2.AICc[r, 3] <- temp$F[1]
 48 |         varcomb.2.AICc[r, 4] <- temp$`Pr(>F)`[1]
 49 |         varcomb.2.AICc$`Var Explnd`[r] <- temp$SumOfSqs[1] / temp$SumOfSqs[3]
 50 |         
 51 | 
 52 |         r <- r + 1
 53 | 
 54 |     }
 55 | 
 56 |     varcomb.2.AICc$`Delta AICc` <- varcomb.2.AICc$AICc.values -
 57 |         min(varcomb.2.AICc$AICc.values)
 58 |     varcomb.2.AICc$`Relative Likelihood` <-
 59 |         exp(-.5 * (varcomb.2.AICc$AICc.values - min(varcomb.2.AICc$AICc.values)))
 60 | 
 61 |     # Relative likelihood compared with best model; see
 62 |     # https://en.wikipedia.org/wiki/Likelihood_function
 63 |     # https://www.rdocumentation.org/packages/qpcR/versions/1.4-1/topics/akaike.weights 
 64 |     
 65 |     
 66 | return(varcomb.2.AICc)
 67 | 
 68 | }
 69 | 
 70 | 
 71 | 
 72 | ## -- For N variables: ---------------------------------------------------
 73 | 
 74 | 
 75 | AICc.table.Nvar <- function(sig.vars, control.var.char = NULL, c.var = 0, matrix.char, perm, n.var = 1, composite = FALSE, type = "AICc", method = "bray") {
 76 | 
 77 |     if (n.var > length(sig.vars)) { stop("n.var greater than number of significant variables")}
 78 | 
 79 |     if (!is.null(control.var.char)) {
 80 |         control.var.char <- paste0(control.var.char, " + ")
 81 |     }
 82 |     if (is.character(control.var.char) == TRUE & c.var == 0) {c.var = 1} 
 83 |     
 84 | 
 85 |     varcomb.N.AICc <- tibble(variables = rep("var.name", choose(length(sig.vars), n.var)),
 86 |                              AICc.values = rep(0),
 87 |                              `Pseudo-_F_` = rep(0),
 88 |                              `p-value` = rep(0),
 89 |                              `Var Explnd` = rep(0),
 90 |                              Model = rep("model"))
 91 | 
 92 |     combo.list <- combn(x = sig.vars, m = n.var, simplify = FALSE)
 93 | 
 94 | 
 95 |     for (r in 1:choose(length(sig.vars), n.var)) {
 96 |         # label the row with variable names
 97 |         varcomb.N.AICc[r,1] <- paste(control.var.char, paste(combo.list[[r]], collapse = " and "))
 98 | 
 99 |         # create a temporary PERMANOVA to take info from
100 |         temp <- adonis2(
101 |             as.formula(paste0(
102 |                 matrix.char,
103 |                 " ~ ",
104 |                 control.var.char,
105 |                 paste0(combo.list[[r]], collapse =  "+")
106 |             )),
107 |             permutations = perm,
108 |             method = method,
109 |             by = NULL
110 |         )
111 |         
112 |         
113 |         varcomb.N.AICc[r,2] <- AICc.PERMANOVA2(temp)[type]
114 | 
115 |         varcomb.N.AICc[r,3] <- temp$`F`[1]
116 |         varcomb.N.AICc[r,4] <- temp$`Pr(>F)`[1]
117 |         varcomb.N.AICc$`Var Explnd`[r] <- temp$SumOfSqs[1] / temp$SumOfSqs[3]
118 |         
119 | 
120 |         r <- r + 1
121 | 
122 |     }
123 | 
124 | 
125 |     # Calculate diagnostic variables:
126 | 
127 | if (composite == FALSE) {
128 |     varcomb.N.AICc$`Delta AICc` <- varcomb.N.AICc$AICc.values -
129 |         min(varcomb.N.AICc$AICc.values)
130 |     varcomb.2.AICc$`Relative Likelihood` <-
131 |         exp(-.5 * (varcomb.2.AICc$AICc.values - min(varcomb.2.AICc$AICc.values)))
132 |     }
133 | 
134 | 
135 |     return(varcomb.N.AICc)
136 | 
137 | }
138 | 
139 | 
140 | ## -- Wrapper function: ---------------------------------------------------
141 | 
142 | # comb.incl is which # of variables for the combinations you want to include.
143 | # e.g. all combinations of 2 variables, 3 variables, etc.
144 | 
145 | AICc.table.all <- function(sig.vars, control.var.char = NULL, matrix.char, perm = 999, comb.incl = 1, extra.var = FALSE, extra.var.char = NULL, type = "AICc", method = "bray") {
146 | 
147 |     varcomb.all <- data.frame()
148 | 
149 |     # If there is a control variable, create a one variable model with only
150 |     # control variable, for comparison with rest of proposed models
151 |     # 
152 |     if (!is.null(control.var.char)) {
153 | 
154 |         temp <- AICc.table.Nvar(sig.vars = control.var.char, control.var.char = NULL,
155 |                                 matrix.char = matrix.char, n.var = 1, composite = TRUE,
156 |                                 type = type, method = method, perm = perm)
157 | 
158 |         varcomb.all <- rbind(varcomb.all, temp)
159 | 
160 | 
161 |     }
162 | 
163 |     # Iterate through the comb.incl. e.g. all 1 var models, then 2 var models, then 3 var models...
164 |     for (i in comb.incl) {
165 |     
166 |         temp <- AICc.table.Nvar(sig.vars = sig.vars, control.var.char = control.var.char,
167 |                                 matrix.char = matrix.char, n.var = i, composite = TRUE,
168 |                                 type = type, method = method, perm = perm)
169 |     
170 |         varcomb.all <- rbind(varcomb.all, temp)
171 |     
172 |     
173 |     }
174 | 
175 |     
176 |     
177 |     
178 |     # If you want to include a non-significant variable for comparison...
179 |     if (extra.var == TRUE) {
180 |         
181 |         for (i in 1:length(extra.var.char)) {
182 |         temp <- AICc.table.Nvar(sig.vars = extra.var.char[i], control.var.char = control.var.char,
183 |                                 matrix.char = matrix.char, n.var = 1, composite = TRUE, 
184 |                                 type = type, method = method, perm = perm)
185 |         
186 |         varcomb.all <- rbind(varcomb.all, temp)
187 |         
188 |         }
189 |     }
190 |     
191 |     
192 |     varcomb.all$`Delta AICc` <- varcomb.all$AICc.values -
193 |         min(varcomb.all$AICc.values)
194 |     varcomb.all$`Relative Likelihood` <- exp((min(varcomb.all$AICc.values) -
195 |                                                   varcomb.all$AICc.values)/2)
196 | 
197 | # exp( -0.5 * ∆AIC score for that model)
198 | 
199 |     return(varcomb.all)
200 | 
201 | }
202 | 
203 | 
204 | ## -- Sum of AIC Weights by Var: ---------------------------------------------------
205 | 
206 | # This requires an AIC/AICc table output from one of the above functions. The
207 | # rationalle behind this approach can be found in Arnold, T. W. (2010).
208 | # Uninformative parameters and model selection using Akaike's Information
209 | # Criterion. The Journal of Wildlife Management, 74(6), 1175-1178.
210 | 
211 | # Calculation method from http://brianomeara.info/tutorials/aic
212 | 
213 | 
214 | AICc.weights.byvar <- function(sig.vars, AIC.table.output){
215 |     
216 |     results.table <- tibble("Significant Variable" = sig.vars,
217 |                             "Summed AIC Weight" = rep(0))
218 | 
219 |     for (i in 1:length(sig.vars)){
220 |         
221 |         summed.weight = 0
222 |         
223 |         for (j in 1:nrow(AIC.table.output)){
224 |             
225 |             if (grepl(AIC.table.output$variables[j], pattern = sig.vars[i], fixed = TRUE)) {
226 |                 
227 |                 summed.weight <- summed.weight + AIC.table.output$`Relative Likelihood`[j]
228 |                 
229 |             } else summed.weight <- summed.weight
230 |             
231 |             
232 |         }
233 |         #sig vars loop
234 |         
235 |         results.table[i, 2] <- summed.weight
236 |         
237 |     }
238 |     # function loop
239 |     
240 |     return(results.table)
241 | }
242 |     
243 | 
244 | 
245 | # create a table with sig vars fed into the table + AIC weight sum column
246 | #     
247 | # for each significant variable,
248 | #     for each row
249 | #         check each row to see if there is a pattern match
250 | #         add the relative likelihood to sum if so         
251 | #         
252 | # report 
253 |     
254 | 
255 | 
256 | 
257 | ## Testing
258 | # sig.vars <- sigvars.symbionfNGS[-1]
259 | # matrix.char <- "sqrt(symbio.transpose.nf)"
260 | # control.var.char <- sigvars.symbionfNGS[1]
261 | #
262 | #
263 | # AICc.table.2var(sig.vars = sigvars.symbionfNGS[-1], control.var.char = sigvars.symbionfNGS[1], matrix.char = "sqrt(symbio.transpose.nf)")[3]
264 | #
265 | # AICc.table.Nvar(sig.vars = sigvars.symbionfNGS[-1], control.var.char = sigvars.symbionfNGS[1], matrix.char = "sqrt(symbio.transpose.nf)", n.var = 1)[3]
266 | #
267 | # test <- AICc.table.all(sig.vars = sigvars.symbionfNGS[-1], control.var.char = sigvars.symbionfNGS[1],
268 | #                        matrix.char = "sqrt(symbio.transpose.nf)", comb.incl = 1)
269 | #
270 | 
271 | 


--------------------------------------------------------------------------------
/PlotTaxaKD.R:
--------------------------------------------------------------------------------
  1 | plotTaxaKD <- function (titan.out,
  2 |           z1 = T,
  3 |           z2 = T,
  4 |           interval = T,
  5 |           prob95 = F,
  6 |           z.med = F,
  7 |           xlabel = "Environmental Gradient",
  8 |           log = "",
  9 |           at = NULL,
 10 |           xmin = min(titan.out$sppmax[, 8]),
 11 |           xmax = max(titan.out$sppmax[,
 12 |                                       12]) * 1.05,
 13 |           tck = 0.025,
 14 |           bty = NULL,
 15 |           ntick = 6,
 16 |           prtty = T,
 17 |           dig = 1,
 18 |           leg.x = 0.8,
 19 |           leg.y = 10,
 20 |           cex.taxa = 0.75,
 21 |           cex = 1,
 22 |           cex.axis = 1,
 23 |           cex.leg = .9,
 24 |           cex.lab = 1.25,
 25 |           legend = T,
 26 |           col1 = "black",
 27 |           fil1 = "black",
 28 |           col2 = "black",
 29 |           fil2 = "white",
 30 |           write = F,
 31 |           all = F,
 32 |           lab.side = 3, # 1 for bottom 3 for top
 33 |           lab.line = 1, # how far from the top/bottom of plot
 34 |           margin = c(3,8,3,8), # b,l,t,r
 35 |           sub1.labs.function = "identity", # name of function to revise left side row names
 36 |           sub2.labs.function = "identity", # name of function to revise right side row names
 37 |           ...)
 38 | {
 39 |     imax = titan.out$arguments[[5]]
 40 |     boot = titan.out$arguments[[3]] > 0.5
 41 |     if (all) {
 42 |         boot = F
 43 | 
 44 |     }
 45 |     if (boot) {
 46 |         if (z1) {
 47 |             sppsub1 <- subset(titan.out$sppmax, titan.out$sppmax[,
 48 |                                                                  16] == 1)
 49 |             sub1.labs <- rownames(sppsub1)
 50 |             sub1.labs <- lapply(X = sub1.labs, FUN = sub1.labs.function)
 51 |         }
 52 |         if (z2) {
 53 |             sppsub2 <- subset(titan.out$sppmax, titan.out$sppmax[,
 54 |                                                                  16] == 2)
 55 |             sub2.labs <- rownames(sppsub2)
 56 |             sub2.labs <- lapply(X = sub2.labs, FUN = sub2.labs.function)
 57 |         }
 58 |     }
 59 |     else {
 60 |         if (z1) {
 61 |             sppsub1 <- subset(titan.out$sppmax,
 62 |                               titan.out$sppmax[,
 63 |                                                4] == 1 &
 64 |                                   titan.out$sppmax[, 6] <= 0.05)
 65 |             sub1.labs <- rownames(sppsub1)
 66 |             sub1.labs <- lapply(X = sub1.labs, FUN = sub1.labs.function)
 67 | 
 68 | 
 69 |         }
 70 |         if (z2) {
 71 |             sppsub2 <- subset(titan.out$sppmax,
 72 |                               titan.out$sppmax[,
 73 |                                                4] == 2 &
 74 |                                   titan.out$sppmax[, 6] <= 0.05)
 75 |             sub2.labs <- rownames(sppsub2)
 76 |             sub2.labs <- lapply(X = sub2.labs, FUN = sub2.labs.function)
 77 | 
 78 | 
 79 |         }
 80 |     }
 81 |     if (z1) {
 82 |         if (nrow(sppsub1) < 1) {
 83 |             stop(
 84 |                 "z1 is empty, set z1=FALSE, change significance criteria, or set boot=FALSE if bootstrapping was not used to generate the titan object"
 85 |             )
 86 |         }
 87 |     }
 88 |     if (z2) {
 89 |         if (nrow(sppsub2) < 1) {
 90 |             stop(
 91 |                 "z2 is empty, set z2=FALSE, change significance criteria, or set boot=FALSE if bootstrapping was not used to generate the titan object"
 92 |             )
 93 |         }
 94 |     }
 95 |     par(mar = margin, oma = c(0, 3, 0, 3))
 96 |     if (z1 & z2) {
 97 |         if (nrow(sppsub1) >= nrow(sppsub2)) {
 98 |             sppsub.gt <- sppsub1
 99 |         }
100 |         else {
101 |             sppsub.gt <- sppsub2
102 |         }
103 |     }
104 |     else {
105 |         if (z1) {
106 |             sppsub.gt <- sppsub1
107 |         }
108 |         else {
109 |             sppsub.gt <- sppsub2
110 |         }
111 |     }
112 |     plot(
113 |         sppsub.gt[, 1],
114 |         ((max(
115 |             rank((sppsub.gt[, 1]), ties.method = "first")
116 |         ) +
117 |             1) - rank((sppsub.gt[, 1]), ties.method = "first")),
118 |         xlim = c(xmin, xmax),
119 |         ylim = c(0.5, max(
120 |             rank(sppsub.gt[,
121 |                            1], ties.method = "first") + 1
122 |         )),
123 |         cex = 0,
124 |         tck = tck,
125 |         log = log,
126 |         axes = FALSE,
127 |         ylab = "",
128 |         xlab = ""
129 |     )
130 |     if (boot) {
131 |         if (prob95) {
132 |             if (z1) {
133 |                 yvalues1 = ((max(
134 |                     rank((sppsub1[, 12]), ties.method = "first")
135 |                 ) +
136 |                     1) - rank((sppsub1[, 12]), ties.method = "first"))
137 |             }
138 |             if (z2) {
139 |                 yvalues2 = rank((sppsub2[, 8]), ties.method = "first") +
140 |                     0.5
141 |             }
142 |         }
143 |         else {
144 |             if (z.med) {
145 |                 if (z1) {
146 |                     yvalues1 = ((max(
147 |                         rank((sppsub1[, 10]), ties.method = "first")
148 |                     ) +
149 |                         1) - rank((sppsub1[, 10]), ties.method = "first"))
150 |                 }
151 |                 if (z2) {
152 |                     yvalues2 = rank((sppsub2[, 10]), ties.method = "first") +
153 |                         0.5
154 |                 }
155 |             }
156 |             else {
157 |                 if (imax) {
158 |                     if (z1) {
159 |                         yvalues1 = ((max(
160 |                             rank((sppsub1[, 1]), ties.method = "first")
161 |                         ) +
162 |                             1) - rank((sppsub1[, 1]), ties.method = "first"))
163 |                     }
164 |                     if (z2) {
165 |                         yvalues2 = rank((sppsub2[, 1]), ties.method = "first") +
166 |                             0.5
167 |                     }
168 |                 }
169 |                 else {
170 |                     if (z1) {
171 |                         yvalues1 = ((max(
172 |                             rank((sppsub1[, 2]), ties.method = "first")
173 |                         ) +
174 |                             1) - rank((sppsub1[, 2]), ties.method = "first"))
175 |                     }
176 |                     if (z2) {
177 |                         yvalues2 = rank((sppsub2[, 2]), ties.method = "first") +
178 |                             0.5
179 |                     }
180 |                 }
181 |             }
182 |         }
183 |     }
184 |     else {
185 |         if (imax) {
186 |             if (z1) {
187 |                 yvalues1 = ((max(
188 |                     rank((sppsub1[, 1]), ties.method = "first")
189 |                 ) +
190 |                     1) - rank((sppsub1[, 1]), ties.method = "first"))
191 |             }
192 |             if (z2) {
193 |                 yvalues2 = rank((sppsub2[, 1]), ties.method = "first") +
194 |                     0.5
195 |             }
196 |         }
197 |         else {
198 |             if (z1) {
199 |                 yvalues1 = ((max(
200 |                     rank((sppsub1[, 2]), ties.method = "first")
201 |                 ) +
202 |                     1) - rank((sppsub1[, 2]), ties.method = "first"))
203 |             }
204 |             if (z2) {
205 |                 yvalues2 = rank((sppsub2[, 2]), ties.method = "first") +
206 |                     0.5
207 |             }
208 |         }
209 |     }
210 |     if (boot & interval) {
211 |         if (z1) {
212 |             segments(sppsub1[, 8],
213 |                      yvalues1,
214 |                      sppsub1[, 12],
215 |                      yvalues1,
216 |                      col = col1,
217 |                      lwd = 2)
218 |         }
219 |         if (z2) {
220 |             segments(
221 |                 sppsub2[, 8],
222 |                 yvalues2,
223 |                 sppsub2[, 12],
224 |                 yvalues2,
225 |                 col = col2,
226 |                 lwd = 2,
227 |                 lty = 3
228 |             )
229 |         }
230 |     }
231 |     if (z1) {
232 |         grpcol = rep(NA, nrow(sppsub1))
233 |     }
234 |     if (z2) {
235 |         grpcol2 = rep(NA, nrow(sppsub2))
236 |     }
237 |     if (z1) {
238 |         for (i in 1:nrow(sppsub1)) {
239 |             if (sppsub1[i, 4] > 1.5) {
240 |                 grpcol[i] = col2
241 |             }
242 |             else {
243 |                 grpcol[i] = fil1
244 |             }
245 |         }
246 |     }
247 |     if (z2) {
248 |         for (i in 1:nrow(sppsub2)) {
249 |             if (sppsub2[i, 4] > 1.5) {
250 |                 grpcol2[i] = fil2
251 |             }
252 |             else {
253 |                 grpcol2[i] = fil1
254 |             }
255 |         }
256 |     }
257 |     if (boot) {
258 |         if (prob95) {
259 |             if (z.med) {
260 |                 if (z1) {
261 |                     symbols(
262 |                         sppsub1[, 12],
263 |                         yvalues1,
264 |                         circles = sppsub1[,
265 |                                           15],
266 |                         inches = 0.1,
267 |                         add = TRUE,
268 |                         xlim = c(0,
269 |                                  5),
270 |                         fg = col1,
271 |                         bg = grpcol,
272 |                         lwd = 2
273 |                     )
274 |                 }
275 |                 if (z2) {
276 |                     symbols(
277 |                         sppsub2[, 8],
278 |                         yvalues2,
279 |                         circles = sppsub2[,
280 |                                           15],
281 |                         inches = 0.1,
282 |                         add = TRUE,
283 |                         xlim = c(0,
284 |                                  5),
285 |                         fg = col2,
286 |                         bg = grpcol2,
287 |                         lwd = 2
288 |                     )
289 |                 }
290 |             }
291 |             else {
292 |                 if (z1) {
293 |                     symbols(
294 |                         sppsub1[, 12],
295 |                         yvalues1,
296 |                         circles = sppsub1[,
297 |                                           7],
298 |                         inches = 0.1,
299 |                         add = TRUE,
300 |                         xlim = c(0,
301 |                                  5),
302 |                         fg = col1,
303 |                         bg = grpcol,
304 |                         lwd = 2
305 |                     )
306 |                 }
307 |                 if (z2) {
308 |                     symbols(
309 |                         sppsub2[, 8],
310 |                         yvalues2,
311 |                         circles = sppsub2[,
312 |                                           7],
313 |                         inches = 0.1,
314 |                         add = TRUE,
315 |                         xlim = c(0,
316 |                                  5),
317 |                         fg = col2,
318 |                         bg = grpcol2,
319 |                         lwd = 2
320 |                     )
321 |                 }
322 |             }
323 |         }
324 |         else {
325 |             if (z.med) {
326 |                 if (z1) {
327 |                     symbols(
328 |                         sppsub1[, 10],
329 |                         yvalues1,
330 |                         circles = sppsub1[,
331 |                                           15],
332 |                         inches = 0.1,
333 |                         add = TRUE,
334 |                         xlim = c(0,
335 |                                  5),
336 |                         fg = col1,
337 |                         bg = grpcol,
338 |                         lwd = 2
339 |                     )
340 |                 }
341 |                 if (z2) {
342 |                     symbols(
343 |                         sppsub2[, 10],
344 |                         yvalues2,
345 |                         circles = sppsub2[,
346 |                                           15],
347 |                         inches = 0.1,
348 |                         add = TRUE,
349 |                         xlim = c(0,
350 |                                  5),
351 |                         fg = col2,
352 |                         bg = grpcol2,
353 |                         lwd = 2
354 |                     )
355 |                 }
356 |             }
357 |             else {
358 |                 if (imax) {
359 |                     if (z1) {
360 |                         symbols(
361 |                             sppsub1[, 1],
362 |                             yvalues1,
363 |                             circles = sppsub1[,
364 |                                               7],
365 |                             inches = 0.1,
366 |                             add = TRUE,
367 |                             xlim = c(0,
368 |                                      5),
369 |                             fg = col1,
370 |                             bg = grpcol,
371 |                             lwd = 2
372 |                         )
373 |                     }
374 |                     if (z2) {
375 |                         symbols(
376 |                             sppsub2[, 1],
377 |                             yvalues2,
378 |                             circles = sppsub2[,
379 |                                               7],
380 |                             inches = 0.1,
381 |                             add = TRUE,
382 |                             xlim = c(0,
383 |                                      5),
384 |                             fg = col2,
385 |                             bg = grpcol2,
386 |                             lwd = 2
387 |                         )
388 |                     }
389 |                 }
390 |                 else {
391 |                     if (z1) {
392 |                         symbols(
393 |                             sppsub1[, 2],
394 |                             yvalues1,
395 |                             circles = sppsub1[,
396 |                                               7],
397 |                             inches = 0.1,
398 |                             add = TRUE,
399 |                             xlim = c(0,
400 |                                      5),
401 |                             fg = col1,
402 |                             bg = grpcol,
403 |                             lwd = 2
404 |                         )
405 |                     }
406 |                     if (z2) {
407 |                         symbols(
408 |                             sppsub2[, 2],
409 |                             yvalues2,
410 |                             circles = sppsub2[,
411 |                                               7],
412 |                             inches = 0.1,
413 |                             add = TRUE,
414 |                             xlim = c(0,
415 |                                      5),
416 |                             fg = col2,
417 |                             bg = grpcol2,
418 |                             lwd = 2
419 |                         )
420 |                     }
421 |                 }
422 |             }
423 |         }
424 |     }
425 |     else {
426 |         if (imax) {
427 |             if (z1) {
428 |                 symbols(
429 |                     sppsub1[, 1],
430 |                     yvalues1,
431 |                     circles = sppsub1[,
432 |                                       7],
433 |                     inches = 0.1,
434 |                     add = TRUE,
435 |                     xlim = c(0,
436 |                              5),
437 |                     fg = col1,
438 |                     bg = grpcol,
439 |                     lwd = 2
440 |                 )
441 |             }
442 |             if (z2) {
443 |                 symbols(
444 |                     sppsub2[, 1],
445 |                     yvalues2,
446 |                     circles = sppsub2[,
447 |                                       7],
448 |                     inches = 0.1,
449 |                     add = TRUE,
450 |                     xlim = c(0,
451 |                              5),
452 |                     fg = col2,
453 |                     bg = grpcol2,
454 |                     lwd = 2
455 |                 )
456 |             }
457 |         }
458 |         else {
459 |             if (z1) {
460 |                 symbols(
461 |                     sppsub1[, 2],
462 |                     yvalues1,
463 |                     circles = sppsub1[,
464 |                                       7],
465 |                     inches = 0.1,
466 |                     add = TRUE,
467 |                     xlim = c(0,
468 |                              5),
469 |                     fg = col1,
470 |                     bg = grpcol,
471 |                     lwd = 2
472 |                 )
473 |             }
474 |             if (z2) {
475 |                 symbols(
476 |                     sppsub2[, 2],
477 |                     yvalues2,
478 |                     circles = sppsub2[ , 7],
479 |                     inches = 0.1,
480 |                     add = TRUE,
481 |                     xlim = c(0,
482 |                              5),
483 |                     fg = col2,
484 |                     bg = grpcol2,
485 |                     lwd = 2
486 |                 )
487 |             }
488 |         }
489 |     }
490 |     if (z1) {
491 |         axis(
492 |             2,
493 |             at = yvalues1,
494 |             labels = sub1.labs,
495 |             las = 1,
496 |             mgp = c(1, 0.5, 0),
497 |             cex.axis = cex.taxa,
498 |             tck = tck
499 |         )
500 |     }
501 |     if (z2) {
502 |         axis(
503 |             4,
504 |             at = yvalues2,
505 |             labels = sub2.labs,
506 |             mgp = c(1, 0.5, 0),
507 |             las = 1,
508 |             cex.axis = cex.taxa,
509 |             tck = tck
510 |         )
511 |     }
512 |     if (log == "x") {
513 |         axis(
514 |             1,
515 |             at = at,
516 |             mgp = c(2, 0.5, 0),
517 |             cex.axis = cex.axis,
518 |             tck = tck
519 |         )
520 |     }
521 |     else {
522 |         if (prtty) {
523 |             axis(
524 |                 1,
525 |                 pretty(xmin:xmax, ntick),
526 |                 mgp = c(2, 0.5, 0),
527 |                 cex.axis = cex.axis,
528 |                 tck = tck
529 |             )
530 |         }
531 |         else {
532 |             axis(
533 |                 1,
534 |                 at = seq(
535 |                     from = round(xmin, digits = dig),
536 |                     to = round(xmax, digits = dig),
537 |                     by = round((xmax -
538 |                                     xmin) /
539 |                                    4, digits = dig)
540 |                 ),
541 |                 mgp = c(2, 0.5, 0),
542 |                 cex.axis = cex.axis,
543 |                 tck = tck
544 |             )
545 |         }
546 |     }
547 |     mtext(xlabel,
548 |           side = lab.side,
549 |           line = lab.line,
550 |           cex = cex)
551 |     if (z1 & z2) {
552 |         leg = c("z-", "z+")
553 |         fill.leg = c(fil1, fil2)
554 |         legend(
555 |             leg.x,
556 |             leg.y,
557 |             leg,
558 |             fill = fill.leg,
559 |             ncol = 1,
560 |             bty = "n",
561 |             plot = TRUE,
562 |             cex = cex.leg
563 |         )
564 |     }
565 |     box(which = "plot", bty = bty)
566 |     if (z1 & z2 & write) {
567 |         sppsub <- list(sppsub1, sppsub2)
568 |         names(sppsub) <- c("sppsub1", "sppsub2")
569 |         return(sppsub)
570 |     }
571 |     if (z1 & write) {
572 |         return(sppsub1)
573 |     }
574 |     if (z2 & write) {
575 |         return(sppsub2)
576 |     }
577 | }
578 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # R_Scripts
2 | Helper R scripts for multiple PERMANOVA tests, AICc script for PERMANOVA, etc.
3 | Also some scripts for anonymizing data.
4 | 


--------------------------------------------------------------------------------
/anon_data.R:
--------------------------------------------------------------------------------
 1 | #
 2 | # Make data anonymous! This script takes an input data table (csv or in memory)
 3 | # and a conversion table (lookup table) and then produces an output table (saved
 4 | # to disk or memory).
 5 | # 
 6 | # Based in part on information found at: https://stackoverflow.com/questions/35636315/replace-values-in-a-dataframe-based-on-lookup-table
 7 | #
 8 | # ... passes arguments to write.csv.
 9 | # 
10 | 
11 | 
12 | anon.data <- function(input.table, lookup.table, site.column, match.column, anon.column, output.path = NA, write.to.disk = FALSE, ...){
13 | 
14 | # -- Input table -------
15 | 
16 | if(is.character(input.table) == TRUE) {
17 |     
18 |     input.table <- read.csv(input.table, stringsAsFactors = FALSE)
19 |     
20 | }
21 | 
22 | exists("input.table")
23 | 
24 | # -- Lookup table ------------
25 | 
26 | if(is.character(lookup.table) == TRUE) {
27 |     
28 |     lookup.table <- read.csv(lookup.table, stringsAsFactors = FALSE)
29 |     
30 | }
31 | 
32 | exists("lookup.table")
33 | 
34 | # -- Anon the table -----------
35 | 
36 | replace.me <- input.table[ , site.column]
37 | lookup.match <- lookup.table[ , match.column]
38 | replace.with <- lookup.table[ , anon.column]
39 | 
40 | anon.table <- input.table
41 | anon.table[ , which(colnames(anon.table) == site.column)] <- replace.with[match(
42 |     replace.me, lookup.match)]
43 | 
44 | 
45 | # -- Export the output table ---------
46 | 
47 | if(is.character(output.path) == TRUE & write.to.disk == TRUE) {
48 |     
49 |     write.csv(x = anon.table, file = output.path, ...)
50 |     return(anon.table)
51 |     
52 | }
53 | 
54 | if(is.character(output.path) == TRUE & write.to.disk == FALSE) {
55 |     
56 |     warning("warning: table is chr and write to disk is false, creating a default output table in Global Environment")
57 |     
58 |     return (anon.table)
59 |     
60 | }
61 | 
62 | if(is.character(output.path) == FALSE & write.to.disk == FALSE) {
63 |     
64 |     return (anon.table)
65 |     
66 | }
67 | 
68 | 
69 | if(is.character(output.path) == FALSE & write.to.disk == TRUE) {
70 |     
71 |     warning("warning: no output path specified, csv not saved to disk.")
72 |     return (anon.table)
73 |     
74 | }
75 | 
76 | }
77 | 


--------------------------------------------------------------------------------
/chisquared.R:
--------------------------------------------------------------------------------
  1 | # likertTable for all three functions should be formatted with question columns
  2 | # and demographic (or grouping) columns, each row should be a different
  3 | # respondents answer. Cells should be e.g. likert responses (More, etc.) or
  4 | # demographic data (e.g. age groups)
  5 | 
  6 | ## Results table for the addKruskal and addChiSquared functions should be formulated like this:
  7 | 
  8 | #
  9 | # activityGardensResults <-
 10 | #   tibble(
 11 | #     demographicVariable = c(
 12 | #       "Gender",
 13 | #       "Marital Status",
 14 | #       "Age",
 15 | #       "Housing",
 16 | #       "Town Type",
 17 | #       "Governorate",
 18 | #       "Employment",
 19 | #       "Work Location",
 20 | #       "Education",
 21 | #       "Before Income",
 22 | #       "After Income"
 23 | #     ),
 24 | #     afterGardensRelaxing_CHI = rep(0.0, length(demographicVariable)),
 25 | #     afterGardensRelaxing_PVAL = rep(0.0, length(demographicVariable)),
 26 | #     afterGardensRelaxing_CORPVAL = rep(0.0, length(demographicVariable)),
 27 | #     afterGardensExercise_CHI = rep(0.0, length(demographicVariable)),
 28 | #     afterGardensExercise_PVAL = rep(0.0, length(demographicVariable)),
 29 | #     afterGardensExercise_CORPVAL = rep(0.0, length(demographicVariable)),
 30 | #     afterGardensBirdPhotography_CHI = rep(0.0, length(demographicVariable)),
 31 | #     afterGardensBirdPhotography_PVAL = rep(0.0, length(demographicVariable)),
 32 | #     afterGardensBirdPhotography_CORPVAL = rep(0.0, length(demographicVariable))
 33 | #
 34 | #   )
 35 | 
 36 | 
 37 | 
 38 | addKruskal <-
 39 |   function(likertTable,
 40 |            resultsTable,
 41 |            questionColumns,
 42 |            demographicColumns) {
 43 |     i = min(demographicColumns) # row
 44 |     j = min(questionColumns) # column
 45 |     
 46 |     a = 1 # the first row in the resultsTable that should have results assigned.
 47 |     c = 4 # first column that should have corrected pvalues
 48 |     
 49 |     # Iterate through each demographic variable
 50 |     for (i in min(demographicColumns):max(demographicColumns)) {
 51 |       b = 2 # the first column in the resultsTable that should have results assigned to it
 52 |       
 53 |       for (j in min(questionColumns):max(questionColumns)) {
 54 |         # Create a temp table and filter out blanks.
 55 |         tempTable <- likertTable[, c(j, i)]
 56 |         tempTable <- tempTable[tempTable[2] != "",]
 57 |         
 58 |         # perform the kruskal wallis test
 59 |         tempKruskal <- kruskal.test(tempTable[, 1] ~ tempTable[, 2])
 60 |         
 61 |         # add statistic value and pvalue to the table.
 62 |         resultsTable[a, b] <- tempKruskal$statistic
 63 |         resultsTable[a, b + 1] <- tempKruskal$p.value
 64 |         
 65 |         # add 2 so next results go in the proper place
 66 |         b = b + 3
 67 |       } # end question loop
 68 |       
 69 |       a = a + 1 # go to next row for next set of demographic info
 70 |       
 71 |     } # end demographic loop
 72 |     
 73 |     # add corrected pvalue
 74 |     
 75 |     z = 1
 76 |     for (z in 1:length(questionColumns)) {
 77 |       tempCorrected <-
 78 |         p.adjust(as.vector(unlist(resultsTable[, c - 1])), method = "holm")
 79 |       
 80 |       resultsTable[, c] <- tempCorrected
 81 |       
 82 |       c = c + 3
 83 |       
 84 |     } # end addition of corrected pvalues
 85 |     
 86 |     return(resultsTable)
 87 |     
 88 |   } # end function
 89 | 
 90 | 
 91 | 
 92 | addChiSquared <-
 93 |   function(likertTable,
 94 |            resultsTable,
 95 |            questionColumns,
 96 |            demographicColumns,
 97 |            minVal = 25) {
 98 |     i = min(demographicColumns) # row
 99 |     j = min(questionColumns) # column
100 |     
101 |     a = 1 # the first row in the resultsTable that should have results assigned.
102 |     c = 4 # first column that should have corrected pvalues
103 |     
104 |     # Iterate through each demographic variable
105 |     for (i in min(demographicColumns):max(demographicColumns)) {
106 |       b = 2 # the first column in the resultsTable that should have results assigned to it
107 |       
108 |       for (j in min(questionColumns):max(questionColumns)) {
109 |         # Create a temp table and filter out blanks.
110 |         tempTable <- likertTable[, c(j, i)]
111 |         tempTable <-
112 |           tempTable[tempTable[2] != "" & !is.na(tempTable[1]) ,]
113 |         
114 |         # Check to see if any group has less than min responses.
115 |         testSize <-
116 |           tempTable %>% count(tempTable[1:2]) %>% group_by(across(.cols = 2)) %>% summarise(n = sum(n))
117 |         
118 |         tooSmall <- testSize[testSize[2] < minVal, 1]
119 |         print(unlist(tooSmall))
120 |         # print(colnames(tempTable)) # use to debug
121 |         
122 |         # Based on this, either assign a "can't be tested"/NA indicator or the values for the chi-squared test.
123 |         if (length(setdiff(unique(tempTable[, 2]), tooSmall)) < 2) {
124 |           resultsTable[a, b] <- NA
125 |           resultsTable[a, b + 1] <- NA
126 |           
127 |         } else {
128 |           tempTable <- tempTable[!(tempTable[, 2] %in% tooSmall),]
129 |           
130 |           # perform the chi squared test
131 |           tempChi <- chisq.test(tempTable[, 1], tempTable[, 2])
132 |           
133 |           # add statistic value and pvalue to the table.
134 |           resultsTable[a, b] <- tempChi$statistic
135 |           resultsTable[a, b + 1] <- tempChi$p.value
136 |           
137 |         } # end of else
138 |         
139 |         # Use this if you want to export a Pivot table
140 |         # pivot <- pivot_wider(testSize,
141 |         #                         names_from = colnames(testSize[1]),
142 |         #                         values_from = n)
143 |         
144 |         # Clean up
145 |         remove(testSize, tooSmall)
146 |         
147 |         # add 2 so next results go in the proper place
148 |         b = b + 3
149 |       } # end question loop
150 |       
151 |       a = a + 1 # go to next row for next set of demographic info
152 |       
153 |     } # end demographic loop
154 |     
155 |     # add corrected pvalue
156 |     
157 |     z = 1
158 |     for (z in 1:length(questionColumns)) {
159 |       tempCorrected <-
160 |         p.adjust(as.vector(unlist(resultsTable[, c - 1])), method = "holm")
161 |       
162 |       resultsTable[, c] <- tempCorrected
163 |       
164 |       c = c + 3
165 |       
166 |     } # end addition of corrected pvalues
167 |     
168 |     return(resultsTable)
169 |     
170 |   } # end function
171 | 
172 | 
173 | 
174 | # This function needs to compare between categories within a question to see
175 | # which categories are significantly different.
176 | 
177 | posthocChiSquared <-
178 |   function(likertTable,
179 |            questionColumn,
180 |            demographicColumn,
181 |            minVal = 25,
182 |            correction = TRUE) {
183 |     tempTable <- likertTable[, c(questionColumn, demographicColumn)]
184 |     tempTable <-
185 |       tempTable[tempTable[2] != "" & !is.na(tempTable[1]) , ]
186 |     
187 |     # Check to see if any group has less than min allowed responses.
188 |     testSize <-
189 |       tempTable %>% count(tempTable[1:2]) %>% group_by(across(.cols = 2)) %>% summarise(n = sum(n))
190 |     
191 |     tooSmall <- testSize[testSize[2] < minVal, 1]
192 |     print(unlist(tooSmall))
193 |     
194 |     # Filter out too small categories
195 |     tempTable <- tempTable[!(tempTable[, 2] %in% tooSmall), ]
196 |     # and make sure demographic is a factor (this gets lost sometimes)
197 |     if (is.factor(tempTable[, 2]) == FALSE) {
198 |       tempTable[, 2] <- as.factor(tempTable[, 2])
199 |     }
200 |     
201 |     # Make sure that the number of unique is more than two
202 |     if (nlevels(tempTable[, 2]) < 3) {
203 |       stop(print("Too few levels"))
204 |     }
205 |     
206 |     numLevels <- nlevels(tempTable[, 2])
207 |     
208 |     # create the results table
209 |     #create matrix with correct number of columns
210 |     resultsTable <- matrix(rep(999, times = numLevels ^ 2),
211 |                            ncol = numLevels,
212 |                            byrow = TRUE)
213 |     
214 |     #define column names and row names of matrix
215 |     tempLevels <- levels(tempTable[, 2])
216 |     colnames(resultsTable) <- tempLevels
217 |     rownames(resultsTable) <- tempLevels
218 |     
219 |     # for each [i,j] pair of factors add the pvalue for chisquared test
220 |     i = 1 # row
221 |     j = 1 # column
222 |     for (i in 1:numLevels) {
223 |       for (j in 1:numLevels) {
224 |         if (i != j) {
225 |           # subset for i and j levels
226 |           testTable <-
227 |             tempTable[tempTable[, 2] %in% tempLevels[c(i, j)] ,]
228 |           
229 |           # run test and assign pvalue to i,j spot
230 |           resultsTable[i, j] <-
231 |             chisq.test(testTable[, 1], testTable[, 2])$p.value
232 |           
233 |         } else {
234 |           resultsTable[i, j] <- NA
235 |         }
236 |         
237 |       } # end column loop
238 |       
239 |     } # end row loop
240 |     
241 |     # remove the lower triangle--can probably make this a filter but this is easy
242 |     resultsTable[lower.tri(resultsTable, diag = FALSE)] <- NA
243 |     
244 |     # correct pvalues
245 |     if (correction == TRUE) {
246 |       resultsTable <-
247 |         matrix(
248 |           p.adjust(as.vector(resultsTable), method = 'holm'),
249 |           ncol = numLevels,
250 |           dimnames = list(tempLevels, tempLevels)
251 |         )
252 |     }
253 |     #convert matrix to a tibble
254 |     resultsTable <- as_tibble(resultsTable, rownames = "levels")
255 |     
256 |     return(resultsTable)
257 |     
258 |   } # end function
259 | 


--------------------------------------------------------------------------------
/eDNA_helpers.R:
--------------------------------------------------------------------------------
  1 | 
  2 | ## ----- Data manip. funs. | transpose ---------------------------
  3 | 
  4 | speciesSite <- function(inputOTU, rare = 0) {
  5 |     #input OTU should only be data, no text/ID fields.
  6 |     # inputs Species as columns, outputs Species as rows.
  7 |     temp <- inputOTU[ , colSums(inputOTU) > rare]
  8 |     temp <- as.data.frame(inputOTU)
  9 |     temp.col.names <- colnames(temp)
 10 |     temp <- as.data.frame(t(temp))
 11 |     rownames(temp) <- temp.col.names
 12 |     
 13 |     return(temp)
 14 | }
 15 | 
 16 | siteSpecies <- function(inputOTU, rowNames, rare = 0) {
 17 |     #input OTU should only be data, no text/descriptions.
 18 |     #inputs Species as rows, outputs Species as columns
 19 |     temp <- as.data.frame(inputOTU, row.names = rowNames)
 20 |     temp <- as.data.frame(t(temp))
 21 |     colnames(temp) <- rowNames
 22 |     temp <- temp[rowSums(temp) > rare,]
 23 |     
 24 |     return(temp)
 25 |     
 26 | }
 27 | 
 28 | 
 29 | ## ----- Data manip. funs. | Compositional Matrices ----------------
 30 | 
 31 | compMatrix <- function(inputMatrix, z.warning = 0.8){
 32 |     # square-root Bayesian-multiplicative replacement of zeros with the cmultRepl()
 33 |     # function (Ladin et al., 2021) 
 34 |     
 35 |     # Most other code in GitHub uses CZM. e.g. See
 36 |     # https://github.com/ggloor/CoDa_microbiome_tutorial/wiki/Part-1:-Exploratory-Compositional-PCA-Biplot
 37 |     # and https://raw.githubusercontent.com/ggloor/CoDaSeq/6ff864aade46cd3c8b0eff3bb54d5460775f92cd/CoDaSeq/vignettes/CoDaSeq_vignette.Rnw
 38 |     # This latter contends that this is the most principled method.
 39 |     
 40 |     zeroRepl <- cmultRepl(inputMatrix,
 41 |                           label = 0,
 42 |                           method = "CZM",
 43 |                           z.warning = z.warning)
 44 |     
 45 |     output <- cdt.acomp(x = zeroRepl) %>% 
 46 |         as.data.frame()
 47 |     #rownames(output) <- rownames(inputMatrix)
 48 |     
 49 |     ## see https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7811025/
 50 |     
 51 |     return(output)
 52 | }
 53 | 
 54 | 
 55 | ## ----- Data Summary | Reads ------------------------------------------
 56 | # Creates a summary table for number of eDNA reads found; total number of species variants, etc. 
 57 | # note this is customized for Eco Mol output tables. Column names will differ for other providers.
 58 | 
 59 | 
 60 | ecoMolSummary <- function(ecoMolInput) {
 61 |     output <- tibble(
 62 |         sumReads = dplyr::select(ecoMolInput, asvAbsoluteAbundance) %>% sum(),
 63 |         ASVCount = dplyr::select(ecoMolInput, ASVHeader) %>% unique() %>% nrow(),
 64 |         genusCount = dplyr::select(ecoMolInput, genusBLASTn) %>% unique() %>% nrow()
 65 |     )
 66 |     
 67 |     return (output)
 68 | }
 69 | 
 70 | 
 71 | 
 72 | ## ----- Data Summary | Alpha Diversity --------------------------
 73 | 
 74 | # Note these two are based on the VEGAN function, but with modifications to preserve data order. Per this issue I created, changing the data order is the desired behavior and they won't fix it, so this is my workaround: https://github.com/vegandevs/vegan/issues/552
 75 | 
 76 | `specnumberMOD` <-
 77 |     function(x, groups, MARGIN = 1)
 78 |     {
 79 |         if (!missing(groups)) {
 80 |             if (length(groups) == 1)
 81 |                 groups <- rep(groups, nrow(x))
 82 |             groups <- factor(groups, levels = unique(groups)) # this preserves the data order!
 83 |             x <- aggregate(x, list(groups), max) # max is used because the actual number doesn't matter, just that it is over 0
 84 |             rownames(x) <- x[,1] 
 85 |             x <- x[,-1]
 86 |         }
 87 |         if (length(dim(x)) > 1)
 88 |             apply(x > 0, MARGIN, sum)
 89 |         else
 90 |             sum(x > 0)
 91 |     }
 92 | 
 93 | `diversityMOD` <- function (x, index = "shannon", groups, equalize.groups = FALSE, 
 94 |                             MARGIN = 1, base = exp(1)) 
 95 | {
 96 |     x <- drop(as.matrix(x))
 97 |     if (!is.numeric(x)) 
 98 |         stop("input data must be numeric")
 99 |     if (any(x < 0, na.rm = TRUE)) 
100 |         stop("input data must be non-negative")
101 |     if (!missing(groups)) {
102 |         if (MARGIN == 2) 
103 |             x <- t(x)
104 |         if (length(groups) == 1) 
105 |             groups <- rep(groups, NROW(x))
106 |         if (equalize.groups) 
107 |             x <- decostand(x, "total")
108 |         groups <- factor(groups, levels = unique(groups)) # this preserves the data order!
109 |         x <- aggregate(x, list(groups), sum)
110 |         rownames(x) <- x[, 1]
111 |         x <- x[, -1, drop = FALSE]
112 |         if (MARGIN == 2) 
113 |             x <- t(x)
114 |     }
115 |     INDICES <- c("shannon", "simpson", "invsimpson")
116 |     index <- match.arg(index, INDICES)
117 |     if (length(dim(x)) > 1) {
118 |         total <- apply(x, MARGIN, sum)
119 |         x <- sweep(x, MARGIN, total, "/")
120 |     }
121 |     else {
122 |         x <- x/(total <- sum(x))
123 |     }
124 |     if (index == "shannon") 
125 |         x <- -x * log(x, base)
126 |     else x <- x * x
127 |     if (length(dim(x)) > 1) 
128 |         H <- apply(x, MARGIN, sum, na.rm = TRUE)
129 |     else H <- sum(x, na.rm = TRUE)
130 |     if (index == "simpson") 
131 |         H <- 1 - H
132 |     else if (index == "invsimpson") 
133 |         H <- 1/H
134 |     if (any(NAS <- is.na(total))) 
135 |         H[NAS] <- NA
136 |     H
137 | }
138 | 
139 | 
140 | 
141 | 
142 | alphaMetrics <- function(inputOTUSiSp, groupNames, replNames){
143 |     
144 |     alphaTable <- tibble(
145 |         siteNames = rownames(inputOTUSiSp),
146 |         siteType = groupNames,
147 |         replicate = replNames,
148 |         speciesRichness = specnumberMOD(inputOTUSiSp , MARGIN = 1),
149 |         shannonRichness = vegan::diversity(inputOTUSiSp, index = "shannon"),
150 |         effectiveSR = exp(shannonRichness),
151 |         invSimpson = vegan::diversity(inputOTUSiSp, index = "invsimpson")
152 |     )
153 |     
154 |     temp <- alphaTable %>% group_by(siteType) %>% summarise(siteType_n = n())
155 |     
156 |     alphaTable <- left_join(x = alphaTable, y = temp)
157 |     
158 |     return(alphaTable)
159 |     
160 | }
161 | 
162 | alphaGroupMetrics <- function(inputOTUSiSp, groupNames) {
163 |     alphaTable <- tibble(
164 |         siteType = groupNames %>% unique(), # unique preserves order of groupNames, no alph reordering
165 |         speciesRichness = specnumberMOD(inputOTUSiSp, MARGIN = 1, groups = groupNames),
166 |         shannonRichness = diversityMOD(inputOTUSiSp, index = "shannon", groups = groupNames),
167 |         effectiveSR = exp(shannonRichness),
168 |         invSimpson = diversityMOD(inputOTUSiSp, index = "invsimpson", groups = groupNames)
169 |     )
170 |     
171 |     temp <- as_tibble(groupNames) %>% group_by(value) %>% summarise(siteType_n = n())
172 |     
173 |     alphaTable <- left_join(x = alphaTable, y = temp, join_by(siteType == value))
174 |     
175 |     return(alphaTable)
176 |     
177 | }
178 | 
179 | 
180 | ## ----- Plotting functions | Aitchison -------------------------------
181 | # helper function--note this is taken from other sources. I couldn't identify the original source of the code but e.g. it is used here: http://sthda.com/english/wiki/ggplot2-quick-correlation-matrix-heatmap-r-software-and-data-visualization
182 | 
183 | get_lower_tri <- function(inpMatrix){
184 |     inpMatrix[upper.tri(inpMatrix, diag = T)]<- NA
185 |     return(inpMatrix)
186 | }
187 | 
188 | 
189 | 
190 | aitHeatmap <- function(inputDist, fillColor1 = "blue", fillColor2 = "orange", textPlease = FALSE){
191 |     graph <-
192 |         inputDist %>%
193 |         as.matrix() %>%
194 |         get_lower_tri() %>%
195 |         as.data.frame() %>%
196 |         tibble::rownames_to_column("plot1") %>%
197 |         pivot_longer(-c(plot1),
198 |                      names_to = "plot2",
199 |                      values_to = "distance",
200 |                      values_drop_na = T) %>%
201 |         ggplot(aes(x = plot1, y = plot2, fill = distance)) + 
202 |         geom_raster() +
203 |         
204 |         scale_fill_gradient(low = fillColor1, high = fillColor2,  
205 |                             name="Aitchison\nDistance") +
206 |         theme(
207 |             axis.title.x = element_blank(),
208 |             axis.title.y = element_blank(),
209 |             panel.grid.major = element_blank(),
210 |             #panel.border = element_blank(),
211 |             panel.background = element_blank(),
212 |             #axis.ticks = element_blank(),
213 |             legend.justification = c(1, 0),
214 |             legend.position = c(0.5, 0.7),
215 |             legend.direction = "horizontal",
216 |             axis.text.x = element_text(angle = 45, vjust = 1, 
217 |                                        size = 12, hjust = 1),
218 |             axis.text.y = element_text(size = 12))+
219 |         guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
220 |                                      title.position = "top", title.hjust = 0.5))
221 |     
222 |     if(textPlease == TRUE){
223 |         graph <- graph +
224 |             geom_text(aes(label = round(distance)))
225 |         
226 |         
227 |     }
228 |     
229 |     
230 |     return(graph)   
231 | }
232 | 
233 | 
234 | # Create function that takes distance matrix, switches to long form, then
235 | # creates relevant columns. Then can feed it into a box plot.
236 | 
237 | aitComparison <- function(inputDist, remap = NULL, repeatSamples = FALSE, fillColor = NULL, levelsPlot = NULL, plotPlease = TRUE) {
238 |     # remap should have three columns: the original site names, pretty site names,
239 |     # and the type of site (for grouping)
240 |     temp <-
241 |         inputDist %>%
242 |         as.matrix() %>%
243 |         get_lower_tri() %>%
244 |         as.data.frame() %>%
245 |         tibble::rownames_to_column("plot1") %>%
246 |         pivot_longer(
247 |             -c(plot1),
248 |             names_to = "plot2",
249 |             values_to = "distance",
250 |             values_drop_na = T
251 |         )
252 |     
253 |     if(!is.null(remap)) {
254 |         # make this a remap so that the names aren't so awful. need to add else to handle if remap is NULL
255 |         old <- remap[[1]]
256 |         new <- remap[[2]]
257 |         temp$plot1[ temp$plot1 %in% old] <- new[base::match(temp$plot1, old)]
258 |         temp$plot2[ temp$plot2 %in% old] <- new[match(temp$plot2, old)]
259 |         
260 |         if(repeatSamples == TRUE) {
261 |             type <- remap[[3]]
262 |             temp$type1 <- type[match(temp$plot1, new)]
263 |             temp$type2 <- type[match(temp$plot2, new)]
264 |             
265 |         }
266 |         
267 |     }
268 |     
269 |     if(repeatSamples == FALSE){
270 |         temp <- temp %>%
271 |             mutate(pair = paste0(plot1, "-", plot2)) 
272 |     }
273 |     
274 |     if (repeatSamples == TRUE) {
275 |         temp <- temp %>%
276 |             mutate(pair = ifelse(
277 |                 type1 < type2,
278 |                 paste(type1, type2, sep = "-"),
279 |                 paste(type2, type1, sep = "-")
280 |             ))
281 |     } 
282 |     
283 |     print(temp$pair)
284 |     
285 |     if(is.null(fillColor) == TRUE) {
286 |         library(RColorBrewer)
287 |         fillColor <- brewer.pal(length(unique(temp$pair)),"Set1")
288 |     }
289 |     
290 |     if(is.null(levelsPlot) == TRUE){
291 |         levelsPlot <- sort(unique(temp$pair))
292 |     }
293 |     
294 |     
295 |     #make the graph here.
296 |     if(repeatSamples == TRUE & plotPlease == TRUE){
297 |         temp <- temp %>%
298 |             mutate(pair = factor(pair, levels = levelsPlot)) %>%
299 |             ggplot(aes(y = distance, x = pair)) +
300 |             stat_boxplot(geom = "errorbar",
301 |                          width = 0.25) +
302 |             geom_boxplot() +
303 |             geom_jitter(aes(color = pair), width = 0.05, size = 3) +
304 |             scale_x_discrete(limits = levelsPlot) +
305 |             theme(legend.position = "none",
306 |                   plot.margin = margin(t = .5,  # Top margin
307 |                                        r = .5,  # Right margin
308 |                                        b = .5,  # Bottom margin
309 |                                        l = 1.5,  # Left margin
310 |                                        unit = "cm"
311 |                   )) +
312 |             theme(axis.text.x = element_text(
313 |                 angle = 45,
314 |                 vjust = 1,
315 |                 size = 10,
316 |                 hjust = 1
317 |             )) +
318 |             scale_color_manual(values = fillColor) +
319 |             labs(x = element_blank(),
320 |                  y = "Aitchison Distance") 
321 |     }
322 |     
323 |     
324 |     
325 |     if(repeatSamples == FALSE & plotPlease == TRUE){
326 |         temp <- temp %>%
327 |             ggplot(aes(y = distance, x = pair)) +
328 |             geom_bar(aes(y = distance, fill = pair), stat = "identity") +
329 |             geom_text(aes(label = round(distance,1)), vjust = -0.5) +
330 |             scale_x_discrete(limits = levelsPlot) +
331 |             theme(legend.position = "none") +
332 |             theme(axis.text.x = element_text(angle = 45, vjust = 1, 
333 |                                              size = 10, hjust = 1)) +
334 |             scale_fill_manual(values = fillColor) +
335 |             labs(x = element_blank(),
336 |                  y = "Aitchison Distance"
337 |             )
338 |     }
339 |     
340 |     
341 |     return(temp)
342 | }
343 | 
344 | 
345 | 
346 | 
347 | 


--------------------------------------------------------------------------------
/extract_taxa_TITAN.R:
--------------------------------------------------------------------------------
  1 | require(dplyr)
  2 | require(tibble)
  3 | require(data.table)
  4 | 
  5 | ## --- Make some nice tables for each TITAN ------------------------------
  6 | 
  7 | ## For use with basic site x species/genus table, e.g. with mushrooms collected in the field.
  8 | 
  9 | extract.titan.taxa <-
 10 |     function(titan.out,
 11 |              purity.cutoff = 0.9,
 12 |              reliability.cutoff = 0.9,
 13 |              taxa.label.name = "Genus") {
 14 |         titan.out <- titan.out
 15 |         
 16 |         titan.out.filtered <- titan.out$sppmax %>%
 17 |             as.data.frame() %>%
 18 |             rownames_to_column()
 19 |         
 20 |         titan.out.filtered <-
 21 |             titan.out.filtered[titan.out.filtered$purity >= purity.cutoff &
 22 |                                    titan.out.filtered$reliability >= reliability.cutoff, ]
 23 |         titan.out.filtered <-
 24 |             titan.out.filtered[order(titan.out.filtered$zscore, decreasing = T), ] %>%
 25 |             mutate(zgrp = ifelse(maxgrp == 1, "z-", "z+"))
 26 |         
 27 |         
 28 |         titan.out.taxonomy.summary <-
 29 |             group_by(titan.out.filtered, zgrp, rowname) %>%
 30 |             summarise(
 31 |                 mean.zscore = mean(zscore),
 32 |                 mean.purity = mean(purity),
 33 |                 mean.reliability = mean(reliability),
 34 |                 mean.zenv.cp = mean(zenv.cp),
 35 |                 mean.5pct.cp = mean(`5%`),
 36 |                 mean.95pct.cp = mean(`95%`)
 37 |             )
 38 |         
 39 |         
 40 |         colnames(titan.out.taxonomy.summary)[1:ncol(titan.out.taxonomy.summary)] <-
 41 |             c(
 42 |                 "Decreasing/Increasing Z Taxa",
 43 |                 taxa.label.name,
 44 |                 "Mean Z Score",
 45 |                 "Mean Purity",
 46 |                 "Mean Reliability",
 47 |                 "Mean Env CP z-max",
 48 |                 "Mean 5% CP",
 49 |                 "Mean 95% CP"
 50 |             )
 51 |         
 52 |         return(ungroup(titan.out.taxonomy.summary))
 53 |         
 54 |     }
 55 | 
 56 | 
 57 | 
 58 | 
 59 | ## For use with NGS data, could be made to work with phyloseq etc.
 60 | 
 61 | # titan.out = I5CL.titan.pctN
 62 | # taxonomy.table = raw_dada2_taxa
 63 | 
 64 | 
 65 | extract.titan.taxa.NGS <-
 66 |     function(titan.out,
 67 |              taxonomy.table,
 68 |              taxonomy.table.merge = "taxa.label.unique",
 69 |              taxa.level = Genus,
 70 |              purity.cutoff = 0.90,
 71 |              reliability.cutoff = 0.90,
 72 |              taxa.label.name = "taxa.label.unique",
 73 |              label = label) {
 74 |         titan.out <- titan.out
 75 |         taxa.level <- enquo(taxa.level)
 76 |         label <- enquo(label)
 77 |         taxonomy.table <- taxonomy.table
 78 |         
 79 |         titan.out.filtered <- titan.out$sppmax %>%
 80 |             as.data.frame()
 81 |         titan.out.filtered[, taxa.label.name] <-
 82 |             rownames(titan.out.filtered)
 83 |         titan.out.filtered <-
 84 |             titan.out.filtered[titan.out.filtered$purity >= purity.cutoff &
 85 |                                    titan.out.filtered$reliability >= reliability.cutoff, ]
 86 |         titan.out.filtered <-
 87 |             titan.out.filtered[order(titan.out.filtered$zscore, decreasing = T), ]
 88 |         titan.out.filtered <-
 89 |             merge(
 90 |                 titan.out.filtered,
 91 |                 taxonomy.table,
 92 |                 by.x = taxa.label.name,
 93 |                 by.y = taxonomy.table.merge,
 94 |                 sort = F
 95 |             ) %>%
 96 |             mutate(zgrp = ifelse(maxgrp == 1, "z-", "z+"))
 97 |         
 98 |         
 99 |         titan.out.taxonomy.summary <-
100 |             group_by(titan.out.filtered, zgrp, !!taxa.level) %>%
101 |             summarise(
102 |                 mean.zscore = mean(zscore),
103 |                 mean.purity = mean(purity),
104 |                 mean.reliability = mean(reliability),
105 |                 mean.zenv.cp = mean(zenv.cp),
106 |                 mean.5pct.cp = mean(`5%`),
107 |                 mean.95pct.cp = mean(`95%`),
108 |                 count = n(),
109 |                 svs = paste(unique(!!label), collapse = ";")
110 |             )
111 |         
112 |         colnames(titan.out.taxonomy.summary)[1] <-
113 |             "Decreasing/Increasing Z Taxa"
114 |         colnames(titan.out.taxonomy.summary)[3:ncol(titan.out.taxonomy.summary)] <-
115 |             c(
116 |                 "Mean Z Score",
117 |                 "Mean Purity",
118 |                 "Mean Reliability",
119 |                 "Mean Env CP z-max",
120 |                 "Mean 5% CP",
121 |                 "Mean 95% CP",
122 |                 "Count",
123 |                 "All SVs or OTUs"
124 |             )
125 |         
126 |         return(ungroup(titan.out.taxonomy.summary))
127 |         
128 |     }
129 | 
130 | 
131 | 
132 | 
133 | 
134 | ## -- For cell filler for table assembler ---------------------
135 | 
136 | # this needs to fill cells for EITHER z1 or z2 (z- or z+ taxa)
137 | 
138 | # taxa.col.name <- quo(Genus)
139 | # titan.taxa.tables <- list(NGS.TITAN.cnd.Genus, NGS.TITAN.sesr.Genus)
140 | # table.col.names <- c("cnd", "sesr")
141 | # z.score.pct.cutoff <- .5
142 | # round.val <- 3
143 | # which.z = "z-"
144 | # taxon.list = all.z1.taxon
145 | # j = 3
146 | 
147 | 
148 | cell.filler <-
149 |     function(titan.taxa.tables,
150 |              which.z,
151 |              taxon.list,
152 |              taxa.col.name,
153 |              table.col.names,
154 |              z.score.pct.cutoff,
155 |              round.val) {
156 |         # create the table framework...
157 |         
158 |         taxa.comparison.table <- data.table(`All Taxon` = taxon.list,
159 |                                             `Z type` = rep(which.z, length(taxon.list)))
160 |         
161 |         
162 |         # ...and create the columns for the data to go into.
163 |         
164 |         for (i in 1:length(titan.taxa.tables)) {
165 |             taxa.comparison.table[, table.col.names[i]] <-
166 |                 rep("", length(taxon.list))
167 |             
168 |         }
169 |         
170 |         
171 |         
172 |         # Now fill everything.
173 |         
174 |         for (i in 1:length(titan.taxa.tables)) {
175 |             temp.table <- titan.taxa.tables[[i]]
176 |             temp.table <-
177 |                 temp.table[temp.table$`Decreasing/Increasing Z Taxa` == which.z ,]
178 |             
179 |             # create a cutoff based on user input.
180 |             z.score.percentile <-
181 |                 quantile(temp.table$`Mean Z Score`, z.score.pct.cutoff)
182 |             
183 |             
184 |             
185 |             # iterate through each taxon in your taxon.list...
186 |             for (j in 1:length(taxon.list)) {
187 |                 check.list <- unlist(select(temp.table, !!taxa.col.name))
188 |                 
189 |                 # is this taxon in the titan output of the ith table?
190 |                 if (taxon.list[j] %in% check.list) {
191 |                     # if yes, then give it some information like if it is a z- or z+ taxa, z score, change point
192 |                     
193 |                     # isolate the info for the right genus
194 |                     temp.row <-
195 |                         temp.table[select(temp.table, !!taxa.col.name) == taxon.list[j] &
196 |                                        !(is.na(select(
197 |                                            temp.table, !!taxa.col.name
198 |                                        ))), ]
199 |                     
200 |                     # bold the z score if it's above the cutoff
201 |                     z.score.text <-
202 |                         ifelse(
203 |                             temp.row[, "Mean Z Score"] >= z.score.percentile ,
204 |                             yes = paste0(
205 |                                 "**",
206 |                                 temp.row[, "Mean Z Score"] %>% round(digits = round.val),
207 |                                 "**"
208 |                             ),
209 |                             no = paste0(temp.row[, "Mean Z Score"])
210 |                         )
211 |                     
212 |                     text <-
213 |                         paste0("Z-score: ",
214 |                                z.score.text,
215 |                                "; CP: ",
216 |                                temp.row[, "Mean Env CP z-max"] %>% round(round.val))
217 |                     
218 |                     taxa.comparison.table[j , i + 2] <- text
219 |                     
220 |                     
221 |                 }
222 |                 
223 |                 # if not R will just leave it blank.
224 |                 
225 |                 
226 |             }
227 |         }
228 |         
229 |         return(taxa.comparison.table)
230 |         
231 |     }
232 | 
233 | ## --- Multivariable summary table assembly ----------------------
234 | 
235 | combine.titan.results <-
236 |     function(titan.taxa.tables,
237 |              taxa.col.name,
238 |              table.col.names,
239 |              z.score.pct.cutoff = .5,
240 |              round.val = 3) {
241 |         taxa.col.name <- enquo(taxa.col.name)
242 |         
243 |         all.z1.taxon <- c()
244 |         all.z2.taxon <- c()
245 |         
246 |         
247 |         # create z- and z+ separately b/c some taxa will have both (esp. for higher taxonomic levels.)
248 |         
249 |         # create a list of all taxonomic groups for z- taxon
250 |         for (i in 1:length(titan.taxa.tables)) {
251 |             temp.z1.table <-
252 |                 titan.taxa.tables[[i]] %>% subset(`Decreasing/Increasing Z Taxa` == "z-")
253 |             
254 |             all.z1.taxon <-
255 |                 c(all.z1.taxon,
256 |                   select(temp.z1.table, !!taxa.col.name) %>% unlist())
257 |             
258 |         }
259 |         
260 |         # create a list of all taxonomic groups for z+ taxon
261 |         for (i in 1:length(titan.taxa.tables)) {
262 |             temp.z2.table <-
263 |                 titan.taxa.tables[[i]] %>% subset(`Decreasing/Increasing Z Taxa` == "z+")
264 |             
265 |             all.z2.taxon <-
266 |                 c(all.z2.taxon,
267 |                   select(temp.z2.table, !!taxa.col.name) %>% unlist())
268 |             
269 |         }
270 |         
271 |         
272 |         # make that only uniques, and remove NA
273 |         
274 |         all.z1.taxon <-
275 |             all.z1.taxon[!(is.na(all.z1.taxon))] %>% unique()
276 |         all.z1.taxon <- all.z1.taxon[order(all.z1.taxon)]
277 |         
278 |         
279 |         all.z2.taxon <-
280 |             all.z2.taxon[!(is.na(all.z2.taxon))] %>% unique()
281 |         all.z2.taxon <- all.z2.taxon[order(all.z2.taxon)]
282 |         
283 |         
284 |         
285 |         
286 |         # and now iterate through to put text in each slot.
287 |         
288 |         summary.table.z1 <-
289 |             cell.filler(
290 |                 titan.taxa.tables = titan.taxa.tables,
291 |                 which.z = "z-",
292 |                 taxon.list = all.z1.taxon,
293 |                 taxa.col.name = taxa.col.name,
294 |                 table.col.names = table.col.names,
295 |                 z.score.pct.cutoff = z.score.pct.cutoff,
296 |                 round.val = round.val
297 |             )
298 |         
299 |         summary.table.z2 <-
300 |             cell.filler(
301 |                 titan.taxa.tables = titan.taxa.tables,
302 |                 which.z = "z+",
303 |                 taxon.list = all.z2.taxon,
304 |                 taxa.col.name = taxa.col.name,
305 |                 table.col.names = table.col.names,
306 |                 z.score.pct.cutoff = z.score.pct.cutoff,
307 |                 round.val = round.val
308 |             )
309 |         
310 |         
311 |         # smash the two resulting tables together
312 |         
313 |         summary.table <- rbind(summary.table.z1, summary.table.z2)
314 |         
315 |         # alphebetize
316 |         
317 |         summary.table <-
318 |             summary.table[order(summary.table$`All Taxon`),]
319 |         
320 |         
321 |         return(summary.table)
322 |         
323 |         
324 |     }
325 | 


--------------------------------------------------------------------------------
/group_PERMANOVA.R:
--------------------------------------------------------------------------------
  1 | 
  2 | ## Function for sequentially testing multiple variables in PERMANOVA, then
  3 | ## adjusting pseudo-F values. Also calculates multivariate dispersion for
  4 | ## categorical variables.
  5 | 
  6 | require(vegan)
  7 | require(tibble)
  8 | 
  9 | # First, a quick function to calculate omega squared based on:
 10 |     # https://academic.oup.com/bioinformatics/article/31/15/2461/188732
 11 |     # http://www.real-statistics.com/multiple-regression/other-measures-effect-size-anova/ 
 12 |     # http://psych.colorado.edu/~willcutt/pdfs/Olejnik_2003.pdf if you want to make this generalized...
 13 |     # https://gist.github.com/arnoud999/e677516ed45e9a11817e for some r code of generalization
 14 | 
 15 | PERMANOVA.omega2 <- function(adonis2.object, num.control.vars) {
 16 |     
 17 | 
 18 |     SSE <- adonis2.object[ 1 + num.control.vars, "SumOfSqs"] 
 19 |     dfE <- adonis2.object[ 1 + num.control.vars, "Df"]
 20 |     MSE <- adonis2.object["Residual", "SumOfSqs"]/adonis2.object["Residual", "Df"]      # Mean Square for Error
 21 |     SST <- sum(adonis2.object$SumOfSqs)
 22 |     
 23 |     omega2 <- (SSE - dfE*MSE) / (SST + MSE)
 24 |     
 25 |     return(omega2)
 26 |     
 27 | }
 28 | 
 29 | 
 30 | ## Note that I have some defaults for adonis2: two processors (parallel = 2),
 31 | ## 99999 permutations, and using the bray-curtis distance matrix (method =
 32 | ## "bray")
 33 | 
 34 | ## Note also that this does not test for interaction effects.
 35 | 
 36 | ## Variable definitions:
 37 |      ## var.names should be a vector of column names, one for each variable to be tested
 38 |           # Might consider adding functionality to take column names or position numbers, but for now...
 39 |      ## var.table is the table where the independent variables are.
 40 |      ## var.table.c is a string of the table name: eventually generate this automatically.
 41 |      ## species.table is the species/site matrix (e.g. created by matrify)
 42 |      ## species.table.c is a string of the table name: eventually generate this automatically.
 43 |      ## control.vars are any variables that need to be included before testing variables--character string, e.g. "var1 + var2"
 44 |      ## num.control.vars specifies the number of control variables used. Replace this later with something deriving it from control.vars text...
 45 |      ## by.adonsi2 is to pass through for the by = argument in adonis2
 46 | 
 47 | 
 48 | group.PERMANOVA <- function(var.names, var.table, var.table.c, control.vars = "", species.table, species.table.c, num.control.vars = 0, by.adonis2 = "terms", AIC.type = "AICc", perms = 99999, method = "bray") {
 49 | 
 50 |      ## need to order columns otherwise the order of the columns and the order of col.numbers is incorrect.
 51 | 
 52 |      var.table <- var.table[ , order(colnames(var.table))]
 53 |      var.names <- var.names[order(var.names)]
 54 | 
 55 | 
 56 | 
 57 |      output <- tibble(var.names = var.names,
 58 |                                          var.explnd = rep(NA, times = length(var.names)),
 59 |                                          avg.var.explnd = rep(NA),
 60 |                                          pseudo.F = rep(NA),
 61 |                                          F.pval = rep(NA),
 62 |                                          adj.F.pval = rep(NA),
 63 |                                          disp.F.pval = rep(NA),
 64 |                                          adj.disp.F.pval = rep(NA),
 65 |                                          omega2 = rep(NA),
 66 |                                          AIC.stat = rep(NA)
 67 |                               )
 68 |      col.numbers <- which(colnames(var.table) %in% var.names)
 69 |      
 70 |      if (num.control.vars > 0) {
 71 |          control.vars <- paste0(control.vars, " +")
 72 |      }
 73 |      
 74 | 
 75 | 
 76 |      ## Populate the output table.
 77 | 
 78 |      for (i in 1:length(var.names)) {
 79 | 
 80 |         ## No parallel argument--while it would be nice, 2.5.1 vegan + parallel
 81 |         ## update fubar'd something and taking it out was the easiest way to fix
 82 |         ## it. 
 83 |          
 84 |          temp <-
 85 |              adonis2(
 86 |                  formula = as.formula(paste(
 87 |                      species.table.c, "~", control.vars, var.names[i]
 88 |                  )),
 89 |                  permutations = perms,
 90 |                  method = method,
 91 |                  by = by.adonis2,
 92 |                  data = var.table
 93 |              )
 94 | 
 95 |           output$var.explnd[i] <- temp$SumOfSqs[1 + num.control.vars] / 
 96 |                                         temp$SumOfSqs[length(temp$SumOfSqs)]
 97 |           output$avg.var.explnd[i] <- output$var.explnd[i] / temp$Df[1 + num.control.vars]
 98 |           output$omega2[i] <- PERMANOVA.omega2(temp, num.control.vars)
 99 |           output$pseudo.F[i] <- temp$F[num.control.vars + 1]
100 |           output$F.pval[i] <- temp$`Pr(>F)`[num.control.vars + 1]
101 |           output$AIC.stat[i] <- AICc.PERMANOVA2(temp)[[AIC.type]]
102 | 
103 | 
104 | 
105 |                # the ordering is so that col.names[i] works!
106 | 
107 |           if (is.character(var.table[[col.numbers[i]]]) == TRUE |
108 |               is.factor(var.table[[col.numbers[i]]]) == TRUE) {
109 |               
110 |               if (class(species.table) == "dist") {
111 |                   temp.disp <-
112 |                       anova(betadisper(
113 |                           species.table,
114 |                           as.factor(var.table[[col.numbers[i]]])
115 |                       ))
116 |                   
117 |                   output$disp.F.pval[i] <- temp.disp$`Pr(>F)`[1]
118 |                   
119 |               }
120 |               
121 |               else {temp.disp <-
122 |                   anova(betadisper(
123 |                       vegdist(species.table, method = method),
124 |                       as.factor(var.table[[col.numbers[i]]])
125 |                   ))
126 |               
127 |               output$disp.F.pval[i] <- temp.disp$`Pr(>F)`[1]
128 |               }
129 |           }
130 |           
131 |      }
132 |      
133 |      output$adj.F.pval <-
134 |          p.adjust(p = output$F.pval, method = "holm")
135 |      output$adj.disp.F.pval <-
136 |          p.adjust(p = output$disp.F.pval, method = "holm")
137 |      output$AIC.delta <- 
138 |          output$AIC.stat - min(output$AIC.stat)
139 | 
140 | return(output)
141 |      
142 |      
143 |      
144 | }
145 | 
146 | 
147 | group.univ.PERMANOVA <- function(var.names, var.table, var.table.c, control.vars = "", species.vector, species.vector.c, num.control.vars = 0, by.adonis2 = "terms", perms = 99999, method = "euclid", AIC.type ="AICc") {
148 |     
149 |     ## need to order columns otherwise the order of the columns and the order of col.numbers is incorrect.
150 |     
151 |     var.table <- var.table[ , order(colnames(var.table))]
152 |     var.names <- var.names[order(var.names)]
153 |     
154 |     
155 |     
156 |     output <- tibble(var.names = var.names,
157 |                          var.explnd = rep(NA, times = length(var.names)),
158 |                          avg.var.explnd = rep(NA),
159 |                          pseudo.F = rep(NA),
160 |                          F.pval = rep(NA),
161 |                          adj.F.pval = rep(NA),
162 |                          omega2 = rep(NA),
163 |                          AIC.stat = rep(NA)
164 |     )
165 |     col.numbers <- which(colnames(var.table) %in% var.names)
166 |     
167 |     if (num.control.vars > 0) {
168 |         control.vars <- paste0(control.vars, " +")
169 |     }
170 |     
171 |     ## Attempting to get the names from the tables
172 |     #var.table.t <- deparse(substitute(var.table))
173 | 
174 |     
175 |     
176 |     ## Populate the output table.
177 |     
178 |     for (i in 1:length(var.names)) {
179 |         
180 |         ## No parallel argument--while it would be nice, 2.5.1 vegan + parallel
181 |         ## update fubar'd something and taking it out was the easiest way to fix
182 |         ## it. 
183 |         
184 |         temp <-
185 |             adonis2(
186 |                 formula = as.formula(paste(
187 |                     species.vector.c, "~", control.vars, var.names[i]
188 |                 )),
189 |                 permutations = perms,
190 |                 method = method,
191 |                 by = by.adonis2,
192 |                 data = var.table
193 |             )
194 |         
195 |         output$var.explnd[i] <- temp$SumOfSqs[1 + num.control.vars] / temp$SumOfSqs[length(temp$SumOfSqs)]
196 |         output$avg.var.explnd[i] <- output$var.explnd[i] / temp$Df[1 + num.control.vars]
197 |         output$omega2[i] <- PERMANOVA.omega2(temp, num.control.vars)
198 |         output$pseudo.F[i] <- temp$F[num.control.vars + 1]
199 |         output$F.pval[i] <- temp$`Pr(>F)`[num.control.vars + 1]
200 |         output$AIC.stat[i] <- AICc.PERMANOVA2(temp)[[AIC.type]]
201 |         
202 |         
203 |         
204 |         # the ordering is so that col.names[i] works!
205 |         
206 | 
207 |     }
208 |     
209 |     output$adj.F.pval <- p.adjust(p = output$F.pval, method = "holm")
210 |     output$delta.aic <- output$AIC.stat - min(output$AIC.stat)
211 | 
212 | 
213 |     return(output)
214 |     
215 |     
216 |     
217 | }
218 | 
219 | 
220 | 
221 | ## with time, test:
222 |     #             paste0("\"", sample.covariates, "\"" )
223 | 
224 | 
225 | 
226 | 
227 | 
228 | 
229 | 


--------------------------------------------------------------------------------
/group_PERMANOVA_wP.R.R:
--------------------------------------------------------------------------------
  1 | 
  2 | ## Function for sequentially testing multiple variables in PERMANOVA, then
  3 | ## adjusting pseudo-F values. Also calculates multivariate dispersion for
  4 | ## categorical variables.
  5 | 
  6 | # First, a quick function to calculate omega squared based on:
  7 |     # https://academic.oup.com/bioinformatics/article/31/15/2461/188732
  8 |     # http://www.real-statistics.com/multiple-regression/other-measures-effect-size-anova/ 
  9 |     # http://psych.colorado.edu/~willcutt/pdfs/Olejnik_2003.pdf if you want to make this generalized...
 10 |     # https://gist.github.com/arnoud999/e677516ed45e9a11817e for some r code of generalization
 11 | 
 12 | PERMANOVA.omega2 <- function(adonis2.object, num.control.vars) {
 13 |     
 14 | 
 15 |     SSE <- adonis2.object[ 1 + num.control.vars, "SumOfSqs"] 
 16 |     dfE <- adonis2.object[ 1 + num.control.vars, "Df"]
 17 |     MSE <- adonis2.object["Residual", "SumOfSqs"]/adonis2.object["Residual", "Df"]      # Mean Square for Error
 18 |     SST <- sum(adonis2.object$SumOfSqs)
 19 |     
 20 |     omega2 <- (SSE - dfE*MSE) / (SST + MSE)
 21 |     
 22 |     return(omega2)
 23 |     
 24 | }
 25 | 
 26 | 
 27 | 
 28 | ## Note that I have some defaults for adonis2: two processors (parallel = 2),
 29 | ## 99999 permutations, and using the bray-curtis distance matrix (method =
 30 | ## "bray")
 31 | 
 32 | ## Note also that this does not test for interaction effects.
 33 | 
 34 | ## Variable definitions:
 35 |      ## var.names should be a vector of column names, one for each variable to be tested
 36 |           # Might consider adding functionality to take column names or position numbers, but for now...
 37 |      ## var.table is the table where the independent variables are.
 38 |      ## var.table.c is a string of the table name: eventually generate this automatically.
 39 |      ## species.table is the species/site matrix (e.g. created by matrify)
 40 |      ## species.table.c is a string of the table name: eventually generate this automatically.
 41 |      ## control.vars are any variables that need to be included before testing variables--character string, e.g. "var1 + var2"
 42 |      ## num.control.vars specifies the number of control variables used. Replace this later with something deriving it from control.vars text...
 43 |      ## by.adonsi2 is to pass through for the by = argument in adonis2
 44 | 
 45 | 
 46 | group.PERMANOVA <- function(var.names, var.table, var.table.c, control.vars = "", species.table, species.table.c, num.control.vars = 0, by.adonis2 = "terms", perms = 99999) {
 47 | 
 48 |      ## need to order columns otherwise the order of the columns and the order of col.numbers is incorrect.
 49 | 
 50 |      var.table <- var.table[ , order(colnames(var.table))]
 51 |      var.names <- var.names[order(var.names)]
 52 | 
 53 | 
 54 | 
 55 |      output <- data.frame(row.names = var.names,
 56 |                                          var.explnd = rep(NA, times = length(var.names)),
 57 |                                          avg.var.explnd = rep(NA),
 58 |                                          pseudo.F = rep(NA),
 59 |                                          F.pval = rep(NA),
 60 |                                          adj.F.pval = rep(NA),
 61 |                                          disp.F.pval = rep(NA),
 62 |                                          adj.disp.F.pval = rep(NA)
 63 |                               )
 64 |      col.numbers <- which(colnames(var.table) %in% var.names)
 65 | 
 66 | 
 67 |      ## Attempting to get the names from the tables
 68 |      #var.table.t <- as.character(quote(var.table, env = ))
 69 |      #species.table.t <- as.character(var.table)
 70 | 
 71 | 
 72 |      ## Populate the output table.
 73 | 
 74 |      for (i in 1:length(var.names)) {
 75 | 
 76 | 
 77 |           temp <- adonis2(formula = as.formula(paste(species.table.c, "~", control.vars, "+", var.names[i])),
 78 |                           parallel = 2, permutations = perms, method = "bray", by = by.adonis2, data = var.table)
 79 | 
 80 |           output$var.explnd[i] <- temp$SumOfSqs[1 + num.control.vars] / sum(temp$SumOfSqs)
 81 |           output$avg.var.explnd[i] <- output$var.explnd[i] / temp$Df[1 + num.control.vars]
 82 |           output$omega2[i] <- PERMANOVA.omega2(temp, num.control.vars)
 83 |           output$pseudo.F[i] <- temp$F[num.control.vars + 1]
 84 |           output$F.pval[i] <- temp$`Pr(>F)`[num.control.vars + 1]
 85 |           
 86 | 
 87 | 
 88 |                # the ordering is so that col.names[i] works!
 89 | 
 90 |                if (is.character(var.table[[col.numbers[i]]]) == TRUE | is.factor(var.table[[col.numbers[i]]]) == TRUE) {
 91 | 
 92 |                     temp.disp <- anova(betadisper(vegdist(species.table, method = "bray"),
 93 |                                                   as.factor(var.table[[col.numbers[i]]])))
 94 | 
 95 |                     output$disp.F.pval[i] <- temp.disp$`Pr(>F)`[1]
 96 | 
 97 |                }
 98 | 
 99 |      }
100 | 
101 |      output$adj.F.pval <- p.adjust(p = output$F.pval, method = "holm")
102 |      output$adj.disp.F.pval <- p.adjust(p = output$disp.F.pval, method = "holm")
103 | 
104 | return(output)
105 |      
106 |      
107 |      
108 | }
109 | 
110 | 
111 | ## with time, test:
112 |     #             paste0("\"", sample.covariates, "\"" )
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 


--------------------------------------------------------------------------------
/group_lmPerm.R:
--------------------------------------------------------------------------------
 1 | ## group lmPerm
 2 | 
 3 | require(lmPerm)
 4 | require(AICcmodavg)
 5 | 
 6 | 
 7 | group.lmp <- function(group.vars, # should be character string
 8 |                       var.table.name, # should be character string
 9 |                       dependent.var, #should be character string
10 |                       control.var = NULL,
11 |                       iter = 999999,
12 |                       Ca = .01
13 | 
14 | ) {
15 | 
16 | 
17 |     lmp.results <- data.frame(row.names = group.vars,
18 |                                   estimate = rep(NA, times = length(group.vars)),
19 |                                   var.explnd = rep(NA),
20 |                                   # avg.var.explnd = rep(NA),
21 |                                   F.stat.lmp = rep(NA),
22 |                                   prob.pval = rep(NA),
23 |                                   prob.iter = rep(NA),
24 |                                   adj.pval = rep(NA),
25 |                                   AICc.lm = rep(NA)
26 |     )
27 | 
28 |     if (!is.null(control.var)) {control.var <- paste0(control.var, "+")}
29 | 
30 | 
31 |     for (i in 1:length(group.vars)) {
32 | 
33 |         lmp.temp <- lmp(as.formula(paste0(dependent.var, " ~ ",
34 |                                           control.var,
35 |                                           var.table.name, "$", group.vars[i])),
36 |                         model = TRUE, seqs = TRUE, x = TRUE, y = TRUE,
37 |                         center = FALSE, maxIter = iter, Ca = Ca)
38 |         summary.lmp.temp <- lmPerm::summary.lmp(lmp.temp)
39 |         lm.temp <- lm(as.formula(paste0(dependent.var, " ~ ", var.table.name, "$",
40 |                                                group.vars[i])))
41 | 
42 | 
43 |         # add calculated values to table:
44 |         lmp.results$estimate[i] <- ifelse(length(summary.lmp.temp$coefficients[,1]) > 2, NA,
45 |                                               summary.lmp.temp$coefficients[2,1])
46 |         lmp.results$var.explnd[i] <- summary.lmp.temp$adj.r.squared
47 | 
48 | 
49 |         lmp.results$F.stat.lmp[i] <- summary.lmp.temp$fstatistic["value"]
50 |         lmp.results$prob.pval[i] <- summary.lmp.temp$coefficients[2,3]
51 |         lmp.results$prob.iter[i] <- summary.lmp.temp$coefficients[1,2]
52 |         lmp.results$AICc.lm[i] <- AICc(lm.temp)[1]
53 | 
54 | 
55 |         }
56 | 
57 | lmp.results$adj.pval <- p.adjust(p = lmp.results$prob.pval, method = "holm")
58 | 
59 | 
60 | 
61 | return(lmp.results)
62 | 
63 | 
64 | }
65 | 
66 | 


--------------------------------------------------------------------------------
/group_lmer.R:
--------------------------------------------------------------------------------
 1 | ## group lmPerm
 2 | 
 3 | # based on http://www.utstat.toronto.edu/~brunner/workshops/mixed/NormalWithR.pdf#:~:text=Repeated%20measures%20analysis%20with%20R%20Summary%20for%20experienced,to%20the%20model%20for%20the%20random%20subject%20effect.
 4 | # with information from https://stats.stackexchange.com/questions/7240/proportion-of-explained-variance-in-a-mixed-effects-model?rq=1
 5 | 
 6 | 
 7 | 
 8 | require(lme4) #
 9 | require(car)  # car also has vif function for multivariate models...
10 | require(MuMIn)
11 | 
12 | 
13 | group.lmer <- function(var.names, # should be character string
14 |                       in.data, 
15 |                       dependent.var, #should be character string
16 |                       control.var = NULL,
17 |                       random.subject.effect = NULL # should be character string in the format 
18 |                                                    # (1 | site)
19 | 
20 | ) {
21 | 
22 |     # Want to capture MSE, F, Pr(>F), estimate and standard error, AICc
23 |     
24 |     lmer.results <- data.frame(row.names = var.names,
25 |                                var.names = var.names,
26 |                                   slope.estimate = rep(NA, times = length(var.names)),
27 |                                   std.error = rep(NA),
28 |                                   Mean.Sq = rep(NA),
29 |                                   F.stat = rep(NA),
30 |                                   prob.pval = rep(NA),
31 |                                   adj.pval = rep(NA),
32 |                                   AICc = rep(NA),
33 |                                   AIC.REML = rep(NA),
34 |                                   logLik = rep(NA)
35 |     )
36 | 
37 |     if (!is.null(control.var)) {control.var <- paste0(control.var, "+")}
38 |     if (!is.null(random.subject.effect)) {random.subject.effect <- 
39 |                                                 paste0("+ ", random.subject.effect)}
40 |     
41 | i=1
42 |     for (i in 1:length(var.names)) {
43 | 
44 |         lmer.temp <- lmer(formula = as.formula(paste0(dependent.var, " ~ ",
45 |                                           control.var,
46 |                                           var.names[i],
47 |                                           random.subject.effect
48 |                                           )),
49 |                         data = in.data)
50 |         cT.temp <- MuMIn::coefTable(lmer.temp)
51 |         indx <- (2 + length(control.var))
52 | 
53 |         # add calculated values to table:
54 |         lmer.results$slope.estimate[i] <- cT.temp[indx, 1]
55 |         lmer.results$std.error[i] <- cT.temp[indx, 2]
56 |         
57 |         lmer.results$Mean.Sq[i] <- anova(lmer.temp)[var.names[i],3]
58 | 
59 |         lmer.results$F.stat[i] <- car::Anova(lmer.temp, test = "F")[var.names[i],"F"]
60 |         lmer.results$prob.pval[i] <- car::Anova(lmer.temp, test = "F")[var.names[i],"Pr(>F)"]
61 |         
62 |         
63 |         lmer.results$AICc[i] <- AICc(lmer.temp)[1] # this is for the model...
64 |         lmer.results$AIC.REML[i] <- lme4::llikAIC(lmer.temp)[[2]]
65 |         lmer.results$logLik[i] <- lme4::llikAIC(lmer.temp)[[1]]
66 | 
67 |         }
68 | 
69 | 
70 | lmer.results$adj.pval <- p.adjust(p = lmer.results$prob.pval, method = "holm")
71 | 
72 | return(lmer.results)
73 | 
74 | 
75 | }
76 | 
77 | 


--------------------------------------------------------------------------------
/repeat_multipatt.R:
--------------------------------------------------------------------------------
  1 | ## This is a function that runs multipatt a specified number of times, then outputs two key things:
  2 | 
  3 |         # 1. A summary table of the species over 100 runs and
  4 |         # 2. A plot showing the different indicator species made in ggplot (OPTIONAL)
  5 | 
  6 | # Reminder to user: check that matrix site order and cluster order are the same
  7 | 
  8 | # repeat.multipatt(matrix.name = matrify.shrub.bysite, cluster.name = gc.cluster$gc.group.3, plot.please = FALSE, freq.cutoff = .1)
  9 | # repeat.multipatt(matrix.name = matrify.tree.species, cluster.name = tree.abs.cluster$tree.abs, func.name = "r.g")
 10 | # repeat.multipatt(matrix.name = matrify.foraging,
 11 | #                  cluster.name = shrub.abs.cluster$shrub.abs,
 12 | #                  xlab.input = "Bird Incidence Indicator Species \nof shrub clusters")
 13 | 
 14 | 
 15 | 
 16 | 
 17 | repeat.multipatt <- function(repeats = 100, matrix.name, cluster.name, p.cutoff = .1, func.name = "IndVal.g", phi = FALSE, plot.please = TRUE, plot.colors = c("deeppink2", "cyan3", "yellowgreen", "gray 50", "violet"), freq.cutoff = .5, xlab.input = "Indicator Species \n(freq > 50%)", ylab.input = "Mean Indicator Value over 100 Runs", quiet = FALSE, stat.cutoff = .5, graph.stat.cutoff = .75) {
 18 | 
 19 |     library(ggplot2)
 20 |     library(dplyr)
 21 |     library(indicspecies)
 22 |     library(stringr)
 23 | 
 24 | 
 25 | if (freq.cutoff != .5) {
 26 | 
 27 |     print("You probably want to change the default Y axis label!")
 28 | 
 29 | }
 30 | 
 31 |     if (length(unique(cluster.name)) > 5) {
 32 | 
 33 |         stop("Please use fewer than 5 clusters (or edit the function).")
 34 |     }
 35 | 
 36 |     if (func.name == "r.g" & phi == TRUE & quiet == FALSE) {
 37 | 
 38 |             matrix.name <- as.data.frame(ifelse(matrix.name > 0, 1, 0))
 39 |             print("you are using the r.g function to calculate Pearson's phi coefficient of association.")
 40 |     }
 41 | 
 42 |     if (func.name == "r.g" & phi == FALSE & quiet == FALSE) {
 43 | 
 44 |         print("you are using the r.g function to calculate the point biserial correlation coefficient.")
 45 |     }
 46 | 
 47 | 
 48 | mp.sign.dump <- data.frame()
 49 | mp.AB.dump <- data.frame()
 50 | i <- 1
 51 | 
 52 | for (i in 1:repeats) {
 53 |     multipatt.out <- multipatt(x = matrix.name,
 54 |                               cluster = cluster.name, func = func.name)
 55 | 
 56 |     mp.sign <- multipatt.out$sign[complete.cases(multipatt.out$sign),]
 57 |     mp.sign <- mp.sign[mp.sign$p.value <= p.cutoff & mp.sign$stat >= stat.cutoff, ]
 58 |     mp.sign$species <- rownames(mp.sign)
 59 | 
 60 |         if ( length(multipatt.out$A) > 0 ) {
 61 |             mp.A <- as.data.frame(multipatt.out$A[complete.cases(multipatt.out$A), ] , make.row.names = FALSE)
 62 |             mp.A <- mp.A[which(rownames(mp.A) %in% mp.sign$species), ]
 63 |                 colnames(mp.A) <- paste("A.", colnames(mp.A), sep = "")
 64 | 
 65 | 
 66 |             mp.B <- as.data.frame(multipatt.out$B[complete.cases(multipatt.out$B), ], make.row.names = FALSE)
 67 |             mp.B <- mp.B[which(rownames(mp.B) %in% mp.sign$species), ]
 68 |                 colnames(mp.B) <- paste("B.", colnames(mp.B), sep = "")
 69 | 
 70 |             mp.AB <- cbind.data.frame(mp.A, mp.B)
 71 |             mp.AB$species <- rownames(mp.AB)
 72 | 
 73 |                 if (nrow(mp.AB) > 0) {
 74 |                     mp.AB$i <- i
 75 |                     }
 76 | 
 77 |             }
 78 | 
 79 |         if (nrow(mp.sign) > 0) {
 80 |             mp.sign$i <- i
 81 |             }
 82 | 
 83 |     mp.sign.dump <- rbind(mp.sign.dump, mp.sign, make.row.names = FALSE)
 84 | 
 85 | 
 86 |     if (func.name %in% c("IndVal", "IndVal.g")) {
 87 |             mp.AB.dump <- rbind(mp.AB.dump, mp.AB, make.row.names = FALSE)
 88 |         }
 89 | 
 90 |     i <- i + 1
 91 | }
 92 | 
 93 | ct <- FALSE
 94 | 
 95 | if (!(any(colnames(mp.sign.dump) == "s.1"))) {
 96 |     ct <- TRUE
 97 |     
 98 | colname.temp <- tibble(group = colnames(mp.sign.dump))
 99 | colname.temp$number <- 1:nrow(colname.temp)
100 | 
101 | colnames(mp.sign.dump)[1:sum(str_detect(colnames(mp.sign.dump), "s\\."))] <- 
102 |                 paste("s.", seq.int(from = 1, to = sum(str_detect(colnames(mp.sign.dump), "s\\."))), sep = "")
103 | 
104 | colname.temp <- colname.temp[str_detect(colname.temp$group, "s\\.") , ]
105 | colname.temp$groupname <- str_sub(colname.temp$group, 3, -1)
106 |     
107 |     }
108 | 
109 | 
110 | if (!(any(colnames(mp.sign.dump) == "s.3"))) {mp.sign.dump$s.3 = rep(0, times = length(mp.sign.dump$s.1))}
111 | if (!(any(colnames(mp.sign.dump) == "s.4"))) {mp.sign.dump$s.4 = rep(0, times = length(mp.sign.dump$s.1))}
112 | if (!(any(colnames(mp.sign.dump) == "s.5"))) {mp.sign.dump$s.5 = rep(0, times = length(mp.sign.dump$s.1))}
113 | 
114 | 
115 | # Make summary tables
116 | 
117 | mp.summary <- group_by(mp.sign.dump, species) %>%
118 |                 summarize(
119 |                            count.sp = n(),
120 |                            frequency.sp = round(count.sp/repeats, digits = 3),
121 |                            mean.stat = round(mean(stat), digits = 3),
122 |                            group = paste(
123 |                                           (if (any(s.1 == 1)) {"1."}),
124 |                                           (if (any(s.2 == 1)) {".2."}),
125 |                                           (if (any(s.3 == 1)) {".3."}),
126 |                                           (if (any(s.4 == 1)) {".4."}),
127 |                                           (if (any(s.5 == 1)) {".5"}),
128 |                                                  sep = ""),
129 |                            min.p.val = min(p.value),
130 |                            max.p.val = max(p.value),
131 |                            all.p.vals = paste(p.value, collapse = ", ")
132 |                            )
133 | 
134 | mp.summary$group <- ifelse(grepl("[1-9]..[1-9]", mp.summary$group),
135 |                            gsub(pattern = "\\.\\.", replacement = " & ", mp.summary$group),
136 |                            mp.summary$group)
137 | 
138 | 
139 | if(ct == TRUE & nrow(mp.summary) > 0) {
140 |     
141 |     mp.summary$groupname <- mp.summary$group
142 |     mp.summary$groupname <- gsub(pattern = "\\.", replacement = "", mp.summary$group)
143 |     g = 1
144 |         for (g in 1:max(colname.temp$number)) {
145 |             
146 |                 mp.summary$groupname <- gsub(x = mp.summary$groupname,
147 |                                              pattern = as.character(g),
148 |                                              replacement = colname.temp$groupname[g])
149 |             
150 |             }
151 |     
152 | }
153 | 
154 | if(ct == TRUE & nrow(mp.summary) == 0) {
155 |     
156 |     warning("There are no species returned")
157 |     
158 |     mp.summary[1,1] <- "No Species Returned"
159 | }
160 | 
161 | 
162 | mp.summary$group <- paste("Group", gsub(pattern = "\\.", replacement = "", mp.summary$group), sep = " ")
163 | 
164 | 
165 | 
166 | 
167 | 
168 | 
169 | if (func.name %in% c("IndVal", "IndVal.g")) {
170 |         mp.AB.summary  <- group_by(mp.AB.dump, species) %>%
171 |                             summarize_all(.funs = list(mean = mean))
172 |             mp.AB.summary$i <- NULL
173 | 
174 |         mp.AB.summary <- cbind(mp.AB.summary,
175 |                                group_by(mp.AB.dump, species) %>%
176 |                                 summarize(
177 |                                     count.sp = n(),
178 |                                     frequency.sp = round(count.sp/max(i), digits = 3)
179 |                                 )
180 |                             )
181 | }
182 | 
183 | 
184 | ## Output graph
185 | 
186 | if (plot.please == TRUE) {
187 | 
188 |     if (nrow(subset(mp.summary, frequency.sp > freq.cutoff)) > 0) {
189 | 
190 |         mp.plot <- ggplot(subset(mp.summary, frequency.sp > freq.cutoff & mean.stat > graph.stat.cutoff),
191 |                           aes(x = reorder(species, mean.stat))) +
192 |                 geom_bar(aes(y = mean.stat, fill = groupname), alpha = .75, stat = "identity") +
193 |                 geom_text(aes(y = mean.stat, label = mean.stat), hjust = 1.2) +
194 |                 facet_grid(group ~ ., scales = "free_x") + coord_flip() + guides(fill = "none") +
195 |                 xlab(xlab.input) + ylab(ylab.input) +
196 |                 scale_fill_manual(values = c(plot.colors))
197 | 
198 |         print(mp.plot)
199 | 
200 |     }
201 | 
202 |     else print("can't make graph; no indicator species for any clusters!")
203 | }
204 | 
205 | 
206 | 
207 | 
208 | # Output data tibble
209 | if (quiet == FALSE) {
210 | if (func.name %in% c("IndVal", "IndVal.g")) {
211 |     return(list(mp.summary, mp.AB.summary))
212 |     }
213 | else return(mp.summary)
214 | }
215 | 
216 | }
217 | 
218 | 
219 | 
220 | 
221 | 
222 | 
223 | 
224 | 
225 | 
226 | 
227 | 
228 | ## Testing
229 | # matrix.name = sqrt(sapro.transpose)
230 | # cluster.name = veg.group.soil[[3]]
231 | # func.name = "r.g"
232 | # repeats = 1
233 | # plot.please = FALSE
234 | # quiet = FALSE


--------------------------------------------------------------------------------
/triplet_fixer.R:
--------------------------------------------------------------------------------
 1 | # This script creates a sourceable function to turn any matrix into a triplet using the labdsv matrify()
 2 | # Written 4/21/2016
 3 | 
 4 | # requires the labdsv package
 5 | 
 6 | # The function inputs should be:
 7 |     # filename or data table
 8 |     # three columns that make up the triplet.
 9 | 
10 | # The function name is ez.matrify
11 | 
12 | ez.matrify <- function(filename, species.name, site.name, abundance) {
13 |     
14 |     library(labdsv)
15 |     
16 |     #check if filename is a .csv or already imported
17 |     if (is.character(filename))
18 |         data.for.analysis <- read.csv(file=filename, header = T) else
19 |         data.for.analysis <- filename
20 |     
21 |     proper.columns <- as.data.frame(data.for.analysis[ ,c(site.name, species.name, abundance)])
22 |     ez.matrify.output <- matrify(proper.columns)
23 |     return(ez.matrify.output)
24 |     
25 | }
26 | 
27 | 
28 | #test
29 | #head(ez.matrify(filename='../../DataRepository/VegetationData/ShrubsCSV.csv', species.name = 'SpeciesTaxonomic', site.name = 'SiteStandardGroup', abundance = 'RandomTest'))
30 | 
31 | #head(ez.matrify(filename=tree.data, species.name = 'tree.species', site.name = 'site', abundance = 'tree.number'))
32 | 
33 | 


--------------------------------------------------------------------------------