├── .gitignore ├── AICc_PERMANOVA.R ├── AICc_compare.R ├── AICc_table_generation.R ├── PlotTaxaKD.R ├── README.md ├── anon_data.R ├── chisquared.R ├── eDNA_helpers.R ├── extract_taxa_TITAN.R ├── group_PERMANOVA.R ├── group_PERMANOVA_wP.R.R ├── group_lmPerm.R ├── group_lmer.R ├── repeat_multipatt.R └── triplet_fixer.R /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | 502_project_code_Wisconsin (2).R 6 | MEE3_1_sm_Appendix_S1 7 | Microbiome-master 8 | R_Scripts.Rproj 9 | fromSE.R 10 | *.zip 11 | test.csv 12 | -------------------------------------------------------------------------------- /AICc_PERMANOVA.R: -------------------------------------------------------------------------------- 1 | # Function to calculate AICc for PERMANOVA. Requires input from adonis or adonis2 {vegan} 2 | 3 | 4 | AICc.PERMANOVA <- function(adonis.model) { 5 | 6 | # check to see if object is an adonis model... 7 | 8 | if (!(adonis.model$aov.tab[1,1] >= 1)) 9 | stop("object not output of adonis {vegan} ") 10 | 11 | # Ok, now extract appropriate terms from the adonis model 12 | # Calculating AICc using residual sum of squares (RSS) since I don't think that adonis returns something I can use as a liklihood function... 13 | 14 | RSS <- adonis.model$aov.tab[rownames(adonis.model$aov.tab) == "Residuals", "SumsOfSqs"] 15 | MSE <- adonis.model$aov.tab[rownames(adonis.model$aov.tab) == "Residuals", "MeanSqs"] 16 | 17 | k <- ncol(adonis.model$model.matrix)# + 1 # add one for error variance 18 | 19 | nn <- nrow(adonis.model$model.matrix) 20 | 21 | # AIC : 2*k + n*ln(RSS) 22 | # AICc: AIC + [2k(k+1)]/(n-k-1) 23 | 24 | # based on https://en.wikipedia.org/wiki/Akaike_information_criterion; 25 | # https://www.researchgate.net/post/What_is_the_AIC_formula; 26 | # http://avesbiodiv.mncn.csic.es/estadistica/ejemploaic.pdf 27 | 28 | # AIC.g is generalized version of AIC = 2k + n [Ln( 2(pi) RSS/n ) + 1] 29 | # AIC.pi = k + n [Ln( 2(pi) RSS/(n-k) ) +1], 30 | 31 | AIC <- 2*k + nn*log(RSS) 32 | AIC.g <- 2*k + nn * (1 + log( 2 * pi * RSS / nn)) 33 | AIC.MSE <- 2*k + nn * log(MSE) 34 | AIC.pi <- k + nn*(1 + log( 2*pi*RSS/(nn-k) ) ) 35 | AICc <- AIC + (2*k*(k + 1))/(nn - k - 1) 36 | AICc.MSE <- AIC.MSE + (2*k*(k + 1))/(nn - k - 1) 37 | AICc.pi <- AIC.pi + (2*k*(k + 1))/(nn - k - 1) 38 | 39 | output <- list("AIC" = AIC, "AIC.g" = AIC.g, "AICc" = AICc, 40 | "AIC.MSE" = AIC.MSE, "AICc.MSE" = AICc.MSE, 41 | "AIC.pi" = AIC.pi, "AICc.pi" = AICc.pi, "k" = k) 42 | 43 | return(output) 44 | 45 | } 46 | 47 | AICc.PERMANOVA2 <- function(adonis2.model) { 48 | 49 | # check to see if object is an adonis2 model... 50 | 51 | if (is.na(adonis2.model$SumOfSqs[1])) 52 | stop("object not output of adonis2 {vegan} ") 53 | 54 | # Ok, now extract appropriate terms from the adonis model Calculating AICc 55 | # using residual sum of squares (RSS or SSE) since I don't think that adonis 56 | # returns something I can use as a likelihood function... maximum likelihood 57 | # and MSE estimates are the same when distribution is gaussian See e.g. 58 | # https://www.jessicayung.com/mse-as-maximum-likelihood/; 59 | # https://towardsdatascience.com/probability-concepts-explained-maximum-likelihood-estimation-c7b4342fdbb1 60 | # So using RSS or MSE estimates is fine as long as the residuals are 61 | # Gaussian https://robjhyndman.com/hyndsight/aic/ If models have different 62 | # conditional likelihoods then AIC is not valid. However, comparing models 63 | # with different error distributions is ok (above link). 64 | 65 | 66 | RSS <- adonis2.model$SumOfSqs[ length(adonis2.model$SumOfSqs) - 1 ] 67 | MSE <- RSS / adonis2.model$Df[ length(adonis2.model$Df) - 1 ] 68 | 69 | nn <- adonis2.model$Df[ length(adonis2.model$Df) ] + 1 70 | 71 | k <- nn - adonis2.model$Df[ length(adonis2.model$Df) - 1 ] 72 | 73 | 74 | # AIC : 2*k + n*ln(RSS/n) 75 | # AICc: AIC + [2k(k+1)]/(n-k-1) 76 | 77 | # based on https://en.wikipedia.org/wiki/Akaike_information_criterion; 78 | # https://www.statisticshowto.datasciencecentral.com/akaikes-information-criterion/ ; 79 | # https://www.researchgate.net/post/What_is_the_AIC_formula; 80 | # http://avesbiodiv.mncn.csic.es/estadistica/ejemploaic.pdf; 81 | # https://medium.com/better-programming/data-science-modeling-how-to-use-linear-regression-with-python-fdf6ca5481be 82 | 83 | # AIC.g is generalized version of AIC = 2k + n [Ln( 2(pi) RSS/n ) + 1] 84 | # AIC.pi = k + n [Ln( 2(pi) RSS/(n-k) ) +1], 85 | 86 | AIC <- 2*k + nn*log(RSS/nn) 87 | AIC.g <- 2*k + nn * (1 + log( 2 * pi * RSS / nn)) 88 | AIC.MSE <- 2*k + nn * log(MSE) 89 | AIC.pi <- k + nn*(1 + log( 2*pi*RSS/(nn-k) ) ) 90 | AICc <- AIC + (2*k*(k + 1))/(nn - k - 1) 91 | AICc.MSE <- AIC.MSE + (2*k*(k + 1))/(nn - k - 1) 92 | AICc.pi <- AIC.pi + (2*k*(k + 1))/(nn - k - 1) 93 | 94 | output <- list("AIC" = AIC, "AICc" = AICc, "AIC.g" = AIC.g, 95 | "AIC.MSE" = AIC.MSE, "AICc.MSE" = AICc.MSE, 96 | "AIC.pi" = AIC.pi, "AICc.pi" = AICc.pi, "k" = k, "N" = nn) 97 | 98 | return(output) 99 | 100 | } 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /AICc_compare.R: -------------------------------------------------------------------------------- 1 | 2 | # Test script to automate AICc calculation after deciding which of your variables are significant. Like, you have 4 candidate variables and you want to compare 1, 2, 3, and 4 variable models based on AICc. 3 | 4 | list.varcomb.AICc <- tibble(varibles = rep(0, 4*3*2), 5 | AICc.values = rep(0)) 6 | 7 | for (i in 1:length(sig.incidence.vars.PERMANOVA)) { 8 | 9 | for (j in 1:length(as.vector(combn(sig.incidence.vars.PERMANOVA, m = i)))) { 10 | 11 | list.varcomb.AICc[i,variables] <- as.vector(combn(sig.incidence.vars.PERMANOVA, m = i))[2] 12 | 13 | } 14 | 15 | 16 | } 17 | 18 | for (i in 1:4) { 19 | print(combn(1:4, m = i)) 20 | } 21 | -------------------------------------------------------------------------------- /AICc_table_generation.R: -------------------------------------------------------------------------------- 1 | ## Create AICc tables for PERMANOVA output. 2 | 3 | require(vegan) 4 | require(tibble) 5 | require(stringr) 6 | 7 | 8 | ## -- For two variables: ------------------------------------ 9 | 10 | AICc.table.2var <- function(sig.vars, control.var.char = NULL, c.var = 0, matrix.char, perm, type = "AICc", method = "bray") { 11 | 12 | varcomb.2.AICc <- tibble(variables = rep("var.name", choose(length(sig.vars),2)), 13 | AICc.values = rep(0), 14 | `Pseudo-_F_` = rep(0), 15 | `p-value` = rep(0), 16 | `Var Explnd` = rep(0), 17 | Model = rep("model")) 18 | 19 | if (is.character(control.var.char) == TRUE & c.var == 0) {c.var = length(control.var.char)} 20 | 21 | combo.list <- combn(x = sig.vars, m = 2, simplify = FALSE) 22 | 23 | if (!is.null(control.var.char)) { 24 | control.var.char <- paste0(control.var.char, " +") 25 | } 26 | 27 | for (r in 1:choose(length(sig.vars),2)) { 28 | # label the row with variable names 29 | varcomb.2.AICc[r,1] <- paste(control.var.char, paste(combo.list[[r]], collapse = " + ")) 30 | 31 | # create a temporary PERMANOVA to take info from 32 | 33 | temp <- adonis2( 34 | as.formula(paste0( 35 | matrix.char, 36 | " ~ ", 37 | control.var.char, 38 | paste0(combo.list[[r]], collapse = "+") 39 | )), 40 | permutations = perm, 41 | method = method, 42 | by = NULL 43 | ) 44 | 45 | 46 | varcomb.2.AICc[r, 2] <- AICc.PERMANOVA2(temp)[type] 47 | varcomb.2.AICc[r, 3] <- temp$F[1] 48 | varcomb.2.AICc[r, 4] <- temp$`Pr(>F)`[1] 49 | varcomb.2.AICc$`Var Explnd`[r] <- temp$SumOfSqs[1] / temp$SumOfSqs[3] 50 | 51 | 52 | r <- r + 1 53 | 54 | } 55 | 56 | varcomb.2.AICc$`Delta AICc` <- varcomb.2.AICc$AICc.values - 57 | min(varcomb.2.AICc$AICc.values) 58 | varcomb.2.AICc$`Relative Likelihood` <- 59 | exp(-.5 * (varcomb.2.AICc$AICc.values - min(varcomb.2.AICc$AICc.values))) 60 | 61 | # Relative likelihood compared with best model; see 62 | # https://en.wikipedia.org/wiki/Likelihood_function 63 | # https://www.rdocumentation.org/packages/qpcR/versions/1.4-1/topics/akaike.weights 64 | 65 | 66 | return(varcomb.2.AICc) 67 | 68 | } 69 | 70 | 71 | 72 | ## -- For N variables: --------------------------------------------------- 73 | 74 | 75 | AICc.table.Nvar <- function(sig.vars, control.var.char = NULL, c.var = 0, matrix.char, perm, n.var = 1, composite = FALSE, type = "AICc", method = "bray") { 76 | 77 | if (n.var > length(sig.vars)) { stop("n.var greater than number of significant variables")} 78 | 79 | if (!is.null(control.var.char)) { 80 | control.var.char <- paste0(control.var.char, " + ") 81 | } 82 | if (is.character(control.var.char) == TRUE & c.var == 0) {c.var = 1} 83 | 84 | 85 | varcomb.N.AICc <- tibble(variables = rep("var.name", choose(length(sig.vars), n.var)), 86 | AICc.values = rep(0), 87 | `Pseudo-_F_` = rep(0), 88 | `p-value` = rep(0), 89 | `Var Explnd` = rep(0), 90 | Model = rep("model")) 91 | 92 | combo.list <- combn(x = sig.vars, m = n.var, simplify = FALSE) 93 | 94 | 95 | for (r in 1:choose(length(sig.vars), n.var)) { 96 | # label the row with variable names 97 | varcomb.N.AICc[r,1] <- paste(control.var.char, paste(combo.list[[r]], collapse = " and ")) 98 | 99 | # create a temporary PERMANOVA to take info from 100 | temp <- adonis2( 101 | as.formula(paste0( 102 | matrix.char, 103 | " ~ ", 104 | control.var.char, 105 | paste0(combo.list[[r]], collapse = "+") 106 | )), 107 | permutations = perm, 108 | method = method, 109 | by = NULL 110 | ) 111 | 112 | 113 | varcomb.N.AICc[r,2] <- AICc.PERMANOVA2(temp)[type] 114 | 115 | varcomb.N.AICc[r,3] <- temp$`F`[1] 116 | varcomb.N.AICc[r,4] <- temp$`Pr(>F)`[1] 117 | varcomb.N.AICc$`Var Explnd`[r] <- temp$SumOfSqs[1] / temp$SumOfSqs[3] 118 | 119 | 120 | r <- r + 1 121 | 122 | } 123 | 124 | 125 | # Calculate diagnostic variables: 126 | 127 | if (composite == FALSE) { 128 | varcomb.N.AICc$`Delta AICc` <- varcomb.N.AICc$AICc.values - 129 | min(varcomb.N.AICc$AICc.values) 130 | varcomb.2.AICc$`Relative Likelihood` <- 131 | exp(-.5 * (varcomb.2.AICc$AICc.values - min(varcomb.2.AICc$AICc.values))) 132 | } 133 | 134 | 135 | return(varcomb.N.AICc) 136 | 137 | } 138 | 139 | 140 | ## -- Wrapper function: --------------------------------------------------- 141 | 142 | # comb.incl is which # of variables for the combinations you want to include. 143 | # e.g. all combinations of 2 variables, 3 variables, etc. 144 | 145 | AICc.table.all <- function(sig.vars, control.var.char = NULL, matrix.char, perm = 999, comb.incl = 1, extra.var = FALSE, extra.var.char = NULL, type = "AICc", method = "bray") { 146 | 147 | varcomb.all <- data.frame() 148 | 149 | # If there is a control variable, create a one variable model with only 150 | # control variable, for comparison with rest of proposed models 151 | # 152 | if (!is.null(control.var.char)) { 153 | 154 | temp <- AICc.table.Nvar(sig.vars = control.var.char, control.var.char = NULL, 155 | matrix.char = matrix.char, n.var = 1, composite = TRUE, 156 | type = type, method = method, perm = perm) 157 | 158 | varcomb.all <- rbind(varcomb.all, temp) 159 | 160 | 161 | } 162 | 163 | # Iterate through the comb.incl. e.g. all 1 var models, then 2 var models, then 3 var models... 164 | for (i in comb.incl) { 165 | 166 | temp <- AICc.table.Nvar(sig.vars = sig.vars, control.var.char = control.var.char, 167 | matrix.char = matrix.char, n.var = i, composite = TRUE, 168 | type = type, method = method, perm = perm) 169 | 170 | varcomb.all <- rbind(varcomb.all, temp) 171 | 172 | 173 | } 174 | 175 | 176 | 177 | 178 | # If you want to include a non-significant variable for comparison... 179 | if (extra.var == TRUE) { 180 | 181 | for (i in 1:length(extra.var.char)) { 182 | temp <- AICc.table.Nvar(sig.vars = extra.var.char[i], control.var.char = control.var.char, 183 | matrix.char = matrix.char, n.var = 1, composite = TRUE, 184 | type = type, method = method, perm = perm) 185 | 186 | varcomb.all <- rbind(varcomb.all, temp) 187 | 188 | } 189 | } 190 | 191 | 192 | varcomb.all$`Delta AICc` <- varcomb.all$AICc.values - 193 | min(varcomb.all$AICc.values) 194 | varcomb.all$`Relative Likelihood` <- exp((min(varcomb.all$AICc.values) - 195 | varcomb.all$AICc.values)/2) 196 | 197 | # exp( -0.5 * ∆AIC score for that model) 198 | 199 | return(varcomb.all) 200 | 201 | } 202 | 203 | 204 | ## -- Sum of AIC Weights by Var: --------------------------------------------------- 205 | 206 | # This requires an AIC/AICc table output from one of the above functions. The 207 | # rationalle behind this approach can be found in Arnold, T. W. (2010). 208 | # Uninformative parameters and model selection using Akaike's Information 209 | # Criterion. The Journal of Wildlife Management, 74(6), 1175-1178. 210 | 211 | # Calculation method from http://brianomeara.info/tutorials/aic 212 | 213 | 214 | AICc.weights.byvar <- function(sig.vars, AIC.table.output){ 215 | 216 | results.table <- tibble("Significant Variable" = sig.vars, 217 | "Summed AIC Weight" = rep(0)) 218 | 219 | for (i in 1:length(sig.vars)){ 220 | 221 | summed.weight = 0 222 | 223 | for (j in 1:nrow(AIC.table.output)){ 224 | 225 | if (grepl(AIC.table.output$variables[j], pattern = sig.vars[i], fixed = TRUE)) { 226 | 227 | summed.weight <- summed.weight + AIC.table.output$`Relative Likelihood`[j] 228 | 229 | } else summed.weight <- summed.weight 230 | 231 | 232 | } 233 | #sig vars loop 234 | 235 | results.table[i, 2] <- summed.weight 236 | 237 | } 238 | # function loop 239 | 240 | return(results.table) 241 | } 242 | 243 | 244 | 245 | # create a table with sig vars fed into the table + AIC weight sum column 246 | # 247 | # for each significant variable, 248 | # for each row 249 | # check each row to see if there is a pattern match 250 | # add the relative likelihood to sum if so 251 | # 252 | # report 253 | 254 | 255 | 256 | 257 | ## Testing 258 | # sig.vars <- sigvars.symbionfNGS[-1] 259 | # matrix.char <- "sqrt(symbio.transpose.nf)" 260 | # control.var.char <- sigvars.symbionfNGS[1] 261 | # 262 | # 263 | # AICc.table.2var(sig.vars = sigvars.symbionfNGS[-1], control.var.char = sigvars.symbionfNGS[1], matrix.char = "sqrt(symbio.transpose.nf)")[3] 264 | # 265 | # AICc.table.Nvar(sig.vars = sigvars.symbionfNGS[-1], control.var.char = sigvars.symbionfNGS[1], matrix.char = "sqrt(symbio.transpose.nf)", n.var = 1)[3] 266 | # 267 | # test <- AICc.table.all(sig.vars = sigvars.symbionfNGS[-1], control.var.char = sigvars.symbionfNGS[1], 268 | # matrix.char = "sqrt(symbio.transpose.nf)", comb.incl = 1) 269 | # 270 | 271 | -------------------------------------------------------------------------------- /PlotTaxaKD.R: -------------------------------------------------------------------------------- 1 | plotTaxaKD <- function (titan.out, 2 | z1 = T, 3 | z2 = T, 4 | interval = T, 5 | prob95 = F, 6 | z.med = F, 7 | xlabel = "Environmental Gradient", 8 | log = "", 9 | at = NULL, 10 | xmin = min(titan.out$sppmax[, 8]), 11 | xmax = max(titan.out$sppmax[, 12 | 12]) * 1.05, 13 | tck = 0.025, 14 | bty = NULL, 15 | ntick = 6, 16 | prtty = T, 17 | dig = 1, 18 | leg.x = 0.8, 19 | leg.y = 10, 20 | cex.taxa = 0.75, 21 | cex = 1, 22 | cex.axis = 1, 23 | cex.leg = .9, 24 | cex.lab = 1.25, 25 | legend = T, 26 | col1 = "black", 27 | fil1 = "black", 28 | col2 = "black", 29 | fil2 = "white", 30 | write = F, 31 | all = F, 32 | lab.side = 3, # 1 for bottom 3 for top 33 | lab.line = 1, # how far from the top/bottom of plot 34 | margin = c(3,8,3,8), # b,l,t,r 35 | sub1.labs.function = "identity", # name of function to revise left side row names 36 | sub2.labs.function = "identity", # name of function to revise right side row names 37 | ...) 38 | { 39 | imax = titan.out$arguments[[5]] 40 | boot = titan.out$arguments[[3]] > 0.5 41 | if (all) { 42 | boot = F 43 | 44 | } 45 | if (boot) { 46 | if (z1) { 47 | sppsub1 <- subset(titan.out$sppmax, titan.out$sppmax[, 48 | 16] == 1) 49 | sub1.labs <- rownames(sppsub1) 50 | sub1.labs <- lapply(X = sub1.labs, FUN = sub1.labs.function) 51 | } 52 | if (z2) { 53 | sppsub2 <- subset(titan.out$sppmax, titan.out$sppmax[, 54 | 16] == 2) 55 | sub2.labs <- rownames(sppsub2) 56 | sub2.labs <- lapply(X = sub2.labs, FUN = sub2.labs.function) 57 | } 58 | } 59 | else { 60 | if (z1) { 61 | sppsub1 <- subset(titan.out$sppmax, 62 | titan.out$sppmax[, 63 | 4] == 1 & 64 | titan.out$sppmax[, 6] <= 0.05) 65 | sub1.labs <- rownames(sppsub1) 66 | sub1.labs <- lapply(X = sub1.labs, FUN = sub1.labs.function) 67 | 68 | 69 | } 70 | if (z2) { 71 | sppsub2 <- subset(titan.out$sppmax, 72 | titan.out$sppmax[, 73 | 4] == 2 & 74 | titan.out$sppmax[, 6] <= 0.05) 75 | sub2.labs <- rownames(sppsub2) 76 | sub2.labs <- lapply(X = sub2.labs, FUN = sub2.labs.function) 77 | 78 | 79 | } 80 | } 81 | if (z1) { 82 | if (nrow(sppsub1) < 1) { 83 | stop( 84 | "z1 is empty, set z1=FALSE, change significance criteria, or set boot=FALSE if bootstrapping was not used to generate the titan object" 85 | ) 86 | } 87 | } 88 | if (z2) { 89 | if (nrow(sppsub2) < 1) { 90 | stop( 91 | "z2 is empty, set z2=FALSE, change significance criteria, or set boot=FALSE if bootstrapping was not used to generate the titan object" 92 | ) 93 | } 94 | } 95 | par(mar = margin, oma = c(0, 3, 0, 3)) 96 | if (z1 & z2) { 97 | if (nrow(sppsub1) >= nrow(sppsub2)) { 98 | sppsub.gt <- sppsub1 99 | } 100 | else { 101 | sppsub.gt <- sppsub2 102 | } 103 | } 104 | else { 105 | if (z1) { 106 | sppsub.gt <- sppsub1 107 | } 108 | else { 109 | sppsub.gt <- sppsub2 110 | } 111 | } 112 | plot( 113 | sppsub.gt[, 1], 114 | ((max( 115 | rank((sppsub.gt[, 1]), ties.method = "first") 116 | ) + 117 | 1) - rank((sppsub.gt[, 1]), ties.method = "first")), 118 | xlim = c(xmin, xmax), 119 | ylim = c(0.5, max( 120 | rank(sppsub.gt[, 121 | 1], ties.method = "first") + 1 122 | )), 123 | cex = 0, 124 | tck = tck, 125 | log = log, 126 | axes = FALSE, 127 | ylab = "", 128 | xlab = "" 129 | ) 130 | if (boot) { 131 | if (prob95) { 132 | if (z1) { 133 | yvalues1 = ((max( 134 | rank((sppsub1[, 12]), ties.method = "first") 135 | ) + 136 | 1) - rank((sppsub1[, 12]), ties.method = "first")) 137 | } 138 | if (z2) { 139 | yvalues2 = rank((sppsub2[, 8]), ties.method = "first") + 140 | 0.5 141 | } 142 | } 143 | else { 144 | if (z.med) { 145 | if (z1) { 146 | yvalues1 = ((max( 147 | rank((sppsub1[, 10]), ties.method = "first") 148 | ) + 149 | 1) - rank((sppsub1[, 10]), ties.method = "first")) 150 | } 151 | if (z2) { 152 | yvalues2 = rank((sppsub2[, 10]), ties.method = "first") + 153 | 0.5 154 | } 155 | } 156 | else { 157 | if (imax) { 158 | if (z1) { 159 | yvalues1 = ((max( 160 | rank((sppsub1[, 1]), ties.method = "first") 161 | ) + 162 | 1) - rank((sppsub1[, 1]), ties.method = "first")) 163 | } 164 | if (z2) { 165 | yvalues2 = rank((sppsub2[, 1]), ties.method = "first") + 166 | 0.5 167 | } 168 | } 169 | else { 170 | if (z1) { 171 | yvalues1 = ((max( 172 | rank((sppsub1[, 2]), ties.method = "first") 173 | ) + 174 | 1) - rank((sppsub1[, 2]), ties.method = "first")) 175 | } 176 | if (z2) { 177 | yvalues2 = rank((sppsub2[, 2]), ties.method = "first") + 178 | 0.5 179 | } 180 | } 181 | } 182 | } 183 | } 184 | else { 185 | if (imax) { 186 | if (z1) { 187 | yvalues1 = ((max( 188 | rank((sppsub1[, 1]), ties.method = "first") 189 | ) + 190 | 1) - rank((sppsub1[, 1]), ties.method = "first")) 191 | } 192 | if (z2) { 193 | yvalues2 = rank((sppsub2[, 1]), ties.method = "first") + 194 | 0.5 195 | } 196 | } 197 | else { 198 | if (z1) { 199 | yvalues1 = ((max( 200 | rank((sppsub1[, 2]), ties.method = "first") 201 | ) + 202 | 1) - rank((sppsub1[, 2]), ties.method = "first")) 203 | } 204 | if (z2) { 205 | yvalues2 = rank((sppsub2[, 2]), ties.method = "first") + 206 | 0.5 207 | } 208 | } 209 | } 210 | if (boot & interval) { 211 | if (z1) { 212 | segments(sppsub1[, 8], 213 | yvalues1, 214 | sppsub1[, 12], 215 | yvalues1, 216 | col = col1, 217 | lwd = 2) 218 | } 219 | if (z2) { 220 | segments( 221 | sppsub2[, 8], 222 | yvalues2, 223 | sppsub2[, 12], 224 | yvalues2, 225 | col = col2, 226 | lwd = 2, 227 | lty = 3 228 | ) 229 | } 230 | } 231 | if (z1) { 232 | grpcol = rep(NA, nrow(sppsub1)) 233 | } 234 | if (z2) { 235 | grpcol2 = rep(NA, nrow(sppsub2)) 236 | } 237 | if (z1) { 238 | for (i in 1:nrow(sppsub1)) { 239 | if (sppsub1[i, 4] > 1.5) { 240 | grpcol[i] = col2 241 | } 242 | else { 243 | grpcol[i] = fil1 244 | } 245 | } 246 | } 247 | if (z2) { 248 | for (i in 1:nrow(sppsub2)) { 249 | if (sppsub2[i, 4] > 1.5) { 250 | grpcol2[i] = fil2 251 | } 252 | else { 253 | grpcol2[i] = fil1 254 | } 255 | } 256 | } 257 | if (boot) { 258 | if (prob95) { 259 | if (z.med) { 260 | if (z1) { 261 | symbols( 262 | sppsub1[, 12], 263 | yvalues1, 264 | circles = sppsub1[, 265 | 15], 266 | inches = 0.1, 267 | add = TRUE, 268 | xlim = c(0, 269 | 5), 270 | fg = col1, 271 | bg = grpcol, 272 | lwd = 2 273 | ) 274 | } 275 | if (z2) { 276 | symbols( 277 | sppsub2[, 8], 278 | yvalues2, 279 | circles = sppsub2[, 280 | 15], 281 | inches = 0.1, 282 | add = TRUE, 283 | xlim = c(0, 284 | 5), 285 | fg = col2, 286 | bg = grpcol2, 287 | lwd = 2 288 | ) 289 | } 290 | } 291 | else { 292 | if (z1) { 293 | symbols( 294 | sppsub1[, 12], 295 | yvalues1, 296 | circles = sppsub1[, 297 | 7], 298 | inches = 0.1, 299 | add = TRUE, 300 | xlim = c(0, 301 | 5), 302 | fg = col1, 303 | bg = grpcol, 304 | lwd = 2 305 | ) 306 | } 307 | if (z2) { 308 | symbols( 309 | sppsub2[, 8], 310 | yvalues2, 311 | circles = sppsub2[, 312 | 7], 313 | inches = 0.1, 314 | add = TRUE, 315 | xlim = c(0, 316 | 5), 317 | fg = col2, 318 | bg = grpcol2, 319 | lwd = 2 320 | ) 321 | } 322 | } 323 | } 324 | else { 325 | if (z.med) { 326 | if (z1) { 327 | symbols( 328 | sppsub1[, 10], 329 | yvalues1, 330 | circles = sppsub1[, 331 | 15], 332 | inches = 0.1, 333 | add = TRUE, 334 | xlim = c(0, 335 | 5), 336 | fg = col1, 337 | bg = grpcol, 338 | lwd = 2 339 | ) 340 | } 341 | if (z2) { 342 | symbols( 343 | sppsub2[, 10], 344 | yvalues2, 345 | circles = sppsub2[, 346 | 15], 347 | inches = 0.1, 348 | add = TRUE, 349 | xlim = c(0, 350 | 5), 351 | fg = col2, 352 | bg = grpcol2, 353 | lwd = 2 354 | ) 355 | } 356 | } 357 | else { 358 | if (imax) { 359 | if (z1) { 360 | symbols( 361 | sppsub1[, 1], 362 | yvalues1, 363 | circles = sppsub1[, 364 | 7], 365 | inches = 0.1, 366 | add = TRUE, 367 | xlim = c(0, 368 | 5), 369 | fg = col1, 370 | bg = grpcol, 371 | lwd = 2 372 | ) 373 | } 374 | if (z2) { 375 | symbols( 376 | sppsub2[, 1], 377 | yvalues2, 378 | circles = sppsub2[, 379 | 7], 380 | inches = 0.1, 381 | add = TRUE, 382 | xlim = c(0, 383 | 5), 384 | fg = col2, 385 | bg = grpcol2, 386 | lwd = 2 387 | ) 388 | } 389 | } 390 | else { 391 | if (z1) { 392 | symbols( 393 | sppsub1[, 2], 394 | yvalues1, 395 | circles = sppsub1[, 396 | 7], 397 | inches = 0.1, 398 | add = TRUE, 399 | xlim = c(0, 400 | 5), 401 | fg = col1, 402 | bg = grpcol, 403 | lwd = 2 404 | ) 405 | } 406 | if (z2) { 407 | symbols( 408 | sppsub2[, 2], 409 | yvalues2, 410 | circles = sppsub2[, 411 | 7], 412 | inches = 0.1, 413 | add = TRUE, 414 | xlim = c(0, 415 | 5), 416 | fg = col2, 417 | bg = grpcol2, 418 | lwd = 2 419 | ) 420 | } 421 | } 422 | } 423 | } 424 | } 425 | else { 426 | if (imax) { 427 | if (z1) { 428 | symbols( 429 | sppsub1[, 1], 430 | yvalues1, 431 | circles = sppsub1[, 432 | 7], 433 | inches = 0.1, 434 | add = TRUE, 435 | xlim = c(0, 436 | 5), 437 | fg = col1, 438 | bg = grpcol, 439 | lwd = 2 440 | ) 441 | } 442 | if (z2) { 443 | symbols( 444 | sppsub2[, 1], 445 | yvalues2, 446 | circles = sppsub2[, 447 | 7], 448 | inches = 0.1, 449 | add = TRUE, 450 | xlim = c(0, 451 | 5), 452 | fg = col2, 453 | bg = grpcol2, 454 | lwd = 2 455 | ) 456 | } 457 | } 458 | else { 459 | if (z1) { 460 | symbols( 461 | sppsub1[, 2], 462 | yvalues1, 463 | circles = sppsub1[, 464 | 7], 465 | inches = 0.1, 466 | add = TRUE, 467 | xlim = c(0, 468 | 5), 469 | fg = col1, 470 | bg = grpcol, 471 | lwd = 2 472 | ) 473 | } 474 | if (z2) { 475 | symbols( 476 | sppsub2[, 2], 477 | yvalues2, 478 | circles = sppsub2[ , 7], 479 | inches = 0.1, 480 | add = TRUE, 481 | xlim = c(0, 482 | 5), 483 | fg = col2, 484 | bg = grpcol2, 485 | lwd = 2 486 | ) 487 | } 488 | } 489 | } 490 | if (z1) { 491 | axis( 492 | 2, 493 | at = yvalues1, 494 | labels = sub1.labs, 495 | las = 1, 496 | mgp = c(1, 0.5, 0), 497 | cex.axis = cex.taxa, 498 | tck = tck 499 | ) 500 | } 501 | if (z2) { 502 | axis( 503 | 4, 504 | at = yvalues2, 505 | labels = sub2.labs, 506 | mgp = c(1, 0.5, 0), 507 | las = 1, 508 | cex.axis = cex.taxa, 509 | tck = tck 510 | ) 511 | } 512 | if (log == "x") { 513 | axis( 514 | 1, 515 | at = at, 516 | mgp = c(2, 0.5, 0), 517 | cex.axis = cex.axis, 518 | tck = tck 519 | ) 520 | } 521 | else { 522 | if (prtty) { 523 | axis( 524 | 1, 525 | pretty(xmin:xmax, ntick), 526 | mgp = c(2, 0.5, 0), 527 | cex.axis = cex.axis, 528 | tck = tck 529 | ) 530 | } 531 | else { 532 | axis( 533 | 1, 534 | at = seq( 535 | from = round(xmin, digits = dig), 536 | to = round(xmax, digits = dig), 537 | by = round((xmax - 538 | xmin) / 539 | 4, digits = dig) 540 | ), 541 | mgp = c(2, 0.5, 0), 542 | cex.axis = cex.axis, 543 | tck = tck 544 | ) 545 | } 546 | } 547 | mtext(xlabel, 548 | side = lab.side, 549 | line = lab.line, 550 | cex = cex) 551 | if (z1 & z2) { 552 | leg = c("z-", "z+") 553 | fill.leg = c(fil1, fil2) 554 | legend( 555 | leg.x, 556 | leg.y, 557 | leg, 558 | fill = fill.leg, 559 | ncol = 1, 560 | bty = "n", 561 | plot = TRUE, 562 | cex = cex.leg 563 | ) 564 | } 565 | box(which = "plot", bty = bty) 566 | if (z1 & z2 & write) { 567 | sppsub <- list(sppsub1, sppsub2) 568 | names(sppsub) <- c("sppsub1", "sppsub2") 569 | return(sppsub) 570 | } 571 | if (z1 & write) { 572 | return(sppsub1) 573 | } 574 | if (z2 & write) { 575 | return(sppsub2) 576 | } 577 | } 578 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # R_Scripts 2 | Helper R scripts for multiple PERMANOVA tests, AICc script for PERMANOVA, etc. 3 | Also some scripts for anonymizing data. 4 | -------------------------------------------------------------------------------- /anon_data.R: -------------------------------------------------------------------------------- 1 | # 2 | # Make data anonymous! This script takes an input data table (csv or in memory) 3 | # and a conversion table (lookup table) and then produces an output table (saved 4 | # to disk or memory). 5 | # 6 | # Based in part on information found at: https://stackoverflow.com/questions/35636315/replace-values-in-a-dataframe-based-on-lookup-table 7 | # 8 | # ... passes arguments to write.csv. 9 | # 10 | 11 | 12 | anon.data <- function(input.table, lookup.table, site.column, match.column, anon.column, output.path = NA, write.to.disk = FALSE, ...){ 13 | 14 | # -- Input table ------- 15 | 16 | if(is.character(input.table) == TRUE) { 17 | 18 | input.table <- read.csv(input.table, stringsAsFactors = FALSE) 19 | 20 | } 21 | 22 | exists("input.table") 23 | 24 | # -- Lookup table ------------ 25 | 26 | if(is.character(lookup.table) == TRUE) { 27 | 28 | lookup.table <- read.csv(lookup.table, stringsAsFactors = FALSE) 29 | 30 | } 31 | 32 | exists("lookup.table") 33 | 34 | # -- Anon the table ----------- 35 | 36 | replace.me <- input.table[ , site.column] 37 | lookup.match <- lookup.table[ , match.column] 38 | replace.with <- lookup.table[ , anon.column] 39 | 40 | anon.table <- input.table 41 | anon.table[ , which(colnames(anon.table) == site.column)] <- replace.with[match( 42 | replace.me, lookup.match)] 43 | 44 | 45 | # -- Export the output table --------- 46 | 47 | if(is.character(output.path) == TRUE & write.to.disk == TRUE) { 48 | 49 | write.csv(x = anon.table, file = output.path, ...) 50 | return(anon.table) 51 | 52 | } 53 | 54 | if(is.character(output.path) == TRUE & write.to.disk == FALSE) { 55 | 56 | warning("warning: table is chr and write to disk is false, creating a default output table in Global Environment") 57 | 58 | return (anon.table) 59 | 60 | } 61 | 62 | if(is.character(output.path) == FALSE & write.to.disk == FALSE) { 63 | 64 | return (anon.table) 65 | 66 | } 67 | 68 | 69 | if(is.character(output.path) == FALSE & write.to.disk == TRUE) { 70 | 71 | warning("warning: no output path specified, csv not saved to disk.") 72 | return (anon.table) 73 | 74 | } 75 | 76 | } 77 | -------------------------------------------------------------------------------- /chisquared.R: -------------------------------------------------------------------------------- 1 | # likertTable for all three functions should be formatted with question columns 2 | # and demographic (or grouping) columns, each row should be a different 3 | # respondents answer. Cells should be e.g. likert responses (More, etc.) or 4 | # demographic data (e.g. age groups) 5 | 6 | ## Results table for the addKruskal and addChiSquared functions should be formulated like this: 7 | 8 | # 9 | # activityGardensResults <- 10 | # tibble( 11 | # demographicVariable = c( 12 | # "Gender", 13 | # "Marital Status", 14 | # "Age", 15 | # "Housing", 16 | # "Town Type", 17 | # "Governorate", 18 | # "Employment", 19 | # "Work Location", 20 | # "Education", 21 | # "Before Income", 22 | # "After Income" 23 | # ), 24 | # afterGardensRelaxing_CHI = rep(0.0, length(demographicVariable)), 25 | # afterGardensRelaxing_PVAL = rep(0.0, length(demographicVariable)), 26 | # afterGardensRelaxing_CORPVAL = rep(0.0, length(demographicVariable)), 27 | # afterGardensExercise_CHI = rep(0.0, length(demographicVariable)), 28 | # afterGardensExercise_PVAL = rep(0.0, length(demographicVariable)), 29 | # afterGardensExercise_CORPVAL = rep(0.0, length(demographicVariable)), 30 | # afterGardensBirdPhotography_CHI = rep(0.0, length(demographicVariable)), 31 | # afterGardensBirdPhotography_PVAL = rep(0.0, length(demographicVariable)), 32 | # afterGardensBirdPhotography_CORPVAL = rep(0.0, length(demographicVariable)) 33 | # 34 | # ) 35 | 36 | 37 | 38 | addKruskal <- 39 | function(likertTable, 40 | resultsTable, 41 | questionColumns, 42 | demographicColumns) { 43 | i = min(demographicColumns) # row 44 | j = min(questionColumns) # column 45 | 46 | a = 1 # the first row in the resultsTable that should have results assigned. 47 | c = 4 # first column that should have corrected pvalues 48 | 49 | # Iterate through each demographic variable 50 | for (i in min(demographicColumns):max(demographicColumns)) { 51 | b = 2 # the first column in the resultsTable that should have results assigned to it 52 | 53 | for (j in min(questionColumns):max(questionColumns)) { 54 | # Create a temp table and filter out blanks. 55 | tempTable <- likertTable[, c(j, i)] 56 | tempTable <- tempTable[tempTable[2] != "",] 57 | 58 | # perform the kruskal wallis test 59 | tempKruskal <- kruskal.test(tempTable[, 1] ~ tempTable[, 2]) 60 | 61 | # add statistic value and pvalue to the table. 62 | resultsTable[a, b] <- tempKruskal$statistic 63 | resultsTable[a, b + 1] <- tempKruskal$p.value 64 | 65 | # add 2 so next results go in the proper place 66 | b = b + 3 67 | } # end question loop 68 | 69 | a = a + 1 # go to next row for next set of demographic info 70 | 71 | } # end demographic loop 72 | 73 | # add corrected pvalue 74 | 75 | z = 1 76 | for (z in 1:length(questionColumns)) { 77 | tempCorrected <- 78 | p.adjust(as.vector(unlist(resultsTable[, c - 1])), method = "holm") 79 | 80 | resultsTable[, c] <- tempCorrected 81 | 82 | c = c + 3 83 | 84 | } # end addition of corrected pvalues 85 | 86 | return(resultsTable) 87 | 88 | } # end function 89 | 90 | 91 | 92 | addChiSquared <- 93 | function(likertTable, 94 | resultsTable, 95 | questionColumns, 96 | demographicColumns, 97 | minVal = 25) { 98 | i = min(demographicColumns) # row 99 | j = min(questionColumns) # column 100 | 101 | a = 1 # the first row in the resultsTable that should have results assigned. 102 | c = 4 # first column that should have corrected pvalues 103 | 104 | # Iterate through each demographic variable 105 | for (i in min(demographicColumns):max(demographicColumns)) { 106 | b = 2 # the first column in the resultsTable that should have results assigned to it 107 | 108 | for (j in min(questionColumns):max(questionColumns)) { 109 | # Create a temp table and filter out blanks. 110 | tempTable <- likertTable[, c(j, i)] 111 | tempTable <- 112 | tempTable[tempTable[2] != "" & !is.na(tempTable[1]) ,] 113 | 114 | # Check to see if any group has less than min responses. 115 | testSize <- 116 | tempTable %>% count(tempTable[1:2]) %>% group_by(across(.cols = 2)) %>% summarise(n = sum(n)) 117 | 118 | tooSmall <- testSize[testSize[2] < minVal, 1] 119 | print(unlist(tooSmall)) 120 | # print(colnames(tempTable)) # use to debug 121 | 122 | # Based on this, either assign a "can't be tested"/NA indicator or the values for the chi-squared test. 123 | if (length(setdiff(unique(tempTable[, 2]), tooSmall)) < 2) { 124 | resultsTable[a, b] <- NA 125 | resultsTable[a, b + 1] <- NA 126 | 127 | } else { 128 | tempTable <- tempTable[!(tempTable[, 2] %in% tooSmall),] 129 | 130 | # perform the chi squared test 131 | tempChi <- chisq.test(tempTable[, 1], tempTable[, 2]) 132 | 133 | # add statistic value and pvalue to the table. 134 | resultsTable[a, b] <- tempChi$statistic 135 | resultsTable[a, b + 1] <- tempChi$p.value 136 | 137 | } # end of else 138 | 139 | # Use this if you want to export a Pivot table 140 | # pivot <- pivot_wider(testSize, 141 | # names_from = colnames(testSize[1]), 142 | # values_from = n) 143 | 144 | # Clean up 145 | remove(testSize, tooSmall) 146 | 147 | # add 2 so next results go in the proper place 148 | b = b + 3 149 | } # end question loop 150 | 151 | a = a + 1 # go to next row for next set of demographic info 152 | 153 | } # end demographic loop 154 | 155 | # add corrected pvalue 156 | 157 | z = 1 158 | for (z in 1:length(questionColumns)) { 159 | tempCorrected <- 160 | p.adjust(as.vector(unlist(resultsTable[, c - 1])), method = "holm") 161 | 162 | resultsTable[, c] <- tempCorrected 163 | 164 | c = c + 3 165 | 166 | } # end addition of corrected pvalues 167 | 168 | return(resultsTable) 169 | 170 | } # end function 171 | 172 | 173 | 174 | # This function needs to compare between categories within a question to see 175 | # which categories are significantly different. 176 | 177 | posthocChiSquared <- 178 | function(likertTable, 179 | questionColumn, 180 | demographicColumn, 181 | minVal = 25, 182 | correction = TRUE) { 183 | tempTable <- likertTable[, c(questionColumn, demographicColumn)] 184 | tempTable <- 185 | tempTable[tempTable[2] != "" & !is.na(tempTable[1]) , ] 186 | 187 | # Check to see if any group has less than min allowed responses. 188 | testSize <- 189 | tempTable %>% count(tempTable[1:2]) %>% group_by(across(.cols = 2)) %>% summarise(n = sum(n)) 190 | 191 | tooSmall <- testSize[testSize[2] < minVal, 1] 192 | print(unlist(tooSmall)) 193 | 194 | # Filter out too small categories 195 | tempTable <- tempTable[!(tempTable[, 2] %in% tooSmall), ] 196 | # and make sure demographic is a factor (this gets lost sometimes) 197 | if (is.factor(tempTable[, 2]) == FALSE) { 198 | tempTable[, 2] <- as.factor(tempTable[, 2]) 199 | } 200 | 201 | # Make sure that the number of unique is more than two 202 | if (nlevels(tempTable[, 2]) < 3) { 203 | stop(print("Too few levels")) 204 | } 205 | 206 | numLevels <- nlevels(tempTable[, 2]) 207 | 208 | # create the results table 209 | #create matrix with correct number of columns 210 | resultsTable <- matrix(rep(999, times = numLevels ^ 2), 211 | ncol = numLevels, 212 | byrow = TRUE) 213 | 214 | #define column names and row names of matrix 215 | tempLevels <- levels(tempTable[, 2]) 216 | colnames(resultsTable) <- tempLevels 217 | rownames(resultsTable) <- tempLevels 218 | 219 | # for each [i,j] pair of factors add the pvalue for chisquared test 220 | i = 1 # row 221 | j = 1 # column 222 | for (i in 1:numLevels) { 223 | for (j in 1:numLevels) { 224 | if (i != j) { 225 | # subset for i and j levels 226 | testTable <- 227 | tempTable[tempTable[, 2] %in% tempLevels[c(i, j)] ,] 228 | 229 | # run test and assign pvalue to i,j spot 230 | resultsTable[i, j] <- 231 | chisq.test(testTable[, 1], testTable[, 2])$p.value 232 | 233 | } else { 234 | resultsTable[i, j] <- NA 235 | } 236 | 237 | } # end column loop 238 | 239 | } # end row loop 240 | 241 | # remove the lower triangle--can probably make this a filter but this is easy 242 | resultsTable[lower.tri(resultsTable, diag = FALSE)] <- NA 243 | 244 | # correct pvalues 245 | if (correction == TRUE) { 246 | resultsTable <- 247 | matrix( 248 | p.adjust(as.vector(resultsTable), method = 'holm'), 249 | ncol = numLevels, 250 | dimnames = list(tempLevels, tempLevels) 251 | ) 252 | } 253 | #convert matrix to a tibble 254 | resultsTable <- as_tibble(resultsTable, rownames = "levels") 255 | 256 | return(resultsTable) 257 | 258 | } # end function 259 | -------------------------------------------------------------------------------- /eDNA_helpers.R: -------------------------------------------------------------------------------- 1 | 2 | ## ----- Data manip. funs. | transpose --------------------------- 3 | 4 | speciesSite <- function(inputOTU, rare = 0) { 5 | #input OTU should only be data, no text/ID fields. 6 | # inputs Species as columns, outputs Species as rows. 7 | temp <- inputOTU[ , colSums(inputOTU) > rare] 8 | temp <- as.data.frame(inputOTU) 9 | temp.col.names <- colnames(temp) 10 | temp <- as.data.frame(t(temp)) 11 | rownames(temp) <- temp.col.names 12 | 13 | return(temp) 14 | } 15 | 16 | siteSpecies <- function(inputOTU, rowNames, rare = 0) { 17 | #input OTU should only be data, no text/descriptions. 18 | #inputs Species as rows, outputs Species as columns 19 | temp <- as.data.frame(inputOTU, row.names = rowNames) 20 | temp <- as.data.frame(t(temp)) 21 | colnames(temp) <- rowNames 22 | temp <- temp[rowSums(temp) > rare,] 23 | 24 | return(temp) 25 | 26 | } 27 | 28 | 29 | ## ----- Data manip. funs. | Compositional Matrices ---------------- 30 | 31 | compMatrix <- function(inputMatrix, z.warning = 0.8){ 32 | # square-root Bayesian-multiplicative replacement of zeros with the cmultRepl() 33 | # function (Ladin et al., 2021) 34 | 35 | # Most other code in GitHub uses CZM. e.g. See 36 | # https://github.com/ggloor/CoDa_microbiome_tutorial/wiki/Part-1:-Exploratory-Compositional-PCA-Biplot 37 | # and https://raw.githubusercontent.com/ggloor/CoDaSeq/6ff864aade46cd3c8b0eff3bb54d5460775f92cd/CoDaSeq/vignettes/CoDaSeq_vignette.Rnw 38 | # This latter contends that this is the most principled method. 39 | 40 | zeroRepl <- cmultRepl(inputMatrix, 41 | label = 0, 42 | method = "CZM", 43 | z.warning = z.warning) 44 | 45 | output <- cdt.acomp(x = zeroRepl) %>% 46 | as.data.frame() 47 | #rownames(output) <- rownames(inputMatrix) 48 | 49 | ## see https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7811025/ 50 | 51 | return(output) 52 | } 53 | 54 | 55 | ## ----- Data Summary | Reads ------------------------------------------ 56 | # Creates a summary table for number of eDNA reads found; total number of species variants, etc. 57 | # note this is customized for Eco Mol output tables. Column names will differ for other providers. 58 | 59 | 60 | ecoMolSummary <- function(ecoMolInput) { 61 | output <- tibble( 62 | sumReads = dplyr::select(ecoMolInput, asvAbsoluteAbundance) %>% sum(), 63 | ASVCount = dplyr::select(ecoMolInput, ASVHeader) %>% unique() %>% nrow(), 64 | genusCount = dplyr::select(ecoMolInput, genusBLASTn) %>% unique() %>% nrow() 65 | ) 66 | 67 | return (output) 68 | } 69 | 70 | 71 | 72 | ## ----- Data Summary | Alpha Diversity -------------------------- 73 | 74 | # Note these two are based on the VEGAN function, but with modifications to preserve data order. Per this issue I created, changing the data order is the desired behavior and they won't fix it, so this is my workaround: https://github.com/vegandevs/vegan/issues/552 75 | 76 | `specnumberMOD` <- 77 | function(x, groups, MARGIN = 1) 78 | { 79 | if (!missing(groups)) { 80 | if (length(groups) == 1) 81 | groups <- rep(groups, nrow(x)) 82 | groups <- factor(groups, levels = unique(groups)) # this preserves the data order! 83 | x <- aggregate(x, list(groups), max) # max is used because the actual number doesn't matter, just that it is over 0 84 | rownames(x) <- x[,1] 85 | x <- x[,-1] 86 | } 87 | if (length(dim(x)) > 1) 88 | apply(x > 0, MARGIN, sum) 89 | else 90 | sum(x > 0) 91 | } 92 | 93 | `diversityMOD` <- function (x, index = "shannon", groups, equalize.groups = FALSE, 94 | MARGIN = 1, base = exp(1)) 95 | { 96 | x <- drop(as.matrix(x)) 97 | if (!is.numeric(x)) 98 | stop("input data must be numeric") 99 | if (any(x < 0, na.rm = TRUE)) 100 | stop("input data must be non-negative") 101 | if (!missing(groups)) { 102 | if (MARGIN == 2) 103 | x <- t(x) 104 | if (length(groups) == 1) 105 | groups <- rep(groups, NROW(x)) 106 | if (equalize.groups) 107 | x <- decostand(x, "total") 108 | groups <- factor(groups, levels = unique(groups)) # this preserves the data order! 109 | x <- aggregate(x, list(groups), sum) 110 | rownames(x) <- x[, 1] 111 | x <- x[, -1, drop = FALSE] 112 | if (MARGIN == 2) 113 | x <- t(x) 114 | } 115 | INDICES <- c("shannon", "simpson", "invsimpson") 116 | index <- match.arg(index, INDICES) 117 | if (length(dim(x)) > 1) { 118 | total <- apply(x, MARGIN, sum) 119 | x <- sweep(x, MARGIN, total, "/") 120 | } 121 | else { 122 | x <- x/(total <- sum(x)) 123 | } 124 | if (index == "shannon") 125 | x <- -x * log(x, base) 126 | else x <- x * x 127 | if (length(dim(x)) > 1) 128 | H <- apply(x, MARGIN, sum, na.rm = TRUE) 129 | else H <- sum(x, na.rm = TRUE) 130 | if (index == "simpson") 131 | H <- 1 - H 132 | else if (index == "invsimpson") 133 | H <- 1/H 134 | if (any(NAS <- is.na(total))) 135 | H[NAS] <- NA 136 | H 137 | } 138 | 139 | 140 | 141 | 142 | alphaMetrics <- function(inputOTUSiSp, groupNames, replNames){ 143 | 144 | alphaTable <- tibble( 145 | siteNames = rownames(inputOTUSiSp), 146 | siteType = groupNames, 147 | replicate = replNames, 148 | speciesRichness = specnumberMOD(inputOTUSiSp , MARGIN = 1), 149 | shannonRichness = vegan::diversity(inputOTUSiSp, index = "shannon"), 150 | effectiveSR = exp(shannonRichness), 151 | invSimpson = vegan::diversity(inputOTUSiSp, index = "invsimpson") 152 | ) 153 | 154 | temp <- alphaTable %>% group_by(siteType) %>% summarise(siteType_n = n()) 155 | 156 | alphaTable <- left_join(x = alphaTable, y = temp) 157 | 158 | return(alphaTable) 159 | 160 | } 161 | 162 | alphaGroupMetrics <- function(inputOTUSiSp, groupNames) { 163 | alphaTable <- tibble( 164 | siteType = groupNames %>% unique(), # unique preserves order of groupNames, no alph reordering 165 | speciesRichness = specnumberMOD(inputOTUSiSp, MARGIN = 1, groups = groupNames), 166 | shannonRichness = diversityMOD(inputOTUSiSp, index = "shannon", groups = groupNames), 167 | effectiveSR = exp(shannonRichness), 168 | invSimpson = diversityMOD(inputOTUSiSp, index = "invsimpson", groups = groupNames) 169 | ) 170 | 171 | temp <- as_tibble(groupNames) %>% group_by(value) %>% summarise(siteType_n = n()) 172 | 173 | alphaTable <- left_join(x = alphaTable, y = temp, join_by(siteType == value)) 174 | 175 | return(alphaTable) 176 | 177 | } 178 | 179 | 180 | ## ----- Plotting functions | Aitchison ------------------------------- 181 | # helper function--note this is taken from other sources. I couldn't identify the original source of the code but e.g. it is used here: http://sthda.com/english/wiki/ggplot2-quick-correlation-matrix-heatmap-r-software-and-data-visualization 182 | 183 | get_lower_tri <- function(inpMatrix){ 184 | inpMatrix[upper.tri(inpMatrix, diag = T)]<- NA 185 | return(inpMatrix) 186 | } 187 | 188 | 189 | 190 | aitHeatmap <- function(inputDist, fillColor1 = "blue", fillColor2 = "orange", textPlease = FALSE){ 191 | graph <- 192 | inputDist %>% 193 | as.matrix() %>% 194 | get_lower_tri() %>% 195 | as.data.frame() %>% 196 | tibble::rownames_to_column("plot1") %>% 197 | pivot_longer(-c(plot1), 198 | names_to = "plot2", 199 | values_to = "distance", 200 | values_drop_na = T) %>% 201 | ggplot(aes(x = plot1, y = plot2, fill = distance)) + 202 | geom_raster() + 203 | 204 | scale_fill_gradient(low = fillColor1, high = fillColor2, 205 | name="Aitchison\nDistance") + 206 | theme( 207 | axis.title.x = element_blank(), 208 | axis.title.y = element_blank(), 209 | panel.grid.major = element_blank(), 210 | #panel.border = element_blank(), 211 | panel.background = element_blank(), 212 | #axis.ticks = element_blank(), 213 | legend.justification = c(1, 0), 214 | legend.position = c(0.5, 0.7), 215 | legend.direction = "horizontal", 216 | axis.text.x = element_text(angle = 45, vjust = 1, 217 | size = 12, hjust = 1), 218 | axis.text.y = element_text(size = 12))+ 219 | guides(fill = guide_colorbar(barwidth = 7, barheight = 1, 220 | title.position = "top", title.hjust = 0.5)) 221 | 222 | if(textPlease == TRUE){ 223 | graph <- graph + 224 | geom_text(aes(label = round(distance))) 225 | 226 | 227 | } 228 | 229 | 230 | return(graph) 231 | } 232 | 233 | 234 | # Create function that takes distance matrix, switches to long form, then 235 | # creates relevant columns. Then can feed it into a box plot. 236 | 237 | aitComparison <- function(inputDist, remap = NULL, repeatSamples = FALSE, fillColor = NULL, levelsPlot = NULL, plotPlease = TRUE) { 238 | # remap should have three columns: the original site names, pretty site names, 239 | # and the type of site (for grouping) 240 | temp <- 241 | inputDist %>% 242 | as.matrix() %>% 243 | get_lower_tri() %>% 244 | as.data.frame() %>% 245 | tibble::rownames_to_column("plot1") %>% 246 | pivot_longer( 247 | -c(plot1), 248 | names_to = "plot2", 249 | values_to = "distance", 250 | values_drop_na = T 251 | ) 252 | 253 | if(!is.null(remap)) { 254 | # make this a remap so that the names aren't so awful. need to add else to handle if remap is NULL 255 | old <- remap[[1]] 256 | new <- remap[[2]] 257 | temp$plot1[ temp$plot1 %in% old] <- new[base::match(temp$plot1, old)] 258 | temp$plot2[ temp$plot2 %in% old] <- new[match(temp$plot2, old)] 259 | 260 | if(repeatSamples == TRUE) { 261 | type <- remap[[3]] 262 | temp$type1 <- type[match(temp$plot1, new)] 263 | temp$type2 <- type[match(temp$plot2, new)] 264 | 265 | } 266 | 267 | } 268 | 269 | if(repeatSamples == FALSE){ 270 | temp <- temp %>% 271 | mutate(pair = paste0(plot1, "-", plot2)) 272 | } 273 | 274 | if (repeatSamples == TRUE) { 275 | temp <- temp %>% 276 | mutate(pair = ifelse( 277 | type1 < type2, 278 | paste(type1, type2, sep = "-"), 279 | paste(type2, type1, sep = "-") 280 | )) 281 | } 282 | 283 | print(temp$pair) 284 | 285 | if(is.null(fillColor) == TRUE) { 286 | library(RColorBrewer) 287 | fillColor <- brewer.pal(length(unique(temp$pair)),"Set1") 288 | } 289 | 290 | if(is.null(levelsPlot) == TRUE){ 291 | levelsPlot <- sort(unique(temp$pair)) 292 | } 293 | 294 | 295 | #make the graph here. 296 | if(repeatSamples == TRUE & plotPlease == TRUE){ 297 | temp <- temp %>% 298 | mutate(pair = factor(pair, levels = levelsPlot)) %>% 299 | ggplot(aes(y = distance, x = pair)) + 300 | stat_boxplot(geom = "errorbar", 301 | width = 0.25) + 302 | geom_boxplot() + 303 | geom_jitter(aes(color = pair), width = 0.05, size = 3) + 304 | scale_x_discrete(limits = levelsPlot) + 305 | theme(legend.position = "none", 306 | plot.margin = margin(t = .5, # Top margin 307 | r = .5, # Right margin 308 | b = .5, # Bottom margin 309 | l = 1.5, # Left margin 310 | unit = "cm" 311 | )) + 312 | theme(axis.text.x = element_text( 313 | angle = 45, 314 | vjust = 1, 315 | size = 10, 316 | hjust = 1 317 | )) + 318 | scale_color_manual(values = fillColor) + 319 | labs(x = element_blank(), 320 | y = "Aitchison Distance") 321 | } 322 | 323 | 324 | 325 | if(repeatSamples == FALSE & plotPlease == TRUE){ 326 | temp <- temp %>% 327 | ggplot(aes(y = distance, x = pair)) + 328 | geom_bar(aes(y = distance, fill = pair), stat = "identity") + 329 | geom_text(aes(label = round(distance,1)), vjust = -0.5) + 330 | scale_x_discrete(limits = levelsPlot) + 331 | theme(legend.position = "none") + 332 | theme(axis.text.x = element_text(angle = 45, vjust = 1, 333 | size = 10, hjust = 1)) + 334 | scale_fill_manual(values = fillColor) + 335 | labs(x = element_blank(), 336 | y = "Aitchison Distance" 337 | ) 338 | } 339 | 340 | 341 | return(temp) 342 | } 343 | 344 | 345 | 346 | 347 | -------------------------------------------------------------------------------- /extract_taxa_TITAN.R: -------------------------------------------------------------------------------- 1 | require(dplyr) 2 | require(tibble) 3 | require(data.table) 4 | 5 | ## --- Make some nice tables for each TITAN ------------------------------ 6 | 7 | ## For use with basic site x species/genus table, e.g. with mushrooms collected in the field. 8 | 9 | extract.titan.taxa <- 10 | function(titan.out, 11 | purity.cutoff = 0.9, 12 | reliability.cutoff = 0.9, 13 | taxa.label.name = "Genus") { 14 | titan.out <- titan.out 15 | 16 | titan.out.filtered <- titan.out$sppmax %>% 17 | as.data.frame() %>% 18 | rownames_to_column() 19 | 20 | titan.out.filtered <- 21 | titan.out.filtered[titan.out.filtered$purity >= purity.cutoff & 22 | titan.out.filtered$reliability >= reliability.cutoff, ] 23 | titan.out.filtered <- 24 | titan.out.filtered[order(titan.out.filtered$zscore, decreasing = T), ] %>% 25 | mutate(zgrp = ifelse(maxgrp == 1, "z-", "z+")) 26 | 27 | 28 | titan.out.taxonomy.summary <- 29 | group_by(titan.out.filtered, zgrp, rowname) %>% 30 | summarise( 31 | mean.zscore = mean(zscore), 32 | mean.purity = mean(purity), 33 | mean.reliability = mean(reliability), 34 | mean.zenv.cp = mean(zenv.cp), 35 | mean.5pct.cp = mean(`5%`), 36 | mean.95pct.cp = mean(`95%`) 37 | ) 38 | 39 | 40 | colnames(titan.out.taxonomy.summary)[1:ncol(titan.out.taxonomy.summary)] <- 41 | c( 42 | "Decreasing/Increasing Z Taxa", 43 | taxa.label.name, 44 | "Mean Z Score", 45 | "Mean Purity", 46 | "Mean Reliability", 47 | "Mean Env CP z-max", 48 | "Mean 5% CP", 49 | "Mean 95% CP" 50 | ) 51 | 52 | return(ungroup(titan.out.taxonomy.summary)) 53 | 54 | } 55 | 56 | 57 | 58 | 59 | ## For use with NGS data, could be made to work with phyloseq etc. 60 | 61 | # titan.out = I5CL.titan.pctN 62 | # taxonomy.table = raw_dada2_taxa 63 | 64 | 65 | extract.titan.taxa.NGS <- 66 | function(titan.out, 67 | taxonomy.table, 68 | taxonomy.table.merge = "taxa.label.unique", 69 | taxa.level = Genus, 70 | purity.cutoff = 0.90, 71 | reliability.cutoff = 0.90, 72 | taxa.label.name = "taxa.label.unique", 73 | label = label) { 74 | titan.out <- titan.out 75 | taxa.level <- enquo(taxa.level) 76 | label <- enquo(label) 77 | taxonomy.table <- taxonomy.table 78 | 79 | titan.out.filtered <- titan.out$sppmax %>% 80 | as.data.frame() 81 | titan.out.filtered[, taxa.label.name] <- 82 | rownames(titan.out.filtered) 83 | titan.out.filtered <- 84 | titan.out.filtered[titan.out.filtered$purity >= purity.cutoff & 85 | titan.out.filtered$reliability >= reliability.cutoff, ] 86 | titan.out.filtered <- 87 | titan.out.filtered[order(titan.out.filtered$zscore, decreasing = T), ] 88 | titan.out.filtered <- 89 | merge( 90 | titan.out.filtered, 91 | taxonomy.table, 92 | by.x = taxa.label.name, 93 | by.y = taxonomy.table.merge, 94 | sort = F 95 | ) %>% 96 | mutate(zgrp = ifelse(maxgrp == 1, "z-", "z+")) 97 | 98 | 99 | titan.out.taxonomy.summary <- 100 | group_by(titan.out.filtered, zgrp, !!taxa.level) %>% 101 | summarise( 102 | mean.zscore = mean(zscore), 103 | mean.purity = mean(purity), 104 | mean.reliability = mean(reliability), 105 | mean.zenv.cp = mean(zenv.cp), 106 | mean.5pct.cp = mean(`5%`), 107 | mean.95pct.cp = mean(`95%`), 108 | count = n(), 109 | svs = paste(unique(!!label), collapse = ";") 110 | ) 111 | 112 | colnames(titan.out.taxonomy.summary)[1] <- 113 | "Decreasing/Increasing Z Taxa" 114 | colnames(titan.out.taxonomy.summary)[3:ncol(titan.out.taxonomy.summary)] <- 115 | c( 116 | "Mean Z Score", 117 | "Mean Purity", 118 | "Mean Reliability", 119 | "Mean Env CP z-max", 120 | "Mean 5% CP", 121 | "Mean 95% CP", 122 | "Count", 123 | "All SVs or OTUs" 124 | ) 125 | 126 | return(ungroup(titan.out.taxonomy.summary)) 127 | 128 | } 129 | 130 | 131 | 132 | 133 | 134 | ## -- For cell filler for table assembler --------------------- 135 | 136 | # this needs to fill cells for EITHER z1 or z2 (z- or z+ taxa) 137 | 138 | # taxa.col.name <- quo(Genus) 139 | # titan.taxa.tables <- list(NGS.TITAN.cnd.Genus, NGS.TITAN.sesr.Genus) 140 | # table.col.names <- c("cnd", "sesr") 141 | # z.score.pct.cutoff <- .5 142 | # round.val <- 3 143 | # which.z = "z-" 144 | # taxon.list = all.z1.taxon 145 | # j = 3 146 | 147 | 148 | cell.filler <- 149 | function(titan.taxa.tables, 150 | which.z, 151 | taxon.list, 152 | taxa.col.name, 153 | table.col.names, 154 | z.score.pct.cutoff, 155 | round.val) { 156 | # create the table framework... 157 | 158 | taxa.comparison.table <- data.table(`All Taxon` = taxon.list, 159 | `Z type` = rep(which.z, length(taxon.list))) 160 | 161 | 162 | # ...and create the columns for the data to go into. 163 | 164 | for (i in 1:length(titan.taxa.tables)) { 165 | taxa.comparison.table[, table.col.names[i]] <- 166 | rep("", length(taxon.list)) 167 | 168 | } 169 | 170 | 171 | 172 | # Now fill everything. 173 | 174 | for (i in 1:length(titan.taxa.tables)) { 175 | temp.table <- titan.taxa.tables[[i]] 176 | temp.table <- 177 | temp.table[temp.table$`Decreasing/Increasing Z Taxa` == which.z ,] 178 | 179 | # create a cutoff based on user input. 180 | z.score.percentile <- 181 | quantile(temp.table$`Mean Z Score`, z.score.pct.cutoff) 182 | 183 | 184 | 185 | # iterate through each taxon in your taxon.list... 186 | for (j in 1:length(taxon.list)) { 187 | check.list <- unlist(select(temp.table, !!taxa.col.name)) 188 | 189 | # is this taxon in the titan output of the ith table? 190 | if (taxon.list[j] %in% check.list) { 191 | # if yes, then give it some information like if it is a z- or z+ taxa, z score, change point 192 | 193 | # isolate the info for the right genus 194 | temp.row <- 195 | temp.table[select(temp.table, !!taxa.col.name) == taxon.list[j] & 196 | !(is.na(select( 197 | temp.table, !!taxa.col.name 198 | ))), ] 199 | 200 | # bold the z score if it's above the cutoff 201 | z.score.text <- 202 | ifelse( 203 | temp.row[, "Mean Z Score"] >= z.score.percentile , 204 | yes = paste0( 205 | "**", 206 | temp.row[, "Mean Z Score"] %>% round(digits = round.val), 207 | "**" 208 | ), 209 | no = paste0(temp.row[, "Mean Z Score"]) 210 | ) 211 | 212 | text <- 213 | paste0("Z-score: ", 214 | z.score.text, 215 | "; CP: ", 216 | temp.row[, "Mean Env CP z-max"] %>% round(round.val)) 217 | 218 | taxa.comparison.table[j , i + 2] <- text 219 | 220 | 221 | } 222 | 223 | # if not R will just leave it blank. 224 | 225 | 226 | } 227 | } 228 | 229 | return(taxa.comparison.table) 230 | 231 | } 232 | 233 | ## --- Multivariable summary table assembly ---------------------- 234 | 235 | combine.titan.results <- 236 | function(titan.taxa.tables, 237 | taxa.col.name, 238 | table.col.names, 239 | z.score.pct.cutoff = .5, 240 | round.val = 3) { 241 | taxa.col.name <- enquo(taxa.col.name) 242 | 243 | all.z1.taxon <- c() 244 | all.z2.taxon <- c() 245 | 246 | 247 | # create z- and z+ separately b/c some taxa will have both (esp. for higher taxonomic levels.) 248 | 249 | # create a list of all taxonomic groups for z- taxon 250 | for (i in 1:length(titan.taxa.tables)) { 251 | temp.z1.table <- 252 | titan.taxa.tables[[i]] %>% subset(`Decreasing/Increasing Z Taxa` == "z-") 253 | 254 | all.z1.taxon <- 255 | c(all.z1.taxon, 256 | select(temp.z1.table, !!taxa.col.name) %>% unlist()) 257 | 258 | } 259 | 260 | # create a list of all taxonomic groups for z+ taxon 261 | for (i in 1:length(titan.taxa.tables)) { 262 | temp.z2.table <- 263 | titan.taxa.tables[[i]] %>% subset(`Decreasing/Increasing Z Taxa` == "z+") 264 | 265 | all.z2.taxon <- 266 | c(all.z2.taxon, 267 | select(temp.z2.table, !!taxa.col.name) %>% unlist()) 268 | 269 | } 270 | 271 | 272 | # make that only uniques, and remove NA 273 | 274 | all.z1.taxon <- 275 | all.z1.taxon[!(is.na(all.z1.taxon))] %>% unique() 276 | all.z1.taxon <- all.z1.taxon[order(all.z1.taxon)] 277 | 278 | 279 | all.z2.taxon <- 280 | all.z2.taxon[!(is.na(all.z2.taxon))] %>% unique() 281 | all.z2.taxon <- all.z2.taxon[order(all.z2.taxon)] 282 | 283 | 284 | 285 | 286 | # and now iterate through to put text in each slot. 287 | 288 | summary.table.z1 <- 289 | cell.filler( 290 | titan.taxa.tables = titan.taxa.tables, 291 | which.z = "z-", 292 | taxon.list = all.z1.taxon, 293 | taxa.col.name = taxa.col.name, 294 | table.col.names = table.col.names, 295 | z.score.pct.cutoff = z.score.pct.cutoff, 296 | round.val = round.val 297 | ) 298 | 299 | summary.table.z2 <- 300 | cell.filler( 301 | titan.taxa.tables = titan.taxa.tables, 302 | which.z = "z+", 303 | taxon.list = all.z2.taxon, 304 | taxa.col.name = taxa.col.name, 305 | table.col.names = table.col.names, 306 | z.score.pct.cutoff = z.score.pct.cutoff, 307 | round.val = round.val 308 | ) 309 | 310 | 311 | # smash the two resulting tables together 312 | 313 | summary.table <- rbind(summary.table.z1, summary.table.z2) 314 | 315 | # alphebetize 316 | 317 | summary.table <- 318 | summary.table[order(summary.table$`All Taxon`),] 319 | 320 | 321 | return(summary.table) 322 | 323 | 324 | } 325 | -------------------------------------------------------------------------------- /group_PERMANOVA.R: -------------------------------------------------------------------------------- 1 | 2 | ## Function for sequentially testing multiple variables in PERMANOVA, then 3 | ## adjusting pseudo-F values. Also calculates multivariate dispersion for 4 | ## categorical variables. 5 | 6 | require(vegan) 7 | require(tibble) 8 | 9 | # First, a quick function to calculate omega squared based on: 10 | # https://academic.oup.com/bioinformatics/article/31/15/2461/188732 11 | # http://www.real-statistics.com/multiple-regression/other-measures-effect-size-anova/ 12 | # http://psych.colorado.edu/~willcutt/pdfs/Olejnik_2003.pdf if you want to make this generalized... 13 | # https://gist.github.com/arnoud999/e677516ed45e9a11817e for some r code of generalization 14 | 15 | PERMANOVA.omega2 <- function(adonis2.object, num.control.vars) { 16 | 17 | 18 | SSE <- adonis2.object[ 1 + num.control.vars, "SumOfSqs"] 19 | dfE <- adonis2.object[ 1 + num.control.vars, "Df"] 20 | MSE <- adonis2.object["Residual", "SumOfSqs"]/adonis2.object["Residual", "Df"] # Mean Square for Error 21 | SST <- sum(adonis2.object$SumOfSqs) 22 | 23 | omega2 <- (SSE - dfE*MSE) / (SST + MSE) 24 | 25 | return(omega2) 26 | 27 | } 28 | 29 | 30 | ## Note that I have some defaults for adonis2: two processors (parallel = 2), 31 | ## 99999 permutations, and using the bray-curtis distance matrix (method = 32 | ## "bray") 33 | 34 | ## Note also that this does not test for interaction effects. 35 | 36 | ## Variable definitions: 37 | ## var.names should be a vector of column names, one for each variable to be tested 38 | # Might consider adding functionality to take column names or position numbers, but for now... 39 | ## var.table is the table where the independent variables are. 40 | ## var.table.c is a string of the table name: eventually generate this automatically. 41 | ## species.table is the species/site matrix (e.g. created by matrify) 42 | ## species.table.c is a string of the table name: eventually generate this automatically. 43 | ## control.vars are any variables that need to be included before testing variables--character string, e.g. "var1 + var2" 44 | ## num.control.vars specifies the number of control variables used. Replace this later with something deriving it from control.vars text... 45 | ## by.adonsi2 is to pass through for the by = argument in adonis2 46 | 47 | 48 | group.PERMANOVA <- function(var.names, var.table, var.table.c, control.vars = "", species.table, species.table.c, num.control.vars = 0, by.adonis2 = "terms", AIC.type = "AICc", perms = 99999, method = "bray") { 49 | 50 | ## need to order columns otherwise the order of the columns and the order of col.numbers is incorrect. 51 | 52 | var.table <- var.table[ , order(colnames(var.table))] 53 | var.names <- var.names[order(var.names)] 54 | 55 | 56 | 57 | output <- tibble(var.names = var.names, 58 | var.explnd = rep(NA, times = length(var.names)), 59 | avg.var.explnd = rep(NA), 60 | pseudo.F = rep(NA), 61 | F.pval = rep(NA), 62 | adj.F.pval = rep(NA), 63 | disp.F.pval = rep(NA), 64 | adj.disp.F.pval = rep(NA), 65 | omega2 = rep(NA), 66 | AIC.stat = rep(NA) 67 | ) 68 | col.numbers <- which(colnames(var.table) %in% var.names) 69 | 70 | if (num.control.vars > 0) { 71 | control.vars <- paste0(control.vars, " +") 72 | } 73 | 74 | 75 | 76 | ## Populate the output table. 77 | 78 | for (i in 1:length(var.names)) { 79 | 80 | ## No parallel argument--while it would be nice, 2.5.1 vegan + parallel 81 | ## update fubar'd something and taking it out was the easiest way to fix 82 | ## it. 83 | 84 | temp <- 85 | adonis2( 86 | formula = as.formula(paste( 87 | species.table.c, "~", control.vars, var.names[i] 88 | )), 89 | permutations = perms, 90 | method = method, 91 | by = by.adonis2, 92 | data = var.table 93 | ) 94 | 95 | output$var.explnd[i] <- temp$SumOfSqs[1 + num.control.vars] / 96 | temp$SumOfSqs[length(temp$SumOfSqs)] 97 | output$avg.var.explnd[i] <- output$var.explnd[i] / temp$Df[1 + num.control.vars] 98 | output$omega2[i] <- PERMANOVA.omega2(temp, num.control.vars) 99 | output$pseudo.F[i] <- temp$F[num.control.vars + 1] 100 | output$F.pval[i] <- temp$`Pr(>F)`[num.control.vars + 1] 101 | output$AIC.stat[i] <- AICc.PERMANOVA2(temp)[[AIC.type]] 102 | 103 | 104 | 105 | # the ordering is so that col.names[i] works! 106 | 107 | if (is.character(var.table[[col.numbers[i]]]) == TRUE | 108 | is.factor(var.table[[col.numbers[i]]]) == TRUE) { 109 | 110 | if (class(species.table) == "dist") { 111 | temp.disp <- 112 | anova(betadisper( 113 | species.table, 114 | as.factor(var.table[[col.numbers[i]]]) 115 | )) 116 | 117 | output$disp.F.pval[i] <- temp.disp$`Pr(>F)`[1] 118 | 119 | } 120 | 121 | else {temp.disp <- 122 | anova(betadisper( 123 | vegdist(species.table, method = method), 124 | as.factor(var.table[[col.numbers[i]]]) 125 | )) 126 | 127 | output$disp.F.pval[i] <- temp.disp$`Pr(>F)`[1] 128 | } 129 | } 130 | 131 | } 132 | 133 | output$adj.F.pval <- 134 | p.adjust(p = output$F.pval, method = "holm") 135 | output$adj.disp.F.pval <- 136 | p.adjust(p = output$disp.F.pval, method = "holm") 137 | output$AIC.delta <- 138 | output$AIC.stat - min(output$AIC.stat) 139 | 140 | return(output) 141 | 142 | 143 | 144 | } 145 | 146 | 147 | group.univ.PERMANOVA <- function(var.names, var.table, var.table.c, control.vars = "", species.vector, species.vector.c, num.control.vars = 0, by.adonis2 = "terms", perms = 99999, method = "euclid", AIC.type ="AICc") { 148 | 149 | ## need to order columns otherwise the order of the columns and the order of col.numbers is incorrect. 150 | 151 | var.table <- var.table[ , order(colnames(var.table))] 152 | var.names <- var.names[order(var.names)] 153 | 154 | 155 | 156 | output <- tibble(var.names = var.names, 157 | var.explnd = rep(NA, times = length(var.names)), 158 | avg.var.explnd = rep(NA), 159 | pseudo.F = rep(NA), 160 | F.pval = rep(NA), 161 | adj.F.pval = rep(NA), 162 | omega2 = rep(NA), 163 | AIC.stat = rep(NA) 164 | ) 165 | col.numbers <- which(colnames(var.table) %in% var.names) 166 | 167 | if (num.control.vars > 0) { 168 | control.vars <- paste0(control.vars, " +") 169 | } 170 | 171 | ## Attempting to get the names from the tables 172 | #var.table.t <- deparse(substitute(var.table)) 173 | 174 | 175 | 176 | ## Populate the output table. 177 | 178 | for (i in 1:length(var.names)) { 179 | 180 | ## No parallel argument--while it would be nice, 2.5.1 vegan + parallel 181 | ## update fubar'd something and taking it out was the easiest way to fix 182 | ## it. 183 | 184 | temp <- 185 | adonis2( 186 | formula = as.formula(paste( 187 | species.vector.c, "~", control.vars, var.names[i] 188 | )), 189 | permutations = perms, 190 | method = method, 191 | by = by.adonis2, 192 | data = var.table 193 | ) 194 | 195 | output$var.explnd[i] <- temp$SumOfSqs[1 + num.control.vars] / temp$SumOfSqs[length(temp$SumOfSqs)] 196 | output$avg.var.explnd[i] <- output$var.explnd[i] / temp$Df[1 + num.control.vars] 197 | output$omega2[i] <- PERMANOVA.omega2(temp, num.control.vars) 198 | output$pseudo.F[i] <- temp$F[num.control.vars + 1] 199 | output$F.pval[i] <- temp$`Pr(>F)`[num.control.vars + 1] 200 | output$AIC.stat[i] <- AICc.PERMANOVA2(temp)[[AIC.type]] 201 | 202 | 203 | 204 | # the ordering is so that col.names[i] works! 205 | 206 | 207 | } 208 | 209 | output$adj.F.pval <- p.adjust(p = output$F.pval, method = "holm") 210 | output$delta.aic <- output$AIC.stat - min(output$AIC.stat) 211 | 212 | 213 | return(output) 214 | 215 | 216 | 217 | } 218 | 219 | 220 | 221 | ## with time, test: 222 | # paste0("\"", sample.covariates, "\"" ) 223 | 224 | 225 | 226 | 227 | 228 | 229 | -------------------------------------------------------------------------------- /group_PERMANOVA_wP.R.R: -------------------------------------------------------------------------------- 1 | 2 | ## Function for sequentially testing multiple variables in PERMANOVA, then 3 | ## adjusting pseudo-F values. Also calculates multivariate dispersion for 4 | ## categorical variables. 5 | 6 | # First, a quick function to calculate omega squared based on: 7 | # https://academic.oup.com/bioinformatics/article/31/15/2461/188732 8 | # http://www.real-statistics.com/multiple-regression/other-measures-effect-size-anova/ 9 | # http://psych.colorado.edu/~willcutt/pdfs/Olejnik_2003.pdf if you want to make this generalized... 10 | # https://gist.github.com/arnoud999/e677516ed45e9a11817e for some r code of generalization 11 | 12 | PERMANOVA.omega2 <- function(adonis2.object, num.control.vars) { 13 | 14 | 15 | SSE <- adonis2.object[ 1 + num.control.vars, "SumOfSqs"] 16 | dfE <- adonis2.object[ 1 + num.control.vars, "Df"] 17 | MSE <- adonis2.object["Residual", "SumOfSqs"]/adonis2.object["Residual", "Df"] # Mean Square for Error 18 | SST <- sum(adonis2.object$SumOfSqs) 19 | 20 | omega2 <- (SSE - dfE*MSE) / (SST + MSE) 21 | 22 | return(omega2) 23 | 24 | } 25 | 26 | 27 | 28 | ## Note that I have some defaults for adonis2: two processors (parallel = 2), 29 | ## 99999 permutations, and using the bray-curtis distance matrix (method = 30 | ## "bray") 31 | 32 | ## Note also that this does not test for interaction effects. 33 | 34 | ## Variable definitions: 35 | ## var.names should be a vector of column names, one for each variable to be tested 36 | # Might consider adding functionality to take column names or position numbers, but for now... 37 | ## var.table is the table where the independent variables are. 38 | ## var.table.c is a string of the table name: eventually generate this automatically. 39 | ## species.table is the species/site matrix (e.g. created by matrify) 40 | ## species.table.c is a string of the table name: eventually generate this automatically. 41 | ## control.vars are any variables that need to be included before testing variables--character string, e.g. "var1 + var2" 42 | ## num.control.vars specifies the number of control variables used. Replace this later with something deriving it from control.vars text... 43 | ## by.adonsi2 is to pass through for the by = argument in adonis2 44 | 45 | 46 | group.PERMANOVA <- function(var.names, var.table, var.table.c, control.vars = "", species.table, species.table.c, num.control.vars = 0, by.adonis2 = "terms", perms = 99999) { 47 | 48 | ## need to order columns otherwise the order of the columns and the order of col.numbers is incorrect. 49 | 50 | var.table <- var.table[ , order(colnames(var.table))] 51 | var.names <- var.names[order(var.names)] 52 | 53 | 54 | 55 | output <- data.frame(row.names = var.names, 56 | var.explnd = rep(NA, times = length(var.names)), 57 | avg.var.explnd = rep(NA), 58 | pseudo.F = rep(NA), 59 | F.pval = rep(NA), 60 | adj.F.pval = rep(NA), 61 | disp.F.pval = rep(NA), 62 | adj.disp.F.pval = rep(NA) 63 | ) 64 | col.numbers <- which(colnames(var.table) %in% var.names) 65 | 66 | 67 | ## Attempting to get the names from the tables 68 | #var.table.t <- as.character(quote(var.table, env = )) 69 | #species.table.t <- as.character(var.table) 70 | 71 | 72 | ## Populate the output table. 73 | 74 | for (i in 1:length(var.names)) { 75 | 76 | 77 | temp <- adonis2(formula = as.formula(paste(species.table.c, "~", control.vars, "+", var.names[i])), 78 | parallel = 2, permutations = perms, method = "bray", by = by.adonis2, data = var.table) 79 | 80 | output$var.explnd[i] <- temp$SumOfSqs[1 + num.control.vars] / sum(temp$SumOfSqs) 81 | output$avg.var.explnd[i] <- output$var.explnd[i] / temp$Df[1 + num.control.vars] 82 | output$omega2[i] <- PERMANOVA.omega2(temp, num.control.vars) 83 | output$pseudo.F[i] <- temp$F[num.control.vars + 1] 84 | output$F.pval[i] <- temp$`Pr(>F)`[num.control.vars + 1] 85 | 86 | 87 | 88 | # the ordering is so that col.names[i] works! 89 | 90 | if (is.character(var.table[[col.numbers[i]]]) == TRUE | is.factor(var.table[[col.numbers[i]]]) == TRUE) { 91 | 92 | temp.disp <- anova(betadisper(vegdist(species.table, method = "bray"), 93 | as.factor(var.table[[col.numbers[i]]]))) 94 | 95 | output$disp.F.pval[i] <- temp.disp$`Pr(>F)`[1] 96 | 97 | } 98 | 99 | } 100 | 101 | output$adj.F.pval <- p.adjust(p = output$F.pval, method = "holm") 102 | output$adj.disp.F.pval <- p.adjust(p = output$disp.F.pval, method = "holm") 103 | 104 | return(output) 105 | 106 | 107 | 108 | } 109 | 110 | 111 | ## with time, test: 112 | # paste0("\"", sample.covariates, "\"" ) 113 | 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /group_lmPerm.R: -------------------------------------------------------------------------------- 1 | ## group lmPerm 2 | 3 | require(lmPerm) 4 | require(AICcmodavg) 5 | 6 | 7 | group.lmp <- function(group.vars, # should be character string 8 | var.table.name, # should be character string 9 | dependent.var, #should be character string 10 | control.var = NULL, 11 | iter = 999999, 12 | Ca = .01 13 | 14 | ) { 15 | 16 | 17 | lmp.results <- data.frame(row.names = group.vars, 18 | estimate = rep(NA, times = length(group.vars)), 19 | var.explnd = rep(NA), 20 | # avg.var.explnd = rep(NA), 21 | F.stat.lmp = rep(NA), 22 | prob.pval = rep(NA), 23 | prob.iter = rep(NA), 24 | adj.pval = rep(NA), 25 | AICc.lm = rep(NA) 26 | ) 27 | 28 | if (!is.null(control.var)) {control.var <- paste0(control.var, "+")} 29 | 30 | 31 | for (i in 1:length(group.vars)) { 32 | 33 | lmp.temp <- lmp(as.formula(paste0(dependent.var, " ~ ", 34 | control.var, 35 | var.table.name, "$", group.vars[i])), 36 | model = TRUE, seqs = TRUE, x = TRUE, y = TRUE, 37 | center = FALSE, maxIter = iter, Ca = Ca) 38 | summary.lmp.temp <- lmPerm::summary.lmp(lmp.temp) 39 | lm.temp <- lm(as.formula(paste0(dependent.var, " ~ ", var.table.name, "$", 40 | group.vars[i]))) 41 | 42 | 43 | # add calculated values to table: 44 | lmp.results$estimate[i] <- ifelse(length(summary.lmp.temp$coefficients[,1]) > 2, NA, 45 | summary.lmp.temp$coefficients[2,1]) 46 | lmp.results$var.explnd[i] <- summary.lmp.temp$adj.r.squared 47 | 48 | 49 | lmp.results$F.stat.lmp[i] <- summary.lmp.temp$fstatistic["value"] 50 | lmp.results$prob.pval[i] <- summary.lmp.temp$coefficients[2,3] 51 | lmp.results$prob.iter[i] <- summary.lmp.temp$coefficients[1,2] 52 | lmp.results$AICc.lm[i] <- AICc(lm.temp)[1] 53 | 54 | 55 | } 56 | 57 | lmp.results$adj.pval <- p.adjust(p = lmp.results$prob.pval, method = "holm") 58 | 59 | 60 | 61 | return(lmp.results) 62 | 63 | 64 | } 65 | 66 | -------------------------------------------------------------------------------- /group_lmer.R: -------------------------------------------------------------------------------- 1 | ## group lmPerm 2 | 3 | # based on http://www.utstat.toronto.edu/~brunner/workshops/mixed/NormalWithR.pdf#:~:text=Repeated%20measures%20analysis%20with%20R%20Summary%20for%20experienced,to%20the%20model%20for%20the%20random%20subject%20effect. 4 | # with information from https://stats.stackexchange.com/questions/7240/proportion-of-explained-variance-in-a-mixed-effects-model?rq=1 5 | 6 | 7 | 8 | require(lme4) # 9 | require(car) # car also has vif function for multivariate models... 10 | require(MuMIn) 11 | 12 | 13 | group.lmer <- function(var.names, # should be character string 14 | in.data, 15 | dependent.var, #should be character string 16 | control.var = NULL, 17 | random.subject.effect = NULL # should be character string in the format 18 | # (1 | site) 19 | 20 | ) { 21 | 22 | # Want to capture MSE, F, Pr(>F), estimate and standard error, AICc 23 | 24 | lmer.results <- data.frame(row.names = var.names, 25 | var.names = var.names, 26 | slope.estimate = rep(NA, times = length(var.names)), 27 | std.error = rep(NA), 28 | Mean.Sq = rep(NA), 29 | F.stat = rep(NA), 30 | prob.pval = rep(NA), 31 | adj.pval = rep(NA), 32 | AICc = rep(NA), 33 | AIC.REML = rep(NA), 34 | logLik = rep(NA) 35 | ) 36 | 37 | if (!is.null(control.var)) {control.var <- paste0(control.var, "+")} 38 | if (!is.null(random.subject.effect)) {random.subject.effect <- 39 | paste0("+ ", random.subject.effect)} 40 | 41 | i=1 42 | for (i in 1:length(var.names)) { 43 | 44 | lmer.temp <- lmer(formula = as.formula(paste0(dependent.var, " ~ ", 45 | control.var, 46 | var.names[i], 47 | random.subject.effect 48 | )), 49 | data = in.data) 50 | cT.temp <- MuMIn::coefTable(lmer.temp) 51 | indx <- (2 + length(control.var)) 52 | 53 | # add calculated values to table: 54 | lmer.results$slope.estimate[i] <- cT.temp[indx, 1] 55 | lmer.results$std.error[i] <- cT.temp[indx, 2] 56 | 57 | lmer.results$Mean.Sq[i] <- anova(lmer.temp)[var.names[i],3] 58 | 59 | lmer.results$F.stat[i] <- car::Anova(lmer.temp, test = "F")[var.names[i],"F"] 60 | lmer.results$prob.pval[i] <- car::Anova(lmer.temp, test = "F")[var.names[i],"Pr(>F)"] 61 | 62 | 63 | lmer.results$AICc[i] <- AICc(lmer.temp)[1] # this is for the model... 64 | lmer.results$AIC.REML[i] <- lme4::llikAIC(lmer.temp)[[2]] 65 | lmer.results$logLik[i] <- lme4::llikAIC(lmer.temp)[[1]] 66 | 67 | } 68 | 69 | 70 | lmer.results$adj.pval <- p.adjust(p = lmer.results$prob.pval, method = "holm") 71 | 72 | return(lmer.results) 73 | 74 | 75 | } 76 | 77 | -------------------------------------------------------------------------------- /repeat_multipatt.R: -------------------------------------------------------------------------------- 1 | ## This is a function that runs multipatt a specified number of times, then outputs two key things: 2 | 3 | # 1. A summary table of the species over 100 runs and 4 | # 2. A plot showing the different indicator species made in ggplot (OPTIONAL) 5 | 6 | # Reminder to user: check that matrix site order and cluster order are the same 7 | 8 | # repeat.multipatt(matrix.name = matrify.shrub.bysite, cluster.name = gc.cluster$gc.group.3, plot.please = FALSE, freq.cutoff = .1) 9 | # repeat.multipatt(matrix.name = matrify.tree.species, cluster.name = tree.abs.cluster$tree.abs, func.name = "r.g") 10 | # repeat.multipatt(matrix.name = matrify.foraging, 11 | # cluster.name = shrub.abs.cluster$shrub.abs, 12 | # xlab.input = "Bird Incidence Indicator Species \nof shrub clusters") 13 | 14 | 15 | 16 | 17 | repeat.multipatt <- function(repeats = 100, matrix.name, cluster.name, p.cutoff = .1, func.name = "IndVal.g", phi = FALSE, plot.please = TRUE, plot.colors = c("deeppink2", "cyan3", "yellowgreen", "gray 50", "violet"), freq.cutoff = .5, xlab.input = "Indicator Species \n(freq > 50%)", ylab.input = "Mean Indicator Value over 100 Runs", quiet = FALSE, stat.cutoff = .5, graph.stat.cutoff = .75) { 18 | 19 | library(ggplot2) 20 | library(dplyr) 21 | library(indicspecies) 22 | library(stringr) 23 | 24 | 25 | if (freq.cutoff != .5) { 26 | 27 | print("You probably want to change the default Y axis label!") 28 | 29 | } 30 | 31 | if (length(unique(cluster.name)) > 5) { 32 | 33 | stop("Please use fewer than 5 clusters (or edit the function).") 34 | } 35 | 36 | if (func.name == "r.g" & phi == TRUE & quiet == FALSE) { 37 | 38 | matrix.name <- as.data.frame(ifelse(matrix.name > 0, 1, 0)) 39 | print("you are using the r.g function to calculate Pearson's phi coefficient of association.") 40 | } 41 | 42 | if (func.name == "r.g" & phi == FALSE & quiet == FALSE) { 43 | 44 | print("you are using the r.g function to calculate the point biserial correlation coefficient.") 45 | } 46 | 47 | 48 | mp.sign.dump <- data.frame() 49 | mp.AB.dump <- data.frame() 50 | i <- 1 51 | 52 | for (i in 1:repeats) { 53 | multipatt.out <- multipatt(x = matrix.name, 54 | cluster = cluster.name, func = func.name) 55 | 56 | mp.sign <- multipatt.out$sign[complete.cases(multipatt.out$sign),] 57 | mp.sign <- mp.sign[mp.sign$p.value <= p.cutoff & mp.sign$stat >= stat.cutoff, ] 58 | mp.sign$species <- rownames(mp.sign) 59 | 60 | if ( length(multipatt.out$A) > 0 ) { 61 | mp.A <- as.data.frame(multipatt.out$A[complete.cases(multipatt.out$A), ] , make.row.names = FALSE) 62 | mp.A <- mp.A[which(rownames(mp.A) %in% mp.sign$species), ] 63 | colnames(mp.A) <- paste("A.", colnames(mp.A), sep = "") 64 | 65 | 66 | mp.B <- as.data.frame(multipatt.out$B[complete.cases(multipatt.out$B), ], make.row.names = FALSE) 67 | mp.B <- mp.B[which(rownames(mp.B) %in% mp.sign$species), ] 68 | colnames(mp.B) <- paste("B.", colnames(mp.B), sep = "") 69 | 70 | mp.AB <- cbind.data.frame(mp.A, mp.B) 71 | mp.AB$species <- rownames(mp.AB) 72 | 73 | if (nrow(mp.AB) > 0) { 74 | mp.AB$i <- i 75 | } 76 | 77 | } 78 | 79 | if (nrow(mp.sign) > 0) { 80 | mp.sign$i <- i 81 | } 82 | 83 | mp.sign.dump <- rbind(mp.sign.dump, mp.sign, make.row.names = FALSE) 84 | 85 | 86 | if (func.name %in% c("IndVal", "IndVal.g")) { 87 | mp.AB.dump <- rbind(mp.AB.dump, mp.AB, make.row.names = FALSE) 88 | } 89 | 90 | i <- i + 1 91 | } 92 | 93 | ct <- FALSE 94 | 95 | if (!(any(colnames(mp.sign.dump) == "s.1"))) { 96 | ct <- TRUE 97 | 98 | colname.temp <- tibble(group = colnames(mp.sign.dump)) 99 | colname.temp$number <- 1:nrow(colname.temp) 100 | 101 | colnames(mp.sign.dump)[1:sum(str_detect(colnames(mp.sign.dump), "s\\."))] <- 102 | paste("s.", seq.int(from = 1, to = sum(str_detect(colnames(mp.sign.dump), "s\\."))), sep = "") 103 | 104 | colname.temp <- colname.temp[str_detect(colname.temp$group, "s\\.") , ] 105 | colname.temp$groupname <- str_sub(colname.temp$group, 3, -1) 106 | 107 | } 108 | 109 | 110 | if (!(any(colnames(mp.sign.dump) == "s.3"))) {mp.sign.dump$s.3 = rep(0, times = length(mp.sign.dump$s.1))} 111 | if (!(any(colnames(mp.sign.dump) == "s.4"))) {mp.sign.dump$s.4 = rep(0, times = length(mp.sign.dump$s.1))} 112 | if (!(any(colnames(mp.sign.dump) == "s.5"))) {mp.sign.dump$s.5 = rep(0, times = length(mp.sign.dump$s.1))} 113 | 114 | 115 | # Make summary tables 116 | 117 | mp.summary <- group_by(mp.sign.dump, species) %>% 118 | summarize( 119 | count.sp = n(), 120 | frequency.sp = round(count.sp/repeats, digits = 3), 121 | mean.stat = round(mean(stat), digits = 3), 122 | group = paste( 123 | (if (any(s.1 == 1)) {"1."}), 124 | (if (any(s.2 == 1)) {".2."}), 125 | (if (any(s.3 == 1)) {".3."}), 126 | (if (any(s.4 == 1)) {".4."}), 127 | (if (any(s.5 == 1)) {".5"}), 128 | sep = ""), 129 | min.p.val = min(p.value), 130 | max.p.val = max(p.value), 131 | all.p.vals = paste(p.value, collapse = ", ") 132 | ) 133 | 134 | mp.summary$group <- ifelse(grepl("[1-9]..[1-9]", mp.summary$group), 135 | gsub(pattern = "\\.\\.", replacement = " & ", mp.summary$group), 136 | mp.summary$group) 137 | 138 | 139 | if(ct == TRUE & nrow(mp.summary) > 0) { 140 | 141 | mp.summary$groupname <- mp.summary$group 142 | mp.summary$groupname <- gsub(pattern = "\\.", replacement = "", mp.summary$group) 143 | g = 1 144 | for (g in 1:max(colname.temp$number)) { 145 | 146 | mp.summary$groupname <- gsub(x = mp.summary$groupname, 147 | pattern = as.character(g), 148 | replacement = colname.temp$groupname[g]) 149 | 150 | } 151 | 152 | } 153 | 154 | if(ct == TRUE & nrow(mp.summary) == 0) { 155 | 156 | warning("There are no species returned") 157 | 158 | mp.summary[1,1] <- "No Species Returned" 159 | } 160 | 161 | 162 | mp.summary$group <- paste("Group", gsub(pattern = "\\.", replacement = "", mp.summary$group), sep = " ") 163 | 164 | 165 | 166 | 167 | 168 | 169 | if (func.name %in% c("IndVal", "IndVal.g")) { 170 | mp.AB.summary <- group_by(mp.AB.dump, species) %>% 171 | summarize_all(.funs = list(mean = mean)) 172 | mp.AB.summary$i <- NULL 173 | 174 | mp.AB.summary <- cbind(mp.AB.summary, 175 | group_by(mp.AB.dump, species) %>% 176 | summarize( 177 | count.sp = n(), 178 | frequency.sp = round(count.sp/max(i), digits = 3) 179 | ) 180 | ) 181 | } 182 | 183 | 184 | ## Output graph 185 | 186 | if (plot.please == TRUE) { 187 | 188 | if (nrow(subset(mp.summary, frequency.sp > freq.cutoff)) > 0) { 189 | 190 | mp.plot <- ggplot(subset(mp.summary, frequency.sp > freq.cutoff & mean.stat > graph.stat.cutoff), 191 | aes(x = reorder(species, mean.stat))) + 192 | geom_bar(aes(y = mean.stat, fill = groupname), alpha = .75, stat = "identity") + 193 | geom_text(aes(y = mean.stat, label = mean.stat), hjust = 1.2) + 194 | facet_grid(group ~ ., scales = "free_x") + coord_flip() + guides(fill = "none") + 195 | xlab(xlab.input) + ylab(ylab.input) + 196 | scale_fill_manual(values = c(plot.colors)) 197 | 198 | print(mp.plot) 199 | 200 | } 201 | 202 | else print("can't make graph; no indicator species for any clusters!") 203 | } 204 | 205 | 206 | 207 | 208 | # Output data tibble 209 | if (quiet == FALSE) { 210 | if (func.name %in% c("IndVal", "IndVal.g")) { 211 | return(list(mp.summary, mp.AB.summary)) 212 | } 213 | else return(mp.summary) 214 | } 215 | 216 | } 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | ## Testing 229 | # matrix.name = sqrt(sapro.transpose) 230 | # cluster.name = veg.group.soil[[3]] 231 | # func.name = "r.g" 232 | # repeats = 1 233 | # plot.please = FALSE 234 | # quiet = FALSE -------------------------------------------------------------------------------- /triplet_fixer.R: -------------------------------------------------------------------------------- 1 | # This script creates a sourceable function to turn any matrix into a triplet using the labdsv matrify() 2 | # Written 4/21/2016 3 | 4 | # requires the labdsv package 5 | 6 | # The function inputs should be: 7 | # filename or data table 8 | # three columns that make up the triplet. 9 | 10 | # The function name is ez.matrify 11 | 12 | ez.matrify <- function(filename, species.name, site.name, abundance) { 13 | 14 | library(labdsv) 15 | 16 | #check if filename is a .csv or already imported 17 | if (is.character(filename)) 18 | data.for.analysis <- read.csv(file=filename, header = T) else 19 | data.for.analysis <- filename 20 | 21 | proper.columns <- as.data.frame(data.for.analysis[ ,c(site.name, species.name, abundance)]) 22 | ez.matrify.output <- matrify(proper.columns) 23 | return(ez.matrify.output) 24 | 25 | } 26 | 27 | 28 | #test 29 | #head(ez.matrify(filename='../../DataRepository/VegetationData/ShrubsCSV.csv', species.name = 'SpeciesTaxonomic', site.name = 'SiteStandardGroup', abundance = 'RandomTest')) 30 | 31 | #head(ez.matrify(filename=tree.data, species.name = 'tree.species', site.name = 'site', abundance = 'tree.number')) 32 | 33 | --------------------------------------------------------------------------------