# Repository layout:
# ├── Code
# │   └── GRooTExtraction.R
# ├── DataFiles
# │   ├── GRooTAggregateSpeciesVersion.zip
# │   └── GRooTFullVersion.zip
# └── README.md
#
# /Code/GRooTExtraction.R:
# --------------------------------------------------------------------------------

### R code for obtaining the error risk per species and trait in the GRooT    ###
### Database, and for calculating the mean, median, first and third quartiles ###
### per species.                                                              ###

# library() stops with an error when the package is missing; require() would
# only return FALSE and let the script fail later with a confusing message.
library(tidyverse)

##################
### settings   ###
##################

### Edit these two values to match your machine. They replace the hard-coded
### read path and the setwd() call, so the script no longer changes the
### working directory of the R session.
inputFile <- "C:/Users/GRooTFullVersion.csv"
outputDir <- "C:/Users/kiran/Dropbox/sROOT_mine/sROOT_database"

### Traits for which a logarithmic transformation is applied ###
logTraits <- c(
  "Root_cortex_thickness", "Root_stele_diameter", "Root_stele_fraction",
  "Root_vessel_diameter", "Root_branching_density", "Root_branching_ratio",
  "Root_C_N_ratio", "Root_Ca_concentration", "Root_K_concentration",
  "Root_Mg_concentration", "Root_Mn_concentration", "Root_N_concentration",
  "Root_N_P_ratio", "Root_P_concentration", "Root_lifespan_mean",
  "Root_lifespan_median", "Root_litter_mass_loss_rate", "Root_production",
  "Root_turnover_rate", "Mean_Root_diameter", "Root_dry_matter_content",
  "Root_tissue_density", "Specific_root_area", "Specific_root_length",
  "Specific_root_respiration", "Coarse_root_fine_root_mass_ratio",
  "Fine_root_mass_leaf_mass_ratio", "Root_length_density_volume",
  "Root_mass_density", "Rooting_depth"
)

### Traits treated as normally distributed (no transformation) ###
# NOTE(review): "Root_mycorrhizal colonization" contains a space, unlike every
# other trait name. It is kept exactly as in the original script; confirm that
# it matches the spelling used in the traitName column of the data file.
normalTraits <- c(
  "Root_xylem_vessel_number", "Root_mass_fraction", "Root_C_concentration",
  "Root_lignin_concentration",
  "Root_total_structural_carbohydrate_concentration",
  "Lateral_spread", "Root_mycorrhizal colonization", "Net_nitrogen_uptake_rate"
)

##################
### helpers    ###
##################

# Compute the error risk of every data entry for the given traits.
#
# data      : data frame with the columns GRooTID, genusTNRS, speciesTNRS,
#             traitName and traitValue.
# traits    : character vector of trait names to keep.
# transform : function applied to traitValue before means and standard
#             deviations are computed (identity by default).
#
# Returns an ungrouped tibble with the columns GRooTID, genusTNRS, speciesTNRS,
# traitName, traitValue, errorRiskEntries (number of entries of the
# species-by-trait combination) and errorRisk.
#
# errorRisk = (species mean - entry value) / SDSppAvg, where SDSppAvg is the
# per-trait average of the species standard deviations. That average is taken
# over data entries, so a species with more entries carries more weight.
computeErrorRisk <- function(data, traits, transform = identity) {
  data %>%
    dplyr::select(GRooTID, genusTNRS, speciesTNRS, traitName, traitValue) %>%
    dplyr::filter(traitName %in% traits) %>%
    group_by(genusTNRS, speciesTNRS, traitName) %>%
    mutate(
      errorRiskEntries = n(),
      riskValue = transform(traitValue),
      meanSpp = mean(riskValue),
      sdSpp = sd(riskValue)
    ) %>%
    group_by(traitName) %>%
    mutate(
      # sd() is NA for species with a single entry, hence na.rm = TRUE
      SDSppAvg = mean(sdSpp, na.rm = TRUE),
      errorRisk = (meanSpp - riskValue) / SDSppAvg
    ) %>%
    ungroup() %>%
    dplyr::select(GRooTID, genusTNRS, speciesTNRS, traitName, traitValue,
                  errorRiskEntries, errorRisk)
}

# Add the studySite identifier used to aggregate entries to species-by-site
# values. The column names (including "decimalLongitud") are those used by the
# original script.
addStudySite <- function(data) {
  data %>%
    mutate(studySite = paste(referencesAbbreviated, decimalLatitude,
                             decimalLongitud, locationID, location))
}

###################################
### load data: GRooTFullVersion ###
###################################

GRooTFullVersion <- read.csv(inputFile, header = TRUE,
                             na.strings = c("", "NA"))
str(GRooTFullVersion)
names(GRooTFullVersion)

GRooTFullVersion <- GRooTFullVersion[, 1:71]

# Remember the column order of the input so it can be restored by name after
# the merge below (instead of relying on hard-coded column positions).
originalColumns <- names(GRooTFullVersion)

##################
### error risk ###
##################

### data entries with information only at genus level ###
### this information is not used to calculate error risks at species level ###
genusGRooT <- dplyr::filter(GRooTFullVersion, is.na(speciesTNRS))
# Typed missing values keep both columns numeric after the rbind() below;
# write.csv(..., na = "") still writes them as empty cells.
genusGRooT$errorRiskEntries <- NA_integer_
genusGRooT$errorRisk <- NA_real_

### data entries with information at species level ###
### these data are used to calculate error risk at species level ###
speciesGRooT <- dplyr::filter(GRooTFullVersion, !is.na(speciesTNRS))

### error risk for traits that require a logarithmic transformation ###
### 0.0001 is added so that values equal to 0 can be included ###
speciesGRooTlog <- computeErrorRisk(
  speciesGRooT, logTraits,
  transform = function(x) log2(x + 0.0001)
)

### error risk for traits that follow a normal distribution ###
speciesGRooTother <- computeErrorRisk(speciesGRooT, normalTraits)

speciesRisk <- bind_rows(speciesGRooTlog, speciesGRooTother)

### An error risk of zero is produced when only one data entry is available ###
### for a species-by-trait combination, when all its entries have the same  ###
### value, or when an entry equals the species mean.                        ###

### merge error risk with the other information in the database ###
### (inner join: only entries whose trait is in one of the two lists remain) ###
speciesTotal <- merge(
  speciesGRooT, speciesRisk,
  by = c("GRooTID", "genusTNRS", "speciesTNRS", "traitName", "traitValue")
)

### join the data at species and genus level ###
GRooTFull <- rbind(speciesTotal, genusGRooT)

GRooTFull <- GRooTFull %>% arrange(GRooTID)

### restore the column order of the input file, error risk columns last ###
GRooTFullVersion <- GRooTFull[, c(originalColumns,
                                  "errorRiskEntries", "errorRisk")]

names(GRooTFullVersion)

write.csv(GRooTFullVersion,
          file = file.path(outputDir, "GRooTFullVersionNew.csv"),
          row.names = FALSE, na = "")

########################################################################
### Calculate mean, median, first and third quartiles per species   ####
########################################################################

### data entries which contain information at species level ###
speciesGRooT <- dplyr::filter(GRooTFullVersion, !is.na(speciesTNRS))

### Note 1: Species-by-site values are calculated first to account for
### potential pseudo-replication and variability in data entries' resolutions
### in GRooT (i.e., mean values versus individual observations). The user can
### instead calculate mean values from all data entries by removing
### "studySite" from the pipelines below.
### Note 2: To select particular belowground entities, a range of error risk,
### or specific traits, uncomment the corresponding "## dplyr::filter(...)"
### line in the pipelines below.

### For traits that are not normally distributed, mean values can be
### calculated from log-transformed values and back-transformed
### (meanSpeciesExp) or from the values in the original units (meanSpecies).

GRooTAggregateSpeciesVersion <- speciesGRooT %>%
  ## dplyr::filter(belowgroundEntities == "FR") %>% # only particular entities (other option below)
  ## dplyr::filter(between(errorRisk, -4, 4)) %>%   # filter by error risk values
  addStudySite() %>%
  dplyr::select(studySite, genusTNRS, speciesTNRS, traitName, traitValue) %>%
  dplyr::filter(traitName %in% logTraits) %>%
  group_by(studySite, genusTNRS, speciesTNRS, traitName) %>%
  mutate(valuesLog = log(traitValue + 0.0001)) %>%
  summarise(meanStudySite = mean(traitValue),
            meanStudySiteLog = mean(valuesLog)) %>%
  group_by(genusTNRS, speciesTNRS, traitName) %>%
  summarise(entriesStudySite = n(),
            meanSpecies = mean(meanStudySite),
            meanSpeciesExp = exp(mean(meanStudySiteLog)),
            medianSpecies = median(meanStudySite),
            firstQuantile = quantile(meanStudySite, probs = c(0.25)),
            thirdQuantile = quantile(meanStudySite, probs = c(0.75))) %>%
  ungroup()

### For traits that are normally distributed ###

GRooTAggregateSpeciesVersion2 <- speciesGRooT %>%
  ## dplyr::filter(belowgroundEntities == "FR") %>% # only particular entities (other option below)
  ## dplyr::filter(between(errorRisk, -4, 4)) %>%   # filter by error risk values
  addStudySite() %>%
  dplyr::select(studySite, genusTNRS, speciesTNRS, traitName, traitValue) %>%
  dplyr::filter(traitName %in% normalTraits) %>%
  group_by(studySite, genusTNRS, speciesTNRS, traitName) %>%
  summarise(meanStudySite = mean(traitValue)) %>%
  group_by(genusTNRS, speciesTNRS, traitName) %>%
  summarise(entriesStudySite = n(),
            meanSpecies = mean(meanStudySite),
            medianSpecies = median(meanStudySite),
            firstQuantile = quantile(meanStudySite, probs = c(0.25)),
            thirdQuantile = quantile(meanStudySite, probs = c(0.75))) %>%
  ungroup()

##### option b: aggregate separately for each belowground entity #####

GRooTAggregateSpeciesVersion1 <- speciesGRooT %>%
  ## dplyr::filter(between(errorRisk, -4, 4)) %>%   # filter by error risk values
  addStudySite() %>%
  dplyr::select(studySite, belowgroundEntities, genusTNRS, speciesTNRS,
                traitName, traitValue, errorRisk) %>%
  ## dplyr::filter(traitName %in% c("Root_N_concentration", "Mean_Root_diameter", "Root_dry_matter_content",
  ##                                "Root_tissue_density", "Specific_root_length")) %>%
  group_by(studySite, belowgroundEntities, genusTNRS, speciesTNRS,
           traitName) %>%
  summarise(meanStudySite = mean(traitValue)) %>%
  group_by(belowgroundEntities, genusTNRS, speciesTNRS, traitName) %>%
  summarise(entriesStudySite = n(),
            meanSpecies = mean(meanStudySite),
            medianSpecies = median(meanStudySite),
            firstQuantile = quantile(meanStudySite, probs = c(0.25)),
            thirdQuantile = quantile(meanStudySite, probs = c(0.75))) %>%
  ungroup()

# --------------------------------------------------------------------------------
# /DataFiles/GRooTAggregateSpeciesVersion.zip:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/GRooT-Database/GRooT-Data/6816dab453eb40382f44bb2e51dffef1c9b4a217/DataFiles/GRooTAggregateSpeciesVersion.zip
#
# --------------------------------------------------------------------------------
# /DataFiles/GRooTFullVersion.zip:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/GRooT-Database/GRooT-Data/6816dab453eb40382f44bb2e51dffef1c9b4a217/DataFiles/GRooTFullVersion.zip
#
# --------------------------------------------------------------------------------
# /README.md:
# --------------------------------------------------------------------------------
# ## Global Root Trait (GRooT) Database
#
# Our principal objective with GRooT is to contribute towards the inclusion of
# root traits in large-scale comparative studies and global models by offering
# standardized and curated data of key root traits.
#
# GRooT includes 38 root traits, 38,276 species-by-site mean values based on
# 114,222 trait records. GRooT includes more than 1,000 species with data on
# the following nine traits: root mass fraction, root carbon and nitrogen
# concentration, lateral spread, root mycorrhizal colonization intensity, mean
# root diameter, root tissue density, specific root length, and maximum
# rooting depth.