├── .gitignore
├── CZ-Krp_fetch.png
├── EC workflow.Rproj
├── EC_workflow
│   ├── KRP16_settings_2025-04-27.R
│   ├── WF_1_data_preparation_2025-04-27.R
│   ├── WF_2_QC_2025-04-27.R
│   ├── WF_3_GF_&_FP_2025-04-27.R
│   ├── WF_4_Summary_2025-04-27.R
│   └── utilities_2025-04-27.R
├── NEWS.md
├── Processing_chain.jpg
├── README.Rmd
├── README.md
├── ROI boundary example.jpg
├── combn_QC.jpg
├── qa_and_qc.jpg
└── qc_application.jpg

/.gitignore:
--------------------------------------------------------------------------------
.Rproj.user
.Rhistory
.RData
.Ruserdata
KRP16 - before processing.zip
KRP16 - processed.zip
--------------------------------------------------------------------------------
/CZ-Krp_fetch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lsigut/EC_workflow/59a347c3a73695bffb7726a05da4e42d5aea890f/CZ-Krp_fetch.png
--------------------------------------------------------------------------------
/EC workflow.Rproj:
--------------------------------------------------------------------------------
Version: 1.0
ProjectId: d26f4bec-d8d0-489e-ab6e-734137efb7a4

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX
--------------------------------------------------------------------------------
/EC_workflow/KRP16_settings_2025-04-27.R:
--------------------------------------------------------------------------------
### Description ================================================================

# Settings for all EC workflow files are stored here for easier workflow updates
# and setup editing. General settings are provided first; settings relevant to a
# particular workflow part can be found in the respective section.
#
# Code developed by Ladislav Sigut (sigut.l@czechglobe.cz).

# Contact information
name <- "Ladislav Sigut" # person who performed processing
mail <- "sigut.l@czechglobe.cz" # mail of the person who performed processing

# Edit the siteyear
# - included in folder and file names
siteyear <- "KRP16"

# Edit the start and end of the time period to post-process
# - start <- 2016; end <- 2016: to assure a complete year (recommended)
# - start <- NULL; end <- NULL: use timestamp extent from input files
# - start <- "2016-02-01 10:00:00"; end <- "2017-02-01 10:00:00": arbitrary period
start <- 2016
end <- 2016

# Specify the time shift (in seconds) to be applied to the date-time information
# in order to represent the center of the averaging period
shift.by <- -900

# Specify site metadata
lat <- 49.5732575 # edit site latitude
long <- 15.0787731 # edit site longitude
tzh <- 1 # timezone hour


# WF_1_data_preparation ========================================================

# Set Meteo mapping
# - edit Meteo mapping according to available variables at the given site
# - regular expressions (?regex) can be used to select replicates of a given
#   variable (e.g. Ts) that will be averaged by remap_vars()
# - typical set of variables: global radiation (GR), photosynthetically active
#   radiation (PAR), net radiation (Rn), air temperature (Tair), soil
#   temperature (Tsoil), relative humidity (RH), vapor pressure deficit (VPD),
#   soil water content (SWC), precipitation (P), soil heat flux (G)
# - variables required by openeddy: PAR, Rn, Tair, Tsoil, VPD, P (only PAR is
#   really needed for an optimal setup, the rest can be initialized with NA
#   values)
# - variables required by REddyProc: GR, Tair, Tsoil, VPD (and/or RH)
Met_mapping <- tribble(
  ~Meteo_varname, ~workflow_varname,
  "date/time", "timestamp",
  "GRin", "GR",
  "PARin", "PAR",
  "^Rn", "Rn",
  "TaKA02.0", "Tair",
  "TsKO0.05", "Tsoil",
  "RHKA02.0", "RH",
  "VPDKA02.0", "VPD",
  "sumP", "P"
)

# Provide the timestamp format of your Meteo data (see ?strptime_eddy)
# - default: "%Y-%m-%d %H:%M"
meteo_format <- "%Y-%m-%d %H:%M"

# WF_2_QC (quality control) ====================================================

# Do you want to perform a manual data quality check?
# - the default interactive_session <- TRUE is currently recommended
# - it mainly affects the manual QC step >check_manually()<
# - set FALSE if you want to simply rerun this workflow part
# - if FALSE, informative plotting will be skipped
# - if FALSE, manual QC will still be used if created previously
interactive_session <- TRUE

# Do you want to apply storage correction to H, LE and CO2 flux?
# - currently only the storage correction estimated by EddyPro using the
#   discrete (one point) approach is implemented
# - if TRUE, the storage flux is added to the respective original flux
# - recommended for sites with short canopy, e.g. grasslands, wetlands,
#   croplands
apply_storage <- TRUE

# Specify the type of coordinate rotation applied during EddyPro processing
# - supported rotation types:
#   - "double": for double (2D) rotation
#   - "planar fit": for planar fit rotation
rotation_type <- "double"

# Specify the type of IRGA used in the eddy covariance system
# - supported IRGA types:
#   - "en_closed": both for closed and enclosed path systems
#   - "open": for open path systems
IRGA_type <- "en_closed"

# Set Fetch filter boundary
# - edit the region of interest boundary for your site
# - here KRP boundary version 20160206
# - see ROI boundary concept description at https://github.com/lsigut/EC_workflow
# - it can be determined manually (e.g. using the Google Earth app)
# - R scripts for obtaining the ROI boundary based on a defined KML file can be
#   found here: https://github.com/lsigut/ROI_boundary
boundary <-
  c(453, 489, 469, 455, 444, 410, 375, 348, 86, 82, 78, 76, 74, 73, 72, 72, 73,
    74, 76, 78, 81, 85, 91, 97, 106, 116, 114, 113, 131, 372, 496, 500, 507,
    519, 531, 541, 555, 562, 565, 572, 584, 605, 633, 749, 863, 1012, 1128,
    1098, 802, 863, 871, 903, 403, 360, 328, 303, 283, 486, 466, 451, 441, 412,
    390, 373, 360, 350, 349, 356, 367, 381, 399, 422)

# Precheck variables
# - a set of variables typically available in EddyPro full output that can be
#   useful for a preliminary check before the quality control procedure
# - the selected variables will be plotted (no further dependencies)
precheck_vars <- c(
  "u_rot", "v_rot", "w_unrot", "w_rot",
  "sonic_temperature", "max_wind_speed",
  "Tau", "ustar", "H", "LE", "NEE",
  "u_var", "v_var", "w_var", "ts_var", "h2o_var", "co2_var",
  "rand_err_Tau", "rand_err_H", "rand_err_LE", "rand_err_NEE",
  "Tau_scf", "H_scf", "LE_scf", "co2_scf",
  "u_spikes", "v_spikes", "w_spikes", "ts_spikes", "co2_spikes", "h2o_spikes",
  "H_strg", "LE_strg", "co2_strg",
  "h2o_v_adv", "co2_v_adv",
  "co2_mixing_ratio", "h2o_mixing_ratio",
  "co2_time_lag", "h2o_time_lag",
  "x_peak", "x_70perc",
  "mean_value_RSSI_LI_7200", "co2_signal_strength_7200_mean",
  "h2o_signal_strength_7200_mean", "flowrate_mean")

# Quality Control Essential Variables
# - a set of variables useful when working with quality controlled data
# - some variables might have dependencies in further processing steps
essential_vars_QC <- c(
  "timestamp", "GR", "qc_GR", "PAR", "qc_PAR", "Rn", "qc_Rn", "Tair",
  "qc_Tair", "Tsoil", "qc_Tsoil", "RH", "qc_RH", "VPD", "qc_VPD", "SWC",
  "qc_SWC", "WTD", "qc_WTD", "GWL", "qc_GWL", "P", "qc_P", "G", "qc_G", "Tau",
  "Tau_orig", "qc_Tau_forGF", "qc_Tau_SSITC", "rand_err_Tau", "H", "H_orig",
  "qc_H_forGF", "qc_H_SSITC", "rand_err_H", "LE", "LE_orig", "qc_LE_forGF",
  "qc_LE_SSITC", "rand_err_LE", "NEE", "NEE_orig", "qc_NEE_forGF",
  "qc_NEE_SSITC", "rand_err_NEE", "H_strg", "LE_strg", "co2_strg", "wind_speed",
  "wind_dir", "ustar", "L", "zeta", "model", "x_peak", "x_70perc")

# WF_3_GF_&_FP (gap filling and flux partitioning) =============================

# Temperature used for flux partitioning ('Tsoil' or 'Tair')
# - default: FP_temp <- 'Tsoil'
# - note that MDS gap-filling is based on 'Tair'
FP_temp <- 'Tsoil'

# Save figures as "png" (default) or "pdf"
# - NEEvsUStar plots are fixed to "pdf"
plot_as <- "png"

# Set a fixed ustar threshold if needed (skips ustar threshold estimation)
# - default is: fixed_UT <- NA; i.e. UT estimation is used (recommended)
# - if provided, seasonal_ustar and use_CPD settings will be ignored
fixed_UT <- NA

# Choose ustar threshold estimation method resolution
# - either seasonal ustar thresholds (seasonal_ustar <- TRUE; default)
# - or annual thresholds (seasonal_ustar <- FALSE)
# - seasonal UT is recommended as it keeps more data (see Wutzler et al., 2018)
seasonal_ustar <- TRUE

# Should the change-point detection (CPD) method (Barr et al. 2013) be used
# (use_CPD <- TRUE) instead of the classical approach (Reichstein et al. 2005,
# Papale et al. 2006) using binned classes of ustar and temperature?
# (use_CPD <- FALSE; default)
# The changepoint is estimated based on the entire subset within one season
# and one temperature class; currently using the argument 'isUsingCPTSeveralT'
# of the function usControlUstarEst()
use_CPD <- FALSE

# WF_4_Summary =================================================================

# Specify variables needed for different procedures during aggregation
# - used for averaging, summation, uncertainty estimation and plotting
# - some variables have dependencies in further processing steps
# - consider adding instead of removing from the list
# - GWL (ground water level), SWC (soil water content) and G (soil heat flux)
#   are not available in the KRP16 example data set (they will be reported and
#   skipped)
mean <- c("Tair", "Tsoil", "RH", "VPD", "GR", "Rn", "PAR", "GWL", "SWC", "G",
          "H_f", "H_fqc", "LE_f", "LE_fqc", "ET_f", "ET_fqc",
          "NEE_uStar_f", "NEE_uStar_fqc", "GPP_uStar_f", "GPP_DT_uStar",
          "Reco_uStar", "Reco_DT_uStar")
sum <- c("P", "GR", "Rn", "PAR", "G", "H_f", "LE_f", "ET_f",
         grep("(NEE|GPP).+f$", names(data), value = TRUE),
         grep("GPP_DT.+[^D]$", names(data), value = TRUE),
         grep("Reco.+[^D]$", names(data), value = TRUE))
err_agg <- grep("(^H|^LE|^ET|^NEE|^Reco|^GPP).*(sd|SD)$", names(data),
                value = TRUE)
--------------------------------------------------------------------------------
/EC_workflow/WF_1_data_preparation_2025-04-27.R:
--------------------------------------------------------------------------------
### Description ================================================================

# Eddy covariance workflow part 1/4 (https://github.com/lsigut/EC_workflow).
# This code primarily aims at data preparation for quality control (QC). Meteo
# data and EddyPro full output files are validated, accordingly formatted,
# merged and saved with documentation. All numeric values are rounded to a
# reasonable precision. Meteo variable names are remapped according to the
# requirements of the openeddy and REddyProc packages. If Meteo contains
# replicates of the same variable, the mean of all replicates and their QC is
# returned. It is expected that Meteo data underwent separate quality control
# and gap-filling (not in the scope of openeddy). Notice that especially gaps
# in incoming radiation (GR - global radiation) can have a negative impact on
# the reliability and quality of the final products.
#
# You can find an example data set at https://doi.org/10.5281/zenodo.6631498
#
# For documentation of EddyPro variable names see:
# https://www.licor.com/env/support/EddyPro/topics/output-files-full-output.html
#
# Code developed by Ladislav Sigut (sigut.l@czechglobe.cz).
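
# Illustration of the regexp-based replicate selection mentioned above (a
# sketch only, with made-up replicate column names; see the Met_mapping table
# in the settings file for the actual patterns used at this site):
# grep("^Rn", c("Rn_1_1_1", "Rn_2_1_1", "PARin"), value = TRUE)
# returns "Rn_1_1_1" and "Rn_2_1_1", i.e. the replicates that remap_vars()
# would average into a single "Rn" variable.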

### Set working directory to the folder where this document is saved ===========

# This expects you are working in RStudio and this document is saved in the root
# of an already existing folder structure
# - see structure_eddy()
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

### Install and load required packages and functions ===========================

# Load eddy covariance workflow utility functions
utilities_file <- list.files(pattern = "utilities", full.names = TRUE)
source(utilities_file)

# Attach packages from GitHub
# - you might need RTools on a Windows machine to install openeddy:
#   https://cran.r-project.org/bin/windows/Rtools
# - uses the attach_pkg() function saved in utilities.R
attach_pkg("openeddy", github = "lsigut/openeddy")

# Attach packages from CRAN
attach_pkg("tibble") # required for tribble()

# Check if the openeddy version conforms to requirements
if (packageVersion("openeddy") < package_version("0.0.0.9009"))
  warning("this version of workflow works reliably only with openeddy version ",
          "'0.0.0.9009'")

### Provide metadata and set file paths and arguments ==========================

# Load the site-year settings file
settings_file <- list.files(pattern = "settings", full.names = TRUE)
source(settings_file)

# Load the list of folder structure paths
# - automated, no input required if the proposed folder structure is followed
paths <- make_paths()

# Required input files are meteo data and processed eddy covariance data

# Meteo data
# - automated, no input required if the proposed folder structure is followed
# - Meteo groups checklist (30 min):
#   1) Radiation: GRin, PARin, Rn
#   2) Temperature: Ta (from EC height), Ts (from the top soil layer)
#   3) Moisture: Ma (from EC height), Ms (from the top soil layer)
#   4) Precipitation: sumP
#   5) Heat flux: HF

# EddyPro data
# - automated, no input required if the proposed folder structure is followed
# - the folder can contain multiple EddyPro files to be merged by merge_eddy()
# - the folder is expected to contain CSV (csv) files with EddyPro data to merge
#   and optionally also TXT (txt) files with their documentation; their file
#   names should differ only in their file extension

# Timestamp of the computation
# - automated, will be included in file names
Tstamp <- format(Sys.time(), "%Y-%m-%d")

### Load and format Meteo data =================================================

# read_eddy() reads a single meteo CSV file including units (placed on the
# second row below the header) at a standardized path.
mf <- list.files(paths$qc_input_meteo, pattern = "\\.[Cc][Ss][Vv]$",
                 full.names = TRUE)[1]
M <- read_eddy(mf, check.names = FALSE)

# Rename Meteo data variables as required by the openeddy and REddyProc packages
# - columns other than those included in the Met_mapping table are dropped
# - columns that are not available are reported and automatically initialized
#   with NAs
M <- remap_vars(M, Met_mapping$workflow_varname, Met_mapping$Meteo_varname,
                regexp = TRUE, qc = "_qcode")

# strptime_eddy() rewrites the original varname of the timestamp column
# - retain the original varname for documentation purposes
vars <- openeddy::varnames(M)

# timestamp requires conversion to POSIXct for validation
M$timestamp <- strptime_eddy(M$timestamp, format = meteo_format,
                             allow_gaps = TRUE)

# reset original varnames
openeddy::varnames(M) <- vars

# merge_eddy() assures that the timestamp is complete and has a defined range
M <- merge_eddy(list(M), start = start, end = end)

# Correct units
# - not included in correct() as it is Czechglobe specific formatting
# - it should not impact sites with other formatting
# - fixed = TRUE treats the pattern as a literal string, not a regular
#   expression (the dot would otherwise match any character)
openeddy::units(M) <- gsub("st. ", "deg", openeddy::units(M), fixed = TRUE)

### Load and format EddyPro full output ========================================

# Load the EddyPro files (untouched originals) and bind them together

# read_EddyPro() reads all EddyPro files at the given path and merges them
# together. The expectation is that the files represent variables for a given
# site and different periods. The function merges them vertically (along a
# generated complete timestamp). Original column names are retained for
# reliable variable remapping.

# - set the correct skip parameter (number of lines above the header in the input)
# - set the correct file encoding (e.g. fileEncoding = "UTF-8")
# - make sure that all files have the same encoding
# - the included EddyPro output contains duplicated column names "co2_mean" and
#   "h2o_mean" that are respective concentrations in different units (their
#   correct match across files is not automated)
# - older versions of EddyPro had more duplicated column names in the output
# - the old EddyPro column name "max_speed" is corrected to "max_wind_speed"
EP <- read_EddyPro(paths$qc_input_eddypro, start = start, end = end, skip = 1,
                   fileEncoding = "UTF-8")

# Correct column names
names(EP) <- correct(names(EP))

# Correct units
openeddy::units(EP) <- correct(openeddy::units(EP), attr = "units")
# - check whether µ was correctly interpreted due to the encoding issues

# Rename EddyPro variables RH and VPD to avoid duplication with meteo variables
# measured by slow response sensors (slow response sensors are more reliable)
# - not included in correct() as it is context specific
names(EP)[names(EP) %in% c("RH", "VPD")] <- c("RH_EddyPro", "VPD_EddyPro")

### Merge Meteo and EddyPro data ===============================================

# A simple merge() works because both M and EP have a continuous equidistant
# timestamp covering the whole year. The assumption is that M and EP share no
# column names other than timestamp. In case the user decides to shorten the
# timestamp of one of the data frames by the "start" and "end" arguments, the
# merge() output will also be shortened.
data <- merge(M, EP)
nrow(data) # if 0 rows, M & EP shared more columns than just timestamp

# Change the timestamp formatting to the default timestamp format
# - format = "%Y-%m-%d %H:%M" is enforced to simplify further processing steps
data$timestamp <- format(data$timestamp, format = "%Y-%m-%d %H:%M", tz = "GMT")

# Round the columns of numeric mode type double to 6 significant digits
data <- round_df(data)

# Reset units lost during merging
# - varnames are used only for documentation below
openeddy::units(data) <- c(openeddy::units(M), openeddy::units(EP[-1]))

### Save the merged data with documentation ====================================

# Set the name of the merged output file
data_name_out <- name_merged(paths$qc_input_eddypro, siteyear)

# Save the merged Meteo and EddyPro data
write_eddy(data, file.path(paths$input_for_qc, data_name_out))

# Documentation of merged files
# - TXT files with the same name as the merged input CSV files are used if
#   present in the respective folders with input data
# - information about the Meteo remapping and session info are also included
# - the documentation file will not be overwritten if it already exists; this is
#   to avoid overwriting manually edited documentation; to overwrite it, check
#   the file content and delete it manually if safe
document_merged(data_name_out, paths$qc_input_eddypro, paths$qc_input_meteo,
                paths$input_for_qc, Tstamp, name, mail, M)

# EOF
--------------------------------------------------------------------------------
/EC_workflow/WF_2_QC_2025-04-27.R:
--------------------------------------------------------------------------------
### Description ================================================================

# Eddy covariance workflow part 2/4 (https://github.com/lsigut/EC_workflow).
# This code primarily aims at data quality checking (QC) and preparation of
# input data for gap-filling. Data before, during and after QC are plotted and
# further statistics are computed. Intermediate results are plotted or reported
# in the console to assist the user. Storage correction and correction of
# rotated vertical wind speed (w_rot) are optional and overwrite the original
# values of the respective variables.
#
# You can find an example data set at https://doi.org/10.5281/zenodo.6631498
#
# For documentation of EddyPro variable names see:
# https://www.licor.com/env/support/EddyPro/topics/output-files-full-output.html
#
# Code developed by Ladislav Sigut (sigut.l@czechglobe.cz).
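
# A note on the timestamp convention used below: shift.by from the settings
# file (-900 s for 30-min averaging periods) moves each timestamp from the end
# to the center of the averaging period. A minimal sketch of the arithmetic
# (illustrative only, not part of the workflow):
# as.POSIXct("2016-01-01 00:30:00", tz = "GMT") + shift.by
# corresponds to "2016-01-01 00:15:00", the center of the 00:00-00:30 period.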
17 | 18 | ### Set working directory to the folder where this document is saved =========== 19 | 20 | # This expects you are working in RStudio and this document is saved in the root 21 | # of already existing folder structure 22 | # - see structure_eddy() 23 | setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) 24 | 25 | ### Install and load required packages and functions =========================== 26 | 27 | # Load eddy covariance workflow utility functions 28 | utilities_file <- list.files(pattern = "utilities", full.names = TRUE) 29 | source(utilities_file) 30 | 31 | # Attach packages from GitHub 32 | # - you might need to have RTools for Windows machine to install openeddy: 33 | # https://cran.r-project.org/bin/windows/Rtools 34 | # - uses attach_pkg() function saved in utilities.R 35 | attach_pkg("openeddy", github = "lsigut/openeddy") 36 | 37 | # Attach packages from CRAN 38 | packages <- c("tibble", "dplyr", "ggplot2", "gridExtra", "reshape2") 39 | invisible(lapply(packages, attach_pkg)) 40 | 41 | # Check if openeddy version conforms to requirements 42 | if (packageVersion("openeddy") < package_version("0.0.0.9009")) 43 | warning("this version of workflow works reliably only with openeddy version ", 44 | "'0.0.0.9009'") 45 | 46 | ### Provide metadata and set file paths and arguments ========================== 47 | 48 | # Load the site-year settings file 49 | settings_file <- list.files(pattern = "settings", full.names = TRUE) 50 | source(settings_file) 51 | 52 | # Load the list of folder structure paths 53 | # - automated, no input required if proposed folder structure is followed 54 | paths <- make_paths() 55 | 56 | # Timestamp of the computation 57 | # - automated, will be included in file names 58 | Tstamp <- format(Sys.time(), "%Y-%m-%d") 59 | 60 | ### Load and format data ======================================================= 61 | 62 | # Path to QC input 63 | # - automated, no input required if proposed folder structure is followed 64 | # - 
these are meteo and EddyPro data merged in the previous >data preparation< 65 | # processing step to a single validated CSV file 66 | lf <- list.files(paths$input_for_qc, pattern = "\\.[Cc][Ss][Vv]$", 67 | full.names = TRUE)[1] # "\\." is literal dot 68 | if (length(lf) == 0) stop("no CSV in folder ", 69 | sQuote(paths$input_for_qc, q = FALSE )) 70 | data <- read_eddy(lf) 71 | 72 | # Convert timestamp to POSIXct and shift the date-time information to represent 73 | # the center of averaging period which is required for reliable processing 74 | data$timestamp <- strptime_eddy(data$timestamp, shift.by = shift.by) 75 | 76 | ### Plot data for visual precheck ============================================== 77 | 78 | # Save plots of variables that can help identify problems with instruments 79 | # - precheck_vars is an object defined within siteyear_settings.R file 80 | # - choose only names available in data 81 | precheck <- choose_avail(precheck_vars, names(data)) 82 | 83 | # Save plots of precheck variable to single pdf at paths$precheck 84 | save_precheck_plots(data, precheck, siteyear, Tstamp, paths$precheck) 85 | 86 | # Show dependence of w_unrot on wind direction with additional statistics 87 | # - png file is saved to respective file path 88 | # - to reduce sensitivity of computed stats to outliers, median and mad is used 89 | # - qrange is quantile range for ylim to reduce impact of outliers 90 | # - qrange has only visual effect, it does not affect computed statistics 91 | # - if you do not want to limit y-axis, set qrange = NULL 92 | ggsave(file.path( 93 | paths$wd_dependency, 94 | paste0(siteyear, "_w_unrot_WD_stats_", Tstamp, ".png")), 95 | ggplot_stats(data, "wind_dir", "w_unrot", circular = TRUE), 96 | type = "cairo-png", width = 297, height = 210, units = "mm") 97 | 98 | # Show dependence of w_rot on wind direction with additional statistics 99 | # - png file is saved to respective file path 100 | ggsave(file.path( 101 | paths$wd_dependency, 102 | 
paste0(siteyear, "_orig_w_rot_WD_stats_", Tstamp, ".png")), 103 | ggplot_stats(data, "wind_dir", "w_rot", circular = TRUE), 104 | type = "cairo-png", width = 297, height = 210, units = "mm") 105 | 106 | # Check the overall w_rot median 107 | # - this section is mainly relevant if rotation_type == "planar fit" and 108 | # evaluation of w wind component residual will be applied in the QC scheme 109 | # - ideally, w_rot median should be close to zero as any substantial deviations 110 | # could lead to excessive flagging by wresid filter 111 | # - this can be corrected by forcing the overall w_rot median to zero (double 112 | # rotation forces w_rot to zero for each half-hour) 113 | 114 | # Obtain w_rot_correction value that will be subtracted from w_rot 115 | # - estimation of overall w_rot median is automated 116 | # - if rotation_type == "double", w_rot_correction is not considered 117 | w_rot_correction <- 118 | if (rotation_type == "double") "none" else median(data$w_rot, na.rm = TRUE) 119 | 120 | # Store original value of w_rot_correction for documentation purposes 121 | # - once correction is applied, w_rot_correction is set to "none" 122 | applied_w_rot_correction <- w_rot_correction 123 | 124 | # Skip if w_rot_correction = "none" 125 | if (!w_rot_correction == "none" && rotation_type == "planar fit") { 126 | data$w_rot <- data$w_rot - w_rot_correction 127 | # plot the corrected w_rot 128 | # - png file is saved to respective file path 129 | ggsave(file.path( 130 | paths$wd_dependency, 131 | paste0(siteyear, "_corrected_w_rot_WD_stats_", Tstamp, ".png")), 132 | ggplot_stats(data, "wind_dir", "w_rot", circular = TRUE), 133 | type = "cairo-png", width = 297, height = 210, units = "mm") 134 | w_rot_correction <- "none" # avoid rerunning of the correction by mistake 135 | } 136 | 137 | # Save flux time series precheck plots with distinguished QC along basic meteo 138 | # - SSITC is the standard "Foken flag" (e.g. 
qc_H) from EddyPro renamed by 139 | # correct() within data_preparation workflow 140 | save_flux_plots(data, "SSITC", siteyear, "%s_precheck", Tstamp, paths$precheck, 141 | fluxes) 142 | 143 | ### Extract flags of predefined tests/filters ================================== 144 | 145 | # A set of filters is extracted that may or may not be useful to apply at a 146 | # given site 147 | # - it is suggested to experiment with the setup to get optimal results 148 | # - applied filters in this workflow seem to be a practical combination but 149 | # they should be further tested considering their flagging efficiency 150 | 151 | # Notes for wresid filter: 152 | # - recommended only for short canopy sites with setting rotation == "double" 153 | # - typically excessive flagging for sites with rotation == "planar fit" 154 | # - needs further testing 155 | QC <- extract_QC(data, rotation = rotation_type) 156 | summary_QC(QC, names(QC)) 157 | 158 | # Save the results to the main data frame 159 | data[names(QC)] <- QC 160 | 161 | ### Combine flags representing mainly technical issues with fluxes ============= 162 | 163 | # Create a tibble with QC filter names to combine for each flux 164 | prelim <- tribble( 165 | ~Tau, ~H, ~LE, ~NEE, 166 | "qc_Tau_SSITC", "qc_H_SSITC", "qc_LE_SSITC", "qc_NEE_SSITC", 167 | "qc_SA_abslim", "qc_SA_abslim", "qc_SAGA_abslim", "qc_SAGA_abslim", 168 | "qc_SA_spikesHF", "qc_SA_spikesHF", "qc_SAGA_spikesHF", "qc_SAGA_spikesHF", 169 | "qc_Tau_missfrac", "qc_H_missfrac", "qc_LE_missfrac", "qc_NEE_missfrac", 170 | "qc_Tau_scf", "qc_H_scf", "qc_LE_scf", "qc_NEE_scf", 171 | NA, "qc_H_var", NA, NA, 172 | "qc_Tau_runs", "qc_H_runs", "qc_LE_runs", "qc_NEE_runs", 173 | NA, "qc_H_lowcov", "qc_LE_lowcov", "qc_NEE_lowcov", 174 | NA, NA, "qc_GA_LI7200", "qc_GA_LI7200", 175 | "qc_ALL_wresid", "qc_ALL_wresid", "qc_ALL_wresid", "qc_ALL_wresid" 176 | ) 177 | 178 | # Combine specified flags for given flux to produce preliminary flags 179 | pre_res <- 
combn_prelim_QC(data, prelim) 180 | 181 | ### Apply flux interdependency ================================================= 182 | 183 | # Evaluate flux interdependency based on the preliminary flags 184 | # - preliminary H flag is needed only if IRGA = "open", otherwise not used 185 | # - preliminary Tau flag is not used 186 | interdep <- interdep(pre_res$qc_LE_prelim, pre_res$qc_H_prelim, IRGA_type) 187 | summary_QC(interdep, names(interdep)) 188 | 189 | # Save the results to the main data frame 190 | data[names(interdep)] <- interdep 191 | 192 | # In case of IRGA_type == "en_closed" LE is not affected by flux interdependency 193 | qc_LE_interdep <- ifelse(IRGA_type == "open", "qc_LE_interdep", NA) 194 | 195 | # Include flux interdependency among QC filter names to combine 196 | # - flux interdependency does not affect Tau 197 | prelim_ad <- tribble( 198 | ~Tau, ~H, ~LE, ~NEE, 199 | NA, "qc_H_interdep", qc_LE_interdep, "qc_NEE_interdep" 200 | ) 201 | 202 | prelim2 <- rbind(prelim, prelim_ad) 203 | 204 | # Combine specified flags for given flux to produce preliminary flags 205 | pre2_res <- combn_prelim_QC(data, prelim2) 206 | 207 | if (interactive_session) { 208 | # Show effect of flux interdependency test on QC flags 209 | # - no effect on Tau flags (flux interdependency not defined for Tau) 210 | # - no effect on LE flags if IRGA_type == "en_closed" 211 | plot_QC_summary(data, prelim2, cumul = FALSE) 212 | plot_QC_summary(data, prelim2, cumul = TRUE) 213 | 214 | # Update flux time series precheck plots according to prelim2 QC flags 215 | save_flux_plots(cbind(data, pre2_res), "prelim2", siteyear, "%s_precheck", 216 | Tstamp, paths$precheck, fluxes) 217 | } 218 | 219 | ### Apply storage correction =================================================== 220 | 221 | # Store original value of apply_storage for documentation purposes 222 | # - once correction is applied, apply_storage is set to FALSE 223 | strg_applied <- apply_storage 224 | 225 | if (apply_storage) { 226 
| # Storage flux is added to the respective original flux 227 | # - correction overwrites the original values of respective variables 228 | data$H <- add_st(data$H, data$H_strg) 229 | data$LE <- add_st(data$LE, data$LE_strg) 230 | data$NEE <- add_st(data$NEE, data$co2_strg) 231 | apply_storage <- FALSE # avoid rerunning of the correction by mistake 232 | } 233 | 234 | ### Apply prelim2 filters to fluxes ============================================ 235 | 236 | # Produce flux columns with suffix "_orig" only with fluxes passing previous QC 237 | orig_fluxes <- sapply(fluxes, function(x) paste0(x, "_orig")) 238 | for (i in seq_along(fluxes)) { 239 | data[orig_fluxes[i]] <- apply_QC(data[, fluxes[i]], pre2_res[, i]) 240 | } 241 | 242 | ### Extract flags based on low frequency data despiking ======================== 243 | desp <- data[0] 244 | 245 | # Low frequency data despiking is not applied for Tau 246 | # - change qrange if needed 247 | # - red circles show identified spikes 248 | plot_precheck(data, "H_orig", qrange = c(0, 1), pch = 19) 249 | desp$qc_H_spikesLF <- 250 | despikeLF(cbind(data, pre2_res), var = "H", qc_flag = "qc_H_prelim2", 251 | name_out = "qc_H_spikesLF", var_thr = c(-200, 800)) 252 | points(H_orig ~ timestamp, data[as.logical(desp$qc_H_spikesLF), ], col = "red") 253 | 254 | plot_precheck(data, "LE_orig", qrange = c(0, 1), pch = 19) 255 | desp$qc_LE_spikesLF <- 256 | despikeLF(cbind(data, pre2_res), var = "LE", qc_flag = "qc_LE_prelim2", 257 | name_out = "qc_LE_spikesLF", var_thr = c(-200, 800)) 258 | points(LE_orig ~ timestamp, data[as.logical(desp$qc_LE_spikesLF), ], col = "red") 259 | 260 | plot_precheck(data, "NEE_orig", qrange = c(0.005, 0.995), pch = 19) 261 | desp$qc_NEE_spikesLF <- 262 | despikeLF(cbind(data, pre2_res), var = "NEE", qc_flag = "qc_NEE_prelim2", 263 | name_out = "qc_NEE_spikesLF", var_thr = c(-100, 100)) 264 | points(NEE_orig ~ timestamp, data[as.logical(desp$qc_NEE_spikesLF), ], 265 | col = "red") 266 | 267 | # Check the 
results 268 | # - the test has 3 outcomes: 0 - OK; 2 - spike; NA - excluded from despiking 269 | # - since NA means not checked, it is interpreted as flag 0 270 | summary_QC(desp, names(desp)) 271 | 272 | # Save the results to the main data frame 273 | data[names(desp)] <- desp 274 | 275 | ### Extract fetch filter ======================================================= 276 | 277 | # The fetch filter is not applied for Tau 278 | data$qc_ALL_fetch70 <- fetch_filter( 279 | data, "x_70perc", "wind_dir", boundary, "qc_ALL_fetch70") 280 | summary_QC(data, "qc_ALL_fetch70") 281 | 282 | ### Combine QC flags before manual QC ========================================== 283 | 284 | # Include despiking and fetch filter among QC filter names to combine 285 | # - despiking and the fetch filter are not applied for Tau 286 | prelim2_ad <- tribble( 287 | ~Tau, ~H, ~LE, ~NEE, 288 | NA, "qc_H_spikesLF", "qc_LE_spikesLF", "qc_NEE_spikesLF", 289 | NA, "qc_ALL_fetch70", "qc_ALL_fetch70", "qc_ALL_fetch70" 290 | ) 291 | 292 | prelim3 <- rbind(prelim2, prelim2_ad) 293 | 294 | # Combine specified flags for given flux to produce preliminary flags 295 | pre3_res <- combn_prelim_QC(data, prelim3) 296 | 297 | if (interactive_session) { 298 | # Show effect of all filters before manual QC 299 | plot_QC_summary(data, prelim3, cumul = FALSE) 300 | plot_QC_summary(data, prelim3, cumul = TRUE) 301 | 302 | # Update flux time series precheck plots according to prelim3 QC flags 303 | save_flux_plots(cbind(data, pre3_res), qc_suffix = "prelim3", siteyear, 304 | "%s_precheck", Tstamp, paths$precheck, fluxes) 305 | } 306 | 307 | ### Apply prelim3 filters to fluxes ============================================ 308 | 309 | # Produce flux columns with suffix "_orig" only with fluxes passing previous QC 310 | for (i in seq_along(fluxes)) { 311 | data[orig_fluxes[i]] <- apply_QC(data[, fluxes[i]], pre3_res[, i]) 312 | } 313 | 314 | ### Run manual quality control ================================================= 315 | 
316 | # Check all fluxes specified by 'vars' already screened by the QC scheme above 317 | # - at any point, intermediate progress can be saved using option '6. finalize' 318 | # and answering 'y' to the following dialog: 319 | # "save current progress to file at 'path'?" 320 | # - progress after flagging the last flux should be saved in order to fully 321 | # reproduce the manual flags when rerunning the code next time 322 | # - if saving to a file is omitted, results are still saved to the 'man' object 323 | # and later also to data frame 'data', but this does not allow easy rerunning 324 | # - the returned timestamp is removed in 'man' but kept in the saved CSV file 325 | # - if not interactive and no manual QC is found: NULL is returned 326 | man <- check_manually(cbind(data, pre3_res), paths$quality_checking, 327 | vars = data.frame( 328 | x = fluxes, 329 | y = c("PAR", "Rn", "Rn", "PAR"), 330 | z = c("wind_speed", "LE_orig", "H_orig", "Tair") 331 | ), 332 | qc_prefix = "qc_", qc_suffix = "_prelim3", 333 | interactive_session, siteyear)[-1] 334 | summary_QC(man, names(man)) 335 | 336 | # Save the results to the main data frame 337 | data[names(man)] <- man 338 | 339 | ### Combine QC flags for last checkout ========================================= 340 | 341 | # Note: the whole section can be run iteratively 342 | 343 | # Consider only existing manual flags 344 | # - there might be variables without manual QC or 'man' can be NULL 345 | man_names <- set_man_names(fluxes, man) 346 | 347 | # Include manual QC among QC filter names to combine 348 | prelim3_ad <- tribble( 349 | ~Tau, ~H, ~LE, ~NEE, 350 | man_names$Tau, man_names$H, man_names$LE, man_names$NEE 351 | ) 352 | 353 | prelim4 <- rbind(prelim3, prelim3_ad) 354 | 355 | # Combine specified flags for given flux to produce preliminary flags 356 | pre4_res <- combn_prelim_QC(data, prelim4) 357 | 358 | if (interactive_session) { 359 | # Update flux time series precheck plots according to prelim4 QC flags 360 | save_flux_plots(cbind(data, 
pre4_res), qc_suffix = "prelim4", siteyear, 361 | "%s_precheck", Tstamp, paths$precheck, fluxes) 362 | } 363 | 364 | # Apply prelim4 filters to fluxes 365 | # - produce flux columns with suffix "_orig" only with fluxes passing previous QC 366 | for (i in seq_along(fluxes)) { 367 | data[orig_fluxes[i]] <- apply_QC(data[, fluxes[i]], pre4_res[, i]) 368 | } 369 | 370 | man <- check_manually(cbind(data, pre4_res), paths$quality_checking, 371 | vars = data.frame( 372 | x = fluxes, 373 | y = c("PAR", "Rn", "Rn", "PAR"), 374 | z = c("wind_speed", "LE_orig" , "H_orig", "Tair") 375 | ), 376 | qc_prefix = "qc_", qc_suffix = "_prelim4", 377 | interactive_session, siteyear)[-1] 378 | summary_QC(man, names(man)) 379 | 380 | # Save the results to the main data frame 381 | data[names(man)] <- man 382 | 383 | ### Combine QC flags to final flag for gap-filling ============================= 384 | 385 | # Consider only existing manual flags 386 | # - there might be variables without manual QC or 'man' can be NULL 387 | man_names <- set_man_names(fluxes, man) 388 | 389 | # Include manual QC among QC filter names to combine 390 | prelim3_ad <- tribble( 391 | ~Tau, ~H, ~LE, ~NEE, 392 | man_names$Tau, man_names$H, man_names$LE, man_names$NEE 393 | ) 394 | 395 | forGF <- rbind(prelim3, prelim3_ad) 396 | 397 | # Combine specified flags for given flux to produce final forGF flags 398 | forGF_res <- combn_prelim_QC(data, forGF) 399 | 400 | # Save the results to the main data frame 401 | data[names(forGF_res)] <- forGF_res 402 | 403 | ### Apply forGF filters to fluxes ============================================== 404 | 405 | # Produce flux columns with suffix "_orig" only with fluxes passing final QC 406 | for (i in seq_along(fluxes)) { 407 | data[orig_fluxes[i]] <- apply_QC(data[, fluxes[i]], forGF_res[, i]) 408 | } 409 | 410 | ### Produce QC summary and save and plot the results =========================== 411 | 412 | # Show the percentage of fluxes flagged by individual filters and their 
413 | # cumulative effect 414 | list_QC_ind <- lapply(names(forGF), function(x) 415 | print(summary_QC(data, na.omit(pull(forGF, x))))) 416 | list_QC_cum <- lapply(names(forGF), function(x) 417 | print(summary_QC(data, na.omit(pull(forGF, x)), cumul = TRUE))) 418 | list_QC <- c(list_QC_ind, list_QC_cum) 419 | names(list_QC) <- paste0(names(forGF), rep(c("", "_cumulative"), each = 4)) 420 | 421 | # Save each element of list_QC to separate file in QC_summary folder 422 | for (i in names(list_QC)) { 423 | write.csv(list_QC[[i]], 424 | file.path( 425 | paths$qc_summary, 426 | paste0(siteyear, "_QC_summary_", i, "_", Tstamp, ".csv"))) 427 | } 428 | 429 | # Save the QC summary results as plots 430 | save_QC_summary_plots(data, forGF, paths$qc_summary, siteyear, Tstamp) 431 | 432 | ### Plot the quality checked data ============================================== 433 | 434 | # Save the plots that show the data with QC flag used for gap-filling 435 | save_flux_plots(data, qc_suffix = "forGF", siteyear, "forGF_QC_%s", Tstamp, 436 | paths$quality_checking, fluxes) 437 | 438 | ### Write QC results to a file ================================================= 439 | 440 | # Allow data to be usable also after saving (no timestamp reformatting) 441 | save_data <- data 442 | 443 | # Correct timestamp to its original state 444 | save_data$timestamp <- save_data$timestamp - shift.by 445 | 446 | # Remap variable names and filter columns needed for online gap-filling tool: 447 | OT_in <- set_OT_input(save_data, 448 | c("qc_NEE_forGF", "NEE", "qc_LE_forGF", "LE", 449 | "qc_H_forGF", "H", "GR", "Tair", "Tsoil", "RH", 450 | "VPD", "qc_Tau_forGF", "ustar")) 451 | 452 | # Save the standardized Online Tool/REddyProc input to '.txt' file 453 | # - rounding not required as QC flags are integers and the rest was rounded 454 | # in data preparation step 455 | write_eddy(OT_in, 456 | file.path( 457 | paths$input_for_gf, 458 | paste0(siteyear, "_", Tstamp, ".txt")), 459 | quote = FALSE, sep = "\t") 
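
# Added illustration (not part of the original workflow; kept commented out):
# the 'shift.by' convention used above, shown in plain base R. With half-hourly
# data and shift.by = -900, input timestamps mark the end of each averaging
# period; adding shift.by at load moves them to the period center, and
# subtracting it (as done before writing output) restores the original stamps:
# period_end <- as.POSIXct("2016-06-01 10:30:00", tz = "GMT")
# period_center <- period_end + shift.by  # 10:15:00, center of 10:00-10:30
# restored <- period_center - shift.by    # 10:30:00, the original stamp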
460 | 461 | # Change the timestamp formatting to its original state: 462 | save_data$timestamp <- format(save_data$timestamp, format = "%Y-%m-%d %H:%M", 463 | tz = "GMT") 464 | 465 | # Save the full output 466 | # - rounding not required as QC flags are integers and the rest was rounded 467 | # in the data preparation step 468 | write_eddy(save_data, 469 | file.path( 470 | paths$quality_checking, 471 | paste0(siteyear, "_forGF_QC_full_output_", Tstamp, ".csv"))) 472 | 473 | # Choose the most important variables to later combine with gap-filling results 474 | # - essential_vars_QC is an object defined within WF settings 475 | # - use only those that are available 476 | essentials <- choose_avail(essential_vars_QC, names(save_data)) 477 | 478 | # Save the essential output 479 | write_eddy(save_data[essentials], 480 | file.path( 481 | paths$input_for_gf, 482 | paste0(siteyear, "_forGF_QC_essentials_", Tstamp, ".csv"))) 483 | 484 | # Save documentation information about executed quality control 485 | names(forGF) <- names(forGF_res) 486 | document_QC(Tstamp, name, mail, strg_applied, forGF, 487 | paths$quality_checking, siteyear) 488 | #EOF 489 | -------------------------------------------------------------------------------- /EC_workflow/WF_3_GF_&_FP_2025-04-27.R: -------------------------------------------------------------------------------- 1 | ### Description ================================================================ 2 | 3 | # Eddy covariance workflow part 3/4 (https://github.com/lsigut/EC_workflow). 4 | # This code primarily aims at u*-filtering, gap-filling (GF), flux partitioning 5 | # (FP) and preparation of input data for summaries. Original and final data are 6 | # plotted and further statistics are computed. Computation of bootstrap u* 7 | # thresholds and estimation of standard deviation based on look-up tables 8 | # provides further information about measurement uncertainty. This workflow part 9 | # relies heavily on the REddyProc package. 
For documentation of output variable 10 | # names please visit the MPI Online Tool website: 11 | # https://www.bgc-jena.mpg.de/5622399/REddyProc 12 | # 13 | # You can find an example data set at https://doi.org/10.5281/zenodo.6631498 14 | # 15 | # Code developed by Ladislav Sigut (sigut.l@czechglobe.cz). 16 | 17 | ### Set working directory to the folder where this document is saved =========== 18 | 19 | # This expects you are working in RStudio and this document is saved in the root 20 | # of an already existing folder structure 21 | # - see structure_eddy() 22 | setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) 23 | 24 | ### Install and load required packages and functions =========================== 25 | 26 | # Load eddy covariance workflow utility functions 27 | utilities_file <- list.files(pattern = "utilities", full.names = TRUE) 28 | source(utilities_file) 29 | 30 | # Attach packages from GitHub 31 | # - you might need RTools to install openeddy on a Windows machine: 32 | # https://cran.r-project.org/bin/windows/Rtools 33 | # - uses the attach_pkg() function saved in utilities.R 34 | attach_pkg("openeddy", github = "lsigut/openeddy") 35 | 36 | # Attach packages from CRAN 37 | # - see https://github.com/bgctw/REddyProc if mlegp is not available 38 | packages <- c("REddyProc", "bigleaf", "mlegp", "tibble") 39 | invisible(lapply(packages, attach_pkg)) 40 | 41 | # Check if openeddy version conforms to requirements 42 | if (packageVersion("openeddy") < package_version("0.0.0.9009")) 43 | warning("this version of the workflow works reliably only with openeddy version ", 44 | "'0.0.0.9009'") 45 | 46 | # Check if REddyProc version conforms to requirements 47 | if (packageVersion("REddyProc") < package_version("1.3.0")) 48 | warning("this version of the workflow works reliably only with REddyProc ", 49 | "version '1.3.0'") 50 | 51 | # REddyProc documentation: 52 | # https://github.com/bgctw/REddyProc 53 | # https://www.bgc-jena.mpg.de/5624551/REddyProc-Rpackage 54 | 55 | # Data formatting 
description: 56 | # https://www.bgc-jena.mpg.de/5624884/Data-Formats 57 | 58 | # Article: 59 | # https://www.biogeosciences.net/15/5015/2018/ 60 | 61 | ### Provide metadata and set file paths and arguments ========================== 62 | 63 | # Load the site-year settings file 64 | settings_file <- list.files(pattern = "settings", full.names = TRUE) 65 | source(settings_file) 66 | 67 | # Load the list of folder structure paths 68 | # - automated, no input required if the proposed folder structure is followed 69 | paths <- make_paths() 70 | 71 | # Meteo variables that will be plotted, gap-filled and exported 72 | # - FP expects Meteo columns produced during GF (even if no gaps in Meteo) 73 | # - minimal set must be: c('Rg', 'Tair', 'Tsoil', 'VPD') 74 | meteo <- c('Rg', 'Tair', 'Tsoil', 'VPD') 75 | 76 | # The path where the input for gap-filling is located (automated) 77 | input <- list.files(paths$input_for_gf, pattern = paste0(siteyear, ".*txt"), 78 | full.names = TRUE)[1] 79 | 80 | # The path where the file with essential variables is located (automated) 81 | ess_in <- list.files(paths$input_for_gf, 82 | pattern = paste0(siteyear, ".*essentials.*csv"), 83 | full.names = TRUE)[1] 84 | 85 | # Timestamp of the computation 86 | # - automated, will be included in file names 87 | Tstamp <- format(Sys.time(), "%Y-%m-%d") 88 | 89 | ### Prepare REddyProc input data =============================================== 90 | 91 | # Load data with one header and one unit row from a (tab-delimited) text file 92 | EddyData.F <- read_eddy(input, sep = "\t") 93 | # head(EddyData.F) 94 | # str(EddyData.F) 95 | 96 | # If not provided or if it includes gaps, calculate VPD from Tair and rH 97 | if (!"VPD" %in% names(EddyData.F) || anyNA(EddyData.F$VPD)) { 98 | EddyData.F$VPD <- fCalcVPDfromRHandTair(EddyData.F$rH, EddyData.F$Tair) 99 | } 100 | 101 | # Add timestamp in POSIX time format 102 | EddyDataWithPosix.F <- fConvertTimeToPosix( 103 | EddyData.F, 'YDH', Year = 'Year', Day = 'DoY', Hour = 'Hour') 104 | 
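
# Added sketch (an assumed base-R equivalent, not the actual REddyProc code;
# kept commented out): the 'YDH' conversion above builds POSIX timestamps from
# year, day of year and decimal hour, roughly corresponding to:
# to_posix <- function(Year, DoY, Hour) {
#   as.POSIXct(sprintf("%d-%03d", Year, DoY), format = "%Y-%j", tz = "GMT") +
#     Hour * 3600
# }
# to_posix(2016, 153, 10.5)  # "2016-06-01 10:30:00 GMT"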
105 | # Initialize R5 reference class sEddyProc for processing of eddy data 106 | # with all variables needed for processing later 107 | variables <- c('NEE', 'LE', 'H', 'Rg', 'Tair', 'Tsoil', 'VPD', 'Ustar') 108 | EddyProc.C <- sEddyProc$new(siteyear, EddyDataWithPosix.F, variables) 109 | EddyProc.C$sSetLocationInfo(lat, long, tzh) # site location info 110 | 111 | # See the content 112 | str(EddyProc.C) 113 | EddyProc.C$sPrintFrames(NumRows.i = 6L) 114 | 115 | ### Apply uStar-filtering ====================================================== 116 | 117 | # Seasons contained within one year (e.g. Dec 2014 is pooled with Jan, Feb 2014) 118 | # - skip estimation if a fixed ustar threshold was provided 119 | if (is.na(fixed_UT)) { 120 | set.seed(0815) 121 | season_factor <- usCreateSeasonFactorMonthWithinYear( 122 | EddyDataWithPosix.F$DateTime + shift.by) 123 | table(season_factor) 124 | (uStarRes <- EddyProc.C$sEstUstarThresholdDistribution( 125 | nSample = 200L, seasonFactor = season_factor, 126 | ctrlUstarEst = usControlUstarEst(isUsingCPTSeveralT = use_CPD))) 127 | 128 | # Round and save the results 129 | uStarRes <- round_df(uStarRes) 130 | write.csv(uStarRes, row.names = FALSE, 131 | file.path( 132 | paths$ustar_filtering, 133 | paste0("Ustar_thresholds_", Tstamp, ".csv"))) 134 | 135 | # Plot saturation of NEE with UStar for available seasons 136 | for (i in seq_along(levels(uStarRes$season))) { 137 | EddyProc.C$sPlotNEEVersusUStarForSeason( 138 | levels(uStarRes$season)[i], dir = paths$ustar_filtering) 139 | } 140 | 141 | # Use annual or seasonal estimates 142 | UstarThres.df <- if (seasonal_ustar) { 143 | usGetSeasonalSeasonUStarMap(uStarRes) 144 | } else { 145 | usGetAnnualSeasonUStarMap(uStarRes) 146 | } 147 | 148 | # Save results to a file for fast reload if needed 149 | # - ?readRDS 150 | saveRDS(UstarThres.df, file.path(paths$ustar_filtering, "UstarThres.df.rds")) 151 | } 152 | 153 | ### Run gap-filling 
============================================================ 154 | 155 | # Fill gaps in energy fluxes with MDS gap filling algorithm 156 | EddyProc.C$sMDSGapFill(c('H'), FillAll = TRUE) 157 | EddyProc.C$sMDSGapFill(c('LE'), FillAll = TRUE) 158 | 159 | # NEE gap filling 160 | # - the maximum of all available seasons is taken to mark periods with low uStar 161 | # if seasonal_ustar == FALSE (higher exclusion fraction) 162 | # - Note: the ustar filtering is implemented here only for nighttime and if 163 | # uStar value is missing, the half hour is not filtered; thus respective NEE 164 | # values are removed in quality control step (see ?set_OT_input) 165 | if (is.na(fixed_UT)) { 166 | (uStarThAgg <- EddyProc.C$sGetEstimatedUstarThresholdDistribution()) 167 | EddyProc.C$sSetUstarScenarios(usGetSeasonalSeasonUStarMap(uStarThAgg)) 168 | EddyProc.C$sGetUstarScenarios() # check the applied thresholds 169 | EddyProc.C$sMDSGapFillUStarScens('NEE', FillAll = TRUE) 170 | } else { 171 | # the alternative if using fixed_UT 172 | EddyProc.C$sMDSGapFillAfterUstar('NEE', uStarTh = fixed_UT, FillAll = TRUE) 173 | } 174 | 175 | # Fill gaps in variables with MDS gap filling algorithm without prior ustar 176 | # filtering for comparison 177 | EddyProc.C$sMDSGapFill('NEE', FillAll = TRUE, suffix = 'UNone') 178 | 179 | # Meteo must be gap-filled even when without gaps to run the partitioning 180 | for (met_var in meteo) { 181 | EddyProc.C$sMDSGapFill(met_var, FillAll = TRUE) 182 | } 183 | saveRDS(EddyProc.C, file.path(paths$gap_filling, "EddyProc.C_GF.rds")) 184 | 185 | ### Run flux partitioning ====================================================== 186 | 187 | # Perform flux partitioning for gap-filled product 188 | suffixes <- if (is.na(fixed_UT)) { 189 | c('uStar', 'U05', 'U50', 'U95', 'UNone') 190 | } else { 191 | c('uStar', 'UNone') 192 | } 193 | 194 | # Reichstein et al. 
(2005) - further abbreviated as MR05 195 | # - TempRange is automatically reduced in case of an insufficient amount of data 196 | # - if no solution was found for TempRange > 0, consider removing the given suffix 197 | for (i in suffixes) { 198 | TempRange <- 5 199 | repeat { 200 | suppressWarnings( 201 | EddyProc.C$sMRFluxPartition( 202 | TempVar = paste0(FP_temp, "_f"), 203 | QFTempVar = paste0(FP_temp, "_fqc"), 204 | suffix = i, 205 | parsE0Regression = list(TempRange = TempRange)) 206 | ) 207 | TempRange <- TempRange - 1 208 | if (TempRange <= 0) stop("MR05 flux partitioning failed for suffix ", i) 209 | if (is.null(EddyProc.C$sExportResults()$E_0_NEW[1])) break 210 | } 211 | } 212 | # Save the reference class and the column names at the end of MR05 FP 213 | saveRDS(EddyProc.C, file.path(paths$gap_filling, "EddyProc.C_MR05.rds")) 214 | MR05_cols <- colnames(EddyProc.C$sExportResults()) 215 | 216 | # Lasslop et al. (2010) - further abbreviated as GL10 217 | FP_GL10_out_list <- vector("list", length(suffixes)) 218 | for (i in suffixes) { 219 | rm(EddyProc.C) 220 | EddyProc.C <- readRDS(file.path(paths$gap_filling, "EddyProc.C_MR05.rds")) 221 | EddyProc.C$sGLFluxPartition( 222 | TempVar = paste0(FP_temp, "_f"), 223 | QFTempVar = paste0(FP_temp, "_fqc"), 224 | suffix = i) 225 | if (i == 'uStar') 226 | saveRDS(EddyProc.C, 227 | file.path(paths$gap_filling, "EddyProc.C_GL10_uStar.rds")) 228 | out <- EddyProc.C$sExportResults() 229 | cols <- colnames(out) 230 | cols_out <- cols[!cols %in% MR05_cols] 231 | out <- out[cols_out] 232 | corr_filter <- !cols_out %in% grep(paste(suffixes, collapse = "|"), 233 | cols_out, value = TRUE) 234 | names(out)[corr_filter] <- paste(cols_out[corr_filter], i, sep = '_') 235 | FP_GL10_out_list[[which(suffixes == i)]] <- out 236 | } 237 | 238 | # Column-bind the list with results to a single data frame and save GL10 results 239 | FP_GL10_out <- do.call(cbind, FP_GL10_out_list) 240 | saveRDS(FP_GL10_out, file.path(paths$gap_filling, "FP_GL10_out.rds")) 241 | 242 | ### Make following sections independent of previous processing ================= 243 | 244 | # Reload objects created during previous steps 245 | # - uStar-filtering, gap-filling (GF) and flux partitioning (FP) are 246 | # computationally expensive operations 247 | # - sEddyProc reference class object or its outputs are saved during different 248 | # processing stages 249 | # - if the above sections were run at least once for a given setup, uStar-filtering, 250 | # GF and FP can be omitted if only amendments in later steps are required 251 | # (note that all sections preceding 'Apply uStar-filtering' must be run) 252 | # - saving column names from different stages of post-processing simplifies 253 | # column selection for different output files and keeps output structure 254 | 255 | # Save snapshots of result colnames in steps to a list 256 | colnames <- list() 257 | 258 | # Save the column names at the end of Reichstein et al. (2005) FP 259 | FP_MR05 <- readRDS(file.path(paths$gap_filling, "EddyProc.C_MR05.rds")) 260 | colnames$FP_MR05 <- colnames(FP_MR05$sExportResults()) 261 | 262 | # Save the column names at the end of Lasslop et al. 
(2010) FP 263 | # - contains results of only one scenario (uStar) 264 | FP_GL10_out <- readRDS(file.path(paths$gap_filling, "FP_GL10_out.rds")) 265 | colnames$FP_GL10 <- colnames(FP_GL10_out) 266 | 267 | # Reload the reference class in the state after the uStar FP scenario 268 | EddyProc.C <- readRDS(file.path(paths$gap_filling, "EddyProc.C_GL10_uStar.rds")) 269 | 270 | ### Plot the results using REddyProc =========================================== 271 | 272 | # Plot daily sums of fluxes with their uncertainties 273 | for (Var in c("H_f", "LE_f", "NEE_uStar_f")) { 274 | EddyProc.C$sPlotDailySums(Var, paste0(Var, "sd"), 275 | Dir = paths$plots, Format = plot_as) 276 | } 277 | # Plot fingerprints of relevant variables (with gaps and also gap-filled) 278 | FP_vars <- c(paste0(meteo, "_f"), "H", "LE", "NEE", "H_f", "LE_f", 279 | "NEE_uStar_f", "Reco_uStar", "GPP_uStar_f", "Reco_DT_uStar", 280 | "GPP_DT_uStar") 281 | for (Var in FP_vars) { 282 | EddyProc.C$sPlotFingerprint(Var, Dir = paths$plots, Format = plot_as) 283 | } 284 | # Plot diurnal cycle of relevant variables (only gap-filled) 285 | DC_vars <- c(paste0(meteo, "_f"), "H_f", "LE_f", "NEE_uStar_f", 286 | "Reco_uStar", "GPP_uStar_f", "Reco_DT_uStar", "GPP_DT_uStar") 287 | for (Var in DC_vars) { 288 | EddyProc.C$sPlotDiurnalCycle(Var, Dir = paths$plots, Format = plot_as) 289 | } 290 | 291 | ### Convert LE to ET and combine ustar filter (UF) with qc_NEE_forGF =========== 292 | 293 | # Load the essential QC file 294 | # - remove columns with "_orig" suffixes to prevent duplication 295 | ess <- read_eddy(ess_in) 296 | ess[grep("_orig$", names(ess), value = TRUE)] <- NULL 297 | 298 | # Export input data, gap-filling and flux partitioning results 299 | all_out <- cbind(ess["timestamp"], EddyData.F, FP_MR05$sExportResults(), 300 | FP_GL10_out) 301 | 302 | # Convert LE to ET [mm hour-1] 303 | # - columns other than LE_f are required for agg_fsd() uncertainty evaluation 304 | # - LE_fqc is included in the conversion only to keep 
proper ordering 305 | LE_vars <- c("LE_orig", "LE_f", "LE_fqc", "LE_fall", "LE_fsd") 306 | ET_vars <- gsub("LE", "ET", LE_vars) 307 | all_out[, ET_vars] <- 308 | lapply(LE_vars, 309 | function(x) LE.to.ET(all_out[, x], all_out$Tair_f) * 3600) 310 | openeddy::units(all_out[ET_vars]) <- rep("mm hour-1", length(ET_vars)) 311 | 312 | # Overwrite ET_fqc with proper values 313 | all_out$ET_fqc <- all_out$LE_fqc 314 | openeddy::units(all_out$ET_fqc) <- "-" 315 | 316 | # Data are excluded (flag 2) by ustar filter (UF) when Ustar_uStar_fqc > 0 317 | qc_uStar <- as.data.frame(ifelse(all_out["Ustar_uStar_fqc"] > 0, 2, 0)) 318 | names(qc_uStar) <- "qc_uStar" 319 | 320 | # Create resulting QC flag for NEE 321 | all_out$qc_NEE_forGF_UF <- combn_QC(cbind(ess["qc_NEE_forGF"], qc_uStar), 322 | c("qc_NEE_forGF", "qc_uStar"), 323 | "qc_NEE_forGF_UF") 324 | 325 | ### Save outputs and documentation ============================================= 326 | 327 | # Round and save all results together with input data into CSV file 328 | all_out <- round_df(all_out) 329 | write_eddy(all_out, 330 | file.path( 331 | paths$gap_filling, 332 | paste0(siteyear, "_GF_full_output_", Tstamp, ".csv"))) 333 | 334 | # Select the most important variables obtained during gap-filling 335 | # - add also ET columns created by conversion from LE 336 | gf_ess <- grep(paste(c("_orig$", "_f$", "_fqc$", "_fall$", "_fsd$", "_Thres$"), 337 | collapse = "|"), colnames(all_out), value = TRUE) 338 | gf_ess <- gf_ess[!grepl("U05|U50|U95|UNone|GPP", gf_ess)] 339 | 340 | # Select the most important variables obtained during flux partitioning 341 | fp_ess <- grep("DT_uStar", colnames$FP_GL10, value = TRUE) 342 | 343 | ess_vars <- c("qc_NEE_forGF_UF", gf_ess, "Reco_uStar", "GPP_uStar_f", fp_ess) 344 | 345 | # Create object with essential variables including gap-filling (GF) 346 | # and flux partitioning results 347 | ess_vars <- choose_avail(ess_vars, names(all_out)) 348 | ess_out <- cbind(ess, all_out[ess_vars]) 349 | 350 | # 
Save the essential outputs 351 | # - rounding of numerical columns is not needed as 'all_out' is already rounded 352 | write_eddy(ess_out, 353 | file.path( 354 | paths$gap_filling, 355 | paste0(siteyear, "_GF_essentials_", Tstamp, ".csv"))) 356 | 357 | # Save documentation about executed gap-filling and flux partitioning 358 | document_GF(all_out, Tstamp, name, mail, siteyear, lat, long, tzh, 359 | FP_temp, fixed_UT, seasonal_ustar, use_CPD, paths$gap_filling) 360 | 361 | ### Plot the results using openeddy ============================================ 362 | 363 | # Correct timestamp for proper display 364 | ess_out$timestamp <- strptime_eddy(ess_out$timestamp, "%Y-%m-%d %H:%M", 365 | shift.by = shift.by) 366 | 367 | # Saving plots with gap-filled fluxes 368 | # -two kinds of plots for each flux: 369 | # 1) _f: filled flux with original measurements incorporated 370 | # 2) _fall: filled flux with original measurements excluded 371 | pdf(file.path( 372 | paths$gap_filling, 373 | paste0(siteyear, "_H_f_", Tstamp, ".pdf")), 374 | width = 11.00, height = 8.27) 375 | plot_eddy(ess_out, "H", "qc_H_forGF", "qc_H_forGF", 376 | flux_gf = "H_f") 377 | dev.off() 378 | pdf(file.path( 379 | paths$gap_filling, 380 | paste0(siteyear, "_H_fall_", Tstamp, ".pdf")), 381 | width = 11.00, height = 8.27) 382 | plot_eddy(ess_out, "H", "qc_H_forGF", "qc_H_forGF", flux_gf = "H_fall") 383 | dev.off() 384 | 385 | pdf(file.path( 386 | paths$gap_filling, 387 | paste0(siteyear, "_LE_f_", Tstamp, ".pdf")), 388 | width = 11.00, height = 8.27) 389 | plot_eddy(ess_out, "LE", "qc_LE_forGF", "qc_LE_forGF", 390 | flux_gf = "LE_f") 391 | dev.off() 392 | pdf(file.path( 393 | paths$gap_filling, 394 | paste0(siteyear, "_LE_fall_", Tstamp, ".pdf")), 395 | width = 11.00, height = 8.27) 396 | plot_eddy(ess_out, "LE", "qc_LE_forGF", "qc_LE_forGF", 397 | flux_gf = "LE_fall") 398 | dev.off() 399 | 400 | # Saving plots with gap-filled and flux partitioned data 401 | # -two kinds of flux partitioning for NEE: 402 
| # 1) _MR05: Reichstein et al. (2005) 403 | # 2) _GL10: Lasslop et al. (2010) 404 | # -two kinds of plots for each flux: 405 | # 1) _f: filled flux with original measurements incorporated 406 | # 2) _fall: filled flux with original measurements excluded 407 | MR05 <- ess_out 408 | MR05_FP_names_filter <- names(MR05) %in% c("Reco_uStar", "GPP_uStar_f") 409 | names(MR05)[MR05_FP_names_filter] <- c("Reco", "GPP") 410 | pdf(file.path( 411 | paths$gap_filling, 412 | paste0(siteyear, "_NEE_uStar_f_MR05_", Tstamp, ".pdf")), 413 | width = 11.00, height = 8.27) 414 | plot_eddy(MR05, "NEE", "qc_NEE_forGF_UF", "qc_NEE_forGF_UF", 415 | flux_gf = "NEE_uStar_f", NEE_sep = TRUE) 416 | dev.off() 417 | pdf(file.path( 418 | paths$gap_filling, 419 | paste0(siteyear, "_NEE_uStar_fall_MR05_", Tstamp, ".pdf")), 420 | width = 11.00, height = 8.27) 421 | plot_eddy(MR05, "NEE", "qc_NEE_forGF_UF", "qc_NEE_forGF_UF", 422 | flux_gf = "NEE_uStar_fall", NEE_sep = TRUE) 423 | dev.off() 424 | 425 | GL10 <- ess_out 426 | GL10_FP_names_filter <- names(GL10) %in% c("Reco_DT_uStar", "GPP_DT_uStar") 427 | names(GL10)[GL10_FP_names_filter] <- c("Reco", "GPP") 428 | pdf(file.path( 429 | paths$gap_filling, 430 | paste0(siteyear, "_NEE_uStar_f_GL10_", Tstamp, ".pdf")), 431 | width = 11.00, height = 8.27) 432 | plot_eddy(GL10, "NEE", "qc_NEE_forGF_UF", "qc_NEE_forGF_UF", 433 | flux_gf = "NEE_uStar_f", NEE_sep = TRUE) 434 | dev.off() 435 | pdf(file.path( 436 | paths$gap_filling, 437 | paste0(siteyear, "_NEE_uStar_fall_GL10_", Tstamp, ".pdf")), 438 | width = 11.00, height = 8.27) 439 | plot_eddy(GL10, "NEE", "qc_NEE_forGF_UF", "qc_NEE_forGF_UF", 440 | flux_gf = "NEE_uStar_fall", NEE_sep = TRUE) 441 | dev.off() 442 | 443 | # EOF 444 | -------------------------------------------------------------------------------- /EC_workflow/WF_4_Summary_2025-04-27.R: -------------------------------------------------------------------------------- 1 | ### Description 
================================================================ 2 | 3 | # Eddy covariance workflow part 4/4 (https://github.com/lsigut/EC_workflow) 4 | # This code primarily aims to produce the summary of processed data at different 5 | # timescales (daily, weekly, monthly, yearly) and to plot them. 6 | # 7 | # You can find an example data set at https://doi.org/10.5281/zenodo.6631498 8 | # 9 | # Code developed by Ladislav Sigut (sigut.l@czechglobe.cz). 10 | 11 | ### Set working directory to the folder where this document is saved =========== 12 | 13 | # This expects you are working in RStudio and this document is saved in the root 14 | # of an already existing folder structure 15 | # - see structure_eddy() 16 | setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) 17 | 18 | ### Install and load required packages and functions =========================== 19 | 20 | # Load eddy covariance workflow utility functions 21 | utilities_file <- list.files(pattern = "utilities", full.names = TRUE) 22 | source(utilities_file) 23 | 24 | # Attach packages from GitHub 25 | # - you might need RTools to install openeddy on a Windows machine: 26 | # https://cran.r-project.org/bin/windows/Rtools 27 | # - uses the attach_pkg() function saved in utilities.R 28 | attach_pkg("openeddy", github = "lsigut/openeddy") 29 | 30 | # Attach packages from CRAN 31 | invisible(lapply(c("openair", "tibble"), attach_pkg)) 32 | 33 | # Check if openeddy version conforms to requirements 34 | if (packageVersion("openeddy") < package_version("0.0.0.9009")) 35 | warning("this version of the workflow works reliably only with openeddy version ", 36 | "'0.0.0.9009'") 37 | 38 | ### Provide metadata and set file paths and arguments ========================== 39 | 40 | # Load the site-year settings file 41 | settings_file <- list.files(pattern = "settings", full.names = TRUE) 42 | source(settings_file) 43 | 44 | # Timestamp of the computation 45 | # - automated, will be included in file names 46 | Tstamp <- format(Sys.time(), 
"%Y-%m-%d") 47 | 48 | # Load the list of folder structure paths 49 | # - automated, no input required if proposed folder structure is followed 50 | paths <- make_paths() 51 | 52 | # Input path for summary (automated) 53 | # - gap-filled and partitioned data 54 | path_in <- list.files(paths$gap_filling, 55 | pattern = paste0(siteyear, ".*GF_essentials.*csv"), 56 | full.names = TRUE)[1] 57 | 58 | # Specify the time shift (in seconds) to be applied to the date-time information 59 | # in order to represent the center of averaging period 60 | shift.by <- -900 61 | 62 | ### Load the input file and convert timestamp and date ========================= 63 | 64 | data <- read_eddy(path_in) 65 | str(data) # check if loaded properly 66 | data$timestamp <- strptime_eddy(data$timestamp, "%Y-%m-%d %H:%M", 67 | shift.by = shift.by) 68 | head(data$timestamp) 69 | 70 | ### Plot half-hourly data before unit conversion =============================== 71 | 72 | mean <- choose_avail(mean, names(data)) 73 | sum <- choose_avail(sum, names(data)) 74 | err_agg <- choose_avail(err_agg, names(data)) 75 | 76 | # Specify variables for plots at half-hour resolution 77 | hh_vars <- grep("[^c]$", unique(c(mean, sum, err_agg)), value = TRUE) 78 | 79 | # Print half-hourly results to pdf and png 80 | pdf(file.path( 81 | paths$summary, 82 | paste0(siteyear, "_half-hourly_plots_", Tstamp, ".pdf")), 83 | width = 11.00, height = 8.27) 84 | invisible(lapply(hh_vars, plot_hh, x = data)) 85 | dev.off() 86 | 87 | for (i in hh_vars) { 88 | png(file.path( 89 | paths$png, 90 | paste0(siteyear, "_hh_", i, "_", Tstamp, ".png")), 91 | width = 3508, height = 2480, res = 400) 92 | plot_hh(data, i) 93 | dev.off() 94 | } 95 | 96 | ### Plot wind roses and 1D footprint results =================================== 97 | 98 | # Format and prepare data for openair package 99 | wrose_all <- data.frame(ws = data$wind_speed, wd = data$wind_dir) 100 | wrose_all$time <- cut(data$PAR, c(-Inf, 10, Inf), 101 | labels = 
c("nighttime", "daytime")) 102 | wrose_all$months <- ordered(month.name[as.POSIXlt(data$timestamp)$mon + 1], 103 | month.name) 104 | # - number of groups to which zeta should be cut 105 | ngroups <- 6 106 | # - identify cut breakpoints 107 | breakpoints <- quantile(data$zeta, seq(0, 1, len = ngroups + 1), na.rm = TRUE) 108 | # - cut zeta so intervals are closed on the left and highest value is included 109 | # - negative (positive) zeta represents unstable (stable) conditions 110 | wrose_all$stability <- cut(data$zeta, breakpoints, right = FALSE, 111 | include.lowest = TRUE) 112 | 113 | # Print all to pdf 114 | pdf(file.path( 115 | paths$summary, 116 | paste0(siteyear, "_wind_roses_", Tstamp, ".pdf")), 117 | width = 11.00, height = 8.27) 118 | windRose(wrose_all[complete.cases(wrose_all[c("ws", "wd")]), ], 119 | angle = 15, paddle = FALSE, breaks = 5) 120 | windRose(wrose_all[complete.cases(wrose_all[c("ws", "wd", "time")]), ], 121 | type = "time", angle = 15, paddle = FALSE, breaks = 5) 122 | windRose(wrose_all[complete.cases(wrose_all[c("ws", "wd", "months")]), ], 123 | type = "months", angle = 15, paddle = FALSE, breaks = 5, 124 | grid.line = 10) 125 | windRose(wrose_all[complete.cases(wrose_all[c("ws", "wd", "stability")]), ], 126 | type = "stability", angle = 15, paddle = FALSE, 127 | breaks = 5, grid.line = 10, 128 | main = "Zeta parameter based stability classes") 129 | print(ggplot_stats(data, "wind_dir", "x_peak", circular = TRUE)) 130 | print(ggplot_stats(data, "wind_dir", "x_70perc", circular = TRUE)) 131 | print(ggplot_stats(data, "wind_dir", "wind_speed", circular = TRUE)) 132 | print(ggplot_stats(data, "wind_dir", "ustar", circular = TRUE)) 133 | print(ggplot_stats(data, "wind_dir", "zeta", circular = TRUE)) 134 | dev.off() 135 | 136 | # Print separately to png 137 | # - save_png() is defined in utilities.R as a helper function for saving plots 138 | save_png("wind_rose_all", paths$png, siteyear, Tstamp) 139 | 
windRose(wrose_all[complete.cases(wrose_all[c("ws", "wd")]), ], 140 | angle = 22.5, paddle = FALSE, breaks = 5) 141 | dev.off() 142 | 143 | save_png("wind_rose_day-night", paths$png, siteyear, Tstamp) 144 | windRose(wrose_all[complete.cases(wrose_all[c("ws", "wd", "time")]), ], 145 | type = "time", angle = 22.5, paddle = FALSE, breaks = 5) 146 | dev.off() 147 | 148 | save_png("wind_rose_months", paths$png, siteyear, Tstamp) 149 | windRose(wrose_all[complete.cases(wrose_all[c("ws", "wd", "months")]), ], 150 | type = "months", angle = 45, paddle = FALSE, breaks = 5, 151 | grid.line = 10) 152 | dev.off() 153 | 154 | save_png("wind_rose_stability", paths$png, siteyear, Tstamp) 155 | windRose(wrose_all[complete.cases(wrose_all[c("ws", "wd", "stability")]), ], 156 | type = "stability", angle = 22.5, paddle = FALSE, 157 | breaks = 5, grid.line = 10, 158 | main = "Zeta parameter based stability classes") 159 | dev.off() 160 | 161 | save_png("wind_dir_x_peak", paths$png, siteyear, Tstamp) 162 | print(ggplot_stats(data, "wind_dir", "x_peak", circular = TRUE)) 163 | dev.off() 164 | 165 | save_png("wind_dir_x_70perc", paths$png, siteyear, Tstamp) 166 | print(ggplot_stats(data, "wind_dir", "x_70perc", circular = TRUE)) 167 | dev.off() 168 | 169 | save_png("wind_dir_wind_speed", paths$png, siteyear, Tstamp) 170 | print(ggplot_stats(data, "wind_dir", "wind_speed", circular = TRUE)) 171 | dev.off() 172 | 173 | save_png("wind_dir_ustar", paths$png, siteyear, Tstamp) 174 | print(ggplot_stats(data, "wind_dir", "ustar", circular = TRUE)) 175 | dev.off() 176 | 177 | save_png("wind_dir_zeta", paths$png, siteyear, Tstamp) 178 | print(ggplot_stats(data, "wind_dir", "zeta", circular = TRUE)) 179 | dev.off() 180 | 181 | ### Compute summaries for different intervals ================================== 182 | 183 | # Daily, weekly, monthly and yearly means, sums and uncertainties 184 | intervals <- c("%Y-%m-%d", "%W_%y", "%b-%y", "%Y") 185 | agg_periods <- c("day-1", "week-1", "month-1", 
"year-1") 186 | 187 | means <- lapply(intervals, function(x) agg_mean(data[c("timestamp", mean)], x)) 188 | sums <- mapply(function(x, y) agg_sum(data[c("timestamp", sum)], x, agg_per = y), 189 | x = intervals, 190 | y = agg_periods, 191 | SIMPLIFY = FALSE) 192 | fsd <- mapply(function(x, y) agg_fsd(data, x, agg_per = y), 193 | x = intervals, 194 | y = agg_periods, 195 | SIMPLIFY = FALSE) 196 | DT_SD <- mapply(function(x, y) agg_DT_SD(data, x, agg_per = y), 197 | x = intervals, 198 | y = agg_periods, 199 | SIMPLIFY = FALSE) 200 | 201 | # Compute additional parameters 202 | pars <- vector("list", length(means)) 203 | 204 | for (i in seq_along(means)) { 205 | pars[[i]] <- means[[i]][c("Intervals", "days")] 206 | pars[[i]]$bowen_ratio_f <- sums[[i]]$H_f_sum / sums[[i]]$LE_f_sum 207 | pars[[i]]$evaporative_fraction <- sums[[i]]$LE_f_sum / 208 | (sums[[i]]$H_f_sum + sums[[i]]$LE_f_sum) 209 | pars[[i]]$closure_fraction <- (sums[[i]]$LE_f_sum + sums[[i]]$H_f_sum) / 210 | sums[[i]]$Rn_sum 211 | openeddy::units(pars[[i]]) <- rep("-", ncol(pars[[i]])) 212 | } 213 | 214 | # Mark days with positive carbon uptake and Tair above 5 degC (CUP and GSL) 215 | pars[[1]]$CUP <- 0L 216 | pars[[1]]$CUP[sums[[1]]$NEP_uStar_f_sum > 0] <- 1L 217 | pars[[1]]$GSL <- 0L 218 | pars[[1]]$GSL[means[[1]]$Tair_mean > 5] <- 1L 219 | 220 | # Count days with positive carbon uptake and Tair above 5 degC in intervals 221 | # - requires daily resolution of factors representing aggregation intervals 222 | # - number of days in interval is included 223 | # - first element of list "days" is left NULL to simplify merging 224 | resol <- c("%W_%y", "%b-%y", "%Y") 225 | days <- vector("list", length(means)) 226 | 227 | for (i in seq_along(resol)) { 228 | grouping_str <- strftime(means[[1]]$Intervals, format = resol[i], tz = "GMT") 229 | grouping <- factor(grouping_str, levels = unique(grouping_str)) 230 | days[[i+1]] <- aggregate(means[[1]]$Intervals, list(grouping), length)[2] 231 | days[[i+1]]$CUP <- 
aggregate(pars[[1]]$CUP, list(grouping), sum)[, 2] 232 | days[[i+1]]$GSL <- aggregate(pars[[1]]$GSL, list(grouping), sum)[, 2] 233 | names(days[[i+1]])[1] <- "days" 234 | } 235 | 236 | # Report percentage of original measured data in intervals 237 | orig <- c("H_orig", "LE_orig", "NEE_uStar_orig") 238 | avail <- vector("list", length(intervals)) 239 | 240 | for (i in seq_along(intervals)) { 241 | g_str <- strftime(data$timestamp, format = intervals[i], tz = "GMT") 242 | g <- factor(g_str, levels = unique(g_str)) 243 | ilen <- aggregate(data$timestamp, list(g), length) 244 | av <- aggregate(data[orig], list(g), function(x) sum(!is.na(x))) 245 | avail[[i]] <- round(av[-1] / ilen$x * 100, 1) 246 | openeddy::units(avail[[i]]) <- rep("%", 3) 247 | } 248 | 249 | # Merge additional variables with pars data frame and add varnames and units 250 | for (i in seq_along(means)) { 251 | pars[[i]][names(days[[i]][-1])] <- days[[i]][-1] 252 | pars[[i]][orig] <- avail[[i]] 253 | openeddy::varnames(pars[[i]][c("CUP", "GSL", orig)]) <- c( 254 | "Carbon Uptake Period", "Growing Season Length", 255 | paste("percentage of", gsub("_orig", "", orig), "original records")) 256 | openeddy::units(pars[[i]][c("CUP", "GSL")]) <- rep( 257 | paste0("days ", agg_periods[i]), 2) 258 | } 259 | 260 | # Combine summaries to a single data frame per interval (all in one list) 261 | # and round and save the results 262 | summaries <- vector("list", length(means)) 263 | resol_names <- c("daily", "weekly", "monthly", "yearly") 264 | names(summaries) <- resol_names 265 | 266 | # Save the summaries to CSV files 267 | for (i in seq_along(means)) { 268 | summaries[[i]] <- cbind(means[[i]], fsd[[i]]$mean[-c(1:2)], 269 | DT_SD[[i]]$mean[-c(1:2)], sums[[i]][-c(1:2)], 270 | fsd[[i]]$sum[-c(1:2)], DT_SD[[i]]$sum[-c(1:2)], 271 | pars[[i]][-c(1:2)]) 272 | write_eddy( 273 | round_df(summaries[[i]]), 274 | file.path( 275 | paths$summary, 276 | paste0(siteyear, "_", resol_names[i], "_summary_", Tstamp, ".csv")) 277 | ) 
278 | } 279 | 280 | ### Plot summaries for different intervals ===================================== 281 | 282 | vars <- names(summaries$daily) 283 | vars <- vars[!(vars %in% c("Intervals", "days"))] 284 | 285 | # Plot aggregated variables per given intervals to pdf 286 | for (interval in c("daily", "weekly", "monthly")) { 287 | pdf(file.path( 288 | paths$summary, 289 | paste0(siteyear, "_", interval, "_plots_", Tstamp, ".pdf")), 290 | width = 11.00, height = 8.27) 291 | for (var in vars) { 292 | barplot_agg(summaries[[interval]], var = var, interval) 293 | } 294 | dev.off() 295 | } 296 | 297 | # Plot aggregated variables per given intervals to png 298 | for (interval in c("daily", "weekly", "monthly")) { 299 | for (var in vars) { 300 | png(file.path( 301 | paths$png, 302 | paste0(siteyear, "_", interval, "_", var, "_", Tstamp, ".png")), 303 | width = 3508, height = 2480, res = 350) 304 | barplot_agg(summaries[[interval]], var = var, interval) 305 | dev.off() 306 | } 307 | } 308 | 309 | ### Plot spatio-temporal sampling coverage to pdf and png ====================== 310 | 311 | # - see Griebel et al. 
(2020) or ?spti_coverage for details 312 | 313 | # Create a list of ggplot objects 314 | spti_covp <- spti_coverage( 315 | data, "timestamp", "wind_dir", "NEE_uStar_f", "NEE_uStar_fqc", 316 | plot = TRUE) 317 | 318 | # Save all ggplots to pdf 319 | pdf(file.path( 320 | paths$summary, 321 | paste0(siteyear, "_spatio-temporal_sampling_coverage_", Tstamp, ".pdf")), 322 | width = 11.00, height = 8.27) 323 | print(spti_covp) 324 | dev.off() 325 | 326 | # Save specified ggplots to png 327 | save_png("spatial_sampling_coverage", paths$png, siteyear, Tstamp) 328 | print(spti_covp[[1]]$spatial_sampling_coverage) 329 | dev.off() 330 | 331 | save_png("temporal_sampling_coverage", paths$png, siteyear, Tstamp) 332 | print(spti_covp[[1]]$temporal_sampling_coverage) 333 | dev.off() 334 | 335 | # EOF 336 | -------------------------------------------------------------------------------- /EC_workflow/utilities_2025-04-27.R: -------------------------------------------------------------------------------- 1 | ### Description ================================================================ 2 | 3 | # Collection of utility functions that are likely useful only within this 4 | # workflow and may or may not be moved to the openeddy package. To simplify the 5 | # workflow for the 4 supported fluxes, high-level functions were designed and 6 | # saved in 'utilities.R', mostly for cases where multiple commands should be 7 | # run without the need of user intervention. Users can still adapt function arguments. 8 | # 9 | # Code developed by Ladislav Sigut (sigut.l@czechglobe.cz). 10 | 11 | #' Supported Fluxes 12 | #' 13 | #' A complete set of supported fluxes in the eddy covariance workflow. 14 | #' 15 | #' While users can utilize [openeddy] to create their own processing workflow 16 | #' for any set of fluxes or variables, the original intent was to process 17 | #' specified fluxes. The processing workflow is available here: 18 | #' <https://github.com/lsigut/EC_workflow>. 
19 | fluxes <- c("Tau", "H", "LE", "NEE") 20 | 21 | # Attach an R package 22 | # package: A character string specifying single package 23 | # github: A character string specifying github repository 24 | # - used in all workflows to attach or install and attach packages if not present 25 | attach_pkg <- function(package, github = NULL) { 26 | # load package if available 27 | avail <- require(package, character.only = TRUE) 28 | if (!avail) { 29 | # if not available and placed at github 30 | if (!is.null(github)) { 31 | # devtools package is required 32 | if (!require("devtools")) install.packages("devtools") 33 | devtools::install_github(github) 34 | } else { 35 | # if not available and placed at CRAN 36 | install.packages(package) 37 | } 38 | # load installed package 39 | require(package, character.only = TRUE) 40 | } 41 | } 42 | 43 | # Name merged output 44 | # EP_path: A character string specifying folder name including EddyPro data 45 | # siteyear: A character string specifying siteyear 46 | # - used in data_preparation workflow 47 | name_merged <- function(EP_path, siteyear) { 48 | # Names of merged output files 49 | # - data in CSV files and documentation in TXT files 50 | data_name_out <- list.files(EP_path) 51 | data_name_out <- grep("[.][Cc][Ss][Vv]$", data_name_out, value = TRUE) 52 | if (length(data_name_out) == 1) { 53 | data_name_out <- gsub("[.][Cc][Ss][Vv]$", "_met.csv", data_name_out) 54 | } else { 55 | data_name_out <- paste0("eddypro_", siteyear, 56 | "_full_output_merged_adv_met.csv") 57 | } 58 | return(data_name_out) 59 | } 60 | 61 | #' Combine Documentation 62 | #' 63 | #' Read documentation from single or multiple TXT files. In case of multiple 64 | #' files, combine them together with one additional line separating them. 65 | #' 66 | #' @param path A character vector. The full paths to TXT files. 67 | #' 68 | #' @seealso \code{\link{readLines}}. 
69 | #' 70 | #' @export 71 | #' - used in document_merged() below 72 | combine_docu <- function(path) { 73 | unlist(lapply(path, function(x) c(readLines(x, warn = FALSE), ""))) 74 | } 75 | 76 | # Document merged files 77 | # - this function is called for its side effect - writing TXT documentation file 78 | # data_name_out: A character string. Name of merged output file. 79 | # EP_path: A character string specifying folder name including EddyPro data 80 | # Meteo_path: A character string specifying folder name including Meteo data 81 | # out_path: A character string specifying folder name for output files 82 | # Tstamp: A character string specifying timestamp of the computation 83 | # name, mail: character string with contact information 84 | # M: A data frame with merged Meteo data 85 | # - used in data_preparation workflow 86 | document_merged <- function(data_name_out, EP_path, Meteo_path, out_path, 87 | Tstamp, name, mail, M) { 88 | docu_name_out <- gsub("[.][Cc][Ss][Vv]$", "\\.txt", data_name_out) 89 | 90 | EP_names <- list.files(EP_path, full.names = TRUE) 91 | EP_names <- grep("[.][Cc][Ss][Vv]$", EP_names, value = TRUE) 92 | M_names <- list.files(Meteo_path, full.names = TRUE) 93 | M_names <- grep("[.][Cc][Ss][Vv]$", M_names, value = TRUE) 94 | 95 | docu_name_in <- list.files(c(EP_path, Meteo_path), full.names = TRUE) 96 | docu_name_in <- grep("[.][Tt][Xx][Tt]$", docu_name_in, value = TRUE) 97 | 98 | # The documentation file will not be overwritten if it already exists 99 | # - this is to avoid overwriting manually edited documentation 100 | # - to overwrite it, check file content and delete it manually if safe 101 | if (docu_name_out %in% list.files(out_path)) { 102 | message("Combined documentation already exists") 103 | } else { 104 | fp <- file.path(out_path, docu_name_out) 105 | message("saving file to ", fp) 106 | writeLines(c(paste0(Tstamp, ":"), 107 | paste0("Files merged by ", name, " (", mail, ")"), 108 | "", 109 | "Merged files:", 110 | M_names, 
111 | EP_names, 112 | "", 113 | "Variables from meteo database remapped to:", 114 | paste(names(M), varnames(M), sep = " = ", collapse = "\n"), 115 | "", 116 | combine_docu(docu_name_in), 117 | "Information about the R session:", 118 | capture.output(sessionInfo())), 119 | fp, sep = "\n") 120 | } 121 | } 122 | 123 | # Save plots of precheck variables in a single pdf to specified path 124 | # data: A data frame with column names and "timestamp" column in POSIXt format. 125 | # precheck: A character vector of available precheck variables. 126 | # siteyear: A character string specifying siteyear 127 | # Tstamp: A character string specifying timestamp of the computation 128 | # path: A character string specifying folder name for saving the pdf 129 | # width, height: The width and height of the graphics region in inches. 130 | # qrange: A numeric vector of length 2, giving the quantile range of y-axis. 131 | # - used in QC workflow 132 | save_precheck_plots <- function(data, precheck, siteyear, Tstamp, path, 133 | width = 11.00, height = 8.27, 134 | qrange = c(0.005, 0.995)) { 135 | fp <- file.path(path, 136 | paste0(siteyear, "_auxiliary_precheck_", Tstamp, ".pdf")) 137 | message("saving file to ", fp) 138 | pdf(fp, width = width, height = height) 139 | on.exit(dev.off(), add = TRUE) 140 | invisible(lapply(precheck, plot_precheck, x = data, qrange = qrange)) 141 | } 142 | 143 | # Save plots of fluxes with meteo in separate pdfs to specified path 144 | # data: A data frame with column names and "timestamp" column in POSIXt format. 145 | # qc_suffix: A character string identifying respective QC flag included in data. 
146 | # siteyear: A character string specifying siteyear 147 | # sname: A character string to be evaluated by sprintf and %s substituted for flux 148 | # Tstamp: A character string specifying timestamp of the computation 149 | # path: A character string specifying folder name for saving the pdf 150 | # fluxes: A character vector of supported flux names 151 | # width, height: The width and height of the graphics region in inches. 152 | # - used in QC workflow 153 | save_flux_plots <- function(data, qc_suffix = "prelim", siteyear, sname, 154 | Tstamp, path, fluxes, 155 | width = 11.00, height = 8.27) { 156 | for (i in fluxes) { 157 | fp <- file.path( 158 | path, 159 | paste0(siteyear, "_", sprintf(sname, i), "_", Tstamp, ".pdf")) 160 | message("saving file to ", fp) 161 | pdf(fp, width = width, height = height) 162 | qc <- paste("qc", i, qc_suffix, sep = "_") 163 | plot_eddy(data, i, qc, qc) 164 | dev.off() 165 | } 166 | } 167 | 168 | # Show independent or cumulative effect of all filters 169 | # data: A data frame with column names. 170 | # prelim: A tibble with names of quality control flags to combine 171 | # cumul: A logical value that determines if cumulative (cumul = TRUE) or 172 | # individual (cumul = FALSE) effects of quality control flags should be shown. 173 | # - used in QC workflow 174 | plot_QC_summary <- function(data, prelim, cumul) { 175 | gridExtra::grid.arrange(grobs = lapply(names(prelim), function(x) 176 | summary_QC(data, na.omit(pull(prelim, x)), cumul = cumul, plot = TRUE, 177 | flux = x)), 178 | nrow = 2) 179 | } 180 | 181 | # Save QC summary plots produced by plot_QC_summary() 182 | # data: A data frame with column names. 
183 | # prelim: A tibble with names of quality control flags to combine 184 | # path: A character string specifying folder name for saving the png 185 | # siteyear: A character string specifying siteyear 186 | # Tstamp: A character string specifying timestamp of the computation 187 | # width, height: The width and height of the graphics region in inches. 188 | # - used in QC workflow 189 | save_QC_summary_plots <- function(data, prelim, path, siteyear, Tstamp, 190 | width = 297, height = 210) { 191 | fp_ind <- file.path( 192 | path, 193 | paste0(siteyear, "_QC_summary_", Tstamp, ".png")) 194 | message("saving file to ", fp_ind) 195 | ggsave(fp_ind, 196 | plot_QC_summary(data, prelim, cumul = FALSE), 197 | type = "cairo-png", width = width, height = height, units = "mm") 198 | fp_cum <- file.path( 199 | path, 200 | paste0(siteyear, "_QC_summary_cumulative_", Tstamp, ".png")) 201 | message("saving file to ", fp_cum) 202 | ggsave(fp_cum, 203 | plot_QC_summary(data, prelim, cumul = TRUE), 204 | type = "cairo-png", width = width, height = height, units = "mm") 205 | } 206 | 207 | # Combine specified flags for given flux to produce preliminary flags 208 | # - informative naming is useful e.g. 
for despiking and manual QC 209 | # data: A data frame with quality control flags specified in prelim 210 | # prelim: A tibble with names of quality control flags to combine 211 | # - used in QC workflow 212 | combn_prelim_QC <- function(data, prelim) { 213 | res <- sapply(names(prelim), 214 | function(x) combn_QC(data, na.omit(pull(prelim, x)))) 215 | res <- as.data.frame(res) 216 | names(res) <- paste("qc", names(res), substitute(prelim), sep = "_") 217 | return(res) 218 | } 219 | 220 | # Set names of existing manual quality control columns 221 | # fluxes: A character vector containing names of supported fluxes 222 | # man: A data frame with manual quality control flags 223 | # - there might be variables without manual QC or 'man' can be NULL 224 | # - used in QC workflow 225 | set_man_names <- function(fluxes, man) { 226 | mnames <- paste0("qc_", fluxes, "_man") 227 | names(mnames) <- fluxes 228 | is.na(mnames) <- !(mnames %in% names(man)) 229 | mnames <- as.list(mnames) 230 | return(mnames) 231 | } 232 | 233 | # Document quality control step 234 | # - this function is called for its side effect - writing TXT documentation file 235 | # Tstamp: A character string specifying timestamp of the computation 236 | # name, mail: character string with contact information 237 | # strg_applied: A logical value documenting whether storage correction was applied 238 | # forGF: A tibble with names of quality control flags used for final QC 239 | # path: A character string specifying folder name for saving the TXT file 240 | # siteyear: A character string specifying siteyear 241 | # - used in QC workflow 242 | document_QC <- function(Tstamp, name, mail, strg_applied, forGF, 243 | path, siteyear) { 244 | fp <- file.path( 245 | path, 246 | paste0(siteyear, '_QC_info_', Tstamp, '.txt')) 247 | message("saving file to ", fp) 248 | writeLines(c(paste0(Tstamp, ":"), 249 | paste0("Quality controlled by ", name, " (", mail, ")"), 250 | "", 251 | paste0("Storage corrected fluxes: ", 
strg_applied), 252 | "", 253 | paste0("Applied w_rot correction: ", applied_w_rot_correction), 254 | "", 255 | "Applied quality control scheme:", 256 | capture.output(as.data.frame(forGF)), 257 | "", 258 | "Information about the R session:", 259 | capture.output(sessionInfo())), 260 | fp, 261 | sep = "\n") 262 | } 263 | 264 | # Document gap-filling and flux partitioning 265 | # - this function is called for its side effect - writing TXT documentation file 266 | # all_out: A data frame with column names containing REddyProc exported results 267 | # Tstamp: A character string specifying timestamp of the computation 268 | # name, mail: character string with contact information 269 | # siteyear: A character string specifying siteyear 270 | # lat, long, tzh: Numeric values specifying latitude, longitude and timezone 271 | # FP_temp: A character string. Temperature used for flux partitioning 272 | # seasonal_ustar: A logical value. Was ustar threshold resolved seasonally? 273 | # use_CPD: A logical value. Was change-point detection method used? 
274 | # path: A character string specifying folder name for saving the TXT file 275 | # - used in GF workflow 276 | document_GF <- function(all_out, Tstamp, name, mail, siteyear, lat, long, tzh, 277 | FP_temp, fixed_UT, seasonal_ustar, use_CPD, path) { 278 | # compute flux availability percentage 279 | perc_records <- nrow(all_out) / 100 280 | flux_avail_names <- c("H_orig", "LE_orig", "NEE_uStar_orig") 281 | avail_rec <- lapply(flux_avail_names, function(x) { 282 | temp <- table(!is.na(all_out[x])) 283 | round(unname(temp["TRUE"] / perc_records), 1) 284 | }) 285 | names(avail_rec) <- flux_avail_names 286 | 287 | # document GF output 288 | fp <- file.path( 289 | path, 290 | paste0(siteyear, '_documentation_', Tstamp, '.txt')) 291 | message("saving file to ", fp) 292 | writeLines(c(paste0(Tstamp, ":"), 293 | paste0("Processed by ", name, " (", mail, ")"), 294 | "", 295 | paste0("Siteyear:"), 296 | siteyear, 297 | "", 298 | "Used site metadata:", 299 | paste0("latitude = ", lat, ", longitude = ", long, ", timezone = ", 300 | tzh), 301 | "", 302 | "Temperature used for flux partitioning:", 303 | FP_temp, 304 | "", 305 | "Ustar filtering settings:", 306 | if (is.na(fixed_UT)) { 307 | c(paste0("seasonal_ustar = ", seasonal_ustar), 308 | paste0("use_changepoint_detection = ", use_CPD)) 309 | } else { 310 | paste("fixed_ustar_threshold:", fixed_UT, "m s-1") 311 | }, 312 | "", 313 | "Availability of original records for respective flux:", 314 | paste0("H = ", avail_rec$H_orig, "%"), 315 | paste0("LE = ", avail_rec$LE_orig, "%"), 316 | paste0("NEE = ", avail_rec$NEE_uStar_orig, "%"), 317 | "", 318 | "Information about the R session:", 319 | capture.output(sessionInfo())), 320 | fp, 321 | sep = "\n") 322 | } 323 | 324 | # Create utility function for saving plots to png 325 | # x: A character string specifying naming of the given plot 326 | # path: A character string specifying folder name for saving the png 327 | # siteyear: A character string specifying siteyear 328 | # 
Tstamp: A character string specifying timestamp of the computation 329 | # width, height: The width and height of the graphics region in pixels 330 | # res: An integer specifying png resolution (see ?png) 331 | # - used in Summary workflow 332 | save_png <- function(x, path, siteyear, Tstamp, width = 3508, height = 2480, 333 | res = 400) { 334 | png(file.path( 335 | path, 336 | paste0(siteyear, "_", x, "_", Tstamp, ".png")), 337 | width = width, height = height, res = res) 338 | } 339 | 340 | # EOF 341 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | ## EC_workflow 2025-04-27 2 | 3 | - all workflow files moved to common folder EC_workflow 4 | - site-year prefix is now kept only for settings file as workflow files should 5 | not need editing 6 | - improved efficiency of folder structure and command line support of 7 | folder names by the implementation of make_paths() 8 | - automated loading of inputs using make_paths() 9 | - Meteo data is expected as a single file with header and units that can be read 10 | by read_eddy() 11 | - settings for all workflow files were extracted to a single file 12 | (siteyear_settings.R) for easier workflow updates and setup editing 13 | - README: updated folder structure description and graphics 14 | - README: new sections QC principles and Flagging scheme 15 | - README: new section Change of EC system or EddyPro settings 16 | - allow flexibility of setting processing period (start and end 17 | variables instead of year) 18 | - automated setting of optimal TempRange in nighttime partitioning 19 | (REddyProc package) 20 | 21 | ## EC_workflow 2023-04-14 22 | 23 | - automate w_rot correction 24 | - update Zenodo link 25 | - README: include link to openeddy tutorials 26 | 27 | ## EC_workflow 2022-03-18 28 | 29 | - update Zenodo files, workflows and description 30 | - add utilities file with functions related to 
workflow 31 | - README: describe Manual QC and how to load meteo 32 | - rename workflows to reflect their order 33 | - include flags for checking instruments in Czechglobe-specific 34 | section 35 | - increase angular resolution of wind roses 36 | - support G, SWC and GWL variables 37 | - assure that ggplots are printed also when sourcing 38 | - specify auxiliary vars in check_manually() 39 | - update requirements for package version 40 | - README: include new QC filters 41 | - Czechglobe specific branch removed after formalizing instrument 42 | filters 43 | - formalize documentation functions and move them to utilities 44 | - simplify QC workflow - formalize functions and move to utilities 45 | - document w_rot correction 46 | - inform about saving files 47 | - amend GF workflow and move GF documentation to utilities 48 | - add support for fixed Ustar threshold 49 | 50 | ## EC_workflow 2018-09-17 51 | 52 | - include workflow version date suffix 53 | - documentation improvements 54 | - gap-fill VPD if any record is NA 55 | - include workflow description in README 56 | - provide link to datasets through Zenodo 57 | - include ROI boundary description and figs in README 58 | 59 | ## EC_workflow (no date suffix) 60 | 61 | ### 2018-09-09 62 | 63 | - update QC workflow with manual QC and remove spikesHF & wresid 64 | filter 65 | - correct units in summary (fractions & ratios) 66 | 67 | ### 2018-09-03 68 | 69 | - convert NEE to NEP 70 | - implement support for Meteo vars replicates 71 | 72 | ### 2018-08-31 73 | 74 | - implement automated merging of EddyPro & Meteo 75 | 76 | ### 2018-08-28 77 | 78 | - FCO2 renamed to NEE for both storage corrected and uncorrected 79 | fluxes to unify workflows 80 | - unify QC for sites with/without storage correction 81 | - include wind rose plotting 82 | -------------------------------------------------------------------------------- /Processing_chain.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lsigut/EC_workflow/59a347c3a73695bffb7726a05da4e42d5aea890f/Processing_chain.jpg -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = NA, 11 | fig.path = "README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # Eddy covariance workflow 17 | 18 | ## Overview 19 | 20 | The eddy covariance (EC) workflow demonstrates how to use 21 | [openeddy](https://github.com/lsigut/openeddy) package together with 22 | [REddyProc](https://github.com/bgctw/REddyProc) package to facilitate 23 | automated and reproducible EC data post-processing, supporting Tau, H, 24 | LE and CO2 (NEE) fluxes. The presented EC workflow is a set 25 | of post-processing steps that were applied for a particular cropland 26 | site 27 | [CZ-KrP](http://www.europe-fluxdata.eu/home/site-details?id=CZ-KrP). The 28 | main purpose of EC workflow is to show how to utilize the `openeddy` 29 | software infrastructure. It is not meant to represent the optimal best 30 | post-processing workflow, especially concerning the quality control. The 31 | `openeddy` provides enough flexibility for users to adapt 32 | post-processing to specifics of their site and will hopefully facilitate 33 | discussion and exchange of best practices concerning various types of 34 | ecosystems and EC setups. 35 | 36 | Comments in the workflow files explain how individual steps lead to the 37 | desired output. If you look for the settings and details about the 38 | `openeddy` functions, see their respective help files 39 | (`?function_name`). Notice that there is also a set of interactive 40 | [openeddy tutorials](https://github.com/lsigut/openeddy_tutorials) that 41 | provide more context. 
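The packages used throughout the workflow can also be installed up front. Below is a minimal sketch mirroring the `attach_pkg()` helper defined in `utilities.R`; `devtools` is only needed for the GitHub installation of `openeddy`, and the CRAN package names are those attached by the workflow scripts:

```r
# Install openeddy from GitHub (on Windows this may additionally require
# RTools) and the remaining dependencies from CRAN.
if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools")
devtools::install_github("lsigut/openeddy")
install.packages(c("REddyProc", "openair", "tibble"))
```

This step is optional: the workflow scripts call `attach_pkg()`, which installs any missing package on demand.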
42 | 43 | ## Requirements 44 | 45 | The EC workflow is currently aligned with `EddyPro` software 46 | [output](https://www.licor.com/env/support/EddyPro/topics/output-files-full-output.html). 47 | Meteorological data are expected to have passed their own separate workflow 48 | (not in the scope of `openeddy`), i.e. they are already converted to 49 | physical units, quality controlled and gap-filled. 50 | 51 | List of expected meteorological variables (see [Naming strategy with the 52 | EC workflow]): 53 | 54 | | Recommended setup | Minimum setup | 55 | |:-----------------------------------------|:--------------| 56 | | GR, PAR, Rn, Tair, Tsoil, VPD (or RH), P | GR, Tair | 57 | 58 | Gaps in meteorological data are allowed (except for global radiation, 59 | GR, needed for day/night data separation in `despikeLF()`), but 60 | `REddyProc` gap filling of meteorological data performs well mostly for 61 | short gaps. The minimum setup describes the bare minimum needed by the 62 | functions used and would require adaptations of the workflow. 63 | 64 | Processing of multiple or incomplete years is supported but it requires 65 | edits in the EC workflow scripts. Note that for data fragments shorter than 66 | half a year the processing (mainly `REddyProc` gap filling and flux 67 | partitioning) might not be reliable. `REddyProc` has defined constraints 68 | that have to be met, otherwise processing will not proceed and an 69 | informative error message should be produced. Note that `REddyProc` is 70 | meant to fill the gaps within the period when sampling occurs, rather than 71 | extrapolate data from a short measurement period to a much larger 72 | period (e.g. you cannot use a few months of measurements to obtain annual 73 | budgets). 74 | 75 | **Adapting the workflow for a new site** mainly requires editing the 76 | `KRP16_settings_2025-04-27.R` file according to the provided comments. 77 | Other workflow files should remain unchanged. 
Settings edits include the 78 | renaming of meteorological variables to the workflow standard (the `Met_mapping` 79 | object) and the definition of the region of interest (ROI) `boundary`. In order to 80 | run `fetch_filter()`, the `QC` workflow requires the ROI `boundary` for the 81 | given site-year. The ROI is provided by the user in the form of a numeric vector 82 | (see the [ROI boundary] section below and 83 | ). 84 | 85 | Support for other EC processing 86 | [software](https://ameriflux.lbl.gov/resources/resource-list/tools-and-software-for-flux-scientists/raw-data-processing-and-qa-qc/) 87 | (e.g. TK3, EdiRe, EddyUH, EddySoft) is not explicitly provided but an 88 | alternative workflow should already be achievable with the existing 89 | `openeddy` capabilities. The easiest approach could be to `remap_vars()` 90 | using the pairing of column names of the `EddyPro` full output and the EC 91 | processing software used. `EddyPro`-specific tests/filters, mainly 92 | present in `extract_QC()`, would need to be substituted with their 93 | alternatives. Similarly, support for CH4, N2O or 94 | other trace gases is not planned, however they should be feasible to 95 | some degree (note that look-up tables might not be optimal for gap 96 | filling of such fluxes). 97 | 98 | ## Usage 99 | 100 | To run the EC workflow for the example site-year `KRP16`: 101 | 102 | Download `KRP16 - before processing.zip` from 103 | [Zenodo](https://doi.org/10.5281/zenodo.1442530) and unzip it. Run the workflow 104 | files in the specified order according to the instructions there: 105 | 106 | 1. `WF_1_data_preparation`: formatting and merging inputs. 107 | 2. `WF_2_QC`: eddy covariance quality control and storage correction. 108 | 3. `WF_3_GF_&_FP`: uStar filtering, gap filling and flux partitioning. 109 | 4. `WF_4_Summary`: aggregation and plotting of results. 110 | 111 | For a new site-year, the `settings` workflow file should be edited first. 112 | The file prefix `KRP16` can be changed to identify the related site-year.
The date 113 | suffix identifies the workflow version and should not be edited. The workflow 114 | file `utilities` is required by the workflow scripts and is not meant for 115 | user interaction. 116 | 117 | Commands in `WF_1` and `WF_2` should be run one by one, to get better 118 | feedback about problems with data inputs or data quality. In particular, 119 | `WF_2` includes an interactive function (`check_manually()`) that requires 120 | direct user input. `WF_3` should require minimal user supervision. 121 | `WF_4` can be `source()`d. 122 | 123 | Note that using `source()` for the QC workflow will not produce the desired 124 | outcome if the variable `interactive = TRUE`, because `check_manually()` will 125 | expect an interactive session (manual marking of outliers). Once the manual 126 | QC is finalized and saved, changing to `interactive = FALSE` in 127 | `settings` allows the results to be reproduced by sourcing. 128 | 129 | You can compare your results with those of `KRP16 - processed.zip` at 130 | [Zenodo](https://doi.org/10.5281/zenodo.1442530). Notice that in order 131 | to obtain identical results, you would need to copy the subjective 132 | manual screening done by the site PI, located at 133 | `.\level_2\quality_checking\KRP16_manual_QC.csv`. 134 | 135 | ## Description 136 | 137 | The proposed workflow processes eddy covariance data with a single 138 | processing chain consisting of four stages: 139 | 140 | 1. **Data preparation:** prepare data for QC. Meteo data and EddyPro 141 | full output files are validated, accordingly formatted, merged and 142 | saved with documentation. All numeric values are rounded to a 143 | reasonable precision. Meteo variable names are remapped according to 144 | the requirements of the `openeddy` and `REddyProc` packages. 145 | `WF_1_data_preparation` produces files in the `.\level_1\input_for_qc\` 146 | folder. 147 | 148 | 2.
**Quality control:** load the `EddyPro` output and gap-filled 149 | meteorological data and apply automated tests and filters 150 | implemented in `openeddy` to quality check fluxes of momentum (Tau), 151 | sensible (H) and latent heat (LE) and net ecosystem exchange (NEE). 152 | Perform storage correction of fluxes using the discrete (one point) 153 | storage estimate available in the `EddyPro` full output. While this 154 | is sufficient for sites with a short canopy (e.g. the example cropland 155 | site `CZ-KrP`), the one point approximation becomes less suitable with 156 | increasing EC measurement height. Computation of the storage flux from 157 | profile measurements is not in the scope of `openeddy`. Export 158 | documentation of the applied QC and produce the outputs needed in the next 159 | steps. `WF_2_QC` produces files in the `.\level_2\quality_checking\` and 160 | `.\level_2\input_for_gf\` folders. 161 | 162 | 3. **Gap filling and flux partitioning:** use `REddyProc` to estimate 163 | the uStar threshold, apply uStar filtering, gap fill (H, LE, NEE) and 164 | partition (NEE) fluxes. Use `openeddy` to visualize H, LE and NEE 165 | fluxes. The setup allows some processing options to be changed and 166 | documented in an organized way. `WF_3_GF_&_FP` produces files in 167 | `.\level_3\gap_filling\`. 168 | 169 | 4. **Summary:** visualize processed data, convert units and aggregate 170 | results to daily, weekly, monthly and yearly timescales. A limited 171 | number of computed parameters is also produced, including different 172 | uncertainty estimates. `WF_4_Summary` produces files in 173 | `.\level_3\summary\`. 174 | 175 | The EC workflow assumes a certain folder structure for each site-year that 176 | makes data handling more effective.
The folder structure can be created 177 | using `make_paths()` with the following content: 178 | 179 | ```{r echo=FALSE, message=FALSE} 180 | library(openeddy) 181 | library(fs) 182 | zz <- make_paths("site_year", create_dirs = TRUE) 183 | fs::dir_tree("site_year") 184 | unlink("site_year", recursive = TRUE) 185 | ``` 186 | 187 | - Level 1: half-hourly data processed by `EddyPro` and gap-filled 188 | meteorological data. 189 | - Level 2: results and documentation of QC, storage corrected fluxes 190 | for GF & FP. 191 | - Level 3: results of GF & FP and the data set summaries. 192 | 193 | The complete processing chain in the context of the above folder structure 194 | can be summarized as: 195 | 196 | ![](Processing_chain.jpg) 197 | 198 | ## QC principles 199 | 200 | Quality assurance (QA) is always preferred to QC. If a faulty instrument 201 | producing spurious measurements can be exchanged, repaired or 202 | calibrated, that is always preferable to simply flagging and removing the 203 | affected period. This requires frequent checks of instruments, timely 204 | maintenance, well-educated technical support and an established service 205 | routine or calendar. Therefore QC cannot substitute for neglected QA and the 206 | concept "garbage in, garbage out" also applies to the EC workflow. QA and 207 | QC can also be understood as a learning process, described in the 208 | following diagram. 209 | 210 |

211 | 212 | 213 | 214 |

215 | 216 | In `openeddy`, `EddyPro` software serves as a way of input data 217 | standardization. Data frames have defined column names and units with 218 | conserved data formatting. This property is used to easily read the 219 | expected columns, thus the typical input data structure of most 220 | functions is a data frame. Quality control consists of two phases: 221 | 222 | 1. obtaining QC filters (filters must be defined and columns containing 223 | QC flags saved to a data frame) 224 | 225 | 2. applying QC filters (either directly by removing flux values or by 226 | combining all applied filters and ensuring that data will be 227 | interpreted with respect to the combined QC flags). 228 | 229 | This approach allows a complete set of QC filters to be evaluated, selecting 230 | for application only those with the best flagging efficiency (a trade-off 231 | between the count of removed spurious records and the amount of 232 | available records after the QC filter application). Note that the 233 | selected QC scheme can also depend on the type of follow-up analysis 234 | (data application). E.g. if the data will be used to compute annual 235 | budgets, outlying values would bias the look-up table statistics when 236 | filling gaps and thus should be removed. On the other hand, if the focus 237 | of the following analysis is exceptional fluxes, outlying values should be 238 | kept and analyzed. 239 | 240 |
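The two phases can be sketched in base R. This is a schematic illustration only, assuming a data frame with one flux column and two already-computed flag columns; the column names follow the naming strategy described later in this document, but the values are invented:

``` r
# Phase 1: QC filters stored as integer flag columns (0/1/2) in a data
# frame (values invented for illustration).
data <- data.frame(
  NEE           = c(-2.1, 15.3, -4.0, -3.2),
  qc_NEE_SSITC  = c(0L, 2L, 1L, 0L),
  qc_ALL_abslim = c(0L, 0L, 2L, 0L)
)

# Phase 2: combine the independent filters by taking the maximum flag per
# record and interpret the flux with respect to the combined flag
# (flag 2 records are excluded).
data$qc_NEE_forGF <- pmax(data$qc_NEE_SSITC, data$qc_ALL_abslim)
data$NEE_screened <- ifelse(data$qc_NEE_forGF == 2, NA, data$NEE)
data$NEE_screened  # -2.1, NA, NA, -3.2
```

In the actual workflow, `openeddy` helpers handle this bookkeeping; the snippet only shows the principle of keeping flags as columns and combining them at application time.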

241 | 242 | 243 | 244 |

245 | 246 | The application of QC filters depends on whether the applied filters are 247 | independent (most of the QC filters; QC flags are interpreted 248 | independently of their order, as the maximum value is taken) or additive 249 | (*wresid* and *interdep* filters; they serve as flag corrections and 250 | thus the outcome depends on their position within the QC workflow). 251 | 252 |
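The distinction can be demonstrated with a toy calculation (schematic only; this is not how `openeddy` implements the *wresid* and *interdep* corrections):

``` r
# Independent filters: combining by maximum is order-invariant.
a <- c(0L, 2L, 1L)                      # flags from filter A
b <- c(1L, 0L, 2L)                      # flags from filter B
identical(pmax(a, b), pmax(b, a))       # TRUE

# Additive correction (schematic): it increments whatever flag exists so
# far (capped at 2), so its position in the chain changes the result.
corr <- c(1L, 0L, 0L)
after_combining  <- pmin(pmax(a, b) + corr, 2L)
before_combining <- pmax(pmin(a + corr, 2L), b)
identical(after_combining, before_combining)  # FALSE: the chains disagree
```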

253 | 254 | 255 | 256 |

257 | 258 | ### Flagging scheme 259 | 260 | - flag 0 – high quality 261 | - flag 1 – minor issues 262 | - flag 2 – major issues 263 | 264 | Due to the strict testing within the QC workflow, both flag 0 and flag 265 | 1 data are suggested to be used for fundamental research. Only flag 2 266 | (low data quality) is meant to be discarded. Note that if QC filter A 267 | and filter B both flag 25% of data with flag 2, the fraction of excluded 268 | data is typically lower than 50%. This is because the averaging periods 269 | with major issues are often flagged by multiple filters. 270 | 271 | ## ROI boundary 272 | 273 | The outline delimiting the spatial extent of the studied ecosystem 274 | (region of interest; ROI) is specified by its ROI boundary that 275 | describes the distance from the EC tower to the edge of the studied 276 | ecosystem for a given wind direction. In order to work with `openeddy`, 277 | the ROI boundary has to be provided as a numeric vector with the following 278 | properties: 279 | 280 | - The number of circular sectors is the same as the number of provided 281 | distances (length of the vector). 282 | - The angular resolution of the ROI boundary is given by 283 | `360° / number of angular sectors`. 284 | - The ROI boundary distances are assigned to the centers of their 285 | respective circular sectors, with the first sector centered on 0°. 286 | 287 | ### ROI boundary example 288 | 289 |

290 | 291 | ![](ROI%20boundary%20example.jpg) 292 | 293 |

294 | 295 | In this simplified case the ROI boundary would be specified as: 296 | 297 | ``` r 298 | c(150, 200, 250, 300) 299 | ``` 300 | 301 | **Interpretation:** 302 | 303 | - There would be 4 circular sectors with 90° angular resolution. 304 | - The ROI boundary is specified for the whole first sector (315°, 45°] at 305 | a distance of 150 m from the tower (the center of the sector is 0°). 306 | - The boundary of the second sector (45°, 135°] is at a distance of 200 m. 307 | - The third sector (135°, 225°] is at a distance of 250 m. 308 | - The fourth sector (225°, 315°] is at a distance of 300 m. 309 | 310 | A realistic representation of a ROI boundary can look e.g. like this: 311 | 312 | ![](CZ-Krp_fetch.png) 313 | 314 | ## Change of EC system or EddyPro settings 315 | 316 | Functions `extract_QC(..., filters = c("missfrac", "wresid"))` and 317 | `interdep()` require information about the EC or EddyPro setup. For the 318 | *missfrac* filter it is the number of records in an averaging period (e.g. 319 | 36 000 for a half-hourly period with 20 Hz measurements), for *wresid* it 320 | is the coordinate rotation type (double or planar fit rotation), and for 321 | *interdep* it is the infrared gas analyzer type (either open path or 322 | (en)closed path). These specifications can be provided as single values 323 | if the properties did not change throughout the data set. If the 324 | properties changed (typically when merging multiple EddyPro files from an 325 | extended period), the user can provide additional columns ("max_records", 326 | "used_rotation", "used_IRGA") via `label_periods()` to specify these 327 | properties for each averaging period (see the relevant function help files: 328 | `?extract_QC`, `?interdep`, `?label_periods`). For the columns to be 329 | recognized, respective changes also need to be made in the QC workflow (see 330 | the description there).
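If the setup changed mid-year, the per-period columns could look like the sketch below. The values and their exact spelling are hypothetical; see `?label_periods` for the actual `openeddy` interface and the accepted values:

``` r
# Hypothetical per-period setup description; the column names follow the
# text above, the values are illustrative only.
periods <- data.frame(
  timestamp     = seq(as.POSIXct("2016-06-01 00:15", tz = "GMT"),
                      by = "30 min", length.out = 4),
  max_records   = c(36000, 36000, 18000, 18000),  # 20 Hz vs 10 Hz acquisition
  used_rotation = c("double", "double", "planar fit", "planar fit"),
  used_IRGA     = c("enclosed", "enclosed", "open", "open")
)
periods
```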
331 | 332 | ## Naming strategy with the EC workflow 333 | 334 | In order to take advantage of `openeddy` default arguments, a certain 335 | naming strategy is recommended. 336 | 337 | `EddyPro` full output [variable 338 | names](https://www.licor.com/env/support/EddyPro/topics/output-files-full-output.html) 339 | are used with a few modifications where symbols were included in the 340 | variable name (e.g. the Monin-Obukhov stability parameter `(z-d)/L` is 341 | corrected to `zeta`). 342 | 343 | The expected names of meteorological variables are, for historical 344 | reasons: 345 | 346 | - GR: global radiation [W m-2] 347 | - PAR: photosynthetically active radiation [umol m-2 s-1] 348 | - Rn: net radiation [W m-2] 349 | - Tair: air temperature at EC height [degC] 350 | - Tsoil: soil temperature at soil surface [degC] 351 | - RH: relative humidity at EC height [%] 352 | - VPD: vapor pressure deficit at EC height [hPa] 353 | - P: precipitation [mm] 354 | 355 | `openeddy` offers full flexibility concerning QC column names. However, 356 | in order to avoid QC column duplication and to partly document the type 357 | of QC test/filter and the flux that it corresponds to, the following naming 358 | strategy was devised: 359 | 360 | ### QC prefixes 361 | 362 | They specify which flux is affected by a given QC column: 363 | 364 | - qc_Tau\_, qc_H, qc_LE, qc_NEE: only applicable for the respective 365 | fluxes. 366 | - qc_SA\_: applicable to fluxes relying only on the sonic (Tau, H). 367 | - qc_GA\_: applicable to fluxes relying on the GA (LE, NEE); only GA 368 | issues considered. 369 | - qc_SAGA\_: applicable to fluxes relying on both SA and GA (LE, NEE); 370 | both SA and GA issues considered. 371 | - qc_ALL\_: applicable to all fluxes (in practice often not applied to 372 | Tau). 373 | 374 | ### QC suffixes 375 | 376 | They specify which QC test/filter was applied to get the QC flags: 377 | 378 | - \_SSITC: steady state test and test of integral turbulence 379 | characteristics.
380 | - \_spikesHF: check of [high frequency data spike 381 | percentage](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Despiking) 382 | in averaging period against thresholds. 383 | - \_ampres: check of [amplitude 384 | resolution](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Amplituderesolution) 385 | in the recorded data. 386 | - \_dropout: check of 387 | [drop-outs](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Dropouts), 388 | i.e. situations when the time series stays for "too long" on a value 389 | that is far from the mean. 390 | - \_abslim: check of [absolute 391 | limits](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Absolutelimits) 392 | when raw data are out of plausible range. 393 | - \_skewkurt_sf, \_skewkurt_hf, \_skewkurt: check of [skewness and 394 | kurtosis](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Skewnessandkurtosis) 395 | limits. 396 | - \_discont_sf, \_discont_hf, \_discont: check of 397 | [discontinuities](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Discontinuities) 398 | that lead to semi-permanent changes in the time series. 399 | - \_timelag_sf, \_timelag_hf, \_timelag: check of estimated 400 | [timelags](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Timelags) 401 | compared to the expected timelags. 402 | - \_attangle: check of [angle of 403 | attack](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Angleofattack). 404 | - \_nonsteady: check of [steadiness of horizontal 405 | wind](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Steadinessofhorizontalwind). 406 | - \_missfrac: check of missing data in averaging period against 407 | thresholds. 
408 | - \_scf: check of spectral correction factor against thresholds. 409 | - \_wresid: check of mean unrotated *w* (double rotation) or *w* 410 | residual (planar fit) against thresholds. 411 | - \_runs: check of runs with repeating values. 412 | - \_lowcov: check of fluxes too close to zero (assuming issues during 413 | covariance computation). 414 | - \_var: check of variances against thresholds. 415 | - \_LI7200: check of CO2 and H2O signal strength against thresholds. 416 | - \_interdep: flux interdependency. 417 | - \_man: manual quality control. 418 | - \_spikesLF: identification of likely outliers in low frequency data. 419 | - \_fetch70: check of distance corresponding to [70% signal 420 | contribution](https://www.licor.com/env/support/EddyPro/topics/estimating-flux-footprint.html) 421 | against fetch distance for given wind direction.\ 422 | - \_forGF: the composite QC column used to screen fluxes for 423 | gap-filling, combining selected test/filter results from above. 424 | 425 | For details see the documentation of `extract_QC()`. 426 | 427 | The `REddyProc` naming strategy is available at the [MPI Online Tool 428 | website](https://bgc.iwww.mpg.de/5624929/Output-Format). 429 | 430 | ## Manual QC guide 431 | 432 | Theoretically, manual QC using `check_manually()` introduces 433 | subjectivity into the workflow and should be avoided. However, in 434 | practice, certain events can occur that might be difficult to screen 435 | based on auxiliary data, or the tests are not sensitive enough to capture 436 | them. It should be noted that data not falling within the expected range 437 | might represent interesting rare phenomena and should be carefully 438 | investigated before manual removal. The screening typically depends on 439 | the user's experience with the site, considering meteo conditions and 440 | phenology. Examples of manually excluded half-hours could be those 441 | affected by precipitation, strong advection and unexpected technical 442 | issues.
Neighbors of outlying values or isolated points can be good 443 | candidates for exclusion as they might have escaped the automated 444 | screening. Changes of weather fronts can lead to unexpected energy fluxes 445 | that nevertheless reflect real conditions. In such conditions it could 446 | depend on the research question whether these cases should be excluded. 447 | 448 | ## Abbreviations 449 | 450 | - EC: Eddy Covariance 451 | - QC: Quality Control 452 | - QA: Quality Assurance 453 | - SA: Sonic Anemometer 454 | - GA: Gas Analyzer 455 | - Tau: Momentum flux [kg m-1 s-2] 456 | - H: Sensible heat flux [W m-2] 457 | - LE: Latent heat flux [W m-2] 458 | - NEE: Net ecosystem exchange [umol m-2 s-1] 459 | - u: Longitudinal wind speed component [m s-1] 460 | - w: Vertical wind speed component [m s-1] 461 | - ts: Sonic temperature [degC] 462 | - h2o: H2O concentration [mmol mol-1] 463 | - co2: CO2 concentration [umol mol-1] 464 | 465 | ## References 466 | 467 | A publication describing `openeddy` is not yet available. When describing 468 | the proposed quality control scheme, please refer to: 469 | 470 | McGloin, R., Sigut, L., Havrankova, K., Dusek, J., Pavelka, M., Sedlak, 471 | P., 2018. Energy balance closure at a variety of ecosystems in Central 472 | Europe with contrasting topographies. Agric. For. Meteorol. 248, 473 | 418-431. 474 | 475 | Other references relevant to the applied quality control: 476 | 477 | Foken, T., Wichura, B., 1996. Tools for quality assessment of 478 | surface-based flux measurements. Agric. For. Meteorol. 78, 83--105. 479 | [https://doi.org/10.1016/0168-1923(95)02248-1](https://doi.org/10.1016/0168-1923(95)02248-1){.uri} 480 | 481 | Vickers, D. and Mahrt, L., 1997. Quality Control and Flux Sampling 482 | Problems for Tower and Aircraft Data. Journal of Atmospheric and Oceanic 483 | Technology, 14(3), 512-526.
484 | [https://doi.org/10.1175/1520-0426(1997)014\<0512:QCAFSP\>2.0.CO;2](https://doi.org/10.1175/1520-0426(1997)014%3C0512:QCAFSP%3E2.0.CO;2){.uri} 485 | 486 | Mauder, M., Cuntz, M., Drüe, C., Graf, A., Rebmann, C., Schmid, H.P., 487 | Schmidt, M., Steinbrecher, R., 2013. A strategy for quality and 488 | uncertainty assessment of long-term eddy-covariance measurements. Agric. 489 | For. Meteorol. 169, 122-135, 490 | 491 | 492 | The methodology and benchmark of `REddyProc 1.1.3` is described in the 493 | following paper: 494 | 495 | Wutzler, T., Lucas-Moffat, A., Migliavacca, M., Knauer, J., Sickel, K., 496 | Šigut, L., Menzer, O., and Reichstein, M. (2018): Basic and extensible 497 | post-processing of eddy covariance flux data with REddyProc, 498 | Biogeosciences, 15, 5015-5030, 499 | . 500 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Eddy covariance workflow 5 | 6 | ## Overview 7 | 8 | The eddy covariance (EC) workflow demonstrates how to use 9 | [openeddy](https://github.com/lsigut/openeddy) package together with 10 | [REddyProc](https://github.com/bgctw/REddyProc) package to facilitate 11 | automated and reproducible EC data post-processing, supporting Tau, H, 12 | LE and CO2 (NEE) fluxes. The presented EC workflow is a set 13 | of post-processing steps that were applied for a particular cropland 14 | site 15 | [CZ-KrP](http://www.europe-fluxdata.eu/home/site-details?id=CZ-KrP). The 16 | main purpose of EC workflow is to show how to utilize the `openeddy` 17 | software infrastructure. It is not meant to represent the optimal best 18 | post-processing workflow, especially concerning the quality control. 
`openeddy` provides enough flexibility for users to adapt 20 | post-processing to the specifics of their site and will hopefully facilitate 21 | discussion and exchange of best practices concerning various types of 22 | ecosystems and EC setups. 23 | 24 | Comments in the workflow files explain how individual steps lead to the 25 | desired output. If you are looking for the settings and details of the 26 | `openeddy` functions, see their respective help files 27 | (`?function_name`). Notice that there is also a set of interactive 28 | [openeddy tutorials](https://github.com/lsigut/openeddy_tutorials) that 29 | provide more context. 30 | 31 | ## Requirements 32 | 33 | The EC workflow is currently aligned with `EddyPro` software 34 | [output](https://www.licor.com/env/support/EddyPro/topics/output-files-full-output.html). 35 | It is expected that meteorological data have passed their own separate workflow 36 | (not in the scope of `openeddy`), i.e. they are already converted to 37 | physical units, have undergone quality control and are gap-filled. 38 | 39 | List of expected meteorological variables (see [Naming strategy with the 40 | EC workflow](#naming-strategy-with-the-ec-workflow)): 41 | 42 | | Recommended setup | Minimum setup | 43 | |:-----------------------------------------|:--------------| 44 | | GR, PAR, Rn, Tair, Tsoil, VPD (or RH), P | GR, Tair | 45 | 46 | Gaps in meteorological data are allowed (except for global radiation, 47 | GR, needed for day/night data separation in `despikeLF()`), but 48 | `REddyProc` gap filling of meteorological data performs well mostly for 49 | short gaps. The minimum setup describes the bare minimum needed for the 50 | functions used and would require adaptations of the workflow. 51 | 52 | Processing of multiple or incomplete years is supported but it requires 53 | edits in the EC workflow scripts. Note that for data fragments shorter than 54 | half a year the processing (mainly `REddyProc` gap filling and flux 55 | partitioning) might not be reliable.
`REddyProc` has defined constraints 56 | that have to be met, otherwise processing will not proceed and an 57 | informative error message should be produced. Note that `REddyProc` is 58 | meant to fill the gaps within the period when sampling occurs, instead 59 | of extrapolating data from a short measurement period to a much larger 60 | period (e.g. you cannot use a few months of measurements to obtain annual 61 | budgets). 62 | 63 | **Adapting the workflow for a new site** mainly requires editing the 64 | `KRP16_0_settings_2025-04-27.R` file according to the provided comments. 65 | Other workflow files should remain unchanged. Settings edits include the 66 | renaming of meteorological variables to the workflow standard (the `Met_mapping` 67 | object) and the definition of the region of interest (ROI) `boundary`. In order to 68 | run `fetch_filter()`, the `QC` workflow requires the ROI `boundary` for the 69 | given site-year. The ROI is provided by the user in the form of a numeric vector 70 | (see the [ROI boundary](#roi-boundary) section below and 71 | ). 72 | 73 | Support for other EC processing 74 | [software](https://ameriflux.lbl.gov/resources/resource-list/tools-and-software-for-flux-scientists/raw-data-processing-and-qa-qc/) 75 | (e.g. TK3, EdiRe, EddyUH, EddySoft) is not explicitly provided but an 76 | alternative workflow should already be achievable with the existing 77 | `openeddy` capabilities. The easiest approach could be to `remap_vars()` 78 | using the pairing of column names of the `EddyPro` full output and the EC 79 | processing software used. `EddyPro`-specific tests/filters, mainly 80 | present in `extract_QC()`, would need to be substituted with their 81 | alternatives. Similarly, support for CH4, N2O or 82 | other trace gases is not planned, however they should be feasible to 83 | some degree (note that look-up tables might not be optimal for gap 84 | filling of such fluxes).
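The column-name pairing idea can be illustrated in base R. The foreign column names below are invented and the snippet deliberately does not use the actual `remap_vars()` interface (see its help file for that):

``` r
# Hypothetical pairing: names used by another EC software (left) and the
# corresponding EddyPro full output names expected by the workflow (right).
pairing <- c(Fc_wm2 = "co2_flux", Hs_wm2 = "H", LE_wm2 = "LE")

# A data frame as exported by the other software (invented values):
out <- data.frame(Fc_wm2 = c(-2.1, 1.3), Hs_wm2 = c(50, 80), other = c(1, 2))

# Rename the matched columns to their EddyPro equivalents:
matched <- names(out) %in% names(pairing)
names(out)[matched] <- pairing[names(out)[matched]]
names(out)  # "co2_flux" "H" "other"
```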
85 | 86 | ## Usage 87 | 88 | To run the EC workflow for the example site-year `KRP16`: 89 | 90 | Download `KRP16 - before processing.zip` from 91 | [Zenodo](https://doi.org/10.5281/zenodo.1442530) and unzip it. Run the workflow 92 | files in the specified order according to the instructions there: 93 | 94 | 1. `WF_1_data_preparation`: formatting and merging inputs. 95 | 2. `WF_2_QC`: eddy covariance quality control and storage correction. 96 | 3. `WF_3_GF_&_FP`: uStar filtering, gap filling and flux partitioning. 97 | 4. `WF_4_Summary`: aggregation and plotting of results. 98 | 99 | For a new site-year, the `settings` workflow file should be edited first. 100 | The file prefix `KRP16` can be changed to identify the related site-year. The date 101 | suffix identifies the workflow version and should not be edited. The workflow 102 | file `utilities` is required by the workflow scripts and is not meant for 103 | user interaction. 104 | 105 | Commands in `WF_1` and `WF_2` should be run one by one, to get better 106 | feedback about problems with data inputs or data quality. In particular, 107 | `WF_2` includes an interactive function (`check_manually()`) that requires 108 | direct user input. `WF_3` should require minimal user supervision. 109 | `WF_4` can be `source()`d. 110 | 111 | Note that using `source()` for the QC workflow will not produce the desired 112 | outcome if the variable `interactive = TRUE`, because `check_manually()` will 113 | expect an interactive session (manual marking of outliers). Once the manual 114 | QC is finalized and saved, changing to `interactive = FALSE` in 115 | `settings` allows the results to be reproduced by sourcing. 116 | 117 | You can compare your results with those of `KRP16 - processed.zip` at 118 | [Zenodo](https://doi.org/10.5281/zenodo.1442530). Notice that in order 119 | to obtain identical results, you would need to copy the subjective 120 | manual screening done by the site PI, located at 121 | `.\level_2\quality_checking\KRP16_manual_QC.csv`.
122 | 123 | ## Description 124 | 125 | The proposed workflow processes eddy covariance data with a single 126 | processing chain consisting of four stages: 127 | 128 | 1. **Data preparation:** prepare data for QC. Meteo data and EddyPro 129 | full output files are validated, accordingly formatted, merged and 130 | saved with documentation. All numeric values are rounded to a 131 | reasonable precision. Meteo variable names are remapped according to 132 | the requirements of the `openeddy` and `REddyProc` packages. 133 | `WF_1_data_preparation` produces files in the `.\level_1\input_for_qc\` 134 | folder. 135 | 136 | 2. **Quality control:** load the `EddyPro` output and gap-filled 137 | meteorological data and apply automated tests and filters 138 | implemented in `openeddy` to quality check fluxes of momentum (Tau), 139 | sensible (H) and latent heat (LE) and net ecosystem exchange (NEE). 140 | Perform storage correction of fluxes using the discrete (one point) 141 | storage estimate available in the `EddyPro` full output. While this 142 | is sufficient for sites with a short canopy (e.g. the example cropland 143 | site `CZ-KrP`), the one point approximation becomes less suitable with 144 | increasing EC measurement height. Computation of the storage flux from 145 | profile measurements is not in the scope of `openeddy`. Export 146 | documentation of the applied QC and produce the outputs needed in the next 147 | steps. `WF_2_QC` produces files in the `.\level_2\quality_checking\` and 148 | `.\level_2\input_for_gf\` folders. 149 | 150 | 3. **Gap filling and flux partitioning:** use `REddyProc` to estimate 151 | the uStar threshold, apply uStar filtering, gap fill (H, LE, NEE) and 152 | partition (NEE) fluxes. Use `openeddy` to visualize H, LE and NEE 153 | fluxes. The setup allows some processing options to be changed and 154 | documented in an organized way. `WF_3_GF_&_FP` produces files in 155 | `.\level_3\gap_filling\`. 156 | 157 | 4.
**Summary:** visualize processed data, convert units and aggregate 158 | results to daily, weekly, monthly and yearly timescales. A limited 159 | number of computed parameters is also produced, including different 160 | uncertainty estimates. `WF_4_Summary` produces files in 161 | `.\level_3\summary\`. 162 | 163 | The EC workflow assumes a certain folder structure for each site-year that 164 | makes data handling more effective. The folder structure can be created 165 | using `make_paths()` with the following content: 166 | 167 | site_year 168 | ├── level_1 169 | │ ├── input_for_qc 170 | │ ├── qc_input_eddypro 171 | │ └── qc_input_meteo 172 | ├── level_2 173 | │ ├── input_for_gf 174 | │ └── quality_checking 175 | │ ├── precheck 176 | │ │ └── wd_dependency 177 | │ └── qc_summary 178 | └── level_3 179 | ├── gap_filling 180 | │ ├── plots 181 | │ └── ustar_filtering 182 | └── summary 183 | └── png 184 | 185 | - Level 1: half-hourly data processed by `EddyPro` and gap-filled 186 | meteorological data. 187 | - Level 2: results and documentation of QC, storage corrected fluxes for 188 | GF & FP. 189 | - Level 3: results of GF & FP and the data set summaries. 190 | 191 | The complete processing chain in the context of the above folder structure 192 | can be summarized as: 193 | 194 | ![](Processing_chain.jpg) 195 | 196 | ## QC principles 197 | 198 | Quality assurance (QA) is always preferred to QC. If a faulty instrument 199 | producing spurious measurements can be exchanged, repaired or 200 | calibrated, that is always preferable to simply flagging and removing the 201 | affected period. This requires frequent checks of instruments, timely 202 | maintenance, well-educated technical support and an established service 203 | routine or calendar. Therefore QC cannot substitute for neglected QA and the 204 | concept “garbage in, garbage out” also applies to the EC workflow. QA and 205 | QC can also be understood as a learning process, described in the 206 | following diagram. 207 | 208 |

209 | 210 | 211 | 212 |

213 | 214 | In `openeddy`, `EddyPro` software serves as a way of input data 215 | standardization. Data frames have defined column names and units with 216 | conserved data formatting. This property is used to easily read the 217 | expected columns, thus the typical input data structure of most 218 | functions is a data frame. Quality control consists of two phases: 219 | 220 | 1. obtaining QC filters (filters must be defined and columns containing 221 | QC flags saved to a data frame) 222 | 223 | 2. applying QC filters (either directly by removing flux values or by 224 | combining all applied filters and ensuring that data will be 225 | interpreted with respect to the combined QC flags). 226 | 227 | This approach allows a complete set of QC filters to be evaluated, selecting 228 | for application only those with the best flagging efficiency (a trade-off 229 | between the count of removed spurious records and the amount of 230 | available records after the QC filter application). Note that the 231 | selected QC scheme can also depend on the type of follow-up analysis 232 | (data application). E.g. if the data will be used to compute annual 233 | budgets, outlying values would bias the look-up table statistics when 234 | filling gaps and thus should be removed. On the other hand, if the focus 235 | of the following analysis is exceptional fluxes, outlying values should be 236 | kept and analyzed. 237 | 238 |

239 | ![](combn_QC.jpg) 240 | 241 | 242 |

243 | 244 | The application of QC filters depends on whether the applied filters are 245 | independent (most of the QC filters; QC flags are interpreted 246 | independently of their order, as the maximum flag value is taken) or additive 247 | (*wresid* and *interdep* filters; they serve as flag corrections and 248 | thus their outcome depends on their position within the QC workflow). 249 | 250 |
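The difference can be demonstrated with a schematic base R example. The flag vectors are made up, and the "+1" correction is only a stand-in for what a flag correction does, not the actual *wresid* or *interdep* implementation.

``` r
# Two independent filters: combined by the maximum flag, order is irrelevant
flags_A <- c(0, 1, 2)
flags_B <- c(1, 0, 0)
identical(pmax(flags_A, flags_B), pmax(flags_B, flags_A))  # TRUE

# A schematic additive correction that raises existing flags by one
# (capped at flag 2) gives different results depending on when it enters:
correction <- c(1, 0, 1)
early <- pmax(pmin(flags_A + correction, 2), flags_B)  # corrected before combining
late  <- pmin(pmax(flags_A, flags_B) + correction, 2)  # corrected after combining
identical(early, late)  # FALSE
```

This is why the additive filters have a fixed position within the QC workflow, while the independent filters can be evaluated and combined in any order.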

251 | ![](qc_application.jpg) 252 | 253 | 254 |

255 | 256 | ### Flagging scheme 257 | 258 | - flag 0 – high quality 259 | - flag 1 – minor issues 260 | - flag 2 – major issues 261 | 262 | Due to the strict testing within the QC workflow, both flag 263 | 0 and flag 1 data are recommended for use in fundamental research. Only flag 2 264 | (low data quality) is meant to be discarded. Note that if QC filter A 265 | and filter B both flag 25% of data with flag 2, the fraction of excluded 266 | data is typically lower than 50%. This is because the averaging periods 267 | with major issues are often flagged by multiple filters. 268 | 269 | ## ROI boundary 270 | 271 | The outline delimiting the spatial extent of the studied ecosystem 272 | (region of interest; ROI) is specified by its ROI boundary, which 273 | describes the distance from the EC tower to the edge of the studied 274 | ecosystem for a given wind direction. In order to work with `openeddy`, 275 | the ROI boundary has to be provided as a numeric vector with the following 276 | properties: 277 | 278 | - The number of circular sectors is the same as the number of provided 279 | distances (the length of the vector). 280 | - The angular resolution of the ROI boundary is given by 281 | `360° / number of circular sectors`. 282 | - The ROI boundary distances are assigned to the centers of their 283 | respective circular sectors, with the first sector centered on 0°. 284 | 285 | ### ROI boundary example 286 | 287 |

288 | 289 | ![](ROI%20boundary%20example.jpg) 290 | 291 |

292 | 293 | In this simplified case, the ROI boundary would be specified as: 294 | 295 | ``` r 296 | c(150, 200, 250, 300) 297 | ``` 298 | 299 | **Interpretation:** 300 | 301 | - There would be 4 circular sectors with 90° angular resolution. 302 | - The ROI boundary is specified for the whole first sector (315°, 45°\] at 303 | the distance of 150 m from the tower (the center of the sector is 0°). 304 | - The boundary of the second sector (45°, 135°\] is at the distance of 200 m. 305 | - The third sector (135°, 225°\] is at the distance of 250 m. 306 | - The fourth sector (225°, 315°\] is at the distance of 300 m. 307 | 308 | A realistic representation of a ROI boundary can look e.g. like this: 309 | 310 | ![](CZ-Krp_fetch.png) 311 | 312 | ## Change of EC system or EddyPro settings 313 | 314 | The functions `extract_QC(..., filters = c("missfrac", "wresid"))` and 315 | `interdep()` require information about the EC or EddyPro setup. For the 316 | *missfrac* filter it is the number of records in the averaging period (e.g. 317 | 36 000 for a half-hourly period with 20 Hz measurements), for *wresid* it 318 | is the coordinate rotation type (double or planar fit rotation), and for 319 | *interdep* it is the infrared gas analyzer type (either open path or 320 | (en)closed path). These specifications can be provided as single values 321 | if the properties did not change throughout the data set. If the 322 | properties changed (typically when merging multiple EddyPro files from 323 | an extended period), the user can provide additional columns (“max_records”, 324 | “used_rotation”, “used_IRGA”) via `label_periods()` to specify these 325 | properties for each averaging period (see the relevant function help files: 326 | `?extract_QC`, `?interdep`, `?label_periods`). For the columns to be 327 | recognized, respective changes need to be made also in the QC workflow (see 328 | the description there).
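When the setup did change, the per-period columns can be prepared along these lines. This is a minimal base R sketch with a made-up changeover date; the entries for rotation and IRGA type are placeholder assumptions, so check `?extract_QC`, `?interdep` and `?label_periods` for the values actually expected.

``` r
# Hypothetical example: on 2016-07-01 the acquisition frequency changed from
# 10 Hz to 20 Hz and the gas analyzer was replaced (invented dates and values)
timestamp  <- seq(as.POSIXct("2016-06-30 22:45", tz = "GMT"),
                  by = "30 mins", length.out = 8)
changeover <- as.POSIXct("2016-07-01 00:00", tz = "GMT")
before     <- timestamp < changeover

periods <- data.frame(
  timestamp,
  max_records   = ifelse(before, 10 * 60 * 30, 20 * 60 * 30),  # records per 30 min
  used_rotation = "double",                    # rotation unchanged in this example
  used_IRGA     = ifelse(before, "open", "enclosed")  # placeholder labels
)
```

The point is only the layout: one row per averaging period, with the three property columns varying wherever the setup changed.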
329 | 330 | ## Naming strategy with the EC workflow 331 | 332 | In order to take advantage of `openeddy` default arguments, a certain 333 | naming strategy is recommended. 334 | 335 | `EddyPro` full output [variable 336 | names](https://www.licor.com/env/support/EddyPro/topics/output-files-full-output.html) 337 | are used, with a few modifications if symbols were included in the 338 | variable name (e.g. the Monin-Obukhov stability parameter `(z-d)/L` is 339 | renamed to `zeta`). 340 | 341 | The expected names of meteorological variables are, for historical 342 | reasons: 343 | 344 | - GR: global radiation \[W m-2\] 345 | - PAR: photosynthetically active radiation \[umol m-2 s-1\] 346 | - Rn: net radiation \[W m-2\] 347 | - Tair: air temperature at EC height \[degC\] 348 | - Tsoil: soil temperature at soil surface \[degC\] 349 | - RH: relative humidity at EC height \[%\] 350 | - VPD: vapor pressure deficit at EC height \[hPa\] 351 | - P: precipitation \[mm\] 352 | 353 | `openeddy` offers full flexibility concerning QC column names. However, 354 | in order to avoid QC column duplication and to partly document the type 355 | of QC test/filter and the flux it corresponds to, the following naming 356 | strategy was devised: 357 | 358 | ### QC prefixes 359 | 360 | They specify which flux is affected by a given QC column: 361 | 362 | - qc_Tau\_, qc_H\_, qc_LE\_, qc_NEE\_: only applicable to the respective 363 | fluxes. 364 | - qc_SA\_: applicable to fluxes relying only on the sonic (Tau, H). 365 | - qc_GA\_: applicable to fluxes relying on the GA (LE, NEE); only GA issues 366 | considered. 367 | - qc_SAGA\_: applicable to fluxes relying on both SA and GA (LE, NEE); 368 | both SA and GA issues considered. 369 | - qc_ALL\_: applicable to all fluxes (in practice often not applied to 370 | Tau). 371 | 372 | ### QC suffixes 373 | 374 | They specify which QC test/filter was applied to get the QC flags: 375 | 376 | - \_SSITC: steady state test and test of integral turbulence 377 | characteristics.
378 | - \_spikesHF: check of the [high frequency data spike 379 | percentage](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Despiking) 380 | in the averaging period against thresholds. 381 | - \_ampres: check of the [amplitude 382 | resolution](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Amplituderesolution) 383 | in the recorded data. 384 | - \_dropout: check of 385 | [drop-outs](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Dropouts), 386 | i.e. situations when the time series stays for “too long” on a value 387 | that is far from the mean. 388 | - \_abslim: check of [absolute 389 | limits](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Absolutelimits), 390 | when raw data are out of the plausible range. 391 | - \_skewkurt_sf, \_skewkurt_hf, \_skewkurt: check of [skewness and 392 | kurtosis](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Skewnessandkurtosis) 393 | limits. 394 | - \_discont_sf, \_discont_hf, \_discont: check of 395 | [discontinuities](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Discontinuities) 396 | that lead to semi-permanent changes in the time series. 397 | - \_timelag_sf, \_timelag_hf, \_timelag: check of estimated 398 | [timelags](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Timelags) 399 | compared to the expected timelags. 400 | - \_attangle: check of the [angle of 401 | attack](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Angleofattack). 402 | - \_nonsteady: check of the [steadiness of horizontal 403 | wind](https://www.licor.com/env/support/EddyPro/topics/despiking-raw-statistical-screening.html#Steadinessofhorizontalwind). 404 | - \_missfrac: check of missing data in the averaging period against 405 | thresholds. 406 | - \_scf: check of the spectral correction factor against thresholds. 407 | - \_wresid: check of the mean unrotated *w* (double rotation) or the *w* 408 | residual (planar fit) against thresholds. 409 | - \_runs: check of runs of repeating values. 410 | - \_lowcov: check of fluxes too close to zero (suggesting issues during 411 | the covariance computation). 412 | - \_var: check of variances against thresholds. 413 | - \_LI7200: check of CO2 and H2O signal strength against thresholds. 414 | - \_interdep: flux interdependency. 415 | - \_man: manual quality control. 416 | - \_spikesLF: identification of likely outliers in low frequency data. 417 | - \_fetch70: check of the distance corresponding to [70% signal 418 | contribution](https://www.licor.com/env/support/EddyPro/topics/estimating-flux-footprint.html) 419 | against the fetch distance for a given wind direction. 420 | - \_forGF: the composite QC column used to screen fluxes for gap-filling, 421 | combining the selected test/filter results above. 422 | 423 | For details see the documentation of `extract_QC()`. 424 | 425 | The `REddyProc` naming strategy is available at the [MPI Online Tool 426 | website](https://bgc.iwww.mpg.de/5624929/Output-Format). 427 | 428 | ## Manual QC guide 429 | 430 | Theoretically, manual QC using `check_manually()` introduces 431 | subjectivity into the workflow and should be avoided. However, in 432 | practice, certain events can occur that are difficult to screen 433 | based on auxiliary data, or the tests are not sensitive enough to capture 434 | them. It should be noted that data not falling within the expected range 435 | might represent interesting rare phenomena and should be carefully 436 | investigated before manual removal. The screening typically depends on 437 | the user's experience with the site, considering meteo conditions and 438 | phenology. Examples of manually excluded half-hours could be those 439 | affected by precipitation, strong advection or unexpected technical 440 | issues.
Neighbors of outlying values or isolated points can be good 441 | candidates for exclusion, as they might have escaped the automated 442 | screening. A change of weather fronts can lead to unexpected energy fluxes 443 | that nevertheless reflect real conditions. In such cases it can depend 444 | on the research question whether they should be excluded. 445 | 446 | ## Abbreviations 447 | 448 | - EC: Eddy Covariance 449 | - QC: Quality Control 450 | - QA: Quality Assurance 451 | - SA: Sonic Anemometer 452 | - GA: Gas Analyzer 453 | - Tau: Momentum flux \[kg m-1 s-2\] 454 | - H: Sensible heat flux \[W m-2\] 455 | - LE: Latent heat flux \[W m-2\] 456 | - NEE: Net ecosystem exchange \[umol m-2 s-1\] 457 | - u: Longitudinal wind speed component \[m s-1\] 458 | - w: Vertical wind speed component \[m s-1\] 459 | - ts: Sonic temperature \[degC\] 460 | - h2o: H2O concentration \[mmol mol-1\] 461 | - co2: CO2 concentration \[umol mol-1\] 462 | 463 | ## References 464 | 465 | A publication describing `openeddy` is not yet available. When describing 466 | the proposed quality control scheme, please refer to: 467 | 468 | McGloin, R., Sigut, L., Havrankova, K., Dusek, J., Pavelka, M., Sedlak, 469 | P., 2018. Energy balance closure at a variety of ecosystems in Central 470 | Europe with contrasting topographies. Agric. For. Meteorol. 248, 471 | 418-431. 472 | 473 | Other references relevant to the applied quality control: 474 | 475 | Foken, T., Wichura, B., 1996. Tools for quality assessment of 476 | surface-based flux measurements. Agric. For. Meteorol. 78, 83–105. 477 | 478 | 479 | Vickers, D. and Mahrt, L., 1997. Quality Control and Flux Sampling 480 | Problems for Tower and Aircraft Data. Journal of Atmospheric and Oceanic 481 | Technology, 14(3), 512-526. 482 | 483 | 484 | Mauder, M., Cuntz, M., Drüe, C., Graf, A., Rebmann, C., Schmid, H.P., 485 | Schmidt, M., Steinbrecher, R., 2013.
A strategy for quality and 486 | uncertainty assessment of long-term eddy-covariance measurements. Agric. 487 | For. Meteorol. 169, 122-135. 488 | 489 | 490 | The methodology and benchmark of `REddyProc 1.1.3` are described in the 491 | following paper: 492 | 493 | Wutzler, T., Lucas-Moffat, A., Migliavacca, M., Knauer, J., Sickel, K., 494 | Šigut, L., Menzer, O., and Reichstein, M. (2018): Basic and extensible 495 | post-processing of eddy covariance flux data with REddyProc, 496 | Biogeosciences, 15, 5015-5030. 497 | 498 |