├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── NAMESPACE
├── R
    ├── author_name.R
    ├── collapse_01.R
    ├── dag_manage.R
    ├── data_dict.R
    ├── development
    │   ├── upload_file_same.R
    │   ├── upload_file_unique.R
    │   └── upload_record_new.R
    ├── example_data.R
    ├── group2csv.R
    ├── orcid_name.R
    ├── orcid_valid.R
    ├── redcap_compare.R
    ├── redcap_data.R
    ├── redcap_format_repeat.R
    ├── redcap_log.R
    ├── redcap_metadata.R
    ├── redcap_sum.R
    ├── report_auth.R
    ├── report_miss.R
    ├── retired
    │   └── user_roles.R
    ├── user_import.R
    ├── user_manage.R
    ├── user_role.R
    └── user_summarise.R
├── README.md
├── collaborator.Rproj
├── data
    ├── example_data_dict.rda
    ├── example_df_user.rda
    └── example_report_author.rda
├── man
    ├── author_name.Rd
    ├── collapse01.Rd
    ├── dag_manage.Rd
    ├── data_dict.Rd
    ├── example_data_dict.Rd
    ├── example_df_user.Rd
    ├── example_report_author.Rd
    ├── group2csv.Rd
    ├── orcid_name.Rd
    ├── orcid_valid.Rd
    ├── redcap_compare.Rd
    ├── redcap_data.Rd
    ├── redcap_format_repeat.Rd
    ├── redcap_log.Rd
    ├── redcap_metadata.Rd
    ├── redcap_sum.Rd
    ├── report_auth.Rd
    ├── report_miss.Rd
    ├── user_import.Rd
    ├── user_manage.Rd
    ├── user_role.Rd
    └── user_summarise.Rd
├── tests
    ├── testthat.R
    └── testthat
    │   └── test_collaborator_all.R
└── vignettes
    ├── figures
        ├── collaborator_logo.png
        └── rmd_email.png
    ├── render.R
    ├── vignette_authors.Rmd
    ├── vignette_authors.md
    ├── vignette_data.Rmd
    ├── vignette_data.md
    ├── vignette_email.Rmd
    ├── vignette_email.md
    ├── vignette_email_body.Rmd
    ├── vignette_missing.Rmd
    ├── vignette_missing.md
    ├── vignette_summary.Rmd
    ├── vignette_summary.md
    ├── vignette_user.Rmd
    └── vignette_user.md


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^data-raw$
4 | ^packrat/
5 | ^\.Rprofile$
6 | ^\.httr-oauth$
7 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | inst/doc
 5 | packrat/lib*/
 6 | .httr-oauth
 7 | R/development
 8 | R/retired
 9 | admin/
10 | vignettes/folder_csv
11 | presentation
12 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: r
2 | cache: packages
3 | warnings_are_errors: false
4 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: collaborator
 2 | Title: Scalable multi-centre research using R and REDCap
 3 | Version: 0.0.0.9000
 4 | Authors@R: as.person(c(
 5 |     "Kenneth McLean <v1kmcle6@ed.ac.uk> [aut, cre]", 
 6 |     "Riinu Ots <r.ots@ed.ac.uk> [aut]",
 7 |     "Tom Drake <t.drake@ed.ac.uk> [aut]",
 8 |     "Ewen Harrison <ewen.harrison@ed.ac.uk> [aut]"))
 9 | Description: A cluster of functions to help facilitate administration of
10 |     multi-centre research using R and REDCap (Research Electronic Data Capture). It
11 |     allows automatic allocation of users within a REDCap project to data access groups
12 |     with appropriate user rights, and supplies several functions to support
13 |     this. It also contains functions which allow the easy creation of summaries, reports
14 |     (e.g. of missing data), authorship lists, and sharable data dictionaries
15 | Depends: R (>= 3.3.1)
16 | Imports: dplyr,
17 |   gdata,
18 |   gmailr,
19 |   here,
20 |   jsonlite,
21 |   lubridate,
22 |   magrittr,
23 |   purrr,
24 |   httr,
25 |   readr,
26 |   rmarkdown,
27 |   rvest,
28 |   scales,
29 |   stats,
30 |   stringi,
31 |   stringr,
32 |   tibble,
33 |   tidyr,
34 |   tidyselect,
35 |   xml2,
36 |   zip
37 | License: GPL-3
38 | Encoding: UTF-8
39 | LazyData: true
40 | RoxygenNote: 7.3.2
41 | Suggests: knitr,
42 |     rmarkdown,
43 |     testthat
44 | VignetteBuilder: knitr
45 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(author_name)
 4 | export(collapse01)
 5 | export(dag_manage)
 6 | export(data_dict)
 7 | export(group2csv)
 8 | export(orcid_name)
 9 | export(orcid_valid)
10 | export(redcap_compare)
11 | export(redcap_data)
12 | export(redcap_format_repeat)
13 | export(redcap_log)
14 | export(redcap_metadata)
15 | export(redcap_sum)
16 | export(report_auth)
17 | export(report_miss)
18 | export(user_import)
19 | export(user_manage)
20 | export(user_role)
21 | export(user_summarise)
22 | import(dplyr)
23 | import(here)
24 | import(readr)
25 | import(stringr)
26 | import(tibble)
27 | import(tidyr)
28 | import(xml2)
29 | importFrom(Hmisc,capitalize)
30 | importFrom(furrr,future_map)
31 | importFrom(furrr,future_map_dfr)
32 | importFrom(httr,GET)
33 | importFrom(httr,POST)
34 | importFrom(httr,content)
35 | importFrom(lubridate,as_date)
36 | importFrom(lubridate,as_datetime)
37 | importFrom(lubridate,day)
38 | importFrom(lubridate,is.Date)
39 | importFrom(lubridate,month)
40 | importFrom(lubridate,origin)
41 | importFrom(lubridate,year)
42 | importFrom(lubridate,ymd)
43 | importFrom(purrr,is_empty)
44 | importFrom(purrr,map)
45 | importFrom(purrr,map_chr)
46 | importFrom(purrr,map_df)
47 | importFrom(readr,read_csv)
48 | importFrom(readr,read_file)
49 | importFrom(readr,write_csv)
50 | importFrom(readr,write_file)
51 | importFrom(scales,percent)
52 | importFrom(stats,median)
53 | importFrom(stringi,stri_replace_all_fixed)
54 | importFrom(stringr,str_detect)
55 | importFrom(stringr,str_remove_all)
56 | importFrom(stringr,str_split)
57 | importFrom(stringr,str_split_fixed)
58 | importFrom(stringr,str_sub)
59 | importFrom(stringr,str_to_title)
60 | importFrom(tibble,enframe)
61 | importFrom(tibble,tibble)
62 | importFrom(tidyr,drop_na)
63 | importFrom(tidyr,pivot_wider)
64 | importFrom(tidyr,separate)
65 | importFrom(tidyr,separate_rows)
66 | importFrom(tidyselect,all_of)
67 | 


--------------------------------------------------------------------------------
/R/author_name.R:
--------------------------------------------------------------------------------
 1 | # author_name--------------------------------
 2 | # Documentation
 3 | #' Pull first name(s) and last name for a given list of orcid
 4 | #' @description Pull and format first name(s) and last name for a given list of orcid
 5 | #' @param data datafame containing a vectors of the author name (split into first_name and last_name)
 6 | #' @param first_name Column name of vector containing the first and middle name(s) (default = "first_name")
 7 | #' @param last_name Column name of vector containing the last or family name (default = "last_name")
 8 | #' @param initial Should the first / middle name(s) be converted to initial (default = TRUE)
 9 | #' @param position initial to "left" or "right" of last name (default = "right")
10 | #' @param initial_max Maximum number of digits (default = 3)
11 | #' @return Vector of the combined name composing the full author name in the requested format.
12 | #' @import dplyr
13 | #' @importFrom purrr map_chr
14 | #' @importFrom stringr str_sub str_split str_to_title
15 | #' @importFrom Hmisc capitalize
16 | #' @export
17 | 
18 | author_name <- function(data, first_name = "first_name", last_name = "last_name", initial=T, position = "right", initial_max=3){
19 |   require(dplyr);require(purrr); require(stringr); require(Hmisc)
20 | 
21 |   output <- data %>%
22 |     dplyr::mutate_at(vars(first_name, last_name),
23 |                      function(x){purrr::map_chr(x,
24 |                                              function(y){ifelse(stringr::str_detect(y, "^[[:upper:][:space:][:punct:]]+$"),
25 |                                                                 stringr::str_replace_all(y,"^[[:alpha:]]+|[[:space:][:punct:]][[:alpha:]]+",
26 |                                                                                          stringr::str_to_title),y)})}) %>%
27 |     dplyr::mutate(first_name = dplyr::pull(., first_name),
28 |                   last_name = dplyr::pull(., last_name)) %>%
29 |     dplyr::mutate(last_name = Hmisc::capitalize(last_name)) %>%
30 |     dplyr::mutate(initial_name = purrr::map_chr(stringr::str_split(first_name," "),
31 |                                                 function(x){stringr::str_sub(x,1,1) %>% paste(collapse = "")}))%>%
32 |     dplyr::mutate(initial_name = toupper(initial_name),
33 |                   name_yn = ifelse(is.na(first_name)==T|is.na(last_name)==T, "No", "Yes"))
34 | 
35 |   if(initial==T&position == "right"){output <- output %>% dplyr::mutate(author_name = ifelse(name_yn=="Yes", paste0(last_name, " ", initial_name), NA))}
36 |   if(initial==F&position == "right"){output <- output %>% dplyr::mutate(author_name = ifelse(name_yn=="Yes", paste0(last_name, " ", first_name), NA))}
37 |   if(initial==T&position == "left"){output <- output %>% dplyr::mutate(author_name = ifelse(name_yn=="Yes", paste0(initial_name, " ", last_name), NA))}
38 |   if(initial==F&position == "left"){output <- output %>% dplyr::mutate(author_name = ifelse(name_yn=="Yes", paste0(first_name, " ", last_name), NA))}
39 | 
40 |   return(output %>% select(-initial_name, -name_yn, -first_name, -last_name))}
41 | 


--------------------------------------------------------------------------------
/R/collapse_01.R:
--------------------------------------------------------------------------------
 1 | # collapse01--------------------------------
 2 | 
 3 | # Documentation
 4 | #' Collapse a group of binary variables (e.g. those generated by checkbox variables in REDCap) into summary columns.
 5 | #' @description Collapse a group of binary variables (e.g. those generated by checkbox variables in REDCap) into summary columns.
 6 | #' @param df Dataframe.
 7 | #' @param cols List of columnn names of binary variables desired to be collapsed.
 8 | #' @param prefix String to add to the start of all summary columns names.
 9 | #' @param suffix String to add to the end of all summary columns names.
10 | #' @param remove Logical value to remove columns supplied to the "cols" parameter
11 | #' @param output List of desired outputs: yesno (any values selected), n (number of values selected) and list (list of column names of all values selected).
12 | #' @param binary0 List of all values corresponding to "0" (No) in the binary variable (default = 0 or no)
13 | #' @param binary1 List of all values corresponding to "1" (Yes) in the binary variable (default = 1 or yes).
14 | #' @return Dataframe with up to 3 additional columns: yesno (any values selected), n (number of values selected) and list (list of column names of all values selected).
15 | #' @import dplyr
16 | #' @import tibble
17 | #' @import tidyr
18 | #' @importFrom tidyselect all_of
19 | #' @export
20 | 
21 | # Function
22 | collapse01 <- function(df, cols, prefix=NULL, suffix = NULL,
23 |                        output = c("list", "yesno", "n"), remove = TRUE,
24 |                        binary0 = c(0, "0", "No", "no"), binary1 = c(1, "1", "Yes", "yes")){
25 | 
26 |   require(tibble);require(dplyr);require(tidyr);require(tidyselect)
27 |   #clean df
28 |   out <- df %>%
29 |     dplyr::select_at(tidyselect::all_of(cols)) %>%
30 |     dplyr::mutate_all(function(x){ifelse(x %in% binary0, NA, x)}) %>%
31 |     tibble::rowid_to_column() %>%
32 | 
33 |     # wranging to get colname inserted (inefficent)
34 |     tidyr::pivot_longer(cols = -rowid) %>%
35 |     dplyr::mutate(value = ifelse(value %in% binary1, name, value)) %>%
36 |     tidyr::pivot_wider(id_cols = rowid) %>%
37 | 
38 |     # create columns of interest (yesno, n, and list)
39 |     tidyr::unite(col = "list", -rowid, na.rm = T, sep = ", ") %>%
40 |     dplyr::mutate(list = ifelse(list=="", NA, list)) %>%
41 |     dplyr::mutate(n = stringr::str_count(list, ", ")+1) %>%
42 |     dplyr::mutate(n = ifelse(is.na(n)==T, 0, n)) %>%
43 |     dplyr::mutate(yesno = factor(ifelse(n==0, "No", "Yes"), levels = c("No", "Yes"))) %>%
44 |     dplyr::select(all_of(output)) %>%
45 | 
46 |     dplyr::rename_all(function(x){paste0(if(is.null(prefix)==T){""}else{prefix},
47 |                                          x,
48 |                                          if(is.null(suffix)==T){""}else{suffix})})
49 | 
50 |   out <- tibble::as_tibble(dplyr::bind_cols(df, out))
51 | 
52 |   if(remove==TRUE){out <- out %>% dplyr::select(-any_of(cols))}
53 | 
54 |   return(out)}
55 | 


--------------------------------------------------------------------------------
/R/dag_manage.R:
--------------------------------------------------------------------------------
 1 | # redcap_sum--------------------------------
 2 | # Documentation
 3 | #' Generate REDCap summary data.
 4 | #' @description Used to generate high quality summary data for REDCap projects at overall, and DAG-specific level.
 5 | #' @param redcap_project_uri URI (Uniform Resource Identifier) for the REDCap instance.
 6 | #' @param redcap_project_token API (Application Programming Interface) for the REDCap project.
 7 | #' @param import A list of new DAGs to import into the project (Default = NULL).
 8 | #' @param remove A list of current DAGs to delete in the project (Default = NULL).
 9 | #' @return A dataframe of DAGs specifiying those which are new, deleted, or unchanged (-).
10 | #' @import dplyr
11 | #' @importFrom tibble enframe
12 | #' @importFrom httr POST content
13 | #' @importFrom readr read_csv
14 | #' @export
15 | 
16 | # Function:
17 | dag_manage <- function(redcap_project_uri, redcap_project_token,
18 |                        import = NULL, remove = NULL){
19 |   # Load required functions
20 |   require(dplyr);require(httr);require(tibble)
21 | 
22 |   # Allow flexible use of either just a unique name or also allow label of dag
23 |   # Stop if unique_group_name > 18 characters
24 | 
25 |   dag_current <- httr::POST(url=redcap_project_uri,
26 |                             body = list("token"= redcap_project_token, content='dag',
27 |                                         action='export', format='csv')) %>%
28 |     httr::content(show_col_types = FALSE)
29 | 
30 |   if(is.null(import)==F){
31 |   if(TRUE %in% c(nchar(import)>18)){
32 |     stop(paste0("Please ensure <18 characters for all unique_group_name: ",
33 |                 paste0(import[nchar(import)>18], collapse = ", ")))}
34 | 
35 |   # Export and format existing dags (including if none already exist)
36 | 
37 | 
38 |   dag_import <- import %>%
39 |     tibble::enframe(name = NULL, value = "data_access_group_name") %>%
40 |     dplyr::mutate(data_access_group_name =  iconv(data_access_group_name, to ="ASCII//TRANSLIT")) %>%
41 |     dplyr::anti_join(dag_current,by = "data_access_group_name")
42 | 
43 | 
44 |   # Import DAGs into project
45 |   if(nrow(dag_import) > 0){
46 | 
47 |   dag_import <- dag_import %>%
48 |     dplyr::mutate(data_access_group_name = paste0('{"data_access_group_name":"',
49 |                                                   data_access_group_name, '","unique_group_name":""}')) %>%
50 |     dplyr::summarise(data_access_group_name = paste0(data_access_group_name, collapse = ", ")) %>%
51 |     dplyr::mutate(data_access_group_name = paste0("[", data_access_group_name, "]")) %>%
52 |     dplyr::pull(data_access_group_name)
53 | 
54 | 
55 |   httr::POST(url=redcap_project_uri,
56 |              body = list("token"= redcap_project_token, data= dag_import,
57 |                          content='dag',action='import',
58 |                          format='json',returnFormat='json'), encode = "form")}}
59 | 
60 |   if(is.null(remove)==F){
61 | 
62 | 
63 | 
64 |     dag_remove <- remove %>%
65 |       tibble::enframe(name = NULL, value = "dags") %>%
66 |       dplyr::mutate(n =  1:n()-1) %>%
67 |       dplyr::mutate(out = paste0("'dags[", n, "]'='",dags, "'")) %>%
68 |       dplyr::summarise(dags = paste0(out, collapse = ",")) %>%
69 |       dplyr::pull(dags)
70 | 
71 | 
72 |     eval(parse(text = paste0("httr::POST(url=redcap_project_uri,
73 |                body = list('token'= redcap_project_token, data= dag_import,
74 |                            content='dag',action='delete',",
75 |                              dag_remove,
76 |                              ", returnFormat='json'), encode = 'form')")))}
77 | 
78 | 
79 |   dag_updated <- httr::POST(url=redcap_project_uri,
80 |                             body = list("token"= redcap_project_token, content='dag',
81 |                                         action='export', format='csv')) %>%
82 |     httr::content(show_col_types = FALSE)
83 | 
84 | out <- full_join(dag_current %>% mutate("status"  = 1),
85 |                  dag_updated %>% mutate("status"  = 1),
86 |                  by = c("data_access_group_name", "unique_group_name")) %>%
87 |   dplyr::mutate(status = case_when(is.na(status.x)==T&is.na(status.y)==F ~ "added",
88 |                                    is.na(status.x)==F&is.na(status.y)==T ~ "removed",
89 |                                    TRUE ~ "-")) %>%
90 |   dplyr::select(-status.x, -status.y)
91 | 
92 | return(out)}
93 | 


--------------------------------------------------------------------------------
/R/data_dict.R:
--------------------------------------------------------------------------------
  1 | # data_dict--------------------------------
  2 | 
  3 | # Documentation
  4 | #' Generate a data dictionary.
  5 | #' @description Used to generate an easily sharable data dictionary for an R dataframe. This supports the following classes: numeric, integer, logical, Date, character, String, factor, ordered.
  6 | #' @param df Dataframe.
  7 | #' @param var_exclude Vector of names of variables that are desired to be excluded from the data dictionary (default: NULL).
  8 | #' @param var_include Vector of names of variables that are desired to be included in the data dictionary (default: NULL).
  9 | #' @param label Logical value (default = FALSE). If TRUE, then include the variable label for each variable (if assigned).
 10 | #' @return Dataframe with 4 columns: variable (variable name), class (variable class), na_pct (the percentage of data which is NA for that variable), and value (an appropriate summary for the variable class).
 11 | #' @import dplyr
 12 | #' @import tibble
 13 | #' @import tidyr
 14 | #' @importFrom purrr map
 15 | #' @importFrom lubridate ymd origin is.Date
 16 | #' @importFrom stats median
 17 | #' @export
 18 | 
 19 | # Function:
 20 | data_dict <- function(df, var_include = NULL, var_exclude=NULL, label = FALSE){
 21 |   require(dplyr);require(purrr);require(tibble);require(tidyr);require(lubridate);require(stats)
 22 | 
 23 |   if(is.null(var_exclude)==F){df <- df %>% dplyr::select(-one_of(var_exclude))}
 24 | 
 25 |   if(is.null(var_include)==F){df <- df %>% dplyr::select(all_of(var_include))}
 26 | 
 27 |   dict <- df %>%
 28 |     purrr::map(function(x){class(x) %>%
 29 |         paste(collapse="") %>%
 30 |         gsub("labelled", "", .)}) %>%
 31 |     tibble::enframe(name ="variable", value = "class") %>%
 32 |     dplyr::mutate(n_na = purrr::map(df, function(x){is.na(x) %>% sum()})) %>%
 33 |     tidyr::unnest(cols = c(class, n_na)) %>%
 34 |     dplyr::mutate(na_pct = paste0(format(round(n_na / nrow(df) *100, 1), nsmall=1), "%"))
 35 | 
 36 |   # Create numeric values
 37 |   value_num   <- NULL
 38 |   if(nrow(dplyr::filter(dict, class=="numeric"|class=="integer"))>0){
 39 |     value_num <- df %>%
 40 |       dplyr::select_if(function(x){is.numeric(x)|is.integer(x)}) %>%
 41 |       tidyr::pivot_longer(cols = everything(), names_to = "variable") %>%
 42 |       dplyr::group_split(variable) %>%
 43 |       purrr::map(function(x){x %>% dplyr::summarise(variable = unique(variable),
 44 |                                                     mean = mean(value, na.rm = T) %>% signif(3),
 45 |                                                     median = stats::median(value, na.rm = T) %>% signif(3),
 46 |                                                     min = suppressWarnings(min(value, na.rm = T)) %>% signif(3),
 47 |                                                     max = suppressWarnings(max(value, na.rm = T)) %>% signif(3))}) %>%
 48 |       dplyr::bind_rows() %>%
 49 |       dplyr::mutate(value = paste0("Mean: ", mean,"; Median: ",median, "; Range: ", min, " to ", max)) %>%
 50 |       dplyr::select(variable, value)}
 51 | 
 52 |   # Create date values
 53 |   value_date   <- NULL
 54 |   if(nrow(dplyr::filter(dict, class=="Date"))>0){
 55 |      value_date <- df %>%
 56 |        dplyr::select_if(function(x){lubridate::is.Date(x)}) %>%
 57 |        tidyr::pivot_longer(cols = everything(), names_to = "variable") %>%
 58 |        dplyr::group_split(variable) %>%
 59 |        purrr::map(function(x){x %>% dplyr::summarise(variable = unique(variable),
 60 |                                                      min = suppressWarnings(min(value, na.rm = T)),
 61 |                                                      max = suppressWarnings(max(value, na.rm = T)))}) %>%
 62 |        dplyr::bind_rows() %>%
 63 |        dplyr::mutate(value = paste0("Range: ", min, " to ", max)) %>%
 64 |        dplyr::select(variable, value)}
 65 | 
 66 |   # Create logical values
 67 |   value_logic   <- NULL
 68 |   if(nrow(dplyr::filter(dict, class=="logical"))>0){
 69 |     value_logic <- df %>%
 70 |       dplyr::select_if(function(x){is.logical(x)}) %>%
 71 |       tidyr::pivot_longer(cols = everything(), names_to = "variable") %>%
 72 |       dplyr::group_split(variable) %>%
 73 |       purrr::map(function(x){x %>% dplyr::summarise(variable = unique(variable),
 74 |                                                     value = paste(head(value, 10), collapse = ", "))}) %>%
 75 |       dplyr::bind_rows() %>%
 76 |       dplyr::select(variable, value)}
 77 | 
 78 |   # Create character values
 79 |   value_char   <- NULL
 80 |   if(nrow(dplyr::filter(dict, class=="character"|class=="String"))>0){
 81 |     value_char <- df %>%
 82 |       dplyr::select_if(function(x){is.character(x)}) %>%
 83 |       tidyr::pivot_longer(cols = everything(), names_to = "variable") %>%
 84 |       dplyr::group_split(variable) %>%
 85 |       purrr::map(function(x){x %>% dplyr::summarise(variable = unique(variable),
 86 |                                                     n_unique = length(unique(value)),
 87 |                                                     value = paste(head(unique(value), 10), collapse = ", "))}) %>%
 88 |       dplyr::bind_rows() %>%
 89 |       dplyr::mutate(value = paste0(n_unique, " Unique: ",value)) %>%
 90 |       dplyr::select(variable, value)}
 91 | 
 92 |   # Create factor values
 93 |   value_factor   <- NULL; var_factor <- NULL
 94 |   if(nrow(dplyr::filter(dict, class=="factor"|class=="orderedfactor"))>0){
 95 |     var_factor <- df %>%
 96 |       dplyr::select_if(function(x){is.factor(x)|is.ordered(x)})
 97 | 
 98 |     value_factor <- var_factor %>%
 99 |       purrr::map(function(x){tibble::tibble("n_levels" = nlevels(x),
100 |                                             "levels" = paste0(levels(x) %>% head(10), collapse = ", "))}) %>%
101 |       dplyr::bind_rows() %>%
102 |       dplyr::mutate(variable = colnames(var_factor),
103 |                     value = paste0(n_levels, " Levels: ",levels)) %>%
104 |       dplyr::select(variable, value)}
105 | 
106 | 
107 |   class_supported <- c("factor", "character", "String", "Date", "numeric", "logical", "orderedfactor")
108 | 
109 |   dict_full <- dplyr::bind_rows(value_factor, value_char, value_logic, value_date, value_num) %>%
110 |     dplyr::left_join(dict, by = "variable") %>%
111 |     dplyr::mutate(value = ifelse(class %in% class_supported, value, "Class not supported")) %>%
112 |     dplyr::mutate(variable = factor(variable, levels = colnames(df))) %>%
113 |     dplyr::arrange(variable) %>% dplyr::mutate(variable = as.character(variable)) %>%
114 |     dplyr::select(variable, class, value, na_pct)
115 | 
116 |   if(label ==TRUE){
117 |     extract_labels = function(.data){
118 |       # Struggled to make this work and look elegant!
119 |       # Works but surely there is a better way.
120 |       df.out = lapply(.data, function(x) {
121 |         vlabel = attr(x, "label")
122 |         list(vlabel = vlabel)}) %>%
123 |         do.call(rbind, .)
124 |       df.out = data.frame(vname = rownames(df.out), vlabel = unlist(as.character(df.out)),
125 |                           stringsAsFactors = FALSE)
126 |       df.out$vfill = df.out$vlabel
127 |       df.out$vfill[df.out$vlabel == "NULL"] = df.out$vname[df.out$vlabel=="NULL"]
128 |       return(df.out)}
129 | 
130 |     dict_full <- df %>%
131 |       extract_labels() %>%
132 |       tibble::as_tibble() %>%
133 |       dplyr::select("variable" = vname, "label" = vfill) %>%
134 |       dplyr::right_join(dict_full, by = "variable") %>%
135 |       dplyr::select(variable, label, everything())}
136 | 
137 | 
138 | 
139 |   return(dict_full)}
140 | 


--------------------------------------------------------------------------------
/R/development/upload_file_same.R:
--------------------------------------------------------------------------------
 1 | # upload_file_same-----------------------
 2 | # Documentation
 3 | #' Upload the same file to all records
 4 | #' @description Use to upload the same file to all records in record_id
 5 | 
 6 | #' @param redcap_project_uri URI (Uniform Resource Identifier) for the REDCap instance.
 7 | #' @param redcap_project_token API (Application Programming Interface) for the REDCap project.
 8 | #' @param record_id Vector of record ids which the file should be uploaded to.
 9 | #' @param var_upload The field name of the upload location
10 | #' @param name_file The location of the file to be uploaded to all records in "record_id".
11 | #' @import dplyr
12 | #' @importFrom purrr imap_dfr
13 | #' @importFrom REDCapR redcap_upload_file_oneshot
14 | #' @return Tibble of all records which the file could not be uploaded to.
15 | #' @export
16 | 
17 | # if you want to upload the same file to all
18 | upload_file_same <- function(redcap_project_uri,
19 |                              redcap_project_token,
20 |                              record_id,var_upload,
21 |                              name_file){
22 | 
23 | 
24 | 
25 |   outcome <- purrr::imap_dfr(record_id,
26 |                              ~ REDCapR::redcap_upload_file_oneshot(file_name = name_file,
27 |                                                                    record = record_id[.y],
28 |                                                                    redcap_uri = redcap_project_uri,
29 |                                                                    token = redcap_project_token,
30 |                                                                    field = var_upload))
31 |   outcome <- outcome %>%
32 |     dplyr::select(success, "record_id" = affected_ids, "error message" = raw_text) %>%
33 |     dplyr::filter(success==FALSE)
34 | 
35 |   return(outcome)}
36 | 


--------------------------------------------------------------------------------
/R/development/upload_file_unique.R:
--------------------------------------------------------------------------------
 1 | # upload_file_unique-----------------------
 2 | # Documentation
 3 | #' Upload the same file to all records
 4 | #' @description Use to upload the same file to all records in record_id
 5 | 
 6 | #' @param redcap_project_uri URI (Uniform Resource Identifier) for the REDCap instance.
 7 | #' @param redcap_project_token API (Application Programming Interface) for the REDCap project.
 8 | #' @param record_id Vector of record ids which the file should be uploaded to.
 9 | #' @param var_upload The field name of the upload location
10 | #' @param name_folder The location of the folder with files to be uploaded to all records in "record_id".
11 | #' @param name_file_suffix Each file must start with the appropriate record_id and end with a file extension (".csv", ".txt", etc). Further characters can be incorported if desired before the file extention (e.g. "_result.txt")
12 | #' @import dplyr
13 | #' @importFrom purrr imap_dfr
14 | #' @importFrom REDCapR redcap_upload_file_oneshot
15 | #' @return Tibble of all records which the file could not be uploaded to
16 | #' @export
17 | 
18 | 
19 | 
20 | upload_file_unique <- function(redcap_project_uri,
21 |                                redcap_project_token,
22 |                                record_id, var_upload,
23 |                                name_folder, name_file_suffix){
24 | 
25 |   outcome <- purrr::imap_dfr(record_id,
26 |                              ~ REDCapR::redcap_upload_file_oneshot(file_name = paste0(name_folder, record_id[.y], name_file_suffix),
27 |                                                                    record = record_id[.y],
28 |                                                                    redcap_uri = redcap_project_uri,
29 |                                                                    token = redcap_project_token,
30 |                                                                    field = var_upload))
31 | 
32 |   outcome <- outcome %>%
33 |     dplyr::select(success, "record_id" = affected_ids, "error message" = raw_text) %>%
34 |     dplyr::filter(success==FALSE)
35 | 
36 |   return(outcome)}
37 | 
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/R/development/upload_record_new.R:
--------------------------------------------------------------------------------
 1 | # upload_record_new-----------------------
 2 | # Documentation
 3 | #' Upload new records to REDCap project
 4 | #' @description Use to upload new records to REDCap project
 5 | 
 6 | #' @param redcap_project_uri URI (Uniform Resource Identifier) for the REDCap instance.
 7 | #' @param redcap_project_token API (Application Programming Interface) for the REDCap project.
 8 | #' @param df_new Dataframe of record_id (mandatory), and other columns (optional) that match the names / required formats of the original dataset. Columns not specified will be uploaded as blank.
 9 | #' @import dplyr
10 | #' @importFrom REDCapR redcap_read_oneshot redcap_write_oneshot
11 | #' @return REDCap project updated to include new record_id.
12 | #' @export
13 | 
14 | upload_record_new <- function(redcap_project_uri,
15 |                               redcap_project_token,
16 |                               df_new){
17 | 
18 |   "%ni%" <- Negate("%in%")
19 | 
20 |   # Get original dataset
21 |   df_original <- REDCapR::redcap_read_oneshot(redcap_uri=redcap_project_uri,
22 |                                               token = redcap_project_token,
23 |                                               export_data_access_groups = TRUE)$data
24 | 
25 | 
26 |   # Ensure not overriding existing record_id (must be new)
27 |   df_new <- dplyr::filter(df_new, record_id %ni% df_original$record_id)
28 | 
29 |   # Create new (clean) records
30 |   df_clean <- df_original %>%
31 |     dplyr::select(-(one_of(names(df_new)))) %>%
32 |     head(nrow(df_new)) %>%
33 |     dplyr::mutate_at(vars(ends_with("_complete")), function(x){x=0}) %>%
34 |     dplyr::mutate_at(vars(-ends_with("_complete")), function(x){x=NA})
35 | 
36 |   # New dataset (same column order as original dataset - for merge)
37 |   df_new_upload <- dplyr::bind_cols(df_new,df_clean) %>%
38 |     dplyr::select(names(df_original))
39 | 
40 |   # Upload updated dataset
41 |   REDCapR::redcap_write_oneshot(ds = rbind.data.frame(df_original, df_new_upload),
42 |                                 redcap_uri = redcap_project_uri,
43 |                                 token = redcap_project_token)}
44 | 


--------------------------------------------------------------------------------
/R/example_data.R:
--------------------------------------------------------------------------------
 1 | # Documentation of datasets --------------------------------
 2 | # example_data_dict
 3 | #' @docType data
 4 | #' @title Example dataset of surgical patients
 5 | #' @name example_data_dict
 6 | #' @aliases example_data_dict
 7 | #' @description Example dataset to be used within the \href{https://github.com/kamclean/collaborator/blob/master/vignettes/vignette_data_dict.md}{Generating a Simple, Easily-Shareable Data Dictionary} vignette (all data randomly generated).
 8 | #' @format A data frame with 20 rows and 11 variables.
 9 | #' @usage data(example_data_dict)
10 | #' @keywords data
11 | NULL
12 | 
13 | # example_report_author
14 | #' @docType data
15 | #' @title Example dataset of authors
16 | #' @name example_report_author
17 | #' @aliases example_report_author
18 | #' @description Example dataset to be used within the \href{https://github.com/kamclean/collaborator/blob/master/vignettes/vignette_authors.md}{Generating Authorship Lists} vignette (all data randomly generated).
19 | #' @format A data frame with 100 rows and 3 variables.
20 | #' @usage data(example_report_author)
21 | #' @keywords data
22 | NULL
23 | 
24 | 
25 | # example_df_user
26 | #' @docType data
27 | #' @title Example dataset of REDCap users
28 | #' @name example_df_user
29 | #' @aliases example_df_user
30 | #' @description Example dataset to be used within the \href{https://github.com/kamclean/collaborator/blob/master/vignettes/vignette_user_1_explore.md}{Redcap User Management: 1. Explore Current Users} vignette (all data randomly generated).
31 | #' @format A data frame with 34 rows and 2 variables.
32 | #' @usage data(example_df_user)
33 | #' @keywords data
34 | NULL
35 | 


--------------------------------------------------------------------------------
/R/group2csv.R:
--------------------------------------------------------------------------------
 1 | # group2csv------------------------------
 2 | # Documentation
 3 | #' Split a tibble/dataframe into CSV by "group" variable
 4 | #' @description Split a tibble/dataframe by "group" variable, then save in a subfolder as CSV.
 5 | #' @param data Dataframe with at least 1 column - corresponding to a "group".
 6 | #' @param group Grouping variable (must be unique values) who will recieve unique email.
 7 | #' @param subfolder Folder within working directory (e.g. string entered into here::here()) where CSV will be stored. Default = "folder_csv".
 8 | #' @param file_prefix String to be prefixed to "group" when naming CSV file.
 9 | #' @param file_suffix String to be suffixed to "group" when naming CSV file.
10 | #' @return Dataframe of group AND csv file path ("file").
11 | #' @import dplyr
12 | #' @import readr
13 | #' @import here
14 | #' @import tibble
15 | #' @importFrom purrr map
16 | #' @export
17 | 
18 | # Function
19 | group2csv <- function(data, group, subfolder = "folder_csv", file_prefix = "", file_suffix = ""){
20 | 
21 |   dir.create(subfolder, showWarnings = FALSE)
22 | 
23 |   data <- data %>%
24 |     dplyr::mutate(group = dplyr::pull(., group)) %>%
25 |     dplyr::filter(is.na(group)==F)
26 | 
27 |   data_group <- data %>%
28 |     dplyr::group_by(group) %>%
29 |     dplyr::summarise(file = paste0(subfolder, "/", file_prefix, unique(group), file_suffix, ".csv"))
30 | 
31 | 
32 |   data %>%
33 |     # Split by group into separate datasets
34 |     dplyr::group_split(group) %>%
35 | 
36 |     purrr::map(function(x){readr::write_csv(x = x,
37 |                                             path = paste0(subfolder, "/",
38 |                                                           file_prefix, unique(x$group),
39 |                                                           file_suffix, ".csv"))})
40 | 
41 |   return(data_group)}
42 | 
43 | 


--------------------------------------------------------------------------------
/R/orcid_name.R:
--------------------------------------------------------------------------------
 1 | # orcid_name--------------------------------
 2 | # Documentation
 3 | #' Pull first name(s) and last name for a given list of orcid
 4 | #' @description Pull and format first name(s) and last name for a given list of orcid
 5 | #' @param data datafame containing a vector of ORCID (XXXX-XXXX-XXXX-XXXX format)
 6 | #' @param orcid Column name of vector containing ORCID (default = "orcid")
 7 | #' @param reason Logical value to determine whether output should include reasons for NA values (default = FALSE) or vector of ORCID (TRUE).
 8 | #' @param na.rm Remove NA (invalid ORCID) from output (default = TRUE)
 9 | #' @return Dataframe with 5 mandatory columns: orcid, full name, first name(s), last name, publication name.
10 | #' @import dplyr
11 | #' @import tidyr
12 | #' @import tibble
13 | #' @import xml2
14 | #' @import stringr
15 | #' @importFrom furrr future_map future_map_dfr
16 | #' @importFrom httr GET content
17 | #' @export
18 | 
19 | orcid_name <- function(data, orcid = "orcid", reason = FALSE, na.rm = FALSE){
20 |   # Packages / Functions
21 |   require(dplyr);require(furrr);require(httr);require(dplyr);
22 |   require(tibble);require(tidyr);require(stringr);library(magrittr)
23 | 
24 |   data <- data %>%
25 |     dplyr::mutate(orcid = pull(., orcid)) %>%
26 |     dplyr::select(-starts_with("orcid_check_"),-starts_with("orcid_valid"), -starts_with("orcid_name_")) %>%
27 |     collaborator::orcid_valid(orcid = orcid, reason = T, na.rm = F)
28 | 
29 |   # Check extract name from valid orcids
30 |   input_orcid <- data %>%
31 |     dplyr::filter(is.na(orcid_valid)==F) %>%
32 |     dplyr::distinct(orcid_valid) %>%
33 |     dplyr::pull(orcid_valid)
34 | 
35 |   # https://info.orcid.org/documentation/api-tutorials/api-tutorial-searching-the-orcid-registry/
36 |   output <- input_orcid %>%
37 |     furrr::future_map(function(x){
38 |       tryCatch(
39 |         httr::GET(url = paste0("https://pub.orcid.org/v3.0/",
40 |                                x,
41 |                                "/personal-details")) %>%
42 |           httr::content(as = "parsed", encoding = "UTF-8"),
43 | 
44 | 
45 |         error = function(e){NA})}) %>%
46 |     furrr::future_map_dfr(function(x){
47 | 
48 |       if(is.null(x$name$path)==T){out <- tibble::tibble(orcid_check_access = "No",
49 |                                                         "orcid_name_first" = NA,
50 |                                                         "orcid_name_last" = NA,
51 |                                                         "orcid_name_credit" = NA)}
52 | 
53 |       if(is.null(x$name$path)==F){out <-  tibble::tibble(orcid = x$name$path,
54 |                                                          orcid_check_access = "Yes",
55 |                                                          "orcid_name_first" = ifelse(is.null(x$name$`given-names`$value)==T,
56 |                                                                                      NA, x$name$`given-names`$value),
57 |                                                          "orcid_name_last" = ifelse(is.null(x$name$`family-name`$value)==T,
58 |                                                                                     NA, x$name$`family-name`$value),
59 |                                                          "orcid_name_credit" = ifelse(is.null(x$name$`credit-name`$value)==T,
60 |                                                                                       NA, x$name$`credit-name`$value))}}) %>%
61 |     dplyr::mutate(orcid_name_credit = ifelse(is.na(orcid_name_credit)==T&(is.na(orcid_name_first)==F&is.na(orcid_name_last)==F),
62 |                                              paste0(orcid_name_first, " ", orcid_name_last), orcid_name_credit),
63 |                   orcid_name_credit_first = stringr::str_remove(orcid_name_credit, orcid_name_last) %>% stringr::str_trim())
64 | 
65 | 
66 |   final <- data %>%
67 |     dplyr::select(-any_of(names(output)[! names(output) %in% c("orcid")])) %>%
68 |     dplyr::left_join(output, by = c("orcid_valid"="orcid")) %>%
69 |     dplyr::select(orcid, everything())  %>%
70 |     dplyr::mutate(orcid_check_access = ifelse(is.na(orcid_check_access)==T, "No", orcid_check_access)) %>%
71 | 
72 |     # Check if name extracted
73 |     dplyr::mutate(orcid_check_name = ifelse(is.na(orcid_name_first)==T|is.na(orcid_name_last)==T, "No", "Yes")) %>%
74 |     dplyr::mutate(orcid_valid_yn = ifelse(orcid_valid_yn=="Yes"&orcid_check_name=="Yes"&orcid_check_access=="Yes",
75 |                                           "Yes", "No")) %>%
76 |     dplyr::mutate(orcid_valid = ifelse(orcid_valid_yn=="No", NA, orcid_valid)) %>%
77 |     dplyr::mutate(orcid_valid_reason = ifelse(is.na(orcid_valid_reason)==T&orcid_check_name=="No",
78 |                                               "No first or last name recorded", orcid_valid_reason),
79 |                   orcid_valid_reason = ifelse(is.na(orcid_check_access)==T&orcid_check_access=="No",
80 |                                               "Unable to access orcid record", orcid_valid_reason)) %>%
81 |     dplyr::select(any_of(names(data)[stringr::str_detect(names(data), "orcid")==F]),
82 |                   orcid, orcid_valid, starts_with("orcid_name_"), starts_with("orcid_valid_"), starts_with("orcid_check_"))
83 | 
84 |   if(na.rm==T){final <- final %>% dplyr::filter(is.na(orcid_name_first)==F&is.na(orcid_name_last)==F)}
85 | 
86 |   if(na.rm==F&reason==F){final <- final %>% dplyr::select(-starts_with("orcid_check_"),-starts_with("orcid_valid_"))}
87 | 
88 |   return(final)}
89 | 


--------------------------------------------------------------------------------
/R/orcid_valid.R:
--------------------------------------------------------------------------------
 1 | # orcid_valid--------------------------------
 2 | # Documentation
 3 | #' Validate vector of ORCID
 4 | #' @description Validate vector of ORCID based number of digits / format / checksum.
 5 | #' @param data datafame containing a vector of ORCID
 6 | #' @param orcid Column name of vector containing ORCID (default = "orcid")
 7 | #' @param reason Logical value to determine whether output should include reasons for validity (default = FALSE) or vector of ORCID (TRUE).
 8 | #' @param na.rm Remove NA (invalid ORCID) from output
 9 | #' @return Vector of orcid (reason = FALSE) or tibble with columns specifying the validation checks failed by the ORCID ("check_" columns)
10 | #' @import dplyr
11 | #' @importFrom tibble enframe
12 | #' @importFrom tidyr separate drop_na
13 | #' @importFrom stringr str_sub str_remove_all
14 | #' @export
15 | 
16 | orcid_valid <- function(data, orcid = "orcid", reason = FALSE, na.rm = FALSE){
17 |   require(dplyr);require(tibble);require(tidyr);require(stringr)
18 |   check_numeric <- function(x){suppressWarnings(is.numeric(as.numeric(x))==T&is.na(as.numeric(x))==F)}
19 | 
20 |   # https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier.
21 |   out <- data %>%
22 |     dplyr::mutate(orcid = dplyr::pull(., orcid)) %>%
23 |     dplyr::select(orcid) %>%
24 |     tibble::rowid_to_column(var = "name") %>%
25 |     dplyr::mutate(orcid_raw = stringr::str_remove_all(orcid, pattern = "[[:punct:]]") %>%
26 |                     stringr::str_trim() %>% toupper(),
27 |                   check_present = ifelse(is.na(orcid)==F, "Yes", "No")) %>%
28 |     dplyr::mutate(check_length = ifelse(is.na(orcid_raw)==F&nchar(orcid_raw)==16, "Yes", "No")) %>%
29 |     dplyr::mutate(digit15 = ifelse(check_length=="Yes", stringr::str_sub(orcid_raw,1,15), NA),
30 |                   digit16 = ifelse(check_length=="Yes", stringr::str_sub(orcid_raw,16,16), NA)) %>%
31 |     dplyr::mutate(check_format = ifelse(check_numeric(digit15)==T&(check_numeric(digit16)==T|digit16=="X"), "Yes", "No")) %>%
32 |     dplyr::select(name, orcid_raw, "orcid_original" = orcid, check_present, check_length, check_format)
33 | 
34 |   orcid_valid <- out %>%
35 |     dplyr::filter_at(vars(contains("check_")), all_vars(.=="Yes")) %>%
36 |     dplyr::select(name, orcid_raw) %>%
37 | 
38 |     #separate each number
39 |     dplyr::mutate(orcid = gsub('(?=(?:.{1})+$)', "-", orcid_raw, perl = TRUE) %>%
40 |                     stringr::str_sub(2, nchar(.))) %>%
41 |     tidyr::separate(col = orcid, into = paste0("orcid_", seq(1:16)), sep ="-", remove =F) %>%
42 |     dplyr::mutate_at(vars(orcid_1:orcid_15), as.numeric) %>%
43 |     dplyr::mutate(check_digit =  orcid_1*2,
44 |                   check_digit = (orcid_2+check_digit)*2,
45 |                   check_digit = (orcid_3+check_digit)*2,
46 |                   check_digit = (orcid_4+check_digit)*2,
47 |                   check_digit = (orcid_5+check_digit)*2,
48 |                   check_digit = (orcid_6+check_digit)*2,
49 |                   check_digit = (orcid_7+check_digit)*2,
50 |                   check_digit = (orcid_8+check_digit)*2,
51 |                   check_digit = (orcid_9+check_digit)*2,
52 |                   check_digit = (orcid_10+check_digit)*2,
53 |                   check_digit = (orcid_11+check_digit)*2,
54 |                   check_digit = (orcid_12+check_digit)*2,
55 |                   check_digit = (orcid_13+check_digit)*2,
56 |                   check_digit = (orcid_14+check_digit)*2,
57 |                   check_digit = (orcid_15+check_digit)*2) %>%
58 |     dplyr::mutate(check_digit = check_digit %% 11) %>% # %% == remainder
59 |     dplyr::mutate(check_digit = (12-check_digit) %% 11) %>%
60 |     dplyr::mutate(check_digit = ifelse(check_digit==10, "X", ifelse(check_digit==11, 0, check_digit))) %>%
61 |     dplyr::mutate(check_sum = ifelse(as.character(check_digit)==as.character(orcid_16), "Yes", "No")) %>%
62 |     dplyr::select(orcid_raw, check_sum) %>%
63 |     dplyr::distinct()
64 | 
65 |   out <- out %>%
66 |     dplyr::left_join(orcid_valid, by = "orcid_raw") %>%
67 |     dplyr::mutate_at(vars(contains("check_")), function(x){ifelse(is.na(x)==T, "No", x)}) %>%
68 |     dplyr::mutate(valid_yn = ifelse(check_present=="Yes"&check_length=="Yes"&check_format=="Yes"&check_sum=="Yes",
69 |                                     "Yes", "No"),
70 |                   orcid_valid = ifelse(valid_yn=="Yes",orcid_raw, NA)) %>%
71 |     dplyr::mutate(reason = ifelse(check_present=="No", "Missing ORCID", ""),
72 |                   reason = ifelse(check_present=="Yes"&check_length=="No",
73 |                                        paste0(reason,", Not 16 characters"), reason),
74 |                   reason = ifelse(check_present=="Yes"&check_format=="No",
75 |                                        paste0(reason,", Not ORCID format"), reason),
76 |                   reason = ifelse(check_present=="Yes"&check_sum=="No",
77 |                                        paste0(reason,", Failed checksum"), reason)) %>%
78 |     dplyr::mutate(reason = stringr::str_remove(reason, "^, ")) %>%
79 |     dplyr::mutate(reason = ifelse(reason =="", NA, reason)) %>%
80 | 
81 |     dplyr::mutate(orcid_valid = gsub('(?=(?:.{4})+$)', "-", orcid_valid, perl = TRUE) %>% stringr::str_sub(2, nchar(.))) %>%
82 |     dplyr::select("orcid" = orcid_original,
83 |                   "orcid_valid_yn" = valid_yn, orcid_valid, "orcid_valid_reason" = reason,
84 |                   starts_with("check_")) %>%
85 |     dplyr::rename_at(vars(starts_with("check_")), function(x){stringr::str_replace(x, "check_", "orcid_check_")})
86 | 
87 |   data_out <- dplyr::bind_cols(data, dplyr::select(out, -orcid))
88 | 
89 |   if(na.rm==T){data_out <- data_out %>% dplyr::filter(is.na(orcid_valid)==F)}
90 | 
91 |   if(reason==F){data_out <- data_out %>% dplyr::select(-starts_with("orcid_check_"), -orcid_valid_yn, -orcid_valid_reason)}
92 | 
93 |   return(data_out)}
94 | 


--------------------------------------------------------------------------------
/R/redcap_compare.R:
--------------------------------------------------------------------------------
  1 | # redcap_compare--------------------------------
  2 | # Documentation
  3 | #' Compare multiple REDCap projects to determine discrepancies in structure or user rights.
  4 | #' @description Used to compare multiple REDCap projects to determine discrepancies in structure or user rights.
  5 | #' @param redcap_project_uri URI (Uniform Resource Identifier) for the REDCap instance (must all be on same REDCap instance)
  6 | #' @param redcap_token_list List of API (Application Programming Interface) for the REDCap projects.
  7 | #' @param comparison What should be compared - project structure ("metadata") or user roles ("role").
  8 | #' @return Nested tibble of the full comparison across projects ("full") and the specific discrepancies ("discrepancies").
  9 | #' @import dplyr
 10 | #' @import tibble
 11 | #' @importFrom httr POST content
 12 | #' @importFrom purrr map_chr map_df is_empty map
 13 | #' @importFrom readr read_csv
 14 | #' @importFrom stringr str_split
 15 | #' @export
 16 | 
 17 | redcap_compare <- function(redcap_project_uri, redcap_token_list, comparison){
 18 | 
 19 | 
 20 |   output <- NULL
 21 |   if(comparison=="metadata"){
 22 |     metadata <- redcap_token_list %>%
 23 |       purrr::map_df(function(x){
 24 | 
 25 |         title = httr::POST(url = redcap_project_uri,
 26 |                            body = list("token"=x,
 27 |                                        content='project',
 28 |                                        format='csv',
 29 |                                        returnFormat='json'), encode = "form") %>%
 30 |           httr::content(type = "text/csv",show_col_types = FALSE,
 31 |                         guess_max = 100000, encoding = "UTF-8") %>%
 32 |           dplyr::pull(project_title)
 33 | 
 34 |         out <- collaborator::redcap_metadata(redcap_project_uri = redcap_project_uri, redcap_project_token = x) %>%
 35 |           dplyr::mutate(project = title) %>%
 36 |           dplyr::select(project, everything())
 37 | 
 38 | 
 39 |         return(out)}) %>%
 40 |       dplyr::mutate(project = factor(project),
 41 |                     variable_name = factor(variable_name, levels = unique(variable_name)))
 42 | 
 43 |     meta_constant <- c("project", "n")
 44 | 
 45 | 
 46 |     full <- metadata %>%
 47 |       dplyr::group_by(variable_name, variable_label, variable_type,
 48 |                       variable_validation, variable_validation_min, variable_validation_max,
 49 |                       factor_level, factor_label,
 50 |                       branch_logic) %>%
 51 |       dplyr::summarise(n = n(),
 52 |                        project = paste0(unique(project), collapse = ", "),
 53 |                        .groups = "drop") %>%
 54 |       dplyr::select(all_of(meta_constant), everything())
 55 | 
 56 |     keep_col <- full %>%
 57 |       dplyr::filter(n!=length(redcap_token_list)) %>%
 58 |       purrr::map_chr(function(x){length(unique(x)) > 1}) %>% tibble::enframe() %>% filter(value ==T) %>%
 59 |       dplyr::pull(name)
 60 | 
 61 |     if(purrr::is_empty(keep_col)){keep_col <- NULL}else{keep_col <- c("variable_name", keep_col)}
 62 | 
 63 |     discrepancies <- full %>%
 64 |       dplyr::filter(n!=length(redcap_token_list)) %>%
 65 |       dplyr::select(all_of(meta_constant), dplyr::all_of(c(keep_col)))
 66 | 
 67 |     output <- list("full" = full %>% dplyr::filter(n==length(redcap_token_list)),
 68 |                    "discrepancies" =discrepancies)}
 69 | 
 70 | 
 71 |   if(comparison=="role"){
 72 |     userdata <- redcap_token_list %>%
 73 |       purrr::map_df(function(x){
 74 | 
 75 |         title = httr::POST(url = redcap_project_uri,
 76 |                            body = list("token"=x,
 77 |                                        content='project',
 78 |                                        format='csv',
 79 |                                        returnFormat='json'), encode = "form") %>%
 80 |           httr::content(type = "text/csv",show_col_types = FALSE,
 81 |                         guess_max = 100000, encoding = "UTF-8") %>%
 82 |           dplyr::pull(project_title)
 83 | 
 84 |         out <- user_role(redcap_project_uri = redcap_project_uri, redcap_project_token =x,
 85 |                          show_rights = T, remove_id = T)$sum %>%
 86 |           dplyr::mutate(project = title) %>%
 87 | 
 88 |           dplyr::select(project, role_name, "user_n" = n, user_name = "username", everything())
 89 | 
 90 |         return(out)}) %>%
 91 |       dplyr::mutate(project = factor(project))
 92 | 
 93 |     user_constant <- c("project", "role_name", "user_n", "user_name")
 94 | 
 95 |     full <- userdata %>%
 96 |       group_by(across(starts_with("right_"))) %>%
 97 |       dplyr::summarise(n = n(),
 98 |                        project = list(as.character(unique(project))),
 99 |                        role_name = list(as.character(unique(role_name))),
100 |                        user_name = unique(user_name),
101 |                        .groups = "drop") %>%
102 |       dplyr::mutate(user_n = purrr::map_chr(user_name, function(x){length(x)}),
103 |                     user_name = purrr::map(user_name, function(x){unique(x) %>% paste0(collapse = "; ")}),
104 |                     project = purrr::map(project, function(x){unique(x) %>% paste0(collapse = "; ")}),
105 |                     role_name = purrr::map(role_name, function(x){unique(x) %>% paste0(collapse = "; ")})) %>%
106 |       dplyr::select(any_of(user_constant), everything())
107 | 
108 |     keep_col <- full %>%
109 |       dplyr::filter(n!=length(redcap_token_list)) %>%
110 |       purrr::map_chr(function(x){length(unique(x)) > 1}) %>% tibble::enframe() %>% filter(value ==T) %>%
111 |       dplyr::pull(name)
112 | 
113 |     discrepancies <- full %>%
114 |       dplyr::filter(n!=length(redcap_token_list)) %>%
115 |       dplyr::select(all_of(user_constant), dplyr::all_of(keep_col))
116 | 
117 |     output <- list("full" = full)}
118 | 
119 | 
120 |   return(output)}
121 | 


--------------------------------------------------------------------------------
/R/redcap_format_repeat.R:
--------------------------------------------------------------------------------
 1 | # redcap_format_repeat--------------------------------
 2 | # Documentation
 3 | #' Change structure of repeating data
 4 | #' @description Change structure of repeating data from redcap_data from long to either list or wide.
 5 | #' @param data Output from redcap_data$data
 6 | #' @param format The format the repeating instrument data should be provided in. Options include "long" (default), "wide" (each instance a separate column), or "list" (nested instances).
 7 | #' @return Dataframe
 8 | #' @import dplyr
 9 | #' @import tidyr
10 | #' @export
11 | 
12 | # Function:
13 | 
14 | redcap_format_repeat <- function(data, format = "long"){
15 | 
16 |   if(("redcap_repeat_instance" %in% names(data))==F){stop("Must contain repeating instruments.")}
17 | 
18 | 
19 |   if(format=="long"){output <- data}
20 | 
21 |   if(format%in% c("list", "wide")){
22 | 
23 |     data_repeat <- data %>%
24 |       dplyr::select(record_id, redcap_repeat_instance:last_col())
25 | 
26 |     data_norepeat <- data %>%
27 |       dplyr::select(-all_of(names(data_repeat)[! (names(data_repeat) %in% "record_id")]))
28 | 
29 |     if(format=="list"){
30 |       data_repeat <- data_repeat %>%
31 |         dplyr::group_by(record_id) %>%
32 |         dplyr::summarise(across(everything(), function(x){list(x)}))}
33 | 
34 |     if(format=="wide"){
35 |       data_repeat <- data_repeat %>%
36 |         dplyr::group_by(record_id) %>%
37 |         tidyr::pivot_wider(id_cols = "record_id", names_from = "redcap_repeat_instance",
38 |                            values_from = names(data_repeat)[! (names(data_repeat) %in% c("record_id", "redcap_repeat_instance"))],
39 |                            names_prefix = "instance")}
40 | 
41 |     output <- data_norepeat %>%
42 |       dplyr::distinct() %>%
43 |       left_join(data_repeat, by = c("record_id"))}
44 | 
45 |   return(output)}
46 | 


--------------------------------------------------------------------------------
/R/redcap_log.R:
--------------------------------------------------------------------------------
 1 | # redcap_log--------------------------------
 2 | # Documentation
 3 | #' Export the redcap log
 4 | #' @description Export the redcap log to gain insight into redcap changes over time
 5 | #' @param redcap_project_uri URI (Uniform Resource Identifier) for the REDCap instance.
 6 | #' @param redcap_project_token API (Application Programming Interface) for the REDCap project.
 7 | #' @param date_start Limit to the start date extracted in the format of YYYY-MM-DD (default = NULL)
 8 | #' @param date_end  Limit to the end date extracted in the format of YYYY-MM-DD (default = NULL)
 9 | #' @param item A list of all types of events wanted to be exported (default = NULL aka all). These can include "record", "user", "page_view","lock_record", "manage", "record_add", "record_edit", "record_delete", "export".
10 | #' @return Logging record of the specified events
11 | #' @import dplyr
12 | #' @importFrom purrr map_df
13 | #' @importFrom lubridate day month year origin
14 | #' @importFrom httr POST content
15 | #' @importFrom readr read_csv
16 | #' @export
17 | #'
18 | 
19 | redcap_log <- function(redcap_project_uri, redcap_project_token,
20 |                        date_start = NULL, date_end = NULL,
21 |                        item = NULL){
22 | 
23 |   if(is.null(date_start)==T){date_start=''}else{date_start = paste0(date_start, " 00:00")}
24 |   if(is.null(date_end)==T){date_end=''}else{date_end = paste0(date_end, " 23:59")}
25 |   if(is.null(item)==T){item=''}
26 | 
27 | 
28 |   df_log <- item %>%
29 |     purrr::map_df(function(x){
30 |       httr::POST(url = redcap_project_uri,
31 |                  body = list("token"=redcap_project_token, content='log',
32 |                              logtype=x,
33 |                              user='', record='',
34 |                              beginTime = date_start, endTime = date_end,
35 |                              format='csv', returnFormat='json'),
36 | 
37 |                  encode = "form") %>%
38 |         httr::content(type = "text/csv",show_col_types = FALSE,
39 |                       guess_max = 100000, encoding = "UTF-8")})
40 | 
41 |   return(df_log)}
42 | 


--------------------------------------------------------------------------------
/R/redcap_metadata.R:
--------------------------------------------------------------------------------
  1 | # redcap_metadata--------------------------------
  2 | # Documentation
  3 | #' Export REDCap metadata (with individual checkbox variables if present) and variable class in R.
  4 | #' @description Used to generate high quality summary data for REDCap projects at overall, and DAG-specific level.
  5 | #' @param redcap_project_uri URI (Uniform Resource Identifier) for the REDCap instance.
  6 | #' @param redcap_project_token API (Application Programming Interface) for the REDCap project.
  7 | #' @param descriptive Logical value whether to include descriptive fields within the dataset (default = FALSE)
  8 | #' @return Tibble of REDCap project metadata (with individual checkbox variables if present) and variable class in R.
  9 | #' @import dplyr
 10 | #' @importFrom httr POST content
 11 | #' @importFrom tidyr separate_rows
 12 | #' @importFrom purrr map
 13 | #' @importFrom stringr str_split_fixed
 14 | #' @importFrom stringi stri_replace_all_fixed
 15 | #'
 16 | #' @export
 17 | 
 18 | redcap_metadata <- function(redcap_project_uri, redcap_project_token, descriptive = FALSE){
 19 |   require(dplyr); require(httr); require(tidyr); require(stringr); require(purrr); require(stringi)
 20 | 
 21 |   if(descriptive==F){var_descriptive <- NULL}
 22 | 
 23 |   df_meta <- httr::POST(url = redcap_project_uri,
 24 |                         body = list("token"=redcap_project_token,
 25 |                                     content='metadata',
 26 |                                     action='export',
 27 |                                     format='csv',
 28 |                                     type='flat',
 29 |                                     returnFormat='json'),
 30 |                         encode = "form") %>%
 31 |     httr::content(type = "text/csv",show_col_types = FALSE,
 32 |                   guess_max = 100000, encoding = "UTF-8") %>%
 33 |     dplyr::select(form_name, "matrix_name" = matrix_group_name, "variable_name" = field_name, "variable_type" = field_type,
 34 |                   "variable_validation" = text_validation_type_or_show_slider_number,
 35 |                   "variable_validation_min" = text_validation_min, "variable_validation_max" = text_validation_max,
 36 |                   "branch_logic" = branching_logic, "variable_identifier" = identifier,"variable_required" =required_field,
 37 |                   "variable_label" = field_label, select_choices_or_calculations) %>%
 38 | 
 39 |     # remove any html coding from text
 40 |     dplyr::mutate(variable_label = ifelse(variable_type=="descriptive", variable_label, gsub("<.*?>", "", variable_label))) %>%
 41 |     dplyr::mutate(variable_identifier = ifelse(variable_identifier=="y"&is.na(variable_identifier)==F, "Yes", "No"),
 42 |                   variable_required = ifelse(variable_required=="y"&is.na(variable_required)==F, "Yes", "No"),
 43 |                   n = 1:n(),
 44 |                   altrecord_id = case_when(n==1&variable_name!="record_id" ~ variable_name,
 45 |                                            TRUE ~ NA),
 46 |                   variable_name = case_when(n==1&variable_name!="record_id" ~ "record_id",
 47 |                                             TRUE ~ variable_name)) %>%
 48 |     dplyr::filter(! variable_type %in% var_descriptive)
 49 | 
 50 |   # add in checkbox variables
 51 |   if("checkbox" %in% df_meta$variable_type){
 52 |     df_meta_xbox <- df_meta %>%
 53 |       dplyr::filter(variable_type %in% "checkbox") %>%
 54 |       tidyr::separate_rows(select_choices_or_calculations, sep = "\\|", convert = FALSE) %>%
 55 |       dplyr::mutate(factor_n = trimws(stringr::str_split_fixed(select_choices_or_calculations, ", ", 2)[,1]),
 56 |                     select_choices_or_calculations = stringr::str_split_fixed(trimws(select_choices_or_calculations), ", ", 2)[,2]) %>%
 57 |       dplyr::mutate(variable_name_original = variable_name,
 58 |                     variable_xbox_original = paste0(variable_name, "(", factor_n, ")"),
 59 |                     variable_name = paste0(variable_name, "___", factor_n),
 60 |                     variable_label = paste0(variable_label, " {", select_choices_or_calculations, "}"))
 61 | 
 62 |     df_meta <- df_meta %>%
 63 |       dplyr::mutate(factor_n = NA,
 64 |                     variable_name_original = variable_name,
 65 |                     variable_xbox_original = NA) %>%
 66 |       dplyr::filter(! variable_type %in% "checkbox") %>%
 67 |       dplyr::bind_rows(df_meta_xbox) %>%
 68 |       dplyr::mutate(variable_name_original = factor(variable_name_original, levels = df_meta$variable_name)) %>%
 69 |       # dplyr::arrange(variable_name_original, factor_n) %>% ? Error: `...` is not empty.
 70 |       dplyr::select(-variable_name_original, -factor_n,-variable_xbox_original)
 71 | 
 72 |     for(i in c(1:nrow(df_meta_xbox))){
 73 |       df_meta <- df_meta %>%
 74 |         dplyr::mutate(branch_logic = iconv(tolower(as.character(branch_logic)), to ="ASCII//TRANSLIT"),
 75 |                       variable_name = iconv(tolower(as.character(variable_name)), to ="ASCII//TRANSLIT")) %>%
 76 |         dplyr::mutate(branch_logic = stringi::stri_replace_all_fixed(branch_logic,
 77 |                                                                      df_meta_xbox$variable_xbox_original[[i]],
 78 |                                                                      df_meta_xbox$variable_name[[i]]))}}
 79 | 
 80 | 
 81 | 
 82 |   # Factors
 83 |   factor_01 <- NULL
 84 |   factor_other <- NULL
 85 | 
 86 |   if("checkbox" %in% df_meta$variable_type| "yesno" %in% df_meta$variable_type){
 87 |     factor_01 <- df_meta %>%
 88 |       dplyr::filter(variable_type %in% c("checkbox", "yesno")) %>%
 89 |       dplyr::mutate(factor_level = rep(list(c(0, 1)), nrow(.)),
 90 |                     factor_label = rep(list(c("No", "Yes")),nrow(.))) %>%
 91 |       dplyr::select(variable_name, factor_level, factor_label)}
 92 | 
 93 |   if("radio" %in% df_meta$variable_type| "dropdown" %in% df_meta$variable_type){
 94 |     factor_other <- df_meta %>%
 95 |       dplyr::filter(variable_type %in% c("radio", "dropdown")) %>%
 96 |       tidyr::separate_rows(select_choices_or_calculations, sep = "\\|") %>%
 97 |       dplyr::mutate(select_choices_or_calculations = trimws(select_choices_or_calculations)) %>%
 98 |       dplyr::mutate(factor_level = stringr::str_split_fixed(select_choices_or_calculations, ", ", 2)[,1],
 99 |                     factor_label = stringr::str_split_fixed(select_choices_or_calculations, ", ", 2)[,2]) %>%
100 |       dplyr::group_by(variable_name, factor_label) %>%
101 |       dplyr::mutate(factor_dup =1:n()) %>%
102 |       dplyr::ungroup() %>%
103 |       dplyr::mutate(factor_label = ifelse(factor_dup>1, paste0(factor_label, "_", factor_dup), factor_label)) %>%
104 |       dplyr::group_by(variable_name) %>%
105 |       dplyr::summarise(factor_level = list(factor_level),
106 |                        factor_label = list(factor_label)) %>%
107 |       dplyr::ungroup()}
108 | 
109 |   factor_all <- dplyr::bind_rows(factor_01, factor_other)
110 | 
111 |   if(nrow(factor_all)>0){
112 |     df_meta <- df_meta %>%
113 |       dplyr::left_join(factor_all, by = "variable_name") %>%
114 |       dplyr::mutate(class = purrr::map(factor_level, function(x){ifelse(is.null(x)==T, NA, "factor")})) %>%
115 |       dplyr::mutate(class = as.character(class) %>% ifelse(.=="NA", NA, .)) %>%
116 |       dplyr::select(class, everything())}else{df_meta <- df_meta %>%
117 |         dplyr::mutate(class = NA, factor_level = NA, factor_label = NA) %>%
118 |         dplyr::select(class, everything())}
119 | 
120 |   # Other variable types
121 |   output <- df_meta %>%
122 |     dplyr::mutate(class = ifelse(variable_type %in% c("slider", "calc")|(variable_type=="text" & variable_validation %in% c("number", "integer")),
123 |                                  "numeric", class),
124 |                   class = ifelse(variable_type == "text" & grepl("date_", variable_validation), "date", class),
125 |                   class = ifelse(variable_type == "text" & grepl("datetime_", variable_validation), "datetime", class),
126 |                   class = ifelse(variable_type %in% "truefalse", "logical", class),
127 |                   class = ifelse(variable_type == "file", "file", class),
128 |                   class = ifelse(is.na(class), "character", class)) %>%
129 | 
130 |     # have sliders have a variable_validation_min and variable_validation_max
131 |     # (not directly exported - have to rely on labels from select_choices_or_calculations)
132 |     mutate(slidersplit =ifelse(variable_type=="slider", str_split(select_choices_or_calculations, " \\| "), NA),
133 |            variable_validation_min = ifelse(is.na(slidersplit)==F, map_chr(slidersplit, function(x){head(x, 1)}), variable_validation_min),
134 |            variable_validation_max = ifelse(is.na(slidersplit)==F, map_chr(slidersplit, function(x){tail(x, 1)}), variable_validation_max)) %>%
135 | 
136 |     mutate(across(variable_validation_min:variable_validation_max,
137 |                   function(x){case_when(class=="date"&x=="today" ~ as.character(Sys.Date()),
138 |                                         class=="datetime"&x=="today" ~ paste0(Sys.Date(), " 23:59:59"),
139 |                                         TRUE ~ x)}))
140 | 
141 |   # Get event / arm data
142 |   df_event <- tryCatch(httr::POST(url = redcap_project_uri,
143 |                                   body = list("token"=redcap_project_token,
144 |                                               content='formEventMapping',
145 |                                               action='export',
146 |                                               format='csv',
147 |                                               type='flat',
148 |                                               returnFormat='json'),
149 |                                   encode = "form") %>%
150 |                          httr::content(type = "text/csv",show_col_types = FALSE,
151 |                                        guess_max = 100000, encoding = "UTF-8"), error=function(e) NULL)
152 | 
153 | 
154 |   if(nrow(df_event)>0){
155 |     df_event <- df_event %>%
156 |       group_by(form) %>%
157 |       dplyr::summarise_all(function(x){unique(x) %>% list()}) %>%
158 |       dplyr::rename("arm" = arm_num, "redcap_event_name" = unique_event_name, "form_name" = form)
159 | 
160 |     output <- output %>%
161 |       dplyr::left_join(df_event,by = "form_name") %>%
162 |       dplyr::select(form_name, variable_name, matrix_name, class, everything())}else{output <- output %>% dplyr::mutate(arm = list(NA), redcap_event_name = list(NA))}
163 | 
164 |   return(output)}
165 | 


--------------------------------------------------------------------------------
/R/redcap_sum.R:
--------------------------------------------------------------------------------
  1 | # redcap_sum--------------------------------
  2 | # Documentation
  3 | #' Generate REDCap summary data.
  4 | #' @description Used to generate high quality summary data for REDCap projects at overall, and DAG-specific level.
  5 | #' @param redcap_project_uri URI (Uniform Resource Identifier) for the REDCap instance.
  6 | #' @param redcap_project_token API (Application Programming Interface) for the REDCap project.
  7 | #' @param centre_sum Logical value to determine whether data access group-level summaries will be produced (Default = TRUE).
  8 | #' @param top When centre_sum = TRUE, defines output of the number of centres with the most records uploaded (default is top 10).
  9 | #' @param var_include Vector of names of variables that are desired to be specifically used to assess data completness (alternate method from using "var_exclude").
 10 | #' @param var_exclude Vector of names of variables that are desired to be excluded from assessment of data completness (any NA value will be counted as incomplete).
 11 | #' @param user_include Vector of redcap usernames that are desired to be included in the user count (note all users not assigned to a DAG will automatically be excluded).
 12 | #' @param user_exclude Vector of redcap usernames that are desired to be excluded from the user count (note all users not assigned to a DAG will automatically be excluded).
 13 | #' @param record_include Vector of redcap record_id that are desired to be included in the record count.
 14 | #' @param record_exclude Vector of redcap record_id that are desired to be excluded from the record count.
 15 | #' @param dag_include Vector of redcap data access group names that are desired to be included in the record count.
 16 | #' @param dag_exclude Vector of redcap data access group names that are desired to be excluded from the record count.
 17 | #' @return Nested dataframes of (i) overall summary statistics for the project ("sum_overall") (ii). DAG-specific summary statistics for the project ("dag_all") (iii). DAGs with no data uploaded, but users assigned ("dag_nodata") (iv). DAGs with <100% completeness ("dag_incom") (v). The top n recruiting centres ("dag_top").
 18 | #' @import dplyr
 19 | #' @importFrom scales percent
 20 | #' @importFrom lubridate day month year origin
 21 | #' @importFrom httr POST content
 22 | #' @importFrom readr read_csv
 23 | #' @export
 24 | #'
 25 | 
 26 | 
 27 | redcap_sum <- function(redcap_project_uri = NULL, redcap_project_token = NULL,
 28 |                        centre_sum = TRUE, top = 10,
 29 |                        var_include = NULL, var_exclude = NULL,
 30 |                        user_include = NULL, user_exclude = NULL,
 31 |                        dag_exclude = NULL, dag_include = NULL,
 32 |                        record_include = NULL, record_exclude = NULL){
 33 | 
 34 |   # Load functions / packages
 35 |   require(dplyr);require(scales);require(lubridate);require(httr);require(readr)
 36 | 
 37 |   # Dataframe of current records----------------------------
 38 |   # Load data from REDCap
 39 |   df_record <- httr::POST(url = redcap_project_uri,
 40 |                           body = list("token"=redcap_project_token,
 41 |                                       content='record',
 42 |                                       action='export',
 43 |                                       format='csv',
 44 |                                       type='flat',
 45 |                                       csvDelimiter='',
 46 |                                       rawOrLabel='raw',
 47 |                                       rawOrLabelHeaders='raw',
 48 |                                       exportCheckboxLabel='false',
 49 |                                       exportSurveyFields='false',
 50 |                                       exportDataAccessGroups='true',
 51 |                                       returnFormat='json'),
 52 |                           encode = "form") %>%
 53 |     httr::content(type = "text/csv",show_col_types = FALSE,
 54 |                   guess_max = 100000, encoding = "UTF-8") %>%
 55 |     dplyr::select(-contains("_complete")) %>%
 56 |     dplyr::filter(is.na(redcap_data_access_group)==F) %>%
 57 |     dplyr::mutate(redcap_data_access_group = as.character(redcap_data_access_group))
 58 | 
 59 |   if(! "record_id" %in% names(df_record)){
 60 |     if(record_id == "record_id"){stop("record_id column not present in the dataframe, please specify the name of the record_id variable")}
 61 | 
 62 |     df_record <- df_record %>% dplyr::mutate(record_id = pull(., record_id))}
 63 | 
 64 |   # Clean dataset
 65 |   if(is.null(var_exclude)==F){df_record <- df_record %>% dplyr::select(-one_of(var_exclude))}
 66 |   if(is.null(var_include)==F){df_record <- df_record %>% dplyr::select(redcap_data_access_group, all_of(var_include))}
 67 | 
 68 |   if(is.null(dag_exclude)==F){df_record <- df_record %>% dplyr::filter(! redcap_data_access_group %in% dag_exclude)}
 69 |   if(is.null(dag_include)==F){df_record <- df_record %>% dplyr::filter(redcap_data_access_group %in% dag_include)}
 70 | 
 71 |   if(is.null(record_exclude)==F){df_record <- df_record %>% dplyr::filter(! record_id %in% record_exclude)}
 72 |   if(is.null(record_include)==F){df_record <- df_record %>% dplyr::filter(record_id %in% record_include)}
 73 | 
 74 |   # Summarise record by DAG
 75 |   df_record_sum_dag <- df_record %>%
 76 |     dplyr::distinct() %>%
 77 |     # count the number of NA by row (1 = complete record)
 78 |     dplyr::mutate(com = ifelse(rowSums(is.na(.)==T)>0, 0, 1)) %>%
 79 |     dplyr::group_by(redcap_data_access_group) %>%
 80 | 
 81 |     # count number of records / number of complete records by centre
 82 |     dplyr::summarise(record_all = n(),
 83 |                      record_com = sum(com)) %>%
 84 |     dplyr::mutate(prop_com = record_com/record_all,
 85 |                   pct_com = scales::percent(prop_com))
 86 | 
 87 |   # Summarise all records
 88 |   df_record_sum_all <- df_record_sum_dag %>%
 89 |     dplyr::select(redcap_data_access_group, record_all, record_com) %>%
 90 | 
 91 |     # count number of records / number of complete records overall
 92 |     dplyr::summarise(record_dag = nrow(.),
 93 |                      record_all = sum(record_all),
 94 |                      record_com = sum(record_com)) %>%
 95 |     dplyr::mutate(prop_com = record_com/record_all,
 96 |                   pct_com = scales::percent(prop_com))
 97 | 
 98 |   # Dataframe of current users----------------------------
 99 |   # Load data from REDCap
100 | 
101 |   df_user <- httr::POST(url = redcap_project_uri,
102 |                         body = list("token"=redcap_project_token,
103 |                                     content='user',
104 |                                     action='export',
105 |                                     format='csv',
106 |                                     type='flat',
107 |                                     csvDelimiter='',
108 |                                     rawOrLabel='raw',
109 |                                     rawOrLabelHeaders='raw',
110 |                                     returnFormat='json'),
111 |                         encode = "form") %>%
112 |     httr::content(type = "text/csv",show_col_types = FALSE,
113 |                   guess_max = 100000, encoding = "UTF-8") %>%
114 |     dplyr::select("redcap_data_access_group" = data_access_group, username) %>%
115 |     dplyr::filter(is.na(redcap_data_access_group)==F) %>%
116 |     dplyr::mutate(redcap_data_access_group = as.character(redcap_data_access_group))
117 | 
118 |   # Clean dataset
119 |   if(is.null(dag_exclude)==F){df_user <- df_user %>% dplyr::filter(! redcap_data_access_group %in% dag_exclude)}
120 |   if(is.null(dag_include)==F){df_user <- df_user %>% dplyr::filter(redcap_data_access_group %in% dag_include)}
121 | 
122 |   if(is.null(user_exclude)==F){df_user <- df_user %>% dplyr::filter(! username %in% user_exclude)}
123 |   if(is.null(user_include)==F){df_user <- df_user %>% dplyr::filter(username %in% user_include)}
124 | 
125 | 
126 |   # Summarise user by DAG
127 |   df_user_sum_dag <- df_user %>%
128 |     dplyr::group_by(redcap_data_access_group) %>%
129 |     dplyr::summarise(user_all = n())
130 | 
131 |   # Summarise all records
132 |   df_user_sum_all <- df_user %>%
133 |     dplyr::summarise(user_all = n())
134 | 
135 |   # Overall summary output ---------------------------
136 |   sum_overall <- dplyr::bind_cols(df_record_sum_all,
137 |                                   df_user_sum_all,
138 |                                   last_update = paste(lubridate::day(Sys.Date()),
139 |                                                       lubridate::month(Sys.Date(), label=TRUE),
140 |                                                       lubridate::year(Sys.Date()), sep="-")) %>%
141 |     dplyr::select("n_record_all" = record_all, "n_record_com" = record_com,
142 |                   prop_com, pct_com, "n_dag" = record_dag, n_users = "user_all",
143 |                   last_update)
144 | 
145 |   if(centre_sum==F){report_summary <- sum_overall}
146 | 
147 |   # DAG summary output ---------------------------
148 |   if(centre_sum==T){
149 | 
150 |     sum_dag_all <- dplyr::full_join(df_record_sum_dag,
151 |                                     df_user_sum_dag,
152 |                                     by="redcap_data_access_group") %>%
153 |       dplyr::mutate(record_all = ifelse(is.na(record_all)==T, 0,record_all),
154 |                     last_update = paste(lubridate::day(Sys.Date()),
155 |                                         lubridate::month(Sys.Date(), label=TRUE),
156 |                                         lubridate::year(Sys.Date()),sep="-")) %>%
157 |       dplyr::arrange(-record_all)
158 | 
159 |     # combine output
160 |     report_summary <- list("sum_overall" = sum_overall,
161 |                            "dag_all" = sum_dag_all,
162 |                            "dag_nodata" = dplyr::filter(sum_dag_all, record_all==0),
163 |                            "dag_incom" = dplyr::filter(sum_dag_all, prop_com<1),
164 |                            "dag_top" = dplyr::select(sum_dag_all, redcap_data_access_group, record_all) %>% head(top))}
165 | 
166 |   return(report_summary)}
167 | 


--------------------------------------------------------------------------------
/R/report_auth.R:
--------------------------------------------------------------------------------
 1 | # report_auth-----------------------------------------
 2 | # Documentation
 3 | #' Generate a formatted authorship list.
 4 | #' @description Used to generate a formatted authorship list for all users by group (the group the user belongs to e.g. the centre which they participated at). Optional subdivisions can be created to stratify users and groups. This could be a role (e.g. collaborator and validator) or region/country.
 5 | #' @param df Dataframe with authors in rows.
 6 | #' @param group Column name of a variable in the dataframe by which to group authorship (the default is NULL).
 7 | #' @param subdivision Column name of an additional variable in the dataframe by which to subdivide authorship (the default is NULL).
 8 | #' @param name_sep Character(s) which will separate names within the group (the default is ", ").
 9 | #' @param group_brachet Character(s) bracheting the group (the default is "()").
10 | #' @param group_sep Character(s) which will separate the groups (the default is "; ").
11 | #' @param path Path or connection to write to as .txt file.
12 | #' @import dplyr
13 | #' @importFrom stringr str_sub
14 | #' @importFrom readr write_file read_file
15 | #' @return Returns a formated string (and an optional .txt file specified using path)
16 | #' @export
17 | 
18 | report_auth <- function(df, name, group = NULL, subdivision = NULL, path = NULL,
19 |                         name_sep = ", ", group_brachet = "()",group_sep = "; "){
20 | 
21 |   require(stringr);require(readr);require(dplyr)
22 | 
23 | 
24 |   if(is.null(group)==FALSE&length(group)>1){group <- head(group,1)
25 |   print("More than 1 group supplied - only first value used as group")}
26 | 
27 |   if(is.null(subdivision)==FALSE&length(subdivision)>1){subdivision <- head(subdivision,1)
28 |   print("More than 1 subdivision supplied - only first value used as subdivision")}
29 | 
30 |   group_brachet_L = stringr::str_sub(group_brachet, 1, 1)
31 |   group_brachet_R = stringr::str_sub(group_brachet, 2, 2)
32 | 
33 |   df <- df %>% dplyr::mutate(name = dplyr::pull(., name))
34 | 
35 | 
36 |   # No groups / subdivisions
37 |   if(is.null(group)==TRUE&is.null(subdivision)==TRUE){
38 |     output <- df %>%
39 |       dplyr::summarise(auth_out = paste(name, collapse=name_sep) %>% paste0("."))
40 | 
41 |     if(is.null(path)==F){readr::write_file(output$auth_out, path=path)}}
42 | 
43 |   # Just groups
44 |   if(is.null(group)==FALSE&is.null(subdivision)==TRUE){
45 |     output <- df %>%
46 |       dplyr::mutate(group = dplyr::pull(., group)) %>%
47 | 
48 |       dplyr::group_by(group) %>%
49 | 
50 |       dplyr::summarise(name_list = paste(name, collapse=name_sep)) %>%
51 | 
52 |       dplyr::mutate(name_group = paste0(name_list, " ",group_brachet_L, group, group_brachet_R)) %>%
53 | 
54 |       dplyr::summarise(auth_out = paste(name_group, collapse = group_sep) %>% paste0("."))
55 | 
56 |     if(is.null(path)==F){readr::write_file(output$auth_out, path=path)}}
57 | 
58 |   # Just subdivisions
59 |   if(is.null(group)==TRUE&is.null(subdivision)==FALSE){
60 |     output <- df %>%
61 |       dplyr::mutate(subdivision = dplyr::pull(., subdivision)) %>%
62 | 
63 |       dplyr::group_by(subdivision) %>%
64 | 
65 |       dplyr::summarise(name_list = paste(name, collapse=name_sep) %>% paste0(".")) %>%
66 | 
67 |       dplyr::mutate(auth_out = paste0(subdivision, ": ", name_list)) %>%
68 | 
69 |       dplyr::select(auth_out)
70 | 
71 |     if(is.null(path)==F){readr::write_file(output$auth_out, path=path)}}
72 | 
73 | 
74 |   # Groups and subdivisions
75 |   if(is.null(group)==FALSE&is.null(subdivision)==FALSE){
76 |     output <- df %>%
77 |       dplyr::mutate(group = dplyr::pull(., group),
78 |                     subdivision = dplyr::pull(., subdivision)) %>%
79 | 
80 |       dplyr::select(subdivision, group, name) %>%
81 | 
82 |       dplyr::group_by(subdivision, group) %>%
83 |       dplyr::summarise(name_list = paste(name, collapse=name_sep)) %>%
84 | 
85 |       # add group characteristics
86 |       dplyr::mutate(name_group = paste0(name_list, " ",group_brachet_L, group, group_brachet_R)) %>%
87 | 
88 |       # combine groups (by subdivision)
89 |       dplyr::summarise(auth_out = paste(name_group, collapse = group_sep) %>% paste0(".")) %>%
90 | 
91 |       dplyr::mutate(auth_out = paste0(subdivision, ": ", auth_out)) %>%
92 | 
93 |       dplyr::select(auth_out)
94 | 
95 |     if(is.null(path)==F){readr::write_file(output$auth_out, path=path)}}
96 | 
97 | 
98 |   return(gsub("\n\n", " ", output$auth_out))}
99 | 


--------------------------------------------------------------------------------
/R/report_miss.R:
--------------------------------------------------------------------------------
  1 | # report_miss---------------------------------------
  2 | # Documentation
  3 | #' Generate a missing data report for a REDCap project.
  4 | #' @description Used to generate a report of record-level + redcap_data_access_group-level missing data within a REDCap project (which accounts for branching logic in the dataframe).
  5 | #' @param redcap_project_uri URI (Uniform Resource Identifier) for the REDCap instance.
  6 | #' @param redcap_project_token API (Application Programming Interface) for the REDCap project.
  7 | #' @param missing_threshold The overall proportion of missing data that is acceptable (default = 0.05).
  8 | #' @param var_include Vector of names of variables that are desired to be specifically used to assess data completness (alternate method from using "var_exclude").
  9 | #' @param var_exclude Vector of names of variables that are desired to be excluded from assessment of data completness (any NA value will be counted as incomplete).
 10 | #' @param record_include Vector of redcap record_id that are desired to be included in the record count.
 11 | #' @param record_exclude Vector of redcap record_id that are desired to be excluded from the record count.
 12 | #' @param dag_include Vector of redcap data access group names that are desired to be included in the record count.
 13 | #' @param dag_exclude Vector of redcap data access group names that are desired to be excluded from the record count.
 14 | #' @param record_id String of variable name which fufills the record_id role (default = "record_id")
 15 | #' @import dplyr
 16 | #' @import tibble
 17 | #' @importFrom stringr str_split_fixed
 18 | #' @importFrom stringi stri_replace_all_fixed
 19 | #' @importFrom scales percent
 20 | #' @importFrom httr POST content
 21 | #' @importFrom readr read_csv
 22 | #' @importFrom tidyr separate_rows
 23 | #' @importFrom tidyselect all_of
 24 | #' @return Nested dataframe with a summary of missing data at the redcap_data_access_group level and the record level.
 25 | #' @export
 26 | 
 27 | # Function:
 28 | report_miss <- function(redcap_project_uri, redcap_project_token, missing_threshold = 0.05,
 29 |                         var_include = NULL, var_exclude = NULL, record_exclude = NULL, record_include = NULL,
 30 |                         dag_include = NULL, dag_exclude = NULL, record_id = "record_id"){
 31 |   # Prepare dataset----------------
 32 |   # Load functions / packages
 33 | 
 34 |   df_record <-  httr::POST(url = redcap_project_uri,
 35 |                            body = list("token"=redcap_project_token,
 36 |                                        content='record',
 37 |                                        action='export',
 38 |                                        format='csv',
 39 |                                        type='flat',
 40 |                                        csvDelimiter='',
 41 |                                        rawOrLabel='raw',
 42 |                                        rawOrLabelHeaders='raw',
 43 |                                        exportCheckboxLabel='false',
 44 |                                        exportSurveyFields='false',
 45 |                                        exportDataAccessGroups='true',
 46 |                                        returnFormat='json'),
 47 |                            encode = "form") %>%
 48 |     httr::content(type = "text/csv",show_col_types = FALSE,
 49 |                   guess_max = 100000, encoding = "UTF-8") %>%
 50 |     dplyr::select(-contains("_complete"))
 51 | 
 52 |   # Data dictionary set-up---------------------
 53 |   # Convert data dictionary branching to R format
 54 | 
 55 |   df_metadata <- collaborator::redcap_metadata(redcap_project_uri = redcap_project_uri,
 56 |                                                redcap_project_token = redcap_project_token)
 57 | 
 58 |   # Format branching logic
 59 |   df_meta <- df_metadata %>%
 60 |     dplyr::select(variable_name, variable_label, variable_type, branch_logic) %>%
 61 |     dplyr::mutate(branch_logic = iconv(tolower(as.character(branch_logic)), to ="ASCII//TRANSLIT")) %>%
 62 | 
 63 |     # clean branching logic
 64 |     dplyr::mutate(branch_logic = gsub("\n", " ", branch_logic)) %>%
 65 |     dplyr::mutate(branch_logic = gsub("   ", " ", branch_logic)) %>%
 66 |     dplyr::mutate(branch_logic = gsub("  ", " ", branch_logic)) %>%
 67 |     dplyr::mutate(branch_logic = gsub("\\[|\\]", "", branch_logic)) %>%
 68 |     dplyr::mutate(branch_logic = gsub("=", "==", branch_logic)) %>%
 69 |     dplyr::mutate(branch_logic = gsub("<>", "!=", branch_logic)) %>%
 70 |     dplyr::mutate(branch_logic = gsub("!==", "!=", branch_logic)) %>%
 71 |     dplyr::mutate(branch_logic = gsub(">==", ">=", branch_logic)) %>%
 72 |     dplyr::mutate(branch_logic = gsub("> ==", ">=", branch_logic)) %>%
 73 |     dplyr::mutate(branch_logic = gsub("<==", "<=", branch_logic)) %>%
 74 |     dplyr::mutate(branch_logic = gsub(" or ", "| df_record$", branch_logic)) %>%
 75 |     dplyr::mutate(branch_logic = gsub(" and ", " & df_record$", branch_logic)) %>%
 76 |     dplyr::mutate(branch_logic = gsub("$ ", "$", branch_logic)) %>%
 77 |     dplyr::mutate(branch_logic = ifelse(is.na(branch_logic)==F, paste0("df_record$", branch_logic), NA)) %>%
 78 | 
 79 |     dplyr::mutate(branch_logic = stringi::stri_replace_all_fixed(branch_logic, "df_record$(((", "(((df_record$")) %>%
 80 |     dplyr::mutate(branch_logic = stringi::stri_replace_all_fixed(branch_logic, "df_record$((", "((df_record$")) %>%
 81 |     dplyr::mutate(branch_logic = stringi::stri_replace_all_fixed(branch_logic, "df_record$(", "(df_record$"))
 82 | 
 83 |   # Clean final data dictionary
 84 |   df_meta <- df_meta %>%
 85 |     dplyr::bind_rows(dplyr::bind_cols("variable_name" = "redcap_data_access_group",
 86 |                                       "variable_label" = "REDCap Data Access Group",
 87 |                                       "variable_type" = NA,
 88 |                                       "branch_logic" = NA)) %>%
 89 |     dplyr::mutate(variable_name = factor(variable_name, levels = colnames(df_record))) %>% # only variables in the dataset
 90 |     dplyr::filter(is.na(variable_name)==F) %>%
 91 |     dplyr::arrange(variable_name) %>% dplyr::mutate(variable_name = as.character(variable_name)) %>%
 92 |     dplyr::select(variable_name, branch_logic)
 93 | 
 94 | 
 95 |   # Identify if repeating forms present / which variables
 96 |   if(("redcap_repeat_instrument" %in% names(df_record))==F){
 97 |     df_metadata <- df_metadata %>% dplyr::mutate(form_repeat = "No")}
 98 | 
 99 |   if(("redcap_repeat_instrument" %in% names(df_record))==T){
100 | 
101 |     form_repeat <- df_record %>%
102 |       filter(is.na(redcap_repeat_instrument)==F) %>%
103 |       pull(redcap_repeat_instrument) %>% unique()
104 | 
105 |     df_metadata <- df_metadata %>%
106 |       dplyr::mutate(form_repeat = ifelse(form_name %in% form_repeat, "Yes", "No"))
107 | 
108 |     record_repeat <- df_record %>%
109 |       filter(is.na(redcap_repeat_instrument)==F) %>%
110 |       pull(record_id) %>% unique()
111 | 
112 |     var_repeat <- df_metadata %>%
113 |       filter(form_repeat=="Yes") %>%
114 |       dplyr::pull(variable_name)
115 | 
116 |     var_norepeat <- df_metadata %>%
117 |       filter(form_repeat=="No") %>%
118 |       dplyr::pull(variable_name)
119 | 
120 |     df_record <- df_record %>%
121 |       dplyr::mutate(redcap_repeat_instrument= factor(redcap_repeat_instrument, levels=sort(unique(redcap_repeat_instrument))),
122 |                     redcap_repeat_instance = as.numeric(redcap_repeat_instance),
123 |                     redcap_repeat_instance = ifelse(is.na(redcap_repeat_instance)==T, 0, redcap_repeat_instance)) %>%
124 |       group_by(record_id, redcap_data_access_group,redcap_repeat_instance) %>%
125 |       tidyr::fill(all_of(var_repeat), .direction = "updown") %>%
126 |       group_by(record_id, redcap_data_access_group) %>%
127 |       tidyr::fill(all_of(var_norepeat), .direction = "down") %>%
128 |       dplyr::ungroup() %>%
129 |       dplyr::select(-redcap_repeat_instrument) %>%
130 |       dplyr::mutate(redcap_repeat_instance = ifelse(record_id %in% record_repeat, redcap_repeat_instance, 1)) %>%
131 |       filter(redcap_repeat_instance!=0) %>%
132 |       dplyr::distinct() %>%
133 |       dplyr::arrange(record_id, redcap_data_access_group,redcap_repeat_instance)}
134 | 
135 | 
136 |   # Determine missing data-------------------------------
137 |   # 1. Determine branching variables
138 |   redcap_dd_branch <- df_meta %>% dplyr::filter(is.na(branch_logic)==F)
139 |   redcap_dd_nobranch <- df_meta %>%
140 |     dplyr::filter(is.na(branch_logic)==T) %>%
141 |     dplyr::filter(! variable_name %in% c("record_id", "redcap_data_access_group")) %>%
142 |     dplyr::pull(variable_name)
143 | 
144 |   # 2. Convert branching to present ("."), missing ("M") or appropriately missing ("NA") based on branching logic
145 |   if(nrow(redcap_dd_branch)>0){
146 | 
147 |     df_record_clean <- df_record %>%
148 |       dplyr::mutate_all(function(x){as.character(x)})
149 | 
150 |     for(i in 1:nrow(redcap_dd_branch)) {
151 | 
152 |       df_record_clean <- df_record_clean %>%
153 |         # evaluate branching logic within dataset
154 |         dplyr::mutate(logic_fufilled = parse(text=eval(redcap_dd_branch$branch_logic[[i]])) %>% eval(),
155 | 
156 |                       # add in original data
157 |                       variable_data = dplyr::pull(., redcap_dd_branch$variable_name[[i]])) %>%
158 | 
159 |         # if the branching logic has not been fufilled (value==F or NA) then NA
160 |         dplyr::mutate(variable_out = ifelse(is.na(logic_fufilled)==T|logic_fufilled==F, NA,
161 | 
162 |                                             # if logic_fufilled == T, and the data is NA then "Missing"
163 |                                             ifelse(is.na(variable_data), "M", "."))) %>%
164 |         dplyr::select(-all_of(redcap_dd_branch$variable_name[[i]])) %>%
165 |         dplyr::rename_at(vars(matches("variable_out")), function(x){redcap_dd_branch$variable_name[[i]]})}}
166 | 
167 | 
168 |   # 3. Convert non-branching to present (".") or missing ("M") based on NA status
169 |   df_record_clean <- df_record_clean %>%
170 |     dplyr::mutate(across(everything(), as.character)) %>%
171 |     dplyr::mutate(across(tidyselect::all_of(redcap_dd_nobranch), function(x){ifelse(is.na(x)==T, "M", ".")})) %>%
172 |     dplyr::select(-logic_fufilled, -variable_data)
173 | 
174 |   # 4. Re-combine checkbox variables
175 |   xbox_names <- df_metadata %>%
176 |     dplyr::filter(variable_type=="checkbox") %>%
177 |     dplyr::mutate(variable_name_original = stringr::str_split_fixed(variable_name, "___", 2)[,1]) %>%
178 |     dplyr::select(variable_name_original, variable_name) %>%
179 |     dplyr::group_by(variable_name_original) %>%
180 |     dplyr::summarise(variable_name = list(variable_name))
181 | 
182 |   for(i in 1:nrow(xbox_names)){
183 |     df_record_clean <- df_record_clean %>%
184 |       tidyr::unite(col = !!eval(unique(xbox_names$variable_name_original[i])),
185 |                    tidyselect::all_of(c(xbox_names$variable_name[[i]])), na.rm = T, remove = T)}
186 | 
187 |   df_record_clean <- df_record_clean %>%
188 |     dplyr::mutate_at(vars(all_of(xbox_names$variable_name_original)), function(x){ifelse(x=="", NA, ".")})
189 | 
190 |   # Clean dataset
191 |   if(is.null(var_exclude)==F){df_record_clean <- df_record_clean %>% dplyr::select(-tidyselect::any_of(var_exclude))}
192 |   if(is.null(var_include)==F){df_record_clean <- df_record_clean %>% dplyr::select(record_id, redcap_data_access_group, tidyselect::any_of(var_include))}
193 | 
194 |   if(is.null(dag_exclude)==F){df_record_clean <- df_record_clean %>% dplyr::filter(! redcap_data_access_group %in% dag_exclude)}
195 |   if(is.null(dag_include)==F){df_record_clean <- df_record_clean %>% dplyr::filter(redcap_data_access_group %in% dag_include)}
196 | 
197 |   if(is.null(record_exclude)==F){df_record_clean <- df_record_clean %>% dplyr::filter(! record_id %in% record_exclude)}
198 |   if(is.null(record_include)==F){df_record_clean <- df_record_clean %>% dplyr::filter(record_id %in% record_include)}
199 | 
200 |   # Create missing data reports---------------------------
201 |   xbox_remove <- df_metadata %>%
202 |     dplyr::filter(variable_type == "checkbox") %>%
203 |     dplyr::filter(stringr::str_split_fixed(variable_name, "___", 2)[,2]!="1") %>%
204 |     dplyr::pull(variable_name)
205 | 
206 |   # Patient-level
207 |   if(("redcap_repeat_instance" %in% names(df_record_clean))==F){
208 |   data_missing_pt <- df_record_clean %>%
209 |     dplyr::rename("record_id" = eval(record_id)) %>%
210 |     # remove checkbox variables (will always be the same value) except the first and rename
211 |     dplyr::select(-any_of(xbox_remove)) %>%
212 |     dplyr::rename_all(function(x){stringr::str_remove(x, "___1")}) %>%
213 |     dplyr::mutate(miss_n = rowSums(dplyr::select(., -dplyr::one_of("record_id", "redcap_data_access_group"))=="M", na.rm=T),
214 |                   fields_n = rowSums(is.na(dplyr::select(., -dplyr::one_of("record_id", "redcap_data_access_group")))==F)) %>%
215 |     dplyr::mutate(miss_prop = miss_n/fields_n) %>%
216 |     dplyr::mutate(miss_pct = scales::percent(miss_prop),
217 |                   miss_threshold = factor(ifelse(miss_prop>missing_threshold, "Yes", "No"))) %>%
218 |     dplyr::select(record_id, redcap_data_access_group, miss_n:miss_threshold, everything())}
219 | 
220 | 
221 |   if(("redcap_repeat_instance" %in% names(df_record_clean))==T){
222 |     data_missing_pt <- df_record_clean %>%
223 |       dplyr::rename("record_id" = eval(record_id)) %>%
224 |       # remove checkbox variables (will always be the same value) except the first and rename
225 |       dplyr::select(-any_of(xbox_remove)) %>%
226 |       dplyr::rename_all(function(x){stringr::str_remove(x, "___1")}) %>%
227 |       dplyr::relocate(redcap_repeat_instance, var_repeat, .after = last_col()) %>%
228 |       redcap_format_repeat(format = "wide") %>%
229 |       dplyr::mutate(miss_n = rowSums(dplyr::select(., -dplyr::one_of("record_id", "redcap_data_access_group"))=="M", na.rm=T),
230 |                     fields_n = rowSums(is.na(dplyr::select(., -dplyr::one_of("record_id", "redcap_data_access_group")))==F)) %>%
231 |       dplyr::mutate(miss_prop = miss_n/fields_n) %>%
232 |       dplyr::mutate(miss_pct = scales::percent(miss_prop),
233 |                     miss_threshold = factor(ifelse(miss_prop>missing_threshold, "Yes", "No"))) %>%
234 |       dplyr::select(record_id, redcap_data_access_group, miss_n:miss_threshold, everything())}
235 | 
236 |   # Centre-level
237 |   data_missing_cen <- data_missing_pt %>%
238 |     dplyr::select(redcap_data_access_group, miss_n, fields_n, miss_threshold) %>%
239 | 
240 |     dplyr::group_by(redcap_data_access_group) %>%
241 | 
242 |     dplyr::summarise(n_pt = n(),
243 |                      n_threshold = sum(miss_threshold=="Yes"),
244 |                      cen_miss_n = sum(miss_n),
245 |                      cen_field_n = sum(fields_n)) %>%
246 | 
247 |     dplyr::mutate(cen_miss_prop = cen_miss_n/cen_field_n) %>%
248 | 
249 |     dplyr::mutate(cen_miss_pct = scales::percent(cen_miss_prop))
250 | 
251 |   # Create output
252 |   data_missing <- list("group" = data_missing_cen,"record" = data_missing_pt)
253 | 
254 |   return(data_missing)}
255 | 


--------------------------------------------------------------------------------
/R/retired/user_roles.R:
--------------------------------------------------------------------------------
 1 | # user_roles-----------------------------
 2 | # Documentation
 3 | #' Assign named roles to REDCap users
 4 | #' @description Used to assign a role name to current redcap users based on the example users given in role_users_example. Note: The number of roles should match the output from user_roles_n.
 5 | #' @param data
 6 | #' @param redcap_project_uri URI (Uniform Resource Identifier) for the REDCap instance.
 7 | #' @param redcap_project_token API (Application Programming Interface) for the REDCap project.
 8 | #' @param role_users_example Dataframe with 2 columns: role (specifiying the name of the role), and username (the username with the desired rights to be associated with the role).
 9 | #' @param use_ssl Logical value whether verify the peer's SSL certificate should be evaluated (default=TRUE)
10 | #' @import dplyr
11 | #' @importFrom tidyr unite
12 | #' @importFrom RCurl postForm curlOptions
13 | #' @importFrom readr read_csv
14 | #' @importFrom zoo na.locf
15 | #' @return Dataframe of REDCap project users with an additional "role" column.
16 | #' @export
17 | 
18 | # Function:
19 | user_roles <- function(data = NULL, redcap_project_uri = NULL, redcap_project_token = NULL, role_users_example, use_ssl = TRUE){
20 |   require(dplyr);require(readr);require(tidyr); require(zoo); require(RCurl)
21 | 
22 |   if(is.null(data)==F&(is.null(redcap_project_uri)==T|is.null(redcap_project_token)==T)){
23 |     user_current <- data %>%
24 |       dplyr::select(username, email, firstname, lastname, expiration,
25 |                     data_access_group, data_access_group_id, design, user_rights,
26 |                     data_access_groups, data_export, reports, stats_and_charts,
27 |                     manage_survey_participants, calendar, data_import_tool, data_comparison_tool,
28 |                     logging, file_repository, data_quality_create, data_quality_execute, api_export,
29 |                     api_import, mobile_app, mobile_app_download_data,
30 |                     record_create, record_rename, record_delete, lock_records_all_forms,
31 |                     lock_records, lock_records_customization, forms) %>%
32 |       tidyr::unite(col = "role_rights", design:forms, sep = "; ", remove = F) %>%
33 |       dplyr::left_join(role_users_example, by = c("username")) %>%
34 |       dplyr::select(role, role_rights, everything()) %>%
35 |       dplyr::arrange(role_rights, role) %>%
36 |       dplyr::mutate(role = zoo::na.locf(role)) %>%
37 |       dplyr::select(-role_rights)}
38 | 
39 | 
40 |   if(is.null(data)==T&(is.null(redcap_project_uri)==F&is.null(redcap_project_token)==F)){
41 |   user_current <- RCurl::postForm(uri=redcap_project_uri,
42 |                                   token= redcap_project_token,
43 |                                   content='user',
44 |                                   .opts = RCurl::curlOptions(ssl.verifypeer = if(use_ssl==F){FALSE}else{TRUE}),
45 |                                   format='csv') %>%
46 |     readr::read_csv() %>%
47 |     dplyr::left_join(role_users_example, by="username") %>%
48 |     tidyr::unite(col = "role_rights", design:forms, sep = "; ", remove = F) %>%
49 |     dplyr::mutate(role_rights = as.numeric(factor(role_rights))) %>%
50 |     dplyr::select(role, role_rights, everything()) %>%
51 |     dplyr::group_by(role_rights) %>%
52 |     dplyr::arrange(role_rights, role) %>%
53 |     dplyr::mutate(role = zoo::na.locf(role)) %>%
54 |     dplyr::ungroup() %>%
55 |     dplyr::select(-role_rights)}
56 | 
57 |   return(user_current)}
58 | 


--------------------------------------------------------------------------------
/R/user_import.R:
--------------------------------------------------------------------------------
 1 | # user_import-----------------------------------------
 2 | 
 3 | # Documentation
 4 | #' Generate a csv file to upload new user accounts to REDCap
 5 | #' @description Used to generate a csv file that can be used to upload new user accounts to REDCap directly (via control centre). This requires a dataframe of at least 4 mandatory columns (corresponding to: username, first name, last name, and email address) and 4 optional columns (corresponding to: institution, sponser, expiration, comments). All optional columns will be blank unless otherwise specified.
 6 | #' @param df Dataframe of at least 4 mandatory columns (corresponding to: username, first name, last name, and email address) and 4 optional columns (corresponding to: institution, sponser, expiration, comments).
 7 | #' @param username Column name (Mandatory) which corresponds to "Username".
 8 | #' @param first_name Column name (Mandatory) which corresponds to "First name".
 9 | #' @param last_name Column name (Mandatory) which corresponds to "Last name".
10 | #' @param email Column name (Mandatory) which corresponds to "Email address".
11 | #' @param institution Column name (Optional/Recommended) which corresponds to "Institution ID". Can be used to record the data_access_group / centre of the user.
12 | #' @param sponser Column name (Optional) which corresponds to "Sponsor username".
13 | #' @param expiration Column name (Optional) which corresponds to "Expiration". Must be in YYYY-MM-DD HH:MM or MM/DD/YYYY HH:MM format.
14 | #' @param comments Column name (Optional) which corresponds to "Comments".
15 | #' @param path Path or connection to write to as .csv file.
16 | #' @import dplyr
17 | #' @importFrom readr write_csv
18 | #' @return Returns a dataframe formated for REDCap user import (and an optional CSV file specified using path)
19 | #' @export
20 | 
21 | # Function
22 | user_import <- function(df, username, first_name, last_name, email,
23 |                         institution = NULL, sponser = NULL, expiration = NULL, comments = NULL,
24 |                         path = NULL){
25 |   require(dplyr);require(readr)
26 |   user_import_df <- df %>%
27 |     dplyr::mutate("Username" = dplyr::pull(., username),
28 |                   "First name" = dplyr::pull(., first_name),
29 |                   "Last name" = dplyr::pull(., last_name),
30 |                   "Email address" = pull(., email)) %>%
31 |     dplyr::mutate("Institution ID" = if(is.null(institution)==T){""}else{dplyr::pull(., institution)},
32 |                   "Sponsor username" = if(is.null(sponser)==T){""}else{dplyr::pull(., sponser)},
33 |                   "Expiration" = if(is.null(expiration)==T){""}else{dplyr::pull(., expiration)},
34 |                   "Comments" = if(is.null(comments)==T){""}else{dplyr::pull(., comments)}) %>%
35 |     dplyr::select(Username:Comments) %>%
36 |     dplyr::mutate_at(vars(`Institution ID`:Comments), function(x){ifelse(is.na(x)==T, "", x)})
37 | 
38 |   if(is.null(path)==F){user_import_df %>% readr::write_csv(path=path)}
39 | 
40 |   return(user_import_df)}
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/R/user_manage.R:
--------------------------------------------------------------------------------
  1 | # user_manage-------------------------
  2 | # Documentation
  3 | #' Used to manage REDCap project users
  4 | #' @description Used to manage users - whether to change users present, or their specific roles and data access groups on a redcap project.
  5 | 
  6 | #' @param redcap_project_uri URI (Uniform Resource Identifier) for the REDCap instance.
  7 | #' @param redcap_project_token API (Application Programming Interface) for the REDCap project.
  8 | #' @param users Vector of usernames or a dataframe containing at least 1 column ("username"). Further columns specifying individual patients to be removed ("remove") or assigned to roles ("role") or data access groups ("dag") can be added.
  9 | #' @param role String of a single role ID, role name or username of user with the desired user rights to be applied to ALL users specified in "users"  (use a column in users if wanting to be different for each user). Must specify "none" if no assignment of role.
 10 | #' @param dag String of a unique DAG to ALL users specified in "users" will be assigned to (use a column in users if wanting to be different for each user). Must specify "none" if no assignment of DAG.
 11 | #' @param remove Logical value indicating if ALL users specified in "users" are to be removed  (use a column in users if wanting to be different for each user). Default is FALSE (no users to be removed).
 12 | #' @import dplyr
 13 | #' @importFrom httr POST content
 14 | #' @importFrom readr read_csv
 15 | #' @importFrom tibble enframe
 16 | #' @importFrom tidyr pivot_wider
 17 | #' @importFrom stringr str_detect
 18 | #' @return Nested tibbles of the outcome (1) "correct" users with the correct allocation specified (2) "error" users with an allocation unable to be completed. User acccounts are still required to be entered manually.
 19 | #' @export
 20 | 
 21 | # Function:
 22 | user_manage <- function(redcap_project_uri, redcap_project_token, users = NULL,
 23 |                         role = NULL, dag = NULL, remove = FALSE){
 24 | 
 25 |   # Load required functions
 26 | 
 27 |   user_current <- collaborator::user_role(redcap_project_uri=redcap_project_uri,
 28 |                                           redcap_project_token = redcap_project_token)
 29 | 
 30 |   role_current <- c(levels(user_current$sum$role_name), user_current$sum$role_id, unlist(user_current$sum$username))
 31 |   role_current <- c(na.omit(role_current), "none")
 32 | 
 33 |   dag_current <- c(collaborator::dag_manage(redcap_project_uri=redcap_project_uri,
 34 |                                             redcap_project_token = redcap_project_token)$unique_group_name, "none")
 35 | 
 36 | 
 37 |   if(is.null("users")==F){
 38 | 
 39 |     if(is.vector(users)){users <- users %>%
 40 |       tibble::enframe(name=NULL, value = "username") %>%
 41 |       dplyr::mutate(role = ifelse(is.null(role)==T, NA, role),
 42 |                     dag = ifelse(is.null(dag)==T, NA, dag))}
 43 | 
 44 |     # Remove users-------------------------
 45 |     # if no remove column, add column
 46 |     user_remove <- NULL
 47 |     if(! "remove" %in% names(users)&is.null(remove)==F){users <- users %>% mutate(remove = remove)}
 48 | 
 49 |     # Ensure remove column logical / no NA
 50 |     if(is.logical(users$remove)==F){stop("Remove column must be logical (TRUE / FALSE)")}
 51 |     users <- users %>%
 52 |       dplyr::mutate(remove = ifelse(is.na(remove)==T, FALSE, remove)) %>%
 53 |       dplyr::mutate(role = ifelse(remove==T, NA, role),
 54 |                     dag = ifelse(remove==T, NA, dag))
 55 | 
 56 |     user_remove <- users %>% filter(remove==TRUE)
 57 | 
 58 |     if(nrow(user_remove)>0){
 59 |       user_delete <- user_remove %>%
 60 |         dplyr::mutate(n =  1:n()-1) %>%
 61 |         dplyr::mutate(out = paste0("'users[", n, "]'='",username, "'")) %>%
 62 |         dplyr::summarise(users = paste0(out, collapse = ",")) %>%
 63 |         dplyr::pull(users)
 64 | 
 65 |       eval(parse(text = paste0("httr::POST(url=redcap_project_uri,
 66 |                body = list('token'= redcap_project_token,
 67 |                            content='user',action='delete',",
 68 |                                user_delete,
 69 |                                ", returnFormat='json'), encode = 'form')")))}
 70 | 
 71 | 
 72 |     # Add / change users in roles------------------------
 73 |     user_amend <- users %>%
 74 |       filter(remove==F)
 75 | 
 76 |     user_add <- NULL
 77 |     if(remove==F&nrow(user_amend)>0){
 78 | 
 79 |       # if no role column:
 80 |       ## AND no role specified, error
 81 |       if(! "role" %in% names(user_amend)&is.null(role)==T){stop("No roles provided. If no role is to be assigned, use 'none'.")}
 82 | 
 83 |       ## BUT a role specified, make a column
 84 |       if(! "role" %in% names(user_amend)&is.null(role)==F){user_amend <- user_amend %>% mutate(role = role)}
 85 | 
 86 |       # if NA in role column:
 87 |       if(NA %in% user_amend$role){stop("No role provided for one or more usernames. If no role is to be assigned, use 'none'.")}
 88 | 
 89 |       # if role not recognised:
 90 |       role_missing <- user_amend %>% filter(! role %in% role_current)
 91 |       if(nrow(role_missing)>0){stop(paste0("One or more unrecognised roles: ", paste0(dag_missing$role, collapse = ", ")))}
 92 | 
 93 |       user_add <- user_amend %>%
 94 |         filter(! (username %in% user_current$all$username))
 95 | 
 96 |       if(nrow(user_add)>0){
 97 |         import_user <- user_add %>%
 98 |           dplyr::mutate(json = paste0('{"username":"', username, '"}')) %>%
 99 |           dplyr::summarise(json = paste0(json, collapse = ", ")) %>%
100 |           dplyr::mutate(json = paste0("[", json, "]")) %>%
101 |           dplyr::pull(json)
102 | 
103 |         httr::POST(url=redcap_project_uri,
104 |                    body = list("token"= redcap_project_token, content='user',
105 |                                data = import_user,
106 |                                action='import', format='json'))}
107 | 
108 |       # Assign role to user
109 |       import_role <- user_amend %>%
110 |         dplyr::mutate(unique_role_name = ifelse(role=="none", "", role)) %>%
111 |         dplyr::left_join(user_current$sum %>% select(-username,-n), by = c("unique_role_name"="role_name")) %>%
112 |         dplyr::mutate(role_id = ifelse(is.na(role_id), "", role_id)) %>%
113 |         dplyr::mutate(json = paste0('{"username":"', username, '","unique_role_name":"', role_id,'"}')) %>%
114 |         dplyr::summarise(json = paste0(json, collapse = ", ")) %>%
115 |         dplyr::mutate(json = paste0("[", json, "]")) %>%
116 |         dplyr::pull(json)
117 | 
118 |       httr::POST(url=redcap_project_uri,
119 |                  body = list("token"= redcap_project_token, content='userRoleMapping',
120 |                              data = import_role,
121 |                              action='import', format='json'))}
122 | 
123 | 
124 |     # Add / Assign DAG to user-------------------------
125 |     if(remove==F&nrow(user_amend)>0){
126 |       if(length(dag_current)>1&dag_current[1]!="none"){
127 | 
128 |         # if no DAG column:
129 |         ## AND no DAG specified, error
130 |         if(! "dag" %in% names(user_amend)&is.null(dag)==T){stop("No DAG provided. If no DAG is to be assigned, use 'none'.")}
131 | 
132 |         ## BUT a DAG specified, make a column
133 |         if(! "dag" %in% names(user_amend)&is.null(dag)==F){user_amend <- user_amend %>% mutate(dag = dag)}
134 | 
135 |         # if NA in DAG column:
136 |         if(NA %in% user_amend$dag){stop("No DAG provided for one or more usernames. If no DAG is to be assigned, use 'none'.")}
137 | 
138 |         # if dag not recognised:
139 |         dag_missing <- user_amend %>% filter(! dag %in% dag_current)
140 |         if(nrow(dag_missing)>0){stop(paste0("One or more unrecognised DAGs: ", paste0(dag_missing$dag, collapse = ", ")))}
141 | 
142 | 
143 |         # Assign DAG to user
144 |         import_dag <- user_amend %>%
145 |           dplyr::mutate(redcap_data_access_group = ifelse(dag=="none", "", dag)) %>%
146 |           dplyr::mutate(json = paste0('{"username":"', username, '","redcap_data_access_group":"', redcap_data_access_group,'"}')) %>%
147 |           dplyr::summarise(json = paste0(json, collapse = ", ")) %>%
148 |           dplyr::mutate(json = paste0("[", json, "]")) %>%
149 |           dplyr::pull(json)
150 | 
151 |         httr::POST(url=redcap_project_uri,
152 |                    body = list("token"= redcap_project_token, content='userDagMapping',
153 |                                data = import_dag,
154 |                                action='import', format='json'))}}
155 | 
156 | 
157 | 
158 |     # Compare changes -------------------------
159 |     user_update <- user_role(redcap_project_uri=redcap_project_uri,
160 |                              redcap_project_token = redcap_project_token)$all %>%
161 |       dplyr::mutate(type = "new",
162 |                     present = 1) %>%
163 |       select(type, username, "role" = role_name, "dag" = data_access_group,present) %>%
164 |       bind_rows(user_current$all %>%
165 |                   dplyr::mutate(type = "old",
166 |                                 present = 1) %>%
167 |                   select(type, username, "role" = role_name, "dag" = data_access_group,present)) %>%
168 |       dplyr::mutate(role = as.character(role),
169 |                     role = ifelse(is.na(role)==T, "none", role),
170 |                     dag = ifelse(is.na(dag)==T, "none", dag)) %>%
171 |       tidyr::pivot_wider(id_cols = "username", names_from = "type", values_from =c("role", "dag","present")) %>%
172 | 
173 |       dplyr::mutate(status_intended = case_when(username %in% user_add$username ~ "added",
174 |                                                 username %in% user_remove$username ~ "removed",
175 |                                                 TRUE ~ NA_character_))
176 | 
177 |     out <- users %>%
178 |       select(username,"role_intended" = "role",  "dag_intended" = "dag") %>%
179 |       full_join(user_update, by = "username") %>%
180 |       filter(username %in% c(users$username, user_add$username, user_remove$username)) %>%
181 |       dplyr::mutate(role_correct = ifelse(role_intended==role_new|(is.na(role_intended)==T&is.na(role_new)==T), "Yes", "No"),
182 |                     dag_correct = ifelse(dag_intended==dag_new|(is.na(dag_intended)==T&is.na(dag_new)==T), "Yes", "No"),
183 |                     status_correct = case_when(is.na(present_old)==F&is.na(present_new)==T&username %in% user_add$username ~ "Not added",
184 |                                                is.na(present_old)==T&is.na(present_new)==F&username %in% user_remove$username ~ "Not removed",
185 |                                                TRUE ~ "Yes")) %>%
186 | 
187 | 
188 |       dplyr::mutate(action = case_when(is.na(present_old)==F&is.na(present_new)==F ~ "unchanged",
189 |                                        is.na(present_old)==F&is.na(present_new)==T ~ "remove",
190 |                                        is.na(present_old)==T&is.na(present_new)==F ~ "add",
191 |                                        is.na(present_old)==T&is.na(present_new)==T ~ "absent"),
192 |                     role = case_when(action=="remove" ~ paste0(role_old, " --> NA"),
193 |                                      action=="add" ~ paste0("NA --> ",role_new),
194 |                                      action=="absent" ~ NA_character_,
195 |                                      action=="unchanged" & role_old==role_new ~ role_old,
196 |                                      action=="unchanged" & role_old!=role_new ~ paste0(role_old, " --> ",role_new)),
197 |                     dag = case_when(action=="remove" ~ paste0(dag_old, " --> NA"),
198 |                                     action=="add" ~ paste0("NA --> ",dag_new),
199 |                                     action=="absent" ~ NA_character_,
200 |                                     action=="unchanged" & dag_old==dag_new ~ dag_old,
201 |                                     action=="unchanged" & dag_old!=dag_new ~ paste0(dag_old, " --> ",dag_new)),
202 |                     action = case_when(action=="unchanged"&stringr::str_detect(role, "-->")==T&stringr::str_detect(dag, "-->")==T ~ "change (dag/role)",
203 |                                        action=="unchanged"&stringr::str_detect(role, "-->")==F&stringr::str_detect(dag, "-->")==T ~ "change (dag)",
204 |                                        action=="unchanged"&stringr::str_detect(role, "-->")==T&stringr::str_detect(dag, "-->")==F ~ "change (role)",
205 |                                        TRUE ~ action))
206 |     correct <- out %>%
207 |       filter(role_correct=="Yes"&dag_correct=="Yes"&status_correct=="Yes") %>%
208 |       select(username, action, role, dag)
209 | 
210 |     error = out %>%
211 |       filter(role_correct=="No"|dag_correct=="No"|status_correct!="Yes") %>%
212 |       dplyr::select(username, action, status_intended, role, role_intended, dag, dag_intended)
213 | 
214 |     return(list("correct" = correct, "error" = error))}
215 | 
216 |   if(is.null("users")==T){return(user_current)}}
217 | 
218 | 
219 | 
220 | 


--------------------------------------------------------------------------------
/R/user_role.R:
--------------------------------------------------------------------------------
 1 | # user_role-----------------------------
 2 | # Documentation
 3 | #' Identifies unique REDCap user roles
 4 | #' @description Used to count the number of unique roles (e.g. unique combinations of user rights) on the REDCap project. Note: this replaces the function of roles on the user rights page of the REDCap.
 5 | #' @param redcap_project_uri URI (Uniform Resource Identifier) for the REDCap instance.
 6 | #' @param redcap_project_token API (Application Programming Interface) for the REDCap project.
 7 | #' @param user_exclude Vector of usernames to be excluded (e.g. those with unique rights). Default is none (e.g. "").
 8 | #' @param remove_id Logical value to remove identifying details of users (e.g. name, email). Default is TRUE
 9 | #' @param show_rights  Logical value to show user rights allocated to each role. Default is FALSE
10 | #' @import dplyr
11 | #' @importFrom httr POST content
12 | #' @return Dataframe of all users by unique role
13 | #' @export
14 | 
15 | 
16 | # Function:
17 | user_role <- function(redcap_project_uri, redcap_project_token, user_exclude = NULL, remove_id = T, show_rights = F){
18 |   require(httr); require(dplyr)
19 | 
20 |   # PULL users
21 |   user_current <- httr::POST(url=redcap_project_uri,
22 |                              body = list("token"= redcap_project_token, content='user',
23 |                                          action='export', format='csv')) %>%
24 |     httr::content(show_col_types = FALSE)
25 | 
26 | 
27 |   user_rights <- user_current%>%
28 |     dplyr::select(-any_of(c("email","firstname","lastname","expiration",
29 |                             "data_access_group","data_access_group_id"))) %>%
30 |     dplyr::rename_with(function(x){paste0("right_", x)}) %>%
31 |     dplyr::rename("username" = "right_username")
32 | 
33 |   # Add user role (if used)
34 | 
35 |   pull_role <- httr::POST(url=redcap_project_uri,
36 |                           body = list("token"= redcap_project_token, content='userRole',
37 |                                       action='export', format='csv')) %>%
38 |     httr::content(show_col_types = FALSE)
39 | 
40 |   role = tibble::tibble("role_id" = NA, "role_name" = NA)
41 |   user_role = tibble::tibble("role_id" = NA, "role_name" = NA, "username" = NA)
42 | 
43 |   if(is.null(pull_role)==F){
44 |     role <- pull_role  %>%
45 |       dplyr::select( "role_name" = role_label,"role_id" = unique_role_name)
46 | 
47 |     user_role <- httr::POST(url=redcap_project_uri,
48 |                             body = list("token"= redcap_project_token, content='userRoleMapping',
49 |                                         action='export', format='csv')) %>%
50 |       httr::content(show_col_types = FALSE) %>%
51 |       dplyr::rename("role_id" = unique_role_name) %>%
52 |       full_join(role, by = "role_id") %>%
53 |       dplyr::select(role_name, role_id,username)}
54 | 
55 | 
56 |   all <- user_current %>%
57 |     dplyr::select(username:data_access_group_id)  %>%
58 |     filter(is.na(username)==F) %>%
59 |     left_join(user_role %>%
60 |                 dplyr::mutate(role_name = factor(role_name)) %>%
61 |                 dplyr::arrange(role_name), by = "username") %>%
62 |     dplyr::ungroup()
63 | 
64 | 
65 |   if(remove_id==T){
66 |     all <- all %>%
67 |       select(-all_of(c("email", "firstname", "lastname", "expiration")))}
68 | 
69 | 
70 |   sum <- all %>%
71 |     dplyr::left_join(user_rights, by = "username") %>%
72 |     dplyr::select(-any_of(c("email","firstname","lastname","expiration","data_access_group","data_access_group_id"))) %>%
73 |     group_by(across(c(-all_of("username")))) %>%
74 |     dplyr::summarise(n = n(),
75 |                      username = list(unique(username)),
76 |                      .groups = "drop") %>%
77 |     dplyr::full_join(role,by = c("role_name", "role_id")) %>%
78 |     dplyr::mutate(role_name = factor(role_name, levels = sort(role$role_name))) %>%
79 |     dplyr::arrange(role_name) %>%
80 |     dplyr::mutate(n = ifelse(is.na(n)==T, 0, n)) %>%
81 |     dplyr::select(role_name, role_id, n, username, everything())
82 | 
83 |   if(show_rights==F){sum <- sum %>% select(-starts_with("right_"))}
84 | 
85 | 
86 | 
87 |   return(list("sum" = sum, "all" = all))}
88 | 


--------------------------------------------------------------------------------
/R/user_summarise.R:
--------------------------------------------------------------------------------
 1 | # user_summarise--------------------------------
 2 | # Documentation
 3 | #' Summarise the REDCap user dataframe by group (data access group)
 4 | #' @description Group current REDCap project users by DAG (and role) to provide a summarised dataframe of users, names, and emails.
 5 | #' @param data Dataset previously exported of users authorized for a project (5 required columns: data_access_group, username, firstname, lastname, email)
 6 | #' @param redcap_project_uri URI (Uniform Resource Identifier) for the REDCap instance.
 7 | #' @param redcap_project_token API (Application Programming Interface) for the REDCap project.
 8 | #' @param use_ssl Logical value whether to verify the peer's SSL certificate should be evaluated during the API pull (default=TRUE)
 9 | #' @param user_exclude Vector of usernames to be excluded e.g. those with unique rights (default = NULL).
10 | #' @return Dataframe summarising the user dataframe by group (data access group), number of users, and username/fullname/emails (separated by ";").
11 | #' @import dplyr
12 | #' @importFrom httr POST content
13 | #' @importFrom readr read_csv
14 | #' @export
15 | 
16 | 
17 | # Function
18 | user_summarise <- function(redcap_project_uri, redcap_project_token,
19 |                            user_exclude = NULL, role_exclude = NULL){
20 | 
21 |   require(httr); require(dplyr); require(stringr); require(tidyr)
22 | 
23 |   user <- collaborator::user_role(redcap_project_uri = redcap_project_uri,
24 |                             redcap_project_token = redcap_project_token,
25 |                     remove_id = F)$all %>%
26 |     dplyr::filter(! (username %in% user_exclude)) %>%
27 |     dplyr::filter(! (role_name %in% role_exclude)) %>%
28 | 
29 |     # select relevant columns
30 |     dplyr::select(data_access_group, username, firstname, lastname, email)
31 | 
32 |   output <- user %>%
33 |     # must have an email and data_access_group
34 |     dplyr::filter(is.na(email)==F) %>%
35 | 
36 |     # ensure no special characters
37 |     dplyr::mutate(across(everything(), function(x){iconv(x, to ="ASCII//TRANSLIT")}),
38 |                   across(contains("name"), function(x){stringr::str_replace_all(x, ";", "")})) %>%
39 | 
40 |     #summarise by DAG
41 |     dplyr::group_by(data_access_group) %>%
42 |     dplyr::summarise(user_n = n(),
43 |                      user_usernames = paste(paste0(username), collapse = "; "),
44 |                      user_fullnames = paste(paste0(firstname, " ", lastname), collapse = "; "),
45 |                      user_firstnames = paste(paste0(firstname), collapse = "; "),
46 |                      user_lastnames = paste(paste0(lastname), collapse = "; "),
47 |                      user_email = paste0(email, collapse = "; "), .groups = "drop")
48 | 
49 |   return(output)}
50 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | collaboratoR
 2 | ==========
 3 | 
 4 | <img src="vignettes/figures/collaborator_logo.png" align="right" width="159" height="174"/>
 5 | 
 6 | The `collaboratoR` package provides functions which help facilitate administration of multi-centre research using R and the [REDCap](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2700030/) (Research Electronic Data Capture) application. These functions have been developed and utilised within several collaborative research projects from:
 7 | 
 8 | - [**Student Audit and Research in Surgery (STARSurg)**](https://www.starsurg.org)
 9 |   - [Outcomes After Kidney injury in Surgery (OAKS)](https://starsurg.org/oaks-2/)
10 |   - [REspiratory COmplications after abdomiNal Surgery (RECON)](https://starsurg.org/recon-project-18-19/)
11 | <br><br>
12 |   
13 | - [**EuroSurg**](http://www.eurosurg.org)
14 |   - [Ileus Management International (IMAGINE)](http://eurosurg.org/imagine-hub/)
15 |   - [Management of COMPlicAted intra-abdominal collectionS after colorectal Surgery (COMPASS)](http://eurosurg.org/compass-study-hub/)
16 | <br><br>
17 | 
18 | - [**GlobalSurg**](http://globalsurg.org/)
19 |   - [GlobalSurg 3: Quality and outcomes after global cancer surgery](http://globalsurg.org/projects/cohort-studies/globalsurg-3/)
20 |   - [CovidSurg](https://globalsurg.org/covidsurg/)
21 | <br><br>
22 | 
23 | - [**West Midlands Research Collaborative (WMRC)**](http://www.wmresearch.org.uk)
24 |   - [Oesophago-Gastric Anastomosis Audit (OGAA)](https://ogaa.org.uk/)
25 | <br><br>
26 | 
27 | All functions have been developed with the aim of being applicable to a broad range of REDCap projects. Any suggestions for further functions are welcome, however these would need to be aligned with this aim.
28 | 
29 | If the CollaboratoR package is used within your project, please e-mail the authors [here](mailto:v1kmcle6@ed.ac.uk) so the list above can be updated. Please also consider citing the package in any resultant publications with the suggested format: "McLean KA, Ots R, Drake TM, Harrison EM. *CollaboratoR: Scalable multi-centre research using R and REDCap*. 2019. Avaliable at: [https://github.com/kamclean/collaborator](https://github.com/kamclean/collaborator)."
30 | 
31 | Installation and Documentation
32 | ------------------------------
33 | 
34 | You can install `collaborator` from github with:
35 | 
36 | ``` r
37 | # install.packages("remotes")
38 | remotes::install_github("kamclean/collaborator")
39 | ```
40 | 
41 | It is recommended that this package is used together with `tidyverse` packages.
42 | 
43 | Furthermore, functions to faciliate analysis of data collected in R can be found in the [finalfit](https://github.com/ewenharrison/finalfit/blob/master/README.md) package.
44 | 
45 | Vignettes
46 | ---------
47 | [Generating Redcap Summary Data](https://github.com/kamclean/collaborator/blob/master/vignettes/vignette_summary.md)
48 | 
49 | [Redcap User Management: 1. Explore Current Users](https://github.com/kamclean/collaborator/blob/master/vignettes/vignette_user_1_explore.md)
50 | 
51 | [Redcap User Management: 2. Automatically Assign User Rights](https://github.com/kamclean/collaborator/blob/master/vignettes/vignette_user_2_assign.md)
52 | 
53 | [Generating Authorship Lists](https://github.com/kamclean/collaborator/blob/master/vignettes/vignette_authors.md)
54 | 
55 | [Generating Missing Data Reports](https://github.com/kamclean/collaborator/blob/master/vignettes/vignette_missing.md)
56 | 
57 | [Generating a Simple, Easily-Shareable Data Dictionary](https://github.com/kamclean/collaborator/blob/master/vignettes/vignette_data_dict.md)
58 | 
59 | [Mailmerge in R with personalised attachments](https://github.com/kamclean/collaborator/blob/master/vignettes/vignette_email.md)
60 | 


--------------------------------------------------------------------------------
/collaborator.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/data/example_data_dict.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kamclean/collaborator/c302ab59695f2f781ea6810872abc69c4c41bf21/data/example_data_dict.rda


--------------------------------------------------------------------------------
/data/example_df_user.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kamclean/collaborator/c302ab59695f2f781ea6810872abc69c4c41bf21/data/example_df_user.rda


--------------------------------------------------------------------------------
/data/example_report_author.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kamclean/collaborator/c302ab59695f2f781ea6810872abc69c4c41bf21/data/example_report_author.rda


--------------------------------------------------------------------------------
/man/author_name.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/author_name.R
 3 | \name{author_name}
 4 | \alias{author_name}
 5 | \title{Pull first name(s) and last name for a given list of orcid}
 6 | \usage{
 7 | author_name(
 8 |   data,
 9 |   first_name = "first_name",
10 |   last_name = "last_name",
11 |   initial = T,
12 |   position = "right",
13 |   initial_max = 3
14 | )
15 | }
16 | \arguments{
17 | \item{data}{datafame containing a vectors of the author name (split into first_name and last_name)}
18 | 
19 | \item{first_name}{Column name of vector containing the first and middle name(s) (default = "first_name")}
20 | 
21 | \item{last_name}{Column name of vector containing the last or family name (default = "last_name")}
22 | 
23 | \item{initial}{Should the first / middle name(s) be converted to initial (default = TRUE)}
24 | 
25 | \item{position}{initial to "left" or "right" of last name (default = "right")}
26 | 
27 | \item{initial_max}{Maximum number of digits (default = 3)}
28 | }
29 | \value{
30 | Vector of the combined name composing the full author name in the requested format.
31 | }
32 | \description{
33 | Pull and format first name(s) and last name for a given list of orcid
34 | }
35 | 


--------------------------------------------------------------------------------
/man/collapse01.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/collapse_01.R
 3 | \name{collapse01}
 4 | \alias{collapse01}
 5 | \title{Collapse a group of binary variables (e.g. those generated by checkbox variables in REDCap) into summary columns.}
 6 | \usage{
 7 | collapse01(
 8 |   df,
 9 |   cols,
10 |   prefix = NULL,
11 |   suffix = NULL,
12 |   output = c("list", "yesno", "n"),
13 |   remove = TRUE,
14 |   binary0 = c(0, "0", "No", "no"),
15 |   binary1 = c(1, "1", "Yes", "yes")
16 | )
17 | }
18 | \arguments{
19 | \item{df}{Dataframe.}
20 | 
21 | \item{cols}{List of columnn names of binary variables desired to be collapsed.}
22 | 
23 | \item{prefix}{String to add to the start of all summary columns names.}
24 | 
25 | \item{suffix}{String to add to the end of all summary columns names.}
26 | 
27 | \item{output}{List of desired outputs: yesno (any values selected), n (number of values selected) and list (list of column names of all values selected).}
28 | 
29 | \item{remove}{Logical value to remove columns supplied to the "cols" parameter}
30 | 
31 | \item{binary0}{List of all values corresponding to "0" (No) in the binary variable (default = 0 or no)}
32 | 
33 | \item{binary1}{List of all values corresponding to "1" (Yes) in the binary variable (default = 1 or yes).}
34 | }
35 | \value{
36 | Dataframe with up to 3 additional columns: yesno (any values selected), n (number of values selected) and list (list of column names of all values selected).
37 | }
38 | \description{
39 | Collapse a group of binary variables (e.g. those generated by checkbox variables in REDCap) into summary columns.
40 | }
41 | 


--------------------------------------------------------------------------------
/man/dag_manage.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dag_manage.R
 3 | \name{dag_manage}
 4 | \alias{dag_manage}
 5 | \title{Generate REDCap summary data.}
 6 | \usage{
 7 | dag_manage(
 8 |   redcap_project_uri,
 9 |   redcap_project_token,
10 |   import = NULL,
11 |   remove = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{redcap_project_uri}{URI (Uniform Resource Identifier) for the REDCap instance.}
16 | 
17 | \item{redcap_project_token}{API (Application Programming Interface) for the REDCap project.}
18 | 
19 | \item{import}{A list of new DAGs to import into the project (Default = NULL).}
20 | 
21 | \item{remove}{A list of current DAGs to delete in the project (Default = NULL).}
22 | }
23 | \value{
24 | A dataframe of DAGs specifiying those which are new, deleted, or unchanged (-).
25 | }
26 | \description{
27 | Used to generate high quality summary data for REDCap projects at overall, and DAG-specific level.
28 | }
29 | 


--------------------------------------------------------------------------------
/man/data_dict.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data_dict.R
 3 | \name{data_dict}
 4 | \alias{data_dict}
 5 | \title{Generate a data dictionary.}
 6 | \usage{
 7 | data_dict(df, var_include = NULL, var_exclude = NULL, label = FALSE)
 8 | }
 9 | \arguments{
10 | \item{df}{Dataframe.}
11 | 
12 | \item{var_include}{Vector of names of variables that are desired to be included in the data dictionary (default: NULL).}
13 | 
14 | \item{var_exclude}{Vector of names of variables that are desired to be excluded from the data dictionary (default: NULL).}
15 | 
16 | \item{label}{Logical value (default = FALSE). If TRUE, then include the variable label for each variable (if assigned).}
17 | }
18 | \value{
19 | Dataframe with 4 columns: variable (variable name), class (variable class), na_pct (the percentage of data which is NA for that variable), and value (an appropriate summary for the variable class).
20 | }
21 | \description{
22 | Used to generate an easily sharable data dictionary for an R dataframe. This supports the following classes: numeric, integer, logical, Date, character, String, factor, ordered.
23 | }
24 | 


--------------------------------------------------------------------------------
/man/example_data_dict.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/example_data.R
 3 | \docType{data}
 4 | \name{example_data_dict}
 5 | \alias{example_data_dict}
 6 | \title{Example dataset of surgical patients}
 7 | \format{
 8 | A data frame with 20 rows and 11 variables.
 9 | }
10 | \usage{
11 | data(example_data_dict)
12 | }
13 | \description{
14 | Example dataset to be used within the \href{https://github.com/kamclean/collaborator/blob/master/vignettes/vignette_data_dict.md}{Generating a Simple, Easily-Shareable Data Dictionary} vignette (all data randomly generated).
15 | }
16 | \keyword{data}
17 | 


--------------------------------------------------------------------------------
/man/example_df_user.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/example_data.R
 3 | \docType{data}
 4 | \name{example_df_user}
 5 | \alias{example_df_user}
 6 | \title{Example dataset of REDCap users}
 7 | \format{
 8 | A data frame with 34 rows and 2 variables.
 9 | }
10 | \usage{
11 | data(example_df_user)
12 | }
13 | \description{
14 | Example dataset to be used within the \href{https://github.com/kamclean/collaborator/blob/master/vignettes/vignette_user_1_explore.md}{Redcap User Management: 1. Explore Current Users} vignette (all data randomly generated).
15 | }
16 | \keyword{data}
17 | 


--------------------------------------------------------------------------------
/man/example_report_author.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/example_data.R
 3 | \docType{data}
 4 | \name{example_report_author}
 5 | \alias{example_report_author}
 6 | \title{Example dataset of authors}
 7 | \format{
 8 | A data frame with 100 rows and 3 variables.
 9 | }
10 | \usage{
11 | data(example_report_author)
12 | }
13 | \description{
14 | Example dataset to be used within the \href{https://github.com/kamclean/collaborator/blob/master/vignettes/vignette_authors.md}{Generating Authorship Lists} vignette (all data randomly generated).
15 | }
16 | \keyword{data}
17 | 


--------------------------------------------------------------------------------
/man/group2csv.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/group2csv.R
 3 | \name{group2csv}
 4 | \alias{group2csv}
 5 | \title{Split a tibble/dataframe into CSV by "group" variable}
 6 | \usage{
 7 | group2csv(
 8 |   data,
 9 |   group,
10 |   subfolder = "folder_csv",
11 |   file_prefix = "",
12 |   file_suffix = ""
13 | )
14 | }
15 | \arguments{
16 | \item{data}{Dataframe with at least 1 column - corresponding to a "group".}
17 | 
18 | \item{group}{Grouping variable (must be unique values) who will recieve unique email.}
19 | 
20 | \item{subfolder}{Folder within working directory (e.g. string entered into here::here()) where CSV will be stored. Default = "folder_csv".}
21 | 
22 | \item{file_prefix}{String to be prefixed to "group" when naming CSV file.}
23 | 
24 | \item{file_suffix}{String to be suffixed to "group" when naming CSV file.}
25 | }
26 | \value{
27 | Dataframe of group AND csv file path ("file").
28 | }
29 | \description{
30 | Split a tibble/dataframe by "group" variable, then save in a subfolder as CSV.
31 | }
32 | 


--------------------------------------------------------------------------------
/man/orcid_name.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/orcid_name.R
 3 | \name{orcid_name}
 4 | \alias{orcid_name}
 5 | \title{Pull first name(s) and last name for a given list of orcid}
 6 | \usage{
 7 | orcid_name(data, orcid = "orcid", reason = FALSE, na.rm = FALSE)
 8 | }
 9 | \arguments{
10 | \item{data}{datafame containing a vector of ORCID (XXXX-XXXX-XXXX-XXXX format)}
11 | 
12 | \item{orcid}{Column name of vector containing ORCID (default = "orcid")}
13 | 
14 | \item{reason}{Logical value to determine whether output should include reasons for NA values (default = FALSE) or vector of ORCID (TRUE).}
15 | 
16 | \item{na.rm}{Remove NA (invalid ORCID) from output (default = TRUE)}
17 | }
18 | \value{
19 | Dataframe with 5 mandatory columns: orcid, full name, first name(s), last name, publication name.
20 | }
21 | \description{
22 | Pull and format first name(s) and last name for a given list of orcid
23 | }
24 | 


--------------------------------------------------------------------------------
/man/orcid_valid.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/orcid_valid.R
 3 | \name{orcid_valid}
 4 | \alias{orcid_valid}
 5 | \title{Validate vector of ORCID}
 6 | \usage{
 7 | orcid_valid(data, orcid = "orcid", reason = FALSE, na.rm = FALSE)
 8 | }
 9 | \arguments{
10 | \item{data}{datafame containing a vector of ORCID}
11 | 
12 | \item{orcid}{Column name of vector containing ORCID (default = "orcid")}
13 | 
14 | \item{reason}{Logical value to determine whether output should include reasons for validity (default = FALSE) or vector of ORCID (TRUE).}
15 | 
16 | \item{na.rm}{Remove NA (invalid ORCID) from output}
17 | }
18 | \value{
19 | Vector of orcid (reason = FALSE) or tibble with columns specifying the validation checks failed by the ORCID ("check_" columns)
20 | }
21 | \description{
22 | Validate vector of ORCID based number of digits / format / checksum.
23 | }
24 | 


--------------------------------------------------------------------------------
/man/redcap_compare.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/redcap_compare.R
 3 | \name{redcap_compare}
 4 | \alias{redcap_compare}
 5 | \title{Compare multiple REDCap projects to determine discrepancies in structure or user rights.}
 6 | \usage{
 7 | redcap_compare(redcap_project_uri, redcap_token_list, comparison)
 8 | }
 9 | \arguments{
10 | \item{redcap_project_uri}{URI (Uniform Resource Identifier) for the REDCap instance (must all be on same REDCap instance)}
11 | 
12 | \item{redcap_token_list}{List of API (Application Programming Interface) for the REDCap projects.}
13 | 
14 | \item{comparison}{What should be compared - project structure ("metadata") or user roles ("role").}
15 | }
16 | \value{
17 | Nested tibble of the full comparison across projects ("full") and the specific discrepancies ("discrepancies").
18 | }
19 | \description{
20 | Used to compare multiple REDCap projects to determine discrepancies in structure or user rights.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/redcap_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/redcap_data.R
 3 | \name{redcap_data}
 4 | \alias{redcap_data}
 5 | \title{Export REDCap dataset and label using metadata}
 6 | \usage{
 7 | redcap_data(
 8 |   redcap_project_uri,
 9 |   redcap_project_token,
10 |   forms = "all",
11 |   report_id = NULL,
12 |   filterlogic = NULL,
13 |   checkbox_value = "label",
14 |   repeat_format = "long",
15 |   include_original = F,
16 |   include_complete = F,
17 |   include_surveyfield = F,
18 |   include_label = F
19 | )
20 | }
21 | \arguments{
22 | \item{redcap_project_uri}{URI (Uniform Resource Identifier) for the REDCap instance.}
23 | 
24 | \item{redcap_project_token}{API (Application Programming Interface) for the REDCap project.}
25 | 
26 | \item{forms}{A list of forms wanted to be extracted, rather the the full dataset (default = "all"). This MUST align with the form_name as per redcap_metadata().}
27 | 
28 | \item{report_id}{A specific REDCap report ID wanted to be extracted, rather the the full dataset (default = NULL).}
29 | 
30 | \item{filterlogic}{Filter wished to be applied to the redcap project prior to pulling. MUST be in REDCap format not R format (default = NULL).}
31 | 
32 | \item{checkbox_value}{Determine if output checkbox variables should be unchanged from the REDCap record export ("raw") or labelled ("label"). Default = "raw".}
33 | 
34 | \item{repeat_format}{The format the repeating instrument data should be provided in. Options include "long" (default), "wide" (each instance a separate column), or "list" (nested instances).}
35 | 
36 | \item{include_original}{Logical value to determine whether original data should be provided too (default = FALSE).}
37 | 
38 | \item{include_complete}{Logical value to determine whether columns specifiying if forms are complete should be retained.}
39 | 
40 | \item{include_surveyfield}{Logical value to determine whether survey fields are extracted (e.g. timestamps)}
41 | 
42 | \item{include_label}{Logical value to determine whether ff_label should be used to apply the (human readable) label from REDCap to columns}
43 | }
44 | \value{
45 | Three nested tibbles: (1) "exported": REDcap record export (unchanged) (2) labelled": REDcap record export with variables classified and columns labelled as specified via column_name and column_attr (3) "metadata": Cleaned metadata file for the REDCap dataset.
46 | }
47 | \description{
48 | Export the REDCap dataset, and use the metadata to classify the variables and label the columns.
49 | }
50 | 


--------------------------------------------------------------------------------
/man/redcap_format_repeat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/redcap_format_repeat.R
 3 | \name{redcap_format_repeat}
 4 | \alias{redcap_format_repeat}
 5 | \title{Change structure of repeating data}
 6 | \usage{
 7 | redcap_format_repeat(data, format = "long")
 8 | }
 9 | \arguments{
10 | \item{data}{Output from redcap_data$data}
11 | 
12 | \item{format}{The format the repeating instrument data should be provided in. Options include "long" (default), "wide" (each instance a separate column), or "list" (nested instances).}
13 | }
14 | \value{
15 | Dataframe
16 | }
17 | \description{
18 | Change structure of repeating data from redcap_data from long to either list or wide.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/redcap_log.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/redcap_log.R
 3 | \name{redcap_log}
 4 | \alias{redcap_log}
 5 | \title{Export the redcap log}
 6 | \usage{
 7 | redcap_log(
 8 |   redcap_project_uri,
 9 |   redcap_project_token,
10 |   date_start = NULL,
11 |   date_end = NULL,
12 |   item = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{redcap_project_uri}{URI (Uniform Resource Identifier) for the REDCap instance.}
17 | 
18 | \item{redcap_project_token}{API (Application Programming Interface) for the REDCap project.}
19 | 
20 | \item{date_start}{Limit to the start date extracted in the format of YYYY-MM-DD (default = NULL)}
21 | 
22 | \item{date_end}{Limit to the end date extracted in the format of YYYY-MM-DD (default = NULL)}
23 | 
24 | \item{item}{A list of all types of events wanted to be exported (default = NULL aka all). These can include "record", "user", "page_view","lock_record", "manage", "record_add", "record_edit", "record_delete", "export".}
25 | }
26 | \value{
27 | Logging record of the specified events
28 | }
29 | \description{
30 | Export the redcap log to gain insight into redcap changes over time
31 | }
32 | 


--------------------------------------------------------------------------------
/man/redcap_metadata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/redcap_metadata.R
 3 | \name{redcap_metadata}
 4 | \alias{redcap_metadata}
 5 | \title{Export REDCap metadata (with individual checkbox variables if present) and variable class in R.}
 6 | \usage{
 7 | redcap_metadata(redcap_project_uri, redcap_project_token, descriptive = FALSE)
 8 | }
 9 | \arguments{
10 | \item{redcap_project_uri}{URI (Uniform Resource Identifier) for the REDCap instance.}
11 | 
12 | \item{redcap_project_token}{API (Application Programming Interface) for the REDCap project.}
13 | 
14 | \item{descriptive}{Logical value whether to include descriptive fields within the dataset (default = FALSE)}
15 | }
16 | \value{
17 | Tibble of REDCap project metadata (with individual checkbox variables if present) and variable class in R.
18 | }
19 | \description{
20 | Used to generate high quality summary data for REDCap projects at overall, and DAG-specific level.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/redcap_sum.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/redcap_sum.R
 3 | \name{redcap_sum}
 4 | \alias{redcap_sum}
 5 | \title{Generate REDCap summary data.}
 6 | \usage{
 7 | redcap_sum(
 8 |   redcap_project_uri = NULL,
 9 |   redcap_project_token = NULL,
10 |   centre_sum = TRUE,
11 |   top = 10,
12 |   var_include = NULL,
13 |   var_exclude = NULL,
14 |   user_include = NULL,
15 |   user_exclude = NULL,
16 |   dag_exclude = NULL,
17 |   dag_include = NULL,
18 |   record_include = NULL,
19 |   record_exclude = NULL
20 | )
21 | }
22 | \arguments{
23 | \item{redcap_project_uri}{URI (Uniform Resource Identifier) for the REDCap instance.}
24 | 
25 | \item{redcap_project_token}{API (Application Programming Interface) for the REDCap project.}
26 | 
27 | \item{centre_sum}{Logical value to determine whether data access group-level summaries will be produced (Default = TRUE).}
28 | 
29 | \item{top}{When centre_sum = TRUE, defines output of the number of centres with the most records uploaded (default is top 10).}
30 | 
31 | \item{var_include}{Vector of names of variables that are desired to be specifically used to assess data completness (alternate method from using "var_exclude").}
32 | 
33 | \item{var_exclude}{Vector of names of variables that are desired to be excluded from assessment of data completness (any NA value will be counted as incomplete).}
34 | 
35 | \item{user_include}{Vector of redcap usernames that are desired to be included in the user count (note all users not assigned to a DAG will automatically be excluded).}
36 | 
37 | \item{user_exclude}{Vector of redcap usernames that are desired to be excluded from the user count (note all users not assigned to a DAG will automatically be excluded).}
38 | 
39 | \item{dag_exclude}{Vector of redcap data access group names that are desired to be excluded from the record count.}
40 | 
41 | \item{dag_include}{Vector of redcap data access group names that are desired to be included in the record count.}
42 | 
43 | \item{record_include}{Vector of redcap record_id that are desired to be included in the record count.}
44 | 
45 | \item{record_exclude}{Vector of redcap record_id that are desired to be excluded from the record count.}
46 | }
47 | \value{
48 | Nested dataframes of (i) overall summary statistics for the project ("sum_overall") (ii). DAG-specific summary statistics for the project ("dag_all") (iii). DAGs with no data uploaded, but users assigned ("dag_nodata") (iv). DAGs with <100% completeness ("dag_incom") (v). The top n recruiting centres ("dag_top").
49 | }
50 | \description{
51 | Used to generate high quality summary data for REDCap projects at overall, and DAG-specific level.
52 | }
53 | 


--------------------------------------------------------------------------------
/man/report_auth.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/report_auth.R
 3 | \name{report_auth}
 4 | \alias{report_auth}
 5 | \title{Generate a formatted authorship list.}
 6 | \usage{
 7 | report_auth(
 8 |   df,
 9 |   name,
10 |   group = NULL,
11 |   subdivision = NULL,
12 |   path = NULL,
13 |   name_sep = ", ",
14 |   group_brachet = "()",
15 |   group_sep = "; "
16 | )
17 | }
18 | \arguments{
19 | \item{df}{Dataframe with authors in rows.}
20 | 
21 | \item{group}{Column name of a variable in the dataframe by which to group authorship (the default is NULL).}
22 | 
23 | \item{subdivision}{Column name of an additional variable in the dataframe by which to subdivide authorship (the default is NULL).}
24 | 
25 | \item{path}{Path or connection to write to as .txt file.}
26 | 
27 | \item{name_sep}{Character(s) which will separate names within the group (the default is ", ").}
28 | 
29 | \item{group_brachet}{Character(s) bracheting the group (the default is "()").}
30 | 
31 | \item{group_sep}{Character(s) which will separate the groups (the default is "; ").}
32 | }
33 | \value{
34 | Returns a formated string (and an optional .txt file specified using path)
35 | }
36 | \description{
37 | Used to generate a formatted authorship list for all users by group (the group the user belongs to e.g. the centre which they participated at). Optional subdivisions can be created to stratify users and groups. This could be a role (e.g. collaborator and validator) or region/country.
38 | }
39 | 


--------------------------------------------------------------------------------
/man/report_miss.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/report_miss.R
 3 | \name{report_miss}
 4 | \alias{report_miss}
 5 | \title{Generate a missing data report for a REDCap project.}
 6 | \usage{
 7 | report_miss(
 8 |   redcap_project_uri,
 9 |   redcap_project_token,
10 |   missing_threshold = 0.05,
11 |   var_include = NULL,
12 |   var_exclude = NULL,
13 |   record_exclude = NULL,
14 |   record_include = NULL,
15 |   dag_include = NULL,
16 |   dag_exclude = NULL,
17 |   record_id = "record_id"
18 | )
19 | }
20 | \arguments{
21 | \item{redcap_project_uri}{URI (Uniform Resource Identifier) for the REDCap instance.}
22 | 
23 | \item{redcap_project_token}{API (Application Programming Interface) for the REDCap project.}
24 | 
25 | \item{missing_threshold}{The overall proportion of missing data that is acceptable (default = 0.05).}
26 | 
27 | \item{var_include}{Vector of names of variables that are desired to be specifically used to assess data completness (alternate method from using "var_exclude").}
28 | 
29 | \item{var_exclude}{Vector of names of variables that are desired to be excluded from assessment of data completness (any NA value will be counted as incomplete).}
30 | 
31 | \item{record_exclude}{Vector of redcap record_id that are desired to be excluded from the record count.}
32 | 
33 | \item{record_include}{Vector of redcap record_id that are desired to be included in the record count.}
34 | 
35 | \item{dag_include}{Vector of redcap data access group names that are desired to be included in the record count.}
36 | 
37 | \item{dag_exclude}{Vector of redcap data access group names that are desired to be excluded from the record count.}
38 | 
39 | \item{record_id}{String of variable name which fufills the record_id role (default = "record_id")}
40 | }
41 | \value{
42 | Nested dataframe with a summary of missing data at the redcap_data_access_group level and the record level.
43 | }
44 | \description{
45 | Used to generate a report of record-level + redcap_data_access_group-level missing data within a REDCap project (which accounts for branching logic in the dataframe).
46 | }
47 | 


--------------------------------------------------------------------------------
/man/user_import.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/user_import.R
 3 | \name{user_import}
 4 | \alias{user_import}
 5 | \title{Generate a csv file to upload new user accounts to REDCap}
 6 | \usage{
 7 | user_import(
 8 |   df,
 9 |   username,
10 |   first_name,
11 |   last_name,
12 |   email,
13 |   institution = NULL,
14 |   sponser = NULL,
15 |   expiration = NULL,
16 |   comments = NULL,
17 |   path = NULL
18 | )
19 | }
20 | \arguments{
21 | \item{df}{Dataframe of at least 4 mandatory columns (corresponding to: username, first name, last name, and email address) and 4 optional columns (corresponding to: institution, sponser, expiration, comments).}
22 | 
23 | \item{username}{Column name (Mandatory) which corresponds to "Username".}
24 | 
25 | \item{first_name}{Column name (Mandatory) which corresponds to "First name".}
26 | 
27 | \item{last_name}{Column name (Mandatory) which corresponds to "Last name".}
28 | 
29 | \item{email}{Column name (Mandatory) which corresponds to "Email address".}
30 | 
31 | \item{institution}{Column name (Optional/Recommended) which corresponds to "Institution ID". Can be used to record the data_access_group / centre of the user.}
32 | 
33 | \item{sponser}{Column name (Optional) which corresponds to "Sponsor username".}
34 | 
35 | \item{expiration}{Column name (Optional) which corresponds to "Expiration". Must be in YYYY-MM-DD HH:MM or MM/DD/YYYY HH:MM format.}
36 | 
37 | \item{comments}{Column name (Optional) which corresponds to "Comments".}
38 | 
39 | \item{path}{Path or connection to write to as .csv file.}
40 | }
41 | \value{
42 | Returns a dataframe formated for REDCap user import (and an optional CSV file specified using path)
43 | }
44 | \description{
45 | Used to generate a csv file that can be used to upload new user accounts to REDCap directly (via control centre). This requires a dataframe of at least 4 mandatory columns (corresponding to: username, first name, last name, and email address) and 4 optional columns (corresponding to: institution, sponser, expiration, comments). All optional columns will be blank unless otherwise specified.
46 | }
47 | 


--------------------------------------------------------------------------------
/man/user_manage.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/user_manage.R
 3 | \name{user_manage}
 4 | \alias{user_manage}
 5 | \title{Used to manage REDCap project users}
 6 | \usage{
 7 | user_manage(
 8 |   redcap_project_uri,
 9 |   redcap_project_token,
10 |   users = NULL,
11 |   role = NULL,
12 |   dag = NULL,
13 |   remove = FALSE
14 | )
15 | }
16 | \arguments{
17 | \item{redcap_project_uri}{URI (Uniform Resource Identifier) for the REDCap instance.}
18 | 
19 | \item{redcap_project_token}{API (Application Programming Interface) for the REDCap project.}
20 | 
21 | \item{users}{Vector of usernames or a dataframe containing at least 1 column ("username"). Further columns specifying individual patients to be removed ("remove") or assigned to roles ("role") or data access groups ("dag") can be added.}
22 | 
23 | \item{role}{String of a single role ID, role name or username of user with the desired user rights to be applied to ALL users specified in "users"  (use a column in users if wanting to be different for each user). Must specify "none" if no assignment of role.}
24 | 
25 | \item{dag}{String of a unique DAG to ALL users specified in "users" will be assigned to (use a column in users if wanting to be different for each user). Must specify "none" if no assignment of DAG.}
26 | 
27 | \item{remove}{Logical value indicating if ALL users specified in "users" are to be removed  (use a column in users if wanting to be different for each user). Default is FALSE (no users to be removed).}
28 | }
29 | \value{
30 | Nested tibbles of the outcome (1) "correct" users with the correct allocation specified (2) "error" users with an allocation unable to be completed. User acccounts are still required to be entered manually.
31 | }
32 | \description{
33 | Used to manage users - whether to change users present, or their specific roles and data access groups on a redcap project.
34 | }
35 | 


--------------------------------------------------------------------------------
/man/user_role.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/user_role.R
 3 | \name{user_role}
 4 | \alias{user_role}
 5 | \title{Identifies unique REDCap user roles}
 6 | \usage{
 7 | user_role(
 8 |   redcap_project_uri,
 9 |   redcap_project_token,
10 |   user_exclude = NULL,
11 |   remove_id = T,
12 |   show_rights = F
13 | )
14 | }
15 | \arguments{
16 | \item{redcap_project_uri}{URI (Uniform Resource Identifier) for the REDCap instance.}
17 | 
18 | \item{redcap_project_token}{API (Application Programming Interface) for the REDCap project.}
19 | 
20 | \item{user_exclude}{Vector of usernames to be excluded (e.g. those with unique rights). Default is none (e.g. "").}
21 | 
22 | \item{remove_id}{Logical value to remove identifying details of users (e.g. name, email). Default is TRUE}
23 | 
24 | \item{show_rights}{Logical value to show user rights allocated to each role. Default is FALSE}
25 | }
26 | \value{
27 | Dataframe of all users by unique role
28 | }
29 | \description{
30 | Used to count the number of unique roles (e.g. unique combinations of user rights) on the REDCap project. Note: this replaces the function of roles on the user rights page of the REDCap.
31 | }
32 | 


--------------------------------------------------------------------------------
/man/user_summarise.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/user_summarise.R
 3 | \name{user_summarise}
 4 | \alias{user_summarise}
 5 | \title{Summarise the REDCap user dataframe by group (data access group)}
 6 | \usage{
 7 | user_summarise(
 8 |   redcap_project_uri,
 9 |   redcap_project_token,
10 |   user_exclude = NULL,
11 |   role_exclude = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{redcap_project_uri}{URI (Uniform Resource Identifier) for the REDCap instance.}
16 | 
17 | \item{redcap_project_token}{API (Application Programming Interface) for the REDCap project.}
18 | 
19 | \item{user_exclude}{Vector of usernames to be excluded e.g. those with unique rights (default = NULL).}
20 | 
21 | \item{data}{Dataset previously exported of users authorized for a project (5 required columns: data_access_group, username, firstname, lastname, email)}
22 | 
23 | \item{use_ssl}{Logical value whether to verify the peer's SSL certificate should be evaluated during the API pull (default=TRUE)}
24 | }
25 | \value{
26 | Dataframe summarising the user dataframe by group (data access group), number of users, and username/fullname/emails (separated by ";").
27 | }
28 | \description{
29 | Group current REDCap project users by DAG (and role) to provide a summarised dataframe of users, names, and emails.
30 | }
31 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | Sys.setenv("R_TESTS" = "") # https://github.com/r-lib/testthat/issues/86
2 | library(testthat)
3 | library(collaborator)
4 | 
5 | test_check("collaborator")
6 | 


--------------------------------------------------------------------------------
/tests/testthat/test_collaborator_all.R:
--------------------------------------------------------------------------------
 1 | # test_collaborator_all-------------------
 2 | library(collaborator)
 3 | 
 4 | 
 5 | # Test data_dict ----------------------
 6 | testthat::test_that("data_dict excludes variables",
 7 |                     {testthat::expect_equal(as.character(data_dict(collaborator::example_data_dict, var_exclude = c("id_num","sex"))$variable[8]),
 8 |                                             "op_procedure_code")})
 9 | 
10 | testthat::test_that("data_dict check character",
11 |                     {testthat::expect_equal(as.character(data_dict(collaborator::example_data_dict)$values[1]),
12 |                                             "20 Unique: 1, 10, 2, 3, 4, 5, 6, 7, 8, 9")})
13 | 
14 | testthat::test_that("data_dict check numeric",
15 |                     {testthat::expect_equal(as.character(data_dict(collaborator::example_data_dict)$values[2]),
16 |                                             "Mean: 50.7 Median: 49.5 Range: 22.0 to 79.0")})
17 | 
18 | testthat::test_that("data_dict check factor",
19 |                     {testthat::expect_equal(as.character(data_dict(collaborator::example_data_dict)$values[3]),
20 |                                             "2 Levels: female, male")})
21 | 
22 | testthat::test_that("data_dict check Orderedfactor",
23 |                     {testthat::expect_equal(as.character(data_dict(collaborator::example_data_dict)$values[4]),
24 |                                             "5 Levels: I, II, III, IV, V")})
25 | 
26 | testthat::test_that("data_dict check logical",
27 |                     {testthat::expect_equal(as.character(data_dict(collaborator::example_data_dict)$values[5]),
28 |                                             "TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, TRUE")})
29 | 
30 | testthat::test_that("data_dict check Date",
31 |                     {testthat::expect_equal(as.character(data_dict(collaborator::example_data_dict)$values[7]),
32 |                                             "Range:  2018-07-29 to 2018-08-11")})
33 | 
34 | testthat::test_that("data_dict check NA calculation",
35 |                     {testthat::expect_equal(as.character(data_dict(collaborator::example_data_dict)$na_pct[11]),
36 |                                             "45.0%")})
37 | 
38 | testthat::test_that("data_dict check NA calculation",
39 |                     {testthat::expect_equal(as.character(data_dict(collaborator::example_data_dict)$na_pct[1]),
40 |                                             " 0.0%")})
41 | 


--------------------------------------------------------------------------------
/vignettes/figures/collaborator_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kamclean/collaborator/c302ab59695f2f781ea6810872abc69c4c41bf21/vignettes/figures/collaborator_logo.png


--------------------------------------------------------------------------------
/vignettes/figures/rmd_email.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kamclean/collaborator/c302ab59695f2f781ea6810872abc69c4c41bf21/vignettes/figures/rmd_email.png


--------------------------------------------------------------------------------
/vignettes/render.R:
--------------------------------------------------------------------------------
1 | rmarkdown::render("vignettes/vignette_authors.Rmd")
2 | 


--------------------------------------------------------------------------------
/vignettes/vignette_authors.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Generating Authorship Lists"
  3 | author: "Kenneth McLean"
  4 | date: "`r Sys.Date()`"
  5 | output:
  6 |   md_document: default
  7 |   html_document: default
  8 | always_allow_html: true
  9 | vignette: >
 10 |   %\VignetteIndexEntry{predict}
 11 |   %\VignetteEngine{knitr::rmarkdown}
 12 |   %\VignetteEncoding{UTF-8}
 13 | ---
 14 | 
 15 | ```{r setup, include = FALSE}
 16 | knitr::opts_chunk$set(collapse = FALSE, warning=F, message=F)
 17 | library(dplyr);library(collaborator);library(knitr)
 18 |   
 19 | ```
 20 | 
 21 | 
 22 | # Collaborator: Generating Authorship Lists
 23 | 
 24 | Generating and formatting authorship lists for multi-centre research projects can be a challenging data wrangling task. In the case of collaborative research projects, there can be thousands of collaborators across hundreds of sites with a variety of [roles](https://doi.org/10.1016/j.ijsu.2017.12.019).
 25 | 
 26 | 
 27 | ## Author name extraction using ORCID
 28 | 
 29 | ORCID provides a persistent digital identifier (an ORCID iD) that each individual own and control, and that distinguishes them from every other researcher. This is free to register for and can be used to empower collaborators to specify how their name should appear in publications. When working with 1000s collaborators, this provides a simple route to ensure accuate display of names on an authorship list and can be simply extracted from the ORCID website using the ORCID. 
 30 | 
 31 | 
 32 | ### Validation of ORCID
 33 | 
 34 | ORCIDs follow a specific format of 16 characters in the format of "XXXX-XXXX-XXXX-XXXX" (16 characters in groups of 4 and separated by a dash). The extraction from the ORCID website will not work if not in this format. However, we can use the `orcid_valid()` function to investigate whether the ORCIDs on record are valid or not to use.
 35 | 
 36 | ```{r}
 37 | data <- tibble::tibble(n = c(1:7),
 38 |                orcid = c("0000-0001-6482-9086", "0000000250183066", "0000-0002-8738-4902",
 39 |                           "00O0-0002-8738-490X", "0000-0002-8738-490X", "0000-0002-8738-490", NA))
 40 | 
 41 | collaborator::orcid_valid(data, orcid = "orcid", reason = T) %>%
 42 |   knitr::kable()
 43 |   
 44 | ```
 45 | This will output the same dataframe with the "orcid_valid" column appended with a correctly formatted orcid (if it is valid to use). All non-valid orcids will be listed as "NA". If you want to investigate further, you can use the argument ("reason==T") in the function to get additional columns:
 46 | 
 47 |  - "orcid_check_present": A binary value if any value was provided in that row or not (e.g. when a "NA" value was supplied)
 48 | 
 49 |  - "orcid_check_length": A binary value if the ORCID supplied is 16 characters or not (e.g. "0000-0002-8738-490" where a character has been missed)
 50 |  
 51 |  - "orcid_check_format": A binary value if the ORCID supplied fits the correct format of either 16 numbers or 15 numbers with an X at the end (e.g. "00O0-0002-8738-490X" value supplied)
 52 | 
 53 |  - "orcid_check_sum": ORCID uses an internal "checksum" to make sure not just any random set of 16 characters can be entered. This is a binary value if the ORCID supplied either passes or fails this "checksum" (e.g. "0000-0002-8738-490X" is indistinguishable from a valid ORCID, except it fails the checksum, so it had to have been entered incorrectly)
 54 | 
 55 | If any of the values above are "No", then the ORCID is not valid and so cannot be used. The final column "orcid_valid_reason" summarises all the reasons why an ORCID is not valid so these can be addressed.
 56 | 
 57 | 
 58 | ### Extraction of names from ORCID
 59 | 
 60 | Now we know what ORCIDs are valid, lets extract the names of just these using `orcid_name()`. Names on ORCID are recorded in 2 ways:
 61 | 
 62 | 1. "Your given and family names" ("orcid_name_first" and "orcid_name_last").
 63 | 
 64 | 2. "Your published name" (orcid_name_credit"): This is the full name displayed on ORCID, however this is not automatically separated into first name / last name. 
 65 | 
 66 | Given this is recorded in 2 different ways, there can be discrepancies between the two methods (and why both are returned). It is recommended that "Your given and family names" is preferentially used since this avoids any confusion about first/middle vs last names for authorship lists (since the format required for authorship lists is often that given names are converted into initials).
 67 | 
 68 | 
 69 | ```{r}
 70 | data %>%
 71 |   collaborator::orcid_valid(data, orcid = "orcid", reason = F) %>%
 72 |   filter(is.na(orcid_valid)==F) %>%
 73 |   orcid_name(orcid = "orcid_valid", reason = F) %>%
 74 |   knitr::kable()
 75 |   
 76 | ```
 77 | 
 78 | ### Formatting of names
 79 | 
 80 | If you need to format the names of collaborators as initials, this can be simply done using `author_name()`. This will convert every name in the "first_name" column into initials, which can be placed before or after the last name. This is shown in the "author_name" column below.
 81 | 
 82 | ```{r}
 83 | data %>%
 84 |   collaborator::orcid_valid(data, orcid = "orcid", reason = F) %>%
 85 |   collaborator::orcid_name(orcid = "orcid_valid", reason = F) %>%
 86 |   collaborator::author_name(first_name = "orcid_name_first", last_name = "orcid_name_last",position = "left", initial_max=3) %>%
 87 |   dplyr::select(n:orcid_valid, orcid_name_first:orcid_name_last, author_name)%>%
 88 |   knitr::kable()
 89 | ```
 90 | 
 91 | 
 92 | 
 93 | 
 94 | ## Generating the formatted authorship list
 95 | 
 96 | Once you have your final list of authors, the `report_auth()` function aims to simplify the process of generating the fully formatted authorship list, with inbuilt flexibility in how these are presented.
 97 | 
 98 | ### Requirements
 99 | In order for the `report_auth()` function to operate as intended, we must first create a dataframe of all authors/collaborators containing at least 1 column: "name".
100 | 
101 | Example dataframe (`data_author`):
102 | ```{r, warning=FALSE, message=FALSE}
103 | 
104 | data_author <- collaborator::example_report_author
105 | knitr::kable(head(data_author, n=10)) # Please note all names have been randomly generated
106 | ```
107 | 
108 | ### Main Features
109 | #### (1) Basic Function
110 | At it's most basic, `report_auth()` can produce a formatted list of a column of names.
111 | ```{r, warning=FALSE, message=FALSE}
112 |   collaborator::report_auth(data_author) %>% # Please note all names have been randomly generated
113 |   knitr::kable(, col.names= "") 
114 | ```
115 | 
116 | #### (2) Grouping and subdivision of names
117 | 
118 | These names can be further grouped by another column in the dataframe:
119 | ```{r, warning=FALSE, message=FALSE}
120 | collaborator::report_auth(data_author, group = "hospital") %>% # Please note all names have been randomly generated
121 |   knitr::kable(col.names= "") 
122 | ```
123 | 
124 | Or can be subdivided by another column in the dataframe:
125 | ```{r, warning=FALSE, message=FALSE}
126 | 
127 | collaborator::report_auth(data_author, subdivision = "country") %>% # Please note all names have been randomly generated
128 |   knitr::kable(col.names= "")
129 | ```
130 | 
131 | Or groups can be further subdivided (for example by region/country, or by role)
132 | ```{r, warning=FALSE, message=FALSE}
133 | collaborator::report_auth(data_author,
134 |             group = "hospital",
135 |             subdivision = "country") %>% # Please note all names have been randomly generated
136 |   knitr::kable(col.names= "")
137 | ```
138 | 
139 | #### (3) Formatting
140 | Clear and consistent formatting of authorship lists allows the contributions and affiliations of each collaborator/author to be represented. Within `report_auth()`, names are usually separated by a comma (","), with groups separated by a semicolon (";"). Furthermore the name of groups are separated by round brackets ("()"). However, there is a degree of inbuilt flexibility to facilitate customisation.
141 | 
142 | Below if for demonstration of this concept (not intented to reflect how these should be formatted!)
143 | 
144 | 
145 | ```{r, warning=FALSE, message=FALSE}
146 | collaborator::report_auth(data_author, group="hospital", subdivision = "country",
147 |             name_sep = " +", group_brachet = "[]",group_sep = " --- ") %>% # Please note all names have been randomly generated
148 |   knitr::kable(col.names= "")
149 | ```
150 | 


--------------------------------------------------------------------------------
/vignettes/vignette_data.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Handling Data from REDCap"
  3 | author: "Kenneth McLean"
  4 | output:
  5 |   md_document: default
  6 |   html_document: default
  7 | always_allow_html: true
  8 | vignette: >
  9 |   %\VignetteIndexEntry{predict}
 10 |   %\VignetteEngine{knitr::rmarkdown}
 11 |   %\VignetteEncoding{UTF-8}
 12 | ---
 13 | 
 14 | ```{r setup, include = FALSE}
 15 | knitr::opts_chunk$set(collapse = TRUE)
 16 | library(collaborator); library(dplyr)
 17 | ```
 18 | 
 19 | ## REDCap Data for R
 20 | 
 21 | REDCap is a fantastic database, however the ability to export data is limited to the "raw" data (e.g. factors stored as numbers) or "labelled" data (e.g. factors stored as characters). While code is able to be obtained to convert data into the appropriate format, this the unwieldy and needs to be refershed if the underlying project design is changed.  
 22 | 
 23 | The `redcap_data()` function provides a simple way to export, clean, and format data ready for analysis in R. This utilities data available in the metadata of the project to ensure numeric data is numeric, factors are factors in the appropriate order, dates are dates objects, etc. It remains aligned to the data on REDCap despite any changes in the project design.
 24 | 
 25 | ```{r}
 26 | redcap <- collaborator::redcap_data(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
 27 |                                     redcap_project_token = Sys.getenv("collaborator_test_token"),
 28 |                                     include_original = T, include_complete = T)
 29 | ```
 30 | 
 31 | There are 3 potential outputs from this function:
 32 | 
 33 | 1. `data`: The cleaned and formatted REDCap dataset:
 34 | 
 35 | ```{r}
 36 | knitr::kable(redcap$data %>% head(5))
 37 | ```
 38 | 
 39 | 2. `metadata`: The metadata used to create `data`.
 40 | 
 41 | ```{r}
 42 | knitr::kable(redcap$metadata %>% head(5))
 43 | ```
 44 | 3. `original`: An optional output showing the raw dataset extracted from REDCap (included if `include_original = T`):
 45 | 
 46 | ```{r}
 47 | knitr::kable(redcap$original %>% head(5))
 48 | ```
 49 | 
 50 | ## Handling Repeating Instruments
 51 | 
 52 | You may note above that the structure of `redcap$data` and `redcap$original` are different, and that there are multiple rows with the record ID 1. This is because the project includes repeating instruments (forms that can be completed repeatedly to facilitate longitudinal data collection) - this add complexity to how the data is structured / has to be handled.
 53 | 
 54 | The default for both the original and formatted data is to provide the data in a "long" format - one row per repeating instrument:
 55 | 
 56 | - Data which is not part of a repeating instrument is copied across all repeating rows.
 57 | 
 58 | - Data which is part of a repeating instrument is shown to the right of the "redcap_repeat_instance" column (it indicates which instrument each row belongs to).
 59 | 
 60 | ```{r}
 61 | knitr::kable(redcap$data %>% dplyr::select(record_id,pt_age:pt_sex, redcap_repeat_instance:last_col())%>% head(5))
 62 | ```
 63 | 
 64 | However if you require 1 record per row for analysis (the majority of cases), but wish to keep data from ALL repeating instruments you can easily change the data structure by applying `redcap_format_repeat()` to `redcap_data()$data` or specify in `redcap_data()`upfront using the `format` argument. This has 2 options instead of "long" format:
 65 | 
 66 | 1. `wide`: The repeating instruments are all transposed and numbered accordingly. The consistent naming scheme allows re-conversion to a long format using `tidyr::pivot_longer()`.
 67 | 
 68 | ```{r}
 69 | redcap$data %>%
 70 |   redcap_format_repeat(format = "wide") %>%
 71 |   dplyr::select(record_id, contains("instance")) %>%
 72 |   knitr::kable()
 73 | ```
 74 | 
 75 | 2. `list`: The repeating instruments are all stored as a nested list for each record (more efficent storage of data). This can be unnested at a later point using `tidyr::unnest()`.
 76 | 
 77 | ```{r}
 78 | redcap$data %>%
 79 |   redcap_format_repeat(format = "list") %>%
 80 |   dplyr::select(record_id, redcap_repeat_instance:last_col()) %>%
 81 |   knitr::kable()
 82 | ```
 83 | 
 84 | ## Generating a Simple, Easily-Shareable Data Dictionary
 85 | 
 86 | The function `data_dict()` can be used to generate an easily sharable and informative data dictionary for an R dataframe. Unlike the `str()` function typically used to display the internal structure of dataframes in R, this produces a dataframe alongside summarising information relevant to the class of variable, and the proportion of missing data (NA) within each variable.
 87 | 
 88 | This can be useful in quickly understanding how data is structured within the dataset, and in assessing data quality (e.g. outlying and incorrect or quantity of missing values). This can be easily exported from R and shared as a spreadsheet.
 89 | 
 90 | ### Requirements
 91 | The `data_dict()` function can be applied to any dataframe object. At present, it supports the following classes (other classes will be shown as "Class not supported" in the values column):
 92 | 
 93 | * Numeric, integer.
 94 | * Logical.
 95 | * Date.
 96 | * Character, String.
 97 | * Factor, orderedfactor.
 98 | 
 99 | ### Output
100 | 
101 | The `data_dict()` function produces a dataframe which identifies the class, summarised values, and proportion of missing data for each variable in the original dataframe.
102 | 
103 | The output can be easily converted to a spreadsheet file (e.g. csv file) and exported for sharing. Let's use the data extracted above.
104 | 
105 | ```{r, warning=FALSE, message=FALSE}
106 | data <- redcap$data %>% redcap_format_repeat(format = "wide")
107 | 
108 | data_dict(data) %>%
109 |   knitr::kable()
110 | 
111 | ```
112 | 
113 | Through summarising the variables, data will not necessarily be linkable to individual patients (bar in the circumstance where variable(s) contain a direct patient identifier e.g. Community Health Index (CHI) Number, hospital numbers, etc).
114 | 
115 | However, should any variable(s) (such as a direct patient identifier) be desirable to exclude from the output, this can be achieved using the "var_exclude" parameter. 
116 | 
117 | ```{r, warning=FALSE, message=FALSE}
118 | knitr::kable(collaborator::data_dict(data, var_exclude = c("id_num","sex")))
119 | ```
120 | 


--------------------------------------------------------------------------------
/vignettes/vignette_email.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Group-specific emails"
  3 | author: "Kenneth McLean"
  4 | date: "`r Sys.Date()`"
  5 | output:
  6 |   md_document:
  7 |     variant: gfm
  8 | vignette: >
  9 |   %\VignetteIndexEntry{Vignette Title}
 10 |   %\VignetteEngine{knitr::rmarkdown}
 11 |   %\VignetteEncoding{UTF-8}
 12 | ---
 13 | 
 14 | ```{r, include = FALSE}
 15 | knitr::opts_chunk$set(collapse = FALSE)
 16 | library(collaborator); library(dplyr)
 17 | 
 18 | ```
 19 | 
 20 | # CollaboratoR: Group-specific emails (mailmerge in R)
 21 | 
 22 | In large-scale multicentre reseach, communication with data collectors in a meaningful way can be challenging. Often, group-specific emails (with group-specific attachments) can be desired (for example reports of missing data for a particular site). Yet there is a limited number of non-proprietary softwares that allow this to be automated at scale, and often this is required to be done on a manual basis. 
 23 | 
 24 | CollaboratoR has several functions that have been designed to work together to faciliate the process of sending group-specific emails (including attachments). This has been developed with interoperability with REDCap in mind, but the "email_" functions do not require REDCap to work.
 25 | 
 26 | **Why would you choose to do this over mailmerge or other equivalent software?**
 27 | 
 28 |  - The main advantage of this method is the capability to attach group-specific attachments and to reproducibility email regular updates.
 29 | 
 30 | &nbsp;
 31 | &nbsp;
 32 | 
 33 | # 1. Build email dataset
 34 | 
 35 | The first step is to define the groups of people that will be emailed. 
 36 | 
 37 | #### a). REDCap user export 
 38 | 
 39 | For projects on REDCap, all users with access rights to each data access group (DAG) can be accessed via the API.
 40 | 
 41 | This can be done via the `user_role()` CollaboratoR function (see [Redcap User Management: 1. Explore Current Users](https://github.com/kamclean/collaborator/blob/master/vignettes/vignette_user_1_explore.Rmd) for more details), alongside several other functions from other packages ( [redcapAPI](https://cran.r-project.org/web/packages/REDCapR/index.html) / [REDCapAPI](https://cran.r-project.org/web/packages/redcapAPI/index.html) / [RCurl](https://cran.r-project.org/web/packages/RCurl/index.html) ).
 42 | 
 43 | ```{r, warning=FALSE, message=FALSE}
 44 | df_user_all <- collaborator::user_role(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
 45 |                                        redcap_project_token = Sys.getenv("collaborator_test_token"))$full
 46 | 
 47 | knitr::kable(head(df_user_all, n=10)) # Please note all names / emails have been randomly generated
 48 | 
 49 | ```
 50 | 
 51 | However, these do not produce the correct data format as the data (email) must be summarised by group (data_access_group). This can be done directly via the `user_summarise()` function. This produces data in the exact format required by the subsequent "email_" functions (alongside some additional summarised data which may be of interest). This is the recommended option for handing REDCap user data for this purpose. 
 52 | 
 53 | ```{r, warning=FALSE, message=FALSE}
 54 | 
 55 | df_user <- collaborator::user_summarise(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
 56 |                                         redcap_project_token = Sys.getenv("collaborator_test_token"),
 57 |                                         user_exclude = "y_o’doherty")
 58 | 
 59 | knitr::kable(df_user) # Please note all names have been randomly generated
 60 | 
 61 | ```
 62 | 
 63 | &nbsp;
 64 | 
 65 | #### b). Other sources
 66 | 
 67 | While these email functions were developed for the intent of integration with REDCap, the subsequent "email_" functions are build to not require REDCap to work. However, the same minimum input format is required:
 68 | 
 69 |  - One unique group per row (listed within a single column).
 70 |  
 71 |  - A string of group-specific email addresses, separated by a semicolon (listed within a single column).
 72 |  
 73 | For example (using the data above to illustrate):
 74 | ```{r, warning=FALSE, message=FALSE, echo = T}
 75 | df_user_other <- df_user %>%
 76 |   dplyr::select("group" = data_access_group, "group_specific_emails_recipients" = user_email)
 77 | 
 78 |   knitr::kable(df_user_other)
 79 | 
 80 | ```
 81 | 
 82 | There may be any number of additional columns present that can be used to "mail-merge" within the email subject or body.
 83 | 
 84 | &nbsp;
 85 | &nbsp;
 86 | 
 87 | # 2. Build group-specific emails
 88 | 
 89 | At this stage, we have a dataframe of the grouped recipents of the emails. Now we can begin to build the group-specific components of the emails.
 90 | 
 91 | #### a). Generate email fields
 92 | The `email_field()` function wrangles the dataframe of grouped recipents (e.g. `df_user` above) into the format required by `email_send()`. There are two aspects of email fields that can be customised:
 93 | 
 94 |  * Recipients: We define who will recieve the email. Different columns of emails can be specified as the main recipents (`recipient_main`), cc'd (`recipient_cc`), or bcc'd (`recipient_bcc`). For example, you may want to specify the primary investigator for a site as the `recipient_main` and others involved at that site as `recipient_cc`.
 95 | 
 96 |  * Subject: We define what the name of the email will be. This can be the same for all emails, or the subject can be made *group-specific* by incorporting column names within the string. These names must be within square brackets e.g. `"[colname]"`.
 97 | 
 98 | ```{r, warning=FALSE, message=FALSE}
 99 | 	
100 | df_email <- collaborator::email_field(df_email = df_user,
101 |                                       group = "data_access_group",
102 |                                       recipient_main = NULL,
103 |                                       recipient_cc = NULL,
104 |                                       recipient_bcc = "user_email", # we want all the recipients to be "BCC". 
105 |                                       subject = "Hello to [user_n] collaborators at [data_access_group]")
106 | 
107 | knitr::kable(df_email)
108 | 
109 | ```
110 | 
111 | &nbsp;
112 | 
113 | #### b). Add email body 
114 | 
115 | The `email_body()` function will perform a mailmerge using a specified rmarkdown file (e.g. "vignette_email_body.Rmd") and the output from `email_field()`. This will form the email body to be sent via `email_send()`.
116 | 
117 | - The rmarkdown file must have the YAML specified as "output: html_document" and fields to be mailmerged must be specified as "x$colname" (see "vignettes/vignette_email_body.Rmd"). The text format / spacing/ etc can be edited as usual for html rmarkdown documents.
118 | 
119 | - The output from `email_body()` is the original dataframe with up to 2 columns appended (based on the `html_output` parameter):
120 | 
121 |   - "file" - The path for group-specific html file produced (can be viewed to verify the rendered Rmd document displays as desired). These will only be saved if `html_output` includes "file". *Note: All files will be placed in a subfolder specified by `subfolder` (default = "folder_html" within current working directory), and will be named according to the group name (this can be customised using `file_prefix` and `file_suffix`)*.
122 |  
123 |   - "code" - The html code for the mailmerged html document produced. *Note: at present, GmailR does not allow allow html files to be directly attached as the email body. Therefore, html_output should always include "code" and this should be used as the email body in `email_send()`*.
124 | 
125 | ```{r, warning=FALSE, message=FALSE}
126 | df_email <- collaborator::email_body(df_email, 
127 |                                      html_output = "code", 
128 |                                      rmd_file = here::here("vignettes/vignette_email_body.Rmd"))
129 | 
130 | tibble::as_tibble(df_email)
131 | 
132 | ```
133 | 
134 | &nbsp;
135 | 
136 | #### c). Add email attachments 
137 | 
138 | The `group2csv()` function will split a tibble/dataframe by "group" variable, then save grouped data in a subfolder as individual CSV files. This can be used as a group-specific attachment to be sent via `email_send()`.
139 | 
140 | - The groups in the tibble/dataframe supplied (`data`) must **exactly** match the groups within the `email_field()` function output.
141 | 
142 | - The output from `email_body()` is a tibble with the "group":
143 | 
144 |   - "file" - The path for group-specific CSV files produced (not any data not in a group will be discarded). *Note: All files will be placed in a subfolder specified by `subfolder` (default = "folder_csv" within current working directory), and will be named according to the group name (this can be customised using `file_prefix` and `file_suffix`)*.
145 | 
146 | ```{r, warning=FALSE, message=FALSE}
147 | 
148 | # Generate patient-level / anonomysed missing data report
149 | report <- collaborator::report_miss(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
150 |                                     redcap_project_token = Sys.getenv("collaborator_test_token"))$record
151 | 
152 | 
153 | attach <- collaborator::group2csv(data = report,
154 |                                   group = "redcap_data_access_group",
155 |                                   subfolder = here::here("vignettes/folder_csv"), file_prefix = "missing_data_")
156 | 
157 | knitr::kable(attach)
158 | ```
159 | 
160 | The output from `group2csv()` and any other attachments must be appended to the output from `email_field()` as additional columns.
161 | 
162 |  - Files that are intended to be sent to all groups can be appended here in addition (any file type).
163 | 
164 | ```{r, warning=FALSE, message=FALSE}
165 | df_email <- df_email %>%
166 |   dplyr::left_join(attach, group=c("redcap_data_access_group" = "group")) %>%
167 |   
168 |   dplyr::mutate(file2 = here::here("man/figures/collaborator_logo.png"))
169 | ```
170 | 
171 | Note: Only `group2csv()` will be supported as a function for creating group-specific files en mass (e.g. rather than pdf, word, etc) as these are too heterogeneous and specific in their purpose. However, once created via rmarkdown, their pathways can be joined to the correct group and will be attached if included in the `email_send()` function.
172 | 
173 | &nbsp;
174 | &nbsp;
175 | 
176 | 
177 | # 3. Send group-specific emails
178 | 
179 | We now have created our four components to the group-specific email:
180 | 
181 |  * **Group** (Essential): Column created via `email_field()` ("group").
182 | 
183 |  * **Email fields** (Essential): Columns created via `email_field()` ("recipient_main", "recipient_cc", "recipient_bcc", "subject") .
184 |  
185 |  * **Email body** (Essential): Column created via `email_body()` ("code"). 
186 |  
187 |  * **Attachments** (Optional): Column created via `group2csv()` (e.g. "file") or added manually (e.g. "file2").
188 | 
189 | &nbsp;
190 | 
191 | #### a). Gmail connection set-up 
192 | The `email_send()` function is currently built for use with gmail via the [gmailr package](https://github.com/r-lib/gmailr). To function, a connection to the Gmail API must be established in advance (find step by step process [here](https://github.com/jennybc/send-email-with-r)).
193 | 
194 |  - *Note: Rstudio hosted on virtual machines have difficulty in connecting to gmail. For sending emails, an Rstudio installed on a physical computer is needed at present*.
195 | 
196 | ```{r, warning=FALSE, message=FALSE, eval=FALSE, echo = TRUE, error=F}
197 | gmailr::gm_auth_configure(path = "gmail.json")
198 | ```
199 | 
200 | &nbsp;
201 | 
202 | #### b). Sending
203 | The `email_send()` will allow automated sending of the prepared group-specific emails and their attachments. The following parameters can be specified:
204 | 
205 |  - `sender`: The email account from which the emails will be sent (must be a gmail account and match the )
206 |  
207 |  - `body`: The column containing the html code produced by the `email_body()` function for each group.
208 |  
209 |  - `attach`: An (optional) list of columns containing the paths of all files to be attached (including those which are group-specific). If zip = TRUE (default = FALSE), then these files will be compressed into a zip folder. 
210 |  
211 |  - `draft`: To prevent premature sending of emails, the default setting is to send emails to the gmail draft folder (draft = FALSE). When ready to send emails automatically this should be changed to draft = TRUE. *Note: gmail allows a maximum of 500 emails to be sent per day - if you plan to exceed this number then split the dataset and send on different days.* 
212 | 
213 | ```{r, warning=FALSE, message=FALSE, eval=FALSE, echo = TRUE}
214 | 
215 | collaborator::email_send(df_email = df_email,
216 |                          sender = "email@gmail.com", 
217 |                          email_body = "code", 
218 |                          attach = c("file", "file2"), zip = T,
219 |                          draft = FALSE)
220 | ```
221 | 
222 | The `email_send()` function will print the number + group of emails as they are sent. This facilitates troubleshooting in the event of errors.
223 | 
224 | &nbsp;
225 | &nbsp;
226 | 


--------------------------------------------------------------------------------
/vignettes/vignette_email_body.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "&nbsp;" # To avoid "[WARNING] This document format requires a nonempty <title> element. Please specify either 'title' or 'pagetitle' in the metadata.".
 3 | 
 4 | output: html_document
 5 | ---
 6 | 
 7 | ```{r setup, echo = FALSE, include = FALSE, message=FALSE}
 8 | library(dplyr);library(readr);library(knitr)
 9 | ```
10 | 
11 | #### **Dear `r x$user_fullnames`**,
12 | 
13 | Thank you for your work at `r x$group`. 
14 | 
15 | You will find *2 attatchments* ...
16 | 
17 | Many thanks,
18 | Kenneth
19 | 


--------------------------------------------------------------------------------
/vignettes/vignette_missing.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Generating Missing Data Reports"
 3 | author: "Kenneth McLean"
 4 | date: "`r Sys.Date()`"
 5 | output:
 6 |   md_document: default
 7 |   html_document: default
 8 | always_allow_html: true
 9 | vignette: >
10 |   %\VignetteIndexEntry{predict}
11 |   %\VignetteEngine{knitr::rmarkdown}
12 |   %\VignetteEncoding{UTF-8}
13 | ---
14 | 
15 | ```{r setup, include = FALSE}
16 | knitr::opts_chunk$set(collapse = FALSE)
17 | library(collaborator);library(dplyr)
18 | ```
19 | # Collaborator: Generating Missing Data Reports
20 | 
21 | Ensuring high levels of completeness within research projects is an important task for ensuring the highest quality dataset for subsequent analyses. However, determining what data is missing within a REDCap project, particularly accounting for appropriately missing data (such as in the case of unfulfilled branching logic) can be a challenging and time-consuming task to produce in real-time. 
22 | 
23 | The `report_miss()` function is designed to easily produce a high quality and informative report of missing data at a data_access_group and individual record level. This report highlights all missing data within a REDCap project (delineating between appropriately missing and true missing data), while removing all other data (so this can be shared in line with duties of data protection).
24 | 
25 | ## Requirements:
26 | The `report_miss()` function is designed to be simple from the point of use - the only requirements are a valid URI (Uniform Resource Identifier) and API (Application Programming Interface) for the REDCap project.
27 | 
28 | There is a high degree of customisability, with the following able to be specified to focus on a subset of the dataset:
29 | 
30 |  - Variables (columns): Modified using the "var_include" and "var_exclude" parameters. 
31 | 
32 |  - Records / DAGs (rows): Modified using the "record_include"/"dag_include" and "record_exclude"/"dag_exclude".
33 | 
34 | Limitations:
35 | 
36 | - This function has not yet been tested on REDCap projects with multiple events.
37 | 
38 | ## Output:
39 | 
40 | ### Record level report
41 | Example of a record level report of missing data. This not only quantifies the missing data within the record, but also highlights it's location within the dataset.
42 | 
43 | **1. Record level summary**
44 | 
45 |  - `miss_n` is the number of missing data fields ("M").
46 |  
47 |  - `fields_n` is the number of all data fields (excluding appropriately missing data).
48 |  
49 |  - `miss_prop` / `miss_pct` are respective proportions and percentages of data that are missing for each record.
50 |  
51 |  - `miss_threshold` is a yes/no variable indicating if the variable has **over** the specified missing data threshold (default = 5%).
52 |  
53 | **2. Missing data locations (column 8 onwards)**
54 | 
55 |   - "NA" fields represent appropriately missing data (e.g. secondary to unfulfilled branching logic). Therefore, these are excluded from the missing data count entirely.
56 | 
57 |   - "M" fields represent 'true' missing data (which may require follow up), and so all counts of missing data are based these.
58 | 
59 | ```{r, warning=FALSE, message=FALSE}
60 | collaborator::report_miss(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
61 |                           redcap_project_token = Sys.getenv("collaborator_test_token"))$record %>%
62 |   View()
63 |   head(15) %>% # first 15 records
64 |   knitr::kable()
65 | 
66 | 
67 | ```
68 | 
69 | 
70 | ### Data access group level report
71 | Example of a data access group (DAG) level report of missing data (summarising missing data for all records within the DAG).
72 | 
73 |  - `n_pt` is the number of patients within the data_access_group.
74 |  - `n_threshold` is the number of patients **over** the specified missing data threshold (default = 5%).
75 |  - `cen_miss_n` is the number of missing data fields ("M") within the data_access_group.
76 |  - `fields_n` is the number of all data fields within the data_access_group (excluding appropriately missing data).
77 |  - `cen_miss_prop` / `cen_miss_pct` are respective proportions and percentages of data that are missing for each data_access_group.
78 |  
79 | ```{r, warning=FALSE, message=FALSE}
80 | report_miss(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
81 |             redcap_project_token = Sys.getenv("collaborator_test_token"), missing_threshold = 0.2)$group %>%
82 |   knitr::kable()
83 | ```
84 | 


--------------------------------------------------------------------------------
/vignettes/vignette_missing.md:
--------------------------------------------------------------------------------
  1 | Collaborator: Generating Missing Data Reports
  2 | =============================================
  3 | 
  4 | Ensuring high levels of completeness within research projects is an
  5 | important task for ensuring the highest quality dataset for subsequent
  6 | analyses. However, determining what data is missing within a REDCap
  7 | project, particularly accounting for appropriately missing data (such as
  8 | in the case of unfulfilled branching logic) can be a challenging and
  9 | time-consuming task to produce in real-time.
 10 | 
 11 | The `report_miss()` function is designed to easily produce a high
 12 | quality and informative report of missing data at a data\_access\_group
 13 | and individual record level. This report highlights all missing data
 14 | within a REDCap project (delineating between appropriately missing and
 15 | true missing data), while removing all other data (so this can be shared
 16 | in line with duties of data protection).
 17 | 
 18 | Requirements:
 19 | -------------
 20 | 
 21 | The `report_miss()` function is designed to be simple from the point of
 22 | use - the only requirements are a valid URI (Uniform Resource
 23 | Identifier) and API (Application Programming Interface) for the REDCap
 24 | project.
 25 | 
 26 | There is a high degree of customisability, with the following able to be
 27 | specified to focus on a subset of the dataset:
 28 | 
 29 | -   Variables (columns): Modified using the “var\_include” and
 30 |     “var\_exclude” parameters.
 31 | 
 32 | -   Records / DAGs (rows): Modified using the
 33 |     “record\_include”/“dag\_include” and
 34 |     “record\_exclude”/“dag\_exclude”.
 35 | 
 36 | Limitations:
 37 | 
 38 | -   This function has not yet been tested on REDCap projects with
 39 |     multiple events.
 40 | 
 41 | Output:
 42 | -------
 43 | 
 44 | ### Record level report
 45 | 
 46 | Example of a record level report of missing data. This not only
 47 | quantifies the missing data within the record, but also highlights it’s
 48 | location within the dataset.
 49 | 
 50 | **1. Record level summary**
 51 | 
 52 | -   `miss_n` is the number of missing data fields (“M”).
 53 | 
 54 | -   `fields_n` is the number of all data fields (excluding appropriately
 55 |     missing data).
 56 | 
 57 | -   `miss_prop` / `miss_pct` are respective proportions and percentages
 58 |     of data that are missing for each record.
 59 | 
 60 | -   `miss_threshold` is a yes/no variable indicating if the variable has
 61 |     **over** the specified missing data threshold (default = 5%).
 62 | 
 63 | **2. Missing data locations (column 8 onwards)**
 64 | 
 65 | -   “NA” fields represent appropriately missing data (e.g. secondary to
 66 |     unfulfilled branching logic). Therefore, these are excluded from the
 67 |     missing data count entirely.
 68 | 
 69 | -   “M” fields represent ‘true’ missing data (which may require follow
 70 |     up), and so all counts of missing data are based these.
 71 | 
 72 | ``` r
 73 | collaborator::report_miss(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
 74 |                           redcap_project_token = Sys.getenv("collaborator_test_token"))$record %>%
 75 |   head(15) %>% # first 15 records
 76 |   knitr::kable()
 77 | ```
 78 | 
 79 | | record\_id | redcap\_data\_access\_group |  miss\_n|  fields\_n|  miss\_prop| miss\_pct | miss\_threshold | dmy\_hms | enrol\_tf | enrol\_signature | pt\_age | pt\_sex | smoking\_status | body\_mass\_index | pmh\_\_\_1 | pmh\_\_\_2 | pmh\_\_\_3 | asa\_grade | pt\_ethnicity | pt\_ethnicity\_other | adm\_date | adm\_vas | op\_date | time2op | op\_urgency | op\_procedure\_code | follow\_up | follow\_up\_readm | follow\_up\_mort | file |
 80 | |:-----------|:----------------------------|--------:|----------:|-----------:|:----------|:----------------|:---------|:----------|:-----------------|:--------|:--------|:----------------|:------------------|:-----------|:-----------|:-----------|:-----------|:--------------|:---------------------|:----------|:---------|:---------|:--------|:------------|:--------------------|:-----------|:------------------|:-----------------|:-----|
 81 | | 1          | hospital\_a                 |        4|         21|   0.1904762| 19%       | Yes             | M        | M         | NA               | .       | .       | .               | .                 | .          | .          | .          | .          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | .                 | .                | .    |
 82 | | 2          | hospital\_a                 |        6|         21|   0.2857143| 29%       | Yes             | M        | M         | NA               | .       | .       | M               | M                 | .          | .          | .          | .          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | .                 | .                | .    |
 83 | | 3          | hospital\_a                 |        5|         21|   0.2380952| 24%       | Yes             | M        | M         | NA               | .       | .       | .               | M                 | .          | .          | .          | .          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | .                 | .                | .    |
 84 | | 4          | hospital\_a                 |        4|         21|   0.1904762| 19%       | Yes             | M        | M         | NA               | .       | .       | .               | .                 | .          | .          | .          | .          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | .                 | .                | .    |
 85 | | 5          | hospital\_a                 |        4|         19|   0.2105263| 21%       | Yes             | M        | M         | NA               | .       | .       | .               | .                 | .          | .          | .          | .          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | NA                | NA               | .    |
 86 | | 6          | hospital\_a                 |        4|         19|   0.2105263| 21%       | Yes             | M        | M         | NA               | .       | .       | .               | .                 | .          | .          | .          | .          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | NA                | NA               | .    |
 87 | | 7          | hospital\_a                 |        4|         21|   0.1904762| 19%       | Yes             | M        | M         | NA               | .       | .       | .               | .                 | .          | .          | .          | .          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | .                 | .                | .    |
 88 | | 8          | hospital\_a                 |        4|         21|   0.1904762| 19%       | Yes             | M        | M         | NA               | .       | .       | .               | .                 | .          | .          | .          | .          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | .                 | .                | .    |
 89 | | 9          | hospital\_a                 |        4|         19|   0.2105263| 21%       | Yes             | M        | M         | NA               | .       | .       | .               | .                 | .          | .          | .          | .          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | NA                | NA               | .    |
 90 | | 10         | hospital\_a                 |        4|         21|   0.1904762| 19%       | Yes             | M        | M         | NA               | .       | .       | .               | .                 | .          | .          | .          | .          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | .                 | .                | .    |
 91 | | 11         | hospital\_b                 |        4|         21|   0.1904762| 19%       | Yes             | M        | M         | NA               | .       | .       | .               | .                 | .          | .          | .          | .          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | .                 | .                | .    |
 92 | | 12         | hospital\_b                 |        6|         19|   0.3157895| 32%       | Yes             | M        | M         | NA               | .       | M       | .               | M                 | .          | .          | .          | .          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | NA                | NA               | .    |
 93 | | 13         | hospital\_b                 |        4|         19|   0.2105263| 21%       | Yes             | M        | M         | NA               | .       | .       | .               | .                 | .          | .          | .          | .          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | NA                | NA               | .    |
 94 | | 14         | hospital\_b                 |        4|         21|   0.1904762| 19%       | Yes             | M        | M         | NA               | .       | .       | .               | .                 | .          | .          | .          | .          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | .                 | .                | .    |
 95 | | 15         | hospital\_b                 |        5|         19|   0.2631579| 26%       | Yes             | M        | M         | NA               | .       | .       | .               | .                 | .          | .          | .          | M          | .             | NA                   | .         | M        | .        | M       | .           | .                   | .          | NA                | NA               | .    |
 96 | 
 97 | ### Data access group level report
 98 | 
 99 | Example of a data access group (DAG) level report of missing data
100 | (summarising missing data for all records within the DAG).
101 | 
102 | -   `n_pt` is the number of patients within the data\_access\_group.
103 | -   `n_threshold` is the number of patients **over** the specified
104 |     missing data threshold (default = 5%).
105 | -   `cen_miss_n` is the number of missing data fields (“M”) within the
106 |     data\_access\_group.
107 | -   `fields_n` is the number of all data fields within the
108 |     data\_access\_group (excluding appropriately missing data).
109 | -   `cen_miss_prop` / `cen_miss_pct` are respective proportions and
110 |     percentages of data that are missing for each data\_access\_group.
111 | 
112 | ``` r
113 | report_miss(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
114 |             redcap_project_token = Sys.getenv("collaborator_test_token"), missing_threshold = 0.2)$group %>%
115 |   knitr::kable()
116 | ```
117 | 
118 | | redcap\_data\_access\_group |  n\_pt|  n\_threshold|  cen\_miss\_n|  cen\_field\_n|  cen\_miss\_prop| cen\_miss\_pct |
119 | |:----------------------------|------:|-------------:|-------------:|--------------:|----------------:|:---------------|
120 | | hospital\_a                 |     10|             5|            43|            204|        0.2107843| 21.0784%       |
121 | | hospital\_b                 |      6|             3|            27|            120|        0.2250000| 22.5000%       |
122 | | hospital\_c                 |      2|             0|             8|             42|        0.1904762| 19.0476%       |
123 | | hospital\_d                 |      4|             3|            19|             84|        0.2261905| 22.6190%       |
124 | | hospital\_e                 |      9|             3|            39|            185|        0.2108108| 21.0811%       |
125 | | hospital\_f                 |      6|             3|            29|            126|        0.2301587| 23.0159%       |
126 | | hospital\_g                 |      7|             3|            34|            144|        0.2361111| 23.6111%       |
127 | | hospital\_h                 |      6|             4|            30|            125|        0.2400000| 24.0000%       |
128 | 


--------------------------------------------------------------------------------
/vignettes/vignette_summary.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Generating REDCap summary data"
 3 | author:
 4 |  - "Kenneth McLean"
 5 |  - "Riinu Ots"
 6 | date: "`r Sys.Date()`"
 7 | output:
 8 |   md_document: default
 9 |   html_document: default
10 | always_allow_html: true
11 | vignette: >
12 |   %\VignetteIndexEntry{predict}
13 |   %\VignetteEngine{knitr::rmarkdown}
14 |   %\VignetteEncoding{UTF-8}
15 | ---
16 | 
17 | ```{r setup, include = FALSE}
18 | knitr::opts_chunk$set(collapse = FALSE)
19 | 
20 | library(collaborator);library(dplyr)
21 | 
22 | ```
23 | # Collaborator: REDCap summary data
24 | 


--------------------------------------------------------------------------------
/vignettes/vignette_summary.md:
--------------------------------------------------------------------------------
  1 | # Collaborator: REDCap summary data
  2 | 
  3 | Evaluating data uploaded to a REDCap project in the context of
  4 | multi-centre research projects is an important task for ensuring the
  5 | highest quality dataset for subsequent analyses, and for sharing
  6 | progress (whether internally or externally via social media). However,
  7 | summarising this data, particularly at a DAG-level, can be a challenging
  8 | and time-consuming task to produce on a regular basis.
  9 | 
 10 | The `redcap_sum()` function is designed to easily produce high quality
 11 | and informative summary data on a REDCap project (overall and at a
 12 | data\_access\_group level). This can be used for the purposes of sharing
 13 | progress (including identifying top performing DAGs), and identifying
 14 | individual DAGs which have not yet uploaded data or have not completed
 15 | data upload.
 16 | 
 17 | ## Requirements
 18 | 
 19 | The `redcap_sum()` function is designed to be simple from the point of
 20 | use - the only requirements are a valid URI (Uniform Resource
 21 | Identifier) and API (Application Programming Interface) for the REDCap
 22 | project.
 23 | 
 24 | However, this is intended to have a high degree of customisability to
 25 | fit the needs of a variety of projects. For example, being able to
 26 | easily:
 27 | 
 28 |   - Select variables (using `var_include` or `var_exclude`) to be
 29 |     included in the assessment of completeness. For example, to focus on
 30 |     only essential variables to determine level of completeness.
 31 | 
 32 |   - Select individual records (using `record_exclude` or
 33 |     `record_include`) or whole data access groups (using `dag_exclude`
 34 |     or `dag_include`) to be assessed. For example to remove records or
 35 |     DAGs that were found to be ineligible.
 36 | 
 37 |   - Select individual users (using `user_exclude` or `user_include`) or
 38 |     whole data access groups (using `dag_exclude` or `dag_include`) to
 39 |     be assessed. For example to remove users (e.g. administrator user
 40 |     accounts) or DAGs that were found to be ineligible from the total
 41 |     REDCap user count.
 42 | 
 43 |   - Generation of summary data by DAG unless `centre_sum` is specified
 44 |     as FALSE (default `centre_sum=T`)
 45 | 
 46 | Limitations:
 47 | 
 48 |   - This function has not yet been tested on REDCap projects with
 49 |     multiple events.
 50 | 
 51 | ## Main Features
 52 | 
 53 | ### (1) Basic Function
 54 | 
 55 | At it’s most basic, `redcap_sum()` can produce an overall summary of
 56 | current data on the REDCap project: - `n_record_all` is the number of
 57 | all records currently on the REDCap project (minus any records removed
 58 | using `record_exclude` or in DAGs removed using `dag_exclude`).
 59 | 
 60 |   - `n_record_com` is the number of all complete records currently on
 61 |     the REDCap project (minus any records removed using `record_exclude`
 62 |     or in DAGs removed using `dag_exclude`), with no missing data across
 63 |     the record (unless certain data fields are either excluded
 64 |     (`var_exclude`) or specified (`var_complete`)).
 65 | 
 66 |   - `prop_com` /`pct_com` is the respective proportion and percentage of
 67 |     complete records in the project.
 68 | 
 69 |   - `n_dag` is the number of data access groups (DAGs) for all records
 70 |     currently on the REDCap project (minus any records removed using
 71 |     `record_exclude` or in DAGs removed using `dag_exclude`).
 72 | 
 73 |   - `n_users` is the number of users on the REDCap project (minus any
 74 |     users in DAGs removed using `dag_exclude`). Note all users not
 75 |     assigned to a DAG will automatically be excluded.
 76 | 
 77 |   - `last_update` is the date which the summary data was generated
 78 | on.
 79 | 
 80 | <!-- end list -->
 81 | 
 82 | ``` r
 83 | collaborator::redcap_sum(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
 84 |                          redcap_project_token = Sys.getenv("collaborator_test_token"),
 85 |                          centre_sum = F)  %>%
 86 |   knitr::kable()
 87 | ```
 88 | 
 89 | | n\_record\_all | n\_record\_com | prop\_com | pct\_com | n\_dag | n\_users | last\_update |
 90 | | -------------: | -------------: | --------: | :------- | -----: | -------: | :----------- |
 91 | |             50 |              0 |         0 | 0%       |      8 |       30 | 21-Apr-2020  |
 92 | 
 93 | ### (2) Centre summary data
 94 | 
 95 | However, more granular summary data on each DAG can also be obtained
 96 | using the same function. This centre summary data will automatically be
 97 | included within the output from `redcap_sum()` unless `centre_sum` is
 98 | specified as FALSE.
 99 | 
100 | #### 1\. `$dag_all` Output
101 | 
102 | This will produce a dataframe of the same summary data as outlined above
103 | **grouped by each DAG instead** (minus any DAGs removed using
104 | `dag_exclude`).
105 | 
106 | ``` r
107 | output <- collaborator::redcap_sum(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
108 |                                    redcap_project_token = Sys.getenv("collaborator_test_token"),
109 |                                    centre_sum = T)
110 | 
111 |   knitr::kable(output$dag_all)
112 | ```
113 | 
114 | | redcap\_data\_access\_group | record\_all | record\_com | prop\_com | pct\_com | user\_all | last\_update |
115 | | :-------------------------- | ----------: | ----------: | --------: | :------- | --------: | :----------- |
116 | | hospital\_a                 |          10 |           0 |         0 | 0%       |         3 | 21-Apr-2020  |
117 | | hospital\_e                 |           9 |           0 |         0 | 0%       |         4 | 21-Apr-2020  |
118 | | hospital\_g                 |           7 |           0 |         0 | 0%       |         2 | 21-Apr-2020  |
119 | | hospital\_b                 |           6 |           0 |         0 | 0%       |         3 | 21-Apr-2020  |
120 | | hospital\_f                 |           6 |           0 |         0 | 0%       |         2 | 21-Apr-2020  |
121 | | hospital\_h                 |           6 |           0 |         0 | 0%       |         4 | 21-Apr-2020  |
122 | | hospital\_d                 |           4 |           0 |         0 | 0%       |         3 | 21-Apr-2020  |
123 | | hospital\_c                 |           2 |           0 |         0 | 0%       |         1 | 21-Apr-2020  |
124 | | hospital\_i                 |           0 |          NA |        NA | NA       |         4 | 21-Apr-2020  |
125 | | hospital\_j                 |           0 |          NA |        NA | NA       |         4 | 21-Apr-2020  |
126 | 
127 | #### 2\. `$dag_nodata` Output
128 | 
129 | This will produce a dataframe of all DAG with users assigned on the
130 | REDCap project, but no data uploaded to REDCap. This may be useful for
131 | the purposes of targeting encouragement to upload data, or establishing
132 | authorship on any research
133 | output.
134 | 
135 | ``` r
136 | knitr::kable(output$dag_nodata)
137 | ```
138 | 
139 | | redcap\_data\_access\_group | record\_all | record\_com | prop\_com | pct\_com | user\_all | last\_update |
140 | | :-------------------------- | ----------: | ----------: | --------: | :------- | --------: | :----------- |
141 | | hospital\_i                 |           0 |          NA |        NA | NA       |         4 | 21-Apr-2020  |
142 | | hospital\_j                 |           0 |          NA |        NA | NA       |         4 | 21-Apr-2020  |
143 | 
144 | #### 3\. `$dag_incom` Output
145 | 
146 | This will produce a dataframe of all DAG with incomplete records (the
147 | definition of completeness customisable as discussed above). This may be
148 | useful for the purposes of follow up regarding (essential) missing data
149 | at each of these
150 | DAGs.
151 | 
152 | ``` r
153 | knitr::kable(output$dag_incom)
154 | ```
155 | 
156 | | redcap\_data\_access\_group | record\_all | record\_com | prop\_com | pct\_com | user\_all | last\_update |
157 | | :-------------------------- | ----------: | ----------: | --------: | :------- | --------: | :----------- |
158 | | hospital\_a                 |          10 |           0 |         0 | 0%       |         3 | 21-Apr-2020  |
159 | | hospital\_e                 |           9 |           0 |         0 | 0%       |         4 | 21-Apr-2020  |
160 | | hospital\_g                 |           7 |           0 |         0 | 0%       |         2 | 21-Apr-2020  |
161 | | hospital\_b                 |           6 |           0 |         0 | 0%       |         3 | 21-Apr-2020  |
162 | | hospital\_f                 |           6 |           0 |         0 | 0%       |         2 | 21-Apr-2020  |
163 | | hospital\_h                 |           6 |           0 |         0 | 0%       |         4 | 21-Apr-2020  |
164 | | hospital\_d                 |           4 |           0 |         0 | 0%       |         3 | 21-Apr-2020  |
165 | | hospital\_c                 |           2 |           0 |         0 | 0%       |         1 | 21-Apr-2020  |
166 | 
167 | #### 4\. `$dag_top_n` Output
168 | 
169 | This will produce a dataframe of the DAGs with the most records uploaded
170 | overall (the number of DAGs listed is defined by `top` with top 10 DAG
171 | being default). This may be useful for the purposes of publicity
172 | surrounding the
173 | project.
174 | 
175 | ``` r
176 | collaborator::redcap_sum(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
177 |                                    redcap_project_token = Sys.getenv("collaborator_test_token"),
178 |                                    centre_sum = T, top = 5)$dag_top %>% 
179 |   knitr::kable()
180 | ```
181 | 
182 | | redcap\_data\_access\_group | record\_all |
183 | | :-------------------------- | ----------: |
184 | | hospital\_a                 |          10 |
185 | | hospital\_e                 |           9 |
186 | | hospital\_g                 |           7 |
187 | | hospital\_b                 |           6 |
188 | | hospital\_f                 |           6 |
189 | 


--------------------------------------------------------------------------------
/vignettes/vignette_user.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Redcap User Management"
  3 | author: "Kenneth McLean"
  4 | output:
  5 |   md_document: default
  6 |   html_document: default
  7 | always_allow_html: true
  8 | vignette: >
  9 |   %\VignetteIndexEntry{predict}
 10 |   %\VignetteEngine{knitr::rmarkdown}
 11 |   %\VignetteEncoding{UTF-8}
 12 | ---
 13 | 
 14 | ```{r setup, include = FALSE}
 15 | knitr::opts_chunk$set(collapse = FALSE)
 16 | library(collaborator);library(dplyr)
 17 | ```
 18 | 
 19 | # Collaborator: Redcap User Management
 20 | 
 21 | Management of user rights in REDCap becomes increasingly laborious as the scale of the research project expands (e.g. with the number of users, and the number of data access groups). Here are a series of functions to understand and manage users on a project
 22 | 
 23 | For a user to be able to use a REDCap project, there are two prerequisites they must have:
 24 | 
 25 | 1. **User account** - This username allows the user to log onto the REDCap instance.
 26 | 
 27 | 2. **User rights** - This is required to access a specific REDCap project, and determines the capabilities the user has (e.g. to access certain forms, to be restricted to a specific data access group, to import/export data, etc)
 28 | 
 29 | 
 30 | ## Create REDCap Accounts
 31 | 
 32 | REDCap user accounts cannot be generated via R at present, and need to be manually uploaded at present (however there is capability to bulk upload via a csv file). This function can be used to generate the csv file in the exact format required for direct upload via the control centre.
 33 | 
 34 | It requires a dataframe of at least 4 mandatory columns (corresponding to: username, first name, last name, and email address) and 4 optional columns (corresponding to: institution, sponsor, expiration, comments). All optional columns will be blank unless otherwise specified.
 35 | 
 36 | ```{r, warning=FALSE, message=FALSE}
 37 | library(collaborator);library(dplyr)
 38 | 
 39 | # Create example of new users output from user_role()
 40 | collaborator::user_role(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
 41 |                         redcap_project_token = Sys.getenv("collaborator_test_token"),
 42 |                         remove_id = F)$all %>%
 43 |   dplyr::filter(role_name=="collaborator") %>% head(10) %>%
 44 |   dplyr::select(username, email, firstname, lastname, data_access_group) %>%
 45 |   
 46 |   # Format these new users to allow account creation
 47 |   collaborator::user_import(username = "username", first_name = "firstname", last_name = "lastname",
 48 |                             email = "email", institution = "data_access_group") %>%
 49 |   
 50 |   knitr::kable()
 51 | 
 52 | ```
 53 | 
 54 | ## View Project Users and Data Access Groups (DAGs)
 55 | 
 56 | Use `user_role()` to count the number of unique user "roles" within the REDCap Project (e.g. the number of unique combinations of user rights). The users without an allocated data access group or role will be listed as `NA`. Please note those without an assigned role will have the minimum user righst by default, but those without an assigned data access group will have access to ALL data on the project. 
 57 | 
 58 | The output from `user_role()` is a nested dataframe of:
 59 | 
 60 | **1). $all:** A dataframe of all users and their allocated role on the redcap project.
 61 | 
 62 | - By default the user name and emails are not provided (however this can be by changing "remove_id" to FALSE)
 63 | 
 64 | ```{r, warning=FALSE, message=FALSE}
 65 | # Example output from user_role()
 66 | # please note all names are randomly generated
 67 | user_role <- collaborator::user_role(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
 68 |                                      redcap_project_token = Sys.getenv("collaborator_test_token"))
 69 | 
 70 | knitr::kable(user_role$all)
 71 | 
 72 | ```
 73 | 
 74 | 
 75 | **ii). $sum:** A dataframe of each role on REDCap, alongside the total and list of usernames with those rights. These can be used in later functions to assign or change user roles.
 76 | 
 77 | - By default the exact rights these roles have are not shown (however this can be by changing "show_rights" to TRUE)
 78 | 
 79 | ```{r, warning=FALSE, message=FALSE}
 80 | knitr::kable(user_role$sum)
 81 | ```
 82 | 
 83 | 
 84 | ## Manage Project Data Access Groups (DAGs) and Project Users
 85 | 
 86 | The automatic management of users and data access groups (DAGs) has several important advantages over the manual method:
 87 | 
 88 | - Once set-up, it involves a fraction of the time and labour (compared to doing so manually), and can be easily repeated using R.  This enables multicentre research using REDCap to become easily scalable irrespective of the number of users and number of data access groups.
 89 | 
 90 | - It significantly reduces allocation errors (e.g. users being allocated to incorrect DAGs).
 91 | 
 92 | 
 93 | ### `dag_manage()`
 94 | 
 95 | Effective management of DAGs is essential to ensure access to data is restricted to only appropriate users - this can be done using `dag_manage()`. 
 96 | 
 97 | If you simply want to view the current DAGs on the project, just enter the URL and TOKEN. However, if you wish to add or remove DAGs, use the `import` and `remove` arguments. **It is highly recommended you keep the DAG name limited to 18 characters, with no special characters or spaces to avoid issues with duplicate or altered DAG names on the REDCap project**. 
 98 | 
 99 | - When importing, the "data_access_group_name" will be the DAG imported, and "unique_group_name" the automatically generated REDCap name.
100 | 
101 | 
102 | ```{r}
103 | dag_manage(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
104 |            redcap_project_token = Sys.getenv("collaborator_test_token"),
105 |            import = "hospital_n", remove = "hospital_w") %>%
106 |   knitr::kable()
107 | ```
108 | 
109 | The `dag_manage()` function output provides a list of all DAGs with a breakdown of the outcome:
110 | 
111 | - The outcome of the DAGs will be displayed ("status") - either unchanged ("-"), "removed" or "added".
112 | 
113 | 
114 | ### `user_manage()` 
115 | 
116 | Effective management of users is essential to ensure users have the user rights and data access appropriate to them - this can be done using `user_manage()`. 
117 | 
118 | Usernames to manage can be supplied to the `users` argument as a vector or as a tibble with at least 1 column ("username"). If you simply want to view the current users on the project, just enter the URL and TOKEN and you will be shown the output from `user_role()`.
119 | 
120 | ```{r}
121 | newuser <- tibble::tibble("username" = "gs2s1005789")
122 | 
123 | knitr::kable(newuser)
124 | ```
125 | 
126 | 
127 | There are 2 main ways to manage users:
128 | 
129 | ##### 1.  **Add or amend users**: 
130 | 
131 | The role and/or DAG can be specified for ALL users supplied to `users` ("role" and "dag"), or for individual users by adding a "role" and/or "dag" column to `users` with the appropriate value for each username. If present, the information from the columns will take precedence.
132 | 
133 | - `role`: This must exactly match either a role name or ID, or a username with the appropriate role (see the output from `user_role()`)
134 | 
135 | - `dag`: This must exactly match the "unique_group_name" of an existing DAG (see the output from `dag_manage()`)
136 | 
137 | In order to prevent errors due to users not being assigned to specific roles and DAGs, there will be an error message if either of these are listed as `NA`. If you want a user to not have a specific role or DAG, then these must be explicitly listed as "none". 
138 | 
139 | ```{r}
140 | add_outcome <- user_manage(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
141 |                            redcap_project_token = Sys.getenv("collaborator_test_token"),
142 |                            users = newuser %>% mutate("role" = "manager", "dag" = "none"))
143 | ```
144 | 
145 | This function output provides a breakdown of the outcome for each username in `users`:
146 | 
147 | 1. `correct`: Users who have been allocated correctly according to the information provided, and provides details on this change.
148 | 
149 | ```{r}
150 | knitr::kable(add_outcome$correct)
151 | ```
152 | 
153 | 
154 | 
155 | 2. `error`: Users who have NOT been allocated correctly according to the information provided, and provides details on what the current status of that user is / what the outcome should have been according to the information supplied. This may be an incorrect specification of the username, role, or DAG.
156 | 
157 | ```{r}
158 | knitr::kable(add_outcome$error)
159 | ```
160 | 
161 | 
162 | ##### 2. **Remove users**:
163 | 
164 | This can be specified for ALL users supplied to `users` ("remove==T") or for individual users by adding a "remove" column to `users` with the value `TRUE` for each username wanting to be removed (this allows users to be added and removed at the same time).
165 | 
166 | - If present, the information from the columns will take precedence.
167 | 
168 | ```{r}
169 | remove_outcome <- user_manage(redcap_project_uri = Sys.getenv("collaborator_test_uri"),
170 |                               redcap_project_token = Sys.getenv("collaborator_test_token"),
171 |                               users = newuser$username, remove = T)
172 | 
173 | knitr::kable(remove_outcome$correct)
174 | ```
175 | 
176 | 


--------------------------------------------------------------------------------