├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ └── pkgdown.yaml ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── add_line_breaks.R ├── bibliography_functions.R ├── clean_functions.R ├── code_lookup.R ├── deduplication_functions.R ├── deprecated.R ├── detect_functions.R ├── format_citation.R ├── fuzz_functions.R ├── parse_bibtex.R ├── parse_csv_tsv.R ├── parse_pubmed.R ├── parse_ris.R ├── prep_ris.R ├── read_refs.R ├── reexports.R ├── string_functions.R ├── synthesisr-package.R └── write_refs.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── data └── code_lookup.RData ├── inst ├── examples │ ├── clean_.R │ ├── deduplicate.R │ ├── detect_.R │ ├── format_citation.R │ ├── fuzzdist.R │ ├── merge_columns.R │ ├── parse_.R │ └── read_refs.R ├── extdata │ ├── scopus.ris │ └── zoorec.txt ├── hex │ ├── Space_Mono │ │ ├── OFL.txt │ │ ├── SpaceMono-Bold.ttf │ │ └── SpaceMono-Regular.ttf │ └── hex.R ├── ris_tags │ └── code_lookup.csv └── test-data │ └── test_files.R ├── man ├── add_line_breaks.Rd ├── bibliography-class.Rd ├── clean_.Rd ├── code_lookup.Rd ├── deduplicate.Rd ├── detect_.Rd ├── extract_unique_references.Rd ├── figures │ └── logo.png ├── find_duplicates.Rd ├── format_citation.Rd ├── fuzz_.Rd ├── merge_columns.Rd ├── override_duplicates.Rd ├── parse_.Rd ├── read_refs.Rd ├── reexports.Rd ├── review_duplicates.Rd ├── string_.Rd ├── synthesisr-package.Rd └── write_refs.Rd ├── pkgdown ├── extra.css └── favicon │ ├── apple-touch-icon.png │ ├── favicon-96x96.png │ ├── favicon.ico │ ├── favicon.svg │ ├── site.webmanifest │ ├── web-app-manifest-192x192.png │ └── web-app-manifest-512x512.png ├── synthesisr.Rproj ├── tests ├── testthat.R └── testthat │ ├── test-clean.R │ ├── test-deduplicate.R │ ├── test-detect.R │ ├── test-format_citation.R │ ├── test-fuzz_functions.R │ ├── test-merge_columns.R │ ├── test-read_write.R │ ├── test-write.R │ └── testdata │ ├── ASP_ris_example.ris │ ├── Ovid_ris_example.ris │ ├── PubMed_example.txt │ ├── Scopus_bib_example.bib │ ├── Scopus_ris_example.ris │ ├── WoS_ciw_example.ciw │ ├── WoS_txt_example.txt │ ├── citesource_issue_24.ris │ ├── eviatlas.txt │ ├── litsearchr.txt │ └── res_synth_methods.txt └── vignettes ├── .gitignore └── overview.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | synthesisr_hex.png 2 | synthesisr.Rproj 3 | ./inst/ris_tags 4 | README.md 5 | ^doc$ 6 | ^Meta$ 7 | ^.*\.Rproj$ 8 | ^\.Rproj\.user$ 9 | ^_pkgdown\.yml$ 10 | ^docs$ 11 | ^pkgdown$ 12 | ^\.github$ 13 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | release: 8 | types: [published] 9 | workflow_dispatch: 10 | 11 | name: pkgdown.yaml 12 | 13 | permissions: read-all 14 | 15 | jobs: 16 | pkgdown: 17 | runs-on: ubuntu-latest 18 | # Only restrict concurrency for non-PR jobs 19 | concurrency: 20 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 21 | env: 22 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 23 | permissions: 24 | contents: write 25 | steps: 26 | - uses: actions/checkout@v4 27 | 28 | - uses: r-lib/actions/setup-pandoc@v2 29 | 30 | - uses: r-lib/actions/setup-r@v2 31 | with: 32 | use-public-rspm: true 33 | 34 | - uses: r-lib/actions/setup-r-dependencies@v2 35 | with: 36 | extra-packages: any::pkgdown, local::. 37 | needs: website 38 | 39 | - name: Build site 40 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 41 | shell: Rscript {0} 42 | 43 | - name: Deploy to GitHub pages 🚀 44 | if: github.event_name != 'pull_request' 45 | uses: JamesIves/github-pages-deploy-action@v4.5.0 46 | with: 47 | clean: false 48 | branch: gh-pages 49 | folder: docs 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .Rapp.history 3 | .Rproj.user 4 | .Rhistory 5 | .RData 6 | .Ruserdata 7 | media/ 8 | doc 9 | Meta 10 | docs/* 11 | /doc/ 12 | /Meta/ 13 | docs 14 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: synthesisr 2 | Type: Package 3 | Title: Import, Assemble, and Deduplicate Bibliographic Datasets 4 | Version: 0.3.0 5 | Authors@R: c( 6 | person( 7 | given = "Martin", 8 | family = "Westgate", 9 | role = c("aut", "cre"), 10 | email = "martinjwestgate@gmail.com", 11 | comment = c(ORCID = "0000-0003-0854-2034")), 12 | person( 13 | given = "Eliza", 14 | family = "Grames", 15 | role = c("aut"), 16 | email = "eliza.grames@uconn.edu", 17 | comment = c(ORCID = "0000-0003-1743-6815"))) 18 | Description: A critical first step in systematic literature reviews 19 | and mining of academic texts is to identify relevant texts from a range 20 | of sources, particularly databases such as 'Web of Science' or 'Scopus'. 21 | These databases often export in different formats or with different metadata 22 | tags. 'synthesisr' expands on the tools outlined by Westgate (2019) 23 | to import bibliographic data from a range of formats 24 | (such as 'bibtex', 'ris', or 'ciw') in a standard way, and allows merging 25 | and deduplication of the resulting dataset. 
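# A minimal sketch of the workflow described above, using the exported
# read_refs() and deduplicate() functions and the bundled
# inst/extdata/scopus.ris file; the match_by and method arguments shown are
# illustrative assumptions rather than documented defaults:
#   library(synthesisr)
#   ris_file <- system.file("extdata", "scopus.ris", package = "synthesisr")
#   refs <- read_refs(ris_file)        # import search results to a tibble
#   refs_unique <- deduplicate(refs,   # then merge and deduplicate
#                              match_by = "title", method = "exact")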
26 | Depends: R (>= 4.0.0) 27 | Imports: 28 | dplyr, 29 | purrr, 30 | rlang, 31 | stringdist, 32 | tibble, 33 | unglue, 34 | vroom 35 | Suggests: 36 | knitr, 37 | rmarkdown, 38 | testthat 39 | Date: 2023-06-07 40 | License: GPL-3 41 | URL: https://martinwestgate.com/synthesisr/ 42 | LazyData: true 43 | RoxygenNote: 7.3.2 44 | VignetteBuilder: knitr 45 | Encoding: UTF-8 46 | Roxygen: list(markdown = TRUE) 47 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method("[",bibliography) 4 | S3method(as.data.frame,bibliography) 5 | S3method(as_tibble,bibliography) 6 | S3method(c,bibliography) 7 | S3method(print,bibliography) 8 | S3method(summary,bibliography) 9 | export(add_line_breaks) 10 | export(as.bibliography) 11 | export(as_tibble) 12 | export(clean_authors) 13 | export(clean_colnames) 14 | export(clean_df) 15 | export(deduplicate) 16 | export(detect_delimiter) 17 | export(detect_lookup) 18 | export(detect_parser) 19 | export(detect_year) 20 | export(extract_unique_references) 21 | export(find_duplicates) 22 | export(format_citation) 23 | export(fuzz_m_ratio) 24 | export(fuzz_partial_ratio) 25 | export(fuzz_token_set_ratio) 26 | export(fuzz_token_sort_ratio) 27 | export(fuzzdist) 28 | export(merge_columns) 29 | export(override_duplicates) 30 | export(parse_bibtex) 31 | export(parse_csv) 32 | export(parse_pubmed) 33 | export(parse_ris) 34 | export(parse_tsv) 35 | export(read_refs) 36 | export(review_duplicates) 37 | export(string_cosine) 38 | export(string_dl) 39 | export(string_hamming) 40 | export(string_jaccard) 41 | export(string_jw) 42 | export(string_lcs) 43 | export(string_lv) 44 | export(string_osa) 45 | export(string_qgram) 46 | export(string_soundex) 47 | export(write_bib) 48 | export(write_refs) 49 | export(write_ris) 50 | importFrom(dplyr,bind_rows) 51 | importFrom(purrr,list_transpose) 52 | importFrom(rlang,abort) 53 | importFrom(rlang,warn) 54 | importFrom(stringdist,stringdist) 55 | importFrom(tibble,as_tibble) 56 | importFrom(tibble,tibble) 57 | importFrom(unglue,unglue_data) 58 | importFrom(vroom,default_locale) 59 | importFrom(vroom,vroom_lines) 60 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # synthesisr 0.3.0 2 | 3 | This is a minor rebuild to meet modern data standards - mainly supporting 4 | tibbles rather than data.frames as a default 5 | -------------------------------------------------------------------------------- /R/add_line_breaks.R: -------------------------------------------------------------------------------- 1 | #' Add line breaks to one or more strings 2 | #' 3 | #' This function takes a vector of strings and adds line breaks 4 | #' every n characters. Primarily built to be called internally by 5 | #' `format_citation()`, this function has been made available as it can be 6 | #' useful in other contexts. 7 | #' @param x Either a string or a vector; if the vector is not of class character 8 | #' if will be coerced to one using `as.character()`. 9 | #' @param n Numeric: The desired number of characters that should separate 10 | #' consecutive line breaks. 11 | #' @param html Logical: Should the line breaks be specified in html? 
12 | #' @param max_n DEPRECATED: If provided will currently overwrite `n`; otherwise 13 | #' synonymous with `n` and will be removed from future versions. 14 | #' @param max_time DEPRECATED: Previously the maximum amount of time (in 15 | #' seconds) allowed to adjust groups until character thresholds are reached. 16 | #' Ignored. 17 | #' @details Line breaks are only added between words, so the value of n is 18 | #' actually a threshold value rather than being matched exactly. 19 | #' @return Returns the input vector unaltered except for the addition of line 20 | #' breaks. 21 | #' @importFrom rlang abort 22 | #' @examples add_line_breaks(c("On the Origin of Species"), n = 10) 23 | #' @export 24 | add_line_breaks <- function(x, 25 | n = 50, 26 | max_n = NULL, 27 | html = FALSE, 28 | max_time = NULL 29 | ){ 30 | if(!is.null(max_n)){ 31 | n <- max_n 32 | } 33 | 34 | if(html){ 35 | break_string <- "
" 36 | }else{ 37 | break_string <- "\n" 38 | } 39 | split_text <- strsplit(as.character(x), " ") 40 | out_list <- lapply(split_text, function(a){ 41 | if(length(a) == 0){ 42 | return("") 43 | }else{ 44 | result <- data.frame( 45 | text = a, 46 | nchars = nchar(a, allowNA = TRUE, keepNA = TRUE) + 1, 47 | stringsAsFactors = FALSE 48 | ) 49 | if(any(is.na(result$nchars))){ 50 | result$nchars[which(is.na(result$nchars))] <- 2 51 | } 52 | 53 | result$group <- cumulative_assign(result$nchars, n) 54 | result_list <- lapply(split(result$text, result$group), 55 | function(a){paste(a, collapse = " ")}) 56 | result <- paste(unlist(result_list), collapse = break_string) 57 | return(result) 58 | } 59 | }) 60 | return(unlist(out_list)) 61 | } 62 | 63 | #' Internal function to assign words to groups 64 | #' 65 | #' Functions by taking vector of string lengths, and iteratively assigning to 66 | #' groups within a while loop 67 | #' @param x is nchar() of a character vector + 1 68 | #' @param n is the maximum line length allowed 69 | #' @noRd 70 | #' @keywords Internal 71 | cumulative_assign <- function(x, n){ 72 | result_vec <- vector(mode = "integer", length = length(x)) 73 | window_size <- round(n / mean(x) * 2, 0) # this may be too large 74 | group_value <- 1 75 | while(any(result_vec < 1)){ 76 | available_rows <- which(result_vec < 1) 77 | window_tr <- min(c(window_size, length(available_rows))) 78 | vec_tr <- x[available_rows[seq_len(window_tr)]] 79 | keep_rows <- which(cumsum(vec_tr) < n) 80 | result_vec[available_rows[keep_rows]] <- group_value 81 | group_value <- group_value + 1 82 | } 83 | result_vec 84 | } 85 | -------------------------------------------------------------------------------- /R/bibliography_functions.R: -------------------------------------------------------------------------------- 1 | #' Methods for class bibliography 2 | #' 3 | #' @title bibliography-class 4 | #' @description This is a small number of standard methods for interacting with class 'bibliography'. More may be added later. 5 | #' @param x An object of class 'bibliography' 6 | #' @param object An object of class 'bibliography' 7 | #' @param n Number of items to select/print 8 | #' @param ... Any further information 9 | #' @aliases summary.bibliography, print.bibliography, c.bibliography, as.data.frame.bibliography 10 | #' @name bibliography-class 11 | #' @export 12 | summary.bibliography <- function(object, ...){ 13 | 14 | # are any abstracts completely missing? 15 | null_check <- unlist(lapply( 16 | object, 17 | function(a){is.null(a$abstract)} 18 | )) 19 | null_count <- length(object) - length(which(null_check)) 20 | null_percent <- round((100/length(object)) * null_count, 1) 21 | 22 | # how many sources? 
23 | sources <- unlist(lapply( 24 | object, 25 | function(a){a$journal} 26 | )) 27 | if(!is.null(sources)){ 28 | n_sources <- length(unique(sources)) 29 | source_freq <- sort( 30 | xtabs(~ sources), 31 | decreasing = TRUE 32 | )[seq_len(min(c(5, n_sources)))] 33 | # put text together 34 | result <- paste( 35 | paste0( 36 | "Object of class 'bibliography' containing ", 37 | length(object), 38 | " entries.", 39 | "\n ", 40 | "Number containing abstracts: ", 41 | null_count, 42 | " (", 43 | null_percent, 44 | "%)", 45 | "\n", 46 | "Number of sources: ", 47 | n_sources, 48 | "\n", 49 | "Most common sources:", 50 | "\n " 51 | ), 52 | paste( 53 | names(source_freq), 54 | " (n = ", 55 | as.numeric(source_freq), 56 | ")", 57 | sep = "", 58 | collapse = "\n " 59 | ), 60 | sep = "", 61 | collapse = "\n") 62 | }else{ 63 | result <- paste0( 64 | "Object of class 'bibliography' containing ", 65 | length(object), 66 | " entries.", 67 | "\n ", 68 | "Number containing abstracts: ", 69 | null_count, 70 | " (", 71 | null_percent, 72 | "%)", 73 | "\n" 74 | ) 75 | } 76 | cat(result, sep = "\n") 77 | } 78 | 79 | #' @rdname bibliography-class 80 | #' @export 81 | print.bibliography <- function(x, n, ...){ 82 | length_tr <- length(x) 83 | if(missing(n)){ 84 | n <- min(c(length_tr, 5)) 85 | }else{ 86 | if(n > length_tr){ 87 | n <- length_tr 88 | } 89 | } 90 | text_tr <- format_citation(x[seq_len(n)]) 91 | cat(paste(unlist(text_tr), collapse = "\n\n")) 92 | } 93 | 94 | #' @rdname bibliography-class 95 | #' @importFrom rlang abort 96 | #' @export 97 | '[.bibliography' <- function(x, n){ 98 | class(x) <- "list" 99 | if(all(n %in% seq_len(length(x))) == FALSE){ 100 | abort("subset out of bounds") 101 | } 102 | z <- x[n] 103 | class(z) <- "bibliography" 104 | return(z) 105 | } 106 | 107 | #' @rdname bibliography-class 108 | #' @export 109 | c.bibliography <- function(...){ 110 | result <- lapply(list(...), function(a){ 111 | class(a) <- "list" 112 | return(a) 113 | }) 114 | result <- do.call(c, result) 115 | class(result) <- "bibliography" 116 | return(result) 117 | } 118 | 119 | #' @rdname bibliography-class 120 | #' @export 121 | as.data.frame.bibliography <- function(x, ...){ 122 | 123 | cols <- unique(unlist(lapply(x, names))) 124 | # cols <- cols[which(cols != "further_info")] 125 | 126 | x_list <- lapply(x, function(a, cols){ 127 | result <- lapply(cols, function(b, lookup){ 128 | if(any(names(lookup) == b)){ 129 | data_tr <- lookup[[b]] 130 | if(length(data_tr) > 1){ 131 | data_tr <- paste0(data_tr, collapse = " and ") 132 | } 133 | return(data_tr) 134 | }else{ 135 | return(NA) 136 | } 137 | }, 138 | lookup = a) 139 | names(result) <- cols 140 | return( 141 | as.data.frame( 142 | result, 143 | stringsAsFactors=FALSE 144 | ) 145 | ) 146 | }, 147 | cols = cols 148 | ) 149 | 150 | x_dframe <- data.frame( 151 | do.call(rbind, x_list), 152 | stringsAsFactors = FALSE 153 | ) 154 | rownames(x_dframe) <- NULL 155 | 156 | return(x_dframe) 157 | } 158 | 159 | 160 | #' @rdname bibliography-class 161 | #' @importFrom rlang abort 162 | #' @export 163 | as.bibliography <- function(x, ...){ 164 | 165 | if(!inherits(x, "data.frame")){ 166 | abort("as.bibliography can only be called for objects of class 'data.frame'") 167 | } 168 | 169 | x_list <- lapply( 170 | split(x, seq_len(nrow(x))), 171 | function(a){ 172 | a <- as.list(a) 173 | if(any(names(a) == "author")){ 174 | a$author <- strsplit(a$author, " and ")[[1]] 175 | } 176 | if(any(names(a) == "keywords")){ 177 | a$keywords <- strsplit(a$keywords, " and ")[[1]] 178 | } 179 
| return(a) 180 | } 181 | ) 182 | names(x_list) <- seq_len(nrow(x)) 183 | class(x_list) <- "bibliography" 184 | return(x_list) 185 | } 186 | 187 | #' @rdname bibliography-class 188 | #' @param .rows currently ignored 189 | #' @param .name_repair currently ignored 190 | #' @param rownames currently ignored 191 | #' @importFrom purrr list_transpose 192 | #' @importFrom tibble as_tibble 193 | #' @export 194 | as_tibble.bibliography <- function(x, 195 | ..., 196 | .rows, 197 | .name_repair, 198 | rownames){ 199 | class(x) <- "list" 200 | as_tibble(list_transpose(x)) 201 | } 202 | -------------------------------------------------------------------------------- /R/clean_functions.R: -------------------------------------------------------------------------------- 1 | #' Clean a `tibble` or vector 2 | #' 3 | #' Cleans column and author names 4 | #' @param data A `tibble` with bibliographic information. 5 | #' @param x A vector of strings 6 | #' @return Returns the input, but cleaner. 7 | #' @example inst/examples/clean_.R 8 | #' @name clean_ 9 | #' @export 10 | clean_df <- function(data){ 11 | colnames(data) <- clean_colnames(colnames(data)) 12 | if(any(colnames(data) == "author")){ 13 | data$author <- clean_authors(data$author) 14 | } 15 | data <- remove_factors(data) 16 | return(data) 17 | } 18 | 19 | 20 | # Standardize author delimiters 21 | #' @rdname clean_ 22 | #' @export 23 | clean_authors <- function(x){ 24 | if(any(grepl("\\sand\\s|\\sAND\\s|\\s&\\s", x))){ 25 | x <- gsub("\\sAND\\s|\\s&\\s", " and ", x) 26 | }else{ 27 | x <- gsub(",(?=\\s[[:alpha:]]{2,})", " and ", x, perl = TRUE) 28 | } 29 | x <- gsub("\\s{2, }", " ", x) 30 | return(x) 31 | } 32 | 33 | 34 | # Clean common issues with column names 35 | #' @rdname clean_ 36 | #' @export 37 | clean_colnames <- function( 38 | x # colnames 39 | ){ 40 | if(inherits(x, "data.frame")){ 41 | x <- colnames(x) 42 | } 43 | x <- sub("^(X|Y|Z)\\.+", "", x) # remove leading X 44 | x <- sub("^[[:punct:]]*", "", x) # leading punctuation 45 | x <- sub("[[:punct:]]*$", "", x) # trailing punctuation 46 | x <- gsub("\\.+", "_", x) # replace 1 or more dots with underscore 47 | non_codes <- nchar(x) > 2 # for colnames with nchar > 2, convert to lower case 48 | x[non_codes] <- tolower(x[non_codes]) 49 | x <- sub("authors", "author", x) # remove plural authors 50 | x <- make.unique(x, sep = "_") 51 | x <- gsub(" ", "_", x) 52 | return(x) 53 | } 54 | 55 | #' Remove factors from an object 56 | #' 57 | #' Internal functions called by `clean_df()`: 58 | #' @description This function converts factors to characters to avoid errors with 59 | #' levels. 60 | #' @param z A data.frame 61 | #' @return Returns the input data.frame with all factors converted to character. 62 | #' @noRd 63 | #' @keywords Internal 64 | remove_factors <- function(z){ 65 | z[] <- lapply(z, function(x){ 66 | if(is.factor(x)){as.character(x)}else{x} 67 | }) 68 | return(z) 69 | } 70 | -------------------------------------------------------------------------------- /R/code_lookup.R: -------------------------------------------------------------------------------- 1 | #' Bibliographic code lookup for search results assembly 2 | #' 3 | #' A data frame that can be used to look up common codes for different 4 | #' bibliographic fields across databases and merge them to a common format. 
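#'
#' An illustrative query (a sketch only; that the RIS code "TI" maps to the
#' "title" field is an assumption made for this example):
#'
#' ```r
#' subset(synthesisr::code_lookup, code == "TI", select = c(code, field))
#' ```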
5 | #' 6 | #' @format A `data.frame` with 226 obs of 12 variables 7 | #' 8 | #' \describe{ 9 | #' \item{code}{code used in search results} 10 | #' \item{order}{the order in which to rank fields in assembled results} 11 | #' \item{category_description}{type of bibliographic data} 12 | #' \item{entry_description}{description of field} 13 | #' \item{field}{bibliographic field that codes correspond to} 14 | #' \item{ris_generic}{logical: If the code is used in generic ris files} 15 | #' \item{ris_wos}{logical: If the code is used in Web of Science ris files} 16 | #' \item{ris_pubmed}{logical: If the code is used in PubMed ris files} 17 | #' \item{ris_scopus}{logical: If the code is used in Scopus ris files} 18 | #' \item{ris_asp}{logical: If the code is used in Academic Search Premier ris files} 19 | #' \item{ris_ovid}{logical: If the code is used in Ovid ris files} 20 | #' \item{ris_synthesisr}{logical: If the code used in synthesisr imports & exports}} 21 | #' 22 | "code_lookup" 23 | -------------------------------------------------------------------------------- /R/deprecated.R: -------------------------------------------------------------------------------- 1 | #' Bind two or more data frames with different columns 2 | #' 3 | #' @description Takes two or more `data.frames` with different column names or 4 | #' different column orders and binds them to a single `data.frame.` This 5 | #' function is maintained for backwards compatibility, but it is synonymous with 6 | #' `dplyr::bind_rows()` and will be depracated in future. 7 | #' @param x Either a data.frame or a list of data.frames. 8 | #' @param y A data.frame, optional if x is a list. 9 | #' @return Returns a single data.frame with all the input data frames merged. 10 | #' @example inst/examples/merge_columns.R 11 | #' @importFrom dplyr bind_rows 12 | #' @importFrom rlang abort 13 | #' @export 14 | merge_columns <- function( 15 | x, # either a data.frame or a list of the same 16 | y # a data.frame, optional 17 | ){ 18 | if(missing(x)){ 19 | abort("object x is missing with no default") 20 | } 21 | if(!(inherits(x, "data.frame") | inherits(x, "list"))){ 22 | abort("object x must be either a data.frame or a list") 23 | } 24 | if(inherits(x, "data.frame")){ 25 | if(missing(y)){ 26 | return(x) 27 | # abort("If x is a data.frame, then y must be supplied") 28 | }else{ 29 | x <- list(x, y) 30 | } 31 | }else{ # i.e. for lists 32 | if(!all(unlist(lapply(x, function(a){inherits(a, "data.frame")})))){ 33 | abort("x must only contain data.frames") 34 | } 35 | } 36 | bind_rows(x) 37 | } 38 | -------------------------------------------------------------------------------- /R/detect_functions.R: -------------------------------------------------------------------------------- 1 | #' Detect file formatting information 2 | #' 3 | #' @description Bibliographic data can be stored in a number of different file 4 | #' types, meaning that detecting consistent attributes of those files is 5 | #' necessary if they are to be parsed accurately. These functions attempt to 6 | #' identify some of those key file attributes. Specifically, `detect_parser()` 7 | #' determines which [parse_] function to use; `detect_delimiter()` 8 | #' and `detect_lookup()` identify different attributes of RIS files; and 9 | #' `detect_year()` attempts to fill gaps in publication years from other 10 | #' information stored in a `tibble`. 11 | #' @param x A character vector containing bibliographic data 12 | #' @param tags A character vector containing RIS tags. 
13 | #' @param df a data.frame containing bibliographic data 14 | #' @return `detect_parser()` and `detect_delimiter()` return a length-1 15 | #' character; `detect_year()` returns a character vector listing estimated 16 | #' publication years; and `detect_lookup()` returns a `data.frame.` 17 | #' @example inst/examples/detect_.R 18 | #' @name detect_ 19 | #' @importFrom rlang abort 20 | #' @export 21 | detect_parser <- function(x){ 22 | 23 | # calculate proportional of lines containing likely tags 24 | proportions <- unlist(lapply( 25 | c( 26 | ",(\"|[[:alnum:]])", 27 | "\t", 28 | "\\{|\\}", 29 | "(^[[:upper:]]{2,4}\\s*(-|:)\\s)|(^([[:upper:]]{2}|[[:upper:]][[:digit:]])\\s*(-|:){0,2}\\s*)" 30 | ), 31 | function(a, z){proportion_delimited(z, a)}, 32 | z = x 33 | )) 34 | 35 | # if any are detection, pick the most likely one 36 | if(any(proportions > 0.2)){ 37 | result <- switch( 38 | c("comma", "tab", "bibtex", "ris")[which.max(proportions)], 39 | "comma" = "parse_csv", 40 | "tab" = "parse_tsv", 41 | "bibtex" = "parse_bibtex", 42 | "ris" = { 43 | if(length(which(grepl("PMID", x))) > 0){ 44 | "parse_pubmed" 45 | }else{ 46 | "parse_ris" 47 | } 48 | } 49 | ) 50 | }else{ 51 | result <- "unknown" 52 | } 53 | return(result) 54 | } 55 | 56 | 57 | #' @rdname detect_ 58 | #' @export 59 | detect_delimiter <- function(x){ 60 | if(any(grepl("^ER", x))){ 61 | delimiter <- "endrow" 62 | }else{ 63 | # special break: same character repeated >6 times, no other characters 64 | char_list <- strsplit(x, "") 65 | char_break_test <- unlist( 66 | lapply(char_list, 67 | function(a){length(unique(a)) == 1 & length(a > 6)} 68 | ) 69 | ) 70 | if(any(char_break_test)){ 71 | delimiter <- "character" 72 | }else{ 73 | # use space as a ref break (last choice) 74 | space_break_check <- unlist(lapply( 75 | char_list, 76 | function(a){all(a == "" | a == " ")} 77 | )) 78 | if(any(space_break_check)){ 79 | delimiter <- "space" 80 | }else{ 81 | abort("import failed: unknown reference delimiter") 82 | } 83 | } 84 | } 85 | return(delimiter) 86 | } 87 | 88 | 89 | #' @rdname detect_ 90 | #' @export 91 | detect_lookup <- function( 92 | tags # a vector of strings representing ris tags 93 | ){ 94 | rows <- which(synthesisr::code_lookup$code %in% tags) 95 | ris_list <- split( 96 | synthesisr::code_lookup[rows, grepl("ris_", colnames(synthesisr::code_lookup))], 97 | synthesisr::code_lookup$code[rows] 98 | ) 99 | ris_matrix <- do.call( 100 | rbind, 101 | lapply(ris_list, function(a){apply(a, 2, any)}) 102 | ) 103 | ris_sums <- apply(ris_matrix, 2, sum) 104 | best_match <- which.max(ris_sums[-1]) 105 | best_proportion <- ris_sums[best_match + 1] / nrow(ris_matrix) 106 | generic_proportion <- ris_sums[1] / nrow(ris_matrix) 107 | # default to ris_generic if everything else is bad 108 | if(best_proportion < 0.75 & generic_proportion > best_proportion){ 109 | match_df <- synthesisr::code_lookup[synthesisr::code_lookup$ris_generic, ] 110 | }else{ # i.e. if the 'best' match performs perfectly 111 | if(best_proportion > 0.99){ # i.e. 
a perfect match 112 | match_df <- synthesisr::code_lookup[ 113 | synthesisr::code_lookup[, names(best_match)], 114 | 115 | ] 116 | }else{ # otherwise use the best choice, then generic to fill gaps 117 | rows_best <- which( 118 | synthesisr::code_lookup[, names(best_match)] & 119 | synthesisr::code_lookup$code %in% names(which(ris_matrix[, names(best_match)])) 120 | ) 121 | rows_generic <- which( 122 | synthesisr::code_lookup$ris_generic & 123 | synthesisr::code_lookup$code %in% names(which(!ris_matrix[, names(best_match)])) 124 | ) 125 | match_df <- synthesisr::code_lookup[c(rows_best, rows_generic), ] 126 | } 127 | } 128 | 129 | return(match_df[, c("code", "order", "field")]) 130 | } 131 | 132 | #' @rdname detect_ 133 | #' @export 134 | detect_year <- function(df){ 135 | if(!inherits(df, "data.frame")){ 136 | abort(print("detect_year expects an object of class data.frame as input")) 137 | } 138 | lc_colnames <- tolower(colnames(df)) 139 | dates <- grepl("date", lc_colnames) & !grepl("access", lc_colnames) 140 | if(any(dates)){ 141 | if(any(colnames(df) == "year")) { 142 | result <- df$year 143 | }else{ 144 | result <- rep(NA, nrow(df)) 145 | } 146 | na_rows <- is.na(result) 147 | if(any(na_rows)){ 148 | result[na_rows] <- unlist(lapply( 149 | split(df[na_rows, dates], seq_along(na_rows)), 150 | guess_year 151 | )) 152 | } 153 | }else{ 154 | result <- rep(NA, nrow(df)) 155 | } 156 | return(result) 157 | } 158 | 159 | #' internal function to calculate the proportion of lines that contain a particular regex 160 | #' called by detect_parser 161 | #' @noRd 162 | #' @keywords Internal 163 | proportion_delimited <- function(x, regex){ 164 | delimiter_count <- unlist(lapply( 165 | gregexpr(regex, x, perl = TRUE), 166 | function(a){length(which(a > 0))} 167 | )) 168 | full_lines <- nchar(x, type = "bytes") > 0 169 | proportion <- length(which(delimiter_count > 0)) / length(which(full_lines)) 170 | return(proportion) 171 | } 172 | 173 | #' internal function for detect_year 174 | #' @noRd 175 | #' @keywords Internal 176 | guess_year <- function(x){ 177 | number_lookup <- regexpr("[[:alnum:]]{4}", as.character(x)) 178 | if(any(number_lookup > 0)){ 179 | x <- x[number_lookup > 0] 180 | result_vec <- unlist(lapply(seq_along(x), function(a){ 181 | substr(x[a], start = number_lookup[a], stop = number_lookup[a] + 3) 182 | })) 183 | # return(max(as.numeric(result))) 184 | result <- names(sort(xtabs(~result_vec), decreasing = TRUE)[1]) 185 | return(result) 186 | }else{ 187 | return(NA) 188 | } 189 | } 190 | 191 | #' Compute the rolling sum of detections 192 | #' 193 | #' This function is intended to ensure multiple consecutive empty rows are 194 | #' removed. Called by `detect_delimiter()`. 195 | #' @noRd 196 | #' @keywords Internal 197 | rollingsum <- function(a, n = 2L){ 198 | tail(cumsum(a) - cumsum(c(rep(0, n), head(a, -n))), -n + 1) 199 | } 200 | -------------------------------------------------------------------------------- /R/format_citation.R: -------------------------------------------------------------------------------- 1 | #' Format a citation 2 | #' 3 | #' @description This function takes an object of class `data.frame`, `list`, or 4 | #' `bibliography` and returns a formatted citation. 5 | #' @param data An object of class `data.frame`, `list`, or `bibliography.` 6 | #' @param details Logical: Should identifying information such as author names & 7 | #' journal titles be displayed? Defaults to `TRUE`. 8 | #' @param abstract Logical: Should the abstract be shown (if available)? 
9 | #' Defaults to `FALSE.` 10 | #' @param add_html Logical: Should the journal title be italicized using html 11 | #' codes? Defaults to `FALSE`. 12 | #' @param line_breaks Either logical, stating whether line breaks should be 13 | #' added, or numeric stating how many characters should separate consecutive 14 | #' line breaks. Defaults to `FALSE`. 15 | #' @param ... any other arguments. 16 | #' @return Returns a string of length equal to `length(data)` that contains 17 | #' formatted citations. 18 | #' @importFrom rlang abort 19 | #' @example inst/examples/format_citation.R 20 | #' @export 21 | format_citation <- function( 22 | data, 23 | details = TRUE, 24 | abstract = FALSE, 25 | add_html = FALSE, 26 | line_breaks = FALSE, 27 | ... 28 | ){ 29 | if(!inherits(data, c("data.frame", "bibliography", "list"))){ 30 | abort("format_citation expects input data to be an object of class data.frame, bibliography, or list") 31 | } 32 | 33 | if(!inherits(data, "data.frame")){ 34 | data <- as.data.frame(data) 35 | } 36 | 37 | colnames(data) <- clean_colnames(colnames(data)) 38 | if(any(names(data) == "journal")){ 39 | source <- "journal" 40 | }else{ 41 | source_check <- grepl("source", names(data)) 42 | if(any(source_check)){ 43 | source <- names(data)[which(source_check)] 44 | if(length(source) > 1){ 45 | source <- source[which.max(nchar(data[source], type = "bytes"))] 46 | } 47 | }else{ 48 | source <- NA 49 | } 50 | } 51 | 52 | # this section should be made more flexible to use any available information 53 | # if(details){ 54 | data_list <- split(data, seq_len(nrow(data))) 55 | data_out <- unlist(lapply(data_list, function(a){ 56 | cols_tr <- names(a) 57 | text_list <- as.list(rep(NA, 4)) 58 | names(text_list) <- c("author", "year", "title", "journal") 59 | # title 60 | if(any(cols_tr == "title")){ 61 | title_text <- tools::toTitleCase(tolower(a$title)) 62 | if(grepl("[[:punct:]]$", title_text)){ 63 | text_list$title <- title_text 64 | }else{ 65 | text_list$title <- paste0(title_text, ".") 66 | } 67 | }else{ 68 | text_list$title <- "" 69 | } 70 | if(details){ 71 | # year 72 | if(any(cols_tr == "year")){ 73 | text_list$year <- paste0("(", a$year, ")") 74 | }else{ 75 | text_list$year <- NA 76 | } 77 | # journal 78 | if(!is.na(source)){ 79 | if(!is.na(a[[source]])){ 80 | journal_text <- tools::toTitleCase(tolower(a[[source]])) 81 | if(add_html){ 82 | text_list$journal <- paste0("", journal_text, ". ") 83 | }else{ 84 | text_list$journal <- paste0(journal_text, ". ") 85 | } 86 | }else{ 87 | text_list$journal <- NA 88 | } 89 | } 90 | # authors 91 | if(any(cols_tr == "author")){ 92 | author_vector <- strsplit(a[['author']], " and ")[[1]] 93 | if(length(author_vector) == 1){ 94 | text_list$author <- a[['author']] 95 | }else{ 96 | text_list$author <- paste0(author_vector[1], " et al.") 97 | } 98 | }else{ 99 | if(!all(is.na(text_list))){ 100 | text_list$author <- "Anon." 
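# ("Anon." is used when no author field exists but other citation details do)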
101 | } 102 | } 103 | } # end if(details) 104 | text_vec <- unlist(text_list) 105 | if(all(is.na(text_vec))){ 106 | return(a[1]) 107 | }else{ 108 | return( 109 | paste(text_vec[!is.na(text_vec)], collapse = " ") 110 | ) 111 | } 112 | })) 113 | 114 | # add line breaks if required 115 | if(is.logical(line_breaks)){ 116 | if(line_breaks){ 117 | data_out <- add_line_breaks(data_out) 118 | } 119 | }else{ 120 | if(is.numeric(line_breaks)){ 121 | data_out <- add_line_breaks(data_out, line_breaks) 122 | } 123 | } 124 | data_out <- unlist(lapply(data_out, trimws)) 125 | return(data_out) 126 | } 127 | -------------------------------------------------------------------------------- /R/fuzz_functions.R: -------------------------------------------------------------------------------- 1 | # Functions from the 'fuzzywuzzy' Python library 2 | # github.com/seatgeek/fuzzywuzzy 3 | # these functions coded by Martin Westgate on 4th June 2018 based on description given here: 4 | # chairnerd.seatgeek.com/fuzzywuzzy-fuzzy-string-matching-in-python/ 5 | 6 | #' Calculate similarity between two strings 7 | #' @description These functions duplicate the approach of the 'fuzzywuzzy' 8 | #' Python library for calculating string similarity. 9 | #' @param a A character vector of items to match to b. 10 | #' @param b A character vector of items to match to a. 11 | #' @param method The method to use for fuzzy matching. 12 | #' @note `fuzz_m_ratio()` is a measure of the number of letters that match 13 | #' between two strings. It is calculated as one minus two times the number of 14 | #' matched characters, divided by the number of characters in both strings. 15 | #' @note `fuzz_partial_ratio()` calculates the extent to which one string is a 16 | #' subset of the other. If one string is a perfect subset, then this will be 17 | #' zero. 18 | #' @note `fuzz_token_sort_ratio()` sorts the words in both strings into 19 | #' alphabetical order, and checks their similarity using `fuzz_m_ratio()`. 20 | #' @note `fuzz_token_set_ratio()` is similar to `fuzz_token_sort_ratio()`, but 21 | #' compares both sorted strings to each other, and to a third group made of 22 | #' words common to both strings. It then returns the maximum value of 23 | #' `fuzz_m_ratio()` from these comparisons. 24 | #' @note `fuzzdist()` is a wrapper function, for compatability with `stringdist`. 25 | #' @return Returns a score of same length as b, giving the proportional 26 | #' dissimilarity between a and b. 
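#'
#' A hypothetical illustration of the scale (not drawn from the bundled example
#' file): identical strings score 0, and the token-sorting variant ignores word
#' order.
#'
#' ```r
#' fuzz_m_ratio("systematic review", "systematic review")          # 0
#' fuzz_token_sort_ratio("review systematic", "systematic review") # 0
#' ```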
27 | #' @example inst/examples/fuzzdist.R 28 | #' @name fuzz_ 29 | #' @export 30 | fuzzdist <- function(a, b, method = c( 31 | "fuzz_m_ratio", "fuzz_partial_ratio", "fuzz_token_sort_ratio", "fuzz_token_set_ratio") 32 | ){ 33 | method <- match.arg(method) 34 | do.call( 35 | method, 36 | list(a, b) 37 | ) 38 | } 39 | 40 | #' @rdname fuzz_ 41 | #' @export 42 | fuzz_m_ratio <- function(a, b){ 43 | out <- lapply(b, function(b, a){ 44 | z <- c(a, b) 45 | if(any(is.na(z))){ 46 | return(NA) 47 | }else{ 48 | z_list <- lapply(strsplit(z, ""), 49 | function(x, minval){x[1:minval]}, 50 | minval = min(nchar(z)) 51 | ) 52 | z_match <- apply( 53 | do.call(cbind, z_list), 54 | 1, 55 | function(x){x[1] == x[2]} 56 | ) 57 | return( 58 | 1 - (2 * length(which(z_match)) / sum(nchar(z))) 59 | ) 60 | } 61 | }, 62 | a = a) 63 | return(as.numeric(out)) 64 | } 65 | 66 | 67 | #' @rdname fuzz_ 68 | #' @export 69 | fuzz_partial_ratio <- function(a, b){ 70 | out <- lapply(b, function(b, a){ 71 | z <- c(a, b) 72 | if(any(is.na(z))){ 73 | return(NA) 74 | }else{ 75 | zn <- nchar(z) 76 | n_reps <- (max(zn) - min(zn)) 77 | z_list <- lapply( 78 | c(0: n_reps), 79 | function(x, lookup, keep){lookup[(keep + x)]}, 80 | lookup = strsplit(z[which.max(zn)], "")[[1]], 81 | keep = seq_len(min(zn)) 82 | ) 83 | z_ratio <- lapply(z_list, function(x, comparison){ 84 | match_value <- apply( 85 | cbind(x, comparison), 86 | 1, 87 | function(y){y[1] == y[2]} 88 | ) 89 | length(which(match_value))/length(x) 90 | }, 91 | comparison = strsplit(z[which.min(zn)], "")[[1]] 92 | ) 93 | return(1 - max(as.numeric(z_ratio))) 94 | } 95 | }, 96 | a = a) 97 | return(as.numeric(out)) 98 | } 99 | 100 | 101 | #' @rdname fuzz_ 102 | #' @export 103 | fuzz_token_sort_ratio <- function(a, b){ 104 | out <- lapply(b, function(b, a){ 105 | z <- c(a, b) 106 | if(any(is.na(z))){ 107 | return(NA) 108 | }else{ 109 | z_split <- strsplit(z, " ") 110 | z_split <- lapply(z_split, make.unique, sep="_XDUP_") 111 | in_check <- z_split[[1]] %in% z_split[[2]] 112 | intersection <- sort(z_split[[1]][which(in_check)]) 113 | string_list <- list( 114 | t0 = intersection, 115 | t1 = c(intersection, 116 | sort(z_split[[1]][which(!in_check)]) 117 | ), 118 | t2 = c(intersection, 119 | unlist(lapply(z_split[[2]][which(!(z_split[[2]] %in% intersection))], function(x){strsplit(x, "_XDUP_")[[1]][1]})) 120 | ) 121 | ) 122 | string_list <- lapply(string_list, function(x){ 123 | if(length(x) < 1){ 124 | return("") 125 | }else{ 126 | return(paste(x, collapse = " ")) 127 | } 128 | }) 129 | result <- c( 130 | fuzz_m_ratio(string_list$t0, string_list$t1), 131 | fuzz_m_ratio(string_list$t0, string_list$t2), 132 | fuzz_m_ratio(string_list$t1, string_list$t2) 133 | ) 134 | return(max(result)) 135 | } 136 | }, 137 | a = a) 138 | return(as.numeric(out)) 139 | return(as.numeric(out)) 140 | } 141 | 142 | 143 | #' @rdname fuzz_ 144 | #' @export 145 | fuzz_token_set_ratio <- function(a, b){ 146 | out <- lapply(b, function(b, a){ 147 | z <- c(a, b) 148 | if(any(is.na(z))){ 149 | return(NA) 150 | }else{ 151 | z_split <- strsplit(z, " ") 152 | in_check <- z_split[[1]] %in% z_split[[2]] 153 | intersection <- sort(z_split[[1]][which(in_check)]) 154 | string_list <- list( 155 | t0 = intersection, 156 | t1 = c(intersection, 157 | sort(z_split[[1]][which(!in_check)]) 158 | ), 159 | t2 = c(intersection, 160 | sort(z_split[[2]][which(!(z_split[[2]] %in% intersection))]) 161 | ) 162 | ) 163 | string_list <- lapply(string_list, function(x){ 164 | if(length(x) < 1){ 165 | return("") 166 | }else{ 167 | 
return(paste(x, collapse = " ")) 168 | } 169 | }) 170 | result <- c( 171 | fuzz_m_ratio(string_list$t0, string_list$t1), 172 | fuzz_m_ratio(string_list$t0, string_list$t2), 173 | fuzz_m_ratio(string_list$t1, string_list$t2) 174 | ) 175 | return(max(result)) 176 | } 177 | }, 178 | a = a) 179 | return(as.numeric(out)) 180 | } 181 | -------------------------------------------------------------------------------- /R/parse_bibtex.R: -------------------------------------------------------------------------------- 1 | #' @rdname parse_ 2 | #' @importFrom dplyr bind_rows 3 | #' @importFrom tibble tibble 4 | #' @importFrom unglue unglue_data 5 | #' @export 6 | parse_bibtex <- function(x){ 7 | # use `unglue` to parse text 8 | raw_df <- unglue_data(x, 9 | patterns = c("[variable]={[value]},", 10 | "@[variable]{[value],"), 11 | open = "[", 12 | close = "]") 13 | 14 | # remove missing values 15 | raw_df <- raw_df[!(is.na(raw_df$variable) | is.na(raw_df$value)), ] 16 | 17 | # create a vector assigning rows to articles 18 | article_vec <- as.integer(raw_df$variable == "ARTICLE") 19 | article_vec[is.na(article_vec)] <- 0 20 | raw_df$article <- cumsum(article_vec) 21 | 22 | # split by article and transpose 23 | result <- lapply( 24 | split(raw_df[, 1:2], raw_df$article), 25 | function(a){ 26 | result <- as.data.frame(t(a$value)) 27 | colnames(result) <- a$variable 28 | return(result) 29 | }) |> 30 | bind_rows() |> 31 | tibble() 32 | 33 | # split authors 34 | if(any(names(result) == "author")){ 35 | if(any(grepl("and", result$author))){ 36 | result$author <- strsplit(result$author, "\\s*and\\s*") 37 | } 38 | } 39 | 40 | # join duplicated columns 41 | # note: needs to be done simultaneously with calling `tibble()` 42 | 43 | return(result) 44 | } 45 | -------------------------------------------------------------------------------- /R/parse_csv_tsv.R: -------------------------------------------------------------------------------- 1 | #' @rdname parse_ 2 | #' @export 3 | parse_csv <- function(x){ 4 | read.table( 5 | text = x, 6 | header = TRUE, 7 | sep = ",", 8 | quote = "\"", 9 | dec = ".", 10 | fill = TRUE, 11 | stringsAsFactors = FALSE, 12 | row.names = NULL) |> 13 | match_columns() |> 14 | tibble() 15 | } 16 | 17 | #' @rdname parse_ 18 | #' @export 19 | parse_tsv <- function(x){ 20 | read.table( 21 | text = x, 22 | header = TRUE, 23 | sep = "\t", 24 | quote = "\"", 25 | dec = ".", 26 | fill = TRUE, 27 | stringsAsFactors = FALSE, 28 | row.names = NULL) |> 29 | match_columns() |> 30 | tibble() 31 | } 32 | 33 | #' Internal function used by parse_csv and parse_tsv: 34 | #' Matches imported data to reference codes 35 | #' 36 | #' @description Takes an imported data.frame and rearranges it to match lookup 37 | #' codes. 38 | #' @param df A data.frame that contains bibliographic information. 39 | #' @return Returns a data.frame rearranged and coded to match standard 40 | #' bibliographic fields, with unrecognized fields appended. 
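#'
#' A sketch of the intended behaviour (it assumes the RIS codes "TI" and "AB"
#' map to the "title" and "abstract" fields in code_lookup; the third column
#' name is invented for the example):
#'
#' ```r
#' df <- data.frame(TI = "A title", AB = "An abstract", reviewer_notes = "keep")
#' match_columns(df) # matched columns are renamed (title, abstract); the
#'                   # unmatched reviewer_notes column is appended last
#' ```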
41 | #' @noRd 42 | #' @keywords Internal 43 | #' @example inst/examples/match_columns.R 44 | match_columns <- function(df){ 45 | # figure out which columns match known tags 46 | hits <- as.numeric(match(synthesisr::code_lookup$code, colnames(df))) 47 | newcolnames <- synthesisr::code_lookup$field[ 48 | match(colnames(df), 49 | synthesisr::code_lookup$code) 50 | ] 51 | colnames(df)[!is.na(newcolnames)] <- newcolnames[!is.na(newcolnames)] 52 | 53 | # rearrange data in standard(ish) order 54 | if(any(is.na(hits))){ 55 | hits <- hits[!is.na(hits)] 56 | } 57 | 58 | # retain columns even if they did not match lookup 59 | retain <- append(hits, seq(1, length(df), 1)[!(seq(1, length(df), 1) %in% hits)]) 60 | 61 | return(df[,retain]) 62 | } 63 | -------------------------------------------------------------------------------- /R/parse_pubmed.R: -------------------------------------------------------------------------------- 1 | #' Parse bibliographic text in a variety of formats 2 | #' 3 | #' @description Text in standard formats - such as imported via 4 | #' `base::readLines()` - can be parsed using a variety of standard formats. Use 5 | #' `detect_parser()` to determine which is the most appropriate parser for your 6 | #' situation. Note that `parse_tsv()` and `parse_csv()` are maintained for 7 | #' backwards compatability only; within `read_ref` these have been replaced 8 | #' by `vroom::vroom()`. 9 | #' @param x A character vector containing bibliographic information in ris 10 | #' format. 11 | #' @return Returns an object of class `bibliography` (ris, bib, or pubmed 12 | #' formats) or `data.frame` (csv or tsv). 13 | #' @example inst/examples/parse_.R 14 | #' @name parse_ 15 | #' @export 16 | parse_pubmed <- function(x){ 17 | 18 | x <- prep_ris(x, detect_delimiter(x), type = "pubmed") 19 | 20 | x_merge <- merge(x, 21 | synthesisr::code_lookup[ 22 | synthesisr::code_lookup$ris_pubmed, 23 | c("code", "order", "field") 24 | ], 25 | by.x = "ris", 26 | by.y = "code", 27 | all.x = TRUE, 28 | all.y = FALSE 29 | ) 30 | x_merge <- x_merge[order(x_merge$row_order), ] 31 | 32 | # find a way to store missing .bib data rather than discard 33 | if(any(is.na(x_merge$field))){ 34 | rows_tr <- which(is.na(x_merge$field)) 35 | x_merge$field[rows_tr] <- x_merge$ris[rows_tr] 36 | 37 | # ensure all headings have an order 38 | if(all(is.na(x_merge$order))){ 39 | start_val <- 0 40 | }else{ 41 | start_val <- max(x_merge$order, na.rm = TRUE) 42 | } 43 | x_merge$order[rows_tr] <- as.numeric(as.factor(x_merge$ris[rows_tr])) + start_val 44 | } 45 | 46 | # convert into a list, where each reference is a separate entry 47 | x_split <- split(x_merge[c("field", "text", "order")], x_merge$ref) 48 | x_final <- lapply(x_split, function(a){ 49 | result <- split(a$text, a$field) 50 | if(any(names(result) == "abstract")){ 51 | result$abstract <- paste(result$abstract, collapse = " ") 52 | } 53 | if(any(names(result) == "address")){ 54 | result$address <- strsplit( 55 | paste(result$address, collapse = " "), 56 | "\\.\\s" 57 | )[[1]] 58 | } 59 | if(any(names(result) == "title")){ 60 | if(length(result$title) > 1){ 61 | result$title <- paste(result$title, collapse = " ") 62 | } 63 | } 64 | if(any(names(result) == "term_other")){ 65 | names(result)[which(names(result) == "term_other")] <- "keywords" 66 | } 67 | if(any(names(result) == "date_published")){ 68 | result$year <- substr(result$date_published, start = 1, stop = 4) 69 | } 70 | if(any(names(result) == "article_id")){ 71 | doi_check <- grepl("doi", result$article_id) 72 | 
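# keep the first whitespace-delimited token of the matching article_id entry as the doi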
if(any(doi_check)){ 73 | result$doi <- strsplit(result$article_id[which(doi_check)], " ")[[1]][1] 74 | } 75 | } 76 | 77 | # ensure result is returned in the correct order 78 | result_order <- order( 79 | unlist(lapply(split(a$order, a$field), function(b){b[1]})) 80 | ) 81 | return(result[result_order]) 82 | }) 83 | 84 | names(x_final) <- unlist(lapply(x_final, function(a){a$pubmed_id})) 85 | class(x_final) <- "bibliography" 86 | return(x_final) 87 | } 88 | -------------------------------------------------------------------------------- /R/parse_ris.R: -------------------------------------------------------------------------------- 1 | #' @rdname parse_ 2 | #' @param tag_naming What format are ris tags in? Defaults to `"best_guess"` See 3 | #' `read_refs()` for a list of accepted arguments. 4 | #' @export 5 | parse_ris <- function(x, tag_naming = "best_guess"){ 6 | 7 | # clean up input file 8 | x <- prep_ris(x, 9 | detect_delimiter(x), 10 | type = "generic") 11 | 12 | # merge data with lookup info, to provide bib-style tags 13 | tag_lookup_thisfile <- get_tag_lookup(x, tag_naming) 14 | x_merge <- merge(x, 15 | tag_lookup_thisfile, 16 | by.x = "ris", 17 | by.y = "code", 18 | all.x = TRUE, 19 | all.y = FALSE 20 | ) 21 | x_merge <- x_merge[order(x_merge$row_order), ] 22 | 23 | # find a way to store missing .bib data rather than discard 24 | if(any(is.na(x_merge$field))){ 25 | rows_tr <- which(is.na(x_merge$field)) 26 | x_merge$field[rows_tr] <- x_merge$ris[rows_tr] 27 | 28 | # ensure all headings have an order 29 | if(all(is.na(x_merge$order))){ 30 | start_val <- 0 31 | }else{ 32 | start_val <- max(x_merge$order, na.rm = TRUE) 33 | } 34 | x_merge$order[rows_tr] <- as.numeric(as.factor(x_merge$ris[rows_tr])) + start_val 35 | } 36 | 37 | # tidy up specific columns 38 | x_merge <- x_merge |> 39 | clean_ris_years() |> 40 | clean_ris_authors() 41 | 42 | # convert into a list, where each reference is a separate entry 43 | x_split <- split(x_merge[c("field", "ris", "text", "order")], x_merge$ref) 44 | 45 | # there is an issue with date accessed creating non-existing records 46 | # removing datasets with 1 row fixes this 47 | if(any(unlist(lapply(x_split, nrow))==1)){ 48 | x_split <- x_split[ -which(unlist(lapply(x_split, nrow))==1)] 49 | } 50 | 51 | # convert to list format 52 | x_final <- lapply(x_split, function(a){ 53 | result <- split(a$text, a$field) |> 54 | parse_ris_year() |> 55 | parse_ris_title() |> 56 | parse_ris_journal() |> 57 | parse_ris_abstract() |> 58 | parse_ris_page_numbers() 59 | # ensure result is returned in the correct order 60 | result_order <- order( 61 | unlist(lapply(split(a$order, a$field), function(b){b[1]})) 62 | ) 63 | return(result[result_order]) 64 | }) 65 | class(x_final) <- "bibliography" 66 | return(x_final) 67 | } 68 | 69 | #' Internal function to clean year data (above) 70 | #' @noRd 71 | #' @keywords Internal 72 | clean_ris_years <- function(x){ 73 | # method to systematically search for year data 74 | year_check <- regexpr("^\\d{4}$", x$text) 75 | if(any(year_check > 0)){ 76 | check_rows <- which(year_check > 0) 77 | year_strings <- as.numeric(x$text[check_rows]) 78 | 79 | # for entries with a bib entry labelled year, check that there aren't multiple years 80 | if(any(x$field[check_rows] == "year", na.rm = TRUE)){ 81 | # check for repeated year information 82 | year_freq <- xtabs(~ ref, data = x[which(x$field == "year"), ]) 83 | if(any(year_freq > 1)){ 84 | year_df <- x[which(x$field == "year"), ] 85 | year_list <- split(nchar(year_df$text), year_df$ris) 86 | 
year_4 <- sqrt((4 - unlist(lapply(year_list, mean))) ^ 2) 87 | # rename bib entries that have >4 characters to 'year_additional' 88 | incorrect_rows <- which( 89 | x$ris != names(which.min(year_4)[1]) & 90 | x$field == "year" 91 | ) 92 | x$field[incorrect_rows] <- "year_additional" 93 | } 94 | }else{ 95 | possible_rows <- which( 96 | year_strings > 0 & 97 | year_strings <= as.numeric(format(Sys.Date(), "%Y")) + 1 98 | ) 99 | tag_frequencies <- as.data.frame( 100 | xtabs(~ x$ris[check_rows[possible_rows]]), 101 | stringsAsFactors = FALSE 102 | ) 103 | colnames(tag_frequencies) <- c("tag", "n") 104 | # now work out what proportion of each tag contain year data 105 | # compare against number of references to determine likelihood of being 'the' year tag 106 | tag_frequencies$prop <- tag_frequencies$n/(max(x$ref)+1) # number of references 107 | if(any(tag_frequencies$prop > 0.9)){ 108 | year_tag <- tag_frequencies$tag[which.max(tag_frequencies$prop)] 109 | rows.tr <- which(x$ris == year_tag) 110 | x$field[rows.tr] <- "year" 111 | x$row_order[rows.tr] <- 3 112 | } 113 | } 114 | } 115 | x 116 | } 117 | 118 | #' Internal function to clean author data (above) 119 | #' @noRd 120 | #' @keywords Internal 121 | clean_ris_authors <- function(x){ 122 | # ensure author data from a single ris tag 123 | if(any(x$field == "author")){ 124 | lookup.tags <- xtabs( ~ x$ris[which(x$field == "author")]) 125 | if(length(lookup.tags) > 1){ 126 | replace_tags <- names(which(lookup.tags < max(lookup.tags))) 127 | replace_rows <- which(x$ris %in% replace_tags) 128 | x$field[replace_rows] <- x$ris[replace_rows] 129 | if(all(is.na(x$row_order))){ 130 | start_val <- 0 131 | }else{ 132 | start_val <- max(x$row_order, na.rm = TRUE) 133 | } 134 | x$row_order[replace_rows] <- start_val + as.numeric( 135 | as.factor(x$ris[replace_rows]) 136 | ) 137 | } 138 | } 139 | x 140 | } 141 | 142 | #' Internal function to build a tag lookup table 143 | #' @noRd 144 | #' @keywords Internal 145 | get_tag_lookup <- function(x, tag_naming){ 146 | # create the appropriate lookup file for the specified tag 147 | if(inherits(tag_naming, "data.frame")){ 148 | if(!any(colnames(tag_naming) == "order")){ 149 | tag_naming$order <- seq_len(nrow(tag_naming)) 150 | } 151 | code_lookup_thisfile <- tag_naming 152 | }else{ 153 | if(tag_naming == "none"){ 154 | ris_vals <- unique(x$ris) 155 | code_lookup_thisfile <- data.frame( 156 | code = ris_vals, 157 | field = ris_vals, 158 | order = seq_along(ris_vals), 159 | stringsAsFactors = FALSE 160 | ) 161 | }else if(tag_naming == "best_guess"){ 162 | code_lookup_thisfile <- detect_lookup(tags = unique(x$ris)) 163 | }else if(any(c("wos", "scopus", "ovid", "asp", "synthesisr") == tag_naming)){ 164 | rows <- which(synthesisr::code_lookup[, paste0("ris_", tag_naming)]) 165 | code_lookup_thisfile <- synthesisr::code_lookup[ 166 | rows, 167 | c("code", "order", "field") 168 | ] 169 | } 170 | } 171 | code_lookup_thisfile 172 | } 173 | 174 | #' Internal function to handle abstracts 175 | #' @noRd 176 | #' @keywords Internal 177 | parse_ris_abstract <- function(result){ 178 | if(length(result$abstract > 1)){ 179 | result$abstract <- paste(result$abstract, collapse = " ") 180 | result$abstract <- gsub("\\s+", " ", result$abstract) # remove multiple spaces 181 | } 182 | result 183 | } 184 | 185 | #' Internal function to handle years 186 | #' @noRd 187 | #' @keywords Internal 188 | parse_ris_year <- function(result){ 189 | if(any(names(result) == "year")){ 190 | if(any(nchar(result$year) >= 4)){ 191 | year_check <- 
regexpr("\\d{4}", result$year) 192 | if(any(year_check > 0)){ 193 | result$year <- substr( 194 | x = result$year[which(year_check>0)], 195 | start = year_check[1], 196 | stop = year_check[1]+3 197 | ) 198 | }else{ 199 | result$year <- "" 200 | } 201 | }else{ 202 | result$year <- "" 203 | } 204 | } 205 | result 206 | } 207 | 208 | #' Internal function to handle titles 209 | #' @noRd 210 | #' @keywords Internal 211 | parse_ris_title <- function(result){ 212 | if(any(names(result) == "title")){ 213 | if(length(result$title) > 1){ 214 | if(result$title[1] == result$title[2]){ 215 | result$title <- result$title[1] 216 | }else{ 217 | result$title <- paste(result$title, collapse = " ") 218 | } 219 | } 220 | result$title <- gsub("\\s+", " ", result$title) # remove multiple spaces 221 | result$title <- sub("\\.$", "", result$title) # remove final full stops 222 | } 223 | result 224 | } 225 | 226 | #' Internal function to handle journals 227 | #' @noRd 228 | #' @keywords Internal 229 | parse_ris_journal <- function(result){ 230 | if(any(names(result) == "journal")){ 231 | unique_journals <- unique(result$journal) 232 | if(length(unique_journals) > 1){ 233 | unique_journals <- unique_journals[order( 234 | nchar(unique_journals), 235 | decreasing = FALSE 236 | )] 237 | result$journal <- unique_journals[1] 238 | result$journal_secondary <- paste( 239 | unique_journals[c(2:length(unique_journals))], 240 | collapse = "; " 241 | ) 242 | }else{ 243 | result$journal <- unique_journals 244 | } 245 | result$journal <-gsub(" ", " ", result$journal) 246 | result$journal <-sub("\\.$", "", result$journal) 247 | } 248 | result 249 | } 250 | 251 | #' Internal function to handle page numbers 252 | #' @noRd 253 | #' @keywords Internal 254 | parse_ris_page_numbers <- function(result){ 255 | if(any(names(result) == "pages")){ 256 | if(length(result$pages) > 1){ 257 | result$pages <- paste(sort(result$pages), collapse = "-") 258 | } 259 | } 260 | result 261 | } 262 | -------------------------------------------------------------------------------- /R/prep_ris.R: -------------------------------------------------------------------------------- 1 | #' Internal function to clean a .ris file for import 2 | #' 3 | #' This function preps RIS files by cleaning common issues and converting to a 4 | #' common format. 5 | #' @param z A character vector that contains RIS bibliographic information. 6 | #' @param delimiter A string indicating the type of delimiter separating entries. 7 | #' @param type A string indicating the ris source; options are pubmed or generic. 8 | #' @return Returns a `data.frame` intended for import with `parse_ris()`. 9 | #' @noRd 10 | #' @keywords Internal 11 | prep_ris <- function( 12 | z, 13 | delimiter, 14 | type # either "pubmed" or "generic". Not specified by user 15 | ){ 16 | # detect tags 17 | if(type == "pubmed"){ 18 | ris_regex <- "^[[:upper:]]{2,4}\\s*-\\s" 19 | }else{ # i.e. 
generic 20 | ris_regex <- "(^([[:upper:]]{2}|[[:upper:]][[:digit:]])\\s+)|^ER$" 21 | # NOTE: "^ER$" is a bug fix for .ciw end rows 22 | } 23 | tags <- regexpr(ris_regex, z, perl = TRUE) 24 | z_dframe <- data.frame( 25 | text = z, 26 | row = seq_along(z), 27 | match_length = attr(tags, "match.length"), 28 | stringsAsFactors = FALSE 29 | ) 30 | z_list <- split(z_dframe, z_dframe$match_length) 31 | z_list <- lapply(z_list, function(a){ 32 | n <- a$match_length[1] 33 | if(n < 0){ 34 | result <- data.frame( 35 | ris = "", 36 | text = a$text, 37 | row_order = a$row, 38 | stringsAsFactors = FALSE 39 | ) 40 | }else{ 41 | result <- data.frame( 42 | ris = sub("\\s{0,}-\\s{0,}|^\\s+|\\s+$", "", substr(a$text, 1, n)), 43 | text = gsub("^\\s+|\\s+$", "", substr(a$text, n+1, nchar(a$text))), 44 | row_order = a$row, 45 | stringsAsFactors = FALSE 46 | ) 47 | } 48 | return(result) 49 | }) 50 | z_dframe <- do.call(rbind, z_list) 51 | z_dframe <- z_dframe[order(z_dframe$row), ] 52 | 53 | # clean up obvious errors 54 | z_dframe$ris <- gsub("[[:punct:]]", "", z_dframe$ris) 55 | z_dframe$text <- sub("^[[:punct:]]{0,1}\\s*", "", z_dframe$text) 56 | 57 | # replace tag information for delimiter == character | space 58 | if(delimiter == "character"){ # i.e. a single character repeated many times 59 | z_dframe$ris[which( 60 | unlist(lapply( 61 | strsplit(z, ""), 62 | function(a){ 63 | length(unique(a)) == 1 & length(a) > 6 64 | } 65 | )) 66 | )] <- "ER" 67 | } 68 | if(delimiter == "space"){ 69 | z_dframe$ris[which(z_dframe$ris == "" & z_dframe$text == "")] <- "ER" 70 | 71 | z_rollsum <- rollingsum(z_dframe$ris == "ER") 72 | if(any(z_rollsum > 1)){ 73 | z_dframe <- z_dframe[which(z_rollsum <= 1), ] 74 | } 75 | } 76 | if(delimiter == "endrow"){ 77 | # work out what most common starting tag is 78 | z_dframe$ref <- c(0, cumsum(z_dframe$ris == "ER")[ 79 | seq_len(nrow(z_dframe)-1)] 80 | ) # split by reference 81 | 82 | start_tags <- unlist(lapply( 83 | split(z_dframe$ris, z_dframe$ref), 84 | function(a){a[which(a != "")[1]]} 85 | )) 86 | 87 | # fix bug where not all entries start with same tag 88 | start_tag_xtab <- xtabs(~ start_tags) 89 | end_rows <- which(z_dframe$ris == "ER") 90 | # previous behavior: 91 | if(max(start_tag_xtab) == length(end_rows)){ 92 | start_tag <- names(which.max(start_tag_xtab)) 93 | row_df <- data.frame( 94 | start = which(z_dframe$ris == start_tag), 95 | end = end_rows 96 | ) 97 | # new option: 98 | }else{ 99 | row_df <- data.frame( 100 | start = c(1, end_rows[seq_len(length(end_rows) - 1)]), 101 | end = end_rows 102 | ) 103 | } 104 | 105 | z_list <- apply( 106 | row_df, 107 | 1, 108 | function(a){c(a[1]:a[2])} 109 | ) 110 | z_list <- lapply( 111 | z_list, 112 | function(a, lookup){lookup[a, ]}, 113 | lookup = z_dframe 114 | ) 115 | z_dframe <- as.data.frame( 116 | do.call(rbind, z_list) 117 | ) 118 | } 119 | 120 | # cleaning 121 | z_dframe$ref <- c(0, cumsum(z_dframe$ris == "ER")[ 122 | seq_len(nrow(z_dframe)-1)] 123 | ) # split by reference 124 | z_dframe <- z_dframe[which(z_dframe$text != ""), ] # remove empty rows 125 | z_dframe <- z_dframe[which(z_dframe$ris != "ER"), ] # remove end rows 126 | z_dframe$text <- trimws(z_dframe$text) 127 | 128 | # fill missing tags 129 | z_split <- split(z_dframe, z_dframe$ref) 130 | z_split <- lapply(z_split, function(a){ 131 | if(a$ris[1] == ""){ 132 | a$ris[1] <- "ZZ" 133 | } 134 | accum_ris <- Reduce(c, a$ris, accumulate = TRUE) 135 | a$ris <- unlist(lapply( 136 | accum_ris, 137 | function(b){ 138 | good_vals <- which(b != "") 139 | b[good_vals[length(good_vals)]] 140 | })) 141 | return(a) 142 | }) 143 | z_dframe <- as.data.frame( 144 | do.call(rbind, z_split) 145 | ) 146 | 147 | return(z_dframe) 148 | } 149 | -------------------------------------------------------------------------------- /R/read_refs.R: -------------------------------------------------------------------------------- 1 | #' Import bibliographic search results 2 | #' 3 | #' Import common bibliographic reference formats such as `.bib`, `.ris`, or 4 | #' `.txt`. 5 | #' @param filename A path to a filename or vector of filenames containing search 6 | #' results to import. 7 | #' @param tag_naming Either a length-1 character stating how ris tags should be 8 | #' replaced (see details for a list of options), or an object inheriting from 9 | #' class `data.frame` containing user-defined replacement tags. 10 | #' @param return_df If `TRUE` (default), returns a `data.frame`; if `FALSE`, 11 | #' returns a list. 12 | #' @param verbose If `TRUE`, prints status updates (defaults to `FALSE`). 13 | #' @details The default for argument `tag_naming` is `"best_guess"`, 14 | #' which estimates what database has been used for ris tag replacement, then 15 | #' fills any gaps with generic tags. Any tags missing from the database (i.e. 16 | #' `code_lookup`) are passed unchanged. Other options are to use tags from 17 | #' Web of Science (`"wos"`), Scopus (`"scopus"`), Ovid (`"ovid"`) 18 | #' or Academic Search Premier (`"asp"`). If a `data.frame` is given, 19 | #' then it must contain two columns: `"code"` listing the original tags in 20 | #' the source document, and `"field"` listing the replacement column/tag 21 | #' names. The `data.frame` may optionally include a third column named 22 | #' `"order"`, which specifies the order of columns in the resulting 23 | #' `data.frame`; otherwise this will be taken as the row order. Finally, 24 | #' passing `"none"` to `tag_naming` suppresses tag replacement. 25 | #' @return Returns a `data.frame` or `list` of assembled search results. 26 | #' @importFrom dplyr bind_rows 27 | #' @importFrom rlang abort 28 | #' @importFrom vroom default_locale 29 | #' @example inst/examples/read_refs.R 30 | #' @export 31 | read_refs <- function( 32 | filename, 33 | tag_naming = "best_guess", 34 | return_df = TRUE, 35 | verbose = FALSE, 36 | locale = vroom::default_locale() 37 | ){ 38 | 39 | if(missing(filename)){ 40 | abort("filename is missing with no default") 41 | } 42 | file_check <- unlist(lapply(filename, file.exists)) 43 | if(any(!file_check)){ 44 | abort("file not found") 45 | } 46 | 47 | if(length(filename) > 1){ 48 | result_list <- lapply(filename, function(a){ 49 | read_ref( 50 | filename = a, 51 | tag_naming = tag_naming, 52 | return_df = return_df, 53 | verbose = verbose, 54 | locale = locale 55 | ) 56 | }) 57 | names(result_list) <- filename 58 | 59 | # drop any unrecognized file types 60 | null_check <- unlist(lapply(result_list, is.null)) 61 | if(any(null_check)){ 62 | result_list <- result_list[-which(null_check)] 63 | } 64 | 65 | if(return_df){ 66 | result <- bind_rows(result_list) 67 | result$filename <- unlist( 68 | lapply(seq_len(length(result_list)), 69 | function(a, data){ 70 | rep(names(data)[a], nrow(data[[a]])) 71 | }, 72 | data = result_list 73 | )) 74 | return(result) 75 | }else{ 76 | result <- do.call(c, result_list) 77 | return(result) 78 | } 79 | 80 | }else{ # i.e. 
if only one filename given 81 | return( 82 | read_ref( 83 | filename, 84 | tag_naming = tag_naming, 85 | return_df = return_df, 86 | verbose = verbose, 87 | locale = locale 88 | ) 89 | ) 90 | } 91 | } 92 | 93 | #' Internal function called by read_refs for each file 94 | #' 95 | #' @description This is the underlying workhorse function that imports 96 | #' bibliographic files; primarily intended to be called from read_refs. 97 | #' @param filename A path to a filename containing search results to import. 98 | #' @param return_df If TRUE, returns a data.frame; if FALSE, returns a list. 99 | #' @param verbose If TRUE, prints status updates. 100 | #' @return Returns a data.frame or list of assembled search results. 101 | #' @importFrom rlang abort 102 | #' @importFrom rlang warn 103 | #' @importFrom tibble tibble 104 | #' @importFrom vroom default_locale 105 | #' @importFrom vroom vroom_lines 106 | #' @noRd 107 | #' @keywords Internal 108 | read_ref <- function( 109 | filename, 110 | tag_naming = "best_guess", 111 | return_df = TRUE, 112 | verbose = FALSE, 113 | locale = default_locale() 114 | ){ 115 | 116 | # error checking for replace tags 117 | valid_tags <- c("best_guess", "none", "wos", "scopus", "ovid", "asp", "synthesisr") 118 | if(inherits(tag_naming, "character")){ 119 | if(!any(valid_tags == tag_naming)){ 120 | abort("tag_naming should be one of 'best_guess', 'none', 'wos', 'scopus', 'ovid', 'asp' or 'synthesisr'.") 121 | } 122 | } 123 | if(inherits(tag_naming, "data.frame")){ 124 | if(any(!(c("code", "field") %in% colnames(tag_naming)))){ 125 | abort("if a data.frame is supplied to replace_tags, it must contain columns 'code' & 'field'.") 126 | } 127 | } 128 | 129 | if(verbose){cat(paste0("Reading file ", filename, " ... "))} 130 | parse_function <- vroom_lines(filename, 131 | n_max = 200, 132 | locale = locale) |> 133 | detect_parser() 134 | 135 | df <- switch(parse_function, 136 | "parse_ris" = { 137 | parse_ris(x = vroom_lines(filename, locale = locale), 138 | tag_naming = tag_naming) 139 | }, 140 | "parse_pubmed" = { 141 | parse_pubmed(x = vroom_lines(filename, locale = locale)) 142 | }, 143 | "parse_bibtex" = { 144 | parse_bibtex(x = vroom_lines(filename, locale = locale)) 145 | }, 146 | "parse_csv" = { 147 | vroom(filename, 148 | delim = ",", 149 | locale = locale) |> 150 | match_columns() 151 | }, 152 | "parse_tsv" = { 153 | vroom(filename, 154 | delim = "\t", 155 | locale = locale) |> 156 | match_columns() 157 | }, 158 | { # aka "unknown" 159 | NULL 160 | } 161 | ) 162 | 163 | if(is.null(df)){ 164 | warn(paste("file type not recognised for ", filename, " - skipping")) 165 | return(NULL) 166 | } 167 | 168 | # return object in correct format 169 | # note: the `if` test here is needed because `csv` and `tsv` are already 170 | # `data.frame`s, whereas all other formats return `bibliography`s 171 | if(inherits(df, "data.frame")){ 172 | if(!return_df){df <- as.bibliography(df)} 173 | }else{ 174 | if(return_df){df <- as.data.frame(df) |> tibble()} 175 | } 176 | if(inherits(df, "data.frame")){df <- clean_df(df)} 177 | if(verbose){cat("done\n")} 178 | return(df) 179 | } 180 | -------------------------------------------------------------------------------- /R/reexports.R: -------------------------------------------------------------------------------- 1 | #' @importFrom tibble as_tibble 2 | #' @export 3 | tibble::as_tibble 4 | -------------------------------------------------------------------------------- /R/string_functions.R: 
-------------------------------------------------------------------------------- 1 | #' Calculate similarity between two strings 2 | #' @description These functions each access a specific `"method"` argument 3 | #' provided by `stringdist`, and are provided for convenient calling by 4 | #' `find_duplicates()`. They do not include any new functionality beyond that 5 | #' given by `stringdist`, which you should use for your own analyses. 6 | #' @param a A character vector of items to match to b. 7 | #' @param b A character vector of items to match to a. 8 | #' @return Returns a score of the same length as b, giving the dissimilarity between 9 | #' a and b. 10 | #' @importFrom stringdist stringdist 11 | #' @name string_ 12 | #' @export 13 | string_osa <- function(a, b){stringdist(a, b, method = "osa")} 14 | 15 | ## NOTE: This looks like poor coding practice. Consider deprecating. 16 | 17 | #' @rdname string_ 18 | #' @export 19 | string_lv <- function(a, b){stringdist(a, b, method = "lv")} 20 | 21 | #' @rdname string_ 22 | #' @export 23 | string_dl <- function(a, b){stringdist(a, b, method = "dl")} 24 | 25 | #' @rdname string_ 26 | #' @export 27 | string_hamming <- function(a, b){stringdist(a, b, method = "hamming")} 28 | 29 | #' @rdname string_ 30 | #' @export 31 | string_lcs <- function(a, b){stringdist(a, b, method = "lcs")} 32 | 33 | #' @rdname string_ 34 | #' @export 35 | string_qgram <- function(a, b){stringdist(a, b, method = "qgram")} 36 | 37 | #' @rdname string_ 38 | #' @export 39 | string_cosine <- function(a, b){stringdist(a, b, method = "cosine")} 40 | 41 | #' @rdname string_ 42 | #' @export 43 | string_jaccard <- function(a, b){stringdist(a, b, method = "jaccard")} 44 | 45 | #' @rdname string_ 46 | #' @export 47 | string_jw <- function(a, b){stringdist(a, b, method = "jw")} 48 | 49 | #' @rdname string_ 50 | #' @export 51 | string_soundex <- function(a, b){stringdist(a, b, method = "soundex")} 52 | -------------------------------------------------------------------------------- /R/synthesisr-package.R: -------------------------------------------------------------------------------- 1 | #' synthesisr: Import, assemble, and deduplicate bibliographic datasets 2 | #' 3 | #' Systematic review searches include multiple databases 4 | #' that export results in a variety of formats with overlap in 5 | #' coverage between databases. To streamline the process of importing, 6 | #' assembling, and deduplicating results, `synthesisr` recognizes 7 | #' bibliographic files exported from databases commonly used for 8 | #' systematic reviews and merges results into a standardized format. 9 | #' 10 | #' @section Import & Export: 11 | #' The key task performed by `synthesisr` is flexible import and 12 | #' presentation of bibliographic data. This is typically achieved by 13 | #' `read_refs()`, which can import multiple files at once and link them together 14 | #' into a single `data.frame`. Conversely, export is via `write_refs()`. 
Users 15 | #' who require more detailed control can use the following functions: 16 | #' 17 | #' \itemize{ 18 | #' \item [read_refs] Read bibliographic data 19 | #' \item [write_refs] Write bibliographic data 20 | #' \item [detect_] Detect file attributes 21 | #' \item [parse_] Parse a vector containing bibliographic data 22 | #' \item [clean_] Cleaning functions for author and column names 23 | #' \item [code_lookup] A dataset of potential ris tags 24 | #' } 25 | #' 26 | #' @section Formatting: 27 | #' \itemize{ 28 | #' \item [bibliography-class] Methods for class `bibliography` 29 | #' \item [format_citation] Return a clean citation from a `bibliography` or `data.frame` 30 | #' \item [add_line_breaks] Set a maximum character width for strings 31 | #'} 32 | #' 33 | #' @section Deduplication: 34 | #' When importing from multiple databases, it is likely that there will be 35 | #' duplicates in the resulting dataset. The easiest way to deal with this 36 | #' problem in `synthesisr` is using the `deduplicate()` function, but this can 37 | #' be risky, particularly if there are no DOIs in the dataset. To get finer 38 | #' control of the deduplication process, consider using the sub-functions: 39 | #' 40 | #'\itemize{ 41 | #' \item [deduplicate] Semi-automated duplicate removal 42 | #' \item [find_duplicates] Locate potentially duplicated references 43 | #' \item [extract_unique_references] Return a data.frame with only 'unique' references 44 | #' \item [review_duplicates] Manually review potential duplicates 45 | #' \item [override_duplicates] Manually override identified duplicates 46 | #' \item [fuzz_] Fuzzy string matching c/o `fuzzywuzzy` 47 | #' \item [string_] Fuzzy string matching c/o `stringdist` 48 | #'} 49 | #' 50 | #' @section Deprecated: 51 | #' 52 | #' \itemize{ 53 | #' \item [merge_columns] Synonymous with [dplyr::bind_rows] 54 | #' } 55 | #' @name synthesisr-package 56 | #' @docType package 57 | "_PACKAGE" 58 | -------------------------------------------------------------------------------- /R/write_refs.R: -------------------------------------------------------------------------------- 1 | #' Export data to a bibliographic format 2 | #' 3 | #' @description This function exports data.frames containing bibliographic 4 | #' information to either a .ris or .bib file. 5 | #' @param x Either a data.frame containing bibliographic information or an 6 | #' object of class bibliography. 7 | #' @param file Filename to save to. 8 | #' @param format What format should the data be exported as? Options are ris or 9 | #' bib. 10 | #' @param tag_naming What naming convention should be used to write RIS files? 11 | #' See details for options. 12 | #' @param write Logical: should a file be written? If FALSE, returns a 13 | #' character vector instead. 14 | #' @return This function is typically called for its side effect of writing a 15 | #' file in the specified location and format. If \code{write} is FALSE, returns 16 | #' a character vector containing bibliographic information in the specified 17 | #' format. 
18 | #' @example inst/examples/parse_.R 19 | #' @rdname write_refs 20 | #' @importFrom rlang abort 21 | #' @export 22 | write_refs <- function( 23 | x, 24 | file, 25 | format = "ris", 26 | tag_naming = "synthesisr", 27 | write = TRUE 28 | ){ 29 | # check input data 30 | if(!inherits(x, c("bibliography", "data.frame"))) { 31 | abort("write_refs only accepts objects of class 'data.frame' or 'bibliography'") 32 | } 33 | if(inherits(x, "data.frame")){ 34 | x <- x |> 35 | as.data.frame() |> 36 | as.bibliography() 37 | } 38 | 39 | if(missing(file) & (write == TRUE)){ 40 | abort("`file` is missing, with no default") 41 | } 42 | 43 | # check format 44 | if(!(format %in% c("ris", "bib"))){ 45 | abort("format must be either 'ris' or 'bib'") 46 | } 47 | 48 | # check output format - consistent with read_refs 49 | if(format == "ris"){ 50 | valid_tags <- c("best_guess", "none", "wos", "scopus", "ovid", "asp", "synthesisr") 51 | if(inherits(tag_naming, "character")){ 52 | if(!any(valid_tags == tag_naming)){ 53 | abort("tag_naming should be one of 'best_guess', 'none', 'wos', 'scopus', 'ovid', 'asp' or 'synthesisr'.") 54 | } 55 | }else if(inherits(tag_naming, "data.frame")){ 56 | if(any(!(c("code", "field") %in% colnames(tag_naming)))){ 57 | abort("if a data.frame is supplied to tag_naming, it must contain columns 'code' & 'field'.") 58 | } 59 | } 60 | } 61 | 62 | # write result in correct format 63 | export <- switch(format, 64 | "bib" = {write_bib(x)}, 65 | "ris" = {write_ris(x, tag_naming = tag_naming)} 66 | ) 67 | names(export) <- NULL 68 | 69 | if(write) { 70 | write.table( 71 | export, 72 | check_filename(file, format), 73 | quote = FALSE, 74 | row.names = FALSE, 75 | col.names = FALSE 76 | ) 77 | }else{ 78 | return(invisible(export)) 79 | } 80 | } 81 | 82 | #' Internal function to check file names 83 | #' @noRd 84 | #' @keywords Internal 85 | check_filename <- function(x, format){ 86 | # check file information 87 | if(length(x) > 1){ 88 | abort("argument 'file' should be a length-1 character") 89 | } 90 | if(!inherits(x, "character")){ 91 | abort("argument 'file' should be an object of class `character`") 92 | } 93 | if(grepl("\\.[[:alpha:]]{2,4}$", x)){ 94 | filename <- x 95 | }else{ 96 | filename <- paste(x, format, sep = ".") 97 | } 98 | filename 99 | } 100 | 101 | 102 | # Parse an object of class bibliography for export in bib format 103 | #' @rdname write_refs 104 | #' @export 105 | write_bib <- function(x) { 106 | # process basic text 107 | result <- lapply(x, function(a) { 108 | if (any(names(a) == "author")) { 109 | a$author <- paste(a$author, collapse = " and ") 110 | } 111 | a <- lapply(a, function(b) { 112 | # ensure only one entry per value 113 | if (length(b) > 1) { 114 | paste(b, collapse = "; ") 115 | } else{ 116 | b 117 | } 118 | }) 119 | paste0(names(a), "={", a, "},") # format as text 120 | }) 121 | 122 | # add article identifier info 123 | export <- unlist( 124 | lapply(seq_len(length(result)), 125 | function(a, source, entry_names) { 126 | c(paste0("@ARTICLE{", entry_names[a], ","), 127 | source[a], 128 | "}", 129 | "") 130 | }, 131 | source = result, 132 | entry_names = names(x))) 133 | names(export) <- NULL 134 | return(export) 135 | 136 | } 137 | 138 | 139 | # Parse an object of class bibliography for export in ris format 140 | #' @rdname write_refs 141 | #' @export 142 | write_ris <- function(x, 143 | tag_naming = "synthesisr" 144 | ){ 145 | result <- lapply(x, function(a, lookup) { 146 | 147 | # convert to tagged vector 148 | b <- do.call(c, a) 149 | b <- b[!is.na(b)] 150 | b <- 
data.frame( 151 | tag = c(names(b), "end"), 152 | entry = c(b, ""), 153 | stringsAsFactors = FALSE 154 | ) 155 | rownames(b) <- NULL 156 | b$tag <- gsub("[[:digit:]]", "", b$tag) 157 | 158 | # page information needs to be treated separately 159 | if(any(b$tag == "pages")){ 160 | page_row <- which(b$tag == "pages") 161 | page_text <- b$entry[page_row] 162 | if(grepl("-", page_text)){ 163 | text_lookup <- list( 164 | regexpr("^[[:digit:]]+", page_text), 165 | regexpr("-[[:digit:]]+", page_text) 166 | ) 167 | if(all(text_lookup > 0)){ 168 | text_cleaned <- unlist(lapply( 169 | text_lookup, 170 | function(b){substr(page_text, b, b + attr(b, "match.length") - 1)} 171 | )) 172 | new_rows <- data.frame( 173 | tag = c("startpage", "endpage"), 174 | entry = gsub("[[:punct:]]", "", text_cleaned), 175 | stringsAsFactors = FALSE 176 | ) 177 | b <- as.data.frame(rbind( 178 | b[c(1:(page_row - 1)),], 179 | new_rows, 180 | b[c((page_row + 1):nrow(b)),] 181 | )) 182 | } 183 | } 184 | } 185 | b$order <- seq_len(nrow(b)) 186 | 187 | # substitute tags for ris format versions 188 | b <- merge( 189 | lookup, 190 | b, 191 | by.x = "field", 192 | by.y = "tag", 193 | all.x = FALSE, 194 | all.y = FALSE 195 | ) 196 | b <- b[order(b$order), c(2:3)] 197 | 198 | # concatenate rows, return a vector of strings 199 | return( 200 | c(paste(b$code, b$entry, sep = " - "), "ER - ", "") 201 | ) 202 | 203 | }, 204 | lookup = synthesisr::code_lookup[ 205 | synthesisr::code_lookup[, paste0("ris_", tag_naming)], 206 | c("code", "field") 207 | ] 208 | ) 209 | 210 | export <- do.call(c, result) 211 | return(export) 212 | } 213 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, echo = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>" 11 | ) 12 | ``` 13 | 14 |
Tools for bibliographic data
15 | 16 | Metascientific analyses - such as systematic reviews and meta-analyses - commonly 17 | involve searches of multiple bibliographic databases. These databases use a 18 | range of different data formats, and have differing degrees of overlap in the 19 | journals and articles that they index. To streamline the process of importing, 20 | assembling, and deduplicating results, `synthesisr` recognizes the file output 21 | of commonly used databases for systematic reviews and merges results into a 22 | tibble. 23 | 24 | If you have questions, comments, feature requests, or find a bug, [please open an 25 | issue](https://github.com/mjwestgate/synthesisr/issues). 26 | 27 | ## Installation 28 | 29 | `synthesisr` is available on CRAN: 30 | 31 | ```{r} 32 | #| eval: false 33 | install.packages("synthesisr") 34 | ``` 35 | 36 | Alternatively you can install from GitHub: 37 | 38 | ```{r} 39 | #| eval: FALSE 40 | remotes::install_github("mjwestgate/synthesisr") 41 | ``` 42 | 43 | ## Basic usage 44 | 45 | The default function for importing bibliographic data is `read_refs()`: 46 | 47 | ```{r} 48 | #| eval: false 49 | x <- read_refs("a_file.bib") 50 | ``` 51 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
5 | Tools for bibliographic data 6 |
7 | 8 | Metascientific analyses - such as systematic reviews and meta-analyses - 9 | commonly involve searches of multiple bibliographic databases. These 10 | databases use a range of different data formats, and have differing 11 | degrees of overlap in the journals and articles that they index. To 12 | streamline the process of importing, assembling, and deduplicating 13 | results, `synthesisr` recognizes the file output of commonly used 14 | databases for systematic reviews and merges results into a tibble. 15 | 16 | If you have questions, comments, feature requests, or find a bug, 17 | [please open an issue](https://github.com/mjwestgate/synthesisr/issues). 18 | 19 | ## Installation 20 | 21 | `synthesisr` is available on CRAN: 22 | 23 | ``` r 24 | install.packages("synthesisr") 25 | ``` 26 | 27 | Alternatively you can install from GitHub: 28 | 29 | ``` r 30 | remotes::install_github("mjwestgate/synthesisr") 31 | ``` 32 | 33 | ## Basic usage 34 | 35 | The default function for importing bibliographic data is `read_refs()`: 36 | 37 | ``` r 38 | x <- read_refs("a_file.bib") 39 | ``` 40 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://martinwestgate.com/synthesisr/ 2 | template: 3 | bootstrap: 5 4 | bslib: 5 | code_font: {google: "Source Code Pro"} 6 | pkgdown-nav-height: 130px 7 | params: 8 | bootswatch: litera 9 | development: 10 | mode: auto 11 | navbar: 12 | structure: 13 | left: 14 | - home 15 | - articles 16 | - reference 17 | right: 18 | - search 19 | - news 20 | - github 21 | components: 22 | articles: 23 | text: Articles 24 | menu: 25 | - text: Overview 26 | href: articles/overview.html 27 | news: 28 | text: News 29 | href: news/index.html 30 | reference: 31 | - title: Overview 32 | contents: 33 | - synthesisr-package 34 | - title: Import & Export 35 | contents: 36 | - read_refs 37 | - write_refs 38 | - detect_ 39 | - parse_ 40 | - clean_ 41 | - code_lookup 42 | - title: Formatting 43 | contents: 44 | - bibliography-class 45 | - format_citation 46 | - add_line_breaks 47 | - title: Deduplication 48 | contents: 49 | - deduplicate 50 | - find_duplicates 51 | - extract_unique_references 52 | - review_duplicates 53 | - override_duplicates 54 | - fuzz_ 55 | - string_ 56 | - title: Deprecated 57 | contents: 58 | - merge_columns 59 | -------------------------------------------------------------------------------- /data/code_lookup.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/data/code_lookup.RData -------------------------------------------------------------------------------- /inst/examples/clean_.R: -------------------------------------------------------------------------------- 1 | df <- data.frame( 2 | X..title. 
= c( 3 | "EviAtlas: a tool for visualising evidence synthesis databases", 4 | "revtools: An R package to support article screening for evidence synthesis", 5 | "An automated approach to identifying search terms for systematic reviews", 6 | "Reproducible, flexible and high-throughput data extraction from primary literature"), 7 | YEAR = c("2019", "2019", "2019", "2019"), 8 | authors = c( 9 | "Haddaway et al", 10 | "Westgate", 11 | "EM Grames AND AN Stillman & MW Tingley and CS Elphick", 12 | "Pick et al") 13 | ) 14 | 15 | clean_df(df) 16 | 17 | # or use sub-functions 18 | colnames(df) <- clean_colnames(df) 19 | # colnames(df) <- clean_colnames(colnames(df)) # also works 20 | df$author <- clean_authors(df$author) 21 | 22 | -------------------------------------------------------------------------------- /inst/examples/deduplicate.R: -------------------------------------------------------------------------------- 1 | my_df <- data.frame( 2 | title = c( 3 | "EviAtlas: a tool for visualising evidence synthesis databases", 4 | "revtools: An R package to support article screening for evidence synthesis", 5 | "An automated approach to identifying search terms for systematic reviews", 6 | "Reproducible, flexible and high-throughput data extraction from primary literature", 7 | "eviatlas:tool for visualizing evidence synthesis databases.", 8 | "REVTOOLS a package to support article-screening for evidence synthsis" 9 | ), 10 | year = c("2019", "2019", "2019", "2019", NA, NA), 11 | authors = c("Haddaway et al", "Westgate", 12 | "Grames et al", "Pick et al", NA, NA), 13 | stringsAsFactors = FALSE 14 | ) 15 | 16 | # run deduplication 17 | dups <- find_duplicates( 18 | my_df$title, 19 | method = "string_osa", 20 | rm_punctuation = TRUE, 21 | to_lower = TRUE 22 | ) 23 | 24 | extract_unique_references(my_df, matches = dups) 25 | 26 | # or, in one line: 27 | deduplicate(my_df, "title", 28 | method = "string_osa", 29 | rm_punctuation = TRUE, 30 | to_lower = TRUE) 31 | -------------------------------------------------------------------------------- /inst/examples/detect_.R: -------------------------------------------------------------------------------- 1 | revtools <- c( 2 | "", 3 | "PMID- 31355546", 4 | "VI - 10", 5 | "IP - 4", 6 | "DP - 2019 Dec", 7 | "TI - revtools: An R package to support article 8 | screening for evidence synthesis.", 9 | "PG - 606-614", 10 | "LID - 10.1002/jrsm.1374 [doi]", 11 | "AU - Westgate MJ", 12 | "LA - eng", 13 | "PT - Journal Article", 14 | "JT - Research Synthesis Methods", 15 | "" 16 | ) 17 | 18 | # detect basic attributes of ris files 19 | detect_parser(revtools) 20 | detect_delimiter(revtools) 21 | 22 | # determine which tag format to use 23 | tags <- trimws(unlist(lapply( 24 | strsplit(revtools, "- "), 25 | function(a){a[1]} 26 | ))) 27 | pubmed_tag_list <- detect_lookup(tags[!is.na(tags)]) 28 | 29 | # find year data in other columns 30 | df <- as.data.frame(parse_pubmed(revtools)) 31 | df$year <- detect_year(df) 32 | -------------------------------------------------------------------------------- /inst/examples/format_citation.R: -------------------------------------------------------------------------------- 1 | roses <- c("@article{haddaway2018, 2 | title={ROSES RepOrting standards for Systematic Evidence Syntheses: 3 | pro forma, flow-diagram and descriptive summary of the plan and 4 | conduct of environmental systematic reviews and systematic maps}, 5 | author={Haddaway, Neal R and Macura, Biljana and Whaley, Paul and Pullin, Andrew S}, 6 | journal={Environmental 
Evidence}, 7 | volume={7}, 8 | number={1}, 9 | pages={7}, 10 | year={2018}, 11 | publisher={Springer} 12 | }") 13 | 14 | tmp <- tempfile() 15 | writeLines(roses, tmp) 16 | 17 | citation <- read_refs(tmp) 18 | format_citation(citation) 19 | -------------------------------------------------------------------------------- /inst/examples/fuzzdist.R: -------------------------------------------------------------------------------- 1 | fuzzdist("On the Origin of Species", 2 | "Of the Original Specs", 3 | method = "fuzz_m_ratio") 4 | -------------------------------------------------------------------------------- /inst/examples/merge_columns.R: -------------------------------------------------------------------------------- 1 | df_1 <- data.frame( 2 | title = c( 3 | "EviAtlas: a tool for visualising evidence synthesis databases", 4 | "revtools: An R package to support article screening for evidence synthesis" 5 | ), 6 | year = c("2019", "2019") 7 | ) 8 | 9 | df_2 <- data.frame( 10 | title = c( 11 | "An automated approach to identifying search terms for systematic reviews", 12 | "Reproducible, flexible and high-throughput data extraction from primary literature" 13 | ), 14 | authors = c("Grames et al", "Pick et al") 15 | ) 16 | 17 | merge_columns(df_1, df_2) 18 | -------------------------------------------------------------------------------- /inst/examples/parse_.R: -------------------------------------------------------------------------------- 1 | eviatlas <- c( 2 | "TY - JOUR", 3 | "AU - Haddaway, Neal R.", 4 | "AU - Feierman, Andrew", 5 | "AU - Grainger, Matthew J.", 6 | "AU - Gray, Charles T.", 7 | "AU - Tanriver-Ayder, Ezgi", 8 | "AU - Dhaubanjar, Sanita", 9 | "AU - Westgate, Martin J.", 10 | "PY - 2019", 11 | "DA - 2019/06/04", 12 | "TI - EviAtlas: a tool for visualising evidence synthesis databases", 13 | "JO - Environmental Evidence", 14 | "SP - 22", 15 | "VL - 8", 16 | "IS - 1", 17 | "SN - 2047-2382", 18 | "UR - https://doi.org/10.1186/s13750-019-0167-1", 19 | "DO - 10.1186/s13750-019-0167-1", 20 | "ID - Haddaway2019", 21 | "ER - " 22 | ) 23 | 24 | detect_parser(eviatlas) # = "parse_ris" 25 | df <- as.data.frame(parse_ris(eviatlas)) 26 | ris_out <- write_refs(df, format = "ris", write = FALSE) 27 | -------------------------------------------------------------------------------- /inst/examples/read_refs.R: -------------------------------------------------------------------------------- 1 | litsearchr <- c( 2 | "@article{grames2019, 3 | title={An automated approach to identifying search terms for 4 | systematic reviews using keyword co-occurrence networks}, 5 | author={Grames, Eliza M and Stillman, Andrew N and Tingley, Morgan W and Elphick, Chris S}, 6 | journal={Methods in Ecology and Evolution}, 7 | volume={10}, 8 | number={10}, 9 | pages={1645--1654}, 10 | year={2019}, 11 | publisher={Wiley Online Library} 12 | }" 13 | ) 14 | 15 | tmp <- tempfile() 16 | 17 | writeLines(litsearchr, tmp) 18 | 19 | df <- read_refs(tmp, return_df = TRUE, verbose = TRUE) 20 | -------------------------------------------------------------------------------- /inst/hex/Space_Mono/OFL.txt: -------------------------------------------------------------------------------- 1 | Copyright 2016 Google Inc. All Rights Reserved. 2 | 3 | This Font Software is licensed under the SIL Open Font License, Version 1.1. 
4 | This license is copied below, and is also available with a FAQ at: 5 | http://scripts.sil.org/OFL 6 | 7 | 8 | ----------------------------------------------------------- 9 | SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007 10 | ----------------------------------------------------------- 11 | 12 | PREAMBLE 13 | The goals of the Open Font License (OFL) are to stimulate worldwide 14 | development of collaborative font projects, to support the font creation 15 | efforts of academic and linguistic communities, and to provide a free and 16 | open framework in which fonts may be shared and improved in partnership 17 | with others. 18 | 19 | The OFL allows the licensed fonts to be used, studied, modified and 20 | redistributed freely as long as they are not sold by themselves. The 21 | fonts, including any derivative works, can be bundled, embedded, 22 | redistributed and/or sold with any software provided that any reserved 23 | names are not used by derivative works. The fonts and derivatives, 24 | however, cannot be released under any other type of license. The 25 | requirement for fonts to remain under this license does not apply 26 | to any document created using the fonts or their derivatives. 27 | 28 | DEFINITIONS 29 | "Font Software" refers to the set of files released by the Copyright 30 | Holder(s) under this license and clearly marked as such. This may 31 | include source files, build scripts and documentation. 32 | 33 | "Reserved Font Name" refers to any names specified as such after the 34 | copyright statement(s). 35 | 36 | "Original Version" refers to the collection of Font Software components as 37 | distributed by the Copyright Holder(s). 38 | 39 | "Modified Version" refers to any derivative made by adding to, deleting, 40 | or substituting -- in part or in whole -- any of the components of the 41 | Original Version, by changing formats or by porting the Font Software to a 42 | new environment. 43 | 44 | "Author" refers to any designer, engineer, programmer, technical 45 | writer or other person who contributed to the Font Software. 46 | 47 | PERMISSION & CONDITIONS 48 | Permission is hereby granted, free of charge, to any person obtaining 49 | a copy of the Font Software, to use, study, copy, merge, embed, modify, 50 | redistribute, and sell modified and unmodified copies of the Font 51 | Software, subject to the following conditions: 52 | 53 | 1) Neither the Font Software nor any of its individual components, 54 | in Original or Modified Versions, may be sold by itself. 55 | 56 | 2) Original or Modified Versions of the Font Software may be bundled, 57 | redistributed and/or sold with any software, provided that each copy 58 | contains the above copyright notice and this license. These can be 59 | included either as stand-alone text files, human-readable headers or 60 | in the appropriate machine-readable metadata fields within text or 61 | binary files as long as those fields can be easily viewed by the user. 62 | 63 | 3) No Modified Version of the Font Software may use the Reserved Font 64 | Name(s) unless explicit written permission is granted by the corresponding 65 | Copyright Holder. This restriction only applies to the primary font name as 66 | presented to the users. 
67 | 68 | 4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font 69 | Software shall not be used to promote, endorse or advertise any 70 | Modified Version, except to acknowledge the contribution(s) of the 71 | Copyright Holder(s) and the Author(s) or with their explicit written 72 | permission. 73 | 74 | 5) The Font Software, modified or unmodified, in part or in whole, 75 | must be distributed entirely under this license, and must not be 76 | distributed under any other license. The requirement for fonts to 77 | remain under this license does not apply to any document created 78 | using the Font Software. 79 | 80 | TERMINATION 81 | This license becomes null and void if any of the above conditions are 82 | not met. 83 | 84 | DISCLAIMER 85 | THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 86 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF 87 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT 88 | OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE 89 | COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 90 | INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL 91 | DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 92 | FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM 93 | OTHER DEALINGS IN THE FONT SOFTWARE. 94 | -------------------------------------------------------------------------------- /inst/hex/Space_Mono/SpaceMono-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/inst/hex/Space_Mono/SpaceMono-Bold.ttf -------------------------------------------------------------------------------- /inst/hex/Space_Mono/SpaceMono-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/inst/hex/Space_Mono/SpaceMono-Regular.ttf -------------------------------------------------------------------------------- /inst/hex/hex.R: -------------------------------------------------------------------------------- 1 | # draw a hex sticker for `synthesisr` 2 | library(tibble) 3 | library(dplyr) 4 | library(string2path) 5 | library(showtext) 6 | library(sf) 7 | library(ggplot2) 8 | library(hexSticker) 9 | # library(viridis) 10 | # remotes::install_github("johannesbjork/LaCroixColoR") 11 | # library(LaCroixColoR) 12 | 13 | # get 'synthesisr' text as a polygon 14 | final_size <- 1.4 15 | synth_line <- string2path("synthesisr", 16 | font = "inst/hex/Space_Mono/SpaceMono-Bold.ttf") |> 17 | tibble::rowid_to_column() |> 18 | tibble() |> 19 | mutate(x = x - min(x), y = y - min(y)) |> # place both mins at 0 20 | mutate(y = y / max(x), x = x / max(x)) |> # now at x = c(0, 1) 21 | mutate(x = (x * final_size) - (final_size * 0.5), y = y * final_size) |> # scale to required size 22 | mutate(y = y - (max(y) * 0.5)) # centre vertically 23 | 24 | # convert to `sf` object to allow calculation of spatial properties 25 | text_polygons <- synth_line %>% 26 | st_as_sf(coords = c("x", "y")) |> 27 | group_by(path_id) |> 28 | summarise(geometry = st_combine(geometry)) |> 29 | st_cast("POLYGON") 30 | 31 | # need to clip 6 ('e') with 7 (inside of 'e') 32 | text_cutouts <- text_polygons[7, ] 33 | text_polygons <- text_polygons[-7, ] 34 | words <- st_difference(text_polygons, text_cutouts) 35 | 36 | # 
clean up 37 | rm(final_size, synth_line, text_polygons, text_cutouts) 38 | 39 | # now create hexagons 40 | # from hexSticker, but using sf objects 41 | create_hexagon <- function(scale = 1){ 42 | hexd <- data.frame(x = 1+c(rep(-sqrt(3)/2, 2), 0, rep(sqrt(3)/2, 2), 0), 43 | y = 1+c(0.5, -0.5, -1, -0.5, 0.5, 1)) 44 | rbind(hexd, hexd[1, ]) |> 45 | tibble() |> 46 | mutate(x = (x - 1) * scale, 47 | y = (y - 1) * scale) |> 48 | st_as_sf(coords = c("x", "y")) |> 49 | summarise(geometry = st_combine(geometry)) %>% 50 | st_cast("POLYGON") 51 | } 52 | 53 | external_hexagon <- create_hexagon(scale = 1.00) 54 | internal_hexagon <- create_hexagon(scale = 0.935) 55 | 56 | # now create vertical lines that intersect with words 57 | x_vec <- seq(-0.87, 0.87, by = 0.005) 58 | result_internal <- lapply(x_vec, function(a){ 59 | b <- data.frame(x = a, y = c(-1, 1)) |> 60 | st_as_sf(coords = c("x", "y")) |> 61 | summarise(geometry = st_combine(geometry)) |> 62 | st_cast("LINESTRING") |> 63 | st_intersection(words) 64 | 65 | tibble(x = a, length = sum(st_length(b))) 66 | }) |> 67 | bind_rows() 68 | 69 | result_external <- lapply(x_vec, function(a){ 70 | b <- data.frame(x = a, y = c(-1, 1)) |> 71 | st_as_sf(coords = c("x", "y")) |> 72 | summarise(geometry = st_combine(geometry)) |> 73 | st_cast("LINESTRING") |> 74 | st_intersection(internal_hexagon) 75 | 76 | b |> mutate(x = a) 77 | }) |> 78 | bind_rows() 79 | 80 | # merge 81 | background_lines <- left_join(result_external, 82 | result_internal, 83 | by = "x") 84 | 85 | # clean up 86 | rm(x_vec) 87 | 88 | # draw 89 | font_add("spacemono", "inst/hex/Space_Mono/SpaceMono-Regular.ttf") 90 | showtext_auto() 91 | 92 | edge_color <- "#000000" # "#b951c9" 93 | # palette <- lacroix_palette("CranRaspberry", n = 15, type = "continuous") |> 94 | # as.character() 95 | 96 | # example colors: 97 | # x <- lacroix_palette("CranRaspberry", n = 7, type = "continuous") |> as.character() 98 | simple_palette <- c("#c92029", 99 | "#a3086a", 100 | "#6c159e", 101 | "#0a238a") 102 | 103 | p <- ggplot() + 104 | geom_sf(data = external_hexagon, fill = "white", color = NA) + 105 | geom_sf(data = background_lines, 106 | mapping = aes( 107 | color = x, 108 | alpha = (length ^ 1.2)), 109 | linewidth = 0.3) + 110 | geom_sf(data = internal_hexagon, fill = NA, color = edge_color, linewidth = 0.1) + 111 | geom_sf(data = words, fill = "white", color = edge_color, linewidth = 0.1) + 112 | annotate(geom = "text", 113 | x = 0.7, 114 | y = -0.17, 115 | label = "mjwestgate", 116 | family = "spacemono", 117 | size = 8, 118 | hjust = 1, 119 | color = "#ffffff") + 120 | # geom_vline(xintercept = 0.35) + 121 | scale_colour_gradientn(colors = simple_palette) + 122 | # scale_color_viridis(option = "H") + 123 | scale_alpha(range = c(0.5, 1)) + 124 | # scale_color_gradient(low = "#800194", high = "#b951c9") + 125 | theme_void() + 126 | theme(legend.position = "none") 127 | 128 | ggsave("man/figures/logo.png", 129 | p, 130 | width = 43.9, 131 | height = 50.8, 132 | units = "mm", 133 | bg = "transparent", 134 | dpi = 600) 135 | -------------------------------------------------------------------------------- /inst/test-data/test_files.R: -------------------------------------------------------------------------------- 1 | eviatlas <- c( 2 | "TY - JOUR", 3 | "AU - Haddaway, Neal R.", 4 | "AU - Feierman, Andrew", 5 | "AU - Grainger, Matthew J.", 6 | "AU - Gray, Charles T.", 7 | "AU - Tanriver-Ayder, Ezgi", 8 | "AU - Dhaubanjar, Sanita", 9 | "AU - Westgate, Martin J.", 10 | "PY - 2019", 11 | "DA - 2019/06/04", 12 | 
"TI - EviAtlas: a tool for visualising evidence synthesis databases", 13 | "JO - Environmental Evidence", 14 | "SP - 22", 15 | "VL - 8", 16 | "IS - 1", 17 | "SN - 2047-2382", 18 | "UR - https://doi.org/10.1186/s13750-019-0167-1", 19 | "DO - 10.1186/s13750-019-0167-1", 20 | "ID - Haddaway2019", 21 | "ER - " 22 | ) 23 | 24 | litsearchr <- c( 25 | "@article{grames2019automated, 26 | title={An automated approach to identifying search terms for systematic reviews using keyword co-occurrence networks}, 27 | author={Grames, Eliza M and Stillman, Andrew N and Tingley, Morgan W and Elphick, Chris S}, 28 | journal={Methods in Ecology and Evolution}, 29 | volume={10}, 30 | number={10}, 31 | pages={1645--1654}, 32 | year={2019}, 33 | publisher={Wiley Online Library} 34 | }" 35 | ) 36 | 37 | res_synth_methods <- 38 | c( 39 | "", 40 | "PMID- 32336025", 41 | "OWN - NLM", 42 | "STAT- Publisher", 43 | "LR - 20200426", 44 | "IS - 1759-2887 (Electronic)", 45 | "IS - 1759-2879 (Linking)", 46 | "DP - 2020 Apr 26", 47 | "TI - Risk-Of-Bias VISualization (robvis): an R package and Shiny web app for", 48 | " visualizing risk-of-bias assessments.", 49 | "LID - 10.1002/jrsm.1411 [doi]", 50 | "AB - Despite a major increase in the range and number of software offerings now", 51 | " available to help researchers produce evidence syntheses, there is currently no", 52 | " generic tool for producing figures to display and explore the risk-of-bias", 53 | " assessments that routinely take place as part of systematic review. However,", 54 | " tools such as the R programming environment and Shiny (an R package for building ", 55 | " interactive web apps) have made it straightforward to produce new tools to help", 56 | " in producing evidence syntheses. We present a new tool, robvis (Risk-Of-Bias", 57 | " VISualization), available as an R package and web app, which facilitates rapid", 58 | " production of publication-quality risk-of-bias assessment figures. We present a", 59 | " timeline of the tool's development and its key functionality. This article is", 60 | " protected by copyright. All rights reserved.", 61 | "CI - This article is protected by copyright. All rights reserved.", 62 | "FAU - McGuinness, Luke A", 63 | "AU - McGuinness LA", 64 | "AUID- ORCID: https://orcid.org/0000-0001-8730-9761", 65 | "AD - MRC Integrative Epidemiology Unit at the University of Bristol, Bristol, UK.", 66 | "AD - Population Health Sciences, Bristol Medical School, University of Bristol,", 67 | " Bristol, UK.", 68 | "FAU - Higgins, Julian Pt", 69 | "AU - Higgins JP", 70 | "AD - MRC Integrative Epidemiology Unit at the University of Bristol, Bristol, UK.", 71 | "AD - Population Health Sciences, Bristol Medical School, University of Bristol,", 72 | " Bristol, UK.", 73 | "LA - eng", 74 | "PT - Journal Article", 75 | "DEP - 20200426", 76 | "PL - England", 77 | "TA - Res Synth Methods", 78 | "JT - Research synthesis methods", 79 | "JID - 101543738", 80 | "SB - IM", 81 | "OTO - NOTNLM", 82 | "OT - Data visualization", 83 | "OT - Evidence synthesis", 84 | "OT - R", 85 | "OT - Risk of bias", 86 | "EDAT- 2020/04/27 06:00", 87 | "MHDA- 2020/04/27 06:00", 88 | "CRDT- 2020/04/27 06:00", 89 | "PHST- 2020/02/27 00:00 [received]", 90 | "PHST- 2020/04/16 00:00 [revised]", 91 | "PHST- 2020/04/18 00:00 [accepted]", 92 | "PHST- 2020/04/27 06:00 [entrez]", 93 | "PHST- 2020/04/27 06:00 [pubmed]", 94 | "PHST- 2020/04/27 06:00 [medline]", 95 | "AID - 10.1002/jrsm.1411 [doi]", 96 | "PST - aheadofprint", 97 | "SO - Res Synth Methods. 2020 Apr 26. 
doi: 10.1002/jrsm.1411.", 98 | "", 99 | "PMID- 31355546", 100 | "OWN - NLM", 101 | "STAT- In-Process", 102 | "LR - 20200226", 103 | "IS - 1759-2887 (Electronic)", 104 | "IS - 1759-2879 (Linking)", 105 | "VI - 10", 106 | "IP - 4", 107 | "DP - 2019 Dec", 108 | "TI - revtools: An R package to support article screening for evidence synthesis.", 109 | "PG - 606-614", 110 | "LID - 10.1002/jrsm.1374 [doi]", 111 | "AB - The field of evidence synthesis is growing rapidly, with a corresponding increase", 112 | " in the number of software tools and workflows to support the construction of", 113 | " systematic reviews, systematic maps, and meta-analyses. Despite much progress,", 114 | " however, a number of problems remain, including slow integration of new", 115 | " statistical or methodological approaches into user-friendly software, low", 116 | " prevalence of open-source software, and poor integration among distinct software ", 117 | " tools. These issues hinder the utility and transparency of new methods to the", 118 | " research community. Here, I present revtools, an R package to support article", 119 | " screening during evidence synthesis projects. It provides tools for the import", 120 | " and deduplication of bibliographic data, screening of articles by title or", 121 | " abstract, and visualization of article content using topic models. The software", 122 | " is entirely open-source and combines command-line scripting for experienced", 123 | " programmers with custom-built user interfaces for casual users, with further", 124 | " methods to support article screening to be added over time. revtools provides", 125 | " free access to novel methods in an open-source environment and represents a", 126 | " valuable step in expanding the capacity of R to support evidence synthesis", 127 | " projects.", 128 | "CI - (c) 2019 John Wiley & Sons, Ltd.", 129 | "FAU - Westgate, Martin J", 130 | "AU - Westgate MJ", 131 | "AUID- ORCID: https://orcid.org/0000-0003-0854-2034", 132 | "AD - Fenner School of Environment & Society, The Australian National University,", 133 | " Acton, ACT, Australia.", 134 | "LA - eng", 135 | "PT - Journal Article", 136 | "DEP - 20191018", 137 | "PL - England", 138 | "TA - Res Synth Methods", 139 | "JT - Research synthesis methods", 140 | "JID - 101543738", 141 | "SB - IM", 142 | "OTO - NOTNLM", 143 | "OT - data visualization", 144 | "OT - meta-analysis", 145 | "OT - natural language processing", 146 | "OT - systematic review", 147 | "OT - topic models", 148 | "EDAT- 2019/07/30 06:00", 149 | "MHDA- 2019/07/30 06:00", 150 | "CRDT- 2019/07/30 06:00", 151 | "PHST- 2019/02/25 00:00 [received]", 152 | "PHST- 2019/06/12 00:00 [revised]", 153 | "PHST- 2019/07/23 00:00 [accepted]", 154 | "PHST- 2019/07/30 06:00 [pubmed]", 155 | "PHST- 2019/07/30 06:00 [medline]", 156 | "PHST- 2019/07/30 06:00 [entrez]", 157 | "AID - 10.1002/jrsm.1374 [doi]", 158 | "PST - ppublish", 159 | "SO - Res Synth Methods. 2019 Dec;10(4):606-614. doi: 10.1002/jrsm.1374. Epub 2019 Oct ", 160 | " 18." 
161 | ) 162 | 163 | writeLines(eviatlas, "tests/testthat/testdata/eviatlas.txt") 164 | writeLines(litsearchr, "tests/testthat/testdata/litsearchr.txt") 165 | writeLines(res_synth_methods, "tests/testthat/testdata/res_synth_methods.txt") 166 | -------------------------------------------------------------------------------- /man/add_line_breaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/add_line_breaks.R 3 | \name{add_line_breaks} 4 | \alias{add_line_breaks} 5 | \title{Add line breaks to one or more strings} 6 | \usage{ 7 | add_line_breaks(x, n = 50, max_n = NULL, html = FALSE, max_time = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{Either a string or a vector; if the vector is not of class character 11 | if will be coerced to one using \code{as.character()}.} 12 | 13 | \item{n}{Numeric: The desired number of characters that should separate 14 | consecutive line breaks.} 15 | 16 | \item{max_n}{DEPRECATED: If provided will currently overwrite \code{n}; otherwise 17 | synonymous with \code{n} and will be removed from future versions.} 18 | 19 | \item{html}{Logical: Should the line breaks be specified in html?} 20 | 21 | \item{max_time}{DEPRECATED: Previously the maximum amount of time (in 22 | seconds) allowed to adjust groups until character thresholds are reached. 23 | Ignored.} 24 | } 25 | \value{ 26 | Returns the input vector unaltered except for the addition of line 27 | breaks. 28 | } 29 | \description{ 30 | This function takes a vector of strings and adds line breaks 31 | every n characters. Primarily built to be called internally by 32 | \code{format_citation()}, this function has been made available as it can be 33 | useful in other contexts. 34 | } 35 | \details{ 36 | Line breaks are only added between words, so the value of n is 37 | actually a threshold value rather than being matched exactly. 38 | } 39 | \examples{ 40 | add_line_breaks(c("On the Origin of Species"), n = 10) 41 | } 42 | -------------------------------------------------------------------------------- /man/bibliography-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bibliography_functions.R 3 | \name{bibliography-class} 4 | \alias{bibliography-class} 5 | \alias{summary.bibliography} 6 | \alias{summary.bibliography,} 7 | \alias{print.bibliography,} 8 | \alias{c.bibliography,} 9 | \alias{as.data.frame.bibliography} 10 | \alias{print.bibliography} 11 | \alias{[.bibliography} 12 | \alias{c.bibliography} 13 | \alias{as.bibliography} 14 | \alias{as_tibble.bibliography} 15 | \title{bibliography-class} 16 | \usage{ 17 | \method{summary}{bibliography}(object, ...) 18 | 19 | \method{print}{bibliography}(x, n, ...) 20 | 21 | \method{[}{bibliography}(x, n) 22 | 23 | \method{c}{bibliography}(...) 24 | 25 | \method{as.data.frame}{bibliography}(x, ...) 26 | 27 | as.bibliography(x, ...) 
28 | 29 | \method{as_tibble}{bibliography}(x, ..., .rows, .name_repair, rownames) 30 | } 31 | \arguments{ 32 | \item{object}{An object of class 'bibliography'} 33 | 34 | \item{...}{Any further information} 35 | 36 | \item{x}{An object of class 'bibliography'} 37 | 38 | \item{n}{Number of items to select/print} 39 | 40 | \item{.rows}{currently ignored} 41 | 42 | \item{.name_repair}{currently ignored} 43 | 44 | \item{rownames}{currently ignored} 45 | } 46 | \description{ 47 | This is a small number of standard methods for interacting with class 'bibliography'. More may be added later. 48 | } 49 | \details{ 50 | Methods for class bibliography 51 | } 52 | -------------------------------------------------------------------------------- /man/clean_.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clean_functions.R 3 | \name{clean_} 4 | \alias{clean_} 5 | \alias{clean_df} 6 | \alias{clean_authors} 7 | \alias{clean_colnames} 8 | \title{Clean a \code{tibble} or vector} 9 | \usage{ 10 | clean_df(data) 11 | 12 | clean_authors(x) 13 | 14 | clean_colnames(x) 15 | } 16 | \arguments{ 17 | \item{data}{A \code{tibble} with bibliographic information.} 18 | 19 | \item{x}{A vector of strings} 20 | } 21 | \value{ 22 | Returns the input, but cleaner. 23 | } 24 | \description{ 25 | Cleans column and author names 26 | } 27 | \examples{ 28 | df <- data.frame( 29 | X..title. = c( 30 | "EviAtlas: a tool for visualising evidence synthesis databases", 31 | "revtools: An R package to support article screening for evidence synthesis", 32 | "An automated approach to identifying search terms for systematic reviews", 33 | "Reproducible, flexible and high-throughput data extraction from primary literature"), 34 | YEAR = c("2019", "2019", "2019", "2019"), 35 | authors = c( 36 | "Haddaway et al", 37 | "Westgate", 38 | "EM Grames AND AN Stillman & MW Tingley and CS Elphick", 39 | "Pick et al") 40 | ) 41 | 42 | clean_df(df) 43 | 44 | # or use sub-functions 45 | colnames(df) <- clean_colnames(df) 46 | # colnames(df) <- clean_colnames(colnames(df)) # also works 47 | df$author <- clean_authors(df$author) 48 | 49 | } 50 | -------------------------------------------------------------------------------- /man/code_lookup.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/code_lookup.R 3 | \docType{data} 4 | \name{code_lookup} 5 | \alias{code_lookup} 6 | \title{Bibliographic code lookup for search results assembly} 7 | \format{ 8 | A \code{data.frame} with 226 obs of 12 variables 9 | 10 | \describe{ 11 | \item{code}{code used in search results} 12 | \item{order}{the order in which to rank fields in assembled results} 13 | \item{category_description}{type of bibliographic data} 14 | \item{entry_description}{description of field} 15 | \item{field}{bibliographic field that codes correspond to} 16 | \item{ris_generic}{logical: If the code is used in generic ris files} 17 | \item{ris_wos}{logical: If the code is used in Web of Science ris files} 18 | \item{ris_pubmed}{logical: If the code is used in PubMed ris files} 19 | \item{ris_scopus}{logical: If the code is used in Scopus ris files} 20 | \item{ris_asp}{logical: If the code is used in Academic Search Premier ris files} 21 | \item{ris_ovid}{logical: If the code is used in Ovid ris files} 22 | \item{ris_synthesisr}{logical: If the code used in 
synthesisr imports & exports}} 23 | } 24 | \usage{ 25 | code_lookup 26 | } 27 | \description{ 28 | A data frame that can be used to look up common codes for different 29 | bibliographic fields across databases and merge them to a common format. 30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /man/deduplicate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deduplication_functions.R 3 | \name{deduplicate} 4 | \alias{deduplicate} 5 | \title{Remove duplicates from a bibliographic data set} 6 | \usage{ 7 | deduplicate(data, match_by, method, type = "merge", ...) 8 | } 9 | \arguments{ 10 | \item{data}{A \code{data.frame} containing bibliographic information.} 11 | 12 | \item{match_by}{Name of the column in \code{data} where duplicates should be sought.} 13 | 14 | \item{method}{The duplicate detection function to use; see 15 | \code{link{string_}} or \code{link{fuzz_}} for examples. Passed to 16 | \code{find_duplicates()}.} 17 | 18 | \item{type}{How should entries be selected? Default is \code{"merge"} which 19 | selects the entries with the largest number of characters in each column. 20 | Alternatively \code{"select"} returns the row with the highest total number of 21 | characters.} 22 | 23 | \item{\dots}{Arguments passed to \code{find_duplicates()}.} 24 | } 25 | \value{ 26 | A \code{data.frame} containing data identified as unique. 27 | } 28 | \description{ 29 | Removes duplicates using sensible defaults 30 | } 31 | \details{ 32 | This is a wrapper function to \code{find_duplicates()} and 33 | \code{extract_unique_references()}, which tries to choose some sensible defaults. 34 | Use with care. 35 | } 36 | \examples{ 37 | my_df <- data.frame( 38 | title = c( 39 | "EviAtlas: a tool for visualising evidence synthesis databases", 40 | "revtools: An R package to support article screening for evidence synthesis", 41 | "An automated approach to identifying search terms for systematic reviews", 42 | "Reproducible, flexible and high-throughput data extraction from primary literature", 43 | "eviatlas:tool for visualizing evidence synthesis databases.", 44 | "REVTOOLS a package to support article-screening for evidence synthsis" 45 | ), 46 | year = c("2019", "2019", "2019", "2019", NA, NA), 47 | authors = c("Haddaway et al", "Westgate", 48 | "Grames et al", "Pick et al", NA, NA), 49 | stringsAsFactors = FALSE 50 | ) 51 | 52 | # run deduplication 53 | dups <- find_duplicates( 54 | my_df$title, 55 | method = "string_osa", 56 | rm_punctuation = TRUE, 57 | to_lower = TRUE 58 | ) 59 | 60 | extract_unique_references(my_df, matches = dups) 61 | 62 | # or, in one line: 63 | deduplicate(my_df, "title", 64 | method = "string_osa", 65 | rm_punctuation = TRUE, 66 | to_lower = TRUE) 67 | } 68 | \seealso{ 69 | \code{find_duplicates()} and \code{extract_unique_references()} for underlying 70 | functions. 
71 | } 72 | -------------------------------------------------------------------------------- /man/detect_.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/detect_functions.R 3 | \name{detect_} 4 | \alias{detect_} 5 | \alias{detect_parser} 6 | \alias{detect_delimiter} 7 | \alias{detect_lookup} 8 | \alias{detect_year} 9 | \title{Detect file formatting information} 10 | \usage{ 11 | detect_parser(x) 12 | 13 | detect_delimiter(x) 14 | 15 | detect_lookup(tags) 16 | 17 | detect_year(df) 18 | } 19 | \arguments{ 20 | \item{x}{A character vector containing bibliographic data} 21 | 22 | \item{tags}{A character vector containing RIS tags.} 23 | 24 | \item{df}{a data.frame containing bibliographic data} 25 | } 26 | \value{ 27 | \code{detect_parser()} and \code{detect_delimiter()} return a length-1 28 | character; \code{detect_year()} returns a character vector listing estimated 29 | publication years; and \code{detect_lookup()} returns a \code{data.frame.} 30 | } 31 | \description{ 32 | Bibliographic data can be stored in a number of different file 33 | types, meaning that detecting consistent attributes of those files is 34 | necessary if they are to be parsed accurately. These functions attempt to 35 | identify some of those key file attributes. Specifically, \code{detect_parser()} 36 | determines which \link{parse_} function to use; \code{detect_delimiter()} 37 | and \code{detect_lookup()} identify different attributes of RIS files; and 38 | \code{detect_year()} attempts to fill gaps in publication years from other 39 | information stored in a \code{tibble}. 40 | } 41 | \examples{ 42 | revtools <- c( 43 | "", 44 | "PMID- 31355546", 45 | "VI - 10", 46 | "IP - 4", 47 | "DP - 2019 Dec", 48 | "TI - revtools: An R package to support article 49 | screening for evidence synthesis.", 50 | "PG - 606-614", 51 | "LID - 10.1002/jrsm.1374 [doi]", 52 | "AU - Westgate MJ", 53 | "LA - eng", 54 | "PT - Journal Article", 55 | "JT - Research Synthesis Methods", 56 | "" 57 | ) 58 | 59 | # detect basic attributes of ris files 60 | detect_parser(revtools) 61 | detect_delimiter(revtools) 62 | 63 | # determine which tag format to use 64 | tags <- trimws(unlist(lapply( 65 | strsplit(revtools, "- "), 66 | function(a){a[1]} 67 | ))) 68 | pubmed_tag_list <- detect_lookup(tags[!is.na(tags)]) 69 | 70 | # find year data in other columns 71 | df <- as.data.frame(parse_pubmed(revtools)) 72 | df$year <- detect_year(df) 73 | } 74 | -------------------------------------------------------------------------------- /man/extract_unique_references.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deduplication_functions.R 3 | \name{extract_unique_references} 4 | \alias{extract_unique_references} 5 | \title{Remove duplicates from a bibliographic data set} 6 | \usage{ 7 | extract_unique_references(data, matches, type = "merge") 8 | } 9 | \arguments{ 10 | \item{data}{A \code{data.frame} containing bibliographic information.} 11 | 12 | \item{matches}{A vector showing which entries in \code{data} are duplicates.} 13 | 14 | \item{type}{How should entries be selected to retain? Default is \code{"merge"}, 15 | which selects the entries with the largest number of characters in each 16 | column. 
Alternatively, \code{"select"} returns the row with the highest total 17 | number of characters.} 18 | } 19 | \value{ 20 | Returns a \code{data.frame} of unique references. 21 | } 22 | \description{ 23 | Given a list of duplicate entries and a data set, this function 24 | extracts only unique references. 25 | } 26 | \examples{ 27 | my_df <- data.frame( 28 | title = c( 29 | "EviAtlas: a tool for visualising evidence synthesis databases", 30 | "revtools: An R package to support article screening for evidence synthesis", 31 | "An automated approach to identifying search terms for systematic reviews", 32 | "Reproducible, flexible and high-throughput data extraction from primary literature", 33 | "eviatlas:tool for visualizing evidence synthesis databases.", 34 | "REVTOOLS a package to support article-screening for evidence synthsis" 35 | ), 36 | year = c("2019", "2019", "2019", "2019", NA, NA), 37 | authors = c("Haddaway et al", "Westgate", 38 | "Grames et al", "Pick et al", NA, NA), 39 | stringsAsFactors = FALSE 40 | ) 41 | 42 | # run deduplication 43 | dups <- find_duplicates( 44 | my_df$title, 45 | method = "string_osa", 46 | rm_punctuation = TRUE, 47 | to_lower = TRUE 48 | ) 49 | 50 | extract_unique_references(my_df, matches = dups) 51 | 52 | # or, in one line: 53 | deduplicate(my_df, "title", 54 | method = "string_osa", 55 | rm_punctuation = TRUE, 56 | to_lower = TRUE) 57 | } 58 | \seealso{ 59 | \code{find_duplicates()}, \code{deduplicate()} 60 | } 61 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/man/figures/logo.png -------------------------------------------------------------------------------- /man/find_duplicates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deduplication_functions.R 3 | \name{find_duplicates} 4 | \alias{find_duplicates} 5 | \title{Detect duplicate values} 6 | \usage{ 7 | find_duplicates( 8 | data, 9 | method = "exact", 10 | group_by, 11 | threshold, 12 | to_lower = FALSE, 13 | rm_punctuation = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{data}{A character vector containing duplicate bibliographic entries.} 18 | 19 | \item{method}{A string indicating how matching should be calculated. Either 20 | \code{"exact"} for exact matching (the default), or the name of a function for 21 | calculating string distance.} 22 | 23 | \item{group_by}{An optional vector, data.frame or list containing data to use 24 | as 'grouping' variables; that is, categories within which duplicates should 25 | be sought. Defaults to NULL, in which case all entries are compared against 26 | all others. Ignored if \code{method = "exact"}.} 27 | 28 | \item{threshold}{Numeric: the cutoff threshold for deciding if two strings 29 | are duplicates. Sensible values depend on the \code{method} chosen. Defaults to 5 30 | if \code{method = "string_osa"} and must be specified in all other instances 31 | except \code{method = "exact"} (where no threshold is required).} 32 | 33 | \item{to_lower}{Logical: Should all entries be converted to lower case before 34 | calculating string distance? Defaults to \code{FALSE.}} 35 | 36 | \item{rm_punctuation}{Logical: Should punctuation be removed before 37 | calculating string distance?
Defaults to \code{FALSE.}} 38 | } 39 | \value{ 40 | Returns a vector of duplicate matches, with \code{attributes} listing 41 | methods used. 42 | } 43 | \description{ 44 | Identifies duplicate bibliographic entries using different duplicate 45 | detection methods. 46 | } 47 | \examples{ 48 | my_df <- data.frame( 49 | title = c( 50 | "EviAtlas: a tool for visualising evidence synthesis databases", 51 | "revtools: An R package to support article screening for evidence synthesis", 52 | "An automated approach to identifying search terms for systematic reviews", 53 | "Reproducible, flexible and high-throughput data extraction from primary literature", 54 | "eviatlas:tool for visualizing evidence synthesis databases.", 55 | "REVTOOLS a package to support article-screening for evidence synthsis" 56 | ), 57 | year = c("2019", "2019", "2019", "2019", NA, NA), 58 | authors = c("Haddaway et al", "Westgate", 59 | "Grames et al", "Pick et al", NA, NA), 60 | stringsAsFactors = FALSE 61 | ) 62 | 63 | # run deduplication 64 | dups <- find_duplicates( 65 | my_df$title, 66 | method = "string_osa", 67 | rm_punctuation = TRUE, 68 | to_lower = TRUE 69 | ) 70 | 71 | extract_unique_references(my_df, matches = dups) 72 | 73 | # or, in one line: 74 | deduplicate(my_df, "title", 75 | method = "string_osa", 76 | rm_punctuation = TRUE, 77 | to_lower = TRUE) 78 | } 79 | \seealso{ 80 | \code{\link{string_}} or \code{\link{fuzz_}} for suitable functions 81 | to pass to \code{methods}; \code{\link{extract_unique_references}} and 82 | \code{\link{deduplicate}} for higher-level functions. 83 | } 84 | -------------------------------------------------------------------------------- /man/format_citation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/format_citation.R 3 | \name{format_citation} 4 | \alias{format_citation} 5 | \title{Format a citation} 6 | \usage{ 7 | format_citation( 8 | data, 9 | details = TRUE, 10 | abstract = FALSE, 11 | add_html = FALSE, 12 | line_breaks = FALSE, 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{data}{An object of class \code{data.frame}, \code{list}, or \code{bibliography.}} 18 | 19 | \item{details}{Logical: Should identifying information such as author names & 20 | journal titles be displayed? Defaults to \code{TRUE}.} 21 | 22 | \item{abstract}{Logical: Should the abstract be shown (if available)? 23 | Defaults to \code{FALSE.}} 24 | 25 | \item{add_html}{Logical: Should the journal title be italicized using html 26 | codes? Defaults to \code{FALSE}.} 27 | 28 | \item{line_breaks}{Either logical, stating whether line breaks should be 29 | added, or numeric stating how many characters should separate consecutive 30 | line breaks. Defaults to \code{FALSE}.} 31 | 32 | \item{...}{any other arguments.} 33 | } 34 | \value{ 35 | Returns a string of length equal to \code{length(data)} that contains 36 | formatted citations. 37 | } 38 | \description{ 39 | This function takes an object of class \code{data.frame}, \code{list}, or 40 | \code{bibliography} and returns a formatted citation. 
41 | } 42 | \examples{ 43 | roses <- c("@article{haddaway2018, 44 | title={ROSES RepOrting standards for Systematic Evidence Syntheses: 45 | pro forma, flow-diagram and descriptive summary of the plan and 46 | conduct of environmental systematic reviews and systematic maps}, 47 | author={Haddaway, Neal R and Macura, Biljana and Whaley, Paul and Pullin, Andrew S}, 48 | journal={Environmental Evidence}, 49 | volume={7}, 50 | number={1}, 51 | pages={7}, 52 | year={2018}, 53 | publisher={Springer} 54 | }") 55 | 56 | tmp <- tempfile() 57 | writeLines(roses, tmp) 58 | 59 | citation <- read_ref(tmp) 60 | format_citation(citation) 61 | } 62 | -------------------------------------------------------------------------------- /man/fuzz_.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fuzz_functions.R 3 | \name{fuzz_} 4 | \alias{fuzz_} 5 | \alias{fuzzdist} 6 | \alias{fuzz_m_ratio} 7 | \alias{fuzz_partial_ratio} 8 | \alias{fuzz_token_sort_ratio} 9 | \alias{fuzz_token_set_ratio} 10 | \title{Calculate similarity between two strings} 11 | \usage{ 12 | fuzzdist( 13 | a, 14 | b, 15 | method = c("fuzz_m_ratio", "fuzz_partial_ratio", "fuzz_token_sort_ratio", 16 | "fuzz_token_set_ratio") 17 | ) 18 | 19 | fuzz_m_ratio(a, b) 20 | 21 | fuzz_partial_ratio(a, b) 22 | 23 | fuzz_token_sort_ratio(a, b) 24 | 25 | fuzz_token_set_ratio(a, b) 26 | } 27 | \arguments{ 28 | \item{a}{A character vector of items to match to b.} 29 | 30 | \item{b}{A character vector of items to match to a.} 31 | 32 | \item{method}{The method to use for fuzzy matching.} 33 | } 34 | \value{ 35 | Returns a score of same length as b, giving the proportional 36 | dissimilarity between a and b. 37 | } 38 | \description{ 39 | These functions duplicate the approach of the 'fuzzywuzzy' 40 | Python library for calculating string similarity. 41 | } 42 | \note{ 43 | \code{fuzz_m_ratio()} is a measure of the number of letters that match 44 | between two strings. It is calculated as one minus two times the number of 45 | matched characters, divided by the total number of characters in both strings. For example, "this is a test" and "this is a test!" have 14 matching characters out of 29 in total, giving a score of 1 - (2 * 14)/29, or roughly 0.03. 46 | 47 | \code{fuzz_partial_ratio()} calculates the extent to which one string is a 48 | subset of the other. If one string is a perfect subset, then this will be 49 | zero. 50 | 51 | \code{fuzz_token_sort_ratio()} sorts the words in both strings into 52 | alphabetical order, and checks their similarity using \code{fuzz_m_ratio()}. 53 | 54 | \code{fuzz_token_set_ratio()} is similar to \code{fuzz_token_sort_ratio()}, but 55 | compares both sorted strings to each other, and to a third group made of 56 | words common to both strings. It then returns the maximum value of 57 | \code{fuzz_m_ratio()} from these comparisons. 58 | 59 | \code{fuzzdist()} is a wrapper function, for compatibility with \code{stringdist}.
60 | } 61 | \examples{ 62 | fuzzdist("On the Origin of Species", 63 | "Of the Original Specs", 64 | method = "fuzz_m_ratio") 65 | } 66 | -------------------------------------------------------------------------------- /man/merge_columns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deprecated.R 3 | \name{merge_columns} 4 | \alias{merge_columns} 5 | \title{Bind two or more data frames with different columns} 6 | \usage{ 7 | merge_columns(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Either a data.frame or a list of data.frames.} 11 | 12 | \item{y}{A data.frame, optional if x is a list.} 13 | } 14 | \value{ 15 | Returns a single data.frame with all the input data frames merged. 16 | } 17 | \description{ 18 | Takes two or more \code{data.frames} with different column names or 19 | different column orders and binds them to a single \code{data.frame}. This 20 | function is maintained for backwards compatibility, but it is synonymous with 21 | \code{dplyr::bind_rows()} and will be deprecated in future. 22 | } 23 | \examples{ 24 | df_1 <- data.frame( 25 | title = c( 26 | "EviAtlas: a tool for visualising evidence synthesis databases", 27 | "revtools: An R package to support article screening for evidence synthesis" 28 | ), 29 | year = c("2019", "2019") 30 | ) 31 | 32 | df_2 <- data.frame( 33 | title = c( 34 | "An automated approach to identifying search terms for systematic reviews", 35 | "Reproducible, flexible and high-throughput data extraction from primary literature" 36 | ), 37 | authors = c("Grames et al", "Pick et al") 38 | ) 39 | 40 | merge_columns(df_1, df_2) 41 | } 42 | -------------------------------------------------------------------------------- /man/override_duplicates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deduplication_functions.R 3 | \name{override_duplicates} 4 | \alias{override_duplicates} 5 | \title{Manually override duplicates} 6 | \usage{ 7 | override_duplicates(matches, overrides) 8 | } 9 | \arguments{ 10 | \item{matches}{Numeric: a vector of group numbers for texts that indicates 11 | duplicates and unique values returned by the \code{\link{find_duplicates}} 12 | function.} 13 | 14 | \item{overrides}{Numeric: a vector of group numbers that are not true 15 | duplicates.} 16 | } 17 | \value{ 18 | The input \code{matches} vector with unique group numbers for members 19 | of groups that the user overrides. 20 | } 21 | \description{ 22 | Re-assign group numbers to text that was classified as 23 | duplicated but is unique.
24 | } 25 | -------------------------------------------------------------------------------- /man/parse_.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_bibtex.R, R/parse_csv_tsv.R, 3 | % R/parse_pubmed.R, R/parse_ris.R 4 | \name{parse_bibtex} 5 | \alias{parse_bibtex} 6 | \alias{parse_csv} 7 | \alias{parse_tsv} 8 | \alias{parse_} 9 | \alias{parse_pubmed} 10 | \alias{parse_ris} 11 | \title{Parse bibliographic text in a variety of formats} 12 | \usage{ 13 | parse_bibtex(x) 14 | 15 | parse_csv(x) 16 | 17 | parse_tsv(x) 18 | 19 | parse_pubmed(x) 20 | 21 | parse_ris(x, tag_naming = "best_guess") 22 | } 23 | \arguments{ 24 | \item{x}{A character vector containing bibliographic information in ris 25 | format.} 26 | 27 | \item{tag_naming}{What format are ris tags in? Defaults to \code{"best_guess"}. See 28 | \code{read_refs()} for a list of accepted arguments.} 29 | } 30 | \value{ 31 | Returns an object of class \code{bibliography} (ris, bib, or pubmed 32 | formats) or \code{data.frame} (csv or tsv). 33 | } 34 | \description{ 35 | Text in standard formats - such as imported via 36 | \code{base::readLines()} - can be parsed using a variety of standard formats. Use 37 | \code{detect_parser()} to determine which is the most appropriate parser for your 38 | situation. Note that \code{parse_tsv()} and \code{parse_csv()} are maintained for 39 | backwards compatibility only; within \code{read_ref} these have been replaced 40 | by \code{vroom::vroom()}. 41 | } 42 | \examples{ 43 | eviatlas <- c( 44 | "TY - JOUR", 45 | "AU - Haddaway, Neal R.", 46 | "AU - Feierman, Andrew", 47 | "AU - Grainger, Matthew J.", 48 | "AU - Gray, Charles T.", 49 | "AU - Tanriver-Ayder, Ezgi", 50 | "AU - Dhaubanjar, Sanita", 51 | "AU - Westgate, Martin J.", 52 | "PY - 2019", 53 | "DA - 2019/06/04", 54 | "TI - EviAtlas: a tool for visualising evidence synthesis databases", 55 | "JO - Environmental Evidence", 56 | "SP - 22", 57 | "VL - 8", 58 | "IS - 1", 59 | "SN - 2047-2382", 60 | "UR - https://doi.org/10.1186/s13750-019-0167-1", 61 | "DO - 10.1186/s13750-019-0167-1", 62 | "ID - Haddaway2019", 63 | "ER - " 64 | ) 65 | 66 | detect_parser(eviatlas) # = "parse_ris" 67 | df <- as.data.frame(parse_ris(eviatlas)) 68 | ris_out <- write_refs(df, format = "ris", file = FALSE) 69 | } 70 | -------------------------------------------------------------------------------- /man/read_refs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_refs.R 3 | \name{read_refs} 4 | \alias{read_refs} 5 | \title{Import bibliographic search results} 6 | \usage{ 7 | read_refs( 8 | filename, 9 | tag_naming = "best_guess", 10 | return_df = TRUE, 11 | verbose = FALSE, 12 | locale = default_locale() 13 | ) 14 | } 15 | \arguments{ 16 | \item{filename}{A path to a filename or vector of filenames containing search 17 | results to import.} 18 | 19 | \item{tag_naming}{Either a length-1 character stating how ris tags should be 20 | replaced (see details for a list of options), or an object inheriting from 21 | class \code{data.frame} containing user-defined replacement tags.} 22 | 23 | \item{return_df}{If \code{TRUE} (default), returns a \code{data.frame}; if \code{FALSE}, 24 | returns a list.} 25 | 26 | \item{verbose}{If \code{TRUE}, prints status updates (defaults to \code{FALSE}).} 27 | } 28 | \value{ 29 | Returns a
\code{data.frame} or \code{list} of assembled search results. 30 | } 31 | \description{ 32 | Imports common bibliographic reference formats (i.e. .bib, .ris, 33 | or .txt). 34 | } 35 | \details{ 36 | The default for argument \code{tag_naming} is \code{"best_guess"}, 37 | which estimates what database has been used for ris tag replacement, then 38 | fills any gaps with generic tags. Any tags missing from the database (i.e. 39 | \code{code_lookup}) are passed unchanged. Other options are to use tags from 40 | Web of Science (\code{"wos"}), Scopus (\code{"scopus"}), Ovid (\code{"ovid"}) 41 | or Academic Search Premier (\code{"asp"}). If a \code{data.frame} is given, 42 | then it must contain two columns: \code{"code"} listing the original tags in 43 | the source document, and \code{"field"} listing the replacement column/tag 44 | names. The \code{data.frame} may optionally include a third column named 45 | \code{"order"}, which specifies the order of columns in the resulting 46 | \code{data.frame}; otherwise this will be taken as the row order. Finally, 47 | passing \code{"none"} to \code{tag_naming} suppresses tag replacement. 48 | } 49 | \examples{ 50 | litsearchr <- c( 51 | "@article{grames2019, 52 | title={An automated approach to identifying search terms for 53 | systematic reviews using keyword co-occurrence networks}, 54 | author={Grames, Eliza M and Stillman, Andrew N and Tingley, Morgan W and Elphick, Chris S}, 55 | journal={Methods in Ecology and Evolution}, 56 | volume={10}, 57 | number={10}, 58 | pages={1645--1654}, 59 | year={2019}, 60 | publisher={Wiley Online Library} 61 | }" 62 | ) 63 | 64 | tmp <- tempfile() 65 | 66 | writeLines(litsearchr, tmp) 67 | 68 | df <- read_refs(tmp, return_df = TRUE, verbose = TRUE) 69 | } 70 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reexports.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{as_tibble} 7 | \title{Objects exported from other packages} 8 | \keyword{internal} 9 | \description{ 10 | These objects are imported from other packages. Follow the links 11 | below to see their documentation. 12 | 13 | \describe{ 14 | \item{tibble}{\code{\link[tibble]{as_tibble}}} 15 | }} 16 | 17 | -------------------------------------------------------------------------------- /man/review_duplicates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deduplication_functions.R 3 | \name{review_duplicates} 4 | \alias{review_duplicates} 5 | \title{Manually review potential duplicates} 6 | \usage{ 7 | review_duplicates(text, matches) 8 | } 9 | \arguments{ 10 | \item{text}{A character vector of the text that was used to identify 11 | potential duplicates.} 12 | 13 | \item{matches}{Numeric: a vector of group numbers for texts that indicates 14 | duplicates and unique values returned by the \code{\link{find_duplicates}} 15 | function.} 16 | } 17 | \value{ 18 | A \code{data.frame} of potential duplicates grouped together. 19 | } 20 | \description{ 21 | Allows users to manually review articles classified as 22 | duplicates.
23 | } 24 | -------------------------------------------------------------------------------- /man/string_.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/string_functions.R 3 | \name{string_} 4 | \alias{string_} 5 | \alias{string_osa} 6 | \alias{string_lv} 7 | \alias{string_dl} 8 | \alias{string_hamming} 9 | \alias{string_lcs} 10 | \alias{string_qgram} 11 | \alias{string_cosine} 12 | \alias{string_jaccard} 13 | \alias{string_jw} 14 | \alias{string_soundex} 15 | \title{Calculate similarity between two strings} 16 | \usage{ 17 | string_osa(a, b) 18 | 19 | string_lv(a, b) 20 | 21 | string_dl(a, b) 22 | 23 | string_hamming(a, b) 24 | 25 | string_lcs(a, b) 26 | 27 | string_qgram(a, b) 28 | 29 | string_cosine(a, b) 30 | 31 | string_jaccard(a, b) 32 | 33 | string_jw(a, b) 34 | 35 | string_soundex(a, b) 36 | } 37 | \arguments{ 38 | \item{a}{A character vector of items to match to b.} 39 | 40 | \item{b}{A character vector of items to match to a.} 41 | } 42 | \value{ 43 | Returns a score of same length as b, giving the dissimilarity between 44 | a and b. 45 | } 46 | \description{ 47 | These functions each access a specific \code{"method"} argument 48 | provided by \code{stringdist}, and are provided for convenient calling by 49 | \code{find_duplicates()}. They do not include any new functionality beyond that 50 | given by \code{stringdist}, which you should use for your own analyses. 51 | } 52 | -------------------------------------------------------------------------------- /man/synthesisr-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/synthesisr-package.R 3 | \docType{package} 4 | \name{synthesisr-package} 5 | \alias{synthesisr} 6 | \alias{synthesisr-package} 7 | \title{synthesisr: Import, assemble, and deduplicate bibliographic datasets} 8 | \description{ 9 | Systematic review searches include multiple databases 10 | that export results in a variety of formats with overlap in 11 | coverage between databases. To streamline the process of importing, 12 | assembling, and deduplicating results, \code{synthesisr} recognizes 13 | bibliographic files exported from databases commonly used for 14 | systematic reviews and merges results into a standardized format. 15 | } 16 | \section{Import & Export}{ 17 | 18 | The key task performed by \code{synthesisr} is flexible import and 19 | presentation of bibliographic data. This is typically achieved by 20 | \code{read_refs()}, which can import multiple files at once and link them together 21 | into a single \code{data.frame}. Conversely, export is via \code{write_refs()}.
Users 22 | that require more detailed control can use the following functions: 23 | 24 | \itemize{ 25 | \item \link{read_refs} Read bibliographic data 26 | \item \link{write_refs} Write bibliographic data 27 | \item \link{detect_} Detect file attributes 28 | \item \link{parse_} Parse a vector containing bibliographic data 29 | \item \link{clean_} Cleaning functions for author and column names 30 | \item \link{code_lookup} A dataset of potential ris tags 31 | } 32 | } 33 | 34 | \section{Formatting}{ 35 | 36 | \itemize{ 37 | \item \linkS4class{bibliography} Methods for class \code{bibliography} 38 | \item \link{format_citation} Return a clean citation from a \code{bibliography} or \code{data.frame} 39 | \item \link{add_line_breaks} Set a maximum character width for strings 40 | } 41 | } 42 | 43 | \section{Deduplication}{ 44 | 45 | When importing from multiple databases, it is likely that there will be 46 | duplicates in the resulting dataset. The easiest way to deal with this 47 | problem in \code{synthesisr} is using the \code{deduplicate()} function; but this can 48 | be risky, particularly if there are no DOIs in the dataset. To get finer 49 | control of the deduplication process, consider using the sub-functions: 50 | 51 | \itemize{ 52 | \item \link{deduplicate} Semi-automated duplicate removal 53 | \item \link{find_duplicates} Locate potentially duplicated references 54 | \item \link{extract_unique_references} Return a data.frame with only 'unique' references 55 | \item \link{review_duplicates} Manually review potential duplicates 56 | \item \link{override_duplicates} Manually override identified duplicates 57 | \item \link{fuzz_} Fuzzy string matching c/o \code{fuzzywuzzy} 58 | \item \link{string_} Fuzzy string matching c/o \code{stringdist} 59 | } 60 | } 61 | 62 | \section{Deprecated}{ 63 | 64 | 65 | \itemize{ 66 | \item \link{merge_columns} Synonymous with \link[dplyr:bind_rows]{dplyr::bind_rows} 67 | } 68 | } 69 | 70 | \seealso{ 71 | Useful links: 72 | \itemize{ 73 | \item \url{https://mjwestgate.github.io/synthesisr} 74 | } 75 | 76 | } 77 | \author{ 78 | \strong{Maintainer}: Martin Westgate \email{martinjwestgate@gmail.com} (\href{https://orcid.org/0000-0003-0854-2034}{ORCID}) 79 | 80 | Authors: 81 | \itemize{ 82 | \item Eliza Grames \email{eliza.grames@uconn.edu} (\href{https://orcid.org/0000-0003-1743-6815}{ORCID}) 83 | } 84 | 85 | } 86 | -------------------------------------------------------------------------------- /man/write_refs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/write_refs.R 3 | \name{write_refs} 4 | \alias{write_refs} 5 | \alias{write_bib} 6 | \alias{write_ris} 7 | \title{Export data to a bibliographic format} 8 | \usage{ 9 | write_refs(x, file, format = "ris", tag_naming = "synthesisr", write = TRUE) 10 | 11 | write_bib(x) 12 | 13 | write_ris(x, tag_naming = "synthesisr") 14 | } 15 | \arguments{ 16 | \item{x}{Either a data.frame containing bibliographic information or an 17 | object of class bibliography.} 18 | 19 | \item{file}{Filename to save to.} 20 | 21 | \item{format}{What format should the data be exported as? Options are ris or 22 | bib.} 23 | 24 | \item{tag_naming}{What naming convention should be used to write RIS files? 25 | See details for options.} 26 | 27 | \item{write}{Logical: should a file be written?
If \code{FALSE}, returns a 28 | \code{list}.} 29 | } 30 | \value{ 31 | This function is typically called for its side effect of writing a 32 | file in the specified location and format. If \code{write} is FALSE, returns 33 | a character vector containing bibliographic information in the specified 34 | format. 35 | } 36 | \description{ 37 | This function exports data.frames containing bibliographic 38 | information to either a .ris or .bib file. 39 | } 40 | \examples{ 41 | eviatlas <- c( 42 | "TY - JOUR", 43 | "AU - Haddaway, Neal R.", 44 | "AU - Feierman, Andrew", 45 | "AU - Grainger, Matthew J.", 46 | "AU - Gray, Charles T.", 47 | "AU - Tanriver-Ayder, Ezgi", 48 | "AU - Dhaubanjar, Sanita", 49 | "AU - Westgate, Martin J.", 50 | "PY - 2019", 51 | "DA - 2019/06/04", 52 | "TI - EviAtlas: a tool for visualising evidence synthesis databases", 53 | "JO - Environmental Evidence", 54 | "SP - 22", 55 | "VL - 8", 56 | "IS - 1", 57 | "SN - 2047-2382", 58 | "UR - https://doi.org/10.1186/s13750-019-0167-1", 59 | "DO - 10.1186/s13750-019-0167-1", 60 | "ID - Haddaway2019", 61 | "ER - " 62 | ) 63 | 64 | detect_parser(eviatlas) # = "parse_ris" 65 | df <- as.data.frame(parse_ris(eviatlas)) 66 | ris_out <- write_refs(df, format = "ris", file = FALSE) 67 | } 68 | -------------------------------------------------------------------------------- /pkgdown/extra.css: -------------------------------------------------------------------------------- 1 | @import url(https://fonts.googleapis.com/css?family=Lato); 2 | @import url(https://fonts.googleapis.com/css?family=Roboto); 3 | 4 | h1, h2, h3, h4, .h1, .h2, .h3, .main-title, .main-subtitle { 5 | font-family: Lato; 6 | font-weight: normal; 7 | color: #D9565C; 8 | } 9 | 10 | a { 11 | color: #0a238a; 12 | text-decoration: none; 13 | } 14 | 15 | a:hover { 16 | color: #6c159e; 17 | } 18 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/pkgdown/favicon/favicon-96x96.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /pkgdown/favicon/site.webmanifest: -------------------------------------------------------------------------------- 1 | { 2 | "name": "", 3 | "short_name": "", 4 | "icons": [ 5 | { 6 | "src": "/web-app-manifest-192x192.png", 7 | "sizes": "192x192", 8 | "type": "image/png", 9 | "purpose": "maskable" 10 | }, 11 | { 12 | "src": "/web-app-manifest-512x512.png", 13 | "sizes": "512x512", 14 | "type": "image/png", 15 | "purpose": "maskable" 16 | } 17 | ], 18 | "theme_color": "#ffffff", 19 | "background_color": "#ffffff", 20 | "display": "standalone" 21 | } --------------------------------------------------------------------------------
/pkgdown/favicon/web-app-manifest-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/pkgdown/favicon/web-app-manifest-192x192.png -------------------------------------------------------------------------------- /pkgdown/favicon/web-app-manifest-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/pkgdown/favicon/web-app-manifest-512x512.png -------------------------------------------------------------------------------- /synthesisr.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(synthesisr) 3 | 4 | test_check("synthesisr") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-clean.R: -------------------------------------------------------------------------------- 1 | test_that("clean_authors() works", { 2 | author_list <- 3 | c( 4 | "Haddaway, N.R., A. Feirman, M.J. Grainger, C.T. Gray, E. Tanriver-Ayder, S. Dhaubanjar, & M.J Westgate", 5 | "Grames, E.M., A.N. Stillman, M.W. Tingley AND C.S. Elphick" 6 | ) 7 | expect_false(any(grepl("&", clean_authors(author_list)))) 8 | expect_false(any(grepl("AND", clean_authors(author_list)))) 9 | }) 10 | 11 | test_that("clean_colnames() works", { 12 | cleaned <- clean_colnames(c(".title...", "X..YEAR", 13 | "authors..", ".AUTHOR")) 14 | expect_false(any(grepl("[[:punct::]]", cleaned))) 15 | expect_false(any(duplicated(cleaned))) 16 | expect_false(any(grepl("^(X|Y|Z)\\.+", cleaned))) 17 | }) 18 | 19 | test_that("clean_df() cleans authors, colnames and factors", { 20 | test_df <- data.frame( 21 | authors = c("Haddaway, N.R., A. Feirman AND M.J. Grainger", "Some authors"), 22 | "..misc." 
= c("text", "text2"), 23 | JOURNAL = as.factor(c("A journal", "Another journal")) 24 | ) 25 | result <- clean_df(test_df) 26 | expect_false(any(grepl("AND", result$authors))) 27 | test_cols <- colnames(result) 28 | expect_false(any(grepl("[[:punct::]]", test_cols))) 29 | expect_equal(tolower(test_cols), test_cols) 30 | expect_false(any(unlist(lapply(result, is.factor)))) 31 | }) 32 | -------------------------------------------------------------------------------- /tests/testthat/test-deduplicate.R: -------------------------------------------------------------------------------- 1 | test_that("deduplicate works", { 2 | my_df <- data.frame( 3 | title = c( 4 | "EviAtlas: a tool for visualising evidence synthesis databases", 5 | "revtools: An R package to support article screening for evidence synthesis", 6 | "An automated approach to identifying search terms for systematic reviews using keyword co-occurrence networks", 7 | "Reproducible, flexible and high-throughput data extraction from primary literature: The metaDigitise r package", 8 | "eviatlas:tool for visualizing evidence synthesis databases.", 9 | "REVTOOLS a package to support article-screening for evidence synthsis"), 10 | year = c("2019", "2019", "2019", "2019", NA, NA), 11 | authors = c("Haddaway et al", "Westgate", "Grames et al", "Pick et al", NA, NA), 12 | stringsAsFactors = FALSE) 13 | 14 | # run deduplication 15 | dups <- find_duplicates(my_df$title, 16 | method = "string_osa", 17 | rm_punctuation = TRUE, 18 | to_lower = TRUE) 19 | deduped <- extract_unique_references(my_df, matches = dups) 20 | deduped2 <- deduplicate(my_df, "title", 21 | rm_punctuation = TRUE, 22 | to_lower = TRUE) 23 | 24 | expect_equal(length(dups), nrow(my_df)) 25 | expect_true(all(dups[5:6] == dups[1:2])) 26 | expect_equal(length(unique(dups)), nrow(deduped)) 27 | expect_equal(deduped, deduped2) 28 | }) 29 | 30 | 31 | -------------------------------------------------------------------------------- /tests/testthat/test-detect.R: -------------------------------------------------------------------------------- 1 | test_that("detect_delimiter() works for ris", { 2 | lines <- readLines("testdata/eviatlas.txt") 3 | expect_equal(detect_delimiter(lines), "endrow") 4 | }) 5 | 6 | test_that("detect_parser recognises files correctly", { 7 | file_names <- list.files("testdata") 8 | file_names <- file_names[ 9 | !grepl("eviatlas|litsearchr|res_synth_methods", file_names)] 10 | file_types <- lapply(file_names, function(a){ 11 | x <- readLines(paste0("./testdata/", a), warn = FALSE) 12 | detect_parser(x) 13 | }) |> 14 | unlist() 15 | detected_formats <- sub("^parse_", "", file_types) 16 | expect_equal( 17 | c("ris", "ris", "ris", "pubmed", "bibtex", "ris", "ris", "ris"), 18 | detected_formats) 19 | }) 20 | -------------------------------------------------------------------------------- /tests/testthat/test-format_citation.R: -------------------------------------------------------------------------------- 1 | test_that("format_citation() works for an object of class bibliography", { 2 | bib <- read_refs("testdata/eviatlas.txt", return_df = FALSE) 3 | expect_equal( 4 | format_citation(bib)[[1]], 5 | "Haddaway, Neal R. et al. (2019) Eviatlas: a Tool for Visualising Evidence Synthesis Databases. Environmental Evidence.") 6 | }) 7 | 8 | test_that("format_citation() works for an object of class data.frame", { 9 | df <- read_refs("testdata/eviatlas.txt", return_df = TRUE) 10 | expect_equal( 11 | as.character(format_citation(df[1, ])), 12 | "Haddaway, Neal R. et al. 
(2019) Eviatlas: a Tool for Visualising Evidence Synthesis Databases. Environmental Evidence.") 13 | }) 14 | 15 | test_that("format_citation() gives same result from .bib and data.frame", { 16 | bib <- read_refs("testdata/eviatlas.txt", return_df = FALSE) 17 | df <- read_refs("testdata/eviatlas.txt", return_df = TRUE) 18 | expect_equal(format_citation(df), format_citation(bib)) 19 | }) 20 | 21 | test_that("add_line_breaks() limits lines to supplied length", { 22 | title <- 23 | "On the Origin of Species by Means of Natural Selection, or the Preservation of Favoured Races in the Struggle for Life" 24 | lines_added <- add_line_breaks(title, n = 20) 25 | split_text <- strsplit(lines_added, "\n")[[1]] 26 | expect_equal(length(split_text), 8) 27 | expect_true(all(unlist(lapply(split_text, nchar)) <= 20)) 28 | 29 | # and with higher n 30 | lines_added <- add_line_breaks(title, n = 40) 31 | split_text <- strsplit(lines_added, "\n")[[1]] 32 | expect_equal(length(split_text), 4) 33 | expect_true(all(unlist(lapply(split_text, nchar)) <= 40)) 34 | }) 35 | 36 | test_that("add_line_breaks() works on vectors", { 37 | titles <- c( 38 | "It is a truth universally acknowledged, that a single man in possession of a good fortune must be in want of a wife.", 39 | "No one would have believed in the last years of the nineteenth century that this world was being watched keenly and closely by intelligences greater than man’s and yet as mortal as his own" 40 | ) 41 | lines_added <- add_line_breaks(titles, n = 50) 42 | string_lengths <- unlist(lapply(strsplit(lines_added, "\n"), nchar)) 43 | expect_true(all(string_lengths <= 50)) 44 | expect_equal(length(lines_added), 2) 45 | }) 46 | -------------------------------------------------------------------------------- /tests/testthat/test-fuzz_functions.R: -------------------------------------------------------------------------------- 1 | # to match the fuzzy wuzzy test set, needs to be out of 100 as a similarity score 2 | 3 | test_that("fuzz_m_ratio() works", { 4 | ratio <- 5 | 100 - round(fuzz_m_ratio("this is a test", "this is a test!"), 2) * 100 6 | expect_equal(ratio, 97) 7 | }) 8 | 9 | test_that("fuzz_partial_ratio() works", { 10 | partial_ratio <- 11 | 100 - (round(fuzz_partial_ratio("this is a test", "this is a test!"), 2) * 12 | 100) 13 | expect_equal(partial_ratio, 100) 14 | }) 15 | 16 | test_that("fuzz_m_ratio() works", { 17 | ratio2 <- 18 | 100 - (round(fuzz_m_ratio( 19 | "fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear" 20 | ), 2) * 100) 21 | expect_equal(ratio2, 91) 22 | }) 23 | 24 | test_that("fuzz_token_sort_ratio() works", { 25 | sort_ratio <- 26 | 100 - (round( 27 | fuzz_token_sort_ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear"), 28 | 2 29 | ) * 100) 30 | expect_equal(sort_ratio, 100) 31 | }) 32 | 33 | test_that("fuzz_token_sort_ratio() works twice", { 34 | sort_ratio2 <- 35 | 100 - (round( 36 | fuzz_token_sort_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear"), 37 | 2 38 | ) * 100) 39 | expect_equal(sort_ratio2, 84) 40 | }) 41 | 42 | # I have not been able to sort out why the R implementation does not match python 43 | # It seems fuzzywuzzy matches by tokens, but m_dist is not doing this 44 | # So actually, token_sort_ratio is the same as token_set_ratio when sorted, but without removing unique 45 | 46 | test_that("fuzz_token_set_ratio() works", { 47 | set_ratio <- 48 | 100 - (round( 49 | fuzz_token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear"), 50 | 2 51 | ) * 100) 52 | expect_equal(set_ratio, 100) 53 | }) 54 | 
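55 | # The test below is an addition to this file, sketched on the assumption that 56 | # fuzzdist() simply dispatches to whichever fuzz_ function is named in its 57 | # `method` argument, as its documentation describes. If that holds, the wrapper 58 | # and the underlying function should return identical scores. 59 | test_that("fuzzdist() matches the underlying fuzz_ functions", { 60 |   a <- "fuzzy wuzzy was a bear" 61 |   b <- "wuzzy fuzzy was a bear" 62 |   expect_equal(fuzzdist(a, b, method = "fuzz_m_ratio"), fuzz_m_ratio(a, b)) 63 |   expect_equal(fuzzdist(a, b, method = "fuzz_token_sort_ratio"), 64 |                fuzz_token_sort_ratio(a, b)) 65 | })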
-------------------------------------------------------------------------------- /tests/testthat/test-merge_columns.R: -------------------------------------------------------------------------------- 1 | test_that("merge_columns() works", { 2 | my_df <- data.frame(PY = 2019, 3 | DB = "Scopus", 4 | TI = "revtools: An R package to support article screening for evidence synthesis", 5 | AU = "Westgate, M.J.") 6 | 7 | my_df2 <- data.frame( 8 | PY = 2019, 9 | doi = "10.1186/s13750-019-0167-1", 10 | DB = "Scopus", 11 | TI = "revtools: An R package to support article screening for evidence synthesis", 12 | AU = "Westgate, M.J.", 13 | random_noise = c("non-bibliographic data")) 14 | 15 | db <- merge_columns(list(my_df, my_df2)) 16 | vars <- unique(c(colnames(my_df), colnames(my_df2))) 17 | expect_true(all(vars %in% colnames(db))) 18 | }) 19 | 20 | 21 | -------------------------------------------------------------------------------- /tests/testthat/test-read_write.R: -------------------------------------------------------------------------------- 1 | test_that("read_ref() works for simple imports", { 2 | df <- read_refs("testdata/eviatlas.txt", 3 | return_df = TRUE, 4 | verbose = FALSE) 5 | expect_true(inherits(df, c("tbl", "data.frame"))) 6 | expect_equal(nrow(df), 1) 7 | expect_true(any(grep("EviAtlas", df[1, ]))) 8 | }) 9 | 10 | test_that("read_refs() works for simple imports", { 11 | testfiles <- paste0("testdata/", 12 | c("eviatlas.txt", "litsearchr.txt", "res_synth_methods.txt")) 13 | df <- read_refs(testfiles, 14 | return_df = TRUE, 15 | verbose = FALSE) 16 | expect_true(inherits(df, c("tbl", "data.frame"))) 17 | expect_equal(nrow(df), 4) 18 | expect_true(any(grep("EviAtlas", df[1, ]))) 19 | expect_true(any(grep("litsearchr", df[2, ]))) 20 | expect_true(any(grep("robvis", df[3, ]))) 21 | }) 22 | 23 | test_that("pubmed formats are read correctly", { 24 | x <- read_refs("testdata/PubMed_example.txt") 25 | expect_s3_class(x, c("tbl_df", "tbl", "data.frame")) 26 | expect_equal(nrow(x), 3) 27 | expect_equal(ncol(x), 37) 28 | expect_true(all( 29 | c("publication_type", "author", "journal", "title", "abstract") %in% 30 | colnames(x))) 31 | }) 32 | 33 | # test_that("read_refs() imports special characters correctly", { 34 | # }) 35 | 36 | # test_that("read_refs() stores multi-value fields as list columns", { 37 | # df <- read_refs("testdata/Scopus_ris_example.ris", 38 | # return_df = FALSE, 39 | # verbose = FALSE) 40 | # result <- as_tibble(df) 41 | # # test goes here 42 | # }) 43 | 44 | test_that("bibtex imports properly with json code", { 45 | x <- read_ref("testdata/Scopus_bib_example.bib") 46 | expect_true(inherits(x, c("data.frame", "tbl"))) 47 | expect_equal(nrow(x), 3) 48 | }) 49 | 50 | test_that("write_refs() works", { 51 | lines <- read_refs("testdata/eviatlas.txt") 52 | evi_ris <- write_refs(lines, format = "ris", write = FALSE) 53 | evi_bib <- write_refs(lines, format = "bib", write = FALSE) 54 | expect_equal(detect_parser(evi_ris), "parse_ris") 55 | expect_equal(detect_parser(evi_bib), "parse_bibtex") 56 | expect_true(any(grep("ER ", evi_ris, ignore.case = FALSE))) 57 | }) 58 | 59 | test_that("read-write-read roundtripping works for .ris files", { 60 | x <- read_refs("./testdata/citesource_issue_24.ris") 61 | dir.create("TEMP") 62 | write_refs(x, file = "TEMP/issue24.ris", format = "ris") 63 | expect_no_error({y <- read_refs("TEMP/issue24.ris")}) 64 | expect_true(colnames(y)[1] == "source_type") 65 | expect_equal(nrow(x), nrow(y)) 66 | # expect_equal(ncol(x), ncol(y)) # fails at present - 
i.e. round-tripping is lossy 67 | unlink("TEMP", recursive = TRUE) 68 | }) 69 | -------------------------------------------------------------------------------- /tests/testthat/test-write.R: -------------------------------------------------------------------------------- 1 | test_that("write_refs() works", { 2 | lines <- read_refs("testdata/eviatlas.txt") 3 | evi_ris <- write_refs(lines, format = "ris", write = FALSE) 4 | evi_bib <- write_refs(lines, format = "bib", write = FALSE) 5 | expect_equal(detect_parser(evi_ris), "parse_ris") 6 | expect_equal(detect_parser(evi_bib), "parse_bibtex") 7 | expect_true(any(grep("ER ", evi_ris, ignore.case = FALSE))) 8 | }) 9 | 10 | -------------------------------------------------------------------------------- /tests/testthat/testdata/Ovid_ris_example.ris: -------------------------------------------------------------------------------- 1 | 1. 2 | TY - JOUR 3 | ID - 20203152553 4 | DO - http://dx.doi.org/10.4236/ojf.2020.101008 5 | T1 - Detection of retention trees on clearcuts, a 50-year perspective. 6 | A1 - Holmstrom, E. 7 | A1 - Nordstrom, E. 8 | A1 - Lariviere, D. 9 | A1 - Wallin, I. 10 | Y1 - 2020// 11 | N2 - Changes in clearcut management over time were evaluated using aerial photographs taken between 1960 and 2010. Temporal changes were analysed in two different climatic zones in Sweden: a typical boreal forest zone (the coast of Vasterbotten County), and the hemi-boreal zone of southern Sweden (represented by Kronoberg County). The study covers the periods before and after the paradigm shift in Swedish forestry caused by the equalization of the production and nature conservation objectives specified in the first paragraph of the Swedish Forestry Act. Photographs were processed to determine clearcut size and shape and to register solitary retention trees and groups of retention trees. Small but significant changes in clearcut size were detected over time. The number of retention trees increased over time, a result that was also found in other studies using different methodologies. The results demonstrate that measurable structural changes have occurred in Swedish forests over the 25 years since the paradigm shift. Results from this study also show that digital detection of green tree retention could be a future complement to field inventory and monitoring. 12 | KW - aerial photography 13 | KW - boreal forests 14 | KW - clear felling 15 | KW - climatic zones 16 | KW - detection 17 | KW - forest management 18 | KW - forests 19 | KW - monitoring 20 | KW - nature conservation 21 | KW - resource conservation 22 | KW - temporal variation 23 | JF - Open Journal of Forestry 24 | JA - Open Journal of Forestry 25 | VL - 10 26 | IS - 1 27 | SP - 110 28 | EP - 123 29 | CY - USA 30 | SN - 2163-0429 31 | AD - Irvine 32 | M1 - Southern Swedish Forest Centre, Swedish University of Agricultural Science, Alnarp, Sweden. 
33 | UR - https://www.scirp.org/html/8-1620633_97627.htm 34 | ER - 35 | 36 | Link to the Ovid Full Text or citation: http://ovidsp.ovid.com/ovidweb.cgi?T=JS&CSC=Y&NEWS=N&PAGE=fulltext&D=caba6&AN=20203152553Link to the External Link Resolver: http://jn8sf5hk5v.search.serialssolutions.com/?url_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:journal&rfr_id=info:sid/Ovid:caba6&rft.genre=article&rft_id=info:doi/10.4236%2Fojf.2020.101008&rft_id=info:pmid/&rft.issn=2163-0429&rft.volume=10&rft.issue=1&rft.spage=110&rft.pages=110-123&rft.date=2020&rft.jtitle=Open+Journal+of+Forestry&rft.atitle=Detection+of+retention+trees+on+clearcuts%2C+a+50-year+perspective.&rft.aulast=Holmstrom 37 | 38 | 2. 39 | TY - JOUR 40 | ID - 20203155626 41 | DO - http://dx.doi.org/10.1098/rspb.2019.1969 42 | T1 - Host density drives viral, but not trypanosome, transmission in a key pollinator. 43 | A1 - Bailes, E. J. 44 | A1 - Bagi, J. 45 | A1 - Coltman, J. 46 | A1 - Fountain, M. T. 47 | A1 - Wilfert, L. 48 | A1 - Brown, M. J. F. 49 | Y1 - 2020// 50 | N2 - Supplemental feeding of wildlife populations can locally increase the density of individuals, which may in turn impact disease dynamics. Flower strips are a widely used intervention in intensive agricultural systems to nutritionally support pollinators such as bees. Using a controlled experimental semi-field design, we asked how density impacts transmission of a virus and a trypanosome parasite in bumblebees. We manipulated bumblebee density by using different numbers of colonies within the same area of floral resource. In high-density compartments, slow bee paralysis virus was transmitted more quickly, resulting in higher prevalence and level of infection in bumblebee hosts. By contrast, there was no impact of density on the transmission of the trypanosome Crithidia bombi, which may reflect the ease with which this parasite is transmitted. These results suggest that agri-environment schemes such as flower strips, which are known to enhance the nutrition and survival of bumblebees, may also have negative impacts on pollinators through enhanced disease transmission. Future studies should assess how changing the design of these schemes could minimize disease transmission and thus maximise their health benefits to wild pollinators. 51 | KW - disease transmission 52 | KW - pollinators 53 | KW - disease prevalence 54 | KW - wildlife 55 | JF - Proceedings of the Royal Society B. Biological Sciences 56 | JA - Proceedings of the Royal Society B. Biological Sciences 57 | VL - 287 58 | IS - 1918 59 | CY - UK 60 | SN - 0962-8452 61 | AD - London 62 | M1 - Department of Biological Sciences, Royal Holloway University of London, Bourne Building, Egham TW20 0EX, UK. 63 | UR - https://royalsocietypublishing.org/doi/full/10.1098/rspb.2019.1969 64 | ER - 65 | 66 | Link to the Ovid Full Text or citation: http://ovidsp.ovid.com/ovidweb.cgi?T=JS&CSC=Y&NEWS=N&PAGE=fulltext&D=caba6&AN=20203155626Link to the External Link Resolver: http://jn8sf5hk5v.search.serialssolutions.com/?url_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:journal&rfr_id=info:sid/Ovid:caba6&rft.genre=article&rft_id=info:doi/10.1098%2Frspb.2019.1969&rft_id=info:pmid/&rft.issn=0962-8452&rft.volume=287&rft.issue=1918&rft.spage=20191969&rft.pages=&rft.date=2020&rft.jtitle=Proceedings+of+the+Royal+Society+B.+Biological+Sciences&rft.atitle=Host+density+drives+viral%2C+but+not+trypanosome%2C+transmission+in+a+key+pollinator.&rft.aulast=Bailes 67 | 68 | 3. 
69 | TY - JOUR 70 | ID - 20203152349 71 | DO - http://dx.doi.org/10.1016/j.jenvman.2019.109817 72 | T1 - Export of nitrogen and phosphorus from golf courses: a review. 73 | A1 - Bock, E. M. 74 | A1 - Easton, Z. M. 75 | Y1 - 2020// 76 | N2 - Mitigating the environmental impact of nonpoint source pollution from intensively managed urban and agricultural landscapes is of paramount concern to watershed managers. Golf course turfgrass systems, which receive significant fertilizer inputs, have been cited as significant sources of nutrient loading to groundwater and surface water, but a contemporary synthesis of golf course nutrient export rates is lacking. This review of nitrogen (N) and phosphorus (P) loss from golf courses and the factors affecting it aims to support watershed management efforts and decision making. We discuss previous literature reviews, examine seven golf course studies that quantify nutrient export from delineated drainage areas, and analyze the results of 40 turfgrass plot experiments. Studies were collected systematically and selected based on predetermined inclusion criteria. Combining evidence from both watershed- and plot-scale studies, typical inorganic N and P losses from golf courses via leaching and runoff are on the order of 2-12 kg ha-1 yr-1 and 0.1-1.0 kg ha-1 yr-1, respectively. Typical total N and P losses are around 2-20 kg ha-1 yr-1 and 1.5-5 kg ha-1 yr-1, respectively. However, the potential for large variation in export rates across 2-3 orders of magnitude must be emphasized. The body of turfgrass literature stresses the importance of best management practices (BMPs) related to applying fertilizer to match plant needs and reducing opportunities for its transport. Accounting for all sources of nutrients, especially soil P, in determining fertilizer application rates and avoiding excessive irrigation to prevent leaching of nutrients from the rootzone is particularly important. BMPs can also reduce nutrient leaching and runoff by controlling the movement of water across the landscape and promoting natural nutrient attenuation, such as with vegetative stream buffers. 77 | KW - water quality 78 | KW - water pollution 79 | KW - phosphorus 80 | KW - nitrogen 81 | KW - golf courses 82 | KW - drainage 83 | KW - lawns and turf 84 | KW - leaching 85 | KW - runoff 86 | KW - fertilizers 87 | KW - application rates 88 | KW - irrigation 89 | KW - nonpoint source pollution 90 | KW - watersheds 91 | JF - Journal of Environmental Management 92 | JA - Journal of Environmental Management 93 | VL - 255 94 | CY - Netherlands 95 | SN - 0301-4797 96 | AD - Amsterdam 97 | M1 - Department of Biological Systems Engineering 155 Ag Quad Lane, 203 Seitz Hall Virginia Tech, Blacksburg, VA 24061, USA. 98 | UR - https://www.sciencedirect.com/science/article/pii/S030147971931535X 99 | ER - 100 | 101 | Link to the Ovid Full Text or citation: http://ovidsp.ovid.com/ovidweb.cgi?T=JS&CSC=Y&NEWS=N&PAGE=fulltext&D=caba6&AN=20203152349Link to the External Link Resolver: http://jn8sf5hk5v.search.serialssolutions.com/?url_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:journal&rfr_id=info:sid/Ovid:caba6&rft.genre=article&rft_id=info:doi/10.1016%2Fj.jenvman.2019.109817&rft_id=info:pmid/&rft.issn=0301-4797&rft.volume=255&rft.issue=&rft.spage=109817&rft.pages=&rft.date=2020&rft.jtitle=Journal+of+Environmental+Management&rft.atitle=Export+of+nitrogen+and+phosphorus+from+golf+courses%3A+a+review.&rft.aulast=Bock 102 | 103 | 4. 
104 | TY - JOUR 105 | ID - 20203150108 106 | T1 - Diversity of yard plants in the buffer zone of the Cyclop Nature Reserve, Jayapura City Papua Province, Indonesia. 107 | A1 - Antoh, A. A. 108 | A1 - Raunsay, E. K. 109 | Y1 - 2019// 110 | N2 - The Cyclop Mountains are located on the island of Papua, Indonesia which has a high biodiversity conservation status. Cyclop has many springs and is very important for the service needs for people in Jayapura City and Regency. The high disturbance in conservation areas is a problem for the community at this time. This study aims to analyze the diversity of garden plant species and their benefits around community settlements in buffer zones. The results showed that sweet potatoes and cassava were very dominantly planted around the yard of the house. Cassava and sweet potatoes are used as food besides being consumed by them but also used as animal feed. 111 | KW - nature conservation 112 | KW - species diversity 113 | KW - conservation 114 | KW - biodiversity 115 | KW - nature reserves 116 | KW - gardens 117 | KW - settlement 118 | KW - sweet potatoes 119 | KW - cassava 120 | KW - feeds 121 | KW - ecological disturbance 122 | KW - resource conservation 123 | KW - mountain areas 124 | JF - AAB Bioflux 125 | JA - AAB Bioflux 126 | VL - 11 127 | IS - 3 128 | SP - 157 129 | EP - 161 130 | CY - Romania 131 | SN - 2066-7639 132 | AD - Cluj-Napoca 133 | M1 - Biology Education Study Program, Mathematics and Natural Sciences Education Department, Training and Education Faculty, Cenderawasih University, 99351 Papua, Indonesia. 134 | UR - http://www.aab.bioflux.com.ro/docs/2019.157-161.pdf 135 | ER - 136 | 137 | Link to the Ovid Full Text or citation: http://ovidsp.ovid.com/ovidweb.cgi?T=JS&CSC=Y&NEWS=N&PAGE=fulltext&D=caba6&AN=20203150108Link to the External Link Resolver: http://jn8sf5hk5v.search.serialssolutions.com/?url_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:journal&rfr_id=info:sid/Ovid:caba6&rft.genre=article&rft_id=info:doi/&rft_id=info:pmid/&rft.issn=2066-7639&rft.volume=11&rft.issue=3&rft.spage=157&rft.pages=157-161&rft.date=2019&rft.jtitle=AAB+Bioflux&rft.atitle=Diversity+of+yard+plants+in+the+buffer+zone+of+the+Cyclop+Nature+Reserve%2C+Jayapura+City+Papua+Province%2C+Indonesia.&rft.aulast=Antoh -------------------------------------------------------------------------------- /tests/testthat/testdata/Scopus_bib_example.bib: -------------------------------------------------------------------------------- 1 | 2 | @ARTICLE{Li2020, 3 | author={Li, W. and Dou, Z. and Cui, L. and Zhao, X. and Zhang, M. and Zhang, Y. and Gao, C. and Yang, Z. and Lei, Y. and Pan, X.}, 4 | title={Soil fauna diversity at different stages of reed restoration in a lakeshore wetland at Lake Taihu, China}, 5 | journal={Ecosystem Health and Sustainability}, 6 | year={2020}, 7 | volume={6}, 8 | number={1}, 9 | doi={10.1080/20964129.2020.1722034}, 10 | art_number={1722034}, 11 | note={cited By 0}, 12 | url={https://www.scopus.com/inward/record.uri?eid=2-s2.0-85079532227&doi=10.1080%2f20964129.2020.1722034&partnerID=40&md5=a4a9d08e21dbf151663a0e842336c225}, 13 | affiliation={Institute of Wetland Research, Chinese Academy of Forestry, Beijing, China; Beijing Key Laboratory of Wetland Services and Restoration, Beijing, China; School of Geographical Sciences, University of Bristol, Bristol, United Kingdom}, 14 | abstract={Introduction: Wetland soil fauna support material cycling and restoration processes in wetland ecosystems. 
In our study, we observed variations in wetland soil fauna on the shores of Lake Taihu, China. We examined the relationships between fauna and major environmental factors, and looked at the short-and long-term changes in reed wetlands under restoration and in the natural reed lakeshore. Outcomes: We identified 93 groups of soil fauna in different wetlands and found significant differences in the lakeshore wetlands’ soil fauna assemblages, depending on the length of the restoration period. By analyzing the soil fauna community evenness, dominance, number of taxa, and diversity, we found minimal seasonal variation in the soil fauna community diversity and abundance. The abundance of soil fauna in the sites under restoration decreased with depth below the soil surface. The reed restoration was obvious in the succession of the soil fauna groups in the long-term site. Although the restoration had an overall positive long-term effect on the soil fauna communities, there were no obvious short-term changes in the number of individuals. Conclusion: The study explored various potential measures to restore soil fauna in the Lake Taihu wetland and developed a theoretical basis for restoring the lakeshore wetland ecosystem. © 2020, © 2020 The Author(s). Published by Taylor & Francis Group and Science Press on behalf of the Ecological Society of China.}, 15 | author_keywords={Environmental factors; Lake Taihu; lakeshore wetland; reed; soil fauna; wetland restoration}, 16 | funding_details={University of TwenteUniversity of Twente}, 17 | funding_details={Special Fund for Forest Scientific Research in the Public WelfareSpecial Fund for Forest Scientific Research in the Public Welfare, 201404305, 200904001, CAFYBB2011007}, 18 | funding_text 1={The Special Fund for Forest Scientific Research in the Public Welfare [Grant Nos. 201404305 and 200904001] and ?The Lecture and Study Program for Outstanding Scholars from Home and Abroad? [CAFYBB2011007] funded this research. The authors thank Amjad Ali and Abel Ramoelo for valuable comments on the draft manuscript and John Wasige from the University of Twente, The Netherlands, for writing assistance.}, 19 | correspondence_address1={Cui, L.; Institute of Wetland Research, Chinese Academy of ForestryChina; email: wetlands108@126.com}, 20 | publisher={Taylor and Francis Ltd.}, 21 | issn={20964129}, 22 | language={English}, 23 | abbrev_source_title={Ecosyst. Health Sustain.}, 24 | document_type={Article}, 25 | source={Scopus}, 26 | } 27 | 28 | @ARTICLE{Cao2020, 29 | author={Cao, F. and Li, J. and Fu, X. and Wu, G.}, 30 | title={Impacts of land conversion and management measures on net primary productivity in semi-arid grassland}, 31 | journal={Ecosystem Health and Sustainability}, 32 | year={2020}, 33 | volume={6}, 34 | number={1}, 35 | doi={10.1080/20964129.2020.1749010}, 36 | art_number={1749010}, 37 | note={cited By 0}, 38 | url={https://www.scopus.com/inward/record.uri?eid=2-s2.0-85083526403&doi=10.1080%2f20964129.2020.1749010&partnerID=40&md5=82da8427a9c2e5b22167c4428a08a62f}, 39 | affiliation={State Key Laboratory of Urban and Regional Ecology, Research Center for Eco-Environmental Sciences, Chinese Academy of Sciences, Beijing, China; University of Chinese Academy of Sciences, Beijing, China}, 40 | abstract={Ecological restoration measures implemented in China have profoundly impacted vegetation NPP. This study aimed to estimate the effects of the land conversion and management measures on the grassland ecosystem in semi-arid regions. 
Land use data were employed from 2000 to 2015 to compare land conversion and coverage changes in Xilingol grassland. Then, the contributions of land conversion and management policies were quantified by assessing the difference between actual NPP and climate-induced NPP changes. The results indicated that the grassland area had a net loss of 534.42 km2, and the net area of increased vegetation coverage was 74,683.05 km2. Furthermore, the total NPP increased by 8,010.73 Gg C·yr−1 (1 Gg = 109 g), of which the human activities, including grazing management measures (+6,809.40 Gg C·yr−1) and land conversion (45.72 Gg C·yr−1) contributed to 85.58% of the increase in NPP. Transformation from desert and farmland dominated grassland expansion and NPP increase, while urbanization and desertification caused large grassland reduction and NPP loss. The grazing management increased vegetation NPP in most regions except for some regions in the desert steppe and the farming-pastoral zone. Related policies should be further adjusted to strengthen the management of the desert steppe and farming-pastoral regions. © 2020, © 2020 The Author(s). Published by Taylor & Francis Group and Science Press on behalf of the Ecological Society of China.}, 41 | author_keywords={anthropogenic/human activities affects; Grassland degradation; land use and land cover change; management measures and policies; net primary productivity (NPP); Xilingol grassland}, 42 | funding_details={2016YFC0503603, 2016YFC0501101}, 43 | funding_text 1={This work was supported primarily by the State Key Research Development Program of China (No. 2016YFC0501101), (No. 2016YFC0503603).}, 44 | correspondence_address1={Wu, G.; State Key Laboratory of Urban and Regional Ecology, Research Center for Eco-Environmental Sciences, Chinese Academy of SciencesChina; email: wug@rcees.ac.cn}, 45 | publisher={Taylor and Francis Ltd.}, 46 | issn={20964129}, 47 | language={English}, 48 | abbrev_source_title={Ecosyst. Health Sustain.}, 49 | document_type={Article}, 50 | source={Scopus}, 51 | } 52 | 53 | @ARTICLE{Tang2020, 54 | author={Tang, H. and Geng, G. and zhou, M.}, 55 | title={Application of Digital Processing in Relic Image Restoration Design}, 56 | journal={Sensing and Imaging}, 57 | year={2020}, 58 | volume={21}, 59 | number={1}, 60 | doi={10.1007/s11220-019-0265-8}, 61 | art_number={6}, 62 | note={cited By 0}, 63 | url={https://www.scopus.com/inward/record.uri?eid=2-s2.0-85076498356&doi=10.1007%2fs11220-019-0265-8&partnerID=40&md5=25095ff13ed950e89470779e2d571d68}, 64 | affiliation={School of Information and Technology, Northwest University, Xi’an, Shaanxi 710127, China; Xi’an University of Finance and Economics, Xi’an, Shaanxi 710100, China; College of Information Science and Technology, Beijing Normal University, Beijing, 100875, China}, 65 | abstract={Cultural relic is the carrier of human historic culture, which can reflect the cultural and social environment, but cultural relics as a material will be damaged over time. Before the advent of computer technology, the damaged cultural relics would not be repaired due to cost. Computer vision technology has been applied to the restoration of cultural relics, mainly for the virtual restoration of damaged cultural relics images. This paper briefly introduced the Criminisi image restoration algorithm and the structure tensor used to improve the algorithm in the digital cultural relics image restoration. 
A damaged cultural relics image and a complete image which was damaged by human were repaired respectively using the classical Criminisi image restoration algorithm and the improved structure tensor based repair algorithm on MATLAB software. The results showed that the Criminisi image restoration algorithm could be used to repair the damaged images of ancient fabrics. It was found that the classical image restoration algorithm had some shortcomings, such as inappropriate texture structure, obvious repair marks and addition of redundant information, but the improved algorithm effectively avoided the above shortcomings. The peak signal to noise ratio (SNR) of the complete image which was damaged by human was compared objectively, and it was found that the improved algorithm had better restoration performance. © 2019, Springer Science+Business Media, LLC, part of Springer Nature.}, 66 | author_keywords={Criminisi algorithm; Digital; Relic restoration; Structure tensor}, 67 | keywords={Image enhancement; MATLAB; Restoration; Signal to noise ratio; Tensors; Textures, Computer technology; Computer vision technology; Digital; Image restoration algorithms; Improved structures; Peak signal to noise ratio; Structure tensors; Virtual restoration, Image reconstruction}, 68 | correspondence_address1={Tang, H.; Xi’an University of Finance and EconomicsChina; email: huitangxa@yeah.net}, 69 | publisher={Springer}, 70 | issn={15572064}, 71 | language={English}, 72 | abbrev_source_title={Sens. Imaging}, 73 | document_type={Article}, 74 | source={Scopus}, 75 | } -------------------------------------------------------------------------------- /tests/testthat/testdata/Scopus_ris_example.ris: -------------------------------------------------------------------------------- 1 | TY - JOUR 2 | TI - Soil fauna diversity at different stages of reed restoration in a lakeshore wetland at Lake Taihu, China 3 | T2 - Ecosystem Health and Sustainability 4 | J2 - Ecosyst. Health Sustain. 5 | VL - 6 6 | IS - 1 7 | PY - 2020 8 | DO - 10.1080/20964129.2020.1722034 9 | SN - 20964129 (ISSN) 10 | AU - Li, W. 11 | AU - Dou, Z. 12 | AU - Cui, L. 13 | AU - Zhao, X. 14 | AU - Zhang, M. 15 | AU - Zhang, Y. 16 | AU - Gao, C. 17 | AU - Yang, Z. 18 | AU - Lei, Y. 19 | AU - Pan, X. 20 | AD - Institute of Wetland Research, Chinese Academy of Forestry, Beijing, China 21 | AD - Beijing Key Laboratory of Wetland Services and Restoration, Beijing, China 22 | AD - School of Geographical Sciences, University of Bristol, Bristol, United Kingdom 23 | AB - Introduction: Wetland soil fauna support material cycling and restoration processes in wetland ecosystems. In our study, we observed variations in wetland soil fauna on the shores of Lake Taihu, China. We examined the relationships between fauna and major environmental factors, and looked at the short-and long-term changes in reed wetlands under restoration and in the natural reed lakeshore. Outcomes: We identified 93 groups of soil fauna in different wetlands and found significant differences in the lakeshore wetlands’ soil fauna assemblages, depending on the length of the restoration period. By analyzing the soil fauna community evenness, dominance, number of taxa, and diversity, we found minimal seasonal variation in the soil fauna community diversity and abundance. The abundance of soil fauna in the sites under restoration decreased with depth below the soil surface. The reed restoration was obvious in the succession of the soil fauna groups in the long-term site. 
Although the restoration had an overall positive long-term effect on the soil fauna communities, there were no obvious short-term changes in the number of individuals. Conclusion: The study explored various potential measures to restore soil fauna in the Lake Taihu wetland and developed a theoretical basis for restoring the lakeshore wetland ecosystem. © 2020, © 2020 The Author(s). Published by Taylor & Francis Group and Science Press on behalf of the Ecological Society of China. 24 | KW - Environmental factors 25 | KW - Lake Taihu 26 | KW - lakeshore wetland 27 | KW - reed 28 | KW - soil fauna 29 | KW - wetland restoration 30 | PB - Taylor and Francis Ltd. 31 | N1 - Export Date: 30 April 2020 32 | M3 - Article 33 | DB - Scopus 34 | C7 - 1722034 35 | LA - English 36 | N1 - Correspondence Address: Cui, L.; Institute of Wetland Research, Chinese Academy of ForestryChina; email: wetlands108@126.com 37 | N1 - Funding details: University of Twente 38 | N1 - Funding details: Special Fund for Forest Scientific Research in the Public Welfare, 201404305, 200904001, CAFYBB2011007 39 | N1 - Funding text 1: The Special Fund for Forest Scientific Research in the Public Welfare [Grant Nos. 201404305 and 200904001] and ?The Lecture and Study Program for Outstanding Scholars from Home and Abroad? [CAFYBB2011007] funded this research. The authors thank Amjad Ali and Abel Ramoelo for valuable comments on the draft manuscript and John Wasige from the University of Twente, The Netherlands, for writing assistance. 40 | UR - https://www.scopus.com/inward/record.uri?eid=2-s2.0-85079532227&doi=10.1080%2f20964129.2020.1722034&partnerID=40&md5=a4a9d08e21dbf151663a0e842336c225 41 | ER - 42 | 43 | TY - JOUR 44 | TI - Impacts of land conversion and management measures on net primary productivity in semi-arid grassland 45 | T2 - Ecosystem Health and Sustainability 46 | J2 - Ecosyst. Health Sustain. 47 | VL - 6 48 | IS - 1 49 | PY - 2020 50 | DO - 10.1080/20964129.2020.1749010 51 | SN - 20964129 (ISSN) 52 | AU - Cao, F. 53 | AU - Li, J. 54 | AU - Fu, X. 55 | AU - Wu, G. 56 | AD - State Key Laboratory of Urban and Regional Ecology, Research Center for Eco-Environmental Sciences, Chinese Academy of Sciences, Beijing, China 57 | AD - University of Chinese Academy of Sciences, Beijing, China 58 | AB - Ecological restoration measures implemented in China have profoundly impacted vegetation NPP. This study aimed to estimate the effects of the land conversion and management measures on the grassland ecosystem in semi-arid regions. Land use data were employed from 2000 to 2015 to compare land conversion and coverage changes in Xilingol grassland. Then, the contributions of land conversion and management policies were quantified by assessing the difference between actual NPP and climate-induced NPP changes. The results indicated that the grassland area had a net loss of 534.42 km2, and the net area of increased vegetation coverage was 74,683.05 km2. Furthermore, the total NPP increased by 8,010.73 Gg C·yr−1 (1 Gg = 109 g), of which the human activities, including grazing management measures (+6,809.40 Gg C·yr−1) and land conversion (45.72 Gg C·yr−1) contributed to 85.58% of the increase in NPP. Transformation from desert and farmland dominated grassland expansion and NPP increase, while urbanization and desertification caused large grassland reduction and NPP loss. The grazing management increased vegetation NPP in most regions except for some regions in the desert steppe and the farming-pastoral zone. 
Related policies should be further adjusted to strengthen the management of the desert steppe and farming-pastoral regions. © 2020, © 2020 The Author(s). Published by Taylor & Francis Group and Science Press on behalf of the Ecological Society of China. 59 | KW - anthropogenic/human activities affects 60 | KW - Grassland degradation 61 | KW - land use and land cover change 62 | KW - management measures and policies 63 | KW - net primary productivity (NPP) 64 | KW - Xilingol grassland 65 | PB - Taylor and Francis Ltd. 66 | N1 - Export Date: 30 April 2020 67 | M3 - Article 68 | DB - Scopus 69 | C7 - 1749010 70 | LA - English 71 | N1 - Correspondence Address: Wu, G.; State Key Laboratory of Urban and Regional Ecology, Research Center for Eco-Environmental Sciences, Chinese Academy of SciencesChina; email: wug@rcees.ac.cn 72 | N1 - Funding details: 2016YFC0503603, 2016YFC0501101 73 | N1 - Funding text 1: This work was supported primarily by the State Key Research Development Program of China (No. 2016YFC0501101), (No. 2016YFC0503603). 74 | UR - https://www.scopus.com/inward/record.uri?eid=2-s2.0-85083526403&doi=10.1080%2f20964129.2020.1749010&partnerID=40&md5=82da8427a9c2e5b22167c4428a08a62f 75 | ER - 76 | 77 | TY - JOUR 78 | TI - Application of Digital Processing in Relic Image Restoration Design 79 | T2 - Sensing and Imaging 80 | J2 - Sens. Imaging 81 | VL - 21 82 | IS - 1 83 | PY - 2020 84 | DO - 10.1007/s11220-019-0265-8 85 | SN - 15572064 (ISSN) 86 | AU - Tang, H. 87 | AU - Geng, G. 88 | AU - zhou, M. 89 | AD - School of Information and Technology, Northwest University, Xi’an, Shaanxi 710127, China 90 | AD - Xi’an University of Finance and Economics, Xi’an, Shaanxi 710100, China 91 | AD - College of Information Science and Technology, Beijing Normal University, Beijing, 100875, China 92 | AB - Cultural relic is the carrier of human historic culture, which can reflect the cultural and social environment, but cultural relics as a material will be damaged over time. Before the advent of computer technology, the damaged cultural relics would not be repaired due to cost. Computer vision technology has been applied to the restoration of cultural relics, mainly for the virtual restoration of damaged cultural relics images. This paper briefly introduced the Criminisi image restoration algorithm and the structure tensor used to improve the algorithm in the digital cultural relics image restoration. A damaged cultural relics image and a complete image which was damaged by human were repaired respectively using the classical Criminisi image restoration algorithm and the improved structure tensor based repair algorithm on MATLAB software. The results showed that the Criminisi image restoration algorithm could be used to repair the damaged images of ancient fabrics. It was found that the classical image restoration algorithm had some shortcomings, such as inappropriate texture structure, obvious repair marks and addition of redundant information, but the improved algorithm effectively avoided the above shortcomings. The peak signal to noise ratio (SNR) of the complete image which was damaged by human was compared objectively, and it was found that the improved algorithm had better restoration performance. © 2019, Springer Science+Business Media, LLC, part of Springer Nature. 
93 | KW - Criminisi algorithm 94 | KW - Digital 95 | KW - Relic restoration 96 | KW - Structure tensor 97 | KW - Image enhancement 98 | KW - MATLAB 99 | KW - Restoration 100 | KW - Signal to noise ratio 101 | KW - Tensors 102 | KW - Textures 103 | KW - Computer technology 104 | KW - Computer vision technology 105 | KW - Digital 106 | KW - Image restoration algorithms 107 | KW - Improved structures 108 | KW - Peak signal to noise ratio 109 | KW - Structure tensors 110 | KW - Virtual restoration 111 | KW - Image reconstruction 112 | PB - Springer 113 | N1 - Export Date: 30 April 2020 114 | M3 - Article 115 | DB - Scopus 116 | C7 - 6 117 | LA - English 118 | N1 - Correspondence Address: Tang, H.; Xi’an University of Finance and EconomicsChina; email: huitangxa@yeah.net 119 | UR - https://www.scopus.com/inward/record.uri?eid=2-s2.0-85076498356&doi=10.1007%2fs11220-019-0265-8&partnerID=40&md5=25095ff13ed950e89470779e2d571d68 120 | ER - -------------------------------------------------------------------------------- /tests/testthat/testdata/WoS_ciw_example.ciw: -------------------------------------------------------------------------------- 1 | FN Clarivate Analytics Web of Science 2 | VR 1.0 3 | PT J 4 | AU Li, Wei 5 | Dou, Zhiguo 6 | Cui, Lijuan 7 | Zhao, Xinsheng 8 | Zhang, Manyin 9 | Zhang, Yan 10 | Gao, Changjun 11 | Yang, Zheng 12 | Lei, Yinru 13 | Pan, Xu 14 | TI Soil fauna diversity at different stages of reed restoration in a 15 | lakeshore wetland at Lake Taihu, China 16 | SO ECOSYSTEM HEALTH AND SUSTAINABILITY 17 | VL 6 18 | IS 1 19 | AR UNSP 1722034 20 | DI 10.1080/20964129.2020.1722034 21 | PD DEC 16 2020 22 | PY 2020 23 | AB Introduction: Wetland soil fauna support material cycling and 24 | restoration processes in wetland ecosystems. In our study, we observed 25 | variations in wetland soil fauna on the shores of Lake Taihu, China. We 26 | examined the relationships between fauna and major environmental 27 | factors, and looked at the short-and long-term changes in reed wetlands 28 | under restoration and in the natural reed lakeshore. Outcomes: We 29 | identified 93 groups of soil fauna in different wetlands and found 30 | significant differences in the lakeshore wetlands' soil fauna 31 | assemblages, depending on the length of the restoration period. By 32 | analyzing the soil fauna community evenness, dominance, number of taxa, 33 | and diversity, we found minimal seasonal variation in the soil fauna 34 | community diversity and abundance. The abundance of soil fauna in the 35 | sites under restoration decreased with depth below the soil surface. The 36 | reed restoration was obvious in the succession of the soil fauna groups 37 | in the long-term site. Although the restoration had an overall positive 38 | long-term effect on the soil fauna communities, there were no obvious 39 | short-term changes in the number of individuals. Conclusion: The study 40 | explored various potential measures to restore soil fauna in the Lake 41 | Taihu wetland and developed a theoretical basis for restoring the 42 | lakeshore wetland ecosystem. 
43 | RI Zhang, Manyin/AAK-5171-2020; LI, WEI/; Dou, Zhiguo/ 44 | OI LI, WEI/0000-0002-2133-9287; Dou, Zhiguo/0000-0001-8031-4330 45 | ZB 0 46 | ZR 0 47 | ZS 0 48 | TC 0 49 | Z8 0 50 | Z9 0 51 | SN 2096-4129 52 | EI 2332-8878 53 | UT WOS:000514379300001 54 | ER 55 | 56 | PT J 57 | AU Higgins, Kathleen Marie 58 | TI Aesthetics and the Containment of Grief 59 | SO JOURNAL OF AESTHETICS AND ART CRITICISM 60 | VL 78 61 | IS 1 62 | BP 9 63 | EP 20 64 | DI 10.1111/jaac.12686 65 | PD DEC 2020 66 | PY 2020 67 | AB My point of departure is the observation that people ubiquitously turn 68 | to aesthetic practices in response to the loss of a loved one. I argue 69 | that profound loss catapults the bereaved person into an alternate 70 | "world" that differs in marked ways from the world we usually occupy, an 71 | alternate world lacking even the basic coherence we need to function. 72 | Aesthetic practices facilitate restoration of coherence to our 73 | experience, as well as reconnection with the social world and recovery 74 | from the breakdown that profound loss involves. While the aesthetic 75 | notion of closure is frequently invoked in connection with the needs of 76 | the bereaved, I suggest that while containing the emotions experienced 77 | in connection with loss is vital if they are to be processed, 78 | unrealistic aspirations toward closure can encourage expectations that 79 | harm the bereaved. By contrast, I suggest that the aims of aesthetically 80 | punctuating experience and communicating through aesthetic gestures are 81 | beneficial for helping the bereaved adjust to their new circumstances. 82 | TC 0 83 | ZS 0 84 | Z8 0 85 | ZR 0 86 | ZB 0 87 | Z9 0 88 | SN 0021-8529 89 | EI 1540-6245 90 | UT WOS:000510870600001 91 | ER 92 | 93 | PT J 94 | AU Tang, Hui 95 | Geng, Guohua 96 | Zhou, Mingquan 97 | TI Application of Digital Processing in Relic Image Restoration Design 98 | SO SENSING AND IMAGING 99 | VL 21 100 | IS 1 101 | AR 6 102 | DI 10.1007/s11220-019-0265-8 103 | PD DEC 2020 104 | PY 2020 105 | AB Cultural relic is the carrier of human historic culture, which can 106 | reflect the cultural and social environment, but cultural relics as a 107 | material will be damaged over time. Before the advent of computer 108 | technology, the damaged cultural relics would not be repaired due to 109 | cost. Computer vision technology has been applied to the restoration of 110 | cultural relics, mainly for the virtual restoration of damaged cultural 111 | relics images. This paper briefly introduced the Criminisi image 112 | restoration algorithm and the structure tensor used to improve the 113 | algorithm in the digital cultural relics image restoration. A damaged 114 | cultural relics image and a complete image which was damaged by human 115 | were repaired respectively using the classical Criminisi image 116 | restoration algorithm and the improved structure tensor based repair 117 | algorithm on MATLAB software. The results showed that the Criminisi 118 | image restoration algorithm could be used to repair the damaged images 119 | of ancient fabrics. It was found that the classical image restoration 120 | algorithm had some shortcomings, such as inappropriate texture 121 | structure, obvious repair marks and addition of redundant information, 122 | but the improved algorithm effectively avoided the above shortcomings. 
123 | The peak signal to noise ratio (SNR) of the complete image which was 124 | damaged by human was compared objectively, and it was found that the 125 | improved algorithm had better restoration performance. 126 | Z8 0 127 | ZS 0 128 | TC 0 129 | ZB 0 130 | ZR 0 131 | Z9 0 132 | SN 1557-2064 133 | EI 1557-2072 134 | UT WOS:000502504100001 135 | ER 136 | 137 | PT J 138 | AU Zhang, Xinrong 139 | Zhang, Lei 140 | Chen, Ming 141 | Liu, Dongying 142 | TI miR-324-5p inhibits gallbladder carcinoma cell metastatic behaviours by 143 | downregulation of transforming growth factor beta 2 expression. 144 | SO Artificial cells, nanomedicine, and biotechnology 145 | VL 48 146 | IS 1 147 | BP 315 148 | EP 324 149 | DI 10.1080/21691401.2019.1703724 150 | PD 2020-Dec 151 | PY 2020 152 | AB Increasing studies have demonstrated that microRNAs (miRNAs) are 153 | associated with the metastasis of gallbladder carcinoma (GBC). Recently, 154 | miR-324-5p has been reported to be a tumour-suppressive miRNA in many 155 | types of malignant cancer. However, the biological function and 156 | molecular mechanism of miR-324-5p in GBC still remain largely unknown. 157 | Here, we found that miR-324-5p expression was notably down-regulated in 158 | both GBC tissues and cells compared with that in normal controls. 159 | Downregulated miR-324-5p expression was negatively associated with the 160 | status of local invasion and lymph node metastasis and predicted a poor 161 | prognosis in GBC patients. Further functional assays revealed that 162 | restoration of miR-324-5p significantly suppressed GBC cell migration, 163 | invasion and epithelial-mesenchymal transition (EMT) invitro and impeded 164 | the metastasis of GBC cells invivo. Moreover, RNA immunoprecipitation 165 | (RIP) and dual-luciferase reporter assay confirmed that the transforming 166 | growth factor beta 2 (TGFB2) was a direct target gene of miR-324-5p in 167 | GBC cells. Mechanically, small interfering RNA (siRNA)-mediated 168 | knockdown of TGFB2 partially phenocopied the inhibitory effects of 169 | miR-324-5p overexpression on GBC cell metastatic phenotypes. In summary, 170 | our findings demonstrated that miR-324-5p targets TGFB2 expression to 171 | inhibit GBC cell metastatic behaviors, and implying miR-324-5p as a 172 | potential biomarker for diagnostic and therapeutic strategies in GBC. 173 | ZB 0 174 | ZR 0 175 | Z8 0 176 | ZS 0 177 | TC 0 178 | Z9 0 179 | EI 2169-141X 180 | UT MEDLINE:31858815 181 | PM 31858815 182 | ER 183 | 184 | PT J 185 | AU Yu, Yijun 186 | Ma, Lan 187 | Zhang, He 188 | Sun, Weibin 189 | Zheng, Lichun 190 | Liu, Chao 191 | Miao, Leiying 192 | TI EPO could be regulated by HIF-1 and promote osteogenesis and accelerate 193 | bone repair. 194 | SO Artificial cells, nanomedicine, and biotechnology 195 | VL 48 196 | IS 1 197 | BP 206 198 | EP 217 199 | DI 10.1080/21691401.2019.1699827 200 | PD 2020-Dec 201 | PY 2020 202 | AB Bone defects caused by many factors prompt further study of pathological 203 | process and restoration methods. This study was aimed to clarify the 204 | effect of erythropoietin on the repair of bone defect. We added the 205 | designated concentration of rhEPO to endothelial progenitor cells and 206 | marrow stromal cells, then detected its osteogenic and angiogenesis 207 | effects. The results showed that rhEPO promoted the proliferation of EPC 208 | and ST2 by promoting the mitosis without affecting cell apoptosis. 
The 209 | protein and mRNA levels of angiogenesis and osteogenic related factors 210 | exhibited higher expressions. Additionally, rhEPO encapsulated in PLGA 211 | scaffolds accelerated the new bone formation in rat calvaria bone defect 212 | model. Since the centre of bone defect was hypoxia environment, we 213 | cultured EPC and ST2 under hypoxia. SiRNA and an inhibitor of HIF-1 were 214 | used to interfere HIF-1, then the following changes of VEGF and EPO were 215 | detected. The results showed that all the factors were upregulated under 216 | the hypoxia environment. The expression of VEGF at protein and mRNA 217 | level decreased as HIF-1 was inhibited or interfered from 6h, while the 218 | mRNA expression of EPO from 6h and changed significantly at protein 219 | level from 12h. Therefore, EPO is a promising factor for further 220 | studies. 221 | ZR 0 222 | ZS 0 223 | TC 0 224 | Z8 0 225 | ZB 0 226 | Z9 0 227 | EI 2169-141X 228 | UT MEDLINE:31851837 229 | PM 31851837 230 | ER 231 | 232 | EF -------------------------------------------------------------------------------- /tests/testthat/testdata/WoS_txt_example.txt: -------------------------------------------------------------------------------- 1 | FN Clarivate Analytics Web of Science 2 | VR 1.0 3 | PT J 4 | AU Li, Wei 5 | Dou, Zhiguo 6 | Cui, Lijuan 7 | Zhao, Xinsheng 8 | Zhang, Manyin 9 | Zhang, Yan 10 | Gao, Changjun 11 | Yang, Zheng 12 | Lei, Yinru 13 | Pan, Xu 14 | TI Soil fauna diversity at different stages of reed restoration in a 15 | lakeshore wetland at Lake Taihu, China 16 | SO ECOSYSTEM HEALTH AND SUSTAINABILITY 17 | VL 6 18 | IS 1 19 | AR UNSP 1722034 20 | DI 10.1080/20964129.2020.1722034 21 | PD DEC 16 2020 22 | PY 2020 23 | AB Introduction: Wetland soil fauna support material cycling and 24 | restoration processes in wetland ecosystems. In our study, we observed 25 | variations in wetland soil fauna on the shores of Lake Taihu, China. We 26 | examined the relationships between fauna and major environmental 27 | factors, and looked at the short-and long-term changes in reed wetlands 28 | under restoration and in the natural reed lakeshore. Outcomes: We 29 | identified 93 groups of soil fauna in different wetlands and found 30 | significant differences in the lakeshore wetlands' soil fauna 31 | assemblages, depending on the length of the restoration period. By 32 | analyzing the soil fauna community evenness, dominance, number of taxa, 33 | and diversity, we found minimal seasonal variation in the soil fauna 34 | community diversity and abundance. The abundance of soil fauna in the 35 | sites under restoration decreased with depth below the soil surface. The 36 | reed restoration was obvious in the succession of the soil fauna groups 37 | in the long-term site. Although the restoration had an overall positive 38 | long-term effect on the soil fauna communities, there were no obvious 39 | short-term changes in the number of individuals. Conclusion: The study 40 | explored various potential measures to restore soil fauna in the Lake 41 | Taihu wetland and developed a theoretical basis for restoring the 42 | lakeshore wetland ecosystem. 
43 | RI Zhang, Manyin/AAK-5171-2020; LI, WEI/; Dou, Zhiguo/ 44 | OI LI, WEI/0000-0002-2133-9287; Dou, Zhiguo/0000-0001-8031-4330 45 | ZB 0 46 | ZR 0 47 | ZS 0 48 | TC 0 49 | Z8 0 50 | Z9 0 51 | SN 2096-4129 52 | EI 2332-8878 53 | UT WOS:000514379300001 54 | ER 55 | 56 | PT J 57 | AU Higgins, Kathleen Marie 58 | TI Aesthetics and the Containment of Grief 59 | SO JOURNAL OF AESTHETICS AND ART CRITICISM 60 | VL 78 61 | IS 1 62 | BP 9 63 | EP 20 64 | DI 10.1111/jaac.12686 65 | PD DEC 2020 66 | PY 2020 67 | AB My point of departure is the observation that people ubiquitously turn 68 | to aesthetic practices in response to the loss of a loved one. I argue 69 | that profound loss catapults the bereaved person into an alternate 70 | "world" that differs in marked ways from the world we usually occupy, an 71 | alternate world lacking even the basic coherence we need to function. 72 | Aesthetic practices facilitate restoration of coherence to our 73 | experience, as well as reconnection with the social world and recovery 74 | from the breakdown that profound loss involves. While the aesthetic 75 | notion of closure is frequently invoked in connection with the needs of 76 | the bereaved, I suggest that while containing the emotions experienced 77 | in connection with loss is vital if they are to be processed, 78 | unrealistic aspirations toward closure can encourage expectations that 79 | harm the bereaved. By contrast, I suggest that the aims of aesthetically 80 | punctuating experience and communicating through aesthetic gestures are 81 | beneficial for helping the bereaved adjust to their new circumstances. 82 | TC 0 83 | ZS 0 84 | Z8 0 85 | ZR 0 86 | ZB 0 87 | Z9 0 88 | SN 0021-8529 89 | EI 1540-6245 90 | UT WOS:000510870600001 91 | ER 92 | 93 | PT J 94 | AU Tang, Hui 95 | Geng, Guohua 96 | Zhou, Mingquan 97 | TI Application of Digital Processing in Relic Image Restoration Design 98 | SO SENSING AND IMAGING 99 | VL 21 100 | IS 1 101 | AR 6 102 | DI 10.1007/s11220-019-0265-8 103 | PD DEC 2020 104 | PY 2020 105 | AB Cultural relic is the carrier of human historic culture, which can 106 | reflect the cultural and social environment, but cultural relics as a 107 | material will be damaged over time. Before the advent of computer 108 | technology, the damaged cultural relics would not be repaired due to 109 | cost. Computer vision technology has been applied to the restoration of 110 | cultural relics, mainly for the virtual restoration of damaged cultural 111 | relics images. This paper briefly introduced the Criminisi image 112 | restoration algorithm and the structure tensor used to improve the 113 | algorithm in the digital cultural relics image restoration. A damaged 114 | cultural relics image and a complete image which was damaged by human 115 | were repaired respectively using the classical Criminisi image 116 | restoration algorithm and the improved structure tensor based repair 117 | algorithm on MATLAB software. The results showed that the Criminisi 118 | image restoration algorithm could be used to repair the damaged images 119 | of ancient fabrics. It was found that the classical image restoration 120 | algorithm had some shortcomings, such as inappropriate texture 121 | structure, obvious repair marks and addition of redundant information, 122 | but the improved algorithm effectively avoided the above shortcomings. 
123 | The peak signal to noise ratio (SNR) of the complete image which was 124 | damaged by human was compared objectively, and it was found that the 125 | improved algorithm had better restoration performance. 126 | Z8 0 127 | ZS 0 128 | TC 0 129 | ZB 0 130 | ZR 0 131 | Z9 0 132 | SN 1557-2064 133 | EI 1557-2072 134 | UT WOS:000502504100001 135 | ER 136 | 137 | PT J 138 | AU Zhang, Xinrong 139 | Zhang, Lei 140 | Chen, Ming 141 | Liu, Dongying 142 | TI miR-324-5p inhibits gallbladder carcinoma cell metastatic behaviours by 143 | downregulation of transforming growth factor beta 2 expression. 144 | SO Artificial cells, nanomedicine, and biotechnology 145 | VL 48 146 | IS 1 147 | BP 315 148 | EP 324 149 | DI 10.1080/21691401.2019.1703724 150 | PD 2020-Dec 151 | PY 2020 152 | AB Increasing studies have demonstrated that microRNAs (miRNAs) are 153 | associated with the metastasis of gallbladder carcinoma (GBC). Recently, 154 | miR-324-5p has been reported to be a tumour-suppressive miRNA in many 155 | types of malignant cancer. However, the biological function and 156 | molecular mechanism of miR-324-5p in GBC still remain largely unknown. 157 | Here, we found that miR-324-5p expression was notably down-regulated in 158 | both GBC tissues and cells compared with that in normal controls. 159 | Downregulated miR-324-5p expression was negatively associated with the 160 | status of local invasion and lymph node metastasis and predicted a poor 161 | prognosis in GBC patients. Further functional assays revealed that 162 | restoration of miR-324-5p significantly suppressed GBC cell migration, 163 | invasion and epithelial-mesenchymal transition (EMT) invitro and impeded 164 | the metastasis of GBC cells invivo. Moreover, RNA immunoprecipitation 165 | (RIP) and dual-luciferase reporter assay confirmed that the transforming 166 | growth factor beta 2 (TGFB2) was a direct target gene of miR-324-5p in 167 | GBC cells. Mechanically, small interfering RNA (siRNA)-mediated 168 | knockdown of TGFB2 partially phenocopied the inhibitory effects of 169 | miR-324-5p overexpression on GBC cell metastatic phenotypes. In summary, 170 | our findings demonstrated that miR-324-5p targets TGFB2 expression to 171 | inhibit GBC cell metastatic behaviors, and implying miR-324-5p as a 172 | potential biomarker for diagnostic and therapeutic strategies in GBC. 173 | ZB 0 174 | ZR 0 175 | Z8 0 176 | ZS 0 177 | TC 0 178 | Z9 0 179 | EI 2169-141X 180 | UT MEDLINE:31858815 181 | PM 31858815 182 | ER 183 | 184 | EF -------------------------------------------------------------------------------- /tests/testthat/testdata/eviatlas.txt: -------------------------------------------------------------------------------- 1 | TY - JOUR 2 | AU - Haddaway, Neal R. 3 | AU - Feierman, Andrew 4 | AU - Grainger, Matthew J. 5 | AU - Gray, Charles T. 6 | AU - Tanriver-Ayder, Ezgi 7 | AU - Dhaubanjar, Sanita 8 | AU - Westgate, Martin J. 
9 | PY - 2019 10 | DA - 2019/06/04 11 | TI - EviAtlas: a tool for visualising evidence synthesis databases 12 | JO - Environmental Evidence 13 | SP - 22 14 | VL - 8 15 | IS - 1 16 | SN - 2047-2382 17 | UR - https://doi.org/10.1186/s13750-019-0167-1 18 | DO - 10.1186/s13750-019-0167-1 19 | ID - Haddaway2019 20 | ER - 21 | -------------------------------------------------------------------------------- /tests/testthat/testdata/litsearchr.txt: -------------------------------------------------------------------------------- 1 | @article{grames2019automated, 2 | title={An automated approach to identifying search terms for systematic reviews using keyword co-occurrence networks}, 3 | author={Grames, Eliza M and Stillman, Andrew N and Tingley, Morgan W and Elphick, Chris S}, 4 | journal={Methods in Ecology and Evolution}, 5 | volume={10}, 6 | number={10}, 7 | pages={1645--1654}, 8 | year={2019}, 9 | publisher={Wiley Online Library} 10 | } 11 | -------------------------------------------------------------------------------- /tests/testthat/testdata/res_synth_methods.txt: -------------------------------------------------------------------------------- 1 | 2 | PMID- 32336025 3 | OWN - NLM 4 | STAT- Publisher 5 | LR - 20200426 6 | IS - 1759-2887 (Electronic) 7 | IS - 1759-2879 (Linking) 8 | DP - 2020 Apr 26 9 | TI - Risk-Of-Bias VISualization (robvis): an R package and Shiny web app for 10 | visualizing risk-of-bias assessments. 11 | LID - 10.1002/jrsm.1411 [doi] 12 | AB - Despite a major increase in the range and number of software offerings now 13 | available to help researchers produce evidence syntheses, there is currently no 14 | generic tool for producing figures to display and explore the risk-of-bias 15 | assessments that routinely take place as part of systematic review. However, 16 | tools such as the R programming environment and Shiny (an R package for building 17 | interactive web apps) have made it straightforward to produce new tools to help 18 | in producing evidence syntheses. We present a new tool, robvis (Risk-Of-Bias 19 | VISualization), available as an R package and web app, which facilitates rapid 20 | production of publication-quality risk-of-bias assessment figures. We present a 21 | timeline of the tool's development and its key functionality. This article is 22 | protected by copyright. All rights reserved. 23 | CI - This article is protected by copyright. All rights reserved. 24 | FAU - McGuinness, Luke A 25 | AU - McGuinness LA 26 | AUID- ORCID: https://orcid.org/0000-0001-8730-9761 27 | AD - MRC Integrative Epidemiology Unit at the University of Bristol, Bristol, UK. 28 | AD - Population Health Sciences, Bristol Medical School, University of Bristol, 29 | Bristol, UK. 30 | FAU - Higgins, Julian Pt 31 | AU - Higgins JP 32 | AD - MRC Integrative Epidemiology Unit at the University of Bristol, Bristol, UK. 33 | AD - Population Health Sciences, Bristol Medical School, University of Bristol, 34 | Bristol, UK. 
35 | LA - eng 36 | PT - Journal Article 37 | DEP - 20200426 38 | PL - England 39 | TA - Res Synth Methods 40 | JT - Research synthesis methods 41 | JID - 101543738 42 | SB - IM 43 | OTO - NOTNLM 44 | OT - Data visualization 45 | OT - Evidence synthesis 46 | OT - R 47 | OT - Risk of bias 48 | EDAT- 2020/04/27 06:00 49 | MHDA- 2020/04/27 06:00 50 | CRDT- 2020/04/27 06:00 51 | PHST- 2020/02/27 00:00 [received] 52 | PHST- 2020/04/16 00:00 [revised] 53 | PHST- 2020/04/18 00:00 [accepted] 54 | PHST- 2020/04/27 06:00 [entrez] 55 | PHST- 2020/04/27 06:00 [pubmed] 56 | PHST- 2020/04/27 06:00 [medline] 57 | AID - 10.1002/jrsm.1411 [doi] 58 | PST - aheadofprint 59 | SO - Res Synth Methods. 2020 Apr 26. doi: 10.1002/jrsm.1411. 60 | 61 | PMID- 31355546 62 | OWN - NLM 63 | STAT- In-Process 64 | LR - 20200226 65 | IS - 1759-2887 (Electronic) 66 | IS - 1759-2879 (Linking) 67 | VI - 10 68 | IP - 4 69 | DP - 2019 Dec 70 | TI - revtools: An R package to support article screening for evidence synthesis. 71 | PG - 606-614 72 | LID - 10.1002/jrsm.1374 [doi] 73 | AB - The field of evidence synthesis is growing rapidly, with a corresponding increase 74 | in the number of software tools and workflows to support the construction of 75 | systematic reviews, systematic maps, and meta-analyses. Despite much progress, 76 | however, a number of problems remain, including slow integration of new 77 | statistical or methodological approaches into user-friendly software, low 78 | prevalence of open-source software, and poor integration among distinct software 79 | tools. These issues hinder the utility and transparency of new methods to the 80 | research community. Here, I present revtools, an R package to support article 81 | screening during evidence synthesis projects. It provides tools for the import 82 | and deduplication of bibliographic data, screening of articles by title or 83 | abstract, and visualization of article content using topic models. The software 84 | is entirely open-source and combines command-line scripting for experienced 85 | programmers with custom-built user interfaces for casual users, with further 86 | methods to support article screening to be added over time. revtools provides 87 | free access to novel methods in an open-source environment and represents a 88 | valuable step in expanding the capacity of R to support evidence synthesis 89 | projects. 90 | CI - (c) 2019 John Wiley & Sons, Ltd. 91 | FAU - Westgate, Martin J 92 | AU - Westgate MJ 93 | AUID- ORCID: https://orcid.org/0000-0003-0854-2034 94 | AD - Fenner School of Environment & Society, The Australian National University, 95 | Acton, ACT, Australia. 96 | LA - eng 97 | PT - Journal Article 98 | DEP - 20191018 99 | PL - England 100 | TA - Res Synth Methods 101 | JT - Research synthesis methods 102 | JID - 101543738 103 | SB - IM 104 | OTO - NOTNLM 105 | OT - data visualization 106 | OT - meta-analysis 107 | OT - natural language processing 108 | OT - systematic review 109 | OT - topic models 110 | EDAT- 2019/07/30 06:00 111 | MHDA- 2019/07/30 06:00 112 | CRDT- 2019/07/30 06:00 113 | PHST- 2019/02/25 00:00 [received] 114 | PHST- 2019/06/12 00:00 [revised] 115 | PHST- 2019/07/23 00:00 [accepted] 116 | PHST- 2019/07/30 06:00 [pubmed] 117 | PHST- 2019/07/30 06:00 [medline] 118 | PHST- 2019/07/30 06:00 [entrez] 119 | AID - 10.1002/jrsm.1374 [doi] 120 | PST - ppublish 121 | SO - Res Synth Methods. 2019 Dec;10(4):606-614. doi: 10.1002/jrsm.1374. Epub 2019 Oct 122 | 18. 
123 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/overview.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Overview" 3 | author: Martin Westgate & Eliza Grames 4 | date: 2025-02-21 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Overview} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r, include = FALSE} 13 | knitr::opts_chunk$set( 14 | collapse = TRUE, 15 | comment = "#>" 16 | ) 17 | ``` 18 | 19 | ## Introduction 20 | 21 | Systematic review searches include multiple databases that export results in a 22 | variety of formats with overlap in coverage between databases. To streamline the 23 | process of importing, assembling, and deduplicating results, `synthesisr` 24 | recognizes bibliographic files exported from databases commonly used for 25 | systematic reviews and merges results into a standardized format. 26 | 27 | ## Read and assemble bibliographic files 28 | 29 | `synthesisr` can read any BibTex or RIS formatted bibliographic data files. It 30 | detects whether files are more bib-like or ris-like and imports them 31 | accordingly. Note that files from some databases may contain non-standard fields 32 | or non-standard characters that cause import failure in rare cases; if this 33 | happens, we recommend converting the file in open source bibliographic 34 | management software such as Zotero. 35 | 36 | In the code below, we will demonstrate how to read and assemble bibliographic 37 | data files with example datasets included in the `synthesisr` package. Note that 38 | if you are using the code with your own data, you will not need to use 39 | `system.file()` and instead will want to pass a character vector of the path(s) 40 | to the file(s) you want to import. For example, if you have saved all your 41 | search results in a directory called "search_results", you may want to use 42 | `list.files("./search_results/")` instead. 43 | 44 | ```{r} 45 | #| eval: false 46 | # system.file will look for the path to where synthesisr is installed 47 | # by using the example bibliographic data files, you can reproduce the vignette 48 | bibfiles <- list.files( 49 | system.file("extdata/", package = "synthesisr"), 50 | full.names = TRUE 51 | ) 52 | 53 | # we can print the list of bibfiles to confirm what we will import 54 | # in this example, we have bibliographic data exported from Scopus and Zoological Record 55 | print(bibfiles) 56 | 57 | # now we can use read_refs to read in our bibliographic data files 58 | # we save them to a data.frame object (because return_df=TRUE) called imported_files 59 | library(synthesisr) 60 | imported_files <- read_refs( 61 | filename = bibfiles, 62 | return_df = TRUE) 63 | 64 | ``` 65 | 66 | ## Deduplicate bibliographic data 67 | 68 | Many journals are indexed in multiple databases, so searching across databases 69 | will retrieve duplicates. After import, `synthesisr` can detect duplicates and 70 | retain only unique bibliographic records using a variety of methods such as 71 | string distance or fuzzy matching records. 
A good place to start is removing 72 | articles that have identical titles, especially since this reduces computational 73 | time for more sophisticated deduplication methods. 74 | 75 | ```{r} 76 | #| eval: false 77 | ## first, we will remove articles that have identical titles 78 | ## this is a fairly conservative approach, so we will remove them without review 79 | # df <- deduplicate( 80 | # imported_files, 81 | # match_by = "title", 82 | # method = "exact" 83 | # ) 84 | 85 | ``` 86 | 87 | In some cases, it may be useful to know which articles were identified as 88 | duplicates so they can be manually reviewed or so that information from two 89 | records can be merged. Using our partially-deduplicated dataset, we check a few 90 | titles and use string distance methods to find additional duplicate articles in 91 | the code below and then remove them by extracting unique references. Although 92 | here we only use one secondary deduplication method (string distance), we could 93 | look for additional duplicates based on fuzzy matching abstracts, for example. 94 | 95 | ## NOTE: the examples below don't match now; need updating 96 | 97 | ```{r} 98 | #| eval: false 99 | # there are still some duplicate articles that were not removed 100 | # for example, the titles for articles 91 and 114 appear identical 101 | ## df$title[c(91,114)] 102 | # the dash-like symbol in title 91, however, is a special character not punctuation 103 | # so it was not classified as identical 104 | 105 | # similarly, there is a missing space in the title for article 96 106 | ## df$title[c(21,96)] 107 | 108 | # and an extra space in title 47 109 | ## df$title[c(47, 101)] 110 | 111 | # # in this example, we will use string distance to identify likely duplicates 112 | # duplicates_string <- find_duplicates( 113 | # df$title, 114 | # method = "string_osa", 115 | # to_lower = TRUE, 116 | # rm_punctuation = TRUE, 117 | # threshold = 7 118 | # ) 119 | 120 | # we can extract the line numbers from the dataset that are likely duplicated 121 | # this lets us manually review those titles to confirm they are duplicates 122 | 123 | # manual_checks <- review_duplicates(df$title, duplicates_string) 124 | 125 | ``` 126 | 127 | ```{r, include=FALSE, eval=TRUE} 128 | #| eval: false 129 | # manual_checks[,1] <- substring(manual_checks[,1], 1, 60) 130 | # 131 | # print(manual_checks[1:10, ]) 132 | ``` 133 | 134 | 135 | ```{r} 136 | #| eval: false 137 | # # the titles under match #99 are not duplicates, so we need to keep them both 138 | # # we can use the override_duplicates function to manually mark them as unique 139 | # new_duplicates <- synthesisr::override_duplicates(duplicates_string, 99) 140 | # 141 | # # now we can extract unique references from our dataset 142 | # # we need to pass it the dataset (df) and the matching articles (new_duplicates) 143 | # results <- extract_unique_references(df, new_duplicates) 144 | 145 | ``` 146 | 147 | ## Write bibliographic files 148 | 149 | To facilitate exporting results to other platforms after assembly and 150 | deduplication, `synthesisr` can write bibliographic data to .ris or .bib files. 151 | Optionally, `write_refs()` can write directly to a text file stored locally. 
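As a rough sketch of that export step (not evaluated here, assuming the import chunk above has been run, and using an illustrative output file name), the full set of deduplicated records could be written out like so:

```{r}
#| eval: false
# A minimal sketch only: it assumes `imported_files` was created with
# read_refs() as shown earlier, and that passing a character path to
# `file` writes the records to disk (see ?write_refs).
# "deduplicated_refs.ris" is an illustrative file name, not a file
# shipped with the package.
df <- deduplicate(
  imported_files,
  match_by = "title",
  method = "exact"
)

write_refs(
  df,
  format = "ris",
  file = "deduplicated_refs.ris"
)
```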
152 | 153 | ```{r} 154 | #| paged.print: TRUE 155 | #| eval: false 156 | # # synthesisr can write the full dataset to a bibliographic file 157 | # # but in this example, we will just write the first citation 158 | # # we also want it to be a nice clean bibliographic file, so we remove NA data 159 | # # this makes it easier to view the output when working with a single article 160 | # citation <- df[1, !is.na(df[1,])] 161 | # 162 | # format_citation(citation) 163 | # 164 | # write_refs(citation, 165 | # format = "bib", 166 | # file = FALSE 167 | # ) 168 | 169 | ``` 170 | --------------------------------------------------------------------------------