├── .Rbuildignore
├── .github
    ├── .gitignore
    └── workflows
    │   └── pkgdown.yaml
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
    ├── add_line_breaks.R
    ├── bibliography_functions.R
    ├── clean_functions.R
    ├── code_lookup.R
    ├── deduplication_functions.R
    ├── deprecated.R
    ├── detect_functions.R
    ├── format_citation.R
    ├── fuzz_functions.R
    ├── parse_bibtex.R
    ├── parse_csv_tsv.R
    ├── parse_pubmed.R
    ├── parse_ris.R
    ├── prep_ris.R
    ├── read_refs.R
    ├── reexports.R
    ├── string_functions.R
    ├── synthesisr-package.R
    └── write_refs.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── data
    └── code_lookup.RData
├── inst
    ├── examples
    │   ├── clean_.R
    │   ├── deduplicate.R
    │   ├── detect_.R
    │   ├── format_citation.R
    │   ├── fuzzdist.R
    │   ├── merge_columns.R
    │   ├── parse_.R
    │   └── read_refs.R
    ├── extdata
    │   ├── scopus.ris
    │   └── zoorec.txt
    ├── hex
    │   ├── Space_Mono
    │   │   ├── OFL.txt
    │   │   ├── SpaceMono-Bold.ttf
    │   │   └── SpaceMono-Regular.ttf
    │   └── hex.R
    ├── ris_tags
    │   └── code_lookup.csv
    └── test-data
    │   └── test_files.R
├── man
    ├── add_line_breaks.Rd
    ├── bibliography-class.Rd
    ├── clean_.Rd
    ├── code_lookup.Rd
    ├── deduplicate.Rd
    ├── detect_.Rd
    ├── extract_unique_references.Rd
    ├── figures
    │   └── logo.png
    ├── find_duplicates.Rd
    ├── format_citation.Rd
    ├── fuzz_.Rd
    ├── merge_columns.Rd
    ├── override_duplicates.Rd
    ├── parse_.Rd
    ├── read_refs.Rd
    ├── reexports.Rd
    ├── review_duplicates.Rd
    ├── string_.Rd
    ├── synthesisr-package.Rd
    └── write_refs.Rd
├── pkgdown
    ├── extra.css
    └── favicon
    │   ├── apple-touch-icon.png
    │   ├── favicon-96x96.png
    │   ├── favicon.ico
    │   ├── favicon.svg
    │   ├── site.webmanifest
    │   ├── web-app-manifest-192x192.png
    │   └── web-app-manifest-512x512.png
├── synthesisr.Rproj
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-clean.R
    │   ├── test-deduplicate.R
    │   ├── test-detect.R
    │   ├── test-format_citation.R
    │   ├── test-fuzz_functions.R
    │   ├── test-merge_columns.R
    │   ├── test-read_write.R
    │   ├── test-write.R
    │   └── testdata
    │       ├── ASP_ris_example.ris
    │       ├── Ovid_ris_example.ris
    │       ├── PubMed_example.txt
    │       ├── Scopus_bib_example.bib
    │       ├── Scopus_ris_example.ris
    │       ├── WoS_ciw_example.ciw
    │       ├── WoS_txt_example.txt
    │       ├── citesource_issue_24.ris
    │       ├── eviatlas.txt
    │       ├── litsearchr.txt
    │       └── res_synth_methods.txt
└── vignettes
    ├── .gitignore
    └── overview.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | synthesisr_hex.png
 2 | synthesisr.Rproj
 3 | ./inst/ris_tags
 4 | README.md
 5 | ^doc$
 6 | ^Meta$
 7 | ^.*\.Rproj$
 8 | ^\.Rproj\.user$
 9 | ^_pkgdown\.yml$
10 | ^docs$
11 | ^pkgdown$
12 | ^\.github$
13 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |   release:
 8 |     types: [published]
 9 |   workflow_dispatch:
10 | 
11 | name: pkgdown.yaml
12 | 
13 | permissions: read-all
14 | 
15 | jobs:
16 |   pkgdown:
17 |     runs-on: ubuntu-latest
18 |     # Only restrict concurrency for non-PR jobs
19 |     concurrency:
20 |       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
21 |     env:
22 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
23 |     permissions:
24 |       contents: write
25 |     steps:
26 |       - uses: actions/checkout@v4
27 | 
28 |       - uses: r-lib/actions/setup-pandoc@v2
29 | 
30 |       - uses: r-lib/actions/setup-r@v2
31 |         with:
32 |           use-public-rspm: true
33 | 
34 |       - uses: r-lib/actions/setup-r-dependencies@v2
35 |         with:
36 |           extra-packages: any::pkgdown, local::.
37 |           needs: website
38 | 
39 |       - name: Build site
40 |         run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
41 |         shell: Rscript {0}
42 | 
43 |       - name: Deploy to GitHub pages 🚀
44 |         if: github.event_name != 'pull_request'
45 |         uses: JamesIves/github-pages-deploy-action@v4.5.0
46 |         with:
47 |           clean: false
48 |           branch: gh-pages
49 |           folder: docs
50 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | .Rapp.history
 3 | .Rproj.user
 4 | .Rhistory
 5 | .RData
 6 | .Ruserdata
 7 | media/
 8 | doc
 9 | Meta
10 | docs/*
11 | /doc/
12 | /Meta/
13 | docs
14 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: synthesisr
 2 | Type: Package
 3 | Title: Import, Assemble, and Deduplicate Bibliographic Datasets
 4 | Version: 0.3.0
 5 | Authors@R: c(
 6 |   person(
 7 |     given = "Martin",
 8 |     family = "Westgate",
 9 |     role = c("aut", "cre"),
10 |     email = "martinjwestgate@gmail.com",
11 |     comment = c(ORCID = "0000-0003-0854-2034")),
12 |   person(
13 |     given = "Eliza",
14 |     family = "Grames",
15 |     role = c("aut"),
16 |     email = "eliza.grames@uconn.edu",
17 |     comment = c(ORCID = "0000-0003-1743-6815")))
18 | Description: A critical first step in systematic literature reviews
19 |   and mining of academic texts is to identify relevant texts from a range
20 |   of sources, particularly databases such as 'Web of Science' or 'Scopus'.
21 |   These databases often export in different formats or with different metadata
22 |   tags. 'synthesisr' expands on the tools outlined by Westgate (2019)
23 |   <doi:10.1002/jrsm.1374> to import bibliographic data from a range of formats
24 |   (such as 'bibtex', 'ris', or 'ciw') in a standard way, and allows merging
25 |   and deduplication of the resulting dataset.
26 | Depends: R (>= 4.0.0)
27 | Imports:
28 |     dplyr,
29 |     purrr,
30 |     rlang,
31 |     stringdist,
32 |     tibble,
33 |     unglue,
34 |     vroom
35 | Suggests: 
36 |     knitr, 
37 |     rmarkdown, 
38 |     testthat
39 | Date: 2023-06-07
40 | License: GPL-3
41 | URL: https://martinwestgate.com/synthesisr/
42 | LazyData: true
43 | RoxygenNote: 7.3.2
44 | VignetteBuilder: knitr
45 | Encoding: UTF-8
46 | Roxygen: list(markdown = TRUE)
47 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method("[",bibliography)
 4 | S3method(as.data.frame,bibliography)
 5 | S3method(as_tibble,bibliography)
 6 | S3method(c,bibliography)
 7 | S3method(print,bibliography)
 8 | S3method(summary,bibliography)
 9 | export(add_line_breaks)
10 | export(as.bibliography)
11 | export(as_tibble)
12 | export(clean_authors)
13 | export(clean_colnames)
14 | export(clean_df)
15 | export(deduplicate)
16 | export(detect_delimiter)
17 | export(detect_lookup)
18 | export(detect_parser)
19 | export(detect_year)
20 | export(extract_unique_references)
21 | export(find_duplicates)
22 | export(format_citation)
23 | export(fuzz_m_ratio)
24 | export(fuzz_partial_ratio)
25 | export(fuzz_token_set_ratio)
26 | export(fuzz_token_sort_ratio)
27 | export(fuzzdist)
28 | export(merge_columns)
29 | export(override_duplicates)
30 | export(parse_bibtex)
31 | export(parse_csv)
32 | export(parse_pubmed)
33 | export(parse_ris)
34 | export(parse_tsv)
35 | export(read_refs)
36 | export(review_duplicates)
37 | export(string_cosine)
38 | export(string_dl)
39 | export(string_hamming)
40 | export(string_jaccard)
41 | export(string_jw)
42 | export(string_lcs)
43 | export(string_lv)
44 | export(string_osa)
45 | export(string_qgram)
46 | export(string_soundex)
47 | export(write_bib)
48 | export(write_refs)
49 | export(write_ris)
50 | importFrom(dplyr,bind_rows)
51 | importFrom(purrr,list_transpose)
52 | importFrom(rlang,abort)
53 | importFrom(rlang,warn)
54 | importFrom(stringdist,stringdist)
55 | importFrom(tibble,as_tibble)
56 | importFrom(tibble,tibble)
57 | importFrom(unglue,unglue_data)
58 | importFrom(vroom,default_locale)
59 | importFrom(vroom,vroom_lines)
60 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # synthesisr 0.3.0
2 | 
3 | This is a minor rebuild to meet modern data standards - mainly supporting 
4 | tibbles rather than data.frames as a default
5 | 


--------------------------------------------------------------------------------
/R/add_line_breaks.R:
--------------------------------------------------------------------------------
 1 | #' Add line breaks to one or more strings
 2 | #'
 3 | #' This function takes a vector of strings and adds line breaks
 4 | #' every n characters. Primarily built to be called internally by
 5 | #' `format_citation()`, this function has been made available as it can be
 6 | #' useful in other contexts.
 7 | #' @param x Either a string or a vector; if the vector is not of class character
 8 | #' if will be coerced to one using `as.character()`.
 9 | #' @param n Numeric: The desired number of characters that should separate
10 | #' consecutive line breaks.
11 | #' @param html Logical: Should the line breaks be specified in html?
12 | #' @param max_n DEPRECATED: If provided will currently overwrite `n`; otherwise
13 | #' synonymous with `n` and will be removed from future versions.
14 | #' @param max_time DEPRECATED: Previously the maximum amount of time (in
15 | #' seconds) allowed to adjust groups until character thresholds are reached.
16 | #' Ignored.
17 | #' @details Line breaks are only added between words, so the value of n is
18 | #' actually a threshold value rather than being matched exactly.
19 | #' @return Returns the input vector unaltered except for the addition of line
20 | #' breaks.
21 | #' @importFrom rlang abort
22 | #' @examples add_line_breaks(c("On the Origin of Species"), n = 10)
23 | #' @export
24 | add_line_breaks <- function(x,
25 |                             n = 50,
26 |                             max_n = NULL,
27 |                             html = FALSE,
28 |                             max_time = NULL
29 |                             ){
30 |   if(!is.null(max_n)){
31 |     n <- max_n
32 |   }
33 | 
34 |   if(html){
35 |     break_string <- "<br>"
36 |   }else{
37 |     break_string <- "\n"
38 |   }
39 |   split_text <- strsplit(as.character(x), " ")
40 |   out_list <- lapply(split_text, function(a){
41 |     if(length(a) == 0){
42 |       return("")
43 |     }else{
44 |       result <- data.frame(
45 |         text = a,
46 |         nchars = nchar(a, allowNA = TRUE, keepNA = TRUE) + 1,
47 |         stringsAsFactors = FALSE
48 |       )
49 |       if(any(is.na(result$nchars))){
50 |         result$nchars[which(is.na(result$nchars))] <- 2
51 |       }
52 | 
53 |       result$group <- cumulative_assign(result$nchars, n)
54 |       result_list <- lapply(split(result$text, result$group),
55 |                             function(a){paste(a, collapse = " ")})
56 |       result <- paste(unlist(result_list), collapse = break_string)
57 |       return(result)
58 |     }
59 |   })
60 |   return(unlist(out_list))
61 | }
62 | 
63 | #' Internal function to assign words to groups
64 | #'
65 | #' Functions by taking vector of string lengths, and iteratively assigning to
66 | #' groups within a while loop
67 | #' @param x is nchar() of a character vector + 1
68 | #' @param n is the maximum line length allowed
69 | #' @noRd
70 | #' @keywords Internal
71 | cumulative_assign <- function(x, n){
72 |   result_vec <- vector(mode = "integer", length = length(x))
73 |   window_size <- round(n / mean(x) * 2, 0) # this may be too large
74 |   group_value <- 1
75 |   while(any(result_vec < 1)){
76 |     available_rows <- which(result_vec < 1)
77 |     window_tr <- min(c(window_size, length(available_rows)))
78 |     vec_tr <- x[available_rows[seq_len(window_tr)]]
79 |     keep_rows <- which(cumsum(vec_tr) < n)
80 |     result_vec[available_rows[keep_rows]] <- group_value
81 |     group_value <- group_value + 1
82 |   }
83 |   result_vec
84 | }
85 | 


--------------------------------------------------------------------------------
/R/bibliography_functions.R:
--------------------------------------------------------------------------------
  1 | #' Methods for class bibliography
  2 | #'
  3 | #' @title bibliography-class
  4 | #' @description This is a small number of standard methods for interacting with class 'bibliography'. More may be added later.
  5 | #' @param x An object of class 'bibliography'
  6 | #' @param object An object of class 'bibliography'
  7 | #' @param n Number of items to select/print
  8 | #' @param ... Any further information
  9 | #' @aliases summary.bibliography, print.bibliography, c.bibliography, as.data.frame.bibliography
 10 | #' @name bibliography-class
 11 | #' @export
 12 | summary.bibliography <- function(object, ...){
 13 | 
 14 |   # are any abstracts completely missing?
 15 |   null_check <- unlist(lapply(
 16 |     object,
 17 |     function(a){is.null(a$abstract)}
 18 |   ))
 19 |   null_count <- length(object) - length(which(null_check))
 20 |   null_percent <- round((100/length(object)) * null_count, 1)
 21 | 
 22 |   # how many sources?
 23 |   sources <- unlist(lapply(
 24 |     object,
 25 |     function(a){a$journal}
 26 |   ))
 27 |   if(!is.null(sources)){
 28 |     n_sources <- length(unique(sources))
 29 |     source_freq <- sort(
 30 |       xtabs(~ sources),
 31 |       decreasing = TRUE
 32 |     )[seq_len(min(c(5, n_sources)))]
 33 |     # put text together
 34 |     result <- paste(
 35 |       paste0(
 36 |         "Object of class 'bibliography' containing ",
 37 |         length(object),
 38 |         " entries.",
 39 |         "\n  ",
 40 |         "Number containing abstracts: ",
 41 |         null_count,
 42 |         " (",
 43 |         null_percent,
 44 |         "%)",
 45 |         "\n",
 46 |         "Number of sources: ",
 47 |         n_sources,
 48 |         "\n",
 49 |         "Most common sources:",
 50 |         "\n  "
 51 |       ),
 52 |       paste(
 53 |         names(source_freq),
 54 |         " (n = ",
 55 |         as.numeric(source_freq),
 56 |         ")",
 57 |         sep = "",
 58 |         collapse = "\n  "
 59 |       ),
 60 |       sep = "",
 61 |       collapse = "\n")
 62 |   }else{
 63 |     result <- paste0(
 64 |       "Object of class 'bibliography' containing ",
 65 |       length(object),
 66 |       " entries.",
 67 |       "\n  ",
 68 |       "Number containing abstracts: ",
 69 |       null_count,
 70 |       " (",
 71 |       null_percent,
 72 |       "%)",
 73 |       "\n"
 74 |     )
 75 |   }
 76 |   cat(result, sep = "\n")
 77 | }
 78 | 
 79 | #' @rdname bibliography-class
 80 | #' @export
 81 | print.bibliography <- function(x, n, ...){
 82 |   length_tr <- length(x)
 83 |   if(missing(n)){
 84 |     n <- min(c(length_tr, 5))
 85 |   }else{
 86 |     if(n > length_tr){
 87 |       n <- length_tr
 88 |     }
 89 |   }
 90 |   text_tr <- format_citation(x[seq_len(n)])
 91 |   cat(paste(unlist(text_tr), collapse = "\n\n"))
 92 | }
 93 | 
 94 | #' @rdname bibliography-class
 95 | #' @importFrom rlang abort
 96 | #' @export
 97 | '[.bibliography' <- function(x, n){
 98 |   class(x) <- "list"
 99 |   if(all(n %in% seq_len(length(x))) == FALSE){
100 |     abort("subset out of bounds")
101 |   }
102 |   z <- x[n]
103 |   class(z) <- "bibliography"
104 |   return(z)
105 | }
106 | 
107 | #' @rdname bibliography-class
108 | #' @export
109 | c.bibliography <- function(...){
110 |   result <- lapply(list(...), function(a){
111 |     class(a) <- "list"
112 |     return(a)
113 |   })
114 |   result <- do.call(c, result)
115 |   class(result) <- "bibliography"
116 |   return(result)
117 | }
118 | 
119 | #' @rdname bibliography-class
120 | #' @export
121 | as.data.frame.bibliography <- function(x, ...){
122 | 
123 |   cols <- unique(unlist(lapply(x, names)))
124 |   # cols <- cols[which(cols != "further_info")]
125 | 
126 |   x_list <- lapply(x, function(a, cols){
127 |     result <- lapply(cols, function(b, lookup){
128 |       if(any(names(lookup) == b)){
129 |         data_tr <- lookup[[b]]
130 |         if(length(data_tr) > 1){
131 |           data_tr <- paste0(data_tr, collapse = " and ")
132 |         }
133 |         return(data_tr)
134 |       }else{
135 |         return(NA)
136 |       }
137 |     },
138 |     lookup = a)
139 |     names(result) <- cols
140 |     return(
141 |       as.data.frame(
142 |         result,
143 |         stringsAsFactors=FALSE
144 |       )
145 |     )
146 |     },
147 |     cols = cols
148 |   )
149 | 
150 |   x_dframe <- data.frame(
151 |     do.call(rbind, x_list),
152 |     stringsAsFactors = FALSE
153 |   )
154 |   rownames(x_dframe) <- NULL
155 | 
156 |   return(x_dframe)
157 | }
158 | 
159 | 
160 | #' @rdname bibliography-class
161 | #' @importFrom rlang abort
162 | #' @export
163 | as.bibliography <- function(x, ...){
164 | 
165 |   if(!inherits(x, "data.frame")){
166 |     abort("as.bibliography can only be called for objects of class 'data.frame'")
167 |   }
168 | 
169 |   x_list <- lapply(
170 |     split(x, seq_len(nrow(x))),
171 |     function(a){
172 |       a <- as.list(a)
173 |       if(any(names(a) == "author")){
174 |         a$author <- strsplit(a$author, " and ")[[1]]
175 |       }
176 |       if(any(names(a) == "keywords")){
177 |         a$keywords <- strsplit(a$keywords, " and ")[[1]]
178 |       }
179 |       return(a)
180 |     }
181 |   )
182 |   names(x_list) <- seq_len(nrow(x))
183 |   class(x_list) <- "bibliography"
184 |   return(x_list)
185 | }
186 | 
187 | #' @rdname bibliography-class
188 | #' @param .rows currently ignored
189 | #' @param .name_repair currently ignored
190 | #' @param rownames currently ignored
191 | #' @importFrom purrr list_transpose
192 | #' @importFrom tibble as_tibble
193 | #' @export
194 | as_tibble.bibliography <- function(x,
195 |                                    ...,
196 |                                    .rows,
197 |                                    .name_repair,
198 |                                    rownames){
199 |   class(x) <- "list"
200 |   as_tibble(list_transpose(x))
201 | }
202 | 


--------------------------------------------------------------------------------
/R/clean_functions.R:
--------------------------------------------------------------------------------
 1 | #' Clean a `tibble` or vector
 2 | #'
 3 | #' Cleans column and author names
 4 | #' @param data A `tibble` with bibliographic information.
 5 | #' @param x A vector of strings
 6 | #' @return Returns the input, but cleaner.
 7 | #' @example inst/examples/clean_.R
 8 | #' @name clean_
 9 | #' @export
10 | clean_df <- function(data){
11 |   colnames(data) <- clean_colnames(colnames(data))
12 |   if(any(colnames(data) == "author")){
13 |     data$author <- clean_authors(data$author)
14 |   }
15 |   data <- remove_factors(data)
16 |   return(data)
17 | }
18 | 
19 | 
20 | # Standardize author delimiters
21 | #' @rdname clean_
22 | #' @export
23 | clean_authors <- function(x){
24 |   if(any(grepl("\\sand\\s|\\sAND\\s|\\s&\\s", x))){
25 |     x <- gsub("\\sAND\\s|\\s&\\s", " and ", x)
26 |   }else{
27 |     x <- gsub(",(?=\\s[[:alpha:]]{2,})", " and ", x, perl = TRUE)
28 |   }
29 |   x <- gsub("\\s{2, }", " ", x)
30 |   return(x)
31 | }
32 | 
33 | 
34 | # Clean common issues with column names
35 | #' @rdname clean_
36 | #' @export
37 | clean_colnames <- function(
38 |   x # colnames
39 | ){
40 |   if(inherits(x, "data.frame")){
41 |     x <- colnames(x)
42 |   }
43 |   x <- sub("^(X|Y|Z)\\.+", "", x) # remove leading X
44 |   x <- sub("^[[:punct:]]*", "", x) # leading punctuation
45 |   x <- sub("[[:punct:]]*$", "", x) # trailing punctuation
46 |   x <- gsub("\\.+", "_", x) # replace 1 or more dots with underscore
47 |   non_codes <- nchar(x) > 2 # for colnames with nchar > 2, convert to lower case
48 |   x[non_codes] <- tolower(x[non_codes])
49 |   x <- sub("authors", "author", x) # remove plural authors
50 |   x <- make.unique(x, sep = "_")
51 |   x <- gsub(" ", "_", x)
52 |   return(x)
53 | }
54 | 
55 | #' Remove factors from an object
56 | #'
57 | #' Internal functions called by `clean_df()`:
58 | #' @description This function converts factors to characters to avoid errors with
59 | #' levels.
60 | #' @param z A data.frame
61 | #' @return Returns the input data.frame with all factors converted to character.
62 | #' @noRd
63 | #' @keywords Internal
64 | remove_factors <- function(z){
65 |   z[] <- lapply(z, function(x){
66 |     if(is.factor(x)){as.character(x)}else{x}
67 |   })
68 |   return(z)
69 | }
70 | 


--------------------------------------------------------------------------------
/R/code_lookup.R:
--------------------------------------------------------------------------------
 1 | #' Bibliographic code lookup for search results assembly
 2 | #'
 3 | #' A data frame that can be used to look up common codes for different
 4 | #' bibliographic fields across databases and merge them to a common format.
 5 | #'
 6 | #' @format A `data.frame` with 226 obs of 12 variables
 7 | #'
 8 | #' \describe{
 9 | #'  \item{code}{code used in search results}
10 | #'  \item{order}{the order in which to rank fields in assembled results}
11 | #'  \item{category_description}{type of bibliographic data}
12 | #'  \item{entry_description}{description of field}
13 | #'  \item{field}{bibliographic field that codes correspond to}
14 | #'  \item{ris_generic}{logical: If the code is used in generic ris files}
15 | #'  \item{ris_wos}{logical: If the code is used in Web of Science ris files}
16 | #'  \item{ris_pubmed}{logical: If the code is used in PubMed ris files}
17 | #'  \item{ris_scopus}{logical: If the code is used in Scopus ris files}
18 | #'  \item{ris_asp}{logical: If the code is used in Academic Search Premier ris files}
19 | #'  \item{ris_ovid}{logical: If the code is used in Ovid ris files}
20 | #'  \item{ris_synthesisr}{logical: If the code used in synthesisr imports & exports}}
21 | #'
22 | "code_lookup"
23 | 


--------------------------------------------------------------------------------
/R/deprecated.R:
--------------------------------------------------------------------------------
 1 | #' Bind two or more data frames with different columns
 2 | #'
 3 | #' @description Takes two or more `data.frames` with different column names or
 4 | #' different column orders and binds them to a single `data.frame.` This
 5 | #' function is maintained for backwards compatibility, but it is synonymous with
 6 | #' `dplyr::bind_rows()` and will be depracated in future.
 7 | #' @param x Either a data.frame or a list of data.frames.
 8 | #' @param y A data.frame, optional if x is a list.
 9 | #' @return Returns a single data.frame with all the input data frames merged.
10 | #' @example inst/examples/merge_columns.R
11 | #' @importFrom dplyr bind_rows
12 | #' @importFrom rlang abort
13 | #' @export
14 | merge_columns <- function(
15 |   x, # either a data.frame or a list of the same
16 |   y # a data.frame, optional
17 | ){
18 |   if(missing(x)){
19 |     abort("object x is missing with no default")
20 |   }
21 |   if(!(inherits(x, "data.frame") | inherits(x, "list"))){
22 |     abort("object x must be either a data.frame or a list")
23 |   }
24 |   if(inherits(x, "data.frame")){
25 |     if(missing(y)){
26 |       return(x)
27 |       # abort("If x is a data.frame, then y must be supplied")
28 |     }else{
29 |       x <- list(x, y)
30 |     }
31 |   }else{ # i.e. for lists
32 |     if(!all(unlist(lapply(x, function(a){inherits(a, "data.frame")})))){
33 |       abort("x must only contain data.frames")
34 |     }
35 |   }
36 |   bind_rows(x)
37 | }
38 | 


--------------------------------------------------------------------------------
/R/detect_functions.R:
--------------------------------------------------------------------------------
  1 | #' Detect file formatting information
  2 | #'
  3 | #' @description Bibliographic data can be stored in a number of different file
  4 | #' types, meaning that detecting consistent attributes of those files is
  5 | #' necessary if they are to be parsed accurately. These functions attempt to
  6 | #' identify some of those key file attributes. Specifically, `detect_parser()`
  7 | #' determines which [parse_] function to use; `detect_delimiter()`
  8 | #' and `detect_lookup()` identify different attributes of RIS files; and
  9 | #' `detect_year()` attempts to fill gaps in publication years from other
 10 | #' information stored in a `tibble`.
 11 | #' @param x A character vector containing bibliographic data
 12 | #' @param tags A character vector containing RIS tags.
 13 | #' @param df a data.frame containing bibliographic data
 14 | #' @return `detect_parser()` and `detect_delimiter()` return a length-1
 15 | #' character; `detect_year()` returns a character vector listing estimated
 16 | #' publication years; and `detect_lookup()` returns a `data.frame.`
 17 | #' @example inst/examples/detect_.R
 18 | #' @name detect_
 19 | #' @importFrom rlang abort
 20 | #' @export
 21 | detect_parser <- function(x){
 22 | 
 23 |   # calculate proportional of lines containing likely tags
 24 |   proportions <- unlist(lapply(
 25 |     c(
 26 |       ",(\"|[[:alnum:]])",
 27 |       "\t",
 28 |       "\\{|\\}",
 29 |       "(^[[:upper:]]{2,4}\\s*(-|:)\\s)|(^([[:upper:]]{2}|[[:upper:]][[:digit:]])\\s*(-|:){0,2}\\s*)"
 30 |     ),
 31 |     function(a, z){proportion_delimited(z, a)},
 32 |     z = x
 33 |   ))
 34 | 
 35 |   # if any are detection, pick the most likely one
 36 |   if(any(proportions > 0.2)){
 37 |     result <- switch(
 38 |       c("comma", "tab", "bibtex", "ris")[which.max(proportions)],
 39 |       "comma" = "parse_csv",
 40 |       "tab" = "parse_tsv",
 41 |       "bibtex" = "parse_bibtex",
 42 |       "ris" = {
 43 |         if(length(which(grepl("PMID", x))) > 0){
 44 |           "parse_pubmed"
 45 |         }else{
 46 |           "parse_ris"
 47 |         }
 48 |       }
 49 |     )
 50 |   }else{
 51 |     result <- "unknown"
 52 |   }
 53 |   return(result)
 54 | }
 55 | 
 56 | 
 57 | #' @rdname detect_
 58 | #' @export
 59 | detect_delimiter <- function(x){
 60 |   if(any(grepl("^ER", x))){
 61 |     delimiter <- "endrow"
 62 |   }else{
 63 |     # special break: same character repeated >6 times, no other characters
 64 |     char_list <- strsplit(x, "")
 65 |     char_break_test <- unlist(
 66 |       lapply(char_list,
 67 |              function(a){length(unique(a)) == 1 & length(a > 6)}
 68 |       )
 69 |     )
 70 |     if(any(char_break_test)){
 71 |       delimiter <- "character"
 72 |     }else{
 73 |       # use space as a ref break (last choice)
 74 |       space_break_check <- unlist(lapply(
 75 |         char_list,
 76 |         function(a){all(a == "" | a == " ")}
 77 |       ))
 78 |       if(any(space_break_check)){
 79 |         delimiter <- "space"
 80 |       }else{
 81 |         abort("import failed: unknown reference delimiter")
 82 |       }
 83 |     }
 84 |   }
 85 |   return(delimiter)
 86 | }
 87 | 
 88 | 
 89 | #' @rdname detect_
 90 | #' @export
 91 | detect_lookup <- function(
 92 |   tags # a vector of strings representing ris tags
 93 | ){
 94 |   rows <- which(synthesisr::code_lookup$code %in% tags)
 95 |   ris_list <- split(
 96 |     synthesisr::code_lookup[rows, grepl("ris_", colnames(synthesisr::code_lookup))],
 97 |     synthesisr::code_lookup$code[rows]
 98 |   )
 99 |   ris_matrix <- do.call(
100 |     rbind,
101 |     lapply(ris_list, function(a){apply(a, 2, any)})
102 |   )
103 |   ris_sums <- apply(ris_matrix, 2, sum)
104 |   best_match <- which.max(ris_sums[-1])
105 |   best_proportion <- ris_sums[best_match + 1] / nrow(ris_matrix)
106 |   generic_proportion <- ris_sums[1] / nrow(ris_matrix)
107 |   # default to ris_generic if everything else is bad
108 |   if(best_proportion < 0.75 & generic_proportion > best_proportion){
109 |     match_df <- synthesisr::code_lookup[synthesisr::code_lookup$ris_generic, ]
110 |   }else{ # i.e. if the 'best' match performs perfectly
111 |     if(best_proportion > 0.99){ # i.e. a perfect match
112 |       match_df <- synthesisr::code_lookup[
113 |         synthesisr::code_lookup[, names(best_match)],
114 | 
115 |       ]
116 |     }else{ # otherwise use the best choice, then generic to fill gaps
117 |        rows_best <- which(
118 |          synthesisr::code_lookup[, names(best_match)] &
119 |          synthesisr::code_lookup$code %in% names(which(ris_matrix[, names(best_match)]))
120 |        )
121 |        rows_generic <- which(
122 |          synthesisr::code_lookup$ris_generic &
123 |          synthesisr::code_lookup$code %in% names(which(!ris_matrix[, names(best_match)]))
124 |        )
125 |       match_df <- synthesisr::code_lookup[c(rows_best, rows_generic), ]
126 |     }
127 |   }
128 | 
129 |   return(match_df[, c("code", "order", "field")])
130 | }
131 | 
132 | #' @rdname detect_
133 | #' @export
134 | detect_year <- function(df){
135 |   if(!inherits(df, "data.frame")){
136 |     abort(print("detect_year expects an object of class data.frame as input"))
137 |   }
138 |   lc_colnames <- tolower(colnames(df))
139 |   dates <- grepl("date", lc_colnames) & !grepl("access", lc_colnames)
140 |   if(any(dates)){
141 |     if(any(colnames(df) == "year")) {
142 |       result <- df$year
143 |     }else{
144 |       result <- rep(NA, nrow(df))
145 |     }
146 |     na_rows <- is.na(result)
147 |     if(any(na_rows)){
148 |       result[na_rows] <- unlist(lapply(
149 |         split(df[na_rows, dates], seq_along(na_rows)),
150 |         guess_year
151 |       ))
152 |     }
153 |   }else{
154 |     result <- rep(NA, nrow(df))
155 |   }
156 |   return(result)
157 | }
158 | 
159 | #' internal function to calculate the proportion of lines that contain a particular regex
160 | #' called by detect_parser
161 | #' @noRd
162 | #' @keywords Internal
163 | proportion_delimited <- function(x, regex){
164 |   delimiter_count <- unlist(lapply(
165 |     gregexpr(regex, x, perl = TRUE),
166 |     function(a){length(which(a > 0))}
167 |   ))
168 |   full_lines <- nchar(x, type = "bytes") > 0
169 |   proportion <- length(which(delimiter_count > 0)) / length(which(full_lines))
170 |   return(proportion)
171 | }
172 | 
173 | #' internal function for detect_year
174 | #' @noRd
175 | #' @keywords Internal
176 | guess_year <- function(x){
177 |   number_lookup <- regexpr("[[:alnum:]]{4}", as.character(x))
178 |   if(any(number_lookup > 0)){
179 |     x <- x[number_lookup > 0]
180 |     result_vec <- unlist(lapply(seq_along(x), function(a){
181 |       substr(x[a], start = number_lookup[a], stop = number_lookup[a] + 3)
182 |     }))
183 |     # return(max(as.numeric(result)))
184 |     result <- names(sort(xtabs(~result_vec), decreasing = TRUE)[1])
185 |     return(result)
186 |   }else{
187 |     return(NA)
188 |   }
189 | }
190 | 
191 | #' Compute the rolling sum of detections
192 | #'
193 | #' This function is intended to ensure multiple consecutive empty rows are
194 | #' removed. Called by `detect_delimiter()`.
195 | #' @noRd
196 | #' @keywords Internal
197 | rollingsum <- function(a, n = 2L){
198 |   tail(cumsum(a) - cumsum(c(rep(0, n), head(a, -n))), -n + 1)
199 | }
200 | 


--------------------------------------------------------------------------------
/R/format_citation.R:
--------------------------------------------------------------------------------
  1 | #' Format a citation
  2 | #'
  3 | #' @description This function takes an object of class `data.frame`, `list`, or
  4 | #' `bibliography` and returns a formatted citation.
  5 | #' @param data An object of class `data.frame`, `list`, or `bibliography.`
  6 | #' @param details Logical: Should identifying information such as author names &
  7 | #' journal titles be displayed? Defaults to `TRUE`.
  8 | #' @param abstract Logical: Should the abstract be shown (if available)?
  9 | #' Defaults to `FALSE.`
 10 | #' @param add_html Logical: Should the journal title be italicized using html
 11 | #' codes? Defaults to `FALSE`.
 12 | #' @param line_breaks Either logical, stating whether line breaks should be
 13 | #' added, or numeric stating how many characters should separate consecutive
 14 | #' line breaks. Defaults to `FALSE`.
 15 | #' @param ... any other arguments.
 16 | #' @return Returns a string of length equal to `length(data)` that contains
 17 | #' formatted citations.
 18 | #' @importFrom rlang abort
 19 | #' @example inst/examples/format_citation.R
 20 | #' @export
 21 | format_citation <- function(
 22 |   data,
 23 |   details = TRUE,
 24 |   abstract = FALSE,
 25 |   add_html = FALSE,
 26 |   line_breaks = FALSE,
 27 |   ...
 28 | ){
 29 |   if(!inherits(data, c("data.frame", "bibliography", "list"))){
 30 |     abort("format_citation expects input data to be an object of class data.frame, bibliography, or list")
 31 |   }
 32 | 
 33 |   if(!inherits(data, "data.frame")){
 34 |     data <- as.data.frame(data)
 35 |   }
 36 | 
 37 |   colnames(data) <- clean_colnames(colnames(data))
 38 |   if(any(names(data) == "journal")){
 39 |     source <- "journal"
 40 |   }else{
 41 |     source_check <- grepl("source", names(data))
 42 |     if(any(source_check)){
 43 |       source <- names(data)[which(source_check)]
 44 |       if(length(source) > 1){
 45 |         source <- source[which.max(nchar(data[source], type = "bytes"))]
 46 |       }
 47 |     }else{
 48 |       source <- NA
 49 |     }
 50 |   }
 51 | 
 52 |   # this section should be made more flexible to use any available information
 53 |   # if(details){
 54 |   data_list <- split(data, seq_len(nrow(data)))
 55 |   data_out <- unlist(lapply(data_list, function(a){
 56 |     cols_tr <- names(a)
 57 |     text_list <- as.list(rep(NA, 4))
 58 |     names(text_list) <- c("author", "year", "title", "journal")
 59 |     # title
 60 |     if(any(cols_tr == "title")){
 61 |       title_text <- tools::toTitleCase(tolower(a$title))
 62 |       if(grepl("[[:punct:]]$", title_text)){
 63 |         text_list$title <- title_text
 64 |       }else{
 65 |         text_list$title <- paste0(title_text, ".")
 66 |       }
 67 |     }else{
 68 |       text_list$title <- ""
 69 |     }
 70 |     if(details){
 71 |       # year
 72 |       if(any(cols_tr == "year")){
 73 |         text_list$year <- paste0("(", a$year, ")")
 74 |       }else{
 75 |         text_list$year <- NA
 76 |       }
 77 |       # journal
 78 |       if(!is.na(source)){
 79 |         if(!is.na(a[[source]])){
 80 |           journal_text <- tools::toTitleCase(tolower(a[[source]]))
 81 |           if(add_html){
 82 |             text_list$journal <- paste0("<i>", journal_text, "</i>. ")
 83 |           }else{
 84 |             text_list$journal <- paste0(journal_text, ". ")
 85 |           }
 86 |         }else{
 87 |           text_list$journal <- NA
 88 |         }
 89 |       }
 90 |       # authors
 91 |       if(any(cols_tr == "author")){
 92 |         author_vector <- strsplit(a[['author']], " and ")[[1]]
 93 |         if(length(author_vector) == 1){
 94 |           text_list$author <- a[['author']]
 95 |         }else{
 96 |           text_list$author <- paste0(author_vector[1], " et al.")
 97 |         }
 98 |       }else{
 99 |         if(!all(is.na(text_list))){
100 |           text_list$author <- "Anon."
101 |         }
102 |       }
103 |     } # end if(details)
104 |     text_vec <- unlist(text_list)
105 |     if(all(is.na(text_vec))){
106 |       return(a[1])
107 |     }else{
108 |       return(
109 |         paste(text_vec[!is.na(text_vec)], collapse = " ")
110 |       )
111 |     }
112 |   }))
113 | 
114 |   # add line breaks if required
115 |   if(is.logical(line_breaks)){
116 |     if(line_breaks){
117 |       data_out <- add_line_breaks(data_out)
118 |     }
119 |   }else{
120 |     if(is.numeric(line_breaks)){
121 |       data_out <- add_line_breaks(data_out, line_breaks)
122 |     }
123 |   }
124 |   data_out <- unlist(lapply(data_out, trimws))
125 |   return(data_out)
126 | }
127 | 


--------------------------------------------------------------------------------
/R/fuzz_functions.R:
--------------------------------------------------------------------------------
  1 | # Functions from the 'fuzzywuzzy' Python library
  2 | # github.com/seatgeek/fuzzywuzzy
  3 | # these functions coded by Martin Westgate on 4th June 2018 based on description given here:
  4 | # chairnerd.seatgeek.com/fuzzywuzzy-fuzzy-string-matching-in-python/
  5 | 
  6 | #' Calculate similarity between two strings
  7 | #' @description These functions duplicate the approach of the 'fuzzywuzzy'
  8 | #' Python library for calculating string similarity.
  9 | #' @param a A character vector of items to match to b.
 10 | #' @param b A character vector of items to match to a.
 11 | #' @param method The method to use for fuzzy matching.
 12 | #' @note `fuzz_m_ratio()` is a measure of the number of letters that match
 13 | #' between two strings. It is calculated as one minus two times the number of
 14 | #' matched characters, divided by the number of characters in both strings.
 15 | #' @note `fuzz_partial_ratio()` calculates the extent to which one string is a
 16 | #' subset of the other. If one string is a perfect subset, then this will be
 17 | #' zero.
 18 | #' @note `fuzz_token_sort_ratio()` sorts the words in both strings into
 19 | #' alphabetical order, and checks their similarity using `fuzz_m_ratio()`.
 20 | #' @note `fuzz_token_set_ratio()` is similar to `fuzz_token_sort_ratio()`, but
 21 | #' compares both sorted strings to each other, and to a third group made of
 22 | #' words common to both strings. It then returns the maximum value of
 23 | #' `fuzz_m_ratio()` from these comparisons.
 24 | #' @note `fuzzdist()` is a wrapper function, for compatability with `stringdist`.
 25 | #' @return Returns a score of same length as b, giving the proportional
 26 | #' dissimilarity between a and b.
 27 | #' @example inst/examples/fuzzdist.R
 28 | #' @name fuzz_
 29 | #' @export
 30 | fuzzdist <- function(a, b, method = c(
 31 |   "fuzz_m_ratio", "fuzz_partial_ratio", "fuzz_token_sort_ratio", "fuzz_token_set_ratio")
 32 | ){
 33 |   method <- match.arg(method)
 34 |   do.call(
 35 |     method,
 36 |     list(a, b)
 37 |   )
 38 | }
 39 | 
 40 | #' @rdname fuzz_
 41 | #' @export
 42 | fuzz_m_ratio <- function(a, b){
 43 |   out <- lapply(b, function(b, a){
 44 |     z <- c(a, b)
 45 |     if(any(is.na(z))){
 46 |       return(NA)
 47 |     }else{
 48 |       z_list <- lapply(strsplit(z, ""),
 49 |         function(x, minval){x[1:minval]},
 50 |         minval = min(nchar(z))
 51 |         )
 52 |       z_match <- apply(
 53 |         do.call(cbind, z_list),
 54 |         1,
 55 |         function(x){x[1] == x[2]}
 56 |       )
 57 |       return(
 58 |         1 - (2 * length(which(z_match)) / sum(nchar(z)))
 59 |       )
 60 |     }
 61 |   },
 62 |   a = a)
 63 |   return(as.numeric(out))
 64 | }
 65 | 
 66 | 
 67 | #' @rdname fuzz_
 68 | #' @export
 69 | fuzz_partial_ratio <- function(a, b){
 70 |   out <- lapply(b, function(b, a){
 71 |     z <- c(a, b)
 72 |     if(any(is.na(z))){
 73 |       return(NA)
 74 |     }else{
 75 |       zn <- nchar(z)
 76 |       n_reps <- (max(zn) - min(zn))
 77 |       z_list <- lapply(
 78 |         c(0: n_reps),
 79 |         function(x, lookup, keep){lookup[(keep + x)]},
 80 |         lookup = strsplit(z[which.max(zn)], "")[[1]],
 81 |         keep = seq_len(min(zn))
 82 |       )
 83 |       z_ratio <- lapply(z_list, function(x, comparison){
 84 |         match_value <- apply(
 85 |           cbind(x, comparison),
 86 |           1,
 87 |           function(y){y[1] == y[2]}
 88 |         )
 89 |         length(which(match_value))/length(x)
 90 |       },
 91 |       comparison = strsplit(z[which.min(zn)], "")[[1]]
 92 |       )
 93 |       return(1 - max(as.numeric(z_ratio)))
 94 |     }
 95 |   },
 96 |   a = a)
 97 |   return(as.numeric(out))
 98 | }
 99 | 
100 | 
101 | #' @rdname fuzz_
102 | #' @export
103 | fuzz_token_sort_ratio <- function(a, b){
104 |   out <- lapply(b, function(b, a){
105 |     z <- c(a, b)
106 |     if(any(is.na(z))){
107 |       return(NA)
108 |     }else{
109 |       z_split <- strsplit(z, " ")
110 |       z_split <- lapply(z_split, make.unique, sep="_XDUP_")
111 |       in_check <- z_split[[1]] %in% z_split[[2]]
112 |       intersection <- sort(z_split[[1]][which(in_check)])
113 |       string_list <- list(
114 |         t0 = intersection,
115 |         t1 = c(intersection,
116 |                sort(z_split[[1]][which(!in_check)])
117 |         ),
118 |         t2 = c(intersection,
119 |                unlist(lapply(z_split[[2]][which(!(z_split[[2]] %in% intersection))], function(x){strsplit(x, "_XDUP_")[[1]][1]}))
120 |         )
121 |       )
122 |       string_list <- lapply(string_list, function(x){
123 |         if(length(x) < 1){
124 |           return("")
125 |         }else{
126 |           return(paste(x, collapse = " "))
127 |         }
128 |       })
129 |       result <- c(
130 |         fuzz_m_ratio(string_list$t0, string_list$t1),
131 |         fuzz_m_ratio(string_list$t0, string_list$t2),
132 |         fuzz_m_ratio(string_list$t1, string_list$t2)
133 |       )
134 |       return(max(result))
135 |     }
136 |   },
137 |   a = a)
138 |   return(as.numeric(out))
139 |   return(as.numeric(out))
140 | }
141 | 
142 | 
143 | #' @rdname fuzz_
144 | #' @export
145 | fuzz_token_set_ratio <- function(a, b){
146 |   out <- lapply(b, function(b, a){
147 |     z <- c(a, b)
148 |     if(any(is.na(z))){
149 |       return(NA)
150 |     }else{
151 |       z_split <- strsplit(z, " ")
152 |       in_check <- z_split[[1]] %in% z_split[[2]]
153 |       intersection <- sort(z_split[[1]][which(in_check)])
154 |       string_list <- list(
155 |         t0 = intersection,
156 |         t1 = c(intersection,
157 |           sort(z_split[[1]][which(!in_check)])
158 |         ),
159 |         t2 = c(intersection,
160 |           sort(z_split[[2]][which(!(z_split[[2]] %in% intersection))])
161 |         )
162 |       )
163 |       string_list <- lapply(string_list, function(x){
164 |         if(length(x) < 1){
165 |           return("")
166 |         }else{
167 |           return(paste(x, collapse = " "))
168 |         }
169 |       })
170 |       result <- c(
171 |         fuzz_m_ratio(string_list$t0, string_list$t1),
172 |         fuzz_m_ratio(string_list$t0, string_list$t2),
173 |         fuzz_m_ratio(string_list$t1, string_list$t2)
174 |         )
175 |       return(max(result))
176 |     }
177 |   },
178 |   a = a)
179 |   return(as.numeric(out))
180 | }
181 | 


--------------------------------------------------------------------------------
/R/parse_bibtex.R:
--------------------------------------------------------------------------------
 1 | #' @rdname parse_
 2 | #' @importFrom dplyr bind_rows
 3 | #' @importFrom tibble tibble
 4 | #' @importFrom unglue unglue_data
 5 | #' @export
 6 | parse_bibtex <- function(x){
 7 |   # use `unglue` to parse text
 8 |   raw_df <- unglue_data(x,
 9 |                         patterns = c("[variable]={[value]},",
10 |                                      "@[variable]{[value],"),
11 |                         open = "[",
12 |                         close = "]")
13 | 
14 |   # remove missing values
15 |   raw_df <- raw_df[!(is.na(raw_df$variable) | is.na(raw_df$value)), ]
16 | 
17 |   # create a vector assigning rows to articles
18 |   article_vec <- as.integer(raw_df$variable == "ARTICLE")
19 |   article_vec[is.na(article_vec)] <- 0
20 |   raw_df$article <- cumsum(article_vec)
21 | 
22 |   # split by article and transpose
23 |   result <- lapply(
24 |     split(raw_df[, 1:2], raw_df$article),
25 |     function(a){
26 |       result <- as.data.frame(t(a$value))
27 |       colnames(result) <- a$variable
28 |       return(result)
29 |     }) |>
30 |     bind_rows() |>
31 |     tibble()
32 | 
33 |   # split authors
34 |   if(any(names(result) == "author")){
35 |     if(any(grepl("and", result$author))){
36 |       result$author <- strsplit(result$author, "\\s*and\\s*")
37 |     }
38 |   }
39 | 
40 |   # join duplicated columns
41 |   # note: needs to be done simultaneously with calling `tibble()`
42 | 
43 |   return(result)
44 | }
45 | 


--------------------------------------------------------------------------------
/R/parse_csv_tsv.R:
--------------------------------------------------------------------------------
 1 | #' @rdname parse_
 2 | #' @export
 3 | parse_csv <- function(x){
 4 |   read.table(
 5 |     text = x,
 6 |     header = TRUE,
 7 |     sep = ",",
 8 |     quote = "\"",
 9 |     dec = ".",
10 |     fill = TRUE,
11 |     stringsAsFactors = FALSE,
12 |     row.names = NULL) |>
13 |   match_columns() |>
14 |   tibble()
15 | }
16 | 
17 | #' @rdname parse_
18 | #' @export
19 | parse_tsv <- function(x){
20 |   read.table(
21 |     text = x,
22 |     header = TRUE,
23 |     sep = "\t",
24 |     quote = "\"",
25 |     dec = ".",
26 |     fill = TRUE,
27 |     stringsAsFactors = FALSE,
28 |     row.names = NULL) |>
29 |   match_columns() |>
30 |   tibble()
31 | }
32 | 
33 | #' Internal function used by parse_csv and parse_tsv:
34 | #' Matches imported data to reference codes
35 | #'
36 | #' @description Takes an imported data.frame and rearranges it to match lookup
37 | #' codes.
38 | #' @param df A data.frame that contains bibliographic information.
39 | #' @return Returns a data.frame rearranged and coded to match standard
40 | #' bibliographic fields, with unrecognized fields appended.
41 | #' @noRd
42 | #' @keywords Internal
43 | #' @example inst/examples/match_columns.R
44 | match_columns <- function(df){
45 |   # figure out which columns match known tags
46 |   hits <- as.numeric(match(synthesisr::code_lookup$code, colnames(df)))
47 |   newcolnames <- synthesisr::code_lookup$field[
48 |     match(colnames(df),
49 |           synthesisr::code_lookup$code)
50 |   ]
51 |   colnames(df)[!is.na(newcolnames)] <- newcolnames[!is.na(newcolnames)]
52 | 
53 |   # rearrange data in standard(ish) order
54 |   if(any(is.na(hits))){
55 |     hits <- hits[!is.na(hits)]
56 |   }
57 | 
58 |   # retain columns even if they did not match lookup
59 |   retain <- append(hits, seq(1, length(df), 1)[!(seq(1, length(df), 1) %in% hits)])
60 | 
61 |   return(df[,retain])
62 | }
63 | 


--------------------------------------------------------------------------------
/R/parse_pubmed.R:
--------------------------------------------------------------------------------
 1 | #' Parse bibliographic text in a variety of formats
 2 | #'
 3 | #' @description Text in standard formats - such as imported via
 4 | #' `base::readLines()` - can be parsed using a variety of standard formats. Use
 5 | #' `detect_parser()` to determine which is the most appropriate parser for your
 6 | #' situation. Note that `parse_tsv()` and `parse_csv()` are maintained for
 7 | #' backwards compatability only; within `read_ref` these have been replaced
 8 | #' by `vroom::vroom()`.
 9 | #' @param x A character vector containing bibliographic information in ris
10 | #' format.
11 | #' @return Returns an object of class `bibliography` (ris, bib, or pubmed
12 | #' formats) or `data.frame` (csv or tsv).
13 | #' @example inst/examples/parse_.R
14 | #' @name parse_
15 | #' @export
16 | parse_pubmed <- function(x){
17 | 
18 |   x <- prep_ris(x, detect_delimiter(x), type = "pubmed")
19 | 
20 |   x_merge <- merge(x,
21 |                    synthesisr::code_lookup[
22 |                      synthesisr::code_lookup$ris_pubmed,
23 |                      c("code", "order", "field")
24 |                    ],
25 |                    by.x = "ris",
26 |                    by.y = "code",
27 |                    all.x = TRUE,
28 |                    all.y = FALSE
29 |   )
30 |   x_merge <- x_merge[order(x_merge$row_order), ]
31 | 
32 |   # find a way to store missing .bib data rather than discard
33 |   if(any(is.na(x_merge$field))){
34 |     rows_tr <- which(is.na(x_merge$field))
35 |     x_merge$field[rows_tr] <- x_merge$ris[rows_tr]
36 | 
37 |     # ensure all headings have an order
38 |     if(all(is.na(x_merge$order))){
39 |       start_val <- 0
40 |     }else{
41 |       start_val <- max(x_merge$order, na.rm = TRUE)
42 |     }
43 |     x_merge$order[rows_tr] <- as.numeric(as.factor(x_merge$ris[rows_tr])) + start_val
44 |   }
45 | 
46 |   # convert into a list, where each reference is a separate entry
47 |   x_split <- split(x_merge[c("field", "text", "order")], x_merge$ref)
48 |   x_final <- lapply(x_split, function(a){
49 |     result <- split(a$text, a$field)
50 |     if(any(names(result) == "abstract")){
51 |       result$abstract <- paste(result$abstract, collapse = " ")
52 |     }
53 |     if(any(names(result) == "address")){
54 |       result$address <- strsplit(
55 |         paste(result$address, collapse = " "),
56 |         "\\.\\s"
57 |       )[[1]]
58 |     }
59 |     if(any(names(result) == "title")){
60 |       if(length(result$title) > 1){
61 |         result$title <- paste(result$title, collapse = " ")
62 |       }
63 |     }
64 |     if(any(names(result) == "term_other")){
65 |       names(result)[which(names(result) == "term_other")] <- "keywords"
66 |     }
67 |     if(any(names(result) == "date_published")){
68 |       result$year <- substr(result$date_published, start = 1, stop = 4)
69 |     }
70 |     if(any(names(result) == "article_id")){
71 |       doi_check <- grepl("doi", result$article_id)
72 |       if(any(doi_check)){
73 |         result$doi <- strsplit(result$article_id[which(doi_check)], " ")[[1]][1]
74 |       }
75 |     }
76 | 
77 |     # ensure result is returned in the correct order
78 |     result_order <- order(
79 |       unlist(lapply(split(a$order, a$field), function(b){b[1]}))
80 |     )
81 |     return(result[result_order])
82 |   })
83 | 
84 |   names(x_final) <- unlist(lapply(x_final, function(a){a$pubmed_id}))
85 |   class(x_final) <- "bibliography"
86 |   return(x_final)
87 | }
88 | 


--------------------------------------------------------------------------------
/R/parse_ris.R:
--------------------------------------------------------------------------------
  1 | #' @rdname parse_
  2 | #' @param tag_naming What format are ris tags in? Defaults to `"best_guess"` See
  3 | #' `read_refs()` for a list of accepted arguments.
  4 | #' @export
  5 | parse_ris <- function(x, tag_naming = "best_guess"){
  6 | 
  7 |   # clean up input file
  8 |   x <- prep_ris(x,
  9 |                 detect_delimiter(x),
 10 |                 type = "generic")
 11 | 
 12 |   # merge data with lookup info, to provide bib-style tags
 13 |   tag_lookup_thisfile <- get_tag_lookup(x, tag_naming)
 14 |   x_merge <- merge(x,
 15 |                    tag_lookup_thisfile,
 16 |                    by.x = "ris",
 17 |                    by.y = "code",
 18 |                    all.x = TRUE,
 19 |                    all.y = FALSE
 20 |   )
 21 |   x_merge <- x_merge[order(x_merge$row_order), ]
 22 | 
 23 |   # find a way to store missing .bib data rather than discard
 24 |   if(any(is.na(x_merge$field))){
 25 |     rows_tr <- which(is.na(x_merge$field))
 26 |     x_merge$field[rows_tr] <- x_merge$ris[rows_tr]
 27 | 
 28 |     # ensure all headings have an order
 29 |     if(all(is.na(x_merge$order))){
 30 |       start_val <- 0
 31 |     }else{
 32 |       start_val <- max(x_merge$order, na.rm = TRUE)
 33 |     }
 34 |     x_merge$order[rows_tr] <- as.numeric(as.factor(x_merge$ris[rows_tr])) + start_val
 35 |   }
 36 | 
 37 |   # tidy up specific columns
 38 |   x_merge <- x_merge |>
 39 |     clean_ris_years() |>
 40 |     clean_ris_authors()
 41 | 
 42 |   # convert into a list, where each reference is a separate entry
 43 |   x_split <- split(x_merge[c("field", "ris", "text", "order")], x_merge$ref)
 44 | 
 45 |   # there is an issue with date accessed creating non-existing records
 46 |   # removing datasets with 1 row fixes this
 47 |   if(any(unlist(lapply(x_split, nrow))==1)){
 48 |     x_split <- x_split[  -which(unlist(lapply(x_split, nrow))==1)]
 49 |   }
 50 | 
 51 |   # convert to list format
 52 |   x_final <- lapply(x_split, function(a){
 53 |     result <- split(a$text, a$field) |>
 54 |       parse_ris_year() |>
 55 |       parse_ris_title() |>
 56 |       parse_ris_journal() |>
 57 |       parse_ris_abstract() |>
 58 |       parse_ris_page_numbers()
 59 |     # ensure result is returned in the correct order
 60 |     result_order <- order(
 61 |       unlist(lapply(split(a$order, a$field), function(b){b[1]}))
 62 |     )
 63 |     return(result[result_order])
 64 |   })
 65 |   class(x_final) <- "bibliography"
 66 |   return(x_final)
 67 | }
 68 | 
 69 | #' Internal function to clean year data (above)
 70 | #' @noRd
 71 | #' @keywords Internal
 72 | clean_ris_years <- function(x){
 73 |   # method to systematically search for year data
 74 |   year_check <- regexpr("^\\d{4}$", x$text)
 75 |   if(any(year_check > 0)){
 76 |     check_rows <- which(year_check > 0)
 77 |     year_strings <- as.numeric(x$text[check_rows])
 78 | 
 79 |     # for entries with a bib entry labelled year, check that there aren't multiple years
 80 |     if(any(x$field[check_rows] == "year", na.rm = TRUE)){
 81 |       # check for repeated year information
 82 |       year_freq <- xtabs(~ ref, data = x[which(x$field == "year"), ])
 83 |       if(any(year_freq > 1)){
 84 |         year_df <- x[which(x$field == "year"), ]
 85 |         year_list <- split(nchar(year_df$text), year_df$ris)
 86 |         year_4 <- sqrt((4 - unlist(lapply(year_list, mean))) ^ 2)
 87 |         # rename bib entries that have >4 characters to 'year_additional'
 88 |         incorrect_rows <- which(
 89 |           x$ris != names(which.min(year_4)[1]) &
 90 |             x$field == "year"
 91 |         )
 92 |         x$field[incorrect_rows] <- "year_additional"
 93 |       }
 94 |     }else{
 95 |       possible_rows <- which(
 96 |         year_strings > 0 &
 97 |           year_strings <= as.numeric(format(Sys.Date(), "%Y")) + 1
 98 |       )
 99 |       tag_frequencies <- as.data.frame(
100 |         xtabs(~ x$ris[check_rows[possible_rows]]),
101 |         stringsAsFactors = FALSE
102 |       )
103 |       colnames(tag_frequencies) <- c("tag", "n")
104 |       # now work out what proportion of each tag contain year data
105 |       # compare against number of references to determine likelihood of being 'the' year tag
106 |       tag_frequencies$prop <- tag_frequencies$n/(max(x$ref)+1) # number of references
107 |       if(any(tag_frequencies$prop > 0.9)){
108 |         year_tag <- tag_frequencies$tag[which.max(tag_frequencies$prop)]
109 |         rows.tr <- which(x$ris == year_tag)
110 |         x$field[rows.tr] <- "year"
111 |         x$row_order[rows.tr] <- 3
112 |       }
113 |     }
114 |   }
115 |   x
116 | }
117 | 
118 | #' Internal function to clean author data (above)
119 | #' @noRd
120 | #' @keywords Internal
121 | clean_ris_authors <- function(x){
122 |   # ensure author data from a single ris tag
123 |   if(any(x$field == "author")){
124 |     lookup.tags <- xtabs( ~ x$ris[which(x$field == "author")])
125 |     if(length(lookup.tags) > 1){
126 |       replace_tags <- names(which(lookup.tags < max(lookup.tags)))
127 |       replace_rows <- which(x$ris %in% replace_tags)
128 |       x$field[replace_rows] <- x$ris[replace_rows]
129 |       if(all(is.na(x$row_order))){
130 |         start_val <- 0
131 |       }else{
132 |         start_val <- max(x$row_order, na.rm = TRUE)
133 |       }
134 |       x$row_order[replace_rows] <- start_val + as.numeric(
135 |         as.factor(x$ris[replace_rows])
136 |       )
137 |     }
138 |   }
139 |   x
140 | }
141 | 
142 | #' Internal function to build a tag lookup table
143 | #' @noRd
144 | #' @keywords Internal
145 | get_tag_lookup <- function(x, tag_naming){
146 |   # create the appropriate lookup file for the specified tag
147 |   if(inherits(tag_naming, "data.frame")){
148 |     if(!any(colnames(tag_naming) == "order")){
149 |       tag_naming$order <- seq_len(nrow(tag_naming))
150 |     }
151 |     code_lookup_thisfile <- tag_naming
152 |   }else{
153 |     if(tag_naming == "none"){
154 |       ris_vals <- unique(x$ris)
155 |       code_lookup_thisfile <- data.frame(
156 |         code = ris_vals,
157 |         field = ris_vals,
158 |         order = seq_along(ris_vals),
159 |         stringsAsFactors = FALSE
160 |       )
161 |     }else if(tag_naming == "best_guess"){
162 |       code_lookup_thisfile <- detect_lookup(tags = unique(x$ris))
163 |     }else if(any(c("wos", "scopus", "ovid", "asp", "synthesisr") == tag_naming)){
164 |       rows <- which(synthesisr::code_lookup[, paste0("ris_", tag_naming)])
165 |       code_lookup_thisfile <- synthesisr::code_lookup[
166 |         rows,
167 |         c("code", "order", "field")
168 |       ]
169 |     }
170 |   }
171 |   code_lookup_thisfile
172 | }
173 | 
174 | #' Internal function to handle abstracts
175 | #' @noRd
176 | #' @keywords Internal
177 | parse_ris_abstract <- function(result){
178 |   if(length(result$abstract > 1)){
179 |     result$abstract <- paste(result$abstract, collapse = " ")
180 |     result$abstract <- gsub("\\s+", " ", result$abstract) # remove multiple spaces
181 |   }
182 |   result
183 | }
184 | 
185 | #' Internal function to handle years
186 | #' @noRd
187 | #' @keywords Internal
188 | parse_ris_year <- function(result){
189 |   if(any(names(result) == "year")){
190 |     if(any(nchar(result$year) >= 4)){
191 |       year_check <- regexpr("\\d{4}", result$year)
192 |       if(any(year_check > 0)){
193 |         result$year <- substr(
194 |           x = result$year[which(year_check>0)],
195 |           start = year_check[1],
196 |           stop = year_check[1]+3
197 |         )
198 |       }else{
199 |         result$year <- ""
200 |       }
201 |     }else{
202 |       result$year <- ""
203 |     }
204 |   }
205 |   result
206 | }
207 | 
208 | #' Internal function to handle titles
209 | #' @noRd
210 | #' @keywords Internal
211 | parse_ris_title <- function(result){
212 |   if(any(names(result) == "title")){
213 |     if(length(result$title) > 1){
214 |       if(result$title[1] == result$title[2]){
215 |         result$title <- result$title[1]
216 |       }else{
217 |         result$title <- paste(result$title, collapse = " ")
218 |       }
219 |     }
220 |     result$title <- gsub("\\s+", " ", result$title) # remove multiple spaces
221 |     result$title <- sub("\\.$", "", result$title) # remove final full stops
222 |   }
223 |   result
224 | }
225 | 
226 | #' Internal function to handle journals
227 | #' @noRd
228 | #' @keywords Internal
229 | parse_ris_journal <- function(result){
230 |   if(any(names(result) == "journal")){
231 |     unique_journals <- unique(result$journal)
232 |     if(length(unique_journals) > 1){
233 |       unique_journals <- unique_journals[order(
234 |         nchar(unique_journals),
235 |         decreasing = FALSE
236 |       )]
237 |       result$journal <- unique_journals[1]
238 |       result$journal_secondary <- paste(
239 |         unique_journals[c(2:length(unique_journals))],
240 |         collapse = "; "
241 |       )
242 |     }else{
243 |       result$journal <- unique_journals
244 |     }
245 |     result$journal <-gsub("  ", " ", result$journal)
246 |     result$journal <-sub("\\.$", "", result$journal)
247 |   }
248 |   result
249 | }
250 | 
251 | #' Internal function to handle page numbers
252 | #' @noRd
253 | #' @keywords Internal
254 | parse_ris_page_numbers <- function(result){
255 |   if(any(names(result) == "pages")){
256 |     if(length(result$pages) > 1){
257 |       result$pages <- paste(sort(result$pages), collapse = "-")
258 |     }
259 |   }
260 |   result
261 | }
262 | 


--------------------------------------------------------------------------------
/R/prep_ris.R:
--------------------------------------------------------------------------------
  1 | #' Internal function to clean a .ris file for import
  2 | #'
  3 | #' This function preps RIS files by cleaning common issues and converting to a
  4 | #' common format.
  5 | #' @param z A character vector that contains RIS bibliographic information.
  6 | #' @param delimiter A string indicating the type of delimiter separating entries.
  7 | #' @param type A string indicating the ris source; options are pubmed or generic.
  8 | #' @return Returns a `data.frame` intended for import with `parse_ris()`.
  9 | #' @noRd
 10 | #' @keywords Internal
 11 | prep_ris <- function(
 12 |   z,
 13 |   delimiter,
 14 |   type # either "pubmed" or "generic". Not specified by user
 15 | ){
 16 |   # detect tags
 17 |   if(type == "pubmed"){
 18 |     ris_regex <- "^[[:upper:]]{2,4}\\s*-\\s"
 19 |   }else{ # i.e. generic
 20 |     ris_regex <- "(^([[:upper:]]{2}|[[:upper:]][[:digit:]])\\s+)|^ER$"
 21 |     # NOTE: "^ER$" is a bug fix for .ciw end rows
 22 |   }
 23 |   tags <- regexpr(ris_regex, perl = TRUE, z)
 24 |   z_dframe <- data.frame(
 25 |     text = z,
 26 |     row = seq_along(z),
 27 |     match_length = attr(tags, "match.length"),
 28 |     stringsAsFactors = FALSE
 29 |   )
 30 |   z_list <- split(z_dframe, z_dframe$match_length)
 31 |   z_list <- lapply(z_list, function(a){
 32 |     n <- a$match_length[1]
 33 |     if(n < 0){
 34 |       result <- data.frame(
 35 |         ris = "",
 36 |         text = a$text,
 37 |         row_order = a$row,
 38 |         stringsAsFactors = FALSE
 39 |       )
 40 |     }else{
 41 |       result <- data.frame(
 42 |         ris = sub("\\s{0,}-\\s{0,}|^\\s+|\\s+$", "", substr(a$text, 1, n)),
 43 |         text = gsub("^\\s+|\\s+$", "", substr(a$text, n+1, nchar(a$text))),
 44 |         row_order = a$row,
 45 |         stringsAsFactors = FALSE
 46 |       )
 47 |     }
 48 |     return(result)
 49 |   })
 50 |   z_dframe <- do.call(rbind, z_list)
 51 |   z_dframe <- z_dframe[order(z_dframe$row), ]
 52 | 
 53 |   # clean up obvious errors
 54 |   z_dframe$ris <- gsub("[[:punct:]]", "", z_dframe$ris)
 55 |   z_dframe$text <- sub("^[[:punct:]]{0,1}\\s*", "", z_dframe$text)
 56 | 
 57 |   # replace tag information for delimiter == character | space
 58 |   if(delimiter == "character"){ # i.e. a single character repeated many times
 59 |     z_dframe$ris[which(
 60 |       unlist(lapply(
 61 |         strsplit(z, ""),
 62 |         function(a){
 63 |           length(unique(a)) == 1 & length(a > 6)
 64 |         }
 65 |       ))
 66 |     )] <- "ER"
 67 |   }
 68 |   if(delimiter == "space"){
 69 |     z_dframe$ris[which(z_dframe$ris == "" & z_dframe$text == "")] <- "ER"
 70 | 
 71 |     z_rollsum <- rollingsum(z_dframe$ris == "ER")
 72 |     if(any(z_rollsum > 1)){
 73 |       z_dframe <- z_dframe[which(z_rollsum <= 1), ]
 74 |     }
 75 |   }
 76 |   if(delimiter == "endrow"){
 77 |     # work out what most common starting tag is
 78 |     z_dframe$ref <- c(0, cumsum(z_dframe$ris == "ER")[
 79 |       seq_len(nrow(z_dframe)-1)]
 80 |     ) # split by reference
 81 | 
 82 |     start_tags <- unlist(lapply(
 83 |       split(z_dframe$ris, z_dframe$ref),
 84 |       function(a){a[which(a != "")[1]]}
 85 |     ))
 86 | 
 87 |     # fix bug where not all entries start with same tag
 88 |     start_tag_xtab <- xtabs(~ start_tags )
 89 |     end_rows <- which(z_dframe$ris == "ER")
 90 |     # previous behavior:
 91 |     if(max(xtabs(~ start_tags)) == length(which(z_dframe$ris == "ER"))){
 92 |       start_tag <- names(which.max(xtabs(~ start_tags)))
 93 |       row_df <- data.frame(
 94 |         start = which(z_dframe$ris == start_tag),
 95 |         end = end_rows
 96 |       )
 97 |     # new option:
 98 |     }else{
 99 |       row_df <- data.frame(
100 |         start = c(1, end_rows[seq_len(length(end_rows) - 1)]),
101 |         end = end_rows
102 |       )
103 |     }
104 | 
105 |     z_list <- apply(
106 |       row_df,
107 |       1,
108 |       function(a){c(a[1]:a[2])}
109 |     )
110 |     z_list <- lapply(
111 |       z_list,
112 |       function(a, lookup){lookup[a, ]},
113 |       lookup = z_dframe
114 |     )
115 |     z_dframe <- as.data.frame(
116 |       do.call(rbind, z_list)
117 |     )
118 |   }
119 | 
120 |   # cleaning
121 |   z_dframe$ref <- c(0, cumsum(z_dframe$ris == "ER")[
122 |     seq_len(nrow(z_dframe)-1)]
123 |   ) # split by reference
124 |   z_dframe <- z_dframe[which(z_dframe$text != ""), ] # remove empty rows
125 |   z_dframe <- z_dframe[which(z_dframe$ris != "ER"), ] # remove end rows
126 |   z_dframe$text <- trimws(z_dframe$text)
127 | 
128 |   # fill missing tags
129 |   z_split <- split(z_dframe, z_dframe$ref)
130 |   z_split <- lapply(z_split, function(a){
131 |     if(a$ris[1] == ""){
132 |       a$ris[1] <- "ZZ"
133 |     }
134 |     accum_ris <- Reduce(c, a$ris, accumulate = TRUE)
135 |     a$ris <- unlist(lapply(
136 |       accum_ris,
137 |       function(b){
138 |         good_vals <- which(b != "")
139 |         b[good_vals[length(good_vals)]]
140 |       }))
141 |     return(a)
142 |   })
143 |   z_dframe <- as.data.frame(
144 |     do.call(rbind, z_split)
145 |   )
146 | 
147 |   return(z_dframe)
148 | }
149 | 


--------------------------------------------------------------------------------
/R/read_refs.R:
--------------------------------------------------------------------------------
  1 | #' Import bibliographic search results
  2 | #'
  3 | #' Import common bibliographic reference formats  such as `.bib`, `.ris`, or
  4 | #' `.txt`.
  5 | #' @param filename A path to a filename or vector of filenames containing search
  6 | #' results to import.
  7 | #' @param tag_naming Either a length-1 character stating how should ris tags be
  8 | #' replaced (see details for a list of options), or an object inheriting from
  9 | #' class `data.frame` containing user-defined replacement tags.
 10 | #' @param return_df If `TRUE` (default), returns a `data.frame`; if `FALSE`,
 11 | #' returns a list.
 12 | #' @param verbose If `TRUE`, prints status updates (defaults to `FALSE`).
 13 | #' @details The default for argument `tag_naming` is `"best_guess"`,
 14 | #' which estimates what database has been used for ris tag replacement, then
 15 | #' fills any gaps with generic tags. Any tags missing from the database (i.e.
 16 | #' `code_lookup`) are passed unchanged. Other options are to use tags from
 17 | #' Web of Science (`"wos"`), Scopus (`"scopus"`), Ovid (`"ovid"`)
 18 | #' or Academic Search Premier (`"asp"`). If a `data.frame` is given,
 19 | #' then it must contain two columns: `"code"` listing the original tags in
 20 | #' the source document, and `"field"` listing the replacement column/tag
 21 | #' names. The `data.frame` may optionally include a third column named
 22 | #' `"order"`, which specifies the order of columns in the resulting
 23 | #' `data.frame`; otherwise this will be taken as the row order. Finally,
 24 | #' passing `"none"` to `replace_tags` suppresses tag replacement.
 25 | #' @return Returns a `data.frame` or `list` of assembled search results.
 26 | #' @importFrom dplyr bind_rows
 27 | #' @importFrom rlang abort
 28 | #' @importFrom vroom default_locale
 29 | #' @example inst/examples/read_refs.R
 30 | #' @export
 31 | read_refs <- function(
 32 |   filename,
 33 |   tag_naming = "best_guess",
 34 |   return_df = TRUE,
 35 |   verbose = FALSE,
 36 |   locale = vroom::default_locale()
 37 | ){
 38 | 
 39 |   if(missing(filename)){
 40 |     abort("filename is missing with no default")
 41 |   }
 42 |   file_check <- unlist(lapply(filename, file.exists))
 43 |   if(any(!file_check)){
 44 |     abort("file not found")
 45 |   }
 46 | 
 47 |   if(length(filename) > 1){
 48 |     result_list <- lapply(filename, function(a){
 49 |       read_ref(
 50 |         filename = a,
 51 |         tag_naming = tag_naming,
 52 |         return_df = return_df,
 53 |         verbose = verbose,
 54 |         locale = locale
 55 |       )
 56 |     })
 57 |     names(result_list) <- filename
 58 | 
 59 |     # drop any unrecognized file types
 60 |     null_check <- unlist(lapply(result_list, is.null))
 61 |     if(any(null_check)){
 62 |       result_list <- result_list[-which(null_check)]
 63 |     }
 64 | 
 65 |     if(return_df){
 66 |       result <- bind_rows(result_list)
 67 |       result$filename <- unlist(
 68 |         lapply(seq_len(length(result_list)),
 69 |         function(a, data){
 70 |           rep(names(data)[a], nrow(data[[a]]))
 71 |         },
 72 |         data = result_list
 73 |       ))
 74 |       return(result)
 75 |     }else{
 76 |       result <- do.call(c, result_list)
 77 |       return(result)
 78 |     }
 79 | 
 80 |   }else{ # i.e. if only one filename given
 81 |     return(
 82 |       read_ref(
 83 |         filename,
 84 |         tag_naming = tag_naming,
 85 |         return_df = return_df,
 86 |         verbose = verbose,
 87 |         locale = locale
 88 |       )
 89 |     )
 90 |   }
 91 | }
 92 | 
 93 | #' Internal function called by read_refs for each file
 94 | #'
 95 | #' @description This is the underlying workhorse function that imports
 96 | #' bibliographic files; primarily intended to be called from read_refs.
 97 | #' @param filename A path to a filename containing search results to import.
 98 | #' @param return_df If TRUE, returns a data.frame; if FALSE, returns a list.
 99 | #' @param verbose If TRUE, prints status updates.
100 | #' @return Returns a data.frame or list of assembled search results.
101 | #' @importFrom rlang abort
102 | #' @importFrom rlang warn
103 | #' @importFrom tibble tibble
104 | #' @importFrom vroom default_locale
105 | #' @importFrom vroom vroom_lines
106 | #' @noRd
107 | #' @keywords Internal
108 | read_ref <- function(
109 |   filename,
110 |   tag_naming = "best_guess",
111 |   return_df = TRUE,
112 |   verbose = FALSE,
113 |   locale = default_locale()
114 | ){
115 | 
116 |   # error checking for replace tags
117 |   valid_tags <- c("best_guess", "none", "wos", "scopus", "ovid", "asp", "synthesisr")
118 |   if(inherits(tag_naming, "character")){
119 |     if(!any(valid_tags == tag_naming)){
120 |       abort("tag_naming should be one of 'best_guess', 'none', 'wos', 'scopus', 'ovid',  'asp' or 'synthesisr'.")
121 |     }
122 |   }
123 |   if(inherits(tag_naming, "data.frame")){
124 |     if(any(!(c("code", "field") %in% colnames(tag_naming)))){
125 |       abort("if a data.frame is supplied to replace_tags, it must contain columns 'code' & 'field'.")
126 |     }
127 |   }
128 | 
129 |   if(verbose){cat(paste0("Reading file ", filename, " ... "))}
130 |   parse_function <- vroom_lines(filename,
131 |                                 n_max = 200,
132 |                                 locale = locale) |>
133 |                     detect_parser()
134 | 
135 |   df <- switch(parse_function,
136 |          "parse_ris" = {
137 |            parse_ris(x = vroom_lines(filename, locale = locale),
138 |                      tag_naming = tag_naming)
139 |          },
140 |          "parse_pubmed" = {
141 |            parse_pubmed(x = vroom_lines(filename, locale = locale))
142 |          },
143 |          "parse_bibtex" = {
144 |            parse_bibtex(x = vroom_lines(filename, locale = locale))
145 |          },
146 |          "parse_csv" = {
147 |            vroom(filename,
148 |                  delim =  ",",
149 |                  locale = locale) |>
150 |            match_columns()
151 |          },
152 |          "parse_tsv" = {
153 |            vroom(filename,
154 |                  delim =  "\t",
155 |                  locale = locale) |>
156 |            match_columns()
157 |          },
158 |          { # aka "unknown"
159 |            NULL
160 |          }
161 |         )
162 | 
163 |   if(is.null(df)){
164 |     warn(paste("file type not recognised for ", filename, " - skipping"))
165 |     return(NULL)
166 |   }
167 | 
168 |   # return object in correct format
169 |   # note: the `if` test here is needed because `csv` and `tsv` are already
170 |   # `data.frame`s, whereas all other formats return `bibliography`s
171 |   if(inherits(df, "data.frame")){
172 |     if(!return_df){df <- as.bibliography(df)}
173 |   }else{
174 |     if(return_df){df <- as.data.frame(df) |> tibble()}
175 |   }
176 |   if(inherits(df, "data.frame")){df <- clean_df(df)}
177 |   if(verbose){cat("done\n")}
178 |   return(df)
179 | }
180 | 


--------------------------------------------------------------------------------
/R/reexports.R:
--------------------------------------------------------------------------------
1 | #' @importFrom tibble as_tibble
2 | #' @export
3 | tibble::as_tibble
4 | 


--------------------------------------------------------------------------------
/R/string_functions.R:
--------------------------------------------------------------------------------
 1 | #' Calculate similarity between two strings
 2 | #' @description These functions each access a specific `"methods"` argument
 3 | #' provided by `stringdist`, and are provided for convenient calling by
 4 | #' `find_duplicates()`. They do not include any new functionality beyond that
 5 | #' given by `stringdist`, which you should use for your own analyses.
 6 | #' @param a A character vector of items to match to b.
 7 | #' @param b A character vector of items to match to a.
 8 | #' @return Returns a score of same length as b, giving the dissimilarity between
 9 | #' a and b.
10 | #' @importFrom stringdist stringdist
11 | #' @name string_
12 | #' @export
13 | string_osa <- function(a, b){stringdist(a, b, method = "osa")}
14 | 
15 | ## NOTE: This looks like poor coding practice. Consider deprecating.
16 | 
17 | #' @rdname string_
18 | #' @export
19 | string_lv <- function(a, b){stringdist(a, b, method = "lv")}
20 | 
21 | #' @rdname string_
22 | #' @export
23 | string_dl <- function(a, b){stringdist(a, b, method = "dl")}
24 | 
25 | #' @rdname string_
26 | #' @export
27 | string_hamming <- function(a, b){stringdist(a, b, method = "hamming")}
28 | 
29 | #' @rdname string_
30 | #' @export
31 | string_lcs <- function(a, b){stringdist(a, b, method = "lcs")}
32 | 
33 | #' @rdname string_
34 | #' @export
35 | string_qgram <- function(a, b){stringdist(a, b, method = "qgram")}
36 | 
37 | #' @rdname string_
38 | #' @export
39 | string_cosine <- function(a, b){stringdist(a, b, method = "cosine")}
40 | 
41 | #' @rdname string_
42 | #' @export
43 | string_jaccard <- function(a, b){stringdist(a, b, method = "jaccard")}
44 | 
45 | #' @rdname string_
46 | #' @export
47 | string_jw <- function(a, b){stringdist(a, b, method = "jw")}
48 | 
49 | #' @rdname string_
50 | #' @export
51 | string_soundex <- function(a, b){stringdist(a, b, method = "soundex")}
52 | 


--------------------------------------------------------------------------------
/R/synthesisr-package.R:
--------------------------------------------------------------------------------
 1 | #' synthesisr: Import, assemble, and deduplicate bibiliographic datasets
 2 | #'
 3 | #'  Systematic review searches include multiple databases
 4 | #'  that export results in a variety of formats with overlap in
 5 | #'  coverage between databases. To streamline the process of importing,
 6 | #'  assembling, and deduplicating results, `synthesisr` recognizes
 7 | #'  bibliographic files exported from databases commonly used for
 8 | #'  systematic reviews and merges results into a standardized format.
 9 | #'
10 | #' @section Import & Export:
11 | #' The key task performed by `synthesisr` is flexible import and
12 | #' presentation of bibliographic data. This is typically achieved by
13 | #' `read_refs()`, which can import multiple files at once and link them together
14 | #' into a single `data.frame`. Conversely, export is via `write_refs()`. Users
15 | #' that require more detailed control can use the following functions:
16 | #'
17 | #' \itemize{
18 | #'   \item [read_refs] Read bibliographic data
19 | #'   \item [write_refs] Write bibliographic data
20 | #'   \item [detect_] Detect file attributes
21 | #'   \item [parse_] Parse a vector containing bibliographic data
22 | #'   \item [clean_] Cleaning functions for author and column names
23 | #'   \item [code_lookup] A dataset of potential ris tags
24 | #' }
25 | #'
26 | #' @section Formatting:
27 | #' \itemize{
28 | #'   \item [bibliography-class] Methods for class `bibliography`
29 | #'   \item [format_citation] Return a clean citation from a `bibliography` or `data.frame`
30 | #'   \item [add_line_breaks] Set a maximum character width for strings
31 | #'}
32 | #'
33 | #' @section Deduplication:
34 | #' When importing from multiple databases, it is likely that there will be
35 | #' duplicates in the resulting dataset. The easiest way to deal with this
36 | #' problem in `synthesisr` is using the `deduplicate()` function; but this can
37 | #' be risky, particularly if there are no DOIs in the dataset. To get finer
38 | #' control of the deduplication process, consider using the sub-functions:
39 | #'
40 | #'\itemize{
41 | #'   \item [deduplicate] Semi-automated duplicate removal
42 | #'   \item [find_duplicates] Locate potentially duplicated references
43 | #'   \item [extract_unique_references] Return a data.frame with only 'unique' references
44 | #'   \item [review_duplicates] Manually review potential duplicates
45 | #'   \item [override_duplicates] Manually override identified duplicates
46 | #'   \item [fuzz_] Fuzzy string matching c/o `fuzzywuzzy`
47 | #'   \item [string_] Fuzzy string matching c/o `stringdist`
48 | #'}
49 | #'
50 | #' @section Deprecated:
51 | #'
52 | #' \itemize{
53 | #'   \item [merge_columns] Synonymous with [dplyr::bind_rows]
54 | #' }
55 | #' @name synthesisr-package
56 | #' @docType package
57 | "_PACKAGE"
58 | 


--------------------------------------------------------------------------------
/R/write_refs.R:
--------------------------------------------------------------------------------
  1 | #' Export data to a bibliographic format
  2 | #'
  3 | #' @description This function exports data.frames containing bibliographic
  4 | #' information to either a .ris or .bib file.
  5 | #' @param x Either a data.frame containing bibliographic information or an
  6 | #' object of class bibliography.
  7 | #' @param file filename to save to.
  8 | #' @param format What format should the data be exported as? Options are ris or
  9 | #' bib.
 10 | #' @param tag_naming what naming convention should be used to write RIS files?
 11 | #' See details for options.
 12 | #' @param write Logical should a file should be written? If FALSE returns a
 13 | #' `list`.
 14 | #' @return This function is typically called for it's side effect of writing a
 15 | #' file in the specified location and format. If \code{write} is FALSE, returns
 16 | #' a character vector containing bibliographic information in the specified
 17 | #' format.
 18 | #' @example inst/examples/parse_.R
 19 | #' @rdname write_refs
 20 | #' @importFrom rlang abort
 21 | #' @export
 22 | write_refs <- function(
 23 |     x,
 24 |     file,
 25 |     format = "ris",
 26 |     tag_naming = "synthesisr",
 27 |     write = TRUE
 28 | ){
 29 |   # check input data
 30 |   if(!inherits(x, c("bibliography", "data.frame"))) {
 31 |     abort("write_bibliography only accepts objects of class 'data.frame' or 'bibliography'")
 32 |   }
 33 |   if(inherits(x, "data.frame")){
 34 |     x <- x |>
 35 |       as.data.frame() |>
 36 |       as.bibliography()
 37 |   }
 38 | 
 39 |   if(missing(file) & (write == TRUE)){
 40 |     abort("`file` is missing, with no default")
 41 |   }
 42 | 
 43 |   # check format
 44 |   if(!(format %in% c("ris", "bib"))){
 45 |     abort("format must be either 'ris' or 'bib'")
 46 |   }
 47 | 
 48 |   # check output format - consistent with read_refs
 49 |   if(format == "ris"){
 50 |     valid_tags <- c("best_guess", "none", "wos", "scopus", "ovid", "asp", "synthesisr")
 51 |     if(inherits(tag_naming, "character")){
 52 |       if(!any(valid_tags == tag_naming)){
 53 |         abort("tag_naming should be one of 'best_guess', 'none', 'wos', 'scopus', 'ovid',  'asp' or 'synthesisr'.")
 54 |       }
 55 |     }else if(inherits(tag_naming, "data.frame")){
 56 |       if(any(!(c("code", "field") %in% colnames(tag_naming)))){
 57 |         abort("if a data.frame is supplied to replace_tags, it must contain columns 'code' & 'field'.")
 58 |       }
 59 |     }
 60 |   }
 61 | 
 62 |   # write result in correct format
 63 |   export <- switch(format,
 64 |                    "bib" = {write_bib(x)},
 65 |                    "ris" = {write_ris(x, tag_naming = tag_naming)}
 66 |   )
 67 |   names(export) <- NULL
 68 | 
 69 |   if(write) {
 70 |     write.table(
 71 |       export,
 72 |       check_filename(file),
 73 |       quote = FALSE,
 74 |       row.names = FALSE,
 75 |       col.names = FALSE
 76 |     )
 77 |   }else{
 78 |     invisible(return(export))
 79 |   }
 80 | }
 81 | 
 82 | #' Internal function to check file names
 83 | #' @noRd
 84 | #' @keywords Internal
 85 | check_filename <- function(x){
 86 |   # check file information
 87 |   if(length(x) > 1){
 88 |     abort("argument 'file' should be a length-1 character")
 89 |   }
 90 |   if(!inherits(x, "character")){
 91 |     abort("argument 'file' should be an object of class `character`")
 92 |   }
 93 |   if(grepl("\\.[[:alpha:]]{2,4}$", x)){
 94 |     filename <- x
 95 |   }else{
 96 |     filename <- paste(x, format, sep = ".")
 97 |   }
 98 |   filename
 99 | }
100 | 
101 | 
102 | # Parse an object of class bibliography for export in bib format
103 | #' @rdname write_refs
104 | #' @export
105 | write_bib <- function(x) {
106 |   # process basic text
107 |   result <- lapply(x, function(a) {
108 |     if (any(names(a) == "author")) {
109 |       a$author <- paste(a$author, collapse = " and ")
110 |     }
111 |     a <- lapply(a, function(b) {
112 |       # ensure only one entry per value
113 |       if (length(b) > 1) {
114 |         paste(b, collapse = "; ")
115 |       } else{
116 |         b
117 |       }
118 |     })
119 |     paste0(names(a), "={", a, "},") # format as text
120 |   })
121 | 
122 |   # add article identifier info
123 |   export <- unlist(
124 |     lapply(seq_len(length(result)),
125 |     function(a, source, entry_names) {
126 |       c(paste0("@ARTICLE{", entry_names[a], ","),
127 |         source[a],
128 |         "}",
129 |         "")
130 |     },
131 |     source = result,
132 |     entry_names = names(x)))
133 |   names(export) <- NULL
134 |   return(export)
135 | 
136 | }
137 | 
138 | 
139 | # Parse an object of class bibliography for export in ris format
140 | #' @rdname write_refs
141 | #' @export
142 | write_ris <- function(x,
143 |   tag_naming = "synthesisr"
144 | ){
145 |   result <- lapply(x, function(a, lookup) {
146 | 
147 |     # convert to tagged vector
148 |     b <- do.call(c, a)
149 |     b <- b[!is.na(b)]
150 |     b <- data.frame(
151 |       tag = c(names(b), "end"),
152 |       entry = c(b, ""),
153 |       stringsAsFactors = FALSE
154 |     )
155 |     rownames(b) <- NULL
156 |     b$tag <- gsub("[[:digit:]]", "", b$tag)
157 | 
158 |     # page information needs to be treated separately
159 |     if(any(b$tag == "pages")){
160 |       page_row <- which(b$tag == "pages")
161 |       page_text <- b$entry[page_row]
162 |       if(grepl("-", page_text)){
163 |         text_lookup <- list(
164 |           regexpr("^[[:digit:]]+", page_text),
165 |           regexpr("-[[:digit:]]+", page_text)
166 |         )
167 |         if(all(text_lookup > 0)){
168 |           text_cleaned <- unlist(lapply(
169 |             text_lookup,
170 |             function(b){substr(page_text, b, b + attr(b, "match.length") - 1)}
171 |           ))
172 |           new_rows <- data.frame(
173 |             tag = c("startpage", "endpage"),
174 |             entry = gsub("[[:punct:]]", "", text_cleaned),
175 |             stringsAsFactors = FALSE
176 |           )
177 |           b <- as.data.frame(rbind(
178 |             b[c(1:(page_row - 1)),],
179 |             new_rows,
180 |             b[c((page_row + 1):nrow(b)),]
181 |           ))
182 |         }
183 |       }
184 |     }
185 |     b$order <- seq_len(nrow(b))
186 | 
187 |     # substitute tags for ris format versions
188 |     b <- merge(
189 |       lookup,
190 |       b,
191 |       by.x = "field",
192 |       by.y = "tag",
193 |       all.x = FALSE,
194 |       all.y = FALSE
195 |     )
196 |     b <- b[order(b$order), c(2:3)]
197 | 
198 |     # concatenate rows, return a vector of strings
199 |     return(
200 |       c(paste(b$code, b$entry, sep = "  - "), "ER  - ", "")
201 |     )
202 | 
203 |   },
204 |   lookup = synthesisr::code_lookup[
205 |     synthesisr::code_lookup[, paste0("ris_", tag_naming)],
206 |     c("code", "field")
207 |   ]
208 |   )
209 | 
210 |   export <- do.call(c, result)
211 |   return(export)
212 | }
213 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 |   output: github_document
 3 | ---
 4 | 
 5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 6 | 
 7 | ```{r, echo = FALSE}
 8 | knitr::opts_chunk$set(
 9 |   collapse = TRUE,
10 |   comment = "#>"
11 | )
12 | ```
13 | 
14 | <img src="man/figures/logo.png" align="left" style="margin: 0px 25px 0px 0px;" alt="" width="120"/><h2>Tools for bibliographic data</h2>
15 | 
16 | Metascientific analyses - such as systematic reviews and meta-analyses - commonly
17 | involve searches of multiple bibliographic databases. These databases use a 
18 | range of different data formats, and have differing degrees of overlap in the
19 | journals and articles that they index. To streamline the process of importing,
20 | assembling, and deduplicating results, `synthesisr` recognizes the file output
21 | of commonly used databases for systematic reviews and merges results into a
22 | tibble.
23 | 
24 | If you have questions, comments, feature requests, or find a bug, [please open an 
25 | issue](https://github.com/mjwestgate/synthesisr/issues).
26 | 
27 | ## Installation
28 | 
29 | `synthesisr` is available on CRAN:
30 | 
31 | ```{r}
32 | #| eval: false
33 | install.packages("synthesisr")
34 | ```
35 | 
36 | Alternatively you can install from GitHub:
37 | 
38 | ```{r}
39 | #| eval: FALSE
40 | remotes::install_github("mjwestgate/synthesisr")
41 | ```
42 | 
43 | ## Basic usage
44 | 
45 | The default function for importing bibliographic data is `read_refs()`:
46 | 
47 | ```{r}
48 | #| eval: false
49 | x <- read_refs("a_file.bib")
50 | ```
51 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 3 | <img src="man/figures/logo.png" align="left" style="margin: 0px 25px 0px 0px;" alt="" width="120"/>
 4 | <h2>
 5 | Tools for bibliographic data
 6 | </h2>
 7 | 
 8 | Metascientific analyses - such as systematic reviews and meta-analyses -
 9 | commonly involve searches of multiple bibliographic databases. These
10 | databases use a range of different data formats, and have differing
11 | degrees of overlap in the journals and articles that they index. To
12 | streamline the process of importing, assembling, and deduplicating
13 | results, `synthesisr` recognizes the file output of commonly used
14 | databases for systematic reviews and merges results into a tibble.
15 | 
16 | If you have questions, comments, feature requests, or find a bug,
17 | [please open an issue](https://github.com/mjwestgate/synthesisr/issues).
18 | 
19 | ## Installation
20 | 
21 | `synthesisr` is available on CRAN:
22 | 
23 | ``` r
24 | install.packages("synthesisr")
25 | ```
26 | 
27 | Alternatively you can install from GitHub:
28 | 
29 | ``` r
30 | remotes::install_github("mjwestgate/synthesisr")
31 | ```
32 | 
33 | ## Basic usage
34 | 
35 | The default function for importing bibliographic data is `read_refs()`:
36 | 
37 | ``` r
38 | x <- read_refs("a_file.bib")
39 | ```
40 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
 1 | url: https://martinwestgate.com/synthesisr/
 2 | template:
 3 |   bootstrap: 5
 4 |   bslib:
 5 |     code_font: {google: "Source Code Pro"}
 6 |     pkgdown-nav-height: 130px
 7 |   params:
 8 |     bootswatch: litera
 9 | development:
10 |   mode: auto
11 | navbar:
12 |   structure:
13 |     left:
14 |     - home
15 |     - articles
16 |     - reference
17 |     right:
18 |     - search
19 |     - news
20 |     - github
21 |   components:
22 |     articles:
23 |       text: Articles
24 |       menu:
25 |       - text: Overview
26 |         href: articles/overview.html
27 |     news:
28 |       text: News
29 |       href: news/index.html
30 | reference:
31 | - title: Overview
32 |   contents:
33 |   - synthesisr-package
34 | - title: Import & Export
35 |   contents:
36 |   - read_refs
37 |   - write_refs
38 |   - detect_
39 |   - parse_
40 |   - clean_
41 |   - code_lookup
42 | - title: Formatting
43 |   contents:
44 |   - bibliography-class
45 |   - format_citation
46 |   - add_line_breaks
47 | - title: Deduplication
48 |   contents:
49 |   - deduplicate
50 |   - find_duplicates
51 |   - extract_unique_references
52 |   - review_duplicates
53 |   - override_duplicates
54 |   - fuzz_
55 |   - string_
56 | - title: Deprecated
57 |   contents:
58 |   - merge_columns
59 | 


--------------------------------------------------------------------------------
/data/code_lookup.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/data/code_lookup.RData


--------------------------------------------------------------------------------
/inst/examples/clean_.R:
--------------------------------------------------------------------------------
 1 | df <-  data.frame(
 2 |   X..title. = c(
 3 |     "EviAtlas: a tool for visualising evidence synthesis databases",
 4 |     "revtools: An R package to support article screening for evidence synthesis",
 5 |     "An automated approach to identifying search terms for systematic reviews",
 6 |     "Reproducible, flexible and high-throughput data extraction from primary literature"),
 7 |   YEAR = c("2019", "2019", "2019", "2019"),
 8 |   authors = c(
 9 |     "Haddaway et al",
10 |     "Westgate",
11 |     "EM Grames AND AN Stillman  & MW Tingley and CS Elphick",
12 |     "Pick et al")
13 | )
14 | 
15 | clean_df(df)
16 | 
17 | # or use sub-functions
18 | colnames(df) <- clean_colnames(df)
19 | # colnames(df) <- clean_colnames(colnames(df)) # also works
20 | df$author <- clean_authors(df$author)
21 | 
22 | 


--------------------------------------------------------------------------------
/inst/examples/deduplicate.R:
--------------------------------------------------------------------------------
 1 | my_df <-  data.frame(
 2 |   title = c(
 3 |     "EviAtlas: a tool for visualising evidence synthesis databases",
 4 |     "revtools: An R package to support article screening for evidence synthesis",
 5 |     "An automated approach to identifying search terms for systematic reviews",
 6 |     "Reproducible, flexible and high-throughput data extraction from primary literature",
 7 |     "eviatlas:tool for visualizing evidence synthesis databases.",
 8 |     "REVTOOLS a package to support article-screening for evidence synthsis"
 9 |   ),
10 |   year = c("2019", "2019", "2019", "2019", NA, NA),
11 |   authors = c("Haddaway et al", "Westgate",
12 |               "Grames et al", "Pick et al", NA, NA),
13 |   stringsAsFactors = FALSE
14 | )
15 | 
16 | # run deduplication
17 | dups <- find_duplicates(
18 |   my_df$title,
19 |   method = "string_osa",
20 |   rm_punctuation = TRUE,
21 |   to_lower = TRUE
22 | )
23 | 
24 | extract_unique_references(my_df, matches = dups)
25 | 
26 | # or, in one line:
27 | deduplicate(my_df, "title",
28 |   method = "string_osa",
29 |   rm_punctuation = TRUE,
30 |   to_lower = TRUE)
31 | 


--------------------------------------------------------------------------------
/inst/examples/detect_.R:
--------------------------------------------------------------------------------
 1 | revtools <- c(
 2 |   "",
 3 |   "PMID- 31355546",
 4 |   "VI  - 10",
 5 |   "IP  - 4",
 6 |   "DP  - 2019 Dec",
 7 |   "TI  - revtools: An R package to support article
 8 |          screening for evidence synthesis.",
 9 |   "PG  - 606-614",
10 |   "LID - 10.1002/jrsm.1374 [doi]",
11 |   "AU  - Westgate MJ",
12 |   "LA  - eng",
13 |   "PT  - Journal Article",
14 |   "JT  - Research Synthesis Methods",
15 |   ""
16 | )
17 | 
18 | # detect basic attributes of ris files
19 | detect_parser(revtools)
20 | detect_delimiter(revtools)
21 | 
22 | # determine which tag format to use
23 | tags <- trimws(unlist(lapply(
24 |   strsplit(revtools, "- "),
25 |   function(a){a[1]}
26 | )))
27 | pubmed_tag_list <- detect_lookup(tags[!is.na(tags)])
28 | 
29 | # find year data in other columns
30 | df <- as.data.frame(parse_pubmed(revtools))
31 | df$year <- detect_year(df)
32 | 


--------------------------------------------------------------------------------
/inst/examples/format_citation.R:
--------------------------------------------------------------------------------
 1 | roses <- c("@article{haddaway2018,
 2 |   title={ROSES RepOrting standards for Systematic Evidence Syntheses:
 3 |   pro forma, flow-diagram and descriptive summary of the plan and
 4 |   conduct of environmental systematic reviews and systematic maps},
 5 |   author={Haddaway, Neal R and Macura, Biljana and Whaley, Paul and Pullin, Andrew S},
 6 |   journal={Environmental Evidence},
 7 |   volume={7},
 8 |   number={1},
 9 |   pages={7},
10 |   year={2018},
11 |   publisher={Springer}
12 | }")
13 | 
14 | tmp <- tempfile()
15 | writeLines(roses, tmp)
16 | 
17 | citation <- read_ref(tmp)
18 | format_citation(citation)
19 | 


--------------------------------------------------------------------------------
/inst/examples/fuzzdist.R:
--------------------------------------------------------------------------------
1 | fuzzdist("On the Origin of Species",
2 |          "Of the Original Specs",
3 |          method = "fuzz_m_ratio")
4 | 


--------------------------------------------------------------------------------
/inst/examples/merge_columns.R:
--------------------------------------------------------------------------------
 1 | df_1 <-  data.frame(
 2 |   title = c(
 3 |     "EviAtlas: a tool for visualising evidence synthesis databases",
 4 |     "revtools: An R package to support article screening for evidence synthesis"
 5 |   ),
 6 |   year = c("2019", "2019")
 7 | )
 8 | 
 9 | df_2 <-  data.frame(
10 |   title = c(
11 |     "An automated approach to identifying search terms for systematic reviews",
12 |     "Reproducible, flexible and high-throughput data extraction from primary literature"
13 |   ),
14 |   authors = c("Grames et al", "Pick et al")
15 | )
16 | 
17 | merge_columns(df_1, df_2)
18 | 


--------------------------------------------------------------------------------
/inst/examples/parse_.R:
--------------------------------------------------------------------------------
 1 | eviatlas <- c(
 2 |   "TY  - JOUR",
 3 |   "AU  - Haddaway, Neal R.",
 4 |   "AU  - Feierman, Andrew",
 5 |   "AU  - Grainger, Matthew J.",
 6 |   "AU  - Gray, Charles T.",
 7 |   "AU  - Tanriver-Ayder, Ezgi",
 8 |   "AU  - Dhaubanjar, Sanita",
 9 |   "AU  - Westgate, Martin J.",
10 |   "PY  - 2019",
11 |   "DA  - 2019/06/04",
12 |   "TI  - EviAtlas: a tool for visualising evidence synthesis databases",
13 |   "JO  - Environmental Evidence",
14 |   "SP  - 22",
15 |   "VL  - 8",
16 |   "IS  - 1",
17 |   "SN  - 2047-2382",
18 |   "UR  - https://doi.org/10.1186/s13750-019-0167-1",
19 |   "DO  - 10.1186/s13750-019-0167-1",
20 |   "ID  - Haddaway2019",
21 |   "ER  - "
22 | )
23 | 
24 | detect_parser(eviatlas) # = "parse_ris"
25 | df <- as.data.frame(parse_ris(eviatlas))
26 | ris_out <- write_refs(df, format = "ris", file = FALSE)
27 | 


--------------------------------------------------------------------------------
/inst/examples/read_refs.R:
--------------------------------------------------------------------------------
 1 | litsearchr <- c(
 2 |   "@article{grames2019,
 3 |   title={An automated approach to identifying search terms for
 4 |   systematic reviews using keyword co-occurrence networks},
 5 |   author={Grames, Eliza M and Stillman, Andrew N and Tingley, Morgan W and Elphick, Chris S},
 6 |   journal={Methods in Ecology and Evolution},
 7 |   volume={10},
 8 |   number={10},
 9 |   pages={1645--1654},
10 |   year={2019},
11 |   publisher={Wiley Online Library}
12 | }"
13 | )
14 | 
15 | tmp <- tempfile()
16 | 
17 | writeLines(litsearchr, tmp)
18 | 
19 | df <- read_refs(tmp, return_df = TRUE, verbose = TRUE)
20 | 


--------------------------------------------------------------------------------
/inst/hex/Space_Mono/OFL.txt:
--------------------------------------------------------------------------------
 1 | Copyright 2016 Google Inc. All Rights Reserved.
 2 | 
 3 | This Font Software is licensed under the SIL Open Font License, Version 1.1.
 4 | This license is copied below, and is also available with a FAQ at:
 5 | http://scripts.sil.org/OFL
 6 | 
 7 | 
 8 | -----------------------------------------------------------
 9 | SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
10 | -----------------------------------------------------------
11 | 
12 | PREAMBLE
13 | The goals of the Open Font License (OFL) are to stimulate worldwide
14 | development of collaborative font projects, to support the font creation
15 | efforts of academic and linguistic communities, and to provide a free and
16 | open framework in which fonts may be shared and improved in partnership
17 | with others.
18 | 
19 | The OFL allows the licensed fonts to be used, studied, modified and
20 | redistributed freely as long as they are not sold by themselves. The
21 | fonts, including any derivative works, can be bundled, embedded, 
22 | redistributed and/or sold with any software provided that any reserved
23 | names are not used by derivative works. The fonts and derivatives,
24 | however, cannot be released under any other type of license. The
25 | requirement for fonts to remain under this license does not apply
26 | to any document created using the fonts or their derivatives.
27 | 
28 | DEFINITIONS
29 | "Font Software" refers to the set of files released by the Copyright
30 | Holder(s) under this license and clearly marked as such. This may
31 | include source files, build scripts and documentation.
32 | 
33 | "Reserved Font Name" refers to any names specified as such after the
34 | copyright statement(s).
35 | 
36 | "Original Version" refers to the collection of Font Software components as
37 | distributed by the Copyright Holder(s).
38 | 
39 | "Modified Version" refers to any derivative made by adding to, deleting,
40 | or substituting -- in part or in whole -- any of the components of the
41 | Original Version, by changing formats or by porting the Font Software to a
42 | new environment.
43 | 
44 | "Author" refers to any designer, engineer, programmer, technical
45 | writer or other person who contributed to the Font Software.
46 | 
47 | PERMISSION & CONDITIONS
48 | Permission is hereby granted, free of charge, to any person obtaining
49 | a copy of the Font Software, to use, study, copy, merge, embed, modify,
50 | redistribute, and sell modified and unmodified copies of the Font
51 | Software, subject to the following conditions:
52 | 
53 | 1) Neither the Font Software nor any of its individual components,
54 | in Original or Modified Versions, may be sold by itself.
55 | 
56 | 2) Original or Modified Versions of the Font Software may be bundled,
57 | redistributed and/or sold with any software, provided that each copy
58 | contains the above copyright notice and this license. These can be
59 | included either as stand-alone text files, human-readable headers or
60 | in the appropriate machine-readable metadata fields within text or
61 | binary files as long as those fields can be easily viewed by the user.
62 | 
63 | 3) No Modified Version of the Font Software may use the Reserved Font
64 | Name(s) unless explicit written permission is granted by the corresponding
65 | Copyright Holder. This restriction only applies to the primary font name as
66 | presented to the users.
67 | 
68 | 4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
69 | Software shall not be used to promote, endorse or advertise any
70 | Modified Version, except to acknowledge the contribution(s) of the
71 | Copyright Holder(s) and the Author(s) or with their explicit written
72 | permission.
73 | 
74 | 5) The Font Software, modified or unmodified, in part or in whole,
75 | must be distributed entirely under this license, and must not be
76 | distributed under any other license. The requirement for fonts to
77 | remain under this license does not apply to any document created
78 | using the Font Software.
79 | 
80 | TERMINATION
81 | This license becomes null and void if any of the above conditions are
82 | not met.
83 | 
84 | DISCLAIMER
85 | THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
86 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
87 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
88 | OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
89 | COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
90 | INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
91 | DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
92 | FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
93 | OTHER DEALINGS IN THE FONT SOFTWARE.
94 | 


--------------------------------------------------------------------------------
/inst/hex/Space_Mono/SpaceMono-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/inst/hex/Space_Mono/SpaceMono-Bold.ttf


--------------------------------------------------------------------------------
/inst/hex/Space_Mono/SpaceMono-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/inst/hex/Space_Mono/SpaceMono-Regular.ttf


--------------------------------------------------------------------------------
/inst/hex/hex.R:
--------------------------------------------------------------------------------
  1 | # draw a hex sticker for `synthesisr`
  2 | library(tibble)
  3 | library(dplyr)
  4 | library(string2path)
  5 | library(showtext)
  6 | library(sf)
  7 | library(ggplot2)
  8 | library(hexSticker)
  9 | # library(viridis)
 10 | # remotes::install_github("johannesbjork/LaCroixColoR")
 11 | # library(LaCroixColoR)
 12 | 
 13 | # get 'synthesisr' text as a polygon
 14 | final_size <- 1.4
 15 | synth_line <- string2path("synthesisr",
 16 |                           font = "inst/hex/Space_Mono/SpaceMono-Bold.ttf") |>
 17 |   tibble::rowid_to_column() |>
 18 |   tibble() |>
 19 |   mutate(x = x - min(x), y = y - min(y)) |> # place both mins at 0
 20 |   mutate(y = y / max(x), x = x / max(x)) |> # now at x = c(0, 1)
 21 |   mutate(x = (x * final_size) - (final_size * 0.5), y = y * final_size) |> # scale to required size
 22 |   mutate(y = y - (max(y) * 0.5)) # centre vertically
 23 | 
 24 | # convert to `sf` object to allow calculation of spatial properties
 25 | text_polygons <- synth_line %>%
 26 |   st_as_sf(coords = c("x", "y")) |>
 27 |   group_by(path_id) |>
 28 |   summarise(geometry = st_combine(geometry)) |>
 29 |   st_cast("POLYGON")
 30 | 
 31 | # need to clip 6 ('e') with 7 (inside of 'e')
 32 | text_cutouts <- text_polygons[7, ]
 33 | text_polygons <- text_polygons[-7, ]
 34 | words <- st_difference(text_polygons, text_cutouts)
 35 | 
 36 | # clean up
 37 | rm(final_size, synth_line, text_polygons, text_cutouts)
 38 | 
 39 | # now create hexagons
 40 | # from hexSticker, but using sf objects
 41 | create_hexagon <- function(scale = 1){
 42 |   hexd <- data.frame(x = 1+c(rep(-sqrt(3)/2, 2), 0, rep(sqrt(3)/2, 2), 0),
 43 |                      y = 1+c(0.5, -0.5, -1, -0.5, 0.5, 1))
 44 |   rbind(hexd, hexd[1, ]) |>
 45 |     tibble() |>
 46 |     mutate(x = (x - 1) * scale,
 47 |            y = (y - 1) * scale) |>
 48 |     st_as_sf(coords = c("x", "y")) |>
 49 |     summarise(geometry = st_combine(geometry)) %>%
 50 |     st_cast("POLYGON")
 51 | }
 52 | 
 53 | external_hexagon <- create_hexagon(scale = 1.00)
 54 | internal_hexagon <- create_hexagon(scale = 0.935)
 55 | 
 56 | # now create vertical lines that intersect with words
 57 | x_vec <- seq(-0.87, 0.87, by = 0.005)
 58 | result_internal <- lapply(x_vec, function(a){
 59 |   b <- data.frame(x = a, y = c(-1, 1)) |>
 60 |     st_as_sf(coords = c("x", "y")) |>
 61 |     summarise(geometry = st_combine(geometry)) |>
 62 |     st_cast("LINESTRING") |>
 63 |     st_intersection(words)
 64 | 
 65 |   tibble(x = a, length = sum(st_length(b)))
 66 | }) |>
 67 |   bind_rows()
 68 | 
 69 | result_external <- lapply(x_vec, function(a){
 70 |   b <- data.frame(x = a, y = c(-1, 1)) |>
 71 |     st_as_sf(coords = c("x", "y")) |>
 72 |     summarise(geometry = st_combine(geometry)) |>
 73 |     st_cast("LINESTRING") |>
 74 |     st_intersection(internal_hexagon)
 75 | 
 76 |   b |> mutate(x = a)
 77 | }) |>
 78 |   bind_rows()
 79 | 
 80 | # merge
 81 | background_lines <- left_join(result_external,
 82 |                               result_internal,
 83 |                               by = "x")
 84 | 
 85 | # clean up
 86 | rm(x_vec)
 87 | 
 88 | # draw
 89 | font_add("spacemono", "inst/hex/Space_Mono/SpaceMono-Regular.ttf")
 90 | showtext_auto()
 91 | 
 92 | edge_color <- "#000000" # "#b951c9"
 93 | # palette <- lacroix_palette("CranRaspberry", n = 15, type = "continuous") |>
 94 | #   as.character()
 95 | 
 96 | # example colors:
 97 | # x <- lacroix_palette("CranRaspberry", n = 7, type = "continuous") |> as.character()
 98 | simple_palette <- c("#c92029",
 99 |                     "#a3086a",
100 |                     "#6c159e",
101 |                     "#0a238a")
102 | 
103 | p <- ggplot() +
104 |   geom_sf(data = external_hexagon, fill = "white", color = NA) +
105 |   geom_sf(data = background_lines,
106 |           mapping = aes(
107 |             color = x,
108 |             alpha = (length ^ 1.2)),
109 |           linewidth = 0.3) +
110 |   geom_sf(data = internal_hexagon, fill = NA, color = edge_color, linewidth = 0.1) +
111 |   geom_sf(data = words, fill = "white", color = edge_color, linewidth = 0.1) +
112 |   annotate(geom = "text",
113 |            x = 0.7,
114 |            y = -0.17,
115 |            label = "mjwestgate",
116 |            family = "spacemono",
117 |            size = 8,
118 |            hjust = 1,
119 |            color = "#ffffff") +
120 |   # geom_vline(xintercept = 0.35) +
121 |   scale_colour_gradientn(colors = simple_palette) +
122 |   # scale_color_viridis(option = "H") +
123 |   scale_alpha(range = c(0.5, 1)) +
124 |   # scale_color_gradient(low = "#800194", high = "#b951c9") +
125 |   theme_void() +
126 |   theme(legend.position = "none")
127 | 
128 | ggsave("man/figures/logo.png",
129 |        p,
130 |        width = 43.9,
131 |        height = 50.8,
132 |        units = "mm",
133 |        bg = "transparent",
134 |        dpi = 600)
135 | 


--------------------------------------------------------------------------------
/inst/test-data/test_files.R:
--------------------------------------------------------------------------------
  1 | eviatlas <- c(
  2 |   "TY  - JOUR",
  3 |   "AU  - Haddaway, Neal R.",
  4 |   "AU  - Feierman, Andrew",
  5 |   "AU  - Grainger, Matthew J.",
  6 |   "AU  - Gray, Charles T.",
  7 |   "AU  - Tanriver-Ayder, Ezgi",
  8 |   "AU  - Dhaubanjar, Sanita",
  9 |   "AU  - Westgate, Martin J.",
 10 |   "PY  - 2019",
 11 |   "DA  - 2019/06/04",
 12 |   "TI  - EviAtlas: a tool for visualising evidence synthesis databases",
 13 |   "JO  - Environmental Evidence",
 14 |   "SP  - 22",
 15 |   "VL  - 8",
 16 |   "IS  - 1",
 17 |   "SN  - 2047-2382",
 18 |   "UR  - https://doi.org/10.1186/s13750-019-0167-1",
 19 |   "DO  - 10.1186/s13750-019-0167-1",
 20 |   "ID  - Haddaway2019",
 21 |   "ER  - "
 22 | )
 23 | 
 24 | litsearchr <- c(
 25 |   "@article{grames2019automated,
 26 |   title={An automated approach to identifying search terms for systematic reviews using keyword co-occurrence networks},
 27 |   author={Grames, Eliza M and Stillman, Andrew N and Tingley, Morgan W and Elphick, Chris S},
 28 |   journal={Methods in Ecology and Evolution},
 29 |   volume={10},
 30 |   number={10},
 31 |   pages={1645--1654},
 32 |   year={2019},
 33 |   publisher={Wiley Online Library}
 34 | }"
 35 | )
 36 | 
 37 | res_synth_methods <-
 38 |   c(
 39 |     "",
 40 |     "PMID- 32336025",
 41 |     "OWN - NLM",
 42 |     "STAT- Publisher",
 43 |     "LR  - 20200426",
 44 |     "IS  - 1759-2887 (Electronic)",
 45 |     "IS  - 1759-2879 (Linking)",
 46 |     "DP  - 2020 Apr 26",
 47 |     "TI  - Risk-Of-Bias VISualization (robvis): an R package and Shiny web app for",
 48 |     "      visualizing risk-of-bias assessments.",
 49 |     "LID - 10.1002/jrsm.1411 [doi]",
 50 |     "AB  - Despite a major increase in the range and number of software offerings now",
 51 |     "      available to help researchers produce evidence syntheses, there is currently no",
 52 |     "      generic tool for producing figures to display and explore the risk-of-bias",
 53 |     "      assessments that routinely take place as part of systematic review. However,",
 54 |     "      tools such as the R programming environment and Shiny (an R package for building ",
 55 |     "      interactive web apps) have made it straightforward to produce new tools to help",
 56 |     "      in producing evidence syntheses. We present a new tool, robvis (Risk-Of-Bias",
 57 |     "      VISualization), available as an R package and web app, which facilitates rapid",
 58 |     "      production of publication-quality risk-of-bias assessment figures. We present a",
 59 |     "      timeline of the tool's development and its key functionality. This article is",
 60 |     "      protected by copyright. All rights reserved.",
 61 |     "CI  - This article is protected by copyright. All rights reserved.",
 62 |     "FAU - McGuinness, Luke A",
 63 |     "AU  - McGuinness LA",
 64 |     "AUID- ORCID: https://orcid.org/0000-0001-8730-9761",
 65 |     "AD  - MRC Integrative Epidemiology Unit at the University of Bristol, Bristol, UK.",
 66 |     "AD  - Population Health Sciences, Bristol Medical School, University of Bristol,",
 67 |     "      Bristol, UK.",
 68 |     "FAU - Higgins, Julian Pt",
 69 |     "AU  - Higgins JP",
 70 |     "AD  - MRC Integrative Epidemiology Unit at the University of Bristol, Bristol, UK.",
 71 |     "AD  - Population Health Sciences, Bristol Medical School, University of Bristol,",
 72 |     "      Bristol, UK.",
 73 |     "LA  - eng",
 74 |     "PT  - Journal Article",
 75 |     "DEP - 20200426",
 76 |     "PL  - England",
 77 |     "TA  - Res Synth Methods",
 78 |     "JT  - Research synthesis methods",
 79 |     "JID - 101543738",
 80 |     "SB  - IM",
 81 |     "OTO - NOTNLM",
 82 |     "OT  - Data visualization",
 83 |     "OT  - Evidence synthesis",
 84 |     "OT  - R",
 85 |     "OT  - Risk of bias",
 86 |     "EDAT- 2020/04/27 06:00",
 87 |     "MHDA- 2020/04/27 06:00",
 88 |     "CRDT- 2020/04/27 06:00",
 89 |     "PHST- 2020/02/27 00:00 [received]",
 90 |     "PHST- 2020/04/16 00:00 [revised]",
 91 |     "PHST- 2020/04/18 00:00 [accepted]",
 92 |     "PHST- 2020/04/27 06:00 [entrez]",
 93 |     "PHST- 2020/04/27 06:00 [pubmed]",
 94 |     "PHST- 2020/04/27 06:00 [medline]",
 95 |     "AID - 10.1002/jrsm.1411 [doi]",
 96 |     "PST - aheadofprint",
 97 |     "SO  - Res Synth Methods. 2020 Apr 26. doi: 10.1002/jrsm.1411.",
 98 |     "",
 99 |     "PMID- 31355546",
100 |     "OWN - NLM",
101 |     "STAT- In-Process",
102 |     "LR  - 20200226",
103 |     "IS  - 1759-2887 (Electronic)",
104 |     "IS  - 1759-2879 (Linking)",
105 |     "VI  - 10",
106 |     "IP  - 4",
107 |     "DP  - 2019 Dec",
108 |     "TI  - revtools: An R package to support article screening for evidence synthesis.",
109 |     "PG  - 606-614",
110 |     "LID - 10.1002/jrsm.1374 [doi]",
111 |     "AB  - The field of evidence synthesis is growing rapidly, with a corresponding increase",
112 |     "      in the number of software tools and workflows to support the construction of",
113 |     "      systematic reviews, systematic maps, and meta-analyses. Despite much progress,",
114 |     "      however, a number of problems remain, including slow integration of new",
115 |     "      statistical or methodological approaches into user-friendly software, low",
116 |     "      prevalence of open-source software, and poor integration among distinct software ",
117 |     "      tools. These issues hinder the utility and transparency of new methods to the",
118 |     "      research community. Here, I present revtools, an R package to support article",
119 |     "      screening during evidence synthesis projects. It provides tools for the import",
120 |     "      and deduplication of bibliographic data, screening of articles by title or",
121 |     "      abstract, and visualization of article content using topic models. The software",
122 |     "      is entirely open-source and combines command-line scripting for experienced",
123 |     "      programmers with custom-built user interfaces for casual users, with further",
124 |     "      methods to support article screening to be added over time. revtools provides",
125 |     "      free access to novel methods in an open-source environment and represents a",
126 |     "      valuable step in expanding the capacity of R to support evidence synthesis",
127 |     "      projects.",
128 |     "CI  - (c) 2019 John Wiley & Sons, Ltd.",
129 |     "FAU - Westgate, Martin J",
130 |     "AU  - Westgate MJ",
131 |     "AUID- ORCID: https://orcid.org/0000-0003-0854-2034",
132 |     "AD  - Fenner School of Environment & Society, The Australian National University,",
133 |     "      Acton, ACT, Australia.",
134 |     "LA  - eng",
135 |     "PT  - Journal Article",
136 |     "DEP - 20191018",
137 |     "PL  - England",
138 |     "TA  - Res Synth Methods",
139 |     "JT  - Research synthesis methods",
140 |     "JID - 101543738",
141 |     "SB  - IM",
142 |     "OTO - NOTNLM",
143 |     "OT  - data visualization",
144 |     "OT  - meta-analysis",
145 |     "OT  - natural language processing",
146 |     "OT  - systematic review",
147 |     "OT  - topic models",
148 |     "EDAT- 2019/07/30 06:00",
149 |     "MHDA- 2019/07/30 06:00",
150 |     "CRDT- 2019/07/30 06:00",
151 |     "PHST- 2019/02/25 00:00 [received]",
152 |     "PHST- 2019/06/12 00:00 [revised]",
153 |     "PHST- 2019/07/23 00:00 [accepted]",
154 |     "PHST- 2019/07/30 06:00 [pubmed]",
155 |     "PHST- 2019/07/30 06:00 [medline]",
156 |     "PHST- 2019/07/30 06:00 [entrez]",
157 |     "AID - 10.1002/jrsm.1374 [doi]",
158 |     "PST - ppublish",
159 |     "SO  - Res Synth Methods. 2019 Dec;10(4):606-614. doi: 10.1002/jrsm.1374. Epub 2019 Oct ",
160 |     "      18."
161 |   )
162 | 
163 | writeLines(eviatlas, "tests/testthat/testdata/eviatlas.txt")
164 | writeLines(litsearchr, "tests/testthat/testdata/litsearchr.txt")
165 | writeLines(res_synth_methods, "tests/testthat/testdata/res_synth_methods.txt")
166 | 


--------------------------------------------------------------------------------
/man/add_line_breaks.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/add_line_breaks.R
 3 | \name{add_line_breaks}
 4 | \alias{add_line_breaks}
 5 | \title{Add line breaks to one or more strings}
 6 | \usage{
 7 | add_line_breaks(x, n = 50, max_n = NULL, html = FALSE, max_time = NULL)
 8 | }
 9 | \arguments{
10 | \item{x}{Either a string or a vector; if the vector is not of class character
11 | if will be coerced to one using \code{as.character()}.}
12 | 
13 | \item{n}{Numeric: The desired number of characters that should separate
14 | consecutive line breaks.}
15 | 
16 | \item{max_n}{DEPRECATED: If provided will currently overwrite \code{n}; otherwise
17 | synonymous with \code{n} and will be removed from future versions.}
18 | 
19 | \item{html}{Logical: Should the line breaks be specified in html?}
20 | 
21 | \item{max_time}{DEPRECATED: Previously the maximum amount of time (in
22 | seconds) allowed to adjust groups until character thresholds are reached.
23 | Ignored.}
24 | }
25 | \value{
26 | Returns the input vector unaltered except for the addition of line
27 | breaks.
28 | }
29 | \description{
30 | This function takes a vector of strings and adds line breaks
31 | every n characters. Primarily built to be called internally by
32 | \code{format_citation()}, this function has been made available as it can be
33 | useful in other contexts.
34 | }
35 | \details{
36 | Line breaks are only added between words, so the value of n is
37 | actually a threshold value rather than being matched exactly.
38 | }
39 | \examples{
40 | add_line_breaks(c("On the Origin of Species"), n = 10)
41 | }
42 | 


--------------------------------------------------------------------------------
/man/bibliography-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bibliography_functions.R
 3 | \name{bibliography-class}
 4 | \alias{bibliography-class}
 5 | \alias{summary.bibliography}
 6 | \alias{summary.bibliography,}
 7 | \alias{print.bibliography,}
 8 | \alias{c.bibliography,}
 9 | \alias{as.data.frame.bibliography}
10 | \alias{print.bibliography}
11 | \alias{[.bibliography}
12 | \alias{c.bibliography}
13 | \alias{as.bibliography}
14 | \alias{as_tibble.bibliography}
15 | \title{bibliography-class}
16 | \usage{
17 | \method{summary}{bibliography}(object, ...)
18 | 
19 | \method{print}{bibliography}(x, n, ...)
20 | 
21 | \method{[}{bibliography}(x, n)
22 | 
23 | \method{c}{bibliography}(...)
24 | 
25 | \method{as.data.frame}{bibliography}(x, ...)
26 | 
27 | as.bibliography(x, ...)
28 | 
29 | \method{as_tibble}{bibliography}(x, ..., .rows, .name_repair, rownames)
30 | }
31 | \arguments{
32 | \item{object}{An object of class 'bibliography'}
33 | 
34 | \item{...}{Any further information}
35 | 
36 | \item{x}{An object of class 'bibliography'}
37 | 
38 | \item{n}{Number of items to select/print}
39 | 
40 | \item{.rows}{currently ignored}
41 | 
42 | \item{.name_repair}{currently ignored}
43 | 
44 | \item{rownames}{currently ignored}
45 | }
46 | \description{
47 | This is a small number of standard methods for interacting with class 'bibliography'. More may be added later.
48 | }
49 | \details{
50 | Methods for class bibliography
51 | }
52 | 


--------------------------------------------------------------------------------
/man/clean_.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/clean_functions.R
 3 | \name{clean_}
 4 | \alias{clean_}
 5 | \alias{clean_df}
 6 | \alias{clean_authors}
 7 | \alias{clean_colnames}
 8 | \title{Clean a \code{tibble} or vector}
 9 | \usage{
10 | clean_df(data)
11 | 
12 | clean_authors(x)
13 | 
14 | clean_colnames(x)
15 | }
16 | \arguments{
17 | \item{data}{A \code{tibble} with bibliographic information.}
18 | 
19 | \item{x}{A vector of strings}
20 | }
21 | \value{
22 | Returns the input, but cleaner.
23 | }
24 | \description{
25 | Cleans column and author names
26 | }
27 | \examples{
28 | df <-  data.frame(
29 |   X..title. = c(
30 |     "EviAtlas: a tool for visualising evidence synthesis databases",
31 |     "revtools: An R package to support article screening for evidence synthesis",
32 |     "An automated approach to identifying search terms for systematic reviews",
33 |     "Reproducible, flexible and high-throughput data extraction from primary literature"),
34 |   YEAR = c("2019", "2019", "2019", "2019"),
35 |   authors = c(
36 |     "Haddaway et al",
37 |     "Westgate",
38 |     "EM Grames AND AN Stillman  & MW Tingley and CS Elphick",
39 |     "Pick et al")
40 | )
41 | 
42 | clean_df(df)
43 | 
44 | # or use sub-functions
45 | colnames(df) <- clean_colnames(df)
46 | # colnames(df) <- clean_colnames(colnames(df)) # also works
47 | df$author <- clean_authors(df$author)
48 | 
49 | }
50 | 


--------------------------------------------------------------------------------
/man/code_lookup.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/code_lookup.R
 3 | \docType{data}
 4 | \name{code_lookup}
 5 | \alias{code_lookup}
 6 | \title{Bibliographic code lookup for search results assembly}
 7 | \format{
 8 | A \code{data.frame} with 226 obs of 12 variables
 9 | 
10 | \describe{
11 | \item{code}{code used in search results}
12 | \item{order}{the order in which to rank fields in assembled results}
13 | \item{category_description}{type of bibliographic data}
14 | \item{entry_description}{description of field}
15 | \item{field}{bibliographic field that codes correspond to}
16 | \item{ris_generic}{logical: If the code is used in generic ris files}
17 | \item{ris_wos}{logical: If the code is used in Web of Science ris files}
18 | \item{ris_pubmed}{logical: If the code is used in PubMed ris files}
19 | \item{ris_scopus}{logical: If the code is used in Scopus ris files}
20 | \item{ris_asp}{logical: If the code is used in Academic Search Premier ris files}
21 | \item{ris_ovid}{logical: If the code is used in Ovid ris files}
22 | \item{ris_synthesisr}{logical: If the code used in synthesisr imports & exports}}
23 | }
24 | \usage{
25 | code_lookup
26 | }
27 | \description{
28 | A data frame that can be used to look up common codes for different
29 | bibliographic fields across databases and merge them to a common format.
30 | }
31 | \keyword{datasets}
32 | 


--------------------------------------------------------------------------------
/man/deduplicate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/deduplication_functions.R
 3 | \name{deduplicate}
 4 | \alias{deduplicate}
 5 | \title{Remove duplicates from a bibliographic data set}
 6 | \usage{
 7 | deduplicate(data, match_by, method, type = "merge", ...)
 8 | }
 9 | \arguments{
10 | \item{data}{A \code{data.frame} containing bibliographic information.}
11 | 
12 | \item{match_by}{Name of the column in \code{data} where duplicates should be sought.}
13 | 
14 | \item{method}{The duplicate detection function to use; see
15 | \code{link{string_}} or \code{link{fuzz_}} for examples. Passed to
16 | \code{find_duplicates()}.}
17 | 
18 | \item{type}{How should entries be selected? Default is \code{"merge"} which
19 | selects the entries with the largest number of characters in each column.
20 | Alternatively \code{"select"} returns the row with the highest total number of
21 | characters.}
22 | 
23 | \item{\dots}{Arguments passed to \code{find_duplicates()}.}
24 | }
25 | \value{
26 | A \code{data.frame} containing data identified as unique.
27 | }
28 | \description{
29 | Removes duplicates using sensible defaults
30 | }
31 | \details{
32 | This is a wrapper function to \code{find_duplicates()} and
33 | \code{extract_unique_references()}, which tries to choose some sensible defaults.
34 | Use with care.
35 | }
36 | \examples{
37 | my_df <-  data.frame(
38 |   title = c(
39 |     "EviAtlas: a tool for visualising evidence synthesis databases",
40 |     "revtools: An R package to support article screening for evidence synthesis",
41 |     "An automated approach to identifying search terms for systematic reviews",
42 |     "Reproducible, flexible and high-throughput data extraction from primary literature",
43 |     "eviatlas:tool for visualizing evidence synthesis databases.",
44 |     "REVTOOLS a package to support article-screening for evidence synthsis"
45 |   ),
46 |   year = c("2019", "2019", "2019", "2019", NA, NA),
47 |   authors = c("Haddaway et al", "Westgate",
48 |               "Grames et al", "Pick et al", NA, NA),
49 |   stringsAsFactors = FALSE
50 | )
51 | 
52 | # run deduplication
53 | dups <- find_duplicates(
54 |   my_df$title,
55 |   method = "string_osa",
56 |   rm_punctuation = TRUE,
57 |   to_lower = TRUE
58 | )
59 | 
60 | extract_unique_references(my_df, matches = dups)
61 | 
62 | # or, in one line:
63 | deduplicate(my_df, "title",
64 |   method = "string_osa",
65 |   rm_punctuation = TRUE,
66 |   to_lower = TRUE)
67 | }
68 | \seealso{
69 | \code{find_duplicates()} and \code{extract_unique_references()} for underlying
70 | functions.
71 | }
72 | 


--------------------------------------------------------------------------------
/man/detect_.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/detect_functions.R
 3 | \name{detect_}
 4 | \alias{detect_}
 5 | \alias{detect_parser}
 6 | \alias{detect_delimiter}
 7 | \alias{detect_lookup}
 8 | \alias{detect_year}
 9 | \title{Detect file formatting information}
10 | \usage{
11 | detect_parser(x)
12 | 
13 | detect_delimiter(x)
14 | 
15 | detect_lookup(tags)
16 | 
17 | detect_year(df)
18 | }
19 | \arguments{
20 | \item{x}{A character vector containing bibliographic data}
21 | 
22 | \item{tags}{A character vector containing RIS tags.}
23 | 
24 | \item{df}{a data.frame containing bibliographic data}
25 | }
26 | \value{
27 | \code{detect_parser()} and \code{detect_delimiter()} return a length-1
28 | character; \code{detect_year()} returns a character vector listing estimated
29 | publication years; and \code{detect_lookup()} returns a \code{data.frame.}
30 | }
31 | \description{
32 | Bibliographic data can be stored in a number of different file
33 | types, meaning that detecting consistent attributes of those files is
34 | necessary if they are to be parsed accurately. These functions attempt to
35 | identify some of those key file attributes. Specifically, \code{detect_parser()}
36 | determines which \link{parse_} function to use; \code{detect_delimiter()}
37 | and \code{detect_lookup()} identify different attributes of RIS files; and
38 | \code{detect_year()} attempts to fill gaps in publication years from other
39 | information stored in a \code{tibble}.
40 | }
41 | \examples{
42 | revtools <- c(
43 |   "",
44 |   "PMID- 31355546",
45 |   "VI  - 10",
46 |   "IP  - 4",
47 |   "DP  - 2019 Dec",
48 |   "TI  - revtools: An R package to support article
49 |          screening for evidence synthesis.",
50 |   "PG  - 606-614",
51 |   "LID - 10.1002/jrsm.1374 [doi]",
52 |   "AU  - Westgate MJ",
53 |   "LA  - eng",
54 |   "PT  - Journal Article",
55 |   "JT  - Research Synthesis Methods",
56 |   ""
57 | )
58 | 
59 | # detect basic attributes of ris files
60 | detect_parser(revtools)
61 | detect_delimiter(revtools)
62 | 
63 | # determine which tag format to use
64 | tags <- trimws(unlist(lapply(
65 |   strsplit(revtools, "- "),
66 |   function(a){a[1]}
67 | )))
68 | pubmed_tag_list <- detect_lookup(tags[!is.na(tags)])
69 | 
70 | # find year data in other columns
71 | df <- as.data.frame(parse_pubmed(revtools))
72 | df$year <- detect_year(df)
73 | }
74 | 


--------------------------------------------------------------------------------
/man/extract_unique_references.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/deduplication_functions.R
 3 | \name{extract_unique_references}
 4 | \alias{extract_unique_references}
 5 | \title{Remove duplicates from a bibliographic data set}
 6 | \usage{
 7 | extract_unique_references(data, matches, type = "merge")
 8 | }
 9 | \arguments{
10 | \item{data}{A \code{data.frame} containing bibliographic information.}
11 | 
12 | \item{matches}{A vector showing which entries in \code{data} are duplicates.}
13 | 
14 | \item{type}{How should entries be selected to retain? Default is \code{"merge"},
15 | which selects the entries with the largest number of characters in each
16 | column. Alternatively, \code{"select"} returns the row with the highest total
17 | number of characters.}
18 | }
19 | \value{
20 | Returns a \code{data.frame} of unique references.
21 | }
22 | \description{
23 | Given a list of duplicate entries and a data set, this function
24 | extracts only unique references.
25 | }
26 | \examples{
27 | my_df <-  data.frame(
28 |   title = c(
29 |     "EviAtlas: a tool for visualising evidence synthesis databases",
30 |     "revtools: An R package to support article screening for evidence synthesis",
31 |     "An automated approach to identifying search terms for systematic reviews",
32 |     "Reproducible, flexible and high-throughput data extraction from primary literature",
33 |     "eviatlas:tool for visualizing evidence synthesis databases.",
34 |     "REVTOOLS a package to support article-screening for evidence synthsis"
35 |   ),
36 |   year = c("2019", "2019", "2019", "2019", NA, NA),
37 |   authors = c("Haddaway et al", "Westgate",
38 |               "Grames et al", "Pick et al", NA, NA),
39 |   stringsAsFactors = FALSE
40 | )
41 | 
42 | # run deduplication
43 | dups <- find_duplicates(
44 |   my_df$title,
45 |   method = "string_osa",
46 |   rm_punctuation = TRUE,
47 |   to_lower = TRUE
48 | )
49 | 
50 | extract_unique_references(my_df, matches = dups)
51 | 
52 | # or, in one line:
53 | deduplicate(my_df, "title",
54 |   method = "string_osa",
55 |   rm_punctuation = TRUE,
56 |   to_lower = TRUE)
57 | }
58 | \seealso{
59 | \code{find_duplicates()}, \code{deduplicate()}
60 | }
61 | 


--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/man/figures/logo.png


--------------------------------------------------------------------------------
/man/find_duplicates.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/deduplication_functions.R
 3 | \name{find_duplicates}
 4 | \alias{find_duplicates}
 5 | \title{Detect duplicate values}
 6 | \usage{
 7 | find_duplicates(
 8 |   data,
 9 |   method = "exact",
10 |   group_by,
11 |   threshold,
12 |   to_lower = FALSE,
13 |   rm_punctuation = FALSE
14 | )
15 | }
16 | \arguments{
17 | \item{data}{A character vector containing duplicate bibliographic entries.}
18 | 
19 | \item{method}{A string indicating how matching should be calculated. Either
20 | \code{"exact"} for exact matching (the default), or the name of a function for
21 | calculating string distance.}
22 | 
23 | \item{group_by}{An optional vector, data.frame or list containing data to use
24 | as 'grouping' variables; that is, categories within which duplicates should
25 | be sought. Defaults to NULL, in which case all entries are compared against
26 | all others. Ignored if \code{method = "exact"}.}
27 | 
28 | \item{threshold}{Numeric: the cutoff threshold for deciding if two strings
29 | are duplicates. Sensible values depend on the \code{method} chosen. Defaults to 5
30 | if \code{method = "string_osa"} and must be specified in all other instances
31 | except \code{method = "exact"} (where no threshold is required).}
32 | 
33 | \item{to_lower}{Logical: Should all entries be converted to lower case before
34 | calculating string distance? Defaults to \code{FALSE.}}
35 | 
36 | \item{rm_punctuation}{Logical: Should punctuation should be removed before
37 | calculating string distance? Defaults to \code{FALSE.}}
38 | }
39 | \value{
40 | Returns a vector of duplicate matches, with \code{attributes} listing
41 | methods used.
42 | }
43 | \description{
44 | Identifies duplicate bibliographic entries using different duplicate
45 | detection methods.
46 | }
47 | \examples{
48 | my_df <-  data.frame(
49 |   title = c(
50 |     "EviAtlas: a tool for visualising evidence synthesis databases",
51 |     "revtools: An R package to support article screening for evidence synthesis",
52 |     "An automated approach to identifying search terms for systematic reviews",
53 |     "Reproducible, flexible and high-throughput data extraction from primary literature",
54 |     "eviatlas:tool for visualizing evidence synthesis databases.",
55 |     "REVTOOLS a package to support article-screening for evidence synthsis"
56 |   ),
57 |   year = c("2019", "2019", "2019", "2019", NA, NA),
58 |   authors = c("Haddaway et al", "Westgate",
59 |               "Grames et al", "Pick et al", NA, NA),
60 |   stringsAsFactors = FALSE
61 | )
62 | 
63 | # run deduplication
64 | dups <- find_duplicates(
65 |   my_df$title,
66 |   method = "string_osa",
67 |   rm_punctuation = TRUE,
68 |   to_lower = TRUE
69 | )
70 | 
71 | extract_unique_references(my_df, matches = dups)
72 | 
73 | # or, in one line:
74 | deduplicate(my_df, "title",
75 |   method = "string_osa",
76 |   rm_punctuation = TRUE,
77 |   to_lower = TRUE)
78 | }
79 | \seealso{
80 | \code{\link{string_}} or \code{\link{fuzz_}} for suitable functions
81 | to pass to \code{methods}; \code{\link{extract_unique_references}} and
82 | \code{\link{deduplicate}} for higher-level functions.
83 | }
84 | 


--------------------------------------------------------------------------------
/man/format_citation.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/format_citation.R
 3 | \name{format_citation}
 4 | \alias{format_citation}
 5 | \title{Format a citation}
 6 | \usage{
 7 | format_citation(
 8 |   data,
 9 |   details = TRUE,
10 |   abstract = FALSE,
11 |   add_html = FALSE,
12 |   line_breaks = FALSE,
13 |   ...
14 | )
15 | }
16 | \arguments{
17 | \item{data}{An object of class \code{data.frame}, \code{list}, or \code{bibliography.}}
18 | 
19 | \item{details}{Logical: Should identifying information such as author names &
20 | journal titles be displayed? Defaults to \code{TRUE}.}
21 | 
22 | \item{abstract}{Logical: Should the abstract be shown (if available)?
23 | Defaults to \code{FALSE.}}
24 | 
25 | \item{add_html}{Logical: Should the journal title be italicized using html
26 | codes? Defaults to \code{FALSE}.}
27 | 
28 | \item{line_breaks}{Either logical, stating whether line breaks should be
29 | added, or numeric stating how many characters should separate consecutive
30 | line breaks. Defaults to \code{FALSE}.}
31 | 
32 | \item{...}{any other arguments.}
33 | }
34 | \value{
35 | Returns a string of length equal to \code{length(data)} that contains
36 | formatted citations.
37 | }
38 | \description{
39 | This function takes an object of class \code{data.frame}, \code{list}, or
40 | \code{bibliography} and returns a formatted citation.
41 | }
42 | \examples{
43 | roses <- c("@article{haddaway2018,
44 |   title={ROSES RepOrting standards for Systematic Evidence Syntheses:
45 |   pro forma, flow-diagram and descriptive summary of the plan and
46 |   conduct of environmental systematic reviews and systematic maps},
47 |   author={Haddaway, Neal R and Macura, Biljana and Whaley, Paul and Pullin, Andrew S},
48 |   journal={Environmental Evidence},
49 |   volume={7},
50 |   number={1},
51 |   pages={7},
52 |   year={2018},
53 |   publisher={Springer}
54 | }")
55 | 
56 | tmp <- tempfile()
57 | writeLines(roses, tmp)
58 | 
59 | citation <- read_ref(tmp)
60 | format_citation(citation)
61 | }
62 | 


--------------------------------------------------------------------------------
/man/fuzz_.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fuzz_functions.R
 3 | \name{fuzz_}
 4 | \alias{fuzz_}
 5 | \alias{fuzzdist}
 6 | \alias{fuzz_m_ratio}
 7 | \alias{fuzz_partial_ratio}
 8 | \alias{fuzz_token_sort_ratio}
 9 | \alias{fuzz_token_set_ratio}
10 | \title{Calculate similarity between two strings}
11 | \usage{
12 | fuzzdist(
13 |   a,
14 |   b,
15 |   method = c("fuzz_m_ratio", "fuzz_partial_ratio", "fuzz_token_sort_ratio",
16 |     "fuzz_token_set_ratio")
17 | )
18 | 
19 | fuzz_m_ratio(a, b)
20 | 
21 | fuzz_partial_ratio(a, b)
22 | 
23 | fuzz_token_sort_ratio(a, b)
24 | 
25 | fuzz_token_set_ratio(a, b)
26 | }
27 | \arguments{
28 | \item{a}{A character vector of items to match to b.}
29 | 
30 | \item{b}{A character vector of items to match to a.}
31 | 
32 | \item{method}{The method to use for fuzzy matching.}
33 | }
34 | \value{
35 | Returns a score of same length as b, giving the proportional
36 | dissimilarity between a and b.
37 | }
38 | \description{
39 | These functions duplicate the approach of the 'fuzzywuzzy'
40 | Python library for calculating string similarity.
41 | }
42 | \note{
43 | \code{fuzz_m_ratio()} is a measure of the number of letters that match
44 | between two strings. It is calculated as one minus two times the number of
45 | matched characters, divided by the number of characters in both strings.
46 | 
47 | \code{fuzz_partial_ratio()} calculates the extent to which one string is a
48 | subset of the other. If one string is a perfect subset, then this will be
49 | zero.
50 | 
51 | \code{fuzz_token_sort_ratio()} sorts the words in both strings into
52 | alphabetical order, and checks their similarity using \code{fuzz_m_ratio()}.
53 | 
54 | \code{fuzz_token_set_ratio()} is similar to \code{fuzz_token_sort_ratio()}, but
55 | compares both sorted strings to each other, and to a third group made of
56 | words common to both strings. It then returns the maximum value of
57 | \code{fuzz_m_ratio()} from these comparisons.
58 | 
59 | \code{fuzzdist()} is a wrapper function, for compatability with \code{stringdist}.
60 | }
61 | \examples{
62 | fuzzdist("On the Origin of Species",
63 |          "Of the Original Specs",
64 |          method = "fuzz_m_ratio")
65 | }
66 | 


--------------------------------------------------------------------------------
/man/merge_columns.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/deprecated.R
 3 | \name{merge_columns}
 4 | \alias{merge_columns}
 5 | \title{Bind two or more data frames with different columns}
 6 | \usage{
 7 | merge_columns(x, y)
 8 | }
 9 | \arguments{
10 | \item{x}{Either a data.frame or a list of data.frames.}
11 | 
12 | \item{y}{A data.frame, optional if x is a list.}
13 | }
14 | \value{
15 | Returns a single data.frame with all the input data frames merged.
16 | }
17 | \description{
18 | Takes two or more \code{data.frames} with different column names or
19 | different column orders and binds them to a single \code{data.frame.} This
20 | function is maintained for backwards compatibility, but it is synonymous with
21 | \code{dplyr::bind_rows()} and will be depracated in future.
22 | }
23 | \examples{
24 | df_1 <-  data.frame(
25 |   title = c(
26 |     "EviAtlas: a tool for visualising evidence synthesis databases",
27 |     "revtools: An R package to support article screening for evidence synthesis"
28 |   ),
29 |   year = c("2019", "2019")
30 | )
31 | 
32 | df_2 <-  data.frame(
33 |   title = c(
34 |     "An automated approach to identifying search terms for systematic reviews",
35 |     "Reproducible, flexible and high-throughput data extraction from primary literature"
36 |   ),
37 |   authors = c("Grames et al", "Pick et al")
38 | )
39 | 
40 | merge_columns(df_1, df_2)
41 | }
42 | 


--------------------------------------------------------------------------------
/man/override_duplicates.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/deduplication_functions.R
 3 | \name{override_duplicates}
 4 | \alias{override_duplicates}
 5 | \title{Manually override duplicates}
 6 | \usage{
 7 | override_duplicates(matches, overrides)
 8 | }
 9 | \arguments{
10 | \item{matches}{Numeric: a vector of group numbers for texts that indicates
11 | duplicates and unique values returned by the \code{\link{find_duplicates}}
12 | function.}
13 | 
14 | \item{overrides}{Numeric: a vector of group numbers that are not true
15 | duplicates.}
16 | }
17 | \value{
18 | The input \code{matches} vector with unique group numbers for members
19 | of groups that the user overrides.
20 | }
21 | \description{
22 | Re-assign group numbers to text that was classified as
23 | duplicated but is unique.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/parse_.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parse_bibtex.R, R/parse_csv_tsv.R,
 3 | %   R/parse_pubmed.R, R/parse_ris.R
 4 | \name{parse_bibtex}
 5 | \alias{parse_bibtex}
 6 | \alias{parse_csv}
 7 | \alias{parse_tsv}
 8 | \alias{parse_}
 9 | \alias{parse_pubmed}
10 | \alias{parse_ris}
11 | \title{Parse bibliographic text in a variety of formats}
12 | \usage{
13 | parse_bibtex(x)
14 | 
15 | parse_csv(x)
16 | 
17 | parse_tsv(x)
18 | 
19 | parse_pubmed(x)
20 | 
21 | parse_ris(x, tag_naming = "best_guess")
22 | }
23 | \arguments{
24 | \item{x}{A character vector containing bibliographic information in ris
25 | format.}
26 | 
27 | \item{tag_naming}{What format are ris tags in? Defaults to \code{"best_guess"} See
28 | \code{read_refs()} for a list of accepted arguments.}
29 | }
30 | \value{
31 | Returns an object of class \code{bibliography} (ris, bib, or pubmed
32 | formats) or \code{data.frame} (csv or tsv).
33 | }
34 | \description{
35 | Text in standard formats - such as imported via
36 | \code{base::readLines()} - can be parsed using a variety of standard formats. Use
37 | \code{detect_parser()} to determine which is the most appropriate parser for your
38 | situation. Note that \code{parse_tsv()} and \code{parse_csv()} are maintained for
39 | backwards compatability only; within \code{read_ref} these have been replaced
40 | by \code{vroom::vroom()}.
41 | }
42 | \examples{
43 | eviatlas <- c(
44 |   "TY  - JOUR",
45 |   "AU  - Haddaway, Neal R.",
46 |   "AU  - Feierman, Andrew",
47 |   "AU  - Grainger, Matthew J.",
48 |   "AU  - Gray, Charles T.",
49 |   "AU  - Tanriver-Ayder, Ezgi",
50 |   "AU  - Dhaubanjar, Sanita",
51 |   "AU  - Westgate, Martin J.",
52 |   "PY  - 2019",
53 |   "DA  - 2019/06/04",
54 |   "TI  - EviAtlas: a tool for visualising evidence synthesis databases",
55 |   "JO  - Environmental Evidence",
56 |   "SP  - 22",
57 |   "VL  - 8",
58 |   "IS  - 1",
59 |   "SN  - 2047-2382",
60 |   "UR  - https://doi.org/10.1186/s13750-019-0167-1",
61 |   "DO  - 10.1186/s13750-019-0167-1",
62 |   "ID  - Haddaway2019",
63 |   "ER  - "
64 | )
65 | 
66 | detect_parser(eviatlas) # = "parse_ris"
67 | df <- as.data.frame(parse_ris(eviatlas))
68 | ris_out <- write_refs(df, format = "ris", file = FALSE)
69 | }
70 | 


--------------------------------------------------------------------------------
/man/read_refs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/read_refs.R
 3 | \name{read_refs}
 4 | \alias{read_refs}
 5 | \title{Import bibliographic search results}
 6 | \usage{
 7 | read_refs(
 8 |   filename,
 9 |   tag_naming = "best_guess",
10 |   return_df = TRUE,
11 |   verbose = FALSE,
12 |   locale = default_locale()
13 | )
14 | }
15 | \arguments{
16 | \item{filename}{A path to a filename or vector of filenames containing search
17 | results to import.}
18 | 
19 | \item{tag_naming}{Either a length-1 character stating how should ris tags be
20 | replaced (see details for a list of options), or an object inheriting from
21 | class \code{data.frame} containing user-defined replacement tags.}
22 | 
23 | \item{return_df}{If \code{TRUE} (default), returns a \code{data.frame}; if \code{FALSE},
24 | returns a list.}
25 | 
26 | \item{verbose}{If \code{TRUE}, prints status updates (defaults to \code{FALSE}).}
27 | }
28 | \value{
29 | Returns a \code{data.frame} or \code{list} of assembled search results.
30 | }
31 | \description{
32 | Imports common bibliographic reference formats (i.e. .bib, .ris,
33 | or .txt).
34 | }
35 | \details{
36 | The default for argument \code{tag_naming} is \code{"best_guess"},
37 | which estimates what database has been used for ris tag replacement, then
38 | fills any gaps with generic tags. Any tags missing from the database (i.e.
39 | \code{code_lookup}) are passed unchanged. Other options are to use tags from
40 | Web of Science (\code{"wos"}), Scopus (\code{"scopus"}), Ovid (\code{"ovid"})
41 | or Academic Search Premier (\code{"asp"}). If a \code{data.frame} is given,
42 | then it must contain two columns: \code{"code"} listing the original tags in
43 | the source document, and \code{"field"} listing the replacement column/tag
44 | names. The \code{data.frame} may optionally include a third column named
45 | \code{"order"}, which specifies the order of columns in the resulting
46 | \code{data.frame}; otherwise this will be taken as the row order. Finally,
47 | passing \code{"none"} to \code{replace_tags} suppresses tag replacement.
48 | }
49 | \examples{
50 | litsearchr <- c(
51 |   "@article{grames2019,
52 |   title={An automated approach to identifying search terms for
53 |   systematic reviews using keyword co-occurrence networks},
54 |   author={Grames, Eliza M and Stillman, Andrew N and Tingley, Morgan W and Elphick, Chris S},
55 |   journal={Methods in Ecology and Evolution},
56 |   volume={10},
57 |   number={10},
58 |   pages={1645--1654},
59 |   year={2019},
60 |   publisher={Wiley Online Library}
61 | }"
62 | )
63 | 
64 | tmp <- tempfile()
65 | 
66 | writeLines(litsearchr, tmp)
67 | 
68 | df <- read_refs(tmp, return_df = TRUE, verbose = TRUE)
69 | }
70 | 


--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reexports.R
 3 | \docType{import}
 4 | \name{reexports}
 5 | \alias{reexports}
 6 | \alias{as_tibble}
 7 | \title{Objects exported from other packages}
 8 | \keyword{internal}
 9 | \description{
10 | These objects are imported from other packages. Follow the links
11 | below to see their documentation.
12 | 
13 | \describe{
14 |   \item{tibble}{\code{\link[tibble]{as_tibble}}}
15 | }}
16 | 
17 | 


--------------------------------------------------------------------------------
/man/review_duplicates.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/deduplication_functions.R
 3 | \name{review_duplicates}
 4 | \alias{review_duplicates}
 5 | \title{Manually review potential duplicates}
 6 | \usage{
 7 | review_duplicates(text, matches)
 8 | }
 9 | \arguments{
10 | \item{text}{A character vector of the text that was used to identify
11 | potential duplicates.}
12 | 
13 | \item{matches}{Numeric: a vector of group numbers for texts that indicates
14 | duplicates and unique values returned by the \code{\link{find_duplicates}}
15 | function.}
16 | }
17 | \value{
18 | A \code{data.frame} of potential duplicates grouped together.
19 | }
20 | \description{
21 | Allows users to manually review articles classified as
22 | duplicates.
23 | }
24 | 


--------------------------------------------------------------------------------
/man/string_.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/string_functions.R
 3 | \name{string_}
 4 | \alias{string_}
 5 | \alias{string_osa}
 6 | \alias{string_lv}
 7 | \alias{string_dl}
 8 | \alias{string_hamming}
 9 | \alias{string_lcs}
10 | \alias{string_qgram}
11 | \alias{string_cosine}
12 | \alias{string_jaccard}
13 | \alias{string_jw}
14 | \alias{string_soundex}
15 | \title{Calculate similarity between two strings}
16 | \usage{
17 | string_osa(a, b)
18 | 
19 | string_lv(a, b)
20 | 
21 | string_dl(a, b)
22 | 
23 | string_hamming(a, b)
24 | 
25 | string_lcs(a, b)
26 | 
27 | string_qgram(a, b)
28 | 
29 | string_cosine(a, b)
30 | 
31 | string_jaccard(a, b)
32 | 
33 | string_jw(a, b)
34 | 
35 | string_soundex(a, b)
36 | }
37 | \arguments{
38 | \item{a}{A character vector of items to match to b.}
39 | 
40 | \item{b}{A character vector of items to match to a.}
41 | }
42 | \value{
43 | Returns a score of same length as b, giving the dissimilarity between
44 | a and b.
45 | }
46 | \description{
47 | These functions each access a specific \code{"methods"} argument
48 | provided by \code{stringdist}, and are provided for convenient calling by
49 | \code{find_duplicates()}. They do not include any new functionality beyond that
50 | given by \code{stringdist}, which you should use for your own analyses.
51 | }
52 | 


--------------------------------------------------------------------------------
/man/synthesisr-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/synthesisr-package.R
 3 | \docType{package}
 4 | \name{synthesisr-package}
 5 | \alias{synthesisr}
 6 | \alias{synthesisr-package}
 7 | \title{synthesisr: Import, assemble, and deduplicate bibiliographic datasets}
 8 | \description{
 9 | Systematic review searches include multiple databases
10 | that export results in a variety of formats with overlap in
11 | coverage between databases. To streamline the process of importing,
12 | assembling, and deduplicating results, \code{synthesisr} recognizes
13 | bibliographic files exported from databases commonly used for
14 | systematic reviews and merges results into a standardized format.
15 | }
16 | \section{Import & Export}{
17 | 
18 | The key task performed by \code{synthesisr} is flexible import and
19 | presentation of bibliographic data. This is typically achieved by
20 | \code{read_refs()}, which can import multiple files at once and link them together
21 | into a single \code{data.frame}. Conversely, export is via \code{write_refs()}. Users
22 | that require more detailed control can use the following functions:
23 | 
24 | \itemize{
25 | \item \link{read_refs} Read bibliographic data
26 | \item \link{write_refs} Write bibliographic data
27 | \item \link{detect_} Detect file attributes
28 | \item \link{parse_} Parse a vector containing bibliographic data
29 | \item \link{clean_} Cleaning functions for author and column names
30 | \item \link{code_lookup} A dataset of potential ris tags
31 | }
32 | }
33 | 
34 | \section{Formatting}{
35 | 
36 | \itemize{
37 | \item \linkS4class{bibliography} Methods for class \code{bibliography}
38 | \item \link{format_citation} Return a clean citation from a \code{bibliography} or \code{data.frame}
39 | \item \link{add_line_breaks} Set a maximum character width for strings
40 | }
41 | }
42 | 
43 | \section{Deduplication}{
44 | 
45 | When importing from multiple databases, it is likely that there will be
46 | duplicates in the resulting dataset. The easiest way to deal with this
47 | problem in \code{synthesisr} is using the \code{deduplicate()} function; but this can
48 | be risky, particularly if there are no DOIs in the dataset. To get finer
49 | control of the deduplication process, consider using the sub-functions:
50 | 
51 | \itemize{
52 | \item \link{deduplicate} Semi-automated duplicate removal
53 | \item \link{find_duplicates} Locate potentially duplicated references
54 | \item \link{extract_unique_references} Return a data.frame with only 'unique' references
55 | \item \link{review_duplicates} Manually review potential duplicates
56 | \item \link{override_duplicates} Manually override identified duplicates
57 | \item \link{fuzz_} Fuzzy string matching c/o \code{fuzzywuzzy}
58 | \item \link{string_} Fuzzy string matching c/o \code{stringdist}
59 | }
60 | }
61 | 
62 | \section{Deprecated}{
63 | 
64 | 
65 | \itemize{
66 | \item \link{merge_columns} Synonymous with \link[dplyr:bind_rows]{dplyr::bind_rows}
67 | }
68 | }
69 | 
70 | \seealso{
71 | Useful links:
72 | \itemize{
73 |   \item \url{https://mjwestgate.github.io/synthesisr}
74 | }
75 | 
76 | }
77 | \author{
78 | \strong{Maintainer}: Martin Westgate \email{martinjwestgate@gmail.com} (\href{https://orcid.org/0000-0003-0854-2034}{ORCID})
79 | 
80 | Authors:
81 | \itemize{
82 |   \item Eliza Grames \email{eliza.grames@uconn.edu} (\href{https://orcid.org/0000-0003-1743-6815}{ORCID})
83 | }
84 | 
85 | }
86 | 


--------------------------------------------------------------------------------
/man/write_refs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/write_refs.R
 3 | \name{write_refs}
 4 | \alias{write_refs}
 5 | \alias{write_bib}
 6 | \alias{write_ris}
 7 | \title{Export data to a bibliographic format}
 8 | \usage{
 9 | write_refs(x, file, format = "ris", tag_naming = "synthesisr", write = TRUE)
10 | 
11 | write_bib(x)
12 | 
13 | write_ris(x, tag_naming = "synthesisr")
14 | }
15 | \arguments{
16 | \item{x}{Either a data.frame containing bibliographic information or an
17 | object of class bibliography.}
18 | 
19 | \item{file}{filename to save to.}
20 | 
21 | \item{format}{What format should the data be exported as? Options are ris or
22 | bib.}
23 | 
24 | \item{tag_naming}{what naming convention should be used to write RIS files?
25 | See details for options.}
26 | 
27 | \item{write}{Logical should a file should be written? If FALSE returns a
28 | \code{list}.}
29 | }
30 | \value{
31 | This function is typically called for it's side effect of writing a
32 | file in the specified location and format. If \code{write} is FALSE, returns
33 | a character vector containing bibliographic information in the specified
34 | format.
35 | }
36 | \description{
37 | This function exports data.frames containing bibliographic
38 | information to either a .ris or .bib file.
39 | }
40 | \examples{
41 | eviatlas <- c(
42 |   "TY  - JOUR",
43 |   "AU  - Haddaway, Neal R.",
44 |   "AU  - Feierman, Andrew",
45 |   "AU  - Grainger, Matthew J.",
46 |   "AU  - Gray, Charles T.",
47 |   "AU  - Tanriver-Ayder, Ezgi",
48 |   "AU  - Dhaubanjar, Sanita",
49 |   "AU  - Westgate, Martin J.",
50 |   "PY  - 2019",
51 |   "DA  - 2019/06/04",
52 |   "TI  - EviAtlas: a tool for visualising evidence synthesis databases",
53 |   "JO  - Environmental Evidence",
54 |   "SP  - 22",
55 |   "VL  - 8",
56 |   "IS  - 1",
57 |   "SN  - 2047-2382",
58 |   "UR  - https://doi.org/10.1186/s13750-019-0167-1",
59 |   "DO  - 10.1186/s13750-019-0167-1",
60 |   "ID  - Haddaway2019",
61 |   "ER  - "
62 | )
63 | 
64 | detect_parser(eviatlas) # = "parse_ris"
65 | df <- as.data.frame(parse_ris(eviatlas))
66 | ris_out <- write_refs(df, format = "ris", file = FALSE)
67 | }
68 | 


--------------------------------------------------------------------------------
/pkgdown/extra.css:
--------------------------------------------------------------------------------
 1 | @import url(https://fonts.googleapis.com/css?family=Lato);
 2 | @import url(https://fonts.googleapis.com/css?family=Roboto);
 3 | 
 4 | h1, h2, h3, h4, .h1, .h2, .h3, .main-title, .main-subtitle {
 5 |   font-family: Lato;
 6 |   font-weight: normal;
 7 |   color: #D9565C;
 8 | }
 9 | 
10 | a {
11 |     color: #0a238a;
12 |     text-decoration: none;
13 | }
14 | 
15 | a:hover {
16 |   color: #6c159e;
17 | }
18 | 


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/pkgdown/favicon/apple-touch-icon.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-96x96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/pkgdown/favicon/favicon-96x96.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/pkgdown/favicon/favicon.ico


--------------------------------------------------------------------------------
/pkgdown/favicon/site.webmanifest:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "",
 3 |   "short_name": "",
 4 |   "icons": [
 5 |     {
 6 |       "src": "/web-app-manifest-192x192.png",
 7 |       "sizes": "192x192",
 8 |       "type": "image/png",
 9 |       "purpose": "maskable"
10 |     },
11 |     {
12 |       "src": "/web-app-manifest-512x512.png",
13 |       "sizes": "512x512",
14 |       "type": "image/png",
15 |       "purpose": "maskable"
16 |     }
17 |   ],
18 |   "theme_color": "#ffffff",
19 |   "background_color": "#ffffff",
20 |   "display": "standalone"
21 | }


--------------------------------------------------------------------------------
/pkgdown/favicon/web-app-manifest-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/pkgdown/favicon/web-app-manifest-192x192.png


--------------------------------------------------------------------------------
/pkgdown/favicon/web-app-manifest-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mjwestgate/synthesisr/9f1d884fe1fe3462e3e98be0157bdb9baf75310e/pkgdown/favicon/web-app-manifest-512x512.png


--------------------------------------------------------------------------------
/synthesisr.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(synthesisr)
3 | 
4 | test_check("synthesisr")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test-clean.R:
--------------------------------------------------------------------------------
 1 | test_that("clean_authors() works", {
 2 |   author_list <-
 3 |     c(
 4 |       "Haddaway, N.R., A. Feirman, M.J. Grainger, C.T. Gray, E. Tanriver-Ayder, S. Dhaubanjar, & M.J Westgate",
 5 |       "Grames, E.M., A.N. Stillman, M.W. Tingley AND C.S. Elphick"
 6 |     )
 7 |   expect_false(any(grepl("&", clean_authors(author_list))))
 8 |   expect_false(any(grepl("AND", clean_authors(author_list))))
 9 | })
10 | 
11 | test_that("clean_colnames() works", {
12 |   cleaned <- clean_colnames(c(".title...", "X..YEAR",
13 |                               "authors..", ".AUTHOR"))
14 |   expect_false(any(grepl("[[:punct::]]", cleaned)))
15 |   expect_false(any(duplicated(cleaned)))
16 |   expect_false(any(grepl("^(X|Y|Z)\\.+", cleaned)))
17 | })
18 | 
19 | test_that("clean_df() cleans authors, colnames and factors", {
20 |   test_df <- data.frame(
21 |     authors = c("Haddaway, N.R., A. Feirman AND M.J. Grainger", "Some authors"),
22 |     "..misc." = c("text", "text2"),
23 |     JOURNAL = as.factor(c("A journal", "Another journal"))
24 |   )
25 |   result <- clean_df(test_df)
26 |   expect_false(any(grepl("AND", result$authors)))
27 |   test_cols <- colnames(result)
28 |   expect_false(any(grepl("[[:punct::]]", test_cols)))
29 |   expect_equal(tolower(test_cols), test_cols)
30 |   expect_false(any(unlist(lapply(result, is.factor))))
31 | })
32 | 


--------------------------------------------------------------------------------
/tests/testthat/test-deduplicate.R:
--------------------------------------------------------------------------------
 1 | test_that("deduplicate works", {
 2 |   my_df <-  data.frame(
 3 |     title = c(
 4 |       "EviAtlas: a tool for visualising evidence synthesis databases",
 5 |       "revtools: An R package to support article screening for evidence synthesis",
 6 |       "An automated approach to identifying search terms for systematic reviews using keyword co-occurrence networks",
 7 |       "Reproducible, flexible and high-throughput data extraction from primary literature: The metaDigitise r package",
 8 |       "eviatlas:tool for visualizing evidence synthesis databases.",
 9 |       "REVTOOLS a package to support article-screening for evidence synthsis"),
10 |     year = c("2019", "2019", "2019", "2019", NA, NA),
11 |     authors = c("Haddaway et al", "Westgate", "Grames et al", "Pick et al", NA, NA),
12 |     stringsAsFactors = FALSE)
13 | 
14 |   # run deduplication
15 |   dups <- find_duplicates(my_df$title,
16 |                           method = "string_osa",
17 |                           rm_punctuation = TRUE,
18 |                           to_lower = TRUE)
19 |   deduped <- extract_unique_references(my_df, matches = dups)
20 |   deduped2 <- deduplicate(my_df, "title",
21 |                           rm_punctuation = TRUE,
22 |                           to_lower = TRUE)
23 | 
24 |   expect_equal(length(dups), nrow(my_df))
25 |   expect_true(all(dups[5:6] == dups[1:2]))
26 |   expect_equal(length(unique(dups)), nrow(deduped))
27 |   expect_equal(deduped, deduped2)
28 | })
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/tests/testthat/test-detect.R:
--------------------------------------------------------------------------------
 1 | test_that("detect_delimiter() works for ris", {
 2 |   lines <- readLines("testdata/eviatlas.txt")
 3 |   expect_equal(detect_delimiter(lines), "endrow")
 4 | })
 5 | 
 6 | test_that("detect_parser recognises files correctly", {
 7 |   file_names <- list.files("testdata")
 8 |   file_names <- file_names[
 9 |     !grepl("eviatlas|litsearchr|res_synth_methods", file_names)]
10 |   file_types <- lapply(file_names, function(a){
11 |     x <- readLines(paste0("./testdata/", a), warn = FALSE)
12 |     detect_parser(x)
13 |   }) |>
14 |     unlist()
15 |   detected_formats <- sub("^parse_", "", file_types)
16 |   expect_equal(
17 |     c("ris", "ris", "ris", "pubmed", "bibtex", "ris", "ris", "ris"),
18 |     detected_formats)
19 | })
20 | 


--------------------------------------------------------------------------------
/tests/testthat/test-format_citation.R:
--------------------------------------------------------------------------------
 1 | test_that("format_citation() works for an object of class bibliography", {
 2 |   bib <- read_refs("testdata/eviatlas.txt", return_df = FALSE)
 3 |   expect_equal(
 4 |     format_citation(bib)[[1]],
 5 |     "Haddaway, Neal R. et al. (2019) Eviatlas: a Tool for Visualising Evidence Synthesis Databases. Environmental Evidence.")
 6 | })
 7 | 
 8 | test_that("format_citation() works for an object of class data.frame", {
 9 |   df <- read_refs("testdata/eviatlas.txt", return_df = TRUE)
10 |   expect_equal(
11 |     as.character(format_citation(df[1, ])),
12 |     "Haddaway, Neal R. et al. (2019) Eviatlas: a Tool for Visualising Evidence Synthesis Databases. Environmental Evidence.")
13 | })
14 | 
15 | test_that("format_citation() gives same result from .bib and data.frame", {
16 |   bib <- read_refs("testdata/eviatlas.txt", return_df = FALSE)
17 |   df <- read_refs("testdata/eviatlas.txt", return_df = TRUE)
18 |   expect_equal(format_citation(df), format_citation(bib))
19 | })
20 | 
21 | test_that("add_line_breaks() limits lines to supplied length", {
22 |   title <-
23 |     "On the Origin of Species by Means of Natural Selection, or the Preservation of Favoured Races in the Struggle for Life"
24 |   lines_added <- add_line_breaks(title, n = 20)
25 |   split_text <- strsplit(lines_added, "\n")[[1]]
26 |   expect_equal(length(split_text), 8)
27 |   expect_true(all(unlist(lapply(split_text, nchar)) <= 20))
28 | 
29 |   # and with higher n
30 |   lines_added <- add_line_breaks(title, n = 40)
31 |   split_text <- strsplit(lines_added, "\n")[[1]]
32 |   expect_equal(length(split_text), 4)
33 |   expect_true(all(unlist(lapply(split_text, nchar)) <= 40))
34 | })
35 | 
36 | test_that("add_line_breaks() works on vectors", {
37 |   titles <- c(
38 |     "It is a truth universally acknowledged, that a single man in possession of a good fortune must be in want of a wife.",
39 |     "No one would have believed in the last years of the nineteenth century that this world was being watched keenly and closely by intelligences greater than man’s and yet as mortal as his own"
40 |   )
41 |   lines_added <- add_line_breaks(titles, n = 50)
42 |   string_lengths <- unlist(lapply(strsplit(lines_added, "\n"), nchar))
43 |   expect_true(all(string_lengths <= 50))
44 |   expect_equal(length(lines_added), 2)
45 | })
46 | 


--------------------------------------------------------------------------------
/tests/testthat/test-fuzz_functions.R:
--------------------------------------------------------------------------------
 1 | # to match the fuzzy wuzzy test set, needs to be out of 100 as a similarity score
 2 | 
 3 | test_that("fuzz_m_ratio() works", {
 4 |   ratio <-
 5 |     100 - round(fuzz_m_ratio("this is a test", "this is a test!"), 2) * 100
 6 |   expect_equal(ratio, 97)
 7 | })
 8 | 
 9 | test_that("fuzz_partial_ratio() works", {
10 |   partial_ratio <-
11 |     100 - (round(fuzz_partial_ratio("this is a test", "this is a test!"), 2) *
12 |              100)
13 |   expect_equal(partial_ratio, 100)
14 | })
15 | 
16 | test_that("fuzz_m_ratio() works", {
17 |   ratio2 <-
18 |     100 - (round(fuzz_m_ratio(
19 |       "fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear"
20 |     ), 2) * 100)
21 |   expect_equal(ratio2, 91)
22 | })
23 | 
24 | test_that("fuzz_token_sort_ratio() works", {
25 |   sort_ratio <-
26 |     100 - (round(
27 |       fuzz_token_sort_ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear"),
28 |       2
29 |     ) * 100)
30 |   expect_equal(sort_ratio, 100)
31 | })
32 | 
33 | test_that("fuzz_token_sort_ratio() works twice", {
34 |   sort_ratio2 <-
35 |     100 - (round(
36 |       fuzz_token_sort_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear"),
37 |       2
38 |     ) * 100)
39 |   expect_equal(sort_ratio2, 84)
40 | })
41 | 
42 | # I have not been able to sort out why the R implementation does not match python
43 | # It seems fuzzywuzzy matches by tokens, but m_dist is not doing this
44 | # So actually, token_sort_ratio is the same as token_set_ratio when sorted, but without removing unique
45 | 
46 | test_that("fuzz_token_set_ratio() works", {
47 |   set_ratio <-
48 |     100 - (round(
49 |       fuzz_token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear"),
50 |       2
51 |     ) * 100)
52 |   expect_equal(set_ratio, 100)
53 | })
54 | 


--------------------------------------------------------------------------------
/tests/testthat/test-merge_columns.R:
--------------------------------------------------------------------------------
 1 | test_that("merge_columns() works", {
 2 |   my_df <- data.frame(PY = 2019,
 3 |                       DB = "Scopus",
 4 |                       TI = "revtools: An R package to support article screening for evidence synthesis",
 5 |                       AU = "Westgate, M.J.")
 6 | 
 7 |   my_df2 <- data.frame(
 8 |     PY = 2019,
 9 |     doi = "10.1186/s13750-019-0167-1",
10 |     DB = "Scopus",
11 |     TI = "revtools: An R package to support article screening for evidence synthesis",
12 |     AU = "Westgate, M.J.",
13 |     random_noise = c("non-bibliographic data"))
14 | 
15 |   db <- merge_columns(list(my_df, my_df2))
16 |   vars <- unique(c(colnames(my_df), colnames(my_df2)))
17 |   expect_true(all(vars %in% colnames(db)))
18 | })
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/tests/testthat/test-read_write.R:
--------------------------------------------------------------------------------
 1 | test_that("read_ref() works for simple imports", {
 2 |   df <- read_refs("testdata/eviatlas.txt",
 3 |                   return_df = TRUE,
 4 |                   verbose = FALSE)
 5 |   expect_true(inherits(df, c("tbl", "data.frame")))
 6 |   expect_equal(nrow(df), 1)
 7 |   expect_true(any(grep("EviAtlas", df[1, ])))
 8 | })
 9 | 
10 | test_that("read_refs() works for simple imports", {
11 |   testfiles <- paste0("testdata/",
12 |                       c("eviatlas.txt", "litsearchr.txt", "res_synth_methods.txt"))
13 |   df <- read_refs(testfiles,
14 |                   return_df = TRUE,
15 |                   verbose = FALSE)
16 |   expect_true(inherits(df, c("tbl", "data.frame")))
17 |   expect_equal(nrow(df), 4)
18 |   expect_true(any(grep("EviAtlas", df[1, ])))
19 |   expect_true(any(grep("litsearchr", df[2, ])))
20 |   expect_true(any(grep("robvis", df[3, ])))
21 | })
22 | 
23 | test_that("pubmed formats are read correctly", {
24 |   x <- read_refs("testdata/PubMed_example.txt")
25 |   expect_s3_class(x, c("tbl_df", "tbl", "data.frame"))
26 |   expect_equal(nrow(x), 3)
27 |   expect_equal(ncol(x), 37)
28 |   expect_true(all(
29 |     c("publication_type", "author", "journal", "title", "abstract") %in%
30 |     colnames(x)))
31 | })
32 | 
33 | # test_that("read_refs() imports special characters correctly", {
34 | # })
35 | 
36 | # test_that("read_refs() stores multi-value fields as list columns", {
37 | #   df <- read_refs("testdata/Scopus_ris_example.ris",
38 | #                   return_df = FALSE,
39 | #                   verbose = FALSE)
40 | #   result <- as_tibble(df)
41 | #   # test goes here
42 | # })
43 | 
44 | test_that("bibtex imports properly with json code", {
45 |   x <- read_ref("testdata/Scopus_bib_example.bib")
46 |   expect_true(inherits(x, c("data.frame", "tbl")))
47 |   expect_equal(nrow(x), 3)
48 | })
49 | 
50 | test_that("write_refs() works", {
51 |   lines <- read_refs("testdata/eviatlas.txt")
52 |   evi_ris <- write_refs(lines, format = "ris", write = FALSE)
53 |   evi_bib <- write_refs(lines, format = "bib", write = FALSE)
54 |   expect_equal(detect_parser(evi_ris), "parse_ris")
55 |   expect_equal(detect_parser(evi_bib), "parse_bibtex")
56 |   expect_true(any(grep("ER ", evi_ris, ignore.case = FALSE)))
57 | })
58 | 
59 | test_that("read-write-read roundtripping works for .ris files", {
60 |   x <- read_refs("./testdata/citesource_issue_24.ris")
61 |   dir.create("TEMP")
62 |   write_refs(x, file = "TEMP/issue24.ris", format = "ris")
63 |   expect_no_error({y <- read_refs("TEMP/issue24.ris")})
64 |   expect_true(colnames(y)[1] == "source_type")
65 |   expect_equal(nrow(x), nrow(y))
66 |   # expect_equal(ncol(x), ncol(y)) # fails at present - i.e. round-tripping is lossy
67 |   unlink("TEMP", recursive = TRUE)
68 | })
69 | 


--------------------------------------------------------------------------------
/tests/testthat/test-write.R:
--------------------------------------------------------------------------------
 1 | test_that("write_refs() works", {
 2 |   lines <- read_refs("testdata/eviatlas.txt")
 3 |   evi_ris <- write_refs(lines, format = "ris", write = FALSE)
 4 |   evi_bib <- write_refs(lines, format = "bib", write = FALSE)
 5 |   expect_equal(detect_parser(evi_ris), "parse_ris")
 6 |   expect_equal(detect_parser(evi_bib), "parse_bibtex")
 7 |   expect_true(any(grep("ER ", evi_ris, ignore.case = FALSE)))
 8 | })
 9 | 
10 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/Ovid_ris_example.ris:
--------------------------------------------------------------------------------
  1 | 1.
  2 | TY  - JOUR
  3 | ID  - 20203152553
  4 | DO  - http://dx.doi.org/10.4236/ojf.2020.101008
  5 | T1  - Detection of retention trees on clearcuts, a 50-year perspective.
  6 | A1  - Holmstrom, E.
  7 | A1  - Nordstrom, E.
  8 | A1  - Lariviere, D.
  9 | A1  - Wallin, I.
 10 | Y1  - 2020//
 11 | N2  - Changes in clearcut management over time were evaluated using aerial photographs taken between 1960 and 2010. Temporal changes were analysed in two different climatic zones in Sweden: a typical boreal forest zone (the coast of Vasterbotten County), and the hemi-boreal zone of southern Sweden (represented by Kronoberg County). The study covers the periods before and after the paradigm shift in Swedish forestry caused by the equalization of the production and nature conservation objectives specified in the first paragraph of the Swedish Forestry Act. Photographs were processed to determine clearcut size and shape and to register solitary retention trees and groups of retention trees. Small but significant changes in clearcut size were detected over time. The number of retention trees increased over time, a result that was also found in other studies using different methodologies. The results demonstrate that measurable structural changes have occurred in Swedish forests over the 25 years since the paradigm shift. Results from this study also show that digital detection of green tree retention could be a future complement to field inventory and monitoring.
 12 | KW  - aerial photography
 13 | KW  - boreal forests
 14 | KW  - clear felling
 15 | KW  - climatic zones
 16 | KW  - detection
 17 | KW  - forest management
 18 | KW  - forests
 19 | KW  - monitoring
 20 | KW  - nature conservation
 21 | KW  - resource conservation
 22 | KW  - temporal variation
 23 | JF  - Open Journal of Forestry
 24 | JA  - Open Journal of Forestry
 25 | VL  - 10
 26 | IS  - 1
 27 | SP  - 110
 28 | EP  - 123
 29 | CY  - USA
 30 | SN  - 2163-0429
 31 | AD  - Irvine
 32 | M1  - Southern Swedish Forest Centre, Swedish University of Agricultural Science, Alnarp, Sweden.
 33 | UR  - https://www.scirp.org/html/8-1620633_97627.htm
 34 | ER  -
 35 | 
 36 | Link to the Ovid Full Text or citation: http://ovidsp.ovid.com/ovidweb.cgi?T=JS&CSC=Y&NEWS=N&PAGE=fulltext&D=caba6&AN=20203152553Link to the External Link Resolver: http://jn8sf5hk5v.search.serialssolutions.com/?url_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:journal&rfr_id=info:sid/Ovid:caba6&rft.genre=article&rft_id=info:doi/10.4236%2Fojf.2020.101008&rft_id=info:pmid/&rft.issn=2163-0429&rft.volume=10&rft.issue=1&rft.spage=110&rft.pages=110-123&rft.date=2020&rft.jtitle=Open+Journal+of+Forestry&rft.atitle=Detection+of+retention+trees+on+clearcuts%2C+a+50-year+perspective.&rft.aulast=Holmstrom
 37 | 
 38 | 2.
 39 | TY  - JOUR
 40 | ID  - 20203155626
 41 | DO  - http://dx.doi.org/10.1098/rspb.2019.1969
 42 | T1  - Host density drives viral, but not trypanosome, transmission in a key pollinator.
 43 | A1  - Bailes, E. J.
 44 | A1  - Bagi, J.
 45 | A1  - Coltman, J.
 46 | A1  - Fountain, M. T.
 47 | A1  - Wilfert, L.
 48 | A1  - Brown, M. J. F.
 49 | Y1  - 2020//
 50 | N2  - Supplemental feeding of wildlife populations can locally increase the density of individuals, which may in turn impact disease dynamics. Flower strips are a widely used intervention in intensive agricultural systems to nutritionally support pollinators such as bees. Using a controlled experimental semi-field design, we asked how density impacts transmission of a virus and a trypanosome parasite in bumblebees. We manipulated bumblebee density by using different numbers of colonies within the same area of floral resource. In high-density compartments, slow bee paralysis virus was transmitted more quickly, resulting in higher prevalence and level of infection in bumblebee hosts. By contrast, there was no impact of density on the transmission of the trypanosome Crithidia bombi, which may reflect the ease with which this parasite is transmitted. These results suggest that agri-environment schemes such as flower strips, which are known to enhance the nutrition and survival of bumblebees, may also have negative impacts on pollinators through enhanced disease transmission. Future studies should assess how changing the design of these schemes could minimize disease transmission and thus maximise their health benefits to wild pollinators.
 51 | KW  - disease transmission
 52 | KW  - pollinators
 53 | KW  - disease prevalence
 54 | KW  - wildlife
 55 | JF  - Proceedings of the Royal Society B. Biological Sciences
 56 | JA  - Proceedings of the Royal Society B. Biological Sciences
 57 | VL  - 287
 58 | IS  - 1918
 59 | CY  - UK
 60 | SN  - 0962-8452
 61 | AD  - London
 62 | M1  - Department of Biological Sciences, Royal Holloway University of London, Bourne Building, Egham TW20 0EX, UK.
 63 | UR  - https://royalsocietypublishing.org/doi/full/10.1098/rspb.2019.1969
 64 | ER  -
 65 | 
 66 | Link to the Ovid Full Text or citation: http://ovidsp.ovid.com/ovidweb.cgi?T=JS&CSC=Y&NEWS=N&PAGE=fulltext&D=caba6&AN=20203155626Link to the External Link Resolver: http://jn8sf5hk5v.search.serialssolutions.com/?url_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:journal&rfr_id=info:sid/Ovid:caba6&rft.genre=article&rft_id=info:doi/10.1098%2Frspb.2019.1969&rft_id=info:pmid/&rft.issn=0962-8452&rft.volume=287&rft.issue=1918&rft.spage=20191969&rft.pages=&rft.date=2020&rft.jtitle=Proceedings+of+the+Royal+Society+B.+Biological+Sciences&rft.atitle=Host+density+drives+viral%2C+but+not+trypanosome%2C+transmission+in+a+key+pollinator.&rft.aulast=Bailes
 67 | 
 68 | 3.
 69 | TY  - JOUR
 70 | ID  - 20203152349
 71 | DO  - http://dx.doi.org/10.1016/j.jenvman.2019.109817
 72 | T1  - Export of nitrogen and phosphorus from golf courses: a review.
 73 | A1  - Bock, E. M.
 74 | A1  - Easton, Z. M.
 75 | Y1  - 2020//
 76 | N2  - Mitigating the environmental impact of nonpoint source pollution from intensively managed urban and agricultural landscapes is of paramount concern to watershed managers. Golf course turfgrass systems, which receive significant fertilizer inputs, have been cited as significant sources of nutrient loading to groundwater and surface water, but a contemporary synthesis of golf course nutrient export rates is lacking. This review of nitrogen (N) and phosphorus (P) loss from golf courses and the factors affecting it aims to support watershed management efforts and decision making. We discuss previous literature reviews, examine seven golf course studies that quantify nutrient export from delineated drainage areas, and analyze the results of 40 turfgrass plot experiments. Studies were collected systematically and selected based on predetermined inclusion criteria. Combining evidence from both watershed- and plot-scale studies, typical inorganic N and P losses from golf courses via leaching and runoff are on the order of 2-12 kg ha-1 yr-1 and 0.1-1.0 kg ha-1 yr-1, respectively. Typical total N and P losses are around 2-20 kg ha-1 yr-1 and 1.5-5 kg ha-1 yr-1, respectively. However, the potential for large variation in export rates across 2-3 orders of magnitude must be emphasized. The body of turfgrass literature stresses the importance of best management practices (BMPs) related to applying fertilizer to match plant needs and reducing opportunities for its transport. Accounting for all sources of nutrients, especially soil P, in determining fertilizer application rates and avoiding excessive irrigation to prevent leaching of nutrients from the rootzone is particularly important. BMPs can also reduce nutrient leaching and runoff by controlling the movement of water across the landscape and promoting natural nutrient attenuation, such as with vegetative stream buffers.
 77 | KW  - water quality
 78 | KW  - water pollution
 79 | KW  - phosphorus
 80 | KW  - nitrogen
 81 | KW  - golf courses
 82 | KW  - drainage
 83 | KW  - lawns and turf
 84 | KW  - leaching
 85 | KW  - runoff
 86 | KW  - fertilizers
 87 | KW  - application rates
 88 | KW  - irrigation
 89 | KW  - nonpoint source pollution
 90 | KW  - watersheds
 91 | JF  - Journal of Environmental Management
 92 | JA  - Journal of Environmental Management
 93 | VL  - 255
 94 | CY  - Netherlands
 95 | SN  - 0301-4797
 96 | AD  - Amsterdam
 97 | M1  - Department of Biological Systems Engineering 155 Ag Quad Lane, 203 Seitz Hall Virginia Tech, Blacksburg, VA 24061, USA.
 98 | UR  - https://www.sciencedirect.com/science/article/pii/S030147971931535X
 99 | ER  -
100 | 
101 | Link to the Ovid Full Text or citation: http://ovidsp.ovid.com/ovidweb.cgi?T=JS&CSC=Y&NEWS=N&PAGE=fulltext&D=caba6&AN=20203152349Link to the External Link Resolver: http://jn8sf5hk5v.search.serialssolutions.com/?url_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:journal&rfr_id=info:sid/Ovid:caba6&rft.genre=article&rft_id=info:doi/10.1016%2Fj.jenvman.2019.109817&rft_id=info:pmid/&rft.issn=0301-4797&rft.volume=255&rft.issue=&rft.spage=109817&rft.pages=&rft.date=2020&rft.jtitle=Journal+of+Environmental+Management&rft.atitle=Export+of+nitrogen+and+phosphorus+from+golf+courses%3A+a+review.&rft.aulast=Bock
102 | 
103 | 4.
104 | TY  - JOUR
105 | ID  - 20203150108
106 | T1  - Diversity of yard plants in the buffer zone of the Cyclop Nature Reserve, Jayapura City Papua Province, Indonesia.
107 | A1  - Antoh, A. A.
108 | A1  - Raunsay, E. K.
109 | Y1  - 2019//
110 | N2  - The Cyclop Mountains are located on the island of Papua, Indonesia which has a high biodiversity conservation status. Cyclop has many springs and is very important for the service needs for people in Jayapura City and Regency. The high disturbance in conservation areas is a problem for the community at this time. This study aims to analyze the diversity of garden plant species and their benefits around community settlements in buffer zones. The results showed that sweet potatoes and cassava were very dominantly planted around the yard of the house. Cassava and sweet potatoes are used as food besides being consumed by them but also used as animal feed.
111 | KW  - nature conservation
112 | KW  - species diversity
113 | KW  - conservation
114 | KW  - biodiversity
115 | KW  - nature reserves
116 | KW  - gardens
117 | KW  - settlement
118 | KW  - sweet potatoes
119 | KW  - cassava
120 | KW  - feeds
121 | KW  - ecological disturbance
122 | KW  - resource conservation
123 | KW  - mountain areas
124 | JF  - AAB Bioflux
125 | JA  - AAB Bioflux
126 | VL  - 11
127 | IS  - 3
128 | SP  - 157
129 | EP  - 161
130 | CY  - Romania
131 | SN  - 2066-7639
132 | AD  - Cluj-Napoca
133 | M1  - Biology Education Study Program, Mathematics and Natural Sciences Education Department, Training and Education Faculty, Cenderawasih University, 99351 Papua, Indonesia.
134 | UR  - http://www.aab.bioflux.com.ro/docs/2019.157-161.pdf
135 | ER  -
136 | 
137 | Link to the Ovid Full Text or citation: http://ovidsp.ovid.com/ovidweb.cgi?T=JS&CSC=Y&NEWS=N&PAGE=fulltext&D=caba6&AN=20203150108Link to the External Link Resolver: http://jn8sf5hk5v.search.serialssolutions.com/?url_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:journal&rfr_id=info:sid/Ovid:caba6&rft.genre=article&rft_id=info:doi/&rft_id=info:pmid/&rft.issn=2066-7639&rft.volume=11&rft.issue=3&rft.spage=157&rft.pages=157-161&rft.date=2019&rft.jtitle=AAB+Bioflux&rft.atitle=Diversity+of+yard+plants+in+the+buffer+zone+of+the+Cyclop+Nature+Reserve%2C+Jayapura+City+Papua+Province%2C+Indonesia.&rft.aulast=Antoh


--------------------------------------------------------------------------------
/tests/testthat/testdata/Scopus_bib_example.bib:
--------------------------------------------------------------------------------
 1 | 
 2 | @ARTICLE{Li2020,
 3 | author={Li, W. and Dou, Z. and Cui, L. and Zhao, X. and Zhang, M. and Zhang, Y. and Gao, C. and Yang, Z. and Lei, Y. and Pan, X.},
 4 | title={Soil fauna diversity at different stages of reed restoration in a lakeshore wetland at Lake Taihu, China},
 5 | journal={Ecosystem Health and Sustainability},
 6 | year={2020},
 7 | volume={6},
 8 | number={1},
 9 | doi={10.1080/20964129.2020.1722034},
10 | art_number={1722034},
11 | note={cited By 0},
12 | url={https://www.scopus.com/inward/record.uri?eid=2-s2.0-85079532227&doi=10.1080%2f20964129.2020.1722034&partnerID=40&md5=a4a9d08e21dbf151663a0e842336c225},
13 | affiliation={Institute of Wetland Research, Chinese Academy of Forestry, Beijing, China; Beijing Key Laboratory of Wetland Services and Restoration, Beijing, China; School of Geographical Sciences, University of Bristol, Bristol, United Kingdom},
14 | abstract={Introduction: Wetland soil fauna support material cycling and restoration processes in wetland ecosystems. In our study, we observed variations in wetland soil fauna on the shores of Lake Taihu, China. We examined the relationships between fauna and major environmental factors, and looked at the short-and long-term changes in reed wetlands under restoration and in the natural reed lakeshore. Outcomes: We identified 93 groups of soil fauna in different wetlands and found significant differences in the lakeshore wetlands’ soil fauna assemblages, depending on the length of the restoration period. By analyzing the soil fauna community evenness, dominance, number of taxa, and diversity, we found minimal seasonal variation in the soil fauna community diversity and abundance. The abundance of soil fauna in the sites under restoration decreased with depth below the soil surface. The reed restoration was obvious in the succession of the soil fauna groups in the long-term site. Although the restoration had an overall positive long-term effect on the soil fauna communities, there were no obvious short-term changes in the number of individuals. Conclusion: The study explored various potential measures to restore soil fauna in the Lake Taihu wetland and developed a theoretical basis for restoring the lakeshore wetland ecosystem. © 2020, © 2020 The Author(s). Published by Taylor & Francis Group and Science Press on behalf of the Ecological Society of China.},
15 | author_keywords={Environmental factors;  Lake Taihu;  lakeshore wetland;  reed;  soil fauna;  wetland restoration},
16 | funding_details={University of TwenteUniversity of Twente},
17 | funding_details={Special Fund for Forest Scientific Research in the Public WelfareSpecial Fund for Forest Scientific Research in the Public Welfare, 201404305, 200904001, CAFYBB2011007},
18 | funding_text 1={The Special Fund for Forest Scientific Research in the Public Welfare [Grant Nos. 201404305 and 200904001] and ?The Lecture and Study Program for Outstanding Scholars from Home and Abroad? [CAFYBB2011007] funded this research. The authors thank Amjad Ali and Abel Ramoelo for valuable comments on the draft manuscript and John Wasige from the University of Twente, The Netherlands, for writing assistance.},
19 | correspondence_address1={Cui, L.; Institute of Wetland Research, Chinese Academy of ForestryChina; email: wetlands108@126.com},
20 | publisher={Taylor and Francis Ltd.},
21 | issn={20964129},
22 | language={English},
23 | abbrev_source_title={Ecosyst. Health Sustain.},
24 | document_type={Article},
25 | source={Scopus},
26 | }
27 | 
28 | @ARTICLE{Cao2020,
29 | author={Cao, F. and Li, J. and Fu, X. and Wu, G.},
30 | title={Impacts of land conversion and management measures on net primary productivity in semi-arid grassland},
31 | journal={Ecosystem Health and Sustainability},
32 | year={2020},
33 | volume={6},
34 | number={1},
35 | doi={10.1080/20964129.2020.1749010},
36 | art_number={1749010},
37 | note={cited By 0},
38 | url={https://www.scopus.com/inward/record.uri?eid=2-s2.0-85083526403&doi=10.1080%2f20964129.2020.1749010&partnerID=40&md5=82da8427a9c2e5b22167c4428a08a62f},
39 | affiliation={State Key Laboratory of Urban and Regional Ecology, Research Center for Eco-Environmental Sciences, Chinese Academy of Sciences, Beijing, China; University of Chinese Academy of Sciences, Beijing, China},
40 | abstract={Ecological restoration measures implemented in China have profoundly impacted vegetation NPP. This study aimed to estimate the effects of the land conversion and management measures on the grassland ecosystem in semi-arid regions. Land use data were employed from 2000 to 2015 to compare land conversion and coverage changes in Xilingol grassland. Then, the contributions of land conversion and management policies were quantified by assessing the difference between actual NPP and climate-induced NPP changes. The results indicated that the grassland area had a net loss of 534.42 km2, and the net area of increased vegetation coverage was 74,683.05 km2. Furthermore, the total NPP increased by 8,010.73 Gg C·yr−1 (1 Gg = 109 g), of which the human activities, including grazing management measures (+6,809.40 Gg C·yr−1) and land conversion (45.72 Gg C·yr−1) contributed to 85.58% of the increase in NPP. Transformation from desert and farmland dominated grassland expansion and NPP increase, while urbanization and desertification caused large grassland reduction and NPP loss. The grazing management increased vegetation NPP in most regions except for some regions in the desert steppe and the farming-pastoral zone. Related policies should be further adjusted to strengthen the management of the desert steppe and farming-pastoral regions. © 2020, © 2020 The Author(s). Published by Taylor & Francis Group and Science Press on behalf of the Ecological Society of China.},
41 | author_keywords={anthropogenic/human activities affects;  Grassland degradation;  land use and land cover change;  management measures and policies;  net primary productivity (NPP);  Xilingol grassland},
42 | funding_details={2016YFC0503603, 2016YFC0501101},
43 | funding_text 1={This work was supported primarily by the State Key Research Development Program of China (No. 2016YFC0501101), (No. 2016YFC0503603).},
44 | correspondence_address1={Wu, G.; State Key Laboratory of Urban and Regional Ecology, Research Center for Eco-Environmental Sciences, Chinese Academy of SciencesChina; email: wug@rcees.ac.cn},
45 | publisher={Taylor and Francis Ltd.},
46 | issn={20964129},
47 | language={English},
48 | abbrev_source_title={Ecosyst. Health Sustain.},
49 | document_type={Article},
50 | source={Scopus},
51 | }
52 | 
53 | @ARTICLE{Tang2020,
54 | author={Tang, H. and Geng, G. and zhou, M.},
55 | title={Application of Digital Processing in Relic Image Restoration Design},
56 | journal={Sensing and Imaging},
57 | year={2020},
58 | volume={21},
59 | number={1},
60 | doi={10.1007/s11220-019-0265-8},
61 | art_number={6},
62 | note={cited By 0},
63 | url={https://www.scopus.com/inward/record.uri?eid=2-s2.0-85076498356&doi=10.1007%2fs11220-019-0265-8&partnerID=40&md5=25095ff13ed950e89470779e2d571d68},
64 | affiliation={School of Information and Technology, Northwest University, Xi’an, Shaanxi  710127, China; Xi’an University of Finance and Economics, Xi’an, Shaanxi  710100, China; College of Information Science and Technology, Beijing Normal University, Beijing, 100875, China},
65 | abstract={Cultural relic is the carrier of human historic culture, which can reflect the cultural and social environment, but cultural relics as a material will be damaged over time. Before the advent of computer technology, the damaged cultural relics would not be repaired due to cost. Computer vision technology has been applied to the restoration of cultural relics, mainly for the virtual restoration of damaged cultural relics images. This paper briefly introduced the Criminisi image restoration algorithm and the structure tensor used to improve the algorithm in the digital cultural relics image restoration. A damaged cultural relics image and a complete image which was damaged by human were repaired respectively using the classical Criminisi image restoration algorithm and the improved structure tensor based repair algorithm on MATLAB software. The results showed that the Criminisi image restoration algorithm could be used to repair the damaged images of ancient fabrics. It was found that the classical image restoration algorithm had some shortcomings, such as inappropriate texture structure, obvious repair marks and addition of redundant information, but the improved algorithm effectively avoided the above shortcomings. The peak signal to noise ratio (SNR) of the complete image which was damaged by human was compared objectively, and it was found that the improved algorithm had better restoration performance. © 2019, Springer Science+Business Media, LLC, part of Springer Nature.},
66 | author_keywords={Criminisi algorithm;  Digital;  Relic restoration;  Structure tensor},
67 | keywords={Image enhancement;  MATLAB;  Restoration;  Signal to noise ratio;  Tensors;  Textures, Computer technology;  Computer vision technology;  Digital;  Image restoration algorithms;  Improved structures;  Peak signal to noise ratio;  Structure tensors;  Virtual restoration, Image reconstruction},
68 | correspondence_address1={Tang, H.; Xi’an University of Finance and EconomicsChina; email: huitangxa@yeah.net},
69 | publisher={Springer},
70 | issn={15572064},
71 | language={English},
72 | abbrev_source_title={Sens. Imaging},
73 | document_type={Article},
74 | source={Scopus},
75 | }


--------------------------------------------------------------------------------
/tests/testthat/testdata/Scopus_ris_example.ris:
--------------------------------------------------------------------------------
  1 | TY  - JOUR
  2 | TI  - Soil fauna diversity at different stages of reed restoration in a lakeshore wetland at Lake Taihu, China
  3 | T2  - Ecosystem Health and Sustainability
  4 | J2  - Ecosyst. Health Sustain.
  5 | VL  - 6
  6 | IS  - 1
  7 | PY  - 2020
  8 | DO  - 10.1080/20964129.2020.1722034
  9 | SN  - 20964129 (ISSN) 
 10 | AU  - Li, W.
 11 | AU  - Dou, Z.
 12 | AU  - Cui, L.
 13 | AU  - Zhao, X.
 14 | AU  - Zhang, M.
 15 | AU  - Zhang, Y.
 16 | AU  - Gao, C.
 17 | AU  - Yang, Z.
 18 | AU  - Lei, Y.
 19 | AU  - Pan, X.
 20 | AD  - Institute of Wetland Research, Chinese Academy of Forestry, Beijing, China
 21 | AD  - Beijing Key Laboratory of Wetland Services and Restoration, Beijing, China
 22 | AD  - School of Geographical Sciences, University of Bristol, Bristol, United Kingdom
 23 | AB  - Introduction: Wetland soil fauna support material cycling and restoration processes in wetland ecosystems. In our study, we observed variations in wetland soil fauna on the shores of Lake Taihu, China. We examined the relationships between fauna and major environmental factors, and looked at the short-and long-term changes in reed wetlands under restoration and in the natural reed lakeshore. Outcomes: We identified 93 groups of soil fauna in different wetlands and found significant differences in the lakeshore wetlands’ soil fauna assemblages, depending on the length of the restoration period. By analyzing the soil fauna community evenness, dominance, number of taxa, and diversity, we found minimal seasonal variation in the soil fauna community diversity and abundance. The abundance of soil fauna in the sites under restoration decreased with depth below the soil surface. The reed restoration was obvious in the succession of the soil fauna groups in the long-term site. Although the restoration had an overall positive long-term effect on the soil fauna communities, there were no obvious short-term changes in the number of individuals. Conclusion: The study explored various potential measures to restore soil fauna in the Lake Taihu wetland and developed a theoretical basis for restoring the lakeshore wetland ecosystem. © 2020, © 2020 The Author(s). Published by Taylor & Francis Group and Science Press on behalf of the Ecological Society of China.
 24 | KW  - Environmental factors
 25 | KW  - Lake Taihu
 26 | KW  - lakeshore wetland
 27 | KW  - reed
 28 | KW  - soil fauna
 29 | KW  - wetland restoration
 30 | PB  - Taylor and Francis Ltd.
 31 | N1  - Export Date: 30 April 2020
 32 | M3  - Article
 33 | DB  - Scopus
 34 | C7  - 1722034
 35 | LA  - English
 36 | N1  - Correspondence Address: Cui, L.; Institute of Wetland Research, Chinese Academy of ForestryChina; email: wetlands108@126.com
 37 | N1  - Funding details: University of Twente
 38 | N1  - Funding details: Special Fund for Forest Scientific Research in the Public Welfare, 201404305, 200904001, CAFYBB2011007
 39 | N1  - Funding text 1: The Special Fund for Forest Scientific Research in the Public Welfare [Grant Nos. 201404305 and 200904001] and ?The Lecture and Study Program for Outstanding Scholars from Home and Abroad? [CAFYBB2011007] funded this research. The authors thank Amjad Ali and Abel Ramoelo for valuable comments on the draft manuscript and John Wasige from the University of Twente, The Netherlands, for writing assistance.
 40 | UR  - https://www.scopus.com/inward/record.uri?eid=2-s2.0-85079532227&doi=10.1080%2f20964129.2020.1722034&partnerID=40&md5=a4a9d08e21dbf151663a0e842336c225
 41 | ER  - 
 42 | 
 43 | TY  - JOUR
 44 | TI  - Impacts of land conversion and management measures on net primary productivity in semi-arid grassland
 45 | T2  - Ecosystem Health and Sustainability
 46 | J2  - Ecosyst. Health Sustain.
 47 | VL  - 6
 48 | IS  - 1
 49 | PY  - 2020
 50 | DO  - 10.1080/20964129.2020.1749010
 51 | SN  - 20964129 (ISSN) 
 52 | AU  - Cao, F.
 53 | AU  - Li, J.
 54 | AU  - Fu, X.
 55 | AU  - Wu, G.
 56 | AD  - State Key Laboratory of Urban and Regional Ecology, Research Center for Eco-Environmental Sciences, Chinese Academy of Sciences, Beijing, China
 57 | AD  - University of Chinese Academy of Sciences, Beijing, China
 58 | AB  - Ecological restoration measures implemented in China have profoundly impacted vegetation NPP. This study aimed to estimate the effects of the land conversion and management measures on the grassland ecosystem in semi-arid regions. Land use data were employed from 2000 to 2015 to compare land conversion and coverage changes in Xilingol grassland. Then, the contributions of land conversion and management policies were quantified by assessing the difference between actual NPP and climate-induced NPP changes. The results indicated that the grassland area had a net loss of 534.42 km2, and the net area of increased vegetation coverage was 74,683.05 km2. Furthermore, the total NPP increased by 8,010.73 Gg C·yr−1 (1 Gg = 109 g), of which the human activities, including grazing management measures (+6,809.40 Gg C·yr−1) and land conversion (45.72 Gg C·yr−1) contributed to 85.58% of the increase in NPP. Transformation from desert and farmland dominated grassland expansion and NPP increase, while urbanization and desertification caused large grassland reduction and NPP loss. The grazing management increased vegetation NPP in most regions except for some regions in the desert steppe and the farming-pastoral zone. Related policies should be further adjusted to strengthen the management of the desert steppe and farming-pastoral regions. © 2020, © 2020 The Author(s). Published by Taylor & Francis Group and Science Press on behalf of the Ecological Society of China.
 59 | KW  - anthropogenic/human activities affects
 60 | KW  - Grassland degradation
 61 | KW  - land use and land cover change
 62 | KW  - management measures and policies
 63 | KW  - net primary productivity (NPP)
 64 | KW  - Xilingol grassland
 65 | PB  - Taylor and Francis Ltd.
 66 | N1  - Export Date: 30 April 2020
 67 | M3  - Article
 68 | DB  - Scopus
 69 | C7  - 1749010
 70 | LA  - English
 71 | N1  - Correspondence Address: Wu, G.; State Key Laboratory of Urban and Regional Ecology, Research Center for Eco-Environmental Sciences, Chinese Academy of SciencesChina; email: wug@rcees.ac.cn
 72 | N1  - Funding details: 2016YFC0503603, 2016YFC0501101
 73 | N1  - Funding text 1: This work was supported primarily by the State Key Research Development Program of China (No. 2016YFC0501101), (No. 2016YFC0503603).
 74 | UR  - https://www.scopus.com/inward/record.uri?eid=2-s2.0-85083526403&doi=10.1080%2f20964129.2020.1749010&partnerID=40&md5=82da8427a9c2e5b22167c4428a08a62f
 75 | ER  - 
 76 | 
 77 | TY  - JOUR
 78 | TI  - Application of Digital Processing in Relic Image Restoration Design
 79 | T2  - Sensing and Imaging
 80 | J2  - Sens. Imaging
 81 | VL  - 21
 82 | IS  - 1
 83 | PY  - 2020
 84 | DO  - 10.1007/s11220-019-0265-8
 85 | SN  - 15572064 (ISSN) 
 86 | AU  - Tang, H.
 87 | AU  - Geng, G.
 88 | AU  - zhou, M.
 89 | AD  - School of Information and Technology, Northwest University, Xi’an, Shaanxi  710127, China
 90 | AD  - Xi’an University of Finance and Economics, Xi’an, Shaanxi  710100, China
 91 | AD  - College of Information Science and Technology, Beijing Normal University, Beijing, 100875, China
 92 | AB  - Cultural relic is the carrier of human historic culture, which can reflect the cultural and social environment, but cultural relics as a material will be damaged over time. Before the advent of computer technology, the damaged cultural relics would not be repaired due to cost. Computer vision technology has been applied to the restoration of cultural relics, mainly for the virtual restoration of damaged cultural relics images. This paper briefly introduced the Criminisi image restoration algorithm and the structure tensor used to improve the algorithm in the digital cultural relics image restoration. A damaged cultural relics image and a complete image which was damaged by human were repaired respectively using the classical Criminisi image restoration algorithm and the improved structure tensor based repair algorithm on MATLAB software. The results showed that the Criminisi image restoration algorithm could be used to repair the damaged images of ancient fabrics. It was found that the classical image restoration algorithm had some shortcomings, such as inappropriate texture structure, obvious repair marks and addition of redundant information, but the improved algorithm effectively avoided the above shortcomings. The peak signal to noise ratio (SNR) of the complete image which was damaged by human was compared objectively, and it was found that the improved algorithm had better restoration performance. © 2019, Springer Science+Business Media, LLC, part of Springer Nature.
 93 | KW  - Criminisi algorithm
 94 | KW  - Digital
 95 | KW  - Relic restoration
 96 | KW  - Structure tensor
 97 | KW  - Image enhancement
 98 | KW  - MATLAB
 99 | KW  - Restoration
100 | KW  - Signal to noise ratio
101 | KW  - Tensors
102 | KW  - Textures
103 | KW  - Computer technology
104 | KW  - Computer vision technology
105 | KW  - Digital
106 | KW  - Image restoration algorithms
107 | KW  - Improved structures
108 | KW  - Peak signal to noise ratio
109 | KW  - Structure tensors
110 | KW  - Virtual restoration
111 | KW  - Image reconstruction
112 | PB  - Springer
113 | N1  - Export Date: 30 April 2020
114 | M3  - Article
115 | DB  - Scopus
116 | C7  - 6
117 | LA  - English
118 | N1  - Correspondence Address: Tang, H.; Xi’an University of Finance and EconomicsChina; email: huitangxa@yeah.net
119 | UR  - https://www.scopus.com/inward/record.uri?eid=2-s2.0-85076498356&doi=10.1007%2fs11220-019-0265-8&partnerID=40&md5=25095ff13ed950e89470779e2d571d68
120 | ER  - 


--------------------------------------------------------------------------------
/tests/testthat/testdata/WoS_ciw_example.ciw:
--------------------------------------------------------------------------------
  1 | FN Clarivate Analytics Web of Science
  2 | VR 1.0
  3 | PT J
  4 | AU Li, Wei
  5 |    Dou, Zhiguo
  6 |    Cui, Lijuan
  7 |    Zhao, Xinsheng
  8 |    Zhang, Manyin
  9 |    Zhang, Yan
 10 |    Gao, Changjun
 11 |    Yang, Zheng
 12 |    Lei, Yinru
 13 |    Pan, Xu
 14 | TI Soil fauna diversity at different stages of reed restoration in a
 15 |    lakeshore wetland at Lake Taihu, China
 16 | SO ECOSYSTEM HEALTH AND SUSTAINABILITY
 17 | VL 6
 18 | IS 1
 19 | AR UNSP 1722034
 20 | DI 10.1080/20964129.2020.1722034
 21 | PD DEC 16 2020
 22 | PY 2020
 23 | AB Introduction: Wetland soil fauna support material cycling and
 24 |    restoration processes in wetland ecosystems. In our study, we observed
 25 |    variations in wetland soil fauna on the shores of Lake Taihu, China. We
 26 |    examined the relationships between fauna and major environmental
 27 |    factors, and looked at the short-and long-term changes in reed wetlands
 28 |    under restoration and in the natural reed lakeshore. Outcomes: We
 29 |    identified 93 groups of soil fauna in different wetlands and found
 30 |    significant differences in the lakeshore wetlands' soil fauna
 31 |    assemblages, depending on the length of the restoration period. By
 32 |    analyzing the soil fauna community evenness, dominance, number of taxa,
 33 |    and diversity, we found minimal seasonal variation in the soil fauna
 34 |    community diversity and abundance. The abundance of soil fauna in the
 35 |    sites under restoration decreased with depth below the soil surface. The
 36 |    reed restoration was obvious in the succession of the soil fauna groups
 37 |    in the long-term site. Although the restoration had an overall positive
 38 |    long-term effect on the soil fauna communities, there were no obvious
 39 |    short-term changes in the number of individuals. Conclusion: The study
 40 |    explored various potential measures to restore soil fauna in the Lake
 41 |    Taihu wetland and developed a theoretical basis for restoring the
 42 |    lakeshore wetland ecosystem.
 43 | RI Zhang, Manyin/AAK-5171-2020; LI, WEI/; Dou, Zhiguo/
 44 | OI LI, WEI/0000-0002-2133-9287; Dou, Zhiguo/0000-0001-8031-4330
 45 | ZB 0
 46 | ZR 0
 47 | ZS 0
 48 | TC 0
 49 | Z8 0
 50 | Z9 0
 51 | SN 2096-4129
 52 | EI 2332-8878
 53 | UT WOS:000514379300001
 54 | ER
 55 | 
 56 | PT J
 57 | AU Higgins, Kathleen Marie
 58 | TI Aesthetics and the Containment of Grief
 59 | SO JOURNAL OF AESTHETICS AND ART CRITICISM
 60 | VL 78
 61 | IS 1
 62 | BP 9
 63 | EP 20
 64 | DI 10.1111/jaac.12686
 65 | PD DEC 2020
 66 | PY 2020
 67 | AB My point of departure is the observation that people ubiquitously turn
 68 |    to aesthetic practices in response to the loss of a loved one. I argue
 69 |    that profound loss catapults the bereaved person into an alternate
 70 |    "world" that differs in marked ways from the world we usually occupy, an
 71 |    alternate world lacking even the basic coherence we need to function.
 72 |    Aesthetic practices facilitate restoration of coherence to our
 73 |    experience, as well as reconnection with the social world and recovery
 74 |    from the breakdown that profound loss involves. While the aesthetic
 75 |    notion of closure is frequently invoked in connection with the needs of
 76 |    the bereaved, I suggest that while containing the emotions experienced
 77 |    in connection with loss is vital if they are to be processed,
 78 |    unrealistic aspirations toward closure can encourage expectations that
 79 |    harm the bereaved. By contrast, I suggest that the aims of aesthetically
 80 |    punctuating experience and communicating through aesthetic gestures are
 81 |    beneficial for helping the bereaved adjust to their new circumstances.
 82 | TC 0
 83 | ZS 0
 84 | Z8 0
 85 | ZR 0
 86 | ZB 0
 87 | Z9 0
 88 | SN 0021-8529
 89 | EI 1540-6245
 90 | UT WOS:000510870600001
 91 | ER
 92 | 
 93 | PT J
 94 | AU Tang, Hui
 95 |    Geng, Guohua
 96 |    Zhou, Mingquan
 97 | TI Application of Digital Processing in Relic Image Restoration Design
 98 | SO SENSING AND IMAGING
 99 | VL 21
100 | IS 1
101 | AR 6
102 | DI 10.1007/s11220-019-0265-8
103 | PD DEC 2020
104 | PY 2020
105 | AB Cultural relic is the carrier of human historic culture, which can
106 |    reflect the cultural and social environment, but cultural relics as a
107 |    material will be damaged over time. Before the advent of computer
108 |    technology, the damaged cultural relics would not be repaired due to
109 |    cost. Computer vision technology has been applied to the restoration of
110 |    cultural relics, mainly for the virtual restoration of damaged cultural
111 |    relics images. This paper briefly introduced the Criminisi image
112 |    restoration algorithm and the structure tensor used to improve the
113 |    algorithm in the digital cultural relics image restoration. A damaged
114 |    cultural relics image and a complete image which was damaged by human
115 |    were repaired respectively using the classical Criminisi image
116 |    restoration algorithm and the improved structure tensor based repair
117 |    algorithm on MATLAB software. The results showed that the Criminisi
118 |    image restoration algorithm could be used to repair the damaged images
119 |    of ancient fabrics. It was found that the classical image restoration
120 |    algorithm had some shortcomings, such as inappropriate texture
121 |    structure, obvious repair marks and addition of redundant information,
122 |    but the improved algorithm effectively avoided the above shortcomings.
123 |    The peak signal to noise ratio (SNR) of the complete image which was
124 |    damaged by human was compared objectively, and it was found that the
125 |    improved algorithm had better restoration performance.
126 | Z8 0
127 | ZS 0
128 | TC 0
129 | ZB 0
130 | ZR 0
131 | Z9 0
132 | SN 1557-2064
133 | EI 1557-2072
134 | UT WOS:000502504100001
135 | ER
136 | 
137 | PT J
138 | AU Zhang, Xinrong
139 |    Zhang, Lei
140 |    Chen, Ming
141 |    Liu, Dongying
142 | TI miR-324-5p inhibits gallbladder carcinoma cell metastatic behaviours by
143 |    downregulation of transforming growth factor beta 2 expression.
144 | SO Artificial cells, nanomedicine, and biotechnology
145 | VL 48
146 | IS 1
147 | BP 315
148 | EP 324
149 | DI 10.1080/21691401.2019.1703724
150 | PD 2020-Dec
151 | PY 2020
152 | AB Increasing studies have demonstrated that microRNAs (miRNAs) are
153 |    associated with the metastasis of gallbladder carcinoma (GBC). Recently,
154 |    miR-324-5p has been reported to be a tumour-suppressive miRNA in many
155 |    types of malignant cancer. However, the biological function and
156 |    molecular mechanism of miR-324-5p in GBC still remain largely unknown.
157 |    Here, we found that miR-324-5p expression was notably down-regulated in
158 |    both GBC tissues and cells compared with that in normal controls.
159 |    Downregulated miR-324-5p expression was negatively associated with the
160 |    status of local invasion and lymph node metastasis and predicted a poor
161 |    prognosis in GBC patients. Further functional assays revealed that
162 |    restoration of miR-324-5p significantly suppressed GBC cell migration,
163 |    invasion and epithelial-mesenchymal transition (EMT) invitro and impeded
164 |    the metastasis of GBC cells invivo. Moreover, RNA immunoprecipitation
165 |    (RIP) and dual-luciferase reporter assay confirmed that the transforming
166 |    growth factor beta 2 (TGFB2) was a direct target gene of miR-324-5p in
167 |    GBC cells. Mechanically, small interfering RNA (siRNA)-mediated
168 |    knockdown of TGFB2 partially phenocopied the inhibitory effects of
169 |    miR-324-5p overexpression on GBC cell metastatic phenotypes. In summary,
170 |    our findings demonstrated that miR-324-5p targets TGFB2 expression to
171 |    inhibit GBC cell metastatic behaviors, and implying miR-324-5p as a
172 |    potential biomarker for diagnostic and therapeutic strategies in GBC.
173 | ZB 0
174 | ZR 0
175 | Z8 0
176 | ZS 0
177 | TC 0
178 | Z9 0
179 | EI 2169-141X
180 | UT MEDLINE:31858815
181 | PM 31858815
182 | ER
183 | 
184 | PT J
185 | AU Yu, Yijun
186 |    Ma, Lan
187 |    Zhang, He
188 |    Sun, Weibin
189 |    Zheng, Lichun
190 |    Liu, Chao
191 |    Miao, Leiying
192 | TI EPO could be regulated by HIF-1 and promote osteogenesis and accelerate
193 |    bone repair.
194 | SO Artificial cells, nanomedicine, and biotechnology
195 | VL 48
196 | IS 1
197 | BP 206
198 | EP 217
199 | DI 10.1080/21691401.2019.1699827
200 | PD 2020-Dec
201 | PY 2020
202 | AB Bone defects caused by many factors prompt further study of pathological
203 |    process and restoration methods. This study was aimed to clarify the
204 |    effect of erythropoietin on the repair of bone defect. We added the
205 |    designated concentration of rhEPO to endothelial progenitor cells and
206 |    marrow stromal cells, then detected its osteogenic and angiogenesis
207 |    effects. The results showed that rhEPO promoted the proliferation of EPC
208 |    and ST2 by promoting the mitosis without affecting cell apoptosis. The
209 |    protein and mRNA levels of angiogenesis and osteogenic related factors
210 |    exhibited higher expressions. Additionally, rhEPO encapsulated in PLGA
211 |    scaffolds accelerated the new bone formation in rat calvaria bone defect
212 |    model. Since the centre of bone defect was hypoxia environment, we
213 |    cultured EPC and ST2 under hypoxia. SiRNA and an inhibitor of HIF-1 were
214 |    used to interfere HIF-1, then the following changes of VEGF and EPO were
215 |    detected. The results showed that all the factors were upregulated under
216 |    the hypoxia environment. The expression of VEGF at protein and mRNA
217 |    level decreased as HIF-1 was inhibited or interfered from 6h, while the
218 |    mRNA expression of EPO from 6h and changed significantly at protein
219 |    level from 12h. Therefore, EPO is a promising factor for further
220 |    studies.
221 | ZR 0
222 | ZS 0
223 | TC 0
224 | Z8 0
225 | ZB 0
226 | Z9 0
227 | EI 2169-141X
228 | UT MEDLINE:31851837
229 | PM 31851837
230 | ER
231 | 
232 | EF


--------------------------------------------------------------------------------
/tests/testthat/testdata/WoS_txt_example.txt:
--------------------------------------------------------------------------------
  1 | FN Clarivate Analytics Web of Science
  2 | VR 1.0
  3 | PT J
  4 | AU Li, Wei
  5 |    Dou, Zhiguo
  6 |    Cui, Lijuan
  7 |    Zhao, Xinsheng
  8 |    Zhang, Manyin
  9 |    Zhang, Yan
 10 |    Gao, Changjun
 11 |    Yang, Zheng
 12 |    Lei, Yinru
 13 |    Pan, Xu
 14 | TI Soil fauna diversity at different stages of reed restoration in a
 15 |    lakeshore wetland at Lake Taihu, China
 16 | SO ECOSYSTEM HEALTH AND SUSTAINABILITY
 17 | VL 6
 18 | IS 1
 19 | AR UNSP 1722034
 20 | DI 10.1080/20964129.2020.1722034
 21 | PD DEC 16 2020
 22 | PY 2020
 23 | AB Introduction: Wetland soil fauna support material cycling and
 24 |    restoration processes in wetland ecosystems. In our study, we observed
 25 |    variations in wetland soil fauna on the shores of Lake Taihu, China. We
 26 |    examined the relationships between fauna and major environmental
 27 |    factors, and looked at the short-and long-term changes in reed wetlands
 28 |    under restoration and in the natural reed lakeshore. Outcomes: We
 29 |    identified 93 groups of soil fauna in different wetlands and found
 30 |    significant differences in the lakeshore wetlands' soil fauna
 31 |    assemblages, depending on the length of the restoration period. By
 32 |    analyzing the soil fauna community evenness, dominance, number of taxa,
 33 |    and diversity, we found minimal seasonal variation in the soil fauna
 34 |    community diversity and abundance. The abundance of soil fauna in the
 35 |    sites under restoration decreased with depth below the soil surface. The
 36 |    reed restoration was obvious in the succession of the soil fauna groups
 37 |    in the long-term site. Although the restoration had an overall positive
 38 |    long-term effect on the soil fauna communities, there were no obvious
 39 |    short-term changes in the number of individuals. Conclusion: The study
 40 |    explored various potential measures to restore soil fauna in the Lake
 41 |    Taihu wetland and developed a theoretical basis for restoring the
 42 |    lakeshore wetland ecosystem.
 43 | RI Zhang, Manyin/AAK-5171-2020; LI, WEI/; Dou, Zhiguo/
 44 | OI LI, WEI/0000-0002-2133-9287; Dou, Zhiguo/0000-0001-8031-4330
 45 | ZB 0
 46 | ZR 0
 47 | ZS 0
 48 | TC 0
 49 | Z8 0
 50 | Z9 0
 51 | SN 2096-4129
 52 | EI 2332-8878
 53 | UT WOS:000514379300001
 54 | ER
 55 | 
 56 | PT J
 57 | AU Higgins, Kathleen Marie
 58 | TI Aesthetics and the Containment of Grief
 59 | SO JOURNAL OF AESTHETICS AND ART CRITICISM
 60 | VL 78
 61 | IS 1
 62 | BP 9
 63 | EP 20
 64 | DI 10.1111/jaac.12686
 65 | PD DEC 2020
 66 | PY 2020
 67 | AB My point of departure is the observation that people ubiquitously turn
 68 |    to aesthetic practices in response to the loss of a loved one. I argue
 69 |    that profound loss catapults the bereaved person into an alternate
 70 |    "world" that differs in marked ways from the world we usually occupy, an
 71 |    alternate world lacking even the basic coherence we need to function.
 72 |    Aesthetic practices facilitate restoration of coherence to our
 73 |    experience, as well as reconnection with the social world and recovery
 74 |    from the breakdown that profound loss involves. While the aesthetic
 75 |    notion of closure is frequently invoked in connection with the needs of
 76 |    the bereaved, I suggest that while containing the emotions experienced
 77 |    in connection with loss is vital if they are to be processed,
 78 |    unrealistic aspirations toward closure can encourage expectations that
 79 |    harm the bereaved. By contrast, I suggest that the aims of aesthetically
 80 |    punctuating experience and communicating through aesthetic gestures are
 81 |    beneficial for helping the bereaved adjust to their new circumstances.
 82 | TC 0
 83 | ZS 0
 84 | Z8 0
 85 | ZR 0
 86 | ZB 0
 87 | Z9 0
 88 | SN 0021-8529
 89 | EI 1540-6245
 90 | UT WOS:000510870600001
 91 | ER
 92 | 
 93 | PT J
 94 | AU Tang, Hui
 95 |    Geng, Guohua
 96 |    Zhou, Mingquan
 97 | TI Application of Digital Processing in Relic Image Restoration Design
 98 | SO SENSING AND IMAGING
 99 | VL 21
100 | IS 1
101 | AR 6
102 | DI 10.1007/s11220-019-0265-8
103 | PD DEC 2020
104 | PY 2020
105 | AB Cultural relic is the carrier of human historic culture, which can
106 |    reflect the cultural and social environment, but cultural relics as a
107 |    material will be damaged over time. Before the advent of computer
108 |    technology, the damaged cultural relics would not be repaired due to
109 |    cost. Computer vision technology has been applied to the restoration of
110 |    cultural relics, mainly for the virtual restoration of damaged cultural
111 |    relics images. This paper briefly introduced the Criminisi image
112 |    restoration algorithm and the structure tensor used to improve the
113 |    algorithm in the digital cultural relics image restoration. A damaged
114 |    cultural relics image and a complete image which was damaged by human
115 |    were repaired respectively using the classical Criminisi image
116 |    restoration algorithm and the improved structure tensor based repair
117 |    algorithm on MATLAB software. The results showed that the Criminisi
118 |    image restoration algorithm could be used to repair the damaged images
119 |    of ancient fabrics. It was found that the classical image restoration
120 |    algorithm had some shortcomings, such as inappropriate texture
121 |    structure, obvious repair marks and addition of redundant information,
122 |    but the improved algorithm effectively avoided the above shortcomings.
123 |    The peak signal to noise ratio (SNR) of the complete image which was
124 |    damaged by human was compared objectively, and it was found that the
125 |    improved algorithm had better restoration performance.
126 | Z8 0
127 | ZS 0
128 | TC 0
129 | ZB 0
130 | ZR 0
131 | Z9 0
132 | SN 1557-2064
133 | EI 1557-2072
134 | UT WOS:000502504100001
135 | ER
136 | 
137 | PT J
138 | AU Zhang, Xinrong
139 |    Zhang, Lei
140 |    Chen, Ming
141 |    Liu, Dongying
142 | TI miR-324-5p inhibits gallbladder carcinoma cell metastatic behaviours by
143 |    downregulation of transforming growth factor beta 2 expression.
144 | SO Artificial cells, nanomedicine, and biotechnology
145 | VL 48
146 | IS 1
147 | BP 315
148 | EP 324
149 | DI 10.1080/21691401.2019.1703724
150 | PD 2020-Dec
151 | PY 2020
152 | AB Increasing studies have demonstrated that microRNAs (miRNAs) are
153 |    associated with the metastasis of gallbladder carcinoma (GBC). Recently,
154 |    miR-324-5p has been reported to be a tumour-suppressive miRNA in many
155 |    types of malignant cancer. However, the biological function and
156 |    molecular mechanism of miR-324-5p in GBC still remain largely unknown.
157 |    Here, we found that miR-324-5p expression was notably down-regulated in
158 |    both GBC tissues and cells compared with that in normal controls.
159 |    Downregulated miR-324-5p expression was negatively associated with the
160 |    status of local invasion and lymph node metastasis and predicted a poor
161 |    prognosis in GBC patients. Further functional assays revealed that
162 |    restoration of miR-324-5p significantly suppressed GBC cell migration,
163 |    invasion and epithelial-mesenchymal transition (EMT) invitro and impeded
164 |    the metastasis of GBC cells invivo. Moreover, RNA immunoprecipitation
165 |    (RIP) and dual-luciferase reporter assay confirmed that the transforming
166 |    growth factor beta 2 (TGFB2) was a direct target gene of miR-324-5p in
167 |    GBC cells. Mechanically, small interfering RNA (siRNA)-mediated
168 |    knockdown of TGFB2 partially phenocopied the inhibitory effects of
169 |    miR-324-5p overexpression on GBC cell metastatic phenotypes. In summary,
170 |    our findings demonstrated that miR-324-5p targets TGFB2 expression to
171 |    inhibit GBC cell metastatic behaviors, and implying miR-324-5p as a
172 |    potential biomarker for diagnostic and therapeutic strategies in GBC.
173 | ZB 0
174 | ZR 0
175 | Z8 0
176 | ZS 0
177 | TC 0
178 | Z9 0
179 | EI 2169-141X
180 | UT MEDLINE:31858815
181 | PM 31858815
182 | ER
183 | 
184 | EF


--------------------------------------------------------------------------------
/tests/testthat/testdata/eviatlas.txt:
--------------------------------------------------------------------------------
 1 | TY  - JOUR
 2 | AU  - Haddaway, Neal R.
 3 | AU  - Feierman, Andrew
 4 | AU  - Grainger, Matthew J.
 5 | AU  - Gray, Charles T.
 6 | AU  - Tanriver-Ayder, Ezgi
 7 | AU  - Dhaubanjar, Sanita
 8 | AU  - Westgate, Martin J.
 9 | PY  - 2019
10 | DA  - 2019/06/04
11 | TI  - EviAtlas: a tool for visualising evidence synthesis databases
12 | JO  - Environmental Evidence
13 | SP  - 22
14 | VL  - 8
15 | IS  - 1
16 | SN  - 2047-2382
17 | UR  - https://doi.org/10.1186/s13750-019-0167-1
18 | DO  - 10.1186/s13750-019-0167-1
19 | ID  - Haddaway2019
20 | ER  - 
21 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/litsearchr.txt:
--------------------------------------------------------------------------------
 1 | @article{grames2019automated,
 2 |   title={An automated approach to identifying search terms for systematic reviews using keyword co-occurrence networks},
 3 |   author={Grames, Eliza M and Stillman, Andrew N and Tingley, Morgan W and Elphick, Chris S},
 4 |   journal={Methods in Ecology and Evolution},
 5 |   volume={10},
 6 |   number={10},
 7 |   pages={1645--1654},
 8 |   year={2019},
 9 |   publisher={Wiley Online Library}
10 | }
11 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/res_synth_methods.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | PMID- 32336025
  3 | OWN - NLM
  4 | STAT- Publisher
  5 | LR  - 20200426
  6 | IS  - 1759-2887 (Electronic)
  7 | IS  - 1759-2879 (Linking)
  8 | DP  - 2020 Apr 26
  9 | TI  - Risk-Of-Bias VISualization (robvis): an R package and Shiny web app for
 10 |       visualizing risk-of-bias assessments.
 11 | LID - 10.1002/jrsm.1411 [doi]
 12 | AB  - Despite a major increase in the range and number of software offerings now
 13 |       available to help researchers produce evidence syntheses, there is currently no
 14 |       generic tool for producing figures to display and explore the risk-of-bias
 15 |       assessments that routinely take place as part of systematic review. However,
 16 |       tools such as the R programming environment and Shiny (an R package for building 
 17 |       interactive web apps) have made it straightforward to produce new tools to help
 18 |       in producing evidence syntheses. We present a new tool, robvis (Risk-Of-Bias
 19 |       VISualization), available as an R package and web app, which facilitates rapid
 20 |       production of publication-quality risk-of-bias assessment figures. We present a
 21 |       timeline of the tool's development and its key functionality. This article is
 22 |       protected by copyright. All rights reserved.
 23 | CI  - This article is protected by copyright. All rights reserved.
 24 | FAU - McGuinness, Luke A
 25 | AU  - McGuinness LA
 26 | AUID- ORCID: https://orcid.org/0000-0001-8730-9761
 27 | AD  - MRC Integrative Epidemiology Unit at the University of Bristol, Bristol, UK.
 28 | AD  - Population Health Sciences, Bristol Medical School, University of Bristol,
 29 |       Bristol, UK.
 30 | FAU - Higgins, Julian Pt
 31 | AU  - Higgins JP
 32 | AD  - MRC Integrative Epidemiology Unit at the University of Bristol, Bristol, UK.
 33 | AD  - Population Health Sciences, Bristol Medical School, University of Bristol,
 34 |       Bristol, UK.
 35 | LA  - eng
 36 | PT  - Journal Article
 37 | DEP - 20200426
 38 | PL  - England
 39 | TA  - Res Synth Methods
 40 | JT  - Research synthesis methods
 41 | JID - 101543738
 42 | SB  - IM
 43 | OTO - NOTNLM
 44 | OT  - Data visualization
 45 | OT  - Evidence synthesis
 46 | OT  - R
 47 | OT  - Risk of bias
 48 | EDAT- 2020/04/27 06:00
 49 | MHDA- 2020/04/27 06:00
 50 | CRDT- 2020/04/27 06:00
 51 | PHST- 2020/02/27 00:00 [received]
 52 | PHST- 2020/04/16 00:00 [revised]
 53 | PHST- 2020/04/18 00:00 [accepted]
 54 | PHST- 2020/04/27 06:00 [entrez]
 55 | PHST- 2020/04/27 06:00 [pubmed]
 56 | PHST- 2020/04/27 06:00 [medline]
 57 | AID - 10.1002/jrsm.1411 [doi]
 58 | PST - aheadofprint
 59 | SO  - Res Synth Methods. 2020 Apr 26. doi: 10.1002/jrsm.1411.
 60 | 
 61 | PMID- 31355546
 62 | OWN - NLM
 63 | STAT- In-Process
 64 | LR  - 20200226
 65 | IS  - 1759-2887 (Electronic)
 66 | IS  - 1759-2879 (Linking)
 67 | VI  - 10
 68 | IP  - 4
 69 | DP  - 2019 Dec
 70 | TI  - revtools: An R package to support article screening for evidence synthesis.
 71 | PG  - 606-614
 72 | LID - 10.1002/jrsm.1374 [doi]
 73 | AB  - The field of evidence synthesis is growing rapidly, with a corresponding increase
 74 |       in the number of software tools and workflows to support the construction of
 75 |       systematic reviews, systematic maps, and meta-analyses. Despite much progress,
 76 |       however, a number of problems remain, including slow integration of new
 77 |       statistical or methodological approaches into user-friendly software, low
 78 |       prevalence of open-source software, and poor integration among distinct software 
 79 |       tools. These issues hinder the utility and transparency of new methods to the
 80 |       research community. Here, I present revtools, an R package to support article
 81 |       screening during evidence synthesis projects. It provides tools for the import
 82 |       and deduplication of bibliographic data, screening of articles by title or
 83 |       abstract, and visualization of article content using topic models. The software
 84 |       is entirely open-source and combines command-line scripting for experienced
 85 |       programmers with custom-built user interfaces for casual users, with further
 86 |       methods to support article screening to be added over time. revtools provides
 87 |       free access to novel methods in an open-source environment and represents a
 88 |       valuable step in expanding the capacity of R to support evidence synthesis
 89 |       projects.
 90 | CI  - (c) 2019 John Wiley & Sons, Ltd.
 91 | FAU - Westgate, Martin J
 92 | AU  - Westgate MJ
 93 | AUID- ORCID: https://orcid.org/0000-0003-0854-2034
 94 | AD  - Fenner School of Environment & Society, The Australian National University,
 95 |       Acton, ACT, Australia.
 96 | LA  - eng
 97 | PT  - Journal Article
 98 | DEP - 20191018
 99 | PL  - England
100 | TA  - Res Synth Methods
101 | JT  - Research synthesis methods
102 | JID - 101543738
103 | SB  - IM
104 | OTO - NOTNLM
105 | OT  - data visualization
106 | OT  - meta-analysis
107 | OT  - natural language processing
108 | OT  - systematic review
109 | OT  - topic models
110 | EDAT- 2019/07/30 06:00
111 | MHDA- 2019/07/30 06:00
112 | CRDT- 2019/07/30 06:00
113 | PHST- 2019/02/25 00:00 [received]
114 | PHST- 2019/06/12 00:00 [revised]
115 | PHST- 2019/07/23 00:00 [accepted]
116 | PHST- 2019/07/30 06:00 [pubmed]
117 | PHST- 2019/07/30 06:00 [medline]
118 | PHST- 2019/07/30 06:00 [entrez]
119 | AID - 10.1002/jrsm.1374 [doi]
120 | PST - ppublish
121 | SO  - Res Synth Methods. 2019 Dec;10(4):606-614. doi: 10.1002/jrsm.1374. Epub 2019 Oct 
122 |       18.
123 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/vignettes/overview.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Overview"
  3 | author: Martin Westgate & Eliza Grames
  4 | date: 2025-02-21
  5 | output: rmarkdown::html_vignette
  6 | vignette: >
  7 |   %\VignetteIndexEntry{Overview}
  8 |   %\VignetteEngine{knitr::rmarkdown}
  9 |   %\VignetteEncoding{UTF-8}
 10 | ---
 11 | 
 12 | ```{r, include = FALSE}
 13 | knitr::opts_chunk$set(
 14 |   collapse = TRUE,
 15 |   comment = "#>"
 16 | )
 17 | ```
 18 | 
 19 | ## Introduction
 20 | 
 21 | Systematic review searches include multiple databases that export results in a 
 22 | variety of formats with overlap in coverage between databases. To streamline the 
 23 | process of importing, assembling, and deduplicating results, `synthesisr` 
 24 | recognizes bibliographic files exported from databases commonly used for 
 25 | systematic reviews and merges results into a standardized format.  
 26 | 
 27 | ## Read and assemble bibliographic files
 28 | 
 29 | `synthesisr` can read any BibTex or RIS formatted bibliographic data files. It 
 30 | detects whether files are more bib-like or ris-like and imports them 
 31 | accordingly. Note that files from some databases may contain non-standard fields 
 32 | or non-standard characters that cause import failure in rare cases; if this 
 33 | happens, we recommend converting the file in open source bibliographic 
 34 | management software such as Zotero.
 35 | 
 36 | In the code below, we will demonstrate how to read and assemble bibliographic 
 37 | data files with example datasets included in the `synthesisr` package. Note that 
 38 | if you are using the code with your own data, you will not need to use 
 39 | `system.file()` and instead will want to pass a character vector of the path(s) 
 40 | to the file(s) you want to import. For example, if you have saved all your 
 41 | search results in a directory called "search_results", you may want to use 
 42 | `list.files("./search_results/")` instead.
 43 | 
 44 | ```{r}
 45 | #| eval: false
 46 | # system.file will look for the path to where synthesisr is installed
 47 | # by using the example bibliographic data files, you can reproduce the vignette
 48 | bibfiles <- list.files(
 49 |   system.file("extdata/", package = "synthesisr"),
 50 |   full.names = TRUE
 51 | )
 52 | 
 53 | # we can print the list of bibfiles to confirm what we will import
 54 | # in this example, we have bibliographic data exported from Scopus and Zoological Record
 55 | print(bibfiles)
 56 | 
 57 | # now we can use read_refs to read in our bibliographic data files
 58 | # we save them to a data.frame object (because return_df=TRUE) called imported_files
 59 | library(synthesisr)
 60 | imported_files <- read_refs(
 61 |   filename = bibfiles,
 62 |   return_df = TRUE)
 63 | 
 64 | ```
 65 | 
 66 | ## Deduplicate bibliographic data
 67 | 
 68 | Many journals are indexed in multiple databases, so searching across databases 
 69 | will retrieve duplicates. After import, `synthesisr` can detect duplicates and 
 70 | retain only unique bibliographic records using a variety of methods such as 
 71 | string distance or fuzzy matching records. A good place to start is removing 
 72 | articles that have identical titles, especially since this reduces computational 
 73 | time for more sophisticated deduplication methods.
 74 | 
 75 | ```{r}
 76 | #| eval: false
 77 | ## first, we will remove articles that have identical titles
 78 | ## this is a fairly conservative approach, so we will remove them without review
 79 | # df <- deduplicate(
 80 | #   imported_files,
 81 | #   match_by = "title",
 82 | #   method = "exact"
 83 | # )
 84 | 
 85 | ```
 86 | 
 87 | In some cases, it may be useful to know which articles were identified as 
 88 | duplicates so they can be manually reviewed or so that information from two 
 89 | records can be merged. Using our partially-deduplicated dataset, we check a few 
 90 | titles and use string distance methods to find additional duplicate articles in 
 91 | the code below and then remove them by extracting unique references. Although 
 92 | here we only use one secondary deduplication method (string distance), we could 
 93 | look for additional duplicates based on fuzzy matching abstracts, for example.
 94 | 
 95 | ## NOTE: the examples below don't match now; need updating
 96 | 
 97 | ```{r}
 98 | #| eval: false
 99 | # there are still some duplicate articles that were not removed
100 | # for example, the titles for articles 91 and 114 appear identical
101 | ## df$title[c(91,114)]
102 | # the dash-like symbol in title 91, however, is a special character not punctuation
103 | # so it was not classified as identical
104 | 
105 | # similarly, there is a missing space in the title for article 96
106 | ## df$title[c(21,96)]
107 | 
108 | # and an extra space in title 47
109 | ## df$title[c(47, 101)]
110 | 
111 | # # in this example, we will use string distance to identify likely duplicates
112 | # duplicates_string <- find_duplicates(
113 | #   df$title,
114 | #   method = "string_osa",
115 | #   to_lower = TRUE,
116 | #   rm_punctuation = TRUE,
117 | #   threshold = 7
118 | # )
119 | 
120 | # we can extract the line numbers from the dataset that are likely duplicated
121 | # this lets us manually review those titles to confirm they are duplicates
122 | 
123 | # manual_checks <- review_duplicates(df$title, duplicates_string)
124 | 
125 | ```
126 | 
127 | ```{r, include=FALSE, eval=TRUE}
128 | #| eval: false
129 | # manual_checks[,1] <- substring(manual_checks[,1], 1, 60)
130 | # 
131 | # print(manual_checks[1:10, ])
132 | ```
133 | 
134 | 
135 | ```{r}
136 | #| eval: false
137 | # # the titles under match #99 are not duplicates, so we need to keep them both
138 | # # we can use the override_duplicates function to manually mark them as unique
139 | # new_duplicates <- synthesisr::override_duplicates(duplicates_string, 99)
140 | # 
141 | # # now we can extract unique references from our dataset
142 | # # we need to pass it the dataset (df) and the matching articles (new_duplicates)
143 | # results <- extract_unique_references(df, new_duplicates)
144 | 
145 | ```
146 | 
147 | ## Write bibliographic files
148 | 
149 | To facilitate exporting results to other platforms after assembly and 
150 | deduplication, `synthesisr` can write bibliographic data to .ris or .bib files. 
151 | Optionally, `write_refs()` can write directly to a text file stored locally.
152 | 
153 | ```{r}
154 | #| paged.print: TRUE
155 | #| eval: false
156 | # # synthesisr can write the full dataset to a bibliographic file
157 | # # but in this example, we will just write the first citation
158 | # # we also want it to be a nice clean bibliographic file, so we remove NA data
159 | # # this makes it easier to view the output when working with a single article
160 | # citation <- df[1, !is.na(df[1,])]
161 | # 
162 | # format_citation(citation)
163 | # 
164 | # write_refs(citation,
165 | #   format = "bib",
166 | #   file = FALSE
167 | # )
168 | 
169 | ```
170 | 


--------------------------------------------------------------------------------