├── .DS_Store ├── .Rbuildignore ├── .github ├── .gitignore ├── r-depends.rds └── workflows │ ├── R-cmd-check.yaml_OLD │ ├── R-cmd-check.yml │ ├── deploy.yml_OLD │ ├── document-and-deploy-manual.yml │ ├── document-and-deploy-r2u.yaml_OLD │ ├── document-and-deploy.yml │ ├── pkgdown.yaml_OLD │ └── pr-commands.yaml ├── .gitignore ├── CiteSource.Rproj ├── DESCRIPTION ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── .DS_Store ├── CiteSource.R ├── compare.R ├── count.R ├── dedup.R ├── export.R ├── helpers.R ├── import.R ├── import_export_helpers.R ├── new_count_and_table.R ├── plots.R ├── reimport.R ├── runShiny.R ├── sysdata.rda └── tables.R ├── README.md ├── _pkgdown.yml ├── cran-comments.md ├── inst ├── .DS_Store ├── extdata │ └── examplecitations.rds └── shiny-app │ └── CiteSource │ ├── app.R │ ├── google_analytics_dev.html │ ├── google_analytics_main.html │ └── www │ ├── CS.gif │ ├── about.md │ ├── favicon.png │ ├── use-cases.md │ └── user_guide.md ├── man ├── CiteSource-package.Rd ├── calculate_detailed_records.Rd ├── calculate_initial_records.Rd ├── calculate_phase_count.Rd ├── calculate_phase_records.Rd ├── calculate_record_counts.Rd ├── citation_summary_table.Rd ├── compare_sources.Rd ├── count_unique.Rd ├── create_detailed_record_table.Rd ├── create_initial_record_table.Rd ├── create_precision_sensitivity_table.Rd ├── dedup_citations.Rd ├── dedup_citations_add_manual.Rd ├── detect_.Rd ├── export_bib.Rd ├── export_csv.Rd ├── export_ris.Rd ├── merge_columns.Rd ├── parse_.Rd ├── pipe.Rd ├── plot_contributions.Rd ├── plot_source_overlap_heatmap.Rd ├── plot_source_overlap_upset.Rd ├── precision_sensitivity_table.Rd ├── read_citations.Rd ├── record_counts.Rd ├── record_counts_table.Rd ├── record_level_table.Rd ├── record_summary_table.Rd ├── reimport_csv.Rd ├── reimport_ris.Rd ├── runShiny.Rd ├── synthesisr_read_refs.Rd └── write_refs.Rd ├── renv ├── .gitignore ├── activate.R └── settings.json ├── tests ├── .DS_Store ├── shinytest │ ├── test_1.ris │ ├── test_2.ris │ └── test_3.ris ├── testthat.R └── testthat │ ├── .DS_Store │ ├── data │ └── 1_WoS.ris │ ├── test-import.R │ └── test-tables.R └── vignettes ├── .DS_Store ├── benchmark_data ├── Benchmarking.ris ├── Search1_1.ris ├── Search2_1.ris ├── Search2_2.ris ├── Search2_3.ris ├── Search2_4.ris ├── Search3_1.ris ├── Search3_2.ris ├── Search3_3.ris ├── Search3_4.ris ├── Search3_5.ris ├── Search4_1.ris ├── Search4_2.ris ├── Search4_3.ris ├── Search5_1.ris ├── Search5_2.ris ├── Search5_3.ris ├── Search5_4.ris ├── Search5_5.ris ├── Search5_6.ris ├── Search5_7.ris ├── Search5_8.ris ├── Search6_1.ris ├── Search6_2.ris ├── Search6_3.ris └── Search6_4.ris ├── citesource_analysis_across_screening_phases.rmd ├── citesource_benchmark_testing.Rmd ├── citesource_new_benchmark_testing.Rmd ├── citesource_vignette_db-pre-screen_validation.Rmd ├── citesource_vignette_db-topic-coverage.Rmd ├── citesource_working_example.rmd ├── new_benchmark_data ├── benchmark_15.ris ├── search1_166.ris ├── search2_278.ris ├── search3_302.ris ├── search4_460.ris └── search5_495.ris ├── new_stage_data ├── Dimensions_246.ris ├── econlit_3.ris ├── envindex_100.ris ├── final_24.ris ├── lens_343.ris ├── medline_84.ris ├── screened_128.ris └── wos_278.ris ├── topic_data ├── .DS_Store ├── 20221207_gambling-harms_crimjust_41.ris ├── 20221207_gambling-harms_lens_49.ris ├── 20221207_gambling-harms_psycinfo_124.ris ├── 20221207_gambling-harms_pubmed_176.ris └── 20221207_gambling-harms_scopus_255.ris ├── valid_data ├── WoS_79.ris ├── benchmark.ris ├── psycinfo_64.ris └── pubmed_46.ris 
├── vignette_exports ├── analysis_across_phases_shiny_export.csv ├── analysis_across_phases_shiny_export.ris ├── citesource_benchmark_export.csv └── citesource_benchmark_export.ris └── working_example_data ├── AGRIS.ris ├── CAB.ris ├── EconLit.ris ├── Final.ris ├── GreenFile.ris ├── McK.ris ├── RM.ris ├── TiAb.ris ├── WoS_early.ris └── WoS_later.ris /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/.DS_Store -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^renv$ 2 | ^renv\.lock$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | ^LICENSE\.md$ 6 | ^\.github$ 7 | ^pkgdown$ 8 | ^_pkgdown.yml$ 9 | ^cran-comments\.md$ 10 | ^\.httr-oauth$ 11 | ^docs$ 12 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | /man -------------------------------------------------------------------------------- /.github/r-depends.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/.github/r-depends.rds -------------------------------------------------------------------------------- /.github/workflows/R-cmd-check.yaml_OLD: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [main, master] 4 | pull_request: 5 | branches: [main, master] 6 | 7 | name: R-CMD-check 8 | 9 | jobs: 10 | document-and-dispatch: 11 | name: document 12 | runs-on: ubuntu-latest 13 | env: 14 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 15 | steps: 16 | - uses: actions/checkout@v2 17 | - uses: r-lib/actions/setup-r@v2 18 | with: 19 | use-public-rspm: true 20 | r-version: 'release' 21 | - name: Delete-Namespace 22 | run: Rscript -e 'file.remove("NAMESPACE")' 23 | - uses: r-lib/actions/setup-pandoc@v2 24 | - name: system dependencies 25 | run: sudo apt-get install libcurl4-openssl-dev libnode-dev 26 | - uses: r-lib/actions/setup-r-dependencies@v2 27 | with: 28 | extra-packages: any::rcmdcheck, any::roxygen2, any::devtools 29 | needs: check 30 | - uses: xu-cheng/texlive-action@v2 31 | - name: Update date 32 | run: Rscript -e 'descr <- readLines("DESCRIPTION")' -e 'descr <- stringr::str_replace(descr, "^Date.*$", paste("Date:", Sys.Date()))' -e 'writeLines(descr, "DESCRIPTION")' 33 | - name: Document 34 | run: Rscript -e 'roxygen2::roxygenise()' 35 | - name: Manual 36 | continue-on-error: true 37 | run: Rscript -e 'devtools::build_manual()' 38 | 39 | - name: commit 40 | run: | 41 | git config --local user.email "actions@github.com" 42 | git config --local user.name "GitHub Actions" 43 | git add -f man/\* NAMESPACE 44 | git commit -m 'Documentation' || echo "No changes to commit" 45 | git push origin || echo "No changes to commit" 46 | 47 | 48 | - uses: r-lib/actions/check-r-package@v2 49 | -------------------------------------------------------------------------------- /.github/workflows/R-cmd-check.yml: -------------------------------------------------------------------------------- 1 | # Run CI for R using https://eddelbuettel.github.io/r-ci/ 2 | 3 | name: R-CMD-Check 4 | 5 | on: 6 | workflow_run: 7 | workflows: ["document-and-deploy"] 8 | branches: [main] 9 | 
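# workflow_run chaining: this check starts only after the document-and-deploy
# workflow has completed on main (regardless of its outcome, per `types:
# completed` below) or when dispatched manually.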
types: 10 | - completed 11 | workflow_dispatch: 12 | 13 | env: 14 | _R_CHECK_FORCE_SUGGESTS_: "false" 15 | 16 | jobs: 17 | R-CMD-check: 18 | strategy: 19 | matrix: 20 | include: 21 | - {os: macOS-latest} 22 | - {os: ubuntu-latest} 23 | 24 | runs-on: ${{ matrix.os }} 25 | 26 | steps: 27 | - name: Checkout 28 | uses: actions/checkout@v4 29 | 30 | - uses: r-lib/actions/setup-pandoc@v2 31 | 32 | - name: Setup 33 | uses: eddelbuettel/github-actions/r-ci-setup@master 34 | 35 | - name: Bootstrap 36 | run: ./run.sh bootstrap 37 | 38 | - name: Install rcmdcheck 39 | env: 40 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 41 | run: | 42 | R -e " 43 | install.packages(c('rcmdcheck', 'devtools')); 44 | devtools::install_local() 45 | " 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml_OLD: -------------------------------------------------------------------------------- 1 | on: 2 | release: 3 | types: [released] 4 | pull_request: 5 | types: [opened, synchronize, reopened] 6 | push: 7 | branches: 8 | - main 9 | - master 10 | workflow_dispatch: 11 | 12 | name: Deploy 13 | 14 | jobs: 15 | deploy-latest: 16 | runs-on: ubuntu-latest 17 | if: github.event_name == 'push' 18 | steps: 19 | - name: Checkout code 20 | uses: actions/checkout@v4 21 | - name: Set up R2u (faster dependency installation) 22 | uses: eddelbuettel/github-actions/r2u-setup@master 23 | - name: Install dependencies and deploy 24 | run: | 25 | R -e " 26 | install.packages(c('shiny', 'rsconnect')); 27 | if (!require('remotes')) install.packages('remotes'); 28 | if (!require('gitcreds')) install.packages('gitcreds'); 29 | gitcreds::gitcreds_delete(url = 'https://github.com'); 30 | remotes::install_github('ESHackathon/CiteSource'); 31 | rsconnect::setAccountInfo(name='${{secrets.SHINY_LUKAS_ACCOUNT}}', token='${{secrets.SHINY_LUKAS_TOKEN}}', secret='${{secrets.SHINY_LUKAS_SECRET}}'); 32 | rsconnect::deployApp(appName = 'CiteSource_latest', appDir = './inst/shiny-app/CiteSource')" 33 | 34 | env: 35 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 36 | 37 | deploy-release: 38 | runs-on: ubuntu-latest 39 | if: github.event_name == 'release' && github.event.release.tag_name != '*test*' 40 | steps: 41 | - name: Checkout code 42 | uses: actions/checkout@v4 43 | - name: Set up R2u (faster dependency installation) 44 | uses: eddelbuettel/github-actions/r2u-setup@master 45 | - name: Install dependencies and deploy 46 | run: | 47 | R -e " 48 | install.packages(c('shiny', 'rsconnect')); 49 | if (!require('remotes')) install.packages('remotes'); 50 | remotes::install_github('ESHackathon/CiteSource'); 51 | gitcreds_delete(url = 'https://github.com'); 52 | rsconnect::setAccountInfo(name='${{secrets.SHINY_LUKAS_ACCOUNT}}', token='${{secrets.SHINY_LUKAS_TOKEN}}', secret='${{secrets.SHINY_LUKAS_SECRET}}'); 53 | rsconnect::deployApp(appName = 'CiteSource_latest', appDir = './inst/shiny-app/CiteSource')" 54 | 55 | env: 56 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 57 | -------------------------------------------------------------------------------- /.github/workflows/document-and-deploy-manual.yml: -------------------------------------------------------------------------------- 1 | # Run CI for R using https://eddelbuettel.github.io/r-ci/ 2 | 3 | name: document-and-deploy-manual 4 | 5 | on: 6 | workflow_dispatch: 7 | inputs: 8 | rversion: 9 | description: 'R version to use (compatible with shinyapps.io)' 10 | required: true 11 | default: '4.4.1' 12 | 13 | env: 14 
| _R_CHECK_FORCE_SUGGESTS_: "false"
15 | 
16 | jobs:
17 |   document-and-deploy-manual:
18 |     runs-on: ubuntu-latest
19 | 
20 |     steps:
21 |       - name: Checkout
22 |         uses: actions/checkout@v4
23 | 
24 |       - uses: r-lib/actions/setup-pandoc@v2
25 | 
26 |       - name: Set up R version
27 |         uses: r-lib/actions/setup-r@v2
28 |         with:
29 |           r-version: ${{ github.event.inputs.rversion }}
30 |           use-public-rspm: true
31 | 
32 |       - name: Set up R Dependencies
33 |         uses: r-lib/actions/setup-r-dependencies@v2
34 |         with:
35 |           extra-packages: devtools, plogr, roxygen2, remotes, rsconnect, pkgdown
36 | 
37 |       - name: Create documentation
38 |         run: |
39 |           R -e "
40 |           file.remove('NAMESPACE');
41 |           descr <- readLines('DESCRIPTION');
42 |           descr <- stringr::str_replace(descr, '^Date.*$', paste('Date:', Sys.Date()));
43 |           writeLines(descr, 'DESCRIPTION');
44 |           roxygen2::roxygenise();
45 |           try(devtools::build_manual())
46 |           "
47 | 
48 |       - name: commit
49 |         run: |
50 |           git config --local user.email "actions@github.com"
51 |           git config --local user.name "GitHub Actions"
52 |           git add -f man/\* NAMESPACE
53 |           git commit -m 'Documentation' || echo "No changes to commit"
54 |           git push origin || echo "No changes to commit"
55 | 
56 |       - name: Deploy latest from dev
57 |         if: github.ref == 'refs/heads/dev'
58 |         env:
59 |           GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
60 |         run: |
61 |           R -e "
62 |           remotes::install_github('ESHackathon/CiteSource', force = TRUE);
63 |           rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}});
64 |           rsconnect::deployApp(appName = 'CiteSource_latest', appDir = './inst/shiny-app/CiteSource', forceUpdate = TRUE)"
65 | 
66 |       - name: Deploy stable version from main
67 |         if: github.ref == 'refs/heads/main'
68 |         env:
69 |           GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
70 |         run: |
71 |           R -e "
72 |           remotes::install_github('ESHackathon/CiteSource', force = TRUE);
73 |           rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}});
74 |           rsconnect::deployApp(appName = 'CiteSource', appDir = './inst/shiny-app/CiteSource', forceUpdate = TRUE)"
75 | 
76 |       - name: Create pkgdown
77 |         env:
78 |           GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
79 |         run: |
80 |           R -e "
81 |           if (!require(CiteSource)) remotes::install_github('ESHackathon/CiteSource', force = TRUE);
82 |           pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)"
83 | 
84 |       - name: Deploy to GitHub pages 🚀
85 |         if: github.ref_name == 'main' || github.ref_name == 'master'
86 |         uses: JamesIves/github-pages-deploy-action@v4
87 |         with:
88 |           clean: true
89 |           branch: gh-pages
90 |           folder: docs
91 | 
--------------------------------------------------------------------------------
/.github/workflows/document-and-deploy-r2u.yaml_OLD:
--------------------------------------------------------------------------------
1 | # Run CI for R using https://eddelbuettel.github.io/r-ci/
2 | 
3 | name: document-and-deploy
4 | 
5 | on:
6 |   push:
7 |   pull_request:
8 |   release:
9 | 
10 | env:
11 |   _R_CHECK_FORCE_SUGGESTS_: "false"
12 | 
13 | jobs:
14 |   document-and-deploy:
15 |     runs-on: ubuntu-latest
16 | 
17 |     steps:
18 |       - name: Checkout
19 |         uses: actions/checkout@v4
20 | 
21 |       - uses: r-lib/actions/setup-pandoc@v2
22 | 
23 |       - name: Setup
24 |         uses: eddelbuettel/github-actions/r-ci-setup@master
25 | 
26 |       - name: Bootstrap
27 |         run: ./run.sh bootstrap
28 | 
29 |       #- name: Dependencies
30 |       #  run: ./run.sh install_deps
31 | 
32 |       - name: All
Dependencies 33 | run: ./run.sh install_all 34 | 35 | - name: Create documentation 36 | run: | 37 | R -e " 38 | install.packages(c('rcmdcheck', 'roxygen2', 'devtools')); 39 | file.remove('NAMESPACE'); 40 | descr <- readLines('DESCRIPTION'); 41 | descr <- stringr::str_replace(descr, '^Date.*$', paste('Date:', Sys.Date())); 42 | writeLines(descr, 'DESCRIPTION'); 43 | roxygen2::roxygenise(); 44 | try(devtools::build_manual()) 45 | " 46 | 47 | - name: commit 48 | run: | 49 | git config --local user.email "actions@github.com" 50 | git config --local user.name "GitHub Actions" 51 | git add -f man/\* NAMESPACE 52 | git commit -m 'Documentation' || echo "No changes to commit" 53 | git push origin || echo "No changes to commit" 54 | 55 | - name: Deploy latest (from dev) 56 | if: github.ref == 'refs/heads/dev' 57 | env: 58 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 59 | run: | 60 | R -e " 61 | install.packages(c('rsconnect', 'remotes')); 62 | bspm::disable(); 63 | remotes::install_github('ESHackathon/CiteSource', ref = "dev", force = TRUE); 64 | rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}}); 65 | rsconnect::deployApp(appName = 'CiteSource_latest', appDir = './inst/shiny-app/CiteSource', forceUpdate = TRUE)" 66 | 67 | - name: Deploy stable version (from main) 68 | if: github.ref == 'refs/heads/main' 69 | env: 70 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 71 | run: | 72 | R -e " 73 | install.packages(c('rsconnect', 'remotes')); 74 | bspm::disable(); 75 | remotes::install_github('ESHackathon/CiteSource', force = TRUE); 76 | rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}}); 77 | rsconnect::deployApp(appName = 'CiteSource', appDir = './inst/shiny-app/CiteSource', forceUpdate = TRUE)" 78 | 79 | - name: Create pkgdown 80 | env: 81 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 82 | run: | 83 | R -e " 84 | install.packages('pkgdown'); 85 | bspm::disable(); 86 | if (!require(CiteSource)) remotes::install_github('ESHackathon/CiteSource', force = TRUE); 87 | pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)" 88 | 89 | - name: Deploy to GitHub pages 🚀 90 | if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master' 91 | uses: JamesIves/github-pages-deploy-action@v4 92 | with: 93 | clean: true 94 | branch: gh-pages 95 | folder: docs 96 | -------------------------------------------------------------------------------- /.github/workflows/document-and-deploy.yml: -------------------------------------------------------------------------------- 1 | name: document-and-deploy 2 | 3 | on: 4 | push: 5 | pull_request: 6 | release: 7 | 8 | jobs: 9 | document-and-deploy: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v4 15 | 16 | - uses: r-lib/actions/setup-pandoc@v2 17 | 18 | - name: Set up R version 19 | uses: r-lib/actions/setup-r@v2 20 | with: 21 | r-version: "oldrel" # As shinyapps.io needs some time to catch up after releases 22 | use-public-rspm: true 23 | 24 | - name: Set up R Dependencies 25 | uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | extra-packages: devtools, plogr, roxygen2, remotes, rsconnect, pkgdown 28 | 29 | - name: Create documentation 30 | run: | 31 | R -e " 32 | file.remove('NAMESPACE'); 33 | descr <- readLines('DESCRIPTION'); 34 | descr <- stringr::str_replace(descr, '^Date.*$', paste('Date:', Sys.Date())); 35 | 
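# Stamp today's date into the DESCRIPTION Date field before roxygenising,
# so the committed documentation reflects the build date.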
writeLines(descr, 'DESCRIPTION');
36 |           roxygen2::roxygenise();
37 |           try(devtools::build_manual())
38 |           "
39 | 
40 |       - name: commit
41 |         run: |
42 |           git config --local user.email "actions@github.com"
43 |           git config --local user.name "GitHub Actions"
44 |           git add -f man/\* NAMESPACE
45 |           git commit -m 'Documentation' || echo "No changes to commit"
46 |           git push origin || echo "No changes to commit"
47 | 
48 |       - name: Deploy latest from dev
49 |         if: github.ref == 'refs/heads/dev'
50 |         env:
51 |           GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
52 |         run: |
53 |           R -e "
54 |           remotes::install_github('ESHackathon/CiteSource', force = TRUE);
55 |           rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}});
56 |           rsconnect::deployApp(
57 |             appName = 'CiteSource_latest',
58 |             appDir = './inst/shiny-app/CiteSource',
59 |             forceUpdate = TRUE)"
60 | 
61 |       - name: Deploy stable version from main
62 |         if: github.ref == 'refs/heads/main'
63 |         env:
64 |           GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
65 |         run: |
66 |           R -e "
67 |           remotes::install_github('ESHackathon/CiteSource', force = TRUE);
68 |           rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}});
69 |           rsconnect::deployApp(
70 |             appName = 'CiteSource',
71 |             appDir = './inst/shiny-app/CiteSource',
72 |             forceUpdate = TRUE)"
73 | 
74 |       - name: Create pkgdown
75 |         env:
76 |           GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
77 |         run: |
78 |           R -e "
79 |           if (!require(CiteSource)) remotes::install_github('ESHackathon/CiteSource', force = TRUE);
80 |           pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)"
81 | 
82 |       - name: Deploy to GitHub pages 🚀
83 |         if: github.ref_name == 'main'
84 |         uses: JamesIves/github-pages-deploy-action@v4
85 |         with:
86 |           clean: true
87 |           branch: gh-pages
88 |           folder: docs
89 | 
--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml_OLD:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 |   push:
5 |     branches: [main, master]
6 |   pull_request:
7 |     branches: [main, master]
8 |   workflow_dispatch:
9 | 
10 | name: pkgdown
11 | 
12 | jobs:
13 |   pkgdown:
14 |     runs-on: ubuntu-latest
15 |     # Only restrict concurrency for non-PR jobs
16 |     concurrency:
17 |       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
18 |     env:
19 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
20 |     steps:
21 |       - uses: actions/checkout@v3
22 | 
23 |       - uses: r-lib/actions/setup-pandoc@v2
24 | 
25 |       - uses: r-lib/actions/setup-r@v2
26 |         with:
27 |           use-public-rspm: true
28 | 
29 |       - uses: r-lib/actions/setup-r-dependencies@v2
30 |         with:
31 |           extra-packages: any::pkgdown, local::.
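# local::. installs the package itself; `needs: website` additionally installs
# any dependencies listed under Config/Needs/website in DESCRIPTION.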
32 | needs: website 33 | 34 | - name: Build site 35 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 36 | shell: Rscript {0} 37 | 38 | - name: Deploy to GitHub pages 🚀 39 | if: github.event_name != 'pull_request' 40 | uses: JamesIves/github-pages-deploy-action@4.1.4 41 | with: 42 | clean: true 43 | branch: gh-pages 44 | folder: docs -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | name: Commands 8 | 9 | jobs: 10 | document: 11 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} 12 | name: document 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | steps: 17 | - uses: actions/checkout@v2 18 | 19 | - uses: r-lib/actions/pr-fetch@v1 20 | with: 21 | repo-token: ${{ secrets.GITHUB_TOKEN }} 22 | 23 | - uses: r-lib/actions/setup-r@v1 24 | with: 25 | use-public-rspm: true 26 | 27 | - uses: r-lib/actions/setup-r-dependencies@v1 28 | with: 29 | extra-packages: roxygen2 30 | 31 | - name: Document 32 | run: Rscript -e 'roxygen2::roxygenise()' 33 | 34 | - name: commit 35 | run: | 36 | git config --local user.name "$GITHUB_ACTOR" 37 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 38 | git add man/\* NAMESPACE 39 | git commit -m 'Document' 40 | 41 | - uses: r-lib/actions/pr-push@v1 42 | with: 43 | repo-token: ${{ secrets.GITHUB_TOKEN }} 44 | 45 | style: 46 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} 47 | name: style 48 | runs-on: ubuntu-latest 49 | env: 50 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 51 | steps: 52 | - uses: actions/checkout@v2 53 | 54 | - uses: r-lib/actions/pr-fetch@v1 55 | with: 56 | repo-token: ${{ secrets.GITHUB_TOKEN }} 57 | 58 | - uses: r-lib/actions/setup-r@v1 59 | 60 | - name: Install dependencies 61 | run: Rscript -e 'install.packages("styler")' 62 | 63 | - name: Style 64 | run: Rscript -e 'styler::style_pkg()' 65 | 66 | - name: commit 67 | run: | 68 | git config --local user.name "$GITHUB_ACTOR" 69 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 70 | git add \*.R 71 | git commit -m 'Style' 72 | 73 | - uses: r-lib/actions/pr-push@v1 74 | with: 75 | repo-token: ${{ secrets.GITHUB_TOKEN }} 76 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .DS_Store 6 | inst/doc 7 | .httr-oauth 8 | -------------------------------------------------------------------------------- /CiteSource.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: 
Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | 
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: CiteSource
2 | Title: Analyze the Utility of Information Sources and Retrieval Methodologies for Evidence Synthesis
3 | Version: 0.1.1
4 | Date: 2023-06-22
5 | Authors@R: c(
6 |     person("Trevor", "Riley", , "trevor.riley@noaa.gov", role = c("aut", "cre"),
7 |            comment = c(ORCID = "0000-0002-6834-9802")),
8 |     person("Kaitlyn", "Hair", , "kaitlyn.hair@ed.ac.uk", role = "aut",
9 |            comment = c(ORCID = "0000-0003-0180-7343")),
10 |     person("Lukas", "Wallrich", , "lukas.wallrich@gmail.com", role = "aut",
11 |            comment = c(ORCID = "0000-0003-2121-5177")),
12 |     person("Matthew", "Grainger", , "matthewjamesgrainger@gmail.com", role = "aut",
13 |            comment = c(ORCID = "0000-0001-8426-6495")),
14 |     person("Sarah", "Young", , "sarahy@andrew.cmu.edu", role = "aut",
15 |            comment = c(ORCID = "0000-0002-8301-5106")),
16 |     person("Chris", "Pritchard", , "chris.pritchard@ntu.ac.uk", role = "aut",
17 |            comment = c(ORCID = "0000-0002-1143-9751")),
18 |     person("Neal", "Haddaway", , "nealhaddaway@gmail.com", role = "aut",
19 |            comment = c(ORCID = "0000-0003-3902-2234")),
20 |     person("Martin", "Westgate", role = "cph", comment = "Author of included synthesisr fragments"),
21 |     person("Eliza", "Grames", role = "cph", comment = "Author of included synthesisr fragments")
22 |   )
23 | Description: Provides researchers with the ability to examine the utility
24 |     and efficacy of literature resources and search methodologies. The
25 |     package allows users to deduplicate references while maintaining
26 |     customizable metadata. The resulting data can be analyzed using
27 |     predeveloped plots and tables, including a summary table. Users are
28 |     also able to export and reimport data in .ris and .csv files. The
29 |     package includes a Shiny application for interactive use.
30 | License: GPL (>= 3) 31 | URL: https://www.eshackathon.org/CiteSource 32 | BugReports: https://github.com/ESHackathon/CiteSource/issues 33 | Imports: 34 | ASySD (>= 0.3.0), 35 | dplyr, 36 | DT, 37 | forcats, 38 | ggnewscale, 39 | ggplot2, 40 | glue, 41 | gt, 42 | purrr, 43 | rlang, 44 | scales, 45 | stringr, 46 | tibble, 47 | tidyr, 48 | tidyselect, 49 | UpSetR 50 | Suggests: 51 | htmltools, 52 | knitr, 53 | networkD3, 54 | plotly, 55 | rmarkdown, 56 | shiny, 57 | shinyalert, 58 | shinyjs, 59 | progressr, 60 | shinybusy, 61 | shinyWidgets, 62 | testthat (>= 3.0.0) 63 | Additional_repositories: 64 | https://github.com/camaradesuk/ASySD 65 | VignetteBuilder: 66 | knitr 67 | Config/testthat/edition: 3 68 | Encoding: UTF-8 69 | Roxygen: list(markdown = TRUE) 70 | RoxygenNote: 7.2.3 71 | Depends: 72 | R (>= 3.5.0) 73 | Remotes: 74 | camaradesuk/ASySD 75 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(as.data.frame,bibliography) 4 | export("%>%") 5 | export(calculate_detailed_records) 6 | export(calculate_initial_records) 7 | export(calculate_phase_count) 8 | export(calculate_phase_records) 9 | export(calculate_record_counts) 10 | export(citation_summary_table) 11 | export(compare_sources) 12 | export(count_unique) 13 | export(create_detailed_record_table) 14 | export(create_initial_record_table) 15 | export(create_precision_sensitivity_table) 16 | export(dedup_citations) 17 | export(dedup_citations_add_manual) 18 | export(export_bib) 19 | export(export_csv) 20 | export(export_ris) 21 | export(plot_contributions) 22 | export(plot_source_overlap_heatmap) 23 | export(plot_source_overlap_upset) 24 | export(precision_sensitivity_table) 25 | export(read_citations) 26 | export(record_counts) 27 | export(record_counts_table) 28 | export(record_level_table) 29 | export(record_summary_table) 30 | export(reimport_csv) 31 | export(reimport_ris) 32 | export(runShiny) 33 | export(run_shiny) 34 | import(dplyr) 35 | import(gt) 36 | import(rlang) 37 | import(scales) 38 | import(tidyr) 39 | importFrom(dplyr,"%>%") 40 | importFrom(gt,cells_column_labels) 41 | importFrom(gt,cols_label) 42 | importFrom(gt,gt) 43 | importFrom(gt,tab_footnote) 44 | importFrom(gt,tab_header) 45 | importFrom(rlang,":=") 46 | importFrom(rlang,.data) 47 | importFrom(stats,xtabs) 48 | importFrom(utils,head) 49 | importFrom(utils,read.table) 50 | importFrom(utils,tail) 51 | importFrom(utils,write.table) 52 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # CiteSource 0.0.1 2 | 3 | * Added a `NEWS.md` file to track changes to the package. 4 | 5 | - Added dependency on latest version of the ASySD R package 6 | 7 | - Simplified dedup function arguments (now specified within call to ASySD) 8 | 9 | - Integrated new dedup function into R shiny app 10 | 11 | # CiteSource 0.1.1 12 | 13 | - Added new functions which allow creation of tables and plots based on deduplicated (reimported) data. 
14 | 
15 | - Updated Shiny functionality, look and feel, and documentation
16 | 
17 | - Added new vignettes
--------------------------------------------------------------------------------
/R/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/R/.DS_Store
--------------------------------------------------------------------------------
/R/CiteSource.R:
--------------------------------------------------------------------------------
1 | #' CiteSource: A package to compare sources of citation records
2 | #'
3 | #' The CiteSource package supports evidence aggregation by helping with the
4 | #' processing of results from searches in different sources. It allows users
5 | #' to deduplicate results while retaining metadata on where those results
6 | #' were found, and then to compare the contribution of different sources.
7 | "_PACKAGE"
8 | 
9 | # Use magrittr rather than base R pipe to be compatible with older R versions
10 | # And data pronoun .data to avoid issues/warnings due to dplyr non-standard evaluation
11 | #' Pipe operator
12 | #'
13 | #' @name %>%
14 | #' @rdname pipe
15 | #' @keywords internal
16 | #' @export
17 | #' @importFrom dplyr %>%
18 | #' @usage lhs \%>\% rhs
19 | #' @param lhs A value or the magrittr placeholder.
20 | #' @param rhs A function call using the magrittr semantics.
21 | #' @return The result of calling `rhs(lhs)`.
22 | NULL
23 | #' @importFrom rlang .data :=
24 | NULL
25 | 
26 | # Declare . as global variable to remove warnings
27 | utils::globalVariables(".")
28 | 
29 | # Set Shiny upload size depending on interactive use or deployment on shinyapps.io
30 | .onLoad <- function(libname, pkgname) {
31 |   if (interactive()) {
32 |     options(shiny.maxRequestSize = 2000 * 1024^2, timeout = 40000000)
33 |   } else {
34 |     options(shiny.maxRequestSize = 250 * 1024^2, timeout = 40000000)
35 |   }
36 | }
37 | 
38 | 
39 | key_fields <- c("author", "title", "year", "journal", "abstract", "doi", "number", "pages",
40 |                 "volume", "isbn", "record_id", "label", "source", "issue", "url",
41 |                 "issn", "start_page", "ID")
--------------------------------------------------------------------------------
/R/compare.R:
--------------------------------------------------------------------------------
1 | #' Count number of unique and non-unique citations from different sources, labels, and strings
2 | #' @export
3 | #' @param unique_data from ASySD, merged unique rows with duplicate IDs
4 | #' @param include_references Should bibliographic detail be included in return?
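#' @details A record counts as "unique" when it was retrieved from exactly one
#'   source; all rows of a record retrieved from several sources are flagged as
#'   "duplicated". A minimal sketch of downstream use (assuming `dedup_results`
#'   from `dedup_citations()`, as in the example below):
#'   ```r
#'   counts <- count_unique(dedup_results)
#'   table(counts$cite_source, counts$type) # unique vs. duplicated per source
#'   ```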
5 | #' @return dataframe with indicators of where a citation appears, with source/label/string as column 6 | #' @examples 7 | #' # Load example data from the package 8 | #' examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource") 9 | #' examplecitations <- readRDS(examplecitations_path) 10 | #' 11 | #' # Deduplicate citations 12 | #' dedup_results <- dedup_citations(examplecitations) 13 | #' 14 | #' # Count unique and non-unique citations 15 | #' count_unique(dedup_results) 16 | 17 | count_unique <- function(unique_data, include_references = FALSE) { 18 | # Start a pipeline with the input data 19 | out <- unique_data %>% 20 | # Filter out rows where 'cite_source' is empty 21 | dplyr::filter(!.data$cite_source == "") %>% 22 | # Select specific columns 23 | dplyr::select(.data$duplicate_id, .data$cite_source, .data$cite_label, .data$cite_string, tidyselect::any_of("record_ids")) %>% 24 | # Separate rows by 'cite_source', 'cite_label', and 'cite_string' 25 | tidyr::separate_rows(.data$cite_source, convert = TRUE, sep = ", ") %>% 26 | tidyr::separate_rows(.data$cite_label, convert = TRUE, sep = ", ") %>% 27 | tidyr::separate_rows(.data$cite_string, convert = TRUE, sep = ", ") %>% 28 | # Group by 'duplicate_id' 29 | dplyr::group_by(.data$duplicate_id) %>% 30 | # Add 'unique' and 'type' columns 31 | dplyr::mutate( 32 | unique = ifelse(length(unique(.data$cite_source)) == 1, TRUE, FALSE), # 'unique' is TRUE if 'cite_source' is unique 33 | type = ifelse(.data$unique, "unique", "duplicated") %>% factor(levels = c("unique", "duplicated")) # 'type' is 'unique' if 'unique' is TRUE, 'duplicated' otherwise 34 | ) %>% 35 | # Ungroup the data 36 | dplyr::ungroup() %>% 37 | # Remove duplicate rows 38 | unique() 39 | 40 | # If 'include_references' is TRUE, join 'out' with 'unique_data' on 'duplicate_id' 41 | if (include_references == TRUE) { 42 | out %>% dplyr::left_join(unique_data %>% dplyr::select(-dplyr::all_of(setdiff(intersect(names(.), names(out)), "duplicate_id"))), by = "duplicate_id") 43 | } else { 44 | # Otherwise, return 'out' as is 45 | out 46 | } 47 | } 48 | 49 | #' Compare duplicate citations across sources, labels, and strings 50 | #' 51 | #' @export 52 | #' @param unique_data from ASySD, merged unique rows with duplicate IDs 53 | #' @param comp_type Specify which fields are to be included. One or more of "sources", "strings" or "labels" - defaults to all. 54 | #' @param include_references Should bibliographic detail be included in return? 
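#' @details Each selected dimension is spread into one logical column per
#'   observed value, prefixed `source__`, `string__` or `label__` (e.g.
#'   `source__WoS` and `source__Scopus` for two hypothetical sources). A brief
#'   sketch of downstream use (assuming `dedup_results` from `dedup_citations()`):
#'   ```r
#'   comp <- compare_sources(dedup_results, comp_type = "sources")
#'   colSums(comp[-1]) # number of records found in each source
#'   ```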
55 | #' @return dataframe with indicators of where a citation appears, with sources/labels/strings as columns
56 | #' @examples
57 | #' if (interactive()) {
58 | #'   # Load example data from the package
59 | #'   examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
60 | #'   examplecitations <- readRDS(examplecitations_path)
61 | #'
62 | #'   # Deduplicate citations and compare sources
63 | #'   dedup_results <- dedup_citations(examplecitations)
64 | #'   compare_sources(dedup_results, comp_type = "sources")
65 | #' }
66 | 
67 | 
68 | compare_sources <- function(unique_data, comp_type = c("sources", "strings", "labels"), include_references = FALSE) {
69 | 
70 |   out <- list(unique_data %>% dplyr::select("duplicate_id"))
71 | 
72 |   if ("sources" %in% comp_type) {
73 |     source_comparison <- unique_data %>%
74 |       dplyr::select(.data$duplicate_id, .data$cite_source, tidyselect::any_of("record_ids")) %>%
75 |       dplyr::filter(!cite_source == "") %>%
76 |       tidyr::separate_rows(.data$cite_source, sep = ", ", convert = TRUE) %>%
77 |       unique() %>%
78 |       tidyr::pivot_wider(
79 |         id_cols = .data$duplicate_id, names_prefix = "source__", names_from = .data$cite_source, values_from = .data$cite_source,
80 |         values_fn = function(x) TRUE,
81 |         values_fill = FALSE
82 |       )
83 | 
84 |     out <- c(out, list(source_comparison))
85 |   }
86 | 
87 |   if ("strings" %in% comp_type) {
88 |     source_comparison <- unique_data %>%
89 |       dplyr::select(.data$duplicate_id, .data$cite_string, tidyselect::any_of("record_ids")) %>%
90 |       dplyr::filter(!.data$cite_string == "") %>%
91 |       tidyr::separate_rows(.data$cite_string, sep = ", ", convert = TRUE) %>%
92 |       unique() %>%
93 |       tidyr::pivot_wider(
94 |         id_cols = .data$duplicate_id, names_prefix = "string__", names_from = .data$cite_string, values_from = .data$cite_string,
95 |         values_fn = function(x) TRUE,
96 |         values_fill = FALSE
97 |       )
98 | 
99 |     out <- c(out, list(source_comparison))
100 |   }
101 | 
102 |   if ("labels" %in% comp_type) {
103 |     source_comparison <- unique_data %>%
104 |       dplyr::select(.data$duplicate_id, .data$cite_label, tidyselect::any_of("record_ids")) %>%
105 |       dplyr::filter(!cite_label == "") %>%
106 |       tidyr::separate_rows(.data$cite_label, sep = ", ", convert = TRUE) %>%
107 |       unique() %>%
108 |       tidyr::pivot_wider(
109 |         id_cols = .data$duplicate_id, names_prefix = "label__", names_from = .data$cite_label,
110 |         values_from = .data$cite_label,
111 |         values_fn = function(x) TRUE,
112 |         values_fill = FALSE
113 |       )
114 |     out <- c(out, list(source_comparison))
115 | 
116 | 
117 |     if (any(stringr::str_detect(names(source_comparison), "[Ss]earch"))) {
118 |       search_stage <- stringr::str_subset(names(source_comparison), "[Ss]earch")
119 |       if (length(search_stage) == 1) {
120 |         not_in_search <- sum(!source_comparison[[search_stage]])
121 |         if (not_in_search > 0) {
122 |           warning(
123 |             "Beware: ", not_in_search, " records were not included in ", search_stage, " but in other labels.",
124 |             " *If* this label indicates the full search stage, this might indicate that you omitted a source, ",
125 |             "or that the deduplication did not work correctly. Please treat results with caution until you fix this, ",
126 |             "e.g., by using export_csv and then reimport_csv."
127 |           )
128 |         }
129 |       }
130 |     }
131 |   }
132 |   if (length(out) == 0) stop('comp_type must be one or more of "sources", "strings" or "labels"')
133 | 
134 | 
135 |   out <- purrr::reduce(out, dplyr::left_join, by = "duplicate_id")
136 | 
137 |   # Deals with entries missing source or label
138 |   out <- out %>% dplyr::mutate(dplyr::across(dplyr::everything(), ~ tidyr::replace_na(.x, FALSE)))
139 | 
140 |   if (include_references == TRUE) {
141 |     out %>% dplyr::left_join(unique_data %>% dplyr::select(-dplyr::all_of(setdiff(intersect(names(.), names(out)), "duplicate_id"))), by = "duplicate_id")
142 |   } else {
143 |     out
144 |   }
145 | }
146 | 
147 | 
--------------------------------------------------------------------------------
/R/dedup.R:
--------------------------------------------------------------------------------
1 | #' Deduplicate citations - ASySD wrapper
2 | #'
3 | #' This function deduplicates citation data. Note that duplicates are assumed to be published
4 | #' in the same journal, so pre-prints and similar results will not be identified here.
5 | #'
6 | #' @export
7 | #' @param raw_citations Citation dataframe with relevant columns
8 | #' @param manual logical. If TRUE, manually specify pairs of duplicates to merge. Default is FALSE.
9 | #' @param show_unknown_tags When a label, source, or other merged field is missing, do you want this to show as "unknown"?
10 | #' @return unique citations formatted for CiteSource
11 | #'
12 | #' @examples
13 | #' # Load example data from the package
14 | #' examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
15 | #' examplecitations <- readRDS(examplecitations_path)
16 | #'
17 | #' # Deduplicate citations without manually specifying pairs and without showing unknown tags
18 | #' dedup_results <- dedup_citations(examplecitations)
19 | #'
20 | #' # Deduplicate citations with manual specification of pairs and showing unknown tags
21 | #' dedup_results_manual_unknown <- dedup_citations(
22 | #'   examplecitations,
23 | #'   manual = TRUE,
24 | #'   show_unknown_tags = TRUE
25 | #' )
26 | 
27 | dedup_citations <- function(raw_citations, manual = FALSE, show_unknown_tags = FALSE) {
28 | 
29 |   # rename or coalesce columns
30 |   targets <- c("journal", "number", "pages", "isbn", "record_id")
31 |   sources <- c("source", "issue", "start_page", "issn", "ID")
32 |   raw_citations <- add_cols(raw_citations, sources)
33 | 
34 |   for (i in seq_along(targets)) {
35 |     if (targets[i] %in% names(raw_citations)) {
36 |       raw_citations[[targets[i]]] <- dplyr::coalesce(raw_citations[[targets[i]]], raw_citations[[sources[i]]])
37 |     } else {
38 |       raw_citations[[targets[i]]] <- raw_citations[[sources[i]]]
39 |     }
40 |   }
41 | 
42 |   raw_citations <- add_cols(raw_citations, c("record_id", "cite_label", "cite_source", "cite_string"))
43 | 
44 |   raw_citations$source <- raw_citations$cite_source
45 |   raw_citations$label <- raw_citations$cite_label
46 | 
47 |   dedup_results <- ASySD::dedup_citations(raw_citations, merge_citations = TRUE, extra_merge_fields = "cite_string", show_unknown_tags = show_unknown_tags)
48 | 
49 |   if (manual == FALSE) {
50 | 
51 |     unique_post_dedup <- dedup_results$unique
52 |     unique_post_dedup$cite_source <- unique_post_dedup$source
53 |     unique_post_dedup$cite_label <- unique_post_dedup$label
54 | 
55 |     # Remove temporary columns
56 |     unique_post_dedup <- unique_post_dedup %>%
57 |       dplyr::select(-source, -label)
58 | 
59 |     return(unique_post_dedup)
60 | 
61 |   } else {
62 | 
63 |     unique_post_dedup <- dedup_results
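    # With manual = TRUE, ASySD returns a list (the unique citations plus
    # candidate duplicate pairs for manual review), so the cite_* columns are
    # restored on the $unique element before the whole list is returned.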
64 |     unique_post_dedup$unique$cite_source <- unique_post_dedup$unique$source
65 |     unique_post_dedup$unique$cite_label <- unique_post_dedup$unique$label
66 | 
67 |     # Remove temporary columns
68 |     unique_post_dedup$unique <- unique_post_dedup$unique %>%
69 |       dplyr::select(-source, -label)
70 | 
71 |     return(unique_post_dedup)
72 |   }
73 | 
74 | }
75 | 
76 | #' Remove pairs with manual dedup - ASySD wrapper
77 | #'
78 | #' This function merges additional duplicate pairs identified through manual review. Note that duplicates are assumed to be published
79 | #' in the same journal, so pre-prints and similar results will not be identified here.
80 | #'
81 | #' @export
82 | #' @param unique_citations Unique citations post deduplication
83 | #' @param additional_pairs Dataframe of true duplicate pairs to merge
84 | #' @return unique citations formatted for CiteSource
85 | #' @examples
86 | #' # Load example data from the package
87 | #' examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
88 | #' examplecitations <- readRDS(examplecitations_path)
89 | #'
90 | #' # Deduplicate citations
91 | #' dedup_results <- dedup_citations(examplecitations)
92 | #'
93 | dedup_citations_add_manual <- function(unique_citations, additional_pairs) {
94 | 
95 |   unique_citations$source <- unique_citations$cite_source
96 |   unique_citations$label <- unique_citations$cite_label
97 | 
98 |   dedup_results <- ASySD::dedup_citations_add_manual(unique_citations,
99 |                                                      additional_pairs = additional_pairs,
100 |                                                      extra_merge_fields = "cite_string")
101 | 
102 |   dedup_results$cite_source <- dedup_results$source
103 |   dedup_results$cite_label <- dedup_results$label
104 | 
105 |   # Remove temporary columns
106 |   dedup_results <- dedup_results %>%
107 |     dplyr::select(-source, -label)
108 | 
109 |   return(dedup_results)
110 | 
111 | }
112 | 
113 | 
114 | #' ####------ Add columns ------ ####
115 | 
116 | #' This function adds CiteSource columns to citation data if missing
117 | #' @param raw_citations Citation dataframe with relevant columns
118 | #' @param cname column names which are required in dataframe
119 | #' @return Dataframe of citations with any missing required columns added
120 | #' @noRd
121 | add_cols <- function(raw_citations, cname) {
122 |   add <- cname[!cname %in% names(raw_citations)]
123 | 
124 |   if (length(add) != 0) raw_citations[add] <- NA
125 |   raw_citations
126 | }
127 | 
128 | 
129 | 
130 | 
131 | 
--------------------------------------------------------------------------------
/R/export.R:
--------------------------------------------------------------------------------
1 | #' Export deduplicated citations with source data as CSV file
2 | #'
3 | #' This function saves deduplicated citations as a CSV file for further analysis and/or reporting.
4 | #' Metadata can be separated into one column per source, label or string, which facilitates analysis.
5 | #' Note that *existing files are overwritten without warning.*
6 | #'
7 | #' @param unique_citations Dataframe with unique citations, resulting from `dedup_citations()`
8 | #' @param filename Name (and path) of file, should end in .csv
9 | #' @param separate Character vector indicating which (if any) of cite_source, cite_string and cite_label should be split into separate columns to facilitate further analysis.
10 | #' @param trim_abstracts Some databases may return full-text that is misidentified as an abstract. This inflates file size and may lead to issues with Excel,
11 | #'   which cannot deal with more than 32,000 characters per field. Therefore, the default is to trim very long abstracts to 32,000 characters.
Set a lower number to reduce file size, or
12 | #'   NULL to retain abstracts as they are.
13 | #' @return The function saves the deduplicated citations as a CSV file to the specified location.
14 | #' @export
15 | #' @examples
16 | #' if (interactive()) {
17 | #'   # Load example data from the package
18 | #'   examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
19 | #'   examplecitations <- readRDS(examplecitations_path)
20 | #'   dedup_results <- dedup_citations(examplecitations)
21 | #'   export_csv(dedup_results, "cite_sources.csv", separate = "cite_source")
22 | #' }
23 | 
24 | export_csv <- function(unique_citations, filename = "citesource_exported_citations.csv", separate = NULL, trim_abstracts = 32000) {
25 |   # Warn if the filename doesn't end with .csv
26 |   if (tolower(tools::file_ext(filename)) != "csv") {
27 |     warning("Function saves a CSV file, so filename should (usually) end in .csv. For now, name is used as provided.")
28 |   }
29 | 
30 |   # Trim abstracts if required (done before any splitting, so that it also applies when separate is NULL)
31 |   if (!is.null(trim_abstracts)) {
32 |     unique_citations <- unique_citations %>%
33 |       dplyr::mutate(abstract = stringr::str_sub(.data$abstract, 1, trim_abstracts))
34 |   }
35 | 
36 |   if (!is.null(separate)) {
37 |     separate <- match.arg(separate, choices = c("cite_source", "cite_label", "cite_string"), several.ok = TRUE)
38 | 
39 |     separated <- purrr::map_dfc(separate, function(x) {
40 |       unique_citations %>%
41 |         dplyr::select(tidyselect::all_of(x), .data$duplicate_id, .data$record_ids) %>%
42 |         tidyr::separate_rows(1, sep = ", ", convert = TRUE) %>%
43 |         unique() %>%
44 |         tidyr::pivot_wider(
45 |           id_cols = .data$duplicate_id, names_prefix = paste0(stringr::str_remove(x, "cite_"), "_"),
46 |           names_from = 1, values_from = c(.data$record_ids),
47 |           values_fn = function(x) TRUE,
48 |           values_fill = FALSE
49 |         ) %>%
50 |         dplyr::select(tidyselect::starts_with(paste0(stringr::str_remove(x, "cite_"))))
51 |     })
52 | 
53 |     unique_citations <- unique_citations %>%
54 |       dplyr::select(-tidyselect::all_of(separate)) %>%
55 |       dplyr::bind_cols(separated)
56 |   }
57 | 
58 |   utils::write.csv(unique_citations, filename, row.names = FALSE)
59 | }
60 | 
61 | #' Export data frame to RIS file
62 | #'
63 | #' This function saves a data frame as a RIS file with specified columns mapped to RIS fields. Note that
64 | #' *existing files are overwritten without warning.*
65 | #'
66 | #' @param citations Dataframe to be exported to RIS file
67 | #' @param filename Name (and path) of file, should end in .ris
68 | #' @param source_field Field in `citations` representing the source. Default is "DB".
69 | #' @param label_field Field in `citations` representing the label. Default is "C7".
70 | #' @param string_field Field in `citations` representing additional string information. Default is "C8".
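#' @details With the defaults, CiteSource metadata are written to the RIS tags
#'   DB (`cite_source`), C7 (`cite_label`) and C8 (`cite_string`), while the
#'   duplicate and record IDs go to C1 and C2. A sketch of the intended round
#'   trip under these defaults:
#'   ```r
#'   export_ris(dedup_results, "citations.ris")
#'   citations2 <- reimport_ris("citations.ris")
#'   ```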
71 | #' @export
72 | #' @examples
73 | #' if (interactive()) {
74 | #'   # Load example data from the package
75 | #'   examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
76 | #'   examplecitations <- readRDS(examplecitations_path)
77 | #'   dedup_results <- dedup_citations(examplecitations)
78 | #'   # The default tags are spelled out explicitly here for clarity:
79 | #'   export_ris(
80 | #'     dedup_results,
81 | #'     "cite_sources.ris",
82 | #'     source_field = "DB",
83 | #'     label_field = "C7",
84 | #'     string_field = "C8"
85 | #'   )
86 | #' }
87 | 
88 | export_ris <- function(citations, filename = "citations.ris", source_field = "DB", label_field = "C7", string_field = "C8") {
89 | 
90 |   if (tolower(tools::file_ext(filename)) != "ris") warning("Function saves a RIS file, so filename should (usually) end in .ris. For now, name is used as provided.")
91 | 
92 |   synthesisr_codes <- dplyr::bind_rows(
93 |     tibble::tribble(
94 |       ~code, ~field, ~ris_synthesisr,
95 |       source_field, "cite_source", TRUE,
96 |       string_field, "cite_string", TRUE,
97 |       label_field, "cite_label", TRUE,
98 |       "C1", "duplicate_id", TRUE,
99 |       "C2", "record_ids", TRUE,
100 |       "TY", "type", TRUE
101 |     ),
102 |     synthesisr_code_lookup %>% dplyr::filter(.data$ris_synthesisr)
103 |   ) %>% dplyr::distinct(.data$code, .keep_all = TRUE) # Remove fields from synthesisr specification used for CiteSource metadata
104 | 
105 |   # Currently, write_refs does not accept tibbles, thus converted
106 |   write_refs(as.data.frame(citations), file = filename, tag_naming = synthesisr_codes)
107 | }
108 | 
109 | #' Export deduplicated citations to .bib file
110 | #'
111 | #' This function saves deduplicated citations as a BibTeX file with sources, labels and strings
112 | #' included in the `note` field (if they were initially provided for any of the citations). Therefore,
113 | #' beware that **any `note` field that might be included in `citations` will be overwritten**. Also note that
114 | #' *existing files are overwritten without warning.*
115 | #'
116 | #' @param citations Dataframe with unique citations, resulting from `dedup_citations()`
117 | #' @param filename Name (and path) of file, should end in .bib
118 | #' @param include Character. One or more of sources, labels or strings
119 | #' @export
120 | #' @examples
121 | #' if (interactive()) {
122 | #'   # Load example data from the package
123 | #'   examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
124 | #'   examplecitations <- readRDS(examplecitations_path)
125 | #'   dedup_results <- dedup_citations(examplecitations)
126 | #'   export_bib(dedup_results, "cite_sources.bib", include = "sources")
127 | #' }
128 | 
129 | export_bib <- function(citations, filename = "citations.bib", include = c("sources", "labels", "strings")) {
130 |   if (tolower(tools::file_ext(filename)) != "bib") warning("Function saves a BibTeX file, so filename should (usually) end in .bib. For now, name is used as provided.")
131 | 
132 |   include <- stringr::str_remove(include, "s$") %>% paste0("cite_", .)
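  # e.g. "sources" -> "source" -> "cite_source", matching the metadata columns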
133 | 
134 |   notes <- citations %>% dplyr::select(tidyselect::all_of(include))
135 | 
136 |   for (i in seq_along(include)) {
137 |     notes[include[i]] <- paste(include[i], notes[[include[i]]], sep = ": ")
138 |   }
139 | 
140 |   notes <- notes %>%
141 |     tidyr::unite("note", dplyr::everything(), sep = "; ") %>%
142 |     dplyr::pull(.data$note)
143 | 
144 |   citations["note"] <- notes
145 | 
146 |   citations <- citations %>%
147 |     dplyr::select(-dplyr::starts_with("cite_"), -tidyselect::any_of(c("duplicate_id", "record_ids", "record_id")))
148 | 
149 |   write_refs(as.data.frame(citations), format = "bib", file = filename)
150 | }
151 | 
--------------------------------------------------------------------------------
/R/helpers.R:
--------------------------------------------------------------------------------
1 | # Licensed under the MIT licence
2 | # YEAR: 2020
3 | # COPYRIGHT HOLDER: usethis authors
4 | # see https://github.com/r-lib/usethis/blob/main/LICENSE
5 | 
6 | ui_yeah <- function (x, yes = c("Yes", "Definitely", "For sure", "Yup",
7 |                                 "Yeah", "I agree", "Absolutely"), no = c("No way", "Not now",
8 |                                 "Negative", "No", "Nope", "Absolutely not"), n_yes = 1, n_no = 2,
9 |                      shuffle = TRUE, .envir = parent.frame())
10 | {
11 |   x <- glue::glue_collapse(x, "\n")
12 |   x <- glue::glue(x, .envir = .envir)
13 |   if (!interactive()) {
14 |     stop(c("User input required, but session is not interactive.",
15 |            glue::glue("Query: {x}")))
16 |   }
17 |   n_yes <- min(n_yes, length(yes))
18 |   n_no <- min(n_no, length(no))
19 |   qs <- c(sample(yes, n_yes), sample(no, n_no))
20 |   if (shuffle) {
21 |     qs <- sample(qs)
22 |   }
23 |   rlang::inform(x)
24 |   out <- utils::menu(qs)
25 |   out != 0L && qs[[out]] %in% yes
26 | }
--------------------------------------------------------------------------------
/R/import.R:
--------------------------------------------------------------------------------
1 | #' Import citations from file
2 | #'
3 | #' This function imports RIS and BibTeX files with citations and merges them
4 | #' into one long tibble with one record per row.
5 | #'
6 | #' @param files One or multiple RIS or BibTeX files with citations.
7 | #'   Should be .bib or .ris files
8 | #' @param cite_sources The origin of the citation files (e.g. "Scopus", "WOS", "Medline") - vector with one value per file, defaults to file names.
9 | #' @param cite_strings Optional. The search string used (or another grouping to analyse) - vector with one value per file
10 | #' @param cite_labels Optional. An additional label per file, for instance the stage of search - vector with one value per file
11 | #' @param metadata A tibble with file names and metadata for each file. Can be specified as an *alternative* to files, cite_sources, cite_strings and cite_labels.
12 | #' @param verbose Should the number of references and the allocation of labels be reported?
13 | #' @param only_key_fields Should only key fields (e.g., those used by CiteSource) be imported? If FALSE, all RIS data is retained. Can also be a character vector of field names to retain (after they have been renamed by the import function) in addition to the essential ones.
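#'   For instance, `only_key_fields = "keywords"` (a hypothetical extra field
#'   name) would retain `keywords` alongside the standard key fields.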
14 | #' @inheritParams synthesisr_read_refs 15 | #' @return A tibble with one row per citation 16 | #' @examples 17 | #' if (interactive()) { 18 | #' # Import only key fields from the RIS files 19 | #' read_citations(c("res.ris", "res.bib"), 20 | #' cite_sources = c("CINAHL", "MEDLINE"), 21 | #' cite_strings = c("Search1", "Search2"), 22 | #' cite_labels = c("raw", "screened"), 23 | #' only_key_fields = TRUE 24 | #' ) 25 | #' 26 | #' # or equivalently 27 | #' metadata_tbl_key_fields <- tibble::tribble( 28 | #' ~files, ~cite_sources, ~cite_strings, ~cite_labels, ~only_key_fields, 29 | #' "res.ris", "CINAHL", "Search1", "raw", TRUE, 30 | #' "res.bib", "MEDLINE", "Search2", "screened", TRUE 31 | #' ) 32 | #' 33 | #' read_citations(metadata = metadata_tbl_key_fields) 34 | #' } 35 | #' @export 36 | 37 | read_citations <- function(files = NULL, 38 | cite_sources = NULL, 39 | cite_strings = NULL, 40 | cite_labels = NULL, 41 | metadata = NULL, 42 | verbose = TRUE, 43 | tag_naming = "best_guess", 44 | only_key_fields = TRUE) { 45 | 46 | if (is.character(only_key_fields)) { 47 | only_key_fields <- union(key_fields, only_key_fields) 48 | } else if (only_key_fields == TRUE) { 49 | only_key_fields <- key_fields 50 | } else if (!only_key_fields == FALSE) { 51 | stop("only_key_fields must be TRUE, FALSE or a character vector") 52 | } else { 53 | only_key_fields <- NULL 54 | } 55 | 56 | if (is.null(files) && is.null(metadata)) stop("Either files or metadata must be specified.") 57 | if (!is.null(files) && !is.null(metadata)) stop("files and metadata cannot both be specified.") 58 | 59 | if (!is.null(metadata)) { 60 | if (!is.data.frame(metadata)) stop("metadata must be a tibble/dataframe.") 61 | if (!("files" %in% colnames(metadata))) stop("metadata must contain at least a `files` column") 62 | files <- metadata[["files"]] 63 | cite_sources <- metadata[["cite_sources"]] 64 | cite_strings <- metadata[["cite_strings"]] 65 | cite_labels <- metadata[["cite_labels"]] 66 | } 67 | 68 | 69 | if (is.null(cite_sources)) { 70 | cite_sources <- purrr::map_chr(files, ~ tools::file_path_sans_ext(basename(.x))) 71 | 72 | if (any(duplicated(cite_sources))) { 73 | cite_sources <- make.unique(cite_sources, sep = "_") 74 | message("Some file names were duplicated. Therefore, their cite_source values are distinguished by suffixes (_1 etc). 
For greater clarity, specify cite_sources explicitly or rename files.")
75 |     }
76 |   }
77 | 
78 |   if (length(files) != length(cite_sources)) {
79 |     stop("files and cite_sources must be of equal length")
80 |   }
81 |   if (!is.null(cite_strings)) {
82 |     if (length(cite_sources) != length(cite_strings)) {
83 |       stop("cite_sources and cite_strings must be of equal length")
84 |     }
85 |   }
86 |   if (!is.null(cite_labels)) {
87 |     if (length(cite_sources) != length(cite_labels)) {
88 |       stop("cite_sources and cite_labels must be of equal length")
89 |     }
90 |   }
91 | 
92 |   contains_commas <- any(stringr::str_detect(c(cite_sources, cite_labels, cite_strings), ","))
93 | 
94 |   if (!is.na(contains_commas) && contains_commas) {
95 |     stop("',' must not be used in cite_sources, cite_labels or cite_strings (or filenames if these are not specified)")
96 |   }
97 | 
98 |   # Need to import files separately to add origin, platform, and searches
99 |   ref_list <- purrr::map(files,
100 |     \(x) synthesisr_read_refs(x, tag_naming = tag_naming, select_fields = only_key_fields),
101 |     .progress = list(total = 100,
102 |                      format = "Importing files {cli::pb_bar} {cli::pb_percent}")
103 |   )
104 | 
105 |   # Drop empty citations
106 |   ref_list <- lapply(
107 |     ref_list,
108 |     function(data) data[rowSums(is.na(data)) != (ncol(data) - 1), ]
109 |   )
110 | 
111 |   ref_counts <- numeric(length(files))
112 | 
113 |   for (i in seq_along(files)) {
114 |     ref_counts[i] <- nrow(ref_list[[i]])
115 |   }
116 | 
117 |   for (index in seq_len(length(files))) {
118 |     ref_list[[index]]$cite_source <- cite_sources[[index]]
119 |     if (!is.null(cite_strings)) {
120 |       ref_list[[index]]$cite_string <- cite_strings[[index]]
121 |     }
122 |     if (!is.null(cite_labels)) {
123 |       ref_list[[index]]$cite_label <- cite_labels[[index]]
124 |     }
125 |   }
126 | 
127 |   if (verbose) {
128 |     report <- data.frame(
129 |       file = basename(files),
130 |       cite_source = cite_sources,
131 |       cite_string = if (is.null(cite_strings)) NA_character_ else cite_strings,
132 |       cite_label = if (is.null(cite_labels)) NA_character_ else cite_labels,
133 |       citations = ref_counts
134 |     )
135 | 
136 |     message("Import completed - with the following details:")
137 |     message(paste0(utils::capture.output(report), collapse = "\n"))
138 |   }
139 | 
140 |   ref_list %>%
141 |     purrr::map(tibble::as_tibble) %>%
142 |     purrr::reduce(dplyr::bind_rows)
143 | 
144 | }
145 | 
146 | 
--------------------------------------------------------------------------------
/R/reimport.R:
--------------------------------------------------------------------------------
1 | #' Reimport a CSV-file exported from CiteSource
2 | #'
3 | #' This function reimports a CSV file that was tagged and deduplicated by CiteSource.
4 | #' It allows users to continue with further analyses without repeating that step, and also
5 | #' to make any manual corrections to tagging or deduplication. Note that
6 | #' this function only works on CSV files that were written with `export_csv(..., separate = NULL)`.
7 | #'
8 | #' @param filename Name (and path) of CSV file to be reimported, should end in .csv
9 | #' @return A data frame containing the imported citation data if all required columns are present.
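#' @details A sketch of the intended round trip (assuming `dedup_results` from
#'   `dedup_citations()`):
#'   ```r
#'   export_csv(dedup_results, "citations.csv") # separate = NULL, the default
#'   citations <- reimport_csv("citations.csv")
#'   ```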
10 | #' @export
11 | #' @examples
12 | #' \dontrun{
13 | #' # Example usage
14 | #' citations <- reimport_csv("path/to/citations.csv")
15 | #' }
16 | #'
17 | reimport_csv <- function(filename) {
18 |   # Warn if the filename doesn't end with .csv
19 |   if (tolower(tools::file_ext(filename)) != "csv") warning("Function reads a CSV file, so filename should (usually) end in .csv. For now, name is used as provided.")
20 | 
21 |   # Read the CSV file
22 |   unique_citations_imported <- utils::read.csv(filename, stringsAsFactors = FALSE)
23 | 
24 |   # Check if the required columns are present
25 |   if (!all(c("cite_source", "cite_label", "cite_string", "duplicate_id", "record_ids") %in% names(unique_citations_imported))) {
26 |     stop(
27 |       "CiteSource meta-data (i.e. columns cite_source, cite_label, cite_string, duplicate_id, record_ids) were not found in ", filename,
28 |       ". This function is intended to be used for files exported from CiteSource and thus requires these fields.",
29 |       " Note that export_csv must be called with separate = NULL (the default value)."
30 |     )
31 |   }
32 | 
33 |   unique_citations_imported
34 | }
35 | 
36 | #' Reimport a RIS-file exported from CiteSource
37 | #'
38 | #' This function reimports a RIS file that was tagged and deduplicated by CiteSource.
39 | #' It allows users to continue with further analyses without repeating that step, and also
40 | #' allows users to make any manual corrections to tagging or deduplication. The function
41 | #' can also be used to replace the import step (for instance if tags are to be added to
42 | #' individual citations rather than entire files) - in this case, just call `dedup_citations()`
43 | #' after the import.
44 | #'
45 | #' Note that this function's defaults are based on those in `export_ris()` so that these functions
46 | #' can easily be combined.
47 | #'
48 | #' @param filename Name (and path) of RIS file to be reimported, should end in .ris
49 | #' @param source_field Character. Which RIS field should cite_sources be read from? NULL to set to missing
50 | #' @param label_field Character. Which RIS field should cite_labels be read from? NULL to set to missing
51 | #' @param string_field Character. Which RIS field should cite_strings be read from? NULL to set to missing
52 | #' @param duplicate_id_field Character. Which RIS field should duplicate IDs be read from? NULL to recreate based on row number (note that neither duplicate nor record IDs directly affect CiteSource analyses - they can only allow you to connect processed data with raw data)
53 | #' @param record_id_field Character. Which RIS field should record IDs be read from? NULL to recreate based on row number
54 | #' @param tag_naming Synthesisr option specifying how RIS tags should be replaced with names. This should not
55 | #'   be changed when using this function to reimport a file exported from CiteSource. If you import your own
56 | #'   RIS, check `names(CiteSource:::synthesisr_code_lookup)` and select any of the options that start with `ris_`
57 | #' @param verbose Should confirmation message be displayed?
58 | #' @export 59 | #' @examples 60 | #' if (interactive()) { 61 | #' dedup_results <- dedup_citations(citations, merge_citations = TRUE) 62 | #' export_ris(dedup_results$unique, "citations.ris") 63 | #' unique_citations2 <- reimport_ris("citations.ris") 64 | #' } 65 | #' 66 | reimport_ris <- function(filename = "citations.ris", 67 | source_field = "DB", label_field = "C7", string_field = "C8", 68 | duplicate_id_field = "C1", record_id_field = "C2", 69 | tag_naming = "ris_synthesisr", verbose = TRUE) { 70 | 71 | if (!tag_naming %in% names(synthesisr_code_lookup)) { 72 | stop("tag_naming must be one of ", names(synthesisr_code_lookup) %>% stringr::str_subset("^ris_") %>% 73 | glue::glue_collapse(sep = ", ", last = " or ")) 74 | } 75 | 76 | if (is.null(source_field)) source_field <- NA 77 | if (is.null(string_field)) string_field <- NA 78 | if (is.null(label_field)) label_field <- NA 79 | if (is.null(duplicate_id_field)) duplicate_id_field <- NA 80 | if (is.null(record_id_field)) record_id_field <- NA 81 | 82 | 83 | custom_codes <- tibble::tribble( 84 | ~code, ~field, ~tag_naming, 85 | source_field, "cite_source", TRUE, 86 | string_field, "cite_string", TRUE, 87 | label_field, "cite_label", TRUE, 88 | duplicate_id_field, "duplicate_id", TRUE, 89 | record_id_field, "record_ids", TRUE 90 | ) 91 | 92 | names(custom_codes)[3] <- tag_naming 93 | 94 | synthesisr_codes <- dplyr::bind_rows( 95 | custom_codes, 96 | synthesisr_code_lookup %>% dplyr::filter(.data[[tag_naming]]) 97 | ) %>% 98 | dplyr::filter(!is.na(.data$code)) %>% 99 | dplyr::distinct(.data$code, .keep_all = TRUE) # Remove fields from synthesisr specification used for CiteSource metadata 100 | 101 | citations <- read_ref(filename, tag_naming = synthesisr_codes) 102 | 103 | 104 | if (!"cite_source" %in% names(citations)) { 105 | message("No non-empty cite_source values found") 106 | citations$cite_source <- NA 107 | } 108 | 109 | if (!"cite_string" %in% names(citations)) { 110 | message("No non-empty cite_string values found") 111 | citations$cite_string <- NA 112 | } 113 | if (!"cite_label" %in% names(citations)) { 114 | message("No non-empty cite_label values found") 115 | citations$cite_label <- NA 116 | } 117 | 118 | if (!"duplicate_id" %in% names(citations)) { 119 | message("Duplicate IDs not found - will be recreated based on row number") 120 | citations$duplicate_id <- seq_len(nrow(citations)) 121 | } else if (any(is.na(citations$duplicate_id))) { 122 | message("Some duplicate IDs are missing - these will be recreated based on row number") 123 | citations$duplicate_id[is.na(citations$duplicate_id)] <- seq_len(sum(is.na(citations$duplicate_id))) 124 | } 125 | 126 | if (!"record_ids" %in% names(citations)) { 127 | message("Record IDs not found - will be recreated based on row number") 128 | citations$record_ids <- seq_len(nrow(citations)) 129 | } else if (any(is.na(citations$record_ids))) { 130 | message("Some record IDs are missing - these will be recreated based on row number") 131 | citations$record_ids[is.na(citations$record_ids)] <- seq_len(sum(is.na(citations$record_ids))) 132 | } 133 | 134 | citations 135 | } 136 | -------------------------------------------------------------------------------- /R/runShiny.R: -------------------------------------------------------------------------------- 1 | #' A wrapper function to run Shiny Apps from \code{CiteSource}. 
2 | #'
3 | #' Running this function will launch the CiteSource shiny app
4 | #'
5 | #' @return CiteSource shiny app
6 | #' @param app Defaults to CiteSource - possibly other apps will be included in the future
7 | #' @param offer_install Should user be prompted to install required packages if they are missing?
8 | #' @export
9 | #' @aliases run_shiny
10 | #' @examples
11 | #' if (interactive()) {
12 | #'   # To run the CiteSource Shiny app:
13 | #'   runShiny()
14 | #' }
15 | runShiny <- function(app = "CiteSource", offer_install = interactive()) {
16 | 
17 |   # Check that required packages are installed
18 |   req_packages <- c("shiny", "shinyalert", "shinybusy", "shinyWidgets", "plotly")
19 |   if (!all(available <- purrr::map_lgl(req_packages, ~ requireNamespace(.x, quietly = TRUE)))) {
20 |     if (offer_install == TRUE) {
21 |       message("Some packages required for the CiteSource shiny app are missing. The following need to be installed: ",
22 |               glue::glue_collapse(req_packages[!available], sep = ", ", last = " and "))
23 |       if (ui_yeah("Should these packages be installed?", n_no = 1) == TRUE) {
24 |         utils::install.packages(req_packages[!available])
25 |       }
26 |       # Re-check after the (possible) installation, without prompting again
27 |       runShiny(offer_install = FALSE)
28 |     } else {
29 |       stop("Some packages required for the CiteSource shiny app are missing. Ensure you have all of the following installed: ",
30 |            glue::glue_collapse(req_packages, sep = ", ", last = " and "))
31 |     }
32 |   }
33 | 
34 |   # Find and launch the app
35 |   appDir <- system.file("shiny-app", app, package = "CiteSource")
36 | 
37 |   shiny::runApp(appDir, display.mode = "normal")
38 | }
39 | 
40 | # Alias to offer function in line with usual snake_case style
41 | #' @export
42 | run_shiny <- runShiny
--------------------------------------------------------------------------------
/R/sysdata.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/R/sysdata.rda
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CiteSource
2 | 
3 | 
4 | 
5 | 
6 | [![R-CMD-check](https://github.com/ESHackathon/CiteSource/workflows/R-CMD-Check/badge.svg)](https://github.com/ESHackathon/CiteSource/actions)
7 | [![Status](https://img.shields.io/badge/Status-Work%20in%20Progress-orange)](https://github.com/ESHackathon/CiteSource)
8 | [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
9 | 
10 | 
11 | ## About the Package
12 | 
13 | CiteSource was developed to provide researchers with the ability to examine the utility and efficacy of literature resources and search methodologies. The idea behind CiteSource is simple: allow users to deduplicate citation records while maintaining customizable metadata about each citation.
14 | 
15 | **Development**
16 | 
17 | Development of this project began as part of the Evidence Synthesis Hackathon and the Evidence Synthesis & Meta-Analysis in R Conference (ESMARConf) 2022. To learn more about this awesome conference and hackathon, please visit https://esmarconf.org/
18 | 
19 | **License**
20 | 
21 | CiteSource was created under [the General Public License (>=v3)](https://www.gnu.org/licenses/gpl-3.0.html).
22 | 
23 | **Shiny Web Application**
24 | 
25 | Whether you know R or not, we want you to be able to use CiteSource!
Check out our [CiteSource Shiny App!](https://litrev.shinyapps.io/CiteSource/)
26 | 
27 | ## Features
28 | **Customizable Metadata Tags**
29 | 
30 | Users can provide customizable metadata in three fields: cite_source, cite_string, and cite_label. Metadata can include anything from a resource name (e.g. Web of Science, LENS.org, PubMed), a method (database search, handsearching, citation snowballing), a variation used within a method (WoS string #1, WoS string #2, WoS string #3), a research phase (search, Ti/Ab screening, full-text screening), or a unique group of citations (benchmarking articles, articles from a previous review, articles with a specific author affiliation).
31 | 
32 | **Record Merging**
33 | 
34 | The CiteSource deduplication process is better described as record merging, because the customizable metadata from duplicate records is maintained through the creation of a single, primary record. Beyond merging the customizable metadata, the primary record is built from the most complete metadata available across duplicate records (currently the DOI and Abstract fields). The ASySD package, developed by Kaitlyn Hair, serves as the backbone of this process.
35 | 
36 | **Table and Plot Visualizations**
37 | 
38 | Once records are deduplicated, users are able to easily create plots and tables to answer specific questions or to simply explore the data in an effort to develop new hypotheses. Examples of analysis may include how many unique records a specific source contributed, or how traditional methods of searching fare against a new AI discovery tool in finding relevant articles. Users may want to understand the overlap in records between two different search strings or evaluate the impact of including Google Scholar in a review. Before searching, a user may even develop a targeted search to better understand the topical coverage across the databases they intend to search - and, once the search has been developed, how a particular source, string, or method performed in discovering benchmarking articles.
39 | 
40 | **Exporting and Re-importing Data**
41 | 
42 | Once records have been processed, users are able to export data in .csv, .ris, and .bib formats. Furthermore, users are able to reimport .csv and .ris files in order to recreate plots and tables.
43 | 
44 | ## Getting Started
45 | **Installation**
46 | 
47 | Install CiteSource in R with `remotes::install_github("ESHackathon/CiteSource")` - a minimal quick-start sketch is included at the end of this README.
48 | 
49 | **Vignettes**
50 | 
51 | Vignettes covering various use cases can be found on the [CiteSource web page](https://www.eshackathon.org/CiteSource/).
52 | 
53 | ## Feedback
54 | 
55 | Be sure to check out [our discussion page](https://github.com/ESHackathon/CiteSource/discussions) to engage with us or to learn more about the various use cases for CiteSource. You can provide comments/suggestions or suggest a vignette for a specific use case.
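
## Quick Start

A minimal sketch of the core workflow in R (file and source names below are placeholders - see the vignettes for complete worked examples):

```r
library(CiteSource)

# Import search results, tagging each file with its source and screening phase
citations <- read_citations(
  files = c("wos.ris", "scopus.ris"),
  cite_sources = c("WoS", "Scopus"),
  cite_labels = c("search", "search")
)

# Merge duplicate records across sources while retaining the metadata tags
unique_citations <- dedup_citations(citations)

# Cross-tabulate where each unique record was found and visualize the overlap
source_comparison <- compare_sources(unique_citations, comp_type = "sources")
plot_source_overlap_heatmap(source_comparison)
```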
56 | 
57 | 
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: http://www.eshackathon.org/CiteSource/
2 | template:
3 |   bootstrap: 5
4 | 
5 | articles:
6 | - title: Vignettes
7 |   navbar: ~
8 |   contents:
9 |   - citesource_working_example
10 |   - citesource_benchmark_testing
11 |   - citesource_new_benchmark_testing
12 |   - citesource_vignette_db-pre-screen_validation
13 |   - citesource_vignette_db-topic-coverage
14 |   - citesource_analysis_across_screening_phases
15 | 
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## R CMD check results
2 | 
3 | 0 errors | 0 warnings | 1 note
4 | 
5 | * This is a new release.
6 | 
--------------------------------------------------------------------------------
/inst/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/inst/.DS_Store
--------------------------------------------------------------------------------
/inst/extdata/examplecitations.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/inst/extdata/examplecitations.rds
--------------------------------------------------------------------------------
/inst/shiny-app/CiteSource/google_analytics_dev.html:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/inst/shiny-app/CiteSource/google_analytics_main.html:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/inst/shiny-app/CiteSource/www/CS.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/inst/shiny-app/CiteSource/www/CS.gif
--------------------------------------------------------------------------------
/inst/shiny-app/CiteSource/www/about.md:
--------------------------------------------------------------------------------
1 | ## About CiteSource
2 | 
3 | 
4 | 
5 | 
6 | CiteSource is an R package and accompanying Shiny web application designed to support data-driven decision-making during search strategy development. CiteSource also allows users to analyze and report on the impact of information sources and search methods.
7 | 
8 | CiteSource was developed as part of the [Evidence Synthesis Hackathon](https://www.eshackathon.org/) initiative.
9 | 
10 | ---
11 | 
12 | ### Key Features:
13 | 
14 | 
15 | Flexible Metadata for Provenance Tracking: 16 | 17 | > * A core strength of CiteSource is its ability to assign and retain custom metadata to track the *provenance* of each citation – precisely where and how it was found. Users can tag records using three key fields: 18 | > * `cite_source`: Identify the origin database ('Web of Science', 'Scopus'), platform ('Lens.org'), or the specific search method used ('Citation Searching', 'String_1'). 19 | > * `cite_label`: Track citations through screening phases using standardized terms: `search` (for initial results, benchmarks), `screened` (for records passing title/abstract review), and `final` (for records included in the synthesis after full-text review). 20 | > * `cite_string`: Add further detail, such as variations in search string syntax tested ('String_1a_truncation'), specific supplementary methods ('Handsearching_JournalX'), or other custom categories relevant to analysis. 21 | > * This detailed tagging enables rigorous analysis of the performance and contribution of each component of your overall search strategy. 22 | 23 |
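
*In the CiteSource R package, these tags are supplied when files are imported. A minimal sketch (file names and tag values are illustrative):*

```r
library(CiteSource)

# Tag each file with its origin, the string variant used, and the screening phase
citations <- read_citations(
  files = c("wos.ris", "scopus.ris", "handsearch.ris"),
  cite_sources = c("Web of Science", "Scopus", "Handsearching"),
  cite_strings = c("String_1", "String_1", "Handsearch"),
  cite_labels = c("search", "search", "search")
)
```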
24 | 25 |
26 | Advanced Deduplication & Intelligent Merging: 27 | 28 | > * CiteSource employs the [`ASySD` (Automated Systematic Search Deduplicator) R package](https://github.com/camaradesuk/ASySD) to perform robust identification and merging of duplicate records. 29 | > * It conducts both *internal deduplication* (identifying duplicates within a single uploaded file/source, resulting in *distinct* records) and *external deduplication* (identifying duplicates across all uploaded files/sources, resulting in the set of *unique* citations). 30 | > * The process uses *intelligent merging*: custom metadata tags (`source`, `label`, `string`) from all identified duplicates are combined onto the primary record, preserving the full discovery history. 31 | > * The most complete bibliographic data (prioritizing DOI, Abstract) across duplicates is retained in the primary record. 32 | > * An optional *manual review* stage presents potential duplicates that fall below the automatic matching threshold, allowing users to confirm or reject merges for maximum accuracy. 33 | 34 |
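
*In the R package this is a single call; the sketch below follows the pattern used in the package's own documentation examples:*

```r
# Identify duplicates within and across the imported files and merge them;
# the cite_source / cite_string / cite_label tags of all copies of a record
# are combined onto the retained primary record
dedup_results <- dedup_citations(citations, merge_citations = TRUE)

# The set of unique citations used by all downstream plots and tables
unique_citations <- dedup_results$unique
```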
35 | 36 |
37 | Data-Driven Analysis & Visualization:
38 | 
39 | > * Once deduplication is complete, CiteSource offers a suite of analysis and visualization tools designed specifically to speed up the *iterative process* of developing, testing, and validating search strategies:
40 | >   * Visualize Overlap: Use interactive **Heatmaps** (pairwise overlap) and **Upset Plots** (multi-set intersections) to understand shared and unique records across sources, labels, or strings.
41 | >   * Track Phase Progression: Employ the **Phase Analysis plot** (bar chart) to see the flow of unique and duplicate records through screening stages (`search` -> `screened` -> `final`).
42 | >   * Generate Summary Tables: Access quantitative insights via automated tables detailing:
43 | >     * Initial Record counts (showing the impact of internal deduplication).
44 | >     * Record Summaries (detailing unique/overlapping records contributed by each source/method).
45 | >     * Precision/Sensitivity calculations (evaluating source/method performance against the `final` included set).
46 | >   * A detailed, interactive **Record Level Table** for quickly examining and linking to citations.
47 | 
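
*A rough R equivalent of these tools (exact plotting arguments may differ between package versions):*

```r
# Indicator table of where each unique record was found
source_comparison <- compare_sources(unique_citations, comp_type = "sources")

plot_source_overlap_heatmap(source_comparison) # pairwise overlap
plot_source_overlap_upset(source_comparison)   # multi-set intersections

# Unique vs. shared contributions per source, e.g. for contribution plots
n_unique <- count_unique(unique_citations)
plot_contributions(n_unique)
```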
49 | 50 |
51 | Enhanced Reporting & Transparent Export: 52 | 53 | > * CiteSource facilitates *transparent reporting* of search methods and results, aligning with guidelines like PRISMA. 54 | > * Export your final, deduplicated dataset in standard bibliographic formats (`.csv`, `.ris`, `.bib`). 55 | > * The custom metadata is embedded directly into standard fields within the export files (e.g., using C1, C2, C7, C8, DB fields in `.ris` format), providing a clear and reproducible audit trail for your methodology. 56 | 57 |
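
*In R, exporting and later resuming an analysis looks roughly like this (file name illustrative; the calls mirror the package's reimport documentation):*

```r
# Write the deduplicated set, embedding the tags in RIS fields
export_ris(unique_citations, "citations.ris")

# Resume later without repeating the deduplication step
unique_citations <- reimport_ris("citations.ris")
```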
58 | 
59 | ---
60 | 
61 | ### Why use CiteSource for Evidence Synthesis?
62 | 
63 | CiteSource is built for anyone involved in evidence synthesis. It helps you:
64 | 
65 | * **Evaluate and optimize** information source selection based on unique record contributions.
66 | * **Refine and optimize** search strings by quickly testing variations.
67 | * **Analyze and report** the added value of different search methods, including supplementary searching techniques.
68 | * **Perform benchmark testing** to ensure key articles are captured by your strategy.
69 | * **Increase transparency and effectiveness** of your search strategy and processes through built-in tables for reporting.
70 | * **Save time** during iterative search development.
71 | 
72 | ### What Other Applications does CiteSource Serve?
73 | 
74 | * Training in evidence synthesis search methods - MLIS classroom use for skill/knowledge development.
75 | * Methods research & development - large-scale methods testing, quick/live updates to analysis.
76 | * Library collection development - analyzing coverage of new databases compared to current subscriptions.
77 | 
78 | ---
79 | 
80 | *CiteSource is available both as this interactive Shiny application and as a full R package with detailed vignettes. For more information on the R package, visit the [CiteSource Website](https://www.eshackathon.org/CiteSource/).*
--------------------------------------------------------------------------------
/inst/shiny-app/CiteSource/www/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/inst/shiny-app/CiteSource/www/favicon.png
--------------------------------------------------------------------------------
/inst/shiny-app/CiteSource/www/use-cases.md:
--------------------------------------------------------------------------------
1 | ## CiteSource Use Cases: Overview
2 | 
3 | 
4 | 
5 | CiteSource provides a suite of tools to support data-driven decision-making throughout the evidence synthesis process. Its applications generally fall into two main categories: **Optimizing Search Strategies** (typically during protocol development and iterative searching) and **Analyzing Search Impact** (often after screening is complete, for reporting and methodological insights). Additional applications extend to training and resource management.
6 | 
7 | ---
8 | 
9 | ### I. Optimizing Search Strategies
10 | 
11 | CiteSource enables researchers to move beyond reliance on experience or potentially outdated guidance by providing empirical data specific to their project *during* the search development phase.
12 | 
13 | 
14 | Information Source/Method Selection & Optimization
15 | 
16 | > Choosing the most effective and efficient set of databases, platforms, or indexes (e.g., Web of Science, Scopus, ASFA, Dimensions, OATD) can be challenging, especially for interdisciplinary topics where overlap and unique contributions are unknown. CiteSource addresses this by allowing users to empirically compare potential sources *before* committing significant time. After uploading initial search results and tagging them using the `cite_source` field (e.g., `Web of Science`, `Scopus`), deduplication reveals the overlapping and unique records across sources and methods. This analysis enables informed, data-driven decisions about which sources and methods provide the best return on investment and helps optimize the selection, potentially reducing redundancy. Key CiteSource features used include:
17 | > * Tagging records with `cite_source` metadata.
18 | > * Robust internal and external deduplication (using `ASySD`).
19 | > * Visualization of overlap using interactive **Heatmaps** and **Upset Plots**.
20 | > * Quick analysis of individual citations using the interactive **Record Level Table**.
21 | 
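
*For R users, this comparison looks roughly like the following once sources have been tagged and deduplicated:*

```r
# After tagging each database's results via cite_source and deduplicating:
source_comparison <- compare_sources(unique_citations, comp_type = "sources")
plot_source_overlap_heatmap(source_comparison) # pairwise overlap
plot_source_overlap_upset(source_comparison)   # unique vs. shared records
```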
23 | 24 |
25 | Search String Development & Optimization 26 | 27 | > Developing effective search strings is an iterative process involving testing terms, syntax variations, Boolean logic, proximity operators, field codes, etc. Comparing the impact of these subtle changes across potentially multiple databases is time-consuming. CiteSource assists by streamlining the analysis of string effectiveness. Users can upload results from different string variations, tag them using `cite_source` and `cite_string` (e.g., `String_1`, `String_2_proximity`), and visualize the impact on retrieval after deduplication. This allows for rapid assessment of how changes affect results, speeding up refinement for an optimal balance of sensitivity and precision and helping identify errors in logic or syntax. CiteSource facilitates this via: 28 | > * Tagging result sets with `cite_source` and `cite_string` metadata. 29 | > * Deduplication to compare results accurately. 30 | > * Visualization of unique/overlapping records retrieved by different strings using **Upset Plots**. 31 | > * Quick examination of individual citations using the interactive **Record Level Table**. 32 | 33 |
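
*A sketch in R, assuming one export per string variant (file and string names are illustrative):*

```r
citations <- read_citations(
  files = c("wos_string1.ris", "wos_string2.ris"),
  cite_sources = c("WoS", "WoS"),
  cite_strings = c("String_1", "String_2_proximity"),
  cite_labels = c("search", "search")
)
unique_citations <- dedup_citations(citations)

# Which records does each string variant retrieve uniquely?
string_comparison <- compare_sources(unique_citations, comp_type = "strings")
plot_source_overlap_upset(string_comparison)
```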
34 | 35 |
36 | Benchmark Testing 37 | 38 | > Ensuring a search strategy retrieves known, key relevant articles (benchmark or reference articles) is crucial for assessing sensitivity. CiteSource facilitates this by comparing search results against a predefined benchmark set. After uploading search results and the benchmark set (tagging each appropriately using `cite_source`, `cite_string`, and `cite_label`), deduplication allows for direct comparison. This provides a quantitative assessment of how well different strings or sources capture the benchmark articles, highlighting potential weaknesses or indexing gaps. While benchmark sets should be used cautiously due to potential bias, this process aids refinement and is useful for updates or developing standardized protocols. Key functionalities include: 39 | > * Tagging benchmark sets and search results distinctly using `cite_source` and `cite_label`. 40 | > * Deduplication to identify matches between search results and the benchmark set. 41 | > * Using **Upset Plots** to visualize captured vs. missed benchmark articles across different strings/sources. 42 | > * Investigating specific missed articles using the interactive **Record Level Table**. 43 | 44 |
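
*A sketch in R; the benchmark set gets its own tags so it can be traced after deduplication (file names are illustrative):*

```r
citations <- read_citations(
  files = c("search_results.ris", "benchmark.ris"),
  cite_sources = c("Scopus", "Benchmark"),
  cite_labels = c("search", "benchmark")
)
unique_citations <- dedup_citations(citations)

# Benchmark records overlapping with search results were captured;
# those unique to the benchmark set were missed
plot_source_overlap_upset(compare_sources(unique_citations, comp_type = "sources"))
```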
45 | 46 |
47 | Efficient Iterative Testing (Overarching Benefit) 48 | 49 | > The entire process of testing variations in sources, strings, and methods is inherently iterative. CiteSource significantly compresses this cycle by providing rapid analysis and visualization (**Heatmaps**, **Upset Plots**, **Summary Tables**) immediately after deduplication. This drastic reduction in the time needed to evaluate the impact of each iteration saves researcher time and allows for more thorough testing and validation, leading to a more optimized and well-documented strategy. 50 | 51 |
52 | 53 | --- 54 | 55 | ### II. Analyzing Search Impact (Post-Screening / Reporting) 56 | 57 | After screening is complete, CiteSource analyzes the *actual* contribution of different search components to the final set of included studies and enhances reporting. 58 | 59 |
60 | Analyzing Information Source & Search Method Contribution 61 | 62 | > Understanding which sources or methods were most effective in identifying the studies ultimately included in the synthesis is crucial for methodological reflection and reporting. CiteSource enables this analysis by tracking records through screening phases. By tagging records with `cite_source`/`cite_string` and progressively updating the `cite_label` (`search` -> `screened` -> `final`), users can quantify the "true impact" or ROI of each component. This identifies high-yield sources/methods versus those contributing mostly irrelevant records, providing valuable data for reporting and future strategy refinement. Analysis tools include: 63 | > * Tracking records using `cite_source`, `cite_string`, and `cite_label` tags. 64 | > * Visualizing the flow through screening stages with the **Bar Chart (Phase Analysis Plot)**. 65 | > * Quantifying performance using the **Precision/Sensitivity Table** (calculating precision and recall against the `final` set). 66 | > * Examining contributions at each stage using the **Record Summary Table**. 67 | 68 |
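
*In R, this quantitative summary can be produced with `citation_summary_table()` (signature as documented in the package reference):*

```r
# Records carry cite_label values for each phase they survived
# ("search", "screened", "final")
citation_summary_table(
  unique_citations,
  comparison_type = "sources",
  screening_label = c("screened", "final")
)
```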
69 | 70 |
71 | Enhanced Reporting & Transparency 72 | 73 | > Reporting guidelines like PRISMA require transparent and detailed documentation of the search process. CiteSource directly supports this by generating clear outputs and ensuring provenance is maintained. The plots and tables offer visual and quantitative summaries of the search process, outcomes, and source/method contributions. Furthermore, exporting the final dataset embeds the custom metadata tags (`cite_source`, `cite_label`, `cite_string`) into standard bibliographic fields (e.g., C1, C2, C7, C8, DB in `.ris` format), providing a clear, reproducible audit trail. This enhances transparency and allows reviewers/readers to scrutinize the methodology effectively. Key outputs for reporting include: 74 | > * Ready-to-use **plots** (**Heatmaps**, **Upset Plots**, **Bar Charts**). 75 | > * Summary **tables** (**Initial Record**, **Record Summary**, **Precision/Sensitivity**, **Record Level**). 76 | > * Exported datasets (`.csv`, `.ris`, `.bib`) with embedded provenance metadata. 77 | 78 |
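
*A sketch of these reporting outputs in R (the CSV file name is illustrative; `export_csv()` is assumed to take the data and a file name, analogous to `export_ris()`):*

```r
# Interactive record-level table, e.g. restricted to the final included set
unique_citations |>
  dplyr::filter(stringr::str_detect(cite_label, "final")) |>
  record_level_table(return = "DT")

# Export with the provenance tags embedded, for a reproducible audit trail
export_csv(unique_citations, "citesource_export.csv")
```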
79 | 80 | --- 81 | 82 | ### III. Broader Applications 83 | 84 | Beyond individual reviews, CiteSource has wider utility: 85 | 86 |
87 | Training & Education 88 | 89 | > CiteSource serves as an effective training tool for evidence synthesis methods. Its interactive visualizations provide a hands-on way for students and early-career researchers to understand abstract concepts like database overlap, string variation impacts, and benchmark testing. Instructors can use it to demonstrate best practices in real-time, building practical skills and competence in systematic searching. 90 | 91 |
92 | 93 |
94 | Library Collection Development 95 | 96 | > Librarians can leverage CiteSource to support collection development decisions. By analyzing search results from institutional researchers or targeted test searches, they can gain empirical data on database coverage and overlap for specific research topics. This helps justify subscription costs, compare existing resources with potential new ones, and make effective recommendations based on demonstrated value and uniqueness. 97 | 98 |
99 | 100 |
101 | Methodological Research 102 | 103 | > CiteSource facilitates methodological research on searching itself. When researchers use the tool and report their quantitative findings on source/method performance (e.g., precision/sensitivity, unique contributions), they contribute valuable empirical data to the wider evidence synthesis community. Aggregating such findings across studies can inform the development and refinement of evidence-based search guidelines and best practices, potentially supporting "Studies Within A Review" (SWAR) focused on search methodology. 104 | 105 |
106 | 107 | --- -------------------------------------------------------------------------------- /inst/shiny-app/CiteSource/www/user_guide.md: -------------------------------------------------------------------------------- 1 | ## CiteSource User Guide 2 | 3 | 4 | 5 | > CiteSource has a number of applications. This guide walks users through the step-wise process of uploading, deduplicating and analyzing data > within the shiny application. For step by step instructions for running CiteSource in R, [check out our vignettes](https://www.eshackathon.org/CiteSource/articles/) 6 | --- 7 | 8 | ### Using CiteSource: Step-by-Step 9 | 10 |
11 | Step 1: File Upload, Labeling, & Re-importing
12 | 
13 | > **Standard Upload:**
14 | > 
15 | > * Navigate to the 'File upload' tab.
16 | > * Use the 'Set Label for Uploaded File(s)' dropdown to select the appropriate stage for the file(s) you are about to upload (e.g., `search`, `screened`, or `final`). This label helps organize records, especially for phase analysis and some summary tables.
17 | > * Click the file input area ('Browse...') to select one or more citation files from your computer. Supported formats are `.ris`, `.bib`, and `.txt`.
18 | > * The label you selected will be applied to all citation records within the file(s) uploaded in that specific action.
19 | > 
20 | > **Re-importing Previously Processed Data:**
21 | > 
22 | > * If you have previously exported data from CiteSource as a `.ris` or `.csv` file (these exported files contain special `cite_` columns), you can re-upload this file directly.
23 | > * On the 'File upload' tab, use the 'OR: Re-upload an .ris or .csv exported from CiteSource' file input.
24 | > * This bypasses the initial upload processing and deduplication steps (Steps 3 & 4), allowing you to proceed directly to the 'Visualise' and 'Tables' tabs with your previously processed data.
25 | > 
26 | > * **NOTE**: *Raw citation exports from some platforms (e.g. OVID) may be incompatible due to abnormal .ris field use or structuring. If you are having issues, please be sure to try importing them into citation software (e.g. Zotero, EndNote) and exporting them before uploading to CiteSource.*
27 | 
29 | 30 |
31 | Step 2: Review Uploads & Edit 32 | 33 | > * After uploading citations, a summary table appears in the main panel showing each file, its detected record count, and the assigned source name, label, and string. 34 | > * To correct the auto-assigned source name, or to change the label or string for *all* records from a specific file after upload, you can double-click the corresponding cell in the table and type the new value. 35 | 36 |
37 | 38 |
39 | Step 3: Automated Deduplication 40 | 41 | > * Navigate to the 'Deduplicate' tab and ensure you are on the 'Automated deduplication' sub-tab. 42 | > * Click the 'Find duplicates' button. 43 | > * CiteSource will process all the records you've uploaded. It compares metadata fields (like DOI, title, authors, journal, year, volume, pages) to identify potential duplicates both *within* the same source file (internal deduplication) and *across* different source files (external deduplication). 44 | > * A pop-up message will summarize the results, indicating the number of unique records found and if any potential duplicates require manual review. 45 | 46 |
47 | 48 |
49 | Step 4: Manual Deduplication (If Needed) 50 | 51 | > * If the summary message from Step 3 indicates potential duplicates need review, or if you want to manually inspect potential matches, go to the 'Manual deduplication' sub-tab. 52 | > * Pairs of records identified as potential duplicates are displayed. Each row represents a pair, showing selected metadata side-by-side (e.g., Title 1 vs. Title 2). 53 | > * Use the 'Choose columns' filter dropdown (filter icon) above the table to select which metadata fields (e.g., author, year, abstract) you want to see for comparison. 54 | > * Carefully review each pair. If you determine a pair represents the *same* underlying citation, click on that row to select it. 55 | > * After selecting all rows that are true duplicates, click the 'Remove additional duplicates' button (this button only appears after you select at least one row). This merges the selected pairs, keeping only one unique record with combined metadata. 56 | > * If you finish reviewing or decide no manual merging is needed, click 'Go to visualisations'. 57 | 58 |
59 | 60 |
61 | Step 5: Visualise Overlap 62 | 63 | > * Navigate to the 'Visualise' tab. 64 | > * Use the sidebar controls to tailor the analysis: 65 | > * **Choose comparison type:** Select whether you want to compare overlap based on 'sources' (original files/databases), 'labels' (e.g., search vs screened), or 'strings' (if used). 66 | > * **Filter data:** Select specific sources, labels, or strings to include in the visualizations. 67 | > * Explore the generated plots: 68 | > * **Heatmap:** This matrix shows pairwise overlap. Each cell represents the number of citations shared between two groups (the groups depend on your chosen comparison type). Darker cells indicate higher overlap. Hover over cells to see exact counts. It helps quickly identify pairs with significant commonality. 69 | > * **Upset Plot:** This plot visualizes intersections among multiple groups simultaneously. The large bottom bar chart shows the number of citations unique to specific combinations of groups (e.g., found only in Source A, or found in both Source A and B but not C). The smaller top bar chart shows the total number of unique citations in each individual group. It's excellent for understanding complex overlap patterns involving more than two groups. 70 | > * **Phase Analysis Plot:** This plot is most useful when comparing by 'labels' representing stages (e.g., `search`, `screened`, `final`). It shows the total number of records at each stage, broken down into those that are unique (first identified at that stage) versus those that were already found in a previous stage (duplicates relative to earlier stages). It helps visualize the yield and deduplication effectiveness across a review workflow. 71 | > * Use the 'Download' buttons above each plot to save them as image files. 72 | 73 |
74 | 75 |
76 | Step 6: Summary Tables & Record Review
77 | 
78 | > * Navigate to the 'Tables' tab.
79 | > * Use the sidebar filters (Sources, Labels, Strings) to select the subset of data you want summarized.
80 | > * Generate specific summary tables by clicking the corresponding 'Generate...' button:
81 | >   * **Initial Records Table:** Provides a high-level count based on the earliest phase (typically records labeled `search`). Shows the total uploaded records for that phase. This table distinguishes between the number of uploaded records and duplicates found *within* each source file.
82 | >   * **Detailed Record Table:** Breaks down the citation counts by individual source/method (within your selected filters). For each set of records, it shows how many citations were unique to that set and how many were also found in other sets. This helps identify which sources/methods contributed the most unique records and which have a high level of overlap.
83 | >   * **Precision/Sensitivity Table:** Calculates performance metrics, requiring data labeled as `final` to be present and selected. It compares each source, method, or search string against this 'final' set. 'Precision' tells you what proportion of records retrieved by a source were actually relevant ('final' records). 'Sensitivity' (or Recall) tells you what proportion of all relevant ('final') records were found by that specific source. Useful for evaluating search strategy performance.
84 | >   * **Review individual records:** Click 'Generate the table' on the "Review individual records" sub-tab to view the detailed, deduplicated citation list. This table may take a while to load if you have a large number of records.
85 | > 
86 | > **Using the Interactive Record Table:**
87 | > 
88 | > * **Expand/Collapse Row:** Click the `⊕` symbol in a row to view the full APA reference. Click `⊖` to hide it again.
89 | > * **Sort by Single Column:** Click any column header (like 'Citation' or a source name) to sort the table by that column's values. Click the header again to reverse the sort order.
90 | > * **Sort by Multiple Columns:** Click the primary column header you want to sort by. Then, hold down the **Shift** key on your keyboard and click a second column header. You can repeat this for more sorting levels.
91 | > * **Filter/Search:** Type into the search box located at the top-right of the table to dynamically filter records based on any information displayed.
92 | > * **Download Data:** Click the 'Download CSV' button (located above the table, next to 'Print') to save the data currently shown in the table (including applied filters) as a CSV file.
93 | 
95 | 96 |
97 | Step 7: Export Results 98 | 99 | > * Navigate to the 'Export' tab. 100 | > * This tab becomes active after you have run the deduplication process (Step 3). 101 | > * Click the button corresponding to your desired file format: 'Download csv', 'Download RIS', or 'Download BibTex'. 102 | > * The custom metadata is embedded directly into fields within the export files (e.g., using C1, C2, C7, C8, DB fields in `.ris` format) 103 | > * This will save the final dataset of unique citations (after both automated and any manual deduplication). 104 | > * **Note:** Only `.csv` and `.ris` files can be re-imported later. 105 | 106 |
107 | 
108 | ---
--------------------------------------------------------------------------------
/man/CiteSource-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CiteSource.R
3 | \docType{package}
4 | \name{CiteSource-package}
5 | \alias{CiteSource}
6 | \alias{CiteSource-package}
7 | \title{CiteSource: A package to compare sources of citation records}
8 | \description{
9 | The CiteSource package supports evidence aggregation by helping with the
10 | processing of results of various searches in different sources. It allows users to
11 | deduplicate results while retaining meta-data on where those results were
12 | found, and then enables users to compare the contribution of different sources.
13 | }
14 | \seealso{
15 | Useful links:
16 | \itemize{
17 | \item \url{https://www.eshackathon.org/CiteSource}
18 | \item Report bugs at \url{https://github.com/ESHackathon/CiteSource/issues}
19 | }
20 | 
21 | }
22 | \author{
23 | \strong{Maintainer}: Trevor Riley \email{trevor.riley@noaa.gov} (\href{https://orcid.org/0000-0002-6834-9802}{ORCID})
24 | 
25 | Authors:
26 | \itemize{
27 | \item Kaitlyn Hair \email{kaitlyn.hair@ed.ac.uk} (\href{https://orcid.org/0000-0003-0180-7343}{ORCID})
28 | \item Lukas Wallrich \email{lukas.wallrich@gmail.com} (\href{https://orcid.org/0000-0003-2121-5177}{ORCID})
29 | \item Matthew Grainger \email{matthewjamesgrainger@gmail.com} (\href{https://orcid.org/0000-0001-8426-6495}{ORCID})
30 | \item Sarah Young \email{sarahy@andrew.cmu.edu} (\href{https://orcid.org/0000-0002-8301-5106}{ORCID})
31 | \item Chris Pritchard \email{chris.pritchard@ntu.ac.uk} (\href{https://orcid.org/0000-0002-1143-9751}{ORCID})
32 | \item Neal Haddaway \email{nealhaddaway@gmail.com} (\href{https://orcid.org/0000-0003-3902-2234}{ORCID})
33 | }
34 | 
35 | Other contributors:
36 | \itemize{
37 | \item Martin Westgate (Author of included synthesisr fragments) [copyright holder]
38 | \item Eliza Grames (Author of included synthesisr fragments) [copyright holder]
39 | }
40 | 
41 | }
42 | 
--------------------------------------------------------------------------------
/man/calculate_detailed_records.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/new_count_and_table.R
3 | \name{calculate_detailed_records}
4 | \alias{calculate_detailed_records}
5 | \title{Calculate Detailed Record Counts}
6 | \usage{
7 | calculate_detailed_records(
8 |   unique_citations,
9 |   n_unique,
10 |   labels_to_include = NULL
11 | )
12 | }
13 | \arguments{
14 | \item{unique_citations}{A data frame containing unique citations.
15 | The data frame must include the columns \code{cite_source}, \code{cite_label}, and \code{duplicate_id}.}
16 | 
17 | \item{n_unique}{A data frame containing counts of unique records, typically filtered
18 | by specific criteria (e.g., \code{cite_label == "search"}).}
19 | 
20 | \item{labels_to_include}{An optional character vector of labels to filter the citations.
21 | If provided, only citations matching these labels will be included in the counts.
22 | If 'NULL', all labels are included. Default is 'NULL'.}
23 | }
24 | \value{
25 | A data frame with detailed counts for each citation source, including:
26 | \itemize{
27 | \item \verb{Records Imported}: Total number of records imported.
28 | \item \verb{Distinct Records}: Number of distinct records after deduplication.
29 | \item \verb{Unique Records}: Number of unique records specific to a source.
30 | \item \verb{Non-unique Records}: Number of records found in other sources.
31 | \item \verb{Source Contribution \%}: Percentage contribution of each source to the total distinct records.
32 | \item \verb{Source Unique Contribution \%}: Percentage contribution of each source to the total unique records.
33 | \item \verb{Source Unique \%}: Percentage of unique records within the distinct records for each source.
34 | }
35 | }
36 | \description{
37 | This function processes a dataset and expands the 'cite_source' column, filters on
38 | user-specified labels (if provided), and calculates detailed counts such as the records imported,
39 | distinct records, unique records, non-unique records, and several percentage contributions for
40 | each citation source/method. It also adds a total row summarizing these counts.
41 | }
42 | \details{
43 | The function first checks if the required columns are present in the input data frames.
44 | It then expands the \code{cite_source} column, filters the data based on the provided labels (if any),
45 | and calculates various counts and percentages for each citation source. The function also adds
46 | a total row summarizing these counts across all sources.
47 | }
48 | \examples{
49 | # Example usage with a sample dataset
50 | unique_citations <- data.frame(
51 |   cite_source = c("Source1, Source2", "Source2", "Source3"),
52 |   cite_label = c("Label1", "Label2", "Label1"),
53 |   duplicate_id = c(1, 2, 3)
54 | )
55 | n_unique <- data.frame(
56 |   cite_source = c("Source1", "Source2", "Source3"),
57 |   cite_label = c("search", "search", "search"),
58 |   unique = c(10, 20, 30)
59 | )
60 | calculate_detailed_records(unique_citations, n_unique, labels_to_include = "search")
61 | }
62 | 
--------------------------------------------------------------------------------
/man/calculate_initial_records.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/new_count_and_table.R
3 | \name{calculate_initial_records}
4 | \alias{calculate_initial_records}
5 | \title{Calculate Initial Record Counts for Unique Citations}
6 | \usage{
7 | calculate_initial_records(unique_citations, labels_to_include = NULL)
8 | }
9 | \arguments{
10 | \item{unique_citations}{A data frame containing the unique citations.
11 | It must contain the columns \code{cite_source}, \code{cite_label}, and \code{duplicate_id}.}
12 | 
13 | \item{labels_to_include}{An optional character vector of labels to filter the citations.
14 | If provided, only citations matching these labels will be included in the counts.
15 | Default is NULL, meaning no filtering will be applied.}
16 | }
17 | \value{
18 | A data frame containing the counts of \verb{Records Imported} and \verb{Distinct Records}
19 | for each citation source. The data frame also includes a "Total" row summing
20 | the counts across all sources.
21 | }
22 | \description{
23 | This function processes a dataset of unique citations, expands the \code{cite_source} column,
24 | filters based on user-specified labels (if provided), and then calculates the number
25 | of records imported and distinct records for each citation source. It also adds a
26 | total row summarizing these counts.
27 | }
28 | \details{
29 | The function first checks if the required columns are present in the input data frame.
30 | It then expands the \code{cite_source} column to handle multiple sources listed in a 31 | single row and filters the dataset based on the provided labels (if any). 32 | The function calculates the number of records imported (total rows) and the number 33 | of distinct records (unique \code{duplicate_id} values) for each citation source. 34 | Finally, a total row is added to summarize the counts across all sources. 35 | } 36 | \examples{ 37 | # Example usage with a sample dataset 38 | unique_citations <- data.frame( 39 | cite_source = c("Source1", "Source2", "Source3"), 40 | cite_label = c("Label1", "Label2", "Label3"), 41 | duplicate_id = c(1, 2, 3) 42 | ) 43 | calculate_initial_records(unique_citations) 44 | } 45 | -------------------------------------------------------------------------------- /man/calculate_phase_count.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/count.R 3 | \name{calculate_phase_count} 4 | \alias{calculate_phase_count} 5 | \title{Calculate phase counts, precision, and recall} 6 | \usage{ 7 | calculate_phase_count(unique_citations, citations, db_colname) 8 | } 9 | \arguments{ 10 | \item{unique_citations}{A dataframe containing unique citations with phase information. 11 | The phase information must be provided in a column named 'cite_label' in the dataframe.} 12 | 13 | \item{citations}{A dataframe containing all citations with phase information. The phase 14 | information must be provided in a column named 'cite_label' in the dataframe.} 15 | 16 | \item{db_colname}{The name of the column representing the source database.} 17 | } 18 | \value{ 19 | A dataframe containing distinct counts, counts for different phases, precision, 20 | and recall for each source, as well as totals. 21 | } 22 | \description{ 23 | This function calculates counts for different phases and calculates precision and recall 24 | for each source based on unique citations and citations dataframe. The phases should be 25 | labeled as 'screened' and 'final' (case-insensitive) in the input dataframes. The function 26 | will give a warning if these labels are not present in the input dataframes. 27 | } 28 | \details{ 29 | The function will give a warning if 'screened' and 'final' labels are not present 30 | in the 'cite_label' column of the input dataframes. 
31 | } 32 | \examples{ 33 | unique_citations <- data.frame( 34 | db_source = c("Database1", "Database1", "Database2", "Database3", "Database3", "Database3"), 35 | cite_label = c("screened", "final", "screened", "final", "screened", "final"), 36 | duplicate_id = c(102, 102, 103, 103, 104, 104), 37 | other_data = 1:6 38 | ) 39 | 40 | citations <- data.frame( 41 | db_source = c("Database1", "Database1", "Database1", "Database2", "Database2", "Database3"), 42 | cite_label = c("screened", "final", "screened", "final", "screened", "final"), 43 | other_data = 7:12 44 | ) 45 | 46 | result <- calculate_phase_count(unique_citations, citations, "db_source") 47 | result 48 | } 49 | -------------------------------------------------------------------------------- /man/calculate_phase_records.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/new_count_and_table.R 3 | \name{calculate_phase_records} 4 | \alias{calculate_phase_records} 5 | \title{Calculate Phase Counts with Precision and Recall} 6 | \usage{ 7 | calculate_phase_records(unique_citations, n_unique, db_colname) 8 | } 9 | \arguments{ 10 | \item{unique_citations}{A data frame containing unique citations. 11 | It must include the columns \code{cite_source}, \code{cite_label}, and \code{duplicate_id}.} 12 | 13 | \item{n_unique}{A data frame containing counts of unique records. 14 | Typically filtered by specific criteria, such as \code{cite_label == "search"}.} 15 | 16 | \item{db_colname}{The name of the column representing the citation source 17 | in the \code{unique_citations} data frame.} 18 | } 19 | \value{ 20 | A data frame with phase counts and calculated precision and recall 21 | for each citation source, including: 22 | \itemize{ 23 | \item \verb{Distinct Records}: The count of distinct records per source. 24 | \item \code{screened}: The count of records in the "screened" phase. 25 | \item \code{final}: The count of records in the "final" phase. 26 | \item \code{Precision}: The precision metric calculated as \verb{final / Distinct Records}. 27 | \item \code{Recall}: The recall metric calculated as \verb{final / Total final records}. 28 | } 29 | } 30 | \description{ 31 | This function calculates the distinct record counts, as well as screened 32 | and final record counts, for each citation source across different phases 33 | (e.g., "screened", "final"). It also calculates precision and recall metrics 34 | for each source. 35 | } 36 | \details{ 37 | The function starts by calculating the total distinct records, as well as 38 | the total "screened" and "final" records across all sources. It then 39 | calculates distinct counts for each source, followed by counts for "screened" 40 | and "final" records. Finally, it calculates precision and recall metrics and 41 | adds a total row summarizing these counts across all sources. 
42 | } 43 | \examples{ 44 | # Example usage with a sample dataset 45 | unique_citations <- data.frame( 46 | cite_source = c("Source1", "Source2", "Source3"), 47 | cite_label = c("screened","screened", "final"), 48 | duplicate_id = c(1, 2, 3) 49 | ) 50 | n_unique <- data.frame( 51 | cite_source = c("Source1", "Source2", "Source3"), 52 | unique = c(10, 20, 30) 53 | ) 54 | calculate_phase_records(unique_citations, n_unique, "cite_source") 55 | } 56 | -------------------------------------------------------------------------------- /man/calculate_record_counts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/count.R 3 | \name{calculate_record_counts} 4 | \alias{calculate_record_counts} 5 | \title{Calculate record counts function 6 | Calculate and combine counts of distinct records, imported records, and unique records for each database} 7 | \usage{ 8 | calculate_record_counts(unique_citations, citations, n_unique, db_colname) 9 | } 10 | \arguments{ 11 | \item{unique_citations}{Dataframe. The dataframe for calculating distinct records count.} 12 | 13 | \item{citations}{Dataframe. The dataframe for calculating records imported count.} 14 | 15 | \item{n_unique}{Dataframe. The dataframe for calculating unique records count.} 16 | 17 | \item{db_colname}{Character. The name of the column containing the database source information.} 18 | } 19 | \value{ 20 | A dataframe with counts of distinct records, imported records, and unique records for each source, including total counts and several calculated ratios and percentages. 21 | } 22 | \description{ 23 | This function calculates the counts of distinct records, records imported, and unique records for each database source. 24 | It combines these counts into one dataframe and calculates several ratios and percentages related to the unique and distinct counts. 25 | It also calculates the total for each count type. 
26 | } 27 | \examples{ 28 | unique_citations <- data.frame( 29 | db_source = c("Database1", "Database1", "Database2", "Database3", "Database3", "Database3"), 30 | other_data = 1:6 31 | ) 32 | 33 | citations <- data.frame( 34 | db_source = c("Database1", "Database1", "Database1", "Database2", "Database2", "Database3"), 35 | other_data = 7:12 36 | ) 37 | 38 | n_unique <- data.frame( 39 | cite_source = c("Database1", "Database2", "Database2", "Database3", "Database3", "Database3"), 40 | cite_label = c("search", "final", "search", "search", "search", "final"), 41 | unique = c(1, 0, 1, 1, 1, 0) 42 | ) 43 | 44 | result <- calculate_record_counts(unique_citations, citations, n_unique, "db_source") 45 | print(result) 46 | } 47 | -------------------------------------------------------------------------------- /man/citation_summary_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tables.R 3 | \name{citation_summary_table} 4 | \alias{citation_summary_table} 5 | \title{Contribution summary table} 6 | \usage{ 7 | citation_summary_table( 8 | citations, 9 | comparison_type = "sources", 10 | search_label = "search", 11 | screening_label = "final", 12 | top_n = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{citations}{A deduplicated tibble as returned by \code{dedup_citations()}.} 17 | 18 | \item{comparison_type}{Either "sources" to summarise and assess sources or "strings" to consider strings.} 19 | 20 | \item{search_label}{One or multiple labels that identify initial search results (default: "search") - if multiple labels are provided, they are merged.} 21 | 22 | \item{screening_label}{One or multiple label that identify screened records (default: "final") - if multiple are provided, each is compared to the search stage.} 23 | 24 | \item{top_n}{Number of sources/strings to display, based on the number of total records they contributed at the search stage. Note that calculations and totals will still be based on all citations. Defaults to NULL, then all sources/strings are displayed.} 25 | } 26 | \value{ 27 | A tibble containing the contribution summary table, which shows the contribution of each source and the overall performance of the search 28 | } 29 | \description{ 30 | Create a summary table to show the contribution of each source and the overall performance of the search. For this to work, 31 | labels need to be used that contrast a "search" stage with one or more later stages. 
32 | } 33 | \examples{ 34 | if (interactive()) { 35 | # Load example data from the package 36 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource") 37 | examplecitations <- readRDS(examplecitations_path) 38 | 39 | # Deduplicate citations and compare sources 40 | unique_citations <- dedup_citations(examplecitations) 41 | 42 | unique_citations |> 43 | dplyr::filter(stringr::str_detect(cite_label, "final")) |> 44 | record_level_table(return = "DT") 45 | citation_summary_table(unique_citations, screening_label = c("screened", "final")) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /man/compare_sources.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compare.R 3 | \name{compare_sources} 4 | \alias{compare_sources} 5 | \title{Compare duplicate citations across sources, labels, and strings} 6 | \usage{ 7 | compare_sources( 8 | unique_data, 9 | comp_type = c("sources", "strings", "labels"), 10 | include_references = FALSE 11 | ) 12 | } 13 | \arguments{ 14 | \item{unique_data}{Dataframe from ASySD: merged unique rows with duplicate IDs} 15 | 16 | \item{comp_type}{Specify which fields are to be included. One or more of "sources", "strings" or "labels" - defaults to all.} 17 | 18 | \item{include_references}{Should bibliographic detail be included in return?} 19 | } 20 | \value{ 21 | dataframe with indicators of where a citation appears, with sources/labels/strings as columns 22 | } 23 | \description{ 24 | Compare duplicate citations across sources, labels, and strings 25 | } 26 | \examples{ 27 | if (interactive()) { 28 | # Load example data from the package 29 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource") 30 | examplecitations <- readRDS(examplecitations_path) 31 | 32 | # Deduplicate citations and compare sources 33 | dedup_results <- dedup_citations(examplecitations) 34 | compare_sources(dedup_results, comp_type = "sources") 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /man/count_unique.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compare.R 3 | \name{count_unique} 4 | \alias{count_unique} 5 | \title{Count number of unique and non-unique citations from different sources, labels, and strings} 6 | \usage{ 7 | count_unique(unique_data, include_references = FALSE) 8 | } 9 | \arguments{ 10 | \item{unique_data}{Dataframe from ASySD: merged unique rows with duplicate IDs} 11 | 12 | \item{include_references}{Should bibliographic detail be included in return?} 13 | } 14 | \value{ 15 | dataframe with indicators of where a citation appears, with source/label/string as column 16 | } 17 | \description{ 18 | Count number of unique and non-unique citations from different sources, labels, and strings 19 | } 20 | \examples{ 21 | # Load example data from the package 22 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource") 23 | examplecitations <- readRDS(examplecitations_path) 24 | 25 | # Deduplicate citations 26 | dedup_results <- dedup_citations(examplecitations) 27 | 28 | # Count unique and non-unique citations 29 | count_unique(dedup_results) 30 | } 31 | --------------------------------------------------------------------------------
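A natural follow-on, sketched here on the assumption that \code{dedup_results} comes from \code{dedup_citations()} as in the example above, is to pass the output of \code{count_unique()} to \code{plot_contributions()} (documented further below):

# Count unique and non-unique citations per source and label...
n_unique <- count_unique(dedup_results)
# ...then visualise each source's contribution across stages
plot_contributions(n_unique, center = TRUE)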
/man/create_detailed_record_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/new_count_and_table.R 3 | \name{create_detailed_record_table} 4 | \alias{create_detailed_record_table} 5 | \title{Create a Detailed Record Table} 6 | \usage{ 7 | create_detailed_record_table(data) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame containing the detailed counts for each citation source. 11 | The data frame must include the following columns: 12 | \itemize{ 13 | \item \code{Source}: The name of the citation source. 14 | \item \verb{Records Imported}: The total number of records imported from the source. 15 | \item \verb{Distinct Records}: The number of distinct records after deduplication within the source. 16 | \item \verb{Unique Records}: The number of records unique to that source. 17 | \item \verb{Non-unique Records}: The number of records found in at least one other source. 18 | \item \verb{Source Contribution \%}: The percentage contribution of each source to the total distinct records. 19 | \item \verb{Source Unique Contribution \%}: The percentage contribution of each source to the total unique records. 20 | \item \verb{Source Unique \%}: The percentage of records from each source that were unique. 21 | }} 22 | } 23 | \value{ 24 | A \code{gt} table object summarizing the detailed record counts for each citation source. 25 | } 26 | \description{ 27 | This function generates a formatted summary table using the \code{gt} package, 28 | which displays detailed counts for each citation source. The table includes 29 | columns for the number of records imported, distinct records, unique records, 30 | non-unique records, and various contribution percentages. Data from the 31 | function calculate_detailed_records is pre-formatted for this table. 32 | } 33 | \details{ 34 | The function checks for the presence of all required columns in the input data frame. 35 | If any required columns are missing, the function stops and returns an error message 36 | specifying the missing columns. This ensures that the input data is correctly formatted 37 | before attempting to generate the table. 38 | 39 | The generated table includes a header and footnotes that provide additional context 40 | for each column, explaining the meaning of the data presented. 
41 | } 42 | \examples{ 43 | # Example usage with a sample dataset 44 | sample_data <- data.frame( 45 | Source = c("Source1", "Source2", "Source3", "Total"), 46 | `Records Imported` = c(100, 150, 250, 500), 47 | `Distinct Records` = c(90, 140, 230, 460), 48 | `Unique Records` = c(50, 70, 120, 240), 49 | `Non-unique Records` = c(40, 70, 110, 220), 50 | `Source Contribution \%` = c("39.1\%", "60.9\%", "100\%", "100\%"), 51 | `Source Unique Contribution \%` = c("41.7\%", "58.3\%", "100\%", "100\%"), 52 | `Source Unique \%` = c("55.6\%", "50\%", "52.2\%", "52.2\%"), 53 | check.names = FALSE 54 | ) 55 | 56 | # Create the detailed record table 57 | create_detailed_record_table(sample_data) 58 | } 59 | -------------------------------------------------------------------------------- /man/create_initial_record_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/new_count_and_table.R 3 | \name{create_initial_record_table} 4 | \alias{create_initial_record_table} 5 | \title{Initial Record Table} 6 | \usage{ 7 | create_initial_record_table(data) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame containing the record counts for each citation source. 11 | It must include columns \code{Source}, \code{Records_Imported}, and \code{Distinct_Records}.} 12 | } 13 | \value{ 14 | A \code{gt} table object summarizing the record counts for each citation source. 15 | } 16 | \description{ 17 | This function generates a formatted table displaying the record counts 18 | for each citation source, including the number of records imported and 19 | the distinct records after deduplication. 20 | } 21 | \details{ 22 | The function checks if the input data frame is empty and returns an empty \code{gt} table 23 | if no data is present. Otherwise, it generates a formatted table with labeled columns 24 | and adds footnotes explaining the meaning of each column. 25 | } 26 | \examples{ 27 | # Example usage with a sample dataset 28 | sample_data <- data.frame( 29 | Source = c("Source1", "Source2", "Source3"), 30 | Records_Imported = c(100, 150, 250), 31 | Distinct_Records = c(90, 140, 230) 32 | ) 33 | create_initial_record_table(sample_data) 34 | } 35 | -------------------------------------------------------------------------------- /man/create_precision_sensitivity_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/new_count_and_table.R 3 | \name{create_precision_sensitivity_table} 4 | \alias{create_precision_sensitivity_table} 5 | \title{Count and Precision/Sensitivity Table} 6 | \usage{ 7 | create_precision_sensitivity_table(data) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame containing phase-specific counts and calculated metrics 11 | for each citation source. It must include columns such as \code{Source}, 12 | \code{Distinct_Records}, \code{final}, \code{Precision}, \code{Recall}, and optionally \code{screened}.} 13 | } 14 | \value{ 15 | A \code{gt} table object summarizing the precision and sensitivity 16 | metrics for each citation source, with relevant footnotes and labels. 17 | } 18 | \description{ 19 | This function generates a formatted table that displays the precision 20 | and sensitivity (recall) metrics for each citation source, along with 21 | distinct records and phase-specific counts such as "screened" and "final". 
22 | } 23 | \details{ 24 | The function first checks whether all values in the \code{screened} column are zero. 25 | If so, the column is removed from the table. The table is then generated 26 | using the \code{gt} package, with labeled columns and footnotes explaining the metrics. 27 | } 28 | \examples{ 29 | # Example usage with a sample dataset 30 | sample_data <- data.frame( 31 | Source = c("Source1", "Source2", "Total"), 32 | Distinct_Records = c(100, 150, 250), 33 | final = c(80, 120, 200), 34 | Precision = c(80.0, 80.0, 80.0), 35 | Recall = c(40.0, 60.0, 100.0), 36 | screened = c(90, 140, 230) 37 | ) 38 | create_precision_sensitivity_table(sample_data) 39 | } 40 | -------------------------------------------------------------------------------- /man/dedup_citations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dedup.R 3 | \name{dedup_citations} 4 | \alias{dedup_citations} 5 | \title{Deduplicate citations - ASySD wrapper} 6 | \usage{ 7 | dedup_citations(raw_citations, manual = FALSE, show_unknown_tags = FALSE) 8 | } 9 | \arguments{ 10 | \item{raw_citations}{Citation dataframe with relevant columns} 11 | 12 | \item{manual}{Logical. If TRUE, manually specify pairs of duplicates to merge. Default is FALSE.} 13 | 14 | \item{show_unknown_tags}{When a label, source, or other merged field is missing, do you want this to show as "unknown"?} 15 | } 16 | \value{ 17 | unique citations formatted for CiteSource 18 | } 19 | \description{ 20 | This function deduplicates citation data. Note that duplicates are assumed to be published 21 | in the same journal, so pre-prints and similar results will not be identified here. 22 | } 23 | \examples{ 24 | # Load example data from the package 25 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource") 26 | examplecitations <- readRDS(examplecitations_path) 27 | 28 | # Deduplicate citations without manually specifying pairs and without showing unknown tags 29 | dedup_results <- dedup_citations(examplecitations) 30 | 31 | # Deduplicate citations with manual specification of pairs and showing unknown tags 32 | dedup_results_manual_unknown <- dedup_citations( 33 | examplecitations, 34 | manual = TRUE, 35 | show_unknown_tags = TRUE 36 | ) 37 | } 38 | -------------------------------------------------------------------------------- /man/dedup_citations_add_manual.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dedup.R 3 | \name{dedup_citations_add_manual} 4 | \alias{dedup_citations_add_manual} 5 | \title{Remove pairs with manual dedup - ASySD wrapper} 6 | \usage{ 7 | dedup_citations_add_manual(unique_citations, additional_pairs) 8 | } 9 | \arguments{ 10 | \item{unique_citations}{Unique citations post deduplication} 11 | 12 | \item{additional_pairs}{Dataframe of manually confirmed (TRUE) duplicate pairs} 13 | } 14 | \value{ 15 | unique citations formatted for CiteSource 16 | } 17 | \description{ 18 | This function merges manually identified duplicate pairs into previously deduplicated citation data. Note that duplicates are assumed to be published 19 | in the same journal, so pre-prints and similar results will not be identified here.
20 | } 21 | \examples{ 22 | # Load example data from the package 23 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource") 24 | examplecitations <- readRDS(examplecitations_path) 25 | 26 | # Deduplicate citations 27 | dedup_results <- dedup_citations(examplecitations) 28 | # Manually confirmed duplicate pairs can then be merged in; `manual_pairs` stands in for a dataframe of confirmed TRUE duplicate pairs: # dedup_results_manual <- dedup_citations_add_manual(dedup_results, additional_pairs = manual_pairs) 29 | } 30 | -------------------------------------------------------------------------------- /man/detect_.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import_export_helpers.R 3 | \name{detect_} 4 | \alias{detect_} 5 | \alias{detect_parser} 6 | \alias{detect_delimiter} 7 | \alias{detect_lookup} 8 | \alias{detect_year} 9 | \title{Detect file formatting information} 10 | \usage{ 11 | detect_parser(x) 12 | 13 | detect_delimiter(x) 14 | 15 | detect_lookup(tags) 16 | 17 | detect_year(df) 18 | } 19 | \arguments{ 20 | \item{x}{A character vector containing bibliographic data} 21 | 22 | \item{tags}{A character vector containing RIS tags.} 23 | 24 | \item{df}{a data.frame containing bibliographic data} 25 | } 26 | \value{ 27 | \code{detect_parser} and \code{detect_delimiter} return a length-1 character; \code{detect_year} returns a character vector listing estimated publication years; and \code{detect_lookup} returns a \code{data.frame}. 28 | } 29 | \description{ 30 | Bibliographic data can be stored in a number of different file types, meaning that detecting consistent attributes of those files is necessary if they are to be parsed accurately. These functions attempt to identify some of those key file attributes. Specifically, \code{detect_parser} determines which \code{\link{parse_}} function to use; \code{detect_delimiter} and \code{detect_lookup} identify different attributes of RIS files; and \code{detect_year} attempts to fill gaps in publication years from other information stored in a \code{data.frame}. 31 | } 32 | -------------------------------------------------------------------------------- /man/export_bib.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/export.R 3 | \name{export_bib} 4 | \alias{export_bib} 5 | \title{Export deduplicated citations to .bib file} 6 | \usage{ 7 | export_bib( 8 | citations, 9 | filename = "citations.bib", 10 | include = c("sources", "labels", "strings") 11 | ) 12 | } 13 | \arguments{ 14 | \item{citations}{Dataframe with unique citations, resulting from \code{dedup_citations()}} 15 | 16 | \item{filename}{Name (and path) of file, should end in .bib} 17 | 18 | \item{include}{Character. One or more of sources, labels or strings} 19 | } 20 | \description{ 21 | This function saves deduplicated citations as a BibTeX file with sources, labels and strings 22 | included in the \code{note} field (if they were initially provided for any of the citations). Therefore, 23 | beware that \strong{any \code{note} field that might be included in \code{citations} will be overwritten}.
Also note that 24 | \emph{existing files are overwritten without warning.} 25 | } 26 | \examples{ 27 | if (interactive()) { 28 | # Load example data from the package 29 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource") 30 | examplecitations <- readRDS(examplecitations_path) 31 | dedup_results <- dedup_citations(examplecitations) 32 | export_bib(dedup_results, "cite_sources.bib", include = "sources") 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /man/export_csv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/export.R 3 | \name{export_csv} 4 | \alias{export_csv} 5 | \title{Export deduplicated citations with source data as CSV file} 6 | \usage{ 7 | export_csv( 8 | unique_citations, 9 | filename = "citesource_exported_citations.csv", 10 | separate = NULL, 11 | trim_abstracts = 32000 12 | ) 13 | } 14 | \arguments{ 15 | \item{unique_citations}{Dataframe with unique citations, resulting from \code{dedup_citations()}} 16 | 17 | \item{filename}{Name (and path) of file, should end in .csv} 18 | 19 | \item{separate}{Character vector indicating which (if any) of cite_source, cite_string and cite_label should be split into separate columns to facilitate further analysis.} 20 | 21 | \item{trim_abstracts}{Some databases may return full-text that is misidentified as an abstract. This inflates file size and may lead to issues with Excel, 22 | which cannot deal with more than 32,000 characters per field. Therefore, the default is to trim very long abstracts to 32,000 characters. Set a lower number to reduce file size, or 23 | NULL to retain abstracts as they are.} 24 | } 25 | \value{ 26 | The function saves the deduplicated citations as a CSV file to the specified location. 27 | } 28 | \description{ 29 | This function saves deduplicated citations as a CSV file for further analysis and/or reporting. 30 | Metadata can be separated into one column per source, label or string, which facilitates analysis. 31 | Note that \emph{existing files are overwritten without warning.} 32 | } 33 | \examples{ 34 | if (interactive()) { 35 | # Load example data from the package 36 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource") 37 | examplecitations <- readRDS(examplecitations_path) 38 | dedup_results <- dedup_citations(examplecitations) 39 | export_csv(dedup_results, "cite_sources.csv", separate = "cite_source") 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /man/export_ris.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/export.R 3 | \name{export_ris} 4 | \alias{export_ris} 5 | \title{Export data frame to RIS file} 6 | \usage{ 7 | export_ris( 8 | citations, 9 | filename = "citations.ris", 10 | source_field = "DB", 11 | label_field = "C7", 12 | string_field = "C8" 13 | ) 14 | } 15 | \arguments{ 16 | \item{citations}{Dataframe to be exported to RIS file} 17 | 18 | \item{filename}{Name (and path) of file, should end in .ris} 19 | 20 | \item{source_field}{Field in \code{citations} representing the source. Default is "DB".} 21 | 22 | \item{label_field}{Field in \code{citations} representing the label.
Default is "C7".} 23 | 24 | \item{string_field}{Field in \code{citations} representing additional string information. Default is "C8".} 25 | } 26 | \description{ 27 | This function saves a data frame as a RIS file with specified columns mapped to RIS fields. Note that 28 | \emph{existing files are overwritten without warning.} 29 | } 30 | \examples{ 31 | if (interactive()) { 32 | # Load example data from the package 33 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource") 34 | examplecitations <- readRDS(examplecitations_path) 35 | dedup_results <- dedup_citations(examplecitations) 36 | export_ris( 37 | dedup_results, 38 | "cite_sources.ris", 39 | source_field = "DB", 40 | label_field = "C7", 41 | string_field = "C8" 42 | ) 43 | 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /man/merge_columns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import_export_helpers.R 3 | \name{merge_columns} 4 | \alias{merge_columns} 5 | \title{Bind two or more data frames with different columns} 6 | \usage{ 7 | merge_columns(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Either a data.frame or a list of data.frames.} 11 | 12 | \item{y}{A data.frame, optional if x is a list.} 13 | } 14 | \value{ 15 | Returns a single data.frame with all the input data frames merged. 16 | } 17 | \description{ 18 | Takes two or more data.frames with different column names or different column orders and binds them to a single data.frame. 19 | } 20 | -------------------------------------------------------------------------------- /man/parse_.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import_export_helpers.R 3 | \name{parse_} 4 | \alias{parse_} 5 | \alias{parse_pubmed} 6 | \alias{parse_ris} 7 | \alias{parse_bibtex} 8 | \alias{parse_csv} 9 | \alias{parse_tsv} 10 | \title{Parse bibliographic text in a variety of formats} 11 | \usage{ 12 | parse_pubmed(x) 13 | 14 | parse_ris(x, tag_naming = "best_guess") 15 | 16 | parse_bibtex(x) 17 | 18 | parse_csv(x) 19 | 20 | parse_tsv(x) 21 | } 22 | \arguments{ 23 | \item{x}{A character vector containing bibliographic information in ris format.} 24 | 25 | \item{tag_naming}{What format are ris tags in? Defaults to "best_guess". See \code{\link{synthesisr_read_refs}} for a list of accepted arguments.} 26 | } 27 | \value{ 28 | Returns an object of class \code{bibliography} (ris, bib, or pubmed formats) or \code{data.frame} (csv or tsv). 29 | } 30 | \description{ 31 | Text in standard formats - such as imported via \code{\link{readLines}} - can be parsed using these functions. Use \code{\link{detect_parser}} to determine which is the most appropriate parser for your situation.
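As a minimal sketch (the file name "refs.ris" is illustrative rather than shipped with the package):

# Read the raw lines, let detect_parser() suggest a parser, then parse
lines <- readLines("refs.ris")
detect_parser(lines) # returns e.g. "parse_ris" for RIS-formatted input
refs <- parse_ris(lines)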
32 | } 33 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CiteSource.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \arguments{ 10 | \item{lhs}{A value or the magrittr placeholder.} 11 | 12 | \item{rhs}{A function call using the magrittr semantics.} 13 | } 14 | \value{ 15 | The result of calling \code{rhs(lhs)}. 16 | } 17 | \description{ 18 | Pipe operator 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/plot_contributions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plots.R 3 | \name{plot_contributions} 4 | \alias{plot_contributions} 5 | \title{Create a bar chart that compares source contributions over stages} 6 | \usage{ 7 | plot_contributions( 8 | data, 9 | facets = cite_source, 10 | bars = cite_label, 11 | color = type, 12 | center = FALSE, 13 | bar_order = "keep", 14 | facet_order = "keep", 15 | color_order = "keep", 16 | totals_in_legend = FALSE 17 | ) 18 | } 19 | \arguments{ 20 | \item{data}{A tibble with one hit per row, with variables indicating meta-data of interest.} 21 | 22 | \item{facets}{Variable in data used for facets (i.e. sub-plots). Defaults to source (i.e. cite_source). Specify NULL to refrain from faceting.} 23 | 24 | \item{bars}{Variable in data used for bars. Defaults to label (i.e. cite_label)} 25 | 26 | \item{color}{Variable in data used to colour the bars. Defaults to \code{type}, which distinguishes unique from duplicated records.} 27 | 28 | \item{center}{Logical. Should one color be above and one below the axis?} 29 | 30 | \item{bar_order}{Character. Order of bars within each facet, any levels not specified will follow at the end. If "keep", then this is based on factor levels (or the first value) in the input data.} 31 | 32 | \item{facet_order}{Character. Order of facets. Any levels not specified will follow at the end.} 33 | 34 | \item{color_order}{Character. Order of values on the color scale.} 35 | 36 | \item{totals_in_legend}{Logical. Should totals be shown in legend (e.g. as Unique (N = 1234))} 37 | } 38 | \description{ 39 | Create a faceted plot that shows unique contributions and duplicated records across 40 | two metadata dimensions. The most typical use case is to show the contributions of each source 41 | across different screening stages.
42 | } 43 | \examples{ 44 | data <- data.frame( 45 | article_id = 1:100, 46 | cite_source = sample(c("DB 1", "DB 2", "DB 3"), 100, replace = TRUE), 47 | cite_label = sample(c("2020", "2021", "2022"), 100, replace = TRUE), 48 | type = c("unique", "duplicated")[rbinom(100, 1, .7) + 1] 49 | ) 50 | 51 | plot_contributions(data, 52 | center = TRUE, bar_order = c("2022", "2021", "2020"), 53 | color_order = c("unique", "duplicated") 54 | ) 55 | 56 | } 57 | -------------------------------------------------------------------------------- /man/plot_source_overlap_heatmap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plots.R 3 | \name{plot_source_overlap_heatmap} 4 | \alias{plot_source_overlap_heatmap} 5 | \title{Create a heatmap matrix showing the overlap between sources} 6 | \usage{ 7 | plot_source_overlap_heatmap( 8 | data, 9 | cells = "source", 10 | facets = NULL, 11 | plot_type = c("counts", "percentages"), 12 | sort_sources = TRUE, 13 | interactive = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{data}{A tibble with one record per row, an id column and then one column 18 | per source indicating whether the record was found in that source (usually obtained from \code{compare_sources()})} 19 | 20 | \item{cells}{Variable to display in the cells. Should be 'source', 'label' or 'string'} 21 | 22 | \item{facets}{Variable in data used for facets (i.e. sub-plots). Should be NULL, 'source', 'label' or 'string'} 23 | 24 | \item{plot_type}{Either \code{counts} (number of shared records) or \code{percentages} 25 | (share of overlapping records).} 26 | 27 | \item{sort_sources}{Should sources be shown based on the number of records they 28 | contained? If FALSE, order of data is retained.} 29 | 30 | \item{interactive}{Should returned plot be interactive and enable user to export 31 | records underlying each field?} 32 | } 33 | \value{ 34 | The requested plot as a either a \code{ggplot2} object (when interactive = FALSE), which can then be 35 | further formatted or saved using \code{\link[ggplot2:ggsave]{ggplot2::ggsave()}}, or a \code{plotly} object when \code{interactive = TRUE} 36 | } 37 | \description{ 38 | Show overlap between different record sources, either by showing the 39 | number or the percentages of shared records between any pair of sources. 40 | } 41 | \examples{ 42 | data <- data.frame( 43 | article_id = 1:500, 44 | source__source1 = rbinom(500, 1, .5) == 1, 45 | source__source2 = rbinom(500, 1, .2) == 1, 46 | source__source3 = rbinom(500, 1, .1) == 1, 47 | source__source4 = rbinom(500, 1, .6) == 1, 48 | source__source5 = rbinom(500, 1, .7) == 1 49 | ) 50 | 51 | plot_source_overlap_heatmap(data) 52 | plot_source_overlap_heatmap(data, plot_type = "percentages") 53 | 54 | } 55 | -------------------------------------------------------------------------------- /man/plot_source_overlap_upset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plots.R 3 | \name{plot_source_overlap_upset} 4 | \alias{plot_source_overlap_upset} 5 | \title{Create an UpSetR upset plot showing the overlap between sources} 6 | \usage{ 7 | plot_source_overlap_upset( 8 | data, 9 | groups = "source", 10 | nsets = NULL, 11 | sets.x.label = "Number of records", 12 | mainbar.y.label = "Overlapping record count", 13 | order.by = c("freq", "degree"), 14 | ... 
15 | ) 16 | } 17 | \arguments{ 18 | \item{data}{A tibble with one record per row, an id column and then one column 19 | per source indicating whether the record was found in that source.} 20 | 21 | \item{groups}{Variable to use as groups. Should be 'source', 'label' or 'string' - defaults to source.} 22 | 23 | \item{nsets}{Number of sets to look at} 24 | 25 | \item{sets.x.label}{The x-axis label of the set size bar plot} 26 | 27 | \item{mainbar.y.label}{The y-axis label of the intersection size bar plot} 28 | 29 | \item{order.by}{How the intersections in the matrix should be ordered by. Options include frequency (entered as "freq"), degree, or both in any order.} 30 | 31 | \item{...}{ 32 | Arguments passed on to \code{\link[UpSetR:upset]{UpSetR::upset}} 33 | \describe{ 34 | \item{\code{nintersects}}{Number of intersections to plot. If set to NA, all intersections will be plotted.} 35 | \item{\code{sets}}{Specific sets to look at (Include as combinations. Ex: c("Name1", "Name2"))} 36 | \item{\code{keep.order}}{Keep sets in the order entered using the sets parameter. The default is FALSE, which orders the sets by their sizes.} 37 | \item{\code{set.metadata}}{Metadata that offers insight to an attribute of the sets. Input should be a data frame where the first column is set names, and the 38 | remaining columns are attributes of those sets. To learn how to use this parameter it is highly suggested to view the set metadata vignette. The link 39 | can be found on the package's GitHub page.} 40 | \item{\code{intersections}}{Specific intersections to include in plot entered as a list of lists. 41 | Ex: list(list("Set name1", "Set name2"), list("Set name1", "Set name3")). If data is entered into this parameter the only data shown on the UpSet plot 42 | will be the specific intersections listed.} 43 | \item{\code{matrix.color}}{Color of the intersection points} 44 | \item{\code{main.bar.color}}{Color of the main bar plot} 45 | \item{\code{mainbar.y.max}}{The maximum y value of the intersection size bar plot scale. May be useful when aligning multiple UpSet plots horizontally.} 46 | \item{\code{sets.bar.color}}{Color of set size bar plot} 47 | \item{\code{point.size}}{Size of points in matrix plot} 48 | \item{\code{line.size}}{Width of lines in matrix plot} 49 | \item{\code{mb.ratio}}{Ratio between matrix plot and main bar plot (Keep in terms of hundredths)} 50 | \item{\code{expression}}{Expression to subset attributes of intersection or element query data. Enter as string (Ex: "ColName > 3")} 51 | \item{\code{att.pos}}{Position of attribute plot. If NULL or "bottom" the plot will be at below UpSet plot. If "top" it will be above UpSet plot} 52 | \item{\code{att.color}}{Color of attribute histogram bins or scatterplot points for unqueried data represented by main bars. Default set to color of main bars.} 53 | \item{\code{decreasing}}{How the variables in order.by should be ordered. "freq" is decreasing (greatest to least) and "degree" is increasing (least to greatest)} 54 | \item{\code{show.numbers}}{Show numbers of intersection sizes above bars} 55 | \item{\code{number.angles}}{The angle of the numbers atop the intersection size bars} 56 | \item{\code{group.by}}{How the data should be grouped ("degree" or "sets")} 57 | \item{\code{cutoff}}{The number of intersections from each set (to cut off at) when aggregating by sets} 58 | \item{\code{queries}}{Unified query of intersections, elements, and custom row functions. Entered as a list that contains a list of 59 | queries. 
query is the type of query being conducted. params are the parameters of the query (if any). color is the color of the points on the 60 | plot that will represent the query. If no color is selected one will be provided automatically. active takes TRUE or FALSE, and if 61 | TRUE, it will overlay the bars present with the results from the query. If FALSE a tick mark will indicate the intersection size. 62 | See examples section on how to do this.} 63 | \item{\code{query.legend}}{Position query legend on top or bottom of UpSet plot} 64 | \item{\code{shade.color}}{Color of row shading in matrix} 65 | \item{\code{shade.alpha}}{Transparency of shading in matrix} 66 | \item{\code{matrix.dot.alpha}}{Transparency of the empty intersections points in the matrix} 67 | \item{\code{empty.intersections}}{Additionally display empty sets up to nintersects} 68 | \item{\code{color.pal}}{Color palette for attribute plots} 69 | \item{\code{boxplot.summary}}{Boxplots representing the distribution of a selected attribute for each intersection. Select attributes by entering a character vector of attribute names (e.g. c("Name1", "Name2")). 70 | The maximum number of attributes that can be entered is 2.} 71 | \item{\code{attribute.plots}}{Create custom ggplot using intersection data represented in the main bar plot. Prior to adding custom plots, the UpSet plot is set up in a 100 by 100 grid. 72 | The attribute.plots parameter takes a list that contains the number of rows that should be allocated for the custom plot, and a list of plots with specified positions. 73 | nrows is the number of rows the custom plots should take up. There is already 100 allocated for the custom plot. plots takes a list that contains a function that returns 74 | a custom ggplot and the x and y aesthetics for the function. ncols is the number of columns that your ggplots should take up. See examples for how to add custom ggplots.} 75 | \item{\code{scale.intersections}}{The scale to be used for the intersection sizes. Options: "identity", "log10", "log2"} 76 | \item{\code{scale.sets}}{The scale to be used for the set sizes. Options: "identity", "log10", "log2"} 77 | \item{\code{text.scale}}{Numeric, value to scale the text sizes, applies to all axis labels, tick labels, and numbers above bar plot. 
Can be a universal scale, or a vector containing individual scales 78 | in the following format: c(intersection size title, intersection size tick labels, set size title, set size tick labels, set names, numbers above bars)} 79 | \item{\code{set_size.angles}}{Numeric, angle to rotate the set size plot x-axis text} 80 | \item{\code{set_size.show}}{Logical, display the set sizes on the set size bar chart} 81 | \item{\code{set_size.numbers_size}}{If set_size.show is TRUE, adjust the size of the numbers} 82 | \item{\code{set_size.scale_max}}{Increase the maximum of set size scale} 83 | }} 84 | } 85 | \description{ 86 | Show records found in specific sets of sources to identify the unique contribution 87 | of each source and of any subsets 88 | } 89 | \examples{ 90 | data <- data.frame( 91 | article_id = 1:500, 92 | source__source1 = rbinom(500, 1, .5) == 1, 93 | source__source2 = rbinom(500, 1, .2) == 1, 94 | source__source3 = rbinom(500, 1, .1) == 1, 95 | source__source4 = rbinom(500, 1, .6) == 1, 96 | source__source5 = rbinom(500, 1, .7) == 1 97 | ) 98 | 99 | plot_source_overlap_upset(data) 100 | 101 | # To start with the records shared among the greatest number of sources, use 102 | 103 | plot_source_overlap_upset(data, decreasing = c(TRUE, TRUE)) 104 | 105 | } 106 | \references{ 107 | Conway, J. R., Lex, A., & Gehlenborg, N. (2017). UpSetR: an R package for the visualization of intersecting sets and their properties. Bioinformatics. 108 | } 109 | -------------------------------------------------------------------------------- /man/precision_sensitivity_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tables.R 3 | \name{precision_sensitivity_table} 4 | \alias{precision_sensitivity_table} 5 | \title{precision_sensitivity_table} 6 | \usage{ 7 | precision_sensitivity_table(data) 8 | } 9 | \arguments{ 10 | \item{data}{A data.frame. The dataset to build the table from. 11 | It should contain the columns 'screened', 'final', 'Precision', 'Recall'.} 12 | } 13 | \value{ 14 | A gt object representing the table. 15 | } 16 | \description{ 17 | This function creates a gt table from the given data and 18 | removes the 'screened' column and its associated footnotes if all its values are zero. 19 | } 20 | -------------------------------------------------------------------------------- /man/read_citations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import.R 3 | \name{read_citations} 4 | \alias{read_citations} 5 | \title{Import citations from file} 6 | \usage{ 7 | read_citations( 8 | files = NULL, 9 | cite_sources = NULL, 10 | cite_strings = NULL, 11 | cite_labels = NULL, 12 | metadata = NULL, 13 | verbose = TRUE, 14 | tag_naming = "best_guess", 15 | only_key_fields = TRUE 16 | ) 17 | } 18 | \arguments{ 19 | \item{files}{One or multiple RIS or Bibtex files with citations. 20 | Should be .bib or .ris files} 21 | 22 | \item{cite_sources}{The origin of the citation files (e.g. "Scopus", "WOS", "Medline") - vector with one value per file, defaults to file names.} 23 | 24 | \item{cite_strings}{Optional. The search string used (or another grouping to analyse) - vector with one value per file} 25 | 26 | \item{cite_labels}{Optional.
An additional label per file, for instance the stage of search - vector with one value per file} 27 | 28 | \item{metadata}{A tibble with file names and metadata for each file. Can be specified as an \emph{alternative} to files, cite_sources, cite_strings and cite_labels.} 29 | 30 | \item{verbose}{Should the number of references and the allocation of labels be reported?} 31 | 32 | \item{tag_naming}{Either a length-1 character stating how ris tags should be replaced (see details for a list of options), or an object inheriting from class \code{data.frame} containing user-defined replacement tags.} 33 | 34 | \item{only_key_fields}{Should only key fields (e.g., those used by CiteSource) be imported? If FALSE, all RIS data is retained. Can also be a character vector of field names to retain (after they have been renamed by the import function) in addition to the essential ones.} 35 | } 36 | \value{ 37 | A tibble with one row per citation 38 | } 39 | \description{ 40 | This function imports RIS and Bibtex files with citations and merges them 41 | into one long tibble with one record per row. 42 | } 43 | \examples{ 44 | if (interactive()) { 45 | # Import only key fields from the RIS files 46 | read_citations(c("res.ris", "res.bib"), 47 | cite_sources = c("CINAHL", "MEDLINE"), 48 | cite_strings = c("Search1", "Search2"), 49 | cite_labels = c("raw", "screened"), 50 | only_key_fields = TRUE 51 | ) 52 | 53 | # or equivalently 54 | metadata_tbl_key_fields <- tibble::tribble( 55 | ~files, ~cite_sources, ~cite_strings, ~cite_labels, ~only_key_fields, 56 | "res.ris", "CINAHL", "Search1", "raw", TRUE, 57 | "res.bib", "MEDLINE", "Search2", "screened", TRUE 58 | ) 59 | 60 | read_citations(metadata = metadata_tbl_key_fields) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /man/record_counts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/count.R 3 | \name{record_counts} 4 | \alias{record_counts} 5 | \title{Record counts function 6 | Calculate and combine counts of distinct records and imported records for each database} 7 | \usage{ 8 | record_counts(unique_citations, citations, db_colname) 9 | } 10 | \arguments{ 11 | \item{unique_citations}{Dataframe. The dataframe for calculating distinct records count.} 12 | 13 | \item{citations}{Dataframe. The dataframe for calculating records imported count.} 14 | 15 | \item{db_colname}{Character. The name of the column containing the database source information.} 16 | } 17 | \value{ 18 | A dataframe with counts of distinct records and imported records for each source, including total counts. 19 | } 20 | \description{ 21 | This function calculates the counts of distinct records and records imported for each database source. 22 | It combines these counts into one dataframe and calculates the total for each count type.
23 | } 24 | \examples{ 25 | # Create synthetic data for example 26 | unique_citations <- data.frame( 27 | title = paste("Article", 1:10), 28 | db_source = sample(c("Database 1", "Database 2", "Database 3"), 10, replace = TRUE), 29 | stringsAsFactors = FALSE 30 | ) 31 | 32 | citations <- data.frame( 33 | title = paste("Article", 1:20), 34 | db_source = sample(c("Database 1", "Database 2", "Database 3"), 20, replace = TRUE), 35 | stringsAsFactors = FALSE 36 | ) 37 | 38 | # Use the synthetic data with the function 39 | result <- record_counts(unique_citations, citations, "db_source") 40 | result 41 | } 42 | -------------------------------------------------------------------------------- /man/record_counts_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tables.R 3 | \name{record_counts_table} 4 | \alias{record_counts_table} 5 | \title{record_counts_table} 6 | \usage{ 7 | record_counts_table(data) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame that must contain the columns "Source", "Records Imported", 11 | and "Distinct Records". The "Source" column is used as the row names of the table.} 12 | } 13 | \value{ 14 | A gt object representing the table. 15 | } 16 | \description{ 17 | This function creates a summary table with explanatory footnotes. 18 | It uses the gt package to create the table and adds footnotes to 19 | the "Records Imported" and "Distinct Records" columns. 20 | } 21 | -------------------------------------------------------------------------------- /man/record_level_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tables.R 3 | \name{record_level_table} 4 | \alias{record_level_table} 5 | \title{Record-level table} 6 | \usage{ 7 | record_level_table( 8 | citations, 9 | include = "sources", 10 | include_empty = TRUE, 11 | return = c("tibble", "DT"), 12 | indicator_presence = NULL, 13 | indicator_absence = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{citations}{A deduplicated tibble as returned by \code{dedup_citations()}.} 18 | 19 | \item{include}{Which metadata should be included in the table? Defaults to 'sources', can be replaced or expanded with 'labels' and/or 'strings'} 20 | 21 | \item{include_empty}{Should records with empty metadata (e.g., no information on 'sources') be included in the table? Defaults to TRUE.} 22 | 23 | \item{return}{Either a \code{tibble} that can be exported, e.g. as a csv, or a DataTable (\code{DT}) that allows for interactive exploration. Note that the DataTable allows 24 | users to download a .csv file; in that file, presence and absence is always indicated as TRUE and FALSE to prevent issues with character encodings.} 25 | 26 | \item{indicator_presence}{How should it be indicated that a value is present in a source/label/string? Defaults to TRUE in tibbles and a tickmark in DT tables} 27 | 28 | \item{indicator_absence}{How should it be indicated that a value is \emph{not} present in a source/label/string? Defaults to FALSE in tibbles and a cross in DT tables} 29 | } 30 | \value{ 31 | A tibble or DataTable containing the per-record table that shows which sources (and/or labels/strings) each item was found in. 32 | } 33 | \description{ 34 | Creates a per-record table that shows which sources (and/or labels/strings) each item was found in.
35 | } 36 | \examples{ 37 | # Load example data from the package 38 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource") 39 | examplecitations <- readRDS(examplecitations_path) 40 | 41 | # Deduplicate citations and compare sources 42 | unique_citations <- dedup_citations(examplecitations) 43 | 44 | unique_citations |> 45 | dplyr::filter(stringr::str_detect(cite_label, "final")) |> 46 | record_level_table(return = "DT") 47 | } 48 | -------------------------------------------------------------------------------- /man/record_summary_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tables.R 3 | \name{record_summary_table} 4 | \alias{record_summary_table} 5 | \title{record_summary_table} 6 | \usage{ 7 | record_summary_table(data) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame that must contain the columns "Source", "Records Imported", 11 | "Distinct Records", "Unique records", "Non-unique Records", "Source Contribution \%", 12 | "Source Unique Contribution \%", and "Source Unique \%". The "Source" column is used as the row names of the table.} 13 | } 14 | \value{ 15 | A gt object representing the table. 16 | } 17 | \description{ 18 | This function creates a table with footnotes for its columns. 19 | It uses the gt package to create the table and adds footnotes to various columns. 20 | } 21 | -------------------------------------------------------------------------------- /man/reimport_csv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reimport.R 3 | \name{reimport_csv} 4 | \alias{reimport_csv} 5 | \title{Reimport a CSV-file exported from CiteSource} 6 | \usage{ 7 | reimport_csv(filename) 8 | } 9 | \arguments{ 10 | \item{filename}{Name (and path) of CSV file to be reimported, should end in .csv} 11 | } 12 | \value{ 13 | A data frame containing the imported citation data if all required columns are present. 14 | } 15 | \description{ 16 | This function reimports a csv file that was tagged and deduplicated by CiteSource. 17 | It allows you to continue with further analyses without repeating that step, and also 18 | allows you to make any manual corrections to tagging or deduplication. Note that 19 | this function only works on CSV files that were written with \code{export_csv(..., separate = NULL)}. 20 | } 21 | \examples{ 22 | \dontrun{ 23 | # Example usage 24 | citations <- reimport_csv("path/to/citations.csv") 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /man/reimport_ris.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reimport.R 3 | \name{reimport_ris} 4 | \alias{reimport_ris} 5 | \title{Reimport a RIS-file exported from CiteSource} 6 | \usage{ 7 | reimport_ris( 8 | filename = "citations.ris", 9 | source_field = "DB", 10 | label_field = "C7", 11 | string_field = "C8", 12 | duplicate_id_field = "C1", 13 | record_id_field = "C2", 14 | tag_naming = "ris_synthesisr", 15 | verbose = TRUE 16 | ) 17 | } 18 | \arguments{ 19 | \item{filename}{Name (and path) of RIS file to be reimported, should end in .ris} 20 | 21 | \item{source_field}{Character. Which RIS field should cite_sources be read from?
NULL to set to missing} 22 | 23 | \item{label_field}{Character. Which RIS field should cite_labels be read from? NULL to set to missing} 24 | 25 | \item{string_field}{Character. Which RIS field should cite_strings be read from? NULL to set to missing} 26 | 27 | \item{duplicate_id_field}{Character. Which RIS field should duplicate IDs be read from? NULL to recreate based on row number (note that neither duplicate nor record IDs directly affect CiteSource analyses - they can only allow you to connect processed data with raw data)} 28 | 29 | \item{record_id_field}{Character. Which RIS field should record IDs be read from? NULL to recreate based on row number} 30 | 31 | \item{tag_naming}{Synthesisr option specifying how RIS tags should be replaced with names. This should not 32 | be changed when using this function to reimport a file exported from CiteSource. If you import your own 33 | RIS, check \code{names(CiteSource:::synthesisr_code_lookup)} and select any of the options that start with \code{ris_}} 34 | 35 | \item{verbose}{Should confirmation message be displayed?} 36 | } 37 | \description{ 38 | This function reimports a RIS file that was tagged and deduplicated by CiteSource. 39 | It allows you to continue with further analyses without repeating that step, and also 40 | allows users to make any manual corrections to tagging or deduplication. The function 41 | can also be used to replace the import step (for instance if tags are to be added to 42 | individual citations rather than entire files) - in this case, just call \code{dedup_citations()} 43 | after the import. 44 | } 45 | \details{ 46 | Note that this function's defaults are based on those in \code{export_ris()} so that these functions 47 | can easily be combined. 48 | } 49 | \examples{ 50 | if (interactive()) { 51 | dedup_results <- dedup_citations(citations) 52 | export_ris(dedup_results, "citations.ris") 53 | unique_citations2 <- reimport_ris("citations.ris") 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /man/runShiny.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runShiny.R 3 | \name{runShiny} 4 | \alias{runShiny} 5 | \alias{run_shiny} 6 | \title{A wrapper function to run Shiny Apps from \code{CiteSource}.} 7 | \usage{ 8 | runShiny(app = "CiteSource", offer_install = interactive()) 9 | } 10 | \arguments{ 11 | \item{app}{Defaults to CiteSource - possibly other apps will be included in the future} 12 | 13 | \item{offer_install}{Should the user be prompted to install required packages if they are missing?} 14 | } 15 | \value{ 16 | CiteSource shiny app 17 | } 18 | \description{ 19 | Running this function will launch the CiteSource shiny app 20 | } 21 | \examples{ 22 | if (interactive()) { 23 | # To run the CiteSource Shiny app: 24 | runShiny() 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /man/synthesisr_read_refs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import_export_helpers.R 3 | \name{synthesisr_read_refs} 4 | \alias{synthesisr_read_refs} 5 | \alias{read_ref} 6 | \title{Import bibliographic search results} 7 | \usage{ 8 | synthesisr_read_refs( 9 | filename, 10 | tag_naming = "best_guess", 11 | return_df = TRUE, 12 | verbose = FALSE, 13 |
select_fields = NULL 14 | ) 15 | 16 | read_ref( 17 | filename, 18 | tag_naming = "best_guess", 19 | return_df = TRUE, 20 | verbose = FALSE, 21 | select_fields = NULL 22 | ) 23 | } 24 | \arguments{ 25 | \item{filename}{A path to a filename or vector of filenames containing search results to import.} 26 | 27 | \item{tag_naming}{Either a length-1 character stating how ris tags should be replaced (see details for a list of options), or an object inheriting from class \code{data.frame} containing user-defined replacement tags.} 28 | 29 | \item{return_df}{If TRUE (default), returns a data.frame; if FALSE, returns a list.} 30 | 31 | \item{verbose}{If TRUE, prints status updates (defaults to FALSE).} 32 | 33 | \item{select_fields}{Character vector of fields to be retained. If NULL, all fields from the RIS file are returned} 34 | } 35 | \value{ 36 | Returns a data.frame or list of assembled search results. 37 | } 38 | \description{ 39 | Imports common bibliographic reference formats (i.e. .bib, .ris, or .txt). 40 | } 41 | \details{ 42 | The default for argument \code{tag_naming} is \code{"best_guess"}, which estimates what database has been used for ris tag replacement, then fills any gaps with generic tags. Any tags missing from the database (i.e. \code{code_lookup}) are passed unchanged. Other options are to use tags from Web of Science (\code{"wos"}), Scopus (\code{"scopus"}), Ovid (\code{"ovid"}) or Academic Search Premier (\code{"asp"}). If a \code{data.frame} is given, then it must contain two columns: \code{"code"} listing the original tags in the source document, and \code{"field"} listing the replacement column/tag names. The \code{data.frame} may optionally include a third column named \code{"order"}, which specifies the order of columns in the resulting \code{data.frame}; otherwise this will be taken as the row order. Finally, passing \code{"none"} to \code{tag_naming} suppresses tag replacement. 43 | } 44 | \section{Functions}{ 45 | \itemize{ 46 | \item \code{read_ref()}: Import a single file 47 | 48 | }} 49 | -------------------------------------------------------------------------------- /man/write_refs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import_export_helpers.R 3 | \name{write_bib} 4 | \alias{write_bib} 5 | \alias{write_ris} 6 | \alias{write_refs} 7 | \title{Export data to a bibliographic format} 8 | \usage{ 9 | write_bib(x) 10 | 11 | write_ris(x, tag_naming = "synthesisr") 12 | 13 | write_refs(x, format = "ris", tag_naming = "synthesisr", file = FALSE) 14 | } 15 | \arguments{ 16 | \item{x}{Either a data.frame containing bibliographic information or an object of class bibliography.} 17 | 18 | \item{tag_naming}{What naming convention should be used to write RIS files? See details for options.} 19 | 20 | \item{format}{What format should the data be exported as? Options are ris or bib.} 21 | 22 | \item{file}{Either logical indicating whether a file should be written (defaulting to FALSE), or a character giving the name of the file to be written.} 23 | } 24 | \value{ 25 | Returns a character vector containing bibliographic information in the specified format if \code{file} is FALSE, or saves output to a file if TRUE. 26 | } 27 | \description{ 28 | This function exports data.frames containing bibliographic information to either a .ris or .bib file.
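As a minimal sketch (assuming \code{refs} is a data.frame of bibliographic records, e.g. as returned by \code{synthesisr_read_refs()}):

# Return RIS-formatted lines without writing to disk
ris_lines <- write_refs(refs, format = "ris", file = FALSE)
# Or write a BibTeX file directly (the file name is illustrative)
write_refs(refs, format = "bib", file = "refs.bib")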
29 | } 30 | \section{Functions}{ 31 | \itemize{ 32 | \item \code{write_bib()}: Format a bib file for export 33 | 34 | \item \code{write_ris()}: Format a ris file for export 35 | 36 | }} 37 | -------------------------------------------------------------------------------- /renv/.gitignore: -------------------------------------------------------------------------------- 1 | library/ 2 | local/ 3 | cellar/ 4 | lock/ 5 | python/ 6 | sandbox/ 7 | staging/ 8 | -------------------------------------------------------------------------------- /renv/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "bioconductor.version": null, 3 | "external.libraries": [], 4 | "ignored.packages": [], 5 | "package.dependency.fields": [ 6 | "Imports", 7 | "Depends", 8 | "LinkingTo" 9 | ], 10 | "ppm.enabled": null, 11 | "ppm.ignored.urls": [], 12 | "r.version": null, 13 | "snapshot.type": "implicit", 14 | "use.cache": true, 15 | "vcs.ignore.cellar": true, 16 | "vcs.ignore.library": true, 17 | "vcs.ignore.local": true, 18 | "vcs.manage.ignores": true 19 | } 20 | -------------------------------------------------------------------------------- /tests/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/tests/.DS_Store -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(CiteSource) 3 | 4 | test_check("CiteSource") 5 | -------------------------------------------------------------------------------- /tests/testthat/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/tests/testthat/.DS_Store -------------------------------------------------------------------------------- /tests/testthat/test-import.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("ris import works", { 3 | x <- read_citations(testthat::test_path("data", "1_WoS.ris")) 4 | expect_equal(nrow(x), 219) 5 | expect_equal(x$cite_source[1], "1_WoS") 6 | }) 7 | 8 | litsearchr <- c( 9 | "@article{grames2019, 10 | title={An automated approach to identifying search terms for 11 | systematic reviews using keyword co-occurrence networks}, 12 | author={Grames, Eliza M and Stillman, Andrew N and Tingley, Morgan W and Elphick, Chris S}, 13 | journal={Methods in Ecology and Evolution}, 14 | volume={10}, 15 | number={10}, 16 | pages={1645--1654}, 17 | year={2019}, 18 | publisher={Wiley Online Library} 19 | }" 20 | ) 21 | 22 | tmp <- tempfile() 23 | 24 | writeLines(litsearchr, tmp) 25 | 26 | 27 | test_that("bib import works", { 28 | x <- read_citations(tmp, cite_sources = "A", cite_strings = "B", cite_labels = "C") 29 | expect_equal(nrow(x), 1) 30 | expect_equal(x$cite_label, "C") 31 | }) 32 | -------------------------------------------------------------------------------- /tests/testthat/test-tables.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(CiteSource) 3 | 4 | authors <- c('Mill, John Stuart and Shelley, Mary and Lovelave, Eda and Hemingway, Ernest and Garcia Marquez, Gabriel', 5 | 'Miller, Arthur and Snow, John', 6 | 'Woolf, Virginia', 7 | 'Miller, Arthur and Snow, 
John', 8 | 'Mill, John Stuart and Shelley, Mary and Eliot, TS', 9 | 'Woolf, Walter', 10 | 'Mill, Arthur and Shelley, Mary and Eliot, TS', 11 | 'Mill, Arthur and Shelley, Mary and Eliot, TS') 12 | 13 | years <- c(rep(1900, 7), 1901) 14 | 15 | test_that("disambiguated citations work", { 16 | expect_equal(generate_apa_citation(authors, years), 17 | c("J. S. Mill, Shelley, Lovelave et al. (1900)", 18 | "Miller & Snow (1900a)", "V. Woolf (1900)", 19 | "Miller & Snow (1900b)", "J. S. Mill, Shelley & Eliot (1900)", 20 | "W. Woolf (1900)", "A. Mill et al. (1900)", "A. Mill et al. (1901)") 21 | 22 | ) 23 | }) 24 | 25 | 26 | 27 | test_that("missing columns do not fail", { 28 | expect_warning(generate_apa_reference(LETTERS[1:5])) 29 | }) 30 | 31 | test_that("numeric columns do not fail", { 32 | expect_warning(generate_apa_reference(paste(LETTERS[1:5], LETTERS[1:5], sep = ", "), 2000:2004)) 33 | }) 34 | 35 | -------------------------------------------------------------------------------- /vignettes/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/vignettes/.DS_Store -------------------------------------------------------------------------------- /vignettes/benchmark_data/Search2_4.ris: -------------------------------------------------------------------------------- 1 | TY - JOUR 2 | AU - Krauss, KW 3 | AU - Osland, MJ 4 | TI - Tropical cyclones and the organization of mangrove forests: a review 5 | T2 - ANNALS OF BOTANY 6 | SN - 0305-7364 7 | SN - 1095-8290 8 | DA - JAN 23 9 | PY - 2020 10 | VL - 125 11 | IS - 2 12 | SP - 213 13 | EP - 234 14 | DO - 10.1093/aob/mcz161 15 | AN - WOS:000540297900002 16 | ER - 17 | 18 | TY - JOUR 19 | AU - Jaap, WC 20 | TI - Coral reef restoration 21 | T2 - ECOLOGICAL ENGINEERING 22 | CP - Symposium on Goal Setting and Success Criteria for Coastal Habitat Restoration 23 | SN - 0925-8574 24 | DA - JUL 25 | PY - 2000 26 | VL - 15 27 | IS - 3-4 28 | SP - 345 29 | EP - 364 30 | DO - 10.1016/S0925-8574(00)00085-9 31 | AN - WOS:000088549600014 32 | ER - 33 | 34 | TY - JOUR 35 | AU - Roth, F 36 | AU - Karcher, DB 37 | AU - Radecker, N 38 | AU - Hohn, S 39 | AU - Carvalho, S 40 | AU - Thomson, T 41 | AU - Saalmann, F 42 | AU - Voolstra, CR 43 | AU - Kurten, B 44 | AU - Struck, U 45 | AU - Jones, BH 46 | AU - Wild, C 47 | TI - High rates of carbon and dinitrogen fixation suggest a critical role of benthic pioneer communities in the energy and nutrient dynamics of coral reefs 48 | T2 - FUNCTIONAL ECOLOGY 49 | SN - 0269-8463 50 | SN - 1365-2435 51 | DA - SEP 52 | PY - 2020 53 | VL - 34 54 | IS - 9 55 | SP - 1991 56 | EP - 2004 57 | DO - 10.1111/1365-2435.13625 58 | C6 - JUL 2020 59 | AN - WOS:000551223100001 60 | ER - 61 | 62 | TY - CPAPER 63 | AU - Sivadas, TK 64 | A1 - IEEE 65 | TI - Development of Sensors & Measurement Techniques and their Implementation for Oceanographic Observations 66 | T2 - 2015 IEEE UNDERWATER TECHNOLOGY (UT) 67 | CP - 2015 IEEE Underwater Technology (UT) 68 | SN - 2573-3788 69 | SN - 2573-3796 70 | SN - 978-1-4799-8301-8 71 | PY - 2015 72 | AN - WOS:000380439000024 73 | ER - 74 | 75 | TY - CPAPER 76 | AU - de Santoli, L 77 | AU - Garcia, DA 78 | AU - Violante, AC 79 | ED - Mander, U 80 | ED - Brebbia, CA 81 | ED - MarinDuque, JF 82 | TI - Planning of flood defence management and rehabilitation of the natural habitat in the downstream part of the river Tiber 83 | T2 - GEO-ENVIRONMENT AND LANDSCAPE EVOLUTION III 84 | CP 
- 3rd International Conference on Evolution, Monitoring, Simulation, Management and Remediation of the Geological Environment and Landscape 85 | SN - 1746-4498 86 | SN - 978-1-84564-117-7 87 | PY - 2008 88 | VL - 100 89 | SP - 25 90 | EP - 34 91 | AN - WOS:000258182300003 92 | ER - 93 | 94 | TY - BOOK 95 | AU - Thrush, SF 96 | AU - Townsend, M 97 | AU - Hewitt, JE 98 | AU - Davies, K 99 | AU - Lohrer, AM 100 | AU - Lundquist, C 101 | AU - Cartner, K 102 | ED - Dymond, JR 103 | TI - THE MANY USES AND VALUES OF ESTUARINE ECOSYSTEMS 104 | T2 - ECOSYSTEM SERVICES IN NEW ZEALAND: CONDITIONS AND TRENDS 105 | SN - 978-0-478-34736-4 106 | PY - 2013 107 | SP - 226 108 | EP - 237 109 | AN - WOS:000331018800016 110 | ER - 111 | 112 | TY - JOUR 113 | AU - Maloney, JM 114 | AU - Bentley, SJ 115 | AU - Xe, KH 116 | AU - Obelcz, J 117 | AU - Georgiou, IY 118 | AU - Miner, MD 119 | TI - Mississippi River subaqueous delta is entering a stage of retrogradation 120 | T2 - MARINE GEOLOGY 121 | SN - 0025-3227 122 | SN - 1872-6151 123 | DA - JUN 1 124 | PY - 2018 125 | VL - 400 126 | SP - 12 127 | EP - 23 128 | DO - 10.1016/j.margeo.2018.03.001 129 | AN - WOS:000432234400002 130 | ER - 131 | 132 | TY - JOUR 133 | AU - Pleskachevsky, AL 134 | AU - Lehner, S 135 | AU - Rosenthal, W 136 | TI - Storm observations by remote sensing and influences of gustiness on ocean waves and on generation of rogue waves 137 | T2 - OCEAN DYNAMICS 138 | SN - 1616-7341 139 | DA - SEP 140 | PY - 2012 141 | VL - 62 142 | IS - 9 143 | SP - 1335 144 | EP - 1351 145 | DO - 10.1007/s10236-012-0567-z 146 | AN - WOS:000308345600005 147 | ER - 148 | 149 | TY - CPAPER 150 | AU - Sivadas, TK 151 | A1 - IEEE 152 | TI - Integrated Approach for Ocean Observation Systems with Development, Implementation, Training and Education 153 | T2 - OCEANS 2015 - GENOVA 154 | CP - Oceans 2015 Genova 155 | SN - 978-1-4799-8737-5 156 | PY - 2015 157 | DO - 10.1109/OCEANS-Genova.2015.7271647 158 | AN - WOS:000380485500328 159 | ER - 160 | 161 | TY - CPAPER 162 | AU - Poteras, G 163 | AU - Deak, G 164 | AU - Baraitaru, AG 165 | AU - Olteanu, MV 166 | AU - Raischi, NS 167 | AU - Halin, DSC 168 | ED - Noor, NM 169 | ED - Rahim, NL 170 | ED - Ting, SS 171 | ED - Zakarya, IA 172 | ED - Yusof, SY 173 | ED - Izhar, TNT 174 | ED - Amat, RC 175 | ED - Ibrahim, NM 176 | TI - Bioengineering technologies used for the development and equipment of complex installations to obtain energy from three renewable sources. 
Complex installations for coastal areas 177 | T2 - 2ND INTERNATIONAL CONFERENCE ON GREEN ENVIRONMENTAL ENGINEERING AND TECHNOLOGY 178 | CP - 2nd International Conference on Green Environmental Engineering and Technology (IConGEET) 179 | SN - 1755-1307 180 | PY - 2020 181 | VL - 616 182 | C7 - 012028 183 | DO - 10.1088/1755-1315/616/1/012028 184 | AN - WOS:000661130800028 185 | ER - 186 | 187 | TY - CHAP 188 | AU - Riosmena-Rodriguez, R 189 | ED - RiosmenaRodriguez, R 190 | ED - Nelson, W 191 | ED - Aguirre, J 192 | TI - Natural History of Rhodolith/Maerl Beds: Their Role in Near-Shore Biodiversity and Management 193 | T2 - RHODOLITH/MAERL BEDS: A GLOBAL PERSPECTIVE 194 | SN - 2211-0577 195 | SN - 2211-0585 196 | SN - 978-3-319-29315-8 197 | SN - 978-3-319-29313-4 198 | PY - 2017 199 | VL - 15 200 | SP - 3 201 | EP - 26 202 | DO - 10.1007/978-3-319-29315-8_1 203 | DO - 10.1007/978-3-319-29315-8 204 | AN - WOS:000430072100001 205 | ER - 206 | 207 | TY - JOUR 208 | AU - Herran, N 209 | AU - Narayan, GR 210 | AU - Reymond, CE 211 | AU - Westphal, H 212 | TI - Calcium Carbonate Production, Coral Cover and Diversity along a Distance Gradient from Stone Town: A Case Study from Zanzibar, Tanzania 213 | T2 - FRONTIERS IN MARINE SCIENCE 214 | SN - 2296-7745 215 | PY - 2017 216 | VL - 4 217 | C7 - 412 218 | DO - 10.3389/fmars.2017.00412 219 | AN - WOS:000457690600408 220 | ER - 221 | 222 | TY - JOUR 223 | AU - Asokan, R 224 | AU - Swamy, HMM 225 | AU - Thimmegowda, GG 226 | AU - Mahmood, R 227 | TI - Diversity analysis and characterization of Coleoptera-, Hemiptera- and Nematode-active cry genes in native isolates of Bacillus thuringiensis 228 | T2 - ANNALS OF MICROBIOLOGY 229 | SN - 1590-4261 230 | SN - 1869-2044 231 | DA - MAR 232 | PY - 2014 233 | VL - 64 234 | IS - 1 235 | SP - 85 236 | EP - 98 237 | DO - 10.1007/s13213-013-0636-7 238 | AN - WOS:000331648300010 239 | ER - 240 | 241 | TY - JOUR 242 | AU - Divya, K 243 | AU - Jisha, MS 244 | TI - Chitosan nanoparticles preparation and applications 245 | T2 - ENVIRONMENTAL CHEMISTRY LETTERS 246 | SN - 1610-3653 247 | SN - 1610-3661 248 | DA - MAR 249 | PY - 2018 250 | VL - 16 251 | IS - 1 252 | SP - 101 253 | EP - 112 254 | DO - 10.1007/s10311-017-0670-y 255 | AN - WOS:000425008300010 256 | ER - 257 | 258 | TY - JOUR 259 | AU - Horta, PA 260 | AU - Riul, P 261 | AU - Amado, GM 262 | AU - Gurgel, CFD 263 | AU - Berchez, F 264 | AU - Nunes, JMD 265 | AU - Scherner, F 266 | AU - Pereira, S 267 | AU - Lotufo, T 268 | AU - Peres, L 269 | AU - Sissini, M 270 | AU - Bastos, ED 271 | AU - Rosa, J 272 | AU - Munoz, P 273 | AU - Martins, C 274 | AU - Gouvea, L 275 | AU - Carvalho, V 276 | AU - Bergstrom, E 277 | AU - Schubert, N 278 | AU - Bahia, RG 279 | AU - Rodrigues, AC 280 | AU - Rorig, L 281 | AU - Barufi, JB 282 | AU - Figueiredo, M 283 | TI - Rhodoliths in Brazil: Current knowledge and potential impacts of climate change 284 | T2 - BRAZILIAN JOURNAL OF OCEANOGRAPHY 285 | SN - 1679-8759 286 | SN - 1982-436X 287 | PY - 2016 288 | VL - 64 289 | DO - 10.1590/S1679-875920160870064sp2 290 | AN - WOS:000381509100009 291 | ER - 292 | 293 | -------------------------------------------------------------------------------- /vignettes/citesource_benchmark_testing.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Benchmark Testing" 3 | 4 | author: "" 5 | 6 | date: "`r Sys.Date()`" 7 | output: rmarkdown::html_vignette 8 | vignette: > 9 | %\VignetteIndexEntry{Benchmark Testing} 10 | 
%\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | 15 | ```{r setup, include = FALSE} 16 | # This code chunk sets global options for all subsequent code chunks in the document using the `knitr` package in R. 17 | knitr::opts_chunk$set( 18 | collapse = TRUE, # Collapses output with no extra whitespace. 19 | comment = "#>", # Prefixes each line of output with `#>`. 20 | warning = FALSE, # Turns off warnings for all code chunks. 21 | fig.width = 6, # Sets default figure width to 6 inches. 22 | fig.height = 6 # Sets default figure height to 6 inches. 23 | ) 24 | ``` 25 | ## About this vignette 26 | 27 | When estimating the comprehensiveness of a search, researchers often compile a list of relevant studies and evaluate whether they are retrieved by their search strategy. While benchmarking is an important step in testing the sensitivity of a search, this process can be very time-consuming if variations of a string are being tested. 28 | 29 | This vignette will provide an example of how CiteSource can be used to speed up the process of benchmarking, especially when comparing variations of search strings or search strategies. 30 | 31 | ## 1. Installation of packages and loading libraries 32 | 33 | Use the following code to install CiteSource. Currently, CiteSource lives on GitHub, so you may need to first install the remotes package. This vignette also uses functions from the ggplot2 and dplyr packages. 34 | 35 | ```{r, results = FALSE, message=FALSE, warning=FALSE} 36 | #Install the remotes package to enable installation from GitHub 37 | #install.packages("remotes") 38 | #library(remotes) 39 | 40 | #Install CiteSource 41 | #remotes::install_github("ESHackathon/CiteSource") 42 | 43 | #Load the necessary libraries 44 | library(CiteSource) 45 | library(ggplot2) 46 | library(dplyr) 47 | ``` 48 | ## 2. Import files from multiple sources 49 | 50 | Users can import multiple .ris or .bib files into CiteSource and label each file with source information, such as the database or platform it came from. In this case we are uploading the results from six different strings, which were run in Web of Science. 
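If you are only combining a handful of files, labels can also be supplied directly at import time. The sketch below (not run) assumes that `read_citations()` accepts a vector of file paths together with matching `cite_sources` and `cite_labels` vectors; the metadata table approach used in the next chunk scales better for larger sets of files.

```{r}
#A minimal sketch (not run): label files directly at import.
#Assumes read_citations() takes file paths plus matching label vectors.
#citations <- read_citations(c("benchmark_data/Benchmarking.ris",
#                              "benchmark_data/Search1_1.ris"),
#                            cite_sources = c("Benchmark", "search1"),
#                            cite_labels = c("Benchmark", "search"))
```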
51 | 52 | ```{r} 53 | # Import citation files from folder 54 | citation_files <- list.files(path = "benchmark_data", pattern = "\\.ris", full.names = TRUE) 55 | 56 | # Print list of citation files to console 57 | citation_files 58 | 59 | # Set the path to the directory containing the citation files 60 | file_path <- "../vignettes/benchmark_data/" 61 | 62 | # Create a tibble that contains metadata about the citation files 63 | metadata_tbl <- tibble::tribble( 64 | ~files, ~cite_sources, ~cite_labels, 65 | "Benchmarking.ris", "Benchmark", "Benchmark", 66 | "Search1_1.ris", "search1", "search", 67 | "Search2_1.ris", "search2", "search", 68 | "Search2_2.ris", "search2", "search", 69 | "Search2_3.ris", "search2", "search", 70 | "Search2_4.ris", "search2", "search", 71 | "Search3_1.ris", "search3", "search", 72 | "Search3_2.ris", "search3", "search", 73 | "Search3_3.ris", "search3", "search", 74 | "Search3_4.ris", "search3", "search", 75 | "Search3_5.ris", "search3", "search", 76 | "Search4_1.ris", "search4", "search", 77 | "Search4_2.ris", "search4", "search", 78 | "Search4_3.ris", "search4", "search", 79 | "Search5_1.ris", "search5", "search", 80 | "Search5_2.ris", "search5", "search", 81 | "Search5_3.ris", "search5", "search", 82 | "Search5_4.ris", "search5", "search", 83 | "Search5_5.ris", "search5", "search", 84 | "Search5_6.ris", "search5", "search", 85 | "Search5_7.ris", "search5", "search", 86 | "Search5_8.ris", "search5", "search", 87 | "Search6_1.ris", "search6", "search", 88 | "Search6_2.ris", "search6", "search", 89 | "Search6_3.ris", "search6", "search", 90 | "Search6_4.ris", "search6", "search" 91 | ) %>% 92 | # Append the file path to each file name in the 'files' column 93 | dplyr::mutate(files = paste0(file_path, files)) 94 | # Read in citations using metadata table 95 | citations <- read_citations(metadata = metadata_tbl) 96 | ``` 97 | ## 3. Deduplication and source information 98 | 99 | CiteSource allows users to merge duplicates while maintaining information in the cite_source metadata field. Thus, information about the origin of the records is not lost in the deduplication process. The next few steps produce the dataframes that we can use in subsequent analyses. 100 | 101 | ```{r, results = FALSE, message=FALSE, warning=FALSE} 102 | 103 | #Deduplicate citations. This yields a dataframe of all records with duplicates merged, but the originating source information maintained in a new variable called cite_source. 104 | unique_citations <- dedup_citations(citations) 105 | 106 | #Count number of unique and non-unique citations from different sources and labels 107 | n_unique <- count_unique(unique_citations) 108 | 109 | #For each unique citation, determine which sources were present 110 | source_comparison <- compare_sources(unique_citations, comp_type = "sources") 111 | 112 | #Initial upload/post internal deduplication table creation 113 | initial_counts <- record_counts(unique_citations, citations, "cite_source") 114 | record_counts_table(initial_counts) 115 | 116 | ``` 117 | 118 | ## 4. Upset plot to compare discovery of benchmarking articles 119 | 120 | An upset plot is useful for visualizing overlap across multiple sources and provides detail about the number of shared and unique records. Using this data, we'll outline a few potential uses when looking at the discovery of benchmarking articles. 121 | 122 | We have uploaded 55 benchmarking articles. Of these 55 articles, all but 6 were found across the six searches. 
We can see the number of benchmarking articles that were discovered by each string as well as the number of articles that were shared between searches. 123 | 124 | Looking at the first column, we see that 9 benchmarking articles were found across every search. One may hypothesize that the 140 citations that follow in the second column contain a high number of relevant articles, because they too were discovered across all six searches. If a researcher were interested in building a larger group of benchmarking articles, they may want to review these articles first. 125 | 126 | Looking at the plot we can see that search #5 has the largest number of results, well over 6,000. Of these, 5,964 are unique to that search. We can also see that search #5 finds 3 benchmarking articles that would otherwise not have been found. While a researcher may want to ensure that they capture the highest number of benchmarking articles, adding roughly 6,000 articles may not be efficient when the gain is only 3 benchmarking articles. Instead of including this search in their final strategy, they may consider reviewing the three articles that were found by this search and work to adjust their other searches instead. 127 | 128 | Another decision in this case may be to drop searches #4 and #6, as neither of these strings contributes uniquely to the discovery of any benchmarking articles. While the data backs up this decision, there may also be more to consider. For example, if benchmarking articles are biased for any known reason, certain search strategies may be employed with an understanding that benchmarking data may not accurately reflect their potential contribution (e.g. benchmarking articles were gathered from previous systematic reviews that focused on a specific geographic region while the current review is global in nature). 129 | 130 | ```{r} 131 | #Generate a source comparison upset plot. 132 | plot_source_overlap_upset(source_comparison, decreasing = c(TRUE, TRUE)) 133 | 134 | ``` 135 | 136 | ## 5. Reviewing the record table 137 | The record table is helpful for reviewing which citations were found in each database and for quickly checking which benchmarking articles were not found by the searches. 138 | 139 | ```{r} 140 | 141 | unique_citations %>% 142 | dplyr::filter(stringr::str_detect(cite_label, "Benchmark")) %>% 143 | record_level_table(return = "DT") 144 | 145 | ``` 146 | -------------------------------------------------------------------------------- /vignettes/citesource_new_benchmark_testing.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "New Benchmark Testing" 3 | 4 | author: "" 5 | 6 | date: "`r Sys.Date()`" 7 | output: rmarkdown::html_vignette 8 | vignette: > 9 | %\VignetteIndexEntry{New Benchmark Testing} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | 15 | ```{r setup, include = FALSE} 16 | # This code chunk sets global options for all subsequent code chunks in the document using the `knitr` package in R. 17 | knitr::opts_chunk$set( 18 | collapse = TRUE, # Collapses output with no extra whitespace. 19 | comment = "#>", # Prefixes each line of output with `#>`. 20 | warning = FALSE, # Turns off warnings for all code chunks. 21 | fig.width = 6, # Sets default figure width to 6 inches. 22 | fig.height = 6 # Sets default figure height to 6 inches. 
23 | ) 24 | ``` 25 | ## About this vignette 26 | 27 | When estimating the comprehensiveness of a search, researchers often compile a list of relevant studies and evaluate whether they are found using their search strategy. While benchmarking is an important step in testing the sensitivity of a search, this process can be time-consuming if variations of a string are being tested. 28 | 29 | This vignette will provide an example of how CiteSource can be used to speed up the process of benchmarking, especially when comparing variations of search strings or search strategies. 30 | 31 | ## 1. Installing and loading CiteSource 32 | 33 | Use the following code to install CiteSource. Currently, CiteSource lives on GitHub, so you may need to first install the remotes package. 34 | 35 | ```{r, results = FALSE, message=FALSE, warning=FALSE} 36 | #Install the remotes package to enable installation from GitHub 37 | #install.packages("remotes") 38 | #library(remotes) 39 | 40 | #Install CiteSource 41 | #remotes::install_github("ESHackathon/CiteSource") 42 | 43 | #Load CiteSource 44 | library(CiteSource) 45 | ``` 46 | ## 2. Import citation files 47 | 48 | Users can import multiple .ris or .bib files into CiteSource and label each file with source information, such as the database or platform it came from. In this case we are uploading the results from five different strings, which were run in Web of Science. 49 | 50 | ```{r} 51 | # Import citation files from a folder 52 | file_path <- "../vignettes/new_benchmark_data/" 53 | citation_files <- list.files(path = file_path, pattern = "\\.ris", full.names = TRUE) 54 | 55 | # Print citation_files to double check the order in which R imported the files. 56 | citation_files 57 | ``` 58 | 59 | ## 3. Assign custom metadata 60 | In this example the benchmark file is tagged with the cite_source "benchmark", while the search files are tagged with "search 1", "search 2", etc. The cite_label for all of these files is "search". In later vignettes you will see how labels can be used to tag post-screening citations and citations that were included in the final synthesis. 61 | ```{r} 62 | # Create a tibble that contains metadata about the citation files 63 | imported_tbl <- tibble::tribble( 64 | ~files, ~cite_sources, ~cite_labels, 65 | "benchmark_15.ris", "benchmark", "search", 66 | "search1_166.ris", "search 1", "search", 67 | "search2_278.ris", "search 2", "search", 68 | "search3_302.ris", "search 3", "search", 69 | "search4_460.ris", "search 4", "search", 70 | "search5_495.ris", "search 5", "search" 71 | ) %>% 72 | # Append the file path to each file name in the 'files' column 73 | dplyr::mutate(files = paste0(file_path, files)) 74 | 75 | # Save the imported citations as raw_citations 76 | raw_citations <- read_citations(metadata = imported_tbl, verbose = FALSE) 77 | ``` 78 | ## 4. Deduplicate & create data tables 79 | 80 | CiteSource allows users to merge duplicates while maintaining information in the cite_source metadata field. Thus, information about the origin of the records is not lost in the deduplication process. The next few steps produce the dataframes that are used in subsequent plots and tables. 81 | 82 | ```{r} 83 | #Deduplicating yields a dataframe. Each record has a duplicate_id, which is unique; 84 | #citations that were duplicated retain each duplicate_id in the record_ids column. 85 | #Source and label tags from each duplicate are merged as a list in their respective columns. 
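#For example (illustrative values, not actual output): a record matched across two sources might end up with cite_source "benchmark, search 1" and record_ids "1, 204" after merging.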
86 | unique_citations <- dedup_citations(raw_citations) 87 | 88 | #count_unique yields another dataframe, which is used in the creation of plots and tables 89 | n_unique <- count_unique(unique_citations) 90 | 91 | #For each unique citation, determine which sources were present 92 | source_comparison <- compare_sources(unique_citations, comp_type = "sources") 93 | ``` 94 | 95 | ## 5. Review internal duplication 96 | 97 | Once we have imported the files, added custom metadata, and identified duplicates, it can be helpful to review the initial record count data to ensure everything looks okay. As a part of the deduplication process, duplicate records may have been identified within sources. The initial record table provides a count of how many records were initially in each source file, alongside a count of distinct records, which will differ if any duplicates were identified within a source file. 98 | 99 | ```{r} 100 | #Initial upload/post internal deduplication table creation 101 | initial_records_search <- calculate_initial_records(unique_citations) 102 | initial_record_table_search <- create_initial_record_table(initial_records_search) 103 | initial_record_table_search 104 | 105 | ``` 106 | 107 | ## 6. Compare overlap with an upset plot 108 | 109 | An upset plot is useful for visualizing overlap across multiple sources and provides detail about the number of shared and unique records. Using this data, we'll outline a few potential uses when benchmark testing a search. 110 | 111 | We have uploaded 15 benchmarking articles. Of these 15 articles, the upset plot shows us that all but 4 were found across the five searches. We can see the number of benchmarking articles that were discovered by each string as well as the number of articles that were shared between searches. 112 | 113 | ```{r, fig.alt="An upset plot visualizing the overlap of benchmarking articles found across five search strategies. The plot highlights that nine articles were identified by all five searches, while four benchmarking articles were missed entirely. Additional columns show the number of articles shared across different combinations of search strategies."} 114 | #Generate a source comparison upset plot. 115 | plot_source_overlap_upset(source_comparison, decreasing = c(TRUE, TRUE)) 116 | ``` 117 | 118 | Looking at the first column, we see that 9 benchmarking articles were found across every search. One may hypothesize that the 157 citations that follow in the second column contain a high number of relevant articles, because they too were discovered across all five searches. If a researcher were interested in building a larger group of benchmarking articles, they may want to review these articles first. 119 | 120 | Looking at the plot we can see that searches #4 and #5 have the largest number of results, at close to 500 each. Of these, 180 are unique to those two searches. We can also see that searches #4 and #5 found two of the benchmarking articles that would not have been found otherwise. While a researcher may want to ensure that they capture the highest number of benchmarking articles, adding close to 200 more articles than searches #2 and #3 may not be efficient when the gain is only 2 benchmarking articles. Instead of including these searches in their final strategy, they may consider reviewing the two articles that were found by them and work to adjust their other searches instead. 
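Before weighing these trade-offs, it can help to pull the records in question out of the data directly. The sketch below assumes the merged `cite_source` field stores the contributing sources as a delimited string (the same assumption behind the filtering used for the record-level table in step 7); it isolates the benchmark articles that no search retrieved.

```{r}
#A minimal sketch (assumes cite_source is a delimited string of sources):
#benchmark records whose merged cite_source mentions no search, i.e.
#benchmark articles that every search strategy missed.
missed_benchmarks <- unique_citations %>%
  dplyr::filter(stringr::str_detect(cite_source, "benchmark") &
                  !stringr::str_detect(cite_source, "search"))
```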
121 | 122 | Another decision in this case may be to drop searches #2 and #3, as neither of these strings contributes uniquely to the discovery of any benchmarking articles. While the data backs up this decision, there may also be more to consider. For example, if benchmarking articles are biased for any known reason, certain search strategies may be employed with an understanding that benchmarking data may not accurately reflect their potential contribution (e.g. benchmarking articles were gathered from previous systematic reviews that focused on a specific geographic region while the current review is global in nature). 123 | 124 | Finally, as we'll see in the next step, we can examine closely the four articles that weren't found by any search approach. This will help us adjust our search to better capture relevant studies. 125 | 126 | ## 7. Compare overlap with a record-level table 127 | The record table is helpful for reviewing which citations were found in each database and for quickly checking which benchmarking articles were not found by the searches. 128 | 129 | ```{r} 130 | unique_citations %>% 131 | dplyr::filter(stringr::str_detect(cite_source, "benchmark")) %>% 132 | record_level_table(return = "DT") 133 | ``` 134 | 135 | ## 8. Exporting for further analysis 136 | 137 | We may want to export the deduplicated set of results for further analysis. CiteSource offers a set of export functions called `export_csv`, `export_ris` and `export_bib` that will save dataframes as a .csv file, .ris file or .bib file, respectively. 138 | 139 | You can then reimport .csv and .ris files to pick up a project or analysis without having to start from scratch, or after making manual adjustments (such as adding missing abstract data) to a file. 140 | 141 | 142 | ### Generate a .csv file 143 | The separate argument can be used to create separate columns for cite_source, cite_label or cite_string to facilitate analysis. CSV files can be reimported into CiteSource in order to recreate all plots and tables. 144 | 145 | ```{r} 146 | #export_csv(unique_citations, filename = "citesource_export.csv") 147 | ``` 148 | 149 | ### Generate a .ris file 150 | Generate a .ris file and indicate custom field locations for cite_source, cite_label or cite_string. In this example, we'll be using EndNote, so we put cite_sources in the DB field, which will appear as the "Name of Database" field in EndNote, and cite_labels into C5, which will appear as the "Custom 5" metadata field in EndNote. 151 | 152 | ```{r} 153 | #export_ris(unique_citations, filename = "citesource_export.ris", source_field = "DB", label_field = "C5") 154 | ``` 155 | 156 | ### Generate a bibtex file 157 | Generate a bibtex file and include data from cite_source, cite_label or cite_string. 158 | 159 | ```{r} 160 | #export_bib(unique_citations, filename = "citesource_export.bib", include = c("sources", "labels", "strings")) 161 | ``` 162 | 163 | ### Re-importing a CiteSource exported file 164 | To reimport a .csv or a .ris file you can use the following functions. Here is an example of how you would re-import a file from your working directory. 165 | 166 | When reimporting, you can simply name the data 'unique_citations', as this is the primary dataframe that all other functions use. See the following example. 
167 | 168 | ```{r} 169 | #Reimport the citations and name the result 'unique_citations' 170 | #unique_citations <- reimport_csv("citesource_export.csv") 171 | 172 | #Create other datasets required for visualizations 173 | #n_unique <- count_unique(unique_citations) 174 | #source_comparison <- compare_sources(unique_citations, comp_type = "sources") 175 | 176 | 177 | #citesource_working_example <- reimport_ris("citesource_export.ris") 178 | 179 | ``` 180 | 181 | -------------------------------------------------------------------------------- /vignettes/new_benchmark_data/benchmark_15.ris: -------------------------------------------------------------------------------- 1 | TY - JOUR 2 | TI - A systematic review of the impact of wildfires on sleep disturbances 3 | AU - Isaac, Fadia 4 | AU - Toukhsati, Samia R 5 | AU - Di Benedetto, Mirella 6 | AU - Kennedy, Gerard A 7 | T2 - International journal of environmental research and public health 8 | DA - 2021/// 9 | PY - 2021 10 | VL - 18 11 | IS - 19 12 | SP - 10152 13 | ER - 14 | 15 | TY - JOUR 16 | TI - A systematic review of the physical health impacts from non-occupational exposure to wildfire smoke 17 | AU - Liu, Jia C 18 | AU - Pereira, Gavin 19 | AU - Uhl, Sarah A 20 | AU - Bravo, Mercedes A 21 | AU - Bell, Michelle L 22 | T2 - Environmental research 23 | DA - 2015/// 24 | PY - 2015 25 | VL - 136 26 | SP - 120 27 | EP - 132 28 | ER - 29 | 30 | TY - JOUR 31 | TI - Long-term impacts of non-occupational wildfire exposure on human health: A systematic review 32 | AU - Gao, Yuan 33 | AU - Huang, Wenzhong 34 | AU - Yu, Pei 35 | AU - Xu, Rongbin 36 | AU - Yang, Zhengyu 37 | AU - Gasevic, Danijela 38 | AU - Ye, Tingting 39 | AU - Guo, Yuming 40 | AU - Li, Shanshan 41 | T2 - Environmental pollution 42 | DA - 2023/// 43 | PY - 2023 44 | VL - 320 45 | SP - 121041 46 | ER - 47 | 48 | TY - JOUR 49 | TI - How to measure the economic health cost of wildfires–A systematic review of the literature for northern America 50 | AU - Dittrich, Ruth 51 | AU - McCallum, Stuart 52 | T2 - International journal of wildland fire 53 | DA - 2020/// 54 | PY - 2020 55 | VL - 29 56 | IS - 11 57 | SP - 961 58 | EP - 973 59 | ER - 60 | 61 | TY - JOUR 62 | TI - Critical review of health impacts of wildfire smoke exposure 63 | AU - Reid, Colleen E 64 | AU - Brauer, Michael 65 | AU - Johnston, Fay H 66 | AU - Jerrett, Michael 67 | AU - Balmes, John R 68 | AU - Elliott, Catherine T 69 | T2 - Environmental health perspectives 70 | DA - 2016/// 71 | PY - 2016 72 | VL - 124 73 | IS - 9 74 | SP - 1334 75 | EP - 1343 76 | ER - 77 | 78 | TY - JOUR 79 | TI - Fire and rain: A systematic review of the impacts of wildfire and associated runoff on aquatic fauna 80 | AU - Gomez Isaza, Daniel F 81 | AU - Cramp, Rebecca L 82 | AU - Franklin, Craig E 83 | T2 - Global Change Biology 84 | DA - 2022/// 85 | PY - 2022 86 | VL - 28 87 | IS - 8 88 | SP - 2578 89 | EP - 2595 90 | ER - 91 | 92 | TY - JOUR 93 | TI - Wildfire exposure during pregnancy and the risk of adverse birth outcomes: a systematic review 94 | AU - Amjad, Sana 95 | AU - Chojecki, Dagmara 96 | AU - Osornio-Vargas, Alvaro 97 | AU - Ospina, Maria B 98 | T2 - Environment International 99 | DA - 2021/// 100 | PY - 2021 101 | VL - 156 102 | SP - 106644 103 | ER - 104 | 105 | TY - JOUR 106 | TI - Continent-based systematic review of the short-term health impacts of wildfire emissions 107 | AU - Barros, Bela 108 | AU - Oliveira, Marta 109 | AU - Morais, Simone 110 | T2 - Journal of Toxicology and Environmental Health, Part B 111 | DA - 2023/// 112 | PY - 
2023 113 | VL - 26 114 | IS - 7 115 | SP - 387 116 | EP - 415 117 | ER - 118 | 119 | TY - JOUR 120 | TI - Health impacts of wildfire smoke on children and adolescents: a systematic review and meta-analysis 121 | AU - Zhang, Yiwen 122 | AU - Tingting, Ye 123 | AU - Huang, Wenzhong 124 | AU - Yu, Pei 125 | AU - Chen, Gongbo 126 | AU - Xu, Rongbin 127 | AU - Song, Jiangning 128 | AU - Guo, Yuming 129 | AU - Li, Shanshan 130 | T2 - Current Environmental Health Reports 131 | DA - 2024/// 132 | PY - 2024 133 | VL - 11 134 | IS - 1 135 | SP - 46 136 | EP - 60 137 | ER - 138 | 139 | TY - JOUR 140 | TI - A systematic review of relationships between mountain wildfire and ecosystem services 141 | AU - Vukomanovic, Jelena 142 | AU - Steelman, Toddi 143 | T2 - Landscape Ecology 144 | DA - 2019/// 145 | PY - 2019 146 | VL - 34 147 | SP - 1179 148 | EP - 1194 149 | ER - 150 | 151 | TY - JOUR 152 | TI - A review of the effects of wildfire smoke on the health and behavior of wildlife 153 | AU - Sanderfoot, Olivia V 154 | AU - Bassing, Sarah B 155 | AU - Brusa, Jamie L 156 | AU - Emmet, Robert L 157 | AU - Gillman, Sierra J 158 | AU - Swift, K 159 | AU - Gardner, Beth 160 | T2 - Environmental Research Letters 161 | DA - 2022/// 162 | PY - 2022 163 | VL - 16 164 | IS - 12 165 | SP - 123003 166 | ER - 167 | 168 | TY - JOUR 169 | TI - Estimating wildfire suppression costs: a systematic review 170 | AU - Mattioli, W 171 | AU - Ferrara, C 172 | AU - Lombardo, E 173 | AU - Barbati, Anna 174 | AU - Salvati, L 175 | AU - Tomao, A 176 | T2 - International Forestry Review 177 | DA - 2022/// 178 | PY - 2022 179 | VL - 24 180 | IS - 1 181 | SP - 15 182 | EP - 29 183 | ER - 184 | 185 | TY - JOUR 186 | TI - The economic cost of adverse health effects from wildfire-smoke exposure: a review 187 | AU - Kochi, Ikuho 188 | AU - Donovan, Geoffrey H 189 | AU - Champ, Patricia A 190 | AU - Loomis, John B 191 | T2 - International Journal of Wildland Fire 192 | DA - 2010/// 193 | PY - 2010 194 | VL - 19 195 | IS - 7 196 | SP - 803 197 | EP - 817 198 | ER - 199 | 200 | TY - JOUR 201 | TI - Economic Analysis of Wildfire Impacts to Water Quality: a Review 202 | AU - Wibbenmeyer, Matthew 203 | AU - Sloggy, Matthew R 204 | AU - Sánchez, José J 205 | T2 - Journal of Forestry 206 | DA - 2023/// 207 | PY - 2023 208 | VL - 121 209 | IS - 4 210 | SP - 374 211 | EP - 382 212 | ER - 213 | 214 | TY - JOUR 215 | TI - A systematic review of the health impacts of occupational exposure to wildland fires 216 | AU - Groot, Emily 217 | AU - Caturay, Alexa 218 | AU - Khan, Yasmin 219 | AU - Copes, Ray 220 | T2 - International journal of occupational medicine and environmental health 221 | DA - 2019/// 222 | PY - 2019 223 | VL - 32 224 | IS - 2 225 | SP - 121 226 | EP - 140 227 | ER - 228 | 229 | -------------------------------------------------------------------------------- /vignettes/new_stage_data/econlit_3.ris: -------------------------------------------------------------------------------- 1 | TY - JOUR 2 | AU - Mueller, Julie M. 3 | AU - Loomis, John B. 4 | AD - Northern AZ U 5 | AD - CO State U 6 | T1 - Spatial Dependence in Hedonic Property Models: Do Different Corrections for Spatial Dependence Result in Economically Significant Differences in Estimated Implicit Prices? 
7 | JO - Journal of Agricultural and Resource Economics 8 | JF - Journal of Agricultural and Resource Economics 9 | Y1 - 2008/08// 10 | VL - 33 11 | IS - 2 12 | SP - 212 13 | EP - 231 14 | SN - 10685502 15 | N1 - Accession Number: 0992879; Keywords: Hedonic; Spatial; Geographic Descriptors: U.S.; Geographic Region: Northern America; Publication Type: Journal Article; Update Code: 20080901 16 | N2 - While data used in hedonic property models are inherently spatial in nature, to date the majority of past regression analyses have used OLS models that overlook possible spatial dependence in the data when estimating implicit prices for environmental hazards. This paper explicitly addresses spatial dependence in a hedonic property model. We use robust testing procedures to determine the existence and type of spatial dependence in our OLS Model. After identifying the nature of the spatial dependence, OLS estimates of the implicit price of wildfire risk are compared to implicit prices obtained using a spatial error model with three different spatial weighting matrices. Spatially corrected estimates of implicit prices are found to be nearly the same as those obtained using OLS. Our results indicate that the inefficiency of OLS in the presence of spatially correlated errors may not always be economically significant, suggesting nonspatial hedonic property models may provide results useful for policy analysis, and spatial and nonspatial hedonic property models might be pooled in meta-analysis. 17 | KW - Valuation of Environmental Effects Q51 18 | KW - Urban, Rural, Regional, Real Estate, and Transportation Economics: Housing Demand R21 19 | KW - Housing Supply and Markets R31 20 | UR - https://search.ebscohost.com/login.aspx?direct=true&AuthType=ip,sso&db=ecn&AN=0992879&site=ehost-live&scope=site&custid=s8368349 21 | DP - EBSCOhost 22 | DB - ecn 23 | ER - 24 | 25 | TY - THES 26 | AU - Ma, Qiuhua 27 | T1 - Systematic Investigation of the Effect of Wildfire Events and Risks on Property Values 28 | PB - University of New Mexico 29 | Y1 - 2017/05// 30 | N1 - Accession Number: 1678288; Geographic Descriptors: U.S.; Geographic Region: Northern America; Publication Type: Dissertation; Update Code: 20171201 31 | N2 - Wildfires frequency and severity have been increasing in the western United States over the past few decades. Previous hedonic studies investigating wildfire examined the effect of occurrence and risk independently, overlooking the potentially confounding influence. Further, these studies find mixed and/or inconsistent results, which are complicated by a variety of data availability issues as well as varied econometric modeling decisions made by analysts. This analysis applies spatial econometrics modeling strategies in a hedonic pricing model framework to examine the joint effect of both past fire occurrence and current risk on property values in Santa Fe County, New Mexico. Overall, variations in data and econometric modeling techniques yield 2,000 regression results for hedonic models. Using hedonic results as primary estimates, I then employ an internal meta-analysis to investigate what factors explain variation in wildfire effects. 32 | M1 - Ph.D. 
33 | KW - Renewable Resources and Conservation: Forestry Q23 34 | KW - Climate; Natural Disasters and Their Management; Global Warming Q54 35 | KW - Ecological Economics: Ecosystem Services; Biodiversity Conservation; Bioeconomics; Industrial Ecology Q57 36 | UR - https://search.ebscohost.com/login.aspx?direct=true&AuthType=ip,sso&db=ecn&AN=1678288&site=ehost-live&scope=site&custid=s8368349 37 | DP - EBSCOhost 38 | DB - ecn 39 | ER - 40 | 41 | TY - THES 42 | AU - Prante, Tyler 43 | T1 - Developing Social Capital as a Response to the Wildfire Problem 44 | PB - University of New Mexico 45 | Y1 - 2008/08// 46 | N1 - Accession Number: 1075662; Keywords: Natural Resource; Revealed Preference; Social Capital; Geographic Descriptors: U.S.; Geographic Region: Northern America; Publication Type: Dissertation; Update Code: 20091201 47 | N2 - This research analyzes the use of natural resource management techniques that attempt to build social capital. As an application, a primary focus is on the policy response to wildfire. The principal research is segmented into 3 related chapters. First, meta-analysis is used to statistically summarize the literature of Coasean bargaining experiments. Results from this analysis suggest that the social dimensions characterizing an experiment impact bargaining behavior on a magnitude similar to that of variables that describe transaction costs. This result suggests an increased focus in future experimental work on the impact of social dimension characteristics is warranted. Next, revealed preference analysis is applied to a unique forest management program in New Mexico. The Collaborative Forest Restoration Program includes stakeholders actively in the creation and implantation of management on federal forested lands. Statistical analysis of the funding pattern from the program reveals that developing social capital and maintaining forest health stand out among a set of potentially divergent goals, while implementing projects that span multiple land jurisdictions is argued to be a missed opportunity for developing social capital. Finally, experimental economics is used to analyze the effectiveness of a set of potential policy tools to induce private spending on wildfire risk mitigation. The policy tools considered here are shown to have the opposite impact, reducing private spending. When success is assessed alternatively by the probability of engaging in risk mitigation, these policy tools have the desired effect. Given the potential to crowd out private expenditure and the sensitivity to the specific mitigation objective, the implementation of these policy tools takes on greater significance. 48 | M1 - Ph.D. 
49 | KW - Ecological Economics: Ecosystem Services; Biodiversity Conservation; Bioeconomics; Industrial Ecology Q57 50 | KW - Economic Sociology; Economic Anthropology; Language; Social and Economic Stratification Z13 51 | UR - https://search.ebscohost.com/login.aspx?direct=true&AuthType=ip,sso&db=ecn&AN=1075662&site=ehost-live&scope=site&custid=s8368349 52 | DP - EBSCOhost 53 | DB - ecn 54 | ER - 55 | -------------------------------------------------------------------------------- /vignettes/topic_data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/vignettes/topic_data/.DS_Store -------------------------------------------------------------------------------- /vignettes/valid_data/benchmark.ris: -------------------------------------------------------------------------------- 1 | TY - JOUR 2 | TI - Marital status and problem gambling among Australian older adults: The mediating role of loneliness 3 | AU - Botterill, Emma 4 | AU - Gill, Peter Richard 5 | AU - McLaren, Suzanne 6 | AU - Gomez, Rapson 7 | T2 - Journal of gambling studies 8 | DA - 2016/// 9 | PY - 2016 10 | DO - 10.1007/s10899-015-9575-5 11 | DP - Google Scholar 12 | VL - 32 13 | IS - 3 14 | SP - 1027 15 | EP - 1038 16 | ST - Marital status and problem gambling among Australian older adults 17 | L2 - https://link.springer.com/article/10.1007/s10899-015-9575-5 18 | ER - 19 | 20 | TY - JOUR 21 | TI - Worsened Anxiety and Loneliness Influenced Gaming and Gambling during the COVID-19 Pandemic 22 | AU - Mohamed, Mohamed S. 23 | AU - Rukh, Gull 24 | AU - Schiöth, Helgi B. 25 | AU - Vadlin, Sofia 26 | AU - Olofsdotter, Susanne 27 | AU - Åslund, Cecilia 28 | AU - Nilsson, Kent W. 29 | T2 - Journal of Clinical Medicine 30 | DA - 2022/// 31 | PY - 2022 32 | DO - 10.3390/jcm12010249 33 | DP - Google Scholar 34 | VL - 12 35 | IS - 1 36 | SP - 249 37 | L2 - https://www.mdpi.com/2077-0383/12/1/249 38 | ER - 39 | 40 | TY - JOUR 41 | TI - At-risk and problem gambling among Finnish youth: The examination of risky alcohol consumption, tobacco smoking, mental health and loneliness as gender-specific correlates 42 | AU - Edgren, Robert 43 | AU - Castrén, Sari 44 | AU - Jokela, Markus 45 | AU - Salonen, Anne H. 
46 | T2 - Nordic Studies on Alcohol and Drugs 47 | DA - 2016/// 48 | PY - 2016 49 | DO - 10.1515/nsad-2016-0005 50 | DP - Google Scholar 51 | VL - 33 52 | IS - 1 53 | SP - 61 54 | EP - 80 55 | ST - At-risk and problem gambling among Finnish youth 56 | L2 - https://journals.sagepub.com/doi/abs/10.1515/nsad-2016-0005 57 | ER - 58 | 59 | TY - JOUR 60 | TI - Loneliness and online gambling-community participation of young social media users 61 | AU - Sirola, Anu 62 | AU - Kaakinen, Markus 63 | AU - Savolainen, Iina 64 | AU - Oksanen, Atte 65 | T2 - Computers in Human Behavior 66 | DA - 2019/// 67 | PY - 2019 68 | DO - 10.1016/j.chb.2019.01.023 69 | DP - Google Scholar 70 | VL - 95 71 | SP - 136 72 | EP - 145 73 | L2 - https://www.sciencedirect.com/science/article/pii/S0747563219300287?casa_token=hXPqVjd7zHEAAAAA:Ve8c7FOVsC2BMSTDDimo9VHCZVS4GpxE9Rwke3MWYOMd56JYckpztNuD_D3fML_2_qufhD_NNg 74 | ER - 75 | 76 | TY - JOUR 77 | TI - The role of loneliness and self-control in predicting problem gambling behaviour 78 | AU - McQuade, Anne 79 | AU - Gill, Peter 80 | T2 - Gambling Research: Journal of the National Association for Gambling Studies (Australia) 81 | DA - 2012/// 82 | PY - 2012 83 | DP - Google Scholar 84 | VL - 24 85 | IS - 1 86 | SP - 18 87 | EP - 30 88 | L2 - https://search.informit.org/doi/abs/10.3316/INFORMIT.859155913483693 89 | KW - ⛔ No DOI found 90 | ER - 91 | 92 | TY - JOUR 93 | TI - The role of loneliness, negative affectivity, mentalization, and alcohol use in adolescent gambling 94 | AU - Cosenza, Marina 95 | AU - Ciccarelli, Maria 96 | AU - Pizzini, Barbara 97 | AU - Griffiths, Mark David 98 | AU - Sacco, Mariagiulia 99 | AU - Nigro, Giovanna 100 | T2 - Mediterranean Journal of Clinical Psychology 101 | DA - 2022/// 102 | PY - 2022 103 | DP - Google Scholar 104 | VL - 10 105 | IS - 3 106 | KW - ⛔ No DOI found 107 | ER - 108 | 109 | TY - JOUR 110 | TI - Loneliness and life dissatisfaction in gamblers 111 | AU - Porter, James 112 | AU - Ungar, Julia 113 | AU - Frisch, G. Ron 114 | AU - Chopra, Reena 115 | T2 - Journal of Gambling issues 116 | DA - 2004/// 117 | PY - 2004 118 | DO - 10.4309/jgi.2004.11.13 119 | DP - Google Scholar 120 | IS - 11 121 | ER - 122 | 123 | TY - JOUR 124 | TI - Interaction of the loneliness phenomenon and gambling addiction 125 | AU - Akbieva, Zarema Soltamuradovna 126 | AU - Damadaeva, Angela Strgeevna 127 | AU - Magomedova, Sabirat Magomedovna 128 | AU - Ibragimova, Khava Shakhbanovna 129 | T2 - Research Journal of Pharmaceutical, Biological and Chemical Sciences 130 | DA - 2016/// 131 | PY - 2016 132 | DP - Google Scholar 133 | VL - 7 134 | IS - 5 135 | SP - 1109 136 | EP - 1117 137 | L2 - https://elibrary.ru/item.asp?id=27579286 138 | KW - ⛔ No DOI found 139 | ER - 140 | 141 | TY - CONF 142 | TI - Young, stressed, and blind-minded: An experimental investigation of the routes from loneliness to adolescent problem gambling 143 | AU - Ciccarelli, Maria 144 | AU - Nigro, Giovanna 145 | AU - Pizzini, Barbara 146 | AU - Sacco, Mariagiulia 147 | AU - Griffiths, Mark 148 | AU - Cosenza, Marina 149 | C3 - JOURNAL OF BEHAVIORAL ADDICTIONS 150 | DA - 2022/// 151 | PY - 2022 152 | DP - Google Scholar 153 | VL - 11 154 | SP - 281 155 | EP - 282 156 | PB - AKADEMIAI KIADO ZRT BUDAFOKI UT 187-189-A-3, H-1117 BUDAPEST, HUNGARY 157 | ST - Young, stressed, and blind-minded 158 | KW - ⛔ No DOI found 159 | ER - 160 | 161 | TY - CONF 162 | TI - The impact of social isolation and loneliness in a problem gambling population 163 | AU - Spence-Thomas, R. 164 | AU - Thomas, T. 
165 | C3 - Australian Journal of Psychology 166 | DA - 2003/// 167 | PY - 2003 168 | DP - Google Scholar 169 | VL - 55 170 | SP - 213 171 | EP - 213 172 | PB - AUSTRALIAN PSYCHOLOGICAL SOC 1 GRATTAN STREET, CARLTON, VICTORIA 3053, AUSTRALIA 173 | KW - ⛔ No DOI found 174 | ER - 175 | 176 | TY - JOUR 177 | TI - Dealing with the negative consequences of gambling addiction 178 | AU - Blanco Miguel, Pilar 179 | T2 - CUADERNOS DE TRABAJO SOCIAL 180 | DA - 2016/// 181 | PY - 2016 182 | DO - 10.5209/cuts.48858 183 | DP - Google Scholar 184 | VL - 29 185 | IS - 2 186 | SP - 335 187 | EP - 344 188 | ER - 189 | 190 | TY - JOUR 191 | TI - Gambling alone? A study of solitary and social gambling in America 192 | AU - Bernhard, Bo J. 193 | AU - Dickens, David R. 194 | AU - Shapiro, Paul D. 195 | T2 - UNLV Gaming Research & Review Journal 196 | AB - In his acclaimed 2000 book Bowling Alone, Robert Putnam documents a disturbing social trend of the broadest kind. Putnam cites a wide variety of data that indicate that over the past fifty years, Americans have become increasingly socially disengaged. In developing this theme, Putnam specifically cites the increase in casino gambling (and especially machine gambling) as evidence in support of his argument. Building on the empirical and theoretical work of Putnam, this exploratory article examines the sub-phenomenon of 'gambling alone' by exploring sample survey data on solitary and social gambling behavior among adults who reside in Las Vegas, Nevada. Specifically, to further understand these phenomena, a number of demographic, attitudinal, and behavioral variables are examined for their explanatory power in predicting solitary vs. social gambling behavior. (PsycINFO Database Record (c) 2016 APA, all rights reserved) 197 | DA - 2007/// 198 | PY - 2007 199 | DP - EBSCOhost 200 | VL - 11 201 | IS - 2 202 | SP - 1 203 | EP - 13 204 | J2 - UNLV Gaming Research & Review Journal 205 | SN - 1531-0930 206 | ST - Gambling alone? 207 | UR - https://search.ebscohost.com/login.aspx?direct=true&db=psyh&AN=2007-17475-001&site=ehost-live&scope=site 208 | AN - 2007-17475-001 209 | Y2 - 2023/01/23/21:42:58 210 | KW - ⛔ No DOI found 211 | KW - Gambling 212 | KW - gambling alone 213 | KW - Loneliness 214 | KW - Social Behavior 215 | KW - social gambling 216 | KW - solitary 217 | ER - 218 | 219 | TY - JOUR 220 | TI - The association between loneliness, social isolation and women's electronic gaming machine gambling 221 | AU - Trevorrow, Karen 222 | AU - Moore, Susan 223 | T2 - Journal of Gambling Studies 224 | AB - Examined the extent to which motivations for women gambling were boredom, loneliness, and isolation. Research was conducted with a comparative study of female electronic gaming machine (EGM) gamblers and non-gamblers, matched for age, education, and geographic location. Women EGM gamblers did not differ significantly from the non-gambling sample in terms of loneliness, friendship satisfaction, or adequacy of social networks. However, a sub-group classified by the South Oaks Gambling Screen as problem gamblers were significantly more lonely than the rest of the sample. These women were also more likely to be involved in social networks where gambling was normative. Limitation of the research and need for a longitudinal study are discussed. 
(PsycINFO Database Record (c) 2019 APA, all rights reserved)
225 | DA - 1998///
226 | PY - 1998
227 | DO - 10.1023/a:1022057609568
228 | DP - EBSCOhost
229 | VL - 14
230 | IS - 3
231 | SP - 263
232 | EP - 284
233 | J2 - Journal of Gambling Studies
234 | SN - 1050-5350
235 | AN - 1999-03840-004
236 | L2 - https://search.ebscohost.com/login.aspx?direct=true&db=psyh&AN=1999-03840-004&site=ehost-live&scope=site&scope=cite
237 | KW - (18–77 yrs old) women
238 | KW - Boredom
239 | KW - boredom vs loneliness vs isolation
240 | KW - Digital Gaming
241 | KW - Gambling
242 | KW - Loneliness
243 | KW - Motivation
244 | KW - motivation to gamble
245 | KW - Social Isolation
246 | ER -
247 |
248 |
--------------------------------------------------------------------------------
/vignettes/working_example_data/AGRIS.ris:
--------------------------------------------------------------------------------
1 | TY - JOUR
2 | AB - NA
3 | AU - Ahmad, S. S.
4 | AU - Husain, S. Z.
5 | KW - species
6 | drug plants
7 | traditional uses
8 | surveys
9 | pakistan
10 | saline soils
11 | traditional medicines
12 | ethnobotany
13 | N1 - (Fatima Jinnah Women Univ., Rawalpindi (Pakistan). Dept. of Environmental Sciences)
14 | PY - 2008
15 | ST - Ethno medicinal survey of plants from salt range (Kallar Kahar) of Pakistan
16 | TI - Ethno medicinal survey of plants from salt range (Kallar Kahar) of Pakistan
17 | VL - v. 40
18 | ID - 4202
19 | ER -
20 |
21 | TY - JOUR
22 | AB - NA
23 | AU - Awotide, Olawale D.
24 | AU - Kehinde, Adetunji L.
25 | AU - Agbola, Peter O.
26 | KW - equations
27 | family resource management
28 | livelihood
29 | rural sociology
30 | mathematical models
31 | regression analysis
32 | sociodemographic characteristics
33 | rural poverty
34 | low income households
35 | farm income
36 | sport hunting
37 | LA - English
38 | PY - 2010
39 | ST - Poverty and rural livelihood diversification among farming households in southwest Nigeria
40 | TI - Poverty and rural livelihood diversification among farming households in southwest Nigeria
41 | VL - v. 8
42 | ID - 4203
43 | ER -
44 |
45 | TY - JOUR
46 | AB - NA
47 | AU - da Silva Dias, João Carlos
48 | KW - landraces
49 | germplasm conservation
50 | food production
51 | breeders' rights
52 | genetic erosion
53 | cultivars
54 | plant breeding
55 | genetic improvement
56 | hybrids
57 | vegetables
58 | open pollination
59 | crop yield
60 | artificial selection
61 | genetic resistance
62 | disease resistance
63 | vegetable crops
64 | plant variety protection
65 | food security
66 | poverty
67 | LA - English
68 | PY - 2010
69 | ST - Impact of improved vegetable cultivars in overcoming food insecurity
70 | TI - Impact of improved vegetable cultivars in overcoming food insecurity
71 | VL - v. v. 176
72 | ID - 4204
73 | ER -
74 |
75 | TY - JOUR
76 | AB - NA
77 | AU - Dahmardeh, Majid
78 | AU - Dahmardeh, Mehdi
79 | AU - Yazdani, Saeed
80 | AU - Piri, Eissa
81 | KW - wetlands
82 | water resources
83 | water supply
84 | lakes
85 | socioeconomic status
86 | LA - English
87 | PY - 2009
88 | ST - socio-economic effects of Hamoon Lake in Sistan region of Iran
89 | TI - socio-economic effects of Hamoon Lake in Sistan region of Iran
90 | VL - v. 7
91 | ID - 4205
92 | ER -
93 |
94 | TY - JOUR
95 | AB - NA
96 | AU - Guei, Robert G.
97 | AU - Barra, Abdoulaye
98 | AU - Silue, Drissa
99 | KW - financial institutions
100 | Food and Agriculture Organization
101 | funding
102 | seed quality
103 | business enterprises
104 | plant cultural practices
105 | corn
106 | farmers
107 | rice
108 | farm income
109 | crop production
110 | millets
111 | seed certification
112 | yields
113 | food security
114 | poverty
115 | grain sorghum
116 | LA - English
117 | PY - 2011
118 | ST - Promoting smallholder seed enterprises: quality seed production of rice, maize, sorghum and millet in northern Cameroon
119 | TI - Promoting smallholder seed enterprises: quality seed production of rice, maize, sorghum and millet in northern Cameroon
120 | VL - v. 9
121 | ID - 4206
122 | ER -
123 |
124 | TY - JOUR
125 | AB - NA
126 | AU - Leisher, C.
127 | AU - Nature Conservancy, Carlton Victoria Australia Program eng
128 | AU - Van Beukering, P.
129 | AU - Scherl, L. M.
130 | KW - PROTECTED AREAS
131 | MARINE AREAS
132 | AUTONOMISATION
133 | COMMUNITY INVOLVEMENT
134 | ZONE PROTEGEE
135 | DEVELOPPEMENT DE LA COMMUNAUTE
136 | PARTICIPACION COMUNITARIA
137 | DEVELOPPEMENT RURAL
138 | CONSERVACION DE LA NATURALEZA
139 | ETUDE DE CAS
140 | CASE STUDIES
141 | DEVELOPMENT PROJECTS
142 | ZONAS PROTEGIDAS
143 | RURAL DEVELOPMENT
144 | EMPOWERMENT
145 | HOGARES
146 | NATURE CONSERVATION
147 | households
148 | AUTONOMIZACION
149 | DESARROLLO DE LA COMUNIDAD
150 | PROJET DE DEVELOPPEMENT
151 | POBREZA
152 | POVERTY
153 | PROYECTOS DE DESARROLLO
154 | ESTUDIOS DE CASOS PRACTICOS
155 | DESARROLLO RURAL
156 | PAUVRETE
157 | REGION MARINE
158 | PARTICIPATION COMMUNAUTAIRE
159 | COMMUNITY DEVELOPMENT
160 | MENAGE
161 | ZONAS MARINAS
162 | CONSERVATION DE LA NATURE
163 | N1 - (Australia)
164 | PY - 2007
165 | ST - Nature's investment bank: how marine protected areas contributed to poverty reduction
166 | TI - Nature's investment bank: how marine protected areas contributed to poverty reduction
167 | ID - 4207
168 | ER -
169 |
170 | TY - JOUR
171 | AB - NA
172 | AU - Leisher, C.
173 | AU - Nature Conservancy, Carlton Victoria Australia Program eng
174 | AU - Van Beukering, P.
175 | AU - Scherl, L. M.
176 | KW - PROTECTED AREAS
177 | MARINE AREAS
178 | AUTONOMISATION
179 | COMMUNITY INVOLVEMENT
180 | ZONE PROTEGEE
181 | DEVELOPPEMENT DE LA COMMUNAUTE
182 | PARTICIPACION COMUNITARIA
183 | DEVELOPPEMENT RURAL
184 | CONSERVACION DE LA NATURALEZA
185 | ETUDE DE CAS
186 | CASE STUDIES
187 | DEVELOPMENT PROJECTS
188 | ZONAS PROTEGIDAS
189 | RURAL DEVELOPMENT
190 | EMPOWERMENT
191 | HOGARES
192 | NATURE CONSERVATION
193 | households
194 | AUTONOMIZACION
195 | DESARROLLO DE LA COMUNIDAD
196 | PROJET DE DEVELOPPEMENT
197 | POBREZA
198 | POVERTY
199 | PROYECTOS DE DESARROLLO
200 | ESTUDIOS DE CASOS PRACTICOS
201 | DESARROLLO RURAL
202 | PAUVRETE
203 | REGION MARINE
204 | PARTICIPATION COMMUNAUTAIRE
205 | COMMUNITY DEVELOPMENT
206 | MENAGE
207 | ZONAS MARINAS
208 | CONSERVATION DE LA NATURE
209 | N1 - (Australia)
210 | PY - 2007
211 | ST - Nature's investment bank: how marine protected areas contributed to poverty reduction
212 | TI - Nature's investment bank: how marine protected areas contributed to poverty reduction
213 | ID - 4208
214 | ER -
215 |
216 | TY - JOUR
217 | AB - NA
218 | AU - Leisher, C.
219 | AU - Nature Conservancy, Carlton Victoria Australia Program eng
220 | AU - Van Beukering, P.
221 | AU - Scherl, L. M.
222 | KW - PROTECTED AREAS
223 | MARINE AREAS
224 | AUTONOMISATION
225 | COMMUNITY INVOLVEMENT
226 | ZONE PROTEGEE
227 | DEVELOPPEMENT DE LA COMMUNAUTE
228 | PARTICIPACION COMUNITARIA
229 | DEVELOPPEMENT RURAL
230 | CONSERVACION DE LA NATURALEZA
231 | ETUDE DE CAS
232 | CASE STUDIES
233 | DEVELOPMENT PROJECTS
234 | ZONAS PROTEGIDAS
235 | RURAL DEVELOPMENT
236 | EMPOWERMENT
237 | HOGARES
238 | NATURE CONSERVATION
239 | households
240 | AUTONOMIZACION
241 | DESARROLLO DE LA COMUNIDAD
242 | PROJET DE DEVELOPPEMENT
243 | POBREZA
244 | POVERTY
245 | PROYECTOS DE DESARROLLO
246 | ESTUDIOS DE CASOS PRACTICOS
247 | DESARROLLO RURAL
248 | PAUVRETE
249 | REGION MARINE
250 | PARTICIPATION COMMUNAUTAIRE
251 | COMMUNITY DEVELOPMENT
252 | MENAGE
253 | ZONAS MARINAS
254 | CONSERVATION DE LA NATURE
255 | N1 - (Australia)
256 | PY - 2007
257 | ST - Nature's investment bank: how marine protected areas contributed to poverty reduction
258 | TI - Nature's investment bank: how marine protected areas contributed to poverty reduction
259 | ID - 4209
260 | ER -
261 |
262 | TY - JOUR
263 | AB - NA
264 | AU - Meinzen-Dick, Ruth S.
265 | AU - Devaux, Andre
266 | AU - Antezana, Ivonne
267 | KW - social capital
268 | agricultural policy
269 | potatoes
270 | development aid
271 | economic development
272 | development policy
273 | market access
274 | assets
275 | vegetable crops
276 | sustainable livelihoods framework
277 | poverty
278 | indigenous knowledge
279 | market power
280 | food processing
281 | development projects
282 | supply chain
283 | Solanum tuberosum
284 | collective action
285 | genetic variation
286 | cultivars
287 | sustainable development
288 | food marketing
289 | humans
290 | poverty reduction
291 | small-scale farming
292 | LA - English
293 | PY - 2009
294 | ST - Underground assets: potato biodiversity to improve the livelihoods of the poor
295 | TI - Underground assets: potato biodiversity to improve the livelihoods of the poor
296 | VL - v. 7 issue 4
297 | ID - 4210
298 | ER -
299 |
300 | TY - JOUR
301 | AB - NA
302 | AU - Quang, N. V.
303 | AU - Sato, N.
304 | KW - income
305 | land diversion
306 | utilizacion de la tierra
307 | revenu
308 | retrait des terres
309 | foret
310 | utilisation des terres
311 | vietnam
312 | forests
313 | bosques
314 | land use
315 | desviacion del uso de la tierra
316 | viet nam
317 | comunidades
318 | renta
319 | communities
320 | LA - English
321 | N1 - (Kyushu Univ., Fukuoka (Japan). Faculty of Agriculture)
322 | PY - 2008
323 | ST - The role of forest in people's livelihood: A case study in North-eastern Vietnam
324 | TI - The role of forest in people's livelihood: A case study in North-eastern Vietnam
325 | VL - v. 53
326 | ID - 4211
327 | ER -
328 |
329 | TY - JOUR
330 | AB - NA
331 | AU - Rana, M. P.
332 | AU - Sohel, M. S. I.
333 | AU - Akhter, S.
334 | AU - Alam, M. S.
335 | KW - wetlands
336 | rural communities
337 | habitats
338 | land use
339 | households
340 | occupations
341 | bangladesh
342 | N1 - (Shahjalal Univ. of Science and Technology, Sylhet (Bangladesh). Dept. of Forestry and Environmental Science)
343 | (Bangladesh Agricultural Univ. (Bangladesh). Dept. of Fisheries Management)
344 | PY - 2010
345 | ST - Haor based livelihood dependency of a rural community: A study on hakaluki haor in Bangladesh
346 | TI - Haor based livelihood dependency of a rural community: A study on hakaluki haor in Bangladesh
347 | VL - v. 47
348 | ID - 4212
349 | ER -
350 |
351 | TY - JOUR
352 | AB - NA
353 | AU - Vianna, André Luiz Menezes
354 | AU - Fearnside, Philip Martin
355 | KW - sustainable forestry
356 | biomass
357 | carbon
358 | logging
359 | natural regeneration
360 | forest management
361 | trees
362 | managers
363 | carbon sinks
364 | LA - English
365 | PY - 2014
366 | ST - Impact of Community Forest Management on Biomass Carbon Stocks in the Uatumã Sustainable Development Reserve, Amazonas, Brazil
367 | TI - Impact of Community Forest Management on Biomass Carbon Stocks in the Uatumã Sustainable Development Reserve, Amazonas, Brazil
368 | VL - v. 33
369 | ID - 4213
370 | ER -
371 |
372 |
--------------------------------------------------------------------------------