├── .DS_Store
├── .Rbuildignore
├── .github
├── .gitignore
├── r-depends.rds
└── workflows
│ ├── R-cmd-check.yaml_OLD
│ ├── R-cmd-check.yml
│ ├── deploy.yml_OLD
│ ├── document-and-deploy-manual.yml
│ ├── document-and-deploy-r2u.yaml_OLD
│ ├── document-and-deploy.yml
│ ├── pkgdown.yaml_OLD
│ └── pr-commands.yaml
├── .gitignore
├── CiteSource.Rproj
├── DESCRIPTION
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
├── .DS_Store
├── CiteSource.R
├── compare.R
├── count.R
├── dedup.R
├── export.R
├── helpers.R
├── import.R
├── import_export_helpers.R
├── new_count_and_table.R
├── plots.R
├── reimport.R
├── runShiny.R
├── sysdata.rda
└── tables.R
├── README.md
├── _pkgdown.yml
├── cran-comments.md
├── inst
├── .DS_Store
├── extdata
│ └── examplecitations.rds
└── shiny-app
│ └── CiteSource
│ ├── app.R
│ ├── google_analytics_dev.html
│ ├── google_analytics_main.html
│ └── www
│ ├── CS.gif
│ ├── about.md
│ ├── favicon.png
│ ├── use-cases.md
│ └── user_guide.md
├── man
├── CiteSource-package.Rd
├── calculate_detailed_records.Rd
├── calculate_initial_records.Rd
├── calculate_phase_count.Rd
├── calculate_phase_records.Rd
├── calculate_record_counts.Rd
├── citation_summary_table.Rd
├── compare_sources.Rd
├── count_unique.Rd
├── create_detailed_record_table.Rd
├── create_initial_record_table.Rd
├── create_precision_sensitivity_table.Rd
├── dedup_citations.Rd
├── dedup_citations_add_manual.Rd
├── detect_.Rd
├── export_bib.Rd
├── export_csv.Rd
├── export_ris.Rd
├── merge_columns.Rd
├── parse_.Rd
├── pipe.Rd
├── plot_contributions.Rd
├── plot_source_overlap_heatmap.Rd
├── plot_source_overlap_upset.Rd
├── precision_sensitivity_table.Rd
├── read_citations.Rd
├── record_counts.Rd
├── record_counts_table.Rd
├── record_level_table.Rd
├── record_summary_table.Rd
├── reimport_csv.Rd
├── reimport_ris.Rd
├── runShiny.Rd
├── synthesisr_read_refs.Rd
└── write_refs.Rd
├── renv
├── .gitignore
├── activate.R
└── settings.json
├── tests
├── .DS_Store
├── shinytest
│ ├── test_1.ris
│ ├── test_2.ris
│ └── test_3.ris
├── testthat.R
└── testthat
│ ├── .DS_Store
│ ├── data
│ └── 1_WoS.ris
│ ├── test-import.R
│ └── test-tables.R
└── vignettes
├── .DS_Store
├── benchmark_data
├── Benchmarking.ris
├── Search1_1.ris
├── Search2_1.ris
├── Search2_2.ris
├── Search2_3.ris
├── Search2_4.ris
├── Search3_1.ris
├── Search3_2.ris
├── Search3_3.ris
├── Search3_4.ris
├── Search3_5.ris
├── Search4_1.ris
├── Search4_2.ris
├── Search4_3.ris
├── Search5_1.ris
├── Search5_2.ris
├── Search5_3.ris
├── Search5_4.ris
├── Search5_5.ris
├── Search5_6.ris
├── Search5_7.ris
├── Search5_8.ris
├── Search6_1.ris
├── Search6_2.ris
├── Search6_3.ris
└── Search6_4.ris
├── citesource_analysis_across_screening_phases.rmd
├── citesource_benchmark_testing.Rmd
├── citesource_new_benchmark_testing.Rmd
├── citesource_vignette_db-pre-screen_validation.Rmd
├── citesource_vignette_db-topic-coverage.Rmd
├── citesource_working_example.rmd
├── new_benchmark_data
├── benchmark_15.ris
├── search1_166.ris
├── search2_278.ris
├── search3_302.ris
├── search4_460.ris
└── search5_495.ris
├── new_stage_data
├── Dimensions_246.ris
├── econlit_3.ris
├── envindex_100.ris
├── final_24.ris
├── lens_343.ris
├── medline_84.ris
├── screened_128.ris
└── wos_278.ris
├── topic_data
├── .DS_Store
├── 20221207_gambling-harms_crimjust_41.ris
├── 20221207_gambling-harms_lens_49.ris
├── 20221207_gambling-harms_psycinfo_124.ris
├── 20221207_gambling-harms_pubmed_176.ris
└── 20221207_gambling-harms_scopus_255.ris
├── valid_data
├── WoS_79.ris
├── benchmark.ris
├── psycinfo_64.ris
└── pubmed_46.ris
├── vignette_exports
├── analysis_across_phases_shiny_export.csv
├── analysis_across_phases_shiny_export.ris
├── citesource_benchmark_export.csv
└── citesource_benchmark_export.ris
└── working_example_data
├── AGRIS.ris
├── CAB.ris
├── EconLit.ris
├── Final.ris
├── GreenFile.ris
├── McK.ris
├── RM.ris
├── TiAb.ris
├── WoS_early.ris
└── WoS_later.ris
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/.DS_Store
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^renv$
2 | ^renv\.lock$
3 | ^.*\.Rproj$
4 | ^\.Rproj\.user$
5 | ^LICENSE\.md$
6 | ^\.github$
7 | ^pkgdown$
8 | ^_pkgdown.yml$
9 | ^cran-comments\.md$
10 | ^\.httr-oauth$
11 | ^docs$
12 |
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | /man
--------------------------------------------------------------------------------
/.github/r-depends.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/.github/r-depends.rds
--------------------------------------------------------------------------------
/.github/workflows/R-cmd-check.yaml_OLD:
--------------------------------------------------------------------------------
1 | on:
2 | push:
3 | branches: [main, master]
4 | pull_request:
5 | branches: [main, master]
6 |
7 | name: R-CMD-check
8 |
9 | jobs:
10 | document-and-dispatch:
11 | name: document
12 | runs-on: ubuntu-latest
13 | env:
14 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
15 | steps:
16 | - uses: actions/checkout@v2
17 | - uses: r-lib/actions/setup-r@v2
18 | with:
19 | use-public-rspm: true
20 | r-version: 'release'
21 | - name: Delete-Namespace
22 | run: Rscript -e 'file.remove("NAMESPACE")'
23 | - uses: r-lib/actions/setup-pandoc@v2
24 | - name: system dependencies
25 | run: sudo apt-get install libcurl4-openssl-dev libnode-dev
26 | - uses: r-lib/actions/setup-r-dependencies@v2
27 | with:
28 | extra-packages: any::rcmdcheck, any::roxygen2, any::devtools
29 | needs: check
30 | - uses: xu-cheng/texlive-action@v2
31 | - name: Update date
32 | run: Rscript -e 'descr <- readLines("DESCRIPTION")' -e 'descr <- stringr::str_replace(descr, "^Date.*$", paste("Date:", Sys.Date()))' -e 'writeLines(descr, "DESCRIPTION")'
33 | - name: Document
34 | run: Rscript -e 'roxygen2::roxygenise()'
35 | - name: Manual
36 | continue-on-error: true
37 | run: Rscript -e 'devtools::build_manual()'
38 |
39 | - name: commit
40 | run: |
41 | git config --local user.email "actions@github.com"
42 | git config --local user.name "GitHub Actions"
43 | git add -f man/\* NAMESPACE
44 | git commit -m 'Documentation' || echo "No changes to commit"
45 | git push origin || echo "No changes to commit"
46 |
47 |
48 | - uses: r-lib/actions/check-r-package@v2
49 |
--------------------------------------------------------------------------------
/.github/workflows/R-cmd-check.yml:
--------------------------------------------------------------------------------
1 | # Run CI for R using https://eddelbuettel.github.io/r-ci/
2 |
3 | name: R-CMD-Check
4 |
5 | on:
6 | workflow_run:
7 | workflows: ["document-and-deploy"]
8 | branches: [main]
9 | types:
10 | - completed
11 | workflow_dispatch:
12 |
13 | env:
14 | _R_CHECK_FORCE_SUGGESTS_: "false"
15 |
16 | jobs:
17 | R-CMD-check:
18 | strategy:
19 | matrix:
20 | include:
21 | - {os: macOS-latest}
22 | - {os: ubuntu-latest}
23 |
24 | runs-on: ${{ matrix.os }}
25 |
26 | steps:
27 | - name: Checkout
28 | uses: actions/checkout@v4
29 |
30 | - uses: r-lib/actions/setup-pandoc@v2
31 |
32 | - name: Setup
33 | uses: eddelbuettel/github-actions/r-ci-setup@master
34 |
35 | - name: Bootstrap
36 | run: ./run.sh bootstrap
37 |
38 | - name: Install rcmdcheck
39 | env:
40 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
41 | run: |
42 | R -e "
43 | install.packages(c('rcmdcheck', 'devtools'));
44 | devtools::install_local()
45 | "
46 |
47 | - uses: r-lib/actions/check-r-package@v2
48 |
--------------------------------------------------------------------------------
/.github/workflows/deploy.yml_OLD:
--------------------------------------------------------------------------------
1 | on:
2 | release:
3 | types: [released]
4 | pull_request:
5 | types: [opened, synchronize, reopened]
6 | push:
7 | branches:
8 | - main
9 | - master
10 | workflow_dispatch:
11 |
12 | name: Deploy
13 |
14 | jobs:
15 | deploy-latest:
16 | runs-on: ubuntu-latest
17 | if: github.event_name == 'push'
18 | steps:
19 | - name: Checkout code
20 | uses: actions/checkout@v4
21 | - name: Set up R2u (faster dependency installation)
22 | uses: eddelbuettel/github-actions/r2u-setup@master
23 | - name: Install dependencies and deploy
24 | run: |
25 | R -e "
26 | install.packages(c('shiny', 'rsconnect'));
27 | if (!require('remotes')) install.packages('remotes');
28 | if (!require('gitcreds')) install.packages('gitcreds');
29 | gitcreds::gitcreds_delete(url = 'https://github.com');
30 | remotes::install_github('ESHackathon/CiteSource');
31 | rsconnect::setAccountInfo(name='${{secrets.SHINY_LUKAS_ACCOUNT}}', token='${{secrets.SHINY_LUKAS_TOKEN}}', secret='${{secrets.SHINY_LUKAS_SECRET}}');
32 | rsconnect::deployApp(appName = 'CiteSource_latest', appDir = './inst/shiny-app/CiteSource')"
33 |
34 | env:
35 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
36 |
37 | deploy-release:
38 | runs-on: ubuntu-latest
39 | if: github.event_name == 'release' && github.event.release.tag_name != '*test*'
40 | steps:
41 | - name: Checkout code
42 | uses: actions/checkout@v4
43 | - name: Set up R2u (faster dependency installation)
44 | uses: eddelbuettel/github-actions/r2u-setup@master
45 | - name: Install dependencies and deploy
46 | run: |
47 | R -e "
48 | install.packages(c('shiny', 'rsconnect'));
49 | if (!require('remotes')) install.packages('remotes');
50 | remotes::install_github('ESHackathon/CiteSource');
51 | gitcreds_delete(url = 'https://github.com');
52 | rsconnect::setAccountInfo(name='${{secrets.SHINY_LUKAS_ACCOUNT}}', token='${{secrets.SHINY_LUKAS_TOKEN}}', secret='${{secrets.SHINY_LUKAS_SECRET}}');
53 | rsconnect::deployApp(appName = 'CiteSource_latest', appDir = './inst/shiny-app/CiteSource')"
54 |
55 | env:
56 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
57 |
--------------------------------------------------------------------------------
/.github/workflows/document-and-deploy-manual.yml:
--------------------------------------------------------------------------------
1 | # Run CI for R using https://eddelbuettel.github.io/r-ci/
2 |
3 | name: document-and-deploy-manual
4 |
5 | on:
6 | workflow_dispatch:
7 | inputs:
8 | rversion:
9 | description: 'R version to use (compatible with shinyapps.io)'
10 | required: true
11 | default: '4.4.1'
12 |
13 | env:
14 | _R_CHECK_FORCE_SUGGESTS_: "false"
15 |
16 | jobs:
17 | document-and-deploy-manual:
18 | runs-on: ubuntu-latest
19 |
20 | steps:
21 | - name: Checkout
22 | uses: actions/checkout@v4
23 |
24 | - uses: r-lib/actions/setup-pandoc@v2
25 |
26 | - name: Set up R version
27 | uses: r-lib/actions/setup-r@v2
28 | with:
29 | r-version: ${{ github.event.inputs.rversion }}
30 | use-public-rspm: true
31 |
32 | - name: Set up R Dependencies
33 | uses: r-lib/actions/setup-r-dependencies@v2
34 | with:
35 | extra-packages: devtools, plogr, roxygen2, remotes, rsconnect, pkgdown
36 |
37 | - name: Create documentation
38 | run: |
39 | R -e "
40 | file.remove('NAMESPACE');
41 | descr <- readLines('DESCRIPTION');
42 | descr <- stringr::str_replace(descr, '^Date.*$', paste('Date:', Sys.Date()));
43 | writeLines(descr, 'DESCRIPTION');
44 | roxygen2::roxygenise();
45 | try(devtools::build_manual())
46 | "
47 |
48 | - name: commit
49 | run: |
50 | git config --local user.email "actions@github.com"
51 | git config --local user.name "GitHub Actions"
52 | git add -f man/\* NAMESPACE
53 | git commit -m 'Documentation' || echo "No changes to commit"
54 | git push origin || echo "No changes to commit"
55 |
56 | - name: Deploy latest from dev
57 | if: github.ref == 'refs/heads/dev'
58 | env:
59 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
60 | run: |
61 | R -e "
62 | remotes::install_github('ESHackathon/CiteSource', force = TRUE);
63 | rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}});
64 | rsconnect::deployApp(appName = 'CiteSource_latest', appDir = './inst/shiny-app/CiteSource', forceUpdate = TRUE)"
65 |
66 | - name: Deploy stable version from main
67 | if: github.ref == 'refs/heads/main'
68 | env:
69 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
70 | run: |
71 | R -e "
72 | remotes::install_github('ESHackathon/CiteSource', force = TRUE);
73 | rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}});
74 | rsconnect::deployApp(appName = 'CiteSource', appDir = './inst/shiny-app/CiteSource', forceUpdate = TRUE)"
75 |
76 | - name: Create pkgdown
77 | env:
78 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
79 | run: |
80 | R -e "
81 | if (!require(CiteSource)) remotes::install_github('ESHackathon/CiteSource', force = TRUE);
82 | pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)"
83 |
84 | - name: Deploy to GitHub pages 🚀
85 | if: ${{ github.ref_name }} == 'main' || ${{ github.ref_name }} == 'master'
86 | uses: JamesIves/github-pages-deploy-action@v4
87 | with:
88 | clean: true
89 | branch: gh-pages
90 | folder: docs
91 |
--------------------------------------------------------------------------------
/.github/workflows/document-and-deploy-r2u.yaml_OLD:
--------------------------------------------------------------------------------
1 | # Run CI for R using https://eddelbuettel.github.io/r-ci/
2 |
3 | name: document-and-deploy
4 |
5 | on:
6 | push:
7 | pull_request:
8 | release:
9 |
10 | env:
11 | _R_CHECK_FORCE_SUGGESTS_: "false"
12 |
13 | jobs:
14 | document-and-deploy:
15 | runs-on: ubuntu-latest
16 |
17 | steps:
18 | - name: Checkout
19 | uses: actions/checkout@v4
20 |
21 | - uses: r-lib/actions/setup-pandoc@v2
22 |
23 | - name: Setup
24 | uses: eddelbuettel/github-actions/r-ci-setup@master
25 |
26 | - name: Bootstrap
27 | run: ./run.sh bootstrap
28 |
29 | #- name: Dependencies
30 | # run: ./run.sh install_deps
31 |
32 | - name: All Dependencies
33 | run: ./run.sh install_all
34 |
35 | - name: Create documentation
36 | run: |
37 | R -e "
38 | install.packages(c('rcmdcheck', 'roxygen2', 'devtools'));
39 | file.remove('NAMESPACE');
40 | descr <- readLines('DESCRIPTION');
41 | descr <- stringr::str_replace(descr, '^Date.*$', paste('Date:', Sys.Date()));
42 | writeLines(descr, 'DESCRIPTION');
43 | roxygen2::roxygenise();
44 | try(devtools::build_manual())
45 | "
46 |
47 | - name: commit
48 | run: |
49 | git config --local user.email "actions@github.com"
50 | git config --local user.name "GitHub Actions"
51 | git add -f man/\* NAMESPACE
52 | git commit -m 'Documentation' || echo "No changes to commit"
53 | git push origin || echo "No changes to commit"
54 |
55 | - name: Deploy latest (from dev)
56 | if: github.ref == 'refs/heads/dev'
57 | env:
58 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
59 | run: |
60 | R -e "
61 | install.packages(c('rsconnect', 'remotes'));
62 | bspm::disable();
63 | remotes::install_github('ESHackathon/CiteSource', ref = "dev", force = TRUE);
64 | rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}});
65 | rsconnect::deployApp(appName = 'CiteSource_latest', appDir = './inst/shiny-app/CiteSource', forceUpdate = TRUE)"
66 |
67 | - name: Deploy stable version (from main)
68 | if: github.ref == 'refs/heads/main'
69 | env:
70 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
71 | run: |
72 | R -e "
73 | install.packages(c('rsconnect', 'remotes'));
74 | bspm::disable();
75 | remotes::install_github('ESHackathon/CiteSource', force = TRUE);
76 | rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}});
77 | rsconnect::deployApp(appName = 'CiteSource', appDir = './inst/shiny-app/CiteSource', forceUpdate = TRUE)"
78 |
79 | - name: Create pkgdown
80 | env:
81 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
82 | run: |
83 | R -e "
84 | install.packages('pkgdown');
85 | bspm::disable();
86 | if (!require(CiteSource)) remotes::install_github('ESHackathon/CiteSource', force = TRUE);
87 | pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)"
88 |
89 | - name: Deploy to GitHub pages 🚀
90 | if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master'
91 | uses: JamesIves/github-pages-deploy-action@v4
92 | with:
93 | clean: true
94 | branch: gh-pages
95 | folder: docs
96 |
--------------------------------------------------------------------------------
/.github/workflows/document-and-deploy.yml:
--------------------------------------------------------------------------------
1 | name: document-and-deploy
2 |
3 | on:
4 | push:
5 | pull_request:
6 | release:
7 |
8 | jobs:
9 | document-and-deploy:
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | - name: Checkout
14 | uses: actions/checkout@v4
15 |
16 | - uses: r-lib/actions/setup-pandoc@v2
17 |
18 | - name: Set up R version
19 | uses: r-lib/actions/setup-r@v2
20 | with:
21 | r-version: "oldrel" # As shinyapps.io needs some time to catch up after releases
22 | use-public-rspm: true
23 |
24 | - name: Set up R Dependencies
25 | uses: r-lib/actions/setup-r-dependencies@v2
26 | with:
27 | extra-packages: devtools, plogr, roxygen2, remotes, rsconnect, pkgdown
28 |
29 | - name: Create documentation
30 | run: |
31 | R -e "
32 | file.remove('NAMESPACE');
33 | descr <- readLines('DESCRIPTION');
34 | descr <- stringr::str_replace(descr, '^Date.*$', paste('Date:', Sys.Date()));
35 | writeLines(descr, 'DESCRIPTION');
36 | roxygen2::roxygenise();
37 | try(devtools::build_manual())
38 | "
39 |
40 | - name: commit
41 | run: |
42 | git config --local user.email "actions@github.com"
43 | git config --local user.name "GitHub Actions"
44 | git add -f man/\* NAMESPACE
45 | git commit -m 'Documentation' || echo "No changes to commit"
46 | git push origin || echo "No changes to commit"
47 |
48 | - name: Deploy latest from dev
49 | if: github.ref == 'refs/heads/dev'
50 | env:
51 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
52 | run: |
53 | R -e "
54 | remotes::install_github('ESHackathon/CiteSource', force = TRUE);
55 | rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}});
56 | rsconnect::deployApp(
57 | appName = 'CiteSource_latest',
58 | appDir = './inst/shiny-app/CiteSource',
59 | forceUpdate = TRUE)"
60 |
61 | - name: Deploy stable version from main
62 | if: github.ref == 'refs/heads/main'
63 | env:
64 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
65 | run: |
66 | R -e "
67 | remotes::install_github('ESHackathon/CiteSource', force = TRUE);
68 | rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}});
69 | rsconnect::deployApp(
70 | appName = 'CiteSource',
71 | appDir = './inst/shiny-app/CiteSource',
72 | forceUpdate = TRUE)"
73 |
74 | - name: Create pkgdown
75 | env:
76 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
77 | run: |
78 | R -e "
79 | if (!require(CiteSource)) remotes::install_github('ESHackathon/CiteSource', force = TRUE);
80 | pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)"
81 |
82 | - name: Deploy to GitHub pages 🚀
83 | if: ${{ github.ref_name }} == 'main'
84 | uses: JamesIves/github-pages-deploy-action@v4
85 | with:
86 | clean: true
87 | branch: gh-pages
88 | folder: docs
89 |
--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml_OLD:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 | branches: [main, master]
8 | workflow_dispatch:
9 |
10 | name: pkgdown
11 |
12 | jobs:
13 | pkgdown:
14 | runs-on: ubuntu-latest
15 | # Only restrict concurrency for non-PR jobs
16 | concurrency:
17 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
18 | env:
19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
20 | steps:
21 | - uses: actions/checkout@v3
22 |
23 | - uses: r-lib/actions/setup-pandoc@v2
24 |
25 | - uses: r-lib/actions/setup-r@v2
26 | with:
27 | use-public-rspm: true
28 |
29 | - uses: r-lib/actions/setup-r-dependencies@v2
30 | with:
31 | extra-packages: any::pkgdown, local::.
32 | needs: website
33 |
34 | - name: Build site
35 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
36 | shell: Rscript {0}
37 |
38 | - name: Deploy to GitHub pages 🚀
39 | if: github.event_name != 'pull_request'
40 | uses: JamesIves/github-pages-deploy-action@4.1.4
41 | with:
42 | clean: true
43 | branch: gh-pages
44 | folder: docs
--------------------------------------------------------------------------------
/.github/workflows/pr-commands.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | issue_comment:
5 | types: [created]
6 |
7 | name: Commands
8 |
9 | jobs:
10 | document:
11 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }}
12 | name: document
13 | runs-on: ubuntu-latest
14 | env:
15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
16 | steps:
17 | - uses: actions/checkout@v2
18 |
19 | - uses: r-lib/actions/pr-fetch@v1
20 | with:
21 | repo-token: ${{ secrets.GITHUB_TOKEN }}
22 |
23 | - uses: r-lib/actions/setup-r@v1
24 | with:
25 | use-public-rspm: true
26 |
27 | - uses: r-lib/actions/setup-r-dependencies@v1
28 | with:
29 | extra-packages: roxygen2
30 |
31 | - name: Document
32 | run: Rscript -e 'roxygen2::roxygenise()'
33 |
34 | - name: commit
35 | run: |
36 | git config --local user.name "$GITHUB_ACTOR"
37 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
38 | git add man/\* NAMESPACE
39 | git commit -m 'Document'
40 |
41 | - uses: r-lib/actions/pr-push@v1
42 | with:
43 | repo-token: ${{ secrets.GITHUB_TOKEN }}
44 |
45 | style:
46 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }}
47 | name: style
48 | runs-on: ubuntu-latest
49 | env:
50 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
51 | steps:
52 | - uses: actions/checkout@v2
53 |
54 | - uses: r-lib/actions/pr-fetch@v1
55 | with:
56 | repo-token: ${{ secrets.GITHUB_TOKEN }}
57 |
58 | - uses: r-lib/actions/setup-r@v1
59 |
60 | - name: Install dependencies
61 | run: Rscript -e 'install.packages("styler")'
62 |
63 | - name: Style
64 | run: Rscript -e 'styler::style_pkg()'
65 |
66 | - name: commit
67 | run: |
68 | git config --local user.name "$GITHUB_ACTOR"
69 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
70 | git add \*.R
71 | git commit -m 'Style'
72 |
73 | - uses: r-lib/actions/pr-push@v1
74 | with:
75 | repo-token: ${{ secrets.GITHUB_TOKEN }}
76 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | .DS_Store
6 | inst/doc
7 | .httr-oauth
8 |
--------------------------------------------------------------------------------
/CiteSource.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: CiteSource
2 | Title: Analyze the Utility of Information Sources and Retrieval Methodologies for Evidence Synthesis
3 | Version: 0.1.1
4 | Date: 2023-06-22
5 | Authors@R: c(
6 | person("Trevor", "Riley", , "trevor.riley@noaa.gov", role = c("aut", "cre"),
7 | comment = c(ORCID = "0000-0002-6834-9802")),
8 | person("Kaitlyn", "Hair", , "kaitlyn.hair@ed.ac.uk", role = "aut",
9 | comment = c(ORCID = "0000-0003-0180-7343")),
10 | person("Lukas", "Wallrich", , "lukas.wallrich@gmail.com", role = "aut",
11 | comment = c(ORCID = "0000-0003-2121-5177")),
12 | person("Matthew", "Grainger", , "matthewjamesgrainger@gmail.com", role = "aut",
13 | comment = c(ORCID = "0000-0001-8426-6495")),
14 | person("Sarah", "Young", , "sarahy@andrew.cmu.edu", role = "aut",
15 | comment = c(ORCID = "0000-0002-8301-5106")),
16 | person("Chris", "Pritchard", , "chris.pritchard@ntu.ac.uk", role = "aut",
17 | comment = c(ORCID = "0000-0002-1143-9751")),
18 | person("Neal", "Haddaway", , "nealhaddaway@gmail.com", role = "aut",
19 | comment = c(ORCID = "0000-0003-3902-2234")),
20 | person("Martin", "Westgate", role = "cph", comment = "Author of included synthesisr fragments"),
21 | person("Eliza", "Grames", role = "cph", comment = "Author of included synthesisr fragments")
22 | )
23 | Description: This package was developed in order to provide researchers the ability to
24 | examine the utility and efficacy of literature resources and search
25 | methodologies. The package provides users with the ability to deduplicate
26 | references while maintaining customizable metadata. The resulting data
27 | can be analyzed using predeveloped plots and tables, including a summary
28 | table. Users are also able to export and reimport data in .ris, and csv.
29 | files. The package includes a shiny application for interactive use.
30 | License: GPL (>= 3)
31 | URL: https://www.eshackathon.org/CiteSource
32 | BugReports: https://github.com/ESHackathon/CiteSource/issues
33 | Imports:
34 | ASySD (>= 0.3.0),
35 | dplyr,
36 | DT,
37 | forcats,
38 | ggnewscale,
39 | ggplot2,
40 | glue,
41 | gt,
42 | purrr,
43 | rlang,
44 | scales,
45 | stringr,
46 | tibble,
47 | tidyr,
48 | tidyselect,
49 | UpSetR
50 | Suggests:
51 | htmltools,
52 | knitr,
53 | networkD3,
54 | plotly,
55 | rmarkdown,
56 | shiny,
57 | shinyalert,
58 | shinyjs,
59 | progressr,
60 | shinybusy,
61 | shinyWidgets,
62 | testthat (>= 3.0.0)
63 | Additional_repositories:
64 | https://github.com/camaradesuk/ASySD
65 | VignetteBuilder:
66 | knitr
67 | Config/testthat/edition: 3
68 | Encoding: UTF-8
69 | Roxygen: list(markdown = TRUE)
70 | RoxygenNote: 7.2.3
71 | Depends:
72 | R (>= 3.5.0)
73 | Remotes:
74 | camaradesuk/ASySD
75 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(as.data.frame,bibliography)
4 | export("%>%")
5 | export(calculate_detailed_records)
6 | export(calculate_initial_records)
7 | export(calculate_phase_count)
8 | export(calculate_phase_records)
9 | export(calculate_record_counts)
10 | export(citation_summary_table)
11 | export(compare_sources)
12 | export(count_unique)
13 | export(create_detailed_record_table)
14 | export(create_initial_record_table)
15 | export(create_precision_sensitivity_table)
16 | export(dedup_citations)
17 | export(dedup_citations_add_manual)
18 | export(export_bib)
19 | export(export_csv)
20 | export(export_ris)
21 | export(plot_contributions)
22 | export(plot_source_overlap_heatmap)
23 | export(plot_source_overlap_upset)
24 | export(precision_sensitivity_table)
25 | export(read_citations)
26 | export(record_counts)
27 | export(record_counts_table)
28 | export(record_level_table)
29 | export(record_summary_table)
30 | export(reimport_csv)
31 | export(reimport_ris)
32 | export(runShiny)
33 | export(run_shiny)
34 | import(dplyr)
35 | import(gt)
36 | import(rlang)
37 | import(scales)
38 | import(tidyr)
39 | importFrom(dplyr,"%>%")
40 | importFrom(gt,cells_column_labels)
41 | importFrom(gt,cols_label)
42 | importFrom(gt,gt)
43 | importFrom(gt,tab_footnote)
44 | importFrom(gt,tab_header)
45 | importFrom(rlang,":=")
46 | importFrom(rlang,.data)
47 | importFrom(stats,xtabs)
48 | importFrom(utils,head)
49 | importFrom(utils,read.table)
50 | importFrom(utils,tail)
51 | importFrom(utils,write.table)
52 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # CiteSource 0.0.1
2 |
3 | * Added a `NEWS.md` file to track changes to the package.
4 |
5 | - Added dependency on latest version of the ASySD R package
6 |
7 | - Simplified dedup function arguments (now specified within call to ASySD)
8 |
9 | - Integrated new dedup function into R shiny app
10 |
11 | # CiteSource 0.1.1
12 |
13 | - Added new functions which allow creation of tables and plots based on deduplicated (reimported) data.
14 |
15 | - Updated shiny functionality, look and feel, and documentation
16 |
17 | - Added new vignettes
--------------------------------------------------------------------------------
/R/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/R/.DS_Store
--------------------------------------------------------------------------------
/R/CiteSource.R:
--------------------------------------------------------------------------------
1 | #' CiteSource: A package to compare sources of citation records
2 | #'
3 | #' The CiteSource package supports evidence aggregation by helping with the
4 | #' processing of results of various searches in different sources. It allows to
5 | #' deduplicate results while retaining meta-data on where those results were
6 | #' found and then enables users to compare the contribution of different sources
7 | "_PACKAGE"
8 |
9 | # Use magrittr rather than base R pipe to be compatible with older R versions
10 | # And data pronoun .data to avoid issues/warnings due to dplyr non-standard evaluation
11 | #' Pipe operator
12 | #'
13 | #' @name %>%
14 | #' @rdname pipe
15 | #' @keywords internal
16 | #' @export
17 | #' @importFrom dplyr %>%
18 | #' @usage lhs \%>\% rhs
19 | #' @param lhs A value or the magrittr placeholder.
20 | #' @param rhs A function call using the magrittr semantics.
21 | #' @return The result of calling `rhs(lhs)`.
22 | NULL
23 | #' @importFrom rlang .data :=
24 | NULL
25 |
26 | # Declare . as global variable to remove warnings
27 | utils::globalVariables(".")
28 |
29 | # Set Shiny upload size depending on interactive use or deployment on shinyapps.io
30 | .onLoad <- function(libname, pkgname) {
31 | if (interactive() ) {
32 | options(shiny.maxRequestSize=2000*1024^2, timeout = 40000000)
33 | } else {
34 | options(shiny.maxRequestSize=250*1024^2, timeout = 40000000)
35 | }
36 | }
37 |
38 |
39 | key_fields <- c("author", "title", "year", "journal", "abstract", "doi", "number", "pages",
40 | "volume", "isbn", "record_id", "label", "source", "issue", "url",
41 | "issn", "start_page", "ID")
--------------------------------------------------------------------------------
/R/compare.R:
--------------------------------------------------------------------------------
1 | #' Count number of unique and non-unique citations from different sources, labels, and strings
2 | #' @export
3 | #' @param unique_data from ASySD, merged unique rows with duplicate IDs
4 | #' @param include_references Should bibliographic detail be included in return?
5 | #' @return dataframe with indicators of where a citation appears, with source/label/string as column
6 | #' @examples
7 | #' # Load example data from the package
8 | #' examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
9 | #' examplecitations <- readRDS(examplecitations_path)
10 | #'
11 | #' # Deduplicate citations
12 | #' dedup_results <- dedup_citations(examplecitations)
13 | #'
14 | #' # Count unique and non-unique citations
15 | #' count_unique(dedup_results)
16 |
17 | count_unique <- function(unique_data, include_references = FALSE) {
18 | # Start a pipeline with the input data
19 | out <- unique_data %>%
20 | # Filter out rows where 'cite_source' is empty
21 | dplyr::filter(!.data$cite_source == "") %>%
22 | # Select specific columns
23 | dplyr::select(.data$duplicate_id, .data$cite_source, .data$cite_label, .data$cite_string, tidyselect::any_of("record_ids")) %>%
24 | # Separate rows by 'cite_source', 'cite_label', and 'cite_string'
25 | tidyr::separate_rows(.data$cite_source, convert = TRUE, sep = ", ") %>%
26 | tidyr::separate_rows(.data$cite_label, convert = TRUE, sep = ", ") %>%
27 | tidyr::separate_rows(.data$cite_string, convert = TRUE, sep = ", ") %>%
28 | # Group by 'duplicate_id'
29 | dplyr::group_by(.data$duplicate_id) %>%
30 | # Add 'unique' and 'type' columns
31 | dplyr::mutate(
32 | unique = ifelse(length(unique(.data$cite_source)) == 1, TRUE, FALSE), # 'unique' is TRUE if 'cite_source' is unique
33 | type = ifelse(.data$unique, "unique", "duplicated") %>% factor(levels = c("unique", "duplicated")) # 'type' is 'unique' if 'unique' is TRUE, 'duplicated' otherwise
34 | ) %>%
35 | # Ungroup the data
36 | dplyr::ungroup() %>%
37 | # Remove duplicate rows
38 | unique()
39 |
40 | # If 'include_references' is TRUE, join 'out' with 'unique_data' on 'duplicate_id'
41 | if (include_references == TRUE) {
42 | out %>% dplyr::left_join(unique_data %>% dplyr::select(-dplyr::all_of(setdiff(intersect(names(.), names(out)), "duplicate_id"))), by = "duplicate_id")
43 | } else {
44 | # Otherwise, return 'out' as is
45 | out
46 | }
47 | }
48 |
49 | #' Compare duplicate citations across sources, labels, and strings
50 | #'
51 | #' @export
52 | #' @param unique_data from ASySD, merged unique rows with duplicate IDs
53 | #' @param comp_type Specify which fields are to be included. One or more of "sources", "strings" or "labels" - defaults to all.
54 | #' @param include_references Should bibliographic detail be included in return?
55 | #' @return dataframe with indicators of where a citation appears, with sources/labels/strings as columns
56 | #' @examples
57 | #' if (interactive()) {
58 | #' # Load example data from the package
59 | #' examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
60 | #' examplecitations <- readRDS(examplecitations_path)
61 | #'
62 | #' # Deduplicate citations and compare sources
63 | #' dedup_results <- dedup_citations(examplecitations)
64 | #' compare_sources(unique_citations, comp_type = "sources")
65 | #' }
66 |
67 |
68 | compare_sources <- function(unique_data, comp_type = c("sources", "strings", "labels"), include_references = FALSE) {
69 |
70 | out <- list(unique_data %>% dplyr::select("duplicate_id"))
71 |
72 | if ("sources" %in% comp_type) {
73 | source_comparison <- unique_data %>%
74 | dplyr::select(.data$duplicate_id, .data$cite_source, tidyselect::any_of("record_ids")) %>%
75 | dplyr::filter(!cite_source == "") %>%
76 | tidyr::separate_rows(.data$cite_source, sep = ", ", convert = TRUE) %>%
77 | unique() %>%
78 | tidyr::pivot_wider(
79 | id_cols = .data$duplicate_id, names_prefix = "source__", names_from = .data$cite_source, values_from = .data$cite_source,
80 | values_fn = function(x) TRUE,
81 | values_fill = FALSE
82 | )
83 |
84 | out <- c(out, list(source_comparison))
85 | }
86 |
87 | if ("strings" %in% comp_type) {
88 | source_comparison <- unique_data %>%
89 | dplyr::select(.data$duplicate_id, .data$cite_string, tidyselect::any_of("record_ids")) %>%
90 | dplyr::filter(!.data$cite_string == "") %>%
91 | tidyr::separate_rows(.data$cite_string, sep = ", ", convert = TRUE) %>%
92 | unique() %>%
93 | tidyr::pivot_wider(
94 | id_cols = .data$duplicate_id, names_prefix = "string__", names_from = .data$cite_string, values_from = .data$cite_string,
95 | values_fn = function(x) TRUE,
96 | values_fill = FALSE
97 | )
98 |
99 | out <- c(out, list(source_comparison))
100 | }
101 |
102 | if ("labels" %in% comp_type) {
103 | source_comparison <- unique_data %>%
104 | dplyr::select(.data$duplicate_id, .data$cite_label, tidyselect::any_of("record_ids")) %>%
105 | dplyr::filter(!cite_label == "") %>%
106 | tidyr::separate_rows(.data$cite_label, sep = ", ", convert = TRUE) %>%
107 | unique() %>%
108 | tidyr::pivot_wider(
109 | id_cols = .data$duplicate_id, names_prefix = "label__", names_from = .data$cite_label,
110 | values_from = .data$cite_label,
111 | values_fn = function(x) TRUE,
112 | values_fill = FALSE
113 | )
114 | out <- c(out, list(source_comparison))
115 |
116 |
117 | if (any(stringr::str_detect(names(source_comparison), "[Ss]earch"))) {
118 | search_stage <- stringr::str_subset(names(source_comparison), "[Ss]earch")
119 | if (length(search_stage) == 1) {
120 | not_in_search <- sum(!source_comparison[[search_stage]])
121 | if (not_in_search > 0) {
122 | warning(
123 | "Beware: ", not_in_search, " records were not included in ", search_stage, " but in other labels.",
124 | " *If* this label indicates the full search stage, this might indicate that you ommitted a source, ",
125 | "or that the deduplication did not go right. Please treat results with caution until you fix this, ",
126 | "e.g., by using export_csv and then reimport_csv."
127 | )
128 | }
129 | }
130 | }
131 | }
132 |
133 | if (length(out) == 0) stop('comp_type must be one or more of "sources", "strings" or "labels"')
134 |
135 |
136 | out <- purrr::reduce(out, dplyr::left_join, by = "duplicate_id")
137 |
138 | # Deals with entries missing source or label
139 | out <- out %>% dplyr::mutate(dplyr::across(dplyr::everything(), ~tidyr::replace_na(.x, FALSE)))
140 |
141 | if (include_references == TRUE) {
142 | out %>% dplyr::left_join(unique_data %>% dplyr::select(-dplyr::all_of(setdiff(intersect(names(.), names(out)), "duplicate_id"))), by = "duplicate_id")
143 | } else {
144 | out
145 | }
146 | }
147 |
148 |
--------------------------------------------------------------------------------
/R/dedup.R:
--------------------------------------------------------------------------------
1 | #' Deduplicate citations - ASySD wrapper
2 | #'
3 | #' This function deduplicates citation data. Note that duplicates are assumed to published
4 | #' in the same journal, so pre-prints and similar results will not be identified here.
5 | #'
6 | #' @export
7 | #' @param raw_citations Citation dataframe with relevant columns
8 | #' @param manual logical. If TRUE, manually specify pairs of duplicates to merge. Default is FALSE.
9 | #' @param show_unknown_tags When a label, source, or other merged field is missing, do you want this to show as "unknown"?
10 | #' @return unique citations formatted for CiteSource
11 | #'
12 | #' @examples
13 | #' # Load example data from the package
14 | #' examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
15 | #' examplecitations <- readRDS(examplecitations_path)
16 | #'
17 | #' # Deduplicate citations without manually specifying pairs and without showing unknown tags
18 | #' dedup_results <- dedup_citations(examplecitations)
19 | #'
20 | #' # Deduplicate citations with manual specification of pairs and showing unknown tags
21 | #' dedup_results_manual_unknown <- dedup_citations(
22 | #' examplecitations,
23 | #' manual = TRUE,
24 | #' show_unknown_tags = TRUE
25 | #' )
26 |
27 | dedup_citations <- function(raw_citations, manual=FALSE, show_unknown_tags=FALSE){
28 |
29 | # rename or coalesce columns
30 | targets <- c("journal", "number", "pages", "isbn", "record_id")
31 | sources <- c("source", "issue", "start_page", "issn", "ID")
32 | raw_citations <- add_cols(raw_citations, sources)
33 |
34 | for (i in seq_along(targets)) {
35 | if (targets[i] %in% names(raw_citations)) {
36 | raw_citations[[targets[i]]] <- dplyr::coalesce(raw_citations[[targets[i]]], raw_citations[[sources[i]]])
37 | } else {
38 | raw_citations[[targets[i]]] <- raw_citations[[sources[i]]]
39 | }
40 | }
41 |
42 | raw_citations <- add_cols(raw_citations, c("record_id", "cite_label", "cite_source", "cite_string"))
43 |
44 | raw_citations$source <- raw_citations$cite_source
45 | raw_citations$label <- raw_citations$cite_label
46 |
47 | dedup_results <- ASySD::dedup_citations(raw_citations, merge_citations = TRUE, extra_merge_fields = "cite_string", show_unknown_tags = show_unknown_tags)
48 |
49 | if(manual == FALSE){
50 |
51 | unique_post_dedup <- dedup_results$unique
52 | unique_post_dedup$cite_source = unique_post_dedup$source
53 | unique_post_dedup$cite_label = unique_post_dedup$label
54 |
55 | # Remove temporary columns
56 | unique_post_dedup <- unique_post_dedup %>%
57 | dplyr::select(-source, -label)
58 |
59 | return(unique_post_dedup)
60 |
61 | } else {
62 |
63 | unique_post_dedup <- dedup_results
64 | unique_post_dedup$unique$cite_source = unique_post_dedup$unique$source
65 | unique_post_dedup$unique$cite_label = unique_post_dedup$unique$label
66 |
67 | # Remove temporary columns
68 | unique_post_dedup$unique <- unique_post_dedup$unique %>%
69 | dplyr::select(-source, -label)
70 |
71 | return(unique_post_dedup)
72 | }
73 |
74 | }
75 |
76 | #' Remove pairs with manual dedup - ASySD wrapper
77 | #'
78 | #' This function deduplicates citation data. Note that duplicates are assumed to published
79 | #' in the same journal, so pre-prints and similar results will not be identified here.
80 | #'
81 | #' @export
82 | #' @param unique_citations Unique citations post deduplication
83 | #' @param additional_pairs TRUE duplicate pairs
84 | #' @return unique citations formatted for CiteSource
85 | #' @examples
86 | #' # Load example data from the package
87 | #' examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
88 | #' examplecitations <- readRDS(examplecitations_path)
89 | #'
90 | #' # Deduplicate citations
91 | #' dedup_results <- dedup_citations(examplecitations)
92 | #'
93 | dedup_citations_add_manual <- function(unique_citations, additional_pairs) {
94 |
95 | unique_citations$source = unique_citations$cite_source
96 | unique_citations$label = unique_citations$cite_label
97 |
98 | dedup_results <- ASySD::dedup_citations_add_manual(unique_citations,
99 | additional_pairs = additional_pairs,
100 | extra_merge_fields = "cite_string")
101 |
102 | dedup_results$cite_source <- dedup_results$source
103 | dedup_results$cite_label <- dedup_results$label
104 |
105 | # Remove temporary columns
106 | dedup_results <- dedup_results %>%
107 | dplyr::select(-source, -label)
108 |
109 | return(dedup_results)
110 |
111 | }
112 |
113 |
114 | #' ####------ Add columns ------ ####
115 |
116 | #' This function adds citesource columns to citation data if missing
117 | #' @param raw_citations Citation dataframe with relevant columns
118 | #' @param cname column names which are required in dataframe
119 | #' @return Dataframe of citations with id
120 | #' @noRd
121 | add_cols <- function(raw_citations, cname) {
122 | add <- cname[!cname %in% names(raw_citations)]
123 |
124 | if (length(add) != 0) raw_citations[add] <- NA
125 | raw_citations
126 | }
127 |
128 |
129 |
130 |
131 |
--------------------------------------------------------------------------------
/R/export.R:
--------------------------------------------------------------------------------
1 | #' Export deduplicated citations with source data as CSV file
2 | #'
3 | #' This function saves deduplicated citations as a CSV file for further analysis and/or reporting.
4 | #' Metadata can be separated into one column per source, label or string, which facilitates analysis.
5 | #' Note that *existing files are overwritten without warning.*
6 | #'
7 | #' @param unique_citations Dataframe with unique citations, resulting from `dedup_citations()`
8 | #' @param filename Name (and path) of file, should end in .csv
9 | #' @param separate Character vector indicating which (if any) of cite_source, cite_string and cite_label should be split into separate columns to facilitate further analysis.
10 | #' @param trim_abstracts Some databases may return full-text that is misidentified as an abstract. This inflates file size and may lead to issues with Excel,
11 | #' which cannot deal with more than 32,000 characters per field. Therefore, the default is to trim very long abstracts to 32,000 characters. Set a lower number to reduce file size, or
12 | #' NULL to retain abstracts as they are.
13 | #' @return The function saves the deduplicated citations as a CSV file to the specified location.
14 | #' @export
15 | #' @examples
16 | #' if (interactive()) {
17 | #' # Load example data from the package
18 | #' examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
19 | #' examplecitations <- readRDS(examplecitations_path)
20 | #' dedup_results <- dedup_citations(examplecitations, merge_citations = TRUE)
21 | #' export_csv(dedup_results, "cite_sources.csv", separate = "cite_source")
22 | #' }
23 |
24 | export_csv <- function(unique_citations, filename = "citesource_exported_citations.csv", separate = NULL, trim_abstracts = 32000) {
25 | # Warn if the filename doesn't end with .csv
26 | if (tolower(tools::file_ext(filename)) != "csv") {
27 | warning("Function saves a CSV file, so filename should (usually) end in .csv. For now, name is used as provided.")
28 | }
29 |
30 | if (!is.null(separate)) {
31 | separate <- match.arg(separate, choices = c("cite_source", "cite_label", "cite_string"), several.ok = TRUE)
32 |
33 | separated <- purrr::map_dfc(separate, function(x) {
34 | unique_citations %>%
35 | dplyr::select(tidyselect::all_of(x), .data$duplicate_id, .data$record_ids) %>%
36 | tidyr::separate_rows(1, sep = ", ", convert = TRUE) %>%
37 | unique() %>%
38 | tidyr::pivot_wider(
39 | id_cols = .data$duplicate_id, names_prefix = paste0(stringr::str_remove(x, "cite_"), "_"),
40 | names_from = 1, values_from = c(.data$record_ids),
41 | values_fn = function(x) TRUE,
42 | values_fill = FALSE
43 | ) %>%
44 | dplyr::select(tidyselect::starts_with(paste0(stringr::str_remove(x, "cite_"))))
45 | })
46 |
47 | # Trim abstracts if required
48 | if (!is.null(trim_abstracts)) {
49 | unique_citations <- unique_citations %>%
50 | dplyr::mutate(abstract = stringr::str_sub(.data$abstract, 1, trim_abstracts))
51 | }
52 |
53 |
54 | unique_citations <- unique_citations %>%
55 | dplyr::select(-tidyselect::all_of(separate)) %>%
56 | dplyr::bind_cols(separated)
57 | }
58 | utils::write.csv(unique_citations, filename, row.names = FALSE)
59 | }
60 |
61 | #' Export data frame to RIS file
62 | #'
63 | #' This function saves a data frame as a RIS file with specified columns mapped to RIS fields. Note that
64 | #' *existing files are overwritten without warning.*
65 | #'
66 | #' @param citations Dataframe to be exported to RIS file
67 | #' @param filename Name (and path) of file, should end in .ris
68 | #' @param source_field Field in `citations` representing the source. Default is "DB".
69 | #' @param label_field Field in `citations` representing the label. Default is "C7".
70 | #' @param string_field Field in `citations` representing additional string information. Default is "C8".
71 | #' @export
72 | #' @examples
73 | #' if (interactive()) {
74 | #' # Load example data from the package
75 | #' examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
76 | #' examplecitations <- readRDS(examplecitations_path)
77 | #' dedup_results <- dedup_citations(examplecitations, merge_citations = TRUE)
78 | #' export_ris(
79 | #' dedup_results$unique,
80 | #' "cite_sources.ris",
81 | #' user_mapping = list(
82 | #' "DB" = "cite_source_include",
83 | #' "C7" = "cite_label_include"
84 | #' )
85 | #' )
86 | #' }
87 |
88 | export_ris <- function(citations, filename = "citations.ris", source_field = "DB", label_field = "C7", string_field = "C8") {
89 |
90 | if (tolower(tools::file_ext(filename)) != "ris") warning("Function saves a RIS file, so filename should (usually) end in .ris. For now, name is used as provided.")
91 |
92 | synthesisr_codes <- dplyr::bind_rows(
93 | tibble::tribble(
94 | ~code, ~field, ~ris_synthesisr,
95 | source_field, "cite_source", TRUE,
96 | string_field, "cite_string", TRUE,
97 | label_field, "cite_label", TRUE,
98 | "C1", "duplicate_id", TRUE,
99 | "C2", "record_ids", TRUE,
100 | "TY", "type", TRUE
101 | ),
102 | synthesisr_code_lookup %>% dplyr::filter(.data$ris_synthesisr)
103 | ) %>% dplyr::distinct(.data$code, .keep_all = TRUE) # Remove fields from synthesisr specification used for CiteSource metadata
104 |
105 | # Currently, write_refs does not accept tibbles, thus converted
106 | write_refs(as.data.frame(citations), file = filename, tag_naming = synthesisr_codes)
107 | }
108 |
109 | #' Export deduplicated citations to .bib file
110 | #'
111 | #' This function saves deduplicated citations as a BibTex file with sources, labels and strings
112 | #' included in the `note` field (if they were initially provided for any of the citations). Therefore,
113 | #' beware that **any `note` field that might be included in `citations` will be overwritten**. Also note that
114 | #' *existing files are overwritten without warning.*
115 | #'
116 | #' @param citations Dataframe with unique citations, resulting from `dedup_citations()`
117 | #' @param filename Name (and path) of file, should end in .ris
118 | #' @param include Character. One or more of sources, labels or strings
119 | #' @export
120 | #' @examples
121 | #' if (interactive()) {
122 | #' # Load example data from the package
123 | #' examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
124 | #' examplecitations <- readRDS(examplecitations_path)
125 | #' dedup_results <- dedup_citations(examplecitations, merge_citations = TRUE)
126 | #' export_bib(dedup_results$unique, "cite_sources.bib", include = "sources")
127 | #' }
128 |
129 | export_bib <- function(citations, filename = "citations.bib", include = c("sources", "labels", "strings")) {
130 | if (tolower(tools::file_ext(filename)) != "bib") warning("Function saves a BibTex file, so filename should (usually) end in .bib. For now, name is used as provided.")
131 |
132 | include <- stringr::str_remove(include, "s$") %>% paste0("cite_", .)
133 |
134 | notes <- citations %>% dplyr::select(tidyselect::all_of(include))
135 |
136 | for (i in seq_along(include)) {
137 | notes[include[i]] <- paste(include[i], notes[[include[i]]], sep = ": ")
138 | }
139 |
140 | notes <- notes %>%
141 | tidyr::unite("note", dplyr::everything(), sep = "; ") %>%
142 | dplyr::pull(.data$note)
143 |
144 | citations["note"] <- notes
145 |
146 | citations <- citations %>%
147 | dplyr::select(-dplyr::starts_with("cite_"), -tidyselect::any_of(c("duplicate_id", "record_ids", "record_id")))
148 |
149 | write_refs(as.data.frame(citations), format = "bib", file = filename)
150 | }
151 |
--------------------------------------------------------------------------------
/R/helpers.R:
--------------------------------------------------------------------------------
1 | # Licensed under the MIT licence
2 | # YEAR: 2020
3 | # COPYRIGHT HOLDER: usethis authors
4 | # see https://github.com/r-lib/usethis/blob/main/LICENSE
5 |
6 | ui_yeah <- function (x, yes = c("Yes", "Definitely", "For sure", "Yup",
7 | "Yeah", "I agree", "Absolutely"), no = c("No way", "Not now",
8 | "Negative", "No", "Nope", "Absolutely not"), n_yes = 1, n_no = 2,
9 | shuffle = TRUE, .envir = parent.frame())
10 | {
11 | x <- glue::glue_collapse(x, "\n")
12 | x <- glue::glue(x, .envir = .envir)
13 | if (!interactive()) {
14 | stop(c("User input required, but session is not interactive.",
15 | glue::glue("Query: {x}")))
16 | }
17 | n_yes <- min(n_yes, length(yes))
18 | n_no <- min(n_no, length(no))
19 | qs <- c(sample(yes, n_yes), sample(no, n_no))
20 | if (shuffle) {
21 | qs <- sample(qs)
22 | }
23 | rlang::inform(x)
24 | out <- utils::menu(qs)
25 | out != 0L && qs[[out]] %in% yes
26 | }
--------------------------------------------------------------------------------
/R/import.R:
--------------------------------------------------------------------------------
1 | #' Import citations from file
2 | #'
3 | #' This function imports RIS and Bibtex files with citations and merges them
4 | #' into one long tibble with one record per line.
5 | #'
6 | #' @param files One or multiple RIS or Bibtex files with citations.
7 | #' Should be .bib or .ris files
8 | #' @param cite_sources The origin of the citation files (e.g. "Scopus", "WOS", "Medline") - vector with one value per file, defaults to file names.
9 | #' @param cite_strings Optional. The search string used (or another grouping to analyse) - vector with one value per file
10 | #' @param cite_labels Optional. An additional label per file, for instance the stage of search - vector with one value per file
11 | #' @param metadata A tibble with file names and metadata for each file. Can be specified as an *alternative* to files, cite_sources, cite_strings and cite_labels.
12 | #' @param verbose Should number of reference and allocation of labels be reported?
13 | #' @param only_key_fields Should only key fields (e.g., those used by CiteCourse) be imported? If FALSE, all RIS data is retained. Can also be a character vector of field names to retain (after they have been renamed by the import function) in addition to the essential ones.
14 | #' @inheritParams synthesisr_read_refs
15 | #' @return A tibble with one row per citation
16 | #' @examples
17 | #' if (interactive()) {
18 | #' # Import only key fields from the RIS files
19 | #' read_citations(c("res.ris", "res.bib"),
20 | #' cite_sources = c("CINAHL", "MEDLINE"),
21 | #' cite_strings = c("Search1", "Search2"),
22 | #' cite_labels = c("raw", "screened"),
23 | #' only_key_fields = TRUE
24 | #' )
25 | #'
26 | #' # or equivalently
27 | #' metadata_tbl_key_fields <- tibble::tribble(
28 | #' ~files, ~cite_sources, ~cite_strings, ~cite_labels, ~only_key_fields,
29 | #' "res.ris", "CINAHL", "Search1", "raw", TRUE,
30 | #' "res.bib", "MEDLINE", "Search2", "screened", TRUE
31 | #' )
32 | #'
33 | #' read_citations(metadata = metadata_tbl_key_fields)
34 | #' }
35 | #' @export
36 |
37 | read_citations <- function(files = NULL,
38 | cite_sources = NULL,
39 | cite_strings = NULL,
40 | cite_labels = NULL,
41 | metadata = NULL,
42 | verbose = TRUE,
43 | tag_naming = "best_guess",
44 | only_key_fields = TRUE) {
45 |
46 | if (is.character(only_key_fields)) {
47 | only_key_fields <- union(key_fields, only_key_fields)
48 | } else if (only_key_fields == TRUE) {
49 | only_key_fields <- key_fields
50 | } else if (!only_key_fields == FALSE) {
51 | stop("only_key_fields must be TRUE, FALSE or a character vector")
52 | } else {
53 | only_key_fields <- NULL
54 | }
55 |
56 | if (is.null(files) && is.null(metadata)) stop("Either files or metadata must be specified.")
57 | if (!is.null(files) && !is.null(metadata)) stop("files and metadata cannot both be specified.")
58 |
59 | if (!is.null(metadata)) {
60 | if (!is.data.frame(metadata)) stop("metadata must be a tibble/dataframe.")
61 | if (!("files" %in% colnames(metadata))) stop("metadata must contain at least a `files` column")
62 | files <- metadata[["files"]]
63 | cite_sources <- metadata[["cite_sources"]]
64 | cite_strings <- metadata[["cite_strings"]]
65 | cite_labels <- metadata[["cite_labels"]]
66 | }
67 |
68 |
69 | if (is.null(cite_sources)) {
70 | cite_sources <- purrr::map_chr(files, ~ tools::file_path_sans_ext(basename(.x)))
71 |
72 | if (any(duplicated(cite_sources))) {
73 | cite_sources <- make.unique(cite_sources, sep = "_")
74 | message("Some file names were duplicated. Therefore, their cite_source values are distinguished by suffixes (_1 etc). For greater clarity, specify cite_sources explicitly or rename files.")
75 | }
76 | }
77 |
78 | if (length(files) != length(cite_sources)) {
79 | stop("Files and origins cite_sources be of equal length")
80 | }
81 | if (!is.null(cite_strings)) {
82 | if (length(cite_sources) != length(cite_strings)) {
83 | stop("Cite_sources and cite_strings must be of equal length")
84 | }
85 | }
86 | if (!is.null(cite_labels)) {
87 | if (length(cite_sources) != length(cite_labels)) {
88 | stop("Cite_sources and cite_labels must be of equal length")
89 | }
90 | }
91 |
92 | contains_commas <- any(stringr::str_detect(c(cite_sources, cite_labels, cite_strings), ","))
93 |
94 | if (!is.na(contains_commas) && contains_commas) {
95 | stop("',' must not be used in cite_source, cite_labels or cite_strings (or filenames if these are not specified)")
96 | }
97 |
98 | # Need to import files separately to add origin, platform, and searches
99 | ref_list <- purrr::map(files,
100 | \(x) synthesisr_read_refs(x, tag_naming = tag_naming, select_fields = only_key_fields),
101 | .progress = list( total = 100,
102 | format = "Importing files {cli::pb_bar} {cli::pb_percent}")
103 | )
104 |
105 | # Drop empty citations
106 | ref_list <- lapply(
107 | ref_list,
108 | function(data) data[rowSums(is.na(data)) != (ncol(data) - 1), ]
109 | )
110 |
111 | ref_counts <- numeric(length(files))
112 |
113 | for (i in seq_along(files)) {
114 | ref_counts[i] <- nrow(ref_list[[i]])
115 | }
116 |
117 | for (index in seq_len(length(files))) {
118 | ref_list[[index]]$cite_source <- cite_sources[[index]]
119 | if (!is.null(cite_strings)) {
120 | ref_list[[index]]$cite_string <- cite_strings[[index]]
121 | }
122 | if (!is.null(cite_labels)) {
123 | ref_list[[index]]$cite_label <- cite_labels[[index]]
124 | }
125 | }
126 |
127 | if (verbose) {
128 | report <- data.frame(
129 | file = basename(files),
130 | cite_source = cite_sources,
131 | cite_string = if (is.null(cite_strings)) NA_character_ else cite_strings,
132 | cite_label = if (is.null(cite_labels)) NA_character_ else cite_labels,
133 | citations = ref_counts
134 | )
135 |
136 | message("Import completed - with the following details:")
137 | message(paste0(utils::capture.output(report), collapse = "\n"))
138 | }
139 |
140 | ref_list %>%
141 | purrr::map(tibble::as_tibble) %>%
142 | purrr::reduce(dplyr::bind_rows)
143 |
144 | }
145 |
146 |
147 |
--------------------------------------------------------------------------------
/R/reimport.R:
--------------------------------------------------------------------------------
1 | #' Reimport a CSV-file exported from CiteSource
2 | #'
3 | #' This function reimports a csv file that was tagged and deduplicated by CiteSource.
4 | #' It allows to continue with further analyses without repeating that step, and also
5 | #' allows users to make any manual corrections to tagging or deduplication. Note that
6 | #' this function only works on CSV files that were written with `export_csv(..., separate = NULL)`
7 | #'
8 | #' @param filename Name (and path) of CSV file to be reimported, should end in .csv
9 | #' @return A data frame containing the imported citation data if all required columns are present.
10 | #' @export
11 | #' @examples
12 | #' \dontrun{
13 | #' #example usage
14 | #' citations <- reimport_csv("path/to/citations.csv")
15 | #' }
16 | #'
17 | reimport_csv <- function(filename) {
18 | # Warn if the filename doesn't end with .csv
19 | if (tolower(tools::file_ext(filename)) != "csv") warning("Function reads a CSV file, so filename should (usually) end in .csv. For now, name is used as provided.")
20 |
21 | # Read the CSV file
22 | unique_citations_imported <- utils::read.csv(filename, stringsAsFactors = FALSE)
23 |
24 | # Check if the required columns are present
25 | if (!all(c("cite_source", "cite_label", "cite_string", "duplicate_id", "record_ids") %in% names(unique_citations_imported))) {
26 | stop(
27 | "CiteSource meta-data (i.e. columns cite_source, cite_label cite_string, duplicate_id, record_ids) were not found in ", filename,
28 | ". This function is intended to be used for files exported from CiteSource and thus requires these fields.",
29 | " Note that export_csv must be called with separate = NULL (the default value)."
30 | )
31 | }
32 |
33 | unique_citations_imported
34 | }
35 |
36 | #' Reimport a RIS-file exported from CiteSource
37 | #'
38 | #' This function reimports a RIS file that was tagged and deduplicated by CiteSource.
39 | #' It allows to continue with further analyses without repeating that step, and also
40 | #' allows users to make any manual corrections to tagging or deduplication. The function
41 | #' can also be used to replace the import step (for instance if tags are to be added to
42 | #' individual citations rather than entire files) - in this case, just call `dedup_citations()`
43 | #' after the import.
44 | #'
45 | #' Note that this functions defaults' are based on those in `export_ris()` so that these functions
46 | #' can easily be combined.
47 | #'
48 | #' @param filename Name (and path) of RIS file to be reimported, should end in .ris
49 | #' @param source_field Character. Which RIS field should cite_sources be read from? NULL to set to missing
50 | #' @param label_field Character. Which RIS field should cite_labels be read from? NULL to set to missing
51 | #' @param string_field Character. Which RIS field should cite_strings be read from? NULL to set to missing
52 | #' @param duplicate_id_field Character. Which RIS field should duplicate IDs be read from? NULL to recreate based on row number (note that neither duplicate nor record IDs directly affect CiteSource analyses - they can only allow you to connect processed data with raw data)
53 | #' @param record_id_field Character. Which RIS field should record IDs be read from? NULL to recreate based on row number
54 | #' @param tag_naming Synthesisr option specifying how RIS tags should be replaced with names. This should not
55 | #' be changed when using this function to reimport a file exported from CiteSource. If you import your own
56 | #' RIS, check `names(CiteSource:::synthesisr_code_lookup)` and select any of the options that start with `ris_`
57 | #' @param verbose Should confirmation message be displayed?
58 | #' @export
59 | #' @examples
60 | #' if (interactive()) {
61 | #' dedup_results <- dedup_citations(citations, merge_citations = TRUE)
62 | #' export_ris(dedup_results$unique, "citations.ris")
63 | #' unique_citations2 <- reimport_ris("citations.ris")
64 | #' }
65 | #'
66 | reimport_ris <- function(filename = "citations.ris",
67 | source_field = "DB", label_field = "C7", string_field = "C8",
68 | duplicate_id_field = "C1", record_id_field = "C2",
69 | tag_naming = "ris_synthesisr", verbose = TRUE) {
70 |
71 | if (!tag_naming %in% names(synthesisr_code_lookup)) {
72 | stop("tag_naming must be one of ", names(synthesisr_code_lookup) %>% stringr::str_subset("^ris_") %>%
73 | glue::glue_collapse(sep = ", ", last = " or "))
74 | }
75 |
76 | if (is.null(source_field)) source_field <- NA
77 | if (is.null(string_field)) string_field <- NA
78 | if (is.null(label_field)) label_field <- NA
79 | if (is.null(duplicate_id_field)) duplicate_id_field <- NA
80 | if (is.null(record_id_field)) record_id_field <- NA
81 |
82 |
83 | custom_codes <- tibble::tribble(
84 | ~code, ~field, ~tag_naming,
85 | source_field, "cite_source", TRUE,
86 | string_field, "cite_string", TRUE,
87 | label_field, "cite_label", TRUE,
88 | duplicate_id_field, "duplicate_id", TRUE,
89 | record_id_field, "record_ids", TRUE
90 | )
91 |
92 | names(custom_codes)[3] <- tag_naming
93 |
94 | synthesisr_codes <- dplyr::bind_rows(
95 | custom_codes,
96 | synthesisr_code_lookup %>% dplyr::filter(.data[[tag_naming]])
97 | ) %>%
98 | dplyr::filter(!is.na(.data$code)) %>%
99 | dplyr::distinct(.data$code, .keep_all = TRUE) # Remove fields from synthesisr specification used for CiteSource metadata
100 |
101 | citations <- read_ref(filename, tag_naming = synthesisr_codes)
102 |
103 |
104 | if (!"cite_source" %in% names(citations)) {
105 | message("No non-empty cite_source values found")
106 | citations$cite_source <- NA
107 | }
108 |
109 | if (!"cite_string" %in% names(citations)) {
110 | message("No non-empty cite_string values found")
111 | citations$cite_string <- NA
112 | }
113 | if (!"cite_label" %in% names(citations)) {
114 | message("No non-empty cite_label values found")
115 | citations$cite_label <- NA
116 | }
117 |
118 | if (!"duplicate_id" %in% names(citations)) {
119 | message("Duplicate IDs not found - will be recreated based on row number")
120 | citations$duplicate_id <- seq_len(nrow(citations))
121 | } else if (any(is.na(citations$duplicate_id))) {
122 | message("Some duplicate IDs are missing - these will be recreated based on row number")
123 | citations$duplicate_id[is.na(citations$duplicate_id)] <- seq_len(sum(is.na(citations$duplicate_id)))
124 | }
125 |
126 | if (!"record_ids" %in% names(citations)) {
127 | message("Record IDs not found - will be recreated based on row number")
128 | citations$record_ids <- seq_len(nrow(citations))
129 | } else if (any(is.na(citations$record_ids))) {
130 | message("Some record IDs are missing - these will be recreated based on row number")
131 | citations$record_ids[is.na(citations$record_ids)] <- seq_len(sum(is.na(citations$record_ids)))
132 | }
133 |
134 | citations
135 | }
136 |
--------------------------------------------------------------------------------
/R/runShiny.R:
--------------------------------------------------------------------------------
1 | #' A wrapper function to run Shiny Apps from \code{CiteSource}.
2 | #'
3 | #' Running this function will launch the CiteSource shiny app
4 | #'
5 | #' @return CiteSource shiny app
6 | #' @param app Defaults to CiteSource - possibly other apps will be included in the future
7 | #' @param offer_install Should user be prompted to install required packages if they are missing?
8 | #' @export
9 | #' @aliases run_shiny
10 | #' @examples
11 | #' if (interactive()) {
12 | #' # To run the CiteSource Shiny app:
13 | #' runShiny()
14 | #' }
15 | runShiny <- function(app = "CiteSource", offer_install = interactive()) {
16 |
17 | # Check that required packages are installed
18 | req_packages <- c("shiny", "shinyalert", "shinybusy", "shinyWidgets", "plotly")
19 | if (!all(available <- purrr::map_lgl(req_packages, ~ requireNamespace(.x, quietly = TRUE)))) {
20 | if (offer_install == TRUE) {
21 | message("Some packages required for the CiteSource shiny app are missing. The following need to be installed: ",
22 | glue::glue_collapse(req_packages[!available], sep = ", ", last = " and "))
23 | if (ui_yeah("Should these packages be installed?", n_no = 1) == TRUE) {
24 | utils::install.packages(req_packages[!available])
25 | }
26 | runShiny(offer_install = FALSE)
27 | } else {
28 | stop("Some packages required for the CiteSource shiny app are missing. Ensure you have all of the following installed: ",
29 | glue::glue_collapse(req_packages, sep = ", ", last = "and"))
30 | }
31 | }
32 |
33 | # find and launch the app
34 | appDir <- system.file("shiny-app", app, package = "CiteSource")
35 |
36 | shiny::runApp(appDir, display.mode = "normal")
37 | }
38 |
39 | # Alias to offer function in line with usual snake_case style
40 | #' @export
41 | run_shiny <- runShiny
--------------------------------------------------------------------------------
/R/sysdata.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/R/sysdata.rda
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CiteSource
2 |
3 |
4 |
5 |
6 | [](https://github.com/ESHackathon/CiteSource/actions)
7 | [](https://github.com/ESHackathon/CiteSource)
8 | [](https://www.gnu.org/licenses/gpl-3.0)
9 |
10 |
11 | ## About the Package
12 |
13 | CiteSource was developed to provide researchers the ability to examine the utility and efficacy of literature resources and search methodologies. The idea behind CiteSource is simply allowing users to deduplicate citation records, while maintaining customizable metadata about the citation.
14 |
15 | **Development**
16 |
17 | Development of this project began as part of the Evidence Synthesis Hackathon and as part of Evidence Synthesis & Meta-Analysis in R Conference - ESMARConf 2022. to learn more about this awesome conference and hackathon please visit @ https://esmarconf.org/
18 |
19 | **License**
20 |
21 | CiteSource was created under [the General Public License (>=v3)](https://www.gnu.org/licenses/gpl-3.0.html).
22 |
23 | **Shiny Web Application**
24 |
25 | Whether you know R or not, we want you to be able to use CiteSource! Check out our [CiteSource Shiny App!](https://litrev.shinyapps.io/CiteSource/)
26 |
27 | ## Features
28 | **Customizable Metadata Tags**
29 |
30 | Users can provide customizable metadata in three fields, cite_source, cite_string, and cite_label. Metadata can include anything from a resource name (e.g. Web of Science, LENS.org, PubMed), a method (database search, handsearching, citation snowballing), a variation used within a method (WoS string #1, Wos string #2, WoS string #3), a research phase (search, Ti/Ab screening, Full-text Screening), or a unique group of citations (benchmarking articles, articles from a previous review, articles with a specific author affiliation).
31 |
32 | **Record Merging**
33 |
34 | The CiteSource deduplication process is better described as a record merging process due to the fact that the customizable metadata from duplicate records is maintained through the creation of a single, primary record. Beyond the merging of customizable metadata, the primary record is created by using the most complete metadata available between duplicate records (currently DOI and Abstract fields).The ASySD package, developed by Kaitlyn Hair, serves as the backbone of this process.
35 |
36 | **Table and Plot Visualizations**
37 |
38 | Once records are deduplicated, users are able to easily create plots and tables to answer specific questions or to simply explore the data in an effort to develop new hypotheses. Examples of analysis may include how many unique records a specific source contributed or how traditional methods of searching fare against a new AI discovery tool in finding relevant articles. Users may want to understand the overlap in records between two different search strings or evaluate the impact of including Google Scholar in a review. Before searching, a user may even develop a targeted search to better understand the topical coverage across databases that they intend to search, and once the search has been developed, how a particular source, string, or method performed in discovering benchmarking articles.
39 |
40 | **Exporting and Re-importing Data**
41 |
42 | Once records have been processed, users are able to export data in .csv, .ris, and .bib formats. Furthermore, users are able to reimport .csv and .ris files in order to recreate plots and tables.
43 |
44 | ## Getting Started
45 | **Installation**
46 |
47 | Install CiteSource in R with remotes::install_github("ESHackathon/CiteSource")
48 |
49 | **Vignettes**
50 |
51 | Vignettes covering various use cases can be found on the [CiteSource web page](https://www.eshackathon.org/CiteSource/).
52 |
53 | ## Feedback
54 |
55 | Be sure to check out [our discussion page](https://github.com/ESHackathon/CiteSource/discussions) to engage with us or to learn more about the various use cases for CiteSource. You can provide comments/suggestions or suggest a vignette for a specific use case.
56 |
57 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: http://www.eshackathon.org/CiteSource/
2 | template:
3 | bootstrap: 5
4 |
5 | articles:
6 | - title: Vignettes
7 | navbar: ~
8 | contents:
9 | - citesource_working_example
10 | - citesource_benchmark_testing
11 | - citesource_new_benchmark_testing
12 | - citesource_vignette_db-pre-screen_validation
13 | - citesource_vignette_db-topic-coverage
14 | - citesource_analysis_across_screening_phases
15 |
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## R CMD check results
2 |
3 | 0 errors | 0 warnings | 1 note
4 |
5 | * This is a new release.
6 |
--------------------------------------------------------------------------------
/inst/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/inst/.DS_Store
--------------------------------------------------------------------------------
/inst/extdata/examplecitations.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/inst/extdata/examplecitations.rds
--------------------------------------------------------------------------------
/inst/shiny-app/CiteSource/google_analytics_dev.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/inst/shiny-app/CiteSource/google_analytics_main.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/inst/shiny-app/CiteSource/www/CS.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/inst/shiny-app/CiteSource/www/CS.gif
--------------------------------------------------------------------------------
/inst/shiny-app/CiteSource/www/about.md:
--------------------------------------------------------------------------------
1 | ## About CiteSource
2 |
3 |
4 |
5 |
6 | CiteSource is an R package and accompanying Shiny web application designed to support evidence data-driven decision-making during search strategy development. CiteSource also allows users to analyze and report on the impact of information sources and search methods.
7 |
8 | CiteSource was developed as part of the [Evidence Synthesis Hackathon](https://www.eshackathon.org/) initiative.
9 |
10 | ---
11 |
12 | ### Key Features:
13 |
14 |
15 | Flexible Metadata for Provenance Tracking:
16 |
17 | > * A core strength of CiteSource is its ability to assign and retain custom metadata to track the *provenance* of each citation – precisely where and how it was found. Users can tag records using three key fields:
18 | > * `cite_source`: Identify the origin database ('Web of Science', 'Scopus'), platform ('Lens.org'), or the specific search method used ('Citation Searching', 'String_1').
19 | > * `cite_label`: Track citations through screening phases using standardized terms: `search` (for initial results, benchmarks), `screened` (for records passing title/abstract review), and `final` (for records included in the synthesis after full-text review).
20 | > * `cite_string`: Add further detail, such as variations in search string syntax tested ('String_1a_truncation'), specific supplementary methods ('Handsearching_JournalX'), or other custom categories relevant to analysis.
21 | > * This detailed tagging enables rigorous analysis of the performance and contribution of each component of your overall search strategy.
22 |
23 |
24 |
25 |
26 | Advanced Deduplication & Intelligent Merging:
27 |
28 | > * CiteSource employs the [`ASySD` (Automated Systematic Search Deduplicator) R package](https://github.com/camaradesuk/ASySD) to perform robust identification and merging of duplicate records.
29 | > * It conducts both *internal deduplication* (identifying duplicates within a single uploaded file/source, resulting in *distinct* records) and *external deduplication* (identifying duplicates across all uploaded files/sources, resulting in the set of *unique* citations).
30 | > * The process uses *intelligent merging*: custom metadata tags (`source`, `label`, `string`) from all identified duplicates are combined onto the primary record, preserving the full discovery history.
31 | > * The most complete bibliographic data (prioritizing DOI, Abstract) across duplicates is retained in the primary record.
32 | > * An optional *manual review* stage presents potential duplicates that fall below the automatic matching threshold, allowing users to confirm or reject merges for maximum accuracy.
33 |
34 |
35 |
36 |
37 | Data-Driven Analysis & Visualization:
38 |
39 | > * Once deduplication is complete, CiteSource offers a suite of analysis and visualization tools designed specifically to speed up the *iterative process* of developing, testing, and validating search strategies:
40 | > * Visualize Overlap: Use interactive **Heatmaps** (pairwise overlap) and **Upset Plots** (multi-set intersections) to understand shared and unique records across sources, labels, or strings.
41 | > * Track Phase Progression: Employ the **Phase Analysis plot** (bar chart) to see the flow of unique and duplicate records through screening stages (`search` -> `screened` -> `final`).
42 | > * Generate Summary Tables: Access quantitative insights via automated tables detailing:
43 | > * Initial Record counts (showing the impact of internal deduplication).
44 | > * Record Summaries (detailing unique/overlapping records contributed by each source/method).
45 | > * Precision/Sensitivity calculations (evaluating source/method performance against the `final` included set).
46 | > * A detailed, interactive **Record Level Table** for quickly examining and linking to citations .
47 |
48 |
49 |
50 |
51 | Enhanced Reporting & Transparent Export:
52 |
53 | > * CiteSource facilitates *transparent reporting* of search methods and results, aligning with guidelines like PRISMA.
54 | > * Export your final, deduplicated dataset in standard bibliographic formats (`.csv`, `.ris`, `.bib`).
55 | > * The custom metadata is embedded directly into standard fields within the export files (e.g., using C1, C2, C7, C8, DB fields in `.ris` format), providing a clear and reproducible audit trail for your methodology.
56 |
57 |
58 |
59 | ---
60 |
61 | ### Why use CiteSource for Evidence Synthesis?
62 |
63 | CiteSource is built for anyone involved in evidence synthesis. It helps you:
64 |
65 | * **Evaluate and optimize** information source selection based on unique record contributions.
66 | * **Refine and optimize** search strings by quickly testing variations.
67 | * **Analyze and report** the added value of different search methods, including supplementary searching techniques.
68 | * **Perform benchmark testing** to ensure key articles are captured by your strategy.
69 | * **Increase transparency and effectiveness** of your search strategy and processess through built-in tables for reporting.
70 | * **Save time** during the iterative search development.
71 |
72 | ### What Other Applications does CiteSource Serve ?
73 |
74 | * Training in evidence synthesis search methods - MLIS classroom use for skill/knowledge development.
75 | * Methods research & development - large-scale methods testing, quick/live updates to analysis.
76 | * Library collection development - analyzing coverage of new databases compared to current subscriptions.
77 |
78 | ---
79 |
80 | *CiteSource is available both as this interactive Shiny application and as a full R package with detailed vignettes. For more information on the R package, visit the [CiteSource Website](https://www.eshackathon.org/CiteSource/).*
--------------------------------------------------------------------------------
/inst/shiny-app/CiteSource/www/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/inst/shiny-app/CiteSource/www/favicon.png
--------------------------------------------------------------------------------
/inst/shiny-app/CiteSource/www/use-cases.md:
--------------------------------------------------------------------------------
1 | ## CiteSource Use Cases: Overview
2 |
3 |
4 |
5 | CiteSource provides a suite of tools to support data-driven decision-making throughout the evidence synthesis process. Its applications generally fall into two main categories: **Optimizing Search Strategies** (typically during protocol development and iterative searching) and **Analyzing Search Impact** (often after screening is complete, for reporting and methodological insights). Additional applications extend to training and resource management.
6 |
7 | ---
8 |
9 | ### I. Optimizing Search Strategies
10 |
11 | CiteSource enables researchers to move beyond reliance on experience or potentially outdated guidance by providing empirical data specific to their project *during* the search development phase.
12 |
13 |
14 | Information Source/Method Selection & Optimization
15 |
16 | > Choosing the most effective and efficient set of databases, platforms, or indexes (e.g., Web of Science, Scopus, ASFA, Dimensions, OATD) can be challenging, especially for interdisciplinary topics where overlap and unique contributions are unknown. CiteSource addresses this by allowing users to empirically compare potential sources *before* committing significant time. After uploading initial search results and tagging them using the `cite_source` field (e.g., `Web of Science`, `Scopus`), deduplication the overlapping and unique records across sources and methods. This analysis enables informed, data-driven decisions about which sources and methods provide the best return on investment and helps optimize the selection, potentially reducing redundancy. Key CiteSource features used include:
17 | > * Tagging records with `cite_source` metadata.
18 | > * Robust internal and external deduplication (using `ASySD`).
19 | > * Visualization of overlap using interactive **Heatmaps** and **Upset Plots**.
20 | > * Quick analysis of individual citations using the interactive **Record Level Table**
21 |
22 |
23 |
24 |
25 | Search String Development & Optimization
26 |
27 | > Developing effective search strings is an iterative process involving testing terms, syntax variations, Boolean logic, proximity operators, field codes, etc. Comparing the impact of these subtle changes across potentially multiple databases is time-consuming. CiteSource assists by streamlining the analysis of string effectiveness. Users can upload results from different string variations, tag them using `cite_source` and `cite_string` (e.g., `String_1`, `String_2_proximity`), and visualize the impact on retrieval after deduplication. This allows for rapid assessment of how changes affect results, speeding up refinement for an optimal balance of sensitivity and precision and helping identify errors in logic or syntax. CiteSource facilitates this via:
28 | > * Tagging result sets with `cite_source` and `cite_string` metadata.
29 | > * Deduplication to compare results accurately.
30 | > * Visualization of unique/overlapping records retrieved by different strings using **Upset Plots**.
31 | > * Quick examination of individual citations using the interactive **Record Level Table**.
32 |
33 |
34 |
35 |
36 | Benchmark Testing
37 |
38 | > Ensuring a search strategy retrieves known, key relevant articles (benchmark or reference articles) is crucial for assessing sensitivity. CiteSource facilitates this by comparing search results against a predefined benchmark set. After uploading search results and the benchmark set (tagging each appropriately using `cite_source`, `cite_string`, and `cite_label`), deduplication allows for direct comparison. This provides a quantitative assessment of how well different strings or sources capture the benchmark articles, highlighting potential weaknesses or indexing gaps. While benchmark sets should be used cautiously due to potential bias, this process aids refinement and is useful for updates or developing standardized protocols. Key functionalities include:
39 | > * Tagging benchmark sets and search results distinctly using `cite_source` and `cite_label`.
40 | > * Deduplication to identify matches between search results and the benchmark set.
41 | > * Using **Upset Plots** to visualize captured vs. missed benchmark articles across different strings/sources.
42 | > * Investigating specific missed articles using the interactive **Record Level Table**.
43 |
44 |
45 |
46 |
47 | Efficient Iterative Testing (Overarching Benefit)
48 |
49 | > The entire process of testing variations in sources, strings, and methods is inherently iterative. CiteSource significantly compresses this cycle by providing rapid analysis and visualization (**Heatmaps**, **Upset Plots**, **Summary Tables**) immediately after deduplication. This drastic reduction in the time needed to evaluate the impact of each iteration saves researcher time and allows for more thorough testing and validation, leading to a more optimized and well-documented strategy.
50 |
51 |
52 |
53 | ---
54 |
55 | ### II. Analyzing Search Impact (Post-Screening / Reporting)
56 |
57 | After screening is complete, CiteSource analyzes the *actual* contribution of different search components to the final set of included studies and enhances reporting.
58 |
59 |
60 | Analyzing Information Source & Search Method Contribution
61 |
62 | > Understanding which sources or methods were most effective in identifying the studies ultimately included in the synthesis is crucial for methodological reflection and reporting. CiteSource enables this analysis by tracking records through screening phases. By tagging records with `cite_source`/`cite_string` and progressively updating the `cite_label` (`search` -> `screened` -> `final`), users can quantify the "true impact" or ROI of each component. This identifies high-yield sources/methods versus those contributing mostly irrelevant records, providing valuable data for reporting and future strategy refinement. Analysis tools include:
63 | > * Tracking records using `cite_source`, `cite_string`, and `cite_label` tags.
64 | > * Visualizing the flow through screening stages with the **Bar Chart (Phase Analysis Plot)**.
65 | > * Quantifying performance using the **Precision/Sensitivity Table** (calculating precision and recall against the `final` set).
66 | > * Examining contributions at each stage using the **Record Summary Table**.
67 |
68 |
69 |
70 |
71 | Enhanced Reporting & Transparency
72 |
73 | > Reporting guidelines like PRISMA require transparent and detailed documentation of the search process. CiteSource directly supports this by generating clear outputs and ensuring provenance is maintained. The plots and tables offer visual and quantitative summaries of the search process, outcomes, and source/method contributions. Furthermore, exporting the final dataset embeds the custom metadata tags (`cite_source`, `cite_label`, `cite_string`) into standard bibliographic fields (e.g., C1, C2, C7, C8, DB in `.ris` format), providing a clear, reproducible audit trail. This enhances transparency and allows reviewers/readers to scrutinize the methodology effectively. Key outputs for reporting include:
74 | > * Ready-to-use **plots** (**Heatmaps**, **Upset Plots**, **Bar Charts**).
75 | > * Summary **tables** (**Initial Record**, **Record Summary**, **Precision/Sensitivity**, **Record Level**).
76 | > * Exported datasets (`.csv`, `.ris`, `.bib`) with embedded provenance metadata.
77 |
78 |
79 |
80 | ---
81 |
82 | ### III. Broader Applications
83 |
84 | Beyond individual reviews, CiteSource has wider utility:
85 |
86 |
87 | Training & Education
88 |
89 | > CiteSource serves as an effective training tool for evidence synthesis methods. Its interactive visualizations provide a hands-on way for students and early-career researchers to understand abstract concepts like database overlap, string variation impacts, and benchmark testing. Instructors can use it to demonstrate best practices in real-time, building practical skills and competence in systematic searching.
90 |
91 |
92 |
93 |
94 | Library Collection Development
95 |
96 | > Librarians can leverage CiteSource to support collection development decisions. By analyzing search results from institutional researchers or targeted test searches, they can gain empirical data on database coverage and overlap for specific research topics. This helps justify subscription costs, compare existing resources with potential new ones, and make effective recommendations based on demonstrated value and uniqueness.
97 |
98 |
99 |
100 |
101 | Methodological Research
102 |
103 | > CiteSource facilitates methodological research on searching itself. When researchers use the tool and report their quantitative findings on source/method performance (e.g., precision/sensitivity, unique contributions), they contribute valuable empirical data to the wider evidence synthesis community. Aggregating such findings across studies can inform the development and refinement of evidence-based search guidelines and best practices, potentially supporting "Studies Within A Review" (SWAR) focused on search methodology.
104 |
105 |
106 |
107 | ---
--------------------------------------------------------------------------------
/inst/shiny-app/CiteSource/www/user_guide.md:
--------------------------------------------------------------------------------
1 | ## CiteSource User Guide
2 |
3 |
4 |
5 | > CiteSource has a number of applications. This guide walks users through the step-wise process of uploading, deduplicating and analyzing data > within the shiny application. For step by step instructions for running CiteSource in R, [check out our vignettes](https://www.eshackathon.org/CiteSource/articles/)
6 | ---
7 |
8 | ### Using CiteSource: Step-by-Step
9 |
10 |
11 | Step 1: File Upload, Labeling, & Re-importing
12 |
13 | > **Standard Upload:**
14 | >
15 | > * Navigate to the 'File upload' tab.
16 | > * Use the 'Set Label for Uploaded File(s)' dropdown to select the appropriate stage for the file(s) you are about to upload (e.g., `search`, `screened`, or `final`). This label helps organize records, especially for phase analysis and some summary tables.
17 | > * Click the file input area ('Browse...') to select one or more citation files from your computer. Supported formats are `.ris`, `.bib`, and `.txt`.
18 | > * The label you selected will be applied to all citation records within the file(s) uploaded in that specific action.
19 | >
20 | > **Re-importing Previously Processed Data:**
21 | >
22 | > * If you have previously exported data from CiteSource as a `.ris` or `.csv` file (these exported files contain special `cite_` columns), you can re-upload this file directly.
23 | > * On the 'File upload' tab, use the 'OR: Re-upload an .ris or .csv exported from CiteSource' file input.
24 | > * This bypasses the initial upload processing and deduplication steps (Steps 3 & 4), allowing you to proceed directly to the 'Visualise' and 'Tables' tabs with your previously processed data.
25 | >
26 | > * **NOTE**: Raw citation exports from some platforms (e.g.OVID) may be incompatible due to abnormal .ris field use or structuring. If you are having issues, please be sure to try importing them using citation software (e.g. Zotero, EndNote) and exporting them before uploading to CiteSource.*
27 |
28 |
29 |
30 |
31 | Step 2: Review Uploads & Edit
32 |
33 | > * After uploading citations, a summary table appears in the main panel showing each file, its detected record count, and the assigned source name, label, and string.
34 | > * To correct the auto-assigned source name, or to change the label or string for *all* records from a specific file after upload, you can double-click the corresponding cell in the table and type the new value.
35 |
36 |
37 |
38 |
39 | Step 3: Automated Deduplication
40 |
41 | > * Navigate to the 'Deduplicate' tab and ensure you are on the 'Automated deduplication' sub-tab.
42 | > * Click the 'Find duplicates' button.
43 | > * CiteSource will process all the records you've uploaded. It compares metadata fields (like DOI, title, authors, journal, year, volume, pages) to identify potential duplicates both *within* the same source file (internal deduplication) and *across* different source files (external deduplication).
44 | > * A pop-up message will summarize the results, indicating the number of unique records found and if any potential duplicates require manual review.
45 |
46 |
47 |
48 |
49 | Step 4: Manual Deduplication (If Needed)
50 |
51 | > * If the summary message from Step 3 indicates potential duplicates need review, or if you want to manually inspect potential matches, go to the 'Manual deduplication' sub-tab.
52 | > * Pairs of records identified as potential duplicates are displayed. Each row represents a pair, showing selected metadata side-by-side (e.g., Title 1 vs. Title 2).
53 | > * Use the 'Choose columns' filter dropdown (filter icon) above the table to select which metadata fields (e.g., author, year, abstract) you want to see for comparison.
54 | > * Carefully review each pair. If you determine a pair represents the *same* underlying citation, click on that row to select it.
55 | > * After selecting all rows that are true duplicates, click the 'Remove additional duplicates' button (this button only appears after you select at least one row). This merges the selected pairs, keeping only one unique record with combined metadata.
56 | > * If you finish reviewing or decide no manual merging is needed, click 'Go to visualisations'.
57 |
58 |
59 |
60 |
61 | Step 5: Visualise Overlap
62 |
63 | > * Navigate to the 'Visualise' tab.
64 | > * Use the sidebar controls to tailor the analysis:
65 | > * **Choose comparison type:** Select whether you want to compare overlap based on 'sources' (original files/databases), 'labels' (e.g., search vs screened), or 'strings' (if used).
66 | > * **Filter data:** Select specific sources, labels, or strings to include in the visualizations.
67 | > * Explore the generated plots:
68 | > * **Heatmap:** This matrix shows pairwise overlap. Each cell represents the number of citations shared between two groups (the groups depend on your chosen comparison type). Darker cells indicate higher overlap. Hover over cells to see exact counts. It helps quickly identify pairs with significant commonality.
69 | > * **Upset Plot:** This plot visualizes intersections among multiple groups simultaneously. The large bottom bar chart shows the number of citations unique to specific combinations of groups (e.g., found only in Source A, or found in both Source A and B but not C). The smaller top bar chart shows the total number of unique citations in each individual group. It's excellent for understanding complex overlap patterns involving more than two groups.
70 | > * **Phase Analysis Plot:** This plot is most useful when comparing by 'labels' representing stages (e.g., `search`, `screened`, `final`). It shows the total number of records at each stage, broken down into those that are unique (first identified at that stage) versus those that were already found in a previous stage (duplicates relative to earlier stages). It helps visualize the yield and deduplication effectiveness across a review workflow.
71 | > * Use the 'Download' buttons above each plot to save them as image files.
72 |
73 |
74 |
75 |
76 | Step 6: Summary Tables & Record Review
77 |
78 | > * Navigate to the 'Tables' tab.
79 | > * Use the sidebar filters (Sources, Labels, Strings) to select the subset of data you want summarized.
80 | > * Generate specific summary tables by clicking the corresponding 'Generate...' button:
81 | > * **Initial Records Table:** Provides a high-level count based on the earliest phase (typically records labeled `search`). Shows the total uploaded records for that phase. This table distinguishes between the number of uploaded records andduplicates found *within* the each source file.
82 | > * **Detailed Record Table:** Breaks down the citation counts by individual source/method (within your selected filters). For each set of records, it shows how many citations were unique to that set and how many were also found in other sets. This helps identify which sources/methods contributed the most unique records and which have a high level of overlap.
83 | > * **Precision/Sensitivity Table:** Calculates performance metrics, requiring data labeled as `final` to be present and selected. It compares each source, method, or search string against this 'final' set. 'Precision' tells you what proportion of records retrieved by a source were actually relevant ('final' records). 'Sensitivity' (or Recall) tells you what proportion of all relevant ('final') records were found by that specific source. Useful for evaluating search strategy performance.
84 | > * **Review individual records:** Click 'Generate the table' on the "Review individual records" sub-tab to view the detailed, deduplicated citation list. This table may take a while to load if you have a large number of records.
85 | >
86 | > **Using the Interactive Record Table:**
87 | >
88 | > * **Expand/Collapse Row:** Click the `⊕` symbol in a row to view the full APA reference. Click `⊖` to hide it again.
89 | > * **Sort by Single Column:** Click any column header (like 'Citation' or a source name) to sort the table by that column's values. Click the header again to reverse the sort order.
90 | > * **Sort by Multiple Columns:** Click the primary column header you want to sort by. Then, hold down the **Shift** key on your keyboard and click a second column header. You can repeat this for more sorting levels.
91 | > * **Filter/Search:** Type into the search box located at the top-right of the table to dynamically filter records based on any information displayed.
92 | > * **Download Data:** Click the 'Download CSV' button (located above the table, next to 'Print') to save the data currently shown in the table (including applied filters) as a CSV file.
93 |
94 |
95 |
96 |
97 | Step 7: Export Results
98 |
99 | > * Navigate to the 'Export' tab.
100 | > * This tab becomes active after you have run the deduplication process (Step 3).
101 | > * Click the button corresponding to your desired file format: 'Download csv', 'Download RIS', or 'Download BibTex'.
102 | > * The custom metadata is embedded directly into fields within the export files (e.g., using C1, C2, C7, C8, DB fields in `.ris` format)
103 | > * This will save the final dataset of unique citations (after both automated and any manual deduplication).
104 | > * **Note:** Only `.csv` and `.ris` files can be re-imported later.
105 |
106 |
107 |
108 | ---
--------------------------------------------------------------------------------
/man/CiteSource-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CiteSource.R
3 | \docType{package}
4 | \name{CiteSource-package}
5 | \alias{CiteSource}
6 | \alias{CiteSource-package}
7 | \title{CiteSource: A package to compare sources of citation records}
8 | \description{
9 | The CiteSource package supports evidence aggregation by helping with the
10 | processing of results of various searches in different sources. It allows to
11 | deduplicate results while retaining meta-data on where those results were
12 | found and then enables users to compare the contribution of different sources
13 | }
14 | \seealso{
15 | Useful links:
16 | \itemize{
17 | \item \url{https://www.eshackathon.org/CiteSource}
18 | \item Report bugs at \url{https://github.com/ESHackathon/CiteSource/issues}
19 | }
20 |
21 | }
22 | \author{
23 | \strong{Maintainer}: Trevor Riley \email{trevor.riley@noaa.gov} (\href{https://orcid.org/0000-0002-6834-9802}{ORCID})
24 |
25 | Authors:
26 | \itemize{
27 | \item Kaitlyn Hair \email{kaitlyn.hair@ed.ac.uk} (\href{https://orcid.org/0000-0003-0180-7343}{ORCID})
28 | \item Lukas Wallrich \email{lukas.wallrich@gmail.com} (\href{https://orcid.org/0000-0003-2121-5177}{ORCID})
29 | \item Matthew Grainger \email{matthewjamesgrainger@gmail.com} (\href{https://orcid.org/0000-0001-8426-6495}{ORCID})
30 | \item Sarah Young \email{sarahy@andrew.cmu.edu} (\href{https://orcid.org/0000-0002-8301-5106}{ORCID})
31 | \item Chris Pritchard \email{chris.pritchard@ntu.ac.uk} (\href{https://orcid.org/0000-0002-1143-9751}{ORCID})
32 | \item Neal Haddaway \email{nealhaddaway@gmail.com} (\href{https://orcid.org/0000-0003-3902-2234}{ORCID})
33 | }
34 |
35 | Other contributors:
36 | \itemize{
37 | \item Martin Westgate (Author of included synthesisr fragments) [copyright holder]
38 | \item Eliza Grames (Author of included synthesisr fragments) [copyright holder]
39 | }
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/man/calculate_detailed_records.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/new_count_and_table.R
3 | \name{calculate_detailed_records}
4 | \alias{calculate_detailed_records}
5 | \title{Calculate Detailed Record Counts}
6 | \usage{
7 | calculate_detailed_records(
8 | unique_citations,
9 | n_unique,
10 | labels_to_include = NULL
11 | )
12 | }
13 | \arguments{
14 | \item{unique_citations}{A data frame containing unique citations.
15 | The data frame must include the columns \code{cite_source}, \code{cite_label}, and \code{duplicate_id}.}
16 |
17 | \item{n_unique}{A data frame containing counts of unique records, typically filtered
18 | by specific criteria (e.g., \code{cite_label == "search"}).}
19 |
20 | \item{labels_to_include}{An optional character vector of labels to filter the citations.
21 | If provided, only citations matching these labels will be included in the counts.
22 | if 'NULL' all labels are included. Default is 'NULL'.}
23 | }
24 | \value{
25 | A data frame with detailed counts for each citation source, including:
26 | \itemize{
27 | \item \verb{Records Imported}: Total number of records imported.
28 | \item \verb{Distinct Records}: Number of distinct records after deduplication.
29 | \item \verb{Unique Records}: Number of unique records specific to a source.
30 | \item \verb{Non-unique Records}: Number of records found in other sources.
31 | \item \verb{Source Contribution \%}: Percentage contribution of each source to the total distinct records.
32 | \item \verb{Source Unique Contribution \%}: Percentage contribution of each source to the total unique records.
33 | \item \verb{Source Unique \%}: Percentage of unique records within the distinct records for each source.
34 | }
35 | }
36 | \description{
37 | This function processes a dataset and expands the 'cite_source' column, filters on
38 | user-specified labels (if provided), and calculates detailed counts such as the records imported,
39 | distinct records, unique records, non-unique records, and several percentage contributions for
40 | each citation source/method it also adds a total row summarizing these counts.
41 | }
42 | \details{
43 | The function first checks if the required columns are present in the input data frames.
44 | It then expands the \code{cite_source} column, filters the data based on the provided labels (if any),
45 | and calculates various counts and percentages for each citation source. The function also adds
46 | a total row summarizing these counts across all sources.
47 | }
48 | \examples{
49 | # Example usage with a sample dataset
50 | unique_citations <- data.frame(
51 | cite_source = c("Source1, Source2", "Source2", "Source3"),
52 | cite_label = c("Label1", "Label2", "Label1"),
53 | duplicate_id = c(1, 2, 3)
54 | )
55 | n_unique <- data.frame(
56 | cite_source = c("Source1", "Source2", "Source3"),
57 | cite_label = c("search", "search", "search"),
58 | unique = c(10, 20, 30)
59 | )
60 | calculate_detailed_records(unique_citations, n_unique, labels_to_include = "search")
61 | }
62 |
--------------------------------------------------------------------------------
/man/calculate_initial_records.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/new_count_and_table.R
3 | \name{calculate_initial_records}
4 | \alias{calculate_initial_records}
5 | \title{Calculate Initial Records Unique Citations}
6 | \usage{
7 | calculate_initial_records(unique_citations, labels_to_include = NULL)
8 | }
9 | \arguments{
10 | \item{unique_citations}{A data frame containing the unique citations.
11 | It must contain the columns \code{cite_source}, \code{cite_label}, and \code{duplicate_id}.}
12 |
13 | \item{labels_to_include}{An optional character vector of labels to filter the citations.
14 | If provided, only citations matching these labels will be included in the counts.
15 | Default is NULL, meaning no filtering will be applied.}
16 | }
17 | \value{
18 | A data frame containing the counts of \verb{Records Imported} and \verb{Distinct Records}
19 | for each citation source. The data frame also includes a "Total" row summing
20 | the counts across all sources.
21 | }
22 | \description{
23 | This function processes a dataset of unique citations, expands the \code{cite_source} column,
24 | filters based on user-specified labels (if provided), and then calculates the number
25 | of records imported and distinct records for each citation source. It also adds a
26 | total row summarizing these counts.
27 | }
28 | \details{
29 | The function first checks if the required columns are present in the input data frame.
30 | It then expands the \code{cite_source} column to handle multiple sources listed in a
31 | single row and filters the dataset based on the provided labels (if any).
32 | The function calculates the number of records imported (total rows) and the number
33 | of distinct records (unique \code{duplicate_id} values) for each citation source.
34 | Finally, a total row is added to summarize the counts across all sources.
35 | }
36 | \examples{
37 | # Example usage with a sample dataset
38 | unique_citations <- data.frame(
39 | cite_source = c("Source1", "Source2", "Source3"),
40 | cite_label = c("Label1", "Label2", "Label3"),
41 | duplicate_id = c(1, 2, 3)
42 | )
43 | calculate_initial_records(unique_citations)
44 | }
45 |
--------------------------------------------------------------------------------
/man/calculate_phase_count.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/count.R
3 | \name{calculate_phase_count}
4 | \alias{calculate_phase_count}
5 | \title{Calculate phase counts, precision, and recall}
6 | \usage{
7 | calculate_phase_count(unique_citations, citations, db_colname)
8 | }
9 | \arguments{
10 | \item{unique_citations}{A dataframe containing unique citations with phase information.
11 | The phase information must be provided in a column named 'cite_label' in the dataframe.}
12 |
13 | \item{citations}{A dataframe containing all citations with phase information. The phase
14 | information must be provided in a column named 'cite_label' in the dataframe.}
15 |
16 | \item{db_colname}{The name of the column representing the source database.}
17 | }
18 | \value{
19 | A dataframe containing distinct counts, counts for different phases, precision,
20 | and recall for each source, as well as totals.
21 | }
22 | \description{
23 | This function calculates counts for different phases and calculates precision and recall
24 | for each source based on unique citations and citations dataframe. The phases should be
25 | labeled as 'screened' and 'final' (case-insensitive) in the input dataframes. The function
26 | will give a warning if these labels are not present in the input dataframes.
27 | }
28 | \details{
29 | The function will give a warning if 'screened' and 'final' labels are not present
30 | in the 'cite_label' column of the input dataframes.
31 | }
32 | \examples{
33 | unique_citations <- data.frame(
34 | db_source = c("Database1", "Database1", "Database2", "Database3", "Database3", "Database3"),
35 | cite_label = c("screened", "final", "screened", "final", "screened", "final"),
36 | duplicate_id = c(102, 102, 103, 103, 104, 104),
37 | other_data = 1:6
38 | )
39 |
40 | citations <- data.frame(
41 | db_source = c("Database1", "Database1", "Database1", "Database2", "Database2", "Database3"),
42 | cite_label = c("screened", "final", "screened", "final", "screened", "final"),
43 | other_data = 7:12
44 | )
45 |
46 | result <- calculate_phase_count(unique_citations, citations, "db_source")
47 | result
48 | }
49 |
--------------------------------------------------------------------------------
/man/calculate_phase_records.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/new_count_and_table.R
3 | \name{calculate_phase_records}
4 | \alias{calculate_phase_records}
5 | \title{Calculate Phase Counts with Precision and Recall}
6 | \usage{
7 | calculate_phase_records(unique_citations, n_unique, db_colname)
8 | }
9 | \arguments{
10 | \item{unique_citations}{A data frame containing unique citations.
11 | It must include the columns \code{cite_source}, \code{cite_label}, and \code{duplicate_id}.}
12 |
13 | \item{n_unique}{A data frame containing counts of unique records.
14 | Typically filtered by specific criteria, such as \code{cite_label == "search"}.}
15 |
16 | \item{db_colname}{The name of the column representing the citation source
17 | in the \code{unique_citations} data frame.}
18 | }
19 | \value{
20 | A data frame with phase counts and calculated precision and recall
21 | for each citation source, including:
22 | \itemize{
23 | \item \verb{Distinct Records}: The count of distinct records per source.
24 | \item \code{screened}: The count of records in the "screened" phase.
25 | \item \code{final}: The count of records in the "final" phase.
26 | \item \code{Precision}: The precision metric calculated as \verb{final / Distinct Records}.
27 | \item \code{Recall}: The recall metric calculated as \verb{final / Total final records}.
28 | }
29 | }
30 | \description{
31 | This function calculates the distinct record counts, as well as screened
32 | and final record counts, for each citation source across different phases
33 | (e.g., "screened", "final"). It also calculates precision and recall metrics
34 | for each source.
35 | }
36 | \details{
37 | The function starts by calculating the total distinct records, as well as
38 | the total "screened" and "final" records across all sources. It then
39 | calculates distinct counts for each source, followed by counts for "screened"
40 | and "final" records. Finally, it calculates precision and recall metrics and
41 | adds a total row summarizing these counts across all sources.
42 | }
43 | \examples{
44 | # Example usage with a sample dataset
45 | unique_citations <- data.frame(
46 | cite_source = c("Source1", "Source2", "Source3"),
47 | cite_label = c("screened","screened", "final"),
48 | duplicate_id = c(1, 2, 3)
49 | )
50 | n_unique <- data.frame(
51 | cite_source = c("Source1", "Source2", "Source3"),
52 | unique = c(10, 20, 30)
53 | )
54 | calculate_phase_records(unique_citations, n_unique, "cite_source")
55 | }
56 |
--------------------------------------------------------------------------------
/man/calculate_record_counts.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/count.R
3 | \name{calculate_record_counts}
4 | \alias{calculate_record_counts}
5 | \title{Calculate record counts function
6 | Calculate and combine counts of distinct records, imported records, and unique records for each database}
7 | \usage{
8 | calculate_record_counts(unique_citations, citations, n_unique, db_colname)
9 | }
10 | \arguments{
11 | \item{unique_citations}{Dataframe. The dataframe for calculating distinct records count.}
12 |
13 | \item{citations}{Dataframe. The dataframe for calculating records imported count.}
14 |
15 | \item{n_unique}{Dataframe. The dataframe for calculating unique records count.}
16 |
17 | \item{db_colname}{Character. The name of the column containing the database source information.}
18 | }
19 | \value{
20 | A dataframe with counts of distinct records, imported records, and unique records for each source, including total counts and several calculated ratios and percentages.
21 | }
22 | \description{
23 | This function calculates the counts of distinct records, records imported, and unique records for each database source.
24 | It combines these counts into one dataframe and calculates several ratios and percentages related to the unique and distinct counts.
25 | It also calculates the total for each count type.
26 | }
27 | \examples{
28 | unique_citations <- data.frame(
29 | db_source = c("Database1", "Database1", "Database2", "Database3", "Database3", "Database3"),
30 | other_data = 1:6
31 | )
32 |
33 | citations <- data.frame(
34 | db_source = c("Database1", "Database1", "Database1", "Database2", "Database2", "Database3"),
35 | other_data = 7:12
36 | )
37 |
38 | n_unique <- data.frame(
39 | cite_source = c("Database1", "Database2", "Database2", "Database3", "Database3", "Database3"),
40 | cite_label = c("search", "final", "search", "search", "search", "final"),
41 | unique = c(1, 0, 1, 1, 1, 0)
42 | )
43 |
44 | result <- calculate_record_counts(unique_citations, citations, n_unique, "db_source")
45 | print(result)
46 | }
47 |
--------------------------------------------------------------------------------
/man/citation_summary_table.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/tables.R
3 | \name{citation_summary_table}
4 | \alias{citation_summary_table}
5 | \title{Contribution summary table}
6 | \usage{
7 | citation_summary_table(
8 | citations,
9 | comparison_type = "sources",
10 | search_label = "search",
11 | screening_label = "final",
12 | top_n = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{citations}{A deduplicated tibble as returned by \code{dedup_citations()}.}
17 |
18 | \item{comparison_type}{Either "sources" to summarise and assess sources or "strings" to consider strings.}
19 |
20 | \item{search_label}{One or multiple labels that identify initial search results (default: "search") - if multiple labels are provided, they are merged.}
21 |
22 | \item{screening_label}{One or multiple label that identify screened records (default: "final") - if multiple are provided, each is compared to the search stage.}
23 |
24 | \item{top_n}{Number of sources/strings to display, based on the number of total records they contributed at the search stage. Note that calculations and totals will still be based on all citations. Defaults to NULL, then all sources/strings are displayed.}
25 | }
26 | \value{
27 | A tibble containing the contribution summary table, which shows the contribution of each source and the overall performance of the search
28 | }
29 | \description{
30 | Create a summary table to show the contribution of each source and the overall performance of the search. For this to work,
31 | labels need to be used that contrast a "search" stage with one or more later stages.
32 | }
33 | \examples{
34 | if (interactive()) {
35 | # Load example data from the package
36 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
37 | examplecitations <- readRDS(examplecitations_path)
38 |
39 | # Deduplicate citations and compare sources
40 | unique_citations <- dedup_citations(examplecitations)
41 |
42 | unique_citations |>
43 | dplyr::filter(stringr::str_detect(cite_label, "final")) |>
44 | record_level_table(return = "DT")
45 | citation_summary_table(unique_citations, screening_label = c("screened", "final"))
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/man/compare_sources.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/compare.R
3 | \name{compare_sources}
4 | \alias{compare_sources}
5 | \title{Compare duplicate citations across sources, labels, and strings}
6 | \usage{
7 | compare_sources(
8 | unique_data,
9 | comp_type = c("sources", "strings", "labels"),
10 | include_references = FALSE
11 | )
12 | }
13 | \arguments{
14 | \item{unique_data}{from ASySD, merged unique rows with duplicate IDs}
15 |
16 | \item{comp_type}{Specify which fields are to be included. One or more of "sources", "strings" or "labels" - defaults to all.}
17 |
18 | \item{include_references}{Should bibliographic detail be included in return?}
19 | }
20 | \value{
21 | dataframe with indicators of where a citation appears, with sources/labels/strings as columns
22 | }
23 | \description{
24 | Compare duplicate citations across sources, labels, and strings
25 | }
26 | \examples{
27 | if (interactive()) {
28 | # Load example data from the package
29 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
30 | examplecitations <- readRDS(examplecitations_path)
31 |
32 | # Deduplicate citations and compare sources
33 | dedup_results <- dedup_citations(examplecitations)
34 | compare_sources(unique_citations, comp_type = "sources")
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/man/count_unique.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/compare.R
3 | \name{count_unique}
4 | \alias{count_unique}
5 | \title{Count number of unique and non-unique citations from different sources, labels, and strings}
6 | \usage{
7 | count_unique(unique_data, include_references = FALSE)
8 | }
9 | \arguments{
10 | \item{unique_data}{from ASySD, merged unique rows with duplicate IDs}
11 |
12 | \item{include_references}{Should bibliographic detail be included in return?}
13 | }
14 | \value{
15 | dataframe with indicators of where a citation appears, with source/label/string as column
16 | }
17 | \description{
18 | Count number of unique and non-unique citations from different sources, labels, and strings
19 | }
20 | \examples{
21 | # Load example data from the package
22 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
23 | examplecitations <- readRDS(examplecitations_path)
24 |
25 | # Deduplicate citations
26 | dedup_results <- dedup_citations(examplecitations)
27 |
28 | # Count unique and non-unique citations
29 | count_unique(dedup_results)
30 | }
31 |
--------------------------------------------------------------------------------
/man/create_detailed_record_table.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/new_count_and_table.R
3 | \name{create_detailed_record_table}
4 | \alias{create_detailed_record_table}
5 | \title{Create a Detailed Record Table}
6 | \usage{
7 | create_detailed_record_table(data)
8 | }
9 | \arguments{
10 | \item{data}{A data frame containing the detailed counts for each citation source.
11 | The data frame must include the following columns:
12 | \itemize{
13 | \item \code{Source}: The name of the citation source.
14 | \item \verb{Records Imported}: The total number of records imported from the source.
15 | \item \verb{Distinct Records}: The number of distinct records after deduplication within the source.
16 | \item \verb{Unique Records}: The number of records unique to that source.
17 | \item \verb{Non-unique Records}: The number of records found in at least one other source.
18 | \item \verb{Source Contribution \%}: The percentage contribution of each source to the total distinct records.
19 | \item \verb{Source Unique Contribution \%}: The percentage contribution of each source to the total unique records.
20 | \item \verb{Source Unique \%}: The percentage of records from each source that were unique.
21 | }}
22 | }
23 | \value{
24 | A \code{gt} table object summarizing the detailed record counts for each citation source.
25 | }
26 | \description{
27 | This function generates a formatted summary table using the \code{gt} package,
28 | which displays detailed counts for each citation source. The table includes
29 | columns for the number of records imported, distinct records, unique records,
30 | non-unique records, and various contribution percentages. Data from the
31 | function calculate_detailed_records is pre-formatted for this table.
32 | }
33 | \details{
34 | The function checks for the presence of all required columns in the input data frame.
35 | If any required columns are missing, the function stops and returns an error message
36 | specifying the missing columns. This ensures that the input data is correctly formatted
37 | before attempting to generate the table.
38 |
39 | The generated table includes a header and footnotes that provide additional context
40 | for each column, explaining the meaning of the data presented.
41 | }
42 | \examples{
43 | # Example usage with a sample dataset
44 | sample_data <- data.frame(
45 | Source = c("Source1", "Source2", "Source3", "Total"),
46 | `Records Imported` = c(100, 150, 250, 500),
47 | `Distinct Records` = c(90, 140, 230, 460),
48 | `Unique Records` = c(50, 70, 120, 240),
49 | `Non-unique Records` = c(40, 70, 110, 220),
50 | `Source Contribution \%` = c("39.1\%", "60.9\%", "100\%", "100\%"),
51 | `Source Unique Contribution \%` = c("41.7\%", "58.3\%", "100\%", "100\%"),
52 | `Source Unique \%` = c("55.6\%", "50\%", "52.2\%", "52.2\%"),
53 | check.names = FALSE
54 | )
55 |
56 | # Create the detailed record table
57 | create_detailed_record_table(sample_data)
58 | }
59 |
--------------------------------------------------------------------------------
/man/create_initial_record_table.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/new_count_and_table.R
3 | \name{create_initial_record_table}
4 | \alias{create_initial_record_table}
5 | \title{Initial Record Table}
6 | \usage{
7 | create_initial_record_table(data)
8 | }
9 | \arguments{
10 | \item{data}{A data frame containing the record counts for each citation source.
11 | It must include columns \code{Source}, \code{Records_Imported}, and \code{Distinct_Records}.}
12 | }
13 | \value{
14 | A \code{gt} table object summarizing the record counts for each citation source.
15 | }
16 | \description{
17 | This function generates a formatted table displaying the record counts
18 | for each citation source, including the number of records imported and
19 | the distinct records after deduplication.
20 | }
21 | \details{
22 | The function checks if the input data frame is empty and returns an empty \code{gt} table
23 | if no data is present. Otherwise, it generates a formatted table with labeled columns
24 | and adds footnotes explaining the meaning of each column.
25 | }
26 | \examples{
27 | # Example usage with a sample dataset
28 | sample_data <- data.frame(
29 | Source = c("Source1", "Source2", "Source3"),
30 | Records_Imported = c(100, 150, 250),
31 | Distinct_Records = c(90, 140, 230)
32 | )
33 | create_initial_record_table(sample_data)
34 | }
35 |
--------------------------------------------------------------------------------
/man/create_precision_sensitivity_table.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/new_count_and_table.R
3 | \name{create_precision_sensitivity_table}
4 | \alias{create_precision_sensitivity_table}
5 | \title{Count and Precision/Sensitivity Table}
6 | \usage{
7 | create_precision_sensitivity_table(data)
8 | }
9 | \arguments{
10 | \item{data}{A data frame containing phase-specific counts and calculated metrics
11 | for each citation source. It must include columns such as \code{Source},
12 | \code{Distinct_Records}, \code{final}, \code{Precision}, \code{Recall}, and optionally \code{screened}.}
13 | }
14 | \value{
15 | A \code{gt} table object summarizing the precision and sensitivity
16 | metrics for each citation source, with relevant footnotes and labels.
17 | }
18 | \description{
19 | This function generates a formatted table that displays the precision
20 | and sensitivity (recall) metrics for each citation source, along with
21 | distinct records and phase-specific counts such as "screened" and "final".
22 | }
23 | \details{
24 | The function first checks whether all values in the \code{screened} column are zero.
25 | If so, the column is removed from the table. The table is then generated
26 | using the \code{gt} package, with labeled columns and footnotes explaining the metrics.
27 | }
28 | \examples{
29 | # Example usage with a sample dataset
30 | sample_data <- data.frame(
31 | Source = c("Source1", "Source2", "Total"),
32 | Distinct_Records = c(100, 150, 250),
33 | final = c(80, 120, 200),
34 | Precision = c(80.0, 80.0, 80.0),
35 | Recall = c(40.0, 60.0, 100.0),
36 | screened = c(90, 140, 230)
37 | )
38 | create_precision_sensitivity_table(sample_data)
39 | }
40 |
--------------------------------------------------------------------------------
/man/dedup_citations.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dedup.R
3 | \name{dedup_citations}
4 | \alias{dedup_citations}
5 | \title{Deduplicate citations - ASySD wrapper}
6 | \usage{
7 | dedup_citations(raw_citations, manual = FALSE, show_unknown_tags = FALSE)
8 | }
9 | \arguments{
10 | \item{raw_citations}{Citation dataframe with relevant columns}
11 |
12 | \item{manual}{logical. If TRUE, manually specify pairs of duplicates to merge. Default is FALSE.}
13 |
14 | \item{show_unknown_tags}{When a label, source, or other merged field is missing, do you want this to show as "unknown"?}
15 | }
16 | \value{
17 | unique citations formatted for CiteSource
18 | }
19 | \description{
20 | This function deduplicates citation data. Note that duplicates are assumed to published
21 | in the same journal, so pre-prints and similar results will not be identified here.
22 | }
23 | \examples{
24 | # Load example data from the package
25 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
26 | examplecitations <- readRDS(examplecitations_path)
27 |
28 | # Deduplicate citations without manually specifying pairs and without showing unknown tags
29 | dedup_results <- dedup_citations(examplecitations)
30 |
31 | # Deduplicate citations with manual specification of pairs and showing unknown tags
32 | dedup_results_manual_unknown <- dedup_citations(
33 | examplecitations,
34 | manual = TRUE,
35 | show_unknown_tags = TRUE
36 | )
37 | }
38 |
--------------------------------------------------------------------------------
/man/dedup_citations_add_manual.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dedup.R
3 | \name{dedup_citations_add_manual}
4 | \alias{dedup_citations_add_manual}
5 | \title{Remove pairs with manual dedup - ASySD wrapper}
6 | \usage{
7 | dedup_citations_add_manual(unique_citations, additional_pairs)
8 | }
9 | \arguments{
10 | \item{unique_citations}{Unique citations post deduplication}
11 |
12 | \item{additional_pairs}{TRUE duplicate pairs}
13 | }
14 | \value{
15 | unique citations formatted for CiteSource
16 | }
17 | \description{
18 | This function deduplicates citation data. Note that duplicates are assumed to published
19 | in the same journal, so pre-prints and similar results will not be identified here.
20 | }
21 | \examples{
22 | # Load example data from the package
23 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
24 | examplecitations <- readRDS(examplecitations_path)
25 |
26 | # Deduplicate citations
27 | dedup_results <- dedup_citations(examplecitations)
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/man/detect_.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/import_export_helpers.R
3 | \name{detect_}
4 | \alias{detect_}
5 | \alias{detect_parser}
6 | \alias{detect_delimiter}
7 | \alias{detect_lookup}
8 | \alias{detect_year}
9 | \title{Detect file formatting information}
10 | \usage{
11 | detect_parser(x)
12 |
13 | detect_delimiter(x)
14 |
15 | detect_lookup(tags)
16 |
17 | detect_year(df)
18 | }
19 | \arguments{
20 | \item{x}{A character vector containing bibliographic data}
21 |
22 | \item{tags}{A character vector containing RIS tags.}
23 |
24 | \item{df}{a data.frame containing bibliographic data}
25 | }
26 | \value{
27 | \code{detect_parser} and \code{detect_delimiter} return a length-1 character; \code{detect_year} returns a character vector listing estimated publication years; and \code{detect_lookup} returns a \code{data.frame}.
28 | }
29 | \description{
30 | Bibliographic data can be stored in a number of different file types, meaning that detecting consistent attributes of those files is necessary if they are to be parsed accurately. These functions attempt to identify some of those key file attributes. Specifically, \code{detect_parser} determines which \code{\link{parse_}} function to use; \code{detect_delimiter} and \code{detect_lookup} identify different attributes of RIS files; and \code{detect_year} attempts to fill gaps in publication years from other information stored in a \code{data.frame}.
31 | }
32 |
--------------------------------------------------------------------------------
/man/export_bib.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/export.R
3 | \name{export_bib}
4 | \alias{export_bib}
5 | \title{Export deduplicated citations to .bib file}
6 | \usage{
7 | export_bib(
8 | citations,
9 | filename = "citations.bib",
10 | include = c("sources", "labels", "strings")
11 | )
12 | }
13 | \arguments{
14 | \item{citations}{Dataframe with unique citations, resulting from \code{dedup_citations()}}
15 |
16 | \item{filename}{Name (and path) of file, should end in .ris}
17 |
18 | \item{include}{Character. One or more of sources, labels or strings}
19 | }
20 | \description{
21 | This function saves deduplicated citations as a BibTex file with sources, labels and strings
22 | included in the \code{note} field (if they were initially provided for any of the citations). Therefore,
23 | beware that \strong{any \code{note} field that might be included in \code{citations} will be overwritten}. Also note that
24 | \emph{existing files are overwritten without warning.}
25 | }
26 | \examples{
27 | if (interactive()) {
28 | # Load example data from the package
29 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
30 | examplecitations <- readRDS(examplecitations_path)
31 | dedup_results <- dedup_citations(examplecitations, merge_citations = TRUE)
32 | export_bib(dedup_results$unique, "cite_sources.bib", include = "sources")
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/man/export_csv.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/export.R
3 | \name{export_csv}
4 | \alias{export_csv}
5 | \title{Export deduplicated citations with source data as CSV file}
6 | \usage{
7 | export_csv(
8 | unique_citations,
9 | filename = "citesource_exported_citations.csv",
10 | separate = NULL,
11 | trim_abstracts = 32000
12 | )
13 | }
14 | \arguments{
15 | \item{unique_citations}{Dataframe with unique citations, resulting from \code{dedup_citations()}}
16 |
17 | \item{filename}{Name (and path) of file, should end in .csv}
18 |
19 | \item{separate}{Character vector indicating which (if any) of cite_source, cite_string and cite_label should be split into separate columns to facilitate further analysis.}
20 |
21 | \item{trim_abstracts}{Some databases may return full-text that is misidentified as an abstract. This inflates file size and may lead to issues with Excel,
22 | which cannot deal with more than 32,000 characters per field. Therefore, the default is to trim very long abstracts to 32,000 characters. Set a lower number to reduce file size, or
23 | NULL to retain abstracts as they are.}
24 | }
25 | \value{
26 | The function saves the deduplicated citations as a CSV file to the specified location.
27 | }
28 | \description{
29 | This function saves deduplicated citations as a CSV file for further analysis and/or reporting.
30 | Metadata can be separated into one column per source, label or string, which facilitates analysis.
31 | Note that \emph{existing files are overwritten without warning.}
32 | }
33 | \examples{
34 | if (interactive()) {
35 | # Load example data from the package
36 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
37 | examplecitations <- readRDS(examplecitations_path)
38 | dedup_results <- dedup_citations(examplecitations, merge_citations = TRUE)
39 | export_csv(dedup_results, "cite_sources.csv", separate = "cite_source")
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/man/export_ris.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/export.R
3 | \name{export_ris}
4 | \alias{export_ris}
5 | \title{Export data frame to RIS file}
6 | \usage{
7 | export_ris(
8 | citations,
9 | filename = "citations.ris",
10 | source_field = "DB",
11 | label_field = "C7",
12 | string_field = "C8"
13 | )
14 | }
15 | \arguments{
16 | \item{citations}{Dataframe to be exported to RIS file}
17 |
18 | \item{filename}{Name (and path) of file, should end in .ris}
19 |
20 | \item{source_field}{Field in \code{citations} representing the source. Default is "DB".}
21 |
22 | \item{label_field}{Field in \code{citations} representing the label. Default is "C7".}
23 |
24 | \item{string_field}{Field in \code{citations} representing additional string information. Default is "C8".}
25 | }
26 | \description{
27 | This function saves a data frame as a RIS file with specified columns mapped to RIS fields. Note that
28 | \emph{existing files are overwritten without warning.}
29 | }
30 | \examples{
31 | if (interactive()) {
32 | # Load example data from the package
33 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
34 | examplecitations <- readRDS(examplecitations_path)
35 | dedup_results <- dedup_citations(examplecitations, merge_citations = TRUE)
36 | export_ris(
37 | dedup_results$unique,
38 | "cite_sources.ris",
39 | user_mapping = list(
40 | "DB" = "cite_source_include",
41 | "C7" = "cite_label_include"
42 | )
43 | )
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/man/merge_columns.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/import_export_helpers.R
3 | \name{merge_columns}
4 | \alias{merge_columns}
5 | \title{Bind two or more data frames with different columns}
6 | \usage{
7 | merge_columns(x, y)
8 | }
9 | \arguments{
10 | \item{x}{Either a data.frame or a list of data.frames.}
11 |
12 | \item{y}{A data.frame, optional if x is a list.}
13 | }
14 | \value{
15 | Returns a single data.frame with all the input data frames merged.
16 | }
17 | \description{
18 | Takes two or more data.frames with different column names or different column orders and binds them to a single data.frame.
19 | }
20 |
--------------------------------------------------------------------------------
/man/parse_.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/import_export_helpers.R
3 | \name{parse_}
4 | \alias{parse_}
5 | \alias{parse_pubmed}
6 | \alias{parse_ris}
7 | \alias{parse_bibtex}
8 | \alias{parse_csv}
9 | \alias{parse_tsv}
10 | \title{Parse bibliographic text in a variety of formats}
11 | \usage{
12 | parse_pubmed(x)
13 |
14 | parse_ris(x, tag_naming = "best_guess")
15 |
16 | parse_bibtex(x)
17 |
18 | parse_csv(x)
19 |
20 | parse_tsv(x)
21 | }
22 | \arguments{
23 | \item{x}{A character vector containing bibliographic information in ris format.}
24 |
25 | \item{tag_naming}{What format are ris tags in? Defaults to "best_guess" See \code{\link{synthesisr_read_refs}} for a list of accepted arguments.}
26 | }
27 | \value{
28 | Returns an object of class \code{bibliography} (ris, bib, or pubmed formats) or \code{data.frame} (csv or tsv).
29 | }
30 | \description{
31 | Text in standard formats - such as imported via \code{\link{readLines}} - can be parsed using a variety of standard formats. Use \code{\link{detect_parser}} to determine which is the most appropriate parser for your situation.
32 | }
33 |
--------------------------------------------------------------------------------
/man/pipe.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CiteSource.R
3 | \name{\%>\%}
4 | \alias{\%>\%}
5 | \title{Pipe operator}
6 | \usage{
7 | lhs \%>\% rhs
8 | }
9 | \arguments{
10 | \item{lhs}{A value or the magrittr placeholder.}
11 |
12 | \item{rhs}{A function call using the magrittr semantics.}
13 | }
14 | \value{
15 | The result of calling \code{rhs(lhs)}.
16 | }
17 | \description{
18 | Pipe operator
19 | }
20 | \keyword{internal}
21 |
--------------------------------------------------------------------------------
/man/plot_contributions.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plots.R
3 | \name{plot_contributions}
4 | \alias{plot_contributions}
5 | \title{Create a bar chart that compares source contributions over stages}
6 | \usage{
7 | plot_contributions(
8 | data,
9 | facets = cite_source,
10 | bars = cite_label,
11 | color = type,
12 | center = FALSE,
13 | bar_order = "keep",
14 | facet_order = "keep",
15 | color_order = "keep",
16 | totals_in_legend = FALSE
17 | )
18 | }
19 | \arguments{
20 | \item{data}{A tibble with one hit per row, with variables indicating meta-data of interest.}
21 |
22 | \item{facets}{Variable in data used for facets (i.e. sub-plots). Defaults to source (i.e. cite_source). Specify NULL to refrain from faceting.}
23 |
24 | \item{bars}{Variable in data used for bars. Defaults to label (i.e. cite_label)}
25 |
26 | \item{color}{Color used to fill bars. Default to \code{unique}}
27 |
28 | \item{center}{Logical. Should one color be above and one below the axis?}
29 |
30 | \item{bar_order}{Character. Order of bars within each facet, any levels not specified will follow at the end. If "keep", then this is based on factor levels (or the first value) in the input data.}
31 |
32 | \item{facet_order}{Character. Order of facets. Any levels not specified will follow at the end.}
33 |
34 | \item{color_order}{Character. Order of values on the color scale.}
35 |
36 | \item{totals_in_legend}{Logical. Should totals be shown in legend (e.g. as Unique (N = 1234))}
37 | }
38 | \description{
39 | Create a faceted plot that shows unique contributions and duplicated records across
40 | two metadata dimensions. Most typical use-case might be to show the contributions of each source
41 | across different screening stages.
42 | }
43 | \examples{
44 | data <- data.frame(
45 | article_id = 1:100,
46 | cite_source = sample(c("DB 1", "DB 2", "DB 3"), 100, replace = TRUE),
47 | cite_label = sample(c("2020", "2021", "2022"), 100, replace = TRUE),
48 | type = c("unique", "duplicated")[rbinom(100, 1, .7) + 1]
49 | )
50 |
51 | plot_contributions(data,
52 | center = TRUE, bar_order = c("2022", "2021", "2020"),
53 | color_order = c("unique", "duplicated")
54 | )
55 |
56 | }
57 |
--------------------------------------------------------------------------------
/man/plot_source_overlap_heatmap.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plots.R
3 | \name{plot_source_overlap_heatmap}
4 | \alias{plot_source_overlap_heatmap}
5 | \title{Create a heatmap matrix showing the overlap between sources}
6 | \usage{
7 | plot_source_overlap_heatmap(
8 | data,
9 | cells = "source",
10 | facets = NULL,
11 | plot_type = c("counts", "percentages"),
12 | sort_sources = TRUE,
13 | interactive = FALSE
14 | )
15 | }
16 | \arguments{
17 | \item{data}{A tibble with one record per row, an id column and then one column
18 | per source indicating whether the record was found in that source (usually obtained from \code{compare_sources()})}
19 |
20 | \item{cells}{Variable to display in the cells. Should be 'source', 'label' or 'string'}
21 |
22 | \item{facets}{Variable in data used for facets (i.e. sub-plots). Should be NULL, 'source', 'label' or 'string'}
23 |
24 | \item{plot_type}{Either \code{counts} (number of shared records) or \code{percentages}
25 | (share of overlapping records).}
26 |
27 | \item{sort_sources}{Should sources be shown based on the number of records they
28 | contained? If FALSE, order of data is retained.}
29 |
30 | \item{interactive}{Should returned plot be interactive and enable user to export
31 | records underlying each field?}
32 | }
33 | \value{
34 | The requested plot as a either a \code{ggplot2} object (when interactive = FALSE), which can then be
35 | further formatted or saved using \code{\link[ggplot2:ggsave]{ggplot2::ggsave()}}, or a \code{plotly} object when \code{interactive = TRUE}
36 | }
37 | \description{
38 | Show overlap between different record sources, either by showing the
39 | number or the percentages of shared records between any pair of sources.
40 | }
41 | \examples{
42 | data <- data.frame(
43 | article_id = 1:500,
44 | source__source1 = rbinom(500, 1, .5) == 1,
45 | source__source2 = rbinom(500, 1, .2) == 1,
46 | source__source3 = rbinom(500, 1, .1) == 1,
47 | source__source4 = rbinom(500, 1, .6) == 1,
48 | source__source5 = rbinom(500, 1, .7) == 1
49 | )
50 |
51 | plot_source_overlap_heatmap(data)
52 | plot_source_overlap_heatmap(data, plot_type = "percentages")
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/man/plot_source_overlap_upset.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plots.R
3 | \name{plot_source_overlap_upset}
4 | \alias{plot_source_overlap_upset}
5 | \title{Create an UpSetR upset plot showing the overlap between sources}
6 | \usage{
7 | plot_source_overlap_upset(
8 | data,
9 | groups = "source",
10 | nsets = NULL,
11 | sets.x.label = "Number of records",
12 | mainbar.y.label = "Overlapping record count",
13 | order.by = c("freq", "degree"),
14 | ...
15 | )
16 | }
17 | \arguments{
18 | \item{data}{A tibble with one record per row, an id column and then one column
19 | per source indicating whether the record was found in that source.}
20 |
21 | \item{groups}{Variable to use as groups. Should be 'source', 'label' or 'string' - defaults to source.}
22 |
23 | \item{nsets}{Number of sets to look at}
24 |
25 | \item{sets.x.label}{The x-axis label of the set size bar plot}
26 |
27 | \item{mainbar.y.label}{The y-axis label of the intersection size bar plot}
28 |
29 | \item{order.by}{How the intersections in the matrix should be ordered by. Options include frequency (entered as "freq"), degree, or both in any order.}
30 |
31 | \item{...}{
32 | Arguments passed on to \code{\link[UpSetR:upset]{UpSetR::upset}}
33 | \describe{
34 | \item{\code{nintersects}}{Number of intersections to plot. If set to NA, all intersections will be plotted.}
35 | \item{\code{sets}}{Specific sets to look at (Include as combinations. Ex: c("Name1", "Name2"))}
36 | \item{\code{keep.order}}{Keep sets in the order entered using the sets parameter. The default is FALSE, which orders the sets by their sizes.}
37 | \item{\code{set.metadata}}{Metadata that offers insight to an attribute of the sets. Input should be a data frame where the first column is set names, and the
38 | remaining columns are attributes of those sets. To learn how to use this parameter it is highly suggested to view the set metadata vignette. The link
39 | can be found on the package's GitHub page.}
40 | \item{\code{intersections}}{Specific intersections to include in plot entered as a list of lists.
41 | Ex: list(list("Set name1", "Set name2"), list("Set name1", "Set name3")). If data is entered into this parameter the only data shown on the UpSet plot
42 | will be the specific intersections listed.}
43 | \item{\code{matrix.color}}{Color of the intersection points}
44 | \item{\code{main.bar.color}}{Color of the main bar plot}
45 | \item{\code{mainbar.y.max}}{The maximum y value of the intersection size bar plot scale. May be useful when aligning multiple UpSet plots horizontally.}
46 | \item{\code{sets.bar.color}}{Color of set size bar plot}
47 | \item{\code{point.size}}{Size of points in matrix plot}
48 | \item{\code{line.size}}{Width of lines in matrix plot}
49 | \item{\code{mb.ratio}}{Ratio between matrix plot and main bar plot (Keep in terms of hundredths)}
50 | \item{\code{expression}}{Expression to subset attributes of intersection or element query data. Enter as string (Ex: "ColName > 3")}
51 | \item{\code{att.pos}}{Position of attribute plot. If NULL or "bottom" the plot will be at below UpSet plot. If "top" it will be above UpSet plot}
52 | \item{\code{att.color}}{Color of attribute histogram bins or scatterplot points for unqueried data represented by main bars. Default set to color of main bars.}
53 | \item{\code{decreasing}}{How the variables in order.by should be ordered. "freq" is decreasing (greatest to least) and "degree" is increasing (least to greatest)}
54 | \item{\code{show.numbers}}{Show numbers of intersection sizes above bars}
55 | \item{\code{number.angles}}{The angle of the numbers atop the intersection size bars}
56 | \item{\code{group.by}}{How the data should be grouped ("degree" or "sets")}
57 | \item{\code{cutoff}}{The number of intersections from each set (to cut off at) when aggregating by sets}
58 | \item{\code{queries}}{Unified query of intersections, elements, and custom row functions. Entered as a list that contains a list of
59 | queries. query is the type of query being conducted. params are the parameters of the query (if any). color is the color of the points on the
60 | plot that will represent the query. If no color is selected one will be provided automatically. active takes TRUE or FALSE, and if
61 | TRUE, it will overlay the bars present with the results from the query. If FALSE a tick mark will indicate the intersection size.
62 | See examples section on how to do this.}
63 | \item{\code{query.legend}}{Position query legend on top or bottom of UpSet plot}
64 | \item{\code{shade.color}}{Color of row shading in matrix}
65 | \item{\code{shade.alpha}}{Transparency of shading in matrix}
66 | \item{\code{matrix.dot.alpha}}{Transparency of the empty intersections points in the matrix}
67 | \item{\code{empty.intersections}}{Additionally display empty sets up to nintersects}
68 | \item{\code{color.pal}}{Color palette for attribute plots}
69 | \item{\code{boxplot.summary}}{Boxplots representing the distribution of a selected attribute for each intersection. Select attributes by entering a character vector of attribute names (e.g. c("Name1", "Name2")).
70 | The maximum number of attributes that can be entered is 2.}
71 | \item{\code{attribute.plots}}{Create custom ggplot using intersection data represented in the main bar plot. Prior to adding custom plots, the UpSet plot is set up in a 100 by 100 grid.
72 | The attribute.plots parameter takes a list that contains the number of rows that should be allocated for the custom plot, and a list of plots with specified positions.
73 | nrows is the number of rows the custom plots should take up. There is already 100 allocated for the custom plot. plots takes a list that contains a function that returns
74 | a custom ggplot and the x and y aesthetics for the function. ncols is the number of columns that your ggplots should take up. See examples for how to add custom ggplots.}
75 | \item{\code{scale.intersections}}{The scale to be used for the intersection sizes. Options: "identity", "log10", "log2"}
76 | \item{\code{scale.sets}}{The scale to be used for the set sizes. Options: "identity", "log10", "log2"}
77 | \item{\code{text.scale}}{Numeric, value to scale the text sizes, applies to all axis labels, tick labels, and numbers above bar plot. Can be a universal scale, or a vector containing individual scales
78 | in the following format: c(intersection size title, intersection size tick labels, set size title, set size tick labels, set names, numbers above bars)}
79 | \item{\code{set_size.angles}}{Numeric, angle to rotate the set size plot x-axis text}
80 | \item{\code{set_size.show}}{Logical, display the set sizes on the set size bar chart}
81 | \item{\code{set_size.numbers_size}}{If set_size.show is TRUE, adjust the size of the numbers}
82 | \item{\code{set_size.scale_max}}{Increase the maximum of set size scale}
83 | }}
84 | }
85 | \description{
86 | Show records found in specific sets of sources to identify the unique contribution
87 | of each source and of any subsets
88 | }
89 | \examples{
90 | data <- data.frame(
91 | article_id = 1:500,
92 | source__source1 = rbinom(500, 1, .5) == 1,
93 | source__source2 = rbinom(500, 1, .2) == 1,
94 | source__source3 = rbinom(500, 1, .1) == 1,
95 | source__source4 = rbinom(500, 1, .6) == 1,
96 | source__source5 = rbinom(500, 1, .7) == 1
97 | )
98 |
99 | plot_source_overlap_upset(data)
100 |
101 | # To start with the records shared among the greatest number of sources, use
102 |
103 | plot_source_overlap_upset(data, decreasing = c(TRUE, TRUE))
104 |
105 | }
106 | \references{
107 | Conway, J. R., Lex, A., & Gehlenborg, N. (2017). UpSetR: an R package for the visualization of intersecting sets and their properties. Bioinformatics.
108 | }
109 |
--------------------------------------------------------------------------------
/man/precision_sensitivity_table.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/tables.R
3 | \name{precision_sensitivity_table}
4 | \alias{precision_sensitivity_table}
5 | \title{precision_sensitivity_table}
6 | \usage{
7 | precision_sensitivity_table(data)
8 | }
9 | \arguments{
10 | \item{data}{A data.frame. The dataset to build the table from.
11 | It should contain the columns 'screened', 'final', 'Precision', 'Recall'.}
12 | }
13 | \value{
14 | A gt object representing the table.
15 | }
16 | \description{
17 | This function creates a gt table from a given data, and
18 | removes the 'screened' column and its associated footnotes if all its values are zero.
19 | }
20 |
--------------------------------------------------------------------------------
/man/read_citations.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/import.R
3 | \name{read_citations}
4 | \alias{read_citations}
5 | \title{Import citations from file}
6 | \usage{
7 | read_citations(
8 | files = NULL,
9 | cite_sources = NULL,
10 | cite_strings = NULL,
11 | cite_labels = NULL,
12 | metadata = NULL,
13 | verbose = TRUE,
14 | tag_naming = "best_guess",
15 | only_key_fields = TRUE
16 | )
17 | }
18 | \arguments{
19 | \item{files}{One or multiple RIS or Bibtex files with citations.
20 | Should be .bib or .ris files}
21 |
22 | \item{cite_sources}{The origin of the citation files (e.g. "Scopus", "WOS", "Medline") - vector with one value per file, defaults to file names.}
23 |
24 | \item{cite_strings}{Optional. The search string used (or another grouping to analyse) - vector with one value per file}
25 |
26 | \item{cite_labels}{Optional. An additional label per file, for instance the stage of search - vector with one value per file}
27 |
28 | \item{metadata}{A tibble with file names and metadata for each file. Can be specified as an \emph{alternative} to files, cite_sources, cite_strings and cite_labels.}
29 |
30 | \item{verbose}{Should number of reference and allocation of labels be reported?}
31 |
32 | \item{tag_naming}{Either a length-1 character stating how should ris tags be replaced (see details for a list of options), or an object inheriting from class \code{data.frame} containing user-defined replacement tags.}
33 |
34 | \item{only_key_fields}{Should only key fields (e.g., those used by CiteCourse) be imported? If FALSE, all RIS data is retained. Can also be a character vector of field names to retain (after they have been renamed by the import function) in addition to the essential ones.}
35 | }
36 | \value{
37 | A tibble with one row per citation
38 | }
39 | \description{
40 | This function imports RIS and Bibtex files with citations and merges them
41 | into one long tibble with one record per line.
42 | }
43 | \examples{
44 | if (interactive()) {
45 | # Import only key fields from the RIS files
46 | read_citations(c("res.ris", "res.bib"),
47 | cite_sources = c("CINAHL", "MEDLINE"),
48 | cite_strings = c("Search1", "Search2"),
49 | cite_labels = c("raw", "screened"),
50 | only_key_fields = TRUE
51 | )
52 |
53 | # or equivalently
54 | metadata_tbl_key_fields <- tibble::tribble(
55 | ~files, ~cite_sources, ~cite_strings, ~cite_labels, ~only_key_fields,
56 | "res.ris", "CINAHL", "Search1", "raw", TRUE,
57 | "res.bib", "MEDLINE", "Search2", "screened", TRUE
58 | )
59 |
60 | read_citations(metadata = metadata_tbl_key_fields)
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/man/record_counts.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/count.R
3 | \name{record_counts}
4 | \alias{record_counts}
5 | \title{Record counts function
6 | Calculate and combine counts of distinct records and imported records for each database}
7 | \usage{
8 | record_counts(unique_citations, citations, db_colname)
9 | }
10 | \arguments{
11 | \item{unique_citations}{Dataframe. The dataframe for calculating distinct records count.}
12 |
13 | \item{citations}{Dataframe. The dataframe for calculating records imported count.}
14 |
15 | \item{db_colname}{Character. The name of the column containing the database source information.}
16 | }
17 | \value{
18 | A dataframe with counts of distinct records and imported records for each source, including total counts.
19 | }
20 | \description{
21 | This function calculates the counts of distinct records and records imported for each database source.
22 | It combines these counts into one dataframe and calculates the total for each count type.
23 | }
24 | \examples{
25 | # Create synthetic data for example
26 | unique_citations <- data.frame(
27 | title = paste("Article", 1:10),
28 | db_source = sample(c("Database 1", "Database 2", "Database 3"), 10, replace = TRUE),
29 | stringsAsFactors = FALSE
30 | )
31 |
32 | citations <- data.frame(
33 | title = paste("Article", 1:20),
34 | db_source = sample(c("Database 1", "Database 2", "Database 3"), 20, replace = TRUE),
35 | stringsAsFactors = FALSE
36 | )
37 |
38 | # Use the synthetic data with the function
39 | result <- record_counts(unique_citations, citations, "db_source")
40 | result
41 | }
42 |
--------------------------------------------------------------------------------
/man/record_counts_table.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/tables.R
3 | \name{record_counts_table}
4 | \alias{record_counts_table}
5 | \title{record_counts_table}
6 | \usage{
7 | record_counts_table(data)
8 | }
9 | \arguments{
10 | \item{data}{A data frame that must contain the columns "Source", "Records Imported",
11 | and "Distinct Records". The "Source" column is used as the row names of the table.}
12 | }
13 | \value{
14 | A gt object representing the table.
15 | }
16 | \description{
17 | This function creates a table with footnotes for columns in the table.
18 | It uses the gt package to create the table and adds footnotes to
19 | the "Records Imported" and "Distinct Records" columns.
20 | }
21 |
--------------------------------------------------------------------------------
/man/record_level_table.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/tables.R
3 | \name{record_level_table}
4 | \alias{record_level_table}
5 | \title{Record-level table}
6 | \usage{
7 | record_level_table(
8 | citations,
9 | include = "sources",
10 | include_empty = TRUE,
11 | return = c("tibble", "DT"),
12 | indicator_presence = NULL,
13 | indicator_absence = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{citations}{A deduplicated tibble as returned by \code{dedup_citations()}.}
18 |
19 | \item{include}{Which metadata should be included in the table? Defaults to 'sources', can be replaced or expanded with 'labels' and/or 'strings'}
20 |
21 | \item{include_empty}{Should records with empty metadata (e.g., no information on 'sources') be included in the table? Defaults to FALSE.}
22 |
23 | \item{return}{Either a \code{tibble} that can be exported, e.g. as a csv, or a DataTable (\code{DT}) that allows for interactive exploration. Note that the DataTable allows
24 | users to download a .csv file; in that file, presence and absence is always indicated as TRUE and FALSE to prevent issues with character encodings.}
25 |
26 | \item{indicator_presence}{How should it be indicated that a value is present in a source/label/string? Defaults to TRUE in tibbles and a tickmark in DT tables}
27 |
28 | \item{indicator_absence}{How should it be indicated that a value is \emph{not} present in a source/label/string? Defaults to FALSE in tibbles and a cross in DT tables}
29 | }
30 | \value{
31 | A tibble or DataTable containing the per-record table that shows which sources (and/or labels/strings) each item was found in.
32 | }
33 | \description{
34 | Creates a per-record table that shows which sources (and/or labels/strings) each item was found in.
35 | }
36 | \examples{
37 | # Load example data from the package
38 | examplecitations_path <- system.file("extdata", "examplecitations.rds", package = "CiteSource")
39 | examplecitations <- readRDS(examplecitations_path)
40 |
41 | # Deduplicate citations and compare sources
42 | unique_citations <- dedup_citations(examplecitations)
43 |
44 | unique_citations |>
45 | dplyr::filter(stringr::str_detect(cite_label, "final")) |>
46 | record_level_table(return = "DT")
47 | }
48 |
--------------------------------------------------------------------------------
/man/record_summary_table.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/tables.R
3 | \name{record_summary_table}
4 | \alias{record_summary_table}
5 | \title{search_summary_table}
6 | \usage{
7 | record_summary_table(data)
8 | }
9 | \arguments{
10 | \item{data}{A data frame that must contain the columns "Source", "Records Imported",
11 | "Distinct Records", "Unique records", "Non-unique Records", "Source Contribution \%",
12 | "Source Unique Contribution \%", and "Source Unique \%". The "Source" column is used as the row names of the table.}
13 | }
14 | \value{
15 | A gt object representing the table.
16 | }
17 | \description{
18 | This function creates a table with footnotes for columns in the table.
19 | It uses the gt package to create the table and adds footnotes to various columns.
20 | }
21 |
--------------------------------------------------------------------------------
/man/reimport_csv.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/reimport.R
3 | \name{reimport_csv}
4 | \alias{reimport_csv}
5 | \title{Reimport a CSV-file exported from CiteSource}
6 | \usage{
7 | reimport_csv(filename)
8 | }
9 | \arguments{
10 | \item{filename}{Name (and path) of CSV file to be reimported, should end in .csv}
11 | }
12 | \value{
13 | A data frame containing the imported citation data if all required columns are present.
14 | }
15 | \description{
16 | This function reimports a csv file that was tagged and deduplicated by CiteSource.
17 | It allows to continue with further analyses without repeating that step, and also
18 | allows users to make any manual corrections to tagging or deduplication. Note that
19 | this function only works on CSV files that were written with \code{export_csv(..., separate = NULL)}
20 | }
21 | \examples{
22 | \dontrun{
23 | #example usage
24 | citations <- reimport_csv("path/to/citations.csv")
25 | }
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/man/reimport_ris.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/reimport.R
3 | \name{reimport_ris}
4 | \alias{reimport_ris}
5 | \title{Reimport a RIS-file exported from CiteSource}
6 | \usage{
7 | reimport_ris(
8 | filename = "citations.ris",
9 | source_field = "DB",
10 | label_field = "C7",
11 | string_field = "C8",
12 | duplicate_id_field = "C1",
13 | record_id_field = "C2",
14 | tag_naming = "ris_synthesisr",
15 | verbose = TRUE
16 | )
17 | }
18 | \arguments{
19 | \item{filename}{Name (and path) of RIS file to be reimported, should end in .ris}
20 |
21 | \item{source_field}{Character. Which RIS field should cite_sources be read from? NULL to set to missing}
22 |
23 | \item{label_field}{Character. Which RIS field should cite_labels be read from? NULL to set to missing}
24 |
25 | \item{string_field}{Character. Which RIS field should cite_strings be read from? NULL to set to missing}
26 |
27 | \item{duplicate_id_field}{Character. Which RIS field should duplicate IDs be read from? NULL to recreate based on row number (note that neither duplicate nor record IDs directly affect CiteSource analyses - they can only allow you to connect processed data with raw data)}
28 |
29 | \item{record_id_field}{Character. Which RIS field should record IDs be read from? NULL to recreate based on row number}
30 |
31 | \item{tag_naming}{Synthesisr option specifying how RIS tags should be replaced with names. This should not
32 | be changed when using this function to reimport a file exported from CiteSource. If you import your own
33 | RIS, check \code{names(CiteSource:::synthesisr_code_lookup)} and select any of the options that start with \code{ris_}}
34 |
35 | \item{verbose}{Should confirmation message be displayed?}
36 | }
37 | \description{
38 | This function reimports a RIS file that was tagged and deduplicated by CiteSource.
39 | It allows to continue with further analyses without repeating that step, and also
40 | allows users to make any manual corrections to tagging or deduplication. The function
41 | can also be used to replace the import step (for instance if tags are to be added to
42 | individual citations rather than entire files) - in this case, just call \code{dedup_citations()}
43 | after the import.
44 | }
45 | \details{
46 | Note that this functions defaults' are based on those in \code{export_ris()} so that these functions
47 | can easily be combined.
48 | }
49 | \examples{
50 | if (interactive()) {
51 | dedup_results <- dedup_citations(citations, merge_citations = TRUE)
52 | export_ris(dedup_results$unique, "citations.ris")
53 | unique_citations2 <- reimport_ris("citations.ris")
54 | }
55 |
56 | }
57 |
--------------------------------------------------------------------------------
/man/runShiny.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/runShiny.R
3 | \name{runShiny}
4 | \alias{runShiny}
5 | \alias{run_shiny}
6 | \title{A wrapper function to run Shiny Apps from \code{CiteSource}.}
7 | \usage{
8 | runShiny(app = "CiteSource", offer_install = interactive())
9 | }
10 | \arguments{
11 | \item{app}{Defaults to CiteSource - possibly other apps will be included in the future}
12 |
13 | \item{offer_install}{Should user be prompted to install required packages if they are missing?}
14 | }
15 | \value{
16 | CiteSource shiny app
17 | }
18 | \description{
19 | Running this function will launch the CiteSource shiny app
20 | }
21 | \examples{
22 | if (interactive()) {
23 | # To run the CiteSource Shiny app:
24 | runShiny()
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/man/synthesisr_read_refs.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/import_export_helpers.R
3 | \name{synthesisr_read_refs}
4 | \alias{synthesisr_read_refs}
5 | \alias{read_ref}
6 | \title{Import bibliographic search results}
7 | \usage{
8 | synthesisr_read_refs(
9 | filename,
10 | tag_naming = "best_guess",
11 | return_df = TRUE,
12 | verbose = FALSE,
13 | select_fields = NULL
14 | )
15 |
16 | read_ref(
17 | filename,
18 | tag_naming = "best_guess",
19 | return_df = TRUE,
20 | verbose = FALSE,
21 | select_fields = NULL
22 | )
23 | }
24 | \arguments{
25 | \item{filename}{A path to a filename or vector of filenames containing search results to import.}
26 |
27 | \item{tag_naming}{Either a length-1 character stating how should ris tags be replaced (see details for a list of options), or an object inheriting from class \code{data.frame} containing user-defined replacement tags.}
28 |
29 | \item{return_df}{If TRUE (default), returns a data.frame; if FALSE, returns a list.}
30 |
31 | \item{verbose}{If TRUE, prints status updates (defaults to FALSE).}
32 |
33 | \item{select_fields}{Character vector of fields to be retained. If NULL, all fields from the RIS file are returned}
34 | }
35 | \value{
36 | Returns a data.frame or list of assembled search results.
37 | }
38 | \description{
39 | Imports common bibliographic reference formats (i.e. .bib, .ris, or .txt).
40 | }
41 | \details{
42 | The default for argument \code{tag_naming} is \code{"best_guess"}, which estimates what database has been used for ris tag replacement, then fills any gaps with generic tags. Any tags missing from the database (i.e. \code{code_lookup}) are passed unchanged. Other options are to use tags from Web of Science (\code{"wos"}), Scopus (\code{"scopus"}), Ovid (\code{"ovid"}) or Academic Search Premier (\code{"asp"}). If a \code{data.frame} is given, then it must contain two columns: \code{"code"} listing the original tags in the source document, and \code{"field"} listing the replacement column/tag names. The \code{data.frame} may optionally include a third column named \code{"order"}, which specifies the order of columns in the resulting \code{data.frame}; otherwise this will be taken as the row order. Finally, passing \code{"none"} to \code{replace_tags} suppresses tag replacement.
43 | }
44 | \section{Functions}{
45 | \itemize{
46 | \item \code{read_ref()}: Import a single file
47 |
48 | }}
49 |
--------------------------------------------------------------------------------
/man/write_refs.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/import_export_helpers.R
3 | \name{write_bib}
4 | \alias{write_bib}
5 | \alias{write_ris}
6 | \alias{write_refs}
7 | \title{Export data to a bibliographic format}
8 | \usage{
9 | write_bib(x)
10 |
11 | write_ris(x, tag_naming = "synthesisr")
12 |
13 | write_refs(x, format = "ris", tag_naming = "synthesisr", file = FALSE)
14 | }
15 | \arguments{
16 | \item{x}{Either a data.frame containing bibliographic information or an object of class bibliography.}
17 |
18 | \item{tag_naming}{what naming convention should be used to write RIS files? See details for options.}
19 |
20 | \item{format}{What format should the data be exported as? Options are ris or bib.}
21 |
22 | \item{file}{Either logical indicating whether a file should be written (defaulting to FALSE), or a character giving the name of the file to be written.}
23 | }
24 | \value{
25 | Returns a character vector containing bibliographic information in the specified format if \code{file} is FALSE, or saves output to a file if TRUE.
26 | }
27 | \description{
28 | This function exports data.frames containing bibliographic information to either a .ris or .bib file.
29 | }
30 | \section{Functions}{
31 | \itemize{
32 | \item \code{write_bib()}: Format a bib file for export
33 |
34 | \item \code{write_ris()}: Format a ris file for export
35 |
36 | }}
37 |
--------------------------------------------------------------------------------
/renv/.gitignore:
--------------------------------------------------------------------------------
1 | library/
2 | local/
3 | cellar/
4 | lock/
5 | python/
6 | sandbox/
7 | staging/
8 |
--------------------------------------------------------------------------------
/renv/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "bioconductor.version": null,
3 | "external.libraries": [],
4 | "ignored.packages": [],
5 | "package.dependency.fields": [
6 | "Imports",
7 | "Depends",
8 | "LinkingTo"
9 | ],
10 | "ppm.enabled": null,
11 | "ppm.ignored.urls": [],
12 | "r.version": null,
13 | "snapshot.type": "implicit",
14 | "use.cache": true,
15 | "vcs.ignore.cellar": true,
16 | "vcs.ignore.library": true,
17 | "vcs.ignore.local": true,
18 | "vcs.manage.ignores": true
19 | }
20 |
--------------------------------------------------------------------------------
/tests/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/tests/.DS_Store
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(CiteSource)
3 |
4 | test_check("CiteSource")
5 |
--------------------------------------------------------------------------------
/tests/testthat/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/tests/testthat/.DS_Store
--------------------------------------------------------------------------------
/tests/testthat/test-import.R:
--------------------------------------------------------------------------------
1 |
2 | test_that("ris import works", {
3 | x <- read_citations(testthat::test_path("data", "1_WoS.ris"))
4 | expect_equal(nrow(x), 219)
5 | expect_equal(x$cite_source[1], "1_WoS")
6 | })
7 |
8 | litsearchr <- c(
9 | "@article{grames2019,
10 | title={An automated approach to identifying search terms for
11 | systematic reviews using keyword co-occurrence networks},
12 | author={Grames, Eliza M and Stillman, Andrew N and Tingley, Morgan W and Elphick, Chris S},
13 | journal={Methods in Ecology and Evolution},
14 | volume={10},
15 | number={10},
16 | pages={1645--1654},
17 | year={2019},
18 | publisher={Wiley Online Library}
19 | }"
20 | )
21 |
22 | tmp <- tempfile()
23 |
24 | writeLines(litsearchr, tmp)
25 |
26 |
27 | test_that("bib import works", {
28 | x <- read_citations(tmp, cite_sources = "A", cite_strings = "B", cite_labels = "C")
29 | expect_equal(nrow(x), 1)
30 | expect_equal(x$cite_label, "C")
31 | })
32 |
--------------------------------------------------------------------------------
/tests/testthat/test-tables.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(CiteSource)
3 |
4 | authors <- c('Mill, John Stuart and Shelley, Mary and Lovelave, Eda and Hemingway, Ernest and Garcia Marquez, Gabriel',
5 | 'Miller, Arthur and Snow, John',
6 | 'Woolf, Virginia',
7 | 'Miller, Arthur and Snow, John',
8 | 'Mill, John Stuart and Shelley, Mary and Eliot, TS',
9 | 'Woolf, Walter',
10 | 'Mill, Arthur and Shelley, Mary and Eliot, TS',
11 | 'Mill, Arthur and Shelley, Mary and Eliot, TS')
12 |
13 | years <- c(rep(1900, 7), 1901)
14 |
15 | test_that("disambiguated citations work", {
16 | expect_equal(generate_apa_citation(authors, years),
17 | c("J. S. Mill, Shelley, Lovelave et al. (1900)",
18 | "Miller & Snow (1900a)", "V. Woolf (1900)",
19 | "Miller & Snow (1900b)", "J. S. Mill, Shelley & Eliot (1900)",
20 | "W. Woolf (1900)", "A. Mill et al. (1900)", "A. Mill et al. (1901)")
21 |
22 | )
23 | })
24 |
25 |
26 |
27 | test_that("missing columns do not fail", {
28 | expect_warning(generate_apa_reference(LETTERS[1:5]))
29 | })
30 |
31 | test_that("numeric columns do not fail", {
32 | expect_warning(generate_apa_reference(paste(LETTERS[1:5], LETTERS[1:5], sep = ", "), 2000:2004))
33 | })
34 |
35 |
--------------------------------------------------------------------------------
/vignettes/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/vignettes/.DS_Store
--------------------------------------------------------------------------------
/vignettes/benchmark_data/Search2_4.ris:
--------------------------------------------------------------------------------
1 | TY - JOUR
2 | AU - Krauss, KW
3 | AU - Osland, MJ
4 | TI - Tropical cyclones and the organization of mangrove forests: a review
5 | T2 - ANNALS OF BOTANY
6 | SN - 0305-7364
7 | SN - 1095-8290
8 | DA - JAN 23
9 | PY - 2020
10 | VL - 125
11 | IS - 2
12 | SP - 213
13 | EP - 234
14 | DO - 10.1093/aob/mcz161
15 | AN - WOS:000540297900002
16 | ER -
17 |
18 | TY - JOUR
19 | AU - Jaap, WC
20 | TI - Coral reef restoration
21 | T2 - ECOLOGICAL ENGINEERING
22 | CP - Symposium on Goal Setting and Success Criteria for Coastal Habitat Restoration
23 | SN - 0925-8574
24 | DA - JUL
25 | PY - 2000
26 | VL - 15
27 | IS - 3-4
28 | SP - 345
29 | EP - 364
30 | DO - 10.1016/S0925-8574(00)00085-9
31 | AN - WOS:000088549600014
32 | ER -
33 |
34 | TY - JOUR
35 | AU - Roth, F
36 | AU - Karcher, DB
37 | AU - Radecker, N
38 | AU - Hohn, S
39 | AU - Carvalho, S
40 | AU - Thomson, T
41 | AU - Saalmann, F
42 | AU - Voolstra, CR
43 | AU - Kurten, B
44 | AU - Struck, U
45 | AU - Jones, BH
46 | AU - Wild, C
47 | TI - High rates of carbon and dinitrogen fixation suggest a critical role of benthic pioneer communities in the energy and nutrient dynamics of coral reefs
48 | T2 - FUNCTIONAL ECOLOGY
49 | SN - 0269-8463
50 | SN - 1365-2435
51 | DA - SEP
52 | PY - 2020
53 | VL - 34
54 | IS - 9
55 | SP - 1991
56 | EP - 2004
57 | DO - 10.1111/1365-2435.13625
58 | C6 - JUL 2020
59 | AN - WOS:000551223100001
60 | ER -
61 |
62 | TY - CPAPER
63 | AU - Sivadas, TK
64 | A1 - IEEE
65 | TI - Development of Sensors & Measurement Techniques and their Implementation for Oceanographic Observations
66 | T2 - 2015 IEEE UNDERWATER TECHNOLOGY (UT)
67 | CP - 2015 IEEE Underwater Technology (UT)
68 | SN - 2573-3788
69 | SN - 2573-3796
70 | SN - 978-1-4799-8301-8
71 | PY - 2015
72 | AN - WOS:000380439000024
73 | ER -
74 |
75 | TY - CPAPER
76 | AU - de Santoli, L
77 | AU - Garcia, DA
78 | AU - Violante, AC
79 | ED - Mander, U
80 | ED - Brebbia, CA
81 | ED - MarinDuque, JF
82 | TI - Planning of flood defence management and rehabilitation of the natural habitat in the downstream part of the river Tiber
83 | T2 - GEO-ENVIRONMENT AND LANDSCAPE EVOLUTION III
84 | CP - 3rd International Conference on Evolution, Monitoring, Simulation, Management and Remediation of the Geological Environment and Landscape
85 | SN - 1746-4498
86 | SN - 978-1-84564-117-7
87 | PY - 2008
88 | VL - 100
89 | SP - 25
90 | EP - 34
91 | AN - WOS:000258182300003
92 | ER -
93 |
94 | TY - BOOK
95 | AU - Thrush, SF
96 | AU - Townsend, M
97 | AU - Hewitt, JE
98 | AU - Davies, K
99 | AU - Lohrer, AM
100 | AU - Lundquist, C
101 | AU - Cartner, K
102 | ED - Dymond, JR
103 | TI - THE MANY USES AND VALUES OF ESTUARINE ECOSYSTEMS
104 | T2 - ECOSYSTEM SERVICES IN NEW ZEALAND: CONDITIONS AND TRENDS
105 | SN - 978-0-478-34736-4
106 | PY - 2013
107 | SP - 226
108 | EP - 237
109 | AN - WOS:000331018800016
110 | ER -
111 |
112 | TY - JOUR
113 | AU - Maloney, JM
114 | AU - Bentley, SJ
115 | AU - Xe, KH
116 | AU - Obelcz, J
117 | AU - Georgiou, IY
118 | AU - Miner, MD
119 | TI - Mississippi River subaqueous delta is entering a stage of retrogradation
120 | T2 - MARINE GEOLOGY
121 | SN - 0025-3227
122 | SN - 1872-6151
123 | DA - JUN 1
124 | PY - 2018
125 | VL - 400
126 | SP - 12
127 | EP - 23
128 | DO - 10.1016/j.margeo.2018.03.001
129 | AN - WOS:000432234400002
130 | ER -
131 |
132 | TY - JOUR
133 | AU - Pleskachevsky, AL
134 | AU - Lehner, S
135 | AU - Rosenthal, W
136 | TI - Storm observations by remote sensing and influences of gustiness on ocean waves and on generation of rogue waves
137 | T2 - OCEAN DYNAMICS
138 | SN - 1616-7341
139 | DA - SEP
140 | PY - 2012
141 | VL - 62
142 | IS - 9
143 | SP - 1335
144 | EP - 1351
145 | DO - 10.1007/s10236-012-0567-z
146 | AN - WOS:000308345600005
147 | ER -
148 |
149 | TY - CPAPER
150 | AU - Sivadas, TK
151 | A1 - IEEE
152 | TI - Integrated Approach for Ocean Observation Systems with Development, Implementation, Training and Education
153 | T2 - OCEANS 2015 - GENOVA
154 | CP - Oceans 2015 Genova
155 | SN - 978-1-4799-8737-5
156 | PY - 2015
157 | DO - 10.1109/OCEANS-Genova.2015.7271647
158 | AN - WOS:000380485500328
159 | ER -
160 |
161 | TY - CPAPER
162 | AU - Poteras, G
163 | AU - Deak, G
164 | AU - Baraitaru, AG
165 | AU - Olteanu, MV
166 | AU - Raischi, NS
167 | AU - Halin, DSC
168 | ED - Noor, NM
169 | ED - Rahim, NL
170 | ED - Ting, SS
171 | ED - Zakarya, IA
172 | ED - Yusof, SY
173 | ED - Izhar, TNT
174 | ED - Amat, RC
175 | ED - Ibrahim, NM
176 | TI - Bioengineering technologies used for the development and equipment of complex installations to obtain energy from three renewable sources. Complex installations for coastal areas
177 | T2 - 2ND INTERNATIONAL CONFERENCE ON GREEN ENVIRONMENTAL ENGINEERING AND TECHNOLOGY
178 | CP - 2nd International Conference on Green Environmental Engineering and Technology (IConGEET)
179 | SN - 1755-1307
180 | PY - 2020
181 | VL - 616
182 | C7 - 012028
183 | DO - 10.1088/1755-1315/616/1/012028
184 | AN - WOS:000661130800028
185 | ER -
186 |
187 | TY - CHAP
188 | AU - Riosmena-Rodriguez, R
189 | ED - RiosmenaRodriguez, R
190 | ED - Nelson, W
191 | ED - Aguirre, J
192 | TI - Natural History of Rhodolith/Maerl Beds: Their Role in Near-Shore Biodiversity and Management
193 | T2 - RHODOLITH/MAERL BEDS: A GLOBAL PERSPECTIVE
194 | SN - 2211-0577
195 | SN - 2211-0585
196 | SN - 978-3-319-29315-8
197 | SN - 978-3-319-29313-4
198 | PY - 2017
199 | VL - 15
200 | SP - 3
201 | EP - 26
202 | DO - 10.1007/978-3-319-29315-8_1
203 | DO - 10.1007/978-3-319-29315-8
204 | AN - WOS:000430072100001
205 | ER -
206 |
207 | TY - JOUR
208 | AU - Herran, N
209 | AU - Narayan, GR
210 | AU - Reymond, CE
211 | AU - Westphal, H
212 | TI - Calcium Carbonate Production, Coral Cover and Diversity along a Distance Gradient from Stone Town: A Case Study from Zanzibar, Tanzania
213 | T2 - FRONTIERS IN MARINE SCIENCE
214 | SN - 2296-7745
215 | PY - 2017
216 | VL - 4
217 | C7 - 412
218 | DO - 10.3389/fmars.2017.00412
219 | AN - WOS:000457690600408
220 | ER -
221 |
222 | TY - JOUR
223 | AU - Asokan, R
224 | AU - Swamy, HMM
225 | AU - Thimmegowda, GG
226 | AU - Mahmood, R
227 | TI - Diversity analysis and characterization of Coleoptera-, Hemiptera- and Nematode-active cry genes in native isolates of Bacillus thuringiensis
228 | T2 - ANNALS OF MICROBIOLOGY
229 | SN - 1590-4261
230 | SN - 1869-2044
231 | DA - MAR
232 | PY - 2014
233 | VL - 64
234 | IS - 1
235 | SP - 85
236 | EP - 98
237 | DO - 10.1007/s13213-013-0636-7
238 | AN - WOS:000331648300010
239 | ER -
240 |
241 | TY - JOUR
242 | AU - Divya, K
243 | AU - Jisha, MS
244 | TI - Chitosan nanoparticles preparation and applications
245 | T2 - ENVIRONMENTAL CHEMISTRY LETTERS
246 | SN - 1610-3653
247 | SN - 1610-3661
248 | DA - MAR
249 | PY - 2018
250 | VL - 16
251 | IS - 1
252 | SP - 101
253 | EP - 112
254 | DO - 10.1007/s10311-017-0670-y
255 | AN - WOS:000425008300010
256 | ER -
257 |
258 | TY - JOUR
259 | AU - Horta, PA
260 | AU - Riul, P
261 | AU - Amado, GM
262 | AU - Gurgel, CFD
263 | AU - Berchez, F
264 | AU - Nunes, JMD
265 | AU - Scherner, F
266 | AU - Pereira, S
267 | AU - Lotufo, T
268 | AU - Peres, L
269 | AU - Sissini, M
270 | AU - Bastos, ED
271 | AU - Rosa, J
272 | AU - Munoz, P
273 | AU - Martins, C
274 | AU - Gouvea, L
275 | AU - Carvalho, V
276 | AU - Bergstrom, E
277 | AU - Schubert, N
278 | AU - Bahia, RG
279 | AU - Rodrigues, AC
280 | AU - Rorig, L
281 | AU - Barufi, JB
282 | AU - Figueiredo, M
283 | TI - Rhodoliths in Brazil: Current knowledge and potential impacts of climate change
284 | T2 - BRAZILIAN JOURNAL OF OCEANOGRAPHY
285 | SN - 1679-8759
286 | SN - 1982-436X
287 | PY - 2016
288 | VL - 64
289 | DO - 10.1590/S1679-875920160870064sp2
290 | AN - WOS:000381509100009
291 | ER -
292 |
293 |
--------------------------------------------------------------------------------
/vignettes/citesource_benchmark_testing.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Benchmark Testing"
3 |
4 | author: ""
5 |
6 | date: "`r Sys.Date()`"
7 | output: rmarkdown::html_vignette
8 | vignette: >
9 | %\VignetteIndexEntry{Benchmark Testing}
10 | %\VignetteEngine{knitr::rmarkdown}
11 | %\VignetteEncoding{UTF-8}
12 | ---
13 |
14 |
15 | ```{r setup, include = FALSE}
16 | # This code chunk sets global options for all subsequent code chunks in the document using the `knitr` package in R.
17 | knitr::opts_chunk$set(
18 | collapse = TRUE, # Collapses output with no extra whitespace.
19 | comment = "#>", # Uses `#>` to comment out code in output.
20 | warning = FALSE, # Turns off warnings for all code chunks.
21 | fig.width = 6, # Sets default figure width to 6 inches.
22 | fig.height = 6 # Sets default figure height to 6 inches.
23 | )
24 | ```
25 | ## About this vignette
26 |
27 | When estimating he comprehensiveness of a search, researchers often compile a list of relevant studies and evaluate whether or not they are discovered using their search strategy. While benchmarking is an important step in testing the sensitivity of a search, this process can be very time consuming if variations of a string are being tested.
28 |
29 | This vignette will provide an example of how CiteSource can be used to speed up the process of benchmarking especially when comparing variations of search strings or search strategies.
30 |
31 | ## 1. Installation of packages and loading libraries
32 |
33 | Use the following code to install CiteSource. Currently, CiteSource lives on GitHub, so you may need to first install the remotes package. This vignette also uses functions from the ggplot2 and dplyr packages.
34 |
35 | ```{r, results = FALSE, message=FALSE, warning=FALSE}
36 | #Install the remotes packages to enable installation from GitHub
37 | #install.packages("remotes")
38 | #library(remotes)
39 |
40 | #Install CiteSource
41 | #remotes::install_github("ESHackathon/CiteSource")
42 |
43 | #Load the necessary libraries
44 | library(CiteSource)
45 | library(ggplot2)
46 | library(dplyr)
47 | ```
48 | ## 2. Import files from multiple sources
49 |
50 | Users can import multiple .ris or .bib files into CiteSource, which the user can label with source information such as database or platform. In this case we are uploading the results from six different strings, which were applied to Web of Science.
51 |
52 | ```{r}
53 | # Import citation files from folder
54 | citation_files <- list.files(path = "benchmark_data", pattern = "\\.ris", full.names = TRUE)
55 |
56 | # Print list of citation files to console
57 | citation_files
58 |
59 | # Set the path to the directory containing the citation files
60 | file_path <- "../vignettes/benchmark_data/"
61 |
62 | # Create a tibble that contains metadata about the citation files
63 | metadata_tbl <- tibble::tribble(
64 | ~files, ~cite_sources, ~cite_labels,
65 | "Benchmarking.ris", "Benchmark", "Benchmark",
66 | "Search1_1.ris", "search1", "search",
67 | "Search2_1.ris", "search2", "search",
68 | "Search2_2.ris", "search2", "search",
69 | "Search2_3.ris", "search2", "search",
70 | "Search2_4.ris", "search2", "search",
71 | "Search3_1.ris", "search3", "search",
72 | "Search3_2.ris", "search3", "search",
73 | "Search3_3.ris", "search3", "search",
74 | "Search3_4.ris", "search3", "search",
75 | "Search3_5.ris", "search3", "search",
76 | "Search4_1.ris", "search4", "search",
77 | "Search4_2.ris", "search4", "search",
78 | "Search4_3.ris", "search4", "search",
79 | "Search5_1.ris", "search5", "search",
80 | "Search5_2.ris", "search5", "search",
81 | "Search5_3.ris", "search5", "search",
82 | "Search5_4.ris", "search5", "search",
83 | "Search5_5.ris", "search5", "search",
84 | "Search5_6.ris", "search5", "search",
85 | "Search5_7.ris", "search5", "search",
86 | "Search5_8.ris", "search5", "search",
87 | "Search6_1.ris", "search6", "search",
88 | "Search6_2.ris", "search6", "search",
89 | "Search6_3.ris", "search6", "search",
90 | "Search6_4.ris", "search6", "search",
91 | ) %>%
92 | # Append the file path to each file name in the 'files' column
93 | dplyr::mutate(files = paste0(file_path, files))
94 | # Read in citations using metadata table
95 | citations <- read_citations(metadata = metadata_tbl)
96 | ```
97 | ## 3. Deduplication and source information
98 |
99 | CiteSource allows users to merge duplicates while maintaining information in the cite_source metadata field. Thus, information about the origin of the records is not lost in the deduplication process. The next few steps produce the dataframes that we can use in subsequent analyses.
100 |
101 | ```{r, results = FALSE, message=FALSE, warning=FALSE}
102 |
103 | #Deduplicate citations. This yields a dataframe of all records with duplicates merged, but the originating source information maintained in a new variable called cite_source.
104 | unique_citations <- dedup_citations(citations)
105 |
106 | #Count number of unique and non-unique citations from different sources and labels
107 | n_unique <- count_unique(unique_citations)
108 |
109 | #For each unique citation, determine which sources were present
110 | source_comparison <- compare_sources(unique_citations, comp_type = "sources")
111 |
112 | #Initial upload/post internal deduplication table creation
113 | initial_counts<-record_counts(unique_citations, citations, "cite_source")
114 | record_counts_table(initial_counts)
115 |
116 | ```
117 |
118 | ## 4. Upset plot to compare discovery of benchmarking articles
119 |
120 | An upset plot is useful for visualizing overlap across multiple sources and provides detail about the number of shared and unique records. Using this data we'll outline a few potential uses, when looking at the discovery of benchmarking articles.
121 |
122 | We have uploaded 55 benchmarking articles. Of these 55 articles we can see that all but 6 have been found across the six searches. We can see the number of benchmarking articles that were discovered by each string as well as the number of articles that were shared between searches.
123 |
124 | Looking at the first column, we see that 9 benchmarking articles were found across every search. One may hypothesize that the 140 citations that follow in the second column may have a high number of relevant articles due to the fact that they were also discovered across the 6 searches. If a researcher was interested in building a larger group of benchmarking articles, they may want to review these articles first.
125 |
126 | Looking at the plot we can see that search #5 has the largest number of results, well over 6000. Of these, 5,964 are unique to that search. We can also see that search #5 finds 3 benchmarking articles that would have otherwise not been found. While a researcher may want to ensure that they capture the highest number of benchmarking articles, the addition of ~6k articles may not be efficient when the result is only 3 benchmarking articles. Instead of including this search in their final strategy, they may consider reviewing the three articles that were found by this search and work to adjust their other searches instead.
127 |
128 | Another decision in this case may be to drop search #4 and #6 as each of these strings do not contribute uniquely to the discovery of any benchmarking articles. While the data backs up this decision, there may also be more to consider. For example, if benchmarking articles are biased for any known reason, certain search strategies may be employed with an understanding that benchmarking data may not accurately reflect their potential contribution. (e.g. benchmarking articles were gathered from previous systematic reviews that focused on a specific geographic region and the current review is global in nature).
129 |
130 | ```{r}
131 | #Generate a source comparison upset plot.
132 | plot_source_overlap_upset(source_comparison, decreasing = c(TRUE, TRUE))
133 |
134 | ```
135 |
136 | ## 5. Reviewing the record table
137 | This record table is helpful in reviewing which citations were found across each database as well as quickly checking to see which benchmarking articles were not found in the search.
138 |
139 | ```{r}
140 |
141 | unique_citations %>%
142 | dplyr::filter(stringr::str_detect(cite_label, "Benchmark")) %>%
143 | record_level_table(return = "DT")
144 |
145 | ```
146 |
--------------------------------------------------------------------------------
/vignettes/citesource_new_benchmark_testing.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "New Benchmark Testing"
3 |
4 | author: ""
5 |
6 | date: "`r Sys.Date()`"
7 | output: rmarkdown::html_vignette
8 | vignette: >
9 | %\VignetteIndexEntry{New Benchmark Testing}
10 | %\VignetteEngine{knitr::rmarkdown}
11 | %\VignetteEncoding{UTF-8}
12 | ---
13 |
14 |
15 | ```{r setup, include = FALSE}
16 | # This code chunk sets global options for all subsequent code chunks in the document using the `knitr` package in R.
17 | knitr::opts_chunk$set(
18 | collapse = TRUE, # Collapses output with no extra whitespace.
19 | comment = "#>", # Uses `#>` to comment out code in output.
20 | warning = FALSE, # Turns off warnings for all code chunks.
21 | fig.width = 6, # Sets default figure width to 6 inches.
22 | fig.height = 6 # Sets default figure height to 6 inches.
23 | )
24 | ```
25 | ## About this vignette
26 |
27 | When estimating the comprehensiveness of a search, researchers often compile a list of relevant studies and evaluate whether or not they are found using their search strategy. While benchmarking is an important step in testing the sensitivity of a search, this process can be time consuming if variations of a string are being tested.
28 |
29 | This vignette will provide an example of how CiteSource can be used to speed up the process of benchmarking especially when comparing variations of search strings or search strategies.
30 |
31 | ## 1. Install and loading CiteSource
32 |
33 | Use the following code to install CiteSource. Currently, CiteSource lives on GitHub, so you may need to first install the remotes package.
34 |
35 | ```{r, results = FALSE, message=FALSE, warning=FALSE}
36 | #Install the remotes packages to enable installation from GitHub
37 | #install.packages("remotes")
38 | #library(remotes)
39 |
40 | #Install CiteSource
41 | #remotes::install_github("ESHackathon/CiteSource")
42 |
43 | #Load the CiteSource
44 | library(CiteSource)
45 | ```
46 | ## 2. Import citation files
47 |
48 | Users can import multiple .ris or .bib files into CiteSource, which the user can label with source information such as database or platform. In this case we are uploading the results from five different strings, which were run in Web of Science.
49 |
50 | ```{r}
51 | # Import citation files from a folder
52 | file_path <- "../vignettes/new_benchmark_data/"
53 | citation_files <- list.files(path = file_path, pattern = "\\.ris", full.names = TRUE)
54 |
55 | # Print citation_files to double check the order in which R imported the files.
56 | citation_files
57 | ```
58 |
59 | ## 3. Assign custom metadata
60 | In this example the benchmark file takes an NA for cite_source while the search files are tagged with search 1, search 2, etc. The cite_label for these files is tagged as search, while the benchmark is used for the benchmark files. In further vignettes you will see how the label can be used for post screening and citations that were included in the final synthesis.
61 | ```{r}
62 | # Create a tibble that contains metadata about the citation files
63 | imported_tbl <- tibble::tribble(
64 | ~files, ~cite_sources, ~cite_labels,
65 | "benchmark_15.ris", "benchmark", "search",
66 | "search1_166.ris", "search 1", "search",
67 | "search2_278.ris", "search 2", "search",
68 | "search3_302.ris", "search 3", "search",
69 | "search4_460.ris", "search 4", "search",
70 | "search5_495.ris", "search 5", "search"
71 | ) %>%
72 | # Append the file path to each file name in the 'files' column
73 | dplyr::mutate(files = paste0(file_path, files))
74 |
75 | # Save the imported citations as raw_citations
76 | raw_citations <- read_citations(metadata = imported_tbl, verbose = FALSE)
77 | ```
78 | ## 4. Deduplicate & create data tables
79 |
80 | CiteSource allows users to merge duplicates while maintaining information in the cite_source metadata field. Thus, information about the origin of the records is not lost in the deduplication process. The next few steps produce the dataframes that are used in subsequent plots and tables.
81 |
82 | ```{r}
83 | #Deduplicating yields a dataframe. Each record has a duplicate_id which is unique,
84 | #citations that were duplicated will retain each duplicate_id in the record_ids column.
85 | #source and label tags from each duplicate will be merged as a list in their respective columns.
86 | unique_citations <- dedup_citations(raw_citations)
87 |
88 | #Count_unique yields another dataframe which is used in creation of plots and tables
89 | n_unique <- count_unique(unique_citations)
90 |
91 | #For each unique citation, determine which sources were present
92 | source_comparison <- compare_sources(unique_citations, comp_type = "sources")
93 | ```
94 |
95 | ## 5. Review internal duplication
96 |
97 | Once we have imported, added custom metadata, and identified duplicates, it can be helpful to review the initial record count data to ensure everything looks okay. As a part of the deduplication process, duplicate records may have been identified within sources. The initial record table will provide you with a count of how many records were initially in each source file, and the count of distinct records that will vary if there were any duplicates identified within the source file.
98 |
99 | ```{r}
100 | #Initial upload/post internal deduplication table creation
101 | initial_records_search <- calculate_initial_records(unique_citations)
102 | initial_record_table_search <- create_initial_record_table(initial_records_search)
103 | initial_record_table_search
104 |
105 | ```
106 |
107 | ## 6. Compare overlapp with an upset plot
108 |
109 | An upset plot is useful for visualizing overlap across multiple sources and provides detail about the number of shared and unique records. Using this data we'll outline a few potential uses when benchmarking testing a search.
110 |
111 | We have uploaded 15 benchmarking articles. Of these 15 articles, the upset plot shows us that all but 4 were found across the five searches. We can see the number of benchmarking articles that were discovered by each string as well as the number of articles that were shared between searches.
112 |
113 | ```{r, fig.alt="An upset plot visualizing the overlap of benchmarking articles found across five search strategies. The plot highlights that nine articles were identified by all five searches, while four benchmarking articles were missed entirely. Additional columns show the number of articles shared across different combinations of search strategies."}
114 | #Generate a source comparison upset plot.
115 | plot_source_overlap_upset(source_comparison, decreasing = c(TRUE, TRUE))
116 | ```
117 |
118 | Looking at the first column, we see that 9 benchmarking articles were found across every search. One may hypothesize that the 157 citations that follow in the second column may have a high number of relevant articles due to the fact that they were also discovered across the five searches. If a researcher was interested in building a larger group of benchmarking articles, they may want to review these articles first.
119 |
120 | Looking at the plot we can see that search #4 and #5 have the largest number of results, at close to 500 each. Of these, 180 are unique to those two searches. We can also see that search #4 and #5 found two of the benchmarking articles that would not have been found otherwise. While a researcher may want to ensure that they capture the highest number of benchmarking articles, the addition of close to 200 articles more than search #2 and #3 may not be efficient when the result is only 2 benchmarking articles. Instead of including this search in their final strategy, they may consider reviewing the two articles that were found by this search and work to adjust their other searches instead.
121 |
122 | Another decision in this case may be to drop search #2 and #3 as each of these strings do not contribute uniquely to the discovery of any benchmarking articles. While the data backs up this decision, there may also be more to consider. For example, if benchmarking articles are biased for any known reason, certain search strategies may be employed with an understanding that benchmarking data may not accurately reflect their potential contribution. (e.g. benchmarking articles were gathered from previous systematic reviews that focused on a specific geographic region and the current review is global in nature).
123 |
124 | Finally, as we'll see in the next step, we can examine closely the four articles that weren't found in any search approach. This will help us adjust our search to better capture relevant studies.
125 |
126 | ## 7. Compare overlapp with a record level table
127 | This record table is helpful in reviewing which citations were found across each database as well as quickly checking to see which benchmarking articles were not found in the search.
128 |
129 | ```{r}
130 | unique_citations %>%
131 | dplyr::filter(stringr::str_detect(cite_source, "benchmark")) %>%
132 | record_level_table(return = "DT")
133 | ```
134 |
135 | ## 8. Exporting for further analysis
136 |
137 | We may want to export the deduplicated set of results for further analysis. CiteSource offers a set of export functions called `export_csv`, `export_ris` and `export_bib` that will save dataframes as a .csv file, .ris file or .bib file, respectively.
138 |
139 | You can then reimport .csv and .ris files to pick up a project or analysis without having to start from scratch, or after making manual adjustments (such as adding missing abstract data) to a file.
140 |
141 |
142 | ### Generate a .csv file
143 | The separate argument can be used to create separate columns for cite_source, cite_label or cite_string to facilitate analysis. CSV files can be reimported into CiteSource in order to recreate all plots and tables.
144 |
145 | ```{r}
146 | #export_csv(unique_citations, filename = "citesource_export.csv")
147 | ```
148 |
149 | ### Generate a .ris file
150 | Generate and .ris and indicate custom field location for cite_source, cite_label or cite_string. In this example, we'll be using EndNote, so we put cite_sources in the DB field, which will appear as the "Name of Database" field in EndNote and cite_labels into C5, which will appear as the "Custom 5" metadata field in EndNote.
151 |
152 | ```{r}
153 | #export_ris(unique_citations, filename = "citesource_export.ris", source_field = "DB", label_field = "C5")
154 | ```
155 |
156 | ### Generate a bibtex file
157 | Generate a bibtex file and include data from cite_source, cite_label or cite_string.
158 |
159 | ```{r}
160 | #export_bib(unique_citations, filename = "citesource_export.bib", include = c("sources", "labels", "strings"))
161 | ```
162 |
163 | ### Re-importing CiteSource a CiteSource exported file
164 | In order to reimport a .csv or a .ris you can use the following. Here is an example of how you would re-import the file if it were on your desktop.
165 |
166 | When reimporting you can simply name the data 'unique_citations' as this is the primary dataframe that all other functions use. See the following example.
167 |
168 | ```{r}
169 | #reimpor the citaitons and name 'unique_citations'
170 | #unique_citations <-reimport_csv("citesource_export.csv")
171 |
172 | #create other datasets required for visualizations
173 | #n_unique <- count_unique(unique_citations)
174 | #source_comparison <- compare_sources(unique_citations, comp_type = "sources")
175 |
176 |
177 | #citesource_working_example <-reimport_ris("citesource_export.ris")
178 |
179 | ```
180 |
181 |
--------------------------------------------------------------------------------
/vignettes/new_benchmark_data/benchmark_15.ris:
--------------------------------------------------------------------------------
1 | TY - JOUR
2 | TI - A systematic review of the impact of wildfires on sleep disturbances
3 | AU - Isaac, Fadia
4 | AU - Toukhsati, Samia R
5 | AU - Di Benedetto, Mirella
6 | AU - Kennedy, Gerard A
7 | T2 - International journal of environmental research and public health
8 | DA - 2021///
9 | PY - 2021
10 | VL - 18
11 | IS - 19
12 | SP - 10152
13 | ER -
14 |
15 | TY - JOUR
16 | TI - A systematic review of the physical health impacts from non-occupational exposure to wildfire smoke
17 | AU - Liu, Jia C
18 | AU - Pereira, Gavin
19 | AU - Uhl, Sarah A
20 | AU - Bravo, Mercedes A
21 | AU - Bell, Michelle L
22 | T2 - Environmental research
23 | DA - 2015///
24 | PY - 2015
25 | VL - 136
26 | SP - 120
27 | EP - 132
28 | ER -
29 |
30 | TY - JOUR
31 | TI - Long-term impacts of non-occupational wildfire exposure on human health: A systematic review
32 | AU - Gao, Yuan
33 | AU - Huang, Wenzhong
34 | AU - Yu, Pei
35 | AU - Xu, Rongbin
36 | AU - Yang, Zhengyu
37 | AU - Gasevic, Danijela
38 | AU - Ye, Tingting
39 | AU - Guo, Yuming
40 | AU - Li, Shanshan
41 | T2 - Environmental pollution
42 | DA - 2023///
43 | PY - 2023
44 | VL - 320
45 | SP - 121041
46 | ER -
47 |
48 | TY - JOUR
49 | TI - How to measure the economic health cost of wildfires–A systematic review of the literature for northern America
50 | AU - Dittrich, Ruth
51 | AU - McCallum, Stuart
52 | T2 - International journal of wildland fire
53 | DA - 2020///
54 | PY - 2020
55 | VL - 29
56 | IS - 11
57 | SP - 961
58 | EP - 973
59 | ER -
60 |
61 | TY - JOUR
62 | TI - Critical review of health impacts of wildfire smoke exposure
63 | AU - Reid, Colleen E
64 | AU - Brauer, Michael
65 | AU - Johnston, Fay H
66 | AU - Jerrett, Michael
67 | AU - Balmes, John R
68 | AU - Elliott, Catherine T
69 | T2 - Environmental health perspectives
70 | DA - 2016///
71 | PY - 2016
72 | VL - 124
73 | IS - 9
74 | SP - 1334
75 | EP - 1343
76 | ER -
77 |
78 | TY - JOUR
79 | TI - Fire and rain: A systematic review of the impacts of wildfire and associated runoff on aquatic fauna
80 | AU - Gomez Isaza, Daniel F
81 | AU - Cramp, Rebecca L
82 | AU - Franklin, Craig E
83 | T2 - Global Change Biology
84 | DA - 2022///
85 | PY - 2022
86 | VL - 28
87 | IS - 8
88 | SP - 2578
89 | EP - 2595
90 | ER -
91 |
92 | TY - JOUR
93 | TI - Wildfire exposure during pregnancy and the risk of adverse birth outcomes: a systematic review
94 | AU - Amjad, Sana
95 | AU - Chojecki, Dagmara
96 | AU - Osornio-Vargas, Alvaro
97 | AU - Ospina, Maria B
98 | T2 - Environment International
99 | DA - 2021///
100 | PY - 2021
101 | VL - 156
102 | SP - 106644
103 | ER -
104 |
105 | TY - JOUR
106 | TI - Continent-based systematic review of the short-term health impacts of wildfire emissions
107 | AU - Barros, Bela
108 | AU - Oliveira, Marta
109 | AU - Morais, Simone
110 | T2 - Journal of Toxicology and Environmental Health, Part B
111 | DA - 2023///
112 | PY - 2023
113 | VL - 26
114 | IS - 7
115 | SP - 387
116 | EP - 415
117 | ER -
118 |
119 | TY - JOUR
120 | TI - Health impacts of wildfire smoke on children and adolescents: a systematic review and meta-analysis
121 | AU - Zhang, Yiwen
122 | AU - Tingting, Ye
123 | AU - Huang, Wenzhong
124 | AU - Yu, Pei
125 | AU - Chen, Gongbo
126 | AU - Xu, Rongbin
127 | AU - Song, Jiangning
128 | AU - Guo, Yuming
129 | AU - Li, Shanshan
130 | T2 - Current Environmental Health Reports
131 | DA - 2024///
132 | PY - 2024
133 | VL - 11
134 | IS - 1
135 | SP - 46
136 | EP - 60
137 | ER -
138 |
139 | TY - JOUR
140 | TI - A systematic review of relationships between mountain wildfire and ecosystem services
141 | AU - Vukomanovic, Jelena
142 | AU - Steelman, Toddi
143 | T2 - Landscape Ecology
144 | DA - 2019///
145 | PY - 2019
146 | VL - 34
147 | SP - 1179
148 | EP - 1194
149 | ER -
150 |
151 | TY - JOUR
152 | TI - A review of the effects of wildfire smoke on the health and behavior of wildlife
153 | AU - Sanderfoot, Olivia V
154 | AU - Bassing, Sarah B
155 | AU - Brusa, Jamie L
156 | AU - Emmet, Robert L
157 | AU - Gillman, Sierra J
158 | AU - Swift, K
159 | AU - Gardner, Beth
160 | T2 - Environmental Research Letters
161 | DA - 2022///
162 | PY - 2022
163 | VL - 16
164 | IS - 12
165 | SP - 123003
166 | ER -
167 |
168 | TY - JOUR
169 | TI - Estimating wildfire suppression costs: a systematic review
170 | AU - Mattioli, W
171 | AU - Ferrara, C
172 | AU - Lombardo, E
173 | AU - Barbati, Anna
174 | AU - Salvati, L
175 | AU - Tomao, A
176 | T2 - International Forestry Review
177 | DA - 2022///
178 | PY - 2022
179 | VL - 24
180 | IS - 1
181 | SP - 15
182 | EP - 29
183 | ER -
184 |
185 | TY - JOUR
186 | TI - The economic cost of adverse health effects from wildfire-smoke exposure: a review
187 | AU - Kochi, Ikuho
188 | AU - Donovan, Geoffrey H
189 | AU - Champ, Patricia A
190 | AU - Loomis, John B
191 | T2 - International Journal of Wildland Fire
192 | DA - 2010///
193 | PY - 2010
194 | VL - 19
195 | IS - 7
196 | SP - 803
197 | EP - 817
198 | ER -
199 |
200 | TY - JOUR
201 | TI - Economic Analysis of Wildfire Impacts to Water Quality: a Review
202 | AU - Wibbenmeyer, Matthew
203 | AU - Sloggy, Matthew R
204 | AU - Sánchez, José J
205 | T2 - Journal of Forestry
206 | DA - 2023///
207 | PY - 2023
208 | VL - 121
209 | IS - 4
210 | SP - 374
211 | EP - 382
212 | ER -
213 |
214 | TY - JOUR
215 | TI - A systematic review of the health impacts of occupational exposure to wildland fires
216 | AU - Groot, Emily
217 | AU - Caturay, Alexa
218 | AU - Khan, Yasmin
219 | AU - Copes, Ray
220 | T2 - International journal of occupational medicine and environmental health
221 | DA - 2019///
222 | PY - 2019
223 | VL - 32
224 | IS - 2
225 | SP - 121
226 | EP - 140
227 | ER -
228 |
229 |
--------------------------------------------------------------------------------
/vignettes/new_stage_data/econlit_3.ris:
--------------------------------------------------------------------------------
1 | TY - JOUR
2 | AU - Mueller, Julie M.
3 | AU - Loomis, John B.
4 | AD - Northern AZ U
5 | AD - CO State U
6 | T1 - Spatial Dependence in Hedonic Property Models: Do Different Corrections for Spatial Dependence Result in Economically Significant Differences in Estimated Implicit Prices?
7 | JO - Journal of Agricultural and Resource Economics
8 | JF - Journal of Agricultural and Resource Economics
9 | Y1 - 2008/08//
10 | VL - 33
11 | IS - 2
12 | SP - 212
13 | EP - 231
14 | SN - 10685502
15 | N1 - Accession Number: 0992879; Keywords: Hedonic; Spatial; Geographic Descriptors: U.S.; Geographic Region: Northern America; Publication Type: Journal Article; Update Code: 20080901
16 | N2 - While data used in hedonic property models are inherently spatial in nature, to date the majority of past regression analyses have used OLS models that overlook possible spatial dependence in the data when estimating implicit prices for environmental hazards. This paper explicitly addresses spatial dependence in a hedonic property model. We use robust testing procedures to determine the existence and type of spatial dependence in our OLS Model. After identifying the nature of the spatial dependence, OLS estimates of the implicit price of wildfire risk are compared to implicit prices obtained using a spatial error model with three different spatial weighting matrices. Spatially corrected estimates of implicit prices are found to be nearly the same as those obtained using OLS. Our results indicate that the inefficiency of OLS in the presence of spatially correlated errors may not always be economically significant, suggesting nonspatial hedonic property models may provide results useful for policy analysis, and spatial and nonspatial hedonic property models might be pooled in meta-analysis.
17 | KW - Valuation of Environmental Effects Q51
18 | KW - Urban, Rural, Regional, Real Estate, and Transportation Economics: Housing Demand R21
19 | KW - Housing Supply and Markets R31
20 | UR - https://search.ebscohost.com/login.aspx?direct=true&AuthType=ip,sso&db=ecn&AN=0992879&site=ehost-live&scope=site&custid=s8368349
21 | DP - EBSCOhost
22 | DB - ecn
23 | ER -
24 |
25 | TY - THES
26 | AU - Ma, Qiuhua
27 | T1 - Systematic Investigation of the Effect of Wildfire Events and Risks on Property Values
28 | PB - University of New Mexico
29 | Y1 - 2017/05//
30 | N1 - Accession Number: 1678288; Geographic Descriptors: U.S.; Geographic Region: Northern America; Publication Type: Dissertation; Update Code: 20171201
31 | N2 - Wildfires frequency and severity have been increasing in the western United States over the past few decades. Previous hedonic studies investigating wildfire examined the effect of occurrence and risk independently, overlooking the potentially confounding influence. Further, these studies find mixed and/or inconsistent results, which are complicated by a variety of data availability issues as well as varied econometric modeling decisions made by analysts. This analysis applies spatial econometrics modeling strategies in a hedonic pricing model framework to examine the joint effect of both past fire occurrence and current risk on property values in Santa Fe County, New Mexico. Overall, variations in data and econometric modeling techniques yield 2,000 regression results for hedonic models. Using hedonic results as primary estimates, I then employ an internal meta-analysis to investigate what factors explain variation in wildfire effects.
32 | M1 - Ph.D.
33 | KW - Renewable Resources and Conservation: Forestry Q23
34 | KW - Climate; Natural Disasters and Their Management; Global Warming Q54
35 | KW - Ecological Economics: Ecosystem Services; Biodiversity Conservation; Bioeconomics; Industrial Ecology Q57
36 | UR - https://search.ebscohost.com/login.aspx?direct=true&AuthType=ip,sso&db=ecn&AN=1678288&site=ehost-live&scope=site&custid=s8368349
37 | DP - EBSCOhost
38 | DB - ecn
39 | ER -
40 |
41 | TY - THES
42 | AU - Prante, Tyler
43 | T1 - Developing Social Capital as a Response to the Wildfire Problem
44 | PB - University of New Mexico
45 | Y1 - 2008/08//
46 | N1 - Accession Number: 1075662; Keywords: Natural Resource; Revealed Preference; Social Capital; Geographic Descriptors: U.S.; Geographic Region: Northern America; Publication Type: Dissertation; Update Code: 20091201
47 | N2 - This research analyzes the use of natural resource management techniques that attempt to build social capital. As an application, a primary focus is on the policy response to wildfire. The principal research is segmented into 3 related chapters. First, meta-analysis is used to statistically summarize the literature of Coasean bargaining experiments. Results from this analysis suggest that the social dimensions characterizing an experiment impact bargaining behavior on a magnitude similar to that of variables that describe transaction costs. This result suggests an increased focus in future experimental work on the impact of social dimension characteristics is warranted. Next, revealed preference analysis is applied to a unique forest management program in New Mexico. The Collaborative Forest Restoration Program includes stakeholders actively in the creation and implantation of management on federal forested lands. Statistical analysis of the funding pattern from the program reveals that developing social capital and maintaining forest health stand out among a set of potentially divergent goals, while implementing projects that span multiple land jurisdictions is argued to be a missed opportunity for developing social capital. Finally, experimental economics is used to analyze the effectiveness of a set of potential policy tools to induce private spending on wildfire risk mitigation. The policy tools considered here are shown to have the opposite impact, reducing private spending. When success is assessed alternatively by the probability of engaging in risk mitigation, these policy tools have the desired effect. Given the potential to crowd out private expenditure and the sensitivity to the specific mitigation objective, the implementation of these policy tools takes on greater significance.
48 | M1 - Ph.D.
49 | KW - Ecological Economics: Ecosystem Services; Biodiversity Conservation; Bioeconomics; Industrial Ecology Q57
50 | KW - Economic Sociology; Economic Anthropology; Language; Social and Economic Stratification Z13
51 | UR - https://search.ebscohost.com/login.aspx?direct=true&AuthType=ip,sso&db=ecn&AN=1075662&site=ehost-live&scope=site&custid=s8368349
52 | DP - EBSCOhost
53 | DB - ecn
54 | ER -
55 |
--------------------------------------------------------------------------------
/vignettes/topic_data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ESHackathon/CiteSource/270e01c907d8dfc37d2dd66323e62e860dfc5c19/vignettes/topic_data/.DS_Store
--------------------------------------------------------------------------------
/vignettes/valid_data/benchmark.ris:
--------------------------------------------------------------------------------
1 | TY - JOUR
2 | TI - Marital status and problem gambling among Australian older adults: The mediating role of loneliness
3 | AU - Botterill, Emma
4 | AU - Gill, Peter Richard
5 | AU - McLaren, Suzanne
6 | AU - Gomez, Rapson
7 | T2 - Journal of gambling studies
8 | DA - 2016///
9 | PY - 2016
10 | DO - 10.1007/s10899-015-9575-5
11 | DP - Google Scholar
12 | VL - 32
13 | IS - 3
14 | SP - 1027
15 | EP - 1038
16 | ST - Marital status and problem gambling among Australian older adults
17 | L2 - https://link.springer.com/article/10.1007/s10899-015-9575-5
18 | ER -
19 |
20 | TY - JOUR
21 | TI - Worsened Anxiety and Loneliness Influenced Gaming and Gambling during the COVID-19 Pandemic
22 | AU - Mohamed, Mohamed S.
23 | AU - Rukh, Gull
24 | AU - Schiöth, Helgi B.
25 | AU - Vadlin, Sofia
26 | AU - Olofsdotter, Susanne
27 | AU - Åslund, Cecilia
28 | AU - Nilsson, Kent W.
29 | T2 - Journal of Clinical Medicine
30 | DA - 2022///
31 | PY - 2022
32 | DO - 10.3390/jcm12010249
33 | DP - Google Scholar
34 | VL - 12
35 | IS - 1
36 | SP - 249
37 | L2 - https://www.mdpi.com/2077-0383/12/1/249
38 | ER -
39 |
40 | TY - JOUR
41 | TI - At-risk and problem gambling among Finnish youth: The examination of risky alcohol consumption, tobacco smoking, mental health and loneliness as gender-specific correlates
42 | AU - Edgren, Robert
43 | AU - Castrén, Sari
44 | AU - Jokela, Markus
45 | AU - Salonen, Anne H.
46 | T2 - Nordic Studies on Alcohol and Drugs
47 | DA - 2016///
48 | PY - 2016
49 | DO - 10.1515/nsad-2016-0005
50 | DP - Google Scholar
51 | VL - 33
52 | IS - 1
53 | SP - 61
54 | EP - 80
55 | ST - At-risk and problem gambling among Finnish youth
56 | L2 - https://journals.sagepub.com/doi/abs/10.1515/nsad-2016-0005
57 | ER -
58 |
59 | TY - JOUR
60 | TI - Loneliness and online gambling-community participation of young social media users
61 | AU - Sirola, Anu
62 | AU - Kaakinen, Markus
63 | AU - Savolainen, Iina
64 | AU - Oksanen, Atte
65 | T2 - Computers in Human Behavior
66 | DA - 2019///
67 | PY - 2019
68 | DO - 10.1016/j.chb.2019.01.023
69 | DP - Google Scholar
70 | VL - 95
71 | SP - 136
72 | EP - 145
73 | L2 - https://www.sciencedirect.com/science/article/pii/S0747563219300287?casa_token=hXPqVjd7zHEAAAAA:Ve8c7FOVsC2BMSTDDimo9VHCZVS4GpxE9Rwke3MWYOMd56JYckpztNuD_D3fML_2_qufhD_NNg
74 | ER -
75 |
76 | TY - JOUR
77 | TI - The role of loneliness and self-control in predicting problem gambling behaviour
78 | AU - McQuade, Anne
79 | AU - Gill, Peter
80 | T2 - Gambling Research: Journal of the National Association for Gambling Studies (Australia)
81 | DA - 2012///
82 | PY - 2012
83 | DP - Google Scholar
84 | VL - 24
85 | IS - 1
86 | SP - 18
87 | EP - 30
88 | L2 - https://search.informit.org/doi/abs/10.3316/INFORMIT.859155913483693
89 | KW - ⛔ No DOI found
90 | ER -
91 |
92 | TY - JOUR
93 | TI - The role of loneliness, negative affectivity, mentalization, and alcohol use in adolescent gambling
94 | AU - Cosenza, Marina
95 | AU - Ciccarelli, Maria
96 | AU - Pizzini, Barbara
97 | AU - Griffiths, Mark David
98 | AU - Sacco, Mariagiulia
99 | AU - Nigro, Giovanna
100 | T2 - Mediterranean Journal of Clinical Psychology
101 | DA - 2022///
102 | PY - 2022
103 | DP - Google Scholar
104 | VL - 10
105 | IS - 3
106 | KW - ⛔ No DOI found
107 | ER -
108 |
109 | TY - JOUR
110 | TI - Loneliness and life dissatisfaction in gamblers
111 | AU - Porter, James
112 | AU - Ungar, Julia
113 | AU - Frisch, G. Ron
114 | AU - Chopra, Reena
115 | T2 - Journal of Gambling issues
116 | DA - 2004///
117 | PY - 2004
118 | DO - 10.4309/jgi.2004.11.13
119 | DP - Google Scholar
120 | IS - 11
121 | ER -
122 |
123 | TY - JOUR
124 | TI - Interaction of the loneliness phenomenon and gambling addiction
125 | AU - Akbieva, Zarema Soltamuradovna
126 | AU - Damadaeva, Angela Strgeevna
127 | AU - Magomedova, Sabirat Magomedovna
128 | AU - Ibragimova, Khava Shakhbanovna
129 | T2 - Research Journal of Pharmaceutical, Biological and Chemical Sciences
130 | DA - 2016///
131 | PY - 2016
132 | DP - Google Scholar
133 | VL - 7
134 | IS - 5
135 | SP - 1109
136 | EP - 1117
137 | L2 - https://elibrary.ru/item.asp?id=27579286
138 | KW - ⛔ No DOI found
139 | ER -
140 |
141 | TY - CONF
142 | TI - Young, stressed, and blind-minded: An experimental investigation of the routes from loneliness to adolescent problem gambling
143 | AU - Ciccarelli, Maria
144 | AU - Nigro, Giovanna
145 | AU - Pizzini, Barbara
146 | AU - Sacco, Mariagiulia
147 | AU - Griffiths, Mark
148 | AU - Cosenza, Marina
149 | C3 - JOURNAL OF BEHAVIORAL ADDICTIONS
150 | DA - 2022///
151 | PY - 2022
152 | DP - Google Scholar
153 | VL - 11
154 | SP - 281
155 | EP - 282
156 | PB - AKADEMIAI KIADO ZRT BUDAFOKI UT 187-189-A-3, H-1117 BUDAPEST, HUNGARY
157 | ST - Young, stressed, and blind-minded
158 | KW - ⛔ No DOI found
159 | ER -
160 |
161 | TY - CONF
162 | TI - The impact of social isolation and loneliness in a problem gambling population
163 | AU - Spence-Thomas, R.
164 | AU - Thomas, T.
165 | C3 - Australian Journal of Psychology
166 | DA - 2003///
167 | PY - 2003
168 | DP - Google Scholar
169 | VL - 55
170 | SP - 213
171 | EP - 213
172 | PB - AUSTRALIAN PSYCHOLOGICAL SOC 1 GRATTAN STREET, CARLTON, VICTORIA 3053, AUSTRALIA
173 | KW - ⛔ No DOI found
174 | ER -
175 |
176 | TY - JOUR
177 | TI - Dealing with the negative consequences of gambling addiction
178 | AU - Blanco Miguel, Pilar
179 | T2 - CUADERNOS DE TRABAJO SOCIAL
180 | DA - 2016///
181 | PY - 2016
182 | DO - 10.5209/cuts.48858
183 | DP - Google Scholar
184 | VL - 29
185 | IS - 2
186 | SP - 335
187 | EP - 344
188 | ER -
189 |
190 | TY - JOUR
191 | TI - Gambling alone? A study of solitary and social gambling in America
192 | AU - Bernhard, Bo J.
193 | AU - Dickens, David R.
194 | AU - Shapiro, Paul D.
195 | T2 - UNLV Gaming Research & Review Journal
196 | AB - In his acclaimed 2000 book Bowling Alone, Robert Putnam documents a disturbing social trend of the broadest kind. Putnam cites a wide variety of data that indicate that over the past fifty years, Americans have become increasingly socially disengaged. In developing this theme, Putnam specifically cites the increase in casino gambling (and especially machine gambling) as evidence in support of his argument. Building on the empirical and theoretical work of Putnam, this exploratory article examines the sub-phenomenon of 'gambling alone' by exploring sample survey data on solitary and social gambling behavior among adults who reside in Las Vegas, Nevada. Specifically, to further understand these phenomena, a number of demographic, attitudinal, and behavioral variables are examined for their explanatory power in predicting solitary vs. social gambling behavior. (PsycINFO Database Record (c) 2016 APA, all rights reserved)
197 | DA - 2007///
198 | PY - 2007
199 | DP - EBSCOhost
200 | VL - 11
201 | IS - 2
202 | SP - 1
203 | EP - 13
204 | J2 - UNLV Gaming Research & Review Journal
205 | SN - 1531-0930
206 | ST - Gambling alone?
207 | UR - https://search.ebscohost.com/login.aspx?direct=true&db=psyh&AN=2007-17475-001&site=ehost-live&scope=site
208 | AN - 2007-17475-001
209 | Y2 - 2023/01/23/21:42:58
210 | KW - ⛔ No DOI found
211 | KW - Gambling
212 | KW - gambling alone
213 | KW - Loneliness
214 | KW - Social Behavior
215 | KW - social gambling
216 | KW - solitary
217 | ER -
218 |
219 | TY - JOUR
220 | TI - The association between loneliness, social isolation and women's electronic gaming machine gambling
221 | AU - Trevorrow, Karen
222 | AU - Moore, Susan
223 | T2 - Journal of Gambling Studies
224 | AB - Examined the extent to which motivations for women gambling were boredom, loneliness, and isolation. Research was conducted with a comparative study of female electronic gaming machine (EGM) gamblers and non-gamblers, matched for age, education, and geographic location. Women EGM gamblers did not differ significantly from the non-gambling sample in terms of loneliness, friendship satisfaction, or adequacy of social networks. However, a sub-group classified by the South Oaks Gambling Screen as problem gamblers were significantly more lonely than the rest of the sample. These women were also more likely to be involved in social networks where gambling was normative. Limitation of the research and need for a longitudinal study are discussed. (PsycINFO Database Record (c) 2019 APA, all rights reserved)
225 | DA - 1998///
226 | PY - 1998
227 | DO - 10.1023/a:1022057609568
228 | DP - EBSCOhost
229 | VL - 14
230 | IS - 3
231 | SP - 263
232 | EP - 284
233 | J2 - Journal of Gambling Studies
234 | SN - 1050-5350
235 | AN - 1999-03840-004
236 | L2 - https://search.ebscohost.com/login.aspx?direct=true&db=psyh&AN=1999-03840-004&site=ehost-live&scope=site&scope=cite
237 | KW - (18–77 yrs old) women
238 | KW - Boredom
239 | KW - boredom vs loneliness vs isolation
240 | KW - Digital Gaming
241 | KW - Gambling
242 | KW - Loneliness
243 | KW - Motivation
244 | KW - motivation to gamble
245 | KW - Social Isolation
246 | ER -
247 |
248 |
--------------------------------------------------------------------------------
/vignettes/working_example_data/AGRIS.ris:
--------------------------------------------------------------------------------
1 | TY - JOUR
2 | AB - NA
3 | AU - Ahmad, S. S.
4 | AU - Husain, S. Z.
5 | KW - species
6 | drug plants
7 | traditional uses
8 | surveys
9 | pakistan
10 | saline soils
11 | traditional medicines
12 | ethnobotany
13 | N1 - (Fatima Jinnah Women Univ., Rawalpindi (Pakistan). Dept. of Environmental Sciences)
14 | PY - 2008
15 | ST - Ethno medicinal survey of plants from salt range (Kallar Kahar) of Pakistan
16 | TI - Ethno medicinal survey of plants from salt range (Kallar Kahar) of Pakistan
17 | VL - v. 40
18 | ID - 4202
19 | ER -
20 |
21 | TY - JOUR
22 | AB - NA
23 | AU - Awotide, Olawale D.
24 | AU - Kehinde, Adetunji L.
25 | AU - Agbola, Peter O.
26 | KW - equations
27 | family resource management
28 | livelihood
29 | rural sociology
30 | mathematical models
31 | regression analysis
32 | sociodemographic characteristics
33 | rural poverty
34 | low income households
35 | farm income
36 | sport hunting
37 | LA - English
38 | PY - 2010
39 | ST - Poverty and rural livelihood diversification among farming households in southwest Nigeria
40 | TI - Poverty and rural livelihood diversification among farming households in southwest Nigeria
41 | VL - v. 8
42 | ID - 4203
43 | ER -
44 |
45 | TY - JOUR
46 | AB - NA
47 | AU - da Silva Dias, João Carlos
48 | KW - landraces
49 | germplasm conservation
50 | food production
51 | breeders' rights
52 | genetic erosion
53 | cultivars
54 | plant breeding
55 | genetic improvement
56 | hybrids
57 | vegetables
58 | open pollination
59 | crop yield
60 | artificial selection
61 | genetic resistance
62 | disease resistance
63 | vegetable crops
64 | plant variety protection
65 | food security
66 | poverty
67 | LA - English
68 | PY - 2010
69 | ST - Impact of improved vegetable cultivars in overcoming food insecurity
70 | TI - Impact of improved vegetable cultivars in overcoming food insecurity
71 | VL - v. v. 176
72 | ID - 4204
73 | ER -
74 |
75 | TY - JOUR
76 | AB - NA
77 | AU - Dahmardeh, Majid
78 | AU - Dahmardeh, Mehdi
79 | AU - Yazdani, Saeed
80 | AU - Piri, Eissa
81 | KW - wetlands
82 | water resources
83 | water supply
84 | lakes
85 | socioeconomic status
86 | LA - English
87 | PY - 2009
88 | ST - socio-economic effects of Hamoon Lake in Sistan region of Iran
89 | TI - socio-economic effects of Hamoon Lake in Sistan region of Iran
90 | VL - v. 7
91 | ID - 4205
92 | ER -
93 |
94 | TY - JOUR
95 | AB - NA
96 | AU - Guei, Robert G.
97 | AU - Barra, Abdoulaye
98 | AU - Silue, Drissa
99 | KW - financial institutions
100 | Food and Agriculture Organization
101 | funding
102 | seed quality
103 | business enterprises
104 | plant cultural practices
105 | corn
106 | farmers
107 | rice
108 | farm income
109 | crop production
110 | millets
111 | seed certification
112 | yields
113 | food security
114 | poverty
115 | grain sorghum
116 | LA - English
117 | PY - 2011
118 | ST - Promoting smallholder seed enterprises: quality seed production of rice, maize, sorghum and millet in northern Cameroon
119 | TI - Promoting smallholder seed enterprises: quality seed production of rice, maize, sorghum and millet in northern Cameroon
120 | VL - v. 9
121 | ID - 4206
122 | ER -
123 |
124 | TY - JOUR
125 | AB - NA
126 | AU - Leisher, C.
127 | AU - Nature Conservancy, Carlton Victoria Australia Program eng
128 | AU - Van Beukering, P.
129 | AU - Scherl, L. M.
130 | KW - PROTECTED AREAS
131 | MARINE AREAS
132 | AUTONOMISATION
133 | COMMUNITY INVOLVEMENT
134 | ZONE PROTEGEE
135 | DEVELOPPEMENT DE LA COMMUNAUTE
136 | PARTICIPACION COMUNITARIA
137 | DEVELOPPEMENT RURAL
138 | CONSERVACION DE LA NATURALEZA
139 | ETUDE DE CAS
140 | CASE STUDIES
141 | DEVELOPMENT PROJECTS
142 | ZONAS PROTEGIDAS
143 | RURAL DEVELOPMENT
144 | EMPOWERMENT
145 | HOGARES
146 | NATURE CONSERVATION
147 | households
148 | AUTONOMIZACION
149 | DESARROLLO DE LA COMUNIDAD
150 | PROJET DE DEVELOPPEMENT
151 | POBREZA
152 | POVERTY
153 | PROYECTOS DE DESARROLLO
154 | ESTUDIOS DE CASOS PRACTICOS
155 | DESARROLLO RURAL
156 | PAUVRETE
157 | REGION MARINE
158 | PARTICIPATION COMMUNAUTAIRE
159 | COMMUNITY DEVELOPMENT
160 | MENAGE
161 | ZONAS MARINAS
162 | CONSERVATION DE LA NATURE
163 | N1 - (Australia)
164 | PY - 2007
165 | ST - Nature's investment bank: how marine protected areas contributed to poverty reduction
166 | TI - Nature's investment bank: how marine protected areas contributed to poverty reduction
167 | ID - 4207
168 | ER -
169 |
170 | TY - JOUR
171 | AB - NA
172 | AU - Leisher, C.
173 | AU - Nature Conservancy, Carlton Victoria Australia Program eng
174 | AU - Van Beukering, P.
175 | AU - Scherl, L. M.
176 | KW - PROTECTED AREAS
177 | MARINE AREAS
178 | AUTONOMISATION
179 | COMMUNITY INVOLVEMENT
180 | ZONE PROTEGEE
181 | DEVELOPPEMENT DE LA COMMUNAUTE
182 | PARTICIPACION COMUNITARIA
183 | DEVELOPPEMENT RURAL
184 | CONSERVACION DE LA NATURALEZA
185 | ETUDE DE CAS
186 | CASE STUDIES
187 | DEVELOPMENT PROJECTS
188 | ZONAS PROTEGIDAS
189 | RURAL DEVELOPMENT
190 | EMPOWERMENT
191 | HOGARES
192 | NATURE CONSERVATION
193 | households
194 | AUTONOMIZACION
195 | DESARROLLO DE LA COMUNIDAD
196 | PROJET DE DEVELOPPEMENT
197 | POBREZA
198 | POVERTY
199 | PROYECTOS DE DESARROLLO
200 | ESTUDIOS DE CASOS PRACTICOS
201 | DESARROLLO RURAL
202 | PAUVRETE
203 | REGION MARINE
204 | PARTICIPATION COMMUNAUTAIRE
205 | COMMUNITY DEVELOPMENT
206 | MENAGE
207 | ZONAS MARINAS
208 | CONSERVATION DE LA NATURE
209 | N1 - (Australia)
210 | PY - 2007
211 | ST - Nature's investment bank: how marine protected areas contributed to poverty reduction
212 | TI - Nature's investment bank: how marine protected areas contributed to poverty reduction
213 | ID - 4208
214 | ER -
215 |
216 | TY - JOUR
217 | AB - NA
218 | AU - Leisher, C.
219 | AU - Nature Conservancy, Carlton Victoria Australia Program eng
220 | AU - Van Beukering, P.
221 | AU - Scherl, L. M.
222 | KW - PROTECTED AREAS
223 | MARINE AREAS
224 | AUTONOMISATION
225 | COMMUNITY INVOLVEMENT
226 | ZONE PROTEGEE
227 | DEVELOPPEMENT DE LA COMMUNAUTE
228 | PARTICIPACION COMUNITARIA
229 | DEVELOPPEMENT RURAL
230 | CONSERVACION DE LA NATURALEZA
231 | ETUDE DE CAS
232 | CASE STUDIES
233 | DEVELOPMENT PROJECTS
234 | ZONAS PROTEGIDAS
235 | RURAL DEVELOPMENT
236 | EMPOWERMENT
237 | HOGARES
238 | NATURE CONSERVATION
239 | households
240 | AUTONOMIZACION
241 | DESARROLLO DE LA COMUNIDAD
242 | PROJET DE DEVELOPPEMENT
243 | POBREZA
244 | POVERTY
245 | PROYECTOS DE DESARROLLO
246 | ESTUDIOS DE CASOS PRACTICOS
247 | DESARROLLO RURAL
248 | PAUVRETE
249 | REGION MARINE
250 | PARTICIPATION COMMUNAUTAIRE
251 | COMMUNITY DEVELOPMENT
252 | MENAGE
253 | ZONAS MARINAS
254 | CONSERVATION DE LA NATURE
255 | N1 - (Australia)
256 | PY - 2007
257 | ST - Nature's investment bank: how marine protected areas contributed to poverty reduction
258 | TI - Nature's investment bank: how marine protected areas contributed to poverty reduction
259 | ID - 4209
260 | ER -
261 |
262 | TY - JOUR
263 | AB - NA
264 | AU - Meinzen-Dick, Ruth S.
265 | AU - Devaux, Andre
266 | AU - Antezana, Ivonne
267 | KW - social capital
268 | agricultural policy
269 | potatoes
270 | development aid
271 | economic development
272 | development policy
273 | market access
274 | assets
275 | vegetable crops
276 | sustainable livelihoods framework
277 | poverty
278 | indigenous knowledge
279 | market power
280 | food processing
281 | development projects
282 | supply chain
283 | Solanum tuberosum
284 | collective action
285 | genetic variation
286 | cultivars
287 | sustainable development
288 | food marketing
289 | humans
290 | poverty reduction
291 | small-scale farming
292 | LA - English
293 | PY - 2009
294 | ST - Underground assets: potato biodiversity to improve the livelihoods of the poor
295 | TI - Underground assets: potato biodiversity to improve the livelihoods of the poor
296 | VL - v. 7 issue 4
297 | ID - 4210
298 | ER -
299 |
300 | TY - JOUR
301 | AB - NA
302 | AU - Quang, N. V.
303 | AU - Sato, N.
304 | KW - income
305 | land diversion
306 | utilizacion de la tierra
307 | revenu
308 | retrait des terres
309 | foret
310 | utilisation des terres
311 | vietnam
312 | forests
313 | bosques
314 | land use
315 | desviacion del uso de la tierra
316 | viet nam
317 | comunidades
318 | renta
319 | communities
320 | LA - English
321 | N1 - (Kyushu Univ., Fukuoka (Japan). Faculty of Agriculture)
322 | PY - 2008
323 | ST - The role of forest in people's livelihood: A case study in North-eastern Vietnam
324 | TI - The role of forest in people's livelihood: A case study in North-eastern Vietnam
325 | VL - v. 53
326 | ID - 4211
327 | ER -
328 |
329 | TY - JOUR
330 | AB - NA
331 | AU - Rana, M. P.
332 | AU - Sohel, M. S. I.
333 | AU - Akhter, S.
334 | AU - Alam, M. S.
335 | KW - wetlands
336 | rural communities
337 | habitats
338 | land use
339 | households
340 | occupations
341 | bangladesh
342 | N1 - (Shahjalal Univ. of Science and Technology, Sylhet (Bangladesh). Dept. of Forestry and Environmental Science)
343 | (Bangladesh Agricultural Univ. (Bangladesh). Dept. of Fisheries Management)
344 | PY - 2010
345 | ST - Haor based livelihood dependency of a rural community: A study on hakaluki haor in Bangladesh
346 | TI - Haor based livelihood dependency of a rural community: A study on hakaluki haor in Bangladesh
347 | VL - v. 47
348 | ID - 4212
349 | ER -
350 |
351 | TY - JOUR
352 | AB - NA
353 | AU - Vianna, André Luiz Menezes
354 | AU - Fearnside, Philip Martin
355 | KW - sustainable forestry
356 | biomass
357 | carbon
358 | logging
359 | natural regeneration
360 | forest management
361 | trees
362 | managers
363 | carbon sinks
364 | LA - English
365 | PY - 2014
366 | ST - Impact of Community Forest Management on Biomass Carbon Stocks in the Uatumã Sustainable Development Reserve, Amazonas, Brazil
367 | TI - Impact of Community Forest Management on Biomass Carbon Stocks in the Uatumã Sustainable Development Reserve, Amazonas, Brazil
368 | VL - v. 33
369 | ID - 4213
370 | ER -
371 |
372 |
--------------------------------------------------------------------------------