├── .github └── workflows │ ├── documentation.yml │ ├── package_release.yml │ ├── pull_request.yml │ ├── pypi.yml │ └── tests.yml ├── .gitignore ├── CHANGELOG.rst ├── CITATION.cff ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── Makefile ├── api.rst ├── changelog.rst ├── conf.py ├── guide.rst ├── index.rst ├── jsonschema.rst ├── license.rst ├── make.bat ├── reporting.rst ├── requirements.txt ├── todo.rst ├── tokenization.rst └── tutorial.rst ├── example_configs ├── many_projects_and_authors_project_and_summary_reports_no_duplicates__full_example.json ├── many_projects_and_authors_with_summary_reports__full_example.json ├── many_projects_and_authors_with_summary_reports__template.json └── many_projects_and_authors_with_summary_reports_no_duplicates__template.json ├── pyproject.toml ├── requirements.txt ├── src └── academic_tracker │ ├── __init__.py │ ├── __main__.py │ ├── athr_srch_emails_and_reports.py │ ├── athr_srch_modularized.py │ ├── athr_srch_webio.py │ ├── citation_parsing.py │ ├── emails_and_reports_helpers.py │ ├── fileio.py │ ├── helper_functions.py │ ├── ref_srch_emails_and_reports.py │ ├── ref_srch_modularized.py │ ├── ref_srch_webio.py │ ├── tracker_schema.py │ ├── user_input_checking.py │ └── webio.py └── tests ├── conftest.py ├── fixtures.py ├── pytest.ini ├── regen_intermediate_files_athr.py ├── regen_intermediate_files_ref.py ├── test_CLI.py ├── test_athr_srch_emails_and_reports.py ├── test_athr_srch_modularized.py ├── test_athr_srch_webio_no_internet.py ├── test_citation_parsing.py ├── test_emails_and_reports_helpers.py ├── test_fileio.py ├── test_helper_functions.py ├── test_main.py ├── test_ref_srch_emails_and_reports.py ├── test_ref_srch_modularized.py ├── test_ref_srch_webio_no_internet.py ├── test_user_input_checking.py ├── test_webio_no_internet.py └── testing_files ├── .gitignore ├── Crossref_DOI_query.json ├── Crossref_grant_query.json ├── Crossref_merge.json ├── Crossref_misc.json ├── Crossref_pub_dict.json ├── Crossref_query.json 
├── Google_Scholar_merge.json ├── Google_Scholar_misc.json ├── ORCID_author_search_query.json ├── ORCID_merge.json ├── ORCID_misc.json ├── ORCID_pub_dict.json ├── ORCID_query.json ├── PMID_reference.docx ├── PMID_reference.json ├── PMID_reference.txt ├── PubMed_merge.json ├── PubMed_modified_to_save_no_PMCID.json ├── PubMed_modified_to_save_with_PMCID.json ├── PubMed_rare_cases.json ├── add_authors.csv ├── add_authors_missing_all_name_columns.csv ├── add_authors_missing_all_names.csv ├── add_authors_missing_collective_name.csv ├── add_authors_missing_column.csv ├── add_authors_missing_first_and_last_names.csv ├── add_authors_missing_first_name_column.csv ├── add_authors_missing_last_name_column.csv ├── add_authors_missing_value.csv ├── all_queries.json ├── all_queries_ref.json ├── athr_project_emails.json ├── athr_project_emails_tabular.json ├── athr_srch_build_author_loop.txt ├── athr_srch_build_loop_template_string.txt ├── athr_srch_summary_report.txt ├── athr_srch_summary_report_custom_template.txt ├── authors.json ├── authors_by_project_dict_tabular.json ├── authors_by_project_dict_truncated.json ├── collaborator_emails.json ├── collaborator_emails_tabular.json ├── collaborator_report2.txt ├── collective_author_XML.xml ├── config.json ├── config_Hunter_only.json ├── config_tabular.json ├── config_truncated.json ├── config_truncated_authors_adjusted.json ├── config_truncated_noCrossref.json ├── config_truncated_noORCID.json ├── config_truncated_noPubMed.json ├── config_truncated_ref_srch_summary_report.json ├── emails.json ├── empty_file.txt ├── gen_reports_ref_summary_report.txt ├── has_author.xml ├── has_pubmed_grants.xml ├── intermediate_results ├── author_search │ ├── all │ │ ├── publication_dict.json │ │ ├── running_pubs1.json │ │ ├── running_pubs2.json │ │ ├── running_pubs3.json │ │ ├── running_pubs4.json │ │ ├── running_pubs5.json │ │ ├── running_pubs6.json │ │ ├── running_pubs7.json │ │ └── running_pubs8.json │ ├── no_Crossref │ │ ├── 
publication_dict.json │ │ ├── running_pubs1.json │ │ ├── running_pubs2.json │ │ ├── running_pubs3.json │ │ ├── running_pubs4.json │ │ ├── running_pubs5.json │ │ └── running_pubs6.json │ ├── no_Google_Scholar │ │ ├── publication_dict.json │ │ ├── running_pubs1.json │ │ ├── running_pubs2.json │ │ ├── running_pubs3.json │ │ ├── running_pubs4.json │ │ ├── running_pubs5.json │ │ └── running_pubs6.json │ ├── no_ORCID │ │ ├── publication_dict.json │ │ ├── running_pubs1.json │ │ ├── running_pubs2.json │ │ ├── running_pubs3.json │ │ ├── running_pubs4.json │ │ ├── running_pubs5.json │ │ └── running_pubs6.json │ └── no_PubMed │ │ ├── publication_dict.json │ │ ├── running_pubs1.json │ │ ├── running_pubs2.json │ │ ├── running_pubs3.json │ │ ├── running_pubs4.json │ │ ├── running_pubs5.json │ │ └── running_pubs6.json └── ref_search │ ├── all │ ├── matching_key_for_citation1.json │ ├── matching_key_for_citation2.json │ ├── matching_key_for_citation3.json │ ├── matching_key_for_citation4.json │ ├── publication_dict.json │ ├── running_pubs1.json │ ├── running_pubs2.json │ ├── running_pubs3.json │ ├── running_pubs4.json │ └── tokenized_reference.json │ ├── no_Crossref │ ├── matching_key_for_citation1.json │ ├── matching_key_for_citation2.json │ ├── publication_dict.json │ ├── running_pubs1.json │ ├── running_pubs2.json │ └── tokenized_reference.json │ └── no_PubMed │ ├── matching_key_for_citation1.json │ ├── matching_key_for_citation2.json │ ├── publication_dict.json │ ├── running_pubs1.json │ ├── running_pubs2.json │ └── tokenized_reference.json ├── medline.txt ├── modified_PubMed_XML.xml ├── myncbi_webpages.json ├── no_author.xml ├── nsf_award_page.txt ├── parse_citations_test.txt ├── project_report.txt ├── pub_dict_from_PMID.json ├── pub_with_PMCID.xml ├── publication_dict.json ├── publication_dict_truncated.json ├── pubs_by_author_dict.json ├── pubs_by_author_dict_truncated.json ├── pymed_pubs.pkl ├── ref_srch_Crossref_keys_for_citations.json ├── ref_srch_Crossref_pub_dict.json 
├── ref_srch_Crossref_queries.json ├── ref_srch_PubMed_pubs.json ├── ref_srch_gen_reports_test_pub_dict.json ├── ref_srch_keys_for_citations.json ├── ref_srch_keys_for_citations_Crossref_duplicate_citation.json ├── ref_srch_keys_for_citations_Crossref_merge.json ├── ref_srch_keys_for_citations_PubMed_duplicate_citation.json ├── ref_srch_keys_for_citations_PubMed_merge.json ├── ref_srch_publication_dict.json ├── ref_srch_publication_dict_Crossref_duplicate_citation.json ├── ref_srch_publication_dict_Crossref_merge.json ├── ref_srch_publication_dict_Crossref_title_match.json ├── ref_srch_publication_dict_PubMed_duplicate_citation.json ├── ref_srch_publication_dict_PubMed_merge.json ├── ref_srch_publication_dict_PubMed_title_match.json ├── ref_srch_report_default.txt ├── ref_srch_report_tabular1.csv ├── ref_srch_report_tabular2.csv ├── ref_srch_report_tabular3.csv ├── ref_srch_report_tabular4.xlsx ├── ref_srch_report_template_string.txt ├── ref_srch_report_test1.txt ├── ref_srch_report_test2.txt ├── reference_test.docx ├── reference_test.txt ├── scholarly_DOIs.json ├── scholarly_author_query.json ├── scholarly_pub_dict.json ├── scholarly_pubs.json ├── scholarly_query.json ├── solo_Crossref.json ├── solo_Google_Scholar.json ├── testing_csv.csv ├── testing_docx.docx ├── testing_text.txt ├── tokenization_report.txt ├── tokenized_MEDLINE.json ├── tokenized_MEDLINE2.json ├── tokenized_citations.json ├── tokenized_citations_duplicates_removed.json ├── tokenized_citations_for_report_test.json ├── tokenized_citations_for_report_test2.json ├── tokenized_citations_for_report_test_empty.json ├── tokenized_citations_missing_ref_line.json ├── tokenized_myncbi_page1.json ├── tokenized_nsf_award_page.json ├── tokenized_parsing_test.json └── tokenized_ref_test.json /.github/workflows/documentation.yml: -------------------------------------------------------------------------------- 1 | name: Publish Documentation 2 | 3 | on: 4 | workflow_call: 5 | 6 | jobs: 7 | publish-documentation: 
8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v4 11 | - uses: actions/setup-python@v5 12 | with: 13 | python-version: '3.x' 14 | - name: Upgrade pip, install package, install requirements, build docs 15 | run: | 16 | pip install --upgrade pip 17 | pip install . 18 | if [ -f ./docs/requirements.txt ]; then pip install -r ./docs/requirements.txt; fi 19 | pip install sphinx 20 | sphinx-build docs ./docs/_build/html/ 21 | # Create an artifact of the html output. 22 | - uses: actions/upload-artifact@v1 23 | with: 24 | name: DocumentationHTML 25 | path: docs/_build/html/ 26 | # Publish built docs to gh-pages branch. 27 | # =============================== 28 | - name: Commit documentation changes 29 | env: 30 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 31 | run: | 32 | git config --global user.name "${GITHUB_ACTOR}" 33 | git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com" 34 | git clone "https://token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" --branch gh-pages --single-branch gh-pages 35 | cp -r docs/_build/html/* gh-pages/ 36 | cd gh-pages 37 | touch .nojekyll 38 | git add . 39 | git commit -m "Update documentation." -a || true 40 | # The above command will fail if no changes were present, so we ignore 41 | # that. 
42 | - name: Push changes 43 | uses: ad-m/github-push-action@master 44 | with: 45 | branch: gh-pages 46 | directory: gh-pages 47 | github_token: ${{ secrets.GITHUB_TOKEN }} 48 | # =============================== -------------------------------------------------------------------------------- /.github/workflows/package_release.yml: -------------------------------------------------------------------------------- 1 | name: Package and Documentation Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | release-version: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - id: parse-version 12 | name: Parse release version 13 | run: | 14 | echo "version=${RELEASE_VERSION/v/}" >> "$GITHUB_OUTPUT" 15 | env: 16 | RELEASE_VERSION: ${{ github.event.release.tag_name }} 17 | outputs: 18 | version: ${{ steps.parse-version.outputs.version }} 19 | publish-test-pypi: 20 | uses: ./.github/workflows/pypi.yml 21 | with: 22 | repository_url: https://test.pypi.org/legacy/ 23 | secrets: 24 | API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }} 25 | test-test-pypi: 26 | needs: [release-version, publish-test-pypi] 27 | uses: ./.github/workflows/tests.yml 28 | with: 29 | install_command: "python3 -m pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple academic_tracker==${{ needs.release-version.outputs.version }}" 30 | publish-pypi: 31 | needs: test-test-pypi 32 | uses: ./.github/workflows/pypi.yml 33 | with: 34 | repository_url: https://upload.pypi.org/legacy/ 35 | secrets: 36 | API_TOKEN: ${{ secrets.PYPI_API_TOKEN }} 37 | test-pypi: 38 | needs: [release-version, publish-pypi] 39 | uses: ./.github/workflows/tests.yml 40 | with: 41 | install_command: "python3 -m pip install academic_tracker==${{ needs.release-version.outputs.version }}" 42 | publish-documentation: 43 | needs: test-pypi 44 | uses: ./.github/workflows/documentation.yml -------------------------------------------------------------------------------- /.github/workflows/pull_request.yml: 
-------------------------------------------------------------------------------- 1 | name: Pull request 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | workflow_dispatch: 11 | 12 | jobs: 13 | pull-request: 14 | uses: ./.github/workflows/tests.yml 15 | with: 16 | install_command: "python3 -m pip install -e ." -------------------------------------------------------------------------------- /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish package 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | repository_url: 7 | description: The URL of the PyPi distribution 8 | required: true 9 | type: string 10 | secrets: 11 | API_TOKEN: 12 | required: true 13 | 14 | jobs: 15 | publish-package: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: actions/setup-python@v5 20 | with: 21 | python-version: '3.x' 22 | - name: Install dependencies 23 | run: | 24 | python3 -m pip install --upgrade pip 25 | python3 -m pip install build 26 | - name: Build package 27 | run: python3 -m build 28 | - name: Publish package to a PyPi distribution 29 | uses: pypa/gh-action-pypi-publish@release/v1 30 | with: 31 | password: ${{ secrets.API_TOKEN }} 32 | repository-url: ${{ inputs.repository_url }} -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Run Tests 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | install_command: 7 | description: The command for installing the package to test. 
8 | required: true 9 | type: string 10 | 11 | jobs: 12 | run-tests: 13 | strategy: 14 | matrix: 15 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 16 | os: [ ubuntu-latest, windows-latest, macOS-latest ] 17 | runs-on: ${{matrix.os}} 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install testing environment 25 | run: | 26 | python3 -m pip install --upgrade pip 27 | python3 -m pip install pytest pytest-mock pytest-cov 28 | - name: Install package 29 | uses: Wandalen/wretry.action@master 30 | with: 31 | command: ${{ inputs.install_command }} 32 | attempt_limit: 10 33 | attempt_delay: 10000 34 | - name: Run tests on package 35 | run: pytest --cov-branch --cov-report=term-missing --cov=academic_tracker tests/ 36 | # - name: Debug with tmate on failure 37 | # if: ${{ failure() }} 38 | # uses: mxschmitt/action-tmate@v3 39 | # with: 40 | # limit-access-to-actor: true -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | docs/_build 3 | venv/* 4 | data/* 5 | .DS_Store 6 | .ipynb_checkpoints 7 | .idea 8 | *.egg-info 9 | .pytest_cache 10 | build/ 11 | dist/ 12 | coverage.xml 13 | .coverage 14 | htmlcov/ 15 | README_old.rst 16 | testing_scratch/ 17 | tests/testing_files/new_intermediate_results/ 18 | src/academic_tracker/_version.py 19 | setup_old.py 20 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | Release History 2 | =============== 3 | 4 | 5 | 1.0.0 (2021-11-12) 6 | ~~~~~~~~~~~~~~~~~~ 7 | 8 | - Initial public release. 
-------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.0.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - family-names: "Thompson" 5 | given-names: "Travis" 6 | orcid: "https://orcid.org/0000-0002-8198-1327" 7 | - family-names: "Powell" 8 | given-names: "Christian" 9 | orcid: "https://orcid.org/0000-0002-4242-080X" 10 | - family-names: "Moseley" 11 | given-names: "Hunter" 12 | orcid: "https://orcid.org/0000-0003-3995-5368" 13 | title: "academic_tracker" 14 | version: 1.0.3 15 | date-released: 2022-03-28 16 | url: "https://github.com/MoseleyBioinformaticsLab/academic_tracker" 17 | preferred-citation: 18 | type: article 19 | authors: 20 | - family-names: "Thompson" 21 | given-names: "Travis" 22 | orcid: "https://orcid.org/0000-0002-8198-1327" 23 | - family-names: "Powell" 24 | given-names: "Christian" 25 | orcid: "https://orcid.org/0000-0002-4242-080X" 26 | - family-names: "Moseley" 27 | given-names: "Hunter" 28 | orcid: "https://orcid.org/0000-0003-3995-5368" 29 | doi: "10.1371/journal.pone.0277834" 30 | journal: "Plos one" 31 | month: 11 32 | title: "Academic Tracker: Software for tracking and reporting publications associated with authors and grants" 33 | number: 11 34 | volume: 17 35 | year: 2022 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The Clear BSD License with Extra Clause 2 | 3 | Copyright (c) 2022, Travis Thompson, Hunter N.B. Moseley 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without modification, 7 | are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, 10 | this list of conditions and the following disclaimer. 
11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * All advertising materials mentioning features or use of this software must 17 | display the following acknowledgement: This product includes software 18 | developed by the copyright holder. 19 | 20 | * Neither the name of the copyright holder nor the names of its 21 | contributors may be used to endorse or promote products derived from this 22 | software without specific prior written permission. 23 | 24 | * If the source code is used in a published work, then proper citation of 25 | the source code must be included with the published work. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND 28 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 29 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 30 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE 31 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 33 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 34 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 35 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 36 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
37 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst LICENSE CHANGELOG.rst 2 | include requirements.txt 3 | include docs/Makefile 4 | recursive-include docs *.rst *.txt *.py *.png *.svg 5 | recursive-include src/academic_tracker *.py *.pyx *.c -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. automodule:: academic_tracker 5 | 6 | 7 | .. automodule:: academic_tracker.user_input_checking 8 | :members: 9 | 10 | .. automodule:: academic_tracker.athr_srch_modularized 11 | :members: 12 | .. automodule:: academic_tracker.athr_srch_webio 13 | :members: 14 | .. automodule:: academic_tracker.athr_srch_emails_and_reports 15 | :members: 16 | 17 | 18 | .. automodule:: academic_tracker.ref_srch_modularized 19 | :members: 20 | .. automodule:: academic_tracker.ref_srch_webio 21 | :members: 22 | .. 
automodule:: academic_tracker.ref_srch_emails_and_reports 23 | :members: 24 | 25 | .. automodule:: academic_tracker.citation_parsing 26 | :members: 27 | 28 | .. automodule:: academic_tracker.fileio 29 | :members: 30 | .. automodule:: academic_tracker.helper_functions 31 | :members: 32 | .. automodule:: academic_tracker.webio 33 | :members: 34 | .. automodule:: academic_tracker.emails_and_reports_helpers 35 | :members: 36 | 37 | 38 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | Change Log 2 | ========== 3 | 4 | Version 2.0.0 5 | ~~~~~~~~~~~~~ 6 | 7 | Changes 8 | ------- 9 | In the 1.0.0 version each source was queried in a certain order and if later sources found the 10 | same publication as a previous one it was simply ignored. Now a best attempt is made to try and 11 | merge information from the previous source with information from later sources. An additional 12 | "queried_sources" attribute was added to the publication object created for each publication to 13 | indicate all of the sources where the publication was found. It is a list field, and each source 14 | is appended to it as it is found. 15 | 16 | Enhancements 17 | ------------ 18 | A "references" attribute was added to the publication object for each publication and the references 19 | for the publication will appear there if available. It is a list of objects that have the attributes 20 | "citation", "title", "PMID", "PMCID", and "DOI". Fields that can't be determined will have a null value. 21 | 22 | More information is able to be obtained from PubMed, such as DOI, author affiliations, and author ORCIDs. 23 | 24 | Collective authors can now be specified and are handled appropriately when present on information from 25 | queried sources. 26 | 27 | All new publication attributes were added to the reporting and the documentation updated. 
28 | 29 | The raw queries from each source can now be saved using the --save-all-queries option. An "all_results.json" 30 | file will be saved in the output if the option is given. 31 | 32 | The --keep-duplicates option was added to reference_search. This allows the user to force the search 33 | not to drop what it deems as duplicates. The default is that they are still dropped automatically, but 34 | this option allows for an override when the program thinks, incorrectly, that 2 references are the same. 35 | 36 | Bug Fixes 37 | --------- 38 | Crossref publication dates will now have day and month when available. A bug made it so only the year 39 | was captured even if month and day were available. 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
14 | # 15 | import os 16 | import sys 17 | # sys.path.insert(0, os.path.abspath('.')) 18 | sys.path.insert(0, os.path.abspath('../src')) 19 | 20 | from academic_tracker import __version__ 21 | 22 | 23 | # -- Project information ----------------------------------------------------- 24 | 25 | project = 'Academic Tracker' 26 | copyright = '2021, Travis Thompson' 27 | author = 'Travis Thompson' 28 | 29 | # The short X.Y version 30 | version = __version__ 31 | # The full version, including alpha/beta/rc tags 32 | release = __version__ 33 | 34 | 35 | # -- General configuration --------------------------------------------------- 36 | 37 | # If your documentation needs a minimal Sphinx version, state it here. 38 | # 39 | # needs_sphinx = '1.0' 40 | 41 | # Add any Sphinx extension module names here, as strings. They can be 42 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 43 | # ones. 44 | extensions = [ 45 | 'sphinx.ext.autodoc', 46 | 'sphinx.ext.doctest', 47 | 'sphinx.ext.intersphinx', 48 | 'sphinx.ext.todo', 49 | 'sphinx.ext.coverage', 50 | 'sphinx.ext.viewcode', 51 | 'sphinx.ext.githubpages', 52 | 'sphinx.ext.napoleon', 53 | # 'sphinxcontrib.fulltoc' 54 | ] 55 | 56 | # Add any paths that contain templates here, relative to this directory. 57 | templates_path = ['_templates'] 58 | 59 | # The suffix(es) of source filenames. 60 | # You can specify multiple suffix as a list of string: 61 | # 62 | # source_suffix = ['.rst', '.md'] 63 | source_suffix = '.rst' 64 | 65 | # The master toctree document. 66 | master_doc = 'index' 67 | 68 | # The language for content autogenerated by Sphinx. Refer to documentation 69 | # for a list of supported languages. 70 | # 71 | # This is also used if you do content translation via gettext catalogs. 72 | # Usually you set "language" from the command line for these cases. 
73 | language = "en" 74 | 75 | # List of patterns, relative to source directory, that match files and 76 | # directories to ignore when looking for source files. 77 | # This pattern also affects html_static_path and html_extra_path. 78 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 79 | 80 | # The name of the Pygments (syntax highlighting) style to use. 81 | pygments_style = "sphinx" 82 | 83 | 84 | # -- Options for HTML output ------------------------------------------------- 85 | 86 | # The theme to use for HTML and HTML Help pages. See the documentation for 87 | # a list of builtin themes. 88 | # 89 | #html_theme = 'alabaster' 90 | html_theme = 'sphinx_rtd_theme' 91 | 92 | # Theme options are theme-specific and customize the look and feel of a theme 93 | # further. For a list of options available for each theme, see the 94 | # documentation. 95 | # 96 | # html_theme_options = {} 97 | 98 | # Add any paths that contain custom static files (such as style sheets) here, 99 | # relative to this directory. They are copied after the builtin static files, 100 | # so a file named "default.css" will overwrite the builtin "default.css". 101 | html_static_path = ['_static'] 102 | 103 | # Custom sidebar templates, must be a dictionary that maps document names 104 | # to template names. 105 | # 106 | # The default sidebars (for documents that don't match any pattern) are 107 | # defined by theme itself. Builtin themes are using these templates by 108 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 109 | # 'searchbox.html']``. 110 | # 111 | # html_sidebars = {} 112 | 113 | 114 | # -- Options for HTMLHelp output --------------------------------------------- 115 | 116 | # Output file base name for HTML help builder. 117 | htmlhelp_basename = 'AcademicTrackerdoc' 118 | 119 | 120 | # -- Options for LaTeX output ------------------------------------------------ 121 | 122 | latex_elements = { 123 | # The paper size ('letterpaper' or 'a4paper'). 
124 | # 125 | # 'papersize': 'letterpaper', 126 | 127 | # The font size ('10pt', '11pt' or '12pt'). 128 | # 129 | # 'pointsize': '10pt', 130 | 131 | # Additional stuff for the LaTeX preamble. 132 | # 133 | # 'preamble': '', 134 | 135 | # Latex figure (float) alignment 136 | # 137 | # 'figure_align': 'htbp', 138 | } 139 | 140 | # Grouping the document tree into LaTeX files. List of tuples 141 | # (source start file, target name, title, 142 | # author, documentclass [howto, manual, or own class]). 143 | latex_documents = [ 144 | (master_doc, 'AcademicTracker.tex', 'Academic Tracker Documentation', 145 | 'Travis Thompson', 'manual'), 146 | ] 147 | 148 | 149 | # -- Options for manual page output ------------------------------------------ 150 | 151 | # One entry per manual page. List of tuples 152 | # (source start file, name, description, authors, manual section). 153 | man_pages = [ 154 | (master_doc, 'academictracker', 'Academic Tracker Documentation', 155 | [author], 1) 156 | ] 157 | 158 | 159 | # -- Options for Texinfo output ---------------------------------------------- 160 | 161 | # Grouping the document tree into Texinfo files. List of tuples 162 | # (source start file, target name, title, author, 163 | # dir menu entry, description, category) 164 | texinfo_documents = [ 165 | (master_doc, 'AcademicTracker', 'Academic Tracker Documentation', 166 | author, 'AcademicTracker', 'One line description of project.', 167 | 'Miscellaneous'), 168 | ] 169 | 170 | 171 | # -- Options for Epub output ------------------------------------------------- 172 | 173 | # Bibliographic Dublin Core info. 174 | epub_title = project 175 | epub_author = author 176 | epub_publisher = author 177 | epub_copyright = copyright 178 | 179 | # The unique identifier of the text. This can be a ISBN number 180 | # or the project homepage. 181 | # 182 | # epub_identifier = '' 183 | 184 | # A unique identification for the text. 
185 | # 186 | # epub_uid = '' 187 | 188 | # A list of files that should not be packed into the epub file. 189 | epub_exclude_files = ['search.html'] 190 | 191 | 192 | # -- Extension configuration ------------------------------------------------- 193 | 194 | # -- Options for intersphinx extension --------------------------------------- 195 | 196 | # Example configuration for intersphinx: refer to the Python standard library. 197 | intersphinx_mapping = {'https://docs.python.org/': None} 198 | 199 | # -- Options for todo extension ---------------------------------------------- 200 | 201 | # If true, `todo` and `todoList` produce output, else they produce nothing. 202 | todo_include_todos = True 203 | 204 | 205 | 206 | # def process_main_docstring(app, what, name, obj, options, lines): 207 | # if what == 'module' and "__main__" in name: 208 | # for i in range(len(lines)): 209 | # lines[i] = " " + lines[i] 210 | # lines.insert(0, "") 211 | # lines.insert(0, ".. code-block:: console") 212 | 213 | # def setup(app): 214 | # app.connect('autodoc-process-docstring', process_main_docstring) 215 | 216 | 217 | rinoh_documents = [ 218 | dict(doc='index', target='AcademicTracker', template='article') 219 | ] -------------------------------------------------------------------------------- /docs/guide.rst: -------------------------------------------------------------------------------- 1 | User Guide 2 | ========== 3 | 4 | Description 5 | ~~~~~~~~~~~ 6 | 7 | Academic Tracker was created to automate the process of making sure that federally 8 | funded publications get listed on PubMed and that the grant funding source for 9 | them is cited. 10 | 11 | Academic Tracker searches PubMed, ORCID, Crossref, and Google Scholar to look 12 | for publications. The 2 main use cases allow users to search by author names or 13 | a publication citation/reference. 
The output is customizable by the user, but in 14 | general will be a JSON file of publication information, a JSON file of email 15 | information if emails were sent, and text files of summary information. 16 | 17 | A secondary use case of searching by author names is to create a report of the 18 | collaborators they have worked with. This can be done by specifying the creation 19 | of that report in the configuration file. Details on reports are in the `documentation `__. 20 | 21 | 22 | Installation 23 | ~~~~~~~~~~~~ 24 | 25 | The Academic Tracker package runs under Python 3.7+. Use pip_ to install. 26 | Starting with Python 3.4, pip_ is included by default. 27 | 28 | 29 | Install on Linux, Mac OS X 30 | -------------------------- 31 | 32 | .. code:: bash 33 | 34 | python3 -m pip install academic_tracker 35 | 36 | 37 | Install on Windows 38 | ------------------ 39 | 40 | .. code:: bash 41 | 42 | py -3 -m pip install academic_tracker 43 | 44 | 45 | Upgrade on Linux, Mac OS X 46 | -------------------------- 47 | 48 | .. code:: bash 49 | 50 | python3 -m pip install academic_tracker --upgrade 51 | 52 | 53 | Upgrade on Windows 54 | ------------------ 55 | 56 | .. code:: bash 57 | 58 | py -3 -m pip install academic_tracker --upgrade 59 | 60 | 61 | 62 | Install inside virtualenv 63 | ------------------------- 64 | 65 | For an isolated install, you can run the same inside a virtualenv_. 66 | 67 | .. code:: bash 68 | 69 | $ virtualenv -p /usr/bin/python3 venv # create virtual environment, use python3 interpreter 70 | 71 | $ source venv/bin/activate # activate virtual environment 72 | 73 | $ python3 -m pip install academic_tracker # install academic_tracker as usual 74 | 75 | $ deactivate # if you are done working in the virtual environment 76 | 77 | Get the source code 78 | ~~~~~~~~~~~~~~~~~~~ 79 | 80 | Code is available on GitHub: https://github.com/MoseleyBioinformaticsLab/academic_tracker 81 | 82 | You can either clone the public repository: 83 | 84 | .. 
code:: bash 85 | 86 | $ git clone https://github.com/MoseleyBioinformaticsLab/academic_tracker.git 87 | 88 | Or, download the tarball and/or zipball: 89 | 90 | .. code:: bash 91 | 92 | $ curl -OL https://github.com/MoseleyBioinformaticsLab/academic_tracker/tarball/main 93 | 94 | $ curl -OL https://github.com/MoseleyBioinformaticsLab/academic_tracker/zipball/main 95 | 96 | Once you have a copy of the source, you can embed it in your own Python package, 97 | or install it into your system site-packages easily: 98 | 99 | .. code:: bash 100 | 101 | $ python3 -m pip install . 102 | 103 | Dependencies 104 | ~~~~~~~~~~~~ 105 | 106 | The Academic Tracker package depends on several Python libraries. The ``pip`` command 107 | will install all dependencies automatically, but if you wish to install them manually, 108 | run the following commands: 109 | 110 | * docopt_ for creating the command-line interface. 111 | * To install docopt_ run the following: 112 | 113 | .. code:: bash 114 | 115 | python3 -m pip install docopt # On Linux, Mac OS X 116 | py -3 -m pip install docopt # On Windows 117 | 118 | * pymed_ for querying PubMed. 119 | * To install the pymed_ Python library run the following: 120 | 121 | .. code:: bash 122 | 123 | python3 -m pip install pymed # On Linux, Mac OS X 124 | py -3 -m pip install pymed # On Windows 125 | 126 | * jsonschema_ for validating JSON. 127 | * To install the jsonschema_ Python library run the following: 128 | 129 | .. code:: bash 130 | 131 | python3 -m pip install jsonschema # On Linux, Mac OS X 132 | py -3 -m pip install jsonschema # On Windows 133 | 134 | * habanero_ for querying Crossref. 135 | * To install the habanero_ Python library run the following: 136 | 137 | .. code:: bash 138 | 139 | python3 -m pip install habanero # On Linux, Mac OS X 140 | py -3 -m pip install habanero # On Windows 141 | 142 | * orcid_ for querying ORCID. 143 | * To install the orcid_ Python library run the following: 144 | 145 | ..
code:: bash 146 | 147 | python3 -m pip install orcid # On Linux, Mac OS X 148 | py -3 -m pip install orcid # On Windows 149 | 150 | * scholarly_ for querying Google Scholar. 151 | * To install the scholarly_ Python library run the following: 152 | 153 | .. code:: bash 154 | 155 | python3 -m pip install scholarly # On Linux, Mac OS X 156 | py -3 -m pip install scholarly # On Windows 157 | 158 | * beautifulsoup4_ for parsing webpages. 159 | * To install the beautifulsoup4_ Python library run the following: 160 | 161 | .. code:: bash 162 | 163 | python3 -m pip install beautifulsoup4 # On Linux, Mac OS X 164 | py -3 -m pip install beautifulsoup4 # On Windows 165 | 166 | * fuzzywuzzy_ for fuzzy matching publication titles. 167 | * To install the fuzzywuzzy_ Python library run the following: 168 | 169 | .. code:: bash 170 | 171 | python3 -m pip install fuzzywuzzy # On Linux, Mac OS X 172 | py -3 -m pip install fuzzywuzzy # On Windows 173 | 174 | * python-docx_ for reading docx files. 175 | * To install the python-docx_ Python library run the following: 176 | 177 | .. code:: bash 178 | 179 | python3 -m pip install python-docx # On Linux, Mac OS X 180 | py -3 -m pip install python-docx # On Windows 181 | 182 | * pandas_ for easy data manipulation. 183 | * To install the pandas_ Python library run the following: 184 | 185 | .. code:: bash 186 | 187 | python3 -m pip install pandas # On Linux, Mac OS X 188 | py -3 -m pip install pandas # On Windows 189 | 190 | * openpyxl_ for saving Excel files in pandas. 191 | * To install the openpyxl_ Python library run the following: 192 | 193 | .. code:: bash 194 | 195 | python3 -m pip install openpyxl # On Linux, Mac OS X 196 | py -3 -m pip install openpyxl # On Windows 197 | 198 | * requests_ for making internet requests. 199 | * To install the requests_ Python library run the following: 200 | 201 | .. 
code:: bash 202 | 203 | python3 -m pip install requests # On Linux, Mac OS X 204 | py -3 -m pip install requests # On Windows 205 | 206 | * deepdiff_ for comparing publication data. 207 | * To install the deepdiff_ Python library run the following: 208 | 209 | .. code:: bash 210 | 211 | python3 -m pip install deepdiff # On Linux, Mac OS X 212 | py -3 -m pip install deepdiff # On Windows 213 | 214 | 215 | Basic usage 216 | ~~~~~~~~~~~ 217 | 218 | Academic Tracker expects at least a configuration JSON file, and possibly more 219 | depending on the usage. The 2 main use cases are author_search and reference_search, 220 | with the other usages mostly included to support those. author_search searches 221 | by the authors given in the configuration JSON file while reference_search searches 222 | by the publication references given in the reference file or URL. Details about 223 | the JSON files are in the :doc:`jsonschema` section, and more information about 224 | the use cases with examples are in the :doc:`tutorial` section. 225 | 226 | .. literalinclude:: ../src/academic_tracker/__main__.py 227 | :start-at: Usage: 228 | :end-before: """ 229 | :language: none 230 | 231 | 232 | 233 | .. _pip: https://pip.pypa.io/ 234 | .. _virtualenv: https://virtualenv.pypa.io/ 235 | .. _docopt: https://pypi.org/project/docopt/ 236 | .. _pymed: https://pypi.org/project/pymed/ 237 | .. _jsonschema: https://pypi.org/project/jsonschema/ 238 | .. _habanero: https://pypi.org/project/habanero/ 239 | .. _orcid: https://pypi.org/project/orcid/ 240 | .. _scholarly: https://pypi.org/project/scholarly/ 241 | .. _beautifulsoup4: https://pypi.org/project/beautifulsoup4/ 242 | .. _fuzzywuzzy: https://pypi.org/project/fuzzywuzzy/ 243 | .. _python-docx: https://pypi.org/project/python-docx/ 244 | .. _pandas: https://pypi.org/project/pandas/ 245 | .. _openpyxl: https://pypi.org/project/openpyxl/ 246 | .. _requests: https://pypi.org/project/requests/ 247 | .. 
_deepdiff: https://pypi.org/project/deepdiff/ -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Publication Tracker documentation master file, created by 2 | sphinx-quickstart on Fri Oct 8 00:10:07 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Academic Tracker's Documentation! 7 | ============================================ 8 | 9 | .. include:: ../README.rst 10 | 11 | Documentation index: 12 | ==================== 13 | 14 | .. toctree:: 15 | :maxdepth: 2 16 | :caption: Contents: 17 | 18 | guide 19 | tutorial 20 | jsonschema 21 | reporting 22 | tokenization 23 | api 24 | license 25 | todo 26 | changelog 27 | 28 | 29 | Indices and tables 30 | ================== 31 | 32 | * :ref:`genindex` 33 | * :ref:`modindex` 34 | * :ref:`search` 35 | -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | .. _license: 2 | 3 | License 4 | ======= 5 | 6 | .. include:: ../LICENSE -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. 
Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinxcontrib-napoleon 2 | sphinx_rtd_theme 3 | docopt >= 0.6.2 4 | pymed >= 0.8.9 5 | jsonschema >= 4.4.0 6 | habanero >= 1.0.0 7 | orcid >= 1.0.3 8 | scholarly >= 1.4.5 9 | beautifulsoup4 >= 4.9.3 10 | fuzzywuzzy >= 0.18.0 11 | python-docx >= 0.8.11 12 | pandas >= 0.24.2 13 | openpyxl >= 2.6.2 14 | requests >= 2.21.0 15 | deepdiff >= 5.7.0 -------------------------------------------------------------------------------- /docs/todo.rst: -------------------------------------------------------------------------------- 1 | TODO List 2 | ========= 3 | 4 | 5 | .. todolist:: 6 | 7 | 8 | 9 | Improve reference search to see if every author on the pub has the pub associated with them on ORCID. 10 | 11 | Let the authors_file for add_authors be an excel file. 12 | 13 | Add recipes for common use cases such as a trainee project to the documentation. 14 | 15 | Add PMCID and grants to pymed package. 16 | 17 | Add expanded search to orcid package or look for more up to date package to use. Expanded search was added to ORCID's API with 3.0 release. orcid package appears to be 2.0 only. 18 | 19 | Add capability to get the citations each paper cites. 20 | 21 | Switch to a merge style from each source, so try to fill in information that wasn't found previously. 22 | Keep the queries from each source, and do 2 passes with the new merge logic. 
This makes it so that if 23 | a publication was on PubMed, but an author couldn't be matched, but an author was matched at another 24 | source we can merge the information with the second pass. Would need to change the logic to first look 25 | and see if the publication is in the list already and if it is then we don't need to make an author match 26 | because an author match was made from another source. The big changes are to keep the queries, do a second 27 | pass, merge information, and use existence in the list as verification in addition to an author match. 28 | 29 | Save references out in "citation" format. Look at formats Google Scholar offers, example EndNote. -------------------------------------------------------------------------------- /docs/tokenization.rst: -------------------------------------------------------------------------------- 1 | Tokenization 2 | ============ 3 | 4 | Academic Tracker is aware of MLA, APA, Harvard, Chicago, and Vancouver style citations. 5 | Each line is determined to be a citation in one of the styles and then parsed into 6 | author, title, and tail sections based on the style. Academic Tracker then looks 7 | for a DOI or PMID in the tail section. A DOI must be indicated with "DOI: doi_address" 8 | and a PMID must be indicated with "PMID: pubmed_id" (PMID: and DOI: are case insensitive). 9 | These are optional and only help in searching for the publication and verifying 10 | identity in any search results. 11 | 12 | Although the citation styles Academic Tracker is aware of have standards for 13 | citations in reality these standards are not strictly adhered to by the public. 14 | In developing the citation parsing for Academic Tracker a few different sources 15 | that generate citations supposedly matching one of the styles were used, including 16 | Google Scholar. These sources often do not match what is indicated by the standards 17 | for each style. 
Due to these discrepancies and the somewhat subjective interpretations 18 | of "standards" Academic Tracker parses citations with a more heuristic approach. 19 | The point is, do not expect Academic Tracker to be able to flawlessly parse even 20 | machine generated citations. It is unfortunately the nature of citations not to 21 | be standard and therefore difficult to parse. 22 | 23 | 24 | Regular Expressions 25 | ~~~~~~~~~~~~~~~~~~~ 26 | The specific regular expressions for each style are shown below. The regular expressions 27 | break 1 line into 3 parts. The left most part is authors, the middle is the title, 28 | and the end is the tail. The authors part is then further tokenized into individual 29 | authors, and a DOI and PMID looked for in the tail. The specifics of how the authors 30 | are tokenized will not be described here, but the code can be found in the citation_parsing.py 31 | file in the Academic Tracker source code. 32 | 33 | .. code-block:: console 34 | 35 | "MLA":r"([^0-9!@#$%^*()[\]_+=\\|<>:;'\"{}`~/?]+)\s+\"(.*)\"\s+(.*)" 36 | "APA":r"([^0-9!@#$%^*()[\]_+=\\|<>:;'\"{}`~/?]+)\s+\(\d\d\d\d\)\.\s+([^\.]+)\.\s+(.*)" 37 | "Chicago":r"([^0-9!@#$%^*()[\]_+=\\|<>:;'\"{}`~/?]+)\s+\"(.*)\"\s+(.*)" 38 | "Harvard":r"([^0-9!@#$%^*()[\]_+=\\|<>:;'\"{}`~/?]+)\s+\d\d\d\d\.\s+([^\.]+)\.\s+(.*)" 39 | "Vancouver":r"([^0-9!@#$%^*()[\]_+=\\|<>:;'\"{}`~/?.]+)\.\s+([^\.]+)\.\s+(.*)" 40 | 41 | 42 | Special Cases 43 | ~~~~~~~~~~~~~ 44 | There are 2 special cases where tokenization is not done as described above. 45 | 46 | One case is where the reference is a MyNCBI My Bibliography page. For this case each 47 | page of the bibliography is visited and the references are tokenized using the 48 | HTML tags. The specifics can be found in the tokenize_myncbi_citations function in the 49 | citation_parsing.py file of the source code. 50 | 51 | The other case is where the reference is a MEDLINE_ formatted file. 
For this type 52 | of file the tags on the left hand side of the file are used to identify the relevant 53 | tokens. The specifics can be found in the parse_MEDLINE_format function in the 54 | citation_parsing.py file of the source code. 55 | 56 | 57 | 58 | .. _MEDLINE: https://www.nlm.nih.gov/bsd/mms/medlineelements.html -------------------------------------------------------------------------------- /example_configs/many_projects_and_authors_with_summary_reports__template.json: -------------------------------------------------------------------------------- 1 | { 2 | "comments": [ "This is a template configuration file to generate cumulative reports.", 3 | "The reports have repeated publications for authors that are on the same", 4 | "publication or in multiple projects. These reports are created so authors", 5 | "can easily filter to the publications they are responsible for." ], 6 | "template_author": "Travis Thompson", 7 | "project_descriptions": { 8 | "[Project-Name]": { 9 | "affiliations": [ 10 | "[institution]", 11 | "..." 12 | ], 13 | "authors": [ 14 | "[author-name]", 15 | "..." 16 | ], 17 | "project_report": { 18 | "columns": { 19 | "Author Searched": "<author_name_search>", 20 | "First Author": "<first_author>", 21 | "Last Author": "<last_author>", 22 | "All Authors": "<authors>", 23 | "Article Title": "<title>", 24 | "Journal Name": "<journal>", 25 | "Journal Publication Date": "<publication_month>-<publication_day>-<publication_year>", 26 | "PubMed PMID #": "<PMID>", 27 | "PubMed Central PMCID #": "<PMCID>", 28 | "DOI #": "<DOI>" 29 | }, 30 | "sort": [ 31 | "Author Searched" 32 | ], 33 | "file_format": "[xlsx|json]" 34 | }, 35 | "cutoff_year": [integer-year], 36 | "grants": [ 37 | "[grant-id]", 38 | "..." 39 | ] 40 | }, 41 | ...
42 | }, 43 | "ORCID_search": { 44 | "ORCID_key": "[key]", 45 | "ORCID_secret": "[secret]" 46 | }, 47 | "PubMed_search": { 48 | "PubMed_email": "[email]" 49 | }, 50 | "Crossref_search": { 51 | "mailto_email": "[email]" 52 | }, 53 | "summary_report": { 54 | "columns": { 55 | "Project": "<project_name>", 56 | "Author Searched": "<author_name_search>", 57 | "First Author": "<first_author>", 58 | "Last Author": "<last_author>", 59 | "All Authors": "<authors>", 60 | "Article Title": "<title>", 61 | "Journal Name": "<journal>", 62 | "Journal Publication Date": "<publication_month>-<publication_day>-<publication_year>", 63 | "PubMed PMID #": "<PMID>", 64 | "PubMed Central PMCID #": "<PMCID>", 65 | "DOI #": "<DOI>" 66 | }, 67 | "sort": [ 68 | "Project", 69 | "Author Searched" 70 | ], 71 | "file_format": "[xlsx|json]" 72 | }, 73 | "Authors": { 74 | "[author-name]": { 75 | "ORCID": "[id]", 76 | "affiliations": [ 77 | "[institution]" 78 | ], 79 | "first_name": "[first-name]", 80 | "last_name": "[last-name]", 81 | "pubmed_name_search": "[author-name-for-searching]", 82 | "scholar_id": "[id]" 83 | }, 84 | ... 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /example_configs/many_projects_and_authors_with_summary_reports_no_duplicates__template.json: -------------------------------------------------------------------------------- 1 | { 2 | "comments": ["This is an example configuration file used by the University of Kentucky.", 3 | "Academic Tracker is ran once a month and the reports are cumulative.", 4 | "The reports do not have duplicate publications except for the case where", 5 | "a title is too dissimilar between web portals. These reports are created", 6 | "so it is easy to see which publications need attention."], 7 | "template_author": "Travis Thompson", 8 | "project_descriptions": { 9 | "[Project-Name]": { 10 | "affiliations": [ 11 | "[institution]", 12 | "..." 13 | ], 14 | "authors": [ 15 | "[author-name]", 16 | "..." 
17 | ], 18 | "project_report": { 19 | "columns": { 20 | "First Author": "<first_author>", 21 | "Last Author": "<last_author>", 22 | "All Authors": "<authors>", 23 | "Article Title": "<title>", 24 | "Journal Name": "<journal>", 25 | "Journal Publication Date": "<publication_month>-<publication_day>-<publication_year>", 26 | "PubMed PMID #": "<PMID>", 27 | "PubMed Central PMCID #": "<PMCID>", 28 | "DOI #": "<DOI>" 29 | }, 30 | "sort": [ 31 | "Author Searched" 32 | ], 33 | "file_format": "[xlsx|json]" 34 | }, 35 | "cutoff_year": [integer-year], 36 | "grants": [ 37 | "[grant-id]", 38 | "..." 39 | ] 40 | }, 41 | ... 42 | }, 43 | "ORCID_search": { 44 | "ORCID_key": "[key]", 45 | "ORCID_secret": "[secret]" 46 | }, 47 | "PubMed_search": { 48 | "PubMed_email": "[email]" 49 | }, 50 | "Crossref_search": { 51 | "mailto_email": "[email]" 52 | }, 53 | "summary_report": { 54 | "columns": { 55 | "First Author": "<first_author>", 56 | "Last Author": "<last_author>", 57 | "All Authors": "<authors>", 58 | "Article Title": "<title>", 59 | "Journal Name": "<journal>", 60 | "Journal Publication Date": "<publication_month>-<publication_day>-<publication_year>", 61 | "PubMed PMID #": "<PMID>", 62 | "PubMed Central PMCID #": "<PMCID>", 63 | "DOI #": "<DOI>" 64 | }, 65 | "sort": [ 66 | "Project", 67 | "Author Searched" 68 | ], 69 | "file_format": "[xlsx|json]" 70 | }, 71 | "Authors": { 72 | "[author-name]": { 73 | "ORCID": "[id]", 74 | "affiliations": [ 75 | "[institution]" 76 | ], 77 | "first_name": "[first-name]", 78 | "last_name": "[last-name]", 79 | "pubmed_name_search": "[author-name-for-searching]", 80 | "scholar_id": "[id]" 81 | }, 82 | ... 
83 | } 84 | } 85 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel", "setuptools_scm[toml]>=6.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "academic_tracker" 7 | description = "Find publications on PubMed, Crossref, ORCID, and Google Scholar for given authors or references." 8 | readme = "README.rst" 9 | requires-python = ">=3.8" 10 | keywords = ["PubMed", "publications", "citations", "Crossref", "ORCID", "Google Scholar"] 11 | license = {file = "LICENSE"} 12 | classifiers = [ 13 | 'Environment :: Console', 14 | 'Intended Audience :: Developers', 15 | 'Intended Audience :: Science/Research', 16 | 'License :: OSI Approved :: BSD License', 17 | 'Operating System :: OS Independent', 18 | 'Programming Language :: Python :: 3.8', 19 | 'Programming Language :: Python :: 3.9', 20 | 'Programming Language :: Python :: 3.10', 21 | 'Programming Language :: Python :: 3.11', 22 | 'Programming Language :: Python :: 3.12', 23 | 'Topic :: Software Development :: Libraries :: Python Modules', 24 | ] 25 | dynamic = ["version", "dependencies"] 26 | 27 | [project.urls] 28 | "Homepage" = "https://github.com/MoseleyBioinformaticsLab/academic_tracker" 29 | "Documentation" = "https://moseleybioinformaticslab.github.io/academic_tracker/" 30 | "GitHub" = "https://github.com/MoseleyBioinformaticsLab/academic_tracker" 31 | "Issues" = "https://github.com/MoseleyBioinformaticsLab/academic_tracker/issues" 32 | 33 | [tool.setuptools.dynamic] 34 | dependencies = {file = "requirements.txt"} 35 | 36 | [project.scripts] 37 | academic_tracker = "academic_tracker.__main__:main" 38 | 39 | [tool.setuptools_scm] 40 | write_to = "src/academic_tracker/_version.py" -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | docopt >= 0.6.2 2 | pymed >= 0.8.9 3 | jsonschema >= 4.4.0 4 | habanero >= 1.0.0 5 | orcid >= 1.0.3 6 | scholarly >= 1.4.5 7 | beautifulsoup4 >= 4.9.3 8 | fuzzywuzzy >= 0.18.0 9 | python-docx >= 0.8.11 10 | pandas >= 1.3.5 11 | openpyxl >= 2.6.2 12 | requests >= 2.21.0 13 | deepdiff >= 5.7.0 14 | setuptools_scm >= 7.0.5 -------------------------------------------------------------------------------- /src/academic_tracker/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This package has the following modules: 4 | 5 | ``user_input_checking`` 6 | This module contains functions for checking user input. 7 | 8 | ``tracker_schema`` 9 | This module contains the schema for validating user input. 10 | 11 | ``athr_srch_modularized`` 12 | This module contains functions to complete the author_search command modularized into pieces. 13 | 14 | ``athr_srch_webio`` 15 | This module contains functions for author_search to interface with the internet. 16 | 17 | ``athr_srch_emails_and_reports`` 18 | This module contains functions to create emails and reports for author_search. 19 | 20 | ``ref_srch_modularized`` 21 | This module contains functions to complete the reference_search command modularized into pieces. 22 | 23 | ``ref_srch_webio`` 24 | This module contains functions for reference_search to interface with the internet. 25 | 26 | ``ref_srch_emails_and_reports`` 27 | This module contains functions to create emails and reports for reference_search. 28 | 29 | ``citation_parsing`` 30 | This module contains functions for parsing references and citations for reference_search. 31 | 32 | ``fileio`` 33 | This module contains functions for reading and writing files. 34 | 35 | ``helper_functions`` 36 | This module contains functions that help the other modules function. 
def _exit_if_missing(path):
    """Report a missing file and exit the program, otherwise do nothing.

    Args:
        path (str): filepath that is about to be read.

    Raises:
        SystemExit: If nothing exists at path.
    """
    if not os.path.exists(path):
        helper_functions.vprint("No such file: " + path)
        sys.exit()



def load_json(filepath):
    """Read a JSON file, exiting the program if the file does not exist.

    Args:
        filepath (str): filepath to the json file

    Returns:
        internal_data (dict): json read from file in a dictionary

    Raises:
        SystemExit: If filepath does not exist.
        Exception: Any error raised while opening or decoding the file propagates unchanged.
    """
    _exit_if_missing(filepath)

    ## JSON is UTF-8 by specification, so don't rely on the platform's default encoding.
    with open(filepath, "r", encoding="utf-8") as f:
        return json.load(f)



def read_previous_publications(filepath):
    """Read in the previous publication json file.

    If filepath is given and is "ignore" (case insensitive) then no previous publications
    are used. If any other filepath is given then that file is read in. If filepath is not
    given then the "tracker-timestamp" directories in the current working directory are
    checked from newest to oldest and the first publications.json file found is read in.
    If no previous publications are found then an empty dict is returned for prev_pubs.

    Args:
        filepath (str or None): path to the publications JSON to read in.

    Returns:
        has_previous_pubs (bool): True means that a previous publications file was found
        prev_pubs (dict): dict where keys are publication ids and values are a dict of publication attributes
    """
    if filepath:
        if filepath.lower() == "ignore":
            return False, {}
        return True, load_json(filepath)

    ## Find everything matching the tracker directory structure. The timestamps are fixed width
    ## so sorting them as strings is the same as sorting them as ints, and keeps the name intact.
    matches = (re.match(r"tracker-(\d{10})", entry) for entry in os.listdir())
    timestamps = sorted((match.group(1) for match in matches if match), reverse=True)

    for timestamp in timestamps:
        prev_publication_filepath = os.path.join(os.getcwd(), "tracker-" + timestamp, "publications.json")
        if os.path.exists(prev_publication_filepath):
            return True, load_json(prev_publication_filepath)

    return False, {}



def save_emails_to_file(email_messages, save_dir_name):
    """Save email_messages to "emails.json" in save_dir_name in the current working directory.

    Args:
        email_messages (dict): keys are author names and values are the contents of the email
        save_dir_name (str): directory name to append to the current working directory to save the emails.json file in
    """
    ## Key order is kept as given so the emails stay in the order they were created.
    save_json_to_file(save_dir_name, "emails.json", email_messages, sort_keys=False)



def save_publications_to_file(save_dir_name, publication_dict, prev_pubs):
    """Saves the publication_dict to "publications.json" in save_dir_name in the current working directory.

    prev_pubs and publication_dict will be combined before saving. Entries in publication_dict
    replace entries in prev_pubs with the same id. Neither input is modified.

    Args:
        save_dir_name (str): directory name to append to the current working directory to save the publications.json file in
        publication_dict (dict): dictionary with publication ids as the keys to the dict
        prev_pubs (dict): dictionary of previously found publications with publication ids as the keys
    """
    ## Merge into a new dict so the caller's prev_pubs isn't changed as a side effect of saving.
    combined_pubs = {**prev_pubs, **publication_dict}
    save_json_to_file(save_dir_name, "publications.json", combined_pubs, sort_keys=True)



def read_text_from_docx(doc_path):
    """Open docx file at doc_path and read contents into a string.

    Args:
        doc_path (str): path to docx file.

    Returns:
        (str): A string of the contents of the docx file. Each paragraph concatenated with a newline character.

    Raises:
        SystemExit: If doc_path does not exist.
        Exception: Any error raised while opening or parsing the file propagates unchanged.
    """
    _exit_if_missing(doc_path)

    ## https://stackoverflow.com/questions/25228106/how-to-extract-text-from-an-existing-docx-file-using-python-docx
    document = docx.Document(doc_path)
    return "\n".join("".join(text_node.text for text_node in paragraph._element.xpath(".//w:t"))
                     for paragraph in document.paragraphs)



def read_text_from_txt(doc_path):
    """Open txt or csv file at doc_path and read contents into a string.

    Args:
        doc_path (str): path to txt or csv file.

    Returns:
        (str): A string of the contents of the txt or csv file.

    Raises:
        SystemExit: If doc_path does not exist.
        Exception: Any error raised while opening or decoding the file propagates unchanged.
    """
    _exit_if_missing(doc_path)

    with open(doc_path, encoding="utf-8") as document:
        return document.read()



def read_csv(doc_path):
    """Read csv into a pandas dataframe.

    Args:
        doc_path (str): path to the csv file to read in.

    Returns:
        df (DataFrame): Pandas dataframe of the csv contents.

    Raises:
        SystemExit: If doc_path does not exist.
        Exception: Any error raised while opening or parsing the file propagates unchanged.
    """
    _exit_if_missing(doc_path)

    return pandas.read_csv(doc_path)



def save_string_to_file(save_dir_name, file_name, text_to_save):
    """Save a string to file.

    Args:
        save_dir_name (str): directory in the current working directory to save the string to.
        file_name (str): string to name the file.
        text_to_save (str): the string to put in the file contents.
    """
    save_path = os.path.join(os.getcwd(), save_dir_name, file_name)

    ## Written as bytes so the text is always UTF-8 and newlines are not translated.
    with open(save_path, 'wb') as outFile:
        outFile.write(text_to_save.encode("utf-8"))



def save_json_to_file(save_dir_name, file_name, json_dict, sort_keys=True):
    """Saves the json_dict to file_name in save_dir_name in the current working directory.

    Args:
        save_dir_name (str): directory name to append to the current working directory to save the json_dict in.
        file_name (str): the name to give the file, should have '.json' as the extension.
        json_dict (dict or list): data to save to file.
        sort_keys (bool): passed to json.dumps, if True sort the dictionary keys before saving.
    """
    save_path = os.path.join(os.getcwd(), save_dir_name, file_name)

    with open(save_path, 'w', encoding="utf-8") as outFile:
        print(json.dumps(json_dict, indent=2, sort_keys=sort_keys), file=outFile)
-------------------------------------------------------------------------------- /tests/test_citation_parsing.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | 5 | import pytest 6 | 7 | from academic_tracker.citation_parsing import parse_text_for_citations, tokenize_Vancouver_authors, tokenize_MLA_or_Chicago_authors 8 | from academic_tracker.citation_parsing import tokenize_APA_or_Harvard_authors, tokenize_myncbi_citations, parse_MEDLINE_format 9 | from academic_tracker.fileio import load_json, read_text_from_txt 10 | 11 | 12 | 13 | 14 | def test_parse_text_for_citations(): 15 | """parse_text_for_citations applied to parse_citations_test.txt must equal the stored tokenized_parsing_test.json.""" 16 | text = read_text_from_txt(os.path.join("tests", "testing_files", "parse_citations_test.txt")) 17 | 18 | expected_tokenized_citations = load_json(os.path.join("tests", "testing_files", "tokenized_parsing_test.json")) 19 | 20 | actual_tokenized_citations = parse_text_for_citations(text) 21 | 22 | assert expected_tokenized_citations == actual_tokenized_citations 23 | 24 | 25 | 26 | @pytest.mark.parametrize("authors_string, authors", [ 27 | ("last_name initials", [{"last":"last_name", "initials":"initials"}]), 28 | ("last_name initials et al", [{"last":"last_name", "initials":"initials"}]), 29 | ("last_name1 initials1, last_name2 initials2", [{"last":"last_name1", "initials":"initials1"}, 30 | {"last":"last_name2", "initials":"initials2"}]), 31 | ("last_name1 initials1, and last_name2 initials2", [{"last":"last_name1", "initials":"initials1"}, 32 | {"last":"last_name2", "initials":"initials2"}]), 33 | ("last_name1 initials1 and last_name2 initials2", [{"last":"last_name1", "initials":"initials1"}, 34 | {"last":"last_name2", "initials":"initials2"}]), 35 | ("last_name1 initials1 & last_name2 initials2", [{"last":"last_name1", "initials":"initials1"}, 36 | {"last":"last_name2", "initials":"initials2"}]), 37 | ]) 38 | 39 | def test_tokenize_Vancouver_authors(authors_string, authors): 40 | """tokenize_Vancouver_authors must turn each parametrized authors_string into the expected list of dicts with last and initials keys.""" 41 | assert authors
== tokenize_Vancouver_authors(authors_string) 42 | 43 | 44 | 45 | @pytest.mark.parametrize("authors_string, authors", [ 46 | ("last_name, first_name middle_name", [{"first":"first_name", "middle":"middle_name", "last":"last_name"}]), 47 | ("last_name, first_name", [{"first":"first_name", "middle":"", "last":"last_name"}]), 48 | ("last_name, first_name middle_name et al.", [{"first":"first_name", "middle":"middle_name", "last":"last_name"}]), 49 | ("first_name middle_name last_name", [{"first":"first_name", "middle":"middle_name", "last":"last_name"}]), 50 | ("last_name1, first_name1 middle_name1, first_name2 middle_name2 last_name2", [{"first":"first_name1", "middle":"middle_name1", "last":"last_name1"}, 51 | {"first":"first_name2", "middle":"middle_name2", "last":"last_name2"}]), 52 | ("last_name1, first_name1 middle_name1, and first_name2 middle_name2 last_name2", [{"first":"first_name1", "middle":"middle_name1", "last":"last_name1"}, 53 | {"first":"first_name2", "middle":"middle_name2", "last":"last_name2"}]), 54 | ("last_name1, first_name1 middle_name1 and first_name2 middle_name2 last_name2", [{"first":"first_name1", "middle":"middle_name1", "last":"last_name1"}, 55 | {"first":"first_name2", "middle":"middle_name2", "last":"last_name2"}]), 56 | ("last_name1, first_name1 middle_name1, & first_name2 middle_name2 last_name2", [{"first":"first_name1", "middle":"middle_name1", "last":"last_name1"}, 57 | {"first":"first_name2", "middle":"middle_name2", "last":"last_name2"}]), 58 | ("last_name1, first_name1 middle_name1, & last_name2", [{"first":"first_name1", "middle":"middle_name1", "last":"last_name1"}, 59 | {"first":"", "middle":"", "last":"last_name2"}]), 60 | ("last_name1, first_name1 middle_name1, & first_name2 last_name2", [{"first":"first_name1", "middle":"middle_name1", "last":"last_name1"}, 61 | {"first":"first_name2", "middle":"", "last":"last_name2"}]), 62 | ]) 63 | 64 | 65 | def test_tokenize_MLA_or_Chicago_authors(authors_string, authors): 66 | """tokenize_MLA_or_Chicago_authors must turn each parametrized authors_string into the expected list of dicts with first, middle and last keys.""" 67 |
assert authors == tokenize_MLA_or_Chicago_authors(authors_string) 68 | 69 | 70 | 71 | 72 | @pytest.mark.parametrize("authors_string, authors", [ 73 | ("last_name, A.B.", [{"last":"last_name", "initials":"A.B."}]), 74 | (", A.B.", [{"last":"", "initials":"A.B."}]), 75 | ("last_name, A.B. et al.", [{"last":"last_name", "initials":"A.B."}]), 76 | ("last_name1, A.B., last_name2, C.D.", [{"last":"last_name1", "initials":"A.B."}, 77 | {"last":"last_name2", "initials":"C.D."}]), 78 | ("last_name1, A.B., and last_name2, C.D.", [{"last":"last_name1", "initials":"A.B."}, 79 | {"last":"last_name2", "initials":"C.D."}]), 80 | ("last_name1, A.B. and last_name2, C.D.", [{"last":"last_name1", "initials":"A.B."}, 81 | {"last":"last_name2", "initials":"C.D."}]), 82 | ("last_name1, A.B. & last_name2, C.D.", [{"last":"last_name1", "initials":"A.B."}, 83 | {"last":"last_name2", "initials":"C.D."}]), 84 | ("last_name1, A.B. & last_name2, C.D.,", [{"last":"last_name1", "initials":"A.B."}, 85 | {"last":"last_name2", "initials":"C.D."}]), 86 | ("last_name1, A.B. 
& last_name2", [{"last":"last_name1", "initials":"A.B."}, 87 | {"last":"last_name2", "initials":""}]), 88 | ]) 89 | 90 | def test_tokenize_APA_or_Harvard_authors(authors_string, authors): 91 | """tokenize_APA_or_Harvard_authors must turn each parametrized authors_string into the expected list of dicts with last and initials keys.""" 92 | assert authors == tokenize_APA_or_Harvard_authors(authors_string) 93 | 94 | 95 | 96 | 97 | def test_tokenize_myncbi_citations(): 98 | pages = load_json(os.path.join("tests", "testing_files", "myncbi_webpages.json")) 99 | # Only the first stored page, pages[0], is tokenized and compared below. 100 | expected_tokenized_citations = load_json(os.path.join("tests", "testing_files", "tokenized_myncbi_page1.json")) 101 | 102 | actual_tokenized_citations = tokenize_myncbi_citations(pages[0]) 103 | 104 | assert expected_tokenized_citations == actual_tokenized_citations 105 | 106 | 107 | 108 | 109 | def test_parse_MEDLINE_format(): 110 | expected_tokenized_citations = load_json(os.path.join("tests", "testing_files", "tokenized_MEDLINE.json")) 111 | # The text of medline.txt is parsed and must equal the stored tokenized_MEDLINE.json. 112 | actual_tokenized_citations = parse_MEDLINE_format(read_text_from_txt(os.path.join("tests", "testing_files", "medline.txt"))) 113 | 114 | assert expected_tokenized_citations == actual_tokenized_citations 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /tests/test_webio_no_internet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | 5 | import os 6 | import copy 7 | 8 | import pytest 9 | import requests 10 | 11 | from fixtures import authors_dict 12 | from academic_tracker.webio import search_ORCID_for_ids, search_Google_Scholar_for_ids 13 | from academic_tracker.webio import get_DOI_from_Crossref 14 | # from academic_tracker.webio import get_grants_from_Crossref 15 | from academic_tracker.fileio import load_json 16 | 17 | # Autouse guard: requests.get is replaced for every test in this module so that calling it raises RuntimeError. 18 | @pytest.fixture(autouse=True) 19 | def disable_network_calls(monkeypatch): 20 | def stunted_get(): 21 | raise RuntimeError("Network access not allowed during testing!") 22 | monkeypatch.setattr(requests, "get", lambda *args,
**kwargs: stunted_get()) 23 | 24 | 25 | # Stored ORCID search response; the tests below return it from the patched PublicAPI.search. 26 | @pytest.fixture 27 | def ORCID_query(): 28 | return load_json(os.path.join("tests", "testing_files", "ORCID_author_search_query.json")) 29 | 30 | # authors_dict is passed as the fixture provides it; the result must equal a deep copy taken before the call. 31 | def test_search_ORCID_for_ids_already_has_id(ORCID_query, authors_dict, mocker): 32 | def mock_query(*args, **kwargs): 33 | return ORCID_query 34 | mocker.patch("academic_tracker.webio.orcid.PublicAPI.search", mock_query) 35 | 36 | def mock_token(*args, **kwargs): 37 | return "sdfg" 38 | mocker.patch("academic_tracker.webio.orcid.PublicAPI.get_search_token_from_orcid", mock_token) 39 | 40 | authors_dict_check = copy.deepcopy(authors_dict) 41 | 42 | assert search_ORCID_for_ids("qwerqwer", "asdfasdf", authors_dict) == authors_dict_check 43 | 44 | # Andrew Morris loses the affiliations key and has the ORCID blanked; the result must equal that modified input. 45 | def test_search_ORCID_for_ids_no_affiliations(ORCID_query, authors_dict, mocker): 46 | def mock_query(*args, **kwargs): 47 | return ORCID_query 48 | mocker.patch("academic_tracker.webio.orcid.PublicAPI.search", mock_query) 49 | 50 | def mock_token(*args, **kwargs): 51 | return "sdfg" 52 | mocker.patch("academic_tracker.webio.orcid.PublicAPI.get_search_token_from_orcid", mock_token) 53 | 54 | del authors_dict["Andrew Morris"]["affiliations"] 55 | authors_dict["Andrew Morris"]["ORCID"] = "" 56 | 57 | authors_dict_check = copy.deepcopy(authors_dict) 58 | 59 | assert search_ORCID_for_ids("qwerqwer", "asdfasdf", authors_dict) == authors_dict_check 60 | 61 | # Only the ORCID of Andrew Morris is blanked; the expected result still has it blank. 62 | def test_search_ORCID_for_ids_not_found(ORCID_query, authors_dict, mocker): 63 | def mock_query(*args, **kwargs): 64 | return ORCID_query 65 | mocker.patch("academic_tracker.webio.orcid.PublicAPI.search", mock_query) 66 | 67 | def mock_token(*args, **kwargs): 68 | return "sdfg" 69 | mocker.patch("academic_tracker.webio.orcid.PublicAPI.get_search_token_from_orcid", mock_token) 70 | 71 | authors_dict["Andrew Morris"]["ORCID"] = "" 72 | 73 | authors_dict_check = copy.deepcopy(authors_dict) 74 | 75 | assert search_ORCID_for_ids("qwerqwer", "asdfasdf", authors_dict) ==
authors_dict_check 76 | 77 | # With the ORCID blanked and the affiliations set to Bristol, the ORCID is expected to come back as 0000-0003-1910-4865. 78 | def test_search_ORCID_for_ids_found(ORCID_query, authors_dict, mocker): 79 | def mock_query(*args, **kwargs): 80 | return ORCID_query 81 | mocker.patch("academic_tracker.webio.orcid.PublicAPI.search", mock_query) 82 | 83 | def mock_token(*args, **kwargs): 84 | return "sdfg" 85 | mocker.patch("academic_tracker.webio.orcid.PublicAPI.get_search_token_from_orcid", mock_token) 86 | 87 | authors_dict["Andrew Morris"]["ORCID"] = "" 88 | authors_dict["Andrew Morris"]["affiliations"] = ["Bristol"] 89 | 90 | authors_dict_check = copy.deepcopy(authors_dict) 91 | authors_dict_check["Andrew Morris"]["ORCID"] = "0000-0003-1910-4865" 92 | 93 | assert search_ORCID_for_ids("qwerqwer", "asdfasdf", authors_dict) == authors_dict_check 94 | 95 | 96 | 97 | # Stored author search results; the tests below return them from the patched scholarly.search_author. 98 | @pytest.fixture 99 | def scholarly_authors(): 100 | return load_json(os.path.join("tests", "testing_files", "scholarly_author_query.json")) 101 | 102 | # authors_dict is passed as the fixture provides it; the result must equal a deep copy taken before the call. 103 | def test_search_Google_Scholar_for_ids_already_has_id(scholarly_authors, authors_dict, mocker): 104 | def mock_queried_author(*args, **kwargs): 105 | return scholarly_authors 106 | mocker.patch("academic_tracker.webio.scholarly.scholarly.search_author", mock_queried_author) 107 | 108 | authors_dict_check = copy.deepcopy(authors_dict) 109 | 110 | assert search_Google_Scholar_for_ids(authors_dict) == authors_dict_check 111 | 112 | # Andrew Morris loses the affiliations key and has scholar_id blanked; the result must equal that modified input. 113 | def test_search_Google_Scholar_for_ids_no_affiliations(scholarly_authors, authors_dict, mocker): 114 | def mock_queried_author(*args, **kwargs): 115 | return scholarly_authors 116 | mocker.patch("academic_tracker.webio.scholarly.scholarly.search_author", mock_queried_author) 117 | 118 | del authors_dict["Andrew Morris"]["affiliations"] 119 | authors_dict["Andrew Morris"]["scholar_id"] = "" 120 | 121 | authors_dict_check = copy.deepcopy(authors_dict) 122 | 123 | assert search_Google_Scholar_for_ids(authors_dict) == authors_dict_check 124 | 125 | 126 | def
test_search_Google_Scholar_for_ids_not_found(scholarly_authors, authors_dict, mocker): 127 | def mock_queried_author(*args, **kwargs): 128 | return scholarly_authors 129 | mocker.patch("academic_tracker.webio.scholarly.scholarly.search_author", mock_queried_author) 130 | # scholar_id is blanked and first_name replaced with asdf; the expected result keeps scholar_id blank. 131 | authors_dict["Andrew Morris"]["scholar_id"] = "" 132 | authors_dict["Andrew Morris"]["first_name"] = "asdf" 133 | 134 | authors_dict_check = copy.deepcopy(authors_dict) 135 | 136 | assert search_Google_Scholar_for_ids(authors_dict) == authors_dict_check 137 | 138 | # With only scholar_id blanked, it is expected to come back as -j7fxnEAAAAJ. 139 | def test_search_Google_Scholar_for_ids_found(scholarly_authors, authors_dict, mocker): 140 | def mock_queried_author(*args, **kwargs): 141 | return scholarly_authors 142 | mocker.patch("academic_tracker.webio.scholarly.scholarly.search_author", mock_queried_author) 143 | 144 | authors_dict["Andrew Morris"]["scholar_id"] = "" 145 | 146 | authors_dict_check = copy.deepcopy(authors_dict) 147 | authors_dict_check["Andrew Morris"]["scholar_id"] = "-j7fxnEAAAAJ" 148 | 149 | assert search_Google_Scholar_for_ids(authors_dict) == authors_dict_check 150 | 151 | 152 | 153 | 154 | # habanero Crossref.works is patched to return the stored Crossref_DOI_query.json; the title below must yield DOI 10.1353/pbm.2020.0014. 155 | def test_get_DOI_from_Crossref_DOI_found(mocker): 156 | def mock_query(*args, **kwargs): 157 | return load_json(os.path.join("tests", "testing_files", "Crossref_DOI_query.json")) 158 | mocker.patch("academic_tracker.webio.habanero.Crossref.works", mock_query) 159 | 160 | assert get_DOI_from_Crossref("The Existential Dimension to Aging", "ptth222@uky.edu") == '10.1353/pbm.2020.0014' 161 | 162 | # Same stored response, queried with the title asdfasdf: the result must be None. 163 | def test_get_DOI_from_Crossref_DOI_not_found(mocker): 164 | def mock_query(*args, **kwargs): 165 | return load_json(os.path.join("tests", "testing_files", "Crossref_DOI_query.json")) 166 | mocker.patch("academic_tracker.webio.habanero.Crossref.works", mock_query) 167 | 168 | assert get_DOI_from_Crossref("asdfasdf", "ptth222@uky.edu") == None 169 | 170 | 171 | ## This function is unused in the actual code.
172 | # def test_get_grants_from_Crossref_grants_found(mocker): 173 | # def mock_query(*args, **kwargs): 174 | # return load_json(os.path.join("tests", "testing_files", "Crossref_grant_query.json")) 175 | # mocker.patch("academic_tracker.webio.habanero.Crossref.works", mock_query) 176 | 177 | # assert get_grants_from_Crossref("Multifunctional temperature\u2010responsive polymers as advanced biomaterials and beyond", "ptth222@uky.edu", ['P42ES007380']) == ['P42ES007380'] 178 | 179 | 180 | # def test_get_grants_from_Crossref_grants_not_found(mocker): 181 | # def mock_query(*args, **kwargs): 182 | # return load_json(os.path.join("tests", "testing_files", "Crossref_grant_query.json")) 183 | # mocker.patch("academic_tracker.webio.habanero.Crossref.works", mock_query) 184 | 185 | # assert get_grants_from_Crossref("asdfasdf", "ptth222@uky.edu", ['P42ES007380']) == None 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | -------------------------------------------------------------------------------- /tests/testing_files/.gitignore: -------------------------------------------------------------------------------- 1 | tracker-* -------------------------------------------------------------------------------- /tests/testing_files/Crossref_misc.json: -------------------------------------------------------------------------------- 1 | { 2 | "https://doi.org/10.1371/journal.pone.0277834": { 3 | "PMCID": null, 4 | "abstract": null, 5 | "authors": [ 6 | { 7 | "ORCID": null, 8 | "affiliation": null, 9 | "author_id": null, 10 | "firstname": "P. Travis", 11 | "initials": null, 12 | "lastname": "Thompson" 13 | }, 14 | { 15 | "ORCID": null, 16 | "affiliation": null, 17 | "author_id": null, 18 | "firstname": "Christian D.", 19 | "initials": null, 20 | "lastname": "Powell" 21 | }, 22 | { 23 | "ORCID": "0000-0003-3995-5368", 24 | "affiliation": null, 25 | "author_id": "Hunter Moseley", 26 | "firstname": "Hunter N. 
B.", 27 | "initials": null, 28 | "lastname": "Moseley" 29 | } 30 | ], 31 | "conclusions": null, 32 | "copyrights": null, 33 | "doi": "10.1371/journal.pone.0277834", 34 | "grants": [ 35 | "2020026", 36 | "P42 ES007380", 37 | "U54 TR001998-05A1" 38 | ], 39 | "journal": "Public Library of Science (PLoS)", 40 | "keywords": null, 41 | "methods": null, 42 | "publication_date": { 43 | "day": 18, 44 | "month": 11, 45 | "year": 2022 46 | }, 47 | "pubmed_id": null, 48 | "queried_sources": [ 49 | "Crossref" 50 | ], 51 | "references": [ 52 | { 53 | "PMCID": null, 54 | "citation": null, 55 | "doi": "10.1073/pnas.98.2.381", 56 | "pubmed_id": null, 57 | "title": "PubMed Central: The GenBank of the published literature" 58 | }, 59 | { 60 | "PMCID": null, 61 | "citation": "Consolidated Appropriations Act, 2014, H.R. 3547, Editor. 2013: Congressional Record.", 62 | "doi": null, 63 | "pubmed_id": null, 64 | "title": null 65 | }, 66 | { 67 | "PMCID": null, 68 | "citation": null, 69 | "doi": null, 70 | "pubmed_id": null, 71 | "title": "CrossRef text and data mining services" 72 | } 73 | ], 74 | "results": null, 75 | "title": "Academic Tracker: Software for tracking and reporting publications associated with authors and grants" 76 | } 77 | } -------------------------------------------------------------------------------- /tests/testing_files/Crossref_pub_dict.json: -------------------------------------------------------------------------------- 1 | { 2 | "https://doi.org/10.1042/bsr20181883": { 3 | "PMCID": null, 4 | "abstract": null, 5 | "authors": [ 6 | { 7 | "affiliation": "Division of Cardiovascular Medicine, University of Kentucky College of Medicine and Lexington Veterans Affairs, Lexington, KY, U.S.A.", 8 | "author_id": "Andrew Morris", 9 | "firstname": "Andrew J.", 10 | "initials": null, 11 | "lastname": "Morris" 12 | } 13 | ], 14 | "conclusions": null, 15 | "copyrights": null, 16 | "doi": "10.1042/bsr20181883", 17 | "grants": null, 18 | "journal": "Portland Press Ltd.", 19 | 
"keywords": null, 20 | "methods": null, 21 | "publication_date": { 22 | "day": null, 23 | "month": null, 24 | "year": 2019 25 | }, 26 | "pubmed_id": null, 27 | "results": null, 28 | "title": "Phospholipases D: making sense of redundancy and duplication" 29 | }, 30 | "https://doi.org/10.1093/jat/bkz097": { 31 | "PMCID": null, 32 | "abstract": null, 33 | "authors": [ 34 | { 35 | "affiliation": "Division of Cardiovascular Medicine, The Gill Heart and Vascular Institute, Superfund Research Center and Center for Appalachian Research in Environmental Sciences, University of Kentucky College of Medicine, Lexington Veterans Affairs Medical Center, Lexington, KY 40536, USA", 36 | "firstname": "M Abdul", 37 | "initials": null, 38 | "lastname": "Mottaleb" 39 | }, 40 | { 41 | "affiliation": "Division of Cardiovascular Medicine, The Gill Heart and Vascular Institute, Superfund Research Center and Center for Appalachian Research in Environmental Sciences, University of Kentucky College of Medicine, Lexington Veterans Affairs Medical Center, Lexington, KY 40536, USA", 42 | "firstname": "Michael C", 43 | "initials": null, 44 | "lastname": "Petriello" 45 | }, 46 | { 47 | "affiliation": "Division of Cardiovascular Medicine, The Gill Heart and Vascular Institute, Superfund Research Center and Center for Appalachian Research in Environmental Sciences, University of Kentucky College of Medicine, Lexington Veterans Affairs Medical Center, Lexington, KY 40536, USA", 48 | "author_id": "Andrew Morris", 49 | "firstname": "Andrew J", 50 | "initials": null, 51 | "lastname": "Morris" 52 | } 53 | ], 54 | "conclusions": null, 55 | "copyrights": null, 56 | "doi": "10.1093/jat/bkz097", 57 | "grants": null, 58 | "journal": "Oxford University Press (OUP)", 59 | "keywords": null, 60 | "methods": null, 61 | "publication_date": { 62 | "day": null, 63 | "month": null, 64 | "year": 2020 65 | }, 66 | "pubmed_id": null, 67 | "results": null, 68 | "title": "High-Throughput UHPLC-MS/MS Measurement of Per- 
and Poly-Fluorinated Alkyl Substances in Human Serum" 69 | }, 70 | "https://doi.org/10.1096/fasebj.2021.35.s1.03810": { 71 | "PMCID": null, 72 | "abstract": null, 73 | "authors": [ 74 | { 75 | "affiliation": "University of KentuckyLexingtonKY", 76 | "firstname": "Cetewayo", 77 | "initials": null, 78 | "lastname": "Rashid" 79 | }, 80 | { 81 | "affiliation": "University of KentuckyLexingtonKY", 82 | "firstname": "Sara", 83 | "initials": null, 84 | "lastname": "Tenlep" 85 | }, 86 | { 87 | "affiliation": "University of KentuckyLexingtonKY", 88 | "firstname": "Jianzhong", 89 | "initials": null, 90 | "lastname": "Chen" 91 | }, 92 | { 93 | "affiliation": "University of KentuckyLexingtonKY", 94 | "author_id": "Andrew Morris", 95 | "firstname": "Andrew", 96 | "initials": null, 97 | "lastname": "Morris" 98 | } 99 | ], 100 | "conclusions": null, 101 | "copyrights": null, 102 | "doi": "10.1096/fasebj.2021.35.s1.03810", 103 | "grants": null, 104 | "journal": "Wiley", 105 | "keywords": null, 106 | "methods": null, 107 | "publication_date": { 108 | "day": null, 109 | "month": null, 110 | "year": 2021 111 | }, 112 | "pubmed_id": null, 113 | "results": null, 114 | "title": "Tris(1,3\u2010Dichloro\u20102\u2010Propyl)Phosphate Is an Endocrine Disrupting Compound Causing Sex\u2010Specific Changes in Body Composition and Insulin Sensitivity" 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /tests/testing_files/Google_Scholar_misc.json: -------------------------------------------------------------------------------- 1 | { 2 | "https://doi.org/10.1016/s0959-440x(99)00019-6": { 3 | "PMCID": null, 4 | "abstract": null, 5 | "authors": [ 6 | { 7 | "ORCID": "0000-0003-3995-5368", 8 | "author_id": "Hunter Moseley", 9 | "collectivename": "Some Collective" 10 | } 11 | ], 12 | "conclusions": null, 13 | "copyrights": null, 14 | "doi": "10.1016/s0959-440x(99)00019-6", 15 | "grants": [], 16 | "journal": null, 17 | "keywords": null, 18 | 
"methods": null, 19 | "publication_date": { 20 | "day": null, 21 | "month": null, 22 | "year": 2022 23 | }, 24 | "pubmed_id": null, 25 | "queried_sources": [ 26 | "Google Scholar" 27 | ], 28 | "references": [], 29 | "results": null, 30 | "title": "Automated analysis of NMR assignments and structures for proteins" 31 | } 32 | } -------------------------------------------------------------------------------- /tests/testing_files/ORCID_misc.json: -------------------------------------------------------------------------------- 1 | { 2 | "10.3390/metabo13070842": { 3 | "PMCID": null, 4 | "abstract": null, 5 | "authors": [ 6 | { 7 | "ORCID": "0000-0003-3995-5368", 8 | "author_id": "Hunter Moseley", 9 | "collectivename": "Some Collective" 10 | } 11 | ], 12 | "conclusions": null, 13 | "copyrights": null, 14 | "doi": null, 15 | "grants": [], 16 | "journal": null, 17 | "keywords": null, 18 | "methods": null, 19 | "publication_date": { 20 | "day": 12, 21 | "month": 7, 22 | "year": 2023 23 | }, 24 | "pubmed_id": "10.3390/metabo13070842", 25 | "queried_sources": [ 26 | "ORCID" 27 | ], 28 | "references": [], 29 | "results": null, 30 | "title": "MESSES: Software for Transforming Messy Research Datasets into Clean Submissions to Metabolomics Workbench for Public Sharing" 31 | }, 32 | "asdf": { 33 | "PMCID": null, 34 | "abstract": null, 35 | "authors": [ 36 | { 37 | "ORCID": "0000-0003-3995-5368", 38 | "author_id": "Hunter Moseley", 39 | "collectivename": "Some Collective" 40 | } 41 | ], 42 | "conclusions": null, 43 | "copyrights": null, 44 | "doi": null, 45 | "grants": [], 46 | "journal": null, 47 | "keywords": null, 48 | "methods": null, 49 | "publication_date": { 50 | "day": 24, 51 | "month": 7, 52 | "year": 2023 53 | }, 54 | "pubmed_id": null, 55 | "queried_sources": [ 56 | "ORCID" 57 | ], 58 | "references": [], 59 | "results": null, 60 | "title": "The metabolomics workbench file status website: a metadata repository promoting FAIR principles of metabolomics data" 61 | } 62 | } 
-------------------------------------------------------------------------------- /tests/testing_files/ORCID_pub_dict.json: -------------------------------------------------------------------------------- 1 | { 2 | "https://doi.org/10.1016/j.celrep.2021.110013": { 3 | "PMCID": null, 4 | "abstract": null, 5 | "authors": [ 6 | { 7 | "affiliation": "kentucky", 8 | "author_id": "Andrew Morris", 9 | "firstname": "Andrew", 10 | "initials": null, 11 | "lastname": "Morris" 12 | } 13 | ], 14 | "conclusions": null, 15 | "copyrights": null, 16 | "doi": "10.1016/j.celrep.2021.110013", 17 | "grants": null, 18 | "journal": null, 19 | "keywords": null, 20 | "methods": null, 21 | "publication_date": { 22 | "day": null, 23 | "month": 11, 24 | "year": 2021 25 | }, 26 | "pubmed_id": null, 27 | "results": null, 28 | "title": "Autotaxin impedes anti-tumor immunity by suppressing chemotaxis and tumor infiltration of CD8+ T cells" 29 | }, 30 | "https://doi.org/10.1016/j.chroma.2021.462426": { 31 | "PMCID": null, 32 | "abstract": null, 33 | "authors": [ 34 | { 35 | "affiliation": "kentucky", 36 | "author_id": "Andrew Morris", 37 | "firstname": "Andrew", 38 | "initials": null, 39 | "lastname": "Morris" 40 | } 41 | ], 42 | "conclusions": null, 43 | "copyrights": null, 44 | "doi": "10.1016/j.chroma.2021.462426", 45 | "grants": null, 46 | "journal": null, 47 | "keywords": null, 48 | "methods": null, 49 | "publication_date": { 50 | "day": null, 51 | "month": 9, 52 | "year": 2021 53 | }, 54 | "pubmed_id": null, 55 | "results": null, 56 | "title": "Direct injection analysis of per and polyfluoroalkyl substances in surface and drinking water by sample filtration and liquid chromatography-tandem mass spectrometry" 57 | }, 58 | "https://doi.org/10.1042/bsr20181883": { 59 | "PMCID": null, 60 | "abstract": null, 61 | "authors": [ 62 | { 63 | "affiliation": "kentucky", 64 | "author_id": "Andrew Morris", 65 | "firstname": "Andrew", 66 | "initials": null, 67 | "lastname": "Morris" 68 | } 69 | ], 70 | 
"conclusions": null, 71 | "copyrights": null, 72 | "doi": "10.1042/bsr20181883", 73 | "grants": null, 74 | "journal": null, 75 | "keywords": null, 76 | "methods": null, 77 | "publication_date": { 78 | "day": 28, 79 | "month": 6, 80 | "year": 2019 81 | }, 82 | "pubmed_id": null, 83 | "results": null, 84 | "title": "Phospholipases D: making sense of redundancy and duplication" 85 | }, 86 | "https://doi.org/10.1172/jci.insight.143650": { 87 | "PMCID": null, 88 | "abstract": null, 89 | "authors": [ 90 | { 91 | "affiliation": "kentucky", 92 | "author_id": "Andrew Morris", 93 | "firstname": "Andrew", 94 | "initials": null, 95 | "lastname": "Morris" 96 | } 97 | ], 98 | "conclusions": null, 99 | "copyrights": null, 100 | "doi": "10.1172/jci.insight.143650", 101 | "grants": null, 102 | "journal": null, 103 | "keywords": null, 104 | "methods": null, 105 | "publication_date": { 106 | "day": 22, 107 | "month": 3, 108 | "year": 2021 109 | }, 110 | "pubmed_id": null, 111 | "results": null, 112 | "title": "Pioglitazone does not synergize with mirabegron to increase beige fat or further improve glucose metabolism" 113 | }, 114 | "https://doi.org/10.1172/jci134892": { 115 | "PMCID": null, 116 | "abstract": null, 117 | "authors": [ 118 | { 119 | "affiliation": "kentucky", 120 | "author_id": "Andrew Morris", 121 | "firstname": "Andrew", 122 | "initials": null, 123 | "lastname": "Morris" 124 | } 125 | ], 126 | "conclusions": null, 127 | "copyrights": null, 128 | "doi": "10.1172/jci134892", 129 | "grants": null, 130 | "journal": null, 131 | "keywords": null, 132 | "methods": null, 133 | "publication_date": { 134 | "day": 23, 135 | "month": 3, 136 | "year": 2020 137 | }, 138 | "pubmed_id": null, 139 | "results": null, 140 | "title": "The \u03b23-adrenergic receptor agonist mirabegron improves glucose homeostasis in obese humans" 141 | }, 142 | "https://doi.org/10.1194/jlr.m093096": { 143 | "PMCID": null, 144 | "abstract": null, 145 | "authors": [ 146 | { 147 | "affiliation": "kentucky", 
148 | "author_id": "Andrew Morris", 149 | "firstname": "Andrew", 150 | "initials": null, 151 | "lastname": "Morris" 152 | } 153 | ], 154 | "conclusions": null, 155 | "copyrights": null, 156 | "doi": "10.1194/jlr.m093096", 157 | "grants": null, 158 | "journal": null, 159 | "keywords": null, 160 | "methods": null, 161 | "publication_date": { 162 | "day": 4, 163 | "month": 11, 164 | "year": 2019 165 | }, 166 | "pubmed_id": null, 167 | "results": null, 168 | "title": "Effects of diet and hyperlipidemia on levels and distribution of circulating lysophosphatidic acid" 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /tests/testing_files/PMID_reference.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/academic_tracker/eee61cb899ce190edef0e882a11dfedf19bae263/tests/testing_files/PMID_reference.docx -------------------------------------------------------------------------------- /tests/testing_files/PMID_reference.json: -------------------------------------------------------------------------------- 1 | [ 2 | "32095784", 3 | "34811960", 4 | "34622577" 5 | ] 6 | -------------------------------------------------------------------------------- /tests/testing_files/PMID_reference.txt: -------------------------------------------------------------------------------- 1 | 32095784 2 | 34811960 3 | 34622577 -------------------------------------------------------------------------------- /tests/testing_files/add_authors.csv: -------------------------------------------------------------------------------- 1 | author_id,email,pubmed_name_search,first_name,last_name,affiliations 2 | Name McNamerson,ptth222@uky.edu,Name McNamerson,Name,McNamerson,"kentucky,asdf,qwr" 3 | -------------------------------------------------------------------------------- /tests/testing_files/add_authors_missing_all_name_columns.csv: 
-------------------------------------------------------------------------------- 1 | author_id,email,pubmed_name_search,affiliations 2 | Name McNamerson,ptth222@uky.edu,Name McNamerson,"kentucky,asdf,qwr" 3 | some name,,some name, 4 | -------------------------------------------------------------------------------- /tests/testing_files/add_authors_missing_all_names.csv: -------------------------------------------------------------------------------- 1 | author_id,email,pubmed_name_search,first_name,last_name,affiliations,collective_name 2 | Name McNamerson,ptth222@uky.edu,Name McNamerson,Name,McNamerson,"kentucky,asdf,qwr", 3 | some name,,some name,,,,some name 4 | error name,,error name,,,, 5 | -------------------------------------------------------------------------------- /tests/testing_files/add_authors_missing_collective_name.csv: -------------------------------------------------------------------------------- 1 | author_id,email,pubmed_name_search,affiliations,collective_name 2 | Name McNamerson,ptth222@uky.edu,Name McNamerson,"kentucky,asdf,qwr", 3 | some name,,some name,,some name 4 | -------------------------------------------------------------------------------- /tests/testing_files/add_authors_missing_column.csv: -------------------------------------------------------------------------------- 1 | author_id,email,first_name,affiliations 2 | Name McNamerson,ptth222@uky.edu,Name,"kentucky,asdf,qwr" 3 | -------------------------------------------------------------------------------- /tests/testing_files/add_authors_missing_first_and_last_names.csv: -------------------------------------------------------------------------------- 1 | author_id,email,pubmed_name_search,first_name,last_name,affiliations 2 | Name McNamerson,ptth222@uky.edu,Name McNamerson,Name,McNamerson,"kentucky,asdf,qwr" 3 | some name,,some name,,, 4 | -------------------------------------------------------------------------------- /tests/testing_files/add_authors_missing_first_name_column.csv: 
-------------------------------------------------------------------------------- 1 | author_id,email,pubmed_name_search,last_name,affiliations,collective_name 2 | Name McNamerson,ptth222@uky.edu,Name McNamerson,McNamerson,"kentucky,asdf,qwr", 3 | some name,,some name,,,some name 4 | -------------------------------------------------------------------------------- /tests/testing_files/add_authors_missing_last_name_column.csv: -------------------------------------------------------------------------------- 1 | author_id,email,pubmed_name_search,first_name,affiliations,collective_name 2 | Name McNamerson,ptth222@uky.edu,Name McNamerson,Name,"kentucky,asdf,qwr", 3 | some name,,some name,,,some name 4 | -------------------------------------------------------------------------------- /tests/testing_files/add_authors_missing_value.csv: -------------------------------------------------------------------------------- 1 | author_id,email,pubmed_name_search,first_name,last_name,affiliations 2 | Name McNamerson,ptth222@uky.edu,,Name,asdf,"kentucky,asdf,qwr" 3 | -------------------------------------------------------------------------------- /tests/testing_files/athr_project_emails.json: -------------------------------------------------------------------------------- 1 | { 2 | "creation_date": "2023-10-02 18:15", 3 | "emails": [ 4 | { 5 | "attachment": "\tPlk1 phosphorylation of PHGDH to regulate serine metabolism Hunter Moseley None Found\n\tHepatic kinome atlas: An in-depth identification of kinase pathways in liver fibrosis of humans and rodents. 
Justin F Creeden, Zachary A Kipp, Mei Xu, Robert M Flight, Hunter N B Moseley, Genesee J Martinez, Wang-Hsin Lee, Khaled Alganem, Ali S Imami, Megan R McMullen, Sanjoy Roychowdhury, Atta M Nawabi, Jennifer A Hipp, Samir Softic, Steven A Weinman, Robert McCullumsmith, Laura E Nagy, Terry D Hinds R01 MH121102, R01 AG057598, R01 DK121797, R01 MH107487, P30 CA177558, P50 AA024333\n\tIdentifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water. Sweta Ojha, P Travis Thompson, Christian D Powell, Hunter N B Moseley, Kelly G Pennell P42 ES007380, 2020026\n", 6 | "attachment_filename": "Core A Administrative Core_Hunter Moseley_project_report.txt", 7 | "author": "Hunter Moseley", 8 | "body": "Attached is the project report for publications found for Core A Administractive Core.\n\nKind regards,\n\nThis email was sent by an automated service. If you have any questions or concerns please email my creator ptth222@uky.edu", 9 | "cc": "", 10 | "from": "ptth222@uky.edu", 11 | "subject": "New PubMed Publications", 12 | "to": "hunter.moseley@gmail.com" 13 | }, 14 | { 15 | "attachment": "Hunter Moseley:\n\tPlk1 phosphorylation of PHGDH to regulate serine metabolism Hunter Moseley None Found\n\tHepatic kinome atlas: An in-depth identification of kinase pathways in liver fibrosis of humans and rodents. Justin F Creeden, Zachary A Kipp, Mei Xu, Robert M Flight, Hunter N B Moseley, Genesee J Martinez, Wang-Hsin Lee, Khaled Alganem, Ali S Imami, Megan R McMullen, Sanjoy Roychowdhury, Atta M Nawabi, Jennifer A Hipp, Samir Softic, Steven A Weinman, Robert McCullumsmith, Laura E Nagy, Terry D Hinds R01 MH121102, R01 AG057598, R01 DK121797, R01 MH107487, P30 CA177558, P50 AA024333\n\tIdentifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water. 
Sweta Ojha, P Travis Thompson, Christian D Powell, Hunter N B Moseley, Kelly G Pennell P42 ES007380, 2020026\n", 16 | "attachment_filename": "Project 1_project_report.txt", 17 | "body": "Attached is the project report for publications found for Project 1.\n\nKind regards,\n\nThis email was sent by an automated service. If you have any questions or concerns please email my creator ptth222@uky.edu", 18 | "cc": "", 19 | "from": "ptth222@uky.edu", 20 | "subject": "New PubMed Publications", 21 | "to": "ptth222@uky.edu" 22 | }, 23 | { 24 | "attachment": "Hunter Moseley:\n\tPlk1 phosphorylation of PHGDH to regulate serine metabolism Hunter Moseley None Found\n\tHepatic kinome atlas: An in-depth identification of kinase pathways in liver fibrosis of humans and rodents. Justin F Creeden, Zachary A Kipp, Mei Xu, Robert M Flight, Hunter N B Moseley, Genesee J Martinez, Wang-Hsin Lee, Khaled Alganem, Ali S Imami, Megan R McMullen, Sanjoy Roychowdhury, Atta M Nawabi, Jennifer A Hipp, Samir Softic, Steven A Weinman, Robert McCullumsmith, Laura E Nagy, Terry D Hinds R01 MH121102, R01 AG057598, R01 DK121797, R01 MH107487, P30 CA177558, P50 AA024333\n\tIdentifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water. Sweta Ojha, P Travis Thompson, Christian D Powell, Hunter N B Moseley, Kelly G Pennell P42 ES007380, 2020026\n", 25 | "attachment_filename": "No Authors_Hunter Moseley_project_report.txt", 26 | "author": "Hunter Moseley", 27 | "body": "Attached is the project report for publications found for all authors.\n\nKind regards,\n\nThis email was sent by an automated service. 
If you have any questions or concerns please email my creator ptth222@uky.edu", 28 | "cc": "", 29 | "from": "ptth222@uky.edu", 30 | "subject": "New PubMed Publications", 31 | "to": "hunter.moseley@gmail.com" 32 | }, 33 | { 34 | "attachment": "Travis Thompson:\n\tIdentifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water. Sweta Ojha, P Travis Thompson, Christian D Powell, Hunter N B Moseley, Kelly G Pennell P42 ES007380, 2020026\n", 35 | "attachment_filename": "No Authors_Travis Thompson_project_report.txt", 36 | "author": "Travis Thompson", 37 | "body": "Attached is the project report for publications found for all authors.\n\nKind regards,\n\nThis email was sent by an automated service. If you have any questions or concerns please email my creator ptth222@uky.edu", 38 | "cc": "", 39 | "from": "ptth222@uky.edu", 40 | "subject": "New PubMed Publications", 41 | "to": "ptth222@uky.edu" 42 | } 43 | ] 44 | } -------------------------------------------------------------------------------- /tests/testing_files/athr_project_emails_tabular.json: -------------------------------------------------------------------------------- 1 | { 2 | "creation_date": "2023-10-12 15:32", 3 | "emails": [ 4 | { 5 | "attachment": "Col1,Col2\n\"Hunter, Moseley\",Plk1 phosphorylation of PHGDH to regulate serine metabolism\n\"Hunter, Moseley\",Hepatic kinome atlas: An in-depth identification of kinase pathways in liver fibrosis of humans and rodents.\n\"Hunter, Moseley\",Identifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water.\n", 6 | "attachment_filename": "asdf.csv", 7 | "author": "Hunter Moseley", 8 | "body": "Attached is the project report for publications found for Core A Administractive Core.\n\nKind regards,\n\nThis email was sent by an automated service. 
If you have any questions or concerns please email my creator ptth222@uky.edu", 9 | "cc": "", 10 | "from": "ptth222@uky.edu", 11 | "subject": "New PubMed Publications", 12 | "to": "hunter.moseley@gmail.com" 13 | }, 14 | { 15 | "attachment": "test_dir\\Project 1_project_report.xlsx", 16 | "attachment_filename": "Project 1_project_report.xlsx", 17 | "body": "Attached is the project report for publications found for Project 1.\n\nKind regards,\n\nThis email was sent by an automated service. If you have any questions or concerns please email my creator ptth222@uky.edu", 18 | "cc": "", 19 | "from": "ptth222@uky.edu", 20 | "subject": "New PubMed Publications", 21 | "to": "ptth222@uky.edu" 22 | }, 23 | { 24 | "attachment": "Col1,Col2\n\"Hunter, Moseley\",Plk1 phosphorylation of PHGDH to regulate serine metabolism\n\"Hunter, Moseley\",Hepatic kinome atlas: An in-depth identification of kinase pathways in liver fibrosis of humans and rodents.\n\"Hunter, Moseley\",Identifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water.\n", 25 | "attachment_filename": "No Authors_Hunter Moseley_project_report.csv", 26 | "author": "Hunter Moseley", 27 | "body": "Attached is the project report for publications found for all authors.\n\nKind regards,\n\nThis email was sent by an automated service. If you have any questions or concerns please email my creator ptth222@uky.edu", 28 | "cc": "", 29 | "from": "ptth222@uky.edu", 30 | "subject": "New PubMed Publications", 31 | "to": "hunter.moseley@gmail.com" 32 | }, 33 | { 34 | "attachment": "Col1,Col2\n\"Travis, Thompson\",Identifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water.\n", 35 | "attachment_filename": "No Authors_Travis Thompson_project_report.csv", 36 | "author": "Travis Thompson", 37 | "body": "Attached is the project report for publications found for all authors.\n\nKind regards,\n\nThis email was sent by an automated service. 
If you have any questions or concerns please email my creator ptth222@uky.edu", 38 | "cc": "", 39 | "from": "ptth222@uky.edu", 40 | "subject": "New PubMed Publications", 41 | "to": "ptth222@uky.edu" 42 | } 43 | ] 44 | } -------------------------------------------------------------------------------- /tests/testing_files/athr_srch_build_loop_template_string.txt: -------------------------------------------------------------------------------- 1 | <project_loop><author_loop><pub_loop> 2 | Authors: <authors> 3 | Grants: <grants> 4 | Abstract: <abstract> 5 | Conclusions: <conclusions> 6 | Copyrights: <copyrights> 7 | DOI: <DOI> 8 | Journal: <journal> 9 | Keywords: <keywords> 10 | Methods: <methods> 11 | PMID: <PMID> 12 | Results: <results> 13 | Title: <title> 14 | PMCID: <PMCID> 15 | Publication Year: <publication_year> 16 | Publication Month: <publication_month> 17 | Publication Day: <publication_day> 18 | Author First: <author_first> 19 | Author Last: <author_last> 20 | Name Search: <author_name_search> 21 | Email: <author_email> 22 | First Author: <first_author> 23 | Last Author: <last_author> 24 | Pub_Authors: <pub_author_loop>\n<pub_author_last>, <pub_author_first> <pub_author_initials> <pub_author_affiliations></pub_author_loop> 25 | References: <reference_loop>\nCitation: <reference_citation>\nTitle: <reference_title>\nPMID: <reference_PMID>\nPMCID: <reference_PMCID>\nDOI: <reference_DOI>\n</reference_loop> 26 | </pub_loop></author_loop></project_loop> -------------------------------------------------------------------------------- /tests/testing_files/athr_srch_summary_report.txt: -------------------------------------------------------------------------------- 1 | Core A Administrative Core 2 | Hunter Moseley: 3 | Title: Plk1 phosphorylation of PHGDH to regulate serine metabolism 4 | Authors: Hunter Moseley 5 | Journal: None 6 | DOI: None 7 | PMID: None 8 | PMCID: None 9 | Grants: None Found 10 | 11 | Title: Hepatic kinome atlas: An in-depth identification of 
kinase pathways in liver fibrosis of humans and rodents. 12 | Authors: Justin F Creeden, Zachary A Kipp, Mei Xu, Robert M Flight, Hunter N B Moseley, Genesee J Martinez, Wang-Hsin Lee, Khaled Alganem, Ali S Imami, Megan R McMullen, Sanjoy Roychowdhury, Atta M Nawabi, Jennifer A Hipp, Samir Softic, Steven A Weinman, Robert McCullumsmith, Laura E Nagy, Terry D Hinds 13 | Journal: Hepatology (Baltimore, Md.) 14 | DOI: 10.1002/hep.32467 15 | PMID: 35313030 16 | PMCID: PMC9489820 17 | Grants: R01 MH121102, R01 AG057598, R01 DK121797, R01 MH107487, P30 CA177558, P50 AA024333 18 | 19 | Title: Identifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water. 20 | Authors: Sweta Ojha, P Travis Thompson, Christian D Powell, Hunter N B Moseley, Kelly G Pennell 21 | Journal: Scientific data 22 | DOI: 10.1038/s41597-023-02277-x 23 | PMID: 37328532 24 | PMCID: PMC10275912 25 | Grants: P42 ES007380, 2020026 26 | 27 | Project 1 28 | Hunter Moseley: 29 | Title: Plk1 phosphorylation of PHGDH to regulate serine metabolism 30 | Authors: Hunter Moseley 31 | Journal: None 32 | DOI: None 33 | PMID: None 34 | PMCID: None 35 | Grants: None Found 36 | 37 | Title: Hepatic kinome atlas: An in-depth identification of kinase pathways in liver fibrosis of humans and rodents. 38 | Authors: Justin F Creeden, Zachary A Kipp, Mei Xu, Robert M Flight, Hunter N B Moseley, Genesee J Martinez, Wang-Hsin Lee, Khaled Alganem, Ali S Imami, Megan R McMullen, Sanjoy Roychowdhury, Atta M Nawabi, Jennifer A Hipp, Samir Softic, Steven A Weinman, Robert McCullumsmith, Laura E Nagy, Terry D Hinds 39 | Journal: Hepatology (Baltimore, Md.) 40 | DOI: 10.1002/hep.32467 41 | PMID: 35313030 42 | PMCID: PMC9489820 43 | Grants: R01 MH121102, R01 AG057598, R01 DK121797, R01 MH107487, P30 CA177558, P50 AA024333 44 | 45 | Title: Identifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water. 
46 | Authors: Sweta Ojha, P Travis Thompson, Christian D Powell, Hunter N B Moseley, Kelly G Pennell 47 | Journal: Scientific data 48 | DOI: 10.1038/s41597-023-02277-x 49 | PMID: 37328532 50 | PMCID: PMC10275912 51 | Grants: P42 ES007380, 2020026 52 | 53 | No Project Report 54 | Hunter Moseley: 55 | Title: Plk1 phosphorylation of PHGDH to regulate serine metabolism 56 | Authors: Hunter Moseley 57 | Journal: None 58 | DOI: None 59 | PMID: None 60 | PMCID: None 61 | Grants: None Found 62 | 63 | Title: Hepatic kinome atlas: An in-depth identification of kinase pathways in liver fibrosis of humans and rodents. 64 | Authors: Justin F Creeden, Zachary A Kipp, Mei Xu, Robert M Flight, Hunter N B Moseley, Genesee J Martinez, Wang-Hsin Lee, Khaled Alganem, Ali S Imami, Megan R McMullen, Sanjoy Roychowdhury, Atta M Nawabi, Jennifer A Hipp, Samir Softic, Steven A Weinman, Robert McCullumsmith, Laura E Nagy, Terry D Hinds 65 | Journal: Hepatology (Baltimore, Md.) 66 | DOI: 10.1002/hep.32467 67 | PMID: 35313030 68 | PMCID: PMC9489820 69 | Grants: R01 MH121102, R01 AG057598, R01 DK121797, R01 MH107487, P30 CA177558, P50 AA024333 70 | 71 | Title: Identifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water. 72 | Authors: Sweta Ojha, P Travis Thompson, Christian D Powell, Hunter N B Moseley, Kelly G Pennell 73 | Journal: Scientific data 74 | DOI: 10.1038/s41597-023-02277-x 75 | PMID: 37328532 76 | PMCID: PMC10275912 77 | Grants: P42 ES007380, 2020026 78 | 79 | No Authors 80 | Hunter Moseley: 81 | Title: Plk1 phosphorylation of PHGDH to regulate serine metabolism 82 | Authors: Hunter Moseley 83 | Journal: None 84 | DOI: None 85 | PMID: None 86 | PMCID: None 87 | Grants: None Found 88 | 89 | Title: Hepatic kinome atlas: An in-depth identification of kinase pathways in liver fibrosis of humans and rodents. 
90 | Authors: Justin F Creeden, Zachary A Kipp, Mei Xu, Robert M Flight, Hunter N B Moseley, Genesee J Martinez, Wang-Hsin Lee, Khaled Alganem, Ali S Imami, Megan R McMullen, Sanjoy Roychowdhury, Atta M Nawabi, Jennifer A Hipp, Samir Softic, Steven A Weinman, Robert McCullumsmith, Laura E Nagy, Terry D Hinds 91 | Journal: Hepatology (Baltimore, Md.) 92 | DOI: 10.1002/hep.32467 93 | PMID: 35313030 94 | PMCID: PMC9489820 95 | Grants: R01 MH121102, R01 AG057598, R01 DK121797, R01 MH107487, P30 CA177558, P50 AA024333 96 | 97 | Title: Identifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water. 98 | Authors: Sweta Ojha, P Travis Thompson, Christian D Powell, Hunter N B Moseley, Kelly G Pennell 99 | Journal: Scientific data 100 | DOI: 10.1038/s41597-023-02277-x 101 | PMID: 37328532 102 | PMCID: PMC10275912 103 | Grants: P42 ES007380, 2020026 104 | 105 | Travis Thompson: 106 | Title: Identifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water. 107 | Authors: Sweta Ojha, P Travis Thompson, Christian D Powell, Hunter N B Moseley, Kelly G Pennell 108 | Journal: Scientific data 109 | DOI: 10.1038/s41597-023-02277-x 110 | PMID: 37328532 111 | PMCID: PMC10275912 112 | Grants: P42 ES007380, 2020026 113 | 114 | No from_email 115 | Hunter Moseley: 116 | Title: Plk1 phosphorylation of PHGDH to regulate serine metabolism 117 | Authors: Hunter Moseley 118 | Journal: None 119 | DOI: None 120 | PMID: None 121 | PMCID: None 122 | Grants: None Found 123 | 124 | Title: Hepatic kinome atlas: An in-depth identification of kinase pathways in liver fibrosis of humans and rodents. 
125 | Authors: Justin F Creeden, Zachary A Kipp, Mei Xu, Robert M Flight, Hunter N B Moseley, Genesee J Martinez, Wang-Hsin Lee, Khaled Alganem, Ali S Imami, Megan R McMullen, Sanjoy Roychowdhury, Atta M Nawabi, Jennifer A Hipp, Samir Softic, Steven A Weinman, Robert McCullumsmith, Laura E Nagy, Terry D Hinds 126 | Journal: Hepatology (Baltimore, Md.) 127 | DOI: 10.1002/hep.32467 128 | PMID: 35313030 129 | PMCID: PMC9489820 130 | Grants: R01 MH121102, R01 AG057598, R01 DK121797, R01 MH107487, P30 CA177558, P50 AA024333 131 | 132 | Title: Identifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water. 133 | Authors: Sweta Ojha, P Travis Thompson, Christian D Powell, Hunter N B Moseley, Kelly G Pennell 134 | Journal: Scientific data 135 | DOI: 10.1038/s41597-023-02277-x 136 | PMID: 37328532 137 | PMCID: PMC10275912 138 | Grants: P42 ES007380, 2020026 139 | 140 | Travis Thompson: 141 | Title: Identifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water. 
142 | Authors: Sweta Ojha, P Travis Thompson, Christian D Powell, Hunter N B Moseley, Kelly G Pennell 143 | Journal: Scientific data 144 | DOI: 10.1038/s41597-023-02277-x 145 | PMID: 37328532 146 | PMCID: PMC10275912 147 | Grants: P42 ES007380, 2020026 148 | 149 | -------------------------------------------------------------------------------- /tests/testing_files/collaborator_emails.json: -------------------------------------------------------------------------------- 1 | { 2 | "creation_date": "2023-10-02 20:16", 3 | "emails": [ 4 | { 5 | "attachment": "Col1\n\"Ali S, Imami, AS, Department of Neurosciences, University of Toledo College of Medicine and Life Sciences, Toledo, Ohio, USA.\"\n\"Atta M, Nawabi, AM, Division of Transplant and Hepatobiliary, Department of Surgery, The University of Kansas Medical Center, Kansas City, Kansas, USA.\"\n\"Christian D, Powell, CD, University of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.\nUniversity of Kentucky, Department of Computer Science (Data Science Program), Lexington, Kentucky, USA.\"\n\"Genesee J, Martinez, GJ, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.\"\n\"Jennifer A, Hipp, JA, Strata Oncology, Ann Arbor, Michigan, USA.\"\n\"Justin F, Creeden, JF, Department of Neurosciences, University of Toledo College of Medicine and Life Sciences, Toledo, Ohio, USA.\"\n\"Kelly G, Pennell, KG, University of Kentucky, College of Engineering, Department of Civil Engineering, Lexington, Kentucky, USA. kellypennell@uky.edu.\nUniversity of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA. 
kellypennell@uky.edu.\"\n\"Khaled, Alganem, K, Department of Neurosciences, University of Toledo College of Medicine and Life Sciences, Toledo, Ohio, USA.\"\n\"Laura E, Nagy, LE, Department of Inflammation and Immunity, Cleveland Clinic, Cleveland, Ohio, USA.\nDepartment of Gastroenterology and Hepatology, Center for Liver Disease Research, Cleveland Clinic, Cleveland, Ohio, USA.\nDepartment of Molecular Medicine, Case Western Reserve University, Cleveland, Ohio, USA.\"\n\"Megan R, McMullen, MR, Department of Inflammation and Immunity, Cleveland Clinic, Cleveland, Ohio, USA.\"\n\"Mei, Xu, M, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.\"\n\"P Travis, Thompson, PT, University of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.\"\n\"Robert M, Flight, RM, Department of Molecular & Cellular Biochemistry, University of Kentucky, Lexington, Kentucky, USA.\nMarkey Cancer Center, University of Kentucky, Lexington, Kentucky, USA.\nResource Center for Stable Isotope Resolved Metabolomics, University of Kentucky, Lexington, Kentucky, USA.\"\n\"Robert, McCullumsmith, R, Department of Neurosciences, University of Toledo College of Medicine and Life Sciences, Toledo, Ohio, USA.\nNeurosciences Institute, ProMedica, Toledo, Ohio, USA.\"\n\"Samir, Softic, S, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.\nDepartment of Pediatrics, University of Kentucky, Lexington, Kentucky, USA.\"\n\"Sanjoy, Roychowdhury, S, Department of Inflammation and Immunity, Cleveland Clinic, Cleveland, Ohio, USA.\"\n\"Steven A, Weinman, SA, Department of Internal Medicine and Liver Center, University of Kansas Medical Center, Kansas City, Kansas, USA.\"\n\"Sweta, Ojha, S, University of Kentucky, College of Engineering, Department of Civil Engineering, Lexington, Kentucky, USA.\nUniversity of Kentucky Superfund Research Center (UKSRC), 
Lexington, Kentucky, USA.\"\n\"Terry D, Hinds, TD, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.\nMarkey Cancer Center, University of Kentucky, Lexington, Kentucky, USA.\nBarnstable Brown Diabetes Center, University of Kentucky College of Medicine, Lexington, Kentucky, USA.\"\n\"Wang-Hsin, Lee, WH, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.\"\n\"Zachary A, Kipp, ZA, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.\"\n", 6 | "attachment_filename": "name_test.csv", 7 | "author": "Hunter Moseley", 8 | "body": "asdf", 9 | "cc": "", 10 | "from": "ptth222@uky.edu", 11 | "subject": "asdf", 12 | "to": "ptth222@uky.edu" 13 | }, 14 | { 15 | "attachment": "P Travis, Thompson, PT, University of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.Christian D, Powell, CD, University of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.\nUniversity of Kentucky, Department of Computer Science (Data Science Program), Lexington, Kentucky, USA.Hunter N B, Moseley, HNB, University of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.\nUniversity of Kentucky, Department of Molecular and Cellular Biochemistry, Lexington, Kentucky, USA.Kelly G, Pennell, KG, University of Kentucky, College of Engineering, Department of Civil Engineering, Lexington, Kentucky, USA. kellypennell@uky.edu.\nUniversity of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA. 
kellypennell@uky.edu.", 16 | "attachment_filename": "name_test.txt", 17 | "author": "Sweta Ojha", 18 | "body": "asdf", 19 | "cc": "", 20 | "from": "ptth222@uky.edu", 21 | "subject": "asdf", 22 | "to": "sweta.ojha@uky.edu" 23 | }, 24 | { 25 | "attachment": "Sweta, Ojha, S, University of Kentucky, College of Engineering, Department of Civil Engineering, Lexington, Kentucky, USA.\nUniversity of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.Christian D, Powell, CD, University of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.\nUniversity of Kentucky, Department of Computer Science (Data Science Program), Lexington, Kentucky, USA.Hunter N B, Moseley, HNB, University of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.\nUniversity of Kentucky, Department of Molecular and Cellular Biochemistry, Lexington, Kentucky, USA.Kelly G, Pennell, KG, University of Kentucky, College of Engineering, Department of Civil Engineering, Lexington, Kentucky, USA. kellypennell@uky.edu.\nUniversity of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA. 
kellypennell@uky.edu.", 26 | "attachment_filename": "Travis Thompson_collaborators.txt", 27 | "author": "Travis Thompson", 28 | "body": "asdf", 29 | "cc": "", 30 | "from": "ptth222@uky.edu", 31 | "subject": "asdf", 32 | "to": "ptth222@uky.edu" 33 | }, 34 | { 35 | "attachment": "test_dir\\Kelly Pennell_collaborators.xlsx", 36 | "attachment_filename": "Kelly Pennell_collaborators.xlsx", 37 | "author": "Kelly Pennell", 38 | "body": "asdf", 39 | "cc": "", 40 | "from": "ptth222@uky.edu", 41 | "subject": "asdf", 42 | "to": "ptth222@uky.edu" 43 | } 44 | ] 45 | } -------------------------------------------------------------------------------- /tests/testing_files/collaborator_emails_tabular.json: -------------------------------------------------------------------------------- 1 | { 2 | "creation_date": "2023-10-02 20:03", 3 | "emails": [ 4 | { 5 | "attachment": "Col1\n\"Ali S, Imami, AS, Department of Neurosciences, University of Toledo College of Medicine and Life Sciences, Toledo, Ohio, USA.\"\n\"Atta M, Nawabi, AM, Division of Transplant and Hepatobiliary, Department of Surgery, The University of Kansas Medical Center, Kansas City, Kansas, USA.\"\n\"Christian D, Powell, CD, University of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.\nUniversity of Kentucky, Department of Computer Science (Data Science Program), Lexington, Kentucky, USA.\"\n\"Genesee J, Martinez, GJ, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.\"\n\"Jennifer A, Hipp, JA, Strata Oncology, Ann Arbor, Michigan, USA.\"\n\"Justin F, Creeden, JF, Department of Neurosciences, University of Toledo College of Medicine and Life Sciences, Toledo, Ohio, USA.\"\n\"Kelly G, Pennell, KG, University of Kentucky, College of Engineering, Department of Civil Engineering, Lexington, Kentucky, USA. kellypennell@uky.edu.\nUniversity of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA. 
kellypennell@uky.edu.\"\n\"Khaled, Alganem, K, Department of Neurosciences, University of Toledo College of Medicine and Life Sciences, Toledo, Ohio, USA.\"\n\"Laura E, Nagy, LE, Department of Inflammation and Immunity, Cleveland Clinic, Cleveland, Ohio, USA.\nDepartment of Gastroenterology and Hepatology, Center for Liver Disease Research, Cleveland Clinic, Cleveland, Ohio, USA.\nDepartment of Molecular Medicine, Case Western Reserve University, Cleveland, Ohio, USA.\"\n\"Megan R, McMullen, MR, Department of Inflammation and Immunity, Cleveland Clinic, Cleveland, Ohio, USA.\"\n\"Mei, Xu, M, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.\"\n\"P Travis, Thompson, PT, University of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.\"\n\"Robert M, Flight, RM, Department of Molecular & Cellular Biochemistry, University of Kentucky, Lexington, Kentucky, USA.\nMarkey Cancer Center, University of Kentucky, Lexington, Kentucky, USA.\nResource Center for Stable Isotope Resolved Metabolomics, University of Kentucky, Lexington, Kentucky, USA.\"\n\"Robert, McCullumsmith, R, Department of Neurosciences, University of Toledo College of Medicine and Life Sciences, Toledo, Ohio, USA.\nNeurosciences Institute, ProMedica, Toledo, Ohio, USA.\"\n\"Samir, Softic, S, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.\nDepartment of Pediatrics, University of Kentucky, Lexington, Kentucky, USA.\"\n\"Sanjoy, Roychowdhury, S, Department of Inflammation and Immunity, Cleveland Clinic, Cleveland, Ohio, USA.\"\n\"Steven A, Weinman, SA, Department of Internal Medicine and Liver Center, University of Kansas Medical Center, Kansas City, Kansas, USA.\"\n\"Sweta, Ojha, S, University of Kentucky, College of Engineering, Department of Civil Engineering, Lexington, Kentucky, USA.\nUniversity of Kentucky Superfund Research Center (UKSRC), 
Lexington, Kentucky, USA.\"\n\"Terry D, Hinds, TD, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.\nMarkey Cancer Center, University of Kentucky, Lexington, Kentucky, USA.\nBarnstable Brown Diabetes Center, University of Kentucky College of Medicine, Lexington, Kentucky, USA.\"\n\"Wang-Hsin, Lee, WH, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.\"\n\"Zachary A, Kipp, ZA, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.\"\n", 6 | "attachment_filename": "Hunter Moseley_collaborators.csv", 7 | "author": "Hunter Moseley", 8 | "body": "asdf", 9 | "cc": "", 10 | "from": "ptth222@uky.edu", 11 | "subject": "asdf", 12 | "to": "ptth222@uky.edu" 13 | }, 14 | { 15 | "attachment": "Name,Affiliations\n\"Moseley, Hunter N B\",\"University of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.\nUniversity of Kentucky, Department of Molecular and Cellular Biochemistry, Lexington, Kentucky, USA.\"\n\"Pennell, Kelly G\",\"University of Kentucky, College of Engineering, Department of Civil Engineering, Lexington, Kentucky, USA. kellypennell@uky.edu.\nUniversity of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA. 
kellypennell@uky.edu.\"\n\"Powell, Christian D\",\"University of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.\nUniversity of Kentucky, Department of Computer Science (Data Science Program), Lexington, Kentucky, USA.\"\n\"Thompson, P Travis\",\"University of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.\"\n", 16 | "attachment_filename": "Sweta Ojha_collaborators.csv", 17 | "author": "Sweta Ojha", 18 | "body": "asdf", 19 | "cc": "", 20 | "from": "ptth222@uky.edu", 21 | "subject": "asdf", 22 | "to": "sweta.ojha@uky.edu" 23 | }, 24 | { 25 | "attachment": "Col1\n\"Sweta, Ojha, S, University of Kentucky, College of Engineering, Department of Civil Engineering, Lexington, Kentucky, USA.\nUniversity of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.\"\n\"Christian D, Powell, CD, University of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.\nUniversity of Kentucky, Department of Computer Science (Data Science Program), Lexington, Kentucky, USA.\"\n\"Hunter N B, Moseley, HNB, University of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA.\nUniversity of Kentucky, Department of Molecular and Cellular Biochemistry, Lexington, Kentucky, USA.\"\n\"Kelly G, Pennell, KG, University of Kentucky, College of Engineering, Department of Civil Engineering, Lexington, Kentucky, USA. kellypennell@uky.edu.\nUniversity of Kentucky Superfund Research Center (UKSRC), Lexington, Kentucky, USA. 
kellypennell@uky.edu.\"\n", 26 | "attachment_filename": "Travis Thompson_collaborators.csv", 27 | "author": "Travis Thompson", 28 | "body": "asdf", 29 | "cc": "", 30 | "from": "ptth222@uky.edu", 31 | "subject": "asdf", 32 | "to": "ptth222@uky.edu" 33 | } 34 | ] 35 | } -------------------------------------------------------------------------------- /tests/testing_files/collaborator_report2.txt: -------------------------------------------------------------------------------- 1 | Justin F, Creeden, JF, Department of Neurosciences, University of Toledo College of Medicine and Life Sciences, Toledo, Ohio, USA.Zachary A, Kipp, ZA, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.Mei, Xu, M, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.Robert M, Flight, RM, Department of Molecular & Cellular Biochemistry, University of Kentucky, Lexington, Kentucky, USA. 2 | Markey Cancer Center, University of Kentucky, Lexington, Kentucky, USA. 
3 | Resource Center for Stable Isotope Resolved Metabolomics, University of Kentucky, Lexington, Kentucky, USA.Genesee J, Martinez, GJ, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.Wang-Hsin, Lee, WH, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA.Khaled, Alganem, K, Department of Neurosciences, University of Toledo College of Medicine and Life Sciences, Toledo, Ohio, USA.Ali S, Imami, AS, Department of Neurosciences, University of Toledo College of Medicine and Life Sciences, Toledo, Ohio, USA.Megan R, McMullen, MR, Department of Inflammation and Immunity, Cleveland Clinic, Cleveland, Ohio, USA.Sanjoy, Roychowdhury, S, Department of Inflammation and Immunity, Cleveland Clinic, Cleveland, Ohio, USA.Atta M, Nawabi, AM, Division of Transplant and Hepatobiliary, Department of Surgery, The University of Kansas Medical Center, Kansas City, Kansas, USA.Jennifer A, Hipp, JA, Strata Oncology, Ann Arbor, Michigan, USA.Samir, Softic, S, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA. 4 | Department of Pediatrics, University of Kentucky, Lexington, Kentucky, USA.Steven A, Weinman, SA, Department of Internal Medicine and Liver Center, University of Kansas Medical Center, Kansas City, Kansas, USA.Robert, McCullumsmith, R, Department of Neurosciences, University of Toledo College of Medicine and Life Sciences, Toledo, Ohio, USA. 5 | Neurosciences Institute, ProMedica, Toledo, Ohio, USA.Laura E, Nagy, LE, Department of Inflammation and Immunity, Cleveland Clinic, Cleveland, Ohio, USA. 6 | Department of Gastroenterology and Hepatology, Center for Liver Disease Research, Cleveland Clinic, Cleveland, Ohio, USA. 
7 | Department of Molecular Medicine, Case Western Reserve University, Cleveland, Ohio, USA.Terry D, Hinds, TD, Department of Pharmacology and Nutritional Sciences, University of Kentucky College of Medicine, Lexington, Kentucky, USA. 8 | Markey Cancer Center, University of Kentucky, Lexington, Kentucky, USA. 9 | Barnstable Brown Diabetes Center, University of Kentucky College of Medicine, Lexington, Kentucky, USA. -------------------------------------------------------------------------------- /tests/testing_files/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "affiliations": [ 3 | "kentucky" 4 | ], 5 | "cc_email": [], 6 | "cutoff_year": 2019, 7 | "email_subject": "New PubMed Publications", 8 | "email_template": "Hey <author_first_name>,\n\nThese are the publications I was able to find on PubMed. Are any missing?\n\n<total_pubs>\n\nKind regards,\n\nThis email was sent by an automated service. If you have any questions or concerns please email my creator ptth222@uky.edu", 9 | "from_email": "ptth222@uky.edu", 10 | "grants": [ 11 | "P42ES007380", 12 | "P42 ES007380" 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /tests/testing_files/config_Hunter_only.json: -------------------------------------------------------------------------------- 1 | { 2 | "project_descriptions": { 3 | "Core C DMAC": { 4 | "affiliations": [ 5 | "kentucky" 6 | ], 7 | "authors": [ 8 | "Hunter Moseley" 9 | ], 10 | "project_report": { 11 | "columns": { 12 | "Author Searched": "<author_name_search>", 13 | "First Author": "<first_author>", 14 | "Last Author": "<last_author>", 15 | "All Authors": "<authors>", 16 | "Article Title": "<title>", 17 | "Journal Name": "<journal>", 18 | "Journal Publication Date": "<publication_month>-<publication_day>-<publication_year>", 19 | "PubMed PMID #": "<PMID>", 20 | "PubMed Central PMCID #": "<PMCID>", 21 | "DOI #": "<DOI>" 22 | }, 23 | "sort": [ 24 | 
"Author Searched" 25 | ], 26 | "file_format": "xlsx" 27 | }, 28 | "cutoff_year": 2022, 29 | "grants": [ 30 | "P42ES007380", 31 | "P42 ES007380" 32 | ] 33 | } 34 | }, 35 | "ORCID_search": { 36 | "ORCID_key": "APP-JE4OT4MYTV4KNQXO", 37 | "ORCID_secret": "fec1c999-5a62-41e1-ae69-a09ffa00a7e2" 38 | }, 39 | "PubMed_search": { 40 | "PubMed_email": "ptth222@uky.edu" 41 | }, 42 | "Crossref_search": { 43 | "mailto_email": "ptth222@uky.edu" 44 | }, 45 | "summary_report": { 46 | "columns": { 47 | "Project": "<project_name>", 48 | "Author Searched": "<author_name_search>", 49 | "First Author": "<first_author>", 50 | "Last Author": "<last_author>", 51 | "All Authors": "<authors>", 52 | "Article Title": "<title>", 53 | "Journal Name": "<journal>", 54 | "Journal Publication Date": "<publication_month>-<publication_day>-<publication_year>", 55 | "PubMed PMID #": "<PMID>", 56 | "PubMed Central PMCID #": "<PMCID>", 57 | "DOI #": "<DOI>" 58 | }, 59 | "sort": [ 60 | "Project", 61 | "Author Searched" 62 | ], 63 | "file_format": "xlsx" 64 | }, 65 | "Authors": { 66 | "Hunter Moseley": { 67 | "ORCID": "0000-0003-3995-5368", 68 | "affiliations": [ 69 | "kentucky" 70 | ], 71 | "email": "hunter.moseley@gmail.com", 72 | "first_name": "Hunter", 73 | "last_name": "Moseley", 74 | "pubmed_name_search": "Hunter Moseley", 75 | "scholar_id": "ctE_FZMAAAAJ", 76 | "grants": [ 77 | "P42ES007380", 78 | "P42 ES007380" 79 | ], 80 | "cutoff_year": 2022 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /tests/testing_files/empty_file.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/academic_tracker/eee61cb899ce190edef0e882a11dfedf19bae263/tests/testing_files/empty_file.txt -------------------------------------------------------------------------------- /tests/testing_files/gen_reports_ref_summary_report.txt: 
-------------------------------------------------------------------------------- 1 | Authors: Christian D. Powell, Hunter N.B. Moseley 2 | Grants: None Found 3 | Abstract: None 4 | Conclusions: None 5 | Copyrights: None 6 | DOI: 10.3390/metabo11030163 7 | Journal: MDPI AG 8 | Keywords: None 9 | Methods: None 10 | PMID: None 11 | Results: None 12 | Title: The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository 13 | PMCID: None 14 | Publication Year: 2021 15 | Publication Month: None 16 | Publication Day: None 17 | Tok Title: The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository. 18 | Tok DOI: 10.3390/metabo11030163 19 | Tok PMID: None 20 | Tok Authors: Powell C, Moseley H 21 | Ref Line: Powell C, Moseley H. The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository. Metabolites. 2021 March; 11(3):163-. doi: 10.3390/metabo11030163. 22 | Comparison: N/A 23 | 24 | Authors: Huan Jin, Joshua M. Mitchell, Hunter N. B. Moseley 25 | Grants: None Found 26 | Abstract: None 27 | Conclusions: None 28 | Copyrights: None 29 | DOI: 10.3390/metabo10090368 30 | Journal: MDPI AG 31 | Keywords: None 32 | Methods: None 33 | PMID: None 34 | Results: None 35 | Title: Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases 36 | PMCID: None 37 | Publication Year: 2020 38 | Publication Month: None 39 | Publication Day: None 40 | Tok Title: Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases. 41 | Tok DOI: None 42 | Tok PMID: 1234 43 | Tok Authors: Jin H, Mitchell J, Moseley H 44 | Ref Line: Jin H, Mitchell J, Moseley H. 
Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases. Metabolites. 2020 September; 10(9):368-. doi: 10.3390/metabo10090368. 45 | Comparison: N/A 46 | 47 | Authors: Huan Jin, Joshua M. Mitchell, Hunter N. B. Moseley 48 | Grants: None Found 49 | Abstract: None 50 | Conclusions: None 51 | Copyrights: None 52 | DOI: 10.3390/metabo10090368 53 | Journal: MDPI AG 54 | Keywords: None 55 | Methods: None 56 | PMID: None 57 | Results: None 58 | Title: Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases 59 | PMCID: None 60 | Publication Year: 2020 61 | Publication Month: None 62 | Publication Day: None 63 | Tok Title: Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases. 64 | Tok DOI: None 65 | Tok PMID: None 66 | Tok Authors: Jin H, Mitchell J, Moseley H 67 | Ref Line: Jin H, Mitchell J, Moseley H. Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases. Metabolites. 2020 September; 10(9):368-. doi: 10.3390/metabo10090368. 
68 | Comparison: N/A 69 | 70 | -------------------------------------------------------------------------------- /tests/testing_files/has_author.xml: -------------------------------------------------------------------------------- 1 | <PubmedArticle><MedlineCitation IndexingMethod="Curated" Owner="NLM" Status="MEDLINE"><PMID Version="1">34352431</PMID><DateCompleted><Year>2021</Year><Month>10</Month><Day>18</Day></DateCompleted><DateRevised><Year>2021</Year><Month>10</Month><Day>18</Day></DateRevised><Article PubModel="Print-Electronic"><Journal><ISSN IssnType="Electronic">1873-3778</ISSN><JournalIssue CitedMedium="Internet"><Volume>1653</Volume><PubDate><Year>2021</Year><Month>Sep</Month><Day>13</Day></PubDate></JournalIssue><Title>Journal of chromatography. AJ Chromatogr ADirect injection analysis of per and polyfluoroalkyl substances in surface and drinking water by sample filtration and liquid chromatography-tandem mass spectrometry.462426S0021-9673(21)00550-110.1016/j.chroma.2021.462426We developed and validated a method for direct determination of per- and polyfluoroalkylated substances (PFASs) in environmental water samples without prior sample concentration. Samples are centrifuged and supernatants passed through an Acrodisc Filter (GXF/GHP 0.2  um, 25  mm diameter). After addition of ammonium acetate, samples are analyzed by UPLC-MS/MS using an AB Sciex 6500 plus Q-Trap mass spectrometer operated in negative multiple reaction-monitoring (MRM) mode. The instrument system incorporates a delay column between the pumps and autosampler to mitigate interference from background PFAS. The method monitors eight short-/long-chain PFAS which are identified by monitoring specific precursor product ion pairs and by their retention times and quantified using isotope mass-labeled internal standard based calibration plots. Average spiked recoveries (n = 8) of target analytes ranged from 84 to 110% with 4-9% relative standard deviation (RSD). 
The mean spiked recoveries (n = 8) of four surrogates were 94-106% with 3-8% RSD. For continuous calibration verification (CCV), average spiked recoveries (n = 8) for target analytes ranged from 88 to 114% with 4-11% RSD and for surrogates ranged from 104-112% with 3-11% RSD. The recoveries (n = 6) of matrix spike (MX), matrix spike duplicate (MXD), and field reagent blank (FRB) met our acceptance criteria. The limit of detection for the target analytes was between 0.007 and 0.04 ng/mL. The method was used to measure PFAS in tap water and surface water.Copyright © 2021. Published by Elsevier B.V.MottalebM AbdulMASuperfund Research Center, University of Kentucky, Lexington KY, 40506, United States; Center for Appalachian Research in Environmental Sciences, University of Kentucky, Lexington KY, 40506, United States; Division of Cardiovascular, Medicine, College of Medicine, University of Kentucky and Lexington VA Medical Center, Lexington, KY, 40536, United States. a.j.morris@uky.edu; Pressent address: Institute of Drug & Biotherapeutic Innovation, DRC, 1100 South Grand Blvd, Saint Louis University, Saint Louis, MO 63104 United States. Electronic address: m.a.mottaleb@uky.edu.DingQunxing XQXDepartment of Biology, College of Arts and Sciences, Kent State University, Kent, OH, 44242, United States. Electronic address: qding@kent.edu.PennellKelly GKGSuperfund Research Center, University of Kentucky, Lexington KY, 40506, United States; Center for Appalachian Research in Environmental Sciences, University of Kentucky, Lexington KY, 40506, United States; Department of Civil Engineering, College of Engineering, University of Kentucky, Lexington KY, 40506, United States. 
Electronic address: kellypennell@uky.edu.HaynesErin NENSuperfund Research Center, University of Kentucky, Lexington KY, 40506, United States; Center for Appalachian Research in Environmental Sciences, University of Kentucky, Lexington KY, 40506, United States; Department of Epidemiology, College of Public Health, University of Kentucky, Lexington KY, 40536, United States. Electronic address: erin.haynes@uky.edu.MorrisAndrew JAJSuperfund Research Center, University of Kentucky, Lexington KY, 40506, United States; Center for Appalachian Research in Environmental Sciences, University of Kentucky, Lexington KY, 40506, United States; Division of Cardiovascular, Medicine, College of Medicine, University of Kentucky and Lexington VA Medical Center, Lexington, KY, 40536, United States. a.j.morris@uky.edu; Pressent address: Institute of Drug & Biotherapeutic Innovation, DRC, 1100 South Grand Blvd, Saint Louis University, Saint Louis, MO 63104 United States. Electronic address: a.j.morris@uky.edu.engP30 ES026529ESNIEHS NIH HHSUnited StatesJournal Article20210721NetherlandsJ Chromatogr A93184880021-96730Drinking Water0FluorocarbonsIMChromatography, LiquidDrinking WaterFluorocarbonsSolid Phase ExtractionTandem Mass SpectrometryAcrodisc filtrationDirect injectionDrinking and surface waterPFASUPLC-MS/MSDeclaration of Competing Interest The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.20210430202107162021071620218660202186602021852018ppublish34352431S0021-9673(21)00550-110.1016/j.chroma.2021.462426 -------------------------------------------------------------------------------- /tests/testing_files/has_pubmed_grants.xml: -------------------------------------------------------------------------------- 1 | 338307772021061720210617
1535-390720520210507Journal of proteome researchJ Proteome ResUntargeted Stable Isotope Probing of the Gut Microbiota Metabolome Using 13C-Labeled Dietary Fibers.2904-291310.1021/acs.jproteome.1c00124The gut microbiome generates numerous metabolites that exert local effects and enter the circulation to affect the functions of many organs. Despite extensive sequencing-based characterization of the gut microbiome, there remains a lack of understanding of microbial metabolism. Here, we developed an untargeted stable isotope-resolved metabolomics (SIRM) approach for the holistic study of gut microbial metabolites. Viable microbial cells were extracted from fresh mice feces and incubated anaerobically with 13C-labeled dietary fibers including inulin or cellulose. High-resolution mass spectrometry was used to monitor 13C enrichment in metabolites associated with glycolysis, the Krebs cycle, the pentose phosphate pathway, nucleotide synthesis, and pyruvate catabolism in both microbial cells and the culture medium. We observed the differential use of inulin and cellulose as substrates for biosynthesis of essential and non-essential amino acids, neurotransmitters, vitamin B5, and other coenzymes. Specifically, the use of inulin for these biosynthetic pathways was markedly more efficient than the use of cellulose, reflecting distinct metabolic pathways of dietary fibers in the gut microbiome, which could be related with host effects. 
This technology facilitates deeper and holistic insights into the metabolic function of the gut microbiome (Metabolomic Workbench Study ID: ST001651).DengPanP0000-0003-2974-7389Superfund Research Center, University of Kentucky, Lexington 40536, Kentucky, United States.Department of Pharmaceutical Sciences, University of Kentucky, Lexington 40536, Kentucky, United States.ValentinoTaylorTDepartment of Physiology, University of Kentucky, Lexington 40536, Kentucky, United States.FlytheMichael DMDDepartment of Animal and Food Sciences, University of Kentucky, Lexington 40536, Kentucky, United States.United States Department of Agriculture, Agriculture Research Service, Forage Animal Production Research Unit, Lexington 40536, Kentucky, United States.MoseleyHunter N BHNBSuperfund Research Center, University of Kentucky, Lexington 40536, Kentucky, United States.Institute for Biomedical Informatics, University of Kentucky, Lexington 40536, Kentucky, United States.Department of Molecular and Cellular Biochemistry, University of Kentucky, Lexington 40536, Kentucky, United States.LeachmanJacqueline RJRDepartment of Pharmacology and Nutritional Sciences, University of Kentucky, Lexington 40536, Kentucky, United States.MorrisAndrew JAJSuperfund Research Center, University of Kentucky, Lexington 40536, Kentucky, United States.Division of Cardiovascular Medicine, University of Kentucky, Lexington 40536, Kentucky, United States.HennigBernhardBSuperfund Research Center, University of Kentucky, Lexington 40536, Kentucky, United States.Department of Animal and Food Sciences, University of Kentucky, Lexington 40536, Kentucky, United States.engP42 ES007380ESNIEHS NIH HHSUnited StatesP30 GM127211GMNIGMS NIH HHSUnited StatesJournal ArticleResearch Support, N.I.H., Extramural20210408
United StatesJ Proteome Res1011287751535-38930Dietary Fiber0IsotopesIMAnimalsDietary FiberFecesGastrointestinal MicrobiomeIsotopesMetabolomeMetabolomicsMicedietary fiberinulinmetabolitemetabolomicsmicrobiomestable isotope
202149602021622602021481715ppublish3383077710.1021/acs.jproteome.1c00124
-------------------------------------------------------------------------------- /tests/testing_files/intermediate_results/author_search/no_PubMed/running_pubs1.json: -------------------------------------------------------------------------------- 1 | { 2 | "https://doi.org/10.1038/s41597-023-02277-x": { 3 | "PMCID": null, 4 | "abstract": null, 5 | "authors": [ 6 | { 7 | "ORCID": "0000-0003-3995-5368", 8 | "affiliation": "kentucky", 9 | "author_id": "Hunter Moseley", 10 | "firstname": "Hunter", 11 | "initials": null, 12 | "lastname": "Moseley" 13 | } 14 | ], 15 | "conclusions": null, 16 | "copyrights": null, 17 | "doi": "10.1038/s41597-023-02277-x", 18 | "grants": [], 19 | "journal": null, 20 | "keywords": null, 21 | "methods": null, 22 | "publication_date": { 23 | "day": 16, 24 | "month": 6, 25 | "year": 2023 26 | }, 27 | "pubmed_id": null, 28 | "queried_sources": [ 29 | "ORCID" 30 | ], 31 | "references": [], 32 | "results": null, 33 | "title": "Identifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water" 34 | }, 35 | "https://doi.org/10.1038/s41597-023-02281-1": { 36 | "PMCID": null, 37 | "abstract": null, 38 | "authors": [ 39 | { 40 | "ORCID": "0000-0003-3995-5368", 41 | "affiliation": "kentucky", 42 | "author_id": "Hunter Moseley", 43 | "firstname": "Hunter", 44 | "initials": null, 45 | "lastname": "Moseley" 46 | } 47 | ], 48 | "conclusions": null, 49 | "copyrights": null, 50 | "doi": "10.1038/s41597-023-02281-1", 51 | "grants": [], 52 | "journal": null, 53 | "keywords": null, 54 | "methods": null, 55 | "publication_date": { 56 | "day": 16, 57 | "month": 6, 58 | "year": 2023 59 | }, 60 | "pubmed_id": null, 61 | "queried_sources": [ 62 | "ORCID" 63 | ], 64 | "references": [], 65 | "results": null, 66 | "title": "A proposed FAIR approach for disseminating geospatial information system maps" 67 | }, 68 | "https://doi.org/10.1186/s12859-023-05423-9": { 69 | "PMCID": null, 70 | "abstract": null, 71 | "authors": [ 72 | { 
73 | "ORCID": "0000-0003-3995-5368", 74 | "affiliation": "kentucky", 75 | "author_id": "Hunter Moseley", 76 | "firstname": "Hunter", 77 | "initials": null, 78 | "lastname": "Moseley" 79 | } 80 | ], 81 | "conclusions": null, 82 | "copyrights": null, 83 | "doi": "10.1186/s12859-023-05423-9", 84 | "grants": [], 85 | "journal": null, 86 | "keywords": null, 87 | "methods": null, 88 | "publication_date": { 89 | "day": 24, 90 | "month": 7, 91 | "year": 2023 92 | }, 93 | "pubmed_id": null, 94 | "queried_sources": [ 95 | "ORCID" 96 | ], 97 | "references": [], 98 | "results": null, 99 | "title": "The metabolomics workbench file status website: a metadata repository promoting FAIR principles of metabolomics data" 100 | }, 101 | "https://doi.org/10.1371/journal.pone.0277834": { 102 | "PMCID": null, 103 | "abstract": null, 104 | "authors": [ 105 | { 106 | "ORCID": "0000-0003-3995-5368", 107 | "affiliation": "kentucky", 108 | "author_id": "Hunter Moseley", 109 | "firstname": "Hunter", 110 | "initials": null, 111 | "lastname": "Moseley" 112 | } 113 | ], 114 | "conclusions": null, 115 | "copyrights": null, 116 | "doi": "10.1371/journal.pone.0277834", 117 | "grants": [], 118 | "journal": null, 119 | "keywords": null, 120 | "methods": null, 121 | "publication_date": { 122 | "day": 18, 123 | "month": 11, 124 | "year": 2022 125 | }, 126 | "pubmed_id": null, 127 | "queried_sources": [ 128 | "ORCID" 129 | ], 130 | "references": [], 131 | "results": null, 132 | "title": "Academic Tracker: Software for tracking and reporting publications associated with authors and grants" 133 | }, 134 | "https://doi.org/10.3390/metabo12060515": { 135 | "PMCID": null, 136 | "abstract": null, 137 | "authors": [ 138 | { 139 | "ORCID": "0000-0003-3995-5368", 140 | "affiliation": "kentucky", 141 | "author_id": "Hunter Moseley", 142 | "firstname": "Hunter", 143 | "initials": null, 144 | "lastname": "Moseley" 145 | } 146 | ], 147 | "conclusions": null, 148 | "copyrights": null, 149 | "doi": 
"10.3390/metabo12060515", 150 | "grants": [], 151 | "journal": null, 152 | "keywords": null, 153 | "methods": null, 154 | "publication_date": { 155 | "day": 2, 156 | "month": 6, 157 | "year": 2022 158 | }, 159 | "pubmed_id": null, 160 | "queried_sources": [ 161 | "ORCID" 162 | ], 163 | "references": [], 164 | "results": null, 165 | "title": "Scan-Centric, Frequency-Based Method for Characterizing Peaks from Direct Injection Fourier Transform Mass Spectrometry Experiments" 166 | }, 167 | "https://doi.org/10.3390/metabo13020215": { 168 | "PMCID": null, 169 | "abstract": null, 170 | "authors": [ 171 | { 172 | "ORCID": "0000-0003-3995-5368", 173 | "affiliation": "kentucky", 174 | "author_id": "Hunter Moseley", 175 | "firstname": "Hunter", 176 | "initials": null, 177 | "lastname": "Moseley" 178 | } 179 | ], 180 | "conclusions": null, 181 | "copyrights": null, 182 | "doi": "10.3390/metabo13020215", 183 | "grants": [], 184 | "journal": null, 185 | "keywords": null, 186 | "methods": null, 187 | "publication_date": { 188 | "day": 1, 189 | "month": 2, 190 | "year": 2023 191 | }, 192 | "pubmed_id": null, 193 | "queried_sources": [ 194 | "ORCID" 195 | ], 196 | "references": [], 197 | "results": null, 198 | "title": "Bilirubin Nanoparticle Treatment in Obese Mice Inhibits Hepatic Ceramide Production and Remodels Liver Fat Content" 199 | }, 200 | "https://doi.org/10.3390/metabo13070842": { 201 | "PMCID": null, 202 | "abstract": null, 203 | "authors": [ 204 | { 205 | "ORCID": "0000-0003-3995-5368", 206 | "affiliation": "kentucky", 207 | "author_id": "Hunter Moseley", 208 | "firstname": "Hunter", 209 | "initials": null, 210 | "lastname": "Moseley" 211 | } 212 | ], 213 | "conclusions": null, 214 | "copyrights": null, 215 | "doi": "10.3390/metabo13070842", 216 | "grants": [], 217 | "journal": null, 218 | "keywords": null, 219 | "methods": null, 220 | "publication_date": { 221 | "day": 12, 222 | "month": 7, 223 | "year": 2023 224 | }, 225 | "pubmed_id": null, 226 | "queried_sources": 
[ 227 | "ORCID" 228 | ], 229 | "references": [], 230 | "results": null, 231 | "title": "MESSES: Software for Transforming Messy Research Datasets into Clean Submissions to Metabolomics Workbench for Public Sharing" 232 | } 233 | } -------------------------------------------------------------------------------- /tests/testing_files/intermediate_results/ref_search/all/matching_key_for_citation1.json: -------------------------------------------------------------------------------- 1 | [ 2 | "https://doi.org/10.3390/metabo3040853", 3 | null, 4 | "https://doi.org/10.1007/978-1-4939-1258-2_11" 5 | ] -------------------------------------------------------------------------------- /tests/testing_files/intermediate_results/ref_search/all/matching_key_for_citation2.json: -------------------------------------------------------------------------------- 1 | [ 2 | "https://doi.org/10.3390/metabo3040853", 3 | "https://doi.org/10.1186/1471-2105-15-s10-p36", 4 | null 5 | ] -------------------------------------------------------------------------------- /tests/testing_files/intermediate_results/ref_search/all/matching_key_for_citation3.json: -------------------------------------------------------------------------------- 1 | [ 2 | "https://doi.org/10.3390/metabo3040853", 3 | null, 4 | "https://doi.org/10.1007/978-1-4939-1258-2_11" 5 | ] -------------------------------------------------------------------------------- /tests/testing_files/intermediate_results/ref_search/all/matching_key_for_citation4.json: -------------------------------------------------------------------------------- 1 | [ 2 | "https://doi.org/10.3390/metabo3040853", 3 | "https://doi.org/10.1186/1471-2105-15-s10-p36", 4 | null 5 | ] -------------------------------------------------------------------------------- /tests/testing_files/intermediate_results/ref_search/all/tokenized_reference.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "DOI": null, 4 | "PMID": 
null, 5 | "authors": [ 6 | { 7 | "first": "William", 8 | "last": "Carreer", 9 | "middle": "J." 10 | }, 11 | { 12 | "first": "Robert", 13 | "last": "Flight", 14 | "middle": "M." 15 | }, 16 | { 17 | "first": "Hunter", 18 | "last": "Moseley", 19 | "middle": "N.B." 20 | } 21 | ], 22 | "pub_dict_key": "https://doi.org/10.3390/metabo3040853", 23 | "reference_line": "William J. Carreer, Robert M. Flight, and Hunter N.B. Moseley. \"A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets\" Metabolites , v.3 , 2013 , p.853", 24 | "title": "A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets" 25 | }, 26 | { 27 | "DOI": null, 28 | "PMID": null, 29 | "authors": [ 30 | { 31 | "first": "Joshua", 32 | "last": "Mitchell", 33 | "middle": "M." 34 | }, 35 | { 36 | "first": "Teresa", 37 | "last": "Fan", 38 | "middle": "W-.M." 39 | }, 40 | { 41 | "first": "Andrew", 42 | "last": "Lane", 43 | "middle": "N." 44 | }, 45 | { 46 | "first": "Hunter", 47 | "last": "Moseley", 48 | "middle": "N.B." 49 | } 50 | ], 51 | "pub_dict_key": "https://doi.org/10.1186/1471-2105-15-s10-p36", 52 | "reference_line": "Joshua M. Mitchell, Teresa W-.M. Fan, Andrew N. Lane, and Hunter N.B. Moseley \"Development of Large-Scale Metabolite Identification Methods for Metabolomics\" Frontiers in Genetics - Systems Biology , v.5 , 2014 , p.237 10.3389/fgene.2014.00237", 53 | "title": "Development of Large-Scale Metabolite Identification Methods for Metabolomics" 54 | }, 55 | { 56 | "DOI": null, 57 | "PMID": null, 58 | "authors": [ 59 | { 60 | "first": "Richard", 61 | "last": "Higashi", 62 | "middle": "M." 63 | }, 64 | { 65 | "first": "Teresa", 66 | "last": "Fan", 67 | "middle": "W-M." 68 | }, 69 | { 70 | "first": "Pawel", 71 | "last": "Lorkiewicz", 72 | "middle": "K." 73 | }, 74 | { 75 | "first": "Hunter", 76 | "last": "Moseley", 77 | "middle": "N.B." 
78 | }, 79 | { 80 | "first": "Andrew", 81 | "last": "Lane", 82 | "middle": "N." 83 | } 84 | ], 85 | "pub_dict_key": "https://doi.org/10.1007/978-1-4939-1258-2_11", 86 | "reference_line": "Richard M. Higashi, Teresa W-M. Fan, Pawel K. Lorkiewicz, Hunter N.B. Moseley, Andrew N. Lane \"Stable Isotope Labeled Tracers for Metabolic Pathway Elucidation by GC-MS and FT-MS\" Mass Spectrometry Methods in Metabolomics , v.53 , 2015 , p.337 10.1002/mrc.4199", 87 | "title": "Stable Isotope Labeled Tracers for Metabolic Pathway Elucidation by GC-MS and FT-MS" 88 | } 89 | ] -------------------------------------------------------------------------------- /tests/testing_files/intermediate_results/ref_search/no_Crossref/matching_key_for_citation1.json: -------------------------------------------------------------------------------- 1 | [ 2 | "https://doi.org/10.3390/metabo3040853", 3 | null, 4 | "https://doi.org/10.1007/978-1-4939-1258-2_11" 5 | ] -------------------------------------------------------------------------------- /tests/testing_files/intermediate_results/ref_search/no_Crossref/matching_key_for_citation2.json: -------------------------------------------------------------------------------- 1 | [ 2 | "https://doi.org/10.3390/metabo3040853", 3 | null, 4 | "https://doi.org/10.1007/978-1-4939-1258-2_11" 5 | ] -------------------------------------------------------------------------------- /tests/testing_files/intermediate_results/ref_search/no_Crossref/tokenized_reference.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "DOI": null, 4 | "PMID": null, 5 | "authors": [ 6 | { 7 | "first": "William", 8 | "last": "Carreer", 9 | "middle": "J." 10 | }, 11 | { 12 | "first": "Robert", 13 | "last": "Flight", 14 | "middle": "M." 15 | }, 16 | { 17 | "first": "Hunter", 18 | "last": "Moseley", 19 | "middle": "N.B." 20 | } 21 | ], 22 | "pub_dict_key": "https://doi.org/10.3390/metabo3040853", 23 | "reference_line": "William J. 
Carreer, Robert M. Flight, and Hunter N.B. Moseley. \"A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets\" Metabolites , v.3 , 2013 , p.853", 24 | "title": "A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets" 25 | }, 26 | { 27 | "DOI": null, 28 | "PMID": null, 29 | "authors": [ 30 | { 31 | "first": "Joshua", 32 | "last": "Mitchell", 33 | "middle": "M." 34 | }, 35 | { 36 | "first": "Teresa", 37 | "last": "Fan", 38 | "middle": "W-.M." 39 | }, 40 | { 41 | "first": "Andrew", 42 | "last": "Lane", 43 | "middle": "N." 44 | }, 45 | { 46 | "first": "Hunter", 47 | "last": "Moseley", 48 | "middle": "N.B." 49 | } 50 | ], 51 | "pub_dict_key": "", 52 | "reference_line": "Joshua M. Mitchell, Teresa W-.M. Fan, Andrew N. Lane, and Hunter N.B. Moseley \"Development of Large-Scale Metabolite Identification Methods for Metabolomics\" Frontiers in Genetics - Systems Biology , v.5 , 2014 , p.237 10.3389/fgene.2014.00237", 53 | "title": "Development of Large-Scale Metabolite Identification Methods for Metabolomics" 54 | }, 55 | { 56 | "DOI": null, 57 | "PMID": null, 58 | "authors": [ 59 | { 60 | "first": "Richard", 61 | "last": "Higashi", 62 | "middle": "M." 63 | }, 64 | { 65 | "first": "Teresa", 66 | "last": "Fan", 67 | "middle": "W-M." 68 | }, 69 | { 70 | "first": "Pawel", 71 | "last": "Lorkiewicz", 72 | "middle": "K." 73 | }, 74 | { 75 | "first": "Hunter", 76 | "last": "Moseley", 77 | "middle": "N.B." 78 | }, 79 | { 80 | "first": "Andrew", 81 | "last": "Lane", 82 | "middle": "N." 83 | } 84 | ], 85 | "pub_dict_key": "https://doi.org/10.1007/978-1-4939-1258-2_11", 86 | "reference_line": "Richard M. Higashi, Teresa W-M. Fan, Pawel K. Lorkiewicz, Hunter N.B. Moseley, Andrew N. 
Lane \"Stable Isotope Labeled Tracers for Metabolic Pathway Elucidation by GC-MS and FT-MS\" Mass Spectrometry Methods in Metabolomics , v.53 , 2015 , p.337 10.1002/mrc.4199", 87 | "title": "Stable Isotope Labeled Tracers for Metabolic Pathway Elucidation by GC-MS and FT-MS" 88 | } 89 | ] -------------------------------------------------------------------------------- /tests/testing_files/intermediate_results/ref_search/no_PubMed/matching_key_for_citation1.json: -------------------------------------------------------------------------------- 1 | [ 2 | "https://doi.org/10.3390/metabo3040853", 3 | "https://doi.org/10.1186/1471-2105-15-s10-p36", 4 | null 5 | ] -------------------------------------------------------------------------------- /tests/testing_files/intermediate_results/ref_search/no_PubMed/matching_key_for_citation2.json: -------------------------------------------------------------------------------- 1 | [ 2 | "https://doi.org/10.3390/metabo3040853", 3 | "https://doi.org/10.1186/1471-2105-15-s10-p36", 4 | null 5 | ] -------------------------------------------------------------------------------- /tests/testing_files/intermediate_results/ref_search/no_PubMed/tokenized_reference.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "DOI": null, 4 | "PMID": null, 5 | "authors": [ 6 | { 7 | "first": "William", 8 | "last": "Carreer", 9 | "middle": "J." 10 | }, 11 | { 12 | "first": "Robert", 13 | "last": "Flight", 14 | "middle": "M." 15 | }, 16 | { 17 | "first": "Hunter", 18 | "last": "Moseley", 19 | "middle": "N.B." 20 | } 21 | ], 22 | "pub_dict_key": "https://doi.org/10.3390/metabo3040853", 23 | "reference_line": "William J. Carreer, Robert M. Flight, and Hunter N.B. Moseley. 
\"A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets\" Metabolites , v.3 , 2013 , p.853", 24 | "title": "A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets" 25 | }, 26 | { 27 | "DOI": null, 28 | "PMID": null, 29 | "authors": [ 30 | { 31 | "first": "Joshua", 32 | "last": "Mitchell", 33 | "middle": "M." 34 | }, 35 | { 36 | "first": "Teresa", 37 | "last": "Fan", 38 | "middle": "W-.M." 39 | }, 40 | { 41 | "first": "Andrew", 42 | "last": "Lane", 43 | "middle": "N." 44 | }, 45 | { 46 | "first": "Hunter", 47 | "last": "Moseley", 48 | "middle": "N.B." 49 | } 50 | ], 51 | "pub_dict_key": "https://doi.org/10.1186/1471-2105-15-s10-p36", 52 | "reference_line": "Joshua M. Mitchell, Teresa W-.M. Fan, Andrew N. Lane, and Hunter N.B. Moseley \"Development of Large-Scale Metabolite Identification Methods for Metabolomics\" Frontiers in Genetics - Systems Biology , v.5 , 2014 , p.237 10.3389/fgene.2014.00237", 53 | "title": "Development of Large-Scale Metabolite Identification Methods for Metabolomics" 54 | }, 55 | { 56 | "DOI": null, 57 | "PMID": null, 58 | "authors": [ 59 | { 60 | "first": "Richard", 61 | "last": "Higashi", 62 | "middle": "M." 63 | }, 64 | { 65 | "first": "Teresa", 66 | "last": "Fan", 67 | "middle": "W-M." 68 | }, 69 | { 70 | "first": "Pawel", 71 | "last": "Lorkiewicz", 72 | "middle": "K." 73 | }, 74 | { 75 | "first": "Hunter", 76 | "last": "Moseley", 77 | "middle": "N.B." 78 | }, 79 | { 80 | "first": "Andrew", 81 | "last": "Lane", 82 | "middle": "N." 83 | } 84 | ], 85 | "pub_dict_key": "", 86 | "reference_line": "Richard M. Higashi, Teresa W-M. Fan, Pawel K. Lorkiewicz, Hunter N.B. Moseley, Andrew N. 
Lane \"Stable Isotope Labeled Tracers for Metabolic Pathway Elucidation by GC-MS and FT-MS\" Mass Spectrometry Methods in Metabolomics , v.53 , 2015 , p.337 10.1002/mrc.4199", 87 | "title": "Stable Isotope Labeled Tracers for Metabolic Pathway Elucidation by GC-MS and FT-MS" 88 | } 89 | ] -------------------------------------------------------------------------------- /tests/testing_files/parse_citations_test.txt: -------------------------------------------------------------------------------- 1 | Carreer, William J., Robert M. Flight, and Hunter NB Moseley. "A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets." Metabolites 3.4 (2013): 853-866. PMID: 24404440 2 | 3 | Carreer, W. J., Flight, R. M., & Moseley, H. N. (2013). A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets. Metabolites, 3(4), 853-866. DOI: 10.3390/metabo3040853 4 | 5 | Carreer, William J., Robert M. Flight, and Hunter NB Moseley. "A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets." Metabolites 3, no. 4 (2013): 853-866. DOI: https://doi.org/10.3390/metabo3040853 6 | 7 | Carreer, W.J., Flight, R.M. and Moseley, H.N., 2013. A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets. Metabolites, 3(4), pp.853-866. PMID: 24404440, DOI: 10.3390/metabo3040853 8 | 9 | Carreer WJ, Flight RM, Moseley HN. A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets. Metabolites. 2013 Dec;3(4):853-66. 
-------------------------------------------------------------------------------- /tests/testing_files/project_report.txt: -------------------------------------------------------------------------------- 1 | Kelly Pennell: 2 | Hunter Moseley: 3 | Plk1 phosphorylation of PHGDH to regulate serine metabolism Hunter Moseley None Found 4 | Hepatic kinome atlas: An in-depth identification of kinase pathways in liver fibrosis of humans and rodents. Justin F Creeden, Zachary A Kipp, Mei Xu, Robert M Flight, Hunter N B Moseley, Genesee J Martinez, Wang-Hsin Lee, Khaled Alganem, Ali S Imami, Megan R McMullen, Sanjoy Roychowdhury, Atta M Nawabi, Jennifer A Hipp, Samir Softic, Steven A Weinman, Robert McCullumsmith, Laura E Nagy, Terry D Hinds R01 MH121102, R01 AG057598, R01 DK121797, R01 MH107487, P30 CA177558, P50 AA024333 5 | Identifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water. Sweta Ojha, P Travis Thompson, Christian D Powell, Hunter N B Moseley, Kelly G Pennell P42 ES007380, 2020026 6 | Travis Thompson: 7 | Identifying and sharing per-and polyfluoroalkyl substances hot-spot areas and exposures in drinking water. Sweta Ojha, P Travis Thompson, Christian D Powell, Hunter N B Moseley, Kelly G Pennell P42 ES007380, 2020026 8 | -------------------------------------------------------------------------------- /tests/testing_files/pub_dict_from_PMID.json: -------------------------------------------------------------------------------- 1 | { 2 | "https://doi.org/10.1016/j.chroma.2021.462426": { 3 | "PMCID": null, 4 | "abstract": "We developed and validated a method for direct determination of per- and polyfluoroalkylated substances (PFASs) in environmental water samples without prior sample concentration. Samples are centrifuged and supernatants passed through an Acrodisc Filter (GXF/GHP 0.2\u00a0\u00a0um, 25\u00a0\u00a0mm diameter). 
After addition of ammonium acetate, samples are analyzed by UPLC-MS/MS using an AB Sciex 6500 plus Q-Trap mass spectrometer operated in negative multiple reaction-monitoring (MRM) mode. The instrument system incorporates a delay column between the pumps and autosampler to mitigate interference from background PFAS. The method monitors eight short-/long-chain PFAS which are identified by monitoring specific precursor product ion pairs and by their retention times and quantified using isotope mass-labeled internal standard based calibration plots. Average spiked recoveries (n\u00a0=\u00a08) of target analytes ranged from 84 to 110% with 4-9% relative standard deviation (RSD). The mean spiked recoveries (n\u00a0=\u00a08) of four surrogates were 94-106% with 3-8% RSD. For continuous calibration verification (CCV), average spiked recoveries (n\u00a0=\u00a08) for target analytes ranged from 88 to 114% with 4-11% RSD and for surrogates ranged from 104-112% with 3-11% RSD. The recoveries (n\u00a0=\u00a06) of matrix spike (MX), matrix spike duplicate (MXD), and field reagent blank (FRB) met our acceptance criteria. The limit of detection for the target analytes was between 0.007 and 0.04\u00a0ng/mL. The method was used to measure PFAS in tap water and surface water.", 5 | "authors": [ 6 | { 7 | "ORCID": null, 8 | "affiliation": "Superfund Research Center, University of Kentucky, Lexington KY, 40506, United States; Center for Appalachian Research in Environmental Sciences, University of Kentucky, Lexington KY, 40506, United States; Division of Cardiovascular, Medicine, College of Medicine, University of Kentucky and Lexington VA Medical Center, Lexington, KY, 40536, United States. a.j.morris@uky.edu; Pressent address: Institute of Drug & Biotherapeutic Innovation, DRC, 1100 South Grand Blvd, Saint Louis University, Saint Louis, MO 63104 United States. 
Electronic address: m.a.mottaleb@uky.edu.", 9 | "author_id": null, 10 | "firstname": "M Abdul", 11 | "initials": "MA", 12 | "lastname": "Mottaleb" 13 | }, 14 | { 15 | "ORCID": null, 16 | "affiliation": "Department of Biology, College of Arts and Sciences, Kent State University, Kent, OH, 44242, United States. Electronic address: qding@kent.edu.", 17 | "author_id": null, 18 | "firstname": "Qunxing X", 19 | "initials": "QX", 20 | "lastname": "Ding" 21 | }, 22 | { 23 | "ORCID": null, 24 | "affiliation": "Superfund Research Center, University of Kentucky, Lexington KY, 40506, United States; Center for Appalachian Research in Environmental Sciences, University of Kentucky, Lexington KY, 40506, United States; Department of Civil Engineering, College of Engineering, University of Kentucky, Lexington KY, 40506, United States. Electronic address: kellypennell@uky.edu.", 25 | "author_id": null, 26 | "firstname": "Kelly G", 27 | "initials": "KG", 28 | "lastname": "Pennell" 29 | }, 30 | { 31 | "ORCID": null, 32 | "affiliation": "Superfund Research Center, University of Kentucky, Lexington KY, 40506, United States; Center for Appalachian Research in Environmental Sciences, University of Kentucky, Lexington KY, 40506, United States; Department of Epidemiology, College of Public Health, University of Kentucky, Lexington KY, 40536, United States. Electronic address: erin.haynes@uky.edu.", 33 | "author_id": null, 34 | "firstname": "Erin N", 35 | "initials": "EN", 36 | "lastname": "Haynes" 37 | }, 38 | { 39 | "ORCID": null, 40 | "affiliation": "Superfund Research Center, University of Kentucky, Lexington KY, 40506, United States; Center for Appalachian Research in Environmental Sciences, University of Kentucky, Lexington KY, 40506, United States; Division of Cardiovascular, Medicine, College of Medicine, University of Kentucky and Lexington VA Medical Center, Lexington, KY, 40536, United States. 
a.j.morris@uky.edu; Pressent address: Institute of Drug & Biotherapeutic Innovation, DRC, 1100 South Grand Blvd, Saint Louis University, Saint Louis, MO 63104 United States. Electronic address: a.j.morris@uky.edu.", 41 | "author_id": null, 42 | "firstname": "Andrew J", 43 | "initials": "AJ", 44 | "lastname": "Morris" 45 | } 46 | ], 47 | "conclusions": null, 48 | "copyrights": "Copyright \u00a9 2021. Published by Elsevier B.V.", 49 | "doi": "10.1016/j.chroma.2021.462426", 50 | "grants": [ 51 | "P30 ES026529" 52 | ], 53 | "journal": "Journal of chromatography. A", 54 | "keywords": [ 55 | "Acrodisc filtration", 56 | "Direct injection", 57 | "Drinking and surface water", 58 | "PFAS", 59 | "UPLC-MS/MS" 60 | ], 61 | "methods": null, 62 | "publication_date": { 63 | "day": 6, 64 | "month": 8, 65 | "year": 2021 66 | }, 67 | "pubmed_id": "34352431", 68 | "references": [], 69 | "results": null, 70 | "title": "Direct injection analysis of per and polyfluoroalkyl substances in surface and drinking water by sample filtration and liquid chromatography-tandem mass spectrometry." 
71 | } 72 | } -------------------------------------------------------------------------------- /tests/testing_files/pubs_by_author_dict.json: -------------------------------------------------------------------------------- 1 | { 2 | "Hunter Moseley": { 3 | "https://aacrjournals.org/cancerres/article/83/7_Supplement/3673/719740": [], 4 | "https://chemrxiv.org/engage/chemrxiv/article-details/62da093f13e3659590e0d5eb": [], 5 | "https://doi.org/10.1002/hep.32467": [ 6 | "R01 MH121102", 7 | "R01 AG057598", 8 | "R01 DK121797", 9 | "R01 MH107487", 10 | "P30 CA177558", 11 | "P50 AA024333" 12 | ], 13 | "https://doi.org/10.1038/s41467-023-35784-x": [ 14 | "133123-RSG-19-081-01-TBG", 15 | "GM121327", 16 | "P30 CA177558", 17 | "R01 CA237643", 18 | "ES007266-30" 19 | ], 20 | "https://doi.org/10.1038/s41597-023-02277-x": [ 21 | "P42 ES007380", 22 | "2020026" 23 | ], 24 | "https://doi.org/10.1038/s41597-023-02281-1": [ 25 | "P42 ES007380", 26 | "2020026" 27 | ], 28 | "https://doi.org/10.1186/s12859-023-05208-0": [ 29 | "2020026", 30 | "R03OD030603" 31 | ], 32 | "https://doi.org/10.1186/s12859-023-05423-9": [ 33 | "P42 ES007380", 34 | "2020026", 35 | "CF R03OD030603" 36 | ], 37 | "https://doi.org/10.1289/ehp11484": [], 38 | "https://doi.org/10.1371/journal.pone.0277834": [ 39 | "2020026", 40 | "P42 ES007380", 41 | "U54 TR001998-05A1" 42 | ], 43 | "https://doi.org/10.3390/metabo12060515": [ 44 | "2020026", 45 | "1P01CA163223-01A1", 46 | "1U24DK097215-01A1", 47 | "P01 CA163223", 48 | "P30 CA177558", 49 | "P30 CA177558", 50 | "U24 DK097215", 51 | "1419282" 52 | ], 53 | "https://doi.org/10.3390/metabo13020215": [ 54 | "P30 GM127211", 55 | "P20 GM104357", 56 | "R01DK121797", 57 | "P20GM104357", 58 | "R01DK126884" 59 | ], 60 | "https://doi.org/10.3390/metabo13070842": [ 61 | "P42 ES007380", 62 | "P42ES007380", 63 | "2020026" 64 | ], 65 | "https://www.biorxiv.org/content/10.1101/2022.02.24.481854.abstract": [], 66 | "https://www.biorxiv.org/content/10.1101/2022.12.08.519680.abstract": [] 
67 | } 68 | } -------------------------------------------------------------------------------- /tests/testing_files/pubs_by_author_dict_truncated.json: -------------------------------------------------------------------------------- 1 | { 2 | "Hunter Moseley": { 3 | "https://aacrjournals.org/cancerres/article/83/7_Supplement/3673/719740": [], 4 | "https://doi.org/10.1002/hep.32467": [ 5 | "R01 MH121102", 6 | "R01 AG057598", 7 | "R01 DK121797", 8 | "R01 MH107487", 9 | "P30 CA177558", 10 | "P50 AA024333" 11 | ], 12 | "https://doi.org/10.1038/s41597-023-02277-x": [ 13 | "P42 ES007380", 14 | "2020026" 15 | ] 16 | } 17 | } -------------------------------------------------------------------------------- /tests/testing_files/pymed_pubs.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/academic_tracker/eee61cb899ce190edef0e882a11dfedf19bae263/tests/testing_files/pymed_pubs.pkl -------------------------------------------------------------------------------- /tests/testing_files/ref_srch_Crossref_keys_for_citations.json: -------------------------------------------------------------------------------- 1 | [ 2 | "https://doi.org/10.3390/metabo11030163", 3 | "https://doi.org/10.3390/metabo10090368", 4 | null 5 | ] -------------------------------------------------------------------------------- /tests/testing_files/ref_srch_gen_reports_test_pub_dict.json: -------------------------------------------------------------------------------- 1 | { 2 | "https://doi.org/10.3390/metabo11030163": { 3 | "PMCID": null, 4 | "abstract": null, 5 | "authors": [ 6 | { 7 | "affiliation": null, 8 | "firstname": "Christian D.", 9 | "initials": null, 10 | "lastname": "Powell", 11 | "ORCID": null, 12 | "author_id": null 13 | }, 14 | { 15 | "affiliation": null, 16 | "firstname": "Hunter N.B.", 17 | "initials": null, 18 | "lastname": "Moseley", 19 | "ORCID": null, 20 | "author_id": null 21 | } 22 | ], 23 | 
"conclusions": null, 24 | "copyrights": null, 25 | "doi": "10.3390/metabo11030163", 26 | "grants": null, 27 | "journal": "MDPI AG", 28 | "keywords": null, 29 | "methods": null, 30 | "publication_date": { 31 | "day": null, 32 | "month": null, 33 | "year": 2021 34 | }, 35 | "pubmed_id": null, 36 | "queried_sources": ["PubMed"], 37 | "references": [], 38 | "results": null, 39 | "title": "The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository" 40 | }, 41 | "1234": { 42 | "PMCID": null, 43 | "abstract": null, 44 | "authors": [ 45 | { 46 | "affiliation": null, 47 | "firstname": "Huan", 48 | "initials": null, 49 | "lastname": "Jin", 50 | "ORCID": null, 51 | "author_id": null 52 | }, 53 | { 54 | "affiliation": null, 55 | "firstname": "Joshua M.", 56 | "initials": null, 57 | "lastname": "Mitchell", 58 | "ORCID": null, 59 | "author_id": null 60 | }, 61 | { 62 | "affiliation": null, 63 | "firstname": "Hunter N. 
B.", 64 | "initials": null, 65 | "lastname": "Moseley", 66 | "ORCID": null, 67 | "author_id": null 68 | } 69 | ], 70 | "conclusions": null, 71 | "copyrights": null, 72 | "doi": "10.3390/metabo10090368", 73 | "grants": null, 74 | "journal": "MDPI AG", 75 | "keywords": null, 76 | "methods": null, 77 | "publication_date": { 78 | "day": null, 79 | "month": null, 80 | "year": 2020 81 | }, 82 | "pubmed_id": null, 83 | "queried_sources": ["PubMed"], 84 | "references": [], 85 | "results": null, 86 | "title": "Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases" 87 | }, 88 | "Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases.": { 89 | "PMCID": null, 90 | "abstract": null, 91 | "authors": [ 92 | { 93 | "affiliation": null, 94 | "firstname": "Huan", 95 | "initials": null, 96 | "lastname": "Jin", 97 | "ORCID": null, 98 | "author_id": null 99 | }, 100 | { 101 | "affiliation": null, 102 | "firstname": "Joshua M.", 103 | "initials": null, 104 | "lastname": "Mitchell", 105 | "ORCID": null, 106 | "author_id": null 107 | }, 108 | { 109 | "affiliation": null, 110 | "firstname": "Hunter N. 
B.", 111 | "initials": null, 112 | "lastname": "Moseley", 113 | "ORCID": null, 114 | "author_id": null 115 | } 116 | ], 117 | "conclusions": null, 118 | "copyrights": null, 119 | "doi": "10.3390/metabo10090368", 120 | "grants": null, 121 | "journal": "MDPI AG", 122 | "keywords": null, 123 | "methods": null, 124 | "publication_date": { 125 | "day": null, 126 | "month": null, 127 | "year": 2020 128 | }, 129 | "pubmed_id": null, 130 | "queried_sources": ["PubMed"], 131 | "references": [], 132 | "results": null, 133 | "title": "Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases" 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /tests/testing_files/ref_srch_keys_for_citations.json: -------------------------------------------------------------------------------- 1 | [ 2 | "https://doi.org/10.3390/metabo11030163", 3 | "https://doi.org/10.3390/metabo10090368", 4 | null 5 | ] -------------------------------------------------------------------------------- /tests/testing_files/ref_srch_keys_for_citations_Crossref_duplicate_citation.json: -------------------------------------------------------------------------------- 1 | [ 2 | "https://doi.org/10.3390/metabo3040853", 3 | "https://doi.org/10.3390/metabo3040853" 4 | ] -------------------------------------------------------------------------------- /tests/testing_files/ref_srch_keys_for_citations_Crossref_merge.json: -------------------------------------------------------------------------------- 1 | [ 2 | "https://doi.org/10.3390/metabo3040853", 3 | "https://doi.org/10.1186/1471-2105-15-s10-p36", 4 | null 5 | ] -------------------------------------------------------------------------------- /tests/testing_files/ref_srch_keys_for_citations_PubMed_duplicate_citation.json: -------------------------------------------------------------------------------- 1 | [ 2 | 
"https://doi.org/10.3390/metabo3040853", 3 | "https://doi.org/10.3390/metabo3040853" 4 | ] -------------------------------------------------------------------------------- /tests/testing_files/ref_srch_keys_for_citations_PubMed_merge.json: -------------------------------------------------------------------------------- 1 | [ 2 | "https://doi.org/10.3390/metabo3040853", 3 | null, 4 | "https://doi.org/10.1007/978-1-4939-1258-2_11" 5 | ] -------------------------------------------------------------------------------- /tests/testing_files/ref_srch_publication_dict_Crossref_duplicate_citation.json: -------------------------------------------------------------------------------- 1 | { 2 | "https://doi.org/10.3390/metabo3040853": { 3 | "PMCID": null, 4 | "abstract": null, 5 | "authors": [ 6 | { 7 | "ORCID": null, 8 | "affiliation": null, 9 | "author_id": null, 10 | "firstname": "William", 11 | "initials": null, 12 | "lastname": "Carreer" 13 | }, 14 | { 15 | "ORCID": null, 16 | "affiliation": null, 17 | "author_id": null, 18 | "firstname": "Robert", 19 | "initials": null, 20 | "lastname": "Flight" 21 | }, 22 | { 23 | "ORCID": null, 24 | "affiliation": null, 25 | "author_id": null, 26 | "firstname": "Hunter", 27 | "initials": null, 28 | "lastname": "Moseley" 29 | } 30 | ], 31 | "conclusions": null, 32 | "copyrights": null, 33 | "doi": "10.3390/metabo3040853", 34 | "grants": [], 35 | "journal": "MDPI AG", 36 | "keywords": null, 37 | "methods": null, 38 | "publication_date": { 39 | "day": 25, 40 | "month": 9, 41 | "year": 2013 42 | }, 43 | "pubmed_id": null, 44 | "queried_sources": [ 45 | "Crossref" 46 | ], 47 | "references": [ 48 | { 49 | "PMCID": null, 50 | "citation": null, 51 | "doi": "10.1016/S0021-9258(18)74342-1", 52 | "pubmed_id": null, 53 | "title": "Deuterium as an indicator in the study of intermediary metabolism" 54 | }, 55 | { 56 | "PMCID": null, 57 | "citation": null, 58 | "doi": "10.1152/physrev.1940.20.2.218", 59 | "pubmed_id": null, 60 | "title": "The study 
of intermediary metabolism of animals with the aid of isotopes" 61 | }, 62 | { 63 | "PMCID": null, 64 | "citation": null, 65 | "doi": "10.1016/S0021-9258(18)75075-8", 66 | "pubmed_id": null, 67 | "title": "Deuterium as an indicator in the study of intermediary metabolism" 68 | }, 69 | { 70 | "PMCID": null, 71 | "citation": null, 72 | "doi": "10.2174/1568009033481769", 73 | "pubmed_id": null, 74 | "title": null 75 | }, 76 | { 77 | "PMCID": null, 78 | "citation": null, 79 | "doi": "10.1186/1476-4598-8-41", 80 | "pubmed_id": null, 81 | "title": null 82 | }, 83 | { 84 | "PMCID": null, 85 | "citation": null, 86 | "doi": "10.1016/j.aca.2009.08.032", 87 | "pubmed_id": null, 88 | "title": null 89 | }, 90 | { 91 | "PMCID": null, 92 | "citation": null, 93 | "doi": "10.1186/1471-2105-11-139", 94 | "pubmed_id": null, 95 | "title": null 96 | }, 97 | { 98 | "PMCID": null, 99 | "citation": null, 100 | "doi": "10.1021/ac061906b", 101 | "pubmed_id": null, 102 | "title": null 103 | }, 104 | { 105 | "PMCID": null, 106 | "citation": null, 107 | "doi": "10.1186/1741-7007-9-37", 108 | "pubmed_id": null, 109 | "title": null 110 | }, 111 | { 112 | "PMCID": null, 113 | "citation": null, 114 | "doi": "10.1021/bp000058h", 115 | "pubmed_id": null, 116 | "title": null 117 | }, 118 | { 119 | "PMCID": null, 120 | "citation": null, 121 | "doi": "10.1046/j.1432-1033.2003.03448.x", 122 | "pubmed_id": null, 123 | "title": null 124 | }, 125 | { 126 | "PMCID": null, 127 | "citation": null, 128 | "doi": null, 129 | "pubmed_id": null, 130 | "title": "Mass isotopomer distribution analysis at eight years: Theoretical, analytic, and experimental considerations" 131 | }, 132 | { 133 | "PMCID": null, 134 | "citation": null, 135 | "doi": "10.1002/bms.1200200804", 136 | "pubmed_id": null, 137 | "title": null 138 | }, 139 | { 140 | "PMCID": null, 141 | "citation": null, 142 | "doi": null, 143 | "pubmed_id": null, 144 | "title": "Efficient calculation of exact mass isotopic distributions" 145 | }, 146 | { 147 | 
"PMCID": null, 148 | "citation": null, 149 | "doi": "10.1002/bit.10393", 150 | "pubmed_id": null, 151 | "title": null 152 | }, 153 | { 154 | "PMCID": null, 155 | "citation": null, 156 | "doi": "10.1002/bit.10909", 157 | "pubmed_id": null, 158 | "title": null 159 | }, 160 | { 161 | "PMCID": null, 162 | "citation": null, 163 | "doi": null, 164 | "pubmed_id": null, 165 | "title": "An automated method for the analysis of stable isotope labeling data in proteomics" 166 | }, 167 | { 168 | "PMCID": null, 169 | "citation": null, 170 | "doi": "10.1002/(SICI)1096-9888(199603)31:3<255::AID-JMS290>3.0.CO;2-3", 171 | "pubmed_id": null, 172 | "title": null 173 | }, 174 | { 175 | "PMCID": null, 176 | "citation": null, 177 | "doi": null, 178 | "pubmed_id": null, 179 | "title": "Efficient calculation of accurate masses of isotopic peaks" 180 | }, 181 | { 182 | "PMCID": null, 183 | "citation": null, 184 | "doi": "10.1021/ac951158i", 185 | "pubmed_id": null, 186 | "title": null 187 | }, 188 | { 189 | "PMCID": null, 190 | "citation": null, 191 | "doi": "10.1016/0020-7381(83)85053-0", 192 | "pubmed_id": null, 193 | "title": null 194 | }, 195 | { 196 | "PMCID": null, 197 | "citation": "The Python Programming Languagehttp://www.python.org/", 198 | "doi": null, 199 | "pubmed_id": null, 200 | "title": null 201 | }, 202 | { 203 | "PMCID": null, 204 | "citation": null, 205 | "doi": null, 206 | "pubmed_id": null, 207 | "title": "Python: A programming language for software integration and development" 208 | }, 209 | { 210 | "PMCID": null, 211 | "citation": null, 212 | "doi": null, 213 | "pubmed_id": null, 214 | "title": "A Guide to NumPy" 215 | }, 216 | { 217 | "PMCID": null, 218 | "citation": null, 219 | "doi": null, 220 | "pubmed_id": null, 221 | "title": "Design Patterns: Elements of Reusable Object-Oriented Software" 222 | }, 223 | { 224 | "PMCID": null, 225 | "citation": null, 226 | "doi": "10.5936/csbj.201301006", 227 | "pubmed_id": null, 228 | "title": "Error analysis and propagation in 
metabolomics data analysis" 229 | }, 230 | { 231 | "PMCID": null, 232 | "citation": "Moseley Bioinformatics Laboratory Software Repository for downloadhttp://bioinformatics.cesb.uky.edu/bin/view/Main/SoftwareDevelopment/", 233 | "doi": null, 234 | "pubmed_id": null, 235 | "title": null 236 | } 237 | ], 238 | "results": null, 239 | "title": "A Computational Framework for High-Throughput Isotopic Natural Abundance Correction of Omics-Level Ultra-High Resolution FT-MS Datasets" 240 | } 241 | } -------------------------------------------------------------------------------- /tests/testing_files/ref_srch_publication_dict_Crossref_title_match.json: -------------------------------------------------------------------------------- 1 | { 2 | "https://doi.org/10.3390/metabo3040853": { 3 | "PMCID": null, 4 | "abstract": null, 5 | "authors": [ 6 | { 7 | "ORCID": null, 8 | "affiliation": null, 9 | "author_id": null, 10 | "firstname": "William", 11 | "initials": null, 12 | "lastname": "Carreer" 13 | }, 14 | { 15 | "ORCID": null, 16 | "affiliation": null, 17 | "author_id": null, 18 | "firstname": "Robert", 19 | "initials": null, 20 | "lastname": "Flight" 21 | }, 22 | { 23 | "ORCID": null, 24 | "affiliation": null, 25 | "author_id": null, 26 | "firstname": "Hunter", 27 | "initials": null, 28 | "lastname": "Moseley" 29 | } 30 | ], 31 | "conclusions": null, 32 | "copyrights": null, 33 | "doi": "10.3390/metabo3040853", 34 | "grants": [], 35 | "journal": "MDPI AG", 36 | "keywords": null, 37 | "methods": null, 38 | "publication_date": { 39 | "day": 25, 40 | "month": 9, 41 | "year": 2013 42 | }, 43 | "pubmed_id": null, 44 | "queried_sources": [ 45 | "Crossref" 46 | ], 47 | "references": [ 48 | { 49 | "PMCID": null, 50 | "citation": null, 51 | "doi": "10.1016/S0021-9258(18)74342-1", 52 | "pubmed_id": null, 53 | "title": "Deuterium as an indicator in the study of intermediary metabolism" 54 | }, 55 | { 56 | "PMCID": null, 57 | "citation": null, 58 | "doi": "10.1152/physrev.1940.20.2.218", 
59 | "pubmed_id": null, 60 | "title": "The study of intermediary metabolism of animals with the aid of isotopes" 61 | }, 62 | { 63 | "PMCID": null, 64 | "citation": null, 65 | "doi": "10.1016/S0021-9258(18)75075-8", 66 | "pubmed_id": null, 67 | "title": "Deuterium as an indicator in the study of intermediary metabolism" 68 | }, 69 | { 70 | "PMCID": null, 71 | "citation": null, 72 | "doi": "10.2174/1568009033481769", 73 | "pubmed_id": null, 74 | "title": null 75 | }, 76 | { 77 | "PMCID": null, 78 | "citation": null, 79 | "doi": "10.1186/1476-4598-8-41", 80 | "pubmed_id": null, 81 | "title": null 82 | }, 83 | { 84 | "PMCID": null, 85 | "citation": null, 86 | "doi": "10.1016/j.aca.2009.08.032", 87 | "pubmed_id": null, 88 | "title": null 89 | }, 90 | { 91 | "PMCID": null, 92 | "citation": null, 93 | "doi": "10.1186/1471-2105-11-139", 94 | "pubmed_id": null, 95 | "title": null 96 | }, 97 | { 98 | "PMCID": null, 99 | "citation": null, 100 | "doi": "10.1021/ac061906b", 101 | "pubmed_id": null, 102 | "title": null 103 | }, 104 | { 105 | "PMCID": null, 106 | "citation": null, 107 | "doi": "10.1186/1741-7007-9-37", 108 | "pubmed_id": null, 109 | "title": null 110 | }, 111 | { 112 | "PMCID": null, 113 | "citation": null, 114 | "doi": "10.1021/bp000058h", 115 | "pubmed_id": null, 116 | "title": null 117 | }, 118 | { 119 | "PMCID": null, 120 | "citation": null, 121 | "doi": "10.1046/j.1432-1033.2003.03448.x", 122 | "pubmed_id": null, 123 | "title": null 124 | }, 125 | { 126 | "PMCID": null, 127 | "citation": null, 128 | "doi": null, 129 | "pubmed_id": null, 130 | "title": "Mass isotopomer distribution analysis at eight years: Theoretical, analytic, and experimental considerations" 131 | }, 132 | { 133 | "PMCID": null, 134 | "citation": null, 135 | "doi": "10.1002/bms.1200200804", 136 | "pubmed_id": null, 137 | "title": null 138 | }, 139 | { 140 | "PMCID": null, 141 | "citation": null, 142 | "doi": null, 143 | "pubmed_id": null, 144 | "title": "Efficient calculation of exact 
mass isotopic distributions" 145 | }, 146 | { 147 | "PMCID": null, 148 | "citation": null, 149 | "doi": "10.1002/bit.10393", 150 | "pubmed_id": null, 151 | "title": null 152 | }, 153 | { 154 | "PMCID": null, 155 | "citation": null, 156 | "doi": "10.1002/bit.10909", 157 | "pubmed_id": null, 158 | "title": null 159 | }, 160 | { 161 | "PMCID": null, 162 | "citation": null, 163 | "doi": null, 164 | "pubmed_id": null, 165 | "title": "An automated method for the analysis of stable isotope labeling data in proteomics" 166 | }, 167 | { 168 | "PMCID": null, 169 | "citation": null, 170 | "doi": "10.1002/(SICI)1096-9888(199603)31:3<255::AID-JMS290>3.0.CO;2-3", 171 | "pubmed_id": null, 172 | "title": null 173 | }, 174 | { 175 | "PMCID": null, 176 | "citation": null, 177 | "doi": null, 178 | "pubmed_id": null, 179 | "title": "Efficient calculation of accurate masses of isotopic peaks" 180 | }, 181 | { 182 | "PMCID": null, 183 | "citation": null, 184 | "doi": "10.1021/ac951158i", 185 | "pubmed_id": null, 186 | "title": null 187 | }, 188 | { 189 | "PMCID": null, 190 | "citation": null, 191 | "doi": "10.1016/0020-7381(83)85053-0", 192 | "pubmed_id": null, 193 | "title": null 194 | }, 195 | { 196 | "PMCID": null, 197 | "citation": "The Python Programming Languagehttp://www.python.org/", 198 | "doi": null, 199 | "pubmed_id": null, 200 | "title": null 201 | }, 202 | { 203 | "PMCID": null, 204 | "citation": null, 205 | "doi": null, 206 | "pubmed_id": null, 207 | "title": "Python: A programming language for software integration and development" 208 | }, 209 | { 210 | "PMCID": null, 211 | "citation": null, 212 | "doi": null, 213 | "pubmed_id": null, 214 | "title": "A Guide to NumPy" 215 | }, 216 | { 217 | "PMCID": null, 218 | "citation": null, 219 | "doi": null, 220 | "pubmed_id": null, 221 | "title": "Design Patterns: Elements of Reusable Object-Oriented Software" 222 | }, 223 | { 224 | "PMCID": null, 225 | "citation": null, 226 | "doi": "10.5936/csbj.201301006", 227 | "pubmed_id": 
null, 228 | "title": "Error analysis and propagation in metabolomics data analysis" 229 | }, 230 | { 231 | "PMCID": null, 232 | "citation": "Moseley Bioinformatics Laboratory Software Repository for downloadhttp://bioinformatics.cesb.uky.edu/bin/view/Main/SoftwareDevelopment/", 233 | "doi": null, 234 | "pubmed_id": null, 235 | "title": null 236 | } 237 | ], 238 | "results": null, 239 | "title": "A Computational Framework for High-Throughput Isotopic Natural Abundance Correction of Omics-Level Ultra-High Resolution FT-MS Datasets" 240 | } 241 | } -------------------------------------------------------------------------------- /tests/testing_files/ref_srch_report_default.txt: -------------------------------------------------------------------------------- 1 | Reference Line: 2 | Jin H, Mitchell J, Moseley H. Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases. Metabolites. 2020 September; 10(9):368-. doi: 10.3390/metabo10090368. 3 | Tokenized Reference: 4 | Authors: Jin H, Mitchell J, Moseley H 5 | Title: Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases. 6 | PMID: None 7 | DOI: 10.3390/metabo10090368 8 | Queried Information: 9 | DOI: 10.3390/metabo10090368 10 | PMID: None 11 | PMCID: None 12 | Grants: 1419282 13 | 14 | Reference Line: 15 | Powell C, Moseley H. The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository. Metabolites. 2021 March; 11(3):163-. doi: 10.3390/metabo11030163. 16 | Tokenized Reference: 17 | Authors: Powell C, Moseley H 18 | Title: The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository. 
19 | PMID: None 20 | DOI: 10.3390/metabo11030163 21 | Queried Information: 22 | DOI: 10.3390/metabo11030163 23 | PMID: None 24 | PMCID: None 25 | Grants: P42ES007380, R03OD030603, 1419282, 2020026 26 | 27 | -------------------------------------------------------------------------------- /tests/testing_files/ref_srch_report_tabular3.csv: -------------------------------------------------------------------------------- 1 | Authors,Grants,Abstract,Conclusions,Copyrights,DOI,Journal,Keywords,Methods,PMID,Results,Title,PMCID,Publication Year,Publication Month,Publication Day,Tok Title,Tok DOI,Tok PMID,Tok Authors,Ref Line,Comparison,First Author,Last Author,Pub_Authors 2 | "Huan Jin, Joshua M. Mitchell, Hunter N. B. Moseley",1419282,None,None,None,10.3390/metabo10090368,MDPI AG,None,None,None,None,Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases,None,2020,9,11,Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases.,10.3390/metabo10090368,None,"Jin H, Mitchell J, Moseley H","Jin H, Mitchell J, Moseley H. Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases. Metabolites. 2020 September; 10(9):368-. doi: 10.3390/metabo10090368.",N/A,"Jin, Huan","Moseley, Hunter N. B.","Jin, Huan None None" 3 | "Huan Jin, Joshua M. Mitchell, Hunter N. B. Moseley",1419282,None,None,None,10.3390/metabo10090368,MDPI AG,None,None,None,None,Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases,None,2020,9,11,Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases.,10.3390/metabo10090368,None,"Jin H, Mitchell J, Moseley H","Jin H, Mitchell J, Moseley H. 
Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases. Metabolites. 2020 September; 10(9):368-. doi: 10.3390/metabo10090368.",N/A,"Jin, Huan","Moseley, Hunter N. B.","Mitchell, Joshua M. None None" 4 | "Huan Jin, Joshua M. Mitchell, Hunter N. B. Moseley",1419282,None,None,None,10.3390/metabo10090368,MDPI AG,None,None,None,None,Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases,None,2020,9,11,Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases.,10.3390/metabo10090368,None,"Jin H, Mitchell J, Moseley H","Jin H, Mitchell J, Moseley H. Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases. Metabolites. 2020 September; 10(9):368-. doi: 10.3390/metabo10090368.",N/A,"Jin, Huan","Moseley, Hunter N. B.","Moseley, Hunter N. B. None None" 5 | "Christian D. Powell, Hunter N.B. Moseley","P42ES007380, R03OD030603, 1419282, 2020026",None,None,None,10.3390/metabo11030163,MDPI AG,None,None,None,None,"The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository",None,2021,3,12,"The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository.",10.3390/metabo11030163,None,"Powell C, Moseley H","Powell C, Moseley H. The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository. Metabolites. 2021 March; 11(3):163-. doi: 10.3390/metabo11030163.",N/A,"Powell, Christian D.","Moseley, Hunter N.B.","Powell, Christian D. None None" 6 | "Christian D. Powell, Hunter N.B. 
Moseley","P42ES007380, R03OD030603, 1419282, 2020026",None,None,None,10.3390/metabo11030163,MDPI AG,None,None,None,None,"The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository",None,2021,3,12,"The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository.",10.3390/metabo11030163,None,"Powell C, Moseley H","Powell C, Moseley H. The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository. Metabolites. 2021 March; 11(3):163-. doi: 10.3390/metabo11030163.",N/A,"Powell, Christian D.","Moseley, Hunter N.B.","Moseley, Hunter N.B. None None" 7 | -------------------------------------------------------------------------------- /tests/testing_files/ref_srch_report_tabular4.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/academic_tracker/eee61cb899ce190edef0e882a11dfedf19bae263/tests/testing_files/ref_srch_report_tabular4.xlsx -------------------------------------------------------------------------------- /tests/testing_files/ref_srch_report_template_string.txt: -------------------------------------------------------------------------------- 1 | Authors: 2 | Grants: 3 | Abstract: 4 | Conclusions: 5 | Copyrights: 6 | DOI: 7 | Journal: 8 | Keywords: 9 | Methods: 10 | PMID: 11 | Results: 12 | Title: 13 | PMCID: <PMCID> 14 | Publication Year: <publication_year> 15 | Publication Month: <publication_month> 16 | Publication Day: <publication_day> 17 | Tok Title: <tok_title> 18 | Tok DOI: <tok_DOI> 19 | Tok PMID: <tok_PMID> 20 | Tok Authors: <tok_authors> 21 | Ref Line: <ref_line> 22 | Comparison: <is_in_comparison_file> 23 | First Author: <first_author> 24 | Last Author: <last_author> 25 | Pub_Authors: 
<pub_author_loop>\n<pub_author_last>, <pub_author_first> <pub_author_initials> <pub_author_affiliations></pub_author_loop> 26 | References: <reference_loop>\nCitation: <reference_citation>\nTitle: <reference_title>\nPMID: <reference_PMID>\nPMCID: <reference_PMCID>\nDOI: <reference_DOI>\n</reference_loop> 27 | 28 | </pub_loop> -------------------------------------------------------------------------------- /tests/testing_files/reference_test.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/academic_tracker/eee61cb899ce190edef0e882a11dfedf19bae263/tests/testing_files/reference_test.docx -------------------------------------------------------------------------------- /tests/testing_files/reference_test.txt: -------------------------------------------------------------------------------- 1 | William J. Carreer, Robert M. Flight, and Hunter N.B. Moseley. "A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets" Metabolites , v.3 , 2013 , p.853 2 | Joshua M. Mitchell, Teresa W-.M. Fan, Andrew N. Lane, and Hunter N.B. Moseley "Development of Large-Scale Metabolite Identification Methods for Metabolomics" Frontiers in Genetics - Systems Biology , v.5 , 2014 , p.237 10.3389/fgene.2014.00237 3 | Richard M. Higashi, Teresa W-M. Fan, Pawel K. Lorkiewicz, Hunter N.B. Moseley, Andrew N. 
Lane "Stable Isotope Labeled Tracers for Metabolic Pathway Elucidation by GC-MS and FT-MS" Mass Spectrometry Methods in Metabolomics , v.53 , 2015 , p.337 10.1002/mrc.4199 -------------------------------------------------------------------------------- /tests/testing_files/scholarly_DOIs.json: -------------------------------------------------------------------------------- 1 | { 2 | "Adipose-derived autotaxin regulates inflammation and steatosis associated with diet-induced obesity": "10.1371/journal.pone.0208099", 3 | "Antibodies against Lysophosphatidic acid protect against blast-induced ocular injuries": "10.3389/fneur.2020.611816", 4 | "Application of metabolomics to characterize environmental pollutant toxicity and disease risks": "10.1515/reveh-2019-0030", 5 | "Autotaxin impedes anti-tumor immunity by suppressing chemotaxis and tumor infiltration of CD8+ T cells": "10.1016/j.celrep.2021.110013", 6 | "Autotaxin inhibition reduces cardiac inflammation and mitigates adverse cardiac remodeling after myocardial infarction": "10.1016/j.yjmcc.2020.09.011", 7 | "Calcium ion chelation preserves platelet function during cold storage": "10.1161/atvbaha.120.314879", 8 | "Ceramide regulates interaction of HSD17B4 with PEX5 and function of peroxisomes in astrocytes": "10.1016/j.bbalip.2019.05.017", 9 | "Ceramide regulates interaction of Hsd17b4 with Pex5 and function of peroxisomes": "10.1016/j.bbalip.2019.05.017", 10 | "Chemical sympathectomy reduces peripheral inflammatory responses to acute and chronic sleep fragmentation": "10.1152/ajpregu.00358.2019", 11 | "Co-exposure to PCB126 and PFOS increases biomarkers associated with cardiovascular disease risk and liver injury in mice": "10.1016/j.taap.2020.115301", 12 | "Coronary Artery Disease Risk-Associated Plpp3 Gene and Its Product Lipid Phosphate Phosphatase 3 Regulate Experimental Atherosclerosis": "10.1161/atvbaha.119.313056", 13 | "Correction: Synaptic phospholipids as a new target for cortical hyperexcitability and 
E/I balance in psychiatric disorders": "10.1038/s41380-018-0320-1", 14 | "De novo fatty acid synthesis-driven sphingolipid metabolism promotes metastatic potential of colorectal cancer": "10.1158/1541-7786.mcr-18-0199", 15 | "Dietary inulin decreases circulating ceramides by suppressing neutral sphingomyelinase expression and activity in mice": "10.1194/jlr.ra119000346", 16 | "Direct injection analysis of per and polyfluoroalkyl substances in surface and drinking water by sample filtration and liquid chromatography-tandem mass spectrometry": "10.1016/j.chroma.2021.462426", 17 | "Discovery of glycerol phosphate modification on streptococcal rhamnose polysaccharides": "10.1038/s41589-019-0251-4", 18 | "Dual-Functional Phosphorene Nanocomposite Membranes for the Treatment of Perfluorinated Water: An Investigation of Perfluorooctanoic Acid Removal via Filtration Combined with \u2026": "10.3390/membranes11010018", 19 | "Effects of aryl hydrocarbon receptor deficiency on PCB-77-induced impairment of glucose homeostasis during weight loss in male and female obese mice": "10.1289/ehp4133", 20 | "Effects of diet and hyperlipidemia on levels and distribution of circulating lysophosphatidic acid": "10.1194/jlr.m093096", 21 | "Hepatic metabolomics reveals that liver injury increases PCB 126-induced oxidative stress and metabolic dysfunction": "10.1016/j.chemosphere.2018.10.196", 22 | "High-Throughput UHPLC-MS/MS Measurement of Per-and Poly-Fluorinated Alkyl Substances in Human Serum.": "10.1093/jat/bkz097", 23 | "High-throughput uhplc-ms/ms measurement of per-and poly-fluorinated alkyl substances in human serum": "10.1093/jat/bkz097", 24 | "LPA receptor 4 deficiency attenuates experimental atherosclerosis": "10.1194/jlr.m091066", 25 | "Local M-CSF (Macrophage Colony-Stimulating Factor) Expression Regulates Macrophage Proliferation and Apoptosis in Atherosclerosis": "10.1161/atvbaha.120.315255", 26 | "Mitoquinone mesylate (MitoQ) prevents sepsis-induced diaphragm dysfunction": 
"10.1152/japplphysiol.01053.2020", 27 | "Molecular basis for the exploitation of nuclear mRNA export by influenza A virus": "10.1096/fasebj.2020.34.s1.03380", 28 | "Myeloid-specific Deletion in Lipid Phosphate Phosphatase3 (plpp3) Increases Cardiac Inflammation": "10.1161/circ.142.suppl_3.16007", 29 | "Phospholipases D: making sense of redundancy and duplication": "10.1042/bsr20181883", 30 | "Pioglitazone does not synergize with mirabegron to increase beige fat or further improve glucose metabolism": "10.1172/jci.insight.143650", 31 | "Prebiotic inulin consumption reduces dioxin-like PCB 126-mediated hepatotoxicity and gut dysbiosis in hyperlipidemic Ldlr deficient mice": "10.1016/j.envpol.2020.114183", 32 | "Prostaglandin D2-ethanolamide induces skin cancer apoptosis by suppressing the activity of cellular antioxidants": "10.1016/j.prostaglandins.2019.03.001", 33 | "Proteomic analysis reveals novel mechanisms by which polychlorinated biphenyls compromise the liver promoting diet-induced steatohepatitis": "10.1021/acs.jproteome.8b00886", 34 | "Pseudomonas aeruginosa-derived pyocyanin reduces adipocyte differentiation, body weight, and fat mass as mechanisms contributing to septic cachexia": "10.1016/j.fct.2019.05.012", 35 | "Regulation of PLPP3 gene expression by NF-\u03baB family transcription factors": "10.1074/jbc.ra119.009002", 36 | "Roles for lysophosphatidic acid signaling in vascular development and disease": "10.1016/j.bbalip.2020.158734", 37 | "Serum concentrations of legacy and emerging per-and polyfluoroalkyl substances in the Anniston Community Health Surveys (ACHS I and ACHS II)": "10.1016/j.envint.2021.106907", 38 | "Spermine synthase and MYC cooperate to maintain colorectal cancer cell survival by repressing Bim expression": "10.1038/s41467-020-17067-x", 39 | "The late stage of COPI vesicle fission requires shorter forms of phosphatidic acid and diacylglycerol": "10.1038/s41467-019-11324-4", 40 | "The \u03b23-adrenergic receptor agonist mirabegron 
improves glucose homeostasis in obese humans": "10.1530/ey.17.11.10", 41 | "Therapeutic development of group B Streptococcus meningitis by targeting a host cell signaling network involving EGFR": "10.15252/emmm.202012651", 42 | "Trimethylamine N-oxide binds and activates PERK to promote metabolic dysfunction": "10.1016/j.cmet.2019.08.021", 43 | "Untargeted Stable Isotope Probing of the Gut Microbiota Metabolome Using 13C-Labeled Dietary Fibers": "10.1021/acs.jproteome.1c00124", 44 | "Untargeted Stable Isotope Probing of the Gut Microbiota Metabolome Using \u00b9\u00b3C-Labeled Dietary Fibers": "10.1021/acs.jproteome.1c00124", 45 | "XX sex chromosome complement promotes atherosclerosis in mice": "10.1038/s41467-019-10462-z" 46 | } 47 | -------------------------------------------------------------------------------- /tests/testing_files/testing_csv.csv: -------------------------------------------------------------------------------- 1 | col1,col2 2 | data1,data2 3 | -------------------------------------------------------------------------------- /tests/testing_files/testing_docx.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/academic_tracker/eee61cb899ce190edef0e882a11dfedf19bae263/tests/testing_files/testing_docx.docx -------------------------------------------------------------------------------- /tests/testing_files/testing_text.txt: -------------------------------------------------------------------------------- 1 | line 1 2 | line 2 -------------------------------------------------------------------------------- /tests/testing_files/tokenization_report.txt: -------------------------------------------------------------------------------- 1 | Reference Line: 2 | Powell C, Moseley H. The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository. Metabolites. 2021 March; 11(3):163-. 
doi: 10.3390/metabo11030163. 3 | Tokenized Reference: 4 | Authors: Powell C, Moseley H 5 | Title: The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository. 6 | PMID: None 7 | DOI: 10.3390/metabo11030163 8 | 9 | Reference Line: 10 | Jin H, Mitchell J, Moseley H. Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases. Metabolites. 2020 September; 10(9):368-. doi: 10.3390/metabo10090368. 11 | Tokenized Reference: 12 | Authors: Jin H, Mitchell J, Moseley H 13 | Title: Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases. 14 | PMID: None 15 | DOI: 10.3390/metabo10090368 16 | 17 | Reference Line: 18 | N/A 19 | Tokenized Reference: 20 | Authors: None 21 | Title: None 22 | PMID: None 23 | DOI: None 24 | 25 | -------------------------------------------------------------------------------- /tests/testing_files/tokenized_MEDLINE2.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "DOI": "10.1002/hep.32467", 4 | "PMID": "35313030", 5 | "authors": [ 6 | { 7 | "initials": "JF", 8 | "last": "Creeden" 9 | }, 10 | { 11 | "initials": "ZA", 12 | "last": "Kipp" 13 | }, 14 | { 15 | "initials": "M", 16 | "last": "Xu" 17 | }, 18 | { 19 | "initials": "RM", 20 | "last": "Flight" 21 | }, 22 | { 23 | "initials": "HNB", 24 | "last": "Moseley" 25 | }, 26 | { 27 | "initials": "GJ", 28 | "last": "Martinez" 29 | }, 30 | { 31 | "initials": "WH", 32 | "last": "Lee" 33 | }, 34 | { 35 | "initials": "K", 36 | "last": "Alganem" 37 | }, 38 | { 39 | "initials": "AS", 40 | "last": "Imami" 41 | }, 42 | { 43 | "initials": "MR", 44 | "last": "McMullen" 45 | }, 46 | { 47 | "initials": "S", 48 | "last": "Roychowdhury" 49 | }, 50 | { 51 | "initials": "AM", 52 | "last": "Nawabi" 53 | }, 54 | { 55 | "initials": "JA", 56 
| "last": "Hipp" 57 | }, 58 | { 59 | "initials": "S", 60 | "last": "Softic" 61 | }, 62 | { 63 | "initials": "SA", 64 | "last": "Weinman" 65 | }, 66 | { 67 | "initials": "R", 68 | "last": "McCullumsmith" 69 | }, 70 | { 71 | "initials": "LE", 72 | "last": "Nagy" 73 | }, 74 | { 75 | "initials": "TD", 76 | "last": "Hinds" 77 | } 78 | ], 79 | "pub_dict_key": "", 80 | "reference_line": "", 81 | "title": "Hepatic kinome atlas: An in-depth identification of kinase pathways in liver fibrosis of humans and rodents." 82 | } 83 | ] -------------------------------------------------------------------------------- /tests/testing_files/tokenized_citations_for_report_test.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "DOI": "10.3390/metabo11030163", 4 | "PMID": "", 5 | "authors": [ 6 | { 7 | "initials": "C", 8 | "last": "Powell" 9 | }, 10 | { 11 | "initials": "H", 12 | "last": "Moseley" 13 | } 14 | ], 15 | "pub_dict_key": "https://doi.org/10.3390/metabo11030163", 16 | "reference_line": "Powell C, Moseley H.\n \n The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository.\n Metabolites. 2021 March; 11(3):163-. doi: 10.3390/metabo11030163.", 17 | "title": "The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository." 18 | }, 19 | { 20 | "DOI": "10.3390/metabo10090368", 21 | "PMID": "", 22 | "authors": [ 23 | { 24 | "initials": "H", 25 | "last": "Jin" 26 | }, 27 | { 28 | "initials": "J", 29 | "last": "Mitchell" 30 | }, 31 | { 32 | "initials": "H", 33 | "last": "Moseley" 34 | } 35 | ], 36 | "pub_dict_key": "https://doi.org/10.3390/metabo10090368", 37 | "reference_line": "Jin H, Mitchell J, Moseley H.\n \n Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases.\n Metabolites. 
2020 September; 10(9):368-. doi: 10.3390/metabo10090368.", 38 | "title": "Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases." 39 | } 40 | ] 41 | -------------------------------------------------------------------------------- /tests/testing_files/tokenized_citations_for_report_test2.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "DOI": "10.3390/metabo11030163", 4 | "PMID": "", 5 | "authors": [ 6 | { 7 | "initials": "C", 8 | "last": "Powell" 9 | }, 10 | { 11 | "initials": "H", 12 | "last": "Moseley" 13 | } 14 | ], 15 | "pub_dict_key": "", 16 | "reference_line": "Powell C, Moseley H.\n \n The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository.\n Metabolites. 2021 March; 11(3):163-. doi: 10.3390/metabo11030163.", 17 | "title": "The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository." 18 | }, 19 | { 20 | "DOI": "", 21 | "PMID": "1234", 22 | "authors": [ 23 | { 24 | "initials": "H", 25 | "last": "Jin" 26 | }, 27 | { 28 | "initials": "J", 29 | "last": "Mitchell" 30 | }, 31 | { 32 | "initials": "H", 33 | "last": "Moseley" 34 | } 35 | ], 36 | "pub_dict_key": "", 37 | "reference_line": "Jin H, Mitchell J, Moseley H.\n \n Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases.\n Metabolites. 2020 September; 10(9):368-. doi: 10.3390/metabo10090368.", 38 | "title": "Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases." 
39 | }, 40 | { 41 | "DOI": "", 42 | "PMID": "", 43 | "authors": [ 44 | { 45 | "initials": "H", 46 | "last": "Jin" 47 | }, 48 | { 49 | "initials": "J", 50 | "last": "Mitchell" 51 | }, 52 | { 53 | "initials": "H", 54 | "last": "Moseley" 55 | } 56 | ], 57 | "pub_dict_key": "", 58 | "reference_line": "Jin H, Mitchell J, Moseley H.\n \n Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases.\n Metabolites. 2020 September; 10(9):368-. doi: 10.3390/metabo10090368.", 59 | "title": "Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases." 60 | } 61 | ] 62 | -------------------------------------------------------------------------------- /tests/testing_files/tokenized_citations_for_report_test_empty.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "DOI": "", 4 | "PMID": "", 5 | "authors": [ 6 | { 7 | "initials": "C", 8 | "last": "Powell" 9 | }, 10 | { 11 | "initials": "H", 12 | "last": "Moseley" 13 | } 14 | ], 15 | "pub_dict_key": "", 16 | "reference_line": "Powell C, Moseley H.\n \n The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository.\n Metabolites. 2021 March; 11(3):163-. doi: 10.3390/metabo11030163.", 17 | "title": "" 18 | }, 19 | { 20 | "DOI": "", 21 | "PMID": "", 22 | "authors": [ 23 | { 24 | "initials": "H", 25 | "last": "Jin" 26 | }, 27 | { 28 | "initials": "J", 29 | "last": "Mitchell" 30 | }, 31 | { 32 | "initials": "H", 33 | "last": "Moseley" 34 | } 35 | ], 36 | "pub_dict_key": "", 37 | "reference_line": "Jin H, Mitchell J, Moseley H.\n \n Atom Identifiers Generated by a Neighborhood-Specific Graph Coloring Method Enable Compound Harmonization across Metabolic Databases.\n Metabolites. 2020 September; 10(9):368-. 
doi: 10.3390/metabo10090368.", 38 | "title": "" 39 | } 40 | ] 41 | -------------------------------------------------------------------------------- /tests/testing_files/tokenized_parsing_test.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "DOI": null, 4 | "PMID": "24404440", 5 | "authors": [ 6 | { 7 | "first": "William", 8 | "last": "Carreer", 9 | "middle": "J." 10 | }, 11 | { 12 | "first": "Robert", 13 | "last": "Flight", 14 | "middle": "M." 15 | }, 16 | { 17 | "first": "Hunter", 18 | "last": "Moseley", 19 | "middle": "NB" 20 | } 21 | ], 22 | "pub_dict_key": "", 23 | "reference_line": "Carreer, William J., Robert M. Flight, and Hunter NB Moseley. \"A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets.\" Metabolites 3.4 (2013): 853-866. PMID: 24404440", 24 | "title": "A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets." 25 | }, 26 | { 27 | "DOI": "10.3390/metabo3040853", 28 | "PMID": null, 29 | "authors": [ 30 | { 31 | "initials": "W.J.", 32 | "last": "Carreer" 33 | }, 34 | { 35 | "initials": "R.M.", 36 | "last": "Flight" 37 | }, 38 | { 39 | "initials": "H.N.", 40 | "last": "Moseley" 41 | } 42 | ], 43 | "pub_dict_key": "", 44 | "reference_line": "Carreer, W. J., Flight, R. M., & Moseley, H. N. (2013). A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets. Metabolites, 3(4), 853-866. DOI: 10.3390/metabo3040853", 45 | "title": "A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets" 46 | }, 47 | { 48 | "DOI": "10.3390/metabo3040853", 49 | "PMID": null, 50 | "authors": [ 51 | { 52 | "first": "William", 53 | "last": "Carreer", 54 | "middle": "J." 
55 | }, 56 | { 57 | "first": "Robert", 58 | "last": "Flight", 59 | "middle": "M." 60 | }, 61 | { 62 | "first": "Hunter", 63 | "last": "Moseley", 64 | "middle": "NB" 65 | } 66 | ], 67 | "pub_dict_key": "", 68 | "reference_line": "Carreer, William J., Robert M. Flight, and Hunter NB Moseley. \"A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets.\" Metabolites 3, no. 4 (2013): 853-866. DOI: https://doi.org/10.3390/metabo3040853", 69 | "title": "A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets." 70 | }, 71 | { 72 | "DOI": "10.3390/metabo3040853", 73 | "PMID": "24404440", 74 | "authors": [ 75 | { 76 | "initials": "W.J.", 77 | "last": "Carreer" 78 | }, 79 | { 80 | "initials": "R.M.", 81 | "last": "Flight" 82 | }, 83 | { 84 | "initials": "H.N.", 85 | "last": "Moseley" 86 | } 87 | ], 88 | "pub_dict_key": "", 89 | "reference_line": "Carreer, W.J., Flight, R.M. and Moseley, H.N., 2013. A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets. Metabolites, 3(4), pp.853-866. PMID: 24404440, DOI: 10.3390/metabo3040853", 90 | "title": "A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets" 91 | }, 92 | { 93 | "DOI": null, 94 | "PMID": null, 95 | "authors": [ 96 | { 97 | "initials": "WJ", 98 | "last": "Carreer" 99 | }, 100 | { 101 | "initials": "RM", 102 | "last": "Flight" 103 | }, 104 | { 105 | "initials": "HN", 106 | "last": "Moseley" 107 | } 108 | ], 109 | "pub_dict_key": "", 110 | "reference_line": "Carreer WJ, Flight RM, Moseley HN. A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets. Metabolites. 
2013 Dec;3(4):853-66.", 111 | "title": "A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets" 112 | } 113 | ] 114 | -------------------------------------------------------------------------------- /tests/testing_files/tokenized_ref_test.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "DOI": null, 4 | "PMID": null, 5 | "authors": [ 6 | { 7 | "first": "William", 8 | "last": "Carreer", 9 | "middle": "J." 10 | }, 11 | { 12 | "first": "Robert", 13 | "last": "Flight", 14 | "middle": "M." 15 | }, 16 | { 17 | "first": "Hunter", 18 | "last": "Moseley", 19 | "middle": "N.B." 20 | } 21 | ], 22 | "pub_dict_key": "", 23 | "reference_line": "William J. Carreer, Robert M. Flight, and Hunter N.B. Moseley. \"A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets\" Metabolites , v.3 , 2013 , p.853", 24 | "title": "A computational framework for high-throughput isotopic natural abundance correction of omics-level ultra-high resolution FT-MS datasets" 25 | }, 26 | { 27 | "DOI": null, 28 | "PMID": null, 29 | "authors": [ 30 | { 31 | "first": "Joshua", 32 | "last": "Mitchell", 33 | "middle": "M." 34 | }, 35 | { 36 | "first": "Teresa", 37 | "last": "Fan", 38 | "middle": "W-.M." 39 | }, 40 | { 41 | "first": "Andrew", 42 | "last": "Lane", 43 | "middle": "N." 44 | }, 45 | { 46 | "first": "Hunter", 47 | "last": "Moseley", 48 | "middle": "N.B." 49 | } 50 | ], 51 | "pub_dict_key": "", 52 | "reference_line": "Joshua M. Mitchell, Teresa W-.M. Fan, Andrew N. Lane, and Hunter N.B. 
Moseley \"Development of Large-Scale Metabolite Identification Methods for Metabolomics\" Frontiers in Genetics - Systems Biology , v.5 , 2014 , p.237 10.3389/fgene.2014.00237", 53 | "title": "Development of Large-Scale Metabolite Identification Methods for Metabolomics" 54 | }, 55 | { 56 | "DOI": null, 57 | "PMID": null, 58 | "authors": [ 59 | { 60 | "first": "Richard", 61 | "last": "Higashi", 62 | "middle": "M." 63 | }, 64 | { 65 | "first": "Teresa", 66 | "last": "Fan", 67 | "middle": "W-M." 68 | }, 69 | { 70 | "first": "Pawel", 71 | "last": "Lorkiewicz", 72 | "middle": "K." 73 | }, 74 | { 75 | "first": "Hunter", 76 | "last": "Moseley", 77 | "middle": "N.B." 78 | }, 79 | { 80 | "first": "Andrew", 81 | "last": "Lane", 82 | "middle": "N." 83 | } 84 | ], 85 | "pub_dict_key": "", 86 | "reference_line": "Richard M. Higashi, Teresa W-M. Fan, Pawel K. Lorkiewicz, Hunter N.B. Moseley, Andrew N. Lane \"Stable Isotope Labeled Tracers for Metabolic Pathway Elucidation by GC-MS and FT-MS\" Mass Spectrometry Methods in Metabolomics , v.53 , 2015 , p.337 10.1002/mrc.4199", 87 | "title": "Stable Isotope Labeled Tracers for Metabolic Pathway Elucidation by GC-MS and FT-MS" 88 | } 89 | ] 90 | --------------------------------------------------------------------------------